diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h index 04c8494afc..46e63c4eb1 100644 --- a/include/crm/pengine/internal.h +++ b/include/crm/pengine/internal.h @@ -1,268 +1,268 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef PE_INTERNAL__H # define PE_INTERNAL__H # include # define pe_rsc_info(rsc, fmt, args...) crm_log_tag(LOG_INFO, rsc ? rsc->id : "", fmt, ##args) # define pe_rsc_debug(rsc, fmt, args...) crm_log_tag(LOG_DEBUG, rsc ? rsc->id : "", fmt, ##args) # define pe_rsc_trace(rsc, fmt, args...) crm_log_tag(LOG_TRACE, rsc ? rsc->id : "", fmt, ##args) # define pe_err(fmt...) { was_processing_error = TRUE; crm_config_error = TRUE; crm_err(fmt); } # define pe_warn(fmt...) { was_processing_warning = TRUE; crm_config_warning = TRUE; crm_warn(fmt); } # define pe_proc_err(fmt...) { was_processing_error = TRUE; crm_err(fmt); } # define pe_proc_warn(fmt...) { was_processing_warning = TRUE; crm_warn(fmt); } # define pe_set_action_bit(action, bit) action->flags = crm_set_bit(__FUNCTION__, action->uuid, action->flags, bit) # define pe_clear_action_bit(action, bit) action->flags = crm_clear_bit(__FUNCTION__, action->uuid, action->flags, bit) typedef struct notify_data_s { GHashTable *keys; const char *action; action_t *pre; action_t *post; action_t *pre_done; action_t *post_done; GListPtr active; /* notify_entry_t* */ GListPtr inactive; /* notify_entry_t* */ GListPtr start; /* notify_entry_t* */ GListPtr stop; /* notify_entry_t* */ GListPtr demote; /* notify_entry_t* */ GListPtr promote; /* notify_entry_t* */ GListPtr master; /* notify_entry_t* */ GListPtr slave; /* notify_entry_t* */ } notify_data_t; bool pe_can_fence(pe_working_set_t *data_set, node_t *node); int merge_weights(int w1, int w2); void add_hash_param(GHashTable * hash, const char *name, const char *value); void append_hashtable(gpointer key, gpointer value, gpointer user_data); char *native_parameter(resource_t * rsc, node_t * node, gboolean create, const char *name, pe_working_set_t * data_set); node_t *native_location(resource_t * rsc, GListPtr * list, gboolean current); void pe_metadata(void); void verify_pe_options(GHashTable * options); void common_update_score(resource_t * rsc, const char *id, int score); void native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set); node_t *rsc_known_on(resource_t * rsc, GListPtr * list); gboolean native_unpack(resource_t * rsc, pe_working_set_t * data_set); gboolean group_unpack(resource_t * rsc, pe_working_set_t * data_set); gboolean clone_unpack(resource_t * rsc, pe_working_set_t * data_set); gboolean master_unpack(resource_t * rsc, pe_working_set_t * data_set); resource_t *native_find_rsc(resource_t * rsc, const char *id, node_t * node, int flags); gboolean native_active(resource_t * rsc, gboolean all); gboolean group_active(resource_t * rsc, gboolean all); gboolean clone_active(resource_t * rsc, gboolean all); gboolean master_active(resource_t * rsc, gboolean all); void native_print(resource_t * rsc, const char *pre_text, long options, void *print_data); void group_print(resource_t * rsc, const char *pre_text, long options, void *print_data); void clone_print(resource_t * rsc, const char *pre_text, long options, void *print_data); void master_print(resource_t * rsc, const char *pre_text, long options, void *print_data); void native_free(resource_t * rsc); void group_free(resource_t * rsc); void clone_free(resource_t * rsc); void master_free(resource_t * rsc); enum rsc_role_e native_resource_state(const resource_t * rsc, gboolean current); enum rsc_role_e group_resource_state(const resource_t * rsc, gboolean current); enum rsc_role_e clone_resource_state(const resource_t * rsc, gboolean current); enum rsc_role_e master_resource_state(const resource_t * rsc, gboolean current); gboolean common_unpack(xmlNode * xml_obj, resource_t ** rsc, resource_t * parent, pe_working_set_t * data_set); void common_print(resource_t * rsc, const char *pre_text, long options, void *print_data); void common_free(resource_t * rsc); extern pe_working_set_t *pe_dataset; extern node_t *node_copy(node_t * this_node); extern time_t get_effective_time(pe_working_set_t * data_set); extern int get_failcount(node_t * node, resource_t * rsc, time_t *last_failure, pe_working_set_t * data_set); extern int get_failcount_full(node_t * node, resource_t * rsc, time_t *last_failure, bool effective, pe_working_set_t * data_set); extern int get_failcount_all(node_t * node, resource_t * rsc, time_t *last_failure, pe_working_set_t * data_set); /* Binary like operators for lists of nodes */ extern void node_list_exclude(GHashTable * list, GListPtr list2, gboolean merge_scores); extern GListPtr node_list_dup(GListPtr list, gboolean reset, gboolean filter); extern GListPtr node_list_from_hash(GHashTable * hash, gboolean reset, gboolean filter); extern GHashTable *node_hash_from_list(GListPtr list); static inline gpointer pe_hash_table_lookup(GHashTable * hash, gconstpointer key) { if (hash) { return g_hash_table_lookup(hash, key); } return NULL; } extern action_t *get_pseudo_op(const char *name, pe_working_set_t * data_set); extern gboolean order_actions(action_t * lh_action, action_t * rh_action, enum pe_ordering order); GHashTable *node_hash_dup(GHashTable * hash); extern GListPtr node_list_and(GListPtr list1, GListPtr list2, gboolean filter); extern GListPtr node_list_xor(GListPtr list1, GListPtr list2, gboolean filter); extern GListPtr node_list_minus(GListPtr list1, GListPtr list2, gboolean filter); extern void pe_free_shallow(GListPtr alist); extern void pe_free_shallow_adv(GListPtr alist, gboolean with_data); /* Printing functions for debug */ extern void print_node(const char *pre_text, node_t * node, gboolean details); extern void print_resource(int log_level, const char *pre_text, resource_t * rsc, gboolean details); extern void dump_node_scores_worker(int level, const char *file, const char *function, int line, resource_t * rsc, const char *comment, GHashTable * nodes); extern void dump_node_capacity(int level, const char *comment, node_t * node); extern void dump_rsc_utilization(int level, const char *comment, resource_t * rsc, node_t * node); # define dump_node_scores(level, rsc, text, nodes) do { \ dump_node_scores_worker(level, __FILE__, __FUNCTION__, __LINE__, rsc, text, nodes); \ } while(0) /* Sorting functions */ extern gint sort_rsc_priority(gconstpointer a, gconstpointer b); extern gint sort_rsc_index(gconstpointer a, gconstpointer b); extern xmlNode *find_rsc_op_entry(resource_t * rsc, const char *key); extern action_t *custom_action(resource_t * rsc, char *key, const char *task, node_t * on_node, gboolean optional, gboolean foo, pe_working_set_t * data_set); # define delete_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_DELETE, 0) # define delete_action(rsc, node, optional) custom_action( \ rsc, delete_key(rsc), CRMD_ACTION_DELETE, node, \ optional, TRUE, data_set); # define stopped_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_STOPPED, 0) # define stopped_action(rsc, node, optional) custom_action( \ rsc, stopped_key(rsc), CRMD_ACTION_STOPPED, node, \ optional, TRUE, data_set); # define stop_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_STOP, 0) # define stop_action(rsc, node, optional) custom_action( \ rsc, stop_key(rsc), CRMD_ACTION_STOP, node, \ optional, TRUE, data_set); # define start_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_START, 0) # define start_action(rsc, node, optional) custom_action( \ rsc, start_key(rsc), CRMD_ACTION_START, node, \ optional, TRUE, data_set) # define started_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_STARTED, 0) # define started_action(rsc, node, optional) custom_action( \ rsc, started_key(rsc), CRMD_ACTION_STARTED, node, \ optional, TRUE, data_set) # define promote_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_PROMOTE, 0) # define promote_action(rsc, node, optional) custom_action( \ rsc, promote_key(rsc), CRMD_ACTION_PROMOTE, node, \ optional, TRUE, data_set) # define promoted_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_PROMOTED, 0) # define promoted_action(rsc, node, optional) custom_action( \ rsc, promoted_key(rsc), CRMD_ACTION_PROMOTED, node, \ optional, TRUE, data_set) # define demote_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_DEMOTE, 0) # define demote_action(rsc, node, optional) custom_action( \ rsc, demote_key(rsc), CRMD_ACTION_DEMOTE, node, \ optional, TRUE, data_set) # define demoted_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_DEMOTED, 0) # define demoted_action(rsc, node, optional) custom_action( \ rsc, demoted_key(rsc), CRMD_ACTION_DEMOTED, node, \ optional, TRUE, data_set) extern action_t *find_first_action(GListPtr input, const char *uuid, const char *task, node_t * on_node); extern enum action_tasks get_complex_task(resource_t * rsc, const char *name, gboolean allow_non_atomic); extern GListPtr find_actions(GListPtr input, const char *key, node_t * on_node); extern GListPtr find_actions_exact(GListPtr input, const char *key, node_t * on_node); extern GListPtr find_recurring_actions(GListPtr input, node_t * not_on_node); extern void pe_free_action(action_t * action); extern void resource_location(resource_t * rsc, node_t * node, int score, const char *tag, pe_working_set_t * data_set); extern gint sort_op_by_callid(gconstpointer a, gconstpointer b); extern gboolean get_target_role(resource_t * rsc, enum rsc_role_e *role); extern resource_t *find_clone_instance(resource_t * rsc, const char *sub_id, pe_working_set_t * data_set); extern void destroy_ticket(gpointer data); extern ticket_t *ticket_new(const char *ticket_id, pe_working_set_t * data_set); char *clone_strip(const char *last_rsc_id); char *clone_zero(const char *last_rsc_id); gint sort_node_uname(gconstpointer a, gconstpointer b); bool is_set_recursive(resource_t * rsc, long long flag, bool any); enum rsc_digest_cmp_val { /*! Digests are the same */ RSC_DIGEST_MATCH = 0, /*! Params that require a restart changed */ RSC_DIGEST_RESTART, /*! Some parameter changed. */ RSC_DIGEST_ALL, /*! rsc op didn't have a digest associated with it, so * it is unknown if parameters changed or not. */ RSC_DIGEST_UNKNOWN, }; typedef struct op_digest_cache_s { enum rsc_digest_cmp_val rc; xmlNode *params_all; xmlNode *params_restart; char *digest_all_calc; char *digest_restart_calc; } op_digest_cache_t; op_digest_cache_t *rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node, pe_working_set_t * data_set); gboolean xml_contains_remote_node(xmlNode *xml); gboolean is_baremetal_remote_node(node_t *node); gboolean is_container_remote_node(node_t *node); gboolean is_remote_node(node_t *node); -gboolean rsc_contains_remote_node(pe_working_set_t * data_set, resource_t *rsc); +resource_t * rsc_contains_remote_node(pe_working_set_t * data_set, resource_t *rsc); #endif diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h index 02e3e04e19..b746813041 100644 --- a/include/crm/pengine/status.h +++ b/include/crm/pengine/status.h @@ -1,365 +1,365 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef PENGINE_STATUS__H # define PENGINE_STATUS__H # include # include # include typedef struct node_s pe_node_t; typedef struct node_s node_t; typedef struct pe_action_s action_t; typedef struct pe_action_s pe_action_t; typedef struct resource_s resource_t; typedef struct ticket_s ticket_t; typedef enum no_quorum_policy_e { no_quorum_freeze, no_quorum_stop, no_quorum_ignore, no_quorum_suicide } no_quorum_policy_t; enum node_type { node_ping, node_member, node_remote }; enum pe_restart { pe_restart_restart, pe_restart_ignore }; enum pe_find { pe_find_renamed = 0x001, pe_find_clone = 0x004, pe_find_current = 0x008, pe_find_inactive = 0x010, }; # define pe_flag_have_quorum 0x00000001ULL # define pe_flag_symmetric_cluster 0x00000002ULL # define pe_flag_is_managed_default 0x00000004ULL # define pe_flag_maintenance_mode 0x00000008ULL # define pe_flag_stonith_enabled 0x00000010ULL # define pe_flag_have_stonith_resource 0x00000020ULL # define pe_flag_stop_rsc_orphans 0x00000100ULL # define pe_flag_stop_action_orphans 0x00000200ULL # define pe_flag_stop_everything 0x00000400ULL # define pe_flag_start_failure_fatal 0x00001000ULL # define pe_flag_remove_after_stop 0x00002000ULL # define pe_flag_startup_probes 0x00010000ULL # define pe_flag_have_status 0x00020000ULL # define pe_flag_have_remote_nodes 0x00040000ULL -# define pe_flag_container_probes 0x00080000ULL # define pe_flag_quick_location 0x00100000ULL typedef struct pe_working_set_s { xmlNode *input; crm_time_t *now; /* options extracted from the input */ char *dc_uuid; node_t *dc_node; const char *stonith_action; const char *placement_strategy; unsigned long long flags; int stonith_timeout; int default_resource_stickiness; no_quorum_policy_t no_quorum_policy; GHashTable *config_hash; GHashTable *domains; GHashTable *tickets; GListPtr nodes; GListPtr resources; GListPtr placement_constraints; GListPtr ordering_constraints; GListPtr colocation_constraints; GListPtr ticket_constraints; GListPtr actions; xmlNode *failed; xmlNode *op_defaults; xmlNode *rsc_defaults; /* stats */ int num_synapse; int max_valid_nodes; int order_id; int action_id; /* final output */ xmlNode *graph; GHashTable *template_rsc_sets; const char *localhost; } pe_working_set_t; struct node_shared_s { const char *id; const char *uname; gboolean online; gboolean standby; gboolean standby_onfail; gboolean pending; gboolean unclean; gboolean unseen; gboolean shutdown; gboolean expected_up; gboolean is_dc; int num_resources; GListPtr running_rsc; /* resource_t* */ GListPtr allocated_rsc; /* resource_t* */ resource_t *remote_rsc; GHashTable *attrs; /* char* => char* */ enum node_type type; GHashTable *utilization; /*! cache of calculated rsc digests for this node. */ GHashTable *digest_cache; gboolean maintenance; }; struct node_s { int weight; gboolean fixed; int count; struct node_shared_s *details; }; # include # define pe_rsc_orphan 0x00000001ULL # define pe_rsc_managed 0x00000002ULL # define pe_rsc_block 0x00000004ULL /* Further operations are prohibited due to failure policy */ # define pe_rsc_orphan_container_filler 0x00000008ULL # define pe_rsc_notify 0x00000010ULL # define pe_rsc_unique 0x00000020ULL # define pe_rsc_provisional 0x00000100ULL # define pe_rsc_allocating 0x00000200ULL # define pe_rsc_merging 0x00000400ULL # define pe_rsc_munging 0x00000800ULL # define pe_rsc_try_reload 0x00001000ULL # define pe_rsc_reload 0x00002000ULL # define pe_rsc_failed 0x00010000ULL # define pe_rsc_shutdown 0x00020000ULL # define pe_rsc_runnable 0x00040000ULL # define pe_rsc_start_pending 0x00080000ULL # define pe_rsc_starting 0x00100000ULL # define pe_rsc_stopping 0x00200000ULL # define pe_rsc_migrating 0x00400000ULL # define pe_rsc_failure_ignored 0x01000000ULL +# define pe_rsc_unexpectedly_running 0x02000000ULL # define pe_rsc_needs_quorum 0x10000000ULL # define pe_rsc_needs_fencing 0x20000000ULL # define pe_rsc_needs_unfencing 0x40000000ULL enum pe_graph_flags { pe_graph_none = 0x00000, pe_graph_updated_first = 0x00001, pe_graph_updated_then = 0x00002, pe_graph_disable = 0x00004, }; /* *INDENT-OFF* */ enum pe_action_flags { pe_action_pseudo = 0x00001, pe_action_runnable = 0x00002, pe_action_optional = 0x00004, pe_action_print_always = 0x00008, pe_action_have_node_attrs = 0x00010, pe_action_failure_is_fatal = 0x00020, pe_action_implied_by_stonith = 0x00040, pe_action_dumped = 0x00100, pe_action_processed = 0x00200, pe_action_clear = 0x00400, pe_action_dangle = 0x00800, pe_action_requires_any = 0x01000, /* This action requires one or mre of its dependancies to be runnable * We use this to clear the runnable flag before checking dependancies */ }; /* *INDENT-ON* */ struct resource_s { char *id; char *clone_name; xmlNode *xml; xmlNode *orig_xml; xmlNode *ops_xml; resource_t *parent; void *variant_opaque; enum pe_obj_types variant; resource_object_functions_t *fns; resource_alloc_functions_t *cmds; enum rsc_recovery_type recovery_type; enum pe_restart restart_type; int priority; int stickiness; int sort_index; int failure_timeout; int effective_priority; int migration_threshold; gboolean is_remote_node; unsigned long long flags; GListPtr rsc_cons_lhs; /* rsc_colocation_t* */ GListPtr rsc_cons; /* rsc_colocation_t* */ GListPtr rsc_location; /* rsc_to_node_t* */ GListPtr actions; /* action_t* */ GListPtr rsc_tickets; /* rsc_ticket* */ node_t *allocated_to; GListPtr running_on; /* node_t* */ GHashTable *known_on; /* node_t* */ GHashTable *allowed_nodes; /* node_t* */ enum rsc_role_e role; enum rsc_role_e next_role; GHashTable *meta; GHashTable *parameters; GHashTable *utilization; GListPtr children; /* resource_t* */ GListPtr dangling_migrations; /* node_t* */ node_t *partial_migration_target; node_t *partial_migration_source; resource_t *container; GListPtr fillers; }; struct pe_action_s { int id; int priority; resource_t *rsc; node_t *node; xmlNode *op_entry; char *task; char *uuid; enum pe_action_flags flags; enum rsc_start_requirement needs; enum action_fail_response on_fail; enum rsc_role_e fail_role; action_t *pre_notify; action_t *pre_notified; action_t *post_notify; action_t *post_notified; int seen_count; GHashTable *meta; GHashTable *extra; GListPtr actions_before; /* action_warpper_t* */ GListPtr actions_after; /* action_warpper_t* */ }; struct ticket_s { char *id; gboolean granted; time_t last_granted; gboolean standby; GHashTable *state; }; enum pe_link_state { pe_link_not_dumped, pe_link_dumped, pe_link_dup, }; /* *INDENT-OFF* */ enum pe_ordering { pe_order_none = 0x0, /* deleted */ pe_order_optional = 0x1, /* pure ordering, nothing implied */ pe_order_implies_first = 0x10, /* If 'first' is required, ensure 'then' is too */ pe_order_implies_then = 0x20, /* If 'then' is required, ensure 'first' is too */ pe_order_implies_first_master = 0x40, /* Imply 'first' is required when 'then' is required and then's rsc holds Master role. */ pe_order_runnable_left = 0x100, /* 'then' requires 'first' to be runnable */ pe_order_restart = 0x1000, /* 'then' is runnable if 'first' is optional or runnable */ pe_order_stonith_stop = 0x2000, /* only applies if the action is non-pseudo */ pe_order_serialize_only = 0x4000, /* serialize */ pe_order_implies_first_printed = 0x10000, /* Like ..implies_first but only ensures 'first' is printed, not manditory */ pe_order_implies_then_printed = 0x20000, /* Like ..implies_then but only ensures 'then' is printed, not manditory */ pe_order_asymmetrical = 0x100000, /* Indicates asymmetrical one way ordering constraint. */ pe_order_load = 0x200000, /* Only relevant if... */ pe_order_one_or_more = 0x400000, /* 'then' is only runnable if one or more of it's dependancies are too */ pe_order_trace = 0x4000000 /* test marker */ }; /* *INDENT-ON* */ typedef struct action_wrapper_s action_wrapper_t; struct action_wrapper_s { enum pe_ordering type; enum pe_link_state state; action_t *action; }; const char *rsc_printable_id(resource_t *rsc); gboolean cluster_status(pe_working_set_t * data_set); void set_working_set_defaults(pe_working_set_t * data_set); void cleanup_calculations(pe_working_set_t * data_set); resource_t *pe_find_resource(GListPtr rsc_list, const char *id_rh); node_t *pe_find_node(GListPtr node_list, const char *uname); node_t *pe_find_node_id(GListPtr node_list, const char *id); node_t *pe_find_node_any(GListPtr node_list, const char *id, const char *uname); GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t * data_set); #endif diff --git a/lib/pengine/common.c b/lib/pengine/common.c index 8874c38ca8..6f32490ca5 100644 --- a/lib/pengine/common.c +++ b/lib/pengine/common.c @@ -1,443 +1,441 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include gboolean was_processing_error = FALSE; gboolean was_processing_warning = FALSE; static gboolean check_quorum(const char *value) { if (safe_str_eq(value, "stop")) { return TRUE; } else if (safe_str_eq(value, "freeze")) { return TRUE; } else if (safe_str_eq(value, "ignore")) { return TRUE; } else if (safe_str_eq(value, "suicide")) { return TRUE; } return FALSE; } static gboolean check_health(const char *value) { if (safe_str_eq(value, "none")) { return TRUE; } else if (safe_str_eq(value, "custom")) { return TRUE; } else if (safe_str_eq(value, "only-green")) { return TRUE; } else if (safe_str_eq(value, "progressive")) { return TRUE; } else if (safe_str_eq(value, "migrate-on-red")) { return TRUE; } return FALSE; } static gboolean check_stonith_action(const char *value) { if (safe_str_eq(value, "reboot")) { return TRUE; } else if (safe_str_eq(value, "poweroff")) { return TRUE; } else if (safe_str_eq(value, "off")) { return TRUE; } return FALSE; } static gboolean check_placement_strategy(const char *value) { if (safe_str_eq(value, "default")) { return TRUE; } else if (safe_str_eq(value, "utilization")) { return TRUE; } else if (safe_str_eq(value, "minimal")) { return TRUE; } else if (safe_str_eq(value, "balanced")) { return TRUE; } return FALSE; } /* *INDENT-OFF* */ pe_cluster_option pe_opts[] = { /* name, old-name, validate, default, description */ { "no-quorum-policy", "no_quorum_policy", "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, "What to do when the cluster does not have quorum", NULL }, { "symmetric-cluster", "symmetric_cluster", "boolean", NULL, "true", &check_boolean, "All resources can run anywhere by default", NULL }, { "default-resource-stickiness", "default_resource_stickiness", "integer", NULL, "0", &check_number, "", NULL }, { "is-managed-default", "is_managed_default", "boolean", NULL, "true", &check_boolean, "Should the cluster start/stop resources as required", NULL }, { "maintenance-mode", NULL, "boolean", NULL, "false", &check_boolean, "Should the cluster monitor resources and start/stop them as required", NULL }, { "start-failure-is-fatal", NULL, "boolean", NULL, "true", &check_boolean, "Always treat start failures as fatal", "This was the old default. However when set to FALSE, the cluster will instead use the resource's failcount and value for resource-failure-stickiness" }, { "enable-startup-probes", NULL, "boolean", NULL, "true", &check_boolean, "Should the cluster check for active resources during startup", NULL }, - { "enable-container-probes", NULL, "boolean", NULL, "true", &check_boolean, - "Should the cluster check for active resources on container nodes during startup", NULL }, /* Stonith Options */ { "stonith-enabled", "stonith_enabled", "boolean", NULL, "true", &check_boolean, "Failed nodes are STONITH'd", NULL }, { "stonith-action", "stonith_action", "enum", "reboot, poweroff, off", "reboot", &check_stonith_action, "Action to send to STONITH device", NULL }, { "stonith-timeout", NULL, "time", NULL, "60s", &check_timer, "How long to wait for the STONITH action to complete", NULL }, { "startup-fencing", "startup_fencing", "boolean", NULL, "true", &check_boolean, "STONITH unseen nodes", "Advanced Use Only! Not using the default is very unsafe!" }, /* Timeouts etc */ { "cluster-delay", "transition_idle_timeout", "time", NULL, "60s", &check_time, "Round trip delay over the network (excluding action execution)", "The \"correct\" value will depend on the speed and load of your network and cluster nodes." }, { "batch-limit", NULL, "integer", NULL, "0", &check_number, "The number of jobs that the TE is allowed to execute in parallel", "The \"correct\" value will depend on the speed and load of your network and cluster nodes." }, { "migration-limit", NULL, "integer", NULL, "-1", &check_number, "The number of migration jobs that the TE is allowed to execute in parallel on a node"}, { "default-action-timeout", "default_action_timeout", "time", NULL, "20s", &check_time, "How long to wait for actions to complete", NULL }, /* Orphans and stopping */ { "stop-all-resources", NULL, "boolean", NULL, "false", &check_boolean, "Should the cluster stop all active resources (except those needed for fencing)", NULL }, { "stop-orphan-resources", "stop_orphan_resources", "boolean", NULL, "true", &check_boolean, "Should deleted resources be stopped", NULL }, { "stop-orphan-actions", "stop_orphan_actions", "boolean", NULL, "true", &check_boolean, "Should deleted actions be cancelled", NULL }, { "remove-after-stop", "remove_after_stop", "boolean", NULL, "false", &check_boolean, "Remove resources from the LRM after they are stopped", "Always set this to false. Other values are, at best, poorly tested and potentially dangerous." }, /* { "", "", , "0", "", NULL }, */ /* Storing inputs */ { "pe-error-series-max", NULL, "integer", NULL, "-1", &check_number, "The number of PE inputs resulting in ERRORs to save", "Zero to disable, -1 to store unlimited." }, { "pe-warn-series-max", NULL, "integer", NULL, "5000", &check_number, "The number of PE inputs resulting in WARNINGs to save", "Zero to disable, -1 to store unlimited." }, { "pe-input-series-max", NULL, "integer", NULL, "4000", &check_number, "The number of other PE inputs to save", "Zero to disable, -1 to store unlimited." }, /* Node health */ { "node-health-strategy", NULL, "enum", "none, migrate-on-red, only-green, progressive, custom", "none", &check_health, "The strategy combining node attributes to determine overall node health.", "Requires external entities to create node attributes (named with the prefix '#health') with values: 'red', 'yellow' or 'green'."}, { "node-health-green", NULL, "integer", NULL, "0", &check_number, "The score 'green' translates to in rsc_location constraints", "Only used when node-health-strategy is set to custom or progressive." }, { "node-health-yellow", NULL, "integer", NULL, "0", &check_number, "The score 'yellow' translates to in rsc_location constraints", "Only used when node-health-strategy is set to custom or progressive." }, { "node-health-red", NULL, "integer", NULL, "-INFINITY", &check_number, "The score 'red' translates to in rsc_location constraints", "Only used when node-health-strategy is set to custom or progressive." }, /*Placement Strategy*/ { "placement-strategy", NULL, "enum", "default, utilization, minimal, balanced", "default", &check_placement_strategy, "The strategy to determine resource placement", NULL}, }; /* *INDENT-ON* */ void pe_metadata(void) { config_metadata("Policy Engine", "1.0", "Policy Engine Options", "This is a fake resource that details the options that can be configured for the Policy Engine.", pe_opts, DIMOF(pe_opts)); } void verify_pe_options(GHashTable * options) { verify_all_options(options, pe_opts, DIMOF(pe_opts)); } const char * pe_pref(GHashTable * options, const char *name) { return get_cluster_pref(options, pe_opts, DIMOF(pe_opts), name); } const char * fail2text(enum action_fail_response fail) { const char *result = ""; switch (fail) { case action_fail_ignore: result = "ignore"; break; case action_fail_block: result = "block"; break; case action_fail_recover: result = "recover"; break; case action_fail_migrate: result = "migrate"; break; case action_fail_stop: result = "stop"; break; case action_fail_fence: result = "fence"; break; case action_fail_standby: result = "standby"; break; case action_fail_restart_container: result = "restart-container"; break; } return result; } enum action_tasks text2task(const char *task) { if (safe_str_eq(task, CRMD_ACTION_STOP)) { return stop_rsc; } else if (safe_str_eq(task, CRMD_ACTION_STOPPED)) { return stopped_rsc; } else if (safe_str_eq(task, CRMD_ACTION_START)) { return start_rsc; } else if (safe_str_eq(task, CRMD_ACTION_STARTED)) { return started_rsc; } else if (safe_str_eq(task, CRM_OP_SHUTDOWN)) { return shutdown_crm; } else if (safe_str_eq(task, CRM_OP_FENCE)) { return stonith_node; } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) { return monitor_rsc; } else if (safe_str_eq(task, CRMD_ACTION_NOTIFY)) { return action_notify; } else if (safe_str_eq(task, CRMD_ACTION_NOTIFIED)) { return action_notified; } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { return action_promote; } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { return action_demote; } else if (safe_str_eq(task, CRMD_ACTION_PROMOTED)) { return action_promoted; } else if (safe_str_eq(task, CRMD_ACTION_DEMOTED)) { return action_demoted; } #if SUPPORT_TRACING if (safe_str_eq(task, CRMD_ACTION_CANCEL)) { return no_action; } else if (safe_str_eq(task, CRMD_ACTION_DELETE)) { return no_action; } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) { return no_action; } else if (safe_str_eq(task, CRM_OP_PROBED)) { return no_action; } else if (safe_str_eq(task, CRM_OP_LRM_REFRESH)) { return no_action; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) { return no_action; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { return no_action; } else if (safe_str_eq(task, "fail")) { return no_action; } else if (safe_str_eq(task, "stonith_up")) { return no_action; } else if (safe_str_eq(task, "stonith_complete")) { return no_action; } else if (safe_str_eq(task, "all_stopped")) { return no_action; } crm_trace("Unsupported action: %s", task); #endif return no_action; } const char * task2text(enum action_tasks task) { const char *result = ""; switch (task) { case no_action: result = "no_action"; break; case stop_rsc: result = CRMD_ACTION_STOP; break; case stopped_rsc: result = CRMD_ACTION_STOPPED; break; case start_rsc: result = CRMD_ACTION_START; break; case started_rsc: result = CRMD_ACTION_STARTED; break; case shutdown_crm: result = CRM_OP_SHUTDOWN; break; case stonith_node: result = CRM_OP_FENCE; break; case monitor_rsc: result = CRMD_ACTION_STATUS; break; case action_notify: result = CRMD_ACTION_NOTIFY; break; case action_notified: result = CRMD_ACTION_NOTIFIED; break; case action_promote: result = CRMD_ACTION_PROMOTE; break; case action_promoted: result = CRMD_ACTION_PROMOTED; break; case action_demote: result = CRMD_ACTION_DEMOTE; break; case action_demoted: result = CRMD_ACTION_DEMOTED; break; } return result; } const char * role2text(enum rsc_role_e role) { CRM_CHECK(role >= RSC_ROLE_UNKNOWN, return RSC_ROLE_UNKNOWN_S); CRM_CHECK(role < RSC_ROLE_MAX, return RSC_ROLE_UNKNOWN_S); switch (role) { case RSC_ROLE_UNKNOWN: return RSC_ROLE_UNKNOWN_S; case RSC_ROLE_STOPPED: return RSC_ROLE_STOPPED_S; case RSC_ROLE_STARTED: return RSC_ROLE_STARTED_S; case RSC_ROLE_SLAVE: return RSC_ROLE_SLAVE_S; case RSC_ROLE_MASTER: return RSC_ROLE_MASTER_S; } return RSC_ROLE_UNKNOWN_S; } enum rsc_role_e text2role(const char *role) { CRM_ASSERT(role != NULL); if (safe_str_eq(role, RSC_ROLE_STOPPED_S)) { return RSC_ROLE_STOPPED; } else if (safe_str_eq(role, RSC_ROLE_STARTED_S)) { return RSC_ROLE_STARTED; } else if (safe_str_eq(role, RSC_ROLE_SLAVE_S)) { return RSC_ROLE_SLAVE; } else if (safe_str_eq(role, RSC_ROLE_MASTER_S)) { return RSC_ROLE_MASTER; } else if (safe_str_eq(role, RSC_ROLE_UNKNOWN_S)) { return RSC_ROLE_UNKNOWN; } crm_err("Unknown role: %s", role); return RSC_ROLE_UNKNOWN; } int merge_weights(int w1, int w2) { int result = w1 + w2; if (w1 <= -INFINITY || w2 <= -INFINITY) { if (w1 >= INFINITY || w2 >= INFINITY) { crm_trace("-INFINITY + INFINITY == -INFINITY"); } return -INFINITY; } else if (w1 >= INFINITY || w2 >= INFINITY) { return INFINITY; } /* detect wrap-around */ if (result > 0) { if (w1 <= 0 && w2 < 0) { result = -INFINITY; } } else if (w1 > 0 && w2 > 0) { result = INFINITY; } /* detect +/- INFINITY */ if (result >= INFINITY) { result = INFINITY; } else if (result <= -INFINITY) { result = -INFINITY; } crm_trace("%d + %d = %d", w1, w2, result); return result; } void add_hash_param(GHashTable * hash, const char *name, const char *value) { CRM_CHECK(hash != NULL, return); crm_trace("adding: name=%s value=%s", crm_str(name), crm_str(value)); if (name == NULL || value == NULL) { return; } else if (safe_str_eq(value, "#default")) { return; } else if (g_hash_table_lookup(hash, name) == NULL) { g_hash_table_insert(hash, strdup(name), strdup(value)); } } diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 6dffd56537..772fc91e37 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,3059 +1,3078 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_status); #define set_config_flag(data_set, option, flag) do { \ const char *tmp = pe_pref(data_set->config_hash, option); \ if(tmp) { \ if(crm_is_true(tmp)) { \ set_bit(data_set->flags, flag); \ } else { \ clear_bit(data_set->flags, flag); \ } \ } \ } while(0) gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, enum action_fail_response *failed, pe_working_set_t * data_set); static gboolean determine_remote_online_status(node_t * this_node); static void pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason) { CRM_CHECK(node, return); /* fence remote nodes living in a container by marking the container as failed. */ if (is_container_remote_node(node)) { resource_t *rsc = node->details->remote_rsc->container; if (is_set(rsc->flags, pe_rsc_failed) == FALSE) { crm_warn("Remote node %s will be fenced by recovering container resource %s", node->details->uname, rsc->id, reason); set_bit(rsc->flags, pe_rsc_failed); } } else if (node->details->unclean == FALSE) { if(pe_can_fence(data_set, node)) { crm_warn("Node %s will be fenced %s", node->details->uname, reason); } else { crm_warn("Node %s is unclean %s", node->details->uname, reason); } node->details->unclean = TRUE; } } gboolean unpack_config(xmlNode * config, pe_working_set_t * data_set) { const char *value = NULL; GHashTable *config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); data_set->config_hash = config_hash; unpack_instance_attributes(data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, data_set->now); verify_pe_options(data_set->config_hash); set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes); if(is_not_set(data_set->flags, pe_flag_startup_probes)) { crm_info("Startup probes: disabled (dangerous)"); } - set_config_flag(data_set, "enable-container-probes", pe_flag_container_probes); - if(is_not_set(data_set->flags, pe_flag_container_probes)) { - crm_info("Container probes: disabled"); - } - value = pe_pref(data_set->config_hash, "stonith-timeout"); data_set->stonith_timeout = crm_get_msec(value); crm_debug("STONITH timeout: %d", data_set->stonith_timeout); set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled); crm_debug("STONITH of failed nodes is %s", is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); crm_trace("STONITH will %s nodes", data_set->stonith_action); set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything); crm_debug("Stop all active resources: %s", is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false"); set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster); if (is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "default-resource-stickiness"); data_set->default_resource_stickiness = char2score(value); crm_debug("Default stickiness: %d", data_set->default_resource_stickiness); value = pe_pref(data_set->config_hash, "no-quorum-policy"); if (safe_str_eq(value, "ignore")) { data_set->no_quorum_policy = no_quorum_ignore; } else if (safe_str_eq(value, "freeze")) { data_set->no_quorum_policy = no_quorum_freeze; } else if (safe_str_eq(value, "suicide")) { gboolean do_panic = FALSE; crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic); if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { crm_config_err ("Setting no-quorum-policy=suicide makes no sense if stonith-enabled=false"); } if (do_panic && is_set(data_set->flags, pe_flag_stonith_enabled)) { data_set->no_quorum_policy = no_quorum_suicide; } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && do_panic == FALSE) { crm_notice("Resetting no-quorum-policy to 'stop': The cluster has never had quorum"); data_set->no_quorum_policy = no_quorum_stop; } } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of CCM Quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of CCM Quorum: Stop ALL resources"); break; case no_quorum_suicide: crm_notice("On loss of CCM Quorum: Fence all remaining nodes"); break; case no_quorum_ignore: crm_notice("On loss of CCM Quorum: Ignore"); break; } set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans); crm_trace("Orphan resources are %s", is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans); crm_trace("Orphan resource actions are %s", is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop); crm_trace("Stopped resources are removed from the status section: %s", is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false"); set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode); crm_trace("Maintenance mode: %s", is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false"); if (is_set(data_set->flags, pe_flag_maintenance_mode)) { clear_bit(data_set->flags, pe_flag_is_managed_default); } else { set_config_flag(data_set, "is-managed-default", pe_flag_is_managed_default); } crm_trace("By default resources are %smanaged", is_set(data_set->flags, pe_flag_is_managed_default) ? "" : "not "); set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal); crm_trace("Start failures are %s", is_set(data_set->flags, pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount"); node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red")); node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green")); node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow")); crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s", pe_pref(data_set->config_hash, "node-health-red"), pe_pref(data_set->config_hash, "node-health-yellow"), pe_pref(data_set->config_hash, "node-health-green")); data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy"); crm_trace("Placement strategy: %s", data_set->placement_strategy); return TRUE; } static void destroy_digest_cache(gpointer ptr) { op_digest_cache_t *data = ptr; free_xml(data->params_all); free_xml(data->params_restart); free(data->digest_all_calc); free(data->digest_restart_calc); free(data); } static node_t * create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set) { node_t *new_node = NULL; if (pe_find_node(data_set->nodes, uname) != NULL) { crm_config_warn("Detected multiple node entries with uname=%s" " - this is rarely intended", uname); } new_node = calloc(1, sizeof(node_t)); if (new_node == NULL) { return NULL; } new_node->weight = char2score(score); new_node->fixed = FALSE; new_node->details = calloc(1, sizeof(struct node_shared_s)); if (new_node->details == NULL) { free(new_node); return NULL; } crm_trace("Creating node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->running_rsc = NULL; new_node->details->type = node_ping; if (safe_str_eq(type, "remote")) { new_node->details->type = node_remote; set_bit(data_set->flags, pe_flag_have_remote_nodes); } else if (type == NULL || safe_str_eq(type, "member") || safe_str_eq(type, NORMALNODE)) { new_node->details->type = node_member; } new_node->details->attrs = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); new_node->details->utilization = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); new_node->details->digest_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_digest_cache); data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname); return new_node; } static const char * expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, GHashTable **rsc_name_check) { xmlNode *xml_rsc = NULL; xmlNode *xml_tmp = NULL; xmlNode *attr_set = NULL; xmlNode *attr = NULL; const char *container_id = ID(xml_obj); const char *remote_name = NULL; const char *remote_server = NULL; const char *remote_port = NULL; const char *connect_timeout = "60s"; char *tmp_id = NULL; for (attr_set = __xml_first_child(xml_obj); attr_set != NULL; attr_set = __xml_next(attr_set)) { if (safe_str_neq((const char *)attr_set->name, XML_TAG_META_SETS)) { continue; } for (attr = __xml_first_child(attr_set); attr != NULL; attr = __xml_next(attr)) { const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); if (safe_str_eq(name, "remote-node")) { remote_name = value; } else if (safe_str_eq(name, "remote-addr")) { remote_server = value; } else if (safe_str_eq(name, "remote-port")) { remote_port = value; } else if (safe_str_eq(name, "remote-connect-timeout")) { connect_timeout = value; } } } if (remote_name == NULL) { return NULL; } if (*rsc_name_check == NULL) { *rsc_name_check = g_hash_table_new(crm_str_hash, g_str_equal); for (xml_rsc = __xml_first_child(parent); xml_rsc != NULL; xml_rsc = __xml_next(xml_rsc)) { const char *id = ID(xml_rsc); /* avoiding heap allocation here because we know the duration of this hashtable allows us to */ g_hash_table_insert(*rsc_name_check, (char *) id, (char *) id); } } if (g_hash_table_lookup(*rsc_name_check, remote_name)) { crm_err("Naming conflict with remote-node=%s. remote-nodes can not have the same name as a resource.", remote_name); return NULL; } xml_rsc = create_xml_node(parent, XML_CIB_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, remote_name); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, "ocf"); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, "pacemaker"); crm_xml_add(xml_rsc, XML_ATTR_TYPE, "remote"); xml_tmp = create_xml_node(xml_rsc, XML_TAG_META_SETS); tmp_id = crm_concat(remote_name, XML_TAG_META_SETS, '_'); crm_xml_add(xml_tmp, XML_ATTR_ID, tmp_id); free(tmp_id); attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); tmp_id = crm_concat(remote_name, "meta-attributes-container", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_RSC_ATTR_CONTAINER); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, container_id); free(tmp_id); attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); tmp_id = crm_concat(remote_name, "meta-attributes-internal", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_RSC_ATTR_INTERNAL_RSC); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, "true"); free(tmp_id); xml_tmp = create_xml_node(xml_rsc, "operations"); attr = create_xml_node(xml_tmp, XML_ATTR_OP); tmp_id = crm_concat(remote_name, "monitor-interval-30s", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_ATTR_TIMEOUT, "30s"); crm_xml_add(attr, XML_LRM_ATTR_INTERVAL, "30s"); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "monitor"); free(tmp_id); if (connect_timeout) { attr = create_xml_node(xml_tmp, XML_ATTR_OP); tmp_id = crm_concat(remote_name, "start-interval-0", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_ATTR_TIMEOUT, connect_timeout); crm_xml_add(attr, XML_LRM_ATTR_INTERVAL, "0"); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "start"); free(tmp_id); } if (remote_port || remote_server) { xml_tmp = create_xml_node(xml_rsc, XML_TAG_ATTR_SETS); tmp_id = crm_concat(remote_name, XML_TAG_ATTR_SETS, '_'); crm_xml_add(xml_tmp, XML_ATTR_ID, tmp_id); free(tmp_id); if (remote_server) { attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); tmp_id = crm_concat(remote_name, "instance-attributes-addr", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "addr"); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_server); free(tmp_id); } if (remote_port) { attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); tmp_id = crm_concat(remote_name, "instance-attributes-port", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "port"); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_port); free(tmp_id); } } return remote_name; } static void handle_startup_fencing(pe_working_set_t *data_set, node_t *new_node) { static const char *blind_faith = NULL; static gboolean unseen_are_unclean = TRUE; static gboolean init_startup_fence_params = FALSE; if (init_startup_fence_params == FALSE) { blind_faith = pe_pref(data_set->config_hash, "startup-fencing"); init_startup_fence_params = TRUE; if (crm_is_true(blind_faith) == FALSE) { unseen_are_unclean = FALSE; crm_warn("Blind faith: not fencing unseen nodes"); } } if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE || unseen_are_unclean == FALSE) { /* blind faith... */ new_node->details->unclean = FALSE; } else { /* all nodes are unclean until we've seen their * status entry */ new_node->details->unclean = TRUE; } /* We need to be able to determine if a node's status section * exists or not separate from whether the node is unclean. */ new_node->details->unseen = TRUE; } gboolean unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; const char *score = NULL; for (xml_obj = __xml_first_child(xml_nodes); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) { if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) { new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); crm_trace("Processing node %s/%s", uname, id); if (id == NULL) { crm_config_err("Must specify id tag in "); continue; } new_node = create_node(id, uname, type, score, data_set); if (new_node == NULL) { return FALSE; } /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ handle_startup_fencing(data_set, new_node); add_node_attrs(xml_obj, new_node, FALSE, data_set); unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_UTILIZATION, NULL, new_node->details->utilization, NULL, FALSE, data_set->now); crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); } } if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) { crm_info("Creating a fake local node"); create_node(data_set->localhost, data_set->localhost, NULL, 0, data_set); } return TRUE; } static void g_hash_destroy_node_list(gpointer data) { GListPtr domain = data; g_list_free_full(domain, free); } gboolean unpack_domains(xmlNode * xml_domains, pe_working_set_t * data_set) { const char *id = NULL; GListPtr domain = NULL; xmlNode *xml_node = NULL; xmlNode *xml_domain = NULL; crm_debug("Unpacking domains"); data_set->domains = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_node_list); for (xml_domain = __xml_first_child(xml_domains); xml_domain != NULL; xml_domain = __xml_next(xml_domain)) { if (crm_str_eq((const char *)xml_domain->name, XML_CIB_TAG_DOMAIN, TRUE)) { domain = NULL; id = crm_element_value(xml_domain, XML_ATTR_ID); for (xml_node = __xml_first_child(xml_domain); xml_node != NULL; xml_node = __xml_next(xml_node)) { if (crm_str_eq((const char *)xml_node->name, XML_CIB_TAG_NODE, TRUE)) { node_t *copy = NULL; node_t *node = NULL; const char *uname = crm_element_value(xml_node, "name"); const char *score = crm_element_value(xml_node, XML_RULE_ATTR_SCORE); if (uname == NULL) { crm_config_err("Invalid domain %s: Must specify id tag in ", id); continue; } node = pe_find_node(data_set->nodes, uname); if (node == NULL) { node = pe_find_node_id(data_set->nodes, uname); } if (node == NULL) { crm_config_warn("Invalid domain %s: Node %s does not exist", id, uname); continue; } copy = node_copy(node); copy->weight = char2score(score); crm_debug("Adding %s to domain %s with score %s", node->details->uname, id, score); domain = g_list_prepend(domain, copy); } } if (domain) { crm_debug("Created domain %s with %d members", id, g_list_length(domain)); g_hash_table_replace(data_set->domains, strdup(id), domain); } } } return TRUE; } static void destroy_template_rsc_set(gpointer data) { xmlNode *rsc_set = data; free_xml(rsc_set); } static void setup_container(resource_t * rsc, pe_working_set_t * data_set) { const char *container_id = NULL; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; setup_container(child_rsc, data_set); } return; } container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER); if (container_id && safe_str_neq(container_id, rsc->id)) { resource_t *container = pe_find_resource(data_set->resources, container_id); if (container) { rsc->container = container; container->fillers = g_list_append(container->fillers, rsc); pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id); } else { pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id); } } } gboolean unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; GHashTable *rsc_name_check = NULL; /* generate remote nodes from resource config before unpacking resources */ for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) { const char *new_node_id = NULL; /* remote rsc can be defined as primitive, or exist within the metadata of another rsc */ if (xml_contains_remote_node(xml_obj)) { new_node_id = ID(xml_obj); /* This check is here to make sure we don't iterate over * an expanded node that has already been added to the node list. */ if (new_node_id && pe_find_node(data_set->nodes, new_node_id) != NULL) { continue; } } else { /* expands a metadata defined remote resource into the xml config * as an actual rsc primitive to be unpacked later. */ new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, &rsc_name_check); } if (new_node_id) { crm_trace("detected remote node %s", new_node_id); create_node(new_node_id, new_node_id, "remote", NULL, data_set); } } if (rsc_name_check) { g_hash_table_destroy(rsc_name_check); } return TRUE; } /* Call this after all the nodes and resources have been * unpacked, but before the status section is read. * * A remote node's online status is reflected by the state * of the remote node's connection resource. We need to link * the remote node to this connection resource so we can have * easy access to the connection resource during the PE calculations. */ static void link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc) { node_t *remote_node = NULL; if (new_rsc->is_remote_node == FALSE) { return; } if (is_set(data_set->flags, pe_flag_quick_location)) { /* remote_nodes and remote_resources are not linked in quick location calculations */ return; } print_resource(LOG_DEBUG_3, "Linking remote-node connection resource, ", new_rsc, FALSE); remote_node = pe_find_node(data_set->nodes, new_rsc->id); CRM_CHECK(remote_node != NULL, return;); remote_node->details->remote_rsc = new_rsc; /* If this is a baremetal remote-node (no container resource * associated with it) then we need to handle startup fencing the same way * as cluster nodes. */ if (new_rsc->container == NULL) { handle_startup_fencing(data_set, remote_node); return; } } gboolean unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; GListPtr gIter = NULL; data_set->template_rsc_sets = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_template_rsc_set); for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) { resource_t *new_rsc = NULL; if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) { const char *template_id = ID(xml_obj); if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets, template_id, NULL, NULL) == FALSE) { /* Record the template's ID for the knowledge of its existence anyway. */ g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL); } continue; } crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj)); if (common_unpack(xml_obj, &new_rsc, NULL, data_set)) { data_set->resources = g_list_append(data_set->resources, new_rsc); if (xml_contains_remote_node(xml_obj)) { new_rsc->is_remote_node = TRUE; } print_resource(LOG_DEBUG_3, "Added ", new_rsc, FALSE); } else { crm_config_err("Failed unpacking %s %s", crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID)); if (new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } } } for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; setup_container(rsc, data_set); link_rsc2remotenode(data_set, rsc); } data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority); if (is_not_set(data_set->flags, pe_flag_quick_location) && is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { crm_config_err("Resource start-up disabled since no STONITH resources have been defined"); crm_config_err("Either configure some or disable STONITH with the stonith-enabled option"); crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity"); } return TRUE; } /* The ticket state section: * "/cib/status/tickets/ticket_state" */ static gboolean unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set) { const char *ticket_id = NULL; const char *granted = NULL; const char *last_granted = NULL; const char *standby = NULL; xmlAttrPtr xIter = NULL; ticket_t *ticket = NULL; ticket_id = ID(xml_ticket); if (ticket_id == NULL || strlen(ticket_id) == 0) { return FALSE; } crm_trace("Processing ticket state for %s", ticket_id); ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, data_set); if (ticket == NULL) { return FALSE; } } for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_ticket, prop_name); if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) { continue; } g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value)); } granted = g_hash_table_lookup(ticket->state, "granted"); if (granted && crm_is_true(granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", ticket->id); } last_granted = g_hash_table_lookup(ticket->state, "last-granted"); if (last_granted) { ticket->last_granted = crm_parse_int(last_granted, 0); } standby = g_hash_table_lookup(ticket->state, "standby"); if (standby && crm_is_true(standby)) { ticket->standby = TRUE; if (ticket->granted) { crm_info("Granted ticket '%s' is in standby-mode", ticket->id); } } else { ticket->standby = FALSE; } crm_trace("Done with ticket state for %s", ticket_id); return TRUE; } static gboolean unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; for (xml_obj = __xml_first_child(xml_tickets); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) { if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, TRUE) == FALSE) { continue; } unpack_ticket_state(xml_obj, data_set); } return TRUE; } /* Compatibility with the deprecated ticket state section: * "/cib/status/tickets/instance_attributes" */ static void get_ticket_state_legacy(gpointer key, gpointer value, gpointer user_data) { const char *long_key = key; char *state_key = NULL; const char *granted_prefix = "granted-ticket-"; const char *last_granted_prefix = "last-granted-"; static int granted_prefix_strlen = 0; static int last_granted_prefix_strlen = 0; const char *ticket_id = NULL; const char *is_granted = NULL; const char *last_granted = NULL; const char *sep = NULL; ticket_t *ticket = NULL; pe_working_set_t *data_set = user_data; if (granted_prefix_strlen == 0) { granted_prefix_strlen = strlen(granted_prefix); } if (last_granted_prefix_strlen == 0) { last_granted_prefix_strlen = strlen(last_granted_prefix); } if (strstr(long_key, granted_prefix) == long_key) { ticket_id = long_key + granted_prefix_strlen; if (strlen(ticket_id)) { state_key = strdup("granted"); is_granted = value; } } else if (strstr(long_key, last_granted_prefix) == long_key) { ticket_id = long_key + last_granted_prefix_strlen; if (strlen(ticket_id)) { state_key = strdup("last-granted"); last_granted = value; } } else if ((sep = strrchr(long_key, '-'))) { ticket_id = sep + 1; state_key = strndup(long_key, strlen(long_key) - strlen(sep)); } if (ticket_id == NULL || strlen(ticket_id) == 0) { free(state_key); return; } if (state_key == NULL || strlen(state_key) == 0) { free(state_key); return; } ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, data_set); if (ticket == NULL) { free(state_key); return; } } g_hash_table_replace(ticket->state, state_key, strdup(value)); if (is_granted) { if (crm_is_true(is_granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", ticket->id); } } else if (last_granted) { ticket->last_granted = crm_parse_int(last_granted, 0); } } /* remove nodes that are down, stopping */ /* create +ve rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(xmlNode * status, pe_working_set_t * data_set) { const char *id = NULL; const char *uname = NULL; xmlNode *state = NULL; xmlNode *lrm_rsc = NULL; node_t *this_node = NULL; crm_trace("Beginning unpack"); if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket); } for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) { if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) { xmlNode *xml_tickets = state; GHashTable *state_hash = NULL; /* Compatibility with the deprecated ticket state section: * Unpack the attributes in the deprecated "/cib/status/tickets/instance_attributes" if it exists. */ state_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); unpack_instance_attributes(data_set->input, xml_tickets, XML_TAG_ATTR_SETS, NULL, state_hash, NULL, TRUE, data_set->now); g_hash_table_foreach(state_hash, get_ticket_state_legacy, data_set); if (state_hash) { g_hash_table_destroy(state_hash); } /* Unpack the new "/cib/status/tickets/ticket_state"s */ unpack_tickets_state(xml_tickets, data_set); } if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) { xmlNode *attrs = NULL; id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (uname == NULL) { /* error */ continue; } else if (this_node == NULL) { crm_config_warn("Node %s in status section no longer exists", uname); continue; } else if (is_remote_node(this_node)) { /* online state for remote nodes is determined by the rsc state * after all the unpacking is done. */ continue; } crm_trace("Processing node id=%s, uname=%s", id, uname); /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance"))) { crm_info("Node %s is in maintenance-mode", this_node->details->uname); this_node->details->maintenance = TRUE; } crm_trace("determining node state"); determine_online_status(state, this_node, data_set); if (this_node->details->online && data_set->no_quorum_policy == no_quorum_suicide) { /* Everything else should flow from this automatically * At least until the PE becomes able to migrate off healthy resources */ pe_fence_node(data_set, this_node, "because the cluster does not have quorum"); } } } /* Now that we know all node states, we can safely handle migration ops */ for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) { if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { continue; } id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (this_node == NULL) { crm_info("Node %s is unknown", id); continue; } else if (is_remote_node(this_node)) { /* online status of remote node can not be determined until all other * resource status is unpacked. */ continue; } else if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { crm_trace("Processing lrm resource entries on healthy node: %s", this_node->details->uname); lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); unpack_lrm_resources(this_node, lrm_rsc, data_set); } } /* now that the rest of the cluster's status is determined * calculate remote-nodes */ unpack_remote_status(status, data_set); return TRUE; } gboolean unpack_remote_status(xmlNode * status, pe_working_set_t * data_set) { const char *id = NULL; const char *uname = NULL; GListPtr gIter = NULL; xmlNode *state = NULL; xmlNode *lrm_rsc = NULL; node_t *this_node = NULL; if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) { crm_trace("no remote nodes to unpack"); return TRUE; } /* get online status */ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { this_node = gIter->data; if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) { continue; } determine_remote_online_status(this_node); } /* process attributes */ for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) { xmlNode *attrs = NULL; if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { continue; } id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) { continue; } crm_trace("Processing remote node id=%s, uname=%s", id, uname); this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } } /* process node rsc status */ for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) { if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { continue; } id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) { continue; } crm_trace("Processing lrm resource entries on healthy remote node: %s", this_node->details->uname); lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); unpack_lrm_resources(this_node, lrm_rsc, data_set); } return TRUE; } static gboolean determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state, node_t * this_node) { gboolean online = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); if (!crm_is_true(in_cluster)) { crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster)); } else if (safe_str_eq(is_peer, ONLINESTATUS)) { if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join); } } else if (this_node->details->expected_up == FALSE) { crm_trace("CRMd is down: in_cluster=%s", crm_str(in_cluster)); crm_trace("\tis_peer=%s, join=%s, expected=%s", crm_str(is_peer), crm_str(join), crm_str(exp_state)); } else { /* mark it unclean */ pe_fence_node(data_set, this_node, "because it is partially and/or un-expectedly down"); crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s", crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state)); } return online; } static gboolean determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state, node_t * this_node) { gboolean online = FALSE; gboolean do_terminate = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); const char *terminate = g_hash_table_lookup(this_node->details->attrs, "terminate"); /* - XML_NODE_IN_CLUSTER ::= true|false - XML_NODE_IS_PEER ::= true|false|online|offline - XML_NODE_JOIN_STATE ::= member|down|pending|banned - XML_NODE_EXPECTED ::= member|down */ if (crm_is_true(terminate)) { do_terminate = TRUE; } else if (terminate != NULL && strlen(terminate) > 0) { /* could be a time() value */ char t = terminate[0]; if (t != '0' && isdigit(t)) { do_terminate = TRUE; } } crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate); online = crm_is_true(in_cluster); if (safe_str_eq(is_peer, ONLINESTATUS)) { is_peer = XML_BOOLEAN_YES; } if (exp_state == NULL) { exp_state = CRMD_JOINSTATE_DOWN; } if (this_node->details->shutdown) { crm_debug("%s is shutting down", this_node->details->uname); online = crm_is_true(is_peer); /* Slightly different criteria since we cant shut down a dead peer */ } else if (in_cluster == NULL) { pe_fence_node(data_set, this_node, "because the peer has not been seen by the cluster"); } else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) { pe_fence_node(data_set, this_node, "because it failed the pacemaker membership criteria"); } else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) { if (crm_is_true(in_cluster) || crm_is_true(is_peer)) { crm_info("- Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { crm_trace("%s is down or still coming up", this_node->details->uname); } } else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN) && crm_is_true(in_cluster) == FALSE && crm_is_true(is_peer) == FALSE) { crm_info("Node %s was just shot", this_node->details->uname); online = FALSE; } else if (crm_is_true(in_cluster) == FALSE) { pe_fence_node(data_set, this_node, "because the node is no longer part of the cluster"); } else if (crm_is_true(is_peer) == FALSE) { pe_fence_node(data_set, this_node, "because our peer process is no longer available"); /* Everything is running at this point, now check join state */ } else if (do_terminate) { pe_fence_node(data_set, this_node, "because termination was requested"); } else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) { crm_info("Node %s is active", this_node->details->uname); } else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING) || safe_str_eq(join, CRMD_JOINSTATE_DOWN)) { crm_info("Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { pe_fence_node(data_set, this_node, "because the peer was in an unknown state"); crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown); } return online; } static gboolean determine_remote_online_status(node_t * this_node) { resource_t *rsc = this_node->details->remote_rsc; resource_t *container = rsc->container; CRM_ASSERT(rsc != NULL); /* If the resource is currently started, mark it online. */ if (rsc->role == RSC_ROLE_STARTED) { crm_trace("Remote node %s is set to ONLINE. role == started", this_node->details->id); this_node->details->online = TRUE; } /* consider this node shutting down if transitioning start->stop */ if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) { crm_trace("Remote node %s shutdown. transition from start to stop role", this_node->details->id); this_node->details->shutdown = TRUE; } /* Now check all the failure conditions. */ if (is_set(rsc->flags, pe_rsc_failed) || (rsc->role == RSC_ROLE_STOPPED) || (container && is_set(container->flags, pe_rsc_failed)) || (container && container->role == RSC_ROLE_STOPPED)) { crm_trace("Remote node %s is set to OFFLINE. node is stopped or rsc failed.", this_node->details->id); this_node->details->online = FALSE; } crm_trace("Remote node %s online=%s", this_node->details->id, this_node->details->online ? "TRUE" : "FALSE"); return this_node->details->online; } gboolean determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set) { gboolean online = FALSE; const char *shutdown = NULL; const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); if (this_node == NULL) { crm_config_err("No node to check"); return online; } this_node->details->shutdown = FALSE; this_node->details->expected_up = FALSE; shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN); if (shutdown != NULL && safe_str_neq("0", shutdown)) { this_node->details->shutdown = TRUE; } else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) { this_node->details->expected_up = TRUE; } if (this_node->details->type == node_ping) { this_node->details->unclean = FALSE; online = FALSE; /* As far as resource management is concerned, * the node is safely offline. * Anyone caught abusing this logic will be shot */ } else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { online = determine_online_status_no_fencing(data_set, node_state, this_node); } else { online = determine_online_status_fencing(data_set, node_state, this_node); } if (online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (online && this_node->details->shutdown) { /* dont run resources here */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (this_node->details->type == node_ping) { crm_info("Node %s is not a pacemaker node", this_node->details->uname); } else if (this_node->details->unclean) { pe_proc_warn("Node %s is unclean", this_node->details->uname); } else if (this_node->details->online) { crm_info("Node %s is %s", this_node->details->uname, this_node->details->shutdown ? "shutting down" : this_node->details->pending ? "pending" : this_node->details->standby ? "standby" : this_node->details->maintenance ? "maintenance" : "online"); } else { crm_trace("Node %s is offline", this_node->details->uname); } return online; } char * clone_strip(const char *last_rsc_id) { int lpc = 0; char *zero = NULL; CRM_CHECK(last_rsc_id != NULL, return NULL); lpc = strlen(last_rsc_id); while (--lpc > 0) { switch (last_rsc_id[lpc]) { case 0: crm_err("Empty string: %s", last_rsc_id); return NULL; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': zero = calloc(1, lpc + 1); memcpy(zero, last_rsc_id, lpc); zero[lpc] = 0; return zero; default: goto done; } } done: zero = strdup(last_rsc_id); return zero; } char * clone_zero(const char *last_rsc_id) { int lpc = 0; char *zero = NULL; CRM_CHECK(last_rsc_id != NULL, return NULL); if (last_rsc_id != NULL) { lpc = strlen(last_rsc_id); } while (--lpc > 0) { switch (last_rsc_id[lpc]) { case 0: return NULL; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': zero = calloc(1, lpc + 3); memcpy(zero, last_rsc_id, lpc); zero[lpc] = ':'; zero[lpc + 1] = '0'; zero[lpc + 2] = 0; return zero; default: goto done; } } done: lpc = strlen(last_rsc_id); zero = calloc(1, lpc + 3); memcpy(zero, last_rsc_id, lpc); zero[lpc] = ':'; zero[lpc + 1] = '0'; zero[lpc + 2] = 0; crm_trace("%s -> %s", last_rsc_id, zero); return zero; } static resource_t * create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set) { resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_debug(xml_rsc, "Orphan resource"); if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) { return NULL; } if (xml_contains_remote_node(xml_rsc)) { node_t *node; crm_debug("Detected orphaned remote node %s", rsc_id); rsc->is_remote_node = TRUE; node = create_node(rsc_id, rsc_id, "remote", NULL, data_set); link_rsc2remotenode(data_set, rsc); if (node) { crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id); node->details->shutdown = TRUE; } } if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) { /* This orphaned rsc needs to be mapped to a container. */ crm_trace("Detected orphaned container filler %s", rsc_id); set_bit(rsc->flags, pe_rsc_orphan_container_filler); } set_bit(rsc->flags, pe_rsc_orphan); data_set->resources = g_list_append(data_set->resources, rsc); return rsc; } extern resource_t *create_child_clone(resource_t * rsc, int sub_id, pe_working_set_t * data_set); static resource_t * find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent, const char *rsc_id) { GListPtr rIter = NULL; resource_t *rsc = NULL; gboolean skip_inactive = FALSE; CRM_ASSERT(parent != NULL); CRM_ASSERT(parent->variant == pe_clone || parent->variant == pe_master); CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique)); /* Find an instance active (or partially active for grouped clones) on the specified node */ pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GListPtr nIter = NULL; GListPtr locations = NULL; resource_t *child = rIter->data; child->fns->location(child, &locations, TRUE); if (locations == NULL) { pe_rsc_trace(child, "Resource %s, skip inactive", child->id); continue; } for (nIter = locations; nIter && rsc == NULL; nIter = nIter->next) { node_t *childnode = nIter->data; if (childnode->details == node->details) { /* ->find_rsc() because we might be a cloned group */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); pe_rsc_trace(rsc, "Resource %s, active", rsc->id); } /* Keep this block, it means we'll do the right thing if * anyone toggles the unique flag to 'off' */ if (rsc && rsc->running_on) { crm_notice("/Anonymous/ clone %s is already running on %s", parent->id, node->details->uname); skip_inactive = TRUE; rsc = NULL; } } g_list_free(locations); } /* Find an inactive instance */ if (skip_inactive == FALSE) { pe_rsc_trace(parent, "Looking for %s anywhere", rsc_id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GListPtr locations = NULL; resource_t *child = rIter->data; if (is_set(child->flags, pe_rsc_block)) { pe_rsc_trace(child, "Skip: blocked in stopped state"); continue; } child->fns->location(child, &locations, TRUE); if (locations == NULL) { /* ->find_rsc() because we might be a cloned group */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); pe_rsc_trace(parent, "Resource %s, empty slot", rsc->id); } g_list_free(locations); } } if (rsc == NULL) { /* Create an extra orphan */ resource_t *top = create_child_clone(parent, -1, data_set); /* ->find_rsc() because we might be a cloned group */ rsc = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone); CRM_ASSERT(rsc != NULL); pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id, node->details->uname); } if (safe_str_neq(rsc_id, rsc->id)) { pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s", rsc_id, node->details->uname, rsc->id, is_set(rsc->flags, pe_rsc_orphan) ? " (ORPHAN)" : ""); } return rsc; } static resource_t * unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id, xmlNode * rsc_entry) { resource_t *rsc = NULL; resource_t *parent = NULL; crm_trace("looking for %s", rsc_id); rsc = pe_find_resource(data_set->resources, rsc_id); /* no match */ if (rsc == NULL) { /* Even when clone-max=0, we still create a single :0 orphan to match against */ char *tmp = clone_zero(rsc_id); resource_t *clone0 = pe_find_resource(data_set->resources, tmp); if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) { rsc = clone0; } else { crm_trace("%s is not known as %s either", rsc_id, tmp); } parent = uber_parent(clone0); free(tmp); crm_trace("%s not found: %s", rsc_id, parent ? parent->id : "orphan"); } else if (rsc->variant > pe_native) { crm_trace("%s is no longer a primitve resource, the lrm_resource entry is obsolete", rsc_id); return NULL; } else { parent = uber_parent(rsc); } if (parent && parent->variant > pe_group) { if (is_not_set(parent->flags, pe_rsc_unique)) { char *base = clone_strip(rsc_id); rsc = find_anonymous_clone(data_set, node, parent, base); CRM_ASSERT(rsc != NULL); free(base); } if (rsc && safe_str_neq(rsc_id, rsc->id)) { free(rsc->clone_name); rsc->clone_name = strdup(rsc_id); } } return rsc; } static resource_t * process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set) { resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } else { GListPtr gIter = NULL; print_resource(LOG_DEBUG_3, "Added orphan", rsc, FALSE); CRM_CHECK(rsc != NULL, return NULL); resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node->details->online && get_failcount(node, rsc, NULL, data_set)) { action_t *clear_op = NULL; action_t *ready = NULL; if (is_remote_node(node)) { char *pseudo_op_name = crm_concat(CRM_OP_PROBED, node->details->id, '_'); ready = get_pseudo_op(pseudo_op_name, data_set); free(pseudo_op_name); } else { ready = get_pseudo_op(CRM_OP_PROBED, data_set); } clear_op = custom_action(rsc, crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_'), CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set); add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); pe_rsc_info(rsc, "Clearing failcount (%d) for orphaned resource %s on %s (%s)", get_failcount(node, rsc, NULL, data_set), rsc->id, node->details->uname, clear_op->uuid); order_actions(clear_op, ready, pe_order_optional); } } } return rsc; } static void process_rsc_state(resource_t * rsc, node_t * node, enum action_fail_response on_fail, xmlNode * migrate_op, pe_working_set_t * data_set) { pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s", rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail)); /* process current state */ if (rsc->role != RSC_ROLE_UNKNOWN) { resource_t *iter = rsc; while (iter) { if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) { node_t *n = node_copy(node); pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name, n->details->uname); g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n); } if (is_set(iter->flags, pe_rsc_unique)) { break; } iter = iter->parent; } } if (rsc->role > RSC_ROLE_STOPPED && node->details->online == FALSE && is_set(rsc->flags, pe_rsc_managed)) { gboolean should_fence = FALSE; /* if this is a remote_node living in a container, fence the container * by recovering it. Mark the resource as unmanaged. Once the container * and remote connenction are re-established, the status section will * get reset in the crmd freeing up this resource to run again once we * are sure we know the resources state. */ if (is_container_remote_node(node)) { set_bit(rsc->flags, pe_rsc_failed); should_fence = TRUE; } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) { should_fence = TRUE; } if (should_fence) { char *reason = g_strdup_printf("because %s is thought to be active there", rsc->id); pe_fence_node(data_set, node, reason); g_free(reason); } } if (node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = action_fail_ignore; } switch (on_fail) { case action_fail_ignore: /* nothing to do */ break; case action_fail_fence: /* treat it as if it is still running * but also mark the node as unclean */ pe_fence_node(data_set, node, "because of resource failure(s)"); break; case action_fail_standby: node->details->standby = TRUE; node->details->standby_onfail = TRUE; break; case action_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); break; case action_fail_migrate: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set); break; case action_fail_stop: rsc->next_role = RSC_ROLE_STOPPED; break; case action_fail_recover: if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { set_bit(rsc->flags, pe_rsc_failed); stop_action(rsc, node, FALSE); } break; case action_fail_restart_container: set_bit(rsc->flags, pe_rsc_failed); if (rsc->container) { stop_action(rsc->container, node, FALSE); } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { stop_action(rsc, node, FALSE); } break; } if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { if (is_set(rsc->flags, pe_rsc_orphan)) { if (is_set(rsc->flags, pe_rsc_managed)) { crm_config_warn("Detected active orphan %s running on %s", rsc->id, node->details->uname); } else { crm_config_warn("Cluster configured not to stop active orphans." " %s must be stopped manually on %s", rsc->id, node->details->uname); } } native_add_running(rsc, node, data_set); if (on_fail != action_fail_ignore) { set_bit(rsc->flags, pe_rsc_failed); } } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) { /* Only do this for older status sections that included instance numbers * Otherwise stopped instances will appear as orphans */ pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); free(rsc->clone_name); rsc->clone_name = NULL; } else { char *key = stop_key(rsc); GListPtr possible_matches = find_actions(rsc->actions, key, node); GListPtr gIter = possible_matches; for (; gIter != NULL; gIter = gIter->next) { action_t *stop = (action_t *) gIter->data; stop->flags |= pe_action_optional; } free(key); } } /* create active recurring operations as optional */ static void process_recurring(node_t * node, resource_t * rsc, int start_index, int stop_index, GListPtr sorted_op_list, pe_working_set_t * data_set) { int counter = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; int interval = 0; char *key = NULL; const char *id = ID(rsc_op); const char *interval_s = NULL; counter++; if (node->details->online == FALSE) { pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname); break; /* Need to check if there's a monitor for role="Stopped" */ } else if (start_index < stop_index && counter <= stop_index) { pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname); continue; } else if (counter < start_index) { pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter); continue; } interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if (interval == 0) { pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (safe_str_eq(status, "-1")) { pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = generate_op_key(rsc->id, task, interval); pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname); custom_action(rsc, key, task, node, TRUE, TRUE, data_set); } } void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) { int counter = -1; int implied_monitor_start = -1; int implied_master_start = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; *stop_index = -1; *start_index = -1; for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (safe_str_eq(task, CRMD_ACTION_STOP) && safe_str_eq(status, "0")) { *stop_index = counter; } else if (safe_str_eq(task, CRMD_ACTION_START) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { *start_index = counter; } else if ((implied_monitor_start <= *stop_index) && safe_str_eq(task, CRMD_ACTION_STATUS)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if (safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) { implied_monitor_start = counter; } } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) { implied_master_start = counter; } } if (*start_index == -1) { if (implied_master_start != -1) { *start_index = implied_master_start; } else if (implied_monitor_start != -1) { *start_index = implied_monitor_start; } } } static resource_t * unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set) { GListPtr gIter = NULL; int stop_index = -1; int start_index = -1; enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; const char *task = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; xmlNode *migrate_op = NULL; xmlNode *rsc_op = NULL; enum action_fail_response on_fail = FALSE; enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; crm_trace("[%s] Processing %s on %s", crm_element_name(rsc_entry), rsc_id, node->details->uname); /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } /* find the resource */ rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); if (rsc == NULL) { rsc = process_orphan_resource(rsc_entry, node, data_set); } CRM_ASSERT(rsc != NULL); /* process operations */ saved_role = rsc->role; on_fail = action_fail_ignore; rsc->role = RSC_ROLE_UNKNOWN; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { migrate_op = rsc_op; } unpack_rsc_op(rsc, node, rsc_op, &on_fail, data_set); } /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail, migrate_op, data_set); if (get_target_role(rsc, &req_role)) { if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); rsc->next_role = req_role; } else if (req_role > rsc->next_role) { pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } } if (saved_role > rsc->role) { rsc->role = saved_role; } return rsc; } static void handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set) { xmlNode *rsc_entry = NULL; for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next(rsc_entry)) { resource_t *rsc; resource_t *container; const char *rsc_id; const char *container_id; if (safe_str_neq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE)) { continue; } container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER); rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); if (container_id == NULL || rsc_id == NULL) { continue; } container = pe_find_resource(data_set->resources, container_id); if (container == NULL) { continue; } rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL || is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE || rsc->container != NULL) { continue; } pe_rsc_trace(rsc, "Mapped orphaned rsc %s's container to %s", rsc->id, container_id); rsc->container = container; container->fillers = g_list_append(container->fillers, rsc); } } gboolean unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set) { xmlNode *rsc_entry = NULL; gboolean found_orphaned_container_filler = FALSE; + GListPtr unexpected_containers = NULL; + GListPtr gIter = NULL; + resource_t *remote = NULL; CRM_CHECK(node != NULL, return FALSE); crm_trace("Unpacking resources on %s", node->details->uname); for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next(rsc_entry)) { if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) { resource_t *rsc; rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set); - if (rsc && is_set(rsc->flags, pe_rsc_orphan_container_filler)) { + if (!rsc) { + continue; + } + if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) { found_orphaned_container_filler = TRUE; } + if (is_set(rsc->flags, pe_rsc_unexpectedly_running)) { + remote = rsc_contains_remote_node(data_set, rsc); + if (remote) { + unexpected_containers = g_list_append(unexpected_containers, remote); + } + } + } + } + + /* If a container resource is unexpectedly up... and the remote-node + * connection resource for that container is not up, the entire container + * must be recovered. */ + for (gIter = unexpected_containers; gIter != NULL; gIter = gIter->next) { + remote = (resource_t *) gIter->data; + if (remote->role != RSC_ROLE_STARTED) { + crm_warn("Recovering container resource %s. Resource is unexpectedly running and involves a remote-node."); + set_bit(remote->container->flags, pe_rsc_failed); } } /* now that all the resource state has been unpacked for this node * we have to go back and map any orphaned container fillers to their * container resource */ if (found_orphaned_container_filler) { handle_orphaned_container_fillers(lrm_rsc_list, data_set); } - + g_list_free(unexpected_containers); return TRUE; } static void set_active(resource_t * rsc) { resource_t *top = uber_parent(rsc); if (top && top->variant == pe_master) { rsc->role = RSC_ROLE_SLAVE; } else { rsc->role = RSC_ROLE_STARTED; } } static void set_node_score(gpointer key, gpointer value, gpointer user_data) { node_t *node = value; int *score = user_data; node->weight = *score; } #define STATUS_PATH_MAX 1024 static xmlNode * find_lrm_op(const char *resource, const char *op, const char *node, const char *source, pe_working_set_t * data_set) { int offset = 0; char xpath[STATUS_PATH_MAX]; offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node); offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']", resource); /* Need to check against transition_magic too? */ if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op, source); } else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op, source); } else { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op); } return get_xpath_object(xpath, data_set->input, LOG_DEBUG); } static void unpack_rsc_migration(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set) { /* * The normal sequence is (now): migrate_to(Src) -> migrate_from(Tgt) -> stop(Src) * * So if a migrate_to is followed by a stop, then we dont need to care what * happended on the target node * * Without the stop, we need to look for a successful migrate_from. * This would also imply we're no longer running on the source * * Without the stop, and without a migrate_from op we make sure the resource * gets stopped on both source and target (assuming the target is up) * */ int stop_id = 0; int task_id = 0; xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id, NULL, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); if (stop_op == NULL || stop_id < task_id) { int from_rc = 0, from_status = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); node_t *target = pe_find_node(data_set->nodes, migrate_target); node_t *source = pe_find_node(data_set->nodes, migrate_source); xmlNode *migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source, data_set); rsc->role = RSC_ROLE_STARTED; /* can be master? */ if (migrate_from) { crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d", ID(migrate_from), migrate_target, from_status, from_rc); } if (migrate_from && from_rc == PCMK_OCF_OK && from_status == PCMK_LRM_OP_DONE) { pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op), migrate_source); /* all good * just need to arrange for the stop action to get sent * but _without_ affecting the target somehow */ rsc->role = RSC_ROLE_STOPPED; rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } else if (migrate_from) { /* Failed */ if (target && target->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, target->details->online); native_add_running(rsc, target, data_set); } } else { /* Pending or complete but erased */ node_t *target = pe_find_node_id(data_set->nodes, migrate_target); if (target && target->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, target->details->online); native_add_running(rsc, target, data_set); if (source && source->details->online) { /* If we make it here we have a partial migration. The migrate_to * has completed but the migrate_from on the target has not. Hold on * to the target and source on the resource. Later on if we detect that * the resource is still going to run on that target, we may continue * the migration */ rsc->partial_migration_target = target; rsc->partial_migration_source = source; } } else { /* Consider it failed here - forces a restart, prevents migration */ set_bit(rsc->flags, pe_rsc_failed); } } } } static void unpack_rsc_migration_failure(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set) { const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { int stop_id = 0; int migrate_id = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_source, NULL, data_set); xmlNode *migrate_op = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, migrate_source, migrate_target, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } if (migrate_op) { crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id); } /* Get our state right */ rsc->role = RSC_ROLE_STARTED; /* can be master? */ if (stop_op == NULL || stop_id < migrate_id) { node_t *source = pe_find_node(data_set->nodes, migrate_source); if (source && source->details->online) { native_add_running(rsc, source, data_set); } } } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) { int stop_id = 0; int migrate_id = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_target, NULL, data_set); xmlNode *migrate_op = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } if (migrate_op) { crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id); } /* Get our state right */ rsc->role = RSC_ROLE_STARTED; /* can be master? */ if (stop_op == NULL || stop_id < migrate_id) { node_t *target = pe_find_node(data_set->nodes, migrate_target); pe_rsc_trace(rsc, "Stop: %p %d, Migrated: %p %d", stop_op, stop_id, migrate_op, migrate_id); if (target && target->details->online) { native_add_running(rsc, target, data_set); } } else if (migrate_op == NULL) { /* Make sure it gets cleaned up, the stop may pre-date the migrate_from */ rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } } } static const char *get_op_key(xmlNode *xml_op) { const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); if(key == NULL) { key = ID(xml_op); } return key; } static void unpack_rsc_op_failure(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, enum action_fail_response *on_fail, pe_working_set_t * data_set) { int interval = 0; bool is_probe = FALSE; action_t *action = NULL; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; pe_rsc_trace(rsc, "is a probe: %s", key); } if (rc != PCMK_OCF_NOT_INSTALLED || is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_warn("Processing failed op %s for %s on %s: %s (%d)", task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc), rc); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) { add_node_copy(data_set->failed, xml_op); } } else { crm_trace("Processing failed op %s for %s on %s: %s (%d)", task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc), rc); } action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); if ((action->on_fail <= action_fail_fence && *on_fail < action->on_fail) || (action->on_fail == action_fail_restart_container && *on_fail <= action_fail_recover) || (*on_fail == action_fail_restart_container && action->on_fail >= action_fail_migrate)) { pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail), fail2text(action->on_fail), action->uuid, key); *on_fail = action->on_fail; } if (safe_str_eq(task, CRMD_ACTION_STOP)) { resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set); } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { unpack_rsc_migration_failure(rsc, node, xml_op, data_set); } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* * staying in role=master ends up putting the PE/TE into a loop * setting role=slave is not dangerous because no master will be * promoted until the failed resource has been fully stopped */ rsc->next_role = RSC_ROLE_STOPPED; if (action->on_fail == action_fail_block) { rsc->role = RSC_ROLE_MASTER; } else { crm_warn("Forcing %s to stop after a failed demote action", rsc->id); rsc->role = RSC_ROLE_SLAVE; } } else if (compare_version("2.0", op_version) > 0 && safe_str_eq(task, CRMD_ACTION_START)) { crm_warn("Compatibility handling for failed op %s on %s", key, node->details->uname); resource_location(rsc, node, -INFINITY, "__legacy_start__", data_set); } if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) { /* leave stopped */ pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id); rsc->role = RSC_ROLE_STOPPED; } else if (rsc->role < RSC_ROLE_STARTED) { pe_rsc_trace(rsc, "Setting %s active", rsc->id); set_active(rsc); } pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", rsc->id, role2text(rsc->role), node->details->unclean ? "true" : "false", fail2text(action->on_fail), role2text(action->fail_role)); if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) { rsc->next_role = action->fail_role; } if (action->fail_role == RSC_ROLE_STOPPED) { int score = -INFINITY; resource_t *fail_rsc = rsc; if (fail_rsc->parent) { resource_t *parent = uber_parent(fail_rsc); if ((parent->variant == pe_clone || parent->variant == pe_master) && is_not_set(parent->flags, pe_rsc_unique)) { /* for clone and master resources, if a child fails on an operation * with on-fail = stop, all the resources fail. Do this by preventing * the parent from coming up again. */ fail_rsc = parent; } } crm_warn("Making sure %s doesn't come up again", fail_rsc->id); /* make sure it doesnt come up again */ g_hash_table_destroy(fail_rsc->allowed_nodes); fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes); g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score); } pe_free_action(action); } static int determine_op_status( resource_t *rsc, int rc, int target_rc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set) { int interval = 0; int result = PCMK_LRM_OP_DONE; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); bool is_probe = FALSE; crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); if (interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; } if (target_rc >= 0 && target_rc != rc) { result = PCMK_LRM_OP_ERROR; pe_rsc_debug(rsc, "%s on %s returned '%s' (%d) instead of the expected value: '%s' (%d)", key, node->details->uname, services_ocf_exitcode_str(rc), rc, services_ocf_exitcode_str(target_rc), target_rc); } /* we could clean this up significantly except for old LRMs and CRMs that * didnt include target_rc and liked to remap status */ switch (rc) { case PCMK_OCF_OK: if (is_probe && target_rc == 7) { result = PCMK_LRM_OP_DONE; pe_rsc_info(rsc, "Operation %s found resource %s active on %s", task, rsc->id, node->details->uname); + set_bit(rsc->flags, pe_rsc_unexpectedly_running); /* legacy code for pre-0.6.5 operations */ } else if (target_rc < 0 && interval > 0 && rsc->role == RSC_ROLE_MASTER) { /* catch status ops that return 0 instead of 8 while they * are supposed to be in master mode */ result = PCMK_LRM_OP_ERROR; } break; case PCMK_OCF_NOT_RUNNING: if (is_probe || target_rc == rc) { result = PCMK_LRM_OP_DONE; rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } else if (safe_str_neq(task, CRMD_ACTION_STOP)) { result = PCMK_LRM_OP_ERROR; } break; case PCMK_OCF_RUNNING_MASTER: if (is_probe) { result = PCMK_LRM_OP_DONE; crm_notice("Operation %s found resource %s active in master mode on %s", task, rsc->id, node->details->uname); } else if (target_rc == rc) { /* nothing to do */ } else if (target_rc >= 0) { result = PCMK_LRM_OP_ERROR; /* legacy code for pre-0.6.5 operations */ } else if (safe_str_neq(task, CRMD_ACTION_STATUS) || rsc->role != RSC_ROLE_MASTER) { result = PCMK_LRM_OP_ERROR; if (rsc->role != RSC_ROLE_MASTER) { crm_err("%s reported %s in master mode on %s", key, rsc->id, node->details->uname); } } rsc->role = RSC_ROLE_MASTER; break; case PCMK_OCF_FAILED_MASTER: rsc->role = RSC_ROLE_MASTER; result = PCMK_LRM_OP_ERROR; break; case PCMK_OCF_NOT_CONFIGURED: result = PCMK_LRM_OP_ERROR_FATAL; break; case PCMK_OCF_NOT_INSTALLED: case PCMK_OCF_INVALID_PARAM: case PCMK_OCF_INSUFFICIENT_PRIV: case PCMK_OCF_UNIMPLEMENT_FEATURE: if (rc == PCMK_OCF_UNIMPLEMENT_FEATURE && interval > 0) { result = PCMK_LRM_OP_NOTSUPPORTED; break; } else if(pe_can_fence(data_set, node) == FALSE && safe_str_eq(task, CRMD_ACTION_STOP)) { /* If a stop fails and we can't fence, there's nothing else we can do */ pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)", rsc->id, task, services_ocf_exitcode_str(rc), rc); clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); } result = PCMK_LRM_OP_ERROR_HARD; break; default: if (result == PCMK_LRM_OP_DONE) { crm_info("Treating %s (rc=%d) on %s as an ERROR", key, rc, node->details->uname); result = PCMK_LRM_OP_ERROR; } } return result; } static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, pe_working_set_t * data_set) { bool expired = FALSE; time_t last_failure = 0; int clear_failcount = 0; int interval = 0; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); if (rsc->failure_timeout > 0) { int last_run = 0; if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) { time_t now = get_effective_time(data_set); if (now > (last_run + rsc->failure_timeout)) { expired = TRUE; } } } if (expired) { if (rsc->failure_timeout > 0) { int fc = get_failcount_full(node, rsc, &last_failure, FALSE, data_set); if(fc && get_failcount_full(node, rsc, &last_failure, TRUE, data_set) == 0) { clear_failcount = 1; crm_notice("Clearing expired failcount for %s on %s", rsc->id, node->details->uname); } } } else if (strstr(ID(xml_op), "last_failure") && ((strcmp(task, "start") == 0) || (strcmp(task, "monitor") == 0))) { op_digest_cache_t *digest_data = NULL; digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set); if (digest_data->rc == RSC_DIGEST_UNKNOWN) { crm_trace("rsc op %s on node %s does not have a op digest to compare against", rsc->id, key, node->details->id); } else if (digest_data->rc != RSC_DIGEST_MATCH) { clear_failcount = 1; crm_info ("Clearing failcount for %s on %s, %s failed and now resource parameters have changed.", task, rsc->id, node->details->uname); } } if (clear_failcount) { action_t *clear_op = NULL; clear_op = custom_action(rsc, crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_'), CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set); add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); } crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); if(expired && interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { switch(rc) { case PCMK_OCF_OK: case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_MASTER: /* Don't expire probes that return these values */ expired = FALSE; break; } } return expired; } static int get_target_rc(xmlNode *xml_op) { int dummy = 0; int target_rc = 0; char *dummy_string = NULL; const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); if (key == NULL) { return -1; } decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc); free(dummy_string); return target_rc; } static enum action_fail_response get_action_on_fail(resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set) { int result = action_fail_recover; action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); result = action->on_fail; pe_free_action(action); return result; } static void update_resource_state(resource_t *rsc, node_t * node, xmlNode * xml_op, const char *task, int rc, enum action_fail_response *on_fail, pe_working_set_t * data_set) { gboolean clear_past_failure = FALSE; if (rc == PCMK_OCF_NOT_RUNNING) { clear_past_failure = TRUE; } else if (rc == PCMK_OCF_NOT_INSTALLED) { rsc->role = RSC_ROLE_STOPPED; } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) { clear_past_failure = TRUE; if (rsc->role < RSC_ROLE_STARTED) { set_active(rsc); } } else if (safe_str_eq(task, CRMD_ACTION_START)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_STOP)) { rsc->role = RSC_ROLE_STOPPED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* Demote from Master does not clear an error */ rsc->role = RSC_ROLE_SLAVE; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) { unpack_rsc_migration(rsc, node, xml_op, data_set); } else if (rsc->role < RSC_ROLE_STARTED) { /* migrate_to and migrate_from will land here */ pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname); set_active(rsc); } /* clear any previous failure actions */ if (clear_past_failure) { switch (*on_fail) { case action_fail_stop: case action_fail_fence: case action_fail_migrate: case action_fail_standby: pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop", rsc->id, fail2text(*on_fail)); break; case action_fail_block: case action_fail_ignore: case action_fail_recover: *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; break; case action_fail_restart_container: *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } } } gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set) { int task_id = 0; const char *key = NULL; const char *task = NULL; const char *task_key = NULL; int rc = 0; int status = PCMK_LRM_OP_PENDING-1; int target_rc = get_target_rc(xml_op); gboolean expired = FALSE; resource_t *parent = rsc; enum action_fail_response failure_strategy = action_fail_recover; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(node != NULL, return FALSE); CRM_CHECK(xml_op != NULL, return FALSE); task_key = get_op_key(xml_op); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc); crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status); CRM_CHECK(task != NULL, return FALSE); CRM_CHECK(status <= PCMK_LRM_OP_NOT_INSTALLED, return FALSE); CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return FALSE); if (safe_str_eq(task, CRMD_ACTION_NOTIFY)) { /* safe to ignore these */ return TRUE; } if (is_not_set(rsc->flags, pe_rsc_unique)) { parent = uber_parent(rsc); } pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)", task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role)); if (node->details->unclean) { pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean." " Further action depends on the value of the stop's on-fail attribue", node->details->uname, rsc->id); } if (status == PCMK_LRM_OP_ERROR) { /* Older versions set this if rc != 0 but its up to us to decide */ status = PCMK_LRM_OP_DONE; } if(status != PCMK_LRM_OP_NOT_INSTALLED) { expired = check_operation_expiry(rsc, node, rc, xml_op, data_set); } if (expired && target_rc != rc) { int interval = 0; const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); pe_rsc_debug(rsc, "Expired operation '%s' on %s returned '%s' (%d) instead of the expected value: '%s' (%d)", key, node->details->uname, services_ocf_exitcode_str(rc), rc, services_ocf_exitcode_str(target_rc), target_rc); crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); if(interval == 0) { crm_notice("Ignoring expired calculated failure %s (rc=%d, magic=%s) on %s", task_key, rc, magic, node->details->uname); goto done; } else if(node->details->online && node->details->unclean == FALSE) { crm_notice("Re-initiated expired calculated failure %s (rc=%d, magic=%s) on %s", task_key, rc, magic, node->details->uname); /* This is SO horrible, but we don't have access to CancelXmlOp() yet */ crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout"); goto done; } } if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) { status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set); } pe_rsc_trace(rsc, "Handling status: %d", status); switch (status) { case PCMK_LRM_OP_CANCELLED: /* do nothing?? */ pe_err("Dont know what to do for cancelled ops yet"); break; case PCMK_LRM_OP_PENDING: if (safe_str_eq(task, CRMD_ACTION_START)) { set_bit(rsc->flags, pe_rsc_start_pending); set_active(rsc); } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } /* * Intentionally ignoring pending migrate ops here; * haven't decided if we need to do anything special * with them yet... */ break; case PCMK_LRM_OP_DONE: pe_rsc_trace(rsc, "%s/%s completed on %s", rsc->id, task, node->details->uname); update_resource_state(rsc, node, xml_op, task, rc, on_fail, data_set); break; case PCMK_LRM_OP_NOT_INSTALLED: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if (failure_strategy == action_fail_ignore) { crm_warn("Cannot ignore failed %s (status=%d, rc=%d) on %s: " "Resource agent doesn't exist", task_key, status, rc, node->details->uname); /* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */ *on_fail = action_fail_migrate; } resource_location(parent, node, -INFINITY, "hard-error", data_set); unpack_rsc_op_failure(rsc, node, rc, xml_op, on_fail, data_set); break; case PCMK_LRM_OP_ERROR: case PCMK_LRM_OP_ERROR_HARD: case PCMK_LRM_OP_ERROR_FATAL: case PCMK_LRM_OP_TIMEOUT: case PCMK_LRM_OP_NOTSUPPORTED: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if ((failure_strategy == action_fail_ignore) || (failure_strategy == action_fail_restart_container && safe_str_eq(task, CRMD_ACTION_STOP))) { crm_warn("Pretending the failure of %s (rc=%d) on %s succeeded", task_key, rc, node->details->uname); update_resource_state(rsc, node, xml_op, task, target_rc, on_fail, data_set); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); set_bit(rsc->flags, pe_rsc_failure_ignored); if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) { crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); add_node_copy(data_set->failed, xml_op); } if (failure_strategy == action_fail_restart_container && *on_fail <= action_fail_recover) { *on_fail = failure_strategy; } } else { unpack_rsc_op_failure(rsc, node, rc, xml_op, on_fail, data_set); if(status == PCMK_LRM_OP_ERROR_HARD) { do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE, "Preventing %s from re-starting on %s: operation %s failed '%s' (%d)", parent->id, node->details->uname, task, services_ocf_exitcode_str(rc), rc); resource_location(parent, node, -INFINITY, "hard-error", data_set); } else if(status == PCMK_LRM_OP_ERROR_FATAL) { crm_err("Preventing %s from re-starting anywhere: operation %s failed '%s' (%d)", parent->id, task, services_ocf_exitcode_str(rc), rc); resource_location(parent, NULL, -INFINITY, "fatal-error", data_set); } } break; } done: pe_rsc_trace(rsc, "Resource %s after %s: role=%s", rsc->id, task, role2text(rsc->role)); return TRUE; } gboolean add_node_attrs(xmlNode * xml_obj, node_t * node, gboolean overwrite, pe_working_set_t * data_set) { g_hash_table_insert(node->details->attrs, strdup("#uname"), strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, strdup("#kind"), strdup(node->details->remote_rsc?"container":"cluster")); g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_ID), strdup(node->details->id)); if (safe_str_eq(node->details->id, data_set->dc_uuid)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_DC), strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_DC), strdup(XML_BOOLEAN_FALSE)); } unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, node->details->attrs, NULL, overwrite, data_set->now); return TRUE; } static GListPtr extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter) { int counter = -1; int stop_index = -1; int start_index = -1; xmlNode *rsc_op = NULL; GListPtr gIter = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, XML_ATTR_UNAME, node); op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if (active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; if (start_index < stop_index) { crm_trace("Skipping %s: not active", ID(rsc_entry)); break; } else if (counter < start_index) { crm_trace("Skipping %s: old", ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); } g_list_free(sorted_op_list); return op_list; } GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t * data_set) { GListPtr output = NULL; GListPtr intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE); node_t *this_node = NULL; xmlNode *node_state = NULL; for (node_state = __xml_first_child(status); node_state != NULL; node_state = __xml_next(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) { const char *uname = crm_element_value(node_state, XML_ATTR_UNAME); if (node != NULL && safe_str_neq(uname, node)) { continue; } this_node = pe_find_node(data_set->nodes, uname); CRM_CHECK(this_node != NULL, continue); if (is_remote_node(this_node)) { determine_remote_online_status(this_node); } else { determine_online_status(node_state, this_node, data_set); } if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ xmlNode *lrm_rsc = NULL; tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); for (lrm_rsc = __xml_first_child(tmp); lrm_rsc != NULL; lrm_rsc = __xml_next(lrm_rsc)) { if (crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) { const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID); if (rsc != NULL && safe_str_neq(rsc_id, rsc)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); } } } } } return output; } diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index ac71de6c0f..5d7ac23bfb 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -1,1840 +1,1840 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include pe_working_set_t *pe_dataset = NULL; extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); void print_str_str(gpointer key, gpointer value, gpointer user_data); gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); void unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, pe_working_set_t * data_set); static xmlNode *find_rsc_op_entry_helper(resource_t * rsc, const char *key, gboolean include_disabled); bool pe_can_fence(pe_working_set_t * data_set, node_t *node) { if(is_not_set(data_set->flags, pe_flag_stonith_enabled)) { return FALSE; /* Turned off */ } else if (is_not_set(data_set->flags, pe_flag_have_stonith_resource)) { return FALSE; /* No devices */ } else if (is_set(data_set->flags, pe_flag_have_quorum)) { return TRUE; } else if (data_set->no_quorum_policy == no_quorum_ignore) { return TRUE; } else if(node == NULL) { return FALSE; } else if(node->details->online) { crm_notice("We can fence %s without quorum because they're in our membership", node->details->uname); return TRUE; } crm_trace("Cannot fence %s", node->details->uname); return FALSE; } node_t * node_copy(node_t * this_node) { node_t *new_node = NULL; CRM_CHECK(this_node != NULL, return NULL); new_node = calloc(1, sizeof(node_t)); CRM_ASSERT(new_node != NULL); crm_trace("Copying %p (%s) to %p", this_node, this_node->details->uname, new_node); new_node->weight = this_node->weight; new_node->fixed = this_node->fixed; new_node->details = this_node->details; return new_node; } /* any node in list1 or list2 and not in the other gets a score of -INFINITY */ void node_list_exclude(GHashTable * hash, GListPtr list, gboolean merge_scores) { GHashTable *result = hash; node_t *other_node = NULL; GListPtr gIter = list; GHashTableIter iter; node_t *node = NULL; g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { other_node = pe_find_node_id(list, node->details->id); if (other_node == NULL) { node->weight = -INFINITY; } else if (merge_scores) { node->weight = merge_weights(node->weight, other_node->weight); } } for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; other_node = pe_hash_table_lookup(result, node->details->id); if (other_node == NULL) { node_t *new_node = node_copy(node); new_node->weight = -INFINITY; g_hash_table_insert(result, (gpointer) new_node->details->id, new_node); } } } GHashTable * node_hash_from_list(GListPtr list) { GListPtr gIter = list; GHashTable *result = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str); for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; node_t *n = node_copy(node); g_hash_table_insert(result, (gpointer) n->details->id, n); } return result; } GListPtr node_list_dup(GListPtr list1, gboolean reset, gboolean filter) { GListPtr result = NULL; GListPtr gIter = list1; for (; gIter != NULL; gIter = gIter->next) { node_t *new_node = NULL; node_t *this_node = (node_t *) gIter->data; if (filter && this_node->weight < 0) { continue; } new_node = node_copy(this_node); if (reset) { new_node->weight = 0; } if (new_node != NULL) { result = g_list_prepend(result, new_node); } } return result; } gint sort_node_uname(gconstpointer a, gconstpointer b) { const node_t *node_a = a; const node_t *node_b = b; return strcmp(node_a->details->uname, node_b->details->uname); } void dump_node_scores_worker(int level, const char *file, const char *function, int line, resource_t * rsc, const char *comment, GHashTable * nodes) { GHashTable *hash = nodes; GHashTableIter iter; node_t *node = NULL; if (rsc) { hash = rsc->allowed_nodes; } if (rsc && is_set(rsc->flags, pe_rsc_orphan)) { /* Don't show the allocation scores for orphans */ return; } if (level == 0) { /* For now we want this in sorted order to keep the regression tests happy */ GListPtr gIter = NULL; GListPtr list = g_hash_table_get_values(hash); list = g_list_sort(list, sort_node_uname); gIter = list; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; char *score = score2char(node->weight); if (rsc) { printf("%s: %s allocation score on %s: %s\n", comment, rsc->id, node->details->uname, score); } else { printf("%s: %s = %s\n", comment, node->details->uname, score); } free(score); } g_list_free(list); } else if (hash) { g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { char *score = score2char(node->weight); if (rsc) { do_crm_log_alias(LOG_TRACE, file, function, line, "%s: %s allocation score on %s: %s", comment, rsc->id, node->details->uname, score); } else { do_crm_log_alias(LOG_TRACE, file, function, line + 1, "%s: %s = %s", comment, node->details->uname, score); } free(score); } } if (rsc && rsc->children) { GListPtr gIter = NULL; gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; dump_node_scores_worker(level, file, function, line, child, comment, nodes); } } } static void append_dump_text(gpointer key, gpointer value, gpointer user_data) { char **dump_text = user_data; int len = 0; char *new_text = NULL; len = strlen(*dump_text) + strlen(" ") + strlen(key) + strlen("=") + strlen(value) + 1; new_text = calloc(1, len); sprintf(new_text, "%s %s=%s", *dump_text, (char *)key, (char *)value); free(*dump_text); *dump_text = new_text; } void dump_node_capacity(int level, const char *comment, node_t * node) { int len = 0; char *dump_text = NULL; len = strlen(comment) + strlen(": ") + strlen(node->details->uname) + strlen(" capacity:") + 1; dump_text = calloc(1, len); sprintf(dump_text, "%s: %s capacity:", comment, node->details->uname); g_hash_table_foreach(node->details->utilization, append_dump_text, &dump_text); if (level == 0) { fprintf(stdout, "%s\n", dump_text); } else { crm_trace("%s", dump_text); } free(dump_text); } void dump_rsc_utilization(int level, const char *comment, resource_t * rsc, node_t * node) { int len = 0; char *dump_text = NULL; len = strlen(comment) + strlen(": ") + strlen(rsc->id) + strlen(" utilization on ") + strlen(node->details->uname) + strlen(":") + 1; dump_text = calloc(1, len); sprintf(dump_text, "%s: %s utilization on %s:", comment, rsc->id, node->details->uname); g_hash_table_foreach(rsc->utilization, append_dump_text, &dump_text); if (level == 0) { fprintf(stdout, "%s\n", dump_text); } else { crm_trace("%s", dump_text); } free(dump_text); } gint sort_rsc_index(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t *)a; const resource_t *resource2 = (const resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->sort_index > resource2->sort_index) { return -1; } if (resource1->sort_index < resource2->sort_index) { return 1; } return 0; } gint sort_rsc_priority(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t *)a; const resource_t *resource2 = (const resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->priority > resource2->priority) { return -1; } if (resource1->priority < resource2->priority) { return 1; } return 0; } action_t * custom_action(resource_t * rsc, char *key, const char *task, node_t * on_node, gboolean optional, gboolean save_action, pe_working_set_t * data_set) { action_t *action = NULL; GListPtr possible_matches = NULL; CRM_CHECK(key != NULL, return NULL); CRM_CHECK(task != NULL, return NULL); if (save_action && rsc != NULL) { possible_matches = find_actions(rsc->actions, key, on_node); } if (possible_matches != NULL) { if (g_list_length(possible_matches) > 1) { pe_warn("Action %s for %s on %s exists %d times", task, rsc ? rsc->id : "", on_node ? on_node->details->uname : "", g_list_length(possible_matches)); } action = g_list_nth_data(possible_matches, 0); pe_rsc_trace(rsc, "Found existing action (%d) %s for %s on %s", action->id, task, rsc ? rsc->id : "", on_node ? on_node->details->uname : ""); g_list_free(possible_matches); } if (action == NULL) { if (save_action) { pe_rsc_trace(rsc, "Creating%s action %d: %s for %s on %s", optional ? "" : " manditory", data_set->action_id, key, rsc ? rsc->id : "", on_node ? on_node->details->uname : ""); } action = calloc(1, sizeof(action_t)); if (save_action) { action->id = data_set->action_id++; } else { action->id = 0; } action->rsc = rsc; CRM_ASSERT(task != NULL); action->task = strdup(task); if (on_node) { action->node = node_copy(on_node); } action->uuid = strdup(key); pe_set_action_bit(action, pe_action_failure_is_fatal); pe_set_action_bit(action, pe_action_runnable); if (optional) { pe_set_action_bit(action, pe_action_optional); } else { pe_clear_action_bit(action, pe_action_optional); } /* Implied by calloc()... action->actions_before = NULL; action->actions_after = NULL; action->pseudo = FALSE; action->dumped = FALSE; action->processed = FALSE; action->seen_count = 0; */ action->extra = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free); action->meta = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free); if (save_action) { data_set->actions = g_list_prepend(data_set->actions, action); } if (rsc != NULL) { action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE); unpack_operation(action, action->op_entry, rsc->container, data_set); if (save_action) { rsc->actions = g_list_prepend(rsc->actions, action); } } if (save_action) { pe_rsc_trace(rsc, "Action %d created", action->id); } } if (optional == FALSE) { pe_rsc_trace(rsc, "Action %d (%s) marked manditory", action->id, action->uuid); pe_clear_action_bit(action, pe_action_optional); } if (rsc != NULL) { enum action_tasks a_task = text2task(action->task); int warn_level = LOG_TRACE; if (save_action) { warn_level = LOG_WARNING; } if (is_set(action->flags, pe_action_have_node_attrs) == FALSE && action->node != NULL && action->op_entry != NULL) { pe_set_action_bit(action, pe_action_have_node_attrs); unpack_instance_attributes(data_set->input, action->op_entry, XML_TAG_ATTR_SETS, action->node->details->attrs, action->extra, NULL, FALSE, data_set->now); } if (is_set(action->flags, pe_action_pseudo)) { /* leave untouched */ } else if (action->node == NULL) { pe_clear_action_bit(action, pe_action_runnable); } else if (is_not_set(rsc->flags, pe_rsc_managed) && g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL) == NULL) { crm_debug("Action %s (unmanaged)", action->uuid); pe_set_action_bit(action, pe_action_optional); /* action->runnable = FALSE; */ } else if (action->node->details->online == FALSE) { pe_clear_action_bit(action, pe_action_runnable); do_crm_log(warn_level, "Action %s on %s is unrunnable (offline)", action->uuid, action->node->details->uname); if (is_set(action->rsc->flags, pe_rsc_managed) && action->node->details->unclean == FALSE && save_action && a_task == stop_rsc) { do_crm_log(warn_level, "Marking node %s unclean", action->node->details->uname); action->node->details->unclean = TRUE; } } else if (action->node->details->pending) { pe_clear_action_bit(action, pe_action_runnable); do_crm_log(warn_level, "Action %s on %s is unrunnable (pending)", action->uuid, action->node->details->uname); } else if (action->needs == rsc_req_nothing) { pe_rsc_trace(rsc, "Action %s doesnt require anything", action->uuid); pe_set_action_bit(action, pe_action_runnable); #if 0 /* * No point checking this * - if we dont have quorum we cant stonith anyway */ } else if (action->needs == rsc_req_stonith) { crm_trace("Action %s requires only stonith", action->uuid); action->runnable = TRUE; #endif } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_stop) { pe_clear_action_bit(action, pe_action_runnable); crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid); } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_freeze) { pe_rsc_trace(rsc, "Check resource is already active"); if (rsc->fns->active(rsc, TRUE) == FALSE) { pe_clear_action_bit(action, pe_action_runnable); pe_rsc_debug(rsc, "%s\t%s (cancelled : quorum freeze)", action->node->details->uname, action->uuid); } } else { pe_rsc_trace(rsc, "Action %s is runnable", action->uuid); pe_set_action_bit(action, pe_action_runnable); } if (save_action) { switch (a_task) { case stop_rsc: set_bit(rsc->flags, pe_rsc_stopping); break; case start_rsc: clear_bit(rsc->flags, pe_rsc_starting); if (is_set(action->flags, pe_action_runnable)) { set_bit(rsc->flags, pe_rsc_starting); } break; default: break; } } } free(key); return action; } static const char * unpack_operation_on_fail(action_t * action) { const char *value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL); if (safe_str_eq(action->task, CRMD_ACTION_STOP) && safe_str_eq(value, "standby")) { crm_config_err("on-fail=standby is not allowed for stop actions: %s", action->rsc->id); return NULL; } else if (safe_str_eq(action->task, CRMD_ACTION_DEMOTE) && !value) { /* demote on_fail defaults to master monitor value if present */ xmlNode *operation = NULL; const char *name = NULL; const char *role = NULL; const char *on_fail = NULL; const char *interval = NULL; const char *enabled = NULL; CRM_CHECK(action->rsc != NULL, return NULL); for (operation = __xml_first_child(action->rsc->ops_xml); operation && !value; operation = __xml_next(operation)) { if (!crm_str_eq((const char *)operation->name, "op", TRUE)) { continue; } name = crm_element_value(operation, "name"); role = crm_element_value(operation, "role"); on_fail = crm_element_value(operation, XML_OP_ATTR_ON_FAIL); enabled = crm_element_value(operation, "enabled"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if (!on_fail) { continue; } else if (enabled && !crm_is_true(enabled)) { continue; } else if (safe_str_neq(name, "monitor") || safe_str_neq(role, "Master")) { continue; } else if (crm_get_interval(interval) <= 0) { continue; } value = on_fail; } } return value; } static xmlNode * find_min_interval_mon(resource_t * rsc, gboolean include_disabled) { int number = 0; int min_interval = -1; const char *name = NULL; const char *value = NULL; const char *interval = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { name = crm_element_value(operation, "name"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "enabled"); if (!include_disabled && value && crm_is_true(value) == FALSE) { continue; } if (safe_str_neq(name, RSC_STATUS)) { continue; } number = crm_get_interval(interval); if (number < 0) { continue; } if (min_interval < 0 || number < min_interval) { min_interval = number; op = operation; } } } return op; } void unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, pe_working_set_t * data_set) { int value_i = 0; unsigned long long interval = 0; unsigned long long start_delay = 0; char *value_ms = NULL; const char *value = NULL; const char *field = NULL; CRM_CHECK(action->rsc != NULL, return); unpack_instance_attributes(data_set->input, data_set->op_defaults, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); if (xml_obj) { xmlAttrPtr xIter = NULL; for (xIter = xml_obj->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_obj, prop_name); g_hash_table_replace(action->meta, strdup(prop_name), strdup(prop_value)); } } unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, action->meta, NULL, FALSE, data_set->now); g_hash_table_remove(action->meta, "id"); /* Begin compatability code */ value = g_hash_table_lookup(action->meta, "requires"); if (safe_str_neq(action->task, RSC_START) && safe_str_neq(action->task, RSC_PROMOTE)) { action->needs = rsc_req_nothing; value = "nothing (not start/promote)"; } else if (safe_str_eq(value, "nothing")) { action->needs = rsc_req_nothing; } else if (safe_str_eq(value, "quorum")) { action->needs = rsc_req_quorum; } else if (safe_str_eq(value, "unfencing")) { action->needs = rsc_req_stonith; set_bit(action->rsc->flags, pe_rsc_needs_unfencing); if (is_set(data_set->flags, pe_flag_stonith_enabled)) { crm_notice("%s requires (un)fencing but fencing is disabled", action->rsc->id); } } else if (is_set(data_set->flags, pe_flag_stonith_enabled) && safe_str_eq(value, "fencing")) { action->needs = rsc_req_stonith; if (is_set(data_set->flags, pe_flag_stonith_enabled)) { crm_notice("%s requires fencing but fencing is disabled", action->rsc->id); } /* End compatability code */ } else if (is_set(action->rsc->flags, pe_rsc_needs_fencing)) { action->needs = rsc_req_stonith; value = "fencing (resource)"; } else if (is_set(action->rsc->flags, pe_rsc_needs_quorum)) { action->needs = rsc_req_quorum; value = "quorum (resource)"; } else { action->needs = rsc_req_nothing; value = "nothing (resource)"; } pe_rsc_trace(action->rsc, "\tAction %s requires: %s", action->task, value); value = unpack_operation_on_fail(action); if (value == NULL) { } else if (safe_str_eq(value, "block")) { action->on_fail = action_fail_block; } else if (safe_str_eq(value, "fence")) { action->on_fail = action_fail_fence; value = "node fencing"; if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { crm_config_err("Specifying on_fail=fence and" " stonith-enabled=false makes no sense"); action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } } else if (safe_str_eq(value, "standby")) { action->on_fail = action_fail_standby; value = "node standby"; } else if (safe_str_eq(value, "ignore") || safe_str_eq(value, "nothing")) { action->on_fail = action_fail_ignore; value = "ignore"; } else if (safe_str_eq(value, "migrate")) { action->on_fail = action_fail_migrate; value = "force migration"; } else if (safe_str_eq(value, "stop")) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } else if (safe_str_eq(value, "restart")) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate)"; } else if (safe_str_eq(value, "restart-container")) { if (container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate)"; } else { value = NULL; } } else { pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value); value = NULL; } /* defaults */ if (value == NULL && container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate) (default)"; } else if (value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { action->on_fail = action_fail_fence; value = "resource fence (default)"; } else { action->on_fail = action_fail_block; value = "resource block (default)"; } } else if (value == NULL) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate) (default)"; } pe_rsc_trace(action->rsc, "\t%s failure handling: %s", action->task, value); value = NULL; if (xml_obj != NULL) { value = g_hash_table_lookup(action->meta, "role_after_failure"); } if (value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) { action->fail_role = text2role(value); } /* defaults */ if (action->fail_role == RSC_ROLE_UNKNOWN) { if (safe_str_eq(action->task, CRMD_ACTION_PROMOTE)) { action->fail_role = RSC_ROLE_SLAVE; } else { action->fail_role = RSC_ROLE_STARTED; } } pe_rsc_trace(action->rsc, "\t%s failure results in: %s", action->task, role2text(action->fail_role)); field = XML_LRM_ATTR_INTERVAL; value = g_hash_table_lookup(action->meta, field); if (value != NULL) { interval = crm_get_interval(value); if (interval > 0) { value_ms = crm_itoa(interval); g_hash_table_replace(action->meta, strdup(field), value_ms); } else { g_hash_table_remove(action->meta, field); } } field = XML_OP_ATTR_START_DELAY; value = g_hash_table_lookup(action->meta, field); if (value != NULL) { value_i = crm_get_msec(value); if (value_i < 0) { value_i = 0; } start_delay = value_i; value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, strdup(field), value_ms); } else if (interval > 0 && g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN)) { crm_time_t *origin = NULL; value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN); origin = crm_time_new(value); if (origin == NULL) { crm_config_err("Operation %s contained an invalid " XML_OP_ATTR_ORIGIN ": %s", ID(xml_obj), value); } else { crm_time_t *delay = NULL; int rc = crm_time_compare(origin, data_set->now); unsigned long long delay_s = 0; while (rc < 0) { crm_time_add_seconds(origin, interval / 1000); rc = crm_time_compare(origin, data_set->now); } delay = crm_time_subtract(origin, data_set->now); delay_s = crm_time_get_seconds(delay); start_delay = delay_s * 1000; crm_info("Calculated a start delay of %llus for %s", delay_s, ID(xml_obj)); g_hash_table_replace(action->meta, strdup(XML_OP_ATTR_START_DELAY), crm_itoa(start_delay)); crm_time_free(origin); crm_time_free(delay); } } field = XML_ATTR_TIMEOUT; value = g_hash_table_lookup(action->meta, field); if (value == NULL && xml_obj == NULL && safe_str_eq(action->task, RSC_STATUS) && interval == 0) { xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE); if (min_interval_mon) { value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT); pe_rsc_trace(action->rsc, "\t%s uses the timeout value '%s' from the minimum interval monitor", action->uuid, value); } } if (value == NULL) { value = pe_pref(data_set->config_hash, "default-action-timeout"); } value_i = crm_get_msec(value); if (value_i < 0) { value_i = 0; } value_i += start_delay; value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, strdup(field), value_ms); } static xmlNode * find_rsc_op_entry_helper(resource_t * rsc, const char *key, gboolean include_disabled) { unsigned long long number = 0; gboolean do_retry = TRUE; char *local_key = NULL; const char *name = NULL; const char *value = NULL; const char *interval = NULL; char *match_key = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; retry: for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { name = crm_element_value(operation, "name"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "enabled"); if (!include_disabled && value && crm_is_true(value) == FALSE) { continue; } number = crm_get_interval(interval); if (number < 0) { continue; } match_key = generate_op_key(rsc->id, name, number); if (safe_str_eq(key, match_key)) { op = operation; } free(match_key); if (rsc->clone_name) { match_key = generate_op_key(rsc->clone_name, name, number); if (safe_str_eq(key, match_key)) { op = operation; } free(match_key); } if (op != NULL) { free(local_key); return op; } } } free(local_key); if (do_retry == FALSE) { return NULL; } do_retry = FALSE; if (strstr(key, CRMD_ACTION_MIGRATE) || strstr(key, CRMD_ACTION_MIGRATED)) { local_key = generate_op_key(rsc->id, "migrate", 0); key = local_key; goto retry; } else if (strstr(key, "_notify_")) { local_key = generate_op_key(rsc->id, "notify", 0); key = local_key; goto retry; } return NULL; } xmlNode * find_rsc_op_entry(resource_t * rsc, const char *key) { return find_rsc_op_entry_helper(rsc, key, FALSE); } void print_node(const char *pre_text, node_t * node, gboolean details) { if (node == NULL) { crm_trace("%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": "); return; } crm_trace("%s%s%sNode %s: (weight=%d, fixed=%s)", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ", node->details == NULL ? "error " : node->details->online ? "" : "Unavailable/Unclean ", node->details->uname, node->weight, node->fixed ? "True" : "False"); if (details && node != NULL && node->details != NULL) { char *pe_mutable = strdup("\t\t"); GListPtr gIter = node->details->running_rsc; crm_trace("\t\t===Node Attributes"); g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable); free(pe_mutable); crm_trace("\t\t=== Resources"); for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; print_resource(LOG_DEBUG_4, "\t\t", rsc, FALSE); } } } /* * Used by the HashTable for-loop */ void print_str_str(gpointer key, gpointer value, gpointer user_data) { crm_trace("%s%s %s ==> %s", user_data == NULL ? "" : (char *)user_data, user_data == NULL ? "" : ": ", (char *)key, (char *)value); } void print_resource(int log_level, const char *pre_text, resource_t * rsc, gboolean details) { long options = pe_print_log; if (rsc == NULL) { do_crm_log(log_level - 1, "%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": "); return; } if (details) { options |= pe_print_details; } rsc->fns->print(rsc, pre_text, options, &log_level); } void pe_free_action(action_t * action) { if (action == NULL) { return; } g_list_free_full(action->actions_before, free); /* action_warpper_t* */ g_list_free_full(action->actions_after, free); /* action_warpper_t* */ if (action->extra) { g_hash_table_destroy(action->extra); } if (action->meta) { g_hash_table_destroy(action->meta); } free(action->task); free(action->uuid); free(action->node); free(action); } GListPtr find_recurring_actions(GListPtr input, node_t * not_on_node) { const char *value = NULL; GListPtr result = NULL; GListPtr gIter = input; CRM_CHECK(input != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; value = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL); if (value == NULL) { /* skip */ } else if (safe_str_eq(value, "0")) { /* skip */ } else if (safe_str_eq(CRMD_ACTION_CANCEL, action->task)) { /* skip */ } else if (not_on_node == NULL) { crm_trace("(null) Found: %s", action->uuid); result = g_list_prepend(result, action); } else if (action->node == NULL) { /* skip */ } else if (action->node->details != not_on_node->details) { crm_trace("Found: %s", action->uuid); result = g_list_prepend(result, action); } } return result; } enum action_tasks get_complex_task(resource_t * rsc, const char *name, gboolean allow_non_atomic) { enum action_tasks task = text2task(name); if (rsc == NULL) { return task; } else if (allow_non_atomic == FALSE || rsc->variant == pe_native) { switch (task) { case stopped_rsc: case started_rsc: case action_demoted: case action_promoted: crm_trace("Folding %s back into its atomic counterpart for %s", name, rsc->id); return task - 1; break; default: break; } } return task; } action_t * find_first_action(GListPtr input, const char *uuid, const char *task, node_t * on_node) { GListPtr gIter = NULL; CRM_CHECK(uuid || task, return NULL); for (gIter = input; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (uuid != NULL && safe_str_neq(uuid, action->uuid)) { continue; } else if (task != NULL && safe_str_neq(task, action->task)) { continue; } else if (on_node == NULL) { return action; } else if (action->node == NULL) { continue; } else if (on_node->details == action->node->details) { return action; } } return NULL; } GListPtr find_actions(GListPtr input, const char *key, node_t * on_node) { GListPtr gIter = input; GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; crm_trace("Matching %s against %s", key, action->uuid); if (safe_str_neq(key, action->uuid)) { continue; } else if (on_node == NULL) { result = g_list_prepend(result, action); } else if (action->node == NULL) { /* skip */ crm_trace("While looking for %s action on %s, " "found an unallocated one. Assigning" " it to the requested node...", key, on_node->details->uname); action->node = node_copy(on_node); result = g_list_prepend(result, action); } else if (on_node->details == action->node->details) { result = g_list_prepend(result, action); } } return result; } GListPtr find_actions_exact(GListPtr input, const char *key, node_t * on_node) { GListPtr gIter = input; GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; crm_trace("Matching %s against %s", key, action->uuid); if (safe_str_neq(key, action->uuid)) { crm_trace("Key mismatch: %s vs. %s", key, action->uuid); continue; } else if (on_node == NULL || action->node == NULL) { crm_trace("on_node=%p, action->node=%p", on_node, action->node); continue; } else if (safe_str_eq(on_node->details->id, action->node->details->id)) { result = g_list_prepend(result, action); } crm_trace("Node mismatch: %s vs. %s", on_node->details->id, action->node->details->id); } return result; } static void resource_node_score(resource_t * rsc, node_t * node, int score, const char *tag) { node_t *match = NULL; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; resource_node_score(child_rsc, node, score, tag); } } pe_rsc_trace(rsc, "Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score); match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (match == NULL) { match = node_copy(node); match->weight = merge_weights(score, node->weight); g_hash_table_insert(rsc->allowed_nodes, (gpointer) match->details->id, match); } match->weight = merge_weights(match->weight, score); } void resource_location(resource_t * rsc, node_t * node, int score, const char *tag, pe_working_set_t * data_set) { if (node != NULL) { resource_node_score(rsc, node, score, tag); } else if (data_set != NULL) { GListPtr gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; resource_node_score(rsc, node, score, tag); } } else { GHashTableIter iter; node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { resource_node_score(rsc, node, score, tag); } } if (node == NULL && score == -INFINITY) { if (rsc->allocated_to) { crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname); free(rsc->allocated_to); rsc->allocated_to = NULL; } } } #define sort_return(an_int, why) do { \ free(a_uuid); \ free(b_uuid); \ crm_trace("%s (%d) %c %s (%d) : %s", \ a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=', \ b_xml_id, b_call_id, why); \ return an_int; \ } while(0) gint sort_op_by_callid(gconstpointer a, gconstpointer b) { int a_call_id = -1; int b_call_id = -1; char *a_uuid = NULL; char *b_uuid = NULL; const xmlNode *xml_a = a; const xmlNode *xml_b = b; const char *a_xml_id = crm_element_value_const(xml_a, XML_ATTR_ID); const char *b_xml_id = crm_element_value_const(xml_b, XML_ATTR_ID); if (safe_str_eq(a_xml_id, b_xml_id)) { /* We have duplicate lrm_rsc_op entries in the status * section which is unliklely to be a good thing * - we can handle it easily enough, but we need to get * to the bottom of why its happening. */ pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id); sort_return(0, "duplicate"); } crm_element_value_const_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id); crm_element_value_const_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id); if (a_call_id == -1 && b_call_id == -1) { /* both are pending ops so it doesnt matter since * stops are never pending */ sort_return(0, "pending"); } else if (a_call_id >= 0 && a_call_id < b_call_id) { sort_return(-1, "call id"); } else if (b_call_id >= 0 && a_call_id > b_call_id) { sort_return(1, "call id"); } else if (b_call_id >= 0 && a_call_id == b_call_id) { /* * The op and last_failed_op are the same * Order on last-rc-change */ int last_a = -1; int last_b = -1; crm_element_value_const_int(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a); crm_element_value_const_int(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b); crm_trace("rc-change: %d vs %d", last_a, last_b); if (last_a >= 0 && last_a < last_b) { sort_return(-1, "rc-change"); } else if (last_b >= 0 && last_a > last_b) { sort_return(1, "rc-change"); } sort_return(0, "rc-change"); } else { /* One of the inputs is a pending operation * Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other */ int a_id = -1; int b_id = -1; int dummy = -1; const char *a_magic = crm_element_value_const(xml_a, XML_ATTR_TRANSITION_MAGIC); const char *b_magic = crm_element_value_const(xml_b, XML_ATTR_TRANSITION_MAGIC); CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic")); if(!decode_transition_magic(a_magic, &a_uuid, &a_id, &dummy, &dummy, &dummy, &dummy)) { sort_return(0, "bad magic a"); } if(!decode_transition_magic(b_magic, &b_uuid, &b_id, &dummy, &dummy, &dummy, &dummy)) { sort_return(0, "bad magic b"); } /* try and determin the relative age of the operation... * some pending operations (ie. a start) may have been supuerceeded * by a subsequent stop * * [a|b]_id == -1 means its a shutdown operation and _always_ comes last */ if (safe_str_neq(a_uuid, b_uuid) || a_id == b_id) { /* * some of the logic in here may be redundant... * * if the UUID from the TE doesnt match then one better * be a pending operation. * pending operations dont survive between elections and joins * because we query the LRM directly */ if (b_call_id == -1) { sort_return(-1, "transition + call"); } else if (a_call_id == -1) { sort_return(1, "transition + call"); } } else if ((a_id >= 0 && a_id < b_id) || b_id == -1) { sort_return(-1, "transition"); } else if ((b_id >= 0 && a_id > b_id) || a_id == -1) { sort_return(1, "transition"); } } /* we should never end up here */ CRM_CHECK(FALSE, sort_return(0, "default")); } time_t get_effective_time(pe_working_set_t * data_set) { if(data_set) { if (data_set->now == NULL) { crm_trace("Recording a new 'now'"); data_set->now = crm_time_new(NULL); } return crm_time_get_seconds_since_epoch(data_set->now); } crm_trace("Defaulting to 'now'"); return time(NULL); } struct fail_search { resource_t *rsc; int count; long long last; char *key; }; static void get_failcount_by_prefix(gpointer key_p, gpointer value, gpointer user_data) { struct fail_search *search = user_data; const char *key = key_p; const char *match = strstr(key, search->key); if (match) { if (strstr(key, "last-failure-") == key && (key + 13) == match) { search->last = crm_int_helper(value, NULL); } else if (strstr(key, "fail-count-") == key && (key + 11) == match) { search->count += char2score(value); } } } int get_failcount(node_t * node, resource_t * rsc, time_t *last_failure, pe_working_set_t * data_set) { return get_failcount_full(node, rsc, last_failure, TRUE, data_set); } int get_failcount_full(node_t * node, resource_t * rsc, time_t *last_failure, bool effective, pe_working_set_t * data_set) { char *key = NULL; const char *value = NULL; struct fail_search search = { rsc, 0, 0, NULL }; /* Optimize the "normal" case */ key = crm_concat("fail-count", rsc->clone_name ? rsc->clone_name : rsc->id, '-'); value = g_hash_table_lookup(node->details->attrs, key); search.count = char2score(value); crm_trace("%s = %s", key, value); free(key); if (value) { key = crm_concat("last-failure", rsc->clone_name ? rsc->clone_name : rsc->id, '-'); value = g_hash_table_lookup(node->details->attrs, key); search.last = crm_int_helper(value, NULL); free(key); /* This block is still relevant once we omit anonymous instance numbers * because stopped clones wont have clone_name set */ } else if (is_not_set(rsc->flags, pe_rsc_unique)) { search.rsc = uber_parent(rsc); search.key = clone_strip(rsc->id); g_hash_table_foreach(node->details->attrs, get_failcount_by_prefix, &search); free(search.key); } if (search.count != 0 && search.last != 0 && last_failure) { *last_failure = search.last; } if(search.count && rsc->failure_timeout) { /* Never time-out if blocking failures are configured */ char *xml_name = clone_strip(rsc->id); char *xpath = g_strdup_printf("//primitive[@id='%s']//op[@on-fail='block']", xml_name); xmlXPathObject *xpathObj = xpath_search(rsc->xml, xpath); free(xml_name); free(xpath); if (numXpathResults(xpathObj) > 0) { xmlNode *pref = getXpathResult(xpathObj, 0); pe_warn("Setting %s.failure_timeout=%d in %s conflicts with on-fail=block: ignoring timeout", rsc->id, rsc->failure_timeout, ID(pref)); rsc->failure_timeout = 0; #if 0 /* A good idea? */ } else if (rsc->container == NULL && is_not_set(data_set->flags, pe_flag_stonith_enabled)) { /* In this case, stop.on-fail defaults to block in unpack_operation() */ rsc->failure_timeout = 0; #endif } freeXpathObject(xpathObj); } if (effective && search.count != 0 && search.last != 0 && rsc->failure_timeout) { if (search.last > 0) { time_t now = get_effective_time(data_set); if (now > (search.last + rsc->failure_timeout)) { crm_debug("Failcount for %s on %s has expired (limit was %ds)", search.rsc->id, node->details->uname, rsc->failure_timeout); search.count = 0; } } } if (search.count != 0) { char *score = score2char(search.count); crm_info("%s has failed %s times on %s", search.rsc->id, score, node->details->uname); free(score); } return search.count; } /* If it's a resource container, get its failcount plus all the failcounts of the resources within it */ int get_failcount_all(node_t * node, resource_t * rsc, time_t *last_failure, pe_working_set_t * data_set) { int failcount_all = 0; failcount_all = get_failcount(node, rsc, last_failure, data_set); if (rsc->fillers) { GListPtr gIter = NULL; for (gIter = rsc->fillers; gIter != NULL; gIter = gIter->next) { resource_t *filler = (resource_t *) gIter->data; time_t filler_last_failure = 0; failcount_all += get_failcount(node, filler, &filler_last_failure, data_set); if (last_failure && filler_last_failure > *last_failure) { *last_failure = filler_last_failure; } } if (failcount_all != 0) { char *score = score2char(failcount_all); crm_info("Container %s and the resources within it have failed %s times on %s", rsc->id, score, node->details->uname); free(score); } } return failcount_all; } gboolean get_target_role(resource_t * rsc, enum rsc_role_e * role) { enum rsc_role_e local_role = RSC_ROLE_UNKNOWN; const char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); CRM_CHECK(role != NULL, return FALSE); if (value == NULL || safe_str_eq("started", value) || safe_str_eq("default", value)) { return FALSE; } local_role = text2role(value); if (local_role == RSC_ROLE_UNKNOWN) { crm_config_err("%s: Unknown value for %s: %s", rsc->id, XML_RSC_ATTR_TARGET_ROLE, value); return FALSE; } else if (local_role > RSC_ROLE_STARTED) { if (uber_parent(rsc)->variant == pe_master) { if (local_role > RSC_ROLE_SLAVE) { /* This is what we'd do anyway, just leave the default to avoid messing up the placement algorithm */ return FALSE; } } else { crm_config_err("%s is not part of a master/slave resource, a %s of '%s' makes no sense", rsc->id, XML_RSC_ATTR_TARGET_ROLE, value); return FALSE; } } *role = local_role; return TRUE; } gboolean order_actions(action_t * lh_action, action_t * rh_action, enum pe_ordering order) { GListPtr gIter = NULL; action_wrapper_t *wrapper = NULL; GListPtr list = NULL; if (order == pe_order_none) { return FALSE; } if (lh_action == NULL || rh_action == NULL) { return FALSE; } crm_trace("Ordering Action %s before %s", lh_action->uuid, rh_action->uuid); /* Filter dups, otherwise update_action_states() has too much work to do */ gIter = lh_action->actions_after; for (; gIter != NULL; gIter = gIter->next) { action_wrapper_t *after = (action_wrapper_t *) gIter->data; if (after->action == rh_action && (after->type & order)) { return FALSE; } } wrapper = calloc(1, sizeof(action_wrapper_t)); wrapper->action = rh_action; wrapper->type = order; list = lh_action->actions_after; list = g_list_prepend(list, wrapper); lh_action->actions_after = list; wrapper = NULL; /* order |= pe_order_implies_then; */ /* order ^= pe_order_implies_then; */ wrapper = calloc(1, sizeof(action_wrapper_t)); wrapper->action = lh_action; wrapper->type = order; list = rh_action->actions_before; list = g_list_prepend(list, wrapper); rh_action->actions_before = list; return TRUE; } action_t * get_pseudo_op(const char *name, pe_working_set_t * data_set) { action_t *op = NULL; const char *op_s = name; GListPtr possible_matches = NULL; possible_matches = find_actions(data_set->actions, name, NULL); if (possible_matches != NULL) { if (g_list_length(possible_matches) > 1) { pe_warn("Action %s exists %d times", name, g_list_length(possible_matches)); } op = g_list_nth_data(possible_matches, 0); g_list_free(possible_matches); } else { op = custom_action(NULL, strdup(op_s), op_s, NULL, TRUE, TRUE, data_set); set_bit(op->flags, pe_action_pseudo); set_bit(op->flags, pe_action_runnable); } return op; } void destroy_ticket(gpointer data) { ticket_t *ticket = data; if (ticket->state) { g_hash_table_destroy(ticket->state); } free(ticket->id); free(ticket); } ticket_t * ticket_new(const char *ticket_id, pe_working_set_t * data_set) { ticket_t *ticket = NULL; if (ticket_id == NULL || strlen(ticket_id) == 0) { return NULL; } if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket); } ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = calloc(1, sizeof(ticket_t)); if (ticket == NULL) { crm_err("Cannot allocate ticket '%s'", ticket_id); return NULL; } crm_trace("Creaing ticket entry for %s", ticket_id); ticket->id = strdup(ticket_id); ticket->granted = FALSE; ticket->last_granted = -1; ticket->standby = FALSE; ticket->state = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_insert(data_set->tickets, strdup(ticket->id), ticket); } return ticket; } op_digest_cache_t * rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node, pe_working_set_t * data_set) { op_digest_cache_t *data = NULL; GHashTable *local_rsc_params = NULL; action_t *action = NULL; char *key = NULL; int interval = 0; const char *op_id = ID(xml_op); const char *interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *digest_all; const char *digest_restart; const char *restart_list; const char *op_version; data = g_hash_table_lookup(node->details->digest_cache, op_id); if (data) { return data; } data = calloc(1, sizeof(op_digest_cache_t)); digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); restart_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_RESTART); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); /* key is freed in custom_action */ interval = crm_parse_int(interval_s, "0"); key = generate_op_key(rsc->id, task, interval); action = custom_action(rsc, key, task, node, TRUE, FALSE, data_set); key = NULL; local_rsc_params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); get_rsc_attributes(local_rsc_params, rsc, node, data_set); data->params_all = create_xml_node(NULL, XML_TAG_PARAMS); g_hash_table_foreach(local_rsc_params, hash2field, data->params_all); g_hash_table_foreach(action->extra, hash2field, data->params_all); g_hash_table_foreach(rsc->parameters, hash2field, data->params_all); g_hash_table_foreach(action->meta, hash2metafield, data->params_all); filter_action_parameters(data->params_all, op_version); data->digest_all_calc = calculate_operation_digest(data->params_all, op_version); if (digest_restart) { data->params_restart = copy_xml(data->params_all); if (restart_list) { filter_reload_parameters(data->params_restart, restart_list); } data->digest_restart_calc = calculate_operation_digest(data->params_restart, op_version); } if (digest_restart && strcmp(data->digest_restart_calc, digest_restart) != 0) { data->rc = RSC_DIGEST_RESTART; } else if (digest_all == NULL) { /* it is unknown what the previous op digest was */ data->rc = RSC_DIGEST_UNKNOWN; } else if (strcmp(digest_all, data->digest_all_calc) != 0) { data->rc = RSC_DIGEST_ALL; } g_hash_table_insert(node->details->digest_cache, strdup(op_id), data); g_hash_table_destroy(local_rsc_params); pe_free_action(action); return data; } const char *rsc_printable_id(resource_t *rsc) { if (is_not_set(rsc->flags, pe_rsc_unique)) { return ID(rsc->xml); } return rsc->id; } gboolean is_baremetal_remote_node(node_t *node) { if (node->details->remote_rsc && (node->details->remote_rsc->container == FALSE)) { return TRUE; } return FALSE; } gboolean is_container_remote_node(node_t *node) { if (node->details->remote_rsc && node->details->remote_rsc->container) { return TRUE; } return FALSE; } gboolean is_remote_node(node_t *node) { if (node->details->remote_rsc) { return TRUE; } return FALSE; } -gboolean +resource_t * rsc_contains_remote_node(pe_working_set_t * data_set, resource_t *rsc) { if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) { - return FALSE; + return NULL; } if (rsc->fillers) { GListPtr gIter = NULL; for (gIter = rsc->fillers; gIter != NULL; gIter = gIter->next) { resource_t *filler = (resource_t *) gIter->data; if (filler->is_remote_node) { - return TRUE; + return filler; } } } - return FALSE; + return NULL; } gboolean xml_contains_remote_node(xmlNode *xml) { const char *class = crm_element_value(xml, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER); const char *agent = crm_element_value(xml, XML_ATTR_TYPE); if (safe_str_eq(agent, "remote") && safe_str_eq(provider, "pacemaker") && safe_str_eq(class, "ocf")) { return TRUE; } return FALSE; } diff --git a/pengine/allocate.c b/pengine/allocate.c index f3b0f62754..ebe6b66977 100644 --- a/pengine/allocate.c +++ b/pengine/allocate.c @@ -1,2490 +1,2490 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_allocate); void set_alloc_actions(pe_working_set_t * data_set); void migrate_reload_madness(pe_working_set_t * data_set); resource_alloc_functions_t resource_class_alloc_functions[] = { { native_merge_weights, native_color, native_create_actions, native_create_probe, native_internal_constraints, native_rsc_colocation_lh, native_rsc_colocation_rh, native_rsc_location, native_action_flags, native_update_actions, native_expand, native_append_meta, }, { group_merge_weights, group_color, group_create_actions, native_create_probe, group_internal_constraints, group_rsc_colocation_lh, group_rsc_colocation_rh, group_rsc_location, group_action_flags, group_update_actions, group_expand, group_append_meta, }, { clone_merge_weights, clone_color, clone_create_actions, clone_create_probe, clone_internal_constraints, clone_rsc_colocation_lh, clone_rsc_colocation_rh, clone_rsc_location, clone_action_flags, clone_update_actions, clone_expand, clone_append_meta, }, { master_merge_weights, master_color, master_create_actions, clone_create_probe, master_internal_constraints, clone_rsc_colocation_lh, master_rsc_colocation_rh, clone_rsc_location, clone_action_flags, clone_update_actions, clone_expand, master_append_meta, } }; static gboolean check_rsc_parameters(resource_t * rsc, node_t * node, xmlNode * rsc_entry, gboolean active_here, pe_working_set_t * data_set) { int attr_lpc = 0; gboolean force_restart = FALSE; gboolean delete_resource = FALSE; gboolean changed = FALSE; const char *value = NULL; const char *old_value = NULL; const char *attr_list[] = { XML_ATTR_TYPE, XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER }; for (; attr_lpc < DIMOF(attr_list); attr_lpc++) { value = crm_element_value(rsc->xml, attr_list[attr_lpc]); old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]); if (value == old_value /* ie. NULL */ || crm_str_eq(value, old_value, TRUE)) { continue; } changed = TRUE; if (active_here) { force_restart = TRUE; crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s", rsc->id, node->details->uname, attr_list[attr_lpc], crm_str(old_value), crm_str(value)); } } if (force_restart) { /* make sure the restart happens */ stop_action(rsc, node, FALSE); set_bit(rsc->flags, pe_rsc_start_pending); delete_resource = TRUE; } else if (changed) { delete_resource = TRUE; } return delete_resource; } static void CancelXmlOp(resource_t * rsc, xmlNode * xml_op, node_t * active_node, const char *reason, pe_working_set_t * data_set) { int interval = 0; action_t *cancel = NULL; char *key = NULL; const char *task = NULL; const char *call_id = NULL; const char *interval_s = NULL; CRM_CHECK(xml_op != NULL, return); CRM_CHECK(active_node != NULL, return); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); /* we need to reconstruct the key because of the way we used to construct resource IDs */ key = generate_op_key(rsc->id, task, interval); crm_info("Action %s on %s will be stopped: %s", key, active_node->details->uname, reason ? reason : "unknown"); cancel = custom_action(rsc, strdup(key), RSC_CANCEL, active_node, FALSE, TRUE, data_set); free(cancel->task); cancel->task = strdup(RSC_CANCEL); add_hash_param(cancel->meta, XML_LRM_ATTR_TASK, task); add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id); add_hash_param(cancel->meta, XML_LRM_ATTR_INTERVAL, interval_s); custom_action_order(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set); free(key); key = NULL; } static gboolean check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op, pe_working_set_t * data_set) { char *key = NULL; int interval = 0; const char *interval_s = NULL; const op_digest_cache_t *digest_data = NULL; gboolean did_change = FALSE; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *op_version; CRM_CHECK(active_node != NULL, return FALSE); if (safe_str_eq(task, RSC_STOP)) { return FALSE; } interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if (interval > 0) { xmlNode *op_match = NULL; /* we need to reconstruct the key because of the way we used to construct resource IDs */ key = generate_op_key(rsc->id, task, interval); pe_rsc_trace(rsc, "Checking parameters for %s", key); op_match = find_rsc_op_entry(rsc, key); if (op_match == NULL && is_set(data_set->flags, pe_flag_stop_action_orphans)) { CancelXmlOp(rsc, xml_op, active_node, "orphan", data_set); free(key); return TRUE; } else if (op_match == NULL) { pe_rsc_debug(rsc, "Orphan action detected: %s on %s", key, active_node->details->uname); free(key); return TRUE; } free(key); key = NULL; } if (interval == 0 && safe_str_eq(task, RSC_STATUS)) { /* Reload based on the start action not a probe */ task = RSC_START; } else if (interval == 0 && safe_str_eq(task, RSC_MIGRATED)) { /* Reload based on the start action not a migrate */ task = RSC_START; } digest_data = rsc_action_digest_cmp(rsc, xml_op, active_node, data_set); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); /* Changes that force a restart */ if (digest_data->rc == RSC_DIGEST_RESTART) { const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); did_change = TRUE; key = generate_op_key(rsc->id, task, interval); crm_log_xml_info(digest_data->params_restart, "params:restart"); pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (restart:%s) %s", key, active_node->details->uname, crm_str(digest_restart), digest_data->digest_restart_calc, op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); } else if ((digest_data->rc == RSC_DIGEST_ALL) || (digest_data->rc == RSC_DIGEST_UNKNOWN)) { /* Changes that can potentially be handled by a reload */ const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); const char *digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); did_change = TRUE; crm_log_xml_info(digest_data->params_all, "params:reload"); key = generate_op_key(rsc->id, task, interval); pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (reload:%s) %s", key, active_node->details->uname, crm_str(digest_all), digest_data->digest_all_calc, op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); if (interval > 0) { action_t *op = NULL; #if 0 /* Always reload/restart the entire resource */ op = custom_action(rsc, start_key(rsc), RSC_START, NULL, FALSE, TRUE, data_set); update_action_flags(op, pe_action_allow_reload_conversion); #else /* Re-sending the recurring op is sufficient - the old one will be cancelled automatically */ op = custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); custom_action_order(rsc, start_key(rsc), NULL, NULL, NULL, op, pe_order_runnable_left, data_set); #endif } else if (digest_restart) { pe_rsc_trace(rsc, "Reloading '%s' action for resource %s", task, rsc->id); /* Allow this resource to reload - unless something else causes a full restart */ set_bit(rsc->flags, pe_rsc_try_reload); /* Create these for now, it keeps the action IDs the same in the regression outputs */ custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set); } else { pe_rsc_trace(rsc, "Resource %s doesn't know how to reload", rsc->id); /* Re-send the start/demote/promote op * Recurring ops will be detected independantly */ custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); } } return did_change; } extern gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set); static void check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_working_set_t * data_set) { GListPtr gIter = NULL; int offset = -1; int interval = 0; int stop_index = 0; int start_index = 0; const char *task = NULL; const char *interval_s = NULL; xmlNode *rsc_op = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; gboolean is_probe = FALSE; gboolean did_change = FALSE; CRM_CHECK(node != NULL, return); if (is_set(rsc->flags, pe_rsc_orphan)) { resource_t *parent = uber_parent(rsc); if(parent == NULL || parent->variant < pe_clone || is_set(parent->flags, pe_rsc_unique)) { pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id); DeleteRsc(rsc, node, FALSE, data_set); } else { pe_rsc_trace(rsc, "Skipping param check for %s (orphan clone)", rsc->id); } return; } else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) { if (check_rsc_parameters(rsc, node, rsc_entry, FALSE, data_set)) { DeleteRsc(rsc, node, FALSE, data_set); } pe_rsc_trace(rsc, "Skipping param check for %s: no longer active on %s", rsc->id, node->details->uname); return; } pe_rsc_trace(rsc, "Processing %s on %s", rsc->id, node->details->uname); if (check_rsc_parameters(rsc, node, rsc_entry, TRUE, data_set)) { DeleteRsc(rsc, node, FALSE, data_set); } for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { op_list = g_list_prepend(op_list, rsc_op); } } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; offset++; if (start_index < stop_index) { /* stopped */ continue; } else if (offset < start_index) { /* action occurred prior to a start */ continue; } is_probe = FALSE; did_change = FALSE; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if (interval == 0 && safe_str_eq(task, RSC_STATUS)) { is_probe = TRUE; } if (interval > 0 && (is_set(data_set->flags, pe_flag_maintenance_mode) || node->details->maintenance)) { CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set); } else if (is_probe || safe_str_eq(task, RSC_START) || interval > 0 || safe_str_eq(task, RSC_MIGRATED)) { did_change = check_action_definition(rsc, node, rsc_op, data_set); } if (did_change && get_failcount(node, rsc, NULL, data_set)) { char *key = NULL; action_t *action_clear = NULL; key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0); action_clear = custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set); set_bit(action_clear->flags, pe_action_runnable); } } g_list_free(sorted_op_list); } static GListPtr find_rsc_list(GListPtr result, resource_t * rsc, const char *id, gboolean renamed_clones, gboolean partial, pe_working_set_t * data_set) { GListPtr gIter = NULL; gboolean match = FALSE; if (id == NULL) { return NULL; } else if (rsc == NULL && data_set) { for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; result = find_rsc_list(result, child, id, renamed_clones, partial, NULL); } return result; } else if (rsc == NULL) { return NULL; } if (partial) { if (strstr(rsc->id, id)) { match = TRUE; } else if (renamed_clones && rsc->clone_name && strstr(rsc->clone_name, id)) { match = TRUE; } } else { if (strcmp(rsc->id, id) == 0) { match = TRUE; } else if (renamed_clones && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) { match = TRUE; } } if (match) { result = g_list_prepend(result, rsc); } if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; result = find_rsc_list(result, child, id, renamed_clones, partial, NULL); } } return result; } static void check_actions(pe_working_set_t * data_set) { const char *id = NULL; node_t *node = NULL; xmlNode *lrm_rscs = NULL; xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); xmlNode *node_state = NULL; for (node_state = __xml_first_child(status); node_state != NULL; node_state = __xml_next(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) { id = crm_element_value(node_state, XML_ATTR_ID); lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE); node = pe_find_node_id(data_set->nodes, id); if (node == NULL) { continue; /* Still need to check actions for a maintenance node to cancel existing monitor operations */ } else if (can_run_resources(node) == FALSE && node->details->maintenance == FALSE) { crm_trace("Skipping param check for %s: cant run resources", node->details->uname); continue; } crm_trace("Processing node %s", node->details->uname); if (node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { xmlNode *rsc_entry = NULL; for (rsc_entry = __xml_first_child(lrm_rscs); rsc_entry != NULL; rsc_entry = __xml_next(rsc_entry)) { if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) { if (xml_has_children(rsc_entry)) { GListPtr gIter = NULL; GListPtr result = NULL; const char *rsc_id = ID(rsc_entry); CRM_CHECK(rsc_id != NULL, return); result = find_rsc_list(NULL, NULL, rsc_id, TRUE, FALSE, data_set); for (gIter = result; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; if (rsc->variant != pe_native) { continue; } check_actions_for(rsc_entry, rsc, node, data_set); } g_list_free(result); } } } } } } } static gboolean apply_placement_constraints(pe_working_set_t * data_set) { GListPtr gIter = NULL; crm_trace("Applying constraints..."); for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) { rsc_to_node_t *cons = (rsc_to_node_t *) gIter->data; cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons); } return TRUE; } static gboolean failcount_clear_action_exists(node_t * node, resource_t * rsc) { gboolean rc = FALSE; char *key = crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_'); GListPtr list = find_actions_exact(rsc->actions, key, node); if (list) { rc = TRUE; } g_list_free(list); free(key); return rc; } static void common_apply_stickiness(resource_t * rsc, node_t * node, pe_working_set_t * data_set) { int fail_count = 0; resource_t *failed = rsc; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; common_apply_stickiness(child_rsc, node, data_set); } return; } if (is_set(rsc->flags, pe_rsc_managed) && rsc->stickiness != 0 && g_list_length(rsc->running_on) == 1) { node_t *current = pe_find_node_id(rsc->running_on, node->details->id); node_t *match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (current == NULL) { } else if (match != NULL || is_set(data_set->flags, pe_flag_symmetric_cluster)) { resource_t *sticky_rsc = rsc; resource_location(sticky_rsc, node, rsc->stickiness, "stickiness", data_set); pe_rsc_debug(sticky_rsc, "Resource %s: preferring current location" " (node=%s, weight=%d)", sticky_rsc->id, node->details->uname, rsc->stickiness); } else { GHashTableIter iter; node_t *nIter = NULL; pe_rsc_debug(rsc, "Ignoring stickiness for %s: the cluster is asymmetric" " and node %s is not explicitly allowed", rsc->id, node->details->uname); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&nIter)) { crm_err("%s[%s] = %d", rsc->id, nIter->details->uname, nIter->weight); } } } /* only check failcount here if a failcount clear action * has not already been placed for this resource on the node. * There is no sense in potentially forcing the rsc from this * node if the failcount is being reset anyway. */ if (failcount_clear_action_exists(node, rsc) == FALSE) { fail_count = get_failcount_all(node, rsc, NULL, data_set); } if (fail_count > 0 && rsc->migration_threshold != 0) { if (is_not_set(rsc->flags, pe_rsc_unique)) { failed = uber_parent(rsc); } if (rsc->migration_threshold <= fail_count) { resource_location(failed, node, -INFINITY, "__fail_limit__", data_set); crm_warn("Forcing %s away from %s after %d failures (max=%d)", failed->id, node->details->uname, fail_count, rsc->migration_threshold); } else { crm_info("%s can fail %d more times on %s before being forced off", failed->id, rsc->migration_threshold - fail_count, node->details->uname); } } } static void complex_set_cmds(resource_t * rsc) { GListPtr gIter = rsc->children; rsc->cmds = &resource_class_alloc_functions[rsc->variant]; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; complex_set_cmds(child_rsc); } } void set_alloc_actions(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; complex_set_cmds(rsc); } } static void calculate_system_health(gpointer gKey, gpointer gValue, gpointer user_data) { const char *key = (const char *)gKey; const char *value = (const char *)gValue; int *system_health = (int *)user_data; if (!gKey || !gValue || !user_data) { return; } /* Does it start with #health? */ if (0 == strncmp(key, "#health", 7)) { int score; /* Convert the value into an integer */ score = char2score(value); /* Add it to the running total */ *system_health = merge_weights(score, *system_health); } } static gboolean apply_system_health(pe_working_set_t * data_set) { GListPtr gIter = NULL; const char *health_strategy = pe_pref(data_set->config_hash, "node-health-strategy"); if (health_strategy == NULL || safe_str_eq(health_strategy, "none")) { /* Prevent any accidental health -> score translation */ node_score_red = 0; node_score_yellow = 0; node_score_green = 0; return TRUE; } else if (safe_str_eq(health_strategy, "migrate-on-red")) { /* Resources on nodes which have health values of red are * weighted away from that node. */ node_score_red = -INFINITY; node_score_yellow = 0; node_score_green = 0; } else if (safe_str_eq(health_strategy, "only-green")) { /* Resources on nodes which have health values of red or yellow * are forced away from that node. */ node_score_red = -INFINITY; node_score_yellow = -INFINITY; node_score_green = 0; } else if (safe_str_eq(health_strategy, "progressive")) { /* Same as the above, but use the r/y/g scores provided by the user * Defaults are provided by the pe_prefs table */ } else if (safe_str_eq(health_strategy, "custom")) { /* Requires the admin to configure the rsc_location constaints for * processing the stored health scores */ /* TODO: Check for the existance of appropriate node health constraints */ return TRUE; } else { crm_err("Unknown node health strategy: %s", health_strategy); return FALSE; } crm_info("Applying automated node health strategy: %s", health_strategy); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { int system_health = 0; node_t *node = (node_t *) gIter->data; /* Search through the node hash table for system health entries. */ g_hash_table_foreach(node->details->attrs, calculate_system_health, &system_health); crm_info(" Node %s has an combined system health of %d", node->details->uname, system_health); /* If the health is non-zero, then create a new rsc2node so that the * weight will be added later on. */ if (system_health != 0) { GListPtr gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; rsc2node_new(health_strategy, rsc, system_health, node, data_set); } } } return TRUE; } gboolean stage0(pe_working_set_t * data_set) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); if (data_set->input == NULL) { return FALSE; } if (is_set(data_set->flags, pe_flag_have_status) == FALSE) { crm_trace("Calculating status"); cluster_status(data_set); } set_alloc_actions(data_set); apply_system_health(data_set); unpack_constraints(cib_constraints, data_set); return TRUE; } static void wait_for_probe(resource_t * rsc, const char *action, action_t * probe_complete, pe_working_set_t * data_set) { if (probe_complete == NULL) { return; } if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; wait_for_probe(child, action, probe_complete, data_set); } } else { char *key = NULL; if (safe_str_eq(action, RSC_STOP) && g_list_length(rsc->running_on) == 1) { node_t *node = (node_t *) rsc->running_on->data; /* Stop actions on nodes that are shutting down do not need to wait for probes to complete * Doing so prevents node shutdown in the presence of nodes that are coming up * The purpose of waiting is to not stop resources until we know for sure the * intended destination is able to take them */ if (node && node->details->shutdown) { crm_debug("Skipping %s before %s_%s_0 due to %s shutdown", probe_complete->uuid, rsc->id, action, node->details->uname); return; } } key = generate_op_key(rsc->id, action, 0); custom_action_order(NULL, NULL, probe_complete, rsc, key, NULL, pe_order_optional, data_set); } } /* * Check nodes for resources started outside of the LRM */ gboolean probe_resources(pe_working_set_t * data_set) { action_t *probe_complete = NULL; action_t *probe_node_complete = NULL; action_t *probe_cluster_nodes_complete = NULL; GListPtr gIter = NULL; GListPtr gIter2 = NULL; gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; const char *probed = g_hash_table_lookup(node->details->attrs, CRM_OP_PROBED); if (node->details->online == FALSE) { continue; } else if (node->details->unclean) { continue; } else if (is_remote_node(node) && node->details->shutdown) { /* Don't try and probe a remote node we're shutting down. * It causes constraint conflicts to try and run any sort of action * other that 'stop' on resources living within a remote-node when * it is being shutdown. */ continue; - } else if (is_container_remote_node(node) && is_not_set(data_set->flags, pe_flag_container_probes)) { - /* skip container nodes if container node probes are disabled */ + } else if (is_container_remote_node(node)) { + /* TODO enable container node probes once ordered probing is implemented. */ continue; } else if (probe_complete == NULL) { probe_complete = get_pseudo_op(CRM_OP_PROBED, data_set); if (is_set(data_set->flags, pe_flag_have_remote_nodes)) { probe_cluster_nodes_complete = get_pseudo_op(CRM_OP_NODES_PROBED, data_set); } } if (probed != NULL && crm_is_true(probed) == FALSE) { action_t *probe_op = custom_action(NULL, strdup(CRM_OP_REPROBE), CRM_OP_REPROBE, node, FALSE, TRUE, data_set); add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); continue; } probe_node_complete = custom_action(NULL, strdup(CRM_OP_PROBED), CRM_OP_PROBED, node, FALSE, TRUE, data_set); if (crm_is_true(probed)) { crm_trace("unset"); update_action_flags(probe_node_complete, pe_action_optional); } else { crm_trace("set"); update_action_flags(probe_node_complete, pe_action_optional | pe_action_clear); } crm_trace("%s - %d", node->details->uname, probe_node_complete->flags & pe_action_optional); probe_node_complete->priority = INFINITY; add_hash_param(probe_node_complete->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); if (node->details->pending) { update_action_flags(probe_node_complete, pe_action_runnable | pe_action_clear); crm_info("Action %s on %s is unrunnable (pending)", probe_node_complete->uuid, probe_node_complete->node->details->uname); } if (is_remote_node(node)) { order_actions(probe_node_complete, probe_complete, pe_order_runnable_left /*|pe_order_implies_then */ ); } else if (probe_cluster_nodes_complete == NULL) { order_actions(probe_node_complete, probe_complete, pe_order_runnable_left /*|pe_order_implies_then */ ); } else { order_actions(probe_node_complete, probe_cluster_nodes_complete, pe_order_runnable_left /*|pe_order_implies_then */ ); } gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; if (rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set)) { update_action_flags(probe_complete, pe_action_optional | pe_action_clear); update_action_flags(probe_node_complete, pe_action_optional | pe_action_clear); if (rsc->is_remote_node || rsc_contains_remote_node(data_set, rsc)) { update_action_flags(probe_cluster_nodes_complete, pe_action_optional | pe_action_clear); /* allow remote connection resources and resources * containing remote connection resources to run after all * cluster nodes are probed */ wait_for_probe(rsc, RSC_START, probe_cluster_nodes_complete, data_set); } else { wait_for_probe(rsc, RSC_START, probe_complete, data_set); } } } } gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; if (rsc->is_remote_node || rsc_contains_remote_node(data_set, rsc)) { /* allow remote connection resources and any resources containing * remote connection resources to run after cluster nodes are probed.*/ wait_for_probe(rsc, RSC_STOP, probe_cluster_nodes_complete, data_set); } else { wait_for_probe(rsc, RSC_STOP, probe_complete, data_set); } } return TRUE; } /* * Count how many valid nodes we have (so we know the maximum number of * colors we can resolve). * * Apply node constraints (ie. filter the "allowed_nodes" part of resources */ gboolean stage2(pe_working_set_t * data_set) { GListPtr gIter = NULL; crm_trace("Applying placement constraints"); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node == NULL) { /* error */ } else if (node->weight >= 0.0 /* global weight */ && node->details->online && node->details->type != node_ping) { data_set->max_valid_nodes++; } } apply_placement_constraints(data_set); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { GListPtr gIter2 = NULL; node_t *node = (node_t *) gIter->data; gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; common_apply_stickiness(rsc, node, data_set); } } return TRUE; } /* * Create internal resource constraints before allocation */ gboolean stage3(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; rsc->cmds->internal_constraints(rsc, data_set); } return TRUE; } /* * Check for orphaned or redefined actions */ gboolean stage4(pe_working_set_t * data_set) { check_actions(data_set); return TRUE; } static gint sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data) { int rc = 0; int r1_weight = -INFINITY; int r2_weight = -INFINITY; const char *reason = "existance"; const GListPtr nodes = (GListPtr) data; resource_t *resource1 = (resource_t *) convert_const_pointer(a); resource_t *resource2 = (resource_t *) convert_const_pointer(b); node_t *node = NULL; GListPtr gIter = NULL; GHashTable *r1_nodes = NULL; GHashTable *r2_nodes = NULL; if (a == NULL && b == NULL) { goto done; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } reason = "priority"; r1_weight = resource1->priority; r2_weight = resource2->priority; if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } reason = "no node list"; if (nodes == NULL) { goto done; } r1_nodes = rsc_merge_weights(resource1, resource1->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); dump_node_scores(LOG_TRACE, NULL, resource1->id, r1_nodes); r2_nodes = rsc_merge_weights(resource2, resource2->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); dump_node_scores(LOG_TRACE, NULL, resource2->id, r2_nodes); /* Current location score */ reason = "current location"; r1_weight = -INFINITY; r2_weight = -INFINITY; if (resource1->running_on) { node = g_list_nth_data(resource1->running_on, 0); node = g_hash_table_lookup(r1_nodes, node->details->id); if (node != NULL) { r1_weight = node->weight; } } if (resource2->running_on) { node = g_list_nth_data(resource2->running_on, 0); node = g_hash_table_lookup(r2_nodes, node->details->id); if (node != NULL) { r2_weight = node->weight; } } if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } reason = "score"; for (gIter = nodes; gIter != NULL; gIter = gIter->next) { node_t *r1_node = NULL; node_t *r2_node = NULL; node = (node_t *) gIter->data; r1_weight = -INFINITY; if (r1_nodes) { r1_node = g_hash_table_lookup(r1_nodes, node->details->id); } if (r1_node) { r1_weight = r1_node->weight; } r2_weight = -INFINITY; if (r2_nodes) { r2_node = g_hash_table_lookup(r2_nodes, node->details->id); } if (r2_node) { r2_weight = r2_node->weight; } if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } } done: if (r1_nodes) { g_hash_table_destroy(r1_nodes); } if (r2_nodes) { g_hash_table_destroy(r2_nodes); } crm_trace("%s (%d) %c %s (%d) on %s: %s", resource1->id, r1_weight, rc < 0 ? '>' : rc > 0 ? '<' : '=', resource2->id, r2_weight, node ? node->details->id : "n/a", reason); return rc; } static void allocate_resources(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; if (is_set(data_set->flags, pe_flag_have_remote_nodes)) { /* Force remote connection resources to be allocated first. This * also forces any colocation dependencies to be allocated as well */ for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; if (rsc->is_remote_node == FALSE) { continue; } pe_rsc_trace(rsc, "Allocating: %s", rsc->id); rsc->cmds->allocate(rsc, NULL, data_set); } } /* now do the rest of the resources */ for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; if (rsc->is_remote_node == TRUE) { continue; } pe_rsc_trace(rsc, "Allocating: %s", rsc->id); rsc->cmds->allocate(rsc, NULL, data_set); } } gboolean stage5(pe_working_set_t * data_set) { GListPtr gIter = NULL; if (safe_str_neq(data_set->placement_strategy, "default")) { GListPtr nodes = g_list_copy(data_set->nodes); nodes = g_list_sort_with_data(nodes, sort_node_weight, NULL); data_set->resources = g_list_sort_with_data(data_set->resources, sort_rsc_process_order, nodes); g_list_free(nodes); } gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Original", node); } crm_trace("Allocating services"); /* Take (next) highest resource, assign it and create its actions */ allocate_resources(data_set); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Remaining", node); } if (is_set(data_set->flags, pe_flag_startup_probes)) { crm_trace("Calculating needed probes"); /* This code probably needs optimization * ptest -x with 100 nodes, 100 clones and clone-max=100: With probes: ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:292 Check actions ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: do_calculations: pengine.c:299 Allocate resources ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: stage5: allocate.c:881 Allocating services ptest[14781]: 2010/09/27_17:56:49 notice: TRACE: stage5: allocate.c:894 Calculating needed probes ptest[14781]: 2010/09/27_17:56:51 notice: TRACE: stage5: allocate.c:899 Creating actions ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: stage5: allocate.c:905 Creating done ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints 36s ptest[14781]: 2010/09/27_17:57:28 notice: TRACE: do_calculations: pengine.c:320 Create transition graph Without probes: ptest[14637]: 2010/09/27_17:56:21 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:292 Check actions ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: do_calculations: pengine.c:299 Allocate resources ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: stage5: allocate.c:881 Allocating services ptest[14637]: 2010/09/27_17:56:24 notice: TRACE: stage5: allocate.c:899 Creating actions ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: stage5: allocate.c:905 Creating done ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:320 Create transition graph */ probe_resources(data_set); } crm_trace("Creating actions"); gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; rsc->cmds->create_actions(rsc, data_set); } crm_trace("Creating done"); return TRUE; } static gboolean is_managed(const resource_t * rsc) { GListPtr gIter = rsc->children; if (is_set(rsc->flags, pe_rsc_managed)) { return TRUE; } for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; if (is_managed(child_rsc)) { return TRUE; } } return FALSE; } static gboolean any_managed_resources(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; if (is_managed(rsc)) { return TRUE; } } return FALSE; } action_t * pe_fence_op(node_t * node, const char *op, pe_working_set_t * data_set) { action_t *stonith_op = custom_action(NULL, g_strdup_printf("%s-%s", CRM_OP_FENCE, node->details->uname), CRM_OP_FENCE, node, FALSE, TRUE, data_set); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id); add_hash_param(stonith_op->meta, "stonith_action", op ? op : data_set->stonith_action); return stonith_op; } /* * Create dependancies for stonith and shutdown operations */ gboolean stage6(pe_working_set_t * data_set) { action_t *dc_down = NULL; action_t *dc_fence = NULL; action_t *stonith_op = NULL; action_t *last_stonith = NULL; gboolean integrity_lost = FALSE; action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); action_t *done = get_pseudo_op(STONITH_DONE, data_set); gboolean need_stonith = TRUE; GListPtr gIter = data_set->nodes; crm_trace("Processing fencing and shutdown cases"); if (any_managed_resources(data_set) == FALSE) { crm_notice("Delaying fencing operations until there are resources to manage"); need_stonith = FALSE; } for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; /* remote-nodes associated with a container resource (such as a vm) are not fenced */ if (is_container_remote_node(node)) { continue; } stonith_op = NULL; if (need_stonith && node->details->unclean && pe_can_fence(data_set, node)) { pe_warn("Scheduling Node %s for STONITH", node->details->uname); stonith_op = pe_fence_op(node, NULL, data_set); stonith_constraints(node, stonith_op, data_set); if (node->details->is_dc) { dc_down = stonith_op; dc_fence = stonith_op; } else { if (last_stonith) { order_actions(last_stonith, stonith_op, pe_order_optional); } last_stonith = stonith_op; } } else if (node->details->online && node->details->shutdown && /* TODO define what a shutdown op means for a baremetal remote node. * For now we do not send shutdown operations for remote nodes, but * if we can come up with a good use for this in the future, we will. */ is_remote_node(node) == FALSE) { action_t *down_op = NULL; crm_notice("Scheduling Node %s for shutdown", node->details->uname); down_op = custom_action(NULL, strdup(CRM_OP_SHUTDOWN), CRM_OP_SHUTDOWN, node, FALSE, TRUE, data_set); shutdown_constraints(node, down_op, data_set); add_hash_param(down_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); if (node->details->is_dc) { dc_down = down_op; } } if (node->details->unclean && stonith_op == NULL) { integrity_lost = TRUE; pe_warn("Node %s is unclean!", node->details->uname); } } if (integrity_lost) { if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED"); pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE"); } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE) { crm_notice("Cannot fence unclean nodes until quorum is" " attained (or no-quorum-policy is set to ignore)"); } } if (dc_down != NULL) { GListPtr shutdown_matches = find_actions(data_set->actions, CRM_OP_SHUTDOWN, NULL); crm_trace("Ordering shutdowns before %s on %s (DC)", dc_down->task, dc_down->node->details->uname); add_hash_param(dc_down->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); gIter = shutdown_matches; for (; gIter != NULL; gIter = gIter->next) { action_t *node_stop = (action_t *) gIter->data; if (node_stop->node->details->is_dc) { continue; } crm_debug("Ordering shutdown on %s before %s on %s", node_stop->node->details->uname, dc_down->task, dc_down->node->details->uname); order_actions(node_stop, dc_down, pe_order_optional); } if (last_stonith && dc_down != last_stonith) { order_actions(last_stonith, dc_down, pe_order_optional); } g_list_free(shutdown_matches); } if (last_stonith) { order_actions(last_stonith, done, pe_order_implies_then); } else if (dc_fence) { order_actions(dc_down, done, pe_order_implies_then); } order_actions(done, all_stopped, pe_order_implies_then); return TRUE; } /* * Determin the sets of independant actions and the correct order for the * actions in each set. * * Mark dependencies of un-runnable actions un-runnable * */ static GListPtr find_actions_by_task(GListPtr actions, resource_t * rsc, const char *original_key) { GListPtr list = NULL; list = find_actions(actions, original_key, NULL); if (list == NULL) { /* we're potentially searching a child of the original resource */ char *key = NULL; char *tmp = NULL; char *task = NULL; int interval = 0; if (parse_op_key(original_key, &tmp, &task, &interval)) { key = generate_op_key(rsc->id, task, interval); /* crm_err("looking up %s instead of %s", key, original_key); */ /* slist_iter(action, action_t, actions, lpc, */ /* crm_err(" - %s", action->uuid)); */ list = find_actions(actions, key, NULL); } else { crm_err("search key: %s", original_key); } free(key); free(tmp); free(task); } return list; } static void rsc_order_then(action_t * lh_action, resource_t * rsc, order_constraint_t * order) { GListPtr gIter = NULL; GListPtr rh_actions = NULL; action_t *rh_action = NULL; enum pe_ordering type = order->type; CRM_CHECK(rsc != NULL, return); CRM_CHECK(order != NULL, return); rh_action = order->rh_action; crm_trace("Processing RH of ordering constraint %d", order->id); if (rh_action != NULL) { rh_actions = g_list_prepend(NULL, rh_action); } else if (rsc != NULL) { rh_actions = find_actions_by_task(rsc->actions, rsc, order->rh_action_task); } if (rh_actions == NULL) { pe_rsc_trace(rsc, "No RH-Side (%s/%s) found for constraint..." " ignoring", rsc->id, order->rh_action_task); if (lh_action) { pe_rsc_trace(rsc, "LH-Side was: %s", lh_action->uuid); } return; } if (lh_action && lh_action->rsc == rsc && is_set(lh_action->flags, pe_action_dangle)) { pe_rsc_trace(rsc, "Detected dangling operation %s -> %s", lh_action->uuid, order->rh_action_task); clear_bit(type, pe_order_implies_then); } gIter = rh_actions; for (; gIter != NULL; gIter = gIter->next) { action_t *rh_action_iter = (action_t *) gIter->data; if (lh_action) { order_actions(lh_action, rh_action_iter, type); } else if (type & pe_order_implies_then) { update_action_flags(rh_action_iter, pe_action_runnable | pe_action_clear); crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, type); } else { crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, type); } } g_list_free(rh_actions); } static void rsc_order_first(resource_t * lh_rsc, order_constraint_t * order, pe_working_set_t * data_set) { GListPtr gIter = NULL; GListPtr lh_actions = NULL; action_t *lh_action = order->lh_action; resource_t *rh_rsc = order->rh_rsc; crm_trace("Processing LH of ordering constraint %d", order->id); CRM_ASSERT(lh_rsc != NULL); if (lh_action != NULL) { lh_actions = g_list_prepend(NULL, lh_action); } else if (lh_action == NULL) { lh_actions = find_actions_by_task(lh_rsc->actions, lh_rsc, order->lh_action_task); } if (lh_actions == NULL && lh_rsc != rh_rsc) { char *key = NULL; char *rsc_id = NULL; char *op_type = NULL; int interval = 0; parse_op_key(order->lh_action_task, &rsc_id, &op_type, &interval); key = generate_op_key(lh_rsc->id, op_type, interval); if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && safe_str_eq(op_type, RSC_STOP)) { free(key); pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); } else if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_SLAVE && safe_str_eq(op_type, RSC_DEMOTE)) { free(key); pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); } else { pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - creating", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set); lh_actions = g_list_prepend(NULL, lh_action); } free(op_type); free(rsc_id); } gIter = lh_actions; for (; gIter != NULL; gIter = gIter->next) { action_t *lh_action_iter = (action_t *) gIter->data; if (rh_rsc == NULL && order->rh_action) { rh_rsc = order->rh_action->rsc; } if (rh_rsc) { rsc_order_then(lh_action_iter, rh_rsc, order); } else if (order->rh_action) { order_actions(lh_action_iter, order->rh_action, order->type); } } g_list_free(lh_actions); } extern gboolean update_action(action_t * action); static void apply_remote_node_ordering(pe_working_set_t *data_set) { GListPtr gIter = data_set->actions; if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) { return; } for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; resource_t *remote_rsc = NULL; resource_t *container = NULL; if (action->node == NULL || is_remote_node(action->node) == FALSE || action->rsc == NULL || is_set(action->flags, pe_action_pseudo)) { continue; } remote_rsc = action->node->details->remote_rsc; container = remote_rsc->container; if (safe_str_eq(action->task, "monitor") || safe_str_eq(action->task, "start") || safe_str_eq(action->task, "promote") || safe_str_eq(action->task, CRM_OP_LRM_REFRESH) || safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT) || safe_str_eq(action->task, "delete")) { custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL, action->rsc, NULL, action, pe_order_implies_then | pe_order_runnable_left, data_set); } else if (safe_str_eq(action->task, "demote")) { /* If the connection is being torn down, we don't want * to build a constraint between a resource's demotion and * the connection resource starting... because the connection * resource can not start. The connection might already be up, * but the START action would not be allowed which in turn would * block the demotion of any resournces living in the remote-node. * * In this case, only build the constraint between the demotion and * the connection's stop action. This allows the connection and all the * resources within the remote-node to be torn down properly. */ if (remote_rsc->next_role == RSC_ROLE_STOPPED) { custom_action_order(action->rsc, NULL, action, remote_rsc, generate_op_key(remote_rsc->id, RSC_STOP, 0), NULL, pe_order_implies_first, data_set); } else { custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL, action->rsc, NULL, action, pe_order_implies_then | pe_order_runnable_left, data_set); } } else if (safe_str_eq(action->task, "stop") && container && is_set(container->flags, pe_rsc_failed)) { /* when the container representing a remote node fails, the stop * action for all the resources living in that container is implied * by the container stopping. This is similar to how fencing operations * work for cluster nodes. */ pe_set_action_bit(action, pe_action_pseudo); custom_action_order(container, generate_op_key(container->id, RSC_STOP, 0), NULL, action->rsc, NULL, action, pe_order_implies_then | pe_order_runnable_left, data_set); } else if (safe_str_eq(action->task, "stop")) { custom_action_order(action->rsc, NULL, action, remote_rsc, generate_op_key(remote_rsc->id, RSC_STOP, 0), NULL, pe_order_implies_first, data_set); } } } gboolean stage7(pe_working_set_t * data_set) { GListPtr gIter = NULL; apply_remote_node_ordering(data_set); crm_trace("Applying ordering constraints"); /* Don't ask me why, but apparently they need to be processed in * the order they were created in... go figure * * Also g_list_prepend() has horrendous performance characteristics * So we need to use g_list_prepend() and then reverse the list here */ data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints); gIter = data_set->ordering_constraints; for (; gIter != NULL; gIter = gIter->next) { order_constraint_t *order = (order_constraint_t *) gIter->data; resource_t *rsc = order->lh_rsc; crm_trace("Applying ordering constraint: %d", order->id); if (rsc != NULL) { crm_trace("rsc_action-to-*"); rsc_order_first(rsc, order, data_set); continue; } rsc = order->rh_rsc; if (rsc != NULL) { crm_trace("action-to-rsc_action"); rsc_order_then(order->lh_action, rsc, order); } else { crm_trace("action-to-action"); order_actions(order->lh_action, order->rh_action, order->type); } } crm_trace("Updating %d actions", g_list_length(data_set->actions)); gIter = data_set->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; update_action(action); } crm_trace("Processing migrations"); gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; rsc_migrate_reload(rsc, data_set); LogActions(rsc, data_set, FALSE); } return TRUE; } static gint sort_notify_entries(gconstpointer a, gconstpointer b) { int tmp; const notify_entry_t *entry_a = a; const notify_entry_t *entry_b = b; if (entry_a == NULL && entry_b == NULL) { return 0; } if (entry_a == NULL) { return 1; } if (entry_b == NULL) { return -1; } if (entry_a->rsc == NULL && entry_b->rsc == NULL) { return 0; } if (entry_a->rsc == NULL) { return 1; } if (entry_b->rsc == NULL) { return -1; } tmp = strcmp(entry_a->rsc->id, entry_b->rsc->id); if (tmp != 0) { return tmp; } if (entry_a->node == NULL && entry_b->node == NULL) { return 0; } if (entry_a->node == NULL) { return 1; } if (entry_b->node == NULL) { return -1; } return strcmp(entry_a->node->details->id, entry_b->node->details->id); } static void expand_list(GListPtr list, char **rsc_list, char **node_list) { GListPtr gIter = list; const char *uname = NULL; const char *rsc_id = NULL; const char *last_rsc_id = NULL; if (rsc_list) { *rsc_list = NULL; } if (list == NULL) { if (rsc_list) { *rsc_list = strdup(" "); } if (node_list) { *node_list = strdup(" "); } return; } if (node_list) { *node_list = NULL; } for (; gIter != NULL; gIter = gIter->next) { notify_entry_t *entry = (notify_entry_t *) gIter->data; CRM_CHECK(entry != NULL, continue); CRM_CHECK(entry->rsc != NULL, continue); CRM_CHECK(node_list == NULL || entry->node != NULL, continue); uname = NULL; rsc_id = entry->rsc->id; CRM_ASSERT(rsc_id != NULL); /* filter dups */ if (safe_str_eq(rsc_id, last_rsc_id)) { continue; } last_rsc_id = rsc_id; if (rsc_list != NULL) { int existing_len = 0; int len = 2 + strlen(rsc_id); /* +1 space, +1 EOS */ if (rsc_list && *rsc_list) { existing_len = strlen(*rsc_list); } crm_trace("Adding %s (%dc) at offset %d", rsc_id, len - 2, existing_len); *rsc_list = realloc(*rsc_list, len + existing_len); sprintf(*rsc_list + existing_len, "%s ", rsc_id); } if (entry->node != NULL) { uname = entry->node->details->uname; } if (node_list != NULL && uname) { int existing_len = 0; int len = 2 + strlen(uname); if (node_list && *node_list) { existing_len = strlen(*node_list); } crm_trace("Adding %s (%dc) at offset %d", uname, len - 2, existing_len); *node_list = realloc(*node_list, len + existing_len); sprintf(*node_list + existing_len, "%s ", uname); } } } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { add_hash_param(user_data, key, value); } static action_t * pe_notify(resource_t * rsc, node_t * node, action_t * op, action_t * confirm, notify_data_t * n_data, pe_working_set_t * data_set) { char *key = NULL; action_t *trigger = NULL; const char *value = NULL; const char *task = NULL; if (op == NULL || confirm == NULL) { pe_rsc_trace(rsc, "Op=%p confirm=%p", op, confirm); return NULL; } CRM_CHECK(node != NULL, return NULL); if (node->details->online == FALSE) { pe_rsc_trace(rsc, "Skipping notification for %s: node offline", rsc->id); return NULL; } else if (is_set(op->flags, pe_action_runnable) == FALSE) { pe_rsc_trace(rsc, "Skipping notification for %s: not runnable", op->uuid); return NULL; } value = g_hash_table_lookup(op->meta, "notify_type"); task = g_hash_table_lookup(op->meta, "notify_operation"); pe_rsc_trace(rsc, "Creating notify actions for %s: %s (%s-%s)", op->uuid, rsc->id, value, task); key = generate_notify_key(rsc->id, value, task); trigger = custom_action(rsc, key, op->task, node, is_set(op->flags, pe_action_optional), TRUE, data_set); g_hash_table_foreach(op->meta, dup_attr, trigger->meta); g_hash_table_foreach(n_data->keys, dup_attr, trigger->meta); /* pseudo_notify before notify */ pe_rsc_trace(rsc, "Ordering %s before %s (%d->%d)", op->uuid, trigger->uuid, trigger->id, op->id); order_actions(op, trigger, pe_order_optional); order_actions(trigger, confirm, pe_order_optional); return trigger; } static void pe_post_notify(resource_t * rsc, node_t * node, notify_data_t * n_data, pe_working_set_t * data_set) { action_t *notify = NULL; CRM_CHECK(rsc != NULL, return); if (n_data->post == NULL) { return; /* Nothing to do */ } notify = pe_notify(rsc, node, n_data->post, n_data->post_done, n_data, data_set); if (notify != NULL) { notify->priority = INFINITY; } if (n_data->post_done) { GListPtr gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *mon = (action_t *) gIter->data; const char *interval = g_hash_table_lookup(mon->meta, "interval"); if (interval == NULL || safe_str_eq(interval, "0")) { pe_rsc_trace(rsc, "Skipping %s: interval", mon->uuid); continue; } else if (safe_str_eq(mon->task, RSC_CANCEL)) { pe_rsc_trace(rsc, "Skipping %s: cancel", mon->uuid); continue; } order_actions(n_data->post_done, mon, pe_order_optional); } } } notify_data_t * create_notification_boundaries(resource_t * rsc, const char *action, action_t * start, action_t * end, pe_working_set_t * data_set) { /* Create the pseudo ops that preceed and follow the actual notifications */ /* * Creates two sequences (conditional on start and end being supplied): * pre_notify -> pre_notify_complete -> start, and * end -> post_notify -> post_notify_complete * * 'start' and 'end' may be the same event or ${X} and ${X}ed as per clones */ char *key = NULL; notify_data_t *n_data = NULL; if (is_not_set(rsc->flags, pe_rsc_notify)) { return NULL; } n_data = calloc(1, sizeof(notify_data_t)); n_data->action = action; n_data->keys = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (start) { /* create pre-event notification wrappers */ key = generate_notify_key(rsc->id, "pre", start->task); n_data->pre = custom_action(rsc, key, RSC_NOTIFY, NULL, is_set(start->flags, pe_action_optional), TRUE, data_set); update_action_flags(n_data->pre, pe_action_pseudo); update_action_flags(n_data->pre, pe_action_runnable); add_hash_param(n_data->pre->meta, "notify_type", "pre"); add_hash_param(n_data->pre->meta, "notify_operation", n_data->action); add_hash_param(n_data->pre->meta, "notify_key_type", "pre"); add_hash_param(n_data->pre->meta, "notify_key_operation", start->task); /* create pre_notify_complete */ key = generate_notify_key(rsc->id, "confirmed-pre", start->task); n_data->pre_done = custom_action(rsc, key, RSC_NOTIFIED, NULL, is_set(start->flags, pe_action_optional), TRUE, data_set); update_action_flags(n_data->pre_done, pe_action_pseudo); update_action_flags(n_data->pre_done, pe_action_runnable); add_hash_param(n_data->pre_done->meta, "notify_type", "pre"); add_hash_param(n_data->pre_done->meta, "notify_operation", n_data->action); add_hash_param(n_data->pre_done->meta, "notify_key_type", "confirmed-pre"); add_hash_param(n_data->pre_done->meta, "notify_key_operation", start->task); order_actions(n_data->pre_done, start, pe_order_optional); order_actions(n_data->pre, n_data->pre_done, pe_order_optional); } if (end) { /* create post-event notification wrappers */ key = generate_notify_key(rsc->id, "post", end->task); n_data->post = custom_action(rsc, key, RSC_NOTIFY, NULL, is_set(end->flags, pe_action_optional), TRUE, data_set); n_data->post->priority = INFINITY; update_action_flags(n_data->post, pe_action_pseudo); if (is_set(end->flags, pe_action_runnable)) { update_action_flags(n_data->post, pe_action_runnable); } else { update_action_flags(n_data->post, pe_action_runnable | pe_action_clear); } add_hash_param(n_data->post->meta, "notify_type", "post"); add_hash_param(n_data->post->meta, "notify_operation", n_data->action); add_hash_param(n_data->post->meta, "notify_key_type", "post"); add_hash_param(n_data->post->meta, "notify_key_operation", end->task); /* create post_notify_complete */ key = generate_notify_key(rsc->id, "confirmed-post", end->task); n_data->post_done = custom_action(rsc, key, RSC_NOTIFIED, NULL, is_set(end->flags, pe_action_optional), TRUE, data_set); n_data->post_done->priority = INFINITY; update_action_flags(n_data->post_done, pe_action_pseudo); if (is_set(end->flags, pe_action_runnable)) { update_action_flags(n_data->post_done, pe_action_runnable); } else { update_action_flags(n_data->post_done, pe_action_runnable | pe_action_clear); } add_hash_param(n_data->post_done->meta, "notify_type", "post"); add_hash_param(n_data->post_done->meta, "notify_operation", n_data->action); add_hash_param(n_data->post_done->meta, "notify_key_type", "confirmed-post"); add_hash_param(n_data->post_done->meta, "notify_key_operation", end->task); order_actions(end, n_data->post, pe_order_implies_then); order_actions(n_data->post, n_data->post_done, pe_order_implies_then); } if (start && end) { order_actions(n_data->pre_done, n_data->post, pe_order_optional); } if (safe_str_eq(action, RSC_STOP)) { action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); order_actions(n_data->post_done, all_stopped, pe_order_optional); } return n_data; } void collect_notification_data(resource_t * rsc, gboolean state, gboolean activity, notify_data_t * n_data) { if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; collect_notification_data(child, state, activity, n_data); } return; } if (state) { notify_entry_t *entry = NULL; entry = calloc(1, sizeof(notify_entry_t)); entry->rsc = rsc; if (rsc->running_on) { /* we only take the first one */ entry->node = rsc->running_on->data; } pe_rsc_trace(rsc, "%s state: %s", rsc->id, role2text(rsc->role)); switch (rsc->role) { case RSC_ROLE_STOPPED: n_data->inactive = g_list_prepend(n_data->inactive, entry); break; case RSC_ROLE_STARTED: n_data->active = g_list_prepend(n_data->active, entry); break; case RSC_ROLE_SLAVE: n_data->slave = g_list_prepend(n_data->slave, entry); break; case RSC_ROLE_MASTER: n_data->master = g_list_prepend(n_data->master, entry); break; default: crm_err("Unsupported notify role"); free(entry); break; } } if (activity) { notify_entry_t *entry = NULL; enum action_tasks task; GListPtr gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *op = (action_t *) gIter->data; if (is_set(op->flags, pe_action_optional) == FALSE && op->node != NULL) { entry = calloc(1, sizeof(notify_entry_t)); entry->node = op->node; entry->rsc = rsc; task = text2task(op->task); switch (task) { case start_rsc: n_data->start = g_list_prepend(n_data->start, entry); break; case stop_rsc: n_data->stop = g_list_prepend(n_data->stop, entry); break; case action_promote: n_data->promote = g_list_prepend(n_data->promote, entry); break; case action_demote: n_data->demote = g_list_prepend(n_data->demote, entry); break; default: free(entry); break; } } } } } gboolean expand_notification_data(notify_data_t * n_data) { /* Expand the notification entries into a key=value hashtable * This hashtable is later used in action2xml() */ gboolean required = FALSE; char *rsc_list = NULL; char *node_list = NULL; if (n_data->stop) { n_data->stop = g_list_sort(n_data->stop, sort_notify_entries); } expand_list(n_data->stop, &rsc_list, &node_list); if (rsc_list != NULL && safe_str_neq(" ", rsc_list)) { if (safe_str_eq(n_data->action, RSC_STOP)) { required = TRUE; } } g_hash_table_insert(n_data->keys, strdup("notify_stop_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_stop_uname"), node_list); if (n_data->start) { n_data->start = g_list_sort(n_data->start, sort_notify_entries); if (rsc_list && safe_str_eq(n_data->action, RSC_START)) { required = TRUE; } } expand_list(n_data->start, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, strdup("notify_start_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_start_uname"), node_list); if (n_data->demote) { n_data->demote = g_list_sort(n_data->demote, sort_notify_entries); if (safe_str_eq(n_data->action, RSC_DEMOTE)) { required = TRUE; } } expand_list(n_data->demote, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, strdup("notify_demote_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_demote_uname"), node_list); if (n_data->promote) { n_data->promote = g_list_sort(n_data->promote, sort_notify_entries); if (safe_str_eq(n_data->action, RSC_PROMOTE)) { required = TRUE; } } expand_list(n_data->promote, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, strdup("notify_promote_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_promote_uname"), node_list); if (n_data->active) { n_data->active = g_list_sort(n_data->active, sort_notify_entries); } expand_list(n_data->active, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, strdup("notify_active_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_active_uname"), node_list); if (n_data->slave) { n_data->slave = g_list_sort(n_data->slave, sort_notify_entries); } expand_list(n_data->slave, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, strdup("notify_slave_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_slave_uname"), node_list); if (n_data->master) { n_data->master = g_list_sort(n_data->master, sort_notify_entries); } expand_list(n_data->master, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, strdup("notify_master_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_master_uname"), node_list); if (n_data->inactive) { n_data->inactive = g_list_sort(n_data->inactive, sort_notify_entries); } expand_list(n_data->inactive, &rsc_list, NULL); g_hash_table_insert(n_data->keys, strdup("notify_inactive_resource"), rsc_list); if (required && n_data->pre) { update_action_flags(n_data->pre, pe_action_optional | pe_action_clear); update_action_flags(n_data->pre_done, pe_action_optional | pe_action_clear); } if (required && n_data->post) { update_action_flags(n_data->post, pe_action_optional | pe_action_clear); update_action_flags(n_data->post_done, pe_action_optional | pe_action_clear); } return required; } void create_notifications(resource_t * rsc, notify_data_t * n_data, pe_working_set_t * data_set) { GListPtr gIter = NULL; action_t *stop = NULL; action_t *start = NULL; enum action_tasks task = text2task(n_data->action); if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; create_notifications(child, n_data, data_set); } return; } /* Copy notification details into standard ops */ gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *op = (action_t *) gIter->data; if (is_set(op->flags, pe_action_optional) == FALSE && op->node != NULL) { enum action_tasks t = text2task(op->task); switch (t) { case start_rsc: case stop_rsc: case action_promote: case action_demote: g_hash_table_foreach(n_data->keys, dup_attr, op->meta); break; default: break; } } } pe_rsc_trace(rsc, "Creating notificaitons for: %s.%s (%s->%s)", n_data->action, rsc->id, role2text(rsc->role), role2text(rsc->next_role)); stop = find_first_action(rsc->actions, NULL, RSC_STOP, NULL); start = find_first_action(rsc->actions, NULL, RSC_START, NULL); /* stop / demote */ if (rsc->role != RSC_ROLE_STOPPED) { if (task == stop_rsc || task == action_demote) { gIter = rsc->running_on; for (; gIter != NULL; gIter = gIter->next) { node_t *current_node = (node_t *) gIter->data; pe_notify(rsc, current_node, n_data->pre, n_data->pre_done, n_data, data_set); if (task == action_demote || stop == NULL || is_set(stop->flags, pe_action_optional)) { pe_post_notify(rsc, current_node, n_data, data_set); } } } } /* start / promote */ if (rsc->next_role != RSC_ROLE_STOPPED) { if (rsc->allocated_to == NULL) { pe_proc_err("Next role '%s' but %s is not allocated", role2text(rsc->next_role), rsc->id); } else if (task == start_rsc || task == action_promote) { if (task != start_rsc || start == NULL || is_set(start->flags, pe_action_optional)) { pe_notify(rsc, rsc->allocated_to, n_data->pre, n_data->pre_done, n_data, data_set); } pe_post_notify(rsc, rsc->allocated_to, n_data, data_set); } } } void free_notification_data(notify_data_t * n_data) { if (n_data == NULL) { return; } g_list_free_full(n_data->stop, free); g_list_free_full(n_data->start, free); g_list_free_full(n_data->demote, free); g_list_free_full(n_data->promote, free); g_list_free_full(n_data->master, free); g_list_free_full(n_data->slave, free); g_list_free_full(n_data->active, free); g_list_free_full(n_data->inactive, free); g_hash_table_destroy(n_data->keys); free(n_data); } int transition_id = -1; /* * Create a dependency graph to send to the transitioner (via the CRMd) */ gboolean stage8(pe_working_set_t * data_set) { GListPtr gIter = NULL; const char *value = NULL; transition_id++; crm_trace("Creating transition graph %d.", transition_id); data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); value = pe_pref(data_set->config_hash, "cluster-delay"); crm_xml_add(data_set->graph, "cluster-delay", value); value = pe_pref(data_set->config_hash, "stonith-timeout"); crm_xml_add(data_set->graph, "stonith-timeout", value); crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY"); if (is_set(data_set->flags, pe_flag_start_failure_fatal)) { crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY"); } else { crm_xml_add(data_set->graph, "failed-start-offset", "1"); } value = pe_pref(data_set->config_hash, "batch-limit"); crm_xml_add(data_set->graph, "batch-limit", value); crm_xml_add_int(data_set->graph, "transition_id", transition_id); value = pe_pref(data_set->config_hash, "migration-limit"); if (crm_int_helper(value, NULL) > 0) { crm_xml_add(data_set->graph, "migration-limit", value); } /* errors... slist_iter(action, action_t, action_list, lpc, if(action->optional == FALSE && action->runnable == FALSE) { print_action("Ignoring", action, TRUE); } ); */ gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; pe_rsc_trace(rsc, "processing actions for rsc=%s", rsc->id); rsc->cmds->expand(rsc, data_set); } crm_log_xml_trace(data_set->graph, "created resource-driven action list"); /* catch any non-resource specific actions */ crm_trace("processing non-resource actions"); gIter = data_set->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (action->rsc && action->node && action->node->details->shutdown && is_not_set(data_set->flags, pe_flag_maintenance_mode) && is_not_set(action->flags, pe_action_optional) && is_not_set(action->flags, pe_action_runnable) && crm_str_eq(action->task, RSC_STOP, TRUE) ) { /* Eventually we should just ignore the 'fence' case * But for now its the best way to detect (in CTS) when * CIB resource updates are being lost */ if (is_set(data_set->flags, pe_flag_have_quorum) || data_set->no_quorum_policy == no_quorum_ignore) { crm_crit("Cannot %s node '%s' because of %s:%s%s", action->node->details->unclean ? "fence" : "shut down", action->node->details->uname, action->rsc->id, is_not_set(action->rsc->flags, pe_rsc_managed) ? " unmanaged" : " blocked", is_set(action->rsc->flags, pe_rsc_failed) ? " failed" : ""); } } graph_element_from_action(action, data_set); } crm_log_xml_trace(data_set->graph, "created generic action list"); crm_trace("Created transition graph %d.", transition_id); return TRUE; } void cleanup_alloc_calculations(pe_working_set_t * data_set) { if (data_set == NULL) { return; } crm_trace("deleting %d order cons: %p", g_list_length(data_set->ordering_constraints), data_set->ordering_constraints); pe_free_ordering(data_set->ordering_constraints); data_set->ordering_constraints = NULL; crm_trace("deleting %d node cons: %p", g_list_length(data_set->placement_constraints), data_set->placement_constraints); pe_free_rsc_to_node(data_set->placement_constraints); data_set->placement_constraints = NULL; crm_trace("deleting %d inter-resource cons: %p", g_list_length(data_set->colocation_constraints), data_set->colocation_constraints); g_list_free_full(data_set->colocation_constraints, free); data_set->colocation_constraints = NULL; crm_trace("deleting %d ticket deps: %p", g_list_length(data_set->ticket_constraints), data_set->ticket_constraints); g_list_free_full(data_set->ticket_constraints, free); data_set->ticket_constraints = NULL; cleanup_calculations(data_set); } diff --git a/pengine/native.c b/pengine/native.c index 0a6e1492a9..0a323fef0e 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -1,3309 +1,3309 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include /* #define DELETE_THEN_REFRESH 1 // The crmd will remove the resource from the CIB itself, making this redundant */ #define INFINITY_HACK (INFINITY * -100) #define VARIANT_NATIVE 1 #include void native_rsc_colocation_rh_must(resource_t * rsc_lh, gboolean update_lh, resource_t * rsc_rh, gboolean update_rh); void native_rsc_colocation_rh_mustnot(resource_t * rsc_lh, gboolean update_lh, resource_t * rsc_rh, gboolean update_rh); void Recurring(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set); void RecurringOp(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set); void Recurring_Stopped(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set); void RecurringOp_Stopped(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set); void pe_post_notify(resource_t * rsc, node_t * node, action_t * op, notify_data_t * n_data, pe_working_set_t * data_set); gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set); gboolean StopRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean StartRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean DemoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean PromoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean RoleError(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean NullOp(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); /* *INDENT-OFF* */ enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* Current State */ /* Next State: Unknown Stopped Started Slave Master */ /* Unknown */ { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, }, /* Stopped */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, }, /* Started */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Slave */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Master */ { RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, }; gboolean (*rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX])(resource_t*,node_t*,gboolean,pe_working_set_t*) = { /* Current State */ /* Next State: Unknown Stopped Started Slave Master */ /* Unknown */ { RoleError, StopRsc, RoleError, RoleError, RoleError, }, /* Stopped */ { RoleError, NullOp, StartRsc, StartRsc, RoleError, }, /* Started */ { RoleError, StopRsc, NullOp, NullOp, PromoteRsc, }, /* Slave */ { RoleError, StopRsc, StopRsc, NullOp, PromoteRsc, }, /* Master */ { RoleError, DemoteRsc, DemoteRsc, DemoteRsc, NullOp, }, }; /* *INDENT-ON* */ struct capacity_data { node_t *node; resource_t *rsc; gboolean is_enough; }; static void check_capacity(gpointer key, gpointer value, gpointer user_data) { int required = 0; int remaining = 0; struct capacity_data *data = user_data; required = crm_parse_int(value, "0"); remaining = crm_parse_int(g_hash_table_lookup(data->node->details->utilization, key), "0"); if (required > remaining) { pe_rsc_debug(data->rsc, "Node %s has no enough %s for resource %s: required=%d remaining=%d", data->node->details->uname, (char *)key, data->rsc->id, required, remaining); data->is_enough = FALSE; } } static gboolean have_enough_capacity(node_t * node, resource_t * rsc) { struct capacity_data data; data.node = node; data.rsc = rsc; data.is_enough = TRUE; g_hash_table_foreach(rsc->utilization, check_capacity, &data); return data.is_enough; } static gboolean native_choose_node(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) { /* 1. Sort by weight 2. color.chosen_node = the node (of those with the highest wieght) with the fewest resources 3. remove color.chosen_node from all other colors */ int alloc_details = scores_log_level + 1; GListPtr nodes = NULL; node_t *chosen = NULL; int lpc = 0; int multiple = 0; int length = 0; gboolean result = FALSE; if (safe_str_neq(data_set->placement_strategy, "default")) { GListPtr gIter = NULL; for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (have_enough_capacity(node, rsc) == FALSE) { pe_rsc_debug(rsc, "Resource %s cannot be allocated to node %s: none of enough capacity", rsc->id, node->details->uname); resource_location(rsc, node, -INFINITY, "__limit_utilization_", data_set); } } dump_node_scores(alloc_details, rsc, "Post-utilization", rsc->allowed_nodes); } length = g_hash_table_size(rsc->allowed_nodes); if (is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to ? TRUE : FALSE; } if (prefer) { chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id); if (chosen && chosen->weight >= 0 && can_run_resources(chosen)) { pe_rsc_trace(rsc, "Using preferred node %s for %s instead of choosing from %d candidates", chosen->details->uname, rsc->id, length); } else if (chosen && chosen->weight < 0) { pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable", chosen->details->uname, rsc->id); chosen = NULL; } else if (chosen && can_run_resources(chosen)) { pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable", chosen->details->uname, rsc->id); chosen = NULL; } else { pe_rsc_trace(rsc, "Preferred node %s for %s was unknown", prefer->details->uname, rsc->id); } } if (chosen == NULL && rsc->allowed_nodes) { nodes = g_hash_table_get_values(rsc->allowed_nodes); nodes = g_list_sort_with_data(nodes, sort_node_weight, g_list_nth_data(rsc->running_on, 0)); chosen = g_list_nth_data(nodes, 0); pe_rsc_trace(rsc, "Chose node %s for %s from %d candidates", chosen ? chosen->details->uname : "", rsc->id, length); if (chosen && chosen->weight > 0 && can_run_resources(chosen)) { node_t *running = g_list_nth_data(rsc->running_on, 0); if (running && can_run_resources(running) == FALSE) { pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources", rsc->id, running->details->uname); running = NULL; } for (lpc = 1; lpc < length && running; lpc++) { node_t *tmp = g_list_nth_data(nodes, lpc); if (tmp->weight == chosen->weight) { multiple++; if (tmp->details == running->details) { /* prefer the existing node if scores are equal */ chosen = tmp; } } } } } if (multiple > 1) { int log_level = LOG_INFO; char *score = score2char(chosen->weight); if (chosen->weight >= INFINITY) { log_level = LOG_WARNING; } do_crm_log(log_level, "%d nodes with equal score (%s) for" " running %s resources. Chose %s.", multiple, score, rsc->id, chosen->details->uname); free(score); } result = native_assign_node(rsc, nodes, chosen, FALSE); g_list_free(nodes); return result; } static int node_list_attr_score(GHashTable * list, const char *attr, const char *value) { GHashTableIter iter; node_t *node = NULL; int best_score = -INFINITY; const char *best_node = NULL; if (attr == NULL) { attr = "#" XML_ATTR_UNAME; } g_hash_table_iter_init(&iter, list); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { int weight = node->weight; if (can_run_resources(node) == FALSE) { weight = -INFINITY; } if (weight > best_score || best_node == NULL) { const char *tmp = g_hash_table_lookup(node->details->attrs, attr); if (safe_str_eq(value, tmp)) { best_score = weight; best_node = node->details->uname; } } } if (safe_str_neq(attr, "#" XML_ATTR_UNAME)) { crm_info("Best score for %s=%s was %s with %d", attr, value, best_node ? best_node : "", best_score); } return best_score; } static void node_hash_update(GHashTable * list1, GHashTable * list2, const char *attr, float factor, gboolean only_positive) { int score = 0; int new_score = 0; GHashTableIter iter; node_t *node = NULL; if (attr == NULL) { attr = "#" XML_ATTR_UNAME; } g_hash_table_iter_init(&iter, list1); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { CRM_CHECK(node != NULL, continue); score = node_list_attr_score(list2, attr, g_hash_table_lookup(node->details->attrs, attr)); new_score = merge_weights(factor * score, node->weight); if (factor < 0 && score < 0) { /* Negative preference for a node with a negative score * should not become a positive preference * * TODO - Decide if we want to filter only if weight == -INFINITY * */ crm_trace("%s: Filtering %d + %f*%d (factor * score)", node->details->uname, node->weight, factor, score); } else if (node->weight == INFINITY_HACK) { crm_trace("%s: Filtering %d + %f*%d (node < 0)", node->details->uname, node->weight, factor, score); } else if (only_positive && new_score < 0 && node->weight > 0) { node->weight = INFINITY_HACK; crm_trace("%s: Filtering %d + %f*%d (score > 0)", node->details->uname, node->weight, factor, score); } else if (only_positive && new_score < 0 && node->weight == 0) { crm_trace("%s: Filtering %d + %f*%d (score == 0)", node->details->uname, node->weight, factor, score); } else { crm_trace("%s: %d + %f*%d", node->details->uname, node->weight, factor, score); node->weight = new_score; } } } GHashTable * node_hash_dup(GHashTable * hash) { /* Hack! */ GListPtr list = g_hash_table_get_values(hash); GHashTable *result = node_hash_from_list(list); g_list_free(list); return result; } GHashTable * native_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr, float factor, enum pe_weights flags) { return rsc_merge_weights(rsc, rhs, nodes, attr, factor, flags); } GHashTable * rsc_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr, float factor, enum pe_weights flags) { GHashTable *work = NULL; int multiplier = 1; if (factor < 0) { multiplier = -1; } if (is_set(rsc->flags, pe_rsc_merging)) { pe_rsc_info(rsc, "%s: Breaking dependency loop at %s", rhs, rsc->id); return nodes; } set_bit(rsc->flags, pe_rsc_merging); if (is_set(flags, pe_weights_init)) { if (rsc->variant == pe_group && rsc->children) { GListPtr last = rsc->children; while (last->next != NULL) { last = last->next; } pe_rsc_trace(rsc, "Merging %s as a group %p %p", rsc->id, rsc->children, last); work = rsc_merge_weights(last->data, rhs, NULL, attr, factor, flags); } else { work = node_hash_dup(rsc->allowed_nodes); } clear_bit(flags, pe_weights_init); } else if (rsc->variant == pe_group && rsc->children) { GListPtr iter = rsc->children; pe_rsc_trace(rsc, "%s: Combining scores from %d children of %s", rhs, g_list_length(iter), rsc->id); work = node_hash_dup(nodes); for(iter = rsc->children; iter->next != NULL; iter = iter->next) { work = rsc_merge_weights(iter->data, rhs, work, attr, factor, flags); } } else { pe_rsc_trace(rsc, "%s: Combining scores from %s", rhs, rsc->id); work = node_hash_dup(nodes); node_hash_update(work, rsc->allowed_nodes, attr, factor, is_set(flags, pe_weights_positive)); } if (is_set(flags, pe_weights_rollback) && can_run_any(work) == FALSE) { pe_rsc_info(rsc, "%s: Rolling back scores from %s", rhs, rsc->id); g_hash_table_destroy(work); clear_bit(rsc->flags, pe_rsc_merging); return nodes; } if (can_run_any(work)) { GListPtr gIter = NULL; if (is_set(flags, pe_weights_forward)) { gIter = rsc->rsc_cons; crm_trace("Checking %d additional colocation constraints", g_list_length(gIter)); } else if(rsc->variant == pe_group && rsc->children) { GListPtr last = rsc->children; while (last->next != NULL) { last = last->next; } gIter = ((resource_t*)last->data)->rsc_cons_lhs; crm_trace("Checking %d additional optional group colocation constraints from %s", g_list_length(gIter), ((resource_t*)last->data)->id); } else { gIter = rsc->rsc_cons_lhs; crm_trace("Checking %d additional optional colocation constraints %s", g_list_length(gIter), rsc->id); } for (; gIter != NULL; gIter = gIter->next) { resource_t *other = NULL; rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; if (is_set(flags, pe_weights_forward)) { other = constraint->rsc_rh; } else { other = constraint->rsc_lh; } pe_rsc_trace(rsc, "Applying %s (%s)", constraint->id, other->id); work = rsc_merge_weights(other, rhs, work, constraint->node_attribute, multiplier * (float)constraint->score / INFINITY, flags); dump_node_scores(LOG_TRACE, NULL, rhs, work); } } if (is_set(flags, pe_weights_positive)) { node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, work); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->weight == INFINITY_HACK) { node->weight = 1; } } } if (nodes) { g_hash_table_destroy(nodes); } clear_bit(rsc->flags, pe_rsc_merging); return work; } node_t * native_color(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) { GListPtr gIter = NULL; int alloc_details = scores_log_level + 1; if (rsc->parent && is_not_set(rsc->parent->flags, pe_rsc_allocating)) { /* never allocate children on their own */ pe_rsc_debug(rsc, "Escalating allocation of %s to its parent: %s", rsc->id, rsc->parent->id); rsc->parent->cmds->allocate(rsc->parent, prefer, data_set); } if (is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to; } if (is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } set_bit(rsc->flags, pe_rsc_allocating); print_resource(alloc_details, "Allocating: ", rsc, FALSE); dump_node_scores(alloc_details, rsc, "Pre-allloc", rsc->allowed_nodes); for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; GHashTable *archive = NULL; resource_t *rsc_rh = constraint->rsc_rh; pe_rsc_trace(rsc, "%s: Pre-Processing %s (%s, %d, %s)", rsc->id, constraint->id, rsc_rh->id, constraint->score, role2text(constraint->role_lh)); if (constraint->role_lh >= RSC_ROLE_MASTER || (constraint->score < 0 && constraint->score > -INFINITY)) { archive = node_hash_dup(rsc->allowed_nodes); } rsc_rh->cmds->allocate(rsc_rh, NULL, data_set); rsc->cmds->rsc_colocation_lh(rsc, rsc_rh, constraint); if (archive && can_run_any(rsc->allowed_nodes) == FALSE) { pe_rsc_info(rsc, "%s: Rolling back scores from %s", rsc->id, rsc_rh->id); g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = archive; archive = NULL; } if (archive) { g_hash_table_destroy(archive); } } dump_node_scores(alloc_details, rsc, "Post-coloc", rsc->allowed_nodes); for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; rsc->allowed_nodes = constraint->rsc_lh->cmds->merge_weights(constraint->rsc_lh, rsc->id, rsc->allowed_nodes, constraint->node_attribute, (float)constraint->score / INFINITY, pe_weights_rollback); } print_resource(LOG_DEBUG_2, "Allocating: ", rsc, FALSE); if (rsc->next_role == RSC_ROLE_STOPPED) { pe_rsc_trace(rsc, "Making sure %s doesn't get allocated", rsc->id); /* make sure it doesnt come up again */ resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE, data_set); } dump_node_scores(show_scores ? 0 : scores_log_level, rsc, __PRETTY_FUNCTION__, rsc->allowed_nodes); if (is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } if (is_not_set(rsc->flags, pe_rsc_managed)) { const char *reason = NULL; node_t *assign_to = NULL; rsc->next_role = rsc->role; if (rsc->running_on == NULL) { reason = "inactive"; } else if (rsc->role == RSC_ROLE_MASTER) { assign_to = rsc->running_on->data; reason = "master"; } else if (is_set(rsc->flags, pe_rsc_failed)) { assign_to = rsc->running_on->data; reason = "failed"; } else { assign_to = rsc->running_on->data; reason = "active"; } pe_rsc_info(rsc, "Unmanaged resource %s allocated to %s: %s", rsc->id, assign_to ? assign_to->details->uname : "'nowhere'", reason); native_assign_node(rsc, NULL, assign_to, TRUE); } else if (is_set(data_set->flags, pe_flag_stop_everything)) { pe_rsc_debug(rsc, "Forcing %s to stop", rsc->id); native_assign_node(rsc, NULL, NULL, TRUE); } else if (is_set(rsc->flags, pe_rsc_provisional) && native_choose_node(rsc, prefer, data_set)) { pe_rsc_trace(rsc, "Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } else if (rsc->allocated_to == NULL) { if (is_not_set(rsc->flags, pe_rsc_orphan)) { pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id); } else if (rsc->running_on != NULL) { pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id); } } else { pe_rsc_debug(rsc, "Pre-Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } clear_bit(rsc->flags, pe_rsc_allocating); print_resource(LOG_DEBUG_3, "Allocated ", rsc, TRUE); if (rsc->is_remote_node) { node_t *remote_node = pe_find_node(data_set->nodes, rsc->id); CRM_ASSERT(remote_node != NULL); if (rsc->allocated_to && rsc->next_role != RSC_ROLE_STOPPED) { crm_trace("Setting remote node %s to ONLINE", remote_node->details->id); remote_node->details->online = TRUE; /* We shouldn't consider an unseen remote-node unclean if we are going * to try and connect to it. Otherwise we get an unnecessary fence */ if (remote_node->details->unseen == TRUE) { remote_node->details->unclean = FALSE; } } else { crm_trace("Setting remote node %s to SHUTDOWN. next role = %s, allocated=%s", remote_node->details->id, role2text(rsc->next_role), rsc->allocated_to ? "true" : "false"); remote_node->details->shutdown = TRUE; } } return rsc->allocated_to; } static gboolean is_op_dup(resource_t * rsc, const char *name, const char *interval) { gboolean dup = FALSE; const char *id = NULL; const char *value = NULL; xmlNode *operation = NULL; for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { value = crm_element_value(operation, "name"); if (safe_str_neq(value, name)) { continue; } value = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if (value == NULL) { value = "0"; } if (safe_str_neq(value, interval)) { continue; } if (id == NULL) { id = ID(operation); } else { crm_config_err("Operation %s is a duplicate of %s", ID(operation), id); crm_config_err ("Do not use the same (name, interval) combination more than once per resource"); dup = TRUE; } } } return dup; } void RecurringOp(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set) { char *key = NULL; const char *name = NULL; const char *value = NULL; const char *interval = NULL; const char *node_uname = NULL; unsigned long long interval_ms = 0; action_t *mon = NULL; gboolean is_optional = TRUE; GListPtr possible_matches = NULL; /* Only process for the operations without role="Stopped" */ value = crm_element_value(operation, "role"); if (value && text2role(value) == RSC_ROLE_STOPPED) { return; } pe_rsc_trace(rsc, "Creating recurring action %s for %s in role %s on %s", ID(operation), rsc->id, role2text(rsc->next_role), node ? node->details->uname : "n/a"); if (node != NULL) { node_uname = node->details->uname; } interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_get_interval(interval); if (interval_ms == 0) { return; } name = crm_element_value(operation, "name"); if (is_op_dup(rsc, name, interval)) { return; } if (safe_str_eq(name, RSC_STOP) || safe_str_eq(name, RSC_START) || safe_str_eq(name, RSC_DEMOTE) || safe_str_eq(name, RSC_PROMOTE) ) { crm_config_err("Invalid recurring action %s wth name: '%s'", ID(operation), name); return; } key = generate_op_key(rsc->id, name, interval_ms); if (find_rsc_op_entry(rsc, key) == NULL) { /* disabled */ free(key); return; } if (start != NULL) { pe_rsc_trace(rsc, "Marking %s %s due to %s", key, is_set(start->flags, pe_action_optional) ? "optional" : "manditory", start->uuid); is_optional = (rsc->cmds->action_flags(start, NULL) & pe_action_optional); } else { pe_rsc_trace(rsc, "Marking %s optional", key); is_optional = TRUE; } /* start a monitor for an already active resource */ possible_matches = find_actions_exact(rsc->actions, key, node); if (possible_matches == NULL) { is_optional = FALSE; pe_rsc_trace(rsc, "Marking %s manditory: not active", key); } else { g_list_free(possible_matches); } if ((rsc->next_role == RSC_ROLE_MASTER && value == NULL) || (value != NULL && text2role(value) != rsc->next_role)) { int log_level = LOG_DEBUG_2; const char *result = "Ignoring"; if (is_optional) { char *local_key = strdup(key); log_level = LOG_INFO; result = "Cancelling"; /* its running : cancel it */ mon = custom_action(rsc, local_key, RSC_CANCEL, node, FALSE, TRUE, data_set); free(mon->task); mon->task = strdup(RSC_CANCEL); add_hash_param(mon->meta, XML_LRM_ATTR_INTERVAL, interval); add_hash_param(mon->meta, XML_LRM_ATTR_TASK, name); local_key = NULL; switch (rsc->role) { case RSC_ROLE_SLAVE: case RSC_ROLE_STARTED: if (rsc->next_role == RSC_ROLE_MASTER) { local_key = promote_key(rsc); } else if (rsc->next_role == RSC_ROLE_STOPPED) { local_key = stop_key(rsc); } break; case RSC_ROLE_MASTER: local_key = demote_key(rsc); break; default: break; } if (local_key) { custom_action_order(rsc, NULL, mon, rsc, local_key, NULL, pe_order_runnable_left, data_set); } mon = NULL; } do_crm_log(log_level, "%s action %s (%s vs. %s)", result, key, value ? value : role2text(RSC_ROLE_SLAVE), role2text(rsc->next_role)); free(key); key = NULL; return; } mon = custom_action(rsc, key, name, node, is_optional, TRUE, data_set); key = mon->uuid; if (is_optional) { pe_rsc_trace(rsc, "%s\t %s (optional)", crm_str(node_uname), mon->uuid); } if (start == NULL || is_set(start->flags, pe_action_runnable) == FALSE) { pe_rsc_debug(rsc, "%s\t %s (cancelled : start un-runnable)", crm_str(node_uname), mon->uuid); update_action_flags(mon, pe_action_runnable | pe_action_clear); } else if (node == NULL || node->details->online == FALSE || node->details->unclean) { pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)", crm_str(node_uname), mon->uuid); update_action_flags(mon, pe_action_runnable | pe_action_clear); } else if (is_set(mon->flags, pe_action_optional) == FALSE) { pe_rsc_info(rsc, " Start recurring %s (%llus) for %s on %s", mon->task, interval_ms / 1000, rsc->id, crm_str(node_uname)); } if (rsc->next_role == RSC_ROLE_MASTER) { char *running_master = crm_itoa(PCMK_OCF_RUNNING_MASTER); add_hash_param(mon->meta, XML_ATTR_TE_TARGET_RC, running_master); free(running_master); } if (node == NULL || is_set(rsc->flags, pe_rsc_managed)) { custom_action_order(rsc, start_key(rsc), NULL, NULL, strdup(key), mon, pe_order_implies_then | pe_order_runnable_left, data_set); if (rsc->next_role == RSC_ROLE_MASTER) { custom_action_order(rsc, promote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional | pe_order_runnable_left, data_set); } else if (rsc->role == RSC_ROLE_MASTER) { custom_action_order(rsc, demote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional | pe_order_runnable_left, data_set); } } } void Recurring(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set) { if (is_not_set(data_set->flags, pe_flag_maintenance_mode) && (node == NULL || node->details->maintenance == FALSE)) { xmlNode *operation = NULL; for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { RecurringOp(rsc, start, node, operation, data_set); } } } } void RecurringOp_Stopped(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set) { char *key = NULL; const char *name = NULL; const char *role = NULL; const char *interval = NULL; const char *node_uname = NULL; unsigned long long interval_ms = 0; GListPtr possible_matches = NULL; GListPtr gIter = NULL; /* TODO: Support of non-unique clone */ if (is_set(rsc->flags, pe_rsc_unique) == FALSE) { return; } /* Only process for the operations with role="Stopped" */ role = crm_element_value(operation, "role"); if (role == NULL || text2role(role) != RSC_ROLE_STOPPED) { return; } pe_rsc_trace(rsc, "Creating recurring actions %s for %s in role %s on nodes where it'll not be running", ID(operation), rsc->id, role2text(rsc->next_role)); if (node != NULL) { node_uname = node->details->uname; } interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_get_interval(interval); if (interval_ms == 0) { return; } name = crm_element_value(operation, "name"); if (is_op_dup(rsc, name, interval)) { return; } if (safe_str_eq(name, RSC_STOP) || safe_str_eq(name, RSC_START) || safe_str_eq(name, RSC_DEMOTE) || safe_str_eq(name, RSC_PROMOTE) ) { crm_config_err("Invalid recurring action %s wth name: '%s'", ID(operation), name); return; } key = generate_op_key(rsc->id, name, interval_ms); if (find_rsc_op_entry(rsc, key) == NULL) { /* disabled */ free(key); return; } /* if the monitor exists on the node where the resource will be running, cancel it */ if (node != NULL) { possible_matches = find_actions_exact(rsc->actions, key, node); if (possible_matches) { action_t *cancel_op = NULL; char *local_key = strdup(key); g_list_free(possible_matches); cancel_op = custom_action(rsc, local_key, RSC_CANCEL, node, FALSE, TRUE, data_set); free(cancel_op->task); cancel_op->task = strdup(RSC_CANCEL); add_hash_param(cancel_op->meta, XML_LRM_ATTR_INTERVAL, interval); add_hash_param(cancel_op->meta, XML_LRM_ATTR_TASK, name); local_key = NULL; if (rsc->next_role == RSC_ROLE_STARTED || rsc->next_role == RSC_ROLE_SLAVE) { /* rsc->role == RSC_ROLE_STOPPED: cancel the monitor before start */ /* rsc->role == RSC_ROLE_STARTED: for a migration, cancel the monitor on the target node before start */ custom_action_order(rsc, NULL, cancel_op, rsc, start_key(rsc), NULL, pe_order_runnable_left, data_set); } pe_rsc_info(rsc, "Cancel action %s (%s vs. %s) on %s", key, role, role2text(rsc->next_role), crm_str(node_uname)); } } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *stop_node = (node_t *) gIter->data; const char *stop_node_uname = stop_node->details->uname; gboolean is_optional = TRUE; gboolean probe_is_optional = TRUE; gboolean stop_is_optional = TRUE; action_t *stopped_mon = NULL; char *rc_inactive = NULL; GListPtr probe_complete_ops = NULL; GListPtr stop_ops = NULL; GListPtr local_gIter = NULL; char *stop_op_key = NULL; if (node_uname && safe_str_eq(stop_node_uname, node_uname)) { continue; } pe_rsc_trace(rsc, "Creating recurring action %s for %s on %s", ID(operation), rsc->id, crm_str(stop_node_uname)); /* start a monitor for an already stopped resource */ possible_matches = find_actions_exact(rsc->actions, key, stop_node); if (possible_matches == NULL) { pe_rsc_trace(rsc, "Marking %s manditory on %s: not active", key, crm_str(stop_node_uname)); is_optional = FALSE; } else { pe_rsc_trace(rsc, "Marking %s optional on %s: already active", key, crm_str(stop_node_uname)); is_optional = TRUE; g_list_free(possible_matches); } stopped_mon = custom_action(rsc, strdup(key), name, stop_node, is_optional, TRUE, data_set); rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING); add_hash_param(stopped_mon->meta, XML_ATTR_TE_TARGET_RC, rc_inactive); free(rc_inactive); probe_complete_ops = find_actions(data_set->actions, CRM_OP_PROBED, NULL); for (local_gIter = probe_complete_ops; local_gIter != NULL; local_gIter = local_gIter->next) { action_t *probe_complete = (action_t *) local_gIter->data; if (probe_complete->node == NULL) { if (is_set(probe_complete->flags, pe_action_optional) == FALSE) { probe_is_optional = FALSE; } if (is_set(probe_complete->flags, pe_action_runnable) == FALSE) { crm_debug("%s\t %s (cancelled : probe un-runnable)", crm_str(stop_node_uname), stopped_mon->uuid); update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear); } if (is_set(rsc->flags, pe_rsc_managed)) { custom_action_order(NULL, NULL, probe_complete, NULL, strdup(key), stopped_mon, pe_order_optional, data_set); } break; } } if (probe_complete_ops) { g_list_free(probe_complete_ops); } stop_op_key = stop_key(rsc); stop_ops = find_actions_exact(rsc->actions, stop_op_key, stop_node); for (local_gIter = stop_ops; local_gIter != NULL; local_gIter = local_gIter->next) { action_t *stop = (action_t *) local_gIter->data; if (is_set(stop->flags, pe_action_optional) == FALSE) { stop_is_optional = FALSE; } if (is_set(stop->flags, pe_action_runnable) == FALSE) { crm_debug("%s\t %s (cancelled : stop un-runnable)", crm_str(stop_node_uname), stopped_mon->uuid); update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear); } if (is_set(rsc->flags, pe_rsc_managed)) { custom_action_order(rsc, strdup(stop_op_key), stop, NULL, strdup(key), stopped_mon, pe_order_implies_then | pe_order_runnable_left, data_set); } } if (stop_ops) { g_list_free(stop_ops); } free(stop_op_key); if (is_optional == FALSE && probe_is_optional && stop_is_optional && is_set(rsc->flags, pe_rsc_managed) == FALSE) { pe_rsc_trace(rsc, "Marking %s optional on %s due to unmanaged", key, crm_str(stop_node_uname)); update_action_flags(stopped_mon, pe_action_optional); } if (is_set(stopped_mon->flags, pe_action_optional)) { pe_rsc_trace(rsc, "%s\t %s (optional)", crm_str(stop_node_uname), stopped_mon->uuid); } if (stop_node->details->online == FALSE || stop_node->details->unclean) { pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)", crm_str(stop_node_uname), stopped_mon->uuid); update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear); } if (is_set(stopped_mon->flags, pe_action_runnable) && is_set(stopped_mon->flags, pe_action_optional) == FALSE) { crm_notice(" Start recurring %s (%llus) for %s on %s", stopped_mon->task, interval_ms / 1000, rsc->id, crm_str(stop_node_uname)); } } free(key); } void Recurring_Stopped(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set) { if (is_not_set(data_set->flags, pe_flag_maintenance_mode) && (node == NULL || node->details->maintenance == FALSE)) { xmlNode *operation = NULL; for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { RecurringOp_Stopped(rsc, start, node, operation, data_set); } } } } void native_create_actions(resource_t * rsc, pe_working_set_t * data_set) { action_t *start = NULL; node_t *chosen = NULL; node_t *current = NULL; gboolean need_stop = FALSE; GListPtr gIter = NULL; int num_active_nodes = 0; enum rsc_role_e role = RSC_ROLE_UNKNOWN; enum rsc_role_e next_role = RSC_ROLE_UNKNOWN; chosen = rsc->allocated_to; if (chosen != NULL && rsc->next_role == RSC_ROLE_UNKNOWN) { rsc->next_role = RSC_ROLE_STARTED; pe_rsc_trace(rsc, "Fixed next_role: unknown -> %s", role2text(rsc->next_role)); } else if (rsc->next_role == RSC_ROLE_UNKNOWN) { rsc->next_role = RSC_ROLE_STOPPED; pe_rsc_trace(rsc, "Fixed next_role: unknown -> %s", role2text(rsc->next_role)); } pe_rsc_trace(rsc, "Processing state transition for %s %p: %s->%s", rsc->id, rsc, role2text(rsc->role), role2text(rsc->next_role)); if (rsc->running_on) { current = rsc->running_on->data; } for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { /* node_t *n = (node_t *) gIter->data; */ num_active_nodes++; } get_rsc_attributes(rsc->parameters, rsc, chosen, data_set); for (gIter = rsc->dangling_migrations; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; action_t *stop = stop_action(rsc, current, FALSE); set_bit(stop->flags, pe_action_dangle); pe_rsc_trace(rsc, "Forcing a cleanup of %s on %s", rsc->id, current->details->uname); if (is_set(data_set->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, current, FALSE, data_set); } } if (num_active_nodes > 1) { if (num_active_nodes == 2 && chosen && rsc->partial_migration_target && (chosen->details == rsc->partial_migration_target->details)) { /* Here the chosen node is still the migration target from a partial * migration. Attempt to continue the migration instead of recovering * by stopping the resource everywhere and starting it on a single node. */ pe_rsc_trace(rsc, "Will attempt to continue with a partial migration to target %s from %s", rsc->partial_migration_target->details->id, rsc->partial_migration_source->details->id); } else { const char *type = crm_element_value(rsc->xml, XML_ATTR_TYPE); const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); pe_proc_err("Resource %s (%s::%s) is active on %d nodes %s", rsc->id, class, type, num_active_nodes, recovery2text(rsc->recovery_type)); crm_warn("See %s for more information.", "http://clusterlabs.org/wiki/FAQ#Resource_is_Too_Active"); if (rsc->recovery_type == recovery_stop_start) { need_stop = TRUE; } /* If by chance a partial migration is in process, * but the migration target is not chosen still, clear all * partial migration data. */ rsc->partial_migration_source = rsc->partial_migration_target = NULL; } } if (is_set(rsc->flags, pe_rsc_start_pending)) { start = start_action(rsc, chosen, TRUE); set_bit(start->flags, pe_action_print_always); } if (current && chosen && current->details != chosen->details) { pe_rsc_trace(rsc, "Moving %s", rsc->id); need_stop = TRUE; } else if (is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_trace(rsc, "Recovering %s", rsc->id); need_stop = TRUE; } else if (is_set(rsc->flags, pe_rsc_block)) { pe_rsc_trace(rsc, "Block %s", rsc->id); need_stop = TRUE; } else if (rsc->role > RSC_ROLE_STARTED && current != NULL && chosen != NULL) { /* Recovery of a promoted resource */ start = start_action(rsc, chosen, TRUE); if (is_set(start->flags, pe_action_optional) == FALSE) { pe_rsc_trace(rsc, "Forced start %s", rsc->id); need_stop = TRUE; } } pe_rsc_trace(rsc, "Creating actions for %s: %s->%s", rsc->id, role2text(rsc->role), role2text(rsc->next_role)); role = rsc->role; /* Potentiall optional steps on brining the resource down and back up to the same level */ while (role != RSC_ROLE_STOPPED) { next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED]; pe_rsc_trace(rsc, "Down: Executing: %s->%s (%s)%s", role2text(role), role2text(next_role), rsc->id, need_stop ? " required" : ""); if (rsc_action_matrix[role][next_role] (rsc, current, !need_stop, data_set) == FALSE) { break; } role = next_role; } while (rsc->role <= rsc->next_role && role != rsc->role && is_not_set(rsc->flags, pe_rsc_block)) { next_role = rsc_state_matrix[role][rsc->role]; pe_rsc_trace(rsc, "Up: Executing: %s->%s (%s)%s", role2text(role), role2text(next_role), rsc->id, need_stop ? " required" : ""); if (rsc_action_matrix[role][next_role] (rsc, chosen, !need_stop, data_set) == FALSE) { break; } role = next_role; } role = rsc->role; /* Required steps from this role to the next */ while (role != rsc->next_role) { next_role = rsc_state_matrix[role][rsc->next_role]; pe_rsc_trace(rsc, "Role: Executing: %s->%s = (%s)", role2text(role), role2text(rsc->next_role), role2text(next_role), rsc->id); if (rsc_action_matrix[role][next_role] (rsc, chosen, FALSE, data_set) == FALSE) { break; } role = next_role; } if(is_set(rsc->flags, pe_rsc_block)) { pe_rsc_trace(rsc, "No monitor additional ops for blocked resource"); } else if (rsc->next_role != RSC_ROLE_STOPPED || is_set(rsc->flags, pe_rsc_managed) == FALSE) { pe_rsc_trace(rsc, "Monitor ops for active resource"); start = start_action(rsc, chosen, TRUE); Recurring(rsc, start, chosen, data_set); Recurring_Stopped(rsc, start, chosen, data_set); } else { pe_rsc_trace(rsc, "Monitor ops for in-active resource"); Recurring_Stopped(rsc, NULL, NULL, data_set); } } static void rsc_avoids_remote_nodes(resource_t *rsc) { GHashTableIter iter; node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->details->remote_rsc) { node->weight = -INFINITY; } } } void native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) { /* This function is on the critical path and worth optimizing as much as possible */ resource_t *top = uber_parent(rsc); int type = pe_order_optional | pe_order_implies_then | pe_order_restart; gboolean is_stonith = (rsc->xml && safe_str_eq(crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS), "stonith")) ? TRUE : FALSE; custom_action_order(rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, type, data_set); if (top->variant == pe_master || rsc->role > RSC_ROLE_SLAVE) { custom_action_order(rsc, generate_op_key(rsc->id, RSC_DEMOTE, 0), NULL, rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, pe_order_implies_first_master, data_set); custom_action_order(rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, rsc, generate_op_key(rsc->id, RSC_PROMOTE, 0), NULL, pe_order_runnable_left, data_set); } if (is_not_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Skipping fencing constraints for unmanaged resource: %s", rsc->id); return; } { action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); custom_action_order(rsc, stop_key(rsc), NULL, NULL, strdup(all_stopped->task), all_stopped, pe_order_implies_then | pe_order_runnable_left, data_set); } if (g_hash_table_size(rsc->utilization) > 0 && safe_str_neq(data_set->placement_strategy, "default")) { GHashTableIter iter; node_t *next = NULL; GListPtr gIter = NULL; pe_rsc_trace(rsc, "Creating utilization constraints for %s - strategy: %s", rsc->id, data_set->placement_strategy); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; char *load_stopped_task = crm_concat(LOAD_STOPPED, current->details->uname, '_'); action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set); if (load_stopped->node == NULL) { load_stopped->node = node_copy(current); update_action_flags(load_stopped, pe_action_optional | pe_action_clear); } custom_action_order(rsc, stop_key(rsc), NULL, NULL, load_stopped_task, load_stopped, pe_order_load, data_set); } g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&next)) { char *load_stopped_task = crm_concat(LOAD_STOPPED, next->details->uname, '_'); action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set); if (load_stopped->node == NULL) { load_stopped->node = node_copy(next); update_action_flags(load_stopped, pe_action_optional | pe_action_clear); } custom_action_order(NULL, strdup(load_stopped_task), load_stopped, rsc, start_key(rsc), NULL, pe_order_load, data_set); free(load_stopped_task); } } if (rsc->container) { crm_trace("Generating order and colocation rules for rsc %s with container %s", rsc->id, rsc->container->id); custom_action_order(rsc->container, generate_op_key(rsc->container->id, RSC_START, 0), NULL, rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, pe_order_implies_then | pe_order_runnable_left, data_set); custom_action_order(rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, rsc->container, generate_op_key(rsc->container->id, RSC_STOP, 0), NULL, pe_order_implies_first, data_set); rsc_colocation_new("resource-with-containter", NULL, INFINITY, rsc, rsc->container, NULL, NULL, data_set); } if (rsc->is_remote_node || is_stonith) { /* don't allow remote nodes to run stonith devices * or remote connection resources.*/ rsc_avoids_remote_nodes(rsc); } /* If this rsc is a remote connection resource associated * with a container ( which will most likely be a virtual guest ) * do not allow the container to live on any remote-nodes. * remote-nodes managing nested remote-nodes should not be allowed. */ if (rsc->is_remote_node && rsc->container) { rsc_avoids_remote_nodes(rsc->container); } } void native_rsc_colocation_lh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { if (rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", constraint->id); return; } else if (constraint->rsc_rh == NULL) { pe_err("rsc_rh was NULL for %s", constraint->id); return; } pe_rsc_trace(rsc_lh, "Processing colocation constraint between %s and %s", rsc_lh->id, rsc_rh->id); rsc_rh->cmds->rsc_colocation_rh(rsc_lh, rsc_rh, constraint); } enum filter_colocation_res { influence_nothing = 0, influence_rsc_location, influence_rsc_priority, }; static enum filter_colocation_res filter_colocation_constraint(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { if (constraint->score == 0) { return influence_nothing; } /* rh side must be allocated before we can process constraint */ if (is_set(rsc_rh->flags, pe_rsc_provisional)) { return influence_nothing; } if ((constraint->role_lh >= RSC_ROLE_SLAVE) && rsc_lh->parent && rsc_lh->parent->variant == pe_master && is_not_set(rsc_lh->flags, pe_rsc_provisional)) { /* LH and RH resources have already been allocated, place the correct * priority oh LH rsc for the given multistate resource role */ return influence_rsc_priority; } if (is_not_set(rsc_lh->flags, pe_rsc_provisional)) { /* error check */ struct node_shared_s *details_lh; struct node_shared_s *details_rh; if ((constraint->score > -INFINITY) && (constraint->score < INFINITY)) { return influence_nothing; } details_rh = rsc_rh->allocated_to ? rsc_rh->allocated_to->details : NULL; details_lh = rsc_lh->allocated_to ? rsc_lh->allocated_to->details : NULL; if (constraint->score == INFINITY && details_lh != details_rh) { crm_err("%s and %s are both allocated" " but to different nodes: %s vs. %s", rsc_lh->id, rsc_rh->id, details_lh ? details_lh->uname : "n/a", details_rh ? details_rh->uname : "n/a"); } else if (constraint->score == -INFINITY && details_lh == details_rh) { crm_err("%s and %s are both allocated" " but to the SAME node: %s", rsc_lh->id, rsc_rh->id, details_rh ? details_rh->uname : "n/a"); } return influence_nothing; } if (constraint->score > 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh != rsc_lh->next_role) { crm_trace("LH: Skipping constraint: \"%s\" state filter nextrole is %s", role2text(constraint->role_lh), role2text(rsc_lh->next_role)); return influence_nothing; } if (constraint->score > 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh != rsc_rh->next_role) { crm_trace("RH: Skipping constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } if (constraint->score < 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh == rsc_lh->next_role) { crm_trace("LH: Skipping -ve constraint: \"%s\" state filter", role2text(constraint->role_lh)); return influence_nothing; } if (constraint->score < 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh == rsc_rh->next_role) { crm_trace("RH: Skipping -ve constraint: \"%s\" state filter", role2text(constraint->role_rh)); return influence_nothing; } return influence_rsc_location; } static void influence_priority(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { const char *rh_value = NULL; const char *lh_value = NULL; const char *attribute = "#id"; int score_multiplier = 1; if (constraint->node_attribute != NULL) { attribute = constraint->node_attribute; } if (!rsc_rh->allocated_to || !rsc_lh->allocated_to) { return; } lh_value = g_hash_table_lookup(rsc_lh->allocated_to->details->attrs, attribute); rh_value = g_hash_table_lookup(rsc_rh->allocated_to->details->attrs, attribute); if (!safe_str_eq(lh_value, rh_value)) { return; } if (constraint->role_rh && (constraint->role_rh != rsc_rh->next_role)) { return; } if (constraint->role_lh == RSC_ROLE_SLAVE) { score_multiplier = -1; } rsc_lh->priority = merge_weights(score_multiplier * constraint->score, rsc_lh->priority); } static void colocation_match(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { const char *tmp = NULL; const char *value = NULL; const char *attribute = "#id"; GHashTable *work = NULL; gboolean do_check = FALSE; GHashTableIter iter; node_t *node = NULL; if (constraint->node_attribute != NULL) { attribute = constraint->node_attribute; } if (rsc_rh->allocated_to) { value = g_hash_table_lookup(rsc_rh->allocated_to->details->attrs, attribute); do_check = TRUE; } else if (constraint->score < 0) { /* nothing to do: * anti-colocation with something thats not running */ return; } work = node_hash_dup(rsc_lh->allowed_nodes); g_hash_table_iter_init(&iter, work); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { tmp = g_hash_table_lookup(node->details->attrs, attribute); if (do_check && safe_str_eq(tmp, value)) { if (constraint->score < INFINITY) { pe_rsc_trace(rsc_lh, "%s: %s.%s += %d", constraint->id, rsc_lh->id, node->details->uname, constraint->score); node->weight = merge_weights(constraint->score, node->weight); } } else if (do_check == FALSE || constraint->score >= INFINITY) { pe_rsc_trace(rsc_lh, "%s: %s.%s -= %d (%s)", constraint->id, rsc_lh->id, node->details->uname, constraint->score, do_check ? "failed" : "unallocated"); node->weight = merge_weights(-constraint->score, node->weight); } } if (can_run_any(work) || constraint->score <= -INFINITY || constraint->score >= INFINITY) { g_hash_table_destroy(rsc_lh->allowed_nodes); rsc_lh->allowed_nodes = work; work = NULL; } else { char *score = score2char(constraint->score); pe_rsc_info(rsc_lh, "%s: Rolling back scores from %s (%d, %s)", rsc_lh->id, rsc_rh->id, do_check, score); free(score); } if (work) { g_hash_table_destroy(work); } } void native_rsc_colocation_rh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { enum filter_colocation_res filter_results; filter_results = filter_colocation_constraint(rsc_lh, rsc_rh, constraint); switch (filter_results) { case influence_rsc_priority: influence_priority(rsc_lh, rsc_rh, constraint); break; case influence_rsc_location: pe_rsc_trace(rsc_lh, "%sColocating %s with %s (%s, weight=%d)", constraint->score >= 0 ? "" : "Anti-", rsc_lh->id, rsc_rh->id, constraint->id, constraint->score); colocation_match(rsc_lh, rsc_rh, constraint); break; case influence_nothing: default: return; } } static gboolean filter_rsc_ticket(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket) { if (rsc_ticket->role_lh != RSC_ROLE_UNKNOWN && rsc_ticket->role_lh != rsc_lh->role) { pe_rsc_trace(rsc_lh, "LH: Skipping constraint: \"%s\" state filter", role2text(rsc_ticket->role_lh)); return FALSE; } return TRUE; } void rsc_ticket_constraint(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket, pe_working_set_t * data_set) { if (rsc_ticket == NULL) { pe_err("rsc_ticket was NULL"); return; } if (rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", rsc_ticket->id); return; } if (rsc_ticket->ticket->granted && rsc_ticket->ticket->standby == FALSE) { return; } if (rsc_lh->children) { GListPtr gIter = rsc_lh->children; pe_rsc_trace(rsc_lh, "Processing ticket dependencies from %s", rsc_lh->id); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; rsc_ticket_constraint(child_rsc, rsc_ticket, data_set); } return; } pe_rsc_trace(rsc_lh, "%s: Processing ticket dependency on %s (%s, %s)", rsc_lh->id, rsc_ticket->ticket->id, rsc_ticket->id, role2text(rsc_ticket->role_lh)); if (rsc_ticket->ticket->granted == FALSE && g_list_length(rsc_lh->running_on) > 0) { GListPtr gIter = NULL; switch (rsc_ticket->loss_policy) { case loss_ticket_stop: resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); break; case loss_ticket_demote: /*Promotion score will be set to -INFINITY in master_promotion_order() */ if (rsc_ticket->role_lh != RSC_ROLE_MASTER) { resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); } break; case loss_ticket_fence: if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) { return; } resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); for (gIter = rsc_lh->running_on; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; crm_warn("Node %s will be fenced for deadman", node->details->uname); node->details->unclean = TRUE; } break; case loss_ticket_freeze: if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) { return; } if (g_list_length(rsc_lh->running_on) > 0) { clear_bit(rsc_lh->flags, pe_rsc_managed); set_bit(rsc_lh->flags, pe_rsc_block); } break; } } else if (rsc_ticket->ticket->granted == FALSE) { if (rsc_ticket->role_lh != RSC_ROLE_MASTER || rsc_ticket->loss_policy == loss_ticket_stop) { resource_location(rsc_lh, NULL, -INFINITY, "__no_ticket__", data_set); } } else if (rsc_ticket->ticket->standby) { if (rsc_ticket->role_lh != RSC_ROLE_MASTER || rsc_ticket->loss_policy == loss_ticket_stop) { resource_location(rsc_lh, NULL, -INFINITY, "__ticket_standby__", data_set); } } } enum pe_action_flags native_action_flags(action_t * action, node_t * node) { return action->flags; } enum pe_graph_flags native_update_actions(action_t * first, action_t * then, node_t * node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type) { /* flags == get_action_flags(first, then_node) called from update_action() */ enum pe_graph_flags changed = pe_graph_none; enum pe_action_flags then_flags = then->flags; enum pe_action_flags first_flags = first->flags; if (type & pe_order_asymmetrical) { resource_t *then_rsc = then->rsc; enum rsc_role_e then_rsc_role = then_rsc ? then_rsc->fns->state(then_rsc, TRUE) : 0; if (!then_rsc) { /* ignore */ } else if ((then_rsc_role == RSC_ROLE_STOPPED) && safe_str_eq(then->task, RSC_STOP)) { /* ignore... if 'then' is supposed to be stopped after 'first', but * then is already stopped, there is nothing to be done when non-symmetrical. */ } else if ((then_rsc_role == RSC_ROLE_STARTED) && safe_str_eq(then->task, RSC_START)) { /* ignore... if 'then' is supposed to be started after 'first', but * then is already started, there is nothing to be done when non-symmetrical. */ } else if (!(first->flags & pe_action_runnable)) { /* prevent 'then' action from happening if 'first' is not runnable and * 'then' has not yet occurred. */ pe_clear_action_bit(then, pe_action_runnable); pe_clear_action_bit(then, pe_action_optional); pe_rsc_trace(then->rsc, "Unset optional and runnable on %s", then->uuid); } else { /* ignore... then is allowed to start/stop if it wants to. */ } } if (type & pe_order_implies_first) { if ((filter & pe_action_optional) && (flags & pe_action_optional) == 0) { pe_rsc_trace(first->rsc, "Unset optional on %s because of %s", first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_optional); } } if (type & pe_order_implies_first_master) { if ((filter & pe_action_optional) && ((then->flags & pe_action_optional) == FALSE) && then->rsc && (then->rsc->role == RSC_ROLE_MASTER)) { clear_bit(first->flags, pe_action_optional); } } if (is_set(type, pe_order_runnable_left) && is_set(filter, pe_action_runnable) && is_set(then->flags, pe_action_runnable) && is_set(flags, pe_action_runnable) == FALSE) { pe_rsc_trace(then->rsc, "Unset runnable on %s because of %s", then->uuid, first->uuid); pe_clear_action_bit(then, pe_action_runnable); } if (is_set(type, pe_order_implies_then) && is_set(filter, pe_action_optional) && is_set(then->flags, pe_action_optional) && is_set(flags, pe_action_optional) == FALSE) { pe_rsc_trace(then->rsc, "Unset optional on %s because of %s", then->uuid, first->uuid); pe_clear_action_bit(then, pe_action_optional); } if (is_set(type, pe_order_restart)) { const char *reason = NULL; CRM_ASSERT(first->rsc && first->rsc->variant == pe_native); CRM_ASSERT(then->rsc && then->rsc->variant == pe_native); if ((filter & pe_action_runnable) && (then->flags & pe_action_runnable) == 0 && (then->rsc->flags & pe_rsc_managed)) { reason = "shutdown"; } if ((filter & pe_action_optional) && (then->flags & pe_action_optional) == 0) { reason = "recover"; } if (reason && is_set(first->flags, pe_action_optional) && is_set(first->flags, pe_action_runnable)) { pe_rsc_trace(first->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_optional); } if (reason && is_not_set(first->flags, pe_action_optional) && is_not_set(first->flags, pe_action_runnable)) { pe_rsc_trace(then->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid); pe_clear_action_bit(then, pe_action_runnable); } } if (then_flags != then->flags) { changed |= pe_graph_updated_then; pe_rsc_trace(then->rsc, "Then: Flags for %s on %s are now 0x%.6x (was 0x%.6x) because of %s 0x%.6x", then->uuid, then->node ? then->node->details->uname : "[none]", then->flags, then_flags, first->uuid, first->flags); } if (first_flags != first->flags) { changed |= pe_graph_updated_first; pe_rsc_trace(first->rsc, "First: Flags for %s on %s are now 0x%.6x (was 0x%.6x) because of %s 0x%.6x", first->uuid, first->node ? first->node->details->uname : "[none]", first->flags, first_flags, then->uuid, then->flags); } return changed; } void native_rsc_location(resource_t * rsc, rsc_to_node_t * constraint) { GListPtr gIter = NULL; GHashTableIter iter; node_t *node = NULL; if (constraint == NULL) { pe_err("Constraint is NULL"); return; } else if (rsc == NULL) { pe_err("LHS of rsc_to_node (%s) is NULL", constraint->id); return; } pe_rsc_trace(rsc, "Applying %s (%s) to %s", constraint->id, role2text(constraint->role_filter), rsc->id); /* take "lifetime" into account */ if (constraint->role_filter > RSC_ROLE_UNKNOWN && constraint->role_filter != rsc->next_role) { pe_rsc_debug(rsc, "Constraint (%s) is not active (role : %s vs. %s)", constraint->id, role2text(constraint->role_filter), role2text(rsc->next_role)); return; } else if (is_active(constraint) == FALSE) { pe_rsc_trace(rsc, "Constraint (%s) is not active", constraint->id); return; } if (constraint->node_list_rh == NULL) { pe_rsc_trace(rsc, "RHS of constraint %s is NULL", constraint->id); return; } for (gIter = constraint->node_list_rh; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; node_t *other_node = NULL; other_node = (node_t *) pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (other_node != NULL) { pe_rsc_trace(rsc, "%s + %s: %d + %d", node->details->uname, other_node->details->uname, node->weight, other_node->weight); other_node->weight = merge_weights(other_node->weight, node->weight); } else { node_t *new_node = node_copy(node); g_hash_table_insert(rsc->allowed_nodes, (gpointer) new_node->details->id, new_node); } } g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { pe_rsc_trace(rsc, "%s + %s : %d", rsc->id, node->details->uname, node->weight); } } void native_expand(resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = NULL; pe_rsc_trace(rsc, "Processing actions from %s", rsc->id); for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; crm_trace("processing action %d for rsc=%s", action->id, rsc->id); graph_element_from_action(action, data_set); } for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; child_rsc->cmds->expand(child_rsc, data_set); } } void #define log_change(fmt, args...) do { \ if(terminal) { \ printf(" * "fmt"\n", ##args); \ } else { \ crm_notice(fmt, ##args); \ } \ } while(0) LogActions(resource_t * rsc, pe_working_set_t * data_set, gboolean terminal) { node_t *next = NULL; node_t *current = NULL; action_t *stop = NULL; action_t *start = NULL; action_t *demote = NULL; action_t *promote = NULL; char *key = NULL; gboolean moving = FALSE; GListPtr possible_matches = NULL; if (rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; LogActions(child_rsc, data_set, terminal); } return; } next = rsc->allocated_to; if (rsc->running_on) { if (g_list_length(rsc->running_on) > 1 && rsc->partial_migration_source) { current = rsc->partial_migration_source; } else { current = rsc->running_on->data; } if (rsc->role == RSC_ROLE_STOPPED) { /* * This can occur when resources are being recovered * We fiddle with the current role in native_create_actions() */ rsc->role = RSC_ROLE_STARTED; } } if (current == NULL && is_set(rsc->flags, pe_rsc_orphan)) { /* Don't log stopped orphans */ return; } if (is_not_set(rsc->flags, pe_rsc_managed) || (current == NULL && next == NULL)) { pe_rsc_info(rsc, "Leave %s\t(%s%s)", rsc->id, role2text(rsc->role), is_not_set(rsc->flags, pe_rsc_managed) ? " unmanaged" : ""); return; } if (current != NULL && next != NULL && safe_str_neq(current->details->id, next->details->id)) { moving = TRUE; } key = start_key(rsc); possible_matches = find_actions(rsc->actions, key, next); free(key); if (possible_matches) { start = possible_matches->data; g_list_free(possible_matches); } key = stop_key(rsc); possible_matches = find_actions(rsc->actions, key, next); free(key); if (possible_matches) { stop = possible_matches->data; g_list_free(possible_matches); } key = promote_key(rsc); possible_matches = find_actions(rsc->actions, key, next); free(key); if (possible_matches) { promote = possible_matches->data; g_list_free(possible_matches); } key = demote_key(rsc); possible_matches = find_actions(rsc->actions, key, next); free(key); if (possible_matches) { demote = possible_matches->data; g_list_free(possible_matches); } if (rsc->role == rsc->next_role) { key = generate_op_key(rsc->id, RSC_MIGRATED, 0); possible_matches = find_actions(rsc->actions, key, next); free(key); CRM_CHECK(next != NULL,); if (next == NULL) { } else if (possible_matches && current) { log_change("Migrate %s\t(%s %s -> %s)", rsc->id, role2text(rsc->role), current->details->uname, next->details->uname); g_list_free(possible_matches); } else if (is_set(rsc->flags, pe_rsc_reload)) { log_change("Reload %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else if (start == NULL || is_set(start->flags, pe_action_optional)) { pe_rsc_info(rsc, "Leave %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else if (start && is_set(start->flags, pe_action_runnable) == FALSE) { log_change("Stop %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else if (moving && current) { log_change("%s %s\t(%s %s -> %s)", is_set(rsc->flags, pe_rsc_failed) ? "Recover" : "Move ", rsc->id, role2text(rsc->role), current->details->uname, next->details->uname); } else if (is_set(rsc->flags, pe_rsc_failed)) { log_change("Recover %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else { log_change("Restart %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } return; } if (rsc->role > RSC_ROLE_SLAVE && rsc->role > rsc->next_role) { CRM_CHECK(current != NULL,); if (current != NULL) { gboolean allowed = FALSE; if (demote != NULL && (demote->flags & pe_action_runnable)) { allowed = TRUE; } log_change("Demote %s\t(%s -> %s %s%s)", rsc->id, role2text(rsc->role), role2text(rsc->next_role), current->details->uname, allowed ? "" : " - blocked"); if (stop != NULL && is_not_set(stop->flags, pe_action_optional) && rsc->next_role > RSC_ROLE_STOPPED) { if (is_set(rsc->flags, pe_rsc_failed)) { log_change("Recover %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else if (is_set(rsc->flags, pe_rsc_reload)) { log_change("Reload %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else { log_change("Restart %s\t(%s %s)", rsc->id, role2text(rsc->next_role), next->details->uname); } } } } else if (rsc->next_role == RSC_ROLE_STOPPED) { GListPtr gIter = NULL; CRM_CHECK(current != NULL,); key = stop_key(rsc); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; action_t *stop_op = NULL; gboolean allowed = FALSE; possible_matches = find_actions(rsc->actions, key, node); if (possible_matches) { stop_op = possible_matches->data; g_list_free(possible_matches); } if (stop_op && (stop_op->flags & pe_action_runnable)) { allowed = TRUE; } log_change("Stop %s\t(%s%s)", rsc->id, node->details->uname, allowed ? "" : " - blocked"); } free(key); } if (moving) { log_change("Move %s\t(%s %s -> %s)", rsc->id, role2text(rsc->next_role), current->details->uname, next->details->uname); } if (rsc->role == RSC_ROLE_STOPPED) { gboolean allowed = FALSE; if (start && (start->flags & pe_action_runnable)) { allowed = TRUE; } CRM_CHECK(next != NULL,); if (next != NULL) { log_change("Start %s\t(%s%s)", rsc->id, next->details->uname, allowed ? "" : " - blocked"); } if (allowed == FALSE) { return; } } if (rsc->next_role > RSC_ROLE_SLAVE && rsc->role < rsc->next_role) { gboolean allowed = FALSE; CRM_CHECK(next != NULL,); if (stop != NULL && is_not_set(stop->flags, pe_action_optional) && rsc->role > RSC_ROLE_STOPPED) { if (is_set(rsc->flags, pe_rsc_failed)) { log_change("Recover %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else if (is_set(rsc->flags, pe_rsc_reload)) { log_change("Reload %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else { log_change("Restart %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } } if (promote && (promote->flags & pe_action_runnable)) { allowed = TRUE; } log_change("Promote %s\t(%s -> %s %s%s)", rsc->id, role2text(rsc->role), role2text(rsc->next_role), next->details->uname, allowed ? "" : " - blocked"); } } gboolean StopRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { GListPtr gIter = NULL; pe_rsc_trace(rsc, "%s", rsc->id); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; action_t *stop; if (rsc->partial_migration_target) { if (rsc->partial_migration_target->details == current->details) { pe_rsc_trace(rsc, "Filtered %s -> %s %s", current->details->uname, next->details->uname, rsc->id); continue; } else { pe_rsc_trace(rsc, "Forced on %s %s", current->details->uname, rsc->id); optional = FALSE; } } pe_rsc_trace(rsc, "%s on %s", rsc->id, current->details->uname); stop = stop_action(rsc, current, optional); if (is_not_set(rsc->flags, pe_rsc_managed)) { update_action_flags(stop, pe_action_runnable | pe_action_clear); } if (is_set(data_set->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, current, optional, data_set); } } return TRUE; } gboolean StartRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { action_t *start = NULL; pe_rsc_trace(rsc, "%s on %s %d", rsc->id, next ? next->details->uname : "N/A", optional); start = start_action(rsc, next, TRUE); if (is_set(start->flags, pe_action_runnable) && optional == FALSE) { update_action_flags(start, pe_action_optional | pe_action_clear); } return TRUE; } gboolean PromoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { char *key = NULL; GListPtr gIter = NULL; gboolean runnable = TRUE; GListPtr action_list = NULL; pe_rsc_trace(rsc, "%s on %s", rsc->id, next ? next->details->uname : "N/A"); CRM_CHECK(next != NULL, return FALSE); key = start_key(rsc); action_list = find_actions_exact(rsc->actions, key, next); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *start = (action_t *) gIter->data; if (is_set(start->flags, pe_action_runnable) == FALSE) { runnable = FALSE; } } g_list_free(action_list); if (runnable) { promote_action(rsc, next, optional); return TRUE; } pe_rsc_debug(rsc, "%s\tPromote %s (canceled)", next->details->uname, rsc->id); key = promote_key(rsc); action_list = find_actions_exact(rsc->actions, key, next); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *promote = (action_t *) gIter->data; update_action_flags(promote, pe_action_runnable | pe_action_clear); } g_list_free(action_list); return TRUE; } gboolean DemoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { GListPtr gIter = NULL; pe_rsc_trace(rsc, "%s", rsc->id); /* CRM_CHECK(rsc->next_role == RSC_ROLE_SLAVE, return FALSE); */ for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; pe_rsc_trace(rsc, "%s on %s", rsc->id, next ? next->details->uname : "N/A"); demote_action(rsc, current, optional); } return TRUE; } gboolean RoleError(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { crm_err("%s on %s", rsc->id, next ? next->details->uname : "N/A"); CRM_CHECK(FALSE, return FALSE); return FALSE; } gboolean NullOp(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { pe_rsc_trace(rsc, "%s", rsc->id); return FALSE; } gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set) { if (is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_trace(rsc, "Resource %s not deleted from %s: failed", rsc->id, node->details->uname); return FALSE; } else if (node == NULL) { pe_rsc_trace(rsc, "Resource %s not deleted: NULL node", rsc->id); return FALSE; } else if (node->details->unclean || node->details->online == FALSE) { pe_rsc_trace(rsc, "Resource %s not deleted from %s: unrunnable", rsc->id, node->details->uname); return FALSE; } crm_notice("Removing %s from %s", rsc->id, node->details->uname); delete_action(rsc, node, optional); new_rsc_order(rsc, RSC_STOP, rsc, RSC_DELETE, optional ? pe_order_implies_then : pe_order_optional, data_set); new_rsc_order(rsc, RSC_DELETE, rsc, RSC_START, optional ? pe_order_implies_then : pe_order_optional, data_set); return TRUE; } #include <../lib/pengine/unpack.h> #define set_char(x) last_rsc_id[lpc] = x; complete = TRUE; static char * increment_clone(char *last_rsc_id) { int lpc = 0; int len = 0; char *tmp = NULL; gboolean complete = FALSE; CRM_CHECK(last_rsc_id != NULL, return NULL); if (last_rsc_id != NULL) { len = strlen(last_rsc_id); } lpc = len - 1; while (complete == FALSE && lpc > 0) { switch (last_rsc_id[lpc]) { case 0: lpc--; break; case '0': set_char('1'); break; case '1': set_char('2'); break; case '2': set_char('3'); break; case '3': set_char('4'); break; case '4': set_char('5'); break; case '5': set_char('6'); break; case '6': set_char('7'); break; case '7': set_char('8'); break; case '8': set_char('9'); break; case '9': last_rsc_id[lpc] = '0'; lpc--; break; case ':': tmp = last_rsc_id; last_rsc_id = calloc(1, len + 2); memcpy(last_rsc_id, tmp, len); last_rsc_id[++lpc] = '1'; last_rsc_id[len] = '0'; last_rsc_id[len + 1] = 0; complete = TRUE; free(tmp); break; default: crm_err("Unexpected char: %c (%d)", last_rsc_id[lpc], lpc); return NULL; break; } } return last_rsc_id; } static node_t * probe_grouped_clone(resource_t * rsc, node_t * node, pe_working_set_t * data_set) { node_t *running = NULL; resource_t *top = uber_parent(rsc); if (running == NULL && is_set(top->flags, pe_rsc_unique) == FALSE) { /* Annoyingly we also need to check any other clone instances * Clumsy, but it will work. * * An alternative would be to update known_on for every peer * during process_rsc_state() * * This code desperately needs optimization * ptest -x with 100 nodes, 100 clones and clone-max=10: * No probes O(25s) * Detection without clone loop O(3m) * Detection with clone loop O(8m) ptest[32211]: 2010/02/18_14:27:55 CRIT: stage5: Probing for unknown resources ptest[32211]: 2010/02/18_14:33:39 CRIT: stage5: Done ptest[32211]: 2010/02/18_14:35:05 CRIT: stage7: Updating action states ptest[32211]: 2010/02/18_14:35:05 CRIT: stage7: Done */ char *clone_id = clone_zero(rsc->id); resource_t *peer = pe_find_resource(top->children, clone_id); while (peer && running == NULL) { running = pe_hash_table_lookup(peer->known_on, node->details->id); if (running != NULL) { /* we already know the status of the resource on this node */ pe_rsc_trace(rsc, "Skipping active clone: %s", rsc->id); free(clone_id); return running; } clone_id = increment_clone(clone_id); peer = pe_find_resource(data_set->resources, clone_id); } free(clone_id); } return running; } gboolean native_create_probe(resource_t * rsc, node_t * node, action_t * complete, gboolean force, pe_working_set_t * data_set) { char *key = NULL; action_t *probe = NULL; node_t *running = NULL; resource_t *top = uber_parent(rsc); static const char *rc_master = NULL; static const char *rc_inactive = NULL; if (rc_inactive == NULL) { rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING); rc_master = crm_itoa(PCMK_OCF_RUNNING_MASTER); } CRM_CHECK(node != NULL, return FALSE); if (force == FALSE && is_not_set(data_set->flags, pe_flag_startup_probes)) { pe_rsc_trace(rsc, "Skipping active resource detection for %s", rsc->id); return FALSE; - } else if (force == FALSE && is_container_remote_node(node) && is_not_set(data_set->flags, pe_flag_container_probes)) { + } else if (force == FALSE && is_container_remote_node(node)) { pe_rsc_trace(rsc, "Skipping active resource detection for %s on container %s", rsc->id, node->details->id); return FALSE; } if (is_remote_node(node)) { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if (safe_str_eq(class, "stonith")) { pe_rsc_trace(rsc, "Skipping probe for %s on node %s, remote-nodes do not run stonith agents.", rsc->id, node->details->id); return FALSE; } else if (rsc_contains_remote_node(data_set, rsc)) { pe_rsc_trace(rsc, "Skipping probe for %s on node %s, remote-nodes can not run resources that contain connection resources.", rsc->id, node->details->id); return FALSE; } else if (rsc->is_remote_node) { pe_rsc_trace(rsc, "Skipping probe for %s on node %s, remote-nodes can not run connection resources", rsc->id, node->details->id); return FALSE; } } if (rsc->children) { GListPtr gIter = NULL; gboolean any_created = FALSE; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; any_created = child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set) || any_created; } return any_created; } else if (rsc->container) { pe_rsc_trace(rsc, "Skipping %s: it is within container %s", rsc->id, rsc->container->id); return FALSE; } if (is_set(rsc->flags, pe_rsc_orphan)) { pe_rsc_trace(rsc, "Skipping orphan: %s", rsc->id); return FALSE; } running = g_hash_table_lookup(rsc->known_on, node->details->id); if (running == NULL && is_set(rsc->flags, pe_rsc_unique) == FALSE) { /* Anonymous clones */ if (rsc->parent == top) { running = g_hash_table_lookup(rsc->parent->known_on, node->details->id); } else { /* Grouped anonymous clones need extra special handling */ running = probe_grouped_clone(rsc, node, data_set); } } if (force == FALSE && running != NULL) { /* we already know the status of the resource on this node */ pe_rsc_trace(rsc, "Skipping active: %s on %s", rsc->id, node->details->uname); return FALSE; } key = generate_op_key(rsc->id, RSC_STATUS, 0); probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE, data_set); update_action_flags(probe, pe_action_optional | pe_action_clear); /* Check if the node needs to be unfenced first */ if (is_set(rsc->flags, pe_rsc_needs_unfencing)) { action_t *unfence = pe_fence_op(node, "on", data_set); crm_notice("Unfencing %s for %s", node->details->uname, rsc->id); order_actions(unfence, probe, pe_order_implies_then); /* The lack of ordering constraints on STONITH_UP would * traditionally mean unfencing is initiated /before/ the * devices are started. * * However this is a non-issue as stonithd is now smart * enough to be able to use devices directly from the cib */ } /* * We need to know if it's running_on (not just known_on) this node * to correctly determine the target rc. */ running = pe_find_node_id(rsc->running_on, node->details->id); if (running == NULL) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_inactive); } else if (rsc->role == RSC_ROLE_MASTER) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_master); } pe_rsc_debug(rsc, "Probing %s on %s (%s)", rsc->id, node->details->uname, role2text(rsc->role)); order_actions(probe, complete, pe_order_implies_then); return TRUE; } static void native_start_constraints(resource_t * rsc, action_t * stonith_op, gboolean is_stonith, pe_working_set_t * data_set) { node_t *target = stonith_op ? stonith_op->node : NULL; GListPtr gIter = NULL; action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); action_t *stonith_done = get_pseudo_op(STONITH_DONE, data_set); for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (action->needs == rsc_req_stonith) { order_actions(stonith_done, action, pe_order_optional); } else if (target != NULL && safe_str_eq(action->task, RSC_START) && NULL == pe_hash_table_lookup(rsc->known_on, target->details->id)) { /* if known == NULL, then we dont know if * the resource is active on the node * we're about to shoot * * in this case, regardless of action->needs, * the only safe option is to wait until * the node is shot before doing anything * to with the resource * * its analogous to waiting for all the probes * for rscX to complete before starting rscX * * the most likely explaination is that the * DC died and took its status with it */ pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid, target->details->uname); order_actions(all_stopped, action, pe_order_optional | pe_order_runnable_left); } } } static void native_stop_constraints(resource_t * rsc, action_t * stonith_op, gboolean is_stonith, pe_working_set_t * data_set) { char *key = NULL; GListPtr gIter = NULL; GListPtr action_list = NULL; resource_t *top = uber_parent(rsc); key = stop_key(rsc); action_list = find_actions(rsc->actions, key, stonith_op->node); free(key); /* add the stonith OP as a stop pre-req and the mark the stop * as a pseudo op - since its now redundant */ for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (action->node->details->online && action->node->details->unclean == FALSE && is_set(rsc->flags, pe_rsc_failed)) { continue; } if (is_set(rsc->flags, pe_rsc_failed)) { crm_notice("Stop of failed resource %s is" " implicit after %s is fenced", rsc->id, action->node->details->uname); } else { crm_info("%s is implicit after %s is fenced", action->uuid, action->node->details->uname); } /* the stop would never complete and is * now implied by the stonith operation */ update_action_flags(action, pe_action_pseudo); update_action_flags(action, pe_action_runnable); update_action_flags(action, pe_action_implied_by_stonith); { action_t *parent_stop = find_first_action(top->actions, NULL, RSC_STOP, NULL); order_actions(stonith_op, action, pe_order_optional); order_actions(stonith_op, parent_stop, pe_order_optional); } if (is_set(rsc->flags, pe_rsc_notify)) { /* Create a second notification that will be delivered * immediately after the node is fenced * * Basic problem: * - C is a clone active on the node to be shot and stopping on another * - R is a resource that depends on C * * + C.stop depends on R.stop * + C.stopped depends on STONITH * + C.notify depends on C.stopped * + C.healthy depends on C.notify * + R.stop depends on C.healthy * * The extra notification here changes * + C.healthy depends on C.notify * into: * + C.healthy depends on C.notify' * + C.notify' depends on STONITH' * thus breaking the loop */ notify_data_t *n_data = create_notification_boundaries(rsc, RSC_STOP, NULL, stonith_op, data_set); crm_info("Creating secondary notification for %s", action->uuid); collect_notification_data(rsc, TRUE, FALSE, n_data); g_hash_table_insert(n_data->keys, strdup("notify_stop_resource"), strdup(rsc->id)); g_hash_table_insert(n_data->keys, strdup("notify_stop_uname"), strdup(action->node->details->uname)); create_notifications(uber_parent(rsc), n_data, data_set); free_notification_data(n_data); } /* From Bug #1601, successful fencing must be an input to a failed resources stop action. However given group(rA, rB) running on nodeX and B.stop has failed, A := stop healthy resource (rA.stop) B := stop failed resource (pseudo operation B.stop) C := stonith nodeX A requires B, B requires C, C requires A This loop would prevent the cluster from making progress. This block creates the "C requires A" dependency and therefore must (at least for now) be disabled. Instead, run the block above and treat all resources on nodeX as B would be (marked as a pseudo op depending on the STONITH). TODO: Break the "A requires B" dependency in update_action() and re-enable this block } else if(is_stonith == FALSE) { crm_info("Moving healthy resource %s" " off %s before fencing", rsc->id, node->details->uname); * stop healthy resources before the * stonith op * custom_action_order( rsc, stop_key(rsc), NULL, NULL,strdup(CRM_OP_FENCE),stonith_op, pe_order_optional, data_set); */ } g_list_free(action_list); key = demote_key(rsc); action_list = find_actions(rsc->actions, key, stonith_op->node); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (action->node->details->online == FALSE || action->node->details->unclean == TRUE || is_set(rsc->flags, pe_rsc_failed)) { if (is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_info(rsc, "Demote of failed resource %s is" " implict after %s is fenced", rsc->id, action->node->details->uname); } else { pe_rsc_info(rsc, "%s is implicit after %s is fenced", action->uuid, action->node->details->uname); } /* the stop would never complete and is * now implied by the stonith operation */ crm_trace("here - 1"); update_action_flags(action, pe_action_pseudo); update_action_flags(action, pe_action_runnable); if (is_stonith == FALSE) { order_actions(stonith_op, action, pe_order_optional); } } } g_list_free(action_list); } void rsc_stonith_ordering(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set) { gboolean is_stonith = FALSE; if (rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; rsc_stonith_ordering(child_rsc, stonith_op, data_set); } return; } if (is_not_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Skipping fencing constraints for unmanaged resource: %s", rsc->id); return; } /* Start constraints */ native_start_constraints(rsc, stonith_op, is_stonith, data_set); /* Stop constraints */ if (stonith_op) { native_stop_constraints(rsc, stonith_op, is_stonith, data_set); } } enum stack_activity { stack_stable = 0, stack_starting = 1, stack_stopping = 2, stack_middle = 4, }; static enum stack_activity find_clone_activity_on(resource_t * rsc, resource_t * target, node_t * node, const char *type) { int mode = stack_stable; action_t *active = NULL; if (target->children) { GListPtr gIter = NULL; for (gIter = target->children; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; mode |= find_clone_activity_on(rsc, child, node, type); } return mode; } active = find_first_action(target->actions, NULL, RSC_START, NULL); if (active && is_set(active->flags, pe_action_optional) == FALSE && is_set(active->flags, pe_action_pseudo) == FALSE) { pe_rsc_debug(rsc, "%s: found scheduled %s action (%s)", rsc->id, active->uuid, type); mode |= stack_starting; } active = find_first_action(target->actions, NULL, RSC_STOP, node); if (active && is_set(active->flags, pe_action_optional) == FALSE && is_set(active->flags, pe_action_pseudo) == FALSE) { pe_rsc_debug(rsc, "%s: found scheduled %s action (%s)", rsc->id, active->uuid, type); mode |= stack_stopping; } return mode; } static enum stack_activity check_stack_element(resource_t * rsc, resource_t * other_rsc, const char *type) { resource_t *other_p = uber_parent(other_rsc); if (other_rsc == NULL || other_rsc == rsc) { return stack_stable; } else if (other_p->variant == pe_native) { crm_notice("Cannot migrate %s due to dependency on %s (%s)", rsc->id, other_rsc->id, type); return stack_middle; } else if (other_rsc == rsc->parent) { int mode = 0; GListPtr gIter = NULL; for (gIter = other_rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; if (constraint->score > 0) { mode |= check_stack_element(rsc, constraint->rsc_rh, type); } } return mode; } else if (other_p->variant == pe_group) { crm_notice("Cannot migrate %s due to dependency on group %s (%s)", rsc->id, other_rsc->id, type); return stack_middle; } /* else: >= clone */ /* ## Assumption A depends on clone(B) ## Resource Activity During Move N1 N2 N3 --- --- --- t0 A.stop t1 B.stop B.stop t2 B.start B.start t3 A.start ## Resource Activity During Migration N1 N2 N3 --- --- --- t0 B.start B.start t1 A.stop (1) t2 A.start (2) t3 B.stop B.stop Node 1: Rewritten to be a migrate-to operation Node 2: Rewritten to be a migrate-from operation # Constraints The following constraints already exist in the system. The 'ok' and 'fail' column refers to whether they still hold for migration. a) A.stop -> A.start - ok b) B.stop -> B.start - fail c) A.stop -> B.stop - ok d) B.start -> A.start - ok e) B.stop -> A.start - fail f) A.stop -> B.start - fail ## Scenarios B unchanged - ok B stopping only - fail - possible after fixing 'e' B starting only - fail - possible after fixing 'f' B stoping and starting - fail - constraint 'b' is unfixable B restarting only on N2 - fail - as-per previous only rarer */ /* Only allow migration when the clone is either stable, only starting or only stopping */ return find_clone_activity_on(rsc, other_rsc, NULL, type); } static gboolean at_stack_bottom(resource_t * rsc) { char *key = NULL; action_t *start = NULL; action_t *other = NULL; int mode = stack_stable; GListPtr action_list = NULL; GListPtr gIter = NULL; GHashTable *coloc_list = NULL; key = start_key(rsc); action_list = find_actions(rsc->actions, key, NULL); free(key); pe_rsc_trace(rsc, "%s: processing", rsc->id); CRM_CHECK(action_list != NULL, return FALSE); start = action_list->data; g_list_free(action_list); coloc_list = g_hash_table_new(crm_str_hash, g_str_equal); for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; resource_t *target = constraint->rsc_rh; pe_rsc_trace(rsc, "Checking %s: %s == %s (%d)", constraint->id, rsc->id, target->id, constraint->score); if (constraint->score > 0) { mode |= check_stack_element(rsc, target, "coloc"); if (mode & stack_middle) { goto bail; } else if ((mode & stack_stopping) && (mode & stack_starting)) { crm_notice("Cannot migrate %s due to colocation activity (last was %s)", rsc->id, target->id); goto bail; } g_hash_table_insert(coloc_list, target->id, target); } } for (gIter = start->actions_before; gIter != NULL; gIter = gIter->next) { action_wrapper_t *other_w = (action_wrapper_t *) gIter->data; other = other_w->action; if (other_w->type & pe_order_serialize_only) { pe_rsc_trace(rsc, "%s: depends on %s (serialize ordering)", rsc->id, other->uuid); continue; } pe_rsc_trace(rsc, "%s: Checking %s ordering", rsc->id, other->uuid); if(other->rsc == NULL) { /* No colocation involved */ crm_trace("%s: No colocation for %s", rsc->id, other->uuid); continue; } else if (is_set(other->flags, pe_action_optional) == FALSE) { mode |= check_stack_element(rsc, other->rsc, "order"); if ((mode & stack_middle) && g_hash_table_lookup(coloc_list, other->rsc->id)) { crm_trace("%s: Stack middle: %s", rsc->id, other->rsc->id); goto bail; } else if ((mode & stack_stopping) && (mode & stack_starting)) { crm_notice("Cannot migrate %s due to ordering activity (last was %s)", rsc->id, other->rsc->id); goto bail; } } } g_hash_table_destroy(coloc_list); return TRUE; bail: g_hash_table_destroy(coloc_list); return FALSE; } static action_t * get_first_named_action(resource_t * rsc, const char *action, gboolean only_valid, node_t * current) { action_t *a = NULL; GListPtr action_list = NULL; char *key = generate_op_key(rsc->id, action, 0); action_list = find_actions(rsc->actions, key, current); if (action_list == NULL || action_list->data == NULL) { crm_trace("%s: no %s action", rsc->id, action); free(key); return NULL; } a = action_list->data; g_list_free(action_list); if (only_valid && is_set(a->flags, pe_action_pseudo)) { crm_trace("%s: pseudo", key); a = NULL; } else if (only_valid && is_not_set(a->flags, pe_action_runnable)) { crm_trace("%s: runnable", key); a = NULL; } free(key); return a; } static void MigrateRsc(resource_t * rsc, action_t * stop, action_t * start, pe_working_set_t * data_set, gboolean partial) { action_t *to = NULL; action_t *from = NULL; action_t *then = NULL; action_t *other = NULL; action_t *done = get_pseudo_op(STONITH_DONE, data_set); GListPtr gIter = NULL; const char *value = g_hash_table_lookup(rsc->meta, XML_OP_ATTR_ALLOW_MIGRATE); if(is_set(rsc->flags, pe_rsc_migrating)) { /* Already taken care of or in progress */ return; } if (crm_is_true(value) == FALSE) { return; } if (rsc->next_role > RSC_ROLE_SLAVE) { pe_rsc_trace(rsc, "%s: resource role: role=%s", rsc->id, role2text(rsc->next_role)); return; } if (start == NULL || stop == NULL) { pe_rsc_trace(rsc, "%s: not exists %p -> %p", rsc->id, stop, start); return; } else if (start->node == NULL || stop->node == NULL) { pe_rsc_trace(rsc, "%s: no node %p -> %p", rsc->id, stop->node, start->node); return; } else if (is_set(stop->flags, pe_action_optional)) { pe_rsc_trace(rsc, "%s: stop action", rsc->id); return; } else if (is_set(start->flags, pe_action_optional)) { pe_rsc_trace(rsc, "%s: start action", rsc->id); return; } else if (stop->node->details == start->node->details) { pe_rsc_trace(rsc, "%s: not moving %p -> %p", rsc->id, stop->node, start->node); return; } else if (at_stack_bottom(rsc) == FALSE) { pe_rsc_trace(rsc, "%s: not at stack bottom", rsc->id); return; } pe_rsc_trace(rsc, "%s %s -> %s", rsc->id, stop->node->details->uname, start->node->details->uname); if (partial) { pe_rsc_info(rsc, "Completing partial migration of %s from %s to %s", rsc->id, stop->node ? stop->node->details->uname : "unknown", start->node ? start->node->details->uname : "unknown"); } else { pe_rsc_info(rsc, "Migrating %s from %s to %s", rsc->id, stop->node ? stop->node->details->uname : "unknown", start->node ? start->node->details->uname : "unknown"); } /* Preserve the stop to ensure the end state is sane on that node, * Make the start a pseudo op * Create migrate_to, have it depend on everything the stop did * Create migrate_from * *-> migrate_to -> migrate_from -> stop -> start */ set_bit(rsc->flags, pe_rsc_migrating); update_action_flags(start, pe_action_pseudo); /* easier than trying to delete it from the graph * but perhaps we should have it run anyway */ if (!partial) { to = custom_action(rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), RSC_MIGRATE, stop->node, FALSE, TRUE, data_set); for (gIter = rsc->dangling_migrations; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; action_t *stop = stop_action(rsc, current, FALSE); order_actions(stop, to, pe_order_optional); pe_rsc_trace(rsc, "Ordering migration after cleanup of %s on %s", rsc->id, current->details->uname); } } from = custom_action(rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), RSC_MIGRATED, start->node, FALSE, TRUE, data_set); /* This is slightly sub-optimal if 'to' fails, but always * run both halves of the migration before terminating the * transition. * * This can be removed if/when we update unpack_rsc_op() to * 'correctly' handle partial migrations. * * Without this, we end up stopping both sides */ from->priority = INFINITY; if (!partial) { order_actions(to, from, pe_order_optional); add_hash_param(to->meta, XML_LRM_ATTR_MIGRATE_SOURCE, stop->node->details->uname); add_hash_param(to->meta, XML_LRM_ATTR_MIGRATE_TARGET, start->node->details->uname); } then = to ? to : from; order_actions(from, stop, pe_order_optional); order_actions(done, then, pe_order_optional); add_hash_param(from->meta, XML_LRM_ATTR_MIGRATE_SOURCE, stop->node->details->uname); add_hash_param(from->meta, XML_LRM_ATTR_MIGRATE_TARGET, start->node->details->uname); /* Create the correct ordering ajustments based on find_clone_activity_on(); */ for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; resource_t *target = constraint->rsc_rh; pe_rsc_info(rsc, "Repairing %s: %s == %s (%d)", constraint->id, rsc->id, target->id, constraint->score); if (constraint->score > 0) { int mode = check_stack_element(rsc, target, "coloc"); action_t *clone_stop = find_first_action(target->actions, NULL, RSC_STOP, NULL); action_t *clone_start = find_first_action(target->actions, NULL, RSC_STARTED, NULL); CRM_ASSERT(clone_stop != NULL); CRM_ASSERT(clone_start != NULL); CRM_ASSERT((mode & stack_middle) == 0); CRM_ASSERT(((mode & stack_stopping) && (mode & stack_starting)) == 0); if (mode & stack_stopping) { #if 0 crm_debug("Creating %s.start -> %s.stop ordering", rsc->id, target->id); order_actions(from, clone_stop, pe_order_optional); #endif GListPtr lpc2 = NULL; for (lpc2 = start->actions_before; lpc2 != NULL; lpc2 = lpc2->next) { action_wrapper_t *other_w = (action_wrapper_t *) lpc2->data; /* Needed if the clone's started pseudo-action ever gets printed in the graph */ if (other_w->action == clone_start) { crm_debug("Breaking %s -> %s ordering", other_w->action->uuid, start->uuid); other_w->type = pe_order_none; } } } else if (mode & stack_starting) { #if 0 crm_debug("Creating %s.started -> %s.stop ordering", target->id, rsc->id); order_actions(clone_start, to, pe_order_optional); #endif GListPtr lpc2 = NULL; for (lpc2 = clone_stop->actions_before; lpc2 != NULL; lpc2 = lpc2->next) { action_wrapper_t *other_w = (action_wrapper_t *) lpc2->data; /* Needed if the clone's stop pseudo-action ever gets printed in the graph */ if (other_w->action == stop) { crm_debug("Breaking %s -> %s ordering", other_w->action->uuid, clone_stop->uuid); other_w->type = pe_order_none; } } } } } /* Anything that needed stop to complete, now also needs start to have completed * However, this is implied now that start/stop are not morphed into migrate ops */ /* The migrate_to action (aka 'then') also needs to depend on anything that the stop needed to have completed */ for (gIter = stop->actions_before; gIter != NULL; gIter = gIter->next) { action_wrapper_t *other_w = (action_wrapper_t *) gIter->data; other = other_w->action; if (other->rsc == NULL) { /* nothing */ } else if (is_set(other->flags, pe_action_optional) || other->rsc == rsc || other->rsc == rsc->parent) { continue; } else { /* First check if they need to migrate too */ crm_debug("Checking %s first", other->rsc->id); rsc_migrate_reload(other->rsc, data_set); } if(other->rsc && is_set(other->rsc->flags, pe_rsc_migrating)) { crm_debug("Breaking %s before %s (migrating)", other_w->action->uuid, stop->uuid); other_w->type = pe_order_none; continue; } crm_debug("Ordering %s before %s (stop)", other_w->action->uuid, then->uuid); order_actions(other, then, other_w->type); } /* The migrate_to action (aka 'then') also needs to depend on anything that the start needed to have completed */ for (gIter = start->actions_before; gIter != NULL; gIter = gIter->next) { action_wrapper_t *other_w = (action_wrapper_t *) gIter->data; other = other_w->action; if (other->rsc == NULL) { /* nothing */ } else if (is_set(other->flags, pe_action_optional) || other->rsc == rsc || other->rsc == rsc->parent) { continue; } crm_debug("Ordering %s before %s (start)", other_w->action->uuid, then->uuid); order_actions(other, then, other_w->type); } } static void ReloadRsc(resource_t * rsc, action_t * stop, action_t * start, pe_working_set_t * data_set) { action_t *action = NULL; action_t *rewrite = NULL; if (is_not_set(rsc->flags, pe_rsc_try_reload)) { return; } else if (is_not_set(stop->flags, pe_action_optional)) { pe_rsc_trace(rsc, "%s: stop action", rsc->id); return; } else if (is_not_set(start->flags, pe_action_optional)) { pe_rsc_trace(rsc, "%s: start action", rsc->id); return; } pe_rsc_trace(rsc, "%s on %s", rsc->id, stop->node->details->uname); action = get_first_named_action(rsc, RSC_PROMOTE, TRUE, NULL); if (action && is_set(action->flags, pe_action_optional) == FALSE) { update_action_flags(action, pe_action_pseudo); } action = get_first_named_action(rsc, RSC_DEMOTE, TRUE, NULL); if (action && is_set(action->flags, pe_action_optional) == FALSE) { rewrite = action; update_action_flags(stop, pe_action_pseudo); } else { rewrite = start; } pe_rsc_info(rsc, "Rewriting %s of %s on %s as a reload", rewrite->task, rsc->id, stop->node->details->uname); set_bit(rsc->flags, pe_rsc_reload); update_action_flags(rewrite, pe_action_optional | pe_action_clear); free(rewrite->uuid); free(rewrite->task); rewrite->task = strdup("reload"); rewrite->uuid = generate_op_key(rsc->id, rewrite->task, 0); } void rsc_migrate_reload(resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = NULL; action_t *stop = NULL; action_t *start = NULL; gboolean partial = FALSE; if(is_set(rsc->flags, pe_rsc_munging)) { return; } set_bit(rsc->flags, pe_rsc_munging); if (rsc->children) { for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; rsc_migrate_reload(child_rsc, data_set); } return; } else if (rsc->variant > pe_native) { return; } pe_rsc_trace(rsc, "Processing %s", rsc->id); if (rsc->partial_migration_target) { start = get_first_named_action(rsc, RSC_START, TRUE, rsc->partial_migration_target); stop = get_first_named_action(rsc, RSC_STOP, TRUE, rsc->partial_migration_source); if (start && stop) { partial = TRUE; } } pe_rsc_trace(rsc, "%s %s %p", rsc->id, partial ? "partial" : "full", stop); if (!partial) { stop = get_first_named_action(rsc, RSC_STOP, TRUE, rsc->running_on ? rsc->running_on->data : NULL); start = get_first_named_action(rsc, RSC_START, TRUE, NULL); } if (is_not_set(rsc->flags, pe_rsc_managed) || is_set(rsc->flags, pe_rsc_failed) || is_set(rsc->flags, pe_rsc_start_pending) || rsc->next_role < RSC_ROLE_STARTED || ((g_list_length(rsc->running_on) != 1) && !partial)) { pe_rsc_trace(rsc, "%s: general resource state: flags=0x%.16llx", rsc->id, rsc->flags); return; } if (stop != NULL && is_set(stop->flags, pe_action_optional) && is_set(rsc->flags, pe_rsc_try_reload)) { ReloadRsc(rsc, stop, start, data_set); } else if (stop == NULL || is_not_set(stop->flags, pe_action_optional)) { MigrateRsc(rsc, stop, start, data_set, partial); } } void native_append_meta(resource_t * rsc, xmlNode * xml) { char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION); if (value) { char *name = NULL; name = crm_meta_name(XML_RSC_ATTR_INCARNATION); crm_xml_add(xml, name, value); free(name); } } diff --git a/pengine/regression.sh b/pengine/regression.sh index 7b086bb4f5..06ab17cc96 100755 --- a/pengine/regression.sh +++ b/pengine/regression.sh @@ -1,728 +1,728 @@ #!/bin/bash # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # core=`dirname $0` . $core/regression.core.sh || exit 1 create_mode="true" info Generating test outputs for these tests... # do_test file description info Done. echo "" info Performing the following tests from $io_dir create_mode="false" echo "" do_test simple1 "Offline " do_test simple2 "Start " do_test simple3 "Start 2 " do_test simple4 "Start Failed" do_test simple6 "Stop Start " do_test simple7 "Shutdown " #do_test simple8 "Stonith " #do_test simple9 "Lower version" #do_test simple10 "Higher version" do_test simple11 "Priority (ne)" do_test simple12 "Priority (eq)" do_test simple8 "Stickiness" echo "" do_test group1 "Group " do_test group2 "Group + Native " do_test group3 "Group + Group " do_test group4 "Group + Native (nothing)" do_test group5 "Group + Native (move) " do_test group6 "Group + Group (move) " do_test group7 "Group colocation" do_test group13 "Group colocation (cant run)" do_test group8 "Group anti-colocation" do_test group9 "Group recovery" do_test group10 "Group partial recovery" do_test group11 "Group target_role" do_test group14 "Group stop (graph terminated)" do_test group15 "-ve group colocation" do_test bug-1573 "Partial stop of a group with two children" do_test bug-1718 "Mandatory group ordering - Stop group_FUN" do_test bug-lf-2613 "Move group on failure" do_test bug-lf-2619 "Move group on clone failure" do_test group-fail "Ensure stop order is preserved for partially active groups" do_test group-unmanaged "No need to restart r115 because r114 is unmanaged" do_test group-unmanaged-stopped "Make sure r115 is stopped when r114 fails" do_test group-dependants "Account for the location preferences of things colocated with a group" echo "" do_test rsc_dep1 "Must not " do_test rsc_dep3 "Must " do_test rsc_dep5 "Must not 3 " do_test rsc_dep7 "Must 3 " do_test rsc_dep10 "Must (but cant)" do_test rsc_dep2 "Must (running) " do_test rsc_dep8 "Must (running : alt) " do_test rsc_dep4 "Must (running + move)" do_test asymmetric "Asymmetric - require explicit location constraints" echo "" do_test orphan-0 "Orphan ignore" do_test orphan-1 "Orphan stop" do_test orphan-2 "Orphan stop, remove failcount" echo "" do_test params-0 "Params: No change" do_test params-1 "Params: Changed" do_test params-2 "Params: Resource definition" do_test params-4 "Params: Reload" do_test params-5 "Params: Restart based on probe digest" do_test novell-251689 "Resource definition change + target_role=stopped" do_test bug-lf-2106 "Restart all anonymous clone instances after config change" do_test params-6 "Params: Detect reload in previously migrated resource" echo "" do_test target-0 "Target Role : baseline" do_test target-1 "Target Role : master" do_test target-2 "Target Role : invalid" echo "" do_test domain "Failover domains" do_test base-score "Set a node's default score for all nodes" echo "" do_test date-1 "Dates" -t "2005-020" do_test date-2 "Date Spec - Pass" -t "2005-020T12:30" do_test date-3 "Date Spec - Fail" -t "2005-020T11:30" do_test probe-0 "Probe (anon clone)" do_test probe-1 "Pending Probe" do_test probe-2 "Correctly re-probe cloned groups" do_test probe-3 "Probe (pending node)" do_test probe-4 "Probe (pending node + stopped resource)" --rc 4 do_test standby "Standby" do_test comments "Comments" echo "" do_test one-or-more-0 "Everything starts" do_test one-or-more-1 "Nothing starts because of A" do_test one-or-more-2 "D can start because of C" do_test one-or-more-3 "D cannot start because of B and C" do_test one-or-more-4 "D cannot start because of target-role" do_test one-or-more-5 "Start A and F even though C and D are stopped" do_test one-or-more-6 "Leave A running even though B is stopped" do_test one-or-more-7 "Leave A running even though C is stopped" do_test bug-5140-require-all-false "Allow basegrp:0 to stop" echo "" do_test order1 "Order start 1 " do_test order2 "Order start 2 " do_test order3 "Order stop " do_test order4 "Order (multiple) " do_test order5 "Order (move) " do_test order6 "Order (move w/ restart) " do_test order7 "Order (manditory) " do_test order-optional "Order (score=0) " do_test order-required "Order (score=INFINITY) " do_test bug-lf-2171 "Prevent group start when clone is stopped" do_test order-clone "Clone ordering should be able to prevent startup of dependant clones" do_test order-sets "Ordering for resource sets" do_test order-serialize "Serialize resources without inhibiting migration" do_test order-serialize-set "Serialize a set of resources without inhibiting migration" do_test clone-order-primitive "Order clone start after a primitive" do_test order-optional-keyword "Order (optional keyword)" do_test order-mandatory "Order (mandatory keyword)" do_test bug-lf-2493 "Don't imply colocation requirements when applying ordering constraints with clones" do_test ordered-set-basic-startup "Constraint set with default order settings." do_test order-wrong-kind "Order (error)" echo "" do_test coloc-loop "Colocation - loop" do_test coloc-many-one "Colocation - many-to-one" do_test coloc-list "Colocation - many-to-one with list" do_test coloc-group "Colocation - groups" do_test coloc-slave-anti "Anti-colocation with slave shouldn't prevent master colocation" do_test coloc-attr "Colocation based on node attributes" do_test coloc-negative-group "Negative colocation with a group" do_test coloc-intra-set "Intra-set colocation" do_test bug-lf-2435 "Colocation sets with a negative score" do_test coloc-clone-stays-active "Ensure clones don't get stopped/demoted because a dependant must stop" do_test coloc_fp_logic "Verify floating point calculations in colocation are working" do_test colo_master_w_native "cl#5070 - Verify promotion order is affected when colocating master to native rsc." do_test colo_slave_w_native "cl#5070 - Verify promotion order is affected when colocating slave to native rsc." echo "" do_test rsc-sets-seq-true "Resource Sets - sequential=false" do_test rsc-sets-seq-false "Resource Sets - sequential=true" do_test rsc-sets-clone "Resource Sets - Clone" do_test rsc-sets-master "Resource Sets - Master" do_test rsc-sets-clone-1 "Resource Sets - Clone (lf#2404)" #echo "" #do_test agent1 "version: lt (empty)" #do_test agent2 "version: eq " #do_test agent3 "version: gt " echo "" do_test attrs1 "string: eq (and) " do_test attrs2 "string: lt / gt (and)" do_test attrs3 "string: ne (or) " do_test attrs4 "string: exists " do_test attrs5 "string: not_exists " do_test attrs6 "is_dc: true " do_test attrs7 "is_dc: false " do_test attrs8 "score_attribute " do_test per-node-attrs "Per node resource parameters" echo "" do_test mon-rsc-1 "Schedule Monitor - start" do_test mon-rsc-2 "Schedule Monitor - move " do_test mon-rsc-3 "Schedule Monitor - pending start " do_test mon-rsc-4 "Schedule Monitor - move/pending start" echo "" do_test rec-rsc-0 "Resource Recover - no start " do_test rec-rsc-1 "Resource Recover - start " do_test rec-rsc-2 "Resource Recover - monitor " do_test rec-rsc-3 "Resource Recover - stop - ignore" do_test rec-rsc-4 "Resource Recover - stop - block " do_test rec-rsc-5 "Resource Recover - stop - fence " do_test rec-rsc-6 "Resource Recover - multiple - restart" do_test rec-rsc-7 "Resource Recover - multiple - stop " do_test rec-rsc-8 "Resource Recover - multiple - block " do_test rec-rsc-9 "Resource Recover - group/group" do_test monitor-recovery "on-fail=block + resource recovery detected by recurring monitor" do_test stop-failure-no-quorum "Stop failure without quorum" do_test stop-failure-no-fencing "Stop failure without fencing available" do_test stop-failure-with-fencing "Stop failure with fencing available" echo "" do_test quorum-1 "No quorum - ignore" do_test quorum-2 "No quorum - freeze" do_test quorum-3 "No quorum - stop " do_test quorum-4 "No quorum - start anyway" do_test quorum-5 "No quorum - start anyway (group)" do_test quorum-6 "No quorum - start anyway (clone)" echo "" do_test rec-node-1 "Node Recover - Startup - no fence" do_test rec-node-2 "Node Recover - Startup - fence " do_test rec-node-3 "Node Recover - HA down - no fence" do_test rec-node-4 "Node Recover - HA down - fence " do_test rec-node-5 "Node Recover - CRM down - no fence" do_test rec-node-6 "Node Recover - CRM down - fence " do_test rec-node-7 "Node Recover - no quorum - ignore " do_test rec-node-8 "Node Recover - no quorum - freeze " do_test rec-node-9 "Node Recover - no quorum - stop " do_test rec-node-10 "Node Recover - no quorum - stop w/fence" do_test rec-node-11 "Node Recover - CRM down w/ group - fence " do_test rec-node-12 "Node Recover - nothing active - fence " do_test rec-node-13 "Node Recover - failed resource + shutdown - fence " do_test rec-node-15 "Node Recover - unknown lrm section" do_test rec-node-14 "Serialize all stonith's" echo "" do_test multi1 "Multiple Active (stop/start)" echo "" do_test migrate-begin "Normal migration" do_test migrate-success "Completed migration" do_test migrate-partial-1 "Completed migration, missing stop on source" do_test migrate-partial-2 "Successful migrate_to only" do_test migrate-partial-3 "Successful migrate_to only, target down" do_test migrate-partial-4 "Migrate from the correct host after migrate_to+migrate_from" do_test migrate-fail-2 "Failed migrate_from" do_test migrate-fail-3 "Failed migrate_from + stop on source" do_test migrate-fail-4 "Failed migrate_from + stop on target - ideally we wouldn't need to re-stop on target" do_test migrate-fail-5 "Failed migrate_from + stop on source and target" do_test migrate-fail-6 "Failed migrate_to" do_test migrate-fail-7 "Failed migrate_to + stop on source" do_test migrate-fail-8 "Failed migrate_to + stop on target - ideally we wouldn't need to re-stop on target" do_test migrate-fail-9 "Failed migrate_to + stop on source and target" do_test migrate-stop "Migration in a stopping stack" do_test migrate-start "Migration in a starting stack" do_test migrate-stop_start "Migration in a restarting stack" do_test migrate-stop-complex "Migration in a complex stopping stack" do_test migrate-start-complex "Migration in a complex starting stack" do_test migrate-stop-start-complex "Migration in a complex moving stack" do_test migrate-shutdown "Order the post-migration 'stop' before node shutdown" do_test migrate-1 "Migrate (migrate)" do_test migrate-2 "Migrate (stable)" do_test migrate-3 "Migrate (failed migrate_to)" do_test migrate-4 "Migrate (failed migrate_from)" do_test novell-252693 "Migration in a stopping stack" do_test novell-252693-2 "Migration in a starting stack" do_test novell-252693-3 "Non-Migration in a starting and stopping stack" do_test bug-1820 "Migration in a group" do_test bug-1820-1 "Non-migration in a group" do_test migrate-5 "Primitive migration with a clone" do_test migrate-fencing "Migration after Fencing" do_test migrate-both-vms "Migrate two VMs that have no colocation" #echo "" #do_test complex1 "Complex " do_test bug-lf-2422 "Dependancy on partially active group - stop ocfs:*" echo "" do_test clone-anon-probe-1 "Probe the correct (anonymous) clone instance for each node" do_test clone-anon-probe-2 "Avoid needless re-probing of anonymous clones" do_test clone-anon-failcount "Merge failcounts for anonymous clones" do_test inc0 "Incarnation start" do_test inc1 "Incarnation start order" do_test inc2 "Incarnation silent restart, stop, move" do_test inc3 "Inter-incarnation ordering, silent restart, stop, move" do_test inc4 "Inter-incarnation ordering, silent restart, stop, move (ordered)" do_test inc5 "Inter-incarnation ordering, silent restart, stop, move (restart 1)" do_test inc6 "Inter-incarnation ordering, silent restart, stop, move (restart 2)" do_test inc7 "Clone colocation" do_test inc8 "Clone anti-colocation" do_test inc9 "Non-unique clone" do_test inc10 "Non-unique clone (stop)" do_test inc11 "Primitive colocation with clones" do_test inc12 "Clone shutdown" do_test cloned-group "Make sure only the correct number of cloned groups are started" do_test clone-no-shuffle "Dont prioritize allocation of instances that must be moved" do_test clone-max-zero "Orphan processing with clone-max=0" do_test clone-anon-dup "Bug LF#2087 - Correctly parse the state of anonymous clones that are active more than once per node" do_test bug-lf-2160 "Dont shuffle clones due to colocation" do_test bug-lf-2213 "clone-node-max enforcement for cloned groups" do_test bug-lf-2153 "Clone ordering constraints" do_test bug-lf-2361 "Ensure clones observe mandatory ordering constraints if the LHS is unrunnable" do_test bug-lf-2317 "Avoid needless restart of primitive depending on a clone" do_test clone-colocate-instance-1 "Colocation with a specific clone instance (negative example)" do_test clone-colocate-instance-2 "Colocation with a specific clone instance" do_test clone-order-instance "Ordering with specific clone instances" do_test bug-lf-2453 "Enforce mandatory clone ordering without colocation" do_test bug-lf-2508 "Correctly reconstruct the status of anonymous cloned groups" do_test bug-lf-2544 "Balanced clone placement" do_test bug-lf-2445 "Redistribute clones with node-max > 1 and stickiness = 0" do_test bug-lf-2574 "Avoid clone shuffle" do_test bug-lf-2581 "Avoid group restart due to unrelated clone (re)start" do_test bug-cl-5168 "Don't shuffle clones" do_test bug-cl-5170 "Prevent clone from starting with on-fail=block" do_test clone-fail-block-colocation "Move colocated group when failed clone has on-fail=block" do_test clone-interleave-1 "Clone-3 cannot start on pcmk-1 due to interleaved ordering (no colocation)" do_test clone-interleave-2 "Clone-3 must stop on pcmk-1 due to interleaved ordering (no colocation)" do_test clone-interleave-3 "Clone-3 must be recovered on pcmk-1 due to interleaved ordering (no colocation)" echo "" do_test master-0 "Stopped -> Slave" do_test master-1 "Stopped -> Promote" do_test master-2 "Stopped -> Promote : notify" do_test master-3 "Stopped -> Promote : master location" do_test master-4 "Started -> Promote : master location" do_test master-5 "Promoted -> Promoted" do_test master-6 "Promoted -> Promoted (2)" do_test master-7 "Promoted -> Fenced" do_test master-8 "Promoted -> Fenced -> Moved" do_test master-9 "Stopped + Promotable + No quorum" do_test master-10 "Stopped -> Promotable : notify with monitor" do_test master-11 "Stopped -> Promote : colocation" do_test novell-239082 "Demote/Promote ordering" do_test novell-239087 "Stable master placement" do_test master-12 "Promotion based solely on rsc_location constraints" do_test master-13 "Include preferences of colocated resources when placing master" do_test master-demote "Ordering when actions depends on demoting a slave resource" do_test master-ordering "Prevent resources from starting that need a master" do_test bug-1765 "Master-Master Colocation (dont stop the slaves)" do_test master-group "Promotion of cloned groups" do_test bug-lf-1852 "Don't shuffle master/slave instances unnecessarily" do_test master-failed-demote "Dont retry failed demote actions" do_test master-failed-demote-2 "Dont retry failed demote actions (notify=false)" do_test master-depend "Ensure resources that depend on the master don't get allocated until the master does" do_test master-reattach "Re-attach to a running master" do_test master-allow-start "Don't include master score if it would prevent allocation" do_test master-colocation "Allow master instances placemaker to be influenced by colocation constraints" do_test master-pseudo "Make sure promote/demote pseudo actions are created correctly" do_test master-role "Prevent target-role from promoting more than master-max instances" do_test bug-lf-2358 "Master-Master anti-colocation" do_test master-promotion-constraint "Mandatory master colocation constraints" do_test unmanaged-master "Ensure role is preserved for unmanaged resources" do_test master-unmanaged-monitor "Start the correct monitor operation for unmanaged masters" do_test master-demote-2 "Demote does not clear past failure" do_test master-move "Move master based on failure of colocated group" do_test master-probed-score "Observe the promotion score of probed resources" do_test colocation_constraint_stops_master "cl#5054 - Ensure master is demoted when stopped by colocation constraint" do_test colocation_constraint_stops_slave "cl#5054 - Ensure slave is not demoted when stopped by colocation constraint" do_test order_constraint_stops_master "cl#5054 - Ensure master is demoted when stopped by order constraint" do_test order_constraint_stops_slave "cl#5054 - Ensure slave is not demoted when stopped by order constraint" do_test master_monitor_restart "cl#5072 - Ensure master monitor operation will start after promotion." do_test bug-rh-880249 "Handle replacement of an m/s resource with a primitive" do_test bug-5143-ms-shuffle "Prevent master shuffling due to promotion score" do_test master-demote-block "Block promotion if demote fails with on-fail=block" do_test master-dependant-ban "Don't stop instances from being active because a dependant is banned from that host" do_test master-stop "Stop instances due to location constraint with role=Started" do_test master-partially-demoted-group "Allow partially demoted group to finish demoting" echo "" do_test history-1 "Correctly parse stateful-1 resource state" echo "" do_test managed-0 "Managed (reference)" do_test managed-1 "Not managed - down " do_test managed-2 "Not managed - up " do_test bug-5028 "Shutdown should block if anything depends on an unmanaged resource" do_test bug-5028-detach "Ensure detach still works" do_test bug-5028-bottom "Ensure shutdown still blocks if the blocked resource is at the bottom of the stack" do_test unmanaged-stop-1 "cl#5155 - Block the stop of resources if any depending resource is unmanaged " do_test unmanaged-stop-2 "cl#5155 - Block the stop of resources if the first resource in a mandatory stop order is unmanaged " do_test unmanaged-stop-3 "cl#5155 - Block the stop of resources if any depending resource in a group is unmanaged " do_test unmanaged-stop-4 "cl#5155 - Block the stop of resources if any depending resource in the middle of a group is unmanaged " echo "" do_test interleave-0 "Interleave (reference)" do_test interleave-1 "coloc - not interleaved" do_test interleave-2 "coloc - interleaved " do_test interleave-3 "coloc - interleaved (2)" do_test interleave-pseudo-stop "Interleaved clone during stonith" do_test interleave-stop "Interleaved clone during stop" do_test interleave-restart "Interleaved clone during dependancy restart" echo "" do_test notify-0 "Notify reference" do_test notify-1 "Notify simple" do_test notify-2 "Notify simple, confirm" do_test notify-3 "Notify move, confirm" do_test novell-239079 "Notification priority" #do_test notify-2 "Notify - 764" echo "" do_test 594 "OSDL #594 - Unrunnable actions scheduled in transition" do_test 662 "OSDL #662 - Two resources start on one node when incarnation_node_max = 1" do_test 696 "OSDL #696 - CRM starts stonith RA without monitor" do_test 726 "OSDL #726 - Attempting to schedule rsc_posic041_monitor_5000 _after_ a stop" do_test 735 "OSDL #735 - Correctly detect that rsc_hadev1 is stopped on hadev3" do_test 764 "OSDL #764 - Missing monitor op for DoFencing:child_DoFencing:1" do_test 797 "OSDL #797 - Assert triggered: task_id_i > max_call_id" do_test 829 "OSDL #829" do_test 994 "OSDL #994 - Stopping the last resource in a resource group causes the entire group to be restarted" do_test 994-2 "OSDL #994 - with a dependant resource" do_test 1360 "OSDL #1360 - Clone stickiness" do_test 1484 "OSDL #1484 - on_fail=stop" do_test 1494 "OSDL #1494 - Clone stability" do_test unrunnable-1 "Unrunnable" do_test stonith-0 "Stonith loop - 1" do_test stonith-1 "Stonith loop - 2" do_test stonith-2 "Stonith loop - 3" do_test stonith-3 "Stonith startup" do_test stonith-4 "Stonith node state" --rc 4 do_test bug-1572-1 "Recovery of groups depending on master/slave" do_test bug-1572-2 "Recovery of groups depending on master/slave when the master is never re-promoted" do_test bug-1685 "Depends-on-master ordering" do_test bug-1822 "Dont promote partially active groups" do_test bug-pm-11 "New resource added to a m/s group" do_test bug-pm-12 "Recover only the failed portion of a cloned group" do_test bug-n-387749 "Don't shuffle clone instances" do_test bug-n-385265 "Don't ignore the failure stickiness of group children - resource_idvscommon should stay stopped" do_test bug-n-385265-2 "Ensure groups are migrated instead of remaining partially active on the current node" do_test bug-lf-1920 "Correctly handle probes that find active resources" do_test bnc-515172 "Location constraint with multiple expressions" do_test colocate-primitive-with-clone "Optional colocation with a clone" do_test use-after-free-merge "Use-after-free in native_merge_weights" do_test bug-lf-2551 "STONITH ordering for stop" do_test bug-lf-2606 "Stonith implies demote" do_test bug-lf-2474 "Ensure resource op timeout takes precedence over op_defaults" do_test bug-suse-707150 "Prevent vm-01 from starting due to colocation/ordering" do_test bug-5014-A-start-B-start "Verify when A starts B starts using symmetrical=false" do_test bug-5014-A-stop-B-started "Verify when A stops B does not stop if it has already started using symmetric=false" do_test bug-5014-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using symmetric=false" do_test bug-5014-CthenAthenB-C-stopped "Verify when C then A is symmetrical=true, A then B is symmetric=false, and C is stopped that nothing starts." do_test bug-5014-CLONE-A-start-B-start "Verify when A starts B starts using clone resources with symmetric=false" do_test bug-5014-CLONE-A-stop-B-started "Verify when A stops B does not stop if it has already started using clone resources with symmetric=false." do_test bug-5014-GROUP-A-start-B-start "Verify when A starts B starts when using group resources with symmetric=false." do_test bug-5014-GROUP-A-stopped-B-started "Verify when A stops B does not stop if it has already started using group resources with symmetric=false." do_test bug-5014-GROUP-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using group resources with symmetric=false." do_test bug-5014-ordered-set-symmetrical-false "Verify ordered sets work with symmetrical=false" do_test bug-5014-ordered-set-symmetrical-true "Verify ordered sets work with symmetrical=true" do_test bug-5007-masterslave_colocation "Verify use of colocation scores other than INFINITY and -INFINITY work on multi-state resources." do_test bug-5038 "Prevent restart of anonymous clones when clone-max decreases" do_test bug-5025-1 "Automatically clean up failcount after resource config change with reload" do_test bug-5025-2 "Make sure clear failcount action isn't set when config does not change." do_test bug-5025-3 "Automatically clean up failcount after resource config change with restart" do_test bug-5025-4 "Clear failcount when last failure is a start op and rsc attributes changed." do_test failcount "Ensure failcounts are correctly expired" do_test failcount-block "Ensure failcounts are not expired when on-fail=block is present" do_test monitor-onfail-restart "bug-5058 - Monitor failure with on-fail set to restart" do_test monitor-onfail-stop "bug-5058 - Monitor failure wiht on-fail set to stop" do_test bug-5059 "No need to restart p_stateful1:*" do_test bug-5069-op-enabled "Test on-fail=ignore with failure when monitor is enabled." do_test bug-5069-op-disabled "Test on-fail-ignore with failure when monitor is disabled." do_test obsolete-lrm-resource "cl#5115 - Do not use obsolete lrm_resource sections" do_test ignore_stonith_rsc_order1 "cl#5056- Ignore order constraint between stonith and non-stonith rsc." do_test ignore_stonith_rsc_order2 "cl#5056- Ignore order constraint with group rsc containing mixed stonith and non-stonith." do_test ignore_stonith_rsc_order3 "cl#5056- Ignore order constraint, stonith clone and mixed group" do_test ignore_stonith_rsc_order4 "cl#5056- Ignore order constraint, stonith clone and clone with nested mixed group" do_test honor_stonith_rsc_order1 "cl#5056- Honor order constraint, stonith clone and pure stonith group(single rsc)." do_test honor_stonith_rsc_order2 "cl#5056- Honor order constraint, stonith clone and pure stonith group(multiple rsc)" do_test honor_stonith_rsc_order3 "cl#5056- Honor order constraint, stonith clones with nested pure stonith group." do_test honor_stonith_rsc_order4 "cl#5056- Honor order constraint, between two native stonith rscs." do_test probe-timeout "cl#5099 - Default probe timeout" echo "" do_test systemhealth1 "System Health () #1" do_test systemhealth2 "System Health () #2" do_test systemhealth3 "System Health () #3" do_test systemhealthn1 "System Health (None) #1" do_test systemhealthn2 "System Health (None) #2" do_test systemhealthn3 "System Health (None) #3" do_test systemhealthm1 "System Health (Migrate On Red) #1" do_test systemhealthm2 "System Health (Migrate On Red) #2" do_test systemhealthm3 "System Health (Migrate On Red) #3" do_test systemhealtho1 "System Health (Only Green) #1" do_test systemhealtho2 "System Health (Only Green) #2" do_test systemhealtho3 "System Health (Only Green) #3" do_test systemhealthp1 "System Health (Progessive) #1" do_test systemhealthp2 "System Health (Progessive) #2" do_test systemhealthp3 "System Health (Progessive) #3" echo "" do_test utilization "Placement Strategy - utilization" do_test minimal "Placement Strategy - minimal" do_test balanced "Placement Strategy - balanced" echo "" do_test placement-stickiness "Optimized Placement Strategy - stickiness" do_test placement-priority "Optimized Placement Strategy - priority" do_test placement-location "Optimized Placement Strategy - location" do_test placement-capacity "Optimized Placement Strategy - capacity" echo "" do_test utilization-order1 "Utilization Order - Simple" do_test utilization-order2 "Utilization Order - Complex" do_test utilization-order3 "Utilization Order - Migrate" do_test utilization-order4 "Utilization Order - Live Mirgration (bnc#695440)" do_test utilization-shuffle "Don't displace prmExPostgreSQLDB2 on act2, Start prmExPostgreSQLDB1 on act3" do_test load-stopped-loop "Avoid transition loop due to load_stopped (cl#5044)" echo "" do_test reprobe-target_rc "Ensure correct target_rc for reprobe of inactive resources" do_test node-maintenance-1 "cl#5128 - Node maintenance" do_test node-maintenance-2 "cl#5128 - Node maintenance (coming out of maintenance mode)" echo "" do_test not-installed-agent "The resource agent is missing" do_test not-installed-tools "Something the resource agent needs is missing" echo "" do_test stopped-monitor-00 "Stopped Monitor - initial start" do_test stopped-monitor-01 "Stopped Monitor - failed started" do_test stopped-monitor-02 "Stopped Monitor - started multi-up" do_test stopped-monitor-03 "Stopped Monitor - stop started" do_test stopped-monitor-04 "Stopped Monitor - failed stop" do_test stopped-monitor-05 "Stopped Monitor - start unmanaged" do_test stopped-monitor-06 "Stopped Monitor - unmanaged multi-up" do_test stopped-monitor-07 "Stopped Monitor - start unmanaged multi-up" do_test stopped-monitor-08 "Stopped Monitor - migrate" do_test stopped-monitor-09 "Stopped Monitor - unmanage started" do_test stopped-monitor-10 "Stopped Monitor - unmanaged started multi-up" do_test stopped-monitor-11 "Stopped Monitor - stop unmanaged started" do_test stopped-monitor-12 "Stopped Monitor - unmanaged started multi-up (targer-role="Stopped")" do_test stopped-monitor-20 "Stopped Monitor - initial stop" do_test stopped-monitor-21 "Stopped Monitor - stopped single-up" do_test stopped-monitor-22 "Stopped Monitor - stopped multi-up" do_test stopped-monitor-23 "Stopped Monitor - start stopped" do_test stopped-monitor-24 "Stopped Monitor - unmanage stopped" do_test stopped-monitor-25 "Stopped Monitor - unmanaged stopped multi-up" do_test stopped-monitor-26 "Stopped Monitor - start unmanaged stopped" do_test stopped-monitor-27 "Stopped Monitor - unmanaged stopped multi-up (target-role="Started")" do_test stopped-monitor-30 "Stopped Monitor - new node started" do_test stopped-monitor-31 "Stopped Monitor - new node stopped" echo"" do_test ticket-primitive-1 "Ticket - Primitive (loss-policy=stop, initial)" do_test ticket-primitive-2 "Ticket - Primitive (loss-policy=stop, granted)" do_test ticket-primitive-3 "Ticket - Primitive (loss-policy-stop, revoked)" do_test ticket-primitive-4 "Ticket - Primitive (loss-policy=demote, initial)" do_test ticket-primitive-5 "Ticket - Primitive (loss-policy=demote, granted)" do_test ticket-primitive-6 "Ticket - Primitive (loss-policy=demote, revoked)" do_test ticket-primitive-7 "Ticket - Primitive (loss-policy=fence, initial)" do_test ticket-primitive-8 "Ticket - Primitive (loss-policy=fence, granted)" do_test ticket-primitive-9 "Ticket - Primitive (loss-policy=fence, revoked)" do_test ticket-primitive-10 "Ticket - Primitive (loss-policy=freeze, initial)" do_test ticket-primitive-11 "Ticket - Primitive (loss-policy=freeze, granted)" do_test ticket-primitive-12 "Ticket - Primitive (loss-policy=freeze, revoked)" do_test ticket-primitive-13 "Ticket - Primitive (loss-policy=stop, standby, granted)" do_test ticket-primitive-14 "Ticket - Primitive (loss-policy=stop, granted, standby)" do_test ticket-primitive-15 "Ticket - Primitive (loss-policy=stop, standby, revoked)" do_test ticket-primitive-16 "Ticket - Primitive (loss-policy=demote, standby, granted)" do_test ticket-primitive-17 "Ticket - Primitive (loss-policy=demote, granted, standby)" do_test ticket-primitive-18 "Ticket - Primitive (loss-policy=demote, standby, revoked)" do_test ticket-primitive-19 "Ticket - Primitive (loss-policy=fence, standby, granted)" do_test ticket-primitive-20 "Ticket - Primitive (loss-policy=fence, granted, standby)" do_test ticket-primitive-21 "Ticket - Primitive (loss-policy=fence, standby, revoked)" do_test ticket-primitive-22 "Ticket - Primitive (loss-policy=freeze, standby, granted)" do_test ticket-primitive-23 "Ticket - Primitive (loss-policy=freeze, granted, standby)" do_test ticket-primitive-24 "Ticket - Primitive (loss-policy=freeze, standby, revoked)" echo"" do_test ticket-group-1 "Ticket - Group (loss-policy=stop, initial)" do_test ticket-group-2 "Ticket - Group (loss-policy=stop, granted)" do_test ticket-group-3 "Ticket - Group (loss-policy-stop, revoked)" do_test ticket-group-4 "Ticket - Group (loss-policy=demote, initial)" do_test ticket-group-5 "Ticket - Group (loss-policy=demote, granted)" do_test ticket-group-6 "Ticket - Group (loss-policy=demote, revoked)" do_test ticket-group-7 "Ticket - Group (loss-policy=fence, initial)" do_test ticket-group-8 "Ticket - Group (loss-policy=fence, granted)" do_test ticket-group-9 "Ticket - Group (loss-policy=fence, revoked)" do_test ticket-group-10 "Ticket - Group (loss-policy=freeze, initial)" do_test ticket-group-11 "Ticket - Group (loss-policy=freeze, granted)" do_test ticket-group-12 "Ticket - Group (loss-policy=freeze, revoked)" do_test ticket-group-13 "Ticket - Group (loss-policy=stop, standby, granted)" do_test ticket-group-14 "Ticket - Group (loss-policy=stop, granted, standby)" do_test ticket-group-15 "Ticket - Group (loss-policy=stop, standby, revoked)" do_test ticket-group-16 "Ticket - Group (loss-policy=demote, standby, granted)" do_test ticket-group-17 "Ticket - Group (loss-policy=demote, granted, standby)" do_test ticket-group-18 "Ticket - Group (loss-policy=demote, standby, revoked)" do_test ticket-group-19 "Ticket - Group (loss-policy=fence, standby, granted)" do_test ticket-group-20 "Ticket - Group (loss-policy=fence, granted, standby)" do_test ticket-group-21 "Ticket - Group (loss-policy=fence, standby, revoked)" do_test ticket-group-22 "Ticket - Group (loss-policy=freeze, standby, granted)" do_test ticket-group-23 "Ticket - Group (loss-policy=freeze, granted, standby)" do_test ticket-group-24 "Ticket - Group (loss-policy=freeze, standby, revoked)" echo"" do_test ticket-clone-1 "Ticket - Clone (loss-policy=stop, initial)" do_test ticket-clone-2 "Ticket - Clone (loss-policy=stop, granted)" do_test ticket-clone-3 "Ticket - Clone (loss-policy-stop, revoked)" do_test ticket-clone-4 "Ticket - Clone (loss-policy=demote, initial)" do_test ticket-clone-5 "Ticket - Clone (loss-policy=demote, granted)" do_test ticket-clone-6 "Ticket - Clone (loss-policy=demote, revoked)" do_test ticket-clone-7 "Ticket - Clone (loss-policy=fence, initial)" do_test ticket-clone-8 "Ticket - Clone (loss-policy=fence, granted)" do_test ticket-clone-9 "Ticket - Clone (loss-policy=fence, revoked)" do_test ticket-clone-10 "Ticket - Clone (loss-policy=freeze, initial)" do_test ticket-clone-11 "Ticket - Clone (loss-policy=freeze, granted)" do_test ticket-clone-12 "Ticket - Clone (loss-policy=freeze, revoked)" do_test ticket-clone-13 "Ticket - Clone (loss-policy=stop, standby, granted)" do_test ticket-clone-14 "Ticket - Clone (loss-policy=stop, granted, standby)" do_test ticket-clone-15 "Ticket - Clone (loss-policy=stop, standby, revoked)" do_test ticket-clone-16 "Ticket - Clone (loss-policy=demote, standby, granted)" do_test ticket-clone-17 "Ticket - Clone (loss-policy=demote, granted, standby)" do_test ticket-clone-18 "Ticket - Clone (loss-policy=demote, standby, revoked)" do_test ticket-clone-19 "Ticket - Clone (loss-policy=fence, standby, granted)" do_test ticket-clone-20 "Ticket - Clone (loss-policy=fence, granted, standby)" do_test ticket-clone-21 "Ticket - Clone (loss-policy=fence, standby, revoked)" do_test ticket-clone-22 "Ticket - Clone (loss-policy=freeze, standby, granted)" do_test ticket-clone-23 "Ticket - Clone (loss-policy=freeze, granted, standby)" do_test ticket-clone-24 "Ticket - Clone (loss-policy=freeze, standby, revoked)" echo"" do_test ticket-master-1 "Ticket - Master (loss-policy=stop, initial)" do_test ticket-master-2 "Ticket - Master (loss-policy=stop, granted)" do_test ticket-master-3 "Ticket - Master (loss-policy-stop, revoked)" do_test ticket-master-4 "Ticket - Master (loss-policy=demote, initial)" do_test ticket-master-5 "Ticket - Master (loss-policy=demote, granted)" do_test ticket-master-6 "Ticket - Master (loss-policy=demote, revoked)" do_test ticket-master-7 "Ticket - Master (loss-policy=fence, initial)" do_test ticket-master-8 "Ticket - Master (loss-policy=fence, granted)" do_test ticket-master-9 "Ticket - Master (loss-policy=fence, revoked)" do_test ticket-master-10 "Ticket - Master (loss-policy=freeze, initial)" do_test ticket-master-11 "Ticket - Master (loss-policy=freeze, granted)" do_test ticket-master-12 "Ticket - Master (loss-policy=freeze, revoked)" do_test ticket-master-13 "Ticket - Master (loss-policy=stop, standby, granted)" do_test ticket-master-14 "Ticket - Master (loss-policy=stop, granted, standby)" do_test ticket-master-15 "Ticket - Master (loss-policy=stop, standby, revoked)" do_test ticket-master-16 "Ticket - Master (loss-policy=demote, standby, granted)" do_test ticket-master-17 "Ticket - Master (loss-policy=demote, granted, standby)" do_test ticket-master-18 "Ticket - Master (loss-policy=demote, standby, revoked)" do_test ticket-master-19 "Ticket - Master (loss-policy=fence, standby, granted)" do_test ticket-master-20 "Ticket - Master (loss-policy=fence, granted, standby)" do_test ticket-master-21 "Ticket - Master (loss-policy=fence, standby, revoked)" do_test ticket-master-22 "Ticket - Master (loss-policy=freeze, standby, granted)" do_test ticket-master-23 "Ticket - Master (loss-policy=freeze, granted, standby)" do_test ticket-master-24 "Ticket - Master (loss-policy=freeze, standby, revoked)" echo "" do_test ticket-rsc-sets-1 "Ticket - Resource sets (1 ticket, initial)" do_test ticket-rsc-sets-2 "Ticket - Resource sets (1 ticket, granted)" do_test ticket-rsc-sets-3 "Ticket - Resource sets (1 ticket, revoked)" do_test ticket-rsc-sets-4 "Ticket - Resource sets (2 tickets, initial)" do_test ticket-rsc-sets-5 "Ticket - Resource sets (2 tickets, granted)" do_test ticket-rsc-sets-6 "Ticket - Resource sets (2 tickets, granted)" do_test ticket-rsc-sets-7 "Ticket - Resource sets (2 tickets, revoked)" do_test ticket-rsc-sets-8 "Ticket - Resource sets (1 ticket, standby, granted)" do_test ticket-rsc-sets-9 "Ticket - Resource sets (1 ticket, granted, standby)" do_test ticket-rsc-sets-10 "Ticket - Resource sets (1 ticket, standby, revoked)" do_test ticket-rsc-sets-11 "Ticket - Resource sets (2 tickets, standby, granted)" do_test ticket-rsc-sets-12 "Ticket - Resource sets (2 tickets, standby, granted)" do_test ticket-rsc-sets-13 "Ticket - Resource sets (2 tickets, granted, standby)" do_test ticket-rsc-sets-14 "Ticket - Resource sets (2 tickets, standby, revoked)" echo "" do_test template-1 "Template - 1" do_test template-2 "Template - 2" do_test template-3 "Template - 3 (merge operations)" do_test template-coloc-1 "Template - Colocation 1" do_test template-coloc-2 "Template - Colocation 2" do_test template-coloc-3 "Template - Colocation 3" do_test template-order-1 "Template - Order 1" do_test template-order-2 "Template - Order 2" do_test template-order-3 "Template - Order 3" do_test template-ticket "Template - Ticket" do_test template-rsc-sets-1 "Template - Resource Sets 1" do_test template-rsc-sets-2 "Template - Resource Sets 2" do_test template-rsc-sets-3 "Template - Resource Sets 3" do_test template-rsc-sets-4 "Template - Resource Sets 4" do_test template-clone-primitive "Cloned primitive from template" do_test template-clone-group "Cloned group from template" echo "" do_test container-1 "Container - initial" do_test container-2 "Container - monitor failed" do_test container-3 "Container - stop failed" do_test container-4 "Container - reached migration-threshold" do_test container-group-1 "Container in group - initial" do_test container-group-2 "Container in group - monitor failed" do_test container-group-3 "Container in group - stop failed" do_test container-group-4 "Container in group - reached migration-threshold" echo "" do_test whitebox-fail1 "Fail whitebox container rsc." do_test whitebox-fail2 "Fail whitebox container rsc lrmd connection." do_test whitebox-fail3 "Failed containers should not run nested on remote nodes." do_test whitebox-start "Start whitebox container with resources assigned to it" do_test whitebox-stop "Stop whitebox container with resources assigned to it" do_test whitebox-move "Move whitebox container with resources assigned to it" do_test whitebox-asymmetric "Verify connection rsc opts-in based on container resource" do_test whitebox-ms-ordering "Verify promote/demote can not occur before connection is established" do_test whitebox-ms-ordering "Verify promote/demote can not occur before connection is established" do_test whitebox-orphaned "Properly shutdown orphaned whitebox container" do_test whitebox-orphan-ms "Properly tear down orphan ms resources on remote-nodes" -do_test whitebox-disable-probes "Verify probes are not executed when container probes are disabled" +do_test whitebox-unexpectedly-running "Recover container nodes the cluster did not start." echo "" do_test remote-startup-probes "Baremetal remote-node startup probes" do_test remote-startup "Startup a newly discovered remote-nodes with no status." do_test remote-fence-unclean "Fence unclean baremetal remote-node" do_test remote-move "Move remote-node connection resource" do_test remote-disable "Disable a baremetal remote-node" do_test remote-orphaned "Properly shutdown orphaned connection resource" echo "" test_results diff --git a/pengine/test10/master-partially-demoted-group.xml b/pengine/test10/master-partially-demoted-group.xml index 56582d11f8..13760ba9c0 100644 --- a/pengine/test10/master-partially-demoted-group.xml +++ b/pengine/test10/master-partially-demoted-group.xml @@ -1,412 +1,411 @@ - diff --git a/pengine/test10/whitebox-asymmetric.dot b/pengine/test10/whitebox-asymmetric.dot index 0c4ee01ef8..896f66a790 100644 --- a/pengine/test10/whitebox-asymmetric.dot +++ b/pengine/test10/whitebox-asymmetric.dot @@ -1,26 +1,10 @@ digraph "g" { "18node2_monitor_30000 18builder" [ style=bold color="green" fontcolor="black"] "18node2_start_0 18builder" -> "18node2_monitor_30000 18builder" [ style = bold] -"18node2_start_0 18builder" -> "nfs_mount_monitor_0 18node2" [ style = bold] "18node2_start_0 18builder" -> "nfs_mount_monitor_10000 18node2" [ style = bold] "18node2_start_0 18builder" -> "nfs_mount_start_0 18node2" [ style = bold] -"18node2_start_0 18builder" -> "vg_tags_dup_monitor_0 18node2" [ style = bold] -"18node2_start_0 18builder" -> "vg_tags_monitor_0 18node2" [ style = bold] -"18node2_start_0 18builder" -> "webserver_monitor_0 18node2" [ style = bold] "18node2_start_0 18builder" [ style=bold color="green" fontcolor="black"] -"nfs_mount_monitor_0 18node2" -> "probe_complete 18node2" [ style = bold] -"nfs_mount_monitor_0 18node2" [ style=bold color="green" fontcolor="black"] "nfs_mount_monitor_10000 18node2" [ style=bold color="green" fontcolor="black"] "nfs_mount_start_0 18node2" -> "nfs_mount_monitor_10000 18node2" [ style = bold] "nfs_mount_start_0 18node2" [ style=bold color="green" fontcolor="black"] -"probe_complete 18node2" -> "probe_complete" [ style = bold] -"probe_complete 18node2" [ style=bold color="green" fontcolor="black"] -"probe_complete" -> "nfs_mount_start_0 18node2" [ style = bold] -"probe_complete" [ style=bold color="green" fontcolor="orange"] -"vg_tags_dup_monitor_0 18node2" -> "probe_complete 18node2" [ style = bold] -"vg_tags_dup_monitor_0 18node2" [ style=bold color="green" fontcolor="black"] -"vg_tags_monitor_0 18node2" -> "probe_complete 18node2" [ style = bold] -"vg_tags_monitor_0 18node2" [ style=bold color="green" fontcolor="black"] -"webserver_monitor_0 18node2" -> "probe_complete 18node2" [ style = bold] -"webserver_monitor_0 18node2" [ style=bold color="green" fontcolor="black"] } diff --git a/pengine/test10/whitebox-asymmetric.exp b/pengine/test10/whitebox-asymmetric.exp index 8b2eede9d6..c6a69bc079 100644 --- a/pengine/test10/whitebox-asymmetric.exp +++ b/pengine/test10/whitebox-asymmetric.exp @@ -1,141 +1,53 @@ - - - - - - - - - - - - - - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/pengine/test10/whitebox-asymmetric.summary b/pengine/test10/whitebox-asymmetric.summary index 7d7597707e..1a688022bc 100644 --- a/pengine/test10/whitebox-asymmetric.summary +++ b/pengine/test10/whitebox-asymmetric.summary @@ -1,39 +1,34 @@ Current cluster status: Online: [ 18builder ] fence_false (stonith:fence_false): Stopped container2 (ocf::pacemaker:Dummy): Started 18builder webserver (ocf::pacemaker:Dummy): Stopped nfs_mount (ocf::pacemaker:Dummy): Stopped Resource Group: mygroup vg_tags (ocf::heartbeat:LVM): Stopped vg_tags_dup (ocf::heartbeat:LVM): Stopped Transition Summary: * Start nfs_mount (18node2) * Start 18node2 (18builder) Executing cluster transition: * Resource action: 18node2 start on 18builder - * Resource action: webserver monitor on 18node2 - * Resource action: nfs_mount monitor on 18node2 - * Resource action: vg_tags monitor on 18node2 - * Resource action: vg_tags_dup monitor on 18node2 - * Resource action: 18node2 monitor=30000 on 18builder - * Pseudo action: probe_complete * Resource action: nfs_mount start on 18node2 + * Resource action: 18node2 monitor=30000 on 18builder * Resource action: nfs_mount monitor=10000 on 18node2 Revised cluster status: Online: [ 18builder ] Containers: [ 18node2:container2 ] fence_false (stonith:fence_false): Stopped container2 (ocf::pacemaker:Dummy): Started 18builder webserver (ocf::pacemaker:Dummy): Stopped nfs_mount (ocf::pacemaker:Dummy): Started 18node2 Resource Group: mygroup vg_tags (ocf::heartbeat:LVM): Stopped vg_tags_dup (ocf::heartbeat:LVM): Stopped diff --git a/pengine/test10/whitebox-disable-probes.dot b/pengine/test10/whitebox-disable-probes.dot deleted file mode 100644 index 9a64dea567..0000000000 --- a/pengine/test10/whitebox-disable-probes.dot +++ /dev/null @@ -1,17 +0,0 @@ - digraph "g" { -"M-clone_running_0" [ style=bold color="green" fontcolor="orange"] -"M-clone_start_0" -> "M-clone_running_0" [ style = bold] -"M-clone_start_0" -> "M_start_0 lxc1" [ style = bold] -"M-clone_start_0" [ style=bold color="green" fontcolor="orange"] -"M_monitor_10000 lxc1" [ style=bold color="green" fontcolor="black"] -"M_start_0 lxc1" -> "M-clone_running_0" [ style = bold] -"M_start_0 lxc1" -> "M_monitor_10000 lxc1" [ style = bold] -"M_start_0 lxc1" [ style=bold color="green" fontcolor="black"] -"container1_start_0 18node1" -> "lxc1_start_0 18node1" [ style = bold] -"container1_start_0 18node1" [ style=bold color="green" fontcolor="black"] -"lxc1_monitor_30000 18node1" [ style=bold color="green" fontcolor="black"] -"lxc1_start_0 18node1" -> "M_monitor_10000 lxc1" [ style = bold] -"lxc1_start_0 18node1" -> "M_start_0 lxc1" [ style = bold] -"lxc1_start_0 18node1" -> "lxc1_monitor_30000 18node1" [ style = bold] -"lxc1_start_0 18node1" [ style=bold color="green" fontcolor="black"] -} diff --git a/pengine/test10/whitebox-disable-probes.exp b/pengine/test10/whitebox-disable-probes.exp deleted file mode 100644 index 9ade280cee..0000000000 --- a/pengine/test10/whitebox-disable-probes.exp +++ /dev/null @@ -1,92 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/pengine/test10/whitebox-disable-probes.scores b/pengine/test10/whitebox-disable-probes.scores deleted file mode 100644 index 1a00b280d5..0000000000 --- a/pengine/test10/whitebox-disable-probes.scores +++ /dev/null @@ -1,101 +0,0 @@ -Allocation scores: -clone_color: M-clone allocation score on 18node1: 0 -clone_color: M-clone allocation score on 18node2: 0 -clone_color: M-clone allocation score on 18node3: 0 -clone_color: M-clone allocation score on lxc1: 0 -clone_color: M-clone allocation score on lxc2: 0 -clone_color: M:0 allocation score on 18node1: 100 -clone_color: M:0 allocation score on 18node2: 0 -clone_color: M:0 allocation score on 18node3: 0 -clone_color: M:0 allocation score on lxc1: 0 -clone_color: M:0 allocation score on lxc2: 0 -clone_color: M:1 allocation score on 18node1: 0 -clone_color: M:1 allocation score on 18node2: 100 -clone_color: M:1 allocation score on 18node3: 0 -clone_color: M:1 allocation score on lxc1: 0 -clone_color: M:1 allocation score on lxc2: 0 -clone_color: M:2 allocation score on 18node1: 0 -clone_color: M:2 allocation score on 18node2: 0 -clone_color: M:2 allocation score on 18node3: 100 -clone_color: M:2 allocation score on lxc1: 0 -clone_color: M:2 allocation score on lxc2: 0 -clone_color: M:3 allocation score on 18node1: 0 -clone_color: M:3 allocation score on 18node2: 0 -clone_color: M:3 allocation score on 18node3: 0 -clone_color: M:3 allocation score on lxc1: 0 -clone_color: M:3 allocation score on lxc2: 100 -clone_color: M:4 allocation score on 18node1: 0 -clone_color: M:4 allocation score on 18node2: 0 -clone_color: M:4 allocation score on 18node3: 0 -clone_color: M:4 allocation score on lxc1: 0 -clone_color: M:4 allocation score on lxc2: 0 -native_color: A allocation score on 18node1: 100 -native_color: A allocation score on 18node2: 0 -native_color: A allocation score on 18node3: 0 -native_color: A allocation score on lxc1: 0 -native_color: A allocation score on lxc2: 0 -native_color: B allocation score on 18node1: 0 -native_color: B allocation score on 18node2: 0 -native_color: B allocation score on 18node3: 0 -native_color: B allocation score on lxc1: 0 -native_color: B allocation score on lxc2: 100 -native_color: C allocation score on 18node1: 0 -native_color: C allocation score on 18node2: 0 -native_color: C allocation score on 18node3: 0 -native_color: C allocation score on lxc1: 0 -native_color: C allocation score on lxc2: 100 -native_color: D allocation score on 18node1: 100 -native_color: D allocation score on 18node2: 0 -native_color: D allocation score on 18node3: 0 -native_color: D allocation score on lxc1: 0 -native_color: D allocation score on lxc2: 0 -native_color: M:0 allocation score on 18node1: 100 -native_color: M:0 allocation score on 18node2: 0 -native_color: M:0 allocation score on 18node3: -INFINITY -native_color: M:0 allocation score on lxc1: 0 -native_color: M:0 allocation score on lxc2: -INFINITY -native_color: M:1 allocation score on 18node1: -INFINITY -native_color: M:1 allocation score on 18node2: 100 -native_color: M:1 allocation score on 18node3: -INFINITY -native_color: M:1 allocation score on lxc1: 0 -native_color: M:1 allocation score on lxc2: -INFINITY -native_color: M:2 allocation score on 18node1: 0 -native_color: M:2 allocation score on 18node2: 0 -native_color: M:2 allocation score on 18node3: 100 -native_color: M:2 allocation score on lxc1: 0 -native_color: M:2 allocation score on lxc2: -INFINITY -native_color: M:3 allocation score on 18node1: 0 -native_color: M:3 allocation score on 18node2: 0 -native_color: M:3 allocation score on 18node3: 0 -native_color: M:3 allocation score on lxc1: 0 -native_color: M:3 allocation score on lxc2: 100 -native_color: M:4 allocation score on 18node1: -INFINITY -native_color: M:4 allocation score on 18node2: -INFINITY -native_color: M:4 allocation score on 18node3: -INFINITY -native_color: M:4 allocation score on lxc1: 0 -native_color: M:4 allocation score on lxc2: -INFINITY -native_color: container1 allocation score on 18node1: 0 -native_color: container1 allocation score on 18node2: 0 -native_color: container1 allocation score on 18node3: 0 -native_color: container1 allocation score on lxc1: -INFINITY -native_color: container1 allocation score on lxc2: -INFINITY -native_color: container2 allocation score on 18node1: 0 -native_color: container2 allocation score on 18node2: 200 -native_color: container2 allocation score on 18node3: 0 -native_color: container2 allocation score on lxc1: -INFINITY -native_color: container2 allocation score on lxc2: -INFINITY -native_color: lxc1 allocation score on 18node1: 0 -native_color: lxc1 allocation score on 18node2: -INFINITY -native_color: lxc1 allocation score on 18node3: -INFINITY -native_color: lxc1 allocation score on lxc1: -INFINITY -native_color: lxc1 allocation score on lxc2: -INFINITY -native_color: lxc2 allocation score on 18node1: -INFINITY -native_color: lxc2 allocation score on 18node2: 100 -native_color: lxc2 allocation score on 18node3: -INFINITY -native_color: lxc2 allocation score on lxc1: -INFINITY -native_color: lxc2 allocation score on lxc2: -INFINITY -native_color: shoot1 allocation score on 18node1: 0 -native_color: shoot1 allocation score on 18node2: 0 -native_color: shoot1 allocation score on 18node3: 100 -native_color: shoot1 allocation score on lxc1: -INFINITY -native_color: shoot1 allocation score on lxc2: -INFINITY diff --git a/pengine/test10/whitebox-disable-probes.summary b/pengine/test10/whitebox-disable-probes.summary deleted file mode 100644 index 99cbad0ffb..0000000000 --- a/pengine/test10/whitebox-disable-probes.summary +++ /dev/null @@ -1,44 +0,0 @@ - -Current cluster status: -Online: [ 18node1 18node2 18node3 ] -Containers: [ lxc2:container2 ] - - container1 (ocf::heartbeat:VirtualDomain): Stopped - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 - shoot1 (stonith:fence_xvm): Started 18node3 - Clone Set: M-clone [M] - Started: [ 18node1 18node2 18node3 lxc2 ] - Stopped: [ lxc1 ] - A (ocf::pacemaker:Dummy): Started 18node1 - B (ocf::pacemaker:Dummy): Started lxc2 - C (ocf::pacemaker:Dummy): Started lxc2 - D (ocf::pacemaker:Dummy): Started 18node1 - -Transition Summary: - * Start container1 (18node1) - * Start M:4 (lxc1) - * Start lxc1 (18node1) - -Executing cluster transition: - * Resource action: container1 start on 18node1 - * Pseudo action: M-clone_start_0 - * Resource action: lxc1 start on 18node1 - * Resource action: M start on lxc1 - * Pseudo action: M-clone_running_0 - * Resource action: lxc1 monitor=30000 on 18node1 - * Resource action: M monitor=10000 on lxc1 - -Revised cluster status: -Online: [ 18node1 18node2 18node3 ] -Containers: [ lxc1:container1 lxc2:container2 ] - - container1 (ocf::heartbeat:VirtualDomain): Started 18node1 - container2 (ocf::heartbeat:VirtualDomain): Started 18node2 - shoot1 (stonith:fence_xvm): Started 18node3 - Clone Set: M-clone [M] - Started: [ 18node1 18node2 18node3 lxc1 lxc2 ] - A (ocf::pacemaker:Dummy): Started 18node1 - B (ocf::pacemaker:Dummy): Started lxc2 - C (ocf::pacemaker:Dummy): Started lxc2 - D (ocf::pacemaker:Dummy): Started 18node1 - diff --git a/pengine/test10/whitebox-disable-probes.xml b/pengine/test10/whitebox-disable-probes.xml deleted file mode 100644 index ef2bd31df8..0000000000 --- a/pengine/test10/whitebox-disable-probes.xml +++ /dev/null @@ -1,260 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/pengine/test10/whitebox-fail1.dot b/pengine/test10/whitebox-fail1.dot index 008ee4d1b8..b595015d26 100644 --- a/pengine/test10/whitebox-fail1.dot +++ b/pengine/test10/whitebox-fail1.dot @@ -1,67 +1,44 @@ digraph "g" { -"A_monitor_0 lxc1" -> "probe_complete lxc1" [ style = bold] -"A_monitor_0 lxc1" [ style=bold color="green" fontcolor="black"] -"A_monitor_0 lxc2" -> "probe_complete lxc2" [ style = bold] -"A_monitor_0 lxc2" [ style=bold color="green" fontcolor="black"] -"B_monitor_0 lxc2" -> "probe_complete lxc2" [ style = bold] -"B_monitor_0 lxc2" [ style=bold color="green" fontcolor="black"] "B_monitor_10000 lxc1" [ style=bold color="green" fontcolor="black"] "B_start_0 lxc1" -> "B_monitor_10000 lxc1" [ style = bold] "B_start_0 lxc1" [ style=bold color="green" fontcolor="black"] "B_stop_0 lxc1" -> "B_start_0 lxc1" [ style = bold] "B_stop_0 lxc1" -> "all_stopped" [ style = bold] "B_stop_0 lxc1" [ style=bold color="green" fontcolor="orange"] -"C_monitor_0 lxc1" -> "probe_complete lxc1" [ style = bold] -"C_monitor_0 lxc1" [ style=bold color="green" fontcolor="black"] -"D_monitor_0 lxc1" -> "probe_complete lxc1" [ style = bold] -"D_monitor_0 lxc1" [ style=bold color="green" fontcolor="black"] -"D_monitor_0 lxc2" -> "probe_complete lxc2" [ style = bold] -"D_monitor_0 lxc2" [ style=bold color="green" fontcolor="black"] "M-clone_running_0" [ style=bold color="green" fontcolor="orange"] "M-clone_start_0" -> "M-clone_running_0" [ style = bold] "M-clone_start_0" -> "M_start_0 lxc1" [ style = bold] "M-clone_start_0" [ style=bold color="green" fontcolor="orange"] "M-clone_stop_0" -> "M-clone_stopped_0" [ style = bold] "M-clone_stop_0" -> "M_stop_0 lxc1" [ style = bold] "M-clone_stop_0" [ style=bold color="green" fontcolor="orange"] "M-clone_stopped_0" -> "M-clone_start_0" [ style = bold] "M-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] "M_monitor_10000 lxc1" [ style=bold color="green" fontcolor="black"] "M_start_0 lxc1" -> "M-clone_running_0" [ style = bold] "M_start_0 lxc1" -> "M_monitor_10000 lxc1" [ style = bold] "M_start_0 lxc1" [ style=bold color="green" fontcolor="black"] "M_stop_0 lxc1" -> "M-clone_stopped_0" [ style = bold] "M_stop_0 lxc1" -> "M_start_0 lxc1" [ style = bold] "M_stop_0 lxc1" -> "all_stopped" [ style = bold] "M_stop_0 lxc1" [ style=bold color="green" fontcolor="orange"] "all_stopped" [ style=bold color="green" fontcolor="orange"] "container1_start_0 18node2" -> "lxc1_start_0 18node2" [ style = bold] "container1_start_0 18node2" [ style=bold color="green" fontcolor="black"] "container1_stop_0 18node2" -> "B_stop_0 lxc1" [ style = bold] "container1_stop_0 18node2" -> "M_stop_0 lxc1" [ style = bold] "container1_stop_0 18node2" -> "all_stopped" [ style = bold] "container1_stop_0 18node2" -> "container1_start_0 18node2" [ style = bold] "container1_stop_0 18node2" [ style=bold color="green" fontcolor="black"] "lxc1_monitor_30000 18node2" [ style=bold color="green" fontcolor="black"] -"lxc1_start_0 18node2" -> "A_monitor_0 lxc1" [ style = bold] "lxc1_start_0 18node2" -> "B_monitor_10000 lxc1" [ style = bold] "lxc1_start_0 18node2" -> "B_start_0 lxc1" [ style = bold] -"lxc1_start_0 18node2" -> "C_monitor_0 lxc1" [ style = bold] -"lxc1_start_0 18node2" -> "D_monitor_0 lxc1" [ style = bold] "lxc1_start_0 18node2" -> "M_monitor_10000 lxc1" [ style = bold] "lxc1_start_0 18node2" -> "M_start_0 lxc1" [ style = bold] "lxc1_start_0 18node2" -> "lxc1_monitor_30000 18node2" [ style = bold] "lxc1_start_0 18node2" [ style=bold color="green" fontcolor="black"] "lxc1_stop_0 18node2" -> "all_stopped" [ style = bold] "lxc1_stop_0 18node2" -> "container1_stop_0 18node2" [ style = bold] "lxc1_stop_0 18node2" -> "lxc1_start_0 18node2" [ style = bold] "lxc1_stop_0 18node2" [ style=bold color="green" fontcolor="black"] -"probe_complete lxc1" -> "probe_complete" [ style = bold] -"probe_complete lxc1" [ style=bold color="green" fontcolor="black"] -"probe_complete lxc2" -> "probe_complete" [ style = bold] -"probe_complete lxc2" [ style=bold color="green" fontcolor="black"] -"probe_complete" -> "B_start_0 lxc1" [ style = bold] -"probe_complete" -> "B_stop_0 lxc1" [ style = bold] -"probe_complete" -> "M_stop_0 lxc1" [ style = bold] -"probe_complete" [ style=bold color="green" fontcolor="orange"] } diff --git a/pengine/test10/whitebox-fail1.exp b/pengine/test10/whitebox-fail1.exp index 6e10cf302a..d4c3711400 100644 --- a/pengine/test10/whitebox-fail1.exp +++ b/pengine/test10/whitebox-fail1.exp @@ -1,358 +1,232 @@ - + - + - + - + - + - + - + - + - + - + - - - - + - + - + - + - + - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - + - + - + - + - + - - - - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - + - + - + - + diff --git a/pengine/test10/whitebox-fail1.summary b/pengine/test10/whitebox-fail1.summary index 6d9531877d..5e5887b4ce 100644 --- a/pengine/test10/whitebox-fail1.summary +++ b/pengine/test10/whitebox-fail1.summary @@ -1,60 +1,53 @@ Current cluster status: Online: [ 18node1 18node2 18node3 ] Containers: [ lxc2:container2 ] container1 (ocf::heartbeat:VirtualDomain): FAILED 18node2 container2 (ocf::heartbeat:VirtualDomain): Started 18node2 shoot1 (stonith:fence_xvm): Started 18node3 Clone Set: M-clone [M] Started: [ 18node1 18node2 18node3 lxc2 ] A (ocf::pacemaker:Dummy): Started 18node1 B (ocf::pacemaker:Dummy): FAILED lxc1 C (ocf::pacemaker:Dummy): Started lxc2 D (ocf::pacemaker:Dummy): Started 18node1 Transition Summary: * Recover container1 (Started 18node2) * Recover M:4 (Started lxc1) * Recover B (Started lxc1) * Restart lxc1 (Started 18node2) Executing cluster transition: * Pseudo action: M-clone_stop_0 - * Resource action: A monitor on lxc2 - * Resource action: B monitor on lxc2 - * Resource action: D monitor on lxc2 * Resource action: lxc1 stop on 18node2 * Resource action: container1 stop on 18node2 - * Resource action: container1 start on 18node2 - * Resource action: lxc1 start on 18node2 - * Resource action: lxc1 monitor=30000 on 18node2 - * Resource action: A monitor on lxc1 - * Resource action: C monitor on lxc1 - * Resource action: D monitor on lxc1 - * Pseudo action: probe_complete * Pseudo action: M_stop_0 * Pseudo action: M-clone_stopped_0 * Pseudo action: M-clone_start_0 * Pseudo action: B_stop_0 * Pseudo action: all_stopped + * Resource action: container1 start on 18node2 + * Resource action: lxc1 start on 18node2 + * Resource action: lxc1 monitor=30000 on 18node2 * Resource action: M start on lxc1 * Pseudo action: M-clone_running_0 * Resource action: B start on lxc1 * Resource action: M monitor=10000 on lxc1 * Resource action: B monitor=10000 on lxc1 Revised cluster status: Online: [ 18node1 18node2 18node3 ] Containers: [ lxc1:container1 lxc2:container2 ] container1 (ocf::heartbeat:VirtualDomain): Started 18node2 container2 (ocf::heartbeat:VirtualDomain): Started 18node2 shoot1 (stonith:fence_xvm): Started 18node3 Clone Set: M-clone [M] Started: [ 18node1 18node2 18node3 lxc1 lxc2 ] A (ocf::pacemaker:Dummy): Started 18node1 B (ocf::pacemaker:Dummy): Started lxc1 C (ocf::pacemaker:Dummy): Started lxc2 D (ocf::pacemaker:Dummy): Started 18node1 diff --git a/pengine/test10/whitebox-fail2.dot b/pengine/test10/whitebox-fail2.dot index 008ee4d1b8..b595015d26 100644 --- a/pengine/test10/whitebox-fail2.dot +++ b/pengine/test10/whitebox-fail2.dot @@ -1,67 +1,44 @@ digraph "g" { -"A_monitor_0 lxc1" -> "probe_complete lxc1" [ style = bold] -"A_monitor_0 lxc1" [ style=bold color="green" fontcolor="black"] -"A_monitor_0 lxc2" -> "probe_complete lxc2" [ style = bold] -"A_monitor_0 lxc2" [ style=bold color="green" fontcolor="black"] -"B_monitor_0 lxc2" -> "probe_complete lxc2" [ style = bold] -"B_monitor_0 lxc2" [ style=bold color="green" fontcolor="black"] "B_monitor_10000 lxc1" [ style=bold color="green" fontcolor="black"] "B_start_0 lxc1" -> "B_monitor_10000 lxc1" [ style = bold] "B_start_0 lxc1" [ style=bold color="green" fontcolor="black"] "B_stop_0 lxc1" -> "B_start_0 lxc1" [ style = bold] "B_stop_0 lxc1" -> "all_stopped" [ style = bold] "B_stop_0 lxc1" [ style=bold color="green" fontcolor="orange"] -"C_monitor_0 lxc1" -> "probe_complete lxc1" [ style = bold] -"C_monitor_0 lxc1" [ style=bold color="green" fontcolor="black"] -"D_monitor_0 lxc1" -> "probe_complete lxc1" [ style = bold] -"D_monitor_0 lxc1" [ style=bold color="green" fontcolor="black"] -"D_monitor_0 lxc2" -> "probe_complete lxc2" [ style = bold] -"D_monitor_0 lxc2" [ style=bold color="green" fontcolor="black"] "M-clone_running_0" [ style=bold color="green" fontcolor="orange"] "M-clone_start_0" -> "M-clone_running_0" [ style = bold] "M-clone_start_0" -> "M_start_0 lxc1" [ style = bold] "M-clone_start_0" [ style=bold color="green" fontcolor="orange"] "M-clone_stop_0" -> "M-clone_stopped_0" [ style = bold] "M-clone_stop_0" -> "M_stop_0 lxc1" [ style = bold] "M-clone_stop_0" [ style=bold color="green" fontcolor="orange"] "M-clone_stopped_0" -> "M-clone_start_0" [ style = bold] "M-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] "M_monitor_10000 lxc1" [ style=bold color="green" fontcolor="black"] "M_start_0 lxc1" -> "M-clone_running_0" [ style = bold] "M_start_0 lxc1" -> "M_monitor_10000 lxc1" [ style = bold] "M_start_0 lxc1" [ style=bold color="green" fontcolor="black"] "M_stop_0 lxc1" -> "M-clone_stopped_0" [ style = bold] "M_stop_0 lxc1" -> "M_start_0 lxc1" [ style = bold] "M_stop_0 lxc1" -> "all_stopped" [ style = bold] "M_stop_0 lxc1" [ style=bold color="green" fontcolor="orange"] "all_stopped" [ style=bold color="green" fontcolor="orange"] "container1_start_0 18node2" -> "lxc1_start_0 18node2" [ style = bold] "container1_start_0 18node2" [ style=bold color="green" fontcolor="black"] "container1_stop_0 18node2" -> "B_stop_0 lxc1" [ style = bold] "container1_stop_0 18node2" -> "M_stop_0 lxc1" [ style = bold] "container1_stop_0 18node2" -> "all_stopped" [ style = bold] "container1_stop_0 18node2" -> "container1_start_0 18node2" [ style = bold] "container1_stop_0 18node2" [ style=bold color="green" fontcolor="black"] "lxc1_monitor_30000 18node2" [ style=bold color="green" fontcolor="black"] -"lxc1_start_0 18node2" -> "A_monitor_0 lxc1" [ style = bold] "lxc1_start_0 18node2" -> "B_monitor_10000 lxc1" [ style = bold] "lxc1_start_0 18node2" -> "B_start_0 lxc1" [ style = bold] -"lxc1_start_0 18node2" -> "C_monitor_0 lxc1" [ style = bold] -"lxc1_start_0 18node2" -> "D_monitor_0 lxc1" [ style = bold] "lxc1_start_0 18node2" -> "M_monitor_10000 lxc1" [ style = bold] "lxc1_start_0 18node2" -> "M_start_0 lxc1" [ style = bold] "lxc1_start_0 18node2" -> "lxc1_monitor_30000 18node2" [ style = bold] "lxc1_start_0 18node2" [ style=bold color="green" fontcolor="black"] "lxc1_stop_0 18node2" -> "all_stopped" [ style = bold] "lxc1_stop_0 18node2" -> "container1_stop_0 18node2" [ style = bold] "lxc1_stop_0 18node2" -> "lxc1_start_0 18node2" [ style = bold] "lxc1_stop_0 18node2" [ style=bold color="green" fontcolor="black"] -"probe_complete lxc1" -> "probe_complete" [ style = bold] -"probe_complete lxc1" [ style=bold color="green" fontcolor="black"] -"probe_complete lxc2" -> "probe_complete" [ style = bold] -"probe_complete lxc2" [ style=bold color="green" fontcolor="black"] -"probe_complete" -> "B_start_0 lxc1" [ style = bold] -"probe_complete" -> "B_stop_0 lxc1" [ style = bold] -"probe_complete" -> "M_stop_0 lxc1" [ style = bold] -"probe_complete" [ style=bold color="green" fontcolor="orange"] } diff --git a/pengine/test10/whitebox-fail2.exp b/pengine/test10/whitebox-fail2.exp index 6e10cf302a..d4c3711400 100644 --- a/pengine/test10/whitebox-fail2.exp +++ b/pengine/test10/whitebox-fail2.exp @@ -1,358 +1,232 @@ - + - + - + - + - + - + - + - + - + - + - - - - + - + - + - + - + - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - + - + - + - + - + - - - - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - + - + - + - + diff --git a/pengine/test10/whitebox-fail2.summary b/pengine/test10/whitebox-fail2.summary index 6b3bc9b6d7..338173dbe2 100644 --- a/pengine/test10/whitebox-fail2.summary +++ b/pengine/test10/whitebox-fail2.summary @@ -1,60 +1,53 @@ Current cluster status: Online: [ 18node1 18node2 18node3 ] Containers: [ lxc2:container2 ] container1 (ocf::heartbeat:VirtualDomain): FAILED 18node2 container2 (ocf::heartbeat:VirtualDomain): Started 18node2 shoot1 (stonith:fence_xvm): Started 18node3 Clone Set: M-clone [M] Started: [ 18node1 18node2 18node3 lxc2 ] A (ocf::pacemaker:Dummy): Started 18node1 B (ocf::pacemaker:Dummy): FAILED lxc1 C (ocf::pacemaker:Dummy): Started lxc2 D (ocf::pacemaker:Dummy): Started 18node1 Transition Summary: * Recover container1 (Started 18node2) * Recover M:4 (Started lxc1) * Recover B (Started lxc1) * Recover lxc1 (Started 18node2) Executing cluster transition: * Pseudo action: M-clone_stop_0 - * Resource action: A monitor on lxc2 - * Resource action: B monitor on lxc2 - * Resource action: D monitor on lxc2 * Resource action: lxc1 stop on 18node2 * Resource action: container1 stop on 18node2 - * Resource action: container1 start on 18node2 - * Resource action: lxc1 start on 18node2 - * Resource action: lxc1 monitor=30000 on 18node2 - * Resource action: A monitor on lxc1 - * Resource action: C monitor on lxc1 - * Resource action: D monitor on lxc1 - * Pseudo action: probe_complete * Pseudo action: M_stop_0 * Pseudo action: M-clone_stopped_0 * Pseudo action: M-clone_start_0 * Pseudo action: B_stop_0 * Pseudo action: all_stopped + * Resource action: container1 start on 18node2 + * Resource action: lxc1 start on 18node2 + * Resource action: lxc1 monitor=30000 on 18node2 * Resource action: M start on lxc1 * Pseudo action: M-clone_running_0 * Resource action: B start on lxc1 * Resource action: M monitor=10000 on lxc1 * Resource action: B monitor=10000 on lxc1 Revised cluster status: Online: [ 18node1 18node2 18node3 ] Containers: [ lxc1:container1 lxc2:container2 ] container1 (ocf::heartbeat:VirtualDomain): Started 18node2 container2 (ocf::heartbeat:VirtualDomain): Started 18node2 shoot1 (stonith:fence_xvm): Started 18node3 Clone Set: M-clone [M] Started: [ 18node1 18node2 18node3 lxc1 lxc2 ] A (ocf::pacemaker:Dummy): Started 18node1 B (ocf::pacemaker:Dummy): Started lxc1 C (ocf::pacemaker:Dummy): Started lxc2 D (ocf::pacemaker:Dummy): Started 18node1 diff --git a/pengine/test10/whitebox-fail3.dot b/pengine/test10/whitebox-fail3.dot index 8b0b77ebc6..123acd0ef0 100644 --- a/pengine/test10/whitebox-fail3.dot +++ b/pengine/test10/whitebox-fail3.dot @@ -1,49 +1,33 @@ digraph "g" { "18builder_monitor_30000 dvossel-laptop2" [ style=bold color="green" fontcolor="black"] "18builder_start_0 dvossel-laptop2" -> "18builder_monitor_30000 dvossel-laptop2" [ style = bold] -"18builder_start_0 dvossel-laptop2" -> "FAKE_monitor_0 18builder" [ style = bold] "18builder_start_0 dvossel-laptop2" -> "FAKE_start_0 18builder" [ style = bold] -"18builder_start_0 dvossel-laptop2" -> "W:1_monitor_0 18builder" [ style = bold] "18builder_start_0 dvossel-laptop2" -> "W:1_monitor_10000 18builder" [ style = bold] "18builder_start_0 dvossel-laptop2" -> "W:1_start_0 18builder" [ style = bold] -"18builder_start_0 dvossel-laptop2" -> "X:1_monitor_0 18builder" [ style = bold] "18builder_start_0 dvossel-laptop2" -> "X:1_monitor_10000 18builder" [ style = bold] "18builder_start_0 dvossel-laptop2" -> "X:1_start_0 18builder" [ style = bold] "18builder_start_0 dvossel-laptop2" [ style=bold color="green" fontcolor="black"] -"FAKE_monitor_0 18builder" -> "probe_complete 18builder" [ style = bold] -"FAKE_monitor_0 18builder" [ style=bold color="green" fontcolor="black"] "FAKE_start_0 18builder" [ style=bold color="green" fontcolor="black"] "FAKE_stop_0 dvossel-laptop2" -> "FAKE_start_0 18builder" [ style = bold] "FAKE_stop_0 dvossel-laptop2" -> "all_stopped" [ style = bold] "FAKE_stop_0 dvossel-laptop2" [ style=bold color="green" fontcolor="black"] "W-master_running_0" [ style=bold color="green" fontcolor="orange"] "W-master_start_0" -> "W-master_running_0" [ style = bold] "W-master_start_0" -> "W:1_start_0 18builder" [ style = bold] "W-master_start_0" [ style=bold color="green" fontcolor="orange"] -"W:1_monitor_0 18builder" -> "probe_complete 18builder" [ style = bold] -"W:1_monitor_0 18builder" [ style=bold color="green" fontcolor="black"] "W:1_monitor_10000 18builder" [ style=bold color="green" fontcolor="black"] "W:1_start_0 18builder" -> "W-master_running_0" [ style = bold] "W:1_start_0 18builder" -> "W:1_monitor_10000 18builder" [ style = bold] "W:1_start_0 18builder" [ style=bold color="green" fontcolor="black"] "X-master_running_0" [ style=bold color="green" fontcolor="orange"] "X-master_start_0" -> "X-master_running_0" [ style = bold] "X-master_start_0" -> "X:1_start_0 18builder" [ style = bold] "X-master_start_0" [ style=bold color="green" fontcolor="orange"] -"X:1_monitor_0 18builder" -> "probe_complete 18builder" [ style = bold] -"X:1_monitor_0 18builder" [ style=bold color="green" fontcolor="black"] "X:1_monitor_10000 18builder" [ style=bold color="green" fontcolor="black"] "X:1_start_0 18builder" -> "X-master_running_0" [ style = bold] "X:1_start_0 18builder" -> "X:1_monitor_10000 18builder" [ style = bold] "X:1_start_0 18builder" [ style=bold color="green" fontcolor="black"] "all_stopped" [ style=bold color="green" fontcolor="orange"] -"probe_complete 18builder" -> "probe_complete" [ style = bold] -"probe_complete 18builder" [ style=bold color="green" fontcolor="black"] -"probe_complete" -> "FAKE_start_0 18builder" [ style = bold] -"probe_complete" -> "FAKE_stop_0 dvossel-laptop2" [ style = bold] -"probe_complete" -> "W:1_start_0 18builder" [ style = bold] -"probe_complete" -> "X:1_start_0 18builder" [ style = bold] -"probe_complete" [ style=bold color="green" fontcolor="orange"] "vm_start_0 dvossel-laptop2" -> "18builder_start_0 dvossel-laptop2" [ style = bold] "vm_start_0 dvossel-laptop2" [ style=bold color="green" fontcolor="black"] } diff --git a/pengine/test10/whitebox-fail3.exp b/pengine/test10/whitebox-fail3.exp index 4035f952cb..0f7c43b16a 100644 --- a/pengine/test10/whitebox-fail3.exp +++ b/pengine/test10/whitebox-fail3.exp @@ -1,267 +1,185 @@ - + - + - + - - - - + - + - - - - - + - - - - - - - - - - - - - - + - + - + - + - + - + - - - - - - - - - - - - - - - - - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - + - + - - - - - - - - - - - - - - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + diff --git a/pengine/test10/whitebox-fail3.summary b/pengine/test10/whitebox-fail3.summary index ec9b2ab000..7acf1b6c8d 100644 --- a/pengine/test10/whitebox-fail3.summary +++ b/pengine/test10/whitebox-fail3.summary @@ -1,57 +1,53 @@ Current cluster status: Online: [ dvossel-laptop2 ] vm (ocf::heartbeat:VirtualDomain): Stopped vm2 (ocf::heartbeat:VirtualDomain): Stopped FAKE (ocf::pacemaker:Dummy): Started dvossel-laptop2 Master/Slave Set: W-master [W] Masters: [ dvossel-laptop2 ] Stopped: [ 18builder 18node1 ] Master/Slave Set: X-master [X] Masters: [ dvossel-laptop2 ] Stopped: [ 18builder 18node1 ] Transition Summary: * Start vm (dvossel-laptop2) * Move FAKE (Started dvossel-laptop2 -> 18builder) * Start W:1 (18builder) * Start X:1 (18builder) * Start 18builder (dvossel-laptop2) Executing cluster transition: * Resource action: vm start on dvossel-laptop2 + * Resource action: FAKE stop on dvossel-laptop2 * Pseudo action: W-master_start_0 * Pseudo action: X-master_start_0 * Resource action: 18builder start on dvossel-laptop2 - * Resource action: FAKE monitor on 18builder - * Resource action: W monitor on 18builder - * Resource action: X monitor on 18builder - * Resource action: 18builder monitor=30000 on dvossel-laptop2 - * Pseudo action: probe_complete - * Resource action: FAKE stop on dvossel-laptop2 + * Pseudo action: all_stopped + * Resource action: FAKE start on 18builder * Resource action: W start on 18builder * Pseudo action: W-master_running_0 * Resource action: X start on 18builder * Pseudo action: X-master_running_0 - * Pseudo action: all_stopped - * Resource action: FAKE start on 18builder + * Resource action: 18builder monitor=30000 on dvossel-laptop2 * Resource action: W monitor=10000 on 18builder * Resource action: X monitor=10000 on 18builder Revised cluster status: Online: [ dvossel-laptop2 ] Containers: [ 18builder:vm ] vm (ocf::heartbeat:VirtualDomain): Started dvossel-laptop2 vm2 (ocf::heartbeat:VirtualDomain): Stopped FAKE (ocf::pacemaker:Dummy): Started 18builder Master/Slave Set: W-master [W] Masters: [ dvossel-laptop2 ] Slaves: [ 18builder ] Stopped: [ 18node1 ] Master/Slave Set: X-master [X] Masters: [ dvossel-laptop2 ] Slaves: [ 18builder ] Stopped: [ 18node1 ] diff --git a/pengine/test10/whitebox-move.dot b/pengine/test10/whitebox-move.dot index 762a0fe1c1..7368b1e005 100644 --- a/pengine/test10/whitebox-move.dot +++ b/pengine/test10/whitebox-move.dot @@ -1,54 +1,44 @@ digraph "g" { -"A_monitor_0 lxc2" -> "probe_complete lxc2" [ style = bold] -"A_monitor_0 lxc2" [ style=bold color="green" fontcolor="black"] "A_monitor_10000 lxc1" [ style=bold color="green" fontcolor="black"] "A_start_0 lxc1" -> "A_monitor_10000 lxc1" [ style = bold] "A_start_0 lxc1" [ style=bold color="green" fontcolor="black"] "A_stop_0 lxc1" -> "A_start_0 lxc1" [ style = bold] "A_stop_0 lxc1" -> "all_stopped" [ style = bold] "A_stop_0 lxc1" -> "lxc1_stop_0 18node1" [ style = bold] "A_stop_0 lxc1" [ style=bold color="green" fontcolor="black"] "M-clone_running_0" [ style=bold color="green" fontcolor="orange"] "M-clone_start_0" -> "M-clone_running_0" [ style = bold] "M-clone_start_0" -> "M_start_0 lxc1" [ style = bold] "M-clone_start_0" [ style=bold color="green" fontcolor="orange"] "M-clone_stop_0" -> "M-clone_stopped_0" [ style = bold] "M-clone_stop_0" -> "M_stop_0 lxc1" [ style = bold] "M-clone_stop_0" [ style=bold color="green" fontcolor="orange"] "M-clone_stopped_0" -> "M-clone_start_0" [ style = bold] "M-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] "M_monitor_10000 lxc1" [ style=bold color="green" fontcolor="black"] "M_start_0 lxc1" -> "M-clone_running_0" [ style = bold] "M_start_0 lxc1" -> "M_monitor_10000 lxc1" [ style = bold] "M_start_0 lxc1" [ style=bold color="green" fontcolor="black"] "M_stop_0 lxc1" -> "M-clone_stopped_0" [ style = bold] "M_stop_0 lxc1" -> "M_start_0 lxc1" [ style = bold] "M_stop_0 lxc1" -> "all_stopped" [ style = bold] "M_stop_0 lxc1" -> "lxc1_stop_0 18node1" [ style = bold] "M_stop_0 lxc1" [ style=bold color="green" fontcolor="black"] "all_stopped" [ style=bold color="green" fontcolor="orange"] "container1_start_0 18node2" -> "lxc1_start_0 18node2" [ style = bold] "container1_start_0 18node2" [ style=bold color="green" fontcolor="black"] "container1_stop_0 18node1" -> "all_stopped" [ style = bold] "container1_stop_0 18node1" -> "container1_start_0 18node2" [ style = bold] "container1_stop_0 18node1" [ style=bold color="green" fontcolor="black"] "lxc1_monitor_30000 18node2" [ style=bold color="green" fontcolor="black"] "lxc1_start_0 18node2" -> "A_monitor_10000 lxc1" [ style = bold] "lxc1_start_0 18node2" -> "A_start_0 lxc1" [ style = bold] "lxc1_start_0 18node2" -> "M_monitor_10000 lxc1" [ style = bold] "lxc1_start_0 18node2" -> "M_start_0 lxc1" [ style = bold] "lxc1_start_0 18node2" -> "lxc1_monitor_30000 18node2" [ style = bold] "lxc1_start_0 18node2" [ style=bold color="green" fontcolor="black"] "lxc1_stop_0 18node1" -> "all_stopped" [ style = bold] "lxc1_stop_0 18node1" -> "container1_stop_0 18node1" [ style = bold] "lxc1_stop_0 18node1" -> "lxc1_start_0 18node2" [ style = bold] "lxc1_stop_0 18node1" [ style=bold color="green" fontcolor="black"] -"probe_complete lxc1" -> "probe_complete" [ style = bold] -"probe_complete lxc1" [ style=bold color="green" fontcolor="black"] -"probe_complete lxc2" -> "probe_complete" [ style = bold] -"probe_complete lxc2" [ style=bold color="green" fontcolor="black"] -"probe_complete" -> "A_start_0 lxc1" [ style = bold] -"probe_complete" -> "A_stop_0 lxc1" [ style = bold] -"probe_complete" -> "M_stop_0 lxc1" [ style = bold] -"probe_complete" [ style=bold color="green" fontcolor="orange"] } diff --git a/pengine/test10/whitebox-move.exp b/pengine/test10/whitebox-move.exp index f54d5b8305..5a9fea622f 100644 --- a/pengine/test10/whitebox-move.exp +++ b/pengine/test10/whitebox-move.exp @@ -1,288 +1,234 @@ - + - + - + - + - + - + - + - + - + - - - - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - + - + - + - - - - - - - - - - - - - - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + - + - + - + diff --git a/pengine/test10/whitebox-move.summary b/pengine/test10/whitebox-move.summary index 33b31549b2..3422ac7890 100644 --- a/pengine/test10/whitebox-move.summary +++ b/pengine/test10/whitebox-move.summary @@ -1,49 +1,47 @@ Current cluster status: Online: [ 18node1 18node2 18node3 ] Containers: [ lxc1:container1 lxc2:container2 ] container1 (ocf::heartbeat:VirtualDomain): Started 18node1 container2 (ocf::heartbeat:VirtualDomain): Started 18node2 shoot1 (stonith:fence_xvm): Started 18node3 Clone Set: M-clone [M] Started: [ 18node1 18node2 18node3 lxc1 lxc2 ] A (ocf::pacemaker:Dummy): Started lxc1 Transition Summary: * Move container1 (Started 18node1 -> 18node2) * Restart M:3 (Started lxc1) * Restart A (Started lxc1) * Move lxc1 (Started 18node1 -> 18node2) Executing cluster transition: * Pseudo action: M-clone_stop_0 - * Resource action: A monitor on lxc2 - * Pseudo action: probe_complete + * Resource action: A stop on lxc1 * Resource action: M stop on lxc1 * Pseudo action: M-clone_stopped_0 * Pseudo action: M-clone_start_0 - * Resource action: A stop on lxc1 * Resource action: lxc1 stop on 18node1 * Resource action: container1 stop on 18node1 * Pseudo action: all_stopped * Resource action: container1 start on 18node2 * Resource action: lxc1 start on 18node2 * Resource action: M start on lxc1 * Resource action: M monitor=10000 on lxc1 * Pseudo action: M-clone_running_0 * Resource action: A start on lxc1 * Resource action: A monitor=10000 on lxc1 * Resource action: lxc1 monitor=30000 on 18node2 Revised cluster status: Online: [ 18node1 18node2 18node3 ] Containers: [ lxc1:container1 lxc2:container2 ] container1 (ocf::heartbeat:VirtualDomain): Started 18node2 container2 (ocf::heartbeat:VirtualDomain): Started 18node2 shoot1 (stonith:fence_xvm): Started 18node3 Clone Set: M-clone [M] Started: [ 18node1 18node2 18node3 lxc1 lxc2 ] A (ocf::pacemaker:Dummy): Started lxc1 diff --git a/pengine/test10/whitebox-ms-ordering.dot b/pengine/test10/whitebox-ms-ordering.dot index 8cd4798543..74401309e4 100644 --- a/pengine/test10/whitebox-ms-ordering.dot +++ b/pengine/test10/whitebox-ms-ordering.dot @@ -1,98 +1,94 @@ digraph "g" { "all_stopped" [ style=bold color="green" fontcolor="orange"] "container1_monitor_0 18node1" -> "probe_complete 18node1" [ style = bold] "container1_monitor_0 18node1" [ style=bold color="green" fontcolor="black"] "container1_monitor_0 18node2" -> "probe_complete 18node2" [ style = bold] "container1_monitor_0 18node2" [ style=bold color="green" fontcolor="black"] "container1_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold] "container1_monitor_0 18node3" [ style=bold color="green" fontcolor="black"] "container1_start_0 18node1" -> "lxc1_start_0 18node1" [ style = bold] "container1_start_0 18node1" [ style=bold color="green" fontcolor="black"] "container2_monitor_0 18node1" -> "probe_complete 18node1" [ style = bold] "container2_monitor_0 18node1" [ style=bold color="green" fontcolor="black"] "container2_monitor_0 18node2" -> "probe_complete 18node2" [ style = bold] "container2_monitor_0 18node2" [ style=bold color="green" fontcolor="black"] "container2_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold] "container2_monitor_0 18node3" [ style=bold color="green" fontcolor="black"] "container2_start_0 18node1" -> "lxc2_start_0 18node1" [ style = bold] "container2_start_0 18node1" [ style=bold color="green" fontcolor="black"] "lxc-ms-master_demote_0" -> "lxc-ms-master_demoted_0" [ style = bold] "lxc-ms-master_demote_0" -> "lxc-ms_demote_0 lxc1" [ style = bold] "lxc-ms-master_demote_0" [ style=bold color="green" fontcolor="orange"] "lxc-ms-master_demoted_0" -> "lxc-ms-master_promote_0" [ style = bold] "lxc-ms-master_demoted_0" -> "lxc-ms-master_start_0" [ style = bold] "lxc-ms-master_demoted_0" -> "lxc-ms-master_stop_0" [ style = bold] "lxc-ms-master_demoted_0" [ style=bold color="green" fontcolor="orange"] "lxc-ms-master_promote_0" -> "lxc-ms_promote_0 lxc1" [ style = bold] "lxc-ms-master_promote_0" [ style=bold color="green" fontcolor="orange"] "lxc-ms-master_promoted_0" [ style=bold color="green" fontcolor="orange"] "lxc-ms-master_running_0" -> "lxc-ms-master_promote_0" [ style = bold] "lxc-ms-master_running_0" [ style=bold color="green" fontcolor="orange"] "lxc-ms-master_start_0" -> "lxc-ms-master_running_0" [ style = bold] "lxc-ms-master_start_0" -> "lxc-ms_start_0 lxc1" [ style = bold] "lxc-ms-master_start_0" -> "lxc-ms_start_0 lxc2" [ style = bold] "lxc-ms-master_start_0" [ style=bold color="green" fontcolor="orange"] "lxc-ms-master_stop_0" -> "lxc-ms-master_stopped_0" [ style = bold] "lxc-ms-master_stop_0" -> "lxc-ms_stop_0 lxc1" [ style = bold] "lxc-ms-master_stop_0" -> "lxc-ms_stop_0 lxc2" [ style = bold] "lxc-ms-master_stop_0" [ style=bold color="green" fontcolor="orange"] "lxc-ms-master_stopped_0" -> "lxc-ms-master_promote_0" [ style = bold] "lxc-ms-master_stopped_0" -> "lxc-ms-master_start_0" [ style = bold] "lxc-ms-master_stopped_0" [ style=bold color="green" fontcolor="orange"] "lxc-ms_demote_0 lxc1" -> "lxc-ms-master_demoted_0" [ style = bold] "lxc-ms_demote_0 lxc1" -> "lxc-ms_promote_0 lxc1" [ style = bold] "lxc-ms_demote_0 lxc1" -> "lxc-ms_stop_0 lxc1" [ style = bold] "lxc-ms_demote_0 lxc1" [ style=bold color="green" fontcolor="black"] "lxc-ms_monitor_0 18node1" -> "probe_complete 18node1" [ style = bold] "lxc-ms_monitor_0 18node1" [ style=bold color="green" fontcolor="black"] "lxc-ms_monitor_0 18node2" -> "probe_complete 18node2" [ style = bold] "lxc-ms_monitor_0 18node2" [ style=bold color="green" fontcolor="black"] "lxc-ms_monitor_0 18node3" -> "probe_complete 18node3" [ style = bold] "lxc-ms_monitor_0 18node3" [ style=bold color="green" fontcolor="black"] "lxc-ms_monitor_10000 lxc2" [ style=bold color="green" fontcolor="black"] "lxc-ms_promote_0 lxc1" -> "lxc-ms-master_promoted_0" [ style = bold] "lxc-ms_promote_0 lxc1" [ style=bold color="green" fontcolor="black"] "lxc-ms_start_0 lxc1" -> "lxc-ms-master_running_0" [ style = bold] "lxc-ms_start_0 lxc1" -> "lxc-ms_promote_0 lxc1" [ style = bold] "lxc-ms_start_0 lxc1" [ style=bold color="green" fontcolor="black"] "lxc-ms_start_0 lxc2" -> "lxc-ms-master_running_0" [ style = bold] "lxc-ms_start_0 lxc2" -> "lxc-ms_monitor_10000 lxc2" [ style = bold] "lxc-ms_start_0 lxc2" [ style=bold color="green" fontcolor="black"] "lxc-ms_stop_0 lxc1" -> "all_stopped" [ style = bold] "lxc-ms_stop_0 lxc1" -> "lxc-ms-master_stopped_0" [ style = bold] "lxc-ms_stop_0 lxc1" -> "lxc-ms_start_0 lxc1" [ style = bold] "lxc-ms_stop_0 lxc1" [ style=bold color="green" fontcolor="orange"] "lxc-ms_stop_0 lxc2" -> "all_stopped" [ style = bold] "lxc-ms_stop_0 lxc2" -> "lxc-ms-master_stopped_0" [ style = bold] "lxc-ms_stop_0 lxc2" -> "lxc-ms_start_0 lxc2" [ style = bold] "lxc-ms_stop_0 lxc2" [ style=bold color="green" fontcolor="orange"] "lxc1_monitor_30000 18node1" [ style=bold color="green" fontcolor="black"] "lxc1_start_0 18node1" -> "lxc-ms_demote_0 lxc1" [ style = bold] "lxc1_start_0 18node1" -> "lxc-ms_promote_0 lxc1" [ style = bold] "lxc1_start_0 18node1" -> "lxc-ms_start_0 lxc1" [ style = bold] "lxc1_start_0 18node1" -> "lxc1_monitor_30000 18node1" [ style = bold] "lxc1_start_0 18node1" [ style=bold color="green" fontcolor="black"] "lxc2_monitor_30000 18node1" [ style=bold color="green" fontcolor="black"] "lxc2_start_0 18node1" -> "lxc-ms_monitor_10000 lxc2" [ style = bold] "lxc2_start_0 18node1" -> "lxc-ms_start_0 lxc2" [ style = bold] "lxc2_start_0 18node1" -> "lxc2_monitor_30000 18node1" [ style = bold] "lxc2_start_0 18node1" [ style=bold color="green" fontcolor="black"] "probe_complete 18node1" -> "probe_nodes_complete" [ style = bold] "probe_complete 18node1" [ style=bold color="green" fontcolor="black"] "probe_complete 18node2" -> "probe_nodes_complete" [ style = bold] "probe_complete 18node2" [ style=bold color="green" fontcolor="black"] "probe_complete 18node3" -> "probe_nodes_complete" [ style = bold] "probe_complete 18node3" [ style=bold color="green" fontcolor="black"] -"probe_complete lxc1" -> "probe_complete" [ style = bold] -"probe_complete lxc1" [ style=bold color="green" fontcolor="black"] -"probe_complete lxc2" -> "probe_complete" [ style = bold] -"probe_complete lxc2" [ style=bold color="green" fontcolor="black"] "probe_complete" -> "lxc-ms_start_0 lxc1" [ style = bold] "probe_complete" -> "lxc-ms_start_0 lxc2" [ style = bold] "probe_complete" -> "lxc-ms_stop_0 lxc1" [ style = bold] "probe_complete" -> "lxc-ms_stop_0 lxc2" [ style = bold] "probe_complete" [ style=bold color="green" fontcolor="orange"] "probe_nodes_complete" -> "container1_start_0 18node1" [ style = bold] "probe_nodes_complete" -> "container2_start_0 18node1" [ style = bold] "probe_nodes_complete" [ style=bold color="green" fontcolor="orange"] } diff --git a/pengine/test10/whitebox-ms-ordering.exp b/pengine/test10/whitebox-ms-ordering.exp index bbb667184c..f8915297f6 100644 --- a/pengine/test10/whitebox-ms-ordering.exp +++ b/pengine/test10/whitebox-ms-ordering.exp @@ -1,526 +1,503 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - - - - - - - - - - + - + - + - + - - - - - - - - + - + - + - + diff --git a/pengine/test10/whitebox-orphaned.dot b/pengine/test10/whitebox-orphaned.dot index 0f30e7c24d..289a8f6dc1 100644 --- a/pengine/test10/whitebox-orphaned.dot +++ b/pengine/test10/whitebox-orphaned.dot @@ -1,49 +1,37 @@ digraph "g" { -"A_monitor_0 lxc2" -> "probe_complete lxc2" [ style = bold] -"A_monitor_0 lxc2" [ style=bold color="green" fontcolor="black"] -"B_monitor_0 lxc2" -> "probe_complete lxc2" [ style = bold] -"B_monitor_0 lxc2" [ style=bold color="green" fontcolor="black"] "B_monitor_10000 lxc2" [ style=bold color="green" fontcolor="black"] "B_start_0 lxc2" -> "B_monitor_10000 lxc2" [ style = bold] "B_start_0 lxc2" [ style=bold color="green" fontcolor="black"] "B_stop_0 lxc1" -> "B_start_0 lxc2" [ style = bold] "B_stop_0 lxc1" -> "all_stopped" [ style = bold] "B_stop_0 lxc1" -> "lxc1_stop_0 18node2" [ style = bold] "B_stop_0 lxc1" [ style=bold color="green" fontcolor="black"] -"D_monitor_0 lxc2" -> "probe_complete lxc2" [ style = bold] -"D_monitor_0 lxc2" [ style=bold color="green" fontcolor="black"] "M-clone_stop_0" -> "M-clone_stopped_0" [ style = bold] "M-clone_stop_0" -> "M_stop_0 lxc1" [ style = bold] "M-clone_stop_0" [ style=bold color="green" fontcolor="orange"] "M-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] "M_stop_0 lxc1" -> "M-clone_stopped_0" [ style = bold] "M_stop_0 lxc1" -> "all_stopped" [ style = bold] "M_stop_0 lxc1" -> "lxc1_stop_0 18node2" [ style = bold] "M_stop_0 lxc1" [ style=bold color="green" fontcolor="black"] "all_stopped" [ style=bold color="green" fontcolor="orange"] -"container1_clear_failcount 18node2" -> "probe_complete" [ style = bold] "container1_clear_failcount 18node2" [ style=bold color="green" fontcolor="black"] "container1_delete_0 18node1" [ style=bold color="green" fontcolor="black"] "container1_delete_0 18node2" [ style=bold color="green" fontcolor="black"] "container1_delete_0 18node3" [ style=bold color="green" fontcolor="black"] "container1_stop_0 18node2" -> "all_stopped" [ style = bold] "container1_stop_0 18node2" -> "container1_delete_0 18node1" [ style = bold] "container1_stop_0 18node2" -> "container1_delete_0 18node2" [ style = bold] "container1_stop_0 18node2" -> "container1_delete_0 18node3" [ style = bold] "container1_stop_0 18node2" [ style=bold color="green" fontcolor="black"] -"lxc1_clear_failcount 18node2" -> "probe_complete" [ style = bold] "lxc1_clear_failcount 18node2" [ style=bold color="green" fontcolor="black"] "lxc1_delete_0 18node1" [ style=bold color="green" fontcolor="black"] "lxc1_delete_0 18node2" [ style=bold color="green" fontcolor="black"] "lxc1_delete_0 18node3" [ style=bold color="green" fontcolor="black"] "lxc1_stop_0 18node2" -> "all_stopped" [ style = bold] "lxc1_stop_0 18node2" -> "container1_stop_0 18node2" [ style = bold] "lxc1_stop_0 18node2" -> "lxc1_delete_0 18node1" [ style = bold] "lxc1_stop_0 18node2" -> "lxc1_delete_0 18node2" [ style = bold] "lxc1_stop_0 18node2" -> "lxc1_delete_0 18node3" [ style = bold] "lxc1_stop_0 18node2" [ style=bold color="green" fontcolor="black"] -"probe_complete lxc2" -> "probe_complete" [ style = bold] -"probe_complete lxc2" [ style=bold color="green" fontcolor="black"] -"probe_complete" -> "B_start_0 lxc2" [ style = bold] -"probe_complete" [ style=bold color="green" fontcolor="orange"] } diff --git a/pengine/test10/whitebox-orphaned.exp b/pengine/test10/whitebox-orphaned.exp index 30ffa5ea53..fe96a211d3 100644 --- a/pengine/test10/whitebox-orphaned.exp +++ b/pengine/test10/whitebox-orphaned.exp @@ -1,285 +1,219 @@ - + - + - + - + - + - + - - - - - - - - - - + - + - + - + - - - - + - + - + - - - - - - - - - - - - - - - - - - - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - - - - - - - - - - - - + - + - - - - - - - - - - - - - - - - - - - + - + - + diff --git a/pengine/test10/whitebox-orphaned.summary b/pengine/test10/whitebox-orphaned.summary index 3484149185..7be845326a 100644 --- a/pengine/test10/whitebox-orphaned.summary +++ b/pengine/test10/whitebox-orphaned.summary @@ -1,59 +1,55 @@ Current cluster status: Online: [ 18node1 18node2 18node3 ] Containers: [ lxc1:container1 lxc2:container2 ] container2 (ocf::heartbeat:VirtualDomain): Started 18node2 shoot1 (stonith:fence_xvm): Started 18node3 Clone Set: M-clone [M] M (ocf::pacemaker:Dummy): ORPHANED Started lxc1 Started: [ 18node1 18node2 18node3 lxc2 ] A (ocf::pacemaker:Dummy): Started 18node1 B (ocf::pacemaker:Dummy): Started lxc1 C (ocf::pacemaker:Dummy): Started lxc2 D (ocf::pacemaker:Dummy): Started 18node1 container1 (ocf::heartbeat:VirtualDomain): ORPHANED Started 18node2 lxc1 (ocf::pacemaker:remote): ORPHANED Started 18node2 Transition Summary: * Stop M:4 (lxc1) * Move B (Started lxc1 -> lxc2) * Stop container1 (18node2) * Stop lxc1 (18node2) Executing cluster transition: * Pseudo action: M-clone_stop_0 - * Resource action: A monitor on lxc2 * Resource action: B stop on lxc1 - * Resource action: B monitor on lxc2 - * Resource action: D monitor on lxc2 * Cluster action: clear_failcount for container1 on 18node2 * Cluster action: clear_failcount for lxc1 on 18node2 - * Pseudo action: probe_complete * Resource action: M stop on lxc1 * Pseudo action: M-clone_stopped_0 * Resource action: B start on lxc2 * Resource action: lxc1 stop on 18node2 * Resource action: lxc1 delete on 18node3 * Resource action: lxc1 delete on 18node2 * Resource action: lxc1 delete on 18node1 * Resource action: B monitor=10000 on lxc2 * Resource action: container1 stop on 18node2 * Resource action: container1 delete on 18node3 * Resource action: container1 delete on 18node2 * Resource action: container1 delete on 18node1 * Pseudo action: all_stopped Revised cluster status: Online: [ 18node1 18node2 18node3 ] Containers: [ lxc2:container2 ] container2 (ocf::heartbeat:VirtualDomain): Started 18node2 shoot1 (stonith:fence_xvm): Started 18node3 Clone Set: M-clone [M] Started: [ 18node1 18node2 18node3 lxc2 ] A (ocf::pacemaker:Dummy): Started 18node1 B (ocf::pacemaker:Dummy): Started lxc2 C (ocf::pacemaker:Dummy): Started lxc2 D (ocf::pacemaker:Dummy): Started 18node1 diff --git a/pengine/test10/whitebox-start.dot b/pengine/test10/whitebox-start.dot index f9866021e3..bec57b0845 100644 --- a/pengine/test10/whitebox-start.dot +++ b/pengine/test10/whitebox-start.dot @@ -1,35 +1,17 @@ digraph "g" { -"A_monitor_0 lxc1" -> "probe_complete lxc1" [ style = bold] -"A_monitor_0 lxc1" [ style=bold color="green" fontcolor="black"] -"A_monitor_0 lxc2" -> "probe_complete lxc2" [ style = bold] -"A_monitor_0 lxc2" [ style=bold color="green" fontcolor="black"] -"C_monitor_0 lxc1" -> "probe_complete lxc1" [ style = bold] -"C_monitor_0 lxc1" [ style=bold color="green" fontcolor="black"] -"D_monitor_0 lxc1" -> "probe_complete lxc1" [ style = bold] -"D_monitor_0 lxc1" [ style=bold color="green" fontcolor="black"] -"D_monitor_0 lxc2" -> "probe_complete lxc2" [ style = bold] -"D_monitor_0 lxc2" [ style=bold color="green" fontcolor="black"] "M-clone_running_0" [ style=bold color="green" fontcolor="orange"] "M-clone_start_0" -> "M-clone_running_0" [ style = bold] "M-clone_start_0" -> "M_start_0 lxc1" [ style = bold] "M-clone_start_0" [ style=bold color="green" fontcolor="orange"] "M_monitor_10000 lxc1" [ style=bold color="green" fontcolor="black"] "M_start_0 lxc1" -> "M-clone_running_0" [ style = bold] "M_start_0 lxc1" -> "M_monitor_10000 lxc1" [ style = bold] "M_start_0 lxc1" [ style=bold color="green" fontcolor="black"] "container1_start_0 18node1" -> "lxc1_start_0 18node1" [ style = bold] "container1_start_0 18node1" [ style=bold color="green" fontcolor="black"] "lxc1_monitor_30000 18node1" [ style=bold color="green" fontcolor="black"] -"lxc1_start_0 18node1" -> "A_monitor_0 lxc1" [ style = bold] -"lxc1_start_0 18node1" -> "C_monitor_0 lxc1" [ style = bold] -"lxc1_start_0 18node1" -> "D_monitor_0 lxc1" [ style = bold] "lxc1_start_0 18node1" -> "M_monitor_10000 lxc1" [ style = bold] "lxc1_start_0 18node1" -> "M_start_0 lxc1" [ style = bold] "lxc1_start_0 18node1" -> "lxc1_monitor_30000 18node1" [ style = bold] "lxc1_start_0 18node1" [ style=bold color="green" fontcolor="black"] -"probe_complete lxc1" -> "probe_complete" [ style = bold] -"probe_complete lxc1" [ style=bold color="green" fontcolor="black"] -"probe_complete lxc2" -> "probe_complete" [ style = bold] -"probe_complete lxc2" [ style=bold color="green" fontcolor="black"] -"probe_complete" [ style=bold color="green" fontcolor="orange"] } diff --git a/pengine/test10/whitebox-start.exp b/pengine/test10/whitebox-start.exp index 1b8938e27a..049fc6d1ec 100644 --- a/pengine/test10/whitebox-start.exp +++ b/pengine/test10/whitebox-start.exp @@ -1,198 +1,93 @@ - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + diff --git a/pengine/test10/whitebox-start.summary b/pengine/test10/whitebox-start.summary index 777fe62ea3..2801abe21f 100644 --- a/pengine/test10/whitebox-start.summary +++ b/pengine/test10/whitebox-start.summary @@ -1,50 +1,44 @@ Current cluster status: Online: [ 18node1 18node2 18node3 ] Containers: [ lxc2:container2 ] container1 (ocf::heartbeat:VirtualDomain): Stopped container2 (ocf::heartbeat:VirtualDomain): Started 18node2 shoot1 (stonith:fence_xvm): Started 18node3 Clone Set: M-clone [M] Started: [ 18node1 18node2 18node3 lxc2 ] Stopped: [ lxc1 ] A (ocf::pacemaker:Dummy): Started 18node1 B (ocf::pacemaker:Dummy): Started lxc2 C (ocf::pacemaker:Dummy): Started lxc2 D (ocf::pacemaker:Dummy): Started 18node1 Transition Summary: * Start container1 (18node1) * Start M:4 (lxc1) * Start lxc1 (18node1) Executing cluster transition: * Resource action: container1 start on 18node1 * Pseudo action: M-clone_start_0 - * Resource action: A monitor on lxc2 - * Resource action: D monitor on lxc2 * Resource action: lxc1 start on 18node1 * Resource action: M start on lxc1 * Pseudo action: M-clone_running_0 - * Resource action: A monitor on lxc1 - * Resource action: C monitor on lxc1 - * Resource action: D monitor on lxc1 * Resource action: lxc1 monitor=30000 on 18node1 - * Pseudo action: probe_complete * Resource action: M monitor=10000 on lxc1 Revised cluster status: Online: [ 18node1 18node2 18node3 ] Containers: [ lxc1:container1 lxc2:container2 ] container1 (ocf::heartbeat:VirtualDomain): Started 18node1 container2 (ocf::heartbeat:VirtualDomain): Started 18node2 shoot1 (stonith:fence_xvm): Started 18node3 Clone Set: M-clone [M] Started: [ 18node1 18node2 18node3 lxc1 lxc2 ] A (ocf::pacemaker:Dummy): Started 18node1 B (ocf::pacemaker:Dummy): Started lxc2 C (ocf::pacemaker:Dummy): Started lxc2 D (ocf::pacemaker:Dummy): Started 18node1 diff --git a/pengine/test10/whitebox-stop.dot b/pengine/test10/whitebox-stop.dot index 26dcb24112..9900483098 100644 --- a/pengine/test10/whitebox-stop.dot +++ b/pengine/test10/whitebox-stop.dot @@ -1,33 +1,23 @@ digraph "g" { -"A_monitor_0 lxc2" -> "probe_complete lxc2" [ style = bold] -"A_monitor_0 lxc2" [ style=bold color="green" fontcolor="black"] -"B_monitor_0 lxc2" -> "probe_complete lxc2" [ style = bold] -"B_monitor_0 lxc2" [ style=bold color="green" fontcolor="black"] "B_monitor_10000 lxc2" [ style=bold color="green" fontcolor="black"] "B_start_0 lxc2" -> "B_monitor_10000 lxc2" [ style = bold] "B_start_0 lxc2" [ style=bold color="green" fontcolor="black"] "B_stop_0 lxc1" -> "B_start_0 lxc2" [ style = bold] "B_stop_0 lxc1" -> "all_stopped" [ style = bold] "B_stop_0 lxc1" -> "lxc1_stop_0 18node2" [ style = bold] "B_stop_0 lxc1" [ style=bold color="green" fontcolor="black"] -"D_monitor_0 lxc2" -> "probe_complete lxc2" [ style = bold] -"D_monitor_0 lxc2" [ style=bold color="green" fontcolor="black"] "M-clone_stop_0" -> "M-clone_stopped_0" [ style = bold] "M-clone_stop_0" -> "M_stop_0 lxc1" [ style = bold] "M-clone_stop_0" [ style=bold color="green" fontcolor="orange"] "M-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] "M_stop_0 lxc1" -> "M-clone_stopped_0" [ style = bold] "M_stop_0 lxc1" -> "all_stopped" [ style = bold] "M_stop_0 lxc1" -> "lxc1_stop_0 18node2" [ style = bold] "M_stop_0 lxc1" [ style=bold color="green" fontcolor="black"] "all_stopped" [ style=bold color="green" fontcolor="orange"] "container1_stop_0 18node2" -> "all_stopped" [ style = bold] "container1_stop_0 18node2" [ style=bold color="green" fontcolor="black"] "lxc1_stop_0 18node2" -> "all_stopped" [ style = bold] "lxc1_stop_0 18node2" -> "container1_stop_0 18node2" [ style = bold] "lxc1_stop_0 18node2" [ style=bold color="green" fontcolor="black"] -"probe_complete lxc2" -> "probe_complete" [ style = bold] -"probe_complete lxc2" [ style=bold color="green" fontcolor="black"] -"probe_complete" -> "B_start_0 lxc2" [ style = bold] -"probe_complete" [ style=bold color="green" fontcolor="orange"] } diff --git a/pengine/test10/whitebox-stop.exp b/pengine/test10/whitebox-stop.exp index d32fdeabf7..71f1bf0e1b 100644 --- a/pengine/test10/whitebox-stop.exp +++ b/pengine/test10/whitebox-stop.exp @@ -1,184 +1,124 @@ - + - + - + - + - + - + - + - + - - - - - - - - - - + - + - + - + - - - - + - + - + - - - - - - - - - - - - - - - - - - - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + - + - + - + diff --git a/pengine/test10/whitebox-stop.summary b/pengine/test10/whitebox-stop.summary index 8862c4b075..51357b9552 100644 --- a/pengine/test10/whitebox-stop.summary +++ b/pengine/test10/whitebox-stop.summary @@ -1,51 +1,47 @@ Current cluster status: Online: [ 18node1 18node2 18node3 ] Containers: [ lxc1:container1 lxc2:container2 ] container1 (ocf::heartbeat:VirtualDomain): Started 18node2 container2 (ocf::heartbeat:VirtualDomain): Started 18node2 shoot1 (stonith:fence_xvm): Started 18node3 Clone Set: M-clone [M] Started: [ 18node1 18node2 18node3 lxc1 lxc2 ] A (ocf::pacemaker:Dummy): Started 18node1 B (ocf::pacemaker:Dummy): Started lxc1 C (ocf::pacemaker:Dummy): Started lxc2 D (ocf::pacemaker:Dummy): Started 18node1 Transition Summary: * Stop container1 (18node2) * Stop M:4 (lxc1) * Move B (Started lxc1 -> lxc2) * Stop lxc1 (18node2) Executing cluster transition: * Pseudo action: M-clone_stop_0 - * Resource action: A monitor on lxc2 * Resource action: B stop on lxc1 - * Resource action: B monitor on lxc2 - * Resource action: D monitor on lxc2 - * Pseudo action: probe_complete * Resource action: M stop on lxc1 * Pseudo action: M-clone_stopped_0 * Resource action: B start on lxc2 * Resource action: lxc1 stop on 18node2 * Resource action: container1 stop on 18node2 * Resource action: B monitor=10000 on lxc2 * Pseudo action: all_stopped Revised cluster status: Online: [ 18node1 18node2 18node3 ] Containers: [ lxc2:container2 ] container1 (ocf::heartbeat:VirtualDomain): Stopped container2 (ocf::heartbeat:VirtualDomain): Started 18node2 shoot1 (stonith:fence_xvm): Started 18node3 Clone Set: M-clone [M] Started: [ 18node1 18node2 18node3 lxc2 ] Stopped: [ lxc1 ] A (ocf::pacemaker:Dummy): Started 18node1 B (ocf::pacemaker:Dummy): Started lxc2 C (ocf::pacemaker:Dummy): Started lxc2 D (ocf::pacemaker:Dummy): Started 18node1 diff --git a/pengine/test10/whitebox-unexpectedly-running.dot b/pengine/test10/whitebox-unexpectedly-running.dot new file mode 100644 index 0000000000..d87344a984 --- /dev/null +++ b/pengine/test10/whitebox-unexpectedly-running.dot @@ -0,0 +1,13 @@ + digraph "g" { +"FAKE_monitor_60000 18builder" [ style=bold color="green" fontcolor="black"] +"FAKE_start_0 18builder" -> "FAKE_monitor_60000 18builder" [ style = bold] +"FAKE_start_0 18builder" -> "remote1_start_0 18builder" [ style = bold] +"FAKE_start_0 18builder" [ style=bold color="green" fontcolor="black"] +"FAKE_stop_0 18builder" -> "FAKE_start_0 18builder" [ style = bold] +"FAKE_stop_0 18builder" -> "all_stopped" [ style = bold] +"FAKE_stop_0 18builder" [ style=bold color="green" fontcolor="black"] +"all_stopped" [ style=bold color="green" fontcolor="orange"] +"remote1_monitor_30000 18builder" [ style=bold color="green" fontcolor="black"] +"remote1_start_0 18builder" -> "remote1_monitor_30000 18builder" [ style = bold] +"remote1_start_0 18builder" [ style=bold color="green" fontcolor="black"] +} diff --git a/pengine/test10/whitebox-unexpectedly-running.exp b/pengine/test10/whitebox-unexpectedly-running.exp new file mode 100644 index 0000000000..69994074bf --- /dev/null +++ b/pengine/test10/whitebox-unexpectedly-running.exp @@ -0,0 +1,75 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/whitebox-unexpectedly-running.scores b/pengine/test10/whitebox-unexpectedly-running.scores new file mode 100644 index 0000000000..7f8a1d91c5 --- /dev/null +++ b/pengine/test10/whitebox-unexpectedly-running.scores @@ -0,0 +1,5 @@ +Allocation scores: +native_color: FAKE allocation score on 18builder: 0 +native_color: FAKE allocation score on remote1: -INFINITY +native_color: remote1 allocation score on 18builder: 0 +native_color: remote1 allocation score on remote1: -INFINITY diff --git a/pengine/test10/whitebox-unexpectedly-running.summary b/pengine/test10/whitebox-unexpectedly-running.summary new file mode 100644 index 0000000000..f834e417a7 --- /dev/null +++ b/pengine/test10/whitebox-unexpectedly-running.summary @@ -0,0 +1,24 @@ + +Current cluster status: +Online: [ 18builder ] + + FAKE (ocf::pacemaker:Dummy): FAILED 18builder + +Transition Summary: + * Recover FAKE (Started 18builder) + * Start remote1 (18builder) + +Executing cluster transition: + * Resource action: FAKE stop on 18builder + * Pseudo action: all_stopped + * Resource action: FAKE start on 18builder + * Resource action: remote1 start on 18builder + * Resource action: FAKE monitor=60000 on 18builder + * Resource action: remote1 monitor=30000 on 18builder + +Revised cluster status: +Online: [ 18builder ] +Containers: [ remote1:FAKE ] + + FAKE (ocf::pacemaker:Dummy): Started 18builder + diff --git a/pengine/test10/whitebox-unexpectedly-running.xml b/pengine/test10/whitebox-unexpectedly-running.xml new file mode 100644 index 0000000000..4ec20c472c --- /dev/null +++ b/pengine/test10/whitebox-unexpectedly-running.xml @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +