diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h index c1dbcb470e..ca3091362f 100644 --- a/include/crm/pengine/status.h +++ b/include/crm/pengine/status.h @@ -1,243 +1,244 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef PENGINE_STATUS__H #define PENGINE_STATUS__H #include #include #include typedef struct node_s node_t; typedef struct action_s action_t; typedef struct resource_s resource_t; typedef enum no_quorum_policy_e { no_quorum_freeze, no_quorum_stop, no_quorum_ignore, no_quorum_suicide } no_quorum_policy_t; enum node_type { node_ping, node_member }; enum pe_restart { pe_restart_restart, pe_restart_ignore }; #define pe_flag_have_quorum 0x00000001ULL #define pe_flag_symmetric_cluster 0x00000002ULL #define pe_flag_is_managed_default 0x00000004ULL #define pe_flag_maintenance_mode 0x00000008ULL #define pe_flag_stonith_enabled 0x00000010ULL #define pe_flag_have_stonith_resource 0x00000020ULL #define pe_flag_stop_rsc_orphans 0x00000100ULL #define pe_flag_stop_action_orphans 0x00000200ULL #define pe_flag_stop_everything 0x00000400ULL #define pe_flag_start_failure_fatal 0x00001000ULL #define pe_flag_remove_after_stop 0x00002000ULL typedef struct pe_working_set_s { xmlNode *input; ha_time_t *now; /* options extracted from the input */ char *dc_uuid; node_t *dc_node; const char *stonith_action; unsigned long long flags; int stonith_timeout; int default_resource_stickiness; no_quorum_policy_t no_quorum_policy; GHashTable *config_hash; GListPtr nodes; GListPtr resources; GListPtr placement_constraints; GListPtr ordering_constraints; GListPtr colocation_constraints; GListPtr actions; xmlNode *failed; xmlNode *op_defaults; xmlNode *rsc_defaults; /* stats */ int num_synapse; int max_valid_nodes; int order_id; int action_id; /* final output */ xmlNode *graph; } pe_working_set_t; struct node_shared_s { const char *id; const char *uname; gboolean online; gboolean standby; + gboolean standby_onfail; gboolean pending; gboolean unclean; gboolean shutdown; gboolean expected_up; gboolean is_dc; int num_resources; GListPtr running_rsc; /* resource_t* */ GListPtr allocated_rsc; /* resource_t* */ GHashTable *attrs; /* char* => char* */ enum node_type type; }; struct node_s { int weight; gboolean fixed; int count; struct node_shared_s *details; }; #include #define pe_rsc_orphan 0x00000001ULL #define pe_rsc_managed 0x00000002ULL #define pe_rsc_notify 0x00000010ULL #define pe_rsc_unique 0x00000020ULL #define pe_rsc_can_migrate 0x00000040ULL #define pe_rsc_provisional 0x00000100ULL #define pe_rsc_allocating 0x00000200ULL #define pe_rsc_merging 0x00000400ULL #define pe_rsc_failed 0x00010000ULL #define pe_rsc_shutdown 0x00020000ULL #define pe_rsc_runnable 0x00040000ULL #define pe_rsc_start_pending 0x00080000ULL #define pe_rsc_starting 0x00100000ULL #define pe_rsc_stopping 0x00200000ULL struct resource_s { char *id; char *clone_name; char *long_name; xmlNode *xml; xmlNode *ops_xml; resource_t *parent; void *variant_opaque; enum pe_obj_types variant; resource_object_functions_t *fns; resource_alloc_functions_t *cmds; enum rsc_recovery_type recovery_type; enum pe_restart restart_type; int priority; int stickiness; int sort_index; int failure_timeout; int effective_priority; int migration_threshold; unsigned long long flags; GListPtr rsc_cons_lhs; /* rsc_colocation_t* */ GListPtr rsc_cons; /* rsc_colocation_t* */ GListPtr rsc_location; /* rsc_to_node_t* */ GListPtr actions; /* action_t* */ node_t *allocated_to; GListPtr running_on; /* node_t* */ GListPtr known_on; /* node_t* */ GListPtr allowed_nodes; /* node_t* */ enum rsc_role_e role; enum rsc_role_e next_role; GHashTable *meta; GHashTable *parameters; GListPtr children; /* resource_t* */ }; struct action_s { int id; int priority; resource_t *rsc; void *rsc_opaque; node_t *node; char *task; char *uuid; xmlNode *op_entry; gboolean pseudo; gboolean runnable; gboolean optional; gboolean print_always; gboolean failure_is_fatal; gboolean implied_by_stonith; gboolean allow_reload_conversion; enum rsc_start_requirement needs; enum action_fail_response on_fail; enum rsc_role_e fail_role; gboolean dumped; gboolean processed; action_t *pre_notify; action_t *pre_notified; action_t *post_notify; action_t *post_notified; int seen_count; GHashTable *meta; GHashTable *extra; GHashTable *notify_keys; /* do NOT free */ GListPtr actions_before; /* action_warpper_t* */ GListPtr actions_after; /* action_warpper_t* */ }; gboolean cluster_status(pe_working_set_t *data_set); extern void set_working_set_defaults(pe_working_set_t *data_set); extern void cleanup_calculations(pe_working_set_t *data_set); extern resource_t *pe_find_resource(GListPtr rsc_list, const char *id_rh); extern node_t *pe_find_node(GListPtr node_list, const char *uname); extern node_t *pe_find_node_id(GListPtr node_list, const char *id); extern GListPtr find_operations( const char *rsc, const char *node, gboolean active_filter, pe_working_set_t *data_set); #endif diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index ef38276a20..66e3b9917c 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,1597 +1,1598 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include /* for ONLINESTATUS */ #include #include #include #include #define set_config_flag(data_set, option, flag) do { \ const char *tmp = pe_pref(data_set->config_hash, option); \ if(tmp) { \ if(crm_is_true(tmp)) { \ set_bit_inplace(data_set->flags, flag); \ } else { \ clear_bit_inplace(data_set->flags, flag); \ } \ } \ } while(0) gboolean unpack_config(xmlNode *config, pe_working_set_t *data_set) { const char *value = NULL; GHashTable *config_hash = g_hash_table_new_full( g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str); data_set->config_hash = config_hash; unpack_instance_attributes( config, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, data_set->now); verify_pe_options(data_set->config_hash); value = pe_pref(data_set->config_hash, "stonith-timeout"); data_set->stonith_timeout = crm_get_msec(value); crm_debug("STONITH timeout: %d", data_set->stonith_timeout); set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled); crm_debug("STONITH of failed nodes is %s", is_set(data_set->flags, pe_flag_stonith_enabled)?"enabled":"disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); crm_debug_2("STONITH will %s nodes", data_set->stonith_action); set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything); crm_debug("Stop all active resources: %s", is_set(data_set->flags, pe_flag_stop_everything)?"true":"false"); set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster); if(is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "default-resource-stickiness"); data_set->default_resource_stickiness = char2score(value); crm_debug("Default stickiness: %d", data_set->default_resource_stickiness); value = pe_pref(data_set->config_hash, "no-quorum-policy"); if(safe_str_eq(value, "ignore")) { data_set->no_quorum_policy = no_quorum_ignore; } else if(safe_str_eq(value, "freeze")) { data_set->no_quorum_policy = no_quorum_freeze; } else if(safe_str_eq(value, "suicide")) { gboolean do_panic = FALSE; crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic); if(is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE){ crm_config_err("Setting no-quorum-policy=suicide makes no sense if stonith-enabled=false"); } if(do_panic && is_set(data_set->flags, pe_flag_stonith_enabled)) { data_set->no_quorum_policy = no_quorum_suicide; } else if(is_set(data_set->flags, pe_flag_have_quorum) == FALSE && do_panic == FALSE) { crm_notice("Resetting no-quorum-policy to 'stop': The cluster has never had quorum"); data_set->no_quorum_policy = no_quorum_stop; } } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of CCM Quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of CCM Quorum: Stop ALL resources"); break; case no_quorum_suicide: crm_notice("On loss of CCM Quorum: Fence all remaining nodes"); break; case no_quorum_ignore: crm_notice("On loss of CCM Quorum: Ignore"); break; } set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans); crm_debug_2("Orphan resources are %s", is_set(data_set->flags, pe_flag_stop_rsc_orphans)?"stopped":"ignored"); set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans); crm_debug_2("Orphan resource actions are %s", is_set(data_set->flags, pe_flag_stop_action_orphans)?"stopped":"ignored"); set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop); crm_debug_2("Stopped resources are removed from the status section: %s", is_set(data_set->flags, pe_flag_remove_after_stop)?"true":"false"); set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode); crm_debug_2("Maintenance mode: %s", is_set(data_set->flags, pe_flag_maintenance_mode)?"true":"false"); if(is_set(data_set->flags, pe_flag_maintenance_mode)) { clear_bit(data_set->flags, pe_flag_is_managed_default); } else { set_config_flag(data_set, "is-managed-default", pe_flag_is_managed_default); } crm_debug_2("By default resources are %smanaged", is_set(data_set->flags, pe_flag_is_managed_default)?"":"not "); set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal); crm_debug_2("Start failures are %s", is_set(data_set->flags, pe_flag_start_failure_fatal)?"always fatal":"handled by failcount"); return TRUE; } gboolean unpack_nodes(xmlNode * xml_nodes, pe_working_set_t *data_set) { node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; gboolean unseen_are_unclean = TRUE; const char *blind_faith = pe_pref( data_set->config_hash, "startup-fencing"); if(crm_is_true(blind_faith) == FALSE) { unseen_are_unclean = FALSE; crm_warn("Blind faith: not fencing unseen nodes"); } xml_child_iter_filter( xml_nodes, xml_obj, XML_CIB_TAG_NODE, new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); crm_debug_3("Processing node %s/%s", uname, id); if(id == NULL) { crm_config_err("Must specify id tag in "); continue; } if(type == NULL) { crm_config_err("Must specify type tag in "); continue; } if(pe_find_node(data_set->nodes, uname) != NULL) { crm_config_warn("Detected multiple node entries with uname=%s" " - this is rarely intended", uname); } crm_malloc0(new_node, sizeof(node_t)); if(new_node == NULL) { return FALSE; } new_node->weight = 0; new_node->fixed = FALSE; crm_malloc0(new_node->details, sizeof(struct node_shared_s)); if(new_node->details == NULL) { crm_free(new_node); return FALSE; } crm_debug_3("Creaing node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->type = node_ping; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->running_rsc = NULL; new_node->details->attrs = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ if(is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE || unseen_are_unclean == FALSE) { /* blind faith... */ new_node->details->unclean = FALSE; } else { /* all nodes are unclean until we've seen their * status entry */ new_node->details->unclean = TRUE; } if(type == NULL || safe_str_eq(type, "member") || safe_str_eq(type, NORMALNODE)) { new_node->details->type = node_member; } add_node_attrs(xml_obj, new_node, FALSE, data_set); data_set->nodes = g_list_append(data_set->nodes, new_node); crm_debug_3("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); ); return TRUE; } gboolean unpack_resources(xmlNode * xml_resources, pe_working_set_t *data_set) { xml_child_iter( xml_resources, xml_obj, resource_t *new_rsc = NULL; crm_debug_3("Begining unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj)); if(common_unpack(xml_obj, &new_rsc, NULL, data_set)) { data_set->resources = g_list_append( data_set->resources, new_rsc); print_resource(LOG_DEBUG_3, "Added", new_rsc, FALSE); } else { crm_config_err("Failed unpacking %s %s", crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID)); if(new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } } ); data_set->resources = g_list_sort( data_set->resources, sort_rsc_priority); if(is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { crm_config_warn("No STONITH resources have been defined"); } return TRUE; } /* remove nodes that are down, stopping */ /* create +ve rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(xmlNode * status, pe_working_set_t *data_set) { const char *id = NULL; const char *uname = NULL; xmlNode * lrm_rsc = NULL; xmlNode * attrs = NULL; node_t *this_node = NULL; crm_debug_3("Begining unpack"); xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, id = crm_element_value(node_state, XML_ATTR_ID); uname = crm_element_value(node_state, XML_ATTR_UNAME); attrs = find_xml_node( node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); crm_debug_3("Processing node %s", uname); this_node = pe_find_node_id(data_set->nodes, id); if(uname == NULL) { /* error */ continue; } else if(this_node == NULL) { crm_config_warn("Node %s in status section no longer exists", uname); continue; } /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; add_node_attrs(attrs, this_node, TRUE, data_set); if(crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } crm_debug_3("determining node state"); determine_online_status(node_state, this_node, data_set); if(data_set->no_quorum_policy == no_quorum_suicide) { /* Everything else should flow from this automatically * At least until the PE becomes able to migrate off healthy resources */ crm_notice("Marking node %s STONITH: The cluster does not have quorum", this_node->details->uname); this_node->details->unclean = TRUE; } if(this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ crm_debug_3("Processing lrm resource entries"); unpack_lrm_resources(this_node, lrm_rsc, data_set); } ); return TRUE; } static gboolean determine_online_status_no_fencing(xmlNode * node_state, node_t *this_node) { gboolean online = FALSE; const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE); const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if(ha_state == NULL) { ha_state = DEADSTATUS; } if(!crm_is_true(ccm_state) || safe_str_eq(ha_state, DEADSTATUS)){ crm_debug_2("Node is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); } else if(!crm_is_true(ccm_state) || safe_str_eq(ha_state, DEADSTATUS)) { } else if(safe_str_eq(crm_state, ONLINESTATUS)) { if(safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join_state); } } else if(this_node->details->expected_up == FALSE) { crm_debug_2("CRMd is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); crm_debug_2("\tcrm_state=%s, join_state=%s, expected=%s", crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { /* mark it unclean */ this_node->details->unclean = TRUE; crm_warn("Node %s is partially & un-expectedly down", this_node->details->uname); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } return online; } static gboolean determine_online_status_fencing(xmlNode * node_state, node_t *this_node) { gboolean online = FALSE; const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE); const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); const char *terminate = g_hash_table_lookup(this_node->details->attrs, "terminate"); if(ha_state == NULL) { ha_state = DEADSTATUS; } if(crm_is_true(terminate)) { /* TODO: Possibly remove this block */ this_node->details->expected_up = FALSE; } if(crm_is_true(ccm_state) && safe_str_eq(ha_state, ACTIVESTATUS) && safe_str_eq(crm_state, ONLINESTATUS)) { if(safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { online = TRUE; if(crm_is_true(terminate)) { crm_notice("Forcing node %s to be terminated", this_node->details->uname); this_node->details->unclean = TRUE; this_node->details->shutdown = TRUE; } } else if(join_state == exp_state /* == NULL */) { crm_info("Node %s is coming up", this_node->details->uname); crm_debug("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else if(safe_str_eq(join_state, CRMD_JOINSTATE_PENDING)) { crm_info("Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; online = TRUE; } else if(safe_str_eq(join_state, CRMD_JOINSTATE_NACK)) { crm_warn("Node %s is not part of the cluster", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; online = TRUE; } else { crm_warn("Node %s (%s) is un-expectedly down", this_node->details->uname, this_node->details->id); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); this_node->details->unclean = TRUE; } } else if(crm_is_true(ccm_state) == FALSE && safe_str_eq(ha_state, DEADSTATUS) && safe_str_eq(crm_state, OFFLINESTATUS) && this_node->details->expected_up == FALSE) { crm_debug("Node %s is down: join_state=%s, expected=%s", this_node->details->uname, crm_str(join_state), crm_str(exp_state)); } else if(crm_is_true(terminate)) { crm_info("Node %s is %s after forced termination", this_node->details->uname, crm_is_true(ccm_state)?"coming up":"going down"); crm_debug("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); if(crm_is_true(ccm_state) == FALSE) { this_node->details->standby = TRUE; this_node->details->pending = TRUE; online = TRUE; } } else if(this_node->details->expected_up) { /* mark it unclean */ this_node->details->unclean = TRUE; crm_warn("Node %s (%s) is un-expectedly down", this_node->details->uname, this_node->details->id); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { crm_info("Node %s is coming up", this_node->details->uname); crm_debug("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } return online; } gboolean determine_online_status( xmlNode * node_state, node_t *this_node, pe_working_set_t *data_set) { gboolean online = FALSE; const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); const char *shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN); if(this_node == NULL) { crm_config_err("No node to check"); return online; } this_node->details->shutdown = FALSE; this_node->details->expected_up = FALSE; if(shutdown != NULL && safe_str_neq("0", shutdown)) { this_node->details->shutdown = TRUE; } else if(safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) { this_node->details->expected_up = TRUE; } if(is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { online = determine_online_status_no_fencing( node_state, this_node); } else { online = determine_online_status_fencing( node_state, this_node); } if(online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if(online && this_node->details->shutdown) { /* dont run resources here */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if(this_node->details->unclean) { pe_proc_warn("Node %s is unclean", this_node->details->uname); } else if(this_node->details->online) { crm_info("Node %s is %s", this_node->details->uname, this_node->details->shutdown?"shutting down": this_node->details->pending?"pending": this_node->details->standby?"standby":"online"); } else { crm_debug_2("Node %s is offline", this_node->details->uname); } return online; } #define set_char(x) last_rsc_id[lpc] = x; complete = TRUE; static char * increment_clone(char *last_rsc_id) { int lpc = 0; int len = 0; char *tmp = NULL; gboolean complete = FALSE; CRM_CHECK(last_rsc_id != NULL, return NULL); if(last_rsc_id != NULL) { len = strlen(last_rsc_id); } lpc = len-1; while(complete == FALSE && lpc > 0) { switch (last_rsc_id[lpc]) { case 0: lpc--; break; case '0': set_char('1'); break; case '1': set_char('2'); break; case '2': set_char('3'); break; case '3': set_char('4'); break; case '4': set_char('5'); break; case '5': set_char('6'); break; case '6': set_char('7'); break; case '7': set_char('8'); break; case '8': set_char('9'); break; case '9': last_rsc_id[lpc] = '0'; lpc--; break; case ':': tmp = last_rsc_id; crm_malloc0(last_rsc_id, len + 2); memcpy(last_rsc_id, tmp, len); last_rsc_id[++lpc] = '1'; last_rsc_id[len] = '0'; last_rsc_id[len+1] = 0; complete = TRUE; crm_free(tmp); break; default: crm_err("Unexpected char: %c (%d)", last_rsc_id[lpc], lpc); break; } } return last_rsc_id; } static resource_t * create_fake_resource(const char *rsc_id, xmlNode *rsc_entry, pe_working_set_t *data_set) { resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_info(xml_rsc, "Orphan resource"); common_unpack(xml_rsc, &rsc, NULL, data_set); set_bit(rsc->flags, pe_rsc_orphan); data_set->resources = g_list_append(data_set->resources, rsc); return rsc; } extern gboolean create_child_clone(resource_t *rsc, int sub_id, pe_working_set_t *data_set); static resource_t * unpack_find_resource( pe_working_set_t *data_set, node_t *node, const char *rsc_id, xmlNode *rsc_entry) { resource_t *rsc = NULL; resource_t *clone_parent = NULL; gboolean is_duped_clone = FALSE; char *alt_rsc_id = crm_strdup(rsc_id); while(rsc == NULL) { crm_debug_3("looking for: %s", alt_rsc_id); rsc = pe_find_resource(data_set->resources, alt_rsc_id); /* no match */ if(rsc == NULL) { crm_debug_2("%s not found: %d", alt_rsc_id, is_duped_clone); if(is_duped_clone == FALSE) { break; } /* create one */ #if 1 create_child_clone(clone_parent, -1, data_set); crm_debug("Looking again for %s", alt_rsc_id); rsc = pe_find_resource(data_set->resources, alt_rsc_id); CRM_CHECK(rsc != NULL, crm_err("%s stil not found", alt_rsc_id); continue); #else rsc = create_fake_resource(alt_rsc_id, rsc_entry, data_set); crm_info("Making sure orphan %s/%s of %s is stopped on %s", rsc_id, rsc->id, clone_parent->id, node->details->uname); resource_location(rsc, NULL, -INFINITY, "__orphan_clone_dont_run__", data_set); break; #endif /* not running anywhere else */ } else if(rsc->running_on == NULL) { crm_debug_3("not active yet"); break; /* always unique */ } else if(is_set(rsc->flags, pe_rsc_unique)) { crm_debug_3("unique"); break; /* running somewhere already but we dont care * find another clone instead */ } else { crm_debug_3("find another one"); clone_parent = uber_parent(rsc); rsc = NULL; is_duped_clone = TRUE; alt_rsc_id = increment_clone(alt_rsc_id); } } crm_free(alt_rsc_id); if(rsc != NULL) { crm_free(rsc->clone_name); rsc->clone_name = NULL; if(is_duped_clone) { crm_info("Internally renamed %s on %s to %s", rsc_id, node->details->uname, rsc->id); rsc->clone_name = crm_strdup(rsc_id); } } return rsc; } static resource_t * process_orphan_resource(xmlNode *rsc_entry, node_t *node, pe_working_set_t *data_set) { resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_log_xml_info(rsc_entry, "Orphan resource"); crm_config_warn("Nothing known about resource %s running on %s", rsc_id, node->details->uname); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if(is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } else { crm_info("Making sure orphan %s is stopped", rsc_id); print_resource(LOG_DEBUG_3, "Added orphan", rsc, FALSE); CRM_CHECK(rsc != NULL, return NULL); resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set); } return rsc; } static void process_rsc_state(resource_t *rsc, node_t *node, enum action_fail_response on_fail, xmlNode *migrate_op, pe_working_set_t *data_set) { if(on_fail == action_migrate_failure) { node_t *from = NULL; const char *uuid = NULL; uuid = crm_element_value(migrate_op, CRMD_ACTION_MIGRATED); from = pe_find_node_id(data_set->nodes, uuid); process_rsc_state(rsc, from, action_fail_recover,NULL,data_set); on_fail = action_fail_recover; } crm_debug_2("Resource %s is %s on %s: on_fail=%s", rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail)); /* process current state */ if(rsc->role != RSC_ROLE_UNKNOWN) { rsc->known_on = g_list_append(rsc->known_on, node); } if(node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = action_fail_ignore; } switch(on_fail) { case action_fail_ignore: /* nothing to do */ break; case action_fail_fence: /* treat it as if it is still running * but also mark the node as unclean */ node->details->unclean = TRUE; break; case action_fail_standby: node->details->standby = TRUE; + node->details->standby_onfail = TRUE; break; case action_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ clear_bit(rsc->flags, pe_rsc_managed); break; case action_fail_migrate: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__",data_set); break; case action_fail_stop: rsc->next_role = RSC_ROLE_STOPPED; break; case action_fail_recover: if(rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { set_bit(rsc->flags, pe_rsc_failed); stop_action(rsc, node, FALSE); } break; case action_migrate_failure: /* anything extra? */ break; } if(rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { native_add_running(rsc, node, data_set); if(on_fail != action_fail_ignore) { set_bit(rsc->flags, pe_rsc_failed); } } else if(rsc->clone_name) { crm_debug_2("Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); crm_free(rsc->clone_name); rsc->clone_name = NULL; } else { char *key = stop_key(rsc); GListPtr possible_matches = find_actions(rsc->actions, key, node); slist_iter(stop, action_t, possible_matches, lpc, stop->optional = TRUE; ); crm_free(key); } } /* create active recurring operations as optional */ static void process_recurring(node_t *node, resource_t *rsc, int start_index, int stop_index, GListPtr sorted_op_list, pe_working_set_t *data_set) { const char *task = NULL; const char *status = NULL; crm_debug_3("%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); slist_iter(rsc_op, xmlNode, sorted_op_list, lpc, int interval = 0; char *key = NULL; const char *id = ID(rsc_op); const char *interval_s = NULL; if(node->details->online == FALSE) { crm_debug_4("Skipping %s/%s: node is offline", rsc->id, node->details->uname); break; } else if(start_index < stop_index) { crm_debug_4("Skipping %s/%s: not active", rsc->id, node->details->uname); break; } else if(lpc <= start_index) { crm_debug_4("Skipping %s/%s: old", id, node->details->uname); continue; } interval_s = crm_element_value(rsc_op,XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if(interval == 0) { crm_debug_4("Skipping %s/%s: non-recurring", id, node->details->uname); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if(safe_str_eq(status, "-1")) { crm_debug_4("Skipping %s/%s: status", id, node->details->uname); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = generate_op_key(rsc->id, task, interval); crm_debug_3("Creating %s/%s", key, node->details->uname); custom_action(rsc, key, task, node, TRUE, TRUE, data_set); ); } void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) { const char *task = NULL; const char *status = NULL; *stop_index = -1; *start_index = -1; slist_iter( rsc_op, xmlNode, sorted_op_list, lpc, task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if(safe_str_eq(task, CRMD_ACTION_STOP) && safe_str_eq(status, "0")) { *stop_index = lpc; } else if(safe_str_eq(task, CRMD_ACTION_START)) { *start_index = lpc; } else if(*start_index <= *stop_index && safe_str_eq(task, CRMD_ACTION_STATUS)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if(safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) { *start_index = lpc; } } ); } static void unpack_lrm_rsc_state( node_t *node, xmlNode * rsc_entry, pe_working_set_t *data_set) { int stop_index = -1; int start_index = -1; int max_call_id = -1; const char *task = NULL; const char *value = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; xmlNode *migrate_op = NULL; enum action_fail_response on_fail = FALSE; enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; crm_debug_3("[%s] Processing %s on %s", crm_element_name(rsc_entry), rsc_id, node->details->uname); /* extract operations */ op_list = NULL; sorted_op_list = NULL; xml_child_iter_filter( rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP, op_list = g_list_append(op_list, rsc_op); ); if(op_list == NULL) { /* if there are no operations, there is nothing to do */ return; } /* find the resource */ rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); if(rsc == NULL) { rsc = process_orphan_resource(rsc_entry, node, data_set); } CRM_ASSERT(rsc != NULL); /* process operations */ max_call_id = -1; saved_role = rsc->role; on_fail = action_fail_ignore; rsc->role = RSC_ROLE_UNKNOWN; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); slist_iter( rsc_op, xmlNode, sorted_op_list, lpc, task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); if(safe_str_eq(task, CRMD_ACTION_MIGRATED)) { migrate_op = rsc_op; } unpack_rsc_op(rsc, node, rsc_op, &max_call_id, &on_fail, data_set); ); /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail, migrate_op, data_set); value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); if(value != NULL && safe_str_neq("default", value)) { enum rsc_role_e req_role = text2role(value); if(req_role != RSC_ROLE_UNKNOWN && req_role != rsc->next_role){ if(rsc->next_role != RSC_ROLE_UNKNOWN) { crm_debug("%s: Overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } rsc->next_role = req_role; } } if(saved_role > rsc->role) { rsc->role = saved_role; } } gboolean unpack_lrm_resources(node_t *node, xmlNode * lrm_rsc_list, pe_working_set_t *data_set) { CRM_CHECK(node != NULL, return FALSE); crm_debug_3("Unpacking resources on %s", node->details->uname); xml_child_iter_filter( lrm_rsc_list, rsc_entry, XML_LRM_TAG_RESOURCE, unpack_lrm_rsc_state(node, rsc_entry, data_set); ); return TRUE; } gboolean unpack_rsc_op(resource_t *rsc, node_t *node, xmlNode *xml_op, int *max_call_id, enum action_fail_response *on_fail, pe_working_set_t *data_set) { const char *id = NULL; const char *key = NULL; const char *task = NULL; const char *magic = NULL; const char *task_id = NULL; const char *actual_rc = NULL; /* const char *target_rc = NULL; */ const char *task_status = NULL; const char *interval_s = NULL; const char *op_digest = NULL; const char *op_version = NULL; int interval = 0; int task_id_i = -1; int task_status_i = -2; int actual_rc_i = 0; int target_rc = -1; action_t *action = NULL; node_t *effective_node = NULL; resource_t *failed = NULL; gboolean expired = FALSE; gboolean is_probe = FALSE; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(node != NULL, return FALSE); CRM_CHECK(xml_op != NULL, return FALSE); id = ID(xml_op); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); task_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); task_status = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); op_digest = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(id != NULL, return FALSE); CRM_CHECK(task != NULL, return FALSE); CRM_CHECK(task_status != NULL, return FALSE); task_status_i = crm_parse_int(task_status, NULL); CRM_CHECK(task_status_i <= LRM_OP_ERROR, return FALSE); CRM_CHECK(task_status_i >= LRM_OP_PENDING, return FALSE); if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) { /* safe to ignore these */ return TRUE; } if(rsc->failure_timeout > 0) { int last_run = 0; if(crm_element_value_int(xml_op, "last-run", &last_run) == 0) { /* int last_change = crm_element_value_int(xml_op, "last_rc_change"); */ time_t now = get_timet_now(data_set); if(now > (last_run + rsc->failure_timeout)) { expired = TRUE; } } } crm_debug_2("Unpacking task %s/%s (call_id=%s, status=%s) on %s (role=%s)", id, task, task_id, task_status, node->details->uname, role2text(rsc->role)); interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; } if(task_status_i != LRM_OP_PENDING) { task_id_i = crm_parse_int(task_id, "-1"); CRM_CHECK(task_id != NULL, return FALSE); CRM_CHECK(task_id_i >= 0, return FALSE); CRM_CHECK(task_id_i > *max_call_id, return FALSE); } if(*max_call_id < task_id_i) { *max_call_id = task_id_i; } if(node->details->unclean) { crm_debug_2("Node %s (where %s is running) is unclean." " Further action depends on the value of the stop's on-fail attribue", node->details->uname, rsc->id); } actual_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); CRM_CHECK(actual_rc != NULL, return FALSE); actual_rc_i = crm_parse_int(actual_rc, NULL); if(key) { int dummy = 0; char *dummy_string = NULL; decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc); crm_free(dummy_string); } if(task_status_i == LRM_OP_DONE && target_rc >= 0) { if(target_rc == actual_rc_i) { task_status_i = LRM_OP_DONE; } else { task_status_i = LRM_OP_ERROR; crm_info("%s on %s returned %d (%s) instead of the expected value: %d (%s)", id, node->details->uname, actual_rc_i, execra_code2string(actual_rc_i), target_rc, execra_code2string(target_rc)); } } else if(task_status_i == LRM_OP_ERROR) { /* let us decide that */ task_status_i = LRM_OP_DONE; } if(task_status_i == LRM_OP_NOTSUPPORTED) { actual_rc_i = EXECRA_UNIMPLEMENT_FEATURE; } if(expired && actual_rc_i != EXECRA_NOT_RUNNING && actual_rc_i != EXECRA_RUNNING_MASTER && actual_rc_i != EXECRA_OK) { crm_notice("Ignoring expired failure %s (rc=%d, magic=%s) on %s", id, actual_rc_i, magic, node->details->uname); goto done; } /* we could clean this up significantly except for old LRMs and CRMs that * didnt include target_rc and liked to remap status */ switch(actual_rc_i) { case EXECRA_NOT_RUNNING: if(is_probe || target_rc == actual_rc_i) { task_status_i = LRM_OP_DONE; rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } else if(safe_str_neq(task, CRMD_ACTION_STOP)) { task_status_i = LRM_OP_ERROR; } break; case EXECRA_RUNNING_MASTER: if(is_probe) { task_status_i = LRM_OP_DONE; crm_warn("Operation %s found resource %s active in master mode on %s", id, rsc->id, node->details->uname); } else if(target_rc == actual_rc_i) { /* nothing to do */ } else if(target_rc >= 0) { task_status_i = LRM_OP_ERROR; /* legacy code for pre-0.6.5 operations */ } else if(safe_str_neq(task, CRMD_ACTION_STATUS) || rsc->role != RSC_ROLE_MASTER) { task_status_i = LRM_OP_ERROR; if(rsc->role != RSC_ROLE_MASTER) { crm_err("%s reported %s in master mode on %s", id, rsc->id, node->details->uname); } } rsc->role = RSC_ROLE_MASTER; break; case EXECRA_FAILED_MASTER: rsc->role = RSC_ROLE_MASTER; task_status_i = LRM_OP_ERROR; break; case EXECRA_UNIMPLEMENT_FEATURE: if(interval > 0) { task_status_i = LRM_OP_NOTSUPPORTED; break; } /* else: fall through */ case EXECRA_INSUFFICIENT_PRIV: case EXECRA_NOT_INSTALLED: case EXECRA_INVALID_PARAM: effective_node = node; /* fall through */ case EXECRA_NOT_CONFIGURED: failed = rsc; if(is_not_set(rsc->flags, pe_rsc_unique)) { failed = uber_parent(failed); } crm_err("Hard error - %s failed with rc=%d: Preventing %s from re-starting %s %s", id, actual_rc_i, failed->id, effective_node?"on":"anywhere", effective_node?effective_node->details->uname:"in the cluster"); resource_location(failed, effective_node, -INFINITY, "hard-error", data_set); if(is_probe) { /* treat these like stops */ task = CRMD_ACTION_STOP; task_status_i = LRM_OP_DONE; } break; case EXECRA_OK: if(is_probe && target_rc == 7) { task_status_i = LRM_OP_DONE; crm_warn("Operation %s found resource %s active on %s", id, rsc->id, node->details->uname); /* legacy code for pre-0.6.5 operations */ } else if(target_rc < 0 && interval > 0 && rsc->role == RSC_ROLE_MASTER) { /* catch status ops that return 0 instead of 8 while they * are supposed to be in master mode */ task_status_i = LRM_OP_ERROR; } break; default: if(task_status_i == LRM_OP_DONE) { crm_info("Remapping %s (rc=%d) on %s to an ERROR", id, actual_rc_i, node->details->uname); task_status_i = LRM_OP_ERROR; } } if(task_status_i == LRM_OP_ERROR || task_status_i == LRM_OP_TIMEOUT || task_status_i == LRM_OP_NOTSUPPORTED) { action = custom_action(rsc, crm_strdup(id), task, NULL, TRUE, FALSE, data_set); if(expired) { crm_notice("Ignoring expired failure (calculated) %s (rc=%d, magic=%s) on %s", id, actual_rc_i, magic, node->details->uname); goto done; } else if(action->on_fail == action_fail_ignore) { crm_warn("Remapping %s (rc=%d) on %s to DONE: ignore", id, actual_rc_i, node->details->uname); task_status_i = LRM_OP_DONE; } } switch(task_status_i) { case LRM_OP_PENDING: if(safe_str_eq(task, CRMD_ACTION_START)) { set_bit(rsc->flags, pe_rsc_start_pending); rsc->role = RSC_ROLE_STARTED; } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } break; case LRM_OP_DONE: crm_debug_3("%s/%s completed on %s", rsc->id, task, node->details->uname); if(actual_rc_i == EXECRA_NOT_RUNNING) { /* nothing to do */ } else if(safe_str_eq(task, CRMD_ACTION_STOP)) { rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ switch(*on_fail) { case action_fail_block: case action_fail_stop: case action_fail_fence: case action_fail_migrate: case action_fail_standby: crm_debug_2("%s.%s is not cleared by a completed stop", rsc->id, fail2text(*on_fail)); break; case action_fail_ignore: case action_fail_recover: case action_migrate_failure: *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) { rsc->role = RSC_ROLE_SLAVE; } else if(rsc->role < RSC_ROLE_STARTED) { crm_debug_3("%s active on %s", rsc->id, node->details->uname); rsc->role = RSC_ROLE_STARTED; } break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: crm_warn("Processing failed op %s on %s: %s", id, node->details->uname, execra_code2string(actual_rc_i)); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); add_node_copy(data_set->failed, xml_op); if(*on_fail < action->on_fail) { *on_fail = action->on_fail; } if(safe_str_eq(task, CRMD_ACTION_STOP)) { resource_location( rsc, node, -INFINITY, "__stop_fail__", data_set); } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* * staying in role=master ends up putting the PE/TE into a loop * setting role=slave is not dangerous because no master will be * promoted until the failed resource has been fully stopped */ crm_warn("Forcing %s to stop after a failed demote action", rsc->id); rsc->next_role = RSC_ROLE_STOPPED; rsc->role = RSC_ROLE_SLAVE; } else if((is_set(data_set->flags, pe_flag_start_failure_fatal) || compare_version("2.0", op_version) > 0) && safe_str_eq(task, CRMD_ACTION_START)) { crm_warn("Compatibility handling for failed op %s on %s", id, node->details->uname); resource_location( rsc, node, -INFINITY, "__legacy_start__", data_set); } if(rsc->role < RSC_ROLE_STARTED) { rsc->role = RSC_ROLE_STARTED; } crm_debug_2("Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", rsc->id, role2text(rsc->role), node->details->unclean?"true":"false", fail2text(action->on_fail), role2text(action->fail_role)); if(action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) { rsc->next_role = action->fail_role; } if(action->fail_role == RSC_ROLE_STOPPED) { crm_err("Making sure %s doesn't come up again", rsc->id); /* make sure it doesnt come up again */ pe_free_shallow_adv(rsc->allowed_nodes, TRUE); rsc->allowed_nodes = node_list_dup( data_set->nodes, FALSE, FALSE); slist_iter( node, node_t, rsc->allowed_nodes, lpc, node->weight = -INFINITY; ); } pe_free_action(action); action = NULL; break; case LRM_OP_CANCELLED: /* do nothing?? */ pe_err("Dont know what to do for cancelled ops yet"); break; } done: crm_debug_3("Resource %s after %s: role=%s", rsc->id, task, role2text(rsc->role)); pe_free_action(action); return TRUE; } gboolean add_node_attrs(xmlNode *xml_obj, node_t *node, gboolean overwrite, pe_working_set_t *data_set) { g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_UNAME), crm_strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_ID), crm_strdup(node->details->id)); if(safe_str_eq(node->details->id, data_set->dc_uuid)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_DC), crm_strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_DC), crm_strdup(XML_BOOLEAN_FALSE)); } unpack_instance_attributes( xml_obj, XML_TAG_ATTR_SETS, NULL, node->details->attrs, NULL, overwrite, data_set->now); return TRUE; } static GListPtr extract_operations(const char *node, const char *rsc, xmlNode *rsc_entry, gboolean active_filter) { int stop_index = -1; int start_index = -1; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; xml_child_iter_filter( rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP, crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, XML_ATTR_UNAME, node); op_list = g_list_append(op_list, rsc_op); ); if(op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if(active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); slist_iter(rsc_op, xmlNode, sorted_op_list, lpc, if(start_index < stop_index) { crm_debug_4("Skipping %s: not active", ID(rsc_entry)); break; } else if(lpc < start_index) { crm_debug_4("Skipping %s: old", ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); ); g_list_free(sorted_op_list); return op_list; } GListPtr find_operations( const char *rsc, const char *node, gboolean active_filter, pe_working_set_t *data_set) { GListPtr output = NULL; GListPtr intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE); const char *uname = NULL; node_t *this_node = NULL; xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, uname = crm_element_value(node_state, XML_ATTR_UNAME); if(node != NULL && safe_str_neq(uname, node)) { continue; } this_node = pe_find_node(data_set->nodes, uname); CRM_CHECK(this_node != NULL, continue); determine_online_status(node_state, this_node, data_set); if(this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); xml_child_iter_filter( tmp, lrm_rsc, XML_LRM_TAG_RESOURCE, const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID); if(rsc != NULL && safe_str_neq(rsc_id, rsc)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); ); } ); return output; } diff --git a/tools/crm_mon.c b/tools/crm_mon.c index 9900e7bb12..4d16e15304 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -1,1147 +1,1152 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include <../lib/pengine/unpack.h> #ifdef HAVE_GETOPT_H # include #endif /* GMainLoop *mainloop = NULL; */ #define OPTARGS "V?i:nrh:cdp:s1wX:oftN" void usage(const char *cmd, int exit_status); void blank_screen(void); int print_status(xmlNode *cib); void print_warn(const char *descr); int print_simple_status(xmlNode *cib); /* #define printw_at(line, fmt...) move(line, 0); printw(fmt); line++ */ void wait_for_refresh(int offset, const char *prefix, int msec); int print_html_status(xmlNode *cib, const char *filename, gboolean web_cgi); void make_daemon(gboolean daemonize, const char *pidfile); gboolean mon_timer_popped(gpointer data); void mon_update(xmlNode*, int, int, xmlNode*,void*); void clean_up(int rc); char *xml_file = NULL; char *as_html_file = NULL; char *pid_file = NULL; gboolean as_console = FALSE; gboolean simple_status = FALSE; gboolean group_by_node = FALSE; gboolean inactive_resources = FALSE; gboolean web_cgi = FALSE; int interval = 15000; gboolean daemonize = FALSE; GMainLoop* mainloop = NULL; guint timer_id = 0; cib_t *cib_conn = NULL; int failed_connections = 0; gboolean one_shot = FALSE; gboolean has_warnings = FALSE; gboolean print_failcount = FALSE; gboolean print_operations = FALSE; gboolean print_timing = FALSE; /* * Non-mainloop signal handler. */ static void mon_shutdown_wrapper(int nsig) { clean_up(LSB_EXIT_OK); } /* * Mainloop signal handler. */ static gboolean mon_shutdown(int nsig, gpointer unused) { clean_up(-1); if (mainloop && g_main_is_running(mainloop)) { g_main_quit(mainloop); } else { exit(LSB_EXIT_OK); } return FALSE; } #if CURSES_ENABLED # define print_as(fmt...) if(as_console) { \ printw(fmt); \ clrtoeol(); \ refresh(); \ } else { \ fprintf(stdout, fmt); \ } #else # define print_as(fmt...) fprintf(stdout, fmt); #endif int main(int argc, char **argv) { int argerr = 0; int flag; gboolean disable_ncurses = FALSE; #ifdef HAVE_GETOPT_H int option_index = 0; static struct option long_options[] = { /* Top-level Options */ {"verbose", 0, 0, 'V'}, {"help", 0, 0, '?'}, {"interval", 1, 0, 'i'}, {"group-by-node", 0, 0, 'n'}, {"inactive", 0, 0, 'r'}, {"failcounts", 0, 0, 'f'}, {"operations", 0, 0, 'o'}, {"timing-details", 0, 0, 't'}, {"as-html", 1, 0, 'h'}, {"web-cgi", 0, 0, 'w'}, {"simple-status", 0, 0, 's'}, {"as-console", 0, 0, 'c'}, {"one-shot", 0, 0, '1'}, {"daemonize", 0, 0, 'd'}, {"disable-ncurses",0, 0, 'N'}, {"pid-file", 0, 0, 'p'}, {"xml-file", 1, 0, 'x'}, {0, 0, 0, 0} }; #endif pid_file = crm_strdup("/tmp/ClusterMon.pid"); crm_log_init(basename(argv[0]), LOG_ERR-1, FALSE, FALSE, 0, NULL); if (strcmp(crm_system_name, "crm_mon.cgi")==0) { web_cgi = TRUE; one_shot = TRUE; } while (1) { #ifdef HAVE_GETOPT_H flag = getopt_long(argc, argv, OPTARGS, long_options, &option_index); #else flag = getopt(argc, argv, OPTARGS); #endif if (flag == -1) break; switch(flag) { case 'V': cl_log_enable_stderr(TRUE); alter_debug(DEBUG_INC); break; case 'i': interval = crm_get_msec(optarg); break; case 'n': group_by_node = TRUE; break; case 'r': inactive_resources = TRUE; break; case 'd': daemonize = TRUE; break; case 't': print_timing = TRUE; print_operations = TRUE; break; case 'o': print_operations = TRUE; break; case 'f': print_failcount = TRUE; break; case 'p': crm_free(pid_file); pid_file = crm_strdup(optarg); break; case 'X': xml_file = crm_strdup(optarg); one_shot = TRUE; break; case 'h': as_html_file = crm_strdup(optarg); break; case 'w': web_cgi = TRUE; one_shot = TRUE; break; case 'c': #if CURSES_ENABLED as_console = TRUE; #else printf("You need to have curses available at compile time to enable console mode\n"); argerr++; #endif break; case 's': simple_status = TRUE; one_shot = TRUE; break; case '1': one_shot = TRUE; break; case 'N': disable_ncurses = TRUE; break; case '?': usage(crm_system_name, LSB_EXIT_OK); break; default: printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (argerr) { usage(crm_system_name, LSB_EXIT_GENERIC); } /* Set signal callback function. */ signal(SIGTERM, mon_shutdown_wrapper); signal(SIGINT, mon_shutdown_wrapper); if(as_html_file == NULL && !web_cgi && !simple_status) { #if CURSES_ENABLED as_console = TRUE; #else printf("Defaulting to one-shot mode\n"); printf("You need to have curses available at compile time to enable console mode\n"); one_shot = TRUE; #endif } if(daemonize) { as_console = FALSE; } if(one_shot) { daemonize = FALSE; as_console = FALSE; } if(daemonize && as_html_file == NULL) { usage(crm_system_name, LSB_EXIT_GENERIC); } make_daemon(daemonize, pid_file); if(disable_ncurses) { as_console = FALSE; } #if CURSES_ENABLED if(as_console) { initscr(); cbreak(); noecho(); } #endif crm_info("Starting %s", crm_system_name); mainloop = g_main_new(FALSE); if(one_shot == FALSE) { timer_id = Gmain_timeout_add( interval, mon_timer_popped, NULL); } else if(xml_file != NULL) { xmlNode *cib_object = filename2xml(xml_file); one_shot = TRUE; if(cli_config_update(&cib_object, NULL) == FALSE) { return FALSE; } mon_update(NULL, 0, cib_ok, cib_object, NULL); } mon_timer_popped(NULL); G_main_add_SignalHandler( G_PRIORITY_HIGH, SIGTERM, mon_shutdown, NULL, NULL); G_main_add_SignalHandler( G_PRIORITY_HIGH, SIGINT, mon_shutdown, NULL, NULL); g_main_run(mainloop); g_main_destroy(mainloop); return_to_orig_privs(); crm_info("Exiting %s", crm_system_name); #if CURSES_ENABLED if(as_console) { echo(); nocbreak(); endwin(); } #endif return 0; } gboolean mon_timer_popped(gpointer data) { int rc = cib_ok; int options = cib_scope_local|cib_sync_call; static gboolean need_pass = TRUE; xmlNode *output = NULL; if(timer_id > 0) { Gmain_timeout_remove(timer_id); } if(cib_conn == NULL) { crm_debug_4("Creating CIB connection"); cib_conn = cib_new(); CRM_CHECK(cib_conn != NULL, return FALSE); } if(as_console) { print_as("Updating...\n"); } else { crm_notice("Updating..."); } if(cib_conn->state != cib_connected_query && cib_conn->state != cib_connected_command) { crm_debug_4("Connecting to the CIB"); if(as_console) { print_as("Signing on...\n"); } if(need_pass && cib_conn->variant == cib_remote) { need_pass = FALSE; print_as("Password:"); } if(cib_ok == cib_conn->cmds->signon( cib_conn, crm_system_name, cib_query)) { failed_connections = 0; } else if (simple_status || one_shot) { fprintf(stdout, "Critical: Unable to connect to the CIB\n"); clean_up(LSB_EXIT_GENERIC); } else { failed_connections++; CRM_DEV_ASSERT(cib_conn->cmds->signoff(cib_conn) == cib_ok); wait_for_refresh(0, "Not connected: ", 2*interval); return FALSE; } } if(as_console) { blank_screen(); print_as("Querying...\n"); } output = NULL; rc = cib_conn->cmds->query(cib_conn, NULL, &output, options); if(rc != cib_ok) { CRM_DEV_ASSERT(cib_conn->cmds->signoff(cib_conn) == cib_ok); print_as("Query failed: %s", cib_error2string(rc)); sleep(1); wait_for_refresh(2, NULL, 2*interval); return FALSE; } else if(cli_config_update(&output, NULL) == FALSE) { CRM_DEV_ASSERT(cib_conn->cmds->signoff(cib_conn) == cib_ok); print_as("Upgrade failed: %s", cib_error2string(cib_dtd_validation)); sleep(2); clean_up(LSB_EXIT_GENERIC); return FALSE; } mon_update(NULL, 0, rc, output, NULL); free_xml(output); /* add_cib_op_callback(rc, FALSE, NULL, mon_update); */ return FALSE; } void mon_update(xmlNode *msg, int call_id, int rc, xmlNode *output, void*user_data) { const char *prefix = NULL; if(rc == cib_ok) { xmlNode *cib = NULL; cib = output; CRM_DEV_ASSERT(safe_str_eq(crm_element_name(cib), XML_TAG_CIB)); if(as_html_file || web_cgi) { if (print_html_status(cib, as_html_file, web_cgi) != 0) { fprintf(stderr, "Critical: Unable to output html file\n"); clean_up(LSB_EXIT_GENERIC); } } else if (simple_status) { print_simple_status(cib); if (has_warnings) { clean_up(LSB_EXIT_GENERIC); } } else { print_status(cib); } if(one_shot) { clean_up(LSB_EXIT_OK); } } else if(simple_status) { fprintf(stderr, "Critical: query failed: %s", cib_error2string(rc)); clean_up(LSB_EXIT_GENERIC); } else if(one_shot) { fprintf(stderr, "Query failed: %s", cib_error2string(rc)); clean_up(LSB_EXIT_OK); } else { CRM_DEV_ASSERT(cib_conn->cmds->signoff(cib_conn) == cib_ok); print_as("Query failed: %s", cib_error2string(rc)); prefix = "Query failed! "; } wait_for_refresh(prefix?2:0, NULL, interval); } void wait_for_refresh(int offset, const char *prefix, int msec) { int lpc = msec / 1000; struct timespec sleept = {1 , 0}; if(as_console == FALSE) { timer_id = Gmain_timeout_add(msec, mon_timer_popped, NULL); return; } crm_notice("%sRefresh in %ds...", prefix?prefix:"", lpc); while(lpc > 0) { #if CURSES_ENABLED move(offset, 0); /* printw("%sRefresh in \033[01;32m%ds\033[00m...", prefix?prefix:"", lpc); */ printw("%sRefresh in %ds...\n", prefix?prefix:"", lpc); clrtoeol(); refresh(); #endif lpc--; if(lpc == 0) { timer_id = Gmain_timeout_add( 1000, mon_timer_popped, NULL); } else { if (nanosleep(&sleept, NULL) != 0) { return; } } } } #define mon_warn(fmt...) do { \ if (!has_warnings) { \ print_as("Warning:"); \ } else { \ print_as(","); \ } \ print_as(fmt); \ has_warnings = TRUE; \ } while(0) int print_simple_status(xmlNode *cib) { node_t *dc = NULL; int nodes_online = 0; int nodes_standby = 0; pe_working_set_t data_set; set_working_set_defaults(&data_set); data_set.input = cib; cluster_status(&data_set); dc = data_set.dc_node; if(dc == NULL) { mon_warn("No DC "); } slist_iter(node, node_t, data_set.nodes, lpc2, if(node->details->standby && node->details->online) { nodes_standby++; } else if(node->details->online) { nodes_online++; } else { mon_warn("offline node: %s", node->details->uname); } ); if (!has_warnings) { print_as("Ok: %d nodes online", nodes_online); if (nodes_standby > 0) { print_as(", %d standby nodes", nodes_standby); } print_as(", %d resources configured", g_list_length(data_set.resources)); } print_as("\n"); data_set.input = NULL; cleanup_calculations(&data_set); return 0; } extern int get_failcount(node_t *node, resource_t *rsc, int *last_failure, pe_working_set_t *data_set); static void get_ping_score(node_t *node, pe_working_set_t *data_set) { const char *attr = "pingd"; const char *value = NULL; value = g_hash_table_lookup(node->details->attrs, attr); if(value != NULL) { print_as(" %s=%s", attr, value); } } static void print_date(time_t time) { int lpc = 0; char date_str[26]; asctime_r(localtime(&time), date_str); for(; lpc < 26; lpc++) { if(date_str[lpc] == '\n') { date_str[lpc] = 0; } } print_as("'%s'", date_str); } static void print_rsc_summary(pe_working_set_t *data_set, node_t *node, resource_t *rsc, gboolean all) { gboolean printed = FALSE; time_t last_failure = 0; int failcount = get_failcount(node, rsc, (int*)&last_failure, data_set); if(all || failcount || last_failure > 0) { printed = TRUE; print_as(" %s: migration-threshold=%d", rsc->id, rsc->migration_threshold); } if(failcount > 0) { printed = TRUE; print_as(" fail-count=%d", failcount); } if(last_failure > 0) { printed = TRUE; print_as(" last-failure="); print_date(last_failure); } if(printed) { print_as("\n"); } } static void print_rsc_history(pe_working_set_t *data_set, node_t *node, xmlNode *rsc_entry) { GListPtr op_list = NULL; gboolean print_name = TRUE; GListPtr sorted_op_list = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); xml_child_iter_filter( rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP, op_list = g_list_append(op_list, rsc_op); ); sorted_op_list = g_list_sort(op_list, sort_op_by_callid); slist_iter(xml_op, xmlNode, sorted_op_list, lpc, const char *value = NULL; const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *op_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); const char *interval = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); int rc = crm_parse_int(op_rc, "0"); if(safe_str_eq(task, CRMD_ACTION_STATUS) && safe_str_eq(interval, "0")) { task = "probe"; } if(rc == 7 && safe_str_eq(task, "probe")) { continue; } else if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) { continue; } if(print_name) { print_name = FALSE; if(rsc == NULL) { print_as("Orphan resource: %s", rsc_id); } else { print_rsc_summary(data_set, node, rsc, TRUE); } } print_as(" + (%s) %s:", call, task); if(safe_str_neq(interval, "0")) { print_as(" interval=%sms", interval); } if(print_timing) { int int_value; const char *attr = "last-rc-change"; value = crm_element_value(xml_op, attr); if(value) { int_value = crm_parse_int(value, NULL); print_as(" %s=", attr); print_date(int_value); } attr = "last-run"; value = crm_element_value(xml_op, attr); if(value) { int_value = crm_parse_int(value, NULL); print_as(" %s=", attr); print_date(int_value); } attr = "exec-time"; value = crm_element_value(xml_op, attr); if(value) { int_value = crm_parse_int(value, NULL); print_as(" %s=%dms", attr, int_value); } attr = "queue-time"; value = crm_element_value(xml_op, attr); if(value) { int_value = crm_parse_int(value, NULL); print_as(" %s=%dms", attr, int_value); } } print_as(" rc=%s (%s)\n", op_rc, execra_code2string(rc)); ); /* no need to free the contents */ g_list_free(sorted_op_list); } static void print_node_summary(pe_working_set_t *data_set, gboolean operations) { xmlNode *lrm_rsc = NULL; xmlNode *cib_status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); if(operations) { print_as("\nOperations:\n"); } else { print_as("\nMigration summary:\n"); } xml_child_iter_filter( cib_status, node_state, XML_CIB_TAG_STATE, node_t *node = pe_find_node_id(data_set->nodes, ID(node_state)); if(node == NULL || node->details->online == FALSE){ continue; } print_as("* Node %s: ", crm_element_value(node_state, XML_ATTR_UNAME)); get_ping_score(node, data_set); print_as("\n"); lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); xml_child_iter_filter( lrm_rsc, rsc_entry, XML_LRM_TAG_RESOURCE, if(operations) { print_rsc_history(data_set, node, rsc_entry); } else { const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); print_rsc_summary(data_set, node, rsc, FALSE); } ); ); } int print_status(xmlNode *cib) { node_t *dc = NULL; static int updates = 0; pe_working_set_t data_set; char *since_epoch = NULL; time_t a_time = time(NULL); int configured_resources = 0; int print_opts = pe_print_ncurses; if(as_console) { blank_screen(); } else { print_opts = pe_print_printf; } updates++; set_working_set_defaults(&data_set); data_set.input = cib; cluster_status(&data_set); dc = data_set.dc_node; print_as("\n\n============\n"); if(a_time == (time_t)-1) { cl_perror("set_node_tstamp(): Invalid time returned"); return 1; } since_epoch = ctime(&a_time); if(since_epoch != NULL) { print_as("Last updated: %s", since_epoch); } if(dc == NULL) { print_as("Current DC: NONE\n"); } else { print_as("Current DC: %s (%s)\n", dc->details->uname, dc->details->id); } slist_iter(rsc, resource_t, data_set.resources, lpc, if(is_not_set(rsc->flags, pe_rsc_orphan)) { configured_resources++; } ); print_as("%d Nodes configured.\n", g_list_length(data_set.nodes)); print_as("%d Resources configured.\n", configured_resources); print_as("============\n\n"); slist_iter(node, node_t, data_set.nodes, lpc2, const char *node_mode = "OFFLINE"; if(node->details->pending) { node_mode = "pending"; + } else if(node->details->standby_onfail && node->details->online) { + node_mode = "standby (on-fail)"; + } else if(node->details->standby && node->details->online) { node_mode = "standby"; } else if(node->details->standby) { node_mode = "OFFLINE (standby)"; } else if(node->details->online) { node_mode = "online"; } print_as("Node: %s (%s): %s\n", node->details->uname, node->details->id, node_mode); if(group_by_node) { slist_iter(rsc, resource_t, node->details->running_rsc, lpc2, rsc->fns->print( rsc, "\t", print_opts|pe_print_rsconly, stdout); ); } ); if(group_by_node == FALSE && inactive_resources) { print_as("\nFull list of resources:\n"); } else if(inactive_resources) { print_as("\nInactive resources:\n"); } if(group_by_node == FALSE || inactive_resources) { print_as("\n"); slist_iter(rsc, resource_t, data_set.resources, lpc2, gboolean is_active = rsc->fns->active(rsc, TRUE); gboolean partially_active = rsc->fns->active(rsc, FALSE); if(is_set(rsc->flags, pe_rsc_orphan) && is_active == FALSE) { continue; } else if(group_by_node == FALSE) { if(partially_active || inactive_resources) { rsc->fns->print(rsc, NULL, print_opts, stdout); } } else if(is_active == FALSE && inactive_resources) { rsc->fns->print(rsc, NULL, print_opts, stdout); } ); } if(print_operations || print_failcount) { print_node_summary(&data_set, print_operations); } if(xml_has_children(data_set.failed)) { print_as("\nFailed actions:\n"); xml_child_iter(data_set.failed, xml_op, const char *id = ID(xml_op); const char *last = crm_element_value(xml_op, "last_run"); const char *node = crm_element_value(xml_op, XML_ATTR_UNAME); const char *rc_s = crm_element_value(xml_op, XML_LRM_ATTR_RC); const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); const char *status = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); int rc = crm_parse_int(rc_s, "0"); print_as(" %s (node=%s, call=%s, rc=%d, status=%s", id, node, call, rc, status); if(last) { time_t run_at = crm_parse_int(last, "0"); print_as(", last-run=%s, queued=%sms, exec=%sms\n", ctime(&run_at), crm_element_value(xml_op, "exec_time"), crm_element_value(xml_op, "queue_time")); } print_as("): %s\n", execra_code2string(rc)); ); } #if CURSES_ENABLED if(as_console) { refresh(); } #endif data_set.input = NULL; cleanup_calculations(&data_set); return 0; } int print_html_status(xmlNode *cib, const char *filename, gboolean web_cgi) { FILE *stream; node_t *dc = NULL; static int updates = 0; pe_working_set_t data_set; char *filename_tmp = NULL; if (web_cgi) { stream=stdout; fprintf(stream, "Content-type: text/html\n\n"); } else { filename_tmp = crm_concat(filename, "tmp", '.'); stream = fopen(filename_tmp, "w"); if(stream == NULL) { cl_perror("Cannot open %s for writing", filename_tmp); crm_free(filename_tmp); return -1; } } updates++; set_working_set_defaults(&data_set); data_set.input = cib; cluster_status(&data_set); dc = data_set.dc_node; fprintf(stream, ""); fprintf(stream, ""); fprintf(stream, "Cluster status"); /* content="%d;url=http://webdesign.about.com" */ fprintf(stream, "", interval); fprintf(stream, ""); /*** SUMMARY ***/ fprintf(stream, "

Cluster summary

"); { char *now_str = NULL; time_t now = time(NULL); now_str = ctime(&now); now_str[24] = EOS; /* replace the newline */ fprintf(stream, "Last updated: %s
\n", now_str); } if(dc == NULL) { fprintf(stream, "Current DC: NONE
"); } else { fprintf(stream, "Current DC: %s (%s)
", dc->details->uname, dc->details->id); } fprintf(stream, "%d Nodes configured.
", g_list_length(data_set.nodes)); fprintf(stream, "%d Resources configured.
", g_list_length(data_set.resources)); /*** CONFIG ***/ fprintf(stream, "

Config Options

\n"); fprintf(stream, "\n"); fprintf(stream, "\n", data_set.default_resource_stickiness); fprintf(stream, "\n", is_set(data_set.flags, pe_flag_symmetric_cluster)?"":"a-"); fprintf(stream, "\n
Default resource stickiness:%d
STONITH of failed nodes:%sCluster is:%ssymmetric
No Quorum Policy:"); switch (data_set.no_quorum_policy) { case no_quorum_freeze: fprintf(stream, "Freeze resources"); break; case no_quorum_stop: fprintf(stream, "Stop ALL resources"); break; case no_quorum_ignore: fprintf(stream, "Ignore"); break; case no_quorum_suicide: fprintf(stream, "Suicide"); break; } fprintf(stream, "\n
\n"); /*** NODE LIST ***/ fprintf(stream, "

Node List

\n"); fprintf(stream, "
    \n"); slist_iter(node, node_t, data_set.nodes, lpc2, fprintf(stream, "
  • "); - if(node->details->standby && node->details->online) { + if(node->details->standby_onfail && node->details->online) { + fprintf(stream, "Node: %s (%s): %s",node->details->uname, node->details->id,"standby (on-fail)\n"); + } else if(node->details->standby && node->details->online) { fprintf(stream, "Node: %s (%s): %s",node->details->uname, node->details->id,"standby\n"); } else if(node->details->standby) { fprintf(stream, "Node: %s (%s): %s",node->details->uname, node->details->id,"OFFLINE (standby)\n"); } else if(node->details->online) { fprintf(stream, "Node: %s (%s): %s",node->details->uname, node->details->id,"online\n"); } else { fprintf(stream, "Node: %s (%s): %s",node->details->uname, node->details->id,"OFFLINE\n"); } if(group_by_node) { fprintf(stream, "
      \n"); slist_iter(rsc, resource_t, node->details->running_rsc, lpc2, fprintf(stream, "
    • "); rsc->fns->print(rsc, NULL, pe_print_html|pe_print_rsconly, stream); fprintf(stream, "
    • \n"); ); fprintf(stream, "
    \n"); } fprintf(stream, "
  • \n"); ); fprintf(stream, "
\n"); if(group_by_node && inactive_resources) { fprintf(stream, "

(Partially) Inactive Resources

\n"); } else if(group_by_node == FALSE) { fprintf(stream, "

Resource List

\n"); } if(group_by_node == FALSE || inactive_resources) { slist_iter(rsc, resource_t, data_set.resources, lpc2, if(group_by_node && rsc->fns->active(rsc, TRUE)) { continue; } rsc->fns->print(rsc, NULL, pe_print_html, stream); ); } data_set.input = NULL; cleanup_calculations(&data_set); fprintf(stream, ""); fflush(stream); fclose(stream); if (!web_cgi) { if(rename(filename_tmp, filename) != 0) { cl_perror("Unable to rename %s->%s", filename_tmp, filename); } crm_free(filename_tmp); } return 0; } void blank_screen(void) { #if CURSES_ENABLED int lpc = 0; for(lpc = 0; lpc < LINES; lpc++) { move(lpc, 0); clrtoeol(); } move(0, 0); refresh(); #endif } void usage(const char *cmd, int exit_status) { FILE *stream; stream = exit_status ? stderr : stdout; fprintf(stream, "%s -- Provides a summary of cluster's current state.\n" " Outputs varying levels of detail in a number of different formats.\n\n", cmd); fprintf(stream, "usage: %s [-%s]\n", cmd, OPTARGS); fprintf(stream, "General options:\n"); fprintf(stream, "\t--%s (-%c) \t: This text\n", "help", '?'); fprintf(stream, "\t--%s (-%c) \t: Increase the debug output\n", "verbose", 'V'); fprintf(stream, "\t--%s (-%c) \t: Update frequency\n", "interval", 'i'); fprintf(stream, "\t--%s (-%c) \t: Daemon pid file location\n", "pid-file", 'p'); fprintf(stream, "Output detail options:\n"); fprintf(stream, "\t--%s (-%c) \t: Group resources by node\n", "group-by-node", 'n'); fprintf(stream, "\t--%s (-%c) \t: Display inactive resources\n", "inactive", 'r'); fprintf(stream, "\t--%s (-%c) \t: Display resource fail counts\n", "failcount", 'f'); fprintf(stream, "\t--%s (-%c) \t: Display resource operation history\n", "operations", 'o'); fprintf(stream, "\t--%s (-%c) \t: Display cluster status on the console\n", "as-console", 'c'); fprintf(stream, "Output destination options:\n"); fprintf(stream, "\t--%s (-%c) \t: Display the cluster status once as " "a simple one line output (suitable for nagios)\n", "simple-status", 's'); fprintf(stream, "\t--%s (-%c) \t: Display the cluster status once on " "the console and exit (doesnt use ncurses)\n", "one-shot", '1'); fprintf(stream, "\t--%s (-%c) \t: Write cluster status to the named file\n", "as-html", 'h'); fprintf(stream, "\t--%s (-%c) \t: Web mode with output suitable for cgi\n", "web-cgi", 'w'); fprintf(stream, "\t--%s (-%c) \t: Run in the background as a daemon\n", "daemonize", 'd'); fflush(stream); clean_up(exit_status); } void make_daemon(gboolean daemonize, const char *pidfile) { long pid; const char *devnull = "/dev/null"; if (daemonize == FALSE){ return; } pid = fork(); if (pid < 0) { fprintf(stderr, "%s: could not start daemon\n", crm_system_name); perror("fork"); clean_up(LSB_EXIT_GENERIC); } else if (pid > 0) { clean_up(LSB_EXIT_OK); } if (cl_lock_pidfile(pidfile) < 0 ){ pid = cl_read_pidfile(pidfile); fprintf(stderr, "%s: already running [pid %ld].\n", crm_system_name, pid); clean_up(LSB_EXIT_OK); } umask(022); close(0); close(1); close(2); (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ } /* * De-init ncurses, signoff from the CIB and deallocate memory. */ void clean_up(int rc) { #if CURSES_ENABLED if(as_console) { as_console = FALSE; echo(); nocbreak(); endwin(); } #endif if (cib_conn != NULL) { cib_conn->cmds->signoff(cib_conn); cib_delete(cib_conn); cib_conn = NULL; } crm_free(as_html_file); crm_free(xml_file); crm_free(pid_file); if(rc >= 0) { exit(rc); } return; }