diff --git a/crm/pengine/pengine.h b/crm/pengine/pengine.h index 71fe22f2ad..9580e67736 100644 --- a/crm/pengine/pengine.h +++ b/crm/pengine/pengine.h @@ -1,389 +1,389 @@ -/* $Id: pengine.h,v 1.70 2005/06/16 12:36:20 andrew Exp $ */ +/* $Id: pengine.h,v 1.71 2005/06/27 11:13:05 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef PENGINE__H #define PENGINE__H #include typedef struct node_s node_t; typedef struct color_s color_t; typedef struct rsc_to_node_s rsc_to_node_t; typedef struct rsc_colocation_s rsc_colocation_t; typedef struct resource_s resource_t; typedef struct lrm_agent_s lrm_agent_t; typedef struct order_constraint_s order_constraint_t; typedef struct action_s action_t; typedef struct action_wrapper_s action_wrapper_t; #include #include #include typedef enum no_quorum_policy_e { no_quorum_freeze, no_quorum_stop, no_quorum_ignore } no_quorum_policy_t; typedef struct pe_working_set_s { crm_data_t *input; /* options extracted from the input */ char *dc_uuid; gboolean have_quorum; gboolean stonith_enabled; gboolean symmetric_cluster; int default_resource_stickiness; no_quorum_policy_t no_quorum_policy; /* intermediate steps */ color_t *no_color; GListPtr nodes; GListPtr resources; GListPtr placement_constraints; GListPtr ordering_constraints; GListPtr 
colors; GListPtr actions; /* stats */ int num_synapse; int max_valid_nodes; int order_id; int action_id; int color_id; /* final output */ crm_data_t *graph; } pe_working_set_t; #include enum con_type { type_none, rsc_colocation, rsc_to_node, rsc_to_attr, base_weight }; enum node_type { node_ping, node_member }; enum con_strength { pecs_ignore, pecs_must, pecs_must_not, pecs_startstop }; enum action_tasks { no_action, monitor_rsc, stop_rsc, stopped_rsc, start_rsc, started_rsc, shutdown_crm, stonith_node }; enum rsc_recovery_type { recovery_stop_start, recovery_stop_only, recovery_block }; enum rsc_start_requirement { rsc_req_nothing, rsc_req_quorum, rsc_req_stonith }; enum pe_stop_fail { pesf_block, pesf_stonith, pesf_ignore }; enum pe_restart { pe_restart_restart, pe_restart_ignore }; enum pe_ordering { pe_ordering_manditory, pe_ordering_restart, pe_ordering_recover, pe_ordering_optional }; struct node_shared_s { const char *id; const char *uname; gboolean online; gboolean unclean; gboolean shutdown; gboolean expected_up; gboolean is_dc; int num_resources; GListPtr running_rsc; /* resource_t* */ GHashTable *attrs; /* char* => char* */ enum node_type type; }; struct node_s { float weight; gboolean fixed; struct node_shared_s *details; }; struct color_shared_s { int id; float highest_priority; GListPtr candidate_nodes; /* node_t* */ GListPtr allocated_resources; /* resources_t* */ node_t *chosen_node; gboolean pending; int num_resources; }; struct color_s { int id; struct color_shared_s *details; float local_weight; }; struct rsc_colocation_s { const char *id; resource_t *rsc_lh; resource_t *rsc_rh; enum con_strength strength; }; struct rsc_to_node_s { const char *id; resource_t *rsc_lh; float weight; GListPtr node_list_rh; /* node_t* */ }; struct lrm_agent_s { const char *class; const char *type; const char *version; }; struct resource_s { const char *id; crm_data_t *xml; crm_data_t *ops_xml; void *variant_opaque; enum pe_obj_types variant; 
resource_object_functions_t *fns; enum rsc_recovery_type recovery_type; enum pe_restart restart_type; float priority; float effective_priority; gboolean start_pending; gboolean recover; gboolean starting; gboolean stopping; gboolean is_stonith; gboolean runnable; gboolean provisional; gboolean unclean; GListPtr candidate_colors; /* color_t* */ GListPtr rsc_cons; /* rsc_colocation_t* */ GListPtr actions; /* action_t* */ GHashTable * parameters; }; struct action_wrapper_s { enum pe_ordering type; action_t *action; }; enum action_fail_response { action_fail_nothing, action_fail_block, action_fail_stop, action_fail_fence }; struct action_s { int id; resource_t *rsc; void *rsc_opaque; node_t *node; const char *task; char *uuid; crm_data_t *op_entry; gboolean pseudo; gboolean runnable; gboolean optional; gboolean failure_is_fatal; enum rsc_start_requirement needs; enum action_fail_response on_fail; gboolean dumped; gboolean processed; int seen_count; /* crm_data_t *args; */ GHashTable *extra; GListPtr actions_before; /* action_warpper_t* */ GListPtr actions_after; /* action_warpper_t* */ }; struct order_constraint_s { int id; enum pe_ordering type; void *lh_opaque; resource_t *lh_rsc; action_t *lh_action; char *lh_action_task; void *rh_opaque; resource_t *rh_rsc; action_t *rh_action; char *rh_action_task; /* (soon to be) variant specific */ /* int lh_rsc_incarnation; */ /* int rh_rsc_incarnation; */ }; extern gboolean stage0(pe_working_set_t *data_set); extern gboolean stage1(pe_working_set_t *data_set); extern gboolean stage2(pe_working_set_t *data_set); extern gboolean stage3(pe_working_set_t *data_set); extern gboolean stage4(pe_working_set_t *data_set); extern gboolean stage5(pe_working_set_t *data_set); extern gboolean stage6(pe_working_set_t *data_set); extern gboolean stage7(pe_working_set_t *data_set); extern gboolean stage8(pe_working_set_t *data_set); extern gboolean summary(GListPtr resources); extern gboolean pe_msg_dispatch(IPC_Channel *sender, void 
*user_data); extern gboolean process_pe_message( HA_Message *msg, crm_data_t *xml_data, IPC_Channel *sender); extern gboolean unpack_constraints( crm_data_t *xml_constraints, pe_working_set_t *data_set); extern gboolean unpack_resources( crm_data_t *xml_resources, pe_working_set_t *data_set); extern gboolean unpack_config(crm_data_t *config, pe_working_set_t *data_set); extern gboolean unpack_nodes(crm_data_t *xml_nodes, pe_working_set_t *data_set); extern gboolean unpack_status(crm_data_t *status, pe_working_set_t *data_set); extern gboolean apply_placement_constraints(pe_working_set_t *data_set); extern void color_resource(resource_t *lh_resource, pe_working_set_t *data_set); extern gboolean choose_node_from_list(color_t *color); extern gboolean update_action_states(GListPtr actions); extern gboolean shutdown_constraints( node_t *node, action_t *shutdown_op, pe_working_set_t *data_set); extern gboolean stonith_constraints( node_t *node, action_t *stonith_op, action_t *shutdown_op, pe_working_set_t *data_set); extern gboolean custom_action_order( resource_t *lh_rsc, char *lh_task, action_t *lh_action, resource_t *rh_rsc, char *rh_task, action_t *rh_action, enum pe_ordering type, pe_working_set_t *data_set); #define order_start_start(rsc1,rsc2, type) \ custom_action_order(rsc1, start_key(rsc1), NULL, \ rsc2, start_key(rsc2) ,NULL, \ type, data_set) #define order_stop_stop(rsc1, rsc2, type) \ custom_action_order(rsc1, stop_key(rsc1), NULL, \ rsc2, stop_key(rsc2) ,NULL, \ type, data_set) #define order_restart(rsc1) \ custom_action_order(rsc1, stop_key(rsc1), NULL, \ rsc1, start_key(rsc1), NULL, \ pe_ordering_restart, data_set) #define order_stop_start(rsc1, rsc2, type) \ custom_action_order(rsc1, stop_key(rsc1), NULL, \ rsc2, start_key(rsc2) ,NULL, \ type, data_set) #define order_start_stop(rsc1, rsc2, type) \ custom_action_order(rsc1, start_key(rsc1), NULL, \ rsc2, stop_key(rsc2) ,NULL, \ type, data_set) #define pe_err(fmt...) 
{ was_processing_error = TRUE; crm_err(fmt); } #define pe_warn(fmt...) { was_processing_warning = TRUE; crm_warn(fmt); } #define check_and_exit(stage) cleanup_calculations(data_set); \ crm_mem_stats(NULL); \ crm_err("Exiting: stage %d", stage); \ exit(1); extern gboolean process_colored_constraints(resource_t *rsc); extern void graph_element_from_action( action_t *action, pe_working_set_t *data_set); extern void set_working_set_defaults(pe_working_set_t *data_set); extern void cleanup_calculations(pe_working_set_t *data_set); -extern const char* transition_timeout; +extern const char* transition_idle_timeout; extern gboolean was_processing_error; extern gboolean was_processing_warning; #endif diff --git a/crm/pengine/stages.c b/crm/pengine/stages.c index 457b2d0616..e58e1660f6 100644 --- a/crm/pengine/stages.c +++ b/crm/pengine/stages.c @@ -1,458 +1,458 @@ -/* $Id: stages.c,v 1.69 2005/06/16 12:36:22 andrew Exp $ */ +/* $Id: stages.c,v 1.70 2005/06/27 11:13:05 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include node_t *choose_fencer(action_t *stonith, node_t *node, GListPtr resources); void order_actions(action_t *lh, action_t *rh, order_constraint_t *order); -const char* transition_timeout = NULL; +const char* transition_idle_timeout = NULL; /* * Unpack everything * At the end you'll have: * - A list of nodes * - A list of resources (each with any dependencies on other resources) * - A list of constraints between resources and nodes * - A list of constraints between start/stop actions * - A list of nodes that need to be stonith'd * - A list of nodes that need to be shutdown * - A list of the possible stop/start actions (without dependencies) */ gboolean stage0(pe_working_set_t *data_set) { /* int lpc; */ crm_data_t * config = get_object_root( XML_CIB_TAG_CRMCONFIG, data_set->input); crm_data_t * cib_nodes = get_object_root( XML_CIB_TAG_NODES, data_set->input); crm_data_t * cib_resources = get_object_root( XML_CIB_TAG_RESOURCES, data_set->input); crm_data_t * cib_status = get_object_root( XML_CIB_TAG_STATUS, data_set->input); crm_data_t * cib_constraints = get_object_root( XML_CIB_TAG_CONSTRAINTS, data_set->input); const char *value = crm_element_value( data_set->input, XML_ATTR_HAVE_QUORUM); crm_debug_3("Beginning unpack"); /* reset remaining global variables */ - transition_timeout = "60s"; /* 1 minute */ + transition_idle_timeout = "60s"; /* 1 minute */ if(data_set->input == NULL) { return FALSE; } if(data_set->input != NULL && crm_element_value(data_set->input, XML_ATTR_DC_UUID) != NULL) { /* this should always be present */ data_set->dc_uuid = crm_element_value_copy( data_set->input, XML_ATTR_DC_UUID); } unpack_config(config, data_set); if(value != NULL) { 
crm_str_to_boolean(value, &data_set->have_quorum); } if(data_set->have_quorum == FALSE) { crm_warn("We do not have quorum" " - fencing and resource management disabled"); } unpack_nodes(cib_nodes, data_set); unpack_resources(cib_resources, data_set); unpack_status(cib_status, data_set); unpack_constraints(cib_constraints, data_set); return TRUE; } /* * Count how many valid nodes we have (so we know the maximum number of * colors we can resolve). * * Apply node constraints (ie. filter the "allowed_nodes" part of resources */ gboolean stage1(pe_working_set_t *data_set) { crm_debug_3("Applying placement constraints"); slist_iter( node, node_t, data_set->nodes, lpc, if(node == NULL) { /* error */ } else if(node->weight >= 0.0 /* global weight */ && node->details->online && node->details->type == node_member) { data_set->max_valid_nodes++; } ); apply_placement_constraints(data_set); return TRUE; } /* * Choose a color for all resources from highest priority and XML_STRENGTH_VAL_MUST * dependencies to lowest, creating new colors as necessary (returned * as "colors"). * * Some nodes may be colored as a "no_color" meaning that it was unresolvable * given the current node stati and constraints. */ gboolean stage2(pe_working_set_t *data_set) { crm_debug_3("Coloring resources"); crm_debug_5("create \"no color\""); data_set->no_color = create_color(data_set, NULL, NULL); /* Take (next) highest resource */ slist_iter( lh_resource, resource_t, data_set->resources, lpc, /* if resource.provisional == FALSE, repeat */ if(lh_resource->provisional == FALSE) { /* already processed this resource */ continue; } color_resource(lh_resource, data_set); /* next resource */ ); return TRUE; } /* * not sure if this is a good idea or not, but eventually we might like * to utilize as many nodes as possible... 
and this might be a convenient
 * hook
 */
gboolean
stage3(pe_working_set_t *data_set)
{
	/* measure the color list once rather than once per comparison */
	ssize_t num_colors = (ssize_t)g_list_length(data_set->colors);

	/* not sure if this is a good idea or not */
	if(num_colors > data_set->max_valid_nodes) {
		/* we need to consolidate some */

	} else if(num_colors < data_set->max_valid_nodes) {
		/* we can create a few more */
	}

	/* both branches are placeholders: nothing is modified yet */
	return TRUE;
}

/*
 * Choose a node for each (if possible) color
 *
 * Every color in data_set->colors that is still marked pending is handed to
 * choose_node_from_list().  Afterwards, for each resource allocated to that
 * color, the resource's rsc_colocation_lh() handler is invoked once per
 * entry in its rsc_cons list.
 *
 * NULL list entries are reported with pe_err() and skipped; colors that are
 * no longer pending are skipped silently.
 *
 * Always returns TRUE.
 */
gboolean
stage4(pe_working_set_t *data_set)
{
	crm_debug_3("Assigning nodes to colors");

	slist_iter(
		color, color_t, data_set->colors, lpc,

		/* the NULL check has to come before the first dereference
		 * of color, otherwise it cannot protect anything
		 */
		if(color == NULL) {
			pe_err("NULL color detected");
			continue;
		}

		crm_debug_4("assigning node to color %d", color->id);

		if(color->details->pending == FALSE) {
			/* a node was already chosen (or ruled out) */
			continue;
		}

		choose_node_from_list(color);

		if(color->details->chosen_node == NULL) {
			crm_debug_2("No node available for color %d",
				    color->id);
		} else {
			crm_debug_4("assigned %s to color %d",
				    color->details->chosen_node->details->uname,
				    color->id);
		}

		/* NOTE(review): the innermost loop reuses the counter name
		 * "lpc" of the outermost one.  That is only harmless if
		 * slist_iter() declares its counter in a scope of its own
		 * - confirm against the macro definition.
		 */
		slist_iter(
			rsc, resource_t, color->details->allocated_resources, lpc2,

			slist_iter(
				constraint, rsc_colocation_t, rsc->rsc_cons, lpc,

				rsc->fns->rsc_colocation_lh(constraint);
				);
			);
		);

	crm_debug_3("done");
	return TRUE;
}

/*
 * Attach nodes to the actions that need to be taken
 *
 * Mark actions XML_LRM_ATTR_OPTIONAL if possible (Ie.
if the start and stop are
 *   for the same node)
 *
 * Mark unrunnable actions
 */
gboolean
stage5(pe_working_set_t *data_set)
{
	crm_debug_3("Creating actions and internal ording constraints");

	/* each resource variant supplies its own implementation of both
	 * steps through its function table
	 */
	slist_iter(
		rsc, resource_t, data_set->resources, lpc,

		rsc->fns->create_actions(rsc, data_set);
		rsc->fns->internal_constraints(rsc, data_set);
		);

	return TRUE;
}

/*
 * Create dependencies for stonith and shutdown operations
 *
 * For every node in data_set->nodes:
 *  - online and shutting down: create a runnable CRM_OP_SHUTDOWN action and
 *    hand it to shutdown_constraints()
 *  - unclean while STONITH is disabled: log that resources are at risk
 *  - unclean while STONITH is enabled and we either have quorum or the
 *    no-quorum policy is no_quorum_ignore: create a runnable CRM_OP_FENCE
 *    action carrying the target's uname and UUID, and mark a shutdown
 *    action created for the same node as allowed to fail
 *  - unclean (in any of the above cases): call stonith_constraints() with
 *    the actions created for this node; either of them may be NULL
 *
 * Always returns TRUE.
 */
gboolean
stage6(pe_working_set_t *data_set)
{
	action_t *down_op = NULL;
	action_t *stonith_op = NULL;

	crm_debug_3("Processing fencing and shutdown cases");

	slist_iter(
		node, node_t, data_set->nodes, lpc,

		/* start each node with a clean slate: without this, an
		 * action created for an earlier node in the list would be
		 * passed to stonith_constraints() (or have its
		 * failure_is_fatal flag cleared) on behalf of this node
		 */
		down_op = NULL;
		stonith_op = NULL;

		if(node->details->online && node->details->shutdown) {
			crm_info("Scheduling Node %s for shutdown",
				 node->details->uname);

			down_op = custom_action(
				NULL, crm_strdup(CRM_OP_SHUTDOWN),
				CRM_OP_SHUTDOWN, node, FALSE, data_set);
			down_op->runnable = TRUE;

			shutdown_constraints(
				node, down_op, data_set);
		}

		if(node->details->unclean
		   && data_set->stonith_enabled == FALSE) {
			pe_err("Node %s is unclean!", node->details->uname);
			pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED");
			pe_warn("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE");

		} else if(node->details->unclean
			  && data_set->stonith_enabled
			  && (data_set->have_quorum
			      || data_set->no_quorum_policy == no_quorum_ignore)) {
			pe_warn("Scheduling Node %s for STONITH",
				node->details->uname);

			stonith_op = custom_action(
				NULL, crm_strdup(CRM_OP_FENCE),
				CRM_OP_FENCE, node, FALSE, data_set);
			stonith_op->runnable = TRUE;

			add_hash_param(
				stonith_op->extra, XML_LRM_ATTR_TARGET,
				node->details->uname);
			add_hash_param(
				stonith_op->extra, XML_LRM_ATTR_TARGET_UUID,
				node->details->id);

			/* the node is about to be fenced anyway, so a
			 * failed shutdown of it must not abort things
			 */
			if(down_op != NULL) {
				down_op->failure_is_fatal = FALSE;
			}
		}

		if(node->details->unclean) {
			stonith_constraints(
				node, stonith_op, down_op, data_set);
		}
		);

	return TRUE;
}

/*
 * Determine the sets of independent actions and the correct order for the
 * actions in each set.
* * Mark dependencies of un-runnable actions un-runnable * */ gboolean stage7(pe_working_set_t *data_set) { crm_debug_3("Applying ordering constraints"); slist_iter( order, order_constraint_t, data_set->ordering_constraints, lpc, /* try rsc_action-to-rsc_action */ resource_t *rsc = order->lh_rsc; if(rsc == NULL && order->lh_action) { rsc = order->lh_action->rsc; } if(rsc != NULL) { rsc->fns->rsc_order_lh(rsc, order); continue; } /* try action-to-rsc_action */ /* que off the rh resource */ rsc = order->rh_rsc; if(rsc == NULL && order->rh_action) { rsc = order->rh_action->rsc; } if(rsc != NULL) { rsc->fns->rsc_order_rh(order->lh_action, rsc, order); } else { /* fall back to action-to-action */ order_actions( order->lh_action, order->rh_action, order); } ); update_action_states(data_set->actions); return TRUE; } static int transition_id = -1; /* * Create a dependency graph to send to the transitioner (via the CRMd) */ gboolean stage8(pe_working_set_t *data_set) { char *transition_id_s = NULL; transition_id++; transition_id_s = crm_itoa(transition_id); crm_info("Creating transition graph %d.", transition_id); data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); - crm_xml_add(data_set->graph, "global_timeout", transition_timeout); + crm_xml_add(data_set->graph, "global_timeout", transition_idle_timeout); crm_xml_add(data_set->graph, "transition_id", transition_id_s); crm_free(transition_id_s); /* errors... 
slist_iter(action, action_t, action_list, lpc, if(action->optional == FALSE && action->runnable == FALSE) { print_action("Ignoring", action, TRUE); } ); */ slist_iter( rsc, resource_t, data_set->resources, lpc, crm_debug_4("processing actions for rsc=%s", rsc->id); rsc->fns->expand(rsc, data_set); ); crm_log_xml_debug_3( data_set->graph, "created resource-driven action list"); /* catch any non-resource specific actions */ crm_debug_4("processing non-resource actions"); slist_iter( action, action_t, data_set->actions, lpc, graph_element_from_action(action, data_set); ); crm_log_xml_debug_3(data_set->graph, "created generic action list"); return TRUE; } gboolean choose_node_from_list(color_t *color) { /* 1. Sort by weight 2. color.chosen_node = the node (of those with the highest wieght) with the fewest resources 3. remove color.chosen_node from all other colors */ GListPtr nodes = color->details->candidate_nodes; node_t *chosen = NULL; crm_debug_4("Choosing node for color %d", color->id); color->details->candidate_nodes = g_list_sort(nodes, sort_node_weight); chosen = g_list_nth_data(color->details->candidate_nodes, 0); color->details->chosen_node = NULL; color->details->pending = FALSE; if(chosen == NULL) { crm_debug_2("Could not allocate a node for color %d", color->id); return FALSE; } else if(chosen->details->unclean || chosen->details->shutdown) { crm_debug_2("Even highest ranked node for color %d" " is unclean or shutting down", color->id); return FALSE; } else if(chosen->weight < 0) { crm_debug_2("Even highest ranked node for color %d, had weight %f", color->id, chosen->weight); return FALSE; } /* todo: update the old node for each resource to reflect its * new resource count */ chosen->details->num_resources += color->details->num_resources; color->details->chosen_node = node_copy(chosen); return TRUE; } diff --git a/crm/tengine/tengine.c b/crm/tengine/tengine.c index befd10b7c2..33a38e8da3 100644 --- a/crm/tengine/tengine.c +++ b/crm/tengine/tengine.c @@ 
-1,1004 +1,1004 @@ -/* $Id: tengine.c,v 1.83 2005/06/16 12:42:54 andrew Exp $ */ +/* $Id: tengine.c,v 1.84 2005/06/27 11:13:05 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include gboolean graph_complete = FALSE; GListPtr graph = NULL; IPC_Channel *crm_ch = NULL; -uint transition_timeout = 30*1000; /* 30 seconds */ -uint default_transition_timeout = 30*1000; /* 30 seconds */ -uint next_transition_timeout = 30*1000; /* 30 seconds */ +uint transition_idle_timeout = 30*1000; /* 30 seconds */ +uint default_transition_idle_timeout = 30*1000; /* 30 seconds */ +uint next_transition_idle_timeout = 30*1000; /* 30 seconds */ void fire_synapse(synapse_t *synapse); gboolean initiate_action(action_t *action); gboolean confirm_synapse(synapse_t *synapse, int action_id); void check_synapse_triggers(synapse_t *synapse, int action_id); void cib_action_updated( const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); te_timer_t *transition_timer = NULL; int transition_counter = 1; const te_fsa_state_t te_state_matrix[i_invalid][s_invalid] = { /* s_idle, s_in_transition, s_abort_pending */ /* Got an i_transition */{ s_in_transition, s_abort_pending, s_abort_pending }, 
/* Got an i_cancel */{ s_idle, s_abort_pending, s_abort_pending }, /* Got an i_complete */{ s_idle, s_idle, s_abort_pending }, /* Got an i_cib_complete*/{ s_idle, s_in_transition, s_idle }, /* Got an i_cib_confirm */{ s_idle, s_in_transition, s_abort_pending }, /* Got an i_cib_notify */{ s_idle, s_in_transition, s_abort_pending } }; te_fsa_state_t te_fsa_state = s_idle; gboolean initialize_graph(void) { remove_cib_op_callback(-1, TRUE); if(transition_timer == NULL) { crm_malloc0(transition_timer, sizeof(te_timer_t)); transition_timer->timeout = 10; transition_timer->source_id = -1; transition_timer->reason = timeout_timeout; transition_timer->action = NULL; } else { stop_te_timer(transition_timer); } while(g_list_length(graph) > 0) { synapse_t *synapse = g_list_nth_data(graph, 0); while(g_list_length(synapse->actions) > 0) { action_t *action = g_list_nth_data(synapse->actions,0); synapse->actions = g_list_remove( synapse->actions, action); if(action->timer->source_id > 0) { crm_debug_3("Removing timer for action: %d", action->id); g_source_remove(action->timer->source_id); } free_xml(action->xml); crm_free(action->timer); crm_free(action); } while(g_list_length(synapse->inputs) > 0) { action_t *action = g_list_nth_data(synapse->inputs, 0); synapse->inputs = g_list_remove(synapse->inputs, action); free_xml(action->xml); crm_free(action); } graph = g_list_remove(graph, synapse); crm_free(synapse); } graph = NULL; return TRUE; } /* * returns the ID of the action if a match is found * returns -1 if a match was not found * returns -2 if a match was found but the action failed (and was * not allowed to) */ int match_graph_event(action_t *action, crm_data_t *event, const char *event_node) { const char *allow_fail = NULL; const char *this_action = NULL; const char *this_node = NULL; const char *this_uname = NULL; const char *this_rsc = NULL; const char *magic = NULL; const char *transition = NULL; char *this_event; const char *op_status; const char *update_event; action_t 
*match = NULL; int op_status_i = -3; int transition_i = -1; if(event == NULL) { crm_debug_4("Ignoring NULL event"); return -1; } this_rsc = crm_element_value(action->xml, XML_LRM_ATTR_RSCID); if(this_rsc == NULL) { crm_debug_4("Skipping non-resource event"); return -1; } update_event = crm_element_value(event, XML_ATTR_ID); op_status = crm_element_value(event, XML_LRM_ATTR_OPSTATUS); magic = crm_element_value(event, "transition_magic"); if(magic == NULL) { crm_debug_4("Skipping \"non-change\""); return -3; } this_action = crm_element_value(action->xml, XML_LRM_ATTR_TASK); this_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); this_uname = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); this_event = generate_op_key(this_rsc, this_action, action->interval); if(safe_str_neq(this_event, update_event)) { crm_debug_2("Action %d : Event mismatch %s vs. %s", action->id, this_event, update_event); } else if(safe_str_neq(this_node, event_node)) { crm_debug_2("Action %d : Node mismatch %s (%s) vs. 
%s", action->id, this_node, this_uname, event_node); } else { match = action; } crm_free(this_event); if(match == NULL) { return -1; } crm_debug("Matched action %d", action->id); if(transition != NULL) { transition_i = atoi(transition); } if(op_status != NULL) { op_status_i = atoi(op_status); } if(op_status == NULL || transition == NULL) { char *alt_status = NULL; char *alt_transition = NULL; decodeNVpair(magic, ':', &alt_transition, &alt_status); if(op_status == NULL && alt_status != NULL) { op_status_i = atoi(alt_status); } else if(op_status == NULL) { crm_err("Status details not found"); crm_log_message(LOG_ERR, event); } if(transition == NULL && alt_transition != NULL) { transition_i = atoi(alt_transition); } else if(transition == NULL) { crm_err("Transition details not found"); crm_log_message(LOG_ERR, event); } } if(transition_i == -1) { /* we never expect these - recompute */ crm_err("Detected an action initiated outside of a transition"); return -1; } else if(transition_counter != transition_i) { crm_warn("Detected an action from a different transition:" " %d vs. %d", transition_i, transition_counter); return -3; } /* stop this event's timer if it had one */ stop_te_timer(match->timer); /* Process OP status */ allow_fail = crm_element_value(match->xml, "allow_fail"); switch(op_status_i) { case -3: crm_err("Action returned the same as last time..." " whatever that was!"); crm_log_message(LOG_ERR, event); break; case LRM_OP_PENDING: crm_debug("Ignoring pending operation"); return -4; break; case LRM_OP_DONE: break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: crm_warn("Action %s on %s failed: %s", update_event, event_node, op_status2text(op_status_i)); if(FALSE == crm_is_true(allow_fail)) { send_complete("Action failed", event, te_failed, i_cancel); return -2; } break; case LRM_OP_CANCELLED: /* do nothing?? 
*/ crm_err("Dont know what to do for cancelled ops yet"); break; default: crm_err("Unsupported action result: %d", op_status_i); send_complete("Unsupport action result", event, te_failed, i_cancel); return -2; } te_log_action(LOG_INFO, "Action %d confirmed", match->id); match->complete = TRUE; return match->id; } int match_down_event(const char *target, const char *filter, int rc) { const char *allow_fail = NULL; const char *this_action = NULL; const char *this_node = NULL; action_t *match = NULL; slist_iter( synapse, synapse_t, graph, lpc, /* lookup event */ slist_iter( action, action_t, synapse->actions, lpc2, crm_data_t *action_args = NULL; if(action->type != action_type_crm) { continue; } this_action = crm_element_value( action->xml, XML_LRM_ATTR_TASK); /* if(crm_element_value(action->xml, XML_LRM_ATTR_RSCID)) { */ /* continue; */ /* } else */ if(filter != NULL && safe_str_neq(this_action, filter)) { continue; } if(safe_str_eq(this_action, CRM_OP_FENCE)) { action_args = find_xml_node( action->xml, XML_TAG_ATTRS, TRUE); this_node = crm_element_value( action_args, XML_LRM_ATTR_TARGET_UUID); } else if(safe_str_eq(this_action, CRM_OP_SHUTDOWN)) { this_node = crm_element_value( action->xml, XML_LRM_ATTR_TARGET_UUID); } else { crm_err("Action %d : Bad action %s", action->id, this_action); continue; } if(safe_str_neq(this_node, target)) { crm_debug("Action %d : Node mismatch: %s", action->id, this_node); continue; } match = action; ); if(match != NULL) { break; } ); if(match == NULL) { crm_debug_3("didnt match current action"); return -1; } crm_debug_3("matched"); /* stop this event's timer if it had one */ stop_te_timer(match->timer); /* Process OP status */ switch(rc) { case STONITH_SUCCEEDED: break; case STONITH_CANNOT: case STONITH_TIMEOUT: case STONITH_GENERIC: allow_fail = crm_element_value(match->xml, "allow_fail"); if(FALSE == crm_is_true(allow_fail)) { crm_err("Stonith of %s failed (%d)..." 
" aborting transition.", target, rc); send_complete("Stonith failed", match->xml, te_failed, i_cancel); return -2; } break; default: crm_err("Unsupported action result: %d", rc); send_complete("Unsupport Stonith result", match->xml, te_failed, i_cancel); return -2; } crm_debug_3("Action %d was successful, looking for next action", match->id); match->complete = TRUE; return match->id; } gboolean process_graph_event(crm_data_t *event, const char *event_node) { int rc = -1; int action_id = -1; int op_status_i = 0; const char *task = NULL; const char *rsc_id = NULL; const char *op_status = NULL; if(event == NULL) { crm_debug("a transition is starting"); process_trigger(action_id); check_for_completion(); return TRUE; } task = crm_element_value(event, XML_LRM_ATTR_LASTOP); rsc_id = crm_element_value(event, XML_ATTR_ID); op_status = crm_element_value(event, XML_LRM_ATTR_OPSTATUS); if(op_status != NULL) { op_status_i = atoi(op_status); if(op_status_i == -1) { /* just information that the action was sent */ crm_debug("Ignoring TE initiated updates"); return TRUE; } } crm_debug("Processing CIB update: %s on %s: %s", rsc_id, event_node, op_status2text(op_status_i)); - next_transition_timeout = transition_timeout; + next_transition_idle_timeout = transition_idle_timeout; slist_iter( synapse, synapse_t, graph, lpc, /* lookup event */ slist_iter( action, action_t, synapse->actions, lpc2, rc = match_graph_event(action, event, event_node); if(action_id >= 0 && rc >= 0) { crm_err("Additional match found: %d [%d]", rc, action_id); } else if(rc != -1) { action_id = rc; } ); if(action_id != -1) { break; } ); if(action_id == -1) { /* didnt find a match... 
* now try any dangling inputs */ slist_iter( synapse, synapse_t, graph, lpc, slist_iter( action, action_t, synapse->inputs, lpc2, rc = match_graph_event(action,event,event_node); if(action_id >=0 && rc >=0 && rc != action_id) { crm_err("Additional match found:" " %d [%d]", rc, action_id); } else if(rc != -1) { action_id = rc; } ); if(action_id != -1) { break; } ); } if(action_id > -1) { crm_log_xml_debug_3(event, "Event found"); } else if(action_id == -2) { crm_log_xml_info(event, "Event failed"); } else if(action_id == -3) { crm_log_xml_info(event, "Old event found"); } else if(action_id == -4) { crm_log_xml_debug(event, "Pending event found"); } else { /* unexpected event, trigger a pe-recompute */ /* possibly do this only for certain types of actions */ send_complete("Event not matched", event, te_update, i_cancel); return FALSE; } process_trigger(action_id); check_for_completion(); return TRUE; } void check_for_completion(void) { if(graph_complete) { /* allow some slack until we are pretty sure nothing * else is happening */ crm_info("Transition complete"); send_complete("complete", NULL, te_done, i_complete); } else { /* restart the transition timer again */ crm_debug_3("Transition not yet complete"); - transition_timer->timeout = next_transition_timeout; + transition_timer->timeout = next_transition_idle_timeout; start_te_timer(transition_timer); } } gboolean initiate_action(action_t *action) { gboolean ret = FALSE; gboolean send_command = FALSE; const char *on_node = NULL; const char *id = NULL; const char *task = NULL; const char *timeout = NULL; const char *msg_task = XML_GRAPH_TAG_RSC_OP; on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); id = crm_element_value(action->xml, XML_ATTR_ID); task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); timeout = crm_element_value(action->xml, XML_ATTR_TIMEOUT); if(id == NULL || strlen(id) == 0 || task == NULL || strlen(task) == 0) { /* error */ te_log_action(LOG_ERR, "Failed on corrupted command: %s 
(id=%s) %s", crm_element_name(action->xml), crm_str(id), crm_str(task)); } else if(action->type == action_type_pseudo){ te_log_action(LOG_INFO, "Executing pseudo-event (%d): " "%s on %s", action->id, task, on_node); action->complete = TRUE; process_trigger(action->id); ret = TRUE; } else if(action->type == action_type_crm && safe_str_eq(task, CRM_OP_FENCE)){ crm_data_t *action_args = find_xml_node( action->xml, XML_TAG_ATTRS, TRUE); const char *uuid = NULL; const char *target = NULL; const char *name = NULL; stonith_ops_t * st_op = NULL; xml_child_iter( action_args, nvpair, XML_CIB_TAG_NVPAIR, name = crm_element_value(nvpair, XML_NVPAIR_ATTR_NAME); if(safe_str_eq(name, XML_LRM_ATTR_TARGET)) { target = crm_element_value( nvpair, XML_NVPAIR_ATTR_VALUE); } else if(safe_str_eq(name, XML_LRM_ATTR_TARGET_UUID)) { uuid = crm_element_value( nvpair, XML_NVPAIR_ATTR_VALUE); } ); CRM_DEV_ASSERT(target != NULL); CRM_DEV_ASSERT(uuid != NULL); te_log_action(LOG_INFO,"Executing fencing operation (%s) on %s", id, target); #ifdef TESTING ret = TRUE; action->complete = TRUE; process_trigger(action->id); return TRUE; #endif crm_malloc0(st_op, sizeof(stonith_ops_t)); st_op->optype = RESET; st_op->timeout = crm_atoi(timeout, "10000"); /* ten seconds */ st_op->node_name = crm_strdup(target); st_op->node_uuid = crm_strdup(uuid); if(stonithd_input_IPC_channel() == NULL) { crm_err("Cannot fence %s - stonith not available", target); } else if (ST_OK == stonithd_node_fence( st_op )) { ret = TRUE; } } else if(on_node == NULL || strlen(on_node) == 0) { /* error */ te_log_action(LOG_ERR, "Failed on corrupted command: %s (id=%s) %s on %s", crm_element_name(action->xml), crm_str(id), crm_str(task), crm_str(on_node)); } else if(action->type == action_type_crm){ te_log_action(LOG_INFO, "Executing crm-event (%s): %s on %s", id, task, on_node); #ifdef TESTING action->complete = TRUE; process_trigger(action->id); return TRUE; #endif /* action->complete = TRUE; */ msg_task = task; send_command = TRUE; 
} else if(action->type == action_type_rsc){ /* never overwrite stop actions in the CIB with * anything other than completed results * * Writing pending stops makes it look like the * resource is running again */ #ifdef TESTING cib_action_update(action, LRM_OP_PENDING); return TRUE; #endif if(safe_str_neq(task, CRMD_ACTION_STOP)) { cib_action_update(action, LRM_OP_PENDING); } else { cib_action_updated(NULL, 0, cib_ok, NULL, action); } ret = TRUE; } else { te_log_action(LOG_ERR, "Failed on unsupported command type: " "%s, %s (id=%s) on %s", crm_element_name(action->xml), task, id, on_node); } if(send_command) { HA_Message *cmd = NULL; char *counter = crm_itoa(transition_counter); cmd = create_request(msg_task, NULL, on_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); ha_msg_add(cmd, "transition_id", crm_str(counter)); ret = send_ipc_message(crm_ch, cmd); crm_free(counter); if(ret && action->timeout > 0) { crm_debug_3("Setting timer for action %d",action->id); action->timer->reason = timeout_action_warn; start_te_timer(action->timer); } } return ret; } gboolean cib_action_update(action_t *action, int status) { char *code = NULL; crm_data_t *fragment = NULL; crm_data_t *state = NULL; crm_data_t *rsc = NULL; crm_data_t *xml_op = NULL; char *op_id = NULL; enum cib_errors rc = cib_ok; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *rsc_id = crm_element_value(action->xml, XML_LRM_ATTR_RSCID); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); int call_options = cib_quorum_override; if(status == LRM_OP_TIMEOUT) { if(crm_element_value(action->xml, XML_LRM_ATTR_RSCID) != NULL) { crm_warn("%s: %s %s on %s timed out", crm_element_name(action->xml), task, rsc_id, target); } else { crm_warn("%s: %s on %s timed out", crm_element_name(action->xml), task, target); } #ifdef TESTING /* turn the "pending" notification into a "op completed" 
notification * when testing... exercises more code this way. */ } else if(status == LRM_OP_PENDING) { status = LRM_OP_DONE; #endif } code = crm_itoa(status); /* update the CIB */ fragment = NULL; state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(state, XML_ATTR_UUID, target_uuid); crm_xml_add(state, XML_ATTR_UNAME, target); rsc = create_xml_node(state, XML_CIB_TAG_LRM); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); xml_op = create_xml_node(rsc,XML_LRM_TAG_RSC_OP); crm_xml_add(rsc, XML_ATTR_ID, rsc_id); crm_xml_add(xml_op, XML_ATTR_ID, task); op_id = generate_op_key(rsc_id, task, action->interval); crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_free(op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); crm_xml_add(rsc, XML_LRM_ATTR_RSCSTATE, get_rsc_state(task, status)); crm_xml_add(rsc, XML_LRM_ATTR_OPSTATUS, code); crm_xml_add(rsc, XML_LRM_ATTR_RC, code); crm_xml_add(rsc, XML_LRM_ATTR_LASTOP, task); crm_xml_add(xml_op, XML_LRM_ATTR_OPSTATUS, code); crm_xml_add(xml_op, XML_LRM_ATTR_RC, code); crm_xml_add(xml_op, "origin", __FUNCTION__); crm_free(code); ha_msg_add_int(xml_op, "transition_id", transition_counter); crm_malloc0(code, sizeof(char)*37); if(code != NULL) { snprintf(code, 36, "%d:-1", transition_counter); } crm_xml_add(xml_op, "transition_magic", code); crm_free(code); set_node_tstamp(xml_op); fragment = create_cib_fragment(state, NULL); crm_debug_3("Updating CIB with \"%s\" (%s): %s %s on %s", status<0?"new action":XML_ATTR_TIMEOUT, crm_element_name(action->xml), crm_str(task), rsc_id, target); #ifndef TESTING rc = te_cib_conn->cmds->modify( te_cib_conn, XML_CIB_TAG_STATUS, fragment, NULL, call_options); crm_debug("Updating CIB with %s action %d: %s %s on %s (call_id=%d)", op_status2text(status), action->id, task, rsc_id, target, rc); if(status == LRM_OP_PENDING) { crm_debug_2("Waiting for callback id: %d", rc); add_cib_op_callback(rc, FALSE, action, cib_action_updated); } #else 
fprintf(stderr, "Initiating action %d: %s %s on %s\n", action->id, task, rsc_id, target); call_options = 0; { HA_Message *cmd = ha_msg_new(11); ha_msg_add(cmd, F_TYPE, T_CRM); ha_msg_add(cmd, F_CRM_VERSION, CRM_VERSION); ha_msg_add(cmd, F_CRM_MSG_TYPE, XML_ATTR_REQUEST); ha_msg_add(cmd, F_CRM_TASK, CRM_OP_EVENTCC); ha_msg_add(cmd, F_CRM_SYS_TO, CRM_SYSTEM_TENGINE); ha_msg_add(cmd, F_CRM_SYS_FROM, CRM_SYSTEM_TENGINE); ha_msg_addstruct(cmd, crm_element_name(state), state); send_ipc_message(crm_ch, cmd); } #endif free_xml(fragment); free_xml(state); if(rc < cib_ok) { return FALSE; } return TRUE; } void cib_action_updated( const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { HA_Message *cmd = NULL; crm_data_t *rsc_op = NULL; const char *task = NULL; const char *rsc_id = NULL; const char *on_node = NULL; action_t *action = user_data; char *counter = crm_itoa(transition_counter); CRM_DEV_ASSERT(action != NULL); if(crm_assert_failed) { return; } CRM_DEV_ASSERT(action->xml != NULL); if(crm_assert_failed) { return; } rsc_op = action->xml; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); rsc_id = crm_element_value(rsc_op, XML_LRM_ATTR_RSCID); on_node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET); ha_msg_add_int(rsc_op, "transition_id", transition_counter); if(rc < cib_ok) { crm_err("Update for action %d: %s %s on %s FAILED", action->id, task, rsc_id, on_node); send_complete(cib_error2string(rc), output, te_failed, i_cancel); return; } if(te_fsa_state != s_in_transition) { int pending_updates = num_cib_op_callbacks(); if(pending_updates == 0) { send_complete("CIB update queue empty", output, te_done, i_cib_complete); } else { crm_debug("Still waiting on %d callbacks", pending_updates); } crm_debug("Not executing action: Not in a transition: %d", te_fsa_state); return; } crm_info("Initiating action %d: %s %s on %s", action->id, task, rsc_id, on_node); if(rsc_op != NULL) { crm_log_xml_debug_2(rsc_op, "Performing"); } cmd = create_request( 
task, rsc_op, on_node, CRM_SYSTEM_LRMD,CRM_SYSTEM_TENGINE,NULL); ha_msg_add(cmd, "transition_id", counter); crm_free(counter); #ifndef TESTING send_ipc_message(crm_ch, cmd); #else crm_log_message(LOG_INFO, cmd); #endif if(action->timeout > 0) { crm_debug_3("Setting timer for action %d",action->id); action->timer->reason = timeout_action_warn; start_te_timer(action->timer); } } gboolean initiate_transition(void) { crm_info("Initating transition"); process_graph_event(NULL, NULL); return TRUE; } void check_synapse_triggers(synapse_t *synapse, int action_id) { synapse->triggers_complete = TRUE; if(synapse->confirmed) { crm_debug_3("Skipping confirmed synapse %d", synapse->id); return; } else if(synapse->complete == FALSE) { crm_debug_3("Checking pre-reqs for %d", synapse->id); /* lookup prereqs */ slist_iter( prereq, action_t, synapse->inputs, lpc, crm_debug_3("Processing input %d", prereq->id); if(prereq->id == action_id) { crm_debug_3("Marking input %d complete", action_id); prereq->complete = TRUE; } else if(prereq->complete == FALSE) { crm_debug_3("Inputs for synapse %d not satisfied", synapse->id); synapse->triggers_complete = FALSE; } ); } } void fire_synapse(synapse_t *synapse) { if(synapse == NULL) { crm_err("Synapse was NULL!"); return; } crm_debug_3("Checking if synapse %d needs to be fired", synapse->id); if(synapse->complete) { crm_debug_3("Skipping complete synapse %d", synapse->id); return; } else if(synapse->triggers_complete == FALSE) { crm_debug_3("Synapse %d not yet satisfied", synapse->id); return; } crm_debug("All inputs for synapse %d satisfied... 
invoking actions", synapse->id); synapse->complete = TRUE; slist_iter( action, action_t, synapse->actions, lpc, /* allow some leway */ unsigned tmp_time = 2 * action->timeout; gboolean passed = FALSE; action->invoked = TRUE; /* Invoke the action and start the timer */ passed = initiate_action(action); if(passed == FALSE) { crm_err("Failed initiating <%s id=%d> in synapse %d", crm_element_name(action->xml), action->id, synapse->id); send_complete("Action init failed", action->xml, te_failed, i_cancel); return; } - if(tmp_time > next_transition_timeout) { - next_transition_timeout = tmp_time; + if(tmp_time > next_transition_idle_timeout) { + next_transition_idle_timeout = tmp_time; } ); crm_debug("Synapse %d complete", synapse->id); } gboolean confirm_synapse(synapse_t *synapse, int action_id) { gboolean complete = TRUE; synapse->confirmed = TRUE; slist_iter( action, action_t, synapse->actions, lpc, if(action->complete == FALSE) { complete = FALSE; synapse->confirmed = FALSE; crm_debug_3("Found an incomplete action" " - transition not complete"); break; } ); if(complete) { crm_debug("Synapse %d complete (action=%d)", synapse->id, action_id); } return complete; } void process_trigger(int action_id) { graph_complete = TRUE; crm_debug_3("Processing trigger from action %d", action_id); /* something happened, stop the timer and start it again at the end */ stop_te_timer(transition_timer); slist_iter( synapse, synapse_t, graph, lpc, if(synapse->confirmed) { crm_debug_3("Skipping confirmed synapse %d", synapse->id); continue; } check_synapse_triggers(synapse, action_id); fire_synapse(synapse); if(graph == NULL) { crm_err("Trigger processing aborted after failed synapse"); break; } crm_debug_3("Checking if %d is confirmed", synapse->id); if(synapse->complete == FALSE) { crm_debug_3("Found an incomplete synapse" " - transition not complete"); /* indicate that the transition is not yet complete */ graph_complete = FALSE; } else if(synapse->confirmed == FALSE) { gboolean 
confirmed = confirm_synapse(synapse,action_id); graph_complete = graph_complete && confirmed; } crm_debug_3("%d is %s", synapse->id, synapse->confirmed?"confirmed":synapse->complete?"complete":"pending"); ); } diff --git a/crm/tengine/tengine.h b/crm/tengine/tengine.h index 5def0ec92b..0eeb47e8c9 100644 --- a/crm/tengine/tengine.h +++ b/crm/tengine/tengine.h @@ -1,163 +1,163 @@ -/* $Id: tengine.h,v 1.23 2005/06/03 14:05:40 andrew Exp $ */ +/* $Id: tengine.h,v 1.24 2005/06/27 11:13:05 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef TENGINE__H #define TENGINE__H #include #include extern IPC_Channel *crm_ch; extern GListPtr graph; extern GMainLoop* mainloop; extern gboolean in_transition; typedef enum { action_type_pseudo, action_type_rsc, action_type_crm } action_type_e; typedef enum te_reason_e { te_update, te_done, te_halt, te_abort, te_abort_confirmed, te_failed, te_timeout, } te_reason_t; typedef enum te_fsa_states_e { s_idle, s_in_transition, s_abort_pending, s_invalid } te_fsa_state_t; typedef enum te_fsa_inputs_e { i_transition, i_cancel, i_complete, i_cib_complete, i_cib_confirm, i_cib_notify, i_invalid } te_fsa_input_t; extern const te_fsa_state_t te_state_matrix[i_invalid][s_invalid]; extern te_fsa_state_t te_fsa_state; typedef struct synapse_s { int id; gboolean triggers_complete; gboolean complete; gboolean confirmed; GListPtr actions; /* action_t* */ GListPtr inputs; /* action_t* */ } synapse_t; typedef struct te_timer_s te_timer_t; typedef struct action_s { int id; int timeout; int interval; te_timer_t *timer; action_type_e type; gboolean invoked; gboolean complete; gboolean can_fail; crm_data_t *xml; } action_t; enum timer_reason { timeout_action, timeout_action_warn, timeout_timeout, }; struct te_timer_s { int source_id; int timeout; enum timer_reason reason; action_t *action; }; /* tengine */ extern gboolean initialize_graph(void); extern gboolean process_graph_event(crm_data_t *event, const char *event_node); /* const char *event_node, const char *event_rsc, const char *rsc_state, * const char *event_action, const char *event_rc, const char *op_status); */ extern int match_graph_event( action_t *action, crm_data_t *event, const char *event_node); extern int match_down_event(const char *target, const char *filter, int rc); extern gboolean 
initiate_transition(void); extern gboolean cib_action_update(action_t *action, int status); /* utils */ extern void print_state(int log_level); extern void send_complete(const char *text, crm_data_t *msg, te_reason_t reason, te_fsa_input_t input); extern gboolean stop_te_timer(te_timer_t *timer); extern gboolean start_te_timer(te_timer_t *timer); extern const char *get_rsc_state(const char *task, op_status_t status); /* unpack */ extern gboolean unpack_graph(crm_data_t *xml_graph); extern gboolean extract_event(crm_data_t *msg); extern gboolean process_te_message( HA_Message * msg, crm_data_t *xml_data, IPC_Channel *sender); -extern uint transition_timeout; -extern uint default_transition_timeout; +extern uint transition_idle_timeout; +extern uint default_transition_idle_timeout; extern te_timer_t *transition_timer; extern cib_t *te_cib_conn; extern const char *actiontype2text(action_type_e type); extern void tengine_stonith_callback(stonith_ops_t * op, void * private_data); extern void tengine_stonith_connection_destroy(gpointer user_data); extern gboolean tengine_stonith_dispatch(IPC_Channel *sender, void *user_data); extern void check_for_completion(void); void process_trigger(int action_id); #ifdef TESTING # define te_log_action(log_level, fmt...) { \ do_crm_log(log_level, __FILE__, __FUNCTION__, fmt); \ fprintf(stderr, fmt); \ fprintf(stderr, "\n"); \ } #else # define te_log_action(log_level, fmt...) 
do_crm_log(log_level, __FILE__, __FUNCTION__, fmt) #endif #endif diff --git a/crm/tengine/unpack.c b/crm/tengine/unpack.c index c2e3810341..e99d21ca0d 100644 --- a/crm/tengine/unpack.c +++ b/crm/tengine/unpack.c @@ -1,379 +1,379 @@ -/* $Id: unpack.c,v 1.40 2005/06/16 08:12:12 andrew Exp $ */ +/* $Id: unpack.c,v 1.41 2005/06/27 11:13:05 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include cib_t *te_cib_conn = NULL; action_t* unpack_action(crm_data_t *xml_action); crm_data_t *create_shutdown_event(const char *node, int op_status); void set_timer_value(te_timer_t *timer, const char *time, int time_default); extern int transition_counter; void set_timer_value(te_timer_t *timer, const char *time, int time_default) { int tmp_time; if(timer == NULL) { return; } timer->timeout = time_default; tmp_time = crm_get_msec(time); if(tmp_time > 0) { timer->timeout = tmp_time; } } gboolean unpack_graph(crm_data_t *xml_graph) { /* timeout = crm_get_msec(time); - transition_timeout = transition_timer->timeout; + transition_idle_timeout = transition_timer->timeout; time = crm_element_value(xml_graph, "transition_fuzz"); transition_counter = crm_atoi(t_id, "-1"); crm_info("Beginning 
transition %d : timeout set to %dms", transition_counter, transition_timer->timeout); xml_child_iter( xml_graph, synapse, "synapse", synapse_t *new_synapse = NULL; crm_debug_3("looking in synapse %s", crm_element_value(synapse, XML_ATTR_ID)); crm_malloc0(new_synapse, sizeof(synapse_t)); new_synapse->id = num_synapses++; new_synapse->complete = FALSE; new_synapse->confirmed = FALSE; new_synapse->actions = NULL; new_synapse->inputs = NULL; graph = g_list_append(graph, new_synapse); crm_debug_3("look for actions in synapse %s", crm_element_value(synapse, XML_ATTR_ID)); xml_child_iter( synapse, actions, "action_set", xml_child_iter( actions, action, NULL, action_t *new_action = unpack_action(action); num_actions++; if(new_action == NULL) { continue; } crm_debug_3("Adding action %d to synapse %d", new_action->id, new_synapse->id); new_synapse->actions = g_list_append( new_synapse->actions, new_action); ); ); crm_debug_3("look for inputs in synapse %s", crm_element_value(synapse, XML_ATTR_ID)); xml_child_iter( synapse, inputs, "inputs", xml_child_iter( inputs, trigger, NULL, xml_child_iter( trigger, input, NULL, action_t *new_input = unpack_action(input); if(new_input == NULL) { continue; } crm_debug_3("Adding input %d to synapse %d", new_input->id, new_synapse->id); new_synapse->inputs = g_list_append( new_synapse->inputs, new_input); ); ); ); ); crm_info("Unpacked %d actions in %d synapses", num_actions, num_synapses); if(num_actions > 0) { return TRUE; } else { /* indicate to caller that there's nothing to do */ return FALSE; } } action_t* unpack_action(crm_data_t *xml_action) { const char *tmp = crm_element_value(xml_action, XML_ATTR_ID); action_t *action = NULL; crm_data_t *action_copy = NULL; crm_data_t *nvpair_list = NULL; if(tmp == NULL) { crm_err("Actions must have an id!"); crm_log_xml_debug_3(xml_action, "Action with missing id"); return NULL; } action_copy = copy_xml(xml_action); crm_malloc0(action, sizeof(action_t)); if(action == NULL) { return NULL; } 
action->id = atoi(tmp); action->timeout = 0; action->interval = 0; action->timer = NULL; action->invoked = FALSE; action->complete = FALSE; action->can_fail = FALSE; action->type = action_type_rsc; action->xml = action_copy; if(safe_str_eq(crm_element_name(action_copy), XML_GRAPH_TAG_RSC_OP)) { action->type = action_type_rsc; } else if(safe_str_eq(crm_element_name(action_copy), XML_GRAPH_TAG_PSEUDO_EVENT)) { action->type = action_type_pseudo; } else if(safe_str_eq(crm_element_name(action_copy), XML_GRAPH_TAG_CRM_EVENT)) { action->type = action_type_crm; } nvpair_list = find_xml_node(action_copy, XML_TAG_ATTRS, FALSE); if(nvpair_list == NULL) { crm_debug_2("No attributes in %s", crm_element_name(action_copy)); } xml_child_iter( nvpair_list, node_iter, XML_CIB_TAG_NVPAIR, const char *key = crm_element_value( node_iter, XML_NVPAIR_ATTR_NAME); const char *value = crm_element_value( node_iter, XML_NVPAIR_ATTR_VALUE); if(safe_str_eq(key, "timeout")) { action->timeout = crm_get_msec(value); } else if(safe_str_eq(key, "interval")) { action->interval = crm_get_msec(value); } ); crm_debug_3("Action %d has timer set to %dms", action->id, action->timeout); crm_malloc0(action->timer, sizeof(te_timer_t)); action->timer->timeout = 2 * action->timeout; action->timer->source_id = -1; action->timer->reason = timeout_action; action->timer->action = action; tmp = crm_element_value(action_copy, "can_fail"); crm_str_to_boolean(tmp, &(action->can_fail)); return action; } gboolean extract_event(crm_data_t *msg) { const char *event_node = NULL; struct abort_blob_s { const char *text; crm_data_t *update; te_reason_t reason; }; struct abort_blob_s blob = { NULL, NULL, 0 }; blob.reason = te_update; /* [cib fragment] ... 
*/ crm_debug_4("Extracting event from %s", crm_element_name(msg)); xml_child_iter( msg, node_state, XML_CIB_TAG_STATE, crm_data_t *resources = NULL; const char *ccm_state = crm_element_value( node_state, XML_CIB_ATTR_INCCM); const char *crmd_state = crm_element_value( node_state, XML_CIB_ATTR_CRMDSTATE); blob.update = node_state; event_node = crm_element_value(node_state, XML_ATTR_ID); crm_log_xml_debug_3(node_state,"Processing"); if(crm_element_value(node_state, XML_CIB_ATTR_SHUTDOWN) != NULL) { blob.text = "Aborting on "XML_CIB_ATTR_SHUTDOWN" attribute"; break; /* is this still required??? */ } else if(crm_element_value(node_state, CRM_OP_FENCE) != NULL) { /* node marked for STONITH * possibly by us when a shutdown timed out */ int action_id = -1; crm_debug_3("Checking for STONITH"); action_id = match_down_event( event_node, CRM_OP_SHUTDOWN, LRM_OP_DONE); if(action_id < 0) { blob.text="Stonith/shutdown event not matched"; break; } else { process_trigger(action_id); check_for_completion(); } /* END: is this still required??? */ } resources = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); resources = find_xml_node( resources, XML_LRM_TAG_RESOURCES, FALSE); /* * node state update... possibly from a shutdown we requested */ crm_debug_3("Processing state update"); if(safe_str_eq(ccm_state, XML_BOOLEAN_FALSE) || safe_str_eq(crmd_state, CRMD_JOINSTATE_DOWN)) { int action_id = -1; crm_debug_3("A shutdown we requested?"); action_id = match_down_event( event_node, NULL, LRM_OP_DONE); if(action_id >= 0) { process_trigger(action_id); check_for_completion(); } else { blob.text="Stonith/shutdown event not matched"; break; } } /* LRM resource update... 
*/ xml_child_iter( resources, rsc, NULL, xml_child_iter( rsc, rsc_op, NULL, crm_log_xml_debug_3( rsc_op, "Processing resource update"); if(!process_graph_event(rsc_op, event_node)) { /* the transition has already been * aborted and with better details */ return TRUE; } ); ); ); if(blob.text != NULL) { send_complete(blob.text, blob.update, blob.reason, i_cancel); } return TRUE; } crm_data_t* create_shutdown_event(const char *node, int op_status) { crm_data_t *event = create_xml_node(NULL, XML_CIB_TAG_STATE); char *code = crm_itoa(op_status); crm_xml_add(event, XML_LRM_ATTR_TARGET_UUID, node); /* event_rsc = crm_xml_add(event, XML_ATTR_ID); */ crm_xml_add(event, XML_LRM_ATTR_RC, "0"); crm_xml_add(event, XML_LRM_ATTR_LASTOP, XML_CIB_ATTR_SHUTDOWN); crm_xml_add(event, XML_LRM_ATTR_RSCSTATE, CRMD_ACTION_GENERIC_OK); crm_xml_add(event, XML_LRM_ATTR_OPSTATUS, code); crm_free(code); return event; } diff --git a/cts/CM_LinuxHAv2.py.in b/cts/CM_LinuxHAv2.py.in index 63092cde44..6690c3a115 100755 --- a/cts/CM_LinuxHAv2.py.in +++ b/cts/CM_LinuxHAv2.py.in @@ -1,534 +1,534 @@ #!@PYTHON@ '''CTS: Cluster Testing System: LinuxHA v2 dependent modules... ''' __copyright__=''' Author: Huang Zhen Copyright (C) 2004 International Business Machines Additional Audits, Revised Start action, Default Configuration: Copyright (C) 2004 Andrew Beekhof ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. import os,sys,CTS,CTSaudits,CTStests from CTS import * from CM_hb import HeartbeatCM from xml.dom.minidom import * from CTSaudits import ClusterAudit from CTStests import * ####################################################################### # # LinuxHA v2 dependent modules # ####################################################################### class LinuxHAv2(HeartbeatCM): ''' The linux-ha version 2 cluster manager class. It implements the things we need to talk to and manipulate linux-ha version 2 clusters ''' def __init__(self, Environment, randseed=None): HeartbeatCM.__init__(self, Environment, randseed=randseed) self.update({ "Name" : "linux-ha-v2", "DeadTime" : 300, "StartTime" : 300, # Max time to start up "StableTime" : 30, "StartCmd" : "@libdir@/heartbeat/ha_logd -d >/dev/null 2>&1; @libdir@/heartbeat/heartbeat >/dev/null 2>&1", "StopCmd" : "@libdir@/heartbeat/heartbeat -k", "ElectionCmd" : "@libdir@/heartbeat/crmadmin -E %s", "StatusCmd" : "@libdir@/heartbeat/crmadmin -S %s 2>/dev/null", "EpocheCmd" : "@libdir@/heartbeat/ccm_tool -e", "QuorumCmd" : "@libdir@/heartbeat/ccm_tool -q", "ParitionCmd" : "@libdir@/heartbeat/ccm_tool -p", "IsRscRunning" : "@libdir@/heartbeat/lrmadmin -E %s monitor 0 0 EVERYTIME 2>/dev/null|grep return", "ExecuteRscOp" : "@libdir@/heartbeat/lrmadmin -E %s %s 0 0 EVERYTIME 2>/dev/null", "CIBfile" : "%s:@HA_VARLIBDIR@/heartbeat/crm/cib.xml", "TmpDir" : "/tmp", "BreakCommCmd2" : "/usr/lib/heartbeat/TestHeartbeatComm break-communication %s>/dev/null 2>&1", "IsIPAddrRscRunning" : "", # Patterns to look for in the log files for various occasions... 
"Pat:DC_IDLE" : "crmd:.*State transition.*-> S_IDLE", # This wont work if we have multiple partitions # Use: "Pat:They_started" : "%s crmd:.*State transition.*-> S_NOT_DC", "Pat:They_started" : "Updating node state to member for %s", "Pat:We_started" : "%s crmd:.*State transition.*-> S_IDLE", "Pat:We_stopped" : "%s heartbeat.*Heartbeat shutdown complete", "Pat:They_stopped" : "%s crmd:.*LOST:.* %s ", "Pat:All_stopped" : "%s .*heartbeat.*Heartbeat shutdown complete", "Pat:They_dead" : "node %s.*: is dead", "Pat:TransitionComplete" : "Transition status: Complete: complete", # Bad news Regexes. Should never occur. "BadRegexes" : ( r"Shutting down\.", r"Forcing shutdown\.", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r", exiting\.", r"WARN.*Ignoring HA message.*vote.*not in our membership list", r"pengine:.*Attempting recovery of resource", r"pengine:.*Handling failed ", r"tengine:.*is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Resource .* was active at shutdown", r"ERROR:", r"CRIT:", ), }) del self["Standby"] self.check_transitions = 0 self.check_elections = 0 self.CIBsync = {} cib_prefix=''' ''' cib_options=''' - + ''' cib_glue_1=''' ''' cib_glue_2=''' ''' cib_suffix=''' ''' resources=''' ''' constraints=''' ''' cib_fencing = "" if self.Env["CIBResource"] == 1: self.log("Enabling DC resource") resources=''' ''' % self.Env["IPBase"] # DcIPaddr cant run anywhere but the DC constraints=''' ''' fields = string.split(self.Env["IPBase"], '.') for node in self.Env["nodes"]: # These resources prefer to run on the node with the same name fields[3] = str(int(fields[3])+1) ip = string.join(fields, '.') node_resource=(""" """ %("rsc_"+node, ip)) resources = resources + node_resource node_constraint=(""" """ % ("rsc_"+node, "rsc_"+node, "rsc_"+node, node)) constraints = constraints + 
node_constraint if self.Env["DoFencing"] == 1 : cib_options=cib_options + ''' ''' nodelist = "" for node in self.Env["nodes"]: nodelist += node + " " stonith_resource=(""" """ %(len(self.Env["nodes"]), nodelist)) resources = resources + stonith_resource self.default_cts_cib=cib_prefix + cib_options + cib_glue_1 + \ resources + cib_glue_2 + constraints + cib_suffix self.debug(self.default_cts_cib) def errorstoignore(self): # At some point implement a more elegant solution that # also produces a report at the end '''Return list of errors which are known and very noisey should be ignored''' if 1: return [ "crmadmin:" ] return [] def install_config(self, node): if not self.CIBsync.has_key(node) and self.Env["ClobberCIB"] == 1: self.CIBsync[node] = 1 if self.Env["CIBfilename"] == None: self.debug("Installing Generated CIB on node %s" %(node)) os.system("rm -f /tmp/cts.default.cib") os.system("echo \'" + self.default_cts_cib + "\' > /tmp/cts.default.cib") if 0!=self.rsh.cp("/tmp/cts.default.cib", "root@" + (self["CIBfile"]%node)): raise ValueError("Can not scp file to %s "%node) os.system("rm -f /tmp/cts.default.cib") else: self.debug("Installing CIB (%s) on node %s" %(self.Env["CIBfilename"], node)) if 0!=self.rsh.cp(self.Env["CIBfilename"], "root@" + (self["CIBfile"]%node)): raise ValueError("Can not scp file to %s "%node) def prepare(self): '''Finish the Initialization process. 
Prepare to test...''' for node in self.Env["nodes"]: self.ShouldBeStatus[node] = "" self.StataCM(node) def test_node_CM(self, node): '''Report the status of the cluster manager on a given node''' watchpats = [ ] watchpats.append("Current state: (S_IDLE|S_NOT_DC)") watchpats.append(self["Pat:They_started"]%node) idle_watch = CTS.LogWatcher(self["LogFileName"], watchpats) idle_watch.setwatch() out=self.rsh.readaline(node, self["StatusCmd"]%node) ret= (string.find(out, 'ok') != -1) self.debug("Node %s status: %s" %(node, out)) if not ret: if self.ShouldBeStatus[node] == self["up"]: self.log( "Node status for %s is %s but we think it should be %s" %(node, self["down"], self.ShouldBeStatus[node])) self.ShouldBeStatus[node]=self["down"] return 0 if self.ShouldBeStatus[node] == self["down"]: self.log( "Node status for %s is %s but we think it should be %s: %s" %(node, self["up"], self.ShouldBeStatus[node], out)) self.ShouldBeStatus[node]=self["up"] if not idle_watch.look(): # just up self.debug("Warn: Node %s is unstable: %s" %(node, out)) return 1 # Up and stable return 2 # Is the node up or is the node down def StataCM(self, node): '''Report the status of the cluster manager on a given node''' if self.test_node_CM(node) > 0: return 1 return None # Being up and being stable is not the same question... 
# NOTE(review): the defs up to Components() take `self` and appear to be
# methods of a class whose header lies outside this chunk; re-indent to class
# level when restoring the file layout.
def node_stable(self, node):
    '''Return 1 when the cluster manager on `node` is up *and* stable,
    None (after logging a warning) otherwise.'''
    if self.test_node_CM(node) == 2:
        return 1
    self.log("Warn: Node %s not stable" %(node))
    return None

def cluster_stable(self, timeout=None):
    '''Return 1 when the cluster is idle, None otherwise.

    Every node expected to be up is asked to dump its state, then the log
    is watched for the idle patterns for up to `timeout` (defaults to
    self["DeadTime"]).  With no node expected up the cluster counts as
    stable.
    '''
    if timeout is None:
        timeout = self["DeadTime"]

    watchpats = [
        "Current state: S_IDLE",
        self["Pat:DC_IDLE"],
    ]
    idle_watch = CTS.LogWatcher(self["LogFileName"], watchpats, timeout)
    idle_watch.setwatch()

    any_up = 0
    for node in self.Env["nodes"]:
        # have each node dump its current state
        if self.ShouldBeStatus[node] == self["up"]:
            self.rsh.readaline(node, (self["StatusCmd"] %node) )
            any_up = 1

    if any_up == 0 or idle_watch.look():
        return 1

    self.log("Warn: Cluster Master not IDLE")
    return None

def is_node_dc(self, node, status_line=None):
    '''Return 1 when `status_line` shows one of the states checked below
    (treated as "this node is the DC"), 0 otherwise.

    When `status_line` is empty it is fetched from `node` with the status
    command.
    '''
    if not status_line:
        status_line = self.rsh.readaline(node, self["StatusCmd"]%node)
    if not status_line:
        return 0

    dc_states = (
        'S_IDLE',
        'S_INTEGRATION',
        'S_FINALIZE_JOIN',
        'S_POLICY_ENGINE',
        'S_TRANSITION_ENGINE',
    )
    for state in dc_states:
        if status_line.find(state) != -1:
            self.debug("%s _is_ the DC" % node)
            return 1
    return 0

def isolate_node(self, node, allowlist):
    '''Isolate the communication between the nodes.

    Runs the "BreakCommCmd2" command on `node` with `allowlist`; returns 1
    on success, None (after logging) on failure.
    '''
    rc = self.rsh(node, self["BreakCommCmd2"]%allowlist)
    if rc == 0:
        return 1
    # Format the message here; every other log call in this code passes a
    # single pre-formatted string.
    self.log("Could not break the communication from node: %s" % node)
    return None

def Configuration(self):
    '''Return the <configuration> DOM element of the CIB in use.

    With Env["ClobberCIB"] == 1 the CIB is either the generated one
    (Env["CIBfilename"] unset) or the named file; otherwise the CIB is
    copied from the first node in Env["nodes"] into self["TmpDir"].

    Raises ValueError when that copy fails.
    '''
    if self.Env["ClobberCIB"] == 1:
        if self.Env["CIBfilename"] is None:
            tmp_cib = "/tmp/cts.default.cib"
            # Write the file directly instead of via `echo '...'` in a
            # shell, which breaks when the CIB text contains a single quote.
            cib_file = open(tmp_cib, "w")
            try:
                cib_file.write(self.default_cts_cib + "\n")
            finally:
                cib_file.close()
            cib = parse(tmp_cib)
            # The temporary file is left in place, as before (its cleanup
            # call was already commented out).
        else:
            cib = parse(self.Env["CIBfilename"])
    else:
        local_cib = "%s/cts_cib_%s.xml"%(self["TmpDir"],str(os.getpid()))
        if 0 != self.rsh.cp("root@"+self["CIBfile"]%self.Env["nodes"][0],local_cib):
            raise ValueError("Can not copy file to %s, maybe permission denied"%self["TmpDir"])
        try:
            cib = parse(local_cib)
        finally:
            # Do not leave the copy behind when parsing fails.
            os.remove(local_cib)
    return cib.getElementsByTagName('configuration')[0]

def Resources(self):
    '''Return a list of HAResource objects built from the CIB.

    Plain <resource> elements that are direct children of <resources> are
    wrapped as-is.  Each <incarnation> contributes "incarnation_max" copies
    of its resource, with ids of the form "<incarnation id>:<rid>:<n>".
    '''
    ResourceList = []

    #read resources in cib
    configuration = self.Configuration()
    resources = configuration.getElementsByTagName('resources')[0]
    for rsc in configuration.getElementsByTagName('resource'):
        # getElementsByTagName is recursive; keep only top-level resources.
        if rsc in resources.childNodes:
            ResourceList.append(HAResource(self,rsc))

    for inc in configuration.getElementsByTagName('incarnation'):
        inc_max = 0
        inc_name = inc.getAttribute("id")
        instance_attributes = inc.getElementsByTagName('instance_attributes')[0]
        attributes = instance_attributes.getElementsByTagName('attributes')[0]
        for nvpair in attributes.getElementsByTagName('nvpair'):
            if nvpair.getAttribute("name") == "incarnation_max":
                inc_max = int(nvpair.getAttribute("value"))

        inc_rsc = inc.getElementsByTagName('resource')[0]
        for inc_no in range(0, inc_max):
            rsc = HAResource(self,inc_rsc)
            rsc.inc_no = inc_no
            rsc.inc_name = inc_name
            rsc.inc_max = inc_max
            rsc.rid = inc_name+":"+rsc.rid + ":%d"%inc_no
            rsc.Instance = rsc.rid
            ResourceList.append(rsc)
    return ResourceList

def Dependancies(self):
    '''Return one dict per <rsc_to_rsc> element in the CIB, with the keys
    "id", "from", "to", "type" and "strength".'''
    DependancyList = []

    #read dependancy in cib
    configuration = self.Configuration()
    for node in configuration.getElementsByTagName('rsc_to_rsc'):
        dependancy = {}
        for key in ('id', 'from', 'to', 'type', 'strength'):
            dependancy[key] = node.getAttribute(key)
        DependancyList.append(dependancy)
    return DependancyList

def find_partitions(self):
    '''Return the distinct partition descriptions reported by the nodes
    that are expected to be up.

    Each node's answer has its last character stripped; answers of two
    characters or fewer are logged as bad, empty answers as missing.
    '''
    ccm_partitions = []

    for node in self.Env["nodes"]:
        if self.ShouldBeStatus[node] != self["up"]:
            continue

        partition = self.rsh.readaline(node, self["ParitionCmd"])
        if not partition:
            self.log("no partition details for %s" %node)
        elif len(partition) > 2:
            partition = partition[:-1]
            # Record each partition once.  The previous loop appended only
            # while iterating over the (initially empty) result list, so
            # nothing was ever added.
            if partition not in ccm_partitions:
                ccm_partitions.append(partition)
        else:
            self.log("bad partition details for %s" %node)

    return ccm_partitions

def HasQuorum(self, node_list):
    '''Return whether the first up node in `node_list` reports quorum.

    Returns 0 when no node in the list is expected to be up.
    '''
    # If we are auditing a partition, then one side will
    #   have quorum and the other not.
    # So the caller needs to tell us which we are checking
    # If no value for node_list is specified... assume all nodes
    if not node_list:
        node_list = self.Env["nodes"]

    for node in node_list:
        if self.ShouldBeStatus[node] == self["up"]:
            quorum = self.rsh.readaline(node, self["QuorumCmd"])
            return quorum.find("1") != -1
    return 0

def Components(self):
    '''Return the Process objects for the cluster components; stonithd is
    included only when Env["DoFencing"] is 1.'''
    complist = [Process("lrmd",self),Process("crmd",self)]
    if self.Env["DoFencing"] == 1 :
        complist.append(Process("stonithd",self))
    complist.append(Process("heartbeat",self))
    return complist

class HAResource(Resource):
    '''A cluster resource described by a <resource> element of the CIB.'''

    def __init__(self, cm, node):
        '''
        Get information from xml node: id, class, type and the name/value
        pairs found under instance_attributes/attributes/nvpair.
        '''
        self.rid     = str(node.getAttribute('id'))
        self.rclass  = str(node.getAttribute('class'))
        self.rtype   = str(node.getAttribute('type'))
        self.inc_name = None
        self.inc_no   = -1
        self.inc_max  = -1
        self.rparameters = {}

        attr_sets = node.getElementsByTagName('instance_attributes')
        if len(attr_sets) > 0:
            attr_lists = attr_sets[0].getElementsByTagName('attributes')
            if len(attr_lists) > 0:
                for nvpair in attr_lists[0].getElementsByTagName('nvpair'):
                    name = nvpair.getAttribute('name')
                    self.rparameters[name] = nvpair.getAttribute('value')

        Resource.__init__(self, cm, self.rtype, self.rid)

    def IsRunningOn(self, nodename):
        '''
        Return a true value when the "IsRscRunning" command for this
        resource, run on `nodename`, produces output containing "0";
        None otherwise.
        '''
        out = self.CM.rsh.readaline(nodename, self.CM["IsRscRunning"]%self.rid)
        return re.search("0",out)

    def RunningNodes(self):
        '''Return the nodes expected to be up on which this resource is
        running.'''
        ResourceNodes = []
        for node in self.CM.Env["nodes"]:
            if self.CM.ShouldBeStatus[node] != self.CM["up"]:
                continue
            if self.IsRunningOn(node):
                ResourceNodes.append(node)
        return ResourceNodes

    def _ResourceOperation(self, operation, nodename):
        '''
        Execute an operation on the resource; true when the remote command
        exited with status 0.
        '''
        self.CM.rsh.readaline(nodename, self.CM["ExecuteRscOp"]%(self.rid,operation))
        return self.CM.rsh.lastrc == 0

    def Start(self, nodename):
        '''
        This member function starts or activates the resource.
        '''
        return self._ResourceOperation("start", nodename)

    def Stop(self, nodename):
        '''
        This member function stops or deactivates the resource.
        '''
        return self._ResourceOperation("stop", nodename)

    def IsWorkingCorrectly(self, nodename):
        '''Run the "monitor" operation; true when it succeeds.'''
        return self._ResourceOperation("monitor", nodename)

#######################################################################
#
#   A little test code...
#
#   Which you are advised to completely ignore...
#
#######################################################################
if __name__ == '__main__':
    pass