diff --git a/crm/pengine/unpack.c b/crm/pengine/unpack.c index 2436ad3597..db7dc90fec 100644 --- a/crm/pengine/unpack.c +++ b/crm/pengine/unpack.c @@ -1,1273 +1,1278 @@ -/* $Id: unpack.c,v 1.114 2005/08/07 08:16:38 andrew Exp $ */ +/* $Id: unpack.c,v 1.115 2005/08/10 09:25:10 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include /* for ONLINESTATUS */ #include #include #include gint sort_op_by_callid(gconstpointer a, gconstpointer b); gboolean unpack_rsc_to_attr(crm_data_t *xml_obj, pe_working_set_t *data_set); gboolean unpack_rsc_to_node(crm_data_t *xml_obj, pe_working_set_t *data_set); gboolean unpack_rsc_order(crm_data_t *xml_obj, pe_working_set_t *data_set); gboolean unpack_rsc_colocation(crm_data_t *xml_obj, pe_working_set_t *data_set); gboolean unpack_rsc_location(crm_data_t *xml_obj, pe_working_set_t *data_set); gboolean unpack_lrm_rsc_state( node_t *node, crm_data_t * lrm_state, pe_working_set_t *data_set); gboolean add_node_attrs( crm_data_t * attrs, node_t *node, pe_working_set_t *data_set); gboolean unpack_rsc_op( resource_t *rsc, node_t *node, crm_data_t *xml_op, gboolean *running, int *max_call_id, pe_working_set_t *data_set); gboolean determine_online_status( crm_data_t * 
node_state, node_t *this_node, pe_working_set_t *data_set); gboolean rsc_colocation_new( const char *id, enum con_strength strength, resource_t *rsc_lh, resource_t *rsc_rh); gboolean create_ordering( const char *id, enum con_strength strength, resource_t *rsc_lh, resource_t *rsc_rh, pe_working_set_t *data_set); rsc_to_node_t *rsc2node_new( const char *id, resource_t *rsc, double weight, node_t *node, pe_working_set_t *data_set); const char *param_value(crm_data_t * parent, const char *name); rsc_to_node_t *generate_location_rule( resource_t *rsc, crm_data_t *location_rule, pe_working_set_t *data_set); gboolean unpack_config(crm_data_t * config, pe_working_set_t *data_set) { const char *value = NULL; value = param_value(config, "transition_idle_timeout"); if(value != NULL) { long tmp = crm_get_msec(value); if(tmp > 0) { transition_idle_timeout = value; } else { crm_err("Invalid value for %s: %s", "transition_idle_timeout", value); } } crm_debug_4("%s set to: %s", "transition_idle_timeout", transition_idle_timeout); value = param_value(config, "default_resource_stickiness"); data_set->default_resource_stickiness = crm_atoi(value, "0"); value = param_value(config, "stonith_enabled"); if(value != NULL) { crm_str_to_boolean(value, &data_set->stonith_enabled); } crm_info("STONITH of failed nodes is %s", data_set->stonith_enabled?"enabled":"disabled"); value = param_value(config, "symmetric_cluster"); if(value != NULL) { crm_str_to_boolean(value, &data_set->symmetric_cluster); } if(data_set->symmetric_cluster) { crm_info("Cluster is symmetric" " - resources can run anywhere by default"); } value = param_value(config, "no_quorum_policy"); if(safe_str_eq(value, "ignore")) { data_set->no_quorum_policy = no_quorum_ignore; } else if(safe_str_eq(value, "freeze")) { data_set->no_quorum_policy = no_quorum_freeze; } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_info("On loss of CCM Quorum: Freeze resources"); 
break; case no_quorum_stop: crm_info("On loss of CCM Quorum: Stop ALL resources"); break; case no_quorum_ignore: crm_warn("On loss of CCM Quorum: Ignore"); break; } return TRUE; } const char * param_value(crm_data_t * parent, const char *name) { crm_data_t * a_default = NULL; if(parent != NULL) { a_default = find_entity(parent, XML_CIB_TAG_NVPAIR, name); } if(a_default == NULL) { crm_warn("Option %s not set", name); return NULL; } return crm_element_value(a_default, XML_NVPAIR_ATTR_VALUE); } gboolean unpack_nodes(crm_data_t * xml_nodes, pe_working_set_t *data_set) { node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; crm_debug("Begining unpack... %s", xml_nodes?crm_element_name(xml_nodes):""); xml_child_iter( xml_nodes, xml_obj, XML_CIB_TAG_NODE, new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); crm_debug_3("Processing node %s/%s", uname, id); if(id == NULL) { pe_err("Must specify id tag in "); continue; } if(type == NULL) { pe_err("Must specify type tag in "); continue; } crm_malloc0(new_node, sizeof(node_t)); if(new_node == NULL) { return FALSE; } new_node->weight = 0; new_node->fixed = FALSE; crm_malloc0(new_node->details, sizeof(struct node_shared_s)); if(new_node->details == NULL) { crm_free(new_node); return FALSE; } crm_debug_3("Creaing node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->type = node_ping; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->running_rsc = NULL; new_node->details->attrs = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ 
if(data_set->stonith_enabled) { /* all nodes are unclean until we've seen their * status entry */ new_node->details->unclean = TRUE; } else { /* blind faith... */ new_node->details->unclean = FALSE; } if(safe_str_eq(type, "member")) { new_node->details->type = node_member; } add_node_attrs(xml_obj, new_node, data_set); if(crm_is_true(g_hash_table_lookup( new_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", new_node->details->uname); new_node->weight = -INFINITY; } data_set->nodes = g_list_append(data_set->nodes, new_node); crm_debug_3("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); crm_action_debug_3(print_node("Added", new_node, FALSE)); ); data_set->nodes = g_list_sort(data_set->nodes, sort_node_weight); return TRUE; } gboolean unpack_resources(crm_data_t * xml_resources, pe_working_set_t *data_set) { crm_debug("Begining unpack... %s", xml_resources?crm_element_name(xml_resources):""); xml_child_iter( xml_resources, xml_obj, NULL, resource_t *new_rsc = NULL; crm_debug_2("Begining unpack... %s", xml_obj?crm_element_name(xml_obj):""); if(common_unpack(xml_obj, &new_rsc, data_set)) { data_set->resources = g_list_append( data_set->resources, new_rsc); crm_action_debug_3( print_resource("Added", new_rsc, FALSE)); } else { pe_err("Failed unpacking %s %s", crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID)); } ); data_set->resources = g_list_sort( data_set->resources, sort_rsc_priority); return TRUE; } gboolean unpack_constraints(crm_data_t * xml_constraints, pe_working_set_t *data_set) { crm_data_t *lifetime = NULL; crm_debug("Begining unpack... 
%s", xml_constraints?crm_element_name(xml_constraints):""); xml_child_iter( xml_constraints, xml_obj, NULL, const char *id = crm_element_value(xml_obj, XML_ATTR_ID); if(id == NULL) { pe_err("Constraint <%s...> must have an id", crm_element_name(xml_obj)); continue; } crm_debug_3("Processing constraint %s %s", crm_element_name(xml_obj),id); lifetime = cl_get_struct(xml_obj, "lifetime"); if(test_ruleset(lifetime, NULL) == FALSE) { crm_info("Constraint %s %s is not active", crm_element_name(xml_obj), id); } else if(safe_str_eq(XML_CONS_TAG_RSC_ORDER, crm_element_name(xml_obj))) { unpack_rsc_order(xml_obj, data_set); } else if(safe_str_eq(XML_CONS_TAG_RSC_DEPEND, crm_element_name(xml_obj))) { unpack_rsc_colocation(xml_obj, data_set); } else if(safe_str_eq(XML_CONS_TAG_RSC_LOCATION, crm_element_name(xml_obj))) { unpack_rsc_location(xml_obj, data_set); } else { pe_err("Unsupported constraint type: %s", crm_element_name(xml_obj)); } ); return TRUE; } rsc_to_node_t * rsc2node_new(const char *id, resource_t *rsc, double weight, node_t *node, pe_working_set_t *data_set) { rsc_to_node_t *new_con = NULL; if(rsc == NULL || id == NULL) { pe_err("Invalid constraint %s for rsc=%p", crm_str(id), rsc); return NULL; } crm_malloc0(new_con, sizeof(rsc_to_node_t)); if(new_con != NULL) { new_con->id = id; new_con->rsc_lh = rsc; new_con->node_list_rh = NULL; new_con->weight = weight; if(node != NULL) { node_t *copy = node_copy(node); new_con->node_list_rh = g_list_append(NULL, copy); } data_set->placement_constraints = g_list_append( data_set->placement_constraints, new_con); } return new_con; } /* remove nodes that are down, stopping */ /* create +ve rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? 
*/ gboolean unpack_status(crm_data_t * status, pe_working_set_t *data_set) { const char *uname = NULL; crm_data_t * lrm_rsc = NULL; crm_data_t * lrm_agents = NULL; crm_data_t * attrs = NULL; node_t *this_node = NULL; crm_debug_3("Begining unpack"); xml_child_iter( status, node_state, XML_CIB_TAG_STATE, /* id = crm_element_value(node_state, XML_ATTR_ID); */ uname = crm_element_value(node_state, XML_ATTR_UNAME); attrs = find_xml_node(node_state, XML_LRM_TAG_ATTRIBUTES,FALSE); lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_agents = find_xml_node(lrm_rsc, XML_LRM_TAG_AGENTS, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); crm_debug_3("Processing node %s", uname); this_node = pe_find_node(data_set->nodes, uname); if(uname == NULL) { /* error */ continue; } else if(this_node == NULL) { pe_warn("Node %s in status section no longer exists", uname); continue; } /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; crm_debug_3("Adding runtime node attrs"); add_node_attrs(node_state, this_node, data_set); crm_debug_3("determining node state"); determine_online_status(node_state, this_node, data_set); if(this_node->details->online || data_set->stonith_enabled) { /* offline nodes run no resources... 
* unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ crm_debug_3("Processing lrm resource entries"); unpack_lrm_rsc_state(this_node, lrm_rsc, data_set); } ); return TRUE; } gboolean determine_online_status( crm_data_t * node_state, node_t *this_node, pe_working_set_t *data_set) { gboolean online = FALSE; const char *uname = crm_element_value(node_state,XML_ATTR_UNAME); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE); const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); const char *shutdown = crm_element_value(node_state, XML_CIB_ATTR_SHUTDOWN); if(this_node == NULL) { return online; } if(shutdown != NULL) { this_node->details->shutdown = TRUE; } if(safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { this_node->details->expected_up = TRUE; } if(data_set->stonith_enabled == FALSE) { if(!crm_is_true(ccm_state) || safe_str_eq(ha_state,DEADSTATUS)){ crm_debug_2("Node is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); } else if(!crm_is_true(ccm_state) || safe_str_eq(ha_state, DEADSTATUS)) { } else if(safe_str_neq(join_state, CRMD_JOINSTATE_DOWN) && safe_str_eq(crm_state, ONLINESTATUS)) { online = TRUE; } else if(this_node->details->expected_up == FALSE) { crm_debug_2("CRMd is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); crm_debug_2("\tcrm_state=%s, join_state=%s, expected=%s", crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { /* mark it unclean */ this_node->details->unclean = TRUE; pe_err("Node %s is partially & un-expectedly down", uname); crm_debug_2("\tcrm_state=%s, join_state=%s, expected=%s", crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } 
} else { if(crm_is_true(ccm_state) && (ha_state == NULL || safe_str_eq(ha_state, ACTIVESTATUS)) && safe_str_eq(crm_state, ONLINESTATUS) && safe_str_neq(join_state, CRMD_JOINSTATE_DOWN)) { online = TRUE; } else if(this_node->details->expected_up == FALSE) { crm_debug_2("CRMd on %s is down: ha_state=%s, ccm_state=%s", uname, crm_str(ha_state), crm_str(ccm_state)); crm_debug_2("\tcrm_state=%s, join_state=%s, expected=%s", crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { /* mark it unclean */ this_node->details->unclean = TRUE; pe_err("Node %s is un-expectedly down", uname); crm_debug_2("\tha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); crm_debug_2("\tcrm_state=%s, join_state=%s, expected=%s", crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } } if(online) { crm_debug_2("Node %s is online", uname); this_node->details->online = TRUE; } else { /* remove node from contention */ crm_debug_2("Node %s is down", uname); this_node->weight = -INFINITY; this_node->fixed = TRUE; } if(this_node->details->unclean) { pe_warn("Node %s is unclean", uname); } if(this_node->details->shutdown) { /* dont run resources here */ this_node->weight = -INFINITY; this_node->fixed = TRUE; crm_debug_2("Node %s is due for shutdown", uname); } return online; } gboolean unpack_lrm_rsc_state(node_t *node, crm_data_t * lrm_rsc_list, pe_working_set_t *data_set) { const char *rsc_id = NULL; const char *node_id = node->details->uname; const char *rsc_state = NULL; int max_call_id = -1; gboolean running = FALSE; resource_t *rsc = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; CRM_DEV_ASSERT(node != NULL); if(crm_assert_failed) { return FALSE; } xml_child_iter( lrm_rsc_list, rsc_entry, XML_LRM_TAG_RESOURCE, rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); rsc_state = crm_element_value(rsc_entry, XML_LRM_ATTR_RSCSTATE); rsc = pe_find_resource(data_set->resources, rsc_id); crm_debug_3("[%s] Processing %s on %s (%s)", 
crm_element_name(rsc_entry), rsc_id, node_id, rsc_state); if(rsc == NULL) { pe_err("Could not find a match for resource" " %s in %s's status section", rsc_id, node_id); crm_log_xml_debug(rsc_entry, "Invalid status entry"); continue; } running = FALSE; max_call_id = -1; op_list = NULL; sorted_op_list = NULL; xml_child_iter( rsc_entry, rsc_op, XML_LRM_TAG_RSC_OP, op_list = g_list_append(op_list, rsc_op); ); if(op_list == NULL) { continue; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); slist_iter( rsc_op, crm_data_t, sorted_op_list, lpc, unpack_rsc_op(rsc, node, rsc_op, &running, &max_call_id, data_set); ); /* no need to free the contents */ g_list_free(sorted_op_list); if(running) { native_add_running(rsc, node, data_set); } ); return TRUE; } #define sort_return(an_int) crm_free(a_uuid); crm_free(b_uuid); return an_int gint sort_op_by_callid(gconstpointer a, gconstpointer b) { char *a_uuid = NULL; char *b_uuid = NULL; const char *a_task_id = cl_get_string(a, XML_LRM_ATTR_CALLID); const char *b_task_id = cl_get_string(b, XML_LRM_ATTR_CALLID); const char *a_key = cl_get_string(a, XML_ATTR_TRANSITION_MAGIC); const char *b_key = cl_get_string(b, XML_ATTR_TRANSITION_MAGIC); int a_id = -1; int b_id = -1; int a_status = -1; int b_status = -1; int a_call_id = -1; int b_call_id = -1; CRM_DEV_ASSERT(a_task_id != NULL && b_task_id != NULL); a_call_id = atoi(a_task_id); b_call_id = atoi(b_task_id); if(a_call_id == -1 && b_call_id == -1) { /* both are pending ops so it doesnt matter since * stops are never pending */ sort_return(0); } else if(a_call_id >= 0 && a_call_id < b_call_id) { crm_debug_2("%s (%d) < %s (%d) : call id", ID(a), a_call_id, ID(b), b_call_id); sort_return(-1); } else if(b_call_id >= 0 && a_call_id > b_call_id) { crm_debug_2("%s (%d) > %s (%d) : call id", ID(a), a_call_id, ID(b), b_call_id); sort_return(1); } crm_debug_3("%s (%d) == %s (%d) : continuing", ID(a), a_call_id, ID(b), b_call_id); /* now process pending ops */ CRM_DEV_ASSERT(a_key != 
NULL && b_key != NULL); CRM_DEV_ASSERT(decode_transition_magic(a_key,&a_uuid,&a_id,&a_status)); CRM_DEV_ASSERT(decode_transition_magic(b_key,&b_uuid,&b_id,&b_status)); /* try and determin the relative age of the operation... * some pending operations (ie. a start) may have been supuerceeded * by a subsequent stop * * [a|b]_id == -1 means its a shutdown operation and _always_ comes last */ if(safe_str_neq(a_uuid, b_uuid) || a_id == b_id) { /* * some of the logic in here may be redundant... * * if the UUID from the TE doesnt match then one better * be a pending operation. * pending operations dont survive between elections and joins * because we query the LRM directly */ CRM_DEV_ASSERT(a_call_id == -1 || b_call_id == -1); CRM_DEV_ASSERT(a_call_id >= 0 || b_call_id >= 0); if(b_call_id == -1) { crm_debug_2("%s (%d) < %s (%d) : transition + call id", ID(a), a_call_id, ID(b), b_call_id); sort_return(-1); } if(a_call_id == -1) { crm_debug_2("%s (%d) > %s (%d) : transition + call id", ID(a), a_call_id, ID(b), b_call_id); sort_return(1); } } else if((a_id >= 0 && a_id < b_id) || b_id == -1) { crm_debug_2("%s (%d) < %s (%d) : transition", ID(a), a_id, ID(b), b_id); sort_return(-1); } else if((b_id >= 0 && a_id > b_id) || a_id == -1) { crm_debug_2("%s (%d) > %s (%d) : transition", ID(a), a_id, ID(b), b_id); sort_return(1); } /* we should never end up here */ crm_err("%s (%d:%d:%s) ?? 
%s (%d:%d:%s) : default", ID(a), a_call_id, a_id, a_uuid, ID(b), b_call_id, b_id, b_uuid); CRM_DEV_ASSERT(FALSE); sort_return(0); } gboolean unpack_rsc_op(resource_t *rsc, node_t *node, crm_data_t *xml_op, gboolean *running, int *max_call_id, pe_working_set_t *data_set) { const char *id = NULL; const char *task = NULL; const char *task_id = NULL; const char *task_status = NULL; int task_id_i = -1; int task_status_i = -2; action_t *action = NULL; gboolean is_stop_action = FALSE; CRM_DEV_ASSERT(rsc != NULL); if(crm_assert_failed) { return FALSE; } CRM_DEV_ASSERT(node != NULL); if(crm_assert_failed) { return FALSE; } CRM_DEV_ASSERT(xml_op != NULL); if(crm_assert_failed) { return FALSE; } id = ID(xml_op); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); task_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); task_status = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); CRM_DEV_ASSERT(id != NULL); if(crm_assert_failed) { return FALSE; } CRM_DEV_ASSERT(task != NULL); if(crm_assert_failed) { return FALSE; } CRM_DEV_ASSERT(task_status != NULL); if(crm_assert_failed) { return FALSE; } task_status_i = atoi(task_status); CRM_DEV_ASSERT(task_status_i <= LRM_OP_ERROR); if(crm_assert_failed) {return FALSE;} CRM_DEV_ASSERT(task_status_i >= LRM_OP_PENDING); if(crm_assert_failed) {return FALSE;} + if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) { + /* safe to ignore these */ + return TRUE; + } + crm_debug_2("Unpacking task %s/%s (call_id=%s, status=%s) on %s", rsc->id, task, task_id, task_status, node->details->uname); if(safe_str_eq(task, CRMD_ACTION_STOP)) { is_stop_action = TRUE; } if(task_status_i != LRM_OP_PENDING) { task_id_i = crm_atoi(task_id, "-1"); CRM_DEV_ASSERT(task_id != NULL); if(crm_assert_failed) { return FALSE; } CRM_DEV_ASSERT(task_id_i >= 0); if(crm_assert_failed) { return FALSE; } if(task_id_i == *max_call_id) { crm_debug_2("Already processed this call"); return TRUE; } CRM_DEV_ASSERT(task_id_i > *max_call_id); if(crm_assert_failed) { return FALSE; } } 
if(*max_call_id < task_id_i) { *max_call_id = task_id_i; } if(node->details->unclean) { crm_debug_2("Node %s (where %s is running) is unclean." " Further action depends on the value of %s", node->details->uname, rsc->id, XML_RSC_ATTR_STOPFAIL); } switch(task_status_i) { case LRM_OP_PENDING: /* * TODO: this may need some more thought * Some cases: * - PE reinvoked with pending action that will succeed * - PE reinvoked with pending action that will fail * - After DC election * - After startup * * pending start - required start * pending stop - required stop * pending on unavailable node - stonith * * For now this should do */ if(is_stop_action) { /* re-issue the stop and return */ stop_action(rsc, node, FALSE); *running = TRUE; rsc->recover = TRUE; } else if(safe_str_eq(task, CRMD_ACTION_START)) { rsc->start_pending = TRUE; *running = TRUE; /* make sure it is re-issued but, * only if we have quorum */ if(data_set->have_quorum == TRUE || data_set->no_quorum_policy == no_quorum_ignore){ /* do not specify the node, we may want * to start it elsewhere */ start_action(rsc, NULL, FALSE); } } else if(*running == TRUE) { crm_debug_2("Re-issuing pending recurring task:" " %s for %s on %s", task, rsc->id, node->details->id); /* do not specify the node, we may want * to start it elsewhere */ custom_action(rsc, crm_strdup(id), task, NULL, FALSE, data_set); } break; case LRM_OP_DONE: crm_debug_3("%s/%s completed on %s", rsc->id, task, node->details->uname); if(is_stop_action) { *running = FALSE; } else if(safe_str_eq(task, CRMD_ACTION_START)) { crm_debug_3("%s active on %s", rsc->id, node->details->uname); *running = TRUE; } else if(*running) { /* make sure its already created and is optional * * creating it now tells create_recurring_actions() * that it can safely leave it optional */ custom_action(rsc, crm_strdup(id), task, NULL, TRUE, data_set); } break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: crm_debug_2("Processing failed op (%s) for %s on %s", 
task, rsc->id, node->details->uname); action = custom_action( rsc, crm_strdup(id), task, NULL, TRUE, data_set); if(action->on_fail == action_fail_nothing) { /* pretend the op completed */ if(is_stop_action) { *running = FALSE; } else { *running = TRUE; } break; } if(task_status_i == LRM_OP_NOTSUPPORTED || is_stop_action || safe_str_eq(task, CRMD_ACTION_START) ) { crm_warn("Handling failed %s for %s on %s", task, rsc->id, node->details->uname); rsc2node_new("dont_run__failed_stopstart", rsc, -INFINITY, node, data_set); } if(action->on_fail == action_fail_fence) { /* treat it as if it is still running * but also mark the node as unclean */ rsc->unclean = TRUE; node->details->unclean = TRUE; stop_action(rsc, node, FALSE); *running = TRUE; } else if(action->on_fail == action_fail_block) { /* let this depend on the stop action * which will fail but make sure the * transition continues... */ rsc->unclean = TRUE; *running = TRUE; } else if(action->on_fail == action_fail_stop) { *running = TRUE; stop_action(rsc, node, FALSE); } break; case LRM_OP_CANCELLED: /* do nothing?? 
*/ pe_err("Dont know what to do for cancelled ops yet"); break; } return TRUE; } gboolean rsc_colocation_new(const char *id, enum con_strength strength, resource_t *rsc_lh, resource_t *rsc_rh) { rsc_colocation_t *new_con = NULL; rsc_colocation_t *inverted_con = NULL; if(rsc_lh == NULL || rsc_rh == NULL){ /* error */ return FALSE; } crm_malloc0(new_con, sizeof(rsc_colocation_t)); if(new_con == NULL) { return FALSE; } new_con->id = id; new_con->rsc_lh = rsc_lh; new_con->rsc_rh = rsc_rh; new_con->strength = strength; inverted_con = invert_constraint(new_con); crm_debug_4("Adding constraint %s (%p) to %s", new_con->id, new_con, rsc_lh->id); rsc_lh->rsc_cons = g_list_insert_sorted( rsc_lh->rsc_cons, new_con, sort_cons_strength); crm_debug_4("Adding constraint %s (%p) to %s", inverted_con->id, inverted_con, rsc_rh->id); rsc_rh->rsc_cons = g_list_insert_sorted( rsc_rh->rsc_cons, inverted_con, sort_cons_strength); return TRUE; } /* LHS before RHS */ gboolean custom_action_order( resource_t *lh_rsc, char *lh_action_task, action_t *lh_action, resource_t *rh_rsc, char *rh_action_task, action_t *rh_action, enum pe_ordering type, pe_working_set_t *data_set) { order_constraint_t *order = NULL; if((lh_action == NULL && lh_rsc == NULL) || (rh_action == NULL && rh_rsc == NULL)){ pe_err("Invalid inputs lh_rsc=%p, lh_a=%p," " rh_rsc=%p, rh_a=%p", lh_rsc, lh_action, rh_rsc, rh_action); crm_free(lh_action_task); crm_free(rh_action_task); return FALSE; } crm_malloc0(order, sizeof(order_constraint_t)); if(order == NULL) { return FALSE; } order->id = data_set->order_id++; order->type = type; order->lh_rsc = lh_rsc; order->rh_rsc = rh_rsc; order->lh_action = lh_action; order->rh_action = rh_action; order->lh_action_task = lh_action_task; order->rh_action_task = rh_action_task; data_set->ordering_constraints = g_list_append( data_set->ordering_constraints, order); if(lh_rsc != NULL && rh_rsc != NULL) { crm_debug_4("Created ordering constraint %d (%s):" " %s/%s before %s/%s", order->id, 
ordering_type2text(order->type), lh_rsc->id, lh_action_task, rh_rsc->id, rh_action_task); } else if(lh_rsc != NULL) { crm_debug_4("Created ordering constraint %d (%s):" " %s/%s before action %d (%s)", order->id, ordering_type2text(order->type), lh_rsc->id, lh_action_task, rh_action->id, rh_action_task); } else if(rh_rsc != NULL) { crm_debug_4("Created ordering constraint %d (%s):" " action %d (%s) before %s/%s", order->id, ordering_type2text(order->type), lh_action->id, lh_action_task, rh_rsc->id, rh_action_task); } else { crm_debug_4("Created ordering constraint %d (%s):" " action %d (%s) before action %d (%s)", order->id, ordering_type2text(order->type), lh_action->id, lh_action_task, rh_action->id, rh_action_task); } return TRUE; } gboolean unpack_rsc_colocation(crm_data_t * xml_obj, pe_working_set_t *data_set) { enum con_strength strength_e = pecs_ignore; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); const char *id_rh = crm_element_value(xml_obj, XML_CONS_ATTR_TO); const char *id_lh = crm_element_value(xml_obj, XML_CONS_ATTR_FROM); const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); resource_t *rsc_lh = pe_find_resource(data_set->resources, id_lh); resource_t *rsc_rh = pe_find_resource(data_set->resources, id_rh); if(rsc_lh == NULL) { pe_err("No resource (con=%s, rsc=%s)", id, id_lh); return FALSE; } else if(rsc_rh == NULL) { pe_err("No resource (con=%s, rsc=%s)", id, id_rh); return FALSE; } /* the docs indicate that only +/- INFINITY are allowed, * but no-one ever reads the docs so all positive values will * count as "must" and negative values as "must not" */ if(score == NULL || score[0] != '-') { strength_e = pecs_must; } else { strength_e = pecs_must_not; } return rsc_colocation_new(id, strength_e, rsc_lh, rsc_rh); } gboolean unpack_rsc_order(crm_data_t * xml_obj, pe_working_set_t *data_set) { gboolean type_is_after = TRUE; gboolean action_is_start = TRUE; gboolean symmetrical_bool = TRUE; const char *id = 
crm_element_value(xml_obj, XML_ATTR_ID); const char *type = crm_element_value(xml_obj, XML_ATTR_TYPE); const char *id_rh = crm_element_value(xml_obj, XML_CONS_ATTR_TO); const char *id_lh = crm_element_value(xml_obj, XML_CONS_ATTR_FROM); const char *action = crm_element_value(xml_obj, XML_CONS_ATTR_ACTION); const char *symmetrical = crm_element_value( xml_obj, XML_CONS_ATTR_SYMMETRICAL); resource_t *rsc_lh = pe_find_resource(data_set->resources, id_lh); resource_t *rsc_rh = pe_find_resource(data_set->resources, id_rh); if(xml_obj == NULL) { pe_err("No constraint object to process."); return FALSE; } else if(id == NULL) { pe_err("%s constraint must have an id", crm_element_name(xml_obj)); return FALSE; } else if(rsc_lh == NULL || rsc_rh == NULL) { pe_err("Constraint %s needs two sides lh: %p rh: %p" " (NULL indicates missing side)", id, rsc_lh, rsc_rh); return FALSE; } crm_str_to_boolean(symmetrical, &symmetrical_bool); if(safe_str_eq(type, "before")) { type_is_after = FALSE; } if(safe_str_eq(action, task2text(stop_rsc))) { action_is_start = FALSE; } if((type_is_after && action_is_start) || (type_is_after == FALSE && action_is_start == FALSE)){ if(symmetrical_bool || action_is_start == FALSE) { if(rsc_lh->restart_type == pe_restart_restart){ order_stop_stop(rsc_lh, rsc_rh, pe_ordering_recover); } order_stop_stop(rsc_lh, rsc_rh, pe_ordering_optional); } if(symmetrical_bool || action_is_start) { if(rsc_lh->restart_type == pe_restart_restart){ order_start_start(rsc_rh, rsc_lh, pe_ordering_recover); } order_start_start(rsc_rh, rsc_lh, pe_ordering_optional); } } else { if(symmetrical_bool || action_is_start == FALSE) { if(rsc_rh->restart_type == pe_restart_restart){ order_stop_stop(rsc_rh, rsc_lh, pe_ordering_recover); } order_stop_stop(rsc_rh, rsc_lh, pe_ordering_optional); } if(symmetrical_bool || action_is_start) { if(rsc_rh->restart_type == pe_restart_restart){ order_start_start(rsc_lh, rsc_rh, pe_ordering_recover); } order_start_start(rsc_lh, rsc_rh, 
pe_ordering_optional); } } return TRUE; } gboolean add_node_attrs(crm_data_t *xml_obj, node_t *node, pe_working_set_t *data_set) { g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_UNAME), crm_strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_ID), crm_strdup(node->details->id)); if(safe_str_eq(node->details->id, data_set->dc_uuid)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_DC), crm_strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, crm_strdup("#"XML_ATTR_DC), crm_strdup(XML_BOOLEAN_FALSE)); } unpack_instance_attributes( xml_obj, node, node->details->attrs, NULL, 0); return TRUE; } gboolean unpack_rsc_location(crm_data_t * xml_obj, pe_working_set_t *data_set) { const char *id_lh = crm_element_value(xml_obj, "rsc"); const char *id = crm_element_value(xml_obj, XML_ATTR_ID); resource_t *rsc_lh = pe_find_resource(data_set->resources, id_lh); if(rsc_lh == NULL) { pe_warn("No resource (con=%s, rsc=%s)", id, id_lh); return FALSE; } else if(rsc_lh->is_managed == FALSE) { crm_debug_2("Ignoring constraint %s: resource %s not managed", id, id_lh); return FALSE; } xml_child_iter( xml_obj, rule_xml, XML_TAG_RULE, crm_debug_2("Unpacking %s/%s", id, ID(rule_xml)); generate_location_rule(rsc_lh, rule_xml, data_set); ); return TRUE; } rsc_to_node_t * generate_location_rule( resource_t *rsc, crm_data_t *rule_xml, pe_working_set_t *data_set) { const char *rule_id = NULL; const char *score = NULL; const char *boolean = NULL; GListPtr match_L = NULL; float score_f = 0.0; gboolean do_and = TRUE; gboolean accept = TRUE; rsc_to_node_t *location_rule = NULL; rule_id = crm_element_value(rule_xml, XML_ATTR_ID); score = crm_element_value(rule_xml, XML_RULE_ATTR_SCORE); boolean = crm_element_value(rule_xml, XML_RULE_ATTR_BOOLEAN_OP); crm_debug("processing rule: %s", rule_id); score_f = char2score(score); if(safe_str_eq(boolean, 
"or")) { do_and = FALSE; } location_rule = rsc2node_new(rule_id, rsc, score_f, NULL, data_set); if(location_rule == NULL) { return NULL; } if(do_and) { match_L = node_list_dup(data_set->nodes, FALSE); } xml_child_iter( rule_xml, expr, XML_TAG_EXPRESSION, slist_iter( node, node_t, data_set->nodes, lpc, accept = test_expression(expr, node); if(!do_and && accept) { if(pe_find_node(match_L, node->details->uname) == NULL) { node_t *dup = node_copy(node); match_L = g_list_append(match_L, dup); crm_debug_5("node %s matched", node->details->uname); } crm_debug_5("node %s already matched", node->details->uname); } else if(do_and && !accept) { /* remove it */ node_t *delete = pe_find_node( match_L, node->details->uname); if(delete != NULL) { match_L = g_list_remove(match_L,delete); crm_debug_5("node %s did not match", node->details->uname); } crm_free(delete); } ); ); location_rule->node_list_rh = match_L; if(location_rule->node_list_rh == NULL) { crm_debug_2("No matching nodes for rule %s", rule_id); return NULL; } crm_debug_2("%s: %d nodes matched", rule_id, g_list_length(location_rule->node_list_rh)); crm_action_debug_3(print_rsc_to_node("Added", location_rule, FALSE)); return location_rule; } diff --git a/crm/tengine/tengine.c b/crm/tengine/tengine.c index 6acaf27d2d..743925114e 100644 --- a/crm/tengine/tengine.c +++ b/crm/tengine/tengine.c @@ -1,1030 +1,1031 @@ -/* $Id: tengine.c,v 1.92 2005/08/08 15:43:05 andrew Exp $ */ +/* $Id: tengine.c,v 1.93 2005/08/10 09:25:10 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include gboolean graph_complete = FALSE; GListPtr graph = NULL; IPC_Channel *crm_ch = NULL; uint transition_idle_timeout = 30*1000; /* 30 seconds */ void fire_synapse(synapse_t *synapse); gboolean initiate_action(action_t *action); gboolean confirm_synapse(synapse_t *synapse, int action_id); void check_synapse_triggers(synapse_t *synapse, int action_id); void cib_action_updated( const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); te_timer_t *transition_timer = NULL; te_timer_t *abort_timer = NULL; int transition_counter = 1; char *te_uuid = NULL; const te_fsa_state_t te_state_matrix[i_invalid][s_invalid] = { /* s_idle, s_in_transition, s_abort_pending, s_updates_pending */ /* Got an i_transition */{ s_in_transition, s_abort_pending, s_abort_pending, s_updates_pending }, /* Got an i_cancel */{ s_idle, s_abort_pending, s_abort_pending, s_updates_pending }, /* Got an i_complete */{ s_idle, s_idle, s_abort_pending, s_updates_pending }, /* Got an i_cmd_complete*/{ s_idle, s_in_transition, s_updates_pending, s_updates_pending }, /* Got an i_cib_complete*/{ s_idle, s_in_transition, s_abort_pending, s_idle }, /* Got an i_cib_confirm */{ s_idle, s_in_transition, s_abort_pending, s_updates_pending }, /* Got an i_cib_notify */{ s_idle, s_in_transition, s_abort_pending, s_updates_pending } }; te_fsa_state_t te_fsa_state = s_idle;

/*
 * Reset the transition engine's graph state.
 *
 *  - removes CIB op callbacks via remove_cib_op_callback(-1, TRUE)
 *  - creates the transition and abort timers on first use, otherwise
 *    stops them
 *  - generates and logs te_uuid on first use
 *  - frees every synapse in the global graph together with its actions
 *    and inputs, leaving graph == NULL
 *
 * Always returns TRUE.
 */
gboolean
initialize_graph(void)
{
	remove_cib_op_callback(-1, TRUE);

	if(transition_timer == NULL) {
		crm_malloc0(transition_timer, sizeof(te_timer_t));
		transition_timer->timeout   = 10;
		transition_timer->source_id = -1;
		transition_timer->reason    = timeout_timeout;
		transition_timer->action    = NULL;
	} else {
		stop_te_timer(transition_timer);
	}

	if(abort_timer == NULL) {
		crm_malloc0(abort_timer, sizeof(te_timer_t));
		abort_timer->timeout   = 10;
		abort_timer->source_id = -1;
		abort_timer->reason    = timeout_abort;
		abort_timer->action    = NULL;
	} else {
		stop_te_timer(abort_timer);
	}

	if(te_uuid == NULL) {
		cl_uuid_t new_uuid;
		crm_malloc0(te_uuid, sizeof(char)*38);
		cl_uuid_generate(&new_uuid);
		cl_uuid_unparse(&new_uuid, te_uuid);
		crm_info("Registering TE UUID: %s", te_uuid);
	}

	/* Drain each list from its head.  An empty GList is NULL, so testing
	 * the pointer replaces the g_list_length() walk the loops used to do
	 * on every iteration. */
	while(graph != NULL) {
		synapse_t *synapse = graph->data;

		while(synapse->actions != NULL) {
			action_t *action = synapse->actions->data;
			synapse->actions = g_list_remove(
				synapse->actions, action);

			if(action->timer->source_id > 0) {
				crm_debug_3("Removing timer for action: %d",
					    action->id);

				g_source_remove(action->timer->source_id);
			}

			free_xml(action->xml);
			crm_free(action->timer);
			crm_free(action);
		}

		while(synapse->inputs != NULL) {
			action_t *action = synapse->inputs->data;
			synapse->inputs =
				g_list_remove(synapse->inputs, action);

			/* NOTE(review): unlike the actions above, an input's
			 * timer is neither stopped nor freed here - confirm
			 * that inputs never own one. */
			free_xml(action->xml);
			crm_free(action);
		}

		graph = g_list_remove(graph, synapse);
		crm_free(synapse);
	}

	graph = NULL;
	return TRUE;
}
/* * returns the ID of the action if a match is found * returns -1 if a match was not found * returns -2 if a match was found but the action failed (and was * not allowed to) */ int match_graph_event(action_t *action, crm_data_t *event, const char *event_node) { const char *allow_fail = NULL; const char *this_action = NULL; const char *this_node = NULL; const char *this_uname = NULL; const char *this_rsc = NULL; const char *magic = NULL; - char *this_event; + const char *this_event; char *update_te_uuid = NULL; const char *update_event; action_t *match = NULL; int op_status_i = -3; int transition_i = -1; if(event == NULL) { crm_debug_4("Ignoring NULL event"); return -1; } this_rsc = crm_element_value(action->xml, XML_LRM_ATTR_RSCID); if(this_rsc
== NULL) { crm_debug_4("Skipping non-resource event"); return -1; } crm_debug_3("Processing \"%s\" change", crm_element_name(event)); update_event = crm_element_value(event, XML_ATTR_ID); magic = crm_element_value(event, XML_ATTR_TRANSITION_MAGIC); if(magic == NULL) { /* crm_debug("Skipping \"non-change\""); */ crm_log_xml_debug(event, "Skipping \"non-change\""); return -3; } this_action = crm_element_value(action->xml, XML_LRM_ATTR_TASK); this_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); this_uname = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); - - this_event = generate_op_key(this_rsc, this_action, action->interval); + this_event = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); + CRM_DEV_ASSERT(this_event != NULL); + if(safe_str_neq(this_event, update_event)) { crm_debug_2("Action %d : Event mismatch %s vs. %s", action->id, this_event, update_event); } else if(safe_str_neq(this_node, event_node)) { crm_debug_2("Action %d : Node mismatch %s (%s) vs. %s", action->id, this_node, this_uname, event_node); } else { match = action; } - crm_free(this_event); if(match == NULL) { return -1; } - crm_debug("Matched action %d", action->id); + crm_debug("Matched action (%d) %s", action->id, this_event); CRM_DEV_ASSERT(decode_transition_magic( magic, &update_te_uuid, &transition_i, &op_status_i)); if(transition_i == -1) { /* we never expect these - recompute */ crm_err("Detected an action initiated outside of a transition"); crm_log_message(LOG_ERR, event); return -5; } else if(safe_str_neq(update_te_uuid, te_uuid)) { crm_err("Detected an action from a different transitioner:" " %s vs. %s", update_te_uuid, te_uuid); return -6; } else if(transition_counter != transition_i) { crm_warn("Detected an action from a different transition:" " %d vs. 
%d", transition_i, transition_counter); return -3; } /* stop this event's timer if it had one */ stop_te_timer(match->timer); /* Process OP status */ allow_fail = crm_element_value(match->xml, "allow_fail"); switch(op_status_i) { case -3: crm_err("Action returned the same as last time..." " whatever that was!"); crm_log_message(LOG_ERR, event); break; case LRM_OP_PENDING: crm_debug("Ignoring pending operation"); return -4; break; case LRM_OP_DONE: break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: crm_warn("Action %s on %s failed: %s", update_event, event_node, op_status2text(op_status_i)); if(FALSE == crm_is_true(allow_fail)) { send_complete("Action failed", event, te_failed, i_cancel); return -2; } break; case LRM_OP_CANCELLED: /* do nothing?? */ crm_err("Dont know what to do for cancelled ops yet"); break; default: crm_err("Unsupported action result: %d", op_status_i); send_complete("Unsupport action result", event, te_failed, i_cancel); return -2; } te_log_action(LOG_INFO, "Action %d confirmed", match->id); match->complete = TRUE; process_trigger(match->id); if(te_fsa_state != s_in_transition) { return -3; } return match->id; } int match_down_event(const char *target, const char *filter, int rc) { const char *allow_fail = NULL; const char *this_action = NULL; const char *this_node = NULL; action_t *match = NULL; slist_iter( synapse, synapse_t, graph, lpc, /* lookup event */ slist_iter( action, action_t, synapse->actions, lpc2, crm_data_t *action_args = NULL; if(action->type != action_type_crm) { continue; } this_action = crm_element_value( action->xml, XML_LRM_ATTR_TASK); /* if(crm_element_value(action->xml, XML_LRM_ATTR_RSCID)) { */ /* continue; */ /* } else */ if(filter != NULL && safe_str_neq(this_action, filter)) { continue; } if(safe_str_eq(this_action, CRM_OP_FENCE)) { action_args = find_xml_node( action->xml, XML_TAG_ATTRS, TRUE); this_node = crm_element_value( action_args, XML_LRM_ATTR_TARGET_UUID); } else 
if(safe_str_eq(this_action, CRM_OP_SHUTDOWN)) { this_node = crm_element_value( action->xml, XML_LRM_ATTR_TARGET_UUID); } else { crm_err("Action %d : Bad action %s", action->id, this_action); continue; } if(safe_str_neq(this_node, target)) { crm_debug("Action %d : Node mismatch: %s", action->id, this_node); continue; } match = action; ); if(match != NULL) { break; } ); if(match == NULL) { crm_debug_3("didnt match current action"); return -1; } crm_debug_3("matched"); /* stop this event's timer if it had one */ stop_te_timer(match->timer); /* Process OP status */ switch(rc) { case STONITH_SUCCEEDED: break; case STONITH_CANNOT: case STONITH_TIMEOUT: case STONITH_GENERIC: allow_fail = crm_element_value(match->xml, "allow_fail"); if(FALSE == crm_is_true(allow_fail)) { crm_err("Stonith of %s failed (%d)..." " aborting transition.", target, rc); send_complete("Stonith failed", match->xml, te_failed, i_cancel); return -2; } break; default: crm_err("Unsupported action result: %d", rc); send_complete("Unsupport Stonith result", match->xml, te_failed, i_cancel); return -2; } crm_debug_3("Action %d was successful, looking for next action", match->id); match->complete = TRUE; return match->id; } gboolean process_graph_event(crm_data_t *event, const char *event_node) { int rc = -1; int action_id = -1; int op_status_i = 0; const char *task = NULL; const char *rsc_id = NULL; const char *op_status = NULL; if(event == NULL) { crm_debug("a transition is starting"); process_trigger(action_id); check_for_completion(); return TRUE; } task = crm_element_value(event, XML_LRM_ATTR_LASTOP); rsc_id = crm_element_value(event, XML_ATTR_ID); op_status = crm_element_value(event, XML_LRM_ATTR_OPSTATUS); if(op_status != NULL) { op_status_i = atoi(op_status); if(op_status_i == -1) { /* just information that the action was sent */ crm_debug("Ignoring TE initiated updates"); return TRUE; } } crm_debug("Processing CIB update: %s on %s: %s", rsc_id, event_node, op_status2text(op_status_i)); 
if(crm_element_value(event, XML_ATTR_TRANSITION_MAGIC) == NULL) { crm_log_xml_debug(event, "Skipping \"non-change\""); action_id = -3; } slist_iter( synapse, synapse_t, graph, lpc, /* lookup event */ slist_iter( action, action_t, synapse->actions, lpc2, rc = match_graph_event(action, event, event_node); if(action_id >= 0 && rc >= 0) { crm_err("Additional match found: %d [%d]", rc, action_id); } else if(rc != -1) { action_id = rc; } ); if(action_id != -1) { crm_debug("Terminating search: %d", action_id); break; } ); if(action_id == -1) { /* didnt find a match... * now try any dangling inputs */ slist_iter( synapse, synapse_t, graph, lpc, slist_iter( action, action_t, synapse->inputs, lpc2, rc = match_graph_event(action,event,event_node); if(action_id >=0 && rc >=0 && rc != action_id) { crm_err("Additional match found:" " %d [%d]", rc, action_id); } else if(rc != -1) { action_id = rc; } ); if(action_id != -1) { break; } ); } if(action_id > -1) { crm_log_xml_debug_3(event, "Event found"); } else if(action_id == -2) { crm_log_xml_info(event, "Event failed"); } else if(action_id == -3) { crm_log_xml_info(event, "Old event found"); } else if(action_id == -4) { crm_log_xml_debug(event, "Pending event found"); } else { /* unexpected event, trigger a pe-recompute */ /* possibly do this only for certain types of actions */ crm_debug("Search terminated: %d", action_id); send_complete("Event not matched", event, te_update, i_cancel); return FALSE; } check_for_completion(); return TRUE; } void check_for_completion(void) { if(graph_complete) { /* allow some slack until we are pretty sure nothing * else is happening */ crm_info("Transition complete"); send_complete("complete", NULL, te_done, i_complete); } else { /* restart the transition timer again */ crm_debug_3("Transition not yet complete"); start_te_timer(transition_timer); } } gboolean initiate_action(action_t *action) { gboolean ret = FALSE; gboolean send_command = FALSE; const char *on_node = NULL; const char *id = NULL; 
const char *task = NULL; const char *timeout = NULL; const char *msg_task = XML_GRAPH_TAG_RSC_OP; on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); id = crm_element_value(action->xml, XML_ATTR_ID); task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); timeout = crm_element_value(action->xml, XML_ATTR_TIMEOUT); if(id == NULL || strlen(id) == 0 || task == NULL || strlen(task) == 0) { /* error */ te_log_action(LOG_ERR, "Failed on corrupted command: %s (id=%s) %s", crm_element_name(action->xml), crm_str(id), crm_str(task)); } else if(action->type == action_type_pseudo){ te_log_action(LOG_INFO, "Executing pseudo-event (%d): " "%s on %s", action->id, task, on_node); action->complete = TRUE; process_trigger(action->id); ret = TRUE; } else if(action->type == action_type_crm && safe_str_eq(task, CRM_OP_FENCE)){ crm_data_t *action_args = find_xml_node( action->xml, XML_TAG_ATTRS, TRUE); const char *uuid = NULL; const char *target = NULL; const char *name = NULL; stonith_ops_t * st_op = NULL; xml_child_iter( action_args, nvpair, XML_CIB_TAG_NVPAIR, name = crm_element_value(nvpair, XML_NVPAIR_ATTR_NAME); if(safe_str_eq(name, XML_LRM_ATTR_TARGET)) { target = crm_element_value( nvpair, XML_NVPAIR_ATTR_VALUE); } else if(safe_str_eq(name, XML_LRM_ATTR_TARGET_UUID)) { uuid = crm_element_value( nvpair, XML_NVPAIR_ATTR_VALUE); } ); CRM_DEV_ASSERT(target != NULL); CRM_DEV_ASSERT(uuid != NULL); te_log_action(LOG_INFO,"Executing fencing operation (%s) on %s", id, target); #ifdef TESTING ret = TRUE; action->complete = TRUE; process_trigger(action->id); return TRUE; #endif crm_malloc0(st_op, sizeof(stonith_ops_t)); st_op->optype = RESET; st_op->timeout = crm_atoi(timeout, "10000"); /* ten seconds */ st_op->node_name = crm_strdup(target); st_op->node_uuid = crm_strdup(uuid); if(stonithd_input_IPC_channel() == NULL) { crm_err("Cannot fence %s - stonith not available", target); } else if (ST_OK == stonithd_node_fence( st_op )) { ret = TRUE; } } else if(on_node == NULL || 
strlen(on_node) == 0) { /* error */ te_log_action(LOG_ERR, "Failed on corrupted command: %s (id=%s) %s on %s", crm_element_name(action->xml), crm_str(id), crm_str(task), crm_str(on_node)); } else if(action->type == action_type_crm){ te_log_action(LOG_INFO, "Executing crm-event (%s): %s on %s", id, task, on_node); #ifdef TESTING action->complete = TRUE; process_trigger(action->id); return TRUE; #endif /* action->complete = TRUE; */ msg_task = task; send_command = TRUE; } else if(action->type == action_type_rsc){ /* never overwrite stop actions in the CIB with * anything other than completed results * * Writing pending stops makes it look like the * resource is running again */ #ifdef TESTING action->invoked = FALSE; cib_action_update(action, LRM_OP_DONE); return TRUE; #endif action->invoked = FALSE; - if(safe_str_neq(task, CRMD_ACTION_STOP)) { - cib_action_update(action, LRM_OP_PENDING); - } else { + if(safe_str_eq(task, CRMD_ACTION_STOP) + || safe_str_eq(task, CRMD_ACTION_NOTIFY)) { cib_action_updated(NULL, 0, cib_ok, NULL, action); + } else { + cib_action_update(action, LRM_OP_PENDING); } ret = TRUE; } else { te_log_action(LOG_ERR, "Failed on unsupported command type: " "%s, %s (id=%s) on %s", crm_element_name(action->xml), task, id, on_node); } if(send_command) { HA_Message *cmd = NULL; char *counter = crm_itoa(transition_counter); cmd = create_request(msg_task, NULL, on_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); counter = generate_transition_key(transition_counter, te_uuid); crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); ret = send_ipc_message(crm_ch, cmd); crm_free(counter); if(ret && action->timeout > 0) { crm_debug_3("Setting timer for action %d",action->id); action->timer->reason = timeout_action_warn; start_te_timer(action->timer); } } return ret; } gboolean cib_action_update(action_t *action, int status) { char *code = NULL; crm_data_t *fragment = NULL; crm_data_t *state = NULL; crm_data_t *rsc = NULL; crm_data_t *xml_op = NULL; char *op_id = 
NULL; enum cib_errors rc = cib_ok; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *rsc_id = crm_element_value(action->xml, XML_LRM_ATTR_RSCID); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); int call_options = cib_quorum_override; if(status == LRM_OP_TIMEOUT) { if(crm_element_value(action->xml, XML_LRM_ATTR_RSCID) != NULL) { crm_warn("%s: %s %s on %s timed out", crm_element_name(action->xml), task, rsc_id, target); } else { crm_warn("%s: %s on %s timed out", crm_element_name(action->xml), task, target); } } code = crm_itoa(status); /* update the CIB */ fragment = NULL; state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(state, XML_ATTR_UUID, target_uuid); crm_xml_add(state, XML_ATTR_UNAME, target); rsc = create_xml_node(state, XML_CIB_TAG_LRM); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); xml_op = create_xml_node(rsc,XML_LRM_TAG_RSC_OP); crm_xml_add(rsc, XML_ATTR_ID, rsc_id); crm_xml_add(xml_op, XML_ATTR_ID, task); op_id = generate_op_key(rsc_id, task, action->interval); crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_free(op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); crm_xml_add(rsc, XML_LRM_ATTR_RSCSTATE, get_rsc_state(task, status)); crm_xml_add(rsc, XML_LRM_ATTR_OPSTATUS, code); crm_xml_add(rsc, XML_LRM_ATTR_RC, code); crm_xml_add(rsc, XML_LRM_ATTR_LASTOP, task); crm_xml_add(xml_op, XML_LRM_ATTR_OPSTATUS, code); crm_xml_add(xml_op, XML_LRM_ATTR_CALLID, "-1"); crm_xml_add(xml_op, XML_LRM_ATTR_RC, code); crm_xml_add(xml_op, "origin", __FUNCTION__); crm_free(code); code = generate_transition_key(transition_counter, te_uuid); crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, code); crm_free(code); code = generate_transition_magic( crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY), status); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, code); 
crm_free(code); set_node_tstamp(xml_op); fragment = create_cib_fragment(state, NULL); crm_debug_3("Updating CIB with \"%s\" (%s): %s %s on %s", status<0?"new action":XML_ATTR_TIMEOUT, crm_element_name(action->xml), crm_str(task), rsc_id, target); #ifndef TESTING rc = te_cib_conn->cmds->modify( te_cib_conn, XML_CIB_TAG_STATUS, fragment, NULL, call_options); crm_debug("Updating CIB with %s action %d: %s %s on %s (call_id=%d)", op_status2text(status), action->id, task, rsc_id, target, rc); if(status == LRM_OP_PENDING) { crm_debug_2("Waiting for callback id: %d", rc); add_cib_op_callback(rc, FALSE, action, cib_action_updated); } #else te_log_action(LOG_INFO, "Initiating action %d: %s %s on %s", action->id, task, rsc_id, target); call_options = 0; { HA_Message *cmd = ha_msg_new(11); ha_msg_add(cmd, F_TYPE, T_CRM); ha_msg_add(cmd, F_CRM_VERSION, CRM_VERSION); ha_msg_add(cmd, F_CRM_MSG_TYPE, XML_ATTR_REQUEST); ha_msg_add(cmd, F_CRM_TASK, CRM_OP_EVENTCC); ha_msg_add(cmd, F_CRM_SYS_TO, CRM_SYSTEM_TENGINE); ha_msg_add(cmd, F_CRM_SYS_FROM, CRM_SYSTEM_TENGINE); ha_msg_addstruct(cmd, crm_element_name(state), state); send_ipc_message(crm_ch, cmd); } #endif free_xml(fragment); free_xml(state); action->sent_update = TRUE; if(rc < cib_ok) { return FALSE; } return TRUE; } void cib_action_updated( const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { HA_Message *cmd = NULL; crm_data_t *rsc_op = NULL; const char *task = NULL; const char *rsc_id = NULL; const char *on_node = NULL; action_t *action = user_data; char *counter = crm_itoa(transition_counter); CRM_DEV_ASSERT(action != NULL); if(crm_assert_failed) { return; } CRM_DEV_ASSERT(action->xml != NULL); if(crm_assert_failed) { return; } rsc_op = action->xml; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); rsc_id = crm_element_value(rsc_op, XML_LRM_ATTR_RSCID); on_node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET); counter = generate_transition_key(transition_counter, te_uuid); crm_xml_add(rsc_op, 
XML_ATTR_TRANSITION_KEY, counter); crm_free(counter); if(rc < cib_ok) { crm_err("Update for action %d: %s %s on %s FAILED", action->id, task, rsc_id, on_node); send_complete(cib_error2string(rc), output, te_failed, i_cancel); return; } if(te_fsa_state != s_in_transition) { int pending_updates = num_cib_op_callbacks(); if(pending_updates == 0) { send_complete("CIB update queue empty", output, te_done, i_cib_complete); } else { crm_debug("Still waiting on %d callbacks", pending_updates); } crm_debug("Not executing action: Not in a transition: %d", te_fsa_state); return; } crm_info("Initiating action %d: %s %s on %s", action->id, task, rsc_id, on_node); if(rsc_op != NULL) { crm_log_xml_debug_2(rsc_op, "Performing"); } cmd = create_request(task, rsc_op, on_node, CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL); #ifndef TESTING send_ipc_message(crm_ch, cmd); #else crm_log_message(LOG_INFO, cmd); #endif action->invoked = TRUE; if(action->timeout > 0) { crm_debug_3("Setting timer for action %d",action->id); action->timer->reason = timeout_action_warn; start_te_timer(action->timer); } } gboolean initiate_transition(void) { crm_info("Initating transition"); process_graph_event(NULL, NULL); return TRUE; } void check_synapse_triggers(synapse_t *synapse, int action_id) { synapse->triggers_complete = TRUE; if(synapse->confirmed) { crm_debug_3("Skipping confirmed synapse %d", synapse->id); return; } else if(synapse->complete == FALSE) { crm_debug_3("Checking pre-reqs for %d", synapse->id); /* lookup prereqs */ slist_iter( prereq, action_t, synapse->inputs, lpc, crm_debug_3("Processing input %d", prereq->id); if(prereq->id == action_id) { crm_debug_3("Marking input %d complete", action_id); prereq->complete = TRUE; } else if(prereq->complete == FALSE) { crm_debug_3("Inputs for synapse %d not satisfied", synapse->id); synapse->triggers_complete = FALSE; } ); } } void fire_synapse(synapse_t *synapse) { if(synapse == NULL) { crm_err("Synapse was NULL!"); return; } crm_debug_3("Checking if 
synapse %d needs to be fired", synapse->id); if(synapse->complete) { crm_debug_3("Skipping complete synapse %d", synapse->id); return; } else if(synapse->triggers_complete == FALSE) { crm_debug_3("Synapse %d not yet satisfied", synapse->id); return; } crm_debug("All inputs for synapse %d satisfied... invoking actions", synapse->id); synapse->complete = TRUE; slist_iter( action, action_t, synapse->actions, lpc, /* allow some leeway */ int tmp_time = 2 * action->timeout; gboolean passed = FALSE; action->invoked = TRUE; /* Invoke the action and start the timer */ passed = initiate_action(action); if(passed == FALSE) { crm_err("Failed initiating <%s id=%d> in synapse %d", crm_element_name(action->xml), action->id, synapse->id); send_complete("Action init failed", action->xml, te_failed, i_cancel); return; } if(tmp_time > transition_timer->timeout) { crm_debug("Action %d: Increasing IDLE timer to %d", action->id, tmp_time); transition_timer->timeout = tmp_time; } ); crm_debug("Synapse %d complete", synapse->id); } gboolean confirm_synapse(synapse_t *synapse, int action_id) { gboolean complete = TRUE; synapse->confirmed = TRUE; slist_iter( action, action_t, synapse->actions, lpc, if(action->complete == FALSE) { complete = FALSE; synapse->confirmed = FALSE; crm_debug_3("Found an incomplete action" " - transition not complete"); break; } ); if(complete) { crm_debug("Synapse %d complete (action=%d)", synapse->id, action_id); } return complete; } void process_trigger(int action_id) { if(te_fsa_state != s_in_transition) { int unconfirmed = unconfirmed_actions(); crm_info("Trigger from action %d (%d more) discarded:" " Not in transition", action_id, unconfirmed); if(unconfirmed == 0) { send_complete("Last pending action confirmed", NULL, te_abort_confirmed, i_cmd_complete); } return; } graph_complete = TRUE; crm_debug_3("Processing trigger from action %d", action_id); /* something happened, stop the timer and start it again at the end */ stop_te_timer(transition_timer); 
slist_iter( synapse, synapse_t, graph, lpc, if(synapse->confirmed) { crm_debug_3("Skipping confirmed synapse %d", synapse->id); continue; } check_synapse_triggers(synapse, action_id); fire_synapse(synapse); if(graph == NULL) { crm_err("Trigger processing aborted after failed synapse"); break; } crm_debug_3("Checking if %d is confirmed", synapse->id); if(synapse->complete == FALSE) { crm_debug_3("Found an incomplete synapse" " - transition not complete"); /* indicate that the transition is not yet complete */ graph_complete = FALSE; } else if(synapse->confirmed == FALSE) { gboolean confirmed = confirm_synapse(synapse,action_id); graph_complete = graph_complete && confirmed; } crm_debug_3("%d is %s", synapse->id, synapse->confirmed?"confirmed":synapse->complete?"complete":"pending"); ); }