diff --git a/crmd/te_actions.c b/crmd/te_actions.c index e122a941c2..b16b70f236 100644 --- a/crmd/te_actions.c +++ b/crmd/te_actions.c @@ -1,698 +1,712 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include char *te_uuid = NULL; GHashTable *te_targets = NULL; void send_rsc_command(crm_action_t * action); static void te_update_job_count(crm_action_t * action, int offset); static void te_start_action_timer(crm_graph_t * graph, crm_action_t * action) { action->timer = calloc(1, sizeof(crm_action_timer_t)); action->timer->timeout = action->timeout; action->timer->reason = timeout_action; action->timer->action = action; action->timer->source_id = g_timeout_add(action->timer->timeout + graph->network_delay, action_timer_callback, (void *)action->timer); CRM_ASSERT(action->timer->source_id != 0); } static gboolean te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo) { crm_debug("Pseudo action %d fired and confirmed", pseudo->id); te_action_confirmed(pseudo); update_graph(graph, pseudo); trigger_graph(); return TRUE; } void send_stonith_update(crm_action_t * action, const char *target, const char *uuid) { int rc = pcmk_ok; crm_node_t *peer = NULL; /* zero out the node-status & remove all LRM status info */ xmlNode *node_state = NULL; CRM_CHECK(target != NULL, return); CRM_CHECK(uuid != NULL, return); /* Make sure the membership and join caches are accurate */ peer = crm_get_peer_full(0, target, CRM_GET_PEER_CLUSTER | CRM_GET_PEER_REMOTE); CRM_CHECK(peer != NULL, return); if (peer->uuid == NULL) { crm_info("Recording uuid '%s' for node '%s'", uuid, target); peer->uuid = strdup(uuid); } crm_update_peer_proc(__FUNCTION__, peer, crm_proc_none, NULL); crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_LOST, 0); crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN); crm_update_peer_join(__FUNCTION__, peer, crm_join_none); node_state = do_update_node_cib(peer, node_update_cluster | node_update_peer | node_update_join | node_update_expected, NULL, __FUNCTION__); /* Force our known ID */ crm_xml_add(node_state, XML_ATTR_UUID, uuid); rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state, cib_quorum_override | cib_scope_local | cib_can_create); /* Delay processing the trigger until the update completes */ crm_debug("Sending fencing update %d for %s", rc, target); fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated); /* Make sure it sticks */ /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */ erase_status_tag(target, XML_CIB_TAG_LRM, cib_scope_local); erase_status_tag(target, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); free_xml(node_state); return; } static gboolean te_fence_node(crm_graph_t * graph, crm_action_t * action) { int rc = 0; const char *id = NULL; const char *uuid = NULL; const char *target = NULL; const char *type = NULL; gboolean invalid_action = FALSE; enum stonith_call_options options = st_opt_none; id = ID(action->xml); target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); type = crm_meta_value(action->params, "stonith_action"); CRM_CHECK(id != NULL, invalid_action = TRUE); CRM_CHECK(uuid != NULL, invalid_action = TRUE); CRM_CHECK(type != NULL, invalid_action = TRUE); CRM_CHECK(target != NULL, invalid_action = TRUE); if (invalid_action) { crm_log_xml_warn(action->xml, "BadAction"); return FALSE; } crm_notice("Executing %s fencing operation (%s) on %s (timeout=%d)", type, id, target, transition_graph->stonith_timeout); /* Passing NULL means block until we can connect... */ te_connect_stonith(NULL); if (crmd_join_phase_count(crm_join_confirmed) == 1) { options |= st_opt_allow_suicide; } rc = stonith_api->cmds->fence(stonith_api, options, target, type, transition_graph->stonith_timeout / 1000, 0); stonith_api->cmds->register_callback(stonith_api, rc, transition_graph->stonith_timeout / 1000, st_opt_timeout_updates, generate_transition_key(transition_graph->id, action->id, 0, te_uuid), "tengine_stonith_callback", tengine_stonith_callback); return TRUE; } static int get_target_rc(crm_action_t * action) { const char *target_rc_s = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC); if (target_rc_s != NULL) { return crm_parse_int(target_rc_s, "0"); } return 0; } static gboolean te_crm_command(crm_graph_t * graph, crm_action_t * action) { char *counter = NULL; xmlNode *cmd = NULL; gboolean is_local = FALSE; const char *id = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *router_node = NULL; gboolean rc = TRUE; gboolean no_wait = FALSE; id = ID(action->xml); task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if (!router_node) { router_node = on_node; } CRM_CHECK(on_node != NULL && strlen(on_node) != 0, crm_err("Corrupted command (id=%s) %s: no node", crm_str(id), crm_str(task)); return FALSE); crm_info("Executing crm-event (%s): %s on %s%s%s", crm_str(id), crm_str(task), on_node, is_local ? " (local)" : "", no_wait ? " - no waiting" : ""); if (safe_str_eq(router_node, fsa_our_uname)) { is_local = TRUE; } value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); if (crm_is_true(value)) { no_wait = TRUE; } if (is_local && safe_str_eq(task, CRM_OP_SHUTDOWN)) { /* defer until everything else completes */ crm_info("crm-event (%s) is a local shutdown", crm_str(id)); graph->completion_action = tg_shutdown; graph->abort_reason = "local shutdown"; te_action_confirmed(action); update_graph(graph, action); trigger_graph(); return TRUE; } else if (safe_str_eq(task, CRM_OP_SHUTDOWN)) { crm_node_t *peer = crm_get_peer(0, router_node); crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN); } cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); counter = generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE); free(counter); free_xml(cmd); if (rc == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if (no_wait) { te_action_confirmed(action); update_graph(graph, action); trigger_graph(); } else { if (action->timeout <= 0) { crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %dms instead", action->id, task, on_node, action->timeout, graph->network_delay); action->timeout = graph->network_delay; } te_start_action_timer(graph, action); } return TRUE; } gboolean cib_action_update(crm_action_t * action, int status, int op_rc) { lrmd_event_data_t *op = NULL; xmlNode *state = NULL; xmlNode *rsc = NULL; xmlNode *xml_op = NULL; xmlNode *action_rsc = NULL; int rc = pcmk_ok; const char *name = NULL; const char *value = NULL; const char *rsc_id = NULL; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); int call_options = cib_quorum_override | cib_scope_local; int target_rc = get_target_rc(action); if (status == PCMK_LRM_OP_PENDING) { crm_debug("%s %d: Recording pending operation %s on %s", crm_element_name(action->xml), action->id, task_uuid, target); } else { crm_warn("%s %d: %s on %s timed out", crm_element_name(action->xml), action->id, task_uuid, target); } action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE); if (action_rsc == NULL) { return FALSE; } rsc_id = ID(action_rsc); CRM_CHECK(rsc_id != NULL, crm_log_xml_err(action->xml, "Bad:action"); return FALSE); /* update the CIB */ state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(state, XML_ATTR_UUID, target_uuid); crm_xml_add(state, XML_ATTR_UNAME, target); rsc = create_xml_node(state, XML_CIB_TAG_LRM); crm_xml_add(rsc, XML_ATTR_ID, target_uuid); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); crm_xml_add(rsc, XML_ATTR_ID, rsc_id); name = XML_ATTR_TYPE; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); name = XML_AGENT_ATTR_CLASS; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); name = XML_AGENT_ATTR_PROVIDER; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); op = convert_graph_action(NULL, action, status, op_rc); op->call_id = -1; op->user_data = generate_transition_key(transition_graph->id, action->id, target_rc, te_uuid); xml_op = create_operation_update(rsc, op, CRM_FEATURE_SET, target_rc, __FUNCTION__, LOG_INFO); lrmd_free_event(op); crm_trace("Updating CIB with \"%s\" (%s): %s %s on %s", status < 0 ? "new action" : XML_ATTR_TIMEOUT, crm_element_name(action->xml), crm_str(task), rsc_id, target); crm_log_xml_trace(xml_op, "Op"); rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options); crm_trace("Updating CIB with %s action %d: %s on %s (call_id=%d)", services_lrm_status_str(status), action->id, task_uuid, target, rc); fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated); free_xml(state); action->sent_update = TRUE; if (rc < pcmk_ok) { return FALSE; } return TRUE; } static gboolean te_rsc_command(crm_graph_t * graph, crm_action_t * action) { /* never overwrite stop actions in the CIB with * anything other than completed results * * Writing pending stops makes it look like the * resource is running again */ xmlNode *cmd = NULL; xmlNode *rsc_op = NULL; gboolean rc = TRUE; gboolean no_wait = FALSE; gboolean is_local = FALSE; char *counter = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *router_node = NULL; const char *task_uuid = NULL; CRM_ASSERT(action != NULL); CRM_ASSERT(action->xml != NULL); action->executed = FALSE; on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(on_node != NULL && strlen(on_node) != 0, crm_err("Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task)); return FALSE); rsc_op = action->xml; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE); if (!router_node) { router_node = on_node; } counter = generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter); if (safe_str_eq(router_node, fsa_our_uname)) { is_local = TRUE; } value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); if (crm_is_true(value)) { no_wait = TRUE; } crm_notice("Initiating action %d: %s %s on %s%s%s", action->id, task, task_uuid, on_node, is_local ? " (local)" : "", no_wait ? " - no waiting" : ""); cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node, CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL); if (is_local) { /* shortcut local resource commands */ ha_msg_input_t data = { .msg = cmd, .xml = rsc_op, }; fsa_data_t msg = { .id = 0, .data = &data, .data_type = fsa_dt_ha_msg, .fsa_input = I_NULL, .fsa_cause = C_FSA_INTERNAL, .actions = A_LRM_INVOKE, .origin = __FUNCTION__, }; do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg); } else { rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE); } free(counter); free_xml(cmd); action->executed = TRUE; if (rc == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if (no_wait) { crm_info("Action %d confirmed - no wait", action->id); action->confirmed = TRUE; /* Just mark confirmed. * Don't bump the job count only to immediately decrement it */ update_graph(transition_graph, action); trigger_graph(); } else { if (action->timeout <= 0) { crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %dms instead", action->id, task, task_uuid, on_node, action->timeout, graph->network_delay); action->timeout = graph->network_delay; } te_update_job_count(action, 1); te_start_action_timer(graph, action); } value = crm_meta_value(action->params, XML_OP_ATTR_PENDING); if (crm_is_true(value) && safe_str_neq(task, CRMD_ACTION_CANCEL) && safe_str_neq(task, CRMD_ACTION_DELETE)) { /* write a "pending" entry to the CIB, inhibit notification */ crm_debug("Recording pending op %s in the CIB", task_uuid); cib_action_update(action, PCMK_LRM_OP_PENDING, PCMK_OCF_UNKNOWN); } return TRUE; } struct te_peer_s { char *name; int jobs; + int migrate_jobs; }; static void te_peer_free(gpointer p) { struct te_peer_s *peer = p; free(peer->name); free(peer); } void te_reset_job_counts(void) { GHashTableIter iter; struct te_peer_s *peer = NULL; if(te_targets == NULL) { te_targets = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, te_peer_free); } g_hash_table_iter_init(&iter, te_targets); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) { peer->jobs = 0; + peer->migrate_jobs = 0; } } static void -te_update_job_count_on(const char *target, int offset) +te_update_job_count_on(const char *target, int offset, bool migrate) { struct te_peer_s *r = NULL; if(target == NULL || te_targets == NULL) { return; } r = g_hash_table_lookup(te_targets, target); if(r == NULL) { r = calloc(1, sizeof(struct te_peer_s)); r->name = strdup(target); g_hash_table_insert(te_targets, r->name, r); } r->jobs += offset; + if(migrate) { + r->migrate_jobs += offset; + } crm_trace("jobs[%s] = %d", target, r->jobs); } static void te_update_job_count(crm_action_t * action, int offset) { const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); if (action->type != action_type_rsc || target == NULL) { /* No limit on these */ return; } if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { xmlNode *meta = get_xpath_object("//[@"XML_LRM_ATTR_MIGRATE_SOURCE"]", action->xml, LOG_ERR); const char *t1 = crm_element_value(meta, XML_LRM_ATTR_MIGRATE_SOURCE); const char *t2 = crm_element_value(meta, XML_LRM_ATTR_MIGRATE_TARGET); - te_update_job_count_on(t1, offset); - te_update_job_count_on(t2, offset); + te_update_job_count_on(t1, offset, TRUE); + te_update_job_count_on(t2, offset, TRUE); } else { - te_update_job_count_on(target, offset); + te_update_job_count_on(target, offset, FALSE); } } static gboolean -te_should_perform_action_on(const char *action, const char *target) +te_should_perform_action_on(crm_graph_t * graph, crm_action_t * action, const char *target) { int limit = 0; struct te_peer_s *r = NULL; + const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); + const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); if(target == NULL) { /* No limit on these */ return TRUE; } else if(te_targets == NULL) { return FALSE; } r = g_hash_table_lookup(te_targets, target); limit = throttle_get_job_limit(target); if(r == NULL) { r = calloc(1, sizeof(struct te_peer_s)); r->name = strdup(target); g_hash_table_insert(te_targets, r->name, r); } if(limit <= r->jobs) { crm_trace("Peer %s is over their job limit of %d (%d): deferring %s", - target, limit, r->jobs, action); + target, limit, r->jobs, id); return FALSE; + + } else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) { + if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { + crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s", + target, graph->migration_limit, r->migrate_jobs, id); + return FALSE; + } } + return TRUE; } static gboolean te_should_perform_action(crm_graph_t * graph, crm_action_t * action) { const char *target = NULL; - const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (action->type != action_type_rsc) { /* No limit on these */ return TRUE; } if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE); - if(te_should_perform_action_on(id, target) == FALSE) { + if(te_should_perform_action_on(graph, action, target) == FALSE) { return FALSE; } target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET); } else { target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); } - return te_should_perform_action_on(id, target); + return te_should_perform_action_on(graph, action, target); } void te_action_confirmed(crm_action_t * action) { const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); if (action->confirmed == FALSE && action->type == action_type_rsc && target != NULL) { te_update_job_count(action, -1); } action->confirmed = TRUE; } crm_graph_functions_t te_graph_fns = { te_pseudo_action, te_rsc_command, te_crm_command, te_fence_node, te_should_perform_action, }; void notify_crmd(crm_graph_t * graph) { const char *type = "unknown"; enum crmd_fsa_input event = I_NULL; crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state)); CRM_CHECK(graph->complete, graph->complete = TRUE); switch (graph->completion_action) { case tg_stop: type = "stop"; /* fall through */ case tg_done: type = "done"; if (fsa_state == S_TRANSITION_ENGINE) { event = I_TE_SUCCESS; } break; case tg_restart: type = "restart"; if (fsa_state == S_TRANSITION_ENGINE) { if (too_many_st_failures() == FALSE) { if (transition_timer->period_ms > 0) { crm_timer_stop(transition_timer); crm_timer_start(transition_timer); } else { event = I_PE_CALC; } } else { event = I_TE_SUCCESS; } } else if (fsa_state == S_POLICY_ENGINE) { register_fsa_action(A_PE_INVOKE); } break; case tg_shutdown: type = "shutdown"; if (is_set(fsa_input_register, R_SHUTDOWN)) { event = I_STOP; } else { crm_err("We didn't ask to be shut down, yet our" " PE is telling us too."); event = I_TERMINATE; } } crm_debug("Transition %d status: %s - %s", graph->id, type, crm_str(graph->abort_reason)); graph->abort_reason = NULL; graph->completion_action = tg_done; clear_bit(fsa_input_register, R_IN_TRANSITION); if (event != I_NULL) { register_fsa_input(C_FSA_INTERNAL, event, NULL); } else if (fsa_source) { mainloop_set_trigger(fsa_source); } } diff --git a/include/crm/transition.h b/include/crm/transition.h index dc7f0cac04..4e96b3abb4 100644 --- a/include/crm/transition.h +++ b/include/crm/transition.h @@ -1,147 +1,146 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include typedef enum { action_type_pseudo, action_type_rsc, action_type_crm } action_type_e; typedef struct te_timer_s crm_action_timer_t; typedef struct synapse_s { int id; int priority; gboolean ready; gboolean failed; gboolean executed; gboolean confirmed; GListPtr actions; /* crm_action_t* */ GListPtr inputs; /* crm_action_t* */ } synapse_t; typedef struct crm_action_s { int id; int timeout; int interval; GHashTable *params; action_type_e type; crm_action_timer_t *timer; synapse_t *synapse; gboolean sent_update; /* sent to the CIB */ gboolean executed; /* sent to the CRM */ gboolean confirmed; gboolean failed; gboolean can_fail; xmlNode *xml; } crm_action_t; enum timer_reason { timeout_action, timeout_action_warn, timeout_abort, }; struct te_timer_s { int source_id; int timeout; enum timer_reason reason; crm_action_t *action; }; /* order matters here */ enum transition_action { tg_done, tg_stop, tg_restart, tg_shutdown, }; typedef struct crm_graph_s { int id; char *source; int abort_priority; gboolean complete; const char *abort_reason; enum transition_action completion_action; int num_actions; int num_synapses; int batch_limit; int network_delay; int stonith_timeout; int transition_timeout; int fired; int pending; int skipped; int completed; int incomplete; GListPtr synapses; /* synpase_t* */ int migration_limit; - GHashTable *migrating; } crm_graph_t; typedef struct crm_graph_functions_s { gboolean(*pseudo) (crm_graph_t * graph, crm_action_t * action); gboolean(*rsc) (crm_graph_t * graph, crm_action_t * action); gboolean(*crmd) (crm_graph_t * graph, crm_action_t * action); gboolean(*stonith) (crm_graph_t * graph, crm_action_t * action); gboolean(*allowed) (crm_graph_t * graph, crm_action_t * action); } crm_graph_functions_t; enum transition_status { transition_active, transition_pending, /* active but no actions performed this time */ transition_complete, transition_stopped, transition_terminated, transition_action_failed, transition_failed, }; void set_default_graph_functions(void); void set_graph_functions(crm_graph_functions_t * fns); crm_graph_t *unpack_graph(xmlNode * xml_graph, const char *reference); int run_graph(crm_graph_t * graph); gboolean update_graph(crm_graph_t * graph, crm_action_t * action); void destroy_graph(crm_graph_t * graph); const char *transition_status(enum transition_status state); void print_graph(unsigned int log_level, crm_graph_t * graph); void print_action(int log_level, const char *prefix, crm_action_t * action); void update_abort_priority(crm_graph_t * graph, int priority, enum transition_action action, const char *abort_reason); const char *actiontype2text(action_type_e type); lrmd_event_data_t *convert_graph_action(xmlNode * resource, crm_action_t * action, int status, int rc); diff --git a/lib/transition/graph.c b/lib/transition/graph.c index 697004f0ee..a8781646a2 100644 --- a/lib/transition/graph.c +++ b/lib/transition/graph.c @@ -1,424 +1,339 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include /* #include */ /* */ crm_graph_functions_t *graph_fns = NULL; static gboolean update_synapse_ready(synapse_t * synapse, int action_id) { GListPtr lpc = NULL; gboolean updates = FALSE; CRM_CHECK(synapse->executed == FALSE, return FALSE); CRM_CHECK(synapse->confirmed == FALSE, return FALSE); synapse->ready = TRUE; for (lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { crm_action_t *prereq = (crm_action_t *) lpc->data; crm_trace("Processing input %d", prereq->id); if (prereq->id == action_id) { crm_trace("Marking input %d of synapse %d confirmed", action_id, synapse->id); prereq->confirmed = TRUE; updates = TRUE; } else if (prereq->confirmed == FALSE) { synapse->ready = FALSE; } } if (updates) { crm_trace("Updated synapse %d", synapse->id); } return updates; } static gboolean update_synapse_confirmed(synapse_t * synapse, int action_id) { GListPtr lpc = NULL; gboolean updates = FALSE; gboolean is_confirmed = TRUE; CRM_CHECK(synapse->executed, return FALSE); CRM_CHECK(synapse->confirmed == FALSE, return TRUE); is_confirmed = TRUE; for (lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { crm_action_t *action = (crm_action_t *) lpc->data; crm_trace("Processing action %d", action->id); if (action->id == action_id) { crm_trace("Confirmed: Action %d of Synapse %d", action_id, synapse->id); action->confirmed = TRUE; updates = TRUE; } else if (action->confirmed == FALSE) { is_confirmed = FALSE; crm_trace("Synapse %d still not confirmed after action %d", synapse->id, action_id); } } if (is_confirmed && synapse->confirmed == FALSE) { crm_trace("Confirmed: Synapse %d", synapse->id); synapse->confirmed = TRUE; updates = TRUE; } if (updates) { crm_trace("Updated synapse %d", synapse->id); } return updates; } gboolean update_graph(crm_graph_t * graph, crm_action_t * action) { gboolean rc = FALSE; gboolean updates = FALSE; GListPtr lpc = NULL; for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { synapse_t *synapse = (synapse_t *) lpc->data; if (synapse->confirmed || synapse->failed) { crm_trace("Synapse complete"); } else if (synapse->executed) { crm_trace("Synapse executed"); rc = update_synapse_confirmed(synapse, action->id); } else if (action->failed == FALSE || synapse->priority == INFINITY) { rc = update_synapse_ready(synapse, action->id); } updates = updates || rc; } if (updates) { crm_trace("Updated graph with completed action %d", action->id); } return updates; } static gboolean should_fire_synapse(crm_graph_t * graph, synapse_t * synapse) { GListPtr lpc = NULL; CRM_CHECK(synapse->executed == FALSE, return FALSE); CRM_CHECK(synapse->confirmed == FALSE, return FALSE); crm_trace("Checking pre-reqs for %d", synapse->id); /* lookup prereqs */ synapse->ready = TRUE; for (lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { crm_action_t *prereq = (crm_action_t *) lpc->data; crm_trace("Processing input %d", prereq->id); if (prereq->confirmed == FALSE) { crm_trace("Inputs for synapse %d not satisfied", synapse->id); synapse->ready = FALSE; break; } } if(synapse->ready && graph_fns->allowed) { for (lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { crm_action_t *a = (crm_action_t *) lpc->data; if(graph_fns->allowed(graph, a) == FALSE) { return FALSE; } } } return synapse->ready; } static gboolean initiate_action(crm_graph_t * graph, crm_action_t * action) { const char *id = NULL; CRM_CHECK(action->executed == FALSE, return FALSE); id = ID(action->xml); CRM_CHECK(id != NULL, return FALSE); action->executed = TRUE; if (action->type == action_type_pseudo) { crm_trace("Executing pseudo-event: %s (%d)", id, action->id); return graph_fns->pseudo(graph, action); } else if (action->type == action_type_rsc) { crm_trace("Executing rsc-event: %s (%d)", id, action->id); return graph_fns->rsc(graph, action); } else if (action->type == action_type_crm) { const char *task = NULL; task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); CRM_CHECK(task != NULL, return FALSE); if (safe_str_eq(task, CRM_OP_FENCE)) { crm_trace("Executing STONITH-event: %s (%d)", id, action->id); return graph_fns->stonith(graph, action); } crm_trace("Executing crm-event: %s (%d)", id, action->id); return graph_fns->crmd(graph, action); } crm_err("Failed on unsupported command type: %s (id=%s)", crm_element_name(action->xml), id); return FALSE; } static gboolean fire_synapse(crm_graph_t * graph, synapse_t * synapse) { GListPtr lpc = NULL; CRM_CHECK(synapse != NULL, return FALSE); CRM_CHECK(synapse->ready, return FALSE); CRM_CHECK(synapse->confirmed == FALSE, return TRUE); crm_trace("Synapse %d fired", synapse->id); synapse->executed = TRUE; for (lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { crm_action_t *action = (crm_action_t *) lpc->data; /* allow some leeway */ gboolean passed = FALSE; /* Invoke the action and start the timer */ passed = initiate_action(graph, action); if (passed == FALSE) { crm_err("Failed initiating <%s id=%d> in synapse %d", crm_element_name(action->xml), action->id, synapse->id); synapse->confirmed = TRUE; action->confirmed = TRUE; action->failed = TRUE; return FALSE; } } return TRUE; } -static gboolean -count_migrating(crm_graph_t * graph, synapse_t * synapse) -{ - GListPtr lpc = NULL; - - CRM_CHECK(synapse != NULL, return FALSE); - - for (lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { - crm_action_t *action = (crm_action_t *) lpc->data; - - const char *task = NULL; - - if (action->type != action_type_rsc) { - continue; - } - - task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); - - if (crm_str_eq(task, CRMD_ACTION_MIGRATE, TRUE) - || crm_str_eq(task, CRMD_ACTION_MIGRATED, TRUE)) { - const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); - - int *counter = g_hash_table_lookup(graph->migrating, node); - - if (counter == NULL) { - counter = calloc(1, sizeof(int)); - g_hash_table_insert(graph->migrating, strdup(node), counter); - } - - (*counter)++; - } - } - return TRUE; -} - -static gboolean -migration_overrun(crm_graph_t * graph, synapse_t * synapse) -{ - GListPtr lpc = NULL; - - CRM_CHECK(synapse != NULL, return FALSE); - - if (graph->migration_limit < 0) { - return FALSE; - } - - for (lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { - crm_action_t *action = (crm_action_t *) lpc->data; - - const char *task = NULL; - - if (action->type != action_type_rsc) { - continue; - } - - task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); - - if (crm_str_eq(task, CRMD_ACTION_MIGRATE, TRUE) - || crm_str_eq(task, CRMD_ACTION_MIGRATED, TRUE)) { - const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); - - int *counter = g_hash_table_lookup(graph->migrating, node); - - if (counter && *counter >= graph->migration_limit) { - return TRUE; - } - - } - } - return FALSE; -} - int run_graph(crm_graph_t * graph) { GListPtr lpc = NULL; int stat_log_level = LOG_DEBUG; int pass_result = transition_active; const char *status = "In-progress"; if (graph_fns == NULL) { set_default_graph_functions(); } if (graph == NULL) { return transition_complete; } graph->fired = 0; graph->pending = 0; graph->skipped = 0; graph->completed = 0; graph->incomplete = 0; - g_hash_table_remove_all(graph->migrating); crm_trace("Entering graph %d callback", graph->id); /* Pre-calculate the number of completed and in-flight operations */ for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { synapse_t *synapse = (synapse_t *) lpc->data; if (synapse->confirmed) { crm_trace("Synapse %d complete", synapse->id); graph->completed++; } else if (synapse->failed == FALSE && synapse->executed) { crm_trace("Synapse %d: confirmation pending", synapse->id); graph->pending++; - - if (graph->migration_limit >= 0) { - count_migrating(graph, synapse); - } } } /* Now check if there is work to do */ for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { synapse_t *synapse = (synapse_t *) lpc->data; if (graph->batch_limit > 0 && graph->pending >= graph->batch_limit) { crm_debug("Throttling output: batch limit (%d) reached", graph->batch_limit); break; - } else if (graph->migration_limit >= 0 && migration_overrun(graph, synapse)) { - crm_debug("Throttling output: migration limit (%d) reached", graph->migration_limit); - break; - } else if (synapse->failed) { graph->skipped++; continue; } else if (synapse->confirmed || synapse->executed) { /* Already handled */ continue; } if (synapse->priority < graph->abort_priority) { crm_trace("Skipping synapse %d: aborting", synapse->id); graph->skipped++; } else if (should_fire_synapse(graph, synapse)) { crm_trace("Synapse %d fired", synapse->id); graph->fired++; if(fire_synapse(graph, synapse) == FALSE) { crm_err("Synapse %d failed to fire", synapse->id); stat_log_level = LOG_ERR; graph->abort_priority = INFINITY; graph->incomplete++; graph->fired--; } if (synapse->confirmed == FALSE) { graph->pending++; - - if (graph->migration_limit >= 0) { - count_migrating(graph, synapse); - } } } else { crm_trace("Synapse %d cannot fire", synapse->id); graph->incomplete++; } } if (graph->pending == 0 && graph->fired == 0) { graph->complete = TRUE; stat_log_level = LOG_NOTICE; pass_result = transition_complete; status = "Complete"; if (graph->incomplete != 0 && graph->abort_priority <= 0) { stat_log_level = LOG_WARNING; pass_result = transition_terminated; status = "Terminated"; } else if (graph->skipped != 0) { status = "Stopped"; } } else if (graph->fired == 0) { pass_result = transition_pending; } do_crm_log(stat_log_level, "Transition %d (Complete=%d, Pending=%d," " Fired=%d, Skipped=%d, Incomplete=%d, Source=%s): %s", graph->id, graph->completed, graph->pending, graph->fired, graph->skipped, graph->incomplete, graph->source, status); return pass_result; } diff --git a/lib/transition/unpack.c b/lib/transition/unpack.c index 90b7a9602f..01c71f25d2 100644 --- a/lib/transition/unpack.c +++ b/lib/transition/unpack.c @@ -1,338 +1,334 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(transitioner); static crm_action_t * unpack_action(synapse_t * parent, xmlNode * xml_action) { crm_action_t *action = NULL; xmlNode *action_copy = NULL; const char *value = crm_element_value(xml_action, XML_ATTR_ID); if (value == NULL) { crm_err("Actions must have an id!"); crm_log_xml_trace(xml_action, "Action with missing id"); return NULL; } action_copy = copy_xml(xml_action); action = calloc(1, sizeof(crm_action_t)); if (action == NULL) { return NULL; } action->id = crm_parse_int(value, NULL); action->type = action_type_rsc; action->xml = action_copy; action->synapse = parent; if (safe_str_eq(crm_element_name(action_copy), XML_GRAPH_TAG_RSC_OP)) { action->type = action_type_rsc; } else if (safe_str_eq(crm_element_name(action_copy), XML_GRAPH_TAG_PSEUDO_EVENT)) { action->type = action_type_pseudo; } else if (safe_str_eq(crm_element_name(action_copy), XML_GRAPH_TAG_CRM_EVENT)) { action->type = action_type_crm; } action->params = xml2list(action_copy); value = g_hash_table_lookup(action->params, "CRM_meta_timeout"); if (value != NULL) { action->timeout = crm_parse_int(value, NULL); } value = g_hash_table_lookup(action->params, "CRM_meta_interval"); if (value != NULL) { action->interval = crm_parse_int(value, NULL); } value = g_hash_table_lookup(action->params, "CRM_meta_can_fail"); if (value != NULL) { crm_str_to_boolean(value, &(action->can_fail)); } crm_trace("Action %d has timer set to %dms", action->id, action->timeout); return action; } static synapse_t * unpack_synapse(crm_graph_t * new_graph, xmlNode * xml_synapse) { const char *value = NULL; xmlNode *inputs = NULL; xmlNode *action_set = NULL; synapse_t *new_synapse = NULL; CRM_CHECK(xml_synapse != NULL, return NULL); crm_trace("looking in synapse %s", ID(xml_synapse)); new_synapse = calloc(1, sizeof(synapse_t)); new_synapse->id = crm_parse_int(ID(xml_synapse), NULL); value = crm_element_value(xml_synapse, XML_CIB_ATTR_PRIORITY); if (value != NULL) { new_synapse->priority = crm_parse_int(value, NULL); } new_graph->num_synapses++; CRM_CHECK(new_synapse->id >= 0, free(new_synapse); return NULL); crm_trace("look for actions in synapse %s", crm_element_value(xml_synapse, XML_ATTR_ID)); for (action_set = __xml_first_child(xml_synapse); action_set != NULL; action_set = __xml_next(action_set)) { if (crm_str_eq((const char *)action_set->name, "action_set", TRUE)) { xmlNode *action = NULL; for (action = __xml_first_child(action_set); action != NULL; action = __xml_next(action)) { crm_action_t *new_action = unpack_action(new_synapse, action); new_graph->num_actions++; if (new_action == NULL) { continue; } crm_trace("Adding action %d to synapse %d", new_action->id, new_synapse->id); new_synapse->actions = g_list_append(new_synapse->actions, new_action); } } } crm_trace("look for inputs in synapse %s", ID(xml_synapse)); for (inputs = __xml_first_child(xml_synapse); inputs != NULL; inputs = __xml_next(inputs)) { if (crm_str_eq((const char *)inputs->name, "inputs", TRUE)) { xmlNode *trigger = NULL; for (trigger = __xml_first_child(inputs); trigger != NULL; trigger = __xml_next(trigger)) { xmlNode *input = NULL; for (input = __xml_first_child(trigger); input != NULL; input = __xml_next(input)) { crm_action_t *new_input = unpack_action(new_synapse, input); if (new_input == NULL) { continue; } crm_trace("Adding input %d to synapse %d", new_input->id, new_synapse->id); new_synapse->inputs = g_list_append(new_synapse->inputs, new_input); } } } } return new_synapse; } crm_graph_t * unpack_graph(xmlNode * xml_graph, const char *reference) { /* id = -1; new_graph->abort_priority = 0; new_graph->network_delay = -1; new_graph->transition_timeout = -1; new_graph->stonith_timeout = -1; new_graph->completion_action = tg_done; - new_graph->migrating = g_hash_table_new_full(crm_str_hash, g_str_equal, - g_hash_destroy_str, g_hash_destroy_str); - if (reference) { new_graph->source = strdup(reference); } else { new_graph->source = strdup("unknown"); } if (xml_graph != NULL) { t_id = crm_element_value(xml_graph, "transition_id"); CRM_CHECK(t_id != NULL, free(new_graph); return NULL); new_graph->id = crm_parse_int(t_id, "-1"); time = crm_element_value(xml_graph, "cluster-delay"); CRM_CHECK(time != NULL, free(new_graph); return NULL); new_graph->network_delay = crm_get_msec(time); time = crm_element_value(xml_graph, "stonith-timeout"); if (time == NULL) { new_graph->stonith_timeout = new_graph->network_delay; } else { new_graph->stonith_timeout = crm_get_msec(time); } t_id = crm_element_value(xml_graph, "batch-limit"); new_graph->batch_limit = crm_parse_int(t_id, "0"); t_id = crm_element_value(xml_graph, "migration-limit"); new_graph->migration_limit = crm_parse_int(t_id, "-1"); } for (synapse = __xml_first_child(xml_graph); synapse != NULL; synapse = __xml_next(synapse)) { if (crm_str_eq((const char *)synapse->name, "synapse", TRUE)) { synapse_t *new_synapse = unpack_synapse(new_graph, synapse); if (new_synapse != NULL) { new_graph->synapses = g_list_append(new_graph->synapses, new_synapse); } } } crm_debug("Unpacked transition %d: %d actions in %d synapses", new_graph->id, new_graph->num_actions, new_graph->num_synapses); return new_graph; } static void destroy_action(crm_action_t * action) { if (action->timer && action->timer->source_id != 0) { crm_warn("Cancelling timer for action %d (src=%d)", action->id, action->timer->source_id); g_source_remove(action->timer->source_id); } if (action->params) { g_hash_table_destroy(action->params); } free_xml(action->xml); free(action->timer); free(action); } static void destroy_synapse(synapse_t * synapse) { while (g_list_length(synapse->actions) > 0) { crm_action_t *action = g_list_nth_data(synapse->actions, 0); synapse->actions = g_list_remove(synapse->actions, action); destroy_action(action); } while (g_list_length(synapse->inputs) > 0) { crm_action_t *action = g_list_nth_data(synapse->inputs, 0); synapse->inputs = g_list_remove(synapse->inputs, action); destroy_action(action); } free(synapse); } void destroy_graph(crm_graph_t * graph) { if (graph == NULL) { return; } while (g_list_length(graph->synapses) > 0) { synapse_t *synapse = g_list_nth_data(graph->synapses, 0); graph->synapses = g_list_remove(graph->synapses, synapse); destroy_synapse(synapse); } - g_hash_table_destroy(graph->migrating); free(graph->source); free(graph); } lrmd_event_data_t * convert_graph_action(xmlNode * resource, crm_action_t * action, int status, int rc) { xmlNode *xop = NULL; lrmd_event_data_t *op = NULL; GHashTableIter iter; const char *name = NULL; const char *value = NULL; xmlNode *action_resource = NULL; CRM_CHECK(action != NULL, return NULL); CRM_CHECK(action->type == action_type_rsc, return NULL); action_resource = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); CRM_CHECK(action_resource != NULL, crm_log_xml_warn(action->xml, "Bad"); return NULL); op = calloc(1, sizeof(lrmd_event_data_t)); op->rsc_id = strdup(ID(action_resource)); op->interval = action->interval; op->op_type = strdup(crm_element_value(action->xml, XML_LRM_ATTR_TASK)); op->rc = rc; op->op_status = status; op->t_run = time(NULL); op->t_rcchange = op->t_run; op->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_iter_init(&iter, action->params); while (g_hash_table_iter_next(&iter, (void **)&name, (void **)&value)) { g_hash_table_insert(op->params, strdup(name), strdup(value)); } for (xop = __xml_first_child(resource); xop != NULL; xop = __xml_next(xop)) { int tmp = 0; crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp); crm_debug("Got call_id=%d for %s", tmp, ID(resource)); if (tmp > op->call_id) { op->call_id = tmp; } } op->call_id++; return op; }