diff --git a/crmd/lrm.c b/crmd/lrm.c index 48195e8f34..78b783cda8 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1,2436 +1,2436 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define START_DELAY_THRESHOLD 5 * 60 * 1000 #define MAX_LRM_REG_FAILS 30 struct delete_event_s { int rc; const char *rsc; lrm_state_t *lrm_state; }; static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, const char *user_name); static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation); static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request); void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id); static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level); static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op); static void lrm_connection_destroy(void) { if (is_set(fsa_input_register, R_LRM_CONNECTED)) { crm_crit("LRM Connection failed"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit(fsa_input_register, R_LRM_CONNECTED); } else { crm_info("LRM Connection disconnected"); } } static char * make_stop_id(const char *rsc, int call_id) { char *op_id = NULL; op_id = calloc(1, strlen(rsc) + 34); if (op_id != NULL) { snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id); } return op_id; } static void copy_instance_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") == NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } static void copy_meta_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") != NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } /* * \internal * \brief Remove a recurring operation from a resource's history * * \param[in,out] history Resource history to modify * \param[in] op Operation to remove * * \return TRUE if the operation was found and removed, FALSE otherwise */ static gboolean history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_event_data_t *existing = iter->data; if ((op->interval == existing->interval) && crm_str_eq(op->rsc_id, existing->rsc_id, TRUE) && safe_str_eq(op->op_type, existing->op_type)) { history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter); lrmd_free_event(existing); return TRUE; } } return FALSE; } /* * \internal * \brief Free all recurring operations in resource history * * \param[in,out] history Resource history to modify */ static void history_free_recurring_ops(rsc_history_t *history) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_free_event(iter->data); } g_list_free(history->recurring_op_list); history->recurring_op_list = NULL; } /* * \internal * \brief Free resource history * * \param[in,out] history Resource history to free */ void history_free(gpointer data) { rsc_history_t *history = (rsc_history_t*)data; if (history->stop_params) { g_hash_table_destroy(history->stop_params); } /* Don't need to free history->rsc.id because it's set to history->id */ free(history->rsc.type); free(history->rsc.class); free(history->rsc.provider); lrmd_free_event(history->failed); lrmd_free_event(history->last); free(history->id); history_free_recurring_ops(history); free(history); } static void update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int target_rc = 0; rsc_history_t *entry = NULL; if (op->rsc_deleted) { crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL); return; } if (safe_str_eq(op->op_type, RSC_NOTIFY)) { return; } crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type); entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id); if (entry == NULL && rsc) { entry = calloc(1, sizeof(rsc_history_t)); entry->id = strdup(op->rsc_id); g_hash_table_insert(lrm_state->resource_history, entry->id, entry); entry->rsc.id = entry->id; entry->rsc.type = strdup(rsc->type); entry->rsc.class = strdup(rsc->class); if (rsc->provider) { entry->rsc.provider = strdup(rsc->provider); } else { entry->rsc.provider = NULL; } } else if (entry == NULL) { crm_info("Resource %s no longer exists, not updating cache", op->rsc_id); return; } entry->last_callid = op->call_id; target_rc = rsc_op_expected_rc(op); if (op->op_status == PCMK_LRM_OP_CANCELLED) { if (op->interval > 0) { crm_trace("Removing cancelled recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval); history_remove_recurring_op(entry, op); return; } else { crm_trace("Skipping %s_%s_%d rc=%d, status=%d", op->rsc_id, op->op_type, op->interval, op->rc, op->op_status); } } else if (did_rsc_op_fail(op, target_rc)) { /* We must store failed monitors here * - otherwise the block below will cause them to be forgetten them when a stop happens */ if (entry->failed) { lrmd_free_event(entry->failed); } entry->failed = lrmd_copy_event(op); } else if (op->interval == 0) { if (entry->last) { lrmd_free_event(entry->last); } entry->last = lrmd_copy_event(op); if (op->params && (safe_str_eq(CRMD_ACTION_START, op->op_type) || safe_str_eq("reload", op->op_type) || safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) { if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } entry->stop_params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params); } } if (op->interval > 0) { /* Ensure there are no duplicates */ history_remove_recurring_op(entry, op); crm_trace("Adding recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval); entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); } else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) { crm_trace("Dropping %d recurring ops because of: %s_%s_%d", g_list_length(entry->recurring_op_list), op->rsc_id, op->op_type, op->interval); history_free_recurring_ops(entry); } } void lrm_op_callback(lrmd_event_data_t * op) { const char *nodename = NULL; lrm_state_t *lrm_state = NULL; CRM_CHECK(op != NULL, return); /* determine the node name for this connection. */ nodename = op->remote_nodename ? op->remote_nodename : fsa_our_uname; if (op->type == lrmd_event_disconnect && (safe_str_eq(nodename, fsa_our_uname))) { /* if this is the local lrmd ipc connection, set the right bits in the * crmd when the connection goes down */ lrm_connection_destroy(); return; } else if (op->type != lrmd_event_exec_complete) { /* we only need to process execution results */ return; } lrm_state = lrm_state_find(nodename); CRM_ASSERT(lrm_state != NULL); process_lrm_event(lrm_state, op, NULL); } /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* This only pertains to local lrmd connections. Remote connections are handled as * resources within the pengine. Connecting and disconnecting from remote lrmd instances * handled differently than the local. */ lrm_state_t *lrm_state = NULL; if(fsa_our_uname == NULL) { return; /* Nothing to do */ } lrm_state = lrm_state_find_or_create(fsa_our_uname); if (lrm_state == NULL) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (action & A_LRM_DISCONNECT) { if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) { if (action == A_LRM_DISCONNECT) { crmd_fsa_stall(FALSE); return; } } clear_bit(fsa_input_register, R_LRM_CONNECTED); crm_info("Disconnecting from the LRM"); lrm_state_disconnect(lrm_state); lrm_state_reset_tables(lrm_state); crm_notice("Disconnected from the LRM"); } if (action & A_LRM_CONNECT) { int ret = pcmk_ok; crm_debug("Connecting to the LRM"); ret = lrm_state_ipc_connect(lrm_state); if (ret != pcmk_ok) { if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) { crm_warn("Failed to sign on to the LRM %d" " (%d max) times", lrm_state->num_lrm_register_fails, MAX_LRM_REG_FAILS); crm_timer_start(wait_timer); crmd_fsa_stall(FALSE); return; } } if (ret != pcmk_ok) { crm_err("Failed to sign on to the LRM %d" " (max) times", lrm_state->num_lrm_register_fails); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } set_bit(fsa_input_register, R_LRM_CONNECTED); crm_info("LRM connection established"); } if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; const char *when = "lrm disconnect"; GHashTableIter gIter; const char *key = NULL; rsc_history_t *entry = NULL; struct recurring_op_s *pending = NULL; crm_debug("Checking for active resources before exit"); if (cur_state == S_TERMINATE) { log_level = LOG_ERR; when = "shutdown"; } else if (is_set(fsa_input_register, R_SHUTDOWN)) { when = "shutdown... waiting"; } if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) { guint removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_actions, lrm_state); crm_notice("Stopped %u recurring operations at %s (%u ops remaining)", removed, when, g_hash_table_size(lrm_state->pending_ops)); } if (lrm_state->pending_ops) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) { /* Ignore recurring actions in the shutdown calculations */ if (pending->interval == 0) { counter++; } } } if (counter > 0) { do_crm_log(log_level, "%d pending LRM operations at %s", counter, when); if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) { do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key); } } else { rc = FALSE; } return rc; } if (lrm_state->resource_history == NULL) { return rc; } if (cur_state == S_TERMINATE || is_set(fsa_input_register, R_SHUTDOWN)) { /* At this point we're not waiting, we're just shutting down */ when = "shutdown"; } counter = 0; g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) { if (is_rsc_active(lrm_state, entry->id) == FALSE) { continue; } counter++; crm_trace("Found %s active", entry->id); if (lrm_state->pending_ops) { GHashTableIter hIter; g_hash_table_iter_init(&hIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) { if (crm_str_eq(entry->id, pending->rsc_id, TRUE)) { crm_notice("%sction %s (%s) incomplete at %s", pending->interval == 0 ? "A" : "Recurring a", key, pending->op_key, when); } } } } if (counter) { crm_err("%d resources were active at %s.", counter, when); } return rc; } GHashTable *metadata_hash = NULL; static char * get_rsc_metadata(const char *type, const char *rclass, const char *provider, bool force) { int rc = pcmk_ok; int len = 0; char *key = NULL; char *metadata = NULL; /* Always use a local connection for this operation */ lrm_state_t *lrm_state = lrm_state_find(fsa_our_uname); CRM_CHECK(type != NULL, return NULL); CRM_CHECK(rclass != NULL, return NULL); CRM_CHECK(lrm_state != NULL, return NULL); if (provider == NULL) { provider = "heartbeat"; } if (metadata_hash == NULL) { metadata_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } len = strlen(type) + strlen(rclass) + strlen(provider) + 4; key = malloc(len); if(key == NULL) { return NULL; } snprintf(key, len, "%s::%s:%s", type, rclass, provider); if(force == FALSE) { crm_trace("Retreiving cached metadata for %s", key); metadata = g_hash_table_lookup(metadata_hash, key); } if(metadata == NULL) { rc = lrm_state_get_metadata(lrm_state, rclass, provider, type, &metadata, 0); crm_trace("Retreived live metadata for %s: %s (%d)", key, pcmk_strerror(rc), rc); if(rc == pcmk_ok) { CRM_LOG_ASSERT(metadata != NULL); g_hash_table_insert(metadata_hash, key, metadata); key = NULL; } else { CRM_LOG_ASSERT(metadata == NULL); metadata = NULL; } } if (metadata == NULL) { crm_warn("No metadata found for %s: %s (%d)", key, pcmk_strerror(rc), rc); } free(key); return metadata; } static char * build_parameter_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode *result, const char *criteria, bool target, bool invert_for_xml) { int len = 0; int max = 0; char *list = NULL; xmlNode *param = NULL; xmlNode *params = NULL; const char *secure_terms[] = { "password", "passwd", "user", }; if(safe_str_eq("private", criteria)) { /* It will take time for the agents to be updated * Check for some common terms */ max = DIMOF(secure_terms); } params = find_xml_node(metadata, "parameters", TRUE); for (param = __xml_first_child(params); param != NULL; param = __xml_next(param)) { if (crm_str_eq((const char *)param->name, "parameter", TRUE)) { bool accept = FALSE; const char *name = crm_element_value(param, "name"); const char *value = crm_element_value(param, criteria); if(max && value) { /* Turn off the compatibility logic once an agent has been updated to know about 'private' */ max = 0; } if (name == NULL) { crm_err("Invalid parameter in %s metadata", op->rsc_id); } else if(target == crm_is_true(value)) { accept = TRUE; } else if(max) { int lpc = 0; bool found = FALSE; for(lpc = 0; found == FALSE && lpc < max; lpc++) { if(safe_str_eq(secure_terms[lpc], name)) { found = TRUE; } } if(found == target) { accept = TRUE; } } if(accept) { int start = len; crm_trace("Attr %s is %s%s", name, target?"":"not ", criteria); len += strlen(name) + 2; list = realloc_safe(list, len + 1); sprintf(list + start, " %s ", name); } else { crm_trace("Rejecting %s for %s", name, criteria); } if(invert_for_xml) { crm_trace("Inverting %s match for %s xml", name, criteria); accept = !accept; } if(result && accept) { value = g_hash_table_lookup(op->params, name); if(value != NULL) { crm_trace("Adding attr to the xml result", name, target?"":"not ", criteria); crm_xml_add(result, name, value); } } } } return list; } static bool resource_supports_action(xmlNode *metadata, const char *name) { const char *value = NULL; xmlNode *action = NULL; xmlNode *actions = NULL; actions = find_xml_node(metadata, "actions", TRUE); for (action = __xml_first_child(actions); action != NULL; action = __xml_next(action)) { if (crm_str_eq((const char *)action->name, "action", TRUE)) { value = crm_element_value(action, "name"); if (safe_str_eq(name, value)) { return TRUE; } } } return FALSE; } static void append_restart_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode * update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *restart = NULL; CRM_LOG_ASSERT(op->params != NULL); if (op->interval > 0) { /* monitors are not reloadable */ return; } if(resource_supports_action(metadata, "reload")) { restart = create_xml_node(NULL, XML_TAG_PARAMS); /* Any parameters with unique="1" should be added into the "op-force-restart" list. */ list = build_parameter_list(op, metadata, restart, "unique", TRUE, FALSE); } else { /* Resource does not support reloads */ return; } digest = calculate_operation_digest(restart, version); /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload, * no matter if it actually supports any parameters with unique="1"). */ crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list? list: ""); crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(restart, "restart digest source"); free_xml(restart); free(digest); free(list); } static void append_secure_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode * update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *secure = NULL; CRM_LOG_ASSERT(op->params != NULL); /* * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on * the insecure ones */ secure = create_xml_node(NULL, XML_TAG_PARAMS); list = build_parameter_list(op, metadata, secure, "private", TRUE, TRUE); if (list != NULL) { digest = calculate_operation_digest(secure, version); crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, list); crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(secure, "secure digest source"); } else { crm_trace("%s: no secure parameters", op->rsc_id); } free_xml(secure); free(digest); free(list); } static gboolean build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *src) { int target_rc = 0; xmlNode *xml_op = NULL; xmlNode *metadata = NULL; const char *m_string = NULL; const char *caller_version = NULL; if (op == NULL) { return FALSE; } target_rc = rsc_op_expected_rc(op); /* there is a small risk in formerly mixed clusters that it will * be sub-optimal. * * however with our upgrade policy, the update we send should * still be completely supported anyway */ caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); CRM_LOG_ASSERT(caller_version != NULL); if(caller_version == NULL) { caller_version = CRM_FEATURE_SET; } crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version); xml_op = create_operation_update(parent, op, caller_version, target_rc, fsa_our_uname, src, LOG_DEBUG); if (xml_op == NULL) { return TRUE; } if (rsc == NULL || op->params == NULL || crm_str_eq(CRMD_ACTION_STOP, op->op_type, TRUE)) { /* Stopped resources don't need the digest logic */ crm_trace("No digests needed for %s %p %p %s", op->rsc_id, op->params, rsc, op->op_type); return TRUE; } m_string = get_rsc_metadata(rsc->type, rsc->class, rsc->provider, safe_str_eq(op->op_type, RSC_START)); if(m_string == NULL) { crm_err("No metadata for %s::%s:%s", rsc->provider, rsc->class, rsc->type); return TRUE; } metadata = string2xml(m_string); if(metadata == NULL) { crm_err("Metadata for %s::%s:%s is not valid XML", rsc->provider, rsc->class, rsc->type); return TRUE; } crm_trace("Includind additional digests for %s::%s:%s", rsc->provider, rsc->class, rsc->type); append_restart_list(op, metadata, xml_op, caller_version); append_secure_list(op, metadata, xml_op, caller_version); free_xml(metadata); return TRUE; } static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id) { rsc_history_t *entry = NULL; entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->last == NULL) { return FALSE; } crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type, entry->last->interval, entry->last->rc); if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)) { return FALSE; } else if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) { /* a stricter check is too complex... * leave that to the PE */ return FALSE; } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) { return FALSE; } else if (entry->last->interval == 0 && entry->last->rc == PCMK_OCF_NOT_CONFIGURED) { /* Badly configured resources can't be reliably stopped */ return FALSE; } return TRUE; } static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) { GHashTableIter iter; rsc_history_t *entry = NULL; g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { GList *gIter = NULL; xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.class); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider); if (entry->last && entry->last->params) { const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); if (container) { crm_trace("Resource %s is a part of container resource %s", entry->id, container); crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container); } } build_operation_update(xml_rsc, &(entry->rsc), entry->failed, __FUNCTION__); build_operation_update(xml_rsc, &(entry->rsc), entry->last, __FUNCTION__); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { build_operation_update(xml_rsc, &(entry->rsc), gIter->data, __FUNCTION__); } } return FALSE; } xmlNode * do_lrm_query_internal(lrm_state_t * lrm_state, gboolean is_replace) { xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; const char *uuid = NULL; if (safe_str_eq(lrm_state->node_name, fsa_our_uname)) { crm_node_t *peer = crm_get_peer(0, lrm_state->node_name); xml_state = do_update_node_cib(peer, node_update_cluster|node_update_peer, NULL, __FUNCTION__); /* The next two lines shouldn't be necessary for newer DCs */ crm_xml_add(xml_state, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER); crm_xml_add(xml_state, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER); uuid = fsa_our_uuid; } else { xml_state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(xml_state, XML_NODE_IS_REMOTE, "true"); crm_xml_add(xml_state, XML_ATTR_ID, lrm_state->node_name); crm_xml_add(xml_state, XML_ATTR_UNAME, lrm_state->node_name); uuid = lrm_state->node_name; } xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(lrm_state, rsc_list); crm_log_xml_trace(xml_state, "Current state of the LRM"); return xml_state; } xmlNode * do_lrm_query(gboolean is_replace, const char *node_name) { lrm_state_t *lrm_state = lrm_state_find(node_name); if (!lrm_state) { crm_err("Could not query lrm state for lrmd node %s", node_name); return NULL; } return do_lrm_query_internal(lrm_state, is_replace); } static void notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) { lrmd_event_data_t *op = NULL; const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); crm_info("Notifying %s on %s that %s was%s deleted", from_sys, from_host, rsc_id, rc == pcmk_ok ? "" : " not"); op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE); CRM_ASSERT(op != NULL); if (rc == pcmk_ok) { op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; } else { op->op_status = PCMK_LRM_OP_ERROR; op->rc = PCMK_OCF_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { /* this isn't expected - trigger a new transition */ time_t now = time(NULL); char *now_s = crm_itoa(now); crm_debug("Triggering a refresh after %s deleted %s from the LRM", from_sys, rsc_id); update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, FALSE, NULL, NULL); free(now_s); } } static gboolean lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) { struct delete_event_s *event = user_data; struct pending_deletion_op_s *op = value; if (crm_str_eq(event->rsc, op->rsc, TRUE)) { notify_deleted(event->lrm_state, op->input, event->rsc, event->rc); return TRUE; } return FALSE; } static gboolean lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; struct recurring_op_s *pending = value; if (crm_str_eq(rsc, pending->rsc_id, TRUE)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } /* * Remove the rsc from the CIB * * Avoids refreshing the entire LRM section of this host */ #define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, const char *user_name) { char *rsc_xpath = NULL; int max = 0; int rc = pcmk_ok; CRM_CHECK(rsc_id != NULL, return -ENXIO); max = strlen(rsc_template) + strlen(rsc_id) + strlen(lrm_state->node_name) + 1; rsc_xpath = calloc(1, max); snprintf(rsc_xpath, max, rsc_template, lrm_state->node_name, rsc_id); rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath, NULL, NULL, call_options | cib_xpath, user_name); free(rsc_xpath); return rc; } static void delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, GHashTableIter * rsc_gIter, int rc, const char *user_name) { struct delete_event_s event; CRM_CHECK(rsc_id != NULL, return); if (rc == pcmk_ok) { char *rsc_id_copy = strdup(rsc_id); if (rsc_gIter) g_hash_table_iter_remove(rsc_gIter); else g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); crm_debug("sync: Sending delete op for %s", rsc_id_copy); delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name); g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy); free(rsc_id_copy); } if (input) { notify_deleted(lrm_state, input, rsc_id, rc); } event.rc = rc; event.rsc = rsc_id; event.lrm_state = lrm_state; g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event); } /* * Remove the op from the CIB * * Avoids refreshing the entire LRM section of this host */ #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s']" #define op_call_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s' and @"XML_LRM_ATTR_CALLID"='%d']" static void delete_op_entry(lrm_state_t * lrm_state, lrmd_event_data_t * op, const char *rsc_id, const char *key, int call_id) { xmlNode *xml_top = NULL; if (op != NULL) { xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); if (op->interval > 0) { char *op_id = generate_op_key(op->rsc_id, op->op_type, op->interval); /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ crm_xml_add(xml_top, XML_ATTR_ID, op_id); free(op_id); } crm_debug("async: Sending delete op for %s_%s_%d (call=%d)", op->rsc_id, op->op_type, op->interval, op->call_id); fsa_cib_conn->cmds->delete(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_quorum_override); } else if (rsc_id != NULL && key != NULL) { int max = 0; char *op_xpath = NULL; if (call_id > 0) { max = strlen(op_call_template) + strlen(rsc_id) + strlen(lrm_state->node_name) + strlen(key) + 10; op_xpath = calloc(1, max); snprintf(op_xpath, max, op_call_template, lrm_state->node_name, rsc_id, key, call_id); } else { max = strlen(op_template) + strlen(rsc_id) + strlen(lrm_state->node_name) + strlen(key) + 1; op_xpath = calloc(1, max); snprintf(op_xpath, max, op_template, lrm_state->node_name, rsc_id, key); } crm_debug("sync: Sending delete op for %s (call=%d)", rsc_id, call_id); fsa_cib_conn->cmds->delete(fsa_cib_conn, op_xpath, NULL, cib_quorum_override | cib_xpath); free(op_xpath); } else { crm_err("Not enough information to delete op entry: rsc=%p key=%p", rsc_id, key); return; } crm_log_xml_trace(xml_top, "op:cancel"); free_xml(xml_top); } void lrm_clear_last_failure(const char *rsc_id, const char *node_name) { char *attr = NULL; GHashTableIter iter; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; rsc_history_t *entry = NULL; attr = generate_op_key(rsc_id, "last_failure", 0); /* This clears last failure for every lrm state that has this rsc.*/ for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (node_name != NULL) { if (strcmp(node_name, lrm_state->node_name) != 0) { /* filter by node_name if node_name is present */ continue; } } delete_op_entry(lrm_state, NULL, rsc_id, attr, 0); if (!lrm_state->resource_history) { continue; } g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { if (crm_str_eq(rsc_id, entry->id, TRUE)) { lrmd_free_event(entry->failed); entry->failed = NULL; } } } free(attr); g_list_free(lrm_state_list); } /* Returns: gboolean - cancellation is in progress */ static gboolean cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove) { int rc = pcmk_ok; char *local_key = NULL; struct recurring_op_s *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); if (key == NULL) { local_key = make_stop_id(rsc_id, op); key = local_key; } pending = g_hash_table_lookup(lrm_state->pending_ops, key); if (pending) { if (remove && pending->remove == FALSE) { pending->remove = TRUE; crm_debug("Scheduling %s for removal", key); } if (pending->cancelled) { crm_debug("Operation %s already cancelled", key); free(local_key); return FALSE; } pending->cancelled = TRUE; } else { crm_info("No pending op found for %s", key); free(local_key); return FALSE; } crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval); if (rc == pcmk_ok) { crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); free(local_key); return TRUE; } crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key); /* The caller needs to make sure the entry is * removed from the pending_ops list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from pending_ops will block * the node from shutting down */ free(local_key); return FALSE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct cancel_data *data = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (crm_str_eq(op->op_key, data->key, TRUE)) { data->done = TRUE; remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove); } return remove; } static gboolean cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove) { guint removed = 0; struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->pending_ops, cancel_action_by_key, &data); crm_trace("Removed %u op cache entries, new size: %u", removed, g_hash_table_size(lrm_state->pending_ops)); return data.done; } static lrmd_rsc_info_t * get_lrm_resource(lrm_state_t * lrm_state, xmlNode * resource, xmlNode * op_msg, gboolean do_create) { lrmd_rsc_info_t *rsc = NULL; const char *id = ID(resource); const char *type = crm_element_value(resource, XML_ATTR_TYPE); const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER); const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG); crm_trace("Retrieving %s from the LRM.", id); CRM_CHECK(id != NULL, return NULL); rsc = lrm_state_get_rsc_info(lrm_state, id, 0); if (!rsc && long_id) { rsc = lrm_state_get_rsc_info(lrm_state, long_id, 0); } if (!rsc && do_create) { CRM_CHECK(class != NULL, return NULL); CRM_CHECK(type != NULL, return NULL); crm_trace("Adding rsc %s before operation", id); lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring); rsc = lrm_state_get_rsc_info(lrm_state, id, 0); if (!rsc) { fsa_data_t *msg_data = NULL; crm_err("Could not add resource %s to LRM %s", id, lrm_state->node_name); /* only register this as a internal error if this involves the local * lrmd. Otherwise we're likely dealing with an unresponsive remote-node * which is not a FSA failure. */ if (lrm_state_is_local(lrm_state) == TRUE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } } return rsc; } static void delete_resource(lrm_state_t * lrm_state, const char *id, lrmd_rsc_info_t * rsc, GHashTableIter * gIter, const char *sys, const char *host, const char *user, ha_msg_input_t * request, gboolean unregister) { int rc = pcmk_ok; crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host); if (rsc && unregister) { rc = lrm_state_unregister_rsc(lrm_state, id, 0); } if (rc == pcmk_ok) { crm_trace("Resource '%s' deleted", id); } else if (rc == -EINPROGRESS) { crm_info("Deletion of resource '%s' pending", id); if (request) { struct pending_deletion_op_s *op = NULL; char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE); op = calloc(1, sizeof(struct pending_deletion_op_s)); op->rsc = strdup(rsc->id); op->input = copy_ha_msg_input(request); g_hash_table_insert(lrm_state->deletion_ops, ref, op); } return; } else { crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d", id, sys, user ? user : "internal", host, rc); } delete_rsc_entry(lrm_state, request, id, gIter, rc, user); } static int get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) { int call_id = 999999999; rsc_history_t *entry = NULL; if(lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* Make sure the call id is greater than the last successful operation, * otherwise the failure will not result in a possible recovery of the resource * as it could appear the failure occurred before the successful start */ if (entry) { call_id = entry->last_callid + 1; } if (call_id < 0) { call_id = 1; } return call_id; } static void force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node) { GHashTableIter gIter; rsc_history_t *entry = NULL; crm_info("clearing resource history on node %s", lrm_state->node_name); g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { /* only unregister the resource during a reprobe if it is not a remote connection * resource. otherwise unregistering the connection will terminate remote-node * membership */ gboolean unregister = TRUE; if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { lrm_state_t *remote_lrm_state = lrm_state_find(entry->id); if (remote_lrm_state) { /* when forcing a reprobe, make sure to clear remote node before * clearing the remote node's connection resource */ force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE); } unregister = FALSE; } delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host, user_name, NULL, unregister); } /* Now delete the copy in the CIB */ erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local); /* And finally, _delete_ the value in attrd * Setting it to FALSE results in the PE sending us back here again */ update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); } static void synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) { lrmd_event_data_t *op = NULL; const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK); const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET); xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE); if(xml_rsc == NULL) { /* Do something else? driect_ack? */ crm_info("Skipping %s=%d on %s (%p): no resource", crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node, lrm_state); return; } else if(operation == NULL) { /* This probably came from crm_resource -C, nothing to do */ crm_info("Skipping %s=%d on %s (%p): no operation", crm_element_value(action, XML_ATTR_TRANSITION_KEY), rc, target_node, lrm_state); return; } op = construct_op(lrm_state, action, ID(xml_rsc), operation); CRM_ASSERT(op != NULL); op->call_id = get_fake_call_id(lrm_state, op->rsc_id); if(safe_str_eq(operation, RSC_NOTIFY)) { /* Notifications can't fail yet */ op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; } else { op->op_status = PCMK_LRM_OP_ERROR; op->rc = rc; } op->t_run = time(NULL); op->t_rcchange = op->t_run; crm_info("Faking result %d for %s_%s_%d on %s (%p)", op->rc, op->rsc_id, op->op_type, op->interval, target_node, lrm_state); if(lrm_state) { process_lrm_event(lrm_state, op, NULL); } else { lrmd_rsc_info_t rsc; rsc.id = strdup(op->rsc_id); rsc.type = crm_element_value_copy(xml_rsc, XML_ATTR_TYPE); rsc.class = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_CLASS); rsc.provider = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_PROVIDER); do_update_resource(target_node, &rsc, op); free(rsc.id); free(rsc.type); free(rsc.class); free(rsc.provider); } lrmd_free_event(op); } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean create_rsc = TRUE; lrm_state_t *lrm_state = NULL; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *user_name = NULL; const char *target_node = NULL; gboolean is_remote_node = FALSE; gboolean crm_rsc_delete = FALSE; if (input->xml != NULL) { /* Remote node operations are routed here to their remote connections */ target_node = crm_element_value(input->xml, XML_LRM_ATTR_TARGET); } if (target_node == NULL) { target_node = fsa_our_uname; } else if (safe_str_neq(target_node, fsa_our_uname)) { is_remote_node = TRUE; } lrm_state = lrm_state_find(target_node); if (lrm_state == NULL && is_remote_node) { crm_err("no lrmd connection for remote node %s found on cluster node %s. Can not process request.", target_node, fsa_our_uname); /* The action must be recorded here and in the CIB as failed */ synthesize_lrmd_failure(NULL, input->xml, PCMK_OCF_CONNECTION_DIED); return; } CRM_ASSERT(lrm_state != NULL); #if ENABLE_ACL user_name = crm_acl_get_set_user(input->msg, F_CRM_USER, NULL); crm_trace("LRM command from user '%s'", user_name); #endif crm_op = crm_element_value(input->msg, F_CRM_TASK); from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } crm_trace("LRM command from: %s", from_sys); if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { /* remember this delete op came from crm_resource */ crm_rsc_delete = TRUE; operation = CRMD_ACTION_DELETE; } else if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { operation = CRM_OP_LRM_REFRESH; } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) { lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* The lrmd can not fail a resource, it does not understand the * concept of success or failure in relation to a resource, it simply * executes operations and reports the results. We determine what a failure is. * Becaues of this, if we want to fail a resource we have to fake what we * understand a failure to look like. * * To do this we create a fake lrmd operation event for the resource * we want to fail. We then pass that event to the lrmd client callback * so it will be processed as if it actually came from the lrmd. */ op = construct_op(lrm_state, input->xml, ID(xml_rsc), "asyncmon"); CRM_ASSERT(op != NULL); free((char *)op->user_data); op->user_data = NULL; op->call_id = get_fake_call_id(lrm_state, op->rsc_id); op->interval = 0; op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_UNKNOWN_ERROR; op->t_run = time(NULL); op->t_rcchange = op->t_run; #if ENABLE_ACL if (user_name && is_privileged(user_name) == FALSE) { crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } #endif rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc); if (rsc) { crm_info("Failing resource %s...", rsc->id); process_lrm_event(lrm_state, op, NULL); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; lrmd_free_rsc_info(rsc); } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(input->msg, "bad input"); } send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } else if (input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { int rc = pcmk_ok; xmlNode *fragment = do_lrm_query_internal(lrm_state, TRUE); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); crm_info("Forced a local LRM refresh: call=%d", rc); if(strcmp(CRM_SYSTEM_CRMD, from_sys) != 0) { xmlNode *reply = create_request( CRM_OP_INVOKE_LRM, fragment, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } free_xml(fragment); } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { xmlNode *data = do_lrm_query_internal(lrm_state, FALSE); xmlNode *reply = create_reply(input->msg, data); if (relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_xml_err(reply, "reply"); } free_xml(reply); free_xml(data); } else if (safe_str_eq(operation, CRM_OP_PROBED)) { update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node); } else if (safe_str_eq(operation, CRM_OP_REPROBE) || safe_str_eq(crm_op, CRM_OP_REPROBE)) { crm_notice("Forcing the status of all resources to be redetected"); force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node); if(strcmp(CRM_SYSTEM_TENGINE, from_sys) != 0 && strcmp(CRM_SYSTEM_TENGINE, from_sys) != 0) { xmlNode *reply = create_request( CRM_OP_INVOKE_LRM, NULL, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; xmlNode *params = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* only the first 16 chars are used by the LRM */ params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { create_rsc = FALSE; } if(lrm_state_is_connected(lrm_state) == FALSE) { synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_CONNECTION_DIED); return; } rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc); if (rsc == NULL && create_rsc) { crm_err("Invalid resource definition for %s", ID(xml_rsc)); crm_log_xml_warn(input->msg, "bad input"); /* if the operation couldn't complete because we can't register * the resource, return a generic error */ synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_NOT_CONFIGURED); } else if (rsc == NULL) { lrmd_event_data_t *op = NULL; crm_notice("Not creating resource for a %s event: %s", operation, ID(input->xml)); delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name); op = construct_op(lrm_state, input->xml, ID(xml_rsc), operation); /* Deleting something that does not exist is a success */ op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; CRM_ASSERT(op != NULL); send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); } else if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) { char *op_key = NULL; char *meta_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; const char *op_interval = NULL; gboolean in_progress = FALSE; CRM_CHECK(params != NULL, crm_log_xml_warn(input->xml, "Bad command"); return); meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL); op_interval = crm_element_value(params, meta_key); free(meta_key); meta_key = crm_meta_name(XML_LRM_ATTR_TASK); op_task = crm_element_value(params, meta_key); free(meta_key); meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); call_id = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(op_task != NULL, crm_log_xml_warn(input->xml, "Bad command"); return); CRM_CHECK(op_interval != NULL, crm_log_xml_warn(input->xml, "Bad command"); return); op_key = generate_op_key(rsc->id, op_task, crm_parse_int(op_interval, "0")); crm_debug("PE requested op %s (call=%s) be cancelled", op_key, call_id ? call_id : "NA"); call = crm_parse_int(call_id, "0"); if (call == 0) { /* the normal case when the PE cancels a recurring op */ in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); } else { /* the normal case when the PE cancels an orphan op */ in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); } if (in_progress == FALSE) { lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc->id, op_task); crm_info("Nothing known about operation %d for %s", call, op_key); delete_op_entry(lrm_state, NULL, rsc->id, op_key, call); CRM_ASSERT(op != NULL); op->rc = PCMK_OCF_OK; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(from_host, from_sys, rsc, op, rsc->id); lrmd_free_event(op); /* needed?? surely not otherwise the cancel_op_(_key) wouldn't * have failed in the first place */ g_hash_table_remove(lrm_state->pending_ops, op_key); } free(op_key); } else if (rsc != NULL && safe_str_eq(operation, CRMD_ACTION_DELETE)) { gboolean unregister = TRUE; #if ENABLE_ACL int cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun | cib_sync_call, user_name); if (cib_rc != pcmk_ok) { lrmd_event_data_t *op = NULL; crm_err ("Attempted deletion of resource status '%s' from CIB for %s (user=%s) on %s failed: (rc=%d) %s", rsc->id, from_sys, user_name ? user_name : "unknown", from_host, cib_rc, pcmk_strerror(cib_rc)); op = construct_op(lrm_state, input->xml, rsc->id, operation); op->op_status = PCMK_LRM_OP_ERROR; if (cib_rc == -EACCES) { op->rc = PCMK_OCF_INSUFFICIENT_PRIV; } else { op->rc = PCMK_OCF_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc->id); lrmd_free_event(op); return; } #endif if (crm_rsc_delete == TRUE && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { unregister = FALSE; } delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input, unregister); } else if (rsc != NULL) { do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg); } lrmd_free_rsc_info(rsc); } else { crm_err("Operation was neither a lrm_query, nor a rsc op. %s", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static lrmd_event_data_t * construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation) { lrmd_event_data_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; const char *op_interval = NULL; GHashTable *params = NULL; const char *transition = NULL; CRM_ASSERT(rsc_id != NULL); op = calloc(1, sizeof(lrmd_event_data_t)); op->type = lrmd_event_exec_complete; op->op_type = strdup(operation); op->op_status = PCMK_LRM_OP_PENDING; op->rc = -1; op->rsc_id = strdup(rsc_id); op->interval = 0; op->timeout = 0; op->start_delay = 0; if (rsc_op == NULL) { CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); op->user_data = NULL; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); crm_trace("Constructed %s op for %s", operation, rsc_id); return op; } params = xml2list(rsc_op); g_hash_table_remove(params, CRM_META "_op_target_rc"); op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY); op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT); op_interval = crm_meta_value(params, XML_LRM_ATTR_INTERVAL); op->interval = crm_parse_int(op_interval, "0"); op->timeout = crm_parse_int(op_timeout, "0"); op->start_delay = crm_parse_int(op_delay, "0"); if (safe_str_neq(operation, RSC_STOP)) { op->params = params; } else { rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); /* If we do not have stop parameters cached, use * whatever we are given */ if (!entry || !entry->stop_params) { op->params = params; } else { /* Copy the cached parameter list so that we stop the resource * with the old attributes, not the new ones */ op->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_foreach(params, copy_meta_keys, op->params); g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); g_hash_table_destroy(params); params = NULL; } } /* sanity */ if (op->interval < 0) { op->interval = 0; } if (op->timeout <= 0) { op->timeout = op->interval; } if (op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = strdup(transition); if (op->interval != 0) { if (safe_str_eq(operation, CRMD_ACTION_START) || safe_str_eq(operation, CRMD_ACTION_STOP)) { crm_err("Start and Stop actions cannot have an interval: %d", op->interval); op->interval = 0; } } crm_trace("Constructed %s op for %s: interval=%d", operation, rsc_id, op->interval); return op; } void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; crm_node_t *peer = NULL; CRM_CHECK(op != NULL, return); if (op->rsc_id == NULL) { CRM_ASSERT(rsc_id != NULL); op->rsc_id = strdup(rsc_id); } if (to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } peer = crm_get_peer(0, fsa_our_uname); update = do_update_node_cib(peer, node_update_none, NULL, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, __FUNCTION__); reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_log_xml_trace(update, "ACK Update"); crm_debug("ACK'ing resource op %s_%s_%d from %s: %s", op->rsc_id, op->op_type, op->interval, op->user_data, crm_element_value(reply, XML_ATTR_REFERENCE)); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(update); free_xml(reply); } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { gboolean res = TRUE; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) { /* keep iterating through all even when false is returned */ res = FALSE; } } set_bit(fsa_input_register, R_SENT_RSC_STOP); g_list_free(lrm_state_list); lrm_state_list = NULL; return res; } struct stop_recurring_action_s { lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct stop_recurring_action_s *event = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (op->interval != 0 && crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) { crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, key); remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE); } return remove; } static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; lrm_state_t *lrm_state = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (op->interval != 0) { crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, key); remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE); } return remove; } static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request) { int call_id = 0; char *op_id = NULL; lrmd_event_data_t *op = NULL; lrmd_key_value_t *params = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; gboolean stop_recurring = FALSE; CRM_CHECK(rsc != NULL, return); CRM_CHECK(operation != NULL, return); if (msg != NULL) { transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if (transition == NULL) { crm_log_xml_err(msg, "Missing transition number"); } } op = construct_op(lrm_state, msg, rsc->id, operation); CRM_CHECK(op != NULL, return); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) && op->interval == 0 && strcmp(operation, CRMD_ACTION_MIGRATE) == 0) { /* pcmk remote connections are a special use case. * We never ever want to stop monitoring a connection resource until * the entire migration has completed. If the connection is ever unexpected * severed, even during a migration, this is an event we must detect.*/ stop_recurring = FALSE; } else if (op->interval == 0 && strcmp(operation, CRMD_ACTION_STATUS) != 0 && strcmp(operation, CRMD_ACTION_NOTIFY) != 0) { /* stop any previous monitor operations before changing the resource state */ stop_recurring = TRUE; } if (stop_recurring == TRUE) { guint removed = 0; struct stop_recurring_action_s data; data.rsc = rsc; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_action_by_rsc, &data); crm_debug("Stopped %u recurring operations in preparation for %s_%s_%d", removed, rsc->id, operation, op->interval); } /* now do the op */ crm_info("Performing key=%s op=%s_%s_%d", transition, rsc->id, operation, op->interval); if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE && fsa_state != S_TRANSITION_ENGINE) { if (safe_str_neq(operation, "fail") && safe_str_neq(operation, CRMD_ACTION_STOP)) { crm_info("Discarding attempt to perform action %s on %s in state %s", operation, rsc->id, fsa_state2string(fsa_state)); op->rc = CRM_DIRECT_NACK_RC; op->op_status = PCMK_LRM_OP_ERROR; send_direct_ack(NULL, NULL, rsc, op, rsc->id); lrmd_free_event(op); free(op_id); return; } } op_id = generate_op_key(rsc->id, op->op_type, op->interval); if (op->interval > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(lrm_state, rsc, op_id, FALSE); } if (op->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, op->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { params = lrmd_key_value_add(params, key, value); } } call_id = lrm_state_exec(lrm_state, rsc->id, op->op_type, op->user_data, op->interval, op->timeout, op->start_delay, params); if (call_id <= 0 && lrm_state_is_local(lrm_state)) { crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else if (call_id <= 0) { crm_err("Operation %s on resource %s failed to execute on remote node %s: %d", operation, rsc->id, lrm_state->node_name, call_id); op->call_id = get_fake_call_id(lrm_state, rsc->id); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_UNKNOWN_ERROR; op->t_run = time(NULL); op->t_rcchange = op->t_run; process_lrm_event(lrm_state, op, NULL); } else { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); struct recurring_op_s *pending = NULL; pending = calloc(1, sizeof(struct recurring_op_s)); crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval = op->interval; pending->op_type = strdup(operation); pending->op_key = strdup(op_id); pending->rsc_id = strdup(rsc->id); pending->start_time = time(NULL); pending->user_data = strdup(op->user_data); g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending); if (op->interval > 0 && op->start_delay > START_DELAY_THRESHOLD) { char *uuid = NULL; int dummy = 0, target_rc = 0; crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc); free(uuid); op->rc = target_rc; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(NULL, NULL, rsc, op, rsc->id); } pending->params = op->params; op->params = NULL; } free(op_id); lrmd_free_event(op); return; } int last_resource_update = 0; static void cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { switch (rc) { case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: crm_trace("Resource update %d complete: rc=%d", call_id, rc); break; default: crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc)); } if (call_id == last_resource_update) { last_resource_update = 0; trigger_fsa(fsa_source); } } static void remote_node_init_status(const char *node_name, int call_opt) { int call_id = 0; xmlNode *update = create_xml_node(NULL, XML_CIB_TAG_STATUS); xmlNode *state; state = simple_remote_node_status(node_name, update,__FUNCTION__); crm_xml_add(state, XML_NODE_IS_FENCED, "0"); fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL); if (call_id != pcmk_ok) { crm_debug("Failed to init status section for remote-node %s", node_name); } free_xml(update); } static void remote_node_clear_status(const char *node_name, int call_opt) { if (node_name == NULL) { return; } remote_node_init_status(node_name, call_opt); erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt); erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt); } static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { /* */ int rc = pcmk_ok; xmlNode *update, *iter = NULL; int call_opt = cib_quorum_override; const char *uuid = NULL; CRM_CHECK(op != NULL, return 0); if (fsa_state == S_ELECTION || fsa_state == S_PENDING) { crm_info("Sending update to local CIB in state: %s", fsa_state2string(fsa_state)); call_opt |= cib_scope_local; } iter = create_xml_node(iter, XML_CIB_TAG_STATUS); update = iter; iter = create_xml_node(iter, XML_CIB_TAG_STATE); if (safe_str_eq(node_name, fsa_our_uname)) { uuid = fsa_our_uuid; } else { /* remote nodes uuid and uname are equal */ uuid = node_name; crm_xml_add(iter, XML_NODE_IS_REMOTE, "true"); } CRM_LOG_ASSERT(uuid != NULL); if(uuid == NULL) { rc = -EINVAL; goto done; } crm_xml_add(iter, XML_ATTR_UUID, uuid); crm_xml_add(iter, XML_ATTR_UNAME, node_name); crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__); iter = create_xml_node(iter, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, __FUNCTION__); if (rsc) { const char *container = NULL; crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class); crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider); if (op->params) { container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); } if (container) { crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container); crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container); } CRM_CHECK(rsc->type != NULL, crm_err("Resource %s has no value for type", op->rsc_id)); CRM_CHECK(rsc->class != NULL, crm_err("Resource %s has no value for class", op->rsc_id)); /* check to see if we need to initialize remote-node related status sections */ if (safe_str_eq(op->op_type, "start") && op->rc == 0 && op->op_status == PCMK_LRM_OP_DONE) { const char *remote_node = g_hash_table_lookup(op->params, CRM_META"_remote_node"); if (remote_node) { - /* A container for a remote-node has started, initalize remote-node's status */ + /* A container for a remote-node has started, initialize remote-node's status */ crm_info("Initalizing lrm status for container remote-node %s. Container successfully started.", remote_node); remote_node_clear_status(remote_node, call_opt); } else if (container == FALSE && safe_str_eq(rsc->type, "remote") && safe_str_eq(rsc->provider, "pacemaker")) { - /* baremetal remote node connection resource has started, initalize remote-node's status */ + /* baremetal remote node connection resource has started, initialize remote-node's status */ crm_info("Initializing lrm status for baremetal remote-node %s", rsc->id); remote_node_clear_status(rsc->id, call_opt); } } } else { crm_warn("Resource %s no longer exists in the lrmd", op->rsc_id); send_direct_ack(NULL, NULL, rsc, op, op->rsc_id); goto cleanup; } crm_log_xml_trace(update, __FUNCTION__); /* make it an asyncronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down then anyway? * (probably because of an internal error) * * Worst case: * we get shot for having resources "running" when the really weren't * * the alternative however means blocking here for too long, which * isnt acceptable */ fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL); if (rc > 0) { last_resource_update = rc; } done: /* the return code is a call number, not an error code */ crm_trace("Sent resource state update message: %d for %s=%d on %s", rc, op->op_type, op->interval, op->rsc_id); fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback); cleanup: free_xml(update); return rc; } void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data) { CRM_CHECK(FALSE, return); } gboolean process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending) { char *op_id = NULL; char *op_key = NULL; int update_id = 0; gboolean remove = FALSE; gboolean removed = FALSE; lrmd_rsc_info_t *rsc = NULL; CRM_CHECK(op != NULL, return FALSE); CRM_CHECK(op->rsc_id != NULL, return FALSE); op_id = make_stop_id(op->rsc_id, op->call_id); op_key = generate_op_key(op->rsc_id, op->op_type, op->interval); rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); if(pending == NULL) { remove = TRUE; pending = g_hash_table_lookup(lrm_state->pending_ops, op_id); } if (op->op_status == PCMK_LRM_OP_ERROR) { switch(op->rc) { case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_MASTER: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_MASTER: /* Leave it up to the TE/PE to decide if this is an error */ op->op_status = PCMK_LRM_OP_DONE; break; default: /* Nothing to do */ break; } } if (op->op_status != PCMK_LRM_OP_CANCELLED) { if (safe_str_eq(op->op_type, RSC_NOTIFY)) { /* Keep notify ops out of the CIB */ send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else { update_id = do_update_resource(lrm_state->node_name, rsc, op); } } else if (op->interval == 0) { /* This will occur when "crm resource cleanup" is called while actions are in-flight */ crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id); send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else if (pending == NULL) { /* We don't need to do anything for cancelled ops * that are not in our pending op list. There are no * transition actions waiting on these operations. */ } else if (op->user_data == NULL) { /* At this point we have a pending entry, but no transition * key present in the user_data field. report this */ crm_err("Op %s (call=%d): No user data", op_key, op->call_id); } else if (pending->remove) { /* The tengine canceled this op, we have been waiting for the cancel to finish. */ delete_op_entry(lrm_state, op, op->rsc_id, op_key, op->call_id); } else if (pending && op->rsc_deleted) { /* The tengine initiated this op, but it was cancelled outside of the * tengine's control during a resource cleanup/re-probe request. The tengine * must be alerted that this operation completed, otherwise the tengine * will continue waiting for this update to occur until it is timed out. * We don't want this update going to the cib though, so use a direct ack. */ crm_trace("Op %s (call=%d): cancelled due to rsc deletion", op_key, op->call_id); send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else { /* Before a stop is called, no need to direct ack */ crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id); } if(remove == FALSE) { /* The caller will do this afterwards, but keep the logging consistent */ removed = TRUE; } else if ((op->interval == 0) && g_hash_table_remove(lrm_state->pending_ops, op_id)) { removed = TRUE; crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops)); } else if(op->interval != 0 && op->op_status == PCMK_LRM_OP_CANCELLED) { removed = TRUE; g_hash_table_remove(lrm_state->pending_ops, op_id); } switch (op->op_status) { case PCMK_LRM_OP_CANCELLED: crm_info("Operation %s: %s (node=%s, call=%d, confirmed=%s)", op_key, services_lrm_status_str(op->op_status), lrm_state->node_name, op->call_id, removed ? "true" : "false"); break; case PCMK_LRM_OP_DONE: do_crm_log(op->interval?LOG_INFO:LOG_NOTICE, "Operation %s: %s (node=%s, call=%d, rc=%d, cib-update=%d, confirmed=%s)", op_key, services_ocf_exitcode_str(op->rc), lrm_state->node_name, op->call_id, op->rc, update_id, removed ? "true" : "false"); break; case PCMK_LRM_OP_TIMEOUT: crm_err("Operation %s: %s (node=%s, call=%d, timeout=%dms)", op_key, services_lrm_status_str(op->op_status), lrm_state->node_name, op->call_id, op->timeout); break; default: crm_err("Operation %s (node=%s, call=%d, status=%d, cib-update=%d, confirmed=%s) %s", op_key, lrm_state->node_name, op->call_id, op->op_status, update_id, removed ? "true" : "false", services_lrm_status_str(op->op_status)); } if (op->output) { char *prefix = crm_strdup_printf("%s-%s_%s_%d:%d", lrm_state->node_name, op->rsc_id, op->op_type, op->interval, op->call_id); if (op->rc) { crm_log_output(LOG_NOTICE, prefix, op->output); } else { crm_log_output(LOG_DEBUG, prefix, op->output); } free(prefix); } crmd_notify_resource_op(lrm_state->node_name, op); if (op->rsc_deleted) { crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL); } /* If a shutdown was escalated while operations were pending, * then the FSA will be stalled right now... allow it to continue */ mainloop_set_trigger(fsa_source); update_history_cache(lrm_state, rsc, op); lrmd_free_rsc_info(rsc); free(op_key); free(op_id); return TRUE; } diff --git a/lib/ais/utils.c b/lib/ais/utils.c index 94a250521f..c72b07df8a 100644 --- a/lib/ais/utils.c +++ b/lib/ais/utils.c @@ -1,780 +1,780 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "./utils.h" #include "./plugin.h" struct pcmk_env_s pcmk_env; void log_ais_message(int level, const AIS_Message * msg) { char *data = get_ais_data(msg); qb_log_from_external_source(__func__, __FILE__, "Msg[%d] (dest=%s:%s, from=%s:%s.%d, remote=%s, size=%d): %.90s", level, __LINE__, 0, msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->sender.uname == local_uname ? "false" : "true", ais_data_len(msg), data); /* do_ais_log(level, */ /* "Msg[%d] (dest=%s:%s, from=%s:%s.%d, remote=%s, size=%d): %.90s", */ /* msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), */ /* ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), */ /* msg->sender.pid, */ /* msg->sender.uname==local_uname?"false":"true", */ /* ais_data_len(msg), data); */ ais_free(data); } /* static gboolean ghash_find_by_uname(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; int id = GPOINTER_TO_INT(user_data); if (node->id == id) { return TRUE; } return FALSE; } */ static int ais_string_to_boolean(const char *s) { int rc = 0; if (s == NULL) { return rc; } if (strcasecmp(s, "true") == 0 || strcasecmp(s, "on") == 0 || strcasecmp(s, "yes") == 0 || strcasecmp(s, "y") == 0 || strcasecmp(s, "1") == 0) { rc = 1; } return rc; } static char *opts_default[] = { NULL, NULL }; static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL }; static void pcmk_setscheduler(crm_child_t * child) { #if defined(HAVE_SCHED_SETSCHEDULER) int policy = sched_getscheduler(0); if (policy == -1) { ais_perror("Could not get scheduling policy for %s", child->name); } else { int priority = -10; if (policy != SCHED_OTHER) { struct sched_param sp; policy = SCHED_OTHER; # if defined(SCHED_RESET_ON_FORK) policy |= SCHED_RESET_ON_FORK; # endif memset(&sp, 0, sizeof(sp)); sp.sched_priority = 0; if (sched_setscheduler(0, policy, &sp) == -1) { ais_perror("Could not reset scheduling policy to SCHED_OTHER for %s", child->name); return; } } if (setpriority(PRIO_PROCESS, 0, priority) == -1) { ais_perror("Could not reset process priority to %d for %s", priority, child->name); } } #else ais_info("The platform is missing process priority setting features. Leaving at default."); #endif } gboolean spawn_child(crm_child_t * child) { int lpc = 0; uid_t uid = 0; gid_t gid = 0; struct rlimit oflimits; gboolean use_valgrind = FALSE; gboolean use_callgrind = FALSE; const char *devnull = "/dev/null"; const char *env_valgrind = getenv("PCMK_valgrind_enabled"); const char *env_callgrind = getenv("PCMK_callgrind_enabled"); if (child->command == NULL) { ais_info("Nothing to do for child \"%s\"", child->name); return TRUE; } if (ais_string_to_boolean(env_callgrind)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if (ais_string_to_boolean(env_valgrind)) { use_valgrind = TRUE; } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) { use_valgrind = TRUE; } if (use_valgrind && strlen(VALGRIND_BIN) == 0) { ais_warn("Cannot enable valgrind for %s:" " The location of the valgrind binary is unknown", child->name); use_valgrind = FALSE; } if (child->uid) { if (pcmk_user_lookup(child->uid, &uid, &gid) < 0) { ais_err("Invalid uid (%s) specified for %s", child->uid, child->name); return FALSE; } ais_info("Using uid=%u and group=%u for process %s", uid, gid, child->name); } child->pid = fork(); AIS_ASSERT(child->pid != -1); if (child->pid > 0) { /* parent */ ais_info("Forked child %d for process %s%s", child->pid, child->name, use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : ""); } else { pcmk_setscheduler(child); /* Setup the two alternate arg arrarys */ opts_vgrind[0] = ais_strdup(VALGRIND_BIN); if (use_callgrind) { opts_vgrind[1] = ais_strdup("--tool=callgrind"); opts_vgrind[2] = ais_strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p"); opts_vgrind[3] = ais_strdup(child->command); opts_vgrind[4] = NULL; } else { opts_vgrind[1] = ais_strdup(child->command); opts_vgrind[2] = NULL; opts_vgrind[3] = NULL; opts_vgrind[4] = NULL; } opts_default[0] = ais_strdup(child->command);; if (uid && initgroups(child->uid, gid) < 0) { - ais_perror("Cannot initalize groups for %s", child->uid); + ais_perror("Cannot initialize groups for %s", child->uid); } if (uid && setuid(uid) < 0) { ais_perror("Could not set user to %d (%s)", uid, child->uid); } /* Close all open file descriptors */ getrlimit(RLIMIT_NOFILE, &oflimits); for (; lpc < oflimits.rlim_cur; lpc++) { close(lpc); } (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ /* *INDENT-OFF* */ setenv("HA_COMPRESSION", "bz2", 1); setenv("HA_cluster_type", "openais", 1); setenv("HA_debug", pcmk_env.debug, 1); setenv("HA_logfacility", pcmk_env.syslog, 1); setenv("HA_LOGFACILITY", pcmk_env.syslog, 1); setenv("HA_use_logd", pcmk_env.use_logd, 1); setenv("HA_quorum_type", pcmk_env.quorum, 1); /* *INDENT-ON* */ if (pcmk_env.logfile) { setenv("HA_logfile", pcmk_env.logfile, 1); } if (use_valgrind) { (void)execvp(VALGRIND_BIN, opts_vgrind); } else { (void)execvp(child->command, opts_default); } ais_perror("FATAL: Cannot exec %s", child->command); exit(100); } return TRUE; } gboolean stop_child(crm_child_t * child, int signal) { if (signal == 0) { signal = SIGTERM; } if (child->command == NULL) { ais_info("Nothing to do for child \"%s\"", child->name); return TRUE; } ais_debug("Stopping CRM child \"%s\"", child->name); if (child->pid <= 0) { ais_trace("Client %s not running", child->name); return TRUE; } errno = 0; if (kill(child->pid, signal) == 0) { ais_notice("Sent -%d to %s: [%d]", signal, child->name, child->pid); } else { ais_perror("Sent -%d to %s: [%d]", signal, child->name, child->pid); } return TRUE; } void destroy_ais_node(gpointer data) { crm_node_t *node = data; ais_info("Destroying entry for node %u", node->id); ais_free(node->addr); ais_free(node->uname); ais_free(node->state); ais_free(node); } int update_member(unsigned int id, uint64_t born, uint64_t seq, int32_t votes, uint32_t procs, const char *uname, const char *state, const char *version) { int changed = 0; crm_node_t *node = NULL; node = g_hash_table_lookup(membership_list, GUINT_TO_POINTER(id)); if (node == NULL) { ais_malloc0(node, sizeof(crm_node_t)); ais_info("Creating entry for node %u born on " U64T "", id, seq); node->id = id; node->addr = NULL; node->state = ais_strdup("unknown"); g_hash_table_insert(membership_list, GUINT_TO_POINTER(id), node); node = g_hash_table_lookup(membership_list, GUINT_TO_POINTER(id)); } AIS_ASSERT(node != NULL); if (seq != 0) { node->last_seen = seq; } if (born != 0 && node->born != born) { changed = TRUE; node->born = born; ais_info("%p Node %u (%s) born on: " U64T, node, id, uname, born); } if (version != NULL) { ais_free(node->version); node->version = ais_strdup(version); } if (uname != NULL) { if (node->uname == NULL || ais_str_eq(node->uname, uname) == FALSE) { ais_info("%p Node %u now known as %s (was: %s)", node, id, uname, node->uname); ais_free(node->uname); node->uname = ais_strdup(uname); changed = TRUE; } } if (procs != 0 && procs != node->processes) { ais_info("Node %s now has process list: %.32x (%u)", node->uname, procs, procs); node->processes = procs; changed = TRUE; } if (votes >= 0 && votes != node->votes) { ais_info("Node %s now has %d quorum votes (was %d)", node->uname, votes, node->votes); node->votes = votes; changed = TRUE; } if (state != NULL) { if (node->state == NULL || ais_str_eq(node->state, state) == FALSE) { ais_free(node->state); node->state = ais_strdup(state); ais_info("Node %u/%s is now: %s", id, node->uname ? node->uname : "unknown", state); changed = TRUE; } } return changed; } void delete_member(uint32_t id, const char *uname) { if (uname == NULL) { g_hash_table_remove(membership_list, GUINT_TO_POINTER(id)); return; } ais_err("Deleting by uname is not yet supported"); } const char * member_uname(uint32_t id) { crm_node_t *node = g_hash_table_lookup(membership_list, GUINT_TO_POINTER(id)); if (node == NULL) { return ".unknown."; } if (node->uname == NULL) { return ".pending."; } return node->uname; } char * append_member(char *data, crm_node_t * node) { int size = 1; /* nul */ int offset = 0; static int fixed_len = 4 + 8 + 7 + 6 + 6 + 7 + 11; if (data) { size = strlen(data); } offset = size; size += fixed_len; size += 32; /* node->id */ size += 100; /* node->seq, node->born */ size += strlen(node->state); if (node->uname) { size += (7 + strlen(node->uname)); } if (node->addr) { size += (6 + strlen(node->addr)); } if (node->version) { size += (9 + strlen(node->version)); } data = realloc_safe(data, size); offset += snprintf(data + offset, size - offset, "id); if (node->uname) { offset += snprintf(data + offset, size - offset, "uname=\"%s\" ", node->uname); } offset += snprintf(data + offset, size - offset, "state=\"%s\" ", node->state); offset += snprintf(data + offset, size - offset, "born=\"" U64T "\" ", node->born); offset += snprintf(data + offset, size - offset, "seen=\"" U64T "\" ", node->last_seen); offset += snprintf(data + offset, size - offset, "votes=\"%d\" ", node->votes); offset += snprintf(data + offset, size - offset, "processes=\"%u\" ", node->processes); if (node->addr) { offset += snprintf(data + offset, size - offset, "addr=\"%s\" ", node->addr); } if (node->version) { offset += snprintf(data + offset, size - offset, "version=\"%s\" ", node->version); } offset += snprintf(data + offset, size - offset, "/>"); return data; } void swap_sender(AIS_Message * msg) { int tmp = 0; char tmp_s[256]; tmp = msg->host.type; msg->host.type = msg->sender.type; msg->sender.type = tmp; tmp = msg->host.type; msg->host.size = msg->sender.type; msg->sender.type = tmp; memcpy(tmp_s, msg->host.uname, 256); memcpy(msg->host.uname, msg->sender.uname, 256); memcpy(msg->sender.uname, tmp_s, 256); } char * get_ais_data(const AIS_Message * msg) { int rc = BZ_OK; char *uncompressed = NULL; unsigned int new_size = msg->size + 1; if (msg->is_compressed == FALSE) { uncompressed = strdup(msg->data); } else { ais_malloc0(uncompressed, new_size); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, (char *)msg->data, msg->compressed_size, 1, 0); if (rc != BZ_OK) { ais_info("rc=%d, new=%u expected=%u", rc, new_size, msg->size); } AIS_ASSERT(rc == BZ_OK); AIS_ASSERT(new_size == msg->size); } return uncompressed; } int send_plugin_msg(enum crm_ais_msg_types type, const char *host, const char *data) { int rc = 0; int data_len = 0; AIS_Message *ais_msg = NULL; int total_size = sizeof(AIS_Message); AIS_ASSERT(local_nodeid != 0); if (data != NULL) { data_len = 1 + strlen(data); total_size += data_len; } ais_malloc0(ais_msg, total_size); ais_msg->header.size = total_size; ais_msg->header.error = CS_OK; ais_msg->header.id = 0; ais_msg->size = data_len; ais_msg->sender.type = crm_msg_ais; if (data != NULL) { memcpy(ais_msg->data, data, data_len); } ais_msg->host.type = type; ais_msg->host.id = 0; if (host) { ais_msg->host.size = strlen(host); memset(ais_msg->host.uname, 0, MAX_NAME); memcpy(ais_msg->host.uname, host, ais_msg->host.size); /* ais_msg->host.id = nodeid_lookup(host); */ } else { ais_msg->host.type = type; ais_msg->host.size = 0; memset(ais_msg->host.uname, 0, MAX_NAME); } rc = send_plugin_msg_raw(ais_msg); ais_free(ais_msg); return rc; } extern struct corosync_api_v1 *pcmk_api; int send_client_ipc(void *conn, const AIS_Message * ais_msg) { int rc = -1; if (conn == NULL) { rc = -2; } else if (!libais_connection_active(conn)) { ais_warn("Connection no longer active"); rc = -3; /* } else if ((queue->size - 1) == queue->used) { */ /* ais_err("Connection is throttled: %d", queue->size); */ } else { #if SUPPORT_COROSYNC rc = pcmk_api->ipc_dispatch_send(conn, ais_msg, ais_msg->header.size); #endif } return rc; } int send_client_msg(void *conn, enum crm_ais_msg_class class, enum crm_ais_msg_types type, const char *data) { int rc = 0; int data_len = 0; int total_size = sizeof(AIS_Message); AIS_Message *ais_msg = NULL; static int msg_id = 0; AIS_ASSERT(local_nodeid != 0); msg_id++; AIS_ASSERT(msg_id != 0 /* wrap-around */ ); if (data != NULL) { data_len = 1 + strlen(data); } total_size += data_len; ais_malloc0(ais_msg, total_size); ais_msg->id = msg_id; ais_msg->header.id = class; ais_msg->header.size = total_size; ais_msg->header.error = CS_OK; ais_msg->size = data_len; if (data != NULL) { memcpy(ais_msg->data, data, data_len); } ais_msg->host.size = 0; ais_msg->host.type = type; memset(ais_msg->host.uname, 0, MAX_NAME); ais_msg->host.id = 0; ais_msg->sender.type = crm_msg_ais; ais_msg->sender.size = local_uname_len; memset(ais_msg->sender.uname, 0, MAX_NAME); memcpy(ais_msg->sender.uname, local_uname, ais_msg->sender.size); ais_msg->sender.id = local_nodeid; rc = send_client_ipc(conn, ais_msg); if (rc != 0) { ais_warn("Sending message to %s failed: %d", msg_type2text(type), rc); log_ais_message(LOG_DEBUG, ais_msg); } ais_free(ais_msg); return rc; } char * ais_concat(const char *prefix, const char *suffix, char join) { int len = 0; char *new_str = NULL; AIS_ASSERT(prefix != NULL); AIS_ASSERT(suffix != NULL); len = strlen(prefix) + strlen(suffix) + 2; ais_malloc0(new_str, (len)); sprintf(new_str, "%s%c%s", prefix, join, suffix); new_str[len - 1] = 0; return new_str; } hdb_handle_t config_find_init(struct corosync_api_v1 * config, char *name) { hdb_handle_t local_handle = 0; #if SUPPORT_COROSYNC config->object_find_create(OBJECT_PARENT_HANDLE, name, strlen(name), &local_handle); ais_info("Local handle: %lld for %s", (long long)local_handle, name); #endif return local_handle; } hdb_handle_t config_find_next(struct corosync_api_v1 * config, char *name, hdb_handle_t top_handle) { int rc = 0; hdb_handle_t local_handle = 0; #if SUPPORT_COROSYNC rc = config->object_find_next(top_handle, &local_handle); #endif if (rc < 0) { ais_info("No additional configuration supplied for: %s", name); local_handle = 0; } else { ais_info("Processing additional %s options...", name); } return local_handle; } void config_find_done(struct corosync_api_v1 *config, hdb_handle_t local_handle) { #if SUPPORT_COROSYNC config->object_find_destroy(local_handle); #endif } int get_config_opt(struct corosync_api_v1 *config, hdb_handle_t object_service_handle, char *key, char **value, const char *fallback) { char *env_key = NULL; *value = NULL; if (object_service_handle > 0) { config->object_key_get(object_service_handle, key, strlen(key), (void **)value, NULL); } if (*value) { ais_info("Found '%s' for option: %s", *value, key); return 0; } env_key = ais_concat("HA", key, '_'); *value = getenv(env_key); ais_free(env_key); if (*value) { ais_info("Found '%s' in ENV for option: %s", *value, key); return 0; } if (fallback) { ais_info("Defaulting to '%s' for option: %s", fallback, key); *value = ais_strdup(fallback); } else { ais_info("No default for option: %s", key); } return -1; } int ais_get_boolean(const char *value) { if (value == NULL) { return 0; } else if (strcasecmp(value, "true") == 0 || strcasecmp(value, "on") == 0 || strcasecmp(value, "yes") == 0 || strcasecmp(value, "y") == 0 || strcasecmp(value, "1") == 0) { return 1; } return 0; } long long ais_get_int(const char *text, char **end_text) { long long result = -1; char *local_end_text = NULL; errno = 0; if (text != NULL) { #ifdef ANSI_ONLY if (end_text != NULL) { result = strtol(text, end_text, 10); } else { result = strtol(text, &local_end_text, 10); } #else if (end_text != NULL) { result = strtoll(text, end_text, 10); } else { result = strtoll(text, &local_end_text, 10); } #endif if (errno == EINVAL) { ais_err("Conversion of %s failed", text); result = -1; } else if (errno == ERANGE) { ais_err("Conversion of %s was clipped: %lld", text, result); } else if (errno != 0) { ais_perror("Conversion of %s failed:", text); } if (local_end_text != NULL && local_end_text[0] != '\0') { ais_err("Characters left over after parsing '%s': '%s'", text, local_end_text); } } return result; } #define PW_BUFFER_LEN 500 int pcmk_user_lookup(const char *name, uid_t * uid, gid_t * gid) { int rc = -1; char *buffer = NULL; struct passwd pwd; struct passwd *pwentry = NULL; ais_malloc0(buffer, PW_BUFFER_LEN); getpwnam_r(name, &pwd, buffer, PW_BUFFER_LEN, &pwentry); if (pwentry) { rc = 0; if (uid) { *uid = pwentry->pw_uid; } if (gid) { *gid = pwentry->pw_gid; } ais_debug("Cluster user %s has uid=%d gid=%d", name, pwentry->pw_uid, pwentry->pw_gid); } else { ais_err("Cluster user %s does not exist", name); } ais_free(buffer); return rc; } diff --git a/pengine/graph.c b/pengine/graph.c index a50f15b514..6edd72844a 100644 --- a/pengine/graph.c +++ b/pengine/graph.c @@ -1,1440 +1,1440 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include void update_colo_start_chain(action_t * action); gboolean rsc_update_action(action_t * first, action_t * then, enum pe_ordering type); static enum pe_action_flags get_action_flags(action_t * action, node_t * node) { enum pe_action_flags flags = action->flags; if (action->rsc) { flags = action->rsc->cmds->action_flags(action, NULL); if (action->rsc->variant >= pe_clone && node) { /* We only care about activity on $node */ enum pe_action_flags clone_flags = action->rsc->cmds->action_flags(action, node); /* Go to great lengths to ensure the correct value for pe_action_runnable... * * If we are a clone, then for _ordering_ constraints, its only relevant * if we are runnable _anywhere_. * * This only applies to _runnable_ though, and only for ordering constraints. * If this function is ever used during colocation, then we'll need additional logic * * Not very satisfying, but its logical and appears to work well. */ if (is_not_set(clone_flags, pe_action_runnable) && is_set(flags, pe_action_runnable)) { pe_rsc_trace(action->rsc, "Fixing up runnable flag for %s", action->uuid); set_bit(clone_flags, pe_action_runnable); } flags = clone_flags; } } return flags; } static char * convert_non_atomic_uuid(char *old_uuid, resource_t * rsc, gboolean allow_notify, gboolean free_original) { int interval = 0; char *uuid = NULL; char *rid = NULL; char *raw_task = NULL; int task = no_action; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Processing %s", old_uuid); if (old_uuid == NULL) { return NULL; } else if (strstr(old_uuid, "notify") != NULL) { goto done; /* no conversion */ } else if (rsc->variant < pe_group) { goto done; /* no conversion */ } CRM_ASSERT(parse_op_key(old_uuid, &rid, &raw_task, &interval)); if (interval > 0) { goto done; /* no conversion */ } task = text2task(raw_task); switch (task) { case stop_rsc: case start_rsc: case action_notify: case action_promote: case action_demote: break; case stopped_rsc: case started_rsc: case action_notified: case action_promoted: case action_demoted: task--; break; case monitor_rsc: case shutdown_crm: case stonith_node: task = no_action; break; default: crm_err("Unknown action: %s", raw_task); task = no_action; break; } if (task != no_action) { if (is_set(rsc->flags, pe_rsc_notify) && allow_notify) { uuid = generate_notify_key(rid, "confirmed-post", task2text(task + 1)); } else { uuid = generate_op_key(rid, task2text(task + 1), 0); } pe_rsc_trace(rsc, "Converted %s -> %s", old_uuid, uuid); } done: if (uuid == NULL) { uuid = strdup(old_uuid); } if (free_original) { free(old_uuid); } free(raw_task); free(rid); return uuid; } static action_t * rsc_expand_action(action_t * action) { action_t *result = action; if (action->rsc && action->rsc->variant >= pe_group) { /* Expand 'start' -> 'started' */ char *uuid = NULL; gboolean notify = FALSE; if (action->rsc->parent == NULL) { /* Only outter-most resources have notification actions */ notify = is_set(action->rsc->flags, pe_rsc_notify); } uuid = convert_non_atomic_uuid(action->uuid, action->rsc, notify, FALSE); if (uuid) { pe_rsc_trace(action->rsc, "Converting %s to %s %d", action->uuid, uuid, is_set(action->rsc->flags, pe_rsc_notify)); result = find_first_action(action->rsc->actions, uuid, NULL, NULL); if (result == NULL) { crm_err("Couldn't expand %s", action->uuid); result = action; } free(uuid); } } return result; } static enum pe_graph_flags graph_update_action(action_t * first, action_t * then, node_t * node, enum pe_action_flags flags, enum pe_ordering type) { enum pe_graph_flags changed = pe_graph_none; gboolean processed = FALSE; /* TODO: Do as many of these in parallel as possible */ if (type & pe_order_implies_then) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, flags & pe_action_optional, pe_action_optional, pe_order_implies_then); } else if (is_set(flags, pe_action_optional) == FALSE) { if (update_action_flags(then, pe_action_optional | pe_action_clear)) { changed |= pe_graph_updated_then; } } if (changed) { pe_rsc_trace(then->rsc, "implies right: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("implies right: %s then %s %p", first->uuid, then->uuid, then->rsc); } } if ((type & pe_order_restart) && then->rsc) { enum pe_action_flags restart = (pe_action_optional | pe_action_runnable); processed = TRUE; changed |= then->rsc->cmds->update_actions(first, then, node, flags, restart, pe_order_restart); if (changed) { pe_rsc_trace(then->rsc, "restart: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("restart: %s then %s", first->uuid, then->uuid); } } if (type & pe_order_implies_first) { processed = TRUE; if (first->rsc) { changed |= first->rsc->cmds->update_actions(first, then, node, flags, pe_action_optional, pe_order_implies_first); } else if (is_set(flags, pe_action_optional) == FALSE) { if (update_action_flags(first, pe_action_runnable | pe_action_clear)) { changed |= pe_graph_updated_first; } } if (changed) { pe_rsc_trace(then->rsc, "implies left: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("implies left: %s then %s", first->uuid, then->uuid); } } if (type & pe_order_implies_first_master) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, flags & pe_action_optional, pe_action_optional, pe_order_implies_first_master); } if (changed) { pe_rsc_trace(then->rsc, "implies left when right rsc is Master role: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("implies left when right rsc is Master role: %s then %s", first->uuid, then->uuid); } } if (type & pe_order_one_or_more) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, flags, pe_action_runnable, pe_order_one_or_more); } else if (is_set(flags, pe_action_runnable)) { /* alright. a "first" action is considered runnable, incremente * the 'runnable_before' counter */ then->runnable_before++; /* if the runnable before count for then exceeds the required number * of "before" runnable actions... mark then as runnable */ if (then->runnable_before >= then->required_runnable_before) { if (update_action_flags(then, pe_action_runnable)) { changed |= pe_graph_updated_then; } } } if (changed) { pe_rsc_trace(then->rsc, "runnable_one_or_more: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("runnable_one_or_more: %s then %s", first->uuid, then->uuid); } } if (type & pe_order_runnable_left) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, flags, pe_action_runnable, pe_order_runnable_left); } else if (is_set(flags, pe_action_runnable) == FALSE) { if (update_action_flags(then, pe_action_runnable | pe_action_clear)) { changed |= pe_graph_updated_then; } } if (changed) { pe_rsc_trace(then->rsc, "runnable: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("runnable: %s then %s", first->uuid, then->uuid); } } if (type & pe_order_implies_first_migratable) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, flags, pe_action_optional, pe_order_implies_first_migratable); } if (changed) { pe_rsc_trace(then->rsc, "optional: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("optional: %s then %s", first->uuid, then->uuid); } } if (type & pe_order_pseudo_left) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, flags, pe_action_optional, pe_order_pseudo_left); } if (changed) { pe_rsc_trace(then->rsc, "optional: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("optional: %s then %s", first->uuid, then->uuid); } } if (type & pe_order_optional) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, flags, pe_action_runnable, pe_order_optional); } if (changed) { pe_rsc_trace(then->rsc, "optional: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("optional: %s then %s", first->uuid, then->uuid); } } if (type & pe_order_asymmetrical) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, flags, pe_action_runnable, pe_order_asymmetrical); } if (changed) { pe_rsc_trace(then->rsc, "asymmetrical: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("asymmetrical: %s then %s", first->uuid, then->uuid); } } if ((first->flags & pe_action_runnable) && (type & pe_order_implies_then_printed) && (flags & pe_action_optional) == 0) { processed = TRUE; crm_trace("%s implies %s printed", first->uuid, then->uuid); update_action_flags(then, pe_action_print_always); /* dont care about changed */ } if ((type & pe_order_implies_first_printed) && (flags & pe_action_optional) == 0) { processed = TRUE; crm_trace("%s implies %s printed", then->uuid, first->uuid); update_action_flags(first, pe_action_print_always); /* dont care about changed */ } if ((type & pe_order_implies_then || type & pe_order_implies_first || type & pe_order_restart) && first->rsc && safe_str_eq(first->task, RSC_STOP) && is_not_set(first->rsc->flags, pe_rsc_managed) && is_set(first->rsc->flags, pe_rsc_block) && is_not_set(first->flags, pe_action_runnable)) { if (update_action_flags(then, pe_action_runnable | pe_action_clear)) { changed |= pe_graph_updated_then; } if (changed) { pe_rsc_trace(then->rsc, "unmanaged left: %s then %s: changed", first->uuid, then->uuid); } else { crm_trace("unmanaged left: %s then %s", first->uuid, then->uuid); } } if (processed == FALSE) { crm_trace("Constraint 0x%.6x not applicable", type); } return changed; } static void mark_start_blocked(resource_t *rsc) { GListPtr gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (safe_str_neq(action->task, RSC_START)) { continue; } if (is_set(action->flags, pe_action_runnable)) { clear_bit(action->flags, pe_action_runnable); update_colo_start_chain(action); update_action(action); } } } void update_colo_start_chain(action_t *action) { GListPtr gIter = NULL; resource_t *rsc = NULL; if (is_not_set(action->flags, pe_action_runnable) && safe_str_eq(action->task, RSC_START)) { rsc = uber_parent(action->rsc); } if (rsc == NULL || rsc->rsc_cons_lhs == NULL) { return; } /* if rsc has children, all the children need to have start set to * unrunnable before we follow the colo chain for the parent. */ for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *)gIter->data; action_t *start = find_first_action(child->actions, NULL, RSC_START, NULL); if (start == NULL || is_set(start->flags, pe_action_runnable)) { return; } } for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *colocate_with = (rsc_colocation_t *)gIter->data; if (colocate_with->score == INFINITY) { mark_start_blocked(colocate_with->rsc_lh); } } } gboolean update_action(action_t * then) { GListPtr lpc = NULL; enum pe_graph_flags changed = pe_graph_none; int last_flags = then->flags; crm_trace("Processing %s (%s %s %s)", then->uuid, is_set(then->flags, pe_action_optional) ? "optional" : "required", is_set(then->flags, pe_action_runnable) ? "runnable" : "unrunnable", is_set(then->flags, pe_action_pseudo) ? "pseudo" : then->node ? then->node->details->uname : ""); if (is_set(then->flags, pe_action_requires_any)) { /* initialize current known runnable before actions to 0 * from here as graph_update_action is called for each of * then's before actions, this number will increment as * runnable 'first' actions are encountered */ then->runnable_before = 0; /* for backwards compatibility with previous options that use - * the 'requires_any' flag, initalize required to 1 if it is + * the 'requires_any' flag, initialize required to 1 if it is * not set. */ if (then->required_runnable_before == 0) { then->required_runnable_before = 1; } clear_bit(then->flags, pe_action_runnable); /* We are relying on the pe_order_one_or_more clause of * graph_update_action(), called as part of the: * * 'if (first == other->action)' * * block below, to set this back if appropriate */ } for (lpc = then->actions_before; lpc != NULL; lpc = lpc->next) { action_wrapper_t *other = (action_wrapper_t *) lpc->data; action_t *first = other->action; node_t *then_node = then->node; node_t *first_node = first->node; enum pe_action_flags then_flags = 0; enum pe_action_flags first_flags = 0; if (first->rsc && first->rsc->variant == pe_group && safe_str_eq(first->task, RSC_START)) { first_node = first->rsc->fns->location(first->rsc, NULL, FALSE); if (first_node) { crm_trace("First: Found node %s for %s", first_node->details->uname, first->uuid); } } if (then->rsc && then->rsc->variant == pe_group && safe_str_eq(then->task, RSC_START)) { then_node = then->rsc->fns->location(then->rsc, NULL, FALSE); if (then_node) { crm_trace("Then: Found node %s for %s", then_node->details->uname, then->uuid); } } clear_bit(changed, pe_graph_updated_first); if (first->rsc != then->rsc && first->rsc != NULL && then->rsc != NULL && first->rsc != then->rsc->parent) { first = rsc_expand_action(first); } if (first != other->action) { crm_trace("Ordering %s afer %s instead of %s", then->uuid, first->uuid, other->action->uuid); } first_flags = get_action_flags(first, then_node); then_flags = get_action_flags(then, first_node); crm_trace("Checking %s (%s %s %s) against %s (%s %s %s) filter=0x%.6x type=0x%.6x", then->uuid, is_set(then_flags, pe_action_optional) ? "optional" : "required", is_set(then_flags, pe_action_runnable) ? "runnable" : "unrunnable", is_set(then_flags, pe_action_pseudo) ? "pseudo" : then->node ? then->node->details-> uname : "", first->uuid, is_set(first_flags, pe_action_optional) ? "optional" : "required", is_set(first_flags, pe_action_runnable) ? "runnable" : "unrunnable", is_set(first_flags, pe_action_pseudo) ? "pseudo" : first->node ? first->node->details-> uname : "", first_flags, other->type); if (first == other->action) { /* * 'first' was not expanded (ie. from 'start' to 'running'), which could mean it: * - has no associated resource, * - was a primitive, * - was pre-expanded (ie. 'running' instead of 'start') * * The third argument here to graph_update_action() is a node which is used under two conditions: * - Interleaving, in which case first->node and * then->node are equal (and NULL) * - If 'then' is a clone, to limit the scope of the * constraint to instances on the supplied node * */ int otype = other->type; node_t *node = then->node; if(is_set(otype, pe_order_implies_then_on_node)) { /* Normally we want the _whole_ 'then' clone to * restart if 'first' is restarted, so then->node is * needed. * * However for unfencing, we want to limit this to * instances on the same node as 'first' (the * unfencing operation), so first->node is supplied. * * Swap the node, from then on we can can treat it * like any other 'pe_order_implies_then' */ clear_bit(otype, pe_order_implies_then_on_node); set_bit(otype, pe_order_implies_then); node = first->node; } clear_bit(first_flags, pe_action_pseudo); changed |= graph_update_action(first, then, node, first_flags, otype); /* 'first' was for a complex resource (clone, group, etc), * create a new dependency if necessary */ } else if (order_actions(first, then, other->type)) { /* This was the first time 'first' and 'then' were associated, * start again to get the new actions_before list */ changed |= (pe_graph_updated_then | pe_graph_disable); } if (changed & pe_graph_disable) { crm_trace("Disabled constraint %s -> %s", other->action->uuid, then->uuid); clear_bit(changed, pe_graph_disable); other->type = pe_order_none; } if (changed & pe_graph_updated_first) { GListPtr lpc2 = NULL; crm_trace("Updated %s (first %s %s %s), processing dependants ", first->uuid, is_set(first->flags, pe_action_optional) ? "optional" : "required", is_set(first->flags, pe_action_runnable) ? "runnable" : "unrunnable", is_set(first->flags, pe_action_pseudo) ? "pseudo" : first->node ? first->node->details-> uname : ""); for (lpc2 = first->actions_after; lpc2 != NULL; lpc2 = lpc2->next) { action_wrapper_t *other = (action_wrapper_t *) lpc2->data; update_action(other->action); } update_action(first); } } if (is_set(then->flags, pe_action_requires_any)) { if (last_flags != then->flags) { changed |= pe_graph_updated_then; } else { clear_bit(changed, pe_graph_updated_then); } } if (changed & pe_graph_updated_then) { crm_trace("Updated %s (then %s %s %s), processing dependants ", then->uuid, is_set(then->flags, pe_action_optional) ? "optional" : "required", is_set(then->flags, pe_action_runnable) ? "runnable" : "unrunnable", is_set(then->flags, pe_action_pseudo) ? "pseudo" : then->node ? then->node->details-> uname : ""); if (is_set(last_flags, pe_action_runnable) && is_not_set(then->flags, pe_action_runnable)) { update_colo_start_chain(then); } update_action(then); for (lpc = then->actions_after; lpc != NULL; lpc = lpc->next) { action_wrapper_t *other = (action_wrapper_t *) lpc->data; update_action(other->action); } } return FALSE; } gboolean shutdown_constraints(node_t * node, action_t * shutdown_op, pe_working_set_t * data_set) { /* add the stop to the before lists so it counts as a pre-req * for the shutdown */ GListPtr lpc = NULL; for (lpc = data_set->actions; lpc != NULL; lpc = lpc->next) { action_t *action = (action_t *) lpc->data; if (action->rsc == NULL || action->node == NULL) { continue; } else if (action->node->details != node->details) { continue; } else if (is_set(action->rsc->flags, pe_rsc_maintenance)) { pe_rsc_trace(action->rsc, "Skipping %s: maintenance mode", action->uuid); continue; } else if (node->details->maintenance) { pe_rsc_trace(action->rsc, "Skipping %s: node %s is in maintenance mode", action->uuid, node->details->uname); continue; } else if (safe_str_neq(action->task, RSC_STOP)) { continue; } else if (is_not_set(action->rsc->flags, pe_rsc_managed) && is_not_set(action->rsc->flags, pe_rsc_block)) { /* * If another action depends on this one, we may still end up blocking */ pe_rsc_trace(action->rsc, "Skipping %s: unmanaged", action->uuid); continue; } pe_rsc_trace(action->rsc, "Ordering %s before shutdown on %s", action->uuid, node->details->uname); pe_clear_action_bit(action, pe_action_optional); custom_action_order(action->rsc, NULL, action, NULL, strdup(CRM_OP_SHUTDOWN), shutdown_op, pe_order_optional | pe_order_runnable_left, data_set); } return TRUE; } gboolean stonith_constraints(node_t * node, action_t * stonith_op, pe_working_set_t * data_set) { CRM_CHECK(stonith_op != NULL, return FALSE); /* * Make sure the stonith OP occurs before we start any shared resources */ if (stonith_op != NULL) { GListPtr lpc = NULL; for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { resource_t *rsc = (resource_t *) lpc->data; if(stonith_op->rsc == NULL) { rsc_stonith_ordering(rsc, stonith_op, data_set); } else if(stonith_op->rsc != rsc && stonith_op->rsc != rsc->container) { rsc_stonith_ordering(rsc, stonith_op, data_set); } } } /* add the stonith OP as a stop pre-req and the mark the stop * as a pseudo op - since its now redundant */ return TRUE; } static node_t * get_router_node(action_t *action) { node_t *began_on = NULL; node_t *ended_on = NULL; node_t *router_node = NULL; if (safe_str_eq(action->task, CRM_OP_FENCE) || is_remote_node(action->node) == FALSE) { return NULL; } CRM_ASSERT(action->node->details->remote_rsc != NULL); if (action->node->details->remote_rsc->running_on) { began_on = action->node->details->remote_rsc->running_on->data; } ended_on = action->node->details->remote_rsc->allocated_to; /* if there is only one location to choose from, * this is easy. Check for those conditions first */ if (!began_on || !ended_on) { /* remote rsc is either shutting down or starting up */ return began_on ? began_on : ended_on; } else if (began_on->details == ended_on->details) { /* remote rsc didn't move nodes. */ return began_on; } /* If we have get here, we know the remote resource * began on one node and is moving to another node. * * This means some actions will get routed through the cluster * node the connection rsc began on, and others are routed through * the cluster node the connection rsc ends up on. * * 1. stop, demote, migrate actions of resources living in the remote * node _MUST_ occur _BEFORE_ the connection can move (these actions * are all required before the remote rsc stop action can occur.) In * this case, we know these actions have to be routed through the initial * cluster node the connection resource lived on before the move takes place. * * 2. Everything else (start, promote, monitor, probe, refresh, clear failcount * delete ....) must occur after the resource starts on the node it is * moving to. */ /* 1. before connection rsc moves. */ if (safe_str_eq(action->task, "stop") || safe_str_eq(action->task, "demote") || safe_str_eq(action->task, "migrate_from") || safe_str_eq(action->task, "migrate_to")) { router_node = began_on; /* 2. after connection rsc moves. */ } else { router_node = ended_on; } return router_node; } static xmlNode * action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set) { gboolean needs_node_info = TRUE; xmlNode *action_xml = NULL; xmlNode *args_xml = NULL; char *action_id_s = NULL; if (action == NULL) { return NULL; } if (safe_str_eq(action->task, CRM_OP_FENCE)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); /* needs_node_info = FALSE; */ } else if (safe_str_eq(action->task, CRM_OP_SHUTDOWN)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); } else if (safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); } else if (safe_str_eq(action->task, CRM_OP_LRM_REFRESH)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); /* } else if(safe_str_eq(action->task, RSC_PROBED)) { */ /* action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); */ } else if (is_set(action->flags, pe_action_pseudo)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_PSEUDO_EVENT); needs_node_info = FALSE; } else { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); } action_id_s = crm_itoa(action->id); crm_xml_add(action_xml, XML_ATTR_ID, action_id_s); free(action_id_s); crm_xml_add(action_xml, XML_LRM_ATTR_TASK, action->task); if (action->rsc != NULL && action->rsc->clone_name != NULL) { char *clone_key = NULL; const char *interval_s = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL); int interval = crm_parse_int(interval_s, "0"); if (safe_str_eq(action->task, RSC_NOTIFY)) { const char *n_type = g_hash_table_lookup(action->meta, "notify_type"); const char *n_task = g_hash_table_lookup(action->meta, "notify_operation"); CRM_CHECK(n_type != NULL, crm_err("No notify type value found for %s", action->uuid)); CRM_CHECK(n_task != NULL, crm_err("No notify operation value found for %s", action->uuid)); clone_key = generate_notify_key(action->rsc->clone_name, n_type, n_task); } else if(action->cancel_task) { clone_key = generate_op_key(action->rsc->clone_name, action->cancel_task, interval); } else { clone_key = generate_op_key(action->rsc->clone_name, action->task, interval); } CRM_CHECK(clone_key != NULL, crm_err("Could not generate a key for %s", action->uuid)); crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, clone_key); crm_xml_add(action_xml, "internal_" XML_LRM_ATTR_TASK_KEY, action->uuid); free(clone_key); } else { crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, action->uuid); } if (needs_node_info && action->node != NULL) { node_t *router_node = get_router_node(action); crm_xml_add(action_xml, XML_LRM_ATTR_TARGET, action->node->details->uname); crm_xml_add(action_xml, XML_LRM_ATTR_TARGET_UUID, action->node->details->id); if (router_node) { crm_xml_add(action_xml, XML_LRM_ATTR_ROUTER_NODE, router_node->details->uname); } } if (as_input) { return action_xml; } if (action->rsc) { if (is_set(action->flags, pe_action_pseudo) == FALSE) { int lpc = 0; xmlNode *rsc_xml = create_xml_node(action_xml, crm_element_name(action->rsc->xml)); const char *attr_list[] = { XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER, XML_ATTR_TYPE }; if (is_set(action->rsc->flags, pe_rsc_orphan) && action->rsc->clone_name) { /* Do not use the 'instance free' name here as that * might interfere with the instance we plan to keep. * Ie. if there are more than two named /anonymous/ * instances on a given node, we need to make sure the * command goes to the right one. * * Keep this block, even when everyone is using * 'instance free' anonymous clone names - it means * we'll do the right thing if anyone toggles the * unique flag to 'off' */ crm_debug("Using orphan clone name %s instead of %s", action->rsc->id, action->rsc->clone_name); crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name); crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); } else if (is_not_set(action->rsc->flags, pe_rsc_unique)) { const char *xml_id = ID(action->rsc->xml); crm_debug("Using anonymous clone name %s for %s (aka. %s)", xml_id, action->rsc->id, action->rsc->clone_name); /* ID is what we'd like client to use * ID_LONG is what they might know it as instead * * ID_LONG is only strictly needed /here/ during the * transition period until all nodes in the cluster * are running the new software /and/ have rebooted * once (meaning that they've only ever spoken to a DC * supporting this feature). * * If anyone toggles the unique flag to 'on', the * 'instance free' name will correspond to an orphan * and fall into the claus above instead */ crm_xml_add(rsc_xml, XML_ATTR_ID, xml_id); if (action->rsc->clone_name && safe_str_neq(xml_id, action->rsc->clone_name)) { crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->clone_name); } else { crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); } } else { CRM_ASSERT(action->rsc->clone_name == NULL); crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id); } for (lpc = 0; lpc < DIMOF(attr_list); lpc++) { crm_xml_add(rsc_xml, attr_list[lpc], g_hash_table_lookup(action->rsc->meta, attr_list[lpc])); } } } args_xml = create_xml_node(NULL, XML_TAG_ATTRS); crm_xml_add(args_xml, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); g_hash_table_foreach(action->extra, hash2field, args_xml); if (action->rsc != NULL && action->node) { GHashTable *p = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); get_rsc_attributes(p, action->rsc, action->node, data_set); g_hash_table_foreach(p, hash2smartfield, args_xml); g_hash_table_destroy(p); } else if(action->rsc && action->rsc->variant <= pe_native) { g_hash_table_foreach(action->rsc->parameters, hash2smartfield, args_xml); } g_hash_table_foreach(action->meta, hash2metafield, args_xml); if (action->rsc != NULL) { int isolated = 0; resource_t *parent = action->rsc; while (parent != NULL) { isolated |= parent->isolation_wrapper ? 1 : 0; parent->cmds->append_meta(parent, args_xml); parent = parent->parent; } if (isolated && action->node) { char *nodeattr = crm_meta_name(XML_RSC_ATTR_ISOLATION_HOST); crm_xml_add(args_xml, nodeattr, action->node->details->uname); free(nodeattr); } } else if (safe_str_eq(action->task, CRM_OP_FENCE) && action->node) { g_hash_table_foreach(action->node->details->attrs, hash2metafield, args_xml); } sorted_xml(args_xml, action_xml, FALSE); crm_log_xml_trace(action_xml, "dumped action"); free_xml(args_xml); return action_xml; } static gboolean should_dump_action(action_t * action) { CRM_CHECK(action != NULL, return FALSE); if (is_set(action->flags, pe_action_dumped)) { crm_trace("action %d (%s) was already dumped", action->id, action->uuid); return FALSE; } else if (is_set(action->flags, pe_action_pseudo) && safe_str_eq(action->task, CRM_OP_PROBED)) { GListPtr lpc = NULL; /* This is a horrible but convenient hack * * It mimimizes the number of actions with unsatisfied inputs * (ie. not included in the graph) * * This in turn, means we can be more concise when printing * aborted/incomplete graphs. * * It also makes it obvious which node is preventing * probe_complete from running (presumably because it is only * partially up) * * For these reasons we tolerate such perversions */ for (lpc = action->actions_after; lpc != NULL; lpc = lpc->next) { action_wrapper_t *wrapper = (action_wrapper_t *) lpc->data; if (is_not_set(wrapper->action->flags, pe_action_runnable)) { /* Only interested in runnable operations */ } else if (safe_str_neq(wrapper->action->task, RSC_START)) { /* Only interested in start operations */ } else if (is_set(wrapper->action->flags, pe_action_dumped)) { crm_trace("action %d (%s) dependency of %s", action->id, action->uuid, wrapper->action->uuid); return TRUE; } else if (should_dump_action(wrapper->action)) { crm_trace("action %d (%s) dependency of %s", action->id, action->uuid, wrapper->action->uuid); return TRUE; } } } if (is_set(action->flags, pe_action_runnable) == FALSE) { crm_trace("action %d (%s) was not runnable", action->id, action->uuid); return FALSE; } else if (is_set(action->flags, pe_action_optional) && is_set(action->flags, pe_action_print_always) == FALSE) { crm_trace("action %d (%s) was optional", action->id, action->uuid); return FALSE; } else if (action->rsc != NULL && is_not_set(action->rsc->flags, pe_rsc_managed)) { const char *interval = NULL; interval = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL); /* make sure probes and recurring monitors go through */ if (safe_str_neq(action->task, RSC_STATUS) && interval == NULL) { crm_trace("action %d (%s) was for an unmanaged resource (%s)", action->id, action->uuid, action->rsc->id); return FALSE; } } if (is_set(action->flags, pe_action_pseudo) || safe_str_eq(action->task, CRM_OP_FENCE) || safe_str_eq(action->task, CRM_OP_SHUTDOWN)) { /* skip the next checks */ return TRUE; } if (action->node == NULL) { pe_err("action %d (%s) was not allocated", action->id, action->uuid); log_action(LOG_DEBUG, "Unallocated action", action, FALSE); return FALSE; } else if (action->node->details->online == FALSE) { pe_err("action %d was (%s) scheduled for offline node", action->id, action->uuid); log_action(LOG_DEBUG, "Action for offline node", action, FALSE); return FALSE; #if 0 /* but this would also affect resources that can be safely * migrated before a fencing op */ } else if (action->node->details->unclean == FALSE) { pe_err("action %d was (%s) scheduled for unclean node", action->id, action->uuid); log_action(LOG_DEBUG, "Action for unclean node", action, FALSE); return FALSE; #endif } return TRUE; } /* lowest to highest */ static gint sort_action_id(gconstpointer a, gconstpointer b) { const action_wrapper_t *action_wrapper2 = (const action_wrapper_t *)a; const action_wrapper_t *action_wrapper1 = (const action_wrapper_t *)b; if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (action_wrapper1->action->id > action_wrapper2->action->id) { return -1; } if (action_wrapper1->action->id < action_wrapper2->action->id) { return 1; } return 0; } static gboolean check_dump_input(int last_action, action_t * action, action_wrapper_t * wrapper) { int type = wrapper->type; if (wrapper->state == pe_link_dumped) { return TRUE; } else if (wrapper->state == pe_link_dup) { return FALSE; } type &= ~pe_order_implies_first_printed; type &= ~pe_order_implies_then_printed; type &= ~pe_order_optional; if (wrapper->action->node && action->rsc && action->rsc->fillers && is_not_set(type, pe_order_preserve) && wrapper->action->node->details->remote_rsc && uber_parent(action->rsc) != uber_parent(wrapper->action->rsc) ) { /* This prevents user-defined ordering constraints between * resources in remote nodes and the resources that * define/represent a remote node. * * There is no known valid reason to allow this sort of thing * but if one arises, we'd need to change the * action->rsc->fillers clause to be more specific, possibly * to check that it contained wrapper->action->rsc */ crm_warn("Invalid ordering constraint between %s and %s", wrapper->action->rsc->id, action->rsc->id); wrapper->type = pe_order_none; return FALSE; } if (last_action == wrapper->action->id) { crm_trace("Input (%d) %s duplicated for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); wrapper->state = pe_link_dup; return FALSE; } else if (wrapper->type == pe_order_none) { crm_trace("Input (%d) %s suppressed for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); return FALSE; } else if (is_set(wrapper->action->flags, pe_action_runnable) == FALSE && type == pe_order_none && safe_str_neq(wrapper->action->uuid, CRM_OP_PROBED)) { crm_trace("Input (%d) %s optional (ordering) for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); return FALSE; } else if (is_set(wrapper->action->flags, pe_action_runnable) == FALSE && is_set(type, pe_order_one_or_more)) { crm_trace("Input (%d) %s optional (one-or-more) for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); return FALSE; } else if (is_set(action->flags, pe_action_pseudo) && (wrapper->type & pe_order_stonith_stop)) { crm_trace("Input (%d) %s suppressed for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); return FALSE; } else if ((wrapper->type & pe_order_implies_first_migratable) && (is_set(wrapper->action->flags, pe_action_runnable) == FALSE)) { return FALSE; } else if ((wrapper->type & pe_order_apply_first_non_migratable) && (is_set(wrapper->action->flags, pe_action_migrate_runnable))) { return FALSE; } else if ((wrapper->type == pe_order_optional) && strstr(wrapper->action->uuid, "_stop_0") && is_set(wrapper->action->flags, pe_action_migrate_runnable)) { /* for optional only ordering, ordering is not preserved for * a stop action that is actually involved with a migration. */ return FALSE; } else if (wrapper->type == pe_order_load) { crm_trace("check load filter %s.%s -> %s.%s", wrapper->action->uuid, wrapper->action->node ? wrapper->action->node->details->uname : "", action->uuid, action->node ? action->node->details->uname : ""); if (action->rsc && safe_str_eq(action->task, RSC_MIGRATE)) { /* Remove the orders like the following if not relevant: * "load_stopped_node2" -> "rscA_migrate_to node1" * which were created also from: pengine/native.c: MigrateRsc() * order_actions(other, then, other_w->type); */ /* For migrate_to ops, we care about where it has been * allocated to, not where the action will be executed */ if (wrapper->action->node == NULL || action->rsc->allocated_to == NULL || wrapper->action->node->details != action->rsc->allocated_to->details) { /* Check if the actions are for the same node, ignore otherwise */ crm_trace("load filter - migrate"); wrapper->type = pe_order_none; return FALSE; } } else if (wrapper->action->node == NULL || action->node == NULL || wrapper->action->node->details != action->node->details) { /* Check if the actions are for the same node, ignore otherwise */ crm_trace("load filter - node"); wrapper->type = pe_order_none; return FALSE; } else if (is_set(wrapper->action->flags, pe_action_optional)) { /* Check if the pre-req is optional, ignore if so */ crm_trace("load filter - optional"); wrapper->type = pe_order_none; return FALSE; } } else if (wrapper->type == pe_order_anti_colocation) { crm_trace("check anti-colocation filter %s.%s -> %s.%s", wrapper->action->uuid, wrapper->action->node ? wrapper->action->node->details->uname : "", action->uuid, action->node ? action->node->details->uname : ""); if (wrapper->action->node && action->node && wrapper->action->node->details != action->node->details) { /* Check if the actions are for the same node, ignore otherwise */ crm_trace("anti-colocation filter - node"); wrapper->type = pe_order_none; return FALSE; } else if (is_set(wrapper->action->flags, pe_action_optional)) { /* Check if the pre-req is optional, ignore if so */ crm_trace("anti-colocation filter - optional"); wrapper->type = pe_order_none; return FALSE; } } else if (wrapper->action->rsc && wrapper->action->rsc != action->rsc && is_set(wrapper->action->rsc->flags, pe_rsc_failed) && is_not_set(wrapper->action->rsc->flags, pe_rsc_managed) && strstr(wrapper->action->uuid, "_stop_0") && action->rsc && action->rsc->variant >= pe_clone) { crm_warn("Ignoring requirement that %s complete before %s:" " unmanaged failed resources cannot prevent clone shutdown", wrapper->action->uuid, action->uuid); return FALSE; } else if (is_set(wrapper->action->flags, pe_action_dumped) || should_dump_action(wrapper->action)) { crm_trace("Input (%d) %s should be dumped for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); goto dump; #if 0 } else if (is_set(wrapper->action->flags, pe_action_runnable) && is_set(wrapper->action->flags, pe_action_pseudo) && wrapper->action->rsc->variant != pe_native) { crm_crit("Input (%d) %s should be dumped for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); goto dump; #endif } else if (is_set(wrapper->action->flags, pe_action_optional) == TRUE && is_set(wrapper->action->flags, pe_action_print_always) == FALSE) { crm_trace("Input (%d) %s optional for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); crm_trace("Input (%d) %s n=%p p=%d r=%d o=%d a=%d f=0x%.6x", wrapper->action->id, wrapper->action->uuid, wrapper->action->node, is_set(wrapper->action->flags, pe_action_pseudo), is_set(wrapper->action->flags, pe_action_runnable), is_set(wrapper->action->flags, pe_action_optional), is_set(wrapper->action->flags, pe_action_print_always), wrapper->type); return FALSE; } dump: return TRUE; } static gboolean graph_has_loop(action_t * init_action, action_t * action, action_wrapper_t * wrapper) { GListPtr lpc = NULL; gboolean has_loop = FALSE; if (is_set(wrapper->action->flags, pe_action_tracking)) { crm_trace("Breaking tracking loop: %s.%s -> %s.%s (0x%.6x)", wrapper->action->uuid, wrapper->action->node ? wrapper->action->node->details->uname : "", action->uuid, action->node ? action->node->details->uname : "", wrapper->type); return FALSE; } if (check_dump_input(-1, action, wrapper) == FALSE) { return FALSE; } /* If there's any order like: * "rscB_stop node2"-> "load_stopped_node2" -> "rscA_migrate_to node1" * rscA is being migrated from node1 to node2, * while rscB is being migrated from node2 to node1. * There will be potential graph loop. * Break the order "load_stopped_node2" -> "rscA_migrate_to node1". */ crm_trace("Checking graph loop: %s.%s -> %s.%s (0x%.6x)", wrapper->action->uuid, wrapper->action->node ? wrapper->action->node->details->uname : "", action->uuid, action->node ? action->node->details->uname : "", wrapper->type); if (wrapper->action == init_action) { crm_debug("Found graph loop: %s.%s ->...-> %s.%s", action->uuid, action->node ? action->node->details->uname : "", init_action->uuid, init_action->node ? init_action->node->details->uname : ""); return TRUE; } set_bit(wrapper->action->flags, pe_action_tracking); for (lpc = wrapper->action->actions_before; lpc != NULL; lpc = lpc->next) { action_wrapper_t *wrapper_before = (action_wrapper_t *) lpc->data; if (graph_has_loop(init_action, wrapper->action, wrapper_before)) { has_loop = TRUE; goto done; } } done: clear_bit(wrapper->action->flags, pe_action_tracking); return has_loop; } static gboolean should_dump_input(int last_action, action_t * action, action_wrapper_t * wrapper) { wrapper->state = pe_link_not_dumped; if (check_dump_input(last_action, action, wrapper) == FALSE) { return FALSE; } if (wrapper->type == pe_order_load && action->rsc && safe_str_eq(action->task, RSC_MIGRATE)) { crm_trace("Checking graph loop - load migrate: %s.%s -> %s.%s", wrapper->action->uuid, wrapper->action->node ? wrapper->action->node->details->uname : "", action->uuid, action->node ? action->node->details->uname : ""); if (graph_has_loop(action, action, wrapper)) { /* Remove the orders like the following if they are introducing any graph loops: * "load_stopped_node2" -> "rscA_migrate_to node1" * which were created also from: pengine/native.c: MigrateRsc() * order_actions(other, then, other_w->type); */ crm_debug("Breaking graph loop - load migrate: %s.%s -> %s.%s", wrapper->action->uuid, wrapper->action->node ? wrapper->action->node->details->uname : "", action->uuid, action->node ? action->node->details->uname : ""); wrapper->type = pe_order_none; return FALSE; } } crm_trace("Input (%d) %s n=%p p=%d r=%d o=%d a=%d f=0x%.6x dumped for %s", wrapper->action->id, wrapper->action->uuid, wrapper->action->node, is_set(wrapper->action->flags, pe_action_pseudo), is_set(wrapper->action->flags, pe_action_runnable), is_set(wrapper->action->flags, pe_action_optional), is_set(wrapper->action->flags, pe_action_print_always), wrapper->type, action->uuid); return TRUE; } void graph_element_from_action(action_t * action, pe_working_set_t * data_set) { GListPtr lpc = NULL; int last_action = -1; int synapse_priority = 0; xmlNode *syn = NULL; xmlNode *set = NULL; xmlNode *in = NULL; xmlNode *input = NULL; xmlNode *xml_action = NULL; if (should_dump_action(action) == FALSE) { return; } set_bit(action->flags, pe_action_dumped); syn = create_xml_node(data_set->graph, "synapse"); set = create_xml_node(syn, "action_set"); in = create_xml_node(syn, "inputs"); crm_xml_add_int(syn, XML_ATTR_ID, data_set->num_synapse); data_set->num_synapse++; if (action->rsc != NULL) { synapse_priority = action->rsc->priority; } if (action->priority > synapse_priority) { synapse_priority = action->priority; } if (synapse_priority > 0) { crm_xml_add_int(syn, XML_CIB_ATTR_PRIORITY, synapse_priority); } xml_action = action2xml(action, FALSE, data_set); add_node_nocopy(set, crm_element_name(xml_action), xml_action); action->actions_before = g_list_sort(action->actions_before, sort_action_id); for (lpc = action->actions_before; lpc != NULL; lpc = lpc->next) { action_wrapper_t *wrapper = (action_wrapper_t *) lpc->data; if (should_dump_input(last_action, action, wrapper) == FALSE) { continue; } wrapper->state = pe_link_dumped; CRM_CHECK(last_action < wrapper->action->id,; ); last_action = wrapper->action->id; input = create_xml_node(in, "trigger"); xml_action = action2xml(wrapper->action, TRUE, data_set); add_node_nocopy(input, crm_element_name(xml_action), xml_action); } }