diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c index 0f9d8026e7..9e8dd36ea9 100644 --- a/daemons/controld/controld_execd.c +++ b/daemons/controld/controld_execd.c @@ -1,2803 +1,2801 @@ /* * Copyright 2004-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include // lrmd_event_data_t, lrmd_rsc_info_t, etc. #include #include #include #include #include #include #define START_DELAY_THRESHOLD 5 * 60 * 1000 #define MAX_LRM_REG_FAILS 30 #define s_if_plural(i) (((i) == 1)? "" : "s") struct delete_event_s { int rc; const char *rsc; lrm_state_t *lrm_state; }; static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, const char *user_name); static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation); static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request); void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id); static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level); static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op); static void lrm_connection_destroy(void) { if (is_set(fsa_input_register, R_LRM_CONNECTED)) { crm_crit("Connection to executor failed"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit(fsa_input_register, R_LRM_CONNECTED); } else { crm_info("Disconnected from executor"); } } static char * make_stop_id(const char *rsc, int call_id) { return crm_strdup_printf("%s:%d", rsc, call_id); } static void copy_instance_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") == NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } static void copy_meta_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") != NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } /*! * \internal * \brief Remove a recurring operation from a resource's history * * \param[in,out] history Resource history to modify * \param[in] op Operation to remove * * \return TRUE if the operation was found and removed, FALSE otherwise */ static gboolean history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_event_data_t *existing = iter->data; if ((op->interval_ms == existing->interval_ms) && crm_str_eq(op->rsc_id, existing->rsc_id, TRUE) && safe_str_eq(op->op_type, existing->op_type)) { history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter); lrmd_free_event(existing); return TRUE; } } return FALSE; } /*! * \internal * \brief Free all recurring operations in resource history * * \param[in,out] history Resource history to modify */ static void history_free_recurring_ops(rsc_history_t *history) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_free_event(iter->data); } g_list_free(history->recurring_op_list); history->recurring_op_list = NULL; } /*! * \internal * \brief Free resource history * * \param[in,out] history Resource history to free */ void history_free(gpointer data) { rsc_history_t *history = (rsc_history_t*)data; if (history->stop_params) { g_hash_table_destroy(history->stop_params); } /* Don't need to free history->rsc.id because it's set to history->id */ free(history->rsc.type); free(history->rsc.standard); free(history->rsc.provider); lrmd_free_event(history->failed); lrmd_free_event(history->last); free(history->id); history_free_recurring_ops(history); free(history); } static void update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int target_rc = 0; rsc_history_t *entry = NULL; if (op->rsc_deleted) { crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL); return; } if (safe_str_eq(op->op_type, RSC_NOTIFY)) { return; } crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type); entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id); if (entry == NULL && rsc) { entry = calloc(1, sizeof(rsc_history_t)); entry->id = strdup(op->rsc_id); g_hash_table_insert(lrm_state->resource_history, entry->id, entry); entry->rsc.id = entry->id; entry->rsc.type = strdup(rsc->type); entry->rsc.standard = strdup(rsc->standard); if (rsc->provider) { entry->rsc.provider = strdup(rsc->provider); } else { entry->rsc.provider = NULL; } } else if (entry == NULL) { crm_info("Resource %s no longer exists, not updating cache", op->rsc_id); return; } entry->last_callid = op->call_id; target_rc = rsc_op_expected_rc(op); if (op->op_status == PCMK_LRM_OP_CANCELLED) { if (op->interval_ms > 0) { crm_trace("Removing cancelled recurring op: " CRM_OP_FMT, op->rsc_id, op->op_type, op->interval_ms); history_remove_recurring_op(entry, op); return; } else { crm_trace("Skipping " CRM_OP_FMT " rc=%d, status=%d", op->rsc_id, op->op_type, op->interval_ms, op->rc, op->op_status); } } else if (did_rsc_op_fail(op, target_rc)) { /* Store failed monitors here, otherwise the block below will cause them * to be forgotten when a stop happens. */ if (entry->failed) { lrmd_free_event(entry->failed); } entry->failed = lrmd_copy_event(op); } else if (op->interval_ms == 0) { if (entry->last) { lrmd_free_event(entry->last); } entry->last = lrmd_copy_event(op); if (op->params && (safe_str_eq(CRMD_ACTION_START, op->op_type) || safe_str_eq("reload", op->op_type) || safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) { if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } entry->stop_params = crm_str_table_new(); g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params); } } if (op->interval_ms > 0) { /* Ensure there are no duplicates */ history_remove_recurring_op(entry, op); crm_trace("Adding recurring op: " CRM_OP_FMT, op->rsc_id, op->op_type, op->interval_ms); entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); } else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) { crm_trace("Dropping %d recurring ops because of: " CRM_OP_FMT, g_list_length(entry->recurring_op_list), op->rsc_id, op->op_type, op->interval_ms); history_free_recurring_ops(entry); } } /*! * \internal * \brief Send a direct OK ack for a resource task * * \param[in] lrm_state LRM connection * \param[in] input Input message being ack'ed * \param[in] rsc_id ID of affected resource * \param[in] rsc Affected resource (if available) * \param[in] task Operation task being ack'ed * \param[in] ack_host Name of host to send ack to * \param[in] ack_sys IPC system name to ack */ static void send_task_ok_ack(lrm_state_t *lrm_state, ha_msg_input_t *input, const char *rsc_id, lrmd_rsc_info_t *rsc, const char *task, const char *ack_host, const char *ack_sys) { lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task); op->rc = PCMK_OCF_OK; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(ack_host, ack_sys, rsc, op, rsc_id); lrmd_free_event(op); } static inline const char * op_node_name(lrmd_event_data_t *op) { return op->remote_nodename? op->remote_nodename : fsa_our_uname; } void lrm_op_callback(lrmd_event_data_t * op) { CRM_CHECK(op != NULL, return); switch (op->type) { case lrmd_event_disconnect: if (op->remote_nodename == NULL) { /* If this is the local executor IPC connection, set the right * bits in the controller when the connection goes down. */ lrm_connection_destroy(); } break; case lrmd_event_exec_complete: { lrm_state_t *lrm_state = lrm_state_find(op_node_name(op)); CRM_ASSERT(lrm_state != NULL); process_lrm_event(lrm_state, op, NULL, NULL); } break; default: break; } } /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* This only pertains to local executor connections. Remote connections are * handled as resources within the scheduler. Connecting and disconnecting * from remote executor instances is handled differently. */ lrm_state_t *lrm_state = NULL; if(fsa_our_uname == NULL) { return; /* Nothing to do */ } lrm_state = lrm_state_find_or_create(fsa_our_uname); if (lrm_state == NULL) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (action & A_LRM_DISCONNECT) { if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) { if (action == A_LRM_DISCONNECT) { crmd_fsa_stall(FALSE); return; } } clear_bit(fsa_input_register, R_LRM_CONNECTED); crm_info("Disconnecting from the executor"); lrm_state_disconnect(lrm_state); lrm_state_reset_tables(lrm_state, FALSE); crm_notice("Disconnected from the executor"); } if (action & A_LRM_CONNECT) { int ret = pcmk_ok; crm_debug("Connecting to the executor"); ret = lrm_state_ipc_connect(lrm_state); if (ret != pcmk_ok) { if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) { crm_warn("Failed to connect to the executor %d time%s (%d max)", lrm_state->num_lrm_register_fails, s_if_plural(lrm_state->num_lrm_register_fails), MAX_LRM_REG_FAILS); controld_start_timer(wait_timer); crmd_fsa_stall(FALSE); return; } } if (ret != pcmk_ok) { crm_err("Failed to connect to the executor the max allowed %d time%s", lrm_state->num_lrm_register_fails, s_if_plural(lrm_state->num_lrm_register_fails)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } set_bit(fsa_input_register, R_LRM_CONNECTED); crm_info("Connection to the executor established"); } if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; const char *when = "lrm disconnect"; GHashTableIter gIter; const char *key = NULL; rsc_history_t *entry = NULL; struct recurring_op_s *pending = NULL; crm_debug("Checking for active resources before exit"); if (cur_state == S_TERMINATE) { log_level = LOG_ERR; when = "shutdown"; } else if (is_set(fsa_input_register, R_SHUTDOWN)) { when = "shutdown... waiting"; } if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) { guint removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_actions, lrm_state); guint nremaining = g_hash_table_size(lrm_state->pending_ops); if (removed || nremaining) { crm_notice("Stopped %u recurring operation%s at %s (%u remaining)", removed, s_if_plural(removed), when, nremaining); } } if (lrm_state->pending_ops) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) { /* Ignore recurring actions in the shutdown calculations */ if (pending->interval_ms == 0) { counter++; } } } if (counter > 0) { do_crm_log(log_level, "%d pending executor operation%s at %s", counter, s_if_plural(counter), when); if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) { do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key); } } else { rc = FALSE; } return rc; } if (lrm_state->resource_history == NULL) { return rc; } if (is_set(fsa_input_register, R_SHUTDOWN)) { /* At this point we're not waiting, we're just shutting down */ when = "shutdown"; } counter = 0; g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) { if (is_rsc_active(lrm_state, entry->id) == FALSE) { continue; } counter++; if (log_level == LOG_ERR) { crm_info("Found %s active at %s", entry->id, when); } else { crm_trace("Found %s active at %s", entry->id, when); } if (lrm_state->pending_ops) { GHashTableIter hIter; g_hash_table_iter_init(&hIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) { if (crm_str_eq(entry->id, pending->rsc_id, TRUE)) { crm_notice("%sction %s (%s) incomplete at %s", pending->interval_ms == 0 ? "A" : "Recurring a", key, pending->op_key, when); } } } } if (counter) { crm_err("%d resource%s active at %s", counter, (counter == 1)? " was" : "s were", when); } return rc; } static char * build_parameter_list(const lrmd_event_data_t *op, const struct ra_metadata_s *metadata, xmlNode *result, enum ra_param_flags_e param_type, bool invert_for_xml) { int len = 0; int max = 0; char *list = NULL; GList *iter = NULL; /* Newer resource agents support the "private" parameter attribute to * indicate sensitive parameters. For backward compatibility with older * agents, this list is used if the agent doesn't specify any as "private". */ const char *secure_terms[] = { "password", "passwd", "user", }; if (is_not_set(metadata->ra_flags, ra_uses_private) && (param_type == ra_param_private)) { max = DIMOF(secure_terms); } for (iter = metadata->ra_params; iter != NULL; iter = iter->next) { struct ra_param_s *param = (struct ra_param_s *) iter->data; bool accept = FALSE; if (is_set(param->rap_flags, param_type)) { accept = TRUE; } else if (max) { for (int lpc = 0; lpc < max; lpc++) { if (safe_str_eq(secure_terms[lpc], param->rap_name)) { accept = TRUE; break; } } } if (accept) { int start = len; crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type)); len += strlen(param->rap_name) + 2; // include spaces around list = realloc_safe(list, len + 1); // include null terminator // spaces before and after make parsing simpler sprintf(list + start, " %s ", param->rap_name); } else { crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type)); } if (result && (invert_for_xml? !accept : accept)) { const char *v = g_hash_table_lookup(op->params, param->rap_name); if (v != NULL) { crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v); crm_xml_add(result, param->rap_name, v); } } } return list; } static void append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *restart = NULL; CRM_LOG_ASSERT(op->params != NULL); if (op->interval_ms > 0) { /* monitors are not reloadable */ return; } if (is_set(metadata->ra_flags, ra_supports_reload)) { restart = create_xml_node(NULL, XML_TAG_PARAMS); /* Add any parameters with unique="1" to the "op-force-restart" list. * * (Currently, we abuse "unique=0" to indicate reloadability. This is * nonstandard and should eventually be replaced once the OCF standard * is updated with something better.) */ list = build_parameter_list(op, metadata, restart, ra_param_unique, FALSE); } else { /* Resource does not support reloads */ return; } digest = calculate_operation_digest(restart, version); /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload, * no matter if it actually supports any parameters with unique="1"). */ crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list? list: ""); crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(restart, "restart digest source"); free_xml(restart); free(digest); free(list); } static void append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *secure = NULL; CRM_LOG_ASSERT(op->params != NULL); /* * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on * the insecure ones */ secure = create_xml_node(NULL, XML_TAG_PARAMS); list = build_parameter_list(op, metadata, secure, ra_param_private, TRUE); if (list != NULL) { digest = calculate_operation_digest(secure, version); crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, list); crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(secure, "secure digest source"); } else { crm_trace("%s: no secure parameters", op->rsc_id); } free_xml(secure); free(digest); free(list); } static gboolean build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *node_name, const char *src) { int target_rc = 0; xmlNode *xml_op = NULL; struct ra_metadata_s *metadata = NULL; const char *caller_version = NULL; lrm_state_t *lrm_state = NULL; if (op == NULL) { return FALSE; } target_rc = rsc_op_expected_rc(op); /* there is a small risk in formerly mixed clusters that it will * be sub-optimal. * * however with our upgrade policy, the update we send should * still be completely supported anyway */ caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); CRM_LOG_ASSERT(caller_version != NULL); if(caller_version == NULL) { caller_version = CRM_FEATURE_SET; } crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version); xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc, fsa_our_uname, src, LOG_DEBUG); if (xml_op == NULL) { return TRUE; } if ((rsc == NULL) || (op->params == NULL) || !crm_op_needs_metadata(rsc->standard, op->op_type)) { crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)", op->op_type, op->rsc_id, op->params, rsc); return TRUE; } lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { crm_warn("Cannot calculate digests for operation " CRM_OP_FMT " because we have no connection to executor for %s", op->rsc_id, op->op_type, op->interval_ms, node_name); return TRUE; } metadata = metadata_cache_get(lrm_state->metadata_cache, rsc); if (metadata == NULL) { /* For now, we always collect resource agent meta-data via a local, * synchronous, direct execution of the agent. This has multiple issues: * the executor should execute agents, not the controller; meta-data for * Pacemaker Remote nodes should be collected on those nodes, not * locally; and the meta-data call shouldn't eat into the timeout of the * real action being performed. * * These issues are planned to be addressed by having the scheduler * schedule a meta-data cache check at the beginning of each transition. * Once that is working, this block will only be a fallback in case the * initial collection fails. */ char *metadata_str = NULL; int rc = lrm_state_get_metadata(lrm_state, rsc->standard, rsc->provider, rsc->type, &metadata_str, 0); if (rc != pcmk_ok) { crm_warn("Failed to get metadata for %s (%s:%s:%s)", rsc->id, rsc->standard, rsc->provider, rsc->type); return TRUE; } metadata = metadata_cache_update(lrm_state->metadata_cache, rsc, metadata_str); free(metadata_str); if (metadata == NULL) { crm_warn("Failed to update metadata for %s (%s:%s:%s)", rsc->id, rsc->standard, rsc->provider, rsc->type); return TRUE; } } #if ENABLE_VERSIONED_ATTRS crm_xml_add(xml_op, XML_ATTR_RA_VERSION, metadata->ra_version); #endif crm_trace("Including additional digests for %s::%s:%s", rsc->standard, rsc->provider, rsc->type); append_restart_list(op, metadata, xml_op, caller_version); append_secure_list(op, metadata, xml_op, caller_version); return TRUE; } static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id) { rsc_history_t *entry = NULL; entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->last == NULL) { return FALSE; } crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type, entry->last->interval_ms, entry->last->rc); if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)) { return FALSE; } else if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) { - /* a stricter check is too complex... - * leave that to the PE - */ + // A stricter check is too complex ... leave that to the scheduler return FALSE; } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) { return FALSE; } else if ((entry->last->interval_ms == 0) && (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) { /* Badly configured resources can't be reliably stopped */ return FALSE; } return TRUE; } static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) { GHashTableIter iter; rsc_history_t *entry = NULL; g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { GList *gIter = NULL; xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.standard); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider); if (entry->last && entry->last->params) { const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); if (container) { crm_trace("Resource %s is a part of container resource %s", entry->id, container); crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container); } } build_operation_update(xml_rsc, &(entry->rsc), entry->failed, lrm_state->node_name, __FUNCTION__); build_operation_update(xml_rsc, &(entry->rsc), entry->last, lrm_state->node_name, __FUNCTION__); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { build_operation_update(xml_rsc, &(entry->rsc), gIter->data, lrm_state->node_name, __FUNCTION__); } } return FALSE; } static xmlNode * do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags) { xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; crm_node_t *peer = NULL; peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY); CRM_CHECK(peer != NULL, return NULL); xml_state = create_node_state_update(peer, update_flags, NULL, __FUNCTION__); if (xml_state == NULL) { return NULL; } xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(lrm_state, rsc_list); crm_log_xml_trace(xml_state, "Current executor state"); return xml_state; } xmlNode * do_lrm_query(gboolean is_replace, const char *node_name) { lrm_state_t *lrm_state = lrm_state_find(node_name); if (!lrm_state) { crm_err("Could not find executor state for node %s", node_name); return NULL; } return do_lrm_query_internal(lrm_state, node_update_cluster|node_update_peer); } static void notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) { lrmd_event_data_t *op = NULL; const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); crm_info("Notifying %s on %s that %s was%s deleted", from_sys, (from_host? from_host : "localhost"), rsc_id, ((rc == pcmk_ok)? "" : " not")); op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE); if (rc == pcmk_ok) { op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; } else { op->op_status = PCMK_LRM_OP_ERROR; op->rc = PCMK_OCF_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { /* this isn't expected - trigger a new transition */ time_t now = time(NULL); char *now_s = crm_itoa(now); crm_debug("Triggering a refresh after %s deleted %s from the executor", from_sys, rsc_id); update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, FALSE, NULL, NULL); free(now_s); } } static gboolean lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) { struct delete_event_s *event = user_data; struct pending_deletion_op_s *op = value; if (crm_str_eq(event->rsc, op->rsc, TRUE)) { notify_deleted(event->lrm_state, op->input, event->rsc, event->rc); return TRUE; } return FALSE; } static gboolean lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; struct recurring_op_s *pending = value; if (crm_str_eq(rsc, pending->rsc_id, TRUE)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } /* * Remove the rsc from the CIB * * Avoids refreshing the entire LRM section of this host */ #define RSC_TEMPLATE "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, const char *user_name) { char *rsc_xpath = NULL; int rc = pcmk_ok; CRM_CHECK(rsc_id != NULL, return -ENXIO); rsc_xpath = crm_strdup_printf(RSC_TEMPLATE, lrm_state->node_name, rsc_id); rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath, NULL, NULL, call_options | cib_xpath, user_name); free(rsc_xpath); return rc; } static void delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, GHashTableIter * rsc_gIter, int rc, const char *user_name) { struct delete_event_s event; CRM_CHECK(rsc_id != NULL, return); if (rc == pcmk_ok) { char *rsc_id_copy = strdup(rsc_id); if (rsc_gIter) g_hash_table_iter_remove(rsc_gIter); else g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); crm_debug("sync: Sending delete op for %s", rsc_id_copy); delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name); g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy); free(rsc_id_copy); } if (input) { notify_deleted(lrm_state, input, rsc_id, rc); } event.rc = rc; event.rsc = rsc_id; event.lrm_state = lrm_state; g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event); } /*! * \internal * \brief Erase an LRM history entry from the CIB, given the operation data * * \param[in] lrm_state LRM state of the desired node * \param[in] op Operation whose history should be deleted */ static void erase_lrm_history_by_op(lrm_state_t *lrm_state, lrmd_event_data_t *op) { xmlNode *xml_top = NULL; CRM_CHECK(op != NULL, return); xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); if (op->interval_ms > 0) { char *op_id = generate_op_key(op->rsc_id, op->op_type, op->interval_ms); /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ crm_xml_add(xml_top, XML_ATTR_ID, op_id); free(op_id); } crm_debug("Erasing resource operation history for " CRM_OP_FMT " (call=%d)", op->rsc_id, op->op_type, op->interval_ms, op->call_id); fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_quorum_override); crm_log_xml_trace(xml_top, "op:cancel"); free_xml(xml_top); } /* Define xpath to find LRM resource history entry by node and resource */ #define XPATH_HISTORY \ "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ "/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \ "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ "/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \ "/" XML_LRM_TAG_RSC_OP /* ... and also by operation key */ #define XPATH_HISTORY_ID XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s']" /* ... and also by operation key and operation call ID */ #define XPATH_HISTORY_CALL XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']" /* ... and also by operation key and original operation key */ #define XPATH_HISTORY_ORIG XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']" /*! * \internal * \brief Erase an LRM history entry from the CIB, given operation identifiers * * \param[in] lrm_state LRM state of the node to clear history for * \param[in] rsc_id Name of resource to clear history for * \param[in] key Operation key of operation to clear history for * \param[in] orig_op If specified, delete only if it has this original op * \param[in] call_id If specified, delete entry only if it has this call ID */ static void erase_lrm_history_by_id(lrm_state_t *lrm_state, const char *rsc_id, const char *key, const char *orig_op, int call_id) { char *op_xpath = NULL; CRM_CHECK((rsc_id != NULL) && (key != NULL), return); if (call_id > 0) { op_xpath = crm_strdup_printf(XPATH_HISTORY_CALL, lrm_state->node_name, rsc_id, key, call_id); } else if (orig_op) { op_xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, lrm_state->node_name, rsc_id, key, orig_op); } else { op_xpath = crm_strdup_printf(XPATH_HISTORY_ID, lrm_state->node_name, rsc_id, key); } crm_debug("Erasing resource operation history for %s on %s (call=%d)", key, rsc_id, call_id); fsa_cib_conn->cmds->remove(fsa_cib_conn, op_xpath, NULL, cib_quorum_override | cib_xpath); free(op_xpath); } static inline gboolean last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms) { if (entry == NULL) { return FALSE; } if (op == NULL) { return TRUE; } return (safe_str_eq(op, entry->failed->op_type) && (interval_ms == entry->failed->interval_ms)); } /*! * \internal * \brief Clear a resource's last failure * * Erase a resource's last failure on a particular node from both the * LRM resource history in the CIB, and the resource history remembered * for the LRM state. * * \param[in] rsc_id Resource name * \param[in] node_name Node name * \param[in] operation If specified, only clear if matching this operation * \param[in] interval_ms If operation is specified, it has this interval */ void lrm_clear_last_failure(const char *rsc_id, const char *node_name, const char *operation, guint interval_ms) { char *op_key = NULL; char *orig_op_key = NULL; lrm_state_t *lrm_state = NULL; lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { return; } /* Erase from CIB */ op_key = generate_op_key(rsc_id, "last_failure", 0); if (operation) { orig_op_key = generate_op_key(rsc_id, operation, interval_ms); } erase_lrm_history_by_id(lrm_state, rsc_id, op_key, orig_op_key, 0); free(op_key); free(orig_op_key); /* Remove from memory */ if (lrm_state->resource_history) { rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (last_failed_matches_op(entry, operation, interval_ms)) { lrmd_free_event(entry->failed); entry->failed = NULL; } } } /* Returns: gboolean - cancellation is in progress */ static gboolean cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove) { int rc = pcmk_ok; char *local_key = NULL; struct recurring_op_s *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); if (key == NULL) { local_key = make_stop_id(rsc_id, op); key = local_key; } pending = g_hash_table_lookup(lrm_state->pending_ops, key); if (pending) { if (remove && pending->remove == FALSE) { pending->remove = TRUE; crm_debug("Scheduling %s for removal", key); } if (pending->cancelled) { crm_debug("Operation %s already cancelled", key); free(local_key); return FALSE; } pending->cancelled = TRUE; } else { crm_info("No pending op found for %s", key); free(local_key); return FALSE; } crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval_ms); if (rc == pcmk_ok) { crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); free(local_key); return TRUE; } crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key); /* The caller needs to make sure the entry is * removed from the pending_ops list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from pending_ops will block * the node from shutting down */ free(local_key); return FALSE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct cancel_data *data = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (crm_str_eq(op->op_key, data->key, TRUE)) { data->done = TRUE; remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove); } return remove; } static gboolean cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove) { guint removed = 0; struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->pending_ops, cancel_action_by_key, &data); crm_trace("Removed %u op cache entries, new size: %u", removed, g_hash_table_size(lrm_state->pending_ops)); return data.done; } /*! * \internal * \brief Retrieve resource information from LRM * * \param[in] lrm_state LRM connection to use * \param[in] rsc_xml XML containing resource configuration * \param[in] do_create If true, register resource with LRM if not already * \param[out] rsc_info Where to store resource information obtained from LRM * * \retval pcmk_ok Success (and rsc_info holds newly allocated result) * \retval -EINVAL Required information is missing from arguments * \retval -ENOTCONN No active connection to LRM * \retval -ENODEV Resource not found * \retval -errno Error communicating with executor when registering resource * * \note Caller is responsible for freeing result on success. */ static int get_lrm_resource(lrm_state_t *lrm_state, xmlNode *rsc_xml, gboolean do_create, lrmd_rsc_info_t **rsc_info) { const char *id = ID(rsc_xml); CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL); CRM_CHECK(id, return -EINVAL); if (lrm_state_is_connected(lrm_state) == FALSE) { return -ENOTCONN; } crm_trace("Retrieving resource information for %s from the executor", id); *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); // If resource isn't known by ID, try clone name, if provided if (!*rsc_info) { const char *long_id = crm_element_value(rsc_xml, XML_ATTR_ID_LONG); if (long_id) { *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0); } } if ((*rsc_info == NULL) && do_create) { const char *class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER); const char *type = crm_element_value(rsc_xml, XML_ATTR_TYPE); int rc; crm_trace("Registering resource %s with the executor", id); rc = lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring); if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Could not register resource %s with the executor on %s: %s " CRM_XS " rc=%d", id, lrm_state->node_name, pcmk_strerror(rc), rc); /* Register this as an internal error if this involves the local * executor. Otherwise, we're likely dealing with an unresponsive * remote node, which is not an FSA failure. */ if (lrm_state_is_local(lrm_state) == TRUE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } return rc; } *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); } return *rsc_info? pcmk_ok : -ENODEV; } static void delete_resource(lrm_state_t * lrm_state, const char *id, lrmd_rsc_info_t * rsc, GHashTableIter * gIter, const char *sys, const char *host, const char *user, ha_msg_input_t * request, gboolean unregister) { int rc = pcmk_ok; crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host); if (rsc && unregister) { rc = lrm_state_unregister_rsc(lrm_state, id, 0); } if (rc == pcmk_ok) { crm_trace("Resource '%s' deleted", id); } else if (rc == -EINPROGRESS) { crm_info("Deletion of resource '%s' pending", id); if (request) { struct pending_deletion_op_s *op = NULL; char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE); op = calloc(1, sizeof(struct pending_deletion_op_s)); op->rsc = strdup(rsc->id); op->input = copy_ha_msg_input(request); g_hash_table_insert(lrm_state->deletion_ops, ref, op); } return; } else { crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d", id, sys, user ? user : "internal", host, rc); } delete_rsc_entry(lrm_state, request, id, gIter, rc, user); } static int get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) { int call_id = 999999999; rsc_history_t *entry = NULL; if(lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* Make sure the call id is greater than the last successful operation, * otherwise the failure will not result in a possible recovery of the resource * as it could appear the failure occurred before the successful start */ if (entry) { call_id = entry->last_callid + 1; } if (call_id < 0) { call_id = 1; } return call_id; } static void fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status, enum ocf_exitcode op_exitcode) { op->call_id = get_fake_call_id(lrm_state, op->rsc_id); op->t_run = time(NULL); op->t_rcchange = op->t_run; op->op_status = op_status; op->rc = op_exitcode; } static void force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node) { GHashTableIter gIter; rsc_history_t *entry = NULL; crm_info("Clearing resource history on node %s", lrm_state->node_name); g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { /* only unregister the resource during a reprobe if it is not a remote connection * resource. otherwise unregistering the connection will terminate remote-node * membership */ gboolean unregister = TRUE; if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { lrm_state_t *remote_lrm_state = lrm_state_find(entry->id); if (remote_lrm_state) { /* when forcing a reprobe, make sure to clear remote node before * clearing the remote node's connection resource */ force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE); } unregister = FALSE; } delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host, user_name, NULL, unregister); } /* Now delete the copy in the CIB */ erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local); /* Finally, _delete_ the value in pacemaker-attrd -- setting it to FALSE * would result in the scheduler sending us back here again */ update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); } /*! * \internal * \brief Fail a requested action without actually executing it * * For an action that can't be executed, process it similarly to an actual * execution result, with specified error status (except for notify actions, * which will always be treated as successful). * * \param[in] lrm_state Executor connection that action is for * \param[in] action Action XML from request * \param[in] rc Desired return code to use * \param[in] op_status Desired operation status to use */ static void synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int op_status, enum ocf_exitcode rc) { lrmd_event_data_t *op = NULL; const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK); const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET); xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE); if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) { /* @TODO Should we do something else, like direct ack? */ crm_info("Can't fake %s failure (%d) on %s without resource configuration", crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node); return; } else if(operation == NULL) { /* This probably came from crm_resource -C, nothing to do */ crm_info("Can't fake %s failure (%d) on %s without operation", ID(xml_rsc), rc, target_node); return; } op = construct_op(lrm_state, action, ID(xml_rsc), operation); if (safe_str_eq(operation, RSC_NOTIFY)) { // Notifications can't fail fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_OK); } else { fake_op_status(lrm_state, op, op_status, rc); } crm_info("Faking " CRM_OP_FMT " result (%d) on %s", op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node); // Process the result as if it came from the LRM process_lrm_event(lrm_state, op, NULL, action); lrmd_free_event(op); } /*! * \internal * \brief Get target of an LRM operation * * \param[in] xml LRM operation data XML * * \return LRM operation target node name (local node or Pacemaker Remote node) */ static const char * lrm_op_target(xmlNode *xml) { const char *target = NULL; if (xml) { target = crm_element_value(xml, XML_LRM_ATTR_TARGET); } if (target == NULL) { target = fsa_our_uname; } return target; } static void fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, const char *from_host, const char *from_sys) { lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* The executor simply executes operations and reports the results, without * any concept of success or failure, so to fail a resource, we must fake * what a failure looks like. * * To do this, we create a fake executor operation event for the resource, * and pass that event to the executor client callback so it will be * processed as if it came from the executor. */ op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon"); fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR); free((char*) op->user_data); op->user_data = NULL; op->interval_ms = 0; #if ENABLE_ACL if (user_name && is_privileged(user_name) == FALSE) { crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } #endif if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) { crm_info("Failing resource %s...", rsc->id); op->exit_reason = strdup("Simulated failure"); process_lrm_event(lrm_state, op, NULL, xml); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; lrmd_free_rsc_info(rsc); } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(xml, "bad input"); } send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); } static void handle_refresh_op(lrm_state_t *lrm_state, const char *user_name, const char *from_host, const char *from_sys) { int rc = pcmk_ok; xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); crm_info("Forced a local resource history refresh: call=%d", rc); if (safe_str_neq(CRM_SYSTEM_CRMD, from_sys)) { xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, fragment, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } free_xml(fragment); } static void handle_query_op(xmlNode *msg, lrm_state_t *lrm_state) { xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all); xmlNode *reply = create_reply(msg, data); if (relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_xml_err(reply, "reply"); } free_xml(reply); free_xml(data); } static void handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node) { crm_notice("Forcing the status of all resources to be redetected"); force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node); if (safe_str_neq(CRM_SYSTEM_PENGINE, from_sys) && safe_str_neq(CRM_SYSTEM_TENGINE, from_sys)) { xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } } static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys) { char *op_key = NULL; char *meta_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; const char *interval_ms_s = NULL; gboolean in_progress = FALSE; xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); CRM_CHECK(params != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS); interval_ms_s = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(interval_ms_s != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_TASK); op_task = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(op_task != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); call_id = crm_element_value(params, meta_key); free(meta_key); op_key = generate_op_key(rsc->id, op_task, crm_parse_ms(interval_ms_s)); crm_debug("Scheduler requested op %s (call=%s) be cancelled", op_key, (call_id? call_id : "NA")); call = crm_parse_int(call_id, "0"); if (call == 0) { // Normal case when the scheduler cancels a recurring op in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); } else { // Normal case when the scheduler cancels an orphan op in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); } // Acknowledge cancellation operation if for a remote connection resource if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) { char *op_id = make_stop_id(rsc->id, call); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) { crm_info("Nothing known about operation %d for %s", call, op_key); } erase_lrm_history_by_id(lrm_state, rsc->id, op_key, NULL, call); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); /* needed at least for cancellation of a remote operation */ g_hash_table_remove(lrm_state->pending_ops, op_id); free(op_id); } else { /* No ack is needed since abcdaa8, but peers with older versions * in a rolling upgrade need one. We didn't bump the feature set * at that commit, so we can only compare against the previous * CRM version (3.0.8). If any peers have feature set 3.0.9 but * not abcdaa8, they will time out waiting for the ack (no * released versions of Pacemaker are affected). */ const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION); if (compare_version(peer_version, "3.0.8") <= 0) { crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)", op_key, from_host, peer_version); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); } } free(op_key); return TRUE; } static void do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host, bool crm_rsc_delete, const char *user_name) { gboolean unregister = TRUE; #if ENABLE_ACL int cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun|cib_sync_call, user_name); if (cib_rc != pcmk_ok) { lrmd_event_data_t *op = NULL; crm_err("Could not delete resource status of %s for %s (user %s) on %s: %s" CRM_XS " rc=%d", rsc->id, from_sys, (user_name? user_name : "unknown"), from_host, pcmk_strerror(cib_rc), cib_rc); op = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE); op->op_status = PCMK_LRM_OP_ERROR; if (cib_rc == -EACCES) { op->rc = PCMK_OCF_INSUFFICIENT_PRIV; } else { op->rc = PCMK_OCF_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc->id); lrmd_free_event(op); return; } #endif if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { unregister = FALSE; } delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input, unregister); } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { lrm_state_t *lrm_state = NULL; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *user_name = NULL; const char *target_node = NULL; gboolean is_remote_node = FALSE; bool crm_rsc_delete = FALSE; target_node = lrm_op_target(input->xml); is_remote_node = safe_str_neq(target_node, fsa_our_uname); lrm_state = lrm_state_find(target_node); if ((lrm_state == NULL) && is_remote_node) { crm_err("Failing action because local node has never had connection to remote node %s", target_node); synthesize_lrmd_failure(NULL, input->xml, PCMK_LRM_OP_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR); return; } CRM_ASSERT(lrm_state != NULL); #if ENABLE_ACL user_name = crm_acl_get_set_user(input->msg, F_CRM_USER, NULL); crm_trace("Executor command from user '%s'", user_name); #endif crm_op = crm_element_value(input->msg, F_CRM_TASK); from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } crm_trace("Executor %s command from %s", crm_op, from_sys); if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { crm_rsc_delete = TRUE; // Only crm_resource uses this op operation = CRMD_ACTION_DELETE; } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) { fail_lrm_resource(input->xml, lrm_state, user_name, from_host, from_sys); return; } else if (input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { handle_refresh_op(lrm_state, user_name, from_host, from_sys); } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { handle_query_op(input->msg, lrm_state); } else if (safe_str_eq(operation, CRM_OP_PROBED)) { update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node); } else if (safe_str_eq(operation, CRM_OP_REPROBE) || safe_str_eq(crm_op, CRM_OP_REPROBE)) { handle_reprobe_op(lrm_state, from_sys, from_host, user_name, is_remote_node); } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); gboolean create_rsc = safe_str_neq(operation, CRMD_ACTION_DELETE); int rc; // We can't return anything meaningful without a resource ID CRM_CHECK(xml_rsc && ID(xml_rsc), return); rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc); if (rc == -ENOTCONN) { synthesize_lrmd_failure(lrm_state, input->xml, PCMK_LRM_OP_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR); return; } else if ((rc < 0) && !create_rsc) { /* Delete of malformed or nonexistent resource * (deleting something that does not exist is a success) */ crm_notice("Not registering resource '%s' for a %s event " CRM_XS " get-rc=%d (%s) transition-key=%s", ID(xml_rsc), operation, rc, pcmk_strerror(rc), ID(input->xml)); delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name); send_task_ok_ack(lrm_state, input, ID(xml_rsc), NULL, operation, from_host, from_sys); return; } else if (rc == -EINVAL) { // Resource operation on malformed resource crm_err("Invalid resource definition for %s", ID(xml_rsc)); crm_log_xml_warn(input->msg, "invalid resource"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_LRM_OP_ERROR, PCMK_OCF_NOT_CONFIGURED); // fatal error return; } else if (rc < 0) { // Error communicating with the executor crm_err("Could not register resource '%s' with executor: %s " CRM_XS " rc=%d", ID(xml_rsc), pcmk_strerror(rc), rc); crm_log_xml_warn(input->msg, "failed registration"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_LRM_OP_ERROR, PCMK_OCF_INVALID_PARAM); // hard error return; } if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) { if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) { crm_log_xml_warn(input->xml, "Bad command"); } } else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { do_lrm_delete(input, lrm_state, rsc, from_sys, from_host, crm_rsc_delete, user_name); } else { do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg); } lrmd_free_rsc_info(rsc); } else { crm_err("Cannot perform operation %s of unknown type", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static lrmd_event_data_t * construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation) { lrmd_event_data_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; const char *interval_ms_s = NULL; GHashTable *params = NULL; const char *transition = NULL; CRM_ASSERT(rsc_id && operation); op = calloc(1, sizeof(lrmd_event_data_t)); CRM_ASSERT(op != NULL); op->type = lrmd_event_exec_complete; op->op_type = strdup(operation); op->op_status = PCMK_LRM_OP_PENDING; op->rc = -1; op->rsc_id = strdup(rsc_id); op->interval_ms = 0; op->timeout = 0; op->start_delay = 0; if (rsc_op == NULL) { CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); op->user_data = NULL; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = crm_str_table_new(); g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); crm_trace("Constructed %s op for %s", operation, rsc_id); return op; } params = xml2list(rsc_op); g_hash_table_remove(params, CRM_META "_op_target_rc"); op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY); op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT); interval_ms_s = crm_meta_value(params, XML_LRM_ATTR_INTERVAL_MS); op->interval_ms = crm_parse_ms(interval_ms_s); op->timeout = crm_parse_int(op_timeout, "0"); op->start_delay = crm_parse_int(op_delay, "0"); #if ENABLE_VERSIONED_ATTRS // Resolve any versioned parameters if (lrm_state && safe_str_neq(op->op_type, RSC_METADATA) && safe_str_neq(op->op_type, CRMD_ACTION_DELETE) && !is_remote_lrmd_ra(NULL, NULL, rsc_id)) { // Resource info *should* already be cached, so we don't get executor call lrmd_rsc_info_t *rsc = lrm_state_get_rsc_info(lrm_state, rsc_id, 0); struct ra_metadata_s *metadata; metadata = metadata_cache_get(lrm_state->metadata_cache, rsc); if (metadata) { xmlNode *versioned_attrs = NULL; GHashTable *hash = NULL; char *key = NULL; char *value = NULL; GHashTableIter iter; versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_ATTRS); hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { g_hash_table_iter_steal(&iter); g_hash_table_replace(params, key, value); } g_hash_table_destroy(hash); versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_META); hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { g_hash_table_replace(params, crm_meta_name(key), strdup(value)); if (safe_str_eq(key, XML_ATTR_TIMEOUT)) { op->timeout = crm_parse_int(value, "0"); } else if (safe_str_eq(key, XML_OP_ATTR_START_DELAY)) { op->start_delay = crm_parse_int(value, "0"); } } g_hash_table_destroy(hash); versioned_attrs = first_named_child(rsc_op, XML_TAG_RSC_VER_ATTRS); hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { g_hash_table_iter_steal(&iter); g_hash_table_replace(params, key, value); } g_hash_table_destroy(hash); } lrmd_free_rsc_info(rsc); } #endif if (safe_str_neq(operation, RSC_STOP)) { op->params = params; } else { rsc_history_t *entry = NULL; if (lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* If we do not have stop parameters cached, use * whatever we are given */ if (!entry || !entry->stop_params) { op->params = params; } else { /* Copy the cached parameter list so that we stop the resource * with the old attributes, not the new ones */ op->params = crm_str_table_new(); g_hash_table_foreach(params, copy_meta_keys, op->params); g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); g_hash_table_destroy(params); params = NULL; } } /* sanity */ if (op->timeout <= 0) { op->timeout = op->interval_ms; } if (op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = strdup(transition); if (op->interval_ms != 0) { if (safe_str_eq(operation, CRMD_ACTION_START) || safe_str_eq(operation, CRMD_ACTION_STOP)) { crm_err("Start and Stop actions cannot have an interval: %u", op->interval_ms); op->interval_ms = 0; } } crm_trace("Constructed %s op for %s: interval=%u", operation, rsc_id, op->interval_ms); return op; } void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; crm_node_t *peer = NULL; CRM_CHECK(op != NULL, return); if (op->rsc_id == NULL) { CRM_ASSERT(rsc_id != NULL); op->rsc_id = strdup(rsc_id); } if (to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } peer = crm_get_peer(0, fsa_our_uname); update = create_node_state_update(peer, node_update_none, NULL, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, fsa_our_uname, __FUNCTION__); reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_log_xml_trace(update, "ACK Update"); crm_debug("ACK'ing resource op " CRM_OP_FMT " from %s: %s", op->rsc_id, op->op_type, op->interval_ms, op->user_data, crm_element_value(reply, XML_ATTR_REFERENCE)); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(update); free_xml(reply); } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { gboolean res = TRUE; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) { /* keep iterating through all even when false is returned */ res = FALSE; } } set_bit(fsa_input_register, R_SENT_RSC_STOP); g_list_free(lrm_state_list); lrm_state_list = NULL; return res; } struct stop_recurring_action_s { lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct stop_recurring_action_s *event = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if ((op->interval_ms != 0) && crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) { crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key); remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE); } return remove; } static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; lrm_state_t *lrm_state = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (op->interval_ms != 0) { crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (const char *) key); remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE); } return remove; } static void record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op) { const char *record_pending = NULL; CRM_CHECK(node_name != NULL, return); CRM_CHECK(rsc != NULL, return); CRM_CHECK(op != NULL, return); // Never record certain operation types as pending if ((op->op_type == NULL) || (op->params == NULL) || !controld_action_is_recordable(op->op_type)) { return; } // defaults to true record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING); if (record_pending && !crm_is_true(record_pending)) { return; } op->call_id = -1; op->op_status = PCMK_LRM_OP_PENDING; op->rc = PCMK_OCF_UNKNOWN; op->t_run = time(NULL); op->t_rcchange = op->t_run; /* write a "pending" entry to the CIB, inhibit notification */ crm_debug("Recording pending op " CRM_OP_FMT " on %s in the CIB", op->rsc_id, op->op_type, op->interval_ms, node_name); do_update_resource(node_name, rsc, op); } static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request) { int call_id = 0; char *op_id = NULL; lrmd_event_data_t *op = NULL; lrmd_key_value_t *params = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; gboolean stop_recurring = FALSE; bool send_nack = FALSE; CRM_CHECK(rsc != NULL, return); CRM_CHECK(operation != NULL, return); if (msg != NULL) { transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if (transition == NULL) { crm_log_xml_err(msg, "Missing transition number"); } } op = construct_op(lrm_state, msg, rsc->id, operation); CRM_CHECK(op != NULL, return); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) && (op->interval_ms == 0) && strcmp(operation, CRMD_ACTION_MIGRATE) == 0) { /* pcmk remote connections are a special use case. * We never ever want to stop monitoring a connection resource until * the entire migration has completed. If the connection is unexpectedly * severed, even during a migration, this is an event we must detect.*/ stop_recurring = FALSE; } else if ((op->interval_ms == 0) && strcmp(operation, CRMD_ACTION_STATUS) != 0 && strcmp(operation, CRMD_ACTION_NOTIFY) != 0) { /* stop any previous monitor operations before changing the resource state */ stop_recurring = TRUE; } if (stop_recurring == TRUE) { guint removed = 0; struct stop_recurring_action_s data; data.rsc = rsc; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_action_by_rsc, &data); if (removed) { crm_debug("Stopped %u recurring operation%s in preparation for " CRM_OP_FMT, removed, s_if_plural(removed), rsc->id, operation, op->interval_ms); } } /* now do the op */ crm_info("Performing key=%s op=" CRM_OP_FMT, transition, rsc->id, operation, op->interval_ms); if (is_set(fsa_input_register, R_SHUTDOWN) && safe_str_eq(operation, RSC_START)) { register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); send_nack = TRUE; } else if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE /* Recalculating */ && fsa_state != S_TRANSITION_ENGINE && safe_str_neq(operation, CRMD_ACTION_STOP)) { send_nack = TRUE; } if(send_nack) { crm_notice("Discarding attempt to perform action %s on %s in state %s (shutdown=%s)", operation, rsc->id, fsa_state2string(fsa_state), is_set(fsa_input_register, R_SHUTDOWN)?"true":"false"); op->rc = PCMK_OCF_UNKNOWN_ERROR; op->op_status = PCMK_LRM_OP_INVALID; send_direct_ack(NULL, NULL, rsc, op, rsc->id); lrmd_free_event(op); free(op_id); return; } record_pending_op(lrm_state->node_name, rsc, op); op_id = generate_op_key(rsc->id, op->op_type, op->interval_ms); if (op->interval_ms > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(lrm_state, rsc, op_id, FALSE); } if (op->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, op->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { params = lrmd_key_value_add(params, key, value); } } call_id = lrm_state_exec(lrm_state, rsc->id, op->op_type, op->user_data, op->interval_ms, op->timeout, op->start_delay, params); if (call_id <= 0 && lrm_state_is_local(lrm_state)) { crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else if (call_id <= 0) { crm_err("Operation %s on resource %s failed to execute on remote node %s: %d", operation, rsc->id, lrm_state->node_name, call_id); fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR); process_lrm_event(lrm_state, op, NULL, NULL); } else { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); struct recurring_op_s *pending = NULL; pending = calloc(1, sizeof(struct recurring_op_s)); crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval_ms = op->interval_ms; pending->op_type = strdup(operation); pending->op_key = strdup(op_id); pending->rsc_id = strdup(rsc->id); pending->start_time = time(NULL); pending->user_data = op->user_data? strdup(op->user_data) : NULL; g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending); if ((op->interval_ms > 0) && (op->start_delay > START_DELAY_THRESHOLD)) { int target_rc = 0; crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc); op->rc = target_rc; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(NULL, NULL, rsc, op, rsc->id); } pending->params = op->params; op->params = NULL; } free(op_id); lrmd_free_event(op); return; } int last_resource_update = 0; static void cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { switch (rc) { case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: crm_trace("Resource update %d complete: rc=%d", call_id, rc); break; default: crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc)); } if (call_id == last_resource_update) { last_resource_update = 0; trigger_fsa(fsa_source); } } static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { /* */ int rc = pcmk_ok; xmlNode *update, *iter = NULL; int call_opt = crmd_cib_smart_opt(); const char *uuid = NULL; CRM_CHECK(op != NULL, return 0); iter = create_xml_node(iter, XML_CIB_TAG_STATUS); update = iter; iter = create_xml_node(iter, XML_CIB_TAG_STATE); if (safe_str_eq(node_name, fsa_our_uname)) { uuid = fsa_our_uuid; } else { /* remote nodes uuid and uname are equal */ uuid = node_name; crm_xml_add(iter, XML_NODE_IS_REMOTE, "true"); } CRM_LOG_ASSERT(uuid != NULL); if(uuid == NULL) { rc = -EINVAL; goto done; } crm_xml_add(iter, XML_ATTR_UUID, uuid); crm_xml_add(iter, XML_ATTR_UNAME, node_name); crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__); iter = create_xml_node(iter, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, node_name, __FUNCTION__); if (rsc) { const char *container = NULL; crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->standard); crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider); if (op->params) { container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); } if (container) { crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container); crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container); } } else { crm_warn("Resource %s no longer exists in the executor", op->rsc_id); send_direct_ack(NULL, NULL, rsc, op, op->rsc_id); goto cleanup; } crm_log_xml_trace(update, __FUNCTION__); /* make it an asynchronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down then anyway? * (probably because of an internal error) * * Worst case: * we get shot for having resources "running" that really weren't * * the alternative however means blocking here for too long, which * isn't acceptable */ fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL); if (rc > 0) { last_resource_update = rc; } done: /* the return code is a call number, not an error code */ crm_trace("Sent resource state update message: %d for %s=%u on %s", rc, op->op_type, op->interval_ms, op->rsc_id); fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback); cleanup: free_xml(update); return rc; } void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data) { CRM_CHECK(FALSE, return); } static char * unescape_newlines(const char *string) { char *pch = NULL; char *ret = NULL; static const char *escaped_newline = "\\n"; if (!string) { return NULL; } ret = strdup(string); pch = strstr(ret, escaped_newline); while (pch != NULL) { /* Replace newline escape pattern with actual newline (and a space so we * don't have to shuffle the rest of the buffer) */ pch[0] = '\n'; pch[1] = ' '; pch = strstr(pch, escaped_newline); } return ret; } static bool did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id, const char * op_type, guint interval_ms) { rsc_history_t *entry = NULL; CRM_CHECK(lrm_state != NULL, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); CRM_CHECK(op_type != NULL, return FALSE); entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->failed == NULL) { return FALSE; } if (crm_str_eq(entry->failed->rsc_id, rsc_id, TRUE) && safe_str_eq(entry->failed->op_type, op_type) && entry->failed->interval_ms == interval_ms) { return TRUE; } return FALSE; } void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, struct recurring_op_s *pending, xmlNode *action_xml) { char *op_id = NULL; char *op_key = NULL; int update_id = 0; gboolean remove = FALSE; gboolean removed = FALSE; bool need_direct_ack = FALSE; lrmd_rsc_info_t *rsc = NULL; const char *node_name = NULL; CRM_CHECK(op != NULL, return); CRM_CHECK(op->rsc_id != NULL, return); // Remap new status codes for older DCs if (compare_version(fsa_our_dc_version, "3.2.0") < 0) { switch (op->op_status) { case PCMK_LRM_OP_NOT_CONNECTED: op->op_status = PCMK_LRM_OP_ERROR; op->rc = PCMK_OCF_CONNECTION_DIED; break; case PCMK_LRM_OP_INVALID: op->op_status = PCMK_LRM_OP_ERROR; op->rc = CRM_DIRECT_NACK_RC; break; default: break; } } op_id = make_stop_id(op->rsc_id, op->call_id); op_key = generate_op_key(op->rsc_id, op->op_type, op->interval_ms); // Get resource info if available (from executor state or action XML) if (lrm_state) { rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); } if ((rsc == NULL) && action_xml) { xmlNode *xml = find_xml_node(action_xml, XML_CIB_TAG_RESOURCE, TRUE); const char *standard = crm_element_value(xml, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER); const char *type = crm_element_value(xml, XML_ATTR_TYPE); if (standard && type) { crm_info("%s agent information not cached, using %s%s%s:%s from action XML", op->rsc_id, standard, (provider? ":" : ""), (provider? provider : ""), type); rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type); } else { crm_err("Can't process %s result because %s agent information not cached or in XML", op_key, op->rsc_id); } } // Get node name if available (from executor state or action XML) if (lrm_state) { node_name = lrm_state->node_name; } else if (action_xml) { node_name = crm_element_value(action_xml, XML_LRM_ATTR_TARGET); } if(pending == NULL) { remove = TRUE; if (lrm_state) { pending = g_hash_table_lookup(lrm_state->pending_ops, op_id); } } if (op->op_status == PCMK_LRM_OP_ERROR) { switch(op->rc) { case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_MASTER: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_MASTER: // Leave it to the TE/scheduler to decide if this is an error op->op_status = PCMK_LRM_OP_DONE; break; default: /* Nothing to do */ break; } } if (op->op_status != PCMK_LRM_OP_CANCELLED) { /* We might not record the result, so directly acknowledge it to the * originator instead, so it doesn't time out waiting for the result * (especially important if part of a transition). */ need_direct_ack = TRUE; if (controld_action_is_recordable(op->op_type)) { if (node_name && rsc) { // We should record the result, and happily, we can update_id = do_update_resource(node_name, rsc, op); need_direct_ack = FALSE; } else if (op->rsc_deleted) { /* We shouldn't record the result (likely the resource was * refreshed, cleaned, or removed while this operation was * in flight). */ crm_notice("Not recording %s result in CIB because " "resource information was removed since it was initiated", op_key); } else { /* This shouldn't be possible; the executor didn't consider the * resource deleted, but we couldn't find resource or node * information. */ crm_err("Unable to record %s result in CIB: %s", op_key, (node_name? "No resource information" : "No node name")); } } } else if (op->interval_ms == 0) { /* A non-recurring operation was cancelled. Most likely, the * never-initiated action was removed from the executor's pending * operations list upon resource removal. */ need_direct_ack = TRUE; } else if (pending == NULL) { /* This recurring operation was cancelled, but was not pending. No * transition actions are waiting on it, nothing needs to be done. */ } else if (op->user_data == NULL) { /* This recurring operation was cancelled and pending, but we don't * have a transition key. This should never happen. */ crm_err("Recurring operation %s was cancelled without transition information", op_key); } else if (pending->remove) { /* This recurring operation was cancelled (by us) and pending, and we * have been waiting for it to finish. */ if (lrm_state) { erase_lrm_history_by_op(lrm_state, op); } /* If the recurring operation had failed, the lrm_rsc_op is recorded as * "last_failure" which won't get erased from the cib given the logic on * purpose in erase_lrm_history_by_op(). So that the cancel action won't * have a chance to get confirmed by DC with process_op_deletion(). * Cluster transition would get stuck waiting for the remaining action * timer to time out. * * Directly acknowledge the cancel operation in this case. */ if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id, pending->op_type, pending->interval_ms)) { need_direct_ack = TRUE; } } else if (op->rsc_deleted) { /* This recurring operation was cancelled (but not by us, and the * executor does not have resource information, likely due to resource * cleanup, refresh, or removal) and pending. */ crm_debug("Recurring op %s was cancelled due to resource deletion", op_key); need_direct_ack = TRUE; } else { /* This recurring operation was cancelled (but not by us, likely by the * executor before stopping the resource) and pending. We don't need to * do anything special. */ } if (need_direct_ack) { send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } if(remove == FALSE) { /* The caller will do this afterwards, but keep the logging consistent */ removed = TRUE; } else if (lrm_state && ((op->interval_ms == 0) || (op->op_status == PCMK_LRM_OP_CANCELLED))) { gboolean found = g_hash_table_remove(lrm_state->pending_ops, op_id); if (op->interval_ms != 0) { removed = TRUE; } else if (found) { removed = TRUE; crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops)); } } if (node_name == NULL) { node_name = "unknown node"; // for logging } switch (op->op_status) { case PCMK_LRM_OP_CANCELLED: crm_info("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s confirmed=%s", crm_action_str(op->op_type, op->interval_ms), op->rsc_id, node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, (removed? "true" : "false")); break; case PCMK_LRM_OP_DONE: crm_notice("Result of %s operation for %s on %s: %d (%s) " CRM_XS " call=%d key=%s confirmed=%s cib-update=%d", crm_action_str(op->op_type, op->interval_ms), op->rsc_id, node_name, op->rc, services_ocf_exitcode_str(op->rc), op->call_id, op_key, (removed? "true" : "false"), update_id); break; case PCMK_LRM_OP_TIMEOUT: crm_err("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s timeout=%dms", crm_action_str(op->op_type, op->interval_ms), op->rsc_id, node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, op->timeout); break; default: crm_err("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s confirmed=%s status=%d cib-update=%d", crm_action_str(op->op_type, op->interval_ms), op->rsc_id, node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, (removed? "true" : "false"), op->op_status, update_id); } if (op->output) { char *prefix = crm_strdup_printf("%s-" CRM_OP_FMT ":%d", node_name, op->rsc_id, op->op_type, op->interval_ms, op->call_id); if (op->rc) { crm_log_output(LOG_NOTICE, prefix, op->output); } else { crm_log_output(LOG_DEBUG, prefix, op->output); } free(prefix); } if (lrm_state) { if (safe_str_neq(op->op_type, RSC_METADATA)) { crmd_alert_resource_op(lrm_state->node_name, op); } else if (rsc && (op->rc == PCMK_OCF_OK)) { char *metadata = unescape_newlines(op->output); metadata_cache_update(lrm_state->metadata_cache, rsc, metadata); free(metadata); } } if (op->rsc_deleted) { crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); if (lrm_state) { delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL); } } /* If a shutdown was escalated while operations were pending, * then the FSA will be stalled right now... allow it to continue */ mainloop_set_trigger(fsa_source); if (lrm_state && rsc) { update_history_cache(lrm_state, rsc, op); } lrmd_free_rsc_info(rsc); free(op_key); free(op_id); } diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c index 0933487b76..d9b1e1ecf7 100644 --- a/daemons/controld/controld_fencing.c +++ b/daemons/controld/controld_fencing.c @@ -1,954 +1,954 @@ /* * Copyright 2004-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include static void tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event); /* * stonith failure counting * * We don't want to get stuck in a permanent fencing loop. Keep track of the * number of fencing failures for each target node, and the most we'll restart a * transition for. */ struct st_fail_rec { int count; }; static bool fence_reaction_panic = FALSE; static unsigned long int stonith_max_attempts = 10; static GHashTable *stonith_failures = NULL; void update_stonith_max_attempts(const char *value) { if (safe_str_eq(value, CRM_INFINITY_S)) { stonith_max_attempts = CRM_SCORE_INFINITY; } else { stonith_max_attempts = crm_int_helper(value, NULL); } } void set_fence_reaction(const char *reaction_s) { if (safe_str_eq(reaction_s, "panic")) { fence_reaction_panic = TRUE; } else { if (safe_str_neq(reaction_s, "stop")) { crm_warn("Invalid value '%s' for %s, using 'stop'", reaction_s, XML_CONFIG_ATTR_FENCE_REACTION); } fence_reaction_panic = FALSE; } } static gboolean too_many_st_failures(const char *target) { GHashTableIter iter; const char *key = NULL; struct st_fail_rec *value = NULL; if (stonith_failures == NULL) { return FALSE; } if (target == NULL) { g_hash_table_iter_init(&iter, stonith_failures); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { if (value->count >= stonith_max_attempts) { target = (const char*)key; goto too_many; } } } else { value = g_hash_table_lookup(stonith_failures, target); if ((value != NULL) && (value->count >= stonith_max_attempts)) { goto too_many; } } return FALSE; too_many: crm_warn("Too many failures (%d) to fence %s, giving up", value->count, target); return TRUE; } /*! * \internal * \brief Reset a stonith fail count * * \param[in] target Name of node to reset, or NULL for all */ void st_fail_count_reset(const char *target) { if (stonith_failures == NULL) { return; } if (target) { struct st_fail_rec *rec = NULL; rec = g_hash_table_lookup(stonith_failures, target); if (rec) { rec->count = 0; } } else { GHashTableIter iter; const char *key = NULL; struct st_fail_rec *rec = NULL; g_hash_table_iter_init(&iter, stonith_failures); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &rec)) { rec->count = 0; } } } static void st_fail_count_increment(const char *target) { struct st_fail_rec *rec = NULL; if (stonith_failures == NULL) { stonith_failures = crm_str_table_new(); } rec = g_hash_table_lookup(stonith_failures, target); if (rec) { rec->count++; } else { rec = malloc(sizeof(struct st_fail_rec)); if(rec == NULL) { return; } rec->count = 1; g_hash_table_insert(stonith_failures, strdup(target), rec); } } /* end stonith fail count functions */ static void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if (rc < pcmk_ok) { crm_err("Fencing update %d for %s: failed - %s (%d)", call_id, (char *)user_data, pcmk_strerror(rc), rc); crm_log_xml_warn(msg, "Failed update"); abort_transition(INFINITY, tg_shutdown, "CIB update failed", NULL); } else { crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data); } } static void send_stonith_update(crm_action_t *action, const char *target, const char *uuid) { int rc = pcmk_ok; crm_node_t *peer = NULL; /* We (usually) rely on the membership layer to do node_update_cluster, * and the peer status callback to do node_update_peer, because the node * might have already rejoined before we get the stonith result here. */ int flags = node_update_join | node_update_expected; /* zero out the node-status & remove all LRM status info */ xmlNode *node_state = NULL; CRM_CHECK(target != NULL, return); CRM_CHECK(uuid != NULL, return); /* Make sure the membership and join caches are accurate */ peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY); CRM_CHECK(peer != NULL, return); if (peer->state == NULL) { /* Usually, we rely on the membership layer to update the cluster state * in the CIB. However, if the node has never been seen, do it here, so * the node is not considered unclean. */ flags |= node_update_cluster; } if (peer->uuid == NULL) { crm_info("Recording uuid '%s' for node '%s'", uuid, target); peer->uuid = strdup(uuid); } crmd_peer_down(peer, TRUE); /* Generate a node state update for the CIB */ node_state = create_node_state_update(peer, flags, NULL, __FUNCTION__); /* we have to mark whether or not remote nodes have already been fenced */ if (peer->flags & crm_remote_node) { time_t now = time(NULL); char *now_s = crm_itoa(now); crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s); free(now_s); } /* Force our known ID */ crm_xml_add(node_state, XML_ATTR_UUID, uuid); rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state, cib_quorum_override | cib_scope_local | cib_can_create); /* Delay processing the trigger until the update completes */ crm_debug("Sending fencing update %d for %s", rc, target); fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated); /* Make sure it sticks */ /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */ erase_status_tag(peer->uname, XML_CIB_TAG_LRM, cib_scope_local); erase_status_tag(peer->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); free_xml(node_state); return; } /*! * \internal * \brief Abort transition due to stonith failure * * \param[in] abort_action Whether to restart or stop transition * \param[in] target Don't restart if this (NULL for any) has too many failures * \param[in] reason Log this stonith action XML as abort reason (or NULL) */ static void abort_for_stonith_failure(enum transition_action abort_action, const char *target, xmlNode *reason) { /* If stonith repeatedly fails, we eventually give up on starting a new * transition for that reason. */ if ((abort_action != tg_stop) && too_many_st_failures(target)) { abort_action = tg_stop; } abort_transition(INFINITY, abort_action, "Stonith failed", reason); } /* * stonith cleanup list * * If the DC is shot, proper notifications might not go out. * The stonith cleanup list allows the cluster to (re-)send * notifications once a new DC is elected. */ static GListPtr stonith_cleanup_list = NULL; /*! * \internal * \brief Add a node to the stonith cleanup list * * \param[in] target Name of node to add */ void add_stonith_cleanup(const char *target) { stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target)); } /*! * \internal * \brief Remove a node from the stonith cleanup list * * \param[in] Name of node to remove */ void remove_stonith_cleanup(const char *target) { GListPtr iter = stonith_cleanup_list; while (iter != NULL) { GListPtr tmp = iter; char *iter_name = tmp->data; iter = iter->next; if (safe_str_eq(target, iter_name)) { crm_trace("Removing %s from the cleanup list", iter_name); stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp); free(iter_name); } } } /*! * \internal * \brief Purge all entries from the stonith cleanup list */ void purge_stonith_cleanup() { if (stonith_cleanup_list) { GListPtr iter = NULL; for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; crm_info("Purging %s from stonith cleanup list", target); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } } /*! * \internal * \brief Send stonith updates for all entries in cleanup list, then purge it */ void execute_stonith_cleanup() { GListPtr iter; for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; crm_node_t *target_node = crm_get_peer(0, target); const char *uuid = crm_peer_uuid(target_node); crm_notice("Marking %s, target of a previous stonith action, as clean", target); send_stonith_update(NULL, target, uuid); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } /* end stonith cleanup list functions */ /* stonith API client * * Functions that need to interact directly with the fencer via its API */ static stonith_t *stonith_api = NULL; static crm_trigger_t *stonith_reconnect = NULL; static char *te_client_id = NULL; static gboolean fail_incompletable_stonith(crm_graph_t *graph) { GListPtr lpc = NULL; const char *task = NULL; xmlNode *last_action = NULL; if (graph == NULL) { return FALSE; } for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { GListPtr lpc2 = NULL; synapse_t *synapse = (synapse_t *) lpc->data; if (synapse->confirmed) { continue; } for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) { crm_action_t *action = (crm_action_t *) lpc2->data; if (action->type != action_type_crm || action->confirmed) { continue; } task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (task && safe_str_eq(task, CRM_OP_FENCE)) { action->failed = TRUE; last_action = action->xml; update_graph(graph, action); crm_notice("Failing action %d (%s): fencer terminated", action->id, ID(action->xml)); } } } if (last_action != NULL) { crm_warn("Fencer failure resulted in unrunnable actions"); abort_for_stonith_failure(tg_restart, NULL, last_action); return TRUE; } return FALSE; } static void tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e) { te_cleanup_stonith_history_sync(st, FALSE); if (is_set(fsa_input_register, R_ST_REQUIRED)) { crm_crit("Fencing daemon connection failed"); mainloop_set_trigger(stonith_reconnect); } else { crm_info("Fencing daemon disconnected"); } if (stonith_api) { /* the client API won't properly reconnect notifications * if they are still in the table - so remove them */ if (stonith_api->state != stonith_disconnected) { stonith_api->cmds->disconnect(st); } stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT); stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_FENCE); stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_HISTORY_SYNCED); } if (AM_I_DC) { fail_incompletable_stonith(transition_graph); trigger_graph(); } } static void tengine_stonith_notify(stonith_t *st, stonith_event_t *st_event) { if (te_client_id == NULL) { te_client_id = crm_strdup_printf("%s.%lu", crm_system_name, (unsigned long) getpid()); } if (st_event == NULL) { crm_err("Notify data not found"); return; } crmd_alert_fencing_op(st_event); if ((st_event->result == pcmk_ok) && safe_str_eq("on", st_event->action)) { crm_notice("%s was successfully unfenced by %s (at the request of %s)", st_event->target, st_event->executioner? st_event->executioner : "", st_event->origin); /* TODO: Hook up st_event->device */ return; } else if (safe_str_eq("on", st_event->action)) { crm_err("Unfencing of %s by %s failed: %s (%d)", st_event->target, st_event->executioner? st_event->executioner : "", pcmk_strerror(st_event->result), st_event->result); return; } else if ((st_event->result == pcmk_ok) && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) { /* We were notified of our own fencing. Most likely, either fencing was * misconfigured, or fabric fencing that doesn't cut cluster * communication is in use. * * Either way, shutting down the local host is a good idea, to require * administrator intervention. Also, other nodes would otherwise likely * set our status to lost because of the fencing callback and discard * our subsequent election votes as "not part of our cluster". */ crm_crit("We were allegedly just fenced by %s for %s!", st_event->executioner? st_event->executioner : "the cluster", st_event->origin); /* Dumps blackbox if enabled */ if (fence_reaction_panic) { pcmk_panic(__FUNCTION__); } else { crm_exit(CRM_EX_FATAL); } return; } /* Update the count of stonith failures for this target, in case we become * DC later. The current DC has already updated its fail count in * tengine_stonith_callback(). */ if (!AM_I_DC && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) { if (st_event->result == pcmk_ok) { st_fail_count_reset(st_event->target); } else { st_fail_count_increment(st_event->target); } } crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s " CRM_XS " initiator=%s ref=%s", st_event->target, st_event->result == pcmk_ok ? "" : " not", st_event->action, st_event->executioner ? st_event->executioner : "", (st_event->client_origin? st_event->client_origin : ""), pcmk_strerror(st_event->result), st_event->origin, st_event->id); if (st_event->result == pcmk_ok) { crm_node_t *peer = crm_find_known_peer_full(0, st_event->target, CRM_GET_PEER_ANY); const char *uuid = NULL; gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname); if (peer == NULL) { return; } uuid = crm_peer_uuid(peer); crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc); if(AM_I_DC) { /* The DC always sends updates */ send_stonith_update(NULL, st_event->target, uuid); /* @TODO Ideally, at this point, we'd check whether the fenced node * hosted any guest nodes, and call remote_node_down() for them. * Unfortunately, the controller doesn't have a simple, reliable way * to map hosts to guests. It might be possible to track this in the * peer cache via crm_remote_peer_cache_refresh(). For now, we rely - * on the PE creating fence pseudo-events for the guests. + * on the scheduler creating fence pseudo-events for the guests. */ if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) { /* Abort the current transition graph if it wasn't us * that invoked stonith to fence someone */ crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target); abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL); } /* Assume it was our leader if we don't currently have one */ } else if (((fsa_our_dc == NULL) || safe_str_eq(fsa_our_dc, st_event->target)) && is_not_set(peer->flags, crm_remote_node)) { crm_notice("Target %s our leader %s (recorded: %s)", fsa_our_dc ? "was" : "may have been", st_event->target, fsa_our_dc ? fsa_our_dc : ""); /* Given the CIB resyncing that occurs around elections, * have one node update the CIB now and, if the new DC is different, * have them do so too after the election */ if (we_are_executioner) { send_stonith_update(NULL, st_event->target, uuid); } add_stonith_cleanup(st_event->target); } /* If the target is a remote node, and we host its connection, * immediately fail all monitors so it can be recovered quickly. * The connection won't necessarily drop when a remote node is fenced, * so the failure might not otherwise be detected until the next poke. */ if (is_set(peer->flags, crm_remote_node)) { remote_ra_fail(st_event->target); } crmd_peer_down(peer, TRUE); } } /*! * \brief Connect to fencer * * \param[in] user_data If NULL, retry failures now, otherwise retry in main loop * * \return TRUE * \note If user_data is NULL, this will wait 2s between attempts, for up to * 30 attempts, meaning the controller could be blocked as long as 58s. */ static gboolean te_connect_stonith(gpointer user_data) { int rc = pcmk_ok; if (stonith_api == NULL) { stonith_api = stonith_api_new(); if (stonith_api == NULL) { crm_err("Could not connect to fencer: API memory allocation failed"); return TRUE; } } if (stonith_api->state != stonith_disconnected) { crm_trace("Already connected to fencer, no need to retry"); return TRUE; } if (user_data == NULL) { // Blocking (retry failures now until successful) rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30); if (rc != pcmk_ok) { crm_err("Could not connect to fencer in 30 attempts: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); } } else { // Non-blocking (retry failures later in main loop) rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL); if (rc != pcmk_ok) { if (is_set(fsa_input_register, R_ST_REQUIRED)) { crm_err("Fencer connection failed (will retry): %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); mainloop_set_trigger(stonith_reconnect); } else { crm_info("Fencer connection failed (ignoring because no longer required): %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); } return TRUE; } } if (rc == pcmk_ok) { stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT, tengine_stonith_connection_destroy); stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE, tengine_stonith_notify); stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_HISTORY_SYNCED, tengine_stonith_history_synced); te_trigger_stonith_history_sync(TRUE); crm_notice("Fencer successfully connected"); } return TRUE; } /*! \internal \brief Schedule fencer connection attempt in main loop */ void controld_trigger_fencer_connect() { if (stonith_reconnect == NULL) { stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, GINT_TO_POINTER(TRUE)); } set_bit(fsa_input_register, R_ST_REQUIRED); mainloop_set_trigger(stonith_reconnect); } void controld_disconnect_fencer(bool destroy) { if (stonith_api) { // Prevent fencer connection from coming up again clear_bit(fsa_input_register, R_ST_REQUIRED); if (stonith_api->state != stonith_disconnected) { stonith_api->cmds->disconnect(stonith_api); } stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT); stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_FENCE); stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_HISTORY_SYNCED); } if (destroy) { if (stonith_api) { stonith_api->cmds->free(stonith_api); stonith_api = NULL; } if (stonith_reconnect) { mainloop_destroy_trigger(stonith_reconnect); stonith_reconnect = NULL; } if (te_client_id) { free(te_client_id); te_client_id = NULL; } } } static gboolean do_stonith_history_sync(gpointer user_data) { if (stonith_api && (stonith_api->state != stonith_disconnected)) { stonith_history_t *history = NULL; te_cleanup_stonith_history_sync(stonith_api, FALSE); stonith_api->cmds->history(stonith_api, st_opt_sync_call | st_opt_broadcast, NULL, &history, 5); stonith_history_free(history); return TRUE; } else { crm_info("Skip triggering stonith history-sync as stonith is disconnected"); return FALSE; } } static void tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data) { char *uuid = NULL; int stonith_id = -1; int transition_id = -1; crm_action_t *action = NULL; int call_id = data->call_id; int rc = data->rc; char *userdata = data->userdata; CRM_CHECK(userdata != NULL, return); crm_notice("Stonith operation %d/%s: %s (%d)", call_id, (char *)userdata, pcmk_strerror(rc), rc); if (AM_I_DC == FALSE) { return; } /* crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", */ /* op->call_id, op->optype, op->node_name, op->op_result, */ /* (char *)op->node_list, op->private_data); */ /* filter out old STONITH actions */ CRM_CHECK(decode_transition_key(userdata, &uuid, &transition_id, &stonith_id, NULL), goto bail); if (transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid) || transition_graph->id != transition_id) { crm_info("Ignoring STONITH action initiated outside of the current transition"); goto bail; } action = controld_get_action(stonith_id); if (action == NULL) { crm_err("Stonith action not matched"); goto bail; } stop_te_timer(action->timer); if (rc == pcmk_ok) { const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); const char *op = crm_meta_value(action->params, "stonith_action"); crm_info("Stonith operation %d for %s passed", call_id, target); if (action->confirmed == FALSE) { te_action_confirmed(action, NULL); if (safe_str_eq("on", op)) { const char *value = NULL; char *now = crm_ttoa(time(NULL)); update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, FALSE); free(now); value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL); update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, FALSE); value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE); update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, FALSE); } else if (action->sent_update == FALSE) { send_stonith_update(action, target, uuid); action->sent_update = TRUE; } } st_fail_count_reset(target); } else { const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); enum transition_action abort_action = tg_restart; action->failed = TRUE; crm_notice("Stonith operation %d for %s failed (%s): aborting transition.", call_id, target, pcmk_strerror(rc)); /* If no fence devices were available, there's no use in immediately * checking again, so don't start a new transition in that case. */ if (rc == -ENODEV) { crm_warn("No devices found in cluster to fence %s, giving up", target); abort_action = tg_stop; } /* Increment the fail count now, so abort_for_stonith_failure() can * check it. Non-DC nodes will increment it in tengine_stonith_notify(). */ st_fail_count_increment(target); abort_for_stonith_failure(abort_action, target, NULL); } update_graph(transition_graph, action); trigger_graph(); bail: free(userdata); free(uuid); return; } gboolean te_fence_node(crm_graph_t *graph, crm_action_t *action) { int rc = 0; const char *id = NULL; const char *uuid = NULL; const char *target = NULL; const char *type = NULL; gboolean invalid_action = FALSE; enum stonith_call_options options = st_opt_none; id = ID(action->xml); target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); type = crm_meta_value(action->params, "stonith_action"); CRM_CHECK(id != NULL, invalid_action = TRUE); CRM_CHECK(uuid != NULL, invalid_action = TRUE); CRM_CHECK(type != NULL, invalid_action = TRUE); CRM_CHECK(target != NULL, invalid_action = TRUE); if (invalid_action) { crm_log_xml_warn(action->xml, "BadAction"); return FALSE; } crm_notice("Requesting fencing (%s) of node %s " CRM_XS " action=%s timeout=%u", type, target, id, transition_graph->stonith_timeout); /* Passing NULL means block until we can connect... */ te_connect_stonith(NULL); if (crmd_join_phase_count(crm_join_confirmed) == 1) { options |= st_opt_allow_suicide; } rc = stonith_api->cmds->fence(stonith_api, options, target, type, (int) (transition_graph->stonith_timeout / 1000), 0); stonith_api->cmds->register_callback(stonith_api, rc, (int) (transition_graph->stonith_timeout / 1000), st_opt_timeout_updates, generate_transition_key(transition_graph->id, action->id, 0, te_uuid), "tengine_stonith_callback", tengine_stonith_callback); return TRUE; } /* end stonith API client functions */ /* * stonith history synchronization * * Each node's fencer keeps track of a cluster-wide fencing history. When a node * joins or leaves, we need to synchronize the history across all nodes. */ static crm_trigger_t *stonith_history_sync_trigger = NULL; static mainloop_timer_t *stonith_history_sync_timer_short = NULL; static mainloop_timer_t *stonith_history_sync_timer_long = NULL; void te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers) { if (free_timers) { mainloop_timer_del(stonith_history_sync_timer_short); stonith_history_sync_timer_short = NULL; mainloop_timer_del(stonith_history_sync_timer_long); stonith_history_sync_timer_long = NULL; } else { mainloop_timer_stop(stonith_history_sync_timer_short); mainloop_timer_stop(stonith_history_sync_timer_long); } if (st) { st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED); } } static void tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event) { te_cleanup_stonith_history_sync(st, FALSE); crm_debug("Fence-history synced - cancel all timers"); } static gboolean stonith_history_sync_set_trigger(gpointer user_data) { mainloop_set_trigger(stonith_history_sync_trigger); return FALSE; } void te_trigger_stonith_history_sync(bool long_timeout) { /* trigger a sync in 5s to give more nodes the * chance to show up so that we don't create * unnecessary stonith-history-sync traffic * * the long timeout of 30s is there as a fallback * so that after a successful connection to fenced * we will wait for 30s for the DC to trigger a * history-sync * if this doesn't happen we trigger a sync locally * (e.g. fenced segfaults and is restarted by pacemakerd) */ /* as we are finally checking the stonith-connection * in do_stonith_history_sync we should be fine * leaving stonith_history_sync_time & stonith_history_sync_trigger * around */ if (stonith_history_sync_trigger == NULL) { stonith_history_sync_trigger = mainloop_add_trigger(G_PRIORITY_LOW, do_stonith_history_sync, NULL); } if (long_timeout) { if(stonith_history_sync_timer_long == NULL) { stonith_history_sync_timer_long = mainloop_timer_add("history_sync_long", 30000, FALSE, stonith_history_sync_set_trigger, NULL); } crm_info("Fence history will be synchronized cluster-wide within 30 seconds"); mainloop_timer_start(stonith_history_sync_timer_long); } else { if(stonith_history_sync_timer_short == NULL) { stonith_history_sync_timer_short = mainloop_timer_add("history_sync_short", 5000, FALSE, stonith_history_sync_set_trigger, NULL); } crm_info("Fence history will be synchronized cluster-wide within 5 seconds"); mainloop_timer_start(stonith_history_sync_timer_short); } } /* end stonith history synchronization functions */ diff --git a/daemons/controld/controld_fsa.c b/daemons/controld/controld_fsa.c index 190246297e..6760224eab 100644 --- a/daemons/controld/controld_fsa.c +++ b/daemons/controld/controld_fsa.c @@ -1,645 +1,645 @@ /* * Copyright 2004-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *fsa_our_dc = NULL; cib_t *fsa_cib_conn = NULL; char *fsa_our_dc_version = NULL; char *fsa_our_uuid = NULL; char *fsa_our_uname = NULL; char *fsa_cluster_name = NULL; gboolean do_fsa_stall = FALSE; long long fsa_input_register = 0; long long fsa_actions = A_NOTHING; enum crmd_fsa_state fsa_state = S_STARTING; extern uint highest_born_on; extern uint num_join_invites; extern void initialize_join(gboolean before); #define DOT_PREFIX "actions:trace: " #define do_dot_log(fmt, args...) crm_trace( fmt, ##args) long long do_state_transition(long long actions, enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t * msg_data); void s_crmd_fsa_actions(fsa_data_t * fsa_data); void log_fsa_input(fsa_data_t * stored_msg); void init_dotfile(void); void init_dotfile(void) { do_dot_log(DOT_PREFIX "digraph \"g\" {"); do_dot_log(DOT_PREFIX " size = \"30,30\""); do_dot_log(DOT_PREFIX " graph ["); do_dot_log(DOT_PREFIX " fontsize = \"12\""); do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX " fontcolor = \"black\""); do_dot_log(DOT_PREFIX " bb = \"0,0,398.922306,478.927856\""); do_dot_log(DOT_PREFIX " color = \"black\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX " node ["); do_dot_log(DOT_PREFIX " fontsize = \"12\""); do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX " fontcolor = \"black\""); do_dot_log(DOT_PREFIX " shape = \"ellipse\""); do_dot_log(DOT_PREFIX " color = \"black\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX " edge ["); do_dot_log(DOT_PREFIX " fontsize = \"12\""); do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX " fontcolor = \"black\""); do_dot_log(DOT_PREFIX " color = \"black\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX "// special nodes"); do_dot_log(DOT_PREFIX " \"S_PENDING\" "); do_dot_log(DOT_PREFIX " ["); do_dot_log(DOT_PREFIX " color = \"blue\""); do_dot_log(DOT_PREFIX " fontcolor = \"blue\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX " \"S_TERMINATE\" "); do_dot_log(DOT_PREFIX " ["); do_dot_log(DOT_PREFIX " color = \"red\""); do_dot_log(DOT_PREFIX " fontcolor = \"red\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX "// DC only nodes"); do_dot_log(DOT_PREFIX " \"S_INTEGRATION\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_POLICY_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_TRANSITION_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_RELEASE_DC\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_IDLE\" [ fontcolor = \"green\" ]"); } static void do_fsa_action(fsa_data_t * fsa_data, long long an_action, void (*function) (long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)) { fsa_actions &= ~an_action; crm_trace(DOT_PREFIX "\t// %s", fsa_action2string(an_action)); function(an_action, fsa_data->fsa_cause, fsa_state, fsa_data->fsa_input, fsa_data); } static long long startup_actions = A_STARTUP | A_CIB_START | A_LRM_CONNECT | A_HA_CONNECT | A_READCONFIG | A_STARTED | A_CL_JOIN_QUERY; // A_LOG, A_WARN, A_ERROR void do_log(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { unsigned log_type = LOG_TRACE; if (action & A_LOG) { log_type = LOG_INFO; } else if (action & A_WARN) { log_type = LOG_WARNING; } else if (action & A_ERROR) { log_type = LOG_ERR; } do_crm_log(log_type, "Input %s received in state %s from %s", fsa_input2string(msg_data->fsa_input), fsa_state2string(cur_state), msg_data->origin); if (msg_data->data_type == fsa_dt_ha_msg) { ha_msg_input_t *input = fsa_typed_data(msg_data->data_type); crm_log_xml_debug(input->msg, __FUNCTION__); } else if (msg_data->data_type == fsa_dt_xml) { xmlNode *input = fsa_typed_data(msg_data->data_type); crm_log_xml_debug(input, __FUNCTION__); } else if (msg_data->data_type == fsa_dt_lrm) { lrmd_event_data_t *input = fsa_typed_data(msg_data->data_type); do_crm_log(log_type, "Resource %s: Call ID %d returned %d (%d)." " New status if rc=0: %s", input->rsc_id, input->call_id, input->rc, input->op_status, (char *)input->user_data); } } enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause) { fsa_data_t *fsa_data = NULL; long long register_copy = fsa_input_register; long long new_actions = A_NOTHING; enum crmd_fsa_state last_state; crm_trace("FSA invoked with Cause: %s\tState: %s", fsa_cause2string(cause), fsa_state2string(fsa_state)); fsa_dump_actions(fsa_actions, "Initial"); do_fsa_stall = FALSE; if (is_message() == FALSE && fsa_actions != A_NOTHING) { /* fake the first message so we can get into the loop */ fsa_data = calloc(1, sizeof(fsa_data_t)); fsa_data->fsa_input = I_NULL; fsa_data->fsa_cause = C_FSA_INTERNAL; fsa_data->origin = __FUNCTION__; fsa_data->data_type = fsa_dt_none; fsa_message_queue = g_list_append(fsa_message_queue, fsa_data); fsa_data = NULL; } while (is_message() && do_fsa_stall == FALSE) { crm_trace("Checking messages (%d remaining)", g_list_length(fsa_message_queue)); fsa_data = get_message(); if(fsa_data == NULL) { continue; } log_fsa_input(fsa_data); /* add any actions back to the queue */ fsa_actions |= fsa_data->actions; fsa_dump_actions(fsa_data->actions, "Restored actions"); /* get the next batch of actions */ new_actions = crmd_fsa_actions[fsa_data->fsa_input][fsa_state]; fsa_actions |= new_actions; fsa_dump_actions(new_actions, "New actions"); if (fsa_data->fsa_input != I_NULL && fsa_data->fsa_input != I_ROUTER) { crm_debug("Processing %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); } /* logging : *before* the state is changed */ if (is_set(fsa_actions, A_ERROR)) { do_fsa_action(fsa_data, A_ERROR, do_log); } if (is_set(fsa_actions, A_WARN)) { do_fsa_action(fsa_data, A_WARN, do_log); } if (is_set(fsa_actions, A_LOG)) { do_fsa_action(fsa_data, A_LOG, do_log); } /* update state variables */ last_state = fsa_state; fsa_state = crmd_fsa_state[fsa_data->fsa_input][fsa_state]; /* * Remove certain actions during shutdown */ if (fsa_state == S_STOPPING || ((fsa_input_register & R_SHUTDOWN) == R_SHUTDOWN)) { clear_bit(fsa_actions, startup_actions); } /* * Hook for change of state. * Allows actions to be added or removed when entering a state */ if (last_state != fsa_state) { fsa_actions = do_state_transition(fsa_actions, last_state, fsa_state, fsa_data); } else { do_dot_log(DOT_PREFIX "\t// FSA input: State=%s \tCause=%s" " \tInput=%s \tOrigin=%s() \tid=%d", fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_input2string(fsa_data->fsa_input), fsa_data->origin, fsa_data->id); } /* start doing things... */ s_crmd_fsa_actions(fsa_data); delete_fsa_input(fsa_data); fsa_data = NULL; } if (g_list_length(fsa_message_queue) > 0 || fsa_actions != A_NOTHING || do_fsa_stall) { crm_debug("Exiting the FSA: queue=%d, fsa_actions=0x%llx, stalled=%s", g_list_length(fsa_message_queue), fsa_actions, do_fsa_stall ? "true" : "false"); } else { crm_trace("Exiting the FSA"); } /* cleanup inputs? */ if (register_copy != fsa_input_register) { long long same = register_copy & fsa_input_register; fsa_dump_inputs(LOG_DEBUG, "Added", fsa_input_register ^ same); fsa_dump_inputs(LOG_DEBUG, "Removed", register_copy ^ same); } fsa_dump_actions(fsa_actions, "Remaining"); fsa_dump_queue(LOG_DEBUG); return fsa_state; } void s_crmd_fsa_actions(fsa_data_t * fsa_data) { /* * Process actions in order of priority but do only one * action at a time to avoid complicating the ordering. */ CRM_CHECK(fsa_data != NULL, return); while (fsa_actions != A_NOTHING && do_fsa_stall == FALSE) { /* regular action processing in order of action priority * * Make sure all actions that connect to required systems * are performed first */ if (fsa_actions & A_ERROR) { do_fsa_action(fsa_data, A_ERROR, do_log); } else if (fsa_actions & A_WARN) { do_fsa_action(fsa_data, A_WARN, do_log); } else if (fsa_actions & A_LOG) { do_fsa_action(fsa_data, A_LOG, do_log); /* get out of here NOW! before anything worse happens */ } else if (fsa_actions & A_EXIT_1) { do_fsa_action(fsa_data, A_EXIT_1, do_exit); /* sub-system restart */ } else if ((fsa_actions & O_LRM_RECONNECT) == O_LRM_RECONNECT) { do_fsa_action(fsa_data, O_LRM_RECONNECT, do_lrm_control); } else if ((fsa_actions & O_CIB_RESTART) == O_CIB_RESTART) { do_fsa_action(fsa_data, O_CIB_RESTART, do_cib_control); } else if ((fsa_actions & O_PE_RESTART) == O_PE_RESTART) { do_fsa_action(fsa_data, O_PE_RESTART, do_pe_control); } else if ((fsa_actions & O_TE_RESTART) == O_TE_RESTART) { do_fsa_action(fsa_data, O_TE_RESTART, do_te_control); /* essential start tasks */ } else if (fsa_actions & A_STARTUP) { do_fsa_action(fsa_data, A_STARTUP, do_startup); } else if (fsa_actions & A_CIB_START) { do_fsa_action(fsa_data, A_CIB_START, do_cib_control); } else if (fsa_actions & A_HA_CONNECT) { do_fsa_action(fsa_data, A_HA_CONNECT, do_ha_control); } else if (fsa_actions & A_READCONFIG) { do_fsa_action(fsa_data, A_READCONFIG, do_read_config); /* sub-system start/connect */ } else if (fsa_actions & A_LRM_CONNECT) { do_fsa_action(fsa_data, A_LRM_CONNECT, do_lrm_control); } else if (fsa_actions & A_TE_START) { do_fsa_action(fsa_data, A_TE_START, do_te_control); } else if (fsa_actions & A_PE_START) { do_fsa_action(fsa_data, A_PE_START, do_pe_control); /* Timers */ /* else if(fsa_actions & O_DC_TIMER_RESTART) { do_fsa_action(fsa_data, O_DC_TIMER_RESTART, do_timer_control) */ ; } else if (fsa_actions & A_DC_TIMER_STOP) { do_fsa_action(fsa_data, A_DC_TIMER_STOP, do_timer_control); } else if (fsa_actions & A_INTEGRATE_TIMER_STOP) { do_fsa_action(fsa_data, A_INTEGRATE_TIMER_STOP, do_timer_control); } else if (fsa_actions & A_INTEGRATE_TIMER_START) { do_fsa_action(fsa_data, A_INTEGRATE_TIMER_START, do_timer_control); } else if (fsa_actions & A_FINALIZE_TIMER_STOP) { do_fsa_action(fsa_data, A_FINALIZE_TIMER_STOP, do_timer_control); } else if (fsa_actions & A_FINALIZE_TIMER_START) { do_fsa_action(fsa_data, A_FINALIZE_TIMER_START, do_timer_control); /* * Highest priority actions */ } else if (fsa_actions & A_MSG_ROUTE) { do_fsa_action(fsa_data, A_MSG_ROUTE, do_msg_route); } else if (fsa_actions & A_RECOVER) { do_fsa_action(fsa_data, A_RECOVER, do_recover); } else if (fsa_actions & A_CL_JOIN_RESULT) { do_fsa_action(fsa_data, A_CL_JOIN_RESULT, do_cl_join_finalize_respond); } else if (fsa_actions & A_CL_JOIN_REQUEST) { do_fsa_action(fsa_data, A_CL_JOIN_REQUEST, do_cl_join_offer_respond); } else if (fsa_actions & A_SHUTDOWN_REQ) { do_fsa_action(fsa_data, A_SHUTDOWN_REQ, do_shutdown_req); } else if (fsa_actions & A_ELECTION_VOTE) { do_fsa_action(fsa_data, A_ELECTION_VOTE, do_election_vote); } else if (fsa_actions & A_ELECTION_COUNT) { do_fsa_action(fsa_data, A_ELECTION_COUNT, do_election_count_vote); } else if (fsa_actions & A_LRM_EVENT) { do_fsa_action(fsa_data, A_LRM_EVENT, do_lrm_event); /* * High priority actions */ } else if (fsa_actions & A_STARTED) { do_fsa_action(fsa_data, A_STARTED, do_started); } else if (fsa_actions & A_CL_JOIN_QUERY) { do_fsa_action(fsa_data, A_CL_JOIN_QUERY, do_cl_join_query); } else if (fsa_actions & A_DC_TIMER_START) { do_fsa_action(fsa_data, A_DC_TIMER_START, do_timer_control); /* * Medium priority actions * - Membership */ } else if (fsa_actions & A_DC_TAKEOVER) { do_fsa_action(fsa_data, A_DC_TAKEOVER, do_dc_takeover); } else if (fsa_actions & A_DC_RELEASE) { do_fsa_action(fsa_data, A_DC_RELEASE, do_dc_release); } else if (fsa_actions & A_DC_JOIN_FINAL) { do_fsa_action(fsa_data, A_DC_JOIN_FINAL, do_dc_join_final); } else if (fsa_actions & A_ELECTION_CHECK) { do_fsa_action(fsa_data, A_ELECTION_CHECK, do_election_check); } else if (fsa_actions & A_ELECTION_START) { do_fsa_action(fsa_data, A_ELECTION_START, do_election_vote); } else if (fsa_actions & A_DC_JOIN_OFFER_ALL) { do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all); } else if (fsa_actions & A_DC_JOIN_OFFER_ONE) { do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ONE, do_dc_join_offer_one); } else if (fsa_actions & A_DC_JOIN_PROCESS_REQ) { do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_REQ, do_dc_join_filter_offer); } else if (fsa_actions & A_DC_JOIN_PROCESS_ACK) { do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_ACK, do_dc_join_ack); } else if (fsa_actions & A_DC_JOIN_FINALIZE) { do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize); } else if (fsa_actions & A_CL_JOIN_ANNOUNCE) { do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce); /* * Low(er) priority actions * Make sure the CIB is always updated before invoking the - * PE, and the PE before the TE + * scheduler, and the scheduler before the transition engine. */ } else if (fsa_actions & A_TE_HALT) { do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke); } else if (fsa_actions & A_TE_CANCEL) { do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke); } else if (fsa_actions & A_LRM_INVOKE) { do_fsa_action(fsa_data, A_LRM_INVOKE, do_lrm_invoke); } else if (fsa_actions & A_PE_INVOKE) { do_fsa_action(fsa_data, A_PE_INVOKE, do_pe_invoke); } else if (fsa_actions & A_TE_INVOKE) { do_fsa_action(fsa_data, A_TE_INVOKE, do_te_invoke); /* Shutdown actions */ } else if (fsa_actions & A_DC_RELEASED) { do_fsa_action(fsa_data, A_DC_RELEASED, do_dc_release); } else if (fsa_actions & A_PE_STOP) { do_fsa_action(fsa_data, A_PE_STOP, do_pe_control); } else if (fsa_actions & A_TE_STOP) { do_fsa_action(fsa_data, A_TE_STOP, do_te_control); } else if (fsa_actions & A_SHUTDOWN) { do_fsa_action(fsa_data, A_SHUTDOWN, do_shutdown); } else if (fsa_actions & A_LRM_DISCONNECT) { do_fsa_action(fsa_data, A_LRM_DISCONNECT, do_lrm_control); } else if (fsa_actions & A_HA_DISCONNECT) { do_fsa_action(fsa_data, A_HA_DISCONNECT, do_ha_control); } else if (fsa_actions & A_CIB_STOP) { do_fsa_action(fsa_data, A_CIB_STOP, do_cib_control); } else if (fsa_actions & A_STOP) { do_fsa_action(fsa_data, A_STOP, do_stop); /* exit gracefully */ } else if (fsa_actions & A_EXIT_0) { do_fsa_action(fsa_data, A_EXIT_0, do_exit); /* Error checking and reporting */ } else { crm_err("Action %s not supported "CRM_XS" 0x%llx", fsa_action2string(fsa_actions), fsa_actions); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, fsa_data, NULL, __FUNCTION__); } } } void log_fsa_input(fsa_data_t * stored_msg) { CRM_ASSERT(stored_msg); crm_trace("Processing queued input %d", stored_msg->id); if (stored_msg->fsa_cause == C_LRM_OP_CALLBACK) { crm_trace("FSA processing LRM callback from %s", stored_msg->origin); } else if (stored_msg->data == NULL) { crm_trace("FSA processing input from %s", stored_msg->origin); } else { ha_msg_input_t *ha_input = fsa_typed_data_adv(stored_msg, fsa_dt_ha_msg, __FUNCTION__); crm_trace("FSA processing XML message from %s", stored_msg->origin); crm_log_xml_trace(ha_input->xml, "FSA message data"); } } long long do_state_transition(long long actions, enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t * msg_data) { int level = LOG_INFO; long long tmp = actions; gboolean clear_recovery_bit = TRUE; enum crmd_fsa_cause cause = msg_data->fsa_cause; enum crmd_fsa_input current_input = msg_data->fsa_input; const char *state_from = fsa_state2string(cur_state); const char *state_to = fsa_state2string(next_state); const char *input = fsa_input2string(current_input); CRM_LOG_ASSERT(cur_state != next_state); do_dot_log(DOT_PREFIX "\t%s -> %s [ label=%s cause=%s origin=%s ]", state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); if (cur_state == S_IDLE || next_state == S_IDLE) { level = LOG_NOTICE; } else if (cur_state == S_NOT_DC || next_state == S_NOT_DC) { level = LOG_NOTICE; } else if (cur_state == S_ELECTION) { level = LOG_NOTICE; } else if (cur_state == S_STARTING) { level = LOG_NOTICE; } else if (next_state == S_RECOVERY) { level = LOG_WARNING; } do_crm_log(level, "State transition %s -> %s " CRM_XS " input=%s cause=%s origin=%s", state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); if (next_state != S_ELECTION && cur_state != S_RELEASE_DC) { controld_stop_election_timer(); } #if 0 if ((fsa_input_register & R_SHUTDOWN)) { set_bit(tmp, A_DC_TIMER_STOP); } #endif if (next_state == S_INTEGRATION) { set_bit(tmp, A_INTEGRATE_TIMER_START); } else { set_bit(tmp, A_INTEGRATE_TIMER_STOP); } if (next_state == S_FINALIZE_JOIN) { set_bit(tmp, A_FINALIZE_TIMER_START); } else { set_bit(tmp, A_FINALIZE_TIMER_STOP); } if (next_state != S_PENDING) { set_bit(tmp, A_DC_TIMER_STOP); } if (next_state != S_ELECTION) { highest_born_on = 0; } if (next_state != S_IDLE) { controld_stop_timer(recheck_timer); } if (cur_state == S_FINALIZE_JOIN && next_state == S_POLICY_ENGINE) { populate_cib_nodes(node_update_quick|node_update_all, __FUNCTION__); } switch (next_state) { case S_PENDING: fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local); /* fall through */ case S_ELECTION: crm_trace("Resetting our DC to NULL on transition to %s", fsa_state2string(next_state)); update_dc(NULL); break; case S_NOT_DC: election_trigger->counter = 0; purge_stonith_cleanup(); if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("(Re)Issuing shutdown request now" " that we have a new DC"); set_bit(tmp, A_SHUTDOWN_REQ); } CRM_LOG_ASSERT(fsa_our_dc != NULL); if (fsa_our_dc == NULL) { crm_err("Reached S_NOT_DC without a DC" " being recorded"); } break; case S_RECOVERY: clear_recovery_bit = FALSE; break; case S_FINALIZE_JOIN: CRM_LOG_ASSERT(AM_I_DC); if (cause == C_TIMER_POPPED) { crm_warn("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } if (crmd_join_phase_count(crm_join_welcomed) > 0) { crm_warn("%u cluster nodes failed to respond" " to the join offer.", crmd_join_phase_count(crm_join_welcomed)); crmd_join_phase_log(LOG_NOTICE); } else { crm_debug("All %d cluster nodes responded to the join offer.", crmd_join_phase_count(crm_join_integrated)); } break; case S_POLICY_ENGINE: election_trigger->counter = 0; CRM_LOG_ASSERT(AM_I_DC); if (cause == C_TIMER_POPPED) { crm_info("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } if (crmd_join_phase_count(crm_join_finalized) > 0) { crm_err("%u cluster nodes failed to confirm their join.", crmd_join_phase_count(crm_join_finalized)); crmd_join_phase_log(LOG_NOTICE); } else if (crmd_join_phase_count(crm_join_confirmed) == crm_active_peers()) { crm_debug("All %u cluster nodes are" " eligible to run resources.", crm_active_peers()); } else if (crmd_join_phase_count(crm_join_confirmed) > crm_active_peers()) { crm_err("We have more confirmed nodes than our membership does: %d vs. %d", crmd_join_phase_count(crm_join_confirmed), crm_active_peers()); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } else if (saved_ccm_membership_id != crm_peer_seq) { crm_info("Membership changed: %llu -> %llu - join restart", saved_ccm_membership_id, crm_peer_seq); register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } else { crm_warn("Only %u of %u cluster " "nodes are eligible to run resources - continue %d", crmd_join_phase_count(crm_join_confirmed), crm_active_peers(), crmd_join_phase_count(crm_join_welcomed)); } /* initialize_join(FALSE); */ break; case S_STOPPING: case S_TERMINATE: /* possibly redundant */ set_bit(fsa_input_register, R_SHUTDOWN); break; case S_IDLE: CRM_LOG_ASSERT(AM_I_DC); if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("(Re)Issuing shutdown request now" " that we are the DC"); set_bit(tmp, A_SHUTDOWN_REQ); } controld_start_recheck_timer(); break; default: break; } if (clear_recovery_bit && next_state != S_PENDING) { tmp &= ~A_RECOVER; } else if (clear_recovery_bit == FALSE) { tmp |= A_RECOVER; } if (tmp != actions) { /* fsa_dump_actions(actions ^ tmp, "New actions"); */ actions = tmp; } return actions; } diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c index d13a84258b..988aaa690b 100644 --- a/daemons/controld/controld_join_dc.c +++ b/daemons/controld/controld_join_dc.c @@ -1,716 +1,716 @@ /* * Copyright 2004-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include char *max_epoch = NULL; char *max_generation_from = NULL; xmlNode *max_generation_xml = NULL; void initialize_join(gboolean before); void finalize_join_for(gpointer key, gpointer value, gpointer user_data); void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); static int current_join_id = 0; unsigned long long saved_ccm_membership_id = 0; void crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase) { enum crm_join_phase last = 0; if(node == NULL) { crm_err("Could not update join because node not specified" CRM_XS " join-%u source=%s phase=%s", current_join_id, source, crm_join_phase_str(phase)); return; } /* Remote nodes do not participate in joins */ if (is_set(node->flags, crm_remote_node)) { return; } last = node->join; if(phase == last) { crm_trace("%s: Node %s[%u] - join-%u phase still %s", source, node->uname, node->id, current_join_id, crm_join_phase_str(last)); } else if ((phase <= crm_join_none) || (phase == (last + 1))) { node->join = phase; crm_info("%s: Node %s[%u] - join-%u phase %s -> %s", source, node->uname, node->id, current_join_id, crm_join_phase_str(last), crm_join_phase_str(phase)); } else { crm_err("Could not update join for node %s because phase transition invalid " CRM_XS " join-%u source=%s node_id=%u last=%s new=%s", node->uname, current_join_id, source, node->id, crm_join_phase_str(last), crm_join_phase_str(phase)); } } void initialize_join(gboolean before) { GHashTableIter iter; crm_node_t *peer = NULL; /* clear out/reset a bunch of stuff */ crm_debug("join-%d: Initializing join data (flag=%s)", current_join_id, before ? "true" : "false"); g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { crm_update_peer_join(__FUNCTION__, peer, crm_join_none); } if (before) { if (max_generation_from != NULL) { free(max_generation_from); max_generation_from = NULL; } if (max_generation_xml != NULL) { free_xml(max_generation_xml); max_generation_xml = NULL; } clear_bit(fsa_input_register, R_HAVE_CIB); clear_bit(fsa_input_register, R_CIB_ASKED); } } /*! * \internal * \brief Create a join message from the DC * * \param[in] join_op Join operation name * \param[in] host_to Recipient of message */ static xmlNode * create_dc_message(const char *join_op, const char *host_to) { xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL); /* Identify which election this is a part of */ crm_xml_add_int(msg, F_CRM_JOIN_ID, current_join_id); /* Add a field specifying whether the DC is shutting down. This keeps the * joining node from fencing the old DC if it becomes the new DC. */ crm_xml_add_boolean(msg, F_CRM_DC_LEAVING, is_set(fsa_input_register, R_SHUTDOWN)); return msg; } static void join_make_offer(gpointer key, gpointer value, gpointer user_data) { xmlNode *offer = NULL; crm_node_t *member = (crm_node_t *)value; CRM_ASSERT(member != NULL); if (crm_is_peer_active(member) == FALSE) { crm_info("Not making an offer to %s: not active (%s)", member->uname, member->state); if(member->expected == NULL && safe_str_eq(member->state, CRM_NODE_LOST)) { /* You would think this unsafe, but in fact this plus an * active resource is what causes it to be fenced. * * Yes, this does mean that any node that dies at the same * time as the old DC and is not running resource (still) * won't be fenced. * * I'm not happy about this either. */ crm_update_peer_expected(__FUNCTION__, member, CRMD_JOINSTATE_DOWN); } return; } if (member->uname == NULL) { crm_info("No recipient for welcome message.(Node uuid:%s)", member->uuid); return; } if (saved_ccm_membership_id != crm_peer_seq) { saved_ccm_membership_id = crm_peer_seq; crm_info("Making join offers based on membership %llu", crm_peer_seq); } if(user_data && member->join > crm_join_none) { crm_info("Skipping %s: already known %d", member->uname, member->join); return; } crm_update_peer_join(__FUNCTION__, (crm_node_t*)member, crm_join_none); offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname); // Advertise our feature set so the joining node can bail if not compatible crm_xml_add(offer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); /* send the welcome */ crm_info("join-%d: Sending offer to %s", current_join_id, member->uname); send_cluster_message(member, crm_msg_crmd, offer, TRUE); free_xml(offer); crm_update_peer_join(__FUNCTION__, member, crm_join_welcomed); /* crm_update_peer_expected(__FUNCTION__, member, CRMD_JOINSTATE_PENDING); */ } /* A_DC_JOIN_OFFER_ALL */ void do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* Reset everyone's status back to down or in_ccm in the CIB. * Any nodes that are active in the CIB but not in the cluster membership * will be seen as offline by the scheduler anyway. */ current_join_id++; initialize_join(TRUE); /* do_update_cib_nodes(TRUE, __FUNCTION__); */ update_dc(NULL); if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) { crm_info("A new node joined the cluster"); } g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL); // Don't waste time by invoking the scheduler yet crm_info("join-%d: Waiting on %d outstanding join acks", current_join_id, crmd_join_phase_count(crm_join_welcomed)); } /* A_DC_JOIN_OFFER_ONE */ void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_node_t *member; ha_msg_input_t *welcome = NULL; const char *op = NULL; const char *join_to = NULL; if (msg_data->data) { welcome = fsa_typed_data(fsa_dt_ha_msg); } else { crm_info("An unknown node joined - (re-)offer to any unconfirmed nodes"); g_hash_table_foreach(crm_peer_cache, join_make_offer, &member); check_join_state(cur_state, __FUNCTION__); return; } if (welcome == NULL) { crm_err("Attempt to send welcome message without a message to reply to!"); return; } join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM); if (join_to == NULL) { crm_err("Attempt to send welcome message without a host to reply to!"); return; } member = crm_get_peer(0, join_to); op = crm_element_value(welcome->msg, F_CRM_TASK); if (join_to != NULL && (cur_state == S_INTEGRATION || cur_state == S_FINALIZE_JOIN)) { /* note: it _is_ possible that a node will have been * sick or starting up when the original offer was made. * however, it will either re-announce itself in due course * _or_ we can re-store the original offer on the client. */ crm_trace("(Re-)offering membership to %s...", join_to); } crm_info("join-%d: Processing %s request from %s in state %s", current_join_id, op, join_to, fsa_state2string(cur_state)); crm_update_peer_join(__FUNCTION__, member, crm_join_none); join_make_offer(NULL, member, NULL); /* always offer to the DC (ourselves) * this ensures the correct value for max_generation_from */ if (strcmp(join_to, fsa_our_uname) != 0) { member = crm_get_peer(0, fsa_our_uname); join_make_offer(NULL, member, NULL); } - /* this was a genuine join request, cancel any existing - * transition and invoke the PE + /* This was a genuine join request; cancel any existing transition and + * invoke the scheduler. */ abort_transition(INFINITY, tg_restart, "Node join", NULL); // Don't waste time by invoking the scheduler yet crm_debug("Waiting on %d outstanding join acks for join-%d", crmd_join_phase_count(crm_join_welcomed), current_join_id); } static int compare_int_fields(xmlNode * left, xmlNode * right, const char *field) { const char *elem_l = crm_element_value(left, field); const char *elem_r = crm_element_value(right, field); int int_elem_l = crm_int_helper(elem_l, NULL); int int_elem_r = crm_int_helper(elem_r, NULL); if (int_elem_l < int_elem_r) { return -1; } else if (int_elem_l > int_elem_r) { return 1; } return 0; } /* A_DC_JOIN_PROCESS_REQ */ void do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *generation = NULL; int cmp = 0; int join_id = -1; gboolean ack_nack_bool = TRUE; const char *ack_nack = CRMD_JOINSTATE_MEMBER; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE); const char *join_version = crm_element_value(join_ack->msg, XML_ATTR_CRM_VERSION); crm_node_t *join_node = crm_get_peer(0, join_from); crm_debug("Processing req from %s", join_from); generation = join_ack->xml; crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); if (max_generation_xml != NULL && generation != NULL) { int lpc = 0; const char *attributes[] = { XML_ATTR_GENERATION_ADMIN, XML_ATTR_GENERATION, XML_ATTR_NUMUPDATES, }; for (lpc = 0; cmp == 0 && lpc < DIMOF(attributes); lpc++) { cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]); } } if (join_id != current_join_id) { crm_debug("Invalid response from %s: join-%d vs. join-%d", join_from, join_id, current_join_id); check_join_state(cur_state, __FUNCTION__); return; } else if (join_node == NULL || crm_is_peer_active(join_node) == FALSE) { crm_err("Node %s is not a member", join_from); ack_nack_bool = FALSE; } else if (generation == NULL) { crm_err("Generation was NULL"); ack_nack_bool = FALSE; } else if ((join_version == NULL) || !feature_set_compatible(CRM_FEATURE_SET, join_version)) { crm_err("Node %s feature set (%s) is incompatible with ours (%s)", join_from, (join_version? join_version : "pre-3.1.0"), CRM_FEATURE_SET); ack_nack_bool = FALSE; } else if (max_generation_xml == NULL) { max_generation_xml = copy_xml(generation); max_generation_from = strdup(join_from); } else if (cmp < 0 || (cmp == 0 && safe_str_eq(join_from, fsa_our_uname))) { crm_debug("%s has a better generation number than" " the current max %s", join_from, max_generation_from); if (max_generation_xml) { crm_log_xml_debug(max_generation_xml, "Max generation"); } crm_log_xml_debug(generation, "Their generation"); free(max_generation_from); free_xml(max_generation_xml); max_generation_from = strdup(join_from); max_generation_xml = copy_xml(join_ack->xml); } if (ack_nack_bool == FALSE) { /* NACK this client */ ack_nack = CRMD_JOINSTATE_NACK; crm_update_peer_join(__FUNCTION__, join_node, crm_join_nack); crm_err("Rejecting cluster join request from %s " CRM_XS " NACK join-%d ref=%s", join_from, join_id, ref); } else { crm_debug("join-%d: Welcoming node %s (ref %s)", join_id, join_from, ref); crm_update_peer_join(__FUNCTION__, join_node, crm_join_integrated); } crm_update_peer_expected(__FUNCTION__, join_node, ack_nack); crm_debug("%u nodes have been integrated into join-%d", crmd_join_phase_count(crm_join_integrated), join_id); if (check_join_state(cur_state, __FUNCTION__) == FALSE) { // Don't waste time by invoking the scheduler yet crm_debug("join-%d: Still waiting on %d outstanding offers", join_id, crmd_join_phase_count(crm_join_welcomed)); } } /* A_DC_JOIN_FINALIZE */ void do_dc_join_finalize(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { char *sync_from = NULL; int rc = pcmk_ok; /* This we can do straight away and avoid clients timing us out * while we compute the latest CIB */ crm_debug("Finalizing join-%d for %d clients", current_join_id, crmd_join_phase_count(crm_join_integrated)); crmd_join_phase_log(LOG_INFO); if (crmd_join_phase_count(crm_join_welcomed) != 0) { crm_info("Waiting for %d more nodes", crmd_join_phase_count(crm_join_welcomed)); /* crmd_fsa_stall(FALSE); Needed? */ return; } else if (crmd_join_phase_count(crm_join_integrated) == 0) { /* Nothing to do */ check_join_state(fsa_state, __FUNCTION__); return; } clear_bit(fsa_input_register, R_HAVE_CIB); if (max_generation_from == NULL || safe_str_eq(max_generation_from, fsa_our_uname)) { set_bit(fsa_input_register, R_HAVE_CIB); } if (is_set(fsa_input_register, R_IN_TRANSITION)) { crm_warn("Delaying response to cluster join offer while transition in progress " CRM_XS " join-%d", current_join_id); crmd_fsa_stall(FALSE); return; } if (max_generation_from && is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { /* ask for the agreed best CIB */ sync_from = strdup(max_generation_from); set_bit(fsa_input_register, R_CIB_ASKED); crm_notice("Syncing the Cluster Information Base from %s to rest of cluster " CRM_XS " join-%d", sync_from, current_join_id); crm_log_xml_notice(max_generation_xml, "Requested version"); } else { /* Send _our_ CIB out to everyone */ sync_from = strdup(fsa_our_uname); crm_info("join-%d: Syncing our CIB to the rest of the cluster", current_join_id); crm_log_xml_debug(max_generation_xml, "Requested version"); } rc = fsa_cib_conn->cmds->sync_from(fsa_cib_conn, sync_from, NULL, cib_quorum_override); fsa_register_cib_callback(rc, FALSE, sync_from, finalize_sync_callback); } void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { CRM_LOG_ASSERT(-EPERM != rc); clear_bit(fsa_input_register, R_CIB_ASKED); if (rc != pcmk_ok) { do_crm_log((rc == -pcmk_err_old_data ? LOG_WARNING : LOG_ERR), "Sync from %s failed: %s", (char *)user_data, pcmk_strerror(rc)); /* restart the whole join process */ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __FUNCTION__); } else if (AM_I_DC && fsa_state == S_FINALIZE_JOIN) { set_bit(fsa_input_register, R_HAVE_CIB); clear_bit(fsa_input_register, R_CIB_ASKED); /* make sure dc_uuid is re-set to us */ if (check_join_state(fsa_state, __FUNCTION__) == FALSE) { crm_debug("Notifying %d clients of join-%d results", crmd_join_phase_count(crm_join_integrated), current_join_id); g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL); } } else { crm_debug("No longer the DC in S_FINALIZE_JOIN: %s in %s", AM_I_DC ? "DC" : "controller", fsa_state2string(fsa_state)); } } static void join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; if (rc == pcmk_ok) { crm_debug("Join update %d complete", call_id); check_join_state(fsa_state, __FUNCTION__); } else { crm_err("Join update %d failed", call_id); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } /* A_DC_JOIN_PROCESS_ACK */ void do_dc_join_ack(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int join_id = -1; int call_id = 0; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *op = crm_element_value(join_ack->msg, F_CRM_TASK); const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); crm_node_t *peer = crm_get_peer(0, join_from); if (safe_str_neq(op, CRM_OP_JOIN_CONFIRM) || peer == NULL) { crm_debug("Ignoring op=%s message from %s", op, join_from); return; } crm_trace("Processing ack from %s", join_from); crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); if (peer->join != crm_join_finalized) { crm_info("Join not in progress: ignoring join-%d from %s (phase = %d)", join_id, join_from, peer->join); return; } else if (join_id != current_join_id) { crm_err("Invalid response from %s: join-%d vs. join-%d", join_from, join_id, current_join_id); crm_update_peer_join(__FUNCTION__, peer, crm_join_nack); return; } crm_update_peer_join(__FUNCTION__, peer, crm_join_confirmed); crm_info("join-%d: Updating node state to %s for %s", join_id, CRMD_JOINSTATE_MEMBER, join_from); /* update CIB with the current LRM status from the node * We don't need to notify the TE of these updates, a transition will * be started in due time */ erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local); if (safe_str_eq(join_from, fsa_our_uname)) { xmlNode *now_dc_lrmd_state = do_lrm_query(TRUE, fsa_our_uname); if (now_dc_lrmd_state != NULL) { crm_debug("Local executor state updated from query"); fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state, cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL); free_xml(now_dc_lrmd_state); } else { crm_warn("Local executor state updated from join acknowledgement because query failed"); fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml, cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL); } } else { crm_debug("Executor state for %s updated from join acknowledgement", join_from); fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml, cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL); } fsa_register_cib_callback(call_id, FALSE, NULL, join_update_complete_callback); crm_debug("join-%d: Registered callback for CIB status update %d", join_id, call_id); } void finalize_join_for(gpointer key, gpointer value, gpointer user_data) { xmlNode *acknak = NULL; xmlNode *tmp1 = NULL; crm_node_t *join_node = value; const char *join_to = join_node->uname; if(join_node->join != crm_join_integrated) { crm_trace("Skipping %s in state %d", join_to, join_node->join); return; } /* make sure a node entry exists for the new node */ crm_trace("Creating node entry for %s", join_to); tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE); set_uuid(tmp1, XML_ATTR_UUID, join_node); crm_xml_add(tmp1, XML_ATTR_UNAME, join_to); fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1); free_xml(tmp1); join_node = crm_get_peer(0, join_to); if (crm_is_peer_active(join_node) == FALSE) { /* * NACK'ing nodes that the membership layer doesn't know about yet * simply creates more churn * * Better to leave them waiting and let the join restart when * the new membership event comes in * * All other NACKs (due to versions etc) should still be processed */ crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_PENDING); return; } /* send the ack/nack to the node */ acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to); crm_debug("join-%d: ACK'ing join request from %s", current_join_id, join_to); crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE); crm_update_peer_join(__FUNCTION__, join_node, crm_join_finalized); crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_MEMBER); send_cluster_message(crm_get_peer(0, join_to), crm_msg_crmd, acknak, TRUE); free_xml(acknak); return; } gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source) { static unsigned long long highest_seq = 0; crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state)); if (saved_ccm_membership_id != crm_peer_seq) { crm_debug("%s: Membership changed since join started: %llu -> %llu (%llu)", source, saved_ccm_membership_id, crm_peer_seq, highest_seq); if(highest_seq < crm_peer_seq) { /* Don't spam the FSA with duplicates */ highest_seq = crm_peer_seq; register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } } else if (cur_state == S_INTEGRATION) { if (crmd_join_phase_count(crm_join_welcomed) == 0) { crm_debug("join-%d: Integration of %d peers complete: %s", current_join_id, crmd_join_phase_count(crm_join_integrated), source); register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL); return TRUE; } } else if (cur_state == S_FINALIZE_JOIN) { if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id); return TRUE; } else if (crmd_join_phase_count(crm_join_welcomed) != 0) { crm_debug("join-%d: Still waiting on %d welcomed nodes", current_join_id, crmd_join_phase_count(crm_join_welcomed)); crmd_join_phase_log(LOG_DEBUG); } else if (crmd_join_phase_count(crm_join_integrated) != 0) { crm_debug("join-%d: Still waiting on %d integrated nodes", current_join_id, crmd_join_phase_count(crm_join_integrated)); crmd_join_phase_log(LOG_DEBUG); } else if (crmd_join_phase_count(crm_join_finalized) != 0) { crm_debug("join-%d: Still waiting on %d finalized nodes", current_join_id, crmd_join_phase_count(crm_join_finalized)); crmd_join_phase_log(LOG_DEBUG); } else { crm_debug("join-%d complete: %s", current_join_id, source); register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL); return TRUE; } } return FALSE; } void do_dc_join_final(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_debug("Ensuring DC, quorum and node attributes are up-to-date"); crm_update_quorum(crm_have_quorum, TRUE); } int crmd_join_phase_count(enum crm_join_phase phase) { int count = 0; crm_node_t *peer; GHashTableIter iter; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { if(peer->join == phase) { count++; } } return count; } void crmd_join_phase_log(int level) { crm_node_t *peer; GHashTableIter iter; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname, crm_join_phase_str(peer->join)); } } diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c index 4b76355c8d..e19102bb1e 100644 --- a/daemons/controld/controld_schedulerd.c +++ b/daemons/controld/controld_schedulerd.c @@ -1,468 +1,468 @@ /* * Copyright 2004-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include /* pid_t, sleep, ssize_t */ #include #include #include #include #include #include static mainloop_io_t *pe_subsystem = NULL; /*! * \internal * \brief Close any scheduler connection and free associated memory */ void pe_subsystem_free(void) { clear_bit(fsa_input_register, R_PE_REQUIRED); if (pe_subsystem) { controld_expect_sched_reply(NULL); mainloop_del_ipc_client(pe_subsystem); pe_subsystem = NULL; clear_bit(fsa_input_register, R_PE_CONNECTED); } } /*! * \internal * \brief Save CIB query result to file, raising FSA error * * \param[in] msg Ignored * \param[in] call_id Call ID of CIB query * \param[in] rc Return code of CIB query * \param[in] output Result of CIB query * \param[in] user_data Unique identifier for filename (will be freed) * * \note This is intended to be called after a scheduler connection fails. */ static void save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { char *id = user_data; register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); CRM_CHECK(id != NULL, return); if (rc == pcmk_ok) { char *filename = crm_strdup_printf(PE_STATE_DIR "/pe-core-%s.bz2", id); if (write_xml_file(output, filename, TRUE) < 0) { crm_err("Could not save Cluster Information Base to %s after scheduler crash", filename); } else { crm_notice("Saved Cluster Information Base to %s after scheduler crash", filename); } free(filename); } } /*! * \internal * \brief Respond to scheduler connection failure * * \param[in] user_data Ignored */ static void pe_ipc_destroy(gpointer user_data) { // If we aren't connected to the scheduler, we can't expect a reply controld_expect_sched_reply(NULL); if (is_set(fsa_input_register, R_PE_REQUIRED)) { int rc = pcmk_ok; char *uuid_str = crm_generate_uuid(); crm_crit("Connection to the scheduler failed " CRM_XS " uuid=%s", uuid_str); /* * The scheduler died... * * Save the current CIB so that we have a chance of * figuring out what killed it. * * Delay raising the I_ERROR until the query below completes or * 5s is up, whichever comes first. * */ rc = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); fsa_register_cib_callback(rc, FALSE, uuid_str, save_cib_contents); } else { crm_info("Connection to the scheduler released"); } clear_bit(fsa_input_register, R_PE_CONNECTED); pe_subsystem = NULL; mainloop_set_trigger(fsa_source); return; } /*! * \internal * \brief Handle message from scheduler connection * * \param[in] buffer XML message (will be freed) * \param[in] length Ignored * \param[in] userdata Ignored * * \return 0 */ static int pe_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata) { xmlNode *msg = string2xml(buffer); if (msg) { route_message(C_IPC_MESSAGE, msg); } free_xml(msg); return 0; } /*! * \internal * \brief Make new connection to scheduler * * \return TRUE on success, FALSE otherwise */ static bool pe_subsystem_new() { static struct ipc_client_callbacks pe_callbacks = { .dispatch = pe_ipc_dispatch, .destroy = pe_ipc_destroy }; set_bit(fsa_input_register, R_PE_REQUIRED); pe_subsystem = mainloop_add_ipc_client(CRM_SYSTEM_PENGINE, G_PRIORITY_DEFAULT, 5 * 1024 * 1024 /* 5MB */, NULL, &pe_callbacks); if (pe_subsystem == NULL) { return FALSE; } set_bit(fsa_input_register, R_PE_CONNECTED); return TRUE; } /*! * \internal - * \brief Send an XML message to the PE + * \brief Send an XML message to the scheduler * * \param[in] cmd XML message to send * * \return pcmk_ok on success, -errno otherwise */ static int pe_subsystem_send(xmlNode *cmd) { if (pe_subsystem) { int sent = crm_ipc_send(mainloop_get_ipc_client(pe_subsystem), cmd, 0, 0, NULL); if (sent == 0) { sent = -ENODATA; } else if (sent > 0) { sent = pcmk_ok; } return sent; } return -ENOTCONN; } static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); /* A_PE_START, A_PE_STOP, O_PE_RESTART */ void do_pe_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (action & A_PE_STOP) { pe_subsystem_free(); } if ((action & A_PE_START) && (is_not_set(fsa_input_register, R_PE_CONNECTED))) { if (cur_state == S_STOPPING) { crm_info("Ignoring request to connect to scheduler while shutting down"); } else if (!pe_subsystem_new()) { crm_warn("Could not connect to scheduler"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } } int fsa_pe_query = 0; char *fsa_pe_ref = NULL; static mainloop_timer_t *controld_sched_timer = NULL; // @TODO Make this a configurable cluster option if there's demand for it #define SCHED_TIMEOUT_MS (120000) /*! * \internal * \brief Handle a timeout waiting for scheduler reply * * \param[in] user_data Ignored * * \return FALSE (indicating that timer should not be restarted) */ static gboolean controld_sched_timeout(gpointer user_data) { if (AM_I_DC) { /* If this node is the DC but can't communicate with the scheduler, just * exit (and likely get fenced) so this node doesn't interfere with any * further DC elections. * * @TODO We could try something less drastic first, like disconnecting * and reconnecting to the scheduler, but something is likely going * seriously wrong, so perhaps it's better to just fail as quickly as * possible. */ crmd_exit(CRM_EX_FATAL); } return FALSE; } void controld_stop_sched_timer() { if (controld_sched_timer && fsa_pe_ref) { crm_trace("Stopping timer for scheduler reply %s", fsa_pe_ref); } mainloop_timer_stop(controld_sched_timer); } /*! * \internal * \brief Set the scheduler request currently being waited on * * \param[in] msg Request to expect reply to (or NULL for none) */ void controld_expect_sched_reply(xmlNode *msg) { char *ref = NULL; if (msg) { ref = crm_element_value_copy(msg, XML_ATTR_REFERENCE); CRM_ASSERT(ref != NULL); if (controld_sched_timer == NULL) { controld_sched_timer = mainloop_timer_add("scheduler_reply_timer", SCHED_TIMEOUT_MS, FALSE, controld_sched_timeout, NULL); } mainloop_timer_start(controld_sched_timer); } else { controld_stop_sched_timer(); } free(fsa_pe_ref); fsa_pe_ref = ref; } /*! * \internal * \brief Free the scheduler reply timer */ void controld_free_sched_timer() { if (controld_sched_timer != NULL) { mainloop_timer_del(controld_sched_timer); controld_sched_timer = NULL; } } /* A_PE_INVOKE */ void do_pe_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (AM_I_DC == FALSE) { crm_err("Not invoking scheduler because not DC: %s", fsa_action2string(action)); return; } if (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) { if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Cannot shut down gracefully without the scheduler"); register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL); } else { crm_info("Waiting for the scheduler to connect"); crmd_fsa_stall(FALSE); register_fsa_action(A_PE_START); } return; } if (cur_state != S_POLICY_ENGINE) { crm_notice("Not invoking scheduler because in state %s", fsa_state2string(cur_state)); return; } if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!"); /* start the join from scratch */ register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL); return; } fsa_pe_query = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query, fsa_state2string(fsa_state)); controld_expect_sched_reply(NULL); fsa_register_cib_callback(fsa_pe_query, FALSE, NULL, do_pe_invoke_callback); } static void force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value) { int max = 0; int lpc = 0; char *xpath_string = NULL; xmlXPathObjectPtr xpathObj = NULL; xpath_string = crm_strdup_printf("%.128s//%s//nvpair[@name='%.128s']", get_object_path(XML_CIB_TAG_CRMCONFIG), XML_CIB_TAG_PROPSET, attr_name); xpathObj = xpath_search(xml, xpath_string); max = numXpathResults(xpathObj); free(xpath_string); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); crm_trace("Forcing %s/%s = %s", ID(match), attr_name, attr_value); crm_xml_add(match, XML_NVPAIR_ATTR_VALUE, attr_value); } if(max == 0) { xmlNode *configuration = NULL; xmlNode *crm_config = NULL; xmlNode *cluster_property_set = NULL; crm_trace("Creating %s-%s for %s=%s", CIB_OPTIONS_FIRST, attr_name, attr_name, attr_value); configuration = find_entity(xml, XML_CIB_TAG_CONFIGURATION, NULL); if (configuration == NULL) { configuration = create_xml_node(xml, XML_CIB_TAG_CONFIGURATION); } crm_config = find_entity(configuration, XML_CIB_TAG_CRMCONFIG, NULL); if (crm_config == NULL) { crm_config = create_xml_node(configuration, XML_CIB_TAG_CRMCONFIG); } cluster_property_set = find_entity(crm_config, XML_CIB_TAG_PROPSET, NULL); if (cluster_property_set == NULL) { cluster_property_set = create_xml_node(crm_config, XML_CIB_TAG_PROPSET); crm_xml_add(cluster_property_set, XML_ATTR_ID, CIB_OPTIONS_FIRST); } xml = create_xml_node(cluster_property_set, XML_CIB_TAG_NVPAIR); crm_xml_set_id(xml, "%s-%s", CIB_OPTIONS_FIRST, attr_name); crm_xml_add(xml, XML_NVPAIR_ATTR_NAME, attr_name); crm_xml_add(xml, XML_NVPAIR_ATTR_VALUE, attr_value); } freeXpathObject(xpathObj); } static void do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { xmlNode *cmd = NULL; pid_t watchdog = pcmk_locate_sbd(); if (rc != pcmk_ok) { crm_err("Could not retrieve the Cluster Information Base: %s " CRM_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); return; } else if (call_id != fsa_pe_query) { crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query); return; } else if (AM_I_DC == FALSE || is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) { crm_debug("No need to invoke the scheduler anymore"); return; } else if (fsa_state != S_POLICY_ENGINE) { crm_debug("Discarding scheduler request in state: %s", fsa_state2string(fsa_state)); return; /* this callback counts as 1 */ } else if (num_cib_op_callbacks() > 1) { crm_debug("Re-asking for the CIB: %d other peer updates still pending", (num_cib_op_callbacks() - 1)); sleep(1); register_fsa_action(A_PE_INVOKE); return; } else if (fsa_state != S_POLICY_ENGINE) { crm_err("Invoking scheduler in state: %s", fsa_state2string(fsa_state)); return; } CRM_LOG_ASSERT(output != NULL); /* Refresh the remote node cache and the known node cache when the * scheduler is invoked */ crm_peer_caches_refresh(output); crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid); crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum); force_local_option(output, XML_ATTR_HAVE_WATCHDOG, watchdog?"true":"false"); if (ever_had_quorum && crm_have_quorum == FALSE) { crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1); } cmd = create_request(CRM_OP_PECALC, output, NULL, CRM_SYSTEM_PENGINE, CRM_SYSTEM_DC, NULL); rc = pe_subsystem_send(cmd); if (rc < 0) { crm_err("Could not contact the scheduler: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); } else { controld_expect_sched_reply(cmd); crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, quorate=%d", fsa_pe_query, fsa_pe_ref, crm_peer_seq, fsa_has_quorum); } free_xml(cmd); } diff --git a/lib/pengine/bundle.c b/lib/pengine/bundle.c index ebfd58582d..fa8d778730 100644 --- a/lib/pengine/bundle.c +++ b/lib/pengine/bundle.c @@ -1,1939 +1,1939 @@ /* * Copyright 2004-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #define PE__VARIANT_BUNDLE 1 #include "./variant.h" static char * next_ip(const char *last_ip) { unsigned int oct1 = 0; unsigned int oct2 = 0; unsigned int oct3 = 0; unsigned int oct4 = 0; int rc = sscanf(last_ip, "%u.%u.%u.%u", &oct1, &oct2, &oct3, &oct4); if (rc != 4) { /*@ TODO check for IPv6 */ return NULL; } else if (oct3 > 253) { return NULL; } else if (oct4 > 253) { ++oct3; oct4 = 1; } else { ++oct4; } return crm_strdup_printf("%u.%u.%u.%u", oct1, oct2, oct3, oct4); } static int allocate_ip(pe__bundle_variant_data_t *data, pe__bundle_replica_t *replica, char *buffer, int max) { if(data->ip_range_start == NULL) { return 0; } else if(data->ip_last) { replica->ipaddr = next_ip(data->ip_last); } else { replica->ipaddr = strdup(data->ip_range_start); } data->ip_last = replica->ipaddr; switch (data->agent_type) { case PE__CONTAINER_AGENT_DOCKER: case PE__CONTAINER_AGENT_PODMAN: if (data->add_host) { return snprintf(buffer, max, " --add-host=%s-%d:%s", data->prefix, replica->offset, replica->ipaddr); } case PE__CONTAINER_AGENT_RKT: return snprintf(buffer, max, " --hosts-entry=%s=%s-%d", replica->ipaddr, data->prefix, replica->offset); default: // PE__CONTAINER_AGENT_UNKNOWN break; } return 0; } static xmlNode * create_resource(const char *name, const char *provider, const char *kind) { xmlNode *rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); crm_xml_add(rsc, XML_ATTR_ID, name); crm_xml_add(rsc, XML_AGENT_ATTR_CLASS, PCMK_RESOURCE_CLASS_OCF); crm_xml_add(rsc, XML_AGENT_ATTR_PROVIDER, provider); crm_xml_add(rsc, XML_ATTR_TYPE, kind); return rsc; } /*! * \internal * \brief Check whether cluster can manage resource inside container * * \param[in] data Container variant data * * \return TRUE if networking configuration is acceptable, FALSE otherwise * * \note The resource is manageable if an IP range or control port has been * specified. If a control port is used without an IP range, replicas per * host must be 1. */ static bool valid_network(pe__bundle_variant_data_t *data) { if(data->ip_range_start) { return TRUE; } if(data->control_port) { if(data->nreplicas_per_host > 1) { pe_err("Specifying the 'control-port' for %s requires 'replicas-per-host=1'", data->prefix); data->nreplicas_per_host = 1; /* @TODO to be sure: clear_bit(rsc->flags, pe_rsc_unique); */ } return TRUE; } return FALSE; } static bool create_ip_resource(pe_resource_t *parent, pe__bundle_variant_data_t *data, pe__bundle_replica_t *replica, pe_working_set_t *data_set) { if(data->ip_range_start) { char *id = NULL; xmlNode *xml_ip = NULL; xmlNode *xml_obj = NULL; id = crm_strdup_printf("%s-ip-%s", data->prefix, replica->ipaddr); crm_xml_sanitize_id(id); xml_ip = create_resource(id, "heartbeat", "IPaddr2"); free(id); xml_obj = create_xml_node(xml_ip, XML_TAG_ATTR_SETS); crm_xml_set_id(xml_obj, "%s-attributes-%d", data->prefix, replica->offset); crm_create_nvpair_xml(xml_obj, NULL, "ip", replica->ipaddr); if(data->host_network) { crm_create_nvpair_xml(xml_obj, NULL, "nic", data->host_network); } if(data->host_netmask) { crm_create_nvpair_xml(xml_obj, NULL, "cidr_netmask", data->host_netmask); } else { crm_create_nvpair_xml(xml_obj, NULL, "cidr_netmask", "32"); } xml_obj = create_xml_node(xml_ip, "operations"); crm_create_op_xml(xml_obj, ID(xml_ip), "monitor", "60s", NULL); // TODO: Other ops? Timeouts and intervals from underlying resource? if (!common_unpack(xml_ip, &replica->ip, parent, data_set)) { return FALSE; } parent->children = g_list_append(parent->children, replica->ip); } return TRUE; } static bool create_docker_resource(pe_resource_t *parent, pe__bundle_variant_data_t *data, pe__bundle_replica_t *replica, pe_working_set_t *data_set) { int offset = 0, max = 4096; char *buffer = calloc(1, max+1); int doffset = 0, dmax = 1024; char *dbuffer = calloc(1, dmax+1); char *id = NULL; xmlNode *xml_container = NULL; xmlNode *xml_obj = NULL; id = crm_strdup_printf("%s-docker-%d", data->prefix, replica->offset); crm_xml_sanitize_id(id); xml_container = create_resource(id, "heartbeat", PE__CONTAINER_AGENT_DOCKER_S); free(id); xml_obj = create_xml_node(xml_container, XML_TAG_ATTR_SETS); crm_xml_set_id(xml_obj, "%s-attributes-%d", data->prefix, replica->offset); crm_create_nvpair_xml(xml_obj, NULL, "image", data->image); crm_create_nvpair_xml(xml_obj, NULL, "allow_pull", XML_BOOLEAN_TRUE); crm_create_nvpair_xml(xml_obj, NULL, "force_kill", XML_BOOLEAN_FALSE); crm_create_nvpair_xml(xml_obj, NULL, "reuse", XML_BOOLEAN_FALSE); offset += snprintf(buffer+offset, max-offset, " --restart=no"); /* Set a container hostname only if we have an IP to map it to. * The user can set -h or --uts=host themselves if they want a nicer * name for logs, but this makes applications happy who need their * hostname to match the IP they bind to. */ if (data->ip_range_start != NULL) { offset += snprintf(buffer+offset, max-offset, " -h %s-%d", data->prefix, replica->offset); } offset += snprintf(buffer+offset, max-offset, " -e PCMK_stderr=1"); if (data->container_network) { #if 0 offset += snprintf(buffer+offset, max-offset, " --link-local-ip=%s", replica->ipaddr); #endif offset += snprintf(buffer+offset, max-offset, " --net=%s", data->container_network); } if(data->control_port) { offset += snprintf(buffer+offset, max-offset, " -e PCMK_remote_port=%s", data->control_port); } else { offset += snprintf(buffer+offset, max-offset, " -e PCMK_remote_port=%d", DEFAULT_REMOTE_PORT); } for(GListPtr pIter = data->mounts; pIter != NULL; pIter = pIter->next) { pe__bundle_mount_t *mount = pIter->data; if (is_set(mount->flags, pe__bundle_mount_subdir)) { char *source = crm_strdup_printf( "%s/%s-%d", mount->source, data->prefix, replica->offset); if(doffset > 0) { doffset += snprintf(dbuffer+doffset, dmax-doffset, ","); } doffset += snprintf(dbuffer+doffset, dmax-doffset, "%s", source); offset += snprintf(buffer+offset, max-offset, " -v %s:%s", source, mount->target); free(source); } else { offset += snprintf(buffer+offset, max-offset, " -v %s:%s", mount->source, mount->target); } if(mount->options) { offset += snprintf(buffer+offset, max-offset, ":%s", mount->options); } } for(GListPtr pIter = data->ports; pIter != NULL; pIter = pIter->next) { pe__bundle_port_t *port = pIter->data; if (replica->ipaddr) { offset += snprintf(buffer+offset, max-offset, " -p %s:%s:%s", replica->ipaddr, port->source, port->target); } else if(safe_str_neq(data->container_network, "host")) { // No need to do port mapping if net=host offset += snprintf(buffer+offset, max-offset, " -p %s:%s", port->source, port->target); } } if (data->launcher_options) { offset += snprintf(buffer+offset, max-offset, " %s", data->launcher_options); } if (data->container_host_options) { offset += snprintf(buffer + offset, max - offset, " %s", data->container_host_options); } crm_create_nvpair_xml(xml_obj, NULL, "run_opts", buffer); free(buffer); crm_create_nvpair_xml(xml_obj, NULL, "mount_points", dbuffer); free(dbuffer); if (replica->child) { if (data->container_command) { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", data->container_command); } else { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", SBIN_DIR "/pacemaker-remoted"); } /* TODO: Allow users to specify their own? * * We just want to know if the container is alive, we'll * monitor the child independently */ crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true"); /* } else if(child && data->untrusted) { * Support this use-case? * * The ability to have resources started/stopped by us, but * unable to set attributes, etc. * * Arguably better to control API access this with ACLs like * "normal" remote nodes * * crm_create_nvpair_xml(xml_obj, NULL, * "run_cmd", * "/usr/libexec/pacemaker/pacemaker-execd"); * crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", * "/usr/libexec/pacemaker/lrmd_internal_ctl -c poke"); */ } else { if (data->container_command) { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", data->container_command); } /* TODO: Allow users to specify their own? * * We don't know what's in the container, so we just want * to know if it is alive */ crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true"); } xml_obj = create_xml_node(xml_container, "operations"); crm_create_op_xml(xml_obj, ID(xml_container), "monitor", "60s", NULL); // TODO: Other ops? Timeouts and intervals from underlying resource? if (!common_unpack(xml_container, &replica->container, parent, data_set)) { return FALSE; } parent->children = g_list_append(parent->children, replica->container); return TRUE; } static bool create_podman_resource(pe_resource_t *parent, pe__bundle_variant_data_t *data, pe__bundle_replica_t *replica, pe_working_set_t *data_set) { int offset = 0, max = 4096; char *buffer = calloc(1, max+1); int doffset = 0, dmax = 1024; char *dbuffer = calloc(1, dmax+1); char *id = NULL; xmlNode *xml_container = NULL; xmlNode *xml_obj = NULL; id = crm_strdup_printf("%s-podman-%d", data->prefix, replica->offset); crm_xml_sanitize_id(id); xml_container = create_resource(id, "heartbeat", PE__CONTAINER_AGENT_PODMAN_S); free(id); xml_obj = create_xml_node(xml_container, XML_TAG_ATTR_SETS); crm_xml_set_id(xml_obj, "%s-attributes-%d", data->prefix, replica->offset); crm_create_nvpair_xml(xml_obj, NULL, "image", data->image); crm_create_nvpair_xml(xml_obj, NULL, "allow_pull", XML_BOOLEAN_TRUE); crm_create_nvpair_xml(xml_obj, NULL, "force_kill", XML_BOOLEAN_FALSE); crm_create_nvpair_xml(xml_obj, NULL, "reuse", XML_BOOLEAN_FALSE); // FIXME: (bandini 2018-08) podman has no restart policies //offset += snprintf(buffer+offset, max-offset, " --restart=no"); /* Set a container hostname only if we have an IP to map it to. * The user can set -h or --uts=host themselves if they want a nicer * name for logs, but this makes applications happy who need their * hostname to match the IP they bind to. */ if (data->ip_range_start != NULL) { offset += snprintf(buffer+offset, max-offset, " -h %s-%d", data->prefix, replica->offset); } offset += snprintf(buffer+offset, max-offset, " -e PCMK_stderr=1"); if (data->container_network) { #if 0 // podman has no support for --link-local-ip offset += snprintf(buffer+offset, max-offset, " --link-local-ip=%s", replica->ipaddr); #endif offset += snprintf(buffer+offset, max-offset, " --net=%s", data->container_network); } if(data->control_port) { offset += snprintf(buffer+offset, max-offset, " -e PCMK_remote_port=%s", data->control_port); } else { offset += snprintf(buffer+offset, max-offset, " -e PCMK_remote_port=%d", DEFAULT_REMOTE_PORT); } for(GListPtr pIter = data->mounts; pIter != NULL; pIter = pIter->next) { pe__bundle_mount_t *mount = pIter->data; if (is_set(mount->flags, pe__bundle_mount_subdir)) { char *source = crm_strdup_printf( "%s/%s-%d", mount->source, data->prefix, replica->offset); if(doffset > 0) { doffset += snprintf(dbuffer+doffset, dmax-doffset, ","); } doffset += snprintf(dbuffer+doffset, dmax-doffset, "%s", source); offset += snprintf(buffer+offset, max-offset, " -v %s:%s", source, mount->target); free(source); } else { offset += snprintf(buffer+offset, max-offset, " -v %s:%s", mount->source, mount->target); } if(mount->options) { offset += snprintf(buffer+offset, max-offset, ":%s", mount->options); } } for(GListPtr pIter = data->ports; pIter != NULL; pIter = pIter->next) { pe__bundle_port_t *port = pIter->data; if (replica->ipaddr) { offset += snprintf(buffer+offset, max-offset, " -p %s:%s:%s", replica->ipaddr, port->source, port->target); } else if(safe_str_neq(data->container_network, "host")) { // No need to do port mapping if net=host offset += snprintf(buffer+offset, max-offset, " -p %s:%s", port->source, port->target); } } if (data->launcher_options) { offset += snprintf(buffer+offset, max-offset, " %s", data->launcher_options); } if (data->container_host_options) { offset += snprintf(buffer + offset, max - offset, " %s", data->container_host_options); } crm_create_nvpair_xml(xml_obj, NULL, "run_opts", buffer); free(buffer); crm_create_nvpair_xml(xml_obj, NULL, "mount_points", dbuffer); free(dbuffer); if (replica->child) { if (data->container_command) { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", data->container_command); } else { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", SBIN_DIR "/pacemaker-remoted"); } /* TODO: Allow users to specify their own? * * We just want to know if the container is alive, we'll * monitor the child independently */ crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true"); /* } else if(child && data->untrusted) { * Support this use-case? * * The ability to have resources started/stopped by us, but * unable to set attributes, etc. * * Arguably better to control API access this with ACLs like * "normal" remote nodes * * crm_create_nvpair_xml(xml_obj, NULL, * "run_cmd", * "/usr/libexec/pacemaker/pacemaker-execd"); * crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", * "/usr/libexec/pacemaker/lrmd_internal_ctl -c poke"); */ } else { if (data->container_command) { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", data->container_command); } /* TODO: Allow users to specify their own? * * We don't know what's in the container, so we just want * to know if it is alive */ crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true"); } xml_obj = create_xml_node(xml_container, "operations"); crm_create_op_xml(xml_obj, ID(xml_container), "monitor", "60s", NULL); // TODO: Other ops? Timeouts and intervals from underlying resource? if (!common_unpack(xml_container, &replica->container, parent, data_set)) { return FALSE; } parent->children = g_list_append(parent->children, replica->container); return TRUE; } static bool create_rkt_resource(pe_resource_t *parent, pe__bundle_variant_data_t *data, pe__bundle_replica_t *replica, pe_working_set_t *data_set) { int offset = 0, max = 4096; char *buffer = calloc(1, max+1); int doffset = 0, dmax = 1024; char *dbuffer = calloc(1, dmax+1); char *id = NULL; xmlNode *xml_container = NULL; xmlNode *xml_obj = NULL; int volid = 0; id = crm_strdup_printf("%s-rkt-%d", data->prefix, replica->offset); crm_xml_sanitize_id(id); xml_container = create_resource(id, "heartbeat", PE__CONTAINER_AGENT_RKT_S); free(id); xml_obj = create_xml_node(xml_container, XML_TAG_ATTR_SETS); crm_xml_set_id(xml_obj, "%s-attributes-%d", data->prefix, replica->offset); crm_create_nvpair_xml(xml_obj, NULL, "image", data->image); crm_create_nvpair_xml(xml_obj, NULL, "allow_pull", "true"); crm_create_nvpair_xml(xml_obj, NULL, "force_kill", "false"); crm_create_nvpair_xml(xml_obj, NULL, "reuse", "false"); /* Set a container hostname only if we have an IP to map it to. * The user can set -h or --uts=host themselves if they want a nicer * name for logs, but this makes applications happy who need their * hostname to match the IP they bind to. */ if (data->ip_range_start != NULL) { offset += snprintf(buffer+offset, max-offset, " --hostname=%s-%d", data->prefix, replica->offset); } offset += snprintf(buffer+offset, max-offset, " --environment=PCMK_stderr=1"); if (data->container_network) { #if 0 offset += snprintf(buffer+offset, max-offset, " --link-local-ip=%s", replica->ipaddr); #endif offset += snprintf(buffer+offset, max-offset, " --net=%s", data->container_network); } if(data->control_port) { offset += snprintf(buffer+offset, max-offset, " --environment=PCMK_remote_port=%s", data->control_port); } else { offset += snprintf(buffer+offset, max-offset, " --environment=PCMK_remote_port=%d", DEFAULT_REMOTE_PORT); } for(GListPtr pIter = data->mounts; pIter != NULL; pIter = pIter->next) { pe__bundle_mount_t *mount = pIter->data; if (is_set(mount->flags, pe__bundle_mount_subdir)) { char *source = crm_strdup_printf( "%s/%s-%d", mount->source, data->prefix, replica->offset); if(doffset > 0) { doffset += snprintf(dbuffer+doffset, dmax-doffset, ","); } doffset += snprintf(dbuffer+doffset, dmax-doffset, "%s", source); offset += snprintf(buffer+offset, max-offset, " --volume vol%d,kind=host,source=%s", volid, source); if(mount->options) { offset += snprintf(buffer+offset, max-offset, ",%s", mount->options); } offset += snprintf(buffer+offset, max-offset, " --mount volume=vol%d,target=%s", volid, mount->target); free(source); } else { offset += snprintf(buffer+offset, max-offset, " --volume vol%d,kind=host,source=%s", volid, mount->source); if(mount->options) { offset += snprintf(buffer+offset, max-offset, ",%s", mount->options); } offset += snprintf(buffer+offset, max-offset, " --mount volume=vol%d,target=%s", volid, mount->target); } volid++; } for(GListPtr pIter = data->ports; pIter != NULL; pIter = pIter->next) { pe__bundle_port_t *port = pIter->data; if (replica->ipaddr) { offset += snprintf(buffer+offset, max-offset, " --port=%s:%s:%s", port->target, replica->ipaddr, port->source); } else { offset += snprintf(buffer+offset, max-offset, " --port=%s:%s", port->target, port->source); } } if (data->launcher_options) { offset += snprintf(buffer+offset, max-offset, " %s", data->launcher_options); } if (data->container_host_options) { offset += snprintf(buffer + offset, max - offset, " %s", data->container_host_options); } crm_create_nvpair_xml(xml_obj, NULL, "run_opts", buffer); free(buffer); crm_create_nvpair_xml(xml_obj, NULL, "mount_points", dbuffer); free(dbuffer); if (replica->child) { if (data->container_command) { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", data->container_command); } else { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", SBIN_DIR "/pacemaker-remoted"); } /* TODO: Allow users to specify their own? * * We just want to know if the container is alive, we'll * monitor the child independently */ crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true"); /* } else if(child && data->untrusted) { * Support this use-case? * * The ability to have resources started/stopped by us, but * unable to set attributes, etc. * * Arguably better to control API access this with ACLs like * "normal" remote nodes * * crm_create_nvpair_xml(xml_obj, NULL, * "run_cmd", * "/usr/libexec/pacemaker/pacemaker-execd"); * crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", * "/usr/libexec/pacemaker/lrmd_internal_ctl -c poke"); */ } else { if (data->container_command) { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", data->container_command); } /* TODO: Allow users to specify their own? * * We don't know what's in the container, so we just want * to know if it is alive */ crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true"); } xml_obj = create_xml_node(xml_container, "operations"); crm_create_op_xml(xml_obj, ID(xml_container), "monitor", "60s", NULL); // TODO: Other ops? Timeouts and intervals from underlying resource? if (!common_unpack(xml_container, &replica->container, parent, data_set)) { return FALSE; } parent->children = g_list_append(parent->children, replica->container); return TRUE; } /*! * \brief Ban a node from a resource's (and its children's) allowed nodes list * * \param[in,out] rsc Resource to modify * \param[in] uname Name of node to ban */ static void disallow_node(resource_t *rsc, const char *uname) { gpointer match = g_hash_table_lookup(rsc->allowed_nodes, uname); if (match) { ((pe_node_t *) match)->weight = -INFINITY; ((pe_node_t *) match)->rsc_discover_mode = pe_discover_never; } if (rsc->children) { GListPtr child; for (child = rsc->children; child != NULL; child = child->next) { disallow_node((resource_t *) (child->data), uname); } } } static bool create_remote_resource(pe_resource_t *parent, pe__bundle_variant_data_t *data, pe__bundle_replica_t *replica, pe_working_set_t *data_set) { if (replica->child && valid_network(data)) { GHashTableIter gIter; GListPtr rsc_iter = NULL; node_t *node = NULL; xmlNode *xml_remote = NULL; char *id = crm_strdup_printf("%s-%d", data->prefix, replica->offset); char *port_s = NULL; const char *uname = NULL; const char *connect_name = NULL; if (remote_id_conflict(id, data_set)) { free(id); // The biggest hammer we have id = crm_strdup_printf("pcmk-internal-%s-remote-%d", replica->child->id, replica->offset); CRM_ASSERT(remote_id_conflict(id, data_set) == FALSE); } /* REMOTE_CONTAINER_HACK: Using "#uname" as the server name when the * connection does not have its own IP is a magic string that we use to * support nested remotes (i.e. a bundle running on a remote node). */ connect_name = (replica->ipaddr? replica->ipaddr : "#uname"); if (data->control_port == NULL) { port_s = crm_itoa(DEFAULT_REMOTE_PORT); } /* This sets replica->container as replica->remote's container, which is - * similar to what happens with guest nodes. This is how the PE knows - * that the bundle node is fenced by recovering the container, and that - * remote should be ordered relative to the container. + * similar to what happens with guest nodes. This is how the scheduler + * knows that the bundle node is fenced by recovering the container, and + * that remote should be ordered relative to the container. */ xml_remote = pe_create_remote_xml(NULL, id, replica->container->id, NULL, NULL, NULL, connect_name, (data->control_port? data->control_port : port_s)); free(port_s); /* Abandon our created ID, and pull the copy from the XML, because we * need something that will get freed during data set cleanup to use as * the node ID and uname. */ free(id); id = NULL; uname = ID(xml_remote); /* Ensure a node has been created for the guest (it may have already * been, if it has a permanent node attribute), and ensure its weight is * -INFINITY so no other resources can run on it. */ node = pe_find_node(data_set->nodes, uname); if (node == NULL) { node = pe_create_node(uname, uname, "remote", "-INFINITY", data_set); } else { node->weight = -INFINITY; } node->rsc_discover_mode = pe_discover_never; /* unpack_remote_nodes() ensures that each remote node and guest node * has a pe_node_t entry. Ideally, it would do the same for bundle nodes. * Unfortunately, a bundle has to be mostly unpacked before it's obvious * what nodes will be needed, so we do it just above. * * Worse, that means that the node may have been utilized while * unpacking other resources, without our weight correction. The most * likely place for this to happen is when common_unpack() calls * resource_location() to set a default score in symmetric clusters. * This adds a node *copy* to each resource's allowed nodes, and these * copies will have the wrong weight. * * As a hacky workaround, fix those copies here. * * @TODO Possible alternative: ensure bundles are unpacked before other * resources, so the weight is correct before any copies are made. */ for (rsc_iter = data_set->resources; rsc_iter; rsc_iter = rsc_iter->next) { disallow_node((resource_t *) (rsc_iter->data), uname); } replica->node = node_copy(node); replica->node->weight = 500; replica->node->rsc_discover_mode = pe_discover_exclusive; /* Ensure the node shows up as allowed and with the correct discovery set */ if (replica->child->allowed_nodes != NULL) { g_hash_table_destroy(replica->child->allowed_nodes); } replica->child->allowed_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free); g_hash_table_insert(replica->child->allowed_nodes, (gpointer) replica->node->details->id, node_copy(replica->node)); { node_t *copy = node_copy(replica->node); copy->weight = -INFINITY; g_hash_table_insert(replica->child->parent->allowed_nodes, (gpointer) replica->node->details->id, copy); } if (!common_unpack(xml_remote, &replica->remote, parent, data_set)) { return FALSE; } g_hash_table_iter_init(&gIter, replica->remote->allowed_nodes); while (g_hash_table_iter_next(&gIter, NULL, (void **)&node)) { if (pe__is_guest_or_remote_node(node)) { /* Remote resources can only run on 'normal' cluster node */ node->weight = -INFINITY; } } replica->node->details->remote_rsc = replica->remote; // Ensure pe__is_guest_node() functions correctly immediately replica->remote->container = replica->container; /* A bundle's #kind is closer to "container" (guest node) than the * "remote" set by pe_create_node(). */ g_hash_table_insert(replica->node->details->attrs, strdup(CRM_ATTR_KIND), strdup("container")); /* One effect of this is that setup_container() will add * replica->remote to replica->container's fillers, which will make * pe__resource_contains_guest_node() true for replica->container. * * replica->child does NOT get added to replica->container's fillers. * The only noticeable effect if it did would be for its fail count to * be taken into account when checking replica->container's migration * threshold. */ parent->children = g_list_append(parent->children, replica->remote); } return TRUE; } static bool create_container(pe_resource_t *parent, pe__bundle_variant_data_t *data, pe__bundle_replica_t *replica, pe_working_set_t *data_set) { switch (data->agent_type) { case PE__CONTAINER_AGENT_DOCKER: if (!create_docker_resource(parent, data, replica, data_set)) { return FALSE; } break; case PE__CONTAINER_AGENT_PODMAN: if (!create_podman_resource(parent, data, replica, data_set)) { return FALSE; } break; case PE__CONTAINER_AGENT_RKT: if (!create_rkt_resource(parent, data, replica, data_set)) { return FALSE; } break; default: // PE__CONTAINER_AGENT_UNKNOWN return FALSE; } if (create_ip_resource(parent, data, replica, data_set) == FALSE) { return FALSE; } if(create_remote_resource(parent, data, replica, data_set) == FALSE) { return FALSE; } if (replica->child && replica->ipaddr) { add_hash_param(replica->child->meta, "external-ip", replica->ipaddr); } if (replica->remote) { /* * Allow the remote connection resource to be allocated to a * different node than the one on which the container is active. * * This makes it possible to have Pacemaker Remote nodes running * containers with pacemaker-remoted inside in order to start * services inside those containers. */ set_bit(replica->remote->flags, pe_rsc_allow_remote_remotes); } return TRUE; } static void mount_add(pe__bundle_variant_data_t *bundle_data, const char *source, const char *target, const char *options, uint32_t flags) { pe__bundle_mount_t *mount = calloc(1, sizeof(pe__bundle_mount_t)); mount->source = strdup(source); mount->target = strdup(target); if (options) { mount->options = strdup(options); } mount->flags = flags; bundle_data->mounts = g_list_append(bundle_data->mounts, mount); } static void mount_free(pe__bundle_mount_t *mount) { free(mount->source); free(mount->target); free(mount->options); free(mount); } static void port_free(pe__bundle_port_t *port) { free(port->source); free(port->target); free(port); } static pe__bundle_replica_t * replica_for_remote(pe_resource_t *remote) { resource_t *top = remote; pe__bundle_variant_data_t *bundle_data = NULL; if (top == NULL) { return NULL; } while (top->parent != NULL) { top = top->parent; } get_bundle_variant_data(bundle_data, top); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (replica->remote == remote) { return replica; } } CRM_LOG_ASSERT(FALSE); return NULL; } bool pe__bundle_needs_remote_name(pe_resource_t *rsc) { const char *value; if (rsc == NULL) { return FALSE; } value = g_hash_table_lookup(rsc->parameters, XML_RSC_ATTR_REMOTE_RA_ADDR); if (safe_str_eq(value, "#uname") == FALSE) { return FALSE; } else { const char *match[3][2] = { { XML_ATTR_TYPE, "remote" }, { XML_AGENT_ATTR_CLASS, PCMK_RESOURCE_CLASS_OCF }, { XML_AGENT_ATTR_PROVIDER, "pacemaker" }, }; for (int m = 0; m < 3; m++) { value = crm_element_value(rsc->xml, match[m][0]); if (safe_str_neq(value, match[m][1])) { return FALSE; } } } return TRUE; } const char * pe__add_bundle_remote_name(pe_resource_t *rsc, xmlNode *xml, const char *field) { // REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside pe_node_t *node = NULL; pe__bundle_replica_t *replica = NULL; if (!pe__bundle_needs_remote_name(rsc)) { return NULL; } replica = replica_for_remote(rsc); if (replica == NULL) { return NULL; } node = replica->container->allocated_to; if (node == NULL) { /* If it won't be running anywhere after the * transition, go with where it's running now. */ node = pe__current_node(replica->container); } if(node == NULL) { crm_trace("Cannot determine address for bundle connection %s", rsc->id); return NULL; } crm_trace("Setting address for bundle connection %s to bundle host %s", rsc->id, node->details->uname); if(xml != NULL && field != NULL) { crm_xml_add(xml, field, node->details->uname); } return node->details->uname; } gboolean pe__unpack_bundle(pe_resource_t *rsc, pe_working_set_t *data_set) { const char *value = NULL; xmlNode *xml_obj = NULL; xmlNode *xml_resource = NULL; pe__bundle_variant_data_t *bundle_data = NULL; bool need_log_mount = TRUE; CRM_ASSERT(rsc != NULL); pe_rsc_trace(rsc, "Processing resource %s...", rsc->id); bundle_data = calloc(1, sizeof(pe__bundle_variant_data_t)); rsc->variant_opaque = bundle_data; bundle_data->prefix = strdup(rsc->id); xml_obj = first_named_child(rsc->xml, PE__CONTAINER_AGENT_DOCKER_S); if (xml_obj != NULL) { bundle_data->agent_type = PE__CONTAINER_AGENT_DOCKER; } else { xml_obj = first_named_child(rsc->xml, PE__CONTAINER_AGENT_RKT_S); if (xml_obj != NULL) { bundle_data->agent_type = PE__CONTAINER_AGENT_RKT; } else { xml_obj = first_named_child(rsc->xml, PE__CONTAINER_AGENT_PODMAN_S); if (xml_obj != NULL) { bundle_data->agent_type = PE__CONTAINER_AGENT_PODMAN; } else { return FALSE; } } } value = crm_element_value(xml_obj, XML_RSC_ATTR_PROMOTED_MAX); if (value == NULL) { // @COMPAT deprecated since 2.0.0 value = crm_element_value(xml_obj, "masters"); } bundle_data->promoted_max = crm_parse_int(value, "0"); if (bundle_data->promoted_max < 0) { pe_err("%s for %s must be nonnegative integer, using 0", XML_RSC_ATTR_PROMOTED_MAX, rsc->id); bundle_data->promoted_max = 0; } value = crm_element_value(xml_obj, "replicas"); if ((value == NULL) && bundle_data->promoted_max) { bundle_data->nreplicas = bundle_data->promoted_max; } else { bundle_data->nreplicas = crm_parse_int(value, "1"); } if (bundle_data->nreplicas < 1) { pe_err("'replicas' for %s must be positive integer, using 1", rsc->id); bundle_data->nreplicas = 1; } /* * Communication between containers on the same host via the * floating IPs only works if the container is started with: * --userland-proxy=false --ip-masq=false */ value = crm_element_value(xml_obj, "replicas-per-host"); bundle_data->nreplicas_per_host = crm_parse_int(value, "1"); if (bundle_data->nreplicas_per_host < 1) { pe_err("'replicas-per-host' for %s must be positive integer, using 1", rsc->id); bundle_data->nreplicas_per_host = 1; } if (bundle_data->nreplicas_per_host == 1) { clear_bit(rsc->flags, pe_rsc_unique); } bundle_data->container_command = crm_element_value_copy(xml_obj, "run-command"); bundle_data->launcher_options = crm_element_value_copy(xml_obj, "options"); bundle_data->image = crm_element_value_copy(xml_obj, "image"); bundle_data->container_network = crm_element_value_copy(xml_obj, "network"); xml_obj = first_named_child(rsc->xml, "network"); if(xml_obj) { bundle_data->ip_range_start = crm_element_value_copy(xml_obj, "ip-range-start"); bundle_data->host_netmask = crm_element_value_copy(xml_obj, "host-netmask"); bundle_data->host_network = crm_element_value_copy(xml_obj, "host-interface"); bundle_data->control_port = crm_element_value_copy(xml_obj, "control-port"); value = crm_element_value(xml_obj, "add-host"); if (check_boolean(value) == FALSE) { bundle_data->add_host = TRUE; } else { crm_str_to_boolean(value, &bundle_data->add_host); } for (xmlNode *xml_child = __xml_first_child_element(xml_obj); xml_child != NULL; xml_child = __xml_next_element(xml_child)) { pe__bundle_port_t *port = calloc(1, sizeof(pe__bundle_port_t)); port->source = crm_element_value_copy(xml_child, "port"); if(port->source == NULL) { port->source = crm_element_value_copy(xml_child, "range"); } else { port->target = crm_element_value_copy(xml_child, "internal-port"); } if(port->source != NULL && strlen(port->source) > 0) { if(port->target == NULL) { port->target = strdup(port->source); } bundle_data->ports = g_list_append(bundle_data->ports, port); } else { pe_err("Invalid port directive %s", ID(xml_child)); port_free(port); } } } xml_obj = first_named_child(rsc->xml, "storage"); for (xmlNode *xml_child = __xml_first_child_element(xml_obj); xml_child != NULL; xml_child = __xml_next_element(xml_child)) { const char *source = crm_element_value(xml_child, "source-dir"); const char *target = crm_element_value(xml_child, "target-dir"); const char *options = crm_element_value(xml_child, "options"); int flags = pe__bundle_mount_none; if (source == NULL) { source = crm_element_value(xml_child, "source-dir-root"); set_bit(flags, pe__bundle_mount_subdir); } if (source && target) { mount_add(bundle_data, source, target, options, flags); if (strcmp(target, "/var/log") == 0) { need_log_mount = FALSE; } } else { pe_err("Invalid mount directive %s", ID(xml_child)); } } xml_obj = first_named_child(rsc->xml, "primitive"); if (xml_obj && valid_network(bundle_data)) { char *value = NULL; xmlNode *xml_set = NULL; xml_resource = create_xml_node(NULL, XML_CIB_TAG_INCARNATION); /* @COMPAT We no longer use the tag, but we need to keep it as * part of the resource name, so that bundles don't restart in a rolling * upgrade. (It also avoids needing to change regression tests.) */ crm_xml_set_id(xml_resource, "%s-%s", bundle_data->prefix, (bundle_data->promoted_max? "master" : (const char *)xml_resource->name)); xml_set = create_xml_node(xml_resource, XML_TAG_META_SETS); crm_xml_set_id(xml_set, "%s-%s-meta", bundle_data->prefix, xml_resource->name); crm_create_nvpair_xml(xml_set, NULL, XML_RSC_ATTR_ORDERED, XML_BOOLEAN_TRUE); value = crm_itoa(bundle_data->nreplicas); crm_create_nvpair_xml(xml_set, NULL, XML_RSC_ATTR_INCARNATION_MAX, value); free(value); value = crm_itoa(bundle_data->nreplicas_per_host); crm_create_nvpair_xml(xml_set, NULL, XML_RSC_ATTR_INCARNATION_NODEMAX, value); free(value); crm_create_nvpair_xml(xml_set, NULL, XML_RSC_ATTR_UNIQUE, (bundle_data->nreplicas_per_host > 1)? XML_BOOLEAN_TRUE : XML_BOOLEAN_FALSE); if (bundle_data->promoted_max) { crm_create_nvpair_xml(xml_set, NULL, XML_RSC_ATTR_PROMOTABLE, XML_BOOLEAN_TRUE); value = crm_itoa(bundle_data->promoted_max); crm_create_nvpair_xml(xml_set, NULL, XML_RSC_ATTR_PROMOTED_MAX, value); free(value); } //crm_xml_add(xml_obj, XML_ATTR_ID, bundle_data->prefix); add_node_copy(xml_resource, xml_obj); } else if(xml_obj) { pe_err("Cannot control %s inside %s without either ip-range-start or control-port", rsc->id, ID(xml_obj)); return FALSE; } if(xml_resource) { int lpc = 0; GListPtr childIter = NULL; resource_t *new_rsc = NULL; pe__bundle_port_t *port = NULL; int offset = 0, max = 1024; char *buffer = NULL; if (common_unpack(xml_resource, &new_rsc, rsc, data_set) == FALSE) { pe_err("Failed unpacking resource %s", ID(rsc->xml)); if (new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } return FALSE; } bundle_data->child = new_rsc; /* Currently, we always map the default authentication key location * into the same location inside the container. * * Ideally, we would respect the host's PCMK_authkey_location, but: * - it may be different on different nodes; * - the actual connection will do extra checking to make sure the key * file exists and is readable, that we can't do here on the DC * - tools such as crm_resource and crm_simulate may not have the same * environment variables as the cluster, causing operation digests to * differ * * Always using the default location inside the container is fine, * because we control the pacemaker_remote environment, and it avoids * having to pass another environment variable to the container. * * @TODO A better solution may be to have only pacemaker_remote use the * environment variable, and have the cluster nodes use a new * cluster option for key location. This would introduce the limitation * of the location being the same on all cluster nodes, but that's * reasonable. */ mount_add(bundle_data, DEFAULT_REMOTE_KEY_LOCATION, DEFAULT_REMOTE_KEY_LOCATION, NULL, pe__bundle_mount_none); if (need_log_mount) { mount_add(bundle_data, CRM_BUNDLE_DIR, "/var/log", NULL, pe__bundle_mount_subdir); } port = calloc(1, sizeof(pe__bundle_port_t)); if(bundle_data->control_port) { port->source = strdup(bundle_data->control_port); } else { /* If we wanted to respect PCMK_remote_port, we could use * crm_default_remote_port() here and elsewhere in this file instead * of DEFAULT_REMOTE_PORT. * * However, it gains nothing, since we control both the container * environment and the connection resource parameters, and the user * can use a different port if desired by setting control-port. */ port->source = crm_itoa(DEFAULT_REMOTE_PORT); } port->target = strdup(port->source); bundle_data->ports = g_list_append(bundle_data->ports, port); buffer = calloc(1, max+1); for (childIter = bundle_data->child->children; childIter != NULL; childIter = childIter->next) { pe__bundle_replica_t *replica = calloc(1, sizeof(pe__bundle_replica_t)); replica->child = childIter->data; replica->child->exclusive_discover = TRUE; replica->offset = lpc++; // Ensure the child's notify gets set based on the underlying primitive's value if (is_set(replica->child->flags, pe_rsc_notify)) { set_bit(bundle_data->child->flags, pe_rsc_notify); } offset += allocate_ip(bundle_data, replica, buffer+offset, max-offset); bundle_data->replicas = g_list_append(bundle_data->replicas, replica); bundle_data->attribute_target = g_hash_table_lookup(replica->child->meta, XML_RSC_ATTR_TARGET); } bundle_data->container_host_options = buffer; if (bundle_data->attribute_target) { g_hash_table_replace(rsc->meta, strdup(XML_RSC_ATTR_TARGET), strdup(bundle_data->attribute_target)); g_hash_table_replace(bundle_data->child->meta, strdup(XML_RSC_ATTR_TARGET), strdup(bundle_data->attribute_target)); } } else { // Just a naked container, no pacemaker-remote int offset = 0, max = 1024; char *buffer = calloc(1, max+1); for (int lpc = 0; lpc < bundle_data->nreplicas; lpc++) { pe__bundle_replica_t *replica = calloc(1, sizeof(pe__bundle_replica_t)); replica->offset = lpc; offset += allocate_ip(bundle_data, replica, buffer+offset, max-offset); bundle_data->replicas = g_list_append(bundle_data->replicas, replica); } bundle_data->container_host_options = buffer; } for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (!create_container(rsc, bundle_data, replica, data_set)) { pe_err("Failed unpacking resource %s", rsc->id); rsc->fns->free(rsc); return FALSE; } } if (bundle_data->child) { rsc->children = g_list_append(rsc->children, bundle_data->child); } return TRUE; } static int replica_resource_active(pe_resource_t *rsc, gboolean all) { if (rsc) { gboolean child_active = rsc->fns->active(rsc, all); if (child_active && !all) { return TRUE; } else if (!child_active && all) { return FALSE; } } return -1; } gboolean pe__bundle_active(pe_resource_t *rsc, gboolean all) { pe__bundle_variant_data_t *bundle_data = NULL; GListPtr iter = NULL; get_bundle_variant_data(bundle_data, rsc); for (iter = bundle_data->replicas; iter != NULL; iter = iter->next) { pe__bundle_replica_t *replica = iter->data; int rsc_active; rsc_active = replica_resource_active(replica->ip, all); if (rsc_active >= 0) { return (gboolean) rsc_active; } rsc_active = replica_resource_active(replica->child, all); if (rsc_active >= 0) { return (gboolean) rsc_active; } rsc_active = replica_resource_active(replica->container, all); if (rsc_active >= 0) { return (gboolean) rsc_active; } rsc_active = replica_resource_active(replica->remote, all); if (rsc_active >= 0) { return (gboolean) rsc_active; } } /* If "all" is TRUE, we've already checked that no resources were inactive, * so return TRUE; if "all" is FALSE, we didn't find any active resources, * so return FALSE. */ return all; } /*! * \internal * \brief Find the bundle replica corresponding to a given node * * \param[in] bundle Top-level bundle resource * \param[in] node Node to search for * * \return Bundle replica if found, NULL otherwise */ pe_resource_t * pe__find_bundle_replica(const pe_resource_t *bundle, const pe_node_t *node) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_ASSERT(bundle && node); get_bundle_variant_data(bundle_data, bundle); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica && replica->node); if (replica->node->details == node->details) { return replica->child; } } return NULL; } static void print_rsc_in_list(resource_t *rsc, const char *pre_text, long options, void *print_data) { if (rsc != NULL) { if (options & pe_print_html) { status_print("
  • "); } rsc->fns->print(rsc, pre_text, options, print_data); if (options & pe_print_html) { status_print("
  • \n"); } } } static const char* container_agent_str(enum pe__container_agent t) { switch (t) { case PE__CONTAINER_AGENT_DOCKER: return PE__CONTAINER_AGENT_DOCKER_S; case PE__CONTAINER_AGENT_RKT: return PE__CONTAINER_AGENT_RKT_S; case PE__CONTAINER_AGENT_PODMAN: return PE__CONTAINER_AGENT_PODMAN_S; default: // PE__CONTAINER_AGENT_UNKNOWN break; } return PE__CONTAINER_AGENT_UNKNOWN_S; } static void bundle_print_xml(pe_resource_t *rsc, const char *pre_text, long options, void *print_data) { pe__bundle_variant_data_t *bundle_data = NULL; char *child_text = NULL; CRM_CHECK(rsc != NULL, return); if (pre_text == NULL) { pre_text = ""; } child_text = crm_concat(pre_text, " ", ' '); get_bundle_variant_data(bundle_data, rsc); status_print("%sid); status_print("type=\"%s\" ", container_agent_str(bundle_data->agent_type)); status_print("image=\"%s\" ", bundle_data->image); status_print("unique=\"%s\" ", is_set(rsc->flags, pe_rsc_unique)? "true" : "false"); status_print("managed=\"%s\" ", is_set(rsc->flags, pe_rsc_managed) ? "true" : "false"); status_print("failed=\"%s\" ", is_set(rsc->flags, pe_rsc_failed) ? "true" : "false"); status_print(">\n"); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); status_print("%s \n", pre_text, replica->offset); print_rsc_in_list(replica->ip, child_text, options, print_data); print_rsc_in_list(replica->child, child_text, options, print_data); print_rsc_in_list(replica->container, child_text, options, print_data); print_rsc_in_list(replica->remote, child_text, options, print_data); status_print("%s \n", pre_text); } status_print("%s\n", pre_text); free(child_text); } int pe__bundle_xml(pcmk__output_t *out, va_list args) { unsigned int options = va_arg(args, unsigned int); pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe__bundle_variant_data_t *bundle_data = NULL; int rc = 0; CRM_ASSERT(rsc != NULL); get_bundle_variant_data(bundle_data, rsc); rc = pe__name_and_nvpairs_xml(out, true, "bundle", 6 , "id", rsc->id , "type", container_agent_str(bundle_data->agent_type) , "image", bundle_data->image , "unique", BOOL2STR(is_set(rsc->flags, pe_rsc_unique)) , "managed", BOOL2STR(is_set(rsc->flags, pe_rsc_managed)) , "failed", BOOL2STR(is_set(rsc->flags, pe_rsc_failed))); CRM_ASSERT(rc == 0); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; char *id = crm_itoa(replica->offset); CRM_ASSERT(replica); rc = pe__name_and_nvpairs_xml(out, true, "replica", 1, "id", id); free(id); CRM_ASSERT(rc == 0); if (replica->ip != NULL) { out->message(out, crm_map_element_name(replica->ip->xml), options, replica->ip); } if (replica->child != NULL) { out->message(out, crm_map_element_name(replica->child->xml), options, replica->child); } out->message(out, crm_map_element_name(replica->container->xml), options, replica->container); if (replica->remote != NULL) { out->message(out, crm_map_element_name(replica->remote->xml), options, replica->remote); } pcmk__output_xml_pop_parent(out); // replica } pcmk__output_xml_pop_parent(out); // bundle return rc; } static void pe__bundle_replica_output_html(pcmk__output_t *out, pe__bundle_replica_t *replica, long options) { node_t *node = NULL; pe_resource_t *rsc = replica->child; int offset = 0; char buffer[LINE_MAX]; if(rsc == NULL) { rsc = replica->container; } if (replica->remote) { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(replica->remote)); } else { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(replica->container)); } if (replica->ipaddr) { offset += snprintf(buffer + offset, LINE_MAX - offset, " (%s)", replica->ipaddr); } node = pe__current_node(replica->container); pe__common_output_html(out, rsc, buffer, node, options); } int pe__bundle_html(pcmk__output_t *out, va_list args) { unsigned int options = va_arg(args, unsigned int); pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe__bundle_variant_data_t *bundle_data = NULL; char buffer[LINE_MAX]; CRM_ASSERT(rsc != NULL); get_bundle_variant_data(bundle_data, rsc); pcmk__output_create_xml_node(out, "br"); out->begin_list(out, NULL, NULL, "Container bundle%s: %s [%s]%s%s", (bundle_data->nreplicas > 1)? " set" : "", rsc->id, bundle_data->image, is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "", is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)"); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); pcmk__output_xml_create_parent(out, "li"); if (is_set(options, pe_print_implicit)) { if(g_list_length(bundle_data->replicas) > 1) { snprintf(buffer, LINE_MAX, " Replica[%d]", replica->offset); xmlNodeSetContent(pcmk__output_xml_peek_parent(out), (pcmkXmlStr) buffer); } pcmk__output_create_xml_node(out, "br"); out->begin_list(out, NULL, NULL, NULL); if (replica->ip != NULL) { out->message(out, crm_map_element_name(replica->ip->xml), options, replica->ip); } if (replica->child != NULL) { out->message(out, crm_map_element_name(replica->child->xml), options, replica->child); } out->message(out, crm_map_element_name(replica->container->xml), options, replica->container); if (replica->remote != NULL) { out->message(out, crm_map_element_name(replica->remote->xml), options, replica->remote); } out->end_list(out); } else { pe__bundle_replica_output_html(out, replica, options); } pcmk__output_xml_pop_parent(out); } out->end_list(out); return 0; } static void pe__bundle_replica_output_text(pcmk__output_t *out, pe__bundle_replica_t *replica, long options) { node_t *node = NULL; pe_resource_t *rsc = replica->child; int offset = 0; char buffer[LINE_MAX]; if(rsc == NULL) { rsc = replica->container; } if (replica->remote) { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(replica->remote)); } else { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(replica->container)); } if (replica->ipaddr) { offset += snprintf(buffer + offset, LINE_MAX - offset, " (%s)", replica->ipaddr); } node = pe__current_node(replica->container); pe__common_output_text(out, rsc, buffer, node, options); } int pe__bundle_text(pcmk__output_t *out, va_list args) { unsigned int options = va_arg(args, unsigned int); pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe__bundle_variant_data_t *bundle_data = NULL; CRM_ASSERT(rsc != NULL); get_bundle_variant_data(bundle_data, rsc); out->begin_list(out, NULL, NULL, "Container bundle%s: %s [%s]%s%s", (bundle_data->nreplicas > 1)? " set" : "", rsc->id, bundle_data->image, is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "", is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)"); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if (is_set(options, pe_print_implicit)) { if(g_list_length(bundle_data->replicas) > 1) { out->list_item(out, NULL, "Replica[%d]", replica->offset); } out->begin_list(out, NULL, NULL, NULL); if (replica->ip != NULL) { out->message(out, crm_map_element_name(replica->ip->xml), options, replica->ip); } if (replica->child != NULL) { out->message(out, crm_map_element_name(replica->child->xml), options, replica->child); } out->message(out, crm_map_element_name(replica->container->xml), options, replica->container); if (replica->remote != NULL) { out->message(out, crm_map_element_name(replica->remote->xml), options, replica->remote); } out->end_list(out); } else { pe__bundle_replica_output_text(out, replica, options); } } out->end_list(out); return 0; } static void print_bundle_replica(pe__bundle_replica_t *replica, const char *pre_text, long options, void *print_data) { node_t *node = NULL; pe_resource_t *rsc = replica->child; int offset = 0; char buffer[LINE_MAX]; if(rsc == NULL) { rsc = replica->container; } if (replica->remote) { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(replica->remote)); } else { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(replica->container)); } if (replica->ipaddr) { offset += snprintf(buffer + offset, LINE_MAX - offset, " (%s)", replica->ipaddr); } node = pe__current_node(replica->container); common_print(rsc, pre_text, buffer, node, options, print_data); } void pe__print_bundle(pe_resource_t *rsc, const char *pre_text, long options, void *print_data) { pe__bundle_variant_data_t *bundle_data = NULL; char *child_text = NULL; CRM_CHECK(rsc != NULL, return); if (options & pe_print_xml) { bundle_print_xml(rsc, pre_text, options, print_data); return; } get_bundle_variant_data(bundle_data, rsc); if (pre_text == NULL) { pre_text = " "; } status_print("%sContainer bundle%s: %s [%s]%s%s\n", pre_text, ((bundle_data->nreplicas > 1)? " set" : ""), rsc->id, bundle_data->image, is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "", is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)"); if (options & pe_print_html) { status_print("
    \n
      \n"); } for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if (options & pe_print_html) { status_print("
    • "); } if (is_set(options, pe_print_implicit)) { child_text = crm_strdup_printf(" %s", pre_text); if(g_list_length(bundle_data->replicas) > 1) { status_print(" %sReplica[%d]\n", pre_text, replica->offset); } if (options & pe_print_html) { status_print("
      \n
        \n"); } print_rsc_in_list(replica->ip, child_text, options, print_data); print_rsc_in_list(replica->container, child_text, options, print_data); print_rsc_in_list(replica->remote, child_text, options, print_data); print_rsc_in_list(replica->child, child_text, options, print_data); if (options & pe_print_html) { status_print("
      \n"); } } else { child_text = crm_strdup_printf("%s ", pre_text); print_bundle_replica(replica, child_text, options, print_data); } free(child_text); if (options & pe_print_html) { status_print("
    • \n"); } } if (options & pe_print_html) { status_print("
    \n"); } } static void free_bundle_replica(pe__bundle_replica_t *replica) { if (replica == NULL) { return; } if (replica->node) { free(replica->node); replica->node = NULL; } if (replica->ip) { free_xml(replica->ip->xml); replica->ip->xml = NULL; replica->ip->fns->free(replica->ip); replica->ip = NULL; } if (replica->container) { free_xml(replica->container->xml); replica->container->xml = NULL; replica->container->fns->free(replica->container); replica->container = NULL; } if (replica->remote) { free_xml(replica->remote->xml); replica->remote->xml = NULL; replica->remote->fns->free(replica->remote); replica->remote = NULL; } free(replica->ipaddr); free(replica); } void pe__free_bundle(pe_resource_t *rsc) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); get_bundle_variant_data(bundle_data, rsc); pe_rsc_trace(rsc, "Freeing %s", rsc->id); free(bundle_data->prefix); free(bundle_data->image); free(bundle_data->control_port); free(bundle_data->host_network); free(bundle_data->host_netmask); free(bundle_data->ip_range_start); free(bundle_data->container_network); free(bundle_data->launcher_options); free(bundle_data->container_command); free(bundle_data->container_host_options); g_list_free_full(bundle_data->replicas, (GDestroyNotify) free_bundle_replica); g_list_free_full(bundle_data->mounts, (GDestroyNotify)mount_free); g_list_free_full(bundle_data->ports, (GDestroyNotify)port_free); g_list_free(rsc->children); if(bundle_data->child) { free_xml(bundle_data->child->xml); bundle_data->child->xml = NULL; bundle_data->child->fns->free(bundle_data->child); } common_free(rsc); } enum rsc_role_e pe__bundle_resource_state(const pe_resource_t *rsc, gboolean current) { enum rsc_role_e container_role = RSC_ROLE_UNKNOWN; return container_role; } /*! * \brief Get the number of configured replicas in a bundle * * \param[in] rsc Bundle resource * * \return Number of configured replicas, or 0 on error */ int pe_bundle_replicas(const resource_t *rsc) { if ((rsc == NULL) || (rsc->variant != pe_container)) { return 0; } else { pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, rsc); return bundle_data->nreplicas; } } void pe__count_bundle(pe_resource_t *rsc) { pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, rsc); for (GList *item = bundle_data->replicas; item != NULL; item = item->next) { pe__bundle_replica_t *replica = item->data; if (replica->ip) { replica->ip->fns->count(replica->ip); } if (replica->child) { replica->child->fns->count(replica->child); } if (replica->container) { replica->container->fns->count(replica->container); } if (replica->remote) { replica->remote->fns->count(replica->remote); } } } diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index d33775838c..e2039aa74a 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,3842 +1,3842 @@ /* * Copyright 2004-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_status); #define set_config_flag(data_set, option, flag) do { \ const char *tmp = pe_pref(data_set->config_hash, option); \ if(tmp) { \ if(crm_is_true(tmp)) { \ set_bit(data_set->flags, flag); \ } else { \ clear_bit(data_set->flags, flag); \ } \ } \ } while(0) static void unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum action_fail_response *failed, pe_working_set_t *data_set); static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node); static void add_node_attrs(xmlNode *attrs, pe_node_t *node, bool overwrite, pe_working_set_t *data_set); // Bitmask for warnings we only want to print once uint32_t pe_wo = 0; static gboolean is_dangling_guest_node(node_t *node) { /* we are looking for a remote-node that was supposed to be mapped to a * container resource, but all traces of that container have disappeared * from both the config and the status section. */ if (pe__is_guest_or_remote_node(node) && node->details->remote_rsc && node->details->remote_rsc->container == NULL && is_set(node->details->remote_rsc->flags, pe_rsc_orphan_container_filler)) { return TRUE; } return FALSE; } /*! * \brief Schedule a fence action for a node * * \param[in,out] data_set Current working set of cluster * \param[in,out] node Node to fence * \param[in] reason Text description of why fencing is needed */ void pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason) { CRM_CHECK(node, return); /* A guest node is fenced by marking its container as failed */ if (pe__is_guest_node(node)) { resource_t *rsc = node->details->remote_rsc->container; if (is_set(rsc->flags, pe_rsc_failed) == FALSE) { if (!is_set(rsc->flags, pe_rsc_managed)) { crm_notice("Not fencing guest node %s " "(otherwise would because %s): " "its guest resource %s is unmanaged", node->details->uname, reason, rsc->id); } else { crm_warn("Guest node %s will be fenced " "(by recovering its guest resource %s): %s", node->details->uname, rsc->id, reason); /* We don't mark the node as unclean because that would prevent the * node from running resources. We want to allow it to run resources * in this transition if the recovery succeeds. */ node->details->remote_requires_reset = TRUE; set_bit(rsc->flags, pe_rsc_failed); } } } else if (is_dangling_guest_node(node)) { crm_info("Cleaning up dangling connection for guest node %s: " "fencing was already done because %s, " "and guest resource no longer exists", node->details->uname, reason); set_bit(node->details->remote_rsc->flags, pe_rsc_failed); } else if (pe__is_remote_node(node)) { resource_t *rsc = node->details->remote_rsc; if (rsc && (!is_set(rsc->flags, pe_rsc_managed))) { crm_notice("Not fencing remote node %s " "(otherwise would because %s): connection is unmanaged", node->details->uname, reason); } else if(node->details->remote_requires_reset == FALSE) { node->details->remote_requires_reset = TRUE; crm_warn("Remote node %s %s: %s", node->details->uname, pe_can_fence(data_set, node)? "will be fenced" : "is unclean", reason); } node->details->unclean = TRUE; pe_fence_op(node, NULL, TRUE, reason, data_set); } else if (node->details->unclean) { crm_trace("Cluster node %s %s because %s", node->details->uname, pe_can_fence(data_set, node)? "would also be fenced" : "also is unclean", reason); } else { crm_warn("Cluster node %s %s: %s", node->details->uname, pe_can_fence(data_set, node)? "will be fenced" : "is unclean", reason); node->details->unclean = TRUE; pe_fence_op(node, NULL, TRUE, reason, data_set); } } // @TODO xpaths can't handle templates, rules, or id-refs // nvpair with provides or requires set to unfencing #define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR \ "[(@" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_PROVIDES "'" \ "or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \ "and @" XML_NVPAIR_ATTR_VALUE "='unfencing']" // unfencing in rsc_defaults or any resource #define XPATH_ENABLE_UNFENCING \ "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \ "//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR \ "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \ "/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR static void set_if_xpath(unsigned long long flag, const char *xpath, pe_working_set_t *data_set) { xmlXPathObjectPtr result = NULL; if (is_not_set(data_set->flags, flag)) { result = xpath_search(data_set->input, xpath); if (result && (numXpathResults(result) > 0)) { set_bit(data_set->flags, flag); } freeXpathObject(result); } } gboolean unpack_config(xmlNode * config, pe_working_set_t * data_set) { const char *value = NULL; GHashTable *config_hash = crm_str_table_new(); data_set->config_hash = config_hash; pe__unpack_dataset_nvpairs(config, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, data_set); verify_pe_options(data_set->config_hash); set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes); if(is_not_set(data_set->flags, pe_flag_startup_probes)) { crm_info("Startup probes: disabled (dangerous)"); } value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG); if (value && crm_is_true(value)) { crm_notice("Watchdog will be used via SBD if fencing is required " "and stonith-watchdog-timeout is nonzero"); set_bit(data_set->flags, pe_flag_have_stonith_resource); } /* Set certain flags via xpath here, so they can be used before the relevant * configuration sections are unpacked. */ set_if_xpath(pe_flag_enable_unfencing, XPATH_ENABLE_UNFENCING, data_set); value = pe_pref(data_set->config_hash, "stonith-timeout"); data_set->stonith_timeout = (int) crm_parse_interval_spec(value); crm_debug("STONITH timeout: %d", data_set->stonith_timeout); set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled); crm_debug("STONITH of failed nodes is %s", is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); if (!strcmp(data_set->stonith_action, "poweroff")) { pe_warn_once(pe_wo_poweroff, "Support for stonith-action of 'poweroff' is deprecated " "and will be removed in a future release (use 'off' instead)"); data_set->stonith_action = "off"; } crm_trace("STONITH will %s nodes", data_set->stonith_action); set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing); crm_debug("Concurrent fencing is %s", is_set(data_set->flags, pe_flag_concurrent_fencing) ? "enabled" : "disabled"); set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything); crm_debug("Stop all active resources: %s", is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false"); set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster); if (is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "no-quorum-policy"); if (safe_str_eq(value, "ignore")) { data_set->no_quorum_policy = no_quorum_ignore; } else if (safe_str_eq(value, "freeze")) { data_set->no_quorum_policy = no_quorum_freeze; } else if (safe_str_eq(value, "suicide")) { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { int do_panic = 0; crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic); if (do_panic || is_set(data_set->flags, pe_flag_have_quorum)) { data_set->no_quorum_policy = no_quorum_suicide; } else { crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum"); data_set->no_quorum_policy = no_quorum_stop; } } else { crm_config_err("Resetting no-quorum-policy to 'stop': stonith is not configured"); data_set->no_quorum_policy = no_quorum_stop; } } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of quorum: Stop ALL resources"); break; case no_quorum_suicide: crm_notice("On loss of quorum: Fence all remaining nodes"); break; case no_quorum_ignore: crm_notice("On loss of quorum: Ignore"); break; } set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans); crm_trace("Orphan resources are %s", is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans); crm_trace("Orphan resource actions are %s", is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop); crm_trace("Stopped resources are removed from the status section: %s", is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false"); set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode); crm_trace("Maintenance mode: %s", is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false"); set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal); crm_trace("Start failures are %s", is_set(data_set->flags, pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount"); if (is_set(data_set->flags, pe_flag_stonith_enabled)) { set_config_flag(data_set, "startup-fencing", pe_flag_startup_fencing); } if (is_set(data_set->flags, pe_flag_startup_fencing)) { crm_trace("Unseen nodes will be fenced"); } else { pe_warn_once(pe_wo_blind, "Blind faith: not fencing unseen nodes"); } node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red")); node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green")); node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow")); crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s", pe_pref(data_set->config_hash, "node-health-red"), pe_pref(data_set->config_hash, "node-health-yellow"), pe_pref(data_set->config_hash, "node-health-green")); data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy"); crm_trace("Placement strategy: %s", data_set->placement_strategy); return TRUE; } static void destroy_digest_cache(gpointer ptr) { op_digest_cache_t *data = ptr; free_xml(data->params_all); free_xml(data->params_secure); free_xml(data->params_restart); free(data->digest_all_calc); free(data->digest_restart_calc); free(data->digest_secure_calc); free(data); } node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set) { node_t *new_node = NULL; if (pe_find_node(data_set->nodes, uname) != NULL) { crm_config_warn("Detected multiple node entries with uname=%s" " - this is rarely intended", uname); } new_node = calloc(1, sizeof(node_t)); if (new_node == NULL) { return NULL; } new_node->weight = char2score(score); new_node->fixed = FALSE; new_node->details = calloc(1, sizeof(struct pe_node_shared_s)); if (new_node->details == NULL) { free(new_node); return NULL; } crm_trace("Creating node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->rsc_discovery_enabled = TRUE; new_node->details->running_rsc = NULL; new_node->details->type = node_ping; if (safe_str_eq(type, "remote")) { new_node->details->type = node_remote; set_bit(data_set->flags, pe_flag_have_remote_nodes); } else if ((type == NULL) || safe_str_eq(type, "member")) { new_node->details->type = node_member; } new_node->details->attrs = crm_str_table_new(); if (pe__is_guest_or_remote_node(new_node)) { g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("remote")); } else { g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("cluster")); } new_node->details->utilization = crm_str_table_new(); new_node->details->digest_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_digest_cache); data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname); return new_node; } bool remote_id_conflict(const char *remote_name, pe_working_set_t *data) { bool match = FALSE; #if 1 pe_find_resource(data->resources, remote_name); #else if (data->name_check == NULL) { data->name_check = g_hash_table_new(crm_str_hash, g_str_equal); for (xml_rsc = __xml_first_child_element(parent); xml_rsc != NULL; xml_rsc = __xml_next_element(xml_rsc)) { const char *id = ID(xml_rsc); /* avoiding heap allocation here because we know the duration of this hashtable allows us to */ g_hash_table_insert(data->name_check, (char *) id, (char *) id); } } if (g_hash_table_lookup(data->name_check, remote_name)) { match = TRUE; } #endif if (match) { crm_err("Invalid remote-node name, a resource called '%s' already exists.", remote_name); return NULL; } return match; } static const char * expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data) { xmlNode *attr_set = NULL; xmlNode *attr = NULL; const char *container_id = ID(xml_obj); const char *remote_name = NULL; const char *remote_server = NULL; const char *remote_port = NULL; const char *connect_timeout = "60s"; const char *remote_allow_migrate=NULL; const char *is_managed = NULL; for (attr_set = __xml_first_child_element(xml_obj); attr_set != NULL; attr_set = __xml_next_element(attr_set)) { if (safe_str_neq((const char *)attr_set->name, XML_TAG_META_SETS)) { continue; } for (attr = __xml_first_child_element(attr_set); attr != NULL; attr = __xml_next_element(attr)) { const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); if (safe_str_eq(name, XML_RSC_ATTR_REMOTE_NODE)) { remote_name = value; } else if (safe_str_eq(name, "remote-addr")) { remote_server = value; } else if (safe_str_eq(name, "remote-port")) { remote_port = value; } else if (safe_str_eq(name, "remote-connect-timeout")) { connect_timeout = value; } else if (safe_str_eq(name, "remote-allow-migrate")) { remote_allow_migrate=value; } else if (safe_str_eq(name, XML_RSC_ATTR_MANAGED)) { is_managed = value; } } } if (remote_name == NULL) { return NULL; } if (remote_id_conflict(remote_name, data)) { return NULL; } pe_create_remote_xml(parent, remote_name, container_id, remote_allow_migrate, is_managed, connect_timeout, remote_server, remote_port); return remote_name; } static void handle_startup_fencing(pe_working_set_t *data_set, node_t *new_node) { if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) { /* Ignore fencing for remote nodes that don't have a connection resource * associated with them. This happens when remote node entries get left * in the nodes section after the connection resource is removed. */ return; } if (is_set(data_set->flags, pe_flag_startup_fencing)) { // All nodes are unclean until we've seen their status entry new_node->details->unclean = TRUE; } else { // Blind faith ... new_node->details->unclean = FALSE; } /* We need to be able to determine if a node's status section * exists or not separate from whether the node is unclean. */ new_node->details->unseen = TRUE; } gboolean unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; const char *score = NULL; for (xml_obj = __xml_first_child_element(xml_nodes); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) { new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); crm_trace("Processing node %s/%s", uname, id); if (id == NULL) { crm_config_err("Must specify id tag in "); continue; } new_node = pe_create_node(id, uname, type, score, data_set); if (new_node == NULL) { return FALSE; } /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ handle_startup_fencing(data_set, new_node); add_node_attrs(xml_obj, new_node, FALSE, data_set); pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_UTILIZATION, NULL, new_node->details->utilization, NULL, FALSE, data_set); crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); } } if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) { crm_info("Creating a fake local node"); pe_create_node(data_set->localhost, data_set->localhost, NULL, 0, data_set); } return TRUE; } static void setup_container(resource_t * rsc, pe_working_set_t * data_set) { const char *container_id = NULL; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; setup_container(child_rsc, data_set); } return; } container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER); if (container_id && safe_str_neq(container_id, rsc->id)) { resource_t *container = pe_find_resource(data_set->resources, container_id); if (container) { rsc->container = container; set_bit(container->flags, pe_rsc_is_container); container->fillers = g_list_append(container->fillers, rsc); pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id); } else { pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id); } } } gboolean unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; /* Create remote nodes and guest nodes from the resource configuration * before unpacking resources. */ for (xml_obj = __xml_first_child_element(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { const char *new_node_id = NULL; /* Check for remote nodes, which are defined by ocf:pacemaker:remote * primitives. */ if (xml_contains_remote_node(xml_obj)) { new_node_id = ID(xml_obj); /* The "pe_find_node" check is here to make sure we don't iterate over * an expanded node that has already been added to the node list. */ if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found remote node %s defined by resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } continue; } /* Check for guest nodes, which are defined by special meta-attributes * of a primitive of any type (for example, VirtualDomain or Xen). */ if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, TRUE)) { /* This will add an ocf:pacemaker:remote primitive to the * configuration for the guest node's connection, to be unpacked * later. */ new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, data_set); if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found guest node %s in resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } continue; } /* Check for guest nodes inside a group. Clones are currently not * supported as guest nodes. */ if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, TRUE)) { xmlNode *xml_obj2 = NULL; for (xml_obj2 = __xml_first_child_element(xml_obj); xml_obj2 != NULL; xml_obj2 = __xml_next_element(xml_obj2)) { new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, data_set); if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found guest node %s in resource %s inside group %s", new_node_id, ID(xml_obj2), ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } } } } return TRUE; } /* Call this after all the nodes and resources have been * unpacked, but before the status section is read. * * A remote node's online status is reflected by the state * of the remote node's connection resource. We need to link * the remote node to this connection resource so we can have - * easy access to the connection resource during the PE calculations. + * easy access to the connection resource during the scheduler calculations. */ static void link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc) { node_t *remote_node = NULL; if (new_rsc->is_remote_node == FALSE) { return; } if (is_set(data_set->flags, pe_flag_quick_location)) { /* remote_nodes and remote_resources are not linked in quick location calculations */ return; } remote_node = pe_find_node(data_set->nodes, new_rsc->id); CRM_CHECK(remote_node != NULL, return;); pe_rsc_trace(new_rsc, "Linking remote connection resource %s to node %s", new_rsc->id, remote_node->details->uname); remote_node->details->remote_rsc = new_rsc; if (new_rsc->container == NULL) { /* Handle start-up fencing for remote nodes (as opposed to guest nodes) * the same as is done for cluster nodes. */ handle_startup_fencing(data_set, remote_node); } else { /* pe_create_node() marks the new node as "remote" or "cluster"; now * that we know the node is a guest node, update it correctly. */ g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("container")); } } static void destroy_tag(gpointer data) { tag_t *tag = data; if (tag) { free(tag->id); g_list_free_full(tag->refs, free); free(tag); } } /*! * \internal * \brief Parse configuration XML for resource information * * \param[in] xml_resources Top of resource configuration XML * \param[in,out] data_set Where to put resource information * * \return TRUE * * \note unpack_remote_nodes() MUST be called before this, so that the nodes can * be used when common_unpack() calls resource_location() */ gboolean unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; GListPtr gIter = NULL; data_set->template_rsc_sets = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_tag); for (xml_obj = __xml_first_child_element(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { resource_t *new_rsc = NULL; if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) { const char *template_id = ID(xml_obj); if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets, template_id, NULL, NULL) == FALSE) { /* Record the template's ID for the knowledge of its existence anyway. */ g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL); } continue; } crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj)); if (common_unpack(xml_obj, &new_rsc, NULL, data_set)) { data_set->resources = g_list_append(data_set->resources, new_rsc); pe_rsc_trace(new_rsc, "Added resource %s", new_rsc->id); } else { crm_config_err("Failed unpacking %s %s", crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID)); if (new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } } } for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; setup_container(rsc, data_set); link_rsc2remotenode(data_set, rsc); } data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority); if (is_set(data_set->flags, pe_flag_quick_location)) { /* Ignore */ } else if (is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { crm_config_err("Resource start-up disabled since no STONITH resources have been defined"); crm_config_err("Either configure some or disable STONITH with the stonith-enabled option"); crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity"); } return TRUE; } gboolean unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set) { xmlNode *xml_tag = NULL; data_set->tags = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_tag); for (xml_tag = __xml_first_child_element(xml_tags); xml_tag != NULL; xml_tag = __xml_next_element(xml_tag)) { xmlNode *xml_obj_ref = NULL; const char *tag_id = ID(xml_tag); if (crm_str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, TRUE) == FALSE) { continue; } if (tag_id == NULL) { crm_config_err("Failed unpacking %s: %s should be specified", crm_element_name(xml_tag), XML_ATTR_ID); continue; } for (xml_obj_ref = __xml_first_child_element(xml_tag); xml_obj_ref != NULL; xml_obj_ref = __xml_next_element(xml_obj_ref)) { const char *obj_ref = ID(xml_obj_ref); if (crm_str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, TRUE) == FALSE) { continue; } if (obj_ref == NULL) { crm_config_err("Failed unpacking %s for tag %s: %s should be specified", crm_element_name(xml_obj_ref), tag_id, XML_ATTR_ID); continue; } if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) { return FALSE; } } } return TRUE; } /* The ticket state section: * "/cib/status/tickets/ticket_state" */ static gboolean unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set) { const char *ticket_id = NULL; const char *granted = NULL; const char *last_granted = NULL; const char *standby = NULL; xmlAttrPtr xIter = NULL; ticket_t *ticket = NULL; ticket_id = ID(xml_ticket); if (ticket_id == NULL || strlen(ticket_id) == 0) { return FALSE; } crm_trace("Processing ticket state for %s", ticket_id); ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, data_set); if (ticket == NULL) { return FALSE; } } for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_ticket, prop_name); if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) { continue; } g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value)); } granted = g_hash_table_lookup(ticket->state, "granted"); if (granted && crm_is_true(granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", ticket->id); } last_granted = g_hash_table_lookup(ticket->state, "last-granted"); if (last_granted) { ticket->last_granted = crm_parse_int(last_granted, 0); } standby = g_hash_table_lookup(ticket->state, "standby"); if (standby && crm_is_true(standby)) { ticket->standby = TRUE; if (ticket->granted) { crm_info("Granted ticket '%s' is in standby-mode", ticket->id); } } else { ticket->standby = FALSE; } crm_trace("Done with ticket state for %s", ticket_id); return TRUE; } static gboolean unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; for (xml_obj = __xml_first_child_element(xml_tickets); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, TRUE) == FALSE) { continue; } unpack_ticket_state(xml_obj, data_set); } return TRUE; } static void unpack_handle_remote_attrs(node_t *this_node, xmlNode *state, pe_working_set_t * data_set) { const char *resource_discovery_enabled = NULL; xmlNode *attrs = NULL; resource_t *rsc = NULL; if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { return; } if ((this_node == NULL) || !pe__is_guest_or_remote_node(this_node)) { return; } crm_trace("Processing remote node id=%s, uname=%s", this_node->details->id, this_node->details->uname); this_node->details->remote_maintenance = crm_atoi(crm_element_value(state, XML_NODE_IS_MAINTENANCE), "0"); rsc = this_node->details->remote_rsc; if (this_node->details->remote_requires_reset == FALSE) { this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; } attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); if (pe__shutdown_requested(this_node)) { crm_info("Node %s is shutting down", this_node->details->uname); this_node->details->shutdown = TRUE; if (rsc) { rsc->next_role = RSC_ROLE_STOPPED; } } if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) || (rsc && !is_set(rsc->flags, pe_rsc_managed))) { crm_info("Node %s is in maintenance-mode", this_node->details->uname); this_node->details->maintenance = TRUE; } resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY); if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { if (pe__is_remote_node(this_node) && is_not_set(data_set->flags, pe_flag_stonith_enabled)) { crm_warn("Ignoring %s attribute on remote node %s because stonith is disabled", XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname); } else { /* This is either a remote node with fencing enabled, or a guest * node. We don't care whether fencing is enabled when fencing guest * nodes, because they are "fenced" by recovering their containing * resource. */ crm_info("Node %s has resource discovery disabled", this_node->details->uname); this_node->details->rsc_discovery_enabled = FALSE; } } } static bool unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set) { bool changed = false; xmlNode *lrm_rsc = NULL; for (xmlNode *state = __xml_first_child_element(status); state != NULL; state = __xml_next_element(state)) { const char *id = NULL; const char *uname = NULL; node_t *this_node = NULL; bool process = FALSE; if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { continue; } id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (this_node == NULL) { crm_info("Node %s is unknown", id); continue; } else if (this_node->details->unpacked) { crm_info("Node %s is already processed", id); continue; } else if (!pe__is_guest_or_remote_node(this_node) && is_set(data_set->flags, pe_flag_stonith_enabled)) { // A redundant test, but preserves the order for regression tests process = TRUE; } else if (pe__is_guest_or_remote_node(this_node)) { bool check = FALSE; resource_t *rsc = this_node->details->remote_rsc; if(fence) { check = TRUE; } else if(rsc == NULL) { /* Not ready yet */ } else if (pe__is_guest_node(this_node) && rsc->role == RSC_ROLE_STARTED && rsc->container->role == RSC_ROLE_STARTED) { /* Both the connection and its containing resource need to be * known to be up before we process resources running in it. */ check = TRUE; crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED); } else if (!pe__is_guest_node(this_node) && rsc->role == RSC_ROLE_STARTED) { check = TRUE; crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED); } if (check) { determine_remote_online_status(data_set, this_node); unpack_handle_remote_attrs(this_node, state, data_set); process = TRUE; } } else if (this_node->details->online) { process = TRUE; } else if (fence) { process = TRUE; } if(process) { crm_trace("Processing lrm resource entries on %shealthy%s node: %s", fence?"un":"", (pe__is_guest_or_remote_node(this_node)? " remote" : ""), this_node->details->uname); changed = TRUE; this_node->details->unpacked = TRUE; lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); unpack_lrm_resources(this_node, lrm_rsc, data_set); } } return changed; } /* remove nodes that are down, stopping */ /* create positive rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(xmlNode * status, pe_working_set_t * data_set) { const char *id = NULL; const char *uname = NULL; xmlNode *state = NULL; node_t *this_node = NULL; crm_trace("Beginning unpack"); if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_ticket); } for (state = __xml_first_child_element(status); state != NULL; state = __xml_next_element(state)) { if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) { unpack_tickets_state((xmlNode *) state, data_set); } else if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) { xmlNode *attrs = NULL; const char *resource_discovery_enabled = NULL; id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (uname == NULL) { /* error */ continue; } else if (this_node == NULL) { crm_config_warn("Node %s in status section no longer exists", uname); continue; } else if (pe__is_guest_or_remote_node(this_node)) { /* online state for remote nodes is determined by the * rsc state after all the unpacking is done. we do however * need to mark whether or not the node has been fenced as this plays * a role during unpacking cluster node resource state */ this_node->details->remote_was_fenced = crm_atoi(crm_element_value(state, XML_NODE_IS_FENCED), "0"); continue; } crm_trace("Processing node id=%s, uname=%s", id, uname); /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance"))) { crm_info("Node %s is in maintenance-mode", this_node->details->uname); this_node->details->maintenance = TRUE; } resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY); if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes", XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname); } crm_trace("determining node state"); determine_online_status(state, this_node, data_set); if (is_not_set(data_set->flags, pe_flag_have_quorum) && this_node->details->online && (data_set->no_quorum_policy == no_quorum_suicide)) { /* Everything else should flow from this automatically - * At least until the PE becomes able to migrate off healthy resources + * (at least until the scheduler becomes able to migrate off + * healthy resources) */ pe_fence_node(data_set, this_node, "cluster does not have quorum"); } } } while(unpack_node_loop(status, FALSE, data_set)) { crm_trace("Start another loop"); } // Now catch any nodes we didn't see unpack_node_loop(status, is_set(data_set->flags, pe_flag_stonith_enabled), data_set); /* Now that we know where resources are, we can schedule stops of containers * with failed bundle connections */ if (data_set->stop_needed != NULL) { for (GList *item = data_set->stop_needed; item; item = item->next) { pe_resource_t *container = item->data; pe_node_t *node = pe__current_node(container); if (node) { stop_action(container, node, FALSE); } } g_list_free(data_set->stop_needed); data_set->stop_needed = NULL; } for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *this_node = gIter->data; if (this_node == NULL) { continue; } else if (!pe__is_guest_or_remote_node(this_node)) { continue; } else if(this_node->details->unpacked) { continue; } determine_remote_online_status(data_set, this_node); } return TRUE; } static gboolean determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state, node_t * this_node) { gboolean online = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); if (!crm_is_true(in_cluster)) { crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster)); } else if (safe_str_eq(is_peer, ONLINESTATUS)) { if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join); } } else if (this_node->details->expected_up == FALSE) { crm_trace("Controller is down: in_cluster=%s", crm_str(in_cluster)); crm_trace("\tis_peer=%s, join=%s, expected=%s", crm_str(is_peer), crm_str(join), crm_str(exp_state)); } else { /* mark it unclean */ pe_fence_node(data_set, this_node, "peer is unexpectedly down"); crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s", crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state)); } return online; } static gboolean determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state, node_t * this_node) { gboolean online = FALSE; gboolean do_terminate = FALSE; bool crmd_online = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); const char *terminate = pe_node_attribute_raw(this_node, "terminate"); /* - XML_NODE_IN_CLUSTER ::= true|false - XML_NODE_IS_PEER ::= online|offline - XML_NODE_JOIN_STATE ::= member|down|pending|banned - XML_NODE_EXPECTED ::= member|down */ if (crm_is_true(terminate)) { do_terminate = TRUE; } else if (terminate != NULL && strlen(terminate) > 0) { /* could be a time() value */ char t = terminate[0]; if (t != '0' && isdigit(t)) { do_terminate = TRUE; } } crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate); online = crm_is_true(in_cluster); crmd_online = safe_str_eq(is_peer, ONLINESTATUS); if (exp_state == NULL) { exp_state = CRMD_JOINSTATE_DOWN; } if (this_node->details->shutdown) { crm_debug("%s is shutting down", this_node->details->uname); /* Slightly different criteria since we can't shut down a dead peer */ online = crmd_online; } else if (in_cluster == NULL) { pe_fence_node(data_set, this_node, "peer has not been seen by the cluster"); } else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) { pe_fence_node(data_set, this_node, "peer failed the pacemaker membership criteria"); } else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) { if (crm_is_true(in_cluster) || crmd_online) { crm_info("- Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { crm_trace("%s is down or still coming up", this_node->details->uname); } } else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN) && crm_is_true(in_cluster) == FALSE && !crmd_online) { crm_info("Node %s was just shot", this_node->details->uname); online = FALSE; } else if (crm_is_true(in_cluster) == FALSE) { pe_fence_node(data_set, this_node, "peer is no longer part of the cluster"); } else if (!crmd_online) { pe_fence_node(data_set, this_node, "peer process is no longer available"); /* Everything is running at this point, now check join state */ } else if (do_terminate) { pe_fence_node(data_set, this_node, "termination was requested"); } else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) { crm_info("Node %s is active", this_node->details->uname); } else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING) || safe_str_eq(join, CRMD_JOINSTATE_DOWN)) { crm_info("Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { pe_fence_node(data_set, this_node, "peer was in an unknown state"); crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown); } return online; } static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node) { resource_t *rsc = this_node->details->remote_rsc; resource_t *container = NULL; pe_node_t *host = NULL; /* If there is a node state entry for a (former) Pacemaker Remote node * but no resource creating that node, the node's connection resource will * be NULL. Consider it an offline remote node in that case. */ if (rsc == NULL) { this_node->details->online = FALSE; goto remote_online_done; } container = rsc->container; if (container && (g_list_length(rsc->running_on) == 1)) { host = rsc->running_on->data; } /* If the resource is currently started, mark it online. */ if (rsc->role == RSC_ROLE_STARTED) { crm_trace("%s node %s presumed ONLINE because connection resource is started", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = TRUE; } /* consider this node shutting down if transitioning start->stop */ if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) { crm_trace("%s node %s shutting down because connection resource is stopping", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->shutdown = TRUE; } /* Now check all the failure conditions. */ if(container && is_set(container->flags, pe_rsc_failed)) { crm_trace("Guest node %s UNCLEAN because guest resource failed", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } else if(is_set(rsc->flags, pe_rsc_failed)) { crm_trace("%s node %s OFFLINE because connection resource failed", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; } else if (rsc->role == RSC_ROLE_STOPPED || (container && container->role == RSC_ROLE_STOPPED)) { crm_trace("%s node %s OFFLINE because its resource is stopped", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = FALSE; } else if (host && (host->details->online == FALSE) && host->details->unclean) { crm_trace("Guest node %s UNCLEAN because host is unclean", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } remote_online_done: crm_trace("Remote node %s online=%s", this_node->details->id, this_node->details->online ? "TRUE" : "FALSE"); return this_node->details->online; } gboolean determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set) { gboolean online = FALSE; const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); if (this_node == NULL) { crm_config_err("No node to check"); return online; } this_node->details->shutdown = FALSE; this_node->details->expected_up = FALSE; if (pe__shutdown_requested(this_node)) { this_node->details->shutdown = TRUE; } else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) { this_node->details->expected_up = TRUE; } if (this_node->details->type == node_ping) { this_node->details->unclean = FALSE; online = FALSE; /* As far as resource management is concerned, * the node is safely offline. * Anyone caught abusing this logic will be shot */ } else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { online = determine_online_status_no_fencing(data_set, node_state, this_node); } else { online = determine_online_status_fencing(data_set, node_state, this_node); } if (online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (online && this_node->details->shutdown) { /* don't run resources here */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (this_node->details->type == node_ping) { crm_info("Node %s is not a pacemaker node", this_node->details->uname); } else if (this_node->details->unclean) { pe_proc_warn("Node %s is unclean", this_node->details->uname); } else if (this_node->details->online) { crm_info("Node %s is %s", this_node->details->uname, this_node->details->shutdown ? "shutting down" : this_node->details->pending ? "pending" : this_node->details->standby ? "standby" : this_node->details->maintenance ? "maintenance" : "online"); } else { crm_trace("Node %s is offline", this_node->details->uname); } return online; } /*! * \internal * \brief Find the end of a resource's name, excluding any clone suffix * * \param[in] id Resource ID to check * * \return Pointer to last character of resource's base name */ const char * pe_base_name_end(const char *id) { if (!crm_strlen_zero(id)) { const char *end = id + strlen(id) - 1; for (const char *s = end; s > id; --s) { switch (*s) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': return (s == end)? s : (s - 1); default: return end; } } return end; } return NULL; } /*! * \internal * \brief Get a resource name excluding any clone suffix * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_strip(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); char *basename = NULL; CRM_ASSERT(end); basename = strndup(last_rsc_id, end - last_rsc_id + 1); CRM_ASSERT(basename); return basename; } /*! * \internal * \brief Get the name of the first instance of a cloned resource * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name plus :0 * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_zero(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); size_t base_name_len = end - last_rsc_id + 1; char *zero = NULL; CRM_ASSERT(end); zero = calloc(base_name_len + 3, sizeof(char)); CRM_ASSERT(zero); memcpy(zero, last_rsc_id, base_name_len); zero[base_name_len] = ':'; zero[base_name_len + 1] = '0'; return zero; } static resource_t * create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set) { resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_debug(xml_rsc, "Orphan resource"); if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) { return NULL; } if (xml_contains_remote_node(xml_rsc)) { node_t *node; crm_debug("Detected orphaned remote node %s", rsc_id); node = pe_find_node(data_set->nodes, rsc_id); if (node == NULL) { node = pe_create_node(rsc_id, rsc_id, "remote", NULL, data_set); } link_rsc2remotenode(data_set, rsc); if (node) { crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id); node->details->shutdown = TRUE; } } if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) { /* This orphaned rsc needs to be mapped to a container. */ crm_trace("Detected orphaned container filler %s", rsc_id); set_bit(rsc->flags, pe_rsc_orphan_container_filler); } set_bit(rsc->flags, pe_rsc_orphan); data_set->resources = g_list_append(data_set->resources, rsc); return rsc; } /*! * \internal * \brief Create orphan instance for anonymous clone resource history */ static pe_resource_t * create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id, pe_node_t *node, pe_working_set_t *data_set) { pe_resource_t *top = pe__create_clone_child(parent, data_set); // find_rsc() because we might be a cloned group pe_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone); pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id, node->details->uname); return orphan; } /*! * \internal * \brief Check a node for an instance of an anonymous clone * * Return a child instance of the specified anonymous clone, in order of * preference: (1) the instance running on the specified node, if any; * (2) an inactive instance (i.e. within the total of clone-max instances); * (3) a newly created orphan (i.e. clone-max instances are already active). * * \param[in] data_set Cluster information * \param[in] node Node on which to check for instance * \param[in] parent Clone to check * \param[in] rsc_id Name of cloned resource in history (without instance) */ static resource_t * find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent, const char *rsc_id) { GListPtr rIter = NULL; pe_resource_t *rsc = NULL; pe_resource_t *inactive_instance = NULL; gboolean skip_inactive = FALSE; CRM_ASSERT(parent != NULL); CRM_ASSERT(pe_rsc_is_clone(parent)); CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique)); // Check for active (or partially active, for cloned groups) instance pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GListPtr locations = NULL; resource_t *child = rIter->data; /* Check whether this instance is already known to be active or pending * anywhere, at this stage of unpacking. Because this function is called * for a resource before the resource's individual operation history * entries are unpacked, locations will generally not contain the * desired node. * * However, there are three exceptions: * (1) when child is a cloned group and we have already unpacked the * history of another member of the group on the same node; * (2) when we've already unpacked the history of another numbered * instance on the same node (which can happen if globally-unique * was flipped from true to false); and * (3) when we re-run calculations on the same data set as part of a * simulation. */ child->fns->location(child, &locations, 2); if (locations) { /* We should never associate the same numbered anonymous clone * instance with multiple nodes, and clone instances can't migrate, * so there must be only one location, regardless of history. */ CRM_LOG_ASSERT(locations->next == NULL); if (((pe_node_t *)locations->data)->details == node->details) { /* This child instance is active on the requested node, so check * for a corresponding configured resource. We use find_rsc() * instead of child because child may be a cloned group, and we * need the particular member corresponding to rsc_id. * * If the history entry is orphaned, rsc will be NULL. */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); if (rsc) { /* If there are multiple instance history entries for an * anonymous clone in a single node's history (which can * happen if globally-unique is switched from true to * false), we want to consider the instances beyond the * first as orphans, even if there are inactive instance * numbers available. */ if (rsc->running_on) { crm_notice("Active (now-)anonymous clone %s has " "multiple (orphan) instance histories on %s", parent->id, node->details->uname); skip_inactive = TRUE; rsc = NULL; } else { pe_rsc_trace(parent, "Resource %s, active", rsc->id); } } } g_list_free(locations); } else { pe_rsc_trace(parent, "Resource %s, skip inactive", child->id); if (!skip_inactive && !inactive_instance && is_not_set(child->flags, pe_rsc_block)) { // Remember one inactive instance in case we don't find active inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); /* ... but don't use it if it was already associated with a * pending action on another node */ if (inactive_instance && inactive_instance->pending_node && (inactive_instance->pending_node->details != node->details)) { inactive_instance = NULL; } } } } if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) { pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id); rsc = inactive_instance; } /* If the resource has "requires" set to "quorum" or "nothing", and we don't * have a clone instance for every node, we don't want to consume a valid * instance number for unclean nodes. Such instances may appear to be active * according to the history, but should be considered inactive, so we can * start an instance elsewhere. Treat such instances as orphans. * * An exception is instances running on guest nodes -- since guest node * "fencing" is actually just a resource stop, requires shouldn't apply. * * @TODO Ideally, we'd use an inactive instance number if it is not needed * for any clean instances. However, we don't know that at this point. */ if ((rsc != NULL) && is_not_set(rsc->flags, pe_rsc_needs_fencing) && (!node->details->online || node->details->unclean) && !pe__is_guest_node(node) && !pe__is_universal_clone(parent, data_set)) { rsc = NULL; } if (rsc == NULL) { rsc = create_anonymous_orphan(parent, rsc_id, node, data_set); pe_rsc_trace(parent, "Resource %s, orphan", rsc->id); } return rsc; } static resource_t * unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id, xmlNode * rsc_entry) { resource_t *rsc = NULL; resource_t *parent = NULL; crm_trace("looking for %s", rsc_id); rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL) { /* If we didn't find the resource by its name in the operation history, * check it again as a clone instance. Even when clone-max=0, we create * a single :0 orphan to match against here. */ char *clone0_id = clone_zero(rsc_id); resource_t *clone0 = pe_find_resource(data_set->resources, clone0_id); if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) { rsc = clone0; parent = uber_parent(clone0); crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id); } else { crm_trace("%s is not known as %s either (orphan)", rsc_id, clone0_id); } free(clone0_id); } else if (rsc->variant > pe_native) { crm_trace("Resource history for %s is orphaned because it is no longer primitive", rsc_id); return NULL; } else { parent = uber_parent(rsc); } if (pe_rsc_is_anon_clone(parent)) { if (pe_rsc_is_bundled(parent)) { rsc = pe__find_bundle_replica(parent->parent, node); } else { char *base = clone_strip(rsc_id); rsc = find_anonymous_clone(data_set, node, parent, base); free(base); CRM_ASSERT(rsc != NULL); } } if (rsc && safe_str_neq(rsc_id, rsc->id) && safe_str_neq(rsc_id, rsc->clone_name)) { free(rsc->clone_name); rsc->clone_name = strdup(rsc_id); pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s", rsc_id, node->details->uname, rsc->id, (is_set(rsc->flags, pe_rsc_orphan)? " (ORPHAN)" : "")); } return rsc; } static resource_t * process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set) { resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } else { CRM_CHECK(rsc != NULL, return NULL); pe_rsc_trace(rsc, "Added orphan %s", rsc->id); resource_location(rsc, NULL, -INFINITY, "__orphan_do_not_run__", data_set); } return rsc; } static void process_rsc_state(resource_t * rsc, node_t * node, enum action_fail_response on_fail, xmlNode * migrate_op, pe_working_set_t * data_set) { node_t *tmpnode = NULL; char *reason = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s", rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail)); /* process current state */ if (rsc->role != RSC_ROLE_UNKNOWN) { resource_t *iter = rsc; while (iter) { if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) { node_t *n = node_copy(node); pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name, n->details->uname); g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n); } if (is_set(iter->flags, pe_rsc_unique)) { break; } iter = iter->parent; } } /* If a managed resource is believed to be running, but node is down ... */ if (rsc->role > RSC_ROLE_STOPPED && node->details->online == FALSE && node->details->maintenance == FALSE && is_set(rsc->flags, pe_rsc_managed)) { gboolean should_fence = FALSE; /* If this is a guest node, fence it (regardless of whether fencing is * enabled, because guest node fencing is done by recovery of the * container resource rather than by the fencer). Mark the resource * we're processing as failed. When the guest comes back up, its * operation history in the CIB will be cleared, freeing the affected * resource to run again once we are sure we know its state. */ if (pe__is_guest_node(node)) { set_bit(rsc->flags, pe_rsc_failed); should_fence = TRUE; } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) { if (pe__is_remote_node(node) && node->details->remote_rsc && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) { /* Setting unseen means that fencing of the remote node will * occur only if the connection resource is not going to start * somewhere. This allows connection resources on a failed * cluster node to move to another node without requiring the * remote nodes to be fenced as well. */ node->details->unseen = TRUE; reason = crm_strdup_printf("%s is active there (fencing will be" " revoked if remote connection can " "be re-established elsewhere)", rsc->id); } should_fence = TRUE; } if (should_fence) { if (reason == NULL) { reason = crm_strdup_printf("%s is thought to be active there", rsc->id); } pe_fence_node(data_set, node, reason); } free(reason); } if (node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = action_fail_ignore; } switch (on_fail) { case action_fail_ignore: /* nothing to do */ break; case action_fail_fence: /* treat it as if it is still running * but also mark the node as unclean */ reason = crm_strdup_printf("%s failed there", rsc->id); pe_fence_node(data_set, node, reason); free(reason); break; case action_fail_standby: node->details->standby = TRUE; node->details->standby_onfail = TRUE; break; case action_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); break; case action_fail_migrate: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set); break; case action_fail_stop: rsc->next_role = RSC_ROLE_STOPPED; break; case action_fail_recover: if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { set_bit(rsc->flags, pe_rsc_failed); stop_action(rsc, node, FALSE); } break; case action_fail_restart_container: set_bit(rsc->flags, pe_rsc_failed); if (rsc->container && pe_rsc_is_bundled(rsc)) { /* A bundle's remote connection can run on a different node than * the bundle's container. We don't necessarily know where the * container is running yet, so remember it and add a stop * action for it later. */ data_set->stop_needed = g_list_prepend(data_set->stop_needed, rsc->container); } else if (rsc->container) { stop_action(rsc->container, node, FALSE); } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { stop_action(rsc, node, FALSE); } break; case action_fail_reset_remote: set_bit(rsc->flags, pe_rsc_failed); if (is_set(data_set->flags, pe_flag_stonith_enabled)) { tmpnode = NULL; if (rsc->is_remote_node) { tmpnode = pe_find_node(data_set->nodes, rsc->id); } if (tmpnode && pe__is_remote_node(tmpnode) && tmpnode->details->remote_was_fenced == 0) { /* The remote connection resource failed in a way that * should result in fencing the remote node. */ pe_fence_node(data_set, tmpnode, "remote connection is unrecoverable"); } } /* require the stop action regardless if fencing is occurring or not. */ if (rsc->role > RSC_ROLE_STOPPED) { stop_action(rsc, node, FALSE); } /* if reconnect delay is in use, prevent the connection from exiting the * "STOPPED" role until the failure is cleared by the delay timeout. */ if (rsc->remote_reconnect_ms) { rsc->next_role = RSC_ROLE_STOPPED; } break; } /* ensure a remote-node connection failure forces an unclean remote-node * to be fenced. By setting unseen = FALSE, the remote-node failure will * result in a fencing operation regardless if we're going to attempt to * reconnect to the remote-node in this transition or not. */ if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) { tmpnode = pe_find_node(data_set->nodes, rsc->id); if (tmpnode && tmpnode->details->unclean) { tmpnode->details->unseen = FALSE; } } if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { if (is_set(rsc->flags, pe_rsc_orphan)) { if (is_set(rsc->flags, pe_rsc_managed)) { crm_config_warn("Detected active orphan %s running on %s", rsc->id, node->details->uname); } else { crm_config_warn("Cluster configured not to stop active orphans." " %s must be stopped manually on %s", rsc->id, node->details->uname); } } native_add_running(rsc, node, data_set); if (on_fail != action_fail_ignore) { set_bit(rsc->flags, pe_rsc_failed); } } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) { /* Only do this for older status sections that included instance numbers * Otherwise stopped instances will appear as orphans */ pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); free(rsc->clone_name); rsc->clone_name = NULL; } else { GList *possible_matches = pe__resource_actions(rsc, node, RSC_STOP, FALSE); GListPtr gIter = possible_matches; for (; gIter != NULL; gIter = gIter->next) { action_t *stop = (action_t *) gIter->data; stop->flags |= pe_action_optional; } g_list_free(possible_matches); } } /* create active recurring operations as optional */ static void process_recurring(node_t * node, resource_t * rsc, int start_index, int stop_index, GListPtr sorted_op_list, pe_working_set_t * data_set) { int counter = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; guint interval_ms = 0; char *key = NULL; const char *id = ID(rsc_op); const char *interval_ms_s = NULL; counter++; if (node->details->online == FALSE) { pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname); break; /* Need to check if there's a monitor for role="Stopped" */ } else if (start_index < stop_index && counter <= stop_index) { pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname); continue; } else if (counter < start_index) { pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter); continue; } interval_ms_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL_MS); interval_ms = crm_parse_ms(interval_ms_s); if (interval_ms == 0) { pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (safe_str_eq(status, "-1")) { pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = generate_op_key(rsc->id, task, interval_ms); pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname); custom_action(rsc, key, task, node, TRUE, TRUE, data_set); } } void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) { int counter = -1; int implied_monitor_start = -1; int implied_clone_start = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; *stop_index = -1; *start_index = -1; for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (safe_str_eq(task, CRMD_ACTION_STOP) && safe_str_eq(status, "0")) { *stop_index = counter; } else if (safe_str_eq(task, CRMD_ACTION_START) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { *start_index = counter; } else if ((implied_monitor_start <= *stop_index) && safe_str_eq(task, CRMD_ACTION_STATUS)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if (safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) { implied_monitor_start = counter; } } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) { implied_clone_start = counter; } } if (*start_index == -1) { if (implied_clone_start != -1) { *start_index = implied_clone_start; } else if (implied_monitor_start != -1) { *start_index = implied_monitor_start; } } } static resource_t * unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set) { GListPtr gIter = NULL; int stop_index = -1; int start_index = -1; enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; const char *task = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; xmlNode *migrate_op = NULL; xmlNode *rsc_op = NULL; xmlNode *last_failure = NULL; enum action_fail_response on_fail = FALSE; enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; crm_trace("[%s] Processing %s on %s", crm_element_name(rsc_entry), rsc_id, node->details->uname); /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } /* find the resource */ rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); if (rsc == NULL) { rsc = process_orphan_resource(rsc_entry, node, data_set); } CRM_ASSERT(rsc != NULL); /* process operations */ saved_role = rsc->role; on_fail = action_fail_ignore; rsc->role = RSC_ROLE_UNKNOWN; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { migrate_op = rsc_op; } unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set); } /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail, migrate_op, data_set); if (get_target_role(rsc, &req_role)) { if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); rsc->next_role = req_role; } else if (req_role > rsc->next_role) { pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } } if (saved_role > rsc->role) { rsc->role = saved_role; } return rsc; } static void handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set) { xmlNode *rsc_entry = NULL; for (rsc_entry = __xml_first_child_element(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { resource_t *rsc; resource_t *container; const char *rsc_id; const char *container_id; if (safe_str_neq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE)) { continue; } container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER); rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); if (container_id == NULL || rsc_id == NULL) { continue; } container = pe_find_resource(data_set->resources, container_id); if (container == NULL) { continue; } rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL || is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE || rsc->container != NULL) { continue; } pe_rsc_trace(rsc, "Mapped container of orphaned resource %s to %s", rsc->id, container_id); rsc->container = container; container->fillers = g_list_append(container->fillers, rsc); } } gboolean unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set) { xmlNode *rsc_entry = NULL; gboolean found_orphaned_container_filler = FALSE; CRM_CHECK(node != NULL, return FALSE); crm_trace("Unpacking resources on %s", node->details->uname); for (rsc_entry = __xml_first_child_element(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) { resource_t *rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set); if (!rsc) { continue; } if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) { found_orphaned_container_filler = TRUE; } } } /* now that all the resource state has been unpacked for this node * we have to go back and map any orphaned container fillers to their * container resource */ if (found_orphaned_container_filler) { handle_orphaned_container_fillers(lrm_rsc_list, data_set); } return TRUE; } static void set_active(resource_t * rsc) { resource_t *top = uber_parent(rsc); if (top && is_set(top->flags, pe_rsc_promotable)) { rsc->role = RSC_ROLE_SLAVE; } else { rsc->role = RSC_ROLE_STARTED; } } static void set_node_score(gpointer key, gpointer value, gpointer user_data) { node_t *node = value; int *score = user_data; node->weight = *score; } #define STATUS_PATH_MAX 1024 static xmlNode * find_lrm_op(const char *resource, const char *op, const char *node, const char *source, bool success_only, pe_working_set_t *data_set) { int offset = 0; char xpath[STATUS_PATH_MAX]; xmlNode *xml = NULL; offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node); offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']", resource); /* Need to check against transition_magic too? */ if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op, source); } else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op, source); } else { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op); } CRM_LOG_ASSERT(offset > 0); xml = get_xpath_object(xpath, data_set->input, LOG_DEBUG); if (xml && success_only) { int rc = PCMK_OCF_UNKNOWN_ERROR; int status = PCMK_LRM_OP_ERROR; crm_element_value_int(xml, XML_LRM_ATTR_RC, &rc); crm_element_value_int(xml, XML_LRM_ATTR_OPSTATUS, &status); if ((rc != PCMK_OCF_OK) || (status != PCMK_LRM_OP_DONE)) { return NULL; } } return xml; } static int pe__call_id(xmlNode *op_xml) { int id = 0; if (op_xml) { crm_element_value_int(op_xml, XML_LRM_ATTR_CALLID, &id); } return id; } /*! * \brief Check whether a stop happened on the same node after some event * * \param[in] rsc Resource being checked * \param[in] node Node being checked * \param[in] xml_op Event that stop is being compared to * \param[in] data_set Cluster working set * * \return TRUE if stop happened after event, FALSE otherwise * * \note This is really unnecessary, but kept as a safety mechanism. We * currently don't save more than one successful event in history, so this * only matters when processing really old CIB files that we don't * technically support anymore, or as preparation for logging an extended * history in the future. */ static bool stop_happened_after(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, pe_working_set_t *data_set) { xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->uname, NULL, TRUE, data_set); return (stop_op && (pe__call_id(stop_op) > pe__call_id(xml_op))); } static void unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, pe_working_set_t *data_set) { /* A successful migration sequence is: * migrate_to on source node * migrate_from on target node * stop on source node * * If a migrate_to is followed by a stop, the entire migration (successful * or failed) is complete, and we don't care what happened on the target. * * If no migrate_from has happened, the migration is considered to be * "partial". If the migrate_from failed, make sure the resource gets * stopped on both source and target (if up). * * If the migrate_to and migrate_from both succeeded (which also implies the * resource is no longer running on the source), but there is no stop, the * migration is considered to be "dangling". Schedule a stop on the source * in this case. */ int from_rc = 0; int from_status = 0; pe_node_t *target_node = NULL; pe_node_t *source_node = NULL; xmlNode *migrate_from = NULL; const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); // Sanity check CRM_CHECK(source && target && !strcmp(source, node->details->uname), return); if (stop_happened_after(rsc, node, xml_op, data_set)) { return; } // Clones are not allowed to migrate, so role can't be master rsc->role = RSC_ROLE_STARTED; target_node = pe_find_node(data_set->nodes, target); source_node = pe_find_node(data_set->nodes, source); // Check whether there was a migrate_from action on the target migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, source, FALSE, data_set); if (migrate_from) { crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d", ID(migrate_from), target, from_status, from_rc); } if (migrate_from && from_rc == PCMK_OCF_OK && from_status == PCMK_LRM_OP_DONE) { /* The migrate_to and migrate_from both succeeded, so mark the migration * as "dangling". This will be used to schedule a stop action on the * source without affecting the target. */ pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op), source); rsc->role = RSC_ROLE_STOPPED; rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } else if (migrate_from && (from_status != PCMK_LRM_OP_PENDING)) { // Failed if (target_node && target_node->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node, target_node->details->online); native_add_running(rsc, target_node, data_set); } } else { // Pending, or complete but erased if (target_node && target_node->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node, target_node->details->online); native_add_running(rsc, target_node, data_set); if (source_node && source_node->details->online) { /* This is a partial migration: the migrate_to completed * successfully on the source, but the migrate_from has not * completed. Remember the source and target; if the newly * chosen target remains the same when we schedule actions * later, we may continue with the migration. */ rsc->partial_migration_target = target_node; rsc->partial_migration_source = source_node; } } else { /* Consider it failed here - forces a restart, prevents migration */ set_bit(rsc->flags, pe_rsc_failed); clear_bit(rsc->flags, pe_rsc_allow_migrate); } } } static void unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, pe_working_set_t *data_set) { int target_stop_id = 0; int target_migrate_from_id = 0; xmlNode *target_stop = NULL; xmlNode *target_migrate_from = NULL; const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); // Sanity check CRM_CHECK(source && target && !strcmp(source, node->details->uname), return); /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be master. */ rsc->role = RSC_ROLE_STARTED; // Check for stop on the target target_stop = find_lrm_op(rsc->id, CRMD_ACTION_STOP, target, NULL, TRUE, data_set); target_stop_id = pe__call_id(target_stop); // Check for migrate_from on the target target_migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, source, TRUE, data_set); target_migrate_from_id = pe__call_id(target_migrate_from); if ((target_stop == NULL) || (target_stop_id < target_migrate_from_id)) { /* There was no stop on the source, or a stop that happened before a * migrate_from, so assume the resource is still active on the target * (if it is up). */ node_t *target_node = pe_find_node(data_set->nodes, target); pe_rsc_trace(rsc, "stop (%d) + migrate_from (%d)", target_stop_id, target_migrate_from_id); if (target_node && target_node->details->online) { native_add_running(rsc, target_node, data_set); } } else if (target_migrate_from == NULL) { /* We know there was a stop on the target, but there may not have been a * migrate_from (the stop could have happened before migrate_from was * scheduled or attempted). * * That means this could be a "dangling" migration. But first, check * whether there is a newer migrate_from or start on the source node -- * it's possible the failed migration was followed by a successful * full restart or migration in the reverse direction, in which case we * don't want to force it to stop. */ xmlNode *source_migrate_from = NULL; xmlNode *source_start = NULL; int source_migrate_to_id = pe__call_id(xml_op); source_migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, source, NULL, TRUE, data_set); if (pe__call_id(source_migrate_from) > source_migrate_to_id) { return; } source_start = find_lrm_op(rsc->id, CRMD_ACTION_START, source, NULL, TRUE, data_set); if (pe__call_id(source_start) > source_migrate_to_id) { return; } // Mark node as having dangling migration so we can force a stop later rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } } static void unpack_migrate_from_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, pe_working_set_t *data_set) { xmlNode *source_stop = NULL; xmlNode *source_migrate_to = NULL; const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); // Sanity check CRM_CHECK(source && target && !strcmp(target, node->details->uname), return); /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be master. */ rsc->role = RSC_ROLE_STARTED; // Check for a stop on the source source_stop = find_lrm_op(rsc->id, CRMD_ACTION_STOP, source, NULL, TRUE, data_set); // Check for a migrate_to on the source source_migrate_to = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, source, target, TRUE, data_set); if ((source_stop == NULL) || (pe__call_id(source_stop) < pe__call_id(source_migrate_to))) { /* There was no stop on the source, or a stop that happened before * migrate_to, so assume the resource is still active on the source (if * it is up). */ pe_node_t *source_node = pe_find_node(data_set->nodes, source); if (source_node && source_node->details->online) { native_add_running(rsc, source_node, data_set); } } } static void record_failed_op(xmlNode *op, const pe_node_t *node, const pe_resource_t *rsc, pe_working_set_t *data_set) { xmlNode *xIter = NULL; const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY); if (node->details->online == FALSE) { return; } for (xIter = data_set->failed->children; xIter; xIter = xIter->next) { const char *key = crm_element_value(xIter, XML_LRM_ATTR_TASK_KEY); const char *uname = crm_element_value(xIter, XML_ATTR_UNAME); if(safe_str_eq(op_key, key) && safe_str_eq(uname, node->details->uname)) { crm_trace("Skipping duplicate entry %s on %s", op_key, node->details->uname); return; } } crm_trace("Adding entry %s on %s", op_key, node->details->uname); crm_xml_add(op, XML_ATTR_UNAME, node->details->uname); crm_xml_add(op, XML_LRM_ATTR_RSCID, rsc->id); add_node_copy(data_set->failed, op); } static const char *get_op_key(xmlNode *xml_op) { const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); if(key == NULL) { key = ID(xml_op); } return key; } static const char * last_change_str(xmlNode *xml_op) { time_t when; const char *when_s = NULL; if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &when) == pcmk_ok) { when_s = crm_now_string(&when); if (when_s) { // Skip day of week to make message shorter when_s = strchr(when_s, ' '); if (when_s) { ++when_s; } } } return ((when_s && *when_s)? when_s : "unknown time"); } static void unpack_rsc_op_failure(resource_t * rsc, node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { guint interval_ms = 0; bool is_probe = false; action_t *action = NULL; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); CRM_ASSERT(rsc); CRM_CHECK(task != NULL, return); *last_failure = xml_op; crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if ((interval_ms == 0) && !strcmp(task, CRMD_ACTION_STATUS)) { is_probe = true; } if (exit_reason == NULL) { exit_reason = ""; } if (is_not_set(data_set->flags, pe_flag_symmetric_cluster) && (rc == PCMK_OCF_NOT_INSTALLED)) { crm_trace("Unexpected result (%s%s%s) was recorded for " "%s of %s on %s at %s " CRM_XS " rc=%d id=%s", services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason, (is_probe? "probe" : task), rsc->id, node->details->uname, last_change_str(xml_op), rc, ID(xml_op)); } else { crm_warn("Unexpected result (%s%s%s) was recorded for " "%s of %s on %s at %s " CRM_XS " rc=%d id=%s", services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason, (is_probe? "probe" : task), rsc->id, node->details->uname, last_change_str(xml_op), rc, ID(xml_op)); if (is_probe && (rc != PCMK_OCF_OK) && (rc != PCMK_OCF_NOT_RUNNING) && (rc != PCMK_OCF_RUNNING_MASTER)) { /* A failed (not just unexpected) probe result could mean the user * didn't know resources will be probed even where they can't run. */ crm_notice("If it is not possible for %s to run on %s, see " "the resource-discovery option for location constraints", rsc->id, node->details->uname); } record_failed_op(xml_op, node, rsc, data_set); } action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); if ((action->on_fail <= action_fail_fence && *on_fail < action->on_fail) || (action->on_fail == action_fail_reset_remote && *on_fail <= action_fail_recover) || (action->on_fail == action_fail_restart_container && *on_fail <= action_fail_recover) || (*on_fail == action_fail_restart_container && action->on_fail >= action_fail_migrate)) { pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail), fail2text(action->on_fail), action->uuid, key); *on_fail = action->on_fail; } if (!strcmp(task, CRMD_ACTION_STOP)) { resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set); } else if (!strcmp(task, CRMD_ACTION_MIGRATE)) { unpack_migrate_to_failure(rsc, node, xml_op, data_set); } else if (!strcmp(task, CRMD_ACTION_MIGRATED)) { unpack_migrate_from_failure(rsc, node, xml_op, data_set); } else if (!strcmp(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if (!strcmp(task, CRMD_ACTION_DEMOTE)) { if (action->on_fail == action_fail_block) { rsc->role = RSC_ROLE_MASTER; rsc->next_role = RSC_ROLE_STOPPED; } else if(rc == PCMK_OCF_NOT_RUNNING) { rsc->role = RSC_ROLE_STOPPED; } else { - /* - * Staying in master role would put the PE/TE into a loop. Setting - * slave role is not dangerous because the resource will be stopped - * as part of recovery, and any master promotion will be ordered - * after that stop. + /* Staying in master role would put the scheduler and controller + * into a loop. Setting slave role is not dangerous because the + * resource will be stopped as part of recovery, and any master + * promotion will be ordered after that stop. */ rsc->role = RSC_ROLE_SLAVE; } } if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) { /* leave stopped */ pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id); rsc->role = RSC_ROLE_STOPPED; } else if (rsc->role < RSC_ROLE_STARTED) { pe_rsc_trace(rsc, "Setting %s active", rsc->id); set_active(rsc); } pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", rsc->id, role2text(rsc->role), node->details->unclean ? "true" : "false", fail2text(action->on_fail), role2text(action->fail_role)); if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) { rsc->next_role = action->fail_role; } if (action->fail_role == RSC_ROLE_STOPPED) { int score = -INFINITY; resource_t *fail_rsc = rsc; if (fail_rsc->parent) { resource_t *parent = uber_parent(fail_rsc); if (pe_rsc_is_clone(parent) && is_not_set(parent->flags, pe_rsc_unique)) { /* For clone resources, if a child fails on an operation * with on-fail = stop, all the resources fail. Do this by preventing * the parent from coming up again. */ fail_rsc = parent; } } crm_notice("%s will not be started under current conditions", fail_rsc->id); /* make sure it doesn't come up again */ if (fail_rsc->allowed_nodes != NULL) { g_hash_table_destroy(fail_rsc->allowed_nodes); } fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes); g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score); } pe_free_action(action); } /*! * \internal * \brief Remap operation status based on action result * * Given an action result, determine an appropriate operation status for the * purposes of responding to the action (the status provided by the executor is * not directly usable since the executor does not know what was expected). * * \param[in,out] rsc Resource that operation history entry is for * \param[in] rc Actual return code of operation * \param[in] target_rc Expected return code of operation * \param[in] node Node where operation was executed * \param[in] xml_op Operation history entry XML from CIB status * \param[in,out] on_fail What should be done about the result * \param[in] data_set Current cluster working set * * \return Operation status based on return code and action info * \note This may update the resource's current and next role. */ static int determine_op_status( resource_t *rsc, int rc, int target_rc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set) { guint interval_ms = 0; bool is_probe = false; int result = PCMK_LRM_OP_DONE; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); CRM_ASSERT(rsc); CRM_CHECK(task != NULL, return PCMK_LRM_OP_ERROR); if (exit_reason == NULL) { exit_reason = ""; } crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if ((interval_ms == 0) && !strcmp(task, CRMD_ACTION_STATUS)) { is_probe = true; task = "probe"; } if (target_rc < 0) { /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the * target_rc in the transition key, which (along with the similar case * of a corrupted transition key in the CIB) will be reported to this * function as -1. Pacemaker 2.0+ does not support rolling upgrades from * those versions or processing of saved CIB files from those versions, * so we do not need to care much about this case. */ result = PCMK_LRM_OP_ERROR; crm_warn("Expected result not found for %s on %s (corrupt or obsolete CIB?)", key, node->details->uname); } else if (target_rc != rc) { result = PCMK_LRM_OP_ERROR; pe_rsc_debug(rsc, "%s on %s: expected %d (%s), got %d (%s%s%s)", key, node->details->uname, target_rc, services_ocf_exitcode_str(target_rc), rc, services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason); } switch (rc) { case PCMK_OCF_OK: if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) { result = PCMK_LRM_OP_DONE; pe_rsc_info(rsc, "Probe found %s active on %s at %s", rsc->id, node->details->uname, last_change_str(xml_op)); } break; case PCMK_OCF_NOT_RUNNING: if (is_probe || target_rc == rc || is_not_set(rsc->flags, pe_rsc_managed)) { result = PCMK_LRM_OP_DONE; rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } break; case PCMK_OCF_RUNNING_MASTER: if (is_probe && (rc != target_rc)) { result = PCMK_LRM_OP_DONE; pe_rsc_info(rsc, "Probe found %s active and promoted on %s at %s", rsc->id, node->details->uname, last_change_str(xml_op)); } rsc->role = RSC_ROLE_MASTER; break; case PCMK_OCF_DEGRADED_MASTER: case PCMK_OCF_FAILED_MASTER: rsc->role = RSC_ROLE_MASTER; result = PCMK_LRM_OP_ERROR; break; case PCMK_OCF_NOT_CONFIGURED: result = PCMK_LRM_OP_ERROR_FATAL; break; case PCMK_OCF_UNIMPLEMENT_FEATURE: if (interval_ms > 0) { result = PCMK_LRM_OP_NOTSUPPORTED; break; } // fall through case PCMK_OCF_NOT_INSTALLED: case PCMK_OCF_INVALID_PARAM: case PCMK_OCF_INSUFFICIENT_PRIV: if (!pe_can_fence(data_set, node) && !strcmp(task, CRMD_ACTION_STOP)) { /* If a stop fails and we can't fence, there's nothing else we can do */ pe_proc_err("No further recovery can be attempted for %s " "because %s on %s failed (%s%s%s) at %s " CRM_XS " rc=%d id=%s", rsc->id, task, node->details->uname, services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason, last_change_str(xml_op), rc, ID(xml_op)); clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); } result = PCMK_LRM_OP_ERROR_HARD; break; default: if (result == PCMK_LRM_OP_DONE) { crm_info("Treating unknown exit status %d from %s of %s " "on %s at %s as failure", rc, task, rsc->id, node->details->uname, last_change_str(xml_op)); result = PCMK_LRM_OP_ERROR; } break; } return result; } // return TRUE if start or monitor last failure but parameters changed static bool should_clear_for_param_change(xmlNode *xml_op, const char *task, pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set) { if (!strcmp(task, "start") || !strcmp(task, "monitor")) { if (pe__bundle_needs_remote_name(rsc)) { /* We haven't allocated resources yet, so we can't reliably * substitute addr parameters for the REMOTE_CONTAINER_HACK. * When that's needed, defer the check until later. */ pe__add_param_check(xml_op, rsc, node, pe_check_last_failure, data_set); } else { op_digest_cache_t *digest_data = NULL; digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set); switch (digest_data->rc) { case RSC_DIGEST_UNKNOWN: crm_trace("Resource %s history entry %s on %s" " has no digest to compare", rsc->id, get_op_key(xml_op), node->details->id); break; case RSC_DIGEST_MATCH: break; default: return TRUE; } } } return FALSE; } // Order action after fencing of remote node, given connection rsc static void order_after_remote_fencing(pe_action_t *action, pe_resource_t *remote_conn, pe_working_set_t *data_set) { pe_node_t *remote_node = pe_find_node(data_set->nodes, remote_conn->id); if (remote_node) { pe_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL, data_set); order_actions(fence, action, pe_order_implies_then); } } static bool should_ignore_failure_timeout(pe_resource_t *rsc, xmlNode *xml_op, const char *task, guint interval_ms, bool is_last_failure, pe_working_set_t *data_set) { /* Clearing failures of recurring monitors has special concerns. The * executor reports only changes in the monitor result, so if the * monitor is still active and still getting the same failure result, * that will go undetected after the failure is cleared. * * Also, the operation history will have the time when the recurring * monitor result changed to the given code, not the time when the * result last happened. * * @TODO We probably should clear such failures only when the failure * timeout has passed since the last occurrence of the failed result. * However we don't record that information. We could maybe approximate * that by clearing only if there is a more recent successful monitor or * stop result, but we don't even have that information at this point * since we are still unpacking the resource's operation history. * * This is especially important for remote connection resources with a * reconnect interval, so in that case, we skip clearing failures * if the remote node hasn't been fenced. */ if (rsc->remote_reconnect_ms && is_set(data_set->flags, pe_flag_stonith_enabled) && (interval_ms != 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) { pe_node_t *remote_node = pe_find_node(data_set->nodes, rsc->id); if (remote_node && !remote_node->details->remote_was_fenced) { if (is_last_failure) { crm_info("Waiting to clear monitor failure for remote node %s" " until fencing has occurred", rsc->id); } return TRUE; } } return FALSE; } /*! * \internal * \brief Check operation age and schedule failure clearing when appropriate * * This function has two distinct purposes. The first is to check whether an * operation history entry is expired (i.e. the resource has a failure timeout, * the entry is older than the timeout, and the resource either has no fail * count or its fail count is entirely older than the timeout). The second is to * schedule fail count clearing when appropriate (i.e. the operation is expired * and either the resource has an expired fail count or the operation is a * last_failure for a remote connection resource with a reconnect interval, * or the operation is a last_failure for a start or monitor operation and the * resource's parameters have changed since the operation). * * \param[in] rsc Resource that operation happened to * \param[in] node Node that operation happened on * \param[in] rc Actual result of operation * \param[in] xml_op Operation history entry XML * \param[in] data_set Current working set * * \return TRUE if operation history entry is expired, FALSE otherwise */ static bool check_operation_expiry(pe_resource_t *rsc, pe_node_t *node, int rc, xmlNode *xml_op, pe_working_set_t *data_set) { bool expired = FALSE; bool is_last_failure = crm_ends_with(ID(xml_op), "_last_failure_0"); time_t last_run = 0; guint interval_ms = 0; int unexpired_fail_count = 0; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *clear_reason = NULL; crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if ((rsc->failure_timeout > 0) && (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0)) { // Resource has a failure-timeout, and history entry has a timestamp time_t now = get_effective_time(data_set); time_t last_failure = 0; // Is this particular operation history older than the failure timeout? if ((now >= (last_run + rsc->failure_timeout)) && !should_ignore_failure_timeout(rsc, xml_op, task, interval_ms, is_last_failure, data_set)) { expired = TRUE; } // Does the resource as a whole have an unexpired fail count? unexpired_fail_count = pe_get_failcount(node, rsc, &last_failure, pe_fc_effective, xml_op, data_set); // Update scheduler recheck time according to *last* failure crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds" " last-failure@%lld", ID(xml_op), (long long) last_run, (expired? "" : "not "), (long long) now, unexpired_fail_count, rsc->failure_timeout, (long long) last_failure); last_failure += rsc->failure_timeout + 1; if (unexpired_fail_count && (now < last_failure)) { pe__update_recheck_time(last_failure, data_set); } } if (expired) { if (pe_get_failcount(node, rsc, NULL, pe_fc_default, xml_op, data_set)) { // There is a fail count ignoring timeout if (unexpired_fail_count == 0) { // There is no fail count considering timeout clear_reason = "it expired"; } else { /* This operation is old, but there is an unexpired fail count. * In a properly functioning cluster, this should only be * possible if this operation is not a failure (otherwise the * fail count should be expired too), so this is really just a * failsafe. */ expired = FALSE; } } else if (is_last_failure && rsc->remote_reconnect_ms) { /* Clear any expired last failure when reconnect interval is set, * even if there is no fail count. */ clear_reason = "reconnect interval is set"; } } if (!expired && is_last_failure && should_clear_for_param_change(xml_op, task, rsc, node, data_set)) { clear_reason = "resource parameters have changed"; } if (clear_reason != NULL) { // Schedule clearing of the fail count pe_action_t *clear_op = pe__clear_failcount(rsc, node, clear_reason, data_set); if (is_set(data_set->flags, pe_flag_stonith_enabled) && rsc->remote_reconnect_ms) { /* If we're clearing a remote connection due to a reconnect * interval, we want to wait until any scheduled fencing * completes. * * We could limit this to remote_node->details->unclean, but at * this point, that's always true (it won't be reliable until * after unpack_node_loop() is done). */ crm_info("Clearing %s failure will wait until any scheduled " "fencing of %s completes", task, rsc->id); order_after_remote_fencing(clear_op, rsc, data_set); } } if (expired && (interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) { switch(rc) { case PCMK_OCF_OK: case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_MASTER: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_MASTER: // Don't expire probes that return these values expired = FALSE; break; } } return expired; } int pe__target_rc_from_xml(xmlNode *xml_op) { int target_rc = 0; const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); if (key == NULL) { return -1; } decode_transition_key(key, NULL, NULL, NULL, &target_rc); return target_rc; } static enum action_fail_response get_action_on_fail(resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set) { int result = action_fail_recover; action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); result = action->on_fail; pe_free_action(action); return result; } static void update_resource_state(resource_t * rsc, node_t * node, xmlNode * xml_op, const char * task, int rc, xmlNode * last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { gboolean clear_past_failure = FALSE; CRM_ASSERT(rsc); CRM_ASSERT(xml_op); if (rc == PCMK_OCF_NOT_RUNNING) { clear_past_failure = TRUE; } else if (rc == PCMK_OCF_NOT_INSTALLED) { rsc->role = RSC_ROLE_STOPPED; } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) { if (last_failure) { const char *op_key = get_op_key(xml_op); const char *last_failure_key = get_op_key(last_failure); if (safe_str_eq(op_key, last_failure_key)) { clear_past_failure = TRUE; } } if (rsc->role < RSC_ROLE_STARTED) { set_active(rsc); } } else if (safe_str_eq(task, CRMD_ACTION_START)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_STOP)) { rsc->role = RSC_ROLE_STOPPED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* Demote from Master does not clear an error */ rsc->role = RSC_ROLE_SLAVE; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) { unpack_migrate_to_success(rsc, node, xml_op, data_set); } else if (rsc->role < RSC_ROLE_STARTED) { pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname); set_active(rsc); } /* clear any previous failure actions */ if (clear_past_failure) { switch (*on_fail) { case action_fail_stop: case action_fail_fence: case action_fail_migrate: case action_fail_standby: pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop", rsc->id, fail2text(*on_fail)); break; case action_fail_block: case action_fail_ignore: case action_fail_recover: case action_fail_restart_container: *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; break; case action_fail_reset_remote: if (rsc->remote_reconnect_ms == 0) { /* With no reconnect interval, the connection is allowed to * start again after the remote node is fenced and * completely stopped. (With a reconnect interval, we wait * for the failure to be cleared entirely before attempting * to reconnect.) */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } break; } } } /*! * \internal * \brief Remap informational monitor results to usual values * * Certain OCF result codes are for providing extended information to the * user about services that aren't yet failed but not entirely healthy either. * These must be treated as the "normal" result by pacemaker. * * \param[in] rc Actual result of a monitor action * \param[in] xml_op Operation history XML * \param[in] node Node that operation happened on * \param[in] rsc Resource that operation happened to * \param[in] data_set Cluster working set * * \return Result code that pacemaker should use * * \note If the result is remapped, and the node is not shutting down or failed, * the operation will be recorded in the data set's list of failed * operations, to highlight it for the user. */ static int remap_monitor_rc(int rc, xmlNode *xml_op, const pe_node_t *node, const pe_resource_t *rsc, pe_working_set_t *data_set) { int remapped_rc = rc; switch (rc) { case PCMK_OCF_DEGRADED: remapped_rc = PCMK_OCF_OK; break; case PCMK_OCF_DEGRADED_MASTER: remapped_rc = PCMK_OCF_RUNNING_MASTER; break; default: break; } if (rc != remapped_rc) { crm_trace("Remapping monitor result %d to %d", rc, remapped_rc); if (!node->details->shutdown || node->details->online) { record_failed_op(xml_op, node, rsc, data_set); } } return remapped_rc; } static void unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum action_fail_response *on_fail, pe_working_set_t *data_set) { int rc = 0; int task_id = 0; int target_rc = 0; int status = PCMK_LRM_OP_UNKNOWN; guint interval_ms = 0; const char *task = NULL; const char *task_key = NULL; const char *exit_reason = NULL; bool expired = FALSE; resource_t *parent = rsc; enum action_fail_response failure_strategy = action_fail_recover; CRM_CHECK(rsc && node && xml_op, return); target_rc = pe__target_rc_from_xml(xml_op); task_key = get_op_key(xml_op); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); if (exit_reason == NULL) { exit_reason = ""; } crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc); crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status); crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); CRM_CHECK(task != NULL, return); CRM_CHECK(status <= PCMK_LRM_OP_INVALID, return); CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return); if (!strcmp(task, CRMD_ACTION_NOTIFY) || !strcmp(task, CRMD_ACTION_METADATA)) { /* safe to ignore these */ return; } if (is_not_set(rsc->flags, pe_rsc_unique)) { parent = uber_parent(rsc); } pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)", task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role)); if (node->details->unclean) { pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean." " Further action depends on the value of the stop's on-fail attribute", node->details->uname, rsc->id); } /* It should be possible to call remap_monitor_rc() first then call * check_operation_expiry() only if rc != target_rc, because there should * never be a fail count without at least one unexpected result in the * resource history. That would be more efficient by avoiding having to call * check_operation_expiry() for expected results. * * However, we do have such configurations in the scheduler regression * tests, even if it shouldn't be possible with the current code. It's * probably a good idea anyway, but that would require updating the test * inputs to something currently possible. */ if ((status != PCMK_LRM_OP_NOT_INSTALLED) && check_operation_expiry(rsc, node, rc, xml_op, data_set)) { expired = TRUE; } if (!strcmp(task, CRMD_ACTION_STATUS)) { rc = remap_monitor_rc(rc, xml_op, node, rsc, data_set); } if (expired && (rc != target_rc)) { const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); if (interval_ms == 0) { crm_notice("Ignoring expired %s failure on %s " CRM_XS " actual=%d expected=%d magic=%s", task_key, node->details->uname, rc, target_rc, magic); goto done; } else if(node->details->online && node->details->unclean == FALSE) { /* Reschedule the recurring monitor. CancelXmlOp() won't work at * this stage, so as a hacky workaround, forcibly change the restart * digest so check_action_definition() does what we want later. * * @TODO We should skip this if there is a newer successful monitor. * Also, this causes rescheduling only if the history entry * has an op-digest (which the expire-non-blocked-failure * scheduler regression test doesn't, but that may not be a * realistic scenario in production). */ crm_notice("Rescheduling %s after failure expired on %s " CRM_XS " actual=%d expected=%d magic=%s", task_key, node->details->uname, rc, target_rc, magic); crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout"); goto done; } } /* If the executor reported an operation status of anything but done or * error, consider that final. But for done or error, we know better whether * it should be treated as a failure or not, because we know the expected * result. */ if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) { status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set); pe_rsc_trace(rsc, "Remapped %s status to %d", task_key, status); } switch (status) { case PCMK_LRM_OP_CANCELLED: // Should never happen pe_err("Resource history contains cancellation '%s' " "(%s of %s on %s at %s)", ID(xml_op), task, rsc->id, node->details->uname, last_change_str(xml_op)); break; case PCMK_LRM_OP_PENDING: if (!strcmp(task, CRMD_ACTION_START)) { set_bit(rsc->flags, pe_rsc_start_pending); set_active(rsc); } else if (!strcmp(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if (!strcmp(task, CRMD_ACTION_MIGRATE) && node->details->unclean) { /* If a pending migrate_to action is out on a unclean node, * we have to force the stop action on the target. */ const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); node_t *target = pe_find_node(data_set->nodes, migrate_target); if (target) { stop_action(rsc, target, FALSE); } } if (rsc->pending_task == NULL) { if ((interval_ms != 0) || strcmp(task, CRMD_ACTION_STATUS)) { rsc->pending_task = strdup(task); rsc->pending_node = node; } else { /* Pending probes are not printed, even if pending * operations are requested. If someone ever requests that * behavior, enable the below and the corresponding part of * native.c:native_pending_task(). */ #if 0 rsc->pending_task = strdup("probe"); rsc->pending_node = node; #endif } } break; case PCMK_LRM_OP_DONE: pe_rsc_trace(rsc, "%s of %s on %s completed at %s " CRM_XS " id=%s", task, rsc->id, node->details->uname, last_change_str(xml_op), ID(xml_op)); update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set); break; case PCMK_LRM_OP_NOT_INSTALLED: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if (failure_strategy == action_fail_ignore) { crm_warn("Cannot ignore failed %s of %s on %s: " "Resource agent doesn't exist " CRM_XS " status=%d rc=%d id=%s", task, rsc->id, node->details->uname, status, rc, ID(xml_op)); /* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */ *on_fail = action_fail_migrate; } resource_location(parent, node, -INFINITY, "hard-error", data_set); unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set); break; case PCMK_LRM_OP_NOT_CONNECTED: if (pe__is_guest_or_remote_node(node) && is_set(node->details->remote_rsc->flags, pe_rsc_managed)) { /* We should never get into a situation where a managed remote * connection resource is considered OK but a resource action * behind the connection gets a "not connected" status. But as a * fail-safe in case a bug or unusual circumstances do lead to * that, ensure the remote connection is considered failed. */ set_bit(node->details->remote_rsc->flags, pe_rsc_failed); } // fall through case PCMK_LRM_OP_ERROR: case PCMK_LRM_OP_ERROR_HARD: case PCMK_LRM_OP_ERROR_FATAL: case PCMK_LRM_OP_TIMEOUT: case PCMK_LRM_OP_NOTSUPPORTED: case PCMK_LRM_OP_INVALID: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if ((failure_strategy == action_fail_ignore) || (failure_strategy == action_fail_restart_container && !strcmp(task, CRMD_ACTION_STOP))) { crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s " "succeeded " CRM_XS " rc=%d id=%s", task, services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason, rsc->id, node->details->uname, last_change_str(xml_op), rc, ID(xml_op)); update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, on_fail, data_set); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); set_bit(rsc->flags, pe_rsc_failure_ignored); record_failed_op(xml_op, node, rsc, data_set); if (failure_strategy == action_fail_restart_container && *on_fail <= action_fail_recover) { *on_fail = failure_strategy; } } else { unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set); if(status == PCMK_LRM_OP_ERROR_HARD) { do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE, "Preventing %s from restarting on %s because " "of hard failure (%s%s%s)" CRM_XS " rc=%d id=%s", parent->id, node->details->uname, services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason, rc, ID(xml_op)); resource_location(parent, node, -INFINITY, "hard-error", data_set); } else if(status == PCMK_LRM_OP_ERROR_FATAL) { crm_err("Preventing %s from restarting anywhere because " "of fatal failure (%s%s%s) " CRM_XS " rc=%d id=%s", parent->id, services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason, rc, ID(xml_op)); resource_location(parent, NULL, -INFINITY, "fatal-error", data_set); } } break; } done: pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s", rsc->id, task, role2text(rsc->role), role2text(rsc->next_role)); } static void add_node_attrs(xmlNode *xml_obj, pe_node_t *node, bool overwrite, pe_working_set_t *data_set) { const char *cluster_name = NULL; g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_UNAME), strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID), strdup(node->details->id)); if (safe_str_eq(node->details->id, data_set->dc_uuid)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE)); } cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name"); if (cluster_name) { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME), strdup(cluster_name)); } pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_ATTR_SETS, NULL, node->details->attrs, NULL, overwrite, data_set); if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) { const char *site_name = pe_node_attribute_raw(node, "site-name"); if (site_name) { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_SITE_NAME), strdup(site_name)); } else if (cluster_name) { /* Default to cluster-name if unset */ g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_SITE_NAME), strdup(cluster_name)); } } } static GListPtr extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter) { int counter = -1; int stop_index = -1; int start_index = -1; xmlNode *rsc_op = NULL; GListPtr gIter = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, XML_ATTR_UNAME, node); op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if (active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; if (start_index < stop_index) { crm_trace("Skipping %s: not active", ID(rsc_entry)); break; } else if (counter < start_index) { crm_trace("Skipping %s: old", ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); } g_list_free(sorted_op_list); return op_list; } GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t * data_set) { GListPtr output = NULL; GListPtr intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE); node_t *this_node = NULL; xmlNode *node_state = NULL; for (node_state = __xml_first_child_element(status); node_state != NULL; node_state = __xml_next_element(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) { const char *uname = crm_element_value(node_state, XML_ATTR_UNAME); if (node != NULL && safe_str_neq(uname, node)) { continue; } this_node = pe_find_node(data_set->nodes, uname); if(this_node == NULL) { CRM_LOG_ASSERT(this_node != NULL); continue; } else if (pe__is_guest_or_remote_node(this_node)) { determine_remote_online_status(data_set, this_node); } else { determine_online_status(node_state, this_node, data_set); } if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ xmlNode *lrm_rsc = NULL; tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); for (lrm_rsc = __xml_first_child_element(tmp); lrm_rsc != NULL; lrm_rsc = __xml_next_element(lrm_rsc)) { if (crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) { const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID); if (rsc != NULL && safe_str_neq(rsc_id, rsc)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); } } } } } return output; }