diff --git a/daemons/controld/controld_based.c b/daemons/controld/controld_based.c index ec08831ad0..ed86d46d25 100644 --- a/daemons/controld/controld_based.c +++ b/daemons/controld/controld_based.c @@ -1,151 +1,171 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include /* sleep */ #include #include #include #include #include #include // for crmd_cib_connection_destroy() #include #include int cib_retries = 0; static void do_cib_updated(const char *event, xmlNode * msg) { if (crm_patchset_contains_alert(msg, TRUE)) { mainloop_set_trigger(config_read); } } static void do_cib_replaced(const char *event, xmlNode * msg) { crm_debug("Updating the CIB after a replace: DC=%s", AM_I_DC ? "true" : "false"); if (AM_I_DC == FALSE) { return; } else if (fsa_state == S_FINALIZE_JOIN && is_set(fsa_input_register, R_CIB_ASKED)) { /* no need to restart the join - we asked for this replace op */ return; } /* start the join process again so we get everyone's LRM status */ populate_cib_nodes(node_update_quick|node_update_all, __FUNCTION__); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } /* A_CIB_STOP, A_CIB_START, O_CIB_RESTART */ void do_cib_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { CRM_ASSERT(fsa_cib_conn != NULL); if (action & A_CIB_STOP) { if (fsa_cib_conn->state != cib_disconnected && last_resource_update != 0) { crm_info("Waiting for resource update %d to complete", last_resource_update); crmd_fsa_stall(FALSE); return; } crm_info("Disconnecting from the CIB manager"); clear_bit(fsa_input_register, R_CIB_CONNECTED); fsa_cib_conn->cmds->del_notify_callback(fsa_cib_conn, T_CIB_DIFF_NOTIFY, do_cib_updated); if (fsa_cib_conn->state != cib_disconnected) { fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local); fsa_cib_conn->cmds->signoff(fsa_cib_conn); } crm_notice("Disconnected from the CIB manager"); } if (action & A_CIB_START) { int rc = pcmk_ok; if (cur_state == S_STOPPING) { crm_err("Ignoring request to connect to the CIB manager after shutdown"); return; } rc = fsa_cib_conn->cmds->signon(fsa_cib_conn, CRM_SYSTEM_CRMD, cib_command_nonblocking); if (rc != pcmk_ok) { /* a short wait that usually avoids stalling the FSA */ sleep(1); rc = fsa_cib_conn->cmds->signon(fsa_cib_conn, CRM_SYSTEM_CRMD, cib_command_nonblocking); } if (rc != pcmk_ok) { crm_info("Could not connect to the CIB manager: %s", pcmk_strerror(rc)); } else if (pcmk_ok != fsa_cib_conn->cmds->set_connection_dnotify(fsa_cib_conn, crmd_cib_connection_destroy)) { crm_err("Could not set dnotify callback"); } else if (pcmk_ok != fsa_cib_conn->cmds->add_notify_callback(fsa_cib_conn, T_CIB_REPLACE_NOTIFY, do_cib_replaced)) { crm_err("Could not set CIB notification callback (replace)"); } else if (pcmk_ok != fsa_cib_conn->cmds->add_notify_callback(fsa_cib_conn, T_CIB_DIFF_NOTIFY, do_cib_updated)) { crm_err("Could not set CIB notification callback (update)"); } else { set_bit(fsa_input_register, R_CIB_CONNECTED); cib_retries = 0; } if (is_not_set(fsa_input_register, R_CIB_CONNECTED)) { cib_retries++; crm_warn("Couldn't complete CIB registration %d" " times... pause and retry", cib_retries); if (cib_retries < 30) { crm_timer_start(wait_timer); crmd_fsa_stall(FALSE); } else { crm_err("Could not complete CIB" " registration %d times..." " hard error", cib_retries); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } } } /*! * \internal * \brief Get CIB call options to use local scope if master unavailable * * \return CIB call options */ int crmd_cib_smart_opt() { int call_opt = cib_quorum_override; if (fsa_state == S_ELECTION || fsa_state == S_PENDING) { crm_info("Sending update to local CIB in state: %s", fsa_state2string(fsa_state)); call_opt |= cib_scope_local; } return call_opt; } + +/*! + * \internal + * \brief Check whether an action type should be recorded in the CIB + * + * \param[in] action Action type + * + * \return TRUE if action should be recorded, FALSE otherwise + */ +bool +controld_action_is_recordable(const char *action) +{ + if (safe_str_eq(action, CRMD_ACTION_CANCEL) + || safe_str_eq(action, CRMD_ACTION_DELETE) + || safe_str_eq(action, CRMD_ACTION_NOTIFY) + || safe_str_eq(action, CRMD_ACTION_METADATA)) { + return FALSE; + } + return TRUE; +} diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c index c7714fd0f0..167c66e131 100644 --- a/daemons/controld/controld_execd.c +++ b/daemons/controld/controld_execd.c @@ -1,2676 +1,2675 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define START_DELAY_THRESHOLD 5 * 60 * 1000 #define MAX_LRM_REG_FAILS 30 #define s_if_plural(i) (((i) == 1)? "" : "s") struct delete_event_s { int rc; const char *rsc; lrm_state_t *lrm_state; }; static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, const char *user_name); static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation); static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request); void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id); static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level); static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op); static void lrm_connection_destroy(void) { if (is_set(fsa_input_register, R_LRM_CONNECTED)) { crm_crit("Connection to executor failed"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit(fsa_input_register, R_LRM_CONNECTED); } else { crm_info("Disconnected from executor"); } } static char * make_stop_id(const char *rsc, int call_id) { return crm_strdup_printf("%s:%d", rsc, call_id); } static void copy_instance_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") == NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } static void copy_meta_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") != NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } /*! * \internal * \brief Remove a recurring operation from a resource's history * * \param[in,out] history Resource history to modify * \param[in] op Operation to remove * * \return TRUE if the operation was found and removed, FALSE otherwise */ static gboolean history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_event_data_t *existing = iter->data; if ((op->interval_ms == existing->interval_ms) && crm_str_eq(op->rsc_id, existing->rsc_id, TRUE) && safe_str_eq(op->op_type, existing->op_type)) { history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter); lrmd_free_event(existing); return TRUE; } } return FALSE; } /*! * \internal * \brief Free all recurring operations in resource history * * \param[in,out] history Resource history to modify */ static void history_free_recurring_ops(rsc_history_t *history) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_free_event(iter->data); } g_list_free(history->recurring_op_list); history->recurring_op_list = NULL; } /*! * \internal * \brief Free resource history * * \param[in,out] history Resource history to free */ void history_free(gpointer data) { rsc_history_t *history = (rsc_history_t*)data; if (history->stop_params) { g_hash_table_destroy(history->stop_params); } /* Don't need to free history->rsc.id because it's set to history->id */ free(history->rsc.type); free(history->rsc.standard); free(history->rsc.provider); lrmd_free_event(history->failed); lrmd_free_event(history->last); free(history->id); history_free_recurring_ops(history); free(history); } static void update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int target_rc = 0; rsc_history_t *entry = NULL; if (op->rsc_deleted) { crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL); return; } if (safe_str_eq(op->op_type, RSC_NOTIFY)) { return; } crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type); entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id); if (entry == NULL && rsc) { entry = calloc(1, sizeof(rsc_history_t)); entry->id = strdup(op->rsc_id); g_hash_table_insert(lrm_state->resource_history, entry->id, entry); entry->rsc.id = entry->id; entry->rsc.type = strdup(rsc->type); entry->rsc.standard = strdup(rsc->standard); if (rsc->provider) { entry->rsc.provider = strdup(rsc->provider); } else { entry->rsc.provider = NULL; } } else if (entry == NULL) { crm_info("Resource %s no longer exists, not updating cache", op->rsc_id); return; } entry->last_callid = op->call_id; target_rc = rsc_op_expected_rc(op); if (op->op_status == PCMK_LRM_OP_CANCELLED) { if (op->interval_ms > 0) { crm_trace("Removing cancelled recurring op: " CRM_OP_FMT, op->rsc_id, op->op_type, op->interval_ms); history_remove_recurring_op(entry, op); return; } else { crm_trace("Skipping " CRM_OP_FMT " rc=%d, status=%d", op->rsc_id, op->op_type, op->interval_ms, op->rc, op->op_status); } } else if (did_rsc_op_fail(op, target_rc)) { /* Store failed monitors here, otherwise the block below will cause them * to be forgotten when a stop happens. */ if (entry->failed) { lrmd_free_event(entry->failed); } entry->failed = lrmd_copy_event(op); } else if (op->interval_ms == 0) { if (entry->last) { lrmd_free_event(entry->last); } entry->last = lrmd_copy_event(op); if (op->params && (safe_str_eq(CRMD_ACTION_START, op->op_type) || safe_str_eq("reload", op->op_type) || safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) { if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } entry->stop_params = crm_str_table_new(); g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params); } } if (op->interval_ms > 0) { /* Ensure there are no duplicates */ history_remove_recurring_op(entry, op); crm_trace("Adding recurring op: " CRM_OP_FMT, op->rsc_id, op->op_type, op->interval_ms); entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); } else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) { crm_trace("Dropping %d recurring ops because of: " CRM_OP_FMT, g_list_length(entry->recurring_op_list), op->rsc_id, op->op_type, op->interval_ms); history_free_recurring_ops(entry); } } /*! * \internal * \brief Send a direct OK ack for a resource task * * \param[in] lrm_state LRM connection * \param[in] input Input message being ack'ed * \param[in] rsc_id ID of affected resource * \param[in] rsc Affected resource (if available) * \param[in] task Operation task being ack'ed * \param[in] ack_host Name of host to send ack to * \param[in] ack_sys IPC system name to ack */ static void send_task_ok_ack(lrm_state_t *lrm_state, ha_msg_input_t *input, const char *rsc_id, lrmd_rsc_info_t *rsc, const char *task, const char *ack_host, const char *ack_sys) { lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task); op->rc = PCMK_OCF_OK; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(ack_host, ack_sys, rsc, op, rsc_id); lrmd_free_event(op); } void lrm_op_callback(lrmd_event_data_t * op) { const char *nodename = NULL; lrm_state_t *lrm_state = NULL; CRM_CHECK(op != NULL, return); /* determine the node name for this connection. */ nodename = op->remote_nodename ? op->remote_nodename : fsa_our_uname; if (op->type == lrmd_event_disconnect && (safe_str_eq(nodename, fsa_our_uname))) { /* If this is the local executor IPC connection, set the right bits in the * controller when the connection goes down. */ lrm_connection_destroy(); return; } else if (op->type != lrmd_event_exec_complete) { /* we only need to process execution results */ return; } lrm_state = lrm_state_find(nodename); CRM_ASSERT(lrm_state != NULL); process_lrm_event(lrm_state, op, NULL); } /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* This only pertains to local executor connections. Remote connections are * handled as resources within the scheduler. Connecting and disconnecting * from remote executor instances is handled differently. */ lrm_state_t *lrm_state = NULL; if(fsa_our_uname == NULL) { return; /* Nothing to do */ } lrm_state = lrm_state_find_or_create(fsa_our_uname); if (lrm_state == NULL) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (action & A_LRM_DISCONNECT) { if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) { if (action == A_LRM_DISCONNECT) { crmd_fsa_stall(FALSE); return; } } clear_bit(fsa_input_register, R_LRM_CONNECTED); crm_info("Disconnecting from the executor"); lrm_state_disconnect(lrm_state); lrm_state_reset_tables(lrm_state, FALSE); crm_notice("Disconnected from the executor"); } if (action & A_LRM_CONNECT) { int ret = pcmk_ok; crm_debug("Connecting to the executor"); ret = lrm_state_ipc_connect(lrm_state); if (ret != pcmk_ok) { if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) { crm_warn("Failed to connect to the executor %d time%s (%d max)", lrm_state->num_lrm_register_fails, s_if_plural(lrm_state->num_lrm_register_fails), MAX_LRM_REG_FAILS); crm_timer_start(wait_timer); crmd_fsa_stall(FALSE); return; } } if (ret != pcmk_ok) { crm_err("Failed to connect to the executor the max allowed %d time%s", lrm_state->num_lrm_register_fails, s_if_plural(lrm_state->num_lrm_register_fails)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } set_bit(fsa_input_register, R_LRM_CONNECTED); crm_info("Connection to the executor established"); } if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; const char *when = "lrm disconnect"; GHashTableIter gIter; const char *key = NULL; rsc_history_t *entry = NULL; struct recurring_op_s *pending = NULL; crm_debug("Checking for active resources before exit"); if (cur_state == S_TERMINATE) { log_level = LOG_ERR; when = "shutdown"; } else if (is_set(fsa_input_register, R_SHUTDOWN)) { when = "shutdown... waiting"; } if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) { guint removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_actions, lrm_state); guint nremaining = g_hash_table_size(lrm_state->pending_ops); if (removed || nremaining) { crm_notice("Stopped %u recurring operation%s at %s (%u remaining)", removed, s_if_plural(removed), when, nremaining); } } if (lrm_state->pending_ops) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) { /* Ignore recurring actions in the shutdown calculations */ if (pending->interval_ms == 0) { counter++; } } } if (counter > 0) { do_crm_log(log_level, "%d pending executor operation%s at %s", counter, s_if_plural(counter), when); if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) { do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key); } } else { rc = FALSE; } return rc; } if (lrm_state->resource_history == NULL) { return rc; } if (is_set(fsa_input_register, R_SHUTDOWN)) { /* At this point we're not waiting, we're just shutting down */ when = "shutdown"; } counter = 0; g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) { if (is_rsc_active(lrm_state, entry->id) == FALSE) { continue; } counter++; if (log_level == LOG_ERR) { crm_info("Found %s active at %s", entry->id, when); } else { crm_trace("Found %s active at %s", entry->id, when); } if (lrm_state->pending_ops) { GHashTableIter hIter; g_hash_table_iter_init(&hIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) { if (crm_str_eq(entry->id, pending->rsc_id, TRUE)) { crm_notice("%sction %s (%s) incomplete at %s", pending->interval_ms == 0 ? "A" : "Recurring a", key, pending->op_key, when); } } } } if (counter) { crm_err("%d resource%s active at %s", counter, (counter == 1)? " was" : "s were", when); } return rc; } static char * build_parameter_list(const lrmd_event_data_t *op, const struct ra_metadata_s *metadata, xmlNode *result, enum ra_param_flags_e param_type, bool invert_for_xml) { int len = 0; int max = 0; char *list = NULL; GList *iter = NULL; /* Newer resource agents support the "private" parameter attribute to * indicate sensitive parameters. For backward compatibility with older * agents, this list is used if the agent doesn't specify any as "private". */ const char *secure_terms[] = { "password", "passwd", "user", }; if (is_not_set(metadata->ra_flags, ra_uses_private) && (param_type == ra_param_private)) { max = DIMOF(secure_terms); } for (iter = metadata->ra_params; iter != NULL; iter = iter->next) { struct ra_param_s *param = (struct ra_param_s *) iter->data; bool accept = FALSE; if (is_set(param->rap_flags, param_type)) { accept = TRUE; } else if (max) { for (int lpc = 0; lpc < max; lpc++) { if (safe_str_eq(secure_terms[lpc], param->rap_name)) { accept = TRUE; break; } } } if (accept) { int start = len; crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type)); len += strlen(param->rap_name) + 2; // include spaces around list = realloc_safe(list, len + 1); // include null terminator // spaces before and after make parsing simpler sprintf(list + start, " %s ", param->rap_name); } else { crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type)); } if (result && (invert_for_xml? !accept : accept)) { const char *v = g_hash_table_lookup(op->params, param->rap_name); if (v != NULL) { crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v); crm_xml_add(result, param->rap_name, v); } } } return list; } static void append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *restart = NULL; CRM_LOG_ASSERT(op->params != NULL); if (op->interval_ms > 0) { /* monitors are not reloadable */ return; } if (is_set(metadata->ra_flags, ra_supports_reload)) { restart = create_xml_node(NULL, XML_TAG_PARAMS); /* Add any parameters with unique="1" to the "op-force-restart" list. * * (Currently, we abuse "unique=0" to indicate reloadability. This is * nonstandard and should eventually be replaced once the OCF standard * is updated with something better.) */ list = build_parameter_list(op, metadata, restart, ra_param_unique, FALSE); } else { /* Resource does not support reloads */ return; } digest = calculate_operation_digest(restart, version); /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload, * no matter if it actually supports any parameters with unique="1"). */ crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list? list: ""); crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(restart, "restart digest source"); free_xml(restart); free(digest); free(list); } static void append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *secure = NULL; CRM_LOG_ASSERT(op->params != NULL); /* * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on * the insecure ones */ secure = create_xml_node(NULL, XML_TAG_PARAMS); list = build_parameter_list(op, metadata, secure, ra_param_private, TRUE); if (list != NULL) { digest = calculate_operation_digest(secure, version); crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, list); crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(secure, "secure digest source"); } else { crm_trace("%s: no secure parameters", op->rsc_id); } free_xml(secure); free(digest); free(list); } static gboolean build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *node_name, const char *src) { int target_rc = 0; xmlNode *xml_op = NULL; struct ra_metadata_s *metadata = NULL; const char *caller_version = NULL; lrm_state_t *lrm_state = NULL; if (op == NULL) { return FALSE; } target_rc = rsc_op_expected_rc(op); /* there is a small risk in formerly mixed clusters that it will * be sub-optimal. * * however with our upgrade policy, the update we send should * still be completely supported anyway */ caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); CRM_LOG_ASSERT(caller_version != NULL); if(caller_version == NULL) { caller_version = CRM_FEATURE_SET; } crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version); xml_op = create_operation_update(parent, op, caller_version, target_rc, fsa_our_uname, src, LOG_DEBUG); if (xml_op == NULL) { return TRUE; } if ((rsc == NULL) || (op->params == NULL) || !crm_op_needs_metadata(rsc->standard, op->op_type)) { crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)", op->op_type, op->rsc_id, op->params, rsc); return TRUE; } lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { crm_warn("Cannot calculate digests for operation " CRM_OP_FMT " because we have no connection to executor for %s", op->rsc_id, op->op_type, op->interval_ms, node_name); return TRUE; } metadata = metadata_cache_get(lrm_state->metadata_cache, rsc); if (metadata == NULL) { /* For now, we always collect resource agent meta-data via a local, * synchronous, direct execution of the agent. This has multiple issues: * the executor should execute agents, not the controller; meta-data for * Pacemaker Remote nodes should be collected on those nodes, not * locally; and the meta-data call shouldn't eat into the timeout of the * real action being performed. * * These issues are planned to be addressed by having the scheduler * schedule a meta-data cache check at the beginning of each transition. * Once that is working, this block will only be a fallback in case the * initial collection fails. */ char *metadata_str = NULL; int rc = lrm_state_get_metadata(lrm_state, rsc->standard, rsc->provider, rsc->type, &metadata_str, 0); if (rc != pcmk_ok) { crm_warn("Failed to get metadata for %s (%s:%s:%s)", rsc->id, rsc->standard, rsc->provider, rsc->type); return TRUE; } metadata = metadata_cache_update(lrm_state->metadata_cache, rsc, metadata_str); free(metadata_str); if (metadata == NULL) { crm_warn("Failed to update metadata for %s (%s:%s:%s)", rsc->id, rsc->standard, rsc->provider, rsc->type); return TRUE; } } #if ENABLE_VERSIONED_ATTRS crm_xml_add(xml_op, XML_ATTR_RA_VERSION, metadata->ra_version); #endif crm_trace("Including additional digests for %s::%s:%s", rsc->standard, rsc->provider, rsc->type); append_restart_list(op, metadata, xml_op, caller_version); append_secure_list(op, metadata, xml_op, caller_version); return TRUE; } static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id) { rsc_history_t *entry = NULL; entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->last == NULL) { return FALSE; } crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type, entry->last->interval_ms, entry->last->rc); if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)) { return FALSE; } else if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) { /* a stricter check is too complex... * leave that to the PE */ return FALSE; } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) { return FALSE; } else if ((entry->last->interval_ms == 0) && (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) { /* Badly configured resources can't be reliably stopped */ return FALSE; } return TRUE; } static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) { GHashTableIter iter; rsc_history_t *entry = NULL; g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { GList *gIter = NULL; xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.standard); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider); if (entry->last && entry->last->params) { const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); if (container) { crm_trace("Resource %s is a part of container resource %s", entry->id, container); crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container); } } build_operation_update(xml_rsc, &(entry->rsc), entry->failed, lrm_state->node_name, __FUNCTION__); build_operation_update(xml_rsc, &(entry->rsc), entry->last, lrm_state->node_name, __FUNCTION__); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { build_operation_update(xml_rsc, &(entry->rsc), gIter->data, lrm_state->node_name, __FUNCTION__); } } return FALSE; } static xmlNode * do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags) { xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; crm_node_t *peer = NULL; peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY); CRM_CHECK(peer != NULL, return NULL); xml_state = create_node_state_update(peer, update_flags, NULL, __FUNCTION__); if (xml_state == NULL) { return NULL; } xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(lrm_state, rsc_list); crm_log_xml_trace(xml_state, "Current executor state"); return xml_state; } xmlNode * do_lrm_query(gboolean is_replace, const char *node_name) { lrm_state_t *lrm_state = lrm_state_find(node_name); if (!lrm_state) { crm_err("Could not find executor state for node %s", node_name); return NULL; } return do_lrm_query_internal(lrm_state, node_update_cluster|node_update_peer); } static void notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) { lrmd_event_data_t *op = NULL; const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); crm_info("Notifying %s on %s that %s was%s deleted", from_sys, (from_host? from_host : "localhost"), rsc_id, ((rc == pcmk_ok)? "" : " not")); op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE); if (rc == pcmk_ok) { op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; } else { op->op_status = PCMK_LRM_OP_ERROR; op->rc = PCMK_OCF_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { /* this isn't expected - trigger a new transition */ time_t now = time(NULL); char *now_s = crm_itoa(now); crm_debug("Triggering a refresh after %s deleted %s from the executor", from_sys, rsc_id); update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, FALSE, NULL, NULL); free(now_s); } } static gboolean lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) { struct delete_event_s *event = user_data; struct pending_deletion_op_s *op = value; if (crm_str_eq(event->rsc, op->rsc, TRUE)) { notify_deleted(event->lrm_state, op->input, event->rsc, event->rc); return TRUE; } return FALSE; } static gboolean lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; struct recurring_op_s *pending = value; if (crm_str_eq(rsc, pending->rsc_id, TRUE)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } /* * Remove the rsc from the CIB * * Avoids refreshing the entire LRM section of this host */ #define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, const char *user_name) { char *rsc_xpath = NULL; int rc = pcmk_ok; CRM_CHECK(rsc_id != NULL, return -ENXIO); rsc_xpath = crm_strdup_printf(rsc_template, lrm_state->node_name, rsc_id); rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath, NULL, NULL, call_options | cib_xpath, user_name); free(rsc_xpath); return rc; } static void delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, GHashTableIter * rsc_gIter, int rc, const char *user_name) { struct delete_event_s event; CRM_CHECK(rsc_id != NULL, return); if (rc == pcmk_ok) { char *rsc_id_copy = strdup(rsc_id); if (rsc_gIter) g_hash_table_iter_remove(rsc_gIter); else g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); crm_debug("sync: Sending delete op for %s", rsc_id_copy); delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name); g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy); free(rsc_id_copy); } if (input) { notify_deleted(lrm_state, input, rsc_id, rc); } event.rc = rc; event.rsc = rsc_id; event.lrm_state = lrm_state; g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event); } /*! * \internal * \brief Erase an LRM history entry from the CIB, given the operation data * * \param[in] lrm_state LRM state of the desired node * \param[in] op Operation whose history should be deleted */ static void erase_lrm_history_by_op(lrm_state_t *lrm_state, lrmd_event_data_t *op) { xmlNode *xml_top = NULL; CRM_CHECK(op != NULL, return); xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); if (op->interval_ms > 0) { char *op_id = generate_op_key(op->rsc_id, op->op_type, op->interval_ms); /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ crm_xml_add(xml_top, XML_ATTR_ID, op_id); free(op_id); } crm_debug("Erasing resource operation history for " CRM_OP_FMT " (call=%d)", op->rsc_id, op->op_type, op->interval_ms, op->call_id); fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_quorum_override); crm_log_xml_trace(xml_top, "op:cancel"); free_xml(xml_top); } /* Define xpath to find LRM resource history entry by node and resource */ #define XPATH_HISTORY \ "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ "/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \ "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ "/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \ "/" XML_LRM_TAG_RSC_OP /* ... and also by operation key */ #define XPATH_HISTORY_ID XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s']" /* ... and also by operation key and operation call ID */ #define XPATH_HISTORY_CALL XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']" /* ... and also by operation key and original operation key */ #define XPATH_HISTORY_ORIG XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']" /*! * \internal * \brief Erase an LRM history entry from the CIB, given operation identifiers * * \param[in] lrm_state LRM state of the node to clear history for * \param[in] rsc_id Name of resource to clear history for * \param[in] key Operation key of operation to clear history for * \param[in] orig_op If specified, delete only if it has this original op * \param[in] call_id If specified, delete entry only if it has this call ID */ static void erase_lrm_history_by_id(lrm_state_t *lrm_state, const char *rsc_id, const char *key, const char *orig_op, int call_id) { char *op_xpath = NULL; CRM_CHECK((rsc_id != NULL) && (key != NULL), return); if (call_id > 0) { op_xpath = crm_strdup_printf(XPATH_HISTORY_CALL, lrm_state->node_name, rsc_id, key, call_id); } else if (orig_op) { op_xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, lrm_state->node_name, rsc_id, key, orig_op); } else { op_xpath = crm_strdup_printf(XPATH_HISTORY_ID, lrm_state->node_name, rsc_id, key); } crm_debug("Erasing resource operation history for %s on %s (call=%d)", key, rsc_id, call_id); fsa_cib_conn->cmds->remove(fsa_cib_conn, op_xpath, NULL, cib_quorum_override | cib_xpath); free(op_xpath); } static inline gboolean last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms) { if (entry == NULL) { return FALSE; } if (op == NULL) { return TRUE; } return (safe_str_eq(op, entry->failed->op_type) && (interval_ms == entry->failed->interval_ms)); } /*! * \internal * \brief Clear a resource's last failure * * Erase a resource's last failure on a particular node from both the * LRM resource history in the CIB, and the resource history remembered * for the LRM state. * * \param[in] rsc_id Resource name * \param[in] node_name Node name * \param[in] operation If specified, only clear if matching this operation * \param[in] interval_ms If operation is specified, it has this interval */ void lrm_clear_last_failure(const char *rsc_id, const char *node_name, const char *operation, guint interval_ms) { char *op_key = NULL; char *orig_op_key = NULL; lrm_state_t *lrm_state = NULL; lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { return; } /* Erase from CIB */ op_key = generate_op_key(rsc_id, "last_failure", 0); if (operation) { orig_op_key = generate_op_key(rsc_id, operation, interval_ms); } erase_lrm_history_by_id(lrm_state, rsc_id, op_key, orig_op_key, 0); free(op_key); free(orig_op_key); /* Remove from memory */ if (lrm_state->resource_history) { rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (last_failed_matches_op(entry, operation, interval_ms)) { lrmd_free_event(entry->failed); entry->failed = NULL; } } } /* Returns: gboolean - cancellation is in progress */ static gboolean cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove) { int rc = pcmk_ok; char *local_key = NULL; struct recurring_op_s *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); if (key == NULL) { local_key = make_stop_id(rsc_id, op); key = local_key; } pending = g_hash_table_lookup(lrm_state->pending_ops, key); if (pending) { if (remove && pending->remove == FALSE) { pending->remove = TRUE; crm_debug("Scheduling %s for removal", key); } if (pending->cancelled) { crm_debug("Operation %s already cancelled", key); free(local_key); return FALSE; } pending->cancelled = TRUE; } else { crm_info("No pending op found for %s", key); free(local_key); return FALSE; } crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval_ms); if (rc == pcmk_ok) { crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); free(local_key); return TRUE; } crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key); /* The caller needs to make sure the entry is * removed from the pending_ops list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from pending_ops will block * the node from shutting down */ free(local_key); return FALSE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct cancel_data *data = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (crm_str_eq(op->op_key, data->key, TRUE)) { data->done = TRUE; remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove); } return remove; } static gboolean cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove) { guint removed = 0; struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->pending_ops, cancel_action_by_key, &data); crm_trace("Removed %u op cache entries, new size: %u", removed, g_hash_table_size(lrm_state->pending_ops)); return data.done; } /*! * \internal * \brief Retrieve resource information from LRM * * \param[in] lrm_state LRM connection to use * \param[in] rsc_xml XML containing resource configuration * \param[in] do_create If true, register resource with LRM if not already * \param[out] rsc_info Where to store resource information obtained from LRM * * \retval pcmk_ok Success (and rsc_info holds newly allocated result) * \retval -EINVAL Required information is missing from arguments * \retval -ENOTCONN No active connection to LRM * \retval -ENODEV Resource not found * \retval -errno Error communicating with executor when registering resource * * \note Caller is responsible for freeing result on success. */ static int get_lrm_resource(lrm_state_t *lrm_state, xmlNode *rsc_xml, gboolean do_create, lrmd_rsc_info_t **rsc_info) { const char *id = ID(rsc_xml); CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL); CRM_CHECK(id, return -EINVAL); if (lrm_state_is_connected(lrm_state) == FALSE) { return -ENOTCONN; } crm_trace("Retrieving resource information for %s from the executor", id); *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); // If resource isn't known by ID, try clone name, if provided if (!*rsc_info) { const char *long_id = crm_element_value(rsc_xml, XML_ATTR_ID_LONG); if (long_id) { *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0); } } if ((*rsc_info == NULL) && do_create) { const char *class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER); const char *type = crm_element_value(rsc_xml, XML_ATTR_TYPE); int rc; crm_trace("Registering resource %s with the executor", id); rc = lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring); if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Could not register resource %s with the executor on %s: %s " CRM_XS " rc=%d", id, lrm_state->node_name, pcmk_strerror(rc), rc); /* Register this as an internal error if this involves the local * executor. Otherwise, we're likely dealing with an unresponsive * remote node, which is not an FSA failure. */ if (lrm_state_is_local(lrm_state) == TRUE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } return rc; } *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); } return *rsc_info? pcmk_ok : -ENODEV; } static void delete_resource(lrm_state_t * lrm_state, const char *id, lrmd_rsc_info_t * rsc, GHashTableIter * gIter, const char *sys, const char *host, const char *user, ha_msg_input_t * request, gboolean unregister) { int rc = pcmk_ok; crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host); if (rsc && unregister) { rc = lrm_state_unregister_rsc(lrm_state, id, 0); } if (rc == pcmk_ok) { crm_trace("Resource '%s' deleted", id); } else if (rc == -EINPROGRESS) { crm_info("Deletion of resource '%s' pending", id); if (request) { struct pending_deletion_op_s *op = NULL; char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE); op = calloc(1, sizeof(struct pending_deletion_op_s)); op->rsc = strdup(rsc->id); op->input = copy_ha_msg_input(request); g_hash_table_insert(lrm_state->deletion_ops, ref, op); } return; } else { crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d", id, sys, user ? user : "internal", host, rc); } delete_rsc_entry(lrm_state, request, id, gIter, rc, user); } static int get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) { int call_id = 999999999; rsc_history_t *entry = NULL; if(lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* Make sure the call id is greater than the last successful operation, * otherwise the failure will not result in a possible recovery of the resource * as it could appear the failure occurred before the successful start */ if (entry) { call_id = entry->last_callid + 1; } if (call_id < 0) { call_id = 1; } return call_id; } static void fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status, enum ocf_exitcode op_exitcode) { op->call_id = get_fake_call_id(lrm_state, op->rsc_id); op->t_run = time(NULL); op->t_rcchange = op->t_run; op->op_status = op_status; op->rc = op_exitcode; } static void force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node) { GHashTableIter gIter; rsc_history_t *entry = NULL; crm_info("Clearing resource history on node %s", lrm_state->node_name); g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { /* only unregister the resource during a reprobe if it is not a remote connection * resource. otherwise unregistering the connection will terminate remote-node * membership */ gboolean unregister = TRUE; if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { lrm_state_t *remote_lrm_state = lrm_state_find(entry->id); if (remote_lrm_state) { /* when forcing a reprobe, make sure to clear remote node before * clearing the remote node's connection resource */ force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE); } unregister = FALSE; } delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host, user_name, NULL, unregister); } /* Now delete the copy in the CIB */ erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local); /* Finally, _delete_ the value in pacemaker-attrd -- setting it to FALSE * would result in the scheduler sending us back here again */ update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); } static void synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) { lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc_info = NULL; const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK); const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET); xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE); if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) { /* @TODO Should we do something else, like direct ack? */ crm_info("Can't fake %s failure (%d) on %s without resource configuration", crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node); return; } else if(operation == NULL) { /* This probably came from crm_resource -C, nothing to do */ crm_info("Can't fake %s failure (%d) on %s without operation", ID(xml_rsc), rc, target_node); return; } op = construct_op(lrm_state, action, ID(xml_rsc), operation); if (safe_str_eq(operation, RSC_NOTIFY)) { // Notifications can't fail fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_OK); } else { fake_op_status(lrm_state, op, PCMK_LRM_OP_ERROR, rc); } crm_info("Faking " CRM_OP_FMT " result (%d) on %s", op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node); /* Process the result as if it came from the LRM, if possible * (i.e. resource info can be obtained from the lrm_state). */ if (lrm_state) { rsc_info = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); } if (rsc_info) { lrmd_free_rsc_info(rsc_info); process_lrm_event(lrm_state, op, NULL); - } else { + } else if (controld_action_is_recordable(op->op_type)) { /* If we can't process the result normally, at least write it to the CIB * if possible, so the scheduler can act on it. */ const char *standard = crm_element_value(xml_rsc, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(xml_rsc, XML_AGENT_ATTR_PROVIDER); const char *type = crm_element_value(xml_rsc, XML_ATTR_TYPE); if (standard && type) { rsc_info = lrmd_new_rsc_info(op->rsc_id, standard, provider, type); do_update_resource(target_node, rsc_info, op); lrmd_free_rsc_info(rsc_info); } else { // @TODO Should we direct ack? crm_info("Can't fake %s failure (%d) on %s without resource standard and type", crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node); } } lrmd_free_event(op); } /*! * \internal * \brief Get target of an LRM operation * * \param[in] xml LRM operation data XML * * \return LRM operation target node name (local node or Pacemaker Remote node) */ static const char * lrm_op_target(xmlNode *xml) { const char *target = NULL; if (xml) { target = crm_element_value(xml, XML_LRM_ATTR_TARGET); } if (target == NULL) { target = fsa_our_uname; } return target; } static void fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, const char *from_host, const char *from_sys) { lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* The executor simply executes operations and reports the results, without * any concept of success or failure, so to fail a resource, we must fake * what a failure looks like. * * To do this, we create a fake executor operation event for the resource, * and pass that event to the executor client callback so it will be * processed as if it came from the executor. */ op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon"); fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR); free((char*) op->user_data); op->user_data = NULL; op->interval_ms = 0; #if ENABLE_ACL if (user_name && is_privileged(user_name) == FALSE) { crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } #endif if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) { crm_info("Failing resource %s...", rsc->id); process_lrm_event(lrm_state, op, NULL); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; lrmd_free_rsc_info(rsc); } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(xml, "bad input"); } send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); } static void handle_refresh_op(lrm_state_t *lrm_state, const char *user_name, const char *from_host, const char *from_sys) { int rc = pcmk_ok; xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); crm_info("Forced a local resource history refresh: call=%d", rc); if (safe_str_neq(CRM_SYSTEM_CRMD, from_sys)) { xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, fragment, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } free_xml(fragment); } static void handle_query_op(xmlNode *msg, lrm_state_t *lrm_state) { xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all); xmlNode *reply = create_reply(msg, data); if (relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_xml_err(reply, "reply"); } free_xml(reply); free_xml(data); } static void handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node) { crm_notice("Forcing the status of all resources to be redetected"); force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node); if (safe_str_neq(CRM_SYSTEM_PENGINE, from_sys) && safe_str_neq(CRM_SYSTEM_TENGINE, from_sys)) { xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } } static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys) { char *op_key = NULL; char *meta_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; const char *interval_ms_s = NULL; gboolean in_progress = FALSE; xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); CRM_CHECK(params != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS); interval_ms_s = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(interval_ms_s != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_TASK); op_task = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(op_task != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); call_id = crm_element_value(params, meta_key); free(meta_key); op_key = generate_op_key(rsc->id, op_task, crm_parse_ms(interval_ms_s)); crm_debug("Scheduler requested op %s (call=%s) be cancelled", op_key, (call_id? call_id : "NA")); call = crm_parse_int(call_id, "0"); if (call == 0) { // Normal case when the scheduler cancels a recurring op in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); } else { // Normal case when the scheduler cancels an orphan op in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); } // Acknowledge cancellation operation if for a remote connection resource if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) { char *op_id = make_stop_id(rsc->id, call); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) { crm_info("Nothing known about operation %d for %s", call, op_key); } erase_lrm_history_by_id(lrm_state, rsc->id, op_key, NULL, call); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); /* needed at least for cancellation of a remote operation */ g_hash_table_remove(lrm_state->pending_ops, op_id); free(op_id); } else { /* No ack is needed since abcdaa8, but peers with older versions * in a rolling upgrade need one. We didn't bump the feature set * at that commit, so we can only compare against the previous * CRM version (3.0.8). If any peers have feature set 3.0.9 but * not abcdaa8, they will time out waiting for the ack (no * released versions of Pacemaker are affected). */ const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION); if (compare_version(peer_version, "3.0.8") <= 0) { crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)", op_key, from_host, peer_version); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); } } free(op_key); return TRUE; } static void do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host, bool crm_rsc_delete, const char *user_name) { gboolean unregister = TRUE; #if ENABLE_ACL int cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun|cib_sync_call, user_name); if (cib_rc != pcmk_ok) { lrmd_event_data_t *op = NULL; crm_err("Could not delete resource status of %s for %s (user %s) on %s: %s" CRM_XS " rc=%d", rsc->id, from_sys, (user_name? user_name : "unknown"), from_host, pcmk_strerror(cib_rc), cib_rc); op = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE); op->op_status = PCMK_LRM_OP_ERROR; if (cib_rc == -EACCES) { op->rc = PCMK_OCF_INSUFFICIENT_PRIV; } else { op->rc = PCMK_OCF_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc->id); lrmd_free_event(op); return; } #endif if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { unregister = FALSE; } delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input, unregister); } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { lrm_state_t *lrm_state = NULL; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *user_name = NULL; const char *target_node = NULL; gboolean is_remote_node = FALSE; bool crm_rsc_delete = FALSE; target_node = lrm_op_target(input->xml); is_remote_node = safe_str_neq(target_node, fsa_our_uname); lrm_state = lrm_state_find(target_node); if ((lrm_state == NULL) && is_remote_node) { crm_err("Failing action because local node has never had connection to remote node %s", target_node); synthesize_lrmd_failure(NULL, input->xml, PCMK_OCF_CONNECTION_DIED); return; } CRM_ASSERT(lrm_state != NULL); #if ENABLE_ACL user_name = crm_acl_get_set_user(input->msg, F_CRM_USER, NULL); crm_trace("Executor command from user '%s'", user_name); #endif crm_op = crm_element_value(input->msg, F_CRM_TASK); from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } crm_trace("Executor %s command from %s", crm_op, from_sys); if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { crm_rsc_delete = TRUE; // Only crm_resource uses this op operation = CRMD_ACTION_DELETE; } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) { fail_lrm_resource(input->xml, lrm_state, user_name, from_host, from_sys); return; } else if (input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { handle_refresh_op(lrm_state, user_name, from_host, from_sys); } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { handle_query_op(input->msg, lrm_state); } else if (safe_str_eq(operation, CRM_OP_PROBED)) { update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node); } else if (safe_str_eq(operation, CRM_OP_REPROBE) || safe_str_eq(crm_op, CRM_OP_REPROBE)) { handle_reprobe_op(lrm_state, from_sys, from_host, user_name, is_remote_node); } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); gboolean create_rsc = safe_str_neq(operation, CRMD_ACTION_DELETE); int rc; // We can't return anything meaningful without a resource ID CRM_CHECK(xml_rsc && ID(xml_rsc), return); rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc); if (rc == -ENOTCONN) { synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_CONNECTION_DIED); return; } else if ((rc < 0) && !create_rsc) { /* Delete of malformed or nonexistent resource * (deleting something that does not exist is a success) */ crm_notice("Not registering resource '%s' for a %s event " CRM_XS " get-rc=%d (%s) transition-key=%s", ID(xml_rsc), operation, rc, pcmk_strerror(rc), ID(input->xml)); delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name); send_task_ok_ack(lrm_state, input, ID(xml_rsc), NULL, operation, from_host, from_sys); return; } else if (rc == -EINVAL) { // Resource operation on malformed resource crm_err("Invalid resource definition for %s", ID(xml_rsc)); crm_log_xml_warn(input->msg, "invalid resource"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_NOT_CONFIGURED); // fatal error return; } else if (rc < 0) { // Error communicating with the executor crm_err("Could not register resource '%s' with executor: %s " CRM_XS " rc=%d", ID(xml_rsc), pcmk_strerror(rc), rc); crm_log_xml_warn(input->msg, "failed registration"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_INVALID_PARAM); // hard error return; } if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) { if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) { crm_log_xml_warn(input->xml, "Bad command"); } } else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { do_lrm_delete(input, lrm_state, rsc, from_sys, from_host, crm_rsc_delete, user_name); } else { do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg); } lrmd_free_rsc_info(rsc); } else { crm_err("Cannot perform operation %s of unknown type", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static lrmd_event_data_t * construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation) { lrmd_event_data_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; const char *interval_ms_s = NULL; GHashTable *params = NULL; const char *transition = NULL; CRM_ASSERT(rsc_id && operation); op = calloc(1, sizeof(lrmd_event_data_t)); CRM_ASSERT(op != NULL); op->type = lrmd_event_exec_complete; op->op_type = strdup(operation); op->op_status = PCMK_LRM_OP_PENDING; op->rc = -1; op->rsc_id = strdup(rsc_id); op->interval_ms = 0; op->timeout = 0; op->start_delay = 0; if (rsc_op == NULL) { CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); op->user_data = NULL; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = crm_str_table_new(); g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); crm_trace("Constructed %s op for %s", operation, rsc_id); return op; } params = xml2list(rsc_op); g_hash_table_remove(params, CRM_META "_op_target_rc"); op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY); op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT); interval_ms_s = crm_meta_value(params, XML_LRM_ATTR_INTERVAL_MS); op->interval_ms = crm_parse_ms(interval_ms_s); op->timeout = crm_parse_int(op_timeout, "0"); op->start_delay = crm_parse_int(op_delay, "0"); #if ENABLE_VERSIONED_ATTRS // Resolve any versioned parameters if (lrm_state && safe_str_neq(op->op_type, RSC_METADATA) && safe_str_neq(op->op_type, CRMD_ACTION_DELETE) && !is_remote_lrmd_ra(NULL, NULL, rsc_id)) { // Resource info *should* already be cached, so we don't get executor call lrmd_rsc_info_t *rsc = lrm_state_get_rsc_info(lrm_state, rsc_id, 0); struct ra_metadata_s *metadata; metadata = metadata_cache_get(lrm_state->metadata_cache, rsc); if (metadata) { xmlNode *versioned_attrs = NULL; GHashTable *hash = NULL; char *key = NULL; char *value = NULL; GHashTableIter iter; versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_ATTRS); hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { g_hash_table_iter_steal(&iter); g_hash_table_replace(params, key, value); } g_hash_table_destroy(hash); versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_META); hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { g_hash_table_replace(params, crm_meta_name(key), strdup(value)); if (safe_str_eq(key, XML_ATTR_TIMEOUT)) { op->timeout = crm_parse_int(value, "0"); } else if (safe_str_eq(key, XML_OP_ATTR_START_DELAY)) { op->start_delay = crm_parse_int(value, "0"); } } g_hash_table_destroy(hash); versioned_attrs = first_named_child(rsc_op, XML_TAG_RSC_VER_ATTRS); hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { g_hash_table_iter_steal(&iter); g_hash_table_replace(params, key, value); } g_hash_table_destroy(hash); } lrmd_free_rsc_info(rsc); } #endif if (safe_str_neq(operation, RSC_STOP)) { op->params = params; } else { rsc_history_t *entry = NULL; if (lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* If we do not have stop parameters cached, use * whatever we are given */ if (!entry || !entry->stop_params) { op->params = params; } else { /* Copy the cached parameter list so that we stop the resource * with the old attributes, not the new ones */ op->params = crm_str_table_new(); g_hash_table_foreach(params, copy_meta_keys, op->params); g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); g_hash_table_destroy(params); params = NULL; } } /* sanity */ if (op->timeout <= 0) { op->timeout = op->interval_ms; } if (op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = strdup(transition); if (op->interval_ms != 0) { if (safe_str_eq(operation, CRMD_ACTION_START) || safe_str_eq(operation, CRMD_ACTION_STOP)) { crm_err("Start and Stop actions cannot have an interval: %u", op->interval_ms); op->interval_ms = 0; } } crm_trace("Constructed %s op for %s: interval=%u", operation, rsc_id, op->interval_ms); return op; } void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; crm_node_t *peer = NULL; CRM_CHECK(op != NULL, return); if (op->rsc_id == NULL) { CRM_ASSERT(rsc_id != NULL); op->rsc_id = strdup(rsc_id); } if (to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } peer = crm_get_peer(0, fsa_our_uname); update = create_node_state_update(peer, node_update_none, NULL, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, fsa_our_uname, __FUNCTION__); reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_log_xml_trace(update, "ACK Update"); crm_debug("ACK'ing resource op " CRM_OP_FMT " from %s: %s", op->rsc_id, op->op_type, op->interval_ms, op->user_data, crm_element_value(reply, XML_ATTR_REFERENCE)); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(update); free_xml(reply); } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { gboolean res = TRUE; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) { /* keep iterating through all even when false is returned */ res = FALSE; } } set_bit(fsa_input_register, R_SENT_RSC_STOP); g_list_free(lrm_state_list); lrm_state_list = NULL; return res; } struct stop_recurring_action_s { lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct stop_recurring_action_s *event = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if ((op->interval_ms != 0) && crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) { crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key); remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE); } return remove; } static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; lrm_state_t *lrm_state = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (op->interval_ms != 0) { crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (const char *) key); remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE); } return remove; } static void record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op) { const char *record_pending = NULL; CRM_CHECK(node_name != NULL, return); CRM_CHECK(rsc != NULL, return); CRM_CHECK(op != NULL, return); + // Never record certain operation types as pending if ((op->op_type == NULL) || (op->params == NULL) - || safe_str_eq(op->op_type, CRMD_ACTION_CANCEL) - || safe_str_eq(op->op_type, CRMD_ACTION_DELETE)) { + || !controld_action_is_recordable(op->op_type)) { return; } // defaults to true record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING); if (record_pending && !crm_is_true(record_pending)) { return; } op->call_id = -1; op->op_status = PCMK_LRM_OP_PENDING; op->rc = PCMK_OCF_UNKNOWN; op->t_run = time(NULL); op->t_rcchange = op->t_run; /* write a "pending" entry to the CIB, inhibit notification */ crm_debug("Recording pending op " CRM_OP_FMT " on %s in the CIB", op->rsc_id, op->op_type, op->interval_ms, node_name); do_update_resource(node_name, rsc, op); } static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request) { int call_id = 0; char *op_id = NULL; lrmd_event_data_t *op = NULL; lrmd_key_value_t *params = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; gboolean stop_recurring = FALSE; bool send_nack = FALSE; CRM_CHECK(rsc != NULL, return); CRM_CHECK(operation != NULL, return); if (msg != NULL) { transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if (transition == NULL) { crm_log_xml_err(msg, "Missing transition number"); } } op = construct_op(lrm_state, msg, rsc->id, operation); CRM_CHECK(op != NULL, return); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) && (op->interval_ms == 0) && strcmp(operation, CRMD_ACTION_MIGRATE) == 0) { /* pcmk remote connections are a special use case. * We never ever want to stop monitoring a connection resource until * the entire migration has completed. If the connection is unexpectedly * severed, even during a migration, this is an event we must detect.*/ stop_recurring = FALSE; } else if ((op->interval_ms == 0) && strcmp(operation, CRMD_ACTION_STATUS) != 0 && strcmp(operation, CRMD_ACTION_NOTIFY) != 0) { /* stop any previous monitor operations before changing the resource state */ stop_recurring = TRUE; } if (stop_recurring == TRUE) { guint removed = 0; struct stop_recurring_action_s data; data.rsc = rsc; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_action_by_rsc, &data); if (removed) { crm_debug("Stopped %u recurring operation%s in preparation for " CRM_OP_FMT, removed, s_if_plural(removed), rsc->id, operation, op->interval_ms); } } /* now do the op */ crm_info("Performing key=%s op=" CRM_OP_FMT, transition, rsc->id, operation, op->interval_ms); if (is_set(fsa_input_register, R_SHUTDOWN) && safe_str_eq(operation, RSC_START)) { register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); send_nack = TRUE; } else if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE /* Recalculating */ && fsa_state != S_TRANSITION_ENGINE && safe_str_neq(operation, CRMD_ACTION_STOP)) { send_nack = TRUE; } if(send_nack) { crm_notice("Discarding attempt to perform action %s on %s in state %s (shutdown=%s)", operation, rsc->id, fsa_state2string(fsa_state), is_set(fsa_input_register, R_SHUTDOWN)?"true":"false"); op->rc = CRM_DIRECT_NACK_RC; op->op_status = PCMK_LRM_OP_ERROR; send_direct_ack(NULL, NULL, rsc, op, rsc->id); lrmd_free_event(op); free(op_id); return; } record_pending_op(lrm_state->node_name, rsc, op); op_id = generate_op_key(rsc->id, op->op_type, op->interval_ms); if (op->interval_ms > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(lrm_state, rsc, op_id, FALSE); } if (op->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, op->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { params = lrmd_key_value_add(params, key, value); } } call_id = lrm_state_exec(lrm_state, rsc->id, op->op_type, op->user_data, op->interval_ms, op->timeout, op->start_delay, params); if (call_id <= 0 && lrm_state_is_local(lrm_state)) { crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else if (call_id <= 0) { crm_err("Operation %s on resource %s failed to execute on remote node %s: %d", operation, rsc->id, lrm_state->node_name, call_id); fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR); process_lrm_event(lrm_state, op, NULL); } else { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); struct recurring_op_s *pending = NULL; pending = calloc(1, sizeof(struct recurring_op_s)); crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval_ms = op->interval_ms; pending->op_type = strdup(operation); pending->op_key = strdup(op_id); pending->rsc_id = strdup(rsc->id); pending->start_time = time(NULL); pending->user_data = strdup(op->user_data); g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending); if ((op->interval_ms > 0) && (op->start_delay > START_DELAY_THRESHOLD)) { char *uuid = NULL; int dummy = 0, target_rc = 0; crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc); free(uuid); op->rc = target_rc; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(NULL, NULL, rsc, op, rsc->id); } pending->params = op->params; op->params = NULL; } free(op_id); lrmd_free_event(op); return; } int last_resource_update = 0; static void cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { switch (rc) { case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: crm_trace("Resource update %d complete: rc=%d", call_id, rc); break; default: crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc)); } if (call_id == last_resource_update) { last_resource_update = 0; trigger_fsa(fsa_source); } } static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { /* */ int rc = pcmk_ok; xmlNode *update, *iter = NULL; int call_opt = crmd_cib_smart_opt(); const char *uuid = NULL; CRM_CHECK(op != NULL, return 0); iter = create_xml_node(iter, XML_CIB_TAG_STATUS); update = iter; iter = create_xml_node(iter, XML_CIB_TAG_STATE); if (safe_str_eq(node_name, fsa_our_uname)) { uuid = fsa_our_uuid; } else { /* remote nodes uuid and uname are equal */ uuid = node_name; crm_xml_add(iter, XML_NODE_IS_REMOTE, "true"); } CRM_LOG_ASSERT(uuid != NULL); if(uuid == NULL) { rc = -EINVAL; goto done; } crm_xml_add(iter, XML_ATTR_UUID, uuid); crm_xml_add(iter, XML_ATTR_UNAME, node_name); crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__); iter = create_xml_node(iter, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, node_name, __FUNCTION__); if (rsc) { const char *container = NULL; crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->standard); crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider); if (op->params) { container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); } if (container) { crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container); crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container); } } else { crm_warn("Resource %s no longer exists in the executor", op->rsc_id); send_direct_ack(NULL, NULL, rsc, op, op->rsc_id); goto cleanup; } crm_log_xml_trace(update, __FUNCTION__); /* make it an asynchronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down then anyway? * (probably because of an internal error) * * Worst case: * we get shot for having resources "running" that really weren't * * the alternative however means blocking here for too long, which * isn't acceptable */ fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL); if (rc > 0) { last_resource_update = rc; } done: /* the return code is a call number, not an error code */ crm_trace("Sent resource state update message: %d for %s=%u on %s", rc, op->op_type, op->interval_ms, op->rsc_id); fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback); cleanup: free_xml(update); return rc; } void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data) { CRM_CHECK(FALSE, return); } static char * unescape_newlines(const char *string) { char *pch = NULL; char *ret = NULL; static const char *escaped_newline = "\\n"; if (!string) { return NULL; } ret = strdup(string); pch = strstr(ret, escaped_newline); while (pch != NULL) { /* 2 chars for 2 chars, null-termination irrelevant */ memcpy(pch, "\n ", 2 * sizeof(char)); pch = strstr(pch, escaped_newline); } return ret; } gboolean process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending) { char *op_id = NULL; char *op_key = NULL; int update_id = 0; gboolean remove = FALSE; gboolean removed = FALSE; lrmd_rsc_info_t *rsc = NULL; CRM_CHECK(op != NULL, return FALSE); CRM_CHECK(op->rsc_id != NULL, return FALSE); op_id = make_stop_id(op->rsc_id, op->call_id); op_key = generate_op_key(op->rsc_id, op->op_type, op->interval_ms); rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); if(pending == NULL) { remove = TRUE; pending = g_hash_table_lookup(lrm_state->pending_ops, op_id); } if (op->op_status == PCMK_LRM_OP_ERROR) { switch(op->rc) { case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_MASTER: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_MASTER: // Leave it to the TE/scheduler to decide if this is an error op->op_status = PCMK_LRM_OP_DONE; break; default: /* Nothing to do */ break; } } if (op->op_status != PCMK_LRM_OP_CANCELLED) { - if (safe_str_eq(op->op_type, RSC_NOTIFY) || safe_str_eq(op->op_type, RSC_METADATA)) { - /* Keep notify and meta-data ops out of the CIB */ - send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); - } else { + if (controld_action_is_recordable(op->op_type)) { update_id = do_update_resource(lrm_state->node_name, rsc, op); + } else { + send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } } else if (op->interval_ms == 0) { /* This will occur when "crm resource cleanup" is called while actions are in-flight */ crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id); send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else if (pending == NULL) { /* We don't need to do anything for cancelled ops * that are not in our pending op list. There are no * transition actions waiting on these operations. */ } else if (op->user_data == NULL) { /* At this point we have a pending entry, but no transition * key present in the user_data field. report this */ crm_err("Op %s (call=%d): No user data", op_key, op->call_id); } else if (pending->remove) { /* The tengine canceled this op, we have been waiting for the cancel to finish. */ erase_lrm_history_by_op(lrm_state, op); } else if (op->rsc_deleted) { /* The tengine initiated this op, but it was cancelled outside of the * tengine's control during a resource cleanup/re-probe request. The tengine * must be alerted that this operation completed, otherwise the tengine * will continue waiting for this update to occur until it is timed out. * We don't want this update going to the cib though, so use a direct ack. */ crm_trace("Op %s (call=%d): cancelled due to rsc deletion", op_key, op->call_id); send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else { /* Before a stop is called, no need to direct ack */ crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id); } if(remove == FALSE) { /* The caller will do this afterwards, but keep the logging consistent */ removed = TRUE; } else if ((op->interval_ms == 0) && g_hash_table_remove(lrm_state->pending_ops, op_id)) { removed = TRUE; crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops)); } else if ((op->interval_ms != 0) && (op->op_status == PCMK_LRM_OP_CANCELLED)) { removed = TRUE; g_hash_table_remove(lrm_state->pending_ops, op_id); } switch (op->op_status) { case PCMK_LRM_OP_CANCELLED: crm_info("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s confirmed=%s", crm_action_str(op->op_type, op->interval_ms), op->rsc_id, lrm_state->node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, (removed? "true" : "false")); break; case PCMK_LRM_OP_DONE: do_crm_log((op->interval_ms? LOG_INFO : LOG_NOTICE), "Result of %s operation for %s on %s: %d (%s) " CRM_XS " call=%d key=%s confirmed=%s cib-update=%d", crm_action_str(op->op_type, op->interval_ms), op->rsc_id, lrm_state->node_name, op->rc, services_ocf_exitcode_str(op->rc), op->call_id, op_key, (removed? "true" : "false"), update_id); break; case PCMK_LRM_OP_TIMEOUT: crm_err("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s timeout=%dms", crm_action_str(op->op_type, op->interval_ms), op->rsc_id, lrm_state->node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, op->timeout); break; default: crm_err("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s confirmed=%s status=%d cib-update=%d", crm_action_str(op->op_type, op->interval_ms), op->rsc_id, lrm_state->node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, (removed? "true" : "false"), op->op_status, update_id); } if (op->output) { char *prefix = crm_strdup_printf("%s-" CRM_OP_FMT ":%d", lrm_state->node_name, op->rsc_id, op->op_type, op->interval_ms, op->call_id); if (op->rc) { crm_log_output(LOG_NOTICE, prefix, op->output); } else { crm_log_output(LOG_DEBUG, prefix, op->output); } free(prefix); } if (safe_str_neq(op->op_type, RSC_METADATA)) { crmd_alert_resource_op(lrm_state->node_name, op); } else if (op->rc == PCMK_OCF_OK) { char *metadata = unescape_newlines(op->output); metadata_cache_update(lrm_state->metadata_cache, rsc, metadata); free(metadata); } if (op->rsc_deleted) { crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL); } /* If a shutdown was escalated while operations were pending, * then the FSA will be stalled right now... allow it to continue */ mainloop_set_trigger(fsa_source); update_history_cache(lrm_state, rsc, op); lrmd_free_rsc_info(rsc); free(op_key); free(op_id); return TRUE; } diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c index 26bfd41fd7..35b20dec5b 100644 --- a/daemons/controld/controld_te_actions.c +++ b/daemons/controld/controld_te_actions.c @@ -1,755 +1,748 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include char *te_uuid = NULL; GHashTable *te_targets = NULL; void send_rsc_command(crm_action_t * action); static void te_update_job_count(crm_action_t * action, int offset); static void te_start_action_timer(crm_graph_t * graph, crm_action_t * action) { action->timer = calloc(1, sizeof(crm_action_timer_t)); action->timer->timeout = action->timeout; action->timer->action = action; action->timer->source_id = g_timeout_add(action->timer->timeout + graph->network_delay, action_timer_callback, (void *)action->timer); CRM_ASSERT(action->timer->source_id != 0); } static gboolean te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo) { const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK); /* send to peers as well? */ if (safe_str_eq(task, CRM_OP_MAINTENANCE_NODES)) { GHashTableIter iter; crm_node_t *node = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { xmlNode *cmd = NULL; if (safe_str_eq(fsa_our_uname, node->uname)) { continue; } cmd = create_request(task, pseudo->xml, node->uname, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); send_cluster_message(node, crm_msg_crmd, cmd, FALSE); free_xml(cmd); } remote_ra_process_maintenance_nodes(pseudo->xml); } else { /* Check action for Pacemaker Remote node side effects */ remote_ra_process_pseudo(pseudo->xml); } crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id, crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY)); te_action_confirmed(pseudo); update_graph(graph, pseudo); trigger_graph(); return TRUE; } void send_stonith_update(crm_action_t * action, const char *target, const char *uuid) { int rc = pcmk_ok; crm_node_t *peer = NULL; /* We (usually) rely on the membership layer to do node_update_cluster, * and the peer status callback to do node_update_peer, because the node * might have already rejoined before we get the stonith result here. */ int flags = node_update_join | node_update_expected; /* zero out the node-status & remove all LRM status info */ xmlNode *node_state = NULL; CRM_CHECK(target != NULL, return); CRM_CHECK(uuid != NULL, return); /* Make sure the membership and join caches are accurate */ peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY); CRM_CHECK(peer != NULL, return); if (peer->state == NULL) { /* Usually, we rely on the membership layer to update the cluster state * in the CIB. However, if the node has never been seen, do it here, so * the node is not considered unclean. */ flags |= node_update_cluster; } if (peer->uuid == NULL) { crm_info("Recording uuid '%s' for node '%s'", uuid, target); peer->uuid = strdup(uuid); } crmd_peer_down(peer, TRUE); /* Generate a node state update for the CIB */ node_state = create_node_state_update(peer, flags, NULL, __FUNCTION__); /* we have to mark whether or not remote nodes have already been fenced */ if (peer->flags & crm_remote_node) { time_t now = time(NULL); char *now_s = crm_itoa(now); crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s); free(now_s); } /* Force our known ID */ crm_xml_add(node_state, XML_ATTR_UUID, uuid); rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state, cib_quorum_override | cib_scope_local | cib_can_create); /* Delay processing the trigger until the update completes */ crm_debug("Sending fencing update %d for %s", rc, target); fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated); /* Make sure it sticks */ /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */ erase_status_tag(peer->uname, XML_CIB_TAG_LRM, cib_scope_local); erase_status_tag(peer->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); free_xml(node_state); return; } static gboolean te_fence_node(crm_graph_t * graph, crm_action_t * action) { int rc = 0; const char *id = NULL; const char *uuid = NULL; const char *target = NULL; const char *type = NULL; gboolean invalid_action = FALSE; enum stonith_call_options options = st_opt_none; id = ID(action->xml); target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); type = crm_meta_value(action->params, "stonith_action"); CRM_CHECK(id != NULL, invalid_action = TRUE); CRM_CHECK(uuid != NULL, invalid_action = TRUE); CRM_CHECK(type != NULL, invalid_action = TRUE); CRM_CHECK(target != NULL, invalid_action = TRUE); if (invalid_action) { crm_log_xml_warn(action->xml, "BadAction"); return FALSE; } crm_notice("Requesting fencing (%s) of node %s " CRM_XS " action=%s timeout=%d", type, target, id, transition_graph->stonith_timeout); /* Passing NULL means block until we can connect... */ te_connect_stonith(NULL); if (crmd_join_phase_count(crm_join_confirmed) == 1) { options |= st_opt_allow_suicide; } rc = stonith_api->cmds->fence(stonith_api, options, target, type, transition_graph->stonith_timeout / 1000, 0); stonith_api->cmds->register_callback(stonith_api, rc, transition_graph->stonith_timeout / 1000, st_opt_timeout_updates, generate_transition_key(transition_graph->id, action->id, 0, te_uuid), "tengine_stonith_callback", tengine_stonith_callback); return TRUE; } static int get_target_rc(crm_action_t * action) { const char *target_rc_s = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC); if (target_rc_s != NULL) { return crm_parse_int(target_rc_s, "0"); } return 0; } static gboolean te_crm_command(crm_graph_t * graph, crm_action_t * action) { char *counter = NULL; xmlNode *cmd = NULL; gboolean is_local = FALSE; const char *id = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *router_node = NULL; gboolean rc = TRUE; gboolean no_wait = FALSE; id = ID(action->xml); task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if (!router_node) { router_node = on_node; } CRM_CHECK(on_node != NULL && strlen(on_node) != 0, crm_err("Corrupted command (id=%s) %s: no node", crm_str(id), crm_str(task)); return FALSE); crm_info("Executing crm-event (%s): %s on %s%s%s", crm_str(id), crm_str(task), on_node, is_local ? " (local)" : "", no_wait ? " - no waiting" : ""); if (safe_str_eq(router_node, fsa_our_uname)) { is_local = TRUE; } value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); if (crm_is_true(value)) { no_wait = TRUE; } if (is_local && safe_str_eq(task, CRM_OP_SHUTDOWN)) { /* defer until everything else completes */ crm_info("crm-event (%s) is a local shutdown", crm_str(id)); graph->completion_action = tg_shutdown; graph->abort_reason = "local shutdown"; te_action_confirmed(action); update_graph(graph, action); trigger_graph(); return TRUE; } else if (safe_str_eq(task, CRM_OP_SHUTDOWN)) { crm_node_t *peer = crm_get_peer(0, router_node); crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN); } cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); counter = generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE); free(counter); free_xml(cmd); if (rc == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if (no_wait) { te_action_confirmed(action); update_graph(graph, action); trigger_graph(); } else { if (action->timeout <= 0) { crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %dms instead", action->id, task, on_node, action->timeout, graph->network_delay); action->timeout = graph->network_delay; } te_start_action_timer(graph, action); } return TRUE; } -gboolean -cib_action_update(crm_action_t * action, int status, int op_rc) +void +controld_record_action_timeout(crm_action_t *action) { lrmd_event_data_t *op = NULL; xmlNode *state = NULL; xmlNode *rsc = NULL; xmlNode *xml_op = NULL; xmlNode *action_rsc = NULL; int rc = pcmk_ok; const char *rsc_id = NULL; - const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); int call_options = cib_quorum_override | cib_scope_local; int target_rc = get_target_rc(action); - if (status == PCMK_LRM_OP_PENDING) { - crm_debug("%s %d: Recording pending operation %s on %s", - crm_element_name(action->xml), action->id, task_uuid, target); - } else { - crm_warn("%s %d: %s on %s timed out", - crm_element_name(action->xml), action->id, task_uuid, target); - } + crm_warn("%s %d: %s on %s timed out", + crm_element_name(action->xml), action->id, task_uuid, target); action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE); if (action_rsc == NULL) { - return FALSE; + return; } rsc_id = ID(action_rsc); - CRM_CHECK(rsc_id != NULL, crm_log_xml_err(action->xml, "Bad:action"); - return FALSE); + CRM_CHECK(rsc_id != NULL, + crm_log_xml_err(action->xml, "Bad:action"); return); /* update the CIB */ state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(state, XML_ATTR_UUID, target_uuid); crm_xml_add(state, XML_ATTR_UNAME, target); rsc = create_xml_node(state, XML_CIB_TAG_LRM); crm_xml_add(rsc, XML_ATTR_ID, target_uuid); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); crm_xml_add(rsc, XML_ATTR_ID, rsc_id); crm_copy_xml_element(action_rsc, rsc, XML_ATTR_TYPE); crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS); crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER); - op = convert_graph_action(NULL, action, status, op_rc); + /* If the executor gets a timeout while waiting for the action to complete, + * that will be reported via the usual callback. This timeout means that we + * didn't hear from the executor or the controller that relayed the action + * to the executor. + * + * @TODO Using PCMK_OCF_UNKNOWN_ERROR instead of PCMK_OCF_TIMEOUT is one way + * to distinguish those situations, but perhaps PCMK_OCF_TIMEOUT would be + * preferable anyway. + */ + op = convert_graph_action(NULL, action, PCMK_LRM_OP_TIMEOUT, + PCMK_OCF_UNKNOWN_ERROR); op->call_id = -1; op->user_data = generate_transition_key(transition_graph->id, action->id, target_rc, te_uuid); xml_op = create_operation_update(rsc, op, CRM_FEATURE_SET, target_rc, target, __FUNCTION__, LOG_INFO); lrmd_free_event(op); - crm_trace("Updating CIB with \"%s\" (%s): %s %s on %s", - status < 0 ? "new action" : XML_ATTR_TIMEOUT, - crm_element_name(action->xml), crm_str(task), rsc_id, target); - crm_log_xml_trace(xml_op, "Op"); + crm_log_xml_trace(xml_op, "Action timeout"); rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options); - - crm_trace("Updating CIB with %s action %d: %s on %s (call_id=%d)", - services_lrm_status_str(status), action->id, task_uuid, target, rc); - fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated); free_xml(state); + crm_trace("Sent CIB update (call ID %d) for timeout of action %d (%s on %s)", + rc, action->id, task_uuid, target); action->sent_update = TRUE; - - if (rc < pcmk_ok) { - return FALSE; - } - - return TRUE; } static gboolean te_rsc_command(crm_graph_t * graph, crm_action_t * action) { /* never overwrite stop actions in the CIB with * anything other than completed results * * Writing pending stops makes it look like the * resource is running again */ xmlNode *cmd = NULL; xmlNode *rsc_op = NULL; gboolean rc = TRUE; gboolean no_wait = FALSE; gboolean is_local = FALSE; char *counter = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *router_node = NULL; const char *task_uuid = NULL; CRM_ASSERT(action != NULL); CRM_ASSERT(action->xml != NULL); action->executed = FALSE; on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(on_node != NULL && strlen(on_node) != 0, crm_err("Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task)); return FALSE); rsc_op = action->xml; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE); if (!router_node) { router_node = on_node; } counter = generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter); if (safe_str_eq(router_node, fsa_our_uname)) { is_local = TRUE; } value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); if (crm_is_true(value)) { no_wait = TRUE; } crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d", task, task_uuid, (is_local? " locally" : ""), on_node, (no_wait? " without waiting" : ""), action->id); cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node, CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL); if (is_local) { /* shortcut local resource commands */ ha_msg_input_t data = { .msg = cmd, .xml = rsc_op, }; fsa_data_t msg = { .id = 0, .data = &data, .data_type = fsa_dt_ha_msg, .fsa_input = I_NULL, .fsa_cause = C_FSA_INTERNAL, .actions = A_LRM_INVOKE, .origin = __FUNCTION__, }; do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg); } else { rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE); } free(counter); free_xml(cmd); action->executed = TRUE; if (rc == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if (no_wait) { crm_info("Action %d confirmed - no wait", action->id); action->confirmed = TRUE; /* Just mark confirmed. * Don't bump the job count only to immediately decrement it */ update_graph(transition_graph, action); trigger_graph(); } else if (action->confirmed == TRUE) { crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.", action->id, task, task_uuid, on_node, action->timeout); } else { if (action->timeout <= 0) { crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %dms instead", action->id, task, task_uuid, on_node, action->timeout, graph->network_delay); action->timeout = graph->network_delay; } te_update_job_count(action, 1); te_start_action_timer(graph, action); } return TRUE; } struct te_peer_s { char *name; int jobs; int migrate_jobs; }; static void te_peer_free(gpointer p) { struct te_peer_s *peer = p; free(peer->name); free(peer); } void te_reset_job_counts(void) { GHashTableIter iter; struct te_peer_s *peer = NULL; if(te_targets == NULL) { te_targets = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, te_peer_free); } g_hash_table_iter_init(&iter, te_targets); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) { peer->jobs = 0; peer->migrate_jobs = 0; } } static void te_update_job_count_on(const char *target, int offset, bool migrate) { struct te_peer_s *r = NULL; if(target == NULL || te_targets == NULL) { return; } r = g_hash_table_lookup(te_targets, target); if(r == NULL) { r = calloc(1, sizeof(struct te_peer_s)); r->name = strdup(target); g_hash_table_insert(te_targets, r->name, r); } r->jobs += offset; if(migrate) { r->migrate_jobs += offset; } crm_trace("jobs[%s] = %d", target, r->jobs); } static void te_update_job_count(crm_action_t * action, int offset) { const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); if (action->type != action_type_rsc || target == NULL) { /* No limit on these */ return; } /* if we have a router node, this means the action is performing * on a remote node. For now, we count all actions occurring on a * remote node against the job list on the cluster node hosting * the connection resources */ target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if ((target == NULL) && (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED))) { const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE); const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET); te_update_job_count_on(t1, offset, TRUE); te_update_job_count_on(t2, offset, TRUE); return; } else if (target == NULL) { target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); } te_update_job_count_on(target, offset, FALSE); } static gboolean te_should_perform_action_on(crm_graph_t * graph, crm_action_t * action, const char *target) { int limit = 0; struct te_peer_s *r = NULL; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); if(target == NULL) { /* No limit on these */ return TRUE; } else if(te_targets == NULL) { return FALSE; } r = g_hash_table_lookup(te_targets, target); limit = throttle_get_job_limit(target); if(r == NULL) { r = calloc(1, sizeof(struct te_peer_s)); r->name = strdup(target); g_hash_table_insert(te_targets, r->name, r); } if(limit <= r->jobs) { crm_trace("Peer %s is over their job limit of %d (%d): deferring %s", target, limit, r->jobs, id); return FALSE; } else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) { if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s", target, graph->migration_limit, r->migrate_jobs, id); return FALSE; } } crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit); return TRUE; } static gboolean te_should_perform_action(crm_graph_t * graph, crm_action_t * action) { const char *target = NULL; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (action->type != action_type_rsc) { /* No limit on these */ return TRUE; } /* if we have a router node, this means the action is performing * on a remote node. For now, we count all actions occurring on a * remote node against the job list on the cluster node hosting * the connection resources */ target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if ((target == NULL) && (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED))) { target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE); if(te_should_perform_action_on(graph, action, target) == FALSE) { return FALSE; } target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET); } else if (target == NULL) { target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); } return te_should_perform_action_on(graph, action, target); } void te_action_confirmed(crm_action_t * action) { const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); if (action->confirmed == FALSE && action->type == action_type_rsc && target != NULL) { te_update_job_count(action, -1); } action->confirmed = TRUE; } crm_graph_functions_t te_graph_fns = { te_pseudo_action, te_rsc_command, te_crm_command, te_fence_node, te_should_perform_action, }; void notify_crmd(crm_graph_t * graph) { const char *type = "unknown"; enum crmd_fsa_input event = I_NULL; crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state)); if (graph->complete == FALSE) { CRM_CHECK(graph->complete,); graph->complete = TRUE; } switch (graph->completion_action) { case tg_stop: type = "stop"; if (fsa_state == S_TRANSITION_ENGINE) { event = I_TE_SUCCESS; } break; case tg_done: type = "done"; if (fsa_state == S_TRANSITION_ENGINE) { event = I_TE_SUCCESS; } break; case tg_restart: type = "restart"; if (fsa_state == S_TRANSITION_ENGINE) { if (transition_timer->period_ms > 0) { crm_timer_stop(transition_timer); crm_timer_start(transition_timer); } else { event = I_PE_CALC; } } else if (fsa_state == S_POLICY_ENGINE) { register_fsa_action(A_PE_INVOKE); } break; case tg_shutdown: type = "shutdown"; if (is_set(fsa_input_register, R_SHUTDOWN)) { event = I_STOP; } else { crm_err("We didn't ask to be shut down, yet the scheduler is telling us to"); event = I_TERMINATE; } } crm_debug("Transition %d status: %s - %s", graph->id, type, crm_str(graph->abort_reason)); graph->abort_reason = NULL; graph->completion_action = tg_done; clear_bit(fsa_input_register, R_IN_TRANSITION); if (event != I_NULL) { register_fsa_input(C_FSA_INTERNAL, event, NULL); } else if (fsa_source) { mainloop_set_trigger(fsa_source); } } diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c index ce1edb7375..f48161a32c 100644 --- a/daemons/controld/controld_te_callbacks.c +++ b/daemons/controld/controld_te_callbacks.c @@ -1,951 +1,951 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include /* For ONLINESTATUS etc */ void te_update_confirm(const char *event, xmlNode * msg); extern char *te_uuid; gboolean shuttingdown = FALSE; crm_graph_t *transition_graph; crm_trigger_t *transition_trigger = NULL; static unsigned long int stonith_max_attempts = 10; /* #define rsc_op_template "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */ #define rsc_op_template "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']" static const char * get_node_id(xmlNode * rsc_op) { xmlNode *node = rsc_op; while (node != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(node))) { node = node->parent; } CRM_CHECK(node != NULL, return NULL); return ID(node); } void update_stonith_max_attempts(const char* value) { if (safe_str_eq(value, CRM_INFINITY_S)) { stonith_max_attempts = CRM_SCORE_INFINITY; } else { stonith_max_attempts = crm_int_helper(value, NULL); } } static void te_update_diff_v1(const char *event, xmlNode *diff) { int lpc, max; xmlXPathObject *xpathObj = NULL; CRM_CHECK(diff != NULL, return); xml_log_patchset(LOG_TRACE, __FUNCTION__, diff); if (cib_config_changed(NULL, NULL, &diff)) { abort_transition(INFINITY, tg_restart, "Non-status change", diff); goto bail; /* configuration changed */ } /* Tickets Attributes - Added/Updated */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Ticket attribute: update", aborted); goto bail; } freeXpathObject(xpathObj); /* Tickets Attributes - Removed */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Ticket attribute: removal", aborted); goto bail; } freeXpathObject(xpathObj); /* Transient Attributes - Added/Updated */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_TRANSIENT_NODEATTRS "//" XML_CIB_TAG_NVPAIR); max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *attr = getXpathResult(xpathObj, lpc); const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); const char *value = NULL; if (safe_str_eq(CRM_OP_PROBED, name)) { value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); } if (crm_is_true(value) == FALSE) { abort_transition(INFINITY, tg_restart, "Transient attribute: update", attr); crm_log_xml_trace(attr, "Abort"); goto bail; } } freeXpathObject(xpathObj); /* Transient Attributes - Removed */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_TRANSIENT_NODEATTRS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Transient attribute: removal", aborted); goto bail; } freeXpathObject(xpathObj); /* * Updates by, or in response to, TE actions will never contain updates * for more than one resource at a time, so such updates indicate an * LRM refresh. * * In that case, start a new transition rather than check each result * individually, which can result in _huge_ speedups in large clusters. * * Unfortunately, we can only do so when there are no pending actions. * Otherwise, we could mistakenly throw away those results here, and * the cluster will stall waiting for them and time out the operation. */ if (transition_graph->pending == 0) { xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RESOURCE); max = numXpathResults(xpathObj); if (max > 1) { crm_debug("Ignoring resource operation updates due to history refresh of %d resources", max); crm_log_xml_trace(diff, "lrm-refresh"); abort_transition(INFINITY, tg_restart, "History refresh", NULL); goto bail; } freeXpathObject(xpathObj); } /* Process operation updates */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); max = numXpathResults(xpathObj); if (max > 0) { int lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); const char *node = get_node_id(rsc_op); process_graph_event(rsc_op, node); } } freeXpathObject(xpathObj); /* Detect deleted (as opposed to replaced or added) actions - eg. crm_resource -C */ xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP); max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { int path_max = 0; const char *op_id = NULL; char *rsc_op_xpath = NULL; xmlXPathObject *op_match = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; op_id = ID(match); path_max = strlen(rsc_op_template) + strlen(op_id) + 1; rsc_op_xpath = calloc(1, path_max); snprintf(rsc_op_xpath, path_max, rsc_op_template, op_id); op_match = xpath_search(diff, rsc_op_xpath); if (numXpathResults(op_match) == 0) { /* Prevent false positives by matching cancelations too */ const char *node = get_node_id(match); crm_action_t *cancelled = get_cancel_action(op_id, node); if (cancelled == NULL) { crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id, node); abort_transition(INFINITY, tg_restart, "Resource op removal", match); freeXpathObject(op_match); free(rsc_op_xpath); goto bail; } else { crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d", op_id, node, cancelled->id); } } freeXpathObject(op_match); free(rsc_op_xpath); } bail: freeXpathObject(xpathObj); } static void process_lrm_resource_diff(xmlNode *lrm_resource, const char *node) { for (xmlNode *rsc_op = __xml_first_child(lrm_resource); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) { process_graph_event(rsc_op, node); } } static void process_resource_updates(const char *node, xmlNode *xml, xmlNode *change, const char *op, const char *xpath) { xmlNode *rsc = NULL; if (xml == NULL) { return; } else if (strcmp((const char*)xml->name, XML_CIB_TAG_LRM) == 0) { xml = first_named_child(xml, XML_LRM_TAG_RESOURCES); crm_trace("Got %p in %s", xml, XML_CIB_TAG_LRM); } CRM_ASSERT(strcmp((const char*)xml->name, XML_LRM_TAG_RESOURCES) == 0); /* * Updates by, or in response to, TE actions will never contain updates * for more than one resource at a time, so such updates indicate an * LRM refresh. * * In that case, start a new transition rather than check each result * individually, which can result in _huge_ speedups in large clusters. * * Unfortunately, we can only do so when there are no pending actions. * Otherwise, we could mistakenly throw away those results here, and * the cluster will stall waiting for them and time out the operation. */ if ((transition_graph->pending == 0) && xml->children && xml->children->next) { crm_log_xml_trace(change, "lrm-refresh"); abort_transition(INFINITY, tg_restart, "History refresh", NULL); return; } for (rsc = __xml_first_child(xml); rsc != NULL; rsc = __xml_next(rsc)) { crm_trace("Processing %s", ID(rsc)); process_lrm_resource_diff(rsc, node); } } #define NODE_PATT "/lrm[@id=" static char *get_node_from_xpath(const char *xpath) { char *nodeid = NULL; char *tmp = strstr(xpath, NODE_PATT); if(tmp) { tmp += strlen(NODE_PATT); tmp += 1; nodeid = strdup(tmp); tmp = strstr(nodeid, "\'"); CRM_ASSERT(tmp); tmp[0] = 0; } return nodeid; } static char *extract_node_uuid(const char *xpath) { char *mutable_path = strdup(xpath); char *node_uuid = NULL; char *search = NULL; char *match = NULL; match = strstr(mutable_path, "node_state[@id=\'"); if (match == NULL) { free(mutable_path); return NULL; } match += strlen("node_state[@id=\'"); search = strchr(match, '\''); if (search == NULL) { free(mutable_path); return NULL; } search[0] = 0; node_uuid = strdup(match); free(mutable_path); return node_uuid; } static void abort_unless_down(const char *xpath, const char *op, xmlNode *change, const char *reason) { char *node_uuid = NULL; crm_action_t *down = NULL; if(safe_str_neq(op, "delete")) { abort_transition(INFINITY, tg_restart, reason, change); return; } node_uuid = extract_node_uuid(xpath); if(node_uuid == NULL) { crm_err("Could not extract node ID from %s", xpath); abort_transition(INFINITY, tg_restart, reason, change); return; } down = match_down_event(node_uuid); if (down == NULL) { crm_trace("Not expecting %s to be down (%s)", node_uuid, xpath); abort_transition(INFINITY, tg_restart, reason, change); } else { crm_trace("Expecting changes to %s (%s)", node_uuid, xpath); } free(node_uuid); } static void process_op_deletion(const char *xpath, xmlNode *change) { char *mutable_key = strdup(xpath); char *key; char *node_uuid; crm_action_t *cancel = NULL; // Extract the part of xpath between last pair of single quotes key = strrchr(mutable_key, '\''); if (key != NULL) { *key = '\0'; key = strrchr(mutable_key, '\''); } if (key == NULL) { crm_warn("Ignoring malformed CIB update (resource deletion of %s)", xpath); free(mutable_key); return; } ++key; node_uuid = extract_node_uuid(xpath); cancel = get_cancel_action(key, node_uuid); if (cancel) { crm_info("Cancellation of %s on %s confirmed (%d)", key, node_uuid, cancel->id); stop_te_timer(cancel->timer); te_action_confirmed(cancel); update_graph(transition_graph, cancel); trigger_graph(); } else { abort_transition(INFINITY, tg_restart, "Resource operation removal", change); } free(mutable_key); free(node_uuid); } static void process_delete_diff(const char *xpath, const char *op, xmlNode *change) { if (strstr(xpath, "/" XML_LRM_TAG_RSC_OP "[")) { process_op_deletion(xpath, change); } else if (strstr(xpath, "/" XML_CIB_TAG_LRM "[")) { abort_unless_down(xpath, op, change, "Resource state removal"); } else if (strstr(xpath, "/" XML_CIB_TAG_STATE "[")) { abort_unless_down(xpath, op, change, "Node state removal"); } else { crm_trace("Ignoring delete of %s", xpath); } } static void process_node_state_diff(xmlNode *state, xmlNode *change, const char *op, const char *xpath) { xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM); process_resource_updates(ID(state), lrm, change, op, xpath); } static void process_status_diff(xmlNode *status, xmlNode *change, const char *op, const char *xpath) { for (xmlNode *state = __xml_first_child(status); state != NULL; state = __xml_next(state)) { process_node_state_diff(state, change, op, xpath); } } static void process_cib_diff(xmlNode *cib, xmlNode *change, const char *op, const char *xpath) { xmlNode *status = first_named_child(cib, XML_CIB_TAG_STATUS); xmlNode *config = first_named_child(cib, XML_CIB_TAG_CONFIGURATION); if (status) { process_status_diff(status, change, op, xpath); } if (config) { abort_transition(INFINITY, tg_restart, "Non-status-only change", change); } } static void te_update_diff_v2(xmlNode *diff) { crm_log_xml_trace(diff, "Patch:Raw"); for (xmlNode *change = __xml_first_child(diff); change != NULL; change = __xml_next(change)) { xmlNode *match = NULL; const char *name = NULL; const char *xpath = crm_element_value(change, XML_DIFF_PATH); // Possible ops: create, modify, delete, move const char *op = crm_element_value(change, XML_DIFF_OP); // Ignore uninteresting updates if (op == NULL) { continue; } else if (xpath == NULL) { crm_trace("Ignoring %s change for version field", op); continue; } else if (strcmp(op, "move") == 0) { crm_trace("Ignoring move change at %s", xpath); continue; } // Find the result of create/modify ops if (strcmp(op, "create") == 0) { match = change->children; } else if (strcmp(op, "modify") == 0) { match = first_named_child(change, XML_DIFF_RESULT); if(match) { match = match->children; } } else if (strcmp(op, "delete") != 0) { crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)", op, xpath); continue; } if (match) { if (match->type == XML_COMMENT_NODE) { crm_trace("Ignoring %s operation for comment at %s", op, xpath); continue; } name = (const char *)match->name; } crm_trace("Handling %s operation for %s%s%s", op, (xpath? xpath : "CIB"), (name? " matched by " : ""), (name? name : "")); if (strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION)) { abort_transition(INFINITY, tg_restart, "Configuration change", change); break; // Won't be packaged with operation results we may be waiting for } else if (strstr(xpath, "/" XML_CIB_TAG_TICKETS) || safe_str_eq(name, XML_CIB_TAG_TICKETS)) { abort_transition(INFINITY, tg_restart, "Ticket attribute change", change); break; // Won't be packaged with operation results we may be waiting for } else if (strstr(xpath, "/" XML_TAG_TRANSIENT_NODEATTRS "[") || safe_str_eq(name, XML_TAG_TRANSIENT_NODEATTRS)) { abort_unless_down(xpath, op, change, "Transient attribute change"); break; // Won't be packaged with operation results we may be waiting for } else if (strcmp(op, "delete") == 0) { process_delete_diff(xpath, op, change); } else if (name == NULL) { crm_warn("Ignoring malformed CIB update (%s at %s has no result)", op, xpath); } else if (strcmp(name, XML_TAG_CIB) == 0) { process_cib_diff(match, change, op, xpath); } else if (strcmp(name, XML_CIB_TAG_STATUS) == 0) { process_status_diff(match, change, op, xpath); } else if (strcmp(name, XML_CIB_TAG_STATE) == 0) { process_node_state_diff(match, change, op, xpath); } else if (strcmp(name, XML_CIB_TAG_LRM) == 0) { process_resource_updates(ID(match), match, change, op, xpath); } else if (strcmp(name, XML_LRM_TAG_RESOURCES) == 0) { char *local_node = get_node_from_xpath(xpath); process_resource_updates(local_node, match, change, op, xpath); free(local_node); } else if (strcmp(name, XML_LRM_TAG_RESOURCE) == 0) { char *local_node = get_node_from_xpath(xpath); process_lrm_resource_diff(match, local_node); free(local_node); } else if (strcmp(name, XML_LRM_TAG_RSC_OP) == 0) { char *local_node = get_node_from_xpath(xpath); process_graph_event(match, local_node); free(local_node); } else { crm_warn("Ignoring malformed CIB update (%s at %s has unrecognized result %s)", op, xpath, name); } } } void te_update_diff(const char *event, xmlNode * msg) { xmlNode *diff = NULL; const char *op = NULL; int rc = -EINVAL; int format = 1; int p_add[] = { 0, 0, 0 }; int p_del[] = { 0, 0, 0 }; CRM_CHECK(msg != NULL, return); crm_element_value_int(msg, F_CIB_RC, &rc); if (transition_graph == NULL) { crm_trace("No graph"); return; } else if (rc < pcmk_ok) { crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc)); return; } else if (transition_graph->complete && fsa_state != S_IDLE && fsa_state != S_TRANSITION_ENGINE && fsa_state != S_POLICY_ENGINE) { crm_trace("Filter state=%s, complete=%d", fsa_state2string(fsa_state), transition_graph->complete); return; } op = crm_element_value(msg, F_CIB_OPERATION); diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); xml_patch_versions(diff, p_add, p_del); crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op, p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2], fsa_state2string(fsa_state)); crm_element_value_int(diff, "format", &format); switch (format) { case 1: te_update_diff_v1(event, diff); break; case 2: te_update_diff_v2(diff); break; default: crm_warn("Ignoring malformed CIB update (unknown patch format %d)", format); } } gboolean process_te_message(xmlNode * msg, xmlNode * xml_data) { const char *from = crm_element_value(msg, F_ORIG); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); const char *ref = crm_element_value(msg, F_CRM_REFERENCE); const char *op = crm_element_value(msg, F_CRM_TASK); const char *type = crm_element_value(msg, F_CRM_MSG_TYPE); crm_trace("Processing %s (%s) message", op, ref); crm_log_xml_trace(msg, "ipc"); if (op == NULL) { /* error */ } else if (sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_TENGINE) != 0) { crm_trace("Bad sys-to %s", crm_str(sys_to)); return FALSE; } else if (safe_str_eq(op, CRM_OP_INVOKE_LRM) && safe_str_eq(sys_from, CRM_SYSTEM_LRMD) /* && safe_str_eq(type, XML_ATTR_RESPONSE) */ ) { xmlXPathObject *xpathObj = NULL; crm_log_xml_trace(msg, "Processing (N)ACK"); crm_debug("Processing (N)ACK %s from %s", crm_element_value(msg, F_CRM_REFERENCE), from); xpathObj = xpath_search(xml_data, "//" XML_LRM_TAG_RSC_OP); if (numXpathResults(xpathObj)) { int lpc = 0, max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); const char *node = get_node_id(rsc_op); process_graph_event(rsc_op, node); } freeXpathObject(xpathObj); } else { crm_log_xml_err(msg, "Invalid (N)ACK"); freeXpathObject(xpathObj); return FALSE; } } else { crm_err("Unknown command: %s::%s from %s", type, op, sys_from); } crm_trace("finished processing message"); return TRUE; } GHashTable *stonith_failures = NULL; struct st_fail_rec { int count; }; static gboolean too_many_st_failures(const char *target) { GHashTableIter iter; const char *key = NULL; struct st_fail_rec *value = NULL; if (stonith_failures == NULL) { return FALSE; } if (target == NULL) { g_hash_table_iter_init(&iter, stonith_failures); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { if (value->count >= stonith_max_attempts) { target = (const char*)key; goto too_many; } } } else { value = g_hash_table_lookup(stonith_failures, target); if ((value != NULL) && (value->count >= stonith_max_attempts)) { goto too_many; } } return FALSE; too_many: crm_warn("Too many failures (%d) to fence %s, giving up", value->count, target); return TRUE; } /*! * \internal * \brief Reset a stonith fail count * * \param[in] target Name of node to reset, or NULL for all */ void st_fail_count_reset(const char *target) { if (stonith_failures == NULL) { return; } if (target) { struct st_fail_rec *rec = NULL; rec = g_hash_table_lookup(stonith_failures, target); if (rec) { rec->count = 0; } } else { GHashTableIter iter; const char *key = NULL; struct st_fail_rec *rec = NULL; g_hash_table_iter_init(&iter, stonith_failures); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &rec)) { rec->count = 0; } } } void st_fail_count_increment(const char *target) { struct st_fail_rec *rec = NULL; if (stonith_failures == NULL) { stonith_failures = crm_str_table_new(); } rec = g_hash_table_lookup(stonith_failures, target); if (rec) { rec->count++; } else { rec = malloc(sizeof(struct st_fail_rec)); if(rec == NULL) { return; } rec->count = 1; g_hash_table_insert(stonith_failures, strdup(target), rec); } } /*! * \internal * \brief Abort transition due to stonith failure * * \param[in] abort_action Whether to restart or stop transition * \param[in] target Don't restart if this (NULL for any) has too many failures * \param[in] reason Log this stonith action XML as abort reason (or NULL) */ void abort_for_stonith_failure(enum transition_action abort_action, const char *target, xmlNode *reason) { /* If stonith repeatedly fails, we eventually give up on starting a new * transition for that reason. */ if ((abort_action != tg_stop) && too_many_st_failures(target)) { abort_action = tg_stop; } abort_transition(INFINITY, abort_action, "Stonith failed", reason); } void tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) { char *uuid = NULL; int target_rc = -1; int stonith_id = -1; int transition_id = -1; crm_action_t *action = NULL; int call_id = data->call_id; int rc = data->rc; char *userdata = data->userdata; CRM_CHECK(userdata != NULL, return); crm_notice("Stonith operation %d/%s: %s (%d)", call_id, (char *)userdata, pcmk_strerror(rc), rc); if (AM_I_DC == FALSE) { return; } /* crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", */ /* op->call_id, op->optype, op->node_name, op->op_result, */ /* (char *)op->node_list, op->private_data); */ /* filter out old STONITH actions */ CRM_CHECK(decode_transition_key(userdata, &uuid, &transition_id, &stonith_id, &target_rc), crm_err("Invalid event detected"); goto bail; ); if (transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid) || transition_graph->id != transition_id) { crm_info("Ignoring STONITH action initiated outside of the current transition"); goto bail; } action = get_action(stonith_id, FALSE); if (action == NULL) { crm_err("Stonith action not matched"); goto bail; } stop_te_timer(action->timer); if (rc == pcmk_ok) { const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); const char *op = crm_meta_value(action->params, "stonith_action"); crm_info("Stonith operation %d for %s passed", call_id, target); if (action->confirmed == FALSE) { te_action_confirmed(action); if (safe_str_eq("on", op)) { const char *value = NULL; char *now = crm_itoa(time(NULL)); update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, FALSE); free(now); value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL); update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, FALSE); value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE); update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, FALSE); } else if (action->sent_update == FALSE) { send_stonith_update(action, target, uuid); action->sent_update = TRUE; } } st_fail_count_reset(target); } else { const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); enum transition_action abort_action = tg_restart; action->failed = TRUE; crm_notice("Stonith operation %d for %s failed (%s): aborting transition.", call_id, target, pcmk_strerror(rc)); /* If no fence devices were available, there's no use in immediately * checking again, so don't start a new transition in that case. */ if (rc == -ENODEV) { crm_warn("No devices found in cluster to fence %s, giving up", target); abort_action = tg_stop; } /* Increment the fail count now, so abort_for_stonith_failure() can * check it. Non-DC nodes will increment it in tengine_stonith_notify(). */ st_fail_count_increment(target); abort_for_stonith_failure(abort_action, target, NULL); } update_graph(transition_graph, action); trigger_graph(); bail: free(userdata); free(uuid); return; } void cib_fencing_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { if (rc < pcmk_ok) { crm_err("Fencing update %d for %s: failed - %s (%d)", call_id, (char *)user_data, pcmk_strerror(rc), rc); crm_log_xml_warn(msg, "Failed update"); abort_transition(INFINITY, tg_shutdown, "CIB update failed", NULL); } else { crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data); } } void cib_action_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { if (rc < pcmk_ok) { crm_err("Update %d FAILED: %s", call_id, pcmk_strerror(rc)); } } gboolean action_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; CRM_CHECK(data != NULL, return FALSE); timer = (crm_action_timer_t *) data; stop_te_timer(timer); crm_warn("Timer popped (timeout=%d, abort_level=%d, complete=%s)", timer->timeout, transition_graph->abort_priority, transition_graph->complete ? "true" : "false"); CRM_CHECK(timer->action != NULL, return FALSE); if (transition_graph->complete) { crm_warn("Ignoring timeout while not in transition"); } else { /* fail the action */ gboolean send_update = TRUE; const char *task = crm_element_value(timer->action->xml, XML_LRM_ATTR_TASK); print_action(LOG_ERR, "Aborting transition, action lost: ", timer->action); timer->action->failed = TRUE; te_action_confirmed(timer->action); abort_transition(INFINITY, tg_restart, "Action lost", NULL); update_graph(transition_graph, timer->action); trigger_graph(); if (timer->action->type != action_type_rsc) { send_update = FALSE; - } else if (safe_str_eq(task, RSC_CANCEL)) { + } else if (!controld_action_is_recordable(task)) { /* we don't need to update the CIB with these */ send_update = FALSE; } if (send_update) { - cib_action_update(timer->action, PCMK_LRM_OP_TIMEOUT, PCMK_OCF_UNKNOWN_ERROR); + controld_record_action_timeout(timer->action); } } return FALSE; } diff --git a/daemons/controld/controld_transition.h b/daemons/controld/controld_transition.h index 43c8c9fd35..563030642b 100644 --- a/daemons/controld/controld_transition.h +++ b/daemons/controld/controld_transition.h @@ -1,83 +1,83 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef TENGINE__H # define TENGINE__H # include # include # include # include extern stonith_t *stonith_api; extern void send_stonith_update(crm_action_t * stonith_action, const char *target, const char *uuid); /* stonith cleanup list */ void add_stonith_cleanup(const char *target); void remove_stonith_cleanup(const char *target); void purge_stonith_cleanup(void); void execute_stonith_cleanup(void); /* tengine */ extern crm_action_t *match_down_event(const char *target); extern crm_action_t *get_cancel_action(const char *id, const char *node); -extern gboolean cib_action_update(crm_action_t * action, int status, int op_rc); +void controld_record_action_timeout(crm_action_t *action); extern gboolean fail_incompletable_actions(crm_graph_t * graph, const char *down_node); void process_graph_event(xmlNode *event, const char *event_node); /* utils */ extern crm_action_t *get_action(int id, gboolean confirmed); extern gboolean stop_te_timer(crm_action_timer_t * timer); extern const char *get_rsc_state(const char *task, enum op_status status); /* unpack */ extern gboolean process_te_message(xmlNode * msg, xmlNode * xml_data); extern crm_graph_t *transition_graph; extern crm_trigger_t *transition_trigger; extern char *te_uuid; extern void notify_crmd(crm_graph_t * graph); void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); void cib_action_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); gboolean action_timer_callback(gpointer data); gboolean te_graph_trigger(gpointer user_data); void te_update_diff(const char *event, xmlNode *msg); void tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data); void update_stonith_max_attempts(const char* value); extern void trigger_graph_processing(const char *fn, int line); void abort_after_delay(int abort_priority, enum transition_action abort_action, const char *abort_text, guint delay_ms); extern void abort_transition_graph(int abort_priority, enum transition_action abort_action, const char *abort_text, xmlNode * reason, const char *fn, int line); # define trigger_graph() trigger_graph_processing(__FUNCTION__, __LINE__) # define abort_transition(pri, action, text, reason) \ abort_transition_graph(pri, action, text, reason,__FUNCTION__,__LINE__); extern gboolean te_connect_stonith(gpointer user_data); extern crm_trigger_t *transition_trigger; extern crm_trigger_t *stonith_reconnect; extern char *failed_stop_offset; extern char *failed_start_offset; extern int active_timeout; extern int stonith_op_active; void te_action_confirmed(crm_action_t * action); void te_reset_job_counts(void); #endif diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h index e972be0f9c..f89297cbfb 100644 --- a/daemons/controld/controld_utils.h +++ b/daemons/controld/controld_utils.h @@ -1,101 +1,102 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef CRMD_UTILS__H # define CRMD_UTILS__H # include # include # include # include /* For CIB_OP_MODIFY */ # include "controld_alerts.h" # define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" # define fsa_cib_update(section, data, options, call_id, user_name) \ if(fsa_cib_conn != NULL) { \ call_id = cib_internal_op( \ fsa_cib_conn, CIB_OP_MODIFY, NULL, section, data, \ NULL, options, user_name); \ \ } else { \ crm_err("No CIB manager connection available"); \ } # define fsa_cib_anon_update(section, data, options) \ if(fsa_cib_conn != NULL) { \ fsa_cib_conn->cmds->modify( \ fsa_cib_conn, section, data, options); \ \ } else { \ crm_err("No CIB connection available"); \ } extern gboolean fsa_has_quorum; extern int last_peer_update; extern int last_resource_update; enum node_update_flags { node_update_none = 0x0000, node_update_quick = 0x0001, node_update_cluster = 0x0010, node_update_peer = 0x0020, node_update_join = 0x0040, node_update_expected = 0x0100, node_update_all = node_update_cluster|node_update_peer|node_update_join|node_update_expected, }; gboolean crm_timer_stop(fsa_timer_t * timer); gboolean crm_timer_start(fsa_timer_t * timer); gboolean crm_timer_popped(gpointer data); gboolean is_timer_started(fsa_timer_t * timer); crm_exit_t crmd_exit(crm_exit_t exit_code); crm_exit_t crmd_fast_exit(crm_exit_t exit_code); void pe_subsystem_free(void); void fsa_dump_actions(long long action, const char *text); void fsa_dump_inputs(int log_level, const char *text, long long input_register); gboolean update_dc(xmlNode * msg); void crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase); xmlNode *create_node_state_update(crm_node_t *node, int flags, xmlNode *parent, const char *source); void populate_cib_nodes(enum node_update_flags flags, const char *source); void crm_update_quorum(gboolean quorum, gboolean force_update); void erase_status_tag(const char *uname, const char *tag, int options); void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node); void update_attrd_remote_node_removed(const char *host, const char *user_name); void update_attrd_clear_failures(const char *host, const char *rsc, const char *op, const char *interval_spec, gboolean is_remote_node); int crmd_join_phase_count(enum crm_join_phase phase); void crmd_join_phase_log(int level); const char *get_timer_desc(fsa_timer_t * timer); void st_fail_count_reset(const char * target); void st_fail_count_increment(const char *target); void abort_for_stonith_failure(enum transition_action abort_action, const char *target, xmlNode *reason); void crmd_peer_down(crm_node_t *peer, bool full); unsigned int cib_op_timeout(void); bool feature_set_compatible(const char *dc_version, const char *join_version); +bool controld_action_is_recordable(const char *action); /* Convenience macro for registering a CIB callback * (assumes that data can be freed with free()) */ # define fsa_register_cib_callback(id, flag, data, fn) do { \ CRM_ASSERT(fsa_cib_conn); \ fsa_cib_conn->cmds->register_callback_full( \ fsa_cib_conn, id, cib_op_timeout(), \ flag, data, #fn, fn, free); \ } while(0) #endif diff --git a/daemons/execd/cts-exec-helper.c b/daemons/execd/cts-exec-helper.c index 633d49bbc0..70a80de80e 100644 --- a/daemons/execd/cts-exec-helper.c +++ b/daemons/execd/cts-exec-helper.c @@ -1,616 +1,617 @@ /* * Copyright 2012-2018 David Vossel * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include /* *INDENT-OFF* */ static struct crm_option long_options[] = { {"help", 0, 0, '?'}, {"verbose", 0, 0, 'V', "\t\tPrint out logs and events to screen"}, {"quiet", 0, 0, 'Q', "\t\tSuppress all output to screen"}, {"tls", 0, 0, 'S', "\t\tUse tls backend for local connection"}, {"listen", 1, 0, 'l', "\tListen for a specific event string"}, {"api-call", 1, 0, 'c', "\tDirectly relates to executor API functions"}, {"no-wait", 0, 0, 'w', "\tMake api call and do not wait for result."}, {"is-running", 0, 0, 'R', "\tDetermine if a resource is registered and running."}, {"notify-orig", 0, 0, 'n', "\tOnly notify this client the results of an api action."}, {"notify-changes", 0, 0, 'o', "\tOnly notify client changes to recurring operations."}, {"-spacer-", 1, 0, '-', "\nParameters for api-call option"}, {"action", 1, 0, 'a'}, {"rsc-id", 1, 0, 'r'}, {"cancel-call-id", 1, 0, 'x'}, {"provider", 1, 0, 'P'}, {"class", 1, 0, 'C'}, {"type", 1, 0, 'T'}, {"interval", 1, 0, 'i'}, {"timeout", 1, 0, 't'}, {"start-delay", 1, 0, 's'}, {"param-key", 1, 0, 'k'}, {"param-val", 1, 0, 'v'}, {"-spacer-", 1, 0, '-'}, {0, 0, 0, 0} }; /* *INDENT-ON* */ cib_t *cib_conn = NULL; static int exec_call_id = 0; static int exec_call_opts = 0; extern void cleanup_alloc_calculations(pe_working_set_t * data_set); static gboolean start_test(gpointer user_data); static void try_connect(void); static struct { int verbose; int quiet; guint interval_ms; int timeout; int start_delay; int cancel_call_id; int no_wait; int is_running; int no_connect; const char *api_call; const char *rsc_id; const char *provider; const char *class; const char *type; const char *action; const char *listen; lrmd_key_value_t *params; } options; GMainLoop *mainloop = NULL; lrmd_t *lrmd_conn = NULL; static char event_buf_v0[1024]; static void test_exit(crm_exit_t exit_code) { lrmd_api_delete(lrmd_conn); crm_exit(exit_code); } #define print_result(result) \ if (!options.quiet) { \ result; \ } \ #define report_event(event) \ snprintf(event_buf_v0, sizeof(event_buf_v0), "NEW_EVENT event_type:%s rsc_id:%s action:%s rc:%s op_status:%s", \ lrmd_event_type2str(event->type), \ event->rsc_id, \ event->op_type ? event->op_type : "none", \ services_ocf_exitcode_str(event->rc), \ services_lrm_status_str(event->op_status)); \ crm_info("%s", event_buf_v0); static void test_shutdown(int nsig) { lrmd_api_delete(lrmd_conn); lrmd_conn = NULL; } static void read_events(lrmd_event_data_t * event) { report_event(event); if (options.listen) { if (safe_str_eq(options.listen, event_buf_v0)) { print_result(printf("LISTEN EVENT SUCCESSFUL\n")); test_exit(CRM_EX_OK); } } if (exec_call_id && (event->call_id == exec_call_id)) { if (event->op_status == 0 && event->rc == 0) { print_result(printf("API-CALL SUCCESSFUL for 'exec'\n")); } else { print_result(printf("API-CALL FAILURE for 'exec', rc:%d lrmd_op_status:%s\n", event->rc, services_lrm_status_str(event->op_status))); test_exit(CRM_EX_ERROR); } if (!options.listen) { test_exit(CRM_EX_OK); } } } static gboolean timeout_err(gpointer data) { print_result(printf("LISTEN EVENT FAILURE - timeout occurred, never found.\n")); test_exit(CRM_EX_TIMEOUT); return FALSE; } static void connection_events(lrmd_event_data_t * event) { int rc = event->connection_rc; if (event->type != lrmd_event_connect) { /* ignore */ return; } if (!rc) { crm_info("Executor client connection established"); start_test(NULL); return; } else { sleep(1); try_connect(); crm_notice("Executor client connection failed"); } } static void try_connect(void) { int tries = 10; static int num_tries = 0; int rc = 0; lrmd_conn->cmds->set_callback(lrmd_conn, connection_events); for (; num_tries < tries; num_tries++) { rc = lrmd_conn->cmds->connect_async(lrmd_conn, "pacemaker-execd", 3000); if (!rc) { return; /* we'll hear back in async callback */ } sleep(1); } print_result(printf("API CONNECTION FAILURE\n")); test_exit(CRM_EX_ERROR); } static gboolean start_test(gpointer user_data) { int rc = 0; if (!options.no_connect) { if (!lrmd_conn->cmds->is_connected(lrmd_conn)) { try_connect(); /* async connect -- this function will get called back into */ return 0; } } lrmd_conn->cmds->set_callback(lrmd_conn, read_events); if (options.timeout) { g_timeout_add(options.timeout, timeout_err, NULL); } if (!options.api_call) { return 0; } if (safe_str_eq(options.api_call, "exec")) { rc = lrmd_conn->cmds->exec(lrmd_conn, options.rsc_id, options.action, NULL, options.interval_ms, options.timeout, options.start_delay, exec_call_opts, options.params); if (rc > 0) { exec_call_id = rc; print_result(printf("API-CALL 'exec' action pending, waiting on response\n")); } } else if (safe_str_eq(options.api_call, "register_rsc")) { rc = lrmd_conn->cmds->register_rsc(lrmd_conn, options.rsc_id, options.class, options.provider, options.type, 0); } else if (safe_str_eq(options.api_call, "get_rsc_info")) { lrmd_rsc_info_t *rsc_info; rsc_info = lrmd_conn->cmds->get_rsc_info(lrmd_conn, options.rsc_id, 0); if (rsc_info) { print_result(printf("RSC_INFO: id:%s class:%s provider:%s type:%s\n", rsc_info->id, rsc_info->standard, rsc_info->provider ? rsc_info->provider : "", rsc_info->type)); lrmd_free_rsc_info(rsc_info); rc = pcmk_ok; } else { rc = -1; } } else if (safe_str_eq(options.api_call, "unregister_rsc")) { rc = lrmd_conn->cmds->unregister_rsc(lrmd_conn, options.rsc_id, 0); } else if (safe_str_eq(options.api_call, "cancel")) { rc = lrmd_conn->cmds->cancel(lrmd_conn, options.rsc_id, options.action, options.interval_ms); } else if (safe_str_eq(options.api_call, "metadata")) { char *output = NULL; rc = lrmd_conn->cmds->get_metadata(lrmd_conn, options.class, options.provider, options.type, &output, 0); if (rc == pcmk_ok) { print_result(printf("%s", output)); free(output); } } else if (safe_str_eq(options.api_call, "list_agents")) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; rc = lrmd_conn->cmds->list_agents(lrmd_conn, &list, options.class, options.provider); if (rc > 0) { print_result(printf("%d agents found\n", rc)); for (iter = list; iter != NULL; iter = iter->next) { print_result(printf("%s\n", iter->val)); } lrmd_list_freeall(list); rc = 0; } else { print_result(printf("API_CALL FAILURE - no agents found\n")); rc = -1; } } else if (safe_str_eq(options.api_call, "list_ocf_providers")) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; rc = lrmd_conn->cmds->list_ocf_providers(lrmd_conn, options.type, &list); if (rc > 0) { print_result(printf("%d providers found\n", rc)); for (iter = list; iter != NULL; iter = iter->next) { print_result(printf("%s\n", iter->val)); } lrmd_list_freeall(list); rc = 0; } else { print_result(printf("API_CALL FAILURE - no providers found\n")); rc = -1; } } else if (safe_str_eq(options.api_call, "list_standards")) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; rc = lrmd_conn->cmds->list_standards(lrmd_conn, &list); if (rc > 0) { print_result(printf("%d standards found\n", rc)); for (iter = list; iter != NULL; iter = iter->next) { print_result(printf("%s\n", iter->val)); } lrmd_list_freeall(list); rc = 0; } else { print_result(printf("API_CALL FAILURE - no providers found\n")); rc = -1; } } else if (safe_str_eq(options.api_call, "get_recurring_ops")) { GList *op_list = NULL; GList *op_item = NULL; rc = lrmd_conn->cmds->get_recurring_ops(lrmd_conn, options.rsc_id, 0, 0, &op_list); for (op_item = op_list; op_item != NULL; op_item = op_item->next) { lrmd_op_info_t *op_info = op_item->data; print_result(printf("RECURRING_OP: %s_%s_%s timeout=%sms\n", op_info->rsc_id, op_info->action, op_info->interval_ms_s, op_info->timeout_ms_s)); lrmd_free_op_info(op_info); } g_list_free(op_list); } else if (options.api_call) { print_result(printf("API-CALL FAILURE unknown action '%s'\n", options.action)); test_exit(CRM_EX_ERROR); } if (rc < 0) { print_result(printf("API-CALL FAILURE for '%s' api_rc:%d\n", options.api_call, rc)); test_exit(CRM_EX_ERROR); } if (options.api_call && rc == pcmk_ok) { print_result(printf("API-CALL SUCCESSFUL for '%s'\n", options.api_call)); if (!options.listen) { test_exit(CRM_EX_OK); } } if (options.no_wait) { /* just make the call and exit regardless of anything else. */ test_exit(CRM_EX_OK); } return 0; } static int generate_params(void) { int rc = 0; pe_working_set_t data_set; xmlNode *cib_xml_copy = NULL; resource_t *rsc = NULL; GHashTable *params = NULL; GHashTable *meta = NULL; GHashTableIter iter; if (options.params) { return 0; } set_working_set_defaults(&data_set); cib_conn = cib_new(); rc = cib_conn->cmds->signon(cib_conn, "cts-exec-helper", cib_query); if (rc != pcmk_ok) { crm_err("Could not connect to the CIB manager: %s", pcmk_strerror(rc)); rc = -1; goto param_gen_bail; } rc = cib_conn->cmds->query(cib_conn, NULL, &cib_xml_copy, cib_scope_local | cib_sync_call); if (rc != pcmk_ok) { crm_err("Error retrieving cib copy: %s (%d)", pcmk_strerror(rc), rc); goto param_gen_bail; } else if (cib_xml_copy == NULL) { rc = -ENODATA; crm_err("Error retrieving cib copy: %s (%d)", pcmk_strerror(rc), rc); goto param_gen_bail; } if (cli_config_update(&cib_xml_copy, NULL, FALSE) == FALSE) { crm_err("Error updating cib configuration"); rc = -1; goto param_gen_bail; } data_set.input = cib_xml_copy; data_set.now = crm_time_new(NULL); cluster_status(&data_set); if (options.rsc_id) { rsc = pe_find_resource_with_flags(data_set.resources, options.rsc_id, pe_find_renamed|pe_find_any); } if (!rsc) { crm_err("Resource does not exist in config"); rc = -1; goto param_gen_bail; } params = crm_str_table_new(); meta = crm_str_table_new(); get_rsc_attributes(params, rsc, NULL, &data_set); get_meta_attributes(meta, rsc, NULL, &data_set); if (params) { char *key = NULL; char *value = NULL; g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { options.params = lrmd_key_value_add(options.params, key, value); } g_hash_table_destroy(params); } if (meta) { char *key = NULL; char *value = NULL; g_hash_table_iter_init(&iter, meta); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { char *crm_name = crm_meta_name(key); options.params = lrmd_key_value_add(options.params, crm_name, value); free(crm_name); } g_hash_table_destroy(meta); } param_gen_bail: cleanup_alloc_calculations(&data_set); return rc; } int main(int argc, char **argv) { int option_index = 0; int argerr = 0; int flag; char *key = NULL; char *val = NULL; gboolean use_tls = FALSE; crm_trigger_t *trig; + crm_log_cli_init("cts-exec-helper"); crm_set_options(NULL, "mode [options]", long_options, "Inject commands into the executor, and watch for events\n"); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch (flag) { case '?': crm_help(flag, CRM_EX_OK); break; case 'V': options.verbose = 1; break; case 'Q': options.quiet = 1; options.verbose = 0; break; case 'l': options.listen = optarg; break; case 'w': options.no_wait = 1; break; case 'R': options.is_running = 1; break; case 'n': exec_call_opts = lrmd_opt_notify_orig_only; break; case 'o': exec_call_opts = lrmd_opt_notify_changes_only; break; case 'c': options.api_call = optarg; break; case 'a': options.action = optarg; break; case 'r': options.rsc_id = optarg; break; case 'x': if(optarg) { options.cancel_call_id = atoi(optarg); } break; case 'P': options.provider = optarg; break; case 'C': options.class = optarg; break; case 'T': options.type = optarg; break; case 'i': if(optarg) { options.interval_ms = crm_parse_interval_spec(optarg); } break; case 't': if(optarg) { options.timeout = atoi(optarg); } break; case 's': if(optarg) { options.start_delay = atoi(optarg); } break; case 'k': key = optarg; if (key && val) { options.params = lrmd_key_value_add(options.params, key, val); key = val = NULL; } break; case 'v': val = optarg; if (key && val) { options.params = lrmd_key_value_add(options.params, key, val); key = val = NULL; } break; case 'S': use_tls = TRUE; break; default: ++argerr; break; } } if (argerr) { crm_help('?', CRM_EX_USAGE); } if (optind > argc) { ++argerr; } if (!options.listen && (safe_str_eq(options.api_call, "metadata") || safe_str_eq(options.api_call, "list_agents") || safe_str_eq(options.api_call, "list_standards") || safe_str_eq(options.api_call, "list_ocf_providers"))) { options.no_connect = 1; } crm_log_init(NULL, LOG_INFO, TRUE, (options.verbose? TRUE : FALSE), argc, argv, FALSE); if (options.is_running) { if (!options.timeout) { options.timeout = 30000; } options.interval_ms = 0; if (!options.rsc_id) { crm_err("rsc-id must be given when is-running is used"); test_exit(CRM_EX_ERROR); } if (generate_params()) { print_result(printf ("Failed to retrieve rsc parameters from cib, can not determine if rsc is running.\n")); test_exit(CRM_EX_ERROR); } options.api_call = "exec"; options.action = "monitor"; exec_call_opts = lrmd_opt_notify_orig_only; } /* if we can't perform an api_call or listen for events, * there is nothing to do */ if (!options.api_call && !options.listen) { crm_err("Nothing to be done. Please specify 'api-call' and/or 'listen'"); return CRM_EX_OK; } if (use_tls) { lrmd_conn = lrmd_remote_api_new(NULL, "localhost", 0); } else { lrmd_conn = lrmd_api_new(); } trig = mainloop_add_trigger(G_PRIORITY_HIGH, start_test, NULL); mainloop_set_trigger(trig); mainloop_add_signal(SIGTERM, test_shutdown); crm_info("Starting"); mainloop = g_main_loop_new(NULL, FALSE); g_main_loop_run(mainloop); if (cib_conn != NULL) { cib_conn->cmds->signoff(cib_conn); cib_delete(cib_conn); } test_exit(CRM_EX_OK); return CRM_EX_OK; } diff --git a/lib/common/operations.c b/lib/common/operations.c index b642ae40a0..fd8fe965ce 100644 --- a/lib/common/operations.c +++ b/lib/common/operations.c @@ -1,587 +1,587 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include /*! * \brief Generate an operation key * * \param[in] rsc_id ID of resource being operated on * \param[in] op_type Operation name * \param[in] interval_ms Operation interval * * \return Newly allocated memory containing operation key as string * * \note It is the caller's responsibility to free() the result. */ char * generate_op_key(const char *rsc_id, const char *op_type, guint interval_ms) { CRM_ASSERT(rsc_id != NULL); CRM_ASSERT(op_type != NULL); return crm_strdup_printf(CRM_OP_FMT, rsc_id, op_type, interval_ms); } gboolean parse_op_key(const char *key, char **rsc_id, char **op_type, guint *interval_ms) { char *notify = NULL; char *mutable_key = NULL; char *mutable_key_ptr = NULL; size_t len = 0, offset = 0; unsigned long long ch = 0; guint local_interval_ms = 0; // Initialize output variables in case of early return if (rsc_id) { *rsc_id = NULL; } if (op_type) { *op_type = NULL; } if (interval_ms) { *interval_ms = 0; } CRM_CHECK(key && *key, return FALSE); // Parse interval at end of string len = strlen(key); offset = len - 1; while ((offset > 0) && isdigit(key[offset])) { ch = key[offset] - '0'; for (int digits = len - offset; digits > 1; --digits) { ch = ch * 10; } local_interval_ms += ch; offset--; } crm_trace("Operation key '%s' has interval %ums", key, local_interval_ms); if (interval_ms) { *interval_ms = local_interval_ms; } CRM_CHECK((offset != (len - 1)) && (key[offset] == '_'), return FALSE); mutable_key = strndup(key, offset); offset--; while (offset > 0 && key[offset] != '_') { offset--; } CRM_CHECK(key[offset] == '_', free(mutable_key); return FALSE); mutable_key_ptr = mutable_key + offset + 1; crm_trace(" Action: %s", mutable_key_ptr); if (op_type) { *op_type = strdup(mutable_key_ptr); } mutable_key[offset] = 0; offset--; notify = strstr(mutable_key, "_post_notify"); if (notify && safe_str_eq(notify, "_post_notify")) { notify[0] = 0; } notify = strstr(mutable_key, "_pre_notify"); if (notify && safe_str_eq(notify, "_pre_notify")) { notify[0] = 0; } crm_trace(" Resource: %s", mutable_key); if (rsc_id) { *rsc_id = mutable_key; } else { free(mutable_key); } return TRUE; } char * generate_notify_key(const char *rsc_id, const char *notify_type, const char *op_type) { CRM_CHECK(rsc_id != NULL, return NULL); CRM_CHECK(op_type != NULL, return NULL); CRM_CHECK(notify_type != NULL, return NULL); return crm_strdup_printf("%s_%s_notify_%s_0", rsc_id, notify_type, op_type); } char * generate_transition_magic(const char *transition_key, int op_status, int op_rc) { CRM_CHECK(transition_key != NULL, return NULL); return crm_strdup_printf("%d:%d;%s", op_status, op_rc, transition_key); } gboolean decode_transition_magic(const char *magic, char **uuid, int *transition_id, int *action_id, int *op_status, int *op_rc, int *target_rc) { int res = 0; char *key = NULL; gboolean result = TRUE; CRM_CHECK(magic != NULL, return FALSE); CRM_CHECK(op_rc != NULL, return FALSE); CRM_CHECK(op_status != NULL, return FALSE); key = calloc(1, strlen(magic) + 1); res = sscanf(magic, "%d:%d;%s", op_status, op_rc, key); if (res != 3) { crm_warn("Only found %d items in: '%s'", res, magic); free(key); return FALSE; } CRM_CHECK(decode_transition_key(key, uuid, transition_id, action_id, target_rc), result = FALSE); free(key); return result; } char * generate_transition_key(int transition_id, int action_id, int target_rc, const char *node) { CRM_CHECK(node != NULL, return NULL); return crm_strdup_printf("%d:%d:%d:%-*s", action_id, transition_id, target_rc, 36, node); } gboolean decode_transition_key(const char *key, char **uuid, int *transition_id, int *action_id, int *target_rc) { CRM_CHECK(uuid != NULL, return FALSE); CRM_CHECK(target_rc != NULL, return FALSE); CRM_CHECK(action_id != NULL, return FALSE); CRM_CHECK(transition_id != NULL, return FALSE); *uuid = calloc(37, sizeof(char)); if (sscanf(key, "%d:%d:%d:%36s", action_id, transition_id, target_rc, *uuid) != 4) { crm_err("Invalid transition key '%s'", key); free(*uuid); *uuid = NULL; *target_rc = -1; *action_id = -1; *transition_id = -1; return FALSE; } if (strlen(*uuid) != 36) { crm_warn("Invalid UUID '%s' in transition key '%s'", *uuid, key); } return TRUE; } void filter_action_parameters(xmlNode * param_set, const char *version) { char *key = NULL; char *timeout = NULL; char *interval_ms_s = NULL; const char *attr_filter[] = { XML_ATTR_ID, XML_ATTR_CRM_VERSION, XML_LRM_ATTR_OP_DIGEST, XML_LRM_ATTR_TARGET, XML_LRM_ATTR_TARGET_UUID, "pcmk_external_ip" }; gboolean do_delete = FALSE; int lpc = 0; static int meta_len = 0; if (meta_len == 0) { meta_len = strlen(CRM_META); } if (param_set == NULL) { return; } for (lpc = 0; lpc < DIMOF(attr_filter); lpc++) { xml_remove_prop(param_set, attr_filter[lpc]); } key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS); interval_ms_s = crm_element_value_copy(param_set, key); free(key); key = crm_meta_name(XML_ATTR_TIMEOUT); timeout = crm_element_value_copy(param_set, key); if (param_set) { xmlAttrPtr xIter = param_set->properties; while (xIter) { const char *prop_name = (const char *)xIter->name; xIter = xIter->next; do_delete = FALSE; if (strncasecmp(prop_name, CRM_META, meta_len) == 0) { do_delete = TRUE; } if (do_delete) { xml_remove_prop(param_set, prop_name); } } } if (interval_ms_s && strcmp(interval_ms_s, "0")) { /* Re-instate the operation's timeout value */ if (timeout != NULL) { crm_xml_add(param_set, key, timeout); } } free(interval_ms_s); free(timeout); free(key); } #define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" static void append_digest(lrmd_event_data_t * op, xmlNode * update, const char *version, const char *magic, int level) { /* this will enable us to later determine that the * resource's parameters have changed and we should force * a restart */ char *digest = NULL; xmlNode *args_xml = NULL; if (op->params == NULL) { return; } args_xml = create_xml_node(NULL, XML_TAG_PARAMS); g_hash_table_foreach(op->params, hash2field, args_xml); filter_action_parameters(args_xml, version); digest = calculate_operation_digest(args_xml, version); #if 0 if (level < get_crm_log_level() && op->interval_ms == 0 && crm_str_eq(op->op_type, CRMD_ACTION_START, TRUE)) { char *digest_source = dump_xml_unformatted(args_xml); do_crm_log(level, "Calculated digest %s for %s (%s). Source: %s\n", digest, ID(update), magic, digest_source); free(digest_source); } #endif crm_xml_add(update, XML_LRM_ATTR_OP_DIGEST, digest); free_xml(args_xml); free(digest); } int rsc_op_expected_rc(lrmd_event_data_t * op) { int rc = 0; if (op && op->user_data) { int dummy = 0; char *uuid = NULL; decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &rc); free(uuid); } return rc; } gboolean did_rsc_op_fail(lrmd_event_data_t * op, int target_rc) { switch (op->op_status) { case PCMK_LRM_OP_CANCELLED: case PCMK_LRM_OP_PENDING: return FALSE; break; case PCMK_LRM_OP_NOTSUPPORTED: case PCMK_LRM_OP_TIMEOUT: case PCMK_LRM_OP_ERROR: return TRUE; break; default: if (target_rc != op->rc) { return TRUE; } } return FALSE; } /*! * \brief Create a CIB XML element for an operation * * \param[in] parent If not NULL, make new XML node a child of this one * \param[in] prefix Generate an ID using this prefix * \param[in] task Operation task to set * \param[in] interval_spec Operation interval to set * \param[in] timeout If not NULL, operation timeout to set * * \return New XML object on success, NULL otherwise */ xmlNode * crm_create_op_xml(xmlNode *parent, const char *prefix, const char *task, const char *interval_spec, const char *timeout) { xmlNode *xml_op; CRM_CHECK(prefix && task && interval_spec, return NULL); xml_op = create_xml_node(parent, XML_ATTR_OP); crm_xml_set_id(xml_op, "%s-%s-%s", prefix, task, interval_spec); crm_xml_add(xml_op, XML_LRM_ATTR_INTERVAL, interval_spec); crm_xml_add(xml_op, "name", task); if (timeout) { crm_xml_add(xml_op, XML_ATTR_TIMEOUT, timeout); } return xml_op; } xmlNode * create_operation_update(xmlNode * parent, lrmd_event_data_t * op, const char * caller_version, int target_rc, const char * node, const char * origin, int level) { char *key = NULL; char *magic = NULL; char *op_id = NULL; char *op_id_additional = NULL; char *local_user_data = NULL; const char *exit_reason = NULL; xmlNode *xml_op = NULL; const char *task = NULL; CRM_CHECK(op != NULL, return NULL); do_crm_log(level, "%s: Updating resource %s after %s op %s (interval=%u)", origin, op->rsc_id, op->op_type, services_lrm_status_str(op->op_status), op->interval_ms); crm_trace("DC version: %s", caller_version); task = op->op_type; /* Remap the task name under various scenarios, to make life easier for the * PE when determining the current state. */ if (crm_str_eq(task, "reload", TRUE)) { if (op->op_status == PCMK_LRM_OP_DONE) { task = CRMD_ACTION_START; } else { task = CRMD_ACTION_STATUS; } } else if (crm_str_eq(task, CRMD_ACTION_MIGRATE, TRUE)) { /* if the migrate_from fails it will have enough info to do the right thing */ if (op->op_status == PCMK_LRM_OP_DONE) { task = CRMD_ACTION_STOP; } else { task = CRMD_ACTION_STATUS; } } else if ((op->op_status == PCMK_LRM_OP_DONE) && crm_str_eq(task, CRMD_ACTION_MIGRATED, TRUE)) { task = CRMD_ACTION_START; } key = generate_op_key(op->rsc_id, task, op->interval_ms); if (crm_str_eq(task, CRMD_ACTION_NOTIFY, TRUE)) { const char *n_type = crm_meta_value(op->params, "notify_type"); const char *n_task = crm_meta_value(op->params, "notify_operation"); CRM_LOG_ASSERT(n_type != NULL); CRM_LOG_ASSERT(n_task != NULL); op_id = generate_notify_key(op->rsc_id, n_type, n_task); if (op->op_status != PCMK_LRM_OP_PENDING) { /* Ignore notify errors. * - * @TODO We really should keep the actual result here, and ignore it - * when processing the CIB diff. + * @TODO It might be better to keep the correct result here, and + * ignore it in process_graph_event(). */ op->op_status = PCMK_LRM_OP_DONE; op->rc = 0; } } else if (did_rsc_op_fail(op, target_rc)) { op_id = generate_op_key(op->rsc_id, "last_failure", 0); if (op->interval_ms == 0) { // Ensure 'last' gets updated, in case record-pending is true op_id_additional = generate_op_key(op->rsc_id, "last", 0); } exit_reason = op->exit_reason; } else if (op->interval_ms > 0) { op_id = strdup(key); } else { op_id = generate_op_key(op->rsc_id, "last", 0); } again: xml_op = find_entity(parent, XML_LRM_TAG_RSC_OP, op_id); if (xml_op == NULL) { xml_op = create_xml_node(parent, XML_LRM_TAG_RSC_OP); } if (op->user_data == NULL) { crm_debug("Generating fake transition key for: " CRM_OP_FMT " %d from %s", op->rsc_id, op->op_type, op->interval_ms, op->call_id, origin); local_user_data = generate_transition_key(-1, op->call_id, target_rc, FAKE_TE_ID); op->user_data = local_user_data; } if(magic == NULL) { magic = generate_transition_magic(op->user_data, op->op_status, op->rc); } crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK_KEY, key); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); crm_xml_add(xml_op, XML_ATTR_ORIGIN, origin); crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version); crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic); crm_xml_add(xml_op, XML_LRM_ATTR_EXIT_REASON, exit_reason == NULL ? "" : exit_reason); crm_xml_add(xml_op, XML_LRM_ATTR_TARGET, node); /* For context during triage */ crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc); crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status); crm_xml_add_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, op->interval_ms); if (compare_version("2.1", caller_version) <= 0) { if (op->t_run || op->t_rcchange || op->exec_time || op->queue_time) { crm_trace("Timing data (" CRM_OP_FMT "): last=%u change=%u exec=%u queue=%u", op->rsc_id, op->op_type, op->interval_ms, op->t_run, op->t_rcchange, op->exec_time, op->queue_time); if (op->interval_ms == 0) { /* The values are the same for non-recurring ops */ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_RUN, op->t_run); crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_run); } else if(op->t_rcchange) { /* last-run is not accurate for recurring ops */ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_rcchange); } else { /* ...but is better than nothing otherwise */ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_run); } crm_xml_add_int(xml_op, XML_RSC_OP_T_EXEC, op->exec_time); crm_xml_add_int(xml_op, XML_RSC_OP_T_QUEUE, op->queue_time); } } if (crm_str_eq(op->op_type, CRMD_ACTION_MIGRATE, TRUE) || crm_str_eq(op->op_type, CRMD_ACTION_MIGRATED, TRUE)) { /* * Record migrate_source and migrate_target always for migrate ops. */ const char *name = XML_LRM_ATTR_MIGRATE_SOURCE; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); name = XML_LRM_ATTR_MIGRATE_TARGET; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); } append_digest(op, xml_op, caller_version, magic, LOG_DEBUG); if (op_id_additional) { free(op_id); op_id = op_id_additional; op_id_additional = NULL; goto again; } if (local_user_data) { free(local_user_data); op->user_data = NULL; } free(magic); free(op_id); free(key); return xml_op; } /*! * \brief Check whether an operation requires resource agent meta-data * * \param[in] rsc_class Resource agent class (or NULL to skip class check) * \param[in] op Operation action (or NULL to skip op check) * * \return TRUE if operation needs meta-data, FALSE otherwise * \note At least one of rsc_class and op must be specified. */ bool crm_op_needs_metadata(const char *rsc_class, const char *op) { /* Agent meta-data is used to determine whether a reload is possible, and to * evaluate versioned parameters -- so if this op is not relevant to those * features, we don't need the meta-data. */ CRM_CHECK(rsc_class || op, return FALSE); if (rsc_class && strcmp(rsc_class, PCMK_RESOURCE_CLASS_OCF) && strcmp(rsc_class, PCMK_RESOURCE_CLASS_STONITH)) { /* Meta-data is only needed for resource classes that use parameters */ return FALSE; } /* Meta-data is only needed for these actions */ if (op && strcmp(op, CRMD_ACTION_START) && strcmp(op, CRMD_ACTION_STATUS) && strcmp(op, CRMD_ACTION_PROMOTE) && strcmp(op, CRMD_ACTION_DEMOTE) && strcmp(op, CRMD_ACTION_RELOAD) && strcmp(op, CRMD_ACTION_MIGRATE) && strcmp(op, CRMD_ACTION_MIGRATED) && strcmp(op, CRMD_ACTION_NOTIFY)) { return FALSE; } return TRUE; } diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c index 981009c57a..95632bd31e 100644 --- a/lib/pengine/clone.c +++ b/lib/pengine/clone.c @@ -1,653 +1,653 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #define VARIANT_CLONE 1 #include "./variant.h" void force_non_unique_clone(resource_t * rsc, const char *rid, pe_working_set_t * data_set); resource_t *create_child_clone(resource_t * rsc, int sub_id, pe_working_set_t * data_set); static void mark_as_orphan(resource_t * rsc) { GListPtr gIter = rsc->children; set_bit(rsc->flags, pe_rsc_orphan); for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; mark_as_orphan(child); } } void force_non_unique_clone(resource_t * rsc, const char *rid, pe_working_set_t * data_set) { if (pe_rsc_is_clone(rsc)) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); crm_config_warn("Clones %s contains non-OCF resource %s and so " "can only be used as an anonymous clone. " "Set the " XML_RSC_ATTR_UNIQUE " meta attribute to false", rsc->id, rid); clone_data->clone_node_max = 1; clone_data->clone_max = g_list_length(data_set->nodes); clear_bit_recursive(rsc, pe_rsc_unique); } } resource_t * find_clone_instance(resource_t * rsc, const char *sub_id, pe_working_set_t * data_set) { char *child_id = NULL; resource_t *child = NULL; const char *child_base = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); child_base = ID(clone_data->xml_obj_child); child_id = crm_concat(child_base, sub_id, ':'); child = pe_find_resource(rsc->children, child_id); free(child_id); return child; } resource_t * create_child_clone(resource_t * rsc, int sub_id, pe_working_set_t * data_set) { gboolean as_orphan = FALSE; char *inc_num = NULL; char *inc_max = NULL; resource_t *child_rsc = NULL; xmlNode *child_copy = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); CRM_CHECK(clone_data->xml_obj_child != NULL, return FALSE); if (sub_id < 0) { as_orphan = TRUE; sub_id = clone_data->total_clones; } inc_num = crm_itoa(sub_id); inc_max = crm_itoa(clone_data->clone_max); child_copy = copy_xml(clone_data->xml_obj_child); crm_xml_add(child_copy, XML_RSC_ATTR_INCARNATION, inc_num); if (common_unpack(child_copy, &child_rsc, rsc, data_set) == FALSE) { pe_err("Failed unpacking resource %s", crm_element_value(child_copy, XML_ATTR_ID)); child_rsc = NULL; goto bail; } /* child_rsc->globally_unique = rsc->globally_unique; */ CRM_ASSERT(child_rsc); clone_data->total_clones += 1; pe_rsc_trace(child_rsc, "Setting clone attributes for: %s", child_rsc->id); rsc->children = g_list_append(rsc->children, child_rsc); if (as_orphan) { mark_as_orphan(child_rsc); } add_hash_param(child_rsc->meta, XML_RSC_ATTR_INCARNATION_MAX, inc_max); print_resource(LOG_TRACE, "Added ", child_rsc, FALSE); bail: free(inc_num); free(inc_max); return child_rsc; } gboolean clone_unpack(resource_t * rsc, pe_working_set_t * data_set) { int lpc = 0; xmlNode *a_child = NULL; xmlNode *xml_obj = rsc->xml; clone_variant_data_t *clone_data = NULL; const char *ordered = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_ORDERED); const char *interleave = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERLEAVE); const char *max_clones = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_MAX); const char *max_clones_node = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_NODEMAX); pe_rsc_trace(rsc, "Processing resource %s...", rsc->id); clone_data = calloc(1, sizeof(clone_variant_data_t)); rsc->variant_opaque = clone_data; if (is_set(rsc->flags, pe_rsc_promotable)) { const char *promoted_max = NULL; const char *promoted_node_max = NULL; promoted_max = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROMOTED_MAX); if (promoted_max == NULL) { // @COMPAT deprecated since 2.0.0 promoted_max = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_MASTER_MAX); } promoted_node_max = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROMOTED_NODEMAX); if (promoted_node_max == NULL) { // @COMPAT deprecated since 2.0.0 promoted_node_max = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_MASTER_NODEMAX); } clone_data->promoted_max = crm_parse_int(promoted_max, "1"); clone_data->promoted_node_max = crm_parse_int(promoted_node_max, "1"); } - clone_data->active_clones = 0; - clone_data->xml_obj_child = NULL; + // Implied by calloc() + /* clone_data->xml_obj_child = NULL; */ + clone_data->clone_node_max = crm_parse_int(max_clones_node, "1"); if (max_clones) { clone_data->clone_max = crm_parse_int(max_clones, "1"); } else if (g_list_length(data_set->nodes) > 0) { clone_data->clone_max = g_list_length(data_set->nodes); } else { clone_data->clone_max = 1; /* Handy during crm_verify */ } clone_data->interleave = crm_is_true(interleave); clone_data->ordered = crm_is_true(ordered); if ((rsc->flags & pe_rsc_unique) == 0 && clone_data->clone_node_max > 1) { crm_config_err("Anonymous clones (%s) may only support one copy per node", rsc->id); clone_data->clone_node_max = 1; } pe_rsc_trace(rsc, "Options for %s", rsc->id); pe_rsc_trace(rsc, "\tClone max: %d", clone_data->clone_max); pe_rsc_trace(rsc, "\tClone node max: %d", clone_data->clone_node_max); pe_rsc_trace(rsc, "\tClone is unique: %s", is_set(rsc->flags, pe_rsc_unique) ? "true" : "false"); pe_rsc_trace(rsc, "\tClone is promotable: %s", is_set(rsc->flags, pe_rsc_promotable) ? "true" : "false"); // Clones may contain a single group or primitive for (a_child = __xml_first_child(xml_obj); a_child != NULL; a_child = __xml_next_element(a_child)) { if (crm_str_eq((const char *)a_child->name, XML_CIB_TAG_RESOURCE, TRUE) || crm_str_eq((const char *)a_child->name, XML_CIB_TAG_GROUP, TRUE)) { clone_data->xml_obj_child = a_child; break; } } if (clone_data->xml_obj_child == NULL) { crm_config_err("%s has nothing to clone", rsc->id); return FALSE; } /* * Make clones ever so slightly sticky by default * * This helps ensure clone instances are not shuffled around the cluster * for no benefit in situations when pre-allocation is not appropriate */ if (g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_STICKINESS) == NULL) { add_hash_param(rsc->meta, XML_RSC_ATTR_STICKINESS, "1"); } pe_rsc_trace(rsc, "\tClone is unique (fixed): %s", is_set(rsc->flags, pe_rsc_unique) ? "true" : "false"); - clone_data->notify_confirm = is_set(rsc->flags, pe_rsc_notify); add_hash_param(rsc->meta, XML_RSC_ATTR_UNIQUE, is_set(rsc->flags, pe_rsc_unique) ? XML_BOOLEAN_TRUE : XML_BOOLEAN_FALSE); for (lpc = 0; lpc < clone_data->clone_max; lpc++) { if (create_child_clone(rsc, lpc, data_set) == NULL) { return FALSE; } } if (clone_data->clone_max == 0) { /* create one so that unpack_find_resource() will hook up * any orphans up to the parent correctly */ if (create_child_clone(rsc, -1, data_set) == NULL) { return FALSE; } } pe_rsc_trace(rsc, "Added %d children to resource %s...", clone_data->clone_max, rsc->id); return TRUE; } gboolean clone_active(resource_t * rsc, gboolean all) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; gboolean child_active = child_rsc->fns->active(child_rsc, all); if (all == FALSE && child_active) { return TRUE; } else if (all && child_active == FALSE) { return FALSE; } } if (all) { return TRUE; } else { return FALSE; } } static void short_print(char *list, const char *prefix, const char *type, const char *suffix, long options, void *print_data) { if(suffix == NULL) { suffix = ""; } if (list) { if (options & pe_print_html) { status_print("
  • "); } status_print("%s%s: [%s ]%s", prefix, type, list, suffix); if (options & pe_print_html) { status_print("
  • \n"); } else if (options & pe_print_suppres_nl) { /* nothing */ } else if ((options & pe_print_printf) || (options & pe_print_ncurses)) { status_print("\n"); } } } static const char * configured_role_str(resource_t * rsc) { const char *target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); if ((target_role == NULL) && rsc->children && rsc->children->data) { target_role = g_hash_table_lookup(((resource_t*)rsc->children->data)->meta, XML_RSC_ATTR_TARGET_ROLE); } return target_role; } static enum rsc_role_e configured_role(resource_t * rsc) { const char *target_role = configured_role_str(rsc); if (target_role) { return text2role(target_role); } return RSC_ROLE_UNKNOWN; } static void clone_print_xml(resource_t * rsc, const char *pre_text, long options, void *print_data) { char *child_text = crm_concat(pre_text, " ", ' '); const char *target_role = configured_role_str(rsc); GListPtr gIter = rsc->children; status_print("%sid); status_print("multi_state=\"%s\" ", is_set(rsc->flags, pe_rsc_promotable)? "true" : "false"); status_print("unique=\"%s\" ", is_set(rsc->flags, pe_rsc_unique) ? "true" : "false"); status_print("managed=\"%s\" ", is_set(rsc->flags, pe_rsc_managed) ? "true" : "false"); status_print("failed=\"%s\" ", is_set(rsc->flags, pe_rsc_failed) ? "true" : "false"); status_print("failure_ignored=\"%s\" ", is_set(rsc->flags, pe_rsc_failure_ignored) ? "true" : "false"); if (target_role) { status_print("target_role=\"%s\" ", target_role); } status_print(">\n"); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; child_rsc->fns->print(child_rsc, child_text, options, print_data); } status_print("%s\n", pre_text); free(child_text); } bool is_set_recursive(resource_t * rsc, long long flag, bool any) { GListPtr gIter; bool all = !any; if(is_set(rsc->flags, flag)) { if(any) { return TRUE; } } else if(all) { return FALSE; } for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { if(is_set_recursive(gIter->data, flag, any)) { if(any) { return TRUE; } } else if(all) { return FALSE; } } if(all) { return TRUE; } return FALSE; } void clone_print(resource_t * rsc, const char *pre_text, long options, void *print_data) { char *list_text = NULL; char *child_text = NULL; char *stopped_list = NULL; const char *type = "Clone"; GListPtr master_list = NULL; GListPtr started_list = NULL; GListPtr gIter = rsc->children; clone_variant_data_t *clone_data = NULL; int active_instances = 0; if (pre_text == NULL) { pre_text = " "; } if (options & pe_print_xml) { clone_print_xml(rsc, pre_text, options, print_data); return; } get_clone_variant_data(clone_data, rsc); child_text = crm_concat(pre_text, " ", ' '); if (is_set(rsc->flags, pe_rsc_promotable)) { type = "Master/Slave"; } status_print("%s%s Set: %s [%s]%s%s", pre_text ? pre_text : "", type, rsc->id, ID(clone_data->xml_obj_child), is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "", is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)"); if (options & pe_print_html) { status_print("\n
      \n"); } else if ((options & pe_print_log) == 0) { status_print("\n"); } for (; gIter != NULL; gIter = gIter->next) { gboolean print_full = FALSE; resource_t *child_rsc = (resource_t *) gIter->data; if (options & pe_print_clone_details) { print_full = TRUE; } if (child_rsc->fns->active(child_rsc, FALSE) == FALSE) { /* Inactive clone */ if (is_set(child_rsc->flags, pe_rsc_orphan)) { continue; } else if (is_set(rsc->flags, pe_rsc_unique)) { print_full = TRUE; } else if (is_not_set(options, pe_print_clone_active)) { stopped_list = add_list_element(stopped_list, child_rsc->id); } } else if (is_set_recursive(child_rsc, pe_rsc_unique, TRUE) || is_set_recursive(child_rsc, pe_rsc_orphan, TRUE) || is_set_recursive(child_rsc, pe_rsc_managed, FALSE) == FALSE || is_set_recursive(child_rsc, pe_rsc_failed, TRUE)) { /* Unique, unmanaged or failed clone */ print_full = TRUE; } else if (is_set(options, pe_print_pending) && child_rsc->pending_task != NULL) { /* In a pending state */ print_full = TRUE; } else if (child_rsc->fns->active(child_rsc, TRUE)) { /* Fully active anonymous clone */ node_t *location = child_rsc->fns->location(child_rsc, NULL, TRUE); if (location) { enum rsc_role_e a_role = child_rsc->fns->state(child_rsc, TRUE); if (location->details->online == FALSE && location->details->unclean) { print_full = TRUE; } else if (a_role > RSC_ROLE_SLAVE) { /* And active on a single node as master */ master_list = g_list_append(master_list, location); } else { /* And active on a single node as started/slave */ started_list = g_list_append(started_list, location); } } else { /* uncolocated group - bleh */ print_full = TRUE; } } else { /* Partially active anonymous clone */ print_full = TRUE; } if (print_full) { if (options & pe_print_html) { status_print("
    • \n"); } child_rsc->fns->print(child_rsc, child_text, options, print_data); if (options & pe_print_html) { status_print("
    • \n"); } } } /* Masters */ master_list = g_list_sort(master_list, sort_node_uname); for (gIter = master_list; gIter; gIter = gIter->next) { node_t *host = gIter->data; list_text = add_list_element(list_text, host->details->uname); active_instances++; } short_print(list_text, child_text, "Masters", NULL, options, print_data); g_list_free(master_list); free(list_text); list_text = NULL; /* Started/Slaves */ started_list = g_list_sort(started_list, sort_node_uname); for (gIter = started_list; gIter; gIter = gIter->next) { node_t *host = gIter->data; list_text = add_list_element(list_text, host->details->uname); active_instances++; } if (is_set(rsc->flags, pe_rsc_promotable)) { enum rsc_role_e role = configured_role(rsc); if(role == RSC_ROLE_SLAVE) { short_print(list_text, child_text, "Slaves (target-role)", NULL, options, print_data); } else { short_print(list_text, child_text, "Slaves", NULL, options, print_data); } } else { short_print(list_text, child_text, "Started", NULL, options, print_data); } g_list_free(started_list); free(list_text); list_text = NULL; if (is_not_set(options, pe_print_clone_active)) { const char *state = "Stopped"; enum rsc_role_e role = configured_role(rsc); if (role == RSC_ROLE_STOPPED) { state = "Stopped (disabled)"; } if (is_not_set(rsc->flags, pe_rsc_unique) && (clone_data->clone_max > active_instances)) { GListPtr nIter; GListPtr list = g_hash_table_get_values(rsc->allowed_nodes); /* Custom stopped list for non-unique clones */ free(stopped_list); stopped_list = NULL; if (g_list_length(list) == 0) { /* Clusters with symmetrical=false haven't calculated allowed_nodes yet * If we've not probed for them yet, the Stopped list will be empty */ list = g_hash_table_get_values(rsc->known_on); } list = g_list_sort(list, sort_node_uname); for (nIter = list; nIter != NULL; nIter = nIter->next) { node_t *node = (node_t *)nIter->data; if (pe_find_node(rsc->running_on, node->details->uname) == NULL) { stopped_list = add_list_element(stopped_list, node->details->uname); } } g_list_free(list); } short_print(stopped_list, child_text, state, NULL, options, print_data); free(stopped_list); } if (options & pe_print_html) { status_print("
    \n"); } free(child_text); } void clone_free(resource_t * rsc) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); pe_rsc_trace(rsc, "Freeing %s", rsc->id); for (GListPtr gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; CRM_ASSERT(child_rsc); pe_rsc_trace(child_rsc, "Freeing child %s", child_rsc->id); free_xml(child_rsc->xml); child_rsc->xml = NULL; /* There could be a saved unexpanded xml */ free_xml(child_rsc->orig_xml); child_rsc->orig_xml = NULL; child_rsc->fns->free(child_rsc); } g_list_free(rsc->children); if (clone_data) { CRM_ASSERT(clone_data->demote_notify == NULL); CRM_ASSERT(clone_data->stop_notify == NULL); CRM_ASSERT(clone_data->start_notify == NULL); CRM_ASSERT(clone_data->promote_notify == NULL); } common_free(rsc); } enum rsc_role_e clone_resource_state(const resource_t * rsc, gboolean current) { enum rsc_role_e clone_role = RSC_ROLE_UNKNOWN; GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; enum rsc_role_e a_role = child_rsc->fns->state(child_rsc, current); if (a_role > clone_role) { clone_role = a_role; } } pe_rsc_trace(rsc, "%s role: %s", rsc->id, role2text(clone_role)); return clone_role; } /*! * \internal * \brief Check whether a clone has an instance for every node * * \param[in] rsc Clone to check * \param[in] data_set Cluster state */ bool pe__is_universal_clone(pe_resource_t *rsc, pe_working_set_t *data_set) { if (pe_rsc_is_clone(rsc)) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if (clone_data->clone_max == g_list_length(data_set->nodes)) { return TRUE; } } return FALSE; } diff --git a/lib/pengine/native.c b/lib/pengine/native.c index 08e79446c8..fe43362d05 100644 --- a/lib/pengine/native.c +++ b/lib/pengine/native.c @@ -1,964 +1,960 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #define VARIANT_NATIVE 1 #include "./variant.h" /*! * \internal * \brief Check whether a resource is active on multiple nodes */ static bool is_multiply_active(pe_resource_t *rsc) { unsigned int count = 0; if (rsc->variant == pe_native) { pe__find_active_requires(rsc, &count); } return count > 1; } void native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set) { GListPtr gIter = rsc->running_on; CRM_CHECK(node != NULL, return); for (; gIter != NULL; gIter = gIter->next) { node_t *a_node = (node_t *) gIter->data; CRM_CHECK(a_node != NULL, return); if (safe_str_eq(a_node->details->id, node->details->id)) { return; } } pe_rsc_trace(rsc, "Adding %s to %s %s", rsc->id, node->details->uname, is_set(rsc->flags, pe_rsc_managed)?"":"(unmanaged)"); rsc->running_on = g_list_append(rsc->running_on, node); if (rsc->variant == pe_native) { node->details->running_rsc = g_list_append(node->details->running_rsc, rsc); } if (rsc->variant == pe_native && node->details->maintenance) { clear_bit(rsc->flags, pe_rsc_managed); } if (is_not_set(rsc->flags, pe_rsc_managed)) { resource_t *p = rsc->parent; pe_rsc_info(rsc, "resource %s isn't managed", rsc->id); resource_location(rsc, node, INFINITY, "not_managed_default", data_set); while(p && node->details->online) { /* add without the additional location constraint */ p->running_on = g_list_append(p->running_on, node); p = p->parent; } return; } if (is_multiply_active(rsc)) { switch (rsc->recovery_type) { case recovery_stop_only: { GHashTableIter gIter; node_t *local_node = NULL; /* make sure it doesn't come up again */ if (rsc->allowed_nodes != NULL) { g_hash_table_destroy(rsc->allowed_nodes); } rsc->allowed_nodes = node_hash_from_list(data_set->nodes); g_hash_table_iter_init(&gIter, rsc->allowed_nodes); while (g_hash_table_iter_next(&gIter, NULL, (void **)&local_node)) { local_node->weight = -INFINITY; } } break; case recovery_stop_start: break; case recovery_block: clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); /* If the resource belongs to a group or bundle configured with * multiple-active=block, block the entire entity. */ if (rsc->parent && (rsc->parent->variant == pe_group || rsc->parent->variant == pe_container) && rsc->parent->recovery_type == recovery_block) { GListPtr gIter = rsc->parent->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; clear_bit(child->flags, pe_rsc_managed); set_bit(child->flags, pe_rsc_block); } } break; } crm_debug("%s is active on multiple nodes including %s: %s", rsc->id, node->details->uname, recovery2text(rsc->recovery_type)); } else { pe_rsc_trace(rsc, "Resource %s is active on: %s", rsc->id, node->details->uname); } if (rsc->parent != NULL) { native_add_running(rsc->parent, node, data_set); } } extern void force_non_unique_clone(resource_t * rsc, const char *rid, pe_working_set_t * data_set); gboolean native_unpack(resource_t * rsc, pe_working_set_t * data_set) { resource_t *parent = uber_parent(rsc); native_variant_data_t *native_data = NULL; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); pe_rsc_trace(rsc, "Processing resource %s...", rsc->id); native_data = calloc(1, sizeof(native_variant_data_t)); rsc->variant_opaque = native_data; // Cloned LSB primitives must be anonymous if (is_set(rsc->flags, pe_rsc_unique) && rsc->parent && safe_str_eq(class, PCMK_RESOURCE_CLASS_LSB)) { force_non_unique_clone(parent, rsc->id, data_set); } // Only OCF primitives can be promotable clones if (is_set(parent->flags, pe_rsc_promotable) && safe_str_neq(class, PCMK_RESOURCE_CLASS_OCF)) { pe_err("Resource %s is of type %s and therefore " "cannot be used as a promotable clone resource", rsc->id, class); return FALSE; } return TRUE; } static bool rsc_is_on_node(resource_t *rsc, const node_t *node, int flags) { pe_rsc_trace(rsc, "Checking whether %s is on %s", rsc->id, node->details->uname); if (is_set(flags, pe_find_current) && rsc->running_on) { for (GListPtr iter = rsc->running_on; iter; iter = iter->next) { node_t *loc = (node_t *) iter->data; if (loc->details == node->details) { return TRUE; } } } else if (is_set(flags, pe_find_inactive) && (rsc->running_on == NULL)) { return TRUE; } else if (is_not_set(flags, pe_find_current) && rsc->allocated_to && (rsc->allocated_to->details == node->details)) { return TRUE; } return FALSE; } resource_t * native_find_rsc(resource_t * rsc, const char *id, const node_t *on_node, int flags) { bool match = FALSE; resource_t *result = NULL; CRM_CHECK(id && rsc && rsc->id, return NULL); if (flags & pe_find_clone) { const char *rid = ID(rsc->xml); if (!pe_rsc_is_clone(uber_parent(rsc))) { match = FALSE; } else if (!strcmp(id, rsc->id) || safe_str_eq(id, rid)) { match = TRUE; } } else if (!strcmp(id, rsc->id)) { match = TRUE; } else if (is_set(flags, pe_find_renamed) && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) { match = TRUE; } else if (is_set(flags, pe_find_any) || (is_set(flags, pe_find_anon) && is_not_set(rsc->flags, pe_rsc_unique))) { match = pe_base_name_eq(rsc, id); } if (match && on_node) { bool match_node = rsc_is_on_node(rsc, on_node, flags); if (match_node == FALSE) { match = FALSE; } } if (match) { return rsc; } for (GListPtr gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; result = rsc->fns->find_rsc(child, id, on_node, flags); if (result) { return result; } } return NULL; } char * native_parameter(resource_t * rsc, node_t * node, gboolean create, const char *name, pe_working_set_t * data_set) { char *value_copy = NULL; const char *value = NULL; GHashTable *hash = NULL; GHashTable *local_hash = NULL; CRM_CHECK(rsc != NULL, return NULL); CRM_CHECK(name != NULL && strlen(name) != 0, return NULL); pe_rsc_trace(rsc, "Looking up %s in %s", name, rsc->id); if (create || g_hash_table_size(rsc->parameters) == 0) { if (node != NULL) { pe_rsc_trace(rsc, "Creating hash with node %s", node->details->uname); } else { pe_rsc_trace(rsc, "Creating default hash"); } local_hash = crm_str_table_new(); get_rsc_attributes(local_hash, rsc, node, data_set); hash = local_hash; } else { hash = rsc->parameters; } value = g_hash_table_lookup(hash, name); if (value == NULL) { /* try meta attributes instead */ value = g_hash_table_lookup(rsc->meta, name); } if (value != NULL) { value_copy = strdup(value); } if (local_hash != NULL) { g_hash_table_destroy(local_hash); } return value_copy; } gboolean native_active(resource_t * rsc, gboolean all) { GListPtr gIter = rsc->running_on; for (; gIter != NULL; gIter = gIter->next) { node_t *a_node = (node_t *) gIter->data; if (a_node->details->unclean) { crm_debug("Resource %s: node %s is unclean", rsc->id, a_node->details->uname); return TRUE; } else if (a_node->details->online == FALSE) { crm_debug("Resource %s: node %s is offline", rsc->id, a_node->details->uname); } else { crm_debug("Resource %s active on %s", rsc->id, a_node->details->uname); return TRUE; } } return FALSE; } struct print_data_s { long options; void *print_data; }; static void native_print_attr(gpointer key, gpointer value, gpointer user_data) { long options = ((struct print_data_s *)user_data)->options; void *print_data = ((struct print_data_s *)user_data)->print_data; status_print("Option: %s = %s\n", (char *)key, (char *)value); } static const char * native_pending_state(resource_t * rsc) { const char *pending_state = NULL; if (safe_str_eq(rsc->pending_task, CRMD_ACTION_START)) { pending_state = "Starting"; } else if (safe_str_eq(rsc->pending_task, CRMD_ACTION_STOP)) { pending_state = "Stopping"; } else if (safe_str_eq(rsc->pending_task, CRMD_ACTION_MIGRATE)) { pending_state = "Migrating"; } else if (safe_str_eq(rsc->pending_task, CRMD_ACTION_MIGRATED)) { /* Work might be done in here. */ pending_state = "Migrating"; } else if (safe_str_eq(rsc->pending_task, CRMD_ACTION_PROMOTE)) { pending_state = "Promoting"; } else if (safe_str_eq(rsc->pending_task, CRMD_ACTION_DEMOTE)) { pending_state = "Demoting"; } return pending_state; } static const char * native_pending_task(resource_t * rsc) { const char *pending_task = NULL; - if (safe_str_eq(rsc->pending_task, CRMD_ACTION_NOTIFY)) { - /* "Notifying" is not very useful to be shown. */ - pending_task = NULL; - - } else if (safe_str_eq(rsc->pending_task, CRMD_ACTION_STATUS)) { + if (safe_str_eq(rsc->pending_task, CRMD_ACTION_STATUS)) { pending_task = "Monitoring"; /* Pending probes are not printed, even if pending * operations are requested. If someone ever requests that * behavior, uncomment this and the corresponding part of * unpack.c:unpack_rsc_op(). */ /* } else if (safe_str_eq(rsc->pending_task, "probe")) { pending_task = "Checking"; */ } return pending_task; } static enum rsc_role_e native_displayable_role(resource_t *rsc) { enum rsc_role_e role = rsc->role; if ((role == RSC_ROLE_STARTED) && is_set(uber_parent(rsc)->flags, pe_rsc_promotable)) { role = RSC_ROLE_SLAVE; } return role; } static const char * native_displayable_state(resource_t *rsc, long options) { const char *rsc_state = NULL; if (options & pe_print_pending) { rsc_state = native_pending_state(rsc); } if (rsc_state == NULL) { rsc_state = role2text(native_displayable_role(rsc)); } return rsc_state; } static void native_print_xml(resource_t * rsc, const char *pre_text, long options, void *print_data) { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); const char *prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); const char *rsc_state = native_displayable_state(rsc, options); const char *target_role = NULL; /* resource information. */ status_print("%sxml, XML_ATTR_TYPE)); status_print("role=\"%s\" ", rsc_state); if (rsc->meta) { target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); } if (target_role) { status_print("target_role=\"%s\" ", target_role); } status_print("active=\"%s\" ", rsc->fns->active(rsc, TRUE) ? "true" : "false"); status_print("orphaned=\"%s\" ", is_set(rsc->flags, pe_rsc_orphan) ? "true" : "false"); status_print("blocked=\"%s\" ", is_set(rsc->flags, pe_rsc_block) ? "true" : "false"); status_print("managed=\"%s\" ", is_set(rsc->flags, pe_rsc_managed) ? "true" : "false"); status_print("failed=\"%s\" ", is_set(rsc->flags, pe_rsc_failed) ? "true" : "false"); status_print("failure_ignored=\"%s\" ", is_set(rsc->flags, pe_rsc_failure_ignored) ? "true" : "false"); status_print("nodes_running_on=\"%d\" ", g_list_length(rsc->running_on)); if (options & pe_print_pending) { const char *pending_task = native_pending_task(rsc); if (pending_task) { status_print("pending=\"%s\" ", pending_task); } } if (options & pe_print_dev) { status_print("provisional=\"%s\" ", is_set(rsc->flags, pe_rsc_provisional) ? "true" : "false"); status_print("runnable=\"%s\" ", is_set(rsc->flags, pe_rsc_runnable) ? "true" : "false"); status_print("priority=\"%f\" ", (double)rsc->priority); status_print("variant=\"%s\" ", crm_element_name(rsc->xml)); } /* print out the nodes this resource is running on */ if (options & pe_print_rsconly) { status_print("/>\n"); /* do nothing */ } else if (rsc->running_on != NULL) { GListPtr gIter = rsc->running_on; status_print(">\n"); for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; status_print("%s \n", pre_text, node->details->uname, node->details->id, node->details->online ? "false" : "true"); } status_print("%s\n", pre_text); } else { status_print("/>\n"); } } /* making this inline rather than a macro prevents a coverity "unreachable" * warning on the first usage */ static inline const char * comma_if(int i) { return i? ", " : ""; } void common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *node, long options, void *print_data) { const char *desc = NULL; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE); const char *target_role = NULL; enum rsc_role_e role = native_displayable_role(rsc); int offset = 0; int flagOffset = 0; char buffer[LINE_MAX]; char flagBuffer[LINE_MAX]; CRM_ASSERT(rsc->variant == pe_native); CRM_ASSERT(kind != NULL); if (rsc->meta) { const char *is_internal = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERNAL_RSC); if (crm_is_true(is_internal) && is_not_set(options, pe_print_implicit)) { crm_trace("skipping print of internal resource %s", rsc->id); return; } target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); } if (pre_text == NULL && (options & pe_print_printf)) { pre_text = " "; } if (options & pe_print_xml) { native_print_xml(rsc, pre_text, options, print_data); return; } if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { node = NULL; } if (options & pe_print_html) { if (is_not_set(rsc->flags, pe_rsc_managed)) { status_print(""); } else if (is_set(rsc->flags, pe_rsc_failed)) { status_print(""); } else if (rsc->variant == pe_native && (rsc->running_on == NULL)) { status_print(""); } else if (g_list_length(rsc->running_on) > 1) { status_print(""); } else if (is_set(rsc->flags, pe_rsc_failure_ignored)) { status_print(""); } else { status_print(""); } } if(pre_text) { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", pre_text); } offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", name); offset += snprintf(buffer + offset, LINE_MAX - offset, "\t(%s", class); if (crm_provider_required(class)) { const char *prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); offset += snprintf(buffer + offset, LINE_MAX - offset, "::%s", prov); } offset += snprintf(buffer + offset, LINE_MAX - offset, ":%s):\t", kind); if(is_set(rsc->flags, pe_rsc_orphan)) { offset += snprintf(buffer + offset, LINE_MAX - offset, " ORPHANED "); } if(role > RSC_ROLE_SLAVE && is_set(rsc->flags, pe_rsc_failed)) { offset += snprintf(buffer + offset, LINE_MAX - offset, "FAILED %s", role2text(role)); } else if(is_set(rsc->flags, pe_rsc_failed)) { offset += snprintf(buffer + offset, LINE_MAX - offset, "FAILED"); } else { const char *rsc_state = native_displayable_state(rsc, options); offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_state); } if(node) { offset += snprintf(buffer + offset, LINE_MAX - offset, " %s", node->details->uname); if (node->details->online == FALSE && node->details->unclean) { flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, "%sUNCLEAN", comma_if(flagOffset)); } } if (options & pe_print_pending) { const char *pending_task = native_pending_task(rsc); if (pending_task) { flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, "%s%s", comma_if(flagOffset), pending_task); } } if (target_role) { enum rsc_role_e target_role_e = text2role(target_role); /* Ignore target role Started, as it is the default anyways * (and would also allow a Master to be Master). * Show if target role limits our abilities. */ if (target_role_e == RSC_ROLE_STOPPED) { flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, "%sdisabled", comma_if(flagOffset)); rsc->cluster->disabled_resources++; } else if (is_set(uber_parent(rsc)->flags, pe_rsc_promotable) && target_role_e == RSC_ROLE_SLAVE) { flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, "%starget-role:%s", comma_if(flagOffset), target_role); rsc->cluster->disabled_resources++; } } if (is_set(rsc->flags, pe_rsc_block)) { flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, "%sblocked", comma_if(flagOffset)); rsc->cluster->blocked_resources++; } else if (is_not_set(rsc->flags, pe_rsc_managed)) { flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, "%sunmanaged", comma_if(flagOffset)); } if(is_set(rsc->flags, pe_rsc_failure_ignored)) { flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, "%sfailure ignored", comma_if(flagOffset)); } if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { desc = crm_element_value(rsc->xml, XML_ATTR_DESC); } CRM_LOG_ASSERT(offset > 0); if(flagOffset > 0) { status_print("%s (%s)%s%s", buffer, flagBuffer, desc?" ":"", desc?desc:""); } else { status_print("%s%s%s", buffer, desc?" ":"", desc?desc:""); } #if CURSES_ENABLED if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { /* Done */ } else if (options & pe_print_ncurses) { /* coverity[negative_returns] False positive */ move(-1, 0); } #endif if (options & pe_print_html) { status_print(" "); } if ((options & pe_print_rsconly)) { } else if (g_list_length(rsc->running_on) > 1) { GListPtr gIter = rsc->running_on; int counter = 0; if (options & pe_print_html) { status_print("
      \n"); } else if ((options & pe_print_printf) || (options & pe_print_ncurses)) { status_print("["); } for (; gIter != NULL; gIter = gIter->next) { node_t *n = (node_t *) gIter->data; counter++; if (options & pe_print_html) { status_print("
    • \n%s", n->details->uname); } else if ((options & pe_print_printf) || (options & pe_print_ncurses)) { status_print(" %s", n->details->uname); } else if ((options & pe_print_log)) { status_print("\t%d : %s", counter, n->details->uname); } else { status_print("%s", n->details->uname); } if (options & pe_print_html) { status_print("
    • \n"); } } if (options & pe_print_html) { status_print("
    \n"); } else if ((options & pe_print_printf) || (options & pe_print_ncurses)) { status_print(" ]"); } } if (options & pe_print_html) { status_print("
    \n"); } else if (options & pe_print_suppres_nl) { /* nothing */ } else if ((options & pe_print_printf) || (options & pe_print_ncurses)) { status_print("\n"); } if (options & pe_print_details) { struct print_data_s pdata; pdata.options = options; pdata.print_data = print_data; g_hash_table_foreach(rsc->parameters, native_print_attr, &pdata); } if (options & pe_print_dev) { GHashTableIter iter; node_t *n = NULL; status_print("%s\t(%s%svariant=%s, priority=%f)", pre_text, is_set(rsc->flags, pe_rsc_provisional) ? "provisional, " : "", is_set(rsc->flags, pe_rsc_runnable) ? "" : "non-startable, ", crm_element_name(rsc->xml), (double)rsc->priority); status_print("%s\tAllowed Nodes", pre_text); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&n)) { status_print("%s\t * %s %d", pre_text, n->details->uname, n->weight); } } if (options & pe_print_max_details) { GHashTableIter iter; node_t *n = NULL; status_print("%s\t=== Allowed Nodes\n", pre_text); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&n)) { print_node("\t", n, FALSE); } } } void native_print(resource_t * rsc, const char *pre_text, long options, void *print_data) { node_t *node = NULL; CRM_ASSERT(rsc->variant == pe_native); if (options & pe_print_xml) { native_print_xml(rsc, pre_text, options, print_data); return; } node = pe__current_node(rsc); common_print(rsc, pre_text, rsc_printable_id(rsc), node, options, print_data); } void native_free(resource_t * rsc) { pe_rsc_trace(rsc, "Freeing resource action list (not the data)"); common_free(rsc); } enum rsc_role_e native_resource_state(const resource_t * rsc, gboolean current) { enum rsc_role_e role = rsc->next_role; if (current) { role = rsc->role; } pe_rsc_trace(rsc, "%s state: %s", rsc->id, role2text(role)); return role; } node_t * native_location(resource_t * rsc, GListPtr * list, gboolean current) { node_t *one = NULL; GListPtr result = NULL; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; child->fns->location(child, &result, current); } } else if (current && rsc->running_on) { result = g_list_copy(rsc->running_on); } else if (current == FALSE && rsc->allocated_to) { result = g_list_append(NULL, rsc->allocated_to); } if (result && g_list_length(result) == 1) { one = g_list_nth_data(result, 0); } if (list) { GListPtr gIter = result; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (*list == NULL || pe_find_node_id(*list, node->details->id) == NULL) { *list = g_list_append(*list, node); } } } g_list_free(result); return one; } static void get_rscs_brief(GListPtr rsc_list, GHashTable * rsc_table, GHashTable * active_table) { GListPtr gIter = rsc_list; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE); int offset = 0; char buffer[LINE_MAX]; int *rsc_counter = NULL; int *active_counter = NULL; if (rsc->variant != pe_native) { continue; } offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", class); if (crm_provider_required(class)) { const char *prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); offset += snprintf(buffer + offset, LINE_MAX - offset, "::%s", prov); } offset += snprintf(buffer + offset, LINE_MAX - offset, ":%s", kind); CRM_LOG_ASSERT(offset > 0); if (rsc_table) { rsc_counter = g_hash_table_lookup(rsc_table, buffer); if (rsc_counter == NULL) { rsc_counter = calloc(1, sizeof(int)); *rsc_counter = 0; g_hash_table_insert(rsc_table, strdup(buffer), rsc_counter); } (*rsc_counter)++; } if (active_table) { GListPtr gIter2 = rsc->running_on; for (; gIter2 != NULL; gIter2 = gIter2->next) { node_t *node = (node_t *) gIter2->data; GHashTable *node_table = NULL; if (node->details->unclean == FALSE && node->details->online == FALSE) { continue; } node_table = g_hash_table_lookup(active_table, node->details->uname); if (node_table == NULL) { node_table = crm_str_table_new(); g_hash_table_insert(active_table, strdup(node->details->uname), node_table); } active_counter = g_hash_table_lookup(node_table, buffer); if (active_counter == NULL) { active_counter = calloc(1, sizeof(int)); *active_counter = 0; g_hash_table_insert(node_table, strdup(buffer), active_counter); } (*active_counter)++; } } } } static void destroy_node_table(gpointer data) { GHashTable *node_table = data; if (node_table) { g_hash_table_destroy(node_table); } } void print_rscs_brief(GListPtr rsc_list, const char *pre_text, long options, void *print_data, gboolean print_all) { GHashTable *rsc_table = crm_str_table_new(); GHashTable *active_table = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_node_table); GHashTableIter hash_iter; char *type = NULL; int *rsc_counter = NULL; get_rscs_brief(rsc_list, rsc_table, active_table); g_hash_table_iter_init(&hash_iter, rsc_table); while (g_hash_table_iter_next(&hash_iter, (gpointer *)&type, (gpointer *)&rsc_counter)) { GHashTableIter hash_iter2; char *node_name = NULL; GHashTable *node_table = NULL; int active_counter_all = 0; g_hash_table_iter_init(&hash_iter2, active_table); while (g_hash_table_iter_next(&hash_iter2, (gpointer *)&node_name, (gpointer *)&node_table)) { int *active_counter = g_hash_table_lookup(node_table, type); if (active_counter == NULL || *active_counter == 0) { continue; } else { active_counter_all += *active_counter; } if (options & pe_print_rsconly) { node_name = NULL; } if (options & pe_print_html) { status_print("
  • \n"); } if (print_all) { status_print("%s%d/%d\t(%s):\tActive %s\n", pre_text ? pre_text : "", active_counter ? *active_counter : 0, rsc_counter ? *rsc_counter : 0, type, active_counter && (*active_counter > 0) && node_name ? node_name : ""); } else { status_print("%s%d\t(%s):\tActive %s\n", pre_text ? pre_text : "", active_counter ? *active_counter : 0, type, active_counter && (*active_counter > 0) && node_name ? node_name : ""); } if (options & pe_print_html) { status_print("
  • \n"); } } if (print_all && active_counter_all == 0) { if (options & pe_print_html) { status_print("
  • \n"); } status_print("%s%d/%d\t(%s):\tActive\n", pre_text ? pre_text : "", active_counter_all, rsc_counter ? *rsc_counter : 0, type); if (options & pe_print_html) { status_print("
  • \n"); } } } if (rsc_table) { g_hash_table_destroy(rsc_table); rsc_table = NULL; } if (active_table) { g_hash_table_destroy(active_table); active_table = NULL; } } diff --git a/lib/pengine/variant.h b/lib/pengine/variant.h index 7530286bb1..ce5276b1ff 100644 --- a/lib/pengine/variant.h +++ b/lib/pengine/variant.h @@ -1,163 +1,149 @@ -/* - * Copyright (C) 2004 Andrew Beekhof - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +/* + * Copyright 2004-2018 Andrew Beekhof + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ + #ifndef PE_VARIANT__H # define PE_VARIANT__H # if VARIANT_CLONE typedef struct clone_variant_data_s { int clone_max; int clone_node_max; int promoted_max; int promoted_node_max; int total_clones; - int active_clones; - int max_nodes; + // @TODO make these a bitmask gboolean interleave; gboolean ordered; gboolean applied_master_prefs; gboolean merged_master_weights; notify_data_t *stop_notify; notify_data_t *start_notify; notify_data_t *demote_notify; notify_data_t *promote_notify; xmlNode *xml_obj_child; - - gboolean notify_confirm; - } clone_variant_data_t; # define get_clone_variant_data(data, rsc) \ CRM_ASSERT(rsc != NULL); \ CRM_ASSERT(rsc->variant == pe_clone); \ data = (clone_variant_data_t *)rsc->variant_opaque; # elif VARIANT_CONTAINER typedef struct { int offset; node_t *node; char *ipaddr; resource_t *ip; resource_t *child; resource_t *docker; resource_t *remote; } container_grouping_t; typedef struct { char *source; char *target; char *options; int flags; } container_mount_t; typedef struct { char *source; char *target; } container_port_t; enum container_type { PE_CONTAINER_TYPE_UNKNOWN, PE_CONTAINER_TYPE_DOCKER, PE_CONTAINER_TYPE_RKT }; #define PE_CONTAINER_TYPE_UNKNOWN_S "unknown" #define PE_CONTAINER_TYPE_DOCKER_S "Docker" #define PE_CONTAINER_TYPE_RKT_S "rkt" typedef struct container_variant_data_s { int promoted_max; int replicas; int replicas_per_host; char *prefix; char *image; const char *ip_last; char *host_network; char *host_netmask; char *control_port; char *docker_network; char *ip_range_start; gboolean add_host; char *docker_host_options; char *docker_run_options; char *docker_run_command; const char *attribute_target; resource_t *child; GListPtr tuples; /* container_grouping_t * */ GListPtr ports; /* */ GListPtr mounts; /* */ enum container_type type; } container_variant_data_t; # define get_container_variant_data(data, rsc) \ CRM_ASSERT(rsc != NULL); \ CRM_ASSERT(rsc->variant == pe_container); \ CRM_ASSERT(rsc->variant_opaque != NULL); \ data = (container_variant_data_t *)rsc->variant_opaque; \ # elif VARIANT_GROUP typedef struct group_variant_data_s { int num_children; resource_t *first_child; resource_t *last_child; gboolean colocated; gboolean ordered; gboolean child_starting; gboolean child_stopping; } group_variant_data_t; # define get_group_variant_data(data, rsc) \ CRM_ASSERT(rsc != NULL); \ CRM_ASSERT(rsc->variant == pe_group); \ CRM_ASSERT(rsc->variant_opaque != NULL); \ data = (group_variant_data_t *)rsc->variant_opaque; \ # elif VARIANT_NATIVE typedef struct native_variant_data_s { int dummy; } native_variant_data_t; # define get_native_variant_data(data, rsc) \ CRM_ASSERT(rsc != NULL); \ CRM_ASSERT(rsc->variant == pe_native); \ CRM_ASSERT(rsc->variant_opaque != NULL); \ data = (native_variant_data_t *)rsc->variant_opaque; # endif #endif