diff --git a/crmd/lrm.c b/crmd/lrm.c index 5b412d5d11..3edee9d743 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1,2685 +1,2682 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define START_DELAY_THRESHOLD 5 * 60 * 1000 #define MAX_LRM_REG_FAILS 30 #define s_if_plural(i) (((i) == 1)? "" : "s") struct delete_event_s { int rc; const char *rsc; lrm_state_t *lrm_state; }; static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, const char *user_name); static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation); static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request); void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id); static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level); static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op); static void lrm_connection_destroy(void) { if (is_set(fsa_input_register, R_LRM_CONNECTED)) { crm_crit("LRM Connection failed"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit(fsa_input_register, R_LRM_CONNECTED); } else { crm_info("LRM Connection disconnected"); } } static char * make_stop_id(const char *rsc, int call_id) { return crm_strdup_printf("%s:%d", rsc, call_id); } static void copy_instance_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") == NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } static void copy_meta_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") != NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } /*! * \internal * \brief Remove a recurring operation from a resource's history * * \param[in,out] history Resource history to modify * \param[in] op Operation to remove * * \return TRUE if the operation was found and removed, FALSE otherwise */ static gboolean history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_event_data_t *existing = iter->data; if ((op->interval_ms == existing->interval_ms) && crm_str_eq(op->rsc_id, existing->rsc_id, TRUE) && safe_str_eq(op->op_type, existing->op_type)) { history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter); lrmd_free_event(existing); return TRUE; } } return FALSE; } /*! * \internal * \brief Free all recurring operations in resource history * * \param[in,out] history Resource history to modify */ static void history_free_recurring_ops(rsc_history_t *history) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_free_event(iter->data); } g_list_free(history->recurring_op_list); history->recurring_op_list = NULL; } /*! * \internal * \brief Free resource history * * \param[in,out] history Resource history to free */ void history_free(gpointer data) { rsc_history_t *history = (rsc_history_t*)data; if (history->stop_params) { g_hash_table_destroy(history->stop_params); } /* Don't need to free history->rsc.id because it's set to history->id */ free(history->rsc.type); free(history->rsc.standard); free(history->rsc.provider); lrmd_free_event(history->failed); lrmd_free_event(history->last); free(history->id); history_free_recurring_ops(history); free(history); } static void update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int target_rc = 0; rsc_history_t *entry = NULL; if (op->rsc_deleted) { crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL); return; } if (safe_str_eq(op->op_type, RSC_NOTIFY)) { return; } crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type); entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id); if (entry == NULL && rsc) { entry = calloc(1, sizeof(rsc_history_t)); entry->id = strdup(op->rsc_id); g_hash_table_insert(lrm_state->resource_history, entry->id, entry); entry->rsc.id = entry->id; entry->rsc.type = strdup(rsc->type); entry->rsc.standard = strdup(rsc->standard); if (rsc->provider) { entry->rsc.provider = strdup(rsc->provider); } else { entry->rsc.provider = NULL; } } else if (entry == NULL) { crm_info("Resource %s no longer exists, not updating cache", op->rsc_id); return; } entry->last_callid = op->call_id; target_rc = rsc_op_expected_rc(op); if (op->op_status == PCMK_LRM_OP_CANCELLED) { if (op->interval_ms > 0) { crm_trace("Removing cancelled recurring op: " CRM_OP_FMT, op->rsc_id, op->op_type, op->interval_ms); history_remove_recurring_op(entry, op); return; } else { crm_trace("Skipping " CRM_OP_FMT " rc=%d, status=%d", op->rsc_id, op->op_type, op->interval_ms, op->rc, op->op_status); } } else if (did_rsc_op_fail(op, target_rc)) { /* Store failed monitors here, otherwise the block below will cause them * to be forgotten when a stop happens. */ if (entry->failed) { lrmd_free_event(entry->failed); } entry->failed = lrmd_copy_event(op); } else if (op->interval_ms == 0) { if (entry->last) { lrmd_free_event(entry->last); } entry->last = lrmd_copy_event(op); if (op->params && (safe_str_eq(CRMD_ACTION_START, op->op_type) || safe_str_eq("reload", op->op_type) || safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) { if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } entry->stop_params = crm_str_table_new(); g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params); } } if (op->interval_ms > 0) { /* Ensure there are no duplicates */ history_remove_recurring_op(entry, op); crm_trace("Adding recurring op: " CRM_OP_FMT, op->rsc_id, op->op_type, op->interval_ms); entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); } else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) { crm_trace("Dropping %d recurring ops because of: " CRM_OP_FMT, g_list_length(entry->recurring_op_list), op->rsc_id, op->op_type, op->interval_ms); history_free_recurring_ops(entry); } } /*! * \internal * \brief Send a direct OK ack for a resource task * * \param[in] lrm_state LRM connection * \param[in] input Input message being ack'ed * \param[in] rsc_id ID of affected resource * \param[in] rsc Affected resource (if available) * \param[in] task Operation task being ack'ed * \param[in] ack_host Name of host to send ack to * \param[in] ack_sys IPC system name to ack */ static void send_task_ok_ack(lrm_state_t *lrm_state, ha_msg_input_t *input, const char *rsc_id, lrmd_rsc_info_t *rsc, const char *task, const char *ack_host, const char *ack_sys) { lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task); op->rc = PCMK_OCF_OK; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(ack_host, ack_sys, rsc, op, rsc_id); lrmd_free_event(op); } void lrm_op_callback(lrmd_event_data_t * op) { const char *nodename = NULL; lrm_state_t *lrm_state = NULL; CRM_CHECK(op != NULL, return); /* determine the node name for this connection. */ nodename = op->remote_nodename ? op->remote_nodename : fsa_our_uname; if (op->type == lrmd_event_disconnect && (safe_str_eq(nodename, fsa_our_uname))) { /* if this is the local lrmd ipc connection, set the right bits in the * crmd when the connection goes down */ lrm_connection_destroy(); return; } else if (op->type != lrmd_event_exec_complete) { /* we only need to process execution results */ return; } lrm_state = lrm_state_find(nodename); CRM_ASSERT(lrm_state != NULL); process_lrm_event(lrm_state, op, NULL); } /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* This only pertains to local lrmd connections. Remote connections are handled as * resources within the pengine. Connecting and disconnecting from remote lrmd instances * handled differently than the local. */ lrm_state_t *lrm_state = NULL; if(fsa_our_uname == NULL) { return; /* Nothing to do */ } lrm_state = lrm_state_find_or_create(fsa_our_uname); if (lrm_state == NULL) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (action & A_LRM_DISCONNECT) { if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) { if (action == A_LRM_DISCONNECT) { crmd_fsa_stall(FALSE); return; } } clear_bit(fsa_input_register, R_LRM_CONNECTED); crm_info("Disconnecting from the LRM"); lrm_state_disconnect(lrm_state); lrm_state_reset_tables(lrm_state, FALSE); crm_notice("Disconnected from the LRM"); } if (action & A_LRM_CONNECT) { int ret = pcmk_ok; crm_debug("Connecting to the LRM"); ret = lrm_state_ipc_connect(lrm_state); if (ret != pcmk_ok) { if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) { crm_warn("Failed to connect to the LRM %d time%s (%d max)", lrm_state->num_lrm_register_fails, s_if_plural(lrm_state->num_lrm_register_fails), MAX_LRM_REG_FAILS); crm_timer_start(wait_timer); crmd_fsa_stall(FALSE); return; } } if (ret != pcmk_ok) { crm_err("Failed to connect to the LRM the max allowed %d time%s", lrm_state->num_lrm_register_fails, s_if_plural(lrm_state->num_lrm_register_fails)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } set_bit(fsa_input_register, R_LRM_CONNECTED); crm_info("LRM connection established"); } if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; const char *when = "lrm disconnect"; GHashTableIter gIter; const char *key = NULL; rsc_history_t *entry = NULL; struct recurring_op_s *pending = NULL; crm_debug("Checking for active resources before exit"); if (cur_state == S_TERMINATE) { log_level = LOG_ERR; when = "shutdown"; } else if (is_set(fsa_input_register, R_SHUTDOWN)) { when = "shutdown... waiting"; } if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) { guint removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_actions, lrm_state); guint nremaining = g_hash_table_size(lrm_state->pending_ops); if (removed || nremaining) { crm_notice("Stopped %u recurring operation%s at %s (%u remaining)", removed, s_if_plural(removed), when, nremaining); } } if (lrm_state->pending_ops) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) { /* Ignore recurring actions in the shutdown calculations */ if (pending->interval_ms == 0) { counter++; } } } if (counter > 0) { do_crm_log(log_level, "%d pending LRM operation%s at %s", counter, s_if_plural(counter), when); if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) { do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key); } } else { rc = FALSE; } return rc; } if (lrm_state->resource_history == NULL) { return rc; } if (is_set(fsa_input_register, R_SHUTDOWN)) { /* At this point we're not waiting, we're just shutting down */ when = "shutdown"; } counter = 0; g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) { if (is_rsc_active(lrm_state, entry->id) == FALSE) { continue; } counter++; if (log_level == LOG_ERR) { crm_info("Found %s active at %s", entry->id, when); } else { crm_trace("Found %s active at %s", entry->id, when); } if (lrm_state->pending_ops) { GHashTableIter hIter; g_hash_table_iter_init(&hIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) { if (crm_str_eq(entry->id, pending->rsc_id, TRUE)) { crm_notice("%sction %s (%s) incomplete at %s", pending->interval_ms == 0 ? "A" : "Recurring a", key, pending->op_key, when); } } } } if (counter) { crm_err("%d resource%s active at %s", counter, (counter == 1)? " was" : "s were", when); } return rc; } static char * build_parameter_list(const lrmd_event_data_t *op, const struct ra_metadata_s *metadata, xmlNode *result, enum ra_param_flags_e param_type, bool invert_for_xml) { int len = 0; int max = 0; char *list = NULL; GList *iter = NULL; /* Newer resource agents support the "private" parameter attribute to * indicate sensitive parameters. For backward compatibility with older * agents, this list is used if the agent doesn't specify any as "private". */ const char *secure_terms[] = { "password", "passwd", "user", }; if (is_not_set(metadata->ra_flags, ra_uses_private) && (param_type == ra_param_private)) { max = DIMOF(secure_terms); } for (iter = metadata->ra_params; iter != NULL; iter = iter->next) { struct ra_param_s *param = (struct ra_param_s *) iter->data; bool accept = FALSE; if (is_set(param->rap_flags, param_type)) { accept = TRUE; } else if (max) { for (int lpc = 0; lpc < max; lpc++) { if (safe_str_eq(secure_terms[lpc], param->rap_name)) { accept = TRUE; break; } } } if (accept) { int start = len; crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type)); len += strlen(param->rap_name) + 2; // include spaces around list = realloc_safe(list, len + 1); // include null terminator // spaces before and after make parsing simpler sprintf(list + start, " %s ", param->rap_name); } else { crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type)); } if (result && (invert_for_xml? !accept : accept)) { const char *v = g_hash_table_lookup(op->params, param->rap_name); if (v != NULL) { crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v); crm_xml_add(result, param->rap_name, v); } } } return list; } static void append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *restart = NULL; CRM_LOG_ASSERT(op->params != NULL); if (op->interval_ms > 0) { /* monitors are not reloadable */ return; } if (is_set(metadata->ra_flags, ra_supports_reload)) { restart = create_xml_node(NULL, XML_TAG_PARAMS); /* Add any parameters with unique="1" to the "op-force-restart" list. * * (Currently, we abuse "unique=0" to indicate reloadability. This is * nonstandard and should eventually be replaced once the OCF standard * is updated with something better.) */ list = build_parameter_list(op, metadata, restart, ra_param_unique, FALSE); } else { /* Resource does not support reloads */ return; } digest = calculate_operation_digest(restart, version); /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload, * no matter if it actually supports any parameters with unique="1"). */ crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list? list: ""); crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(restart, "restart digest source"); free_xml(restart); free(digest); free(list); } static void append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *secure = NULL; CRM_LOG_ASSERT(op->params != NULL); /* * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on * the insecure ones */ secure = create_xml_node(NULL, XML_TAG_PARAMS); list = build_parameter_list(op, metadata, secure, ra_param_private, TRUE); if (list != NULL) { digest = calculate_operation_digest(secure, version); crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, list); crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(secure, "secure digest source"); } else { crm_trace("%s: no secure parameters", op->rsc_id); } free_xml(secure); free(digest); free(list); } static gboolean build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *node_name, const char *src) { int target_rc = 0; xmlNode *xml_op = NULL; struct ra_metadata_s *metadata = NULL; const char *caller_version = NULL; lrm_state_t *lrm_state = NULL; if (op == NULL) { return FALSE; } target_rc = rsc_op_expected_rc(op); /* there is a small risk in formerly mixed clusters that it will * be sub-optimal. * * however with our upgrade policy, the update we send should * still be completely supported anyway */ caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); CRM_LOG_ASSERT(caller_version != NULL); if(caller_version == NULL) { caller_version = CRM_FEATURE_SET; } crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version); xml_op = create_operation_update(parent, op, caller_version, target_rc, fsa_our_uname, src, LOG_DEBUG); if (xml_op == NULL) { return TRUE; } if ((rsc == NULL) || (op == NULL) || (op->params == NULL) || !crm_op_needs_metadata(rsc->standard, op->op_type)) { crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)", op->op_type, op->rsc_id, op->params, rsc); return TRUE; } lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { crm_warn("Cannot calculate digests for operation " CRM_OP_FMT " because we have no LRM connection to %s", op->rsc_id, op->op_type, op->interval_ms, node_name); return TRUE; } metadata = metadata_cache_get(lrm_state->metadata_cache, rsc); if (metadata == NULL) { /* For now, we always collect resource agent meta-data via a local, * synchronous, direct execution of the agent. This has multiple issues: * the lrmd should execute agents, not the crmd; meta-data for * Pacemaker Remote nodes should be collected on those nodes, not * locally; and the meta-data call shouldn't eat into the timeout of the * real action being performed. * * These issues are planned to be addressed by having the PE schedule * a meta-data cache check at the beginning of each transition. Once * that is working, this block will only be a fallback in case the * initial collection fails. */ char *metadata_str = NULL; int rc = lrm_state_get_metadata(lrm_state, rsc->standard, rsc->provider, rsc->type, &metadata_str, 0); if (rc != pcmk_ok) { crm_warn("Failed to get metadata for %s (%s:%s:%s)", rsc->id, rsc->standard, rsc->provider, rsc->type); return TRUE; } metadata = metadata_cache_update(lrm_state->metadata_cache, rsc, metadata_str); free(metadata_str); if (metadata == NULL) { crm_warn("Failed to update metadata for %s (%s:%s:%s)", rsc->id, rsc->standard, rsc->provider, rsc->type); return TRUE; } } #if ENABLE_VERSIONED_ATTRS crm_xml_add(xml_op, XML_ATTR_RA_VERSION, metadata->ra_version); #endif crm_trace("Including additional digests for %s::%s:%s", rsc->standard, rsc->provider, rsc->type); append_restart_list(op, metadata, xml_op, caller_version); append_secure_list(op, metadata, xml_op, caller_version); return TRUE; } static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id) { rsc_history_t *entry = NULL; entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->last == NULL) { return FALSE; } crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type, entry->last->interval_ms, entry->last->rc); if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)) { return FALSE; } else if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) { /* a stricter check is too complex... * leave that to the PE */ return FALSE; } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) { return FALSE; } else if ((entry->last->interval_ms == 0) && (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) { /* Badly configured resources can't be reliably stopped */ return FALSE; } return TRUE; } static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) { GHashTableIter iter; rsc_history_t *entry = NULL; g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { GList *gIter = NULL; xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.standard); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider); if (entry->last && entry->last->params) { const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); if (container) { crm_trace("Resource %s is a part of container resource %s", entry->id, container); crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container); } } build_operation_update(xml_rsc, &(entry->rsc), entry->failed, lrm_state->node_name, __FUNCTION__); build_operation_update(xml_rsc, &(entry->rsc), entry->last, lrm_state->node_name, __FUNCTION__); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { build_operation_update(xml_rsc, &(entry->rsc), gIter->data, lrm_state->node_name, __FUNCTION__); } } return FALSE; } static xmlNode * do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags) { xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; crm_node_t *peer = NULL; peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY); CRM_CHECK(peer != NULL, return NULL); xml_state = create_node_state_update(peer, update_flags, NULL, __FUNCTION__); if (xml_state == NULL) { return NULL; } xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(lrm_state, rsc_list); crm_log_xml_trace(xml_state, "Current state of the LRM"); return xml_state; } xmlNode * do_lrm_query(gboolean is_replace, const char *node_name) { lrm_state_t *lrm_state = lrm_state_find(node_name); if (!lrm_state) { crm_err("Could not query lrm state for lrmd node %s", node_name); return NULL; } return do_lrm_query_internal(lrm_state, node_update_cluster|node_update_peer); } static void notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) { lrmd_event_data_t *op = NULL; const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); crm_info("Notifying %s on %s that %s was%s deleted", from_sys, (from_host? from_host : "localhost"), rsc_id, ((rc == pcmk_ok)? "" : " not")); op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE); if (rc == pcmk_ok) { op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; } else { op->op_status = PCMK_LRM_OP_ERROR; op->rc = PCMK_OCF_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { /* this isn't expected - trigger a new transition */ time_t now = time(NULL); char *now_s = crm_itoa(now); crm_debug("Triggering a refresh after %s deleted %s from the LRM", from_sys, rsc_id); update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, FALSE, NULL, NULL); free(now_s); } } static gboolean lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) { struct delete_event_s *event = user_data; struct pending_deletion_op_s *op = value; if (crm_str_eq(event->rsc, op->rsc, TRUE)) { notify_deleted(event->lrm_state, op->input, event->rsc, event->rc); return TRUE; } return FALSE; } static gboolean lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; struct recurring_op_s *pending = value; if (crm_str_eq(rsc, pending->rsc_id, TRUE)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } /* * Remove the rsc from the CIB * * Avoids refreshing the entire LRM section of this host */ #define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, const char *user_name) { char *rsc_xpath = NULL; int rc = pcmk_ok; CRM_CHECK(rsc_id != NULL, return -ENXIO); rsc_xpath = crm_strdup_printf(rsc_template, lrm_state->node_name, rsc_id); rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath, NULL, NULL, call_options | cib_xpath, user_name); free(rsc_xpath); return rc; } static void delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, GHashTableIter * rsc_gIter, int rc, const char *user_name) { struct delete_event_s event; CRM_CHECK(rsc_id != NULL, return); if (rc == pcmk_ok) { char *rsc_id_copy = strdup(rsc_id); if (rsc_gIter) g_hash_table_iter_remove(rsc_gIter); else g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); crm_debug("sync: Sending delete op for %s", rsc_id_copy); delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name); g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy); free(rsc_id_copy); } if (input) { notify_deleted(lrm_state, input, rsc_id, rc); } event.rc = rc; event.rsc = rsc_id; event.lrm_state = lrm_state; g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event); } /*! * \internal * \brief Erase an LRM history entry from the CIB, given the operation data * * \param[in] lrm_state LRM state of the desired node * \param[in] op Operation whose history should be deleted */ static void erase_lrm_history_by_op(lrm_state_t *lrm_state, lrmd_event_data_t *op) { xmlNode *xml_top = NULL; CRM_CHECK(op != NULL, return); xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); if (op->interval_ms > 0) { char *op_id = generate_op_key(op->rsc_id, op->op_type, op->interval_ms); /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ crm_xml_add(xml_top, XML_ATTR_ID, op_id); free(op_id); } crm_debug("Erasing LRM resource history for " CRM_OP_FMT " (call=%d)", op->rsc_id, op->op_type, op->interval_ms, op->call_id); fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_quorum_override); crm_log_xml_trace(xml_top, "op:cancel"); free_xml(xml_top); } /* Define xpath to find LRM resource history entry by node and resource */ #define XPATH_HISTORY \ "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ "/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \ "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ "/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \ "/" XML_LRM_TAG_RSC_OP /* ... and also by operation key */ #define XPATH_HISTORY_ID XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s']" /* ... and also by operation key and operation call ID */ #define XPATH_HISTORY_CALL XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']" /* ... and also by operation key and original operation key */ #define XPATH_HISTORY_ORIG XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']" /*! * \internal * \brief Erase an LRM history entry from the CIB, given operation identifiers * * \param[in] lrm_state LRM state of the node to clear history for * \param[in] rsc_id Name of resource to clear history for * \param[in] key Operation key of operation to clear history for * \param[in] orig_op If specified, delete only if it has this original op * \param[in] call_id If specified, delete entry only if it has this call ID */ static void erase_lrm_history_by_id(lrm_state_t *lrm_state, const char *rsc_id, const char *key, const char *orig_op, int call_id) { char *op_xpath = NULL; CRM_CHECK((rsc_id != NULL) && (key != NULL), return); if (call_id > 0) { op_xpath = crm_strdup_printf(XPATH_HISTORY_CALL, lrm_state->node_name, rsc_id, key, call_id); } else if (orig_op) { op_xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, lrm_state->node_name, rsc_id, key, orig_op); } else { op_xpath = crm_strdup_printf(XPATH_HISTORY_ID, lrm_state->node_name, rsc_id, key); } crm_debug("Erasing LRM resource history for %s on %s (call=%d)", key, rsc_id, call_id); fsa_cib_conn->cmds->remove(fsa_cib_conn, op_xpath, NULL, cib_quorum_override | cib_xpath); free(op_xpath); } static inline gboolean last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms) { if (entry == NULL) { return FALSE; } if (op == NULL) { return TRUE; } return (safe_str_eq(op, entry->failed->op_type) && (interval_ms == entry->failed->interval_ms)); } /*! * \internal * \brief Clear a resource's last failure * * Erase a resource's last failure on a particular node from both the * LRM resource history in the CIB, and the resource history remembered * for the LRM state. * * \param[in] rsc_id Resource name * \param[in] node_name Node name * \param[in] operation If specified, only clear if matching this operation * \param[in] interval_ms If operation is specified, it has this interval */ void lrm_clear_last_failure(const char *rsc_id, const char *node_name, const char *operation, guint interval_ms) { char *op_key = NULL; char *orig_op_key = NULL; lrm_state_t *lrm_state = NULL; lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { return; } /* Erase from CIB */ op_key = generate_op_key(rsc_id, "last_failure", 0); if (operation) { orig_op_key = generate_op_key(rsc_id, operation, interval_ms); } erase_lrm_history_by_id(lrm_state, rsc_id, op_key, orig_op_key, 0); free(op_key); free(orig_op_key); /* Remove from memory */ if (lrm_state->resource_history) { rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (last_failed_matches_op(entry, operation, interval_ms)) { lrmd_free_event(entry->failed); entry->failed = NULL; } } } /* Returns: gboolean - cancellation is in progress */ static gboolean cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove) { int rc = pcmk_ok; char *local_key = NULL; struct recurring_op_s *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); if (key == NULL) { local_key = make_stop_id(rsc_id, op); key = local_key; } pending = g_hash_table_lookup(lrm_state->pending_ops, key); if (pending) { if (remove && pending->remove == FALSE) { pending->remove = TRUE; crm_debug("Scheduling %s for removal", key); } if (pending->cancelled) { crm_debug("Operation %s already cancelled", key); free(local_key); return FALSE; } pending->cancelled = TRUE; } else { crm_info("No pending op found for %s", key); free(local_key); return FALSE; } crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval_ms); if (rc == pcmk_ok) { crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); free(local_key); return TRUE; } crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key); /* The caller needs to make sure the entry is * removed from the pending_ops list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from pending_ops will block * the node from shutting down */ free(local_key); return FALSE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct cancel_data *data = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (crm_str_eq(op->op_key, data->key, TRUE)) { data->done = TRUE; remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove); } return remove; } static gboolean cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove) { guint removed = 0; struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->pending_ops, cancel_action_by_key, &data); crm_trace("Removed %u op cache entries, new size: %u", removed, g_hash_table_size(lrm_state->pending_ops)); return data.done; } /*! * \internal * \brief Retrieve resource information from LRM * * \param[in] lrm_state LRM connection to use * \param[in] rsc_xml XML containing resource configuration * \param[in] do_create If true, register resource with LRM if not already * \param[out] rsc_info Where to store resource information obtained from LRM * * \retval pcmk_ok Success (and rsc_info holds newly allocated result) * \retval -EINVAL Required information is missing from arguments * \retval -ENOTCONN No active connection to LRM * \retval -ENODEV Resource not found * \retval -errno Error communicating with lrmd when registering resource * * \note Caller is responsible for freeing result on success. */ static int get_lrm_resource(lrm_state_t *lrm_state, xmlNode *rsc_xml, gboolean do_create, lrmd_rsc_info_t **rsc_info) { const char *id = ID(rsc_xml); CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL); CRM_CHECK(id, return -EINVAL); if (lrm_state_is_connected(lrm_state) == FALSE) { return -ENOTCONN; } crm_trace("Retrieving resource information for %s from the LRM", id); *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); // If resource isn't known by ID, try clone name, if provided if (!*rsc_info) { const char *long_id = crm_element_value(rsc_xml, XML_ATTR_ID_LONG); if (long_id) { *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0); } } if ((*rsc_info == NULL) && do_create) { const char *class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER); const char *type = crm_element_value(rsc_xml, XML_ATTR_TYPE); int rc; crm_trace("Registering resource %s with LRM", id); rc = lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring); if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Could not register resource %s with LRM on %s: %s " CRM_XS " rc=%d", id, lrm_state->node_name, pcmk_strerror(rc), rc); /* Register this as an internal error if this involves the local * lrmd. Otherwise, we're likely dealing with an unresponsive remote * node, which is not an FSA failure. */ if (lrm_state_is_local(lrm_state) == TRUE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } return rc; } *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); } return *rsc_info? pcmk_ok : -ENODEV; } static void delete_resource(lrm_state_t * lrm_state, const char *id, lrmd_rsc_info_t * rsc, GHashTableIter * gIter, const char *sys, const char *host, const char *user, ha_msg_input_t * request, gboolean unregister) { int rc = pcmk_ok; crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host); if (rsc && unregister) { rc = lrm_state_unregister_rsc(lrm_state, id, 0); } if (rc == pcmk_ok) { crm_trace("Resource '%s' deleted", id); } else if (rc == -EINPROGRESS) { crm_info("Deletion of resource '%s' pending", id); if (request) { struct pending_deletion_op_s *op = NULL; char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE); op = calloc(1, sizeof(struct pending_deletion_op_s)); op->rsc = strdup(rsc->id); op->input = copy_ha_msg_input(request); g_hash_table_insert(lrm_state->deletion_ops, ref, op); } return; } else { crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d", id, sys, user ? user : "internal", host, rc); } delete_rsc_entry(lrm_state, request, id, gIter, rc, user); } static int get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) { int call_id = 999999999; rsc_history_t *entry = NULL; if(lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* Make sure the call id is greater than the last successful operation, * otherwise the failure will not result in a possible recovery of the resource * as it could appear the failure occurred before the successful start */ if (entry) { call_id = entry->last_callid + 1; } if (call_id < 0) { call_id = 1; } return call_id; } static void fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status, enum ocf_exitcode op_exitcode) { op->call_id = get_fake_call_id(lrm_state, op->rsc_id); op->t_run = time(NULL); op->t_rcchange = op->t_run; op->op_status = op_status; op->rc = op_exitcode; } static void force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node) { GHashTableIter gIter; rsc_history_t *entry = NULL; crm_info("Clearing resource history on node %s", lrm_state->node_name); g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { /* only unregister the resource during a reprobe if it is not a remote connection * resource. otherwise unregistering the connection will terminate remote-node * membership */ gboolean unregister = TRUE; if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { lrm_state_t *remote_lrm_state = lrm_state_find(entry->id); if (remote_lrm_state) { /* when forcing a reprobe, make sure to clear remote node before * clearing the remote node's connection resource */ force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE); } unregister = FALSE; } delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host, user_name, NULL, unregister); } /* Now delete the copy in the CIB */ erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local); /* And finally, _delete_ the value in attrd * Setting it to FALSE results in the PE sending us back here again */ update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); } static void synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) { lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc_info = NULL; const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK); const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET); xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE); if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) { /* @TODO Should we do something else, like direct ack? */ crm_info("Can't fake %s failure (%d) on %s without resource configuration", crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node); return; } else if(operation == NULL) { /* This probably came from crm_resource -C, nothing to do */ crm_info("Can't fake %s failure (%d) on %s without operation", ID(xml_rsc), rc, target_node); return; } op = construct_op(lrm_state, action, ID(xml_rsc), operation); if (safe_str_eq(operation, RSC_NOTIFY)) { // Notifications can't fail fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_OK); } else { fake_op_status(lrm_state, op, PCMK_LRM_OP_ERROR, rc); } crm_info("Faking " CRM_OP_FMT " result (%d) on %s", op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node); /* Process the result as if it came from the LRM, if possible * (i.e. resource info can be obtained from the lrm_state). */ if (lrm_state) { rsc_info = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); } if (rsc_info) { process_lrm_event(lrm_state, op, NULL); } else { /* If we can't process the result normally, at least write it to the CIB * if possible, so the PE can act on it. */ char *standard = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_CLASS); char *provider = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_PROVIDER); char *type = crm_element_value_copy(xml_rsc, XML_ATTR_TYPE); if (standard && type) { rsc_info = lrmd_new_rsc_info(op->rsc_id, standard, provider, type); do_update_resource(target_node, rsc_info, op); lrmd_free_rsc_info(rsc_info); } else { // @TODO Should we direct ack? crm_info("Can't fake %s failure (%d) on %s without resource standard and type", crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node); } } lrmd_free_event(op); } /*! * \internal * \brief Get target of an LRM operation * * \param[in] xml LRM operation data XML * * \return LRM operation target node name (local node or Pacemaker Remote node) */ static const char * lrm_op_target(xmlNode *xml) { const char *target = NULL; if (xml) { target = crm_element_value(xml, XML_LRM_ATTR_TARGET); } if (target == NULL) { target = fsa_our_uname; } return target; } static void fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, const char *from_host, const char *from_sys) { lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* The lrmd simply executes operations and reports the results, without any * concept of success or failure, so to fail a resource, we must fake what a * failure looks like. * * To do this, we create a fake lrmd operation event for the resource, and * pass that event to the lrmd client callback so it will be processed as if * it came from the lrmd. */ op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon"); fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR); free((char*) op->user_data); op->user_data = NULL; op->interval_ms = 0; #if ENABLE_ACL if (user_name && is_privileged(user_name) == FALSE) { crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } #endif if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) { crm_info("Failing resource %s...", rsc->id); process_lrm_event(lrm_state, op, NULL); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; lrmd_free_rsc_info(rsc); } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(xml, "bad input"); } send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); } static void handle_refresh_op(lrm_state_t *lrm_state, const char *user_name, const char *from_host, const char *from_sys) { int rc = pcmk_ok; xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); crm_info("Forced a local LRM refresh: call=%d", rc); if (safe_str_neq(CRM_SYSTEM_CRMD, from_sys)) { xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, fragment, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } free_xml(fragment); } static void handle_query_op(xmlNode *msg, lrm_state_t *lrm_state) { xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all); xmlNode *reply = create_reply(msg, data); if (relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_xml_err(reply, "reply"); } free_xml(reply); free_xml(data); } static void handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node) { crm_notice("Forcing the status of all resources to be redetected"); force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node); if (safe_str_neq(CRM_SYSTEM_PENGINE, from_sys) && safe_str_neq(CRM_SYSTEM_TENGINE, from_sys)) { xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } } static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys) { char *op_key = NULL; char *meta_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; const char *interval_ms_s = NULL; gboolean in_progress = FALSE; xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); CRM_CHECK(params != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS); interval_ms_s = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(interval_ms_s != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_TASK); op_task = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(op_task != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); call_id = crm_element_value(params, meta_key); free(meta_key); op_key = generate_op_key(rsc->id, op_task, crm_parse_ms(interval_ms_s)); crm_debug("PE requested op %s (call=%s) be cancelled", op_key, (call_id? call_id : "NA")); call = crm_parse_int(call_id, "0"); if (call == 0) { /* the normal case when the PE cancels a recurring op */ in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); } else { /* the normal case when the PE cancels an orphan op */ in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); } // Acknowledge cancellation operation if for a remote connection resource if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) { char *op_id = make_stop_id(rsc->id, call); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) { crm_info("Nothing known about operation %d for %s", call, op_key); } erase_lrm_history_by_id(lrm_state, rsc->id, op_key, NULL, call); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); /* needed at least for cancellation of a remote operation */ g_hash_table_remove(lrm_state->pending_ops, op_id); free(op_id); } else { /* No ack is needed since abcdaa8, but peers with older versions * in a rolling upgrade need one. We didn't bump the feature set * at that commit, so we can only compare against the previous * CRM version (3.0.8). If any peers have feature set 3.0.9 but * not abcdaa8, they will time out waiting for the ack (no * released versions of Pacemaker are affected). */ const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION); if (compare_version(peer_version, "3.0.8") <= 0) { crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)", op_key, from_host, peer_version); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); } } free(op_key); return TRUE; } static void do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host, bool crm_rsc_delete, const char *user_name) { gboolean unregister = TRUE; #if ENABLE_ACL int cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun|cib_sync_call, user_name); if (cib_rc != pcmk_ok) { lrmd_event_data_t *op = NULL; crm_err("Could not delete resource status of %s for %s (user %s) on %s: %s" CRM_XS " rc=%d", rsc->id, from_sys, (user_name? user_name : "unknown"), from_host, pcmk_strerror(cib_rc), cib_rc); op = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE); op->op_status = PCMK_LRM_OP_ERROR; if (cib_rc == -EACCES) { op->rc = PCMK_OCF_INSUFFICIENT_PRIV; } else { op->rc = PCMK_OCF_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc->id); lrmd_free_event(op); lrmd_free_rsc_info(rsc); return; } #endif if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { unregister = FALSE; } delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input, unregister); } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { lrm_state_t *lrm_state = NULL; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *user_name = NULL; const char *target_node = NULL; gboolean is_remote_node = FALSE; bool crm_rsc_delete = FALSE; target_node = lrm_op_target(input->xml); is_remote_node = safe_str_neq(target_node, fsa_our_uname); lrm_state = lrm_state_find(target_node); if ((lrm_state == NULL) && is_remote_node) { crm_err("Failing action because local node has never had connection to remote node %s", target_node); synthesize_lrmd_failure(NULL, input->xml, PCMK_OCF_CONNECTION_DIED); return; } CRM_ASSERT(lrm_state != NULL); #if ENABLE_ACL user_name = crm_acl_get_set_user(input->msg, F_CRM_USER, NULL); crm_trace("LRM command from user '%s'", user_name); #endif crm_op = crm_element_value(input->msg, F_CRM_TASK); from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } crm_trace("LRM %s command from %s", crm_op, from_sys); if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { crm_rsc_delete = TRUE; // Only crm_resource uses this op operation = CRMD_ACTION_DELETE; } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) { fail_lrm_resource(input->xml, lrm_state, user_name, from_host, from_sys); return; } else if (input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { handle_refresh_op(lrm_state, user_name, from_host, from_sys); } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { handle_query_op(input->msg, lrm_state); } else if (safe_str_eq(operation, CRM_OP_PROBED)) { update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node); } else if (safe_str_eq(operation, CRM_OP_REPROBE) || safe_str_eq(crm_op, CRM_OP_REPROBE)) { handle_reprobe_op(lrm_state, from_sys, from_host, user_name, is_remote_node); } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); gboolean create_rsc = safe_str_neq(operation, CRMD_ACTION_DELETE); int rc; // We can't return anything meaningful without a resource ID CRM_CHECK(xml_rsc && ID(xml_rsc), return); rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc); if (rc == -ENOTCONN) { synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_CONNECTION_DIED); return; } else if (!create_rsc) { /* Delete of malformed or nonexistent resource * (deleting something that does not exist is a success) */ crm_notice("Not registering resource '%s' for a %s event " CRM_XS " get-rc=%d (%s) transition-key=%s", ID(xml_rsc), operation, rc, pcmk_strerror(rc), ID(input->xml)); delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name); send_task_ok_ack(lrm_state, input, ID(xml_rsc), NULL, operation, from_host, from_sys); return; } else if (rc == -EINVAL) { // Resource operation on malformed resource crm_err("Invalid resource definition for %s", ID(xml_rsc)); crm_log_xml_warn(input->msg, "invalid resource"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_NOT_CONFIGURED); // fatal error return; } else if (rc < 0) { // Error communicating with lrmd crm_err("Could not register resource '%s' with lrmd: %s " CRM_XS " rc=%d", ID(xml_rsc), pcmk_strerror(rc), rc); crm_log_xml_warn(input->msg, "failed registration"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_INVALID_PARAM); // hard error return; } if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) { if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) { crm_log_xml_warn(input->xml, "Bad command"); } } else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { do_lrm_delete(input, lrm_state, rsc, from_sys, from_host, crm_rsc_delete, user_name); } else { do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg); } lrmd_free_rsc_info(rsc); } else { crm_err("Cannot perform operation %s of unknown type", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static lrmd_event_data_t * construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation) { lrmd_event_data_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; const char *interval_ms_s = NULL; GHashTable *params = NULL; const char *transition = NULL; CRM_ASSERT(rsc_id && operation); op = calloc(1, sizeof(lrmd_event_data_t)); CRM_ASSERT(op != NULL); op->type = lrmd_event_exec_complete; op->op_type = strdup(operation); op->op_status = PCMK_LRM_OP_PENDING; op->rc = -1; op->rsc_id = strdup(rsc_id); op->interval_ms = 0; op->timeout = 0; op->start_delay = 0; if (rsc_op == NULL) { CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); op->user_data = NULL; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = crm_str_table_new(); g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); crm_trace("Constructed %s op for %s", operation, rsc_id); return op; } params = xml2list(rsc_op); g_hash_table_remove(params, CRM_META "_op_target_rc"); op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY); op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT); interval_ms_s = crm_meta_value(params, XML_LRM_ATTR_INTERVAL_MS); op->interval_ms = crm_parse_ms(interval_ms_s); op->timeout = crm_parse_int(op_timeout, "0"); op->start_delay = crm_parse_int(op_delay, "0"); #if ENABLE_VERSIONED_ATTRS // Resolve any versioned parameters if (lrm_state && safe_str_neq(op->op_type, RSC_METADATA) && safe_str_neq(op->op_type, CRMD_ACTION_DELETE) && !is_remote_lrmd_ra(NULL, NULL, rsc_id)) { // Resource info *should* already be cached, so we don't get lrmd call lrmd_rsc_info_t *rsc = lrm_state_get_rsc_info(lrm_state, rsc_id, 0); struct ra_metadata_s *metadata; metadata = metadata_cache_get(lrm_state->metadata_cache, rsc); if (metadata) { xmlNode *versioned_attrs = NULL; GHashTable *hash = NULL; char *key = NULL; char *value = NULL; GHashTableIter iter; versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_ATTRS); hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { g_hash_table_iter_steal(&iter); g_hash_table_replace(params, key, value); } g_hash_table_destroy(hash); versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_META); hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { g_hash_table_replace(params, crm_meta_name(key), strdup(value)); if (safe_str_eq(key, XML_ATTR_TIMEOUT)) { op->timeout = crm_parse_int(value, "0"); } else if (safe_str_eq(key, XML_OP_ATTR_START_DELAY)) { op->start_delay = crm_parse_int(value, "0"); } } g_hash_table_destroy(hash); versioned_attrs = first_named_child(rsc_op, XML_TAG_RSC_VER_ATTRS); hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { g_hash_table_iter_steal(&iter); g_hash_table_replace(params, key, value); } g_hash_table_destroy(hash); } lrmd_free_rsc_info(rsc); } #endif if (safe_str_neq(operation, RSC_STOP)) { op->params = params; } else { rsc_history_t *entry = NULL; if (lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* If we do not have stop parameters cached, use * whatever we are given */ if (!entry || !entry->stop_params) { op->params = params; } else { /* Copy the cached parameter list so that we stop the resource * with the old attributes, not the new ones */ op->params = crm_str_table_new(); g_hash_table_foreach(params, copy_meta_keys, op->params); g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); g_hash_table_destroy(params); params = NULL; } } /* sanity */ - if (op->interval_ms < 0) { - op->interval_ms = 0; - } if (op->timeout <= 0) { op->timeout = op->interval_ms; } if (op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = strdup(transition); if (op->interval_ms != 0) { if (safe_str_eq(operation, CRMD_ACTION_START) || safe_str_eq(operation, CRMD_ACTION_STOP)) { crm_err("Start and Stop actions cannot have an interval: %u", op->interval_ms); op->interval_ms = 0; } } crm_trace("Constructed %s op for %s: interval=%u", operation, rsc_id, op->interval_ms); return op; } void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; crm_node_t *peer = NULL; CRM_CHECK(op != NULL, return); if (op->rsc_id == NULL) { CRM_ASSERT(rsc_id != NULL); op->rsc_id = strdup(rsc_id); } if (to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } peer = crm_get_peer(0, fsa_our_uname); update = create_node_state_update(peer, node_update_none, NULL, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, fsa_our_uname, __FUNCTION__); reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_log_xml_trace(update, "ACK Update"); crm_debug("ACK'ing resource op " CRM_OP_FMT " from %s: %s", op->rsc_id, op->op_type, op->interval_ms, op->user_data, crm_element_value(reply, XML_ATTR_REFERENCE)); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(update); free_xml(reply); } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { gboolean res = TRUE; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) { /* keep iterating through all even when false is returned */ res = FALSE; } } set_bit(fsa_input_register, R_SENT_RSC_STOP); g_list_free(lrm_state_list); lrm_state_list = NULL; return res; } struct stop_recurring_action_s { lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct stop_recurring_action_s *event = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if ((op->interval_ms != 0) && crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) { crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key); remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE); } return remove; } static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; lrm_state_t *lrm_state = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (op->interval_ms != 0) { crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, key); remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE); } return remove; } static void record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op) { const char *record_pending = NULL; CRM_CHECK(node_name != NULL, return); CRM_CHECK(rsc != NULL, return); CRM_CHECK(op != NULL, return); if ((op->op_type == NULL) || (op->params == NULL) || safe_str_eq(op->op_type, CRMD_ACTION_CANCEL) || safe_str_eq(op->op_type, CRMD_ACTION_DELETE)) { return; } // defaults to true record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING); if (record_pending && !crm_is_true(record_pending)) { return; } op->call_id = -1; op->op_status = PCMK_LRM_OP_PENDING; op->rc = PCMK_OCF_UNKNOWN; op->t_run = time(NULL); op->t_rcchange = op->t_run; /* write a "pending" entry to the CIB, inhibit notification */ crm_debug("Recording pending op " CRM_OP_FMT " on %s in the CIB", op->rsc_id, op->op_type, op->interval_ms, node_name); do_update_resource(node_name, rsc, op); } static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request) { int call_id = 0; char *op_id = NULL; lrmd_event_data_t *op = NULL; lrmd_key_value_t *params = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; gboolean stop_recurring = FALSE; bool send_nack = FALSE; CRM_CHECK(rsc != NULL, return); CRM_CHECK(operation != NULL, return); if (msg != NULL) { transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if (transition == NULL) { crm_log_xml_err(msg, "Missing transition number"); } } op = construct_op(lrm_state, msg, rsc->id, operation); CRM_CHECK(op != NULL, return); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) && (op->interval_ms == 0) && strcmp(operation, CRMD_ACTION_MIGRATE) == 0) { /* pcmk remote connections are a special use case. * We never ever want to stop monitoring a connection resource until * the entire migration has completed. If the connection is unexpectedly * severed, even during a migration, this is an event we must detect.*/ stop_recurring = FALSE; } else if ((op->interval_ms == 0) && strcmp(operation, CRMD_ACTION_STATUS) != 0 && strcmp(operation, CRMD_ACTION_NOTIFY) != 0) { /* stop any previous monitor operations before changing the resource state */ stop_recurring = TRUE; } if (stop_recurring == TRUE) { guint removed = 0; struct stop_recurring_action_s data; data.rsc = rsc; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_action_by_rsc, &data); if (removed) { crm_debug("Stopped %u recurring operation%s in preparation for " CRM_OP_FMT, removed, s_if_plural(removed), rsc->id, operation, op->interval_ms); } } /* now do the op */ crm_info("Performing key=%s op=" CRM_OP_FMT, transition, rsc->id, operation, op->interval_ms); if (is_set(fsa_input_register, R_SHUTDOWN) && safe_str_eq(operation, RSC_START)) { register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); send_nack = TRUE; } else if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE /* Recalculating */ && fsa_state != S_TRANSITION_ENGINE && safe_str_neq(operation, CRMD_ACTION_STOP)) { send_nack = TRUE; } if(send_nack) { crm_notice("Discarding attempt to perform action %s on %s in state %s (shutdown=%s)", operation, rsc->id, fsa_state2string(fsa_state), is_set(fsa_input_register, R_SHUTDOWN)?"true":"false"); op->rc = CRM_DIRECT_NACK_RC; op->op_status = PCMK_LRM_OP_ERROR; send_direct_ack(NULL, NULL, rsc, op, rsc->id); lrmd_free_event(op); free(op_id); return; } record_pending_op(lrm_state->node_name, rsc, op); op_id = generate_op_key(rsc->id, op->op_type, op->interval_ms); if (op->interval_ms > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(lrm_state, rsc, op_id, FALSE); } if (op->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, op->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { params = lrmd_key_value_add(params, key, value); } } call_id = lrm_state_exec(lrm_state, rsc->id, op->op_type, op->user_data, op->interval_ms, op->timeout, op->start_delay, params); if (call_id <= 0 && lrm_state_is_local(lrm_state)) { crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else if (call_id <= 0) { crm_err("Operation %s on resource %s failed to execute on remote node %s: %d", operation, rsc->id, lrm_state->node_name, call_id); fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR); process_lrm_event(lrm_state, op, NULL); } else { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); struct recurring_op_s *pending = NULL; pending = calloc(1, sizeof(struct recurring_op_s)); crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval_ms = op->interval_ms; pending->op_type = strdup(operation); pending->op_key = strdup(op_id); pending->rsc_id = strdup(rsc->id); pending->start_time = time(NULL); pending->user_data = strdup(op->user_data); g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending); if ((op->interval_ms > 0) && (op->start_delay > START_DELAY_THRESHOLD)) { char *uuid = NULL; int dummy = 0, target_rc = 0; crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc); free(uuid); op->rc = target_rc; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(NULL, NULL, rsc, op, rsc->id); } pending->params = op->params; op->params = NULL; } free(op_id); lrmd_free_event(op); return; } int last_resource_update = 0; static void cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { switch (rc) { case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: crm_trace("Resource update %d complete: rc=%d", call_id, rc); break; default: crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc)); } if (call_id == last_resource_update) { last_resource_update = 0; trigger_fsa(fsa_source); } } static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { /* */ int rc = pcmk_ok; xmlNode *update, *iter = NULL; int call_opt = crmd_cib_smart_opt(); const char *uuid = NULL; CRM_CHECK(op != NULL, return 0); iter = create_xml_node(iter, XML_CIB_TAG_STATUS); update = iter; iter = create_xml_node(iter, XML_CIB_TAG_STATE); if (safe_str_eq(node_name, fsa_our_uname)) { uuid = fsa_our_uuid; } else { /* remote nodes uuid and uname are equal */ uuid = node_name; crm_xml_add(iter, XML_NODE_IS_REMOTE, "true"); } CRM_LOG_ASSERT(uuid != NULL); if(uuid == NULL) { rc = -EINVAL; goto done; } crm_xml_add(iter, XML_ATTR_UUID, uuid); crm_xml_add(iter, XML_ATTR_UNAME, node_name); crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__); iter = create_xml_node(iter, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, node_name, __FUNCTION__); if (rsc) { const char *container = NULL; crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->standard); crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider); if (op->params) { container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); } if (container) { crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container); crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container); } } else { crm_warn("Resource %s no longer exists in the lrmd", op->rsc_id); send_direct_ack(NULL, NULL, rsc, op, op->rsc_id); goto cleanup; } crm_log_xml_trace(update, __FUNCTION__); /* make it an asynchronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down then anyway? * (probably because of an internal error) * * Worst case: * we get shot for having resources "running" when the really weren't * * the alternative however means blocking here for too long, which * isn't acceptable */ fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL); if (rc > 0) { last_resource_update = rc; } done: /* the return code is a call number, not an error code */ crm_trace("Sent resource state update message: %d for %s=%u on %s", rc, op->op_type, op->interval_ms, op->rsc_id); fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback); cleanup: free_xml(update); return rc; } void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data) { CRM_CHECK(FALSE, return); } static char * unescape_newlines(const char *string) { char *pch = NULL; char *ret = NULL; static const char *escaped_newline = "\\n"; if (!string) { return NULL; } ret = strdup(string); pch = strstr(ret, escaped_newline); while (pch != NULL) { /* 2 chars for 2 chars, null-termination irrelevant */ memcpy(pch, "\n ", 2 * sizeof(char)); pch = strstr(pch, escaped_newline); } return ret; } gboolean process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending) { char *op_id = NULL; char *op_key = NULL; int update_id = 0; gboolean remove = FALSE; gboolean removed = FALSE; lrmd_rsc_info_t *rsc = NULL; CRM_CHECK(op != NULL, return FALSE); CRM_CHECK(op->rsc_id != NULL, return FALSE); op_id = make_stop_id(op->rsc_id, op->call_id); op_key = generate_op_key(op->rsc_id, op->op_type, op->interval_ms); rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); if(pending == NULL) { remove = TRUE; pending = g_hash_table_lookup(lrm_state->pending_ops, op_id); } if (op->op_status == PCMK_LRM_OP_ERROR) { switch(op->rc) { case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_MASTER: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_MASTER: /* Leave it up to the TE/PE to decide if this is an error */ op->op_status = PCMK_LRM_OP_DONE; break; default: /* Nothing to do */ break; } } if (op->op_status != PCMK_LRM_OP_CANCELLED) { if (safe_str_eq(op->op_type, RSC_NOTIFY) || safe_str_eq(op->op_type, RSC_METADATA)) { /* Keep notify and meta-data ops out of the CIB */ send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else { update_id = do_update_resource(lrm_state->node_name, rsc, op); } } else if (op->interval_ms == 0) { /* This will occur when "crm resource cleanup" is called while actions are in-flight */ crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id); send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else if (pending == NULL) { /* We don't need to do anything for cancelled ops * that are not in our pending op list. There are no * transition actions waiting on these operations. */ } else if (op->user_data == NULL) { /* At this point we have a pending entry, but no transition * key present in the user_data field. report this */ crm_err("Op %s (call=%d): No user data", op_key, op->call_id); } else if (pending->remove) { /* The tengine canceled this op, we have been waiting for the cancel to finish. */ erase_lrm_history_by_op(lrm_state, op); } else if (pending && op->rsc_deleted) { /* The tengine initiated this op, but it was cancelled outside of the * tengine's control during a resource cleanup/re-probe request. The tengine * must be alerted that this operation completed, otherwise the tengine * will continue waiting for this update to occur until it is timed out. * We don't want this update going to the cib though, so use a direct ack. */ crm_trace("Op %s (call=%d): cancelled due to rsc deletion", op_key, op->call_id); send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else { /* Before a stop is called, no need to direct ack */ crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id); } if(remove == FALSE) { /* The caller will do this afterwards, but keep the logging consistent */ removed = TRUE; } else if ((op->interval_ms == 0) && g_hash_table_remove(lrm_state->pending_ops, op_id)) { removed = TRUE; crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops)); } else if ((op->interval_ms != 0) && (op->op_status == PCMK_LRM_OP_CANCELLED)) { removed = TRUE; g_hash_table_remove(lrm_state->pending_ops, op_id); } switch (op->op_status) { case PCMK_LRM_OP_CANCELLED: crm_info("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s confirmed=%s", crm_action_str(op->op_type, op->interval_ms), op->rsc_id, lrm_state->node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, (removed? "true" : "false")); break; case PCMK_LRM_OP_DONE: do_crm_log((op->interval_ms? LOG_INFO : LOG_NOTICE), "Result of %s operation for %s on %s: %d (%s) " CRM_XS " call=%d key=%s confirmed=%s cib-update=%d", crm_action_str(op->op_type, op->interval_ms), op->rsc_id, lrm_state->node_name, op->rc, services_ocf_exitcode_str(op->rc), op->call_id, op_key, (removed? "true" : "false"), update_id); break; case PCMK_LRM_OP_TIMEOUT: crm_err("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s timeout=%dms", crm_action_str(op->op_type, op->interval_ms), op->rsc_id, lrm_state->node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, op->timeout); break; default: crm_err("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s confirmed=%s status=%d cib-update=%d", crm_action_str(op->op_type, op->interval_ms), op->rsc_id, lrm_state->node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, (removed? "true" : "false"), op->op_status, update_id); } if (op->output) { char *prefix = crm_strdup_printf("%s-" CRM_OP_FMT ":%d", lrm_state->node_name, op->rsc_id, op->op_type, op->interval_ms, op->call_id); if (op->rc) { crm_log_output(LOG_NOTICE, prefix, op->output); } else { crm_log_output(LOG_DEBUG, prefix, op->output); } free(prefix); } if (safe_str_neq(op->op_type, RSC_METADATA)) { crmd_alert_resource_op(lrm_state->node_name, op); } else if (op->rc == PCMK_OCF_OK) { char *metadata = unescape_newlines(op->output); metadata_cache_update(lrm_state->metadata_cache, rsc, metadata); free(metadata); } if (op->rsc_deleted) { crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL); } /* If a shutdown was escalated while operations were pending, * then the FSA will be stalled right now... allow it to continue */ mainloop_set_trigger(fsa_source); update_history_cache(lrm_state, rsc, op); lrmd_free_rsc_info(rsc); free(op_key); free(op_id); return TRUE; } diff --git a/lib/common/utils.c b/lib/common/utils.c index 40fb6f03ad..5bbcb58c70 100644 --- a/lib/common/utils.c +++ b/lib/common/utils.c @@ -1,1526 +1,1526 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef MAXLINE # define MAXLINE 512 #endif #ifdef HAVE_GETOPT_H # include #endif #ifndef PW_BUFFER_LEN # define PW_BUFFER_LEN 500 #endif CRM_TRACE_INIT_DATA(common); gboolean crm_config_error = FALSE; gboolean crm_config_warning = FALSE; char *crm_system_name = NULL; int node_score_red = 0; int node_score_green = 0; int node_score_yellow = 0; static struct crm_option *crm_long_options = NULL; static const char *crm_app_description = NULL; static char *crm_short_options = NULL; static const char *crm_app_usage = NULL; gboolean check_time(const char *value) { if (crm_get_msec(value) < 5000) { return FALSE; } return TRUE; } gboolean check_timer(const char *value) { if (crm_get_msec(value) < 0) { return FALSE; } return TRUE; } gboolean check_boolean(const char *value) { int tmp = FALSE; if (crm_str_to_boolean(value, &tmp) != 1) { return FALSE; } return TRUE; } gboolean check_number(const char *value) { errno = 0; if (value == NULL) { return FALSE; } else if (safe_str_eq(value, CRM_MINUS_INFINITY_S)) { } else if (safe_str_eq(value, CRM_INFINITY_S)) { } else { crm_int_helper(value, NULL); } if (errno != 0) { return FALSE; } return TRUE; } gboolean check_positive_number(const char* value) { if (safe_str_eq(value, CRM_INFINITY_S) || (crm_int_helper(value, NULL))) { return TRUE; } return FALSE; } gboolean check_quorum(const char *value) { if (safe_str_eq(value, "stop")) { return TRUE; } else if (safe_str_eq(value, "freeze")) { return TRUE; } else if (safe_str_eq(value, "ignore")) { return TRUE; } else if (safe_str_eq(value, "suicide")) { return TRUE; } return FALSE; } gboolean check_script(const char *value) { struct stat st; if(safe_str_eq(value, "/dev/null")) { return TRUE; } if(stat(value, &st) != 0) { crm_err("Script %s does not exist", value); return FALSE; } if(S_ISREG(st.st_mode) == 0) { crm_err("Script %s is not a regular file", value); return FALSE; } if( (st.st_mode & (S_IXUSR | S_IXGRP )) == 0) { crm_err("Script %s is not executable", value); return FALSE; } return TRUE; } gboolean check_utilization(const char *value) { char *end = NULL; long number = strtol(value, &end, 10); if(end && end[0] != '%') { return FALSE; } else if(number < 0) { return FALSE; } return TRUE; } void crm_args_fini() { free(crm_short_options); crm_short_options = NULL; } int char2score(const char *score) { int score_f = 0; if (score == NULL) { } else if (safe_str_eq(score, CRM_MINUS_INFINITY_S)) { score_f = -CRM_SCORE_INFINITY; } else if (safe_str_eq(score, CRM_INFINITY_S)) { score_f = CRM_SCORE_INFINITY; } else if (safe_str_eq(score, CRM_PLUS_INFINITY_S)) { score_f = CRM_SCORE_INFINITY; } else if (safe_str_eq(score, "red")) { score_f = node_score_red; } else if (safe_str_eq(score, "yellow")) { score_f = node_score_yellow; } else if (safe_str_eq(score, "green")) { score_f = node_score_green; } else { score_f = crm_parse_int(score, NULL); if (score_f > 0 && score_f > CRM_SCORE_INFINITY) { score_f = CRM_SCORE_INFINITY; } else if (score_f < 0 && score_f < -CRM_SCORE_INFINITY) { score_f = -CRM_SCORE_INFINITY; } } return score_f; } char * score2char_stack(int score, char *buf, size_t len) { if (score >= CRM_SCORE_INFINITY) { strncpy(buf, CRM_INFINITY_S, 9); } else if (score <= -CRM_SCORE_INFINITY) { strncpy(buf, CRM_MINUS_INFINITY_S , 10); } else { return crm_itoa_stack(score, buf, len); } return buf; } char * score2char(int score) { if (score >= CRM_SCORE_INFINITY) { return strdup(CRM_INFINITY_S); } else if (score <= -CRM_SCORE_INFINITY) { return strdup(CRM_MINUS_INFINITY_S); } return crm_itoa(score); } const char * cluster_option(GHashTable * options, gboolean(*validate) (const char *), const char *name, const char *old_name, const char *def_value) { const char *value = NULL; char *new_value = NULL; CRM_ASSERT(name != NULL); if (options) { value = g_hash_table_lookup(options, name); if ((value == NULL) && old_name) { value = g_hash_table_lookup(options, old_name); if (value != NULL) { crm_config_warn("Support for legacy name '%s' for cluster option '%s'" " is deprecated and will be removed in a future release", old_name, name); // Inserting copy with current name ensures we only warn once new_value = strdup(value); g_hash_table_insert(options, strdup(name), new_value); value = new_value; } } if (value && validate && (validate(value) == FALSE)) { crm_config_err("Resetting cluster option '%s' to default: value '%s' is invalid", name, value); value = NULL; } if (value) { return value; } } // No value found, use default value = def_value; if (value == NULL) { crm_trace("No value or default provided for cluster option '%s'", name); return NULL; } if (validate) { CRM_CHECK(validate(value) != FALSE, crm_err("Bug: default value for cluster option '%s' is invalid", name); return NULL); } crm_trace("Using default value '%s' for cluster option '%s'", value, name); if (options) { new_value = strdup(value); g_hash_table_insert(options, strdup(name), new_value); value = new_value; } return value; } const char * get_cluster_pref(GHashTable * options, pe_cluster_option * option_list, int len, const char *name) { const char *value = NULL; for (int lpc = 0; lpc < len; lpc++) { if (safe_str_eq(name, option_list[lpc].name)) { value = cluster_option(options, option_list[lpc].is_valid, option_list[lpc].name, option_list[lpc].alt_name, option_list[lpc].default_value); return value; } } CRM_CHECK(FALSE, crm_err("Bug: looking for unknown option '%s'", name)); return NULL; } void config_metadata(const char *name, const char *version, const char *desc_short, const char *desc_long, pe_cluster_option * option_list, int len) { int lpc = 0; fprintf(stdout, "" "\n" "\n" " %s\n" " %s\n" " %s\n" " \n", name, version, desc_long, desc_short); for (lpc = 0; lpc < len; lpc++) { if (option_list[lpc].description_long == NULL && option_list[lpc].description_short == NULL) { continue; } fprintf(stdout, " \n" " %s\n" " \n" " %s%s%s\n" " \n", option_list[lpc].name, option_list[lpc].description_short, option_list[lpc].type, option_list[lpc].default_value, option_list[lpc].description_long ? option_list[lpc]. description_long : option_list[lpc].description_short, option_list[lpc].values ? " Allowed values: " : "", option_list[lpc].values ? option_list[lpc].values : ""); } fprintf(stdout, " \n\n"); } void verify_all_options(GHashTable * options, pe_cluster_option * option_list, int len) { int lpc = 0; for (lpc = 0; lpc < len; lpc++) { cluster_option(options, option_list[lpc].is_valid, option_list[lpc].name, option_list[lpc].alt_name, option_list[lpc].default_value); } } char * generate_hash_key(const char *crm_msg_reference, const char *sys) { char *hash_key = crm_concat(sys ? sys : "none", crm_msg_reference, '_'); crm_trace("created hash key: (%s)", hash_key); return hash_key; } int crm_user_lookup(const char *name, uid_t * uid, gid_t * gid) { int rc = pcmk_ok; char *buffer = NULL; struct passwd pwd; struct passwd *pwentry = NULL; buffer = calloc(1, PW_BUFFER_LEN); rc = getpwnam_r(name, &pwd, buffer, PW_BUFFER_LEN, &pwentry); if (pwentry) { if (uid) { *uid = pwentry->pw_uid; } if (gid) { *gid = pwentry->pw_gid; } crm_trace("User %s has uid=%d gid=%d", name, pwentry->pw_uid, pwentry->pw_gid); } else { rc = rc? -rc : -EINVAL; crm_info("User %s lookup: %s", name, pcmk_strerror(rc)); } free(buffer); return rc; } static int crm_version_helper(const char *text, char **end_text) { int atoi_result = -1; CRM_ASSERT(end_text != NULL); errno = 0; if (text != NULL && text[0] != 0) { atoi_result = (int)strtol(text, end_text, 10); if (errno == EINVAL) { crm_err("Conversion of '%s' %c failed", text, text[0]); atoi_result = -1; } } return atoi_result; } /* * version1 < version2 : -1 * version1 = version2 : 0 * version1 > version2 : 1 */ int compare_version(const char *version1, const char *version2) { int rc = 0; int lpc = 0; char *ver1_copy = NULL, *ver2_copy = NULL; char *rest1 = NULL, *rest2 = NULL; if (version1 == NULL && version2 == NULL) { return 0; } else if (version1 == NULL) { return -1; } else if (version2 == NULL) { return 1; } ver1_copy = strdup(version1); ver2_copy = strdup(version2); rest1 = ver1_copy; rest2 = ver2_copy; while (1) { int digit1 = 0; int digit2 = 0; lpc++; if (rest1 == rest2) { break; } if (rest1 != NULL) { digit1 = crm_version_helper(rest1, &rest1); } if (rest2 != NULL) { digit2 = crm_version_helper(rest2, &rest2); } if (digit1 < digit2) { rc = -1; break; } else if (digit1 > digit2) { rc = 1; break; } if (rest1 != NULL && rest1[0] == '.') { rest1++; } if (rest1 != NULL && rest1[0] == 0) { rest1 = NULL; } if (rest2 != NULL && rest2[0] == '.') { rest2++; } if (rest2 != NULL && rest2[0] == 0) { rest2 = NULL; } } free(ver1_copy); free(ver2_copy); if (rc == 0) { crm_trace("%s == %s (%d)", version1, version2, lpc); } else if (rc < 0) { crm_trace("%s < %s (%d)", version1, version2, lpc); } else if (rc > 0) { crm_trace("%s > %s (%d)", version1, version2, lpc); } return rc; } gboolean do_stderr = FALSE; #ifndef NUMCHARS # define NUMCHARS "0123456789." #endif #ifndef WHITESPACE # define WHITESPACE " \t\n\r\f" #endif guint crm_parse_interval_spec(const char *input) { long long msec = 0; if (input == NULL) { return 0; } else if (input[0] != 'P') { long long tmp = crm_get_msec(input); if(tmp > 0) { msec = tmp; } } else { crm_time_t *period_s = crm_time_parse_duration(input); msec = 1000 * crm_time_get_seconds(period_s); crm_time_free(period_s); } - return (guint) msec; + return (msec <= 0)? 0 : ((msec >= G_MAXUINT)? G_MAXUINT : (guint) msec); } long long crm_get_msec(const char *input) { const char *cp = input; const char *units; long long multiplier = 1000; long long divisor = 1; long long msec = -1; char *end_text = NULL; /* double dret; */ if (input == NULL) { return msec; } cp += strspn(cp, WHITESPACE); units = cp + strspn(cp, NUMCHARS); units += strspn(units, WHITESPACE); if (strchr(NUMCHARS, *cp) == NULL) { return msec; } if (strncasecmp(units, "ms", 2) == 0 || strncasecmp(units, "msec", 4) == 0) { multiplier = 1; divisor = 1; } else if (strncasecmp(units, "us", 2) == 0 || strncasecmp(units, "usec", 4) == 0) { multiplier = 1; divisor = 1000; } else if (strncasecmp(units, "s", 1) == 0 || strncasecmp(units, "sec", 3) == 0) { multiplier = 1000; divisor = 1; } else if (strncasecmp(units, "m", 1) == 0 || strncasecmp(units, "min", 3) == 0) { multiplier = 60 * 1000; divisor = 1; } else if (strncasecmp(units, "h", 1) == 0 || strncasecmp(units, "hr", 2) == 0) { multiplier = 60 * 60 * 1000; divisor = 1; } else if (*units != EOS && *units != '\n' && *units != '\r') { return msec; } msec = crm_int_helper(cp, &end_text); if (msec > LLONG_MAX/multiplier) { /* arithmetics overflow while multiplier/divisor mutually exclusive */ return LLONG_MAX; } msec *= multiplier; msec /= divisor; /* dret += 0.5; */ /* msec = (long long)dret; */ return msec; } extern bool crm_is_daemon; /* coverity[+kill] */ void crm_abort(const char *file, const char *function, int line, const char *assert_condition, gboolean do_core, gboolean do_fork) { int rc = 0; int pid = 0; int status = 0; /* Implied by the parent's error logging below */ /* crm_write_blackbox(0); */ if(crm_is_daemon == FALSE) { /* This is a command line tool - do not fork */ /* crm_add_logfile(NULL); * Record it to a file? */ crm_enable_stderr(TRUE); /* Make sure stderr is enabled so we can tell the caller */ do_fork = FALSE; /* Just crash if needed */ } if (do_core == FALSE) { crm_err("%s: Triggered assert at %s:%d : %s", function, file, line, assert_condition); return; } else if (do_fork) { pid = fork(); } else { crm_err("%s: Triggered fatal assert at %s:%d : %s", function, file, line, assert_condition); } if (pid == -1) { crm_crit("%s: Cannot create core for non-fatal assert at %s:%d : %s", function, file, line, assert_condition); return; } else if(pid == 0) { /* Child process */ abort(); return; } /* Parent process */ crm_err("%s: Forked child %d to record non-fatal assert at %s:%d : %s", function, pid, file, line, assert_condition); crm_write_blackbox(SIGTRAP, NULL); do { rc = waitpid(pid, &status, 0); if(rc == pid) { return; /* Job done */ } } while(errno == EINTR); if (errno == ECHILD) { /* crm_mon does this */ crm_trace("Cannot wait on forked child %d - SIGCHLD is probably set to SIG_IGN", pid); return; } crm_perror(LOG_ERR, "Cannot wait on forked child %d", pid); } int crm_pid_active(long pid, const char *daemon) { static int have_proc_pid = 0; if(have_proc_pid == 0) { char proc_path[PATH_MAX], exe_path[PATH_MAX]; /* check to make sure pid hasn't been reused by another process */ snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", (long unsigned int)getpid()); have_proc_pid = 1; if(readlink(proc_path, exe_path, PATH_MAX - 1) < 0) { have_proc_pid = -1; } } if (pid <= 0) { return -1; } else if (kill(pid, 0) < 0 && errno == ESRCH) { return 0; } else if(daemon == NULL || have_proc_pid == -1) { return 1; } else { int rc = 0; char proc_path[PATH_MAX], exe_path[PATH_MAX], myexe_path[PATH_MAX]; /* check to make sure pid hasn't been reused by another process */ snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", pid); rc = readlink(proc_path, exe_path, PATH_MAX - 1); if (rc < 0 && errno == EACCES) { crm_perror(LOG_INFO, "Could not read from %s", proc_path); return 1; } else if (rc < 0) { crm_perror(LOG_ERR, "Could not read from %s", proc_path); return 0; } exe_path[rc] = 0; if(daemon[0] != '/') { rc = snprintf(myexe_path, sizeof(proc_path), CRM_DAEMON_DIR"/%s", daemon); myexe_path[rc] = 0; } else { rc = snprintf(myexe_path, sizeof(proc_path), "%s", daemon); myexe_path[rc] = 0; } if (strcmp(exe_path, myexe_path) == 0) { return 1; } } return 0; } #define LOCKSTRLEN 11 long crm_read_pidfile(const char *filename) { int fd; struct stat sbuf; long pid = -ENOENT; char buf[LOCKSTRLEN + 1]; if ((fd = open(filename, O_RDONLY)) < 0) { goto bail; } if (fstat(fd, &sbuf) >= 0 && sbuf.st_size < LOCKSTRLEN) { sleep(2); /* if someone was about to create one, * give'm a sec to do so */ } if (read(fd, buf, sizeof(buf)) < 1) { goto bail; } if (sscanf(buf, "%lu", &pid) > 0) { if (pid <= 0) { pid = -ESRCH; } else { crm_trace("Got pid %lu from %s\n", pid, filename); } } bail: if (fd >= 0) { close(fd); } return pid; } long crm_pidfile_inuse(const char *filename, long mypid, const char *daemon) { long pid = crm_read_pidfile(filename); if (pid < 2) { /* Invalid pid */ pid = -ENOENT; unlink(filename); } else if (mypid && pid == mypid) { /* In use by us */ pid = pcmk_ok; } else if (crm_pid_active(pid, daemon) == FALSE) { /* Contains a stale value */ unlink(filename); pid = -ENOENT; } else if (mypid && pid != mypid) { /* locked by existing process - give up */ pid = -EEXIST; } return pid; } static int crm_lock_pidfile(const char *filename, const char *name) { long mypid = 0; int fd = 0, rc = 0; char buf[LOCKSTRLEN + 1]; mypid = (unsigned long)getpid(); rc = crm_pidfile_inuse(filename, 0, name); if (rc == -ENOENT) { /* exists but the process is not active */ } else if (rc != pcmk_ok) { /* locked by existing process - give up */ return rc; } if ((fd = open(filename, O_CREAT | O_WRONLY | O_EXCL, 0644)) < 0) { /* Hmmh, why did we fail? Anyway, nothing we can do about it */ return -errno; } snprintf(buf, sizeof(buf), "%*lu\n", LOCKSTRLEN - 1, mypid); rc = write(fd, buf, LOCKSTRLEN); close(fd); if (rc != LOCKSTRLEN) { crm_perror(LOG_ERR, "Incomplete write to %s", filename); return -errno; } return crm_pidfile_inuse(filename, mypid, name); } void crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile) { int rc; long pid; const char *devnull = "/dev/null"; if (daemonize == FALSE) { return; } /* Check before we even try... */ rc = crm_pidfile_inuse(pidfile, 1, name); if(rc < pcmk_ok && rc != -ENOENT) { pid = crm_read_pidfile(pidfile); crm_err("%s: already running [pid %ld in %s]", name, pid, pidfile); printf("%s: already running [pid %ld in %s]\n", name, pid, pidfile); crm_exit(CRM_EX_ERROR); } pid = fork(); if (pid < 0) { fprintf(stderr, "%s: could not start daemon\n", name); crm_perror(LOG_ERR, "fork"); crm_exit(CRM_EX_OSERR); } else if (pid > 0) { crm_exit(CRM_EX_OK); } rc = crm_lock_pidfile(pidfile, name); if(rc < pcmk_ok) { crm_err("Could not lock '%s' for %s: %s (%d)", pidfile, name, pcmk_strerror(rc), rc); printf("Could not lock '%s' for %s: %s (%d)\n", pidfile, name, pcmk_strerror(rc), rc); crm_exit(CRM_EX_ERROR); } umask(S_IWGRP | S_IWOTH | S_IROTH); close(STDIN_FILENO); (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ close(STDOUT_FILENO); (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ close(STDERR_FILENO); (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ } char * crm_meta_name(const char *field) { int lpc = 0; int max = 0; char *crm_name = NULL; CRM_CHECK(field != NULL, return NULL); crm_name = crm_concat(CRM_META, field, '_'); /* Massage the names so they can be used as shell variables */ max = strlen(crm_name); for (; lpc < max; lpc++) { switch (crm_name[lpc]) { case '-': crm_name[lpc] = '_'; break; } } return crm_name; } const char * crm_meta_value(GHashTable * hash, const char *field) { char *key = NULL; const char *value = NULL; key = crm_meta_name(field); if (key) { value = g_hash_table_lookup(hash, key); free(key); } return value; } static struct option * crm_create_long_opts(struct crm_option *long_options) { struct option *long_opts = NULL; #ifdef HAVE_GETOPT_H int index = 0, lpc = 0; /* * A previous, possibly poor, choice of '?' as the short form of --help * means that getopt_long() returns '?' for both --help and for "unknown option" * * This dummy entry allows us to differentiate between the two in crm_get_option() * and exit with the correct error code */ long_opts = realloc_safe(long_opts, (index + 1) * sizeof(struct option)); long_opts[index].name = "__dummmy__"; long_opts[index].has_arg = 0; long_opts[index].flag = 0; long_opts[index].val = '_'; index++; for (lpc = 0; long_options[lpc].name != NULL; lpc++) { if (long_options[lpc].name[0] == '-') { continue; } long_opts = realloc_safe(long_opts, (index + 1) * sizeof(struct option)); /*fprintf(stderr, "Creating %d %s = %c\n", index, * long_options[lpc].name, long_options[lpc].val); */ long_opts[index].name = long_options[lpc].name; long_opts[index].has_arg = long_options[lpc].has_arg; long_opts[index].flag = long_options[lpc].flag; long_opts[index].val = long_options[lpc].val; index++; } /* Now create the list terminator */ long_opts = realloc_safe(long_opts, (index + 1) * sizeof(struct option)); long_opts[index].name = NULL; long_opts[index].has_arg = 0; long_opts[index].flag = 0; long_opts[index].val = 0; #endif return long_opts; } void crm_set_options(const char *short_options, const char *app_usage, struct crm_option *long_options, const char *app_desc) { if (short_options) { crm_short_options = strdup(short_options); } else if (long_options) { int lpc = 0; int opt_string_len = 0; char *local_short_options = NULL; for (lpc = 0; long_options[lpc].name != NULL; lpc++) { if (long_options[lpc].val && long_options[lpc].val != '-' && long_options[lpc].val < UCHAR_MAX) { local_short_options = realloc_safe(local_short_options, opt_string_len + 4); local_short_options[opt_string_len++] = long_options[lpc].val; /* getopt(3) says: Two colons mean an option takes an optional arg; */ if (long_options[lpc].has_arg == optional_argument) { local_short_options[opt_string_len++] = ':'; } if (long_options[lpc].has_arg >= required_argument) { local_short_options[opt_string_len++] = ':'; } local_short_options[opt_string_len] = 0; } } crm_short_options = local_short_options; crm_trace("Generated short option string: '%s'", local_short_options); } if (long_options) { crm_long_options = long_options; } if (app_desc) { crm_app_description = app_desc; } if (app_usage) { crm_app_usage = app_usage; } } int crm_get_option(int argc, char **argv, int *index) { return crm_get_option_long(argc, argv, index, NULL); } int crm_get_option_long(int argc, char **argv, int *index, const char **longname) { #ifdef HAVE_GETOPT_H static struct option *long_opts = NULL; if (long_opts == NULL && crm_long_options) { long_opts = crm_create_long_opts(crm_long_options); } *index = 0; if (long_opts) { int flag = getopt_long(argc, argv, crm_short_options, long_opts, index); switch (flag) { case 0: if (long_opts[*index].val) { return long_opts[*index].val; } else if (longname) { *longname = long_opts[*index].name; } else { crm_notice("Unhandled option --%s", long_opts[*index].name); return flag; } case -1: /* End of option processing */ break; case ':': crm_trace("Missing argument"); crm_help('?', CRM_EX_USAGE); break; case '?': crm_help('?', (*index? CRM_EX_OK : CRM_EX_USAGE)); break; } return flag; } #endif if (crm_short_options) { return getopt(argc, argv, crm_short_options); } return -1; } crm_exit_t crm_help(char cmd, crm_exit_t exit_code) { int i = 0; FILE *stream = (exit_code ? stderr : stdout); if (cmd == 'v' || cmd == '$') { fprintf(stream, "Pacemaker %s\n", PACEMAKER_VERSION); fprintf(stream, "Written by Andrew Beekhof\n"); goto out; } if (cmd == '!') { fprintf(stream, "Pacemaker %s (Build: %s): %s\n", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES); goto out; } fprintf(stream, "%s - %s\n", crm_system_name, crm_app_description); if (crm_app_usage) { fprintf(stream, "Usage: %s %s\n", crm_system_name, crm_app_usage); } if (crm_long_options) { fprintf(stream, "Options:\n"); for (i = 0; crm_long_options[i].name != NULL; i++) { if (crm_long_options[i].flags & pcmk_option_hidden) { } else if (crm_long_options[i].flags & pcmk_option_paragraph) { fprintf(stream, "%s\n\n", crm_long_options[i].desc); } else if (crm_long_options[i].flags & pcmk_option_example) { fprintf(stream, "\t#%s\n\n", crm_long_options[i].desc); } else if (crm_long_options[i].val == '-' && crm_long_options[i].desc) { fprintf(stream, "%s\n", crm_long_options[i].desc); } else { /* is val printable as char ? */ if (crm_long_options[i].val && crm_long_options[i].val <= UCHAR_MAX) { fprintf(stream, " -%c,", crm_long_options[i].val); } else { fputs(" ", stream); } fprintf(stream, " --%s%s\t%s\n", crm_long_options[i].name, crm_long_options[i].has_arg == optional_argument ? "[=value]" : crm_long_options[i].has_arg == required_argument ? "=value" : "", crm_long_options[i].desc ? crm_long_options[i].desc : ""); } } } else if (crm_short_options) { fprintf(stream, "Usage: %s - %s\n", crm_system_name, crm_app_description); for (i = 0; crm_short_options[i] != 0; i++) { int has_arg = no_argument /* 0 */; if (crm_short_options[i + 1] == ':') { if (crm_short_options[i + 2] == ':') has_arg = optional_argument /* 2 */; else has_arg = required_argument /* 1 */; } fprintf(stream, " -%c %s\n", crm_short_options[i], has_arg == optional_argument ? "[value]" : has_arg == required_argument ? "{value}" : ""); i += has_arg; } } fprintf(stream, "\nReport bugs to %s\n", PACKAGE_BUGREPORT); out: return crm_exit(exit_code); } void cib_ipc_servers_init(qb_ipcs_service_t **ipcs_ro, qb_ipcs_service_t **ipcs_rw, qb_ipcs_service_t **ipcs_shm, struct qb_ipcs_service_handlers *ro_cb, struct qb_ipcs_service_handlers *rw_cb) { *ipcs_ro = mainloop_add_ipc_server(cib_channel_ro, QB_IPC_NATIVE, ro_cb); *ipcs_rw = mainloop_add_ipc_server(cib_channel_rw, QB_IPC_NATIVE, rw_cb); *ipcs_shm = mainloop_add_ipc_server(cib_channel_shm, QB_IPC_SHM, rw_cb); if (*ipcs_ro == NULL || *ipcs_rw == NULL || *ipcs_shm == NULL) { crm_err("Failed to create cib servers: exiting and inhibiting respawn."); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); crm_exit(CRM_EX_FATAL); } } void cib_ipc_servers_destroy(qb_ipcs_service_t *ipcs_ro, qb_ipcs_service_t *ipcs_rw, qb_ipcs_service_t *ipcs_shm) { qb_ipcs_destroy(ipcs_ro); qb_ipcs_destroy(ipcs_rw); qb_ipcs_destroy(ipcs_shm); } qb_ipcs_service_t * crmd_ipc_server_init(struct qb_ipcs_service_handlers *cb) { return mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, cb); } void attrd_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server(T_ATTRD, QB_IPC_NATIVE, cb); if (*ipcs == NULL) { crm_err("Failed to create attrd servers: exiting and inhibiting respawn."); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); crm_exit(CRM_EX_FATAL); } } void stonith_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server("stonith-ng", QB_IPC_NATIVE, cb); if (*ipcs == NULL) { crm_err("Failed to create stonith-ng servers: exiting and inhibiting respawn."); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); crm_exit(CRM_EX_FATAL); } } bool pcmk_acl_required(const char *user) { #if ENABLE_ACL if(user == NULL || strlen(user) == 0) { crm_trace("no user set"); return FALSE; } else if (strcmp(user, CRM_DAEMON_USER) == 0) { return FALSE; } else if (strcmp(user, "root") == 0) { return FALSE; } crm_trace("acls required for %s", user); return TRUE; #else crm_trace("acls not supported"); return FALSE; #endif } #if ENABLE_ACL char * uid2username(uid_t uid) { struct passwd *pwent = getpwuid(uid); if (pwent == NULL) { crm_perror(LOG_ERR, "Cannot get password entry of uid: %d", uid); return NULL; } else { return strdup(pwent->pw_name); } } const char * crm_acl_get_set_user(xmlNode * request, const char *field, const char *peer_user) { /* field is only checked for backwards compatibility */ static const char *effective_user = NULL; const char *requested_user = NULL; const char *user = NULL; if(effective_user == NULL) { effective_user = uid2username(geteuid()); } requested_user = crm_element_value(request, XML_ACL_TAG_USER); if(requested_user == NULL) { requested_user = crm_element_value(request, field); } if (is_privileged(effective_user) == FALSE) { /* We're not running as a privileged user, set or overwrite any existing value for $XML_ACL_TAG_USER */ user = effective_user; } else if(peer_user == NULL && requested_user == NULL) { /* No user known or requested, use 'effective_user' and make sure one is set for the request */ user = effective_user; } else if(peer_user == NULL) { /* No user known, trusting 'requested_user' */ user = requested_user; } else if (is_privileged(peer_user) == FALSE) { /* The peer is not a privileged user, set or overwrite any existing value for $XML_ACL_TAG_USER */ user = peer_user; } else if (requested_user == NULL) { /* Even if we're privileged, make sure there is always a value set */ user = peer_user; } else { /* Legal delegation to 'requested_user' */ user = requested_user; } // This requires pointer comparison, not string comparison if(user != crm_element_value(request, XML_ACL_TAG_USER)) { crm_xml_add(request, XML_ACL_TAG_USER, user); } if(field != NULL && user != crm_element_value(request, field)) { crm_xml_add(request, field, user); } return requested_user; } void determine_request_user(const char *user, xmlNode * request, const char *field) { /* Get our internal validation out of the way first */ CRM_CHECK(user != NULL && request != NULL && field != NULL, return); /* If our peer is a privileged user, we might be doing something on behalf of someone else */ if (is_privileged(user) == FALSE) { /* We're not a privileged user, set or overwrite any existing value for $field */ crm_xml_replace(request, field, user); } else if (crm_element_value(request, field) == NULL) { /* Even if we're privileged, make sure there is always a value set */ crm_xml_replace(request, field, user); /* } else { Legal delegation */ } crm_trace("Processing msg as user '%s'", crm_element_value(request, field)); } #endif void * find_library_function(void **handle, const char *lib, const char *fn, gboolean fatal) { char *error; void *a_function; if (*handle == NULL) { *handle = dlopen(lib, RTLD_LAZY); } if (!(*handle)) { crm_err("%sCould not open %s: %s", fatal ? "Fatal: " : "", lib, dlerror()); if (fatal) { crm_exit(CRM_EX_FATAL); } return NULL; } a_function = dlsym(*handle, fn); if (a_function == NULL) { error = dlerror(); crm_err("%sCould not find %s in %s: %s", fatal ? "Fatal: " : "", fn, lib, error); if (fatal) { crm_exit(CRM_EX_FATAL); } } return a_function; } void * convert_const_pointer(const void *ptr) { /* Worst function ever */ return (void *)ptr; } #ifdef HAVE_UUID_UUID_H # include #endif char * crm_generate_uuid(void) { unsigned char uuid[16]; char *buffer = malloc(37); /* Including NUL byte */ uuid_generate(uuid); uuid_unparse(uuid, buffer); return buffer; } /*! * \brief Check whether a string represents a cluster daemon name * * \param[in] name String to check * * \return TRUE if name is standard client name used by daemons, FALSE otherwise */ bool crm_is_daemon_name(const char *name) { return (name && (!strcmp(name, CRM_SYSTEM_CRMD) || !strcmp(name, CRM_SYSTEM_STONITHD) || !strcmp(name, T_ATTRD) || !strcmp(name, CRM_SYSTEM_CIB) || !strcmp(name, CRM_SYSTEM_MCP) || !strcmp(name, CRM_SYSTEM_DC) || !strcmp(name, CRM_SYSTEM_TENGINE) || !strcmp(name, CRM_SYSTEM_LRMD))); } #include char * crm_md5sum(const char *buffer) { int lpc = 0, len = 0; char *digest = NULL; unsigned char raw_digest[MD5_DIGEST_SIZE]; if (buffer == NULL) { buffer = ""; } len = strlen(buffer); crm_trace("Beginning digest of %d bytes", len); digest = malloc(2 * MD5_DIGEST_SIZE + 1); if(digest) { md5_buffer(buffer, len, raw_digest); for (lpc = 0; lpc < MD5_DIGEST_SIZE; lpc++) { sprintf(digest + (2 * lpc), "%02x", raw_digest[lpc]); } digest[(2 * MD5_DIGEST_SIZE)] = 0; crm_trace("Digest %s.", digest); } else { crm_err("Could not create digest"); } return digest; } #ifdef HAVE_GNUTLS_GNUTLS_H void crm_gnutls_global_init(void) { signal(SIGPIPE, SIG_IGN); gnutls_global_init(); } #endif char * crm_generate_ra_key(const char *standard, const char *provider, const char *type) { if (!standard && !provider && !type) { return NULL; } return crm_strdup_printf("%s%s%s:%s", (standard? standard : ""), (provider? ":" : ""), (provider? provider : ""), (type? type : "")); } /*! * \brief Check whether a resource standard requires a provider to be specified * * \param[in] standard Standard name * * \return TRUE if standard requires a provider, FALSE otherwise */ bool crm_provider_required(const char *standard) { CRM_CHECK(standard != NULL, return FALSE); /* @TODO * - this should probably be case-sensitive, but isn't, * for backward compatibility * - it might be nice to keep standards' capabilities (supports provider, * can be promotable, etc.) as structured data somewhere */ if (!strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF)) { return TRUE; } return FALSE; } /*! * \brief Parse a "standard[:provider]:type" agent specification * * \param[in] spec Agent specification * \param[out] standard Newly allocated memory containing agent standard (or NULL) * \param[out] provider Newly allocated memory containing agent provider (or NULL) * \param[put] type Newly allocated memory containing agent type (or NULL) * * \return pcmk_ok if the string could be parsed, -EINVAL otherwise * * \note It is acceptable for the type to contain a ':' if the standard supports * that. For example, systemd supports the form "systemd:UNIT@A:B". * \note It is the caller's responsibility to free the returned values. */ int crm_parse_agent_spec(const char *spec, char **standard, char **provider, char **type) { char *colon; CRM_CHECK(spec && standard && provider && type, return -EINVAL); *standard = NULL; *provider = NULL; *type = NULL; colon = strchr(spec, ':'); if ((colon == NULL) || (colon == spec)) { return -EINVAL; } *standard = strndup(spec, colon - spec); spec = colon + 1; if (crm_provider_required(*standard)) { colon = strchr(spec, ':'); if ((colon == NULL) || (colon == spec)) { free(*standard); return -EINVAL; } *provider = strndup(spec, colon - spec); spec = colon + 1; } if (*spec == '\0') { free(*standard); free(*provider); return -EINVAL; } *type = strdup(spec); return pcmk_ok; } diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index 51ebed38d2..5c3f519835 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -1,2343 +1,2340 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include pe_working_set_t *pe_dataset = NULL; extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); void print_str_str(gpointer key, gpointer value, gpointer user_data); gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); void unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, pe_working_set_t * data_set); static xmlNode *find_rsc_op_entry_helper(resource_t * rsc, const char *key, gboolean include_disabled); #if ENABLE_VERSIONED_ATTRS pe_rsc_action_details_t * pe_rsc_action_details(pe_action_t *action) { pe_rsc_action_details_t *details; CRM_CHECK(action != NULL, return NULL); if (action->action_details == NULL) { action->action_details = calloc(1, sizeof(pe_rsc_action_details_t)); CRM_CHECK(action->action_details != NULL, return NULL); } details = (pe_rsc_action_details_t *) action->action_details; if (details->versioned_parameters == NULL) { details->versioned_parameters = create_xml_node(NULL, XML_TAG_OP_VER_ATTRS); } if (details->versioned_meta == NULL) { details->versioned_meta = create_xml_node(NULL, XML_TAG_OP_VER_META); } return details; } static void pe_free_rsc_action_details(pe_action_t *action) { pe_rsc_action_details_t *details; if ((action == NULL) || (action->action_details == NULL)) { return; } details = (pe_rsc_action_details_t *) action->action_details; if (details->versioned_parameters) { free_xml(details->versioned_parameters); } if (details->versioned_meta) { free_xml(details->versioned_meta); } action->action_details = NULL; } #endif /*! * \internal * \brief Check whether we can fence a particular node * * \param[in] data_set Working set for cluster * \param[in] node Name of node to check * * \return TRUE if node can be fenced, FALSE otherwise * * \note This function should only be called for cluster nodes and baremetal * remote nodes; guest nodes are fenced by stopping their container * resource, so fence execution requirements do not apply to them. */ bool pe_can_fence(pe_working_set_t * data_set, node_t *node) { if(is_not_set(data_set->flags, pe_flag_stonith_enabled)) { return FALSE; /* Turned off */ } else if (is_not_set(data_set->flags, pe_flag_have_stonith_resource)) { return FALSE; /* No devices */ } else if (is_set(data_set->flags, pe_flag_have_quorum)) { return TRUE; } else if (data_set->no_quorum_policy == no_quorum_ignore) { return TRUE; } else if(node == NULL) { return FALSE; } else if(node->details->online) { crm_notice("We can fence %s without quorum because they're in our membership", node->details->uname); return TRUE; } crm_trace("Cannot fence %s", node->details->uname); return FALSE; } node_t * node_copy(const node_t *this_node) { node_t *new_node = NULL; CRM_CHECK(this_node != NULL, return NULL); new_node = calloc(1, sizeof(node_t)); CRM_ASSERT(new_node != NULL); crm_trace("Copying %p (%s) to %p", this_node, this_node->details->uname, new_node); new_node->rsc_discover_mode = this_node->rsc_discover_mode; new_node->weight = this_node->weight; new_node->fixed = this_node->fixed; new_node->details = this_node->details; return new_node; } /* any node in list1 or list2 and not in the other gets a score of -INFINITY */ void node_list_exclude(GHashTable * hash, GListPtr list, gboolean merge_scores) { GHashTable *result = hash; node_t *other_node = NULL; GListPtr gIter = list; GHashTableIter iter; node_t *node = NULL; g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { other_node = pe_find_node_id(list, node->details->id); if (other_node == NULL) { node->weight = -INFINITY; } else if (merge_scores) { node->weight = merge_weights(node->weight, other_node->weight); } } for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; other_node = pe_hash_table_lookup(result, node->details->id); if (other_node == NULL) { node_t *new_node = node_copy(node); new_node->weight = -INFINITY; g_hash_table_insert(result, (gpointer) new_node->details->id, new_node); } } } GHashTable * node_hash_from_list(GListPtr list) { GListPtr gIter = list; GHashTable *result = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free); for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; node_t *n = node_copy(node); g_hash_table_insert(result, (gpointer) n->details->id, n); } return result; } GListPtr node_list_dup(GListPtr list1, gboolean reset, gboolean filter) { GListPtr result = NULL; GListPtr gIter = list1; for (; gIter != NULL; gIter = gIter->next) { node_t *new_node = NULL; node_t *this_node = (node_t *) gIter->data; if (filter && this_node->weight < 0) { continue; } new_node = node_copy(this_node); if (reset) { new_node->weight = 0; } if (new_node != NULL) { result = g_list_prepend(result, new_node); } } return result; } gint sort_node_uname(gconstpointer a, gconstpointer b) { const node_t *node_a = a; const node_t *node_b = b; return strcmp(node_a->details->uname, node_b->details->uname); } void dump_node_scores_worker(int level, const char *file, const char *function, int line, resource_t * rsc, const char *comment, GHashTable * nodes) { GHashTable *hash = nodes; GHashTableIter iter; node_t *node = NULL; if (rsc) { hash = rsc->allowed_nodes; } if (rsc && is_set(rsc->flags, pe_rsc_orphan)) { /* Don't show the allocation scores for orphans */ return; } if (level == 0) { char score[128]; int len = sizeof(score); /* For now we want this in sorted order to keep the regression tests happy */ GListPtr gIter = NULL; GListPtr list = g_hash_table_get_values(hash); list = g_list_sort(list, sort_node_uname); gIter = list; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; /* This function is called a whole lot, use stack allocated score */ score2char_stack(node->weight, score, len); if (rsc) { printf("%s: %s allocation score on %s: %s\n", comment, rsc->id, node->details->uname, score); } else { printf("%s: %s = %s\n", comment, node->details->uname, score); } } g_list_free(list); } else if (hash) { char score[128]; int len = sizeof(score); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { /* This function is called a whole lot, use stack allocated score */ score2char_stack(node->weight, score, len); if (rsc) { do_crm_log_alias(LOG_TRACE, file, function, line, "%s: %s allocation score on %s: %s", comment, rsc->id, node->details->uname, score); } else { do_crm_log_alias(LOG_TRACE, file, function, line + 1, "%s: %s = %s", comment, node->details->uname, score); } } } if (rsc && rsc->children) { GListPtr gIter = NULL; gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; dump_node_scores_worker(level, file, function, line, child, comment, nodes); } } } static void append_dump_text(gpointer key, gpointer value, gpointer user_data) { char **dump_text = user_data; char *new_text = crm_strdup_printf("%s %s=%s", *dump_text, (char *)key, (char *)value); free(*dump_text); *dump_text = new_text; } void dump_node_capacity(int level, const char *comment, node_t * node) { char *dump_text = crm_strdup_printf("%s: %s capacity:", comment, node->details->uname); g_hash_table_foreach(node->details->utilization, append_dump_text, &dump_text); if (level == 0) { fprintf(stdout, "%s\n", dump_text); } else { crm_trace("%s", dump_text); } free(dump_text); } void dump_rsc_utilization(int level, const char *comment, resource_t * rsc, node_t * node) { char *dump_text = crm_strdup_printf("%s: %s utilization on %s:", comment, rsc->id, node->details->uname); g_hash_table_foreach(rsc->utilization, append_dump_text, &dump_text); if (level == 0) { fprintf(stdout, "%s\n", dump_text); } else { crm_trace("%s", dump_text); } free(dump_text); } gint sort_rsc_index(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t *)a; const resource_t *resource2 = (const resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->sort_index > resource2->sort_index) { return -1; } if (resource1->sort_index < resource2->sort_index) { return 1; } return 0; } gint sort_rsc_priority(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t *)a; const resource_t *resource2 = (const resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->priority > resource2->priority) { return -1; } if (resource1->priority < resource2->priority) { return 1; } return 0; } action_t * custom_action(resource_t * rsc, char *key, const char *task, node_t * on_node, gboolean optional, gboolean save_action, pe_working_set_t * data_set) { action_t *action = NULL; GListPtr possible_matches = NULL; CRM_CHECK(key != NULL, return NULL); CRM_CHECK(task != NULL, free(key); return NULL); if (save_action && rsc != NULL) { possible_matches = find_actions(rsc->actions, key, on_node); } else if(save_action) { #if 0 action = g_hash_table_lookup(data_set->singletons, key); #else /* More expensive but takes 'node' into account */ possible_matches = find_actions(data_set->actions, key, on_node); #endif } if(data_set->singletons == NULL) { data_set->singletons = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL); } if (possible_matches != NULL) { if (g_list_length(possible_matches) > 1) { pe_warn("Action %s for %s on %s exists %d times", task, rsc ? rsc->id : "", on_node ? on_node->details->uname : "", g_list_length(possible_matches)); } action = g_list_nth_data(possible_matches, 0); pe_rsc_trace(rsc, "Found existing action (%d) %s for %s on %s", action->id, task, rsc ? rsc->id : "", on_node ? on_node->details->uname : ""); g_list_free(possible_matches); } if (action == NULL) { if (save_action) { pe_rsc_trace(rsc, "Creating%s action %d: %s for %s on %s %d", optional ? "" : " mandatory", data_set->action_id, key, rsc ? rsc->id : "", on_node ? on_node->details->uname : "", optional); } action = calloc(1, sizeof(action_t)); if (save_action) { action->id = data_set->action_id++; } else { action->id = 0; } action->rsc = rsc; CRM_ASSERT(task != NULL); action->task = strdup(task); if (on_node) { action->node = node_copy(on_node); } action->uuid = strdup(key); pe_set_action_bit(action, pe_action_runnable); if (optional) { pe_rsc_trace(rsc, "Set optional on %s", action->uuid); pe_set_action_bit(action, pe_action_optional); } else { pe_clear_action_bit(action, pe_action_optional); pe_rsc_trace(rsc, "Unset optional on %s", action->uuid); } /* Implied by calloc()... action->actions_before = NULL; action->actions_after = NULL; action->pseudo = FALSE; action->dumped = FALSE; action->processed = FALSE; action->seen_count = 0; */ action->extra = crm_str_table_new(); action->meta = crm_str_table_new(); if (save_action) { data_set->actions = g_list_prepend(data_set->actions, action); if(rsc == NULL) { g_hash_table_insert(data_set->singletons, action->uuid, action); } } if (rsc != NULL) { action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE); unpack_operation(action, action->op_entry, rsc->container, data_set); if (save_action) { rsc->actions = g_list_prepend(rsc->actions, action); } } if (save_action) { pe_rsc_trace(rsc, "Action %d created", action->id); } } if (optional == FALSE) { pe_rsc_trace(rsc, "Unset optional on %s", action->uuid); pe_clear_action_bit(action, pe_action_optional); } if (rsc != NULL) { enum action_tasks a_task = text2task(action->task); int warn_level = LOG_TRACE; if (save_action) { warn_level = LOG_WARNING; } if (is_set(action->flags, pe_action_have_node_attrs) == FALSE && action->node != NULL && action->op_entry != NULL) { pe_set_action_bit(action, pe_action_have_node_attrs); unpack_instance_attributes(data_set->input, action->op_entry, XML_TAG_ATTR_SETS, action->node->details->attrs, action->extra, NULL, FALSE, data_set->now); } if (is_set(action->flags, pe_action_pseudo)) { /* leave untouched */ } else if (action->node == NULL) { pe_rsc_trace(rsc, "Unset runnable on %s", action->uuid); pe_clear_action_bit(action, pe_action_runnable); } else if (is_not_set(rsc->flags, pe_rsc_managed) && g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL_MS) == NULL) { crm_debug("Action %s (unmanaged)", action->uuid); pe_rsc_trace(rsc, "Set optional on %s", action->uuid); pe_set_action_bit(action, pe_action_optional); /* action->runnable = FALSE; */ } else if (action->node->details->online == FALSE && (!is_container_remote_node(action->node) || action->node->details->remote_requires_reset)) { pe_clear_action_bit(action, pe_action_runnable); do_crm_log(warn_level, "Action %s on %s is unrunnable (offline)", action->uuid, action->node->details->uname); if (is_set(action->rsc->flags, pe_rsc_managed) && save_action && a_task == stop_rsc && action->node->details->unclean == FALSE) { pe_fence_node(data_set, action->node, "resource actions are unrunnable"); } } else if (action->node->details->pending) { pe_clear_action_bit(action, pe_action_runnable); do_crm_log(warn_level, "Action %s on %s is unrunnable (pending)", action->uuid, action->node->details->uname); } else if (action->needs == rsc_req_nothing) { pe_rsc_trace(rsc, "Action %s does not require anything", action->uuid); pe_action_set_reason(action, NULL, TRUE); pe_set_action_bit(action, pe_action_runnable); #if 0 /* * No point checking this * - if we don't have quorum we can't stonith anyway */ } else if (action->needs == rsc_req_stonith) { crm_trace("Action %s requires only stonith", action->uuid); action->runnable = TRUE; #endif } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_stop) { pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, NULL, "no quorum", pe_action_runnable, TRUE); crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid); } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_freeze) { pe_rsc_trace(rsc, "Check resource is already active: %s %s %s %s", rsc->id, action->uuid, role2text(rsc->next_role), role2text(rsc->role)); if (rsc->fns->active(rsc, TRUE) == FALSE || rsc->next_role > rsc->role) { pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, NULL, "quorum freeze", pe_action_runnable, TRUE); pe_rsc_debug(rsc, "%s\t%s (cancelled : quorum freeze)", action->node->details->uname, action->uuid); } } else if(is_not_set(action->flags, pe_action_runnable)) { pe_rsc_trace(rsc, "Action %s is runnable", action->uuid); //pe_action_set_reason(action, NULL, TRUE); pe_set_action_bit(action, pe_action_runnable); } if (save_action) { switch (a_task) { case stop_rsc: set_bit(rsc->flags, pe_rsc_stopping); break; case start_rsc: clear_bit(rsc->flags, pe_rsc_starting); if (is_set(action->flags, pe_action_runnable)) { set_bit(rsc->flags, pe_rsc_starting); } break; default: break; } } } free(key); return action; } static const char * unpack_operation_on_fail(action_t * action) { const char *value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL); if (safe_str_eq(action->task, CRMD_ACTION_STOP) && safe_str_eq(value, "standby")) { crm_config_err("on-fail=standby is not allowed for stop actions: %s", action->rsc->id); return NULL; } else if (safe_str_eq(action->task, CRMD_ACTION_DEMOTE) && !value) { /* demote on_fail defaults to master monitor value if present */ xmlNode *operation = NULL; const char *name = NULL; const char *role = NULL; const char *on_fail = NULL; const char *interval_spec = NULL; const char *enabled = NULL; CRM_CHECK(action->rsc != NULL, return NULL); for (operation = __xml_first_child(action->rsc->ops_xml); operation && !value; operation = __xml_next_element(operation)) { if (!crm_str_eq((const char *)operation->name, "op", TRUE)) { continue; } name = crm_element_value(operation, "name"); role = crm_element_value(operation, "role"); on_fail = crm_element_value(operation, XML_OP_ATTR_ON_FAIL); enabled = crm_element_value(operation, "enabled"); interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if (!on_fail) { continue; } else if (enabled && !crm_is_true(enabled)) { continue; } else if (safe_str_neq(name, "monitor") || safe_str_neq(role, "Master")) { continue; } else if (crm_parse_interval_spec(interval_spec) == 0) { continue; } value = on_fail; } } return value; } static xmlNode * find_min_interval_mon(resource_t * rsc, gboolean include_disabled) { guint interval_ms = 0; guint min_interval_ms = G_MAXUINT; const char *name = NULL; const char *value = NULL; const char *interval_spec = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { name = crm_element_value(operation, "name"); interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "enabled"); if (!include_disabled && value && crm_is_true(value) == FALSE) { continue; } if (safe_str_neq(name, RSC_STATUS)) { continue; } interval_ms = crm_parse_interval_spec(interval_spec); - if (interval_ms < 0) { - continue; - } - if (min_interval_ms < 0 || interval_ms < min_interval_ms) { + if (interval_ms && (interval_ms < min_interval_ms)) { min_interval_ms = interval_ms; op = operation; } } } return op; } static int unpack_start_delay(const char *value, GHashTable *meta) { int start_delay = 0; if (value != NULL) { start_delay = crm_get_msec(value); if (start_delay < 0) { start_delay = 0; } if (meta) { g_hash_table_replace(meta, strdup(XML_OP_ATTR_START_DELAY), crm_itoa(start_delay)); } } return start_delay; } static int unpack_interval_origin(const char *value, GHashTable *meta, xmlNode *xml_obj, guint interval_ms, crm_time_t *now) { int start_delay = 0; if ((interval_ms > 0) && (value != NULL)) { crm_time_t *origin = crm_time_new(value); if (origin && now) { crm_time_t *delay = NULL; int rc = crm_time_compare(origin, now); long long delay_s = 0; int interval_sec = interval_ms / 1000; crm_trace("Origin: %s, interval: %d", value, interval_sec); /* If 'origin' is in the future, find the most recent "multiple" that occurred in the past */ while(rc > 0) { crm_time_add_seconds(origin, -interval_sec); rc = crm_time_compare(origin, now); } /* Now find the first "multiple" that occurs after 'now' */ while (rc < 0) { crm_time_add_seconds(origin, interval_sec); rc = crm_time_compare(origin, now); } delay = crm_time_calculate_duration(origin, now); crm_time_log(LOG_TRACE, "origin", origin, crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); crm_time_log(LOG_TRACE, "now", now, crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); crm_time_log(LOG_TRACE, "delay", delay, crm_time_log_duration); delay_s = crm_time_get_seconds(delay); CRM_CHECK(delay_s >= 0, delay_s = 0); start_delay = delay_s * 1000; if (xml_obj) { crm_info("Calculated a start delay of %llds for %s", delay_s, ID(xml_obj)); } if (meta) { g_hash_table_replace(meta, strdup(XML_OP_ATTR_START_DELAY), crm_itoa(start_delay)); } crm_time_free(origin); crm_time_free(delay); } else if (!origin && xml_obj) { crm_config_err("Operation %s contained an invalid " XML_OP_ATTR_ORIGIN ": %s", ID(xml_obj), value); } } return start_delay; } static int unpack_timeout(const char *value) { int timeout = crm_get_msec(value); if (timeout < 0) { timeout = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S); } return timeout; } int pe_get_configured_timeout(resource_t *rsc, const char *action, pe_working_set_t *data_set) { xmlNode *child = NULL; const char *timeout = NULL; int timeout_ms = 0; for (child = first_named_child(rsc->ops_xml, XML_ATTR_OP); child != NULL; child = crm_next_same_xml(child)) { if (safe_str_eq(action, crm_element_value(child, XML_NVPAIR_ATTR_NAME))) { timeout = crm_element_value(child, XML_ATTR_TIMEOUT); break; } } if (timeout == NULL && data_set->op_defaults) { GHashTable *action_meta = crm_str_table_new(); unpack_instance_attributes(data_set->input, data_set->op_defaults, XML_TAG_META_SETS, NULL, action_meta, NULL, FALSE, data_set->now); timeout = g_hash_table_lookup(action_meta, XML_ATTR_TIMEOUT); } // @TODO check meta-attributes (including versioned meta-attributes) // @TODO maybe use min-interval monitor timeout as default for monitors timeout_ms = crm_get_msec(timeout); if (timeout_ms < 0) { timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S); } return timeout_ms; } #if ENABLE_VERSIONED_ATTRS static void unpack_versioned_meta(xmlNode *versioned_meta, xmlNode *xml_obj, guint interval_ms, crm_time_t *now) { xmlNode *attrs = NULL; xmlNode *attr = NULL; for (attrs = __xml_first_child(versioned_meta); attrs != NULL; attrs = __xml_next_element(attrs)) { for (attr = __xml_first_child(attrs); attr != NULL; attr = __xml_next_element(attr)) { const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); if (safe_str_eq(name, XML_OP_ATTR_START_DELAY)) { int start_delay = unpack_start_delay(value, NULL); crm_xml_add_int(attr, XML_NVPAIR_ATTR_VALUE, start_delay); } else if (safe_str_eq(name, XML_OP_ATTR_ORIGIN)) { int start_delay = unpack_interval_origin(value, NULL, xml_obj, interval_ms, now); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_OP_ATTR_START_DELAY); crm_xml_add_int(attr, XML_NVPAIR_ATTR_VALUE, start_delay); } else if (safe_str_eq(name, XML_ATTR_TIMEOUT)) { int timeout = unpack_timeout(value); crm_xml_add_int(attr, XML_NVPAIR_ATTR_VALUE, timeout); } } } } #endif /*! * \brief Unpack operation XML into an action structure * * Unpack an operation's meta-attributes (normalizing the interval, timeout, * and start delay values as integer milliseconds), requirements, and * failure policy. * * \param[in,out] action Action to unpack into * \param[in] xml_obj Operation XML (or NULL if all defaults) * \param[in] container Resource that contains affected resource, if any * \param[in] data_set Cluster state */ void unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, pe_working_set_t * data_set) { guint interval_ms = 0; int timeout = 0; char *value_ms = NULL; const char *value = NULL; const char *field = NULL; char *default_timeout = NULL; #if ENABLE_VERSIONED_ATTRS pe_rsc_action_details_t *rsc_details = NULL; #endif CRM_CHECK(action && action->rsc, return); // Cluster-wide unpack_instance_attributes(data_set->input, data_set->op_defaults, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); // Probe timeouts default differently, so handle timeout default later default_timeout = g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT); if (default_timeout) { default_timeout = strdup(default_timeout); g_hash_table_remove(action->meta, XML_ATTR_TIMEOUT); } // take precedence over defaults unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_META_SETS, NULL, action->meta, NULL, TRUE, data_set->now); #if ENABLE_VERSIONED_ATTRS rsc_details = pe_rsc_action_details(action); pe_unpack_versioned_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, rsc_details->versioned_parameters, data_set->now); pe_unpack_versioned_attributes(data_set->input, xml_obj, XML_TAG_META_SETS, NULL, rsc_details->versioned_meta, data_set->now); #endif /* Anything set as an XML property has highest precedence. * This ensures we use the name and interval from the tag. */ if (xml_obj) { xmlAttrPtr xIter = NULL; for (xIter = xml_obj->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_obj, prop_name); g_hash_table_replace(action->meta, strdup(prop_name), strdup(prop_value)); } } g_hash_table_remove(action->meta, "id"); // Normalize interval to milliseconds field = XML_LRM_ATTR_INTERVAL; value = g_hash_table_lookup(action->meta, field); if (value != NULL) { interval_ms = crm_parse_interval_spec(value); if (interval_ms > 0) { value_ms = crm_strdup_printf("%u", interval_ms); g_hash_table_replace(action->meta, strdup(field), value_ms); } else { g_hash_table_remove(action->meta, field); } } // Handle timeout default, now that we know the interval if (g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT)) { free(default_timeout); } else { // Probe timeouts default to minimum-interval monitor's if (safe_str_eq(action->task, RSC_STATUS) && (interval_ms == 0)) { xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE); if (min_interval_mon) { value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT); if (value) { crm_trace("\t%s defaults to minimum-interval monitor's timeout '%s'", action->uuid, value); free(default_timeout); default_timeout = strdup(value); } } } if (default_timeout) { g_hash_table_insert(action->meta, strdup(XML_ATTR_TIMEOUT), default_timeout); } } if (safe_str_neq(action->task, RSC_START) && safe_str_neq(action->task, RSC_PROMOTE)) { action->needs = rsc_req_nothing; value = "nothing (not start/promote)"; } else if (is_set(action->rsc->flags, pe_rsc_needs_fencing)) { action->needs = rsc_req_stonith; value = "fencing (resource)"; } else if (is_set(action->rsc->flags, pe_rsc_needs_quorum)) { action->needs = rsc_req_quorum; value = "quorum (resource)"; } else { action->needs = rsc_req_nothing; value = "nothing (resource)"; } pe_rsc_trace(action->rsc, "\tAction %s requires: %s", action->uuid, value); value = unpack_operation_on_fail(action); if (value == NULL) { } else if (safe_str_eq(value, "block")) { action->on_fail = action_fail_block; g_hash_table_insert(action->meta, strdup(XML_OP_ATTR_ON_FAIL), strdup("block")); } else if (safe_str_eq(value, "fence")) { action->on_fail = action_fail_fence; value = "node fencing"; if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { crm_config_err("Specifying on_fail=fence and" " stonith-enabled=false makes no sense"); action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } } else if (safe_str_eq(value, "standby")) { action->on_fail = action_fail_standby; value = "node standby"; } else if (safe_str_eq(value, "ignore") || safe_str_eq(value, "nothing")) { action->on_fail = action_fail_ignore; value = "ignore"; } else if (safe_str_eq(value, "migrate")) { action->on_fail = action_fail_migrate; value = "force migration"; } else if (safe_str_eq(value, "stop")) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } else if (safe_str_eq(value, "restart")) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate)"; } else if (safe_str_eq(value, "restart-container")) { if (container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate)"; } else { value = NULL; } } else { pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value); value = NULL; } /* defaults */ if (value == NULL && container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate) (default)"; /* for baremetal remote nodes, ensure that any failure that results in * dropping an active connection to a remote node results in fencing of * the remote node. * * There are only two action failures that don't result in fencing. * 1. probes - probe failures are expected. * 2. start - a start failure indicates that an active connection does not already * exist. The user can set op on-fail=fence if they really want to fence start * failures. */ } else if (((value == NULL) || !is_set(action->rsc->flags, pe_rsc_managed)) && (is_rsc_baremetal_remote_node(action->rsc, data_set) && !(safe_str_eq(action->task, CRMD_ACTION_STATUS) && (interval_ms == 0)) && (safe_str_neq(action->task, CRMD_ACTION_START)))) { if (!is_set(action->rsc->flags, pe_rsc_managed)) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop unmanaged baremetal remote node (enforcing default)"; } else { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { value = "fence baremetal remote node (default)"; } else { value = "recover baremetal remote node connection (default)"; } if (action->rsc->remote_reconnect_ms) { action->fail_role = RSC_ROLE_STOPPED; } action->on_fail = action_fail_reset_remote; } } else if (value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { action->on_fail = action_fail_fence; value = "resource fence (default)"; } else { action->on_fail = action_fail_block; value = "resource block (default)"; } } else if (value == NULL) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate) (default)"; } pe_rsc_trace(action->rsc, "\t%s failure handling: %s", action->task, value); value = NULL; if (xml_obj != NULL) { value = g_hash_table_lookup(action->meta, "role_after_failure"); if (value) { pe_warn_once(pe_wo_role_after, "Support for role_after_failure is deprecated and will be removed in a future release"); } } if (value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) { action->fail_role = text2role(value); } /* defaults */ if (action->fail_role == RSC_ROLE_UNKNOWN) { if (safe_str_eq(action->task, CRMD_ACTION_PROMOTE)) { action->fail_role = RSC_ROLE_SLAVE; } else { action->fail_role = RSC_ROLE_STARTED; } } pe_rsc_trace(action->rsc, "\t%s failure results in: %s", action->task, role2text(action->fail_role)); value = g_hash_table_lookup(action->meta, XML_OP_ATTR_START_DELAY); if (value) { unpack_start_delay(value, action->meta); } else { value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN); unpack_interval_origin(value, action->meta, xml_obj, interval_ms, data_set->now); } value = g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT); timeout = unpack_timeout(value); g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT), crm_itoa(timeout)); #if ENABLE_VERSIONED_ATTRS unpack_versioned_meta(rsc_details->versioned_meta, xml_obj, interval_ms, data_set->now); #endif } static xmlNode * find_rsc_op_entry_helper(resource_t * rsc, const char *key, gboolean include_disabled) { guint interval_ms = 0; gboolean do_retry = TRUE; char *local_key = NULL; const char *name = NULL; const char *value = NULL; const char *interval_spec = NULL; char *match_key = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; retry: for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { name = crm_element_value(operation, "name"); interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "enabled"); if (!include_disabled && value && crm_is_true(value) == FALSE) { continue; } interval_ms = crm_parse_interval_spec(interval_spec); match_key = generate_op_key(rsc->id, name, interval_ms); if (safe_str_eq(key, match_key)) { op = operation; } free(match_key); if (rsc->clone_name) { match_key = generate_op_key(rsc->clone_name, name, interval_ms); if (safe_str_eq(key, match_key)) { op = operation; } free(match_key); } if (op != NULL) { free(local_key); return op; } } } free(local_key); if (do_retry == FALSE) { return NULL; } do_retry = FALSE; if (strstr(key, CRMD_ACTION_MIGRATE) || strstr(key, CRMD_ACTION_MIGRATED)) { local_key = generate_op_key(rsc->id, "migrate", 0); key = local_key; goto retry; } else if (strstr(key, "_notify_")) { local_key = generate_op_key(rsc->id, "notify", 0); key = local_key; goto retry; } return NULL; } xmlNode * find_rsc_op_entry(resource_t * rsc, const char *key) { return find_rsc_op_entry_helper(rsc, key, FALSE); } void print_node(const char *pre_text, node_t * node, gboolean details) { if (node == NULL) { crm_trace("%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": "); return; } CRM_ASSERT(node->details); crm_trace("%s%s%sNode %s: (weight=%d, fixed=%s)", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ", node->details->online ? "" : "Unavailable/Unclean ", node->details->uname, node->weight, node->fixed ? "True" : "False"); if (details) { char *pe_mutable = strdup("\t\t"); GListPtr gIter = node->details->running_rsc; crm_trace("\t\t===Node Attributes"); g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable); free(pe_mutable); crm_trace("\t\t=== Resources"); for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; print_resource(LOG_TRACE, "\t\t", rsc, FALSE); } } } /* * Used by the HashTable for-loop */ void print_str_str(gpointer key, gpointer value, gpointer user_data) { crm_trace("%s%s %s ==> %s", user_data == NULL ? "" : (char *)user_data, user_data == NULL ? "" : ": ", (char *)key, (char *)value); } void print_resource(int log_level, const char *pre_text, resource_t * rsc, gboolean details) { long options = pe_print_log | pe_print_pending; if (rsc == NULL) { do_crm_log(log_level - 1, "%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": "); return; } if (details) { options |= pe_print_details; } rsc->fns->print(rsc, pre_text, options, &log_level); } void pe_free_action(action_t * action) { if (action == NULL) { return; } g_list_free_full(action->actions_before, free); /* action_wrapper_t* */ g_list_free_full(action->actions_after, free); /* action_wrapper_t* */ if (action->extra) { g_hash_table_destroy(action->extra); } if (action->meta) { g_hash_table_destroy(action->meta); } #if ENABLE_VERSIONED_ATTRS if (action->rsc) { pe_free_rsc_action_details(action); } #endif free(action->cancel_task); free(action->reason); free(action->task); free(action->uuid); free(action->node); free(action); } GListPtr find_recurring_actions(GListPtr input, node_t * not_on_node) { const char *value = NULL; GListPtr result = NULL; GListPtr gIter = input; CRM_CHECK(input != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; value = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL_MS); if (value == NULL) { /* skip */ } else if (safe_str_eq(value, "0")) { /* skip */ } else if (safe_str_eq(CRMD_ACTION_CANCEL, action->task)) { /* skip */ } else if (not_on_node == NULL) { crm_trace("(null) Found: %s", action->uuid); result = g_list_prepend(result, action); } else if (action->node == NULL) { /* skip */ } else if (action->node->details != not_on_node->details) { crm_trace("Found: %s", action->uuid); result = g_list_prepend(result, action); } } return result; } enum action_tasks get_complex_task(resource_t * rsc, const char *name, gboolean allow_non_atomic) { enum action_tasks task = text2task(name); if (rsc == NULL) { return task; } else if (allow_non_atomic == FALSE || rsc->variant == pe_native) { switch (task) { case stopped_rsc: case started_rsc: case action_demoted: case action_promoted: crm_trace("Folding %s back into its atomic counterpart for %s", name, rsc->id); return task - 1; break; default: break; } } return task; } action_t * find_first_action(GListPtr input, const char *uuid, const char *task, node_t * on_node) { GListPtr gIter = NULL; CRM_CHECK(uuid || task, return NULL); for (gIter = input; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (uuid != NULL && safe_str_neq(uuid, action->uuid)) { continue; } else if (task != NULL && safe_str_neq(task, action->task)) { continue; } else if (on_node == NULL) { return action; } else if (action->node == NULL) { continue; } else if (on_node->details == action->node->details) { return action; } } return NULL; } GListPtr find_actions(GListPtr input, const char *key, const node_t *on_node) { GListPtr gIter = input; GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (safe_str_neq(key, action->uuid)) { crm_trace("%s does not match action %s", key, action->uuid); continue; } else if (on_node == NULL) { crm_trace("Action %s matches (ignoring node)", key); result = g_list_prepend(result, action); } else if (action->node == NULL) { crm_trace("Action %s matches (unallocated, assigning to %s)", key, on_node->details->uname); action->node = node_copy(on_node); result = g_list_prepend(result, action); } else if (on_node->details == action->node->details) { crm_trace("Action %s on %s matches", key, on_node->details->uname); result = g_list_prepend(result, action); } else { crm_trace("Action %s on node %s does not match requested node %s", key, action->node->details->uname, on_node->details->uname); } } return result; } GListPtr find_actions_exact(GListPtr input, const char *key, node_t * on_node) { GListPtr gIter = input; GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; crm_trace("Matching %s against %s", key, action->uuid); if (safe_str_neq(key, action->uuid)) { crm_trace("Key mismatch: %s vs. %s", key, action->uuid); continue; } else if (on_node == NULL || action->node == NULL) { crm_trace("on_node=%p, action->node=%p", on_node, action->node); continue; } else if (safe_str_eq(on_node->details->id, action->node->details->id)) { result = g_list_prepend(result, action); } crm_trace("Node mismatch: %s vs. %s", on_node->details->id, action->node->details->id); } return result; } static void resource_node_score(resource_t * rsc, node_t * node, int score, const char *tag) { node_t *match = NULL; if ((rsc->exclusive_discover || (node->rsc_discover_mode == pe_discover_never)) && safe_str_eq(tag, "symmetric_default")) { /* This string comparision may be fragile, but exclusive resources and * exclusive nodes should not have the symmetric_default constraint * applied to them. */ return; } else if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; resource_node_score(child_rsc, node, score, tag); } } pe_rsc_trace(rsc, "Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score); match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (match == NULL) { match = node_copy(node); g_hash_table_insert(rsc->allowed_nodes, (gpointer) match->details->id, match); } match->weight = merge_weights(match->weight, score); } void resource_location(resource_t * rsc, node_t * node, int score, const char *tag, pe_working_set_t * data_set) { if (node != NULL) { resource_node_score(rsc, node, score, tag); } else if (data_set != NULL) { GListPtr gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node_iter = (node_t *) gIter->data; resource_node_score(rsc, node_iter, score, tag); } } else { GHashTableIter iter; node_t *node_iter = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node_iter)) { resource_node_score(rsc, node_iter, score, tag); } } if (node == NULL && score == -INFINITY) { if (rsc->allocated_to) { crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname); free(rsc->allocated_to); rsc->allocated_to = NULL; } } } #define sort_return(an_int, why) do { \ free(a_uuid); \ free(b_uuid); \ crm_trace("%s (%d) %c %s (%d) : %s", \ a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=', \ b_xml_id, b_call_id, why); \ return an_int; \ } while(0) gint sort_op_by_callid(gconstpointer a, gconstpointer b) { int a_call_id = -1; int b_call_id = -1; char *a_uuid = NULL; char *b_uuid = NULL; const xmlNode *xml_a = a; const xmlNode *xml_b = b; const char *a_xml_id = crm_element_value(xml_a, XML_ATTR_ID); const char *b_xml_id = crm_element_value(xml_b, XML_ATTR_ID); if (safe_str_eq(a_xml_id, b_xml_id)) { /* We have duplicate lrm_rsc_op entries in the status * section which is unliklely to be a good thing * - we can handle it easily enough, but we need to get * to the bottom of why it's happening. */ pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id); sort_return(0, "duplicate"); } crm_element_value_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id); crm_element_value_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id); if (a_call_id == -1 && b_call_id == -1) { /* both are pending ops so it doesn't matter since * stops are never pending */ sort_return(0, "pending"); } else if (a_call_id >= 0 && a_call_id < b_call_id) { sort_return(-1, "call id"); } else if (b_call_id >= 0 && a_call_id > b_call_id) { sort_return(1, "call id"); } else if (b_call_id >= 0 && a_call_id == b_call_id) { /* * The op and last_failed_op are the same * Order on last-rc-change */ int last_a = -1; int last_b = -1; crm_element_value_int(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a); crm_element_value_int(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b); crm_trace("rc-change: %d vs %d", last_a, last_b); if (last_a >= 0 && last_a < last_b) { sort_return(-1, "rc-change"); } else if (last_b >= 0 && last_a > last_b) { sort_return(1, "rc-change"); } sort_return(0, "rc-change"); } else { /* One of the inputs is a pending operation * Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other */ int a_id = -1; int b_id = -1; int dummy = -1; const char *a_magic = crm_element_value(xml_a, XML_ATTR_TRANSITION_MAGIC); const char *b_magic = crm_element_value(xml_b, XML_ATTR_TRANSITION_MAGIC); CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic")); if(!decode_transition_magic(a_magic, &a_uuid, &a_id, &dummy, &dummy, &dummy, &dummy)) { sort_return(0, "bad magic a"); } if(!decode_transition_magic(b_magic, &b_uuid, &b_id, &dummy, &dummy, &dummy, &dummy)) { sort_return(0, "bad magic b"); } /* try to determine the relative age of the operation... * some pending operations (e.g. a start) may have been superseded * by a subsequent stop * * [a|b]_id == -1 means it's a shutdown operation and _always_ comes last */ if (safe_str_neq(a_uuid, b_uuid) || a_id == b_id) { /* * some of the logic in here may be redundant... * * if the UUID from the TE doesn't match then one better * be a pending operation. * pending operations don't survive between elections and joins * because we query the LRM directly */ if (b_call_id == -1) { sort_return(-1, "transition + call"); } else if (a_call_id == -1) { sort_return(1, "transition + call"); } } else if ((a_id >= 0 && a_id < b_id) || b_id == -1) { sort_return(-1, "transition"); } else if ((b_id >= 0 && a_id > b_id) || a_id == -1) { sort_return(1, "transition"); } } /* we should never end up here */ CRM_CHECK(FALSE, sort_return(0, "default")); } time_t get_effective_time(pe_working_set_t * data_set) { if(data_set) { if (data_set->now == NULL) { crm_trace("Recording a new 'now'"); data_set->now = crm_time_new(NULL); } return crm_time_get_seconds_since_epoch(data_set->now); } crm_trace("Defaulting to 'now'"); return time(NULL); } gboolean get_target_role(resource_t * rsc, enum rsc_role_e * role) { enum rsc_role_e local_role = RSC_ROLE_UNKNOWN; const char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); CRM_CHECK(role != NULL, return FALSE); if (value == NULL || safe_str_eq("started", value) || safe_str_eq("default", value)) { return FALSE; } local_role = text2role(value); if (local_role == RSC_ROLE_UNKNOWN) { crm_config_err("%s: Unknown value for %s: %s", rsc->id, XML_RSC_ATTR_TARGET_ROLE, value); return FALSE; } else if (local_role > RSC_ROLE_STARTED) { if (is_set(uber_parent(rsc)->flags, pe_rsc_promotable)) { if (local_role > RSC_ROLE_SLAVE) { /* This is what we'd do anyway, just leave the default to avoid messing up the placement algorithm */ return FALSE; } } else { crm_config_err("%s is not part of a promotable clone resource, a %s of '%s' makes no sense", rsc->id, XML_RSC_ATTR_TARGET_ROLE, value); return FALSE; } } *role = local_role; return TRUE; } gboolean order_actions(action_t * lh_action, action_t * rh_action, enum pe_ordering order) { GListPtr gIter = NULL; action_wrapper_t *wrapper = NULL; GListPtr list = NULL; if (order == pe_order_none) { return FALSE; } if (lh_action == NULL || rh_action == NULL) { return FALSE; } crm_trace("Ordering Action %s before %s", lh_action->uuid, rh_action->uuid); /* Ensure we never create a dependency on ourselves... it's happened */ CRM_ASSERT(lh_action != rh_action); /* Filter dups, otherwise update_action_states() has too much work to do */ gIter = lh_action->actions_after; for (; gIter != NULL; gIter = gIter->next) { action_wrapper_t *after = (action_wrapper_t *) gIter->data; if (after->action == rh_action && (after->type & order)) { return FALSE; } } wrapper = calloc(1, sizeof(action_wrapper_t)); wrapper->action = rh_action; wrapper->type = order; list = lh_action->actions_after; list = g_list_prepend(list, wrapper); lh_action->actions_after = list; wrapper = NULL; /* order |= pe_order_implies_then; */ /* order ^= pe_order_implies_then; */ wrapper = calloc(1, sizeof(action_wrapper_t)); wrapper->action = lh_action; wrapper->type = order; list = rh_action->actions_before; list = g_list_prepend(list, wrapper); rh_action->actions_before = list; return TRUE; } action_t * get_pseudo_op(const char *name, pe_working_set_t * data_set) { action_t *op = NULL; if(data_set->singletons) { op = g_hash_table_lookup(data_set->singletons, name); } if (op == NULL) { op = custom_action(NULL, strdup(name), name, NULL, TRUE, TRUE, data_set); set_bit(op->flags, pe_action_pseudo); set_bit(op->flags, pe_action_runnable); } return op; } void destroy_ticket(gpointer data) { ticket_t *ticket = data; if (ticket->state) { g_hash_table_destroy(ticket->state); } free(ticket->id); free(ticket); } ticket_t * ticket_new(const char *ticket_id, pe_working_set_t * data_set) { ticket_t *ticket = NULL; if (ticket_id == NULL || strlen(ticket_id) == 0) { return NULL; } if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_ticket); } ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = calloc(1, sizeof(ticket_t)); if (ticket == NULL) { crm_err("Cannot allocate ticket '%s'", ticket_id); return NULL; } crm_trace("Creaing ticket entry for %s", ticket_id); ticket->id = strdup(ticket_id); ticket->granted = FALSE; ticket->last_granted = -1; ticket->standby = FALSE; ticket->state = crm_str_table_new(); g_hash_table_insert(data_set->tickets, strdup(ticket->id), ticket); } return ticket; } static void filter_parameters(xmlNode * param_set, const char *param_string, bool need_present) { if (param_set && param_string) { xmlAttrPtr xIter = param_set->properties; while (xIter) { const char *prop_name = (const char *)xIter->name; char *name = crm_strdup_printf(" %s ", prop_name); char *match = strstr(param_string, name); free(name); // Do now, because current entry might get removed below xIter = xIter->next; if (need_present && match == NULL) { crm_trace("%s not found in %s", prop_name, param_string); xml_remove_prop(param_set, prop_name); } else if (need_present == FALSE && match) { crm_trace("%s found in %s", prop_name, param_string); xml_remove_prop(param_set, prop_name); } } } } #if ENABLE_VERSIONED_ATTRS static void append_versioned_params(xmlNode *versioned_params, const char *ra_version, xmlNode *params) { GHashTable *hash = pe_unpack_versioned_parameters(versioned_params, ra_version); char *key = NULL; char *value = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { crm_xml_add(params, key, value); } g_hash_table_destroy(hash); } #endif static op_digest_cache_t * rsc_action_digest(resource_t * rsc, const char *task, const char *key, node_t * node, xmlNode * xml_op, pe_working_set_t * data_set) { op_digest_cache_t *data = NULL; data = g_hash_table_lookup(node->details->digest_cache, key); if (data == NULL) { GHashTable *local_rsc_params = crm_str_table_new(); action_t *action = custom_action(rsc, strdup(key), task, node, TRUE, FALSE, data_set); #if ENABLE_VERSIONED_ATTRS xmlNode *local_versioned_params = create_xml_node(NULL, XML_TAG_RSC_VER_ATTRS); const char *ra_version = NULL; #endif const char *op_version; const char *restart_list = NULL; const char *secure_list = " passwd password "; data = calloc(1, sizeof(op_digest_cache_t)); CRM_ASSERT(data != NULL); get_rsc_attributes(local_rsc_params, rsc, node, data_set); #if ENABLE_VERSIONED_ATTRS pe_get_versioned_attributes(local_versioned_params, rsc, node, data_set); #endif data->params_all = create_xml_node(NULL, XML_TAG_PARAMS); // REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside if (container_fix_remote_addr_in(rsc, data->params_all, "addr")) { crm_trace("Fixed addr for %s on %s", rsc->id, node->details->uname); } g_hash_table_foreach(local_rsc_params, hash2field, data->params_all); g_hash_table_foreach(action->extra, hash2field, data->params_all); g_hash_table_foreach(rsc->parameters, hash2field, data->params_all); g_hash_table_foreach(action->meta, hash2metafield, data->params_all); if(xml_op) { secure_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_SECURE); restart_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_RESTART); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); #if ENABLE_VERSIONED_ATTRS ra_version = crm_element_value(xml_op, XML_ATTR_RA_VERSION); #endif } else { op_version = CRM_FEATURE_SET; } #if ENABLE_VERSIONED_ATTRS append_versioned_params(local_versioned_params, ra_version, data->params_all); append_versioned_params(rsc->versioned_parameters, ra_version, data->params_all); { pe_rsc_action_details_t *details = pe_rsc_action_details(action); append_versioned_params(details->versioned_parameters, ra_version, data->params_all); } #endif filter_action_parameters(data->params_all, op_version); g_hash_table_destroy(local_rsc_params); pe_free_action(action); data->digest_all_calc = calculate_operation_digest(data->params_all, op_version); if (is_set(data_set->flags, pe_flag_sanitized)) { data->params_secure = copy_xml(data->params_all); if(secure_list) { filter_parameters(data->params_secure, secure_list, FALSE); } data->digest_secure_calc = calculate_operation_digest(data->params_secure, op_version); } if(xml_op && crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST) != NULL) { data->params_restart = copy_xml(data->params_all); if (restart_list) { filter_parameters(data->params_restart, restart_list, TRUE); } data->digest_restart_calc = calculate_operation_digest(data->params_restart, op_version); } g_hash_table_insert(node->details->digest_cache, strdup(key), data); } return data; } op_digest_cache_t * rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node, pe_working_set_t * data_set) { op_digest_cache_t *data = NULL; char *key = NULL; guint interval_ms = 0; const char *op_version; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *interval_ms_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS); const char *digest_all; const char *digest_restart; CRM_ASSERT(node != NULL); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); interval_ms = crm_parse_ms(interval_ms_s); key = generate_op_key(rsc->id, task, interval_ms); data = rsc_action_digest(rsc, task, key, node, xml_op, data_set); data->rc = RSC_DIGEST_MATCH; if (digest_restart && data->digest_restart_calc && strcmp(data->digest_restart_calc, digest_restart) != 0) { pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (restart:%s) %s", key, node->details->uname, crm_str(digest_restart), data->digest_restart_calc, op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); data->rc = RSC_DIGEST_RESTART; } else if (digest_all == NULL) { /* it is unknown what the previous op digest was */ data->rc = RSC_DIGEST_UNKNOWN; } else if (strcmp(digest_all, data->digest_all_calc) != 0) { pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (%s:%s) %s", key, node->details->uname, crm_str(digest_all), data->digest_all_calc, (interval_ms > 0)? "reschedule" : "reload", op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); data->rc = RSC_DIGEST_ALL; } free(key); return data; } #define STONITH_DIGEST_TASK "stonith-on" static op_digest_cache_t * fencing_action_digest_cmp(resource_t * rsc, node_t * node, pe_working_set_t * data_set) { char *key = generate_op_key(rsc->id, STONITH_DIGEST_TASK, 0); op_digest_cache_t *data = rsc_action_digest(rsc, STONITH_DIGEST_TASK, key, node, NULL, data_set); const char *digest_all = pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_ALL); const char *digest_secure = pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_SECURE); /* No 'reloads' for fencing device changes * * We use the resource id + agent + digest so that we can detect * changes to the agent and/or the parameters used */ char *search_all = crm_strdup_printf("%s:%s:%s", rsc->id, (const char*)g_hash_table_lookup(rsc->meta, XML_ATTR_TYPE), data->digest_all_calc); char *search_secure = crm_strdup_printf("%s:%s:%s", rsc->id, (const char*)g_hash_table_lookup(rsc->meta, XML_ATTR_TYPE), data->digest_secure_calc); data->rc = RSC_DIGEST_ALL; if (digest_all == NULL) { /* it is unknown what the previous op digest was */ data->rc = RSC_DIGEST_UNKNOWN; } else if (strstr(digest_all, search_all)) { data->rc = RSC_DIGEST_MATCH; } else if(digest_secure && data->digest_secure_calc) { if(strstr(digest_secure, search_secure)) { if (is_set(data_set->flags, pe_flag_sanitized)) { printf("Only 'private' parameters to %s for unfencing %s changed\n", rsc->id, node->details->uname); } data->rc = RSC_DIGEST_MATCH; } } if (data->rc == RSC_DIGEST_ALL && is_set(data_set->flags, pe_flag_sanitized) && data->digest_secure_calc) { if (is_set(data_set->flags, pe_flag_sanitized)) { printf("Parameters to %s for unfencing %s changed, try '%s:%s:%s'\n", rsc->id, node->details->uname, rsc->id, (const char *) g_hash_table_lookup(rsc->meta, XML_ATTR_TYPE), data->digest_secure_calc); } } free(key); free(search_all); free(search_secure); return data; } const char *rsc_printable_id(resource_t *rsc) { if (is_not_set(rsc->flags, pe_rsc_unique)) { return ID(rsc->xml); } return rsc->id; } void clear_bit_recursive(resource_t * rsc, unsigned long long flag) { GListPtr gIter = rsc->children; clear_bit(rsc->flags, flag); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; clear_bit_recursive(child_rsc, flag); } } void set_bit_recursive(resource_t * rsc, unsigned long long flag) { GListPtr gIter = rsc->children; set_bit(rsc->flags, flag); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; set_bit_recursive(child_rsc, flag); } } static GListPtr find_unfencing_devices(GListPtr candidates, GListPtr matches) { for (GListPtr gIter = candidates; gIter != NULL; gIter = gIter->next) { resource_t *candidate = gIter->data; const char *provides = g_hash_table_lookup(candidate->meta, XML_RSC_ATTR_PROVIDES); const char *requires = g_hash_table_lookup(candidate->meta, XML_RSC_ATTR_REQUIRES); if(candidate->children) { matches = find_unfencing_devices(candidate->children, matches); } else if (is_not_set(candidate->flags, pe_rsc_fence_device)) { continue; } else if (crm_str_eq(provides, "unfencing", FALSE) || crm_str_eq(requires, "unfencing", FALSE)) { matches = g_list_prepend(matches, candidate); } } return matches; } action_t * pe_fence_op(node_t * node, const char *op, bool optional, const char *reason, pe_working_set_t * data_set) { char *op_key = NULL; action_t *stonith_op = NULL; if(op == NULL) { op = data_set->stonith_action; } op_key = crm_strdup_printf("%s-%s-%s", CRM_OP_FENCE, node->details->uname, op); if(data_set->singletons) { stonith_op = g_hash_table_lookup(data_set->singletons, op_key); } if(stonith_op == NULL) { stonith_op = custom_action(NULL, op_key, CRM_OP_FENCE, node, TRUE, TRUE, data_set); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id); add_hash_param(stonith_op->meta, "stonith_action", op); if(is_remote_node(node) && is_set(data_set->flags, pe_flag_enable_unfencing)) { /* Extra work to detect device changes on remotes * * We may do this for all nodes in the future, but for now * the check_action_definition() based stuff works fine. * * Use "stonith-on" to avoid creating cache entries for * operations check_action_definition() would look for. */ long max = 1024; long digests_all_offset = 0; long digests_secure_offset = 0; char *digests_all = malloc(max); char *digests_secure = malloc(max); GListPtr matches = find_unfencing_devices(data_set->resources, NULL); for (GListPtr gIter = matches; gIter != NULL; gIter = gIter->next) { resource_t *match = gIter->data; op_digest_cache_t *data = fencing_action_digest_cmp(match, node, data_set); if(data->rc == RSC_DIGEST_ALL) { optional = FALSE; crm_notice("Unfencing %s (remote): because the definition of %s changed", node->details->uname, match->id); if (is_set(data_set->flags, pe_flag_sanitized)) { /* Extra detail for those running from the commandline */ fprintf(stdout, " notice: Unfencing %s (remote): because the definition of %s changed\n", node->details->uname, match->id); } } digests_all_offset += snprintf( digests_all+digests_all_offset, max-digests_all_offset, "%s:%s:%s,", match->id, (const char*)g_hash_table_lookup(match->meta, XML_ATTR_TYPE), data->digest_all_calc); digests_secure_offset += snprintf( digests_secure+digests_secure_offset, max-digests_secure_offset, "%s:%s:%s,", match->id, (const char*)g_hash_table_lookup(match->meta, XML_ATTR_TYPE), data->digest_secure_calc); } g_hash_table_insert(stonith_op->meta, strdup(XML_OP_ATTR_DIGESTS_ALL), digests_all); g_hash_table_insert(stonith_op->meta, strdup(XML_OP_ATTR_DIGESTS_SECURE), digests_secure); } } else { free(op_key); } if(optional == FALSE && pe_can_fence(data_set, node)) { pe_action_required(stonith_op, NULL, reason); } else if(reason && stonith_op->reason == NULL) { stonith_op->reason = strdup(reason); } return stonith_op; } void trigger_unfencing( resource_t * rsc, node_t *node, const char *reason, action_t *dependency, pe_working_set_t * data_set) { if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) { /* No resources require it */ return; } else if (rsc != NULL && is_not_set(rsc->flags, pe_rsc_fence_device)) { /* Wasn't a stonith device */ return; } else if(node && node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) { action_t *unfence = pe_fence_op(node, "on", FALSE, reason, data_set); if(dependency) { order_actions(unfence, dependency, pe_order_optional); } } else if(rsc) { GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if(node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) { trigger_unfencing(rsc, node, reason, dependency, data_set); } } } } gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref) { tag_t *tag = NULL; GListPtr gIter = NULL; gboolean is_existing = FALSE; CRM_CHECK(tags && tag_name && obj_ref, return FALSE); tag = g_hash_table_lookup(tags, tag_name); if (tag == NULL) { tag = calloc(1, sizeof(tag_t)); if (tag == NULL) { return FALSE; } tag->id = strdup(tag_name); tag->refs = NULL; g_hash_table_insert(tags, strdup(tag_name), tag); } for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) { const char *existing_ref = (const char *) gIter->data; if (crm_str_eq(existing_ref, obj_ref, TRUE)){ is_existing = TRUE; break; } } if (is_existing == FALSE) { tag->refs = g_list_append(tag->refs, strdup(obj_ref)); crm_trace("Added: tag=%s ref=%s", tag->id, obj_ref); } return TRUE; } void pe_action_set_flag_reason(const char *function, long line, pe_action_t *action, pe_action_t *reason, const char *text, enum pe_action_flags flags, bool overwrite) { bool unset = FALSE; bool update = FALSE; const char *change = NULL; if(is_set(flags, pe_action_runnable)) { unset = TRUE; change = "unrunnable"; } else if(is_set(flags, pe_action_optional)) { unset = TRUE; change = "required"; } else if(is_set(flags, pe_action_migrate_runnable)) { unset = TRUE; overwrite = TRUE; change = "unrunnable"; } else if(is_set(flags, pe_action_dangle)) { change = "dangling"; } else if(is_set(flags, pe_action_requires_any)) { change = "required"; } else { crm_err("Unknown flag change to %s by %s: 0x%.16x", flags, action->uuid, (reason? reason->uuid : 0)); } if(unset) { if(is_set(action->flags, flags)) { action->flags = crm_clear_bit(function, line, action->uuid, action->flags, flags); update = TRUE; } } else { if(is_not_set(action->flags, flags)) { action->flags = crm_set_bit(function, line, action->uuid, action->flags, flags); update = TRUE; } } if((change && update) || text) { char *reason_text = NULL; if(reason == NULL) { pe_action_set_reason(action, text, overwrite); } else if(reason->rsc == NULL) { reason_text = crm_strdup_printf("%s %s%c %s", change, reason->task, text?':':0, text?text:""); } else { reason_text = crm_strdup_printf("%s %s %s%c %s", change, reason->rsc->id, reason->task, text?':':0, text?text:"NA"); } if(reason_text && action->rsc != reason->rsc) { pe_action_set_reason(action, reason_text, overwrite); } free(reason_text); } } void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite) { if(action->reason && overwrite) { pe_rsc_trace(action->rsc, "Changing %s reason from '%s' to '%s'", action->uuid, action->reason, reason); free(action->reason); action->reason = NULL; } if(action->reason == NULL) { if(reason) { pe_rsc_trace(action->rsc, "Set %s reason to '%s'", action->uuid, reason); action->reason = strdup(reason); } else { action->reason = NULL; } } }