diff --git a/crmd/lrm.c b/crmd/lrm.c index 6dd83699dd..144233d68b 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1,2480 +1,2480 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define START_DELAY_THRESHOLD 5 * 60 * 1000 #define MAX_LRM_REG_FAILS 30 struct delete_event_s { int rc; const char *rsc; lrm_state_t *lrm_state; }; static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, const char *user_name); static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation); static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request); void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id); static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level); static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op); static void lrm_connection_destroy(void) { if (is_set(fsa_input_register, R_LRM_CONNECTED)) { crm_crit("LRM Connection failed"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit(fsa_input_register, R_LRM_CONNECTED); } else { crm_info("LRM Connection disconnected"); } } static char * make_stop_id(const char *rsc, int call_id) { char *op_id = NULL; op_id = calloc(1, strlen(rsc) + 34); if (op_id != NULL) { snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id); } return op_id; } static void copy_instance_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") == NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } static void copy_meta_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") != NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } /*! * \internal * \brief Remove a recurring operation from a resource's history * * \param[in,out] history Resource history to modify * \param[in] op Operation to remove * * \return TRUE if the operation was found and removed, FALSE otherwise */ static gboolean history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_event_data_t *existing = iter->data; if ((op->interval == existing->interval) && crm_str_eq(op->rsc_id, existing->rsc_id, TRUE) && safe_str_eq(op->op_type, existing->op_type)) { history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter); lrmd_free_event(existing); return TRUE; } } return FALSE; } /*! * \internal * \brief Free all recurring operations in resource history * * \param[in,out] history Resource history to modify */ static void history_free_recurring_ops(rsc_history_t *history) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_free_event(iter->data); } g_list_free(history->recurring_op_list); history->recurring_op_list = NULL; } /*! * \internal * \brief Free resource history * * \param[in,out] history Resource history to free */ void history_free(gpointer data) { rsc_history_t *history = (rsc_history_t*)data; if (history->stop_params) { g_hash_table_destroy(history->stop_params); } /* Don't need to free history->rsc.id because it's set to history->id */ free(history->rsc.type); free(history->rsc.class); free(history->rsc.provider); lrmd_free_event(history->failed); lrmd_free_event(history->last); free(history->id); history_free_recurring_ops(history); free(history); } static void update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int target_rc = 0; rsc_history_t *entry = NULL; if (op->rsc_deleted) { crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL); return; } if (safe_str_eq(op->op_type, RSC_NOTIFY)) { return; } crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type); entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id); if (entry == NULL && rsc) { entry = calloc(1, sizeof(rsc_history_t)); entry->id = strdup(op->rsc_id); g_hash_table_insert(lrm_state->resource_history, entry->id, entry); entry->rsc.id = entry->id; entry->rsc.type = strdup(rsc->type); entry->rsc.class = strdup(rsc->class); if (rsc->provider) { entry->rsc.provider = strdup(rsc->provider); } else { entry->rsc.provider = NULL; } } else if (entry == NULL) { crm_info("Resource %s no longer exists, not updating cache", op->rsc_id); return; } entry->last_callid = op->call_id; target_rc = rsc_op_expected_rc(op); if (op->op_status == PCMK_LRM_OP_CANCELLED) { if (op->interval > 0) { crm_trace("Removing cancelled recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval); history_remove_recurring_op(entry, op); return; } else { crm_trace("Skipping %s_%s_%d rc=%d, status=%d", op->rsc_id, op->op_type, op->interval, op->rc, op->op_status); } } else if (did_rsc_op_fail(op, target_rc)) { /* We must store failed monitors here * - otherwise the block below will cause them to be forgetten them when a stop happens */ if (entry->failed) { lrmd_free_event(entry->failed); } entry->failed = lrmd_copy_event(op); } else if (op->interval == 0) { if (entry->last) { lrmd_free_event(entry->last); } entry->last = lrmd_copy_event(op); if (op->params && (safe_str_eq(CRMD_ACTION_START, op->op_type) || safe_str_eq("reload", op->op_type) || safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) { if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } entry->stop_params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params); } } if (op->interval > 0) { /* Ensure there are no duplicates */ history_remove_recurring_op(entry, op); crm_trace("Adding recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval); entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); } else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) { crm_trace("Dropping %d recurring ops because of: %s_%s_%d", g_list_length(entry->recurring_op_list), op->rsc_id, op->op_type, op->interval); history_free_recurring_ops(entry); } } /*! * \internal * \brief Send a direct OK ack for a resource task * * \param[in] lrm_state LRM connection * \param[in] input Input message being ack'ed * \param[in] rsc_id ID of affected resource * \param[in] rsc Affected resource (if available) * \param[in] task Operation task being ack'ed * \param[in] ack_host Name of host to send ack to * \param[in] ack_sys IPC system name to ack */ static void send_task_ok_ack(lrm_state_t *lrm_state, ha_msg_input_t *input, const char *rsc_id, lrmd_rsc_info_t *rsc, const char *task, const char *ack_host, const char *ack_sys) { lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task); CRM_ASSERT(op != NULL); op->rc = PCMK_OCF_OK; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(ack_host, ack_sys, rsc, op, rsc_id); lrmd_free_event(op); } void lrm_op_callback(lrmd_event_data_t * op) { const char *nodename = NULL; lrm_state_t *lrm_state = NULL; CRM_CHECK(op != NULL, return); /* determine the node name for this connection. */ nodename = op->remote_nodename ? op->remote_nodename : fsa_our_uname; if (op->type == lrmd_event_disconnect && (safe_str_eq(nodename, fsa_our_uname))) { /* if this is the local lrmd ipc connection, set the right bits in the * crmd when the connection goes down */ lrm_connection_destroy(); return; } else if (op->type != lrmd_event_exec_complete) { /* we only need to process execution results */ return; } lrm_state = lrm_state_find(nodename); CRM_ASSERT(lrm_state != NULL); process_lrm_event(lrm_state, op, NULL); } /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* This only pertains to local lrmd connections. Remote connections are handled as * resources within the pengine. Connecting and disconnecting from remote lrmd instances * handled differently than the local. */ lrm_state_t *lrm_state = NULL; if(fsa_our_uname == NULL) { return; /* Nothing to do */ } lrm_state = lrm_state_find_or_create(fsa_our_uname); if (lrm_state == NULL) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (action & A_LRM_DISCONNECT) { if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) { if (action == A_LRM_DISCONNECT) { crmd_fsa_stall(FALSE); return; } } clear_bit(fsa_input_register, R_LRM_CONNECTED); crm_info("Disconnecting from the LRM"); lrm_state_disconnect(lrm_state); lrm_state_reset_tables(lrm_state); crm_notice("Disconnected from the LRM"); } if (action & A_LRM_CONNECT) { int ret = pcmk_ok; crm_debug("Connecting to the LRM"); ret = lrm_state_ipc_connect(lrm_state); if (ret != pcmk_ok) { if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) { crm_warn("Failed to sign on to the LRM %d" " (%d max) times", lrm_state->num_lrm_register_fails, MAX_LRM_REG_FAILS); crm_timer_start(wait_timer); crmd_fsa_stall(FALSE); return; } } if (ret != pcmk_ok) { crm_err("Failed to sign on to the LRM %d" " (max) times", lrm_state->num_lrm_register_fails); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } set_bit(fsa_input_register, R_LRM_CONNECTED); crm_info("LRM connection established"); } if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; const char *when = "lrm disconnect"; GHashTableIter gIter; const char *key = NULL; rsc_history_t *entry = NULL; struct recurring_op_s *pending = NULL; crm_debug("Checking for active resources before exit"); if (cur_state == S_TERMINATE) { log_level = LOG_ERR; when = "shutdown"; } else if (is_set(fsa_input_register, R_SHUTDOWN)) { when = "shutdown... waiting"; } if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) { guint removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_actions, lrm_state); guint nremaining = g_hash_table_size(lrm_state->pending_ops); if (removed || nremaining) { crm_notice("Stopped %u recurring operations at %s (%u operations remaining)", removed, when, nremaining); } } if (lrm_state->pending_ops) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) { /* Ignore recurring actions in the shutdown calculations */ if (pending->interval == 0) { counter++; } } } if (counter > 0) { do_crm_log(log_level, "%d pending LRM operations at %s", counter, when); if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) { do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key); } } else { rc = FALSE; } return rc; } if (lrm_state->resource_history == NULL) { return rc; } if (cur_state == S_TERMINATE || is_set(fsa_input_register, R_SHUTDOWN)) { /* At this point we're not waiting, we're just shutting down */ when = "shutdown"; } counter = 0; g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) { if (is_rsc_active(lrm_state, entry->id) == FALSE) { continue; } counter++; crm_trace("Found %s active", entry->id); if (lrm_state->pending_ops) { GHashTableIter hIter; g_hash_table_iter_init(&hIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) { if (crm_str_eq(entry->id, pending->rsc_id, TRUE)) { crm_notice("%sction %s (%s) incomplete at %s", pending->interval == 0 ? "A" : "Recurring a", key, pending->op_key, when); } } } } if (counter) { crm_err("%d resources were active at %s.", counter, when); } return rc; } GHashTable *metadata_hash = NULL; static char * get_rsc_metadata(const char *type, const char *rclass, const char *provider, bool force) { int rc = pcmk_ok; int len = 0; char *key = NULL; char *metadata = NULL; /* Always use a local connection for this operation */ lrm_state_t *lrm_state = lrm_state_find(fsa_our_uname); CRM_CHECK(type != NULL, return NULL); CRM_CHECK(rclass != NULL, return NULL); CRM_CHECK(lrm_state != NULL, return NULL); if (provider == NULL) { provider = "heartbeat"; } if (metadata_hash == NULL) { metadata_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } len = strlen(type) + strlen(rclass) + strlen(provider) + 4; key = malloc(len); if(key == NULL) { return NULL; } snprintf(key, len, "%s::%s:%s", rclass, provider, type); if(force == FALSE) { metadata = g_hash_table_lookup(metadata_hash, key); if (metadata) { crm_trace("Retrieved cached metadata for %s", key); } } if(metadata == NULL) { rc = lrm_state_get_metadata(lrm_state, rclass, provider, type, &metadata, 0); if(rc == pcmk_ok) { crm_trace("Retrieved live metadata for %s", key); CRM_LOG_ASSERT(metadata != NULL); g_hash_table_insert(metadata_hash, key, metadata); key = NULL; } else { crm_trace("No metadata found for %s: %s" CRM_XS " rc=%d", key, pcmk_strerror(rc), rc); CRM_CHECK(metadata == NULL, metadata = NULL); } } free(key); return metadata; } static char * build_parameter_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode *result, const char *criteria, bool target, bool invert_for_xml) { int len = 0; int max = 0; char *list = NULL; xmlNode *param = NULL; xmlNode *params = NULL; const char *secure_terms[] = { "password", "passwd", "user", }; if(safe_str_eq("private", criteria)) { /* It will take time for the agents to be updated * Check for some common terms */ max = DIMOF(secure_terms); } params = find_xml_node(metadata, "parameters", TRUE); for (param = __xml_first_child(params); param != NULL; param = __xml_next(param)) { if (crm_str_eq((const char *)param->name, "parameter", TRUE)) { bool accept = FALSE; const char *name = crm_element_value(param, "name"); const char *value = crm_element_value(param, criteria); if(max && value) { /* Turn off the compatibility logic once an agent has been updated to know about 'private' */ max = 0; } if (name == NULL) { crm_err("Invalid parameter in %s metadata", op->rsc_id); } else if(target == crm_is_true(value)) { accept = TRUE; } else if(max) { int lpc = 0; bool found = FALSE; for(lpc = 0; found == FALSE && lpc < max; lpc++) { if(safe_str_eq(secure_terms[lpc], name)) { found = TRUE; } } if(found == target) { accept = TRUE; } } if(accept) { int start = len; crm_trace("Attr %s is %s%s", name, target?"":"not ", criteria); len += strlen(name) + 2; list = realloc_safe(list, len + 1); sprintf(list + start, " %s ", name); } else { crm_trace("Rejecting %s for %s", name, criteria); } if(invert_for_xml) { crm_trace("Inverting %s match for %s xml", name, criteria); accept = !accept; } if(result && accept) { value = g_hash_table_lookup(op->params, name); if(value != NULL) { char *summary = crm_versioned_param_summary(op->versioned_params, name); if (summary) { crm_trace("Adding attr %s=%s to the xml result", name, summary); crm_xml_add(result, name, summary); free(summary); } else { crm_trace("Adding attr %s=%s to the xml result", name, value); crm_xml_add(result, name, value); } } } } } return list; } static bool resource_supports_action(xmlNode *metadata, const char *name) { const char *value = NULL; xmlNode *action = NULL; xmlNode *actions = NULL; actions = find_xml_node(metadata, "actions", TRUE); for (action = __xml_first_child(actions); action != NULL; action = __xml_next(action)) { if (crm_str_eq((const char *)action->name, "action", TRUE)) { value = crm_element_value(action, "name"); if (safe_str_eq(name, value)) { return TRUE; } } } return FALSE; } static void append_restart_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode * update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *restart = NULL; CRM_LOG_ASSERT(op->params != NULL); if (op->interval > 0) { /* monitors are not reloadable */ return; } if(resource_supports_action(metadata, "reload")) { restart = create_xml_node(NULL, XML_TAG_PARAMS); /* Any parameters with unique="1" should be added into the "op-force-restart" list. */ list = build_parameter_list(op, metadata, restart, "unique", TRUE, FALSE); } else { /* Resource does not support reloads */ return; } digest = calculate_operation_digest(restart, version); /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload, * no matter if it actually supports any parameters with unique="1"). */ crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list? list: ""); crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(restart, "restart digest source"); free_xml(restart); free(digest); free(list); } static void append_secure_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode * update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *secure = NULL; CRM_LOG_ASSERT(op->params != NULL); /* * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on * the insecure ones */ secure = create_xml_node(NULL, XML_TAG_PARAMS); list = build_parameter_list(op, metadata, secure, "private", TRUE, TRUE); if (list != NULL) { digest = calculate_operation_digest(secure, version); crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, list); crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(secure, "secure digest source"); } else { crm_trace("%s: no secure parameters", op->rsc_id); } free_xml(secure); free(digest); free(list); } static gboolean build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *src) { int target_rc = 0; xmlNode *xml_op = NULL; xmlNode *metadata = NULL; const char *m_string = NULL; const char *caller_version = NULL; if (op == NULL) { return FALSE; } target_rc = rsc_op_expected_rc(op); /* there is a small risk in formerly mixed clusters that it will * be sub-optimal. * * however with our upgrade policy, the update we send should * still be completely supported anyway */ caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); CRM_LOG_ASSERT(caller_version != NULL); if(caller_version == NULL) { caller_version = CRM_FEATURE_SET; } crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version); xml_op = create_operation_update(parent, op, caller_version, target_rc, fsa_our_uname, src, LOG_DEBUG); if (xml_op == NULL) { return TRUE; } if (rsc == NULL || op->params == NULL || crm_str_eq(CRMD_ACTION_STOP, op->op_type, TRUE)) { /* Stopped resources don't need the digest logic */ crm_trace("No digests needed for %s %p %p %s", op->rsc_id, op->params, rsc, op->op_type); return TRUE; } m_string = get_rsc_metadata(rsc->type, rsc->class, rsc->provider, safe_str_eq(op->op_type, RSC_START)); if(m_string == NULL) { crm_err("No metadata for %s::%s:%s", rsc->class, rsc->provider, rsc->type); return TRUE; } metadata = string2xml(m_string); if(metadata == NULL) { crm_err("Metadata for %s::%s:%s is not valid XML", rsc->class, rsc->provider, rsc->type); return TRUE; } crm_trace("Including additional digests for %s::%s:%s", rsc->class, rsc->provider, rsc->type); append_restart_list(op, metadata, xml_op, caller_version); append_secure_list(op, metadata, xml_op, caller_version); free_xml(metadata); return TRUE; } static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id) { rsc_history_t *entry = NULL; entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->last == NULL) { return FALSE; } crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type, entry->last->interval, entry->last->rc); if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)) { return FALSE; } else if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) { /* a stricter check is too complex... * leave that to the PE */ return FALSE; } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) { return FALSE; } else if (entry->last->interval == 0 && entry->last->rc == PCMK_OCF_NOT_CONFIGURED) { /* Badly configured resources can't be reliably stopped */ return FALSE; } return TRUE; } static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) { GHashTableIter iter; rsc_history_t *entry = NULL; g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { GList *gIter = NULL; xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.class); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider); if (entry->last && entry->last->params) { const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); if (container) { crm_trace("Resource %s is a part of container resource %s", entry->id, container); crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container); } } build_operation_update(xml_rsc, &(entry->rsc), entry->failed, __FUNCTION__); build_operation_update(xml_rsc, &(entry->rsc), entry->last, __FUNCTION__); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { build_operation_update(xml_rsc, &(entry->rsc), gIter->data, __FUNCTION__); } } return FALSE; } static xmlNode * do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags) { xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; crm_node_t *peer = NULL; peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY); CRM_CHECK(peer != NULL, return NULL); xml_state = create_node_state_update(peer, update_flags, NULL, __FUNCTION__); xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(lrm_state, rsc_list); crm_log_xml_trace(xml_state, "Current state of the LRM"); return xml_state; } xmlNode * do_lrm_query(gboolean is_replace, const char *node_name) { lrm_state_t *lrm_state = lrm_state_find(node_name); xmlNode *xml_state; if (!lrm_state) { crm_err("Could not query lrm state for lrmd node %s", node_name); return NULL; } xml_state = do_lrm_query_internal(lrm_state, node_update_cluster|node_update_peer); /* In case this function is called to generate a join confirmation to * send to the DC, force the current and expected join state to member. * This isn't necessary for newer DCs but is backward compatible. */ crm_xml_add(xml_state, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER); crm_xml_add(xml_state, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER); return xml_state; } static void notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) { lrmd_event_data_t *op = NULL; const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); crm_info("Notifying %s on %s that %s was%s deleted", from_sys, from_host, rsc_id, rc == pcmk_ok ? "" : " not"); op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE); CRM_ASSERT(op != NULL); if (rc == pcmk_ok) { op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; } else { op->op_status = PCMK_LRM_OP_ERROR; op->rc = PCMK_OCF_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { /* this isn't expected - trigger a new transition */ time_t now = time(NULL); char *now_s = crm_itoa(now); crm_debug("Triggering a refresh after %s deleted %s from the LRM", from_sys, rsc_id); update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, FALSE, NULL, NULL); free(now_s); } } static gboolean lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) { struct delete_event_s *event = user_data; struct pending_deletion_op_s *op = value; if (crm_str_eq(event->rsc, op->rsc, TRUE)) { notify_deleted(event->lrm_state, op->input, event->rsc, event->rc); return TRUE; } return FALSE; } static gboolean lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; struct recurring_op_s *pending = value; if (crm_str_eq(rsc, pending->rsc_id, TRUE)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } /* * Remove the rsc from the CIB * * Avoids refreshing the entire LRM section of this host */ #define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, const char *user_name) { char *rsc_xpath = NULL; int max = 0; int rc = pcmk_ok; CRM_CHECK(rsc_id != NULL, return -ENXIO); max = strlen(rsc_template) + strlen(rsc_id) + strlen(lrm_state->node_name) + 1; rsc_xpath = calloc(1, max); snprintf(rsc_xpath, max, rsc_template, lrm_state->node_name, rsc_id); rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath, NULL, NULL, call_options | cib_xpath, user_name); free(rsc_xpath); return rc; } static void delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, GHashTableIter * rsc_gIter, int rc, const char *user_name) { struct delete_event_s event; CRM_CHECK(rsc_id != NULL, return); if (rc == pcmk_ok) { char *rsc_id_copy = strdup(rsc_id); if (rsc_gIter) g_hash_table_iter_remove(rsc_gIter); else g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); crm_debug("sync: Sending delete op for %s", rsc_id_copy); delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name); g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy); free(rsc_id_copy); } if (input) { notify_deleted(lrm_state, input, rsc_id, rc); } event.rc = rc; event.rsc = rsc_id; event.lrm_state = lrm_state; g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event); } /* * Remove the op from the CIB * * Avoids refreshing the entire LRM section of this host */ #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s']" #define op_call_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s' and @"XML_LRM_ATTR_CALLID"='%d']" static void delete_op_entry(lrm_state_t * lrm_state, lrmd_event_data_t * op, const char *rsc_id, const char *key, int call_id) { xmlNode *xml_top = NULL; if (op != NULL) { xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); if (op->interval > 0) { char *op_id = generate_op_key(op->rsc_id, op->op_type, op->interval); /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ crm_xml_add(xml_top, XML_ATTR_ID, op_id); free(op_id); } crm_debug("async: Sending delete op for %s_%s_%d (call=%d)", op->rsc_id, op->op_type, op->interval, op->call_id); fsa_cib_conn->cmds->delete(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_quorum_override); } else if (rsc_id != NULL && key != NULL) { int max = 0; char *op_xpath = NULL; if (call_id > 0) { max = strlen(op_call_template) + strlen(rsc_id) + strlen(lrm_state->node_name) + strlen(key) + 10; op_xpath = calloc(1, max); snprintf(op_xpath, max, op_call_template, lrm_state->node_name, rsc_id, key, call_id); } else { max = strlen(op_template) + strlen(rsc_id) + strlen(lrm_state->node_name) + strlen(key) + 1; op_xpath = calloc(1, max); snprintf(op_xpath, max, op_template, lrm_state->node_name, rsc_id, key); } crm_debug("sync: Sending delete op for %s (call=%d)", rsc_id, call_id); fsa_cib_conn->cmds->delete(fsa_cib_conn, op_xpath, NULL, cib_quorum_override | cib_xpath); free(op_xpath); } else { crm_err("Not enough information to delete op entry: rsc=%p key=%p", rsc_id, key); return; } crm_log_xml_trace(xml_top, "op:cancel"); free_xml(xml_top); } void lrm_clear_last_failure(const char *rsc_id, const char *node_name) { char *attr = NULL; GHashTableIter iter; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; rsc_history_t *entry = NULL; attr = generate_op_key(rsc_id, "last_failure", 0); /* This clears last failure for every lrm state that has this rsc.*/ for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (node_name != NULL) { if (strcmp(node_name, lrm_state->node_name) != 0) { /* filter by node_name if node_name is present */ continue; } } delete_op_entry(lrm_state, NULL, rsc_id, attr, 0); if (!lrm_state->resource_history) { continue; } g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { if (crm_str_eq(rsc_id, entry->id, TRUE)) { lrmd_free_event(entry->failed); entry->failed = NULL; } } } free(attr); g_list_free(lrm_state_list); } /* Returns: gboolean - cancellation is in progress */ static gboolean cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove) { int rc = pcmk_ok; char *local_key = NULL; struct recurring_op_s *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); if (key == NULL) { local_key = make_stop_id(rsc_id, op); key = local_key; } pending = g_hash_table_lookup(lrm_state->pending_ops, key); if (pending) { if (remove && pending->remove == FALSE) { pending->remove = TRUE; crm_debug("Scheduling %s for removal", key); } if (pending->cancelled) { crm_debug("Operation %s already cancelled", key); free(local_key); return FALSE; } pending->cancelled = TRUE; } else { crm_info("No pending op found for %s", key); free(local_key); return FALSE; } crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval); if (rc == pcmk_ok) { crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); free(local_key); return TRUE; } crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key); /* The caller needs to make sure the entry is * removed from the pending_ops list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from pending_ops will block * the node from shutting down */ free(local_key); return FALSE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct cancel_data *data = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (crm_str_eq(op->op_key, data->key, TRUE)) { data->done = TRUE; remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove); } return remove; } static gboolean cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove) { guint removed = 0; struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->pending_ops, cancel_action_by_key, &data); crm_trace("Removed %u op cache entries, new size: %u", removed, g_hash_table_size(lrm_state->pending_ops)); return data.done; } static lrmd_rsc_info_t * get_lrm_resource(lrm_state_t * lrm_state, xmlNode * resource, xmlNode * op_msg, gboolean do_create) { lrmd_rsc_info_t *rsc = NULL; const char *id = ID(resource); const char *type = crm_element_value(resource, XML_ATTR_TYPE); const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER); const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG); crm_trace("Retrieving %s from the LRM.", id); CRM_CHECK(id != NULL, return NULL); rsc = lrm_state_get_rsc_info(lrm_state, id, 0); if (!rsc && long_id) { rsc = lrm_state_get_rsc_info(lrm_state, long_id, 0); } if (!rsc && do_create) { CRM_CHECK(class != NULL, return NULL); CRM_CHECK(type != NULL, return NULL); crm_trace("Adding rsc %s before operation", id); lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring); rsc = lrm_state_get_rsc_info(lrm_state, id, 0); if (!rsc) { fsa_data_t *msg_data = NULL; crm_err("Could not add resource %s to LRM %s", id, lrm_state->node_name); /* only register this as a internal error if this involves the local * lrmd. Otherwise we're likely dealing with an unresponsive remote-node * which is not a FSA failure. */ if (lrm_state_is_local(lrm_state) == TRUE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } } return rsc; } static void delete_resource(lrm_state_t * lrm_state, const char *id, lrmd_rsc_info_t * rsc, GHashTableIter * gIter, const char *sys, const char *host, const char *user, ha_msg_input_t * request, gboolean unregister) { int rc = pcmk_ok; crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host); if (rsc && unregister) { rc = lrm_state_unregister_rsc(lrm_state, id, 0); } if (rc == pcmk_ok) { crm_trace("Resource '%s' deleted", id); } else if (rc == -EINPROGRESS) { crm_info("Deletion of resource '%s' pending", id); if (request) { struct pending_deletion_op_s *op = NULL; char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE); op = calloc(1, sizeof(struct pending_deletion_op_s)); op->rsc = strdup(rsc->id); op->input = copy_ha_msg_input(request); g_hash_table_insert(lrm_state->deletion_ops, ref, op); } return; } else { crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d", id, sys, user ? user : "internal", host, rc); } delete_rsc_entry(lrm_state, request, id, gIter, rc, user); } static int get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) { int call_id = 999999999; rsc_history_t *entry = NULL; if(lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* Make sure the call id is greater than the last successful operation, * otherwise the failure will not result in a possible recovery of the resource * as it could appear the failure occurred before the successful start */ if (entry) { call_id = entry->last_callid + 1; } if (call_id < 0) { call_id = 1; } return call_id; } static void force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node) { GHashTableIter gIter; rsc_history_t *entry = NULL; crm_info("clearing resource history on node %s", lrm_state->node_name); g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { /* only unregister the resource during a reprobe if it is not a remote connection * resource. otherwise unregistering the connection will terminate remote-node * membership */ gboolean unregister = TRUE; if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { lrm_state_t *remote_lrm_state = lrm_state_find(entry->id); if (remote_lrm_state) { /* when forcing a reprobe, make sure to clear remote node before * clearing the remote node's connection resource */ force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE); } unregister = FALSE; } delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host, user_name, NULL, unregister); } /* Now delete the copy in the CIB */ erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local); /* And finally, _delete_ the value in attrd * Setting it to FALSE results in the PE sending us back here again */ update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); } static void synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) { lrmd_event_data_t *op = NULL; const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK); const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET); xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE); if(xml_rsc == NULL) { - /* Do something else? driect_ack? */ + /* @TODO Should we do something else, like direct ack? */ crm_info("Skipping %s=%d on %s (%p): no resource", crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node, lrm_state); return; } else if(operation == NULL) { /* This probably came from crm_resource -C, nothing to do */ crm_info("Skipping %s=%d on %s (%p): no operation", crm_element_value(action, XML_ATTR_TRANSITION_KEY), rc, target_node, lrm_state); return; } op = construct_op(lrm_state, action, ID(xml_rsc), operation); CRM_ASSERT(op != NULL); op->call_id = get_fake_call_id(lrm_state, op->rsc_id); if(safe_str_eq(operation, RSC_NOTIFY)) { /* Notifications can't fail yet */ op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; } else { op->op_status = PCMK_LRM_OP_ERROR; op->rc = rc; } op->t_run = time(NULL); op->t_rcchange = op->t_run; crm_info("Faking result %d for %s_%s_%d on %s (%p)", op->rc, op->rsc_id, op->op_type, op->interval, target_node, lrm_state); if(lrm_state) { process_lrm_event(lrm_state, op, NULL); } else { lrmd_rsc_info_t rsc; rsc.id = strdup(op->rsc_id); rsc.type = crm_element_value_copy(xml_rsc, XML_ATTR_TYPE); rsc.class = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_CLASS); rsc.provider = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_PROVIDER); do_update_resource(target_node, &rsc, op); free(rsc.id); free(rsc.type); free(rsc.class); free(rsc.provider); } lrmd_free_event(op); } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean create_rsc = TRUE; lrm_state_t *lrm_state = NULL; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *user_name = NULL; const char *target_node = NULL; gboolean is_remote_node = FALSE; gboolean crm_rsc_delete = FALSE; if (input->xml != NULL) { /* Remote node operations are routed here to their remote connections */ target_node = crm_element_value(input->xml, XML_LRM_ATTR_TARGET); } if (target_node == NULL) { target_node = fsa_our_uname; } else if (safe_str_neq(target_node, fsa_our_uname)) { is_remote_node = TRUE; } lrm_state = lrm_state_find(target_node); if (lrm_state == NULL && is_remote_node) { crm_err("no lrmd connection for remote node %s found on cluster node %s. Can not process request.", target_node, fsa_our_uname); /* The action must be recorded here and in the CIB as failed */ synthesize_lrmd_failure(NULL, input->xml, PCMK_OCF_CONNECTION_DIED); return; } CRM_ASSERT(lrm_state != NULL); #if ENABLE_ACL user_name = crm_acl_get_set_user(input->msg, F_CRM_USER, NULL); crm_trace("LRM command from user '%s'", user_name); #endif crm_op = crm_element_value(input->msg, F_CRM_TASK); from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } crm_trace("LRM command from: %s", from_sys); if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { /* remember this delete op came from crm_resource */ crm_rsc_delete = TRUE; operation = CRMD_ACTION_DELETE; } else if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { operation = CRM_OP_LRM_REFRESH; } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) { lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* The lrmd can not fail a resource, it does not understand the * concept of success or failure in relation to a resource, it simply * executes operations and reports the results. We determine what a failure is. * Because of this, if we want to fail a resource we have to fake what we * understand a failure to look like. * * To do this we create a fake lrmd operation event for the resource * we want to fail. We then pass that event to the lrmd client callback * so it will be processed as if it actually came from the lrmd. */ op = construct_op(lrm_state, input->xml, ID(xml_rsc), "asyncmon"); CRM_ASSERT(op != NULL); free((char *)op->user_data); op->user_data = NULL; op->call_id = get_fake_call_id(lrm_state, op->rsc_id); op->interval = 0; op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_UNKNOWN_ERROR; op->t_run = time(NULL); op->t_rcchange = op->t_run; #if ENABLE_ACL if (user_name && is_privileged(user_name) == FALSE) { crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } #endif rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc); if (rsc) { crm_info("Failing resource %s...", rsc->id); process_lrm_event(lrm_state, op, NULL); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; lrmd_free_rsc_info(rsc); } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(input->msg, "bad input"); } send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } else if (input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { int rc = pcmk_ok; xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); crm_info("Forced a local LRM refresh: call=%d", rc); if(strcmp(CRM_SYSTEM_CRMD, from_sys) != 0) { xmlNode *reply = create_request( CRM_OP_INVOKE_LRM, fragment, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } free_xml(fragment); } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all); xmlNode *reply = create_reply(input->msg, data); if (relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_xml_err(reply, "reply"); } free_xml(reply); free_xml(data); } else if (safe_str_eq(operation, CRM_OP_PROBED)) { update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node); } else if (safe_str_eq(operation, CRM_OP_REPROBE) || safe_str_eq(crm_op, CRM_OP_REPROBE)) { crm_notice("Forcing the status of all resources to be redetected"); force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node); if(strcmp(CRM_SYSTEM_TENGINE, from_sys) != 0 && strcmp(CRM_SYSTEM_TENGINE, from_sys) != 0) { xmlNode *reply = create_request( CRM_OP_INVOKE_LRM, NULL, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; xmlNode *params = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { create_rsc = FALSE; } if(lrm_state_is_connected(lrm_state) == FALSE) { synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_CONNECTION_DIED); return; } rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc); if (rsc == NULL && create_rsc) { crm_err("Invalid resource definition for %s", ID(xml_rsc)); crm_log_xml_warn(input->msg, "bad input"); /* if the operation couldn't complete because we can't register * the resource, return a generic error */ synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_NOT_CONFIGURED); } else if (rsc == NULL) { crm_notice("Not creating resource for a %s event: %s", operation, ID(input->xml)); delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name); /* Deleting something that does not exist is a success */ send_task_ok_ack(lrm_state, input, ID(xml_rsc), NULL, operation, from_host, from_sys); } else if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) { char *op_key = NULL; char *meta_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; const char *op_interval = NULL; gboolean in_progress = FALSE; CRM_CHECK(params != NULL, crm_log_xml_warn(input->xml, "Bad command"); lrmd_free_rsc_info(rsc); return); meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL); op_interval = crm_element_value(params, meta_key); free(meta_key); meta_key = crm_meta_name(XML_LRM_ATTR_TASK); op_task = crm_element_value(params, meta_key); free(meta_key); meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); call_id = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(op_task != NULL, crm_log_xml_warn(input->xml, "Bad command"); lrmd_free_rsc_info(rsc); return); CRM_CHECK(op_interval != NULL, crm_log_xml_warn(input->xml, "Bad command"); lrmd_free_rsc_info(rsc); return); op_key = generate_op_key(rsc->id, op_task, crm_parse_int(op_interval, "0")); crm_debug("PE requested op %s (call=%s) be cancelled", op_key, call_id ? call_id : "NA"); call = crm_parse_int(call_id, "0"); if (call == 0) { /* the normal case when the PE cancels a recurring op */ in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); } else { /* the normal case when the PE cancels an orphan op */ in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); } /* Acknowledge the cancellation operation if it's for a remote connection resource */ if (in_progress == FALSE || is_remote_lrmd_ra(NULL, NULL, rsc->id)) { char *op_id = make_stop_id(rsc->id, call); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) { crm_info("Nothing known about operation %d for %s", call, op_key); } delete_op_entry(lrm_state, NULL, rsc->id, op_key, call); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); /* needed at least for cancellation of a remote operation */ g_hash_table_remove(lrm_state->pending_ops, op_id); free(op_id); } else { /* No ack is needed since abcdaa8, but peers with older versions * in a rolling upgrade need one. We didn't bump the feature set * at that commit, so we can only compare against the previous * CRM version (3.0.8). If any peers have feature set 3.0.9 but * not abcdaa8, they will time out waiting for the ack (no * released versions of Pacemaker are affected). */ const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION); if (compare_version(peer_version, "3.0.8") <= 0) { crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)", op_key, from_host, peer_version); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); } } free(op_key); } else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { gboolean unregister = TRUE; #if ENABLE_ACL int cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun | cib_sync_call, user_name); if (cib_rc != pcmk_ok) { lrmd_event_data_t *op = NULL; crm_err ("Attempted deletion of resource status '%s' from CIB for %s (user=%s) on %s failed: (rc=%d) %s", rsc->id, from_sys, user_name ? user_name : "unknown", from_host, cib_rc, pcmk_strerror(cib_rc)); op = construct_op(lrm_state, input->xml, rsc->id, operation); op->op_status = PCMK_LRM_OP_ERROR; if (cib_rc == -EACCES) { op->rc = PCMK_OCF_INSUFFICIENT_PRIV; } else { op->rc = PCMK_OCF_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc->id); lrmd_free_event(op); lrmd_free_rsc_info(rsc); return; } #endif if (crm_rsc_delete == TRUE && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { unregister = FALSE; } delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input, unregister); } else { do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg); } lrmd_free_rsc_info(rsc); } else { crm_err("Operation was neither a lrm_query, nor a rsc op. %s", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static lrmd_event_data_t * construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation) { lrmd_event_data_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; const char *op_interval = NULL; GHashTable *params = NULL; xmlNode *versioned_params = NULL; const char *transition = NULL; CRM_ASSERT(rsc_id != NULL); op = calloc(1, sizeof(lrmd_event_data_t)); op->type = lrmd_event_exec_complete; op->op_type = strdup(operation); op->op_status = PCMK_LRM_OP_PENDING; op->rc = -1; op->rsc_id = strdup(rsc_id); op->interval = 0; op->timeout = 0; op->start_delay = 0; if (rsc_op == NULL) { CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); op->user_data = NULL; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); crm_trace("Constructed %s op for %s", operation, rsc_id); return op; } params = xml2list(rsc_op); g_hash_table_remove(params, CRM_META "_op_target_rc"); if (!is_remote_lrmd_ra(NULL, NULL, rsc_id)) { xmlNode *ptr = first_named_child(rsc_op, XML_TAG_VER_ATTRS); if (ptr) { versioned_params = copy_xml(ptr); } } op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY); op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT); op_interval = crm_meta_value(params, XML_LRM_ATTR_INTERVAL); op->interval = crm_parse_int(op_interval, "0"); op->timeout = crm_parse_int(op_timeout, "0"); op->start_delay = crm_parse_int(op_delay, "0"); if (safe_str_neq(operation, RSC_STOP)) { op->params = params; op->versioned_params = versioned_params; } else { rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); /* If we do not have stop parameters cached, use * whatever we are given */ if (!entry || !entry->stop_params) { op->params = params; op->versioned_params = versioned_params; } else { /* Copy the cached parameter list so that we stop the resource * with the old attributes, not the new ones */ op->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_foreach(params, copy_meta_keys, op->params); g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); g_hash_table_destroy(params); params = NULL; op->versioned_params = NULL; free_xml(versioned_params); } } if (op->versioned_params) { char *versioned_params_text = dump_xml_unformatted(op->versioned_params); if (versioned_params_text) { g_hash_table_insert(op->params, strdup("#" XML_TAG_VER_ATTRS), versioned_params_text); } } /* sanity */ if (op->interval < 0) { op->interval = 0; } if (op->timeout <= 0) { op->timeout = op->interval; } if (op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = strdup(transition); if (op->interval != 0) { if (safe_str_eq(operation, CRMD_ACTION_START) || safe_str_eq(operation, CRMD_ACTION_STOP)) { crm_err("Start and Stop actions cannot have an interval: %d", op->interval); op->interval = 0; } } crm_trace("Constructed %s op for %s: interval=%d", operation, rsc_id, op->interval); return op; } void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; crm_node_t *peer = NULL; CRM_CHECK(op != NULL, return); if (op->rsc_id == NULL) { CRM_ASSERT(rsc_id != NULL); op->rsc_id = strdup(rsc_id); } if (to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } peer = crm_get_peer(0, fsa_our_uname); update = create_node_state_update(peer, node_update_none, NULL, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, __FUNCTION__); reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_log_xml_trace(update, "ACK Update"); crm_debug("ACK'ing resource op %s_%s_%d from %s: %s", op->rsc_id, op->op_type, op->interval, op->user_data, crm_element_value(reply, XML_ATTR_REFERENCE)); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(update); free_xml(reply); } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { gboolean res = TRUE; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) { /* keep iterating through all even when false is returned */ res = FALSE; } } set_bit(fsa_input_register, R_SENT_RSC_STOP); g_list_free(lrm_state_list); lrm_state_list = NULL; return res; } struct stop_recurring_action_s { lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct stop_recurring_action_s *event = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (op->interval != 0 && crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) { crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key); remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE); } return remove; } static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; lrm_state_t *lrm_state = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (op->interval != 0) { crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, key); remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE); } return remove; } static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request) { int call_id = 0; char *op_id = NULL; lrmd_event_data_t *op = NULL; lrmd_key_value_t *params = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; gboolean stop_recurring = FALSE; bool send_nack = FALSE; CRM_CHECK(rsc != NULL, return); CRM_CHECK(operation != NULL, return); if (msg != NULL) { transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if (transition == NULL) { crm_log_xml_err(msg, "Missing transition number"); } } op = construct_op(lrm_state, msg, rsc->id, operation); CRM_CHECK(op != NULL, return); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) && op->interval == 0 && strcmp(operation, CRMD_ACTION_MIGRATE) == 0) { /* pcmk remote connections are a special use case. * We never ever want to stop monitoring a connection resource until * the entire migration has completed. If the connection is ever unexpected * severed, even during a migration, this is an event we must detect.*/ stop_recurring = FALSE; } else if (op->interval == 0 && strcmp(operation, CRMD_ACTION_STATUS) != 0 && strcmp(operation, CRMD_ACTION_NOTIFY) != 0) { /* stop any previous monitor operations before changing the resource state */ stop_recurring = TRUE; } if (stop_recurring == TRUE) { guint removed = 0; struct stop_recurring_action_s data; data.rsc = rsc; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_action_by_rsc, &data); if (removed) { crm_debug("Stopped %u recurring operations in preparation for %s_%s_%d", removed, rsc->id, operation, op->interval); } } /* now do the op */ crm_info("Performing key=%s op=%s_%s_%d", transition, rsc->id, operation, op->interval); if (is_set(fsa_input_register, R_SHUTDOWN) && safe_str_eq(operation, RSC_START)) { register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); send_nack = TRUE; } else if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE /* Recalculating */ && fsa_state != S_TRANSITION_ENGINE && safe_str_neq(operation, "fail") && safe_str_neq(operation, CRMD_ACTION_STOP)) { send_nack = TRUE; } if(send_nack) { crm_notice("Discarding attempt to perform action %s on %s in state %s (shutdown=%s)", operation, rsc->id, fsa_state2string(fsa_state), is_set(fsa_input_register, R_SHUTDOWN)?"true":"false"); op->rc = CRM_DIRECT_NACK_RC; op->op_status = PCMK_LRM_OP_ERROR; send_direct_ack(NULL, NULL, rsc, op, rsc->id); lrmd_free_event(op); free(op_id); return; } op_id = generate_op_key(rsc->id, op->op_type, op->interval); if (op->interval > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(lrm_state, rsc, op_id, FALSE); } if (op->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, op->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { params = lrmd_key_value_add(params, key, value); } } call_id = lrm_state_exec(lrm_state, rsc->id, op->op_type, op->user_data, op->interval, op->timeout, op->start_delay, params); if (call_id <= 0 && lrm_state_is_local(lrm_state)) { crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else if (call_id <= 0) { crm_err("Operation %s on resource %s failed to execute on remote node %s: %d", operation, rsc->id, lrm_state->node_name, call_id); op->call_id = get_fake_call_id(lrm_state, rsc->id); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_UNKNOWN_ERROR; op->t_run = time(NULL); op->t_rcchange = op->t_run; process_lrm_event(lrm_state, op, NULL); } else { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); struct recurring_op_s *pending = NULL; pending = calloc(1, sizeof(struct recurring_op_s)); crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval = op->interval; pending->op_type = strdup(operation); pending->op_key = strdup(op_id); pending->rsc_id = strdup(rsc->id); pending->start_time = time(NULL); pending->user_data = strdup(op->user_data); g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending); if (op->interval > 0 && op->start_delay > START_DELAY_THRESHOLD) { char *uuid = NULL; int dummy = 0, target_rc = 0; crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc); free(uuid); op->rc = target_rc; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(NULL, NULL, rsc, op, rsc->id); } pending->params = op->params; op->params = NULL; } free(op_id); lrmd_free_event(op); return; } int last_resource_update = 0; static void cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { switch (rc) { case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: crm_trace("Resource update %d complete: rc=%d", call_id, rc); break; default: crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc)); } if (call_id == last_resource_update) { last_resource_update = 0; trigger_fsa(fsa_source); } } static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { /* */ int rc = pcmk_ok; xmlNode *update, *iter = NULL; int call_opt = crmd_cib_smart_opt(); const char *uuid = NULL; CRM_CHECK(op != NULL, return 0); iter = create_xml_node(iter, XML_CIB_TAG_STATUS); update = iter; iter = create_xml_node(iter, XML_CIB_TAG_STATE); if (safe_str_eq(node_name, fsa_our_uname)) { uuid = fsa_our_uuid; } else { /* remote nodes uuid and uname are equal */ uuid = node_name; crm_xml_add(iter, XML_NODE_IS_REMOTE, "true"); } CRM_LOG_ASSERT(uuid != NULL); if(uuid == NULL) { rc = -EINVAL; goto done; } crm_xml_add(iter, XML_ATTR_UUID, uuid); crm_xml_add(iter, XML_ATTR_UNAME, node_name); crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__); iter = create_xml_node(iter, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, __FUNCTION__); if (rsc) { const char *container = NULL; crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class); crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider); if (op->params) { container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); } if (container) { crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container); crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container); } } else { crm_warn("Resource %s no longer exists in the lrmd", op->rsc_id); send_direct_ack(NULL, NULL, rsc, op, op->rsc_id); goto cleanup; } crm_log_xml_trace(update, __FUNCTION__); /* make it an asynchronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down then anyway? * (probably because of an internal error) * * Worst case: * we get shot for having resources "running" when the really weren't * * the alternative however means blocking here for too long, which * isn't acceptable */ fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL); if (rc > 0) { last_resource_update = rc; } done: /* the return code is a call number, not an error code */ crm_trace("Sent resource state update message: %d for %s=%d on %s", rc, op->op_type, op->interval, op->rsc_id); fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback); cleanup: free_xml(update); return rc; } void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data) { CRM_CHECK(FALSE, return); } gboolean process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending) { char *op_id = NULL; char *op_key = NULL; int update_id = 0; gboolean remove = FALSE; gboolean removed = FALSE; lrmd_rsc_info_t *rsc = NULL; CRM_CHECK(op != NULL, return FALSE); CRM_CHECK(op->rsc_id != NULL, return FALSE); op_id = make_stop_id(op->rsc_id, op->call_id); op_key = generate_op_key(op->rsc_id, op->op_type, op->interval); rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); if(pending == NULL) { remove = TRUE; pending = g_hash_table_lookup(lrm_state->pending_ops, op_id); } if (op->op_status == PCMK_LRM_OP_ERROR) { switch(op->rc) { case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_MASTER: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_MASTER: /* Leave it up to the TE/PE to decide if this is an error */ op->op_status = PCMK_LRM_OP_DONE; break; default: /* Nothing to do */ break; } } if (op->op_status != PCMK_LRM_OP_CANCELLED) { if (safe_str_eq(op->op_type, RSC_NOTIFY)) { /* Keep notify ops out of the CIB */ send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else { update_id = do_update_resource(lrm_state->node_name, rsc, op); } } else if (op->interval == 0) { /* This will occur when "crm resource cleanup" is called while actions are in-flight */ crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id); send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else if (pending == NULL) { /* We don't need to do anything for cancelled ops * that are not in our pending op list. There are no * transition actions waiting on these operations. */ } else if (op->user_data == NULL) { /* At this point we have a pending entry, but no transition * key present in the user_data field. report this */ crm_err("Op %s (call=%d): No user data", op_key, op->call_id); } else if (pending->remove) { /* The tengine canceled this op, we have been waiting for the cancel to finish. */ delete_op_entry(lrm_state, op, op->rsc_id, op_key, op->call_id); } else if (pending && op->rsc_deleted) { /* The tengine initiated this op, but it was cancelled outside of the * tengine's control during a resource cleanup/re-probe request. The tengine * must be alerted that this operation completed, otherwise the tengine * will continue waiting for this update to occur until it is timed out. * We don't want this update going to the cib though, so use a direct ack. */ crm_trace("Op %s (call=%d): cancelled due to rsc deletion", op_key, op->call_id); send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else { /* Before a stop is called, no need to direct ack */ crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id); } if(remove == FALSE) { /* The caller will do this afterwards, but keep the logging consistent */ removed = TRUE; } else if ((op->interval == 0) && g_hash_table_remove(lrm_state->pending_ops, op_id)) { removed = TRUE; crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops)); } else if(op->interval != 0 && op->op_status == PCMK_LRM_OP_CANCELLED) { removed = TRUE; g_hash_table_remove(lrm_state->pending_ops, op_id); } switch (op->op_status) { case PCMK_LRM_OP_CANCELLED: crm_info("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s confirmed=%s", crm_action_str(op->op_type, op->interval), op->rsc_id, lrm_state->node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, (removed? "true" : "false")); break; case PCMK_LRM_OP_DONE: do_crm_log(op->interval?LOG_INFO:LOG_NOTICE, "Result of %s operation for %s on %s: %d (%s) " CRM_XS " call=%d key=%s confirmed=%s cib-update=%d", crm_action_str(op->op_type, op->interval), op->rsc_id, lrm_state->node_name, op->rc, services_ocf_exitcode_str(op->rc), op->call_id, op_key, (removed? "true" : "false"), update_id); break; case PCMK_LRM_OP_TIMEOUT: crm_err("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s timeout=%dms", crm_action_str(op->op_type, op->interval), op->rsc_id, lrm_state->node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, op->timeout); break; default: crm_err("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s confirmed=%s status=%d cib-update=%d", crm_action_str(op->op_type, op->interval), op->rsc_id, lrm_state->node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, (removed? "true" : "false"), op->op_status, update_id); } if (op->output) { char *prefix = crm_strdup_printf("%s-%s_%s_%d:%d", lrm_state->node_name, op->rsc_id, op->op_type, op->interval, op->call_id); if (op->rc) { crm_log_output(LOG_NOTICE, prefix, op->output); } else { crm_log_output(LOG_DEBUG, prefix, op->output); } free(prefix); } crmd_notify_resource_op(lrm_state->node_name, op); if (op->rsc_deleted) { crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL); } /* If a shutdown was escalated while operations were pending, * then the FSA will be stalled right now... allow it to continue */ mainloop_set_trigger(fsa_source); update_history_cache(lrm_state, rsc, op); lrmd_free_rsc_info(rsc); free(op_key); free(op_id); return TRUE; } diff --git a/doc/Pacemaker_Explained/en-US/Ch-Resources.txt b/doc/Pacemaker_Explained/en-US/Ch-Resources.txt index 2d988f09bf..e0d906b04e 100644 --- a/doc/Pacemaker_Explained/en-US/Ch-Resources.txt +++ b/doc/Pacemaker_Explained/en-US/Ch-Resources.txt @@ -1,846 +1,846 @@ = Cluster Resources = == What is a Cluster Resource? == indexterm:[Resource] A resource is a service made highly available by a cluster. The simplest type of resource, a 'primitive' resource, is described in this chapter. More complex forms, such as groups and clones, are described in later chapters. Every primitive resource has a 'resource agent'. A resource agent is an external program that abstracts the service it provides and present a consistent view to the cluster. This allows the cluster to be agnostic about the resources it manages. The cluster doesn't need to understand how the resource works because it relies on the resource agent to do the right thing when given a `start`, `stop` or `monitor` command. For this reason, it is crucial that resource agents are well-tested. Typically, resource agents come in the form of shell scripts. However, they can be written using any technology (such as C, Python or Perl) that the author is comfortable with. [[s-resource-supported]] == Resource Classes == indexterm:[Resource,class] Pacemaker supports several classes of agents: * OCF * LSB * Upstart * Systemd * Service * Fencing * Nagios Plugins === Open Cluster Framework === indexterm:[Resource,OCF] indexterm:[OCF,Resources] indexterm:[Open Cluster Framework,Resources] The OCF standard footnote:[See http://www.opencf.org/cgi-bin/viewcvs.cgi/specs/ra/resource-agent-api.txt?rev=HEAD -- at least as it relates to resource agents. The Pacemaker implementation has been somewhat extended from the OCF specs, but none of those changes are incompatible with the original OCF specification.] is basically an extension of the Linux Standard Base conventions for init scripts to: * support parameters, * make them self-describing, and * make them extensible OCF specs have strict definitions of the exit codes that actions must return. footnote:[ The resource-agents source code includes the `ocf-tester` script, which can be useful in this regard. ] The cluster follows these specifications exactly, and giving the wrong exit code will cause the cluster to behave in ways you will likely find puzzling and annoying. In particular, the cluster needs to distinguish a completely stopped resource from one which is in some erroneous and indeterminate state. Parameters are passed to the resource agent as environment variables, with the special prefix +OCF_RESKEY_+. So, a parameter which the user thinks of as +ip+ will be passed to the resource agent as +OCF_RESKEY_ip+. The number and purpose of the parameters is left to the resource agent; however, the resource agent should use the `meta-data` command to advertise any that it supports. The OCF class is the most preferred as it is an industry standard, highly flexible (allowing parameters to be passed to agents in a non-positional manner) and self-describing. For more information, see the http://www.linux-ha.org/wiki/OCF_Resource_Agents[reference] and <>. === Linux Standard Base === indexterm:[Resource,LSB] indexterm:[LSB,Resources] indexterm:[Linux Standard Base,Resources] LSB resource agents are those found in +/etc/init.d+. Generally, they are provided by the OS distribution and, in order to be used with the cluster, they must conform to the LSB Spec. footnote:[ See http://refspecs.linux-foundation.org/LSB_3.0.0/LSB-Core-generic/LSB-Core-generic/iniscrptact.html for the LSB Spec as it relates to init scripts. ] [WARNING] ==== Many distributions claim LSB compliance but ship with broken init scripts. For details on how to check whether your init script is LSB-compatible, see <>. Common problematic violations of the LSB standard include: * Not implementing the status operation at all * Not observing the correct exit status codes for `start/stop/status` actions * Starting a started resource returns an error * Stopping a stopped resource returns an error ==== [IMPORTANT] ==== Remember to make sure the computer is _not_ configured to start any services at boot time -- that should be controlled by the cluster. ==== === Systemd === indexterm:[Resource,Systemd] indexterm:[Systemd,Resources] Some newer distributions have replaced the old http://en.wikipedia.org/wiki/Init#SysV-style["SysV"] style of initialization daemons and scripts with an alternative called http://www.freedesktop.org/wiki/Software/systemd[Systemd]. Pacemaker is able to manage these services _if they are present_. Instead of init scripts, systemd has 'unit files'. Generally, the services (unit files) are provided by the OS distribution, but there are online guides for converting from init scripts. footnote:[For example, http://0pointer.de/blog/projects/systemd-for-admins-3.html] [IMPORTANT] ==== Remember to make sure the computer is _not_ configured to start any services at boot time -- that should be controlled by the cluster. ==== === Upstart === indexterm:[Resource,Upstart] indexterm:[Upstart,Resources] Some newer distributions have replaced the old http://en.wikipedia.org/wiki/Init#SysV-style["SysV"] style of initialization daemons (and scripts) with an alternative called http://upstart.ubuntu.com/[Upstart]. Pacemaker is able to manage these services _if they are present_. Instead of init scripts, upstart has 'jobs'. Generally, the services (jobs) are provided by the OS distribution. [IMPORTANT] ==== Remember to make sure the computer is _not_ configured to start any services at boot time -- that should be controlled by the cluster. ==== === System Services === indexterm:[Resource,System Services] indexterm:[System Service,Resources] Since there are various types of system services (+systemd+, +upstart+, and +lsb+), Pacemaker supports a special +service+ alias which intelligently figures out which one applies to a given cluster node. This is particularly useful when the cluster contains a mix of +systemd+, +upstart+, and +lsb+. In order, Pacemaker will try to find the named service as: . an LSB init script . a Systemd unit file . an Upstart job === STONITH === indexterm:[Resource,STONITH] indexterm:[STONITH,Resources] The STONITH class is used exclusively for fencing-related resources. This is discussed later in <>. === Nagios Plugins === indexterm:[Resource,Nagios Plugins] indexterm:[Nagios Plugins,Resources] Nagios Plugins footnote:[The project has two independent forks, hosted at https://www.nagios-plugins.org/ and https://www.monitoring-plugins.org/. Output from both projects' plugins is similar, so plugins from either project can be used with pacemaker.] allow us to monitor services on remote hosts. Pacemaker is able to do remote monitoring with the plugins _if they are present_. A common use case is to configure them as resources belonging to a resource container (usually a virtual machine), and the container will be restarted if any of them has failed. Another use is to configure them as ordinary resources to be used for monitoring hosts or services via the network. The supported parameters are same as the long options of the plugin. [[primitive-resource]] == Resource Properties == These values tell the cluster which resource agent to use for the resource, where to find that resource agent and what standards it conforms to. .Properties of a Primitive Resource [width="95%",cols="1m,6<",options="header",align="center"] |========================================================= |Field |Description |id |Your name for the resource indexterm:[id,Resource] indexterm:[Resource,Property,id] |class |The standard the resource agent conforms to. Allowed values: +lsb+, +nagios+, +ocf+, +service+, +stonith+, +systemd+, +upstart+ indexterm:[class,Resource] indexterm:[Resource,Property,class] |type |The name of the Resource Agent you wish to use. E.g. +IPaddr+ or +Filesystem+ indexterm:[type,Resource] indexterm:[Resource,Property,type] |provider |The OCF spec allows multiple vendors to supply the same resource agent. To use the OCF resource agents supplied by the Heartbeat project, you would specify +heartbeat+ here. indexterm:[provider,Resource] indexterm:[Resource,Property,provider] |========================================================= The XML definition of a resource can be queried with the `crm_resource` tool. For example: ---- # crm_resource --resource Email --query-xml ---- might produce: .A system resource definition ===== [source,XML] ===== [NOTE] ===== One of the main drawbacks to system services (LSB, systemd or Upstart) resources is that they do not allow any parameters! ===== //// See https://tools.ietf.org/html/rfc5737 for choice of example IP address //// .An OCF resource definition ===== [source,XML] ------- ------- ===== [[s-resource-options]] == Resource Options == Resources have two types of options: 'meta-attributes' and 'instance attributes'. Meta-attributes apply to any type of resource, while instance attributes are specific to each resource agent. === Resource Meta-Attributes === Meta-attributes are used by the cluster to decide how a resource should behave and can be easily set using the `--meta` option of the `crm_resource` command. .Meta-attributes of a Primitive Resource [width="95%",cols="2m,2,5> resources, promoted to master if appropriate) * +Slave:+ Allow the resource to be started, but only in Slave mode if the resource is <> * +Master:+ Equivalent to +Started+ indexterm:[target-role,Resource Option] indexterm:[Resource,Option,target-role] |is-managed |TRUE |Is the cluster allowed to start and stop the resource? Allowed values: +true+, +false+ indexterm:[is-managed,Resource Option] indexterm:[Resource,Option,is-managed] |resource-stickiness |value of +resource-stickiness+ in the +rsc_defaults+ section |How much does the resource prefer to stay where it is? indexterm:[resource-stickiness,Resource Option] indexterm:[Resource,Option,resource-stickiness] |requires |fencing (unless +stonith-enabled+ is +false+ or +class+ is +stonith+, in which case it defaults to quorum) |Conditions under which the resource can be started '(since 1.1.8)' Allowed values: * +nothing:+ can always be started * +quorum:+ The cluster can only start this resource if a majority of the configured nodes are active * +fencing:+ The cluster can only start this resource if a majority of the configured nodes are active _and_ any failed or unknown nodes have been powered off * +unfencing:+ The cluster can only start this resource if a majority of the configured nodes are active _and_ any failed or unknown nodes have been powered off _and_ only on nodes that have been 'unfenced' '(since 1.1.9)' indexterm:[requires,Resource Option] indexterm:[Resource,Option,requires] |migration-threshold |INFINITY |How many failures may occur for this resource on a node, before this node is marked ineligible to host this resource. A value of 0 indicates that this feature is disabled (the node will never be marked ineligible); by constrast, the cluster treats INFINITY (the default) as a very large but finite number. This option has an effect only if the failed operation has on-fail=restart (the default), and additionally for failed start operations, if the cluster property start-failure-is-fatal is false. indexterm:[migration-threshold,Resource Option] indexterm:[Resource,Option,migration-threshold] |failure-timeout |0 |How many seconds to wait before acting as if the failure had not occurred, and potentially allowing the resource back to the node on which it failed. A value of 0 indicates that this feature is disabled. As with any time-based actions, this is not guaranteed to be checked more frequently than the value of +cluster-recheck-interval+ (see <>). indexterm:[failure-timeout,Resource Option] indexterm:[Resource,Option,failure-timeout] |multiple-active |stop_start |What should the cluster do if it ever finds the resource active on more than one node? Allowed values: * +block:+ mark the resource as unmanaged * +stop_only:+ stop all active instances and leave them that way * +stop_start:+ stop all active instances and start the resource in one location only indexterm:[multiple-active,Resource Option] indexterm:[Resource,Option,multiple-active] |remote-node | |The name of the remote-node this resource defines. This both enables the resource as a remote-node and defines the unique name used to identify the remote-node. If no other parameters are set, this value will also be assumed as the hostname to connect to at the port specified by +remote-port+. +WARNING:+ This value cannot overlap with any resource or node IDs. If not specified, this feature is disabled. |remote-port |3121 |Port to use for the guest connection to pacemaker_remote |remote-addr |value of +remote-node+ |The IP address or hostname to connect to if remote-node's name is not the hostname of the guest. |+remote-connect-timeout+ |60s |How long before a pending guest connection will time out. |========================================================= [NOTE] ==== Support for remote nodes was added in pacemaker 1.1.10. If you are using an earlier version, options related to remote nodes will not be available. ==== As an example of setting resource options, if you performed the following commands on an LSB Email resource: ------- # crm_resource --meta --resource Email --set-parameter priority --parameter-value 100 # crm_resource -m -r Email -p multiple-active -v block ------- the resulting resource definition might be: .An LSB resource with cluster options ===== [source,XML] ------- ------- ===== [[s-resource-defaults]] === Setting Global Defaults for Resource Meta-Attributes === To set a default value for a resource option, add it to the +rsc_defaults+ section with `crm_attribute`. For example, ---- # crm_attribute --type rsc_defaults --name is-managed --update false ---- would prevent the cluster from starting or stopping any of the resources in the configuration (unless of course the individual resources were specifically enabled by having their +is-managed+ set to +true+). === Resource Instance Attributes === The resource agents of some resource classes (lsb, systemd and upstart 'not' among them) can be given parameters which determine how they behave and which instance of a service they control. If your resource agent supports parameters, you can add them with the `crm_resource` command. For example, ---- # crm_resource --resource Public-IP --set-parameter ip --parameter-value 192.0.2.2 ---- would create an entry in the resource like this: .An example OCF resource with instance attributes ===== [source,XML] ------- ------- ===== For an OCF resource, the result would be an environment variable called +OCF_RESKEY_ip+ with a value of +192.0.2.2+. The list of instance attributes supported by an OCF resource agent can be found by calling the resource agent with the `meta-data` command. The output contains an XML description of all the supported attributes, their purpose and default values. .Displaying the metadata for the Dummy resource agent template ===== ---- # export OCF_ROOT=/usr/lib/ocf # $OCF_ROOT/resource.d/pacemaker/Dummy meta-data ---- [source,XML] ------- 1.0 This is a Dummy Resource Agent. It does absolutely nothing except keep track of whether its running or not. Its purpose in life is for testing and to serve as a template for RA writers. NB: Please pay attention to the timeouts specified in the actions section below. They should be meaningful for the kind of resource the agent manages. They should be the minimum advised timeouts, but they shouldn't/cannot cover _all_ possible resource instances. So, try to be neither overly generous nor too stingy, but moderate. The minimum timeouts should never be below 10 seconds. Example stateless resource agent Location to store the resource state in. State file Fake attribute that can be changed to cause a reload Fake attribute that can be changed to cause a reload Number of seconds to sleep during operations. This can be used to test how the cluster reacts to operation timeouts. Operation sleep duration in seconds. ------- ===== == Resource Operations == indexterm:[Resource,Action] 'Operations' are actions the cluster can perform on a resource by calling the resource agent. Resource agents must support certain common operations such as start, stop and monitor, and may implement any others. Some operations are generated by the cluster itself, for example, stopping and starting resources as needed. You can configure operations in the cluster configuration. As an example, by default the cluster will 'not' ensure your resources stay healthy once they are started. footnote:[Currently, anyway. Automatic monitoring operations may be added in a future version of Pacemaker.] To instruct the cluster to do this, you need to add a +monitor+ operation to the resource's definition. .An OCF resource with a recurring health check ===== [source,XML] ------- ------- ===== .Properties of an Operation [width="95%",cols="2m,3,6>. indexterm:[interval,Action Property] indexterm:[Action,Property,interval] |timeout | |How long to wait before declaring the action has failed indexterm:[timeout,Action Property] indexterm:[Action,Property,timeout] |on-fail |restart '(except for stop operations, which default to' fence 'when STONITH is enabled and' block 'otherwise)' |The action to take if this action ever fails. Allowed values: * +ignore:+ Pretend the resource did not fail. * +block:+ Don't perform any further operations on the resource. * +stop:+ Stop the resource and do not start it elsewhere. * +restart:+ Stop the resource and start it again (possibly on a different node). * +fence:+ STONITH the node on which the resource failed. * +standby:+ Move _all_ resources away from the node on which the resource failed. indexterm:[on-fail,Action Property] indexterm:[Action,Property,on-fail] |enabled |TRUE |If +false+, ignore this operation definition. This is typically used to pause a particular recurring monitor operation; for instance, it can complement the respective resource being unmanaged (+is-managed=false+), as this alone will <>. Disabling the operation does not suppress all actions of the given type. Allowed values: +true+, +false+. indexterm:[enabled,Action Property] indexterm:[Action,Property,enabled] |record-pending | |If +true+, the intention to perform the operation is recorded so that GUIs and CLI tools can indicate that an operation is in progress. This is best set as an 'operation default' (see next section). Allowed values: +true+, +false+. indexterm:[enabled,Action Property] indexterm:[Action,Property,enabled] |role | |Run the operation only on node(s) that the cluster thinks should be in the specified role. This only makes sense for recurring monitor operations. Allowed (case-sensitive) values: +Stopped+, +Started+, and in the case of <> resources, +Slave+ and +Master+. indexterm:[role,Action Property] indexterm:[Action,Property,role] |========================================================= [[s-resource-monitoring]] === Monitoring Resources for Failure === When Pacemaker first starts a resource, it runs one-time monitor operations (referred to as 'probes') to ensure the resource is running where it's supposed to be, and not running where it's not supposed to be. (This behavior can be affected by the +resource-discovery+ location constraint property.) Other than those initial probes, Pacemaker will not (by default) check that the resource continues to stay healthy. As in the example above, you must configure monitor operations explicitly to perform these checks. By default, a monitor operation will ensure that the resource is running where it is supposed to. The +target-role+ property can be used for further checking. For example, if a resource has one monitor operation with +interval=10 role=Started+ and a second monitor operation with +interval=11 role=Stopped+, the cluster will run the first monitor on any nodes it thinks 'should' be running the resource, and the second monitor on any nodes that it thinks 'should not' be running the resource (for the truly paranoid, who want to know when an administrator manually starts a service by mistake). [[s-monitoring-unmanaged]] === Monitoring Resources When Administration is Disabled === Recurring monitor operations behave differently under various administrative settings: * When a resource is unmanaged (by setting +is-managed=false+): No monitors will be stopped. + If the unmanaged resource is stopped on a node where the cluster thinks it should be running, the cluster will detect and report that it is not, but it will not consider the monitor failed, and will not try to start the resource until it is managed again. + Starting the unmanaged resource on a different node is strongly discouraged and will at least cause the cluster to consider the resource failed, and may require the resource's +target-role+ to be set to +Stopped+ then +Started+ to be recovered. * When a node is put into standby: All resources will be moved away from the node, and all monitor operations will be stopped on the node, except those with +role=Stopped+. Monitor operations with +role=Stopped+ will be started on the node if appropriate. * When the cluster is put into maintenance mode: All resources will be marked as unmanaged. All monitor operations will be stopped, except those with +role=Stopped+. As with single unmanaged resources, starting a resource on a node other than where the cluster expects it to be will cause problems. [[s-operation-defaults]] === Setting Global Defaults for Operations === You can change the global default values for operation properties in a given cluster. These are defined in an +op_defaults+ section of the CIB's +configuration+ section, and can be set with `crm_attribute`. For example, ---- # crm_attribute --type op_defaults --name timeout --update 20s ---- would default each operation's +timeout+ to 20 seconds. If an operation's definition also includes a value for +timeout+, then that value would be used for that operation instead. === When Implicit Operations Take a Long Time === The cluster will always perform a number of implicit operations: +start+, +stop+ and a non-recurring +monitor+ operation used at startup to check whether the resource is already active. If one of these is taking too long, then you can create an entry for them and specify a longer timeout. .An OCF resource with custom timeouts for its implicit actions ===== [source,XML] ------- ------- ===== === Multiple Monitor Operations === Provided no two operations (for a single resource) have the same name and interval, you can have as many monitor operations as you like. In this way, you can do a superficial health check every minute and progressively more intense ones at higher intervals. To tell the resource agent what kind of check to perform, you need to provide each monitor with a different value for a common parameter. The OCF standard creates a special parameter called +OCF_CHECK_LEVEL+ for this purpose and dictates that it is "made available to the resource agent without the normal +OCF_RESKEY+ prefix". Whatever name you choose, you can specify it by adding an +instance_attributes+ block to the +op+ tag. It is up to each resource agent to look for the parameter and decide how to use it. .An OCF resource with two recurring health checks, performing different levels of checks specified via +OCF_CHECK_LEVEL+. ===== [source,XML] ------- - + ------- ===== === Disabling a Monitor Operation === The easiest way to stop a recurring monitor is to just delete it. However, there can be times when you only want to disable it temporarily. In such cases, simply add +enabled="false"+ to the operation's definition. .Example of an OCF resource with a disabled health check ===== [source,XML] ------- ------- ===== This can be achieved from the command line by executing: ---- # cibadmin --modify --xml-text '' ---- Once you've done whatever you needed to do, you can then re-enable it with ---- # cibadmin --modify --xml-text '' ---- diff --git a/doc/Pacemaker_Explained/en-US/Ch-Rules.txt b/doc/Pacemaker_Explained/en-US/Ch-Rules.txt index 986a6aac8b..77c98885a2 100644 --- a/doc/Pacemaker_Explained/en-US/Ch-Rules.txt +++ b/doc/Pacemaker_Explained/en-US/Ch-Rules.txt @@ -1,600 +1,600 @@ = Rules = //// We prefer [[ch-rules]], but older versions of asciidoc don't deal well with that construct for chapter headings //// anchor:ch-rules[Chapter 8, Rules] indexterm:[Resource,Constraint,Rule] Rules can be used to make your configuration more dynamic. One common example is to set one value for +resource-stickiness+ during working hours, to prevent resources from being moved back to their most preferred location, and another on weekends when no-one is around to notice an outage. Another use of rules might be to assign machines to different processing groups (using a node attribute) based on time and to then use that attribute when creating location constraints. Each rule can contain a number of expressions, date-expressions and even other rules. The results of the expressions are combined based on the rule's +boolean-op+ field to determine if the rule ultimately evaluates to +true+ or +false+. What happens next depends on the context in which the rule is being used. == Rule Properties == .Properties of a Rule [width="95%",cols="2m,1,5<",options="header",align="center"] |========================================================= |Field |Default |Description |role |+Started+ |Limits the rule to apply only when the resource is in the specified role. Allowed values are +Started+, +Slave+, and +Master+. A rule with +role="Master"+ cannot determine the initial location of a clone instance and will only affect which of the active instances will be promoted. indexterm:[role,Constraint Rule] indexterm:[Constraint,Rule,role] |score | |The score to apply if the rule evaluates to +true+. Limited to use in rules that are part of location constraints. indexterm:[score,Constraint Rule] indexterm:[Constraint,Rule,score] |score-attribute | |The node attribute to look up and use as a score if the rule evaluates to +true+. Limited to use in rules that are part of location constraints. indexterm:[score-attribute,Constraint Rule] indexterm:[Constraint,Rule,score-attribute] |boolean-op |+and+ |How to combine the result of multiple expression objects. Allowed values are +and+ and +or+. indexterm:[boolean-op,Constraint Rule] indexterm:[Constraint,Rule,boolean-op] |========================================================= == Node Attribute Expressions == indexterm:[Resource,Constraint,Attribute Expression] Expression objects are used to control a resource based on the attributes defined by a node or nodes. .Properties of an Expression [width="95%",cols="1m,1,5 ---- ==== .Equivalent expression ==== [source,XML] ---- ---- ==== .9am-5pm Monday-Friday ==== [source,XML] ------- ------- ==== Please note that the +16+ matches up to +16:59:59+, as the numeric value (hour) still matches! .9am-6pm Monday through Friday or anytime Saturday ==== [source,XML] ------- ------- ==== .9am-5pm or 9pm-12am Monday through Friday ==== [source,XML] ------- ------- ==== .Mondays in March 2005 ==== [source,XML] ------- ------- ==== [NOTE] ====== Because no time is specified with the above dates, 00:00:00 is implied. This means that the range includes all of 2005-03-01 but none of 2005-04-01. You may wish to write +end="2005-03-31T23:59:59"+ to avoid confusion. ====== .A full moon on Friday the 13th ===== [source,XML] ------- ------- ===== == Using Rules to Determine Resource Location == indexterm:[Rule,Determine Resource Location] indexterm:[Resource,Location,Determine by Rules] A location constraint may contain rules. When the constraint's outermost rule evaluates to +false+, the cluster treats the constraint as if it were not there. When the rule evaluates to +true+, the node's preference for running the resource is updated with the score associated with the rule. If this sounds familiar, it is because you have been using a simplified syntax for location constraint rules already. Consider the following location constraint: .Prevent myApacheRsc from running on c001n03 ===== [source,XML] ------- ------- ===== This constraint can be more verbosely written as: .Prevent myApacheRsc from running on c001n03 - expanded version ===== [source,XML] ------- ------- ===== The advantage of using the expanded form is that one can then add extra clauses to the rule, such as limiting the rule such that it only applies during certain times of the day or days of the week. === Location Rules Based on Other Node Properties === The expanded form allows us to match on node properties other than its name. If we rated each machine's CPU power such that the cluster had the following nodes section: .A sample nodes section for use with score-attribute ===== [source,XML] ------- ------- ===== then we could prevent resources from running on underpowered machines with this rule: [source,XML] ------- ------- === Using +score-attribute+ Instead of +score+ === When using +score-attribute+ instead of +score+, each node matched by the rule has its score adjusted differently, according to its value for the named node attribute. Thus, in the previous example, if a rule used +score-attribute="cpu_mips"+, +c001n01+ would have its preference to run the resource increased by +1234+ whereas +c001n02+ would have its preference increased by +5678+. == Using Rules to Control Resource Options == Often some cluster nodes will be different from their peers. Sometimes, these differences -- e.g. the location of a binary or the names of network interfaces -- require resources to be configured differently depending on the machine they're hosted on. By defining multiple +instance_attributes+ objects for the resource and adding a rule to each, we can easily handle these special cases. In the example below, +mySpecialRsc+ will use eth1 and port 9999 when run on +node1+, eth2 and port 8888 on +node2+ and default to eth0 and port 9999 for all other nodes. .Defining different resource options based on the node name ===== [source,XML] ------- ------- ===== The order in which +instance_attributes+ objects are evaluated is determined by their score (highest to lowest). If not supplied, score defaults to zero, and objects with an equal score are processed in listed order. If the +instance_attributes+ object has no rule or a +rule+ that evaluates to +true+, then for any parameter the resource does not yet have a value for, the resource will use the parameter values defined by the +instance_attributes+. For example, given the configuration above, if the resource is placed on node1: . +special-node1+ has the highest score (3) and so is evaluated first; its rule evaluates to +true+, so +interface+ is set to +eth1+. . +special-node2+ is evaluated next with score 2, but its rule evaluates to +false+, so it is ignored. . +defaults+ is evaluated last with score 1, and has no rule, so its values are examined; +interface+ is already defined, so the value here is not used, but +port+ is not yet defined, so +port+ is set to +9999+. == Using Rules to Control Cluster Options == indexterm:[Rule,Controlling Cluster Options] indexterm:[Cluster,Setting Options with Rules] Controlling cluster options is achieved in much the same manner as specifying different resource options on different nodes. The difference is that because they are cluster options, one cannot (or should not, because they won't work) use attribute-based expressions. The following example illustrates how to set a different +resource-stickiness+ value during and outside work hours. This allows resources to automatically move back to their most preferred hosts, but at a time that (in theory) does not interfere with business activities. .Change +resource-stickiness+ during working hours ===== [source,XML] ------- ------- ===== [[s-rules-recheck]] == Ensuring Time-Based Rules Take Effect == A Pacemaker cluster is an event-driven system. As such, it won't recalculate the best place for resources to run unless something (like a resource failure or configuration change) happens. This can mean that a location constraint that only allows resource X to run between 9am and 5pm is not enforced. If you rely on time-based rules, the +cluster-recheck-interval+ cluster option (which defaults to 15 minutes) is essential. This tells the cluster to periodically recalculate the ideal state of the cluster. For example, if you set +cluster-recheck-interval="5m"+, then sometime between 09:00 and 09:05 the cluster would notice that it needs to start resource X, and between 17:00 and 17:05 it would realize that X needed to be stopped. The timing of the actual start and stop actions depends on what other actions the cluster may need to perform first. diff --git a/extra/resources/SysInfo b/extra/resources/SysInfo index 47bbe3749b..7d1c0a3054 100644 --- a/extra/resources/SysInfo +++ b/extra/resources/SysInfo @@ -1,386 +1,386 @@ #!/bin/bash # # # SysInfo OCF Resource Agent # It records (in the CIB) various attributes of a node # # Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Brée # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs} . ${OCF_FUNCTIONS} : ${__OCF_ACTION=$1} ####################################################################### meta_data() { cat < 1.0 This is a SysInfo Resource Agent. It records (in the CIB) various attributes of a node Sample Linux output: arch: i686 os: Linux-2.4.26-gentoo-r14 free_swap: 1999 cpu_info: Intel(R) Celeron(R) CPU 2.40GHz cpu_speed: 4771.02 cpu_cores: 1 cpu_load: 0.00 ram_total: 513 ram_free: 117 root_free: 2.4 #health_disk: red Sample Darwin output: arch: i386 os: Darwin-8.6.2 cpu_info: Intel Core Duo cpu_speed: 2.16 cpu_cores: 2 cpu_load: 0.18 ram_total: 2016 ram_free: 787 root_free: 13 #health_disk: green Units: free_swap: MB ram_*: MB cpu_speed (Linux): bogomips cpu_speed (Darwin): GHz *_free: GB (or user-defined: disk_unit) SysInfo resource agent PID file PID file Interval to allow values to stabilize Dampening Delay Filesystems or Paths to be queried for free disk space as a SPACE separated list - e.g "/dev/sda1 /tmp". Results will be written to an attribute with leading slashes removed, and other slashes replaced with underscore, and the word 'free' appended - e.g for /dev/sda1 it would be 'dev_sda1_free'. Note: The root filesystem '/' is always queried to an attribute named 'root_free' List of Filesytems/Paths to query for free disk space Unit to report disk free space in. Can be one of: B, K, M, G, T, P (case-insensitive) Unit to report disk free space in The amount of free space required in monitored disks. If any of the monitored disks has less than this amount of free space, , with the node attribute "#health_disk" changing to "red", all resources will move away from the node. Set the node-health-strategy property appropriately for this to take effect. If the unit is not specified, it defaults to disk_unit. minimum disk free space required END } ####################################################################### UpdateStat() { name=$1; shift value="$*" printf "%s:\t%s\n" "$name" "$value" if [ "$__OCF_ACTION" = "start" ] ; then ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -S status -n $name -B "$value" else ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -S status -n $name -v "$value" fi } SysInfoStats() { UpdateStat arch "`uname -m`" UpdateStat os "`uname -s`-`uname -r`" case `uname -s` in "Darwin") mem=`top -l 1 | grep Mem: | awk '{print $10}'` mem_used=`top -l 1 | grep Mem: | awk '{print $8}'` mem=`SysInfo_mem_units $mem` mem_used=`SysInfo_mem_units $mem_used` mem_total=`expr $mem_used + $mem` cpu_type=`system_profiler SPHardwareDataType | awk -F': ' '/^CPU Type/ {print $2; exit}'` cpu_speed=`system_profiler SPHardwareDataType | awk -F': ' '/^CPU Speed/ {print $2; exit}'` cpu_cores=`system_profiler SPHardwareDataType | awk -F': ' '/^Number Of/ {print $2; exit}'` cpu_load=`uptime | awk '{ print $10 }'` ;; "FreeBSD") cpu_type=`sysctl -in hw.model` cpu_speed=`sysctl -in dev.cpu.0.freq` cpu_cores=`sysctl -in hw.ncpu` cpu_load=`sysctl -in vm.loadavg | awk '{ print $4 }'` free_pages=`sysctl -in vm.stats.vm.v_free_count` page_count=`sysctl -in vm.stats.vm.v_page_count` page_size=`sysctl -in vm.stats.vm.v_page_size` mem=`expr $free_pages \* $page_size / 1024 / 1024`M mem_total=`expr $page_count \* $page_size / 1024 / 1024`M ;; "Linux") if [ -f /proc/cpuinfo ]; then cpu_type=`awk -F': ' '/model name/ {print $2; exit}' /proc/cpuinfo` cpu_speed=`awk -F': ' '/bogomips/ {print $2; exit}' /proc/cpuinfo` cpu_cores=`grep "^processor" /proc/cpuinfo | wc -l` fi cpu_load=`uptime | awk '{ print $10 }'` if [ -f /proc/meminfo ]; then # meminfo results are in kB mem=`grep "SwapFree" /proc/meminfo | awk '{print $2"k"}'` if [ ! -z $mem ]; then UpdateStat free_swap `SysInfo_mem_units $mem` fi mem=`grep "Inactive" /proc/meminfo | awk '{print $2"k"}'` mem_total=`grep "MemTotal" /proc/meminfo | awk '{print $2"k"}'` else mem=`top -n 1 | grep Mem: | awk '{print $7}'` fi ;; *) esac if [ x != x"$cpu_type" ]; then UpdateStat cpu_info "$cpu_type" fi if [ x != x"$cpu_speed" ]; then UpdateStat cpu_speed "$cpu_speed" fi if [ x != x"$cpu_cores" ]; then UpdateStat cpu_cores "$cpu_cores" fi if [ x != x"$cpu_load" ]; then UpdateStat cpu_load "$cpu_load" fi if [ ! -z "$mem" ]; then # Massage the memory values UpdateStat ram_total `SysInfo_mem_units $mem_total` UpdateStat ram_free `SysInfo_mem_units $mem` fi # Portability notes: # o tail: explicit "-n" not available in Solaris; instead simplify # 'tail -n ' to the equivalent 'tail -'. for disk in "/" ${OCF_RESKEY_disks}; do unset disk_free disk_label disk_free=`df -h ${disk} | tail -1 | awk '{print $4}'` if [ x != x"$disk_free" ]; then disk_label=`echo $disk | sed -e 's#^/$#root#;s#^/*##;s#/#_#g'` disk_free=`SysInfo_hdd_units $disk_free` UpdateStat ${disk_label}_free $disk_free if [ -n "$MIN_FREE" ]; then if [ $disk_free -le $MIN_FREE ]; then UpdateStat "#health_disk" "red" else UpdateStat "#health_disk" "green" fi fi fi done } SysInfo_megabytes() { # Size in megabytes echo $1 | awk '{ n = $0; sub(/[0-9]+(.[0-9]+)?/, ""); split(n, a, $0); n=a[1]; if ($0 == "G" || $0 == "") { n *= 1024 }; if (/^kB?/) { n /= 1024 }; - printf "%d\n", n }' # Intentionaly round to an integer + printf "%d\n", n }' # Intentionally round to an integer } SysInfo_mem_units() { mem=$1 if [ -z $1 ]; then return fi mem=$(SysInfo_megabytes "$1") # Round to the next multiple of 50 r=$(($mem % 50)) if [ $r != 0 ]; then mem=$(($mem + 50 - $r)) fi echo $mem } SysInfo_hdd_units() { # Defauts to size in gigabytes case $OCF_RESKEY_disk_unit in [Pp]) echo $(($(SysInfo_megabytes "$1") / 1024 / 1024 / 1024));; [Tt]) echo $(($(SysInfo_megabytes "$1") / 1024 / 1024));; [Gg]) echo $(($(SysInfo_megabytes "$1") / 1024));; [Mm]) echo $(SysInfo_megabytes "$1");; [Kk]) echo $(($(SysInfo_megabytes "$1") * 1024));; [Bb]) echo $(($(SysInfo_megabytes "$1") * 1024 * 1024));; *) ocf_log err "Invalid value for disk_unit: $OCF_RESKEY_disk_unit" echo $(($(SysInfo_megabytes "$1") / 1024));; esac } SysInfo_usage() { cat < $OCF_RESKEY_pidfile SysInfoStats exit $OCF_SUCCESS } SysInfo_stop() { rm $OCF_RESKEY_pidfile exit $OCF_SUCCESS } SysInfo_monitor() { if [ -f $OCF_RESKEY_pidfile ]; then clone=`cat $OCF_RESKEY_pidfile` fi if [ x$clone = x ]; then rm $OCF_RESKEY_pidfile exit $OCF_NOT_RUNNING elif [ $clone = $OCF_RESKEY_clone ]; then SysInfoStats exit $OCF_SUCCESS elif [ x$OCF_RESKEY_CRM_meta_globally_unique = xtrue -o x$OCF_RESKEY_CRM_meta_globally_unique = xTrue -o x$OCF_RESKEY_CRM_meta_globally_unique = xyes -o x$OCF_RESKEY_CRM_meta_globally_unique = xYes ]; then SysInfoStats exit $OCF_SUCCESS fi exit $OCF_NOT_RUNNING } SysInfo_validate() { return $OCF_SUCCESS } if [ $# -ne 1 ]; then SysInfo_usage exit $OCF_ERR_ARGS fi : ${OCF_RESKEY_pidfile:="${HA_VARRUN%%/}/SysInfo-${OCF_RESOURCE_INSTANCE}"} : ${OCF_RESKEY_disk_unit:="G"} : ${OCF_RESKEY_clone:="0"} if [ x != x${OCF_RESKEY_delay} ]; then OCF_RESKEY_delay="-d ${OCF_RESKEY_delay}" else OCF_RESKEY_delay="-d 0" fi MIN_FREE="" if [ -n "$OCF_RESKEY_min_disk_free" ]; then ocf_is_decimal "$OCF_RESKEY_min_disk_free" && OCF_RESKEY_min_disk_free="$OCF_RESKEY_min_disk_free$OCF_RESKEY_disk_unit" MIN_FREE=`SysInfo_hdd_units $OCF_RESKEY_min_disk_free` fi case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS ;; start) SysInfo_start ;; stop) SysInfo_stop ;; monitor) SysInfo_monitor ;; validate-all) SysInfo_validate ;; usage|help) SysInfo_usage exit $OCF_SUCCESS ;; *) SysInfo_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? diff --git a/include/crm/common/xml.h b/include/crm/common/xml.h index 3741386d29..150055b059 100644 --- a/include/crm/common/xml.h +++ b/include/crm/common/xml.h @@ -1,359 +1,358 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRM_COMMON_XML__H # define CRM_COMMON_XML__H /** * \file * \brief Wrappers for and extensions to libxml2 * \ingroup core */ # include # include # include # include # include # include # include # include # include /* Compression costs a LOT, don't do it unless we're hitting message limits * * For now, use 256k as the lower size, which means we can have 4 big data fields * before we hit heartbeat's message limit * * The previous limit was 10k, compressing 184 of 1071 messages accounted for 23% * of the total CPU used by the cib */ # define CRM_BZ2_BLOCKS 4 # define CRM_BZ2_WORK 20 # define CRM_BZ2_THRESHOLD 128 * 1024 # define XML_PARANOIA_CHECKS 0 gboolean add_message_xml(xmlNode * msg, const char *field, xmlNode * xml); xmlNode *get_message_xml(xmlNode * msg, const char *field); GHashTable *xml2list(xmlNode * parent); void hash2nvpair(gpointer key, gpointer value, gpointer user_data); void hash2field(gpointer key, gpointer value, gpointer user_data); void hash2metafield(gpointer key, gpointer value, gpointer user_data); void hash2smartfield(gpointer key, gpointer value, gpointer user_data); xmlDoc *getDocPtr(xmlNode * node); /* * Replacement function for xmlCopyPropList which at the very least, * doesn't work the way *I* would expect it to. * * Copy all the attributes/properties from src into target. * * Not recursive, does not return anything. * */ void copy_in_properties(xmlNode * target, xmlNode * src); void expand_plus_plus(xmlNode * target, const char *name, const char *value); void fix_plus_plus_recursive(xmlNode * target); /* * Create a node named "name" as a child of "parent" * If parent is NULL, creates an unconnected node. * * Returns the created node * */ xmlNode *create_xml_node(xmlNode * parent, const char *name); /* * Make a copy of name and value and use the copied memory to create * an attribute for node. * * If node, name or value are NULL, nothing is done. * * If name or value are an empty string, nothing is done. * * Returns FALSE on failure and TRUE on success. * */ const char *crm_xml_add(xmlNode * node, const char *name, const char *value); const char *crm_xml_replace(xmlNode * node, const char *name, const char *value); const char *crm_xml_add_int(xmlNode * node, const char *name, int value); /*! * \brief Add a boolean attribute to an XML object * * Add an attribute with the value XML_BOOLEAN_TRUE or XML_BOOLEAN_FALSE * as appropriate to an XML object. * * \param[in/out] node XML object to add attribute to * \param[in] name Name of attribute to add * \param[in] value Boolean whose value will be tested * * \return Pointer to newly created XML attribute's content, or NULL on error */ static inline const char * crm_xml_add_boolean(xmlNode *node, const char *name, gboolean value) { return crm_xml_add(node, name, (value? "true" : "false")); } /* * Unlink the node and set its doc pointer to NULL so free_xml() * will act appropriately */ void unlink_xml_node(xmlNode * node); /* * */ void purge_diff_markers(xmlNode * a_node); /* * Returns a deep copy of src_node * */ xmlNode *copy_xml(xmlNode * src_node); /* * Add a copy of xml_node to new_parent */ xmlNode *add_node_copy(xmlNode * new_parent, xmlNode * xml_node); int add_node_nocopy(xmlNode * parent, const char *name, xmlNode * child); /* * XML I/O Functions * * Whitespace between tags is discarded. */ xmlNode *filename2xml(const char *filename); xmlNode *stdin2xml(void); xmlNode *string2xml(const char *input); int write_xml_fd(xmlNode * xml_node, const char *filename, int fd, gboolean compress); int write_xml_file(xmlNode * xml_node, const char *filename, gboolean compress); char *dump_xml_formatted(xmlNode * msg); /* Also dump the text node with xml_log_option_text enabled */ char *dump_xml_formatted_with_text(xmlNode * msg); char *dump_xml_unformatted(xmlNode * msg); /* * Diff related Functions */ xmlNode *diff_xml_object(xmlNode * left, xmlNode * right, gboolean suppress); xmlNode *subtract_xml_object(xmlNode * parent, xmlNode * left, xmlNode * right, gboolean full, gboolean * changed, const char *marker); gboolean can_prune_leaf(xmlNode * xml_node); void print_xml_diff(FILE * where, xmlNode * diff); gboolean apply_xml_diff(xmlNode * old, xmlNode * diff, xmlNode ** new); /* * Searching & Modifying */ xmlNode *find_xml_node(xmlNode * cib, const char *node_path, gboolean must_find); xmlNode *find_entity(xmlNode * parent, const char *node_name, const char *id); void xml_remove_prop(xmlNode * obj, const char *name); gboolean replace_xml_child(xmlNode * parent, xmlNode * child, xmlNode * update, gboolean delete_only); gboolean update_xml_child(xmlNode * child, xmlNode * to_update); int find_xml_children(xmlNode ** children, xmlNode * root, const char *tag, const char *field, const char *value, gboolean search_matches); int crm_element_value_int(xmlNode * data, const char *name, int *dest); char *crm_element_value_copy(xmlNode * data, const char *name); int crm_element_value_const_int(const xmlNode * data, const char *name, int *dest); const char *crm_element_value_const(const xmlNode * data, const char *name); xmlNode *get_xpath_object(const char *xpath, xmlNode * xml_obj, int error_level); xmlNode *get_xpath_object_relative(const char *xpath, xmlNode * xml_obj, int error_level); static inline const char * crm_element_name(xmlNode *xml) { return xml? (const char *)(xml->name) : NULL; } const char *crm_element_value(xmlNode * data, const char *name); void xml_validate(const xmlNode * root); gboolean xml_has_children(const xmlNode * root); char *calculate_on_disk_digest(xmlNode * local_cib); char *calculate_operation_digest(xmlNode * local_cib, const char *version); char *calculate_xml_versioned_digest(xmlNode * input, gboolean sort, gboolean do_filter, const char *version); /* schema-related functions (from schemas.c) */ gboolean validate_xml(xmlNode * xml_blob, const char *validation, gboolean to_logs); gboolean validate_xml_verbose(xmlNode * xml_blob); /*! * \brief Try update CIB XML to the highest pacemaker's standard if feasible * * "Update" means either actively employ XSLT-based transformation(s) * (if intermediate product to transform valid per its declared schema version, * transformation available, proceeded successfully with a result valid per * expectated newer schema version), or just try to bump the marked validating * schema until all gradually rising schema versions attested or the first * such attempt subsequently fails to validate. Which of the two styles will * be used depends on \p transform parameter (positive/negative, respectively). * * \param[in/out] xml_blob XML tree representing CIB, may be swapped with * an "updated" one * \param[out] best The highest configuration version (per its index * in the global schemas table) it was possible to * reach during the update steps while ensuring * the validity of the result; if no validation * success was observed against possibly multiple * schemas, the value is less or equal the result * of get_schema_version applied on the * input \p xml_blob value (unless that function * maps it to -1, then 0 would be used instead) * \param[in] max When \p transform is positive, this allows to * set upper boundary schema (per its index in the * global schemas table) beyond which its forbidden * to update by the means of XSLT transformation * \param[in] transform Whether to employ XSLT-based transformation so * as allow overcoming possible incompatibilities * between major schema versions (see above) - * \param[in] to_logs Whether to output notable progress info to - * internall log streams or rather to stderr - * (positive/negative, respectively) + * \param[in] to_logs If true, output notable progress info to + * internal log streams; if false, to stderr * * \return pcmk_ok if no non-recoverable error encountered (up to * caller to evaluate if the update satisfies the requirements * per returned \p best value), negative value carrying the reason * otherwise */ int update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, gboolean to_logs); int get_schema_version(const char *name); const char *get_schema_name(int version); const char *xml_latest_schema(void); gboolean cli_config_update(xmlNode ** xml, int *best_version, gboolean to_logs); void crm_xml_init(void); void crm_xml_cleanup(void); static inline xmlNode * __xml_first_child(xmlNode * parent) { xmlNode *child = NULL; if (parent) { child = parent->children; while (child && child->type == XML_TEXT_NODE) { child = child->next; } } return child; } static inline xmlNode * __xml_next(xmlNode * child) { if (child) { child = child->next; while (child && child->type == XML_TEXT_NODE) { child = child->next; } } return child; } static inline xmlNode * __xml_next_element(xmlNode * child) { if (child) { child = child->next; while (child && child->type != XML_ELEMENT_NODE) { child = child->next; } } return child; } void free_xml(xmlNode * child); xmlNode *first_named_child(xmlNode * parent, const char *name); xmlNode *sorted_xml(xmlNode * input, xmlNode * parent, gboolean recursive); xmlXPathObjectPtr xpath_search(xmlNode * xml_top, const char *path); void crm_foreach_xpath_result(xmlNode *xml, const char *xpath, void (*helper)(xmlNode*, void*), void *user_data); xmlNode *expand_idref(xmlNode * input, xmlNode * top); void freeXpathObject(xmlXPathObjectPtr xpathObj); xmlNode *getXpathResult(xmlXPathObjectPtr xpathObj, int index); void dedupXpathResults(xmlXPathObjectPtr xpathObj); static inline int numXpathResults(xmlXPathObjectPtr xpathObj) { if(xpathObj == NULL || xpathObj->nodesetval == NULL) { return 0; } return xpathObj->nodesetval->nodeNr; } bool xml_acl_enabled(xmlNode *xml); void xml_acl_disable(xmlNode *xml); bool xml_acl_denied(xmlNode *xml); /* Part or all of a change was rejected */ bool xml_acl_filtered_copy(const char *user, xmlNode* acl_source, xmlNode *xml, xmlNode ** result); bool xml_tracking_changes(xmlNode * xml); bool xml_document_dirty(xmlNode *xml); void xml_track_changes(xmlNode * xml, const char *user, xmlNode *acl_source, bool enforce_acls); void xml_calculate_changes(xmlNode * old, xmlNode * new); /* For comparing two documents after the fact */ void xml_accept_changes(xmlNode * xml); void xml_log_changes(uint8_t level, const char *function, xmlNode *xml); void xml_log_patchset(uint8_t level, const char *function, xmlNode *xml); bool xml_patch_versions(xmlNode *patchset, int add[3], int del[3]); xmlNode *xml_create_patchset( int format, xmlNode *source, xmlNode *target, bool *config, bool manage_version); int xml_apply_patchset(xmlNode *xml, xmlNode *patchset, bool check_version); void patchset_process_digest(xmlNode *patch, xmlNode *source, xmlNode *target, bool with_digest); void save_xml_to_file(xmlNode * xml, const char *desc, const char *filename); char *xml_get_path(xmlNode *xml); char * crm_xml_escape(const char *text); #endif diff --git a/lib/common/ipc.c b/lib/common/ipc.c index 6d6d3cd15d..f060fcdb09 100644 --- a/lib/common/ipc.c +++ b/lib/common/ipc.c @@ -1,1283 +1,1293 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define PCMK_IPC_VERSION 1 struct crm_ipc_response_header { struct qb_ipc_response_header qb; uint32_t size_uncompressed; uint32_t size_compressed; uint32_t flags; uint8_t version; /* Protect against version changes for anyone that might bother to statically link us */ }; static int hdr_offset = 0; static unsigned int ipc_buffer_max = 0; static unsigned int pick_ipc_buffer(unsigned int max); static inline void crm_ipc_init(void) { if (hdr_offset == 0) { hdr_offset = sizeof(struct crm_ipc_response_header); } if (ipc_buffer_max == 0) { ipc_buffer_max = pick_ipc_buffer(0); } } unsigned int crm_ipc_default_buffer_size(void) { return pick_ipc_buffer(0); } static char * generateReference(const char *custom1, const char *custom2) { static uint ref_counter = 0; const char *local_cust1 = custom1; const char *local_cust2 = custom2; int reference_len = 4; char *since_epoch = NULL; reference_len += 20; /* too big */ reference_len += 40; /* too big */ if (local_cust1 == NULL) { local_cust1 = "_empty_"; } reference_len += strlen(local_cust1); if (local_cust2 == NULL) { local_cust2 = "_empty_"; } reference_len += strlen(local_cust2); since_epoch = calloc(1, reference_len); if (since_epoch != NULL) { sprintf(since_epoch, "%s-%s-%lu-%u", local_cust1, local_cust2, (unsigned long)time(NULL), ref_counter++); } return since_epoch; } xmlNode * create_request_adv(const char *task, xmlNode * msg_data, const char *host_to, const char *sys_to, const char *sys_from, const char *uuid_from, const char *origin) { char *true_from = NULL; xmlNode *request = NULL; char *reference = generateReference(task, sys_from); if (uuid_from != NULL) { true_from = generate_hash_key(sys_from, uuid_from); } else if (sys_from != NULL) { true_from = strdup(sys_from); } else { crm_err("No sys from specified"); } /* host_from will get set for us if necessary by CRMd when routed */ request = create_xml_node(NULL, __FUNCTION__); crm_xml_add(request, F_CRM_ORIGIN, origin); crm_xml_add(request, F_TYPE, T_CRM); crm_xml_add(request, F_CRM_VERSION, CRM_FEATURE_SET); crm_xml_add(request, F_CRM_MSG_TYPE, XML_ATTR_REQUEST); crm_xml_add(request, F_CRM_REFERENCE, reference); crm_xml_add(request, F_CRM_TASK, task); crm_xml_add(request, F_CRM_SYS_TO, sys_to); crm_xml_add(request, F_CRM_SYS_FROM, true_from); /* HOSTTO will be ignored if it is to the DC anyway. */ if (host_to != NULL && strlen(host_to) > 0) { crm_xml_add(request, F_CRM_HOST_TO, host_to); } if (msg_data != NULL) { add_message_xml(request, F_CRM_DATA, msg_data); } free(reference); free(true_from); return request; } /* * This method adds a copy of xml_response_data */ xmlNode * create_reply_adv(xmlNode * original_request, xmlNode * xml_response_data, const char *origin) { xmlNode *reply = NULL; const char *host_from = crm_element_value(original_request, F_CRM_HOST_FROM); const char *sys_from = crm_element_value(original_request, F_CRM_SYS_FROM); const char *sys_to = crm_element_value(original_request, F_CRM_SYS_TO); const char *type = crm_element_value(original_request, F_CRM_MSG_TYPE); const char *operation = crm_element_value(original_request, F_CRM_TASK); const char *crm_msg_reference = crm_element_value(original_request, F_CRM_REFERENCE); if (type == NULL) { crm_err("Cannot create new_message, no message type in original message"); CRM_ASSERT(type != NULL); return NULL; #if 0 } else if (strcasecmp(XML_ATTR_REQUEST, type) != 0) { crm_err("Cannot create new_message, original message was not a request"); return NULL; #endif } reply = create_xml_node(NULL, __FUNCTION__); if (reply == NULL) { crm_err("Cannot create new_message, malloc failed"); return NULL; } crm_xml_add(reply, F_CRM_ORIGIN, origin); crm_xml_add(reply, F_TYPE, T_CRM); crm_xml_add(reply, F_CRM_VERSION, CRM_FEATURE_SET); crm_xml_add(reply, F_CRM_MSG_TYPE, XML_ATTR_RESPONSE); crm_xml_add(reply, F_CRM_REFERENCE, crm_msg_reference); crm_xml_add(reply, F_CRM_TASK, operation); /* since this is a reply, we reverse the from and to */ crm_xml_add(reply, F_CRM_SYS_TO, sys_from); crm_xml_add(reply, F_CRM_SYS_FROM, sys_to); /* HOSTTO will be ignored if it is to the DC anyway. */ if (host_from != NULL && strlen(host_from) > 0) { crm_xml_add(reply, F_CRM_HOST_TO, host_from); } if (xml_response_data != NULL) { add_message_xml(reply, F_CRM_DATA, xml_response_data); } return reply; } /* Libqb based IPC */ /* Server... */ GHashTable *client_connections = NULL; crm_client_t * crm_client_get(qb_ipcs_connection_t * c) { if (client_connections) { return g_hash_table_lookup(client_connections, c); } crm_trace("No client found for %p", c); return NULL; } crm_client_t * crm_client_get_by_id(const char *id) { gpointer key; crm_client_t *client; GHashTableIter iter; if (client_connections && id) { g_hash_table_iter_init(&iter, client_connections); while (g_hash_table_iter_next(&iter, &key, (gpointer *) & client)) { if (strcmp(client->id, id) == 0) { return client; } } } crm_trace("No client found with id=%s", id); return NULL; } const char * crm_client_name(crm_client_t * c) { if (c == NULL) { return "null"; } else if (c->name == NULL && c->id == NULL) { return "unknown"; } else if (c->name == NULL) { return c->id; } else { return c->name; } } void crm_client_init(void) { if (client_connections == NULL) { crm_trace("Creating client hash table"); client_connections = g_hash_table_new(g_direct_hash, g_direct_equal); } } void crm_client_cleanup(void) { if (client_connections != NULL) { int active = g_hash_table_size(client_connections); if (active) { crm_err("Exiting with %d active connections", active); } g_hash_table_destroy(client_connections); client_connections = NULL; } } void crm_client_disconnect_all(qb_ipcs_service_t *service) { qb_ipcs_connection_t *c = NULL; if (service == NULL) { return; } c = qb_ipcs_connection_first_get(service); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(service, last); /* There really shouldn't be anyone connected at this point */ crm_notice("Disconnecting client %p, pid=%d...", last, crm_ipcs_client_pid(last)); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); } } crm_client_t * crm_client_new(qb_ipcs_connection_t * c, uid_t uid_client, gid_t gid_client) { static uid_t uid_server = 0; static gid_t gid_cluster = 0; crm_client_t *client = NULL; CRM_LOG_ASSERT(c); if (c == NULL) { return NULL; } if (gid_cluster == 0) { uid_server = getuid(); if(crm_user_lookup(CRM_DAEMON_USER, NULL, &gid_cluster) < 0) { static bool have_error = FALSE; if(have_error == FALSE) { crm_warn("Could not find group for user %s", CRM_DAEMON_USER); have_error = TRUE; } } } if(gid_cluster != 0 && gid_client != 0) { uid_t best_uid = -1; /* Passing -1 to chown(2) means don't change */ if(uid_client == 0 || uid_server == 0) { /* Someone is priveliged, but the other may not be */ best_uid = QB_MAX(uid_client, uid_server); crm_trace("Allowing user %u to clean up after disconnect", best_uid); } crm_trace("Giving access to group %u", gid_cluster); qb_ipcs_connection_auth_set(c, best_uid, gid_cluster, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); } crm_client_init(); /* TODO: Do our own auth checking, return NULL if unauthorized */ client = calloc(1, sizeof(crm_client_t)); client->ipcs = c; client->kind = CRM_CLIENT_IPC; client->pid = crm_ipcs_client_pid(c); client->id = crm_generate_uuid(); crm_debug("Connecting %p for uid=%d gid=%d pid=%u id=%s", c, uid_client, gid_client, client->pid, client->id); #if ENABLE_ACL client->user = uid2username(uid_client); #endif g_hash_table_insert(client_connections, c, client); return client; } void crm_client_destroy(crm_client_t * c) { if (c == NULL) { return; } if (client_connections) { if (c->ipcs) { crm_trace("Destroying %p/%p (%d remaining)", c, c->ipcs, crm_hash_table_size(client_connections) - 1); g_hash_table_remove(client_connections, c->ipcs); } else { crm_trace("Destroying remote connection %p (%d remaining)", c, crm_hash_table_size(client_connections) - 1); g_hash_table_remove(client_connections, c->id); } } if (c->event_timer) { g_source_remove(c->event_timer); } crm_debug("Destroying %d events", g_list_length(c->event_queue)); while (c->event_queue) { struct iovec *event = c->event_queue->data; c->event_queue = g_list_remove(c->event_queue, event); free(event[0].iov_base); free(event[1].iov_base); free(event); } free(c->id); free(c->name); free(c->user); if (c->remote) { if (c->remote->auth_timeout) { g_source_remove(c->remote->auth_timeout); } free(c->remote->buffer); free(c->remote); } free(c); } int crm_ipcs_client_pid(qb_ipcs_connection_t * c) { struct qb_ipcs_connection_stats stats; stats.client_pid = 0; qb_ipcs_connection_stats_get(c, &stats, 0); return stats.client_pid; } xmlNode * crm_ipcs_recv(crm_client_t * c, void *data, size_t size, uint32_t * id, uint32_t * flags) { xmlNode *xml = NULL; char *uncompressed = NULL; char *text = ((char *)data) + sizeof(struct crm_ipc_response_header); struct crm_ipc_response_header *header = data; if (id) { *id = ((struct qb_ipc_response_header *)data)->id; } if (flags) { *flags = header->flags; } if (is_set(header->flags, crm_ipc_proxied)) { /* mark this client as being the endpoint of a proxy connection. * Proxy connections responses are sent on the event channel to avoid * blocking the proxy daemon (crmd) */ c->flags |= crm_client_flag_ipc_proxied; } if(header->version > PCMK_IPC_VERSION) { crm_err("Filtering incompatible v%d IPC message, we only support versions <= %d", header->version, PCMK_IPC_VERSION); return NULL; } if (header->size_compressed) { int rc = 0; unsigned int size_u = 1 + header->size_uncompressed; uncompressed = calloc(1, size_u); crm_trace("Decompressing message data %u bytes into %u bytes", header->size_compressed, size_u); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &size_u, text, header->size_compressed, 1, 0); text = uncompressed; if (rc != BZ_OK) { crm_err("Decompression failed: %s (%d)", bz2_strerror(rc), rc); free(uncompressed); return NULL; } } CRM_ASSERT(text[header->size_uncompressed - 1] == 0); crm_trace("Received %.200s", text); xml = string2xml(text); free(uncompressed); return xml; } ssize_t crm_ipcs_flush_events(crm_client_t * c); static gboolean crm_ipcs_flush_events_cb(gpointer data) { crm_client_t *c = data; c->event_timer = 0; crm_ipcs_flush_events(c); return FALSE; } ssize_t crm_ipcs_flush_events(crm_client_t * c) { int sent = 0; ssize_t rc = 0; int queue_len = 0; if (c == NULL) { return pcmk_ok; } else if (c->event_timer) { /* There is already a timer, wait until it goes off */ crm_trace("Timer active for %p - %d", c->ipcs, c->event_timer); return pcmk_ok; } queue_len = g_list_length(c->event_queue); while (c->event_queue && sent < 100) { struct crm_ipc_response_header *header = NULL; struct iovec *event = c->event_queue->data; rc = qb_ipcs_event_sendv(c->ipcs, event, 2); if (rc < 0) { break; } sent++; header = event[0].iov_base; if (header->size_compressed) { crm_trace("Event %d to %p[%d] (%lld compressed bytes) sent", header->qb.id, c->ipcs, c->pid, (long long) rc); } else { crm_trace("Event %d to %p[%d] (%lld bytes) sent: %.120s", header->qb.id, c->ipcs, c->pid, (long long) rc, (char *) (event[1].iov_base)); } c->event_queue = g_list_remove(c->event_queue, event); free(event[0].iov_base); free(event[1].iov_base); free(event); } queue_len -= sent; if (sent > 0 || c->event_queue) { crm_trace("Sent %d events (%d remaining) for %p[%d]: %s (%lld)", sent, queue_len, c->ipcs, c->pid, pcmk_strerror(rc < 0 ? rc : 0), (long long) rc); } if (c->event_queue) { if (queue_len % 100 == 0 && queue_len > 99) { crm_warn("Event queue for %p[%d] has grown to %d", c->ipcs, c->pid, queue_len); } else if (queue_len > 500) { crm_err("Evicting slow client %p[%d]: event queue reached %d entries", c->ipcs, c->pid, queue_len); qb_ipcs_disconnect(c->ipcs); return rc; } c->event_timer = g_timeout_add(1000 + 100 * queue_len, crm_ipcs_flush_events_cb, c); } return rc; } ssize_t crm_ipc_prepare(uint32_t request, xmlNode * message, struct iovec ** result, uint32_t max_send_size) { static unsigned int biggest = 0; struct iovec *iov; unsigned int total = 0; char *compressed = NULL; char *buffer = dump_xml_unformatted(message); struct crm_ipc_response_header *header = calloc(1, sizeof(struct crm_ipc_response_header)); CRM_ASSERT(result != NULL); crm_ipc_init(); if (max_send_size == 0) { max_send_size = ipc_buffer_max; } CRM_LOG_ASSERT(max_send_size != 0); *result = NULL; iov = calloc(2, sizeof(struct iovec)); iov[0].iov_len = hdr_offset; iov[0].iov_base = header; header->version = PCMK_IPC_VERSION; header->size_uncompressed = 1 + strlen(buffer); total = iov[0].iov_len + header->size_uncompressed; if (total < max_send_size) { iov[1].iov_base = buffer; iov[1].iov_len = header->size_uncompressed; } else { unsigned int new_size = 0; if (crm_compress_string (buffer, header->size_uncompressed, max_send_size, &compressed, &new_size)) { header->flags |= crm_ipc_compressed; header->size_compressed = new_size; iov[1].iov_len = header->size_compressed; iov[1].iov_base = compressed; free(buffer); biggest = QB_MAX(header->size_compressed, biggest); } else { ssize_t rc = -EMSGSIZE; crm_log_xml_trace(message, "EMSGSIZE"); biggest = QB_MAX(header->size_uncompressed, biggest); crm_err ("Could not compress the message (%u bytes) into less than the configured ipc limit (%u bytes). " "Set PCMK_ipc_buffer to a higher value (%u bytes suggested)", header->size_uncompressed, max_send_size, 4 * biggest); free(compressed); free(buffer); free(header); free(iov); return rc; } } header->qb.size = iov[0].iov_len + iov[1].iov_len; header->qb.id = (int32_t)request; /* Replying to a specific request */ *result = iov; CRM_ASSERT(header->qb.size > 0); return header->qb.size; } ssize_t crm_ipcs_sendv(crm_client_t * c, struct iovec * iov, enum crm_ipc_flags flags) { ssize_t rc; static uint32_t id = 1; struct crm_ipc_response_header *header = iov[0].iov_base; if (c->flags & crm_client_flag_ipc_proxied) { /* _ALL_ replies to proxied connections need to be sent as events */ if (is_not_set(flags, crm_ipc_server_event)) { flags |= crm_ipc_server_event; /* this flag lets us know this was originally meant to be a response. * even though we're sending it over the event channel. */ flags |= crm_ipc_proxied_relay_response; } } header->flags |= flags; if (flags & crm_ipc_server_event) { header->qb.id = id++; /* We don't really use it, but doesn't hurt to set one */ if (flags & crm_ipc_server_free) { crm_trace("Sending the original to %p[%d]", c->ipcs, c->pid); c->event_queue = g_list_append(c->event_queue, iov); } else { struct iovec *iov_copy = calloc(2, sizeof(struct iovec)); crm_trace("Sending a copy to %p[%d]", c->ipcs, c->pid); iov_copy[0].iov_len = iov[0].iov_len; iov_copy[0].iov_base = malloc(iov[0].iov_len); memcpy(iov_copy[0].iov_base, iov[0].iov_base, iov[0].iov_len); iov_copy[1].iov_len = iov[1].iov_len; iov_copy[1].iov_base = malloc(iov[1].iov_len); memcpy(iov_copy[1].iov_base, iov[1].iov_base, iov[1].iov_len); c->event_queue = g_list_append(c->event_queue, iov_copy); } } else { CRM_LOG_ASSERT(header->qb.id != 0); /* Replying to a specific request */ rc = qb_ipcs_response_sendv(c->ipcs, iov, 2); if (rc < header->qb.size) { crm_notice("Response %d to %p[%d] (%u bytes) failed: %s (%d)", header->qb.id, c->ipcs, c->pid, header->qb.size, pcmk_strerror(rc), rc); } else { crm_trace("Response %d sent, %lld bytes to %p[%d]", header->qb.id, (long long) rc, c->ipcs, c->pid); } if (flags & crm_ipc_server_free) { free(iov[0].iov_base); free(iov[1].iov_base); free(iov); } } if (flags & crm_ipc_server_event) { rc = crm_ipcs_flush_events(c); } else { crm_ipcs_flush_events(c); } if (rc == -EPIPE || rc == -ENOTCONN) { crm_trace("Client %p disconnected", c->ipcs); } return rc; } ssize_t crm_ipcs_send(crm_client_t * c, uint32_t request, xmlNode * message, enum crm_ipc_flags flags) { struct iovec *iov = NULL; ssize_t rc = 0; if(c == NULL) { return -EDESTADDRREQ; } crm_ipc_init(); rc = crm_ipc_prepare(request, message, &iov, ipc_buffer_max); if (rc > 0) { rc = crm_ipcs_sendv(c, iov, flags | crm_ipc_server_free); } else { free(iov); crm_notice("Message to %p[%d] failed: %s (%d)", c->ipcs, c->pid, pcmk_strerror(rc), rc); } return rc; } void crm_ipcs_send_ack(crm_client_t * c, uint32_t request, uint32_t flags, const char *tag, const char *function, int line) { if (flags & crm_ipc_client_response) { xmlNode *ack = create_xml_node(NULL, tag); crm_trace("Ack'ing msg from %s (%p)", crm_client_name(c), c); c->request_id = 0; crm_xml_add(ack, "function", function); crm_xml_add_int(ack, "line", line); crm_ipcs_send(c, request, ack, flags); free_xml(ack); } } /* Client... */ #define MIN_MSG_SIZE 12336 /* sizeof(struct qb_ipc_connection_response) */ #define MAX_MSG_SIZE 128*1024 /* 128k default */ struct crm_ipc_s { struct pollfd pfd; /* the max size we can send/receive over ipc */ unsigned int max_buf_size; /* Size of the allocated 'buffer' */ unsigned int buf_size; int msg_size; int need_reply; char *buffer; char *name; uint32_t buffer_flags; qb_ipcc_connection_t *ipc; }; static unsigned int pick_ipc_buffer(unsigned int max) { static unsigned int global_max = 0; if (global_max == 0) { const char *env = getenv("PCMK_ipc_buffer"); if (env) { int env_max = crm_parse_int(env, "0"); global_max = (env_max > 0)? QB_MAX(MIN_MSG_SIZE, env_max) : MAX_MSG_SIZE; } else { global_max = MAX_MSG_SIZE; } } return QB_MAX(max, global_max); } crm_ipc_t * crm_ipc_new(const char *name, size_t max_size) { crm_ipc_t *client = NULL; client = calloc(1, sizeof(crm_ipc_t)); client->name = strdup(name); client->buf_size = pick_ipc_buffer(max_size); client->buffer = malloc(client->buf_size); /* Clients initiating connection pick the max buf size */ client->max_buf_size = client->buf_size; client->pfd.fd = -1; client->pfd.events = POLLIN; client->pfd.revents = 0; return client; } /*! * \brief Establish an IPC connection to a Pacemaker component * * \param[in] client Connection instance obtained from crm_ipc_new() * * \return TRUE on success, FALSE otherwise (in which case errno will be set) */ bool crm_ipc_connect(crm_ipc_t * client) { client->need_reply = FALSE; client->ipc = qb_ipcc_connect(client->name, client->buf_size); if (client->ipc == NULL) { crm_debug("Could not establish %s connection: %s (%d)", client->name, pcmk_strerror(errno), errno); return FALSE; } client->pfd.fd = crm_ipc_get_fd(client); if (client->pfd.fd < 0) { crm_debug("Could not obtain file descriptor for %s connection: %s (%d)", client->name, pcmk_strerror(errno), errno); return FALSE; } qb_ipcc_context_set(client->ipc, client); #ifdef HAVE_IPCS_GET_BUFFER_SIZE client->max_buf_size = qb_ipcc_get_buffer_size(client->ipc); if (client->max_buf_size > client->buf_size) { free(client->buffer); client->buffer = calloc(1, client->max_buf_size); client->buf_size = client->max_buf_size; } #endif return TRUE; } void crm_ipc_close(crm_ipc_t * client) { if (client) { crm_trace("Disconnecting %s IPC connection %p (%p)", client->name, client, client->ipc); if (client->ipc) { qb_ipcc_connection_t *ipc = client->ipc; client->ipc = NULL; qb_ipcc_disconnect(ipc); } } } void crm_ipc_destroy(crm_ipc_t * client) { if (client) { if (client->ipc && qb_ipcc_is_connected(client->ipc)) { crm_notice("Destroying an active IPC connection to %s", client->name); /* The next line is basically unsafe * * If this connection was attached to mainloop and mainloop is active, * the 'disconnected' callback will end up back here and we'll end * up free'ing the memory twice - something that can still happen * even without this if we destroy a connection and it closes before * we call exit */ /* crm_ipc_close(client); */ } crm_trace("Destroying IPC connection to %s: %p", client->name, client); free(client->buffer); free(client->name); free(client); } } int crm_ipc_get_fd(crm_ipc_t * client) { int fd = 0; if (client && client->ipc && (qb_ipcc_fd_get(client->ipc, &fd) == 0)) { return fd; } errno = EINVAL; crm_perror(LOG_ERR, "Could not obtain file IPC descriptor for %s", (client? client->name : "unspecified client")); return -errno; } bool crm_ipc_connected(crm_ipc_t * client) { bool rc = FALSE; if (client == NULL) { crm_trace("No client"); return FALSE; } else if (client->ipc == NULL) { crm_trace("No connection"); return FALSE; } else if (client->pfd.fd < 0) { crm_trace("Bad descriptor"); return FALSE; } rc = qb_ipcc_is_connected(client->ipc); if (rc == FALSE) { client->pfd.fd = -EINVAL; } return rc; } +/*! + * \brief Check whether an IPC connection is ready to be read + * + * \param[in] client Connection to check + * + * \return Positive value if ready to be read, 0 if not ready, -errno on error + */ int -crm_ipc_ready(crm_ipc_t * client) +crm_ipc_ready(crm_ipc_t *client) { + int rc; + CRM_ASSERT(client != NULL); if (crm_ipc_connected(client) == FALSE) { return -ENOTCONN; } client->pfd.revents = 0; - return poll(&(client->pfd), 1, 0); + rc = poll(&(client->pfd), 1, 0); + return (rc < 0)? -errno : rc; } static int crm_ipc_decompress(crm_ipc_t * client) { struct crm_ipc_response_header *header = (struct crm_ipc_response_header *)(void*)client->buffer; if (header->size_compressed) { int rc = 0; unsigned int size_u = 1 + header->size_uncompressed; /* never let buf size fall below our max size required for ipc reads. */ unsigned int new_buf_size = QB_MAX((hdr_offset + size_u), client->max_buf_size); char *uncompressed = calloc(1, new_buf_size); crm_trace("Decompressing message data %u bytes into %u bytes", header->size_compressed, size_u); rc = BZ2_bzBuffToBuffDecompress(uncompressed + hdr_offset, &size_u, client->buffer + hdr_offset, header->size_compressed, 1, 0); if (rc != BZ_OK) { crm_err("Decompression failed: %s (%d)", bz2_strerror(rc), rc); free(uncompressed); return -EILSEQ; } /* * This assert no longer holds true. For an identical msg, some clients may * require compression, and others may not. If that same msg (event) is sent * to multiple clients, it could result in some clients receiving a compressed * msg even though compression was not explicitly required for them. * * CRM_ASSERT((header->size_uncompressed + hdr_offset) >= ipc_buffer_max); */ CRM_ASSERT(size_u == header->size_uncompressed); memcpy(uncompressed, client->buffer, hdr_offset); /* Preserve the header */ header = (struct crm_ipc_response_header *)(void*)uncompressed; free(client->buffer); client->buf_size = new_buf_size; client->buffer = uncompressed; } CRM_ASSERT(client->buffer[hdr_offset + header->size_uncompressed - 1] == 0); return pcmk_ok; } long crm_ipc_read(crm_ipc_t * client) { struct crm_ipc_response_header *header = NULL; CRM_ASSERT(client != NULL); CRM_ASSERT(client->ipc != NULL); CRM_ASSERT(client->buffer != NULL); crm_ipc_init(); client->buffer[0] = 0; client->msg_size = qb_ipcc_event_recv(client->ipc, client->buffer, client->buf_size - 1, 0); if (client->msg_size >= 0) { int rc = crm_ipc_decompress(client); if (rc != pcmk_ok) { return rc; } header = (struct crm_ipc_response_header *)(void*)client->buffer; if(header->version > PCMK_IPC_VERSION) { crm_err("Filtering incompatible v%d IPC message, we only support versions <= %d", header->version, PCMK_IPC_VERSION); return -EBADMSG; } crm_trace("Received %s event %d, size=%u, rc=%d, text: %.100s", client->name, header->qb.id, header->qb.size, client->msg_size, client->buffer + hdr_offset); } else { crm_trace("No message from %s received: %s", client->name, pcmk_strerror(client->msg_size)); } if (crm_ipc_connected(client) == FALSE || client->msg_size == -ENOTCONN) { crm_err("Connection to %s failed", client->name); } if (header) { /* Data excluding the header */ return header->size_uncompressed; } return -ENOMSG; } const char * crm_ipc_buffer(crm_ipc_t * client) { CRM_ASSERT(client != NULL); return client->buffer + sizeof(struct crm_ipc_response_header); } uint32_t crm_ipc_buffer_flags(crm_ipc_t * client) { struct crm_ipc_response_header *header = NULL; CRM_ASSERT(client != NULL); if (client->buffer == NULL) { return 0; } header = (struct crm_ipc_response_header *)(void*)client->buffer; return header->flags; } const char * crm_ipc_name(crm_ipc_t * client) { CRM_ASSERT(client != NULL); return client->name; } static int internal_ipc_send_recv(crm_ipc_t * client, const void *iov) { int rc = 0; do { rc = qb_ipcc_sendv_recv(client->ipc, iov, 2, client->buffer, client->buf_size, -1); } while (rc == -EAGAIN && crm_ipc_connected(client)); return rc; } static int internal_ipc_send_request(crm_ipc_t * client, const void *iov, int ms_timeout) { int rc = 0; time_t timeout = time(NULL) + 1 + (ms_timeout / 1000); do { rc = qb_ipcc_sendv(client->ipc, iov, 2); } while (rc == -EAGAIN && time(NULL) < timeout && crm_ipc_connected(client)); return rc; } static int internal_ipc_get_reply(crm_ipc_t * client, int request_id, int ms_timeout) { time_t timeout = time(NULL) + 1 + (ms_timeout / 1000); int rc = 0; crm_ipc_init(); /* get the reply */ crm_trace("client %s waiting on reply to msg id %d", client->name, request_id); do { rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, 1000); if (rc > 0) { struct crm_ipc_response_header *hdr = NULL; int rc = crm_ipc_decompress(client); if (rc != pcmk_ok) { return rc; } hdr = (struct crm_ipc_response_header *)(void*)client->buffer; if (hdr->qb.id == request_id) { /* Got it */ break; } else if (hdr->qb.id < request_id) { xmlNode *bad = string2xml(crm_ipc_buffer(client)); crm_err("Discarding old reply %d (need %d)", hdr->qb.id, request_id); crm_log_xml_notice(bad, "OldIpcReply"); } else { xmlNode *bad = string2xml(crm_ipc_buffer(client)); crm_err("Discarding newer reply %d (need %d)", hdr->qb.id, request_id); crm_log_xml_notice(bad, "ImpossibleReply"); CRM_ASSERT(hdr->qb.id <= request_id); } } else if (crm_ipc_connected(client) == FALSE) { crm_err("Server disconnected client %s while waiting for msg id %d", client->name, request_id); break; } } while (time(NULL) < timeout); return rc; } int crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, int32_t ms_timeout, xmlNode ** reply) { long rc = 0; struct iovec *iov; static uint32_t id = 0; static int factor = 8; struct crm_ipc_response_header *header; crm_ipc_init(); if (client == NULL) { crm_notice("Invalid connection"); return -ENOTCONN; } else if (crm_ipc_connected(client) == FALSE) { /* Don't even bother */ crm_notice("Connection to %s closed", client->name); return -ENOTCONN; } if (ms_timeout == 0) { ms_timeout = 5000; } if (client->need_reply) { crm_trace("Trying again to obtain pending reply from %s", client->name); rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, ms_timeout); if (rc < 0) { crm_warn("Sending to %s (%p) is disabled until pending reply is received", client->name, client->ipc); return -EALREADY; } else { crm_notice("Lost reply from %s (%p) finally arrived, sending re-enabled", client->name, client->ipc); client->need_reply = FALSE; } } id++; CRM_LOG_ASSERT(id != 0); /* Crude wrap-around detection */ rc = crm_ipc_prepare(id, message, &iov, client->max_buf_size); if(rc < 0) { return rc; } header = iov[0].iov_base; header->flags |= flags; if(is_set(flags, crm_ipc_proxied)) { /* Don't look for a synchronous response */ clear_bit(flags, crm_ipc_client_response); } if(header->size_compressed) { if(factor < 10 && (client->max_buf_size / 10) < (rc / factor)) { crm_notice("Compressed message exceeds %d0%% of the configured ipc limit (%u bytes), " "consider setting PCMK_ipc_buffer to %u or higher", factor, client->max_buf_size, 2 * client->max_buf_size); factor++; } } crm_trace("Sending from client: %s request id: %d bytes: %u timeout:%d msg...", client->name, header->qb.id, header->qb.size, ms_timeout); if (ms_timeout > 0 || is_not_set(flags, crm_ipc_client_response)) { rc = internal_ipc_send_request(client, iov, ms_timeout); if (rc <= 0) { crm_trace("Failed to send from client %s request %d with %u bytes...", client->name, header->qb.id, header->qb.size); goto send_cleanup; } else if (is_not_set(flags, crm_ipc_client_response)) { crm_trace("Message sent, not waiting for reply to %d from %s to %u bytes...", header->qb.id, client->name, header->qb.size); goto send_cleanup; } rc = internal_ipc_get_reply(client, header->qb.id, ms_timeout); if (rc < 0) { /* No reply, for now, disable sending * * The alternative is to close the connection since we don't know * how to detect and discard out-of-sequence replies * * TODO - implement the above */ client->need_reply = TRUE; } } else { rc = internal_ipc_send_recv(client, iov); } if (rc > 0) { struct crm_ipc_response_header *hdr = (struct crm_ipc_response_header *)(void*)client->buffer; crm_trace("Received response %d, size=%u, rc=%ld, text: %.200s", hdr->qb.id, hdr->qb.size, rc, crm_ipc_buffer(client)); if (reply) { *reply = string2xml(crm_ipc_buffer(client)); } } else { crm_trace("Response not received: rc=%ld, errno=%d", rc, errno); } send_cleanup: if (crm_ipc_connected(client) == FALSE) { crm_notice("Connection to %s closed: %s (%ld)", client->name, pcmk_strerror(rc), rc); } else if (rc == -ETIMEDOUT) { crm_warn("Request %d to %s (%p) failed: %s (%ld) after %dms", header->qb.id, client->name, client->ipc, pcmk_strerror(rc), rc, ms_timeout); crm_write_blackbox(0, NULL); } else if (rc <= 0) { crm_warn("Request %d to %s (%p) failed: %s (%ld)", header->qb.id, client->name, client->ipc, pcmk_strerror(rc), rc); } free(header); free(iov[1].iov_base); free(iov); return rc; } /* Utils */ xmlNode * create_hello_message(const char *uuid, const char *client_name, const char *major_version, const char *minor_version) { xmlNode *hello_node = NULL; xmlNode *hello = NULL; if (uuid == NULL || strlen(uuid) == 0 || client_name == NULL || strlen(client_name) == 0 || major_version == NULL || strlen(major_version) == 0 || minor_version == NULL || strlen(minor_version) == 0) { crm_err("Missing fields, Hello message will not be valid."); return NULL; } hello_node = create_xml_node(NULL, XML_TAG_OPTIONS); crm_xml_add(hello_node, "major_version", major_version); crm_xml_add(hello_node, "minor_version", minor_version); crm_xml_add(hello_node, "client_name", client_name); crm_xml_add(hello_node, "client_uuid", uuid); crm_trace("creating hello message"); hello = create_request(CRM_OP_HELLO, hello_node, NULL, NULL, client_name, uuid); free_xml(hello_node); return hello; } diff --git a/lib/common/remote.c b/lib/common/remote.c index fd6a9c2fe8..2042f78f0b 100644 --- a/lib/common/remote.c +++ b/lib/common/remote.c @@ -1,1012 +1,1009 @@ /* * Copyright (c) 2008 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_GNUTLS_GNUTLS_H # undef KEYFILE # include const int psk_tls_kx_order[] = { GNUTLS_KX_DHE_PSK, GNUTLS_KX_PSK, }; const int anon_tls_kx_order[] = { GNUTLS_KX_ANON_DH, GNUTLS_KX_DHE_RSA, GNUTLS_KX_DHE_DSS, GNUTLS_KX_RSA, 0 }; #endif /* Swab macros from linux/swab.h */ #ifdef HAVE_LINUX_SWAB_H # include #else /* * casts are necessary for constants, because we never know how for sure * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way. */ #define __swab16(x) ((uint16_t)( \ (((uint16_t)(x) & (uint16_t)0x00ffU) << 8) | \ (((uint16_t)(x) & (uint16_t)0xff00U) >> 8))) #define __swab32(x) ((uint32_t)( \ (((uint32_t)(x) & (uint32_t)0x000000ffUL) << 24) | \ (((uint32_t)(x) & (uint32_t)0x0000ff00UL) << 8) | \ (((uint32_t)(x) & (uint32_t)0x00ff0000UL) >> 8) | \ (((uint32_t)(x) & (uint32_t)0xff000000UL) >> 24))) #define __swab64(x) ((uint64_t)( \ (((uint64_t)(x) & (uint64_t)0x00000000000000ffULL) << 56) | \ (((uint64_t)(x) & (uint64_t)0x000000000000ff00ULL) << 40) | \ (((uint64_t)(x) & (uint64_t)0x0000000000ff0000ULL) << 24) | \ (((uint64_t)(x) & (uint64_t)0x00000000ff000000ULL) << 8) | \ (((uint64_t)(x) & (uint64_t)0x000000ff00000000ULL) >> 8) | \ (((uint64_t)(x) & (uint64_t)0x0000ff0000000000ULL) >> 24) | \ (((uint64_t)(x) & (uint64_t)0x00ff000000000000ULL) >> 40) | \ (((uint64_t)(x) & (uint64_t)0xff00000000000000ULL) >> 56))) #endif #define REMOTE_MSG_VERSION 1 #define ENDIAN_LOCAL 0xBADADBBD struct crm_remote_header_v0 { uint32_t endian; /* Detect messages from hosts with different endian-ness */ uint32_t version; uint64_t id; uint64_t flags; uint32_t size_total; uint32_t payload_offset; uint32_t payload_compressed; uint32_t payload_uncompressed; /* New fields get added here */ } __attribute__ ((packed)); static struct crm_remote_header_v0 * crm_remote_header(crm_remote_t * remote) { struct crm_remote_header_v0 *header = (struct crm_remote_header_v0 *)remote->buffer; if(remote->buffer_offset < sizeof(struct crm_remote_header_v0)) { return NULL; } else if(header->endian != ENDIAN_LOCAL) { uint32_t endian = __swab32(header->endian); CRM_LOG_ASSERT(endian == ENDIAN_LOCAL); if(endian != ENDIAN_LOCAL) { crm_err("Invalid message detected, endian mismatch: %lx is neither %lx nor the swab'd %lx", ENDIAN_LOCAL, header->endian, endian); return NULL; } header->id = __swab64(header->id); header->flags = __swab64(header->flags); header->endian = __swab32(header->endian); header->version = __swab32(header->version); header->size_total = __swab32(header->size_total); header->payload_offset = __swab32(header->payload_offset); header->payload_compressed = __swab32(header->payload_compressed); header->payload_uncompressed = __swab32(header->payload_uncompressed); } return header; } #ifdef HAVE_GNUTLS_GNUTLS_H int crm_initiate_client_tls_handshake(crm_remote_t * remote, int timeout_ms) { int rc = 0; int pollrc = 0; time_t start = time(NULL); do { rc = gnutls_handshake(*remote->tls_session); if (rc == GNUTLS_E_INTERRUPTED || rc == GNUTLS_E_AGAIN) { pollrc = crm_remote_ready(remote, 1000); if (pollrc < 0) { /* poll returned error, there is no hope */ rc = -1; } } } while (((time(NULL) - start) < (timeout_ms / 1000)) && (rc == GNUTLS_E_INTERRUPTED || rc == GNUTLS_E_AGAIN)); if (rc < 0) { crm_trace("gnutls_handshake() failed with %d", rc); } return rc; } void * crm_create_anon_tls_session(int csock, int type /* GNUTLS_SERVER, GNUTLS_CLIENT */ , void *credentials) { gnutls_session_t *session = gnutls_malloc(sizeof(gnutls_session_t)); gnutls_init(session, type); # ifdef HAVE_GNUTLS_PRIORITY_SET_DIRECT /* http://www.manpagez.com/info/gnutls/gnutls-2.10.4/gnutls_81.php#Echo-Server-with-anonymous-authentication */ gnutls_priority_set_direct(*session, "NORMAL:+ANON-DH", NULL); /* gnutls_priority_set_direct (*session, "NONE:+VERS-TLS-ALL:+CIPHER-ALL:+MAC-ALL:+SIGN-ALL:+COMP-ALL:+ANON-DH", NULL); */ # else gnutls_set_default_priority(*session); gnutls_kx_set_priority(*session, anon_tls_kx_order); # endif gnutls_transport_set_ptr(*session, (gnutls_transport_ptr_t) GINT_TO_POINTER(csock)); switch (type) { case GNUTLS_SERVER: gnutls_credentials_set(*session, GNUTLS_CRD_ANON, (gnutls_anon_server_credentials_t) credentials); break; case GNUTLS_CLIENT: gnutls_credentials_set(*session, GNUTLS_CRD_ANON, (gnutls_anon_client_credentials_t) credentials); break; } return session; } void * create_psk_tls_session(int csock, int type /* GNUTLS_SERVER, GNUTLS_CLIENT */ , void *credentials) { gnutls_session_t *session = gnutls_malloc(sizeof(gnutls_session_t)); gnutls_init(session, type); # ifdef HAVE_GNUTLS_PRIORITY_SET_DIRECT gnutls_priority_set_direct(*session, "NORMAL:+DHE-PSK:+PSK", NULL); # else gnutls_set_default_priority(*session); gnutls_kx_set_priority(*session, psk_tls_kx_order); # endif gnutls_transport_set_ptr(*session, (gnutls_transport_ptr_t) GINT_TO_POINTER(csock)); switch (type) { case GNUTLS_SERVER: gnutls_credentials_set(*session, GNUTLS_CRD_PSK, (gnutls_psk_server_credentials_t) credentials); break; case GNUTLS_CLIENT: gnutls_credentials_set(*session, GNUTLS_CRD_PSK, (gnutls_psk_client_credentials_t) credentials); break; } return session; } static int crm_send_tls(gnutls_session_t * session, const char *buf, size_t len) { const char *unsent = buf; int rc = 0; int total_send; if (buf == NULL) { return -1; } total_send = len; crm_trace("Message size: %llu", (unsigned long long) len); while (TRUE) { rc = gnutls_record_send(*session, unsent, len); if (rc == GNUTLS_E_INTERRUPTED || rc == GNUTLS_E_AGAIN) { crm_debug("Retry"); } else if (rc < 0) { crm_err("Connection terminated rc = %d", rc); break; } else if (rc < len) { crm_debug("Sent %d of %llu bytes", rc, (unsigned long long) len); len -= rc; unsent += rc; } else { crm_trace("Sent all %d bytes", rc); break; } } return rc < 0 ? rc : total_send; } #endif static int crm_send_plaintext(int sock, const char *buf, size_t len) { int rc = 0; const char *unsent = buf; int total_send; if (buf == NULL) { return -1; } total_send = len; crm_trace("Message on socket %d: size=%llu", sock, (unsigned long long) len); retry: rc = write(sock, unsent, len); if (rc < 0) { switch (errno) { case EINTR: case EAGAIN: crm_trace("Retry"); goto retry; default: crm_perror(LOG_ERR, "Could only write %d of the remaining %d bytes", rc, (int)len); break; } } else if (rc < len) { crm_trace("Only sent %d of %llu remaining bytes", rc, (unsigned long long) len); len -= rc; unsent += rc; goto retry; } else { crm_trace("Sent %d bytes: %.100s", rc, buf); } return rc < 0 ? rc : total_send; } static int crm_remote_sendv(crm_remote_t * remote, struct iovec * iov, int iovs) { int lpc = 0; int rc = -ESOCKTNOSUPPORT; for(; lpc < iovs; lpc++) { #ifdef HAVE_GNUTLS_GNUTLS_H if (remote->tls_session) { rc = crm_send_tls(remote->tls_session, iov[lpc].iov_base, iov[lpc].iov_len); } else if (remote->tcp_socket) { #else if (remote->tcp_socket) { #endif rc = crm_send_plaintext(remote->tcp_socket, iov[lpc].iov_base, iov[lpc].iov_len); } else { crm_err("Unsupported connection type"); } } return rc; } int crm_remote_send(crm_remote_t * remote, xmlNode * msg) { int rc = -1; static uint64_t id = 0; char *xml_text = dump_xml_unformatted(msg); struct iovec iov[2]; struct crm_remote_header_v0 *header; if (xml_text == NULL) { crm_err("Invalid XML, can not send msg"); return -1; } header = calloc(1, sizeof(struct crm_remote_header_v0)); iov[0].iov_base = header; iov[0].iov_len = sizeof(struct crm_remote_header_v0); iov[1].iov_base = xml_text; iov[1].iov_len = 1 + strlen(xml_text); id++; header->id = id; header->endian = ENDIAN_LOCAL; header->version = REMOTE_MSG_VERSION; header->payload_offset = iov[0].iov_len; header->payload_uncompressed = iov[1].iov_len; header->size_total = iov[0].iov_len + iov[1].iov_len; crm_trace("Sending len[0]=%d, start=%x", (int)iov[0].iov_len, *(int*)(void*)xml_text); rc = crm_remote_sendv(remote, iov, 2); if (rc < 0) { crm_err("Failed to send remote msg, rc = %d", rc); } free(iov[0].iov_base); free(iov[1].iov_base); return rc; } /*! * \internal * \brief handles the recv buffer and parsing out msgs. * \note new_data is owned by this function once it is passed in. */ xmlNode * crm_remote_parse_buffer(crm_remote_t * remote) { xmlNode *xml = NULL; struct crm_remote_header_v0 *header = crm_remote_header(remote); if (remote->buffer == NULL || header == NULL) { return NULL; } /* Support compression on the receiving end now, in case we ever want to add it later */ if (header->payload_compressed) { int rc = 0; unsigned int size_u = 1 + header->payload_uncompressed; char *uncompressed = calloc(1, header->payload_offset + size_u); crm_trace("Decompressing message data %d bytes into %d bytes", header->payload_compressed, size_u); rc = BZ2_bzBuffToBuffDecompress(uncompressed + header->payload_offset, &size_u, remote->buffer + header->payload_offset, header->payload_compressed, 1, 0); if (rc != BZ_OK && header->version > REMOTE_MSG_VERSION) { crm_warn("Couldn't decompress v%d message, we only understand v%d", header->version, REMOTE_MSG_VERSION); free(uncompressed); return NULL; } else if (rc != BZ_OK) { crm_err("Decompression failed: %s (%d)", bz2_strerror(rc), rc); free(uncompressed); return NULL; } CRM_ASSERT(size_u == header->payload_uncompressed); memcpy(uncompressed, remote->buffer, header->payload_offset); /* Preserve the header */ remote->buffer_size = header->payload_offset + size_u; free(remote->buffer); remote->buffer = uncompressed; header = crm_remote_header(remote); } /* take ownership of the buffer */ remote->buffer_offset = 0; CRM_LOG_ASSERT(remote->buffer[sizeof(struct crm_remote_header_v0) + header->payload_uncompressed - 1] == 0); xml = string2xml(remote->buffer + header->payload_offset); if (xml == NULL && header->version > REMOTE_MSG_VERSION) { crm_warn("Couldn't parse v%d message, we only understand v%d", header->version, REMOTE_MSG_VERSION); } else if (xml == NULL) { crm_err("Couldn't parse: '%.120s'", remote->buffer + header->payload_offset); } return xml; } /*! * \internal - * \brief Determine if a remote session has data to read + * \brief Wait for a remote session to have data to read * - * \retval 0, timeout occurred. - * \retval positive, data is ready to be read - * \retval negative, session has ended + * \param[in] remote Connection to check + * \param[in] total_timeout Maximum time (in ms) to wait + * + * \return Positive value if ready to be read, 0 on timeout, -errno on error */ int -crm_remote_ready(crm_remote_t * remote, int total_timeout /* ms */ ) +crm_remote_ready(crm_remote_t *remote, int total_timeout) { struct pollfd fds = { 0, }; int sock = 0; int rc = 0; time_t start; int timeout = total_timeout; #ifdef HAVE_GNUTLS_GNUTLS_H if (remote->tls_session) { void *sock_ptr = gnutls_transport_get_ptr(*remote->tls_session); sock = GPOINTER_TO_INT(sock_ptr); } else if (remote->tcp_socket) { #else if (remote->tcp_socket) { #endif sock = remote->tcp_socket; } else { crm_err("Unsupported connection type"); } if (sock <= 0) { crm_trace("No longer connected"); return -ENOTCONN; } start = time(NULL); errno = 0; do { fds.fd = sock; fds.events = POLLIN; /* If we got an EINTR while polling, and we have a * specific timeout we are trying to honor, attempt * to adjust the timeout to the closest second. */ if (errno == EINTR && (timeout > 0)) { timeout = total_timeout - ((time(NULL) - start) * 1000); if (timeout < 1000) { timeout = 1000; } } rc = poll(&fds, 1, timeout); } while (rc < 0 && errno == EINTR); - return rc; + return (rc < 0)? -errno : rc; } /*! * \internal * \brief Read bytes off non blocking remote connection. * * \note only use with NON-Blocking sockets. Should only be used after polling socket. * This function will return once max_size is met, the socket read buffer * is empty, or an error is encountered. * * \retval number of bytes received */ static size_t crm_remote_recv_once(crm_remote_t * remote) { int rc = 0; size_t read_len = sizeof(struct crm_remote_header_v0); struct crm_remote_header_v0 *header = crm_remote_header(remote); if(header) { /* Stop at the end of the current message */ read_len = header->size_total; } /* automatically grow the buffer when needed */ if(remote->buffer_size < read_len) { remote->buffer_size = 2 * read_len; crm_trace("Expanding buffer to %llu bytes", (unsigned long long) remote->buffer_size); remote->buffer = realloc_safe(remote->buffer, remote->buffer_size + 1); CRM_ASSERT(remote->buffer != NULL); } #ifdef HAVE_GNUTLS_GNUTLS_H if (remote->tls_session) { rc = gnutls_record_recv(*(remote->tls_session), remote->buffer + remote->buffer_offset, remote->buffer_size - remote->buffer_offset); if (rc == GNUTLS_E_INTERRUPTED) { rc = -EINTR; } else if (rc == GNUTLS_E_AGAIN) { rc = -EAGAIN; } else if (rc < 0) { crm_debug("TLS receive failed: %s (%d)", gnutls_strerror(rc), rc); rc = -pcmk_err_generic; } } else if (remote->tcp_socket) { #else if (remote->tcp_socket) { #endif errno = 0; rc = read(remote->tcp_socket, remote->buffer + remote->buffer_offset, remote->buffer_size - remote->buffer_offset); if(rc < 0) { rc = -errno; } } else { crm_err("Unsupported connection type"); return -ESOCKTNOSUPPORT; } /* process any errors. */ if (rc > 0) { remote->buffer_offset += rc; /* always null terminate buffer, the +1 to alloc always allows for this. */ remote->buffer[remote->buffer_offset] = '\0'; crm_trace("Received %u more bytes, %llu total", rc, (unsigned long long) remote->buffer_offset); } else if (rc == -EINTR || rc == -EAGAIN) { crm_trace("non-blocking, exiting read: %s (%d)", pcmk_strerror(rc), rc); } else if (rc == 0) { crm_debug("EOF encoutered after %llu bytes", (unsigned long long) remote->buffer_offset); return -ENOTCONN; } else { crm_debug("Error receiving message after %llu bytes: %s (%d)", (unsigned long long) remote->buffer_offset, pcmk_strerror(rc), rc); return -ENOTCONN; } header = crm_remote_header(remote); if(header) { if(remote->buffer_offset < header->size_total) { crm_trace("Read less than the advertised length: %llu < %u bytes", (unsigned long long) remote->buffer_offset, header->size_total); } else { crm_trace("Read full message of %llu bytes", (unsigned long long) remote->buffer_offset); return remote->buffer_offset; } } return -EAGAIN; } /*! * \internal * \brief Read data off the socket until at least one full message is present or timeout occures. * \retval TRUE message read * \retval FALSE full message not read */ gboolean crm_remote_recv(crm_remote_t * remote, int total_timeout /*ms */ , int *disconnected) { int rc; time_t start = time(NULL); int remaining_timeout = 0; if (total_timeout == 0) { total_timeout = 10000; } else if (total_timeout < 0) { total_timeout = 60000; } *disconnected = 0; remaining_timeout = total_timeout; while ((remaining_timeout > 0) && !(*disconnected)) { /* read some more off the tls buffer if we still have time left. */ crm_trace("waiting to receive remote msg, starting timeout %d, remaining_timeout %d", total_timeout, remaining_timeout); rc = crm_remote_ready(remote, remaining_timeout); if (rc == 0) { crm_err("poll timed out (%d ms) while waiting to receive msg", remaining_timeout); return FALSE; - } else if (rc == -EAGAIN) { - crm_trace("waiting for remote connection data (up to %dms)", - remaining_timeout); - } else if(rc < 0) { - crm_debug("poll() failed: %s (%d)", pcmk_strerror(rc), rc); + crm_debug("could not poll: %s (%d)", pcmk_strerror(rc), rc); } else { rc = crm_remote_recv_once(remote); if(rc > 0) { return TRUE; } else if (rc < 0) { crm_debug("recv() failed: %s (%d)", pcmk_strerror(rc), rc); } } if(rc == -ENOTCONN) { *disconnected = 1; return FALSE; } remaining_timeout = total_timeout - ((time(NULL) - start) * 1000); } return FALSE; } struct tcp_async_cb_data { gboolean success; int sock; void *userdata; void (*callback) (void *userdata, int sock); int timeout; /*ms */ time_t start; }; static gboolean check_connect_finished(gpointer userdata) { struct tcp_async_cb_data *cb_data = userdata; int rc = 0; int sock = cb_data->sock; int error = 0; fd_set rset, wset; socklen_t len = sizeof(error); struct timeval ts = { 0, }; if (cb_data->success == TRUE) { goto dispatch_done; } FD_ZERO(&rset); FD_SET(sock, &rset); wset = rset; crm_trace("fd %d: checking to see if connect finished", sock); rc = select(sock + 1, &rset, &wset, NULL, &ts); if (rc < 0) { rc = errno; if ((errno == EINPROGRESS) || (errno == EAGAIN)) { /* reschedule if there is still time left */ if ((time(NULL) - cb_data->start) < (cb_data->timeout / 1000)) { goto reschedule; } else { rc = -ETIMEDOUT; } } crm_trace("fd %d: select failed %d connect dispatch ", sock, rc); goto dispatch_done; } else if (rc == 0) { if ((time(NULL) - cb_data->start) < (cb_data->timeout / 1000)) { goto reschedule; } crm_debug("fd %d: timeout during select", sock); rc = -ETIMEDOUT; goto dispatch_done; } else { crm_trace("fd %d: select returned success", sock); rc = 0; } /* can we read or write to the socket now? */ if (FD_ISSET(sock, &rset) || FD_ISSET(sock, &wset)) { if (getsockopt(sock, SOL_SOCKET, SO_ERROR, &error, &len) < 0) { crm_trace("fd %d: call to getsockopt failed", sock); rc = -1; goto dispatch_done; } if (error) { crm_trace("fd %d: error returned from getsockopt: %d", sock, error); rc = -1; goto dispatch_done; } } else { crm_trace("neither read nor write set after select"); rc = -1; goto dispatch_done; } dispatch_done: if (!rc) { crm_trace("fd %d: connected", sock); /* Success, set the return code to the sock to report to the callback */ rc = cb_data->sock; cb_data->sock = 0; } else { close(sock); } if (cb_data->callback) { cb_data->callback(cb_data->userdata, rc); } free(cb_data); return FALSE; reschedule: /* will check again next interval */ return TRUE; } static int internal_tcp_connect_async(int sock, const struct sockaddr *addr, socklen_t addrlen, int timeout /* ms */ , int *timer_id, void *userdata, void (*callback) (void *userdata, int sock)) { int rc = 0; int flag = 0; int interval = 500; int timer; struct tcp_async_cb_data *cb_data = NULL; if ((flag = fcntl(sock, F_GETFL)) >= 0) { if (fcntl(sock, F_SETFL, flag | O_NONBLOCK) < 0) { crm_err("fcntl() write failed"); return -1; } } rc = connect(sock, addr, addrlen); if (rc < 0 && (errno != EINPROGRESS) && (errno != EAGAIN)) { return -1; } cb_data = calloc(1, sizeof(struct tcp_async_cb_data)); cb_data->userdata = userdata; cb_data->callback = callback; cb_data->sock = sock; cb_data->timeout = timeout; cb_data->start = time(NULL); if (rc == 0) { /* The connect was successful immediately, we still return to mainloop * and let this callback get called later. This avoids the user of this api * to have to account for the fact the callback could be invoked within this * function before returning. */ cb_data->success = TRUE; interval = 1; } /* Check connect finished is mostly doing a non-block poll on the socket * to see if we can read/write to it. Once we can, the connect has completed. * This method allows us to connect to the server without blocking mainloop. * * This is a poor man's way of polling to see when the connection finished. * At some point we should figure out a way to use a mainloop fd callback for this. * Something about the way mainloop is currently polling prevents this from working at the * moment though. */ crm_trace("fd %d: scheduling to check if connect finished in %dms second", sock, interval); timer = g_timeout_add(interval, check_connect_finished, cb_data); if (timer_id) { *timer_id = timer; } return 0; } static int internal_tcp_connect(int sock, const struct sockaddr *addr, socklen_t addrlen) { int flag = 0; int rc = connect(sock, addr, addrlen); if (rc == 0) { if ((flag = fcntl(sock, F_GETFL)) >= 0) { if (fcntl(sock, F_SETFL, flag | O_NONBLOCK) < 0) { crm_err("fcntl() write failed"); return -1; } } } return rc; } /*! * \internal * \brief tcp connection to server at specified port * \retval negative, failed to connect. * \retval positive, sock fd */ int crm_remote_tcp_connect_async(const char *host, int port, int timeout, /*ms */ int *timer_id, void *userdata, void (*callback) (void *userdata, int sock)) { char buffer[256]; struct addrinfo *res = NULL; struct addrinfo *rp = NULL; struct addrinfo hints; const char *server = host; int ret_ga; int sock = -1; /* getaddrinfo */ memset(&hints, 0, sizeof(struct addrinfo)); hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */ hints.ai_socktype = SOCK_STREAM; hints.ai_flags = AI_CANONNAME; crm_debug("Looking up %s", server); ret_ga = getaddrinfo(server, NULL, &hints, &res); if (ret_ga) { crm_err("getaddrinfo: %s", gai_strerror(ret_ga)); return -1; } if (!res || !res->ai_addr) { crm_err("getaddrinfo failed"); goto async_cleanup; } for (rp = res; rp != NULL; rp = rp->ai_next) { struct sockaddr *addr = rp->ai_addr; if (!addr) { continue; } if (rp->ai_canonname) { server = res->ai_canonname; } crm_debug("Got address %s for %s", server, host); /* create socket */ sock = socket(rp->ai_family, SOCK_STREAM, IPPROTO_TCP); if (sock == -1) { crm_err("Socket creation failed for remote client connection."); continue; } memset(buffer, 0, DIMOF(buffer)); if (addr->sa_family == AF_INET6) { struct sockaddr_in6 *addr_in = (struct sockaddr_in6 *)(void*)addr; addr_in->sin6_port = htons(port); inet_ntop(addr->sa_family, &addr_in->sin6_addr, buffer, DIMOF(buffer)); } else { struct sockaddr_in *addr_in = (struct sockaddr_in *)(void*)addr; addr_in->sin_port = htons(port); inet_ntop(addr->sa_family, &addr_in->sin_addr, buffer, DIMOF(buffer)); } crm_info("Attempting to connect to remote server at %s:%d", buffer, port); if (callback) { if (internal_tcp_connect_async (sock, rp->ai_addr, rp->ai_addrlen, timeout, timer_id, userdata, callback) == 0) { goto async_cleanup; /* Success for now, we'll hear back later in the callback */ } } else { if (internal_tcp_connect(sock, rp->ai_addr, rp->ai_addrlen) == 0) { break; /* Success */ } } close(sock); sock = -1; } async_cleanup: if (res) { freeaddrinfo(res); } return sock; } int crm_remote_tcp_connect(const char *host, int port) { return crm_remote_tcp_connect_async(host, port, -1, NULL, NULL, NULL); } /* Convert a struct sockaddr address to a string, IPv4 and IPv6: */ static char * get_ip_str(const struct sockaddr_storage * sa, char * s, size_t maxlen) { switch(((struct sockaddr *)sa)->sa_family) { case AF_INET: inet_ntop(AF_INET, &(((struct sockaddr_in *)sa)->sin_addr), s, maxlen); break; case AF_INET6: inet_ntop(AF_INET6, &(((struct sockaddr_in6 *)sa)->sin6_addr), s, maxlen); break; default: strncpy(s, "Unknown AF", maxlen); return NULL; } return s; } int crm_remote_accept(int ssock) { int csock = 0; int rc = 0; int flag = 0; unsigned laddr = 0; struct sockaddr_storage addr; char addr_str[INET6_ADDRSTRLEN]; #ifdef TCP_USER_TIMEOUT int optval; long sbd_timeout = crm_get_sbd_timeout(); #endif /* accept the connection */ laddr = sizeof(addr); memset(&addr, 0, sizeof(addr)); csock = accept(ssock, (struct sockaddr *)&addr, &laddr); get_ip_str(&addr, addr_str, INET6_ADDRSTRLEN); crm_info("New remote connection from %s", addr_str); if (csock == -1) { crm_err("accept socket failed"); return -1; } if ((flag = fcntl(csock, F_GETFL)) >= 0) { if ((rc = fcntl(csock, F_SETFL, flag | O_NONBLOCK)) < 0) { crm_err("fcntl() write failed"); close(csock); return rc; } } else { crm_err("fcntl() read failed"); close(csock); return flag; } #ifdef TCP_USER_TIMEOUT if (sbd_timeout > 0) { optval = sbd_timeout / 2; /* time to fail and retry before watchdog */ rc = setsockopt(csock, SOL_TCP, TCP_USER_TIMEOUT, &optval, sizeof(optval)); if (rc < 0) { crm_err("setting TCP_USER_TIMEOUT (%d) on client socket failed", optval); close(csock); return rc; } } #endif return csock; } diff --git a/tools/crm_report.in b/tools/crm_report.in index 848b6b9e9f..0a456af247 100755 --- a/tools/crm_report.in +++ b/tools/crm_report.in @@ -1,513 +1,515 @@ #!/bin/sh # Copyright (C) 2010 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # Note the quotes around `$TEMP': they are essential! TEMP=`getopt \ -o hv?xl:f:t:n:T:L:p:c:dSACHu:D:MVse: \ --long help,cts:,cts-log:,dest:,node:,nodes:,from:,to:,sos-mode,logfile:,as-directory,single-node,cluster:,user:,max-depth:,version,features,rsh: \ -n 'crm_report' -- "$@"` eval set -- "$TEMP" progname=$(basename "$0") rsh="ssh -T" times="" tests="" nodes="" compress=1 cluster="any" ssh_user="root" search_logs=1 report_data=`dirname $0` maxdepth=5 extra_logs="" sanitize_patterns="passw.*" log_patterns="CRIT: ERROR:" usage() { cat< "$l_base/$HALOG_F" fi for node in $nodes; do cat <$l_base/.env LABEL="$label" REPORT_HOME="$r_base" REPORT_MASTER="$host" REPORT_TARGET="$node" LOG_START=$start LOG_END=$end REMOVE=1 SANITIZE="$sanitize_patterns" CLUSTER=$cluster LOG_PATTERNS="$log_patterns" EXTRA_LOGS="$extra_logs" SEARCH_LOGS=$search_logs verbose=$verbose maxdepth=$maxdepth EOF if [ $host = $node ]; then cat <>$l_base/.env REPORT_HOME="$l_base" EOF cat $l_base/.env $report_data/report.common $report_data/report.collector > $l_base/collector bash $l_base/collector else cat $l_base/.env $report_data/report.common $report_data/report.collector \ | $rsh -l $ssh_user $node -- "mkdir -p $r_base; cat > $r_base/collector; bash $r_base/collector" | (cd $l_base && tar mxf -) fi done analyze $l_base > $l_base/$ANALYSIS_F if [ -f $l_base/$HALOG_F ]; then node_events $l_base/$HALOG_F > $l_base/$EVENTS_F fi for node in $nodes; do cat $l_base/$node/$ANALYSIS_F >> $l_base/$ANALYSIS_F if [ -s $l_base/$node/$EVENTS_F ]; then cat $l_base/$node/$EVENTS_F >> $l_base/$EVENTS_F elif [ -s $l_base/$HALOG_F ]; then awk "\$4==\"$nodes\"" $l_base/$EVENTS_F >> $l_base/$n/$EVENTS_F fi done log " " if [ $compress = 1 ]; then fname=`shrink $l_base` rm -rf $l_base log "Collected results are available in $fname" log " " log "Please create a bug entry at" log " http://developerbugs.linux-foundation.org/enter_bug.cgi?product=Pacemaker" log "Include a description of your problem and attach this tarball" log " " log "Thank you for taking time to create this report." else log "Collected results are available in $l_base" fi log " " } # # check if files have same content in the cluster # cibdiff() { d1=`dirname $1` d2=`dirname $2` if [ -f $d1/RUNNING -a -f $d2/RUNNING ] || [ -f $d1/STOPPED -a -f $d2/STOPPED ]; then if which crm_diff > /dev/null 2>&1; then crm_diff -c -n $1 -o $2 else info "crm_diff(8) not found, cannot diff CIBs" fi else echo "can't compare cibs from running and stopped systems" fi } diffcheck() { [ -f "$1" ] || { echo "$1 does not exist" return 1 } [ -f "$2" ] || { echo "$2 does not exist" return 1 } case `basename $1` in $CIB_F) cibdiff $1 $2;; $B_CONF) diff -u $1 $2;; # confdiff? *) diff -u $1 $2;; esac } # # remove duplicates if files are same, make links instead # consolidate() { for n in $NODES; do if [ -f $1/$2 ]; then rm $1/$n/$2 else mv $1/$n/$2 $1 fi ln -s ../$2 $1/$n done } analyze_one() { rc=0 node0="" for n in $NODES; do if [ "$node0" ]; then diffcheck $1/$node0/$2 $1/$n/$2 rc=$(($rc+$?)) else node0=$n fi done return $rc } analyze() { flist="$HOSTCACHE $MEMBERSHIP_F $CIB_F $CRM_MON_F $B_CONF logd.cf $SYSINFO_F" for f in $flist; do printf "Diff $f... " ls $1/*/$f >/dev/null 2>&1 || { echo "no $1/*/$f :/" continue } if analyze_one $1 $f; then echo "OK" [ "$f" != $CIB_F ] && consolidate $1 $f else echo "" fi done } do_cts() { test_sets=`echo $tests | tr ',' ' '` for test_set in $test_sets; do start_time=0 start_test=`echo $test_set | tr '-' ' ' | awk '{print $1}'` end_time=0 end_test=`echo $test_set | tr '-' ' ' | awk '{print $2}'` if [ x$end_test = x ]; then msg="Extracting test $start_test" label="CTS-$start_test-`date +"%b-%d-%Y"`" end_test=`expr $start_test + 1` else msg="Extracting tests $start_test to $end_test" label="CTS-$start_test-$end_test-`date +"%b-%d-%Y"`" end_test=`expr $end_test + 1` fi if [ $start_test = 0 ]; then start_pat="BEGINNING [0-9].* TESTS" else start_pat="Running test.*\[ *$start_test\]" fi if [ x$ctslog = x ]; then ctslog=`findmsg 1 "$start_pat"` if [ x$ctslog = x ]; then fatal "No CTS control file detected" else log "Using CTS control file: $ctslog" fi fi line=`grep -n "$start_pat" $ctslog | tail -1 | sed 's/:.*//'` if [ ! -z "$line" ]; then start_time=`linetime $ctslog $line` fi line=`grep -n "Running test.*\[ *$end_test\]" $ctslog | tail -1 | sed 's/:.*//'` if [ ! -z "$line" ]; then end_time=`linetime $ctslog $line` fi if [ -z "$nodes" ]; then nodes=`grep CTS: $ctslog | grep -v debug: | grep " \* " | sed s:.*\\\*::g | sort -u | tr '\\n' ' '` log "Calculated node list: $nodes" fi if [ $end_time -lt $start_time ]; then debug "Test didn't complete, grabbing everything up to now" end_time=`date +%s` fi if [ $start_time != 0 ];then log "$msg (`time2str $start_time` to `time2str $end_time`)" collect_data $label $start_time $end_time $ctslog else fatal "$msg failed: not found" fi done } node_names_from_xml() { awk ' /uname/ { for( i=1; i<=NF; i++ ) if( $i~/^uname=/ ) { sub("uname=.","",$i); sub("\".*","",$i); print $i; next; } } ' | tr '\n' ' ' } getnodes() { cluster="$1" # 1. Live (cluster nodes or Pacemaker Remote nodes) # TODO: This will not detect Pacemaker Remote nodes unless they # have ever had a permanent node attribute set, because it only # searches the nodes section. It should also search the config # for resources that create Pacemaker Remote nodes. cib_nodes=$(cibadmin -Ql -o nodes 2>/dev/null) if [ $? -eq 0 ]; then debug "Querying CIB for nodes" echo "$cib_nodes" | node_names_from_xml return fi # 2. Saved if [ -f "@CRM_CONFIG_DIR@/cib.xml" ]; then debug "Querying on-disk CIB for nodes" grep "node " "@CRM_CONFIG_DIR@/cib.xml" | node_names_from_xml return fi # 3. hostcache if [ -z "$HA_STATE_DIR" ]; then HA_STATE_DIR=/var/lib/heartbeat fi if [ -f "$HA_STATE_DIR/hostcache" ]; then debug "Reading nodes from $HA_STATE_DIR/hostcache" awk '{print $1}' "$HA_STATE_DIR/hostcache" return fi # 4. ha.cf if [ "x$cluster" = "xheartbeat" ]; then cluster_cf=$(find_cluster_cf $cluster) debug "Reading nodes from $cluster_cf" getcfvar $cluster node "$cluster_cf" return fi # 5. logs # TODO: This has multiple issues: # * It looks for messages from crm_update_peer(), which is used only by # heartbeat and legacy plugin clusters; it should work with CMAN and # corosync2 clusters as well. # * It does a findmsg for "crm_update_peer" (which will hit # "crm_update_peer_proc" etc.), but then greps for "crm_update_peer:". # * It always uses grep, even though $logfile might be compressed. # For this reason and efficiency, it would nice if findmsg could # optionally print the matches instead of the file names. # * It would be nice to skip this step for Pacemaker Remote nodes since their # logs will not have node names, but it is nontrivial to know that. # Cluster nodes generally won't get here, but stopped Pacemaker Remote # nodes will. logfile=$(findmsg 1 "crm_update_peer") debug "Looking for nodes in $logfile" if [ ! -z "$logfile" ]; then grep crm_update_peer: "$logfile" \ | sed s/.*crm_update_peer// \ | sed s/://g \ | awk '{print $2}' \ | grep -v "(null)" \ | sort -u \ | tr '\n' ' ' fi } if [ "x$tests" != "x" ]; then do_cts elif [ "x$start_time" != "x" ]; then masterlog="" if [ -z "$sanitize_patterns" ]; then log "WARNING: The tarball produced by this program may contain" log " sensitive information such as passwords." log "" log "We will attempt to remove such information if you use the" log "-p option. For example: -p \"pass.*\" -p \"user.*\"" log "" log "However, doing this may reduce the ability for the recipients" log "to diagnose issues and generally provide assistance." log "" log "IT IS YOUR RESPONSIBILITY TO PROTECT SENSITIVE DATA FROM EXPOSURE" log "" fi # If user didn't specify a cluster stack, make a best guess if possible. if [ -z "$cluster" ] || [ "$cluster" = "any" ]; then cluster=$(get_cluster_type) fi # If user didn't specify node(s), make a best guess if possible. if [ -z "$nodes" ]; then nodes=`getnodes $cluster` if [ -n "$nodes" ]; then log "Calculated node list: $nodes" else fatal "Cannot determine nodes; specify --nodes or --single-node" fi fi if echo $nodes | grep -qs $host then debug "We are a cluster node" else debug "We are a log master" masterlog=`findmsg 1 "crmd\\|CTS"` fi if [ -z $end_time ]; then end_time=`perl -e 'print time()'` fi label="pcmk-`date +"%a-%d-%b-%Y"`" log "Collecting data from $nodes (`time2str $start_time` to `time2str $end_time`)" collect_data $label $start_time $end_time $masterlog else fatal "Not sure what to do, no tests or time ranges to extract" fi # vim: set expandtab tabstop=8 softtabstop=4 shiftwidth=4 textwidth=80: diff --git a/tools/report.collector b/tools/report.collector index e75a79031e..83218ee1a0 100644 --- a/tools/report.collector +++ b/tools/report.collector @@ -1,821 +1,829 @@ # Copyright (C) 2007 Dejan Muhamedagic # Almost everything as part of hb_report # Copyright (C) 2010 Andrew Beekhof # Cleanups, refactoring, extensions # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # if echo $REPORT_HOME | grep -qs '^/' then debug "Using full path to working directory: $REPORT_HOME" else REPORT_HOME="$HOME/$REPORT_HOME" debug "Canonicalizing working directory path: $REPORT_HOME" fi detect_host findlogdcf() { for f in \ `test -x $CRM_DAEMON_DIR/ha_logd && which strings > /dev/null 2>&1 && strings $CRM_DAEMON_DIR/ha_logd | grep 'logd\.cf'` \ `for d; do echo $d/logd.cf $d/ha_logd.cf; done` do if [ -f "$f" ]; then echo $f debug "Located logd.cf at: $f" return 0 fi done debug "Could not determine logd.cf location" return 1 } # # find files newer than a and older than b # isnumber() { echo "$*" | grep -qs '^[0-9][0-9]*$' } touchfile() { t=`mktemp` && perl -e "\$file=\"$t\"; \$tm=$1;" -e 'utime $tm, $tm, $file;' && echo $t } find_files_clean() { [ -z "$from_stamp" ] || rm -f "$from_stamp" [ -z "$to_stamp" ] || rm -f "$to_stamp" from_stamp="" to_stamp="" } find_files() { dirs= from_time=$2 to_time=$3 for d in $1; do if [ -d $d ]; then dirs="$dirs $d" fi done if [ x"$dirs" = x ]; then return fi isnumber "$from_time" && [ "$from_time" -gt 0 ] || { warning "sorry, can't find files in [ $1 ] based on time if you don't supply time" return } trap find_files_clean 0 if ! from_stamp=`touchfile $from_time`; then warning "sorry, can't create temporary file for find_files" return fi findexp="-newer $from_stamp" if isnumber "$to_time" && [ "$to_time" -gt 0 ]; then if ! to_stamp=`touchfile $to_time`; then warning "sorry, can't create temporary file for find_files" find_files_clean return fi findexp="$findexp ! -newer $to_stamp" fi find $dirs -type f $findexp find_files_clean trap "" 0 } # # check permissions of files/dirs # pl_checkperms() { perl -e ' # check permissions and ownership # uid and gid are numeric # everything must match exactly # no error checking! (file should exist, etc) ($filename, $perms, $in_uid, $in_gid) = @ARGV; ($mode,$uid,$gid) = (stat($filename))[2,4,5]; $p=sprintf("%04o", $mode & 07777); $p ne $perms and exit(1); $uid ne $in_uid and exit(1); $gid ne $in_gid and exit(1); ' $* } num_id() { getent $1 $2 | awk -F: '{print $3}' } chk_id() { [ "$2" ] && return 0 echo "$1: id not found" return 1 } check_perms() { while read type f p uid gid; do if [ ! -e "$f" ]; then echo "$f doesn't exist" continue elif [ ! -$type "$f" ]; then echo "$f has wrong type" continue fi n_uid=`num_id passwd $uid` chk_id "$uid" "$n_uid" || continue n_gid=`num_id group $gid` chk_id "$gid" "$n_gid" || continue pl_checkperms $f $p $n_uid $n_gid || { echo "wrong permissions or ownership for $f:" ls -ld $f } done } # # coredumps # findbinary() { random_binary=`which cat 2>/dev/null` # suppose we are lucky binary=`gdb $random_binary $1 < /dev/null 2>/dev/null | grep 'Core was generated' | awk '{print $5}' | sed "s/^.//;s/[.':]*$//"` if [ x = x"$binary" ]; then debug "Could not detect the program name for core $1 from the gdb output; will try with file(1)" binary=$(file $1 | awk '/from/{ for( i=1; i<=NF; i++ ) if( $i == "from" ) { print $(i+1) break } }') binary=`echo $binary | tr -d "'"` binary=$(echo $binary | tr -d '`') if [ "$binary" ]; then binary=`which $binary 2>/dev/null` fi fi if [ x = x"$binary" ]; then warning "Could not find the program path for core $1" return fi fullpath=`which $binary 2>/dev/null` if [ x = x"$fullpath" ]; then if [ -x $CRM_DAEMON_DIR/$binary ]; then echo $CRM_DAEMON_DIR/$binary debug "Found the program at $CRM_DAEMON_DIR/$binary for core $1" else warning "Could not find the program path for core $1" fi else echo $fullpath debug "Found the program at $fullpath for core $1" fi } getbt() { which gdb > /dev/null 2>&1 || { warning "Please install gdb to get backtraces" return } for corefile; do absbinpath=`findbinary $corefile` [ x = x"$absbinpath" ] && continue echo "====================== start backtrace ======================" ls -l $corefile # Summary first... gdb -batch -n -quiet -ex ${BT_OPTS:-"thread apply all bt"} -ex quit \ $absbinpath $corefile 2>/dev/null echo "====================== start detail ======================" # Now the unreadable details... gdb -batch -n -quiet -ex ${BT_OPTS:-"thread apply all bt full"} -ex quit \ $absbinpath $corefile 2>/dev/null echo "======================= end backtrace =======================" done } dump_status_and_config() { crm_mon -1 2>&1 | grep -v '^Last upd' > $target/$CRM_MON_F cibadmin -Ql 2>/dev/null > $target/${CIB_F}.live } getconfig() { cluster=$1; shift; target=$1; shift; for cf in $*; do if [ -e "$cf" ]; then cp -a "$cf" $target/ fi done if which crm_uuid >/dev/null 2>&1; then crm_uuid -r > $target/$HB_UUID_F 2>&1 fi if is_running crmd; then dump_status_and_config case $cluster in cman) crm_node -p --cman > $target/$MEMBERSHIP_F 2>&1;; corosync|openais) crm_node -p --openais > $target/$MEMBERSHIP_F 2>&1;; heartbeat) crm_node -p --heartbeat > $target/$MEMBERSHIP_F 2>&1;; *) crm_node -p > $target/$MEMBERSHIP_F 2>&1;; esac echo "$host" > $target/RUNNING elif is_running pacemaker_remoted; then dump_status_and_config echo "$host" > $target/RUNNING else echo "$host" > $target/STOPPED fi +} + +get_readable_cib() { + target="$1"; shift; if [ -f "$target/$CIB_F" ]; then - crm_verify -V -x $target/$CIB_F >$target/$CRM_VERIFY_F 2>&1 + crm_verify -V -x "$target/$CIB_F" >"$target/$CRM_VERIFY_F" 2>&1 if which crm >/dev/null 2>&1 ; then - CIB_file=$target/$CIB_F crm configure show >$target/$CIB_TXT_F 2>&1 + CIB_file="$target/$CIB_F" crm configure show >"$target/$CIB_TXT_F" 2>&1 elif which pcs >/dev/null 2>&1 ; then - pcs config -f $target/$CIB_F >$target/$CIB_TXT_F 2>&1 + pcs config -f "$target/$CIB_F" >"$target/$CIB_TXT_F" 2>&1 fi fi } # # remove values of sensitive attributes # # this is not proper xml parsing, but it will work under the # circumstances sanitize_xml_attrs() { sed $( for patt in $SANITIZE; do echo "-e /name=\"$patt\"/s/value=\"[^\"]*\"/value=\"****\"/" done ) } sanitize_hacf() { awk ' $1=="stonith_host"{ for( i=5; i<=NF; i++ ) $i="****"; } {print} ' } sanitize_one_clean() { [ -z "$tmp" ] || rm -f "$tmp" tmp="" [ -z "$ref" ] || rm -f "$ref" ref="" } sanitize() { file=$1 compress="" if [ -z "$SANITIZE" ]; then return fi echo $file | grep -qs 'gz$' && compress=gzip echo $file | grep -qs 'bz2$' && compress=bzip2 if [ "$compress" ]; then decompress="$compress -dc" else compress=cat decompress=cat fi trap sanitize_one_clean 0 tmp=`mktemp` ref=`mktemp` if [ -z "$tmp" -o -z "$ref" ]; then sanitize_one_clean fatal "cannot create temporary files" fi touch -r $file $ref # save the mtime if [ "`basename $file`" = ha.cf ]; then sanitize_hacf else $decompress | sanitize_xml_attrs | $compress fi < $file > $tmp mv $tmp $file # note: cleaning $tmp up is still needed even after it's renamed # because its temp directory is still there. touch -r $ref $file sanitize_one_clean trap "" 0 } # # get some system info # distro() { if which lsb_release >/dev/null 2>&1 then lsb_release -d | sed -e 's/^Description:\s*//' debug "Using lsb_release for distribution info" return fi relf=`ls /etc/debian_version 2>/dev/null` || relf=`ls /etc/slackware-version 2>/dev/null` || relf=`ls -d /etc/*-release 2>/dev/null` && { for f in $relf; do test -f $f && { echo "`ls $f` `cat $f`" debug "Found `echo $relf | tr '\n' ' '` distribution release file(s)" return } done } warning "No lsb_release, no /etc/*-release, no /etc/debian_version: no distro information" } pkg_ver() { if which dpkg >/dev/null 2>&1 ; then pkg_mgr="deb" elif which rpm >/dev/null 2>&1 ; then pkg_mgr="rpm" elif which pkg_info >/dev/null 2>&1 ; then pkg_mgr="pkg_info" elif which pkginfo >/dev/null 2>&1 ; then pkg_mgr="pkginfo" else warning "Unknown package manager" return fi debug "The package manager is: $pkg_mgr" echo "The package manager is: $pkg_mgr" echo "Installed packages:" case $pkg_mgr in deb) dpkg-query -f '${Package} ${Version} ${Architecture}\n' -W | sort echo for pkg in $*; do if dpkg-query -W $pkg 2>/dev/null ; then debug "Verifying installation of: $pkg" echo "Verifying installation of: $pkg" debsums -s $pkg 2>/dev/null fi done ;; rpm) rpm -qa --qf '%{name} %{version}-%{release} - %{distribution} %{arch}\n' | sort echo for pkg in $*; do if rpm -q $pkg >/dev/null 2>&1 ; then debug "Verifying installation of: $pkg" echo "Verifying installation of: $pkg" rpm --verify $pkg 2>&1 fi done ;; pkg_info) pkg_info ;; pkginfo) pkginfo | awk '{print $3}' # format? ;; esac } getbacktraces() { debug "Looking for backtraces: $*" flist=$( for f in `find_files "$CRM_CORE_DIRS" $1 $2`; do bf=`basename $f` test `expr match $bf core` -gt 0 && echo $f done) if [ "$flist" ]; then for core in $flist; do log "Found core file: `ls -al $core`" done # Make a copy of them in case we need more data later # Luckily they compress well mkdir cores >/dev/null 2>&1 cp -a $flist cores/ shrink cores rm -rf cores # Now get as much as we can from them automagically for f in $flist; do getbt $f done fi } getpeinputs() { if [ -n "$PE_STATE_DIR" ]; then flist=$( find_files "$PE_STATE_DIR" "$1" "$2" | sed "s,`dirname $PE_STATE_DIR`/,,g" ) if [ "$flist" ]; then (cd $(dirname "$PE_STATE_DIR") && tar cf - $flist) | (cd "$3" && tar xf -) debug "found `echo $flist | wc -w` pengine input files in $PE_STATE_DIR" fi fi } getblackboxes() { flist=$( find_files $BLACKBOX_DIR $1 $2 ) for bb in $flist; do bb_short=`basename $bb` qb-blackbox $bb > $3/${bb_short}.blackbox 2>&1 info "Extracting contents of blackbox: $bb_short" done } # # some basic system info and stats # sys_info() { cluster=$1; shift echo "Platform: `uname`" echo "Kernel release: `uname -r`" echo "Architecture: `uname -m`" if [ `uname` = Linux ]; then echo "Distribution: `distro`" fi echo cibadmin --version 2>&1 | head -1 cibadmin -! 2>&1 case $cluster in openais) echo openais # version: how? ;; cman) cman_tool -V 2>&1 | head -1 /usr/sbin/corosync -v 2>&1 | head -1 ;; corosync) /usr/sbin/corosync -v 2>&1 | head -1 ;; heartbeat) echo heartbeat $($CRM_DAEMON_DIR/heartbeat -V 2>&1) ;; esac # Cluster glue version hash (if available) stonith -V 2>/dev/null # Resource agents version hash echo "resource-agents: `grep 'Build version:' /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs`" echo pkg_ver $* } sys_stats() { set -x uname -n uptime ps axf ps auxw top -b -n 1 ifconfig -a ip addr list netstat -i arp -an test -d /proc && { cat /proc/cpuinfo } lsscsi lspci mount df set +x } dlm_dump() { if which dlm_tool >/dev/null 2>&1 ; then if is_running dlm_controld; then echo "--- Lockspace overview:" dlm_tool ls -n echo "---Lockspace history:" dlm_tool dump echo "---Lockspace status:" dlm_tool status dlm_tool status -v echo "---Lockspace config:" dlm_tool dump_config dlm_tool log_plock dlm_tool ls | grep name | while read X N ; do echo "--- Lockspace $N:" dlm_tool lockdump "$N" dlm_tool lockdebug -svw "$N" done fi fi } iscfvarset() { test "`getcfvar $1 $2`" } iscfvartrue() { getcfvar $1 $2 $3 | egrep -qsi "^(true|y|yes|on|1)" } uselogd() { cf_file=$2 case $1 in heartbeat) iscfvartrue $1 use_logd $cf_file && return 0 # if use_logd true iscfvarset $1 logfacility $cf_file || iscfvarset $1 logfile $cf_file || iscfvarset $1 debugfile $cf_file || return 0 # or none of the log options set false ;; *) iscfvartrue $1 use_logd $cf_file ;; esac } get_logfiles() { cf_type=$1 cf_file="$2" cf_logd="$3" facility_var="logfacility" if [ -f "$cf_logd" ]; then # TODO: this call is broken, it expects args if uselogd; then cf_file="$cf_logd" cf_type="logd" fi fi case $cf_type in cman|openais|corosync) if [ -f "$cf_file" ]; then debug "Reading $cf_type log settings from $cf_file" if iscfvartrue $cf_type to_syslog "$cf_file"; then facility_var=syslog_facility fi if iscfvartrue $cf_type to_logfile "$cf_file"; then logfile=$(getcfvar $cf_type logfile "$cf_file") fi fi ;; heartbeat|logd) if [ -f "$cf_file" ]; then debug "Reading $cf_type log settings from $cf_file" if iscfvartrue $cf_type debug "$cf_file"; then logfile=$(getcfvar $cf_type debugfile "$cf_file") else logfile=$(getcfvar $cf_type logfile "$cf_file") fi fi ;; esac if [ -z "$logfile" ]; then logfile="/var/log/pacemaker.log" debug "Log settings not found for cluster type $cf_type, assuming $logfile" fi if [ -f "$logfile" ]; then echo $logfile fi if [ "x$facility" = x ]; then facility=`getcfvar $cf_type $facility_var $cf_file` [ "" = "$facility" ] && facility="daemon" fi # Always include system logs (if we can find them) msg="Mark:pcmk:`perl -e 'print time()'`" logger -p $facility.info $msg >/dev/null 2>&1 sleep 2 # Give syslog time to catch up in case it's busy findmsg 1 "$msg" # Look for detail logs: # - initial pacemakerd logs and tracing might go to a different file pattern="Starting Pacemaker" # - make sure we get something from the Policy Engine pattern="$pattern\\|Calculated Transition" # - cib and lrmd updates (helpful on non-DC nodes or when the cluster has been up for a long time) pattern="$pattern\\|cib_perform_op\\|process_lrm_event" # - pacemaker_remote might use a different file pattern="$pattern\\|pacemaker_remoted:" findmsg 3 "$pattern" } essential_files() { cat< $SYSINFO_F essential_files $cluster | check_perms > $PERMISSIONS_F 2>&1 getconfig $cluster "$REPORT_HOME/$REPORT_TARGET" "$cluster_cf" "$logd_cf" "$CRM_CONFIG_DIR/$CIB_F" "$HA_STATE_DIR/hostcache" "/etc/drbd.conf" "/etc/drbd.d" "/etc/booth" getpeinputs $LOG_START $LOG_END $REPORT_HOME/$REPORT_TARGET getbacktraces $LOG_START $LOG_END > $REPORT_HOME/$REPORT_TARGET/$BT_F getblackboxes $LOG_START $LOG_END $REPORT_HOME/$REPORT_TARGET case $cluster in cman|corosync) if is_running corosync; then corosync-blackbox >corosync-blackbox-live.txt 2>&1 # corosync-fplay > corosync-blackbox.txt tool=`pickfirst corosync-objctl corosync-cmapctl` case $tool in *objctl) $tool -a > corosync.dump 2>/dev/null;; *cmapctl) $tool > corosync.dump 2>/dev/null;; esac corosync-quorumtool -s -i > corosync.quorum 2>&1 fi ;; esac dc=`crm_mon -1 2>/dev/null | awk '/Current DC/ {print $3}'` if [ "$REPORT_TARGET" = "$dc" ]; then echo "$REPORT_TARGET" > DC fi dlm_dump > $DLM_DUMP_F 2>&1 sys_stats > $SYSSTATS_F 2>&1 debug "Sanitizing files: $SANITIZE" # # replace sensitive info with '****' # cf="" if [ ! -z "$cluster_cf" ]; then cf=`basename $cluster_cf` fi -for f in $cf $CIB_F $CIB_TXT_F $CIB_F.live pengine/*; do +for f in "$cf" "$CIB_F" "$CIB_F.live" pengine/*; do if [ -f "$f" ]; then - sanitize $f + sanitize "$f" fi done +# For convenience, generate human-readable version of CIB and any XML errors +# in it (AFTER sanitizing, so we don't need to sanitize this output) +get_readable_cib "$REPORT_HOME/$REPORT_TARGET" + # Grab logs start=`date -d @${LOG_START} +"%F %T"` end=`date -d @${LOG_END} +"%F %T"` debug "Gathering logs from $start to $end: $logfiles $EXTRA_LOGS" trap '[ -z "$pattfile" ] || rm -f "$pattfile"' 0 pattfile=`mktemp` || fatal "cannot create temporary files" for p in $LOG_PATTERNS; do echo "$p" done > $pattfile for l in $logfiles $EXTRA_LOGS; do b="$(basename $l).extract.txt" if [ ! -f "$l" ]; then # Not a file continue elif [ -f "$b" ]; then # We already have it continue fi dumplogset "$l" $LOG_START $LOG_END > "$b" sanitize "$b" echo "Log patterns $REPORT_TARGET:" > $ANALYSIS_F grep -f "$pattfile" "$b" >> $ANALYSIS_F done which journalctl > /dev/null 2>&1 if [ $? = 0 ]; then log "Including segment [$LOG_START-$LOG_END] from journald" journalctl --since "$start" --until "$end" > journal.log cat journal.log | grep -f $pattfile >> $ANALYSIS_F fi rm -f $pattfile trap "" 0 # Purge files containing no information for f in `ls -1`; do if [ -d "$f" ]; then continue elif [ ! -s "$f" ]; then case $f in *core*) log "Detected empty core file: $f";; *) debug "Removing empty file: `ls -al $f`" rm -f $f ;; esac fi done # Parse for events for l in $logfiles $EXTRA_LOGS; do b="$(basename $l).extract.txt" node_events "$b" > $EVENTS_F # Link the first logfile to a standard name if it doesn't yet exist if [ -e "$b" -a ! -e "$HALOG_F" ]; then ln -s "$b" "$HALOG_F" fi done if [ -e $REPORT_HOME/.env ]; then debug "Localhost: $REPORT_MASTER $REPORT_TARGET" elif [ "$REPORT_MASTER" != "$REPORT_TARGET" ]; then debug "Streaming report back to $REPORT_MASTER" (cd $REPORT_HOME && tar cf - $REPORT_TARGET) if [ "$REMOVE" = "1" ]; then cd rm -rf $REPORT_HOME fi fi # vim: set expandtab tabstop=8 softtabstop=4 shiftwidth=4 textwidth=80: diff --git a/tools/report.common.in b/tools/report.common.in index f9ed6f5355..9b43486271 100644 --- a/tools/report.common.in +++ b/tools/report.common.in @@ -1,886 +1,902 @@ # Copyright (C) 2007 Dejan Muhamedagic # Almost everything as part of hb_report # Copyright (C) 2010 Andrew Beekhof # Cleanups, refactoring, extensions # # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # host=`uname -n` shorthost=`echo $host | sed s:\\\\..*::` if [ -z $verbose ]; then verbose=0 fi # Target Files EVENTS_F=events.txt ANALYSIS_F=analysis.txt DESCRIPTION_F=description.txt HALOG_F=cluster-log.txt BT_F=backtraces.txt SYSINFO_F=sysinfo.txt SYSSTATS_F=sysstats.txt DLM_DUMP_F=dlm_dump.txt CRM_MON_F=crm_mon.txt MEMBERSHIP_F=members.txt HB_UUID_F=hb_uuid.txt HOSTCACHE=hostcache CRM_VERIFY_F=crm_verify.txt PERMISSIONS_F=permissions.txt CIB_F=cib.xml CIB_TXT_F=cib.txt EVENT_PATTERNS=" state do_state_transition membership pcmk_peer_update.*(lost|memb): quorum crmd.*crm_update_quorum|crmd.*ais.disp.*quorum.(lost|ac?quir) pause Process.pause.detected resources lrmd.*rsc:(start|stop) stonith te_fence_node|stonith-ng.*log_oper.*report|stonithd.*(requests|(Succeeded|Failed).to.STONITH|result=) start_stop sutdown.decision|Starting.heartbeat|Corosync.Cluster.Engine|corosync.*Initializing.transport|Executive.Service.RELEASE|crm_shutdown:.Requesting.shutdown|pcmk_shutdown:.Shutdown.complete " # superset of all packages of interest on all distros # (the package manager will be used to validate the installation # of any of these packages that are installed) PACKAGES="pacemaker pacemaker-libs pacemaker-cluster-libs libpacemaker3 pacemaker-remote pacemaker-pygui pacemaker-pymgmt pymgmt-client openais libopenais2 libopenais3 corosync libcorosync4 resource-agents cluster-glue-libs cluster-glue libglue2 ldirectord heartbeat heartbeat-common heartbeat-resources libheartbeat2 ocfs2-tools ocfs2-tools-o2cb ocfs2console ocfs2-kmp-default ocfs2-kmp-pae ocfs2-kmp-xen ocfs2-kmp-debug ocfs2-kmp-trace drbd drbd-kmp-xen drbd-kmp-pae drbd-kmp-default drbd-kmp-debug drbd-kmp-trace drbd-heartbeat drbd-pacemaker drbd-utils drbd-bash-completion drbd-xen lvm2 lvm2-clvm cmirrord libdlm libdlm2 libdlm3 hawk ruby lighttpd kernel-default kernel-pae kernel-xen glibc " # Potential locations of system log files SYSLOGS=" /var/log/* /var/logs/* /var/syslog/* /var/adm/* /var/log/ha/* /var/log/cluster/* " +# Whether pacemaker_remoted was found (0 = yes, 1 = no, -1 = haven't looked yet) +REMOTED_STATUS=-1 + # # keep the user posted # record() { if [ x != x"$REPORT_HOME" -a -d "${REPORT_HOME}/$shorthost" ]; then rec="${REPORT_HOME}/$shorthost/report.out" elif [ x != x"${l_base}" -a -d "${l_base}" ]; then rec="${l_base}/report.summary" else rec="/dev/null" fi printf "%-10s $*\n" "$shorthost:" 2>&1 >> "${rec}" } log() { printf "%-10s $*\n" "$shorthost:" 1>&2 record "$*" } debug() { if [ $verbose -gt 0 ]; then log "Debug: $*" else record "Debug: $*" fi } info() { log "$*" } warning() { log "WARN: $*" } fatal() { log "ERROR: $*" exit 1 } is_running() { ps -ef | egrep -qs $(echo "$1" | sed -e 's/^\(.\)/[\1]/') } has_remoted() { - # TODO: the binary might be elsewhere - if which pacemaker_remoted >/dev/null 2>&1; then - return 0 - elif [ -x "@sbindir@/pacemaker_remoted" ]; then - return 0 + if [ $REMOTED_STATUS -eq -1 ]; then + REMOTED_STATUS=1 + if which pacemaker_remoted >/dev/null 2>&1; then + REMOTED_STATUS=0 + elif [ -x "@sbindir@/pacemaker_remoted" ]; then + REMOTED_STATUS=0 + else + # @TODO: the binary might be elsewhere, + # but a global search is too expensive + for d in /{usr,opt}/{local/,}{s,}bin; do + if [ -x "${d}/pacemaker_remoted" ]; then + REMOTED_STATUS=0 + fi + done + fi fi - return 1 + return $REMOTED_STATUS } # found_dir found_dir() { echo "$2" info "Pacemaker $1 found in: $2" } detect_daemon_dir() { info "Searching for where Pacemaker daemons live... this may take a while" for d in \ {/usr,/usr/local,/opt/local,@exec_prefix@}/{libexec,lib64,lib}/{pacemaker,heartbeat} do # pacemaker and pacemaker-cts packages can install to daemon directory, # so check for a file from each if [ -e $d/pengine ] || [ -e $d/lrmd_test ]; then found_dir "daemons" "$d" return fi done + # Pacemaker Remote nodes don't need to install daemons + if has_remoted; then + info "Pacemaker daemons not found (this appears to be a Pacemaker Remote node)" + return + fi + for f in $(find / -maxdepth $maxdepth -type f -name pengine -o -name lrmd_test); do d=$(dirname "$f") found_dir "daemons" "$d" return done - # Pacemaker Remote nodes don't need to install daemons - if has_remoted; then - info "Not found (this appears to be a Pacemaker Remote node)" - else - fatal "Pacemaker daemons not found (nonstandard installation?)" - fi + fatal "Pacemaker daemons not found (nonstandard installation?)" } detect_cib_dir() { for d in $local_state_dir/lib/{pacemaker/cib,heartbeat/crm}; do if [ "-f $d/cib.xml" ]; then found_dir "config files" "$d" return fi done + # Pacemaker Remote nodes don't need a CIB + if has_remoted; then + info "Pacemaker config not found (this appears to be a Pacemaker Remote node)" + return + fi + info "Searching for where Pacemaker keeps config information... this may take a while" # TODO: What about false positives where someone copied the CIB? for f in $(find / -maxdepth $maxdepth -type f -name cib.xml); do d=$(dirname $f) found_dir "config files" "$d" return done - # Pacemaker Remote nodes don't need a CIB - if has_remoted; then - info "Not found (this appears to be a Pacemaker Remote node)" - else - warning "Pacemaker config not found (nonstandard installation?)" - fi + warning "Pacemaker config not found (nonstandard installation?)" } detect_state_dir() { if [ -n "$CRM_CONFIG_DIR" ]; then # Assume new layout # $local_state_dir/lib/pacemaker/(cib,pengine,blackbox,cores) dirname "$CRM_CONFIG_DIR" # Pacemaker Remote nodes might not have a CRM_CONFIG_DIR elif [ -d "$local_state_dir/lib/pacemaker" ]; then echo $local_state_dir/lib/pacemaker fi } detect_pe_dir() { config_root="$1" d="$config_root/pengine" if [ -d "$d" ]; then found_dir "policy engine inputs" "$d" return fi + if has_remoted; then + info "Pacemaker policy engine inputs not found (this appears to be a Pacemaker Remote node)" + return + fi + info "Searching for where Pacemaker keeps Policy Engine inputs... this may take a while" for d in $(find / -maxdepth $maxdepth -type d -name pengine); do found_dir "policy engine inputs" "$d" return done - if has_remoted; then - info "Not found (this appears to be a Pacemaker Remote node)" - else - fatal "Pacemaker policy engine inputs not found (nonstandard installation?)" - fi + fatal "Pacemaker policy engine inputs not found (nonstandard installation?)" } detect_host() { local_state_dir=@localstatedir@ if [ -d $local_state_dir/run ]; then CRM_STATE_DIR=$local_state_dir/run/crm else info "Searching for where Pacemaker keeps runtime data... this may take a while" for d in `find / -maxdepth $maxdepth -type d -name run`; do local_state_dir=`dirname $d` CRM_STATE_DIR=$d/crm break done info "Found: $CRM_STATE_DIR" fi debug "Machine runtime directory: $local_state_dir" debug "Pacemaker runtime data located in: $CRM_STATE_DIR" CRM_DAEMON_DIR=$(detect_daemon_dir) CRM_CONFIG_DIR=$(detect_cib_dir) config_root=$(detect_state_dir) # Older versions had none BLACKBOX_DIR=$config_root/blackbox debug "Pacemaker blackboxes (if any) located in: $BLACKBOX_DIR" PE_STATE_DIR=$(detect_pe_dir "$config_root") HA_STATE_DIR=$local_state_dir/lib/heartbeat debug "Assuming Heartbeat state files, if any, are located in: $HA_STATE_DIR" CRM_CORE_DIRS="" for d in $config_root/cores $HA_STATE_DIR/cores $local_state_dir/lib/corosync $local_state_dir/lib/openais; do if [ -d $d ]; then CRM_CORE_DIRS="$CRM_CORE_DIRS $d" fi done debug "Core files located under: $CRM_CORE_DIRS" } time2str() { perl -e "use POSIX; print strftime('%x %X',localtime($1));" } get_time() { perl -e "\$time=\"$*\";" -e ' $unix_tm = 0; eval "use Date::Parse"; if (index($time, ":") < 0) { } elsif (!$@) { $unix_tm = str2time($time); } else { eval "use Date::Manip"; if (!$@) { $unix_tm = UnixDate(ParseDateString($time), "%s"); } } if ($unix_tm != "") { print int($unix_tm); } else { print ""; } ' } get_time_() { warning "Unknown time format used by: $*" } get_time_syslog() { awk '{print $1,$2,$3}' } get_time_legacy() { awk '{print $2}' | sed 's/_/ /' } get_time_iso8601() { awk '{print $1}' } get_time_format_for_string() { l="$*" t=$(get_time `echo $l | get_time_syslog`) if [ "x$t" != x ]; then echo syslog return fi t=$(get_time `echo $l | get_time_iso8601`) if [ "x$t" != x ]; then echo iso8601 return fi t=$(get_time `echo $l | get_time_legacy`) if [ "x$t" != x ]; then echo legacy return fi } get_time_format() { t=0 l="" func="" trycnt=10 while [ $trycnt -gt 0 ] && read l; do func=$(get_time_format_for_string $l) if [ "x$func" != x ]; then break fi trycnt=$(($trycnt-1)) done #debug "Logfile uses the $func time format" echo $func } get_first_time() { l="" format=$1 while read l; do t=$(echo $l | get_time_$format) ts=$(get_time $t) if [ "x$ts" != x ]; then echo "$ts" return fi done } get_last_time() { l="" best=`date +%s` # Now format=$1 while read l; do t=$(echo $l | get_time_$format) ts=$(get_time $t) if [ "x$ts" != x ]; then best=$ts fi done echo $best } linetime() { l=`tail -n +$2 $1 | grep -a ":[0-5][0-9]:" | head -n 1` format=`get_time_format_for_string $l` t=`echo $l | get_time_$format` get_time "$t" } # # findmsg # # Print the names of up to system logs that contain , # ordered by most recently modified. # findmsg() { max=$1 pattern="$2" found=0 # List all potential system logs ordered by most recently modified. candidates=$(ls -1td $SYSLOGS 2>/dev/null) if [ -z "$candidates" ]; then debug "No system logs found to search for pattern \'$pattern\'" return fi # Portable way to handle files with spaces in their names. SAVE_IFS=$IFS IFS=" " # Check each log file for matches. logfiles="" for f in $candidates; do local cat=$(find_decompressor "$f") $cat "$f" 2>/dev/null | grep -q -e "$pattern" if [ $? -eq 0 ]; then # Add this file to the list of hits # (using newline as separator to handle spaces in names). if [ -z "$logfiles" ]; then logfiles="$f" else logfiles="$logfiles $f" fi # If we have enough hits, print them and return. found=$(($found+1)) if [ $found -ge $max ]; then debug "Pattern \'$pattern\' found in: [ $logfiles ]" IFS=$SAVE_IFS echo "$logfiles" return fi fi done 2>/dev/null IFS=$SAVE_IFS debug "Pattern \'$pattern\' not found in any system logs" } node_events() { if [ -e $1 ]; then Epatt=`echo "$EVENT_PATTERNS" | while read title p; do [ -n "$p" ] && echo -n "|$p"; done | sed 's/.//' ` grep -E "$Epatt" $1 fi } pickfirst() { for x; do which $x >/dev/null 2>&1 && { echo $x return 0 } done return 1 } shrink() { olddir=$PWD dir=`dirname $1` base=`basename $1` target=$1.tar tar_options="cf" variant=`pickfirst bzip2 gzip xz false` case $variant in bz*) tar_options="jcf" target="$target.bz2" ;; gz*) tar_options="zcf" target="$target.gz" ;; xz*) tar_options="Jcf" target="$target.xz" ;; *) warning "Could not find a compression program, the resulting tarball may be huge" ;; esac if [ -e $target ]; then fatal "Destination $target already exists, specify an alternate name with --dest" fi cd $dir >/dev/null 2>&1 tar $tar_options $target $base >/dev/null 2>&1 cd $olddir >/dev/null 2>&1 echo $target } findln_by_time() { local logf=$1 local tm=$2 local first=1 # Some logs can be massive (over 1,500,000,000 lines have been seen in the wild) # Even just 'wc -l' on these files can take 10+ minutes local fileSize=`ls -lh | awk '{ print $5 }' | grep -ie G` if [ x$fileSize != x ]; then warning "$logf is ${fileSize} in size and could take many hours to process. Skipping." return fi local last=`wc -l < $logf` while [ $first -le $last ]; do mid=$((($last+$first)/2)) trycnt=10 while [ $trycnt -gt 0 ]; do tmid=`linetime $logf $mid` [ "$tmid" ] && break warning "cannot extract time: $logf:$mid; will try the next one" trycnt=$(($trycnt-1)) # shift the whole first-last segment first=$(($first-1)) last=$(($last-1)) mid=$((($last+$first)/2)) done if [ -z "$tmid" ]; then warning "giving up on log..." return fi if [ $tmid -gt $tm ]; then last=$(($mid-1)) elif [ $tmid -lt $tm ]; then first=$(($mid+1)) else break fi done echo $mid } dumplog() { local logf=$1 local from_line=$2 local to_line=$3 [ "$from_line" ] || return tail -n +$from_line $logf | if [ "$to_line" ]; then head -$(($to_line-$from_line+1)) else cat fi } # # find log/set of logs which are interesting for us # # # find log slices # find_decompressor() { case $1 in *bz2) echo "bzip2 -dc" ;; *gz) echo "gzip -dc" ;; *xz) echo "xz -dc" ;; *) echo "cat" ;; esac } # # check if the log contains a piece of our segment # is_our_log() { local logf=$1 local from_time=$2 local to_time=$3 local cat=`find_decompressor $logf` local format=`$cat $logf | get_time_format` local first_time=`$cat $logf | head -10 | get_first_time $format` local last_time=`$cat $logf | tail -10 | get_last_time $format` if [ x = "x$first_time" -o x = "x$last_time" ]; then warning "Skipping bad logfile '$1': Could not determine log dates" return 0 # skip (empty log?) fi if [ $from_time -gt $last_time ]; then # we shouldn't get here anyway if the logs are in order return 2 # we're past good logs; exit fi if [ $from_time -ge $first_time ]; then return 3 # this is the last good log fi # have to go further back if [ x = "x$to_time" -o $to_time -ge $first_time ]; then return 1 # include this log else return 0 # don't include this log fi } # # go through archived logs (timewise backwards) and see if there # are lines belonging to us # (we rely on untouched log files, i.e. that modify time # hasn't been changed) # arch_logs() { local logf=$1 local from_time=$2 local to_time=$3 # look for files such as: ha-log-20090308 or # ha-log-20090308.gz (.bz2) or ha-log.0, etc ls -t $logf $logf*[0-9z] 2>/dev/null | while read next_log; do is_our_log $next_log $from_time $to_time case $? in 0) ;; # noop, continue 1) echo $next_log # include log and continue debug "Found log $next_log" ;; 2) break;; # don't go through older logs! 3) echo $next_log # include log and continue debug "Found log $next_log" break ;; # don't go through older logs! esac done } # # print part of the log # drop_tmp_file() { [ -z "$tmp" ] || rm -f "$tmp" } print_logseg() { local logf=$1 local from_time=$2 local to_time=$3 # uncompress to a temp file (if necessary) local cat=`find_decompressor $logf` if [ "$cat" != "cat" ]; then tmp=`mktemp` $cat $logf > $tmp trap drop_tmp_file 0 sourcef=$tmp else sourcef=$logf tmp="" fi if [ "$from_time" = 0 ]; then FROM_LINE=1 else FROM_LINE=`findln_by_time $sourcef $from_time` fi if [ -z "$FROM_LINE" ]; then warning "couldn't find line for time $from_time; corrupt log file?" return fi TO_LINE="" if [ "$to_time" != 0 ]; then TO_LINE=`findln_by_time $sourcef $to_time` if [ -z "$TO_LINE" ]; then warning "couldn't find line for time $to_time; corrupt log file?" return fi if [ $FROM_LINE -lt $TO_LINE ]; then dumplog $sourcef $FROM_LINE $TO_LINE log "Including segment [$FROM_LINE-$TO_LINE] from $logf" else debug "Empty segment [$FROM_LINE-$TO_LINE] from $logf" fi else dumplog $sourcef $FROM_LINE $TO_LINE log "Including all logs after line $FROM_LINE from $logf" fi drop_tmp_file trap "" 0 } # # find log/set of logs which are interesting for us # dumplogset() { local logf=$1 local from_time=$2 local to_time=$3 local logf_set=`arch_logs $logf $from_time $to_time` if [ x = "x$logf_set" ]; then return fi local num_logs=`echo "$logf_set" | wc -l` local oldest=`echo $logf_set | awk '{print $NF}'` local newest=`echo $logf_set | awk '{print $1}'` local mid_logfiles=`echo $logf_set | awk '{for(i=NF-1; i>1; i--) print $i}'` # the first logfile: from $from_time to $to_time (or end) # logfiles in the middle: all # the last logfile: from beginning to $to_time (or end) case $num_logs in 1) print_logseg $newest $from_time $to_time;; *) print_logseg $oldest $from_time 0 for f in $mid_logfiles; do `find_decompressor $f` $f debug "including complete $f logfile" done print_logseg $newest 0 $to_time ;; esac } # cut out a stanza getstanza() { awk -v name="$1" ' !in_stanza && NF==2 && /^[a-z][a-z]*[[:space:]]*{/ { # stanza start if ($1 == name) in_stanza = 1 } in_stanza { print } in_stanza && NF==1 && $1 == "}" { exit } ' } # supply stanza in $1 and variable name in $2 # (stanza is optional) getcfvar() { cf_type=$1; shift; cf_var=$1; shift; cf_file=$* [ -f "$cf_file" ] || return case $cf_type in cman) grep $cf_var $cf_file | sed s/.*$cf_var=\"// | sed s/\".*// ;; corosync|openais) sed 's/#.*//' < $cf_file | if [ $# -eq 2 ]; then getstanza "$cf_var" shift 1 else cat fi | awk -v varname="$cf_var" ' NF==2 && match($1,varname":$")==1 { print $2; exit; } ' ;; heartbeat) sed 's/#.*//' < $cf_file | grep -w "^$cf_var" | sed 's/^[^[:space:]]*[[:space:]]*//' ;; logd) sed 's/#.*//' < $cf_file | grep -w "^$cf_var" | sed 's/^[^[:space:]]*[[:space:]]*//' ;; esac } pickfirst() { for x; do which $x >/dev/null 2>&1 && { echo $x return 0 } done return 1 } # # figure out the cluster type, depending on the process list # and existence of configuration files # get_cluster_type() { if is_running corosync; then tool=`pickfirst corosync-objctl corosync-cmapctl` case $tool in *objctl) quorum=`$tool -a | grep quorum.provider | sed 's/.*=\s*//'`;; *cmapctl) quorum=`$tool | grep quorum.provider | sed 's/.*=\s*//'`;; esac if [ x"$quorum" = x"quorum_cman" ]; then stack="cman" else stack="corosync" fi elif is_running aisexec; then stack="openais" elif ps -ef | grep -v -e grep -e "eartbeat/[clasp]" | egrep -qs '[h]eartbeat' then stack="heartbeat" # Now we're guessing... elif [ -f /etc/cluster/cluster.conf ]; then stack="cman" # TODO: Technically these could be anywhere :-/ elif [ -f /etc/corosync/corosync.conf ]; then stack="corosync" elif [ -f /etc/ais/openais.conf ]; then stack="openais" elif [ -f /etc/ha.d/ha.cf ]; then stack="heartbeat" else # We still don't know. This might be a Pacemaker Remote node, # or the configuration might be in a nonstandard location. stack="any" fi debug "Detected the '$stack' cluster stack" echo $stack } find_cluster_cf() { case $1 in cman) echo "/etc/cluster/cluster.conf";; corosync) best_size=0 best_file="" # TODO: Technically these could be anywhere :-/ for cf in /etc/ais/openais.conf /etc/corosync/corosync.conf; do if [ -f $cf ]; then size=`wc -l $cf | awk '{print $1}'` if [ $size -gt $best_size ]; then best_size=$size best_file=$cf fi fi done if [ -z "$best_file" ]; then debug "Looking for corosync configuration file. This may take a while..." for f in `find / -maxdepth $maxdepth -type f -name corosync.conf`; do best_file=$f break done fi debug "Located corosync config file: $best_file" echo "$best_file" ;; openais) # TODO: Technically it could be anywhere :-/ cf="/etc/ais/openais.conf" if [ -f $cf ]; then echo "$cf" fi ;; heartbeat) cf="/etc/ha.d/ha.cf" if [ -f $cf ]; then echo "$cf" fi ;; any) # Cluster type is undetermined. Don't complain, because this # might be a Pacemaker Remote node. ;; *) warning "Unknown cluster type: $1" ;; esac } # # check for the major prereq for a) parameter parsing and b) # parsing logs # t=`get_time "12:00"` if [ "$t" = "" ]; then fatal "please install the perl Date::Parse module (perl-DateTime-Format-DateParse on Fedora/Red Hat)" fi # vim: set expandtab tabstop=8 softtabstop=4 shiftwidth=4 textwidth=80: