diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
index 9fd1ba9c7e..976fed1392 100644
--- a/daemons/controld/controld_execd.c
+++ b/daemons/controld/controld_execd.c
@@ -1,2738 +1,2733 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/* Five minutes, expressed in milliseconds. The expansion is parenthesized so
 * that it behaves as a single value when used inside a larger expression
 * (e.g. as a divisor or next to a higher-precedence operator).
 */
#define START_DELAY_THRESHOLD (5 * 60 * 1000)

/* Maximum number of failed executor connection attempts that are tolerated */
#define MAX_LRM_REG_FAILS 30

/* Evaluates to "" when i is 1 and to "s" otherwise (plural suffix for logs) */
#define s_if_plural(i) (((i) == 1)? "" : "s")
/* Context passed as user data to lrm_remove_deleted_rsc() when scanning the
 * table of pending deletion operations.
 */
struct delete_event_s {
int rc; // result code of the deletion, forwarded to notify_deleted()
const char *rsc; // ID of the deleted resource (borrowed pointer, not freed here)
lrm_state_t *lrm_state; // executor state that the deletion applies to
};
/* Forward declarations for functions that are referenced before their
 * definitions (some definitions lie further down than this view shows).
 */
static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id);
static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list);
static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options,
const char *user_name);
static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op,
const char *rsc_id, const char *operation);
static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation,
xmlNode * msg, xmlNode * request);
/* Not static: this one has external linkage */
void send_direct_ack(const char *to_host, const char *to_sys,
lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id);
static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
int log_level);
static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op);
/*!
 * \internal
 * \brief React to the local executor connection going away
 *
 * If the controller still had the connection flagged as up, the loss is
 * treated as a failure: it is logged, an error input is registered with the
 * FSA, and the connected bit is cleared. Otherwise the disconnect is only
 * logged.
 */
static void
lrm_connection_destroy(void)
{
    if (!is_set(fsa_input_register, R_LRM_CONNECTED)) {
        crm_info("Disconnected from executor");
        return;
    }

    crm_crit("Connection to executor failed");
    register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
    clear_bit(fsa_input_register, R_LRM_CONNECTED);
}
/*!
 * \internal
 * \brief Build the "<resource>:<call ID>" key used for pending operations
 *
 * \param[in] rsc      Resource ID
 * \param[in] call_id  Executor call ID
 *
 * \return Newly allocated string (caller is responsible for freeing it)
 */
static char *
make_stop_id(const char *rsc, int call_id)
{
    char *stop_id = crm_strdup_printf("%s:%d", rsc, call_id);

    return stop_id;
}
/*!
 * \internal
 * \brief Hash table iterator: copy a non-meta parameter into another table
 *
 * \param[in]     key        Parameter name
 * \param[in]     value      Parameter value
 * \param[in,out] user_data  Destination hash table (receives string copies)
 */
static void
copy_instance_keys(gpointer key, gpointer value, gpointer user_data)
{
    const char *name = key;
    const char *setting = value;

    if (strstr(name, CRM_META "_") != NULL) {
        return; /* meta-attribute, not an instance parameter */
    }
    g_hash_table_replace(user_data, strdup(name), strdup(setting));
}
/*!
 * \internal
 * \brief Hash table iterator: copy a meta parameter into another table
 *
 * \param[in]     key        Parameter name
 * \param[in]     value      Parameter value
 * \param[in,out] user_data  Destination hash table (receives string copies)
 */
static void
copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
{
    const char *name = key;
    const char *setting = value;

    if (strstr(name, CRM_META "_") == NULL) {
        return; /* instance parameter, not a meta-attribute */
    }
    g_hash_table_replace(user_data, strdup(name), strdup(setting));
}
/*!
 * \internal
 * \brief Drop one recurring operation from a resource's cached history
 *
 * The first cached entry with the same interval, resource ID and operation
 * type as \p op is unlinked from the list and freed.
 *
 * \param[in,out] history  Resource history to modify
 * \param[in]     op       Operation to look for
 *
 * \return TRUE if a matching entry was found and removed, FALSE otherwise
 */
static gboolean
history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op)
{
    GList *node = history->recurring_op_list;

    while (node != NULL) {
        lrmd_event_data_t *candidate = node->data;

        if ((op->interval_ms != candidate->interval_ms)
            || !crm_str_eq(op->rsc_id, candidate->rsc_id, TRUE)
            || !safe_str_eq(op->op_type, candidate->op_type)) {
            node = node->next;
            continue;
        }

        history->recurring_op_list = g_list_delete_link(history->recurring_op_list, node);
        lrmd_free_event(candidate);
        return TRUE;
    }
    return FALSE;
}
/*!
 * \internal
 * \brief Release every recurring operation cached in a resource history
 *
 * Frees each cached event, then the list itself, and leaves the list pointer
 * set to NULL.
 *
 * \param[in,out] history  Resource history to modify
 */
static void
history_free_recurring_ops(rsc_history_t *history)
{
    GList *node = history->recurring_op_list;

    while (node != NULL) {
        lrmd_free_event(node->data);
        node = node->next;
    }

    g_list_free(history->recurring_op_list);
    history->recurring_op_list = NULL;
}
/*!
 * \internal
 * \brief Free a resource history entry and everything it owns
 *
 * \param[in,out] data  Resource history (rsc_history_t *) to free
 */
void
history_free(gpointer data)
{
    rsc_history_t *entry = data;

    if (entry->stop_params != NULL) {
        g_hash_table_destroy(entry->stop_params);
    }

    history_free_recurring_ops(entry);
    lrmd_free_event(entry->last);
    lrmd_free_event(entry->failed);

    /* rsc.id is not freed separately: it is set to the same pointer as id */
    free(entry->rsc.provider);
    free(entry->rsc.standard);
    free(entry->rsc.type);
    free(entry->id);
    free(entry);
}
/*!
 * \internal
 * \brief Update the in-memory resource history with an operation result
 *
 * Depending on the result, the operation is recorded as the resource's last
 * failure, its last non-recurring result, and/or one of its recurring
 * operations. Notify operations are ignored, and a result flagged as
 * "resource deleted" purges the resource's status from the CIB instead.
 *
 * \param[in,out] lrm_state  Executor state whose history table is updated
 * \param[in]     rsc        Resource information (may be NULL, in which case
 *                           no new history entry is created)
 * \param[in]     op         Operation result to record
 */
static void
update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
{
int target_rc = 0;
rsc_history_t *entry = NULL;
if (op->rsc_deleted) {
crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL);
return;
}
if (safe_str_eq(op->op_type, RSC_NOTIFY)) {
return;
}
crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);
entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
if (entry == NULL && rsc) {
/* First result seen for this resource: create its history entry */
entry = calloc(1, sizeof(rsc_history_t));
entry->id = strdup(op->rsc_id);
/* entry->id doubles as the hash table key and as rsc.id */
g_hash_table_insert(lrm_state->resource_history, entry->id, entry);
entry->rsc.id = entry->id;
entry->rsc.type = strdup(rsc->type);
entry->rsc.standard = strdup(rsc->standard);
if (rsc->provider) {
entry->rsc.provider = strdup(rsc->provider);
} else {
entry->rsc.provider = NULL;
}
} else if (entry == NULL) {
crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
return;
}
entry->last_callid = op->call_id;
target_rc = rsc_op_expected_rc(op);
if (op->op_status == PCMK_LRM_OP_CANCELLED) {
if (op->interval_ms > 0) {
crm_trace("Removing cancelled recurring op: " CRM_OP_FMT,
op->rsc_id, op->op_type, op->interval_ms);
history_remove_recurring_op(entry, op);
return;
} else {
/* Cancelled non-recurring op: only logged here, but processing still
 * continues with the recurring-op handling at the end of the function
 */
crm_trace("Skipping " CRM_OP_FMT " rc=%d, status=%d",
op->rsc_id, op->op_type, op->interval_ms, op->rc,
op->op_status);
}
} else if (did_rsc_op_fail(op, target_rc)) {
/* Store failed monitors here, otherwise the block below will cause them
* to be forgotten when a stop happens.
*/
if (entry->failed) {
lrmd_free_event(entry->failed);
}
entry->failed = lrmd_copy_event(op);
} else if (op->interval_ms == 0) {
/* Successful non-recurring op becomes the resource's "last" result */
if (entry->last) {
lrmd_free_event(entry->last);
}
entry->last = lrmd_copy_event(op);
if (op->params &&
(safe_str_eq(CRMD_ACTION_START, op->op_type) ||
safe_str_eq("reload", op->op_type) ||
safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) {
/* Rebuild stop_params from this op's non-meta parameters */
if (entry->stop_params) {
g_hash_table_destroy(entry->stop_params);
}
entry->stop_params = crm_str_table_new();
g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
}
}
if (op->interval_ms > 0) {
/* Ensure there are no duplicates */
history_remove_recurring_op(entry, op);
crm_trace("Adding recurring op: " CRM_OP_FMT,
op->rsc_id, op->op_type, op->interval_ms);
entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));
} else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) {
/* Any non-recurring op other than RSC_STATUS discards the cached
 * recurring ops
 */
crm_trace("Dropping %d recurring ops because of: " CRM_OP_FMT,
g_list_length(entry->recurring_op_list), op->rsc_id,
op->op_type, op->interval_ms);
history_free_recurring_ops(entry);
}
}
/*!
 * \internal
 * \brief Acknowledge a resource task directly, reporting success
 *
 * Constructs an operation for the task, marks it as done with an OK result,
 * sends it as a direct ack, and frees it again.
 *
 * \param[in] lrm_state  LRM connection
 * \param[in] input      Input message being ack'ed
 * \param[in] rsc_id     ID of affected resource
 * \param[in] rsc        Affected resource (if available)
 * \param[in] task       Operation task being ack'ed
 * \param[in] ack_host   Name of host to send ack to
 * \param[in] ack_sys    IPC system name to ack
 */
static void
send_task_ok_ack(lrm_state_t *lrm_state, ha_msg_input_t *input,
                 const char *rsc_id, lrmd_rsc_info_t *rsc, const char *task,
                 const char *ack_host, const char *ack_sys)
{
    lrmd_event_data_t *ack = construct_op(lrm_state, input->xml, rsc_id, task);

    ack->op_status = PCMK_LRM_OP_DONE;
    ack->rc = PCMK_OCF_OK;

    send_direct_ack(ack_host, ack_sys, rsc, ack, rsc_id);
    lrmd_free_event(ack);
}
/*!
 * \internal
 * \brief Get the name of the node an executor event belongs to
 *
 * \param[in] op  Executor event
 *
 * \return The event's remote node name if set, otherwise the local node name
 */
static inline const char *
op_node_name(lrmd_event_data_t *op)
{
    if (op->remote_nodename) {
        return op->remote_nodename;
    }
    return fsa_our_uname;
}
/*!
 * \brief Handle an event received from an executor connection
 *
 * Disconnect events for the local connection trigger connection cleanup;
 * completed executions are handed to process_lrm_event(). All other event
 * types are ignored.
 *
 * \param[in] op  Executor event (a NULL event is rejected)
 */
void
lrm_op_callback(lrmd_event_data_t * op)
{
    CRM_CHECK(op != NULL, return);

    if (op->type == lrmd_event_disconnect) {
        /* Only the local executor IPC connection (no remote node name) needs
         * the controller's connection bits updated when it goes down.
         */
        if (op->remote_nodename == NULL) {
            lrm_connection_destroy();
        }

    } else if (op->type == lrmd_event_exec_complete) {
        lrm_state_t *lrm_state = lrm_state_find(op_node_name(op));

        CRM_ASSERT(lrm_state != NULL);
        process_lrm_event(lrm_state, op, NULL, NULL);
    }
}
/* A_LRM_CONNECT / A_LRM_DISCONNECT
 *
 * Connect to and/or disconnect from the local executor, as requested by the
 * action bits. Only local executor connections are handled here; remote
 * connections are handled as resources within the scheduler, and connecting
 * to or disconnecting from remote executor instances is handled differently.
 */
void
do_lrm_control(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    lrm_state_t *lrm_state = NULL;

    if (fsa_our_uname == NULL) {
        return; /* Nothing to do */
    }

    lrm_state = lrm_state_find_or_create(fsa_our_uname);
    if (lrm_state == NULL) {
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
        return;
    }

    if (action & A_LRM_DISCONNECT) {
        /* A pure disconnect request waits (stalls) until everything has
         * stopped; combined requests proceed regardless.
         */
        if ((lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE)
            && (action == A_LRM_DISCONNECT)) {
            crmd_fsa_stall(FALSE);
            return;
        }

        clear_bit(fsa_input_register, R_LRM_CONNECTED);
        crm_info("Disconnecting from the executor");
        lrm_state_disconnect(lrm_state);
        lrm_state_reset_tables(lrm_state, FALSE);
        crm_notice("Disconnected from the executor");
    }

    if (action & A_LRM_CONNECT) {
        int ret = pcmk_ok;

        crm_debug("Connecting to the executor");
        ret = lrm_state_ipc_connect(lrm_state);

        if (ret != pcmk_ok) {
            if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) {
                /* Still below the retry limit: wait and try again later */
                crm_warn("Failed to connect to the executor %d time%s (%d max)",
                         lrm_state->num_lrm_register_fails,
                         s_if_plural(lrm_state->num_lrm_register_fails),
                         MAX_LRM_REG_FAILS);
                crm_timer_start(wait_timer);
                crmd_fsa_stall(FALSE);
                return;
            }

            crm_err("Failed to connect to the executor the max allowed %d time%s",
                    lrm_state->num_lrm_register_fails,
                    s_if_plural(lrm_state->num_lrm_register_fails));
            register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
            return;
        }

        set_bit(fsa_input_register, R_LRM_CONNECTED);
        crm_info("Connection to the executor established");
    }

    if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
        crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__);
    }
}
/*!
 * \internal
 * \brief Check whether anything is still pending or active on an executor
 *
 * Recurring operations are cancelled first (if the executor is connected).
 * If non-recurring operations are still pending, the function returns early:
 * FALSE when the caller should keep waiting, TRUE otherwise. If nothing is
 * pending, resources that still appear active are only logged.
 *
 * \param[in,out] lrm_state  Executor state to check
 * \param[in]     cur_state  Current FSA state (S_TERMINATE forces LOG_ERR)
 * \param[in]     log_level  Level for logging pending operations
 *
 * \return FALSE if non-recurring operations are pending, we are not
 *         terminating, and R_SENT_RSC_STOP is set; TRUE in all other cases
 */
static gboolean
lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level)
{
int counter = 0;
gboolean rc = TRUE;
const char *when = "lrm disconnect";
GHashTableIter gIter;
const char *key = NULL;
rsc_history_t *entry = NULL;
struct recurring_op_s *pending = NULL;
crm_debug("Checking for active resources before exit");
if (cur_state == S_TERMINATE) {
log_level = LOG_ERR;
when = "shutdown";
} else if (is_set(fsa_input_register, R_SHUTDOWN)) {
when = "shutdown... waiting";
}
/* Cancel recurring operations; entries for which stop_recurring_actions()
 * returns TRUE are dropped from the pending table
 */
if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) {
guint removed = g_hash_table_foreach_remove(
lrm_state->pending_ops, stop_recurring_actions, lrm_state);
guint nremaining = g_hash_table_size(lrm_state->pending_ops);
if (removed || nremaining) {
crm_notice("Stopped %u recurring operation%s at %s (%u remaining)",
removed, s_if_plural(removed), when, nremaining);
}
}
if (lrm_state->pending_ops) {
g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) {
/* Ignore recurring actions in the shutdown calculations */
if (pending->interval_ms == 0) {
counter++;
}
}
}
if (counter > 0) {
do_crm_log(log_level, "%d pending executor operation%s at %s",
counter, s_if_plural(counter), when);
/* When terminating, or when R_SENT_RSC_STOP is not set, just list what is
 * pending; otherwise tell the caller to wait (rc = FALSE)
 */
if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) {
g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) {
do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key);
}
} else {
rc = FALSE;
}
return rc;
}
if (lrm_state->resource_history == NULL) {
return rc;
}
if (is_set(fsa_input_register, R_SHUTDOWN)) {
/* At this point we're not waiting, we're just shutting down */
when = "shutdown";
}
/* Nothing non-recurring is pending: report resources that still look active */
counter = 0;
g_hash_table_iter_init(&gIter, lrm_state->resource_history);
while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) {
if (is_rsc_active(lrm_state, entry->id) == FALSE) {
continue;
}
counter++;
if (log_level == LOG_ERR) {
crm_info("Found %s active at %s", entry->id, when);
} else {
crm_trace("Found %s active at %s", entry->id, when);
}
if (lrm_state->pending_ops) {
/* Also list any operations still pending for this active resource */
GHashTableIter hIter;
g_hash_table_iter_init(&hIter, lrm_state->pending_ops);
while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) {
if (crm_str_eq(entry->id, pending->rsc_id, TRUE)) {
crm_notice("%sction %s (%s) incomplete at %s",
pending->interval_ms == 0 ? "A" : "Recurring a",
key, pending->op_key, when);
}
}
}
}
if (counter) {
crm_err("%d resource%s active at %s",
counter, (counter == 1)? " was" : "s were", when);
}
return rc;
}
/*!
 * \internal
 * \brief Build a space-delimited list of agent parameters of a given kind
 *
 * Each accepted parameter name is appended to the list with a space before
 * and after it. Optionally, parameter values from the operation are also
 * added to an XML node, either for the accepted parameters or (when
 * inverted) for the rejected ones.
 *
 * \param[in]     op              Operation whose parameter values are used
 * \param[in]     metadata        Agent meta-data listing known parameters
 * \param[in,out] result          If not NULL, XML node to add values to
 * \param[in]     param_type      Parameter flag to select by
 * \param[in]     invert_for_xml  If true, add rejected (rather than accepted)
 *                                parameters to \p result
 *
 * \return Newly allocated list of accepted names (caller frees), or NULL if
 *         no parameter was accepted
 */
static char *
build_parameter_list(const lrmd_event_data_t *op,
const struct ra_metadata_s *metadata,
xmlNode *result, enum ra_param_flags_e param_type,
bool invert_for_xml)
{
int len = 0;
int max = 0;
char *list = NULL;
GList *iter = NULL;
/* Newer resource agents support the "private" parameter attribute to
* indicate sensitive parameters. For backward compatibility with older
* agents, this list is used if the agent doesn't specify any as "private".
*/
const char *secure_terms[] = {
"password",
"passwd",
"user",
};
/* max stays 0 (name fallback disabled) unless private parameters are
 * requested and the agent does not use the "private" attribute itself
 */
if (is_not_set(metadata->ra_flags, ra_uses_private)
&& (param_type == ra_param_private)) {
max = DIMOF(secure_terms);
}
for (iter = metadata->ra_params; iter != NULL; iter = iter->next) {
struct ra_param_s *param = (struct ra_param_s *) iter->data;
bool accept = FALSE;
if (is_set(param->rap_flags, param_type)) {
accept = TRUE;
} else if (max) {
for (int lpc = 0; lpc < max; lpc++) {
if (safe_str_eq(secure_terms[lpc], param->rap_name)) {
accept = TRUE;
break;
}
}
}
if (accept) {
int start = len; // offset at which this name is appended
crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type));
len += strlen(param->rap_name) + 2; // include spaces around
list = realloc_safe(list, len + 1); // include null terminator
// spaces before and after make parsing simpler
sprintf(list + start, " %s ", param->rap_name);
} else {
crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type));
}
/* With invert_for_xml, the XML receives the values of the parameters that
 * were NOT accepted into the list
 */
if (result && (invert_for_xml? !accept : accept)) {
const char *v = g_hash_table_lookup(op->params, param->rap_name);
if (v != NULL) {
crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v);
crm_xml_add(result, param->rap_name, v);
}
}
}
return list;
}
/*!
 * \internal
 * \brief Add force-restart parameter list and restart digest to an update
 *
 * Nothing is added for recurring operations or for agents that do not
 * support reload.
 *
 * \param[in]     op        Operation whose parameters are examined
 * \param[in]     metadata  Agent meta-data
 * \param[in,out] update    XML operation update to add the attributes to
 * \param[in]     version   Feature set version used for the digest
 */
static void
append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
                    xmlNode *update, const char *version)
{
    char *list = NULL;
    char *digest = NULL;
    xmlNode *restart = NULL;
    const char *restart_value = "";

    CRM_LOG_ASSERT(op->params != NULL);

    if (op->interval_ms > 0) {
        /* monitors are not reloadable */
        return;
    }
    if (!is_set(metadata->ra_flags, ra_supports_reload)) {
        /* Resource does not support reloads */
        return;
    }

    /* Add any parameters with unique="1" to the "op-force-restart" list.
     *
     * (Currently, we abuse "unique=0" to indicate reloadability. This is
     * nonstandard and should eventually be replaced once the OCF standard
     * is updated with something better.)
     */
    restart = create_xml_node(NULL, XML_TAG_PARAMS);
    list = build_parameter_list(op, metadata, restart, ra_param_unique,
                                FALSE);
    digest = calculate_operation_digest(restart, version);

    /* "op-force-restart" and "op-restart-digest" are always added for a
     * reloadable resource (even when no parameter has unique="1"), because
     * their presence indicates that the resource supports reload.
     */
    if (list) {
        restart_value = list;
    }
    crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, restart_value);
    crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest);

    crm_trace("%s: %s, %s", op->rsc_id, digest, list);
    crm_log_xml_trace(restart, "restart digest source");

    free_xml(restart);
    free(digest);
    free(list);
}
/*!
 * \internal
 * \brief Add the secure parameter list and secure digest to an update
 *
 * To keep XML_LRM_ATTR_OP_SECURE short, it lists the secure parameters, while
 * XML_LRM_ATTR_SECURE_DIGEST is calculated from the insecure ones. Nothing is
 * added when there are no secure parameters.
 *
 * \param[in]     op        Operation whose parameters are examined
 * \param[in]     metadata  Agent meta-data
 * \param[in,out] update    XML operation update to add the attributes to
 * \param[in]     version   Feature set version used for the digest
 */
static void
append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
                   xmlNode *update, const char *version)
{
    char *list = NULL;
    char *digest = NULL;
    xmlNode *secure = NULL;

    CRM_LOG_ASSERT(op->params != NULL);

    /* Inverted XML collection: the list names the private parameters, the
     * XML node receives the values of all the others.
     */
    secure = create_xml_node(NULL, XML_TAG_PARAMS);
    list = build_parameter_list(op, metadata, secure, ra_param_private, TRUE);

    if (list == NULL) {
        crm_trace("%s: no secure parameters", op->rsc_id);

    } else {
        digest = calculate_operation_digest(secure, version);
        crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, list);
        crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest);
        crm_trace("%s: %s, %s", op->rsc_id, digest, list);
        crm_log_xml_trace(secure, "secure digest source");
    }

    free_xml(secure);
    free(digest);
    free(list);
}
/*!
 * \internal
 * \brief Add an operation's history entry (and digests) to a resource's XML
 *
 * Creates the operation update below \p parent. When the resource, the
 * operation parameters and the agent meta-data are all available (and the
 * operation needs meta-data), restart and secure digests are added as well.
 *
 * \param[in,out] parent     XML node to add the operation update to
 * \param[in]     rsc        Resource information (may be NULL)
 * \param[in]     op         Operation to add (may be NULL)
 * \param[in]     node_name  Name of node whose executor state supplies the
 *                           meta-data cache
 * \param[in]     src        Caller description passed on to the update
 *
 * \return FALSE if \p op is NULL, TRUE otherwise (including when digests
 *         could not be added)
 */
static gboolean
build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op,
                       const char *node_name, const char *src)
{
    int target_rc = 0;
    xmlNode *xml_op = NULL;
    struct ra_metadata_s *metadata = NULL;
    const char *caller_version = NULL;
    lrm_state_t *lrm_state = NULL;

    if (op == NULL) {
        return FALSE;
    }

    target_rc = rsc_op_expected_rc(op);

    /* there is a small risk in formerly mixed clusters that it will
     * be sub-optimal.
     *
     * however with our upgrade policy, the update we send should
     * still be completely supported anyway
     */
    if (op->params != NULL) {
        /* op->params is allowed to be NULL (it is checked again below), so
         * only consult the table when there is one
         */
        caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION);
    }
    CRM_LOG_ASSERT(caller_version != NULL);

    if (caller_version == NULL) {
        caller_version = CRM_FEATURE_SET;
    }

    crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version);
    xml_op = create_operation_update(parent, op, caller_version, target_rc, fsa_our_uname, src, LOG_DEBUG);
    if (xml_op == NULL) {
        return TRUE;
    }

    if ((rsc == NULL) || (op->params == NULL)
        || !crm_op_needs_metadata(rsc->standard, op->op_type)) {

        crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)",
                  op->op_type, op->rsc_id, op->params, rsc);
        return TRUE;
    }

    lrm_state = lrm_state_find(node_name);
    if (lrm_state == NULL) {
        crm_warn("Cannot calculate digests for operation " CRM_OP_FMT
                 " because we have no connection to executor for %s",
                 op->rsc_id, op->op_type, op->interval_ms, node_name);
        return TRUE;
    }

    metadata = metadata_cache_get(lrm_state->metadata_cache, rsc);
    if (metadata == NULL) {
        /* For now, we always collect resource agent meta-data via a local,
         * synchronous, direct execution of the agent. This has multiple issues:
         * the executor should execute agents, not the controller; meta-data for
         * Pacemaker Remote nodes should be collected on those nodes, not
         * locally; and the meta-data call shouldn't eat into the timeout of the
         * real action being performed.
         *
         * These issues are planned to be addressed by having the scheduler
         * schedule a meta-data cache check at the beginning of each transition.
         * Once that is working, this block will only be a fallback in case the
         * initial collection fails.
         */
        char *metadata_str = NULL;

        int rc = lrm_state_get_metadata(lrm_state, rsc->standard,
                                        rsc->provider, rsc->type,
                                        &metadata_str, 0);

        if (rc != pcmk_ok) {
            crm_warn("Failed to get metadata for %s (%s:%s:%s)",
                     rsc->id, rsc->standard, rsc->provider, rsc->type);
            return TRUE;
        }

        metadata = metadata_cache_update(lrm_state->metadata_cache, rsc,
                                         metadata_str);
        free(metadata_str);
        if (metadata == NULL) {
            crm_warn("Failed to update metadata for %s (%s:%s:%s)",
                     rsc->id, rsc->standard, rsc->provider, rsc->type);
            return TRUE;
        }
    }

#if ENABLE_VERSIONED_ATTRS
    crm_xml_add(xml_op, XML_ATTR_RA_VERSION, metadata->ra_version);
#endif

    crm_trace("Including additional digests for %s::%s:%s", rsc->standard, rsc->provider, rsc->type);
    append_restart_list(op, metadata, xml_op, caller_version);
    append_secure_list(op, metadata, xml_op, caller_version);

    return TRUE;
}
/*!
 * \internal
 * \brief Guess from cached history whether a resource is active
 *
 * A resource counts as inactive if it has no recorded last operation, if its
 * last operation was a successful stop or migrate, if the last result was
 * "not running", or if a non-recurring operation reported "not configured".
 *
 * \param[in] lrm_state  Executor state holding the resource history
 * \param[in] rsc_id     ID of resource to check
 *
 * \return TRUE if the resource is considered active, FALSE otherwise
 */
static gboolean
is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id)
{
    rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);

    if (entry == NULL || entry->last == NULL) {
        return FALSE;
    }

    crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type,
              entry->last->interval_ms, entry->last->rc);

    switch (entry->last->rc) {
        case PCMK_OCF_OK:
            /* For migrate, a stricter check is too complex...
             * leave that to the PE
             */
            if (safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)
                || safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) {
                return FALSE;
            }
            break;

        case PCMK_OCF_NOT_RUNNING:
            return FALSE;

        case PCMK_OCF_NOT_CONFIGURED:
            /* Badly configured resources can't be reliably stopped */
            if (entry->last->interval_ms == 0) {
                return FALSE;
            }
            break;

        default:
            break;
    }
    return TRUE;
}
/*!
 * \internal
 * \brief Add every resource in the cached history to an XML resource list
 *
 * For each resource, an XML entry is created with its ID and agent details,
 * followed by updates for its last failure, its last operation, and all of
 * its cached recurring operations.
 *
 * \param[in]     lrm_state  Executor state holding the resource history
 * \param[in,out] rsc_list   XML node to add the resource entries to
 *
 * \return Always FALSE
 */
static gboolean
build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
{
    GHashTableIter iter;
    rsc_history_t *entry = NULL;

    g_hash_table_iter_init(&iter, lrm_state->resource_history);
    while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
        GList *recurring = NULL;
        const char *container = NULL;
        xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE);

        crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id);
        crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type);
        crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.standard);
        crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider);

        /* The container, if any, is read from the last operation's
         * meta-parameters
         */
        if (entry->last && entry->last->params) {
            container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
        }
        if (container) {
            crm_trace("Resource %s is a part of container resource %s", entry->id, container);
            crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container);
        }

        build_operation_update(xml_rsc, &(entry->rsc), entry->failed, lrm_state->node_name, __FUNCTION__);
        build_operation_update(xml_rsc, &(entry->rsc), entry->last, lrm_state->node_name, __FUNCTION__);

        recurring = entry->recurring_op_list;
        while (recurring != NULL) {
            build_operation_update(xml_rsc, &(entry->rsc), recurring->data, lrm_state->node_name, __FUNCTION__);
            recurring = recurring->next;
        }
    }
    return FALSE;
}
/*!
 * \internal
 * \brief Build a node state update containing the cached executor history
 *
 * \param[in] lrm_state     Executor state to describe
 * \param[in] update_flags  Flags passed to create_node_state_update()
 *
 * \return Newly created node state XML, or NULL if the peer could not be
 *         found or the node state could not be created
 */
static xmlNode *
do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags)
{
    xmlNode *xml_state = NULL;
    xmlNode *xml_data = NULL;
    xmlNode *rsc_list = NULL;
    crm_node_t *peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY);

    CRM_CHECK(peer != NULL, return NULL);

    xml_state = create_node_state_update(peer, update_flags, NULL,
                                         __FUNCTION__);
    if (xml_state == NULL) {
        return NULL;
    }

    xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM);
    crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid);

    /* Build a list of active (not always running) resources */
    rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES);
    build_active_RAs(lrm_state, rsc_list);

    crm_log_xml_trace(xml_state, "Current executor state");
    return xml_state;
}
/*!
 * \brief Build a node state update with the executor history of a node
 *
 * \param[in] is_replace  Not used by this function
 * \param[in] node_name   Name of node whose executor state is wanted
 *
 * \return Newly created node state XML, or NULL on error
 */
xmlNode *
do_lrm_query(gboolean is_replace, const char *node_name)
{
    lrm_state_t *lrm_state = lrm_state_find(node_name);

    if (lrm_state) {
        return do_lrm_query_internal(lrm_state,
                                     node_update_cluster|node_update_peer);
    }

    crm_err("Could not find executor state for node %s", node_name);
    return NULL;
}
/*!
 * \internal
 * \brief Tell the requester of a resource deletion how it went
 *
 * Sends a direct ack for a delete operation to the system that asked for it.
 * If the requester was not the transition engine, the "last-lrm-refresh"
 * attribute is also updated to trigger a new transition.
 *
 * \param[in] lrm_state  Executor state the deletion applied to
 * \param[in] input      Original deletion request
 * \param[in] rsc_id     ID of resource that was to be deleted
 * \param[in] rc         Result of the deletion (pcmk_ok on success)
 */
static void
notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc)
{
    lrmd_event_data_t *op = NULL;
    const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
    const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);

    crm_info("Notifying %s on %s that %s was%s deleted",
             from_sys, (from_host? from_host : "localhost"), rsc_id,
             ((rc == pcmk_ok)? "" : " not"));

    op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE);
    if (rc != pcmk_ok) {
        op->op_status = PCMK_LRM_OP_ERROR;
        op->rc = PCMK_OCF_UNKNOWN_ERROR;
    } else {
        op->op_status = PCMK_LRM_OP_DONE;
        op->rc = PCMK_OCF_OK;
    }

    send_direct_ack(from_host, from_sys, NULL, op, rsc_id);
    lrmd_free_event(op);

    if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
        /* this isn't expected - trigger a new transition */
        time_t now = time(NULL);
        char *timestamp = crm_itoa(now);

        crm_debug("Triggering a refresh after %s deleted %s from the executor",
                  from_sys, rsc_id);
        update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
                             "last-lrm-refresh", timestamp, FALSE, NULL, NULL);
        free(timestamp);
    }
}
/*!
 * \internal
 * \brief Hash table iterator: notify and drop deletion ops for a resource
 *
 * \param[in] key        Ignored
 * \param[in] value      Pending deletion operation
 * \param[in] user_data  Deletion event (struct delete_event_s *)
 *
 * \return TRUE (remove the entry) if the operation is for the deleted
 *         resource, FALSE otherwise
 */
static gboolean
lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
{
    struct delete_event_s *event = user_data;
    struct pending_deletion_op_s *op = value;

    if (!crm_str_eq(event->rsc, op->rsc, TRUE)) {
        return FALSE;
    }

    notify_deleted(event->lrm_state, op->input, event->rsc, event->rc);
    return TRUE;
}
/*!
 * \internal
 * \brief Hash table iterator: drop pending ops that belong to a resource
 *
 * \param[in] key        Ignored
 * \param[in] value      Pending operation
 * \param[in] user_data  ID of the deleted resource
 *
 * \return TRUE (remove the entry) if the operation belongs to the deleted
 *         resource, FALSE otherwise
 */
static gboolean
lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
{
    const char *rsc = user_data;
    struct recurring_op_s *pending = value;

    if (!crm_str_eq(rsc, pending->rsc_id, TRUE)) {
        return FALSE;
    }

    crm_info("Removing op %s:%d for deleted resource %s",
             pending->op_key, pending->call_id, rsc);
    return TRUE;
}
/* XPath template matching one resource's history below one node's state
 * (arguments: node name, resource ID)
 */
#define RSC_TEMPLATE "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']"

/*!
 * \internal
 * \brief Remove a single resource's status from the CIB
 *
 * Deleting by XPath avoids refreshing the entire LRM section of the host.
 *
 * \param[in] lrm_state     Executor state of the node the resource is on
 * \param[in] rsc_id        ID of resource to remove
 * \param[in] call_options  CIB call options (cib_xpath is always added)
 * \param[in] user_name     User name passed on to the CIB operation
 *
 * \return -ENXIO if \p rsc_id is NULL, otherwise the result of the CIB call
 */
static int
delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options,
                  const char *user_name)
{
    int rc = pcmk_ok;
    char *xpath = NULL;

    CRM_CHECK(rsc_id != NULL, return -ENXIO);

    xpath = crm_strdup_printf(RSC_TEMPLATE, lrm_state->node_name, rsc_id);
    rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, xpath,
                         NULL, NULL, call_options | cib_xpath, user_name);
    free(xpath);

    return rc;
}
/*!
 * \internal
 * \brief Finish deleting a resource: purge caches and notify requesters
 *
 * On success (\p rc is pcmk_ok), the resource is removed from the cached
 * history, its status is deleted from the CIB, and its pending operations
 * are dropped. In all cases the requester (if any) and every pending deletion
 * operation for the resource are notified of the result.
 *
 * \param[in,out] lrm_state  Executor state the resource belongs to
 * \param[in]     input      Deletion request to acknowledge (may be NULL)
 * \param[in]     rsc_id     ID of resource being deleted
 * \param[in,out] rsc_gIter  If not NULL, history table iterator currently
 *                           positioned at the resource (used for removal)
 * \param[in]     rc         Result of the deletion
 * \param[in]     user_name  User name passed on to the CIB operation
 */
static void
delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id,
                 GHashTableIter * rsc_gIter, int rc, const char *user_name)
{
    struct delete_event_s event;
    char *rsc_id_copy = NULL;

    CRM_CHECK(rsc_id != NULL, return);

    if (rc == pcmk_ok) {
        rsc_id_copy = strdup(rsc_id);
        CRM_CHECK(rsc_id_copy != NULL, return);

        if (rsc_gIter) {
            g_hash_table_iter_remove(rsc_gIter);
        } else {
            g_hash_table_remove(lrm_state->resource_history, rsc_id_copy);
        }

        /* The caller's rsc_id may point into the history entry that was just
         * removed, so only the private copy is used from here on. It stays
         * alive until the end of the function, because the notifications
         * below still need the ID.
         */
        rsc_id = rsc_id_copy;

        crm_debug("sync: Sending delete op for %s", rsc_id_copy);
        delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name);

        g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy);
    }

    if (input) {
        notify_deleted(lrm_state, input, rsc_id, rc);
    }

    event.rc = rc;
    event.rsc = rsc_id;
    event.lrm_state = lrm_state;
    g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event);

    free(rsc_id_copy);
}
/*!
 * \internal
 * \brief Remove an operation's history entry from the CIB status section
 *
 * The entry is matched by call ID and transition key and, for recurring
 * operations, additionally by operation key.
 *
 * \param[in] lrm_state  LRM state of the desired node
 * \param[in] op         Operation whose history should be deleted
 */
static void
erase_lrm_history_by_op(lrm_state_t *lrm_state, lrmd_event_data_t *op)
{
    xmlNode *pattern = NULL;

    CRM_CHECK(op != NULL, return);

    pattern = create_xml_node(NULL, XML_LRM_TAG_RSC_OP);
    crm_xml_add_int(pattern, XML_LRM_ATTR_CALLID, op->call_id);
    crm_xml_add(pattern, XML_ATTR_TRANSITION_KEY, op->user_data);

    if (op->interval_ms > 0) {
        /* Matching on the operation key as well avoids deleting last_failure
         * too (if it was a result of this recurring op failing)
         */
        char *op_key = generate_op_key(op->rsc_id, op->op_type, op->interval_ms);

        crm_xml_add(pattern, XML_ATTR_ID, op_key);
        free(op_key);
    }

    crm_debug("Erasing resource operation history for " CRM_OP_FMT " (call=%d)",
              op->rsc_id, op->op_type, op->interval_ms, op->call_id);

    fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_STATUS, pattern,
                               cib_quorum_override);

    crm_log_xml_trace(pattern, "op:cancel");
    free_xml(pattern);
}
/* XPath matching all LRM operation history entries of one resource on one
 * node (format arguments: node name, resource ID)
 */
#define XPATH_HISTORY \
"/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \
"/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \
"/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \
"/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \
"/" XML_LRM_TAG_RSC_OP
/* ... restricted to one operation key (additional argument: operation key) */
#define XPATH_HISTORY_ID XPATH_HISTORY \
"[@" XML_ATTR_ID "='%s']"
/* ... restricted to one operation key and call ID (additional arguments:
 * operation key, integer call ID)
 */
#define XPATH_HISTORY_CALL XPATH_HISTORY \
"[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']"
/* ... restricted to one operation key and original operation key (additional
 * arguments: operation key, original operation key)
 */
#define XPATH_HISTORY_ORIG XPATH_HISTORY \
"[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']"
/*!
 * \internal
 * \brief Remove an operation's history entry from the CIB, found by XPath
 *
 * A positive call ID takes precedence over an original operation key when
 * both are given.
 *
 * \param[in] lrm_state  LRM state of the node to clear history for
 * \param[in] rsc_id     Name of resource to clear history for
 * \param[in] key        Operation key of operation to clear history for
 * \param[in] orig_op    If specified, delete only if it has this original op
 * \param[in] call_id    If positive, delete entry only if it has this call ID
 */
static void
erase_lrm_history_by_id(lrm_state_t *lrm_state, const char *rsc_id,
                        const char *key, const char *orig_op, int call_id)
{
    char *xpath = NULL;
    const char *node = NULL;

    CRM_CHECK((rsc_id != NULL) && (key != NULL), return);

    node = lrm_state->node_name;
    if (call_id > 0) {
        xpath = crm_strdup_printf(XPATH_HISTORY_CALL, node, rsc_id, key,
                                  call_id);

    } else if (orig_op) {
        xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, node, rsc_id, key,
                                  orig_op);

    } else {
        xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, key);
    }

    crm_debug("Erasing resource operation history for %s on %s (call=%d)",
              key, rsc_id, call_id);
    fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL,
                               cib_quorum_override | cib_xpath);
    free(xpath);
}
/*!
 * \internal
 * \brief Check whether a history entry's last failure matches an operation
 *
 * \param[in] entry        Resource history entry (may be NULL)
 * \param[in] op           Operation name to match, or NULL to match anything
 * \param[in] interval_ms  Operation interval to match (if \p op is given)
 *
 * \return FALSE if there is no entry; TRUE if \p op is NULL; otherwise TRUE
 *         only if a last failure is recorded and it has the given operation
 *         name and interval
 */
static inline gboolean
last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms)
{
    if (entry == NULL) {
        return FALSE;
    }
    if (op == NULL) {
        return TRUE;
    }
    if (entry->failed == NULL) {
        /* No failure has been recorded, so there is nothing that could match
         * (and nothing that may be dereferenced below)
         */
        return FALSE;
    }
    return (safe_str_eq(op, entry->failed->op_type)
            && (interval_ms == entry->failed->interval_ms));
}
/*!
 * \internal
 * \brief Forget a resource's last failure on a node
 *
 * The failure is erased both from the LRM resource history in the CIB and
 * from the history cached in memory for the node's LRM state. Nothing happens
 * if no LRM state exists for the node.
 *
 * \param[in] rsc_id       Resource name
 * \param[in] node_name    Node name
 * \param[in] operation    If specified, only clear if matching this operation
 * \param[in] interval_ms  If operation is specified, it has this interval
 */
void
lrm_clear_last_failure(const char *rsc_id, const char *node_name,
                       const char *operation, guint interval_ms)
{
    char *failure_key = NULL;
    char *orig_key = NULL;
    rsc_history_t *entry = NULL;
    lrm_state_t *lrm_state = lrm_state_find(node_name);

    if (lrm_state == NULL) {
        return;
    }

    /* Erase from CIB */
    failure_key = generate_op_key(rsc_id, "last_failure", 0);
    if (operation) {
        orig_key = generate_op_key(rsc_id, operation, interval_ms);
    }
    erase_lrm_history_by_id(lrm_state, rsc_id, failure_key, orig_key, 0);
    free(failure_key);
    free(orig_key);

    /* Remove from memory */
    if (lrm_state->resource_history == NULL) {
        return;
    }
    entry = g_hash_table_lookup(lrm_state->resource_history,
                                rsc_id);
    if (last_failed_matches_op(entry, operation, interval_ms)) {
        lrmd_free_event(entry->failed);
        entry->failed = NULL;
    }
}
/*!
 * \internal
 * \brief Ask the executor to cancel a pending operation
 *
 * \param[in,out] lrm_state  Executor state holding the pending operation
 * \param[in]     rsc_id     ID of resource the operation is for
 * \param[in]     key        Key in the pending operations table, or NULL to
 *                           derive it from \p rsc_id and \p op
 * \param[in]     op         Call ID of the operation (must not be 0)
 * \param[in]     remove     Whether to flag the operation for removal
 *
 * \return TRUE if cancellation is in progress, FALSE otherwise (invalid
 *         arguments, unknown or already cancelled operation, or nothing
 *         to cancel)
 */
static gboolean
cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove)
{
    gboolean in_progress = FALSE;
    char *generated_key = NULL;
    struct recurring_op_s *entry = NULL;

    CRM_CHECK(op != 0, return FALSE);
    CRM_CHECK(rsc_id != NULL, return FALSE);

    if (key == NULL) {
        generated_key = make_stop_id(rsc_id, op);
        key = generated_key;
    }

    entry = g_hash_table_lookup(lrm_state->pending_ops, key);
    if (entry == NULL) {
        crm_info("No pending op found for %s", key);
        goto done;
    }

    if (remove && entry->remove == FALSE) {
        entry->remove = TRUE;
        crm_debug("Scheduling %s for removal", key);
    }

    if (entry->cancelled) {
        crm_debug("Operation %s already cancelled", key);
        goto done;
    }
    entry->cancelled = TRUE;

    crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key);
    if (lrm_state_cancel(lrm_state, entry->rsc_id, entry->op_type,
                         entry->interval_ms) == pcmk_ok) {
        crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key);
        in_progress = TRUE;

    } else {
        crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key);
        /* The caller needs to make sure the entry is
         * removed from the pending_ops list
         *
         * Usually by returning TRUE inside the worker function
         * supplied to g_hash_table_foreach_remove()
         *
         * Not removing the entry from pending_ops will block
         * the node from shutting down
         */
    }

done:
    free(generated_key);
    return in_progress;
}
/* Context handed to cancel_action_by_key() through the user_data argument of
 * g_hash_table_foreach_remove() (see cancel_op_key())
 */
struct cancel_data {
/* Set to TRUE once an entry whose op_key equals 'key' has been seen */
gboolean done;
/* Forwarded to cancel_op() as its 'remove' argument */
gboolean remove;
/* Operation key compared against each pending entry's op_key */
const char *key;
/* Resource whose ID is passed to cancel_op() */
lrmd_rsc_info_t *rsc;
/* Executor state passed to cancel_op() */
lrm_state_t *lrm_state;
};
/*!
 * \internal
 * \brief Hash table worker: cancel a pending op if its key matches the request
 *
 * \param[in]     key        pending_ops key of the entry being visited
 * \param[in]     value      Entry being visited (struct recurring_op_s *)
 * \param[in,out] user_data  Request context (struct cancel_data *)
 *
 * \return TRUE if the entry should be removed from the table (it matched and
 *         cancel_op() reported no cancellation in progress), else FALSE
 */
static gboolean
cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
{
    struct cancel_data *request = user_data;
    struct recurring_op_s *pending = (struct recurring_op_s *) value;

    if (!crm_str_eq(pending->op_key, request->key, TRUE)) {
        return FALSE;
    }

    request->done = TRUE;
    return !cancel_op(request->lrm_state, request->rsc->id, key,
                      pending->call_id, request->remove);
}
/*!
 * \internal
 * \brief Cancel every pending operation whose op_key equals a given key
 *
 * \param[in] lrm_state  Executor state whose pending_ops table is scanned
 * \param[in] rsc        Resource the operations belong to
 * \param[in] key        Operation key to match
 * \param[in] remove     Passed through to cancel_op()
 *
 * \return TRUE if at least one pending entry matched \p key, else FALSE
 */
static gboolean
cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove)
{
    guint n_removed = 0;
    struct cancel_data request = {
        .done = FALSE,
        .remove = remove,
        .key = key,
        .rsc = rsc,
        .lrm_state = lrm_state,
    };

    CRM_CHECK(rsc != NULL, return FALSE);
    CRM_CHECK(key != NULL, return FALSE);

    n_removed = g_hash_table_foreach_remove(lrm_state->pending_ops,
                                            cancel_action_by_key, &request);
    crm_trace("Removed %u op cache entries, new size: %u",
              n_removed, g_hash_table_size(lrm_state->pending_ops));
    return request.done;
}
/*!
 * \internal
 * \brief Look up (and optionally register) a resource with the executor
 *
 * \param[in]  lrm_state  Executor connection to use
 * \param[in]  rsc_xml    XML holding the resource's configuration
 * \param[in]  do_create  If true, register the resource when it is not
 *                        already known to the executor
 * \param[out] rsc_info   Where to store the resource information obtained
 *
 * \retval pcmk_ok    Success (\p rsc_info holds a newly allocated result)
 * \retval -EINVAL    Required information is missing from the arguments
 * \retval -ENOTCONN  No active connection to the executor
 * \retval -ENODEV    Resource not found
 * \retval -errno     Error communicating with the executor when registering
 *
 * \note On success, the caller is responsible for freeing the result.
 */
static int
get_lrm_resource(lrm_state_t *lrm_state, xmlNode *rsc_xml, gboolean do_create,
                 lrmd_rsc_info_t **rsc_info)
{
    const char *id = ID(rsc_xml);
    const char *agent_class = NULL;
    const char *agent_provider = NULL;
    const char *agent_type = NULL;
    int register_rc = pcmk_ok;

    CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL);
    CRM_CHECK(id, return -EINVAL);

    if (lrm_state_is_connected(lrm_state) == FALSE) {
        return -ENOTCONN;
    }

    crm_trace("Retrieving resource information for %s from the executor", id);
    *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);

    // Fall back to the clone name (if one was supplied) when the ID is unknown
    if (*rsc_info == NULL) {
        const char *clone_name = crm_element_value(rsc_xml, XML_ATTR_ID_LONG);

        if (clone_name != NULL) {
            *rsc_info = lrm_state_get_rsc_info(lrm_state, clone_name, 0);
        }
    }

    if (*rsc_info != NULL) {
        return pcmk_ok;
    }
    if (!do_create) {
        return -ENODEV;
    }

    agent_class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS);
    agent_provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER);
    agent_type = crm_element_value(rsc_xml, XML_ATTR_TYPE);

    crm_trace("Registering resource %s with the executor", id);
    register_rc = lrm_state_register_rsc(lrm_state, id, agent_class,
                                         agent_provider, agent_type,
                                         lrmd_opt_drop_recurring);
    if (register_rc != pcmk_ok) {
        /* NOTE(review): not referenced by name below; presumably consumed by
         * the register_fsa_error() macro -- confirm before renaming/removing
         */
        fsa_data_t *msg_data = NULL;

        crm_err("Could not register resource %s with the executor on %s: %s "
                CRM_XS " rc=%d",
                id, lrm_state->node_name, pcmk_strerror(register_rc),
                register_rc);

        /* Only a failure of the local executor is registered as an internal
         * error. For any other executor we are most likely looking at an
         * unresponsive remote node, which is not an FSA failure.
         */
        if (lrm_state_is_local(lrm_state) == TRUE) {
            register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
        }
        return register_rc;
    }

    *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
    return (*rsc_info != NULL)? pcmk_ok : -ENODEV;
}
/*!
 * \internal
 * \brief Remove a resource from the executor and from the resource history
 *
 * \param[in] lrm_state   Executor state the resource belongs to
 * \param[in] id          ID of the resource to remove
 * \param[in] rsc         Resource information (if NULL, nothing is unregistered)
 * \param[in] gIter       Passed through to delete_rsc_entry()
 * \param[in] sys         Name of the requesting subsystem (for logging)
 * \param[in] host        Name of the requesting host (for logging)
 * \param[in] user        Name of the requesting user, or NULL for "internal"
 * \param[in] request     Original request, or NULL if there is none; when the
 *                        unregistration is still pending, a copy is stored in
 *                        lrm_state->deletion_ops under the request's reference
 * \param[in] unregister  If TRUE (and \p rsc is set), unregister the resource
 *                        from the executor
 *
 * \note When unregistration returns -EINPROGRESS, this function returns without
 *       calling delete_rsc_entry().
 */
static void
delete_resource(lrm_state_t * lrm_state,
                const char *id,
                lrmd_rsc_info_t * rsc,
                GHashTableIter * gIter,
                const char *sys,
                const char *host,
                const char *user,
                ha_msg_input_t * request,
                gboolean unregister)
{
    int rc = pcmk_ok;

    crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host);

    if (rsc && unregister) {
        rc = lrm_state_unregister_rsc(lrm_state, id, 0);
    }

    if (rc == pcmk_ok) {
        crm_trace("Resource '%s' deleted", id);

    } else if (rc == -EINPROGRESS) {
        crm_info("Deletion of resource '%s' pending", id);
        if (request) {
            struct pending_deletion_op_s *op = NULL;
            char *ref = NULL;

            op = calloc(1, sizeof(struct pending_deletion_op_s));
            if (op == NULL) {
                /* Previously the NULL result was dereferenced. Without the
                 * bookkeeping entry the pending deletion cannot be tracked,
                 * so say so and leave it untracked rather than crash.
                 */
                crm_warn("Cannot track pending deletion of resource '%s': out of memory",
                         id);
                return;
            }

            // rc can only be -EINPROGRESS if rsc was non-NULL above
            op->rsc = strdup(rsc->id);
            op->input = copy_ha_msg_input(request);

            ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE);
            g_hash_table_insert(lrm_state->deletion_ops, ref, op);
        }
        return;

    } else {
        crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d",
                 id, sys, user ? user : "internal", host, rc);
    }

    delete_rsc_entry(lrm_state, request, id, gIter, rc, user);
}
/*!
 * \internal
 * \brief Choose a call ID for a synthesized (fake) operation result
 *
 * \param[in] lrm_state  Executor state whose resource history is consulted
 *                       (may be NULL)
 * \param[in] rsc_id     ID of the resource the fake result is for
 *
 * \return 999999999 if there is no history entry for the resource; otherwise
 *         one more than the entry's last call ID, or 1 if that value would be
 *         negative or cannot be represented in an int
 */
static int
get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id)
{
    int call_id = 999999999;
    rsc_history_t *entry = NULL;

    if (lrm_state != NULL) {
        entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
    }

    /* Make sure the call id is greater than the last successful operation,
     * otherwise the failure will not result in a possible recovery of the
     * resource as it could appear the failure occurred before the successful
     * start
     */
    if (entry != NULL) {
        if (entry->last_callid >= INT_MAX) {
            /* Incrementing would be signed overflow (undefined behaviour).
             * Restart at 1, which is what the wrap-to-negative check below
             * was relying on.
             */
            return 1;
        }
        call_id = entry->last_callid + 1;
    }

    if (call_id < 0) {
        call_id = 1;
    }
    return call_id;
}
/*!
 * \internal
 * \brief Fill in an operation event so it looks like a completed execution
 *
 * \param[in]     lrm_state    Executor state used to pick a fake call ID
 * \param[in,out] op           Event to update (call ID, timestamps, result)
 * \param[in]     op_status    Execution status to record
 * \param[in]     op_exitcode  Exit code to record
 */
static void
fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status,
               enum ocf_exitcode op_exitcode)
{
    // Result to report
    op->op_status = op_status;
    op->rc = op_exitcode;

    // Pretend the operation ran, and its result changed, just now
    op->t_run = time(NULL);
    op->t_rcchange = op->t_run;

    op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
}
/*!
 * \internal
 * \brief Clear all resource history for a node so resources get re-detected
 *
 * Every entry in the node's resource history is deleted (recursing first into
 * the executor state of any remote connection resource found), then the
 * node's executor section is erased from the CIB and the CRM_OP_PROBED node
 * attribute is deleted.
 *
 * \param[in] lrm_state       Executor state of the node to clear
 * \param[in] from_sys        Requesting subsystem (passed to delete_resource())
 * \param[in] from_host       Requesting host (passed to delete_resource())
 * \param[in] user_name       Requesting user
 * \param[in] is_remote_node  Passed through to update_attrd()
 */
static void
force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
              const char *from_host, const char *user_name,
              gboolean is_remote_node)
{
    GHashTableIter history_iter;
    rsc_history_t *history = NULL;

    crm_info("Clearing resource history on node %s", lrm_state->node_name);

    g_hash_table_iter_init(&history_iter, lrm_state->resource_history);
    while (g_hash_table_iter_next(&history_iter, NULL, (gpointer *) &history)) {
        /* A remote connection resource must stay registered during a
         * reprobe: unregistering the connection would terminate remote-node
         * membership. Everything else is unregistered.
         */
        gboolean unregister = TRUE;

        if (is_remote_lrmd_ra(NULL, NULL, history->id)) {
            lrm_state_t *remote = lrm_state_find(history->id);

            if (remote != NULL) {
                /* Clear the remote node itself before clearing the
                 * connection resource that leads to it
                 */
                force_reprobe(remote, from_sys, from_host, user_name, TRUE);
            }
            unregister = FALSE;
        }

        delete_resource(lrm_state, history->id, &history->rsc, &history_iter,
                        from_sys, from_host, user_name, NULL, unregister);
    }

    // Drop the copy held in the CIB as well
    erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local);

    /* Last, _delete_ the value in pacemaker-attrd; merely setting it to FALSE
     * would make the scheduler send us straight back here
     */
    update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node);
}
/*!
 * \internal
 * \brief Fake an executor result for an action that could not be executed
 *
 * \param[in] lrm_state  Executor state to attribute the result to (may be NULL)
 * \param[in] action     XML of the action that could not be executed
 * \param[in] rc         Exit code to report (notify actions report success
 *                       instead)
 */
static void
synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc)
{
    lrmd_event_data_t *fake = NULL;
    const char *task = crm_element_value(action, XML_LRM_ATTR_TASK);
    const char *node = crm_element_value(action, XML_LRM_ATTR_TARGET);
    xmlNode *rsc_xml = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE);
    const char *rsc_id = (rsc_xml == NULL)? NULL : ID(rsc_xml);

    if (rsc_id == NULL) {
        /* @TODO Should we do something else, like direct ack? */
        crm_info("Can't fake %s failure (%d) on %s without resource configuration",
                 crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc,
                 node);
        return;
    }

    if (task == NULL) {
        /* This probably came from crm_resource -C, nothing to do */
        crm_info("Can't fake %s failure (%d) on %s without operation",
                 rsc_id, rc, node);
        return;
    }

    fake = construct_op(lrm_state, action, rsc_id, task);

    if (safe_str_eq(task, RSC_NOTIFY)) {
        // Notifications can't fail
        fake_op_status(lrm_state, fake, PCMK_LRM_OP_DONE, PCMK_OCF_OK);
    } else {
        fake_op_status(lrm_state, fake, PCMK_LRM_OP_ERROR, rc);
    }

    crm_info("Faking " CRM_OP_FMT " result (%d) on %s",
             fake->rsc_id, fake->op_type, fake->interval_ms, fake->rc, node);

    // Handle the fake result exactly like one reported by the executor
    process_lrm_event(lrm_state, fake, NULL, action);
    lrmd_free_event(fake);
}
/*!
 * \internal
 * \brief Determine which node an executor operation is aimed at
 *
 * \param[in] xml  Executor operation data XML (may be NULL)
 *
 * \return Value of the XML's target attribute if present, otherwise the
 *         local node name
 */
static const char *
lrm_op_target(xmlNode *xml)
{
    const char *node = (xml != NULL)?
                       crm_element_value(xml, XML_LRM_ATTR_TARGET) : NULL;

    return (node != NULL)? node : fsa_our_uname;
}
/*!
 * \internal
 * \brief Process a request to mark a resource as failed
 *
 * \param[in] xml        Request XML (must contain the resource definition)
 * \param[in] lrm_state  Executor state the resource belongs to
 * \param[in] user_name  Requesting user (checked for privilege when ACLs are
 *                       enabled)
 * \param[in] from_host  Host to acknowledge
 * \param[in] from_sys   Subsystem to acknowledge
 */
static void
fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
                  const char *from_host, const char *from_sys)
{
    lrmd_event_data_t *fake = NULL;
    lrmd_rsc_info_t *rsc_info = NULL;
    xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE);

    CRM_CHECK(xml_rsc != NULL, return);

    /* The executor has no notion of success or failure: it runs operations
     * and reports what happened. Failing a resource therefore means faking
     * what a failure looks like.
     *
     * We build a fake executor operation event for the resource and feed it
     * to the executor client callback, so it is handled as though the
     * executor had reported it.
     */
    fake = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon");
    fake_op_status(lrm_state, fake, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR);

    free((char*) fake->user_data);
    fake->user_data = NULL;
    fake->interval_ms = 0;

#if ENABLE_ACL
    if (user_name && is_privileged(user_name) == FALSE) {
        crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc));
        send_direct_ack(from_host, from_sys, NULL, fake, ID(xml_rsc));
        lrmd_free_event(fake);
        return;
    }
#endif

    if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc_info) != pcmk_ok) {
        crm_info("Cannot find/create resource in order to fail it...");
        crm_log_xml_warn(xml, "bad input");

    } else {
        crm_info("Failing resource %s...", rsc_info->id);
        fake->exit_reason = strdup("Simulated failure");
        process_lrm_event(lrm_state, fake, NULL, xml);

        // The acknowledgement sent below reports success
        fake->op_status = PCMK_LRM_OP_DONE;
        fake->rc = PCMK_OCF_OK;
        lrmd_free_rsc_info(rsc_info);
    }

    send_direct_ack(from_host, from_sys, NULL, fake, ID(xml_rsc));
    lrmd_free_event(fake);
}
/*!
 * \internal
 * \brief Push the node's full resource history to the CIB on request
 *
 * \param[in] lrm_state  Executor state to query
 * \param[in] user_name  User on whose behalf the CIB is updated
 * \param[in] from_host  Host to acknowledge
 * \param[in] from_sys   Requesting subsystem (no acknowledgement is sent when
 *                       it matches CRM_SYSTEM_CRMD)
 */
static void
handle_refresh_op(lrm_state_t *lrm_state, const char *user_name,
                  const char *from_host, const char *from_sys)
{
    int rc = pcmk_ok;
    xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all);

    fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name);
    crm_info("Forced a local resource history refresh: call=%d", rc);

    if (safe_str_neq(CRM_SYSTEM_CRMD, from_sys)) {
        xmlNode *ack = create_request(CRM_OP_INVOKE_LRM, fragment, from_host,
                                      from_sys, CRM_SYSTEM_LRMD,
                                      fsa_our_uuid);

        crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host);

        if (!relay_message(ack, TRUE)) {
            crm_log_xml_err(ack, "Unable to route reply");
        }
        free_xml(ack);
    }

    free_xml(fragment);
}
/*!
 * \internal
 * \brief Reply to a query with the node's full resource history
 *
 * \param[in] msg        Request to reply to
 * \param[in] lrm_state  Executor state to query
 */
static void
handle_query_op(xmlNode *msg, lrm_state_t *lrm_state)
{
    xmlNode *history = do_lrm_query_internal(lrm_state, node_update_all);
    xmlNode *answer = create_reply(msg, history);

    if (!relay_message(answer, TRUE)) {
        crm_err("Unable to route reply");
        crm_log_xml_err(answer, "reply");
    }

    free_xml(answer);
    free_xml(history);
}
/*!
 * \internal
 * \brief Force re-detection of all resources and acknowledge the request
 *
 * \param[in] lrm_state       Executor state of the node to reprobe
 * \param[in] from_sys        Requesting subsystem (no acknowledgement is sent
 *                            when it matches CRM_SYSTEM_PENGINE or
 *                            CRM_SYSTEM_TENGINE)
 * \param[in] from_host       Host to acknowledge
 * \param[in] user_name       Requesting user
 * \param[in] is_remote_node  Passed through to force_reprobe()
 */
static void
handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys,
                  const char *from_host, const char *user_name,
                  gboolean is_remote_node)
{
    xmlNode *ack = NULL;

    crm_notice("Forcing the status of all resources to be redetected");
    force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node);

    if (!safe_str_neq(CRM_SYSTEM_PENGINE, from_sys)
        || !safe_str_neq(CRM_SYSTEM_TENGINE, from_sys)) {
        return;
    }

    ack = create_request(CRM_OP_INVOKE_LRM, NULL, from_host,
                         from_sys, CRM_SYSTEM_LRMD,
                         fsa_our_uuid);

    crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host);

    if (!relay_message(ack, TRUE)) {
        crm_log_xml_err(ack, "Unable to route reply");
    }
    free_xml(ack);
}
static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state,
lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys)
{
char *op_key = NULL;
char *meta_key = NULL;
int call = 0;
const char *call_id = NULL;
const char *op_task = NULL;
const char *interval_ms_s = NULL;
gboolean in_progress = FALSE;
xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE);
CRM_CHECK(params != NULL, return FALSE);
meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS);
interval_ms_s = crm_element_value(params, meta_key);
free(meta_key);
CRM_CHECK(interval_ms_s != NULL, return FALSE);
meta_key = crm_meta_name(XML_LRM_ATTR_TASK);
op_task = crm_element_value(params, meta_key);
free(meta_key);
CRM_CHECK(op_task != NULL, return FALSE);
meta_key = crm_meta_name(XML_LRM_ATTR_CALLID);
call_id = crm_element_value(params, meta_key);
free(meta_key);
op_key = generate_op_key(rsc->id, op_task, crm_parse_ms(interval_ms_s));
crm_debug("Scheduler requested op %s (call=%s) be cancelled",
op_key, (call_id? call_id : "NA"));
call = crm_parse_int(call_id, "0");
if (call == 0) {
// Normal case when the scheduler cancels a recurring op
in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE);
} else {
// Normal case when the scheduler cancels an orphan op
in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE);
}
// Acknowledge cancellation operation if for a remote connection resource
if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
char *op_id = make_stop_id(rsc->id, call);
if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) {
crm_info("Nothing known about operation %d for %s", call, op_key);
}
erase_lrm_history_by_id(lrm_state, rsc->id, op_key, NULL, call);
send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
from_host, from_sys);
/* needed at least for cancellation of a remote operation */
g_hash_table_remove(lrm_state->pending_ops, op_id);
free(op_id);
} else {
/* No ack is needed since abcdaa8, but peers with older versions
* in a rolling upgrade need one. We didn't bump the feature set
* at that commit, so we can only compare against the previous
* CRM version (3.0.8). If any peers have feature set 3.0.9 but
* not abcdaa8, they will time out waiting for the ack (no
* released versions of Pacemaker are affected).
*/
const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION);
if (compare_version(peer_version, "3.0.8") <= 0) {
crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)",
op_key, from_host, peer_version);
send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
from_host, from_sys);
}
}
free(op_key);
return TRUE;
}
/*!
 * \internal
 * \brief Process a request to delete a resource from the executor
 *
 * \param[in] input           Request being processed
 * \param[in] lrm_state       Executor state the resource belongs to
 * \param[in] rsc             Resource to delete
 * \param[in] from_sys        Requesting subsystem
 * \param[in] from_host       Requesting host
 * \param[in] crm_rsc_delete  Whether the request used CRM_OP_LRM_DELETE; if so,
 *                            a remote connection resource is not unregistered
 * \param[in] user_name       Requesting user
 */
static void
do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state,
              lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host,
              bool crm_rsc_delete, const char *user_name)
{
    gboolean unregister = TRUE;

#if ENABLE_ACL
    // Dry run first: may this user delete the resource's status at all?
    int cib_rc = delete_rsc_status(lrm_state, rsc->id,
                                   cib_dryrun|cib_sync_call, user_name);

    if (cib_rc != pcmk_ok) {
        lrmd_event_data_t *nack = NULL;

        crm_err("Could not delete resource status of %s for %s (user %s) on %s: %s"
                CRM_XS " rc=%d",
                rsc->id, from_sys, (user_name? user_name : "unknown"),
                from_host, pcmk_strerror(cib_rc), cib_rc);

        nack = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE);
        nack->op_status = PCMK_LRM_OP_ERROR;
        nack->rc = (cib_rc == -EACCES)? PCMK_OCF_INSUFFICIENT_PRIV
                                      : PCMK_OCF_UNKNOWN_ERROR;

        send_direct_ack(from_host, from_sys, NULL, nack, rsc->id);
        lrmd_free_event(nack);
        return;
    }
#endif

    if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
        unregister = FALSE;
    }

    delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host,
                    user_name, input, unregister);
}
/* A_LRM_INVOKE */
/*!
 * \brief Dispatch an executor-related request to the matching handler
 *
 * Finds the executor state for the request's target node (lrm_op_target()),
 * then routes on the message's F_CRM_TASK value and, for resource requests, on
 * the XML_LRM_ATTR_TASK value of the request XML: fail, refresh, query,
 * probed, reprobe, cancel, delete, or a regular resource operation.
 *
 * \param[in] action         Not referenced directly in this function
 * \param[in] cause          Not referenced directly in this function
 * \param[in] cur_state      Not referenced directly in this function
 * \param[in] current_input  Not referenced directly in this function
 * \param[in] msg_data       FSA data for this action
 *                           NOTE(review): not referenced by name in the body;
 *                           presumably consumed by the fsa_typed_data() and
 *                           register_fsa_error() macros -- confirm
 */
void
do_lrm_invoke(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
lrm_state_t *lrm_state = NULL;
const char *crm_op = NULL;
const char *from_sys = NULL;
const char *from_host = NULL;
const char *operation = NULL;
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
const char *user_name = NULL;
const char *target_node = NULL;
gboolean is_remote_node = FALSE;
bool crm_rsc_delete = FALSE;
// Any target other than the local node name is treated as a remote node
target_node = lrm_op_target(input->xml);
is_remote_node = safe_str_neq(target_node, fsa_our_uname);
lrm_state = lrm_state_find(target_node);
if ((lrm_state == NULL) && is_remote_node) {
crm_err("Failing action because local node has never had connection to remote node %s",
target_node);
synthesize_lrmd_failure(NULL, input->xml, PCMK_OCF_CONNECTION_DIED);
return;
}
CRM_ASSERT(lrm_state != NULL);
#if ENABLE_ACL
user_name = crm_acl_get_set_user(input->msg, F_CRM_USER, NULL);
crm_trace("Executor command from user '%s'", user_name);
#endif
crm_op = crm_element_value(input->msg, F_CRM_TASK);
from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
// from_host stays NULL when the request came from CRM_SYSTEM_TENGINE
if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
}
crm_trace("Executor %s command from %s", crm_op, from_sys);
if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) {
crm_rsc_delete = TRUE; // Only crm_resource uses this op
operation = CRMD_ACTION_DELETE;
} else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) {
fail_lrm_resource(input->xml, lrm_state, user_name, from_host,
from_sys);
return;
} else if (input->xml != NULL) {
operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK);
}
// Requests that do not need a resource definition are handled first
if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
handle_refresh_op(lrm_state, user_name, from_host, from_sys);
} else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) {
handle_query_op(input->msg, lrm_state);
} else if (safe_str_eq(operation, CRM_OP_PROBED)) {
update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE,
user_name, is_remote_node);
} else if (safe_str_eq(operation, CRM_OP_REPROBE)
|| safe_str_eq(crm_op, CRM_OP_REPROBE)) {
handle_reprobe_op(lrm_state, from_sys, from_host, user_name,
is_remote_node);
} else if (operation != NULL) {
lrmd_rsc_info_t *rsc = NULL;
xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
// A delete request must not register the resource as a side effect
gboolean create_rsc = safe_str_neq(operation, CRMD_ACTION_DELETE);
int rc;
// We can't return anything meaningful without a resource ID
CRM_CHECK(xml_rsc && ID(xml_rsc), return);
rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc);
if (rc == -ENOTCONN) {
synthesize_lrmd_failure(lrm_state, input->xml,
PCMK_OCF_CONNECTION_DIED);
return;
} else if ((rc < 0) && !create_rsc) {
/* Delete of malformed or nonexistent resource
* (deleting something that does not exist is a success)
*/
crm_notice("Not registering resource '%s' for a %s event "
CRM_XS " get-rc=%d (%s) transition-key=%s",
ID(xml_rsc), operation,
rc, pcmk_strerror(rc), ID(input->xml));
delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok,
user_name);
send_task_ok_ack(lrm_state, input, ID(xml_rsc), NULL, operation,
from_host, from_sys);
return;
} else if (rc == -EINVAL) {
// Resource operation on malformed resource
crm_err("Invalid resource definition for %s", ID(xml_rsc));
crm_log_xml_warn(input->msg, "invalid resource");
synthesize_lrmd_failure(lrm_state, input->xml,
PCMK_OCF_NOT_CONFIGURED); // fatal error
return;
} else if (rc < 0) {
// Error communicating with the executor
crm_err("Could not register resource '%s' with executor: %s "
CRM_XS " rc=%d",
ID(xml_rsc), pcmk_strerror(rc), rc);
crm_log_xml_warn(input->msg, "failed registration");
synthesize_lrmd_failure(lrm_state, input->xml,
PCMK_OCF_INVALID_PARAM); // hard error
return;
}
// From here on rsc is valid and is freed once the request is dispatched
if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) {
if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) {
crm_log_xml_warn(input->xml, "Bad command");
}
} else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) {
do_lrm_delete(input, lrm_state, rsc, from_sys, from_host,
crm_rsc_delete, user_name);
} else {
do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg);
}
lrmd_free_rsc_info(rsc);
} else {
crm_err("Cannot perform operation %s of unknown type", crm_str(crm_op));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
/*!
 * \internal
 * \brief Build an executor operation event from an operation's XML
 *
 * \param[in] lrm_state  Executor state used to look up cached stop parameters
 *                       (and, with ENABLE_VERSIONED_ATTRS, agent metadata);
 *                       may be NULL
 * \param[in] rsc_op     XML describing the operation; if NULL, an event with
 *                       only the CRM feature set as parameter is returned
 * \param[in] rsc_id     ID of the resource the operation is for (asserted)
 * \param[in] operation  Name of the operation (asserted)
 *
 * \return Newly allocated event (allocation is asserted) with status
 *         PCMK_LRM_OP_PENDING and rc -1; callers in this file release it with
 *         lrmd_free_event()
 */
static lrmd_event_data_t *
construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation)
{
lrmd_event_data_t *op = NULL;
const char *op_delay = NULL;
const char *op_timeout = NULL;
const char *interval_ms_s = NULL;
GHashTable *params = NULL;
const char *transition = NULL;
CRM_ASSERT(rsc_id && operation);
op = calloc(1, sizeof(lrmd_event_data_t));
CRM_ASSERT(op != NULL);
op->type = lrmd_event_exec_complete;
op->op_type = strdup(operation);
op->op_status = PCMK_LRM_OP_PENDING;
op->rc = -1;
op->rsc_id = strdup(rsc_id);
op->interval_ms = 0;
op->timeout = 0;
op->start_delay = 0;
if (rsc_op == NULL) {
CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation));
op->user_data = NULL;
/* the stop_all_resources() case
* by definition there is no DC (or they'd be shutting
* us down).
* So we should put our version here.
*/
op->params = crm_str_table_new();
g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET));
crm_trace("Constructed %s op for %s", operation, rsc_id);
return op;
}
// Turn the operation XML into a parameter table and pull out the timing values
params = xml2list(rsc_op);
g_hash_table_remove(params, CRM_META "_op_target_rc");
op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY);
op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT);
interval_ms_s = crm_meta_value(params, XML_LRM_ATTR_INTERVAL_MS);
op->interval_ms = crm_parse_ms(interval_ms_s);
op->timeout = crm_parse_int(op_timeout, "0");
op->start_delay = crm_parse_int(op_delay, "0");
#if ENABLE_VERSIONED_ATTRS
// Resolve any versioned parameters
if (lrm_state && safe_str_neq(op->op_type, RSC_METADATA)
&& safe_str_neq(op->op_type, CRMD_ACTION_DELETE)
&& !is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
// Resource info *should* already be cached, so we don't get executor call
lrmd_rsc_info_t *rsc = lrm_state_get_rsc_info(lrm_state, rsc_id, 0);
struct ra_metadata_s *metadata;
metadata = metadata_cache_get(lrm_state->metadata_cache, rsc);
if (metadata) {
xmlNode *versioned_attrs = NULL;
GHashTable *hash = NULL;
char *key = NULL;
char *value = NULL;
GHashTableIter iter;
// Versioned operation attributes: entries are moved (stolen) into params
versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_ATTRS);
hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version);
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
g_hash_table_iter_steal(&iter);
g_hash_table_replace(params, key, value);
}
g_hash_table_destroy(hash);
/* Versioned operation meta-attributes: copied into params under their
 * meta name; timeout and start delay also override the values parsed above
 */
versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_META);
hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version);
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
g_hash_table_replace(params, crm_meta_name(key), strdup(value));
if (safe_str_eq(key, XML_ATTR_TIMEOUT)) {
op->timeout = crm_parse_int(value, "0");
} else if (safe_str_eq(key, XML_OP_ATTR_START_DELAY)) {
op->start_delay = crm_parse_int(value, "0");
}
}
g_hash_table_destroy(hash);
// Versioned resource attributes: entries are moved (stolen) into params
versioned_attrs = first_named_child(rsc_op, XML_TAG_RSC_VER_ATTRS);
hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version);
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
g_hash_table_iter_steal(&iter);
g_hash_table_replace(params, key, value);
}
g_hash_table_destroy(hash);
}
lrmd_free_rsc_info(rsc);
}
#endif
// For anything but a stop, the event takes ownership of params as-is
if (safe_str_neq(operation, RSC_STOP)) {
op->params = params;
} else {
rsc_history_t *entry = NULL;
if (lrm_state) {
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
}
/* If we do not have stop parameters cached, use
* whatever we are given */
if (!entry || !entry->stop_params) {
op->params = params;
} else {
/* Copy the cached parameter list so that we stop the resource
* with the old attributes, not the new ones */
op->params = crm_str_table_new();
g_hash_table_foreach(params, copy_meta_keys, op->params);
g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params);
g_hash_table_destroy(params);
params = NULL;
}
}
/* sanity */
if (op->timeout <= 0) {
op->timeout = op->interval_ms;
}
if (op->start_delay < 0) {
op->start_delay = 0;
}
// Without a transition key the event is returned with user_data left unset
transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY);
CRM_CHECK(transition != NULL, return op);
op->user_data = strdup(transition);
if (op->interval_ms != 0) {
if (safe_str_eq(operation, CRMD_ACTION_START)
|| safe_str_eq(operation, CRMD_ACTION_STOP)) {
crm_err("Start and Stop actions cannot have an interval: %u",
op->interval_ms);
op->interval_ms = 0;
}
}
crm_trace("Constructed %s op for %s: interval=%u",
operation, rsc_id, op->interval_ms);
return op;
}
/*!
 * \brief Send an operation result straight to the requester
 *
 * Wraps the operation's result in a node state update for the local node and
 * relays it as a CRM_OP_INVOKE_LRM request.
 *
 * \param[in]     to_host  Host to send the acknowledgement to
 * \param[in]     to_sys   Subsystem to send it to (CRM_SYSTEM_TENGINE if NULL)
 * \param[in]     rsc      Resource information passed to
 *                         build_operation_update() (may be NULL)
 * \param[in,out] op       Operation event to report; if its rsc_id is unset,
 *                         it is filled in from \p rsc_id
 * \param[in]     rsc_id   Resource ID to use when \p op has none
 */
void
send_direct_ack(const char *to_host, const char *to_sys,
                lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id)
{
    xmlNode *state_xml = NULL;
    xmlNode *lrm_xml = NULL;
    xmlNode *rsc_entry = NULL;
    xmlNode *ack = NULL;
    crm_node_t *local_node = NULL;

    CRM_CHECK(op != NULL, return);

    if (op->rsc_id == NULL) {
        CRM_ASSERT(rsc_id != NULL);
        op->rsc_id = strdup(rsc_id);
    }

    if (to_sys == NULL) {
        to_sys = CRM_SYSTEM_TENGINE;
    }

    local_node = crm_get_peer(0, fsa_our_uname);
    state_xml = create_node_state_update(local_node, node_update_none, NULL,
                                         __FUNCTION__);

    // Build the lrm -> resources -> resource nesting below the node state
    lrm_xml = create_xml_node(state_xml, XML_CIB_TAG_LRM);
    crm_xml_add(lrm_xml, XML_ATTR_ID, fsa_our_uuid);

    rsc_entry = create_xml_node(create_xml_node(lrm_xml, XML_LRM_TAG_RESOURCES),
                                XML_LRM_TAG_RESOURCE);
    crm_xml_add(rsc_entry, XML_ATTR_ID, op->rsc_id);

    build_operation_update(rsc_entry, rsc, op, fsa_our_uname, __FUNCTION__);
    ack = create_request(CRM_OP_INVOKE_LRM, state_xml, to_host, to_sys, CRM_SYSTEM_LRMD, NULL);

    crm_log_xml_trace(state_xml, "ACK Update");

    crm_debug("ACK'ing resource op " CRM_OP_FMT " from %s: %s",
              op->rsc_id, op->op_type, op->interval_ms, op->user_data,
              crm_element_value(ack, XML_ATTR_REFERENCE));

    if (!relay_message(ack, TRUE)) {
        crm_log_xml_err(ack, "Unable to route reply");
    }

    free_xml(state_xml);
    free_xml(ack);
}
/*!
 * \brief Check every known executor state for resources that are not stopped
 *
 * \param[in] cur_state  Current FSA state (passed to the per-state check)
 * \param[in] log_level  Log level (passed to the per-state check)
 *
 * \return TRUE if lrm_state_verify_stopped() succeeded for every executor
 *         state, otherwise FALSE
 *
 * \note Sets R_SENT_RSC_STOP in fsa_input_register regardless of the result.
 */
gboolean
verify_stopped(enum crmd_fsa_state cur_state, int log_level)
{
    gboolean all_stopped = TRUE;
    GList *states = lrm_state_get_list();
    GList *item = states;

    // Visit every state even after one of them has reported a problem
    while (item != NULL) {
        lrm_state_t *lrm_state = item->data;

        if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) {
            all_stopped = FALSE;
        }
        item = item->next;
    }

    set_bit(fsa_input_register, R_SENT_RSC_STOP);
    g_list_free(states);
    return all_stopped;
}
/* Context handed to stop_recurring_action_by_rsc() through the user_data
 * argument of g_hash_table_foreach_remove()
 */
struct stop_recurring_action_s {
/* Resource whose recurring operations should be cancelled */
lrmd_rsc_info_t *rsc;
/* Executor state passed to cancel_op() */
lrm_state_t *lrm_state;
};
/*!
 * \internal
 * \brief Hash table worker: cancel a recurring op of one particular resource
 *
 * \param[in] key        pending_ops key of the entry being visited
 * \param[in] value      Entry being visited (struct recurring_op_s *)
 * \param[in] user_data  Context (struct stop_recurring_action_s *)
 *
 * \return TRUE if the entry should be removed from the table (it is a
 *         recurring op of the resource and cancel_op() reported no
 *         cancellation in progress), else FALSE
 */
static gboolean
stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
{
    struct stop_recurring_action_s *event = user_data;
    struct recurring_op_s *op = (struct recurring_op_s *) value;

    if (op->interval_ms == 0) {
        return FALSE;
    }
    if (!crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) {
        return FALSE;
    }

    crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key);
    return !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE);
}
/*!
 * \internal
 * \brief Hash table worker: cancel a pending op if it is recurring
 *
 * \param[in] key        pending_ops key of the entry being visited
 * \param[in] value      Entry being visited (struct recurring_op_s *)
 * \param[in] user_data  Executor state (lrm_state_t *)
 *
 * \return TRUE if the entry should be removed from the table (it is recurring
 *         and cancel_op() reported no cancellation in progress), else FALSE
 */
static gboolean
stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
{
    lrm_state_t *lrm_state = user_data;
    struct recurring_op_s *op = (struct recurring_op_s *) value;

    if (op->interval_ms == 0) {
        return FALSE;
    }

    crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id,
             (const char *) key);
    return !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE);
}
/*!
 * \internal
 * \brief Record an operation in the CIB as pending, if appropriate
 *
 * Nothing is recorded when the operation has no type or parameters, when
 * controld_action_is_recordable() rejects its type, or when its
 * XML_OP_ATTR_PENDING meta-attribute is set to a non-true value.
 *
 * \param[in]     node_name  Node the operation is being executed on
 * \param[in]     rsc        Resource the operation is for
 * \param[in,out] op         Operation to record (call ID, status, rc and
 *                           timestamps are overwritten when recorded)
 */
static void
record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op)
{
    const char *pending_setting = NULL;

    CRM_CHECK(node_name != NULL, return);
    CRM_CHECK(rsc != NULL, return);
    CRM_CHECK(op != NULL, return);

    // Certain operation types are never recorded as pending
    if ((op->op_type == NULL) || (op->params == NULL)) {
        return;
    }
    if (!controld_action_is_recordable(op->op_type)) {
        return;
    }

    // Recording is on unless explicitly switched off
    pending_setting = crm_meta_value(op->params, XML_OP_ATTR_PENDING);
    if ((pending_setting != NULL) && !crm_is_true(pending_setting)) {
        return;
    }

    op->call_id = -1;
    op->op_status = PCMK_LRM_OP_PENDING;
    op->rc = PCMK_OCF_UNKNOWN;

    op->t_run = time(NULL);
    op->t_rcchange = op->t_run;

    /* write a "pending" entry to the CIB, inhibit notification */
    crm_debug("Recording pending op " CRM_OP_FMT " on %s in the CIB",
              op->rsc_id, op->op_type, op->interval_ms, node_name);

    do_update_resource(node_name, rsc, op);
}
/*!
 * \internal
 * \brief Execute a resource operation via the executor
 *
 * Cancels conflicting recurring operations where needed, refuses the request
 * with a direct NACK when the controller is not in a state to run it, records
 * the operation as pending in the CIB, and finally hands it to the executor,
 * tracking it in lrm_state->pending_ops on success.
 *
 * \param[in] lrm_state  Executor state to run the operation on
 * \param[in] rsc        Resource to operate on
 * \param[in] operation  Name of the operation
 * \param[in] msg        Operation XML (source of the transition key and of the
 *                       operation's parameters); may be NULL
 * \param[in] request    Original request; not referenced in this function
 */
static void
do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg,
              xmlNode * request)
{
    int call_id = 0;
    char *op_id = NULL;
    lrmd_event_data_t *op = NULL;
    lrmd_key_value_t *params = NULL;

    /* NOTE(review): not referenced by name below; presumably consumed by the
     * register_fsa_input()/register_fsa_error() macros -- confirm before
     * renaming or removing
     */
    fsa_data_t *msg_data = NULL;
    const char *transition = NULL;
    gboolean stop_recurring = FALSE;
    bool send_nack = FALSE;

    CRM_CHECK(rsc != NULL, return);
    CRM_CHECK(operation != NULL, return);

    if (msg != NULL) {
        transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
        if (transition == NULL) {
            crm_log_xml_err(msg, "Missing transition number");
        }
    }

    op = construct_op(lrm_state, msg, rsc->id, operation);
    CRM_CHECK(op != NULL, return);

    if (is_remote_lrmd_ra(NULL, NULL, rsc->id)
        && (op->interval_ms == 0)
        && strcmp(operation, CRMD_ACTION_MIGRATE) == 0) {

        /* pcmk remote connections are a special use case.
         * We never ever want to stop monitoring a connection resource until
         * the entire migration has completed. If the connection is unexpectedly
         * severed, even during a migration, this is an event we must detect.*/
        stop_recurring = FALSE;

    } else if ((op->interval_ms == 0)
        && strcmp(operation, CRMD_ACTION_STATUS) != 0
        && strcmp(operation, CRMD_ACTION_NOTIFY) != 0) {

        /* stop any previous monitor operations before changing the resource state */
        stop_recurring = TRUE;
    }

    if (stop_recurring == TRUE) {
        guint removed = 0;
        struct stop_recurring_action_s data;

        data.rsc = rsc;
        data.lrm_state = lrm_state;
        removed = g_hash_table_foreach_remove(
            lrm_state->pending_ops, stop_recurring_action_by_rsc, &data);

        if (removed) {
            crm_debug("Stopped %u recurring operation%s in preparation for " CRM_OP_FMT,
                      removed, s_if_plural(removed),
                      rsc->id, operation, op->interval_ms);
        }
    }

    /* now do the op */
    crm_info("Performing key=%s op=" CRM_OP_FMT,
             transition, rsc->id, operation, op->interval_ms);

    if (is_set(fsa_input_register, R_SHUTDOWN) && safe_str_eq(operation, RSC_START)) {
        // Never start anything once shutdown has been requested
        register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
        send_nack = TRUE;

    } else if (fsa_state != S_NOT_DC
               && fsa_state != S_POLICY_ENGINE /* Recalculating */
               && fsa_state != S_TRANSITION_ENGINE
               && safe_str_neq(operation, CRMD_ACTION_STOP)) {
        // Outside these states, only stops are accepted
        send_nack = TRUE;
    }

    if(send_nack) {
        crm_notice("Discarding attempt to perform action %s on %s in state %s (shutdown=%s)",
                   operation, rsc->id, fsa_state2string(fsa_state),
                   is_set(fsa_input_register, R_SHUTDOWN)?"true":"false");

        op->rc = CRM_DIRECT_NACK_RC;
        op->op_status = PCMK_LRM_OP_ERROR;
        send_direct_ack(NULL, NULL, rsc, op, rsc->id);
        lrmd_free_event(op);
        free(op_id);
        return;
    }

    record_pending_op(lrm_state->node_name, rsc, op);

    op_id = generate_op_key(rsc->id, op->op_type, op->interval_ms);

    if (op->interval_ms > 0) {
        /* cancel it so we can then restart it without conflict */
        cancel_op_key(lrm_state, rsc, op_id, FALSE);
    }

    // Convert the parameter table into the list form taken by lrm_state_exec()
    if (op->params) {
        char *key = NULL;
        char *value = NULL;
        GHashTableIter iter;

        g_hash_table_iter_init(&iter, op->params);
        while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
            params = lrmd_key_value_add(params, key, value);
        }
    }

    call_id = lrm_state_exec(lrm_state, rsc->id, op->op_type, op->user_data,
                             op->interval_ms, op->timeout, op->start_delay,
                             params);

    if (call_id <= 0 && lrm_state_is_local(lrm_state)) {
        crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id);
        register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);

    } else if (call_id <= 0) {
        crm_err("Operation %s on resource %s failed to execute on remote node %s: %d",
                operation, rsc->id, lrm_state->node_name, call_id);
        fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR);
        process_lrm_event(lrm_state, op, NULL, NULL);

    } else {
        /* record all operations so we can wait
         * for them to complete during shutdown
         */
        char *call_id_s = make_stop_id(rsc->id, call_id);
        struct recurring_op_s *pending = NULL;

        pending = calloc(1, sizeof(struct recurring_op_s));
        crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);

        pending->call_id = call_id;
        pending->interval_ms = op->interval_ms;
        pending->op_type = strdup(operation);
        pending->op_key = strdup(op_id);
        pending->rsc_id = strdup(rsc->id);
        pending->start_time = time(NULL);
        pending->user_data = op->user_data? strdup(op->user_data) : NULL;
        g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending);

        if ((op->interval_ms > 0)
            && (op->start_delay > START_DELAY_THRESHOLD)) {
            int target_rc = 0;

            crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);

            /* Only the expected result is needed from the transition key, so
             * the other outputs are passed as NULL
             */
            decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc);
            op->rc = target_rc;
            op->op_status = PCMK_LRM_OP_DONE;
            send_direct_ack(NULL, NULL, rsc, op, rsc->id);
        }

        // The pending entry takes over ownership of the parameter table
        pending->params = op->params;
        op->params = NULL;
    }

    free(op_id);
    lrmd_free_event(op);
    return;
}
/* Call ID of the most recently submitted resource-history CIB update that is
 * still awaiting its callback. do_update_resource() stores a positive call ID
 * here, and cib_rsc_callback() resets it to 0 when that call completes.
 */
int last_resource_update = 0;

/*!
 * \internal
 * \brief CIB callback for resource-history updates
 *
 * Logs the outcome of the update and, when the completed call is the one
 * remembered in last_resource_update, clears that marker and triggers the FSA.
 *
 * \param[in] msg        Unused
 * \param[in] call_id    ID of the CIB call that completed
 * \param[in] rc         Result of the CIB call
 * \param[in] output     Unused
 * \param[in] user_data  Unused
 */
static void
cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    if ((rc == pcmk_ok)
        || (rc == -pcmk_err_diff_failed)
        || (rc == -pcmk_err_diff_resync)) {
        crm_trace("Resource update %d complete: rc=%d", call_id, rc);
    } else {
        crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc));
    }

    if (call_id != last_resource_update) {
        return;
    }

    // The latest outstanding update has been answered
    last_resource_update = 0;
    trigger_fsa(fsa_source);
}
/*!
 * \internal
 * \brief Submit an operation result to the CIB status section
 *
 * Builds a nested XML update (status > node state > LRM > resources >
 * resource), adds the operation via build_operation_update(), and submits it
 * with fsa_cib_update(). cib_rsc_callback() is registered for the resulting
 * call.
 *
 * \param[in] node_name  Name of the node the result belongs to
 * \param[in] rsc        Resource information; if NULL, no CIB update is sent
 *                       and the result is directly acknowledged instead
 * \param[in] op         Operation result to record
 *
 * \return 0 if \p op is NULL (assuming CRM_CHECK runs its failure action),
 *         -EINVAL if no node UUID could be determined, otherwise the value
 *         left in rc (per the comment at "done", a CIB call number)
 */
static int
do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
{
/*
 * "update" always points at the outermost element (the one that is submitted
 * and freed), while "iter" walks down as each nested child is created.
*/
int rc = pcmk_ok;
xmlNode *update, *iter = NULL;
int call_opt = crmd_cib_smart_opt();
const char *uuid = NULL;
CRM_CHECK(op != NULL, return 0);
iter = create_xml_node(iter, XML_CIB_TAG_STATUS);
update = iter;
iter = create_xml_node(iter, XML_CIB_TAG_STATE);
if (safe_str_eq(node_name, fsa_our_uname)) {
uuid = fsa_our_uuid;
} else {
/* remote nodes uuid and uname are equal */
uuid = node_name;
crm_xml_add(iter, XML_NODE_IS_REMOTE, "true");
}
CRM_LOG_ASSERT(uuid != NULL);
if(uuid == NULL) {
rc = -EINVAL;
/* NOTE(review): this path still reaches fsa_register_cib_callback() below
 * with rc == -EINVAL -- confirm that is intended
 */
goto done;
}
crm_xml_add(iter, XML_ATTR_UUID, uuid);
crm_xml_add(iter, XML_ATTR_UNAME, node_name);
crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__);
iter = create_xml_node(iter, XML_CIB_TAG_LRM);
crm_xml_add(iter, XML_ATTR_ID, uuid);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
/* The operation entry is added before rsc is checked, so rsc may be NULL here */
build_operation_update(iter, rsc, op, node_name, __FUNCTION__);
if (rsc) {
const char *container = NULL;
crm_xml_add(iter, XML_ATTR_TYPE, rsc->type);
crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->standard);
crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider);
/* Copy the container name (if any) from the operation's meta parameters */
if (op->params) {
container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
}
if (container) {
crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container);
crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container);
}
} else {
/* Without resource information nothing is written to the CIB; the result is
 * acknowledged directly and the built XML is simply discarded
 */
crm_warn("Resource %s no longer exists in the executor", op->rsc_id);
send_direct_ack(NULL, NULL, rsc, op, op->rsc_id);
goto cleanup;
}
crm_log_xml_trace(update, __FUNCTION__);
/* make it an asynchronous call and be done with it
*
* Best case:
* the resource state will be discovered during
* the next signup or election.
*
* Bad case:
* we are shutting down and there is no DC at the time,
* but then why were we shutting down then anyway?
* (probably because of an internal error)
*
* Worst case:
* we get shot for having resources "running" that really weren't
*
* the alternative however means blocking here for too long, which
* isn't acceptable
*/
/* rc is presumably assigned by fsa_cib_update() (its definition is not
 * visible here); a positive value is remembered so cib_rsc_callback() can
 * recognize the latest update
 */
fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL);
if (rc > 0) {
last_resource_update = rc;
}
done:
/* the return code is a call number, not an error code */
crm_trace("Sent resource state update message: %d for %s=%u on %s",
rc, op->op_type, op->interval_ms, op->rsc_id);
fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback);
cleanup:
free_xml(update);
return rc;
}
/*!
 * \brief Placeholder that is not expected to be called
 *
 * The body consists solely of a CRM_CHECK on a constant-false condition whose
 * failure action is to return; none of the arguments are examined.
 *
 * \param[in] action     Unused
 * \param[in] cause      Unused
 * \param[in] cur_state  Unused
 * \param[in] cur_input  Unused
 * \param[in] msg_data   Unused
 */
void
do_lrm_event(long long action,
             enum crmd_fsa_cause cause,
             enum crmd_fsa_state cur_state,
             enum crmd_fsa_input cur_input,
             fsa_data_t * msg_data)
{
    CRM_CHECK(FALSE, return);
}
/*!
 * \internal
 * \brief Copy a string, turning each escaped newline into a real newline
 *
 * Every two-character sequence of a backslash followed by 'n' in the copy is
 * overwritten in place with a newline followed by a space. Because the
 * replacement has the same length as the escape, the result is exactly as long
 * as the input and nothing has to be shifted.
 *
 * \param[in] string  Text to convert (may be NULL)
 *
 * \return Newly allocated, converted copy of \p string (caller must free it),
 *         or NULL if \p string is NULL or memory could not be allocated
 */
static char *
unescape_newlines(const char *string)
{
    static const char escaped_newline[] = "\\n";
    char *ret = NULL;
    char *pch = NULL;

    if (string == NULL) {
        return NULL;
    }

    ret = strdup(string);
    if (ret == NULL) {
        // Out of memory: searching a NULL buffer below would crash
        return NULL;
    }

    /* The two bytes just written can never start another escape, so resume
     * the search right after them instead of rescanning them
     */
    for (pch = strstr(ret, escaped_newline); pch != NULL;
         pch = strstr(pch + 2, escaped_newline)) {
        pch[0] = '\n';
        pch[1] = ' ';
    }
    return ret;
}
/*!
 * \brief Process the result of an executor operation
 *
 * Depending on the operation's status, this records the result in the CIB
 * (via do_update_resource()) and/or acknowledges it directly to the
 * originator, removes the matching entry from the pending operations table,
 * logs the result, sends alerts or caches metadata, and updates the history
 * cache.
 *
 * \param[in] lrm_state   Executor connection state the result belongs to
 *                        (may be NULL; state-dependent steps are then skipped)
 * \param[in] op          Operation result (must be non-NULL with a resource ID)
 * \param[in] pending     Pending-operation record for \p op if the caller
 *                        already has it; if NULL, it is looked up here and
 *                        this function removes the pending entry itself
 * \param[in] action_xml  Action XML used as a fallback source of resource
 *                        agent information and node name (may be NULL)
 */
void
process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
struct recurring_op_s *pending, xmlNode *action_xml)
{
char *op_id = NULL;
char *op_key = NULL;
int update_id = 0;
gboolean remove = FALSE;
gboolean removed = FALSE;
bool need_direct_ack = FALSE;
lrmd_rsc_info_t *rsc = NULL;
const char *node_name = NULL;
CRM_CHECK(op != NULL, return);
CRM_CHECK(op->rsc_id != NULL, return);
// op_id is the key used in the pending_ops table; op_key is used for logging
op_id = make_stop_id(op->rsc_id, op->call_id);
op_key = generate_op_key(op->rsc_id, op->op_type, op->interval_ms);
// Get resource info if available (from executor state or action XML)
if (lrm_state) {
rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
}
if ((rsc == NULL) && action_xml) {
xmlNode *xml = find_xml_node(action_xml, XML_CIB_TAG_RESOURCE, TRUE);
const char *standard = crm_element_value(xml, XML_AGENT_ATTR_CLASS);
const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER);
const char *type = crm_element_value(xml, XML_ATTR_TYPE);
if (standard && type) {
crm_info("%s agent information not cached, using %s%s%s:%s from action XML",
op->rsc_id, standard,
(provider? ":" : ""), (provider? provider : ""), type);
rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type);
} else {
crm_err("Can't process %s result because %s agent information not cached or in XML",
op_key, op->rsc_id);
}
}
// Get node name if available (from executor state or action XML)
if (lrm_state) {
node_name = lrm_state->node_name;
} else if (action_xml) {
node_name = crm_element_value(action_xml, XML_LRM_ATTR_TARGET);
}
// If the caller did not supply the pending record, this function owns removal
if(pending == NULL) {
remove = TRUE;
if (lrm_state) {
pending = g_hash_table_lookup(lrm_state->pending_ops, op_id);
}
}
if (op->op_status == PCMK_LRM_OP_ERROR) {
switch(op->rc) {
case PCMK_OCF_NOT_RUNNING:
case PCMK_OCF_RUNNING_MASTER:
case PCMK_OCF_DEGRADED:
case PCMK_OCF_DEGRADED_MASTER:
// Leave it to the TE/scheduler to decide if this is an error
op->op_status = PCMK_LRM_OP_DONE;
break;
default:
/* Nothing to do */
break;
}
}
if (op->op_status != PCMK_LRM_OP_CANCELLED) {
/* We might not record the result, so directly acknowledge it to the
* originator instead, so it doesn't time out waiting for the result
* (especially important if part of a transition).
*/
need_direct_ack = TRUE;
if (controld_action_is_recordable(op->op_type)) {
if (node_name && rsc) {
// We should record the result, and happily, we can
update_id = do_update_resource(node_name, rsc, op);
need_direct_ack = FALSE;
} else if (op->rsc_deleted) {
/* We shouldn't record the result (likely the resource was
* refreshed, cleaned, or removed while this operation was
* in flight).
*/
crm_notice("Not recording %s result in CIB because "
"resource information was removed since it was initiated",
op_key);
} else {
/* This shouldn't be possible; the executor didn't consider the
* resource deleted, but we couldn't find resource or node
* information.
*/
crm_err("Unable to record %s result in CIB: %s", op_key,
(node_name? "No resource information" : "No node name"));
}
}
} else if (op->interval_ms == 0) {
/* A non-recurring operation was cancelled. Most likely, the
* never-initiated action was removed from the executor's pending
* operations list upon resource removal.
*/
need_direct_ack = TRUE;
} else if (pending == NULL) {
/* This recurring operation was cancelled, but was not pending. No
* transition actions are waiting on it, nothing needs to be done.
*/
} else if (op->user_data == NULL) {
/* This recurring operation was cancelled and pending, but we don't
* have a transition key. This should never happen.
*/
crm_err("Recurring operation %s was cancelled without transition information",
op_key);
} else if (pending->remove) {
/* This recurring operation was cancelled (by us) and pending, and we
* have been waiting for it to finish.
*/
if (lrm_state) {
erase_lrm_history_by_op(lrm_state, op);
}
} else if (op->rsc_deleted) {
/* This recurring operation was cancelled (but not by us, and the
* executor does not have resource information, likely due to resource
* cleanup, refresh, or removal) and pending.
*/
crm_debug("Recurring op %s was cancelled due to resource deletion",
op_key);
need_direct_ack = TRUE;
} else {
/* This recurring operation was cancelled (but not by us, likely by the
* executor before stopping the resource) and pending. We don't need to
* do anything special.
*/
}
if (need_direct_ack) {
send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
}
if(remove == FALSE) {
/* The caller will do this afterwards, but keep the logging consistent */
removed = TRUE;
} else if (lrm_state && ((op->interval_ms == 0)
|| (op->op_status == PCMK_LRM_OP_CANCELLED))) {
// Only one-shot results and cancellations end an operation's pending entry
gboolean found = g_hash_table_remove(lrm_state->pending_ops, op_id);
if (op->interval_ms != 0) {
removed = TRUE;
} else if (found) {
removed = TRUE;
crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed",
op_key, op->call_id, op_id,
g_hash_table_size(lrm_state->pending_ops));
}
}
if (node_name == NULL) {
node_name = "unknown node"; // for logging
}
// Log the result at a severity that depends on the final status
switch (op->op_status) {
case PCMK_LRM_OP_CANCELLED:
crm_info("Result of %s operation for %s on %s: %s "
CRM_XS " call=%d key=%s confirmed=%s",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, node_name,
services_lrm_status_str(op->op_status),
op->call_id, op_key, (removed? "true" : "false"));
break;
case PCMK_LRM_OP_DONE:
do_crm_log((op->interval_ms? LOG_INFO : LOG_NOTICE),
"Result of %s operation for %s on %s: %d (%s) "
CRM_XS " call=%d key=%s confirmed=%s cib-update=%d",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, node_name,
op->rc, services_ocf_exitcode_str(op->rc),
op->call_id, op_key, (removed? "true" : "false"),
update_id);
break;
case PCMK_LRM_OP_TIMEOUT:
crm_err("Result of %s operation for %s on %s: %s "
CRM_XS " call=%d key=%s timeout=%dms",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, node_name,
services_lrm_status_str(op->op_status),
op->call_id, op_key, op->timeout);
break;
default:
crm_err("Result of %s operation for %s on %s: %s "
CRM_XS " call=%d key=%s confirmed=%s status=%d cib-update=%d",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, node_name,
services_lrm_status_str(op->op_status), op->call_id, op_key,
(removed? "true" : "false"), op->op_status, update_id);
}
// Agent output is logged at notice level for nonzero rc, debug level otherwise
if (op->output) {
char *prefix =
crm_strdup_printf("%s-" CRM_OP_FMT ":%d", node_name,
op->rsc_id, op->op_type, op->interval_ms,
op->call_id);
if (op->rc) {
crm_log_output(LOG_NOTICE, prefix, op->output);
} else {
crm_log_output(LOG_DEBUG, prefix, op->output);
}
free(prefix);
}
// Metadata results update the metadata cache; every other action sends an alert
if (lrm_state) {
if (safe_str_neq(op->op_type, RSC_METADATA)) {
crmd_alert_resource_op(lrm_state->node_name, op);
} else if (rsc && (op->rc == PCMK_OCF_OK)) {
char *metadata = unescape_newlines(op->output);
metadata_cache_update(lrm_state->metadata_cache, rsc, metadata);
free(metadata);
}
}
if (op->rsc_deleted) {
crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
if (lrm_state) {
delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL);
}
}
/* If a shutdown was escalated while operations were pending,
* then the FSA will be stalled right now... allow it to continue
*/
mainloop_set_trigger(fsa_source);
if (lrm_state && rsc) {
update_history_cache(lrm_state, rsc, op);
}
lrmd_free_rsc_info(rsc);
free(op_key);
free(op_id);
}
diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c
index 02ab103f4c..51d908e90b 100644
--- a/daemons/controld/controld_te_callbacks.c
+++ b/daemons/controld/controld_te_callbacks.c
@@ -1,962 +1,959 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include /* For ONLINESTATUS etc */
// Prototype only; the definition is not part of this section of the file
void te_update_confirm(const char *event, xmlNode * msg);
// Defined elsewhere; tengine_stonith_callback() compares it with the UUID decoded from a transition key
extern char *te_uuid;
gboolean shuttingdown = FALSE;
// Transition graph consulted by the CIB diff handlers below (may be NULL, see te_update_diff())
crm_graph_t *transition_graph;
crm_trigger_t *transition_trigger = NULL;
// Per-target failure count at which too_many_st_failures() reports giving up; set by update_stonith_max_attempts()
static unsigned long int stonith_max_attempts = 10;
/* #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */
// XPath format (single %s = operation ID) that te_update_diff_v1() uses to look for a removed operation among the diff's added entries
#define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']"
/*!
 * \internal
 * \brief Find the ID of the node state element enclosing an XML node
 *
 * Starting at \p rsc_op itself, walks up the parent chain until an element
 * whose name is XML_CIB_TAG_STATE is found.
 *
 * \param[in] rsc_op  XML node to start from
 *
 * \return ID of the enclosing node state element, or NULL if the chain ends
 *         without finding one (assuming CRM_CHECK runs its failure action)
 */
static const char *
get_node_id(xmlNode * rsc_op)
{
    xmlNode *ancestor = NULL;

    for (ancestor = rsc_op; ancestor != NULL; ancestor = ancestor->parent) {
        if (!safe_str_neq(XML_CIB_TAG_STATE, TYPE(ancestor))) {
            break;
        }
    }

    CRM_CHECK(ancestor != NULL, return NULL);
    return ID(ancestor);
}
/*!
 * \brief Set the fencing failure limit from a string value
 *
 * A value equal to CRM_INFINITY_S selects CRM_SCORE_INFINITY; any other value
 * is converted with crm_int_helper().
 *
 * \param[in] value  New limit, as text
 */
void
update_stonith_max_attempts(const char* value)
{
    if (!safe_str_eq(value, CRM_INFINITY_S)) {
        stonith_max_attempts = crm_int_helper(value, NULL);
        return;
    }
    stonith_max_attempts = CRM_SCORE_INFINITY;
}
/*!
 * \internal
 * \brief Process a format-1 CIB patch for the transition engine
 *
 * Runs a fixed sequence of checks against the diff. The first check that
 * requires a new transition calls abort_transition() and jumps to "bail";
 * otherwise added operation entries are passed to process_graph_event() and
 * removed operation entries are matched against cancel actions.
 *
 * \param[in] event  Not referenced in this function
 * \param[in] diff   Patch to process
 *
 * \note xpathObj is reused for every search: each check frees it before the
 *       next search, and the "bail" label frees whichever result is current
 *       when a check aborts early.
 */
static void
te_update_diff_v1(const char *event, xmlNode *diff)
{
int lpc, max;
xmlXPathObject *xpathObj = NULL;
CRM_CHECK(diff != NULL, return);
xml_log_patchset(LOG_TRACE, __FUNCTION__, diff);
if (cib_config_changed(NULL, NULL, &diff)) {
abort_transition(INFINITY, tg_restart, "Non-status change", diff);
goto bail; /* configuration changed */
}
/* Tickets Attributes - Added/Updated */
xpathObj =
xpath_search(diff,
"//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS);
if (numXpathResults(xpathObj) > 0) {
xmlNode *aborted = getXpathResult(xpathObj, 0);
abort_transition(INFINITY, tg_restart, "Ticket attribute: update", aborted);
goto bail;
}
freeXpathObject(xpathObj);
/* Tickets Attributes - Removed */
xpathObj =
xpath_search(diff,
"//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS);
if (numXpathResults(xpathObj) > 0) {
xmlNode *aborted = getXpathResult(xpathObj, 0);
abort_transition(INFINITY, tg_restart, "Ticket attribute: removal", aborted);
goto bail;
}
freeXpathObject(xpathObj);
/* Transient Attributes - Added/Updated */
xpathObj =
xpath_search(diff,
"//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//"
XML_TAG_TRANSIENT_NODEATTRS "//" XML_CIB_TAG_NVPAIR);
max = numXpathResults(xpathObj);
for (lpc = 0; lpc < max; lpc++) {
xmlNode *attr = getXpathResult(xpathObj, lpc);
const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
const char *value = NULL;
/* Only an attribute named CRM_OP_PROBED with a true value avoids the abort;
 * for any other attribute, value stays NULL and the check below fires
 */
if (safe_str_eq(CRM_OP_PROBED, name)) {
value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
}
if (crm_is_true(value) == FALSE) {
abort_transition(INFINITY, tg_restart, "Transient attribute: update", attr);
crm_log_xml_trace(attr, "Abort");
goto bail;
}
}
freeXpathObject(xpathObj);
/* Transient Attributes - Removed */
xpathObj =
xpath_search(diff,
"//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//"
XML_TAG_TRANSIENT_NODEATTRS);
if (numXpathResults(xpathObj) > 0) {
xmlNode *aborted = getXpathResult(xpathObj, 0);
abort_transition(INFINITY, tg_restart, "Transient attribute: removal", aborted);
goto bail;
}
freeXpathObject(xpathObj);
/*
* Updates by, or in response to, TE actions will never contain updates
* for more than one resource at a time, so such updates indicate an
* LRM refresh.
*
* In that case, start a new transition rather than check each result
* individually, which can result in _huge_ speedups in large clusters.
*
* Unfortunately, we can only do so when there are no pending actions.
* Otherwise, we could mistakenly throw away those results here, and
* the cluster will stall waiting for them and time out the operation.
*/
if (transition_graph->pending == 0) {
xpathObj = xpath_search(diff,
"//" F_CIB_UPDATE_RESULT
"//" XML_TAG_DIFF_ADDED
"//" XML_LRM_TAG_RESOURCE);
max = numXpathResults(xpathObj);
if (max > 1) {
crm_debug("Ignoring resource operation updates due to history refresh of %d resources",
max);
crm_log_xml_trace(diff, "lrm-refresh");
abort_transition(INFINITY, tg_restart, "History refresh", NULL);
goto bail;
}
freeXpathObject(xpathObj);
}
/* Process operation updates */
xpathObj =
xpath_search(diff,
"//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP);
max = numXpathResults(xpathObj);
if (max > 0) {
/* This inner lpc shadows the function-level lpc declared above */
int lpc = 0;
for (lpc = 0; lpc < max; lpc++) {
xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
const char *node = get_node_id(rsc_op);
process_graph_event(rsc_op, node);
}
}
freeXpathObject(xpathObj);
/* Detect deleted (as opposed to replaced or added) actions - eg. crm_resource -C */
xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP);
max = numXpathResults(xpathObj);
for (lpc = 0; lpc < max; lpc++) {
int path_max = 0;
const char *op_id = NULL;
char *rsc_op_xpath = NULL;
xmlXPathObject *op_match = NULL;
xmlNode *match = getXpathResult(xpathObj, lpc);
CRM_LOG_ASSERT(match != NULL);
if(match == NULL) { continue; };
op_id = ID(match);
/* Build an XPath for the same operation ID among the added entries; the
 * template's "%s" is replaced by op_id, so this buffer is always large enough
 */
path_max = strlen(RSC_OP_TEMPLATE) + strlen(op_id) + 1;
rsc_op_xpath = calloc(1, path_max);
snprintf(rsc_op_xpath, path_max, RSC_OP_TEMPLATE, op_id);
op_match = xpath_search(diff, rsc_op_xpath);
/* No added entry with this ID means the operation was truly deleted, not replaced */
if (numXpathResults(op_match) == 0) {
/* Prevent false positives by matching cancelations too */
const char *node = get_node_id(match);
crm_action_t *cancelled = get_cancel_action(op_id, node);
if (cancelled == NULL) {
crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id,
node);
abort_transition(INFINITY, tg_restart, "Resource op removal", match);
freeXpathObject(op_match);
free(rsc_op_xpath);
goto bail;
} else {
crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d",
op_id, node, cancelled->id);
}
}
freeXpathObject(op_match);
free(rsc_op_xpath);
}
bail:
freeXpathObject(xpathObj);
}
/*!
 * \internal
 * \brief Process every child of a resource history element as a graph event
 *
 * \param[in] lrm_resource  Element whose children are passed, one at a time,
 *                          to process_graph_event()
 * \param[in] node          Node identifier handed through to
 *                          process_graph_event()
 */
static void
process_lrm_resource_diff(xmlNode *lrm_resource, const char *node)
{
    xmlNode *entry = __xml_first_child(lrm_resource);

    while (entry != NULL) {
        process_graph_event(entry, node);
        entry = __xml_next(entry);
    }
}
/*!
 * \internal
 * \brief Process resource history changes for one node
 *
 * \param[in] node    Node identifier passed on to process_lrm_resource_diff()
 * \param[in] xml     Either an XML_CIB_TAG_LRM element (whose
 *                    XML_LRM_TAG_RESOURCES child is then used) or an
 *                    XML_LRM_TAG_RESOURCES element; NULL is ignored
 * \param[in] change  Patch change being processed (used for trace logging)
 * \param[in] op      Not referenced in this function
 * \param[in] xpath   Not referenced in this function
 */
static void
process_resource_updates(const char *node, xmlNode *xml, xmlNode *change,
                         const char *op, const char *xpath)
{
    xmlNode *rsc = NULL;

    if (xml == NULL) {
        return;
    }

    if (strcmp((const char*)xml->name, XML_CIB_TAG_LRM) == 0) {
        xml = first_named_child(xml, XML_LRM_TAG_RESOURCES);
        crm_trace("Got %p in %s", xml, XML_CIB_TAG_LRM);
        if (xml == NULL) {
            /* No XML_LRM_TAG_RESOURCES child was found, so there is nothing
             * to process; continuing would dereference NULL in the assertion
             * below
             */
            return;
        }
    }

    CRM_ASSERT(strcmp((const char*)xml->name, XML_LRM_TAG_RESOURCES) == 0);

    /*
     * Updates by, or in response to, TE actions will never contain updates
     * for more than one resource at a time, so such updates indicate an
     * LRM refresh.
     *
     * In that case, start a new transition rather than check each result
     * individually, which can result in _huge_ speedups in large clusters.
     *
     * Unfortunately, we can only do so when there are no pending actions.
     * Otherwise, we could mistakenly throw away those results here, and
     * the cluster will stall waiting for them and time out the operation.
     */
    if ((transition_graph->pending == 0)
        && xml->children && xml->children->next) {

        crm_log_xml_trace(change, "lrm-refresh");
        abort_transition(INFINITY, tg_restart, "History refresh", NULL);
        return;
    }

    for (rsc = __xml_first_child(xml); rsc != NULL; rsc = __xml_next(rsc)) {
        crm_trace("Processing %s", ID(rsc));
        process_lrm_resource_diff(rsc, node);
    }
}
// XPath fragment that precedes the quoted ID of an lrm element
#define NODE_PATT "/lrm[@id="

/*!
 * \internal
 * \brief Extract the quoted ID that follows NODE_PATT in an XPath
 *
 * \param[in] xpath  XPath to search
 *
 * \return Newly allocated copy of the text between the character right after
 *         NODE_PATT and the next single quote (caller must free it), or NULL
 *         if \p xpath does not contain NODE_PATT
 *
 * \note A missing closing quote trips CRM_ASSERT.
 */
static char *get_node_from_xpath(const char *xpath)
{
    char *closing_quote = NULL;
    char *nodeid = NULL;
    char *start = strstr(xpath, NODE_PATT);

    if (start == NULL) {
        return NULL;
    }

    // Step over the pattern and the opening quote character that follows it
    start += strlen(NODE_PATT) + 1;

    nodeid = strdup(start);
    closing_quote = strstr(nodeid, "\'");
    CRM_ASSERT(closing_quote);
    closing_quote[0] = 0;
    return nodeid;
}
/*!
 * \internal
 * \brief Extract a node ID from an XPath containing a node_state element
 *
 * Finds the first occurrence of node_state[@id=' in \p xpath and copies the
 * text up to (not including) the next single quote.
 *
 * \param[in] xpath  XPath to search (may be NULL)
 *
 * \return Newly allocated node ID (caller must free it), or NULL if \p xpath
 *         is NULL, has no node_state ID, lacks the closing quote, or memory
 *         could not be allocated
 */
static char *extract_node_uuid(const char *xpath)
{
    static const char prefix[] = "node_state[@id=\'";
    const char *start = NULL;
    const char *end = NULL;
    char *node_uuid = NULL;
    size_t len = 0;

    if (xpath == NULL) {
        return NULL;
    }

    start = strstr(xpath, prefix);
    if (start == NULL) {
        return NULL;
    }
    start += sizeof(prefix) - 1;    // first character of the ID itself

    end = strchr(start, '\'');
    if (end == NULL) {
        return NULL;
    }

    /* Copy just the ID; the input is only read, so no scratch copy of the
     * whole path is needed
     */
    len = (size_t) (end - start);
    node_uuid = malloc(len + 1);
    if (node_uuid != NULL) {
        memcpy(node_uuid, start, len);
        node_uuid[len] = '\0';
    }
    return node_uuid;
}
/*!
 * \internal
 * \brief Abort the transition unless a change is an expected node-down deletion
 *
 * The transition is left alone only when \p op is "delete", a node ID can be
 * extracted from \p xpath, and match_down_event() returns a match for that
 * node. In every other case the transition is restarted with \p reason.
 *
 * \param[in] xpath   XPath of the changed element
 * \param[in] op      Patch operation name
 * \param[in] change  Patch change, passed to abort_transition()
 * \param[in] reason  Abort reason, passed to abort_transition()
 */
static void
abort_unless_down(const char *xpath, const char *op, xmlNode *change,
                  const char *reason)
{
    char *uuid = NULL;

    if (safe_str_neq(op, "delete")) {
        abort_transition(INFINITY, tg_restart, reason, change);
        return;
    }

    uuid = extract_node_uuid(xpath);
    if (uuid == NULL) {
        crm_err("Could not extract node ID from %s", xpath);
        abort_transition(INFINITY, tg_restart, reason, change);
        return;
    }

    if (match_down_event(uuid) != NULL) {
        crm_trace("Expecting changes to %s (%s)", uuid, xpath);
    } else {
        crm_trace("Not expecting %s to be down (%s)", uuid, xpath);
        abort_transition(INFINITY, tg_restart, reason, change);
    }
    free(uuid);
}
/*!
 * \internal
 * \brief Handle the deletion of an operation history entry
 *
 * The operation key is taken from between the last pair of single quotes in
 * \p xpath. If get_cancel_action() finds a matching cancel action for that
 * key and node, the action is confirmed and the graph is updated; otherwise
 * the transition is restarted.
 *
 * \param[in] xpath   XPath of the deleted entry
 * \param[in] change  Patch change, passed to abort_transition()
 */
static void
process_op_deletion(const char *xpath, xmlNode *change)
{
    char *path_copy = strdup(xpath);
    char *quote = strrchr(path_copy, '\'');
    char *op_key = NULL;
    char *uuid = NULL;
    crm_action_t *cancel_action = NULL;

    // Cut the copy at the final quote, then locate the quote that opens the key
    if (quote != NULL) {
        *quote = '\0';
        quote = strrchr(path_copy, '\'');
    }
    if (quote == NULL) {
        crm_warn("Ignoring malformed CIB update (resource deletion of %s)",
                 xpath);
        free(path_copy);
        return;
    }
    op_key = quote + 1;

    uuid = extract_node_uuid(xpath);
    cancel_action = get_cancel_action(op_key, uuid);
    if (cancel_action == NULL) {
        abort_transition(INFINITY, tg_restart, "Resource operation removal",
                         change);
    } else {
        crm_info("Cancellation of %s on %s confirmed (%d)",
                 op_key, uuid, cancel_action->id);
        stop_te_timer(cancel_action->timer);
        te_action_confirmed(cancel_action);
        update_graph(transition_graph, cancel_action);
        trigger_graph();
    }

    free(path_copy);
    free(uuid);
}
/*!
 * \internal
 * \brief Dispatch a "delete" patch change according to what was deleted
 *
 * The XPath is tested, in this order, for an operation entry, an LRM section,
 * and a node state entry; the first match decides the handler. Anything else
 * is only trace-logged.
 *
 * \param[in] xpath   XPath of the deleted element
 * \param[in] op      Patch operation name, passed to abort_unless_down()
 * \param[in] change  Patch change, passed to the selected handler
 */
static void
process_delete_diff(const char *xpath, const char *op, xmlNode *change)
{
    if (strstr(xpath, "/" XML_LRM_TAG_RSC_OP "[") != NULL) {
        process_op_deletion(xpath, change);
        return;
    }
    if (strstr(xpath, "/" XML_CIB_TAG_LRM "[") != NULL) {
        abort_unless_down(xpath, op, change, "Resource state removal");
        return;
    }
    if (strstr(xpath, "/" XML_CIB_TAG_STATE "[") != NULL) {
        abort_unless_down(xpath, op, change, "Node state removal");
        return;
    }
    crm_trace("Ignoring delete of %s", xpath);
}
/*!
 * \internal
 * \brief Process the resource history found under one node state element
 *
 * \param[in] state   Node state element; its ID is used as the node
 *                    identifier and its XML_CIB_TAG_LRM child as the history
 * \param[in] change  Passed through to process_resource_updates()
 * \param[in] op      Passed through to process_resource_updates()
 * \param[in] xpath   Passed through to process_resource_updates()
 */
static void
process_node_state_diff(xmlNode *state, xmlNode *change, const char *op,
                        const char *xpath)
{
    xmlNode *history = first_named_child(state, XML_CIB_TAG_LRM);
    const char *node_id = ID(state);

    process_resource_updates(node_id, history, change, op, xpath);
}
/*!
 * \internal
 * \brief Process every child of a status element as a node state change
 *
 * \param[in] status  Element whose children are each handed to
 *                    process_node_state_diff()
 * \param[in] change  Passed through to process_node_state_diff()
 * \param[in] op      Passed through to process_node_state_diff()
 * \param[in] xpath   Passed through to process_node_state_diff()
 */
static void
process_status_diff(xmlNode *status, xmlNode *change, const char *op,
                    const char *xpath)
{
    xmlNode *node_state = __xml_first_child(status);

    while (node_state != NULL) {
        process_node_state_diff(node_state, change, op, xpath);
        node_state = __xml_next(node_state);
    }
}
/*!
 * \internal
 * \brief Process a patch change whose result is a whole CIB element
 *
 * A status child, if present, is processed first; a configuration child, if
 * present, then restarts the transition.
 *
 * \param[in] cib     CIB element to examine
 * \param[in] change  Patch change, passed to the handlers
 * \param[in] op      Passed through to process_status_diff()
 * \param[in] xpath   Passed through to process_status_diff()
 */
static void
process_cib_diff(xmlNode *cib, xmlNode *change, const char *op,
                 const char *xpath)
{
    xmlNode *status_section = first_named_child(cib, XML_CIB_TAG_STATUS);
    xmlNode *config_section = first_named_child(cib, XML_CIB_TAG_CONFIGURATION);

    if (status_section != NULL) {
        process_status_diff(status_section, change, op, xpath);
    }
    if (config_section == NULL) {
        return;
    }
    abort_transition(INFINITY, tg_restart,
                     "Non-status-only change", change);
}
/*!
 * \internal
 * \brief Process a format-2 CIB patch for the transition engine
 *
 * Walks each child ("change") of the patch, works out the operation, XPath
 * and (for create/modify) the resulting element, and dispatches on them.
 * Configuration, ticket and transient-attribute changes stop the walk after
 * being handled; everything else continues with the next change.
 *
 * \param[in] diff  Patch to process
 */
static void
te_update_diff_v2(xmlNode *diff)
{
crm_log_xml_trace(diff, "Patch:Raw");
for (xmlNode *change = __xml_first_child(diff); change != NULL;
change = __xml_next(change)) {
xmlNode *match = NULL;
const char *name = NULL;
const char *xpath = crm_element_value(change, XML_DIFF_PATH);
// Possible ops: create, modify, delete, move
const char *op = crm_element_value(change, XML_DIFF_OP);
// Ignore uninteresting updates
if (op == NULL) {
continue;
} else if (xpath == NULL) {
crm_trace("Ignoring %s change for version field", op);
continue;
} else if (strcmp(op, "move") == 0) {
crm_trace("Ignoring move change at %s", xpath);
continue;
}
// Find the result of create/modify ops
if (strcmp(op, "create") == 0) {
match = change->children;
} else if (strcmp(op, "modify") == 0) {
match = first_named_child(change, XML_DIFF_RESULT);
if(match) {
match = match->children;
}
} else if (strcmp(op, "delete") != 0) {
crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)",
op, xpath);
continue;
}
// For delete operations match stays NULL, so name stays NULL as well
if (match) {
if (match->type == XML_COMMENT_NODE) {
crm_trace("Ignoring %s operation for comment at %s", op, xpath);
continue;
}
name = (const char *)match->name;
}
crm_trace("Handling %s operation for %s%s%s",
op, (xpath? xpath : "CIB"),
(name? " matched by " : ""), (name? name : ""));
// Dispatch: the XPath-based checks come first, then the result element's name
if (strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION)) {
abort_transition(INFINITY, tg_restart, "Configuration change",
change);
break; // Won't be packaged with operation results we may be waiting for
} else if (strstr(xpath, "/" XML_CIB_TAG_TICKETS)
|| safe_str_eq(name, XML_CIB_TAG_TICKETS)) {
abort_transition(INFINITY, tg_restart, "Ticket attribute change", change);
break; // Won't be packaged with operation results we may be waiting for
} else if (strstr(xpath, "/" XML_TAG_TRANSIENT_NODEATTRS "[")
|| safe_str_eq(name, XML_TAG_TRANSIENT_NODEATTRS)) {
abort_unless_down(xpath, op, change, "Transient attribute change");
break; // Won't be packaged with operation results we may be waiting for
} else if (strcmp(op, "delete") == 0) {
process_delete_diff(xpath, op, change);
} else if (name == NULL) {
crm_warn("Ignoring malformed CIB update (%s at %s has no result)",
op, xpath);
} else if (strcmp(name, XML_TAG_CIB) == 0) {
process_cib_diff(match, change, op, xpath);
} else if (strcmp(name, XML_CIB_TAG_STATUS) == 0) {
process_status_diff(match, change, op, xpath);
} else if (strcmp(name, XML_CIB_TAG_STATE) == 0) {
process_node_state_diff(match, change, op, xpath);
} else if (strcmp(name, XML_CIB_TAG_LRM) == 0) {
process_resource_updates(ID(match), match, change, op, xpath);
} else if (strcmp(name, XML_LRM_TAG_RESOURCES) == 0) {
// From this level down, the node is taken from the XPath
char *local_node = get_node_from_xpath(xpath);
process_resource_updates(local_node, match, change, op, xpath);
free(local_node);
} else if (strcmp(name, XML_LRM_TAG_RESOURCE) == 0) {
char *local_node = get_node_from_xpath(xpath);
process_lrm_resource_diff(match, local_node);
free(local_node);
} else if (strcmp(name, XML_LRM_TAG_RSC_OP) == 0) {
char *local_node = get_node_from_xpath(xpath);
process_graph_event(match, local_node);
free(local_node);
} else {
crm_warn("Ignoring malformed CIB update (%s at %s has unrecognized result %s)",
op, xpath, name);
}
}
}
/*!
 * \brief Handle a CIB update notification for the transition engine
 *
 * Notifications are ignored when there is no transition graph, when the CIB
 * result code is negative, or when the graph is complete and the FSA is in a
 * state other than S_IDLE, S_TRANSITION_ENGINE or S_POLICY_ENGINE. Otherwise
 * the patch is handed to the processor for its format (1 or 2).
 *
 * \param[in] event  Passed through to te_update_diff_v1()
 * \param[in] msg    Notification message containing the patch
 */
void
te_update_diff(const char *event, xmlNode * msg)
{
    int rc = -EINVAL;
    int format = 1;
    int p_add[] = { 0, 0, 0 };
    int p_del[] = { 0, 0, 0 };
    const char *op = NULL;
    xmlNode *diff = NULL;

    CRM_CHECK(msg != NULL, return);

    crm_element_value_int(msg, F_CIB_RC, &rc);

    if (transition_graph == NULL) {
        crm_trace("No graph");
        return;
    }

    if (rc < pcmk_ok) {
        crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc));
        return;
    }

    if (transition_graph->complete) {
        switch (fsa_state) {
            case S_IDLE:
            case S_TRANSITION_ENGINE:
            case S_POLICY_ENGINE:
                // Updates still matter in these states
                break;

            default:
                crm_trace("Filter state=%s, complete=%d", fsa_state2string(fsa_state),
                          transition_graph->complete);
                return;
        }
    }

    op = crm_element_value(msg, F_CIB_OPERATION);
    diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);

    xml_patch_versions(diff, p_add, p_del);
    crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op,
              p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2],
              fsa_state2string(fsa_state));

    // format keeps its default of 1 unless the patch says otherwise
    crm_element_value_int(diff, "format", &format);
    if (format == 1) {
        te_update_diff_v1(event, diff);
    } else if (format == 2) {
        te_update_diff_v2(diff);
    } else {
        crm_warn("Ignoring malformed CIB update (unknown patch format %d)",
                 format);
    }
}
/*!
 * \brief Process a message addressed to the transition engine
 *
 * Only executor (N)ACKs are acted on: each operation entry found in
 * \p xml_data is passed to process_graph_event().
 *
 * \param[in] msg       Message to process
 * \param[in] xml_data  Message payload searched for operation entries
 *
 * \return FALSE if the message is not addressed to the transition engine or
 *         is an (N)ACK without any operation entry, otherwise TRUE (including
 *         for messages with no task and for unknown commands)
 */
gboolean
process_te_message(xmlNode * msg, xmlNode * xml_data)
{
    const char *from = crm_element_value(msg, F_ORIG);
    const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
    const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
    const char *ref = crm_element_value(msg, F_CRM_REFERENCE);
    const char *op = crm_element_value(msg, F_CRM_TASK);
    const char *type = crm_element_value(msg, F_CRM_MSG_TYPE);

    crm_trace("Processing %s (%s) message", op, ref);
    crm_log_xml_trace(msg, "ipc");

    // A message without a task is silently accepted
    if (op != NULL) {
        if ((sys_to == NULL) || (strcasecmp(sys_to, CRM_SYSTEM_TENGINE) != 0)) {
            crm_trace("Bad sys-to %s", crm_str(sys_to));
            return FALSE;
        }

        if (safe_str_eq(op, CRM_OP_INVOKE_LRM)
            && safe_str_eq(sys_from, CRM_SYSTEM_LRMD)) {

            xmlXPathObject *results = NULL;
            int n_ops = 0;

            crm_log_xml_trace(msg, "Processing (N)ACK");
            crm_debug("Processing (N)ACK %s from %s", crm_element_value(msg, F_CRM_REFERENCE), from);

            results = xpath_search(xml_data, "//" XML_LRM_TAG_RSC_OP);
            n_ops = numXpathResults(results);
            if (n_ops == 0) {
                crm_log_xml_err(msg, "Invalid (N)ACK");
                freeXpathObject(results);
                return FALSE;
            }

            for (int i = 0; i < n_ops; i++) {
                xmlNode *rsc_op = getXpathResult(results, i);
                const char *node = get_node_id(rsc_op);

                process_graph_event(rsc_op, node);
            }
            freeXpathObject(results);

        } else {
            crm_err("Unknown command: %s::%s from %s", type, op, sys_from);
        }
    }

    crm_trace("finished processing message");
    return TRUE;
}
// Fencing failure records keyed by target name; created on first use by st_fail_count_increment()
GHashTable *stonith_failures = NULL;

// Value type stored in stonith_failures
struct st_fail_rec {
    int count;      // failures recorded for the target since the last reset
};

/*!
 * \internal
 * \brief Check whether fencing has failed too often
 *
 * \param[in] target  Name of node to check, or NULL to check all recorded
 *                    targets (the first one at or over the limit is reported)
 *
 * \return TRUE (after logging a warning) if a checked target's failure count
 *         has reached stonith_max_attempts, otherwise FALSE
 */
static gboolean
too_many_st_failures(const char *target)
{
    struct st_fail_rec *rec = NULL;

    if (stonith_failures == NULL) {
        return FALSE;
    }

    if (target != NULL) {
        rec = g_hash_table_lookup(stonith_failures, target);
        if ((rec == NULL) || (rec->count < stonith_max_attempts)) {
            return FALSE;
        }

    } else {
        GHashTableIter iter;
        const char *name = NULL;
        gboolean exceeded = FALSE;

        g_hash_table_iter_init(&iter, stonith_failures);
        while (!exceeded
               && g_hash_table_iter_next(&iter, (gpointer *) & name, (gpointer *) & rec)) {
            if (rec->count >= stonith_max_attempts) {
                target = (const char*)name;
                exceeded = TRUE;
            }
        }
        if (!exceeded) {
            return FALSE;
        }
    }

    crm_warn("Too many failures (%d) to fence %s, giving up",
             rec->count, target);
    return TRUE;
}
/*!
 * \internal
 * \brief Zero the recorded fencing failure count of one or all targets
 *
 * Does nothing if no failures have ever been recorded, or if \p target is
 * given but has no record.
 *
 * \param[in] target  Name of node whose count should be cleared, or NULL to
 *                    clear the count of every recorded target
 */
void
st_fail_count_reset(const char *target)
{
    struct st_fail_rec *entry = NULL;

    if (stonith_failures == NULL) {
        return;
    }

    if (target == NULL) {
        GHashTableIter iter;
        const char *name = NULL;

        g_hash_table_iter_init(&iter, stonith_failures);
        while (g_hash_table_iter_next(&iter, (gpointer *) &name,
                                      (gpointer *) &entry)) {
            entry->count = 0;
        }
        return;
    }

    entry = g_hash_table_lookup(stonith_failures, target);
    if (entry != NULL) {
        entry->count = 0;
    }
}
/*!
 * \internal
 * \brief Record one more fencing failure for a target
 *
 * Creates the failure table on first use. A target without a record gets a
 * new one with a count of 1; if that record cannot be allocated, the failure
 * is silently not recorded.
 *
 * \param[in] target  Name of node that could not be fenced
 */
void
st_fail_count_increment(const char *target)
{
    struct st_fail_rec *entry = NULL;

    if (stonith_failures == NULL) {
        stonith_failures = crm_str_table_new();
    }

    entry = g_hash_table_lookup(stonith_failures, target);
    if (entry != NULL) {
        entry->count++;
        return;
    }

    entry = malloc(sizeof(struct st_fail_rec));
    if (entry == NULL) {
        return;
    }
    entry->count = 1;
    g_hash_table_insert(stonith_failures, strdup(target), entry);
}
/*!
 * \internal
 * \brief Abort the current transition because fencing failed
 *
 * \param[in] abort_action  Requested abort action (restart or stop)
 * \param[in] target        Node to check for excessive fencing failures
 *                          (NULL to check every recorded node)
 * \param[in] reason        Fencing action XML to log as the abort reason
 *                          (or NULL)
 */
void
abort_for_stonith_failure(enum transition_action abort_action,
                          const char *target, xmlNode *reason)
{
    enum transition_action effective = abort_action;

    /* Repeated fencing failures eventually downgrade the request to a stop,
     * so that no new transition is started for this reason. The failure check
     * is skipped when a stop was requested anyway.
     */
    if (abort_action == tg_stop) {
        // Nothing to downgrade
    } else if (too_many_st_failures(target)) {
        effective = tg_stop;
    }

    abort_transition(INFINITY, effective, "Stonith failed", reason);
}
/*!
 * \brief Handle the result of a fencing operation
 *
 * The result is always logged. The rest of the function only runs when this
 * node is the DC and the transition key in the callback's user data matches
 * an action of the current, not yet complete transition graph. On success the
 * action is confirmed and the target's fail count is reset; on failure the
 * action is marked failed, the fail count is incremented, and the transition
 * is aborted. In both cases the graph is updated and re-triggered.
 *
 * \param[in] stonith  Fencer connection (not referenced in this function)
 * \param[in] data     Callback data; call_id, rc and userdata are read
 *
 * \note data->userdata is freed on every path that reaches the "bail" label.
 *       The early returns (NULL userdata, or this node is not DC) do not
 *       free it.
 */
void
tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
char *uuid = NULL;
- int target_rc = -1;
int stonith_id = -1;
int transition_id = -1;
crm_action_t *action = NULL;
int call_id = data->call_id;
int rc = data->rc;
char *userdata = data->userdata;
CRM_CHECK(userdata != NULL, return);
crm_notice("Stonith operation %d/%s: %s (%d)", call_id, (char *)userdata,
pcmk_strerror(rc), rc);
// Only the DC acts on fencing results here
if (AM_I_DC == FALSE) {
return;
}
/* crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", */
/* op->call_id, op->optype, op->node_name, op->op_result, */
/* (char *)op->node_list, op->private_data); */
/* filter out old STONITH actions */
- CRM_CHECK(decode_transition_key(userdata, &uuid, &transition_id, &stonith_id, &target_rc),
- crm_err("Invalid event detected");
- goto bail;
- );
+ CRM_CHECK(decode_transition_key(userdata, &uuid, &transition_id, &stonith_id, NULL),
+ goto bail);
// Ignore results that do not belong to the transition currently in progress
if (transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid)
|| transition_graph->id != transition_id) {
crm_info("Ignoring STONITH action initiated outside of the current transition");
goto bail;
}
action = get_action(stonith_id, FALSE);
if (action == NULL) {
crm_err("Stonith action not matched");
goto bail;
}
// A result arrived, so the action's timeout timer is no longer needed
stop_te_timer(action->timer);
if (rc == pcmk_ok) {
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
// NOTE: this local shadows the function-level "uuid" (the decoded
// transition UUID) for the rest of this branch
const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
const char *op = crm_meta_value(action->params, "stonith_action");
crm_info("Stonith operation %d for %s passed", call_id, target);
if (action->confirmed == FALSE) {
te_action_confirmed(action);
if (safe_str_eq("on", op)) {
// For an "on" action, record the current time and the action's
// digest meta-values as node attributes of the target
const char *value = NULL;
char *now = crm_itoa(time(NULL));
update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, FALSE);
free(now);
value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, FALSE);
value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, FALSE);
} else if (action->sent_update == FALSE) {
// Any other action: send the stonith update once per action
send_stonith_update(action, target, uuid);
action->sent_update = TRUE;
}
}
st_fail_count_reset(target);
} else {
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
enum transition_action abort_action = tg_restart;
action->failed = TRUE;
crm_notice("Stonith operation %d for %s failed (%s): aborting transition.",
call_id, target, pcmk_strerror(rc));
/* If no fence devices were available, there's no use in immediately
* checking again, so don't start a new transition in that case.
*/
if (rc == -ENODEV) {
crm_warn("No devices found in cluster to fence %s, giving up",
target);
abort_action = tg_stop;
}
/* Increment the fail count now, so abort_for_stonith_failure() can
* check it. Non-DC nodes will increment it in tengine_stonith_notify().
*/
st_fail_count_increment(target);
abort_for_stonith_failure(abort_action, target, NULL);
}
update_graph(transition_graph, action);
trigger_graph();
bail:
// Common cleanup; free(NULL) is harmless if the key was never decoded
free(userdata);
free(uuid);
return;
}
/*!
 * \brief Handle the result of a CIB update that recorded a fencing event
 *
 * \param[in] msg        Update message (logged if the update failed)
 * \param[in] call_id    CIB call ID of the update
 * \param[in] rc         Result of the update (negative on failure)
 * \param[in] output     Unused
 * \param[in] user_data  String used to describe the update in log messages
 *
 * \note A failed update aborts the transition with tg_shutdown.
 */
void
cib_fencing_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    const char *desc = (char *) user_data;

    if (rc >= pcmk_ok) {
        crm_info("Fencing update %d for %s: complete", call_id, desc);
        return;
    }

    crm_err("Fencing update %d for %s: failed - %s (%d)",
            call_id, desc, pcmk_strerror(rc), rc);
    crm_log_xml_warn(msg, "Failed update");
    abort_transition(INFINITY, tg_shutdown, "CIB update failed", NULL);
}
/*!
 * \brief Log an error if a CIB update for an action failed
 *
 * \param[in] msg        Unused
 * \param[in] call_id    CIB call ID of the update
 * \param[in] rc         Result of the update (negative on failure)
 * \param[in] output     Unused
 * \param[in] user_data  Unused
 */
void
cib_action_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    if (rc >= pcmk_ok) {
        return; // Success needs no handling
    }
    crm_err("Update %d FAILED: %s", call_id, pcmk_strerror(rc));
}
/*!
 * \brief Handle a timeout in node-to-node communication
 *
 * If the transition is already complete, the timeout is only logged.
 * Otherwise the timed-out action is marked failed and confirmed, the
 * transition is aborted for restart, and the graph is re-evaluated.
 *
 * \param[in] data  Pointer to the action timer that expired
 *
 * \return FALSE (indicating that the source should not be re-added)
 */
gboolean
action_timer_callback(gpointer data)
{
    crm_action_timer_t *timer = (crm_action_timer_t *) data;
    const char *task = NULL;
    const char *on_node = NULL;
    const char *via_node = NULL;
    const char *node_desc = NULL;
    const char *task_desc = NULL;
    const char *via_desc = NULL;

    CRM_CHECK(data != NULL, return FALSE);

    stop_te_timer(timer);

    CRM_CHECK(timer->action != NULL, return FALSE);

    task = crm_element_value(timer->action->xml, XML_LRM_ATTR_TASK);
    on_node = crm_element_value(timer->action->xml, XML_LRM_ATTR_TARGET);
    via_node = crm_element_value(timer->action->xml, XML_LRM_ATTR_ROUTER_NODE);

    // Printable fallbacks for attributes that are missing from the action
    node_desc = (on_node != NULL)? on_node : "";
    task_desc = (task != NULL)? task : "unknown action";
    via_desc = (via_node != NULL)? via_node : "controller";

    if (transition_graph->complete) {
        crm_notice("Node %s did not send %s result (via %s) within %dms "
                   "(ignoring because transition not in progress)",
                   node_desc, task_desc, via_desc, timer->timeout);
        return FALSE;
    }

    /* fail the action */
    crm_err("Node %s did not send %s result (via %s) within %dms "
            "(action timeout plus cluster-delay)",
            node_desc, task_desc, via_desc, timer->timeout);
    print_action(LOG_ERR, "Aborting transition, action lost: ", timer->action);

    timer->action->failed = TRUE;
    te_action_confirmed(timer->action);
    abort_transition(INFINITY, tg_restart, "Action lost", NULL);

    update_graph(transition_graph, timer->action);
    trigger_graph();

    // Record timeout in the CIB if appropriate
    if (timer->action->type == action_type_rsc) {
        if (controld_action_is_recordable(task)) {
            controld_record_action_timeout(timer->action);
        }
    }
    return FALSE;
}
diff --git a/lib/common/operations.c b/lib/common/operations.c
index e6b89103f1..2144cc681a 100644
--- a/lib/common/operations.c
+++ b/lib/common/operations.c
@@ -1,583 +1,636 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include
#include
#include
#include
#include
#include
#include
#include
#include
/*!
 * \brief Build the key that identifies an operation on a resource
 *
 * \param[in] rsc_id       ID of the resource the operation applies to
 * \param[in] op_type      Name of the operation
 * \param[in] interval_ms  Interval of the operation
 *
 * \return Newly allocated string holding the operation key
 *
 * \note Both string arguments are asserted to be non-NULL. The caller is
 *       responsible for freeing the result with free().
 */
char *
generate_op_key(const char *rsc_id, const char *op_type, guint interval_ms)
{
    char *op_key = NULL;

    CRM_ASSERT(rsc_id != NULL);
    CRM_ASSERT(op_type != NULL);

    op_key = crm_strdup_printf(CRM_OP_FMT, rsc_id, op_type, interval_ms);
    return op_key;
}
/*!
 * \brief Split an operation key into resource ID, action name and interval
 *
 * The key is parsed from the right: trailing decimal digits form the
 * interval, the text between the last two underscores is the action name,
 * and everything before that is the resource ID. A resource ID that ends in
 * "_pre_notify" or "_post_notify" has that suffix removed.
 *
 * \param[in]  key          Operation key to parse
 * \param[out] rsc_id       If non-NULL, where to store the newly allocated
 *                          resource ID
 * \param[out] op_type      If non-NULL, where to store the newly allocated
 *                          action name
 * \param[out] interval_ms  If non-NULL, where to store the parsed interval
 *
 * \return TRUE if the key could be parsed, FALSE otherwise
 *
 * \note On success the caller must free() *rsc_id and *op_type. On failure
 *       *rsc_id and *op_type are NULL, but *interval_ms may already hold the
 *       value of any trailing digits that were found.
 */
gboolean
parse_op_key(const char *key, char **rsc_id, char **op_type, guint *interval_ms)
{
    char *notify = NULL;
    char *mutable_key = NULL;
    char *mutable_key_ptr = NULL;
    size_t len = 0, offset = 0;
    unsigned long long place = 1;   // Decimal place value of current digit
    guint local_interval_ms = 0;

    // Initialize output variables in case of early return
    if (rsc_id) {
        *rsc_id = NULL;
    }
    if (op_type) {
        *op_type = NULL;
    }
    if (interval_ms) {
        *interval_ms = 0;
    }

    CRM_CHECK(key && *key, return FALSE);

    // Parse interval at end of string, least significant digit first
    len = strlen(key);
    offset = len - 1;
    // isdigit() is only defined for unsigned char values (and EOF)
    while ((offset > 0) && isdigit((unsigned char) key[offset])) {
        local_interval_ms += (unsigned long long) (key[offset] - '0') * place;
        place *= 10;
        offset--;
    }
    crm_trace("Operation key '%s' has interval %ums", key, local_interval_ms);
    if (interval_ms) {
        *interval_ms = local_interval_ms;
    }

    /* There must be at least one interval digit, preceded by an underscore
     * that is not the first character of the key. Without the (offset > 0)
     * test, the decrement below would wrap around and the search for the
     * next underscore would read outside of the key.
     */
    CRM_CHECK((offset != (len - 1)) && (offset > 0) && (key[offset] == '_'),
              return FALSE);

    // Copy everything before the interval's underscore
    mutable_key = strndup(key, offset);
    CRM_ASSERT(mutable_key != NULL);
    offset--;

    // Walk back to the underscore that separates resource ID and action
    while ((offset > 0) && (key[offset] != '_')) {
        offset--;
    }
    CRM_CHECK(key[offset] == '_',
              free(mutable_key); return FALSE);

    mutable_key_ptr = mutable_key + offset + 1;
    crm_trace(" Action: %s", mutable_key_ptr);
    if (op_type) {
        *op_type = strdup(mutable_key_ptr);
    }

    // Terminate the copy at the separator so that it holds the resource part
    mutable_key[offset] = 0;

    // Strip a notification suffix, but only if it is at the very end
    notify = strstr(mutable_key, "_post_notify");
    if (notify && safe_str_eq(notify, "_post_notify")) {
        notify[0] = 0;
    }
    notify = strstr(mutable_key, "_pre_notify");
    if (notify && safe_str_eq(notify, "_pre_notify")) {
        notify[0] = 0;
    }

    crm_trace(" Resource: %s", mutable_key);
    if (rsc_id) {
        *rsc_id = mutable_key;  // Ownership passes to the caller
    } else {
        free(mutable_key);
    }
    return TRUE;
}
/*!
 * \brief Build the operation key for a notify action
 *
 * \param[in] rsc_id       ID of the resource being notified
 * \param[in] notify_type  Notification type, used as the second key component
 * \param[in] op_type      Name of the operation the notification is about
 *
 * \return Newly allocated key of the form
 *         "<rsc_id>_<notify_type>_notify_<op_type>_0", or NULL if any
 *         argument is NULL
 *
 * \note The caller is responsible for freeing the result with free().
 */
char *
generate_notify_key(const char *rsc_id, const char *notify_type, const char *op_type)
{
    char *notify_key = NULL;

    CRM_CHECK(rsc_id != NULL, return NULL);
    CRM_CHECK(op_type != NULL, return NULL);
    CRM_CHECK(notify_type != NULL, return NULL);

    notify_key = crm_strdup_printf("%s_%s_notify_%s_0",
                                   rsc_id, notify_type, op_type);
    return notify_key;
}
/*!
 * \internal
 * \brief Build a transition magic string from a transition key and a result
 *
 * \param[in] transition_key  Transition key to embed
 * \param[in] op_status       Operation status to embed
 * \param[in] op_rc           Operation return code to embed
 *
 * \return Newly allocated string "<op_status>:<op_rc>;<transition_key>", or
 *         NULL if \p transition_key is NULL
 *
 * \note The caller is responsible for freeing the result with free().
 */
static char *
generate_transition_magic(const char *transition_key, int op_status, int op_rc)
{
    char *magic = NULL;

    CRM_CHECK(transition_key != NULL, return NULL);

    magic = crm_strdup_printf("%d:%d;%s", op_status, op_rc, transition_key);
    return magic;
}
+/*!
+ * \brief Parse a transition magic string into its constituent parts
+ *
+ * \param[in] magic Magic string to parse (must be non-NULL)
+ * \param[out] uuid If non-NULL, where to store copy of parsed UUID
+ * \param[out] transition_id If non-NULL, where to store parsed transition ID
+ * \param[out] action_id If non-NULL, where to store parsed action ID
+ * \param[out] op_status If non-NULL, where to store parsed result status
+ * \param[out] op_rc If non-NULL, where to store parsed actual rc
+ * \param[out] target_rc If non-NULL, where to store parsed target rc
+ *
+ * \return TRUE if key was valid, FALSE otherwise
+ * \note If uuid is supplied and this returns TRUE, the caller is responsible
+ * for freeing the memory for *uuid using free().
+ */
gboolean
decode_transition_magic(const char *magic, char **uuid, int *transition_id, int *action_id,
int *op_status, int *op_rc, int *target_rc)
{
int res = 0;
char *key = NULL;
gboolean result = TRUE;
+ int local_op_status = -1;
+ int local_op_rc = -1;
CRM_CHECK(magic != NULL, return FALSE);
- CRM_CHECK(op_rc != NULL, return FALSE);
- CRM_CHECK(op_status != NULL, return FALSE);
// The magic string has the form "<op_status>:<op_rc>;<transition key>".
// With SSCANF_HAS_M, sscanf() allocates the buffer for the key itself;
// otherwise a buffer sized from the input length is allocated by hand.
#ifdef SSCANF_HAS_M
- res = sscanf(magic, "%d:%d;%ms", op_status, op_rc, &key);
+ res = sscanf(magic, "%d:%d;%ms", &local_op_status, &local_op_rc, &key);
#else
// NOTE(review): strlen(magic) - 3 wraps around for a magic string shorter
// than 3 characters -- confirm callers never pass such a string
key = calloc(1, strlen(magic) - 3); // magic must have >=4 other characters
- res = sscanf(magic, "%d:%d;%s", op_status, op_rc, key);
+ CRM_ASSERT(key);
+ res = sscanf(magic, "%d:%d;%s", &local_op_status, &local_op_rc, key);
#endif
if (res == EOF) {
crm_err("Could not decode transition information '%s': %s",
magic, pcmk_strerror(errno));
result = FALSE;
} else if (res < 3) {
crm_warn("Transition information '%s' incomplete (%d of 3 expected items)",
magic, res);
result = FALSE;
} else {
- CRM_CHECK(decode_transition_key(key, uuid, transition_id, action_id,
- target_rc), result = FALSE);
+ if (op_status) {
+ *op_status = local_op_status;
+ }
+ if (op_rc) {
+ *op_rc = local_op_rc;
+ }
// The remainder after the ';' is an ordinary transition key
+ result = decode_transition_key(key, uuid, transition_id, action_id,
+ target_rc);
}
free(key);
return result;
}
/*!
 * \brief Build a transition key from its constituent parts
 *
 * \param[in] transition_id  Transition ID to embed
 * \param[in] action_id      Action ID to embed
 * \param[in] target_rc      Target return code to embed
 * \param[in] node           String used as the final key component
 *
 * \return Newly allocated string
 *         "<action_id>:<transition_id>:<target_rc>:<node>", with the final
 *         component left-justified and padded to 36 characters, or NULL if
 *         \p node is NULL
 *
 * \note The action ID comes before the transition ID in the key even though
 *       the parameters are in the opposite order. The caller is responsible
 *       for freeing the result with free().
 */
char *
generate_transition_key(int transition_id, int action_id, int target_rc, const char *node)
{
    char *transition_key = NULL;

    CRM_CHECK(node != NULL, return NULL);

    transition_key = crm_strdup_printf("%d:%d:%d:%-*s",
                                       action_id, transition_id, target_rc,
                                       36, node);
    return transition_key;
}
+/*!
+ * \brief Parse a transition key into its constituent parts
+ *
+ * \param[in] key Transition key to parse (must be non-NULL)
+ * \param[out] uuid If non-NULL, where to store copy of parsed UUID
+ * \param[out] transition_id If non-NULL, where to store parsed transition ID
+ * \param[out] action_id If non-NULL, where to store parsed action ID
+ * \param[out] target_rc If non-NULL, where to store parsed target rc
+ *
+ * \return TRUE if key was valid, FALSE otherwise
+ * \note If uuid is supplied and this returns TRUE, the caller is responsible
+ * for freeing the memory for *uuid using free().
+ */
gboolean
decode_transition_key(const char *key, char **uuid, int *transition_id, int *action_id,
int *target_rc)
{
- CRM_CHECK(uuid != NULL, return FALSE);
- CRM_CHECK(target_rc != NULL, return FALSE);
- CRM_CHECK(action_id != NULL, return FALSE);
- CRM_CHECK(transition_id != NULL, return FALSE);
-
- *uuid = calloc(37, sizeof(char));
- if (sscanf(key, "%d:%d:%d:%36s",
- action_id, transition_id, target_rc, *uuid) != 4) {
- crm_err("Invalid transition key '%s'", key);
- free(*uuid);
+ int local_transition_id = -1;
+ int local_action_id = -1;
+ int local_target_rc = -1;
+ char local_uuid[37] = { '\0' };
+
+ // Initialize any supplied output arguments
+ if (uuid) {
*uuid = NULL;
- *target_rc = -1;
- *action_id = -1;
+ }
+ if (transition_id) {
*transition_id = -1;
+ }
+ if (action_id) {
+ *action_id = -1;
+ }
+ if (target_rc) {
+ *target_rc = -1;
+ }
+
+ CRM_CHECK(key != NULL, return FALSE);
// Field order in the key is action ID, transition ID, target rc, UUID;
// the UUID field is limited to 36 characters to fit local_uuid
+ if (sscanf(key, "%d:%d:%d:%36s", &local_action_id, &local_transition_id,
+ &local_target_rc, local_uuid) != 4) {
+ crm_err("Invalid transition key '%s'", key);
return FALSE;
}
// A UUID of unexpected length is only warned about, not rejected
- if (strlen(*uuid) != 36) {
- crm_warn("Invalid UUID '%s' in transition key '%s'", *uuid, key);
+ if (strlen(local_uuid) != 36) {
+ crm_warn("Invalid UUID '%s' in transition key '%s'", local_uuid, key);
+ }
+ if (uuid) {
+ *uuid = strdup(local_uuid);
+ CRM_ASSERT(*uuid);
+ }
+ if (transition_id) {
+ *transition_id = local_transition_id;
+ }
+ if (action_id) {
+ *action_id = local_action_id;
+ }
+ if (target_rc) {
+ *target_rc = local_target_rc;
}
return TRUE;
}
/*!
 * \brief Remove attributes from a parameter set that should not be part of it
 *
 * Removes a fixed list of attributes and every attribute whose name starts
 * with CRM_META (compared case-insensitively). If the set had a meta interval
 * other than "0", the meta timeout attribute is added back afterward.
 *
 * \param[in,out] param_set  XML whose attributes should be filtered
 *                           (nothing is done if NULL)
 * \param[in]     version    Unused
 */
void
filter_action_parameters(xmlNode * param_set, const char *version)
{
    static int meta_len = 0;    // Length of CRM_META, computed on first call

    const char *attr_filter[] = {
        XML_ATTR_ID,
        XML_ATTR_CRM_VERSION,
        XML_LRM_ATTR_OP_DIGEST,
        XML_LRM_ATTR_TARGET,
        XML_LRM_ATTR_TARGET_UUID,
        "pcmk_external_ip"
    };
    char *key = NULL;
    char *saved_interval = NULL;
    char *saved_timeout = NULL;
    xmlAttrPtr attr = NULL;
    int lpc = 0;

    if (meta_len == 0) {
        meta_len = strlen(CRM_META);
    }

    if (param_set == NULL) {
        return;
    }

    // Drop the attributes that are always filtered
    for (lpc = 0; lpc < DIMOF(attr_filter); lpc++) {
        xml_remove_prop(param_set, attr_filter[lpc]);
    }

    // Remember interval and timeout before the meta-attributes are removed
    key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS);
    saved_interval = crm_element_value_copy(param_set, key);
    free(key);

    key = crm_meta_name(XML_ATTR_TIMEOUT);
    saved_timeout = crm_element_value_copy(param_set, key);

    // Drop every meta-attribute
    attr = param_set->properties;
    while (attr != NULL) {
        const char *prop_name = (const char *) attr->name;

        // Advance first: removing the property invalidates the current node
        attr = attr->next;
        if (strncasecmp(prop_name, CRM_META, meta_len) == 0) {
            xml_remove_prop(param_set, prop_name);
        }
    }

    /* Re-instate the operation's timeout value */
    if ((saved_interval != NULL) && (strcmp(saved_interval, "0") != 0)
        && (saved_timeout != NULL)) {
        crm_xml_add(param_set, key, saved_timeout);
    }

    free(saved_interval);
    free(saved_timeout);
    free(key);
}
// 36-character placeholder used as the final component of a generated
// transition key when an operation result has no user data of its own
#define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
/*!
 * \internal
 * \brief Add a digest of an operation's parameters to its XML
 *
 * The operation's parameters are copied into temporary XML, filtered with
 * filter_action_parameters(), and the digest calculated from the result is
 * stored in the XML_LRM_ATTR_OP_DIGEST attribute of \p update. The digest
 * makes it possible to determine later that the resource's parameters have
 * changed and a restart should be forced.
 *
 * \param[in]     op       Operation whose parameters should be digested
 *                         (nothing is done if it has no parameters)
 * \param[in,out] update   XML to add the digest attribute to
 * \param[in]     version  Version passed to the filter and digest functions
 * \param[in]     magic    Only referenced by disabled logging code
 * \param[in]     level    Only referenced by disabled logging code
 */
static void
append_digest(lrmd_event_data_t * op, xmlNode * update, const char *version, const char *magic,
              int level)
{
    xmlNode *params_xml = NULL;
    char *params_digest = NULL;

    if (op->params == NULL) {
        return;
    }

    // Build temporary XML from the parameter table and strip what must not
    // contribute to the digest
    params_xml = create_xml_node(NULL, XML_TAG_PARAMS);
    g_hash_table_foreach(op->params, hash2field, params_xml);
    filter_action_parameters(params_xml, version);

    params_digest = calculate_operation_digest(params_xml, version);

#if 0
    if (level < get_crm_log_level()
        && op->interval_ms == 0 && crm_str_eq(op->op_type, CRMD_ACTION_START, TRUE)) {
        char *digest_source = dump_xml_unformatted(params_xml);

        do_crm_log(level, "Calculated digest %s for %s (%s). Source: %s\n",
                   params_digest, ID(update), magic, digest_source);
        free(digest_source);
    }
#endif

    crm_xml_add(update, XML_LRM_ATTR_OP_DIGEST, params_digest);

    free(params_digest);
    free_xml(params_xml);
}
/*!
 * \brief Get the return code an operation was expected to produce
 *
 * \param[in] op  Operation event whose user data holds a transition key
 *
 * \return Target rc parsed from the operation's user data, or 0 if \p op or
 *         its user data is NULL
 */
int
rsc_op_expected_rc(lrmd_event_data_t * op)
{
int rc = 0;
if (op && op->user_data) {
- int dummy = 0;
- char *uuid = NULL;
-
- decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &rc);
- free(uuid);
// Only the target rc is of interest; the parser's result is not checked
+ decode_transition_key(op->user_data, NULL, NULL, NULL, &rc);
}
return rc;
}
/*!
 * \brief Check whether an operation result counts as a failure
 *
 * \param[in] op         Operation event to check
 * \param[in] target_rc  Return code the operation was expected to produce
 *
 * \return FALSE for cancelled or pending operations; TRUE for unsupported,
 *         timed-out or errored operations; for any other status, TRUE if the
 *         operation's rc differs from \p target_rc and FALSE otherwise
 */
gboolean
did_rsc_op_fail(lrmd_event_data_t * op, int target_rc)
{
    switch (op->op_status) {
        // Not (yet) a result at all
        case PCMK_LRM_OP_CANCELLED:
        case PCMK_LRM_OP_PENDING:
            return FALSE;

        // Failures regardless of the return code
        case PCMK_LRM_OP_NOTSUPPORTED:
        case PCMK_LRM_OP_TIMEOUT:
        case PCMK_LRM_OP_ERROR:
            return TRUE;

        default:
            break;
    }

    // Otherwise the return code decides
    return (target_rc != op->rc)? TRUE : FALSE;
}
/*!
 * \brief Create a CIB XML element describing an operation
 *
 * The element's ID is built as "<prefix>-<task>-<interval_spec>".
 *
 * \param[in] parent         If not NULL, make the new element a child of this
 * \param[in] prefix         Prefix to use for the generated ID
 * \param[in] task           Operation task, stored as the "name" attribute
 * \param[in] interval_spec  Operation interval to set
 * \param[in] timeout        If not NULL, operation timeout to set
 *
 * \return New XML object on success, NULL if \p prefix, \p task or
 *         \p interval_spec is NULL
 */
xmlNode *
crm_create_op_xml(xmlNode *parent, const char *prefix, const char *task,
                  const char *interval_spec, const char *timeout)
{
    xmlNode *op_node = NULL;

    CRM_CHECK(prefix && task && interval_spec, return NULL);

    op_node = create_xml_node(parent, XML_ATTR_OP);
    crm_xml_set_id(op_node, "%s-%s-%s", prefix, task, interval_spec);
    crm_xml_add(op_node, XML_LRM_ATTR_INTERVAL, interval_spec);
    crm_xml_add(op_node, "name", task);

    // The timeout is optional
    if (timeout != NULL) {
        crm_xml_add(op_node, XML_ATTR_TIMEOUT, timeout);
    }
    return op_node;
}
/*!
 * \brief Create or update the XML that records an operation result
 *
 * An existing XML_LRM_TAG_RSC_OP child of \p parent with the chosen ID is
 * reused, otherwise a new one is created. The entry is filled with the
 * operation's key, task, transition key and magic, result, timing data
 * (for caller versions of at least 2.1) and parameter digest.
 *
 * \param[in,out] parent          XML that holds (or will hold) the entry
 * \param[in,out] op              Operation event to record (its status and rc
 *                                are overwritten for non-pending notify
 *                                actions; see below)
 * \param[in]     caller_version  Version recorded in the entry and used to
 *                                decide which attributes are added
 * \param[in]     target_rc       Return code the operation was expected to
 *                                produce
 * \param[in]     node            Node name recorded in the entry
 * \param[in]     origin          Origin string recorded in the entry and used
 *                                in log messages
 * \param[in]     level           Log level for the initial log message
 *
 * \return XML of the entry written last, or NULL if \p op is NULL
 *
 * \note A failed non-recurring operation is written twice: once with a
 *       "last_failure" ID and once more with a "last" ID. The returned
 *       pointer is the second entry in that case.
 */
xmlNode *
create_operation_update(xmlNode * parent, lrmd_event_data_t * op, const char * caller_version,
int target_rc, const char * node, const char * origin, int level)
{
char *key = NULL;
char *magic = NULL;
char *op_id = NULL;
char *op_id_additional = NULL;
char *local_user_data = NULL;
const char *exit_reason = NULL;
xmlNode *xml_op = NULL;
const char *task = NULL;
CRM_CHECK(op != NULL, return NULL);
do_crm_log(level, "%s: Updating resource %s after %s op %s (interval=%u)",
origin, op->rsc_id, op->op_type, services_lrm_status_str(op->op_status),
op->interval_ms);
crm_trace("DC version: %s", caller_version);
task = op->op_type;
/* Record a successful reload as a start, and a failed reload as a monitor,
* to make life easier for the scheduler when determining the current state.
*/
if (crm_str_eq(task, "reload", TRUE)) {
if (op->op_status == PCMK_LRM_OP_DONE) {
task = CRMD_ACTION_START;
} else {
task = CRMD_ACTION_STATUS;
}
}
key = generate_op_key(op->rsc_id, task, op->interval_ms);
// Choose the ID of the entry, which depends on the kind of result
if (crm_str_eq(task, CRMD_ACTION_NOTIFY, TRUE)) {
const char *n_type = crm_meta_value(op->params, "notify_type");
const char *n_task = crm_meta_value(op->params, "notify_operation");
CRM_LOG_ASSERT(n_type != NULL);
CRM_LOG_ASSERT(n_task != NULL);
op_id = generate_notify_key(op->rsc_id, n_type, n_task);
if (op->op_status != PCMK_LRM_OP_PENDING) {
/* Ignore notify errors.
*
* @TODO It might be better to keep the correct result here, and
* ignore it in process_graph_event().
*/
op->op_status = PCMK_LRM_OP_DONE;
op->rc = 0;
}
} else if (did_rsc_op_fail(op, target_rc)) {
op_id = generate_op_key(op->rsc_id, "last_failure", 0);
if (op->interval_ms == 0) {
// Ensure 'last' gets updated, in case record-pending is true
op_id_additional = generate_op_key(op->rsc_id, "last", 0);
}
exit_reason = op->exit_reason;
} else if (op->interval_ms > 0) {
op_id = strdup(key);
} else {
op_id = generate_op_key(op->rsc_id, "last", 0);
}
// Everything from here to the "goto again" below runs once per entry ID
again:
xml_op = find_entity(parent, XML_LRM_TAG_RSC_OP, op_id);
if (xml_op == NULL) {
xml_op = create_xml_node(parent, XML_LRM_TAG_RSC_OP);
}
// Without user data there is no transition key, so a fake one is
// temporarily installed in the operation (and removed again before return)
if (op->user_data == NULL) {
crm_debug("Generating fake transition key for: " CRM_OP_FMT " %d from %s",
op->rsc_id, op->op_type, op->interval_ms,
op->call_id, origin);
local_user_data = generate_transition_key(-1, op->call_id, target_rc, FAKE_TE_ID);
op->user_data = local_user_data;
}
// The magic string is generated only on the first pass
if(magic == NULL) {
magic = generate_transition_magic(op->user_data, op->op_status, op->rc);
}
crm_xml_add(xml_op, XML_ATTR_ID, op_id);
crm_xml_add(xml_op, XML_LRM_ATTR_TASK_KEY, key);
crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task);
crm_xml_add(xml_op, XML_ATTR_ORIGIN, origin);
crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version);
crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data);
crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic);
crm_xml_add(xml_op, XML_LRM_ATTR_EXIT_REASON, exit_reason == NULL ? "" : exit_reason);
crm_xml_add(xml_op, XML_LRM_ATTR_TARGET, node); /* For context during triage */
crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id);
crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc);
crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status);
crm_xml_add_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, op->interval_ms);
// Timing data is only added for caller versions of at least 2.1, and only
// if at least one of the timing values is nonzero
if (compare_version("2.1", caller_version) <= 0) {
if (op->t_run || op->t_rcchange || op->exec_time || op->queue_time) {
crm_trace("Timing data (" CRM_OP_FMT "): last=%u change=%u exec=%u queue=%u",
op->rsc_id, op->op_type, op->interval_ms,
op->t_run, op->t_rcchange, op->exec_time, op->queue_time);
if (op->interval_ms == 0) {
/* The values are the same for non-recurring ops */
crm_xml_add_int(xml_op, XML_RSC_OP_LAST_RUN, op->t_run);
crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_run);
} else if(op->t_rcchange) {
/* last-run is not accurate for recurring ops */
crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_rcchange);
} else {
/* ...but is better than nothing otherwise */
crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_run);
}
crm_xml_add_int(xml_op, XML_RSC_OP_T_EXEC, op->exec_time);
crm_xml_add_int(xml_op, XML_RSC_OP_T_QUEUE, op->queue_time);
}
}
if (crm_str_eq(op->op_type, CRMD_ACTION_MIGRATE, TRUE)
|| crm_str_eq(op->op_type, CRMD_ACTION_MIGRATED, TRUE)) {
/*
* Record migrate_source and migrate_target always for migrate ops.
*/
const char *name = XML_LRM_ATTR_MIGRATE_SOURCE;
crm_xml_add(xml_op, name, crm_meta_value(op->params, name));
name = XML_LRM_ATTR_MIGRATE_TARGET;
crm_xml_add(xml_op, name, crm_meta_value(op->params, name));
}
append_digest(op, xml_op, caller_version, magic, LOG_DEBUG);
// If a second ID was prepared, write the same data again under that ID;
// clearing op_id_additional guarantees that this happens only once
if (op_id_additional) {
free(op_id);
op_id = op_id_additional;
op_id_additional = NULL;
goto again;
}
// Remove the fake transition key again so the operation is left without
// user data, as it was on entry
if (local_user_data) {
free(local_user_data);
op->user_data = NULL;
}
free(magic);
free(op_id);
free(key);
return xml_op;
}
/*!
 * \brief Check whether an operation requires resource agent meta-data
 *
 * Agent meta-data is used to determine whether a reload is possible, and to
 * evaluate versioned parameters -- so an operation that is not relevant to
 * those features does not need the meta-data.
 *
 * \param[in] rsc_class  Resource agent class (or NULL to skip class check)
 * \param[in] op         Operation action (or NULL to skip op check)
 *
 * \return TRUE if operation needs meta-data, FALSE otherwise
 * \note At least one of rsc_class and op must be specified.
 */
bool
crm_op_needs_metadata(const char *rsc_class, const char *op)
{
    /* Meta-data is only needed for these actions */
    const char *relevant_ops[] = {
        CRMD_ACTION_START,
        CRMD_ACTION_STATUS,
        CRMD_ACTION_PROMOTE,
        CRMD_ACTION_DEMOTE,
        CRMD_ACTION_RELOAD,
        CRMD_ACTION_MIGRATE,
        CRMD_ACTION_MIGRATED,
        CRMD_ACTION_NOTIFY,
    };
    size_t idx = 0;

    CRM_CHECK(rsc_class || op, return FALSE);

    /* Meta-data is only needed for resource classes that use parameters */
    if (rsc_class
        && is_not_set(pcmk_get_ra_caps(rsc_class), pcmk_ra_cap_params)) {
        return FALSE;
    }

    if (op == NULL) {
        return TRUE;    // No operation given, so the class check decides
    }

    for (idx = 0; idx < (sizeof(relevant_ops) / sizeof(relevant_ops[0])); idx++) {
        if (strcmp(op, relevant_ops[idx]) == 0) {
            return TRUE;
        }
    }
    return FALSE;
}
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index 7a4e04c960..02cef2c3fd 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -1,3529 +1,3525 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
CRM_TRACE_INIT_DATA(pe_status);
/* Set or clear a working set flag according to a cluster option.
 *
 * The option's value is looked up in data_set->config_hash with pe_pref().
 * If a value is found, "flag" is set in data_set->flags when the value is
 * true and cleared otherwise. If no value is found, the flag is unchanged.
 */
#define set_config_flag(data_set, option, flag) do { \
const char *tmp = pe_pref(data_set->config_hash, option); \
if(tmp) { \
if(crm_is_true(tmp)) { \
set_bit(data_set->flags, flag); \
} else { \
clear_bit(data_set->flags, flag); \
} \
} \
} while(0)
gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure,
enum action_fail_response *failed, pe_working_set_t * data_set);
static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node);
// Bitmask for warnings we only want to print once
uint32_t pe_wo = 0;
/*!
 * \internal
 * \brief Check whether a node is a guest node whose container has vanished
 *
 * We are looking for a remote node that was supposed to be mapped to a
 * container resource, but all traces of that container have disappeared
 * from both the config and the status section.
 *
 * \param[in] node  Node to check
 *
 * \return TRUE if \p node is a guest or remote node whose connection
 *         resource has no container and is flagged as an orphan container
 *         filler, FALSE otherwise
 */
static gboolean
is_dangling_guest_node(node_t *node)
{
    resource_t *connection = NULL;

    if (!pe__is_guest_or_remote_node(node)) {
        return FALSE;
    }

    connection = node->details->remote_rsc;
    if ((connection == NULL) || (connection->container != NULL)) {
        return FALSE;
    }

    if (is_set(connection->flags, pe_rsc_orphan_container_filler)) {
        return TRUE;
    }
    return FALSE;
}
/*!
* \brief Schedule a fence action for a node
*
* How the node is handled depends on its kind: a guest node is fenced by
* marking its container resource as failed, a dangling guest node only has
* its connection resource marked as failed, and remote and cluster nodes are
* marked unclean and get a fence operation via pe_fence_op().
*
* \param[in,out] data_set Current working set of cluster
* \param[in,out] node Node to fence
* \param[in] reason Text description of why fencing is needed
*/
void
pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason)
{
CRM_CHECK(node, return);
/* A guest node is fenced by marking its container as failed */
if (pe__is_guest_node(node)) {
resource_t *rsc = node->details->remote_rsc->container;
// Nothing to do if the container is already marked as failed
if (is_set(rsc->flags, pe_rsc_failed) == FALSE) {
if (!is_set(rsc->flags, pe_rsc_managed)) {
crm_notice("Not fencing guest node %s "
"(otherwise would because %s): "
"its guest resource %s is unmanaged",
node->details->uname, reason, rsc->id);
} else {
crm_warn("Guest node %s will be fenced "
"(by recovering its guest resource %s): %s",
node->details->uname, rsc->id, reason);
/* We don't mark the node as unclean because that would prevent the
* node from running resources. We want to allow it to run resources
* in this transition if the recovery succeeds.
*/
node->details->remote_requires_reset = TRUE;
set_bit(rsc->flags, pe_rsc_failed);
}
}
} else if (is_dangling_guest_node(node)) {
crm_info("Cleaning up dangling connection for guest node %s: "
"fencing was already done because %s, "
"and guest resource no longer exists",
node->details->uname, reason);
set_bit(node->details->remote_rsc->flags, pe_rsc_failed);
} else if (pe__is_remote_node(node)) {
resource_t *rsc = node->details->remote_rsc;
if (rsc && (!is_set(rsc->flags, pe_rsc_managed))) {
crm_notice("Not fencing remote node %s "
"(otherwise would because %s): connection is unmanaged",
node->details->uname, reason);
} else if(node->details->remote_requires_reset == FALSE) {
node->details->remote_requires_reset = TRUE;
crm_warn("Remote node %s %s: %s",
node->details->uname,
pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
reason);
}
// NOTE: these two statements run for every remote node, including one
// whose connection is unmanaged and one that already required a reset
node->details->unclean = TRUE;
pe_fence_op(node, NULL, TRUE, reason, data_set);
} else if (node->details->unclean) {
// Cluster node already marked unclean: only trace the additional reason
crm_trace("Cluster node %s %s because %s",
node->details->uname,
pe_can_fence(data_set, node)? "would also be fenced" : "also is unclean",
reason);
} else {
crm_warn("Cluster node %s %s: %s",
node->details->uname,
pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
reason);
node->details->unclean = TRUE;
pe_fence_op(node, NULL, TRUE, reason, data_set);
}
}
// @TODO xpaths can't handle templates, rules, or id-refs
// XPath step matching an nvpair whose name is the "provides" or "requires"
// resource attribute and whose value is 'unfencing'
#define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR \
"[(@" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_PROVIDES "'" \
"or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \
"and @" XML_NVPAIR_ATTR_VALUE "='unfencing']"
// XPath union matching such an nvpair in the meta-attributes of anything
// below the resources section, or in the meta-attributes of rsc_defaults
#define XPATH_ENABLE_UNFENCING \
"/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \
"//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR \
"|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \
"/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR
/*!
 * \internal
 * \brief Set a working set flag if an XPath expression matches the input
 *
 * The search is skipped entirely unless is_not_set() reports the flag as
 * not set.
 *
 * \param[in]     flag      Flag to set in data_set->flags
 * \param[in]     xpath     XPath expression to search data_set->input for
 * \param[in,out] data_set  Working set to search and update
 */
static
void set_if_xpath(unsigned long long flag, const char *xpath,
                  pe_working_set_t *data_set)
{
    xmlXPathObjectPtr matches = NULL;

    if (!is_not_set(data_set->flags, flag)) {
        return;
    }

    matches = xpath_search(data_set->input, xpath);
    if ((matches != NULL) && (numXpathResults(matches) > 0)) {
        set_bit(data_set->flags, flag);
    }
    freeXpathObject(matches);
}
/*!
 * \brief Unpack cluster options into the working set
 *
 * Creates the working set's option table from the given configuration XML,
 * then derives flags, the fencing timeout and action, the no-quorum policy,
 * node health scores and the placement strategy from it, logging each.
 *
 * \param[in]     config    XML holding the cluster option sets
 * \param[in,out] data_set  Working set to fill in
 *
 * \return TRUE (no failure path exists in this function)
 */
gboolean
unpack_config(xmlNode * config, pe_working_set_t * data_set)
{
const char *value = NULL;
GHashTable *config_hash = crm_str_table_new();
data_set->config_hash = config_hash;
unpack_instance_attributes(data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash,
CIB_OPTIONS_FIRST, FALSE, data_set->now);
verify_pe_options(data_set->config_hash);
set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes);
if(is_not_set(data_set->flags, pe_flag_startup_probes)) {
crm_info("Startup probes: disabled (dangerous)");
}
value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG);
if (value && crm_is_true(value)) {
crm_notice("Watchdog will be used via SBD if fencing is required");
set_bit(data_set->flags, pe_flag_have_stonith_resource);
}
/* Set certain flags via xpath here, so they can be used before the relevant
* configuration sections are unpacked.
*/
set_if_xpath(pe_flag_enable_unfencing, XPATH_ENABLE_UNFENCING, data_set);
value = pe_pref(data_set->config_hash, "stonith-timeout");
data_set->stonith_timeout = crm_get_msec(value);
crm_debug("STONITH timeout: %d", data_set->stonith_timeout);
set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled);
crm_debug("STONITH of failed nodes is %s",
is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled");
// NOTE(review): the strcmp() below assumes pe_pref() never returns NULL for
// "stonith-action" (presumably because the option has a default) -- confirm
data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action");
if (!strcmp(data_set->stonith_action, "poweroff")) {
pe_warn_once(pe_wo_poweroff,
"Support for stonith-action of 'poweroff' is deprecated "
"and will be removed in a future release (use 'off' instead)");
data_set->stonith_action = "off";
}
crm_trace("STONITH will %s nodes", data_set->stonith_action);
set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing);
crm_debug("Concurrent fencing is %s",
is_set(data_set->flags, pe_flag_concurrent_fencing) ? "enabled" : "disabled");
set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything);
crm_debug("Stop all active resources: %s",
is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false");
set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster);
if (is_set(data_set->flags, pe_flag_symmetric_cluster)) {
crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
}
// Map the no-quorum-policy option to its enum value; anything that is not
// recognized (or not usable) falls back to no_quorum_stop
value = pe_pref(data_set->config_hash, "no-quorum-policy");
if (safe_str_eq(value, "ignore")) {
data_set->no_quorum_policy = no_quorum_ignore;
} else if (safe_str_eq(value, "freeze")) {
data_set->no_quorum_policy = no_quorum_freeze;
} else if (safe_str_eq(value, "suicide")) {
// "suicide" requires fencing to be enabled, and either the quorum panic
// attribute in the input or the have-quorum flag
if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
int do_panic = 0;
crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC,
&do_panic);
if (do_panic || is_set(data_set->flags, pe_flag_have_quorum)) {
data_set->no_quorum_policy = no_quorum_suicide;
} else {
crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum");
data_set->no_quorum_policy = no_quorum_stop;
}
} else {
crm_config_err("Resetting no-quorum-policy to 'stop': stonith is not configured");
data_set->no_quorum_policy = no_quorum_stop;
}
} else {
data_set->no_quorum_policy = no_quorum_stop;
}
switch (data_set->no_quorum_policy) {
case no_quorum_freeze:
crm_debug("On loss of quorum: Freeze resources");
break;
case no_quorum_stop:
crm_debug("On loss of quorum: Stop ALL resources");
break;
case no_quorum_suicide:
crm_notice("On loss of quorum: Fence all remaining nodes");
break;
case no_quorum_ignore:
crm_notice("On loss of quorum: Ignore");
break;
}
set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans);
crm_trace("Orphan resources are %s",
is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored");
set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans);
crm_trace("Orphan resource actions are %s",
is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored");
set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop);
crm_trace("Stopped resources are removed from the status section: %s",
is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false");
set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode);
crm_trace("Maintenance mode: %s",
is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false");
set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal);
crm_trace("Start failures are %s",
is_set(data_set->flags,
pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount");
// The startup-fencing option is only consulted when fencing is enabled
if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
set_config_flag(data_set, "startup-fencing", pe_flag_startup_fencing);
}
if (is_set(data_set->flags, pe_flag_startup_fencing)) {
crm_trace("Unseen nodes will be fenced");
} else {
pe_warn_once(pe_wo_blind, "Blind faith: not fencing unseen nodes");
}
// The node health scores are stored in variables defined outside this
// function
node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red"));
node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green"));
node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow"));
crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s",
pe_pref(data_set->config_hash, "node-health-red"),
pe_pref(data_set->config_hash, "node-health-yellow"),
pe_pref(data_set->config_hash, "node-health-green"));
data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy");
crm_trace("Placement strategy: %s", data_set->placement_strategy);
return TRUE;
}
/*!
 * \internal
 * \brief Free an operation digest cache entry and everything it owns
 *
 * The signature matches what g_hash_table_new_full() expects for a value
 * destructor; pe_create_node() installs it for each node's digest cache.
 *
 * \param[in] ptr  op_digest_cache_t to free (NULL is ignored)
 */
static void
destroy_digest_cache(gpointer ptr)
{
    op_digest_cache_t *data = ptr;

    // Tolerate NULL (as destroy_tag() does) instead of dereferencing it
    if (data == NULL) {
        return;
    }

    free_xml(data->params_all);
    free_xml(data->params_secure);
    free_xml(data->params_restart);

    free(data->digest_all_calc);
    free(data->digest_restart_calc);
    free(data->digest_secure_calc);

    free(data);
}
/*!
 * \brief Create a node object and insert it into a working set's node list
 *
 * \param[in]     id        Node ID (the pointer is stored, not copied)
 * \param[in]     uname     Node name (the pointer is stored, not copied)
 * \param[in]     type      "remote", "member" or NULL (NULL is treated like
 *                          "member"); any other value leaves the node typed
 *                          as node_ping
 * \param[in]     score     Score string, converted with char2score() to
 *                          become the node's weight
 * \param[in,out] data_set  Working set whose node list receives the node
 *
 * \return Newly created node, or NULL if memory could not be allocated
 */
node_t *
pe_create_node(const char *id, const char *uname, const char *type,
               const char *score, pe_working_set_t * data_set)
{
    node_t *node = NULL;
    const char *kind = NULL;

    // A duplicate name only earns a warning; the node is still created
    if (pe_find_node(data_set->nodes, uname) != NULL) {
        crm_config_warn("Detected multiple node entries with uname=%s"
                        " - this is rarely intended", uname);
    }

    node = calloc(1, sizeof(node_t));
    if (node == NULL) {
        return NULL;
    }
    node->weight = char2score(score);
    node->fixed = FALSE;

    node->details = calloc(1, sizeof(struct pe_node_shared_s));
    if (node->details == NULL) {
        free(node);
        return NULL;
    }

    crm_trace("Creating node for entry %s/%s", uname, id);
    node->details->id = id;
    node->details->uname = uname;
    node->details->online = FALSE;
    node->details->shutdown = FALSE;
    node->details->rsc_discovery_enabled = TRUE;
    node->details->running_rsc = NULL;

    // Map the textual type onto the node type enumeration
    if (safe_str_eq(type, "remote")) {
        node->details->type = node_remote;
        set_bit(data_set->flags, pe_flag_have_remote_nodes);

    } else if ((type == NULL) || safe_str_eq(type, "member")) {
        node->details->type = node_member;

    } else {
        node->details->type = node_ping;
    }

    // Record the node kind as a node attribute
    node->details->attrs = crm_str_table_new();
    kind = pe__is_guest_or_remote_node(node) ? "remote" : "cluster";
    g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_KIND),
                        strdup(kind));

    node->details->utilization = crm_str_table_new();
    node->details->digest_cache = g_hash_table_new_full(crm_str_hash,
                                                        g_str_equal, free,
                                                        destroy_digest_cache);

    // The node list is kept ordered using sort_node_uname
    data_set->nodes = g_list_insert_sorted(data_set->nodes, node,
                                           sort_node_uname);
    return node;
}
/*!
 * \brief Check whether a proposed remote node name collides with a resource ID
 *
 * Only resources currently present in the working set's resource list are
 * considered.
 *
 * \param[in] remote_name  Proposed name for the remote/guest node
 * \param[in] data         Working set whose resources are searched
 *
 * \return TRUE (after logging an error) if a resource with that ID exists,
 *         otherwise FALSE
 */
bool
remote_id_conflict(const char *remote_name, pe_working_set_t *data)
{
    /* The lookup result used to be discarded, so a conflict could never be
     * reported; use it to decide the outcome.
     */
    bool match = (pe_find_resource(data->resources, remote_name) != NULL);

    if (match) {
        crm_err("Invalid remote-node name, a resource called '%s' already exists.", remote_name);
    }
    return match;
}
/*!
 * \internal
 * \brief Expand a resource's guest-node meta-attributes into a connection
 *
 * Scans the meta-attribute sets that are direct children of \p xml_obj. If
 * the XML_RSC_ATTR_REMOTE_NODE meta-attribute is set and does not conflict
 * with an existing resource, pe_create_remote_xml() is called to add the
 * corresponding XML under \p parent.
 *
 * \param[in]     xml_obj  Resource XML to inspect
 * \param[in,out] parent   XML element passed to pe_create_remote_xml()
 * \param[in]     data     Working set (used for the name conflict check)
 *
 * \return Value of the remote-node meta-attribute, or NULL if it is not set
 *         or remote_id_conflict() reports a conflict
 */
static const char *
expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data)
{
    const char *container_id = ID(xml_obj);
    const char *remote_name = NULL;
    const char *remote_server = NULL;
    const char *remote_port = NULL;
    const char *connect_timeout = "60s";
    const char *remote_allow_migrate = NULL;
    const char *is_managed = NULL;

    for (xmlNode *meta_set = __xml_first_child(xml_obj); meta_set != NULL;
         meta_set = __xml_next_element(meta_set)) {

        // Only meta-attribute sets are of interest here
        if (safe_str_neq((const char *)meta_set->name, XML_TAG_META_SETS)) {
            continue;
        }

        for (xmlNode *nvpair = __xml_first_child(meta_set); nvpair != NULL;
             nvpair = __xml_next_element(nvpair)) {

            const char *value = crm_element_value(nvpair, XML_NVPAIR_ATTR_VALUE);
            const char *name = crm_element_value(nvpair, XML_NVPAIR_ATTR_NAME);

            // A later occurrence of a name overrides an earlier one
            if (safe_str_eq(name, XML_RSC_ATTR_REMOTE_NODE)) {
                remote_name = value;

            } else if (safe_str_eq(name, "remote-addr")) {
                remote_server = value;

            } else if (safe_str_eq(name, "remote-port")) {
                remote_port = value;

            } else if (safe_str_eq(name, "remote-connect-timeout")) {
                connect_timeout = value;

            } else if (safe_str_eq(name, "remote-allow-migrate")) {
                remote_allow_migrate = value;

            } else if (safe_str_eq(name, XML_RSC_ATTR_MANAGED)) {
                is_managed = value;
            }
        }
    }

    // Without a remote-node name this resource does not define a guest node
    if (remote_name == NULL) {
        return NULL;
    }

    if (remote_id_conflict(remote_name, data)) {
        return NULL;
    }

    pe_create_remote_xml(parent, remote_name, container_id,
                         remote_allow_migrate, is_managed,
                         connect_timeout, remote_server, remote_port);
    return remote_name;
}
/*!
 * \internal
 * \brief Set a new node's initial unclean/unseen state
 *
 * \param[in]     data_set  Working set (its startup-fencing flag is read)
 * \param[in,out] new_node  Node whose state is initialized
 */
static void
handle_startup_fencing(pe_working_set_t *data_set, node_t *new_node)
{
    /* A remote node without a connection resource is left untouched. This
     * happens when remote node entries get left in the nodes section after
     * the connection resource is removed.
     */
    bool no_connection = (new_node->details->type == node_remote)
                         && (new_node->details->remote_rsc == NULL);

    if (no_connection) {
        return;
    }

    /* With startup fencing, every node is unclean until its status entry
     * has been seen; without it, nodes are taken on blind faith.
     */
    new_node->details->unclean = is_set(data_set->flags, pe_flag_startup_fencing) ? TRUE : FALSE;

    /* Whether a node's status section exists is tracked separately from
     * whether the node is unclean.
     */
    new_node->details->unseen = TRUE;
}
/*!
 * \brief Unpack the node entries of the configuration into a working set
 *
 * For every XML_CIB_TAG_NODE child of \p xml_nodes, create a node object,
 * set its initial fencing state, and unpack its attributes and utilization.
 * If the working set names a local host that is not among the unpacked
 * nodes, a node is created for it as well.
 *
 * \param[in]     xml_nodes  Parent XML of the node entries
 * \param[in,out] data_set   Working set to add the nodes to
 *
 * \return TRUE on success, FALSE if a node object could not be created
 */
gboolean
unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set)
{
    xmlNode *xml_obj = NULL;

    for (xml_obj = __xml_first_child(xml_nodes); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
        node_t *new_node = NULL;
        const char *id = NULL;
        const char *uname = NULL;
        const char *type = NULL;
        const char *score = NULL;

        if (!crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) {
            continue;
        }

        id = crm_element_value(xml_obj, XML_ATTR_ID);
        uname = crm_element_value(xml_obj, XML_ATTR_UNAME);
        type = crm_element_value(xml_obj, XML_ATTR_TYPE);
        score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
        crm_trace("Processing node %s/%s", uname, id);

        // An entry without an ID is reported and skipped
        if (id == NULL) {
            crm_config_err("Must specify id tag in <node>");
            continue;
        }

        new_node = pe_create_node(id, uname, type, score, data_set);
        if (new_node == NULL) {
            return FALSE;
        }

        handle_startup_fencing(data_set, new_node);

        add_node_attrs(xml_obj, new_node, FALSE, data_set);
        unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_UTILIZATION, NULL,
                                   new_node->details->utilization, NULL, FALSE, data_set->now);

        crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME));
    }

    // Make sure the local host (if one is set) has a node object
    if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) {
        crm_info("Creating a fake local node");
        pe_create_node(data_set->localhost, data_set->localhost, NULL, NULL,
                       data_set);
    }

    return TRUE;
}
/*!
 * \internal
 * \brief Link a resource to the container resource named in its meta-attributes
 *
 * Resources with children are not linked themselves; the function recurses
 * into each child instead.
 *
 * \param[in,out] rsc       Resource to process
 * \param[in]     data_set  Working set whose resources are searched
 */
static void
setup_container(resource_t * rsc, pe_working_set_t * data_set)
{
    const char *container_id = NULL;
    resource_t *container = NULL;

    if (rsc->children) {
        for (GListPtr iter = rsc->children; iter != NULL; iter = iter->next) {
            setup_container((resource_t *) iter->data, data_set);
        }
        return;
    }

    // Nothing to do without a container ID, or if it names the resource itself
    container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER);
    if ((container_id == NULL) || !safe_str_neq(container_id, rsc->id)) {
        return;
    }

    container = pe_find_resource(data_set->resources, container_id);
    if (container == NULL) {
        pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id);
        return;
    }

    rsc->container = container;
    set_bit(container->flags, pe_rsc_is_container);
    container->fillers = g_list_append(container->fillers, rsc);
    pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id);
}
/*!
 * \brief Create remote and guest node objects from the resource configuration
 *
 * This is done before the resources themselves are unpacked.
 *
 * \param[in,out] xml_resources  Parent XML of the resource configuration
 * \param[in,out] data_set       Working set to add the nodes to
 *
 * \return TRUE
 */
gboolean
unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set)
{
    for (xmlNode *rsc_xml = __xml_first_child(xml_resources); rsc_xml != NULL;
         rsc_xml = __xml_next_element(rsc_xml)) {

        const char *node_name = NULL;

        if (xml_contains_remote_node(rsc_xml)) {
            /* Remote nodes are defined by ocf:pacemaker:remote primitives.
             *
             * The pe_find_node() check makes sure we don't iterate over an
             * expanded node that has already been added to the node list.
             */
            node_name = ID(rsc_xml);
            if ((node_name != NULL)
                && (pe_find_node(data_set->nodes, node_name) == NULL)) {
                crm_trace("Found remote node %s defined by resource %s",
                          node_name, ID(rsc_xml));
                pe_create_node(node_name, node_name, "remote", NULL,
                               data_set);
            }

        } else if (crm_str_eq((const char *)rsc_xml->name, XML_CIB_TAG_RESOURCE, TRUE)) {
            /* Guest nodes are defined by special meta-attributes of a
             * primitive of any type (for example, VirtualDomain or Xen).
             *
             * Expanding them adds an ocf:pacemaker:remote primitive to the
             * configuration for the guest node's connection, to be unpacked
             * later.
             */
            node_name = expand_remote_rsc_meta(rsc_xml, xml_resources, data_set);
            if ((node_name != NULL)
                && (pe_find_node(data_set->nodes, node_name) == NULL)) {
                crm_trace("Found guest node %s in resource %s",
                          node_name, ID(rsc_xml));
                pe_create_node(node_name, node_name, "remote", NULL,
                               data_set);
            }

        } else if (crm_str_eq((const char *)rsc_xml->name, XML_CIB_TAG_GROUP, TRUE)) {
            /* Guest nodes may also be defined inside a group. Clones are
             * currently not supported as guest nodes.
             */
            for (xmlNode *member_xml = __xml_first_child(rsc_xml); member_xml != NULL;
                 member_xml = __xml_next_element(member_xml)) {

                node_name = expand_remote_rsc_meta(member_xml, xml_resources, data_set);
                if ((node_name != NULL)
                    && (pe_find_node(data_set->nodes, node_name) == NULL)) {
                    crm_trace("Found guest node %s in resource %s inside group %s",
                              node_name, ID(member_xml), ID(rsc_xml));
                    pe_create_node(node_name, node_name, "remote", NULL,
                                   data_set);
                }
            }
        }
    }
    return TRUE;
}
/* Call this after all the nodes and resources have been unpacked, but
 * before the status section is read.
 *
 * A remote node's online status is reflected by the state of the remote
 * node's connection resource. Linking the node to that connection resource
 * gives the PE calculations easy access to it.
 */
static void
link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc)
{
    node_t *node = NULL;

    /* Only remote-node connection resources are linked, and remote_nodes and
     * remote_resources are not linked in quick location calculations.
     */
    if ((new_rsc->is_remote_node == FALSE)
        || is_set(data_set->flags, pe_flag_quick_location)) {
        return;
    }

    print_resource(LOG_TRACE, "Linking remote-node connection resource, ", new_rsc, FALSE);

    // The node is looked up by the connection resource's ID
    node = pe_find_node(data_set->nodes, new_rsc->id);
    CRM_CHECK(node != NULL, return;);

    node->details->remote_rsc = new_rsc;

    if (new_rsc->container != NULL) {
        /* pe_create_node() marks the new node as "remote" or "cluster"; now
         * that we know the node is a guest node, update it correctly.
         */
        g_hash_table_replace(node->details->attrs, strdup(CRM_ATTR_KIND),
                             strdup("container"));
    } else {
        /* Handle start-up fencing for remote nodes (as opposed to guest
         * nodes) the same as is done for cluster nodes.
         */
        handle_startup_fencing(data_set, node);
    }
}
/*!
 * \internal
 * \brief Free a tag object, including its ID and its list of references
 *
 * \param[in] data  tag_t to free (NULL is ignored)
 */
static void
destroy_tag(gpointer data)
{
    tag_t *doomed = data;

    if (doomed == NULL) {
        return;
    }
    free(doomed->id);
    g_list_free_full(doomed->refs, free);
    free(doomed);
}
/*!
 * \internal
 * \brief Parse configuration XML for resource information
 *
 * \param[in]     xml_resources  Top of resource configuration XML
 * \param[in,out] data_set       Where to put resource information
 *
 * \return TRUE
 *
 * \note unpack_remote_nodes() MUST be called before this, so that the nodes can
 *       be used when common_unpack() calls resource_location()
 */
gboolean
unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set)
{
    data_set->template_rsc_sets = g_hash_table_new_full(crm_str_hash,
                                                        g_str_equal, free,
                                                        destroy_tag);

    for (xmlNode *entry = __xml_first_child(xml_resources); entry != NULL;
         entry = __xml_next_element(entry)) {

        resource_t *rsc = NULL;

        if (crm_str_eq((const char *)entry->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) {
            const char *template_id = ID(entry);

            /* Record the template's ID so that its existence is known, even
             * though nothing is stored under it here.
             */
            if ((template_id != NULL)
                && (g_hash_table_lookup_extended(data_set->template_rsc_sets,
                                                 template_id, NULL, NULL) == FALSE)) {
                g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL);
            }
            continue;
        }

        crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(entry), ID(entry));

        if (!common_unpack(entry, &rsc, NULL, data_set)) {
            crm_config_err("Failed unpacking %s %s",
                           crm_element_name(entry), crm_element_value(entry, XML_ATTR_ID));

            // Release whatever was built before the failure
            if ((rsc != NULL) && (rsc->fns != NULL)) {
                rsc->fns->free(rsc);
            }
            continue;
        }

        data_set->resources = g_list_append(data_set->resources, rsc);
        print_resource(LOG_TRACE, "Added ", rsc, FALSE);
    }

    // With every resource known, resolve containers and remote connections
    for (GListPtr iter = data_set->resources; iter != NULL; iter = iter->next) {
        resource_t *rsc = (resource_t *) iter->data;

        setup_container(rsc, data_set);
        link_rsc2remotenode(data_set, rsc);
    }

    data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority);

    // The missing-STONITH complaint is skipped for quick location calculations
    if (!is_set(data_set->flags, pe_flag_quick_location)
        && is_set(data_set->flags, pe_flag_stonith_enabled)
        && (is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE)) {

        crm_config_err("Resource start-up disabled since no STONITH resources have been defined");
        crm_config_err("Either configure some or disable STONITH with the stonith-enabled option");
        crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
    }

    return TRUE;
}
/*!
 * \brief Unpack the tags section of the configuration into a working set
 *
 * \param[in]     xml_tags  Parent XML of the tag entries
 * \param[in,out] data_set  Working set whose tag table is (re)created and filled
 *
 * \return TRUE on success, FALSE if add_tag_ref() failed for a reference
 */
gboolean
unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set)
{
    data_set->tags = g_hash_table_new_full(crm_str_hash, g_str_equal, free,
                                           destroy_tag);

    for (xmlNode *tag_xml = __xml_first_child(xml_tags); tag_xml != NULL;
         tag_xml = __xml_next_element(tag_xml)) {

        const char *tag_id = ID(tag_xml);

        // Skip anything that is not a tag element
        if (!crm_str_eq((const char *)tag_xml->name, XML_CIB_TAG_TAG, TRUE)) {
            continue;
        }

        if (tag_id == NULL) {
            crm_config_err("Failed unpacking %s: %s should be specified",
                           crm_element_name(tag_xml), XML_ATTR_ID);
            continue;
        }

        for (xmlNode *ref_xml = __xml_first_child(tag_xml); ref_xml != NULL;
             ref_xml = __xml_next_element(ref_xml)) {

            const char *obj_ref = ID(ref_xml);

            // Skip anything that is not an object reference
            if (!crm_str_eq((const char *)ref_xml->name, XML_CIB_TAG_OBJ_REF, TRUE)) {
                continue;
            }

            if (obj_ref == NULL) {
                crm_config_err("Failed unpacking %s for tag %s: %s should be specified",
                               crm_element_name(ref_xml), tag_id, XML_ATTR_ID);
                continue;
            }

            if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) {
                return FALSE;
            }
        }
    }

    return TRUE;
}
/* The ticket state section:
 * "/cib/status/tickets/ticket_state"
 *
 * Unpack one ticket state entry: copy every XML attribute except the ID into
 * the ticket's state table, then derive the ticket's granted, last_granted
 * and standby members from that table.
 *
 * Returns FALSE if the entry has no usable ID or a ticket object could not
 * be created, otherwise TRUE.
 */
static gboolean
unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set)
{
    const char *ticket_id = ID(xml_ticket);
    const char *value = NULL;
    ticket_t *ticket = NULL;

    if ((ticket_id == NULL) || (ticket_id[0] == '\0')) {
        return FALSE;
    }

    crm_trace("Processing ticket state for %s", ticket_id);

    // Reuse a ticket that is already known, otherwise create it
    ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
    if (ticket == NULL) {
        ticket = ticket_new(ticket_id, data_set);
        if (ticket == NULL) {
            return FALSE;
        }
    }

    for (xmlAttrPtr attr = xml_ticket->properties; attr; attr = attr->next) {
        const char *prop_name = (const char *)attr->name;
        const char *prop_value = crm_element_value(xml_ticket, prop_name);

        if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) {
            continue;
        }
        g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value));
    }

    value = g_hash_table_lookup(ticket->state, "granted");
    if (value && crm_is_true(value)) {
        ticket->granted = TRUE;
        crm_info("We have ticket '%s'", ticket->id);
    } else {
        ticket->granted = FALSE;
        crm_info("We do not have ticket '%s'", ticket->id);
    }

    // last_granted is left untouched when the attribute is absent
    value = g_hash_table_lookup(ticket->state, "last-granted");
    if (value) {
        ticket->last_granted = crm_parse_int(value, 0);
    }

    value = g_hash_table_lookup(ticket->state, "standby");
    if (value && crm_is_true(value)) {
        ticket->standby = TRUE;
        if (ticket->granted) {
            crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
        }
    } else {
        ticket->standby = FALSE;
    }

    crm_trace("Done with ticket state for %s", ticket_id);

    return TRUE;
}
/*!
 * \internal
 * \brief Unpack every ticket state entry below a tickets element
 *
 * Children that are not XML_CIB_TAG_TICKET_STATE elements are skipped, and
 * the result of unpacking each individual entry is not checked.
 *
 * \param[in]     xml_tickets  Parent XML of the ticket state entries
 * \param[in,out] data_set     Working set holding the tickets
 *
 * \return TRUE
 */
static gboolean
unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set)
{
    for (xmlNode *entry = __xml_first_child(xml_tickets); entry != NULL;
         entry = __xml_next_element(entry)) {

        if (crm_str_eq((const char *)entry->name, XML_CIB_TAG_TICKET_STATE, TRUE)) {
            unpack_ticket_state(entry, data_set);
        }
    }

    return TRUE;
}
/*!
 * \internal
 * \brief Apply a node state entry's attributes to a guest or remote node
 *
 * Does nothing unless \p state is a node state element and \p this_node is a
 * guest or remote node.
 *
 * \param[in,out] this_node  Node to update
 * \param[in]     state      Node state XML for the node
 * \param[in,out] data_set   Working set
 */
static void
unpack_handle_remote_attrs(node_t *this_node, xmlNode *state, pe_working_set_t * data_set)
{
    const char *discovery = NULL;
    const char *shutdown = NULL;
    xmlNode *transient_attrs = NULL;
    resource_t *connection = NULL;

    if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
        return;
    }

    if ((this_node == NULL) || !pe__is_guest_or_remote_node(this_node)) {
        return;
    }
    crm_trace("Processing remote node id=%s, uname=%s", this_node->details->id, this_node->details->uname);

    this_node->details->remote_maintenance =
        crm_atoi(crm_element_value(state, XML_NODE_IS_MAINTENANCE), "0");

    connection = this_node->details->remote_rsc;

    // A node that needs a reset keeps its current unclean/unseen state
    if (this_node->details->remote_requires_reset == FALSE) {
        this_node->details->unclean = FALSE;
        this_node->details->unseen = FALSE;
    }

    transient_attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
    add_node_attrs(transient_attrs, this_node, TRUE, data_set);

    shutdown = pe_node_attribute_raw(this_node, XML_CIB_ATTR_SHUTDOWN);
    if ((shutdown != NULL) && safe_str_neq("0", shutdown)) {
        crm_info("Node %s is shutting down", this_node->details->uname);
        this_node->details->shutdown = TRUE;
        if (connection) {
            connection->next_role = RSC_ROLE_STOPPED;
        }
    }

    if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
        crm_info("Node %s is in standby-mode", this_node->details->uname);
        this_node->details->standby = TRUE;
    }

    // An unmanaged connection resource also puts the node in maintenance
    if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) ||
        (connection && !is_set(connection->flags, pe_rsc_managed))) {
        crm_info("Node %s is in maintenance-mode", this_node->details->uname);
        this_node->details->maintenance = TRUE;
    }

    discovery = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
    if ((discovery == NULL) || crm_is_true(discovery)) {
        return;
    }

    if (pe__is_remote_node(this_node)
        && is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
        crm_warn("Ignoring %s attribute on remote node %s because stonith is disabled",
                 XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname);
    } else {
        /* This is either a remote node with fencing enabled, or a guest
         * node. We don't care whether fencing is enabled when fencing guest
         * nodes, because they are "fenced" by recovering their containing
         * resource.
         */
        crm_info("Node %s has resource discovery disabled", this_node->details->uname);
        this_node->details->rsc_discovery_enabled = FALSE;
    }
}
/*!
 * \internal
 * \brief Make one pass over the status section, unpacking resource history
 *        for nodes that are ready and have not been unpacked yet
 *
 * The caller is expected to repeat the pass for as long as it returns true
 * (see unpack_status()): unpacking one node's history can change the role of
 * a connection or container resource, which can make another guest/remote
 * node eligible on a later pass.
 *
 * \param[in]     status    Parent XML of the node state entries
 * \param[in]     fence     If true, also process offline cluster nodes, and
 *                          guest/remote nodes regardless of connection state
 * \param[in,out] data_set  Working set
 *
 * \return true if at least one node was processed in this pass, else false
 */
static bool
unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set)
{
bool changed = false;
xmlNode *lrm_rsc = NULL;
for (xmlNode *state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) {
const char *id = NULL;
const char *uname = NULL;
node_t *this_node = NULL;
bool process = FALSE;
// Only node state entries are of interest
if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
continue;
}
id = crm_element_value(state, XML_ATTR_ID);
uname = crm_element_value(state, XML_ATTR_UNAME);
this_node = pe_find_node_any(data_set->nodes, id, uname);
// The order of the following tests matters; do not rearrange them
if (this_node == NULL) {
crm_info("Node %s is unknown", id);
continue;
} else if (this_node->details->unpacked) {
// Each node's history is unpacked at most once across all passes
crm_info("Node %s is already processed", id);
continue;
} else if (!pe__is_guest_or_remote_node(this_node)
&& is_set(data_set->flags, pe_flag_stonith_enabled)) {
// A redundant test, but preserves the order for regression tests
process = TRUE;
} else if (pe__is_guest_or_remote_node(this_node)) {
bool check = FALSE;
resource_t *rsc = this_node->details->remote_rsc;
if(fence) {
check = TRUE;
} else if(rsc == NULL) {
/* Not ready yet */
} else if (pe__is_guest_node(this_node)
&& rsc->role == RSC_ROLE_STARTED
&& rsc->container->role == RSC_ROLE_STARTED) {
/* Both the connection and its containing resource need to be
* known to be up before we process resources running in it.
*/
check = TRUE;
crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED);
} else if (!pe__is_guest_node(this_node)
&& rsc->role == RSC_ROLE_STARTED) {
// A remote (non-guest) node only needs its connection to be started
check = TRUE;
crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED);
}
if (check) {
// Settle the node's online state and attributes before its history
determine_remote_online_status(data_set, this_node);
unpack_handle_remote_attrs(this_node, state, data_set);
process = TRUE;
}
} else if (this_node->details->online) {
process = TRUE;
} else if (fence) {
process = TRUE;
}
if(process) {
crm_trace("Processing lrm resource entries on %shealthy%s node: %s",
fence?"un":"",
(pe__is_guest_or_remote_node(this_node)? " remote" : ""),
this_node->details->uname);
changed = TRUE;
this_node->details->unpacked = TRUE;
// Descend to the resources element beneath the node's LRM element
lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE);
lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
unpack_lrm_resources(this_node, lrm_rsc, data_set);
}
}
return changed;
}
/*!
 * \brief Unpack the status section of the CIB into a working set
 *
 * Works in several stages:
 * 1. Unpack ticket states, and for each cluster node apply its transient
 *    attributes and determine its online status. For guest/remote nodes only
 *    the "was fenced" flag is recorded at this stage.
 * 2. Repeatedly call unpack_node_loop() without fencing until a pass
 *    processes no further node, then make one final pass that fences only
 *    if the stonith-enabled flag is set.
 * 3. Schedule stops for any containers queued in data_set->stop_needed.
 * 4. Determine the online status of guest/remote nodes that were never
 *    unpacked.
 *
 * \param[in]     status    Status section XML
 * \param[in,out] data_set  Working set to update
 *
 * \return TRUE
 */
gboolean
unpack_status(xmlNode * status, pe_working_set_t * data_set)
{
const char *id = NULL;
const char *uname = NULL;
xmlNode *state = NULL;
node_t *this_node = NULL;
crm_trace("Beginning unpack");
// Create the ticket table only if it does not exist yet
if (data_set->tickets == NULL) {
data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal,
free, destroy_ticket);
}
for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) {
if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) {
unpack_tickets_state((xmlNode *) state, data_set);
} else if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) {
xmlNode *attrs = NULL;
const char *resource_discovery_enabled = NULL;
id = crm_element_value(state, XML_ATTR_ID);
uname = crm_element_value(state, XML_ATTR_UNAME);
this_node = pe_find_node_any(data_set->nodes, id, uname);
if (uname == NULL) {
/* error */
continue;
} else if (this_node == NULL) {
crm_config_warn("Node %s in status section no longer exists", uname);
continue;
} else if (pe__is_guest_or_remote_node(this_node)) {
/* online state for remote nodes is determined by the
* rsc state after all the unpacking is done. we do however
* need to mark whether or not the node has been fenced as this plays
* a role during unpacking cluster node resource state */
this_node->details->remote_was_fenced =
crm_atoi(crm_element_value(state, XML_NODE_IS_FENCED), "0");
continue;
}
crm_trace("Processing node id=%s, uname=%s", id, uname);
/* Mark the node as provisionally clean
* - at least we have seen it in the current cluster's lifetime
*/
this_node->details->unclean = FALSE;
this_node->details->unseen = FALSE;
attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
add_node_attrs(attrs, this_node, TRUE, data_set);
if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
crm_info("Node %s is in standby-mode", this_node->details->uname);
this_node->details->standby = TRUE;
}
if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance"))) {
crm_info("Node %s is in maintenance-mode", this_node->details->uname);
this_node->details->maintenance = TRUE;
}
// On cluster nodes a disabled resource discovery attribute is only warned about
resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes",
XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname);
}
crm_trace("determining node state");
determine_online_status(state, this_node, data_set);
// With the suicide no-quorum policy, online nodes are fenced when quorum is lost
if (is_not_set(data_set->flags, pe_flag_have_quorum)
&& this_node->details->online
&& (data_set->no_quorum_policy == no_quorum_suicide)) {
/* Everything else should flow from this automatically
* At least until the PE becomes able to migrate off healthy resources
*/
pe_fence_node(data_set, this_node, "cluster does not have quorum");
}
}
}
// Keep making passes until one of them processes no additional node
while(unpack_node_loop(status, FALSE, data_set)) {
crm_trace("Start another loop");
}
// Now catch any nodes we didn't see
unpack_node_loop(status, is_set(data_set->flags, pe_flag_stonith_enabled), data_set);
/* Now that we know where resources are, we can schedule stops of containers
* with failed bundle connections
*/
if (data_set->stop_needed != NULL) {
for (GList *item = data_set->stop_needed; item; item = item->next) {
pe_resource_t *container = item->data;
pe_node_t *node = pe__current_node(container);
if (node) {
stop_action(container, node, FALSE);
}
}
// Only the list itself is freed here, not the resources it points to
g_list_free(data_set->stop_needed);
data_set->stop_needed = NULL;
}
// Guest/remote nodes that were never unpacked still need an online status
for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
// NOTE(review): this declaration shadows the function-level this_node
node_t *this_node = gIter->data;
if (this_node == NULL) {
continue;
} else if (!pe__is_guest_or_remote_node(this_node)) {
continue;
} else if(this_node->details->unpacked) {
continue;
}
determine_remote_online_status(data_set, this_node);
}
return TRUE;
}
/*!
 * \internal
 * \brief Determine a cluster node's online status when fencing is disabled
 *
 * \param[in,out] data_set    Working set
 * \param[in]     node_state  Node state XML for the node
 * \param[in,out] this_node   Node being evaluated
 *
 * \return TRUE only if the node is in the cluster, its peer status equals
 *         ONLINESTATUS and its join state equals CRMD_JOINSTATE_MEMBER;
 *         otherwise FALSE
 */
static gboolean
determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state,
                                   node_t * this_node)
{
    const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
    const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
    const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
    const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);

    if (!crm_is_true(in_cluster)) {
        crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster));
        return FALSE;
    }

    if (safe_str_eq(is_peer, ONLINESTATUS)) {
        if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
            return TRUE;
        }
        crm_debug("Node is not ready to run resources: %s", join);
        return FALSE;
    }

    if (this_node->details->expected_up == FALSE) {
        crm_trace("Controller is down: in_cluster=%s", crm_str(in_cluster));
        crm_trace("\tis_peer=%s, join=%s, expected=%s",
                  crm_str(is_peer), crm_str(join), crm_str(exp_state));
        return FALSE;
    }

    // The node was expected to be up but its peer is not online: mark it unclean
    pe_fence_node(data_set, this_node, "peer is unexpectedly down");
    crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s",
             crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state));
    return FALSE;
}
/*!
 * \internal
 * \brief Determine a cluster node's online status when fencing is enabled
 *
 * Depending on the combination of node state values, this may also schedule
 * fencing of the node or mark it as standby and pending.
 *
 * \param[in,out] data_set    Working set
 * \param[in]     node_state  Node state XML for the node
 * \param[in,out] this_node   Node being evaluated
 *
 * \return Whether the node is considered online
 */
static gboolean
determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state,
                                node_t * this_node)
{
    gboolean online = FALSE;
    gboolean do_terminate = FALSE;
    bool crmd_online = FALSE;
    const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
    const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
    const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
    const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
    const char *terminate = pe_node_attribute_raw(this_node, "terminate");

/*
  - XML_NODE_IN_CLUSTER    ::= true|false
  - XML_NODE_IS_PEER       ::= online|offline
  - XML_NODE_JOIN_STATE    ::= member|down|pending|banned
  - XML_NODE_EXPECTED      ::= member|down
*/

    if (crm_is_true(terminate)) {
        do_terminate = TRUE;

    } else if (terminate != NULL && terminate[0] != '\0') {
        /* could be a time() value */
        char t = terminate[0];

        // <ctype.h> functions need an unsigned char value; a negative char is UB
        if (t != '0' && isdigit((unsigned char) t)) {
            do_terminate = TRUE;
        }
    }

    crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d",
              this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
              crm_str(join), crm_str(exp_state), do_terminate);

    online = crm_is_true(in_cluster);
    crmd_online = safe_str_eq(is_peer, ONLINESTATUS);

    // A missing expected state is treated as "down"
    if (exp_state == NULL) {
        exp_state = CRMD_JOINSTATE_DOWN;
    }

    // The order of the following tests matters; do not rearrange them
    if (this_node->details->shutdown) {
        crm_debug("%s is shutting down", this_node->details->uname);

        /* Slightly different criteria since we can't shut down a dead peer */
        online = crmd_online;

    } else if (in_cluster == NULL) {
        pe_fence_node(data_set, this_node, "peer has not been seen by the cluster");

    } else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) {
        pe_fence_node(data_set, this_node, "peer failed the pacemaker membership criteria");

    } else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) {

        if (crm_is_true(in_cluster) || crmd_online) {
            crm_info("- Node %s is not ready to run resources", this_node->details->uname);
            this_node->details->standby = TRUE;
            this_node->details->pending = TRUE;

        } else {
            crm_trace("%s is down or still coming up", this_node->details->uname);
        }

    } else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN)
               && crm_is_true(in_cluster) == FALSE && !crmd_online) {
        crm_info("Node %s was just shot", this_node->details->uname);
        online = FALSE;

    } else if (crm_is_true(in_cluster) == FALSE) {
        pe_fence_node(data_set, this_node, "peer is no longer part of the cluster");

    } else if (!crmd_online) {
        pe_fence_node(data_set, this_node, "peer process is no longer available");

        /* Everything is running at this point, now check join state */
    } else if (do_terminate) {
        pe_fence_node(data_set, this_node, "termination was requested");

    } else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
        crm_info("Node %s is active", this_node->details->uname);

    } else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING)
               || safe_str_eq(join, CRMD_JOINSTATE_DOWN)) {
        crm_info("Node %s is not ready to run resources", this_node->details->uname);
        this_node->details->standby = TRUE;
        this_node->details->pending = TRUE;

    } else {
        pe_fence_node(data_set, this_node, "peer was in an unknown state");
        crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d",
                 this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
                 crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown);
    }

    return online;
}
/*!
 * \internal
 * \brief Determine a guest or remote node's online status from the state of
 *        its connection resource (and, for guest nodes, its container)
 *
 * \param[in]     data_set   Working set (not used by this function)
 * \param[in,out] this_node  Guest or remote node to update
 *
 * \return The node's resulting online flag
 */
static gboolean
determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node)
{
    resource_t *connection = this_node->details->remote_rsc;
    resource_t *guest_rsc = NULL;
    pe_node_t *host = NULL;
    const char *kind = NULL;

    /* If there is a node state entry for a (former) Pacemaker Remote node
     * but no resource creating that node, the node's connection resource will
     * be NULL. Consider it an offline remote node in that case.
     */
    if (connection == NULL) {
        this_node->details->online = FALSE;
        goto remote_online_done;
    }

    guest_rsc = connection->container;
    kind = (guest_rsc != NULL) ? "Guest" : "Remote";

    // A host is only identified when the connection runs on exactly one node
    if ((guest_rsc != NULL) && (g_list_length(connection->running_on) == 1)) {
        host = connection->running_on->data;
    }

    if (connection->role == RSC_ROLE_STARTED) {
        /* If the resource is currently started, mark it online. */
        crm_trace("%s node %s presumed ONLINE because connection resource is started",
                  kind, this_node->details->id);
        this_node->details->online = TRUE;

        /* consider this node shutting down if transitioning start->stop */
        if (connection->next_role == RSC_ROLE_STOPPED) {
            crm_trace("%s node %s shutting down because connection resource is stopping",
                      kind, this_node->details->id);
            this_node->details->shutdown = TRUE;
        }
    }

    /* Now check all the failure conditions. */
    if ((guest_rsc != NULL) && is_set(guest_rsc->flags, pe_rsc_failed)) {
        crm_trace("Guest node %s UNCLEAN because guest resource failed",
                  this_node->details->id);
        this_node->details->online = FALSE;
        this_node->details->remote_requires_reset = TRUE;

    } else if (is_set(connection->flags, pe_rsc_failed)) {
        crm_trace("%s node %s OFFLINE because connection resource failed",
                  kind, this_node->details->id);
        this_node->details->online = FALSE;

    } else if ((connection->role == RSC_ROLE_STOPPED)
               || ((guest_rsc != NULL) && (guest_rsc->role == RSC_ROLE_STOPPED))) {
        crm_trace("%s node %s OFFLINE because its resource is stopped",
                  kind, this_node->details->id);
        this_node->details->online = FALSE;
        this_node->details->remote_requires_reset = FALSE;

    } else if ((host != NULL) && (host->details->online == FALSE)
               && host->details->unclean) {
        crm_trace("Guest node %s UNCLEAN because host is unclean",
                  this_node->details->id);
        this_node->details->online = FALSE;
        this_node->details->remote_requires_reset = TRUE;
    }

remote_online_done:
    crm_trace("Remote node %s online=%s",
              this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
    return this_node->details->online;
}
/*!
 * \brief Determine a cluster node's online status from its node state entry
 *
 * Resets and recomputes the node's shutdown and expected_up flags, delegates
 * to the fencing or no-fencing variant depending on whether the
 * stonith-enabled flag is set, and removes the node from contention (fixed
 * weight of -INFINITY) if it is offline or shutting down.
 *
 * \param[in]     node_state  Node state XML for the node
 * \param[in,out] this_node   Node to update (NULL is reported as an error)
 * \param[in,out] data_set    Working set
 *
 * \return TRUE if the node is online, otherwise FALSE
 */
gboolean
determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set)
{
    gboolean online = FALSE;
    const char *shutdown = NULL;
    const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);

    if (this_node == NULL) {
        crm_config_err("No node to check");
        return online;
    }

    this_node->details->shutdown = FALSE;
    this_node->details->expected_up = FALSE;
    shutdown = pe_node_attribute_raw(this_node, XML_CIB_ATTR_SHUTDOWN);

    // A node that is shutting down is never marked as expected up
    if ((shutdown != NULL) && safe_str_neq("0", shutdown)) {
        this_node->details->shutdown = TRUE;

    } else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) {
        this_node->details->expected_up = TRUE;
    }

    if (this_node->details->type == node_ping) {
        /* As far as resource management is concerned, the node is safely
         * offline. Anyone caught abusing this logic will be shot.
         */
        this_node->details->unclean = FALSE;
        online = FALSE;

    } else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
        online = determine_online_status_no_fencing(data_set, node_state, this_node);

    } else {
        online = determine_online_status_fencing(data_set, node_state, this_node);
    }

    if (online) {
        this_node->details->online = TRUE;
    }

    /* Remove the node from contention if it is offline, and don't run
     * resources on it if it is online but shutting down.
     */
    if (!online || this_node->details->shutdown) {
        this_node->fixed = TRUE;
        this_node->weight = -INFINITY;
    }

    if (this_node->details->type == node_ping) {
        crm_info("Node %s is not a pacemaker node", this_node->details->uname);

    } else if (this_node->details->unclean) {
        pe_proc_warn("Node %s is unclean", this_node->details->uname);

    } else if (this_node->details->online) {
        // The first matching state wins, in this order of precedence
        crm_info("Node %s is %s", this_node->details->uname,
                 this_node->details->shutdown ? "shutting down" :
                 this_node->details->pending ? "pending" :
                 this_node->details->standby ? "standby" :
                 this_node->details->maintenance ? "maintenance" : "online");

    } else {
        crm_trace("Node %s is offline", this_node->details->uname);
    }

    return online;
}
/*!
 * \internal
 * \brief Locate the last character of a resource ID's base name
 *
 * A clone suffix is a colon followed by one or more digits at the end of the
 * ID. The first character of the ID is never examined as part of a suffix,
 * and a colon that is itself the final character is kept as part of the name.
 *
 * \param[in] id  Resource ID to check
 *
 * \return Pointer to the last character of the base name within \p id, or
 *         NULL if \p id is NULL or empty
 */
const char *
pe_base_name_end(const char *id)
{
    const char *last = NULL;
    const char *pos = NULL;

    if (crm_strlen_zero(id)) {
        return NULL;
    }
    last = id + strlen(id) - 1;

    // Scan backward over trailing digits, looking for the separating colon
    for (pos = last; pos > id; --pos) {
        if ((*pos >= '0') && (*pos <= '9')) {
            continue;
        }
        if (*pos == ':') {
            return (pos == last)? pos : (pos - 1);
        }
        break; // Any other character means there is no clone suffix
    }
    return last;
}
/*!
 * \internal
 * \brief Copy a resource ID without any clone suffix
 *
 * \param[in] last_rsc_id  Resource ID to check
 *
 * \return Newly allocated string holding the resource's base name
 * \note The caller is responsible for calling free() on the result.
 *       Errors are asserted, so the result can be assumed to be non-NULL.
 */
char *
clone_strip(const char *last_rsc_id)
{
    char *stripped = NULL;
    const char *last_char = pe_base_name_end(last_rsc_id);

    CRM_ASSERT(last_char);

    // Keep everything up to and including the base name's last character
    stripped = strndup(last_rsc_id, last_char - last_rsc_id + 1);
    CRM_ASSERT(stripped);

    return stripped;
}
/*!
 * \internal
 * \brief Get the name of the first instance of a cloned resource
 *
 * \param[in] last_rsc_id  Resource ID to check
 *
 * \return Pointer to newly allocated string with resource's base name plus :0
 * \note It is the caller's responsibility to free() the result.
 *       This asserts on error, so callers can assume result is not NULL.
 */
char *
clone_zero(const char *last_rsc_id)
{
    const char *end = pe_base_name_end(last_rsc_id);
    size_t base_name_len = 0;
    char *zero = NULL;

    /* Validate the base name end before using it in pointer arithmetic
     * (pe_base_name_end() returns NULL for a NULL or empty ID)
     */
    CRM_ASSERT(end);
    base_name_len = end - last_rsc_id + 1;

    // Room for the base name, ":0", and the terminator (calloc zero-fills)
    zero = calloc(base_name_len + 3, sizeof(char));
    CRM_ASSERT(zero);

    memcpy(zero, last_rsc_id, base_name_len);
    zero[base_name_len] = ':';
    zero[base_name_len + 1] = '0';
    return zero;
}
/*!
 * \internal
 * \brief Create an orphan resource object from a resource history entry
 *
 * Builds primitive XML from the history entry's properties, unpacks it into
 * a resource, flags it as an orphan, and appends it to the working set's
 * resource list. If the XML describes a remote node, the corresponding node
 * is looked up (or created) and marked as shutting down.
 *
 * \param[in]     rsc_id     ID to give the new resource
 * \param[in]     rsc_entry  Resource history XML to copy properties from
 * \param[in,out] data_set   Cluster working set
 *
 * \return Newly created resource, or NULL if it could not be unpacked
 */
static resource_t *
create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set)
{
    resource_t *orphan = NULL;
    xmlNode *fake_xml = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);

    copy_in_properties(fake_xml, rsc_entry);
    crm_xml_add(fake_xml, XML_ATTR_ID, rsc_id);
    crm_log_xml_debug(fake_xml, "Orphan resource");

    if (common_unpack(fake_xml, &orphan, NULL, data_set) == FALSE) {
        return NULL;
    }

    if (xml_contains_remote_node(fake_xml)) {
        node_t *remote = NULL;

        crm_debug("Detected orphaned remote node %s", rsc_id);
        remote = pe_find_node(data_set->nodes, rsc_id);
        if (remote == NULL) {
            remote = pe_create_node(rsc_id, rsc_id, "remote", NULL, data_set);
        }
        link_rsc2remotenode(data_set, orphan);

        if (remote != NULL) {
            crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
            remote->details->shutdown = TRUE;
        }
    }

    if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER) != NULL) {
        // The container itself is mapped later, once all state is unpacked
        crm_trace("Detected orphaned container filler %s", rsc_id);
        set_bit(orphan->flags, pe_rsc_orphan_container_filler);
    }

    set_bit(orphan->flags, pe_rsc_orphan);
    data_set->resources = g_list_append(data_set->resources, orphan);
    return orphan;
}
/*!
 * \internal
 * \brief Create an orphan instance for anonymous clone resource history
 *
 * \param[in,out] parent    Clone that gets a new child instance
 * \param[in]     rsc_id    Name of the resource to find within the new child
 * \param[in]     node      Node whose history is being processed (logging only)
 * \param[in,out] data_set  Cluster working set
 *
 * \return Result of searching the newly created child for \p rsc_id
 */
static pe_resource_t *
create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id,
                        pe_node_t *node, pe_working_set_t *data_set)
{
    pe_resource_t *new_child = pe__create_clone_child(parent, data_set);
    pe_resource_t *match = NULL;

    // The new child may be a group, so look for the member named rsc_id
    match = new_child->fns->find_rsc(new_child, rsc_id, NULL, pe_find_clone);

    pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s",
                 new_child->id, parent->id, rsc_id, node->details->uname);
    return match;
}
/*!
* \internal
* \brief Check a node for an instance of an anonymous clone
*
* Return a child instance of the specified anonymous clone, in order of
* preference: (1) the instance running on the specified node, if any;
* (2) an inactive instance (i.e. within the total of clone-max instances);
* (3) a newly created orphan (i.e. clone-max instances are already active).
*
* \param[in] data_set Cluster information
* \param[in] node Node on which to check for instance
* \param[in] parent Clone to check
* \param[in] rsc_id Name of cloned resource in history (without instance)
*
* \return Instance chosen by the preference order above; when no existing
*         instance is usable, the result of create_anonymous_orphan()
* \note \p parent is asserted to be a non-NULL clone without the
*       pe_rsc_unique flag.
*/
static resource_t *
find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent,
const char *rsc_id)
{
GListPtr rIter = NULL;
pe_resource_t *rsc = NULL;
pe_resource_t *inactive_instance = NULL;
/* Once set, inactive instances are neither remembered nor used, so an
 * additional history entry on this node ends up as an orphan
 */
gboolean skip_inactive = FALSE;
CRM_ASSERT(parent != NULL);
CRM_ASSERT(pe_rsc_is_clone(parent));
CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique));
// Check for active (or partially active, for cloned groups) instance
pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id);
// The loop ends as soon as an active match is found (rsc becomes non-NULL)
for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
GListPtr locations = NULL;
resource_t *child = rIter->data;
/* Check whether this instance is already known to be active or pending
* anywhere, at this stage of unpacking. Because this function is called
* for a resource before the resource's individual operation history
* entries are unpacked, locations will generally not contain the
* desired node.
*
* However, there are three exceptions:
* (1) when child is a cloned group and we have already unpacked the
* history of another member of the group on the same node;
* (2) when we've already unpacked the history of another numbered
* instance on the same node (which can happen if globally-unique
* was flipped from true to false); and
* (3) when we re-run calculations on the same data set as part of a
* simulation.
*/
child->fns->location(child, &locations, 2);
if (locations) {
/* We should never associate the same numbered anonymous clone
* instance with multiple nodes, and clone instances can't migrate,
* so there must be only one location, regardless of history.
*/
CRM_LOG_ASSERT(locations->next == NULL);
// Nodes are matched by their shared details object, not by pointer
if (((pe_node_t *)locations->data)->details == node->details) {
/* This child instance is active on the requested node, so check
* for a corresponding configured resource. We use find_rsc()
* instead of child because child may be a cloned group, and we
* need the particular member corresponding to rsc_id.
*
* If the history entry is orphaned, rsc will be NULL.
*/
rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone);
if (rsc) {
/* If there are multiple instance history entries for an
* anonymous clone in a single node's history (which can
* happen if globally-unique is switched from true to
* false), we want to consider the instances beyond the
* first as orphans, even if there are inactive instance
* numbers available.
*/
if (rsc->running_on) {
crm_notice("Active (now-)anonymous clone %s has "
"multiple (orphan) instance histories on %s",
parent->id, node->details->uname);
skip_inactive = TRUE;
rsc = NULL;
} else {
pe_rsc_trace(parent, "Resource %s, active", rsc->id);
}
}
}
// Only the list itself is freed here, not the nodes it points to
g_list_free(locations);
} else {
pe_rsc_trace(parent, "Resource %s, skip inactive", child->id);
if (!skip_inactive && !inactive_instance
&& is_not_set(child->flags, pe_rsc_block)) {
// Remember one inactive instance in case we don't find active
inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
pe_find_clone);
/* ... but don't use it if it was already associated with a
* pending action on another node
*/
if (inactive_instance && inactive_instance->pending_node
&& (inactive_instance->pending_node->details != node->details)) {
inactive_instance = NULL;
}
}
}
}
// No active match: fall back to the remembered inactive instance, if allowed
if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id);
rsc = inactive_instance;
}
/* If the resource has "requires" set to "quorum" or "nothing", and we don't
* have a clone instance for every node, we don't want to consume a valid
* instance number for unclean nodes. Such instances may appear to be active
* according to the history, but should be considered inactive, so we can
* start an instance elsewhere. Treat such instances as orphans.
*
* An exception is instances running on guest nodes -- since guest node
* "fencing" is actually just a resource stop, requires shouldn't apply.
*
* @TODO Ideally, we'd use an inactive instance number if it is not needed
* for any clean instances. However, we don't know that at this point.
*/
if ((rsc != NULL) && is_not_set(rsc->flags, pe_rsc_needs_fencing)
&& (!node->details->online || node->details->unclean)
&& !pe__is_guest_node(node)
&& !pe__is_universal_clone(parent, data_set)) {
rsc = NULL;
}
// Nothing usable was found (or the match was rejected above): make an orphan
if (rsc == NULL) {
rsc = create_anonymous_orphan(parent, rsc_id, node, data_set);
pe_rsc_trace(parent, "Resource %s, orphan", rsc->id);
}
return rsc;
}
/*!
 * \internal
 * \brief Map a resource history entry's ID to a configured resource
 *
 * Looks the ID up directly, then as the ":0" instance of an anonymous clone.
 * For anonymous clones, the specific instance (or bundle replica) to use on
 * \p node is selected, and the history name is remembered as the instance's
 * clone_name when it differs from the instance's own ID.
 *
 * \param[in,out] data_set   Cluster working set
 * \param[in]     node       Node whose history is being unpacked
 * \param[in]     rsc_id     Resource ID as found in the history
 * \param[in]     rsc_entry  Resource history XML (not used by this function)
 *
 * \return Matching resource, or NULL if the history entry is orphaned
 */
static resource_t *
unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id,
                     xmlNode * rsc_entry)
{
    resource_t *top = NULL;
    resource_t *match = NULL;

    crm_trace("looking for %s", rsc_id);
    match = pe_find_resource(data_set->resources, rsc_id);

    if (match != NULL) {
        if (match->variant > pe_native) {
            crm_trace("Resource history for %s is orphaned because it is no longer primitive",
                      rsc_id);
            return NULL;
        }
        top = uber_parent(match);

    } else {
        /* The history name was not found as-is, so try it as a clone
         * instance. Even when clone-max=0, a single :0 orphan exists to
         * match against here.
         */
        char *zero_id = clone_zero(rsc_id);
        resource_t *zero_rsc = pe_find_resource(data_set->resources, zero_id);

        if ((zero_rsc != NULL) && is_not_set(zero_rsc->flags, pe_rsc_unique)) {
            match = zero_rsc;
            top = uber_parent(zero_rsc);
            crm_trace("%s found as %s (%s)", rsc_id, zero_id, top->id);
        } else {
            crm_trace("%s is not known as %s either (orphan)",
                      rsc_id, zero_id);
        }
        free(zero_id);
    }

    if (pe_rsc_is_anon_clone(top)) {
        if (pe_rsc_is_bundled(top)) {
            match = pe__find_bundle_replica(top->parent, node);
        } else {
            char *base_name = clone_strip(rsc_id);

            match = find_anonymous_clone(data_set, node, top, base_name);
            free(base_name);
            CRM_ASSERT(match != NULL);
        }
    }

    // Remember the name used in the history if it differs from the instance's
    if ((match != NULL) && safe_str_neq(rsc_id, match->id)
        && safe_str_neq(rsc_id, match->clone_name)) {

        free(match->clone_name);
        match->clone_name = strdup(rsc_id);
        pe_rsc_debug(match, "Internally renamed %s on %s to %s%s",
                     rsc_id, node->details->uname, match->id,
                     (is_set(match->flags, pe_rsc_orphan)? " (ORPHAN)" : ""));
    }
    return match;
}
/*!
 * \internal
 * \brief Create and configure a resource object for orphaned history
 *
 * The new resource is left unmanaged if the cluster is configured not to
 * stop orphans; otherwise it is banned from running anywhere.
 *
 * \param[in]     rsc_entry  Resource history XML with no matching resource
 * \param[in]     node       Node whose history contains \p rsc_entry
 * \param[in,out] data_set   Cluster working set
 *
 * \return Newly created orphan resource, or NULL if it could not be created
 */
static resource_t *
process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set)
{
    resource_t *rsc = NULL;
    const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);

    crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname);
    rsc = create_fake_resource(rsc_id, rsc_entry, data_set);

    /* create_fake_resource() returns NULL if the resource cannot be
     * unpacked, so check before either branch below touches rsc
     */
    CRM_CHECK(rsc != NULL, return NULL);

    if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) {
        clear_bit(rsc->flags, pe_rsc_managed);

    } else {
        print_resource(LOG_TRACE, "Added orphan", rsc, FALSE);
        resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set);
    }
    return rsc;
}
/*!
 * \internal
 * \brief Apply a resource's unpacked state on one node to the working set
 *
 * In order: records the node as one where the resource's state is known,
 * requests fencing if a managed resource appears active on an offline node,
 * applies the failure policy, and finally either records the resource as
 * running on the node, resets an old-style clone_name, or marks existing
 * stop actions for the resource on the node as optional.
 *
 * \param[in,out] rsc         Resource to process (asserted to be non-NULL)
 * \param[in,out] node        Node whose history was unpacked
 * \param[in]     on_fail     Failure handling to apply
 * \param[in]     migrate_op  Not referenced by this function
 * \param[in,out] data_set    Cluster working set
 */
static void
process_rsc_state(resource_t * rsc, node_t * node,
enum action_fail_response on_fail,
xmlNode * migrate_op, pe_working_set_t * data_set)
{
node_t *tmpnode = NULL;
char *reason = NULL;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail));
/* process current state */
if (rsc->role != RSC_ROLE_UNKNOWN) {
resource_t *iter = rsc;
/* Add a copy of the node to known_on for the resource and each ancestor,
 * stopping after the first one that has the pe_rsc_unique flag set
 */
while (iter) {
if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
node_t *n = node_copy(node);
pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name,
n->details->uname);
g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
}
if (is_set(iter->flags, pe_rsc_unique)) {
break;
}
iter = iter->parent;
}
}
/* If a managed resource is believed to be running, but node is down ... */
if (rsc->role > RSC_ROLE_STOPPED
&& node->details->online == FALSE
&& node->details->maintenance == FALSE
&& is_set(rsc->flags, pe_rsc_managed)) {
gboolean should_fence = FALSE;
/* If this is a guest node, fence it (regardless of whether fencing is
* enabled, because guest node fencing is done by recovery of the
* container resource rather than by the fencer). Mark the resource
* we're processing as failed. When the guest comes back up, its
* operation history in the CIB will be cleared, freeing the affected
* resource to run again once we are sure we know its state.
*/
if (pe__is_guest_node(node)) {
set_bit(rsc->flags, pe_rsc_failed);
should_fence = TRUE;
} else if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
if (pe__is_remote_node(node) && node->details->remote_rsc
&& is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) {
/* Setting unseen means that fencing of the remote node will
* occur only if the connection resource is not going to start
* somewhere. This allows connection resources on a failed
* cluster node to move to another node without requiring the
* remote nodes to be fenced as well.
*/
node->details->unseen = TRUE;
reason = crm_strdup_printf("%s is active there (fencing will be"
" revoked if remote connection can "
"be re-established elsewhere)",
rsc->id);
}
should_fence = TRUE;
}
if (should_fence) {
// Use the generic reason unless a more specific one was set above
if (reason == NULL) {
reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
}
pe_fence_node(data_set, node, reason);
}
// reason is reassigned before any later use (see action_fail_fence)
free(reason);
}
if (node->details->unclean) {
/* No extra processing needed
* Also allows resources to be started again after a node is shot
*/
on_fail = action_fail_ignore;
}
switch (on_fail) {
case action_fail_ignore:
/* nothing to do */
break;
case action_fail_fence:
/* treat it as if it is still running
* but also mark the node as unclean
*/
reason = crm_strdup_printf("%s failed there", rsc->id);
pe_fence_node(data_set, node, reason);
free(reason);
break;
case action_fail_standby:
node->details->standby = TRUE;
node->details->standby_onfail = TRUE;
break;
case action_fail_block:
/* is_managed == FALSE will prevent any
* actions being sent for the resource
*/
clear_bit(rsc->flags, pe_rsc_managed);
set_bit(rsc->flags, pe_rsc_block);
break;
case action_fail_migrate:
/* make sure it comes up somewhere else
* or not at all
*/
resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set);
break;
case action_fail_stop:
rsc->next_role = RSC_ROLE_STOPPED;
break;
case action_fail_recover:
if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
set_bit(rsc->flags, pe_rsc_failed);
stop_action(rsc, node, FALSE);
}
break;
case action_fail_restart_container:
set_bit(rsc->flags, pe_rsc_failed);
if (rsc->container && pe_rsc_is_bundled(rsc)) {
/* A bundle's remote connection can run on a different node than
* the bundle's container. We don't necessarily know where the
* container is running yet, so remember it and add a stop
* action for it later.
*/
data_set->stop_needed = g_list_prepend(data_set->stop_needed,
rsc->container);
} else if (rsc->container) {
stop_action(rsc->container, node, FALSE);
} else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
stop_action(rsc, node, FALSE);
}
break;
case action_fail_reset_remote:
set_bit(rsc->flags, pe_rsc_failed);
if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
tmpnode = NULL;
// A remote connection resource and its node share the same ID
if (rsc->is_remote_node) {
tmpnode = pe_find_node(data_set->nodes, rsc->id);
}
if (tmpnode &&
pe__is_remote_node(tmpnode) &&
tmpnode->details->remote_was_fenced == 0) {
/* The remote connection resource failed in a way that
* should result in fencing the remote node.
*/
pe_fence_node(data_set, tmpnode,
"remote connection is unrecoverable");
}
}
/* require the stop action regardless if fencing is occurring or not. */
if (rsc->role > RSC_ROLE_STOPPED) {
stop_action(rsc, node, FALSE);
}
/* if reconnect delay is in use, prevent the connection from exiting the
* "STOPPED" role until the failure is cleared by the delay timeout. */
if (rsc->remote_reconnect_ms) {
rsc->next_role = RSC_ROLE_STOPPED;
}
break;
}
/* ensure a remote-node connection failure forces an unclean remote-node
* to be fenced. By setting unseen = FALSE, the remote-node failure will
* result in a fencing operation regardless if we're going to attempt to
* reconnect to the remote-node in this transition or not. */
if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) {
tmpnode = pe_find_node(data_set->nodes, rsc->id);
if (tmpnode && tmpnode->details->unclean) {
tmpnode->details->unseen = FALSE;
}
}
// Resource is in some active role: warn about orphans and record it as running
if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
if (is_set(rsc->flags, pe_rsc_orphan)) {
if (is_set(rsc->flags, pe_rsc_managed)) {
crm_config_warn("Detected active orphan %s running on %s",
rsc->id, node->details->uname);
} else {
crm_config_warn("Cluster configured not to stop active orphans."
" %s must be stopped manually on %s",
rsc->id, node->details->uname);
}
}
native_add_running(rsc, node, data_set);
if (on_fail != action_fail_ignore) {
set_bit(rsc->flags, pe_rsc_failed);
}
} else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
/* Only do this for older status sections that included instance numbers
* Otherwise stopped instances will appear as orphans
*/
pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id);
free(rsc->clone_name);
rsc->clone_name = NULL;
} else {
// Resource is stopped or unknown here: flag its stop actions on this node as optional
GList *possible_matches = pe__resource_actions(rsc, node, RSC_STOP,
FALSE);
GListPtr gIter = possible_matches;
for (; gIter != NULL; gIter = gIter->next) {
action_t *stop = (action_t *) gIter->data;
stop->flags |= pe_action_optional;
}
g_list_free(possible_matches);
}
}
/*!
 * \internal
 * \brief Create optional actions for a resource's active recurring operations
 *
 * \param[in]     node            Node whose history is being processed
 * \param[in,out] rsc             Resource the history belongs to (asserted
 *                                to be non-NULL)
 * \param[in]     start_index     Index in \p sorted_op_list of the entry that
 *                                made the resource active, or -1
 * \param[in]     stop_index      Index of the last successful stop, or -1
 * \param[in]     sorted_op_list  History entries for \p rsc on \p node
 * \param[in,out] data_set        Cluster working set
 */
static void
process_recurring(node_t * node, resource_t * rsc,
                  int start_index, int stop_index,
                  GListPtr sorted_op_list, pe_working_set_t * data_set)
{
    int index = -1;
    GListPtr item = NULL;

    CRM_ASSERT(rsc);
    pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index);

    for (item = sorted_op_list; item != NULL; item = item->next) {
        xmlNode *op_xml = (xmlNode *) item->data;
        const char *op_id = ID(op_xml);
        const char *op_status = NULL;
        const char *op_task = NULL;
        const char *interval_spec = NULL;
        guint interval_ms = 0;
        char *op_key = NULL;

        index++;

        if (node->details->online == FALSE) {
            pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname);
            break;
        }

        /* Need to check if there's a monitor for role="Stopped" */
        if ((start_index < stop_index) && (index <= stop_index)) {
            pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", op_id, node->details->uname);
            continue;
        }

        if (index < start_index) {
            pe_rsc_trace(rsc, "Skipping %s/%s: old %d", op_id, node->details->uname, index);
            continue;
        }

        interval_spec = crm_element_value(op_xml, XML_LRM_ATTR_INTERVAL_MS);
        interval_ms = crm_parse_ms(interval_spec);
        if (interval_ms == 0) {
            pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", op_id, node->details->uname);
            continue;
        }

        op_status = crm_element_value(op_xml, XML_LRM_ATTR_OPSTATUS);
        if (safe_str_eq(op_status, "-1")) {
            pe_rsc_trace(rsc, "Skipping %s/%s: status", op_id, node->details->uname);
            continue;
        }

        // Create the action, flagged as optional
        op_task = crm_element_value(op_xml, XML_LRM_ATTR_TASK);
        op_key = generate_op_key(rsc->id, op_task, interval_ms);
        pe_rsc_trace(rsc, "Creating %s/%s", op_key, node->details->uname);
        custom_action(rsc, op_key, op_task, node, TRUE, TRUE, data_set);
    }
}
/*!
 * \brief Find where a resource last became active and was last stopped
 *
 * Scans a sorted list of operation history entries. The stop index is the
 * position of the last stop with status "0". The start index is the position
 * of the last start or migrate_from; if there is none, the last promote or
 * demote is used, or failing that a monitor with rc "0" or "8" recorded
 * while no earlier such monitor was newer than the latest stop.
 *
 * \param[in]  sorted_op_list  Operation history entries, in order
 * \param[out] start_index     Where to store the start index (-1 if none)
 * \param[out] stop_index      Where to store the stop index (-1 if none)
 */
void
calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
{
    int index = 0;
    int monitor_start = -1;
    int clone_start = -1;

    *stop_index = -1;
    *start_index = -1;

    for (GListPtr item = sorted_op_list; item != NULL; item = item->next, index++) {
        xmlNode *op_xml = (xmlNode *) item->data;
        const char *op_task = crm_element_value(op_xml, XML_LRM_ATTR_TASK);
        const char *op_status = crm_element_value(op_xml, XML_LRM_ATTR_OPSTATUS);

        if (safe_str_eq(op_task, CRMD_ACTION_STOP)
            && safe_str_eq(op_status, "0")) {
            *stop_index = index;

        } else if (safe_str_eq(op_task, CRMD_ACTION_START)
                   || safe_str_eq(op_task, CRMD_ACTION_MIGRATED)) {
            *start_index = index;

        } else if ((monitor_start <= *stop_index)
                   && safe_str_eq(op_task, CRMD_ACTION_STATUS)) {
            const char *op_rc = crm_element_value(op_xml, XML_LRM_ATTR_RC);

            if (safe_str_eq(op_rc, "0") || safe_str_eq(op_rc, "8")) {
                monitor_start = index;
            }

        } else if (safe_str_eq(op_task, CRMD_ACTION_PROMOTE)
                   || safe_str_eq(op_task, CRMD_ACTION_DEMOTE)) {
            clone_start = index;
        }
    }

    // Without an explicit start, fall back to the implied ones
    if (*start_index == -1) {
        if (clone_start != -1) {
            *start_index = clone_start;
        } else if (monitor_start != -1) {
            *start_index = monitor_start;
        }
    }
}
/*!
 * \internal
 * \brief Unpack one resource's operation history on one node
 *
 * \param[in,out] node       Node whose history is being unpacked
 * \param[in]     rsc_entry  Resource history XML
 * \param[in,out] data_set   Cluster working set
 *
 * \return Resource the history was applied to (an orphan is created if no
 *         configured resource matches), or NULL if the entry has no
 *         operations
 */
static resource_t *
unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set)
{
    const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
    resource_t *rsc = NULL;
    GListPtr ops = NULL;
    GListPtr item = NULL;
    xmlNode *child = NULL;
    xmlNode *migrate_op = NULL;
    xmlNode *last_failure = NULL;
    int start_index = -1;
    int stop_index = -1;
    enum action_fail_response on_fail = action_fail_ignore;
    enum rsc_role_e req_role = RSC_ROLE_UNKNOWN;
    enum rsc_role_e prior_role = RSC_ROLE_UNKNOWN;

    crm_trace("[%s] Processing %s on %s",
              crm_element_name(rsc_entry), rsc_id, node->details->uname);

    // Collect the operation history entries
    for (child = __xml_first_child(rsc_entry); child != NULL;
         child = __xml_next_element(child)) {

        if (crm_str_eq((const char *) child->name, XML_LRM_TAG_RSC_OP, TRUE)) {
            ops = g_list_prepend(ops, child);
        }
    }
    if (ops == NULL) {
        // Without operations, there is nothing to do
        return NULL;
    }

    // Map the history to a resource, creating an orphan if necessary
    rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry);
    if (rsc == NULL) {
        rsc = process_orphan_resource(rsc_entry, node, data_set);
    }
    CRM_ASSERT(rsc != NULL);

    // Process each operation in sorted order, starting from an unknown role
    prior_role = rsc->role;
    rsc->role = RSC_ROLE_UNKNOWN;
    ops = g_list_sort(ops, sort_op_by_callid);

    for (item = ops; item != NULL; item = item->next) {
        xmlNode *op_xml = (xmlNode *) item->data;
        const char *op_task = crm_element_value(op_xml, XML_LRM_ATTR_TASK);

        if (safe_str_eq(op_task, CRMD_ACTION_MIGRATED)) {
            migrate_op = op_xml;
        }
        unpack_rsc_op(rsc, node, op_xml, &last_failure, &on_fail, data_set);
    }

    // Create active recurring operations as optional
    calculate_active_ops(ops, &start_index, &stop_index);
    process_recurring(node, rsc, start_index, stop_index, ops, data_set);

    // The list elements belong to the XML, so only the list itself is freed
    g_list_free(ops);

    process_rsc_state(rsc, node, on_fail, migrate_op, data_set);

    if (get_target_role(rsc, &req_role)) {
        if ((rsc->next_role == RSC_ROLE_UNKNOWN) || (req_role < rsc->next_role)) {
            pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s"
                         " with requested next role %s",
                         rsc->id, role2text(rsc->next_role), role2text(req_role));
            rsc->next_role = req_role;

        } else if (req_role > rsc->next_role) {
            pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s"
                        " with requested next role %s",
                        rsc->id, role2text(rsc->next_role), role2text(req_role));
        }
    }

    // Never lower the role below what was known before this history
    if (prior_role > rsc->role) {
        rsc->role = prior_role;
    }
    return rsc;
}
/*!
 * \internal
 * \brief Attach orphaned container fillers to their container resources
 *
 * For each resource history entry naming a container, if both the container
 * and the resource exist, and the resource is flagged as an orphaned
 * container filler that has no container yet, link the two.
 *
 * \param[in]     lrm_rsc_list  XML holding a node's resource history entries
 * \param[in,out] data_set      Cluster working set
 */
static void
handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
{
    xmlNode *entry = NULL;

    for (entry = __xml_first_child(lrm_rsc_list); entry != NULL;
         entry = __xml_next_element(entry)) {

        const char *filler_id = NULL;
        const char *container_id = NULL;
        resource_t *filler = NULL;
        resource_t *container = NULL;

        if (safe_str_neq((const char *) entry->name, XML_LRM_TAG_RESOURCE)) {
            continue;
        }

        container_id = crm_element_value(entry, XML_RSC_ATTR_CONTAINER);
        filler_id = crm_element_value(entry, XML_ATTR_ID);
        if ((container_id == NULL) || (filler_id == NULL)) {
            continue;
        }

        container = pe_find_resource(data_set->resources, container_id);
        if (container == NULL) {
            continue;
        }

        filler = pe_find_resource(data_set->resources, filler_id);
        if ((filler == NULL)
            || !is_set(filler->flags, pe_rsc_orphan_container_filler)
            || (filler->container != NULL)) {
            continue;
        }

        pe_rsc_trace(filler, "Mapped container of orphaned resource %s to %s",
                     filler->id, container_id);
        filler->container = container;
        container->fillers = g_list_append(container->fillers, filler);
    }
}
/*!
 * \brief Unpack all resource history entries for one node
 *
 * \param[in,out] node          Node whose history is being unpacked
 * \param[in]     lrm_rsc_list  XML holding the node's resource history entries
 * \param[in,out] data_set      Cluster working set
 *
 * \return FALSE if \p node is NULL, otherwise TRUE
 */
gboolean
unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
{
    gboolean need_filler_mapping = FALSE;
    xmlNode *entry = NULL;

    CRM_CHECK(node != NULL, return FALSE);
    crm_trace("Unpacking resources on %s", node->details->uname);

    for (entry = __xml_first_child(lrm_rsc_list); entry != NULL;
         entry = __xml_next_element(entry)) {

        resource_t *rsc = NULL;

        if (!crm_str_eq((const char *) entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
            continue;
        }

        rsc = unpack_lrm_rsc_state(node, entry, data_set);
        if ((rsc != NULL) && is_set(rsc->flags, pe_rsc_orphan_container_filler)) {
            need_filler_mapping = TRUE;
        }
    }

    /* Orphaned container fillers can be mapped to their containers only
     * after all of this node's resource state has been unpacked.
     */
    if (need_filler_mapping) {
        handle_orphaned_container_fillers(lrm_rsc_list, data_set);
    }
    return TRUE;
}
/*!
 * \internal
 * \brief Set a resource's role to the lowest active role
 *
 * The role becomes slave if the resource's topmost ancestor is promotable,
 * otherwise started.
 *
 * \param[in,out] rsc  Resource to update
 */
static void
set_active(resource_t * rsc)
{
    resource_t *ancestor = uber_parent(rsc);
    gboolean promotable = (ancestor && is_set(ancestor->flags, pe_rsc_promotable));

    rsc->role = promotable? RSC_ROLE_SLAVE : RSC_ROLE_STARTED;
}
/*!
 * \internal
 * \brief Set a node's weight (signature matches a hash table iterator)
 *
 * \param[in]     key        Ignored
 * \param[in,out] value      Node (node_t *) whose weight should be set
 * \param[in]     user_data  Pointer to the int score to assign
 */
static void
set_node_score(gpointer key, gpointer value, gpointer user_data)
{
    const int *new_score = user_data;
    node_t *target = value;

    target->weight = *new_score;
}
#define STATUS_PATH_MAX 1024

/*!
 * \internal
 * \brief Find a resource operation history entry in the working set's input
 *
 * Builds an XPath expression selecting the history entry for \p op of
 * \p resource under the node_state element whose uname is \p node, and
 * searches data_set->input with it.
 *
 * \param[in] resource  ID of the resource history entry to search
 * \param[in] op        Operation name to match
 * \param[in] node      uname of the node whose history should be searched
 * \param[in] source    If not NULL: for migrate_to, the migrate_target value
 *                      the entry must have; for migrate_from, the
 *                      migrate_source value it must have; otherwise ignored
 * \param[in] data_set  Cluster working set
 *
 * \return Result of get_xpath_object() for the expression, or NULL if the
 *         expression does not fit in STATUS_PATH_MAX bytes
 */
static xmlNode *
find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
            pe_working_set_t * data_set)
{
    int offset = 0;
    int rc = 0;
    char xpath[STATUS_PATH_MAX];

    /* snprintf() returns the length that would have been written, so each
     * result must be checked against the remaining space before it is used
     * as an offset; otherwise a long name would push later writes past the
     * end of the buffer.
     */
    rc = snprintf(xpath, sizeof(xpath),
                  "//node_state[@uname='%s']"
                  "//" XML_LRM_TAG_RESOURCE "[@id='%s']",
                  node, resource);
    CRM_CHECK((rc > 0) && (rc < STATUS_PATH_MAX), return NULL);
    offset = rc;

    /* Need to check against transition_magic too? */
    if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) {
        rc = snprintf(xpath + offset, sizeof(xpath) - offset,
                      "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op,
                      source);

    } else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) {
        rc = snprintf(xpath + offset, sizeof(xpath) - offset,
                      "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op,
                      source);

    } else {
        rc = snprintf(xpath + offset, sizeof(xpath) - offset,
                      "/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op);
    }

    // A truncated expression could match the wrong entry, so give up instead
    CRM_CHECK((rc > 0) && (rc < (STATUS_PATH_MAX - offset)), return NULL);

    return get_xpath_object(xpath, data_set->input, LOG_DEBUG);
}
/*!
 * \internal
 * \brief Check whether a resource has a stop with a later call ID on a node
 *
 * \param[in] rsc       Resource to check
 * \param[in] node      Node whose history should be searched for a stop
 * \param[in] xml_op    Operation history entry to compare against
 * \param[in] data_set  Cluster working set
 *
 * \return TRUE if a stop entry for \p rsc is found on \p node with a call ID
 *         greater than that of \p xml_op, otherwise FALSE
 */
static bool
stop_happened_after(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
                    pe_working_set_t *data_set)
{
    int stop_callid = 0;
    int op_callid = 0;
    xmlNode *stop_xml = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id,
                                    NULL, data_set);

    if (stop_xml == NULL) {
        return FALSE;
    }

    crm_element_value_int(stop_xml, XML_LRM_ATTR_CALLID, &stop_callid);
    crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &op_callid);

    return (stop_callid > op_callid)? TRUE : FALSE;
}
/*!
 * \internal
 * \brief Update a resource's state from a migration history entry
 *
 * NOTE(review): the comments below imply \p xml_op is a successful migrate_to
 * result recorded on the migration source; the caller is not visible here,
 * so confirm against it.
 *
 * \param[in,out] rsc       Resource the history entry belongs to
 * \param[in]     node      Node whose history contains \p xml_op
 * \param[in]     xml_op    Migration history entry being unpacked
 * \param[in,out] data_set  Cluster working set
 */
static void
unpack_rsc_migration(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set)
{
/* A successful migration sequence is:
* migrate_to on source node
* migrate_from on target node
* stop on source node
*
* If a migrate_to is followed by a stop, the entire migration (successful
* or failed) is complete, and we don't care what happened on the target.
*
* If no migrate_from has happened, the migration is considered to be
* "partial". If the migrate_from failed, make sure the resource gets
* stopped on both source and target (if up).
*
* If the migrate_to and migrate_from both succeeded (which also implies the
* resource is no longer running on the source), but there is no stop, the
* migration is considered to be "dangling".
*/
// These are only meaningful when a migrate_from entry is found below
int from_rc = 0;
int from_status = 0;
const char *migrate_source = NULL;
const char *migrate_target = NULL;
pe_node_t *target = NULL;
pe_node_t *source = NULL;
xmlNode *migrate_from = NULL;
// A later stop on this node means the migration is over; nothing to do
if (stop_happened_after(rsc, node, xml_op, data_set)) {
return;
}
// Clones are not allowed to migrate, so role can't be master
rsc->role = RSC_ROLE_STARTED;
migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
// Either lookup may yield NULL; every use below checks before dereferencing
target = pe_find_node(data_set->nodes, migrate_target);
source = pe_find_node(data_set->nodes, migrate_source);
// Check whether there was a migrate_from action
migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target,
migrate_source, data_set);
if (migrate_from) {
crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status);
pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d",
ID(migrate_from), migrate_target, from_status, from_rc);
}
if (migrate_from && from_rc == PCMK_OCF_OK
&& from_status == PCMK_LRM_OP_DONE) {
/* The migrate_to and migrate_from both succeeded, so mark the migration
* as "dangling". This will be used to schedule a stop action on the
* source without affecting the target.
*/
pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op),
migrate_source);
rsc->role = RSC_ROLE_STOPPED;
rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
} else if (migrate_from && (from_status != PCMK_LRM_OP_PENDING)) { // Failed
// The resource may be active on the target, so record it there if online
if (target && target->details->online) {
pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
target->details->online);
native_add_running(rsc, target, data_set);
}
} else { // Pending, or complete but erased
if (target && target->details->online) {
pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
target->details->online);
native_add_running(rsc, target, data_set);
if (source && source->details->online) {
/* This is a partial migration: the migrate_to completed
* successfully on the source, but the migrate_from has not
* completed. Remember the source and target; if the newly
* chosen target remains the same when we schedule actions
* later, we may continue with the migration.
*/
rsc->partial_migration_target = target;
rsc->partial_migration_source = source;
}
} else {
/* Consider it failed here - forces a restart, prevents migration */
set_bit(rsc->flags, pe_rsc_failed);
clear_bit(rsc->flags, pe_rsc_allow_migrate);
}
}
}
/*!
 * \internal
 * \brief Update resource state after a failed migrate_to or migrate_from
 *
 * For a failed migrate_from, the resource is added as running on the migration
 * source when that node is online and no later stop was recorded there. For a
 * failed migrate_to, the same is done for the migration target; if instead a
 * stop is recorded and no migrate_from entry exists, \p node is remembered as
 * a dangling migration. Any other task is ignored.
 *
 * \param[in,out] rsc       Resource the history entry is for
 * \param[in]     node      Node the failed operation was recorded on
 * \param[in]     xml_op    Failed operation history entry
 * \param[in,out] data_set  Cluster working set
 */
static void
unpack_rsc_migration_failure(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set)
{
    const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
    const char *from_host = NULL;
    const char *to_host = NULL;
    xmlNode *stop_entry = NULL;
    xmlNode *peer_entry = NULL;
    int stop_call = 0;
    int peer_call = 0;

    CRM_ASSERT(rsc);

    if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
        /* Failed migrate_from: compare against history on the source */
        from_host = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
        to_host = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);

        stop_entry = find_lrm_op(rsc->id, CRMD_ACTION_STOP, from_host, NULL,
                                 data_set);
        peer_entry = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, from_host,
                                 to_host, data_set);

        if (stop_entry != NULL) {
            crm_element_value_int(stop_entry, XML_LRM_ATTR_CALLID, &stop_call);
        }
        if (peer_entry != NULL) {
            crm_element_value_int(peer_entry, XML_LRM_ATTR_CALLID, &peer_call);
        }

        /* Get our state right */
        rsc->role = RSC_ROLE_STARTED;   /* can be master? */

        if ((stop_entry == NULL) || (stop_call < peer_call)) {
            node_t *source = pe_find_node(data_set->nodes, from_host);

            if ((source != NULL) && source->details->online) {
                native_add_running(rsc, source, data_set);
            }
        }
        return;
    }

    if (!safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
        return;
    }

    /* Failed migrate_to: compare against history on the target */
    from_host = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
    to_host = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);

    stop_entry = find_lrm_op(rsc->id, CRMD_ACTION_STOP, to_host, NULL,
                             data_set);
    peer_entry = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, to_host, from_host,
                             data_set);

    if (stop_entry != NULL) {
        crm_element_value_int(stop_entry, XML_LRM_ATTR_CALLID, &stop_call);
    }
    if (peer_entry != NULL) {
        crm_element_value_int(peer_entry, XML_LRM_ATTR_CALLID, &peer_call);
    }

    /* Get our state right */
    rsc->role = RSC_ROLE_STARTED;   /* can be master? */

    if ((stop_entry == NULL) || (stop_call < peer_call)) {
        node_t *target = pe_find_node(data_set->nodes, to_host);

        pe_rsc_trace(rsc, "Stop: %p %d, Migrated: %p %d", stop_entry, stop_call, peer_entry,
                     peer_call);
        if ((target != NULL) && target->details->online) {
            native_add_running(rsc, target, data_set);
        }

    } else if (peer_entry == NULL) {
        /* Make sure it gets cleaned up, the stop may pre-date the migrate_from */
        rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
    }
}
/*!
 * \internal
 * \brief Add an operation to the working set's list of failed operations
 *
 * Nothing is recorded for offline nodes, or when an entry with the same
 * operation key and node name is already present. Otherwise the node name and
 * resource ID are added to \p op and a copy of it is appended to the list.
 *
 * \param[in,out] op        Operation history entry to record
 * \param[in]     node      Node the operation was recorded on
 * \param[in]     rsc       Resource the operation belongs to
 * \param[in,out] data_set  Cluster working set holding the failed list
 */
static void
record_failed_op(xmlNode *op, node_t* node, resource_t *rsc, pe_working_set_t * data_set)
{
    const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY);
    xmlNode *entry = NULL;

    if (!node->details->online) {
        return;
    }

    entry = data_set->failed->children;
    while (entry != NULL) {
        const char *entry_key = crm_element_value(entry, XML_LRM_ATTR_TASK_KEY);
        const char *entry_uname = crm_element_value(entry, XML_ATTR_UNAME);

        if (safe_str_eq(op_key, entry_key)
            && safe_str_eq(entry_uname, node->details->uname)) {
            crm_trace("Skipping duplicate entry %s on %s", op_key, node->details->uname);
            return;
        }
        entry = entry->next;
    }

    crm_trace("Adding entry %s on %s", op_key, node->details->uname);
    crm_xml_add(op, XML_ATTR_UNAME, node->details->uname);
    crm_xml_add(op, XML_LRM_ATTR_RSCID, rsc->id);
    add_node_copy(data_set->failed, op);
}
/*!
 * \internal
 * \brief Get the key identifying an operation history entry
 *
 * \param[in] xml_op  Operation history entry XML
 *
 * \return The entry's operation key attribute if it has one, otherwise its ID
 */
static const char *get_op_key(xmlNode *xml_op)
{
    const char *task_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);

    return (task_key != NULL)? task_key : ID(xml_op);
}
/*!
 * \internal
 * \brief Update resource state and failure handling for a failed operation
 *
 * Remembers the history entry as the most recent failure, logs it (also adding
 * it to the working set's failed-operation list, unless the result is "not
 * installed" and the symmetric-cluster flag is unset), possibly replaces the
 * caller's on-fail response with the failed action's own, and adjusts the
 * resource's role, next role and allowed nodes according to the failed task.
 *
 * \param[in,out] rsc           Resource the operation history entry is for
 * \param[in]     node          Node the operation was recorded on
 * \param[in]     rc            Actual return code of the operation
 * \param[in,out] xml_op        Operation history entry XML (record_failed_op()
 *                              may add attributes to it)
 * \param[out]    last_failure  Set to \p xml_op
 * \param[in,out] on_fail       Failure response so far; may be replaced by the
 *                              failed action's on-fail response
 * \param[in,out] data_set      Cluster working set
 */
static void
unpack_rsc_op_failure(resource_t * rsc, node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure,
enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
guint interval_ms = 0;
bool is_probe = FALSE;
action_t *action = NULL;
const char *key = get_op_key(xml_op);
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
CRM_ASSERT(rsc);
*last_failure = xml_op;
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
/* A status (monitor) operation with no interval is a probe */
if ((interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
is_probe = TRUE;
pe_rsc_trace(rsc, "is a probe: %s", key);
}
/* "Not installed" results are only traced (not warned about or recorded)
 * when the symmetric-cluster flag is unset
 */
if (rc != PCMK_OCF_NOT_INSTALLED || is_set(data_set->flags, pe_flag_symmetric_cluster)) {
crm_warn("Processing failed %s of %s on %s: %s " CRM_XS " rc=%d",
(is_probe? "probe" : task), rsc->id, node->details->uname,
services_ocf_exitcode_str(rc), rc);
if (is_probe && (rc != PCMK_OCF_OK)
&& (rc != PCMK_OCF_NOT_RUNNING)
&& (rc != PCMK_OCF_RUNNING_MASTER)) {
/* A failed (not just unexpected) probe result could mean the user
* didn't know resources will be probed even where they can't run.
*/
crm_notice("If it is not possible for %s to run on %s, see "
"the resource-discovery option for location constraints",
rsc->id, node->details->uname);
}
record_failed_op(xml_op, node, rsc, data_set);
} else {
crm_trace("Processing failed op %s for %s on %s: %s (%d)",
task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc),
rc);
}
/* Obtain an action object for this operation so that its on_fail and
 * fail_role settings can be read; it is released with pe_free_action()
 * before returning
 */
action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
/* Decide whether this action's on-fail response replaces the caller's */
if ((action->on_fail <= action_fail_fence && *on_fail < action->on_fail) ||
(action->on_fail == action_fail_reset_remote && *on_fail <= action_fail_recover) ||
(action->on_fail == action_fail_restart_container && *on_fail <= action_fail_recover) ||
(*on_fail == action_fail_restart_container && action->on_fail >= action_fail_migrate)) {
pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail),
fail2text(action->on_fail), action->uuid, key);
*on_fail = action->on_fail;
}
/* Task-specific handling of the failure */
if (safe_str_eq(task, CRMD_ACTION_STOP)) {
resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set);
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
unpack_rsc_migration_failure(rsc, node, xml_op, data_set);
} else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
rsc->role = RSC_ROLE_MASTER;
} else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
if (action->on_fail == action_fail_block) {
rsc->role = RSC_ROLE_MASTER;
rsc->next_role = RSC_ROLE_STOPPED;
} else if(rc == PCMK_OCF_NOT_RUNNING) {
rsc->role = RSC_ROLE_STOPPED;
} else {
/*
* Staying in master role would put the PE/TE into a loop. Setting
* slave role is not dangerous because the resource will be stopped
* as part of recovery, and any master promotion will be ordered
* after that stop.
*/
rsc->role = RSC_ROLE_SLAVE;
}
}
if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) {
/* leave stopped */
pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id);
rsc->role = RSC_ROLE_STOPPED;
} else if (rsc->role < RSC_ROLE_STARTED) {
pe_rsc_trace(rsc, "Setting %s active", rsc->id);
set_active(rsc);
}
pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
rsc->id, role2text(rsc->role),
node->details->unclean ? "true" : "false",
fail2text(action->on_fail), role2text(action->fail_role));
/* The next role is only ever raised to the action's fail role here */
if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) {
rsc->next_role = action->fail_role;
}
if (action->fail_role == RSC_ROLE_STOPPED) {
int score = -INFINITY;
resource_t *fail_rsc = rsc;
if (fail_rsc->parent) {
resource_t *parent = uber_parent(fail_rsc);
if (pe_rsc_is_clone(parent)
&& is_not_set(parent->flags, pe_rsc_unique)) {
/* For clone resources, if a child fails on an operation
* with on-fail = stop, all the resources fail. Do this by preventing
* the parent from coming up again. */
fail_rsc = parent;
}
}
crm_warn("Making sure %s doesn't come up again", fail_rsc->id);
/* make sure it doesn't come up again */
/* Replace the allowed-node table with one built from every node in the
 * working set; set_node_score() is then applied to each entry with
 * -INFINITY (presumably assigning that score -- confirm in
 * set_node_score())
 */
if (fail_rsc->allowed_nodes != NULL) {
g_hash_table_destroy(fail_rsc->allowed_nodes);
}
fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes);
g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
}
pe_free_action(action);
}
/*!
 * \internal
 * \brief Remap operation status based on action result
 *
 * Given an action result, determine an appropriate operation status for the
 * purposes of responding to the action (the status provided by the executor is
 * not directly usable since the executor does not know what was expected).
 *
 * \param[in,out] rsc        Resource that operation history entry is for
 * \param[in]     rc         Actual return code of operation
 * \param[in]     target_rc  Expected return code of operation
 * \param[in]     node       Node where operation was executed
 * \param[in]     xml_op     Operation history entry XML from CIB status
 * \param[in,out] on_fail    What should be done about the result
 * \param[in]     data_set   Current cluster working set
 *
 * \return Operation status based on return code and action info
 * \note This may update the resource's current and next role. It may also
 *       reset \p on_fail to action_fail_ignore (for an accepted "not running"
 *       result), and may clear the resource's managed flag and set its block
 *       flag (for a hard stop failure on a node that cannot be fenced).
 */
static int
determine_op_status(
resource_t *rsc, int rc, int target_rc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
guint interval_ms = 0;
int result = PCMK_LRM_OP_DONE;
const char *key = get_op_key(xml_op);
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
bool is_probe = FALSE;
CRM_ASSERT(rsc);
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
/* A status (monitor) operation with no interval is a probe */
if ((interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
is_probe = TRUE;
}
/* Start from "error" whenever the actual result differs from the expected
 * one (or the expected one is unknown); the switch below may override this
 */
if (target_rc < 0) {
/* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
* Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
* target_rc in the transition key, which (along with the similar case
* of a corrupted transition key in the CIB) will be reported to this
* function as -1. Pacemaker 2.0+ does not support rolling upgrades from
* those versions or processing of saved CIB files from those versions,
* so we do not need to care much about this case.
*/
result = PCMK_LRM_OP_ERROR;
crm_warn("Expected result not found for %s on %s (corrupt or obsolete CIB?)",
key, node->details->uname);
} else if (target_rc != rc) {
result = PCMK_LRM_OP_ERROR;
pe_rsc_debug(rsc, "%s on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
key, node->details->uname,
services_ocf_exitcode_str(rc), rc,
services_ocf_exitcode_str(target_rc), target_rc);
}
switch (rc) {
case PCMK_OCF_OK:
/* A probe that expected "not running" but found the resource active is
 * treated as done rather than as an error
 */
// @TODO Should this be (rc != target_rc)?
if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) {
result = PCMK_LRM_OP_DONE;
pe_rsc_info(rsc, "Operation %s found resource %s active on %s",
task, rsc->id, node->details->uname);
}
break;
case PCMK_OCF_NOT_RUNNING:
/* Accepted for probes, when it was the expected result, or when the
 * resource is unmanaged
 */
if (is_probe || target_rc == rc || is_not_set(rsc->flags, pe_rsc_managed)) {
result = PCMK_LRM_OP_DONE;
rsc->role = RSC_ROLE_STOPPED;
/* clear any previous failure actions */
*on_fail = action_fail_ignore;
rsc->next_role = RSC_ROLE_UNKNOWN;
}
break;
case PCMK_OCF_RUNNING_MASTER:
if (is_probe && (rc != target_rc)) {
result = PCMK_LRM_OP_DONE;
pe_rsc_info(rsc, "Operation %s found resource %s active in master mode on %s",
task, rsc->id, node->details->uname);
}
/* The role is set to master whether or not this was expected */
rsc->role = RSC_ROLE_MASTER;
break;
case PCMK_OCF_DEGRADED_MASTER:
case PCMK_OCF_FAILED_MASTER:
rsc->role = RSC_ROLE_MASTER;
result = PCMK_LRM_OP_ERROR;
break;
case PCMK_OCF_NOT_CONFIGURED:
result = PCMK_LRM_OP_ERROR_FATAL;
break;
case PCMK_OCF_UNIMPLEMENT_FEATURE:
/* Only recurring operations are reported as "not supported"; otherwise
 * this is handled like the hard errors below
 */
if (interval_ms > 0) {
result = PCMK_LRM_OP_NOTSUPPORTED;
break;
}
// fall through
case PCMK_OCF_NOT_INSTALLED:
case PCMK_OCF_INVALID_PARAM:
case PCMK_OCF_INSUFFICIENT_PRIV:
if (!pe_can_fence(data_set, node)
&& safe_str_eq(task, CRMD_ACTION_STOP)) {
/* If a stop fails and we can't fence, there's nothing else we can do */
pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)",
rsc->id, task, services_ocf_exitcode_str(rc), rc);
clear_bit(rsc->flags, pe_rsc_managed);
set_bit(rsc->flags, pe_rsc_block);
}
result = PCMK_LRM_OP_ERROR_HARD;
break;
default:
/* Any other return code that happened to match the expected one is
 * still treated as a failure
 */
if (result == PCMK_LRM_OP_DONE) {
crm_info("Treating unknown return code %d for %s on %s as failure",
rc, key, node->details->uname);
result = PCMK_LRM_OP_ERROR;
}
break;
}
return result;
}
/*!
 * \internal
 * \brief Check whether an operation history entry has expired
 *
 * An entry is a candidate for expiry when the effective failure timeout is
 * positive and the current effective time is later than the entry's last
 * change plus that timeout. The timeout is treated as 0 for a recurring
 * monitor of a resource with a reconnect interval while fencing is enabled and
 * the matching node has not been recorded as fenced. As a side effect, this
 * may schedule clearing of the resource's fail count (ordered after a fence
 * operation for the matching node when applicable).
 *
 * \param[in] rsc       Resource the history entry is for
 * \param[in] node      Node the operation was recorded on
 * \param[in] rc        Actual return code of the operation
 * \param[in] xml_op    Operation history entry XML
 * \param[in] data_set  Cluster working set
 *
 * \return TRUE if the entry should be treated as expired, otherwise FALSE
 * \note \p task is passed to strcmp() below, so the entry must have a task
 *       attribute (unpack_rsc_op() checks this before calling).
 */
static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, pe_working_set_t * data_set)
{
bool expired = FALSE;
time_t last_failure = 0;
guint interval_ms = 0;
int failure_timeout = rsc->failure_timeout;
const char *key = get_op_key(xml_op);
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
const char *clear_reason = NULL;
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
/* clearing recurring monitor operation failures automatically
* needs to be carefully considered */
if ((interval_ms != 0) && safe_str_eq(task, "monitor")) {
/* TODO, in the future we should consider not clearing recurring monitor
* op failures unless the last action for a resource was a "stop" action.
* otherwise it is possible that clearing the monitor failure will result
* in the resource being in a nondeterministic state.
*
* For now we handle this potential nondeterministic condition for remote
* node connection resources by not clearing a recurring monitor op failure
* until after the node has been fenced. */
if (is_set(data_set->flags, pe_flag_stonith_enabled)
&& rsc->remote_reconnect_ms) {
node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
if (remote_node && remote_node->details->remote_was_fenced == 0) {
if (strstr(ID(xml_op), "last_failure")) {
crm_info("Waiting to clear monitor failure for remote node %s until fencing has occurred", rsc->id);
}
/* disabling failure timeout for this operation because we believe
* fencing of the remote node should occur first. */
failure_timeout = 0;
}
}
}
/* Compare the entry's last change against the failure timeout */
if (failure_timeout > 0) {
int last_run = 0;
if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) {
time_t now = get_effective_time(data_set);
if (now > (last_run + failure_timeout)) {
expired = TRUE;
}
}
}
if (expired) {
if (pe_get_failcount(node, rsc, &last_failure, pe_fc_default, xml_op,
data_set)) {
// There is a fail count ignoring timeout
if (pe_get_failcount(node, rsc, &last_failure, pe_fc_effective,
xml_op, data_set) == 0) {
// There is no fail count considering timeout
clear_reason = "it expired";
} else {
expired = FALSE;
}
} else if (rsc->remote_reconnect_ms
&& strstr(ID(xml_op), "last_failure")) {
// Always clear last failure when reconnect interval is set
clear_reason = "reconnect interval is set";
}
} else if (strstr(ID(xml_op), "last_failure") &&
((strcmp(task, "start") == 0) || (strcmp(task, "monitor") == 0))) {
/* Not expired: a last-failure entry for a start or monitor is still
 * cleared if the digest comparison reports anything other than a match
 * or an unknown digest
 */
if (pe__bundle_needs_remote_name(rsc)) {
/* We haven't allocated resources yet, so we can't reliably
* substitute addr parameters for the REMOTE_CONTAINER_HACK.
* When that's needed, defer the check until later.
*/
pe__add_param_check(xml_op, rsc, node, pe_check_last_failure,
data_set);
} else {
op_digest_cache_t *digest_data = NULL;
digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set);
switch (digest_data->rc) {
case RSC_DIGEST_UNKNOWN:
crm_trace("Resource %s history entry %s on %s has no digest to compare",
rsc->id, key, node->details->id);
break;
case RSC_DIGEST_MATCH:
break;
default:
clear_reason = "resource parameters have changed";
break;
}
}
}
if (clear_reason != NULL) {
// Schedule clearing of the fail count
pe_action_t *clear_op = pe__clear_failcount(rsc, node, clear_reason,
data_set);
if (is_set(data_set->flags, pe_flag_stonith_enabled)
&& rsc->remote_reconnect_ms) {
pe_node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
if (remote_node) {
/* If we're clearing a remote connection due to a reconnect
* interval, we want to wait until any scheduled fencing
* completes.
*
* We could limit this to remote_node->details->unclean, but at
* this point, that's always true (it won't be reliable until
* after unpack_node_loop() is done).
*/
pe_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
data_set);
crm_info("Clearing %s failure will wait until any scheduled "
"fencing of %s completes", task, rsc->id);
order_actions(fence, clear_op, pe_order_implies_then);
}
}
}
/* Probes (status operations with no interval) never expire when they
 * returned one of the results listed below
 */
if (expired && (interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
switch(rc) {
case PCMK_OCF_OK:
case PCMK_OCF_NOT_RUNNING:
case PCMK_OCF_RUNNING_MASTER:
case PCMK_OCF_DEGRADED:
case PCMK_OCF_DEGRADED_MASTER:
/* Don't expire probes that return these values */
expired = FALSE;
break;
}
}
return expired;
}
/*!
 * \brief Get the expected return code recorded for an operation
 *
 * Reads the transition key attribute of the history entry and passes it to
 * decode_transition_key() to extract the target return code.
 *
 * \param[in] xml_op  Operation history entry XML
 *
 * \return -1 if the entry has no transition key, otherwise the value left in
 *         target_rc by decode_transition_key()
 * \note NOTE(review): the result of decode_transition_key() is not checked
 *       here, so what is returned for an undecodable key depends on that
 *       function -- confirm against its implementation.
 */
int get_target_rc(xmlNode *xml_op)
{
- int dummy = 0;
int target_rc = 0;
- char *dummy_string = NULL;
const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
+
if (key == NULL) {
return -1;
}
-
- decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc);
- free(dummy_string);
-
+ decode_transition_key(key, NULL, NULL, NULL, &target_rc);
return target_rc;
}
/*!
 * \internal
 * \brief Look up the on-fail response configured for an operation
 *
 * An action object is obtained for the operation only to read its on_fail
 * setting, and is released again before returning.
 *
 * \param[in] rsc       Resource the operation belongs to
 * \param[in] key       Operation key (a copy is handed to custom_action())
 * \param[in] task      Operation name
 * \param[in] data_set  Cluster working set
 *
 * \return The action's on-fail response
 */
static enum action_fail_response
get_action_on_fail(resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set)
{
    action_t *lookup = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
    int on_fail = lookup->on_fail;

    pe_free_action(lookup);
    return on_fail;
}
/*!
 * \internal
 * \brief Update a resource's role for an operation treated as successful
 *
 * Sets the resource's role according to the result and task, and, for results
 * that supersede an earlier failure, resets the pending failure response.
 *
 * \param[in,out] rsc           Resource the history entry is for
 * \param[in]     node          Node the operation was recorded on
 * \param[in]     xml_op        Operation history entry XML
 * \param[in]     task          Operation name
 * \param[in]     rc            Return code to act on (unpack_rsc_op() passes
 *                              the expected code when a failure is ignored)
 * \param[in]     last_failure  Most recent failed entry seen so far, if any
 * \param[in,out] on_fail       Failure response so far; may be reset to
 *                              action_fail_ignore
 * \param[in,out] data_set      Cluster working set
 */
static void
update_resource_state(resource_t * rsc, node_t * node, xmlNode * xml_op, const char * task, int rc,
xmlNode * last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
gboolean clear_past_failure = FALSE;
CRM_ASSERT(rsc);
CRM_ASSERT(xml_op);
/* The return code is checked before the task: "not running" and "not
 * installed" take precedence over any task-specific handling
 */
if (rc == PCMK_OCF_NOT_RUNNING) {
clear_past_failure = TRUE;
} else if (rc == PCMK_OCF_NOT_INSTALLED) {
rsc->role = RSC_ROLE_STOPPED;
} else if (safe_str_eq(task, CRMD_ACTION_STATUS)) {
/* A status result only clears a past failure of the same operation */
if (last_failure) {
const char *op_key = get_op_key(xml_op);
const char *last_failure_key = get_op_key(last_failure);
if (safe_str_eq(op_key, last_failure_key)) {
clear_past_failure = TRUE;
}
}
if (rsc->role < RSC_ROLE_STARTED) {
set_active(rsc);
}
} else if (safe_str_eq(task, CRMD_ACTION_START)) {
rsc->role = RSC_ROLE_STARTED;
clear_past_failure = TRUE;
} else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
rsc->role = RSC_ROLE_STOPPED;
clear_past_failure = TRUE;
} else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
rsc->role = RSC_ROLE_MASTER;
clear_past_failure = TRUE;
} else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
/* Demote from Master does not clear an error */
rsc->role = RSC_ROLE_SLAVE;
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
rsc->role = RSC_ROLE_STARTED;
clear_past_failure = TRUE;
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
unpack_rsc_migration(rsc, node, xml_op, data_set);
} else if (rsc->role < RSC_ROLE_STARTED) {
pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname);
set_active(rsc);
}
/* clear any previous failure actions */
if (clear_past_failure) {
switch (*on_fail) {
/* These responses are left in place */
case action_fail_stop:
case action_fail_fence:
case action_fail_migrate:
case action_fail_standby:
pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop",
rsc->id, fail2text(*on_fail));
break;
/* These responses are reset, along with the next role */
case action_fail_block:
case action_fail_ignore:
case action_fail_recover:
case action_fail_restart_container:
*on_fail = action_fail_ignore;
rsc->next_role = RSC_ROLE_UNKNOWN;
break;
case action_fail_reset_remote:
if (rsc->remote_reconnect_ms == 0) {
/* With no reconnect interval, the connection is allowed to
* start again after the remote node is fenced and
* completely stopped. (With a reconnect interval, we wait
* for the failure to be cleared entirely before attempting
* to reconnect.)
*/
*on_fail = action_fail_ignore;
rsc->next_role = RSC_ROLE_UNKNOWN;
}
break;
}
}
}
/*!
 * \brief Unpack one operation history entry and update the resource's state
 *
 * Reads the entry's task, return code, call ID, status and interval, handles
 * expired failures and "degraded" monitor results, remaps done/error statuses
 * with determine_op_status(), and then updates the resource according to the
 * final status.
 *
 * \param[in,out] rsc           Resource the history entry belongs to
 * \param[in]     node          Node the operation was recorded on
 * \param[in,out] xml_op        Operation history entry XML (attributes may be
 *                              added to it)
 * \param[in,out] last_failure  Most recent failed entry seen so far; updated
 *                              when this entry is handled as a failure
 * \param[in,out] on_fail       Failure response accumulated so far
 * \param[in,out] data_set      Cluster working set
 *
 * \return FALSE if \p rsc, \p node, \p xml_op or the entry's task is NULL, or
 *         the recorded status is outside the accepted range; otherwise TRUE
 */
gboolean
unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure,
enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
int task_id = 0;
const char *key = NULL;
const char *task = NULL;
const char *task_key = NULL;
int rc = 0;
int status = PCMK_LRM_OP_UNKNOWN;
int target_rc = get_target_rc(xml_op);
guint interval_ms = 0;
gboolean expired = FALSE;
resource_t *parent = rsc;
enum action_fail_response failure_strategy = action_fail_recover;
CRM_CHECK(rsc != NULL, return FALSE);
CRM_CHECK(node != NULL, return FALSE);
CRM_CHECK(xml_op != NULL, return FALSE);
task_key = get_op_key(xml_op);
task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc);
crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status);
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
CRM_CHECK(task != NULL, return FALSE);
CRM_CHECK(status <= PCMK_LRM_OP_NOT_INSTALLED, return FALSE);
CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return FALSE);
if (safe_str_eq(task, CRMD_ACTION_NOTIFY) ||
safe_str_eq(task, CRMD_ACTION_METADATA)) {
/* safe to ignore these */
return TRUE;
}
/* Location bans below are applied to the topmost parent unless the
 * resource has the unique flag set
 */
if (is_not_set(rsc->flags, pe_rsc_unique)) {
parent = uber_parent(rsc);
}
pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)",
task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role));
if (node->details->unclean) {
pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean."
" Further action depends on the value of the stop's on-fail attribute",
node->details->uname, rsc->id);
}
if(status != PCMK_LRM_OP_NOT_INSTALLED) {
expired = check_operation_expiry(rsc, node, rc, xml_op, data_set);
}
/* Degraded results are informational only, re-map them to their error-free equivalents */
if (rc == PCMK_OCF_DEGRADED && safe_str_eq(task, CRMD_ACTION_STATUS)) {
rc = PCMK_OCF_OK;
/* Add them to the failed list to highlight them for the user */
if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED, PCMK_OCF_OK);
record_failed_op(xml_op, node, rsc, data_set);
}
} else if (rc == PCMK_OCF_DEGRADED_MASTER && safe_str_eq(task, CRMD_ACTION_STATUS)) {
rc = PCMK_OCF_RUNNING_MASTER;
/* Add them to the failed list to highlight them for the user */
if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED_MASTER, PCMK_OCF_RUNNING_MASTER);
record_failed_op(xml_op, node, rsc, data_set);
}
}
/* An expired entry with an unexpected result is not processed as a failure:
 * a non-recurring one is ignored outright; a recurring one on an online,
 * clean node gets a restart-digest marker added to the entry instead.
 * NOTE(review): key may be NULL when the entry has no transition key, and
 * is passed to a %s format below -- confirm the logging macros tolerate it.
 */
if (expired && target_rc != rc) {
const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC);
pe_rsc_debug(rsc, "Expired operation '%s' on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
key, node->details->uname,
services_ocf_exitcode_str(rc), rc,
services_ocf_exitcode_str(target_rc), target_rc);
if (interval_ms == 0) {
crm_notice("Ignoring expired calculated failure %s (rc=%d, magic=%s) on %s",
task_key, rc, magic, node->details->uname);
goto done;
} else if(node->details->online && node->details->unclean == FALSE) {
crm_notice("Re-initiated expired calculated failure %s (rc=%d, magic=%s) on %s",
task_key, rc, magic, node->details->uname);
/* This is SO horrible, but we don't have access to CancelXmlOp() yet */
crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout");
goto done;
}
}
/* If the executor reported an operation status of anything but done or
* error, consider that final. But for done or error, we know better whether
* it should be treated as a failure or not, because we know the expected
* result.
*/
if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) {
status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set);
}
pe_rsc_trace(rsc, "Handling status: %d", status);
switch (status) {
case PCMK_LRM_OP_CANCELLED:
/* do nothing?? */
pe_err("Don't know what to do for cancelled ops yet");
break;
case PCMK_LRM_OP_PENDING:
if (safe_str_eq(task, CRMD_ACTION_START)) {
set_bit(rsc->flags, pe_rsc_start_pending);
set_active(rsc);
} else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
rsc->role = RSC_ROLE_MASTER;
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) && node->details->unclean) {
/* If a pending migrate_to action is out on a unclean node,
* we have to force the stop action on the target. */
const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
node_t *target = pe_find_node(data_set->nodes, migrate_target);
if (target) {
stop_action(rsc, target, FALSE);
}
}
/* Remember only the first pending task seen for the resource */
if (rsc->pending_task == NULL) {
if (safe_str_eq(task, CRMD_ACTION_STATUS) && (interval_ms == 0)) {
/* Pending probes are not printed, even if pending
* operations are requested. If someone ever requests that
* behavior, uncomment this and the corresponding part of
* native.c:native_pending_task().
*/
/*rsc->pending_task = strdup("probe");*/
/*rsc->pending_node = node;*/
} else {
rsc->pending_task = strdup(task);
rsc->pending_node = node;
}
}
break;
case PCMK_LRM_OP_DONE:
pe_rsc_trace(rsc, "%s/%s completed on %s", rsc->id, task, node->details->uname);
update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set);
break;
case PCMK_LRM_OP_NOT_INSTALLED:
failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
if (failure_strategy == action_fail_ignore) {
crm_warn("Cannot ignore failed %s (status=%d, rc=%d) on %s: "
"Resource agent doesn't exist",
task_key, status, rc, node->details->uname);
/* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */
*on_fail = action_fail_migrate;
}
resource_location(parent, node, -INFINITY, "hard-error", data_set);
unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
break;
case PCMK_LRM_OP_ERROR:
case PCMK_LRM_OP_ERROR_HARD:
case PCMK_LRM_OP_ERROR_FATAL:
case PCMK_LRM_OP_TIMEOUT:
case PCMK_LRM_OP_NOTSUPPORTED:
failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
/* With on-fail=ignore (or a failed stop with on-fail=restart-container),
 * the state is updated as if the expected result had been returned, but
 * the operation is still recorded in the failed list
 */
if ((failure_strategy == action_fail_ignore)
|| (failure_strategy == action_fail_restart_container
&& safe_str_eq(task, CRMD_ACTION_STOP))) {
crm_warn("Pretending the failure of %s (rc=%d) on %s succeeded",
task_key, rc, node->details->uname);
update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, on_fail, data_set);
crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
set_bit(rsc->flags, pe_rsc_failure_ignored);
record_failed_op(xml_op, node, rsc, data_set);
if (failure_strategy == action_fail_restart_container && *on_fail <= action_fail_recover) {
*on_fail = failure_strategy;
}
} else {
unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
/* Hard errors ban the resource from this node; fatal errors ban it
 * without naming a node
 */
if(status == PCMK_LRM_OP_ERROR_HARD) {
do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE,
"Preventing %s from re-starting on %s: operation %s failed '%s' (%d)",
parent->id, node->details->uname,
task, services_ocf_exitcode_str(rc), rc);
resource_location(parent, node, -INFINITY, "hard-error", data_set);
} else if(status == PCMK_LRM_OP_ERROR_FATAL) {
crm_err("Preventing %s from re-starting anywhere: operation %s failed '%s' (%d)",
parent->id, task, services_ocf_exitcode_str(rc), rc);
resource_location(parent, NULL, -INFINITY, "fatal-error", data_set);
}
}
break;
}
done:
pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s", rsc->id, task, role2text(rsc->role), role2text(rsc->next_role));
return TRUE;
}
/*!
 * \brief Populate a node's attribute table
 *
 * Inserts the node's name, ID and DC status (also recording the node as the
 * working set's DC when its ID matches the DC UUID), the cluster name if one
 * is configured, and the instance attributes unpacked from \p xml_obj. If no
 * site name attribute results from that, it is taken from the "site-name"
 * attribute or, failing that, from the cluster name.
 *
 * \param[in]     xml_obj    XML to unpack instance attributes from
 * \param[in,out] node       Node whose attribute table is filled in
 * \param[in]     overwrite  Passed through to unpack_instance_attributes()
 * \param[in,out] data_set   Cluster working set
 *
 * \return Always TRUE
 */
gboolean
add_node_attrs(xmlNode * xml_obj, node_t * node, gboolean overwrite, pe_working_set_t * data_set)
{
    const char *cluster_name = NULL;
    const char *is_dc_text = XML_BOOLEAN_FALSE;

    g_hash_table_insert(node->details->attrs,
                        strdup(CRM_ATTR_UNAME), strdup(node->details->uname));

    g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID),
                        strdup(node->details->id));

    if (safe_str_eq(node->details->id, data_set->dc_uuid)) {
        data_set->dc_node = node;
        node->details->is_dc = TRUE;
        is_dc_text = XML_BOOLEAN_TRUE;
    }
    g_hash_table_insert(node->details->attrs,
                        strdup(CRM_ATTR_IS_DC), strdup(is_dc_text));

    cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name");
    if (cluster_name != NULL) {
        g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME),
                            strdup(cluster_name));
    }

    unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL,
                               node->details->attrs, NULL, overwrite, data_set->now);

    if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) {
        const char *site_name = pe_node_attribute_raw(node, "site-name");

        if (site_name == NULL) {
            /* Default to cluster-name if unset */
            site_name = cluster_name;
        }
        if (site_name != NULL) {
            g_hash_table_insert(node->details->attrs,
                                strdup(CRM_ATTR_SITE_NAME),
                                strdup(site_name));
        }
    }
    return TRUE;
}
/*!
 * \internal
 * \brief Collect a resource's operation history entries, sorted by call ID
 *
 * Each operation entry found under \p rsc_entry is tagged with the resource
 * and node names before being collected.
 *
 * \param[in]     node           Node name to record on each entry
 * \param[in]     rsc            Resource ID to record on each entry
 * \param[in,out] rsc_entry      Resource history XML whose children are scanned
 * \param[in]     active_filter  If FALSE, return every entry; otherwise return
 *                               only the entries from the start index reported
 *                               by calculate_active_ops() onward (none at all
 *                               when that start index is below the stop index)
 *
 * \return Newly allocated list of the selected entries (the XML nodes
 *         themselves are not copied), or NULL if there are none
 */
static GListPtr
extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
{
    int counter = -1;
    int stop_index = -1;
    int start_index = -1;
    xmlNode *rsc_op = NULL;
    GListPtr gIter = NULL;
    GListPtr op_list = NULL;
    GListPtr sorted_op_list = NULL;

    /* extract operations */
    for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) {
        if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
            crm_xml_add(rsc_op, "resource", rsc);
            crm_xml_add(rsc_op, XML_ATTR_UNAME, node);
            op_list = g_list_prepend(op_list, rsc_op);
        }
    }

    if (op_list == NULL) {
        /* if there are no operations, there is nothing to do */
        return NULL;
    }

    sorted_op_list = g_list_sort(op_list, sort_op_by_callid);

    /* create active recurring operations as optional */
    if (active_filter == FALSE) {
        return sorted_op_list;
    }

    op_list = NULL;
    calculate_active_ops(sorted_op_list, &start_index, &stop_index);

    if (start_index < stop_index) {
        /* The indexes do not change while iterating, so this is decided once
         * up front rather than on every pass through the loop
         */
        crm_trace("Skipping %s: not active", ID(rsc_entry));

    } else {
        for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
            xmlNode *active_op = (xmlNode *) gIter->data;

            counter++;
            if (counter < start_index) {
                crm_trace("Skipping %s: old", ID(active_op));
                continue;
            }
            /* Prepend and reverse afterwards: appending would walk the whole
             * result list for every element
             */
            op_list = g_list_prepend(op_list, active_op);
        }
        op_list = g_list_reverse(op_list);
    }

    g_list_free(sorted_op_list);
    return op_list;
}
/*!
 * \brief Collect operation history entries from the CIB status section
 *
 * Walks every node state entry (refreshing that node's online status along
 * the way) and gathers the operations of each resource history entry found
 * for nodes that are online, or for any node when fencing is enabled.
 *
 * \param[in]     rsc            If not NULL, only consider this resource ID
 * \param[in]     node           If not NULL, only consider this node name
 * \param[in]     active_filter  Passed through to extract_operations()
 * \param[in,out] data_set       Cluster working set
 *
 * \return List of matching operation history entries (NULL if none)
 */
GListPtr
find_operations(const char *rsc, const char *node, gboolean active_filter,
                pe_working_set_t * data_set)
{
    GListPtr output = NULL;
    xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE);
    xmlNode *node_state = NULL;

    for (node_state = __xml_first_child(status); node_state != NULL;
         node_state = __xml_next_element(node_state)) {

        const char *uname = NULL;
        node_t *this_node = NULL;
        xmlNode *history = NULL;
        xmlNode *lrm_rsc = NULL;

        if (!crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
            continue;
        }

        uname = crm_element_value(node_state, XML_ATTR_UNAME);
        if ((node != NULL) && safe_str_neq(uname, node)) {
            continue;
        }

        this_node = pe_find_node(data_set->nodes, uname);
        if (this_node == NULL) {
            CRM_LOG_ASSERT(this_node != NULL);
            continue;
        }

        if (pe__is_guest_or_remote_node(this_node)) {
            determine_remote_online_status(data_set, this_node);
        } else {
            determine_online_status(node_state, this_node, data_set);
        }

        /* offline nodes run no resources...
         * unless stonith is enabled in which case we need to
         * make sure rsc start events happen after the stonith
         */
        if (!(this_node->details->online
              || is_set(data_set->flags, pe_flag_stonith_enabled))) {
            continue;
        }

        history = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
        history = find_xml_node(history, XML_LRM_TAG_RESOURCES, FALSE);

        for (lrm_rsc = __xml_first_child(history); lrm_rsc != NULL;
             lrm_rsc = __xml_next_element(lrm_rsc)) {

            const char *rsc_id = NULL;

            if (!crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) {
                continue;
            }

            rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID);
            if ((rsc != NULL) && safe_str_neq(rsc_id, rsc)) {
                continue;
            }

            output = g_list_concat(output,
                                   extract_operations(uname, rsc_id, lrm_rsc,
                                                      active_filter));
        }
    }
    return output;
}
diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
index a788309f6a..2f4dc1e84e 100644
--- a/lib/pengine/utils.c
+++ b/lib/pengine/utils.c
@@ -1,2417 +1,2418 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/* Declared here, defined outside this part of the file */
extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root);
/* Callbacks taking (key, value, user_data) */
void print_str_str(gpointer key, gpointer value, gpointer user_data);
gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data);
void unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container,
pe_working_set_t * data_set);
/* File-local helper, declared ahead of its use */
static xmlNode *find_rsc_op_entry_helper(resource_t * rsc, const char *key,
gboolean include_disabled);
#if ENABLE_VERSIONED_ATTRS
/*!
 * \internal
 * \brief Get the resource-specific details of an action, creating them if needed
 *
 * The details structure and its two XML containers (versioned parameters and
 * versioned meta-attributes) are allocated lazily on first use.
 *
 * \param[in,out] action  Action whose details are wanted
 *
 * \return Details of \p action, or NULL if \p action is NULL or the details
 *         structure could not be allocated
 */
pe_rsc_action_details_t *
pe_rsc_action_details(pe_action_t *action)
{
    pe_rsc_action_details_t *rsc_details = NULL;

    CRM_CHECK(action != NULL, return NULL);

    rsc_details = (pe_rsc_action_details_t *) action->action_details;
    if (rsc_details == NULL) {
        // First request for this action: allocate a zeroed structure
        rsc_details = calloc(1, sizeof(pe_rsc_action_details_t));
        action->action_details = rsc_details;
        CRM_CHECK(action->action_details != NULL, return NULL);
    }

    // Make sure both XML containers exist before handing the details out
    if (rsc_details->versioned_parameters == NULL) {
        rsc_details->versioned_parameters =
            create_xml_node(NULL, XML_TAG_OP_VER_ATTRS);
    }
    if (rsc_details->versioned_meta == NULL) {
        rsc_details->versioned_meta =
            create_xml_node(NULL, XML_TAG_OP_VER_META);
    }
    return rsc_details;
}
/*!
 * \internal
 * \brief Free the resource-specific details attached to an action
 *
 * Releases the versioned parameter and meta-attribute XML, then the details
 * structure itself, and clears the action's pointer to it. Does nothing if
 * \p action is NULL or has no details.
 *
 * \param[in,out] action  Action whose details should be freed
 */
static void
pe_free_rsc_action_details(pe_action_t *action)
{
    pe_rsc_action_details_t *details;

    if ((action == NULL) || (action->action_details == NULL)) {
        return;
    }

    details = (pe_rsc_action_details_t *) action->action_details;

    if (details->versioned_parameters) {
        free_xml(details->versioned_parameters);
    }
    if (details->versioned_meta) {
        free_xml(details->versioned_meta);
    }

    /* The structure itself is calloc()ed by pe_rsc_action_details(), so it
     * must be released here too; clearing the pointer alone would leak it.
     */
    free(details);
    action->action_details = NULL;
}
#endif
/*!
 * \internal
 * \brief Check whether we can fence a particular node
 *
 * Fencing requires that it be enabled and that at least one fence device
 * exist. Beyond that, it is allowed when the cluster has quorum, when the
 * no-quorum policy is "ignore", or when the node is online.
 *
 * \param[in] data_set  Working set for cluster
 * \param[in] node      Node to check (may be NULL)
 *
 * \return TRUE if node can be fenced, FALSE otherwise
 *
 * \note This function should only be called for cluster nodes and
 *       remote nodes; guest nodes are fenced by stopping their container
 *       resource, so fence execution requirements do not apply to them.
 */
bool pe_can_fence(pe_working_set_t * data_set, node_t *node)
{
    if (is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
        return FALSE;           /* Turned off */
    }
    if (is_not_set(data_set->flags, pe_flag_have_stonith_resource)) {
        return FALSE;           /* No devices */
    }

    // With quorum, or when lack of quorum is ignored, any node may be fenced
    if (is_set(data_set->flags, pe_flag_have_quorum)
        || (data_set->no_quorum_policy == no_quorum_ignore)) {
        return TRUE;
    }

    if (node == NULL) {
        return FALSE;
    }

    if (node->details->online) {
        crm_notice("We can fence %s without quorum because they're in our membership", node->details->uname);
        return TRUE;
    }

    crm_trace("Cannot fence %s", node->details->uname);
    return FALSE;
}
/*!
 * \brief Create a shallow copy of a node object
 *
 * The copy gets its own weight, fixed and discovery-mode values, but shares
 * the original's \c details pointer (the details are not duplicated).
 *
 * \param[in] this_node  Node to copy
 *
 * \return Newly allocated copy, or NULL if \p this_node is NULL
 */
node_t *
node_copy(const node_t *this_node)
{
    node_t *new_node = NULL;

    CRM_CHECK(this_node != NULL, return NULL);

    new_node = calloc(1, sizeof(*new_node));
    CRM_ASSERT(new_node != NULL);

    crm_trace("Copying %p (%s) to %p", this_node, this_node->details->uname, new_node);

    // Shared, not duplicated
    new_node->details = this_node->details;

    // Per-copy values
    new_node->weight = this_node->weight;
    new_node->fixed = this_node->fixed;
    new_node->rsc_discover_mode = this_node->rsc_discover_mode;

    return new_node;
}
/*!
 * \brief Give -INFINITY to every node that is in only one of a table and a list
 *
 * Nodes in \p hash that are missing from \p list get a weight of -INFINITY.
 * Nodes in \p list that are missing from \p hash are copied into \p hash with
 * a weight of -INFINITY. Nodes present in both optionally get their weights
 * merged.
 *
 * \param[in,out] hash          Table of nodes, keyed by node ID
 * \param[in]     list          List of nodes to compare against
 * \param[in]     merge_scores  If TRUE, merge weights of nodes present in both
 */
void
node_list_exclude(GHashTable * hash, GListPtr list, gboolean merge_scores)
{
    GHashTableIter table_iter;
    node_t *table_node = NULL;
    GListPtr item = NULL;

    // Pass 1: adjust nodes that are already in the table
    g_hash_table_iter_init(&table_iter, hash);
    while (g_hash_table_iter_next(&table_iter, NULL, (void **)&table_node)) {
        node_t *match = pe_find_node_id(list, table_node->details->id);

        if (match == NULL) {
            table_node->weight = -INFINITY;
            continue;
        }
        if (merge_scores) {
            table_node->weight = merge_weights(table_node->weight,
                                               match->weight);
        }
    }

    // Pass 2: add list-only nodes to the table as banned copies
    for (item = list; item != NULL; item = item->next) {
        node_t *list_node = (node_t *) item->data;
        node_t *banned = NULL;

        if (pe_hash_table_lookup(hash, list_node->details->id) != NULL) {
            continue;
        }
        banned = node_copy(list_node);
        banned->weight = -INFINITY;
        g_hash_table_insert(hash, (gpointer) banned->details->id, banned);
    }
}
/*!
 * \brief Build a hash table of node copies from a node list
 *
 * Each list entry is copied with node_copy() and stored under its node ID.
 * The table frees the copies (values) when destroyed; the keys are not freed
 * by the table.
 *
 * \param[in] list  List of nodes to copy
 *
 * \return Newly created table
 */
GHashTable *
node_hash_from_list(GListPtr list)
{
    GHashTable *table = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                              NULL, free);
    GListPtr item = NULL;

    for (item = list; item != NULL; item = item->next) {
        node_t *copy = node_copy((node_t *) item->data);

        g_hash_table_insert(table, (gpointer) copy->details->id, copy);
    }
    return table;
}
/*!
 * \brief Duplicate a list of nodes
 *
 * Each entry is copied with node_copy(). Because copies are prepended, the
 * result is in reverse order relative to \p list1.
 *
 * \param[in] list1   List of nodes to copy
 * \param[in] reset   If TRUE, set the weight of every copy to 0
 * \param[in] filter  If TRUE, skip nodes whose weight is negative
 *
 * \return Newly allocated list of newly allocated node copies
 */
GListPtr
node_list_dup(GListPtr list1, gboolean reset, gboolean filter)
{
    GListPtr result = NULL;
    GListPtr gIter = NULL;

    for (gIter = list1; gIter != NULL; gIter = gIter->next) {
        node_t *new_node = NULL;
        node_t *this_node = (node_t *) gIter->data;

        // Guard the dereference so a NULL entry cannot crash the filter test
        if (filter && (this_node != NULL) && (this_node->weight < 0)) {
            continue;
        }

        new_node = node_copy(this_node);
        if (new_node == NULL) {
            // node_copy() has already logged the problem (NULL entry)
            continue;
        }

        // Only touch the copy once we know it exists
        if (reset) {
            new_node->weight = 0;
        }
        result = g_list_prepend(result, new_node);
    }
    return result;
}
/*!
 * \brief Compare two nodes by name, for use as a GLib sort function
 *
 * Names are compared piece by piece: runs of digits are compared numerically
 * (with the shorter run first when the values are equal), and all other
 * characters are compared case-insensitively. If one name is a prefix of the
 * other, the shorter name sorts first.
 *
 * \param[in] a  First node (node_t *)
 * \param[in] b  Second node (node_t *)
 *
 * \return -1 if \p a sorts first, 1 if \p b sorts first, 0 if equal
 */
gint
sort_node_uname(gconstpointer a, gconstpointer b)
{
    const char *name_a = ((const node_t *) a)->details->uname;
    const char *name_b = ((const node_t *) b)->details->uname;

    while (*name_a && *name_b) {

        /* The <ctype.h> functions require a value representable as
         * unsigned char (or EOF); passing a plain char that happens to be
         * negative is undefined behavior, so convert explicitly.
         */
        if (isdigit((unsigned char) *name_a)
            && isdigit((unsigned char) *name_b)) {
            // If node names contain a number, sort numerically

            char *end_a = NULL;
            char *end_b = NULL;
            long num_a = strtol(name_a, &end_a, 10);
            long num_b = strtol(name_b, &end_b, 10);

            // allow ordering e.g. 007 > 7
            size_t len_a = end_a - name_a;
            size_t len_b = end_b - name_b;

            if (num_a < num_b) {
                return -1;
            } else if (num_a > num_b) {
                return 1;
            } else if (len_a < len_b) {
                return -1;
            } else if (len_a > len_b) {
                return 1;
            }
            name_a = end_a;
            name_b = end_b;

        } else {
            // Compare non-digits case-insensitively
            int lower_a = tolower((unsigned char) *name_a);
            int lower_b = tolower((unsigned char) *name_b);

            if (lower_a < lower_b) {
                return -1;
            } else if (lower_a > lower_b) {
                return 1;
            }
            ++name_a;
            ++name_b;
        }
    }

    // One name is a prefix of the other (or they are equal)
    if (!*name_a && *name_b) {
        return -1;
    } else if (*name_a && !*name_b) {
        return 1;
    }
    return 0;
}
/*!
 * \brief Print or log the score of every node in a table
 *
 * If \p rsc is given, its allowed-nodes table is used instead of \p nodes,
 * and the function recurses into the resource's children. Nothing is shown
 * for orphaned resources.
 *
 * \param[in] level     If 0, print to stdout sorted by node name; otherwise
 *                      log at trace level
 * \param[in] file      Source file name to attribute log messages to
 * \param[in] function  Function name to attribute log messages to
 * \param[in] line      Line number to attribute log messages to
 * \param[in] rsc       Resource whose allocation scores to show, or NULL
 * \param[in] comment   Text to prefix each output line with
 * \param[in] nodes     Table of nodes to show when \p rsc is NULL
 */
void
dump_node_scores_worker(int level, const char *file, const char *function, int line,
                        resource_t * rsc, const char *comment, GHashTable * nodes)
{
    GHashTable *scores = (rsc != NULL)? rsc->allowed_nodes : nodes;
    char score_text[128];
    GListPtr item = NULL;

    if ((rsc != NULL) && is_set(rsc->flags, pe_rsc_orphan)) {
        /* Don't show the allocation scores for orphans */
        return;
    }

    if (level == 0) {
        /* For now we want this in sorted order to keep the regression tests happy */
        GListPtr sorted = g_list_sort(g_hash_table_get_values(scores),
                                      sort_node_uname);

        for (item = sorted; item != NULL; item = item->next) {
            node_t *entry = (node_t *) item->data;

            /* This function is called a whole lot, use stack allocated score */
            score2char_stack(entry->weight, score_text, sizeof(score_text));

            if (rsc != NULL) {
                printf("%s: %s allocation score on %s: %s\n",
                       comment, rsc->id, entry->details->uname, score_text);
            } else {
                printf("%s: %s = %s\n", comment, entry->details->uname, score_text);
            }
        }
        g_list_free(sorted);

    } else if (scores != NULL) {
        GHashTableIter table_iter;
        node_t *entry = NULL;

        g_hash_table_iter_init(&table_iter, scores);
        while (g_hash_table_iter_next(&table_iter, NULL, (void **)&entry)) {
            /* This function is called a whole lot, use stack allocated score */
            score2char_stack(entry->weight, score_text, sizeof(score_text));

            if (rsc != NULL) {
                do_crm_log_alias(LOG_TRACE, file, function, line,
                                 "%s: %s allocation score on %s: %s", comment, rsc->id,
                                 entry->details->uname, score_text);
            } else {
                do_crm_log_alias(LOG_TRACE, file, function, line + 1, "%s: %s = %s", comment,
                                 entry->details->uname, score_text);
            }
        }
    }

    if (rsc == NULL) {
        return;
    }

    // Show the children's scores too (an empty child list is a no-op)
    for (item = rsc->children; item != NULL; item = item->next) {
        dump_node_scores_worker(level, file, function, line,
                                (resource_t *) item->data, comment, nodes);
    }
}
/*!
 * \internal
 * \brief GHashTable foreach callback that appends " key=value" to a string
 *
 * \param[in]     key        Name to append (char *)
 * \param[in]     value      Value to append (char *)
 * \param[in,out] user_data  Address of the heap-allocated string to extend
 *                           (char **); the old string is freed and replaced
 */
static void
append_dump_text(gpointer key, gpointer value, gpointer user_data)
{
    char **text_ref = (char **) user_data;
    char *previous = *text_ref;

    *text_ref = crm_strdup_printf("%s %s=%s",
                                  previous, (char *)key, (char *)value);
    free(previous);
}
/*!
 * \brief Print or log a node's utilization attributes on one line
 *
 * \param[in] level    If 0, print to stdout; otherwise log at trace level
 * \param[in] comment  Text to prefix the line with
 * \param[in] node     Node whose utilization table to show
 */
void
dump_node_capacity(int level, const char *comment, node_t * node)
{
    char *text = crm_strdup_printf("%s: %s capacity:",
                                   comment, node->details->uname);

    // Each table entry appends " name=value" to the line
    g_hash_table_foreach(node->details->utilization, append_dump_text, &text);

    if (level != 0) {
        crm_trace("%s", text);
    } else {
        fprintf(stdout, "%s\n", text);
    }
    free(text);
}
/*!
 * \brief Print or log a resource's utilization attributes on one line
 *
 * \param[in] level    If 0, print to stdout; otherwise log at trace level
 * \param[in] comment  Text to prefix the line with
 * \param[in] rsc      Resource whose utilization table to show
 * \param[in] node     Node named in the output line
 */
void
dump_rsc_utilization(int level, const char *comment, resource_t * rsc, node_t * node)
{
    char *text = crm_strdup_printf("%s: %s utilization on %s:",
                                   comment, rsc->id, node->details->uname);

    // Each table entry appends " name=value" to the line
    g_hash_table_foreach(rsc->utilization, append_dump_text, &text);

    if (level != 0) {
        crm_trace("%s", text);
    } else {
        fprintf(stdout, "%s\n", text);
    }
    free(text);
}
/*!
 * \brief Compare two resources by sort index, for use as a GLib sort function
 *
 * Resources with a higher sort index sort first; NULL sorts last.
 *
 * \param[in] a  First resource (resource_t *), or NULL
 * \param[in] b  Second resource (resource_t *), or NULL
 *
 * \return -1 if \p a sorts first, 1 if \p b sorts first, 0 if equal
 */
gint
sort_rsc_index(gconstpointer a, gconstpointer b)
{
    const resource_t *lhs = (const resource_t *)a;
    const resource_t *rhs = (const resource_t *)b;

    if (a == NULL) {
        return (b == NULL)? 0 : 1;
    }
    if (b == NULL) {
        return -1;
    }

    if (lhs->sort_index == rhs->sort_index) {
        return 0;
    }
    return (lhs->sort_index > rhs->sort_index)? -1 : 1;
}
/*!
 * \brief Compare two resources by priority, for use as a GLib sort function
 *
 * Resources with a higher priority sort first; NULL sorts last.
 *
 * \param[in] a  First resource (resource_t *), or NULL
 * \param[in] b  Second resource (resource_t *), or NULL
 *
 * \return -1 if \p a sorts first, 1 if \p b sorts first, 0 if equal
 */
gint
sort_rsc_priority(gconstpointer a, gconstpointer b)
{
    const resource_t *lhs = (const resource_t *)a;
    const resource_t *rhs = (const resource_t *)b;

    if (a == NULL) {
        return (b == NULL)? 0 : 1;
    }
    if (b == NULL) {
        return -1;
    }

    if (lhs->priority == rhs->priority) {
        return 0;
    }
    return (lhs->priority > rhs->priority)? -1 : 1;
}
/*!
 * \brief Find an existing action matching the arguments, or create a new one
 *
 * When \p save_action is set, an existing action with the same key and node
 * is reused (searched in the resource's actions if \p rsc is given, otherwise
 * in the working set's actions). Otherwise, or if none is found, a new action
 * is allocated and, if \p save_action is set, recorded in the working set
 * (and in the resource's action list, if any). In either case the action's
 * optional/runnable flags are then updated for the current cluster state.
 *
 * \param[in]     rsc          Resource the action is for (or NULL)
 * \param[in]     key          Operation key; this function takes ownership
 *                             and frees it before returning
 * \param[in]     task         Action name
 * \param[in]     on_node      Node the action is for (or NULL)
 * \param[in]     optional     Whether the action should be flagged optional
 * \param[in]     save_action  Whether to look up existing actions and record
 *                             a newly created one
 * \param[in,out] data_set     Cluster working set
 *
 * \return Existing or newly created action (NULL if \p key or \p task is NULL)
 */
action_t *
custom_action(resource_t * rsc, char *key, const char *task,
node_t * on_node, gboolean optional, gboolean save_action,
pe_working_set_t * data_set)
{
action_t *action = NULL;
GListPtr possible_matches = NULL;
CRM_CHECK(key != NULL, return NULL);
// key is owned by this function, so it must be freed on this early return too
CRM_CHECK(task != NULL, free(key); return NULL);
// Only saved actions are searched for an existing equivalent
if (save_action && rsc != NULL) {
possible_matches = find_actions(rsc->actions, key, on_node);
} else if(save_action) {
#if 0
action = g_hash_table_lookup(data_set->singletons, key);
#else
/* More expensive but takes 'node' into account */
possible_matches = find_actions(data_set->actions, key, on_node);
#endif
}
// Created on first use; neither keys nor values are freed by the table
if(data_set->singletons == NULL) {
data_set->singletons = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL);
}
if (possible_matches != NULL) {
if (g_list_length(possible_matches) > 1) {
pe_warn("Action %s for %s on %s exists %d times",
task, rsc ? rsc->id : "",
on_node ? on_node->details->uname : "", g_list_length(possible_matches));
}
// When there are several matches, the first one is used
action = g_list_nth_data(possible_matches, 0);
pe_rsc_trace(rsc, "Found existing action %d (%s) for %s (%s) on %s",
action->id, action->uuid,
(rsc? rsc->id : "no resource"), task,
(on_node? on_node->details->uname : "no node"));
g_list_free(possible_matches);
}
// No existing action was found (or none was looked for): create one
if (action == NULL) {
if (save_action) {
pe_rsc_trace(rsc, "Creating %s action %d: %s for %s (%s) on %s",
(optional? "optional" : " mandatory"),
data_set->action_id, key,
(rsc? rsc->id : "no resource"), task,
(on_node? on_node->details->uname : "no node"));
}
// NOTE(review): the calloc() result is used below without a NULL check
action = calloc(1, sizeof(action_t));
// Only saved actions consume an ID from the working set's counter
if (save_action) {
action->id = data_set->action_id++;
} else {
action->id = 0;
}
action->rsc = rsc;
CRM_ASSERT(task != NULL);
action->task = strdup(task);
// The action gets its own copy of the node object
if (on_node) {
action->node = node_copy(on_node);
}
action->uuid = strdup(key);
// New actions start out runnable; this may be cleared further below
pe_set_action_bit(action, pe_action_runnable);
if (optional) {
pe_set_action_bit(action, pe_action_optional);
} else {
pe_clear_action_bit(action, pe_action_optional);
}
action->extra = crm_str_table_new();
action->meta = crm_str_table_new();
if (save_action) {
data_set->actions = g_list_prepend(data_set->actions, action);
// Actions without a resource are also indexed by key in the singletons table
if(rsc == NULL) {
g_hash_table_insert(data_set->singletons, action->uuid, action);
}
}
if (rsc != NULL) {
// Look up the operation's configuration (disabled entries included) and unpack it
action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE);
unpack_operation(action, action->op_entry, rsc->container, data_set);
if (save_action) {
rsc->actions = g_list_prepend(rsc->actions, action);
}
}
if (save_action) {
pe_rsc_trace(rsc, "Action %d created", action->id);
}
}
// A mandatory request overrides the optional flag of a reused action
if (!optional && is_set(action->flags, pe_action_optional)) {
pe_rsc_trace(rsc, "Unset optional on action %d", action->id);
pe_clear_action_bit(action, pe_action_optional);
}
if (rsc != NULL) {
enum action_tasks a_task = text2task(action->task);
// Unrunnable actions are logged as warnings only when the action is saved
int warn_level = LOG_TRACE;
if (save_action) {
warn_level = LOG_WARNING;
}
// Unpack the operation's instance attributes once, when a node and op entry are known
if (is_set(action->flags, pe_action_have_node_attrs) == FALSE
&& action->node != NULL && action->op_entry != NULL) {
pe_set_action_bit(action, pe_action_have_node_attrs);
unpack_instance_attributes(data_set->input, action->op_entry, XML_TAG_ATTR_SETS,
action->node->details->attrs,
action->extra, NULL, FALSE, data_set->now);
}
// Decide runnability; the first matching condition in this chain wins
if (is_set(action->flags, pe_action_pseudo)) {
/* leave untouched */
} else if (action->node == NULL) {
pe_rsc_trace(rsc, "Unset runnable on %s", action->uuid);
pe_clear_action_bit(action, pe_action_runnable);
} else if (is_not_set(rsc->flags, pe_rsc_managed)
&& g_hash_table_lookup(action->meta,
XML_LRM_ATTR_INTERVAL_MS) == NULL) {
// Unmanaged resource and no interval in the action's meta-attributes
crm_debug("Action %s (unmanaged)", action->uuid);
pe_rsc_trace(rsc, "Set optional on %s", action->uuid);
pe_set_action_bit(action, pe_action_optional);
/* action->runnable = FALSE; */
} else if (action->node->details->online == FALSE
&& (!pe__is_guest_node(action->node)
|| action->node->details->remote_requires_reset)) {
pe_clear_action_bit(action, pe_action_runnable);
do_crm_log(warn_level, "Action %s on %s is unrunnable (offline)",
action->uuid, action->node->details->uname);
// A saved stop of a managed resource that cannot run on a not-yet-unclean node leads to fencing
if (is_set(action->rsc->flags, pe_rsc_managed)
&& save_action && a_task == stop_rsc
&& action->node->details->unclean == FALSE) {
pe_fence_node(data_set, action->node, "resource actions are unrunnable");
}
} else if (action->node->details->pending) {
pe_clear_action_bit(action, pe_action_runnable);
do_crm_log(warn_level, "Action %s on %s is unrunnable (pending)",
action->uuid, action->node->details->uname);
} else if (action->needs == rsc_req_nothing) {
pe_rsc_trace(rsc, "Action %s does not require anything", action->uuid);
pe_action_set_reason(action, NULL, TRUE);
pe_set_action_bit(action, pe_action_runnable);
#if 0
/*
* No point checking this
* - if we don't have quorum we can't stonith anyway
*/
} else if (action->needs == rsc_req_stonith) {
crm_trace("Action %s requires only stonith", action->uuid);
action->runnable = TRUE;
#endif
} else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE
&& data_set->no_quorum_policy == no_quorum_stop) {
pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, NULL, "no quorum", pe_action_runnable, TRUE);
crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid);
} else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE
&& data_set->no_quorum_policy == no_quorum_freeze) {
pe_rsc_trace(rsc, "Check resource is already active: %s %s %s %s", rsc->id, action->uuid, role2text(rsc->next_role), role2text(rsc->role));
// With the freeze policy, the flag is only changed if the resource is not active or its next role is greater than its current one
if (rsc->fns->active(rsc, TRUE) == FALSE || rsc->next_role > rsc->role) {
pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, NULL, "quorum freeze", pe_action_runnable, TRUE);
pe_rsc_debug(rsc, "%s\t%s (cancelled : quorum freeze)",
action->node->details->uname, action->uuid);
}
} else if(is_not_set(action->flags, pe_action_runnable)) {
pe_rsc_trace(rsc, "Action %s is runnable", action->uuid);
//pe_action_set_reason(action, NULL, TRUE);
pe_set_action_bit(action, pe_action_runnable);
}
// Saved start/stop actions are reflected in the resource's stopping/starting flags
if (save_action) {
switch (a_task) {
case stop_rsc:
set_bit(rsc->flags, pe_rsc_stopping);
break;
case start_rsc:
clear_bit(rsc->flags, pe_rsc_starting);
if (is_set(action->flags, pe_action_runnable)) {
set_bit(rsc->flags, pe_rsc_starting);
}
break;
default:
break;
}
}
}
// The action holds its own copy (uuid), so the caller's key is released here
free(key);
return action;
}
/*!
 * \internal
 * \brief Determine the configured on-fail value for an action
 *
 * The value normally comes from the action's meta-attributes. Two special
 * cases apply: "standby" is rejected for stop actions, and a demote action
 * without its own value inherits the on-fail of the first enabled recurring
 * monitor configured for the Master role.
 *
 * \param[in] action  Action to check
 *
 * \return Configured on-fail value, or NULL if none applies
 */
static const char *
unpack_operation_on_fail(action_t * action)
{
    const char *value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL);
    xmlNode *candidate = NULL;

    if (safe_str_eq(action->task, CRMD_ACTION_STOP) && safe_str_eq(value, "standby")) {
        crm_config_err("on-fail=standby is not allowed for stop actions: %s", action->rsc->id);
        return NULL;
    }

    // Only a demote without an explicit value needs the monitor lookup
    if (!safe_str_eq(action->task, CRMD_ACTION_DEMOTE) || (value != NULL)) {
        return value;
    }

    /* demote on_fail defaults to master monitor value if present */
    CRM_CHECK(action->rsc != NULL, return NULL);

    for (candidate = __xml_first_child(action->rsc->ops_xml);
         candidate != NULL; candidate = __xml_next_element(candidate)) {

        const char *on_fail = NULL;
        const char *enabled = NULL;

        if (!crm_str_eq((const char *)candidate->name, "op", TRUE)) {
            continue;
        }

        on_fail = crm_element_value(candidate, XML_OP_ATTR_ON_FAIL);
        if (on_fail == NULL) {
            continue;
        }

        enabled = crm_element_value(candidate, "enabled");
        if ((enabled != NULL) && !crm_is_true(enabled)) {
            continue;
        }

        if (safe_str_neq(crm_element_value(candidate, "name"), "monitor")
            || safe_str_neq(crm_element_value(candidate, "role"), "Master")) {
            continue;
        }

        // A zero interval means this is not a recurring monitor
        if (crm_parse_interval_spec(crm_element_value(candidate, XML_LRM_ATTR_INTERVAL)) == 0) {
            continue;
        }

        // First suitable monitor wins
        value = on_fail;
        break;
    }
    return value;
}
/*!
 * \internal
 * \brief Find a resource's recurring monitor with the smallest interval
 *
 * \param[in] rsc               Resource whose operations to search
 * \param[in] include_disabled  If FALSE, skip operations with enabled=false
 *
 * \return XML of the monitor operation with the smallest nonzero interval,
 *         or NULL if the resource has no such operation
 */
static xmlNode *
find_min_interval_mon(resource_t * rsc, gboolean include_disabled)
{
    guint shortest_ms = G_MAXUINT;
    xmlNode *shortest = NULL;
    xmlNode *candidate = NULL;

    for (candidate = __xml_first_child(rsc->ops_xml); candidate != NULL;
         candidate = __xml_next_element(candidate)) {

        const char *op_name = NULL;
        const char *op_interval = NULL;
        const char *op_enabled = NULL;
        guint candidate_ms = 0;

        if (!crm_str_eq((const char *)candidate->name, "op", TRUE)) {
            continue;
        }

        op_name = crm_element_value(candidate, "name");
        op_interval = crm_element_value(candidate, XML_LRM_ATTR_INTERVAL);
        op_enabled = crm_element_value(candidate, "enabled");

        if (!include_disabled && op_enabled && crm_is_true(op_enabled) == FALSE) {
            continue;
        }
        if (safe_str_neq(op_name, RSC_STATUS)) {
            continue;
        }

        // Zero intervals are not recurring, so they never qualify
        candidate_ms = crm_parse_interval_spec(op_interval);
        if ((candidate_ms != 0) && (candidate_ms < shortest_ms)) {
            shortest_ms = candidate_ms;
            shortest = candidate;
        }
    }
    return shortest;
}
/*!
 * \internal
 * \brief Parse a start delay and optionally store it as integer milliseconds
 *
 * \param[in]     value  Configured start delay (or NULL if none)
 * \param[in,out] meta   If not NULL, table in which to replace the start
 *                       delay with its parsed value
 *
 * \return Start delay in milliseconds (0 if \p value is NULL or negative)
 */
static int
unpack_start_delay(const char *value, GHashTable *meta)
{
    int delay_ms = 0;

    if (value == NULL) {
        return 0;
    }

    delay_ms = crm_get_msec(value);
    if (delay_ms < 0) {
        delay_ms = 0;
    }

    if (meta != NULL) {
        g_hash_table_replace(meta, strdup(XML_OP_ATTR_START_DELAY), crm_itoa(delay_ms));
    }
    return delay_ms;
}
/*!
 * \internal
 * \brief Convert an interval origin into a start delay
 *
 * For a recurring operation with an interval origin, find the first
 * occurrence of (origin + N * interval) that is not before the current time,
 * and use the time remaining until then as the operation's start delay.
 *
 * \param[in]     value        Configured interval origin (or NULL if none)
 * \param[in,out] meta         If not NULL, table in which to store the
 *                             calculated start delay
 * \param[in]     xml_obj      Operation XML, used for log messages (or NULL)
 * \param[in]     interval_ms  Operation interval in milliseconds
 * \param[in]     now          Current time (or NULL if unknown)
 *
 * \return Calculated start delay in milliseconds, or 0 if none was calculated
 *         (non-recurring operation, no or invalid origin, unknown current
 *         time, or an interval shorter than one second)
 */
static int
unpack_interval_origin(const char *value, GHashTable *meta, xmlNode *xml_obj,
                       guint interval_ms, crm_time_t *now)
{
    int start_delay = 0;
    int interval_sec = 0;
    int rc = 0;
    long long delay_s = 0;
    crm_time_t *origin = NULL;
    crm_time_t *delay = NULL;

    // Only recurring operations with a configured origin are affected
    if ((interval_ms == 0) || (value == NULL)) {
        return 0;
    }

    origin = crm_time_new(value);
    if (origin == NULL) {
        if (xml_obj) {
            crm_config_err("Operation %s contained an invalid " XML_OP_ATTR_ORIGIN ": %s",
                           ID(xml_obj), value);
        }
        return 0;
    }

    if (now == NULL) {
        // Nothing to compare against; do not leak the parsed origin
        crm_time_free(origin);
        return 0;
    }

    interval_sec = interval_ms / 1000;
    crm_trace("Origin: %s, interval: %d", value, interval_sec);

    if (interval_sec == 0) {
        /* The loops below step in whole seconds, so a sub-second interval
         * would step by zero and never terminate. Such an interval cannot be
         * aligned at this granularity, so no delay is calculated.
         */
        crm_trace("Ignoring " XML_OP_ATTR_ORIGIN " for sub-second interval (%ums)",
                  interval_ms);
        crm_time_free(origin);
        return 0;
    }

    rc = crm_time_compare(origin, now);

    /* If 'origin' is in the future, find the most recent "multiple" that occurred in the past */
    while (rc > 0) {
        crm_time_add_seconds(origin, -interval_sec);
        rc = crm_time_compare(origin, now);
    }

    /* Now find the first "multiple" that occurs after 'now' */
    while (rc < 0) {
        crm_time_add_seconds(origin, interval_sec);
        rc = crm_time_compare(origin, now);
    }

    delay = crm_time_calculate_duration(origin, now);

    crm_time_log(LOG_TRACE, "origin", origin,
                 crm_time_log_date | crm_time_log_timeofday |
                 crm_time_log_with_timezone);
    crm_time_log(LOG_TRACE, "now", now,
                 crm_time_log_date | crm_time_log_timeofday |
                 crm_time_log_with_timezone);
    crm_time_log(LOG_TRACE, "delay", delay, crm_time_log_duration);

    delay_s = crm_time_get_seconds(delay);
    if (delay_s < 0) {
        delay_s = 0;
    }
    start_delay = delay_s * 1000;

    if (xml_obj) {
        crm_info("Calculated a start delay of %llds for %s", delay_s, ID(xml_obj));
    }
    if (meta) {
        g_hash_table_replace(meta, strdup(XML_OP_ATTR_START_DELAY),
                             crm_itoa(start_delay));
    }

    crm_time_free(origin);
    crm_time_free(delay);
    return start_delay;
}
/*!
 * \internal
 * \brief Parse a timeout value, falling back to the default operation timeout
 *
 * \param[in] value  Configured timeout
 *
 * \return Timeout in milliseconds; the parsed default timeout is used when
 *         \p value parses to a negative number
 */
static int
unpack_timeout(const char *value)
{
    int timeout_ms = crm_get_msec(value);

    if (timeout_ms >= 0) {
        return timeout_ms;
    }
    timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S);
    return timeout_ms;
}
/*!
 * \brief Get the configured timeout for a resource action
 *
 * The timeout is taken from the first operation of the resource whose name
 * matches \p action. If that yields none and operation defaults exist, the
 * timeout from the operation defaults is used. If the result cannot be parsed
 * as a non-negative value, the default operation timeout is returned.
 *
 * \param[in] rsc       Resource whose operations to search
 * \param[in] action    Name of the action
 * \param[in] data_set  Cluster working set
 *
 * \return Timeout in milliseconds
 */
int
pe_get_configured_timeout(resource_t *rsc, const char *action, pe_working_set_t *data_set)
{
    xmlNode *child = NULL;
    const char *timeout = NULL;
    GHashTable *action_meta = NULL;
    int timeout_ms = 0;

    for (child = first_named_child(rsc->ops_xml, XML_ATTR_OP);
         child != NULL; child = crm_next_same_xml(child)) {
        if (safe_str_eq(action, crm_element_value(child, XML_NVPAIR_ATTR_NAME))) {
            timeout = crm_element_value(child, XML_ATTR_TIMEOUT);
            break;
        }
    }

    if (timeout == NULL && data_set->op_defaults) {
        action_meta = crm_str_table_new();
        unpack_instance_attributes(data_set->input, data_set->op_defaults, XML_TAG_META_SETS,
                                   NULL, action_meta, NULL, FALSE, data_set->now);
        // This string is owned by action_meta, so parse it before destroying the table
        timeout = g_hash_table_lookup(action_meta, XML_ATTR_TIMEOUT);
    }

    // @TODO check meta-attributes (including versioned meta-attributes)
    // @TODO maybe use min-interval monitor timeout as default for monitors

    timeout_ms = crm_get_msec(timeout);
    if (timeout_ms < 0) {
        timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S);
    }

    // The temporary table was only needed to look up the default timeout
    if (action_meta != NULL) {
        g_hash_table_destroy(action_meta);
    }
    return timeout_ms;
}
#if ENABLE_VERSIONED_ATTRS
/*!
 * \internal
 * \brief Normalize timing values in versioned meta-attribute XML
 *
 * Rewrites, in place, every start delay and timeout to integer milliseconds,
 * and replaces every interval origin with the equivalent start delay.
 *
 * \param[in,out] versioned_meta  XML whose grandchildren are the name/value
 *                                pairs to normalize
 * \param[in]     xml_obj         Operation XML, used for log messages
 * \param[in]     interval_ms     Operation interval in milliseconds
 * \param[in]     now             Current time
 */
static void
unpack_versioned_meta(xmlNode *versioned_meta, xmlNode *xml_obj,
                      guint interval_ms, crm_time_t *now)
{
    xmlNode *set = NULL;

    for (set = __xml_first_child(versioned_meta); set != NULL;
         set = __xml_next_element(set)) {

        xmlNode *pair = NULL;

        for (pair = __xml_first_child(set); pair != NULL;
             pair = __xml_next_element(pair)) {

            const char *pair_name = crm_element_value(pair, XML_NVPAIR_ATTR_NAME);
            const char *pair_value = crm_element_value(pair, XML_NVPAIR_ATTR_VALUE);

            if (safe_str_eq(pair_name, XML_OP_ATTR_START_DELAY)) {
                crm_xml_add_int(pair, XML_NVPAIR_ATTR_VALUE,
                                unpack_start_delay(pair_value, NULL));

            } else if (safe_str_eq(pair_name, XML_OP_ATTR_ORIGIN)) {
                int delay_ms = unpack_interval_origin(pair_value, NULL, xml_obj,
                                                      interval_ms, now);

                // The origin entry becomes a start delay entry
                crm_xml_add(pair, XML_NVPAIR_ATTR_NAME, XML_OP_ATTR_START_DELAY);
                crm_xml_add_int(pair, XML_NVPAIR_ATTR_VALUE, delay_ms);

            } else if (safe_str_eq(pair_name, XML_ATTR_TIMEOUT)) {
                crm_xml_add_int(pair, XML_NVPAIR_ATTR_VALUE,
                                unpack_timeout(pair_value));
            }
        }
    }
}
#endif
/*!
 * \brief Unpack operation XML into an action structure
 *
 * Unpack an operation's meta-attributes (normalizing the interval, timeout,
 * and start delay values as integer milliseconds), requirements, and
 * failure policy.
 *
 * Meta-attribute precedence, lowest to highest: cluster-wide operation
 * defaults, the operation's own meta-attribute sets, and finally the
 * properties set directly on the operation's XML element.
 *
 * \param[in,out] action     Action to unpack into
 * \param[in]     xml_obj    Operation XML (or NULL if all defaults)
 * \param[in]     container  Resource that contains affected resource, if any
 * \param[in]     data_set   Cluster state
 */
void
unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container,
                 pe_working_set_t * data_set)
{
    guint interval_ms = 0;
    int timeout = 0;
    char *value_ms = NULL;
    const char *value = NULL;
    const char *field = NULL;
    char *default_timeout = NULL;
#if ENABLE_VERSIONED_ATTRS
    pe_rsc_action_details_t *rsc_details = NULL;
#endif

    CRM_CHECK(action && action->rsc, return);

    // Cluster-wide operation defaults
    unpack_instance_attributes(data_set->input, data_set->op_defaults, XML_TAG_META_SETS, NULL,
                               action->meta, NULL, FALSE, data_set->now);

    // Probe timeouts default differently, so handle timeout default later
    default_timeout = g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT);
    if (default_timeout) {
        // Copy before removal, because removing the entry may free the string
        default_timeout = strdup(default_timeout);
        g_hash_table_remove(action->meta, XML_ATTR_TIMEOUT);
    }

    if (xml_obj) {
        xmlAttrPtr xIter = NULL;

        // The operation's own meta-attributes take precedence over defaults
        unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_META_SETS,
                                   NULL, action->meta, NULL, TRUE,
                                   data_set->now);

#if ENABLE_VERSIONED_ATTRS
        rsc_details = pe_rsc_action_details(action);
        if (rsc_details != NULL) {
            // pe_rsc_action_details() returns NULL if allocation failed
            pe_unpack_versioned_attributes(data_set->input, xml_obj,
                                           XML_TAG_ATTR_SETS, NULL,
                                           rsc_details->versioned_parameters,
                                           data_set->now);
            pe_unpack_versioned_attributes(data_set->input, xml_obj,
                                           XML_TAG_META_SETS, NULL,
                                           rsc_details->versioned_meta,
                                           data_set->now);
        }
#endif

        /* Anything set as an XML property has highest precedence.
         * This ensures we use the name and interval from the operation's
         * XML element.
         */
        for (xIter = xml_obj->properties; xIter; xIter = xIter->next) {
            const char *prop_name = (const char *)xIter->name;
            const char *prop_value = crm_element_value(xml_obj, prop_name);

            g_hash_table_replace(action->meta, strdup(prop_name), strdup(prop_value));
        }
    }

    g_hash_table_remove(action->meta, "id");

    // Normalize interval to milliseconds
    field = XML_LRM_ATTR_INTERVAL;
    value = g_hash_table_lookup(action->meta, field);
    if (value != NULL) {
        interval_ms = crm_parse_interval_spec(value);

    } else if ((xml_obj == NULL) && !strcmp(action->task, RSC_STATUS)) {
        /* An orphaned recurring monitor will not have any XML. However, we
         * want the interval to be set, so the action can be properly detected
         * as a recurring monitor. Parse it from the key in this case.
         */
        parse_op_key(action->uuid, NULL, NULL, &interval_ms);
    }
    if (interval_ms > 0) {
        value_ms = crm_strdup_printf("%u", interval_ms);
        g_hash_table_replace(action->meta, strdup(field), value_ms);

    } else if (value) {
        // A configured interval that parses to zero is dropped entirely
        g_hash_table_remove(action->meta, field);
    }

    // Handle timeout default, now that we know the interval
    if (g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT)) {
        // An explicit timeout was configured, so the default is not needed
        free(default_timeout);

    } else {
        // Probe timeouts default to minimum-interval monitor's
        if (safe_str_eq(action->task, RSC_STATUS) && (interval_ms == 0)) {

            xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE);

            if (min_interval_mon) {
                value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT);
                if (value) {
                    crm_trace("\t%s defaults to minimum-interval monitor's timeout '%s'",
                              action->uuid, value);
                    free(default_timeout);
                    default_timeout = strdup(value);
                }
            }
        }

        if (default_timeout) {
            // The table takes ownership of default_timeout here
            g_hash_table_insert(action->meta, strdup(XML_ATTR_TIMEOUT),
                                default_timeout);
        }
    }

    // Only start and promote actions can have requirements
    if (safe_str_neq(action->task, RSC_START)
        && safe_str_neq(action->task, RSC_PROMOTE)) {
        action->needs = rsc_req_nothing;
        value = "nothing (not start/promote)";

    } else if (is_set(action->rsc->flags, pe_rsc_needs_fencing)) {
        action->needs = rsc_req_stonith;
        value = "fencing (resource)";

    } else if (is_set(action->rsc->flags, pe_rsc_needs_quorum)) {
        action->needs = rsc_req_quorum;
        value = "quorum (resource)";

    } else {
        action->needs = rsc_req_nothing;
        value = "nothing (resource)";
    }

    pe_rsc_trace(action->rsc, "\tAction %s requires: %s", action->uuid, value);

    // Map the configured on-fail value (if any) to a failure policy
    value = unpack_operation_on_fail(action);

    if (value == NULL) {
        // Nothing configured; the defaults below apply

    } else if (safe_str_eq(value, "block")) {
        action->on_fail = action_fail_block;
        g_hash_table_insert(action->meta, strdup(XML_OP_ATTR_ON_FAIL), strdup("block"));
        value = "block"; // The above could destroy the original string

    } else if (safe_str_eq(value, "fence")) {
        action->on_fail = action_fail_fence;
        value = "node fencing";

        if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
            crm_config_err("Specifying on_fail=fence and" " stonith-enabled=false makes no sense");
            action->on_fail = action_fail_stop;
            action->fail_role = RSC_ROLE_STOPPED;
            value = "stop resource";
        }

    } else if (safe_str_eq(value, "standby")) {
        action->on_fail = action_fail_standby;
        value = "node standby";

    } else if (safe_str_eq(value, "ignore")
               || safe_str_eq(value, "nothing")) {
        action->on_fail = action_fail_ignore;
        value = "ignore";

    } else if (safe_str_eq(value, "migrate")) {
        action->on_fail = action_fail_migrate;
        value = "force migration";

    } else if (safe_str_eq(value, "stop")) {
        action->on_fail = action_fail_stop;
        action->fail_role = RSC_ROLE_STOPPED;
        value = "stop resource";

    } else if (safe_str_eq(value, "restart")) {
        action->on_fail = action_fail_recover;
        value = "restart (and possibly migrate)";

    } else if (safe_str_eq(value, "restart-container")) {
        if (container) {
            action->on_fail = action_fail_restart_container;
            value = "restart container (and possibly migrate)";

        } else {
            // Without a container, fall back to the defaults below
            value = NULL;
        }

    } else {
        pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value);
        value = NULL;
    }

    /* defaults */
    if (value == NULL && container) {
        action->on_fail = action_fail_restart_container;
        value = "restart container (and possibly migrate) (default)";

    /* For remote nodes, ensure that any failure that results in dropping an
     * active connection to the node results in fencing of the node.
     *
     * There are only two action failures that don't result in fencing.
     * 1. probes - probe failures are expected.
     * 2. start - a start failure indicates that an active connection does not already
     * exist. The user can set op on-fail=fence if they really want to fence start
     * failures. */
    } else if (((value == NULL) || !is_set(action->rsc->flags, pe_rsc_managed)) &&
               (pe__resource_is_remote_conn(action->rsc, data_set) &&
                !(safe_str_eq(action->task, CRMD_ACTION_STATUS) && (interval_ms == 0)) &&
                (safe_str_neq(action->task, CRMD_ACTION_START)))) {

        if (!is_set(action->rsc->flags, pe_rsc_managed)) {
            action->on_fail = action_fail_stop;
            action->fail_role = RSC_ROLE_STOPPED;
            value = "stop unmanaged remote node (enforcing default)";

        } else {
            if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
                value = "fence remote node (default)";
            } else {
                value = "recover remote node connection (default)";
            }

            if (action->rsc->remote_reconnect_ms) {
                action->fail_role = RSC_ROLE_STOPPED;
            }
            action->on_fail = action_fail_reset_remote;
        }

    } else if (value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) {
        if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
            action->on_fail = action_fail_fence;
            value = "resource fence (default)";

        } else {
            action->on_fail = action_fail_block;
            value = "resource block (default)";
        }

    } else if (value == NULL) {
        action->on_fail = action_fail_recover;
        value = "restart (and possibly migrate) (default)";
    }

    pe_rsc_trace(action->rsc, "\t%s failure handling: %s", action->task, value);

    // Role after failure (deprecated option), only honored with operation XML
    value = NULL;
    if (xml_obj != NULL) {
        value = g_hash_table_lookup(action->meta, "role_after_failure");
        if (value) {
            pe_warn_once(pe_wo_role_after,
                        "Support for role_after_failure is deprecated and will be removed in a future release");
        }
    }
    if (value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) {
        action->fail_role = text2role(value);
    }
    /* defaults */
    if (action->fail_role == RSC_ROLE_UNKNOWN) {
        if (safe_str_eq(action->task, CRMD_ACTION_PROMOTE)) {
            action->fail_role = RSC_ROLE_SLAVE;
        } else {
            action->fail_role = RSC_ROLE_STARTED;
        }
    }
    pe_rsc_trace(action->rsc, "\t%s failure results in: %s", action->task,
                 role2text(action->fail_role));

    // An explicit start delay takes precedence over an interval origin
    value = g_hash_table_lookup(action->meta, XML_OP_ATTR_START_DELAY);
    if (value) {
        unpack_start_delay(value, action->meta);
    } else {
        value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN);
        unpack_interval_origin(value, action->meta, xml_obj, interval_ms,
                               data_set->now);
    }

    // Normalize timeout to integer milliseconds
    value = g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT);
    timeout = unpack_timeout(value);
    g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT), crm_itoa(timeout));

#if ENABLE_VERSIONED_ATTRS
    /* The details are only created when operation XML was given (and may be
     * NULL if allocation failed), so there may be nothing to normalize.
     */
    if (rsc_details != NULL) {
        unpack_versioned_meta(rsc_details->versioned_meta, xml_obj, interval_ms,
                              data_set->now);
    }
#endif
}
/*!
 * \internal
 * \brief Find the configured operation of a resource matching an operation key
 *
 * An operation matches if the key generated from its name and interval,
 * combined with either the resource's ID or its clone name, equals \p key.
 * If nothing matches and \p key refers to a migration or notify action, the
 * search is repeated once with a generic "migrate" or "notify" key (interval
 * 0) for the resource.
 *
 * \param[in] rsc               Resource whose operations to search
 * \param[in] key               Operation key to look for
 * \param[in] include_disabled  If FALSE, skip operations with enabled=false
 *
 * \return XML of the first matching operation, or NULL if none matches
 */
static xmlNode *
find_rsc_op_entry_helper(resource_t * rsc, const char *key, gboolean include_disabled)
{
    gboolean retried = FALSE;
    char *fallback_key = NULL;
    xmlNode *candidate = NULL;

    while (TRUE) {
        for (candidate = __xml_first_child(rsc->ops_xml); candidate != NULL;
             candidate = __xml_next_element(candidate)) {

            const char *op_name = NULL;
            const char *op_interval = NULL;
            const char *op_enabled = NULL;
            guint op_interval_ms = 0;
            char *candidate_key = NULL;
            gboolean matched = FALSE;

            if (!crm_str_eq((const char *)candidate->name, "op", TRUE)) {
                continue;
            }

            op_name = crm_element_value(candidate, "name");
            op_interval = crm_element_value(candidate, XML_LRM_ATTR_INTERVAL);
            op_enabled = crm_element_value(candidate, "enabled");
            if (!include_disabled && op_enabled && crm_is_true(op_enabled) == FALSE) {
                continue;
            }

            op_interval_ms = crm_parse_interval_spec(op_interval);

            // Try the key as built from the resource's own ID
            candidate_key = generate_op_key(rsc->id, op_name, op_interval_ms);
            if (safe_str_eq(key, candidate_key)) {
                matched = TRUE;
            }
            free(candidate_key);

            // Also try the key as built from the clone name, if there is one
            if (rsc->clone_name) {
                candidate_key = generate_op_key(rsc->clone_name, op_name, op_interval_ms);
                if (safe_str_eq(key, candidate_key)) {
                    matched = TRUE;
                }
                free(candidate_key);
            }

            if (matched) {
                free(fallback_key);
                return candidate;
            }
        }

        // Nothing matched in this pass
        free(fallback_key);
        if (retried) {
            return NULL;
        }
        retried = TRUE;

        // Retry at most once, with a generic key for migrations or notifies
        if (strstr(key, CRMD_ACTION_MIGRATE) || strstr(key, CRMD_ACTION_MIGRATED)) {
            fallback_key = generate_op_key(rsc->id, "migrate", 0);

        } else if (strstr(key, "_notify_")) {
            fallback_key = generate_op_key(rsc->id, "notify", 0);

        } else {
            return NULL;
        }
        key = fallback_key;
    }
}
/*!
 * \brief Find the enabled operation of a resource matching an operation key
 *
 * Same search as find_rsc_op_entry_helper(), with disabled operations
 * excluded.
 *
 * \param[in] rsc  Resource whose operations to search
 * \param[in] key  Operation key to look for
 *
 * \return XML of the matching operation, or NULL if none matches
 */
xmlNode *
find_rsc_op_entry(resource_t * rsc, const char *key)
{
return find_rsc_op_entry_helper(rsc, key, FALSE);
}
/*!
 * \brief Log a node (and optionally its attributes and resources) at trace level
 *
 * \param[in] pre_text  Text to prefix the node line with (or NULL for none)
 * \param[in] node      Node to log (or NULL)
 * \param[in] details   If TRUE, also log the node's attributes and the
 *                      resources running on it
 */
void
print_node(const char *pre_text, node_t * node, gboolean details)
{
    const char *prefix = (pre_text == NULL)? "" : pre_text;
    const char *separator = (pre_text == NULL)? "" : ": ";
    GListPtr item = NULL;
    char *attr_indent = NULL;

    if (node == NULL) {
        crm_trace("%s%s: ", prefix, separator);
        return;
    }

    CRM_ASSERT(node->details);
    crm_trace("%s%s%sNode %s: (weight=%d, fixed=%s)",
              prefix, separator,
              node->details->online ? "" : "Unavailable/Unclean ",
              node->details->uname, node->weight, node->fixed ? "True" : "False");

    if (!details) {
        return;
    }

    // The indent string is passed to the callback as its line prefix
    attr_indent = strdup("\t\t");
    crm_trace("\t\t===Node Attributes");
    g_hash_table_foreach(node->details->attrs, print_str_str, attr_indent);
    free(attr_indent);

    crm_trace("\t\t=== Resources");
    for (item = node->details->running_rsc; item != NULL; item = item->next) {
        print_resource(LOG_TRACE, "\t\t", (resource_t *) item->data, FALSE);
    }
}
/*!
 * \brief Log one string key/value pair at trace level
 *
 * Matches the callback signature expected by g_hash_table_foreach().
 *
 * \param[in] key        Entry name (treated as a C string)
 * \param[in] value      Entry value (treated as a C string)
 * \param[in] user_data  Optional C string used as a message prefix
 */
void
print_str_str(gpointer key, gpointer value, gpointer user_data)
{
    const char *prefix = "";
    const char *separator = "";

    if (user_data != NULL) {
        prefix = (char *) user_data;
        separator = ": ";
    }
    crm_trace("%s%s %s ==> %s", prefix, separator, (char *)key, (char *)value);
}
/*!
 * \brief Log a resource via its variant-specific print method
 *
 * \param[in] log_level  Log level handed to the resource's print method
 * \param[in] pre_text   Optional message prefix (may be NULL)
 * \param[in] rsc        Resource to print (may be NULL)
 * \param[in] details    If TRUE, request detailed output
 */
void
print_resource(int log_level, const char *pre_text, resource_t * rsc, gboolean details)
{
    long options = 0;

    if (rsc == NULL) {
        /* Nothing to print beyond the prefix; note this is logged one level
         * below the requested one
         */
        do_crm_log(log_level - 1, "%s%s: ",
                   pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ");
        return;
    }

    options = pe_print_log | pe_print_pending;
    if (details) {
        options = options | pe_print_details;
    }
    rsc->fns->print(rsc, pre_text, options, &log_level);
}
/*!
 * \brief Free an action and the memory it owns
 *
 * Releases the before/after ordering lists (including their wrapper objects,
 * but not the actions those wrappers point to), the "extra" and "meta" hash
 * tables, and the action's strings and node member.
 *
 * \param[in] action  Action to free (may be NULL)
 */
void
pe_free_action(action_t * action)
{
    if (action == NULL) {
        return;
    }

    // List elements are action_wrapper_t objects
    g_list_free_full(action->actions_before, free);
    g_list_free_full(action->actions_after, free);

    if (action->extra != NULL) {
        g_hash_table_destroy(action->extra);
    }
    if (action->meta != NULL) {
        g_hash_table_destroy(action->meta);
    }

#if ENABLE_VERSIONED_ATTRS
    if (action->rsc) {
        pe_free_rsc_action_details(action);
    }
#endif

    free(action->cancel_task);
    free(action->reason);
    free(action->task);
    free(action->uuid);
    free(action->node);
    free(action);
}
/*!
 * \brief Collect recurring actions from a list
 *
 * An action is selected when its meta table has a non-"0" interval and its
 * task is not a cancellation. If \p not_on_node is given, only actions
 * assigned to a different node are selected (unassigned actions are skipped).
 *
 * \param[in] input        List of actions to search (must not be NULL)
 * \param[in] not_on_node  If not NULL, exclude actions on this node
 *
 * \return Newly allocated list of matching actions (NULL if none); the list
 *         elements are the original actions, not copies
 */
GListPtr
find_recurring_actions(GListPtr input, node_t * not_on_node)
{
    GListPtr found = NULL;

    CRM_CHECK(input != NULL, return NULL);

    for (GListPtr item = input; item != NULL; item = item->next) {
        action_t *action = (action_t *) item->data;
        const char *interval_ms_s = g_hash_table_lookup(action->meta,
                                                        XML_LRM_ATTR_INTERVAL_MS);

        if ((interval_ms_s == NULL)
            || safe_str_eq(interval_ms_s, "0")
            || safe_str_eq(CRMD_ACTION_CANCEL, action->task)) {
            continue; // Not recurring, or a cancellation
        }

        if (not_on_node == NULL) {
            crm_trace("(null) Found: %s", action->uuid);
            found = g_list_prepend(found, action);

        } else if ((action->node != NULL)
                   && (action->node->details != not_on_node->details)) {
            crm_trace("Found: %s", action->uuid);
            found = g_list_prepend(found, action);
        }
    }
    return found;
}
/*!
 * \brief Convert a task name to a task type, optionally folding "completed"
 *        variants into their atomic counterparts
 *
 * \param[in] rsc               Resource the task is for (may be NULL)
 * \param[in] name              Task name to convert
 * \param[in] allow_non_atomic  If FALSE, always fold; if TRUE, fold only for
 *                              primitive (pe_native) resources
 *
 * \return Task type for \p name
 * \note Folding returns (task - 1), which presumably relies on each
 *       "completed" enum value immediately following its atomic counterpart
 *       in enum action_tasks (confirm against the enum definition).
 */
enum action_tasks
get_complex_task(resource_t * rsc, const char *name, gboolean allow_non_atomic)
{
    enum action_tasks task = text2task(name);

    if (rsc == NULL) {
        return task;
    }
    if ((allow_non_atomic != FALSE) && (rsc->variant != pe_native)) {
        return task;
    }

    if ((task == stopped_rsc) || (task == started_rsc)
        || (task == action_demoted) || (task == action_promoted)) {
        crm_trace("Folding %s back into its atomic counterpart for %s", name, rsc->id);
        return task - 1;
    }
    return task;
}
/*!
 * \brief Find the first action in a list matching the given criteria
 *
 * \param[in] input    List of actions to search
 * \param[in] uuid     If not NULL, action UUID that must match
 * \param[in] task     If not NULL, task name that must match
 * \param[in] on_node  If not NULL, node the action must be assigned to
 *
 * \return First matching action, or NULL if none (or if both \p uuid and
 *         \p task are NULL)
 */
action_t *
find_first_action(GListPtr input, const char *uuid, const char *task, node_t * on_node)
{
    CRM_CHECK(uuid || task, return NULL);

    for (GListPtr item = input; item != NULL; item = item->next) {
        action_t *candidate = (action_t *) item->data;

        if ((uuid != NULL) && safe_str_neq(uuid, candidate->uuid)) {
            continue;
        }
        if ((task != NULL) && safe_str_neq(task, candidate->task)) {
            continue;
        }
        if (on_node == NULL) {
            return candidate;
        }
        if ((candidate->node != NULL)
            && (on_node->details == candidate->node->details)) {
            return candidate;
        }
    }
    return NULL;
}
/*!
 * \brief Find all actions in a list with a given key, optionally on a node
 *
 * \param[in] input    List of actions to search
 * \param[in] key      Action key that action UUIDs are compared against
 * \param[in] on_node  If not NULL, match only actions on this node
 *
 * \return Newly allocated list of matching actions (NULL if none)
 * \note If \p on_node is not NULL, any key-matching action that has no node
 *       yet is assigned a copy of \p on_node and counted as a match.
 */
GListPtr
find_actions(GListPtr input, const char *key, const node_t *on_node)
{
    GListPtr found = NULL;

    CRM_CHECK(key != NULL, return NULL);

    for (GListPtr item = input; item != NULL; item = item->next) {
        action_t *action = (action_t *) item->data;
        gboolean matched = FALSE;

        if (safe_str_neq(key, action->uuid)) {
            crm_trace("%s does not match action %s", key, action->uuid);
            continue;
        }

        if (on_node == NULL) {
            crm_trace("Action %s matches (ignoring node)", key);
            matched = TRUE;

        } else if (action->node == NULL) {
            crm_trace("Action %s matches (unallocated, assigning to %s)",
                      key, on_node->details->uname);
            action->node = node_copy(on_node);
            matched = TRUE;

        } else if (on_node->details == action->node->details) {
            crm_trace("Action %s on %s matches", key, on_node->details->uname);
            matched = TRUE;

        } else {
            crm_trace("Action %s on node %s does not match requested node %s",
                      key, action->node->details->uname,
                      on_node->details->uname);
        }

        if (matched) {
            found = g_list_prepend(found, action);
        }
    }
    return found;
}
/*!
 * \brief Find all actions in a list with a given key on a given node
 *
 * Unlike find_actions(), this requires both a node to be specified and each
 * candidate action to already have a node; nodes are compared by ID string.
 *
 * \param[in] input    List of actions to search
 * \param[in] key      Action key that action UUIDs are compared against
 * \param[in] on_node  Node the actions must be assigned to
 *
 * \return Newly allocated list of matching actions (NULL if none, or if
 *         \p key or \p on_node is NULL)
 */
GList *
find_actions_exact(GList *input, const char *key, const pe_node_t *on_node)
{
    GList *found = NULL;

    CRM_CHECK(key != NULL, return NULL);

    if (on_node == NULL) {
        crm_trace("Not searching for action %s because node not specified",
                  key);
        return NULL;
    }

    for (GList *item = input; item != NULL; item = item->next) {
        pe_action_t *action = (pe_action_t *) item->data;

        if (action->node == NULL) {
            crm_trace("Skipping comparison of %s vs action %s without node",
                      key, action->uuid);
            continue;
        }
        if (safe_str_neq(key, action->uuid)) {
            crm_trace("Desired action %s doesn't match %s", key, action->uuid);
            continue;
        }
        if (safe_str_neq(on_node->details->id, action->node->details->id)) {
            crm_trace("Action %s desired node ID %s doesn't match %s",
                      key, on_node->details->id, action->node->details->id);
            continue;
        }

        crm_trace("Action %s matches", key);
        found = g_list_prepend(found, action);
    }
    return found;
}
/*!
 * \brief Find a resource's actions of a given (non-recurring) type
 *
 * The search key is generated from the resource ID, \p task, and an interval
 * of 0.
 *
 * \param[in] rsc           Resource whose action list is searched
 * \param[in] node          Restrict the search to actions on this node
 * \param[in] task          Name of the action to look for
 * \param[in] require_node  If TRUE, use exact matching: a NULL \p node or an
 *                          action without a node never matches
 *
 * \return Newly allocated list of matching actions (NULL if none)
 * \note When \p require_node is FALSE and \p node is not NULL, matching
 *       actions that have no node yet get \p node assigned to them.
 */
GList *
pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node,
                     const char *task, bool require_node)
{
    char *op_key = generate_op_key(rsc->id, task, 0);
    GList *found = require_node
                   ? find_actions_exact(rsc->actions, op_key, node)
                   : find_actions(rsc->actions, op_key, node);

    free(op_key);
    return found;
}
/*!
 * \internal
 * \brief Merge a score into a resource's preference for one node
 *
 * The score is applied recursively to all children first, then merged into
 * the resource's own allowed_nodes entry for the node (creating the entry
 * from a copy of \p node if there is none yet).
 *
 * \param[in] rsc    Resource to update
 * \param[in] node   Node the score applies to
 * \param[in] score  Score to merge
 * \param[in] tag    Name of what the score is for (used for logging, and to
 *                   recognize the "symmetric_default" constraint)
 */
static void
resource_node_score(resource_t * rsc, node_t * node, int score, const char *tag)
{
    node_t *allowed = NULL;
    gboolean exclusive = rsc->exclusive_discover
                         || (node->rsc_discover_mode == pe_discover_never);

    if (exclusive && safe_str_eq(tag, "symmetric_default")) {
        /* The string comparison may be fragile, but resources and nodes using
         * exclusive discovery should not get the symmetric_default constraint
         */
        return;
    }

    for (GListPtr item = rsc->children; item != NULL; item = item->next) {
        resource_node_score((resource_t *) item->data, node, score, tag);
    }

    pe_rsc_trace(rsc, "Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score);

    allowed = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
    if (allowed == NULL) {
        allowed = node_copy(node);
        g_hash_table_insert(rsc->allowed_nodes, (gpointer) allowed->details->id, allowed);
    }
    allowed->weight = merge_weights(allowed->weight, score);
}
/*!
 * \brief Apply a location score to a resource
 *
 * \param[in] rsc       Resource to update
 * \param[in] node      If not NULL, the only node the score applies to
 * \param[in] score     Score to merge
 * \param[in] tag       Name of what the score is for
 * \param[in] data_set  If \p node is NULL and this is not NULL, apply the
 *                      score to every node in data_set->nodes; if both are
 *                      NULL, apply it to the resource's allowed nodes
 *
 * \note A score of -INFINITY with no specific node also drops the resource's
 *       current allocation, if any.
 */
void
resource_location(resource_t * rsc, node_t * node, int score, const char *tag,
                  pe_working_set_t * data_set)
{
    if (node != NULL) {
        resource_node_score(rsc, node, score, tag);

    } else if (data_set != NULL) {
        for (GListPtr item = data_set->nodes; item != NULL; item = item->next) {
            resource_node_score(rsc, (node_t *) item->data, score, tag);
        }

    } else {
        GHashTableIter iter;
        node_t *allowed = NULL;

        g_hash_table_iter_init(&iter, rsc->allowed_nodes);
        while (g_hash_table_iter_next(&iter, NULL, (void **)&allowed)) {
            resource_node_score(rsc, allowed, score, tag);
        }
    }

    if ((node == NULL) && (score == -INFINITY) && (rsc->allocated_to != NULL)) {
        crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname);
        free(rsc->allocated_to);
        rsc->allocated_to = NULL;
    }
}
/* Common exit path for sort_op_by_callid(): free the decoded transition
 * UUIDs, trace the comparison result together with the reason, and return the
 * result. Relies on the caller's locals a_uuid, b_uuid, a_xml_id, b_xml_id,
 * a_call_id and b_call_id. Arguments are parenthesized so that any expression
 * can safely be passed as the result.
 */
#define sort_return(an_int, why) do {                                       \
        free(a_uuid);                                                       \
        free(b_uuid);                                                       \
        crm_trace("%s (%d) %c %s (%d) : %s",                                \
                  a_xml_id, a_call_id,                                      \
                  (((an_int) > 0)? '>' : (((an_int) < 0)? '<' : '=')),      \
                  b_xml_id, b_call_id, (why));                              \
        return (an_int);                                                    \
    } while(0)
/*!
 * \brief Compare two operation history XML entries for sorting
 *
 * Entries are ordered primarily by their call ID attribute. Entries with the
 * same call ID are ordered by their last-change attribute. If exactly one
 * entry has no call ID (a pending operation), the transition magic of both
 * entries is decoded to estimate their relative age.
 *
 * \param[in] a  First XML entry (an xmlNode passed as a generic pointer)
 * \param[in] b  Second XML entry (an xmlNode passed as a generic pointer)
 *
 * \return -1 if \p a sorts before \p b, 1 if it sorts after, 0 if the two
 *         cannot be ordered (duplicate IDs, both pending, undecodable magic)
 * \note Every exit goes through sort_return(), which frees a_uuid and b_uuid.
 */
gint
sort_op_by_callid(gconstpointer a, gconstpointer b)
{
int a_call_id = -1;
int b_call_id = -1;
char *a_uuid = NULL;
char *b_uuid = NULL;
const xmlNode *xml_a = a;
const xmlNode *xml_b = b;
const char *a_xml_id = crm_element_value(xml_a, XML_ATTR_ID);
const char *b_xml_id = crm_element_value(xml_b, XML_ATTR_ID);
if (safe_str_eq(a_xml_id, b_xml_id)) {
/* We have duplicate lrm_rsc_op entries in the status
 * section which is unlikely to be a good thing
 * - we can handle it easily enough, but we need to get
 * to the bottom of why it's happening.
 */
pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id);
sort_return(0, "duplicate");
}
/* The call IDs keep their initial value of -1 if the attribute is absent */
crm_element_value_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id);
crm_element_value_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id);
if (a_call_id == -1 && b_call_id == -1) {
/* both are pending ops so it doesn't matter since
 * stops are never pending
 */
sort_return(0, "pending");
} else if (a_call_id >= 0 && a_call_id < b_call_id) {
sort_return(-1, "call id");
} else if (b_call_id >= 0 && a_call_id > b_call_id) {
sort_return(1, "call id");
} else if (b_call_id >= 0 && a_call_id == b_call_id) {
/*
 * The op and last_failed_op are the same
 * Order on last-rc-change
 */
int last_a = -1;
int last_b = -1;
crm_element_value_int(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a);
crm_element_value_int(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b);
crm_trace("rc-change: %d vs %d", last_a, last_b);
if (last_a >= 0 && last_a < last_b) {
sort_return(-1, "rc-change");
} else if (last_b >= 0 && last_a > last_b) {
sort_return(1, "rc-change");
}
sort_return(0, "rc-change");
} else {
/* One of the inputs is a pending operation
 * Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other
 */
int a_id = -1;
int b_id = -1;
- int dummy = -1;
const char *a_magic = crm_element_value(xml_a, XML_ATTR_TRANSITION_MAGIC);
const char *b_magic = crm_element_value(xml_b, XML_ATTR_TRANSITION_MAGIC);
CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic"));
- if(!decode_transition_magic(a_magic, &a_uuid, &a_id, &dummy, &dummy, &dummy, &dummy)) {
+ if (!decode_transition_magic(a_magic, &a_uuid, &a_id, NULL, NULL, NULL,
+ NULL)) {
sort_return(0, "bad magic a");
}
- if(!decode_transition_magic(b_magic, &b_uuid, &b_id, &dummy, &dummy, &dummy, &dummy)) {
+ if (!decode_transition_magic(b_magic, &b_uuid, &b_id, NULL, NULL, NULL,
+ NULL)) {
sort_return(0, "bad magic b");
}
/* try to determine the relative age of the operation...
 * some pending operations (e.g. a start) may have been superseded
 * by a subsequent stop
 *
 * [a|b]_id == -1 means it's a shutdown operation and _always_ comes last
 */
if (safe_str_neq(a_uuid, b_uuid) || a_id == b_id) {
/*
 * some of the logic in here may be redundant...
 *
 * if the UUID from the TE doesn't match then one better
 * be a pending operation.
 * pending operations don't survive between elections and joins
 * because we query the LRM directly
 */
if (b_call_id == -1) {
sort_return(-1, "transition + call");
} else if (a_call_id == -1) {
sort_return(1, "transition + call");
}
} else if ((a_id >= 0 && a_id < b_id) || b_id == -1) {
sort_return(-1, "transition");
} else if ((b_id >= 0 && a_id > b_id) || a_id == -1) {
sort_return(1, "transition");
}
}
/* we should never end up here */
CRM_CHECK(FALSE, sort_return(0, "default"));
}
/*!
 * \brief Get the time the scheduler should treat as "now"
 *
 * \param[in,out] data_set  Working set (may be NULL); if it has no "now" yet,
 *                          one is created and stored in it
 *
 * \return The working set's "now" as seconds since the epoch, or the current
 *         wall-clock time if \p data_set is NULL
 */
time_t
get_effective_time(pe_working_set_t * data_set)
{
    if (data_set == NULL) {
        crm_trace("Defaulting to 'now'");
        return time(NULL);
    }

    if (data_set->now == NULL) {
        crm_trace("Recording a new 'now'");
        data_set->now = crm_time_new(NULL);
    }
    return crm_time_get_seconds_since_epoch(data_set->now);
}
/*!
 * \brief Get a resource's explicitly configured target role, if meaningful
 *
 * \param[in]  rsc   Resource whose meta-attributes are checked
 * \param[out] role  Where to store the target role (must not be NULL)
 *
 * \return TRUE if \p role was set; FALSE if no target role is configured, it
 *         is "started" or "default", it is unknown, it is above "started" for
 *         a resource that is not part of a promotable clone, or it is above
 *         the slave role for a promotable one
 */
gboolean
get_target_role(resource_t * rsc, enum rsc_role_e * role)
{
    const char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
    enum rsc_role_e requested = RSC_ROLE_UNKNOWN;

    CRM_CHECK(role != NULL, return FALSE);

    if ((value == NULL)
        || safe_str_eq("started", value)
        || safe_str_eq("default", value)) {
        return FALSE;
    }

    requested = text2role(value);
    if (requested == RSC_ROLE_UNKNOWN) {
        crm_config_err("%s: Unknown value for %s: %s", rsc->id, XML_RSC_ATTR_TARGET_ROLE, value);
        return FALSE;
    }

    if (requested > RSC_ROLE_STARTED) {
        if (!is_set(uber_parent(rsc)->flags, pe_rsc_promotable)) {
            crm_config_err("%s is not part of a promotable clone resource, a %s of '%s' makes no sense",
                           rsc->id, XML_RSC_ATTR_TARGET_ROLE, value);
            return FALSE;
        }
        if (requested > RSC_ROLE_SLAVE) {
            /* This is what we'd do anyway, so leave the default in place to
             * avoid messing up the placement algorithm
             */
            return FALSE;
        }
    }

    *role = requested;
    return TRUE;
}
/*!
 * \brief Order one action before another
 *
 * Records the relationship on both sides: a wrapper for \p rh_action is added
 * to \p lh_action's actions_after list, and a wrapper for \p lh_action is
 * added to \p rh_action's actions_before list.
 *
 * \param[in] lh_action  Action that comes first
 * \param[in] rh_action  Action that comes second
 * \param[in] order      Ordering type flags to record
 *
 * \return TRUE if a new ordering was recorded, FALSE if \p order is
 *         pe_order_none, either action is NULL, or an ordering sharing at
 *         least one flag with \p order already exists
 * \note Asserts that the two actions are distinct and that the wrapper
 *       allocations succeed.
 */
gboolean
order_actions(action_t * lh_action, action_t * rh_action, enum pe_ordering order)
{
    action_wrapper_t *wrapper = NULL;

    if (order == pe_order_none) {
        return FALSE;
    }

    if (lh_action == NULL || rh_action == NULL) {
        return FALSE;
    }

    crm_trace("Ordering Action %s before %s", lh_action->uuid, rh_action->uuid);

    /* Ensure we never create a dependency on ourselves... it's happened */
    CRM_ASSERT(lh_action != rh_action);

    /* Filter dups, otherwise update_action_states() has too much work to do */
    for (GListPtr gIter = lh_action->actions_after; gIter != NULL;
         gIter = gIter->next) {
        action_wrapper_t *after = (action_wrapper_t *) gIter->data;

        if (after->action == rh_action && (after->type & order)) {
            return FALSE;
        }
    }

    // Fail loudly on allocation failure rather than dereference NULL
    wrapper = calloc(1, sizeof(action_wrapper_t));
    CRM_ASSERT(wrapper != NULL);
    wrapper->action = rh_action;
    wrapper->type = order;
    lh_action->actions_after = g_list_prepend(lh_action->actions_after, wrapper);

    wrapper = calloc(1, sizeof(action_wrapper_t));
    CRM_ASSERT(wrapper != NULL);
    wrapper->action = lh_action;
    wrapper->type = order;
    rh_action->actions_before = g_list_prepend(rh_action->actions_before, wrapper);

    return TRUE;
}
/*!
 * \brief Get the pseudo-action with a given name, creating it if needed
 *
 * \param[in] name      Name of the pseudo-action
 * \param[in] data_set  Working set whose singletons table is consulted
 *
 * \return Existing action from the singletons table if present, otherwise a
 *         newly created action flagged as pseudo and runnable
 */
action_t *
get_pseudo_op(const char *name, pe_working_set_t * data_set)
{
    action_t *pseudo = NULL;

    if (data_set->singletons != NULL) {
        pseudo = g_hash_table_lookup(data_set->singletons, name);
    }
    if (pseudo != NULL) {
        return pseudo;
    }

    pseudo = custom_action(NULL, strdup(name), name, NULL, TRUE, TRUE, data_set);
    set_bit(pseudo->flags, pe_action_pseudo);
    set_bit(pseudo->flags, pe_action_runnable);
    return pseudo;
}
/*!
 * \brief Free a ticket object
 *
 * Used by ticket_new() as the value destructor of the tickets hash table.
 *
 * \param[in] data  Ticket to free (a ticket_t passed as a generic pointer)
 */
void
destroy_ticket(gpointer data)
{
    ticket_t *ticket = (ticket_t *) data;
    GHashTable *state = ticket->state;

    if (state != NULL) {
        g_hash_table_destroy(state);
    }
    free(ticket->id);
    free(ticket);
}
/*!
 * \brief Get the ticket with a given ID, creating it if it does not exist
 *
 * The working set's tickets table is created on first use. A newly created
 * ticket starts out not granted, not in standby, with last_granted of -1 and
 * an empty state table.
 *
 * \param[in]     ticket_id  ID of the ticket (must be a non-empty string)
 * \param[in,out] data_set   Working set that owns the tickets table
 *
 * \return Ticket object owned by the tickets table, or NULL if \p ticket_id
 *         is NULL or empty or memory could not be allocated
 */
ticket_t *
ticket_new(const char *ticket_id, pe_working_set_t * data_set)
{
    ticket_t *ticket = NULL;

    if ((ticket_id == NULL) || (ticket_id[0] == '\0')) {
        return NULL;
    }

    if (data_set->tickets == NULL) {
        data_set->tickets =
            g_hash_table_new_full(crm_str_hash, g_str_equal, free,
                                  destroy_ticket);
    }

    ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
    if (ticket == NULL) {
        char *table_key = NULL;

        ticket = calloc(1, sizeof(ticket_t));
        if (ticket == NULL) {
            crm_err("Cannot allocate ticket '%s'", ticket_id);
            return NULL;
        }

        crm_trace("Creating ticket entry for %s", ticket_id);

        /* Never insert an entry with a NULL key or ID: treat a failed copy
         * like a failed ticket allocation
         */
        ticket->id = strdup(ticket_id);
        table_key = strdup(ticket_id);
        if ((ticket->id == NULL) || (table_key == NULL)) {
            crm_err("Cannot allocate ticket '%s'", ticket_id);
            free(table_key);
            free(ticket->id);
            free(ticket);
            return NULL;
        }

        ticket->granted = FALSE;
        ticket->last_granted = -1;
        ticket->standby = FALSE;
        ticket->state = crm_str_table_new();

        g_hash_table_insert(data_set->tickets, table_key, ticket);
    }

    return ticket;
}
/*!
 * \internal
 * \brief Remove XML attributes based on membership in a name list
 *
 * An attribute counts as listed if its name, surrounded by single spaces,
 * occurs in \p param_string.
 *
 * \param[in,out] param_set     XML element whose attributes are filtered
 * \param[in]     param_string  Space-separated (and space-delimited) names
 * \param[in]     need_present  If true, remove attributes that are not
 *                              listed; if false, remove those that are
 *
 * \note Does nothing if \p param_set or \p param_string is NULL.
 */
static void
filter_parameters(xmlNode * param_set, const char *param_string, bool need_present)
{
    xmlAttrPtr attr = NULL;
    xmlAttrPtr next = NULL;

    if ((param_set == NULL) || (param_string == NULL)) {
        return;
    }

    for (attr = param_set->properties; attr != NULL; attr = next) {
        const char *prop_name = (const char *) attr->name;
        char *needle = crm_strdup_printf(" %s ", prop_name);
        bool listed = (strstr(param_string, needle) != NULL);

        free(needle);

        // Advance now, because the current attribute might get removed below
        next = attr->next;

        if (need_present && !listed) {
            crm_trace("%s not found in %s", prop_name, param_string);
            xml_remove_prop(param_set, prop_name);

        } else if (!need_present && listed) {
            crm_trace("%s found in %s", prop_name, param_string);
            xml_remove_prop(param_set, prop_name);
        }
    }
}
#if ENABLE_VERSIONED_ATTRS
/*!
 * \internal
 * \brief Add versioned parameters for an agent version to a parameter set
 *
 * \param[in]     versioned_params  XML of versioned parameters to unpack
 * \param[in]     ra_version        Resource agent version to unpack them for
 * \param[in,out] params            XML element that receives one attribute
 *                                  per unpacked name/value pair
 */
static void
append_versioned_params(xmlNode *versioned_params, const char *ra_version, xmlNode *params)
{
    GHashTableIter iter;
    gpointer name = NULL;
    gpointer value = NULL;
    GHashTable *unpacked = pe_unpack_versioned_parameters(versioned_params, ra_version);

    g_hash_table_iter_init(&iter, unpacked);
    while (g_hash_table_iter_next(&iter, &name, &value)) {
        crm_xml_add(params, (const char *) name, (const char *) value);
    }
    g_hash_table_destroy(unpacked);
}
#endif
/*!
 * \internal
 * \brief Get (calculating and caching if needed) the digests for an action
 *
 * Results are cached per node in node->details->digest_cache under \p key.
 * On a cache miss, the full parameter set is assembled from the resource's
 * attributes, a temporary action's extra and meta tables, and the resource's
 * parameters, then digested.
 *
 * \param[in] rsc       Resource the action is for
 * \param[in] task      Name of the action
 * \param[in] key       Action key, used as the cache key
 * \param[in] node      Node the action is (or was) executed on
 * \param[in] xml_op    Operation history XML entry, if any (may be NULL)
 * \param[in] data_set  Cluster working set
 *
 * \return Cache entry for \p key (owned by the node's digest cache)
 * \note The secure digest is calculated only if the working set has
 *       pe_flag_sanitized set, and the restart digest only if \p xml_op
 *       carries a restart digest; otherwise those members are left as
 *       zero-initialized by calloc().
 */
static op_digest_cache_t *
rsc_action_digest(resource_t * rsc, const char *task, const char *key,
node_t * node, xmlNode * xml_op, pe_working_set_t * data_set)
{
op_digest_cache_t *data = NULL;
data = g_hash_table_lookup(node->details->digest_cache, key);
if (data == NULL) {
GHashTable *local_rsc_params = crm_str_table_new();
/* Temporary action, used only as a source of extra and meta values; it is
 * freed again below once the parameters have been collected
 */
action_t *action = custom_action(rsc, strdup(key), task, node, TRUE, FALSE, data_set);
#if ENABLE_VERSIONED_ATTRS
xmlNode *local_versioned_params = create_xml_node(NULL, XML_TAG_RSC_VER_ATTRS);
const char *ra_version = NULL;
#endif
const char *op_version;
const char *restart_list = NULL;
/* Default list of private parameter names; replaced (possibly by NULL) with
 * the list recorded in xml_op when one is given
 */
const char *secure_list = " passwd password ";
data = calloc(1, sizeof(op_digest_cache_t));
CRM_ASSERT(data != NULL);
get_rsc_attributes(local_rsc_params, rsc, node, data_set);
#if ENABLE_VERSIONED_ATTRS
pe_get_versioned_attributes(local_versioned_params, rsc, node, data_set);
#endif
data->params_all = create_xml_node(NULL, XML_TAG_PARAMS);
// REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside
if (pe__add_bundle_remote_name(rsc, data->params_all,
XML_RSC_ATTR_REMOTE_RA_ADDR)) {
crm_trace("Set address for bundle connection %s (on %s)",
rsc->id, node->details->uname);
}
g_hash_table_foreach(local_rsc_params, hash2field, data->params_all);
g_hash_table_foreach(action->extra, hash2field, data->params_all);
g_hash_table_foreach(rsc->parameters, hash2field, data->params_all);
g_hash_table_foreach(action->meta, hash2metafield, data->params_all);
if(xml_op) {
secure_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_SECURE);
restart_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_RESTART);
op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION);
#if ENABLE_VERSIONED_ATTRS
ra_version = crm_element_value(xml_op, XML_ATTR_RA_VERSION);
#endif
} else {
/* No history entry: use this build's feature set as the version */
op_version = CRM_FEATURE_SET;
}
#if ENABLE_VERSIONED_ATTRS
append_versioned_params(local_versioned_params, ra_version, data->params_all);
append_versioned_params(rsc->versioned_parameters, ra_version, data->params_all);
{
pe_rsc_action_details_t *details = pe_rsc_action_details(action);
append_versioned_params(details->versioned_parameters, ra_version, data->params_all);
}
#endif
filter_action_parameters(data->params_all, op_version);
g_hash_table_destroy(local_rsc_params);
pe_free_action(action);
data->digest_all_calc = calculate_operation_digest(data->params_all, op_version);
if (is_set(data_set->flags, pe_flag_sanitized)) {
/* Secure digest: all parameters except those named in secure_list */
data->params_secure = copy_xml(data->params_all);
if(secure_list) {
filter_parameters(data->params_secure, secure_list, FALSE);
}
data->digest_secure_calc = calculate_operation_digest(data->params_secure, op_version);
}
if(xml_op && crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST) != NULL) {
/* Restart digest: only the parameters named in restart_list */
data->params_restart = copy_xml(data->params_all);
if (restart_list) {
filter_parameters(data->params_restart, restart_list, TRUE);
}
data->digest_restart_calc = calculate_operation_digest(data->params_restart, op_version);
}
g_hash_table_insert(node->details->digest_cache, strdup(key), data);
}
return data;
}
/*!
 * \brief Compare an operation's recorded digests against current ones
 *
 * \param[in] rsc       Resource the operation is for
 * \param[in] xml_op    Operation history XML entry holding recorded digests
 * \param[in] node      Node the operation was executed on (must not be NULL)
 * \param[in] data_set  Cluster working set
 *
 * \return Digest cache entry whose rc member is set to RSC_DIGEST_MATCH,
 *         RSC_DIGEST_RESTART (restart digest differs), RSC_DIGEST_UNKNOWN
 *         (no full digest was recorded) or RSC_DIGEST_ALL (full digest
 *         differs)
 */
op_digest_cache_t *
rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node,
                      pe_working_set_t * data_set)
{
    op_digest_cache_t *digests = NULL;
    char *op_key = NULL;
    guint interval_ms = 0;
    const char *op_version = NULL;
    const char *recorded_all = NULL;
    const char *recorded_restart = NULL;
    const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
    const char *interval_ms_s = crm_element_value(xml_op,
                                                  XML_LRM_ATTR_INTERVAL_MS);

    CRM_ASSERT(node != NULL);

    op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION);
    recorded_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST);
    recorded_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST);

    interval_ms = crm_parse_ms(interval_ms_s);
    op_key = generate_op_key(rsc->id, task, interval_ms);
    digests = rsc_action_digest(rsc, task, op_key, node, xml_op, data_set);

    // Assume a match until one of the comparisons below says otherwise
    digests->rc = RSC_DIGEST_MATCH;

    if ((recorded_restart != NULL) && (digests->digest_restart_calc != NULL)
        && (strcmp(digests->digest_restart_calc, recorded_restart) != 0)) {
        pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (restart:%s) %s",
                    op_key, node->details->uname,
                    crm_str(recorded_restart), digests->digest_restart_calc,
                    op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
        digests->rc = RSC_DIGEST_RESTART;

    } else if (recorded_all == NULL) {
        /* it is unknown what the previous op digest was */
        digests->rc = RSC_DIGEST_UNKNOWN;

    } else if (strcmp(recorded_all, digests->digest_all_calc) != 0) {
        pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (%s:%s) %s",
                    op_key, node->details->uname,
                    crm_str(recorded_all), digests->digest_all_calc,
                    (interval_ms > 0)? "reschedule" : "reload",
                    op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
        digests->rc = RSC_DIGEST_ALL;
    }

    free(op_key);
    return digests;
}
#define STONITH_DIGEST_TASK "stonith-on"

/*!
 * \internal
 * \brief Compare a fence device's current digests against those recorded
 *        in a node's digest attributes
 *
 * \param[in] rsc       Fence device resource
 * \param[in] node      Node whose recorded digest attributes are checked
 * \param[in] data_set  Cluster working set
 *
 * \return Digest cache entry whose rc member is RSC_DIGEST_UNKNOWN (node has
 *         no recorded full digests), RSC_DIGEST_MATCH (the full entry, or at
 *         least the secure entry, was found) or RSC_DIGEST_ALL (changed)
 */
static op_digest_cache_t *
fencing_action_digest_cmp(resource_t * rsc, node_t * node, pe_working_set_t * data_set)
{
    char *key = generate_op_key(rsc->id, STONITH_DIGEST_TASK, 0);
    op_digest_cache_t *data = rsc_action_digest(rsc, STONITH_DIGEST_TASK, key, node, NULL, data_set);

    const char *digest_all = pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_ALL);
    const char *digest_secure = pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_SECURE);
    const char *agent = g_hash_table_lookup(rsc->meta, XML_ATTR_TYPE);
    char *search_all = NULL;

    if (agent == NULL) {
        /* Passing NULL for "%s" is undefined; spell out the text glibc would
         * have produced so the search string is unchanged
         */
        agent = "(null)";
    }

    /* No 'reloads' for fencing device changes
     *
     * We use the resource id + agent + digest so that we can detect
     * changes to the agent and/or the parameters used
     */
    search_all = crm_strdup_printf("%s:%s:%s", rsc->id, agent, data->digest_all_calc);

    data->rc = RSC_DIGEST_ALL;
    if (digest_all == NULL) {
        /* it is unknown what the previous op digest was */
        data->rc = RSC_DIGEST_UNKNOWN;

    } else if (strstr(digest_all, search_all)) {
        data->rc = RSC_DIGEST_MATCH;

    } else if (digest_secure && data->digest_secure_calc) {
        /* Build the secure search string only here, where the secure digest
         * is known to exist (it is NULL unless the working set is sanitized)
         */
        char *search_secure = crm_strdup_printf("%s:%s:%s", rsc->id, agent,
                                                data->digest_secure_calc);

        if (strstr(digest_secure, search_secure)) {
            if (is_set(data_set->flags, pe_flag_stdout)) {
                printf("Only 'private' parameters to %s for unfencing %s changed\n",
                       rsc->id, node->details->uname);
            }
            data->rc = RSC_DIGEST_MATCH;
        }
        free(search_secure);
    }

    if (is_set(data_set->flags, pe_flag_sanitized)
        && is_set(data_set->flags, pe_flag_stdout)
        && (data->rc == RSC_DIGEST_ALL)
        && data->digest_secure_calc) {
        printf("Parameters to %s for unfencing %s changed, try '%s:%s:%s'\n",
               rsc->id, node->details->uname, rsc->id, agent,
               data->digest_secure_calc);
    }

    free(key);
    free(search_all);

    return data;
}
/*!
 * \brief Get the ID to display for a resource
 *
 * \param[in] rsc  Resource to get the ID for
 *
 * \return The resource's own ID if it has the pe_rsc_unique flag, otherwise
 *         the ID taken from its XML
 */
const char *rsc_printable_id(resource_t *rsc)
{
    return is_not_set(rsc->flags, pe_rsc_unique)? ID(rsc->xml) : rsc->id;
}
/*!
 * \brief Clear a flag on a resource and all of its descendants
 *
 * \param[in,out] rsc   Resource at the top of the subtree to update
 * \param[in]     flag  Flag bit(s) to clear
 */
void
clear_bit_recursive(resource_t * rsc, unsigned long long flag)
{
    clear_bit(rsc->flags, flag);

    for (GListPtr item = rsc->children; item != NULL; item = item->next) {
        clear_bit_recursive((resource_t *) item->data, flag);
    }
}
/*!
 * \brief Set a flag on a resource and all of its descendants
 *
 * \param[in,out] rsc   Resource at the top of the subtree to update
 * \param[in]     flag  Flag bit(s) to set
 */
void
set_bit_recursive(resource_t * rsc, unsigned long long flag)
{
    set_bit(rsc->flags, flag);

    for (GListPtr item = rsc->children; item != NULL; item = item->next) {
        set_bit_recursive((resource_t *) item->data, flag);
    }
}
/*!
 * \internal
 * \brief Collect fence devices that provide or require unfencing
 *
 * Resources with children are searched recursively rather than considered
 * themselves.
 *
 * \param[in] candidates  Resources to search
 * \param[in] matches     List that matching devices are prepended to
 *
 * \return Updated head of \p matches
 */
static GListPtr
find_unfencing_devices(GListPtr candidates, GListPtr matches)
{
    for (GListPtr item = candidates; item != NULL; item = item->next) {
        resource_t *candidate = item->data;
        const char *provides_s = g_hash_table_lookup(candidate->meta, XML_RSC_ATTR_PROVIDES);
        const char *requires_s = g_hash_table_lookup(candidate->meta, XML_RSC_ATTR_REQUIRES);

        if (candidate->children) {
            matches = find_unfencing_devices(candidate->children, matches);
            continue;
        }
        if (is_not_set(candidate->flags, pe_rsc_fence_device)) {
            continue;
        }
        if (crm_str_eq(provides_s, "unfencing", FALSE) || crm_str_eq(requires_s, "unfencing", FALSE)) {
            matches = g_list_prepend(matches, candidate);
        }
    }
    return matches;
}
/*!
 * \internal
 * \brief Append one "<id>:<agent>:<digest>," entry to a digest list buffer
 *
 * Never writes beyond \p size bytes and always leaves \p buf NUL-terminated;
 * once the buffer is full, further entries are dropped.
 *
 * \param[in,out] buf     Buffer holding the list built so far
 * \param[in]     size    Total size of \p buf in bytes
 * \param[in,out] offset  Current length of the string in \p buf
 * \param[in]     id      Device ID
 * \param[in]     agent   Device agent type (may be NULL)
 * \param[in]     digest  Digest to record (may be NULL)
 */
static void
append_device_digest(char *buf, long size, long *offset, const char *id,
                     const char *agent, const char *digest)
{
    int rc = 0;

    if (*offset >= (size - 1)) {
        return; // No room left (an earlier entry was truncated)
    }

    /* Passing NULL for "%s" is undefined; spell out the text glibc would have
     * produced so recorded values are unchanged
     */
    rc = snprintf(buf + *offset, size - *offset, "%s:%s:%s,", id,
                  (agent? agent : "(null)"), (digest? digest : "(null)"));
    if (rc < 0) {
        buf[*offset] = '\0';
        return;
    }

    *offset += rc;
    if (*offset >= size) {
        // Output was truncated: the string now ends at the last byte
        *offset = size - 1;
    }
}

/*!
 * \brief Get (creating if needed) the fencing action for a node
 *
 * \param[in] node      Node to be fenced
 * \param[in] op        Fencing operation name (NULL to use the working set's
 *                      configured stonith action)
 * \param[in] optional  Whether the action should remain optional
 * \param[in] reason    Text describing why fencing is needed (may be NULL)
 * \param[in] data_set  Cluster working set
 *
 * \return Fencing action, either found in the working set's singletons table
 *         or newly created
 * \note For guest/remote nodes with unfencing enabled, a newly created action
 *       records the digests of all unfencing-related fence devices in its
 *       meta table, and becomes required if any device definition changed.
 */
action_t *
pe_fence_op(node_t * node, const char *op, bool optional, const char *reason, pe_working_set_t * data_set)
{
    char *op_key = NULL;
    action_t *stonith_op = NULL;

    if(op == NULL) {
        op = data_set->stonith_action;
    }

    op_key = crm_strdup_printf("%s-%s-%s", CRM_OP_FENCE, node->details->uname, op);

    if(data_set->singletons) {
        stonith_op = g_hash_table_lookup(data_set->singletons, op_key);
    }

    if(stonith_op == NULL) {
        // op_key is handed over to the new action (it is only freed below
        // when an existing action was found instead)
        stonith_op = custom_action(NULL, op_key, CRM_OP_FENCE, node, TRUE, TRUE, data_set);

        add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname);
        add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id);
        add_hash_param(stonith_op->meta, "stonith_action", op);

        if (pe__is_guest_or_remote_node(node)
            && is_set(data_set->flags, pe_flag_enable_unfencing)) {
            /* Extra work to detect device changes on remotes
             *
             * We may do this for all nodes in the future, but for now
             * the check_action_definition() based stuff works fine.
             *
             * Use "stonith-on" to avoid creating cache entries for
             * operations check_action_definition() would look for.
             */
            long max = 1024;
            long digests_all_offset = 0;
            long digests_secure_offset = 0;

            /* Zero-filled so the lists are valid empty strings even when no
             * device matches
             */
            char *digests_all = calloc(max, sizeof(char));
            char *digests_secure = calloc(max, sizeof(char));
            GListPtr matches = find_unfencing_devices(data_set->resources, NULL);

            CRM_ASSERT((digests_all != NULL) && (digests_secure != NULL));

            for (GListPtr gIter = matches; gIter != NULL; gIter = gIter->next) {
                resource_t *match = gIter->data;
                const char *agent = g_hash_table_lookup(match->meta, XML_ATTR_TYPE);
                op_digest_cache_t *data = fencing_action_digest_cmp(match, node, data_set);

                if(data->rc == RSC_DIGEST_ALL) {
                    optional = FALSE;
                    crm_notice("Unfencing %s (remote): because the definition of %s changed", node->details->uname, match->id);
                    if (is_set(data_set->flags, pe_flag_stdout)) {
                        fprintf(stdout, " notice: Unfencing %s (remote): because the definition of %s changed\n", node->details->uname, match->id);
                    }
                }

                append_device_digest(digests_all, max, &digests_all_offset,
                                     match->id, agent, data->digest_all_calc);
                append_device_digest(digests_secure, max, &digests_secure_offset,
                                     match->id, agent, data->digest_secure_calc);
            }

            // Only the list cells are ours; the resources belong to the working set
            g_list_free(matches);

            g_hash_table_insert(stonith_op->meta,
                                strdup(XML_OP_ATTR_DIGESTS_ALL),
                                digests_all);

            g_hash_table_insert(stonith_op->meta,
                                strdup(XML_OP_ATTR_DIGESTS_SECURE),
                                digests_secure);
        }

    } else {
        free(op_key);
    }

    if(optional == FALSE && pe_can_fence(data_set, node)) {
        pe_action_required(stonith_op, NULL, reason);
    } else if(reason && stonith_op->reason == NULL) {
        stonith_op->reason = strdup(reason);
    }

    return stonith_op;
}
/*!
 * \brief Schedule unfencing where a fence device change requires it
 *
 * Does nothing unless the working set has unfencing enabled and \p rsc (if
 * given) is a fence device. If \p node is usable (online, clean, not shutting
 * down), a required "on" fencing action is created for it; otherwise, if
 * \p rsc is given, the same is done for every usable node it is allowed on.
 *
 * \param[in] rsc         Fence device that triggered unfencing (may be NULL)
 * \param[in] node        Node to unfence (may be NULL)
 * \param[in] reason      Text describing why unfencing is needed
 * \param[in] dependency  If not NULL, action to order after the unfencing
 * \param[in] data_set    Cluster working set
 */
void
trigger_unfencing(
    resource_t * rsc, node_t *node, const char *reason, action_t *dependency, pe_working_set_t * data_set)
{
    if (is_not_set(data_set->flags, pe_flag_enable_unfencing)) {
        /* No resources require it */
        return;
    }
    if ((rsc != NULL) && is_not_set(rsc->flags, pe_rsc_fence_device)) {
        /* Wasn't a stonith device */
        return;
    }

    if ((node != NULL)
        && node->details->online
        && !node->details->unclean
        && !node->details->shutdown) {
        action_t *unfence = pe_fence_op(node, "on", FALSE, reason, data_set);

        if (dependency != NULL) {
            order_actions(unfence, dependency, pe_order_optional);
        }
        return;
    }

    if (rsc != NULL) {
        GHashTableIter iter;
        node_t *candidate = NULL;

        g_hash_table_iter_init(&iter, rsc->allowed_nodes);
        while (g_hash_table_iter_next(&iter, NULL, (void **)&candidate)) {
            if (candidate->details->online && !candidate->details->unclean
                && !candidate->details->shutdown) {
                trigger_unfencing(rsc, candidate, reason, dependency, data_set);
            }
        }
    }
}
/*!
 * \brief Add an object reference to a tag, creating the tag if needed
 *
 * \param[in,out] tags      Table of tags, keyed by tag name
 * \param[in]     tag_name  Name of the tag to update
 * \param[in]     obj_ref   Object reference to add to the tag
 *
 * \return TRUE if the tag references \p obj_ref on return (whether newly
 *         added or already present), FALSE on invalid arguments or if a new
 *         tag could not be allocated
 */
gboolean
add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref)
{
    tag_t *tag = NULL;

    CRM_CHECK(tags && tag_name && obj_ref, return FALSE);

    tag = g_hash_table_lookup(tags, tag_name);
    if (tag == NULL) {
        tag = calloc(1, sizeof(tag_t));
        if (tag == NULL) {
            return FALSE;
        }
        tag->id = strdup(tag_name);
        g_hash_table_insert(tags, strdup(tag_name), tag);
    }

    for (GListPtr item = tag->refs; item != NULL; item = item->next) {
        if (crm_str_eq((const char *) item->data, obj_ref, TRUE)) {
            return TRUE; // Already referenced, nothing to add
        }
    }

    tag->refs = g_list_append(tag->refs, strdup(obj_ref));
    crm_trace("Added: tag=%s ref=%s", tag->id, obj_ref);
    return TRUE;
}
/*!
 * \brief Set or clear an action flag and record why
 *
 * The runnable, optional and migrate-runnable flags are cleared; the dangle
 * and requires-any flags are set. A reason is recorded if the flag actually
 * changed (for a recognized flag) or if \p text was given.
 *
 * \param[in]     function   Caller's function name (passed on for logging)
 * \param[in]     line       Caller's line number (passed on for logging)
 * \param[in,out] action     Action whose flags are updated
 * \param[in]     reason     Action responsible for the change (may be NULL)
 * \param[in]     text       Free-form reason text (may be NULL)
 * \param[in]     flags      Flag to change
 * \param[in]     overwrite  Whether to replace an existing reason (always
 *                           treated as true for the migrate-runnable flag)
 */
void pe_action_set_flag_reason(const char *function, long line,
                               pe_action_t *action, pe_action_t *reason, const char *text,
                               enum pe_action_flags flags, bool overwrite)
{
    bool clear_flag = FALSE;
    bool changed = FALSE;
    const char *change = NULL;
    char *reason_text = NULL;

    if (is_set(flags, pe_action_runnable)) {
        clear_flag = TRUE;
        change = "unrunnable";
    } else if (is_set(flags, pe_action_optional)) {
        clear_flag = TRUE;
        change = "required";
    } else if (is_set(flags, pe_action_migrate_runnable)) {
        clear_flag = TRUE;
        overwrite = TRUE;
        change = "unrunnable";
    } else if (is_set(flags, pe_action_dangle)) {
        change = "dangling";
    } else if (is_set(flags, pe_action_requires_any)) {
        change = "required";
    } else {
        crm_err("Unknown flag change to %x by %s: 0x%s",
                flags, action->uuid, (reason? reason->uuid : "0"));
    }

    if (clear_flag && is_set(action->flags, flags)) {
        action->flags = crm_clear_bit(function, line, action->uuid, action->flags, flags);
        changed = TRUE;

    } else if (!clear_flag && is_not_set(action->flags, flags)) {
        action->flags = crm_set_bit(function, line, action->uuid, action->flags, flags);
        changed = TRUE;
    }

    if (!((change && changed) || text)) {
        return; // Nothing changed and no explicit text: keep the old reason
    }

    if (reason == NULL) {
        pe_action_set_reason(action, text, overwrite);
        return;
    }

    /* When there is no text, the %c argument is a NUL byte, which ends the
     * resulting string right after the task name
     */
    if (reason->rsc == NULL) {
        reason_text = crm_strdup_printf("%s %s%c %s", change, reason->task, text?':':0, text?text:"");
    } else {
        reason_text = crm_strdup_printf("%s %s %s%c %s", change, reason->rsc->id, reason->task, text?':':0, text?text:"NA");
    }

    if (reason_text && action->rsc != reason->rsc) {
        pe_action_set_reason(action, reason_text, overwrite);
    }
    free(reason_text);
}
/*!
 * \brief Set the reason recorded for an action
 *
 * \param[in,out] action     Action to update
 * \param[in]     reason     New reason text (NULL to record none)
 * \param[in]     overwrite  If true, discard any existing reason first; if
 *                           false, an existing reason is left untouched
 */
void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite)
{
    if (action->reason != NULL) {
        if (!overwrite) {
            return; // Keep the reason that is already recorded
        }
        pe_rsc_trace(action->rsc, "Changing %s reason from '%s' to '%s'", action->uuid, action->reason, reason);
        free(action->reason);
        action->reason = NULL;
    }

    if (reason != NULL) {
        pe_rsc_trace(action->rsc, "Set %s reason to '%s'", action->uuid, reason);
        action->reason = strdup(reason);
    }
}
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
index 51a2cf8820..926cf080e7 100644
--- a/tools/crm_mon.c
+++ b/tools/crm_mon.c
@@ -1,4428 +1,4424 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include /* crm_ends_with_ext */
#include
#include
#include
#include
#include
#include
#include
#include <../lib/pengine/unpack.h>
#include
#include
/* Forward declarations of file-local (static) functions, so they can be
 * referenced before their definitions
 */
static void clean_up_connections(void);
static crm_exit_t clean_up(crm_exit_t exit_code);
static void crm_diff_update(const char *event, xmlNode * msg);
static gboolean mon_refresh_display(gpointer user_data);
static int cib_connect(gboolean full);
static void mon_st_callback_event(stonith_t * st, stonith_event_t * e);
static void mon_st_callback_display(stonith_t * st, stonith_event_t * e);
static void kick_refresh(gboolean data_updated);
static char *get_node_display_name(node_t *node);
/*
 * Definitions indicating which items to print
 *
 * Each mon_show_* item below is a distinct bit, so items can be combined with
 * bitwise OR into a single mask such as "show".
 */
#define mon_show_times (0x0001U)
#define mon_show_stack (0x0002U)
#define mon_show_dc (0x0004U)
#define mon_show_count (0x0008U)
#define mon_show_nodes (0x0010U)
#define mon_show_resources (0x0020U)
#define mon_show_attributes (0x0040U)
#define mon_show_failcounts (0x0080U)
#define mon_show_operations (0x0100U)
#define mon_show_tickets (0x0200U)
#define mon_show_bans (0x0400U)
#define mon_show_fence_history (0x0800U)
/* Convenience combinations of the individual bits above */
#define mon_show_headers (mon_show_times | mon_show_stack | mon_show_dc \
| mon_show_count)
#define mon_show_default (mon_show_headers | mon_show_nodes \
| mon_show_resources)
#define mon_show_all (mon_show_default | mon_show_attributes \
| mon_show_failcounts | mon_show_operations \
| mon_show_tickets | mon_show_bans \
| mon_show_fence_history)
/* Mask of items to print; starts out as the default set */
static unsigned int show = mon_show_default;
/*
* Definitions indicating how to output
*/
enum mon_output_format_e {
mon_output_none,
mon_output_monitor,
mon_output_plain,
mon_output_console,
mon_output_xml,
mon_output_html,
mon_output_cgi
} output_format = mon_output_console;
static char *output_filename = NULL; /* if sending output to a file, its name */
/* other globals */
static char *pid_file = NULL;                   /* --pid-file; main() installs a default */
static gboolean group_by_node = FALSE;          /* --group-by-node / 'n' key */
static gboolean inactive_resources = FALSE;     /* --inactive / 'r' key */
static int reconnect_msec = 5000;               /* --interval; delay used for the reconnect timer */
static gboolean daemonize = FALSE;              /* --daemonize */
static GMainLoop *mainloop = NULL;
static guint timer_id = 0;                      /* GLib source id of the pending reconnect timeout, 0 if none */
static mainloop_timer_t *refresh_timer = NULL;
static pe_working_set_t *mon_data_set = NULL;
static GList *attr_list = NULL;
static const char *external_agent = NULL;       /* --external-agent */
static const char *external_recipient = NULL;   /* --external-recipient */
static cib_t *cib = NULL;                       /* CIB connection object */
static stonith_t *st = NULL;                    /* fencer connection object, created on demand */
static xmlNode *current_cib = NULL;             /* result of the most recent CIB query */
static gboolean one_shot = FALSE;               /* display once and exit */
static gboolean has_warnings = FALSE;           /* set by mon_warn() once a warning was printed */
static gboolean print_timing = FALSE;           /* --timing-details / 't' key */
static gboolean watch_fencing = FALSE;          /* --watch-fencing */
static gboolean fence_history = FALSE;
static gboolean fence_full_history = FALSE;
static gboolean fence_connect = FALSE;          /* whether cib_connect() should also connect to the fencer */
static int fence_history_level = 1;             /* --fence-history level (0..3) */
static gboolean print_brief = FALSE;            /* --brief / 'b' key */
static gboolean print_pending = TRUE;           /* --pending / 'j' key */
static gboolean print_clone_detail = FALSE;     /* --show-detail / 'R' key */
#if CURSES_ENABLED
static gboolean curses_console_initialized = FALSE;     /* set after initscr() in main() */
#endif
/* FIXME allow, detect, and correctly interpret glob pattern or regex? */
/* Optional id prefix given to --neg-locations ("" when none was given) */
const char *print_neg_location_prefix = "";
/* Never display node attributes whose name starts with one of these prefixes */
#define FILTER_STR { CRM_FAIL_COUNT_PREFIX, CRM_LAST_FAILURE_PREFIX, \
"shutdown", "terminate", "standby", "probe_complete", \
"#", NULL }
long last_refresh = 0;
/* Mainloop trigger that schedules mon_refresh_display() (created in main()) */
crm_trigger_t *refresh_trigger = NULL;
/* Define exit codes for monitoring-compatible output
* For nagios plugins, the possibilities are
* OK=0, WARN=1, CRIT=2, and UNKNOWN=3
*/
#define MON_STATUS_WARN CRM_EX_ERROR
#define MON_STATUS_CRIT CRM_EX_INVALID_PARAM
#define MON_STATUS_UNKNOWN CRM_EX_UNIMPLEMENT_FEATURE
/* Convenience macro for prettifying output (e.g. "node" vs "nodes") */
#define s_if_plural(i) (((i) == 1)? "" : "s")
#if CURSES_ENABLED
# define print_dot() if (output_format == mon_output_console) { \
printw("."); \
clrtoeol(); \
refresh(); \
} else { \
fprintf(stdout, "."); \
}
#else
# define print_dot() fprintf(stdout, ".");
#endif
#if CURSES_ENABLED
# define print_as(fmt, args...) if (output_format == mon_output_console) { \
printw(fmt, ##args); \
clrtoeol(); \
refresh(); \
} else { \
fprintf(stdout, fmt, ##args); \
}
#else
# define print_as(fmt, args...) fprintf(stdout, fmt, ##args);
#endif
/*!
 * \internal
 * \brief Erase every line of the curses screen and move the cursor home
 *
 * Compiles to an empty function when curses support is not enabled.
 */
static void
blank_screen(void)
{
#if CURSES_ENABLED
    int row = 0;

    /* Clear line by line, top to bottom */
    while (row < LINES) {
        move(row, 0);
        clrtoeol();
        row++;
    }

    move(0, 0);
    refresh();
#endif
}
/*!
 * \internal
 * \brief Timer callback that attempts to reconnect to the cluster
 *
 * \param[in] data  Unused
 *
 * \return Always FALSE; when the connection attempt fails, a new timeout
 *         is registered explicitly instead
 */
static gboolean
mon_timer_popped(gpointer data)
{
#if CURSES_ENABLED
    if (output_format == mon_output_console) {
        clear();
        refresh();
    }
#endif

    /* Forget (and cancel) whichever timeout is currently registered */
    if (timer_id > 0) {
        g_source_remove(timer_id);
        timer_id = 0;
    }

    print_as("Reconnecting...\n");

    if (cib_connect(TRUE) != pcmk_ok) {
        /* Still not connected: try again after the reconnect interval */
        timer_id = g_timeout_add(reconnect_msec, mon_timer_popped, NULL);
    }
    return FALSE;
}
/*!
 * \internal
 * \brief Handle loss of the CIB connection
 *
 * Stops the refresh timer, cancels any pending reconnect timeout, tears
 * down the fencer connection, signs off from the CIB and finally schedules
 * a reconnect attempt.
 *
 * \param[in] user_data  Unused
 */
static void
mon_cib_connection_destroy(gpointer user_data)
{
    print_as("Connection to the cluster-daemons terminated\n");

    if (refresh_timer != NULL) {
        /* a refresh gets triggered once we have reconnected */
        mainloop_timer_stop(refresh_timer);
    }

    if (timer_id != 0) {
        /* a fresh reconnect timeout is registered further down */
        g_source_remove(timer_id);
        timer_id = 0;
    }

    if (st != NULL) {
        /* Notifications left in the table are not re-registered properly
         * by the client API on reconnect, so drop all of them first
         */
        const char *events[] = {
            T_STONITH_NOTIFY_DISCONNECT,
            T_STONITH_NOTIFY_FENCE,
            T_STONITH_NOTIFY_HISTORY,
        };
        size_t i;

        for (i = 0; i < sizeof(events) / sizeof(events[0]); i++) {
            st->cmds->remove_notification(st, events[i]);
        }

        if (st->state != stonith_disconnected) {
            st->cmds->disconnect(st);
        }
    }

    if (cib != NULL) {
        cib->cmds->signoff(cib);
        timer_id = g_timeout_add(reconnect_msec, mon_timer_popped, NULL);
    }
}
/*!
 * \internal
 * \brief Mainloop signal handler: clean up with a success exit status
 *
 * \param[in] nsig  Signal number (unused)
 */
static void
mon_shutdown(int nsig)
{
    (void) nsig;
    (void) clean_up(CRM_EX_OK);
}
#if ON_DARWIN
#  define sighandler_t sig_t
#endif

#if CURSES_ENABLED
#  ifndef HAVE_SIGHANDLER_T
typedef void (*sighandler_t) (int);
#  endif

/* SIGWINCH handler that was installed before ours, or NULL if there is
 * nothing worth chaining to
 */
static sighandler_t ncurses_winch_handler;

/*!
 * \internal
 * \brief SIGWINCH handler: adapt curses to the new terminal size
 *
 * \param[in] nsig  Signal number (unused; SIGWINCH is passed on explicitly)
 */
static void
mon_winresize(int nsig)
{
    static int depth;   /* > 0 while an invocation is already in progress */
    int rows = 0;
    int columns = 0;

    if (depth++ == 0) {
        if (ncurses_winch_handler != NULL) {
            /* Let the original ncurses handler work out the new window
             * size; otherwise ioctl or tgetent would be needed here
             */
            (*ncurses_winch_handler) (SIGWINCH);
        }
        getmaxyx(stdscr, rows, columns);
        resizeterm(rows, columns);
        mainloop_set_trigger(refresh_trigger);
    }
    depth--;
}
#endif
/*!
 * \internal
 * \brief Connect to the fencer (if requested) and to the CIB
 *
 * When the CIB is not connected yet, this signs on, queries the current
 * CIB into \c current_cib and displays it. With \p full set it also
 * registers the connection-loss callback and the CIB diff notification.
 *
 * \param[in] full  Whether to set up notifications in addition to the
 *                  initial query
 *
 * \return pcmk_ok on success, otherwise the negative error code of the
 *         step that failed (-EINVAL if there is no CIB object)
 */
static int
cib_connect(gboolean full)
{
    int rc = pcmk_ok;
    static gboolean need_pass = TRUE;

    CRM_CHECK(cib != NULL, return -EINVAL);

    /* A password supplied via the environment means we need not prompt */
    if (getenv("CIB_passwd") != NULL) {
        need_pass = FALSE;
    }

    if ((fence_connect) && (st == NULL)) {
        st = stonith_api_new();
    }

    /* Creating the fencer object may have failed (presumably returning
     * NULL), so check the handle before looking at its state
     */
    if ((fence_connect) && (st != NULL) && (st->state == stonith_disconnected)) {
        crm_trace("Connecting to stonith");
        rc = st->cmds->connect(st, crm_system_name, NULL);
        if (rc == pcmk_ok) {
            crm_trace("Setting up stonith callbacks");
            if (watch_fencing) {
                st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
                                                mon_st_callback_event);
                st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_event);
            } else {
                st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
                                                mon_st_callback_display);
                st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display);
            }
        }
    }

    if (cib->state != cib_connected_query && cib->state != cib_connected_command) {
        crm_trace("Connecting to the CIB");

        /* Prompt only once, and only for an interactive remote connection */
        if ((output_format == mon_output_console) && need_pass && (cib->variant == cib_remote)) {
            need_pass = FALSE;
            print_as("Password:");
        }

        rc = cib->cmds->signon(cib, crm_system_name, cib_query);
        if (rc != pcmk_ok) {
            return rc;
        }

        /* The query result is stored through the address of current_cib */
        rc = cib->cmds->query(cib, NULL, &current_cib, cib_scope_local | cib_sync_call);
        if (rc == pcmk_ok) {
            mon_refresh_display(NULL);
        }

        if (rc == pcmk_ok && full) {
            rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy);
            if (rc == -EPROTONOSUPPORT) {
                /* Not fatal: carry on without reconnect support */
                print_as
                    ("Notification setup not supported, won't be able to reconnect after failure");
                if (output_format == mon_output_console) {
                    sleep(2);
                }
                rc = pcmk_ok;
            }

            if (rc == pcmk_ok) {
                /* Remove any stale registration before adding ours */
                cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update);
                rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update);
            }

            if (rc != pcmk_ok) {
                print_as("Notification setup failed, could not monitor CIB actions");
                if (output_format == mon_output_console) {
                    sleep(2);
                }
                clean_up_connections();
            }
        }
    }
    return rc;
}
/* *INDENT-OFF* */
/* Command-line option table handed to crm_set_options() in main().
 *
 * Entries named "-spacer-" carry only help text (section headings and
 * examples). The fourth field is the option character that main() switches
 * on; get_option_desc() looks entries up by that same character to build
 * the interactive help screen. The table ends with an entry whose name is
 * NULL.
 */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"version", 0, 0, '$', "\tVersion information" },
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{"quiet", 0, 0, 'Q', "\tDisplay only essential output" },
{"-spacer-", 1, 0, '-', "\nModes (mutually exclusive):"},
{"as-html", 1, 0, 'h', "\tWrite cluster status to the named html file"},
{"as-xml", 0, 0, 'X', "\t\tWrite cluster status as xml to stdout. This will enable one-shot mode."},
{"web-cgi", 0, 0, 'w', "\t\tWeb mode with output suitable for CGI (preselected when run as *.cgi)"},
{"simple-status", 0, 0, 's', "\tDisplay the cluster status once as a simple one line output (suitable for nagios)"},
{"-spacer-", 1, 0, '-', "\nDisplay Options:"},
{"group-by-node", 0, 0, 'n', "\tGroup resources by node" },
{"inactive", 0, 0, 'r', "\t\tDisplay inactive resources" },
{"failcounts", 0, 0, 'f', "\tDisplay resource fail counts"},
{"operations", 0, 0, 'o', "\tDisplay resource operation history" },
{"timing-details", 0, 0, 't', "\tDisplay resource operation history with timing details" },
{"tickets", 0, 0, 'c', "\t\tDisplay cluster tickets"},
{"watch-fencing", 0, 0, 'W', "\tListen for fencing events. For use with --external-agent"},
{"fence-history", 2, 0, 'm', "Show fence history\n"
"\t\t\t\t\t0=off, 1=failures and pending (default without option),\n"
"\t\t\t\t\t2=add successes (default without value for option),\n"
"\t\t\t\t\t3=show full history without reduction to most recent of each flavor"},
{"neg-locations", 2, 0, 'L', "Display negative location constraints [optionally filtered by id prefix]"},
{"show-node-attributes", 0, 0, 'A', "Display node attributes" },
{"hide-headers", 0, 0, 'D', "\tHide all headers" },
{"show-detail", 0, 0, 'R', "\tShow more details (node IDs, individual clone instances)" },
{"brief", 0, 0, 'b', "\t\tBrief output" },
{"pending", 0, 0, 'j', "\t\tDisplay pending state if 'record-pending' is enabled", pcmk_option_hidden},
{"-spacer-", 1, 0, '-', "\nAdditional Options:"},
{"interval", 1, 0, 'i', "\tUpdate frequency in seconds" },
{"one-shot", 0, 0, '1', "\t\tDisplay the cluster status once on the console and exit"},
{"disable-ncurses",0, 0, 'N', "\tDisable the use of ncurses", !CURSES_ENABLED},
{"daemonize", 0, 0, 'd', "\tRun in the background as a daemon"},
{"pid-file", 1, 0, 'p', "\t(Advanced) Daemon pid file location"},
{"external-agent", 1, 0, 'E', "A program to run when resource operations take place."},
{"external-recipient",1, 0, 'e', "A recipient for your program (assuming you want the program to send something to someone)."},
/* Hidden option with no help text (desc is NULL) */
{"xml-file", 1, 0, 'x', NULL, pcmk_option_hidden},
{"-spacer-", 1, 0, '-', "\nExamples:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', "Display the cluster status on the console with updates as they occur:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_mon", pcmk_option_example},
{"-spacer-", 1, 0, '-', "Display the cluster status on the console just once then exit:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_mon -1", pcmk_option_example},
{"-spacer-", 1, 0, '-', "Display your cluster status, group resources by node, and include inactive resources in the list:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_mon --group-by-node --inactive", pcmk_option_example},
{"-spacer-", 1, 0, '-', "Start crm_mon as a background daemon and have it write the cluster status to an HTML file:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_mon --daemonize --as-html /path/to/docroot/filename.html", pcmk_option_example},
{"-spacer-", 1, 0, '-', "Start crm_mon and export the current cluster status as xml to stdout, then exit.:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_mon --as-xml", pcmk_option_example},
{NULL, 0, 0, 0}
};
/* *INDENT-ON* */
#if CURSES_ENABLED
/*!
 * \internal
 * \brief Get the first line of an option's help text, without leading blanks
 *
 * \param[in] c  Option character to look up in long_options
 *
 * \return Description text, or NULL if no non-spacer option uses \p c, the
 *         option has no description, or memory could not be allocated
 *
 * \note The result points into a static buffer that is freed and replaced
 *       by the next call, so it is only valid until then.
 */
static const char *
get_option_desc(char c)
{
    int lpc;

    for (lpc = 0; long_options[lpc].name != NULL; lpc++) {

        /* Spacer entries hold section headings, not real options */
        if (long_options[lpc].name[0] == '-')
            continue;

        if (long_options[lpc].val == c) {
            static char *buf = NULL;
            const char *rv;
            char *nl;

            /* Release the string handed out by the previous call */
            free(buf);
            buf = NULL;

            /* Hidden options may have no description at all; strdup(NULL)
             * would be undefined behaviour
             */
            if (long_options[lpc].desc == NULL) {
                return NULL;
            }

            buf = strdup(long_options[lpc].desc);
            if (buf == NULL) {
                return NULL;
            }

            /* Walk a separate pointer so buf itself stays freeable */
            rv = buf;

            /* <ctype.h> functions require a value representable as
             * unsigned char (or EOF)
             */
            while (isspace((unsigned char) rv[0])) {
                rv++;
            }

            /* Keep only the first line */
            nl = strchr(rv, '\n');
            if (nl) {
                *nl = '\0';
            }
            return rv;
        }
    }
    return NULL;
}
/* Print one line of the interactive option screen: a '*' marker when
 * "condition" is true (a blank otherwise), the option character, and the
 * description returned by get_option_desc(). The expansion already ends in
 * a ';'.
 */
#define print_option_help(option, condition) \
print_as("%c %c: \t%s\n", ((condition)? '*': ' '), option, get_option_desc(option));
/*!
 * \internal
 * \brief GLib I/O watch callback handling key presses on standard input
 *
 * Reads one character and toggles the matching display option. Pressing
 * '?' enters "display option change mode", in which the option list is
 * shown and further keys are read in a loop; any key that is not a known
 * option leaves that mode. The display is refreshed before returning.
 *
 * \param[in] channel    Unused
 * \param[in] condition  Unused
 * \param[in] unused     Unused
 *
 * \return Always TRUE
 */
static gboolean
detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer unused)
{
int c;
gboolean config_mode = FALSE;
while (1) {
/* Get user input */
c = getchar();
switch (c) {
case 'm':
/* If fence history was off, turn on fencer connectivity; when no fencer
 * object exists yet, go through the connection-loss path, which signs
 * off from the CIB and schedules a reconnect
 */
if (!fence_history_level) {
fence_history = TRUE;
fence_connect = TRUE;
if (st == NULL) {
mon_cib_connection_destroy(NULL);
}
}
show ^= mon_show_fence_history;
break;
case 'c':
show ^= mon_show_tickets;
break;
case 'f':
show ^= mon_show_failcounts;
break;
case 'n':
group_by_node = ! group_by_node;
break;
case 'o':
show ^= mon_show_operations;
/* Timing details are part of the operation history, so hiding the
 * history also switches them off
 */
if ((show & mon_show_operations) == 0) {
print_timing = 0;
}
break;
case 'r':
inactive_resources = ! inactive_resources;
break;
case 'R':
print_clone_detail = ! print_clone_detail;
break;
case 't':
print_timing = ! print_timing;
/* Timing details imply showing the operation history */
if (print_timing) {
show |= mon_show_operations;
}
break;
case 'A':
show ^= mon_show_attributes;
break;
case 'L':
show ^= mon_show_bans;
break;
case 'D':
/* If any header is shown, clear them all, otherwise set them all */
if (show & mon_show_headers) {
show &= ~mon_show_headers;
} else {
show |= mon_show_headers;
}
break;
case 'b':
print_brief = ! print_brief;
break;
case 'j':
print_pending = ! print_pending;
break;
case '?':
config_mode = TRUE;
break;
default:
/* Any other key (including EOF) ends input handling */
goto refresh;
}
/* Outside of config mode a single key press is handled per callback */
if (!config_mode)
goto refresh;
/* Config mode: show the current state of every option, then loop to
 * read the next key
 */
blank_screen();
print_as("Display option change mode\n");
print_as("\n");
print_option_help('c', show & mon_show_tickets);
print_option_help('f', show & mon_show_failcounts);
print_option_help('n', group_by_node);
print_option_help('o', show & mon_show_operations);
print_option_help('r', inactive_resources);
print_option_help('t', print_timing);
print_option_help('A', show & mon_show_attributes);
print_option_help('L', show & mon_show_bans);
print_option_help('D', (show & mon_show_headers) == 0);
print_option_help('R', print_clone_detail);
print_option_help('b', print_brief);
print_option_help('j', print_pending);
print_option_help('m', (show & mon_show_fence_history));
print_as("\n");
print_as("Toggle fields via field letter, type any other key to return");
}
refresh:
mon_refresh_display(NULL);
return TRUE;
}
#endif
/*!
 * \brief Entry point of crm_mon
 *
 * Parses the command line, derives output format and fence-history
 * settings, optionally daemonizes or initializes curses, connects to the
 * cluster and then either exits (one-shot) or runs the GLib main loop.
 *
 * \param[in] argc  Number of command-line arguments
 * \param[in] argv  Command-line arguments
 *
 * \return Exit status as returned by clean_up()
 */
int
main(int argc, char **argv)
{
int flag;
int argerr = 0;
int option_index = 0;
int rc = pcmk_ok;
/* Default pid file; replaced if --pid-file is given */
pid_file = strdup("/tmp/ClusterMon.pid");
crm_log_cli_init("crm_mon");
crm_set_options(NULL, "mode [options]", long_options,
"Provides a summary of cluster's current state."
"\n\nOutputs varying levels of detail in a number of different formats.\n");
#if !defined (ON_DARWIN) && !defined (ON_BSD)
/* prevent zombies */
signal(SIGCLD, SIG_IGN);
#endif
/* Being invoked under a name ending in ".cgi" preselects CGI mode */
if (crm_ends_with_ext(argv[0], ".cgi") == TRUE) {
output_format = mon_output_cgi;
one_shot = TRUE;
}
/* to enable stonith-connection when called via some application like pcs
* set environment-variable FENCE_HISTORY to desired level
* so you don't have to modify this application
*/
/* fence_history_level = crm_atoi(getenv("FENCE_HISTORY"), "0"); */
/* Process command-line options */
while (1) {
flag = crm_get_option(argc, argv, &option_index);
if (flag == -1)
break;
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
case 'Q':
show &= ~mon_show_times;
break;
case 'i':
reconnect_msec = crm_get_msec(optarg);
break;
case 'n':
group_by_node = TRUE;
break;
case 'r':
inactive_resources = TRUE;
break;
case 'W':
watch_fencing = TRUE;
fence_connect = TRUE;
break;
case 'm':
/* "2" is the level used when the option is given without a value */
fence_history_level = crm_atoi(optarg, "2");
break;
case 'd':
daemonize = TRUE;
break;
case 't':
print_timing = TRUE;
show |= mon_show_operations;
break;
case 'o':
show |= mon_show_operations;
break;
case 'f':
show |= mon_show_failcounts;
break;
case 'A':
show |= mon_show_attributes;
break;
case 'L':
show |= mon_show_bans;
print_neg_location_prefix = optarg? optarg : "";
break;
case 'D':
show &= ~mon_show_headers;
break;
case 'b':
print_brief = TRUE;
break;
case 'j':
print_pending = TRUE;
break;
case 'R':
print_clone_detail = TRUE;
break;
case 'c':
show |= mon_show_tickets;
break;
case 'p':
/* NOTE(review): the strdup(optarg) below is only safe if crm_help()
 * does not return -- confirm
 */
free(pid_file);
if(optarg == NULL) {
crm_help(flag, CRM_EX_USAGE);
}
pid_file = strdup(optarg);
break;
case 'x':
if(optarg == NULL) {
crm_help(flag, CRM_EX_USAGE);
}
/* Reading the CIB from a file implies a single display pass */
setenv("CIB_file", optarg, 1);
one_shot = TRUE;
break;
case 'h':
if(optarg == NULL) {
crm_help(flag, CRM_EX_USAGE);
}
/* The output modes are mutually exclusive: count it as an argument
 * error if a mode other than the default was already selected
 */
argerr += (output_format != mon_output_console);
output_format = mon_output_html;
output_filename = strdup(optarg);
umask(S_IWGRP | S_IWOTH);
break;
case 'X':
argerr += (output_format != mon_output_console);
output_format = mon_output_xml;
one_shot = TRUE;
break;
case 'w':
/* do not allow argv[0] and argv[1...] redundancy */
argerr += (output_format != mon_output_console);
output_format = mon_output_cgi;
one_shot = TRUE;
break;
case 's':
argerr += (output_format != mon_output_console);
output_format = mon_output_monitor;
one_shot = TRUE;
break;
case 'E':
external_agent = optarg;
break;
case 'e':
external_recipient = optarg;
break;
case '1':
one_shot = TRUE;
break;
case 'N':
/* Only downgrade the default console mode; keep any explicit mode */
if (output_format == mon_output_console) {
output_format = mon_output_plain;
}
break;
case '$':
case '?':
crm_help(flag, CRM_EX_OK);
break;
default:
printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag);
++argerr;
break;
}
}
if (watch_fencing) {
/* don't moan as fence_history_level == 1 is default */
fence_history_level = 0;
}
/* create the cib-object early to be able to do further
* decisions based on the cib-source
*/
cib = cib_new();
if (cib == NULL) {
rc = -EINVAL;
} else {
switch (cib->variant) {
case cib_native:
/* cib & fencing - everything available */
break;
case cib_file:
/* Don't try to connect to fencing as we
* either don't have a running cluster or
* the fencing-information would possibly
* not match the cib data from a file.
* As we don't expect cib-updates coming
* in enforce one-shot. */
fence_history_level = 0;
one_shot = TRUE;
break;
case cib_remote:
/* updates coming in but no fencing */
fence_history_level = 0;
break;
case cib_undefined:
case cib_database:
default:
/* something is odd */
rc = -EINVAL;
crm_err("Invalid cib-source");
break;
}
}
/* Translate the numeric fence-history level into individual flags; each
 * level includes everything the lower levels enable
 */
switch (fence_history_level) {
case 3:
fence_full_history = TRUE;
/* fall through to next lower level */
case 2:
show |= mon_show_fence_history;
/* fall through to next lower level */
case 1:
fence_history = TRUE;
fence_connect = TRUE;
break;
default:
break;
}
/* Extra sanity checks when in CGI mode */
if (output_format == mon_output_cgi) {
argerr += (optind < argc);
argerr += (output_filename != NULL);
argerr += ((cib) && (cib->variant == cib_file));
argerr += (external_agent != NULL);
argerr += (daemonize == TRUE); /* paranoia */
} else if (optind < argc) {
/* Leftover arguments are reported but are not counted as errors */
printf("non-option ARGV-elements: ");
while (optind < argc)
printf("%s ", argv[optind++]);
printf("\n");
}
if (argerr) {
return clean_up(CRM_EX_USAGE);
}
/* XML output always prints everything */
if (output_format == mon_output_xml) {
show = mon_show_all;
print_timing = TRUE;
}
/* Settle the final output format for the chosen run mode */
if (one_shot) {
if (output_format == mon_output_console) {
output_format = mon_output_plain;
}
} else if (daemonize) {
if ((output_format == mon_output_console) || (output_format == mon_output_plain)) {
output_format = mon_output_none;
}
crm_enable_stderr(FALSE);
/* A daemon needs somewhere to deliver its results */
if ((output_format != mon_output_html)
&& !external_agent) {
printf ("Looks like you forgot to specify one or more of: "
"--as-html, --external-agent\n");
return clean_up(CRM_EX_USAGE);
}
if (cib) {
/* to be on the safe side don't have cib-object around
* when we are forking
*/
cib_delete(cib);
cib = NULL;
crm_make_daemon(crm_system_name, TRUE, pid_file);
cib = cib_new();
if (cib == NULL) {
rc = -EINVAL;
}
/* otherwise assume we've got the same cib-object we've just destroyed
* in our parent
*/
}
} else if (output_format == mon_output_console) {
#if CURSES_ENABLED
initscr();
cbreak();
noecho();
crm_enable_stderr(FALSE);
curses_console_initialized = TRUE;
#else
one_shot = TRUE;
output_format = mon_output_plain;
printf("Defaulting to one-shot mode\n");
printf("You need to have curses available at compile time to enable console mode\n");
#endif
}
crm_info("Starting %s", crm_system_name);
/* Connect to the cluster; unless in one-shot mode, keep retrying for as
 * long as the failure is -ENOTCONN
 */
if (cib) {
do {
if (!one_shot) {
print_as("Waiting until cluster is available on this node ...\n");
}
rc = cib_connect(!one_shot);
if (one_shot) {
break;
} else if (rc != pcmk_ok) {
/* NOTE(review): integer division, so an interval below one second
 * results in sleep(0)
 */
sleep(reconnect_msec / 1000);
#if CURSES_ENABLED
if (output_format == mon_output_console) {
clear();
refresh();
}
#endif
} else {
if (output_format == mon_output_html) {
print_as("Writing html to %s ...\n", output_filename);
}
}
} while (rc == -ENOTCONN);
}
/* Report a failed connection in the style of the selected output mode */
if (rc != pcmk_ok) {
if (output_format == mon_output_monitor) {
printf("CLUSTER CRIT: Connection to cluster failed: %s\n",
pcmk_strerror(rc));
return clean_up(MON_STATUS_CRIT);
} else {
if (rc == -ENOTCONN) {
print_as("\nError: cluster is not available on this node\n");
} else {
print_as("\nConnection to cluster failed: %s\n",
pcmk_strerror(rc));
}
}
if (output_format == mon_output_console) {
sleep(2);
}
return clean_up(crm_errno2exit(rc));
}
/* In one-shot mode the status was already displayed by cib_connect() */
if (one_shot) {
return clean_up(CRM_EX_OK);
}
mainloop = g_main_loop_new(NULL, FALSE);
mainloop_add_signal(SIGTERM, mon_shutdown);
mainloop_add_signal(SIGINT, mon_shutdown);
#if CURSES_ENABLED
if (output_format == mon_output_console) {
/* Install our resize handler and remember the previous one so that
 * mon_winresize() can chain to it; placeholder values are not callable
 */
ncurses_winch_handler = signal(SIGWINCH, mon_winresize);
if (ncurses_winch_handler == SIG_DFL ||
ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR)
ncurses_winch_handler = NULL;
/* React to key presses on standard input */
g_io_add_watch(g_io_channel_unix_new(STDIN_FILENO), G_IO_IN, detect_user_input, NULL);
}
#endif
refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL);
g_main_loop_run(mainloop);
g_main_loop_unref(mainloop);
crm_info("Exiting %s", crm_system_name);
return clean_up(CRM_EX_OK);
}
/* Append one warning to the single-line status output.
 *
 * The first warning is introduced by "CLUSTER WARN:", every later one by a
 * "," separator. has_warnings records that at least one warning has been
 * printed.
 */
#define mon_warn(fmt...) do { \
if (!has_warnings) { \
print_as("CLUSTER WARN:"); \
} else { \
print_as(","); \
} \
print_as(fmt); \
has_warnings = TRUE; \
} while(0)
/*!
 * \internal
 * \brief Count resources that are not orphans
 *
 * \param[in] data_set  Working set whose top-level resources are counted
 *                      when \p rsc is NULL
 * \param[in] rsc       Resource to count (recursing into its children),
 *                      or NULL to count everything in \p data_set
 *
 * \return Number of childless resources found without the orphan flag
 */
static int
count_resources(pe_working_set_t * data_set, resource_t * rsc)
{
    GListPtr item = NULL;
    int total = 0;

    /* A resource without children counts for itself, unless orphaned */
    if ((rsc != NULL) && (rsc->children == NULL)) {
        return is_not_set(rsc->flags, pe_rsc_orphan);
    }

    item = (rsc == NULL)? data_set->resources : rsc->children;
    while (item != NULL) {
        total += count_resources(data_set, item->data);
        item = item->next;
    }
    return total;
}
/*!
 * \internal
 * \brief Print a one-line cluster status for monitoring software
 *
 * Warnings (no DC, offline nodes) are emitted through mon_warn(). When
 * there were none, an OK line with node and resource counts is printed
 * instead.
 *
 * \param[in] data_set  Working set of CIB state
 * \param[in] history   List of stonith actions (not used here)
 *
 * \note This function's output (and the return code when the program exits)
 *       should conform to https://www.monitoring-plugins.org/doc/guidelines.html
 */
static void
print_simple_status(pe_working_set_t * data_set,
                    stonith_history_t *history)
{
    GListPtr item = NULL;
    int online = 0;
    int standby = 0;
    int maintenance = 0;

    if (data_set->dc_node == NULL) {
        mon_warn(" No DC");
    }

    /* Classify each node; standby takes precedence over maintenance */
    for (item = data_set->nodes; item != NULL; item = item->next) {
        node_t *node = (node_t *) item->data;

        if (!node->details->online) {
            mon_warn(" offline node: %s", node->details->uname);
        } else if (node->details->standby) {
            standby++;
        } else if (node->details->maintenance) {
            maintenance++;
        } else {
            online++;
        }
    }

    if (!has_warnings) {
        int nresources = count_resources(data_set, NULL);

        print_as("CLUSTER OK: %d node%s online", online, s_if_plural(online));
        if (standby > 0) {
            print_as(", %d standby node%s", standby, s_if_plural(standby));
        }
        if (maintenance > 0) {
            print_as(", %d maintenance node%s", maintenance, s_if_plural(maintenance));
        }
        print_as(", %d resource%s configured", nresources, s_if_plural(nresources));
    }

    print_as("\n");
}
/*!
 * \internal
 * \brief Print a [name]=[value][units] pair, optionally using time string
 *
 * \param[in] stream      File stream to display output to
 * \param[in] name        Name to display
 * \param[in] value       Value to display (or NULL to convert time instead)
 * \param[in] units       Units to display (or NULL for no units)
 * \param[in] epoch_time  Epoch time to convert if value is NULL
 *
 * \note If \p epoch_time cannot be converted to a local time string, an
 *       empty string is printed in its place.
 */
static void
print_nvpair(FILE *stream, const char *name, const char *value,
             const char *units, time_t epoch_time)
{
    /* print name= */
    switch (output_format) {
        case mon_output_plain:
        case mon_output_console:
            print_as(" %s=", name);
            break;

        case mon_output_html:
        case mon_output_cgi:
        case mon_output_xml:
            fprintf(stream, " %s=", name);
            break;

        default:
            break;
    }

    /* If we have a value (and optionally units), print it */
    if (value) {
        switch (output_format) {
            case mon_output_plain:
            case mon_output_console:
                print_as("%s%s", value, (units? units : ""));
                break;

            case mon_output_html:
            case mon_output_cgi:
                fprintf(stream, "%s%s", value, (units? units : ""));
                break;

            case mon_output_xml:
                fprintf(stream, "\"%s%s\"", value, (units? units : ""));
                break;

            default:
                break;
        }

    /* Otherwise print user-friendly time string */
    } else {
        static char empty_str[] = "";
        struct tm *local = localtime(&epoch_time);

        /* localtime() returns NULL when the time cannot be represented,
         * and asctime() must not be handed a NULL pointer
         */
        char *date_str = (local != NULL)? asctime(local) : NULL;

        if (date_str == NULL) {
            /* Never pass NULL to a "%s" conversion below */
            date_str = empty_str;
        } else {
            /* asctime() terminates its result with a newline; cut it off */
            char *nl = strchr(date_str, '\n');

            if (nl != NULL) {
                *nl = '\0';
            }
        }

        switch (output_format) {
            case mon_output_plain:
            case mon_output_console:
                print_as("'%s'", date_str);
                break;

            case mon_output_html:
            case mon_output_cgi:
            case mon_output_xml:
                fprintf(stream, "\"%s\"", date_str);
                break;

            default:
                break;
        }
    }
}
/*!
* \internal
* \brief Print whatever is needed to start a node section
*
* For plain/console and HTML/CGI output the node's display name (obtained
* from get_node_display_name() and freed here) is printed; for XML output
* the node's uname is passed to fprintf(). Other formats print nothing.
*
* \param[in] stream File stream to display output to
* \param[in] node Node to print
*/
static void
print_node_start(FILE *stream, node_t *node)
{
char *node_name;
switch (output_format) {
case mon_output_plain:
case mon_output_console:
node_name = get_node_display_name(node);
print_as("* Node %s:\n", node_name);
free(node_name);
break;
case mon_output_html:
case mon_output_cgi:
node_name = get_node_display_name(node);
/* NOTE(review): this string literal is split across two lines and looks
 * like it has lost its HTML markup -- restore it from the upstream file
 */
fprintf(stream, " Node: %s
\n \n", node_name);
free(node_name);
break;
case mon_output_xml:
/* NOTE(review): the format has no conversion for the uname argument; the
 * XML element text appears to be missing here as well -- restore from
 * upstream
 */
fprintf(stream, " \n", node->details->uname);
break;
default:
break;
}
}
/*!
* \internal
* \brief Print whatever is needed to end a node section
*
* Only the HTML/CGI and XML formats print anything here.
*
* \param[in] stream File stream to display output to
*/
static void
print_node_end(FILE *stream)
{
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
/* NOTE(review): literal is split across lines and appears to have lost
 * its closing HTML markup -- restore from upstream
 */
fprintf(stream, "
\n");
break;
case mon_output_xml:
/* NOTE(review): the closing XML tag appears to be missing from this
 * literal -- restore from upstream
 */
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print resources section heading appropriate to options
*
* The heading text depends on group_by_node and inactive_resources.
*
* \param[in] stream File stream to display output to
*
* \note heading is NULL when group_by_node is set and inactive_resources
*       is not; the "%s" conversions below would then receive NULL, so
*       callers must not invoke this function in that combination.
*/
static void
print_resources_heading(FILE *stream)
{
const char *heading;
if (group_by_node) {
/* Active resources have already been printed by node */
heading = (inactive_resources? "Inactive resources" : NULL);
} else if (inactive_resources) {
heading = "Full list of resources";
} else {
heading = "Active resources";
}
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\n%s:\n\n", heading);
break;
case mon_output_html:
case mon_output_cgi:
/* NOTE(review): literal is split across lines and appears to have lost
 * its HTML markup -- restore from upstream
 */
fprintf(stream, "
\n %s
\n", heading);
break;
case mon_output_xml:
/* NOTE(review): the opening XML tag appears to be missing from this
 * literal -- restore from upstream
 */
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print whatever resource section closing is appropriate
*
* For the text and HTML formats, a "No ... resources" message is printed
* only when no heading (and therefore no resource) was printed before.
*
* \param[in] stream File stream to display output to
* \param[in] printed_heading Whether the section heading was already printed
*/
static void
print_resources_closing(FILE *stream, gboolean printed_heading)
{
const char *heading;
/* What type of resources we did or did not display */
if (group_by_node) {
heading = "inactive ";
} else if (inactive_resources) {
heading = "";
} else {
heading = "active ";
}
switch (output_format) {
case mon_output_plain:
case mon_output_console:
if (!printed_heading) {
print_as("\nNo %sresources\n\n", heading);
}
break;
case mon_output_html:
case mon_output_cgi:
if (!printed_heading) {
/* NOTE(review): literal is split across lines and appears to have lost
 * its HTML markup -- restore from upstream
 */
fprintf(stream, "
\n No %sresources
\n", heading);
}
break;
case mon_output_xml:
/* NOTE(review): both alternatives are empty strings here; the XML tags
 * they presumably held appear to have been lost -- restore from upstream
 */
fprintf(stream, " %s\n",
(printed_heading? "" : ""));
break;
default:
break;
}
}
/*!
 * \internal
 * \brief Print the resource section(s) that the current options call for
 *
 * \param[in] stream      File stream to display output to
 * \param[in] data_set    Cluster state to display
 * \param[in] print_opts  Bitmask of pe_print_options
 */
static void
print_resources(FILE *stream, pe_working_set_t *data_set, int print_opts)
{
    /* XML uses an indent, and ignores brief option for resources */
    const gboolean xml_output = (output_format == mon_output_xml);
    const char *prefix = xml_output? " " : NULL;
    gboolean brief_output = xml_output? FALSE : print_brief;
    gboolean printed_heading = FALSE;
    GListPtr item;

    /* Active resources were already shown per node; without inactive
     * resources there is nothing left to print
     */
    if (group_by_node && !inactive_resources) {
        return;
    }

    /* Brief output starts with a summary, unless resources were already
     * printed grouped by node
     */
    if (brief_output && !group_by_node) {
        print_resources_heading(stream);
        printed_heading = TRUE;
        print_rscs_brief(data_set->resources, NULL, print_opts, stream,
                         inactive_resources);
    }

    for (item = data_set->resources; item != NULL; item = item->next) {
        resource_t *rsc = (resource_t *) item->data;

        /* Complex resources may have some sub-resources active and some inactive */
        gboolean is_active = rsc->fns->active(rsc, TRUE);
        gboolean partially_active = rsc->fns->active(rsc, FALSE);
        gboolean skip;

        if (is_set(rsc->flags, pe_rsc_orphan) && !is_active) {
            /* Inactive orphan (deleted but still in CIB) */
            skip = TRUE;

        } else if (group_by_node) {
            /* Active resources were already displayed by node */
            skip = is_active;

        } else if (brief_output && (rsc->variant == pe_native)) {
            /* Primitive already counted in the brief summary */
            skip = TRUE;

        } else {
            /* Not even partially active, and inactive ones not wanted */
            skip = (!partially_active && !inactive_resources);
        }

        if (skip) {
            continue;
        }

        /* The heading is printed lazily, ahead of the first resource */
        if (printed_heading == FALSE) {
            print_resources_heading(stream);
            printed_heading = TRUE;
        }
        rsc->fns->print(rsc, prefix, print_opts, stream);
    }

    print_resources_closing(stream, printed_heading);
}
/*!
* \internal
* \brief Print heading for resource history
*
* Nothing is printed when \p all is false and the resource has neither a
* fail count nor a last-failure time on the node.
*
* \param[in] stream File stream to display output to
* \param[in] data_set Current state of CIB
* \param[in] node Node that ran this resource
* \param[in] rsc Resource to print
* \param[in] rsc_id ID of resource to print
* \param[in] all Whether to print every resource or just failed ones
*/
static void
print_rsc_history_start(FILE *stream, pe_working_set_t *data_set, node_t *node,
resource_t *rsc, const char *rsc_id, gboolean all)
{
time_t last_failure = 0;
/* The fail count is only looked up when a resource object is available */
int failcount = rsc?
pe_get_failcount(node, rsc, &last_failure, pe_fc_default,
NULL, data_set)
: 0;
if (!all && !failcount && (last_failure <= 0)) {
return;
}
/* Print resource ID */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" %s:", rsc_id);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " %s:", rsc_id);
break;
case mon_output_xml:
/* NOTE(review): the next line is truncated -- the rest of this fprintf(),
 * the end of the switch and the start of the following if-statement are
 * missing. Restore the lost statements from the upstream file.
 */
fprintf(stream, " 0)) {
/* Print migration threshold */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" migration-threshold=%d", rsc->migration_threshold);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " migration-threshold=%d", rsc->migration_threshold);
break;
case mon_output_xml:
fprintf(stream, " orphan=\"false\" migration-threshold=\"%d\"",
rsc->migration_threshold);
break;
default:
break;
}
/* Print fail count if any */
if (failcount > 0) {
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" " CRM_FAIL_COUNT_PREFIX "=%d", failcount);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " " CRM_FAIL_COUNT_PREFIX "=%d", failcount);
break;
case mon_output_xml:
fprintf(stream, " " CRM_FAIL_COUNT_PREFIX "=\"%d\"",
failcount);
break;
default:
break;
}
}
/* Print last failure time if any */
if (last_failure > 0) {
print_nvpair(stream, CRM_LAST_FAILURE_PREFIX, NULL, NULL,
last_failure);
}
}
/* End the heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\n");
break;
case mon_output_html:
case mon_output_cgi:
/* NOTE(review): HTML markup may be missing from this literal -- compare
 * with upstream
 */
fprintf(stream, "\n \n");
break;
case mon_output_xml:
fprintf(stream, ">\n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print closing for resource history
*
* Only the HTML/CGI and XML formats print anything here.
*
* \param[in] stream File stream to display output to
*/
static void
print_rsc_history_end(FILE *stream)
{
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
/* NOTE(review): literal is split across lines and appears to have lost
 * its closing HTML markup -- restore from upstream
 */
fprintf(stream, "
\n \n");
break;
case mon_output_xml:
/* NOTE(review): the closing XML tag appears to be missing from this
 * literal -- restore from upstream
 */
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print operation history
*
* An entry starts with the call ID and task, may be followed by timing
* details, and ends with the return code and its text description.
*
* \param[in] stream File stream to display output to
* \param[in] data_set Current state of CIB
* \param[in] node Node this operation is for
* \param[in] xml_op Root of XML tree describing this operation
* \param[in] task Task parsed from this operation's XML
* \param[in] interval_ms_s Interval parsed from this operation's XML
* \param[in] rc Return code parsed from this operation's XML
*/
static void
print_op_history(FILE *stream, pe_working_set_t *data_set, node_t *node,
xmlNode *xml_op, const char *task, const char *interval_ms_s,
int rc)
{
const char *value = NULL;
const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID);
/* Begin the operation description */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" + (%s) %s:", call, task);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " (%s) %s:", call, task);
break;
case mon_output_xml:
/* NOTE(review): the literal on the next line is never closed, and the code
 * that declared and first used 'attr' and 'int_value' appears to be
 * missing -- confirm against version control
 */
fprintf(stream, " 0) {
print_nvpair(stream, attr, NULL, NULL, int_value);
}
}
/* Last run time is printed only when it parses to a positive integer */
attr = XML_RSC_OP_LAST_RUN;
value = crm_element_value(xml_op, attr);
if (value) {
int_value = crm_parse_int(value, NULL);
if (int_value > 0) {
print_nvpair(stream, attr, NULL, NULL, int_value);
}
}
/* Execution and queue times are printed as given, with "ms" as the unit */
attr = XML_RSC_OP_T_EXEC;
value = crm_element_value(xml_op, attr);
if (value) {
print_nvpair(stream, attr, value, "ms", 0);
}
attr = XML_RSC_OP_T_QUEUE;
value = crm_element_value(xml_op, attr);
if (value) {
print_nvpair(stream, attr, value, "ms", 0);
}
}
/* End the operation description */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" rc=%d (%s)\n", rc, services_ocf_exitcode_str(rc));
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " rc=%d (%s)\n", rc, services_ocf_exitcode_str(rc));
break;
case mon_output_xml:
fprintf(stream, " rc=\"%d\" rc_text=\"%s\" />\n", rc, services_ocf_exitcode_str(rc));
break;
default:
break;
}
}
/*!
 * \internal
 * \brief Print the operation or failure history of one resource on one node
 *
 * When operations are not requested, only the failure summary heading is
 * printed. Otherwise the resource's operations are collected, ordered with
 * sort_op_by_callid(), and printed one by one; the resource heading is
 * emitted lazily, so a resource whose operations are all filtered out
 * produces no output at all.
 *
 * \param[in] stream      File stream to display output to
 * \param[in] data_set    Current state of CIB
 * \param[in] node        Node that ran this resource
 * \param[in] rsc_entry   Root of XML tree describing resource status
 * \param[in] operations  Whether to print operations or just failcounts
 */
static void
print_rsc_history(FILE *stream, pe_working_set_t *data_set, node_t *node,
                  xmlNode *rsc_entry, gboolean operations)
{
    const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
    resource_t *rsc = pe_find_resource(data_set->resources, rsc_id);
    GListPtr sorted_ops = NULL;
    GListPtr item = NULL;
    xmlNode *child = NULL;
    gboolean heading_shown = FALSE;

    /* Failure summary only: heading plus closing, nothing in between */
    if (operations == FALSE) {
        print_rsc_history_start(stream, data_set, node, rsc, rsc_id, FALSE);
        print_rsc_history_end(stream);
        return;
    }

    /* Gather the operation entries beneath this resource */
    child = __xml_first_child(rsc_entry);
    while (child != NULL) {
        if (crm_str_eq((const char *) child->name, XML_LRM_TAG_RSC_OP, TRUE)) {
            sorted_ops = g_list_append(sorted_ops, child);
        }
        child = __xml_next(child);
    }
    sorted_ops = g_list_sort(sorted_ops, sort_op_by_callid);

    for (item = sorted_ops; item != NULL; item = item->next) {
        xmlNode *xml_op = (xmlNode *) item->data;
        const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
        const char *interval_ms_s = crm_element_value(xml_op,
                                                      XML_LRM_ATTR_INTERVAL_MS);
        const char *op_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC);
        int rc = crm_parse_int(op_rc, "0");

        /* A monitor without an interval (or with interval 0) is shown as
         * "probe"
         */
        if (safe_str_eq(task, CRMD_ACTION_STATUS)
            && ((interval_ms_s == NULL) || safe_str_eq(interval_ms_s, "0"))) {
            task = "probe";
        }

        /* Notifications are never shown */
        if (safe_str_eq(task, CRMD_ACTION_NOTIFY)) {
            continue;
        }

        /* Probes that returned 7 are not shown either */
        if (safe_str_eq(task, "probe") && (rc == 7)) {
            continue;
        }

        /* Emit the resource heading just before its first visible operation */
        if (!heading_shown) {
            heading_shown = TRUE;
            print_rsc_history_start(stream, data_set, node, rsc, rsc_id, TRUE);
        }

        print_op_history(stream, data_set, node, xml_op, task, interval_ms_s,
                         rc);
    }

    /* Only the list cells are ours; the XML nodes belong to the CIB tree */
    g_list_free(sorted_ops);

    if (heading_shown) {
        print_rsc_history_end(stream);
    }
}
/*!
 * \internal
 * \brief Print the operation or failure history of every resource on a node
 *
 * Nothing is printed unless the node is known to the working set and is
 * online.
 *
 * \param[in] stream      File stream to display output to
 * \param[in] data_set    Current state of CIB
 * \param[in] node_state  Root of XML tree describing node status
 * \param[in] operations  Whether to print operations or just failcounts
 */
static void
print_node_history(FILE *stream, pe_working_set_t *data_set,
                   xmlNode *node_state, gboolean operations)
{
    node_t *node = pe_find_node_id(data_set->nodes, ID(node_state));
    xmlNode *resources = NULL;
    xmlNode *entry = NULL;

    if ((node == NULL) || (node->details == NULL) || !(node->details->online)) {
        return;
    }

    print_node_start(stream, node);

    /* Descend from the node state to its list of resource entries */
    resources = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
    resources = find_xml_node(resources, XML_LRM_TAG_RESOURCES, FALSE);

    entry = __xml_first_child(resources);
    while (entry != NULL) {
        if (crm_str_eq((const char *) entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
            print_rsc_history(stream, data_set, node, entry, operations);
        }
        entry = __xml_next(entry);
    }

    print_node_end(stream);
}
/*!
 * \internal
 * \brief Print extended information about an attribute if appropriate
 *
 * The resource list is searched depth-first (children before the resource
 * itself) for a ping/pingd resource whose "name" parameter (defaulting to
 * "pingd") matches the attribute name. The expected score is the number of
 * hosts in that resource's "host_list" times its "multiplier" (default 1).
 *
 * \param[in] stream     File stream to display output to
 * \param[in] node       Node the attribute belongs to (passed through to
 *                       recursive calls; not otherwise used here)
 * \param[in] rsc_list   Resources to search
 * \param[in] attrname   Name of the attribute being displayed
 * \param[in] attrvalue  Value of the attribute being displayed
 *
 * \return TRUE if a matching ping/pingd resource was found (and any extended
 *         information printed), FALSE otherwise
 * \note Currently, extended information is only supported for ping/pingd
 *       resources, for which a message will be printed if connectivity is lost
 *       or degraded.
 */
static gboolean
print_attr_msg(FILE *stream, node_t * node, GListPtr rsc_list, const char *attrname, const char *attrvalue)
{
    GListPtr gIter = NULL;

    for (gIter = rsc_list; gIter != NULL; gIter = gIter->next) {
        resource_t *rsc = (resource_t *) gIter->data;
        const char *type = g_hash_table_lookup(rsc->meta, "type");

        if (rsc->children != NULL) {
            if (print_attr_msg(stream, node, rsc->children, attrname, attrvalue)) {
                return TRUE;
            }
        }

        if (safe_str_eq(type, "ping") || safe_str_eq(type, "pingd")) {
            const char *name = g_hash_table_lookup(rsc->parameters, "name");

            if (name == NULL) {
                name = "pingd";
            }

            /* To identify the resource with the attribute name. */
            if (safe_str_eq(name, attrname)) {
                int host_list_num = 0;
                int expected_score = 0;
                int value = crm_parse_int(attrvalue, "0");
                const char *hosts = g_hash_table_lookup(rsc->parameters, "host_list");
                const char *multiplier = g_hash_table_lookup(rsc->parameters, "multiplier");

                if (hosts) {
                    char **host_list = g_strsplit(hosts, " ", 0);
                    char **host = NULL;

                    /* g_strsplit() yields an empty string for every repeated,
                     * leading or trailing delimiter; those are not hosts and
                     * must not inflate the expected score
                     */
                    for (host = host_list; *host != NULL; host++) {
                        if (**host != '\0') {
                            host_list_num++;
                        }
                    }
                    g_strfreev(host_list);
                }

                /* pingd multiplier is the same as the default value. */
                expected_score = host_list_num * crm_parse_int(multiplier, "1");

                switch (output_format) {
                    case mon_output_plain:
                    case mon_output_console:
                        if (value <= 0) {
                            print_as("\t: Connectivity is lost");
                        } else if (value < expected_score) {
                            print_as("\t: Connectivity is degraded (Expected=%d)", expected_score);
                        }
                        break;

                    case mon_output_html:
                    case mon_output_cgi:
                        if (value <= 0) {
                            fprintf(stream, " (connectivity is lost)");
                        } else if (value < expected_score) {
                            fprintf(stream, " (connectivity is degraded -- expected %d)",
                                    expected_score);
                        }
                        break;

                    case mon_output_xml:
                        fprintf(stream, " expected=\"%d\"", expected_score);
                        break;

                    default:
                        break;
                }
                return TRUE;
            }
        }
    }
    return FALSE;
}
/*!
 * \internal
 * \brief Order two attribute names (comparison callback for sorted insertion)
 *
 * \param[in] a  First attribute name (a C string)
 * \param[in] b  Second attribute name (a C string)
 *
 * \return Result of strcmp() on the two names
 */
static int
compare_attribute(gconstpointer a, gconstpointer b)
{
    const char *lhs = (const char *) a;
    const char *rhs = (const char *) b;

    return strcmp(lhs, rhs);
}
/*!
 * \internal
 * \brief Add an attribute name to the global sorted attribute list
 *
 * Names beginning with any of the prefixes in FILTER_STR are skipped.
 *
 * \param[in] name   Attribute name (hash table key)
 * \param[in] value  Attribute value (unused)
 * \param[in] data   User data (unused)
 */
static void
create_attr_list(gpointer name, gpointer value, gpointer data)
{
    const char *filt_str[] = FILTER_STR;
    const char **prefix = NULL;

    CRM_CHECK(name != NULL, return);

    /* filtering automatic attributes */
    for (prefix = filt_str; *prefix != NULL; prefix++) {
        if (g_str_has_prefix(name, *prefix)) {
            return;
        }
    }

    attr_list = g_list_insert_sorted(attr_list, name, compare_attribute);
}
/* structure for passing multiple user data to g_list_foreach() */
struct mon_attr_data {
/* File stream that attribute output is written to */
FILE *stream;
/* Node whose attributes are being printed */
node_t *node;
};
/*!
* \internal
* \brief Print one node attribute (callback for g_list_foreach())
*
* \param[in] name Attribute name
* \param[in] user_data Output stream and node, as a struct mon_attr_data
*/
static void
print_node_attribute(gpointer name, gpointer user_data)
{
const char *value = NULL;
struct mon_attr_data *data = (struct mon_attr_data *) user_data;
value = pe_node_attribute_raw(data->node, name);
/* Print attribute name and value */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" + %-32s\t: %-10s", (char *)name, value);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(data->stream, " %s: %s",
(char *)name, value);
break;
case mon_output_xml:
/* NOTE(review): the literal below is never closed and the end of this switch
 * appears to be missing; the remaining arguments look like they belong to a
 * call that prints extended attribute information -- confirm against
 * version control
 */
fprintf(data->stream,
" stream, data->node, data->node->details->running_rsc,
name, value);
/* Close out the attribute */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(data->stream, "\n");
break;
case mon_output_xml:
fprintf(data->stream, " />\n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print a history section covering every node in the CIB status
*
* The section is headed "Operations" when operations are requested and
* "Migration Summary" otherwise.
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
* \param[in] operations Whether to print operations or just failcounts
*/
static void
print_node_summary(FILE *stream, pe_working_set_t * data_set, gboolean operations)
{
xmlNode *node_state = NULL;
xmlNode *cib_status = get_object_root(XML_CIB_TAG_STATUS, data_set->input);
/* Print heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
if (operations) {
print_as("\nOperations:\n");
} else {
print_as("\nMigration Summary:\n");
}
break;
case mon_output_html:
case mon_output_cgi:
/* NOTE(review): the HTML literals below span several source lines as shown --
 * confirm the intended markup against version control
 */
if (operations) {
fprintf(stream, "
\n Operations
\n");
} else {
fprintf(stream, "
\n Migration Summary
\n");
}
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
/* Print each node in the CIB status */
for (node_state = __xml_first_child(cib_status); node_state != NULL;
node_state = __xml_next(node_state)) {
if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
print_node_history(stream, data_set, node_state, operations);
}
}
/* Close section (only the XML format emits a closing here) */
switch (output_format) {
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
 * \internal
 * \brief Print one ticket (callback for g_hash_table_foreach())
 *
 * \param[in] name   Hash table key (unused)
 * \param[in] value  Ticket to print (a ticket_t)
 * \param[in] data   File stream to display output to (a FILE)
 */
static void
print_ticket(gpointer name, gpointer value, gpointer data)
{
    ticket_t *ticket = (ticket_t *) value;
    FILE *stream = (FILE *) data;

    /* Ticket ID, whether it is granted, and whether it is in standby */
    switch (output_format) {
        case mon_output_plain:
        case mon_output_console:
            print_as("* %s:\t%s%s", ticket->id,
                     (ticket->granted? "granted" : "revoked"),
                     (ticket->standby? " [standby]" : ""));
            break;

        case mon_output_html:
        case mon_output_cgi:
            fprintf(stream, " %s: %s%s", ticket->id,
                    (ticket->granted? "granted" : "revoked"),
                    (ticket->standby? " [standby]" : ""));
            break;

        case mon_output_xml:
            /* NOTE(review): the literal on the next line is not closed as
             * shown; it is reproduced unchanged -- confirm against version
             * control
             */
            fprintf(stream, " id, (ticket->granted? "granted" : "revoked"),
                    (ticket->standby? "true" : "false"));
            break;

        default:
            break;
    }

    /* The time must go to the caller's stream, like the rest of the entry,
     * rather than unconditionally to stdout
     */
    if (ticket->last_granted > -1) {
        print_nvpair(stream, "last-granted", NULL, NULL, ticket->last_granted);
    }

    /* Close out the ticket entry */
    switch (output_format) {
        case mon_output_plain:
        case mon_output_console:
            print_as("\n");
            break;

        case mon_output_html:
        case mon_output_cgi:
            fprintf(stream, "\n");
            break;

        case mon_output_xml:
            fprintf(stream, " />\n");
            break;

        default:
            break;
    }
}
/*!
* \internal
* \brief Print a section listing every ticket in the working set
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_cluster_tickets(FILE *stream, pe_working_set_t * data_set)
{
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\nTickets:\n");
break;
case mon_output_html:
case mon_output_cgi:
/* NOTE(review): this literal spans several source lines as shown -- confirm
 * the intended markup against version control
 */
fprintf(stream, "
\n Tickets
\n \n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
/* Print each ticket */
g_hash_table_foreach(data_set->tickets, print_ticket, stream);
/* Close section (plain and console output need no closing) */
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
 * \internal
 * \brief Build a human-friendly name for a node
 *
 * The result has the form
 *     uname[@hostUname] [(nodeID)]
 * where "@hostUname" is included for guest nodes (with an empty host name if
 * the host cannot be determined), and " (nodeID)" is included when detailed
 * output was requested and the node ID differs from the node uname.
 *
 * \param[in] node  Node to represent
 *
 * \return Newly allocated string with representation of node name
 * \note It is the caller's responsibility to free the result with free().
 */
static char *
get_node_display_name(node_t *node)
{
    const char *uname = NULL;
    const char *host_uname = NULL;
    const char *id = NULL;
    char *display = NULL;
    int needed = 0;

    CRM_ASSERT((node != NULL) && (node->details != NULL) && (node->details->uname != NULL));
    uname = node->details->uname;

    /* Guest nodes show the host they run on */
    if (pe__is_guest_node(node)) {
        node_t *host = pe__current_node(node->details->remote_rsc);

        if (host && host->details) {
            host_uname = host->details->uname;
        }
        if (host_uname == NULL) {
            host_uname = ""; /* so we at least get "uname@" to indicate guest */
        }
    }

    /* The ID is shown only on request, and only if it adds information */
    if (print_clone_detail && safe_str_neq(uname, node->details->id)) {
        id = node->details->id;
    }

    /* Size the buffer: uname and terminator, then the optional pieces */
    needed = strlen(uname) + 1;
    if (host_uname) {
        needed += strlen(host_uname) + 1; /* "@" */
    }
    if (id) {
        needed += strlen(id) + 3; /* " (" and ")" */
    }

    display = malloc(needed);
    CRM_ASSERT(display != NULL);

    strcpy(display, uname);
    if (host_uname) {
        strcat(display, "@");
        strcat(display, host_uname);
    }
    if (id) {
        strcat(display, " (");
        strcat(display, id);
        strcat(display, ")");
    }
    return display;
}
/*!
* \internal
* \brief Print a negative location constraint
*
* The plain, console, HTML and CGI formats use the node's display name, which
* is allocated here and freed before returning. The XML format uses the plain
* node uname and allocates nothing, so the final free() receives NULL.
*
* \param[in] stream File stream to display output to
* \param[in] node Node affected by constraint
* \param[in] location Constraint to print
*/
static void
print_ban(FILE *stream, pe_node_t *node, pe__location_t *location)
{
char *node_name = NULL;
switch (output_format) {
case mon_output_plain:
case mon_output_console:
node_name = get_node_display_name(node);
print_as(" %s\tprevents %s from running %son %s\n",
location->id, location->rsc_lh->id,
((location->role_filter == RSC_ROLE_MASTER)? "as Master " : ""),
node_name);
break;
case mon_output_html:
case mon_output_cgi:
node_name = get_node_display_name(node);
fprintf(stream, " %s prevents %s from running %son %s\n",
location->id, location->rsc_lh->id,
((location->role_filter == RSC_ROLE_MASTER)? "as Master " : ""),
node_name);
break;
case mon_output_xml:
/* NOTE(review): the format string below has no conversions although five
 * arguments are passed; it looks like its markup was lost -- confirm against
 * version control
 */
fprintf(stream,
" \n",
location->id, location->rsc_lh->id, node->details->uname, node->weight,
((location->role_filter == RSC_ROLE_MASTER)? "true" : "false"));
break;
default:
break;
}
free(node_name);
}
/*!
* \internal
* \brief Print section for negative location constraints
*
* Only constraints whose ID starts with print_neg_location_prefix are
* considered, and within those only nodes with a negative weight are printed.
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set corresponding to CIB status to display
*/
static void
print_neg_locations(FILE *stream, pe_working_set_t *data_set)
{
GListPtr gIter, gIter2;
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\nNegative Location Constraints:\n");
break;
case mon_output_html:
case mon_output_cgi:
/* NOTE(review): this literal spans several source lines as shown -- confirm
 * the intended markup against version control
 */
fprintf(stream, "
\n Negative Location Constraints
\n \n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
/* Print each ban */
for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) {
pe__location_t *location = gIter->data;
/* Skip constraints that do not carry the requested ID prefix */
if (!g_str_has_prefix(location->id, print_neg_location_prefix))
continue;
for (gIter2 = location->node_list_rh; gIter2 != NULL; gIter2 = gIter2->next) {
node_t *node = (node_t *) gIter2->data;
/* A negative weight is what makes the constraint a ban for this node */
if (node->weight < 0) {
print_ban(stream, node, location);
}
}
}
/* Close section (plain and console output need no closing) */
switch (output_format) {
case mon_output_cgi:
case mon_output_html:
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
 * \internal
 * \brief Unpack the parameters of a resource and all of its descendants
 *
 * \param[in] rsc       Resource whose parameters should be unpacked
 * \param[in] data_set  Working set of CIB state
 */
static void
crm_mon_get_parameters(resource_t *rsc, pe_working_set_t * data_set)
{
    GListPtr child = NULL;

    get_rsc_attributes(rsc->parameters, rsc, NULL, data_set);
    crm_trace("Beekhof: unpacked params for %s (%d)", rsc->id, g_hash_table_size(rsc->parameters));

    /* Recurse into children; an empty child list simply ends the loop */
    for (child = rsc->children; child != NULL; child = child->next) {
        crm_mon_get_parameters(child->data, data_set);
    }
}
/*!
* \internal
* \brief Print node attributes section
*
* Attributes are printed for online nodes only. For each such node the global
* attr_list is filled with the node's attribute names, printed, then freed
* and reset to NULL before the next node is handled.
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_node_attributes(FILE *stream, pe_working_set_t *data_set)
{
GListPtr gIter = NULL;
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\nNode Attributes:\n");
break;
case mon_output_html:
case mon_output_cgi:
/* NOTE(review): this literal spans several source lines as shown -- confirm
 * the intended markup against version control
 */
fprintf(stream, "
\n Node Attributes
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
/* Unpack all resource parameters (it would be more efficient to do this
* only when needed for the first time in print_attr_msg())
*/
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
crm_mon_get_parameters(gIter->data, data_set);
}
/* Display each node's attributes */
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
struct mon_attr_data data;
data.stream = stream;
data.node = (node_t *) gIter->data;
if (data.node && data.node->details && data.node->details->online) {
print_node_start(stream, data.node);
/* Collect the attribute names into attr_list, then print them in list order */
g_hash_table_foreach(data.node->details->attrs, create_attr_list, NULL);
g_list_foreach(attr_list, print_node_attribute, &data);
/* Only the list cells are freed; the names are not freed here */
g_list_free(attr_list);
attr_list = NULL;
print_node_end(stream);
}
}
/* Print section footer (only the XML format emits one) */
switch (output_format) {
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
 * \internal
 * \brief Translate the command-line display choices into resource print flags
 *
 * \return Bitmask of pe_print_options suitable for resource print functions
 */
static int
get_resource_display_options(void)
{
    /* Anything that is not console, HTML/CGI or XML prints via printf */
    int print_opts = pe_print_printf;

    /* Determine basic output format */
    if (output_format == mon_output_console) {
        print_opts = pe_print_ncurses;

    } else if ((output_format == mon_output_html)
               || (output_format == mon_output_cgi)) {
        print_opts = pe_print_html;

    } else if (output_format == mon_output_xml) {
        print_opts = pe_print_xml;
    }

    /* Add optional display elements */
    if (print_pending) {
        print_opts |= pe_print_pending;
    }
    if (print_clone_detail) {
        print_opts |= pe_print_clone_details|pe_print_implicit;
    }
    if (!inactive_resources) {
        print_opts |= pe_print_clone_active;
    }
    if (print_brief) {
        print_opts |= pe_print_brief;
    }
    return print_opts;
}
/*!
 * \internal
 * \brief Return human-friendly string representing current time
 *
 * \return Current time as string (as by ctime() but without newline) on success
 *         or "Could not determine current time" on error
 * \note The return value points to a statically allocated string which might be
 *       overwritten by subsequent calls to any of the C library date and time
 *       functions.
 */
static const char *
crm_now_string(void)
{
    time_t a_time = time(NULL);
    char *since_epoch = NULL;
    size_t len = 0;

    /* Do not hand a failed time() result to ctime() */
    if (a_time == (time_t) -1) {
        return "Could not determine current time";
    }

    since_epoch = ctime(&a_time);
    if (since_epoch == NULL) {
        return "Could not determine current time";
    }

    /* Trim the trailing newline, but only if one is actually there, so an
     * unexpectedly empty or unterminated result is never indexed out of bounds
     * or truncated by mistake
     */
    len = strlen(since_epoch);
    if ((len > 0) && (since_epoch[len - 1] == '\n')) {
        since_epoch[len - 1] = '\0';
    }
    return since_epoch;
}
/*!
* \internal
* \brief Print header for cluster summary if needed
*
* Only the HTML/CGI and XML formats have a header; other formats print nothing.
*
* \param[in] stream File stream to display output to
*/
static void
print_cluster_summary_header(FILE *stream)
{
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
/* NOTE(review): this literal spans two source lines as shown -- confirm the
 * intended markup against version control
 */
fprintf(stream, " Cluster Summary
\n \n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print footer for cluster summary if needed
*
* Only the HTML/CGI and XML formats have a footer; other formats print nothing.
*
* \param[in] stream File stream to display output to
*/
static void
print_cluster_summary_footer(FILE *stream)
{
switch (output_format) {
case mon_output_cgi:
case mon_output_html:
/* NOTE(review): this literal spans two source lines as shown -- confirm the
 * intended markup against version control
 */
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print times the display was last updated and CIB last changed
*
* The last change is followed by the user, client and origin of the update
* when the CIB provides them.
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_cluster_times(FILE *stream, pe_working_set_t *data_set)
{
const char *last_written = crm_element_value(data_set->input, XML_CIB_ATTR_WRITTEN);
const char *user = crm_element_value(data_set->input, XML_ATTR_UPDATE_USER);
const char *client = crm_element_value(data_set->input, XML_ATTR_UPDATE_CLIENT);
const char *origin = crm_element_value(data_set->input, XML_ATTR_UPDATE_ORIG);
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("Last updated: %s", crm_now_string());
/* Keep both times on one line unless update details will be appended to the
 * last change
 */
print_as((user || client || origin)? "\n" : "\t\t");
print_as("Last change: %s", last_written ? last_written : "");
if (user) {
print_as(" by %s", user);
}
if (client) {
print_as(" via %s", client);
}
if (origin) {
print_as(" on %s", origin);
}
print_as("\n");
break;
case mon_output_html:
case mon_output_cgi:
/* NOTE(review): two literals in this branch span source lines as shown --
 * confirm the intended markup against version control
 */
fprintf(stream, " Last updated: %s
\n", crm_now_string());
fprintf(stream, " Last change: %s", last_written ? last_written : "");
if (user) {
fprintf(stream, " by %s", user);
}
if (client) {
fprintf(stream, " via %s", client);
}
if (origin) {
fprintf(stream, " on %s", origin);
}
fprintf(stream, "
\n");
break;
case mon_output_xml:
/* NOTE(review): neither format string below has conversions although
 * arguments are passed; their markup looks lost -- confirm against version
 * control
 */
fprintf(stream, " \n", crm_now_string());
fprintf(stream, " \n",
last_written ? last_written : "", user ? user : "",
client ? client : "", origin ? origin : "");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print cluster stack
*
* \param[in] stream File stream to display output to
* \param[in] stack_s Stack name (passed directly to a %s conversion)
*/
static void
print_cluster_stack(FILE *stream, const char *stack_s)
{
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("Stack: %s\n", stack_s);
break;
case mon_output_html:
case mon_output_cgi:
/* NOTE(review): this literal spans two source lines as shown -- confirm the
 * intended markup against version control
 */
fprintf(stream, " Stack: %s
\n", stack_s);
break;
case mon_output_xml:
/* NOTE(review): the format string has no conversion for stack_s as shown --
 * confirm against version control
 */
fprintf(stream, " \n", stack_s);
break;
default:
break;
}
}
/*!
* \internal
* \brief Print current DC and its version
*
* When there is no DC, "NONE" is printed instead. The version is shown as
* "unknown" when no dc-version value is found.
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_cluster_dc(FILE *stream, pe_working_set_t *data_set)
{
node_t *dc = data_set->dc_node;
xmlNode *dc_version = get_xpath_object("//nvpair[@name='dc-version']",
data_set->input, LOG_DEBUG);
const char *dc_version_s = dc_version?
crm_element_value(dc_version, XML_NVPAIR_ATTR_VALUE)
: NULL;
const char *quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM);
/* Allocated only when there is a DC; freed at the end of the function */
char *dc_name = dc? get_node_display_name(dc) : NULL;
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("Current DC: ");
if (dc) {
print_as("%s (version %s) - partition %s quorum\n",
dc_name, (dc_version_s? dc_version_s : "unknown"),
(crm_is_true(quorum) ? "with" : "WITHOUT"));
} else {
print_as("NONE\n");
}
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " Current DC: ");
if (dc) {
fprintf(stream, "%s (version %s) - partition %s quorum",
dc_name, (dc_version_s? dc_version_s : "unknown"),
(crm_is_true(quorum)? "with" : "WITHOUT"));
} else {
fprintf(stream, "NONE");
}
fprintf(stream, "
\n");
break;
case mon_output_xml:
/* NOTE(review): the literal on the next line is never closed and the "if"
 * matching the "else" below appears to be missing -- confirm against version
 * control
 */
fprintf(stream, " details->uname, dc->details->id,
(crm_is_true(quorum) ? "true" : "false"));
} else {
fprintf(stream, "present=\"false\"");
}
fprintf(stream, " />\n");
break;
default:
break;
}
free(dc_name);
}
/*!
* \internal
* \brief Print counts of configured nodes and resources
*
* When any resources are disabled or blocked, their counts are appended in
* parentheses after the resource count.
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
* \param[in] stack_s Stack name (not used by this function)
*/
static void
print_cluster_counts(FILE *stream, pe_working_set_t *data_set, const char *stack_s)
{
int nnodes = g_list_length(data_set->nodes);
int nresources = count_resources(data_set, NULL);
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\n%d node%s configured\n", nnodes, s_if_plural(nnodes));
print_as("%d resource%s configured",
nresources, s_if_plural(nresources));
if(data_set->disabled_resources || data_set->blocked_resources) {
print_as(" (");
if (data_set->disabled_resources) {
print_as("%d DISABLED", data_set->disabled_resources);
}
/* A separator is needed only when both counts are shown */
if (data_set->disabled_resources && data_set->blocked_resources) {
print_as(", ");
}
if (data_set->blocked_resources) {
print_as("%d BLOCKED from starting due to failure",
data_set->blocked_resources);
}
print_as(")");
}
print_as("\n");
break;
case mon_output_html:
case mon_output_cgi:
/* NOTE(review): two literals in this branch span source lines as shown --
 * confirm the intended markup against version control
 */
fprintf(stream, " %d node%s configured
\n",
nnodes, s_if_plural(nnodes));
fprintf(stream, " %d resource%s configured",
nresources, s_if_plural(nresources));
if (data_set->disabled_resources || data_set->blocked_resources) {
fprintf(stream, " (");
if (data_set->disabled_resources) {
fprintf(stream, "%d DISABLED",
data_set->disabled_resources);
}
if (data_set->disabled_resources && data_set->blocked_resources) {
fprintf(stream, ", ");
}
if (data_set->blocked_resources) {
fprintf(stream,
"%d BLOCKED from starting due to failure",
data_set->blocked_resources);
}
fprintf(stream, ")");
}
fprintf(stream, "
\n");
break;
case mon_output_xml:
/* This branch recomputes the counts rather than using nnodes/nresources.
 * NOTE(review): neither format string has conversions for its arguments as
 * shown -- confirm against version control
 */
fprintf(stream,
" \n",
g_list_length(data_set->nodes));
fprintf(stream,
" \n",
count_resources(data_set, NULL),
data_set->disabled_resources, data_set->blocked_resources);
break;
default:
break;
}
}
/*!
* \internal
* \brief Print cluster-wide options
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*
* \note Plain and console output print only a warning when maintenance mode is
* set. HTML and XML output print a few options (fencing, symmetry,
* no-quorum policy, maintenance mode). If there is demand, more could be
* added.
*/
static void
print_cluster_options(FILE *stream, pe_working_set_t *data_set)
{
switch (output_format) {
case mon_output_plain:
case mon_output_console:
if (is_set(data_set->flags, pe_flag_maintenance_mode)) {
print_as("\n *** Resource management is DISABLED ***");
print_as("\n The cluster will not attempt to start, stop or recover services");
print_as("\n");
}
break;
/* mon_output_cgi is not listed with mon_output_html here, so CGI output falls
 * through to the default case and prints no options.
 * NOTE(review): several literals in this branch span source lines as shown --
 * confirm the intended markup against version control
 */
case mon_output_html:
fprintf(stream, "
\n Config Options
\n");
fprintf(stream, " \n");
fprintf(stream, " STONITH of failed nodes | %s |
\n",
is_set(data_set->flags, pe_flag_stonith_enabled)? "enabled" : "disabled");
fprintf(stream, " Cluster is | %ssymmetric |
\n",
is_set(data_set->flags, pe_flag_symmetric_cluster)? "" : "a");
fprintf(stream, " No Quorum Policy | ");
/* No default case: any other policy value prints nothing for this field */
switch (data_set->no_quorum_policy) {
case no_quorum_freeze:
fprintf(stream, "Freeze resources");
break;
case no_quorum_stop:
fprintf(stream, "Stop ALL resources");
break;
case no_quorum_ignore:
fprintf(stream, "Ignore");
break;
case no_quorum_suicide:
fprintf(stream, "Suicide");
break;
}
fprintf(stream, " |
\n");
fprintf(stream, " Resource management | ");
if (is_set(data_set->flags, pe_flag_maintenance_mode)) {
fprintf(stream, "DISABLED (the cluster will "
"not attempt to start, stop or recover services)");
} else {
fprintf(stream, "enabled");
}
fprintf(stream, " |
\n");
fprintf(stream, "
\n \n");
break;
case mon_output_xml:
/* NOTE(review): the literal on the next line is never closed as shown --
 * confirm against version control
 */
fprintf(stream, " flags, pe_flag_stonith_enabled)?
"true" : "false");
fprintf(stream, " symmetric-cluster=\"%s\"",
is_set(data_set->flags, pe_flag_symmetric_cluster)?
"true" : "false");
fprintf(stream, " no-quorum-policy=\"");
/* No default case: any other policy value leaves the attribute value empty */
switch (data_set->no_quorum_policy) {
case no_quorum_freeze:
fprintf(stream, "freeze");
break;
case no_quorum_stop:
fprintf(stream, "stop");
break;
case no_quorum_ignore:
fprintf(stream, "ignore");
break;
case no_quorum_suicide:
fprintf(stream, "suicide");
break;
}
fprintf(stream, "\"");
fprintf(stream, " maintenance-mode=\"%s\"",
is_set(data_set->flags, pe_flag_maintenance_mode)?
"true" : "false");
fprintf(stream, " />\n");
break;
default:
break;
}
}
/*!
 * \internal
 * \brief Get the name of the stack in use (or "unknown" if not available)
 *
 * "unknown" is returned both when no cluster-infrastructure nvpair exists and
 * when the nvpair exists but has no value, so the result is always safe to
 * pass to a %s conversion.
 *
 * \param[in] data_set  Working set of CIB state
 *
 * \return String representing stack name (never NULL)
 */
static const char *
get_cluster_stack(pe_working_set_t *data_set)
{
    xmlNode *stack = get_xpath_object("//nvpair[@name='cluster-infrastructure']",
                                      data_set->input, LOG_DEBUG);
    const char *stack_s = stack? crm_element_value(stack, XML_NVPAIR_ATTR_VALUE)
                               : NULL;

    return stack_s? stack_s : "unknown";
}
/*!
 * \internal
 * \brief Print a summary of cluster-wide information
 *
 * The summary consists of up to four parts (stack, DC, times, counts), each
 * shown according to the display options. The summary header is emitted just
 * before the first part that is shown, and the footer only if a header was
 * emitted.
 *
 * \param[in] stream    File stream to display output to
 * \param[in] data_set  Working set of CIB state
 */
static void
print_cluster_summary(FILE *stream, pe_working_set_t *data_set)
{
    const char *stack_s = get_cluster_stack(data_set);
    gboolean opened = FALSE;

    /* Stack comes first, so nothing can have opened the summary before it */
    if (show & mon_show_stack) {
        print_cluster_summary_header(stream);
        opened = TRUE;
        print_cluster_stack(stream, stack_s);
    }

    /* Always print DC if none, even if not requested */
    if ((data_set->dc_node == NULL) || (show & mon_show_dc)) {
        if (!opened) {
            print_cluster_summary_header(stream);
            opened = TRUE;
        }
        print_cluster_dc(stream, data_set);
    }

    if (show & mon_show_times) {
        if (!opened) {
            print_cluster_summary_header(stream);
            opened = TRUE;
        }
        print_cluster_times(stream, data_set);
    }

    /* Counts are forced whenever they carry a warning (maintenance mode,
     * disabled or blocked resources)
     */
    if (is_set(data_set->flags, pe_flag_maintenance_mode)
        || data_set->disabled_resources
        || data_set->blocked_resources
        || is_set(show, mon_show_count)) {

        if (!opened) {
            print_cluster_summary_header(stream);
            opened = TRUE;
        }
        print_cluster_counts(stream, data_set, stack_s);
    }

    /* There is not a separate option for showing cluster options, so show with
     * stack for now; a separate option could be added if there is demand
     */
    if (show & mon_show_stack) {
        print_cluster_options(stream, data_set);
    }

    if (opened) {
        print_cluster_summary_footer(stream);
    }
}
/*!
* \internal
* \brief Print a failed action
*
* The action is identified by its operation key, or by its XML ID when no
* operation key is present. A missing exit reason is shown as "none".
*
* \param[in] stream File stream to display output to
* \param[in] xml_op Root of XML tree describing failed action
*/
static void
print_failed_action(FILE *stream, xmlNode *xml_op)
{
const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
const char *op_key_attr = "op_key";
const char *last = crm_element_value(xml_op, XML_RSC_OP_LAST_CHANGE);
const char *node = crm_element_value(xml_op, XML_ATTR_UNAME);
const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID);
const char *exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON);
/* A missing return code or operation status is parsed as 0 */
int rc = crm_parse_int(crm_element_value(xml_op, XML_LRM_ATTR_RC), "0");
int status = crm_parse_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), "0");
char *exit_reason_cleaned;
/* If no op_key was given, use id instead */
if (op_key == NULL) {
op_key = ID(xml_op);
op_key_attr = "id";
}
/* If no exit reason was given, use "none" */
if (exit_reason == NULL) {
exit_reason = "none";
}
/* Print common action information */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("* %s on %s '%s' (%d): call=%s, status=%s, exitreason='%s'",
op_key, node, services_ocf_exitcode_str(rc), rc,
call, services_lrm_status_str(status), exit_reason);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " %s on %s '%s' (%d): call=%s, status=%s, exitreason='%s'",
op_key, node, services_ocf_exitcode_str(rc), rc,
call, services_lrm_status_str(status), exit_reason);
break;
case mon_output_xml:
exit_reason_cleaned = crm_xml_escape(exit_reason);
/* NOTE(review): code appears to be missing from here on -- the escaped exit
 * reason, 'op_key_attr' and 'last' are never used, and mon_output_xml is
 * repeated as a case label in this switch; confirm against version control
 */
fprintf(stream, " \n");
break;
case mon_output_xml:
fprintf(stream, " />\n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print a section for failed actions
*
* Every child of the working set's failed-operations XML is printed with
* print_failed_action().
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_failed_actions(FILE *stream, pe_working_set_t *data_set)
{
xmlNode *xml_op = NULL;
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\nFailed Resource Actions:\n");
break;
case mon_output_html:
case mon_output_cgi:
/* NOTE(review): this literal spans several source lines as shown -- confirm
 * the intended markup against version control
 */
fprintf(stream,
"
\n Failed Resource Actions
\n \n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
/* Print each failed action */
for (xml_op = __xml_first_child(data_set->failed); xml_op != NULL;
xml_op = __xml_next(xml_op)) {
print_failed_action(stream, xml_op);
}
/* End section (plain and console output need no closing) */
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Reduce the stonith-history
*
* - for successful actions, keep only the most recently completed entry per
*   action type and target
* - for failed actions, additionally distinguish entries by delegate
* - for actions in progress, keep every entry
*
* Every entry of \p history is either relinked into the returned list
* (prepended at its head) or freed, so the caller must use only the returned
* pointer afterwards.
*
* \param[in] history List of stonith actions
*
* \return Head of the reduced list (NULL if \p history was NULL)
*/
static stonith_history_t *
reduce_stonith_history(stonith_history_t *history)
{
stonith_history_t *new = NULL, *hp, *np, *tmp;
/* hp is advanced inside the loop body, either when it is merged/purged
 * or when it is moved into the new list
 */
for (hp = history; hp; ) {
/* Look for an entry in the new list that hp duplicates */
for (np = new; np; np = np->next) {
if ((hp->state == st_done) || (hp->state == st_failed)) {
/* action not in progress */
if (safe_str_eq(hp->target, np->target) &&
safe_str_eq(hp->action, np->action) &&
(hp->state == np->state)) {
/* failed entries must also agree on the delegate to count as duplicates */
if ((hp->state == st_done) ||
safe_str_eq(hp->delegate, np->delegate)) {
/* replace or purge */
if (hp->completed < np->completed) {
/* purge older hp */
tmp = hp->next;
hp->next = NULL; /* detach so only this one entry is freed */
stonith_history_free(hp);
hp = tmp;
break; /* np is non-NULL here, so the move below is skipped */
}
/* hp is at least as recent: keep np's position in the list but
 * take over hp's origin/delegate/client/completed, then free what
 * is left of hp (target and action equal np's and are dropped)
 */
free(hp->target);
free(hp->action);
free(np->origin);
np->origin = hp->origin;
free(np->delegate);
np->delegate = hp->delegate;
free(np->client);
np->client = hp->client;
np->completed = hp->completed;
tmp = hp;
hp = hp->next;
free(tmp);
break; /* np is non-NULL here, so the move below is skipped */
}
}
/* no match on this entry; keep scanning while entries remain */
if (np->next) {
continue;
}
}
/* in-progress entry, or end of the new list reached without a match */
np = 0; /* let outer loop progress hp */
break;
}
/* simply move hp from history to new */
if (np == NULL) {
tmp = hp->next;
hp->next = new;
new = hp;
hp = tmp;
}
}
return new;
}
/*!
 * \internal
 * \brief Sort the stonith-history
 *
 * Finished actions (done or failed) are ordered by completion time, most
 * recent first. Actions that are still pending carry no completion stamp and
 * are therefore gathered in front of all finished ones.
 *
 * \param[in] history  List of stonith actions (entries are relinked in place)
 *
 * \return Head of the sorted list
 */
static stonith_history_t *
sort_stonith_history(stonith_history_t *history)
{
    stonith_history_t *finished = NULL;
    stonith_history_t *unfinished = NULL;
    stonith_history_t *entry = history;

    while (entry != NULL) {
        stonith_history_t *rest = entry->next;

        if ((entry->state != st_done) && (entry->state != st_failed)) {
            /* still in progress: push onto the front of the pending list */
            entry->next = unfinished;
            unfinished = entry;

        } else {
            /* walk to the first link whose entry is strictly older */
            stonith_history_t **slot = &finished;

            while ((*slot != NULL)
                   && !(entry->completed > (*slot)->completed)) {
                slot = &((*slot)->next);
            }
            entry->next = *slot;
            *slot = entry;
        }
        entry = rest;
    }

    if (unfinished == NULL) {
        return finished;
    }

    /* pending actions don't have a completed-stamp so make them go front */
    {
        stonith_history_t *tail = unfinished;

        while (tail->next != NULL) {
            tail = tail->next;
        }
        tail->next = finished;
    }
    return unfinished;
}
/*!
* \internal
* \brief Print a stonith action
*
* Prints one fencing event in the layout of the global output format. Done and
* failed events include the delegate and a completion time stamp; any other
* state is reported as pending without them. Formats other than XML,
* plain/console and HTML/CGI print nothing.
*
* \param[in] stream File stream to display output to
* \param[in] event stonith event
*
* NOTE(review): the first fprintf() of the XML branch appears to have lost
* part of its format string and argument list; verify against the pristine
* source.
*/
static void
print_stonith_action(FILE *stream, stonith_history_t *event)
{
const char *action_s = stonith_action_str(event->action);
char *run_at_s = ctime(&event->completed);
if ((run_at_s) && (run_at_s[0] != 0)) {
run_at_s[strlen(run_at_s)-1] = 0; /* Overwrite the newline */
}
switch(output_format) {
case mon_output_xml:
fprintf(stream, " target, event->action);
switch(event->state) {
case st_done:
fprintf(stream, " state=\"success\"");
break;
case st_failed:
fprintf(stream, " state=\"failed\"");
break;
default:
fprintf(stream, " state=\"pending\"");
}
fprintf(stream, " origin=\"%s\" client=\"%s\"",
event->origin, event->client);
if (event->delegate) {
fprintf(stream, " delegate=\"%s\"", event->delegate);
}
/* a completion time is only printed for finished actions */
switch(event->state) {
case st_done:
case st_failed:
fprintf(stream, " completed=\"%s\"", run_at_s?run_at_s:"");
break;
default:
break;
}
fprintf(stream, " />\n");
break;
case mon_output_plain:
case mon_output_console:
/* the time stamp label depends on whether the full history is shown */
switch(event->state) {
case st_done:
print_as("* %s of %s successful: delegate=%s, client=%s, origin=%s,\n"
" %s='%s'\n",
action_s, event->target,
event->delegate ? event->delegate : "",
event->client, event->origin,
fence_full_history?"completed":"last-successful",
run_at_s?run_at_s:"");
break;
case st_failed:
print_as("* %s of %s failed: delegate=%s, client=%s, origin=%s,\n"
" %s='%s'\n",
action_s, event->target,
event->delegate ? event->delegate : "",
event->client, event->origin,
fence_full_history?"completed":"last-failed",
run_at_s?run_at_s:"");
break;
default:
print_as("* %s of %s pending: client=%s, origin=%s\n",
action_s, event->target,
event->client, event->origin);
}
break;
case mon_output_html:
case mon_output_cgi:
switch(event->state) {
case st_done:
fprintf(stream, " %s of %s successful: delegate=%s, "
"client=%s, origin=%s, %s='%s'\n",
action_s, event->target,
event->delegate ? event->delegate : "",
event->client, event->origin,
fence_full_history?"completed":"last-successful",
run_at_s?run_at_s:"");
break;
case st_failed:
fprintf(stream, " %s of %s failed: delegate=%s, "
"client=%s, origin=%s, %s='%s'\n",
action_s, event->target,
event->delegate ? event->delegate : "",
event->client, event->origin,
fence_full_history?"completed":"last-failed",
run_at_s?run_at_s:"");
break;
default:
fprintf(stream, " %s of %s pending: client=%s, "
"origin=%s\n",
action_s, event->target,
event->client, event->origin);
}
break;
default:
/* no support for fence history for other formats so far */
break;
}
}
/*!
* \internal
* \brief Print a section for failed stonith actions
*
* Does nothing when \p history contains no entry in state \c st_failed.
* Otherwise prints a heading (plain/console and HTML/CGI formats only), every
* failed entry via print_stonith_action(), and an HTML/CGI section end.
*
* \param[in] stream File stream to display output to
* \param[in] history List of stonith actions
*
* NOTE(review): the HTML string literals below span several lines and appear
* to have lost their markup; verify against the pristine source.
*/
static void
print_failed_stonith_actions(FILE *stream, stonith_history_t *history)
{
stonith_history_t *hp;
/* Find the first failed entry, if any */
for (hp = history; hp; hp = hp->next) {
if (hp->state == st_failed) {
break;
}
}
/* No failed entry: print no section at all */
if (!hp) {
return;
}
/* Print section heading */
switch (output_format) {
/* no need to take care of xml in here as xml gets full
* history anyway
*/
case mon_output_plain:
case mon_output_console:
print_as("\nFailed Fencing Actions:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n Failed Fencing Actions
\n \n");
break;
default:
break;
}
/* Print each failed stonith action */
for (hp = history; hp; hp = hp->next) {
if (hp->state == st_failed) {
print_stonith_action(stream, hp);
}
}
/* End section */
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print pending stonith actions
*
* A section is printed only when the first entry of \p history is neither
* failed nor done. Entries are then printed up to, but not including, the
* first failed or done entry, so pending entries located behind a finished
* one are not shown (sort_stonith_history() places pending entries first).
*
* \param[in] stream File stream to display output to
* \param[in] history List of stonith actions
*
* NOTE(review): the HTML string literals below span several lines and appear
* to have lost their markup; verify against the pristine source.
*/
static void
print_stonith_pending(FILE *stream, stonith_history_t *history)
{
/* xml-output always shows the full history
* so we'll never have to show pending-actions
* separately
*/
if (history && (history->state != st_failed) &&
(history->state != st_done)) {
stonith_history_t *hp;
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\nPending Fencing Actions:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n Pending Fencing Actions
\n \n");
break;
default:
break;
}
/* Print the leading run of pending entries */
for (hp = history; hp; hp = hp->next) {
if ((hp->state == st_failed) || (hp->state == st_done)) {
break;
}
print_stonith_action(stream, hp);
}
/* End section */
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n");
break;
default:
break;
}
}
}
/*!
* \internal
* \brief Print a section for stonith-history
*
* Prints a format-specific heading, then every entry of \p history that is
* not in state \c st_failed. In XML output, failed entries are printed as
* well. A section end is printed for HTML/CGI and XML output.
*
* \param[in] stream File stream to display output to
* \param[in] history List of stonith actions
*
* NOTE(review): the HTML/XML string literals below appear to have lost their
* markup; verify against the pristine source.
*/
static void
print_stonith_history(FILE *stream, stonith_history_t *history)
{
stonith_history_t *hp;
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\nFencing History:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n Fencing History
\n \n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
/* Failed entries are skipped here except in XML output */
for (hp = history; hp; hp = hp->next) {
if ((hp->state != st_failed) || (output_format == mon_output_xml)) {
print_stonith_action(stream, hp);
}
}
/* End section */
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print cluster status to screen
*
* This uses the global display preferences set by command-line options
* to display cluster status in a human-friendly way.
*
* Sections are printed in this order: cluster summary, nodes, resources,
* node attributes, operation history/fail counts, failed resource actions,
* failed fencing actions, tickets, negative location constraints, and fencing
* history (or only pending fencing actions). Most sections are conditional on
* the global \c show flags and/or \c fence_history.
*
* \param[in] data_set Working set of CIB state
* \param[in] stonith_history List of stonith actions
*/
static void
print_status(pe_working_set_t * data_set,
stonith_history_t *stonith_history)
{
GListPtr gIter = NULL;
int print_opts = get_resource_display_options();
/* space-separated lists of node names */
char *online_nodes = NULL;
char *online_remote_nodes = NULL;
char *online_guest_nodes = NULL;
char *offline_nodes = NULL;
char *offline_remote_nodes = NULL;
if (output_format == mon_output_console) {
blank_screen();
}
print_cluster_summary(stdout, data_set);
print_as("\n");
/* Gather node information (and print if in bad state or grouping by node) */
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
const char *node_mode = NULL;
char *node_name = get_node_display_name(node);
/* Get node mode */
if (node->details->unclean) {
if (node->details->online) {
node_mode = "UNCLEAN (online)";
} else if (node->details->pending) {
node_mode = "UNCLEAN (pending)";
} else {
node_mode = "UNCLEAN (offline)";
}
} else if (node->details->pending) {
node_mode = "pending";
} else if (node->details->standby_onfail && node->details->online) {
node_mode = "standby (on-fail)";
} else if (node->details->standby) {
if (node->details->online) {
if (node->details->running_rsc) {
node_mode = "standby (with active resources)";
} else {
node_mode = "standby";
}
} else {
node_mode = "OFFLINE (standby)";
}
} else if (node->details->maintenance) {
if (node->details->online) {
node_mode = "maintenance";
} else {
node_mode = "OFFLINE (maintenance)";
}
} else if (node->details->online) {
node_mode = "online";
/* Plain online nodes are only collected for the summary lines below
 * unless we are grouping by node
 */
if (group_by_node == FALSE) {
if (pe__is_guest_node(node)) {
online_guest_nodes = add_list_element(online_guest_nodes, node_name);
} else if (pe__is_remote_node(node)) {
online_remote_nodes = add_list_element(online_remote_nodes, node_name);
} else {
online_nodes = add_list_element(online_nodes, node_name);
}
free(node_name);
continue;
}
} else {
node_mode = "OFFLINE";
/* Likewise for plain offline nodes */
if (group_by_node == FALSE) {
if (pe__is_remote_node(node)) {
offline_remote_nodes = add_list_element(offline_remote_nodes, node_name);
} else if (pe__is_guest_node(node)) {
/* ignore offline guest nodes */
} else {
offline_nodes = add_list_element(offline_nodes, node_name);
}
free(node_name);
continue;
}
}
/* If we get here, node is in bad state, or we're grouping by node */
/* Print the node name and status */
if (pe__is_guest_node(node)) {
print_as("Guest");
} else if (pe__is_remote_node(node)) {
print_as("Remote");
}
print_as("Node %s: %s\n", node_name, node_mode);
/* If we're grouping by node, print its resources */
if (group_by_node) {
if (print_brief) {
print_rscs_brief(node->details->running_rsc, "\t", print_opts | pe_print_rsconly,
stdout, FALSE);
} else {
GListPtr gIter2 = NULL;
for (gIter2 = node->details->running_rsc; gIter2 != NULL; gIter2 = gIter2->next) {
resource_t *rsc = (resource_t *) gIter2->data;
rsc->fns->print(rsc, "\t", print_opts | pe_print_rsconly, stdout);
}
}
}
free(node_name);
}
/* If we're not grouping by node, summarize nodes by status */
if (online_nodes) {
print_as("Online: [%s ]\n", online_nodes);
free(online_nodes);
}
if (offline_nodes) {
print_as("OFFLINE: [%s ]\n", offline_nodes);
free(offline_nodes);
}
if (online_remote_nodes) {
print_as("RemoteOnline: [%s ]\n", online_remote_nodes);
free(online_remote_nodes);
}
if (offline_remote_nodes) {
print_as("RemoteOFFLINE: [%s ]\n", offline_remote_nodes);
free(offline_remote_nodes);
}
if (online_guest_nodes) {
print_as("GuestOnline: [%s ]\n", online_guest_nodes);
free(online_guest_nodes);
}
/* Print resources section, if needed */
print_resources(stdout, data_set, print_opts);
/* print Node Attributes section if requested */
if (show & mon_show_attributes) {
print_node_attributes(stdout, data_set);
}
/* If requested, print resource operations (which includes failcounts)
* or just failcounts
*/
if (show & (mon_show_operations | mon_show_failcounts)) {
print_node_summary(stdout, data_set,
((show & mon_show_operations)? TRUE : FALSE));
}
/* If there were any failed actions, print them */
if (xml_has_children(data_set->failed)) {
print_failed_actions(stdout, data_set);
}
/* Print failed stonith actions */
if (fence_history) {
print_failed_stonith_actions(stdout, stonith_history);
}
/* Print tickets if requested */
if (show & mon_show_tickets) {
print_cluster_tickets(stdout, data_set);
}
/* Print negative location constraints if requested */
if (show & mon_show_bans) {
print_neg_locations(stdout, data_set);
}
/* Print stonith history */
if (fence_history) {
if (show & mon_show_fence_history) {
print_stonith_history(stdout, stonith_history);
} else {
print_stonith_pending(stdout, stonith_history);
}
}
#if CURSES_ENABLED
if (output_format == mon_output_console) {
refresh();
}
#endif
}
/*!
* \internal
* \brief Print cluster status in XML format
*
* Output goes to \c stdout, which is flushed and closed before returning.
* When \c fence_history is set, the fencing history is printed in full.
*
* \param[in] data_set Working set of CIB state
* \param[in] stonith_history List of stonith actions
*
* NOTE(review): several string literals below (document prolog, element
* open/close tags) appear to have lost their markup, and the first fprintf()
* of the per-node output is visibly truncated; verify against the pristine
* source.
*/
static void
print_xml_status(pe_working_set_t * data_set,
stonith_history_t *stonith_history)
{
FILE *stream = stdout;
GListPtr gIter = NULL;
int print_opts = get_resource_display_options();
fprintf(stream, "\n");
fprintf(stream, "\n", VERSION);
print_cluster_summary(stream, data_set);
/*** NODES ***/
fprintf(stream, " \n");
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
const char *node_type = "unknown";
switch (node->details->type) {
case node_member:
node_type = "member";
break;
case node_remote:
node_type = "remote";
break;
case node_ping:
node_type = "ping";
break;
}
fprintf(stream, " details->uname);
fprintf(stream, "id=\"%s\" ", node->details->id);
fprintf(stream, "online=\"%s\" ", node->details->online ? "true" : "false");
fprintf(stream, "standby=\"%s\" ", node->details->standby ? "true" : "false");
fprintf(stream, "standby_onfail=\"%s\" ", node->details->standby_onfail ? "true" : "false");
fprintf(stream, "maintenance=\"%s\" ", node->details->maintenance ? "true" : "false");
fprintf(stream, "pending=\"%s\" ", node->details->pending ? "true" : "false");
fprintf(stream, "unclean=\"%s\" ", node->details->unclean ? "true" : "false");
fprintf(stream, "shutdown=\"%s\" ", node->details->shutdown ? "true" : "false");
fprintf(stream, "expected_up=\"%s\" ", node->details->expected_up ? "true" : "false");
fprintf(stream, "is_dc=\"%s\" ", node->details->is_dc ? "true" : "false");
fprintf(stream, "resources_running=\"%d\" ", g_list_length(node->details->running_rsc));
fprintf(stream, "type=\"%s\" ", node_type);
if (pe__is_guest_node(node)) {
fprintf(stream, "id_as_resource=\"%s\" ", node->details->remote_rsc->container->id);
}
/* When grouping by node, the node element wraps its running resources;
 * otherwise it is closed immediately
 */
if (group_by_node) {
GListPtr lpc2 = NULL;
fprintf(stream, ">\n");
for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) {
resource_t *rsc = (resource_t *) lpc2->data;
rsc->fns->print(rsc, " ", print_opts | pe_print_rsconly, stream);
}
fprintf(stream, " \n");
} else {
fprintf(stream, "/>\n");
}
}
fprintf(stream, " \n");
/* Print resources section, if needed */
print_resources(stream, data_set, print_opts);
/* print Node Attributes section if requested */
if (show & mon_show_attributes) {
print_node_attributes(stream, data_set);
}
/* If requested, print resource operations (which includes failcounts)
* or just failcounts
*/
if (show & (mon_show_operations | mon_show_failcounts)) {
print_node_summary(stream, data_set,
((show & mon_show_operations)? TRUE : FALSE));
}
/* If there were any failed actions, print them */
if (xml_has_children(data_set->failed)) {
print_failed_actions(stream, data_set);
}
/* Print stonith history */
/* (written to stdout directly, which is what stream refers to here) */
if (fence_history) {
print_stonith_history(stdout, stonith_history);
}
/* Print tickets if requested */
if (show & mon_show_tickets) {
print_cluster_tickets(stream, data_set);
}
/* Print negative location constraints if requested */
if (show & mon_show_bans) {
print_neg_locations(stream, data_set);
}
fprintf(stream, "\n");
fflush(stream);
fclose(stream);
}
/*!
* \internal
* \brief Print cluster status in HTML format (with HTTP headers if CGI)
*
* In CGI mode the page is written to \c stdout. Otherwise it is written to
* "<filename>.tmp", which is renamed to \p filename once complete. The stream
* is closed before returning in both modes.
*
* \param[in] data_set Working set of CIB state
* \param[in] filename Name of file to write HTML to (ignored if CGI)
* \param[in] stonith_history List of stonith actions
*
* \return 0 on success, -1 on error
*
* NOTE(review): many string literals below span several lines and appear to
* have lost their HTML markup; verify against the pristine source.
*/
static int
print_html_status(pe_working_set_t * data_set,
const char *filename,
stonith_history_t *stonith_history)
{
FILE *stream;
GListPtr gIter = NULL;
char *filename_tmp = NULL;
int print_opts = get_resource_display_options();
if (output_format == mon_output_cgi) {
stream = stdout;
fprintf(stream, "Content-Type: text/html\n\n");
} else {
filename_tmp = crm_concat(filename, "tmp", '.');
stream = fopen(filename_tmp, "w");
if (stream == NULL) {
crm_perror(LOG_ERR, "Cannot open %s for writing", filename_tmp);
free(filename_tmp);
return -1;
}
}
fprintf(stream, "\n");
fprintf(stream, " \n");
fprintf(stream, " Cluster status\n");
fprintf(stream, " \n", reconnect_msec / 1000);
fprintf(stream, " \n");
fprintf(stream, "\n");
print_cluster_summary(stream, data_set);
/*** NODE LIST ***/
fprintf(stream, "
\n Node List
\n");
fprintf(stream, "\n");
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
char *node_name = get_node_display_name(node);
fprintf(stream, "- Node: %s: ", node_name);
if (node->details->standby_onfail && node->details->online) {
fprintf(stream, "standby (on-fail)\n");
} else if (node->details->standby && node->details->online) {
fprintf(stream, "standby%s\n",
node->details->running_rsc?" (with active resources)":"");
} else if (node->details->standby) {
fprintf(stream, "OFFLINE (standby)\n");
} else if (node->details->maintenance && node->details->online) {
fprintf(stream, "maintenance\n");
} else if (node->details->maintenance) {
fprintf(stream, "OFFLINE (maintenance)\n");
} else if (node->details->online) {
fprintf(stream, "online\n");
} else {
fprintf(stream, "OFFLINE\n");
}
if (print_brief && group_by_node) {
fprintf(stream, "
\n");
print_rscs_brief(node->details->running_rsc, NULL, print_opts | pe_print_rsconly,
stream, FALSE);
fprintf(stream, "
\n");
} else if (group_by_node) {
GListPtr lpc2 = NULL;
fprintf(stream, "\n");
for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) {
resource_t *rsc = (resource_t *) lpc2->data;
fprintf(stream, "- ");
rsc->fns->print(rsc, NULL, print_opts | pe_print_rsconly, stream);
fprintf(stream, "
\n");
}
fprintf(stream, "
\n");
}
fprintf(stream, " \n");
free(node_name);
}
fprintf(stream, "
\n");
/* Print resources section, if needed */
print_resources(stream, data_set, print_opts);
/* print Node Attributes section if requested */
if (show & mon_show_attributes) {
print_node_attributes(stream, data_set);
}
/* If requested, print resource operations (which includes failcounts)
* or just failcounts
*/
if (show & (mon_show_operations | mon_show_failcounts)) {
print_node_summary(stream, data_set,
((show & mon_show_operations)? TRUE : FALSE));
}
/* If there were any failed actions, print them */
if (xml_has_children(data_set->failed)) {
print_failed_actions(stream, data_set);
}
/* Print failed stonith actions */
if (fence_history) {
print_failed_stonith_actions(stream, stonith_history);
}
/* Print stonith history */
if (fence_history) {
if (show & mon_show_fence_history) {
print_stonith_history(stream, stonith_history);
} else {
/* Must go to the HTML stream like every other section; writing to
 * stdout would drop pending actions from the generated file
 */
print_stonith_pending(stream, stonith_history);
}
}
/* Print tickets if requested */
if (show & mon_show_tickets) {
print_cluster_tickets(stream, data_set);
}
/* Print negative location constraints if requested */
if (show & mon_show_bans) {
print_neg_locations(stream, data_set);
}
fprintf(stream, "\n");
fprintf(stream, "\n");
fflush(stream);
fclose(stream);
/* Publish the finished page by renaming the temporary file into place */
if (output_format != mon_output_cgi) {
if (rename(filename_tmp, filename) != 0) {
crm_perror(LOG_ERR, "Unable to rename %s->%s", filename_tmp, filename);
}
free(filename_tmp);
}
return 0;
}
/*!
 * \internal
 * \brief Run the external notification agent for an event
 *
 * The event details are exported as CRM_notify_* environment variables of
 * this process, then the agent is started in a forked child, which inherits
 * them. The parent does not wait for the child.
 *
 * \param[in] node       Name of node the event relates to
 * \param[in] rsc        Resource the event relates to (NULL if none)
 * \param[in] task       Name of the operation
 * \param[in] target_rc  Expected return code
 * \param[in] rc         Actual return code
 * \param[in] status     Execution status
 * \param[in] desc       Human-readable description of the outcome
 *
 * \return Always 0
 */
static int
send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc,
                 int status, const char *desc)
{
    pid_t pid;

    /*setenv needs chars, these are ints */
    char *rc_s = crm_itoa(rc);
    char *status_s = crm_itoa(status);
    char *target_rc_s = crm_itoa(target_rc);

    crm_debug("Sending external notification to '%s' via '%s'", external_recipient, external_agent);

    if (rsc) {
        setenv("CRM_notify_rsc", rsc, 1);
    } else {
        /* Don't let the resource name of an earlier notification leak into
         * one that has no resource (such as a fencing event)
         */
        unsetenv("CRM_notify_rsc");
    }
    if (external_recipient) {
        setenv("CRM_notify_recipient", external_recipient, 1);
    }
    setenv("CRM_notify_node", node, 1);
    setenv("CRM_notify_task", task, 1);
    setenv("CRM_notify_desc", desc, 1);
    setenv("CRM_notify_rc", rc_s, 1);
    setenv("CRM_notify_target_rc", target_rc_s, 1);
    setenv("CRM_notify_status", status_s, 1);

    pid = fork();
    if (pid == -1) {
        crm_perror(LOG_ERR, "notification fork() failed.");
    }
    if (pid == 0) {
        /* crm_debug("notification: I am the child. Executing the nofitication program."); */
        /* The variadic sentinel must be a null *pointer*, hence the cast */
        execl(external_agent, external_agent, (char *) NULL);

        /* Only reached if exec failed. Use _exit() so the child neither runs
         * the parent's atexit handlers nor flushes its inherited stdio buffers
         */
        _exit(CRM_EX_ERROR);
    }

    crm_trace("Finished running custom notification program '%s'.", external_agent);
    free(target_rc_s);
    free(status_s);
    free(rc_s);
    return 0;
}
/*!
 * \internal
 * \brief Log resource operation results and notify the external agent
 *
 * If \p xml is not itself an operation-history element, each of its children
 * is processed recursively. For an operation element, the status, return code
 * and expected return code are decoded from its transition magic, the outcome
 * is logged, and (if an external agent is configured) a notification is sent.
 * Successful operations whose result is "not running" are logged but not
 * notified.
 *
 * The node name is taken from, in order of preference: the operation's target
 * attribute, the uname of the nearest enclosing node state element, that
 * element's ID, and finally \p node_id.
 *
 * \param[in] xml      XML element to examine
 * \param[in] node_id  Fallback node name (may be NULL)
 */
static void
handle_rsc_op(xmlNode * xml, const char *node_id)
{
    int rc = -1;
    int status = -1;
    int target_rc = -1;
    gboolean notify = TRUE;

    char *rsc = NULL;
    char *task = NULL;
    const char *desc = NULL;
    const char *magic = NULL;
    const char *id = NULL;
    const char *node = NULL;

    xmlNode *n = xml;
    xmlNode * rsc_op = xml;

    if(strcmp((const char*)xml->name, XML_LRM_TAG_RSC_OP) != 0) {
        xmlNode *cIter;

        for(cIter = xml->children; cIter; cIter = cIter->next) {
            handle_rsc_op(cIter, node_id);
        }
        return;
    }

    id = crm_element_value(rsc_op, XML_LRM_ATTR_TASK_KEY);
    if (id == NULL) {
        /* Compatibility with <= 1.1.5 */
        id = ID(rsc_op);
    }

    magic = crm_element_value(rsc_op, XML_ATTR_TRANSITION_MAGIC);
    if (magic == NULL) {
        /* non-change */
        return;
    }

    /* Only the status and return codes are needed here, so the transition
     * UUID, transition number and action ID are not requested
     */
    if (!decode_transition_magic(magic, NULL, NULL, NULL, &status, &rc,
                                 &target_rc)) {
        crm_err("Invalid event %s detected for %s", magic, id);
        return;
    }

    if (parse_op_key(id, &rsc, &task, NULL) == FALSE) {
        crm_err("Invalid event detected for %s", id);
        goto bail;
    }

    node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET);

    /* Find the enclosing node state element, if there is one */
    while (n != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(n))) {
        n = n->parent;
    }

    if(node == NULL && n) {
        node = crm_element_value(n, XML_ATTR_UNAME);
    }

    if (node == NULL && n) {
        node = ID(n);
    }

    if (node == NULL) {
        node = node_id;
    }

    if (node == NULL) {
        crm_err("No node detected for event %s (%s)", magic, id);
        goto bail;
    }

    /* look up where we expected it to be? */
    desc = pcmk_strerror(pcmk_ok);
    if (status == PCMK_LRM_OP_DONE && target_rc == rc) {
        crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc);
        if (rc == PCMK_OCF_NOT_RUNNING) {
            notify = FALSE;
        }

    } else if (status == PCMK_LRM_OP_DONE) {
        desc = services_ocf_exitcode_str(rc);
        crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc);

    } else {
        desc = services_lrm_status_str(status);
        crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc);
    }

    if (notify && external_agent) {
        send_custom_trap(node, rsc, task, target_rc, rc, status, desc);
    }

  bail:
    free(rsc);
    free(task);
}
/*!
 * \internal
 * \brief Timer callback that requests a display refresh
 *
 * \param[in] user_data  Ignored
 *
 * \return FALSE, always
 */
static gboolean
mon_trigger_refresh(gpointer user_data)
{
    mainloop_set_trigger(refresh_trigger);

    return FALSE;
}
#define NODE_PATT "/lrm[@id="

/*!
 * \internal
 * \brief Extract the node ID from the "/lrm[@id='...']" step of an XPath
 *
 * The ID may be enclosed in single or double quotes.
 *
 * \param[in] xpath  XPath to search (may be NULL)
 *
 * \return Newly allocated copy of the ID, which the caller must free, or NULL
 *         if \p xpath has no such step, the ID is not properly quoted, or
 *         memory could not be allocated
 */
static char *
get_node_from_xpath(const char *xpath)
{
    const char *start = NULL;
    const char *end = NULL;
    char *nodeid = NULL;
    char quote = '\0';
    size_t len = 0;

    if (xpath == NULL) {
        return NULL;
    }

    start = strstr(xpath, NODE_PATT);
    if (start == NULL) {
        return NULL;
    }
    start += strlen(NODE_PATT);

    /* The value must begin with a quote; checking it (rather than blindly
     * skipping one character) also keeps us from stepping past the end of a
     * path that stops right after the pattern
     */
    quote = *start;
    if ((quote != '\'') && (quote != '"')) {
        return NULL;
    }
    start++;

    end = strchr(start, quote);
    if (end == NULL) {
        return NULL; /* unterminated value */
    }

    len = (size_t) (end - start);
    nodeid = malloc(len + 1);
    if (nodeid == NULL) {
        return NULL;
    }
    memcpy(nodeid, start, len);
    nodeid[len] = '\0';
    return nodeid;
}
/* Process the operations below one node state element, naming the node by
 * its uname if it has one and by its ID otherwise
 */
static void
notify_node_state_ops(xmlNode *state)
{
    const char *node = crm_element_value(state, XML_ATTR_UNAME);

    if (node == NULL) {
        node = ID(state);
    }
    handle_rsc_op(state, node);
}

/* Process the operations below every child (node state) of the given element */
static void
notify_all_node_state_ops(xmlNode *parent)
{
    xmlNode *state = __xml_first_child(parent);

    while (state != NULL) {
        notify_node_state_ops(state);
        state = __xml_next(state);
    }
}

/* Process operations below an element whose node must be taken from its XPath */
static void
notify_ops_by_xpath(xmlNode *match, const char *xpath)
{
    char *local_node = get_node_from_xpath(xpath);

    handle_rsc_op(match, local_node);
    free(local_node);
}

/*!
 * \internal
 * \brief Process the resource operations contained in a format 2 CIB patchset
 *
 * Only "create" and "modify" changes are examined; how the affected node is
 * determined depends on the name of the changed element.
 *
 * \param[in] event  Name of the notification event (unused)
 * \param[in] msg    Notification message containing the patchset
 */
static void
crm_diff_update_v2(const char *event, xmlNode * msg)
{
    xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
    xmlNode *change = NULL;

    for (change = __xml_first_child(patchset); change != NULL;
         change = __xml_next(change)) {

        const char *op = crm_element_value(change, XML_DIFF_OP);
        const char *xpath = crm_element_value(change, XML_DIFF_PATH);
        const char *name = NULL;
        xmlNode *match = NULL;

        /* Changes without an operation, moves and deletions carry nothing
         * to report
         */
        if ((op == NULL) || (strcmp(op, "move") == 0)
            || (strcmp(op, "delete") == 0)) {
            continue;
        }

        if (strcmp(op, "create") == 0) {
            match = change->children;

        } else if (strcmp(op, "modify") == 0) {
            match = first_named_child(change, XML_DIFF_RESULT);
            if (match != NULL) {
                match = match->children;
            }
        }

        if (match != NULL) {
            name = (const char *) match->name;
        }

        crm_trace("Handling %s operation for %s %p, %s", op, xpath, match, name);

        if (xpath == NULL) {
            /* Version field, ignore */
            continue;
        }

        if (name == NULL) {
            crm_debug("No result for %s operation to %s", op, xpath);
            CRM_ASSERT(strcmp(op, "delete") == 0 || strcmp(op, "move") == 0);
            continue;
        }

        if (strcmp(name, XML_TAG_CIB) == 0) {
            notify_all_node_state_ops(first_named_child(match,
                                                        XML_CIB_TAG_STATUS));

        } else if (strcmp(name, XML_CIB_TAG_STATUS) == 0) {
            notify_all_node_state_ops(match);

        } else if (strcmp(name, XML_CIB_TAG_STATE) == 0) {
            notify_node_state_ops(match);

        } else if (strcmp(name, XML_CIB_TAG_LRM) == 0) {
            handle_rsc_op(match, ID(match));

        } else if ((strcmp(name, XML_LRM_TAG_RESOURCES) == 0)
                   || (strcmp(name, XML_LRM_TAG_RESOURCE) == 0)
                   || (strcmp(name, XML_LRM_TAG_RSC_OP) == 0)) {
            notify_ops_by_xpath(match, xpath);

        } else {
            crm_trace("Ignoring %s operation for %s %p, %s", op, xpath, match, name);
        }
    }
}
/*!
 * \internal
 * \brief Process the resource operations added by a format 1 CIB diff
 *
 * \param[in] event  Name of the notification event (unused)
 * \param[in] msg    Notification message containing the diff
 */
static void
crm_diff_update_v1(const char *event, xmlNode * msg)
{
    /* Process operation updates */
    xmlXPathObject *added_ops = xpath_search(msg,
                                             "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED
                                             "//" XML_LRM_TAG_RSC_OP);
    int n_ops = numXpathResults(added_ops);
    int i = 0;

    while (i < n_ops) {
        handle_rsc_op(getXpathResult(added_ops, i), NULL);
        i++;
    }
    freeXpathObject(added_ops);
}
/*!
 * \internal
 * \brief Handle a CIB diff notification
 *
 * Applies the patchset to the cached CIB, or discards the cache if that
 * fails, and re-queries the full CIB whenever there is no cached copy. If an
 * external agent is configured, the operations in the patchset are passed to
 * the handler matching the patchset format. Finally, a display refresh is
 * requested, unless no CIB could be obtained, in which case a stale-data
 * marker is printed once.
 *
 * \param[in] event  Name of the notification event
 * \param[in] msg    Notification message containing the patchset
 */
static void
crm_diff_update(const char *event, xmlNode * msg)
{
    int rc = -1;
    static bool stale = FALSE;
    gboolean cib_updated = FALSE;
    xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);

    print_dot();

    if (current_cib != NULL) {
        rc = xml_apply_patchset(current_cib, diff, TRUE);

        switch (rc) {
            case -pcmk_err_diff_resync:
            case -pcmk_err_diff_failed:
                crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
                free_xml(current_cib); current_cib = NULL;
                break;
            case pcmk_ok:
                cib_updated = TRUE;
                break;
            default:
                crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
                free_xml(current_cib); current_cib = NULL;
        }
    }

    if (current_cib == NULL) {
        crm_trace("Re-requesting the full cib");
        /* The query stores its result in current_cib */
        cib->cmds->query(cib, NULL, &current_cib, cib_scope_local | cib_sync_call);
    }

    if (external_agent) {
        int format = 0;

        crm_element_value_int(diff, "format", &format);
        switch(format) {
            case 1:
                crm_diff_update_v1(event, msg);
                break;
            case 2:
                crm_diff_update_v2(event, msg);
                break;
            default:
                crm_err("Unknown patch format: %d", format);
        }
    }

    if (current_cib == NULL) {
        /* Report stale data only once per outage */
        if(!stale) {
            print_as("--- Stale data ---");
        }
        stale = TRUE;
        return;
    }

    stale = FALSE;
    kick_refresh(cib_updated);
}
/*!
 * \internal
 * \brief Rebuild the cluster status from the cached CIB and display it
 *
 * \param[in] user_data  Ignored
 *
 * \return TRUE if the status was displayed, FALSE if the CIB could not be
 *         upgraded, the fencing history could not be read, or output failed
 */
static gboolean
mon_refresh_display(gpointer user_data)
{
    stonith_history_t *stonith_history = NULL;
    xmlNode *cib_copy = copy_xml(current_cib);

    last_refresh = time(NULL);

    if (!cli_config_update(&cib_copy, NULL, FALSE)) {
        if (cib != NULL) {
            cib->cmds->signoff(cib);
        }
        print_as("Upgrade failed: %s", pcmk_strerror(-pcmk_err_schema_validation));
        if (output_format == mon_output_console) {
            sleep(2);
        }
        clean_up(CRM_EX_CONFIG);
        return FALSE;
    }

    /* get the stonith-history if there is evidence we need it
     */
    if (fence_history) {
        gboolean have_history = FALSE;

        if (st == NULL) {
            fprintf(stderr, "Critical: No stonith-API\n");

        } else if (st->cmds->history(st, st_opt_sync_call, NULL, &stonith_history, 120)) {
            fprintf(stderr, "Critical: Unable to get stonith-history\n");
            mon_cib_connection_destroy(NULL);

        } else {
            if (!fence_full_history && (output_format != mon_output_xml)) {
                stonith_history = reduce_stonith_history(stonith_history);
            }
            stonith_history = sort_stonith_history(stonith_history);
            have_history = TRUE;
        }

        if (!have_history) {
            /* all other cases are errors */
            free_xml(cib_copy);
            print_as("Reading stonith-history failed");
            if (output_format == mon_output_console) {
                sleep(2);
            }
            return FALSE;
        }
    }

    if (mon_data_set == NULL) {
        mon_data_set = pe_new_working_set();
        CRM_ASSERT(mon_data_set != NULL);
    }

    mon_data_set->input = cib_copy;
    cluster_status(mon_data_set);

    /* Unpack constraints if any section will need them
     * (tickets may be referenced in constraints but not granted yet,
     * and bans need negative location constraints) */
    if (show & (mon_show_bans | mon_show_tickets)) {
        unpack_constraints(get_object_root(XML_CIB_TAG_CONSTRAINTS,
                                           mon_data_set->input),
                           mon_data_set);
    }

    switch (output_format) {
        case mon_output_html:
        case mon_output_cgi:
            if (print_html_status(mon_data_set, output_filename, stonith_history) != 0) {
                fprintf(stderr, "Critical: Unable to output html file\n");
                clean_up(CRM_EX_CANTCREAT);
                return FALSE;
            }
            break;

        case mon_output_xml:
            print_xml_status(mon_data_set, stonith_history);
            break;

        case mon_output_monitor:
            print_simple_status(mon_data_set, stonith_history);
            if (has_warnings) {
                clean_up(MON_STATUS_WARN);
                return FALSE;
            }
            break;

        case mon_output_plain:
        case mon_output_console:
            print_status(mon_data_set, stonith_history);
            break;

        case mon_output_none:
            break;
    }

    stonith_history_free(stonith_history);
    stonith_history = NULL;
    pe_reset_working_set(mon_data_set);
    return TRUE;
}
/*!
 * \internal
 * \brief Fencing notification callback used to notify the external agent
 *
 * \param[in] st  Fencer connection the event arrived on
 * \param[in] e   Fencing event
 */
static void
mon_st_callback_event(stonith_t * st, stonith_event_t * e)
{
    char *desc = NULL;

    if (st->state == stonith_disconnected) {
        /* disconnect cib as well and have everything reconnect */
        mon_cib_connection_destroy(NULL);
        return;
    }

    if (!external_agent) {
        return;
    }

    desc = crm_strdup_printf("Operation %s requested by %s for peer %s: %s (ref=%s)",
                             e->operation, e->origin, e->target, pcmk_strerror(e->result),
                             e->id);
    send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc);
    free(desc);
}
/*!
 * \internal
 * \brief Schedule a display refresh, rate-limiting bursts of updates
 *
 * \param[in] data_updated  Whether this call reflects new data; such calls
 *                          are counted towards the forced refresh
 */
static void
kick_refresh(gboolean data_updated)
{
    static int updates = 0;
    long now = time(NULL);

    if (data_updated) {
        updates++;
    }

    if (refresh_timer == NULL) {
        refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL);
    }

    /* Refresh
     * - immediately if the last refresh was longer ago than the reconnect
     *   interval
     * - immediately once 10 updates have accumulated
     * - otherwise when the 2s timer started here expires
     */
    if (((now - last_refresh) > (reconnect_msec / 1000)) || (updates >= 10)) {
        mainloop_set_trigger(refresh_trigger);
        mainloop_timer_stop(refresh_timer);
        updates = 0;

    } else {
        mainloop_timer_start(refresh_timer);
    }
}
/*!
 * \internal
 * \brief Fencing notification callback used to refresh the display
 *
 * \param[in] st  Fencer connection the event arrived on
 * \param[in] e   Fencing event (unused)
 */
static void
mon_st_callback_display(stonith_t * st, stonith_event_t * e)
{
    if (st->state == stonith_disconnected) {
        /* disconnect cib as well and have everything reconnect */
        mon_cib_connection_destroy(NULL);
        return;
    }

    print_dot();
    kick_refresh(TRUE);
}
/*!
 * \internal
 * \brief Disconnect from the CIB manager and the fencer and free both handles
 */
static void
clean_up_connections(void)
{
    if (cib != NULL) {
        cib->cmds->signoff(cib);
        cib_delete(cib);
        cib = NULL;
    }

    if (st == NULL) {
        return;
    }

    /* Notifications are only removed while the connection is still up */
    if (st->state != stonith_disconnected) {
        st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT);
        st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE);
        st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY);
        st->cmds->disconnect(st);
    }
    stonith_api_delete(st);
    st = NULL;
}
/*
 * Shut down ncurses if it was started, close the CIB manager and fencer
 * connections, release memory, and (for a usage error) either emit a CGI
 * error response or show the help text. Then exit with the given code.
 *
 * This function does not actually return. It is declared as returning
 * crm_exit_t so that callers can write "return clean_up(exit_code);", which
 * helps static analysis follow the code flow.
 */
static crm_exit_t
clean_up(crm_exit_t exit_code)
{
#if CURSES_ENABLED
    if (curses_console_initialized) {
        /* From here on, any further output must not go through curses */
        output_format = mon_output_plain;
        echo();
        nocbreak();
        endwin();
        curses_console_initialized = FALSE;
    }
#endif

    clean_up_connections();
    free(output_filename);
    free(pid_file);

    pe_free_working_set(mon_data_set);
    mon_data_set = NULL;

    if ((exit_code == CRM_EX_USAGE) && (output_format == mon_output_cgi)) {
        fprintf(stdout, "Content-Type: text/plain\n"
                        "Status: 500\n\n");

    } else if (exit_code == CRM_EX_USAGE) {
        crm_help('?', CRM_EX_USAGE);
    }

    crm_exit(exit_code);
}