Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F3686798
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
65 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/crmd/lrm.c b/crmd/lrm.c
index d2698822d1..a15fded1db 100644
--- a/crmd/lrm.c
+++ b/crmd/lrm.c
@@ -1,2093 +1,2095 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crmd.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
#include <crmd_callbacks.h>
#include <crmd_lrm.h>
#include <lrm/lrm_api.h>
#include <lrm/raexec.h>
#define START_DELAY_THRESHOLD 5 * 60 * 1000
typedef struct resource_history_s {
char *id;
lrm_rsc_t rsc;
lrm_op_t *last;
lrm_op_t *failed;
GList *recurring_op_list;
} rsc_history_t;
struct recurring_op_s {
char *rsc_id;
char *op_key;
int call_id;
int interval;
gboolean remove;
gboolean cancelled;
};
struct pending_deletion_op_s {
char *rsc;
ha_msg_input_t *input;
};
struct delete_event_s {
int rc;
const char *rsc;
};
GHashTable *resource_history = NULL;
GHashTable *pending_ops = NULL;
GHashTable *deletion_ops = NULL;
GCHSource *lrm_source = NULL;
int num_lrm_register_fails = 0;
int max_lrm_register_fails = 30;
gboolean populate_history_cache(void);
gboolean process_lrm_event(lrm_op_t * op);
gboolean is_rsc_active(const char *rsc_id);
gboolean build_active_RAs(xmlNode * rsc_list);
static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
static int delete_rsc_status(const char *rsc_id, int call_options, const char *user_name);
lrm_op_t *construct_op(xmlNode * rsc_op, const char *rsc_id, const char *operation);
void do_lrm_rsc_op(lrm_rsc_t * rsc, const char *operation, xmlNode * msg, xmlNode * request);
void send_direct_ack(const char *to_host, const char *to_sys,
lrm_rsc_t * rsc, lrm_op_t * op, const char *rsc_id);
void
lrm_connection_destroy(gpointer user_data)
{
if (is_set(fsa_input_register, R_LRM_CONNECTED)) {
crm_crit("LRM Connection failed");
register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
clear_bit_inplace(fsa_input_register, R_LRM_CONNECTED);
} else {
crm_info("LRM Connection disconnected");
}
lrm_source = NULL;
}
static void
free_deletion_op(gpointer value)
{
struct pending_deletion_op_s *op = value;
crm_free(op->rsc);
delete_ha_msg_input(op->input);
crm_free(op);
}
static void
free_recurring_op(gpointer value)
{
struct recurring_op_s *op = (struct recurring_op_s *)value;
crm_free(op->rsc_id);
crm_free(op->op_key);
crm_free(op);
}
static char *
make_stop_id(const char *rsc, int call_id)
{
char *op_id = NULL;
crm_malloc0(op_id, strlen(rsc) + 34);
if (op_id != NULL) {
snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id);
}
return op_id;
}
static void
dup_attr(gpointer key, gpointer value, gpointer user_data)
{
g_hash_table_replace(user_data, crm_strdup(key), crm_strdup(value));
}
static void
history_cache_destroy(gpointer data)
{
rsc_history_t *entry = data;
crm_free(entry->rsc.type);
crm_free(entry->rsc.class);
crm_free(entry->rsc.provider);
free_lrm_op(entry->failed);
free_lrm_op(entry->last);
crm_free(entry->id);
crm_free(entry);
}
static void
update_history_cache(lrm_rsc_t * rsc, lrm_op_t * op)
{
int target_rc = 0;
rsc_history_t *entry = NULL;
#ifdef HAVE_LRM_OP_T_RSC_DELETED
if (op->rsc_deleted) {
crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
delete_rsc_status(op->rsc_id, cib_quorum_override, NULL);
return;
}
#endif
if (safe_str_eq(op->op_type, RSC_NOTIFY)) {
return;
}
crm_debug("Appending %s op to history for '%s'", op->op_type, op->rsc_id);
entry = g_hash_table_lookup(resource_history, op->rsc_id);
if (entry == NULL && rsc) {
crm_malloc0(entry, sizeof(rsc_history_t));
entry->id = crm_strdup(op->rsc_id);
g_hash_table_insert(resource_history, entry->id, entry);
entry->rsc.id = entry->id;
entry->rsc.type = crm_strdup(rsc->type);
entry->rsc.class = crm_strdup(rsc->class);
if (rsc->provider) {
entry->rsc.provider = crm_strdup(rsc->provider);
} else {
entry->rsc.provider = NULL;
}
} else if (entry == NULL) {
crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
return;
}
target_rc = rsc_op_expected_rc(op);
if (op->op_status == LRM_OP_CANCELLED) {
crm_trace("Skipping %s_%s_%d rc=%d, status=%d", op->rsc_id, op->op_type, op->interval,
op->rc, op->op_status);
} else if (did_rsc_op_fail(op, target_rc)) {
/* We must store failed monitors here
* - otherwise the block below will cause them to be forgetten them when a stop happens
*/
if (entry->failed) {
free_lrm_op(entry->failed);
}
entry->failed = copy_lrm_op(op);
} else if (op->interval == 0) {
if (entry->last) {
free_lrm_op(entry->last);
}
entry->last = copy_lrm_op(op);
}
if (op->interval > 0) {
crm_trace("Adding recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval);
entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, copy_lrm_op(op));
} else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) {
GList *gIter = entry->recurring_op_list;
crm_trace("Dropping %d recurring ops because of: %s_%s_%d",
g_list_length(gIter), op->rsc_id, op->op_type, op->interval);
for (; gIter != NULL; gIter = gIter->next) {
free_lrm_op(gIter->data);
}
g_list_free(entry->recurring_op_list);
entry->recurring_op_list = NULL;
}
}
/* A_LRM_CONNECT */
void
do_lrm_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
if (fsa_lrm_conn == NULL) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
if (action & A_LRM_DISCONNECT) {
if (verify_stopped(cur_state, LOG_INFO) == FALSE) {
if(action == A_LRM_DISCONNECT) {
crmd_fsa_stall(NULL);
return;
}
}
if (is_set(fsa_input_register, R_LRM_CONNECTED)) {
clear_bit_inplace(fsa_input_register, R_LRM_CONNECTED);
if (lrm_source) {
G_main_del_IPC_Channel(lrm_source);
lrm_source = NULL;
}
fsa_lrm_conn->lrm_ops->signoff(fsa_lrm_conn);
crm_info("Disconnected from the LRM");
}
g_hash_table_destroy(resource_history);
resource_history = NULL;
g_hash_table_destroy(deletion_ops);
deletion_ops = NULL;
g_hash_table_destroy(pending_ops);
pending_ops = NULL;
}
if (action & A_LRM_CONNECT) {
int ret = HA_OK;
deletion_ops = g_hash_table_new_full(crm_str_hash, g_str_equal,
g_hash_destroy_str, free_deletion_op);
pending_ops = g_hash_table_new_full(crm_str_hash, g_str_equal,
g_hash_destroy_str, free_recurring_op);
resource_history = g_hash_table_new_full(crm_str_hash, g_str_equal,
NULL, history_cache_destroy);
if (ret == HA_OK) {
crm_debug("Connecting to the LRM");
ret = fsa_lrm_conn->lrm_ops->signon(fsa_lrm_conn, CRM_SYSTEM_CRMD);
}
if (ret != HA_OK) {
if (++num_lrm_register_fails < max_lrm_register_fails) {
crm_warn("Failed to sign on to the LRM %d"
" (%d max) times", num_lrm_register_fails, max_lrm_register_fails);
crm_timer_start(wait_timer);
crmd_fsa_stall(NULL);
return;
}
}
if (ret == HA_OK) {
crm_trace("LRM: set_lrm_callback...");
ret = fsa_lrm_conn->lrm_ops->set_lrm_callback(fsa_lrm_conn, lrm_op_callback);
if (ret != HA_OK) {
crm_err("Failed to set LRM callbacks");
}
}
if (ret != HA_OK) {
crm_err("Failed to sign on to the LRM %d" " (max) times", num_lrm_register_fails);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
populate_history_cache();
/* TODO: create a destroy handler that causes
* some recovery to happen
*/
lrm_source = G_main_add_IPC_Channel(G_PRIORITY_LOW,
fsa_lrm_conn->lrm_ops->ipcchan(fsa_lrm_conn),
FALSE, lrm_dispatch, fsa_lrm_conn,
lrm_connection_destroy);
set_bit_inplace(fsa_input_register, R_LRM_CONNECTED);
crm_debug("LRM connection established");
}
if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__);
}
}
static void
ghash_print_pending(gpointer key, gpointer value, gpointer user_data)
{
const char *stop_id = key;
int *log_level = user_data;
struct recurring_op_s *pending = value;
do_crm_log(*log_level, "Pending action: %s (%s)", stop_id, pending->op_key);
}
static void
ghash_print_pending_for_rsc(gpointer key, gpointer value, gpointer user_data)
{
const char *stop_id = key;
char *rsc = user_data;
struct recurring_op_s *pending = value;
if (safe_str_eq(rsc, pending->rsc_id)) {
crm_notice("%sction %s (%s) incomplete at shutdown",
pending->interval == 0 ? "A" : "Recurring a", stop_id, pending->op_key);
}
}
static void
ghash_count_pending(gpointer key, gpointer value, gpointer user_data)
{
int *counter = user_data;
struct recurring_op_s *pending = value;
if (pending->interval > 0) {
/* Ignore recurring actions in the shutdown calculations */
return;
}
(*counter)++;
}
gboolean
verify_stopped(enum crmd_fsa_state cur_state, int log_level)
{
int counter = 0;
gboolean rc = TRUE;
GHashTableIter gIter;
rsc_history_t *entry = NULL;
crm_debug("Checking for active resources before exit");
if (cur_state == S_TERMINATE) {
log_level = LOG_ERR;
}
if (pending_ops) {
if (is_set(fsa_input_register, R_LRM_CONNECTED)) {
/* Only log/complain about non-recurring actions */
g_hash_table_foreach_remove(pending_ops, stop_recurring_actions, NULL);
}
g_hash_table_foreach(pending_ops, ghash_count_pending, &counter);
}
if (counter > 0) {
rc = FALSE;
do_crm_log(log_level,
"%d pending LRM operations at shutdown%s",
counter, cur_state == S_TERMINATE ? "" : "... waiting");
if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) {
g_hash_table_foreach(pending_ops, ghash_print_pending, &log_level);
}
goto bail;
}
if (resource_history == NULL) {
goto bail;
}
g_hash_table_iter_init(&gIter, resource_history);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
if (is_rsc_active(entry->id) == FALSE) {
continue;
}
crm_err("Resource %s was active at shutdown."
" You may ignore this error if it is unmanaged.", entry->id);
g_hash_table_foreach(pending_ops, ghash_print_pending_for_rsc, entry->id);
}
bail:
set_bit_inplace(fsa_input_register, R_SENT_RSC_STOP);
if (cur_state == S_TERMINATE) {
rc = TRUE;
}
return rc;
}
static char *
get_rsc_metadata(const char *type, const char *class, const char *provider)
{
char *metadata = NULL;
CRM_CHECK(type != NULL, return NULL);
CRM_CHECK(class != NULL, return NULL);
if (provider == NULL) {
provider = "heartbeat";
}
crm_trace("Retreiving metadata for %s::%s:%s", type, class, provider);
metadata = fsa_lrm_conn->lrm_ops->get_rsc_type_metadata(fsa_lrm_conn, class, type, provider);
if (metadata) {
/* copy the metadata because the LRM likes using
* g_alloc instead of cl_malloc
*/
char *m_copy = crm_strdup(metadata);
g_free(metadata);
metadata = m_copy;
} else {
crm_warn("No metadata found for %s::%s:%s", type, class, provider);
}
return metadata;
}
typedef struct reload_data_s {
char *key;
char *metadata;
time_t last_query;
gboolean can_reload;
GListPtr restart_list;
} reload_data_t;
static void
g_hash_destroy_reload(gpointer data)
{
reload_data_t *reload = data;
crm_free(reload->key);
crm_free(reload->metadata);
slist_basic_destroy(reload->restart_list);
crm_free(reload);
}
GHashTable *reload_hash = NULL;
static GListPtr
get_rsc_restart_list(lrm_rsc_t * rsc, lrm_op_t * op)
{
int len = 0;
char *key = NULL;
char *copy = NULL;
const char *value = NULL;
const char *provider = NULL;
xmlNode *param = NULL;
xmlNode *params = NULL;
xmlNode *actions = NULL;
xmlNode *metadata = NULL;
time_t now = time(NULL);
reload_data_t *reload = NULL;
if (reload_hash == NULL) {
reload_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_reload);
}
provider = rsc->provider;
if (provider == NULL) {
provider = "heartbeat";
}
len = strlen(rsc->type) + strlen(rsc->class) + strlen(provider) + 4;
crm_malloc(key, len);
snprintf(key, len, "%s::%s:%s", rsc->type, rsc->class, provider);
reload = g_hash_table_lookup(reload_hash, key);
if (reload && ((now - 9) > reload->last_query)
&& safe_str_eq(op->op_type, RSC_START)) {
reload = NULL; /* re-query */
}
if (reload == NULL) {
xmlNode *action = NULL;
crm_malloc0(reload, sizeof(reload_data_t));
g_hash_table_replace(reload_hash, key, reload);
reload->last_query = now;
reload->key = key;
key = NULL;
reload->metadata = get_rsc_metadata(rsc->type, rsc->class, provider);
metadata = string2xml(reload->metadata);
if (metadata == NULL) {
crm_err("Metadata for %s::%s:%s is not valid XML",
rsc->provider, rsc->class, rsc->type);
goto cleanup;
}
actions = find_xml_node(metadata, "actions", TRUE);
for (action = __xml_first_child(actions); action != NULL; action = __xml_next(action)) {
if (crm_str_eq((const char *)action->name, "action", TRUE)) {
value = crm_element_value(action, "name");
if (safe_str_eq("reload", value)) {
reload->can_reload = TRUE;
break;
}
}
}
if (reload->can_reload == FALSE) {
goto cleanup;
}
params = find_xml_node(metadata, "parameters", TRUE);
for (param = __xml_first_child(params); param != NULL; param = __xml_next(param)) {
if (crm_str_eq((const char *)param->name, "parameter", TRUE)) {
value = crm_element_value(param, "unique");
if (crm_is_true(value)) {
value = crm_element_value(param, "name");
if (value == NULL) {
crm_err("%s: NULL param", key);
continue;
}
crm_debug("Attr %s is not reloadable", value);
copy = crm_strdup(value);
CRM_CHECK(copy != NULL, continue);
reload->restart_list = g_list_append(reload->restart_list, copy);
}
}
}
}
cleanup:
crm_free(key);
free_xml(metadata);
return reload ? reload->restart_list : NULL;
}
static void
append_restart_list(lrm_rsc_t * rsc, lrm_op_t * op, xmlNode * update, const char *version)
{
int len = 0;
char *list = NULL;
char *digest = NULL;
const char *value = NULL;
gboolean non_empty = FALSE;
xmlNode *restart = NULL;
GListPtr restart_list = NULL;
GListPtr lpc = NULL;
if (op->interval > 0) {
/* monitors are not reloadable */
return;
} else if (op->params == NULL) {
crm_debug("%s has no parameters", ID(update));
return;
} else if (rsc == NULL) {
return;
} else if (crm_str_eq(CRMD_ACTION_STOP, op->op_type, TRUE)) {
/* Stopped resources don't need to be reloaded */
return;
} else if (compare_version("1.0.8", version) > 0) {
/* Caller version does not support reloads */
return;
}
restart_list = get_rsc_restart_list(rsc, op);
if (restart_list == NULL) {
/* Resource does not support reloads */
return;
}
restart = create_xml_node(NULL, XML_TAG_PARAMS);
for (lpc = restart_list; lpc != NULL; lpc = lpc->next) {
const char *param = (const char *)lpc->data;
int start = len;
CRM_CHECK(param != NULL, continue);
value = g_hash_table_lookup(op->params, param);
if (value != NULL) {
non_empty = TRUE;
crm_xml_add(restart, param, value);
}
len += strlen(param) + 2;
crm_realloc(list, len + 1);
sprintf(list + start, " %s ", param);
}
digest = calculate_operation_digest(restart, version);
crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list);
crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest);
#if 0
crm_debug("%s: %s, %s", rsc->id, digest, list);
if (non_empty) {
crm_log_xml_debug(restart, "restart digest source");
}
#endif
free_xml(restart);
crm_free(digest);
crm_free(list);
}
static gboolean
build_operation_update(xmlNode * parent, lrm_rsc_t * rsc, lrm_op_t * op, const char *src)
{
int target_rc = 0;
xmlNode *xml_op = NULL;
const char *caller_version = CRM_FEATURE_SET;
if (op == NULL) {
return FALSE;
} else if (AM_I_DC) {
} else if (fsa_our_dc_version != NULL) {
caller_version = fsa_our_dc_version;
} else if (op->params == NULL) {
caller_version = fsa_our_dc_version;
} else {
/* there is a small risk in formerly mixed clusters that
* it will be sub-optimal.
* however with our upgrade policy, the update we send
* should still be completely supported anyway
*/
caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION);
crm_warn("Falling back to operation originator version: %s", caller_version);
}
target_rc = rsc_op_expected_rc(op);
xml_op = create_operation_update(parent, op, caller_version, target_rc, src, LOG_DEBUG);
if (xml_op) {
append_restart_list(rsc, op, xml_op, caller_version);
}
return TRUE;
}
gboolean
is_rsc_active(const char *rsc_id)
{
rsc_history_t *entry = NULL;
crm_trace("Processing lrm_rsc_t entry %s", rsc_id);
entry = g_hash_table_lookup(resource_history, rsc_id);
if (entry == NULL || entry->last == NULL) {
return FALSE;
}
if (entry->last->rc == EXECRA_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)) {
return FALSE;
} else if (entry->last->rc == EXECRA_OK
&& safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) {
/* a stricter check is too complex...
* leave that to the PE
*/
return FALSE;
} else if (entry->last->rc == EXECRA_NOT_RUNNING) {
return FALSE;
} else if (entry->last->interval == 0 && entry->last->rc == EXECRA_NOT_CONFIGURED) {
/* Badly configured resources can't be reliably stopped */
return FALSE;
}
return TRUE;
}
gboolean
build_active_RAs(xmlNode * rsc_list)
{
GHashTableIter iter;
rsc_history_t *entry = NULL;
g_hash_table_iter_init(&iter, resource_history);
while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
GList *gIter = NULL;
xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE);
crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id);
crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type);
crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.class);
crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider);
build_operation_update(xml_rsc, &(entry->rsc), entry->last, __FUNCTION__);
build_operation_update(xml_rsc, &(entry->rsc), entry->failed, __FUNCTION__);
for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) {
build_operation_update(xml_rsc, &(entry->rsc), gIter->data, __FUNCTION__);
}
}
return FALSE;
}
gboolean
populate_history_cache(void)
{
GListPtr gIter = NULL;
GListPtr gIter2 = NULL;
GList *op_list = NULL;
GList *rsc_list = NULL;
state_flag_t cur_state = 0;
rsc_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn);
for (gIter = rsc_list; gIter != NULL; gIter = gIter->next) {
char *rid = (char *)gIter->data;
int max_call_id = -1;
lrm_rsc_t *rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid);
if (rsc == NULL) {
crm_err("NULL resource returned from the LRM: %s", rid);
continue;
}
op_list = rsc->ops->get_cur_state(rsc, &cur_state);
for (gIter2 = op_list; gIter2 != NULL; gIter2 = gIter2->next) {
lrm_op_t *op = (lrm_op_t *) gIter2->data;
if (max_call_id < op->call_id) {
update_history_cache(rsc, op);
} else if (max_call_id > op->call_id) {
crm_err("Bad call_id in list=%d. Previous call_id=%d", op->call_id, max_call_id);
} else {
crm_warn("lrm->get_cur_state() returned"
" duplicate entries for call_id=%d", op->call_id);
}
max_call_id = op->call_id;
lrm_free_op(op);
}
g_list_free(op_list);
lrm_free_rsc(rsc);
free(rid);
}
g_list_free(rsc_list);
return TRUE;
}
xmlNode *
do_lrm_query(gboolean is_replace)
{
gboolean shut_down = FALSE;
xmlNode *xml_result = NULL;
xmlNode *xml_state = NULL;
xmlNode *xml_data = NULL;
xmlNode *rsc_list = NULL;
const char *exp_state = CRMD_STATE_ACTIVE;
if (is_set(fsa_input_register, R_SHUTDOWN)) {
exp_state = CRMD_STATE_INACTIVE;
shut_down = TRUE;
}
xml_state = create_node_state(fsa_our_uname, ACTIVESTATUS, XML_BOOLEAN_TRUE,
ONLINESTATUS, CRMD_JOINSTATE_MEMBER, exp_state,
!shut_down, __FUNCTION__);
xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM);
crm_xml_add(xml_data, XML_ATTR_ID, fsa_our_uuid);
rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES);
/* Build a list of active (not always running) resources */
build_active_RAs(rsc_list);
xml_result = create_cib_fragment(xml_state, XML_CIB_TAG_STATUS);
crm_log_xml_trace(xml_state, "Current state of the LRM");
free_xml(xml_state);
return xml_result;
}
static void
notify_deleted(ha_msg_input_t * input, const char *rsc_id, int rc)
{
lrm_op_t *op = NULL;
const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
crm_info("Notifying %s on %s that %s was%s deleted",
from_sys, from_host, rsc_id, rc == HA_OK ? "" : " not");
op = construct_op(input->xml, rsc_id, CRMD_ACTION_DELETE);
CRM_ASSERT(op != NULL);
if (rc == HA_OK) {
op->op_status = LRM_OP_DONE;
op->rc = EXECRA_OK;
} else {
op->op_status = LRM_OP_ERROR;
op->rc = EXECRA_UNKNOWN_ERROR;
}
send_direct_ack(from_host, from_sys, NULL, op, rsc_id);
free_lrm_op(op);
if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
/* this isn't expected - trigger a new transition */
time_t now = time(NULL);
char *now_s = crm_itoa(now);
crm_debug("Triggering a refresh after %s deleted %s from the LRM", from_sys, rsc_id);
update_attr(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG,
NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, FALSE);
crm_free(now_s);
}
}
static gboolean
lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
{
struct delete_event_s *event = user_data;
struct pending_deletion_op_s *op = value;
if (safe_str_eq(event->rsc, op->rsc)) {
notify_deleted(op->input, event->rsc, event->rc);
return TRUE;
}
return FALSE;
}
static gboolean
lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
{
const char *rsc = user_data;
struct recurring_op_s *pending = value;
if (safe_str_eq(rsc, pending->rsc_id)) {
crm_info("Removing op %s:%d for deleted resource %s",
pending->op_key, pending->call_id, rsc);
return TRUE;
}
return FALSE;
}
/*
* Remove the rsc from the CIB
*
* Avoids refreshing the entire LRM section of this host
*/
#define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']"
static int
delete_rsc_status(const char *rsc_id, int call_options, const char *user_name)
{
char *rsc_xpath = NULL;
int max = 0;
int rc = cib_ok;
CRM_CHECK(rsc_id != NULL, return cib_id_check);
max = strlen(rsc_template) + strlen(rsc_id) + strlen(fsa_our_uname) + 1;
crm_malloc0(rsc_xpath, max);
snprintf(rsc_xpath, max, rsc_template, fsa_our_uname, rsc_id);
rc = fsa_cib_conn->cmds->delegated_variant_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath,
NULL, NULL, call_options | cib_xpath, user_name);
crm_free(rsc_xpath);
return rc;
}
static void
delete_rsc_entry(ha_msg_input_t * input, const char *rsc_id, GHashTableIter *rsc_gIter, int rc, const char *user_name)
{
struct delete_event_s event;
CRM_CHECK(rsc_id != NULL, return);
if (rc == HA_OK) {
char *rsc_id_copy = crm_strdup(rsc_id);
if (rsc_gIter)
g_hash_table_iter_remove(rsc_gIter);
else
g_hash_table_remove(resource_history, rsc_id_copy);
crm_debug("sync: Sending delete op for %s", rsc_id_copy);
delete_rsc_status(rsc_id_copy, cib_quorum_override, user_name);
g_hash_table_foreach_remove(pending_ops, lrm_remove_deleted_op, rsc_id_copy);
crm_free(rsc_id_copy);
}
if (input) {
notify_deleted(input, rsc_id, rc);
}
event.rc = rc;
event.rsc = rsc_id;
g_hash_table_foreach_remove(deletion_ops, lrm_remove_deleted_rsc, &event);
}
/*
* Remove the op from the CIB
*
* Avoids refreshing the entire LRM section of this host
*/
#define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s']"
#define op_call_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s' and @"XML_LRM_ATTR_CALLID"='%d']"
static void
delete_op_entry(lrm_op_t * op, const char *rsc_id, const char *key, int call_id)
{
xmlNode *xml_top = NULL;
if (op != NULL) {
xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP);
crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id);
crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data);
if(op->interval > 0) {
+ char *op_id = generate_op_key(op->rsc_id, op->op_type, op->interval);
/* Avoid deleting last_failure too (if it was a result of this recurring op failing) */
- crm_xml_add(xml_top, XML_ATTR_ID, op->user_data);
+ crm_xml_add(xml_top, XML_ATTR_ID, op_id);
+ crm_free(op_id);
}
crm_debug("async: Sending delete op for %s_%s_%d (call=%d)",
op->rsc_id, op->op_type, op->interval, op->call_id);
fsa_cib_conn->cmds->delete(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_quorum_override);
} else if (rsc_id != NULL && key != NULL) {
int max = 0;
char *op_xpath = NULL;
if (call_id > 0) {
max =
strlen(op_call_template) + strlen(rsc_id) + strlen(fsa_our_uname) + strlen(key) +
10;
crm_malloc0(op_xpath, max);
snprintf(op_xpath, max, op_call_template, fsa_our_uname, rsc_id, key, call_id);
} else {
max = strlen(op_template) + strlen(rsc_id) + strlen(fsa_our_uname) + strlen(key) + 1;
crm_malloc0(op_xpath, max);
snprintf(op_xpath, max, op_template, fsa_our_uname, rsc_id, key);
}
crm_debug("sync: Sending delete op for %s (call=%d)", rsc_id, call_id);
fsa_cib_conn->cmds->delete(fsa_cib_conn, op_xpath, NULL, cib_quorum_override | cib_xpath);
crm_free(op_xpath);
} else {
crm_err("Not enough information to delete op entry: rsc=%p key=%p", rsc_id, key);
return;
}
crm_log_xml_trace(xml_top, "op:cancel");
free_xml(xml_top);
}
void lrm_clear_last_failure(const char *rsc_id)
{
char *attr = NULL;
GHashTableIter iter;
rsc_history_t *entry = NULL;
attr = generate_op_key(rsc_id, "last_failure", 0);
delete_op_entry(NULL, rsc_id, attr, 0);
crm_free(attr);
if (!resource_history) {
return;
}
g_hash_table_iter_init(&iter, resource_history);
while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
if (safe_str_eq(rsc_id, entry->id)) {
free_lrm_op(entry->failed);
entry->failed = NULL;
}
}
}
static gboolean
cancel_op(lrm_rsc_t * rsc, const char *key, int op, gboolean remove)
{
int rc = HA_OK;
struct recurring_op_s *pending = NULL;
CRM_CHECK(op != 0, return FALSE);
CRM_CHECK(rsc != NULL, return FALSE);
if (key == NULL) {
key = make_stop_id(rsc->id, op);
}
pending = g_hash_table_lookup(pending_ops, key);
if (pending) {
if (remove && pending->remove == FALSE) {
pending->remove = TRUE;
crm_debug("Scheduling %s for removal", key);
}
if (pending->cancelled) {
crm_debug("Operation %s already cancelled", key);
return TRUE;
}
pending->cancelled = TRUE;
} else {
crm_info("No pending op found for %s", key);
}
crm_debug("Cancelling op %d for %s (%s)", op, rsc->id, key);
rc = rsc->ops->cancel_op(rsc, op);
if (rc == HA_OK) {
crm_debug("Op %d for %s (%s): cancelled", op, rsc->id, key);
#ifdef HAVE_LRM_OP_T_RSC_DELETED
} else if (rc == HA_RSCBUSY) {
crm_debug("Op %d for %s (%s): cancelation pending", op, rsc->id, key);
#endif
} else {
crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc->id, key);
/* The caller needs to make sure the entry is
* removed from the pending_ops list
*
* Usually by returning TRUE inside the worker function
* supplied to g_hash_table_foreach_remove()
*
* Not removing the entry from pending_ops will block
* the node from shutting down
*/
return FALSE;
}
return TRUE;
}
struct cancel_data {
gboolean done;
gboolean remove;
const char *key;
lrm_rsc_t *rsc;
};
static gboolean
cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
{
struct cancel_data *data = user_data;
struct recurring_op_s *op = (struct recurring_op_s *)value;
if (safe_str_eq(op->op_key, data->key)) {
data->done = TRUE;
if (cancel_op(data->rsc, key, op->call_id, data->remove) == FALSE) {
return TRUE;
}
}
return FALSE;
}
static gboolean
cancel_op_key(lrm_rsc_t * rsc, const char *key, gboolean remove)
{
struct cancel_data data;
CRM_CHECK(rsc != NULL, return FALSE);
CRM_CHECK(key != NULL, return FALSE);
data.key = key;
data.rsc = rsc;
data.done = FALSE;
data.remove = remove;
g_hash_table_foreach_remove(pending_ops, cancel_action_by_key, &data);
return data.done;
}
static lrm_rsc_t *
get_lrm_resource(xmlNode * resource, xmlNode * op_msg, gboolean do_create)
{
char rid[RID_LEN];
lrm_rsc_t *rsc = NULL;
const char *short_id = ID(resource);
const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG);
crm_trace("Retrieving %s from the LRM.", short_id);
CRM_CHECK(short_id != NULL, return NULL);
if (rsc == NULL) {
/* check if its already there (short name) */
strncpy(rid, short_id, RID_LEN);
rid[RID_LEN - 1] = 0;
rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid);
}
if (rsc == NULL && long_id != NULL) {
/* try the long name instead */
strncpy(rid, long_id, RID_LEN);
rid[RID_LEN - 1] = 0;
rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid);
}
if (rsc == NULL && do_create) {
/* add it to the LRM */
const char *type = crm_element_value(resource, XML_ATTR_TYPE);
const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS);
const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER);
GHashTable *params = xml2list(op_msg);
CRM_CHECK(class != NULL, return NULL);
CRM_CHECK(type != NULL, return NULL);
crm_trace("Adding rsc %s before operation", short_id);
strncpy(rid, short_id, RID_LEN);
rid[RID_LEN - 1] = 0;
if (g_hash_table_size(params) == 0) {
crm_log_xml_warn(op_msg, "EmptyParams");
}
if (params != NULL) {
g_hash_table_remove(params, CRM_META "_op_target_rc");
}
fsa_lrm_conn->lrm_ops->add_rsc(fsa_lrm_conn, rid, class, type, provider, params);
rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid);
g_hash_table_destroy(params);
if (rsc == NULL) {
fsa_data_t *msg_data = NULL;
crm_err("Could not add resource %s to LRM", rid);
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
}
}
return rsc;
}
static void
delete_resource(const char *id, lrm_rsc_t * rsc, GHashTableIter *gIter,
const char *sys, const char *host, const char *user, ha_msg_input_t * request)
{
int rc = HA_OK;
crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host);
if (rsc) {
rc = fsa_lrm_conn->lrm_ops->delete_rsc(fsa_lrm_conn, id);
}
if (rc == HA_OK) {
crm_trace("Resource '%s' deleted", id);
#ifdef HAVE_LRM_OP_T_RSC_DELETED
} else if (rc == HA_RSCBUSY) {
crm_info("Deletion of resource '%s' pending", id);
if (request) {
struct pending_deletion_op_s *op = NULL;
char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE);
crm_malloc0(op, sizeof(struct pending_deletion_op_s));
op->rsc = crm_strdup(rsc->id);
op->input = copy_ha_msg_input(request);
g_hash_table_insert(deletion_ops, ref, op);
}
return;
#endif
} else {
crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d",
id, sys, user ? user : "internal", host, rc);
}
delete_rsc_entry(request, id, gIter, rc, user);
}
/* A_LRM_INVOKE */
void
do_lrm_invoke(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
gboolean done = FALSE;
gboolean create_rsc = TRUE;
const char *crm_op = NULL;
const char *from_sys = NULL;
const char *from_host = NULL;
const char *operation = NULL;
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
const char *user_name = NULL;
#if ENABLE_ACL
user_name = crm_element_value(input->msg, F_CRM_USER);
crm_trace("LRM command from user '%s'", user_name);
#endif
crm_op = crm_element_value(input->msg, F_CRM_TASK);
from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
}
crm_trace("LRM command from: %s", from_sys);
if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) {
operation = CRMD_ACTION_DELETE;
} else if (safe_str_eq(operation, CRM_OP_LRM_REFRESH)) {
crm_op = CRM_OP_LRM_REFRESH;
} else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) {
#if HAVE_STRUCT_LRM_OPS_FAIL_RSC
int rc = HA_OK;
lrm_op_t *op = NULL;
lrm_rsc_t *rsc = NULL;
xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
CRM_CHECK(xml_rsc != NULL, return);
op = construct_op(input->xml, ID(xml_rsc), "fail");
op->op_status = LRM_OP_ERROR;
op->rc = EXECRA_UNKNOWN_ERROR;
CRM_ASSERT(op != NULL);
# if ENABLE_ACL
if (user_name && is_privileged(user_name) == FALSE) {
crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc));
send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
free_lrm_op(op);
return;
}
# endif
rsc = get_lrm_resource(xml_rsc, input->xml, create_rsc);
if (rsc) {
crm_info("Failing resource %s...", rsc->id);
rc = fsa_lrm_conn->lrm_ops->fail_rsc(fsa_lrm_conn, rsc->id, 1,
"do_lrm_invoke: Async failure");
if (rc != HA_OK) {
crm_err("Could not initiate an asynchronous failure for %s (%d)", rsc->id, rc);
} else {
op->op_status = LRM_OP_DONE;
op->rc = EXECRA_OK;
}
lrm_free_rsc(rsc);
} else {
crm_info("Cannot find/create resource in order to fail it...");
crm_log_xml_warn(input->msg, "bad input");
}
send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
free_lrm_op(op);
return;
#else
crm_info("Failing resource...");
operation = "fail";
#endif
} else if (input->xml != NULL) {
operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK);
}
if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
enum cib_errors rc = cib_ok;
xmlNode *fragment = do_lrm_query(TRUE);
crm_info("Forcing a local LRM refresh");
fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name);
free_xml(fragment);
} else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) {
xmlNode *data = do_lrm_query(FALSE);
xmlNode *reply = create_reply(input->msg, data);
if (relay_message(reply, TRUE) == FALSE) {
crm_err("Unable to route reply");
crm_log_xml_err(reply, "reply");
}
free_xml(reply);
free_xml(data);
} else if (safe_str_eq(operation, CRM_OP_PROBED)) {
update_attrd(NULL, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name);
} else if (safe_str_eq(crm_op, CRM_OP_REPROBE)) {
GHashTableIter gIter;
rsc_history_t *entry = NULL;
crm_notice("Forcing the status of all resources to be redetected");
g_hash_table_iter_init(&gIter, resource_history);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
lrm_rsc_t *rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, entry->id);
delete_resource(entry->id, rsc, &gIter, from_sys, from_host, user_name, NULL);
lrm_free_rsc(rsc);
}
/* Now delete the copy in the CIB */
erase_status_tag(fsa_our_uname, XML_CIB_TAG_LRM, cib_scope_local);
/* And finally, _delete_ the value in attrd
* Setting it to FALSE results in the PE sending us back here again
*/
update_attrd(NULL, CRM_OP_PROBED, NULL, user_name);
} else if (operation != NULL) {
lrm_rsc_t *rsc = NULL;
xmlNode *params = NULL;
xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
CRM_CHECK(xml_rsc != NULL, return);
/* only the first 16 chars are used by the LRM */
params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE);
if (safe_str_eq(operation, CRMD_ACTION_DELETE)) {
create_rsc = FALSE;
}
rsc = get_lrm_resource(xml_rsc, input->xml, create_rsc);
if (rsc == NULL && create_rsc) {
crm_err("Invalid resource definition");
crm_log_xml_warn(input->msg, "bad input");
} else if (rsc == NULL) {
lrm_op_t *op = NULL;
crm_notice("Not creating resource for a %s event: %s", operation, ID(input->xml));
delete_rsc_entry(input, ID(xml_rsc), NULL, HA_OK, user_name);
op = construct_op(input->xml, ID(xml_rsc), operation);
op->op_status = LRM_OP_DONE;
op->rc = EXECRA_OK;
CRM_ASSERT(op != NULL);
send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
free_lrm_op(op);
} else if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) {
lrm_op_t *op = NULL;
char *op_key = NULL;
char *meta_key = NULL;
int call = 0;
const char *call_id = NULL;
const char *op_task = NULL;
const char *op_interval = NULL;
CRM_CHECK(params != NULL, crm_log_xml_warn(input->xml, "Bad command");
return);
meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL);
op_interval = crm_element_value(params, meta_key);
crm_free(meta_key);
meta_key = crm_meta_name(XML_LRM_ATTR_TASK);
op_task = crm_element_value(params, meta_key);
crm_free(meta_key);
meta_key = crm_meta_name(XML_LRM_ATTR_CALLID);
call_id = crm_element_value(params, meta_key);
crm_free(meta_key);
CRM_CHECK(op_task != NULL, crm_log_xml_warn(input->xml, "Bad command");
return);
CRM_CHECK(op_interval != NULL, crm_log_xml_warn(input->xml, "Bad command");
return);
op = construct_op(input->xml, rsc->id, op_task);
CRM_ASSERT(op != NULL);
op_key = generate_op_key(rsc->id, op_task, crm_parse_int(op_interval, "0"));
crm_debug("PE requested op %s (call=%s) be cancelled",
op_key, call_id ? call_id : "NA");
call = crm_parse_int(call_id, "0");
if (call == 0) {
/* the normal case when the PE cancels a recurring op */
done = cancel_op_key(rsc, op_key, TRUE);
} else {
/* the normal case when the PE cancels an orphan op */
done = cancel_op(rsc, NULL, call, TRUE);
}
if (done == FALSE) {
crm_debug("Nothing known about operation %d for %s", call, op_key);
delete_op_entry(NULL, rsc->id, op_key, call);
/* needed?? surely not otherwise the cancel_op_(_key) wouldn't
* have failed in the first place
*/
g_hash_table_remove(pending_ops, op_key);
}
op->rc = EXECRA_OK;
op->op_status = LRM_OP_DONE;
send_direct_ack(from_host, from_sys, rsc, op, rsc->id);
crm_free(op_key);
free_lrm_op(op);
} else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) {
int cib_rc = cib_ok;
CRM_ASSERT(rsc != NULL);
cib_rc = delete_rsc_status(rsc->id, cib_dryrun | cib_sync_call, user_name);
if (cib_rc != cib_ok) {
lrm_op_t *op = NULL;
crm_err
("Attempt of deleting resource status '%s' from CIB for %s (user=%s) on %s failed: (rc=%d) %s",
rsc->id, from_sys, user_name ? user_name : "unknown", from_host, cib_rc,
cib_error2string(cib_rc));
op = construct_op(input->xml, rsc->id, operation);
op->op_status = LRM_OP_ERROR;
if (cib_rc == cib_permission_denied) {
op->rc = EXECRA_INSUFFICIENT_PRIV;
} else {
op->rc = EXECRA_UNKNOWN_ERROR;
}
send_direct_ack(from_host, from_sys, NULL, op, rsc->id);
free_lrm_op(op);
return;
}
delete_resource(rsc->id, rsc, NULL, from_sys, from_host, user_name, input);
} else if (rsc != NULL) {
do_lrm_rsc_op(rsc, operation, input->xml, input->msg);
}
lrm_free_rsc(rsc);
} else {
crm_err("Operation was neither a lrm_query, nor a rsc op. %s", crm_str(crm_op));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
static void
copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
{
if (strstr(key, CRM_META "_") != NULL) {
g_hash_table_insert(user_data, strdup((const char *)key), strdup((const char *)value));
}
}
lrm_op_t *
construct_op(xmlNode * rsc_op, const char *rsc_id, const char *operation)
{
lrm_op_t *op = NULL;
const char *op_delay = NULL;
const char *op_timeout = NULL;
const char *op_interval = NULL;
GHashTable *params = NULL;
const char *transition = NULL;
CRM_LOG_ASSERT(rsc_id != NULL);
crm_malloc0(op, sizeof(lrm_op_t));
op->op_type = crm_strdup(operation);
op->op_status = LRM_OP_PENDING;
op->rc = -1;
op->rsc_id = crm_strdup(rsc_id);
op->interval = 0;
op->timeout = 0;
op->start_delay = 0;
op->copyparams = 0;
op->app_name = crm_strdup(CRM_SYSTEM_CRMD);
if (rsc_op == NULL) {
CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation));
op->user_data = NULL;
op->user_data_len = 0;
/* the stop_all_resources() case
* by definition there is no DC (or they'd be shutting
* us down).
* So we should put our version here.
*/
op->params = g_hash_table_new_full(crm_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
g_hash_table_insert(op->params,
crm_strdup(XML_ATTR_CRM_VERSION), crm_strdup(CRM_FEATURE_SET));
crm_trace("Constructed %s op for %s", operation, rsc_id);
return op;
}
params = xml2list(rsc_op);
g_hash_table_remove(params, CRM_META "_op_target_rc");
op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY);
op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT);
op_interval = crm_meta_value(params, XML_LRM_ATTR_INTERVAL);
op->interval = crm_parse_int(op_interval, "0");
op->timeout = crm_parse_int(op_timeout, "0");
op->start_delay = crm_parse_int(op_delay, "0");
if (safe_str_neq(operation, RSC_STOP)) {
op->params = params;
} else {
/* Create a blank parameter list so that we stop the resource
* with the old attributes, not the new ones
*/
const char *version = g_hash_table_lookup(params, XML_ATTR_CRM_VERSION);
op->params = g_hash_table_new_full(crm_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
if (version) {
g_hash_table_insert(op->params, crm_strdup(XML_ATTR_CRM_VERSION), crm_strdup(version));
}
g_hash_table_foreach(params, copy_meta_keys, op->params);
g_hash_table_destroy(params);
params = NULL;
}
/* sanity */
if (op->interval < 0) {
op->interval = 0;
}
if (op->timeout <= 0) {
op->timeout = op->interval;
}
if (op->start_delay < 0) {
op->start_delay = 0;
}
transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY);
CRM_CHECK(transition != NULL, return op);
op->user_data = crm_strdup(transition);
op->user_data_len = 1 + strlen(op->user_data);
if (op->interval != 0) {
if (safe_str_eq(operation, CRMD_ACTION_START)
|| safe_str_eq(operation, CRMD_ACTION_STOP)) {
crm_err("Start and Stop actions cannot have an interval: %d", op->interval);
op->interval = 0;
}
}
/* reset the resource's parameters? */
if (op->interval == 0) {
if (safe_str_eq(CRMD_ACTION_START, operation)
|| safe_str_eq(CRMD_ACTION_STATUS, operation)) {
op->copyparams = 1;
}
}
crm_trace("Constructed %s op for %s: interval=%d", operation, rsc_id, op->interval);
return op;
}
void
send_direct_ack(const char *to_host, const char *to_sys,
lrm_rsc_t * rsc, lrm_op_t * op, const char *rsc_id)
{
xmlNode *reply = NULL;
xmlNode *update, *iter;
xmlNode *fragment;
CRM_CHECK(op != NULL, return);
if (op->rsc_id == NULL) {
CRM_LOG_ASSERT(rsc_id != NULL);
op->rsc_id = crm_strdup(rsc_id);
}
if (to_sys == NULL) {
to_sys = CRM_SYSTEM_TENGINE;
}
update = create_node_state(fsa_our_uname, NULL, NULL, NULL, NULL, NULL, FALSE, __FUNCTION__);
iter = create_xml_node(update, XML_CIB_TAG_LRM);
crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
build_operation_update(iter, rsc, op, __FUNCTION__);
fragment = create_cib_fragment(update, XML_CIB_TAG_STATUS);
reply = create_request(CRM_OP_INVOKE_LRM, fragment, to_host, to_sys, CRM_SYSTEM_LRMD, NULL);
crm_log_xml_trace(update, "ACK Update");
crm_debug("ACK'ing resource op %s_%s_%d from %s: %s",
op->rsc_id, op->op_type, op->interval, op->user_data,
crm_element_value(reply, XML_ATTR_REFERENCE));
if (relay_message(reply, TRUE) == FALSE) {
crm_log_xml_err(reply, "Unable to route reply");
}
free_xml(fragment);
free_xml(update);
free_xml(reply);
}
static gboolean
stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
{
lrm_rsc_t *rsc = user_data;
struct recurring_op_s *op = (struct recurring_op_s *)value;
if (op->interval != 0 && safe_str_eq(op->rsc_id, rsc->id)) {
if (cancel_op(rsc, key, op->call_id, FALSE) == FALSE) {
return TRUE;
}
}
return FALSE;
}
static gboolean
stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
struct recurring_op_s *op = (struct recurring_op_s *)value;
if (op->interval != 0) {
lrm_rsc_t *rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, op->rsc_id);
if (rsc) {
remove = cancel_op(rsc, key, op->call_id, FALSE);
lrm_free_rsc(rsc);
}
}
return remove;
}
void
do_lrm_rsc_op(lrm_rsc_t * rsc, const char *operation, xmlNode * msg, xmlNode * request)
{
int call_id = 0;
char *op_id = NULL;
lrm_op_t *op = NULL;
fsa_data_t *msg_data = NULL;
const char *transition = NULL;
CRM_CHECK(rsc != NULL, return);
if (msg != NULL) {
transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
if (transition == NULL) {
crm_log_xml_err(msg, "Missing transition number");
}
}
op = construct_op(msg, rsc->id, operation);
/* stop the monitor before stopping the resource */
if (crm_str_eq(operation, CRMD_ACTION_STOP, TRUE)
|| crm_str_eq(operation, CRMD_ACTION_DEMOTE, TRUE)
|| crm_str_eq(operation, CRMD_ACTION_PROMOTE, TRUE)
|| crm_str_eq(operation, CRMD_ACTION_MIGRATE, TRUE)) {
g_hash_table_foreach_remove(pending_ops, stop_recurring_action_by_rsc, rsc);
}
/* now do the op */
crm_debug("Performing key=%s op=%s_%s_%d", transition, rsc->id, operation, op->interval);
if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE && fsa_state != S_TRANSITION_ENGINE) {
if (safe_str_neq(operation, "fail")
&& safe_str_neq(operation, CRMD_ACTION_STOP)) {
crm_info("Discarding attempt to perform action %s on %s"
" in state %s", operation, rsc->id, fsa_state2string(fsa_state));
op->rc = 99;
op->op_status = LRM_OP_ERROR;
send_direct_ack(NULL, NULL, rsc, op, rsc->id);
free_lrm_op(op);
crm_free(op_id);
return;
}
}
op_id = generate_op_key(rsc->id, op->op_type, op->interval);
if (op->interval > 0) {
/* cancel it so we can then restart it without conflict */
cancel_op_key(rsc, op_id, FALSE);
op->target_rc = CHANGED;
} else {
op->target_rc = EVERYTIME;
}
call_id = rsc->ops->perform_op(rsc, op);
if (call_id <= 0) {
crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id);
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
} else {
/* record all operations so we can wait
* for them to complete during shutdown
*/
char *call_id_s = make_stop_id(rsc->id, call_id);
struct recurring_op_s *pending = NULL;
crm_malloc0(pending, sizeof(struct recurring_op_s));
crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);
pending->call_id = call_id;
pending->interval = op->interval;
pending->op_key = crm_strdup(op_id);
pending->rsc_id = crm_strdup(rsc->id);
g_hash_table_replace(pending_ops, call_id_s, pending);
if (op->interval > 0 && op->start_delay > START_DELAY_THRESHOLD) {
char *uuid = NULL;
int dummy = 0, target_rc = 0;
crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);
decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc);
crm_free(uuid);
op->rc = target_rc;
op->op_status = LRM_OP_DONE;
send_direct_ack(NULL, NULL, rsc, op, rsc->id);
}
}
crm_free(op_id);
free_lrm_op(op);
return;
}
lrm_op_t *
copy_lrm_op(const lrm_op_t * op)
{
lrm_op_t *op_copy = NULL;
CRM_CHECK(op != NULL, return NULL);
CRM_CHECK(op->rsc_id != NULL, return NULL);
crm_malloc0(op_copy, sizeof(lrm_op_t));
op_copy->op_type = crm_strdup(op->op_type);
/* input fields */
op_copy->params = g_hash_table_new_full(crm_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
if (op->params != NULL) {
g_hash_table_foreach(op->params, dup_attr, op_copy->params);
}
op_copy->timeout = op->timeout;
op_copy->interval = op->interval;
op_copy->target_rc = op->target_rc;
/* in the CRM, this is always a string */
if (op->user_data != NULL) {
op_copy->user_data = crm_strdup(op->user_data);
}
/* output fields */
op_copy->op_status = op->op_status;
op_copy->rc = op->rc;
op_copy->call_id = op->call_id;
op_copy->output = NULL;
op_copy->rsc_id = crm_strdup(op->rsc_id);
if (op->app_name != NULL) {
op_copy->app_name = crm_strdup(op->app_name);
}
if (op->output != NULL) {
op_copy->output = crm_strdup(op->output);
}
return op_copy;
}
lrm_rsc_t *
copy_lrm_rsc(const lrm_rsc_t * rsc)
{
lrm_rsc_t *rsc_copy = NULL;
if (rsc == NULL) {
return NULL;
}
crm_malloc0(rsc_copy, sizeof(lrm_rsc_t));
rsc_copy->id = crm_strdup(rsc->id);
rsc_copy->type = crm_strdup(rsc->type);
rsc_copy->class = NULL;
rsc_copy->provider = NULL;
if (rsc->class != NULL) {
rsc_copy->class = crm_strdup(rsc->class);
}
if (rsc->provider != NULL) {
rsc_copy->provider = crm_strdup(rsc->provider);
}
/* GHashTable* params; */
rsc_copy->params = NULL;
rsc_copy->ops = NULL;
return rsc_copy;
}
static void
cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
switch (rc) {
case cib_ok:
case cib_diff_failed:
case cib_diff_resync:
crm_trace("Resource update %d complete: rc=%d", call_id, rc);
break;
default:
crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, cib_error2string(rc));
}
}
static int
do_update_resource(lrm_rsc_t * rsc, lrm_op_t * op)
{
/*
<status>
<nodes_status id=uname>
<lrm>
<lrm_resources>
<lrm_resource id=...>
</...>
*/
int rc = cib_ok;
xmlNode *update, *iter = NULL;
int call_opt = cib_quorum_override;
CRM_CHECK(op != NULL, return 0);
if (fsa_state == S_ELECTION || fsa_state == S_PENDING) {
crm_info("Sending update to local CIB in state: %s", fsa_state2string(fsa_state));
call_opt |= cib_scope_local;
}
iter = create_xml_node(iter, XML_CIB_TAG_STATUS);
update = iter;
iter = create_xml_node(iter, XML_CIB_TAG_STATE);
set_uuid(iter, XML_ATTR_UUID, fsa_our_uname);
crm_xml_add(iter, XML_ATTR_UNAME, fsa_our_uname);
crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__);
iter = create_xml_node(iter, XML_CIB_TAG_LRM);
crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
build_operation_update(iter, rsc, op, __FUNCTION__);
if (rsc) {
crm_xml_add(iter, XML_ATTR_TYPE, rsc->type);
crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class);
crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider);
CRM_CHECK(rsc->type != NULL, crm_err("Resource %s has no value for type", op->rsc_id));
CRM_CHECK(rsc->class != NULL, crm_err("Resource %s has no value for class", op->rsc_id));
} else {
crm_warn("Resource %s no longer exists in the lrmd", op->rsc_id);
goto cleanup;
}
/* make it an asyncronous call and be done with it
*
* Best case:
* the resource state will be discovered during
* the next signup or election.
*
* Bad case:
* we are shutting down and there is no DC at the time,
* but then why were we shutting down then anyway?
* (probably because of an internal error)
*
* Worst case:
* we get shot for having resources "running" when the really weren't
*
* the alternative however means blocking here for too long, which
* isnt acceptable
*/
fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL);
/* the return code is a call number, not an error code */
crm_trace("Sent resource state update message: %d", rc);
fsa_cib_conn->cmds->register_callback(fsa_cib_conn, rc, 60, FALSE, NULL, "cib_rsc_callback",
cib_rsc_callback);
cleanup:
free_xml(update);
return rc;
}
void
do_lrm_event(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)
{
CRM_CHECK(FALSE, return);
}
gboolean
process_lrm_event(lrm_op_t * op)
{
char *op_id = NULL;
char *op_key = NULL;
int update_id = 0;
int log_level = LOG_ERR;
gboolean removed = FALSE;
lrm_rsc_t *rsc = NULL;
struct recurring_op_s *pending = NULL;
CRM_CHECK(op != NULL, return FALSE);
CRM_CHECK(op->rsc_id != NULL, return FALSE);
op_key = generate_op_key(op->rsc_id, op->op_type, op->interval);
switch (op->op_status) {
case LRM_OP_ERROR:
case LRM_OP_PENDING:
case LRM_OP_NOTSUPPORTED:
break;
case LRM_OP_CANCELLED:
log_level = LOG_INFO;
break;
case LRM_OP_DONE:
log_level = LOG_INFO;
break;
case LRM_OP_TIMEOUT:
log_level = LOG_DEBUG_3;
crm_err("LRM operation %s (%d) %s (timeout=%dms)",
op_key, op->call_id, op_status2text(op->op_status), op->timeout);
break;
default:
crm_err("Mapping unknown status (%d) to ERROR", op->op_status);
op->op_status = LRM_OP_ERROR;
}
if (op->op_status == LRM_OP_ERROR
&& (op->rc == EXECRA_RUNNING_MASTER || op->rc == EXECRA_NOT_RUNNING)) {
/* Leave it up to the TE/PE to decide if this is an error */
op->op_status = LRM_OP_DONE;
log_level = LOG_INFO;
}
op_id = make_stop_id(op->rsc_id, op->call_id);
pending = g_hash_table_lookup(pending_ops, op_id);
rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, op->rsc_id);
if (op->op_status != LRM_OP_CANCELLED) {
if (safe_str_eq(op->op_type, RSC_NOTIFY)) {
/* Keep notify ops out of the CIB */
send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
} else {
update_id = do_update_resource(rsc, op);
}
if (op->interval != 0) {
goto out;
}
} else if (op->interval == 0) {
/* This will occur when "crm resource cleanup" is called while actions are in-flight */
crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id);
send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
} else if (pending == NULL) {
crm_err("Op %s (call=%d): No 'pending' entry", op_key, op->call_id);
} else if (op->user_data == NULL) {
crm_err("Op %s (call=%d): No user data", op_key, op->call_id);
} else if (pending->remove) {
delete_op_entry(op, op->rsc_id, op_key, op->call_id);
} else {
/* Before a stop is called, no need to direct ack */
crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id);
}
if (g_hash_table_remove(pending_ops, op_id)) {
removed = TRUE;
crm_trace("Op %s (call=%d, stop-id=%s): Confirmed", op_key, op->call_id, op_id);
}
out:
if (op->op_status == LRM_OP_DONE) {
do_crm_log(log_level,
"LRM operation %s (call=%d, rc=%d, cib-update=%d, confirmed=%s) %s",
op_key, op->call_id, op->rc, update_id, removed ? "true" : "false",
execra_code2string(op->rc));
} else {
do_crm_log(log_level,
"LRM operation %s (call=%d, status=%d, cib-update=%d, confirmed=%s) %s",
op_key, op->call_id, op->op_status, update_id, removed ? "true" : "false",
op_status2text(op->op_status));
}
if (op->rc != 0 && op->output != NULL) {
crm_info("Result: %s", op->output);
} else if (op->output != NULL) {
crm_debug("Result: %s", op->output);
}
#ifdef HAVE_LRM_OP_T_RSC_DELETED
if (op->rsc_deleted) {
crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
delete_rsc_entry(NULL, op->rsc_id, NULL, HA_OK, NULL);
}
#endif
/* If a shutdown was escalated while operations were pending,
* then the FSA will be stalled right now... allow it to continue
*/
mainloop_set_trigger(fsa_source);
update_history_cache(rsc, op);
lrm_free_rsc(rsc);
crm_free(op_key);
crm_free(op_id);
return TRUE;
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Mon, Apr 21, 6:03 PM (1 d, 4 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1665029
Default Alt Text
(65 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment