Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F7609193
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
72 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/crm/crmd/lrm.c b/crm/crmd/lrm.c
index 0229a266a0..91047e1606 100644
--- a/crm/crmd/lrm.c
+++ b/crm/crmd/lrm.c
@@ -1,1600 +1,1599 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <portability.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crmd_fsa.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h> /* for access */
#include <heartbeat.h>
#include <clplumbing/cl_signal.h>
#include <errno.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crmd.h>
#include <crmd_messages.h>
#include <crmd_callbacks.h>
#include <crmd_lrm.h>
#include <lrm/raexec.h>
#include <crm/dmalloc_wrapper.h>
char *make_stop_id(const char *rsc, int call_id);
gboolean verify_stopped(gboolean force, int log_level);
gboolean resource_stopped(gpointer key, gpointer value, gpointer user_data);
gboolean build_operation_update(
crm_data_t *rsc_list, lrm_op_t *op, const char *src, int lpc);
gboolean build_active_RAs(crm_data_t *rsc_list);
gboolean is_rsc_active(const char *rsc_id);
void do_update_resource(lrm_op_t *op);
gboolean process_lrm_event(lrm_op_t *op);
enum crmd_fsa_input do_lrm_rsc_op(
lrm_rsc_t *rsc, const char *operation,
crm_data_t *msg, HA_Message *request);
enum crmd_fsa_input do_fake_lrm_op(gpointer data);
void stop_recurring_action(
gpointer key, gpointer value, gpointer user_data);
gboolean remove_recurring_action(
gpointer key, gpointer value, gpointer user_data);
lrm_op_t *construct_op(
crm_data_t *rsc_op, const char *rsc_id, const char *operation);
void send_direct_ack(const char *to_host, const char *to_sys,
lrm_op_t* op, const char *rsc_id);
void free_recurring_op(gpointer value);
GHashTable *monitors = NULL;
GHashTable *resources = NULL;
GHashTable *shutdown_ops = NULL;
int num_lrm_register_fails = 0;
int max_lrm_register_fails = 30;
enum crmd_rscstate {
crmd_rscstate_NULL,
crmd_rscstate_START,
crmd_rscstate_START_PENDING,
crmd_rscstate_START_OK,
crmd_rscstate_START_FAIL,
crmd_rscstate_STOP,
crmd_rscstate_STOP_PENDING,
crmd_rscstate_STOP_OK,
crmd_rscstate_STOP_FAIL,
crmd_rscstate_MON,
crmd_rscstate_MON_PENDING,
crmd_rscstate_MON_OK,
crmd_rscstate_MON_FAIL,
crmd_rscstate_GENERIC_PENDING,
crmd_rscstate_GENERIC_OK,
crmd_rscstate_GENERIC_FAIL
};
const char *crmd_rscstate2string(enum crmd_rscstate state);
const char *
crmd_rscstate2string(enum crmd_rscstate state)
{
switch(state) {
case crmd_rscstate_NULL:
return NULL;
case crmd_rscstate_START:
return CRMD_ACTION_START;
case crmd_rscstate_START_PENDING:
return CRMD_ACTION_START_PENDING;
case crmd_rscstate_START_OK:
return CRMD_ACTION_STARTED;
case crmd_rscstate_START_FAIL:
return CRMD_ACTION_START_FAIL;
case crmd_rscstate_STOP:
return CRMD_ACTION_STOP;
case crmd_rscstate_STOP_PENDING:
return CRMD_ACTION_STOP_PENDING;
case crmd_rscstate_STOP_OK:
return CRMD_ACTION_STOPPED;
case crmd_rscstate_STOP_FAIL:
return CRMD_ACTION_STOP_FAIL;
case crmd_rscstate_MON:
return CRMD_ACTION_MON;
case crmd_rscstate_MON_PENDING:
return CRMD_ACTION_MON_PENDING;
case crmd_rscstate_MON_OK:
return CRMD_ACTION_MON_OK;
case crmd_rscstate_MON_FAIL:
return CRMD_ACTION_MON_FAIL;
case crmd_rscstate_GENERIC_PENDING:
return CRMD_ACTION_GENERIC_PENDING;
case crmd_rscstate_GENERIC_OK:
return CRMD_ACTION_GENERIC_OK;
case crmd_rscstate_GENERIC_FAIL:
return CRMD_ACTION_GENERIC_FAIL;
}
return "<unknown>";
}
GCHSource *lrm_source = NULL;
/* A_LRM_CONNECT */
enum crmd_fsa_input
do_lrm_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input,
fsa_data_t *msg_data)
{
int ret = HA_OK;
if(action & A_LRM_DISCONNECT) {
verify_stopped(TRUE, LOG_ERR);
if(lrm_source) {
crm_debug("Removing LRM connection from MainLoop");
if(G_main_del_IPC_Channel(lrm_source) == FALSE) {
crm_err("Could not remove LRM connection"
" from MainLoop");
}
lrm_source = NULL;
}
if(fsa_lrm_conn) {
fsa_lrm_conn->lrm_ops->signoff(fsa_lrm_conn);
crm_info("Disconnected from the LRM");
clear_bit_inplace(fsa_input_register, R_LRM_CONNECTED);
fsa_lrm_conn = NULL;
}
/* TODO: Clean up the hashtable */
}
if(action & A_LRM_CONNECT) {
ret = HA_OK;
monitors = g_hash_table_new_full(
g_str_hash, g_str_equal,
g_hash_destroy_str, free_recurring_op);
resources = g_hash_table_new_full(
g_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
shutdown_ops = g_hash_table_new_full(
g_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
fsa_lrm_conn = ll_lrm_new(XML_CIB_TAG_LRM);
if(NULL == fsa_lrm_conn) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
ret = HA_FAIL;
}
if(ret == HA_OK) {
crm_debug("Connecting to the LRM");
ret = fsa_lrm_conn->lrm_ops->signon(
fsa_lrm_conn, CRM_SYSTEM_CRMD);
}
if(ret != HA_OK) {
if(++num_lrm_register_fails < max_lrm_register_fails) {
crm_warn("Failed to sign on to the LRM %d"
" (%d max) times",
num_lrm_register_fails,
max_lrm_register_fails);
crm_timer_start(wait_timer);
crmd_fsa_stall(NULL);
return I_NULL;
}
}
if(ret == HA_OK) {
crm_debug_4("LRM: set_lrm_callback...");
ret = fsa_lrm_conn->lrm_ops->set_lrm_callback(
fsa_lrm_conn, lrm_op_callback);
if(ret != HA_OK) {
crm_err("Failed to set LRM callbacks");
}
}
if(ret != HA_OK) {
crm_err("Failed to sign on to the LRM %d"
" (max) times", num_lrm_register_fails);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return I_NULL;
}
/* TODO: create a destroy handler that causes
* some recovery to happen
*/
lrm_source = G_main_add_IPC_Channel(
G_PRIORITY_LOW,
fsa_lrm_conn->lrm_ops->ipcchan(fsa_lrm_conn),
FALSE, lrm_dispatch, fsa_lrm_conn,
default_ipc_connection_destroy);
set_bit_inplace(fsa_input_register, R_LRM_CONNECTED);
crm_debug("LRM connection established");
}
if(action & ~(A_LRM_CONNECT|A_LRM_DISCONNECT)) {
crm_err("Unexpected action %s in %s",
fsa_action2string(action), __FUNCTION__);
}
return I_NULL;
}
static void
ghash_print_pending(gpointer key, gpointer value, gpointer user_data)
{
const char *action = key;
int *log_level = user_data;
crm_log_maybe(*log_level, "Pending action: %s", action);
}
gboolean
verify_stopped(gboolean force, int log_level)
{
GListPtr lrm_list = NULL;
crm_info("Checking for active resources before exit");
if(fsa_lrm_conn == NULL) {
crm_err("Exiting with no LRM connection..."
" resources may be active!");
return TRUE;
}
if(g_hash_table_size(shutdown_ops) > 0) {
crm_log_maybe(log_level,
"%d pending LRM operations at shutdown%s",
g_hash_table_size(shutdown_ops),
force?"":"... waiting");
if(force || !is_set(fsa_input_register, R_SENT_RSC_STOP)) {
g_hash_table_foreach(
shutdown_ops, ghash_print_pending, &log_level);
}
if(force == FALSE) {
return FALSE;
}
}
lrm_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn);
slist_iter(
rsc_id, char, lrm_list, lpc,
if(is_rsc_active(rsc_id) == FALSE) {
continue;
}
crm_err("Resource %s was active at shutdown."
" You may ignore this error if it is unmanaged.",
rsc_id);
);
set_bit_inplace(fsa_input_register, R_SENT_RSC_STOP);
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
return TRUE;
}
gboolean
build_operation_update(
crm_data_t *xml_rsc, lrm_op_t *op, const char *src, int lpc)
{
char *fail_state = NULL;
const char *state = NULL;
crm_data_t *xml_op = NULL;
char *op_id = NULL;
const char *caller_version = NULL;
char *digest = NULL;
crm_data_t *args_xml = NULL;
crm_data_t *args_parent = NULL;
CRM_DEV_ASSERT(op != NULL);
if(crm_assert_failed) {
return FALSE;
}
crm_debug_2("%s: Updating resouce %s after %s %s op",
src, op->rsc_id, op_status2text(op->op_status), op->op_type);
if(op->op_status == LRM_OP_CANCELLED) {
crm_debug_3("Ignoring cancelled op");
return TRUE;
}
if(AM_I_DC) {
caller_version = CRM_FEATURE_SET;
} else if(fsa_our_dc_version != NULL) {
caller_version = fsa_our_dc_version;
} else {
/* there is a small risk in formerly mixed clusters that
* it will be sub-optimal.
* however with our upgrade policy, the update we send
* should still be completely supported anyway
*/
caller_version = g_hash_table_lookup(
op->params, XML_ATTR_CRM_VERSION);
crm_warn("Falling back to operation originator version: %s",
caller_version);
}
crm_debug_3("DC version: %s", caller_version);
if(safe_str_eq(op->op_type, CRMD_ACTION_NOTIFY)) {
const char *n_type = g_hash_table_lookup(
op->params, crm_meta_name("notify_type"));
const char *n_task = g_hash_table_lookup(
op->params, crm_meta_name("notify_operation"));
#if CRM_DEPRECATED_SINCE_2_0_5
if(n_type == NULL) {
n_type = g_hash_table_lookup(op->params, "notify_type");
}
if(n_task == NULL) {
n_task = g_hash_table_lookup(op->params, "notify_operation");
}
#endif
CRM_DEV_ASSERT(n_type != NULL);
CRM_DEV_ASSERT(n_task != NULL);
op_id = generate_notify_key(op->rsc_id, n_type, n_task);
/* these are not yet allowed to fail */
op->op_status = LRM_OP_DONE;
op->rc = 0;
} else {
op_id = generate_op_key(op->rsc_id, op->op_type, op->interval);
}
/* Handle recurring ops - infer last op_status */
if(op->op_status == LRM_OP_PENDING && op->interval > 0) {
if(op->rc == 0) {
crm_debug("Mapping pending operation to DONE");
op->op_status = LRM_OP_DONE;
} else {
crm_debug("Mapping pending operation to ERROR");
op->op_status = LRM_OP_ERROR;
}
}
xml_op = find_entity(xml_rsc, XML_LRM_TAG_RSC_OP, op_id);
if(xml_op != NULL) {
const char *old_status_s = crm_element_value(
xml_op, XML_LRM_ATTR_OPSTATUS);
int old_status = crm_parse_int(old_status_s, "-2");
int log_level = LOG_ERR;
if(old_status_s == NULL) {
crm_err("No value for "XML_LRM_ATTR_OPSTATUS);
} else if(old_status == op->op_status) {
/* safe to mask */
log_level = LOG_WARNING;
} else if(old_status == LRM_OP_PENDING){
/* ??safe to mask?? */
/* log_level = LOG_WARNING; */
}
crm_log_maybe(log_level,
"Duplicate %s operations in get_cur_state()",
op_id);
crm_log_maybe(log_level+2,
"New entry: %s %s (call=%d, status=%s)",
op_id, op->user_data, op->call_id,
op_status2text(op->op_status));
crm_log_xml(log_level+2, "Existing entry", xml_op);
crm_free(op_id);
return FALSE;
} else {
xml_op = create_xml_node(xml_rsc, XML_LRM_TAG_RSC_OP);
}
crm_xml_add(xml_op, XML_ATTR_ID, op_id);
crm_free(op_id);
crm_xml_add(xml_op, XML_LRM_ATTR_TASK, op->op_type);
crm_xml_add(xml_op, XML_ATTR_ORIGIN, src);
if(op->user_data == NULL) {
op->user_data = generate_transition_key(-1, fsa_our_uname);
}
#if CRM_DEPRECATED_SINCE_2_0_3
if(compare_version("1.0.3", caller_version) > 0) {
CRM_CHECK(FALSE, ; );
fail_state = generate_transition_magic_v202(
op->user_data, op->op_status);
} else {
fail_state = generate_transition_magic(
op->user_data, op->op_status, op->rc);
}
#else
fail_state = generate_transition_magic(
op->user_data, op->op_status, op->rc);
#endif
crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data);
crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, fail_state);
crm_free(fail_state);
switch(op->op_status) {
case LRM_OP_PENDING:
break;
case LRM_OP_CANCELLED:
crm_err("What to do here");
break;
case LRM_OP_ERROR:
case LRM_OP_TIMEOUT:
case LRM_OP_NOTSUPPORTED:
crm_debug("Resource action %s/%s %s: %d",
op->rsc_id, op->op_type,
op_status2text(op->op_status), op->rc);
break;
case LRM_OP_DONE:
if(safe_str_eq(CRMD_ACTION_START, op->op_type)) {
state = CRMD_ACTION_STARTED;
} else if(safe_str_eq(CRMD_ACTION_STOP, op->op_type)) {
state = CRMD_ACTION_STOPPED;
} else if(safe_str_eq(CRMD_ACTION_MON, op->op_type)) {
state = CRMD_ACTION_STARTED;
} else {
crm_debug("Using status \"%s\" for op \"%s\"",
CRMD_ACTION_GENERIC_OK, op->op_type);
state = CRMD_ACTION_GENERIC_OK;
}
break;
}
crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id);
/* set these on 'xml_rsc' too to make life easy for the PE */
crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version);
crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc);
crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status);
crm_xml_add_int(xml_op, XML_LRM_ATTR_INTERVAL, op->interval);
/* this will enable us to later determin that the
* resource's parameters have changed and we should force
* a restart
*/
args_parent = NULL;
#if CRM_DEPRECATED_SINCE_2_0_4
if(compare_version("1.0.4", caller_version) > 0) {
args_parent = xml_op;
}
#endif
args_xml = create_xml_node(args_parent, XML_TAG_PARAMS);
g_hash_table_foreach(op->params, hash2field, args_xml);
filter_action_parameters(args_xml, caller_version);
digest = calculate_xml_digest(args_xml, TRUE);
crm_xml_add(xml_op, XML_LRM_ATTR_OP_DIGEST, digest);
crm_free(digest);
if(args_parent == NULL) {
free_xml(args_xml);
}
return TRUE;
}
gboolean
is_rsc_active(const char *rsc_id)
{
GList *op_list = NULL;
gboolean active = FALSE;
lrm_rsc_t *the_rsc = NULL;
state_flag_t cur_state = 0;
int max_call_id = -1;
if(fsa_lrm_conn == NULL) {
return FALSE;
}
the_rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rsc_id);
crm_debug_2("Processing lrm_rsc_t entry %s", rsc_id);
if(the_rsc == NULL) {
crm_err("NULL resource returned from the LRM");
return FALSE;
}
op_list = the_rsc->ops->get_cur_state(the_rsc, &cur_state);
crm_debug_3("\tcurrent state:%s",cur_state==LRM_RSC_IDLE?"Idle":"Busy");
slist_iter(
op, lrm_op_t, op_list, llpc,
crm_debug("Processing op %s_%d (%d) for %s (status=%d, rc=%d)",
op->op_type, op->interval, op->call_id, the_rsc->id,
op->op_status, op->rc);
CRM_ASSERT(max_call_id <= op->call_id);
if(safe_str_eq(op->op_type, CRMD_ACTION_STOP)) {
active = FALSE;
} else if(op->rc == EXECRA_NOT_RUNNING) {
active = FALSE;
} else {
active = TRUE;
}
max_call_id = op->call_id;
lrm_free_op(op);
);
g_list_free(op_list);
lrm_free_rsc(the_rsc);
return active;
}
gboolean
build_active_RAs(crm_data_t *rsc_list)
{
GList *op_list = NULL;
GList *lrm_list = NULL;
gboolean found_op = FALSE;
state_flag_t cur_state = 0;
if(fsa_lrm_conn == NULL) {
return FALSE;
}
lrm_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn);
slist_iter(
rid, char, lrm_list, lpc,
lrm_rsc_t *the_rsc =
fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid);
crm_data_t *xml_rsc = create_xml_node(
rsc_list, XML_LRM_TAG_RESOURCE);
int max_call_id = -1;
crm_debug("Processing lrm_rsc_t entry %s", rid);
if(the_rsc == NULL) {
crm_err("NULL resource returned from the LRM");
continue;
}
crm_xml_add(xml_rsc, XML_ATTR_ID, the_rsc->id);
crm_xml_add(xml_rsc, XML_ATTR_TYPE, the_rsc->type);
crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, the_rsc->class);
crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER,the_rsc->provider);
op_list = the_rsc->ops->get_cur_state(the_rsc, &cur_state);
crm_debug_2("\tcurrent state:%s",
cur_state==LRM_RSC_IDLE?"Idle":"Busy");
slist_iter(
op, lrm_op_t, op_list, llpc,
crm_debug_2("Processing op %s for %s (status=%d, rc=%d)",
op->op_type, the_rsc->id, op->op_status, op->rc);
if(max_call_id < op->call_id) {
build_operation_update(
xml_rsc, op, __FUNCTION__, llpc);
} else if(max_call_id > op->call_id) {
crm_err("Bad call_id in list=%d. Previous call_id=%d",
op->call_id, max_call_id);
} else {
crm_warn("lrm->get_cur_state() returned"
" duplicate entries for call_id=%d",
op->call_id);
}
max_call_id = op->call_id;
found_op = TRUE;
lrm_free_op(op);
);
if(found_op == FALSE && g_list_length(op_list) != 0) {
crm_err("Could not properly determin last op"
" for %s from %d entries", the_rsc->id,
g_list_length(op_list));
}
g_list_free(op_list);
lrm_free_rsc(the_rsc);
);
g_list_free(lrm_list);
return TRUE;
}
crm_data_t*
do_lrm_query(gboolean is_replace)
{
gboolean shut_down = FALSE;
crm_data_t *xml_result= NULL;
crm_data_t *xml_state = NULL;
crm_data_t *xml_data = NULL;
crm_data_t *rsc_list = NULL;
const char *exp_state = CRMD_JOINSTATE_MEMBER;
if(is_set(fsa_input_register, R_SHUTDOWN)) {
exp_state = CRMD_STATE_INACTIVE;
shut_down = TRUE;
}
xml_state = create_node_state(
fsa_our_uname, ACTIVESTATUS, XML_BOOLEAN_TRUE,
ONLINESTATUS, CRMD_JOINSTATE_MEMBER, exp_state,
!shut_down, __FUNCTION__);
xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM);
crm_xml_add(xml_data, XML_ATTR_ID, fsa_our_uuid);
rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES);
/* Build a list of active (not always running) resources */
build_active_RAs(rsc_list);
if(is_replace) {
crm_xml_add(xml_state, XML_CIB_ATTR_REPLACE, XML_CIB_TAG_LRM);
}
xml_result = create_cib_fragment(xml_state, XML_CIB_TAG_STATUS);
crm_log_xml_debug_3(xml_state, "Current state of the LRM");
return xml_result;
}
struct recurring_op_s
{
char *rsc_id;
int call_id;
};
static void
cancel_monitor(lrm_rsc_t *rsc, const char *key)
{
struct recurring_op_s *existing_op = NULL;
if(rsc == NULL) {
crm_err("No resource to cancel and operation for");
return;
} else if(key == NULL) {
crm_err("No operation to cancel");
return;
}
existing_op = g_hash_table_lookup(monitors, key);
if(existing_op != NULL) {
crm_debug("Cancelling previous invocation of %s (%d)",
key, existing_op->call_id);
/* cancel it so we can then restart it without conflict */
if(rsc->ops->cancel_op(rsc, existing_op->call_id) != HA_OK) {
crm_info("Couldn't cancel %s (%d)",
key, existing_op->call_id);
} else {
g_hash_table_remove(monitors, key);
}
} else {
crm_debug("No previous invocation of %s", key);
}
}
static lrm_rsc_t *
get_lrm_resource(crm_data_t *resource, crm_data_t *op_msg, gboolean do_create)
{
char rid[64];
lrm_rsc_t *rsc = NULL;
const char *short_id = ID(resource);
const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG);
crm_debug_2("Retrieving %s from the LRM.", short_id);
CRM_CHECK(short_id != NULL, return NULL);
if(rsc == NULL) {
/* check if its already there (short name) */
strncpy(rid, short_id, 64);
rid[63] = 0;
rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid);
}
if(rsc == NULL && long_id != NULL) {
/* try the long name instead */
strncpy(rid, long_id, 64);
rid[63] = 0;
rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid);
}
if(rsc == NULL && do_create) {
/* add it to the LRM */
const char *type = crm_element_value(resource, XML_ATTR_TYPE);
const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS);
const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER);
GHashTable *params = xml2list(op_msg);
CRM_CHECK(class != NULL, return NULL);
CRM_CHECK(type != NULL, return NULL);
crm_debug("Adding rsc %s before operation", short_id);
strncpy(rid, short_id, 64);
rid[63] = 0;
#if CRM_DEPRECATED_SINCE_2_0_3
if(op_msg != NULL) {
if(g_hash_table_lookup(
params, XML_ATTR_CRM_VERSION) == NULL) {
g_hash_table_destroy(params);
params = xml2list_202(op_msg);
}
}
#endif
if(g_hash_table_size(params) == 0) {
crm_log_xml_warn(op_msg, "EmptyParams");
}
fsa_lrm_conn->lrm_ops->add_rsc(
fsa_lrm_conn, rid, class, type, provider, params);
rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid);
g_hash_table_destroy(params);
if(rsc == NULL) {
fsa_data_t *msg_data = NULL;
crm_err("Could not add resource %s to LRM", rid);
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
}
}
return rsc;
}
/* A_LRM_INVOKE */
enum crmd_fsa_input
do_lrm_invoke(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input,
fsa_data_t *msg_data)
{
const char *crm_op = NULL;
const char *from_sys = NULL;
const char *from_host = NULL;
const char *operation = NULL;
enum crmd_fsa_input next_input = I_NULL;
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
crm_op = cl_get_string(input->msg, F_CRM_TASK);
from_sys = cl_get_string(input->msg, F_CRM_SYS_FROM);
if(safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
from_host = cl_get_string(input->msg, F_CRM_HOST_FROM);
}
crm_debug_2("LRM command from: %s", from_sys);
if(safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) {
operation = CRMD_ACTION_DELETE;
} else if(safe_str_eq(operation, CRM_OP_LRM_REFRESH)) {
crm_op = CRM_OP_LRM_REFRESH;
} else if(input->xml != NULL) {
operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK);
}
if(crm_op != NULL && safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
enum cib_errors rc = cib_ok;
crm_data_t *fragment = do_lrm_query(TRUE);
crm_info("Forcing a local LRM refresh");
fsa_cib_update(XML_CIB_TAG_STATUS, fragment,
cib_quorum_override, rc);
free_xml(fragment);
} else if(crm_op != NULL && safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) {
crm_data_t *data = do_lrm_query(FALSE);
HA_Message *reply = create_reply(input->msg, data);
if(relay_message(reply, TRUE) == FALSE) {
crm_err("Unable to route reply");
crm_log_message(LOG_ERR, reply);
crm_msg_del(reply);
}
free_xml(data);
} else if(safe_str_eq(operation, CRM_OP_PROBED)
|| safe_str_eq(crm_op, CRM_OP_REPROBE)) {
const char *probed = XML_BOOLEAN_TRUE;
if(safe_str_eq(crm_op, CRM_OP_REPROBE)) {
probed = XML_BOOLEAN_FALSE;
}
update_attr(fsa_cib_conn, cib_none, XML_CIB_TAG_STATUS,
fsa_our_uuid, NULL, NULL, CRM_OP_PROBED, probed);
} else if(operation != NULL) {
lrm_rsc_t *rsc = NULL;
crm_data_t *params = NULL;
gboolean create_rsc = TRUE;
crm_data_t *xml_rsc = find_xml_node(
input->xml, XML_CIB_TAG_RESOURCE, TRUE);
CRM_CHECK(xml_rsc != NULL, return I_NULL);
/* only the first 16 chars are used by the LRM */
params = find_xml_node(input->xml, XML_TAG_ATTRS,TRUE);
if(safe_str_eq(operation, CRMD_ACTION_STOP)) {
create_rsc = FALSE;
}
rsc = get_lrm_resource(xml_rsc, input->xml, create_rsc);
if(rsc == NULL && create_rsc) {
crm_err("Invalid resource definition");
crm_log_xml_warn(input->xml, "Bad command");
} else if(rsc == NULL) {
lrm_op_t* op = NULL;
crm_err("Not creating resource for a stop event");
crm_log_xml_warn(input->xml, "Bad command");
op = construct_op(input->xml, ID(xml_rsc), operation);
op->op_status = LRM_OP_DONE;
op->rc = 0;
CRM_ASSERT(op != NULL);
send_direct_ack(from_host, from_sys, op, ID(xml_rsc));
free_lrm_op(op);
} else if(safe_str_eq(operation, CRMD_ACTION_CANCEL)) {
lrm_op_t* op = NULL;
char *op_key = NULL;
const char *op_task = NULL;
const char *op_interval = NULL;
CRM_CHECK(params != NULL,
crm_log_xml_warn(input->xml, "Bad command");
return I_NULL);
op_task = crm_element_value(params, crm_meta_name(XML_LRM_ATTR_TASK));
op_interval = crm_element_value(params, crm_meta_name("interval"));
#if CRM_DEPRECATED_SINCE_2_0_5
if(op_interval == NULL) {
op_interval = crm_element_value(params, "interval");
}
if(op_task == NULL) {
op_task = crm_element_value(params, XML_LRM_ATTR_TASK);
if(op_task == NULL) {
op_task = crm_element_value(params, "task");
}
}
#endif
CRM_CHECK(op_task != NULL,
crm_log_xml_warn(input->xml, "Bad command");
return I_NULL);
CRM_CHECK(op_interval != NULL,
crm_log_xml_warn(input->xml, "Bad command");
return I_NULL);
op = construct_op(input->xml, rsc->id, op_task);
CRM_ASSERT(op != NULL);
op_key = generate_op_key(
rsc->id,op_task,crm_parse_int(op_interval,"0"));
cancel_monitor(rsc, op_key);
op->op_status = LRM_OP_DONE;
op->rc = EXECRA_OK;
send_direct_ack(from_host, from_sys, op, rsc->id);
crm_free(op_key);
free_lrm_op(op);
} else if(safe_str_eq(operation, CRMD_ACTION_DELETE)) {
int rc = HA_OK;
lrm_op_t* op = NULL;
op = construct_op(input->xml, rsc->id, operation);
CRM_ASSERT(op != NULL);
op->op_status = LRM_OP_DONE;
op->rc = EXECRA_OK;
if(rsc == NULL) {
crm_debug("Resource %s was already removed",
rsc->id);
} else {
crm_info("Removing resource %s from the LRM",
rsc->id);
rc = fsa_lrm_conn->lrm_ops->delete_rsc(
fsa_lrm_conn, rsc->id);
if(rc != HA_OK) {
crm_err("Failed to remove resource %s",
rsc->id);
op->op_status = LRM_OP_ERROR;
op->rc = EXECRA_UNKNOWN_ERROR;
}
}
send_direct_ack(from_host, from_sys, op, rsc->id);
free_lrm_op(op);
} else if(rsc != NULL) {
next_input = do_lrm_rsc_op(
rsc, operation, input->xml, input->msg);
}
lrm_free_rsc(rsc);
} else {
crm_err("Operation was neither a lrm_query, nor a rsc op. %s",
crm_str(crm_op));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
return next_input;
}
lrm_op_t *
construct_op(crm_data_t *rsc_op, const char *rsc_id, const char *operation)
{
lrm_op_t *op = NULL;
const char *op_delay = NULL;
const char *op_timeout = NULL;
const char *op_interval = NULL;
const char *transition = NULL;
CRM_DEV_ASSERT(rsc_id != NULL);
crm_malloc0(op, sizeof(lrm_op_t));
op->op_type = crm_strdup(operation);
op->op_status = LRM_OP_PENDING;
op->rc = -1;
op->rsc_id = crm_strdup(rsc_id);
op->interval = 0;
op->timeout = 0;
op->start_delay = 0;
op->app_name = crm_strdup(CRM_SYSTEM_CRMD);
if(rsc_op == NULL) {
CRM_DEV_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation));
op->user_data = NULL;
op->user_data_len = 0;
/* the stop_all_resources() case
* by definition there is no DC (or they'd be shutting
* us down).
* So we should put our version here.
*/
op->params = g_hash_table_new_full(
g_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
g_hash_table_insert(op->params,
crm_strdup(XML_ATTR_CRM_VERSION),
crm_strdup(CRM_FEATURE_SET));
crm_debug_2("Constructed %s op for %s", operation, rsc_id);
return op;
}
op->params = xml2list(rsc_op);
#if CRM_DEPRECATED_SINCE_2_0_3
if(g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION) == NULL) {
g_hash_table_destroy(op->params);
op->params = xml2list_202(rsc_op);
}
#endif
if(op->params == NULL) {
CRM_DEV_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation));
}
op_delay = g_hash_table_lookup(op->params, crm_meta_name("start_delay"));
op_timeout = g_hash_table_lookup(op->params, crm_meta_name("timeout"));
op_interval = g_hash_table_lookup(op->params, crm_meta_name("interval"));
#if CRM_DEPRECATED_SINCE_2_0_5
if(op_delay == NULL) {
op_delay = g_hash_table_lookup(op->params, "start_delay");
}
if(op_timeout == NULL) {
op_timeout = g_hash_table_lookup(op->params, "timeout");
}
if(op_interval == NULL) {
op_interval = g_hash_table_lookup(op->params, "interval");
}
#endif
op->interval = crm_parse_int(op_interval, "0");
op->timeout = crm_parse_int(op_timeout, "0");
op->start_delay = crm_parse_int(op_delay, "0");
/* sanity */
if(op->interval < 0) {
op->interval = 0;
}
if(op->timeout < 0) {
op->timeout = 0;
}
if(op->start_delay < 0) {
op->start_delay = 0;
}
transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY);
CRM_CHECK(transition != NULL, return op);
op->user_data = crm_strdup(transition);
op->user_data_len = 1+strlen(op->user_data);
if(op->interval != 0) {
if(safe_str_eq(operation, CRMD_ACTION_START)
|| safe_str_eq(operation, CRMD_ACTION_STOP)) {
crm_err("Start and Stop actions cannot have an interval");
op->interval = 0;
}
}
crm_debug_2("Constructed %s op for %s: interval=%d",
operation, rsc_id, op->interval);
return op;
}
void
send_direct_ack(const char *to_host, const char *to_sys,
lrm_op_t* op, const char *rsc_id)
{
HA_Message *reply = NULL;
crm_data_t *update, *iter;
crm_data_t *fragment;
CRM_DEV_ASSERT(op != NULL);
if(crm_assert_failed) {
return;
}
if(op->rsc_id == NULL) {
CRM_DEV_ASSERT(rsc_id != NULL);
op->rsc_id = crm_strdup(rsc_id);
}
if(to_sys == NULL) {
to_sys = CRM_SYSTEM_TENGINE;
}
crm_info("ACK'ing resource op: %s for %s", op->op_type, op->rsc_id);
update = create_node_state(
fsa_our_uname, NULL, NULL, NULL, NULL, NULL, FALSE, __FUNCTION__);
iter = create_xml_node(update, XML_CIB_TAG_LRM);
crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
build_operation_update(iter, op, __FUNCTION__, 0);
fragment = create_cib_fragment(update, XML_CIB_TAG_STATUS);
reply = create_request(CRM_OP_INVOKE_LRM, fragment, to_host,
to_sys, CRM_SYSTEM_LRMD, NULL);
crm_debug("Sending ACK: %s", cl_get_string(reply, XML_ATTR_REFERENCE));
crm_log_xml_debug_2(update, "ACK Update");
crm_log_message_adv(LOG_DEBUG_3, "ACK Reply", reply);
if(relay_message(reply, TRUE) == FALSE) {
crm_log_message_adv(LOG_ERR, "Unable to route reply", reply);
crm_msg_del(reply);
}
free_xml(fragment);
free_xml(update);
}
enum crmd_fsa_input
do_lrm_rsc_op(lrm_rsc_t *rsc, const char *operation,
crm_data_t *msg, HA_Message *request)
{
int call_id = 0;
char *op_id = NULL;
lrm_op_t* op = NULL;
fsa_data_t *msg_data = NULL;
const char *transition = NULL;
CRM_CHECK(rsc != NULL, return I_NULL);
if(msg != NULL) {
transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
if(transition == NULL) {
crm_err("Missing transition");
crm_log_message(LOG_ERR, msg);
}
}
/* stop the monitor before stopping the resource */
if(safe_str_eq(operation, CRMD_ACTION_STOP)) {
g_hash_table_foreach(monitors, stop_recurring_action, rsc);
g_hash_table_foreach_remove(
monitors, remove_recurring_action, rsc);
}
/* now do the op */
op = construct_op(msg, rsc->id, operation);
crm_info("Performing op %s on %s (interval=%dms, key=%s)",
operation, rsc->id, op->interval, transition);
if((AM_I_DC == FALSE && fsa_state != S_NOT_DC)
|| (AM_I_DC && fsa_state != S_TRANSITION_ENGINE)) {
if(safe_str_neq(operation, CRMD_ACTION_STOP)) {
crm_info("Discarding attempt to perform action %s on %s"
" in state %s", operation, rsc->id,
fsa_state2string(fsa_state));
op->rc = 99;
op->op_status = LRM_OP_ERROR;
send_direct_ack(NULL, NULL, op, rsc->id);
free_lrm_op(op);
crm_free(op_id);
return I_NULL;
}
}
op_id = generate_op_key(rsc->id, op->op_type, op->interval);
if(op->interval > 0) {
cancel_monitor(rsc, op_id);
op->target_rc = CHANGED;
} else {
op->target_rc = EVERYTIME;
}
g_hash_table_replace(resources,crm_strdup(rsc->id), crm_strdup(op_id));
call_id = rsc->ops->perform_op(rsc, op);
if(call_id <= 0) {
crm_err("Operation %s on %s failed: %d",
operation, rsc->id, call_id);
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
} else {
/* record all operations so we can wait
* for them to complete during shutdown
*/
char *call_id_s = make_stop_id(rsc->id, call_id);
g_hash_table_replace(
shutdown_ops, call_id_s, crm_strdup(rsc->id));
crm_debug_2("Recording pending op: %s/%s %s",
rsc->id, operation, call_id_s);
if(op->interval > 0) {
struct recurring_op_s *op = NULL;
crm_malloc0(op, sizeof(struct recurring_op_s));
crm_debug_2("Adding recurring %s op for %s (%d)",
op_id, rsc->id, call_id);
op->call_id = call_id;
op->rsc_id = crm_strdup(rsc->id);
g_hash_table_insert(monitors, op_id, op);
op_id = NULL;
}
}
crm_free(op_id);
free_lrm_op(op);
return I_NULL;
}
void
stop_recurring_action(gpointer key, gpointer value, gpointer user_data)
{
lrm_rsc_t *rsc = user_data;
struct recurring_op_s *op = (struct recurring_op_s*)value;
if(safe_str_eq(op->rsc_id, rsc->id)) {
if(op->call_id > 0) {
crm_debug("Stopping recurring op %d for %s (%s)",
op->call_id, rsc->id, (char*)key);
rsc->ops->cancel_op(rsc, op->call_id);
} else {
crm_err("Invalid call_id %d for %s",
op->call_id, rsc->id);
}
}
}
gboolean
remove_recurring_action(gpointer key, gpointer value, gpointer user_data)
{
lrm_rsc_t *rsc = user_data;
struct recurring_op_s *op = (struct recurring_op_s*)value;
if(safe_str_eq(op->rsc_id, rsc->id)) {
return TRUE;
}
return FALSE;
}
void
free_recurring_op(gpointer value)
{
struct recurring_op_s *op = (struct recurring_op_s*)value;
crm_free(op->rsc_id);
crm_free(op);
}
void
free_lrm_op(lrm_op_t *op)
{
g_hash_table_destroy(op->params);
crm_free(op->user_data);
crm_free(op->output);
crm_free(op->rsc_id);
crm_free(op->op_type);
crm_free(op->app_name);
crm_free(op);
}
static void dup_attr(gpointer key, gpointer value, gpointer user_data)
{
g_hash_table_replace(user_data, crm_strdup(key), crm_strdup(value));
}
lrm_op_t *
copy_lrm_op(const lrm_op_t *op)
{
lrm_op_t *op_copy = NULL;
CRM_DEV_ASSERT(op != NULL);
if(crm_assert_failed) {
return NULL;
}
CRM_ASSERT(op->rsc_id != NULL);
crm_malloc0(op_copy, sizeof(lrm_op_t));
op_copy->op_type = crm_strdup(op->op_type);
/* input fields */
op_copy->params = g_hash_table_new_full(
g_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
if(op->params != NULL) {
g_hash_table_foreach(op->params, dup_attr, op_copy->params);
}
op_copy->timeout = op->timeout;
op_copy->interval = op->interval;
op_copy->target_rc = op->target_rc;
/* in the CRM, this is always a string */
if(op->user_data != NULL) {
op_copy->user_data = crm_strdup(op->user_data);
}
/* output fields */
op_copy->op_status = op->op_status;
op_copy->rc = op->rc;
op_copy->call_id = op->call_id;
op_copy->output = NULL;
op_copy->rsc_id = crm_strdup(op->rsc_id);
if(op->app_name != NULL) {
op_copy->app_name = crm_strdup(op->app_name);
}
if(op->output != NULL) {
op_copy->output = crm_strdup(op->output);
}
return op_copy;
}
lrm_rsc_t *
copy_lrm_rsc(const lrm_rsc_t *rsc)
{
lrm_rsc_t *rsc_copy = NULL;
if(rsc == NULL) {
return NULL;
}
crm_malloc0(rsc_copy, sizeof(lrm_rsc_t));
rsc_copy->id = crm_strdup(rsc->id);
rsc_copy->type = crm_strdup(rsc->type);
rsc_copy->class = NULL;
rsc_copy->provider = NULL;
if(rsc->class != NULL) {
rsc_copy->class = crm_strdup(rsc->class);
}
if(rsc->provider != NULL) {
rsc_copy->provider = crm_strdup(rsc->provider);
}
/* GHashTable* params; */
rsc_copy->params = NULL;
rsc_copy->ops = NULL;
return rsc_copy;
}
static void
cib_rsc_callback(const HA_Message *msg, int call_id, int rc,
crm_data_t *output, void *user_data)
{
if(rc != cib_ok) {
crm_err("Resource update %d failed: %s",
call_id, cib_error2string(rc));
} else {
crm_debug("Resource update %d complete", call_id);
}
}
void
do_update_resource(lrm_op_t* op)
{
/*
<status>
<nodes_status id=uname>
<lrm>
<lrm_resources>
<lrm_resource id=...>
</...>
*/
int rc = cib_ok;
crm_data_t *update, *iter;
CRM_CHECK(op != NULL, return);
update = create_node_state(
fsa_our_uname, NULL, NULL, NULL, NULL, NULL, FALSE, __FUNCTION__);
iter = create_xml_node(update, XML_CIB_TAG_LRM);
crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
if(op->interval == 0) {
lrm_rsc_t *rsc = NULL;
crm_debug_2("Updating %s resource definitions after %s op",
op->rsc_id, op->op_type);
rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, op->rsc_id);
crm_xml_add(iter, XML_ATTR_TYPE, rsc->type);
crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class);
crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER,rsc->provider);
lrm_free_rsc(rsc);
}
build_operation_update(iter, op, __FUNCTION__, 0);
/* make it an asyncronous call and be done with it
*
* Best case:
* the resource state will be discovered during
* the next signup or election.
*
* Bad case:
* we are shutting down and there is no DC at the time,
* but then why were we shutting down then anyway?
* (probably because of an internal error)
*
* Worst case:
* we get shot for having resources "running" when the really weren't
*
* the alternative however means blocking here for too long, which
* isnt acceptable
*/
fsa_cib_update(XML_CIB_TAG_STATUS, update, cib_quorum_override, rc);
if(rc > 0) {
/* the return code is a call number, not an error code */
crm_debug("Sent resource state update message: %d", rc);
add_cib_op_callback(rc, FALSE, NULL, cib_rsc_callback);
} else {
crm_err("Resource state update failed: %s",
cib_error2string(rc));
- CRM_DEV_ASSERT(rc == cib_ok);
}
free_xml(update);
}
enum crmd_fsa_input
do_lrm_event(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input,
fsa_data_t *msg_data)
{
lrm_op_t *op = NULL;
CRM_CHECK(msg_data->fsa_cause == C_LRM_OP_CALLBACK, return I_NULL);
op = fsa_typed_data(fsa_dt_lrm);
process_lrm_event(op);
return I_NULL;
}
gboolean
process_lrm_event(lrm_op_t *op)
{
CRM_CHECK(op != NULL, return I_NULL);
CRM_CHECK(op->rsc_id != NULL, return I_NULL);
if(op->rc == 8 || op->rc == 7) {
/* Leave it up to the TE/PE to decide if this is an error */
op->op_status = LRM_OP_DONE;
}
switch(op->op_status) {
case LRM_OP_PENDING:
/* this really shouldnt happen */
crm_err("LRM operation (%d) %s_%d on %s %s: %s",
op->call_id, op->op_type,
op->interval,
crm_str(op->rsc_id),
op_status2text(op->op_status),
execra_code2string(op->rc));
break;
case LRM_OP_ERROR:
crm_err("LRM operation (%d) %s_%d on %s %s: (%d) %s",
op->call_id, op->op_type,
op->interval,
crm_str(op->rsc_id),
op_status2text(op->op_status),
op->rc, execra_code2string(op->rc));
if(op->output != NULL) {
crm_debug("Result: %s", op->output);
}
break;
case LRM_OP_CANCELLED:
crm_warn("LRM operation (%d) %s_%d on %s %s",
op->call_id, op->op_type,
op->interval,
crm_str(op->rsc_id),
op_status2text(op->op_status));
break;
case LRM_OP_TIMEOUT:
crm_err("LRM operation (%d) %s_%d on %s %s",
op->call_id, op->op_type,
op->interval,
crm_str(op->rsc_id),
op_status2text(op->op_status));
break;
case LRM_OP_NOTSUPPORTED:
crm_err("LRM operation (%d) %s_%d on %s %s",
op->call_id, op->op_type,
op->interval,
crm_str(op->rsc_id),
op_status2text(op->op_status));
break;
case LRM_OP_DONE:
crm_info("LRM operation (%d) %s_%d on %s %s",
op->call_id, op->op_type,
op->interval,
crm_str(op->rsc_id),
op_status2text(op->op_status));
break;
}
if(op->op_status != LRM_OP_CANCELLED) {
do_update_resource(op);
if(op->interval > 0) {
crm_debug("Op %d %s_%s_%d returned",
op->call_id, op->rsc_id, op->op_type,
op->interval);
return TRUE;
}
} else if(op->interval == 0) {
crm_err("No update sent for cancelled op %d: %s_%s_%d",
op->call_id, op->rsc_id, op->op_type, op->interval);
}
if(g_hash_table_size(shutdown_ops) > 0) {
char *op_id = make_stop_id(op->rsc_id, op->call_id);
if(g_hash_table_remove(shutdown_ops, op_id)) {
crm_debug_2("Op %d %s_%s_%d is confirmed",
op->call_id, op->rsc_id, op->op_type,
op->interval);
crm_free(op_id);
return TRUE;
}
crm_free(op_id);
}
crm_err("Op %d %s_%s_%d not matched",
op->call_id, op->rsc_id, op->op_type, op->interval);
return TRUE;
}
char *
make_stop_id(const char *rsc, int call_id)
{
char *op_id = NULL;
crm_malloc0(op_id, strlen(rsc) + 34);
if(op_id != NULL) {
snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id);
}
return op_id;
}
gboolean
resource_stopped(gpointer key, gpointer value, gpointer user_data)
{
const char *this_rsc = value;
const char *target_rsc = user_data;
if(safe_str_eq(this_rsc, target_rsc)) {
return TRUE;
}
return FALSE;
}
diff --git a/crm/tengine/events.c b/crm/tengine/events.c
index 96a8f1fc52..06b6b6939a 100644
--- a/crm/tengine/events.c
+++ b/crm/tengine/events.c
@@ -1,562 +1,550 @@
-/* $Id: events.c,v 1.21 2006/06/22 12:58:45 andrew Exp $ */
+/* $Id: events.c,v 1.22 2006/08/14 08:52:30 andrew Exp $ */
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <portability.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/msg.h>
#include <crm/common/xml.h>
#include <tengine.h>
#include <heartbeat.h>
#include <clplumbing/Gmain_timeout.h>
#include <lrm/lrm_api.h>
crm_data_t *need_abort(crm_data_t *update);
void process_graph_event(crm_data_t *event, const char *event_node);
int match_graph_event(
crm_action_t *action, crm_data_t *event, const char *event_node);
crm_data_t *
need_abort(crm_data_t *update)
{
crm_data_t *section_xml = NULL;
const char *section = NULL;
if(update == NULL) {
return NULL;
}
section = XML_CIB_TAG_NODES;
section_xml = get_object_root(section, update);
xml_child_iter(section_xml, child,
return section_xml;
);
section = XML_CIB_TAG_RESOURCES;
section_xml = get_object_root(section, update);
xml_child_iter(section_xml, child,
return section_xml;
);
section = XML_CIB_TAG_CONSTRAINTS;
section_xml = get_object_root(section, update);
xml_child_iter(section_xml, child,
return section_xml;
);
section = XML_CIB_TAG_CRMCONFIG;
section_xml = get_object_root(section, update);
xml_child_iter(section_xml, child,
return section_xml;
);
return NULL;
}
static gboolean
fail_incompletable_actions(crm_graph_t *graph, const char *down_node)
{
const char *target = NULL;
crm_data_t *last_action = NULL;
slist_iter(
synapse, synapse_t, graph->synapses, lpc,
if (synapse->confirmed) {
continue;
}
slist_iter(
action, crm_action_t, synapse->actions, lpc,
if(action->type == action_type_pseudo || action->confirmed) {
continue;
}
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
if(safe_str_eq(target, down_node)) {
action->failed = TRUE;
last_action = action->xml;
update_graph(graph, action);
crm_notice("Action %d (%s) is scheduled for %s (offline)",
action->id, ID(action->xml), down_node);
}
);
);
if(last_action != NULL) {
crm_warn("Node %s shutdown resulted in un-runnable actions", down_node);
abort_transition(INFINITY, tg_restart, "Node failure", last_action);
return TRUE;
}
return FALSE;
}
gboolean
extract_event(crm_data_t *msg)
{
int shutdown = 0;
const char *event_node = NULL;
/*
[cib fragment]
...
<status>
<node_state id="node1" state=CRMD_STATE_ACTIVE exp_state="active">
<lrm>
<lrm_resources>
<rsc_state id="" rsc_id="rsc4" node_id="node1" rsc_state="stopped"/>
*/
crm_debug_4("Extracting event from %s", crm_element_name(msg));
xml_child_iter_filter(
msg, node_state, XML_CIB_TAG_STATE,
crm_data_t *attrs = NULL;
crm_data_t *resources = NULL;
const char *ccm_state = crm_element_value(
node_state, XML_CIB_ATTR_INCCM);
const char *crmd_state = crm_element_value(
node_state, XML_CIB_ATTR_CRMDSTATE);
/* Transient node attribute changes... */
event_node = crm_element_value(node_state, XML_ATTR_ID);
crm_debug("Processing state update from %s", event_node);
crm_log_xml_debug_3(node_state,"Processing");
attrs = find_xml_node(
node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
if(attrs != NULL) {
crm_info("Aborting on "XML_TAG_TRANSIENT_NODEATTRS" changes");
abort_transition(INFINITY, tg_restart,
XML_TAG_TRANSIENT_NODEATTRS, attrs);
}
resources = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
resources = find_xml_node(
resources, XML_LRM_TAG_RESOURCES, FALSE);
/* LRM resource update... */
xml_child_iter(
resources, rsc,
xml_child_iter(
rsc, rsc_op,
crm_log_xml_debug_3(
rsc_op, "Processing resource update");
process_graph_event(rsc_op, event_node);
);
);
/*
* node state update... possibly from a shutdown we requested
*/
if(safe_str_eq(ccm_state, XML_BOOLEAN_FALSE)
|| safe_str_eq(crmd_state, CRMD_JOINSTATE_DOWN)) {
crm_action_t *shutdown = NULL;
crm_debug_3("A shutdown we requested?");
shutdown = match_down_event(0, event_node, NULL);
if(shutdown != NULL) {
update_graph(transition_graph, shutdown);
trigger_graph();
} else {
crm_info("Stonith/shutdown event not matched");
abort_transition(INFINITY, tg_restart,
"Node failure", node_state);
}
fail_incompletable_actions(transition_graph, event_node);
}
shutdown = 0;
ha_msg_value_int(node_state, XML_CIB_ATTR_SHUTDOWN, &shutdown);
if(shutdown != 0) {
crm_info("Aborting on "XML_CIB_ATTR_SHUTDOWN" attribute");
abort_transition(INFINITY, tg_restart,
"Shutdown request", node_state);
}
);
return TRUE;
}
static void
update_failcount(crm_action_t *action, int rc)
{
crm_data_t *rsc = NULL;
char *attr_name = NULL;
const char *task = NULL;
const char *rsc_id = NULL;
const char *on_node = NULL;
const char *on_uuid = NULL;
const char *interval = NULL;
if(rc == 99) {
/* this is an internal code for "we're busy, try again" */
return;
}
interval = g_hash_table_lookup(
action->params, crm_meta_name("interval"));
if(interval == NULL) {
return;
}
CRM_CHECK(action->xml != NULL, return);
rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
CRM_CHECK(rsc != NULL, return);
rsc_id = ID(rsc);
CRM_CHECK(rsc_id != NULL, return);
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
on_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
CRM_CHECK(task != NULL, return);
CRM_CHECK(on_uuid != NULL, return);
CRM_CHECK(on_node != NULL, return);
attr_name = crm_concat("fail-count", rsc_id, '-');
crm_warn("Updating failcount for %s on %s after failed %s: rc=%d",
rsc_id, on_node, task, rc);
update_attr(te_cib_conn, cib_none, XML_CIB_TAG_STATUS,
on_uuid, NULL,NULL, attr_name, XML_NVPAIR_ATTR_VALUE"++");
crm_free(attr_name);
}
/*
* returns the ID of the action if a match is found
* returns -1 if a match was not found
* returns -2 if a match was found but the action failed (and was
* not allowed to)
*/
int
match_graph_event(
crm_action_t *action, crm_data_t *event, const char *event_node)
{
int log_level_fail = LOG_ERR;
int target_rc = 0;
const char *target_rc_s = NULL;
const char *allow_fail = NULL;
const char *this_action = NULL;
const char *this_node = NULL;
const char *this_uname = NULL;
const char *magic = NULL;
const char *this_event;
char *update_te_uuid = NULL;
const char *update_event;
int op_status_i = -3;
int op_rc_i = -3;
int transition_i = -1;
CRM_CHECK(event != NULL, return -1);
crm_debug_3("Processing \"%s\" change", crm_element_name(event));
update_event = crm_element_value(event, XML_ATTR_ID);
magic = crm_element_value(event, XML_ATTR_TRANSITION_MAGIC);
CRM_CHECK(magic != NULL, return -2);
this_action = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
this_uname = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
this_event = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
this_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
CRM_CHECK(this_event != NULL, return -2);
if(safe_str_neq(this_event, update_event)) {
crm_debug_2("Action %d : Event mismatch %s vs. %s",
action->id, this_event, update_event);
return -1;
} else if(safe_str_neq(this_node, event_node)) {
crm_debug_2("Action %d : Node mismatch %s (%s) vs. %s",
action->id, this_node, this_uname, event_node);
return -1;
}
crm_debug_2("Matched action (%d) %s", action->id, this_event);
CRM_CHECK(decode_transition_magic(
magic, &update_te_uuid,
&transition_i, &op_status_i, &op_rc_i), return -2);
if(transition_i == -1) {
/* we never expect these - recompute */
crm_err("Detected action %s initiated outside of a transition",
this_event);
crm_log_message(LOG_ERR, event);
return -2;
} else if(safe_str_neq(update_te_uuid, te_uuid)) {
crm_info("Detected action %s from a different transitioner:"
" %s vs. %s", this_event, update_te_uuid, te_uuid);
crm_log_message(LOG_INFO, event);
return -3;
} else if(transition_graph->id != transition_i) {
crm_warn("Detected an action %s from a different transition:"
" %d vs. %d", this_event, transition_i,
transition_graph->id);
crm_log_message(LOG_INFO, event);
return -4;
}
/* stop this event's timer if it had one */
stop_te_timer(action->timer);
action->confirmed = TRUE;
target_rc_s = g_hash_table_lookup(
action->params,crm_meta_name(XML_ATTR_TE_TARGET_RC));
if(target_rc_s != NULL) {
crm_debug_2("Target rc: %s vs. %d", target_rc_s, op_rc_i);
target_rc = crm_parse_int(target_rc_s, NULL);
if(target_rc == op_rc_i) {
crm_debug_2("Target rc: == %d", op_rc_i);
if(op_status_i != LRM_OP_DONE) {
crm_debug_2("Re-mapping op status to"
" LRM_OP_DONE for %s",update_event);
op_status_i = LRM_OP_DONE;
}
} else {
crm_debug_2("Target rc: != %d", op_rc_i);
if(op_status_i != LRM_OP_ERROR) {
crm_info("Re-mapping op status to"
" LRM_OP_ERROR for %s", update_event);
op_status_i = LRM_OP_ERROR;
}
}
}
/* Process OP status */
switch(op_status_i) {
case -3:
crm_err("Action returned the same as last time..."
" whatever that was!");
crm_log_message(LOG_ERR, event);
break;
case LRM_OP_PENDING:
crm_debug("Ignoring pending operation");
return -5;
break;
case LRM_OP_DONE:
break;
case LRM_OP_ERROR:
/* This is the code we use for direct nack's */
if(op_rc_i == 99) {
log_level_fail = LOG_WARNING;
}
/* fall through */
case LRM_OP_TIMEOUT:
case LRM_OP_NOTSUPPORTED:
action->failed = TRUE;
crm_log_maybe(log_level_fail,
"Action %s on %s failed (target: %d vs. rc: %d): %s",
update_event, this_uname, target_rc,
op_rc_i, op_status2text(op_status_i));
break;
case LRM_OP_CANCELLED:
/* do nothing?? */
crm_err("Dont know what to do for cancelled ops yet");
break;
default:
action->failed = TRUE;
crm_err("Unsupported action result: %d", op_status_i);
}
update_graph(transition_graph, action);
trigger_graph();
if(action->failed) {
allow_fail = g_hash_table_lookup(
action->params, crm_meta_name(XML_ATTR_TE_ALLOWFAIL));
if(crm_is_true(allow_fail)) {
action->failed = FALSE;
}
}
if(action->failed) {
/* ignore probes */
if(target_rc != EXECRA_NOT_RUNNING) {
update_failcount(action, op_rc_i);
}
abort_transition(action->synapse->priority+1,
tg_restart, "Event failed", event);
} else if(transition_graph->complete) {
abort_transition(INFINITY, tg_restart,"No active graph", event);
}
te_log_action(LOG_INFO, "Action %s (%d) confirmed",
this_event, action->id);
return action->id;
}
crm_action_t *
match_down_event(int id, const char *target, const char *filter)
{
const char *this_action = NULL;
const char *this_node = NULL;
crm_action_t *match = NULL;
slist_iter(
synapse, synapse_t, transition_graph->synapses, lpc,
/* lookup event */
slist_iter(
action, crm_action_t, synapse->actions, lpc2,
if(id > 0 && action->id == id) {
match = action;
break;
}
this_action = crm_element_value(
action->xml, XML_LRM_ATTR_TASK);
if(action->type != action_type_crm) {
continue;
} else if(safe_str_eq(this_action, CRM_OP_LRM_REFRESH)){
continue;
} else if(filter != NULL
&& safe_str_neq(this_action, filter)) {
continue;
}
this_node = crm_element_value(
action->xml, XML_LRM_ATTR_TARGET_UUID);
if(this_node == NULL) {
crm_log_xml_err(action->xml, "No node uuid");
}
if(safe_str_neq(this_node, target)) {
crm_debug("Action %d : Node mismatch: %s",
action->id, this_node);
continue;
}
match = action;
break;
);
if(match != NULL) {
/* stop this event's timer if it had one */
break;
}
);
if(match != NULL) {
/* stop this event's timer if it had one */
crm_debug("Match found for action %d: %s on %s", id,
crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY),
target);
stop_te_timer(match->timer);
match->confirmed = TRUE;
} else if(id > 0) {
crm_err("No match for action %d", id);
} else {
crm_warn("No match for shutdown action on %s", target);
}
return match;
}
void
process_graph_event(crm_data_t *event, const char *event_node)
{
int rc = -1;
- int op_status_i = 0;
const char *magic = NULL;
const char *rsc_id = NULL;
- const char *op_status = NULL;
CRM_ASSERT(event != NULL);
rsc_id = crm_element_value(event, XML_ATTR_ID);
- op_status = crm_element_value(event, XML_LRM_ATTR_OPSTATUS);
magic = crm_element_value(event, XML_ATTR_TRANSITION_MAGIC);
- if(op_status != NULL) {
- op_status_i = crm_parse_int(op_status, NULL);
- if(op_status_i == LRM_OP_PENDING) {
- /* just information that the action was sent */
- crm_debug_2("Ignoring TE initiated updates");
- return;
- }
- }
-
if(magic == NULL) {
crm_log_xml_debug_2(event, "Skipping \"non-change\"");
return;
} else {
crm_debug_2("Processing CIB update: %s on %s: %s",
rsc_id, event_node, magic);
}
slist_iter(
synapse, synapse_t, transition_graph->synapses, lpc,
/* lookup event */
slist_iter(
action, crm_action_t, synapse->actions, lpc2,
rc = match_graph_event(action, event, event_node);
if(rc >= 0) {
crm_log_xml_debug_2(event, "match:found");
} else if(rc == -5) {
crm_log_xml_debug_2(event, "match:pending");
} else if(rc != -1) {
crm_warn("Search for %s terminated: %d",
ID(event), rc);
abort_transition(INFINITY, tg_restart,
"Unexpected event", event);
}
if(rc != -1) {
return;
}
);
);
/* unexpected event, trigger a pe-recompute */
/* possibly do this only for certain types of actions */
crm_warn("Event not found.");
crm_log_xml_info(event, "match:not-found");
abort_transition(INFINITY, tg_restart, "Unexpected event", event);
return;
}
diff --git a/crm/tengine/utils.c b/crm/tengine/utils.c
index 189636d1b4..25c40f7469 100644
--- a/crm/tengine/utils.c
+++ b/crm/tengine/utils.c
@@ -1,141 +1,94 @@
-/* $Id: utils.c,v 1.59 2006/04/19 12:24:59 andrew Exp $ */
+/* $Id: utils.c,v 1.60 2006/08/14 08:52:30 andrew Exp $ */
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <portability.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/msg.h>
#include <crm/common/xml.h>
#include <tengine.h>
#include <heartbeat.h>
#include <clplumbing/Gmain_timeout.h>
#include <lrm/lrm_api.h>
extern cib_t *te_cib_conn;
-const char *
-get_rsc_state(const char *task, op_status_t status)
-{
- if(safe_str_eq(CRMD_ACTION_START, task)) {
- if(status == LRM_OP_PENDING) {
- return CRMD_ACTION_START_PENDING;
- } else if(status == LRM_OP_DONE) {
- return CRMD_ACTION_STARTED;
- } else {
- return CRMD_ACTION_START_FAIL;
- }
-
- } else if(safe_str_eq(CRMD_ACTION_STOP, task)) {
- if(status == LRM_OP_PENDING) {
- return CRMD_ACTION_STOP_PENDING;
- } else if(status == LRM_OP_DONE) {
- return CRMD_ACTION_STOPPED;
- } else {
- return CRMD_ACTION_STOP_FAIL;
- }
-
- } else {
- if(safe_str_eq(CRMD_ACTION_MON, task)) {
- if(status == LRM_OP_PENDING) {
- return CRMD_ACTION_MON_PENDING;
- } else if(status == LRM_OP_DONE) {
- return CRMD_ACTION_MON_OK;
- } else {
- return CRMD_ACTION_MON_FAIL;
- }
- } else {
- const char *rsc_state = NULL;
- if(status == LRM_OP_PENDING) {
- rsc_state = CRMD_ACTION_GENERIC_PENDING;
- } else if(status == LRM_OP_DONE) {
- rsc_state = CRMD_ACTION_GENERIC_OK;
- } else {
- rsc_state = CRMD_ACTION_GENERIC_FAIL;
- }
- crm_warn("Using status \"%s\" for op \"%s\"..."
- " this is still in the experimental stage.",
- rsc_state, task);
- return rsc_state;
- }
- }
-}
-
gboolean
stop_te_timer(crm_action_timer_t *timer)
{
const char *timer_desc = "action timer";
if(timer == NULL) {
return FALSE;
}
if(timer->reason == timeout_abort) {
timer_desc = "global timer";
}
if(timer->source_id != 0) {
crm_debug_2("Stopping %s", timer_desc);
Gmain_timeout_remove(timer->source_id);
timer->source_id = 0;
} else {
return FALSE;
}
return TRUE;
}
void
trigger_graph_processing(const char *fn, int line)
{
G_main_set_trigger(transition_trigger);
crm_debug_2("%s:%d - Triggered graph processing", fn, line);
}
void
abort_transition_graph(
int abort_priority, enum transition_action abort_action,
const char *abort_text, crm_data_t *reason, const char *fn, int line)
{
int log_level = LOG_DEBUG;
/*
if(abort_priority >= INFINITY) {
log_level = LOG_INFO;
}
*/
update_abort_priority(
transition_graph, abort_priority, abort_action, abort_text);
crm_log_maybe(log_level, "%s:%d - Triggered graph processing : %s",
fn, line, abort_text);
if(reason != NULL) {
crm_log_xml(log_level, "Cause", reason);
}
if(transition_graph->complete) {
notify_crmd(transition_graph);
} else {
G_main_set_trigger(transition_trigger);
}
}
diff --git a/lib/crm/common/ipc.c b/lib/crm/common/ipc.c
index 64df19a260..a3804f9888 100644
--- a/lib/crm/common/ipc.c
+++ b/lib/crm/common/ipc.c
@@ -1,435 +1,439 @@
-/* $Id: ipc.c,v 1.27 2006/07/12 09:31:36 andrew Exp $ */
+/* $Id: ipc.c,v 1.28 2006/08/14 08:52:08 andrew Exp $ */
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <portability.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <crm/crm.h>
#include <clplumbing/ipc.h>
#include <clplumbing/Gmain_timeout.h>
#include <clplumbing/cl_log.h>
#include <clplumbing/cl_signal.h>
#include <clplumbing/lsb_exitcodes.h>
#include <clplumbing/uids.h>
#include <clplumbing/realtime.h>
#include <clplumbing/GSource.h>
#include <clplumbing/cl_poll.h>
#include <crm/common/ipc.h>
#include <crm/msg_xml.h>
#include <ha_msg.h>
#include <crm/dmalloc_wrapper.h>
gboolean
send_ha_message(ll_cluster_t *hb_conn, HA_Message *msg, const char *node, gboolean force_ordered)
{
gboolean all_is_good = TRUE;
cl_mem_stats_t saved_stats;
crm_save_mem_stats(__PRETTY_FUNCTION__, &saved_stats);
if (msg == NULL) {
crm_err("cant send NULL message");
all_is_good = FALSE;
} else if(hb_conn == NULL) {
crm_err("No heartbeat connection specified");
all_is_good = FALSE;
} else if(hb_conn->llc_ops->chan_is_connected(hb_conn) == FALSE) {
crm_err("Not connected to Heartbeat");
all_is_good = FALSE;
} else if(node != NULL) {
if(hb_conn->llc_ops->send_ordered_nodemsg(
hb_conn, msg, node) != HA_OK) {
all_is_good = FALSE;
crm_err("Send failed");
} else {
crm_debug_2("Message sent...");
}
} else if(force_ordered) {
if(hb_conn->llc_ops->send_ordered_clustermsg(hb_conn, msg) != HA_OK) {
all_is_good = FALSE;
crm_err("Broadcast Send failed");
} else {
crm_debug_2("Broadcast message sent...");
}
} else {
if(hb_conn->llc_ops->sendclustermsg(hb_conn, msg) != HA_OK) {
all_is_good = FALSE;
crm_err("Broadcast Send failed");
} else {
crm_debug_2("Broadcast message sent...");
}
}
if(all_is_good == FALSE && hb_conn != NULL) {
IPC_Channel *ipc = NULL;
IPC_Queue *send_q = NULL;
if(hb_conn->llc_ops->chan_is_connected(hb_conn) != HA_OK) {
ipc = hb_conn->llc_ops->ipcchan(hb_conn);
}
if(ipc != NULL) {
/* ipc->ops->resume_io(ipc); */
send_q = ipc->send_queue;
}
if(send_q != NULL) {
CRM_CHECK(send_q->current_qlen < send_q->max_qlen, ;);
}
}
crm_log_message_adv(all_is_good?LOG_MSG:LOG_WARNING,"HA[outbound]",msg);
crm_diff_mem_stats(LOG_DEBUG, LOG_DEBUG, __PRETTY_FUNCTION__, NULL, &saved_stats);
return all_is_good;
}
/* frees msg */
gboolean
send_ipc_message(IPC_Channel *ipc_client, HA_Message *msg)
{
gboolean all_is_good = TRUE;
int fail_level = LOG_WARNING;
cl_mem_stats_t saved_stats;
crm_save_mem_stats(__PRETTY_FUNCTION__, &saved_stats);
if(ipc_client != NULL && ipc_client->conntype == IPC_CLIENT) {
fail_level = LOG_ERR;
}
if (msg == NULL) {
crm_err("cant send NULL message");
all_is_good = FALSE;
} else if (ipc_client == NULL) {
crm_err("cant send message without an IPC Channel");
all_is_good = FALSE;
} else if(ipc_client->ops->get_chan_status(ipc_client) != IPC_CONNECT) {
crm_log_maybe(fail_level, "IPC Channel to %d is not connected",
(int)ipc_client->farside_pid);
all_is_good = FALSE;
}
if(all_is_good && msg2ipcchan(msg, ipc_client) != HA_OK) {
crm_log_maybe(fail_level, "Could not send IPC message to %d",
(int)ipc_client->farside_pid);
all_is_good = FALSE;
if(ipc_client->ops->get_chan_status(ipc_client) != IPC_CONNECT) {
crm_log_maybe(fail_level,
"IPC Channel to %d is no longer connected",
(int)ipc_client->farside_pid);
} else if(ipc_client->conntype == IPC_CLIENT) {
- CRM_CHECK(ipc_client->send_queue->current_qlen < ipc_client->send_queue->max_qlen, ;);
+ if(ipc_client->send_queue->current_qlen >= ipc_client->send_queue->max_qlen) {
+ crm_err("Send queue to %d (size=%d) full.",
+ ipc_client->farside_pid,
+ (int)ipc_client->send_queue->max_qlen);
+ }
}
}
/* ipc_client->ops->resume_io(ipc_client); */
crm_log_message_adv(all_is_good?LOG_MSG:LOG_WARNING,"IPC[outbound]",msg);
crm_diff_mem_stats(LOG_DEBUG, LOG_DEBUG, __PRETTY_FUNCTION__, NULL, &saved_stats);
return all_is_good;
}
void
default_ipc_connection_destroy(gpointer user_data)
{
return;
}
int
init_server_ipc_comms(
char *channel_name,
gboolean (*channel_client_connect)(IPC_Channel *newclient,gpointer user_data),
void (*channel_connection_destroy)(gpointer user_data))
{
/* the clients wait channel is the other source of events.
* This source delivers the clients connection events.
* listen to this source at a relatively lower priority.
*/
char commpath[SOCKET_LEN];
IPC_WaitConnection *wait_ch;
sprintf(commpath, CRM_SOCK_DIR "/%s", channel_name);
wait_ch = wait_channel_init(commpath);
if (wait_ch == NULL) {
return 1;
}
G_main_add_IPC_WaitConnection(
G_PRIORITY_LOW, wait_ch, NULL, FALSE,
channel_client_connect, channel_name,
channel_connection_destroy);
crm_debug_3("Listening on: %s", commpath);
return 0;
}
GCHSource*
init_client_ipc_comms(const char *channel_name,
gboolean (*dispatch)(
IPC_Channel* source_data, gpointer user_data),
void *client_data, IPC_Channel **ch)
{
IPC_Channel *a_ch = NULL;
GCHSource *the_source = NULL;
void *callback_data = client_data;
a_ch = init_client_ipc_comms_nodispatch(channel_name);
if(ch != NULL) {
*ch = a_ch;
if(callback_data == NULL) {
callback_data = a_ch;
}
}
if(a_ch == NULL) {
crm_warn("Setup of client connection failed,"
" not adding channel to mainloop");
return NULL;
}
if(dispatch == NULL) {
crm_warn("No dispatch method specified..."
"maybe you meant init_client_ipc_comms_nodispatch()?");
} else {
crm_debug_3("Adding dispatch method to channel");
the_source = G_main_add_IPC_Channel(
G_PRIORITY_HIGH, a_ch, FALSE, dispatch, callback_data,
default_ipc_connection_destroy);
}
return the_source;
}
IPC_Channel *
init_client_ipc_comms_nodispatch(const char *channel_name)
{
IPC_Channel *ch;
GHashTable *attrs;
static char path[] = IPC_PATH_ATTR;
char *commpath = NULL;
int local_socket_len = 2; /* 2 = '/' + '\0' */
local_socket_len += strlen(channel_name);
local_socket_len += strlen(CRM_SOCK_DIR);
crm_malloc0(commpath, local_socket_len);
if(commpath != NULL) {
sprintf(commpath, CRM_SOCK_DIR "/%s", channel_name);
commpath[local_socket_len - 1] = '\0';
crm_debug_3("Attempting to talk on: %s", commpath);
}
attrs = g_hash_table_new(g_str_hash,g_str_equal);
g_hash_table_insert(attrs, path, commpath);
ch = ipc_channel_constructor(IPC_ANYTYPE, attrs);
g_hash_table_destroy(attrs);
if (ch == NULL) {
crm_err("Could not access channel on: %s", commpath);
crm_free(commpath);
return NULL;
} else if (ch->ops->initiate_connection(ch) != IPC_OK) {
crm_debug("Could not init comms on: %s", commpath);
ch->ops->destroy(ch);
crm_free(commpath);
return NULL;
}
ch->ops->set_recv_qlen(ch, 100);
ch->ops->set_send_qlen(ch, 100);
/* ch->should_send_block = TRUE; */
crm_debug_3("Processing of %s complete", commpath);
crm_free(commpath);
return ch;
}
IPC_WaitConnection *
wait_channel_init(char daemonsocket[])
{
IPC_WaitConnection *wait_ch;
mode_t mask;
char path[] = IPC_PATH_ATTR;
GHashTable * attrs;
attrs = g_hash_table_new(g_str_hash,g_str_equal);
g_hash_table_insert(attrs, path, daemonsocket);
mask = umask(0);
wait_ch = ipc_wait_conn_constructor(IPC_ANYTYPE, attrs);
if (wait_ch == NULL) {
cl_perror("Can't create wait channel of type %s",
IPC_ANYTYPE);
exit(1);
}
mask = umask(mask);
g_hash_table_destroy(attrs);
return wait_ch;
}
longclock_t ipc_call_start = 0;
longclock_t ipc_call_stop = 0;
longclock_t ipc_call_diff = 0;
gboolean
subsystem_msg_dispatch(IPC_Channel *sender, void *user_data)
{
int lpc = 0;
HA_Message *msg = NULL;
ha_msg_input_t *new_input = NULL;
gboolean all_is_well = TRUE;
const char *sys_to;
const char *task;
while(IPC_ISRCONN(sender)) {
gboolean process = FALSE;
if(sender->ops->is_message_pending(sender) == 0) {
break;
}
msg = msgfromIPC_noauth(sender);
if (msg == NULL) {
crm_err("No message from %d this time",
sender->farside_pid);
continue;
}
lpc++;
new_input = new_ha_msg_input(msg);
crm_msg_del(msg);
msg = NULL;
crm_log_message(LOG_MSG, new_input->msg);
sys_to = cl_get_string(new_input->msg, F_CRM_SYS_TO);
task = cl_get_string(new_input->msg, F_CRM_TASK);
if(safe_str_eq(task, CRM_OP_HELLO)) {
process = TRUE;
} else if(sys_to == NULL) {
crm_err("Value of %s was NULL!!", F_CRM_SYS_TO);
} else if(task == NULL) {
crm_err("Value of %s was NULL!!", F_CRM_TASK);
} else {
process = TRUE;
}
if(process){
gboolean (*process_function)
(HA_Message *msg, crm_data_t *data, IPC_Channel *sender) = NULL;
process_function = user_data;
#ifdef MSG_LOG
crm_log_message_adv(
LOG_MSG, __FUNCTION__, new_input->msg);
#endif
if(ipc_call_diff_max_ms > 0) {
ipc_call_start = time_longclock();
}
if(FALSE == process_function(
new_input->msg, new_input->xml, sender)) {
crm_warn("Received a message destined for %s"
" by mistake", sys_to);
}
if(ipc_call_diff_max_ms > 0) {
unsigned int ipc_call_diff_ms = 0;
ipc_call_stop = time_longclock();
ipc_call_diff = sub_longclock(
ipc_call_stop, ipc_call_start);
ipc_call_diff_ms = longclockto_ms(
ipc_call_diff);
if(ipc_call_diff_ms > ipc_call_diff_max_ms) {
crm_err("%s took %dms to complete",
sys_to, ipc_call_diff_ms);
}
}
} else {
#ifdef MSG_LOG
crm_log_message_adv(
LOG_ERR, NULL, new_input->msg);
#endif
}
delete_ha_msg_input(new_input);
new_input = NULL;
if(sender->ch_status == IPC_CONNECT) {
break;
}
}
crm_debug_2("Processed %d messages", lpc);
if (sender->ch_status != IPC_CONNECT) {
crm_err("The server %d has left us: Shutting down...NOW",
sender->farside_pid);
exit(1); /* shutdown properly later */
return !all_is_well;
}
return all_is_well;
}
gboolean
is_ipc_empty(IPC_Channel *ch)
{
if(ch == NULL) {
return TRUE;
} else if(ch->send_queue->current_qlen == 0
&& ch->recv_queue->current_qlen == 0) {
return TRUE;
}
return FALSE;
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Wed, Oct 15, 11:47 PM (2 h, 47 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2530565
Default Alt Text
(72 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment