Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/crm/crmd/lrm.c b/crm/crmd/lrm.c
index 0229a266a0..91047e1606 100644
--- a/crm/crmd/lrm.c
+++ b/crm/crmd/lrm.c
@@ -1,1600 +1,1599 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <portability.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crmd_fsa.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h> /* for access */
#include <heartbeat.h>
#include <clplumbing/cl_signal.h>
#include <errno.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crmd.h>
#include <crmd_messages.h>
#include <crmd_callbacks.h>
#include <crmd_lrm.h>
#include <lrm/raexec.h>
#include <crm/dmalloc_wrapper.h>
char *make_stop_id(const char *rsc, int call_id);
gboolean verify_stopped(gboolean force, int log_level);
gboolean resource_stopped(gpointer key, gpointer value, gpointer user_data);
gboolean build_operation_update(
crm_data_t *rsc_list, lrm_op_t *op, const char *src, int lpc);
gboolean build_active_RAs(crm_data_t *rsc_list);
gboolean is_rsc_active(const char *rsc_id);
void do_update_resource(lrm_op_t *op);
gboolean process_lrm_event(lrm_op_t *op);
enum crmd_fsa_input do_lrm_rsc_op(
lrm_rsc_t *rsc, const char *operation,
crm_data_t *msg, HA_Message *request);
enum crmd_fsa_input do_fake_lrm_op(gpointer data);
void stop_recurring_action(
gpointer key, gpointer value, gpointer user_data);
gboolean remove_recurring_action(
gpointer key, gpointer value, gpointer user_data);
lrm_op_t *construct_op(
crm_data_t *rsc_op, const char *rsc_id, const char *operation);
void send_direct_ack(const char *to_host, const char *to_sys,
lrm_op_t* op, const char *rsc_id);
void free_recurring_op(gpointer value);
GHashTable *monitors = NULL;
GHashTable *resources = NULL;
GHashTable *shutdown_ops = NULL;
int num_lrm_register_fails = 0;
int max_lrm_register_fails = 30;
enum crmd_rscstate {
crmd_rscstate_NULL,
crmd_rscstate_START,
crmd_rscstate_START_PENDING,
crmd_rscstate_START_OK,
crmd_rscstate_START_FAIL,
crmd_rscstate_STOP,
crmd_rscstate_STOP_PENDING,
crmd_rscstate_STOP_OK,
crmd_rscstate_STOP_FAIL,
crmd_rscstate_MON,
crmd_rscstate_MON_PENDING,
crmd_rscstate_MON_OK,
crmd_rscstate_MON_FAIL,
crmd_rscstate_GENERIC_PENDING,
crmd_rscstate_GENERIC_OK,
crmd_rscstate_GENERIC_FAIL
};
const char *crmd_rscstate2string(enum crmd_rscstate state);
const char *
crmd_rscstate2string(enum crmd_rscstate state)
{
switch(state) {
case crmd_rscstate_NULL:
return NULL;
case crmd_rscstate_START:
return CRMD_ACTION_START;
case crmd_rscstate_START_PENDING:
return CRMD_ACTION_START_PENDING;
case crmd_rscstate_START_OK:
return CRMD_ACTION_STARTED;
case crmd_rscstate_START_FAIL:
return CRMD_ACTION_START_FAIL;
case crmd_rscstate_STOP:
return CRMD_ACTION_STOP;
case crmd_rscstate_STOP_PENDING:
return CRMD_ACTION_STOP_PENDING;
case crmd_rscstate_STOP_OK:
return CRMD_ACTION_STOPPED;
case crmd_rscstate_STOP_FAIL:
return CRMD_ACTION_STOP_FAIL;
case crmd_rscstate_MON:
return CRMD_ACTION_MON;
case crmd_rscstate_MON_PENDING:
return CRMD_ACTION_MON_PENDING;
case crmd_rscstate_MON_OK:
return CRMD_ACTION_MON_OK;
case crmd_rscstate_MON_FAIL:
return CRMD_ACTION_MON_FAIL;
case crmd_rscstate_GENERIC_PENDING:
return CRMD_ACTION_GENERIC_PENDING;
case crmd_rscstate_GENERIC_OK:
return CRMD_ACTION_GENERIC_OK;
case crmd_rscstate_GENERIC_FAIL:
return CRMD_ACTION_GENERIC_FAIL;
}
return "<unknown>";
}
GCHSource *lrm_source = NULL;
/* A_LRM_CONNECT */
enum crmd_fsa_input
do_lrm_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input,
fsa_data_t *msg_data)
{
int ret = HA_OK;
if(action & A_LRM_DISCONNECT) {
verify_stopped(TRUE, LOG_ERR);
if(lrm_source) {
crm_debug("Removing LRM connection from MainLoop");
if(G_main_del_IPC_Channel(lrm_source) == FALSE) {
crm_err("Could not remove LRM connection"
" from MainLoop");
}
lrm_source = NULL;
}
if(fsa_lrm_conn) {
fsa_lrm_conn->lrm_ops->signoff(fsa_lrm_conn);
crm_info("Disconnected from the LRM");
clear_bit_inplace(fsa_input_register, R_LRM_CONNECTED);
fsa_lrm_conn = NULL;
}
/* TODO: Clean up the hashtable */
}
if(action & A_LRM_CONNECT) {
ret = HA_OK;
monitors = g_hash_table_new_full(
g_str_hash, g_str_equal,
g_hash_destroy_str, free_recurring_op);
resources = g_hash_table_new_full(
g_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
shutdown_ops = g_hash_table_new_full(
g_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
fsa_lrm_conn = ll_lrm_new(XML_CIB_TAG_LRM);
if(NULL == fsa_lrm_conn) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
ret = HA_FAIL;
}
if(ret == HA_OK) {
crm_debug("Connecting to the LRM");
ret = fsa_lrm_conn->lrm_ops->signon(
fsa_lrm_conn, CRM_SYSTEM_CRMD);
}
if(ret != HA_OK) {
if(++num_lrm_register_fails < max_lrm_register_fails) {
crm_warn("Failed to sign on to the LRM %d"
" (%d max) times",
num_lrm_register_fails,
max_lrm_register_fails);
crm_timer_start(wait_timer);
crmd_fsa_stall(NULL);
return I_NULL;
}
}
if(ret == HA_OK) {
crm_debug_4("LRM: set_lrm_callback...");
ret = fsa_lrm_conn->lrm_ops->set_lrm_callback(
fsa_lrm_conn, lrm_op_callback);
if(ret != HA_OK) {
crm_err("Failed to set LRM callbacks");
}
}
if(ret != HA_OK) {
crm_err("Failed to sign on to the LRM %d"
" (max) times", num_lrm_register_fails);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return I_NULL;
}
/* TODO: create a destroy handler that causes
* some recovery to happen
*/
lrm_source = G_main_add_IPC_Channel(
G_PRIORITY_LOW,
fsa_lrm_conn->lrm_ops->ipcchan(fsa_lrm_conn),
FALSE, lrm_dispatch, fsa_lrm_conn,
default_ipc_connection_destroy);
set_bit_inplace(fsa_input_register, R_LRM_CONNECTED);
crm_debug("LRM connection established");
}
if(action & ~(A_LRM_CONNECT|A_LRM_DISCONNECT)) {
crm_err("Unexpected action %s in %s",
fsa_action2string(action), __FUNCTION__);
}
return I_NULL;
}
static void
ghash_print_pending(gpointer key, gpointer value, gpointer user_data)
{
const char *action = key;
int *log_level = user_data;
crm_log_maybe(*log_level, "Pending action: %s", action);
}
gboolean
verify_stopped(gboolean force, int log_level)
{
GListPtr lrm_list = NULL;
crm_info("Checking for active resources before exit");
if(fsa_lrm_conn == NULL) {
crm_err("Exiting with no LRM connection..."
" resources may be active!");
return TRUE;
}
if(g_hash_table_size(shutdown_ops) > 0) {
crm_log_maybe(log_level,
"%d pending LRM operations at shutdown%s",
g_hash_table_size(shutdown_ops),
force?"":"... waiting");
if(force || !is_set(fsa_input_register, R_SENT_RSC_STOP)) {
g_hash_table_foreach(
shutdown_ops, ghash_print_pending, &log_level);
}
if(force == FALSE) {
return FALSE;
}
}
lrm_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn);
slist_iter(
rsc_id, char, lrm_list, lpc,
if(is_rsc_active(rsc_id) == FALSE) {
continue;
}
crm_err("Resource %s was active at shutdown."
" You may ignore this error if it is unmanaged.",
rsc_id);
);
set_bit_inplace(fsa_input_register, R_SENT_RSC_STOP);
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
return TRUE;
}
gboolean
build_operation_update(
crm_data_t *xml_rsc, lrm_op_t *op, const char *src, int lpc)
{
char *fail_state = NULL;
const char *state = NULL;
crm_data_t *xml_op = NULL;
char *op_id = NULL;
const char *caller_version = NULL;
char *digest = NULL;
crm_data_t *args_xml = NULL;
crm_data_t *args_parent = NULL;
CRM_DEV_ASSERT(op != NULL);
if(crm_assert_failed) {
return FALSE;
}
crm_debug_2("%s: Updating resouce %s after %s %s op",
src, op->rsc_id, op_status2text(op->op_status), op->op_type);
if(op->op_status == LRM_OP_CANCELLED) {
crm_debug_3("Ignoring cancelled op");
return TRUE;
}
if(AM_I_DC) {
caller_version = CRM_FEATURE_SET;
} else if(fsa_our_dc_version != NULL) {
caller_version = fsa_our_dc_version;
} else {
/* there is a small risk in formerly mixed clusters that
* it will be sub-optimal.
* however with our upgrade policy, the update we send
* should still be completely supported anyway
*/
caller_version = g_hash_table_lookup(
op->params, XML_ATTR_CRM_VERSION);
crm_warn("Falling back to operation originator version: %s",
caller_version);
}
crm_debug_3("DC version: %s", caller_version);
if(safe_str_eq(op->op_type, CRMD_ACTION_NOTIFY)) {
const char *n_type = g_hash_table_lookup(
op->params, crm_meta_name("notify_type"));
const char *n_task = g_hash_table_lookup(
op->params, crm_meta_name("notify_operation"));
#if CRM_DEPRECATED_SINCE_2_0_5
if(n_type == NULL) {
n_type = g_hash_table_lookup(op->params, "notify_type");
}
if(n_task == NULL) {
n_task = g_hash_table_lookup(op->params, "notify_operation");
}
#endif
CRM_DEV_ASSERT(n_type != NULL);
CRM_DEV_ASSERT(n_task != NULL);
op_id = generate_notify_key(op->rsc_id, n_type, n_task);
/* these are not yet allowed to fail */
op->op_status = LRM_OP_DONE;
op->rc = 0;
} else {
op_id = generate_op_key(op->rsc_id, op->op_type, op->interval);
}
/* Handle recurring ops - infer last op_status */
if(op->op_status == LRM_OP_PENDING && op->interval > 0) {
if(op->rc == 0) {
crm_debug("Mapping pending operation to DONE");
op->op_status = LRM_OP_DONE;
} else {
crm_debug("Mapping pending operation to ERROR");
op->op_status = LRM_OP_ERROR;
}
}
xml_op = find_entity(xml_rsc, XML_LRM_TAG_RSC_OP, op_id);
if(xml_op != NULL) {
const char *old_status_s = crm_element_value(
xml_op, XML_LRM_ATTR_OPSTATUS);
int old_status = crm_parse_int(old_status_s, "-2");
int log_level = LOG_ERR;
if(old_status_s == NULL) {
crm_err("No value for "XML_LRM_ATTR_OPSTATUS);
} else if(old_status == op->op_status) {
/* safe to mask */
log_level = LOG_WARNING;
} else if(old_status == LRM_OP_PENDING){
/* ??safe to mask?? */
/* log_level = LOG_WARNING; */
}
crm_log_maybe(log_level,
"Duplicate %s operations in get_cur_state()",
op_id);
crm_log_maybe(log_level+2,
"New entry: %s %s (call=%d, status=%s)",
op_id, op->user_data, op->call_id,
op_status2text(op->op_status));
crm_log_xml(log_level+2, "Existing entry", xml_op);
crm_free(op_id);
return FALSE;
} else {
xml_op = create_xml_node(xml_rsc, XML_LRM_TAG_RSC_OP);
}
crm_xml_add(xml_op, XML_ATTR_ID, op_id);
crm_free(op_id);
crm_xml_add(xml_op, XML_LRM_ATTR_TASK, op->op_type);
crm_xml_add(xml_op, XML_ATTR_ORIGIN, src);
if(op->user_data == NULL) {
op->user_data = generate_transition_key(-1, fsa_our_uname);
}
#if CRM_DEPRECATED_SINCE_2_0_3
if(compare_version("1.0.3", caller_version) > 0) {
CRM_CHECK(FALSE, ; );
fail_state = generate_transition_magic_v202(
op->user_data, op->op_status);
} else {
fail_state = generate_transition_magic(
op->user_data, op->op_status, op->rc);
}
#else
fail_state = generate_transition_magic(
op->user_data, op->op_status, op->rc);
#endif
crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data);
crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, fail_state);
crm_free(fail_state);
switch(op->op_status) {
case LRM_OP_PENDING:
break;
case LRM_OP_CANCELLED:
crm_err("What to do here");
break;
case LRM_OP_ERROR:
case LRM_OP_TIMEOUT:
case LRM_OP_NOTSUPPORTED:
crm_debug("Resource action %s/%s %s: %d",
op->rsc_id, op->op_type,
op_status2text(op->op_status), op->rc);
break;
case LRM_OP_DONE:
if(safe_str_eq(CRMD_ACTION_START, op->op_type)) {
state = CRMD_ACTION_STARTED;
} else if(safe_str_eq(CRMD_ACTION_STOP, op->op_type)) {
state = CRMD_ACTION_STOPPED;
} else if(safe_str_eq(CRMD_ACTION_MON, op->op_type)) {
state = CRMD_ACTION_STARTED;
} else {
crm_debug("Using status \"%s\" for op \"%s\"",
CRMD_ACTION_GENERIC_OK, op->op_type);
state = CRMD_ACTION_GENERIC_OK;
}
break;
}
crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id);
/* set these on 'xml_rsc' too to make life easy for the PE */
crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version);
crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc);
crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status);
crm_xml_add_int(xml_op, XML_LRM_ATTR_INTERVAL, op->interval);
/* this will enable us to later determin that the
* resource's parameters have changed and we should force
* a restart
*/
args_parent = NULL;
#if CRM_DEPRECATED_SINCE_2_0_4
if(compare_version("1.0.4", caller_version) > 0) {
args_parent = xml_op;
}
#endif
args_xml = create_xml_node(args_parent, XML_TAG_PARAMS);
g_hash_table_foreach(op->params, hash2field, args_xml);
filter_action_parameters(args_xml, caller_version);
digest = calculate_xml_digest(args_xml, TRUE);
crm_xml_add(xml_op, XML_LRM_ATTR_OP_DIGEST, digest);
crm_free(digest);
if(args_parent == NULL) {
free_xml(args_xml);
}
return TRUE;
}
gboolean
is_rsc_active(const char *rsc_id)
{
GList *op_list = NULL;
gboolean active = FALSE;
lrm_rsc_t *the_rsc = NULL;
state_flag_t cur_state = 0;
int max_call_id = -1;
if(fsa_lrm_conn == NULL) {
return FALSE;
}
the_rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rsc_id);
crm_debug_2("Processing lrm_rsc_t entry %s", rsc_id);
if(the_rsc == NULL) {
crm_err("NULL resource returned from the LRM");
return FALSE;
}
op_list = the_rsc->ops->get_cur_state(the_rsc, &cur_state);
crm_debug_3("\tcurrent state:%s",cur_state==LRM_RSC_IDLE?"Idle":"Busy");
slist_iter(
op, lrm_op_t, op_list, llpc,
crm_debug("Processing op %s_%d (%d) for %s (status=%d, rc=%d)",
op->op_type, op->interval, op->call_id, the_rsc->id,
op->op_status, op->rc);
CRM_ASSERT(max_call_id <= op->call_id);
if(safe_str_eq(op->op_type, CRMD_ACTION_STOP)) {
active = FALSE;
} else if(op->rc == EXECRA_NOT_RUNNING) {
active = FALSE;
} else {
active = TRUE;
}
max_call_id = op->call_id;
lrm_free_op(op);
);
g_list_free(op_list);
lrm_free_rsc(the_rsc);
return active;
}
gboolean
build_active_RAs(crm_data_t *rsc_list)
{
GList *op_list = NULL;
GList *lrm_list = NULL;
gboolean found_op = FALSE;
state_flag_t cur_state = 0;
if(fsa_lrm_conn == NULL) {
return FALSE;
}
lrm_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn);
slist_iter(
rid, char, lrm_list, lpc,
lrm_rsc_t *the_rsc =
fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid);
crm_data_t *xml_rsc = create_xml_node(
rsc_list, XML_LRM_TAG_RESOURCE);
int max_call_id = -1;
crm_debug("Processing lrm_rsc_t entry %s", rid);
if(the_rsc == NULL) {
crm_err("NULL resource returned from the LRM");
continue;
}
crm_xml_add(xml_rsc, XML_ATTR_ID, the_rsc->id);
crm_xml_add(xml_rsc, XML_ATTR_TYPE, the_rsc->type);
crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, the_rsc->class);
crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER,the_rsc->provider);
op_list = the_rsc->ops->get_cur_state(the_rsc, &cur_state);
crm_debug_2("\tcurrent state:%s",
cur_state==LRM_RSC_IDLE?"Idle":"Busy");
slist_iter(
op, lrm_op_t, op_list, llpc,
crm_debug_2("Processing op %s for %s (status=%d, rc=%d)",
op->op_type, the_rsc->id, op->op_status, op->rc);
if(max_call_id < op->call_id) {
build_operation_update(
xml_rsc, op, __FUNCTION__, llpc);
} else if(max_call_id > op->call_id) {
crm_err("Bad call_id in list=%d. Previous call_id=%d",
op->call_id, max_call_id);
} else {
crm_warn("lrm->get_cur_state() returned"
" duplicate entries for call_id=%d",
op->call_id);
}
max_call_id = op->call_id;
found_op = TRUE;
lrm_free_op(op);
);
if(found_op == FALSE && g_list_length(op_list) != 0) {
crm_err("Could not properly determin last op"
" for %s from %d entries", the_rsc->id,
g_list_length(op_list));
}
g_list_free(op_list);
lrm_free_rsc(the_rsc);
);
g_list_free(lrm_list);
return TRUE;
}
crm_data_t*
do_lrm_query(gboolean is_replace)
{
gboolean shut_down = FALSE;
crm_data_t *xml_result= NULL;
crm_data_t *xml_state = NULL;
crm_data_t *xml_data = NULL;
crm_data_t *rsc_list = NULL;
const char *exp_state = CRMD_JOINSTATE_MEMBER;
if(is_set(fsa_input_register, R_SHUTDOWN)) {
exp_state = CRMD_STATE_INACTIVE;
shut_down = TRUE;
}
xml_state = create_node_state(
fsa_our_uname, ACTIVESTATUS, XML_BOOLEAN_TRUE,
ONLINESTATUS, CRMD_JOINSTATE_MEMBER, exp_state,
!shut_down, __FUNCTION__);
xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM);
crm_xml_add(xml_data, XML_ATTR_ID, fsa_our_uuid);
rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES);
/* Build a list of active (not always running) resources */
build_active_RAs(rsc_list);
if(is_replace) {
crm_xml_add(xml_state, XML_CIB_ATTR_REPLACE, XML_CIB_TAG_LRM);
}
xml_result = create_cib_fragment(xml_state, XML_CIB_TAG_STATUS);
crm_log_xml_debug_3(xml_state, "Current state of the LRM");
return xml_result;
}
struct recurring_op_s
{
char *rsc_id;
int call_id;
};
static void
cancel_monitor(lrm_rsc_t *rsc, const char *key)
{
struct recurring_op_s *existing_op = NULL;
if(rsc == NULL) {
crm_err("No resource to cancel and operation for");
return;
} else if(key == NULL) {
crm_err("No operation to cancel");
return;
}
existing_op = g_hash_table_lookup(monitors, key);
if(existing_op != NULL) {
crm_debug("Cancelling previous invocation of %s (%d)",
key, existing_op->call_id);
/* cancel it so we can then restart it without conflict */
if(rsc->ops->cancel_op(rsc, existing_op->call_id) != HA_OK) {
crm_info("Couldn't cancel %s (%d)",
key, existing_op->call_id);
} else {
g_hash_table_remove(monitors, key);
}
} else {
crm_debug("No previous invocation of %s", key);
}
}
static lrm_rsc_t *
get_lrm_resource(crm_data_t *resource, crm_data_t *op_msg, gboolean do_create)
{
char rid[64];
lrm_rsc_t *rsc = NULL;
const char *short_id = ID(resource);
const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG);
crm_debug_2("Retrieving %s from the LRM.", short_id);
CRM_CHECK(short_id != NULL, return NULL);
if(rsc == NULL) {
/* check if its already there (short name) */
strncpy(rid, short_id, 64);
rid[63] = 0;
rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid);
}
if(rsc == NULL && long_id != NULL) {
/* try the long name instead */
strncpy(rid, long_id, 64);
rid[63] = 0;
rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid);
}
if(rsc == NULL && do_create) {
/* add it to the LRM */
const char *type = crm_element_value(resource, XML_ATTR_TYPE);
const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS);
const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER);
GHashTable *params = xml2list(op_msg);
CRM_CHECK(class != NULL, return NULL);
CRM_CHECK(type != NULL, return NULL);
crm_debug("Adding rsc %s before operation", short_id);
strncpy(rid, short_id, 64);
rid[63] = 0;
#if CRM_DEPRECATED_SINCE_2_0_3
if(op_msg != NULL) {
if(g_hash_table_lookup(
params, XML_ATTR_CRM_VERSION) == NULL) {
g_hash_table_destroy(params);
params = xml2list_202(op_msg);
}
}
#endif
if(g_hash_table_size(params) == 0) {
crm_log_xml_warn(op_msg, "EmptyParams");
}
fsa_lrm_conn->lrm_ops->add_rsc(
fsa_lrm_conn, rid, class, type, provider, params);
rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid);
g_hash_table_destroy(params);
if(rsc == NULL) {
fsa_data_t *msg_data = NULL;
crm_err("Could not add resource %s to LRM", rid);
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
}
}
return rsc;
}
/* A_LRM_INVOKE */
enum crmd_fsa_input
do_lrm_invoke(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input,
fsa_data_t *msg_data)
{
const char *crm_op = NULL;
const char *from_sys = NULL;
const char *from_host = NULL;
const char *operation = NULL;
enum crmd_fsa_input next_input = I_NULL;
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
crm_op = cl_get_string(input->msg, F_CRM_TASK);
from_sys = cl_get_string(input->msg, F_CRM_SYS_FROM);
if(safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
from_host = cl_get_string(input->msg, F_CRM_HOST_FROM);
}
crm_debug_2("LRM command from: %s", from_sys);
if(safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) {
operation = CRMD_ACTION_DELETE;
} else if(safe_str_eq(operation, CRM_OP_LRM_REFRESH)) {
crm_op = CRM_OP_LRM_REFRESH;
} else if(input->xml != NULL) {
operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK);
}
if(crm_op != NULL && safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
enum cib_errors rc = cib_ok;
crm_data_t *fragment = do_lrm_query(TRUE);
crm_info("Forcing a local LRM refresh");
fsa_cib_update(XML_CIB_TAG_STATUS, fragment,
cib_quorum_override, rc);
free_xml(fragment);
} else if(crm_op != NULL && safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) {
crm_data_t *data = do_lrm_query(FALSE);
HA_Message *reply = create_reply(input->msg, data);
if(relay_message(reply, TRUE) == FALSE) {
crm_err("Unable to route reply");
crm_log_message(LOG_ERR, reply);
crm_msg_del(reply);
}
free_xml(data);
} else if(safe_str_eq(operation, CRM_OP_PROBED)
|| safe_str_eq(crm_op, CRM_OP_REPROBE)) {
const char *probed = XML_BOOLEAN_TRUE;
if(safe_str_eq(crm_op, CRM_OP_REPROBE)) {
probed = XML_BOOLEAN_FALSE;
}
update_attr(fsa_cib_conn, cib_none, XML_CIB_TAG_STATUS,
fsa_our_uuid, NULL, NULL, CRM_OP_PROBED, probed);
} else if(operation != NULL) {
lrm_rsc_t *rsc = NULL;
crm_data_t *params = NULL;
gboolean create_rsc = TRUE;
crm_data_t *xml_rsc = find_xml_node(
input->xml, XML_CIB_TAG_RESOURCE, TRUE);
CRM_CHECK(xml_rsc != NULL, return I_NULL);
/* only the first 16 chars are used by the LRM */
params = find_xml_node(input->xml, XML_TAG_ATTRS,TRUE);
if(safe_str_eq(operation, CRMD_ACTION_STOP)) {
create_rsc = FALSE;
}
rsc = get_lrm_resource(xml_rsc, input->xml, create_rsc);
if(rsc == NULL && create_rsc) {
crm_err("Invalid resource definition");
crm_log_xml_warn(input->xml, "Bad command");
} else if(rsc == NULL) {
lrm_op_t* op = NULL;
crm_err("Not creating resource for a stop event");
crm_log_xml_warn(input->xml, "Bad command");
op = construct_op(input->xml, ID(xml_rsc), operation);
op->op_status = LRM_OP_DONE;
op->rc = 0;
CRM_ASSERT(op != NULL);
send_direct_ack(from_host, from_sys, op, ID(xml_rsc));
free_lrm_op(op);
} else if(safe_str_eq(operation, CRMD_ACTION_CANCEL)) {
lrm_op_t* op = NULL;
char *op_key = NULL;
const char *op_task = NULL;
const char *op_interval = NULL;
CRM_CHECK(params != NULL,
crm_log_xml_warn(input->xml, "Bad command");
return I_NULL);
op_task = crm_element_value(params, crm_meta_name(XML_LRM_ATTR_TASK));
op_interval = crm_element_value(params, crm_meta_name("interval"));
#if CRM_DEPRECATED_SINCE_2_0_5
if(op_interval == NULL) {
op_interval = crm_element_value(params, "interval");
}
if(op_task == NULL) {
op_task = crm_element_value(params, XML_LRM_ATTR_TASK);
if(op_task == NULL) {
op_task = crm_element_value(params, "task");
}
}
#endif
CRM_CHECK(op_task != NULL,
crm_log_xml_warn(input->xml, "Bad command");
return I_NULL);
CRM_CHECK(op_interval != NULL,
crm_log_xml_warn(input->xml, "Bad command");
return I_NULL);
op = construct_op(input->xml, rsc->id, op_task);
CRM_ASSERT(op != NULL);
op_key = generate_op_key(
rsc->id,op_task,crm_parse_int(op_interval,"0"));
cancel_monitor(rsc, op_key);
op->op_status = LRM_OP_DONE;
op->rc = EXECRA_OK;
send_direct_ack(from_host, from_sys, op, rsc->id);
crm_free(op_key);
free_lrm_op(op);
} else if(safe_str_eq(operation, CRMD_ACTION_DELETE)) {
int rc = HA_OK;
lrm_op_t* op = NULL;
op = construct_op(input->xml, rsc->id, operation);
CRM_ASSERT(op != NULL);
op->op_status = LRM_OP_DONE;
op->rc = EXECRA_OK;
if(rsc == NULL) {
crm_debug("Resource %s was already removed",
rsc->id);
} else {
crm_info("Removing resource %s from the LRM",
rsc->id);
rc = fsa_lrm_conn->lrm_ops->delete_rsc(
fsa_lrm_conn, rsc->id);
if(rc != HA_OK) {
crm_err("Failed to remove resource %s",
rsc->id);
op->op_status = LRM_OP_ERROR;
op->rc = EXECRA_UNKNOWN_ERROR;
}
}
send_direct_ack(from_host, from_sys, op, rsc->id);
free_lrm_op(op);
} else if(rsc != NULL) {
next_input = do_lrm_rsc_op(
rsc, operation, input->xml, input->msg);
}
lrm_free_rsc(rsc);
} else {
crm_err("Operation was neither a lrm_query, nor a rsc op. %s",
crm_str(crm_op));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
return next_input;
}
lrm_op_t *
construct_op(crm_data_t *rsc_op, const char *rsc_id, const char *operation)
{
lrm_op_t *op = NULL;
const char *op_delay = NULL;
const char *op_timeout = NULL;
const char *op_interval = NULL;
const char *transition = NULL;
CRM_DEV_ASSERT(rsc_id != NULL);
crm_malloc0(op, sizeof(lrm_op_t));
op->op_type = crm_strdup(operation);
op->op_status = LRM_OP_PENDING;
op->rc = -1;
op->rsc_id = crm_strdup(rsc_id);
op->interval = 0;
op->timeout = 0;
op->start_delay = 0;
op->app_name = crm_strdup(CRM_SYSTEM_CRMD);
if(rsc_op == NULL) {
CRM_DEV_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation));
op->user_data = NULL;
op->user_data_len = 0;
/* the stop_all_resources() case
* by definition there is no DC (or they'd be shutting
* us down).
* So we should put our version here.
*/
op->params = g_hash_table_new_full(
g_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
g_hash_table_insert(op->params,
crm_strdup(XML_ATTR_CRM_VERSION),
crm_strdup(CRM_FEATURE_SET));
crm_debug_2("Constructed %s op for %s", operation, rsc_id);
return op;
}
op->params = xml2list(rsc_op);
#if CRM_DEPRECATED_SINCE_2_0_3
if(g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION) == NULL) {
g_hash_table_destroy(op->params);
op->params = xml2list_202(rsc_op);
}
#endif
if(op->params == NULL) {
CRM_DEV_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation));
}
op_delay = g_hash_table_lookup(op->params, crm_meta_name("start_delay"));
op_timeout = g_hash_table_lookup(op->params, crm_meta_name("timeout"));
op_interval = g_hash_table_lookup(op->params, crm_meta_name("interval"));
#if CRM_DEPRECATED_SINCE_2_0_5
if(op_delay == NULL) {
op_delay = g_hash_table_lookup(op->params, "start_delay");
}
if(op_timeout == NULL) {
op_timeout = g_hash_table_lookup(op->params, "timeout");
}
if(op_interval == NULL) {
op_interval = g_hash_table_lookup(op->params, "interval");
}
#endif
op->interval = crm_parse_int(op_interval, "0");
op->timeout = crm_parse_int(op_timeout, "0");
op->start_delay = crm_parse_int(op_delay, "0");
/* sanity */
if(op->interval < 0) {
op->interval = 0;
}
if(op->timeout < 0) {
op->timeout = 0;
}
if(op->start_delay < 0) {
op->start_delay = 0;
}
transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY);
CRM_CHECK(transition != NULL, return op);
op->user_data = crm_strdup(transition);
op->user_data_len = 1+strlen(op->user_data);
if(op->interval != 0) {
if(safe_str_eq(operation, CRMD_ACTION_START)
|| safe_str_eq(operation, CRMD_ACTION_STOP)) {
crm_err("Start and Stop actions cannot have an interval");
op->interval = 0;
}
}
crm_debug_2("Constructed %s op for %s: interval=%d",
operation, rsc_id, op->interval);
return op;
}
void
send_direct_ack(const char *to_host, const char *to_sys,
lrm_op_t* op, const char *rsc_id)
{
HA_Message *reply = NULL;
crm_data_t *update, *iter;
crm_data_t *fragment;
CRM_DEV_ASSERT(op != NULL);
if(crm_assert_failed) {
return;
}
if(op->rsc_id == NULL) {
CRM_DEV_ASSERT(rsc_id != NULL);
op->rsc_id = crm_strdup(rsc_id);
}
if(to_sys == NULL) {
to_sys = CRM_SYSTEM_TENGINE;
}
crm_info("ACK'ing resource op: %s for %s", op->op_type, op->rsc_id);
update = create_node_state(
fsa_our_uname, NULL, NULL, NULL, NULL, NULL, FALSE, __FUNCTION__);
iter = create_xml_node(update, XML_CIB_TAG_LRM);
crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
build_operation_update(iter, op, __FUNCTION__, 0);
fragment = create_cib_fragment(update, XML_CIB_TAG_STATUS);
reply = create_request(CRM_OP_INVOKE_LRM, fragment, to_host,
to_sys, CRM_SYSTEM_LRMD, NULL);
crm_debug("Sending ACK: %s", cl_get_string(reply, XML_ATTR_REFERENCE));
crm_log_xml_debug_2(update, "ACK Update");
crm_log_message_adv(LOG_DEBUG_3, "ACK Reply", reply);
if(relay_message(reply, TRUE) == FALSE) {
crm_log_message_adv(LOG_ERR, "Unable to route reply", reply);
crm_msg_del(reply);
}
free_xml(fragment);
free_xml(update);
}
enum crmd_fsa_input
do_lrm_rsc_op(lrm_rsc_t *rsc, const char *operation,
crm_data_t *msg, HA_Message *request)
{
int call_id = 0;
char *op_id = NULL;
lrm_op_t* op = NULL;
fsa_data_t *msg_data = NULL;
const char *transition = NULL;
CRM_CHECK(rsc != NULL, return I_NULL);
if(msg != NULL) {
transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
if(transition == NULL) {
crm_err("Missing transition");
crm_log_message(LOG_ERR, msg);
}
}
/* stop the monitor before stopping the resource */
if(safe_str_eq(operation, CRMD_ACTION_STOP)) {
g_hash_table_foreach(monitors, stop_recurring_action, rsc);
g_hash_table_foreach_remove(
monitors, remove_recurring_action, rsc);
}
/* now do the op */
op = construct_op(msg, rsc->id, operation);
crm_info("Performing op %s on %s (interval=%dms, key=%s)",
operation, rsc->id, op->interval, transition);
if((AM_I_DC == FALSE && fsa_state != S_NOT_DC)
|| (AM_I_DC && fsa_state != S_TRANSITION_ENGINE)) {
if(safe_str_neq(operation, CRMD_ACTION_STOP)) {
crm_info("Discarding attempt to perform action %s on %s"
" in state %s", operation, rsc->id,
fsa_state2string(fsa_state));
op->rc = 99;
op->op_status = LRM_OP_ERROR;
send_direct_ack(NULL, NULL, op, rsc->id);
free_lrm_op(op);
crm_free(op_id);
return I_NULL;
}
}
op_id = generate_op_key(rsc->id, op->op_type, op->interval);
if(op->interval > 0) {
cancel_monitor(rsc, op_id);
op->target_rc = CHANGED;
} else {
op->target_rc = EVERYTIME;
}
g_hash_table_replace(resources,crm_strdup(rsc->id), crm_strdup(op_id));
call_id = rsc->ops->perform_op(rsc, op);
if(call_id <= 0) {
crm_err("Operation %s on %s failed: %d",
operation, rsc->id, call_id);
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
} else {
/* record all operations so we can wait
* for them to complete during shutdown
*/
char *call_id_s = make_stop_id(rsc->id, call_id);
g_hash_table_replace(
shutdown_ops, call_id_s, crm_strdup(rsc->id));
crm_debug_2("Recording pending op: %s/%s %s",
rsc->id, operation, call_id_s);
if(op->interval > 0) {
struct recurring_op_s *op = NULL;
crm_malloc0(op, sizeof(struct recurring_op_s));
crm_debug_2("Adding recurring %s op for %s (%d)",
op_id, rsc->id, call_id);
op->call_id = call_id;
op->rsc_id = crm_strdup(rsc->id);
g_hash_table_insert(monitors, op_id, op);
op_id = NULL;
}
}
crm_free(op_id);
free_lrm_op(op);
return I_NULL;
}
void
stop_recurring_action(gpointer key, gpointer value, gpointer user_data)
{
lrm_rsc_t *rsc = user_data;
struct recurring_op_s *op = (struct recurring_op_s*)value;
if(safe_str_eq(op->rsc_id, rsc->id)) {
if(op->call_id > 0) {
crm_debug("Stopping recurring op %d for %s (%s)",
op->call_id, rsc->id, (char*)key);
rsc->ops->cancel_op(rsc, op->call_id);
} else {
crm_err("Invalid call_id %d for %s",
op->call_id, rsc->id);
}
}
}
gboolean
remove_recurring_action(gpointer key, gpointer value, gpointer user_data)
{
lrm_rsc_t *rsc = user_data;
struct recurring_op_s *op = (struct recurring_op_s*)value;
if(safe_str_eq(op->rsc_id, rsc->id)) {
return TRUE;
}
return FALSE;
}
void
free_recurring_op(gpointer value)
{
struct recurring_op_s *op = (struct recurring_op_s*)value;
crm_free(op->rsc_id);
crm_free(op);
}
void
free_lrm_op(lrm_op_t *op)
{
g_hash_table_destroy(op->params);
crm_free(op->user_data);
crm_free(op->output);
crm_free(op->rsc_id);
crm_free(op->op_type);
crm_free(op->app_name);
crm_free(op);
}
static void dup_attr(gpointer key, gpointer value, gpointer user_data)
{
g_hash_table_replace(user_data, crm_strdup(key), crm_strdup(value));
}
lrm_op_t *
copy_lrm_op(const lrm_op_t *op)
{
lrm_op_t *op_copy = NULL;
CRM_DEV_ASSERT(op != NULL);
if(crm_assert_failed) {
return NULL;
}
CRM_ASSERT(op->rsc_id != NULL);
crm_malloc0(op_copy, sizeof(lrm_op_t));
op_copy->op_type = crm_strdup(op->op_type);
/* input fields */
op_copy->params = g_hash_table_new_full(
g_str_hash, g_str_equal,
g_hash_destroy_str, g_hash_destroy_str);
if(op->params != NULL) {
g_hash_table_foreach(op->params, dup_attr, op_copy->params);
}
op_copy->timeout = op->timeout;
op_copy->interval = op->interval;
op_copy->target_rc = op->target_rc;
/* in the CRM, this is always a string */
if(op->user_data != NULL) {
op_copy->user_data = crm_strdup(op->user_data);
}
/* output fields */
op_copy->op_status = op->op_status;
op_copy->rc = op->rc;
op_copy->call_id = op->call_id;
op_copy->output = NULL;
op_copy->rsc_id = crm_strdup(op->rsc_id);
if(op->app_name != NULL) {
op_copy->app_name = crm_strdup(op->app_name);
}
if(op->output != NULL) {
op_copy->output = crm_strdup(op->output);
}
return op_copy;
}
lrm_rsc_t *
copy_lrm_rsc(const lrm_rsc_t *rsc)
{
lrm_rsc_t *rsc_copy = NULL;
if(rsc == NULL) {
return NULL;
}
crm_malloc0(rsc_copy, sizeof(lrm_rsc_t));
rsc_copy->id = crm_strdup(rsc->id);
rsc_copy->type = crm_strdup(rsc->type);
rsc_copy->class = NULL;
rsc_copy->provider = NULL;
if(rsc->class != NULL) {
rsc_copy->class = crm_strdup(rsc->class);
}
if(rsc->provider != NULL) {
rsc_copy->provider = crm_strdup(rsc->provider);
}
/* GHashTable* params; */
rsc_copy->params = NULL;
rsc_copy->ops = NULL;
return rsc_copy;
}
static void
cib_rsc_callback(const HA_Message *msg, int call_id, int rc,
crm_data_t *output, void *user_data)
{
if(rc != cib_ok) {
crm_err("Resource update %d failed: %s",
call_id, cib_error2string(rc));
} else {
crm_debug("Resource update %d complete", call_id);
}
}
void
do_update_resource(lrm_op_t* op)
{
/*
<status>
<nodes_status id=uname>
<lrm>
<lrm_resources>
<lrm_resource id=...>
</...>
*/
int rc = cib_ok;
crm_data_t *update, *iter;
CRM_CHECK(op != NULL, return);
update = create_node_state(
fsa_our_uname, NULL, NULL, NULL, NULL, NULL, FALSE, __FUNCTION__);
iter = create_xml_node(update, XML_CIB_TAG_LRM);
crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
if(op->interval == 0) {
lrm_rsc_t *rsc = NULL;
crm_debug_2("Updating %s resource definitions after %s op",
op->rsc_id, op->op_type);
rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, op->rsc_id);
crm_xml_add(iter, XML_ATTR_TYPE, rsc->type);
crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class);
crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER,rsc->provider);
lrm_free_rsc(rsc);
}
build_operation_update(iter, op, __FUNCTION__, 0);
/* make it an asyncronous call and be done with it
*
* Best case:
* the resource state will be discovered during
* the next signup or election.
*
* Bad case:
* we are shutting down and there is no DC at the time,
* but then why were we shutting down then anyway?
* (probably because of an internal error)
*
* Worst case:
* we get shot for having resources "running" when the really weren't
*
* the alternative however means blocking here for too long, which
* isnt acceptable
*/
fsa_cib_update(XML_CIB_TAG_STATUS, update, cib_quorum_override, rc);
if(rc > 0) {
/* the return code is a call number, not an error code */
crm_debug("Sent resource state update message: %d", rc);
add_cib_op_callback(rc, FALSE, NULL, cib_rsc_callback);
} else {
crm_err("Resource state update failed: %s",
cib_error2string(rc));
- CRM_DEV_ASSERT(rc == cib_ok);
}
free_xml(update);
}
enum crmd_fsa_input
do_lrm_event(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input,
fsa_data_t *msg_data)
{
lrm_op_t *op = NULL;
CRM_CHECK(msg_data->fsa_cause == C_LRM_OP_CALLBACK, return I_NULL);
op = fsa_typed_data(fsa_dt_lrm);
process_lrm_event(op);
return I_NULL;
}
gboolean
process_lrm_event(lrm_op_t *op)
{
CRM_CHECK(op != NULL, return I_NULL);
CRM_CHECK(op->rsc_id != NULL, return I_NULL);
if(op->rc == 8 || op->rc == 7) {
/* Leave it up to the TE/PE to decide if this is an error */
op->op_status = LRM_OP_DONE;
}
switch(op->op_status) {
case LRM_OP_PENDING:
/* this really shouldnt happen */
crm_err("LRM operation (%d) %s_%d on %s %s: %s",
op->call_id, op->op_type,
op->interval,
crm_str(op->rsc_id),
op_status2text(op->op_status),
execra_code2string(op->rc));
break;
case LRM_OP_ERROR:
crm_err("LRM operation (%d) %s_%d on %s %s: (%d) %s",
op->call_id, op->op_type,
op->interval,
crm_str(op->rsc_id),
op_status2text(op->op_status),
op->rc, execra_code2string(op->rc));
if(op->output != NULL) {
crm_debug("Result: %s", op->output);
}
break;
case LRM_OP_CANCELLED:
crm_warn("LRM operation (%d) %s_%d on %s %s",
op->call_id, op->op_type,
op->interval,
crm_str(op->rsc_id),
op_status2text(op->op_status));
break;
case LRM_OP_TIMEOUT:
crm_err("LRM operation (%d) %s_%d on %s %s",
op->call_id, op->op_type,
op->interval,
crm_str(op->rsc_id),
op_status2text(op->op_status));
break;
case LRM_OP_NOTSUPPORTED:
crm_err("LRM operation (%d) %s_%d on %s %s",
op->call_id, op->op_type,
op->interval,
crm_str(op->rsc_id),
op_status2text(op->op_status));
break;
case LRM_OP_DONE:
crm_info("LRM operation (%d) %s_%d on %s %s",
op->call_id, op->op_type,
op->interval,
crm_str(op->rsc_id),
op_status2text(op->op_status));
break;
}
if(op->op_status != LRM_OP_CANCELLED) {
do_update_resource(op);
if(op->interval > 0) {
crm_debug("Op %d %s_%s_%d returned",
op->call_id, op->rsc_id, op->op_type,
op->interval);
return TRUE;
}
} else if(op->interval == 0) {
crm_err("No update sent for cancelled op %d: %s_%s_%d",
op->call_id, op->rsc_id, op->op_type, op->interval);
}
if(g_hash_table_size(shutdown_ops) > 0) {
char *op_id = make_stop_id(op->rsc_id, op->call_id);
if(g_hash_table_remove(shutdown_ops, op_id)) {
crm_debug_2("Op %d %s_%s_%d is confirmed",
op->call_id, op->rsc_id, op->op_type,
op->interval);
crm_free(op_id);
return TRUE;
}
crm_free(op_id);
}
crm_err("Op %d %s_%s_%d not matched",
op->call_id, op->rsc_id, op->op_type, op->interval);
return TRUE;
}
char *
make_stop_id(const char *rsc, int call_id)
{
char *op_id = NULL;
crm_malloc0(op_id, strlen(rsc) + 34);
if(op_id != NULL) {
snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id);
}
return op_id;
}
gboolean
resource_stopped(gpointer key, gpointer value, gpointer user_data)
{
const char *this_rsc = value;
const char *target_rsc = user_data;
if(safe_str_eq(this_rsc, target_rsc)) {
return TRUE;
}
return FALSE;
}
diff --git a/crm/tengine/events.c b/crm/tengine/events.c
index 96a8f1fc52..06b6b6939a 100644
--- a/crm/tengine/events.c
+++ b/crm/tengine/events.c
@@ -1,562 +1,550 @@
-/* $Id: events.c,v 1.21 2006/06/22 12:58:45 andrew Exp $ */
+/* $Id: events.c,v 1.22 2006/08/14 08:52:30 andrew Exp $ */
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <portability.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/msg.h>
#include <crm/common/xml.h>
#include <tengine.h>
#include <heartbeat.h>
#include <clplumbing/Gmain_timeout.h>
#include <lrm/lrm_api.h>
crm_data_t *need_abort(crm_data_t *update);
void process_graph_event(crm_data_t *event, const char *event_node);
int match_graph_event(
crm_action_t *action, crm_data_t *event, const char *event_node);
crm_data_t *
need_abort(crm_data_t *update)
{
crm_data_t *section_xml = NULL;
const char *section = NULL;
if(update == NULL) {
return NULL;
}
section = XML_CIB_TAG_NODES;
section_xml = get_object_root(section, update);
xml_child_iter(section_xml, child,
return section_xml;
);
section = XML_CIB_TAG_RESOURCES;
section_xml = get_object_root(section, update);
xml_child_iter(section_xml, child,
return section_xml;
);
section = XML_CIB_TAG_CONSTRAINTS;
section_xml = get_object_root(section, update);
xml_child_iter(section_xml, child,
return section_xml;
);
section = XML_CIB_TAG_CRMCONFIG;
section_xml = get_object_root(section, update);
xml_child_iter(section_xml, child,
return section_xml;
);
return NULL;
}
static gboolean
fail_incompletable_actions(crm_graph_t *graph, const char *down_node)
{
const char *target = NULL;
crm_data_t *last_action = NULL;
slist_iter(
synapse, synapse_t, graph->synapses, lpc,
if (synapse->confirmed) {
continue;
}
slist_iter(
action, crm_action_t, synapse->actions, lpc,
if(action->type == action_type_pseudo || action->confirmed) {
continue;
}
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
if(safe_str_eq(target, down_node)) {
action->failed = TRUE;
last_action = action->xml;
update_graph(graph, action);
crm_notice("Action %d (%s) is scheduled for %s (offline)",
action->id, ID(action->xml), down_node);
}
);
);
if(last_action != NULL) {
crm_warn("Node %s shutdown resulted in un-runnable actions", down_node);
abort_transition(INFINITY, tg_restart, "Node failure", last_action);
return TRUE;
}
return FALSE;
}
gboolean
extract_event(crm_data_t *msg)
{
int shutdown = 0;
const char *event_node = NULL;
/*
[cib fragment]
...
<status>
<node_state id="node1" state=CRMD_STATE_ACTIVE exp_state="active">
<lrm>
<lrm_resources>
<rsc_state id="" rsc_id="rsc4" node_id="node1" rsc_state="stopped"/>
*/
crm_debug_4("Extracting event from %s", crm_element_name(msg));
xml_child_iter_filter(
msg, node_state, XML_CIB_TAG_STATE,
crm_data_t *attrs = NULL;
crm_data_t *resources = NULL;
const char *ccm_state = crm_element_value(
node_state, XML_CIB_ATTR_INCCM);
const char *crmd_state = crm_element_value(
node_state, XML_CIB_ATTR_CRMDSTATE);
/* Transient node attribute changes... */
event_node = crm_element_value(node_state, XML_ATTR_ID);
crm_debug("Processing state update from %s", event_node);
crm_log_xml_debug_3(node_state,"Processing");
attrs = find_xml_node(
node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
if(attrs != NULL) {
crm_info("Aborting on "XML_TAG_TRANSIENT_NODEATTRS" changes");
abort_transition(INFINITY, tg_restart,
XML_TAG_TRANSIENT_NODEATTRS, attrs);
}
resources = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
resources = find_xml_node(
resources, XML_LRM_TAG_RESOURCES, FALSE);
/* LRM resource update... */
xml_child_iter(
resources, rsc,
xml_child_iter(
rsc, rsc_op,
crm_log_xml_debug_3(
rsc_op, "Processing resource update");
process_graph_event(rsc_op, event_node);
);
);
/*
* node state update... possibly from a shutdown we requested
*/
if(safe_str_eq(ccm_state, XML_BOOLEAN_FALSE)
|| safe_str_eq(crmd_state, CRMD_JOINSTATE_DOWN)) {
crm_action_t *shutdown = NULL;
crm_debug_3("A shutdown we requested?");
shutdown = match_down_event(0, event_node, NULL);
if(shutdown != NULL) {
update_graph(transition_graph, shutdown);
trigger_graph();
} else {
crm_info("Stonith/shutdown event not matched");
abort_transition(INFINITY, tg_restart,
"Node failure", node_state);
}
fail_incompletable_actions(transition_graph, event_node);
}
shutdown = 0;
ha_msg_value_int(node_state, XML_CIB_ATTR_SHUTDOWN, &shutdown);
if(shutdown != 0) {
crm_info("Aborting on "XML_CIB_ATTR_SHUTDOWN" attribute");
abort_transition(INFINITY, tg_restart,
"Shutdown request", node_state);
}
);
return TRUE;
}
static void
update_failcount(crm_action_t *action, int rc)
{
crm_data_t *rsc = NULL;
char *attr_name = NULL;
const char *task = NULL;
const char *rsc_id = NULL;
const char *on_node = NULL;
const char *on_uuid = NULL;
const char *interval = NULL;
if(rc == 99) {
/* this is an internal code for "we're busy, try again" */
return;
}
interval = g_hash_table_lookup(
action->params, crm_meta_name("interval"));
if(interval == NULL) {
return;
}
CRM_CHECK(action->xml != NULL, return);
rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
CRM_CHECK(rsc != NULL, return);
rsc_id = ID(rsc);
CRM_CHECK(rsc_id != NULL, return);
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
on_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
CRM_CHECK(task != NULL, return);
CRM_CHECK(on_uuid != NULL, return);
CRM_CHECK(on_node != NULL, return);
attr_name = crm_concat("fail-count", rsc_id, '-');
crm_warn("Updating failcount for %s on %s after failed %s: rc=%d",
rsc_id, on_node, task, rc);
update_attr(te_cib_conn, cib_none, XML_CIB_TAG_STATUS,
on_uuid, NULL,NULL, attr_name, XML_NVPAIR_ATTR_VALUE"++");
crm_free(attr_name);
}
/*
* returns the ID of the action if a match is found
* returns -1 if a match was not found
* returns -2 if a match was found but the action failed (and was
* not allowed to)
*/
int
match_graph_event(
crm_action_t *action, crm_data_t *event, const char *event_node)
{
int log_level_fail = LOG_ERR;
int target_rc = 0;
const char *target_rc_s = NULL;
const char *allow_fail = NULL;
const char *this_action = NULL;
const char *this_node = NULL;
const char *this_uname = NULL;
const char *magic = NULL;
const char *this_event;
char *update_te_uuid = NULL;
const char *update_event;
int op_status_i = -3;
int op_rc_i = -3;
int transition_i = -1;
CRM_CHECK(event != NULL, return -1);
crm_debug_3("Processing \"%s\" change", crm_element_name(event));
update_event = crm_element_value(event, XML_ATTR_ID);
magic = crm_element_value(event, XML_ATTR_TRANSITION_MAGIC);
CRM_CHECK(magic != NULL, return -2);
this_action = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
this_uname = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
this_event = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
this_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
CRM_CHECK(this_event != NULL, return -2);
if(safe_str_neq(this_event, update_event)) {
crm_debug_2("Action %d : Event mismatch %s vs. %s",
action->id, this_event, update_event);
return -1;
} else if(safe_str_neq(this_node, event_node)) {
crm_debug_2("Action %d : Node mismatch %s (%s) vs. %s",
action->id, this_node, this_uname, event_node);
return -1;
}
crm_debug_2("Matched action (%d) %s", action->id, this_event);
CRM_CHECK(decode_transition_magic(
magic, &update_te_uuid,
&transition_i, &op_status_i, &op_rc_i), return -2);
if(transition_i == -1) {
/* we never expect these - recompute */
crm_err("Detected action %s initiated outside of a transition",
this_event);
crm_log_message(LOG_ERR, event);
return -2;
} else if(safe_str_neq(update_te_uuid, te_uuid)) {
crm_info("Detected action %s from a different transitioner:"
" %s vs. %s", this_event, update_te_uuid, te_uuid);
crm_log_message(LOG_INFO, event);
return -3;
} else if(transition_graph->id != transition_i) {
crm_warn("Detected an action %s from a different transition:"
" %d vs. %d", this_event, transition_i,
transition_graph->id);
crm_log_message(LOG_INFO, event);
return -4;
}
/* stop this event's timer if it had one */
stop_te_timer(action->timer);
action->confirmed = TRUE;
target_rc_s = g_hash_table_lookup(
action->params,crm_meta_name(XML_ATTR_TE_TARGET_RC));
if(target_rc_s != NULL) {
crm_debug_2("Target rc: %s vs. %d", target_rc_s, op_rc_i);
target_rc = crm_parse_int(target_rc_s, NULL);
if(target_rc == op_rc_i) {
crm_debug_2("Target rc: == %d", op_rc_i);
if(op_status_i != LRM_OP_DONE) {
crm_debug_2("Re-mapping op status to"
" LRM_OP_DONE for %s",update_event);
op_status_i = LRM_OP_DONE;
}
} else {
crm_debug_2("Target rc: != %d", op_rc_i);
if(op_status_i != LRM_OP_ERROR) {
crm_info("Re-mapping op status to"
" LRM_OP_ERROR for %s", update_event);
op_status_i = LRM_OP_ERROR;
}
}
}
/* Process OP status */
switch(op_status_i) {
case -3:
crm_err("Action returned the same as last time..."
" whatever that was!");
crm_log_message(LOG_ERR, event);
break;
case LRM_OP_PENDING:
crm_debug("Ignoring pending operation");
return -5;
break;
case LRM_OP_DONE:
break;
case LRM_OP_ERROR:
/* This is the code we use for direct nack's */
if(op_rc_i == 99) {
log_level_fail = LOG_WARNING;
}
/* fall through */
case LRM_OP_TIMEOUT:
case LRM_OP_NOTSUPPORTED:
action->failed = TRUE;
crm_log_maybe(log_level_fail,
"Action %s on %s failed (target: %d vs. rc: %d): %s",
update_event, this_uname, target_rc,
op_rc_i, op_status2text(op_status_i));
break;
case LRM_OP_CANCELLED:
/* do nothing?? */
crm_err("Dont know what to do for cancelled ops yet");
break;
default:
action->failed = TRUE;
crm_err("Unsupported action result: %d", op_status_i);
}
update_graph(transition_graph, action);
trigger_graph();
if(action->failed) {
allow_fail = g_hash_table_lookup(
action->params, crm_meta_name(XML_ATTR_TE_ALLOWFAIL));
if(crm_is_true(allow_fail)) {
action->failed = FALSE;
}
}
if(action->failed) {
/* ignore probes */
if(target_rc != EXECRA_NOT_RUNNING) {
update_failcount(action, op_rc_i);
}
abort_transition(action->synapse->priority+1,
tg_restart, "Event failed", event);
} else if(transition_graph->complete) {
abort_transition(INFINITY, tg_restart,"No active graph", event);
}
te_log_action(LOG_INFO, "Action %s (%d) confirmed",
this_event, action->id);
return action->id;
}
crm_action_t *
match_down_event(int id, const char *target, const char *filter)
{
const char *this_action = NULL;
const char *this_node = NULL;
crm_action_t *match = NULL;
slist_iter(
synapse, synapse_t, transition_graph->synapses, lpc,
/* lookup event */
slist_iter(
action, crm_action_t, synapse->actions, lpc2,
if(id > 0 && action->id == id) {
match = action;
break;
}
this_action = crm_element_value(
action->xml, XML_LRM_ATTR_TASK);
if(action->type != action_type_crm) {
continue;
} else if(safe_str_eq(this_action, CRM_OP_LRM_REFRESH)){
continue;
} else if(filter != NULL
&& safe_str_neq(this_action, filter)) {
continue;
}
this_node = crm_element_value(
action->xml, XML_LRM_ATTR_TARGET_UUID);
if(this_node == NULL) {
crm_log_xml_err(action->xml, "No node uuid");
}
if(safe_str_neq(this_node, target)) {
crm_debug("Action %d : Node mismatch: %s",
action->id, this_node);
continue;
}
match = action;
break;
);
if(match != NULL) {
/* stop this event's timer if it had one */
break;
}
);
if(match != NULL) {
/* stop this event's timer if it had one */
crm_debug("Match found for action %d: %s on %s", id,
crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY),
target);
stop_te_timer(match->timer);
match->confirmed = TRUE;
} else if(id > 0) {
crm_err("No match for action %d", id);
} else {
crm_warn("No match for shutdown action on %s", target);
}
return match;
}
void
process_graph_event(crm_data_t *event, const char *event_node)
{
int rc = -1;
- int op_status_i = 0;
const char *magic = NULL;
const char *rsc_id = NULL;
- const char *op_status = NULL;
CRM_ASSERT(event != NULL);
rsc_id = crm_element_value(event, XML_ATTR_ID);
- op_status = crm_element_value(event, XML_LRM_ATTR_OPSTATUS);
magic = crm_element_value(event, XML_ATTR_TRANSITION_MAGIC);
- if(op_status != NULL) {
- op_status_i = crm_parse_int(op_status, NULL);
- if(op_status_i == LRM_OP_PENDING) {
- /* just information that the action was sent */
- crm_debug_2("Ignoring TE initiated updates");
- return;
- }
- }
-
if(magic == NULL) {
crm_log_xml_debug_2(event, "Skipping \"non-change\"");
return;
} else {
crm_debug_2("Processing CIB update: %s on %s: %s",
rsc_id, event_node, magic);
}
slist_iter(
synapse, synapse_t, transition_graph->synapses, lpc,
/* lookup event */
slist_iter(
action, crm_action_t, synapse->actions, lpc2,
rc = match_graph_event(action, event, event_node);
if(rc >= 0) {
crm_log_xml_debug_2(event, "match:found");
} else if(rc == -5) {
crm_log_xml_debug_2(event, "match:pending");
} else if(rc != -1) {
crm_warn("Search for %s terminated: %d",
ID(event), rc);
abort_transition(INFINITY, tg_restart,
"Unexpected event", event);
}
if(rc != -1) {
return;
}
);
);
/* unexpected event, trigger a pe-recompute */
/* possibly do this only for certain types of actions */
crm_warn("Event not found.");
crm_log_xml_info(event, "match:not-found");
abort_transition(INFINITY, tg_restart, "Unexpected event", event);
return;
}
diff --git a/crm/tengine/utils.c b/crm/tengine/utils.c
index 189636d1b4..25c40f7469 100644
--- a/crm/tengine/utils.c
+++ b/crm/tengine/utils.c
@@ -1,141 +1,94 @@
-/* $Id: utils.c,v 1.59 2006/04/19 12:24:59 andrew Exp $ */
+/* $Id: utils.c,v 1.60 2006/08/14 08:52:30 andrew Exp $ */
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <portability.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/msg.h>
#include <crm/common/xml.h>
#include <tengine.h>
#include <heartbeat.h>
#include <clplumbing/Gmain_timeout.h>
#include <lrm/lrm_api.h>
extern cib_t *te_cib_conn;
-const char *
-get_rsc_state(const char *task, op_status_t status)
-{
- if(safe_str_eq(CRMD_ACTION_START, task)) {
- if(status == LRM_OP_PENDING) {
- return CRMD_ACTION_START_PENDING;
- } else if(status == LRM_OP_DONE) {
- return CRMD_ACTION_STARTED;
- } else {
- return CRMD_ACTION_START_FAIL;
- }
-
- } else if(safe_str_eq(CRMD_ACTION_STOP, task)) {
- if(status == LRM_OP_PENDING) {
- return CRMD_ACTION_STOP_PENDING;
- } else if(status == LRM_OP_DONE) {
- return CRMD_ACTION_STOPPED;
- } else {
- return CRMD_ACTION_STOP_FAIL;
- }
-
- } else {
- if(safe_str_eq(CRMD_ACTION_MON, task)) {
- if(status == LRM_OP_PENDING) {
- return CRMD_ACTION_MON_PENDING;
- } else if(status == LRM_OP_DONE) {
- return CRMD_ACTION_MON_OK;
- } else {
- return CRMD_ACTION_MON_FAIL;
- }
- } else {
- const char *rsc_state = NULL;
- if(status == LRM_OP_PENDING) {
- rsc_state = CRMD_ACTION_GENERIC_PENDING;
- } else if(status == LRM_OP_DONE) {
- rsc_state = CRMD_ACTION_GENERIC_OK;
- } else {
- rsc_state = CRMD_ACTION_GENERIC_FAIL;
- }
- crm_warn("Using status \"%s\" for op \"%s\"..."
- " this is still in the experimental stage.",
- rsc_state, task);
- return rsc_state;
- }
- }
-}
-
gboolean
stop_te_timer(crm_action_timer_t *timer)
{
const char *timer_desc = "action timer";
if(timer == NULL) {
return FALSE;
}
if(timer->reason == timeout_abort) {
timer_desc = "global timer";
}
if(timer->source_id != 0) {
crm_debug_2("Stopping %s", timer_desc);
Gmain_timeout_remove(timer->source_id);
timer->source_id = 0;
} else {
return FALSE;
}
return TRUE;
}
void
trigger_graph_processing(const char *fn, int line)
{
G_main_set_trigger(transition_trigger);
crm_debug_2("%s:%d - Triggered graph processing", fn, line);
}
void
abort_transition_graph(
int abort_priority, enum transition_action abort_action,
const char *abort_text, crm_data_t *reason, const char *fn, int line)
{
int log_level = LOG_DEBUG;
/*
if(abort_priority >= INFINITY) {
log_level = LOG_INFO;
}
*/
update_abort_priority(
transition_graph, abort_priority, abort_action, abort_text);
crm_log_maybe(log_level, "%s:%d - Triggered graph processing : %s",
fn, line, abort_text);
if(reason != NULL) {
crm_log_xml(log_level, "Cause", reason);
}
if(transition_graph->complete) {
notify_crmd(transition_graph);
} else {
G_main_set_trigger(transition_trigger);
}
}
diff --git a/lib/crm/common/ipc.c b/lib/crm/common/ipc.c
index 64df19a260..a3804f9888 100644
--- a/lib/crm/common/ipc.c
+++ b/lib/crm/common/ipc.c
@@ -1,435 +1,439 @@
-/* $Id: ipc.c,v 1.27 2006/07/12 09:31:36 andrew Exp $ */
+/* $Id: ipc.c,v 1.28 2006/08/14 08:52:08 andrew Exp $ */
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <portability.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <crm/crm.h>
#include <clplumbing/ipc.h>
#include <clplumbing/Gmain_timeout.h>
#include <clplumbing/cl_log.h>
#include <clplumbing/cl_signal.h>
#include <clplumbing/lsb_exitcodes.h>
#include <clplumbing/uids.h>
#include <clplumbing/realtime.h>
#include <clplumbing/GSource.h>
#include <clplumbing/cl_poll.h>
#include <crm/common/ipc.h>
#include <crm/msg_xml.h>
#include <ha_msg.h>
#include <crm/dmalloc_wrapper.h>
gboolean
send_ha_message(ll_cluster_t *hb_conn, HA_Message *msg, const char *node, gboolean force_ordered)
{
gboolean all_is_good = TRUE;
cl_mem_stats_t saved_stats;
crm_save_mem_stats(__PRETTY_FUNCTION__, &saved_stats);
if (msg == NULL) {
crm_err("cant send NULL message");
all_is_good = FALSE;
} else if(hb_conn == NULL) {
crm_err("No heartbeat connection specified");
all_is_good = FALSE;
} else if(hb_conn->llc_ops->chan_is_connected(hb_conn) == FALSE) {
crm_err("Not connected to Heartbeat");
all_is_good = FALSE;
} else if(node != NULL) {
if(hb_conn->llc_ops->send_ordered_nodemsg(
hb_conn, msg, node) != HA_OK) {
all_is_good = FALSE;
crm_err("Send failed");
} else {
crm_debug_2("Message sent...");
}
} else if(force_ordered) {
if(hb_conn->llc_ops->send_ordered_clustermsg(hb_conn, msg) != HA_OK) {
all_is_good = FALSE;
crm_err("Broadcast Send failed");
} else {
crm_debug_2("Broadcast message sent...");
}
} else {
if(hb_conn->llc_ops->sendclustermsg(hb_conn, msg) != HA_OK) {
all_is_good = FALSE;
crm_err("Broadcast Send failed");
} else {
crm_debug_2("Broadcast message sent...");
}
}
if(all_is_good == FALSE && hb_conn != NULL) {
IPC_Channel *ipc = NULL;
IPC_Queue *send_q = NULL;
if(hb_conn->llc_ops->chan_is_connected(hb_conn) != HA_OK) {
ipc = hb_conn->llc_ops->ipcchan(hb_conn);
}
if(ipc != NULL) {
/* ipc->ops->resume_io(ipc); */
send_q = ipc->send_queue;
}
if(send_q != NULL) {
CRM_CHECK(send_q->current_qlen < send_q->max_qlen, ;);
}
}
crm_log_message_adv(all_is_good?LOG_MSG:LOG_WARNING,"HA[outbound]",msg);
crm_diff_mem_stats(LOG_DEBUG, LOG_DEBUG, __PRETTY_FUNCTION__, NULL, &saved_stats);
return all_is_good;
}
/* frees msg */
gboolean
send_ipc_message(IPC_Channel *ipc_client, HA_Message *msg)
{
gboolean all_is_good = TRUE;
int fail_level = LOG_WARNING;
cl_mem_stats_t saved_stats;
crm_save_mem_stats(__PRETTY_FUNCTION__, &saved_stats);
if(ipc_client != NULL && ipc_client->conntype == IPC_CLIENT) {
fail_level = LOG_ERR;
}
if (msg == NULL) {
crm_err("cant send NULL message");
all_is_good = FALSE;
} else if (ipc_client == NULL) {
crm_err("cant send message without an IPC Channel");
all_is_good = FALSE;
} else if(ipc_client->ops->get_chan_status(ipc_client) != IPC_CONNECT) {
crm_log_maybe(fail_level, "IPC Channel to %d is not connected",
(int)ipc_client->farside_pid);
all_is_good = FALSE;
}
if(all_is_good && msg2ipcchan(msg, ipc_client) != HA_OK) {
crm_log_maybe(fail_level, "Could not send IPC message to %d",
(int)ipc_client->farside_pid);
all_is_good = FALSE;
if(ipc_client->ops->get_chan_status(ipc_client) != IPC_CONNECT) {
crm_log_maybe(fail_level,
"IPC Channel to %d is no longer connected",
(int)ipc_client->farside_pid);
} else if(ipc_client->conntype == IPC_CLIENT) {
- CRM_CHECK(ipc_client->send_queue->current_qlen < ipc_client->send_queue->max_qlen, ;);
+ if(ipc_client->send_queue->current_qlen >= ipc_client->send_queue->max_qlen) {
+ crm_err("Send queue to %d (size=%d) full.",
+ ipc_client->farside_pid,
+ (int)ipc_client->send_queue->max_qlen);
+ }
}
}
/* ipc_client->ops->resume_io(ipc_client); */
crm_log_message_adv(all_is_good?LOG_MSG:LOG_WARNING,"IPC[outbound]",msg);
crm_diff_mem_stats(LOG_DEBUG, LOG_DEBUG, __PRETTY_FUNCTION__, NULL, &saved_stats);
return all_is_good;
}
void
default_ipc_connection_destroy(gpointer user_data)
{
return;
}
int
init_server_ipc_comms(
char *channel_name,
gboolean (*channel_client_connect)(IPC_Channel *newclient,gpointer user_data),
void (*channel_connection_destroy)(gpointer user_data))
{
/* the clients wait channel is the other source of events.
* This source delivers the clients connection events.
* listen to this source at a relatively lower priority.
*/
char commpath[SOCKET_LEN];
IPC_WaitConnection *wait_ch;
sprintf(commpath, CRM_SOCK_DIR "/%s", channel_name);
wait_ch = wait_channel_init(commpath);
if (wait_ch == NULL) {
return 1;
}
G_main_add_IPC_WaitConnection(
G_PRIORITY_LOW, wait_ch, NULL, FALSE,
channel_client_connect, channel_name,
channel_connection_destroy);
crm_debug_3("Listening on: %s", commpath);
return 0;
}
GCHSource*
init_client_ipc_comms(const char *channel_name,
gboolean (*dispatch)(
IPC_Channel* source_data, gpointer user_data),
void *client_data, IPC_Channel **ch)
{
IPC_Channel *a_ch = NULL;
GCHSource *the_source = NULL;
void *callback_data = client_data;
a_ch = init_client_ipc_comms_nodispatch(channel_name);
if(ch != NULL) {
*ch = a_ch;
if(callback_data == NULL) {
callback_data = a_ch;
}
}
if(a_ch == NULL) {
crm_warn("Setup of client connection failed,"
" not adding channel to mainloop");
return NULL;
}
if(dispatch == NULL) {
crm_warn("No dispatch method specified..."
"maybe you meant init_client_ipc_comms_nodispatch()?");
} else {
crm_debug_3("Adding dispatch method to channel");
the_source = G_main_add_IPC_Channel(
G_PRIORITY_HIGH, a_ch, FALSE, dispatch, callback_data,
default_ipc_connection_destroy);
}
return the_source;
}
IPC_Channel *
init_client_ipc_comms_nodispatch(const char *channel_name)
{
IPC_Channel *ch;
GHashTable *attrs;
static char path[] = IPC_PATH_ATTR;
char *commpath = NULL;
int local_socket_len = 2; /* 2 = '/' + '\0' */
local_socket_len += strlen(channel_name);
local_socket_len += strlen(CRM_SOCK_DIR);
crm_malloc0(commpath, local_socket_len);
if(commpath != NULL) {
sprintf(commpath, CRM_SOCK_DIR "/%s", channel_name);
commpath[local_socket_len - 1] = '\0';
crm_debug_3("Attempting to talk on: %s", commpath);
}
attrs = g_hash_table_new(g_str_hash,g_str_equal);
g_hash_table_insert(attrs, path, commpath);
ch = ipc_channel_constructor(IPC_ANYTYPE, attrs);
g_hash_table_destroy(attrs);
if (ch == NULL) {
crm_err("Could not access channel on: %s", commpath);
crm_free(commpath);
return NULL;
} else if (ch->ops->initiate_connection(ch) != IPC_OK) {
crm_debug("Could not init comms on: %s", commpath);
ch->ops->destroy(ch);
crm_free(commpath);
return NULL;
}
ch->ops->set_recv_qlen(ch, 100);
ch->ops->set_send_qlen(ch, 100);
/* ch->should_send_block = TRUE; */
crm_debug_3("Processing of %s complete", commpath);
crm_free(commpath);
return ch;
}
IPC_WaitConnection *
wait_channel_init(char daemonsocket[])
{
IPC_WaitConnection *wait_ch;
mode_t mask;
char path[] = IPC_PATH_ATTR;
GHashTable * attrs;
attrs = g_hash_table_new(g_str_hash,g_str_equal);
g_hash_table_insert(attrs, path, daemonsocket);
mask = umask(0);
wait_ch = ipc_wait_conn_constructor(IPC_ANYTYPE, attrs);
if (wait_ch == NULL) {
cl_perror("Can't create wait channel of type %s",
IPC_ANYTYPE);
exit(1);
}
mask = umask(mask);
g_hash_table_destroy(attrs);
return wait_ch;
}
longclock_t ipc_call_start = 0;
longclock_t ipc_call_stop = 0;
longclock_t ipc_call_diff = 0;
gboolean
subsystem_msg_dispatch(IPC_Channel *sender, void *user_data)
{
int lpc = 0;
HA_Message *msg = NULL;
ha_msg_input_t *new_input = NULL;
gboolean all_is_well = TRUE;
const char *sys_to;
const char *task;
while(IPC_ISRCONN(sender)) {
gboolean process = FALSE;
if(sender->ops->is_message_pending(sender) == 0) {
break;
}
msg = msgfromIPC_noauth(sender);
if (msg == NULL) {
crm_err("No message from %d this time",
sender->farside_pid);
continue;
}
lpc++;
new_input = new_ha_msg_input(msg);
crm_msg_del(msg);
msg = NULL;
crm_log_message(LOG_MSG, new_input->msg);
sys_to = cl_get_string(new_input->msg, F_CRM_SYS_TO);
task = cl_get_string(new_input->msg, F_CRM_TASK);
if(safe_str_eq(task, CRM_OP_HELLO)) {
process = TRUE;
} else if(sys_to == NULL) {
crm_err("Value of %s was NULL!!", F_CRM_SYS_TO);
} else if(task == NULL) {
crm_err("Value of %s was NULL!!", F_CRM_TASK);
} else {
process = TRUE;
}
if(process){
gboolean (*process_function)
(HA_Message *msg, crm_data_t *data, IPC_Channel *sender) = NULL;
process_function = user_data;
#ifdef MSG_LOG
crm_log_message_adv(
LOG_MSG, __FUNCTION__, new_input->msg);
#endif
if(ipc_call_diff_max_ms > 0) {
ipc_call_start = time_longclock();
}
if(FALSE == process_function(
new_input->msg, new_input->xml, sender)) {
crm_warn("Received a message destined for %s"
" by mistake", sys_to);
}
if(ipc_call_diff_max_ms > 0) {
unsigned int ipc_call_diff_ms = 0;
ipc_call_stop = time_longclock();
ipc_call_diff = sub_longclock(
ipc_call_stop, ipc_call_start);
ipc_call_diff_ms = longclockto_ms(
ipc_call_diff);
if(ipc_call_diff_ms > ipc_call_diff_max_ms) {
crm_err("%s took %dms to complete",
sys_to, ipc_call_diff_ms);
}
}
} else {
#ifdef MSG_LOG
crm_log_message_adv(
LOG_ERR, NULL, new_input->msg);
#endif
}
delete_ha_msg_input(new_input);
new_input = NULL;
if(sender->ch_status == IPC_CONNECT) {
break;
}
}
crm_debug_2("Processed %d messages", lpc);
if (sender->ch_status != IPC_CONNECT) {
crm_err("The server %d has left us: Shutting down...NOW",
sender->farside_pid);
exit(1); /* shutdown properly later */
return !all_is_well;
}
return all_is_well;
}
gboolean
is_ipc_empty(IPC_Channel *ch)
{
if(ch == NULL) {
return TRUE;
} else if(ch->send_queue->current_qlen == 0
&& ch->recv_queue->current_qlen == 0) {
return TRUE;
}
return FALSE;
}

File Metadata

Mime Type
text/x-diff
Expires
Wed, Oct 15, 11:47 PM (2 h, 47 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2530565
Default Alt Text
(72 KB)

Event Timeline