Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F3687555
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
36 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/crmd/callbacks.c b/crmd/callbacks.c
index 22bee4b8e3..146430cc5b 100644
--- a/crmd/callbacks.c
+++ b/crmd/callbacks.c
@@ -1,632 +1,655 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <string.h>
#include <crmd_fsa.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/msg.h>
#include <crm/common/cluster.h>
#include <crm/cib.h>
#include <crmd.h>
#include <crmd_messages.h>
#include <crmd_callbacks.h>
#include <crmd_lrm.h>
void crmd_ha_connection_destroy(gpointer user_data);
void crmd_ha_msg_filter(xmlNode *msg);
/* From join_dc... */
extern gboolean check_join_state(
enum crmd_fsa_state cur_state, const char *source);
/* Log the calling function, then fire the given mainloop trigger.
 * The body is wrapped in do { } while(0) so that the macro expands to a
 * single statement and remains correct inside an unbraced if/else. */
#define trigger_fsa(source) do {                                \
        crm_debug_3("Triggering FSA: %s", __FUNCTION__);        \
        mainloop_set_trigger(source);                           \
    } while(0)
#if SUPPORT_HEARTBEAT
/*
 * Mainloop dispatch handler for the heartbeat connection.
 *
 * Drains pending heartbeat messages without blocking and reports whether
 * the connection is still usable.  Returns TRUE to stay connected, FALSE
 * once the channel is missing or no longer in the IPC_CONNECT state.
 */
gboolean
crmd_ha_msg_dispatch(ll_cluster_t *cluster_conn, gpointer user_data)
{
    IPC_Channel *channel = NULL;

    crm_debug_3("Invoked");

    if(cluster_conn != NULL) {
        channel = cluster_conn->llc_ops->ipcchan(cluster_conn);
    }

    /* Log (but tolerate) a missing connection or channel */
    CRM_CHECK(cluster_conn != NULL, ;);
    CRM_CHECK(channel != NULL, ;);

    if(channel != NULL && IPC_ISRCONN(channel)) {
        if(cluster_conn->llc_ops->msgready(cluster_conn) == 0) {
            crm_debug_2("no message ready yet");
        }
        /* Run the registered callbacks, never blocking for input */
        cluster_conn->llc_ops->rcvmsg(cluster_conn, 0);
    }

    if(channel != NULL && channel->ch_status == IPC_CONNECT) {
        return TRUE;
    }

    /* The link is gone: only shout about it if we did not ask for it */
    if(is_set(fsa_input_register, R_HA_DISCONNECTED)) {
        crm_info("Lost connection to heartbeat service.");
    } else {
        crm_crit("Lost connection to heartbeat service.");
    }

    trigger_fsa(fsa_source);
    return FALSE;
}
#endif
/*
 * Destroy notification for the heartbeat connection.
 *
 * An expected disconnect (R_HA_DISCONNECTED already set) is only logged;
 * anything else is escalated to the FSA as an I_ERROR input.
 */
void
crmd_ha_connection_destroy(gpointer user_data)
{
    crm_debug_3("Invoked");

    if(is_set(fsa_input_register, R_HA_DISCONNECTED) == FALSE) {
        /* Nobody asked for this disconnect */
        crm_crit("Lost connection to heartbeat service!");
        register_fsa_input(C_HA_DISCONNECT, I_ERROR, NULL);
        trigger_fsa(fsa_source);

    } else {
        /* We signed out ourselves, nothing more to do */
        crm_info("Heartbeat disconnection complete");
    }
}
/*
 * Filter a cluster message before handing it to route_message().
 *
 * - On the DC: a message that claims to come from a DC on another node
 *   means there are two DCs.  It is logged and, unless an election is
 *   already in progress, an I_ELECTION error input is registered; the
 *   message itself is not routed.
 * - On other nodes: messages addressed to the DC are silently dropped.
 *
 * Everything else is routed, and the FSA is triggered afterwards.
 */
void
crmd_ha_msg_filter(xmlNode *msg)
{
    if(AM_I_DC) {
        const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
        if(safe_str_eq(sys_from, CRM_SYSTEM_DC)) {
            const char *from = crm_element_value(msg, F_ORIG);
            if(safe_str_neq(from, fsa_our_uname)) {
                int level = LOG_INFO;
                const char *op = crm_element_value(msg, F_CRM_TASK);

                /* make sure the election happens NOW */
                if(fsa_state != S_ELECTION) {
                    ha_msg_input_t new_input;

                    /* Only .msg is filled in below; clear the rest so the
                     * callee never reads indeterminate members */
                    memset(&new_input, 0, sizeof(new_input));

                    level = LOG_ERR;
                    new_input.msg = msg;
                    register_fsa_error_adv(
                        C_FSA_INTERNAL, I_ELECTION, NULL, &new_input, __FUNCTION__);
                }

                do_crm_log(level, "Another DC detected: %s (op=%s)", from, op);
                goto done;
            }
        }

    } else {
        const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
        if(safe_str_eq(sys_to, CRM_SYSTEM_DC)) {
            /* Not for us, and no need to wake the FSA */
            return;
        }
    }

    /* crm_log_xml(LOG_MSG, "HA[inbound]", msg); */
    route_message(C_HA_MESSAGE, msg);

  done:
    trigger_fsa(fsa_source);
}
#if SUPPORT_HEARTBEAT
/*
 * Heartbeat message callback.
 *
 * Converts the raw heartbeat message to XML and passes it to
 * crmd_ha_msg_filter(), unless we have no membership data yet or the
 * sender is not an active member, in which case it is logged and dropped.
 * The converted XML is always freed before returning.
 */
void
crmd_ha_msg_callback(HA_Message *hamsg, void* private_data)
{
    xmlNode *msg = convert_ha_message(NULL, hamsg, __FUNCTION__);
    const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
    const char *op = crm_element_value(msg, F_CRM_TASK);
    const char *from = crm_element_value(msg, F_ORIG);
    crm_node_t *from_node = NULL;

    CRM_CHECK(from != NULL, crm_log_xml_err(msg, "anon"); goto bail);

    crm_debug_2("HA[inbound]: %s from %s", op, from);

    if(crm_peer_cache == NULL || crm_active_members() == 0) {
        crm_debug("Ignoring HA messages until we are"
                  " connected to the CCM (%s op from %s)", op, from);
        crm_log_xml(LOG_MSG, "HA[inbound]: Ignore (No CCM)", msg);
        goto bail;
    }

    from_node = crm_get_peer(0, from);
    if(crm_is_member_active(from_node)) {
        crmd_ha_msg_filter(msg);

    } else {
        /* Votes, join announcements seen by the DC, and anything sent by
         * a DC are worth a warning; the rest is debug noise */
        int level = LOG_DEBUG;

        if(safe_str_eq(op, CRM_OP_VOTE)
           || (AM_I_DC && safe_str_eq(op, CRM_OP_JOIN_ANNOUNCE))
           || safe_str_eq(sys_from, CRM_SYSTEM_DC)) {
            level = LOG_WARNING;
        }

        do_crm_log(level,
                   "Ignoring HA message (op=%s) from %s: not in our"
                   " membership list (size=%d)", op, from,
                   crm_active_members());
        crm_log_xml(LOG_MSG, "HA[inbound]: CCM Discard", msg);
    }

  bail:
    free_xml(msg);
}
#endif
/*
* Apparently returning TRUE means "stay connected, keep doing stuff".
* Returning FALSE means "we're all done, close the connection"
*/
/*
 * Mainloop callback for a local IPC client.
 *
 * Reads every pending message from the client, routes those that pass
 * crmd_authorize_message(), and triggers the FSA.  Returns FALSE (after
 * calling process_client_disconnect()) once the channel is no longer
 * connected, TRUE otherwise.
 */
gboolean
crmd_ipc_msg_callback(IPC_Channel *client, gpointer user_data)
{
    crmd_client_t *curr_client = (crmd_client_t*)user_data;
    gboolean connected = TRUE;
    xmlNode *request = NULL;
    int handled = 0;

    crm_debug_2("Invoked: %s",
                curr_client->table_key);

    while(IPC_ISRCONN(client)
          && client->ops->is_message_pending(client) != 0) {

        request = xmlfromIPC(client, MAX_IPC_DELAY);
        if(request == NULL) {
            break;
        }

        handled++;
        crm_debug_2("Processing msg from %s", curr_client->table_key);
        crm_log_xml(LOG_DEBUG_2, "CRMd[inbound]", request);

        if(crmd_authorize_message(request, curr_client)) {
            route_message(C_IPC_MESSAGE, request);
        }

        free_xml(request);
        request = NULL;

        /* Stop reading as soon as the peer goes away */
        if(client->ch_status != IPC_CONNECT) {
            break;
        }
    }

    crm_debug_2("Processed %d messages", handled);

    connected = (client->ch_status == IPC_CONNECT);
    if(connected == FALSE) {
        process_client_disconnect(curr_client);
    }

    trigger_fsa(fsa_source);
    return connected;
}
extern GCHSource *lrm_source;
/*
 * Mainloop callback for the LRM connection.
 *
 * Receives pending LRM messages without blocking.  Returns TRUE while the
 * LRM channel is connected; otherwise calls lrm_connection_destroy() and
 * returns FALSE.  The channel argument is unused: the one obtained from
 * the LRM handle in user_data is consulted instead.
 */
gboolean
lrm_dispatch(IPC_Channel *src_not_used, gpointer user_data)
{
    ll_lrm_t *lrm_conn = (ll_lrm_t*)user_data;
    IPC_Channel *chan = lrm_conn->lrm_ops->ipcchan(lrm_conn);

    lrm_conn->lrm_ops->rcvmsg(lrm_conn, FALSE);

    if(chan->ch_status == IPC_CONNECT) {
        return TRUE;
    }

    lrm_connection_destroy(NULL);
    return FALSE;
}
extern gboolean process_lrm_event(lrm_op_t *op);
/* LRM operation callback: forward a non-NULL operation result to
 * process_lrm_event(); the result of that call is not used. */
void
lrm_op_callback(lrm_op_t* op)
{
    CRM_CHECK(op != NULL, return);

    (void)process_lrm_event(op);
}
/*
 * React to a change in one of a peer's process flags.
 *
 * The status string is ONLINESTATUS when member->processes has the given
 * flag set and OFFLINESTATUS otherwise.  After logging, nothing more is done
 * unless the flag includes crm_proc_crmd, the CIB is connected and we are
 * not in S_STOPPING.  Then:
 *  - if the member is our DC and is no longer a full member, an I_ELECTION
 *    input is registered;
 *  - otherwise, if we are the DC, the member's state is written to the CIB
 *    status section and, when the flag is clear, the member is removed from
 *    the join and the join state is re-checked.
 * The FSA is triggered at the end of either path.
 */
+static void crmd_peer_update(crm_node_t *member, enum crm_proc_flag client)
+{
+ const char *status = NULL;
+
+ CRM_CHECK(member != NULL, return);
+ status = (member->processes&client)?ONLINESTATUS:OFFLINESTATUS;
+ crm_notice("Status update: Client %s/%s now has status [%s] (DC=%s)",
+ member->uname, peer2text(client), status,
+ AM_I_DC?"true":crm_str(fsa_our_dc));
+
+ if((client & crm_proc_crmd) == 0) {
+ return;
+ } else if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) {
+ return;
+ } else if(fsa_state == S_STOPPING) {
+ return;
+ }
+
+ if(safe_str_eq(member->uname, fsa_our_dc) && crm_is_full_member(member) == FALSE){
+ /* Did the DC leave us? */
+ crm_info("Got client status callback - our DC is dead");
+ register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);
+
+ } else if(AM_I_DC) {
+ xmlNode *update = NULL;
+ update = create_node_state(
+ member->uname, NULL, NULL, status, NULL, NULL, FALSE, __FUNCTION__);
+
+ fsa_cib_anon_update(
+ XML_CIB_TAG_STATUS, update, cib_scope_local|cib_quorum_override|cib_can_create);
+ free_xml(update);
+
+ if((member->processes & client) == 0) {
+ erase_node_from_join(member->uname);
+ check_join_state(fsa_state, __FUNCTION__);
+ }
+ }
+
+ trigger_fsa(fsa_source);
+}
+
/*
 * Peer status callback (registered through crm_set_status_callback()).
 *
 * The meaning of `data` depends on `type`:
 *   crm_status_uname      - NULL
 *   crm_status_nstate     - the previous state string
 *   crm_status_processes  - pointer to the previous uint32_t process mask
 * so it must only be interpreted inside the matching case.
 *
 * Process changes are forwarded to crmd_peer_update() on every node, since
 * that is where a non-DC node notices that its DC's crmd has gone away.
 * Only the clean-up of stale status sections is restricted to the DC.
 */
void ais_status_callback(enum crm_status_type type, crm_node_t *node, const void *data)
{
    gboolean reset_status_entry = FALSE;
+    uint32_t old = 0;
-if(AM_I_DC == FALSE || node->uname == NULL) {
+    if(node->uname == NULL) {
        return;
    }
    switch(type) {
        case crm_status_uname:
            crm_info("status: %s is now %s", node->uname, node->state);
            /* reset_status_entry = TRUE; */
            /* If we've never seen the node, then it also won't be in the status section */
            break;
        case crm_status_nstate:
            crm_info("status: %s is now %s (was %s)", node->uname, node->state, (const char *)data);
            reset_status_entry = TRUE;
            break;
        case crm_status_processes:
+            /* data is only a process mask for this event type */
+            if(data != NULL) {
+                old = *(const uint32_t *)data;
+            }
+            crm_info("status: %s now has process list %32x (was %32x)", node->uname, node->processes, old);
+            if( (node->processes ^ old) & crm_proc_crmd ) {
+                crmd_peer_update(node, crm_proc_crmd);
+            }
            break;
    }
    /* Can this be removed now that do_cl_join_finalize_respond() does the same thing? */
-if(reset_status_entry && safe_str_eq(CRMD_STATE_ACTIVE, node->state)) {
+    if(AM_I_DC && reset_status_entry && safe_str_eq(CRMD_STATE_ACTIVE, node->state)) {
        erase_status_tag(node->uname, XML_CIB_TAG_LRM, cib_scope_local);
        erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
        /* TODO: potentially we also want to set XML_CIB_ATTR_JOINSTATE and XML_CIB_ATTR_EXPSTATE here */
    }
}
/*
 * Heartbeat node-status callback.
 *
 * Makes sure a peer cache entry exists for the node, ignores PINGSTATUS
 * updates, and otherwise records the node's crm_proc_ais flag as offline
 * (DEADSTATUS) or online (any other status).  When we are the DC, the
 * matching node state is also written to the CIB status section.
 */
void
crmd_ha_status_callback(const char *node, const char *status, void *private)
{
xmlNode *update = NULL;
crm_node_t *member = NULL;
- crm_notice("Status update: Node %s now has status [%s] (DC=%s)",
- node, status, AM_I_DC?"true":"false");
+ crm_notice("Status update: Node %s now has status [%s]", node, status);
member = crm_get_peer(0, node);
if(member == NULL || crm_is_member_active(member) == FALSE) {
/* Make sure it is created so crm_update_peer_proc() succeeds */
const char *uuid = get_uuid(node);
member = crm_update_peer(0, 0, 0, -1, 0, uuid, node, NULL, NULL);
}
/* Ping nodes get a cache entry (above) but no process or CIB updates */
if(safe_str_eq(status, PINGSTATUS)) {
return;
}
if(safe_str_eq(status, DEADSTATUS)) {
/* this node is toast */
crm_update_peer_proc(node, crm_proc_ais, OFFLINESTATUS);
if(AM_I_DC) {
update = create_node_state(
node, DEADSTATUS, XML_BOOLEAN_NO, OFFLINESTATUS,
CRMD_JOINSTATE_DOWN, NULL, TRUE, __FUNCTION__);
}
} else {
crm_update_peer_proc(node, crm_proc_ais, ONLINESTATUS);
if(AM_I_DC) {
update = create_node_state(
node, ACTIVESTATUS, NULL, NULL,
CRMD_JOINSTATE_PENDING, NULL, FALSE, __FUNCTION__);
}
}
trigger_fsa(fsa_source);
/* update is only non-NULL when we are the DC */
if(update != NULL) {
fsa_cib_anon_update(
XML_CIB_TAG_STATUS, update, cib_scope_local|cib_quorum_override|cib_can_create);
free_xml(update);
}
}
/*
 * Heartbeat client-status callback.
 *
 * Only status changes of the CRM_SYSTEM_CRMD client are handled.
 * JOINSTATUS/LEAVESTATUS are mapped to ONLINESTATUS/OFFLINESTATUS together
 * with a join state, R_PEER_DATA is set, and a peer cache entry is created
 * if needed.  With this patch applied, the DC writes the node state to the
 * CIB and then the peer's crm_proc_crmd flag is updated through
 * crm_update_peer_proc(); the CIB-connected/stopping checks, the election
 * on DC loss and the join clean-up are removed from this function.
 */
void
crmd_client_status_callback(const char * node, const char * client,
const char * status, void * private)
{
const char *join = NULL;
crm_node_t *member = NULL;
- xmlNode *update = NULL;
gboolean clear_shutdown = FALSE;
crm_debug_3("Invoked");
if(safe_str_neq(client, CRM_SYSTEM_CRMD)) {
return;
}
if(safe_str_eq(status, JOINSTATUS)){
clear_shutdown = TRUE;
status = ONLINESTATUS;
join = CRMD_JOINSTATE_PENDING;
} else if(safe_str_eq(status, LEAVESTATUS)){
status = OFFLINESTATUS;
join = CRMD_JOINSTATE_DOWN;
/* clear_shutdown = TRUE; */
}
set_bit_inplace(fsa_input_register, R_PEER_DATA);
crm_notice("Status update: Client %s/%s now has status [%s] (DC=%s)",
node, client, status, AM_I_DC?"true":"false");
if(safe_str_eq(status, ONLINESTATUS)) {
/* remove the cached value in case it changed */
crm_debug_2("Uncaching UUID for %s", node);
unget_uuid(node);
}
member = crm_get_peer(0, node);
if(member == NULL || crm_is_member_active(member) == FALSE) {
/* Make sure it is created so crm_update_peer_proc() succeeds */
const char *uuid = get_uuid(node);
member = crm_update_peer(0, 0, 0, -1, 0, uuid, node, NULL, NULL);
}
- crm_update_peer_proc(node, crm_proc_crmd, status);
-
- if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) {
- return;
- } else if(fsa_state == S_STOPPING) {
- return;
- }
-
- if(safe_str_eq(node, fsa_our_dc) && safe_str_eq(status, OFFLINESTATUS)){
- /* did our DC leave us */
- crm_info("Got client status callback - our DC is dead");
- register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);
-
- } else if(AM_I_DC == FALSE) {
- crm_info("Not the DC");
-
- } else {
+ if(AM_I_DC) {
+ xmlNode *update = NULL;
crm_debug_3("Got client status callback");
update = create_node_state(
node, NULL, NULL, status, join, NULL, clear_shutdown, __FUNCTION__);
fsa_cib_anon_update(
XML_CIB_TAG_STATUS, update, cib_scope_local|cib_quorum_override|cib_can_create);
free_xml(update);
-
- if(safe_str_eq(status, OFFLINESTATUS)) {
- erase_node_from_join(node);
- check_join_state(fsa_state, __FUNCTION__);
- }
- }
-
- trigger_fsa(fsa_source);
+ }
+ crm_update_peer_proc(node, crm_proc_crmd, status);
}
/*
 * Release everything associated with a local IPC client.
 *
 * Calling this function on an _active_ connection results in:
 *   crmd_ipc_connection_destroy
 *    -> G_main_del_IPC_Channel
 *     -> g_source_unref
 *      -> G_CH_destroy_int
 *       -> crmd_ipc_connection_destroy
 *
 * The client's source pointer is cleared before the source is removed from
 * the mainloop.  A better alternative is to call G_main_del_IPC_Channel()
 * directly.
 */
void
crmd_ipc_connection_destroy(gpointer user_data)
{
    crmd_client_t *client = user_data;
    GCHSource *mainloop_src = NULL;

    if(client == NULL) {
        crm_debug_4("No client to delete");
        return;
    }

    crm_debug_2("Disconnecting client %s (%p)", client->table_key, client);

    /* Detach the source from the client before deleting it */
    mainloop_src = client->client_source;
    client->client_source = NULL;

    if(mainloop_src != NULL) {
        crm_debug_3("Deleting %s (%p) from mainloop",
                    client->table_key, mainloop_src);
        G_main_del_IPC_Channel(mainloop_src);
    }

    crm_free(client->table_key);
    crm_free(client->sub_sys);
    crm_free(client->uuid);
    crm_free(client);
}
/*
 * Accept a new local IPC client.
 *
 * For a usable channel, allocates a blank crmd_client_t, sets both queue
 * lengths to 1024 and registers the channel with the mainloop, using
 * crmd_ipc_msg_callback() for dispatch and crmd_ipc_connection_destroy()
 * for clean-up.  Always returns TRUE.
 */
gboolean
crmd_client_connect(IPC_Channel *client_channel, gpointer user_data)
{
    crmd_client_t *blank_client = NULL;

    crm_debug_3("Invoked");

    if(client_channel == NULL) {
        crm_err("Channel was NULL");
        return TRUE;
    }

    if(client_channel->ch_status == IPC_DISCONNECT) {
        crm_err("Channel was disconnected");
        return TRUE;
    }

    crm_debug_3("Channel connected");
    crm_malloc0(blank_client, sizeof(crmd_client_t));
    CRM_ASSERT(blank_client != NULL);

    crm_debug_2("Created client: %p", blank_client);

    client_channel->ops->set_recv_qlen(client_channel, 1024);
    client_channel->ops->set_send_qlen(client_channel, 1024);

    blank_client->client_channel = client_channel;
    blank_client->sub_sys = NULL;
    blank_client->uuid = NULL;
    blank_client->table_key = NULL;

    blank_client->client_source = G_main_add_IPC_Channel(
        G_PRIORITY_LOW, client_channel,
        FALSE, crmd_ipc_msg_callback,
        blank_client, crmd_ipc_connection_destroy);

    return TRUE;
}
#if SUPPORT_HEARTBEAT
static gboolean fsa_have_quorum = FALSE;
/*
 * Mainloop callback for the CCM file descriptor.
 *
 * Lets the CCM library handle the pending event.  Returns FALSE when that
 * fails and TRUE otherwise; a failure is only reported to the FSA (as
 * I_ERROR) when R_CCM_DISCONNECTED is not set, i.e. when we did not sign
 * out ourselves.  The FSA is triggered in every case.
 */
gboolean ccm_dispatch(int fd, gpointer user_data)
{
    oc_ev_t *ccm_token = (oc_ev_t*)user_data;
    gboolean keep_going = TRUE;
    int rc = 0;

    crm_debug_3("Invoked");

    rc = oc_ev_handle_event(ccm_token);
    if(rc != 0) {
        if(is_set(fsa_input_register, R_CCM_DISCONNECTED) == FALSE) {
            /* Unexpected: we had not signed out */
            register_fsa_input(C_CCM_CALLBACK, I_ERROR, NULL);
            crm_err("CCM connection appears to have failed: rc=%d.",
                    rc);
        }
        keep_going = FALSE;
    }

    trigger_fsa(fsa_source);
    return keep_going;
}
/*
 * CCM membership event callback.
 *
 * Depending on the event, updates the quorum state and/or the membership
 * cache, and always acknowledges the event with oc_ev_callback_done().
 * The quorum value seen by the previous quorum-changing event is kept in
 * fsa_have_quorum so that an actual loss of quorum can be logged.
 */
void
crmd_ccm_msg_callback(
    oc_ed_t event, void *cookie, size_t size, const void *data)
{
    gboolean update_cache = FALSE;
    const oc_ev_membership_t *membership = data;
    gboolean update_quorum = FALSE;

    crm_debug_3("Invoked");
    CRM_ASSERT(data != NULL);

    crm_info("Quorum %s after event=%s (id=%d)",
             ccm_have_quorum(event)?"(re)attained":"lost",
             ccm_event_name(event), membership->m_instance);

    if(crm_peer_seq > membership->m_instance) {
        crm_err("Membership instance ID went backwards! %llu->%d",
                crm_peer_seq, membership->m_instance);
        CRM_ASSERT(crm_peer_seq <= membership->m_instance);
        return;
    }

    /*
     * OC_EV_MS_NEW_MEMBERSHIP: membership with quorum
     * OC_EV_MS_MS_INVALID: membership without quorum
     * OC_EV_MS_NOT_PRIMARY: previous membership no longer valid
     * OC_EV_MS_PRIMARY_RESTORED: previous membership restored
     * OC_EV_MS_EVICTED: the client is evicted from ccm.
     */
    switch(event) {
        case OC_EV_MS_NEW_MEMBERSHIP:
        case OC_EV_MS_INVALID:
            update_cache = TRUE;
            update_quorum = TRUE;
            break;
        case OC_EV_MS_NOT_PRIMARY:
            break;
        case OC_EV_MS_PRIMARY_RESTORED:
            update_cache = TRUE;
            crm_peer_seq = membership->m_instance;
            break;
        case OC_EV_MS_EVICTED:
            update_quorum = TRUE;
            register_fsa_input(C_FSA_INTERNAL, I_STOP, NULL);
            crm_err("Shutting down after CCM event: %s",
                    ccm_event_name(event));
            break;
        default:
            crm_err("Unknown CCM event: %d", event);
    }

    if(update_quorum) {
        crm_have_quorum = ccm_have_quorum(event);
        crm_update_quorum(crm_have_quorum, FALSE);

        if(crm_have_quorum == FALSE) {
            /* did we just lose quorum? */
            if(fsa_have_quorum) {
                crm_info("Quorum lost: %s", ccm_event_name(event));
            }
        }

        /* Remember the new value so the next event can detect a change */
        fsa_have_quorum = crm_have_quorum;
    }

    if(update_cache) {
        crm_debug_2("Updating cache after event %s", ccm_event_name(event));
        do_ccm_update_cache(C_CCM_CALLBACK, fsa_state, event, data, NULL);

    } else if(event != OC_EV_MS_NOT_PRIMARY) {
        crm_peer_seq = membership->m_instance;
        register_fsa_action(A_TE_CANCEL);
    }

    oc_ev_callback_done(cookie);
    return;
}
#endif
/*
 * Destroy notification for the CIB connection.
 *
 * Marks the connection as disconnected and triggers the FSA.  If the
 * R_CIB_CONNECTED flag is still set, the disconnect is reported to the FSA
 * as an I_ERROR input and the flag is cleared.
 */
void
crmd_cib_connection_destroy(gpointer user_data)
{
    CRM_CHECK(user_data == fsa_cib_conn, ;);

    crm_debug_3("Invoked");
    trigger_fsa(fsa_source);
    fsa_cib_conn->state = cib_disconnected;

    if(is_set(fsa_input_register, R_CIB_CONNECTED)) {
        /* eventually this will trigger a reconnect, not a shutdown */
        crm_err("Connection to the CIB terminated...");
        register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
        clear_bit_inplace(fsa_input_register, R_CIB_CONNECTED);

    } else {
        crm_info("Connection to the CIB terminated...");
    }
}
/* Trigger handler: run the FSA once, logging the length of the message
 * queue before and after.  Always returns TRUE. */
gboolean
crm_fsa_trigger(gpointer user_data)
{
    crm_debug_2("Invoked (queue len: %d)",
                g_list_length(fsa_message_queue));

    s_crmd_fsa(C_FSA_INTERNAL);

    crm_debug_2("Exited (queue len: %d)",
                g_list_length(fsa_message_queue));

    return TRUE;
}
diff --git a/include/crm/common/cluster.h b/include/crm/common/cluster.h
index 2f698122ca..5fb40bdcf0 100644
--- a/include/crm/common/cluster.h
+++ b/include/crm/common/cluster.h
@@ -1,106 +1,107 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef CRM_COMMON_CLUSTER__H
#define CRM_COMMON_CLUSTER__H
#include <crm/common/xml.h>
#include <crm/common/msg.h>
#include <crm/common/util.h>
#include <crm/ais.h>
#if SUPPORT_HEARTBEAT
# include <heartbeat/hb_api.h>
# include <ocf/oc_event.h>
#endif
extern gboolean crm_have_quorum;
extern GHashTable *crm_peer_cache;
extern GHashTable *crm_peer_id_cache;
extern unsigned long long crm_peer_seq;
extern void crm_peer_init(void);
extern void crm_peer_destroy(void);
extern gboolean crm_cluster_connect(
char **our_uname, char **our_uuid, void *dispatch, void *destroy,
#if SUPPORT_HEARTBEAT
ll_cluster_t **hb_conn
#else
void **unused
#endif
);
extern gboolean send_cluster_message(
const char *node, enum crm_ais_msg_types service, xmlNode *data, gboolean ordered);
extern void destroy_crm_node(gpointer data);
extern crm_node_t *crm_get_peer(unsigned int id, const char *uname);
extern crm_node_t *crm_update_ais_node(xmlNode *member, long long seq);
extern void crm_update_peer_proc(
const char *uname, uint32_t flag, const char *status);
extern crm_node_t *crm_update_peer(
unsigned int id, uint64_t born, uint64_t seen, int32_t votes, uint32_t children,
const char *uuid, const char *uname, const char *addr, const char *state);
extern gboolean crm_is_member_active(const crm_node_t *node);
+extern gboolean crm_is_full_member(const crm_node_t *node);
extern guint crm_active_members(void);
extern guint reap_crm_member(uint32_t id);
extern guint crm_active_members(void);
extern guint crm_active_peers(uint32_t peer);
extern gboolean crm_calculate_quorum(void);
extern int crm_terminate_member(int nodeid, const char *uname, IPC_Channel *cluster);
extern int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection);
extern gboolean crm_get_cluster_name(char **cname);
#if SUPPORT_HEARTBEAT
extern gboolean ccm_have_quorum(oc_ed_t event);
extern const char *ccm_event_name(oc_ed_t event);
extern crm_node_t *crm_update_ccm_node(
const oc_ev_membership_t *oc, int offset, const char *state, uint64_t seq);
#endif
#if SUPPORT_AIS
extern int ais_fd_sync;
extern GFDSource *ais_source;
extern gboolean send_ais_text(
int class, const char *data, gboolean local,
const char *node, enum crm_ais_msg_types dest);
extern gboolean get_ais_nodeid(uint32_t *id, char **uname);
extern gboolean ais_dispatch(int sender, gpointer user_data);
#endif
extern void empty_uuid_cache(void);
extern const char *get_uuid(const char *uname);
extern const char *get_uname(const char *uuid);
extern void set_uuid(xmlNode *node, const char *attr, const char *uname);
extern void unget_uuid(const char *uname);
/* Kind of peer change reported to the callback registered with
 * crm_set_status_callback().  The callback's data argument depends on the
 * kind, as noted for each value. */
enum crm_status_type
{
crm_status_uname, /* node name became known; data is NULL */
crm_status_nstate, /* node state changed; data is the previous state string */
crm_status_processes, /* process mask changed; data points to the previous uint32_t mask */
};
enum crm_ais_msg_types text2msg_type(const char *text);
extern void crm_set_status_callback(
void (*dispatch)(enum crm_status_type, crm_node_t*, const void*));
#endif
diff --git a/lib/common/membership.c b/lib/common/membership.c
index e689f7f895..a4b4242705 100644
--- a/lib/common/membership.c
+++ b/lib/common/membership.c
@@ -1,484 +1,497 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <sys/param.h>
#include <sys/types.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <glib.h>
#include <crm/common/ipc.h>
#include <crm/common/cluster.h>
#include <crm/ais.h>
#include <crm/msg_xml.h>
GHashTable *crm_peer_id_cache = NULL;
GHashTable *crm_peer_cache = NULL;
unsigned long long crm_peer_seq = 0;
gboolean crm_have_quorum = FALSE;
gboolean crm_is_member_active(const crm_node_t *node)
{
if(node && safe_str_eq(node->state, CRM_NODE_MEMBER)) {
return TRUE;
}
return FALSE;
}
/* Returns TRUE when the node is an active member (see
 * crm_is_member_active()) and its process mask has crm_proc_crmd set. */
+gboolean crm_is_full_member(const crm_node_t *node)
+{
+ if(crm_is_member_active(node) && (node->processes & crm_proc_crmd)) {
+ return TRUE;
+ }
+ return FALSE;
+}
+
/*
 * GHRFunc used to prune the peer cache.
 *
 * user_data is either NULL (consider every entry) or a crm_node_t whose id
 * selects the entries to consider.  Returns TRUE, asking for removal, for a
 * considered entry that is not an active member.
 */
static gboolean crm_reap_dead_member(
    gpointer key, gpointer value, gpointer user_data)
{
    crm_node_t *candidate = value;
    crm_node_t *wanted = user_data;
    gboolean considered = (wanted == NULL || candidate->id == wanted->id);

    if(considered && crm_is_member_active(value) == FALSE) {
        crm_notice("Removing %s/%u from the membership list", candidate->uname, candidate->id);
        return TRUE;
    }

    return FALSE;
}
guint reap_crm_member(uint32_t id)
{
int matches = 0;
crm_node_t *node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id));
if(node == NULL) {
crm_info("Peer %u is unknown", id);
} else if(crm_is_member_active(node)) {
crm_warn("Peer %u/%s is still active", id, node->uname);
} else {
if(g_hash_table_remove(crm_peer_id_cache, GUINT_TO_POINTER(id))) {
crm_notice("Removed dead peer %u from the uuid cache", id);
} else {
crm_warn("Peer %u/%s was not removed", id, node->uname);
}
matches = g_hash_table_foreach_remove(
crm_peer_cache, crm_reap_dead_member, node);
crm_notice("Removed %d dead peers with id=%u from the membership list", matches, id);
}
return matches;
}
/* GHFunc: bump the guint counter behind user_data for every active member. */
static void crm_count_member(
    gpointer key, gpointer value, gpointer user_data)
{
    guint *active = user_data;

    if(crm_is_member_active(value) == FALSE) {
        return;
    }
    (*active)++;
}
/* Returns the number of peer cache entries that are active members. */
guint crm_active_members(void)
{
    guint active = 0;

    g_hash_table_foreach(crm_peer_cache, crm_count_member, &active);

    return active;
}
/* Accumulator handed to crm_count_peer() while walking the peer cache. */
struct peer_count_s
{
uint32_t peer; /* process flag(s) to test in each node's process mask */
guint count; /* number of active members with a matching flag */
};
/* GHFunc: count active members whose process mask shares at least one bit
 * with the flag stored in the peer_count_s behind user_data. */
static void crm_count_peer(
    gpointer key, gpointer value, gpointer user_data)
{
    struct peer_count_s *tally = user_data;
    crm_node_t *peer = value;

    if(crm_is_member_active(peer) == FALSE) {
        return;
    }
    if(peer->processes & tally->peer) {
        tally->count += 1;
    }
}
guint crm_active_peers(uint32_t peer)
{
struct peer_count_s search;
search.count = 0;
search.peer = peer;
g_hash_table_foreach(crm_peer_cache, crm_count_peer, &search);
return search.count;
}
/* Value destructor for the peer cache: frees a crm_node_t together with
 * its uuid, state, uname and addr strings. */
void destroy_crm_node(gpointer data)
{
    crm_node_t *doomed = data;

    crm_debug_2("Destroying entry for node %u", doomed->id);

    crm_free(doomed->uuid);
    crm_free(doomed->state);
    crm_free(doomed->uname);
    crm_free(doomed->addr);
    crm_free(doomed);
}
/*
 * One-time creation of the peer caches.
 *
 * Only the first call does any work: it destroys whatever tables exist and
 * creates the uname-keyed cache (which owns its crm_node_t values through
 * destroy_crm_node) and the id-keyed cache (which owns nothing).
 */
void crm_peer_init(void)
{
    static gboolean initialized = FALSE;

    if(initialized == FALSE) {
        initialized = TRUE;

        crm_peer_destroy();

        if(crm_peer_cache == NULL) {
            crm_peer_cache = g_hash_table_new_full(
                g_str_hash, g_str_equal, NULL, destroy_crm_node);
        }

        if(crm_peer_id_cache == NULL) {
            crm_peer_id_cache = g_hash_table_new_full(
                g_direct_hash, g_direct_equal, NULL, NULL);
        }
    }
}
void crm_peer_destroy(void)
{
if(crm_peer_cache != NULL) {
g_hash_table_destroy(crm_peer_cache);
crm_peer_cache = NULL;
}
if(crm_peer_id_cache != NULL) {
g_hash_table_destroy(crm_peer_id_cache);
crm_peer_id_cache = NULL;
}
}
/* Optional hook invoked by this file when a peer's uname, state or process
 * mask changes; NULL means no notification is sent. */
void (*crm_status_callback)(enum crm_status_type, crm_node_t*, const void*) = NULL;
/* Install (or, with NULL, remove) the peer status hook. */
void crm_set_status_callback(
void (*dispatch)(enum crm_status_type,crm_node_t*, const void*))
{
crm_status_callback = dispatch;
}
/*
 * Allocate a peer entry and register it in the caches.
 *
 * At least one of id (> 0) and uname must be supplied.  The new node starts
 * in state "unknown"; it is stored in the id cache when id > 0 and in the
 * uname cache when uname is given, in which case the status callback (if
 * any) is told about the new name.  Returns NULL if neither key was given.
 */
static crm_node_t *crm_new_peer(unsigned int id, const char *uname)
{
    crm_node_t *node = NULL;

    CRM_CHECK(uname != NULL || id > 0, return NULL);

    /* uname is optional here, so never hand it to %s unguarded */
    crm_debug("Creating entry for node %s/%u", crm_str(uname), id);

    crm_malloc0(node, sizeof(crm_node_t));
    node->state = crm_strdup("unknown");

    if(id > 0) {
        node->id = id;
        crm_info("Node %s now has id: %u", crm_str(uname), id);
        g_hash_table_replace(crm_peer_id_cache, GUINT_TO_POINTER(node->id), node);
    }

    if(uname) {
        node->uname = crm_strdup(uname);
        CRM_ASSERT(node->uname != NULL);
        crm_info("Node %u is now known as %s", id, node->uname);
        g_hash_table_replace(crm_peer_cache, node->uname, node);

        if(is_openais_cluster()) {
            node->uuid = crm_strdup(node->uname);
        }

        if(crm_status_callback) {
            crm_status_callback(crm_status_uname, node, NULL);
        }
    }

    return node;
}
/*
 * Look up a peer by uname and/or id.
 *
 * The uname cache is consulted first; the id cache is only used when that
 * finds nothing and id > 0.  As side effects, a node found by id gains the
 * supplied uname if it had none, and a node whose stored id differs from a
 * supplied id > 0 is re-keyed in the id cache.  Returns NULL when no entry
 * matches.
 */
crm_node_t *crm_get_peer(unsigned int id, const char *uname)
{
crm_node_t *node = NULL;
if(uname != NULL) {
node = g_hash_table_lookup(crm_peer_cache, uname);
}
if(node == NULL && id > 0) {
node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id));
/* Found by id, but under a name that is not in the uname cache as 'uname' */
if(node && node->uname && uname) {
crm_crit("Node %s and %s share the same cluster node id '%u'!",
node->uname, uname, id);
/* NOTE: Calling crm_new_peer() means the entry in
* crm_peer_id_cache will point to the new entity
*/
/* TODO: Replace the old uname instead? */
node = crm_new_peer(id, uname);
CRM_ASSERT(node->uname != NULL);
}
}
/* First time we learn the name of a node previously known only by id */
if(node && uname && node->uname == NULL) {
node->uname = crm_strdup(uname);
crm_info("Node %u is now known as %s", id, uname);
g_hash_table_insert(crm_peer_cache, node->uname, node);
if(crm_status_callback) {
crm_status_callback(crm_status_uname, node, NULL);
}
}
/* The node's id changed (or was unset): move it to the new id key */
if(node && id > 0 && id != node->id) {
g_hash_table_remove(crm_peer_id_cache, GUINT_TO_POINTER(node->id));
g_hash_table_insert(crm_peer_id_cache, GUINT_TO_POINTER(id), node);
node->id = id;
crm_info("Node %s now has id: %u", crm_str(uname), id);
}
return node;
}
/*
 * Create or update a peer cache entry.
 *
 * The peer is located with crm_get_peer(id, uname) and created if missing.
 * A field is only updated when the caller supplies a meaningful value:
 * votes > 0, children > 0, born != 0, seen != 0 (and the node is an active
 * member), non-NULL uuid (only if none is stored yet), addr and state.
 * Process and state changes are reported through the status callback with
 * the previous value as data.  A summary is logged whenever state, address,
 * votes or processes changed.
 *
 * Returns the node, or NULL if neither uname nor a positive id was given.
 */
crm_node_t *crm_update_peer(
    unsigned int id, uint64_t born, uint64_t seen, int32_t votes, uint32_t children,
    const char *uuid, const char *uname, const char *addr, const char *state)
{
    gboolean state_changed = FALSE;
    gboolean addr_changed = FALSE;
    gboolean procs_changed = FALSE;
    gboolean votes_changed = FALSE;
    crm_node_t *node = NULL;

    CRM_CHECK(uname != NULL || id > 0, return NULL);
    CRM_ASSERT(crm_peer_cache != NULL);
    CRM_ASSERT(crm_peer_id_cache != NULL);

    node = crm_get_peer(id, uname);
    if(node == NULL) {
        node = crm_new_peer(id, uname);

        /* do it now so we don't get '(new)' everywhere */
        node->votes = votes;
        node->processes = children;
        if(addr) {
            node->addr = crm_strdup(addr);
        }
    }

    if(votes > 0 && node->votes != votes) {
        votes_changed = TRUE;
        node->votes = votes;
    }

    if(node->uuid == NULL) {
        if(uuid != NULL) {
            node->uuid = crm_strdup(uuid);

        } else if(node->uname != NULL && is_openais_cluster()) {
            node->uuid = crm_strdup(node->uname);
        }
    }

    if(children > 0 && children != node->processes) {
        uint32_t last = node->processes;
        node->processes = children;
        procs_changed = TRUE;

        if(crm_status_callback) {
            crm_status_callback(crm_status_processes, node, &last);
        }
    }

    if(born != 0) {
        node->born = born;
    }

    if(state != NULL && safe_str_neq(node->state, state)) {
        char *last = node->state;
        node->state = crm_strdup(state);
        state_changed = TRUE;

        /* The callback receives the old state, which is freed afterwards */
        if(crm_status_callback) {
            crm_status_callback(crm_status_nstate, node, last);
        }
        crm_free(last);
    }

    if(seen != 0 && crm_is_member_active(node)) {
        node->last_seen = seen;
    }

    if(addr != NULL) {
        if(node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) {
            addr_changed = TRUE;
            crm_free(node->addr);
            node->addr = crm_strdup(addr);
        }
    }

    if(state_changed || addr_changed || votes_changed || procs_changed) {
        /* uname and addr are both optional (callers pass NULL for either),
         * so guard them before formatting with %s */
        crm_info("Node %s: id=%u state=%s%s addr=%s%s votes=%d%s born="U64T" seen="U64T" proc=%.32x%s",
                 crm_str(node->uname), node->id,
                 node->state, state_changed?" (new)":"",
                 crm_str(node->addr), addr_changed?" (new)":"",
                 node->votes, votes_changed?" (new)":"",
                 node->born, node->last_seen,
                 node->processes, procs_changed?" (new)":""
            );
    }

    return node;
}
/*
 * Update the peer cache from an XML member description.
 *
 * Reads the id, addr, uname, state, born, seen, votes and processes
 * attributes and forwards them to crm_update_peer(); the uname doubles as
 * the uuid.  The seq argument is not used.
 */
crm_node_t *crm_update_ais_node(xmlNode *member, long long seq)
{
    /* String-valued attributes are passed through untouched */
    const char *uname = crm_element_value(member, "uname");
    const char *addr = crm_element_value(member, "addr");
    const char *state = crm_element_value(member, "state");

    /* Numeric attributes */
    unsigned int id = crm_int_helper(crm_element_value(member, "id"), NULL);
    int votes = crm_int_helper(crm_element_value(member, "votes"), NULL);
    unsigned int procs = crm_int_helper(crm_element_value(member, "processes"), NULL);

    /* TODO: These values will contain garbage if version < 0.7.1 */
    uint64_t born = crm_int_helper(crm_element_value(member, "born"), NULL);
    uint64_t seen = crm_int_helper(crm_element_value(member, "seen"), NULL);

    return crm_update_peer(id, born, seen, votes, procs, uname, uname, addr, state);
}
#if SUPPORT_HEARTBEAT
/*
 * Update the peer cache from entry 'offset' of a CCM membership array.
 *
 * When state equals CRM_NODE_ACTIVE, the node's crm_proc_ais and
 * crm_proc_crmd flags are also marked online, because heartbeat sends no
 * separate notification for nodes or processes that were already up.
 * Returns the updated node, or NULL if the entry has no uname.
 */
crm_node_t *crm_update_ccm_node(
    const oc_ev_membership_t *oc, int offset, const char *state, uint64_t seq)
{
    const char *member_name = NULL;
    const char *member_uuid = NULL;
    crm_node_t *peer = NULL;

    CRM_CHECK(oc->m_array[offset].node_uname != NULL, return NULL);

    member_name = oc->m_array[offset].node_uname;
    member_uuid = get_uuid(member_name);

    peer = crm_update_peer(oc->m_array[offset].node_id,
                           oc->m_array[offset].node_born_on, seq, -1, 0,
                           member_uuid, member_name, NULL, state);

    if(safe_str_eq(CRM_NODE_ACTIVE, state)) {
        /* Heartbeat doesn't send status notifications for nodes that were already part of the cluster */
        crm_update_peer_proc(member_name, crm_proc_ais, ONLINESTATUS);

        /* Nor does it send status notifications for processes that were already active */
        crm_update_peer_proc(member_name, crm_proc_crmd, ONLINESTATUS);
    }

    return peer;
}
#endif
/*
 * Set or clear one process flag of a known peer.
 *
 * The flag is set when status equals ONLINESTATUS and cleared for any other
 * status.  The node must already be in the uname cache; otherwise an error
 * is logged and nothing changes.  When the mask actually changes, the change
 * is logged and (with this patch) the status callback is invoked with a
 * pointer to the previous mask.
 */
void crm_update_peer_proc(const char *uname, uint32_t flag, const char *status)
{
+ uint32_t last = 0;
crm_node_t *node = NULL;
gboolean changed = FALSE;
CRM_ASSERT(crm_peer_cache != NULL);
CRM_CHECK(uname != NULL, return);
node = g_hash_table_lookup(crm_peer_cache, uname);
CRM_CHECK(node != NULL,
crm_err("Could not set %s.%s to %s", uname, peer2text(flag), status);
return);
/* Remember the mask as it was before this update */
+ last = node->processes;
if(safe_str_eq(status, ONLINESTATUS)) {
if((node->processes & flag) == 0) {
set_bit_inplace(node->processes, flag);
changed = TRUE;
}
} else if(node->processes & flag) {
clear_bit_inplace(node->processes, flag);
changed = TRUE;
}
if(changed) {
crm_info("%s.%s is now %s", uname, peer2text(flag), status);
+ if(crm_status_callback) {
+ crm_status_callback(crm_status_processes, node, &last);
+ }
}
}
/*
 * Ask attrd to set the "terminate" attribute (to the current time) for a
 * peer, using whichever connection the caller supplied.
 *
 * cluster    - mainloop IPC channel, used when non-NULL
 * connection - handle for the no-mainloop variant, used when cluster is NULL
 *
 * Returns 1 if the request was sent, 0 if it could not be sent (including
 * when neither connection was supplied), and -1 if the node is unknown.
 */
static int crm_terminate_member_common(int nodeid, const char *uname, IPC_Channel *cluster, int *connection)
{
    crm_node_t *node = NULL;
    gboolean success = FALSE;
    const char *reason = "Cluster connection failed";

    node = crm_get_peer(nodeid, uname);

    /* Check the node first: everything below dereferences it */
    if(node == NULL) {
        if(uname) {
            crm_err("Nothing known about node uname=%s", uname);

        } else if(nodeid > 0) {
            crm_err("Nothing known about node id=%d", nodeid);

        } else {
            crm_err("A node id or uname is required, got %d/%p", nodeid, uname);
        }
        return -1;
    }

    if(cluster == NULL && connection == NULL) {
        reason = "No connection to the cluster";

    } else {
        time_t now = time(NULL);
        char *now_s = crm_itoa(now);

        if(cluster) {
            success = attrd_update(cluster, 'U', node->uname, "terminate", now_s, XML_CIB_TAG_STATUS, NULL, NULL);
        } else {
            /* No mainloop channel: fall back to the caller's own handle */
            success = attrd_update_no_mainloop(connection, 'U', node->uname, "terminate", now_s, XML_CIB_TAG_STATUS, NULL, NULL);
        }
        crm_free(now_s);
    }

    if(success) {
        crm_info("Requested that node %d/%s be terminated", nodeid, crm_str(node->uname));
        return 1;
    }

    crm_err("Could not terminate node %d/%s: %s", nodeid, crm_str(node->uname), reason);
    return 0;
}
/* Request termination of a peer over a mainloop IPC channel.  Without a
 * channel, the failure is logged and 0 is returned; otherwise the result of
 * crm_terminate_member_common() is returned. */
int crm_terminate_member(int nodeid, const char *uname, IPC_Channel *cluster)
{
    if(cluster == NULL) {
        crm_err("Could not terminate node %d/%s: No cluster connection", nodeid, uname);
        return 0;
    }

    return crm_terminate_member_common(nodeid, uname, cluster, NULL);
}
/* Variant of crm_terminate_member() for callers without a mainloop: passes
 * a NULL IPC channel and the caller's connection handle to
 * crm_terminate_member_common() and returns its result. */
int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
{
return crm_terminate_member_common(nodeid, uname, NULL, connection);
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Mon, Apr 21, 7:56 PM (11 h, 29 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1665624
Default Alt Text
(36 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment