diff --git a/crm/cib/callbacks.c b/crm/cib/callbacks.c index 408202ad6b..30b849d819 100644 --- a/crm/cib/callbacks.c +++ b/crm/cib/callbacks.c @@ -1,1737 +1,1737 @@ -/* $Id: callbacks.c,v 1.119 2006/04/05 13:23:34 andrew Exp $ */ +/* $Id: callbacks.c,v 1.120 2006/04/20 15:43:23 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern GMainLoop* mainloop; extern gboolean cib_shutdown_flag; extern void GHFunc_count_peers( gpointer key, gpointer value, gpointer user_data); extern enum cib_errors revision_check( crm_data_t *cib_update, crm_data_t *cib_copy, int flags); void initiate_exit(void); void terminate_ha_connection(const char *caller); gint cib_GCompareFunc(gconstpointer a, gconstpointer b); gboolean cib_msg_timeout(gpointer data); void cib_GHFunc(gpointer key, gpointer value, gpointer user_data); gboolean can_write(int flags); HA_Message *cib_msg_copy(HA_Message *msg, gboolean with_data); gboolean ccm_manual_check(gpointer data); void send_cib_replace(const HA_Message *sync_request, const char *host); void cib_process_request( HA_Message *request, gboolean privileged, gboolean force_synchronous, gboolean from_peer, cib_client_t *cib_client); gboolean syncd_once = FALSE; GHashTable *peer_hash = NULL; int next_client_id = 0; gboolean cib_is_master = FALSE; gboolean cib_have_quorum = FALSE; char * ccm_transition_id = NULL; GHashTable *client_list = NULL; GHashTable *ccm_membership = NULL; extern const char *cib_our_uname; extern ll_cluster_t *hb_conn; extern unsigned long cib_num_ops, cib_num_local, cib_num_updates, cib_num_fail; extern unsigned long cib_bad_connects, cib_num_timeouts; extern longclock_t cib_call_time; static HA_Message * cib_prepare_common(HA_Message *root, const char *section) { HA_Message *data = NULL; /* extract the CIB from the fragment */ if(root == NULL) { return NULL; } else if(safe_str_eq(crm_element_name(root), XML_TAG_FRAGMENT)) { data = find_xml_node(root, XML_TAG_CIB, TRUE); if(data != NULL) { crm_debug_3("Extracted CIB from "XML_TAG_FRAGMENT); } else { crm_log_xml_debug_4(root, "No CIB"); } } else { data = root; crm_log_xml_debug_4(root, "cib:input"); } /* grab the section specified for the command */ if(data != NULL && safe_str_eq(crm_element_name(data), XML_TAG_CIB)){ int rc = revision_check(data, the_cib, 0/* call_options */); if(rc == cib_ok) { data = get_object_root(section, data); if(data != NULL) { crm_debug_3("Extracted %s from CIB", section); } else { crm_log_xml_debug_4(root, "No Section"); } } else { crm_debug_2("Revision check failed"); } } return data; } static enum cib_errors cib_prepare_none(HA_Message *request, HA_Message **data, const char **section) { *data = NULL; *section = cl_get_string(request, F_CIB_SECTION); return cib_ok; } static enum cib_errors cib_prepare_data(HA_Message *request, HA_Message **data, const char **section) { HA_Message *input_fragment = cl_get_struct(request, F_CIB_CALLDATA); *section = cl_get_string(request, F_CIB_SECTION); *data = cib_prepare_common(input_fragment, *section); return cib_ok; } static enum cib_errors cib_prepare_sync(HA_Message *request, HA_Message **data, const char **section) { *section = cl_get_string(request, F_CIB_SECTION); *data = request; return cib_ok; } static enum cib_errors cib_prepare_diff(HA_Message *request, HA_Message **data, const char **section) { HA_Message *input_fragment = cl_get_struct(request,F_CIB_UPDATE_DIFF); *section = NULL; *data = cib_prepare_common(input_fragment, NULL); return cib_ok; } static enum cib_errors cib_cleanup_query(const char *op, HA_Message **data, HA_Message **output) { CRM_DEV_ASSERT(*data == NULL); return cib_ok; } static enum cib_errors cib_cleanup_output(const char *op, HA_Message **data, HA_Message **output) { free_xml(*output); return cib_ok; } static enum cib_errors cib_cleanup_none(const char *op, HA_Message **data, HA_Message **output) { CRM_DEV_ASSERT(*data == NULL); CRM_DEV_ASSERT(*output == NULL); return cib_ok; } static enum cib_errors cib_cleanup_sync(const char *op, HA_Message **data, HA_Message **output) { /* data is non-NULL but doesnt need to be free'd */ CRM_DEV_ASSERT(*output == NULL); return cib_ok; } /* typedef struct cib_operation_s { const char* operation; gboolean modifies_cib; gboolean needs_privileges; gboolean needs_quorum; enum cib_errors (*prepare)(HA_Message *, crm_data_t**, const char **); enum cib_errors (*cleanup)(crm_data_t**, crm_data_t**); enum cib_errors (*fn)( const char *, int, const char *, crm_data_t*, crm_data_t*, crm_data_t**, crm_data_t**); } cib_operation_t; */ /* technically bump does modify the cib... * but we want to split the "bump" from the "sync" */ cib_operation_t cib_server_ops[] = { {NULL, FALSE, FALSE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_default}, {CIB_OP_QUERY, FALSE, FALSE, FALSE, cib_prepare_none, cib_cleanup_query, cib_process_query}, {CIB_OP_MODIFY, TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_output, cib_process_modify}, {CIB_OP_UPDATE, TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_output, cib_process_change}, {CIB_OP_APPLY_DIFF,TRUE, TRUE, TRUE, cib_prepare_diff, cib_cleanup_sync, cib_process_diff}, {CIB_OP_SLAVE, FALSE, TRUE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_readwrite}, {CIB_OP_SLAVEALL, FALSE, TRUE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_readwrite}, {CIB_OP_SYNC_ONE, FALSE, TRUE, FALSE, cib_prepare_sync, cib_cleanup_sync, cib_process_sync_one}, {CIB_OP_MASTER, FALSE, TRUE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_readwrite}, {CIB_OP_ISMASTER, FALSE, TRUE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_readwrite}, {CIB_OP_BUMP, TRUE, TRUE, TRUE, cib_prepare_none, cib_cleanup_output, cib_process_bump}, {CIB_OP_REPLACE, TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_output, cib_process_replace}, {CIB_OP_CREATE, TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_output, cib_process_change}, {CIB_OP_DELETE, TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_output, cib_process_delete}, {CIB_OP_DELETE_ALT,TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_output, cib_process_change}, {CIB_OP_SYNC, FALSE, TRUE, FALSE, cib_prepare_sync, cib_cleanup_sync, cib_process_sync}, {CRM_OP_QUIT, FALSE, TRUE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_quit}, {CRM_OP_PING, FALSE, FALSE, FALSE, cib_prepare_none, cib_cleanup_output, cib_process_ping}, {CIB_OP_ERASE, TRUE, TRUE, TRUE, cib_prepare_none, cib_cleanup_output, cib_process_erase}, {CRM_OP_NOOP, FALSE, FALSE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_default}, {"cib_shutdown_req",FALSE, TRUE, FALSE, cib_prepare_sync, cib_cleanup_sync, cib_process_shutdown_req}, }; int send_via_callback_channel(HA_Message *msg, const char *token); enum cib_errors cib_process_command( HA_Message *request, HA_Message **reply, crm_data_t **cib_diff, gboolean privileged); gboolean cib_common_callback(IPC_Channel *channel, cib_client_t *cib_client, gboolean force_synchronous, gboolean privileged); enum cib_errors cib_get_operation_id(const HA_Message * msg, int *operation); gboolean cib_process_disconnect(IPC_Channel *channel, cib_client_t *cib_client); static void cib_ipc_connection_destroy(gpointer user_data) { cib_client_t *cib_client = user_data; /* cib_process_disconnect */ if(cib_client == NULL) { crm_debug_4("Destroying %p", user_data); return; } if(cib_client->source != NULL) { crm_debug_4("Deleting %s (%p) from mainloop", cib_client->name, cib_client->source); G_main_del_IPC_Channel(cib_client->source); cib_client->source = NULL; } crm_debug_3("Destroying %s (%p)", cib_client->name, user_data); crm_free(cib_client->name); crm_free(cib_client->callback_id); crm_free(cib_client->id); crm_free(cib_client); crm_debug_4("Freed the cib client"); return; } static cib_client_t * cib_client_connect_common( IPC_Channel *channel, const char *channel_name, gboolean (*callback)(IPC_Channel *channel, gpointer user_data)) { gboolean can_connect = TRUE; cib_client_t *new_client = NULL; crm_debug_3("Connecting channel"); if (channel == NULL) { crm_err("Channel was NULL"); can_connect = FALSE; cib_bad_connects++; } else if (channel->ch_status != IPC_CONNECT) { crm_err("Channel was disconnected"); can_connect = FALSE; cib_bad_connects++; } else if(channel_name == NULL) { crm_err("user_data must contain channel name"); can_connect = FALSE; cib_bad_connects++; } else if(cib_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", channel->farside_pid); return NULL; } else { crm_malloc0(new_client, sizeof(cib_client_t)); new_client->channel = channel; new_client->channel_name = channel_name; crm_debug_3("Created channel %p for channel %s", new_client, new_client->channel_name); channel->ops->set_recv_qlen(channel, 100); channel->ops->set_send_qlen(channel, 400); if(callback != NULL) { new_client->source = G_main_add_IPC_Channel( G_PRIORITY_DEFAULT, channel, FALSE, callback, new_client, cib_ipc_connection_destroy); } crm_debug_3("Channel %s connected for client %s", new_client->channel_name, new_client->id); } return new_client; } gboolean cib_client_connect_rw_synch(IPC_Channel *channel, gpointer user_data) { cib_client_t *new_client = cib_client_connect_common( channel, cib_channel_ro_synchronous, cib_rw_synchronous_callback); if(new_client == NULL) { return FALSE; } return TRUE; } gboolean cib_client_connect_ro_synch(IPC_Channel *channel, gpointer user_data) { cib_client_t *new_client = cib_client_connect_common( channel, cib_channel_ro_synchronous, cib_ro_synchronous_callback); if(new_client == NULL) { return FALSE; } return TRUE; } gboolean cib_client_connect_rw_ro(IPC_Channel *channel, gpointer user_data) { cl_uuid_t client_id; HA_Message *reg_msg = NULL; cib_client_t *new_client = NULL; char uuid_str[UU_UNPARSE_SIZEOF]; gboolean (*callback)(IPC_Channel *channel, gpointer user_data); callback = cib_ro_callback; if(safe_str_eq(user_data, cib_channel_rw)) { callback = cib_rw_callback; } new_client = cib_client_connect_common( channel, callback==cib_ro_callback?cib_channel_ro:cib_channel_rw, callback); if(new_client == NULL) { return FALSE; } cl_uuid_generate(&client_id); cl_uuid_unparse(&client_id, uuid_str); new_client->id = crm_strdup(uuid_str); cl_uuid_generate(&client_id); cl_uuid_unparse(&client_id, uuid_str); new_client->callback_id = crm_strdup(uuid_str); /* make sure we can find ourselves later for sync calls * redirected to the master instance */ g_hash_table_insert( client_list, new_client->id, new_client); reg_msg = ha_msg_new(3); ha_msg_add(reg_msg, F_CIB_OPERATION, CRM_OP_REGISTER); ha_msg_add(reg_msg, F_CIB_CLIENTID, new_client->id); ha_msg_add( reg_msg, F_CIB_CALLBACK_TOKEN, new_client->callback_id); send_ipc_message(channel, reg_msg); return TRUE; } gboolean cib_client_connect_null(IPC_Channel *channel, gpointer user_data) { cib_client_t *new_client = cib_client_connect_common( channel, cib_channel_callback, cib_null_callback); if(new_client == NULL) { return FALSE; } return TRUE; } gboolean cib_rw_callback(IPC_Channel *channel, gpointer user_data) { return cib_common_callback(channel, user_data, FALSE, TRUE); } gboolean cib_ro_synchronous_callback(IPC_Channel *channel, gpointer user_data) { return cib_common_callback(channel, user_data, TRUE, FALSE); } gboolean cib_rw_synchronous_callback(IPC_Channel *channel, gpointer user_data) { return cib_common_callback(channel, user_data, TRUE, TRUE); } gboolean cib_ro_callback(IPC_Channel *channel, gpointer user_data) { return cib_common_callback(channel, user_data, FALSE, FALSE); } gboolean cib_null_callback(IPC_Channel *channel, gpointer user_data) { gboolean keep_connection = TRUE; HA_Message *op_request = NULL; cib_client_t *cib_client = user_data; cib_client_t *hash_client = NULL; const char *type = NULL; const char *uuid_ticket = NULL; const char *client_name = NULL; gboolean register_failed = FALSE; if(cib_client == NULL) { crm_err("Discarding IPC message from unknown source" " on callback channel."); return FALSE; } while(IPC_ISRCONN(channel)) { if(channel->ops->is_message_pending(channel) == 0) { break; } op_request = msgfromIPC_noauth(channel); if(op_request == NULL) { break; } type = cl_get_string(op_request, F_CIB_OPERATION); if(safe_str_eq(type, T_CIB_NOTIFY) ) { /* Update the notify filters for this client */ int on_off = 0; ha_msg_value_int( op_request, F_CIB_NOTIFY_ACTIVATE, &on_off); type = cl_get_string(op_request, F_CIB_NOTIFY_TYPE); crm_info("Setting %s callbacks for %s: %s", type, cib_client->name, on_off?"on":"off"); if(safe_str_eq(type, T_CIB_POST_NOTIFY)) { cib_client->post_notify = on_off; } else if(safe_str_eq(type, T_CIB_PRE_NOTIFY)) { cib_client->pre_notify = on_off; } else if(safe_str_eq(type, T_CIB_UPDATE_CONFIRM)) { cib_client->confirmations = on_off; } else if(safe_str_eq(type, T_CIB_DIFF_NOTIFY)) { cib_client->diffs = on_off; } else if(safe_str_eq(type, T_CIB_REPLACE_NOTIFY)) { cib_client->replace = on_off; } continue; } else if(safe_str_neq(type, CRM_OP_REGISTER) ) { crm_warn("Discarding IPC message from %s on callback channel", cib_client->id); crm_msg_del(op_request); continue; } uuid_ticket = cl_get_string(op_request, F_CIB_CALLBACK_TOKEN); client_name = cl_get_string(op_request, F_CIB_CLIENTNAME); CRM_DEV_ASSERT(uuid_ticket != NULL); if(crm_assert_failed) { register_failed = crm_assert_failed; } CRM_DEV_ASSERT(client_name != NULL); if(crm_assert_failed) { register_failed = crm_assert_failed; } if(register_failed == FALSE) { hash_client = g_hash_table_lookup(client_list, uuid_ticket); if(hash_client != NULL) { crm_err("Duplicate registration request..." " disconnecting"); register_failed = TRUE; } } if(register_failed) { crm_err("Registration request failed... disconnecting"); crm_msg_del(op_request); return FALSE; } cib_client->id = crm_strdup(uuid_ticket); cib_client->name = crm_strdup(client_name); g_hash_table_insert(client_list, cib_client->id, cib_client); crm_debug_2("Registered %s on %s channel", cib_client->id, cib_client->channel_name); if(safe_str_eq(cib_client->name, CRM_SYSTEM_TENGINE)) { /* The TE is _always_ interested in these * Enable now to avoid timing issues */ cib_client->diffs = TRUE; } crm_msg_del(op_request); op_request = ha_msg_new(2); ha_msg_add(op_request, F_CIB_OPERATION, CRM_OP_REGISTER); ha_msg_add(op_request, F_CIB_CLIENTID, cib_client->id); send_ipc_message(channel, op_request); if(channel->ch_status == IPC_CONNECT) { break; } } if(channel->ch_status != IPC_CONNECT) { crm_debug_2("Client disconnected"); keep_connection = cib_process_disconnect(channel, cib_client); } return keep_connection; } void cib_common_callback_worker(HA_Message *op_request, cib_client_t *cib_client, gboolean force_synchronous, gboolean privileged); void cib_common_callback_worker(HA_Message *op_request, cib_client_t *cib_client, gboolean force_synchronous, gboolean privileged) { int rc = cib_ok; int call_type = 0; const char *op = NULL; longclock_t call_stop = 0; longclock_t call_start = 0; call_start = time_longclock(); cib_client->num_calls++; op = cl_get_string(op_request, F_CIB_OPERATION); rc = cib_get_operation_id(op_request, &call_type); if(rc != cib_ok) { crm_debug("Invalid operation %s from %s/%s", op, cib_client->name, cib_client->channel_name); } else { crm_debug_2("Processing %s operation from %s/%s", op, cib_client->name, cib_client->channel_name); } crm_log_message_adv(LOG_MSG, "Client[inbound]", op_request); ha_msg_add(op_request, F_CIB_CLIENTID, cib_client->id); ha_msg_add(op_request, F_CIB_CLIENTNAME, cib_client->name); if(rc == cib_ok) { cib_process_request( op_request, force_synchronous, privileged, FALSE, cib_client); } crm_debug_3("Cleaning up request"); crm_msg_del(op_request); call_stop = time_longclock(); cib_call_time += (call_stop - call_start); } gboolean cib_common_callback(IPC_Channel *channel, cib_client_t *cib_client, gboolean force_synchronous, gboolean privileged) { int lpc = 0; HA_Message *op_request = NULL; gboolean keep_channel = TRUE; if(cib_client == NULL) { crm_err("Receieved call from unknown source. Discarding."); return FALSE; } if(cib_client->name == NULL) { cib_client->name = crm_itoa(channel->farside_pid); } if(cib_client->id == NULL) { cib_client->id = crm_strdup(cib_client->name); g_hash_table_insert(client_list, cib_client->id, cib_client); } crm_debug_2("Callback for %s on %s channel", cib_client->id, cib_client->channel_name); while(IPC_ISRCONN(channel)) { if(channel->ops->is_message_pending(channel) == 0) { break; } op_request = msgfromIPC_noauth(channel); if (op_request == NULL) { perror("Receive failure:"); break; } lpc++; crm_assert_failed = FALSE; cib_common_callback_worker( op_request, cib_client, force_synchronous, privileged); op_request = NULL; if(channel->ch_status == IPC_CONNECT) { break; } } crm_debug_2("Processed %d messages", lpc); if(channel->ch_status != IPC_CONNECT) { crm_debug_2("Client disconnected"); keep_channel = cib_process_disconnect(channel, cib_client); } return keep_channel; } void cib_process_request( HA_Message *request, gboolean force_synchronous, gboolean privileged, gboolean from_peer, cib_client_t *cib_client) { int call_type = 0; int call_options = 0; gboolean process = TRUE; gboolean needs_reply = TRUE; gboolean local_notify = FALSE; gboolean needs_forward = FALSE; crm_data_t *result_diff = NULL; enum cib_errors rc = cib_ok; HA_Message *op_reply = NULL; const char *op = cl_get_string(request, F_CIB_OPERATION); const char *originator = cl_get_string(request, F_ORIG); const char *host = cl_get_string(request, F_CIB_HOST); const char *reply_to = cl_get_string(request, F_CIB_ISREPLY); const char *update = cl_get_string(request, F_CIB_GLOBAL_UPDATE); const char *delegated = cl_get_string(request, F_CIB_DELEGATED); const char *client_id = NULL; crm_debug_4("%s Processing msg %s", cib_our_uname, cl_get_string(request, F_SEQ)); cib_num_ops++; if(cib_num_ops == 0) { cib_num_fail = 0; cib_num_local = 0; cib_num_updates = 0; crm_info("Stats wrapped around"); } if(host != NULL && strlen(host) == 0) { host = NULL; } ha_msg_value_int(request, F_CIB_CALLOPTS, &call_options); crm_debug_4("Retrieved call options: %d", call_options); if(force_synchronous) { call_options |= cib_sync_call; } crm_debug_2("Processing %s message (%s) to %s...", from_peer?"peer":"local", from_peer?originator:cib_our_uname, host?host:"master"); rc = cib_get_operation_id(request, &call_type); if(cib_server_ops[call_type].modifies_cib) { cib_num_updates++; } if(rc != cib_ok) { /* TODO: construct error reply */ crm_err("Pre-processing of command failed: %s", cib_error2string(rc)); } else if(from_peer == FALSE) { if(cib_server_ops[call_type].modifies_cib && !(call_options & cib_inhibit_bcast)) { /* we need to send an update anyway */ needs_reply = TRUE; } else { needs_reply = FALSE; } if(host == NULL && (call_options & cib_scope_local)) { crm_debug("Processing locally scoped %s op from %s", op, cib_client->name); local_notify = TRUE; } else if(host == NULL && cib_is_master) { crm_debug("Processing master %s op locally from %s", op, cib_client->name); local_notify = TRUE; } else if(safe_str_eq(host, cib_our_uname)) { crm_debug("Processing locally addressed %s op from %s", op, cib_client->name); local_notify = TRUE; } else { crm_debug("%s op from %s needs to be forwarded to %s", op, cib_client->name, host?host:"the master instance"); needs_forward = TRUE; process = FALSE; } } else if(safe_str_eq(op, "cib_shutdown_req")) { if(reply_to != NULL) { crm_debug("Processing %s from %s", op, host); needs_reply = FALSE; } else { crm_debug("Processing %s reply from %s", op, host); } } else if(crm_is_true(update) && safe_str_eq(reply_to, cib_our_uname)) { crm_debug("Processing global/peer update from %s" " that originated from us", originator); needs_reply = FALSE; if(cl_get_string(request, F_CIB_CLIENTID) != NULL) { local_notify = TRUE; } } else if(crm_is_true(update)) { crm_debug("Processing global/peer update from %s", originator); needs_reply = FALSE; } else if(host != NULL && safe_str_eq(host, cib_our_uname)) { crm_debug("Processing request sent to us from %s", originator); } else if(delegated != NULL && cib_is_master == TRUE) { crm_debug("Processing request sent to master instance from %s", originator); } else if(reply_to != NULL && safe_str_eq(reply_to, cib_our_uname)) { crm_debug("Forward reply sent from %s to local clients", originator); process = FALSE; needs_reply = FALSE; local_notify = TRUE; } else if(delegated != NULL) { crm_debug("Ignoring msg for master instance"); return; } else if(host != NULL) { /* this is for a specific instance and we're not it */ crm_debug("Ignoring msg for instance on %s", crm_str(host)); return; } else if(reply_to == NULL && cib_is_master == FALSE) { /* this is for the master instance and we're not it */ crm_debug("Ignoring %s reply to %s", op, crm_str(reply_to)); return; } else { crm_err("Nothing for us to do?"); crm_log_message_adv(LOG_ERR, "Peer[inbound]", request); return; } crm_debug_3("Finished determining processing actions"); if(call_options & cib_discard_reply) { needs_reply = cib_server_ops[call_type].modifies_cib; local_notify = FALSE; } if(needs_forward) { HA_Message *forward_msg = cib_msg_copy(request, TRUE); ha_msg_add(forward_msg, F_CIB_DELEGATED, cib_our_uname); if(host != NULL) { crm_debug_2("Forwarding %s op to %s", op, host); send_ha_message(hb_conn, forward_msg, host, FALSE); } else { crm_debug_2("Forwarding %s op to master instance", op); send_ha_message(hb_conn, forward_msg, NULL, FALSE); } if(call_options & cib_discard_reply) { crm_debug_2("Client not interested in reply"); crm_msg_del(forward_msg); } else if(call_options & cib_sync_call) { /* keep track of the request so we can time it * out if required */ crm_debug_2("Registering delegated call from %s", cib_client->id); cib_client->delegated_calls = g_list_append( cib_client->delegated_calls, forward_msg); } else { crm_msg_del(forward_msg); } return; } if(process) { cib_num_local++; crm_debug_2("Performing local processing:" " op=%s origin=%s/%s,%s (update=%s)", cl_get_string(request, F_CIB_OPERATION), originator, cl_get_string(request, F_CIB_CLIENTID), cl_get_string(request, F_CIB_CALLID), update); rc = cib_process_command( request, &op_reply, &result_diff, privileged); crm_debug_2("Processing complete"); if(rc == cib_diff_resync || rc == cib_diff_failed || rc == cib_old_data) { crm_warn("%s operation failed: %s", crm_str(op), cib_error2string(rc)); } else if(rc != cib_ok) { cib_num_fail++; crm_err("%s operation failed: %s", crm_str(op), cib_error2string(rc)); crm_log_message_adv(LOG_DEBUG, "CIB[output]", op_reply); crm_log_message_adv(LOG_INFO, "Input message", request); } if(op_reply == NULL && (needs_reply || local_notify)) { crm_err("Unexpected NULL reply to message"); crm_log_message(LOG_ERR, request); needs_reply = FALSE; local_notify = FALSE; } } crm_debug_3("processing response cases"); if(local_notify) { /* send callback to originating child */ cib_client_t *client_obj = NULL; HA_Message *client_reply = NULL; enum cib_errors local_rc = cib_ok; crm_debug_2("Performing notification"); if(process == FALSE) { client_reply = cib_msg_copy(request, TRUE); } else { /* can we set this to FALSE? */ client_reply = cib_msg_copy(op_reply, TRUE); } client_id = cl_get_string(request, F_CIB_CLIENTID); if(client_id != NULL) { client_obj = g_hash_table_lookup( client_list, client_id); } else { crm_debug("No client to sent the response to." " F_CIB_CLIENTID not set."); } crm_debug_3("Sending callback to request originator"); if(client_obj == NULL) { local_rc = cib_reply_failed; } else { const char *client_id = client_obj->callback_id; crm_debug_2("Sending %ssync response to %s %s", (call_options & cib_sync_call)?"":"an a-", client_obj->name, from_peer?"(originator of delegated request)":""); if(call_options & cib_sync_call) { client_id = client_obj->id; } local_rc = send_via_callback_channel( client_reply, client_id); client_reply = NULL; } if(local_rc != cib_ok) { crm_warn("%sSync reply to %s failed: %s", (call_options & cib_sync_call)?"":"A-", client_obj?client_obj->name:"", cib_error2string(local_rc)); crm_log_message(LOG_DEBUG, op_reply); } } if(needs_reply == FALSE) { /* nothing more to do... * this was a non-originating slave update */ crm_debug_2("Completed slave update"); crm_msg_del(op_reply); free_xml(result_diff); return; } crm_debug_4("add the originator to message"); CRM_DEV_ASSERT(op_reply != NULL); CRM_DEV_ASSERT(cib_server_ops[call_type].modifies_cib == FALSE || result_diff != NULL || rc != cib_ok); /* from now on we are the server */ if(rc == cib_ok && result_diff != NULL && !(call_options & cib_inhibit_bcast)) { /* this (successful) call modified the CIB _and_ the * change needs to be broadcast... * send via HA to other nodes */ HA_Message *op_bcast = cib_msg_copy(request, FALSE); int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; cib_diff_version_details( result_diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_debug("Sending update diff %d.%d.%d -> %d.%d.%d", diff_del_admin_epoch,diff_del_epoch,diff_del_updates, diff_add_admin_epoch,diff_add_epoch,diff_add_updates); ha_msg_add(op_bcast, F_CIB_ISREPLY, originator); ha_msg_add(op_bcast, F_CIB_GLOBAL_UPDATE, XML_BOOLEAN_TRUE); ha_msg_mod(op_bcast, F_CIB_OPERATION, CIB_OP_APPLY_DIFF); add_message_xml(op_bcast, F_CIB_UPDATE_DIFF, result_diff); crm_log_message(LOG_DEBUG_3, op_bcast); send_ha_message(hb_conn, op_bcast, NULL, TRUE); crm_msg_del(op_bcast); } else if((call_options & cib_discard_reply) == 0) { crm_debug_2("Sending replies"); if(from_peer && originator != NULL) { /* send reply via HA to originating node */ crm_debug_2("Sending request result to originator only"); ha_msg_add(op_reply, F_CIB_ISREPLY, originator); send_ha_message(hb_conn, op_reply, originator, FALSE); } if(call_options & cib_inhibit_bcast ) { crm_debug("Request not broadcast: inhibited"); } if(cib_server_ops[call_type].modifies_cib == FALSE) { crm_debug_2("Request not broadcast: R/O call"); } if(rc != cib_ok) { crm_warn("Request not broadcast: call failed: %s", cib_error2string(rc)); } } crm_msg_del(op_reply); free_xml(result_diff); return; } static HA_Message * cib_construct_reply(HA_Message *request, HA_Message *output, int rc) { int lpc = 0; HA_Message *reply = NULL; const char *name = NULL; const char *value = NULL; const char *names[] = { F_CIB_OPERATION, F_CIB_CALLID, F_CIB_CLIENTID, F_CIB_CALLOPTS }; crm_debug_4("Creating a basic reply"); reply = ha_msg_new(8); ha_msg_add(reply, F_TYPE, T_CIB); for(lpc = 0; lpc < DIMOF(names); lpc++) { name = names[lpc]; value = cl_get_string(request, name); ha_msg_add(reply, name, value); } ha_msg_add_int(reply, F_CIB_RC, rc); if(output != NULL) { crm_debug_4("Attaching reply output"); add_message_xml(reply, F_CIB_CALLDATA, output); } return reply; } enum cib_errors cib_process_command(HA_Message *request, HA_Message **reply, crm_data_t **cib_diff, gboolean privileged) { crm_data_t *output = NULL; crm_data_t *input = NULL; crm_data_t *current_cib = the_cib; crm_data_t *result_cib = NULL; int call_type = 0; int call_options = 0; enum cib_errors rc = cib_ok; enum cib_errors rc2 = cib_ok; const char *op = NULL; const char *section = NULL; gboolean global_update = crm_is_true( cl_get_string(request, F_CIB_GLOBAL_UPDATE)); *reply = NULL; *cib_diff = NULL; /* Start processing the request... */ op = cl_get_string(request, F_CIB_OPERATION); ha_msg_value_int(request, F_CIB_CALLOPTS, &call_options); rc = cib_get_operation_id(request, &call_type); if(rc == cib_ok && cib_server_ops[call_type].needs_privileges && privileged == FALSE) { /* abort */ rc = cib_not_authorized; } if(rc == cib_ok && global_update == FALSE && cib_server_ops[call_type].needs_quorum && can_write(call_options) == FALSE) { rc = cib_no_quorum; } /* prevent NUMUPDATES from being incrimented - apply the change as-is */ if(global_update) { call_options |= cib_inhibit_bcast; call_options |= cib_force_diff; } rc2 = cib_server_ops[call_type].prepare(request, &input, §ion); if(rc == cib_ok) { rc = rc2; } if(rc != cib_ok) { crm_debug_2("Call setup failed"); } else if(cib_server_ops[call_type].modifies_cib) { if((call_options & cib_inhibit_notify) == 0) { cib_pre_notify( call_options, op, get_object_root(section, current_cib), input); } if(rc == cib_ok) { rc = cib_server_ops[call_type].fn( op, call_options, section, input, current_cib, &result_cib, &output); } if(rc == cib_ok) { CRM_DEV_ASSERT(result_cib != NULL); CRM_DEV_ASSERT(current_cib != result_cib); update_counters(__FILE__, __FUNCTION__, result_cib); if(do_id_check(result_cib, NULL)) { rc = cib_id_check; if(call_options & cib_force_diff) { crm_err("Global update introduces id collision!"); } } else { *cib_diff = diff_cib_object( current_cib, result_cib, FALSE); } } if(rc != cib_ok) { free_xml(result_cib); } else if(activateCibXml(result_cib, CIB_FILENAME) != 0){ crm_warn("Activation failed"); rc = cib_ACTIVATION; } if((call_options & cib_inhibit_notify) == 0) { const char *call_id = cl_get_string( request, F_CIB_CALLID); const char *client = cl_get_string( request, F_CIB_CLIENTNAME); cib_post_notify(call_options, op, input, rc, the_cib); cib_diff_notify(call_options, client, call_id, op, input, rc, *cib_diff); } log_xml_diff(rc==cib_ok?cib_diff_loglevel:cib_diff_loglevel+1, *cib_diff, "cib:diff"); } else { rc = cib_server_ops[call_type].fn( op, call_options, section, input, current_cib, &result_cib, &output); CRM_DEV_ASSERT(result_cib == NULL); free_xml(result_cib); } if((call_options & cib_discard_reply) == 0) { *reply = cib_construct_reply(request, output, rc); } if(call_type >= 0) { cib_server_ops[call_type].cleanup(op, &input, &output); } return rc; } int send_via_callback_channel(HA_Message *msg, const char *token) { cib_client_t *hash_client = NULL; GList *list_item = NULL; enum cib_errors rc = cib_ok; crm_debug_3("Delivering msg %p to client %s", msg, token); if(token == NULL) { crm_err("No client id token, cant send message"); if(rc == cib_ok) { rc = cib_missing; } } else { /* A client that left before we could reply is not really * _our_ error. Warn instead. */ hash_client = g_hash_table_lookup(client_list, token); if(hash_client == NULL) { crm_warn("Cannot find client for token %s", token); rc = cib_client_gone; } else if(hash_client->channel == NULL) { crm_err("Cannot find channel for client %s", token); rc = cib_client_corrupt; } else if(hash_client->channel->ops->get_chan_status( hash_client->channel) == IPC_DISCONNECT) { crm_warn("Client %s has disconnected", token); rc = cib_client_gone; cib_num_timeouts++; } } /* this is a more important error so overwriting rc is warrented */ if(msg == NULL) { crm_err("No message to send"); rc = cib_reply_failed; } if(rc == cib_ok) { list_item = g_list_find_custom( hash_client->delegated_calls, msg, cib_GCompareFunc); } if(list_item != NULL) { /* remove it - no need to time it out */ HA_Message *orig_msg = list_item->data; crm_debug_3("Removing msg from delegated list"); hash_client->delegated_calls = g_list_remove( hash_client->delegated_calls, orig_msg); CRM_DEV_ASSERT(orig_msg != msg); crm_msg_del(orig_msg); } if(rc == cib_ok) { crm_debug_3("Delivering reply to client %s", token); if(send_ipc_message(hash_client->channel, msg) == FALSE) { crm_warn("Delivery of reply to client %s/%s failed", hash_client->name, token); rc = cib_reply_failed; } } else { /* be consistent... * send_ipc_message() will free the message, so we should do * so manually if we dont try to send it. */ crm_msg_del(msg); } return rc; } gint cib_GCompareFunc(gconstpointer a, gconstpointer b) { const HA_Message *a_msg = a; const HA_Message *b_msg = b; int msg_a_id = 0; int msg_b_id = 0; ha_msg_value_int(a_msg, F_CIB_CALLID, &msg_a_id); ha_msg_value_int(b_msg, F_CIB_CALLID, &msg_b_id); if(msg_a_id == msg_b_id) { return 0; } else if(msg_a_id < msg_b_id) { return -1; } return 1; } gboolean cib_msg_timeout(gpointer data) { crm_debug_4("Checking if any clients have timed out messages"); /* g_hash_table_foreach(client_list, cib_GHFunc, NULL); */ return TRUE; } void cib_GHFunc(gpointer key, gpointer value, gpointer user_data) { int timeout = 0; /* 1 iteration == 10 seconds */ HA_Message *msg = NULL; HA_Message *reply = NULL; const char *host_to = NULL; cib_client_t *client = value; GListPtr list = client->delegated_calls; while(list != NULL) { msg = list->data; ha_msg_value_int(msg, F_CIB_TIMEOUT, &timeout); if(timeout <= 0) { list = list->next; continue; } else { int seen = 0; ha_msg_value_int(msg, F_CIB_SEENCOUNT, &seen); crm_debug_4("Timeout %d, seen %d", timeout, seen); if(seen < timeout) { crm_debug_4("Updating seen count for msg from client %s", client->id); seen += 10; ha_msg_mod_int(msg, F_CIB_SEENCOUNT, seen); list = list->next; continue; } } cib_num_timeouts++; host_to = cl_get_string(msg, F_CIB_HOST); crm_warn("Sending operation timeout msg to client %s", client->id); reply = ha_msg_new(4); ha_msg_add(reply, F_TYPE, T_CIB); ha_msg_add(reply, F_CIB_OPERATION, cl_get_string(msg, F_CIB_OPERATION)); ha_msg_add(reply, F_CIB_CALLID, cl_get_string(msg, F_CIB_CALLID)); if(host_to == NULL) { ha_msg_add_int(reply, F_CIB_RC, cib_master_timeout); } else { ha_msg_add_int(reply, F_CIB_RC, cib_remote_timeout); } send_ipc_message(client->channel, reply); list = list->next; client->delegated_calls = g_list_remove( client->delegated_calls, msg); crm_msg_del(msg); } } gboolean cib_process_disconnect(IPC_Channel *channel, cib_client_t *cib_client) { if (channel == NULL) { CRM_DEV_ASSERT(cib_client == NULL); } else if (cib_client == NULL) { crm_err("No client"); } else { CRM_DEV_ASSERT(channel->ch_status != IPC_CONNECT); crm_debug_2("Cleaning up after client disconnect: %s/%s/%s", crm_str(cib_client->name), cib_client->channel_name, cib_client->id); if(cib_client->id != NULL) { if(!g_hash_table_remove(client_list, cib_client->id)) { crm_err("Client %s not found in the hashtable", cib_client->name); } } } if(cib_shutdown_flag && g_hash_table_size(client_list) == 0) { crm_info("All clients disconnected..."); initiate_exit(); } return FALSE; } gboolean cib_ha_dispatch(IPC_Channel *channel, gpointer user_data) { ll_cluster_t *hb_cluster = (ll_cluster_t*)user_data; crm_debug_3("Invoked"); if(IPC_ISRCONN(channel)) { if(hb_cluster->llc_ops->msgready(hb_cluster) == 0) { crm_debug_2("no message ready yet"); } /* invoke the callbacks but dont block */ hb_cluster->llc_ops->rcvmsg(hb_cluster, 0); } if (channel->ch_status != IPC_CONNECT) { return FALSE; } return TRUE; } void cib_peer_callback(HA_Message * msg, void* private_data) { int call_type = 0; int call_options = 0; const char *originator = cl_get_string(msg, F_ORIG); const char *seq = cl_get_string(msg, F_SEQ); const char *op = cl_get_string(msg, F_CIB_OPERATION); crm_log_message_adv(LOG_MSG, "Peer[inbound]", msg); crm_debug_2("Peer %s message (%s) from %s", op, seq, originator); if(originator == NULL || safe_str_eq(originator, cib_our_uname)) { crm_debug_3("Discarding %s message from ourselves", op); return; } else if(ccm_membership == NULL) { crm_info("Discarding %s message (%s) from %s:" " membership not established", op, seq, originator); return; } else if(g_hash_table_lookup(ccm_membership, originator) == NULL) { crm_warn("Discarding %s message (%s) from %s:" " not in our membership", op, seq, originator); return; } else if(cib_get_operation_id(msg, &call_type) != cib_ok) { crm_debug("Discarding %s message (%s) from %s:" " Invalid operation", op, seq, originator); return; } crm_debug_2("Processing %s msg (%s) from %s",op, seq, originator); ha_msg_value_int(msg, F_CIB_CALLOPTS, &call_options); crm_debug_4("Retrieved call options: %d", call_options); if(cl_get_string(msg, F_CIB_CLIENTNAME) == NULL) { ha_msg_add(msg, F_CIB_CLIENTNAME, originator); } cib_process_request(msg, FALSE, TRUE, TRUE, NULL); return; } HA_Message * cib_msg_copy(HA_Message *msg, gboolean with_data) { int lpc = 0; const char *field = NULL; const char *value = NULL; const HA_Message *value_struct = NULL; static const char *field_list[] = { F_TYPE , F_CIB_CLIENTID , F_CIB_CALLOPTS , F_CIB_CALLID , F_CIB_OPERATION , F_CIB_ISREPLY , F_CIB_SECTION , F_CIB_HOST , F_CIB_RC , F_CIB_DELEGATED , F_CIB_OBJID , F_CIB_OBJTYPE , F_CIB_EXISTING , F_CIB_SEENCOUNT , F_CIB_TIMEOUT , F_CIB_CALLBACK_TOKEN , F_CIB_GLOBAL_UPDATE , F_CIB_CLIENTNAME , F_CIB_NOTIFY_TYPE , F_CIB_NOTIFY_ACTIVATE }; static const char *data_list[] = { F_CIB_CALLDATA , F_CIB_UPDATE , F_CIB_UPDATE_RESULT }; HA_Message *copy = NULL; copy = ha_msg_new(10); if(copy == NULL) { return copy; } for(lpc = 0; lpc < DIMOF(field_list); lpc++) { field = field_list[lpc]; value = cl_get_string(msg, field); if(value != NULL) { ha_msg_add(copy, field, value); } } for(lpc = 0; with_data && lpc < DIMOF(data_list); lpc++) { field = data_list[lpc]; value_struct = cl_get_struct(msg, field); if(value_struct != NULL) { add_message_xml(copy, field, value_struct); } } return copy; } enum cib_errors cib_get_operation_id(const HA_Message * msg, int *operation) { int lpc = 0; int max_msg_types = DIMOF(cib_server_ops); const char *op = cl_get_string(msg, F_CIB_OPERATION); for (lpc = 0; lpc < max_msg_types; lpc++) { if (safe_str_eq(op, cib_server_ops[lpc].operation)) { *operation = lpc; return cib_ok; } } crm_err("Operation %s is not valid", op); *operation = -1; return cib_operation; } void cib_client_status_callback(const char * node, const char * client, const char * status, void * private) { if(safe_str_eq(client, CRM_SYSTEM_CIB)) { crm_info("Status update: Client %s/%s now has status [%s]", node, client, status); g_hash_table_replace(peer_hash, crm_strdup(node), crm_strdup(status)); set_connected_peers(the_cib); } return; } extern oc_ev_t *cib_ev_token; gboolean ccm_manual_check(gpointer data) { int rc = 0; oc_ev_t *ccm_token = cib_ev_token; crm_debug("manual check"); rc = oc_ev_handle_event(ccm_token); if(0 == rc) { return TRUE; } else { crm_err("CCM connection appears to have failed: rc=%d.", rc); return FALSE; } } gboolean cib_ccm_dispatch(int fd, gpointer user_data) { int rc = 0; oc_ev_t *ccm_token = (oc_ev_t*)user_data; crm_debug_2("received callback"); rc = oc_ev_handle_event(ccm_token); if(0 == rc) { return TRUE; } else { crm_err("CCM connection appears to have failed: rc=%d.", rc); return FALSE; } } static void crm_ghash_clfree(gpointer data) { crm_free(data); } void cib_ccm_msg_callback( oc_ed_t event, void *cookie, size_t size, const void *data) { int instance = -1; gboolean update_id = FALSE; gboolean update_quorum = FALSE; const oc_ev_membership_t *membership = data; if(membership != NULL) { instance = membership->m_instance; } crm_debug("Process CCM event=%s (id=%d)", ccm_event_name(event), instance); switch(event) { case OC_EV_MS_NEW_MEMBERSHIP: case OC_EV_MS_INVALID: update_id = TRUE; update_quorum = TRUE; break; case OC_EV_MS_PRIMARY_RESTORED: update_id = TRUE; break; case OC_EV_MS_NOT_PRIMARY: crm_debug_2("Ignoring transitional CCM event: %s", ccm_event_name(event)); break; case OC_EV_MS_EVICTED: crm_err("Evicted from CCM: %s", ccm_event_name(event)); update_quorum = TRUE; break; default: crm_err("Unknown CCM event: %d", event); } if(update_id) { CRM_DEV_ASSERT(membership != NULL); if(crm_assert_failed) { return; } if(ccm_transition_id != NULL) { crm_free(ccm_transition_id); ccm_transition_id = NULL; } ccm_transition_id = crm_itoa(instance); set_transition(the_cib); } if(update_quorum) { unsigned int members = 0; int offset = 0; unsigned int lpc = 0; cib_have_quorum = ccm_have_quorum(event); if(cib_have_quorum) { crm_xml_add( the_cib,XML_ATTR_HAVE_QUORUM,XML_BOOLEAN_TRUE); } else { crm_xml_add( the_cib,XML_ATTR_HAVE_QUORUM,XML_BOOLEAN_FALSE); } crm_debug("Quorum %s after event=%s (id=%d)", cib_have_quorum?"(re)attained":"lost", ccm_event_name(event), instance); if(ccm_membership == NULL) { ccm_membership = g_hash_table_new_full( g_str_hash, g_str_equal, crm_ghash_clfree, NULL); } + if(membership != NULL && membership->m_n_out != 0) { + members = membership->m_n_out; + offset = membership->m_out_idx; + for(lpc = 0; lpc < members; lpc++) { + oc_node_t a_node = membership->m_array[lpc+offset]; + crm_info("LOST: %s", a_node.node_uname); + g_hash_table_remove( + ccm_membership, a_node.node_uname); + } + } if(membership != NULL && membership->m_n_in != 0) { members = membership->m_n_in; offset = membership->m_in_idx; for(lpc = 0; lpc < members; lpc++) { oc_node_t a_node = membership->m_array[lpc+offset]; char *uname = crm_strdup(a_node.node_uname); crm_info("New peer: %s", uname); g_hash_table_replace( ccm_membership, uname, uname); } } - if(membership != NULL && membership->m_n_out != 0) { - members = membership->m_n_out; - offset = membership->m_out_idx; - for(lpc = 0; lpc < members; lpc++) { - oc_node_t a_node = membership->m_array[lpc+offset]; - crm_info("LOST: %s", a_node.node_uname); - g_hash_table_remove( - ccm_membership, a_node.node_uname); - } - } } oc_ev_callback_done(cookie); set_connected_peers(the_cib); return; } gboolean can_write(int flags) { if(cib_have_quorum) { return TRUE; } else if((flags & cib_quorum_override) != 0) { return TRUE; } return FALSE; } static gboolean cib_force_exit(gpointer data) { crm_notice("Forcing exit!"); terminate_ha_connection(__FUNCTION__); return FALSE; } void initiate_exit(void) { int active = 0; HA_Message *leaving = NULL; g_hash_table_foreach(peer_hash, GHFunc_count_peers, &active); if(active < 2) { terminate_ha_connection(__FUNCTION__); return; } crm_info("Sending disconnect notification to %d peers...", active); leaving = ha_msg_new(3); ha_msg_add(leaving, F_TYPE, "cib"); ha_msg_add(leaving, F_CIB_OPERATION, "cib_shutdown_req"); send_ha_message(hb_conn, leaving, NULL, TRUE); crm_msg_del(leaving); Gmain_timeout_add(crm_get_msec("5s"), cib_force_exit, NULL); } void terminate_ha_connection(const char *caller) { if(hb_conn != NULL) { crm_info("%s: Disconnecting heartbeat", caller); hb_conn->llc_ops->signoff(hb_conn, FALSE); } else { crm_err("%s: No heartbeat connection", caller); exit(LSB_EXIT_OK); } } diff --git a/crm/cib/messages.c b/crm/cib/messages.c index fa8ea5052d..7187879749 100644 --- a/crm/cib/messages.c +++ b/crm/cib/messages.c @@ -1,931 +1,931 @@ -/* $Id: messages.c,v 1.74 2006/04/10 13:01:21 andrew Exp $ */ +/* $Id: messages.c,v 1.75 2006/04/20 15:43:23 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAX_DIFF_RETRY 5 extern const char *cib_our_uname; extern gboolean syncd_once; enum cib_errors revision_check(crm_data_t *cib_update, crm_data_t *cib_copy, int flags); int get_revision(crm_data_t *xml_obj, int cur_revision); #define CIB_DIFF_LEVEL LOG_DEBUG unsigned int cib_diff_loglevel = CIB_DIFF_LEVEL+1; enum cib_errors updateList( crm_data_t *local_cib, crm_data_t *update_command, crm_data_t *failed, int operation, const char *section); enum cib_errors replace_section( const char *section, crm_data_t *tmpCib, crm_data_t *command); gboolean check_generation(crm_data_t *newCib, crm_data_t *oldCib); gboolean update_results( crm_data_t *failed, crm_data_t *target, int operation, int return_code); enum cib_errors cib_update_counter( crm_data_t *xml_obj, const char *field, gboolean reset); enum cib_errors sync_our_cib(HA_Message *request, gboolean all); extern ll_cluster_t *hb_conn; extern HA_Message *cib_msg_copy(const HA_Message *msg, gboolean with_data); extern gboolean cib_shutdown_flag; extern void terminate_ha_connection(const char *caller); enum cib_errors cib_process_shutdown_req( const char *op, int options, const char *section, crm_data_t *input, crm_data_t *existing_cib, crm_data_t **result_cib, crm_data_t **answer) { enum cib_errors result = cib_ok; const char *host = cl_get_string(input, F_ORIG); *answer = NULL; if(cl_get_string(input, F_CIB_ISREPLY) == NULL) { crm_info("Shutdown REQ from %s", host); return cib_ok; } else if(cib_shutdown_flag) { crm_info("Shutdown ACK from %s", host); terminate_ha_connection(__FUNCTION__); return cib_ok; } else { crm_err("Shutdown ACK from %s - not shutting down",host); result = cib_unknown; } return result; } enum cib_errors cib_process_default( const char *op, int options, const char *section, crm_data_t *input, crm_data_t *existing_cib, crm_data_t **result_cib, crm_data_t **answer) { enum cib_errors result = cib_ok; crm_debug_2("Processing \"%s\" event", op); *answer = NULL; if(op == NULL) { result = cib_operation; crm_err("No operation specified"); } else if(strcmp(CRM_OP_NOOP, op) == 0) { ; } else { result = cib_NOTSUPPORTED; crm_err("Action [%s] is not supported by the CIB", op); } return result; } enum cib_errors cib_process_quit( const char *op, int options, const char *section, crm_data_t *input, crm_data_t *existing_cib, crm_data_t **result_cib, crm_data_t **answer) { enum cib_errors result = cib_ok; crm_debug_2("Processing \"%s\" event", op); crm_warn("The CRMd has asked us to exit... complying"); exit(0); return result; } enum cib_errors cib_process_readwrite( const char *op, int options, const char *section, crm_data_t *input, crm_data_t *existing_cib, crm_data_t **result_cib, crm_data_t **answer) { enum cib_errors result = cib_ok; crm_debug_2("Processing \"%s\" event", op); if(safe_str_eq(op, CIB_OP_ISMASTER)) { if(cib_is_master == TRUE) { result = cib_ok; } else { result = cib_not_master; } return result; } if(safe_str_eq(op, CIB_OP_MASTER)) { cib_diff_loglevel = CIB_DIFF_LEVEL; if(cib_is_master == FALSE) { crm_info("We are now in R/W mode"); cib_is_master = TRUE; syncd_once = TRUE; } else { crm_debug("We are still in R/W mode"); } } else if(cib_is_master) { crm_info("We are now in R/O mode"); cib_diff_loglevel = CIB_DIFF_LEVEL+1; cib_is_master = FALSE; } return result; } enum cib_errors cib_process_ping( const char *op, int options, const char *section, crm_data_t *input, crm_data_t *existing_cib, crm_data_t **result_cib, crm_data_t **answer) { enum cib_errors result = cib_ok; crm_debug_2("Processing \"%s\" event", op); *answer = createPingAnswerFragment(CRM_SYSTEM_CIB, "ok"); return result; } enum cib_errors cib_process_query( const char *op, int options, const char *section, crm_data_t *input, crm_data_t *existing_cib, crm_data_t **result_cib, crm_data_t **answer) { crm_data_t *obj_root = NULL; enum cib_errors result = cib_ok; crm_debug_2("Processing \"%s\" event for section=%s", op, crm_str(section)); *answer = NULL; if (safe_str_eq(XML_CIB_TAG_SECTION_ALL, section)) { section = NULL; } obj_root = get_object_root(section, existing_cib); if(obj_root == NULL) { result = cib_NOTEXISTS; } else { *answer = obj_root; } if(result == cib_ok && *answer == NULL) { crm_err("Error creating query response"); result = cib_output_data; } return result; } enum cib_errors cib_process_erase( const char *op, int options, const char *section, crm_data_t *input, crm_data_t *existing_cib, crm_data_t **result_cib, crm_data_t **answer) { crm_data_t *local_diff = NULL; enum cib_errors result = cib_ok; crm_debug_2("Processing \"%s\" event", op); *answer = NULL; *result_cib = createEmptyCib(); copy_in_properties(*result_cib, existing_cib); cib_update_counter(*result_cib, XML_ATTR_GENERATION, FALSE); cib_update_counter(*result_cib, XML_ATTR_NUMUPDATES, TRUE); local_diff = diff_cib_object(existing_cib, *result_cib, FALSE); cib_replace_notify(*result_cib, result, local_diff); free_xml(local_diff); return result; } enum cib_errors cib_process_bump( const char *op, int options, const char *section, crm_data_t *input, crm_data_t *existing_cib, crm_data_t **result_cib, crm_data_t **answer) { enum cib_errors result = cib_ok; crm_debug_2("Processing \"%s\" event for epoch=%s", op, crm_str(crm_element_value(the_cib, XML_ATTR_GENERATION))); *answer = NULL; *result_cib = copy_xml(the_cib); cib_update_counter(*result_cib, XML_ATTR_GENERATION, FALSE); cib_update_counter(*result_cib, XML_ATTR_NUMUPDATES, FALSE); return result; } enum cib_errors cib_process_sync( const char *op, int options, const char *section, crm_data_t *input, crm_data_t *existing_cib, crm_data_t **result_cib, crm_data_t **answer) { return sync_our_cib(input, TRUE); } enum cib_errors cib_process_sync_one( const char *op, int options, const char *section, crm_data_t *input, crm_data_t *existing_cib, crm_data_t **result_cib, crm_data_t **answer) { return sync_our_cib(input, FALSE); } enum cib_errors cib_update_counter(crm_data_t *xml_obj, const char *field, gboolean reset) { char *new_value = NULL; char *old_value = NULL; int int_value = -1; if(reset == FALSE && crm_element_value(xml_obj, field) != NULL) { old_value = crm_element_value_copy(xml_obj, field); } if(old_value != NULL) { crm_malloc0(new_value, 128*(sizeof(char))); int_value = atoi(old_value); sprintf(new_value, "%d", ++int_value); } else { new_value = crm_strdup("1"); } crm_debug_4("%s %d(%s)->%s", field, int_value, crm_str(old_value), crm_str(new_value)); crm_xml_add(xml_obj, field, new_value); crm_free(new_value); crm_free(old_value); return cib_ok; } int sync_in_progress = 0; enum cib_errors cib_process_diff( const char *op, int options, const char *section, crm_data_t *input, crm_data_t *existing_cib, crm_data_t **result_cib, crm_data_t **answer) { unsigned int log_level = LOG_DEBUG; const char *value = NULL; const char *reason = NULL; gboolean apply_diff = TRUE; gboolean do_resync = FALSE; enum cib_errors result = cib_ok; int this_updates = 0; int this_epoch = 0; int this_admin_epoch = 0; int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; crm_debug_2("Processing \"%s\" event", op); if(cib_is_master) { /* the master is never waiting for a resync */ sync_in_progress = 0; } cib_diff_version_details( input, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); if(sync_in_progress > MAX_DIFF_RETRY) { /* request another full-sync, * the last request may have been lost */ sync_in_progress = 0; } if(sync_in_progress) { sync_in_progress++; crm_warn("Not applying diff %d.%d.%d -> %d.%d.%d (sync in progress)", diff_del_admin_epoch,diff_del_epoch,diff_del_updates, diff_add_admin_epoch,diff_add_epoch,diff_add_updates); return cib_diff_resync; } value = crm_element_value(existing_cib, XML_ATTR_GENERATION); this_epoch = atoi(value?value:"0"); value = crm_element_value(existing_cib, XML_ATTR_NUMUPDATES); this_updates = atoi(value?value:"0"); value = crm_element_value(existing_cib, XML_ATTR_GENERATION_ADMIN); this_admin_epoch = atoi(value?value:"0"); if(diff_del_admin_epoch == diff_add_admin_epoch && diff_del_epoch == diff_add_epoch && diff_del_updates == diff_add_updates) { apply_diff = FALSE; log_level = LOG_ERR; reason = "+ and - versions in the diff did not change"; log_cib_diff(LOG_ERR, input, __FUNCTION__); } if(apply_diff && diff_del_admin_epoch > this_admin_epoch) { do_resync = TRUE; apply_diff = FALSE; log_level = LOG_INFO; reason = "current \""XML_ATTR_GENERATION_ADMIN"\" is less than required"; } else if(apply_diff && diff_del_admin_epoch < this_admin_epoch) { apply_diff = FALSE; log_level = LOG_WARNING; reason = "current \""XML_ATTR_GENERATION_ADMIN"\" is greater than required"; } if(apply_diff && diff_del_epoch > this_epoch) { do_resync = TRUE; apply_diff = FALSE; log_level = LOG_INFO; reason = "current \""XML_ATTR_GENERATION"\" is less than required"; } else if(apply_diff && diff_del_epoch < this_epoch) { apply_diff = FALSE; log_level = LOG_WARNING; reason = "current \""XML_ATTR_GENERATION"\" is greater than required"; } if(apply_diff && diff_del_updates > this_updates) { do_resync = TRUE; apply_diff = FALSE; log_level = LOG_INFO; reason = "current \""XML_ATTR_NUMUPDATES"\" is less than required"; } else if(apply_diff && diff_del_updates < this_updates) { apply_diff = FALSE; log_level = LOG_WARNING; reason = "current \""XML_ATTR_NUMUPDATES"\" is greater than required"; } if(apply_diff && apply_cib_diff(existing_cib, input, result_cib) == FALSE) { log_level = LOG_WARNING; reason = "Failed application of an update diff"; if(options & cib_force_diff && cib_is_master == FALSE) { log_level = LOG_INFO; reason = "Failed application of a global update. Requesting full refresh."; do_resync = TRUE; } else if(options & cib_force_diff) { reason = "Failed application of a global update. Not requesting full refresh."; } } if(reason != NULL) { crm_log_maybe( log_level, "Diff %d.%d.%d -> %d.%d.%d not applied to %d.%d.%d: %s", diff_del_admin_epoch,diff_del_epoch,diff_del_updates, diff_add_admin_epoch,diff_add_epoch,diff_add_updates, this_admin_epoch,this_epoch,this_updates, reason); result = cib_diff_failed; } else if(apply_diff) { crm_debug_2("Diff %d.%d.%d -> %d.%d.%d was applied", diff_del_admin_epoch,diff_del_epoch,diff_del_updates, diff_add_admin_epoch,diff_add_epoch,diff_add_updates); } if(do_resync && cib_is_master == FALSE) { HA_Message *sync_me = ha_msg_new(3); free_xml(*result_cib); *result_cib = NULL; result = cib_diff_resync; crm_info("Requesting re-sync from peer: %s", reason); sync_in_progress++; ha_msg_add(sync_me, F_TYPE, "cib"); ha_msg_add(sync_me, F_CIB_OPERATION, CIB_OP_SYNC_ONE); ha_msg_add(sync_me, F_CIB_DELEGATED, cib_our_uname); if(send_ha_message(hb_conn, sync_me, NULL, FALSE) == FALSE) { result = cib_not_connected; } ha_msg_del(sync_me); } else if(do_resync) { crm_err("Not resyncing in master mode"); } return result; } enum cib_errors cib_process_replace( const char *op, int options, const char *section, crm_data_t *input, crm_data_t *existing_cib, crm_data_t **result_cib, crm_data_t **answer) { gboolean send_notify = FALSE; gboolean verbose = FALSE; enum cib_errors result = cib_ok; crm_debug_2("Processing \"%s\" event for section=%s", op, crm_str(section)); *answer = NULL; if (options & cib_verbose) { verbose = TRUE; } if(safe_str_eq(XML_CIB_TAG_SECTION_ALL, section)) { section = NULL; } if (input == NULL) { result = cib_NOOBJECT; } else if(section == NULL) { int updates = 0; int epoch = 0; int admin_epoch = 0; int replace_updates = 0; int replace_epoch = 0; int replace_admin_epoch = 0; const char *reason = NULL; cib_version_details( existing_cib, &admin_epoch, &epoch, &updates); cib_version_details(input, &replace_admin_epoch, &replace_epoch, &replace_updates); if(replace_admin_epoch < admin_epoch) { reason = XML_ATTR_GENERATION_ADMIN; } else if(replace_admin_epoch > admin_epoch) { /* no more checks */ } else if(replace_epoch < epoch) { reason = XML_ATTR_GENERATION; } else if(replace_epoch > epoch) { /* no more checks */ } else if(replace_updates < updates) { reason = XML_ATTR_NUMUPDATES; } if(reason != NULL) { crm_warn("Replacement %d.%d.%d not applied to %d.%d.%d:" " current %s is greater than the replacement", replace_admin_epoch, replace_epoch, replace_updates, admin_epoch, epoch, updates, reason); result = cib_old_data; } sync_in_progress = 0; *result_cib = copy_xml(input); send_notify = TRUE; } else { *result_cib = copy_xml(existing_cib); result = replace_section(section, *result_cib, input); if(safe_str_eq(section, XML_CIB_TAG_STATUS)) { send_notify = TRUE; } } if(send_notify) { crm_data_t *local_diff = NULL; local_diff = diff_cib_object(existing_cib, *result_cib, FALSE); cib_replace_notify(*result_cib, result, local_diff); free_xml(local_diff); } if(result == cib_ok && section != NULL) { cib_update_counter(*result_cib, XML_ATTR_NUMUPDATES, FALSE); } return result; } enum cib_errors cib_process_delete( const char *op, int options, const char *section, crm_data_t *input, crm_data_t *existing_cib, crm_data_t **result_cib, crm_data_t **answer) { crm_data_t *obj_root = NULL; crm_debug_2("Processing \"%s\" event", op); if(input == NULL) { crm_err("Cannot perform modification with no data"); return cib_NOOBJECT; } *result_cib = copy_xml(existing_cib); obj_root = get_object_root(section, *result_cib); crm_validate_data(input); crm_validate_data(*result_cib); if(delete_xml_child(NULL, obj_root, input) == FALSE) { crm_debug_2("No matching object to delete"); } cib_update_counter(*result_cib, XML_ATTR_NUMUPDATES, FALSE); return cib_ok; } enum cib_errors cib_process_modify( const char *op, int options, const char *section, crm_data_t *input, crm_data_t *existing_cib, crm_data_t **result_cib, crm_data_t **answer) { crm_data_t *obj_root = NULL; crm_debug_2("Processing \"%s\" event", op); if(input == NULL) { crm_err("Cannot perform modification with no data"); return cib_NOOBJECT; } *result_cib = copy_xml(existing_cib); obj_root = get_object_root(section, *result_cib); crm_validate_data(input); crm_validate_data(*result_cib); if(update_xml_child(obj_root, input) == FALSE) { return cib_NOTEXISTS; } cib_update_counter(*result_cib, XML_ATTR_NUMUPDATES, FALSE); return cib_ok; } enum cib_errors cib_process_change( const char *op, int options, const char *section, crm_data_t *input, crm_data_t *existing_cib, crm_data_t **result_cib, crm_data_t **answer) { gboolean verbose = FALSE; crm_data_t *failed = NULL; enum cib_errors result = cib_ok; int cib_update_op = CIB_UPDATE_OP_NONE; crm_debug_2("Processing \"%s\" event for section=%s", op, crm_str(section)); failed = create_xml_node(NULL, XML_TAG_FAILED); if (strcmp(CIB_OP_CREATE, op) == 0) { cib_update_op = CIB_UPDATE_OP_ADD; } else if (strcmp(CIB_OP_UPDATE, op) == 0) { cib_update_op = CIB_UPDATE_OP_MODIFY; } else if (strcmp(CIB_OP_DELETE_ALT, op) == 0) { cib_update_op = CIB_UPDATE_OP_DELETE; } else { crm_err("Incorrect request handler invoked for \"%s\" op", crm_str(op)); return cib_operation; } result = cib_ok; if (options & cib_verbose) { verbose = TRUE; } if(safe_str_eq(XML_CIB_TAG_SECTION_ALL, section)) { section = NULL; } if(input == NULL) { crm_err("Cannot perform modification with no data"); return cib_NOOBJECT; } *result_cib = copy_xml(existing_cib); crm_validate_data(input); crm_validate_data(*result_cib); /* make changes to a temp copy then activate */ if(section == NULL) { int lpc = 0; const char *type = NULL; crm_data_t *sub_input = NULL; /* order is no longer important here */ const char *type_list[] = { XML_CIB_TAG_NODES, XML_CIB_TAG_CONSTRAINTS, XML_CIB_TAG_RESOURCES, XML_CIB_TAG_STATUS, XML_CIB_TAG_CRMCONFIG }; copy_in_properties(*result_cib, input); for(lpc = 0; lpc < DIMOF(type_list); lpc++) { type = type_list[lpc]; if(result == cib_ok) { crm_debug_2("Processing section=%s", type); sub_input = get_object_root(type, input); result = updateList( *result_cib, sub_input, failed, cib_update_op, type); } } } else { result = updateList( *result_cib, input, failed, cib_update_op, section); } if(result == cib_ok && !(options & cib_inhibit_bcast)) { cib_update_counter(*result_cib, XML_ATTR_NUMUPDATES, FALSE); } if (result != cib_ok || xml_has_children(failed)) { if(result == cib_ok) { result = cib_unknown; } crm_log_xml_err(failed, "CIB Update failures"); *answer = failed; } return result; } enum cib_errors replace_section( const char *section, crm_data_t *tmpCib, crm_data_t *new_section) { crm_data_t *old_section = NULL; /* find the old and new versions of the section */ old_section = get_object_root(section, tmpCib); if(old_section == NULL) { crm_err("The CIB is corrupt, cannot replace missing section %s", section); return cib_NOSECTION; } else if(new_section == NULL) { crm_err("The CIB is corrupt, cannot set section %s to nothing", section); return cib_NOSECTION; } xml_child_iter( old_section, a_child, free_xml_from_parent(old_section, a_child); ); copy_in_properties(old_section, new_section); xml_child_iter( new_section, a_child, add_node_copy(old_section, a_child); ); return cib_ok; } -#define cib_update_xml_macro(parent, xml_update) \ +#define cib_update_xml_macro(parent, xml_update) \ if(operation == CIB_UPDATE_OP_DELETE) { \ rc = delete_cib_object(parent, xml_update); \ - update_results(failed, xml_update, operation, rc); \ + update_results(failed, xml_update, operation, rc); \ \ } else if(operation == CIB_UPDATE_OP_MODIFY) { \ rc = update_cib_object(parent, xml_update); \ - update_results(failed, xml_update, operation, rc); \ + update_results(failed, xml_update, operation, rc); \ \ } else { \ rc = add_cib_object(parent, xml_update); \ - update_results(failed, xml_update, operation, rc); \ + update_results(failed, xml_update, operation, rc); \ } enum cib_errors updateList(crm_data_t *local_cib, crm_data_t *xml_section, crm_data_t *failed, int operation, const char *section) { int rc = cib_ok; crm_data_t *this_section = get_object_root(section, local_cib); if (section == NULL || xml_section == NULL) { crm_err("Section %s not found in message." " CIB update is corrupt, ignoring.", crm_str(section)); return cib_NOSECTION; } if((CIB_UPDATE_OP_NONE > operation) || (operation > CIB_UPDATE_OP_MAX)){ crm_err("Invalid operation on section %s", crm_str(section)); return cib_operation; } if(safe_str_eq(crm_element_name(xml_section), section)) { xml_child_iter(xml_section, a_child, rc = cib_ok; cib_update_xml_macro(this_section, a_child); ); } else { cib_update_xml_macro(this_section, xml_section); } if(rc == cib_ok && xml_has_children(failed)) { rc = cib_unknown; } return rc; } gboolean check_generation(crm_data_t *newCib, crm_data_t *oldCib) { if(cib_compare_generation(newCib, oldCib) >= 0) { return TRUE; } crm_warn("Generation from update is older than the existing one"); return FALSE; } gboolean update_results( crm_data_t *failed, crm_data_t *target, int operation, int return_code) { gboolean was_error = FALSE; const char *error_msg = NULL; const char *operation_msg = NULL; crm_data_t *xml_node = NULL; if (return_code != cib_ok) { operation_msg = cib_op2string(operation); error_msg = cib_error2string(return_code); xml_node = create_xml_node(failed, XML_FAIL_TAG_CIB); was_error = TRUE; add_node_copy(xml_node, target); crm_xml_add(xml_node, XML_FAILCIB_ATTR_ID, ID(target)); crm_xml_add(xml_node, XML_FAILCIB_ATTR_OBJTYPE, TYPE(target)); crm_xml_add(xml_node, XML_FAILCIB_ATTR_OP, operation_msg); crm_xml_add(xml_node, XML_FAILCIB_ATTR_REASON, error_msg); crm_warn("Action %s failed: %s (cde=%d)", operation_msg, error_msg, return_code); } return was_error; } enum cib_errors revision_check(crm_data_t *cib_update, crm_data_t *cib_copy, int flags) { int cmp = 0; enum cib_errors rc = cib_ok; char *new_revision = NULL; const char *cur_revision = crm_element_value( cib_copy, XML_ATTR_CIB_REVISION); crm_validate_data(cib_update); crm_validate_data(cib_copy); if(crm_element_value(cib_update, XML_ATTR_CIB_REVISION) == NULL) { return cib_ok; } new_revision = crm_element_value_copy(cib_update,XML_ATTR_CIB_REVISION); cmp = compare_version(new_revision, CIB_FEATURE_SET); if(cmp > 0) { CRM_DEV_ASSERT(cib_is_master == FALSE); CRM_DEV_ASSERT((flags & cib_scope_local) == 0); if(cib_is_master) { crm_err("Update uses an unsupported tag/feature:" " %s vs %s", new_revision,CIB_FEATURE_SET); rc = cib_revision_unsupported; } else if(flags & cib_scope_local) { /* an admin has forced a local change using a tag we * dont understand... ERROR */ crm_err("Local update uses an unsupported tag/feature:" " %s vs %s", new_revision,CIB_FEATURE_SET); rc = cib_revision_unsupported; } } else if(cur_revision == NULL) { crm_info("Updating CIB revision to %s", new_revision); crm_xml_add(cib_copy, XML_ATTR_CIB_REVISION, new_revision); } else { /* make sure we end up with the right value in the end */ crm_xml_add(cib_update, XML_ATTR_CIB_REVISION, cur_revision); } crm_free(new_revision); return rc; } enum cib_errors sync_our_cib(HA_Message *request, gboolean all) { enum cib_errors result = cib_ok; const char *host = cl_get_string(request, F_ORIG); const char *op = cl_get_string(request, F_CIB_OPERATION); HA_Message *replace_request = cib_msg_copy(request, FALSE); CRM_CHECK(the_cib != NULL, ;); CRM_CHECK(replace_request != NULL, ;); crm_info("Syncing CIB to %s", all?"all peers":host); if(all == FALSE && host == NULL) { crm_log_message(LOG_ERR, request); } /* remove the "all == FALSE" condition * * sync_from was failing, the local client wasnt being notified * because it didnt know it was a reply * setting this does not prevent the other nodes from applying it * if all == TRUE */ if(host != NULL) { ha_msg_add(replace_request, F_CIB_ISREPLY, host); } ha_msg_mod(replace_request, F_CIB_OPERATION, CIB_OP_REPLACE); ha_msg_add(replace_request, "original_"F_CIB_OPERATION, op); ha_msg_add(replace_request, F_CIB_GLOBAL_UPDATE, XML_BOOLEAN_TRUE); add_message_xml(replace_request, F_CIB_CALLDATA, the_cib); if(send_ha_message(hb_conn, replace_request, all?NULL:host, FALSE) == FALSE) { result = cib_not_connected; } ha_msg_del(replace_request); return result; } diff --git a/crm/crmd/callbacks.c b/crm/crmd/callbacks.c index 61ec1d187f..597b847cbe 100644 --- a/crm/crmd/callbacks.c +++ b/crm/crmd/callbacks.c @@ -1,662 +1,661 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *crmd_peer_state = NULL; crm_data_t *find_xml_in_hamessage(const HA_Message * msg); void crmd_ha_connection_destroy(gpointer user_data); /* From join_dc... */ extern gboolean check_join_state( enum crmd_fsa_state cur_state, const char *source); /* #define MAX_EMPTY_CALLBACKS 20 */ /* int empty_callbacks = 0; */ #define trigger_fsa(source) crm_debug_3("Triggering FSA: %s", __FUNCTION__); \ G_main_set_trigger(source); gboolean crmd_ha_msg_dispatch(IPC_Channel *channel, gpointer user_data) { gboolean stay_connected = TRUE; crm_debug_3("Invoked"); if(fsa_cluster_conn != NULL && IPC_ISRCONN(channel)) { if(fsa_cluster_conn->llc_ops->msgready(fsa_cluster_conn) == 0) { crm_debug_2("no message ready yet"); } /* invoke the callbacks but dont block */ fsa_cluster_conn->llc_ops->rcvmsg(fsa_cluster_conn, 0); } if (fsa_cluster_conn == NULL || channel->ch_status != IPC_CONNECT) { if(is_set(fsa_input_register, R_HA_DISCONNECTED) == FALSE) { crm_crit("Lost connection to heartbeat service."); } else { crm_info("Lost connection to heartbeat service."); } trigger_fsa(fsa_source); stay_connected = FALSE; } return stay_connected; } void crmd_ha_connection_destroy(gpointer user_data) { crm_debug_3("Invoked"); if(is_set(fsa_input_register, R_HA_DISCONNECTED)) { /* we signed out, so this is expected */ crm_info("Heartbeat disconnection complete"); return; } crm_crit("Lost connection to heartbeat service!"); register_fsa_input(C_HA_DISCONNECT, I_ERROR, NULL); trigger_fsa(fsa_source); } void crmd_ha_msg_callback(HA_Message * msg, void* private_data) { int level = LOG_DEBUG; ha_msg_input_t *new_input = NULL; oc_node_t *from_node = NULL; const char *from = ha_msg_value(msg, F_ORIG); const char *seq = ha_msg_value(msg, F_SEQ); const char *op = ha_msg_value(msg, F_CRM_TASK); const char *sys_to = ha_msg_value(msg, F_CRM_SYS_TO); const char *sys_from = ha_msg_value(msg, F_CRM_SYS_FROM); CRM_DEV_ASSERT(from != NULL); crm_debug_2("HA[inbound]: %s from %s", op, from); if(fsa_membership_copy == NULL) { crm_debug("Ignoring HA messages until we are" " connected to the CCM (%s op from %s)", op, from); crm_log_message_adv( LOG_MSG, "HA[inbound]: Ignore (No CCM)", msg); return; } from_node = g_hash_table_lookup(fsa_membership_copy->members, from); if(from_node == NULL) { if(safe_str_eq(op, CRM_OP_VOTE)) { level = LOG_WARNING; } else if(AM_I_DC && safe_str_eq(op, CRM_OP_JOIN_ANNOUNCE)) { level = LOG_WARNING; } else if(safe_str_eq(sys_from, CRM_SYSTEM_DC)) { level = LOG_WARNING; } do_crm_log(level, __FILE__, __FUNCTION__, "Ignoring HA message (op=%s) from %s: not in our" " membership list (size=%d)", op, from, g_hash_table_size(fsa_membership_copy->members)); crm_log_message_adv(LOG_MSG, "HA[inbound]: CCM Discard", msg); } else if(safe_str_eq(sys_to, CRM_SYSTEM_DC) && AM_I_DC == FALSE) { crm_debug_2("Ignoring message for the DC [F_SEQ=%s]", seq); return; } else if(safe_str_eq(sys_from, CRM_SYSTEM_DC)) { if(AM_I_DC && safe_str_neq(from, fsa_our_uname)) { crm_err("Another DC detected: %s (op=%s)", from, op); /* make sure the election happens NOW */ level = LOG_WARNING; if(fsa_state != S_ELECTION) { new_input = new_ha_msg_input(msg); register_fsa_error_adv( C_FSA_INTERNAL, I_ELECTION, NULL, new_input, __FUNCTION__); } #if 0 /* still thinking about this one... * could create a timing issue if we dont notice the * election before a new DC is elected. */ } else if(fsa_our_dc != NULL && safe_str_neq(from,fsa_our_dc)){ crm_warn("Ignoring message from wrong DC: %s vs. %s ", from, fsa_our_dc); return; #endif } else { crm_debug_2("Processing DC message from %s [F_SEQ=%s]", from, seq); } } if(new_input == NULL) { int msg_id = -1; crm_log_message_adv(LOG_MSG, "HA[inbound]", msg); new_input = new_ha_msg_input(msg); msg_id = register_fsa_input(C_HA_MESSAGE, I_ROUTER, new_input); crm_debug_2("Submitted %s from %s for processing (job=%d)", op, from, msg_id); } #if 0 if(ha_msg_value(msg, XML_ATTR_REFERENCE) == NULL) { ha_msg_add(new_input->msg, XML_ATTR_REFERENCE, seq); } #endif delete_ha_msg_input(new_input); trigger_fsa(fsa_source); return; } /* * Apparently returning TRUE means "stay connected, keep doing stuff". * Returning FALSE means "we're all done, close the connection" */ gboolean crmd_ipc_msg_callback(IPC_Channel *client, gpointer user_data) { int lpc = 0; HA_Message *msg = NULL; ha_msg_input_t *new_input = NULL; crmd_client_t *curr_client = (crmd_client_t*)user_data; gboolean stay_connected = TRUE; crm_debug_2("Invoked: %s", curr_client->table_key); while(IPC_ISRCONN(client)) { if(client->ops->is_message_pending(client) == 0) { break; } msg = msgfromIPC_noauth(client); if (msg == NULL) { crm_info("%s: no message this time", curr_client->table_key); continue; } lpc++; new_input = new_ha_msg_input(msg); crm_msg_del(msg); crm_debug_2("Processing msg from %s", curr_client->table_key); crm_log_message_adv(LOG_DEBUG_2, "CRMd[inbound]", new_input->msg); if(crmd_authorize_message(new_input, curr_client)) { register_fsa_input(C_IPC_MESSAGE, I_ROUTER, new_input); } delete_ha_msg_input(new_input); msg = NULL; new_input = NULL; if(client->ch_status != IPC_CONNECT) { break; } } crm_debug_2("Processed %d messages", lpc); if (client->ch_status != IPC_CONNECT) { stay_connected = FALSE; process_client_disconnect(curr_client); } trigger_fsa(fsa_source); return stay_connected; } extern GCHSource *lrm_source; gboolean lrm_dispatch(IPC_Channel *src_not_used, gpointer user_data) { /* ?? src == lrm_channel ?? */ ll_lrm_t *lrm = (ll_lrm_t*)user_data; IPC_Channel *lrm_channel = lrm->lrm_ops->ipcchan(lrm); crm_debug_3("Invoked"); lrm->lrm_ops->rcvmsg(lrm, FALSE); if(lrm_channel->ch_status != IPC_CONNECT) { if(is_set(fsa_input_register, R_LRM_CONNECTED)) { crm_crit("LRM Connection failed"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit_inplace(fsa_input_register, R_LRM_CONNECTED); } else { crm_info("LRM Connection disconnected"); } lrm_source = NULL; return FALSE; } return TRUE; } void lrm_op_callback(lrm_op_t* op) { CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return; } crm_debug_3("Invoked: %s/%s (%s)", op->op_type, op->rsc_id, op_status2text(op->op_status)); /* Make sure the LRM events are received in order */ register_fsa_input_later(C_LRM_OP_CALLBACK, I_LRM_EVENT, op); } void crmd_ha_status_callback( const char *node, const char * status, void* private_data) { crm_data_t *update = NULL; crm_notice("Status update: Node %s now has status [%s]",node,status); if(safe_str_neq(status, DEADSTATUS)) { crm_debug_3("nstatus callback was not for a dead node"); return; } /* this node is taost */ update = create_node_state( node, status, XML_BOOLEAN_NO, OFFLINESTATUS, CRMD_STATE_INACTIVE, NULL, TRUE, __FUNCTION__); crm_xml_add(update, XML_CIB_ATTR_REPLACE, XML_TAG_TRANSIENT_NODEATTRS); /* this change should not be broadcast */ update_local_cib(create_cib_fragment(update, XML_CIB_TAG_STATUS)); trigger_fsa(fsa_source); free_xml(update); } void crmd_client_status_callback(const char * node, const char * client, const char * status, void * private) { const char *join = NULL; crm_data_t *update = NULL; gboolean clear_shutdown = FALSE; crm_debug_3("Invoked"); if(safe_str_neq(client, CRM_SYSTEM_CRMD)) { return; } if(safe_str_eq(status, JOINSTATUS)){ status = ONLINESTATUS; clear_shutdown = TRUE; } else if(safe_str_eq(status, LEAVESTATUS)){ status = OFFLINESTATUS; join = CRMD_STATE_INACTIVE; /* clear_shutdown = TRUE; */ } set_bit_inplace(fsa_input_register, R_PEER_DATA); g_hash_table_replace( crmd_peer_state, crm_strdup(node), crm_strdup(status)); if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { return; } else if(fsa_state == S_STOPPING) { return; } crm_notice("Status update: Client %s/%s now has status [%s]", node, client, status); if(safe_str_eq(status, ONLINESTATUS)) { /* remove the cached value in case it changed */ crm_info("Uncaching UUID for %s", node); unget_uuid(node); } - if(safe_str_eq(node, fsa_our_dc) - && safe_str_eq(status, OFFLINESTATUS)) { + if(safe_str_eq(node, fsa_our_dc) && safe_str_eq(status, OFFLINESTATUS)){ /* did our DC leave us */ crm_info("Got client status callback - our DC is dead"); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL); } else { crm_debug_3("Got client status callback"); update = create_node_state( node, NULL, NULL, status, join, NULL, clear_shutdown, __FUNCTION__); if(clear_shutdown) { crm_xml_add(update, XML_CIB_ATTR_REPLACE, XML_TAG_TRANSIENT_NODEATTRS); } /* it is safe to keep these updates on the local node * each node updates their own CIB */ fsa_cib_anon_update( XML_CIB_TAG_STATUS, update, cib_inhibit_bcast|cib_scope_local|cib_quorum_override); free_xml(update); if(AM_I_DC && safe_str_eq(status, OFFLINESTATUS)) { g_hash_table_remove(confirmed_nodes, node); g_hash_table_remove(finalized_nodes, node); g_hash_table_remove(integrated_nodes, node); g_hash_table_remove(welcomed_nodes, node); check_join_state(fsa_state, __FUNCTION__); } } trigger_fsa(fsa_source); } static void crmd_ipc_connection_destroy(gpointer user_data) { crmd_client_t *client = user_data; if(client == NULL) { crm_debug_4("No client to delete"); return; } if(client->client_source != NULL) { crm_debug_4("Deleting %s (%p) from mainloop", client->uuid, client->client_source); G_main_del_IPC_Channel(client->client_source); client->client_source = NULL; } crm_debug_3("Freeing %s client", client->uuid); crm_free(client->table_key); crm_free(client->sub_sys); crm_free(client->uuid); crm_free(client); return; } gboolean crmd_client_connect(IPC_Channel *client_channel, gpointer user_data) { crm_debug_3("Invoked"); if (client_channel == NULL) { crm_err("Channel was NULL"); } else if (client_channel->ch_status == IPC_DISCONNECT) { crm_err("Channel was disconnected"); } else { crmd_client_t *blank_client = NULL; crm_debug_3("Channel connected"); crm_malloc0(blank_client, sizeof(crmd_client_t)); if (blank_client == NULL) { return FALSE; } client_channel->ops->set_recv_qlen(client_channel, 100); client_channel->ops->set_send_qlen(client_channel, 100); blank_client->client_channel = client_channel; blank_client->sub_sys = NULL; blank_client->uuid = NULL; blank_client->table_key = NULL; blank_client->client_source = G_main_add_IPC_Channel( G_PRIORITY_LOW, client_channel, FALSE, crmd_ipc_msg_callback, blank_client, crmd_ipc_connection_destroy); } return TRUE; } gboolean ccm_dispatch(int fd, gpointer user_data) { int rc = 0; oc_ev_t *ccm_token = (oc_ev_t*)user_data; gboolean was_error = FALSE; crm_debug_3("Invoked"); rc = oc_ev_handle_event(ccm_token); if(rc != 0) { if(is_set(fsa_input_register, R_CCM_DISCONNECTED) == FALSE) { /* we signed out, so this is expected */ register_fsa_input(C_CCM_CALLBACK, I_ERROR, NULL); crm_err("CCM connection appears to have failed: rc=%d.", rc); } was_error = TRUE; } trigger_fsa(fsa_source); return !was_error; } static gboolean fsa_have_quorum = FALSE; void crmd_ccm_msg_callback( oc_ed_t event, void *cookie, size_t size, const void *data) { int instance = -1; gboolean update_cache = FALSE; struct crmd_ccm_data_s *event_data = NULL; const oc_ev_membership_t *membership = data; gboolean update_quorum = FALSE; gboolean trigger_transition = FALSE; crm_debug_3("Invoked"); if(data != NULL) { instance = membership->m_instance; } crm_info("Quorum %s after event=%s (id=%d)", ccm_have_quorum(event)?"(re)attained":"lost", ccm_event_name(event), instance); switch(event) { case OC_EV_MS_NEW_MEMBERSHIP: case OC_EV_MS_INVALID:/* fall through */ update_cache = TRUE; update_quorum = TRUE; break; case OC_EV_MS_NOT_PRIMARY: #if UNTESTED if(AM_I_DC == FALSE) { break; } /* tell the TE to pretend it had completed and stop */ /* side effect: we'll end up in S_IDLE */ register_fsa_action(A_TE_HALT, TRUE); #endif break; case OC_EV_MS_PRIMARY_RESTORED: fsa_membership_copy->id = instance; if(AM_I_DC && need_transition(fsa_state)) { trigger_transition = TRUE; } break; case OC_EV_MS_EVICTED: update_quorum = TRUE; register_fsa_input(C_FSA_INTERNAL, I_STOP, NULL); crm_err("Shutting down after CCM event: %s", ccm_event_name(event)); break; default: crm_err("Unknown CCM event: %d", event); } if(update_quorum && ccm_have_quorum(event) == FALSE) { /* did we just loose quorum? */ if(fsa_have_quorum && need_transition(fsa_state)) { crm_info("Quorum lost: triggering transition (%s)", ccm_event_name(event)); trigger_transition = TRUE; } fsa_have_quorum = FALSE; } else if(update_quorum) { crm_debug_2("Updating quorum after event %s", ccm_event_name(event)); fsa_have_quorum = TRUE; } if(trigger_transition) { crm_debug_2("Scheduling transition after event %s", ccm_event_name(event)); /* make sure that when we query the CIB that it has * the changes that triggered the transition */ switch(event) { case OC_EV_MS_NEW_MEMBERSHIP: case OC_EV_MS_INVALID: case OC_EV_MS_PRIMARY_RESTORED: fsa_membership_copy->id = instance; break; default: break; } if(update_cache == FALSE) { /* a stand-alone transition */ register_fsa_action(A_TE_CANCEL); } } if(update_cache) { crm_debug_2("Updating cache after event %s", ccm_event_name(event)); crm_malloc0(event_data, sizeof(struct crmd_ccm_data_s)); if(event_data == NULL) { return; } event_data->event = event; if(data != NULL) { event_data->oc = copy_ccm_oc_data(data); } register_fsa_input_adv( C_CCM_CALLBACK, I_CCM_EVENT, event_data, trigger_transition?A_TE_CANCEL:A_NOTHING, FALSE, __FUNCTION__); if (event_data->oc) { crm_free(event_data->oc); event_data->oc = NULL; } crm_free(event_data); } oc_ev_callback_done(cookie); return; } void crmd_cib_connection_destroy(gpointer user_data) { crm_debug_3("Invoked"); trigger_fsa(fsa_source); if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Connection to the CIB terminated..."); return; } /* eventually this will trigger a reconnect, not a shutdown */ crm_err("Connection to the CIB terminated..."); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit_inplace(fsa_input_register, R_CIB_CONNECTED); return; } longclock_t fsa_start = 0; longclock_t fsa_stop = 0; longclock_t fsa_diff = 0; gboolean crm_fsa_trigger(gpointer user_data) { unsigned int fsa_diff_ms = 0; if(fsa_diff_max_ms > 0) { fsa_start = time_longclock(); } crm_debug_2("Invoked (queue len: %d)", g_list_length(fsa_message_queue)); s_crmd_fsa(C_FSA_INTERNAL); crm_debug_2("Exited (queue len: %d)", g_list_length(fsa_message_queue)); if(fsa_diff_max_ms > 0) { fsa_stop = time_longclock(); fsa_diff = sub_longclock(fsa_stop, fsa_start); fsa_diff_ms = longclockto_ms(fsa_diff); if(fsa_diff_ms > fsa_diff_max_ms) { crm_err("FSA took %dms to complete", fsa_diff_ms); } else if(fsa_diff_ms > fsa_diff_warn_ms) { crm_warn("FSA took %dms to complete", fsa_diff_ms); } } return TRUE; } diff --git a/crm/crmd/ccm.c b/crm/crmd/ccm.c index 5353e3367b..10fd7012fa 100644 --- a/crm/crmd/ccm.c +++ b/crm/crmd/ccm.c @@ -1,705 +1,697 @@ -/* $Id: ccm.c,v 1.103 2006/04/18 10:59:46 andrew Exp $ */ +/* $Id: ccm.c,v 1.104 2006/04/20 15:43:23 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* put these first so that uuid_t is defined without conflicts */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void oc_ev_special(const oc_ev_t *, oc_ev_class_t , int ); int register_with_ccm(ll_cluster_t *hb_cluster); void msg_ccm_join(const HA_Message *msg, void *foo); void crmd_ccm_msg_callback(oc_ed_t event, void *cookie, size_t size, const void *data); gboolean ghash_node_clfree(gpointer key, gpointer value, gpointer user_data); void ghash_update_cib_node(gpointer key, gpointer value, gpointer user_data); #define CCM_EVENT_DETAIL 0 #define CCM_EVENT_DETAIL_PARTIAL 0 oc_ev_t *fsa_ev_token; int num_ccm_register_fails = 0; int max_ccm_register_fails = 30; /* A_CCM_CONNECT */ enum crmd_fsa_input do_ccm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int ret; int fsa_ev_fd; gboolean did_fail = FALSE; if(action & A_CCM_DISCONNECT){ set_bit_inplace(fsa_input_register, R_CCM_DISCONNECTED); oc_ev_unregister(fsa_ev_token); } if(action & A_CCM_CONNECT) { crm_debug_3("Registering with CCM"); clear_bit_inplace(fsa_input_register, R_CCM_DISCONNECTED); ret = oc_ev_register(&fsa_ev_token); if (ret != 0) { crm_warn("CCM registration failed"); did_fail = TRUE; } if(did_fail == FALSE) { crm_debug_3("Setting up CCM callbacks"); ret = oc_ev_set_callback(fsa_ev_token, OC_EV_MEMB_CLASS, crmd_ccm_msg_callback, NULL); if (ret != 0) { crm_warn("CCM callback not set"); did_fail = TRUE; } } if(did_fail == FALSE) { oc_ev_special(fsa_ev_token, OC_EV_MEMB_CLASS, 0/*don't care*/); crm_debug_3("Activating CCM token"); ret = oc_ev_activate(fsa_ev_token, &fsa_ev_fd); if (ret != 0){ crm_warn("CCM Activation failed"); did_fail = TRUE; } } if(did_fail) { num_ccm_register_fails++; oc_ev_unregister(fsa_ev_token); if(num_ccm_register_fails < max_ccm_register_fails) { crm_warn("CCM Connection failed" " %d times (%d max)", num_ccm_register_fails, max_ccm_register_fails); crm_timer_start(wait_timer); crmd_fsa_stall(NULL); return I_NULL; } else { crm_err("CCM Activation failed %d (max) times", num_ccm_register_fails); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } } crm_info("CCM connection established..." " waiting for first callback"); G_main_add_fd(G_PRIORITY_HIGH, fsa_ev_fd, FALSE, ccm_dispatch, fsa_ev_token, default_ipc_connection_destroy); } if(action & ~(A_CCM_CONNECT|A_CCM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } return I_NULL; } extern GHashTable *voted; /* A_CCM_EVENT */ enum crmd_fsa_input do_ccm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum crmd_fsa_input return_input = I_NULL; oc_ed_t event; const oc_ev_membership_t *oc = NULL; struct crmd_ccm_data_s *ccm_data = fsa_typed_data(fsa_dt_ccm); if(ccm_data == NULL) { crm_err("No data provided to FSA function"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } else if(msg_data->fsa_cause != C_CCM_CALLBACK) { crm_err("FSA function called in response to incorect input"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } event = ccm_data->event; oc = ccm_data->oc; ccm_event_detail(oc, event); if (OC_EV_MS_EVICTED == event) { /* todo: drop back to S_PENDING instead */ /* get out... NOW! * * go via the error recovery process so that HA will * restart us if required */ register_fsa_error(cause, I_ERROR, msg_data->data); return I_NULL; + } + + return return_input; +} + +static void +check_dead_member(const char *uname, GHashTable *members) +{ + CRM_CHECK(uname != NULL, return); + if(members != NULL && g_hash_table_lookup(members, uname) != NULL) { + crm_err("%s didnt really leave the membership!", uname); + return; + } + + if(confirmed_nodes != NULL) { + g_hash_table_remove(confirmed_nodes, uname); + } + if(finalized_nodes != NULL) { + g_hash_table_remove(finalized_nodes, uname); + } + if(integrated_nodes != NULL) { + g_hash_table_remove(integrated_nodes, uname); + } + + if(safe_str_eq(fsa_our_uname, uname)) { + crm_err("We're not part of the cluster anymore"); } - if(AM_I_DC == FALSE && oc->m_n_out != 0) { - /* Possibly move this logic to ghash_update_cib_node() */ - unsigned lpc = 0; - int offset = oc->m_out_idx; - for(lpc=0; lpc < oc->m_n_out; lpc++) { - const char *uname = oc->m_array[offset+lpc].node_uname; - if(uname == NULL) { - crm_err("CCM node had no name"); - continue; - - } else { - /* remove any no-votes they had cast */ - if(voted != NULL) { - g_hash_table_remove(voted, uname); - } - if(safe_str_eq(uname, fsa_our_dc)) { - crm_warn("Our DC node (%s) left the cluster", - uname); - register_fsa_input(cause, I_ELECTION, NULL); - } - } + if(AM_I_DC) { + /* remove any no-votes they had cast */ + if(voted != NULL) { + g_hash_table_remove(voted, uname); } + + } else if(safe_str_eq(uname, fsa_our_dc)) { + crm_warn("Our DC node (%s) left the cluster", uname); + register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } - - return return_input; } /* A_CCM_UPDATE_CACHE */ /* * Take the opportunity to update the node status in the CIB as well */ - enum crmd_fsa_input do_ccm_update_cache(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum crmd_fsa_input next_input = I_NULL; unsigned int lpc; int offset; GHashTable *members = NULL; oc_ed_t event; const oc_ev_membership_t *oc = NULL; oc_node_list_t *tmp = NULL, *membership_copy = NULL; struct crmd_ccm_data_s *ccm_data = fsa_typed_data(fsa_dt_ccm); HA_Message *no_op = create_request( CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD, AM_I_DC?CRM_SYSTEM_DC:CRM_SYSTEM_CRMD, NULL); if(ccm_data == NULL) { crm_err("No data provided to FSA function"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); send_msg_via_ha(fsa_cluster_conn, no_op); return I_NULL; } event = ccm_data->event; oc = ccm_data->oc; crm_debug_2("Updating CCM cache after a \"%s\" event.", ccm_event_name(event)); crm_debug_2("instance=%d, nodes=%d, new=%d, lost=%d n_idx=%d, " "new_idx=%d, old_idx=%d", oc->m_instance, oc->m_n_member, oc->m_n_in, oc->m_n_out, oc->m_memb_idx, oc->m_in_idx, oc->m_out_idx); #define ALAN_DEBUG 1 #ifdef ALAN_DEBUG { /* * Size (Size + 2) / 2 * * 3 (3+2)/2 = 5 / 2 = 2 * 4 (4+2)/2 = 6 / 2 = 3 * 5 (5+2)/2 = 7 / 2 = 3 * 6 (6+2)/2 = 8 / 2 = 4 * 7 (7+2)/2 = 9 / 2 = 4 */ unsigned int clsize = (oc->m_out_idx - oc->m_n_member); unsigned int plsize = (clsize + 2)/2; gboolean plurality = (oc->m_n_member >= plsize); gboolean Q = ccm_have_quorum(event); if(clsize == 2) { if (!Q) { crm_err("2 nodes w/o quorum"); } } else if(Q && !plurality) { crm_err("Quorum w/o plurality (%d/%d nodes)", oc->m_n_member, clsize); } else if(plurality && !Q) { crm_err("Plurality w/o Quorum (%d/%d nodes)", oc->m_n_member, clsize); } else { crm_debug_2("Quorum(%s) and plurality (%d/%d) agree.", Q?"true":"false", oc->m_n_member, clsize); } } #endif crm_malloc0(membership_copy, sizeof(oc_node_list_t)); if(membership_copy == NULL) { crm_crit("Couldnt create membership copy - out of memory"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return I_NULL; } membership_copy->id = oc->m_instance; membership_copy->last_event = event; crm_debug_3("Copying members"); /*--*-- All Member Nodes --*--*/ offset = oc->m_memb_idx; membership_copy->members_size = oc->m_n_member; if(membership_copy->members_size > 0) { membership_copy->members = g_hash_table_new(g_str_hash, g_str_equal); members = membership_copy->members; for(lpc=0; lpc < membership_copy->members_size; lpc++) { oc_node_t *member = NULL; crm_debug_3("Copying member %d", lpc); crm_malloc0(member, sizeof(oc_node_t)); if(member == NULL) { continue; } member->node_id = oc->m_array[offset+lpc].node_id; member->node_born_on = oc->m_array[offset+lpc].node_born_on; member->node_uname = NULL; if(oc->m_array[offset+lpc].node_uname != NULL) { member->node_uname = crm_strdup(oc->m_array[offset+lpc].node_uname); } else { crm_err("Node %d had a NULL uname", member->node_id); } g_hash_table_insert( members, member->node_uname, member); } } else { membership_copy->members = NULL; } crm_debug_3("Copying new members"); /*--*-- New Member Nodes --*--*/ offset = oc->m_in_idx; membership_copy->new_members_size = oc->m_n_in; if(membership_copy->new_members_size > 0) { membership_copy->new_members = g_hash_table_new(g_str_hash, g_str_equal); members = membership_copy->new_members; for(lpc=0; lpc < membership_copy->new_members_size; lpc++) { oc_node_t *member = NULL; crm_malloc0(member, sizeof(oc_node_t)); if(member == NULL) { continue; } member->node_uname = NULL; member->node_id = oc->m_array[offset+lpc].node_id; member->node_born_on = oc->m_array[offset+lpc].node_born_on; if(oc->m_array[offset+lpc].node_uname != NULL) { member->node_uname = crm_strdup(oc->m_array[offset+lpc].node_uname); } else { crm_err("Node %d had a NULL uname", member->node_id); } g_hash_table_insert( members, member->node_uname, member); g_hash_table_insert(members, member->node_uname, member); } } else { membership_copy->new_members = NULL; } crm_debug_3("Copying dead members"); /*--*-- Recently Dead Member Nodes --*--*/ offset = oc->m_out_idx; membership_copy->dead_members_size = oc->m_n_out; if(membership_copy->dead_members_size > 0) { membership_copy->dead_members = g_hash_table_new(g_str_hash, g_str_equal); members = membership_copy->dead_members; for(lpc=0; lpc < membership_copy->dead_members_size; lpc++) { oc_node_t *member = NULL; crm_malloc0(member, sizeof(oc_node_t)); if(member == NULL) { continue; } - member->node_id = - oc->m_array[offset+lpc].node_id; + member->node_id = oc->m_array[offset+lpc].node_id; member->node_born_on = oc->m_array[offset+lpc].node_born_on; member->node_uname = NULL; CRM_DEV_ASSERT(oc->m_array[offset+lpc].node_uname != NULL); if(oc->m_array[offset+lpc].node_uname == NULL) { continue; } member->node_uname = crm_strdup(oc->m_array[offset+lpc].node_uname); - g_hash_table_insert( - members, member->node_uname, member); - g_hash_table_insert(members, member->node_uname, member); - if(confirmed_nodes != NULL) { - g_hash_table_remove( - confirmed_nodes, member->node_uname); - } - if(finalized_nodes != NULL) { - g_hash_table_remove( - finalized_nodes, member->node_uname); - } - if(integrated_nodes != NULL) { - g_hash_table_remove( - integrated_nodes, member->node_uname); - } + check_dead_member( + member->node_uname, membership_copy->members); + } } else { membership_copy->dead_members = NULL; } tmp = fsa_membership_copy; fsa_membership_copy = membership_copy; crm_debug_2("Updated membership cache with %d (%d new, %d lost) members", g_hash_table_size(fsa_membership_copy->members), g_hash_table_size(fsa_membership_copy->new_members), g_hash_table_size(fsa_membership_copy->dead_members)); /* Free the old copy */ if(tmp != NULL) { if(tmp->members != NULL) g_hash_table_foreach_remove( tmp->members, ghash_node_clfree, NULL); if(tmp->new_members != NULL) g_hash_table_foreach_remove( tmp->new_members, ghash_node_clfree, NULL); if(tmp->dead_members != NULL) g_hash_table_foreach_remove( tmp->dead_members, ghash_node_clfree, NULL); crm_free(tmp); } crm_debug_3("Free'd old copies"); set_bit_inplace(fsa_input_register, R_CCM_DATA); if(cur_state != S_STOPPING) { crm_debug_3("Updating the CIB from CCM cache"); do_update_cib_nodes(FALSE, __FUNCTION__); } /* Membership changed, remind everyone we're here. * This will aid detection of duplicate DCs */ send_msg_via_ha(fsa_cluster_conn, no_op); return next_input; } void ccm_event_detail(const oc_ev_membership_t *oc, oc_ed_t event) { int lpc; gboolean member = FALSE; member = FALSE; crm_debug_2("-----------------------"); crm_info("%s: trans=%d, nodes=%d, new=%d, lost=%d n_idx=%d, " "new_idx=%d, old_idx=%d", ccm_event_name(event), oc->m_instance, oc->m_n_member, oc->m_n_in, oc->m_n_out, oc->m_memb_idx, oc->m_in_idx, oc->m_out_idx); #if !CCM_EVENT_DETAIL_PARTIAL for(lpc=0; lpc < oc->m_n_member; lpc++) { crm_info("\tCURRENT: %s [nodeid=%d, born=%d]", oc->m_array[oc->m_memb_idx+lpc].node_uname, oc->m_array[oc->m_memb_idx+lpc].node_id, oc->m_array[oc->m_memb_idx+lpc].node_born_on); if(safe_str_eq(fsa_our_uname, oc->m_array[oc->m_memb_idx+lpc].node_uname)) { member = TRUE; } } if (member == FALSE) { crm_warn("MY NODE IS NOT IN CCM THE MEMBERSHIP LIST"); } #endif for(lpc=0; lpc<(int)oc->m_n_in; lpc++) { crm_info("\tNEW: %s [nodeid=%d, born=%d]", oc->m_array[oc->m_in_idx+lpc].node_uname, oc->m_array[oc->m_in_idx+lpc].node_id, oc->m_array[oc->m_in_idx+lpc].node_born_on); } for(lpc=0; lpc<(int)oc->m_n_out; lpc++) { crm_info("\tLOST: %s [nodeid=%d, born=%d]", oc->m_array[oc->m_out_idx+lpc].node_uname, oc->m_array[oc->m_out_idx+lpc].node_id, oc->m_array[oc->m_out_idx+lpc].node_born_on); - if(fsa_our_uname != NULL - && 0 == strcmp(fsa_our_uname, - oc->m_array[oc->m_out_idx+lpc].node_uname)) { - crm_err("We're not part of the cluster anymore"); - } } crm_debug_2("-----------------------"); } int register_with_ccm(ll_cluster_t *hb_cluster) { return 0; } void msg_ccm_join(const HA_Message *msg, void *foo) { crm_debug_2("###### Received ccm_join message..."); if (msg != NULL) { crm_debug_2("[type=%s]", ha_msg_value(msg, F_TYPE)); crm_debug_2("[orig=%s]", ha_msg_value(msg, F_ORIG)); crm_debug_2("[to=%s]", ha_msg_value(msg, F_TO)); crm_debug_2("[status=%s]", ha_msg_value(msg, F_STATUS)); crm_debug_2("[info=%s]", ha_msg_value(msg, F_COMMENT)); crm_debug_2("[rsc_hold=%s]", ha_msg_value(msg, F_RESOURCES)); crm_debug_2("[stable=%s]", ha_msg_value(msg, F_ISSTABLE)); crm_debug_2("[rtype=%s]", ha_msg_value(msg, F_RTYPE)); crm_debug_2("[ts=%s]", ha_msg_value(msg, F_TIME)); crm_debug_2("[seq=%s]", ha_msg_value(msg, F_SEQ)); crm_debug_2("[generation=%s]", ha_msg_value(msg, F_HBGENERATION)); /* crm_debug_2("[=%s]", ha_msg_value(msg, F_)); */ } return; } struct update_data_s { const char *state; const char *caller; crm_data_t *updates; gboolean overwrite_join; }; static void ccm_node_update_complete(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { fsa_data_t *msg_data = NULL; if(rc == cib_ok) { crm_debug("Node update %d complete", call_id); } else { crm_err("Node update %d failed", call_id); crm_log_message(LOG_DEBUG, msg); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } void do_update_cib_nodes(gboolean overwrite, const char *caller) { int call_id = 0; int call_options = cib_scope_local|cib_quorum_override; struct update_data_s update_data; crm_data_t *fragment = NULL; if(fsa_membership_copy == NULL) { /* We got a replace notification before being connected to * the CCM. * So there is no need to update the local CIB with our values * - since we have none. */ return; } fragment = create_xml_node(NULL, XML_CIB_TAG_STATUS); update_data.caller = caller; update_data.updates = fragment; - update_data.state = XML_BOOLEAN_YES; update_data.overwrite_join = overwrite; if(overwrite == FALSE) { call_options = call_options|cib_inhibit_bcast; crm_debug_2("Inhibiting bcast for membership updates"); } - /* live nodes */ - if(fsa_membership_copy->members != NULL) { - g_hash_table_foreach(fsa_membership_copy->members, - ghash_update_cib_node, &update_data); - } - /* dead nodes */ update_data.state = XML_BOOLEAN_NO; if(fsa_membership_copy->dead_members != NULL) { g_hash_table_foreach(fsa_membership_copy->dead_members, ghash_update_cib_node, &update_data); } + /* live nodes */ + update_data.state = XML_BOOLEAN_YES; + if(fsa_membership_copy->members != NULL) { + g_hash_table_foreach(fsa_membership_copy->members, + ghash_update_cib_node, &update_data); + } + fsa_cib_update(XML_CIB_TAG_STATUS, fragment, call_options, call_id); add_cib_op_callback(call_id, FALSE, NULL, ccm_node_update_complete); free_xml(fragment); } void ghash_update_cib_node(gpointer key, gpointer value, gpointer user_data) { crm_data_t *tmp1 = NULL; const char *join = NULL; const char *peer_online = NULL; const char *node_uname = (const char*)key; struct update_data_s* data = (struct update_data_s*)user_data; crm_debug_2("Updating %s: %s (overwrite=%s)", node_uname, data->state, data->overwrite_join?"true":"false"); peer_online = g_hash_table_lookup(crmd_peer_state, node_uname); if(data->overwrite_join) { if(safe_str_neq(peer_online, ONLINESTATUS)) { join = CRMD_STATE_INACTIVE; } else { const char *peer_member = g_hash_table_lookup( confirmed_nodes, node_uname); if(peer_member != NULL) { join = CRMD_JOINSTATE_MEMBER; } else { join = CRMD_JOINSTATE_PENDING; } } } tmp1 = create_node_state(node_uname, NULL, data->state, peer_online, join, NULL, FALSE, data->caller); add_node_copy(data->updates, tmp1); free_xml(tmp1); } gboolean ghash_node_clfree(gpointer key, gpointer value, gpointer user_data) { /* value->node_uname is free'd as "key" */ if(key != NULL) { crm_free(key); } if(value != NULL) { crm_free(value); } return TRUE; }