diff --git a/cib/callbacks.c b/cib/callbacks.c index d0f05aa615..c38136be98 100644 --- a/cib/callbacks.c +++ b/cib/callbacks.c @@ -1,1901 +1,1901 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern GMainLoop* mainloop; extern gboolean cib_shutdown_flag; extern gboolean stand_alone; extern const char* cib_root; #if SUPPORT_HEARTBEAT extern ll_cluster_t *hb_conn; #endif extern void cib_ha_connection_destroy(gpointer user_data); extern enum cib_errors cib_update_counter( crm_data_t *xml_obj, const char *field, gboolean reset); extern void GHFunc_count_peers( gpointer key, gpointer value, gpointer user_data); extern enum cib_errors revision_check( crm_data_t *cib_update, crm_data_t *cib_copy, int flags); void initiate_exit(void); void terminate_cib(const char *caller); gint cib_GCompareFunc(gconstpointer a, gconstpointer b); gboolean cib_msg_timeout(gpointer data); void cib_GHFunc(gpointer key, gpointer value, gpointer user_data); gboolean can_write(int flags); HA_Message *cib_msg_copy(HA_Message *msg, gboolean with_data); void send_cib_replace(const HA_Message *sync_request, const char *host); void cib_process_request( HA_Message *request, gboolean privileged, gboolean force_synchronous, gboolean from_peer, cib_client_t *cib_client); HA_Message *cib_construct_reply(HA_Message *request, HA_Message *output, int rc); gboolean syncd_once = FALSE; extern GHashTable *client_list; int next_client_id = 0; gboolean cib_is_master = FALSE; extern const char *cib_our_uname; extern unsigned long cib_num_ops, cib_num_local, cib_num_updates, cib_num_fail; extern unsigned long cib_bad_connects, cib_num_timeouts; extern longclock_t cib_call_time; extern enum cib_errors cib_status; static HA_Message * cib_prepare_common(HA_Message *root, const char *section) { HA_Message *data = NULL; /* extract the CIB from the fragment */ if(root == NULL) { return NULL; } else if(safe_str_eq(crm_element_name(root), XML_TAG_FRAGMENT) || safe_str_eq(crm_element_name(root), F_CIB_CALLDATA)) { data = find_xml_node(root, XML_TAG_CIB, TRUE); if(data != NULL) { crm_debug_3("Extracted CIB from %s", TYPE(root)); } else { crm_log_xml_debug_4(root, "No CIB"); } } else { data = root; } /* grab the section specified for the command */ if(section != NULL && data != NULL && safe_str_eq(crm_element_name(data), XML_TAG_CIB)){ int rc = revision_check(data, the_cib, 0/* call_options */); if(rc == cib_ok) { data = get_object_root(section, data); if(data != NULL) { crm_debug_3("Extracted %s from CIB", section); } else { crm_log_xml_debug_4(root, "No Section"); } } else { crm_debug_2("Revision check failed"); } } crm_log_xml_debug_4(root, "cib:input"); return copy_xml(data); } static gboolean verify_section(const char *section) { if(section == NULL) { return TRUE; } else if(safe_str_eq(section, XML_TAG_CIB)) { return TRUE; } else if(safe_str_eq(section, XML_CIB_TAG_STATUS)) { return TRUE; } else if(safe_str_eq(section, XML_CIB_TAG_CRMCONFIG)) { return TRUE; } else if(safe_str_eq(section, XML_CIB_TAG_NODES)) { return TRUE; } else if(safe_str_eq(section, XML_CIB_TAG_RESOURCES)) { return TRUE; } else if(safe_str_eq(section, XML_CIB_TAG_CONSTRAINTS)) { return TRUE; } return FALSE; } static enum cib_errors cib_prepare_none(HA_Message *request, HA_Message **data, const char **section) { *data = NULL; *section = cl_get_string(request, F_CIB_SECTION); if(verify_section(*section) == FALSE) { return cib_bad_section; } return cib_ok; } static enum cib_errors cib_prepare_data(HA_Message *request, HA_Message **data, const char **section) { HA_Message *input_fragment = get_message_xml(request, F_CIB_CALLDATA); *section = cl_get_string(request, F_CIB_SECTION); *data = cib_prepare_common(input_fragment, *section); free_xml(input_fragment); if(verify_section(*section) == FALSE) { return cib_bad_section; } return cib_ok; } static enum cib_errors cib_prepare_sync(HA_Message *request, HA_Message **data, const char **section) { *section = cl_get_string(request, F_CIB_SECTION); *data = request; if(verify_section(*section) == FALSE) { return cib_bad_section; } return cib_ok; } static enum cib_errors cib_prepare_diff(HA_Message *request, HA_Message **data, const char **section) { HA_Message *input_fragment = NULL; const char *update = cl_get_string(request, F_CIB_GLOBAL_UPDATE); *data = NULL; *section = NULL; if(crm_is_true(update)) { input_fragment = get_message_xml(request,F_CIB_UPDATE_DIFF); } else { input_fragment = get_message_xml(request, F_CIB_CALLDATA); } CRM_CHECK(input_fragment != NULL,crm_log_message(LOG_WARNING, request)); *data = cib_prepare_common(input_fragment, NULL); free_xml(input_fragment); return cib_ok; } static enum cib_errors cib_cleanup_query(const char *op, HA_Message **data, HA_Message **output) { CRM_DEV_ASSERT(*data == NULL); return cib_ok; } static enum cib_errors cib_cleanup_data(const char *op, HA_Message **data, HA_Message **output) { free_xml(*output); free_xml(*data); return cib_ok; } static enum cib_errors cib_cleanup_output(const char *op, HA_Message **data, HA_Message **output) { free_xml(*output); return cib_ok; } static enum cib_errors cib_cleanup_none(const char *op, HA_Message **data, HA_Message **output) { CRM_DEV_ASSERT(*data == NULL); CRM_DEV_ASSERT(*output == NULL); return cib_ok; } static enum cib_errors cib_cleanup_sync(const char *op, HA_Message **data, HA_Message **output) { /* data is non-NULL but doesnt need to be free'd */ CRM_DEV_ASSERT(*output == NULL); return cib_ok; } /* typedef struct cib_operation_s { const char* operation; gboolean modifies_cib; gboolean needs_privileges; gboolean needs_quorum; enum cib_errors (*prepare)(HA_Message *, crm_data_t**, const char **); enum cib_errors (*cleanup)(crm_data_t**, crm_data_t**); enum cib_errors (*fn)( const char *, int, const char *, crm_data_t*, crm_data_t*, crm_data_t**, crm_data_t**); } cib_operation_t; */ /* technically bump does modify the cib... * but we want to split the "bump" from the "sync" */ cib_operation_t cib_server_ops[] = { {NULL, FALSE, FALSE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_default}, {CIB_OP_QUERY, FALSE, FALSE, FALSE, cib_prepare_none, cib_cleanup_query, cib_process_query}, {CIB_OP_MODIFY, TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_data, cib_process_modify}, {CIB_OP_UPDATE, TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_data, cib_process_change}, {CIB_OP_APPLY_DIFF,TRUE, TRUE, TRUE, cib_prepare_diff, cib_cleanup_data, cib_process_diff}, {CIB_OP_SLAVE, FALSE, TRUE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_readwrite}, {CIB_OP_SLAVEALL, FALSE, TRUE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_readwrite}, {CIB_OP_SYNC_ONE, FALSE, TRUE, FALSE, cib_prepare_sync, cib_cleanup_sync, cib_process_sync_one}, {CIB_OP_MASTER, FALSE, TRUE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_readwrite}, {CIB_OP_ISMASTER, FALSE, TRUE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_readwrite}, {CIB_OP_BUMP, TRUE, TRUE, TRUE, cib_prepare_none, cib_cleanup_output, cib_process_bump}, {CIB_OP_REPLACE, TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_data, cib_process_replace}, {CIB_OP_CREATE, TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_data, cib_process_change}, {CIB_OP_DELETE, TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_data, cib_process_delete}, {CIB_OP_DELETE_ALT,TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_data, cib_process_change}, {CIB_OP_SYNC, FALSE, TRUE, FALSE, cib_prepare_sync, cib_cleanup_sync, cib_process_sync}, {CRM_OP_QUIT, FALSE, TRUE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_quit}, {CRM_OP_PING, FALSE, FALSE, FALSE, cib_prepare_none, cib_cleanup_output, cib_process_ping}, {CIB_OP_ERASE, TRUE, TRUE, TRUE, cib_prepare_none, cib_cleanup_output, cib_process_erase}, {CRM_OP_NOOP, FALSE, FALSE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_default}, {"cib_shutdown_req",FALSE, TRUE, FALSE, cib_prepare_sync, cib_cleanup_sync, cib_process_shutdown_req}, }; int send_via_callback_channel(HA_Message *msg, const char *token); enum cib_errors cib_process_command( HA_Message *request, HA_Message **reply, crm_data_t **cib_diff, gboolean privileged); gboolean cib_common_callback(IPC_Channel *channel, cib_client_t *cib_client, gboolean force_synchronous, gboolean privileged); enum cib_errors cib_get_operation_id(const HA_Message * msg, int *operation); gboolean cib_process_disconnect(IPC_Channel *channel, cib_client_t *cib_client); int num_clients = 0; static void cib_ipc_connection_destroy(gpointer user_data) { cib_client_t *cib_client = user_data; /* cib_process_disconnect */ if(cib_client == NULL) { crm_debug_4("Destroying %p", user_data); return; } if(cib_client->source != NULL) { crm_debug_4("Deleting %s (%p) from mainloop", cib_client->name, cib_client->source); G_main_del_IPC_Channel(cib_client->source); cib_client->source = NULL; } crm_debug_3("Destroying %s (%p)", cib_client->name, user_data); num_clients--; crm_debug_2("Num unfree'd clients: %d", num_clients); crm_free(cib_client->name); crm_free(cib_client->callback_id); crm_free(cib_client->id); crm_free(cib_client); crm_debug_4("Freed the cib client"); return; } static cib_client_t * cib_client_connect_common( IPC_Channel *channel, const char *channel_name, gboolean (*callback)(IPC_Channel *channel, gpointer user_data)) { gboolean can_connect = TRUE; cib_client_t *new_client = NULL; crm_debug_3("Connecting channel"); if (channel == NULL) { crm_err("Channel was NULL"); can_connect = FALSE; cib_bad_connects++; } else if (channel->ch_status != IPC_CONNECT) { crm_err("Channel was disconnected"); can_connect = FALSE; cib_bad_connects++; } else if(channel_name == NULL) { crm_err("user_data must contain channel name"); can_connect = FALSE; cib_bad_connects++; } else if(cib_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", channel->farside_pid); return NULL; } else { crm_malloc0(new_client, sizeof(cib_client_t)); num_clients++; new_client->channel = channel; new_client->channel_name = channel_name; crm_debug_3("Created channel %p for channel %s", new_client, new_client->channel_name); channel->ops->set_recv_qlen(channel, 1024); channel->ops->set_send_qlen(channel, 1024); if(callback != NULL) { new_client->source = G_main_add_IPC_Channel( G_PRIORITY_DEFAULT, channel, FALSE, callback, new_client, cib_ipc_connection_destroy); } crm_debug_3("Channel %s connected for client %s", new_client->channel_name, new_client->id); } return new_client; } gboolean cib_client_connect_rw_synch(IPC_Channel *channel, gpointer user_data) { cib_client_t *new_client = NULL; new_client = cib_client_connect_common( channel, cib_channel_rw_synchronous, cib_rw_synchronous_callback); if(new_client == NULL) { return FALSE; } return TRUE; } gboolean cib_client_connect_ro_synch(IPC_Channel *channel, gpointer user_data) { cib_client_t *new_client = NULL; new_client = cib_client_connect_common( channel, cib_channel_ro_synchronous, cib_ro_synchronous_callback); if(new_client == NULL) { return FALSE; } return TRUE; } gboolean cib_client_connect_rw_ro(IPC_Channel *channel, gpointer user_data) { cl_uuid_t client_id; HA_Message *reg_msg = NULL; cib_client_t *new_client = NULL; char uuid_str[UU_UNPARSE_SIZEOF]; gboolean (*callback)(IPC_Channel *channel, gpointer user_data); callback = cib_ro_callback; if(safe_str_eq(user_data, cib_channel_rw)) { callback = cib_rw_callback; } new_client = cib_client_connect_common( channel, callback==cib_ro_callback?cib_channel_ro:cib_channel_rw, callback); if(new_client == NULL) { return FALSE; } cl_uuid_generate(&client_id); cl_uuid_unparse(&client_id, uuid_str); CRM_CHECK(new_client->id == NULL, crm_free(new_client->id)); new_client->id = crm_strdup(uuid_str); cl_uuid_generate(&client_id); cl_uuid_unparse(&client_id, uuid_str); CRM_CHECK(new_client->callback_id == NULL, crm_free(new_client->callback_id)); new_client->callback_id = crm_strdup(uuid_str); /* make sure we can find ourselves later for sync calls * redirected to the master instance */ g_hash_table_insert(client_list, new_client->id, new_client); reg_msg = ha_msg_new(3); ha_msg_add(reg_msg, F_CIB_OPERATION, CRM_OP_REGISTER); ha_msg_add(reg_msg, F_CIB_CLIENTID, new_client->id); ha_msg_add( reg_msg, F_CIB_CALLBACK_TOKEN, new_client->callback_id); send_ipc_message(channel, reg_msg); crm_msg_del(reg_msg); return TRUE; } gboolean cib_client_connect_null(IPC_Channel *channel, gpointer user_data) { cib_client_t *new_client = NULL; new_client = cib_client_connect_common( channel, cib_channel_callback, cib_null_callback); if(new_client == NULL) { return FALSE; } return TRUE; } gboolean cib_rw_callback(IPC_Channel *channel, gpointer user_data) { gboolean result = FALSE; result = cib_common_callback(channel, user_data, FALSE, TRUE); return result; } gboolean cib_ro_synchronous_callback(IPC_Channel *channel, gpointer user_data) { gboolean result = FALSE; result = cib_common_callback(channel, user_data, TRUE, FALSE); return result; } gboolean cib_rw_synchronous_callback(IPC_Channel *channel, gpointer user_data) { gboolean result = FALSE; result = cib_common_callback(channel, user_data, TRUE, TRUE); return result; } gboolean cib_ro_callback(IPC_Channel *channel, gpointer user_data) { gboolean result = FALSE; result = cib_common_callback(channel, user_data, FALSE, FALSE); return result; } gboolean cib_null_callback(IPC_Channel *channel, gpointer user_data) { gboolean keep_connection = TRUE; HA_Message *op_request = NULL; HA_Message *registered = NULL; cib_client_t *cib_client = user_data; cib_client_t *hash_client = NULL; const char *type = NULL; const char *uuid_ticket = NULL; const char *client_name = NULL; gboolean register_failed = FALSE; if(cib_client == NULL) { crm_err("Discarding IPC message from unknown source" " on callback channel."); return FALSE; } while(IPC_ISRCONN(channel)) { crm_msg_del(op_request); if(channel->ops->is_message_pending(channel) == 0) { break; } op_request = msgfromIPC_noauth(channel); if(op_request == NULL) { break; } type = cl_get_string(op_request, F_CIB_OPERATION); if(safe_str_eq(type, T_CIB_NOTIFY) ) { /* Update the notify filters for this client */ int on_off = 0; ha_msg_value_int( op_request, F_CIB_NOTIFY_ACTIVATE, &on_off); type = cl_get_string(op_request, F_CIB_NOTIFY_TYPE); crm_info("Setting %s callbacks for %s: %s", type, cib_client->name, on_off?"on":"off"); if(safe_str_eq(type, T_CIB_POST_NOTIFY)) { cib_client->post_notify = on_off; } else if(safe_str_eq(type, T_CIB_PRE_NOTIFY)) { cib_client->pre_notify = on_off; } else if(safe_str_eq(type, T_CIB_UPDATE_CONFIRM)) { cib_client->confirmations = on_off; } else if(safe_str_eq(type, T_CIB_DIFF_NOTIFY)) { cib_client->diffs = on_off; } else if(safe_str_eq(type, T_CIB_REPLACE_NOTIFY)) { cib_client->replace = on_off; } continue; } else if(safe_str_neq(type, CRM_OP_REGISTER) ) { crm_warn("Discarding IPC message from %s on callback channel", cib_client->id); continue; } uuid_ticket = cl_get_string(op_request, F_CIB_CALLBACK_TOKEN); client_name = cl_get_string(op_request, F_CIB_CLIENTNAME); CRM_DEV_ASSERT(uuid_ticket != NULL); if(crm_assert_failed) { register_failed = crm_assert_failed; } CRM_DEV_ASSERT(client_name != NULL); if(crm_assert_failed) { register_failed = crm_assert_failed; } if(register_failed == FALSE) { hash_client = g_hash_table_lookup(client_list, uuid_ticket); if(hash_client != NULL) { crm_err("Duplicate registration request..." " disconnecting"); register_failed = TRUE; } } if(register_failed) { crm_err("Registration request failed... disconnecting"); crm_msg_del(op_request); return FALSE; } CRM_CHECK(cib_client->id == NULL, crm_free(cib_client->id)); CRM_CHECK(cib_client->name == NULL, crm_free(cib_client->name)); cib_client->id = crm_strdup(uuid_ticket); cib_client->name = crm_strdup(client_name); g_hash_table_insert(client_list, cib_client->id, cib_client); crm_debug_2("Registered %s on %s channel", cib_client->id, cib_client->channel_name); if(safe_str_eq(cib_client->name, CRM_SYSTEM_TENGINE)) { /* The TE is _always_ interested in these * Enable now to avoid timing issues */ cib_client->diffs = TRUE; } registered = ha_msg_new(2); ha_msg_add(registered, F_CIB_OPERATION, CRM_OP_REGISTER); ha_msg_add(registered, F_CIB_CLIENTID, cib_client->id); send_ipc_message(channel, registered); crm_msg_del(registered); if(channel->ch_status == IPC_CONNECT) { break; } } crm_msg_del(op_request); if(channel->ch_status != IPC_CONNECT) { crm_debug_2("Client disconnected"); keep_connection = cib_process_disconnect(channel, cib_client); } return keep_connection; } void cib_common_callback_worker(HA_Message *op_request, cib_client_t *cib_client, gboolean force_synchronous, gboolean privileged); void cib_common_callback_worker(HA_Message *op_request, cib_client_t *cib_client, gboolean force_synchronous, gboolean privileged) { int rc = cib_ok; int call_type = 0; const char *op = NULL; longclock_t call_stop = 0; longclock_t call_start = 0; call_start = time_longclock(); cib_client->num_calls++; op = cl_get_string(op_request, F_CIB_OPERATION); rc = cib_get_operation_id(op_request, &call_type); if(rc != cib_ok) { crm_debug("Invalid operation %s from %s/%s", op, cib_client->name, cib_client->channel_name); } else { crm_debug_2("Processing %s operation from %s/%s", op, cib_client->name, cib_client->channel_name); } if(rc == cib_ok) { cib_process_request( op_request, force_synchronous, privileged, FALSE, cib_client); } call_stop = time_longclock(); cib_call_time += (call_stop - call_start); } gboolean cib_common_callback(IPC_Channel *channel, cib_client_t *cib_client, gboolean force_synchronous, gboolean privileged) { int lpc = 0; HA_Message *op_request = NULL; gboolean keep_channel = TRUE; if(cib_client == NULL) { crm_err("Receieved call from unknown source. Discarding."); return FALSE; } if(cib_client->name == NULL) { cib_client->name = crm_itoa(channel->farside_pid); } if(cib_client->id == NULL) { cib_client->id = crm_strdup(cib_client->name); g_hash_table_insert(client_list, cib_client->id, cib_client); } crm_debug_2("Callback for %s on %s channel", cib_client->id, cib_client->channel_name); while(IPC_ISRCONN(channel)) { if(channel->ops->is_message_pending(channel) == 0) { break; } op_request = msgfromIPC_noauth(channel); if (op_request == NULL) { perror("Receive failure:"); break; } lpc++; crm_assert_failed = FALSE; crm_log_message_adv(LOG_MSG, "Client[inbound]", op_request); ha_msg_add(op_request, F_CIB_CLIENTID, cib_client->id); ha_msg_add(op_request, F_CIB_CLIENTNAME, cib_client->name); cib_common_callback_worker( op_request, cib_client, force_synchronous, privileged); crm_msg_del(op_request); if(channel->ch_status == IPC_CONNECT) { break; } } crm_debug_2("Processed %d messages", lpc); if(channel->ch_status != IPC_CONNECT) { crm_debug_2("Client disconnected"); keep_channel = cib_process_disconnect(channel, cib_client); } return keep_channel; } extern void cib_send_remote_msg(void *session, HA_Message *msg); static void do_local_notify(HA_Message *notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { /* send callback to originating child */ cib_client_t *client_obj = NULL; HA_Message *client_reply = NULL; enum cib_errors local_rc = cib_ok; crm_debug_2("Performing notification"); client_reply = cib_msg_copy(notify_src, TRUE); if(client_id != NULL) { client_obj = g_hash_table_lookup( client_list, client_id); } else { crm_debug_2("No client to sent the response to." " F_CIB_CLIENTID not set."); } crm_debug_3("Sending callback to request originator"); if(client_obj == NULL) { local_rc = cib_reply_failed; } else if (crm_str_eq(client_obj->channel_name, "remote", FALSE)) { crm_debug("Send message over TLS connection"); cib_send_remote_msg(client_obj->channel, client_reply); } else { const char *client_id = client_obj->callback_id; crm_debug_2("Sending %ssync response to %s %s", sync_reply?"":"an a-", client_obj->name, from_peer?"(originator of delegated request)":""); if(sync_reply) { client_id = client_obj->id; } local_rc = send_via_callback_channel(client_reply, client_id); } if(local_rc != cib_ok && client_obj != NULL) { crm_warn("%sSync reply to %s failed: %s", sync_reply?"":"A-", client_obj?client_obj->name:"", cib_error2string(local_rc)); } ha_msg_del(client_reply); } static void parse_local_options( cib_client_t *cib_client, int call_type, int call_options, const char *host, const char *op, gboolean *local_notify, gboolean *needs_reply, gboolean *process, gboolean *needs_forward) { if(cib_server_ops[call_type].modifies_cib && !(call_options & cib_inhibit_bcast)) { /* we need to send an update anyway */ *needs_reply = TRUE; } else { *needs_reply = FALSE; } if(host == NULL && (call_options & cib_scope_local)) { crm_debug_2("Processing locally scoped %s op from %s", op, cib_client->name); *local_notify = TRUE; } else if(host == NULL && cib_is_master) { crm_debug_2("Processing master %s op locally from %s", op, cib_client->name); *local_notify = TRUE; } else if(safe_str_eq(host, cib_our_uname)) { crm_debug_2("Processing locally addressed %s op from %s", op, cib_client->name); *local_notify = TRUE; } else if(stand_alone) { *needs_forward = FALSE; *local_notify = TRUE; *process = TRUE; } else { crm_debug_2("%s op from %s needs to be forwarded to %s", op, cib_client->name, host?host:"the master instance"); *needs_forward = TRUE; *process = FALSE; } } static gboolean parse_peer_options( int call_type, HA_Message *request, gboolean *local_notify, gboolean *needs_reply, gboolean *process, gboolean *needs_forward) { const char *op = cl_get_string(request, F_CIB_OPERATION); const char *originator = cl_get_string(request, F_ORIG); const char *host = cl_get_string(request, F_CIB_HOST); const char *reply_to = cl_get_string(request, F_CIB_ISREPLY); const char *update = cl_get_string(request, F_CIB_GLOBAL_UPDATE); const char *delegated = cl_get_string(request, F_CIB_DELEGATED); if(safe_str_eq(op, "cib_shutdown_req")) { if(reply_to != NULL) { crm_debug("Processing %s from %s", op, host); *needs_reply = FALSE; } else { crm_debug("Processing %s reply from %s", op, host); } return TRUE; } else if(crm_is_true(update) && safe_str_eq(reply_to, cib_our_uname)) { crm_debug_2("Processing global/peer update from %s" " that originated from us", originator); *needs_reply = FALSE; if(cl_get_string(request, F_CIB_CLIENTID) != NULL) { *local_notify = TRUE; } return TRUE; } else if(crm_is_true(update)) { crm_debug_2("Processing global/peer update from %s", originator); *needs_reply = FALSE; return TRUE; } else if(host != NULL && safe_str_eq(host, cib_our_uname)) { crm_debug_2("Processing request sent to us from %s", originator); return TRUE; } else if(delegated != NULL && cib_is_master == TRUE) { crm_debug_2("Processing request sent to master instance from %s", originator); return TRUE; } else if(reply_to != NULL && safe_str_eq(reply_to, cib_our_uname)) { crm_debug_2("Forward reply sent from %s to local clients", originator); *process = FALSE; *needs_reply = FALSE; *local_notify = TRUE; return TRUE; } else if(delegated != NULL) { crm_debug_2("Ignoring msg for master instance"); } else if(host != NULL) { /* this is for a specific instance and we're not it */ crm_debug_2("Ignoring msg for instance on %s", crm_str(host)); } else if(reply_to == NULL && cib_is_master == FALSE) { /* this is for the master instance and we're not it */ crm_debug_2("Ignoring reply to %s", crm_str(reply_to)); } else { crm_err("Nothing for us to do?"); crm_log_message_adv(LOG_ERR, "Peer[inbound]", request); } return FALSE; } static void forward_request(HA_Message *request, cib_client_t *cib_client, int call_options) { HA_Message *forward_msg = NULL; const char *op = cl_get_string(request, F_CIB_OPERATION); const char *host = cl_get_string(request, F_CIB_HOST); forward_msg = cib_msg_copy(request, TRUE); ha_msg_add(forward_msg, F_CIB_DELEGATED, cib_our_uname); if(host != NULL) { crm_debug_2("Forwarding %s op to %s", op, host); send_cluster_message(host, crm_msg_cib, forward_msg, FALSE); } else { crm_debug_2("Forwarding %s op to master instance", op); send_cluster_message(NULL, crm_msg_cib, forward_msg, FALSE); } if(call_options & cib_discard_reply) { crm_debug_2("Client not interested in reply"); } else if(call_options & cib_sync_call) { /* keep track of the request so we can time it * out if required */ crm_debug_2("Registering delegated call from %s", cib_client->id); cib_client->delegated_calls = g_list_append( cib_client->delegated_calls, forward_msg); forward_msg = NULL; } crm_msg_del(forward_msg); } static void send_peer_reply( HA_Message *msg, crm_data_t *result_diff, const char *originator, gboolean broadcast) { HA_Message *reply_copy = NULL; CRM_ASSERT(msg != NULL); reply_copy = cib_msg_copy(msg, TRUE); if(broadcast) { /* this (successful) call modified the CIB _and_ the * change needs to be broadcast... * send via HA to other nodes */ int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; char *digest = NULL; cib_diff_version_details( result_diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_debug("Sending update diff %d.%d.%d -> %d.%d.%d", diff_del_admin_epoch,diff_del_epoch,diff_del_updates, diff_add_admin_epoch,diff_add_epoch,diff_add_updates); ha_msg_add(reply_copy, F_CIB_ISREPLY, originator); ha_msg_add(reply_copy, F_CIB_GLOBAL_UPDATE, XML_BOOLEAN_TRUE); ha_msg_mod(reply_copy, F_CIB_OPERATION, CIB_OP_APPLY_DIFF); digest = calculate_xml_digest(the_cib, FALSE, TRUE); ha_msg_mod(result_diff, XML_ATTR_DIGEST, digest); /* crm_log_xml_debug(the_cib, digest); */ crm_free(digest); add_message_xml(reply_copy, F_CIB_UPDATE_DIFF, result_diff); crm_log_message(LOG_DEBUG_3, reply_copy); send_cluster_message(NULL, crm_msg_cib, reply_copy, TRUE); } else if(originator != NULL) { /* send reply via HA to originating node */ crm_debug_2("Sending request result to originator only"); ha_msg_add(reply_copy, F_CIB_ISREPLY, originator); send_cluster_message(originator, crm_msg_cib, reply_copy, FALSE); } crm_msg_del(reply_copy); } void cib_process_request( HA_Message *request, gboolean force_synchronous, gboolean privileged, gboolean from_peer, cib_client_t *cib_client) { int call_type = 0; int call_options = 0; gboolean process = TRUE; gboolean needs_reply = TRUE; gboolean local_notify = FALSE; gboolean needs_forward = FALSE; crm_data_t *result_diff = NULL; enum cib_errors rc = cib_ok; HA_Message *op_reply = NULL; const char *op = cl_get_string(request, F_CIB_OPERATION); const char *originator = cl_get_string(request, F_ORIG); const char *host = cl_get_string(request, F_CIB_HOST); const char *update = cl_get_string(request, F_CIB_GLOBAL_UPDATE); crm_debug_4("%s Processing msg %s", cib_our_uname, cl_get_string(request, F_SEQ)); cib_num_ops++; if(cib_num_ops == 0) { cib_num_fail = 0; cib_num_local = 0; cib_num_updates = 0; crm_info("Stats wrapped around"); } if(host != NULL && strlen(host) == 0) { host = NULL; } ha_msg_value_int(request, F_CIB_CALLOPTS, &call_options); crm_debug_4("Retrieved call options: %d", call_options); if(force_synchronous) { call_options |= cib_sync_call; } crm_debug_2("Processing %s message (%s) for %s...", from_peer?"peer":"local", from_peer?originator:cib_our_uname, host?host:"master"); rc = cib_get_operation_id(request, &call_type); if(cib_server_ops[call_type].modifies_cib) { cib_num_updates++; } if(rc != cib_ok) { /* TODO: construct error reply */ crm_err("Pre-processing of command failed: %s", cib_error2string(rc)); } else if(from_peer == FALSE) { parse_local_options(cib_client, call_type, call_options, host, op, &local_notify, &needs_reply, &process, &needs_forward); } else if(parse_peer_options(call_type, request, &local_notify, &needs_reply, &process, &needs_forward) == FALSE) { return; } crm_debug_3("Finished determining processing actions"); if(call_options & cib_discard_reply) { needs_reply = cib_server_ops[call_type].modifies_cib; local_notify = FALSE; } if(needs_forward) { forward_request(request, cib_client, call_options); return; } if(cib_status != cib_ok) { rc = cib_status; crm_err("Operation ignored, cluster configuration is invalid." " Please repair and restart: %s", cib_error2string(cib_status)); op_reply = cib_construct_reply(request, the_cib, cib_status); } else if(process) { cib_num_local++; crm_debug_2("Performing local processing:" " op=%s origin=%s/%s,%s (update=%s)", cl_get_string(request, F_CIB_OPERATION), originator, cl_get_string(request, F_CIB_CLIENTID), cl_get_string(request, F_CIB_CALLID), update); rc = cib_process_command( request, &op_reply, &result_diff, privileged); crm_debug_2("Processing complete"); if(rc == cib_diff_resync || rc == cib_diff_failed || rc == cib_old_data) { crm_warn("%s operation failed: %s", crm_str(op), cib_error2string(rc)); } else if(rc != cib_ok) { cib_num_fail++; crm_err("%s operation failed: %s", crm_str(op), cib_error2string(rc)); crm_log_message_adv(LOG_DEBUG, "CIB[output]", op_reply); crm_log_message_adv(LOG_INFO, "Input message", request); } if(op_reply == NULL && (needs_reply || local_notify)) { crm_err("Unexpected NULL reply to message"); crm_log_message(LOG_ERR, request); needs_reply = FALSE; local_notify = FALSE; } } crm_debug_3("processing response cases"); if(local_notify) { const char *client_id = cl_get_string(request, F_CIB_CLIENTID); if(process == FALSE) { do_local_notify(request, client_id, call_options & cib_sync_call, from_peer); } else { do_local_notify(op_reply, client_id, call_options & cib_sync_call, from_peer); } } /* from now on we are the server */ if(needs_reply == FALSE || stand_alone) { /* nothing more to do... * this was a non-originating slave update */ crm_debug_2("Completed slave update"); } else if(rc == cib_ok && result_diff != NULL && !(call_options & cib_inhibit_bcast)) { send_peer_reply(request, result_diff, originator, TRUE); } else if(call_options & cib_discard_reply) { crm_debug_4("Caller isn't interested in reply"); } else if (from_peer) { crm_debug_2("Directing reply to %s", originator); if(call_options & cib_inhibit_bcast) { crm_debug_3("Request not broadcast: inhibited"); } if(cib_server_ops[call_type].modifies_cib == FALSE || result_diff == NULL) { crm_debug_3("Request not broadcast: R/O call"); } if(rc != cib_ok) { crm_debug_3("Request not broadcast: call failed: %s", cib_error2string(rc)); } send_peer_reply(op_reply, result_diff, originator, FALSE); } crm_msg_del(op_reply); free_xml(result_diff); return; } HA_Message * cib_construct_reply(HA_Message *request, HA_Message *output, int rc) { int lpc = 0; HA_Message *reply = NULL; const char *name = NULL; const char *value = NULL; const char *names[] = { F_CIB_OPERATION, F_CIB_CALLID, F_CIB_CLIENTID, F_CIB_CALLOPTS }; crm_debug_4("Creating a basic reply"); reply = ha_msg_new(8); ha_msg_add(reply, F_TYPE, T_CIB); for(lpc = 0; lpc < DIMOF(names); lpc++) { name = names[lpc]; value = cl_get_string(request, name); ha_msg_add(reply, name, value); } ha_msg_add_int(reply, F_CIB_RC, rc); if(output != NULL) { crm_debug_4("Attaching reply output"); add_message_xml(reply, F_CIB_CALLDATA, output); } return reply; } enum cib_errors cib_process_command(HA_Message *request, HA_Message **reply, crm_data_t **cib_diff, gboolean privileged) { crm_data_t *output = NULL; crm_data_t *input = NULL; crm_data_t *current_cib = NULL; crm_data_t *result_cib = NULL; int call_type = 0; int call_options = 0; enum cib_errors rc = cib_ok; enum cib_errors rc2 = cib_ok; int log_level = LOG_DEBUG_3; crm_data_t *filtered = NULL; const char *op = NULL; const char *section = NULL; gboolean config_changed = FALSE; gboolean global_update = crm_is_true( cl_get_string(request, F_CIB_GLOBAL_UPDATE)); CRM_ASSERT(cib_status == cib_ok); *reply = NULL; *cib_diff = NULL; if(per_action_cib) { CRM_CHECK(the_cib == NULL, free_xml(the_cib)); the_cib = readCibXmlFile(cib_root, "cib.xml", FALSE); CRM_CHECK(the_cib != NULL, return cib_NOOBJECT); } current_cib = the_cib; /* Start processing the request... */ op = cl_get_string(request, F_CIB_OPERATION); ha_msg_value_int(request, F_CIB_CALLOPTS, &call_options); rc = cib_get_operation_id(request, &call_type); if(rc == cib_ok && cib_server_ops[call_type].needs_privileges && privileged == FALSE) { /* abort */ rc = cib_not_authorized; } if(rc == cib_ok && stand_alone == FALSE && global_update == FALSE && cib_server_ops[call_type].needs_quorum && can_write(call_options) == FALSE) { rc = cib_no_quorum; } /* prevent NUMUPDATES from being incrimented - apply the change as-is */ if(global_update) { call_options |= cib_inhibit_bcast; call_options |= cib_force_diff; } rc2 = cib_server_ops[call_type].prepare(request, &input, §ion); if(rc == cib_ok) { rc = rc2; } if(rc != cib_ok) { crm_debug_2("Call setup failed: %s", cib_error2string(rc)); goto done; } else if(cib_server_ops[call_type].modifies_cib == FALSE) { rc = cib_server_ops[call_type].fn( op, call_options, section, input, current_cib, &result_cib, &output); CRM_CHECK(result_cib == NULL, free_xml(result_cib)); goto done; } /* Handle a valid write action */ if((call_options & cib_inhibit_notify) == 0) { cib_pre_notify(call_options, op, get_object_root(section, current_cib), input); } if(rc == cib_ok) { result_cib = copy_xml(current_cib); rc = cib_server_ops[call_type].fn( op, call_options, section, input, current_cib, &result_cib, &output); } if(rc == cib_ok) { CRM_DEV_ASSERT(result_cib != NULL); CRM_DEV_ASSERT(current_cib != result_cib); update_counters(__FILE__, __FUNCTION__, result_cib); config_changed = cib_config_changed(current_cib, result_cib, &filtered); if(global_update) { /* skip */ CRM_CHECK(call_type == 4 || call_type == 11, crm_err("Call type: %d", call_type); crm_log_message(LOG_ERR, request)); crm_debug_2("Skipping update: global replace"); } else if(cib_server_ops[call_type].fn == cib_process_change && (call_options & cib_inhibit_bcast)) { /* skip */ crm_debug_2("Skipping update: inhibit broadcast"); } else { cib_update_counter(result_cib, XML_ATTR_NUMUPDATES, FALSE); if(config_changed) { cib_update_counter(result_cib, XML_ATTR_NUMUPDATES, TRUE); cib_update_counter(result_cib, XML_ATTR_GENERATION, FALSE); } } if(do_id_check(result_cib, NULL, TRUE, FALSE)) { rc = cib_id_check; if(call_options & cib_force_diff) { crm_err("Global update introduces id collision!"); } } else { *cib_diff = diff_cib_object(current_cib, result_cib, FALSE); } } if(rc != cib_ok) { free_xml(result_cib); } else { rc = activateCibXml(result_cib, config_changed); if(rc != cib_ok) { crm_warn("Activation failed"); } } if((call_options & cib_inhibit_notify) == 0) { const char *call_id = cl_get_string(request, F_CIB_CALLID); const char *client = cl_get_string(request, F_CIB_CLIENTNAME); cib_post_notify(call_options, op, input, rc, the_cib); cib_diff_notify(call_options, client, call_id, op, input, rc, *cib_diff); } if(rc == cib_dtd_validation && global_update) { log_level = LOG_WARNING; crm_log_xml_info(input, "cib:global_update"); } else if(rc != cib_ok) { log_level = LOG_DEBUG_4; } else if(cib_is_master && config_changed) { log_level = LOG_INFO; } else if(cib_is_master) { log_level = LOG_DEBUG; log_xml_diff(LOG_DEBUG_2, filtered, "cib:diff:filtered"); } else if(config_changed) { log_level = LOG_DEBUG_2; } else { log_level = LOG_DEBUG_3; } log_xml_diff(log_level, *cib_diff, "cib:diff"); free_xml(filtered); done: if((call_options & cib_discard_reply) == 0) { *reply = cib_construct_reply(request, output, rc); } if(call_type >= 0) { cib_server_ops[call_type].cleanup(op, &input, &output); } if(per_action_cib) { uninitializeCib(); } return rc; } int send_via_callback_channel(HA_Message *msg, const char *token) { cib_client_t *hash_client = NULL; GList *list_item = NULL; enum cib_errors rc = cib_ok; crm_debug_3("Delivering msg %p to client %s", msg, token); if(token == NULL) { crm_err("No client id token, cant send message"); if(rc == cib_ok) { rc = cib_missing; } } else { /* A client that left before we could reply is not really * _our_ error. Warn instead. */ hash_client = g_hash_table_lookup(client_list, token); if(hash_client == NULL) { crm_warn("Cannot find client for token %s", token); rc = cib_client_gone; } else if(hash_client->channel == NULL) { crm_err("Cannot find channel for client %s", token); rc = cib_client_corrupt; } else if(hash_client->channel->ops->get_chan_status( hash_client->channel) == IPC_DISCONNECT) { crm_warn("Client %s has disconnected", token); rc = cib_client_gone; cib_num_timeouts++; } } /* this is a more important error so overwriting rc is warrented */ if(msg == NULL) { crm_err("No message to send"); rc = cib_reply_failed; } if(rc == cib_ok) { list_item = g_list_find_custom( hash_client->delegated_calls, msg, cib_GCompareFunc); } if(list_item != NULL) { /* remove it - no need to time it out */ HA_Message *orig_msg = list_item->data; crm_debug_3("Removing msg from delegated list"); hash_client->delegated_calls = g_list_remove( hash_client->delegated_calls, orig_msg); CRM_DEV_ASSERT(orig_msg != msg); crm_msg_del(orig_msg); } if(rc == cib_ok) { crm_debug_3("Delivering reply to client %s", token); if(send_ipc_message(hash_client->channel, msg) == FALSE) { crm_warn("Delivery of reply to client %s/%s failed", hash_client->name, token); rc = cib_reply_failed; } } return rc; } gint cib_GCompareFunc(gconstpointer a, gconstpointer b) { const HA_Message *a_msg = a; const HA_Message *b_msg = b; int msg_a_id = 0; int msg_b_id = 0; ha_msg_value_int(a_msg, F_CIB_CALLID, &msg_a_id); ha_msg_value_int(b_msg, F_CIB_CALLID, &msg_b_id); if(msg_a_id == msg_b_id) { return 0; } else if(msg_a_id < msg_b_id) { return -1; } return 1; } gboolean cib_msg_timeout(gpointer data) { crm_debug_4("Checking if any clients have timed out messages"); /* g_hash_table_foreach(client_list, cib_GHFunc, NULL); */ return TRUE; } void cib_GHFunc(gpointer key, gpointer value, gpointer user_data) { int timeout = 0; /* 1 iteration == 10 seconds */ HA_Message *msg = NULL; HA_Message *reply = NULL; const char *host_to = NULL; cib_client_t *client = value; GListPtr list = client->delegated_calls; while(list != NULL) { msg = list->data; ha_msg_value_int(msg, F_CIB_TIMEOUT, &timeout); if(timeout <= 0) { list = list->next; continue; } else { int seen = 0; ha_msg_value_int(msg, F_CIB_SEENCOUNT, &seen); crm_debug_4("Timeout %d, seen %d", timeout, seen); if(seen < timeout) { crm_debug_4("Updating seen count for msg from client %s", client->id); seen += 10; ha_msg_mod_int(msg, F_CIB_SEENCOUNT, seen); list = list->next; continue; } } cib_num_timeouts++; host_to = cl_get_string(msg, F_CIB_HOST); crm_warn("Sending operation timeout msg to client %s", client->id); reply = ha_msg_new(4); ha_msg_add(reply, F_TYPE, T_CIB); ha_msg_add(reply, F_CIB_OPERATION, cl_get_string(msg, F_CIB_OPERATION)); ha_msg_add(reply, F_CIB_CALLID, cl_get_string(msg, F_CIB_CALLID)); if(host_to == NULL) { ha_msg_add_int(reply, F_CIB_RC, cib_master_timeout); } else { ha_msg_add_int(reply, F_CIB_RC, cib_remote_timeout); } send_ipc_message(client->channel, reply); list = list->next; client->delegated_calls = g_list_remove( client->delegated_calls, msg); crm_msg_del(msg); crm_msg_del(reply); } } gboolean cib_process_disconnect(IPC_Channel *channel, cib_client_t *cib_client) { if (channel == NULL) { CRM_DEV_ASSERT(cib_client == NULL); } else if (cib_client == NULL) { crm_err("No client"); } else { CRM_DEV_ASSERT(channel->ch_status != IPC_CONNECT); crm_debug_2("Cleaning up after client disconnect: %s/%s/%s", crm_str(cib_client->name), cib_client->channel_name, cib_client->id); if(cib_client->id != NULL) { if(!g_hash_table_remove(client_list, cib_client->id)) { crm_err("Client %s not found in the hashtable", cib_client->name); } } } if(cib_shutdown_flag && g_hash_table_size(client_list) == 0) { crm_info("All clients disconnected..."); initiate_exit(); } return FALSE; } void cib_peer_callback(HA_Message * msg, void* private_data) { int call_type = 0; int call_options = 0; const char *originator = cl_get_string(msg, F_ORIG); const char *seq = cl_get_string(msg, F_SEQ); const char *op = cl_get_string(msg, F_CIB_OPERATION); crm_node_t *node = NULL; crm_log_message_adv(LOG_MSG, "Peer[inbound]", msg); crm_debug_2("Peer %s message (%s) from %s", op, seq, originator); if(originator == NULL || safe_str_eq(originator, cib_our_uname)) { crm_debug_2("Discarding %s message %s from ourselves", op, seq); return; } if(crm_peer_cache == NULL) { crm_info("Discarding %s message (%s) from %s:" " membership not established", op, seq, originator); return; } node = g_hash_table_lookup(crm_peer_cache, originator); if(node == NULL || crm_is_member_active(node) == FALSE) { crm_warn("Discarding %s message (%s) from %s:" " not in our membership", op, seq, originator); return; } if(cib_get_operation_id(msg, &call_type) != cib_ok) { crm_debug("Discarding %s message (%s) from %s:" " Invalid operation", op, seq, originator); return; } crm_debug_2("Processing %s msg (%s) from %s",op, seq, originator); ha_msg_value_int(msg, F_CIB_CALLOPTS, &call_options); crm_debug_4("Retrieved call options: %d", call_options); if(cl_get_string(msg, F_CIB_CLIENTNAME) == NULL) { ha_msg_add(msg, F_CIB_CLIENTNAME, originator); } cib_process_request(msg, FALSE, TRUE, TRUE, NULL); return; } HA_Message * cib_msg_copy(HA_Message *msg, gboolean with_data) { int lpc = 0; const char *field = NULL; const char *value = NULL; HA_Message *value_struct = NULL; static const char *field_list[] = { F_XML_TAGNAME , F_TYPE , F_CIB_CLIENTID , F_CIB_CALLOPTS , F_CIB_CALLID , F_CIB_OPERATION , F_CIB_ISREPLY , F_CIB_SECTION , F_CIB_HOST , F_CIB_RC , F_CIB_DELEGATED , F_CIB_OBJID , F_CIB_OBJTYPE , F_CIB_EXISTING , F_CIB_SEENCOUNT , F_CIB_TIMEOUT , F_CIB_CALLBACK_TOKEN , F_CIB_GLOBAL_UPDATE , F_CIB_CLIENTNAME , F_CIB_NOTIFY_TYPE , F_CIB_NOTIFY_ACTIVATE }; static const char *data_list[] = { F_CIB_CALLDATA , F_CIB_UPDATE , F_CIB_UPDATE_RESULT }; HA_Message *copy = ha_msg_new(10); CRM_ASSERT(copy != NULL); for(lpc = 0; lpc < DIMOF(field_list); lpc++) { field = field_list[lpc]; value = cl_get_string(msg, field); if(value != NULL) { ha_msg_add(copy, field, value); } } for(lpc = 0; with_data && lpc < DIMOF(data_list); lpc++) { field = data_list[lpc]; value_struct = get_message_xml(msg, field); if(value_struct != NULL) { add_message_xml(copy, field, value_struct); } free_xml(value_struct); } return copy; } enum cib_errors cib_get_operation_id(const HA_Message * msg, int *operation) { int lpc = 0; int max_msg_types = DIMOF(cib_server_ops); const char *op = cl_get_string(msg, F_CIB_OPERATION); for (lpc = 0; lpc < max_msg_types; lpc++) { if (safe_str_eq(op, cib_server_ops[lpc].operation)) { *operation = lpc; return cib_ok; } } crm_err("Operation %s is not valid", op); *operation = -1; return cib_operation; } void cib_client_status_callback(const char * node, const char * client, const char * status, void * private) { crm_node_t *member = NULL; if(safe_str_eq(client, CRM_SYSTEM_CIB)) { crm_info("Status update: Client %s/%s now has status [%s]", node, client, status); if(safe_str_eq(status, JOINSTATUS)){ status = ONLINESTATUS; } else if(safe_str_eq(status, LEAVESTATUS)){ status = OFFLINESTATUS; } member = g_hash_table_lookup(crm_peer_cache, node); if(member == NULL) { /* Make sure it gets created */ const char *uuid = get_uuid(node); - member = crm_update_peer(0, 0, -1, -1, uuid, node, NULL, NULL); + member = crm_update_peer(0, 0, -1, 0, uuid, node, NULL, NULL); } - crm_update_peer_proc(node, crm_proc_crmd, status); + crm_update_peer_proc(node, crm_proc_cib, status); set_connected_peers(the_cib); } return; } #if SUPPORT_HEARTBEAT extern oc_ev_t *cib_ev_token; gboolean cib_ccm_dispatch(int fd, gpointer user_data) { int rc = 0; oc_ev_t *ccm_token = (oc_ev_t*)user_data; crm_debug_2("received callback"); rc = oc_ev_handle_event(ccm_token); if(0 == rc) { return TRUE; } crm_err("CCM connection appears to have failed: rc=%d.", rc); /* eventually it might be nice to recover and reconnect... but until then... */ crm_err("Exiting to recover from CCM connection failure"); exit(2); return FALSE; } int current_instance = 0; void cib_ccm_msg_callback( oc_ed_t event, void *cookie, size_t size, const void *data) { gboolean update_id = FALSE; const oc_ev_membership_t *membership = data; CRM_ASSERT(membership != NULL); crm_info("Processing CCM event=%s (id=%d)", ccm_event_name(event), membership->m_instance); if(current_instance > membership->m_instance) { crm_err("Membership instance ID went backwards! %d->%d", current_instance, membership->m_instance); CRM_ASSERT(current_instance <= membership->m_instance); } switch(event) { case OC_EV_MS_NEW_MEMBERSHIP: case OC_EV_MS_INVALID: update_id = TRUE; break; case OC_EV_MS_PRIMARY_RESTORED: update_id = TRUE; break; case OC_EV_MS_NOT_PRIMARY: crm_debug_2("Ignoring transitional CCM event: %s", ccm_event_name(event)); break; case OC_EV_MS_EVICTED: crm_err("Evicted from CCM: %s", ccm_event_name(event)); break; default: crm_err("Unknown CCM event: %d", event); } if(update_id) { unsigned int lpc = 0; CRM_CHECK(membership != NULL, return); current_instance = membership->m_instance; for(lpc=0; lpc < membership->m_n_out; lpc++) { crm_update_ccm_node( membership, lpc+membership->m_out_idx, CRM_NODE_LOST); } for(lpc=0; lpc < membership->m_n_member; lpc++) { crm_update_ccm_node( membership, lpc+membership->m_memb_idx,CRM_NODE_ACTIVE); } } oc_ev_callback_done(cookie); set_connected_peers(the_cib); return; } #endif gboolean can_write(int flags) { return TRUE; } static gboolean cib_force_exit(gpointer data) { crm_notice("Forcing exit!"); terminate_cib(__FUNCTION__); return FALSE; } void initiate_exit(void) { int active = 0; HA_Message *leaving = NULL; active = crm_active_peers(crm_proc_cib); if(active < 2) { terminate_cib(__FUNCTION__); return; } crm_info("Sending disconnect notification to %d peers...", active); leaving = ha_msg_new(3); ha_msg_add(leaving, F_TYPE, "cib"); ha_msg_add(leaving, F_CIB_OPERATION, "cib_shutdown_req"); send_cluster_message(NULL, crm_msg_cib, leaving, TRUE); crm_msg_del(leaving); Gmain_timeout_add(crm_get_msec("5s"), cib_force_exit, NULL); } void terminate_cib(const char *caller) { #if SUPPORT_AIS if(is_openais_cluster()) { cib_ha_connection_destroy(NULL); return; } #endif #if SUPPORT_HEARTBEAT if(hb_conn != NULL) { crm_info("%s: Disconnecting heartbeat", caller); hb_conn->llc_ops->signoff(hb_conn, FALSE); } else { crm_err("%s: No heartbeat connection", caller); } #endif uninitializeCib(); crm_info("Exiting..."); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); } else { exit(LSB_EXIT_OK); } } diff --git a/crmd/callbacks.c b/crmd/callbacks.c index a9494f4068..cc1637ac03 100644 --- a/crmd/callbacks.c +++ b/crmd/callbacks.c @@ -1,645 +1,645 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include crm_data_t *find_xml_in_hamessage(const HA_Message * msg); void crmd_ha_connection_destroy(gpointer user_data); void crmd_ha_msg_filter(HA_Message *msg); /* From join_dc... */ extern gboolean check_join_state( enum crmd_fsa_state cur_state, const char *source); /* #define MAX_EMPTY_CALLBACKS 20 */ /* int empty_callbacks = 0; */ #define trigger_fsa(source) crm_debug_3("Triggering FSA: %s", __FUNCTION__); \ G_main_set_trigger(source); #if SUPPORT_HEARTBEAT gboolean crmd_ha_msg_dispatch(ll_cluster_t *cluster_conn, gpointer user_data) { IPC_Channel *channel = NULL; gboolean stay_connected = TRUE; crm_debug_3("Invoked"); if(cluster_conn != NULL) { channel = cluster_conn->llc_ops->ipcchan(cluster_conn); } CRM_CHECK(cluster_conn != NULL, ;); CRM_CHECK(channel != NULL, ;); if(channel != NULL && IPC_ISRCONN(channel)) { if(cluster_conn->llc_ops->msgready(cluster_conn) == 0) { crm_debug_2("no message ready yet"); } /* invoke the callbacks but dont block */ cluster_conn->llc_ops->rcvmsg(cluster_conn, 0); } if (channel == NULL || channel->ch_status != IPC_CONNECT) { if(is_set(fsa_input_register, R_HA_DISCONNECTED) == FALSE) { crm_crit("Lost connection to heartbeat service."); } else { crm_info("Lost connection to heartbeat service."); } trigger_fsa(fsa_source); stay_connected = FALSE; } return stay_connected; } #endif void crmd_ha_connection_destroy(gpointer user_data) { crm_debug_3("Invoked"); if(is_set(fsa_input_register, R_HA_DISCONNECTED)) { /* we signed out, so this is expected */ crm_info("Heartbeat disconnection complete"); return; } crm_crit("Lost connection to heartbeat service!"); register_fsa_input(C_HA_DISCONNECT, I_ERROR, NULL); trigger_fsa(fsa_source); } void crmd_ha_msg_filter(HA_Message *msg) { ha_msg_input_t *new_input = NULL; const char *from = ha_msg_value(msg, F_ORIG); const char *seq = ha_msg_value(msg, F_SEQ); const char *op = ha_msg_value(msg, F_CRM_TASK); const char *sys_to = ha_msg_value(msg, F_CRM_SYS_TO); const char *sys_from = ha_msg_value(msg, F_CRM_SYS_FROM); if(safe_str_eq(sys_to, CRM_SYSTEM_DC) && AM_I_DC == FALSE) { crm_debug_2("Ignoring message for the DC [F_SEQ=%s]", seq); return; } else if(safe_str_eq(sys_from, CRM_SYSTEM_DC)) { if(AM_I_DC && safe_str_neq(from, fsa_our_uname)) { crm_err("Another DC detected: %s (op=%s)", from, op); /* make sure the election happens NOW */ if(fsa_state != S_ELECTION) { new_input = new_ha_msg_input(msg); register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, new_input, __FUNCTION__); } } else { crm_debug_2("Processing DC message from %s [F_SEQ=%s]", from, seq); } } if(new_input == NULL) { crm_log_message_adv(LOG_MSG, "HA[inbound]", msg); new_input = new_ha_msg_input(msg); route_message(C_HA_MESSAGE, new_input); } delete_ha_msg_input(new_input); trigger_fsa(fsa_source); } #if SUPPORT_HEARTBEAT void crmd_ha_msg_callback(HA_Message * msg, void* private_data) { int level = LOG_DEBUG; oc_node_t *from_node = NULL; const char *from = ha_msg_value(msg, F_ORIG); const char *op = ha_msg_value(msg, F_CRM_TASK); const char *sys_from = ha_msg_value(msg, F_CRM_SYS_FROM); CRM_DEV_ASSERT(from != NULL); crm_debug_2("HA[inbound]: %s from %s", op, from); if(crm_peer_cache == NULL) { crm_debug("Ignoring HA messages until we are" " connected to the CCM (%s op from %s)", op, from); crm_log_message_adv( LOG_MSG, "HA[inbound]: Ignore (No CCM)", msg); return; } from_node = g_hash_table_lookup(crm_peer_cache, from); if(from_node == NULL) { if(safe_str_eq(op, CRM_OP_VOTE)) { level = LOG_WARNING; } else if(AM_I_DC && safe_str_eq(op, CRM_OP_JOIN_ANNOUNCE)) { level = LOG_WARNING; } else if(safe_str_eq(sys_from, CRM_SYSTEM_DC)) { level = LOG_WARNING; } do_crm_log(level, "Ignoring HA message (op=%s) from %s: not in our" " membership list (size=%d)", op, from, crm_active_members()); crm_log_message_adv(LOG_MSG, "HA[inbound]: CCM Discard", msg); } else { crmd_ha_msg_filter(msg); } return; } #endif /* * Apparently returning TRUE means "stay connected, keep doing stuff". * Returning FALSE means "we're all done, close the connection" */ gboolean crmd_ipc_msg_callback(IPC_Channel *client, gpointer user_data) { int lpc = 0; HA_Message *msg = NULL; ha_msg_input_t *new_input = NULL; crmd_client_t *curr_client = (crmd_client_t*)user_data; gboolean stay_connected = TRUE; crm_debug_2("Invoked: %s", curr_client->table_key); while(IPC_ISRCONN(client)) { if(client->ops->is_message_pending(client) == 0) { break; } msg = msgfromIPC_noauth(client); if (msg == NULL) { crm_info("%s: no message this time", curr_client->table_key); continue; } lpc++; new_input = new_ha_msg_input(msg); crm_msg_del(msg); crm_debug_2("Processing msg from %s", curr_client->table_key); crm_log_message_adv(LOG_DEBUG_2, "CRMd[inbound]", new_input->msg); if(crmd_authorize_message(new_input, curr_client)) { route_message(C_IPC_MESSAGE, new_input); } delete_ha_msg_input(new_input); new_input = NULL; msg = NULL; if(client->ch_status != IPC_CONNECT) { break; } } crm_debug_2("Processed %d messages", lpc); if (client->ch_status != IPC_CONNECT) { stay_connected = FALSE; process_client_disconnect(curr_client); } trigger_fsa(fsa_source); return stay_connected; } extern GCHSource *lrm_source; gboolean lrm_dispatch(IPC_Channel *src_not_used, gpointer user_data) { /* ?? src == lrm_channel ?? */ ll_lrm_t *lrm = (ll_lrm_t*)user_data; IPC_Channel *lrm_channel = lrm->lrm_ops->ipcchan(lrm); crm_debug_3("Invoked"); lrm->lrm_ops->rcvmsg(lrm, FALSE); if(lrm_channel->ch_status != IPC_CONNECT) { if(is_set(fsa_input_register, R_LRM_CONNECTED)) { crm_crit("LRM Connection failed"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit_inplace(fsa_input_register, R_LRM_CONNECTED); } else { crm_info("LRM Connection disconnected"); } lrm_source = NULL; return FALSE; } return TRUE; } extern gboolean process_lrm_event(lrm_op_t *op); void lrm_op_callback(lrm_op_t* op) { CRM_CHECK(op != NULL, return); process_lrm_event(op); } void crmd_ha_status_callback(const char *node, const char *status, void *private) { crm_data_t *update = NULL; crm_node_t *member = NULL; crm_notice("Status update: Node %s now has status [%s]",node,status); member = g_hash_table_lookup(crm_peer_cache, node); if(member == NULL) { /* Make sure it is created so crm_update_peer_proc() succeeds */ const char *uuid = get_uuid(node); - member = crm_update_peer(0, 0, -1, -1, uuid, node, NULL, NULL); + member = crm_update_peer(0, 0, -1, 0, uuid, node, NULL, NULL); } if(safe_str_eq(status, DEADSTATUS)) { /* this node is taost */ crm_update_peer_proc(node, crm_proc_ais, OFFLINESTATUS); update = create_node_state( node, status, XML_BOOLEAN_NO, OFFLINESTATUS, CRMD_STATE_INACTIVE, NULL, TRUE, __FUNCTION__); } else if(safe_str_eq(status, ACTIVESTATUS)) { crm_update_peer_proc(node, crm_proc_ais, ONLINESTATUS); update = create_node_state( node, status, NULL, NULL, NULL, NULL, FALSE, __FUNCTION__); } if(update != NULL) { /* this change should not be broadcast */ fsa_cib_anon_update( XML_CIB_TAG_STATUS, update, cib_inhibit_bcast|cib_scope_local|cib_quorum_override); trigger_fsa(fsa_source); free_xml(update); } } void crmd_client_status_callback(const char * node, const char * client, const char * status, void * private) { const char *join = NULL; crm_node_t *member = NULL; crm_data_t *update = NULL; gboolean clear_shutdown = FALSE; crm_debug_3("Invoked"); if(safe_str_neq(client, CRM_SYSTEM_CRMD)) { return; } if(safe_str_eq(status, JOINSTATUS)){ status = ONLINESTATUS; clear_shutdown = TRUE; } else if(safe_str_eq(status, LEAVESTATUS)){ status = OFFLINESTATUS; join = CRMD_STATE_INACTIVE; /* clear_shutdown = TRUE; */ } set_bit_inplace(fsa_input_register, R_PEER_DATA); crm_notice("Status update: Client %s/%s now has status [%s]", node, client, status); if(safe_str_eq(status, ONLINESTATUS)) { /* remove the cached value in case it changed */ crm_debug_2("Uncaching UUID for %s", node); unget_uuid(node); } member = g_hash_table_lookup(crm_peer_cache, node); if(member == NULL) { /* Make sure it is created so crm_update_peer_proc() succeeds */ const char *uuid = get_uuid(node); - member = crm_update_peer(0, 0, -1, -1, uuid, node, NULL, NULL); + member = crm_update_peer(0, 0, -1, 0, uuid, node, NULL, NULL); } crm_update_peer_proc(node, crm_proc_crmd, status); if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { return; } else if(fsa_state == S_STOPPING) { return; } if(safe_str_eq(node, fsa_our_dc) && safe_str_eq(status, OFFLINESTATUS)){ /* did our DC leave us */ crm_info("Got client status callback - our DC is dead"); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL); } else { crm_debug_3("Got client status callback"); update = create_node_state(node, NULL, NULL, status, join, NULL, clear_shutdown, __FUNCTION__); if(safe_str_eq(status, ONLINESTATUS)){ crm_xml_add(update, XML_CIB_ATTR_REPLACE, XML_CIB_TAG_LRM",,"XML_TAG_TRANSIENT_NODEATTRS","); } /* it is safe to keep these updates on the local node * each node updates their own CIB */ fsa_cib_anon_update( XML_CIB_TAG_STATUS, update, cib_inhibit_bcast|cib_scope_local|cib_quorum_override); free_xml(update); if(AM_I_DC && safe_str_eq(status, OFFLINESTATUS)) { erase_node_from_join(node); check_join_state(fsa_state, __FUNCTION__); } } trigger_fsa(fsa_source); } void crmd_ipc_connection_destroy(gpointer user_data) { GCHSource *source = NULL; crmd_client_t *client = user_data; /* Calling this function on an _active_ connection results in: * crmd_ipc_connection_destroy (callbacks.c:431) * -> G_main_del_IPC_Channel (GSource.c:478) * -> g_source_unref * -> G_CH_destroy_int (GSource.c:647) * -> crmd_ipc_connection_destroy (callbacks.c:437)\ * * A better alternative is to call G_main_del_IPC_Channel() directly */ if(client == NULL) { crm_debug_4("No client to delete"); return; } crm_debug_2("Disconnecting client %s (%p)", client->table_key, client); source = client->client_source; client->client_source = NULL; if(source != NULL) { crm_debug_3("Deleting %s (%p) from mainloop", client->table_key, source); G_main_del_IPC_Channel(source); } crm_free(client->table_key); crm_free(client->sub_sys); crm_free(client->uuid); crm_free(client); return; } gboolean crmd_client_connect(IPC_Channel *client_channel, gpointer user_data) { crm_debug_3("Invoked"); if (client_channel == NULL) { crm_err("Channel was NULL"); } else if (client_channel->ch_status == IPC_DISCONNECT) { crm_err("Channel was disconnected"); } else { crmd_client_t *blank_client = NULL; crm_debug_3("Channel connected"); crm_malloc0(blank_client, sizeof(crmd_client_t)); CRM_ASSERT(blank_client != NULL); crm_debug_2("Created client: %p", blank_client); client_channel->ops->set_recv_qlen(client_channel, 1024); client_channel->ops->set_send_qlen(client_channel, 1024); blank_client->client_channel = client_channel; blank_client->sub_sys = NULL; blank_client->uuid = NULL; blank_client->table_key = NULL; blank_client->client_source = G_main_add_IPC_Channel( G_PRIORITY_LOW, client_channel, FALSE, crmd_ipc_msg_callback, blank_client, crmd_ipc_connection_destroy); } return TRUE; } #if SUPPORT_HEARTBEAT static gboolean fsa_have_quorum = FALSE; gboolean ccm_dispatch(int fd, gpointer user_data) { int rc = 0; oc_ev_t *ccm_token = (oc_ev_t*)user_data; gboolean was_error = FALSE; crm_debug_3("Invoked"); rc = oc_ev_handle_event(ccm_token); if(rc != 0) { if(is_set(fsa_input_register, R_CCM_DISCONNECTED) == FALSE) { /* we signed out, so this is expected */ register_fsa_input(C_CCM_CALLBACK, I_ERROR, NULL); crm_err("CCM connection appears to have failed: rc=%d.", rc); } was_error = TRUE; } trigger_fsa(fsa_source); return !was_error; } void crmd_ccm_msg_callback( oc_ed_t event, void *cookie, size_t size, const void *data) { gboolean update_cache = FALSE; const oc_ev_membership_t *membership = data; gboolean update_quorum = FALSE; gboolean trigger_transition = FALSE; crm_debug_3("Invoked"); CRM_ASSERT(data != NULL); crm_info("Quorum %s after event=%s (id=%d)", ccm_have_quorum(event)?"(re)attained":"lost", ccm_event_name(event), membership->m_instance); if(crm_peer_seq > membership->m_instance) { crm_err("Membership instance ID went backwards! %llu->%d", crm_peer_seq, membership->m_instance); CRM_ASSERT(crm_peer_seq <= membership->m_instance); return; } /* * OC_EV_MS_NEW_MEMBERSHIP: membership with quorum * OC_EV_MS_MS_INVALID: membership without quorum * OC_EV_MS_NOT_PRIMARY: previous membership no longer valid * OC_EV_MS_PRIMARY_RESTORED: previous membership restored * OC_EV_MS_EVICTED: the client is evicted from ccm. */ switch(event) { case OC_EV_MS_NEW_MEMBERSHIP: case OC_EV_MS_INVALID: update_cache = TRUE; update_quorum = TRUE; break; case OC_EV_MS_NOT_PRIMARY: break; case OC_EV_MS_PRIMARY_RESTORED: update_cache = TRUE; crm_peer_seq = membership->m_instance; if(AM_I_DC && need_transition(fsa_state)) { trigger_transition = TRUE; } break; case OC_EV_MS_EVICTED: update_quorum = TRUE; register_fsa_input(C_FSA_INTERNAL, I_STOP, NULL); crm_err("Shutting down after CCM event: %s", ccm_event_name(event)); break; default: crm_err("Unknown CCM event: %d", event); } if(update_quorum) { crm_have_quorum = ccm_have_quorum(event); crm_update_quorum(crm_have_quorum); if(crm_have_quorum == FALSE) { /* did we just loose quorum? */ if(fsa_have_quorum && need_transition(fsa_state)) { crm_info("Quorum lost: triggering transition (%s)", ccm_event_name(event)); trigger_transition = TRUE; } } } if(update_cache) { crm_debug_2("Updating cache after event %s", ccm_event_name(event)); do_ccm_update_cache(C_CCM_CALLBACK, fsa_state, event, data, NULL); } else if(event != OC_EV_MS_NOT_PRIMARY) { crm_peer_seq = membership->m_instance; register_fsa_action(A_TE_CANCEL); } oc_ev_callback_done(cookie); return; } #endif void crmd_cib_connection_destroy(gpointer user_data) { crm_debug_3("Invoked"); trigger_fsa(fsa_source); if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Connection to the CIB terminated..."); return; } /* eventually this will trigger a reconnect, not a shutdown */ crm_err("Connection to the CIB terminated..."); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit_inplace(fsa_input_register, R_CIB_CONNECTED); return; } longclock_t fsa_start = 0; longclock_t fsa_stop = 0; longclock_t fsa_diff = 0; gboolean crm_fsa_trigger(gpointer user_data) { unsigned int fsa_diff_ms = 0; if(fsa_diff_max_ms > 0) { fsa_start = time_longclock(); } crm_debug_2("Invoked (queue len: %d)", g_list_length(fsa_message_queue)); s_crmd_fsa(C_FSA_INTERNAL); crm_debug_2("Exited (queue len: %d)", g_list_length(fsa_message_queue)); if(fsa_diff_max_ms > 0) { fsa_stop = time_longclock(); fsa_diff = sub_longclock(fsa_stop, fsa_start); fsa_diff_ms = longclockto_ms(fsa_diff); if(fsa_diff_ms > fsa_diff_max_ms) { crm_err("FSA took %dms to complete", fsa_diff_ms); } else if(fsa_diff_ms > fsa_diff_warn_ms) { crm_warn("FSA took %dms to complete", fsa_diff_ms); } } return TRUE; } diff --git a/crmd/ccm.c b/crmd/ccm.c index b34d098148..be6e84b25e 100644 --- a/crmd/ccm.c +++ b/crmd/ccm.c @@ -1,468 +1,468 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* put these first so that uuid_t is defined without conflicts */ #include #if SUPPORT_HEARTBEAT #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include void post_cache_update(int instance); #if SUPPORT_HEARTBEAT oc_ev_t *fsa_ev_token; void oc_ev_special(const oc_ev_t *, oc_ev_class_t , int ); void crmd_ccm_msg_callback( oc_ed_t event, void *cookie, size_t size, const void *data); #endif void ghash_update_cib_node(gpointer key, gpointer value, gpointer user_data); void check_dead_member(const char *uname, GHashTable *members); void reap_dead_ccm_nodes(gpointer key, gpointer value, gpointer user_data); #define CCM_EVENT_DETAIL 0 #define CCM_EVENT_DETAIL_PARTIAL 0 int num_ccm_register_fails = 0; int max_ccm_register_fails = 30; int last_peer_update = 0; extern GHashTable *voted; struct update_data_s { const char *state; const char *caller; crm_data_t *updates; gboolean overwrite_join; }; void reap_dead_ccm_nodes(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; if(crm_is_member_active(node) == FALSE) { check_dead_member(node->uname, NULL); } } void check_dead_member(const char *uname, GHashTable *members) { CRM_CHECK(uname != NULL, return); if(members != NULL && g_hash_table_lookup(members, uname) != NULL) { crm_err("%s didnt really leave the membership!", uname); return; } erase_node_from_join(uname); if(voted != NULL) { g_hash_table_remove(voted, uname); } if(safe_str_eq(fsa_our_uname, uname)) { crm_err("We're not part of the cluster anymore"); } if(AM_I_DC == FALSE && safe_str_eq(uname, fsa_our_dc)) { crm_warn("Our DC node (%s) left the cluster", uname); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } } /* A_CCM_CONNECT */ void do_ccm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { if(action & A_CCM_DISCONNECT){ set_bit_inplace(fsa_input_register, R_CCM_DISCONNECTED); oc_ev_unregister(fsa_ev_token); } if(action & A_CCM_CONNECT) { int ret; int fsa_ev_fd; gboolean did_fail = FALSE; crm_peer_init(); crm_debug_3("Registering with CCM"); clear_bit_inplace(fsa_input_register, R_CCM_DISCONNECTED); ret = oc_ev_register(&fsa_ev_token); if (ret != 0) { crm_warn("CCM registration failed"); did_fail = TRUE; } if(did_fail == FALSE) { crm_debug_3("Setting up CCM callbacks"); ret = oc_ev_set_callback(fsa_ev_token, OC_EV_MEMB_CLASS, crmd_ccm_msg_callback, NULL); if (ret != 0) { crm_warn("CCM callback not set"); did_fail = TRUE; } } if(did_fail == FALSE) { oc_ev_special(fsa_ev_token, OC_EV_MEMB_CLASS, 0/*don't care*/); crm_debug_3("Activating CCM token"); ret = oc_ev_activate(fsa_ev_token, &fsa_ev_fd); if (ret != 0){ crm_warn("CCM Activation failed"); did_fail = TRUE; } } if(did_fail) { num_ccm_register_fails++; oc_ev_unregister(fsa_ev_token); if(num_ccm_register_fails < max_ccm_register_fails) { crm_warn("CCM Connection failed" " %d times (%d max)", num_ccm_register_fails, max_ccm_register_fails); crm_timer_start(wait_timer); crmd_fsa_stall(NULL); return; } else { crm_err("CCM Activation failed %d (max) times", num_ccm_register_fails); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return; } } crm_info("CCM connection established..." " waiting for first callback"); G_main_add_fd(G_PRIORITY_HIGH, fsa_ev_fd, FALSE, ccm_dispatch, fsa_ev_token, default_ipc_connection_destroy); } } #endif if(action & ~(A_CCM_CONNECT|A_CCM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } #if SUPPORT_HEARTBEAT void ccm_event_detail(const oc_ev_membership_t *oc, oc_ed_t event) { int lpc; gboolean member = FALSE; member = FALSE; crm_debug_2("-----------------------"); crm_info("%s: trans=%d, nodes=%d, new=%d, lost=%d n_idx=%d, " "new_idx=%d, old_idx=%d", ccm_event_name(event), oc->m_instance, oc->m_n_member, oc->m_n_in, oc->m_n_out, oc->m_memb_idx, oc->m_in_idx, oc->m_out_idx); #if !CCM_EVENT_DETAIL_PARTIAL for(lpc=0; lpc < oc->m_n_member; lpc++) { crm_info("\tCURRENT: %s [nodeid=%d, born=%d]", oc->m_array[oc->m_memb_idx+lpc].node_uname, oc->m_array[oc->m_memb_idx+lpc].node_id, oc->m_array[oc->m_memb_idx+lpc].node_born_on); if(safe_str_eq(fsa_our_uname, oc->m_array[oc->m_memb_idx+lpc].node_uname)) { member = TRUE; } } if (member == FALSE) { crm_warn("MY NODE IS NOT IN CCM THE MEMBERSHIP LIST"); } #endif for(lpc=0; lpc<(int)oc->m_n_in; lpc++) { crm_info("\tNEW: %s [nodeid=%d, born=%d]", oc->m_array[oc->m_in_idx+lpc].node_uname, oc->m_array[oc->m_in_idx+lpc].node_id, oc->m_array[oc->m_in_idx+lpc].node_born_on); } for(lpc=0; lpc<(int)oc->m_n_out; lpc++) { crm_info("\tLOST: %s [nodeid=%d, born=%d]", oc->m_array[oc->m_out_idx+lpc].node_uname, oc->m_array[oc->m_out_idx+lpc].node_id, oc->m_array[oc->m_out_idx+lpc].node_born_on); } crm_debug_2("-----------------------"); } #endif void post_cache_update(int instance) { HA_Message *no_op = NULL; if(is_openais_cluster()) { } crm_peer_seq = instance; crm_debug("Updated cache after membership event %d.", instance); if((fsa_input_register & R_CCM_DATA) == 0) { populate_cib_nodes(FALSE); } g_hash_table_foreach(crm_peer_cache, reap_dead_ccm_nodes, NULL); set_bit_inplace(fsa_input_register, R_CCM_DATA); do_update_cib_nodes(FALSE, __FUNCTION__); /* Membership changed, remind everyone we're here. * This will aid detection of duplicate DCs */ no_op = create_request( CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD, AM_I_DC?CRM_SYSTEM_DC:CRM_SYSTEM_CRMD, NULL); send_msg_via_ha(no_op); } /* A_CCM_UPDATE_CACHE */ /* * Take the opportunity to update the node status in the CIB as well */ #if SUPPORT_HEARTBEAT void do_ccm_update_cache( enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, oc_ed_t event, const oc_ev_membership_t *oc, crm_data_t *xml) { unsigned long long instance = 0; unsigned int lpc = 0; if(is_heartbeat_cluster()) { CRM_ASSERT(oc != NULL); instance = oc->m_instance; } CRM_ASSERT(crm_peer_seq <= instance); switch(cur_state) { case S_STOPPING: case S_TERMINATE: case S_HALT: crm_debug("Ignoring %s CCM event %llu, we're in state %s", ccm_event_name(event), instance, fsa_state2string(cur_state)); return; case S_ELECTION: register_fsa_action(A_ELECTION_CHECK); break; default: break; } if(is_heartbeat_cluster()) { ccm_event_detail(oc, event); /*--*-- Recently Dead Member Nodes --*--*/ for(lpc=0; lpc < oc->m_n_out; lpc++) { crm_update_ccm_node(oc, lpc+oc->m_out_idx, CRM_NODE_LOST); } /*--*-- All Member Nodes --*--*/ for(lpc=0; lpc < oc->m_n_member; lpc++) { crm_update_ccm_node(oc, lpc+oc->m_memb_idx, CRM_NODE_ACTIVE); } } if(event == OC_EV_MS_EVICTED) { crm_update_peer( - 0, 0, -1, -1, + 0, 0, -1, 0, fsa_our_uuid, fsa_our_uname, NULL, CRM_NODE_EVICTED); /* todo: drop back to S_PENDING instead */ /* get out... NOW! * * go via the error recovery process so that HA will * restart us if required */ register_fsa_error_adv(cause, I_ERROR, NULL, NULL, __FUNCTION__); } post_cache_update(instance); return; } #endif static void ccm_node_update_complete(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { fsa_data_t *msg_data = NULL; last_peer_update = 0; if(rc == cib_ok) { crm_debug("Node update %d complete", call_id); } else { crm_err("Node update %d failed", call_id); crm_log_message(LOG_DEBUG, msg); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } void ghash_update_cib_node(gpointer key, gpointer value, gpointer user_data) { crm_data_t *tmp1 = NULL; const char *join = NULL; crm_node_t *node = value; struct update_data_s* data = (struct update_data_s*)user_data; data->state = XML_BOOLEAN_NO; if(safe_str_eq(node->state, CRM_NODE_ACTIVE)) { data->state = XML_BOOLEAN_YES; } crm_debug_2("Updating %s: %s (overwrite=%s)", node->uname, data->state, data->overwrite_join?"true":"false"); if(data->overwrite_join) { if((node->processes & crm_proc_crmd) == FALSE) { join = CRMD_JOINSTATE_DOWN; } else { const char *peer_member = g_hash_table_lookup( confirmed_nodes, node->uname); if(peer_member != NULL) { join = CRMD_JOINSTATE_MEMBER; } else { join = CRMD_JOINSTATE_PENDING; } } } tmp1 = create_node_state( node->uname, NULL, data->state, (node->processes&crm_proc_crmd)?ONLINESTATUS:OFFLINESTATUS, join, NULL, FALSE, data->caller); add_node_copy(data->updates, tmp1); free_xml(tmp1); } void do_update_cib_nodes(gboolean overwrite, const char *caller) { int call_id = 0; int call_options = cib_scope_local|cib_quorum_override; struct update_data_s update_data; crm_data_t *fragment = NULL; if(crm_peer_cache == NULL) { /* We got a replace notification before being connected to * the CCM. * So there is no need to update the local CIB with our values * - since we have none. */ return; } fragment = create_xml_node(NULL, XML_CIB_TAG_STATUS); update_data.caller = caller; update_data.updates = fragment; update_data.overwrite_join = overwrite; if(overwrite == FALSE) { call_options = call_options|cib_inhibit_bcast; crm_debug_2("Inhibiting bcast for membership updates"); } g_hash_table_foreach(crm_peer_cache, ghash_update_cib_node, &update_data); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, call_options, call_id); add_cib_op_callback(call_id, FALSE, NULL, ccm_node_update_complete); last_peer_update = call_id; free_xml(fragment); } static void cib_quorum_update_complete( const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { fsa_data_t *msg_data = NULL; if(rc == cib_ok) { crm_debug("Quorum update %d complete", call_id); } else { crm_err("Quorum update %d failed", call_id); crm_log_message(LOG_DEBUG, msg); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } void crm_update_quorum(gboolean bool) { int call_id = 0; crm_data_t *update = NULL; int call_options = cib_scope_local|cib_quorum_override; fsa_has_quorum = bool; update = create_xml_node(NULL, XML_TAG_CIB); crm_xml_add_int(update, XML_ATTR_HAVE_QUORUM, fsa_has_quorum); fsa_cib_update(XML_TAG_CIB, update, call_options, call_id); crm_info("Updating quorum status to %s (call=%d)", bool?"true":"false", call_id); add_cib_op_callback(call_id, FALSE, NULL, cib_quorum_update_complete); free_xml(update); } diff --git a/crmd/control.c b/crmd/control.c index 705663c164..639141e2db 100644 --- a/crmd/control.c +++ b/crmd/control.c @@ -1,973 +1,974 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *ipc_server = NULL; extern void post_cache_update(int seq); extern void crmd_ha_connection_destroy(gpointer user_data); gboolean crm_shutdown(int nsig, gpointer unused); gboolean fsa_has_quorum = FALSE; GHashTable *ipc_clients = NULL; GTRIGSource *fsa_source = NULL; /* A_HA_CONNECT */ #if SUPPORT_AIS extern void crmd_ha_msg_filter(HA_Message * msg); static gboolean crm_ais_dispatch(AIS_Message *wrapper, char *data, int sender) { int seq = 0; const char *seq_s = NULL; crm_data_t *xml = string2xml(data); if(xml == NULL) { crm_err("Message received: %d:'%.120s'", wrapper->id, data); return TRUE; } crm_debug_2("Message received: %d:'%.120s'", wrapper->id, data); ha_msg_add(xml, F_ORIG, wrapper->sender.uname); ha_msg_add_int(xml, F_SEQ, wrapper->id); switch(wrapper->header.id) { case crm_class_notify: break; case crm_class_members: seq_s = crm_element_value(xml, "seq"); CRM_ASSERT(xml != NULL); seq = crm_int_helper(seq_s, NULL); set_bit_inplace(fsa_input_register, R_PEER_DATA); post_cache_update(seq); crm_update_quorum(crm_have_quorum); break; default: crmd_ha_msg_filter(xml); break; } free_xml(xml); return TRUE; } static void crm_ais_destroy(gpointer user_data) { crm_err("AIS connection terminated"); ais_fd_sync = -1; exit(1); } #endif void do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { gboolean registered = FALSE; if(action & A_HA_DISCONNECT) { if(is_openais_cluster()) { crm_peer_destroy(); crm_info("Disconnected from OpenAIS"); #if SUPPORT_HEARTBEAT } else if(fsa_cluster_conn != NULL) { set_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); fsa_cluster_conn->llc_ops->signoff(fsa_cluster_conn, FALSE); crm_info("Disconnected from Heartbeat"); #endif } } if(action & A_HA_CONNECT) { void *dispatch = NULL; void *destroy = NULL; if(is_openais_cluster()) { #if SUPPORT_AIS destroy = crm_ais_destroy; dispatch = crm_ais_dispatch; #endif } else if(is_heartbeat_cluster()) { #if SUPPORT_HEARTBEAT dispatch = crmd_ha_msg_callback; destroy = crmd_ha_connection_destroy; #endif } registered = crm_cluster_connect( &fsa_our_uname, &fsa_our_uuid, dispatch, destroy, #if SUPPORT_HEARTBEAT &fsa_cluster_conn #else NULL #endif ); #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { crm_debug_3("Be informed of Node Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_nstatus_callback( fsa_cluster_conn, crmd_ha_status_callback, fsa_cluster_conn) != HA_OK){ crm_err("Cannot set nstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } crm_debug_3("Be informed of CRM Client Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_cstatus_callback( fsa_cluster_conn, crmd_client_status_callback, fsa_cluster_conn) != HA_OK) { crm_err("Cannot set cstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } + populate_cib_nodes(TRUE); } #endif if(registered == FALSE) { set_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } clear_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); crm_info("Connected to Heartbeat"); } if(action & ~(A_HA_CONNECT|A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } /* A_SHUTDOWN */ void do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int lpc = 0; gboolean continue_shutdown = TRUE; struct crm_subsystem_s *subsystems[] = { pe_subsystem, te_subsystem }; /* just in case */ set_bit_inplace(fsa_input_register, R_SHUTDOWN); for(lpc = 0; lpc < DIMOF(subsystems); lpc++) { struct crm_subsystem_s *a_subsystem = subsystems[lpc]; if(is_set(fsa_input_register, a_subsystem->flag_connected)) { crm_info("Terminating the %s", a_subsystem->name); if(stop_subsystem(a_subsystem, TRUE) == FALSE) { /* its gone... */ crm_err("Faking %s exit", a_subsystem->name); clear_bit_inplace(fsa_input_register, a_subsystem->flag_connected); } continue_shutdown = FALSE; } } if(continue_shutdown == FALSE) { crm_info("Waiting for subsystems to exit"); crmd_fsa_stall(NULL); } crm_info("All subsystems stopped, continuing"); } /* A_SHUTDOWN_REQ */ void do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { HA_Message *msg = NULL; crm_info("Sending shutdown request to DC: %s", crm_str(fsa_our_dc)); msg = create_request( CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); /* set_bit_inplace(fsa_input_register, R_STAYDOWN); */ if(send_request(msg, NULL) == FALSE) { if(AM_I_DC) { crm_info("Processing shutdown locally"); } else { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } } extern char *max_generation_from; extern crm_data_t *max_generation_xml; extern GHashTable *meta_hash; extern GHashTable *resources; extern GHashTable *voted; void log_connected_client(gpointer key, gpointer value, gpointer user_data); void log_connected_client(gpointer key, gpointer value, gpointer user_data) { crmd_client_t *client = value; crm_err("%s is still connected at exit", client->table_key); } static void free_mem(fsa_data_t *msg_data) { #if SUPPORT_HEARTBEAT if(fsa_cluster_conn) { fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn); fsa_cluster_conn = NULL; } #endif slist_destroy(fsa_data_t, fsa_data, fsa_message_queue, crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); delete_fsa_input(fsa_data); ); delete_fsa_input(msg_data); if(ipc_clients) { crm_debug("Number of connected clients: %d", g_hash_table_size(ipc_clients)); /* g_hash_table_foreach(ipc_clients, log_connected_client, NULL); */ g_hash_table_destroy(ipc_clients); } empty_uuid_cache(); crm_peer_destroy(); clear_bit_inplace(fsa_input_register, R_CCM_DATA); if(te_subsystem->client && te_subsystem->client->client_source) { crm_debug("Full destroy: TE"); G_main_del_IPC_Channel(te_subsystem->client->client_source); } else { crm_debug("Partial destroy: TE"); crmd_ipc_connection_destroy(te_subsystem->client); } crm_free(te_subsystem); if(pe_subsystem->client && pe_subsystem->client->client_source) { crm_debug("Full destroy: PE"); G_main_del_IPC_Channel(pe_subsystem->client->client_source); } else { crm_debug("Partial destroy: PE"); crmd_ipc_connection_destroy(pe_subsystem->client); } crm_free(pe_subsystem); crm_free(cib_subsystem); if(integrated_nodes) { g_hash_table_destroy(integrated_nodes); } if(finalized_nodes) { g_hash_table_destroy(finalized_nodes); } if(confirmed_nodes) { g_hash_table_destroy(confirmed_nodes); } if(meta_hash) { g_hash_table_destroy(meta_hash); } if(resources) { g_hash_table_destroy(resources); } if(voted) { g_hash_table_destroy(voted); } cib_delete(fsa_cib_conn); fsa_cib_conn = NULL; if(fsa_lrm_conn) { fsa_lrm_conn->lrm_ops->delete(fsa_lrm_conn); } crm_free(integration_timer); crm_free(finalization_timer); crm_free(election_trigger); crm_free(election_timeout); crm_free(shutdown_escalation_timer); crm_free(wait_timer); crm_free(recheck_timer); crm_free(fsa_our_dc_version); crm_free(fsa_our_uuid); crm_free(fsa_our_dc); crm_free(ipc_server); crm_free(max_generation_from); free_xml(max_generation_xml); } /* A_EXIT_0, A_EXIT_1 */ void do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int exit_code = 0; int log_level = LOG_INFO; const char *exit_type = "gracefully"; if(action & A_EXIT_1) { exit_code = 1; log_level = LOG_ERR; exit_type = "forcefully"; } verify_stopped(cur_state, LOG_ERR); do_crm_log(log_level, "Performing %s - %s exiting the CRMd", fsa_action2string(action), exit_type); if(is_set(fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); exit_code = 2; } if(is_set(fsa_input_register, R_STAYDOWN)) { crm_warn("Inhibiting respawn by Heartbeat"); exit_code = 100; } free_mem(msg_data); crm_info("[%s] stopped (%d)", crm_system_name, exit_code); cl_flush_logs(); exit(exit_code); } /* A_STARTUP */ void do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int was_error = 0; int interval = 1; /* seconds between DC heartbeats */ crm_debug("Registering Signal Handlers"); G_main_add_SignalHandler( G_PRIORITY_HIGH, SIGTERM, crm_shutdown, NULL, NULL); fsa_source = G_main_add_TriggerHandler( G_PRIORITY_HIGH, crm_fsa_trigger, NULL, NULL); ipc_clients = g_hash_table_new(g_str_hash, g_str_equal); crm_debug("Creating CIB and LRM objects"); fsa_cib_conn = cib_new(); fsa_lrm_conn = ll_lrm_new(XML_CIB_TAG_LRM); crm_debug("Init server comms"); if(ipc_server == NULL) { ipc_server = crm_strdup(CRM_SYSTEM_CRMD); } was_error = init_server_ipc_comms(ipc_server, crmd_client_connect, default_ipc_connection_destroy); /* set up the timers */ crm_malloc0(integration_timer, sizeof(fsa_timer_t)); crm_malloc0(finalization_timer, sizeof(fsa_timer_t)); crm_malloc0(election_trigger, sizeof(fsa_timer_t)); crm_malloc0(election_timeout, sizeof(fsa_timer_t)); crm_malloc0(shutdown_escalation_timer, sizeof(fsa_timer_t)); crm_malloc0(wait_timer, sizeof(fsa_timer_t)); crm_malloc0(recheck_timer, sizeof(fsa_timer_t)); interval = interval * 1000; if(election_trigger != NULL) { election_trigger->source_id = 0; election_trigger->period_ms = -1; election_trigger->fsa_input = I_DC_TIMEOUT; election_trigger->callback = crm_timer_popped; election_trigger->repeat = FALSE; } else { was_error = TRUE; } if(election_timeout != NULL) { election_timeout->source_id = 0; election_timeout->period_ms = -1; election_timeout->fsa_input = I_ELECTION_DC; election_timeout->callback = crm_timer_popped; election_timeout->repeat = FALSE; } else { was_error = TRUE; } if(integration_timer != NULL) { integration_timer->source_id = 0; integration_timer->period_ms = -1; integration_timer->fsa_input = I_INTEGRATED; integration_timer->callback = crm_timer_popped; integration_timer->repeat = FALSE; } else { was_error = TRUE; } if(finalization_timer != NULL) { finalization_timer->source_id = 0; finalization_timer->period_ms = -1; finalization_timer->fsa_input = I_FINALIZED; finalization_timer->callback = crm_timer_popped; finalization_timer->repeat = FALSE; /* for possible enabling... a bug in the join protocol left * a slave in S_PENDING while we think its in S_NOT_DC * * raising I_FINALIZED put us into a transition loop which is * never resolved. * in this loop we continually send probes which the node * NACK's because its in S_PENDING * * if we have nodes where heartbeat is active but the * CRM is not... then this will be handled in the * integration phase */ finalization_timer->fsa_input = I_ELECTION; } else { was_error = TRUE; } if(shutdown_escalation_timer != NULL) { shutdown_escalation_timer->source_id = 0; shutdown_escalation_timer->period_ms = -1; shutdown_escalation_timer->fsa_input = I_STOP; shutdown_escalation_timer->callback = crm_timer_popped; shutdown_escalation_timer->repeat = FALSE; } else { was_error = TRUE; } if(wait_timer != NULL) { wait_timer->source_id = 0; wait_timer->period_ms = 2000; wait_timer->fsa_input = I_NULL; wait_timer->callback = crm_timer_popped; wait_timer->repeat = FALSE; } else { was_error = TRUE; } if(recheck_timer != NULL) { recheck_timer->source_id = 0; recheck_timer->period_ms = -1; recheck_timer->fsa_input = I_PE_CALC; recheck_timer->callback = crm_timer_popped; recheck_timer->repeat = FALSE; } else { was_error = TRUE; } /* set up the sub systems */ crm_malloc0(cib_subsystem, sizeof(struct crm_subsystem_s)); crm_malloc0(te_subsystem, sizeof(struct crm_subsystem_s)); crm_malloc0(pe_subsystem, sizeof(struct crm_subsystem_s)); if(cib_subsystem != NULL) { cib_subsystem->pid = -1; cib_subsystem->path = BIN_DIR; cib_subsystem->name = CRM_SYSTEM_CIB; cib_subsystem->command = BIN_DIR"/"CRM_SYSTEM_CIB; cib_subsystem->args = "-VVc"; cib_subsystem->flag_connected = R_CIB_CONNECTED; cib_subsystem->flag_required = R_CIB_REQUIRED; } else { was_error = TRUE; } if(te_subsystem != NULL) { te_subsystem->pid = -1; te_subsystem->path = BIN_DIR; te_subsystem->name = CRM_SYSTEM_TENGINE; te_subsystem->command = BIN_DIR"/"CRM_SYSTEM_TENGINE; te_subsystem->args = NULL; te_subsystem->flag_connected = R_TE_CONNECTED; te_subsystem->flag_required = R_TE_REQUIRED; } else { was_error = TRUE; } if(pe_subsystem != NULL) { pe_subsystem->pid = -1; pe_subsystem->path = BIN_DIR; pe_subsystem->name = CRM_SYSTEM_PENGINE; pe_subsystem->command = BIN_DIR"/"CRM_SYSTEM_PENGINE; pe_subsystem->args = NULL; pe_subsystem->flag_connected = R_PE_CONNECTED; pe_subsystem->flag_required = R_PE_REQUIRED; } else { was_error = TRUE; } if(was_error) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } welcomed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); integrated_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); finalized_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); confirmed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); set_sigchld_proctrack(G_PRIORITY_HIGH,DEFAULT_MAXDISPATCHTIME); } /* A_STOP */ void do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* A_STARTED */ void do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { if(cur_state != S_STARTING) { crm_err("Start cancelled... %s", fsa_state2string(cur_state)); return; } else if(is_set(fsa_input_register, R_CCM_DATA) == FALSE) { crm_info("Delaying start, CCM (%.16llx) not connected", R_CCM_DATA); crmd_fsa_stall(NULL); return; } else if(is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) { crm_info("Delaying start, LRM (%.16llx) not connected", R_LRM_CONNECTED); crmd_fsa_stall(NULL); return; } else if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Delaying start, CIB (%.16llx) not connected", R_CIB_CONNECTED); crmd_fsa_stall(NULL); return; } else if(is_set(fsa_input_register, R_READ_CONFIG) == FALSE) { crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); crmd_fsa_stall(NULL); return; } else if(is_set(fsa_input_register, R_PEER_DATA) == FALSE) { HA_Message *msg = NULL; /* try reading from HA */ crm_info("Delaying start, Peer data (%.16llx) not recieved", R_PEER_DATA); crm_debug_3("Looking for a HA message"); #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { msg = fsa_cluster_conn->llc_ops->readmsg(fsa_cluster_conn, 0); } #endif if(msg != NULL) { crm_debug_3("There was a HA message"); crm_msg_del(msg); } /* this should no longer be required */ /* crm_timer_start(wait_timer); */ crmd_fsa_stall(NULL); return; } crm_info("The local CRM is operational"); clear_bit_inplace(fsa_input_register, R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); } /* A_RECOVER */ void do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { set_bit_inplace(fsa_input_register, R_IN_RECOVERY); crm_err("Action %s (%.16llx) not supported", fsa_action2string(action), action); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } pe_cluster_option crmd_opts[] = { /* name, old-name, validate, default, description */ { XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time", NULL, "10s", &check_time, "How long to wait for a response from other nodes during startup.", "The \"correct\" value will depend on the speed and load of your network." }, { XML_CONFIG_ATTR_RECHECK, NULL, "time", "Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)", "0", &check_timer, "Polling interval for time based changes to options, resource parameters and constraints.", "The Cluster is primarily event driven, however the configuration can have elements that change based on time. To ensure these changes take effect, we can optionally poll the cluster's status for changes." }, { XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL, "2min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL, "20min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-finalization-timeout", NULL, "time", NULL, "30min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." }, }; void crmd_metadata(void) { config_metadata("CRM Daemon", "1.0", "CRM Daemon Options", "This is a fake resource that details the options that can be configured for the CRM Daemon.", crmd_opts, DIMOF(crmd_opts)); } static void verify_crmd_options(GHashTable *options) { verify_all_options(options, crmd_opts, DIMOF(crmd_opts)); } static const char * crmd_pref(GHashTable *options, const char *name) { return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name); } static void config_query_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { const char *value = NULL; GHashTable *config_hash = NULL; if(rc != cib_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", cib_error2string(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); if(rc == cib_bad_permissions || rc == cib_bad_digest || rc == cib_bad_config) { crm_err("The cluster is mis-configured - shutting down and staying down"); set_bit_inplace(fsa_input_register, R_STAYDOWN); } return; } crm_debug("Call %d : Parsing CIB options", call_id); config_hash = g_hash_table_new_full( g_str_hash,g_str_equal, g_hash_destroy_str,g_hash_destroy_str); unpack_instance_attributes( output, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, NULL); value = g_hash_table_lookup(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); if(value == NULL) { /* apparently we're not allowed to free the result of getenv */ char *param_val = getenv(ENV_PREFIX "initdead"); value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); if(param_val != NULL) { int from_env = crm_get_msec(param_val) / 2; int from_defaults = crm_get_msec(value); if(from_env > from_defaults) { g_hash_table_replace( config_hash, crm_strdup(XML_CONFIG_ATTR_DC_DEADTIME), crm_strdup(param_val)); } } } verify_crmd_options(config_hash); value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); election_trigger->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT); shutdown_escalation_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL); election_timeout->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK); recheck_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-integration-timeout"); integration_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-finalization-timeout"); finalization_timer->period_ms = crm_get_msec(value); set_bit_inplace(fsa_input_register, R_READ_CONFIG); crm_debug_3("Triggering FSA: %s", __FUNCTION__); G_main_set_trigger(fsa_source); g_hash_table_destroy(config_hash); } /* A_READCONFIG */ void do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int call_id = fsa_cib_conn->cmds->query( fsa_cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local); add_cib_op_callback(call_id, FALSE, NULL, config_query_callback); crm_debug_2("Querying the CIB... call %d", call_id); } gboolean crm_shutdown(int nsig, gpointer unused) { if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) { if(is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating the shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); } else { crm_info("Requesting shutdown"); set_bit_inplace(fsa_input_register, R_SHUTDOWN); register_fsa_input(C_SHUTDOWN,I_SHUTDOWN,NULL); if(shutdown_escalation_timer->period_ms < 1) { GHashTable *config_hash = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); const char *value = crmd_pref( config_hash, XML_CONFIG_ATTR_FORCE_QUIT); int msec = crm_get_msec(value); crm_info("Using default shutdown escalation: %dms", msec); shutdown_escalation_timer->period_ms = msec; g_hash_table_destroy(config_hash); } /* cant rely on this... */ crm_timer_start(shutdown_escalation_timer); } } else { crm_info("exit from shutdown"); exit(LSB_EXIT_OK); } return TRUE; } static void default_cib_update_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { if(rc != cib_ok) { fsa_data_t *msg_data = NULL; crm_err("CIB Update failed: %s", cib_error2string(rc)); crm_log_xml_warn(output, "update:failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } #if SUPPORT_HEARTBEAT static void populate_cib_nodes_ha(gboolean with_client_status) { int call_id = 0; const char *ha_node = NULL; crm_data_t *cib_node_list = NULL; - if(fsa_cluster_conn) { + if(fsa_cluster_conn == NULL) { crm_debug("Not connected"); return; } /* Async get client status information in the cluster */ crm_debug_2("Invoked"); if(with_client_status) { crm_debug_3("Requesting an initial dump of CRMD client_status"); fsa_cluster_conn->llc_ops->client_status( fsa_cluster_conn, NULL, CRM_SYSTEM_CRMD, -1); } crm_info("Requesting the list of configured nodes"); fsa_cluster_conn->llc_ops->init_nodewalk(fsa_cluster_conn); cib_node_list = create_xml_node(NULL, XML_CIB_TAG_NODES); do { const char *ha_node_type = NULL; const char *ha_node_uuid = NULL; crm_data_t *cib_new_node = NULL; ha_node = fsa_cluster_conn->llc_ops->nextnode(fsa_cluster_conn); if(ha_node == NULL) { continue; } ha_node_type = fsa_cluster_conn->llc_ops->node_type( fsa_cluster_conn, ha_node); if(safe_str_neq(NORMALNODE, ha_node_type)) { crm_debug("Node %s: skipping '%s'", ha_node, ha_node_type); continue; } ha_node_uuid = get_uuid(ha_node); if(ha_node_uuid == NULL) { crm_warn("Node %s: no uuid found", ha_node); continue; } crm_notice("Node: %s (uuid: %s)", ha_node, ha_node_uuid); cib_new_node = create_xml_node(cib_node_list, XML_CIB_TAG_NODE); crm_xml_add(cib_new_node, XML_ATTR_ID, ha_node_uuid); crm_xml_add(cib_new_node, XML_ATTR_UNAME, ha_node); crm_xml_add(cib_new_node, XML_ATTR_TYPE, ha_node_type); } while(ha_node != NULL); fsa_cluster_conn->llc_ops->end_nodewalk(fsa_cluster_conn); /* Now update the CIB with the list of nodes */ fsa_cib_update( XML_CIB_TAG_NODES, cib_node_list, cib_scope_local|cib_quorum_override|cib_inhibit_bcast, call_id); add_cib_op_callback(call_id, FALSE, NULL, default_cib_update_callback); free_xml(cib_node_list); crm_debug_2("Complete"); } #endif static void create_cib_node_definition( gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; crm_data_t *cib_nodes = user_data; crm_data_t *cib_new_node = NULL; crm_notice("Node: %s (uuid: %s)", node->uname, node->uuid); cib_new_node = create_xml_node(cib_nodes, XML_CIB_TAG_NODE); crm_xml_add(cib_new_node, XML_ATTR_ID, node->uuid); crm_xml_add(cib_new_node, XML_ATTR_UNAME, node->uname); crm_xml_add(cib_new_node, XML_ATTR_TYPE, NORMALNODE); } void populate_cib_nodes(gboolean with_client_status) { int call_id = 0; crm_data_t *cib_node_list = NULL; #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { populate_cib_nodes_ha(with_client_status); return; } #endif if(is_openais_cluster() && with_client_status) { crm_info("Requesting the list of configured nodes"); #if SUPPORT_AIS send_ais_text(crm_class_members, __FUNCTION__, TRUE, NULL, crm_msg_ais); #endif } cib_node_list = create_xml_node(NULL, XML_CIB_TAG_NODES); g_hash_table_foreach( crm_peer_cache, create_cib_node_definition, cib_node_list); fsa_cib_update( XML_CIB_TAG_NODES, cib_node_list, cib_scope_local|cib_quorum_override|cib_inhibit_bcast, call_id); add_cib_op_callback(call_id, FALSE, NULL, default_cib_update_callback); free_xml(cib_node_list); crm_debug_2("Complete"); } diff --git a/include/crm/ais_common.h b/include/crm/ais_common.h index 10cb7594d7..dc141b2486 100644 --- a/include/crm/ais_common.h +++ b/include/crm/ais_common.h @@ -1,168 +1,203 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef CRM_AIS_COMMON__H #define CRM_AIS_COMMON__H #include #include #include #if SUPPORT_AIS # include # include # include #else typedef struct { int size __attribute__((aligned(8))); int id __attribute__((aligned(8))); } mar_req_header_t __attribute__((aligned(8))); #endif #define MAX_NAME 256 #define AIS_IPC_NAME "ais-crm-ipc" #define CRM_NODE_LOST "lost" #define CRM_NODE_MEMBER "member" #define CRM_NODE_ACTIVE CRM_NODE_MEMBER #define CRM_NODE_INACTIVE CRM_NODE_LOST #define CRM_NODE_EVICTED "evicted" typedef struct crm_ais_host_s AIS_Host; typedef struct crm_ais_msg_s AIS_Message; enum crm_ais_msg_class { crm_class_cluster = 0, crm_class_members = 1, crm_class_notify = 2, }; /* order here matters - its used to index into the crm_children array */ enum crm_ais_msg_types { crm_msg_none = 0, crm_msg_ais = 1, crm_msg_lrmd = 2, crm_msg_cib = 3, crm_msg_crmd = 4, crm_msg_te = 5, crm_msg_pe = 6, crm_msg_attrd = 7, }; enum crm_proc_flag { - crm_proc_none = 0x00000000, - crm_proc_ais = 0x00000001, + crm_proc_none = 0x00000001, + crm_proc_ais = 0x00000002, crm_proc_lrmd = 0x00000010, - crm_proc_stontih = 0x00000020, + crm_proc_stonith = 0x00000020, crm_proc_cib = 0x00000100, crm_proc_crmd = 0x00000200, crm_proc_pe = 0x00001000, crm_proc_te = 0x00002000, - crm_proc_attrd = 0x00004000, + crm_proc_attrd = 0x00010000, }; typedef struct crm_peer_node_s { unsigned int id; unsigned long long born; unsigned long long last_seen; int32_t votes; uint32_t processes; char *uname; char *state; char *uuid; char *addr; char *version; } crm_node_t; struct crm_ais_host_s { uint32_t id; uint32_t pid; gboolean local; enum crm_ais_msg_types type; uint32_t size; char uname[256]; } __attribute__((packed)); struct crm_ais_msg_s { mar_req_header_t header __attribute__((aligned(8))); uint32_t id; gboolean is_compressed; AIS_Host host; AIS_Host sender; uint32_t size; uint32_t compressed_size; /* 584 bytes */ char data[0]; } __attribute__((packed)); static inline const char *msg_type2text(enum crm_ais_msg_types type) { const char *text = "unknown"; switch(type) { case crm_msg_none: text = "unknown"; break; case crm_msg_ais: text = "ais"; break; case crm_msg_cib: text = "cib"; break; case crm_msg_crmd: text = "crmd"; break; case crm_msg_pe: text = "pengine"; break; case crm_msg_te: text = "tengine"; break; case crm_msg_lrmd: text = "lrmd"; break; case crm_msg_attrd: text = "attrd"; break; } return text; } +static inline const char *peer2text(enum crm_proc_flag proc) +{ + const char *text = "unknown"; + switch(proc) { + case crm_proc_none: + text = "unknown"; + break; + case crm_proc_ais: + text = "ais"; + break; + case crm_proc_cib: + text = "cib"; + break; + case crm_proc_crmd: + text = "crmd"; + break; + case crm_proc_pe: + text = "pengine"; + break; + case crm_proc_te: + text = "tengine"; + break; + case crm_proc_lrmd: + text = "lrmd"; + break; + case crm_proc_attrd: + text = "attrd"; + break; + case crm_proc_stonith: + text = "stonith"; + break; + } + return text; +} + static inline const char *ais_dest(struct crm_ais_host_s *host) { if(host->local) { return "local"; } else if(host->size > 0) { return host->uname; } else { return ""; } } #define ais_data_len(msg) (msg->is_compressed?msg->compressed_size:msg->size) #endif diff --git a/lib/crm/common/membership.c b/lib/crm/common/membership.c index a0d6234f4a..f0a1ed4d93 100644 --- a/lib/crm/common/membership.c +++ b/lib/crm/common/membership.c @@ -1,368 +1,383 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include struct quorum_count_s { guint votes_max; guint votes_active; guint votes_total; guint nodes_max; guint nodes_total; }; GHashTable *crm_peer_cache = NULL; unsigned long long crm_peer_seq = 0; unsigned long long crm_max_peers = 0; struct quorum_count_s quorum_stats; gboolean crm_have_quorum = FALSE; gboolean crm_is_member_active(const crm_node_t *node) { if(safe_str_eq(node->state, CRM_NODE_MEMBER)) { return TRUE; } return FALSE; } static gboolean crm_reap_dead_member( gpointer key, gpointer value, gpointer user_data) { if(crm_is_member_active(value) == FALSE) { return TRUE; } return FALSE; } guint reap_crm_membership(void) { /* remove all dead members */ return g_hash_table_foreach_remove( crm_peer_cache, crm_reap_dead_member, NULL); } static void crm_count_member( gpointer key, gpointer value, gpointer user_data) { guint *count = user_data; if(crm_is_member_active(value)) { *count = *count + 1; } } guint crm_active_members(void) { guint count = 0; g_hash_table_foreach(crm_peer_cache, crm_count_member, &count); return count; } struct peer_count_s { uint32_t peer; guint count; }; static void crm_count_peer( gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; struct peer_count_s *search = user_data; if(crm_is_member_active(node) && (node->processes & search->peer)) { search->count = search->count + 1; } } guint crm_active_peers(uint32_t peer) { struct peer_count_s search; search.count = 0; search.peer = peer; g_hash_table_foreach(crm_peer_cache, crm_count_peer, &search); return search.count; } void destroy_crm_node(gpointer data) { crm_node_t *node = data; crm_info("Destroying entry for node %u", node->id); crm_free(node->addr); crm_free(node->uname); crm_free(node->state); crm_free(node); } void crm_peer_init(void) { crm_warn("Set these options via openais.conf"); quorum_stats.votes_max = 2; quorum_stats.votes_active = 0; quorum_stats.votes_total = 0; quorum_stats.nodes_max = 1; quorum_stats.nodes_total = 0; crm_peer_destroy(); if(crm_peer_cache == NULL) { crm_peer_cache = g_hash_table_new_full( g_str_hash, g_str_equal, NULL, destroy_crm_node); } } void crm_peer_destroy(void) { if(crm_peer_cache != NULL) { g_hash_table_destroy(crm_peer_cache); crm_peer_cache = NULL; } } crm_node_t *crm_update_peer( unsigned int id, unsigned long long born, int32_t votes, uint32_t children, const char *uuid, const char *uname, const char *addr, const char *state) { crm_node_t *node = NULL; CRM_CHECK(uname != NULL, return NULL); CRM_ASSERT(crm_peer_cache != NULL); node = g_hash_table_lookup(crm_peer_cache, uname); if(node == NULL) { crm_info("Creating entry for node %s/%u/%llu", uname, id, born); CRM_CHECK(id >= 0, return NULL); CRM_CHECK(uuid != NULL, return NULL); crm_malloc0(node, sizeof(crm_node_t)); node->id = id; node->born = 0; + node->processes = 0; node->uuid = crm_strdup(uuid); node->uname = crm_strdup(uname); node->votes = votes; node->addr = NULL; node->state = crm_strdup("unknown"); g_hash_table_insert(crm_peer_cache, node->uname, node); node = g_hash_table_lookup(crm_peer_cache, uname); CRM_ASSERT(node != NULL); } + if(votes > 0 && node->votes != votes) { + node->votes = votes; + crm_info("Node %s now has %d votes", node->uname, votes); + } + if(id > 0 && id != node->id) { node->id = id; crm_info("Node %s now has id %u", node->uname, id); } - if(children >= 0 && children != node->processes) { + if(children > 0 && children != node->processes) { crm_info("Node %s now has children: %.32x (%u)", node->uname, children, children); node->processes = children; } if(state != NULL) { if(node->state == NULL || crm_str_eq(node->state, state, FALSE) == FALSE) { crm_free(node->state); node->state = crm_strdup(state); crm_info("Node %s is now: %s", node->uname, state); if(crm_is_member_active(node)) { node->born = born; } else { node->born = -1; } } } if(addr != NULL) { if(node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) { crm_free(node->addr); node->addr = crm_strdup(addr); crm_info("Node %s now has address: %s", node->uname, addr); } } return node; } crm_node_t *crm_update_ais_node(crm_data_t *member, long long seq) { const char *addr = crm_element_value(member, "addr"); const char *uname = crm_element_value(member, "uname"); const char *state = crm_element_value(member, "state"); const char *id_s = crm_element_value(member, "id"); const char *votes_s = crm_element_value(member, "votes"); const char *procs_s = crm_element_value(member, "processes"); int votes = crm_int_helper(votes_s, NULL); unsigned int id = crm_int_helper(id_s, NULL); unsigned int procs = crm_int_helper(procs_s, NULL); return crm_update_peer(id, seq, votes, procs, uname, uname, addr, state); } #if SUPPORT_HEARTBEAT crm_node_t *crm_update_ccm_node( const oc_ev_membership_t *oc, int offset, const char *state) { const char *uuid = NULL; CRM_CHECK(oc->m_array[offset].node_uname != NULL, return NULL); uuid = get_uuid(oc->m_array[offset].node_uname); return crm_update_peer(oc->m_array[offset].node_id, - oc->m_array[offset].node_born_on, - -1, -1, + oc->m_array[offset].node_born_on, -1, 0, uuid, oc->m_array[offset].node_uname, NULL, state); } #endif void crm_update_peer_proc(const char *uname, uint32_t flag, const char *status) { crm_node_t *node = NULL; + gboolean changed = FALSE; CRM_ASSERT(crm_peer_cache != NULL); CRM_CHECK(uname != NULL, return); node = g_hash_table_lookup(crm_peer_cache, uname); CRM_CHECK(node != NULL, return); if(safe_str_eq(status, ONLINESTATUS)) { - set_bit_inplace(node->processes, flag); - } else { + if((node->processes & flag) == 0) { + set_bit_inplace(node->processes, flag); + changed = TRUE; + } + + } else if(node->processes & flag) { clear_bit_inplace(node->processes, flag); + changed = TRUE; + } + + if(changed) { + crm_info("%s.%s is now %s", uname, peer2text(flag), status); } } static void crm_count_quorum( gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; quorum_stats.nodes_total += 1; quorum_stats.votes_total += node->votes; if(crm_is_member_active(node)) { quorum_stats.votes_active = quorum_stats.votes_active + node->votes; } } gboolean crm_calculate_quorum(void) { unsigned int limit = 0; gboolean quorate = TRUE; quorum_stats.votes_total = 0; quorum_stats.nodes_total = 0; quorum_stats.votes_active = 0; g_hash_table_foreach(crm_peer_cache, crm_count_quorum, NULL); if(quorum_stats.votes_total > quorum_stats.votes_max) { crm_info("Known quorum votes: %u -> %u", quorum_stats.votes_max, quorum_stats.votes_total); quorum_stats.votes_max = quorum_stats.votes_total; } if(quorum_stats.nodes_total > quorum_stats.nodes_max) { crm_info("Known quorum nodes: %u -> %u", quorum_stats.nodes_max, quorum_stats.nodes_total); quorum_stats.nodes_max = quorum_stats.nodes_total; } limit = (quorum_stats.votes_max + 2) / 2; if(quorum_stats.votes_active < limit) { quorate = FALSE; } crm_debug("known: %u, available: %u, limit: %u, active: %u: %s", quorum_stats.votes_max, quorum_stats.votes_total, limit, quorum_stats.votes_active, quorate?"true":"false"); if(quorate != crm_have_quorum) { crm_notice("Membership %llu: quorum %s", crm_peer_seq, quorate?"attained":"lost"); } else { crm_debug("Membership %llu: quorum %s", crm_peer_seq, quorate?"retained":"lost"); } crm_have_quorum = quorate; return quorate; } /* Code appropriated (with permission) from cman/daemon/commands.c under GPLv2 */ #if 0 static int calculate_quorum(int allow_decrease, int max_expected, unsigned int *ret_total_votes) { struct list *nodelist; struct cluster_node *node; unsigned int total_votes = 0; unsigned int highest_expected = 0; unsigned int newquorum, q1, q2; unsigned int total_nodes = 0; list_iterate(nodelist, &cluster_members_list) { node = list_item(nodelist, struct cluster_node); if (node->state == NODESTATE_MEMBER) { highest_expected = max(highest_expected, node->expected_votes); total_votes += node->votes; total_nodes++; } } if (quorum_device && quorum_device->state == NODESTATE_MEMBER) total_votes += quorum_device->votes; if (max_expected > 0) highest_expected = max_expected; /* This quorum calculation is taken from the OpenVMS Cluster Systems * manual, but, then, you guessed that didn't you */ q1 = (highest_expected + 2) / 2; q2 = (total_votes + 2) / 2; newquorum = max(q1, q2); /* Normally quorum never decreases but the system administrator can * force it down by setting expected votes to a maximum value */ if (!allow_decrease) newquorum = max(quorum, newquorum); /* The special two_node mode allows each of the two nodes to retain * quorum if the other fails. Only one of the two should live past * fencing (as both nodes try to fence each other in split-brain.) * Also: if there are more than two nodes, force us inquorate to avoid * any damage or confusion. */ if (two_node && total_nodes <= 2) newquorum = 1; if (ret_total_votes) *ret_total_votes = total_votes; return newquorum; } #endif