diff --git a/crm/cib/callbacks.c b/crm/cib/callbacks.c index 30b849d819..816dcceffe 100644 --- a/crm/cib/callbacks.c +++ b/crm/cib/callbacks.c @@ -1,1737 +1,1737 @@ -/* $Id: callbacks.c,v 1.120 2006/04/20 15:43:23 andrew Exp $ */ +/* $Id: callbacks.c,v 1.121 2006/04/21 07:08:03 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern GMainLoop* mainloop; extern gboolean cib_shutdown_flag; extern void GHFunc_count_peers( gpointer key, gpointer value, gpointer user_data); extern enum cib_errors revision_check( crm_data_t *cib_update, crm_data_t *cib_copy, int flags); void initiate_exit(void); void terminate_ha_connection(const char *caller); gint cib_GCompareFunc(gconstpointer a, gconstpointer b); gboolean cib_msg_timeout(gpointer data); void cib_GHFunc(gpointer key, gpointer value, gpointer user_data); gboolean can_write(int flags); HA_Message *cib_msg_copy(HA_Message *msg, gboolean with_data); gboolean ccm_manual_check(gpointer data); void send_cib_replace(const HA_Message *sync_request, const char *host); void cib_process_request( HA_Message *request, gboolean privileged, gboolean force_synchronous, gboolean from_peer, cib_client_t *cib_client); gboolean syncd_once = FALSE; GHashTable *peer_hash = NULL; int next_client_id = 0; gboolean cib_is_master = FALSE; gboolean cib_have_quorum = FALSE; char * ccm_transition_id = NULL; GHashTable *client_list = NULL; GHashTable *ccm_membership = NULL; extern const char *cib_our_uname; extern ll_cluster_t *hb_conn; extern unsigned long cib_num_ops, cib_num_local, cib_num_updates, cib_num_fail; extern unsigned long cib_bad_connects, cib_num_timeouts; extern longclock_t cib_call_time; static HA_Message * cib_prepare_common(HA_Message *root, const char *section) { HA_Message *data = NULL; /* extract the CIB from the fragment */ if(root == NULL) { return NULL; } else if(safe_str_eq(crm_element_name(root), XML_TAG_FRAGMENT)) { data = find_xml_node(root, XML_TAG_CIB, TRUE); if(data != NULL) { crm_debug_3("Extracted CIB from "XML_TAG_FRAGMENT); } else { crm_log_xml_debug_4(root, "No CIB"); } } else { data = root; crm_log_xml_debug_4(root, "cib:input"); } /* grab the section specified for the command */ if(data != NULL && safe_str_eq(crm_element_name(data), XML_TAG_CIB)){ int rc = revision_check(data, the_cib, 0/* call_options */); if(rc == cib_ok) { data = get_object_root(section, data); if(data != NULL) { crm_debug_3("Extracted %s from CIB", section); } else { crm_log_xml_debug_4(root, "No Section"); } } else { crm_debug_2("Revision check failed"); } } return data; } static enum cib_errors cib_prepare_none(HA_Message *request, HA_Message **data, const char **section) { *data = NULL; *section = cl_get_string(request, F_CIB_SECTION); return cib_ok; } static enum cib_errors cib_prepare_data(HA_Message *request, HA_Message **data, const char **section) { HA_Message *input_fragment = cl_get_struct(request, F_CIB_CALLDATA); *section = cl_get_string(request, F_CIB_SECTION); *data = cib_prepare_common(input_fragment, *section); return cib_ok; } static enum cib_errors cib_prepare_sync(HA_Message *request, HA_Message **data, const char **section) { *section = cl_get_string(request, F_CIB_SECTION); *data = request; return cib_ok; } static enum cib_errors cib_prepare_diff(HA_Message *request, HA_Message **data, const char **section) { HA_Message *input_fragment = cl_get_struct(request,F_CIB_UPDATE_DIFF); *section = NULL; *data = cib_prepare_common(input_fragment, NULL); return cib_ok; } static enum cib_errors cib_cleanup_query(const char *op, HA_Message **data, HA_Message **output) { CRM_DEV_ASSERT(*data == NULL); return cib_ok; } static enum cib_errors cib_cleanup_output(const char *op, HA_Message **data, HA_Message **output) { free_xml(*output); return cib_ok; } static enum cib_errors cib_cleanup_none(const char *op, HA_Message **data, HA_Message **output) { CRM_DEV_ASSERT(*data == NULL); CRM_DEV_ASSERT(*output == NULL); return cib_ok; } static enum cib_errors cib_cleanup_sync(const char *op, HA_Message **data, HA_Message **output) { /* data is non-NULL but doesnt need to be free'd */ CRM_DEV_ASSERT(*output == NULL); return cib_ok; } /* typedef struct cib_operation_s { const char* operation; gboolean modifies_cib; gboolean needs_privileges; gboolean needs_quorum; enum cib_errors (*prepare)(HA_Message *, crm_data_t**, const char **); enum cib_errors (*cleanup)(crm_data_t**, crm_data_t**); enum cib_errors (*fn)( const char *, int, const char *, crm_data_t*, crm_data_t*, crm_data_t**, crm_data_t**); } cib_operation_t; */ /* technically bump does modify the cib... * but we want to split the "bump" from the "sync" */ cib_operation_t cib_server_ops[] = { {NULL, FALSE, FALSE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_default}, {CIB_OP_QUERY, FALSE, FALSE, FALSE, cib_prepare_none, cib_cleanup_query, cib_process_query}, {CIB_OP_MODIFY, TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_output, cib_process_modify}, {CIB_OP_UPDATE, TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_output, cib_process_change}, {CIB_OP_APPLY_DIFF,TRUE, TRUE, TRUE, cib_prepare_diff, cib_cleanup_sync, cib_process_diff}, {CIB_OP_SLAVE, FALSE, TRUE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_readwrite}, {CIB_OP_SLAVEALL, FALSE, TRUE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_readwrite}, {CIB_OP_SYNC_ONE, FALSE, TRUE, FALSE, cib_prepare_sync, cib_cleanup_sync, cib_process_sync_one}, {CIB_OP_MASTER, FALSE, TRUE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_readwrite}, {CIB_OP_ISMASTER, FALSE, TRUE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_readwrite}, {CIB_OP_BUMP, TRUE, TRUE, TRUE, cib_prepare_none, cib_cleanup_output, cib_process_bump}, {CIB_OP_REPLACE, TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_output, cib_process_replace}, {CIB_OP_CREATE, TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_output, cib_process_change}, {CIB_OP_DELETE, TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_output, cib_process_delete}, {CIB_OP_DELETE_ALT,TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_output, cib_process_change}, {CIB_OP_SYNC, FALSE, TRUE, FALSE, cib_prepare_sync, cib_cleanup_sync, cib_process_sync}, {CRM_OP_QUIT, FALSE, TRUE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_quit}, {CRM_OP_PING, FALSE, FALSE, FALSE, cib_prepare_none, cib_cleanup_output, cib_process_ping}, {CIB_OP_ERASE, TRUE, TRUE, TRUE, cib_prepare_none, cib_cleanup_output, cib_process_erase}, {CRM_OP_NOOP, FALSE, FALSE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_default}, {"cib_shutdown_req",FALSE, TRUE, FALSE, cib_prepare_sync, cib_cleanup_sync, cib_process_shutdown_req}, }; int send_via_callback_channel(HA_Message *msg, const char *token); enum cib_errors cib_process_command( HA_Message *request, HA_Message **reply, crm_data_t **cib_diff, gboolean privileged); gboolean cib_common_callback(IPC_Channel *channel, cib_client_t *cib_client, gboolean force_synchronous, gboolean privileged); enum cib_errors cib_get_operation_id(const HA_Message * msg, int *operation); gboolean cib_process_disconnect(IPC_Channel *channel, cib_client_t *cib_client); static void cib_ipc_connection_destroy(gpointer user_data) { cib_client_t *cib_client = user_data; /* cib_process_disconnect */ if(cib_client == NULL) { crm_debug_4("Destroying %p", user_data); return; } if(cib_client->source != NULL) { crm_debug_4("Deleting %s (%p) from mainloop", cib_client->name, cib_client->source); G_main_del_IPC_Channel(cib_client->source); cib_client->source = NULL; } crm_debug_3("Destroying %s (%p)", cib_client->name, user_data); crm_free(cib_client->name); crm_free(cib_client->callback_id); crm_free(cib_client->id); crm_free(cib_client); crm_debug_4("Freed the cib client"); return; } static cib_client_t * cib_client_connect_common( IPC_Channel *channel, const char *channel_name, gboolean (*callback)(IPC_Channel *channel, gpointer user_data)) { gboolean can_connect = TRUE; cib_client_t *new_client = NULL; crm_debug_3("Connecting channel"); if (channel == NULL) { crm_err("Channel was NULL"); can_connect = FALSE; cib_bad_connects++; } else if (channel->ch_status != IPC_CONNECT) { crm_err("Channel was disconnected"); can_connect = FALSE; cib_bad_connects++; } else if(channel_name == NULL) { crm_err("user_data must contain channel name"); can_connect = FALSE; cib_bad_connects++; } else if(cib_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", channel->farside_pid); return NULL; } else { crm_malloc0(new_client, sizeof(cib_client_t)); new_client->channel = channel; new_client->channel_name = channel_name; crm_debug_3("Created channel %p for channel %s", new_client, new_client->channel_name); channel->ops->set_recv_qlen(channel, 100); channel->ops->set_send_qlen(channel, 400); if(callback != NULL) { new_client->source = G_main_add_IPC_Channel( G_PRIORITY_DEFAULT, channel, FALSE, callback, new_client, cib_ipc_connection_destroy); } crm_debug_3("Channel %s connected for client %s", new_client->channel_name, new_client->id); } return new_client; } gboolean cib_client_connect_rw_synch(IPC_Channel *channel, gpointer user_data) { cib_client_t *new_client = cib_client_connect_common( channel, cib_channel_ro_synchronous, cib_rw_synchronous_callback); if(new_client == NULL) { return FALSE; } return TRUE; } gboolean cib_client_connect_ro_synch(IPC_Channel *channel, gpointer user_data) { cib_client_t *new_client = cib_client_connect_common( channel, cib_channel_ro_synchronous, cib_ro_synchronous_callback); if(new_client == NULL) { return FALSE; } return TRUE; } gboolean cib_client_connect_rw_ro(IPC_Channel *channel, gpointer user_data) { cl_uuid_t client_id; HA_Message *reg_msg = NULL; cib_client_t *new_client = NULL; char uuid_str[UU_UNPARSE_SIZEOF]; gboolean (*callback)(IPC_Channel *channel, gpointer user_data); callback = cib_ro_callback; if(safe_str_eq(user_data, cib_channel_rw)) { callback = cib_rw_callback; } new_client = cib_client_connect_common( channel, callback==cib_ro_callback?cib_channel_ro:cib_channel_rw, callback); if(new_client == NULL) { return FALSE; } cl_uuid_generate(&client_id); cl_uuid_unparse(&client_id, uuid_str); new_client->id = crm_strdup(uuid_str); cl_uuid_generate(&client_id); cl_uuid_unparse(&client_id, uuid_str); new_client->callback_id = crm_strdup(uuid_str); /* make sure we can find ourselves later for sync calls * redirected to the master instance */ g_hash_table_insert( client_list, new_client->id, new_client); reg_msg = ha_msg_new(3); ha_msg_add(reg_msg, F_CIB_OPERATION, CRM_OP_REGISTER); ha_msg_add(reg_msg, F_CIB_CLIENTID, new_client->id); ha_msg_add( reg_msg, F_CIB_CALLBACK_TOKEN, new_client->callback_id); send_ipc_message(channel, reg_msg); return TRUE; } gboolean cib_client_connect_null(IPC_Channel *channel, gpointer user_data) { cib_client_t *new_client = cib_client_connect_common( channel, cib_channel_callback, cib_null_callback); if(new_client == NULL) { return FALSE; } return TRUE; } gboolean cib_rw_callback(IPC_Channel *channel, gpointer user_data) { return cib_common_callback(channel, user_data, FALSE, TRUE); } gboolean cib_ro_synchronous_callback(IPC_Channel *channel, gpointer user_data) { return cib_common_callback(channel, user_data, TRUE, FALSE); } gboolean cib_rw_synchronous_callback(IPC_Channel *channel, gpointer user_data) { return cib_common_callback(channel, user_data, TRUE, TRUE); } gboolean cib_ro_callback(IPC_Channel *channel, gpointer user_data) { return cib_common_callback(channel, user_data, FALSE, FALSE); } gboolean cib_null_callback(IPC_Channel *channel, gpointer user_data) { gboolean keep_connection = TRUE; HA_Message *op_request = NULL; cib_client_t *cib_client = user_data; cib_client_t *hash_client = NULL; const char *type = NULL; const char *uuid_ticket = NULL; const char *client_name = NULL; gboolean register_failed = FALSE; if(cib_client == NULL) { crm_err("Discarding IPC message from unknown source" " on callback channel."); return FALSE; } while(IPC_ISRCONN(channel)) { if(channel->ops->is_message_pending(channel) == 0) { break; } op_request = msgfromIPC_noauth(channel); if(op_request == NULL) { break; } type = cl_get_string(op_request, F_CIB_OPERATION); if(safe_str_eq(type, T_CIB_NOTIFY) ) { /* Update the notify filters for this client */ int on_off = 0; ha_msg_value_int( op_request, F_CIB_NOTIFY_ACTIVATE, &on_off); type = cl_get_string(op_request, F_CIB_NOTIFY_TYPE); crm_info("Setting %s callbacks for %s: %s", type, cib_client->name, on_off?"on":"off"); if(safe_str_eq(type, T_CIB_POST_NOTIFY)) { cib_client->post_notify = on_off; } else if(safe_str_eq(type, T_CIB_PRE_NOTIFY)) { cib_client->pre_notify = on_off; } else if(safe_str_eq(type, T_CIB_UPDATE_CONFIRM)) { cib_client->confirmations = on_off; } else if(safe_str_eq(type, T_CIB_DIFF_NOTIFY)) { cib_client->diffs = on_off; } else if(safe_str_eq(type, T_CIB_REPLACE_NOTIFY)) { cib_client->replace = on_off; } continue; } else if(safe_str_neq(type, CRM_OP_REGISTER) ) { crm_warn("Discarding IPC message from %s on callback channel", cib_client->id); crm_msg_del(op_request); continue; } uuid_ticket = cl_get_string(op_request, F_CIB_CALLBACK_TOKEN); client_name = cl_get_string(op_request, F_CIB_CLIENTNAME); CRM_DEV_ASSERT(uuid_ticket != NULL); if(crm_assert_failed) { register_failed = crm_assert_failed; } CRM_DEV_ASSERT(client_name != NULL); if(crm_assert_failed) { register_failed = crm_assert_failed; } if(register_failed == FALSE) { hash_client = g_hash_table_lookup(client_list, uuid_ticket); if(hash_client != NULL) { crm_err("Duplicate registration request..." " disconnecting"); register_failed = TRUE; } } if(register_failed) { crm_err("Registration request failed... disconnecting"); crm_msg_del(op_request); return FALSE; } cib_client->id = crm_strdup(uuid_ticket); cib_client->name = crm_strdup(client_name); g_hash_table_insert(client_list, cib_client->id, cib_client); crm_debug_2("Registered %s on %s channel", cib_client->id, cib_client->channel_name); if(safe_str_eq(cib_client->name, CRM_SYSTEM_TENGINE)) { /* The TE is _always_ interested in these * Enable now to avoid timing issues */ cib_client->diffs = TRUE; } crm_msg_del(op_request); op_request = ha_msg_new(2); ha_msg_add(op_request, F_CIB_OPERATION, CRM_OP_REGISTER); ha_msg_add(op_request, F_CIB_CLIENTID, cib_client->id); send_ipc_message(channel, op_request); if(channel->ch_status == IPC_CONNECT) { break; } } if(channel->ch_status != IPC_CONNECT) { crm_debug_2("Client disconnected"); keep_connection = cib_process_disconnect(channel, cib_client); } return keep_connection; } void cib_common_callback_worker(HA_Message *op_request, cib_client_t *cib_client, gboolean force_synchronous, gboolean privileged); void cib_common_callback_worker(HA_Message *op_request, cib_client_t *cib_client, gboolean force_synchronous, gboolean privileged) { int rc = cib_ok; int call_type = 0; const char *op = NULL; longclock_t call_stop = 0; longclock_t call_start = 0; call_start = time_longclock(); cib_client->num_calls++; op = cl_get_string(op_request, F_CIB_OPERATION); rc = cib_get_operation_id(op_request, &call_type); if(rc != cib_ok) { crm_debug("Invalid operation %s from %s/%s", op, cib_client->name, cib_client->channel_name); } else { crm_debug_2("Processing %s operation from %s/%s", op, cib_client->name, cib_client->channel_name); } crm_log_message_adv(LOG_MSG, "Client[inbound]", op_request); ha_msg_add(op_request, F_CIB_CLIENTID, cib_client->id); ha_msg_add(op_request, F_CIB_CLIENTNAME, cib_client->name); if(rc == cib_ok) { cib_process_request( op_request, force_synchronous, privileged, FALSE, cib_client); } crm_debug_3("Cleaning up request"); crm_msg_del(op_request); call_stop = time_longclock(); cib_call_time += (call_stop - call_start); } gboolean cib_common_callback(IPC_Channel *channel, cib_client_t *cib_client, gboolean force_synchronous, gboolean privileged) { int lpc = 0; HA_Message *op_request = NULL; gboolean keep_channel = TRUE; if(cib_client == NULL) { crm_err("Receieved call from unknown source. Discarding."); return FALSE; } if(cib_client->name == NULL) { cib_client->name = crm_itoa(channel->farside_pid); } if(cib_client->id == NULL) { cib_client->id = crm_strdup(cib_client->name); g_hash_table_insert(client_list, cib_client->id, cib_client); } crm_debug_2("Callback for %s on %s channel", cib_client->id, cib_client->channel_name); while(IPC_ISRCONN(channel)) { if(channel->ops->is_message_pending(channel) == 0) { break; } op_request = msgfromIPC_noauth(channel); if (op_request == NULL) { perror("Receive failure:"); break; } lpc++; crm_assert_failed = FALSE; cib_common_callback_worker( op_request, cib_client, force_synchronous, privileged); op_request = NULL; if(channel->ch_status == IPC_CONNECT) { break; } } crm_debug_2("Processed %d messages", lpc); if(channel->ch_status != IPC_CONNECT) { crm_debug_2("Client disconnected"); keep_channel = cib_process_disconnect(channel, cib_client); } return keep_channel; } void cib_process_request( HA_Message *request, gboolean force_synchronous, gboolean privileged, gboolean from_peer, cib_client_t *cib_client) { int call_type = 0; int call_options = 0; gboolean process = TRUE; gboolean needs_reply = TRUE; gboolean local_notify = FALSE; gboolean needs_forward = FALSE; crm_data_t *result_diff = NULL; enum cib_errors rc = cib_ok; HA_Message *op_reply = NULL; const char *op = cl_get_string(request, F_CIB_OPERATION); const char *originator = cl_get_string(request, F_ORIG); const char *host = cl_get_string(request, F_CIB_HOST); const char *reply_to = cl_get_string(request, F_CIB_ISREPLY); const char *update = cl_get_string(request, F_CIB_GLOBAL_UPDATE); const char *delegated = cl_get_string(request, F_CIB_DELEGATED); const char *client_id = NULL; crm_debug_4("%s Processing msg %s", cib_our_uname, cl_get_string(request, F_SEQ)); cib_num_ops++; if(cib_num_ops == 0) { cib_num_fail = 0; cib_num_local = 0; cib_num_updates = 0; crm_info("Stats wrapped around"); } if(host != NULL && strlen(host) == 0) { host = NULL; } ha_msg_value_int(request, F_CIB_CALLOPTS, &call_options); crm_debug_4("Retrieved call options: %d", call_options); if(force_synchronous) { call_options |= cib_sync_call; } crm_debug_2("Processing %s message (%s) to %s...", from_peer?"peer":"local", from_peer?originator:cib_our_uname, host?host:"master"); rc = cib_get_operation_id(request, &call_type); if(cib_server_ops[call_type].modifies_cib) { cib_num_updates++; } if(rc != cib_ok) { /* TODO: construct error reply */ crm_err("Pre-processing of command failed: %s", cib_error2string(rc)); } else if(from_peer == FALSE) { if(cib_server_ops[call_type].modifies_cib && !(call_options & cib_inhibit_bcast)) { /* we need to send an update anyway */ needs_reply = TRUE; } else { needs_reply = FALSE; } if(host == NULL && (call_options & cib_scope_local)) { crm_debug("Processing locally scoped %s op from %s", op, cib_client->name); local_notify = TRUE; } else if(host == NULL && cib_is_master) { crm_debug("Processing master %s op locally from %s", op, cib_client->name); local_notify = TRUE; } else if(safe_str_eq(host, cib_our_uname)) { crm_debug("Processing locally addressed %s op from %s", op, cib_client->name); local_notify = TRUE; } else { crm_debug("%s op from %s needs to be forwarded to %s", op, cib_client->name, host?host:"the master instance"); needs_forward = TRUE; process = FALSE; } } else if(safe_str_eq(op, "cib_shutdown_req")) { if(reply_to != NULL) { crm_debug("Processing %s from %s", op, host); needs_reply = FALSE; } else { crm_debug("Processing %s reply from %s", op, host); } } else if(crm_is_true(update) && safe_str_eq(reply_to, cib_our_uname)) { crm_debug("Processing global/peer update from %s" " that originated from us", originator); needs_reply = FALSE; if(cl_get_string(request, F_CIB_CLIENTID) != NULL) { local_notify = TRUE; } } else if(crm_is_true(update)) { crm_debug("Processing global/peer update from %s", originator); needs_reply = FALSE; } else if(host != NULL && safe_str_eq(host, cib_our_uname)) { crm_debug("Processing request sent to us from %s", originator); } else if(delegated != NULL && cib_is_master == TRUE) { crm_debug("Processing request sent to master instance from %s", originator); } else if(reply_to != NULL && safe_str_eq(reply_to, cib_our_uname)) { crm_debug("Forward reply sent from %s to local clients", originator); process = FALSE; needs_reply = FALSE; local_notify = TRUE; } else if(delegated != NULL) { crm_debug("Ignoring msg for master instance"); return; } else if(host != NULL) { /* this is for a specific instance and we're not it */ crm_debug("Ignoring msg for instance on %s", crm_str(host)); return; } else if(reply_to == NULL && cib_is_master == FALSE) { /* this is for the master instance and we're not it */ crm_debug("Ignoring %s reply to %s", op, crm_str(reply_to)); return; } else { crm_err("Nothing for us to do?"); crm_log_message_adv(LOG_ERR, "Peer[inbound]", request); return; } crm_debug_3("Finished determining processing actions"); if(call_options & cib_discard_reply) { needs_reply = cib_server_ops[call_type].modifies_cib; local_notify = FALSE; } if(needs_forward) { HA_Message *forward_msg = cib_msg_copy(request, TRUE); ha_msg_add(forward_msg, F_CIB_DELEGATED, cib_our_uname); if(host != NULL) { crm_debug_2("Forwarding %s op to %s", op, host); send_ha_message(hb_conn, forward_msg, host, FALSE); } else { crm_debug_2("Forwarding %s op to master instance", op); send_ha_message(hb_conn, forward_msg, NULL, FALSE); } if(call_options & cib_discard_reply) { crm_debug_2("Client not interested in reply"); crm_msg_del(forward_msg); } else if(call_options & cib_sync_call) { /* keep track of the request so we can time it * out if required */ crm_debug_2("Registering delegated call from %s", cib_client->id); cib_client->delegated_calls = g_list_append( cib_client->delegated_calls, forward_msg); } else { crm_msg_del(forward_msg); } return; } if(process) { cib_num_local++; crm_debug_2("Performing local processing:" " op=%s origin=%s/%s,%s (update=%s)", cl_get_string(request, F_CIB_OPERATION), originator, cl_get_string(request, F_CIB_CLIENTID), cl_get_string(request, F_CIB_CALLID), update); rc = cib_process_command( request, &op_reply, &result_diff, privileged); crm_debug_2("Processing complete"); if(rc == cib_diff_resync || rc == cib_diff_failed || rc == cib_old_data) { crm_warn("%s operation failed: %s", crm_str(op), cib_error2string(rc)); } else if(rc != cib_ok) { cib_num_fail++; crm_err("%s operation failed: %s", crm_str(op), cib_error2string(rc)); crm_log_message_adv(LOG_DEBUG, "CIB[output]", op_reply); crm_log_message_adv(LOG_INFO, "Input message", request); } if(op_reply == NULL && (needs_reply || local_notify)) { crm_err("Unexpected NULL reply to message"); crm_log_message(LOG_ERR, request); needs_reply = FALSE; local_notify = FALSE; } } crm_debug_3("processing response cases"); if(local_notify) { /* send callback to originating child */ cib_client_t *client_obj = NULL; HA_Message *client_reply = NULL; enum cib_errors local_rc = cib_ok; crm_debug_2("Performing notification"); if(process == FALSE) { client_reply = cib_msg_copy(request, TRUE); } else { /* can we set this to FALSE? */ client_reply = cib_msg_copy(op_reply, TRUE); } client_id = cl_get_string(request, F_CIB_CLIENTID); if(client_id != NULL) { client_obj = g_hash_table_lookup( client_list, client_id); } else { crm_debug("No client to sent the response to." " F_CIB_CLIENTID not set."); } crm_debug_3("Sending callback to request originator"); if(client_obj == NULL) { local_rc = cib_reply_failed; } else { const char *client_id = client_obj->callback_id; crm_debug_2("Sending %ssync response to %s %s", (call_options & cib_sync_call)?"":"an a-", client_obj->name, from_peer?"(originator of delegated request)":""); if(call_options & cib_sync_call) { client_id = client_obj->id; } local_rc = send_via_callback_channel( client_reply, client_id); client_reply = NULL; } if(local_rc != cib_ok) { crm_warn("%sSync reply to %s failed: %s", (call_options & cib_sync_call)?"":"A-", client_obj?client_obj->name:"", cib_error2string(local_rc)); crm_log_message(LOG_DEBUG, op_reply); } } if(needs_reply == FALSE) { /* nothing more to do... * this was a non-originating slave update */ crm_debug_2("Completed slave update"); crm_msg_del(op_reply); free_xml(result_diff); return; } crm_debug_4("add the originator to message"); CRM_DEV_ASSERT(op_reply != NULL); CRM_DEV_ASSERT(cib_server_ops[call_type].modifies_cib == FALSE || result_diff != NULL || rc != cib_ok); /* from now on we are the server */ if(rc == cib_ok && result_diff != NULL && !(call_options & cib_inhibit_bcast)) { /* this (successful) call modified the CIB _and_ the * change needs to be broadcast... * send via HA to other nodes */ HA_Message *op_bcast = cib_msg_copy(request, FALSE); int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; cib_diff_version_details( result_diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_debug("Sending update diff %d.%d.%d -> %d.%d.%d", diff_del_admin_epoch,diff_del_epoch,diff_del_updates, diff_add_admin_epoch,diff_add_epoch,diff_add_updates); ha_msg_add(op_bcast, F_CIB_ISREPLY, originator); ha_msg_add(op_bcast, F_CIB_GLOBAL_UPDATE, XML_BOOLEAN_TRUE); ha_msg_mod(op_bcast, F_CIB_OPERATION, CIB_OP_APPLY_DIFF); add_message_xml(op_bcast, F_CIB_UPDATE_DIFF, result_diff); crm_log_message(LOG_DEBUG_3, op_bcast); send_ha_message(hb_conn, op_bcast, NULL, TRUE); crm_msg_del(op_bcast); } else if((call_options & cib_discard_reply) == 0) { crm_debug_2("Sending replies"); if(from_peer && originator != NULL) { /* send reply via HA to originating node */ crm_debug_2("Sending request result to originator only"); ha_msg_add(op_reply, F_CIB_ISREPLY, originator); send_ha_message(hb_conn, op_reply, originator, FALSE); } if(call_options & cib_inhibit_bcast ) { crm_debug("Request not broadcast: inhibited"); } if(cib_server_ops[call_type].modifies_cib == FALSE) { crm_debug_2("Request not broadcast: R/O call"); } if(rc != cib_ok) { crm_warn("Request not broadcast: call failed: %s", cib_error2string(rc)); } } crm_msg_del(op_reply); free_xml(result_diff); return; } static HA_Message * cib_construct_reply(HA_Message *request, HA_Message *output, int rc) { int lpc = 0; HA_Message *reply = NULL; const char *name = NULL; const char *value = NULL; const char *names[] = { F_CIB_OPERATION, F_CIB_CALLID, F_CIB_CLIENTID, F_CIB_CALLOPTS }; crm_debug_4("Creating a basic reply"); reply = ha_msg_new(8); ha_msg_add(reply, F_TYPE, T_CIB); for(lpc = 0; lpc < DIMOF(names); lpc++) { name = names[lpc]; value = cl_get_string(request, name); ha_msg_add(reply, name, value); } ha_msg_add_int(reply, F_CIB_RC, rc); if(output != NULL) { crm_debug_4("Attaching reply output"); add_message_xml(reply, F_CIB_CALLDATA, output); } return reply; } enum cib_errors cib_process_command(HA_Message *request, HA_Message **reply, crm_data_t **cib_diff, gboolean privileged) { crm_data_t *output = NULL; crm_data_t *input = NULL; crm_data_t *current_cib = the_cib; crm_data_t *result_cib = NULL; int call_type = 0; int call_options = 0; enum cib_errors rc = cib_ok; enum cib_errors rc2 = cib_ok; const char *op = NULL; const char *section = NULL; gboolean global_update = crm_is_true( cl_get_string(request, F_CIB_GLOBAL_UPDATE)); *reply = NULL; *cib_diff = NULL; /* Start processing the request... */ op = cl_get_string(request, F_CIB_OPERATION); ha_msg_value_int(request, F_CIB_CALLOPTS, &call_options); rc = cib_get_operation_id(request, &call_type); if(rc == cib_ok && cib_server_ops[call_type].needs_privileges && privileged == FALSE) { /* abort */ rc = cib_not_authorized; } if(rc == cib_ok && global_update == FALSE && cib_server_ops[call_type].needs_quorum && can_write(call_options) == FALSE) { rc = cib_no_quorum; } /* prevent NUMUPDATES from being incrimented - apply the change as-is */ if(global_update) { call_options |= cib_inhibit_bcast; call_options |= cib_force_diff; } rc2 = cib_server_ops[call_type].prepare(request, &input, §ion); if(rc == cib_ok) { rc = rc2; } if(rc != cib_ok) { crm_debug_2("Call setup failed"); } else if(cib_server_ops[call_type].modifies_cib) { if((call_options & cib_inhibit_notify) == 0) { cib_pre_notify( call_options, op, get_object_root(section, current_cib), input); } if(rc == cib_ok) { rc = cib_server_ops[call_type].fn( op, call_options, section, input, current_cib, &result_cib, &output); } if(rc == cib_ok) { CRM_DEV_ASSERT(result_cib != NULL); CRM_DEV_ASSERT(current_cib != result_cib); update_counters(__FILE__, __FUNCTION__, result_cib); if(do_id_check(result_cib, NULL)) { rc = cib_id_check; if(call_options & cib_force_diff) { crm_err("Global update introduces id collision!"); } } else { *cib_diff = diff_cib_object( current_cib, result_cib, FALSE); } } if(rc != cib_ok) { free_xml(result_cib); } else if(activateCibXml(result_cib, CIB_FILENAME) != 0){ crm_warn("Activation failed"); rc = cib_ACTIVATION; } if((call_options & cib_inhibit_notify) == 0) { const char *call_id = cl_get_string( request, F_CIB_CALLID); const char *client = cl_get_string( request, F_CIB_CLIENTNAME); cib_post_notify(call_options, op, input, rc, the_cib); cib_diff_notify(call_options, client, call_id, op, input, rc, *cib_diff); } log_xml_diff(rc==cib_ok?cib_diff_loglevel:cib_diff_loglevel+1, *cib_diff, "cib:diff"); } else { rc = cib_server_ops[call_type].fn( op, call_options, section, input, current_cib, &result_cib, &output); CRM_DEV_ASSERT(result_cib == NULL); free_xml(result_cib); } if((call_options & cib_discard_reply) == 0) { *reply = cib_construct_reply(request, output, rc); } if(call_type >= 0) { cib_server_ops[call_type].cleanup(op, &input, &output); } return rc; } int send_via_callback_channel(HA_Message *msg, const char *token) { cib_client_t *hash_client = NULL; GList *list_item = NULL; enum cib_errors rc = cib_ok; crm_debug_3("Delivering msg %p to client %s", msg, token); if(token == NULL) { crm_err("No client id token, cant send message"); if(rc == cib_ok) { rc = cib_missing; } } else { /* A client that left before we could reply is not really * _our_ error. Warn instead. */ hash_client = g_hash_table_lookup(client_list, token); if(hash_client == NULL) { crm_warn("Cannot find client for token %s", token); rc = cib_client_gone; } else if(hash_client->channel == NULL) { crm_err("Cannot find channel for client %s", token); rc = cib_client_corrupt; } else if(hash_client->channel->ops->get_chan_status( hash_client->channel) == IPC_DISCONNECT) { crm_warn("Client %s has disconnected", token); rc = cib_client_gone; cib_num_timeouts++; } } /* this is a more important error so overwriting rc is warrented */ if(msg == NULL) { crm_err("No message to send"); rc = cib_reply_failed; } if(rc == cib_ok) { list_item = g_list_find_custom( hash_client->delegated_calls, msg, cib_GCompareFunc); } if(list_item != NULL) { /* remove it - no need to time it out */ HA_Message *orig_msg = list_item->data; crm_debug_3("Removing msg from delegated list"); hash_client->delegated_calls = g_list_remove( hash_client->delegated_calls, orig_msg); CRM_DEV_ASSERT(orig_msg != msg); crm_msg_del(orig_msg); } if(rc == cib_ok) { crm_debug_3("Delivering reply to client %s", token); if(send_ipc_message(hash_client->channel, msg) == FALSE) { crm_warn("Delivery of reply to client %s/%s failed", hash_client->name, token); rc = cib_reply_failed; } } else { /* be consistent... * send_ipc_message() will free the message, so we should do * so manually if we dont try to send it. */ crm_msg_del(msg); } return rc; } gint cib_GCompareFunc(gconstpointer a, gconstpointer b) { const HA_Message *a_msg = a; const HA_Message *b_msg = b; int msg_a_id = 0; int msg_b_id = 0; ha_msg_value_int(a_msg, F_CIB_CALLID, &msg_a_id); ha_msg_value_int(b_msg, F_CIB_CALLID, &msg_b_id); if(msg_a_id == msg_b_id) { return 0; } else if(msg_a_id < msg_b_id) { return -1; } return 1; } gboolean cib_msg_timeout(gpointer data) { crm_debug_4("Checking if any clients have timed out messages"); /* g_hash_table_foreach(client_list, cib_GHFunc, NULL); */ return TRUE; } void cib_GHFunc(gpointer key, gpointer value, gpointer user_data) { int timeout = 0; /* 1 iteration == 10 seconds */ HA_Message *msg = NULL; HA_Message *reply = NULL; const char *host_to = NULL; cib_client_t *client = value; GListPtr list = client->delegated_calls; while(list != NULL) { msg = list->data; ha_msg_value_int(msg, F_CIB_TIMEOUT, &timeout); if(timeout <= 0) { list = list->next; continue; } else { int seen = 0; ha_msg_value_int(msg, F_CIB_SEENCOUNT, &seen); crm_debug_4("Timeout %d, seen %d", timeout, seen); if(seen < timeout) { crm_debug_4("Updating seen count for msg from client %s", client->id); seen += 10; ha_msg_mod_int(msg, F_CIB_SEENCOUNT, seen); list = list->next; continue; } } cib_num_timeouts++; host_to = cl_get_string(msg, F_CIB_HOST); crm_warn("Sending operation timeout msg to client %s", client->id); reply = ha_msg_new(4); ha_msg_add(reply, F_TYPE, T_CIB); ha_msg_add(reply, F_CIB_OPERATION, cl_get_string(msg, F_CIB_OPERATION)); ha_msg_add(reply, F_CIB_CALLID, cl_get_string(msg, F_CIB_CALLID)); if(host_to == NULL) { ha_msg_add_int(reply, F_CIB_RC, cib_master_timeout); } else { ha_msg_add_int(reply, F_CIB_RC, cib_remote_timeout); } send_ipc_message(client->channel, reply); list = list->next; client->delegated_calls = g_list_remove( client->delegated_calls, msg); crm_msg_del(msg); } } gboolean cib_process_disconnect(IPC_Channel *channel, cib_client_t *cib_client) { if (channel == NULL) { CRM_DEV_ASSERT(cib_client == NULL); } else if (cib_client == NULL) { crm_err("No client"); } else { CRM_DEV_ASSERT(channel->ch_status != IPC_CONNECT); crm_debug_2("Cleaning up after client disconnect: %s/%s/%s", crm_str(cib_client->name), cib_client->channel_name, cib_client->id); if(cib_client->id != NULL) { if(!g_hash_table_remove(client_list, cib_client->id)) { crm_err("Client %s not found in the hashtable", cib_client->name); } } } if(cib_shutdown_flag && g_hash_table_size(client_list) == 0) { crm_info("All clients disconnected..."); initiate_exit(); } return FALSE; } gboolean cib_ha_dispatch(IPC_Channel *channel, gpointer user_data) { ll_cluster_t *hb_cluster = (ll_cluster_t*)user_data; crm_debug_3("Invoked"); if(IPC_ISRCONN(channel)) { if(hb_cluster->llc_ops->msgready(hb_cluster) == 0) { crm_debug_2("no message ready yet"); } /* invoke the callbacks but dont block */ hb_cluster->llc_ops->rcvmsg(hb_cluster, 0); } if (channel->ch_status != IPC_CONNECT) { return FALSE; } return TRUE; } void cib_peer_callback(HA_Message * msg, void* private_data) { int call_type = 0; int call_options = 0; const char *originator = cl_get_string(msg, F_ORIG); const char *seq = cl_get_string(msg, F_SEQ); const char *op = cl_get_string(msg, F_CIB_OPERATION); crm_log_message_adv(LOG_MSG, "Peer[inbound]", msg); crm_debug_2("Peer %s message (%s) from %s", op, seq, originator); if(originator == NULL || safe_str_eq(originator, cib_our_uname)) { - crm_debug_3("Discarding %s message from ourselves", op); + crm_debug("Discarding %s message %s from ourselves", op, seq); return; } else if(ccm_membership == NULL) { crm_info("Discarding %s message (%s) from %s:" " membership not established", op, seq, originator); return; } else if(g_hash_table_lookup(ccm_membership, originator) == NULL) { crm_warn("Discarding %s message (%s) from %s:" " not in our membership", op, seq, originator); return; } else if(cib_get_operation_id(msg, &call_type) != cib_ok) { crm_debug("Discarding %s message (%s) from %s:" " Invalid operation", op, seq, originator); return; } crm_debug_2("Processing %s msg (%s) from %s",op, seq, originator); ha_msg_value_int(msg, F_CIB_CALLOPTS, &call_options); crm_debug_4("Retrieved call options: %d", call_options); if(cl_get_string(msg, F_CIB_CLIENTNAME) == NULL) { ha_msg_add(msg, F_CIB_CLIENTNAME, originator); } cib_process_request(msg, FALSE, TRUE, TRUE, NULL); return; } HA_Message * cib_msg_copy(HA_Message *msg, gboolean with_data) { int lpc = 0; const char *field = NULL; const char *value = NULL; const HA_Message *value_struct = NULL; static const char *field_list[] = { F_TYPE , F_CIB_CLIENTID , F_CIB_CALLOPTS , F_CIB_CALLID , F_CIB_OPERATION , F_CIB_ISREPLY , F_CIB_SECTION , F_CIB_HOST , F_CIB_RC , F_CIB_DELEGATED , F_CIB_OBJID , F_CIB_OBJTYPE , F_CIB_EXISTING , F_CIB_SEENCOUNT , F_CIB_TIMEOUT , F_CIB_CALLBACK_TOKEN , F_CIB_GLOBAL_UPDATE , F_CIB_CLIENTNAME , F_CIB_NOTIFY_TYPE , F_CIB_NOTIFY_ACTIVATE }; static const char *data_list[] = { F_CIB_CALLDATA , F_CIB_UPDATE , F_CIB_UPDATE_RESULT }; HA_Message *copy = NULL; copy = ha_msg_new(10); if(copy == NULL) { return copy; } for(lpc = 0; lpc < DIMOF(field_list); lpc++) { field = field_list[lpc]; value = cl_get_string(msg, field); if(value != NULL) { ha_msg_add(copy, field, value); } } for(lpc = 0; with_data && lpc < DIMOF(data_list); lpc++) { field = data_list[lpc]; value_struct = cl_get_struct(msg, field); if(value_struct != NULL) { add_message_xml(copy, field, value_struct); } } return copy; } enum cib_errors cib_get_operation_id(const HA_Message * msg, int *operation) { int lpc = 0; int max_msg_types = DIMOF(cib_server_ops); const char *op = cl_get_string(msg, F_CIB_OPERATION); for (lpc = 0; lpc < max_msg_types; lpc++) { if (safe_str_eq(op, cib_server_ops[lpc].operation)) { *operation = lpc; return cib_ok; } } crm_err("Operation %s is not valid", op); *operation = -1; return cib_operation; } void cib_client_status_callback(const char * node, const char * client, const char * status, void * private) { if(safe_str_eq(client, CRM_SYSTEM_CIB)) { crm_info("Status update: Client %s/%s now has status [%s]", node, client, status); g_hash_table_replace(peer_hash, crm_strdup(node), crm_strdup(status)); set_connected_peers(the_cib); } return; } extern oc_ev_t *cib_ev_token; gboolean ccm_manual_check(gpointer data) { int rc = 0; oc_ev_t *ccm_token = cib_ev_token; crm_debug("manual check"); rc = oc_ev_handle_event(ccm_token); if(0 == rc) { return TRUE; } else { crm_err("CCM connection appears to have failed: rc=%d.", rc); return FALSE; } } gboolean cib_ccm_dispatch(int fd, gpointer user_data) { int rc = 0; oc_ev_t *ccm_token = (oc_ev_t*)user_data; crm_debug_2("received callback"); rc = oc_ev_handle_event(ccm_token); if(0 == rc) { return TRUE; } else { crm_err("CCM connection appears to have failed: rc=%d.", rc); return FALSE; } } static void crm_ghash_clfree(gpointer data) { crm_free(data); } void cib_ccm_msg_callback( oc_ed_t event, void *cookie, size_t size, const void *data) { int instance = -1; gboolean update_id = FALSE; gboolean update_quorum = FALSE; const oc_ev_membership_t *membership = data; if(membership != NULL) { instance = membership->m_instance; } crm_debug("Process CCM event=%s (id=%d)", ccm_event_name(event), instance); switch(event) { case OC_EV_MS_NEW_MEMBERSHIP: case OC_EV_MS_INVALID: update_id = TRUE; update_quorum = TRUE; break; case OC_EV_MS_PRIMARY_RESTORED: update_id = TRUE; break; case OC_EV_MS_NOT_PRIMARY: crm_debug_2("Ignoring transitional CCM event: %s", ccm_event_name(event)); break; case OC_EV_MS_EVICTED: crm_err("Evicted from CCM: %s", ccm_event_name(event)); update_quorum = TRUE; break; default: crm_err("Unknown CCM event: %d", event); } if(update_id) { CRM_DEV_ASSERT(membership != NULL); if(crm_assert_failed) { return; } if(ccm_transition_id != NULL) { crm_free(ccm_transition_id); ccm_transition_id = NULL; } ccm_transition_id = crm_itoa(instance); set_transition(the_cib); } if(update_quorum) { unsigned int members = 0; int offset = 0; unsigned int lpc = 0; cib_have_quorum = ccm_have_quorum(event); if(cib_have_quorum) { crm_xml_add( the_cib,XML_ATTR_HAVE_QUORUM,XML_BOOLEAN_TRUE); } else { crm_xml_add( the_cib,XML_ATTR_HAVE_QUORUM,XML_BOOLEAN_FALSE); } crm_debug("Quorum %s after event=%s (id=%d)", cib_have_quorum?"(re)attained":"lost", ccm_event_name(event), instance); if(ccm_membership == NULL) { ccm_membership = g_hash_table_new_full( g_str_hash, g_str_equal, crm_ghash_clfree, NULL); } if(membership != NULL && membership->m_n_out != 0) { members = membership->m_n_out; offset = membership->m_out_idx; for(lpc = 0; lpc < members; lpc++) { oc_node_t a_node = membership->m_array[lpc+offset]; crm_info("LOST: %s", a_node.node_uname); g_hash_table_remove( ccm_membership, a_node.node_uname); } } if(membership != NULL && membership->m_n_in != 0) { members = membership->m_n_in; offset = membership->m_in_idx; for(lpc = 0; lpc < members; lpc++) { oc_node_t a_node = membership->m_array[lpc+offset]; char *uname = crm_strdup(a_node.node_uname); crm_info("New peer: %s", uname); g_hash_table_replace( ccm_membership, uname, uname); } } } oc_ev_callback_done(cookie); set_connected_peers(the_cib); return; } gboolean can_write(int flags) { if(cib_have_quorum) { return TRUE; } else if((flags & cib_quorum_override) != 0) { return TRUE; } return FALSE; } static gboolean cib_force_exit(gpointer data) { crm_notice("Forcing exit!"); terminate_ha_connection(__FUNCTION__); return FALSE; } void initiate_exit(void) { int active = 0; HA_Message *leaving = NULL; g_hash_table_foreach(peer_hash, GHFunc_count_peers, &active); if(active < 2) { terminate_ha_connection(__FUNCTION__); return; } crm_info("Sending disconnect notification to %d peers...", active); leaving = ha_msg_new(3); ha_msg_add(leaving, F_TYPE, "cib"); ha_msg_add(leaving, F_CIB_OPERATION, "cib_shutdown_req"); send_ha_message(hb_conn, leaving, NULL, TRUE); crm_msg_del(leaving); Gmain_timeout_add(crm_get_msec("5s"), cib_force_exit, NULL); } void terminate_ha_connection(const char *caller) { if(hb_conn != NULL) { crm_info("%s: Disconnecting heartbeat", caller); hb_conn->llc_ops->signoff(hb_conn, FALSE); } else { crm_err("%s: No heartbeat connection", caller); exit(LSB_EXIT_OK); } } diff --git a/crm/crmd/lrm.c b/crm/crmd/lrm.c index dc2c0d745e..e522ed53e5 100644 --- a/crm/crmd/lrm.c +++ b/crm/crmd/lrm.c @@ -1,1585 +1,1596 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include /* for access */ #include #include #include #include #include #include #include #include #include #include #include #include char *make_stop_id(const char *rsc, int call_id); void ghash_print_pending(gpointer key, gpointer value, gpointer user_data); gboolean stop_all_resources(void); gboolean resource_stopped(gpointer key, gpointer value, gpointer user_data); gboolean build_operation_update( crm_data_t *rsc_list, lrm_op_t *op, const char *src, int lpc); gboolean build_active_RAs(crm_data_t *rsc_list); gboolean is_rsc_active(const char *rsc_id); void do_update_resource(lrm_op_t *op); enum crmd_fsa_input do_lrm_rsc_op( lrm_rsc_t *rsc, char *rid, const char *operation, crm_data_t *msg, HA_Message *request); enum crmd_fsa_input do_fake_lrm_op(gpointer data); void stop_recurring_action( gpointer key, gpointer value, gpointer user_data); gboolean remove_recurring_action( gpointer key, gpointer value, gpointer user_data); lrm_op_t *construct_op( crm_data_t *rsc_op, const char *rsc_id, const char *operation); void send_direct_ack(const char *to_host, const char *to_sys, lrm_op_t* op, const char *rsc_id); void free_recurring_op(gpointer value); GHashTable *monitors = NULL; GHashTable *resources = NULL; GHashTable *resources_confirmed = NULL; GHashTable *shutdown_ops = NULL; int num_lrm_register_fails = 0; int max_lrm_register_fails = 30; enum crmd_rscstate { crmd_rscstate_NULL, crmd_rscstate_START, crmd_rscstate_START_PENDING, crmd_rscstate_START_OK, crmd_rscstate_START_FAIL, crmd_rscstate_STOP, crmd_rscstate_STOP_PENDING, crmd_rscstate_STOP_OK, crmd_rscstate_STOP_FAIL, crmd_rscstate_MON, crmd_rscstate_MON_PENDING, crmd_rscstate_MON_OK, crmd_rscstate_MON_FAIL, crmd_rscstate_GENERIC_PENDING, crmd_rscstate_GENERIC_OK, crmd_rscstate_GENERIC_FAIL }; const char *crmd_rscstate2string(enum crmd_rscstate state); const char * crmd_rscstate2string(enum crmd_rscstate state) { switch(state) { case crmd_rscstate_NULL: return NULL; case crmd_rscstate_START: return CRMD_ACTION_START; case crmd_rscstate_START_PENDING: return CRMD_ACTION_START_PENDING; case crmd_rscstate_START_OK: return CRMD_ACTION_STARTED; case crmd_rscstate_START_FAIL: return CRMD_ACTION_START_FAIL; case crmd_rscstate_STOP: return CRMD_ACTION_STOP; case crmd_rscstate_STOP_PENDING: return CRMD_ACTION_STOP_PENDING; case crmd_rscstate_STOP_OK: return CRMD_ACTION_STOPPED; case crmd_rscstate_STOP_FAIL: return CRMD_ACTION_STOP_FAIL; case crmd_rscstate_MON: return CRMD_ACTION_MON; case crmd_rscstate_MON_PENDING: return CRMD_ACTION_MON_PENDING; case crmd_rscstate_MON_OK: return CRMD_ACTION_MON_OK; case crmd_rscstate_MON_FAIL: return CRMD_ACTION_MON_FAIL; case crmd_rscstate_GENERIC_PENDING: return CRMD_ACTION_GENERIC_PENDING; case crmd_rscstate_GENERIC_OK: return CRMD_ACTION_GENERIC_OK; case crmd_rscstate_GENERIC_FAIL: return CRMD_ACTION_GENERIC_FAIL; } return ""; } GCHSource *lrm_source = NULL; /* A_LRM_CONNECT */ enum crmd_fsa_input do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int ret = HA_OK; if(action & A_LRM_DISCONNECT) { if(lrm_source) { crm_debug("Removing LRM connection from MainLoop"); if(G_main_del_IPC_Channel(lrm_source) == FALSE) { crm_err("Could not remove LRM connection" " from MainLoop"); } lrm_source = NULL; } if(fsa_lrm_conn) { fsa_lrm_conn->lrm_ops->signoff(fsa_lrm_conn); crm_info("Disconnected from the LRM"); clear_bit_inplace(fsa_input_register, R_LRM_CONNECTED); fsa_lrm_conn = NULL; } /* TODO: Clean up the hashtable */ } if(action & A_LRM_CONNECT) { ret = HA_OK; monitors = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, free_recurring_op); resources = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); resources_confirmed = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); shutdown_ops = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); fsa_lrm_conn = ll_lrm_new(XML_CIB_TAG_LRM); if(NULL == fsa_lrm_conn) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); ret = HA_FAIL; } if(ret == HA_OK) { crm_debug("Connecting to the LRM"); ret = fsa_lrm_conn->lrm_ops->signon( fsa_lrm_conn, CRM_SYSTEM_CRMD); } if(ret != HA_OK) { if(++num_lrm_register_fails < max_lrm_register_fails) { crm_warn("Failed to sign on to the LRM %d" " (%d max) times", num_lrm_register_fails, max_lrm_register_fails); crm_timer_start(wait_timer); crmd_fsa_stall(NULL); return I_NULL; } } if(ret == HA_OK) { crm_debug_4("LRM: set_lrm_callback..."); ret = fsa_lrm_conn->lrm_ops->set_lrm_callback( fsa_lrm_conn, lrm_op_callback); if(ret != HA_OK) { crm_err("Failed to set LRM callbacks"); } } if(ret != HA_OK) { crm_err("Failed to sign on to the LRM %d" " (max) times", num_lrm_register_fails); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return I_NULL; } /* TODO: create a destroy handler that causes * some recovery to happen */ lrm_source = G_main_add_IPC_Channel( G_PRIORITY_LOW, fsa_lrm_conn->lrm_ops->ipcchan(fsa_lrm_conn), FALSE, lrm_dispatch, fsa_lrm_conn, default_ipc_connection_destroy); set_bit_inplace(fsa_input_register, R_LRM_CONNECTED); crm_debug("LRM connection established"); } if(action & ~(A_LRM_CONNECT|A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } return I_NULL; } gboolean stop_all_resources(void) { GListPtr lrm_list = NULL; crm_info("Making sure all active resources are stopped before exit"); if(fsa_lrm_conn == NULL) { return TRUE; } else if(is_set(fsa_input_register, R_SENT_RSC_STOP)) { crm_debug("Already sent stop operation"); return TRUE; } lrm_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn); slist_iter( rsc_id, char, lrm_list, lpc, if(is_rsc_active(rsc_id)) { crm_err("Resource %s was active at shutdown." " You may ignore this error if it is unmanaged.", rsc_id); /* do_lrm_rsc_op(NULL, rsc_id, CRMD_ACTION_STOP, NULL, NULL); */ } ); set_bit_inplace(fsa_input_register, R_SENT_RSC_STOP); if(g_hash_table_size(shutdown_ops) == 0) { register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } else { crm_info("Waiting for %d pending stop operations " " to complete before exiting", g_hash_table_size(shutdown_ops)); } return TRUE; } gboolean build_operation_update( crm_data_t *xml_rsc, lrm_op_t *op, const char *src, int lpc) { char *fail_state = NULL; const char *state = NULL; crm_data_t *xml_op = NULL; char *op_id = NULL; const char *caller_version = NULL; CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return FALSE; } crm_debug_2("%s: Updating resouce %s after %s %s op", src, op->rsc_id, op_status2text(op->op_status), op->op_type); if(op->op_status == LRM_OP_CANCELLED) { crm_debug_3("Ignoring cancelled op"); return TRUE; } if(AM_I_DC) { caller_version = CRM_FEATURE_SET; } else if(fsa_our_dc_version != NULL) { caller_version = fsa_our_dc_version; } else { /* there is a small risk in formerly mixed clusters that * it will be sub-optimal. * however with our upgrade policy, the update we send * should still be completely supported anyway */ caller_version = g_hash_table_lookup( op->params, XML_ATTR_CRM_VERSION); crm_warn("Falling back to operation originator version: %s", caller_version); } crm_debug_3("DC version: %s", caller_version); if(safe_str_eq(op->op_type, CRMD_ACTION_NOTIFY)) { const char *n_type = g_hash_table_lookup( op->params, "notify_type"); const char *n_task = g_hash_table_lookup( op->params, "notify_operation"); CRM_DEV_ASSERT(n_type != NULL); CRM_DEV_ASSERT(n_task != NULL); op_id = generate_notify_key(op->rsc_id, n_type, n_task); /* these are not yet allowed to fail */ op->op_status = LRM_OP_DONE; op->rc = 0; } else { op_id = generate_op_key(op->rsc_id, op->op_type, op->interval); } /* Handle recurring ops - infer last op_status */ if(op->op_status == LRM_OP_PENDING && op->interval > 0) { if(op->rc == 0) { crm_debug("Mapping pending operation to DONE"); op->op_status = LRM_OP_DONE; } else { crm_debug("Mapping pending operation to ERROR"); op->op_status = LRM_OP_ERROR; } } xml_op = find_entity(xml_rsc, XML_LRM_TAG_RSC_OP, op_id); if(xml_op != NULL) { const char *old_status_s = crm_element_value( xml_op, XML_LRM_ATTR_OPSTATUS); int old_status = crm_parse_int(old_status_s, "-2"); int log_level = LOG_ERR; if(old_status_s == NULL) { crm_err("No value for "XML_LRM_ATTR_OPSTATUS); } else if(old_status == op->op_status) { /* safe to mask */ log_level = LOG_WARNING; } else if(old_status == LRM_OP_PENDING){ /* ??safe to mask?? */ /* log_level = LOG_WARNING; */ } crm_log_maybe(log_level, "Duplicate %s operations in get_cur_state()", op_id); crm_log_maybe(log_level+2, "New entry: %s %s (call=%d, status=%s)", op_id, op->user_data, op->call_id, op_status2text(op->op_status)); crm_log_xml(log_level+2, "Existing entry", xml_op); crm_free(op_id); return FALSE; } else { xml_op = create_xml_node(xml_rsc, XML_LRM_TAG_RSC_OP); } crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_free(op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, op->op_type); crm_xml_add(xml_op, XML_ATTR_ORIGIN, src); if(op->user_data == NULL) { op->user_data = generate_transition_key(-1, fsa_our_uname); } #if CRM_DEPRECATED_SINCE_2_0_3 if(compare_version("1.0.3", caller_version) > 0) { CRM_CHECK(FALSE, ; ); fail_state = generate_transition_magic_v202( op->user_data, op->op_status); } else { fail_state = generate_transition_magic( op->user_data, op->op_status, op->rc); } #else fail_state = generate_transition_magic( op->user_data, op->op_status, op->rc); #endif crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, fail_state); crm_free(fail_state); switch(op->op_status) { case LRM_OP_PENDING: break; case LRM_OP_CANCELLED: crm_err("What to do here"); break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: crm_debug("Resource action %s/%s %s: %d", op->rsc_id, op->op_type, op_status2text(op->op_status), op->rc); break; case LRM_OP_DONE: if(safe_str_eq(CRMD_ACTION_START, op->op_type)) { state = CRMD_ACTION_STARTED; } else if(safe_str_eq(CRMD_ACTION_STOP, op->op_type)) { state = CRMD_ACTION_STOPPED; } else if(safe_str_eq(CRMD_ACTION_MON, op->op_type)) { state = CRMD_ACTION_STARTED; } else { crm_warn("Using status \"%s\" for op \"%s\"" "... this is still in the experimental stage.", CRMD_ACTION_GENERIC_OK, op->op_type); state = CRMD_ACTION_GENERIC_OK; } break; } crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id); /* set these on 'xml_rsc' too to make life easy for the PE */ crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc); crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status); crm_xml_add_int(xml_op, XML_LRM_ATTR_INTERVAL, op->interval); if(safe_str_neq(op->op_type, CRMD_ACTION_STOP)) { /* this will enable us to later determin that the * resource's parameters have changed and we should force * a restart * however it will come at the cost of a potentially much * larger CIB */ char *digest = NULL; crm_data_t *args_xml = NULL; crm_data_t *args_parent = NULL; #if CRM_DEPRECATED_SINCE_2_0_4 if(compare_version("1.0.5", caller_version) > 0) { args_parent = xml_op; } #endif args_xml = create_xml_node(args_parent, XML_TAG_PARAMS); g_hash_table_foreach(op->params, hash2field, args_xml); filter_action_parameters(args_xml); digest = calculate_xml_digest(args_xml, TRUE); crm_xml_add(xml_op, XML_LRM_ATTR_OP_DIGEST, digest); if(args_parent == NULL) { free_xml(args_xml); } } return TRUE; } gboolean is_rsc_active(const char *rsc_id) { GList *op_list = NULL; gboolean active = FALSE; lrm_rsc_t *the_rsc = NULL; state_flag_t cur_state = 0; int max_call_id = -1; if(fsa_lrm_conn == NULL) { return FALSE; } the_rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rsc_id); crm_debug_2("Processing lrm_rsc_t entry %s", rsc_id); if(the_rsc == NULL) { crm_err("NULL resource returned from the LRM"); return FALSE; } op_list = the_rsc->ops->get_cur_state(the_rsc, &cur_state); crm_debug_3("\tcurrent state:%s",cur_state==LRM_RSC_IDLE?"Idle":"Busy"); slist_iter( op, lrm_op_t, op_list, llpc, - crm_debug_2("Processing op %s (%d) for %s (status=%d, rc=%d)", + crm_debug("Processing op %s (%d) for %s (status=%d, rc=%d)", op->op_type, op->call_id, the_rsc->id, op->op_status, op->rc); CRM_ASSERT(max_call_id <= op->call_id); if(safe_str_eq(op->op_type, CRMD_ACTION_STOP)) { active = FALSE; } else if(op->rc == EXECRA_NOT_RUNNING) { active = FALSE; } else { active = TRUE; } max_call_id = op->call_id; lrm_free_op(op); ); g_list_free(op_list); lrm_free_rsc(the_rsc); return active; } gboolean build_active_RAs(crm_data_t *rsc_list) { GList *op_list = NULL; GList *lrm_list = NULL; gboolean found_op = FALSE; state_flag_t cur_state = 0; if(fsa_lrm_conn == NULL) { return FALSE; } lrm_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn); slist_iter( rid, char, lrm_list, lpc, lrm_rsc_t *the_rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); crm_data_t *xml_rsc = create_xml_node( rsc_list, XML_LRM_TAG_RESOURCE); int max_call_id = -1; crm_debug("Processing lrm_rsc_t entry %s", rid); if(the_rsc == NULL) { crm_err("NULL resource returned from the LRM"); continue; } crm_xml_add(xml_rsc, XML_ATTR_ID, the_rsc->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, the_rsc->type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, the_rsc->class); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER,the_rsc->provider); op_list = the_rsc->ops->get_cur_state(the_rsc, &cur_state); crm_debug_2("\tcurrent state:%s", cur_state==LRM_RSC_IDLE?"Idle":"Busy"); slist_iter( op, lrm_op_t, op_list, llpc, crm_debug_2("Processing op %s for %s (status=%d, rc=%d)", op->op_type, the_rsc->id, op->op_status, op->rc); if(max_call_id < op->call_id) { build_operation_update( xml_rsc, op, __FUNCTION__, llpc); } else if(max_call_id > op->call_id) { crm_err("Bad call_id in list=%d. Previous call_id=%d", op->call_id, max_call_id); } else { crm_warn("lrm->get_cur_state() returned" " duplicate entries for call_id=%d", op->call_id); } max_call_id = op->call_id; found_op = TRUE; lrm_free_op(op); ); if(found_op == FALSE && g_list_length(op_list) != 0) { crm_err("Could not properly determin last op" " for %s from %d entries", the_rsc->id, g_list_length(op_list)); } g_list_free(op_list); lrm_free_rsc(the_rsc); ); g_list_free(lrm_list); return TRUE; } crm_data_t* do_lrm_query(gboolean is_replace) { gboolean shut_down = FALSE; crm_data_t *xml_result= NULL; crm_data_t *xml_state = NULL; crm_data_t *xml_data = NULL; crm_data_t *rsc_list = NULL; const char *exp_state = CRMD_JOINSTATE_MEMBER; if(is_set(fsa_input_register, R_SHUTDOWN)) { exp_state = CRMD_STATE_INACTIVE; shut_down = TRUE; } xml_state = create_node_state( fsa_our_uname, ACTIVESTATUS, XML_BOOLEAN_TRUE, ONLINESTATUS, CRMD_JOINSTATE_MEMBER, exp_state, !shut_down, __FUNCTION__); xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, fsa_our_uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(rsc_list); if(is_replace) { crm_xml_add(xml_state, XML_CIB_ATTR_REPLACE, XML_CIB_TAG_LRM); } xml_result = create_cib_fragment(xml_state, XML_CIB_TAG_STATUS); crm_log_xml_debug_3(xml_state, "Current state of the LRM"); return xml_result; } struct recurring_op_s { char *rsc_id; int call_id; }; static void cancel_monitor(lrm_rsc_t *rsc, const char *key) { struct recurring_op_s *existing_op = NULL; if(rsc == NULL) { crm_err("No resource to cancel and operation for"); return; } else if(key == NULL) { crm_err("No operation to cancel"); return; } existing_op = g_hash_table_lookup(monitors, key); if(existing_op != NULL) { crm_debug("Cancelling previous invocation of %s (%d)", key, existing_op->call_id); /* cancel it so we can then restart it without conflict */ if(rsc->ops->cancel_op(rsc, existing_op->call_id) != HA_OK) { crm_info("Couldn't cancel %s (%d)", key, existing_op->call_id); } else { g_hash_table_remove(monitors, key); } } else { crm_debug("No previous invocation of %s", key); } } /* static gboolean is_active(const char *rsc_id) { char *stop_id = NULL; const char *last_op = NULL; gboolean is_active = TRUE; last_op = g_hash_table_lookup(resources, rsc_id); stop_id = generate_op_key(rsc_id, CRMD_ACTION_STOP, 0); if(safe_str_eq(last_op, stop_id)) { is_active = FALSE; } crm_free(stop_id); return is_active; } */ /* A_LRM_INVOKE */ enum crmd_fsa_input do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; enum crmd_fsa_input next_input = I_NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); crm_op = cl_get_string(input->msg, F_CRM_TASK); from_sys = cl_get_string(input->msg, F_CRM_SYS_FROM); if(safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { from_host = cl_get_string(input->msg, F_CRM_HOST_FROM); } crm_debug_2("LRM command from: %s", from_sys); if(safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { operation = CRMD_ACTION_DELETE; } else if(safe_str_eq(operation, CRM_OP_LRM_REFRESH)) { crm_op = CRM_OP_LRM_REFRESH; } else if(input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } if(crm_op != NULL && safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { enum cib_errors rc = cib_ok; crm_data_t *fragment = do_lrm_query(TRUE); crm_info("Forcing a local LRM refresh"); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc); free_xml(fragment); } else if(crm_op != NULL && safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { crm_data_t *data = do_lrm_query(FALSE); HA_Message *reply = create_reply(input->msg, data); if(relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_message(LOG_ERR, reply); crm_msg_del(reply); } free_xml(data); } else if(safe_str_eq(operation, CRM_OP_PROBED) || safe_str_eq(crm_op, CRM_OP_REPROBE)) { const char *probed = XML_BOOLEAN_TRUE; if(safe_str_eq(crm_op, CRM_OP_REPROBE)) { probed = XML_BOOLEAN_FALSE; } update_attr(fsa_cib_conn, cib_none, XML_CIB_TAG_STATUS, fsa_our_uuid, NULL, NULL, CRM_OP_PROBED, probed); } else if(operation != NULL) { char rid[64]; lrm_rsc_t *rsc = NULL; const char *id_from_cib = NULL; crm_data_t *xml_rsc = find_xml_node( input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_DEV_ASSERT(xml_rsc != NULL); if(crm_assert_failed) { crm_log_xml_err(input->xml, "Bad resource"); return I_NULL; } id_from_cib = crm_element_value(xml_rsc, XML_ATTR_ID); CRM_DEV_ASSERT(id_from_cib != NULL); if(crm_assert_failed) { crm_err("No value for %s in %s", XML_ATTR_ID, crm_element_name(xml_rsc)); crm_log_xml_err(input->xml, "Bad command"); return I_NULL; } /* only the first 16 chars are used by the LRM */ strncpy(rid, id_from_cib, 64); rid[63] = 0; crm_debug_2("Retrieving %s from the LRM.", rid); rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); if(safe_str_eq(operation, CRMD_ACTION_CANCEL)) { lrm_op_t* op = NULL; crm_data_t *params = NULL; const char *op_key = NULL; const char *op_task = NULL; crm_log_xml_debug(input->xml, "CancelOp"); op_key = crm_element_value(xml_rsc, "operation_key"); params = find_xml_node(input->xml, XML_TAG_ATTRS,TRUE); if(params != NULL) { op_task = crm_element_value( params, XML_LRM_ATTR_TASK); #if CRM_DEPRECATED_SINCE_2_0_4 if(op_task == NULL) { op_task = crm_element_value(params, "task"); } #endif } CRM_CHECK(params != NULL && op_task != NULL && op_key != NULL, return I_NULL); op = construct_op(input->xml, id_from_cib, op_task); CRM_ASSERT(op != NULL); if(op_key == NULL) { crm_err("No operation to cancel"); crm_log_message(LOG_ERR, input->msg); } else if(rsc != NULL) { cancel_monitor(rsc, op_key); } op->op_status = LRM_OP_DONE; op->rc = EXECRA_OK; send_direct_ack(from_host, from_sys, op, id_from_cib); free_lrm_op(op); } else if(safe_str_eq(operation, CRMD_ACTION_DELETE)) { int rc = HA_OK; lrm_op_t* op = NULL; op = construct_op(input->xml, id_from_cib, operation); CRM_ASSERT(op != NULL); op->op_status = LRM_OP_DONE; op->rc = EXECRA_OK; if(rsc == NULL) { crm_debug("Resource %s was already removed", id_from_cib); } else { crm_info("Removing resource %s from the LRM", rsc->id); rc = fsa_lrm_conn->lrm_ops->delete_rsc( fsa_lrm_conn, rid); if(rc != HA_OK) { crm_err("Failed to remove resource %s", rid); op->op_status = LRM_OP_ERROR; op->rc = EXECRA_UNKNOWN_ERROR; } } send_direct_ack(from_host, from_sys, op, id_from_cib); free_lrm_op(op); } else { next_input = do_lrm_rsc_op( rsc, rid, operation, input->xml, input->msg); } lrm_free_rsc(rsc); } else { crm_err("Operation was neither a lrm_query, nor a rsc op. %s", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } return next_input; } lrm_op_t * construct_op(crm_data_t *rsc_op, const char *rsc_id, const char *operation) { lrm_op_t *op = NULL; const char *transition = NULL; CRM_DEV_ASSERT(rsc_id != NULL); crm_malloc0(op, sizeof(lrm_op_t)); op->op_type = crm_strdup(operation); op->op_status = LRM_OP_PENDING; op->rc = -1; op->rsc_id = crm_strdup(rsc_id); op->interval = 0; op->timeout = 0; op->start_delay = 0; op->app_name = crm_strdup(CRM_SYSTEM_CRMD); if(rsc_op == NULL) { CRM_DEV_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); op->user_data = NULL; op->user_data_len = 0; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_insert(op->params, crm_strdup(XML_ATTR_CRM_VERSION), crm_strdup(CRM_FEATURE_SET)); crm_debug_2("Constructed %s op for %s", operation, rsc_id); return op; } op->params = xml2list(rsc_op); #if CRM_DEPRECATED_SINCE_2_0_3 if(g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION) == NULL) { g_hash_table_destroy(op->params); op->params = xml2list_202(rsc_op); } #endif if(op->params == NULL) { CRM_DEV_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); } op->interval = crm_parse_int( g_hash_table_lookup(op->params, "interval"), "0"); op->timeout = crm_parse_int( g_hash_table_lookup(op->params, "timeout"), "0"); op->start_delay = crm_parse_int( g_hash_table_lookup(op->params, "start_delay"), "0"); /* sanity */ if(op->interval < 0) { op->interval = 0; g_hash_table_replace( op->params, crm_strdup("interval"), crm_strdup("0")); } if(op->timeout < 0) { op->timeout = 0; g_hash_table_replace( op->params, crm_strdup("timeout"), crm_strdup("0")); } if(op->start_delay < 0) { op->start_delay = 0; g_hash_table_replace( op->params, crm_strdup("start_delay"), crm_strdup("0")); } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = crm_strdup(transition); op->user_data_len = 1+strlen(op->user_data); if(safe_str_eq(operation, CRMD_ACTION_START) || safe_str_eq(operation, CRMD_ACTION_STOP)) { char *interval_s = g_hash_table_lookup(op->params, "interval"); CRM_CHECK(op->interval == 0, return NULL); CRM_CHECK(interval_s == NULL, return NULL); } crm_debug_2("Constructed %s op for %s: interval=%d", operation, rsc_id, op->interval); return op; } void send_direct_ack(const char *to_host, const char *to_sys, lrm_op_t* op, const char *rsc_id) { HA_Message *reply = NULL; crm_data_t *update, *iter; crm_data_t *fragment; CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return; } if(op->rsc_id == NULL) { CRM_DEV_ASSERT(rsc_id != NULL); op->rsc_id = crm_strdup(rsc_id); } if(to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } crm_info("ACK'ing resource op: %s for %s", op->op_type, op->rsc_id); update = create_node_state( fsa_our_uname, NULL, NULL, NULL, NULL, NULL, FALSE, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, op, __FUNCTION__, 0); fragment = create_cib_fragment(update, XML_CIB_TAG_STATUS); reply = create_request(CRM_OP_INVOKE_LRM, fragment, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_debug("Sending ACK: %s", cl_get_string(reply, XML_ATTR_REFERENCE)); crm_log_xml_debug_2(update, "ACK Update"); crm_log_message_adv(LOG_DEBUG_3, "ACK Reply", reply); if(relay_message(reply, TRUE) == FALSE) { crm_log_message_adv(LOG_ERR, "Unable to route reply", reply); crm_msg_del(reply); } free_xml(fragment); free_xml(update); } enum crmd_fsa_input do_lrm_rsc_op(lrm_rsc_t *rsc, char *rid, const char *operation, crm_data_t *msg, HA_Message *request) { int call_id = 0; char *op_id = NULL; lrm_op_t* op = NULL; const char *type = NULL; const char *class = NULL; const char *provider = NULL; const char *transition = NULL; GHashTable *params = NULL; fsa_data_t *msg_data = NULL; CRM_DEV_ASSERT(rid != NULL); if(rsc != NULL) { class = rsc->class; type = rsc->type; } else if(msg != NULL) { crm_data_t *xml_rsc = find_xml_node( msg, XML_CIB_TAG_RESOURCE, TRUE); class = crm_element_value(xml_rsc, XML_AGENT_ATTR_CLASS); CRM_DEV_ASSERT(class != NULL); if(crm_assert_failed) { return I_NULL; } type = crm_element_value(xml_rsc, XML_ATTR_TYPE); CRM_DEV_ASSERT(type != NULL); if(crm_assert_failed) { return I_NULL; } provider = crm_element_value(xml_rsc, XML_AGENT_ATTR_PROVIDER); } if(msg != NULL) { transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if(transition == NULL) { crm_err("Missing transition"); crm_log_message(LOG_ERR, msg); } } if(rsc == NULL) { /* check if its already there */ rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); } if(rsc == NULL) { /* add it to the list */ crm_debug("adding rsc %s before operation", rid); if(msg != NULL) { params = xml2list(msg); #if CRM_DEPRECATED_SINCE_2_0_3 if(g_hash_table_lookup( params, XML_ATTR_CRM_VERSION) == NULL) { g_hash_table_destroy(params); params = xml2list_202(msg); } #endif } fsa_lrm_conn->lrm_ops->add_rsc( fsa_lrm_conn, rid, class, type, provider, params); rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); g_hash_table_destroy(params); } if(rsc == NULL) { crm_err("Could not add resource %s to LRM", rid); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } /* stop the monitor before stopping the resource */ if(safe_str_eq(operation, CRMD_ACTION_STOP)) { g_hash_table_foreach(monitors, stop_recurring_action, rsc); g_hash_table_foreach_remove( monitors, remove_recurring_action, rsc); } /* now do the op */ op = construct_op(msg, rsc->id, operation); crm_info("Performing op %s on %s (interval=%dms, key=%s)", operation, rid, op->interval, transition); if((AM_I_DC == FALSE && fsa_state != S_NOT_DC) || (AM_I_DC && fsa_state != S_TRANSITION_ENGINE)) { if(safe_str_neq(operation, CRMD_ACTION_STOP)) { crm_info("Discarding attempt to perform action %s on %s" " in state %s", operation, rid, fsa_state2string(fsa_state)); op->rc = 99; op->op_status = LRM_OP_ERROR; send_direct_ack(NULL, NULL, op, rsc->id); free_lrm_op(op); crm_free(op_id); return I_NULL; } } op_id = generate_op_key(rsc->id, op->op_type, op->interval); if(op->interval > 0) { cancel_monitor(rsc, op_id); op->target_rc = CHANGED; } else { op->target_rc = EVERYTIME; } g_hash_table_replace(resources,crm_strdup(rsc->id), crm_strdup(op_id)); call_id = rsc->ops->perform_op(rsc, op); if(call_id <= 0) { crm_err("Operation %s on %s failed: %d",operation,rid,call_id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else if(op->interval > 0) { struct recurring_op_s *op = NULL; crm_malloc0(op, sizeof(struct recurring_op_s)); crm_debug_2("Adding recurring %s op for %s (%d)", op_id, rsc->id, call_id); op->call_id = call_id; op->rsc_id = crm_strdup(rsc->id); g_hash_table_insert(monitors, op_id, op); op_id = NULL; } else { /* record all non-recurring operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); g_hash_table_replace( shutdown_ops, call_id_s, crm_strdup(rsc->id)); crm_debug_2("Recording pending op: %s/%s %s", rsc->id, operation, call_id_s); } crm_free(op_id); free_lrm_op(op); return I_NULL; } void stop_recurring_action(gpointer key, gpointer value, gpointer user_data) { lrm_rsc_t *rsc = user_data; struct recurring_op_s *op = (struct recurring_op_s*)value; if(safe_str_eq(op->rsc_id, rsc->id)) { if(op->call_id > 0) { crm_debug("Stopping recurring op %d for %s (%s)", op->call_id, rsc->id, (char*)key); rsc->ops->cancel_op(rsc, op->call_id); } else { crm_err("Invalid call_id %d for %s", op->call_id, rsc->id); /* TODO: we probably need to look up the LRM to find it */ } } } gboolean remove_recurring_action(gpointer key, gpointer value, gpointer user_data) { lrm_rsc_t *rsc = user_data; struct recurring_op_s *op = (struct recurring_op_s*)value; if(safe_str_eq(op->rsc_id, rsc->id)) { return TRUE; } return FALSE; } void free_recurring_op(gpointer value) { struct recurring_op_s *op = (struct recurring_op_s*)value; crm_free(op->rsc_id); crm_free(op); } void free_lrm_op(lrm_op_t *op) { g_hash_table_destroy(op->params); crm_free(op->user_data); crm_free(op->output); crm_free(op->rsc_id); crm_free(op->op_type); crm_free(op->app_name); crm_free(op); } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, crm_strdup(key), crm_strdup(value)); } lrm_op_t * copy_lrm_op(const lrm_op_t *op) { lrm_op_t *op_copy = NULL; CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return NULL; } CRM_ASSERT(op->rsc_id != NULL); crm_malloc0(op_copy, sizeof(lrm_op_t)); op_copy->op_type = crm_strdup(op->op_type); /* input fields */ op_copy->params = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if(op->params != NULL) { g_hash_table_foreach(op->params, dup_attr, op_copy->params); } op_copy->timeout = op->timeout; op_copy->interval = op->interval; op_copy->target_rc = op->target_rc; /* in the CRM, this is always a string */ if(op->user_data != NULL) { op_copy->user_data = crm_strdup(op->user_data); } /* output fields */ op_copy->op_status = op->op_status; op_copy->rc = op->rc; op_copy->call_id = op->call_id; op_copy->output = NULL; op_copy->rsc_id = crm_strdup(op->rsc_id); if(op->app_name != NULL) { op_copy->app_name = crm_strdup(op->app_name); } if(op->output != NULL) { op_copy->output = crm_strdup(op->output); } return op_copy; } lrm_rsc_t * copy_lrm_rsc(const lrm_rsc_t *rsc) { lrm_rsc_t *rsc_copy = NULL; if(rsc == NULL) { return NULL; } crm_malloc0(rsc_copy, sizeof(lrm_rsc_t)); rsc_copy->id = crm_strdup(rsc->id); rsc_copy->type = crm_strdup(rsc->type); rsc_copy->class = NULL; rsc_copy->provider = NULL; if(rsc->class != NULL) { rsc_copy->class = crm_strdup(rsc->class); } if(rsc->provider != NULL) { rsc_copy->provider = crm_strdup(rsc->provider); } /* GHashTable* params; */ rsc_copy->params = NULL; rsc_copy->ops = NULL; return rsc_copy; } +static void +cib_rsc_callback(const HA_Message *msg, int call_id, int rc, + crm_data_t *output, void *user_data) +{ + if(rc != cib_ok) { + crm_err("Resource update %d failed: %s", + call_id, cib_error2string(rc)); + } else { + crm_debug("Resource update %d complete", call_id); + } +} + + void do_update_resource(lrm_op_t* op) { /* */ int rc = cib_ok; crm_data_t *update, *iter; - CRM_DEV_ASSERT(op != NULL); - if(crm_assert_failed) { - return; - } + CRM_CHECK(op != NULL, return); update = create_node_state( fsa_our_uname, NULL, NULL, NULL, NULL, NULL, FALSE, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); if(op->interval == 0) { lrm_rsc_t *rsc = NULL; crm_debug_2("Updating %s resource definitions after %s op", op->rsc_id, op->op_type); rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, op->rsc_id); crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class); crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER,rsc->provider); lrm_free_rsc(rsc); } build_operation_update(iter, op, __FUNCTION__, 0); /* make it an asyncronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down then anyway? * (probably because of an internal error) * * Worst case: * we get shot for having resources "running" when the really weren't * * the alternative however means blocking here for too long, which * isnt acceptable */ fsa_cib_update(XML_CIB_TAG_STATUS, update, cib_quorum_override, rc); if(rc > 0) { /* the return code is a call number, not an error code */ - crm_debug_3("Sent resource state update message: %d", rc); + crm_debug("Sent resource state update message: %d", rc); + add_cib_op_callback(rc, FALSE, NULL, cib_rsc_callback); } else { crm_err("Resource state update failed: %s", cib_error2string(rc)); CRM_DEV_ASSERT(rc == cib_ok); } free_xml(update); } enum crmd_fsa_input do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data) { lrm_op_t* op = NULL; const char *last_op = NULL; const char *probe_s = NULL; gboolean is_probe = FALSE; int log_rsc_err = LOG_WARNING; if(msg_data->fsa_cause != C_LRM_OP_CALLBACK) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } op = fsa_typed_data(fsa_dt_lrm); CRM_DEV_ASSERT(op != NULL); CRM_DEV_ASSERT(op != NULL && op->rsc_id != NULL); if(crm_assert_failed) { return I_NULL; } probe_s = g_hash_table_lookup(op->params, XML_ATTR_LRM_PROBE); is_probe = crm_is_true(probe_s); switch(op->op_status) { case LRM_OP_PENDING: /* this really shouldnt happen */ crm_err("LRM operation (%d) %s_%d on %s %s: %s", op->call_id, op->op_type, op->interval, crm_str(op->rsc_id), op_status2text(op->op_status), execra_code2string(op->rc)); break; case LRM_OP_ERROR: if(is_probe) { log_rsc_err = LOG_INFO; } crm_log_maybe(log_rsc_err, "LRM operation (%d) %s_%d on %s %s: (%d) %s", op->call_id, op->op_type, op->interval, crm_str(op->rsc_id), op_status2text(op->op_status), op->rc, execra_code2string(op->rc)); if(op->output != NULL) { crm_debug("Result: %s", op->output); } break; case LRM_OP_CANCELLED: crm_warn("LRM operation (%d) %s_%d on %s %s", op->call_id, op->op_type, op->interval, crm_str(op->rsc_id), op_status2text(op->op_status)); return I_NULL; break; case LRM_OP_TIMEOUT: last_op = g_hash_table_lookup( resources_confirmed, crm_strdup(op->rsc_id)); if(safe_str_eq(last_op, CRMD_ACTION_STOP) && safe_str_eq(op->op_type, CRMD_ACTION_MON)) { crm_err("LRM sent a timed out %s operation" " _after_ a confirmed stop", op->op_type); return I_NULL; } crm_err("LRM operation (%d) %s_%d on %s %s", op->call_id, op->op_type, op->interval, crm_str(op->rsc_id), op_status2text(op->op_status)); break; case LRM_OP_NOTSUPPORTED: crm_err("LRM operation (%d) %s_%d on %s %s", op->call_id, op->op_type, op->interval, crm_str(op->rsc_id), op_status2text(op->op_status)); break; case LRM_OP_DONE: crm_info("LRM operation (%d) %s_%d on %s %s", op->call_id, op->op_type, op->interval, crm_str(op->rsc_id), op_status2text(op->op_status)); break; } g_hash_table_replace(resources_confirmed, crm_strdup(op->rsc_id), crm_strdup(op->op_type)); do_update_resource(op); if(g_hash_table_size(shutdown_ops) > 0) { char *op_id = make_stop_id(op->rsc_id, op->call_id); if(g_hash_table_remove(shutdown_ops, op_id)) { crm_debug_2("Op %d (%s %s) confirmed", op->call_id, op->op_type, op->rsc_id); } else if(op->interval == 0) { crm_err("Op %d (%s %s) not matched: %s", op->call_id, op->op_type, op->rsc_id, op_id); } crm_free(op_id); } if(is_set(fsa_input_register, R_SENT_RSC_STOP)) { if(g_hash_table_size(shutdown_ops) == 0) { register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } else { crm_info("Still waiting for %d pending stop operations" " to complete before exiting", g_hash_table_size(shutdown_ops)); g_hash_table_foreach( shutdown_ops, ghash_print_pending, NULL); } } return I_NULL; } char * make_stop_id(const char *rsc, int call_id) { char *op_id = NULL; crm_malloc0(op_id, strlen(rsc) + 34); if(op_id != NULL) { snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id); } return op_id; } void ghash_print_pending(gpointer key, gpointer value, gpointer user_data) { const char *uname = key; crm_debug("Pending action: %s", uname); } gboolean resource_stopped(gpointer key, gpointer value, gpointer user_data) { const char *this_rsc = value; const char *target_rsc = user_data; if(safe_str_eq(this_rsc, target_rsc)) { return TRUE; } return FALSE; } diff --git a/crm/pengine/pengine.c b/crm/pengine/pengine.c index 9c0542a55e..481a231667 100755 --- a/crm/pengine/pengine.c +++ b/crm/pengine/pengine.c @@ -1,388 +1,389 @@ -/* $Id: pengine.c,v 1.108 2006/04/04 17:15:43 andrew Exp $ */ +/* $Id: pengine.c,v 1.109 2006/04/21 07:08:04 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include crm_data_t * do_calculations( pe_working_set_t *data_set, crm_data_t *xml_input, ha_time_t *now); gboolean was_processing_error = FALSE; gboolean was_processing_warning = FALSE; gboolean was_config_error = FALSE; gboolean was_config_warning = FALSE; unsigned int pengine_input_loglevel = LOG_INFO; #define PE_WORKING_DIR HA_VARLIBDIR"/heartbeat/pengine" extern int transition_id; #define get_series() was_processing_error?1:was_processing_warning?2:3 typedef struct series_s { int id; const char *name; const char *param; int wrap; } series_t; series_t series[] = { { 0, "pe-unknown", "_dont_match_anything_", -1 }, { 0, "pe-error", "pe-error-series-max", -1 }, { 0, "pe-warn", "pe-warn-series-max", 200 }, { 0, "pe-input", "pe-input-series-max", 400 }, }; gboolean process_pe_message(HA_Message *msg, crm_data_t * xml_data, IPC_Channel *sender) { const char *sys_to = cl_get_string(msg, F_CRM_SYS_TO); const char *op = cl_get_string(msg, F_CRM_TASK); const char *ref = cl_get_string(msg, XML_ATTR_REFERENCE); crm_debug_3("Processing %s op (ref=%s)...", op, ref); if(op == NULL){ /* error */ } else if(strcmp(op, CRM_OP_HELLO) == 0) { /* ignore */ } else if(safe_str_eq(cl_get_string(msg, F_CRM_MSG_TYPE), XML_ATTR_RESPONSE)) { /* ignore */ } else if(sys_to == NULL || strcmp(sys_to, CRM_SYSTEM_PENGINE) != 0) { crm_debug_3("Bad sys-to %s", crm_str(sys_to)); return FALSE; } else if(strcmp(op, CRM_OP_PECALC) == 0) { int seq = -1; int series_id = 0; int series_wrap = 0; char *filename = NULL; const char *value = NULL; pe_working_set_t data_set; crm_data_t *generation = create_xml_node(NULL, XML_TAG_CIB); crm_data_t *log_input = copy_xml(xml_data); #if HAVE_BZLIB_H gboolean compress = TRUE; #else gboolean compress = FALSE; #endif copy_in_properties(generation, xml_data); crm_log_xml_info(generation, "[generation]"); was_processing_error = FALSE; was_processing_warning = FALSE; crm_zero_mem_stats(NULL); do_calculations(&data_set, xml_data, NULL); crm_log_xml_debug_3(data_set.graph, "[out]"); if(send_ipc_reply(sender, msg, data_set.graph) == FALSE) { crm_err("Answer could not be sent"); } series_id = get_series(); series_wrap = series[series_id].wrap; value = g_hash_table_lookup( data_set.config_hash, series[series_id].param); if(value != NULL) { series_wrap = crm_int_helper(value, NULL); if(errno != 0) { series_wrap = series[series_id].wrap; } } else { pe_config_warn("No value specified for cluster" " preference: %s", series[series_id].param); } data_set.input = NULL; cleanup_calculations(&data_set); if(is_ipc_empty(sender) && crm_mem_stats(NULL)) { pe_warn("Unfree'd memory"); } seq = get_last_sequence(PE_WORKING_DIR, series[series_id].name); filename = generate_series_filename( PE_WORKING_DIR, series[series_id].name, seq, compress); write_xml_file(log_input, filename, compress); write_last_sequence(PE_WORKING_DIR, series[series_id].name, seq+1, series_wrap); if(was_processing_error) { crm_err("Transition %d:" " ERRORs found during PE processing." - " Input stored in: %s", + " PEngine Input stored in: %s", transition_id, filename); } else if(was_processing_warning) { crm_warn("Transition %d:" " WARNINGs found during PE processing." - " Input stored in: %s", + " PEngine Input stored in: %s", transition_id, filename); } else { - crm_info("PEngine Input stored in: %s", filename); + crm_info("Transition %d: PEngine Input stored in: %s", + transition_id, filename); } if(was_config_error) { crm_info("Configuration ERRORs found during PE processing." " Please run \"crm_verify -L\" to identify issues."); } else if(was_processing_warning) { crm_info("Configuration WARNINGs found during PE processing." " Please run \"crm_verify -L\" to identify issues."); } free_xml(generation); free_xml(log_input); crm_free(filename); } else if(strcmp(op, CRM_OP_QUIT) == 0) { crm_warn("Received quit message, terminating"); exit(0); } return TRUE; } #define MEMCHECK_STAGE_0 0 #define check_and_exit(stage) cleanup_calculations(data_set); \ crm_mem_stats(NULL); \ crm_err("Exiting: stage %d", stage); \ exit(1); crm_data_t * do_calculations(pe_working_set_t *data_set, crm_data_t *xml_input, ha_time_t *now) { int rsc_log_level = LOG_INFO; /* pe_debug_on(); */ set_working_set_defaults(data_set); data_set->input = xml_input; data_set->now = now; if(data_set->now == NULL) { data_set->now = new_ha_date(TRUE); } #if MEMCHECK_STAGE_SETUP check_and_exit(-1); #endif crm_debug_5("unpack"); stage0(data_set); #if MEMCHECK_STAGE_0 check_and_exit(0); #endif slist_iter(rsc, resource_t, data_set->resources, lpc, rsc->fns->print(rsc, NULL, pe_print_log, &rsc_log_level); ); crm_debug_5("apply placement constraints"); stage1(data_set); #if MEMCHECK_STAGE_1 check_and_exit(1); #endif crm_debug_5("color resources"); stage2(data_set); #if MEMCHECK_STAGE_2 check_and_exit(2); #endif /* unused */ stage3(data_set); #if MEMCHECK_STAGE_3 check_and_exit(3); #endif crm_debug_5("assign nodes to colors"); stage4(data_set); #if MEMCHECK_STAGE_4 check_and_exit(4); #endif crm_debug_5("creating actions and internal ording constraints"); stage5(data_set); #if MEMCHECK_STAGE_5 check_and_exit(5); #endif crm_debug_5("processing fencing and shutdown cases"); stage6(data_set); #if MEMCHECK_STAGE_6 check_and_exit(6); #endif crm_debug_5("applying ordering constraints"); stage7(data_set); #if MEMCHECK_STAGE_7 check_and_exit(7); #endif crm_debug_5("creating transition graph"); stage8(data_set); #if MEMCHECK_STAGE_8 check_and_exit(8); #endif crm_debug_2("=#=#=#=#= Summary =#=#=#=#="); crm_debug_2("========= All Actions ========="); slist_iter(action, action_t, data_set->actions, lpc, log_action(LOG_DEBUG_2, "\t", action, TRUE) ); crm_debug_2("\t========= Set %d (Un-runnable) =========", -1); crm_action_debug_2( slist_iter(action, action_t, data_set->actions, lpc, if(action->optional == FALSE && action->runnable == FALSE && action->pseudo == FALSE) { log_action(LOG_DEBUG_2, "\t", action, TRUE); } ) ); return data_set->graph; } void cleanup_calculations(pe_working_set_t *data_set) { GListPtr iterator = NULL; if(data_set == NULL) { return; } if(data_set->config_hash != NULL) { g_hash_table_destroy(data_set->config_hash); } crm_free(data_set->dc_uuid); crm_free(data_set->transition_idle_timeout); crm_debug_3("deleting order cons"); pe_free_ordering(data_set->ordering_constraints); crm_debug_3("deleting actions"); pe_free_actions(data_set->actions); crm_debug_3("deleting resources"); pe_free_resources(data_set->resources); crm_debug_3("deleting nodes"); pe_free_nodes(data_set->nodes); crm_debug_3("deleting colors"); pe_free_colors(data_set->colors); crm_debug_3("deleting node cons"); iterator = data_set->placement_constraints; while(iterator) { pe_free_rsc_to_node(iterator->data); iterator = iterator->next; } if(data_set->placement_constraints != NULL) { g_list_free(data_set->placement_constraints); } free_xml(data_set->graph); free_ha_date(data_set->now); free_xml(data_set->input); data_set->stonith_action = NULL; } void set_working_set_defaults(pe_working_set_t *data_set) { data_set->input = NULL; data_set->now = NULL; data_set->graph = NULL; data_set->transition_idle_timeout = crm_strdup("60s"); data_set->dc_uuid = NULL; data_set->dc_node = NULL; data_set->have_quorum = FALSE; data_set->stonith_enabled = FALSE; data_set->stonith_action = NULL; data_set->symmetric_cluster = TRUE; data_set->is_managed_default = TRUE; data_set->no_quorum_policy = no_quorum_freeze; data_set->remove_after_stop = FALSE; data_set->stop_action_orphans = TRUE; data_set->stop_rsc_orphans = FALSE; data_set->short_rsc_names = FALSE; data_set->config_hash = NULL; data_set->nodes = NULL; data_set->resources = NULL; data_set->ordering_constraints = NULL; data_set->placement_constraints = NULL; data_set->no_color = NULL; data_set->colors = NULL; data_set->actions = NULL; data_set->num_synapse = 0; data_set->max_valid_nodes = 0; data_set->order_id = 1; data_set->action_id = 1; data_set->color_id = 0; data_set->default_resource_stickiness = 0; data_set->default_resource_fail_stickiness = 0; } diff --git a/crm/pengine/utils.c b/crm/pengine/utils.c index ceb04a2b9f..b00d94723a 100644 --- a/crm/pengine/utils.c +++ b/crm/pengine/utils.c @@ -1,1892 +1,1891 @@ -/* $Id: utils.c,v 1.130 2006/04/18 11:15:37 andrew Exp $ */ +/* $Id: utils.c,v 1.131 2006/04/21 07:08:04 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include void print_str_str(gpointer key, gpointer value, gpointer user_data); gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); void unpack_operation( action_t *action, crm_data_t *xml_obj, pe_working_set_t* data_set); /* only for rsc_colocation constraints */ rsc_colocation_t * invert_constraint(rsc_colocation_t *constraint) { rsc_colocation_t *inverted_con = NULL; crm_debug_3("Inverting constraint"); if(constraint == NULL) { pe_err("Cannot invert NULL constraint"); return NULL; } crm_malloc0(inverted_con, sizeof(rsc_colocation_t)); if(inverted_con == NULL) { return NULL; } inverted_con->id = constraint->id; inverted_con->strength = constraint->strength; /* swap the direction */ inverted_con->rsc_lh = constraint->rsc_rh; inverted_con->rsc_rh = constraint->rsc_lh; inverted_con->state_lh = constraint->state_rh; inverted_con->state_rh = constraint->state_lh; crm_action_debug_3( print_rsc_colocation("Inverted constraint", inverted_con, FALSE)); return inverted_con; } /* are the contents of list1 and list2 equal * nodes with weight < 0 are ignored if filter == TRUE * * slow but linear * */ gboolean node_list_eq(GListPtr list1, GListPtr list2, gboolean filter) { node_t *other_node; GListPtr lhs = list1; GListPtr rhs = list2; slist_iter( node, node_t, lhs, lpc, if(node == NULL || (filter && node->weight < 0)) { continue; } other_node = (node_t*) pe_find_node_id(rhs, node->details->id); if(other_node == NULL || other_node->weight < 0) { return FALSE; } ); lhs = list2; rhs = list1; slist_iter( node, node_t, lhs, lpc, if(node == NULL || (filter && node->weight < 0)) { continue; } other_node = (node_t*) pe_find_node_id(rhs, node->details->id); if(other_node == NULL || other_node->weight < 0) { return FALSE; } ); return TRUE; } /* the intersection of list1 and list2 */ GListPtr node_list_and(GListPtr list1, GListPtr list2, gboolean filter) { GListPtr result = NULL; unsigned lpc = 0; for(lpc = 0; lpc < g_list_length(list1); lpc++) { node_t *node = (node_t*)g_list_nth_data(list1, lpc); node_t *other_node = pe_find_node_id(list2, node->details->id); node_t *new_node = NULL; if(other_node != NULL) { new_node = node_copy(node); crm_debug_3("Copied node %s: %d", new_node->details->uname, new_node->weight); } if(new_node != NULL) { new_node->weight = merge_weights( new_node->weight, other_node->weight); crm_debug_3("New node weight for %s: %d", new_node->details->uname, new_node->weight); if(filter && new_node->weight < 0) { crm_free(new_node); new_node = NULL; } } if(new_node != NULL) { result = g_list_append(result, new_node); } } return result; } /* list1 - list2 */ GListPtr node_list_minus(GListPtr list1, GListPtr list2, gboolean filter) { GListPtr result = NULL; slist_iter( node, node_t, list1, lpc, node_t *other_node = pe_find_node_id(list2, node->details->id); node_t *new_node = NULL; if(node == NULL || other_node != NULL || (filter && node->weight < 0)) { continue; } new_node = node_copy(node); result = g_list_append(result, new_node); ); crm_debug_3("Minus result len: %d", g_list_length(result)); return result; } /* list1 + list2 - (intersection of list1 and list2) */ GListPtr node_list_xor(GListPtr list1, GListPtr list2, gboolean filter) { GListPtr result = NULL; slist_iter( node, node_t, list1, lpc, node_t *new_node = NULL; node_t *other_node = (node_t*) pe_find_node_id(list2, node->details->id); if(node == NULL || other_node != NULL || (filter && node->weight < 0)) { continue; } new_node = node_copy(node); result = g_list_append(result, new_node); ); slist_iter( node, node_t, list2, lpc, node_t *new_node = NULL; node_t *other_node = (node_t*) pe_find_node_id(list1, node->details->id); if(node == NULL || other_node != NULL || (filter && node->weight < 0)) { continue; } new_node = node_copy(node); result = g_list_append(result, new_node); ); crm_debug_3("Xor result len: %d", g_list_length(result)); return result; } GListPtr node_list_or(GListPtr list1, GListPtr list2, gboolean filter) { node_t *other_node = NULL; GListPtr result = NULL; gboolean needs_filter = FALSE; result = node_list_dup(list1, filter); slist_iter( node, node_t, list2, lpc, if(node == NULL) { continue; } other_node = (node_t*)pe_find_node_id( result, node->details->id); if(other_node != NULL) { other_node->weight = merge_weights( other_node->weight, node->weight); if(filter && node->weight < 0) { needs_filter = TRUE; } } else if(filter == FALSE || node->weight >= 0) { node_t *new_node = node_copy(node); result = g_list_append(result, new_node); } ); /* not the neatest way, but the most expedient for now */ if(filter && needs_filter) { GListPtr old_result = result; result = node_list_dup(old_result, filter); pe_free_shallow_adv(old_result, TRUE); } return result; } GListPtr node_list_dup(GListPtr list1, gboolean filter) { GListPtr result = NULL; slist_iter( this_node, node_t, list1, lpc, node_t *new_node = NULL; if(filter && this_node->weight < 0) { continue; } new_node = node_copy(this_node); if(new_node != NULL) { result = g_list_append(result, new_node); } ); return result; } node_t * node_copy(node_t *this_node) { node_t *new_node = NULL; CRM_CHECK(this_node != NULL, return NULL); crm_malloc0(new_node, sizeof(node_t)); CRM_CHECK(new_node != NULL, return NULL); crm_debug_5("Copying %p (%s) to %p", this_node, this_node->details->uname, new_node); new_node->weight = this_node->weight; new_node->fixed = this_node->fixed; new_node->details = this_node->details; return new_node; } /* * Create a new color with the contents of "nodes" as the list of * possible nodes that resources with this color can be run on. * * Typically, when creating a color you will provide the node list from * the resource you will first assign the color to. * * If "colors" != NULL, it will be added to that list * If "resources" != NULL, it will be added to every provisional resource * in that list */ color_t * create_color( pe_working_set_t *data_set, resource_t *resource, GListPtr node_list) { color_t *new_color = NULL; crm_debug_5("Creating color"); crm_malloc0(new_color, sizeof(color_t)); if(new_color == NULL) { return NULL; } new_color->id = data_set->color_id++; new_color->local_weight = 1.0; crm_debug_5("Creating color details"); crm_malloc0(new_color->details, sizeof(struct color_shared_s)); if(new_color->details == NULL) { crm_free(new_color); return NULL; } new_color->details->id = new_color->id; new_color->details->highest_priority = -1; new_color->details->chosen_node = NULL; new_color->details->candidate_nodes = NULL; new_color->details->allocated_resources = NULL; new_color->details->pending = TRUE; if(resource != NULL) { crm_debug_5("populating node list"); new_color->details->highest_priority = resource->priority; new_color->details->candidate_nodes = node_list_dup(node_list, TRUE); } crm_action_debug_3(print_color("Created color", new_color, TRUE)); CRM_CHECK(data_set != NULL, return NULL); data_set->colors = g_list_append(data_set->colors, new_color); return new_color; } color_t * copy_color(color_t *a_color) { color_t *color_copy = NULL; if(a_color == NULL) { pe_err("Cannot copy NULL"); return NULL; } crm_malloc0(color_copy, sizeof(color_t)); if(color_copy != NULL) { color_copy->id = a_color->id; color_copy->details = a_color->details; color_copy->local_weight = 1.0; } return color_copy; } resource_t * pe_find_resource(GListPtr rsc_list, const char *id) { unsigned lpc = 0; resource_t *rsc = NULL; resource_t *child_rsc = NULL; crm_debug_4("Looking for %s in %d objects", id, g_list_length(rsc_list)); for(lpc = 0; lpc < g_list_length(rsc_list); lpc++) { rsc = g_list_nth_data(rsc_list, lpc); if(rsc == NULL) { } else if(safe_str_eq(rsc->id, id)){ crm_debug_4("Found a match for %s", id); return rsc; #if 0 } else if(data_set->short_rsc_names == FALSE && safe_str_eq(rsc->graph_name, id)) { #else } else if(safe_str_eq(rsc->graph_name, id)) { #endif crm_debug_3("Found a match for %s", id); return rsc; } } for(lpc = 0; lpc < g_list_length(rsc_list); lpc++) { rsc = g_list_nth_data(rsc_list, lpc); child_rsc = rsc->fns->find_child(rsc, id); if(child_rsc != NULL) { crm_debug_4("Found a match for %s in %s", id, rsc->id); return child_rsc; } } - /* error */ - crm_debug("No match for %s", id); + crm_debug_2("No match for %s", id); return NULL; } node_t * pe_find_node_id(GListPtr nodes, const char *id) { unsigned lpc = 0; node_t *node = NULL; for(lpc = 0; lpc < g_list_length(nodes); lpc++) { node = g_list_nth_data(nodes, lpc); if(safe_str_eq(node->details->id, id)) { return node; } } /* error */ return NULL; } gint gslist_color_compare(gconstpointer a, gconstpointer b); color_t * find_color(GListPtr candidate_colors, color_t *other_color) { GListPtr tmp = g_list_find_custom(candidate_colors, other_color, gslist_color_compare); if(tmp != NULL) { return (color_t *)tmp->data; } return NULL; } gint gslist_color_compare(gconstpointer a, gconstpointer b) { const color_t *color_a = (const color_t*)a; const color_t *color_b = (const color_t*)b; /* crm_debug_5("%d vs. %d", a?color_a->id:-2, b?color_b->id:-2); */ if(a == b) { return 0; } else if(a == NULL || b == NULL) { return 1; } else if(color_a->id == color_b->id) { return 0; } return 1; } gint sort_rsc_priority(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t*)a; const resource_t *resource2 = (const resource_t*)b; if(a == NULL && b == NULL) { return 0; } if(a == NULL) { return 1; } if(b == NULL) { return -1; } if(resource1->priority > resource2->priority) { return -1; } if(resource1->priority < resource2->priority) { return 1; } return 0; } gint sort_rsc_node_weight(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t*)a; const resource_t *resource2 = (const resource_t*)b; const color_t *color1 = NULL; const color_t *color2 = NULL; const node_t *node1 = NULL; const node_t *node2 = NULL; CRM_ASSERT(resource1 != NULL); CRM_ASSERT(resource2 != NULL); color1 = resource1->color; color2 = resource2->color; CRM_CHECK(color1 != NULL, return 0); CRM_CHECK(color2 != NULL, return 0); node1 = color1->details->chosen_node; node2 = color2->details->chosen_node; if(node1 == NULL && node2 == NULL) { return 0; } if(node1 == NULL) { return 1; } if(node2 == NULL) { return -1; } CRM_ASSERT(node1 != NULL); CRM_ASSERT(node2 != NULL); if(node1->weight > node2->weight) { crm_debug("%s (%d) > %s (%d) : %s vs. %s", node1->details->id, node1->weight, node2->details->id, node2->weight, resource1->id, resource2->id); return -1; } if(node1->weight < node2->weight) { crm_debug("%s (%d) < %s (%d) : %s vs. %s", node1->details->id, node1->weight, node2->details->id, node2->weight, resource1->id, resource2->id); return 1; } crm_debug("%s (%d) == %s (%d) : %s vs. %s", node1->details->id, node1->weight, node2->details->id, node2->weight, resource1->id, resource2->id); return 0; } /* lowest to highest */ gint sort_action_id(gconstpointer a, gconstpointer b) { const action_wrapper_t *action_wrapper2 = (const action_wrapper_t*)a; const action_wrapper_t *action_wrapper1 = (const action_wrapper_t*)b; if(a == NULL) { return 1; } if(b == NULL) { return -1; } if(action_wrapper1->action->id > action_wrapper2->action->id) { return -1; } if(action_wrapper1->action->id < action_wrapper2->action->id) { return 1; } return 0; } gint sort_cons_strength(gconstpointer a, gconstpointer b) { const rsc_colocation_t *rsc_constraint1 = (const rsc_colocation_t*)a; const rsc_colocation_t *rsc_constraint2 = (const rsc_colocation_t*)b; if(a == NULL) { return 1; } if(b == NULL) { return -1; } if(rsc_constraint1->strength > rsc_constraint2->strength) { return 1; } if(rsc_constraint1->strength < rsc_constraint2->strength) { return -1; } return 0; } gint sort_color_weight(gconstpointer a, gconstpointer b) { const color_t *color1 = (const color_t*)a; const color_t *color2 = (const color_t*)b; if(a == NULL) { return 1; } if(b == NULL) { return -1; } if(color1->local_weight > color2->local_weight) { return -1; } if(color1->local_weight < color2->local_weight) { return 1; } return 0; } /* return -1 if 'a' is more preferred * return 1 if 'b' is more preferred */ gint sort_node_weight(gconstpointer a, gconstpointer b) { const node_t *node1 = (const node_t*)a; const node_t *node2 = (const node_t*)b; int node1_weight = 0; int node2_weight = 0; if(a == NULL) { return 1; } if(b == NULL) { return -1; } node1_weight = node1->weight; node2_weight = node2->weight; if(node1->details->unclean || node1->details->shutdown) { node1_weight = -INFINITY; } if(node2->details->unclean || node2->details->shutdown) { node2_weight = -INFINITY; } if(node1_weight > node2_weight) { crm_debug_2("%s (%d) > %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); return -1; } if(node1_weight < node2_weight) { crm_debug_2("%s (%d) < %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); return 1; } crm_debug_3("%s (%d) == %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); /* now try to balance resources across the cluster */ if(node1->details->num_resources < node2->details->num_resources) { crm_debug_2("%s (%d) < %s (%d) : resources", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return -1; } else if(node1->details->num_resources > node2->details->num_resources) { crm_debug_2("%s (%d) > %s (%d) : resources", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return 1; } crm_debug_4("%s = %s", node1->details->uname, node2->details->uname); return 0; } action_t * custom_action(resource_t *rsc, char *key, const char *task, node_t *on_node, gboolean optional, gboolean save_action, pe_working_set_t *data_set) { action_t *action = NULL; GListPtr possible_matches = NULL; CRM_CHECK(key != NULL, return NULL); CRM_CHECK(task != NULL, return NULL); if(save_action && rsc != NULL) { possible_matches = find_actions(rsc->actions, key, on_node); } if(possible_matches != NULL) { crm_free(key); if(g_list_length(possible_matches) > 1) { pe_warn("Action %s for %s on %s exists %d times", task, rsc?rsc->id:"", on_node?on_node->details->uname:"", g_list_length(possible_matches)); } action = g_list_nth_data(possible_matches, 0); crm_debug_4("Found existing action (%d) %s for %s on %s", action->id, task, rsc?rsc->id:"", on_node?on_node->details->uname:""); } if(action == NULL) { crm_debug_2("Creating action %d: %s for %s on %s", data_set->action_id, task, rsc?rsc->id:"", on_node?on_node->details->uname:""); crm_malloc0(action, sizeof(action_t)); if(action != NULL) { if(save_action) { action->id = data_set->action_id++; } else { action->id = 0; } action->rsc = rsc; action->task = task; action->node = on_node; action->actions_before = NULL; action->actions_after = NULL; action->failure_is_fatal = TRUE; action->pseudo = FALSE; action->dumped = FALSE; action->runnable = TRUE; action->processed = FALSE; action->optional = TRUE; action->seen_count = 0; action->extra = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); /* include our version number... * so future versions know what to be compatible * with when we're DC */ add_hash_param(action->extra, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); if(save_action) { data_set->actions = g_list_append( data_set->actions, action); } action->uuid = key; if(rsc != NULL) { action->op_entry = find_rsc_op_entry(rsc, key); unpack_operation( action, action->op_entry, data_set); if(save_action) { rsc->actions = g_list_append( rsc->actions, action); } } crm_debug_4("Action %d created", action->id); } } if(optional == FALSE && action->optional) { crm_debug_2("Action %d (%s) marked manditory", action->id, action->uuid); action->optional = FALSE; } if(rsc != NULL) { enum action_tasks a_task = text2task(action->task); if(action->node != NULL) { unpack_instance_attributes( action->op_entry, XML_TAG_ATTR_SETS, action->node, action->extra, NULL,0, data_set); } if(action->node == NULL) { action->runnable = FALSE; } else if(rsc->is_managed == FALSE) { crm_warn("Action %s %s is for %s (unmanaged)", action->uuid, task, rsc->id); action->optional = TRUE; /* action->runnable = FALSE; */ #if 0 } else if(action->node->details->unclean) { crm_warn("Action %s on %s is unrunnable (unclean)", action->uuid, action->node?action->node->details->uname:""); action->runnable = FALSE; #endif } else if(action->node->details->online == FALSE) { action->runnable = FALSE; crm_warn("Action %s on %s is unrunnable (offline)", action->uuid, action->node->details->uname); if(action->rsc->is_managed && save_action && a_task == stop_rsc) { crm_warn("Marking node %s unclean", action->node->details->uname); action->node->details->unclean = TRUE; } } else if(action->needs == rsc_req_nothing) { crm_debug_2("Action %s doesnt require anything", action->uuid); action->runnable = TRUE; #if 0 /* * No point checking this * - if we dont have quorum we cant stonith anyway */ } else if(action->needs == rsc_req_stonith) { crm_debug_2("Action %s requires only stonith", action->uuid); action->runnable = TRUE; #endif } else if(data_set->have_quorum == FALSE && data_set->no_quorum_policy == no_quorum_stop) { action->runnable = FALSE; crm_debug("%s\t%s %s (cancelled : quorum)", action->node->details->uname, action->task, rsc->id); } else if(data_set->have_quorum == FALSE && data_set->no_quorum_policy == no_quorum_freeze) { crm_debug_2("Check resource is already active"); if(rsc->fns->active(rsc, TRUE) == FALSE) { action->runnable = FALSE; crm_debug("%s\t%s %s (cancelled : quorum freeze)", action->node->details->uname, action->task, rsc->id); } } else { crm_debug_2("Action %s is runnable", action->uuid); action->runnable = TRUE; } if(save_action) { switch(a_task) { case stop_rsc: rsc->stopping = TRUE; break; case start_rsc: rsc->starting = FALSE; if(action->runnable) { rsc->starting = TRUE; } break; default: break; } } } return action; } void unpack_operation( action_t *action, crm_data_t *xml_obj, pe_working_set_t* data_set) { int lpc = 0; const char *value = NULL; const char *fields[] = { XML_LRM_ATTR_INTERVAL, "timeout", "start_delay", }; CRM_CHECK(action->rsc != NULL, return); if(xml_obj != NULL) { value = crm_element_value(xml_obj, "prereq"); } if(value == NULL && safe_str_eq(action->task, CRMD_ACTION_START)) { value = g_hash_table_lookup( action->rsc->parameters, "start_prereq"); } if(value == NULL && safe_str_neq(action->task, CRMD_ACTION_START)) { /* todo: integrate stop as an option? */ action->needs = rsc_req_nothing; value = "nothing (default)"; } else if(safe_str_eq(value, "nothing")) { action->needs = rsc_req_nothing; } else if(safe_str_eq(value, "quorum")) { action->needs = rsc_req_quorum; } else if(safe_str_eq(value, "fencing")) { action->needs = rsc_req_stonith; } else if(data_set->no_quorum_policy == no_quorum_ignore) { action->needs = rsc_req_nothing; value = "nothing (default)"; } else if(data_set->no_quorum_policy == no_quorum_freeze && data_set->stonith_enabled) { action->needs = rsc_req_stonith; value = "fencing (default)"; } else { action->needs = rsc_req_quorum; value = "quorum (default)"; } crm_debug_2("\tAction %s requires: %s", action->task, value); value = NULL; if(xml_obj != NULL) { value = crm_element_value(xml_obj, "on_fail"); } if(value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) { value = g_hash_table_lookup( action->rsc->parameters, "on_stopfail"); if(value != NULL) { #if CRM_DEPRECATED_SINCE_2_0_2 pe_config_err("The \"on_stopfail\" attribute used in" " %s has been deprecated since 2.0.2", action->rsc->id); #else pe_config_err("The \"on_stopfail\" attribute used in" " %s has been deprecated since 2.0.2" " and is now disabled", action->rsc->id); value = NULL; #endif pe_config_err("Please use specify the \"on_fail\"" " attribute on the \"stop\" operation" " instead"); } } if(value == NULL) { } else if(safe_str_eq(value, "block")) { action->on_fail = action_fail_block; } else if(safe_str_eq(value, "fence")) { action->on_fail = action_fail_fence; value = "node fencing"; } else if(safe_str_eq(value, "ignore")) { action->on_fail = action_fail_ignore; value = "ignore"; } else if(safe_str_eq(value, "migrate")) { action->on_fail = action_fail_migrate; value = "force migration"; } else if(safe_str_eq(value, "stop")) { action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } else if(safe_str_eq(value, "restart") || safe_str_eq(value, "nothing")) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate)"; } else { pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value); value = NULL; } /* defaults */ if(value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) { if(data_set->stonith_enabled) { action->on_fail = action_fail_fence; value = "resource fence (default)"; } else { action->on_fail = action_fail_block; value = "resource block (default)"; } } else if(value == NULL) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate) (default)"; } crm_debug_2("\t%s failure handling: %s", action->task, value); value = NULL; if(xml_obj != NULL) { value = crm_element_value(xml_obj, "role_after_failure"); } if(value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) { action->fail_role = text2role(value); } /* defaults */ if(action->fail_role == RSC_ROLE_UNKNOWN) { if(safe_str_eq(action->task, CRMD_ACTION_PROMOTE)) { action->fail_role = RSC_ROLE_SLAVE; } else { action->fail_role = RSC_ROLE_STARTED; } } crm_debug_2("\t%s failure results in: %s", action->task, role2text(action->fail_role)); if(xml_obj == NULL) { return; } for(;lpc < DIMOF(fields); lpc++) { value = crm_element_value(xml_obj, fields[lpc]); if(value != NULL) { int tmp_i = crm_get_msec(value); char *tmp_ms = NULL; if(tmp_i < 0) { tmp_i = 0; } tmp_ms = crm_itoa(tmp_i); g_hash_table_insert( action->extra, crm_strdup(fields[lpc]), tmp_ms); } } /* if(safe_str_eq(native_data->agent->class, "stonith")) { */ /* if(rsc->start_needs == rsc_req_stonith) { */ /* pe_err("Stonith resources (eg. %s) cannot require" */ /* " fencing to start", rsc->id); */ /* } */ /* rsc->start_needs = rsc_req_quorum; */ /* } */ } crm_data_t * find_rsc_op_entry(resource_t *rsc, const char *key) { const char *name = NULL; const char *value = NULL; const char *interval = NULL; char *match_key = NULL; crm_data_t *op = NULL; xml_child_iter_filter( rsc->ops_xml, operation, "op", name = crm_element_value(operation, "name"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "disabled"); if(crm_is_true(value)) { crm_debug_2("%s disabled", ID(operation)); continue; } match_key = generate_op_key( rsc->graph_name, name, crm_get_msec(interval)); crm_debug_2("Matching %s with %s", key, match_key); if(safe_str_eq(key, match_key)) { op = operation; } crm_free(match_key); if(op != NULL) { return op; } ); crm_debug_2("No match for %s", key); return op; } const char * fail2text(enum action_fail_response fail) { const char *result = ""; switch(fail) { case action_fail_ignore: result = "ignore"; break; case action_fail_block: result = "block"; break; case action_fail_recover: result = "recover"; break; case action_fail_migrate: result = "migrate"; break; case action_fail_fence: result = "fence"; break; } return result; } const char * strength2text(enum con_strength strength) { const char *result = ""; switch(strength) { case pecs_ignore: result = "ignore"; break; case pecs_must: result = XML_STRENGTH_VAL_MUST; break; case pecs_must_not: result = XML_STRENGTH_VAL_MUSTNOT; break; case pecs_startstop: result = "start/stop"; break; } return result; } const char * ordering_type2text(enum pe_ordering type) { const char *result = ""; switch(type) { case pe_ordering_manditory: result = "manditory"; break; case pe_ordering_restart: result = "restart"; break; case pe_ordering_recover: result = "recover"; break; case pe_ordering_optional: result = "optional"; break; case pe_ordering_postnotify: result = "post_notify"; break; } return result; } enum action_tasks text2task(const char *task) { if(safe_str_eq(task, CRMD_ACTION_STOP)) { return stop_rsc; } else if(safe_str_eq(task, CRMD_ACTION_STOPPED)) { return stopped_rsc; } else if(safe_str_eq(task, CRMD_ACTION_START)) { return start_rsc; } else if(safe_str_eq(task, CRMD_ACTION_STARTED)) { return started_rsc; } else if(safe_str_eq(task, CRM_OP_SHUTDOWN)) { return shutdown_crm; } else if(safe_str_eq(task, CRM_OP_FENCE)) { return stonith_node; } else if(safe_str_eq(task, CRMD_ACTION_MON)) { return monitor_rsc; } else if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) { return action_notify; } else if(safe_str_eq(task, CRMD_ACTION_NOTIFIED)) { return action_notified; } else if(safe_str_eq(task, CRMD_ACTION_PROMOTE)) { return action_promote; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTE)) { return action_demote; } else if(safe_str_eq(task, CRMD_ACTION_PROMOTED)) { return action_promoted; } else if(safe_str_eq(task, CRMD_ACTION_DEMOTED)) { return action_demoted; } else if(safe_str_eq(task, CRMD_ACTION_CANCEL)) { return no_action; } else if(safe_str_eq(task, CRMD_ACTION_DELETE)) { return no_action; } else if(safe_str_eq(task, CRMD_ACTION_STATUS)) { return no_action; } else if(safe_str_eq(task, CRM_OP_PROBED)) { return no_action; } else if(safe_str_eq(task, CRM_OP_LRM_REFRESH)) { return no_action; } pe_err("Unsupported action: %s", task); return no_action; } const char * task2text(enum action_tasks task) { const char *result = ""; switch(task) { case no_action: result = "no_action"; break; case stop_rsc: result = CRMD_ACTION_STOP; break; case stopped_rsc: result = CRMD_ACTION_STOPPED; break; case start_rsc: result = CRMD_ACTION_START; break; case started_rsc: result = CRMD_ACTION_STARTED; break; case shutdown_crm: result = CRM_OP_SHUTDOWN; break; case stonith_node: result = CRM_OP_FENCE; break; case monitor_rsc: result = CRMD_ACTION_MON; break; case action_notify: result = CRMD_ACTION_NOTIFY; break; case action_notified: result = CRMD_ACTION_NOTIFIED; break; case action_promote: result = CRMD_ACTION_PROMOTE; break; case action_promoted: result = CRMD_ACTION_PROMOTED; break; case action_demote: result = CRMD_ACTION_DEMOTE; break; case action_demoted: result = CRMD_ACTION_DEMOTED; break; } return result; } const char * role2text(enum rsc_role_e role) { CRM_CHECK(role >= RSC_ROLE_UNKNOWN, return RSC_ROLE_UNKNOWN_S); CRM_CHECK(role < RSC_ROLE_MAX, return RSC_ROLE_UNKNOWN_S); switch(role) { case RSC_ROLE_UNKNOWN: return RSC_ROLE_UNKNOWN_S; case RSC_ROLE_STOPPED: return RSC_ROLE_STOPPED_S; case RSC_ROLE_STARTED: return RSC_ROLE_STARTED_S; case RSC_ROLE_SLAVE: return RSC_ROLE_SLAVE_S; case RSC_ROLE_MASTER: return RSC_ROLE_MASTER_S; } return RSC_ROLE_UNKNOWN_S; } enum rsc_role_e text2role(const char *role) { if(safe_str_eq(role, RSC_ROLE_STOPPED_S)) { return RSC_ROLE_STOPPED; } else if(safe_str_eq(role, RSC_ROLE_STARTED_S)) { return RSC_ROLE_STARTED; } else if(safe_str_eq(role, RSC_ROLE_SLAVE_S)) { return RSC_ROLE_SLAVE; } else if(safe_str_eq(role, RSC_ROLE_MASTER_S)) { return RSC_ROLE_MASTER; } else if(safe_str_eq(role, RSC_ROLE_UNKNOWN_S)) { return RSC_ROLE_UNKNOWN; } crm_err("Unknown role: %s", role); return RSC_ROLE_UNKNOWN; } void print_node(const char *pre_text, node_t *node, gboolean details) { if(node == NULL) { crm_debug_4("%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } crm_debug_4("%s%s%sNode %s: (weight=%d, fixed=%s)", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", node->details==NULL?"error ":node->details->online?"":"Unavailable/Unclean ", node->details->uname, node->weight, node->fixed?"True":"False"); if(details && node != NULL && node->details != NULL) { char *pe_mutable = crm_strdup("\t\t"); crm_debug_4("\t\t===Node Attributes"); g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable); crm_free(pe_mutable); crm_debug_4("\t\t=== Resources"); slist_iter( rsc, resource_t, node->details->running_rsc, lpc, print_resource(LOG_DEBUG_4, "\t\t", rsc, FALSE); ); } } /* * Used by the HashTable for-loop */ void print_str_str(gpointer key, gpointer value, gpointer user_data) { crm_debug_4("%s%s %s ==> %s", user_data==NULL?"":(char*)user_data, user_data==NULL?"":": ", (char*)key, (char*)value); } void print_color_details(const char *pre_text, struct color_shared_s *color, gboolean details) { if(color == NULL) { crm_debug_4("%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } crm_debug_4("%s%sColor %d: node=%s (from %d candidates)", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", color->id, color->chosen_node==NULL?"":color->chosen_node->details->uname, g_list_length(color->candidate_nodes)); if(details) { slist_iter(node, node_t, color->candidate_nodes, lpc, print_node("\t", node, FALSE)); } } void print_color(const char *pre_text, color_t *color, gboolean details) { if(color == NULL) { crm_debug_4("%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } crm_debug_4("%s%sColor %d: (weight=%d, node=%s, possible=%d)", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", color->id, color->local_weight, safe_val5("",color,details,chosen_node,details,uname), g_list_length(color->details->candidate_nodes)); if(details) { print_color_details("\t", color->details, details); } } void print_rsc_to_node(const char *pre_text, rsc_to_node_t *cons, gboolean details) { if(cons == NULL) { crm_debug_4("%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } crm_debug_4("%s%s%s Constraint %s (%p) - %d nodes:", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", "rsc_to_node", cons->id, cons, g_list_length(cons->node_list_rh)); if(details == FALSE) { crm_debug_4("\t%s (node placement rule)", safe_val3(NULL, cons, rsc_lh, id)); slist_iter( node, node_t, cons->node_list_rh, lpc, print_node("\t\t-->", node, FALSE) ); } } void print_rsc_colocation(const char *pre_text, rsc_colocation_t *cons, gboolean details) { if(cons == NULL) { crm_debug_4("%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } crm_debug_4("%s%s%s Constraint %s (%p):", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", XML_CONS_TAG_RSC_DEPEND, cons->id, cons); if(details == FALSE) { crm_debug_4("\t%s --> %s, %s", safe_val3(NULL, cons, rsc_lh, id), safe_val3(NULL, cons, rsc_rh, id), strength2text(cons->strength)); } } void print_resource( int log_level, const char *pre_text, resource_t *rsc, gboolean details) { long options = pe_print_log; if(rsc == NULL) { crm_log_maybe(log_level-1, "%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } if(details) { options |= pe_print_details; } rsc->fns->print(rsc, pre_text, options, &log_level); } #define util_log(fmt...) do_crm_log(log_level, __FILE__, __FUNCTION__, fmt) void log_action(unsigned int log_level, const char *pre_text, action_t *action, gboolean details) { const char *node_uname = NULL; const char *node_uuid = NULL; if(action == NULL) { util_log("%s%s: ", pre_text==NULL?"":pre_text, pre_text==NULL?"":": "); return; } if(action->pseudo) { node_uname = NULL; node_uuid = NULL; } else if(action->node != NULL) { node_uname = action->node->details->uname; node_uuid = action->node->details->id; } else { node_uname = ""; node_uuid = NULL; } switch(text2task(action->task)) { case stonith_node: case shutdown_crm: crm_log_maybe(log_level, "%s%s%sAction %d: %s%s%s%s%s%s", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", action->pseudo?"Pseduo ":action->optional?"Optional ":action->runnable?action->processed?"":"(Provisional) ":"!!Non-Startable!! ", action->id, action->uuid, node_uname?"\ton ":"", node_uname?node_uname:"", node_uuid?"\t\t(":"", node_uuid?node_uuid:"", node_uuid?")":""); break; default: crm_log_maybe(log_level, "%s%s%sAction %d: %s %s%s%s%s%s%s", pre_text==NULL?"":pre_text, pre_text==NULL?"":": ", action->optional?"Optional ":action->pseudo?"Pseduo ":action->runnable?action->processed?"":"(Provisional) ":"!!Non-Startable!! ", action->id, action->uuid, safe_val3("", action, rsc, id), node_uname?"\ton ":"", node_uname?node_uname:"", node_uuid?"\t\t(":"", node_uuid?node_uuid:"", node_uuid?")":""); break; } if(details) { crm_log_maybe(log_level+1, "\t\t====== Preceeding Actions"); slist_iter( other, action_wrapper_t, action->actions_before, lpc, log_action(log_level+1, "\t\t", other->action, FALSE); ); #if 1 crm_log_maybe(log_level+1, "\t\t====== Subsequent Actions"); slist_iter( other, action_wrapper_t, action->actions_after, lpc, log_action(log_level+1, "\t\t", other->action, FALSE); ); #endif crm_log_maybe(log_level+1, "\t\t====== End"); } else { crm_log_maybe(log_level, "\t\t(seen=%d, before=%d, after=%d)", action->seen_count, g_list_length(action->actions_before), g_list_length(action->actions_after)); } } void pe_free_nodes(GListPtr nodes) { GListPtr iterator = nodes; while(iterator != NULL) { node_t *node = (node_t*)iterator->data; struct node_shared_s *details = node->details; iterator = iterator->next; crm_debug_5("deleting node"); crm_debug_5("%s is being deleted", details->uname); print_node("delete", node, FALSE); if(details != NULL) { if(details->attrs != NULL) { g_hash_table_destroy(details->attrs); } pe_free_shallow_adv(details->running_rsc, FALSE); crm_free(details); } crm_free(node); } if(nodes != NULL) { g_list_free(nodes); } } void pe_free_colors(GListPtr colors) { GListPtr iterator = colors; while(iterator != NULL) { color_t *color = (color_t *)iterator->data; struct color_shared_s *details = color->details; iterator = iterator->next; if(details != NULL) { pe_free_shallow(details->candidate_nodes); pe_free_shallow_adv(details->allocated_resources, FALSE); crm_free(details->chosen_node); crm_free(details); } crm_free(color); } if(colors != NULL) { g_list_free(colors); } } void pe_free_shallow(GListPtr alist) { pe_free_shallow_adv(alist, TRUE); } void pe_free_shallow_adv(GListPtr alist, gboolean with_data) { GListPtr item; GListPtr item_next = alist; while(item_next != NULL) { item = item_next; item_next = item_next->next; if(with_data) { /* crm_debug_5("freeing %p", item->data); */ crm_free(item->data); } item->data = NULL; item->next = NULL; g_list_free(item); } } void pe_free_resources(GListPtr resources) { resource_t *rsc = NULL; GListPtr iterator = resources; while(iterator != NULL) { iterator = iterator; rsc = (resource_t *)iterator->data; iterator = iterator->next; rsc->fns->free(rsc); } if(resources != NULL) { g_list_free(resources); } } void pe_free_actions(GListPtr actions) { GListPtr iterator = actions; while(iterator != NULL) { action_t *action = (action_t *)iterator->data; iterator = iterator->next; pe_free_shallow(action->actions_before);/* action_warpper_t* */ pe_free_shallow(action->actions_after); /* action_warpper_t* */ g_hash_table_destroy(action->extra); crm_free(action->uuid); crm_free(action); } if(actions != NULL) { g_list_free(actions); } } void pe_free_ordering(GListPtr constraints) { GListPtr iterator = constraints; while(iterator != NULL) { order_constraint_t *order = iterator->data; iterator = iterator->next; crm_free(order->lh_action_task); crm_free(order->rh_action_task); crm_free(order); } if(constraints != NULL) { g_list_free(constraints); } } void pe_free_rsc_colocation(rsc_colocation_t *cons) { if(cons != NULL) { crm_debug_4("Freeing constraint %s (%p)", cons->id, cons); crm_free(cons); } } void pe_free_rsc_to_node(rsc_to_node_t *cons) { if(cons != NULL) { pe_free_shallow(cons->node_list_rh); crm_free(cons); } } GListPtr find_recurring_actions(GListPtr input, node_t *not_on_node) { const char *value = NULL; GListPtr result = NULL; CRM_CHECK(input != NULL, return NULL); slist_iter( action, action_t, input, lpc, value = g_hash_table_lookup(action->extra, XML_LRM_ATTR_INTERVAL); if(value == NULL) { /* skip */ } else if(safe_str_eq(CRMD_ACTION_CANCEL, action->task)) { /* skip */ } else if(not_on_node == NULL) { crm_debug_5("(null) Found: %s", action->uuid); result = g_list_append(result, action); } else if(action->node == NULL) { /* skip */ } else if(action->node->details != not_on_node->details) { crm_debug_5("Found: %s", action->uuid); result = g_list_append(result, action); } ); return result; } GListPtr find_actions(GListPtr input, const char *key, node_t *on_node) { GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); slist_iter( action, action_t, input, lpc, crm_debug_5("Matching %s against %s", key, action->uuid); if(safe_str_neq(key, action->uuid)) { continue; } else if(on_node == NULL) { result = g_list_append(result, action); } else if(action->node == NULL) { /* skip */ crm_debug_2("While looking for %s action on %s, " "found an unallocated one. Assigning" " it to the requested node...", key, on_node->details->uname); action->node = on_node; result = g_list_append(result, action); } else if(safe_str_eq(on_node->details->id, action->node->details->id)) { result = g_list_append(result, action); } ); return result; } GListPtr find_actions_exact(GListPtr input, const char *key, node_t *on_node) { GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); slist_iter( action, action_t, input, lpc, crm_debug_5("Matching %s against %s", key, action->uuid); if(safe_str_neq(key, action->uuid)) { crm_debug_3("Key mismatch: %s vs. %s", key, action->uuid); continue; } else if(on_node == NULL || action->node == NULL) { crm_debug_3("on_node=%p, action->node=%p", on_node, action->node); continue; } else if(safe_str_eq(on_node->details->id, action->node->details->id)) { result = g_list_append(result, action); } crm_debug_2("Node mismatch: %s vs. %s", on_node->details->id, action->node->details->id); ); return result; } void set_id(crm_data_t * xml_obj, const char *prefix, int child) { int id_len = 0; gboolean use_prefix = TRUE; gboolean use_child = TRUE; char *new_id = NULL; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); id_len = 1 + strlen(id); if(child > 999) { pe_err("Are you insane?!?" " The CRM does not support > 1000 children per resource"); return; } else if(child < 0) { use_child = FALSE; } else { id_len += 4; /* child */ } if(prefix == NULL || safe_str_eq(id, prefix)) { use_prefix = FALSE; } else { id_len += (1 + strlen(prefix)); } crm_malloc0(new_id, id_len); if(use_child) { snprintf(new_id, id_len, "%s%s%s:%d", use_prefix?prefix:"", use_prefix?":":"", id, child); } else { snprintf(new_id, id_len, "%s%s%s", use_prefix?prefix:"", use_prefix?":":"", id); } crm_xml_add(xml_obj, XML_ATTR_ID, new_id); crm_free(new_id); } int merge_weights(int w1, int w2) { int result = w1 + w2; if(w1 <= -INFINITY || w2 <= -INFINITY) { if(w1 >= INFINITY || w2 >= INFINITY) { crm_debug_2("-INFINITY + INFINITY == -INFINITY"); } return -INFINITY; } else if(w1 >= INFINITY || w2 >= INFINITY) { return INFINITY; } /* detect wrap-around */ if(result > 0) { if(w1 <= 0 && w2 < 0) { result = -INFINITY; } } else if(w1 > 0 && w2 > 0) { result = INFINITY; } /* detect +/- INFINITY */ if(result >= INFINITY) { result = INFINITY; } else if(result <= -INFINITY) { result = -INFINITY; } return result; } int char2score(const char *score) { int score_f = 0; if(score == NULL) { } else if(safe_str_eq(score, MINUS_INFINITY_S)) { score_f = -INFINITY; } else if(safe_str_eq(score, INFINITY_S)) { score_f = INFINITY; } else if(safe_str_eq(score, "+"INFINITY_S)) { score_f = INFINITY; } else { score_f = crm_parse_int(score, NULL); if(score_f > 0 && score_f > INFINITY) { score_f = INFINITY; } else if(score_f < 0 && score_f < -INFINITY) { score_f = -INFINITY; } } return score_f; } rsc_to_node_t * rsc2node_new(const char *id, resource_t *rsc, int node_weight, node_t *foo_node, pe_working_set_t *data_set) { rsc_to_node_t *new_con = NULL; if(rsc == NULL || id == NULL) { pe_err("Invalid constraint %s for rsc=%p", crm_str(id), rsc); return NULL; } crm_malloc0(new_con, sizeof(rsc_to_node_t)); if(new_con != NULL) { new_con->id = id; new_con->rsc_lh = rsc; new_con->node_list_rh = NULL; new_con->role_filter = RSC_ROLE_UNKNOWN; if(foo_node != NULL) { node_t *copy = node_copy(foo_node); copy->weight = node_weight; new_con->node_list_rh = g_list_append(NULL, copy); } else { CRM_CHECK(node_weight == 0, return NULL); } data_set->placement_constraints = g_list_append( data_set->placement_constraints, new_con); rsc->rsc_location = g_list_append( rsc->rsc_location, new_con); } return new_con; } diff --git a/crm/tengine/callbacks.c b/crm/tengine/callbacks.c index 161f66be99..25f71d7cda 100644 --- a/crm/tengine/callbacks.c +++ b/crm/tengine/callbacks.c @@ -1,530 +1,530 @@ -/* $Id: callbacks.c,v 1.74 2006/04/19 12:24:59 andrew Exp $ */ +/* $Id: callbacks.c,v 1.75 2006/04/21 07:08:04 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include void te_update_confirm(const char *event, HA_Message *msg); void te_update_diff(const char *event, HA_Message *msg); crm_data_t *need_abort(crm_data_t *update); void cib_fencing_updated(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data); extern char *te_uuid; gboolean shuttingdown = FALSE; crm_graph_t *transition_graph; GTRIGSource *transition_trigger = NULL; crm_action_timer_t *transition_timer = NULL; static gboolean start_global_timer(crm_action_timer_t *timer, int timeout) { CRM_ASSERT(timer != NULL); CRM_CHECK(timer > 0, return FALSE); CRM_CHECK(timer->source_id == 0, return FALSE); if(timeout <= 0) { crm_err("Tried to start timer with period: %d", timeout); } else if(timer->source_id == 0) { crm_debug_2("Starting abort timer: %dms", timeout); timer->timeout = timeout; timer->source_id = Gmain_timeout_add( timeout, global_timer_callback, (void*)timer); CRM_ASSERT(timer->source_id != 0); return TRUE; } else { crm_err("Timer is already active with period: %d", timer->timeout); } return FALSE; } void te_update_diff(const char *event, HA_Message *msg) { int rc = -1; const char *op = NULL; crm_data_t *diff = NULL; crm_data_t *aborted = NULL; const char *set_name = NULL; int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; if(msg == NULL) { crm_err("NULL update"); return; } ha_msg_value_int(msg, F_CIB_RC, &rc); op = cl_get_string(msg, F_CIB_OPERATION); if(rc < cib_ok) { crm_debug_2("Ignoring failed %s operation: %s", op, cib_error2string(rc)); return; } diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); cib_diff_version_details( diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_info("Processing diff (%s): %d.%d.%d -> %d.%d.%d", op, diff_del_admin_epoch,diff_del_epoch,diff_del_updates, diff_add_admin_epoch,diff_add_epoch,diff_add_updates); log_cib_diff(LOG_DEBUG_2, diff, op); set_name = "diff-added"; if(diff != NULL && aborted == NULL) { crm_data_t *section = NULL; crm_data_t *change_set = find_xml_node(diff, set_name, FALSE); change_set = find_xml_node(change_set, XML_TAG_CIB, FALSE); if(change_set != NULL) { crm_debug_2("Checking status changes"); section=get_object_root(XML_CIB_TAG_STATUS,change_set); } if(section != NULL) { extract_event(section); } crm_debug_2("Checking change set: %s", set_name); aborted = need_abort(change_set); } set_name = "diff-removed"; if(diff != NULL && aborted == NULL) { crm_data_t *change_set = find_xml_node(diff, set_name, FALSE); change_set = find_xml_node(change_set, XML_TAG_CIB, FALSE); crm_debug_2("Checking change set: %s", set_name); aborted = need_abort(change_set); } if(aborted != NULL) { abort_transition( INFINITY, tg_restart, "Non-status change", NULL); } free_xml(diff); return; } gboolean process_te_message(HA_Message *msg, crm_data_t *xml_data, IPC_Channel *sender) { crm_data_t *xml_obj = NULL; const char *from = cl_get_string(msg, F_ORIG); const char *sys_to = cl_get_string(msg, F_CRM_SYS_TO); const char *sys_from = cl_get_string(msg, F_CRM_SYS_FROM); const char *ref = cl_get_string(msg, XML_ATTR_REFERENCE); const char *op = cl_get_string(msg, F_CRM_TASK); const char *type = cl_get_string(msg, F_CRM_MSG_TYPE); crm_debug_2("Processing %s (%s) message", op, ref); crm_log_message(LOG_DEBUG_3, msg); if(op == NULL){ /* error */ } else if(strcmp(op, CRM_OP_HELLO) == 0) { /* ignore */ } else if(sys_to == NULL || strcmp(sys_to, CRM_SYSTEM_TENGINE) != 0) { crm_debug_2("Bad sys-to %s", crm_str(sys_to)); return FALSE; } else if(safe_str_eq(op, CRM_OP_INVOKE_LRM) && safe_str_eq(sys_from, CRM_SYSTEM_LRMD) /* && safe_str_eq(type, XML_ATTR_RESPONSE) */ ){ #if CRM_DEPRECATED_SINCE_2_0_4 if(safe_str_eq(crm_element_name(xml_data), XML_TAG_CIB)) { xml_obj = xml_data; } else { xml_obj = find_xml_node(xml_data, XML_TAG_CIB, TRUE); } #else xml_obj = xml_data; CRM_CHECK(xml_obj != NULL, crm_log_message_adv(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); #endif CRM_CHECK(xml_obj != NULL, crm_log_message_adv(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); xml_obj = get_object_root(XML_CIB_TAG_STATUS, xml_obj); CRM_CHECK(xml_obj != NULL, crm_log_message_adv(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); crm_log_message_adv(LOG_DEBUG_2, "Processing (N)ACK", msg); crm_debug("Processing (N)ACK from %s", from); extract_event(xml_obj); } else if(safe_str_eq(type, XML_ATTR_RESPONSE)) { crm_err("Message was a response not a request. Discarding"); return TRUE; } else if(strcmp(op, CRM_OP_TRANSITION) == 0) { if(transition_graph->complete == FALSE) { crm_info("Another transition is already active"); abort_transition( INFINITY,tg_restart,"Transition Active",NULL); } else { destroy_graph(transition_graph); transition_graph = unpack_graph(xml_data); start_global_timer(transition_timer, transition_graph->transition_timeout); trigger_graph(); - print_graph(LOG_DEBUG, transition_graph); + print_graph(LOG_DEBUG_2, transition_graph); } } else if(strcmp(op, CRM_OP_TE_HALT) == 0) { abort_transition(INFINITY, tg_stop, "Peer Halt", NULL); } else if(strcmp(op, CRM_OP_TEABORT) == 0) { abort_transition(INFINITY, tg_restart, "Peer Cancelled", NULL); } else { crm_err("Unknown command: %s::%s from %s", type, op, sys_from); } crm_debug_3("finished processing message"); return TRUE; } void tengine_stonith_callback(stonith_ops_t * op) { const char *allow_fail = NULL; int stonith_id = -1; crm_action_t *stonith_action = NULL; char *op_key = NULL; char *call_id = NULL; if(op == NULL) { crm_err("Called with a NULL op!"); return; } crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", op->call_id, op->optype, op->node_name, op->op_result, (char *)op->node_list, op->private_data); /* this will mark the event complete if a match is found */ CRM_CHECK(op->private_data != NULL, return); /* filter out old STONITH actions */ decodeNVpair(op->private_data, ';', &call_id, &op_key); if(op_key != NULL) { char *key = generate_transition_key( transition_graph->id, te_uuid); gboolean key_matched = safe_str_eq(key, op_key); crm_free(key); if(key_matched == FALSE) { crm_info("Ignoring old STONITH op: %s", op->private_data); return; } } #if 1 stonith_id = crm_parse_int(call_id, "-1"); if(stonith_id < 0) { crm_err("Stonith action not matched: %s (%s)", call_id, op->private_data); return; } #endif stonith_action = match_down_event( stonith_id, op->node_uuid, CRM_OP_FENCE); if(stonith_action == NULL) { crm_err("Stonith action not matched"); return; } switch(op->op_result) { case STONITH_SUCCEEDED: send_stonith_update(op); break; case STONITH_CANNOT: case STONITH_TIMEOUT: case STONITH_GENERIC: stonith_action->failed = TRUE; allow_fail = g_hash_table_lookup( stonith_action->params, XML_ATTR_TE_ALLOWFAIL); if(FALSE == crm_is_true(allow_fail)) { crm_err("Stonith of %s failed (%d)..." " aborting transition.", op->node_name, op->op_result); abort_transition(INFINITY, tg_restart, "Stonith failed", NULL); } break; default: crm_err("Unsupported action result: %d", op->op_result); abort_transition(INFINITY, tg_restart, "Unsupport Stonith result", NULL); } update_graph(transition_graph, stonith_action); trigger_graph(); return; } void tengine_stonith_connection_destroy(gpointer user_data) { #if 0 crm_err("Fencing daemon has left us: Shutting down...NOW"); /* shutdown properly later */ CRM_CHECK(FALSE/* fencing daemon died */); #else crm_err("Fencing daemon has left us"); #endif return; } gboolean tengine_stonith_dispatch(IPC_Channel *sender, void *user_data) { int lpc = 0; while(stonithd_op_result_ready()) { if (sender->ch_status == IPC_DISCONNECT) { /* The message which was pending for us is that * the IPC status is now IPC_DISCONNECT */ break; } if(ST_FAIL == stonithd_receive_ops_result(FALSE)) { crm_err("stonithd_receive_ops_result() failed"); } else { lpc++; } } crm_debug_2("Processed %d messages", lpc); if (sender->ch_status == IPC_DISCONNECT) { return FALSE; } return TRUE; } void cib_fencing_updated(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { trigger_graph(); if(rc < cib_ok) { crm_err("CIB update failed: %s", cib_error2string(rc)); crm_log_xml_warn(msg, "[Failed Update]"); } } void cib_action_updated(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { trigger_graph(); if(rc < cib_ok) { crm_err("Update %d FAILED: %s", call_id, cib_error2string(rc)); } } gboolean action_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; if(data == NULL) { crm_err("Timer popped with no data"); return FALSE; } timer = (crm_action_timer_t*)data; stop_te_timer(timer); crm_warn("Timer popped (abort_level=%d, complete=%s)", transition_graph->abort_priority, transition_graph->complete?"true":"false"); CRM_CHECK(timer->action != NULL, return FALSE); if(transition_graph->complete) { crm_err("Ignoring timeout while not in transition"); } else if(timer->reason == timeout_action_warn) { print_graph_action( LOG_WARNING,"Action missed its timeout", timer->action); } else { /* fail the action */ cib_action_update(timer->action, LRM_OP_TIMEOUT); } return FALSE; } static int unconfirmed_actions(gboolean send_updates) { int unconfirmed = 0; crm_debug_2("Unconfirmed actions..."); slist_iter( synapse, synapse_t, transition_graph->synapses, lpc, /* lookup event */ slist_iter( action, crm_action_t, synapse->actions, lpc2, if(action->executed == FALSE) { continue; } else if(action->confirmed) { continue; } unconfirmed++; crm_debug("Action %d: unconfirmed",action->id); if(action->type == action_type_rsc && send_updates) { cib_action_update(action, LRM_OP_PENDING); } ); ); if(unconfirmed > 0) { crm_info("Waiting on %d unconfirmed actions", unconfirmed); } return unconfirmed; } gboolean global_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; if(data == NULL) { crm_err("Timer popped with no data"); return FALSE; } timer = (crm_action_timer_t*)data; stop_te_timer(timer); crm_warn("Timer popped (abort_level=%d, complete=%s)", transition_graph->abort_priority, transition_graph->complete?"true":"false"); CRM_CHECK(timer->action == NULL, return FALSE); if(transition_graph->complete) { crm_err("Ignoring timeout while not in transition"); } else if(timer->reason == timeout_abort) { int unconfirmed = unconfirmed_actions(FALSE); crm_warn("Transition abort timeout reached..." " marking transition complete."); transition_graph->complete = TRUE; abort_transition(INFINITY, tg_restart, "Global Timeout", NULL); if(unconfirmed != 0) { crm_warn("Writing %d unconfirmed actions to the CIB", unconfirmed); unconfirmed_actions(TRUE); } } return FALSE; } gboolean te_graph_trigger(gpointer user_data) { int timeout = 0; enum transition_status graph_rc = -1; if(transition_graph->complete) { notify_crmd(transition_graph); return TRUE; } graph_rc = run_graph(transition_graph); timeout = transition_graph->transition_timeout; print_graph(LOG_DEBUG_2, transition_graph); if(graph_rc == transition_active) { crm_debug_3("Transition not yet complete"); stop_te_timer(transition_timer); start_global_timer(transition_timer, timeout); return TRUE; } else if(graph_rc == transition_pending) { timeout = transition_timer->timeout; crm_debug_3("Transition not yet complete - no actions fired"); return TRUE; } if(graph_rc != transition_complete) { crm_err("Transition failed: %s", transition_status(graph_rc)); print_graph(LOG_WARNING, transition_graph); } notify_crmd(transition_graph); return TRUE; } diff --git a/crm/tengine/events.c b/crm/tengine/events.c index eb9a53ee6c..3cdfc94c66 100644 --- a/crm/tengine/events.c +++ b/crm/tengine/events.c @@ -1,513 +1,515 @@ -/* $Id: events.c,v 1.13 2006/04/11 07:36:49 andrew Exp $ */ +/* $Id: events.c,v 1.14 2006/04/21 07:08:04 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include crm_data_t *need_abort(crm_data_t *update); void process_graph_event(crm_data_t *event, const char *event_node); int match_graph_event( crm_action_t *action, crm_data_t *event, const char *event_node); crm_data_t * need_abort(crm_data_t *update) { crm_data_t *section_xml = NULL; const char *section = NULL; if(update == NULL) { return NULL; } section = XML_CIB_TAG_NODES; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, return section_xml; ); section = XML_CIB_TAG_RESOURCES; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, return section_xml; ); section = XML_CIB_TAG_CONSTRAINTS; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, return section_xml; ); section = XML_CIB_TAG_CRMCONFIG; section_xml = get_object_root(section, update); xml_child_iter(section_xml, child, return section_xml; ); return NULL; } gboolean extract_event(crm_data_t *msg) { int shutdown = 0; const char *event_node = NULL; /* [cib fragment] ... */ crm_debug_4("Extracting event from %s", crm_element_name(msg)); xml_child_iter_filter( msg, node_state, XML_CIB_TAG_STATE, crm_data_t *attrs = NULL; crm_data_t *resources = NULL; const char *ccm_state = crm_element_value( node_state, XML_CIB_ATTR_INCCM); const char *crmd_state = crm_element_value( node_state, XML_CIB_ATTR_CRMDSTATE); /* Transient node attribute changes... */ event_node = crm_element_value(node_state, XML_ATTR_ID); crm_debug("Processing state update from %s", event_node); crm_log_xml_debug_3(node_state,"Processing"); attrs = find_xml_node( node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); if(attrs != NULL) { crm_info("Aborting on "XML_TAG_TRANSIENT_NODEATTRS" changes"); abort_transition(INFINITY, tg_restart, XML_TAG_TRANSIENT_NODEATTRS, attrs); } resources = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); resources = find_xml_node( resources, XML_LRM_TAG_RESOURCES, FALSE); /* LRM resource update... */ xml_child_iter( resources, rsc, xml_child_iter( rsc, rsc_op, crm_log_xml_debug_3( rsc_op, "Processing resource update"); process_graph_event(rsc_op, event_node); ); ); /* * node state update... possibly from a shutdown we requested */ if(safe_str_eq(ccm_state, XML_BOOLEAN_FALSE) || safe_str_eq(crmd_state, CRMD_JOINSTATE_DOWN)) { crm_action_t *shutdown = NULL; crm_debug_3("A shutdown we requested?"); shutdown = match_down_event(0, event_node, NULL); if(shutdown != NULL) { update_graph(transition_graph, shutdown); trigger_graph(); } else { crm_info("Stonith/shutdown event not matched"); abort_transition(INFINITY, tg_restart, "Node failure", node_state); } } shutdown = 0; ha_msg_value_int(node_state, XML_CIB_ATTR_SHUTDOWN, &shutdown); if(shutdown != 0) { crm_info("Aborting on "XML_CIB_ATTR_SHUTDOWN" attribute"); abort_transition(INFINITY, tg_restart, "Shutdown request", node_state); } ); return TRUE; } static void update_failcount(crm_action_t *action, int rc) { char *attr_name = NULL; const char *task = NULL; const char *rsc_id = NULL; const char *on_node = NULL; const char *on_uuid = NULL; const char *interval = NULL; if(rc == 99) { /* this is an internal code for "we're busy, try again" */ return; } interval = g_hash_table_lookup(action->params, "interval"); if(interval == NULL) { return; } CRM_CHECK(action->xml != NULL, return); task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); rsc_id = crm_element_value(action->xml, XML_LRM_ATTR_RSCID); on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); on_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); CRM_CHECK(task != NULL, return); CRM_CHECK(rsc_id != NULL, return); CRM_CHECK(on_uuid != NULL, return); CRM_CHECK(on_node != NULL, return); attr_name = crm_concat("fail-count", rsc_id, '-'); crm_warn("Updating failcount for %s on %s after failed %s: rc=%d", rsc_id, on_node, task, rc); update_attr(te_cib_conn, cib_none, XML_CIB_TAG_STATUS, on_uuid, NULL,NULL, attr_name, XML_NVPAIR_ATTR_VALUE"++"); crm_free(attr_name); } /* * returns the ID of the action if a match is found * returns -1 if a match was not found * returns -2 if a match was found but the action failed (and was * not allowed to) */ int match_graph_event( crm_action_t *action, crm_data_t *event, const char *event_node) { int log_level_fail = LOG_ERR; int target_rc = 0; const char *target_rc_s = NULL; const char *allow_fail = NULL; const char *this_action = NULL; const char *this_node = NULL; const char *this_uname = NULL; const char *magic = NULL; const char *this_event; char *update_te_uuid = NULL; const char *update_event; int op_status_i = -3; int op_rc_i = -3; int transition_i = -1; CRM_CHECK(event != NULL, return -1); crm_debug_3("Processing \"%s\" change", crm_element_name(event)); update_event = crm_element_value(event, XML_ATTR_ID); magic = crm_element_value(event, XML_ATTR_TRANSITION_MAGIC); CRM_CHECK(magic != NULL, return -2); this_action = crm_element_value(action->xml, XML_LRM_ATTR_TASK); this_uname = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); this_event = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); this_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); CRM_CHECK(this_event != NULL, return -2); if(safe_str_neq(this_event, update_event)) { crm_debug_2("Action %d : Event mismatch %s vs. %s", action->id, this_event, update_event); return -1; } else if(safe_str_neq(this_node, event_node)) { crm_debug_2("Action %d : Node mismatch %s (%s) vs. %s", action->id, this_node, this_uname, event_node); return -1; } - crm_debug("Matched action (%d) %s", action->id, this_event); + crm_debug_2("Matched action (%d) %s", action->id, this_event); CRM_CHECK(decode_transition_magic( magic, &update_te_uuid, &transition_i, &op_status_i, &op_rc_i), return -2); if(transition_i == -1) { /* we never expect these - recompute */ - crm_err("Detected an action initiated outside of a transition"); + crm_err("Detected action %s initiated outside of a transition", + this_event); crm_log_message(LOG_ERR, event); return -2; } else if(safe_str_neq(update_te_uuid, te_uuid)) { - crm_info("Detected an action from a different transitioner:" - " %s vs. %s", update_te_uuid, te_uuid); + crm_info("Detected action %s from a different transitioner:" + " %s vs. %s", this_event, update_te_uuid, te_uuid); crm_log_message(LOG_INFO, event); return -3; } else if(transition_graph->id != transition_i) { - crm_warn("Detected an action from a different transition:" - " %d vs. %d", transition_i, transition_graph->id); + crm_warn("Detected an action %s from a different transition:" + " %d vs. %d", this_event, transition_i, + transition_graph->id); crm_log_message(LOG_INFO, event); return -4; } /* stop this event's timer if it had one */ stop_te_timer(action->timer); action->confirmed = TRUE; target_rc_s = g_hash_table_lookup(action->params,XML_ATTR_TE_TARGET_RC); if(target_rc_s != NULL) { - crm_debug("Target rc: %s vs. %d", target_rc_s, op_rc_i); + crm_debug_2("Target rc: %s vs. %d", target_rc_s, op_rc_i); target_rc = crm_parse_int(target_rc_s, NULL); if(target_rc == op_rc_i) { crm_debug_2("Target rc: == %d", op_rc_i); if(op_status_i != LRM_OP_DONE) { - crm_debug("Re-mapping op status to" - " LRM_OP_DONE for %s", update_event); + crm_debug_2("Re-mapping op status to" + " LRM_OP_DONE for %s",update_event); op_status_i = LRM_OP_DONE; } } else { crm_debug_2("Target rc: != %d", op_rc_i); if(op_status_i != LRM_OP_ERROR) { crm_info("Re-mapping op status to" " LRM_OP_ERROR for %s", update_event); op_status_i = LRM_OP_ERROR; } } } /* Process OP status */ switch(op_status_i) { case -3: crm_err("Action returned the same as last time..." " whatever that was!"); crm_log_message(LOG_ERR, event); break; case LRM_OP_PENDING: crm_debug("Ignoring pending operation"); return -5; break; case LRM_OP_DONE: break; case LRM_OP_ERROR: /* This is the code we use for direct nack's */ if(op_rc_i == 99) { log_level_fail = LOG_WARNING; } /* fall through */ case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: action->failed = TRUE; crm_log_maybe(log_level_fail, "Action %s on %s failed (target: %d vs. rc: %d): %s", update_event, this_uname, target_rc, op_rc_i, op_status2text(op_status_i)); break; case LRM_OP_CANCELLED: /* do nothing?? */ crm_err("Dont know what to do for cancelled ops yet"); break; default: action->failed = TRUE; crm_err("Unsupported action result: %d", op_status_i); } update_graph(transition_graph, action); trigger_graph(); if(action->failed) { allow_fail = g_hash_table_lookup( action->params, XML_ATTR_TE_ALLOWFAIL); if(crm_is_true(allow_fail)) { action->failed = FALSE; } } if(action->failed) { /* ignore probes */ if(target_rc != EXECRA_NOT_RUNNING) { update_failcount(action, op_rc_i); } abort_transition(action->synapse->priority, tg_restart, "Event failed", event); } else if(transition_graph->complete) { abort_transition(INFINITY, tg_restart,"No active graph", event); } te_log_action(LOG_INFO, "Action %s (%d) confirmed", this_event, action->id); return action->id; } crm_action_t * match_down_event(int id, const char *target, const char *filter) { const char *this_action = NULL; const char *this_node = NULL; crm_action_t *match = NULL; slist_iter( synapse, synapse_t, transition_graph->synapses, lpc, /* lookup event */ slist_iter( action, crm_action_t, synapse->actions, lpc2, if(id > 0 && action->id == id) { match = action; break; } this_action = crm_element_value( action->xml, XML_LRM_ATTR_TASK); if(action->type != action_type_crm) { continue; } else if(safe_str_eq(this_action, CRM_OP_LRM_REFRESH)){ continue; } else if(filter != NULL && safe_str_neq(this_action, filter)) { continue; } this_node = crm_element_value( action->xml, XML_LRM_ATTR_TARGET_UUID); if(this_node == NULL) { crm_log_xml_err(action->xml, "No node uuid"); } if(safe_str_neq(this_node, target)) { crm_debug("Action %d : Node mismatch: %s", action->id, this_node); continue; } match = action; break; ); if(match != NULL) { /* stop this event's timer if it had one */ break; } ); if(match != NULL) { /* stop this event's timer if it had one */ crm_debug("Match found for action %d: %s on %s", id, crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY), target); stop_te_timer(match->timer); match->confirmed = TRUE; } else if(id > 0) { crm_err("No match for action %d", id); } else { crm_warn("No match for shutdown action on %s", target); } return match; } void process_graph_event(crm_data_t *event, const char *event_node) { int rc = -1; int op_status_i = 0; const char *magic = NULL; const char *rsc_id = NULL; const char *op_status = NULL; CRM_ASSERT(event != NULL); rsc_id = crm_element_value(event, XML_ATTR_ID); op_status = crm_element_value(event, XML_LRM_ATTR_OPSTATUS); magic = crm_element_value(event, XML_ATTR_TRANSITION_MAGIC); if(op_status != NULL) { op_status_i = crm_parse_int(op_status, NULL); if(op_status_i == LRM_OP_PENDING) { /* just information that the action was sent */ crm_debug_2("Ignoring TE initiated updates"); return; } } if(magic == NULL) { crm_log_xml_debug_2(event, "Skipping \"non-change\""); return; } else { crm_debug_2("Processing CIB update: %s on %s: %s", rsc_id, event_node, magic); } slist_iter( synapse, synapse_t, transition_graph->synapses, lpc, /* lookup event */ slist_iter( action, crm_action_t, synapse->actions, lpc2, rc = match_graph_event(action, event, event_node); if(rc >= 0) { crm_log_xml_debug_2(event, "match:found"); } else if(rc == -5) { crm_log_xml_debug_2(event, "match:pending"); } else if(rc != -1) { crm_warn("Search for %s terminated: %d", ID(event), rc); abort_transition(INFINITY, tg_restart, "Unexpected event", event); } if(rc != -1) { return; } ); ); /* unexpected event, trigger a pe-recompute */ /* possibly do this only for certain types of actions */ crm_warn("Event not found."); abort_transition(INFINITY, tg_restart, "Unexpected event", event); return; } diff --git a/lib/crm/transition/graph.c b/lib/crm/transition/graph.c index 2b5dcd7e89..962c659f8b 100644 --- a/lib/crm/transition/graph.c +++ b/lib/crm/transition/graph.c @@ -1,317 +1,316 @@ -/* $Id: graph.c,v 1.9 2006/04/19 12:24:59 andrew Exp $ */ +/* $Id: graph.c,v 1.10 2006/04/21 07:08:04 andrew Exp $ */ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include /* #include */ /* #include */ crm_graph_functions_t *graph_fns = NULL; static gboolean update_synapse_ready(synapse_t *synapse, int action_id) { gboolean updates = FALSE; CRM_CHECK(synapse->executed == FALSE, return FALSE); CRM_CHECK(synapse->confirmed == FALSE, return FALSE); synapse->ready = TRUE; slist_iter( prereq, crm_action_t, synapse->inputs, lpc, crm_debug_3("Processing input %d", prereq->id); if(prereq->id == action_id) { crm_debug_2("Marking input %d of synapse %d confirmed", action_id, synapse->id); prereq->confirmed = TRUE; updates = TRUE; } else if(prereq->confirmed == FALSE) { synapse->ready = FALSE; } ); if(updates) { crm_debug_2("Updated synapse %d", synapse->id); } return updates; } static gboolean update_synapse_confirmed(synapse_t *synapse, int action_id) { gboolean updates = FALSE; gboolean is_confirmed = TRUE; CRM_CHECK(synapse->executed, return FALSE); CRM_CHECK(synapse->confirmed == FALSE, return TRUE); is_confirmed = TRUE; slist_iter( action, crm_action_t, synapse->actions, lpc, crm_debug_3("Processing action %d", action->id); if(action->id == action_id) { - crm_info("Confirmed: Action %d of Synapse %d", + crm_debug_2("Confirmed: Action %d of Synapse %d", action_id, synapse->id); action->confirmed = TRUE; updates = TRUE; } else if(action->confirmed == FALSE) { is_confirmed = FALSE; - crm_debug_2("Synapse %d still not confirmed after action %d", + crm_debug_3("Synapse %d still not confirmed after action %d", synapse->id, action_id); } ); if(is_confirmed && synapse->confirmed == FALSE) { - crm_debug("Confirmed: Synapse %d", synapse->id); + crm_debug_2("Confirmed: Synapse %d", synapse->id); synapse->confirmed = TRUE; updates = TRUE; } if(updates) { - crm_debug_2("Updated synapse %d", synapse->id); + crm_debug_3("Updated synapse %d", synapse->id); } return updates; } gboolean update_graph(crm_graph_t *graph, crm_action_t *action) { gboolean rc = FALSE; gboolean updates = FALSE; slist_iter( synapse, synapse_t, graph->synapses, lpc, if (synapse->confirmed) { crm_debug_2("Synapse complete"); } else if (synapse->executed) { crm_debug_2("Synapse executed"); rc = update_synapse_confirmed(synapse, action->id); } else if(action->failed == FALSE) { rc = update_synapse_ready(synapse, action->id); } updates = updates || rc; ); if(updates) { crm_debug_2("Updated graph with completed action %d", action->id); } return updates; } static gboolean should_fire_synapse(synapse_t *synapse) { CRM_CHECK(synapse->executed == FALSE, return FALSE); CRM_CHECK(synapse->confirmed == FALSE, return FALSE); crm_debug_3("Checking pre-reqs for %d", synapse->id); /* lookup prereqs */ synapse->ready = TRUE; slist_iter( prereq, crm_action_t, synapse->inputs, lpc, crm_debug_3("Processing input %d", prereq->id); if(prereq->confirmed == FALSE) { crm_debug_3("Inputs for synapse %d not satisfied", synapse->id); synapse->ready = FALSE; break; } ); return synapse->ready; } static gboolean initiate_action(crm_graph_t *graph, crm_action_t *action) { const char *id = NULL; int tmp_time = 2 * action->timeout; CRM_CHECK(action->executed == FALSE, return FALSE); id = ID(action->xml); CRM_CHECK(id != NULL, return FALSE); if(tmp_time > graph->transition_timeout) { crm_debug("Action %d: Increasing IDLE timer to %d", action->id, tmp_time); graph->transition_timeout = tmp_time; } action->executed = TRUE; if(action->type == action_type_pseudo){ - te_log_action(LOG_INFO, - "Executing pseudo-event: %d", action->id); + crm_debug_2("Executing pseudo-event: %d", action->id); return graph_fns->pseudo(graph, action); } else if(action->type == action_type_rsc) { - te_log_action(LOG_INFO, "Executing rsc-event: %d", action->id); + crm_debug_2("Executing rsc-event: %d", action->id); return graph_fns->rsc(graph, action); } else if(action->type == action_type_crm) { const char *task = NULL; task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); CRM_CHECK(task != NULL, return FALSE); if(safe_str_eq(task, CRM_OP_FENCE)) { - te_log_action(LOG_INFO, "Executing STONITH-event: %d", + crm_info("Executing STONITH-event: %d", action->id); return graph_fns->stonith(graph, action); } - te_log_action(LOG_INFO, "Executing crm-event: %d", action->id); + crm_info("Executing crm-event: %d", action->id); return graph_fns->crmd(graph, action); } te_log_action(LOG_ERR, "Failed on unsupported command type: %s (id=%s)", crm_element_name(action->xml), id); return FALSE; } static gboolean fire_synapse(crm_graph_t *graph, synapse_t *synapse) { CRM_CHECK(synapse != NULL, return FALSE); CRM_CHECK(synapse->ready, return FALSE); CRM_CHECK(synapse->confirmed == FALSE, return TRUE); - crm_debug("Synapse %d fired", synapse->id); + crm_debug_2("Synapse %d fired", synapse->id); synapse->executed = TRUE; slist_iter( action, crm_action_t, synapse->actions, lpc, /* allow some leeway */ gboolean passed = FALSE; /* Invoke the action and start the timer */ passed = initiate_action(graph, action); if(passed == FALSE) { crm_err("Failed initiating <%s id=%d> in synapse %d", crm_element_name(action->xml), action->id, synapse->id); return FALSE; } ); return TRUE; } int run_graph(crm_graph_t *graph) { int num_fired = 0; int num_pending = 0; int num_skipped = 0; int num_complete = 0; int num_incomplete = 0; int stat_log_level = LOG_DEBUG; int pass_result = transition_active; if(graph_fns == NULL) { set_default_graph_functions(); } if(graph == NULL) { return transition_complete; } crm_debug_2("Entering graph %d callback", graph->id); slist_iter( synapse, synapse_t, graph->synapses, lpc, if (synapse->confirmed) { crm_debug_3("Synapse %d complete", synapse->id); num_complete++; } else if (synapse->executed) { int pending_log = LOG_DEBUG_2; if(synapse->priority > graph->abort_priority) { pending_log = LOG_DEBUG_3; } crm_log_maybe(pending_log, "Synapse %d: confirmation pending", synapse->id); num_pending++; } else if(synapse->priority <= graph->abort_priority) { crm_debug_2("Skipping synapse %d: aborting", synapse->id); num_skipped++; } else { crm_debug_2("Synapse %d pending", synapse->id); if(should_fire_synapse(synapse)) { if(fire_synapse(graph, synapse) == FALSE) { return transition_failed; } num_fired++; } else { num_incomplete++; } } ); if(num_pending == 0 && num_fired == 0) { graph->complete = TRUE; stat_log_level = LOG_INFO; pass_result = transition_complete; if(num_incomplete != 0) { stat_log_level = LOG_WARNING; pass_result = transition_terminated; } else if(num_skipped != 0) { stat_log_level = LOG_NOTICE; } } else if(num_fired == 0) { pass_result = transition_pending; } crm_log_maybe(stat_log_level+1, "===================================================="); crm_log_maybe(stat_log_level, "Transition %d: (Complete=%d, Pending=%d," " Fired=%d, Skipped=%d, Incomplete=%d)", graph->id, num_complete, num_pending, num_fired, num_skipped, num_incomplete); return pass_result; }