diff --git a/crmd/callbacks.c b/crmd/callbacks.c index fadf915440..2aa038ba4b 100644 --- a/crmd/callbacks.c +++ b/crmd/callbacks.c @@ -1,638 +1,640 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include void crmd_ha_connection_destroy(gpointer user_data); void crmd_ha_msg_filter(xmlNode *msg); /* From join_dc... */ extern gboolean check_join_state( enum crmd_fsa_state cur_state, const char *source); #define trigger_fsa(source) crm_debug_3("Triggering FSA: %s", __FUNCTION__); \ G_main_set_trigger(source); #if SUPPORT_HEARTBEAT gboolean crmd_ha_msg_dispatch(ll_cluster_t *cluster_conn, gpointer user_data) { IPC_Channel *channel = NULL; gboolean stay_connected = TRUE; crm_debug_3("Invoked"); if(cluster_conn != NULL) { channel = cluster_conn->llc_ops->ipcchan(cluster_conn); } CRM_CHECK(cluster_conn != NULL, ;); CRM_CHECK(channel != NULL, ;); if(channel != NULL && IPC_ISRCONN(channel)) { if(cluster_conn->llc_ops->msgready(cluster_conn) == 0) { crm_debug_2("no message ready yet"); } /* invoke the callbacks but dont block */ cluster_conn->llc_ops->rcvmsg(cluster_conn, 0); } if (channel == NULL || channel->ch_status != IPC_CONNECT) { if(is_set(fsa_input_register, R_HA_DISCONNECTED) == FALSE) { crm_crit("Lost connection to heartbeat service."); } else { crm_info("Lost connection to heartbeat service."); } trigger_fsa(fsa_source); stay_connected = FALSE; } return stay_connected; } #endif void crmd_ha_connection_destroy(gpointer user_data) { crm_debug_3("Invoked"); if(is_set(fsa_input_register, R_HA_DISCONNECTED)) { /* we signed out, so this is expected */ crm_info("Heartbeat disconnection complete"); return; } crm_crit("Lost connection to heartbeat service!"); register_fsa_input(C_HA_DISCONNECT, I_ERROR, NULL); trigger_fsa(fsa_source); } void crmd_ha_msg_filter(xmlNode *msg) { ha_msg_input_t new_input; const char *from = crm_element_value(msg, F_ORIG); const char *seq = crm_element_value(msg, F_SEQ); const char *op = crm_element_value(msg, F_CRM_TASK); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); new_input.msg = NULL; if(safe_str_eq(sys_to, CRM_SYSTEM_DC) && AM_I_DC == FALSE) { crm_debug_2("Ignoring message for the DC [F_SEQ=%s]", seq); return; } else if(safe_str_eq(sys_from, CRM_SYSTEM_DC)) { if(AM_I_DC && safe_str_neq(from, fsa_our_uname)) { crm_err("Another DC detected: %s (op=%s)", from, op); /* make sure the election happens NOW */ if(fsa_state != S_ELECTION) { new_input.msg = msg; register_fsa_error_adv( C_FSA_INTERNAL, I_ELECTION, NULL, &new_input, __FUNCTION__); } } else { crm_debug_2("Processing DC message from %s [F_SEQ=%s]", from, seq); } } if(new_input.msg == NULL) { crm_log_xml(LOG_MSG, "HA[inbound]", msg); route_message(C_HA_MESSAGE, msg); } trigger_fsa(fsa_source); } #if SUPPORT_HEARTBEAT void crmd_ha_msg_callback(HA_Message *hamsg, void* private_data) { int level = LOG_DEBUG; oc_node_t *from_node = NULL; xmlNode *msg = convert_ha_message(NULL, hamsg, __FUNCTION__); const char *from = crm_element_value(msg, F_ORIG); const char *op = crm_element_value(msg, F_CRM_TASK); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); CRM_CHECK(from != NULL, crm_log_xml_err(msg, "anon"); goto bail); crm_debug_2("HA[inbound]: %s from %s", op, from); if(crm_peer_cache == NULL || crm_active_members() == 0) { crm_debug("Ignoring HA messages until we are" " connected to the CCM (%s op from %s)", op, from); crm_log_xml(LOG_MSG, "HA[inbound]: Ignore (No CCM)", msg); goto bail; } from_node = g_hash_table_lookup(crm_peer_cache, from); if(from_node == NULL) { if(safe_str_eq(op, CRM_OP_VOTE)) { level = LOG_WARNING; } else if(AM_I_DC && safe_str_eq(op, CRM_OP_JOIN_ANNOUNCE)) { level = LOG_WARNING; } else if(safe_str_eq(sys_from, CRM_SYSTEM_DC)) { level = LOG_WARNING; } do_crm_log(level, "Ignoring HA message (op=%s) from %s: not in our" " membership list (size=%d)", op, from, crm_active_members()); crm_log_xml(LOG_MSG, "HA[inbound]: CCM Discard", msg); } else { crmd_ha_msg_filter(msg); return; } bail: free_xml(msg); return; } #endif /* * Apparently returning TRUE means "stay connected, keep doing stuff". * Returning FALSE means "we're all done, close the connection" */ gboolean crmd_ipc_msg_callback(IPC_Channel *client, gpointer user_data) { int lpc = 0; xmlNode *msg = NULL; crmd_client_t *curr_client = (crmd_client_t*)user_data; gboolean stay_connected = TRUE; crm_debug_2("Invoked: %s", curr_client->table_key); while(IPC_ISRCONN(client)) { if(client->ops->is_message_pending(client) == 0) { break; } msg = xmlfromIPC(client, 0); if (msg == NULL) { break; } lpc++; crm_debug_2("Processing msg from %s", curr_client->table_key); crm_log_xml(LOG_DEBUG_2, "CRMd[inbound]", msg); if(crmd_authorize_message(msg, curr_client)) { route_message(C_IPC_MESSAGE, msg); } free_xml(msg); msg = NULL; if(client->ch_status != IPC_CONNECT) { break; } } crm_debug_2("Processed %d messages", lpc); if (client->ch_status != IPC_CONNECT) { stay_connected = FALSE; process_client_disconnect(curr_client); } trigger_fsa(fsa_source); return stay_connected; } extern GCHSource *lrm_source; gboolean lrm_dispatch(IPC_Channel *src_not_used, gpointer user_data) { /* ?? src == lrm_channel ?? */ ll_lrm_t *lrm = (ll_lrm_t*)user_data; IPC_Channel *lrm_channel = lrm->lrm_ops->ipcchan(lrm); crm_debug_3("Invoked"); lrm->lrm_ops->rcvmsg(lrm, FALSE); if(lrm_channel->ch_status != IPC_CONNECT) { if(is_set(fsa_input_register, R_LRM_CONNECTED)) { crm_crit("LRM Connection failed"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit_inplace(fsa_input_register, R_LRM_CONNECTED); } else { crm_info("LRM Connection disconnected"); } lrm_source = NULL; return FALSE; } return TRUE; } extern gboolean process_lrm_event(lrm_op_t *op); void lrm_op_callback(lrm_op_t* op) { CRM_CHECK(op != NULL, return); process_lrm_event(op); } void crmd_ha_status_callback(const char *node, const char *status, void *private) { xmlNode *update = NULL; crm_node_t *member = NULL; crm_notice("Status update: Node %s now has status [%s] (DC=%s)", node, status, AM_I_DC?"true":"false"); member = g_hash_table_lookup(crm_peer_cache, node); if(member == NULL) { /* Make sure it is created so crm_update_peer_proc() succeeds */ const char *uuid = get_uuid(node); member = crm_update_peer(0, 0, -1, 0, uuid, node, NULL, NULL); } if(safe_str_eq(status, PINGSTATUS)) { return; } if(safe_str_eq(status, DEADSTATUS)) { /* this node is toast */ crm_update_peer_proc(node, crm_proc_ais, OFFLINESTATUS); if(AM_I_DC) { update = create_node_state( node, DEADSTATUS, XML_BOOLEAN_NO, OFFLINESTATUS, CRMD_JOINSTATE_DOWN, NULL, TRUE, __FUNCTION__); } } else { crm_update_peer_proc(node, crm_proc_ais, ONLINESTATUS); if(AM_I_DC) { update = create_node_state( node, ACTIVESTATUS, NULL, NULL, NULL, NULL, FALSE, __FUNCTION__); } } trigger_fsa(fsa_source); if(update != NULL) { fsa_cib_anon_update( XML_CIB_TAG_STATUS, update, cib_scope_local|cib_quorum_override); free_xml(update); } } void crmd_client_status_callback(const char * node, const char * client, const char * status, void * private) { const char *join = NULL; crm_node_t *member = NULL; xmlNode *update = NULL; gboolean clear_shutdown = FALSE; crm_debug_3("Invoked"); if(safe_str_neq(client, CRM_SYSTEM_CRMD)) { return; } if(safe_str_eq(status, JOINSTATUS)){ status = ONLINESTATUS; clear_shutdown = TRUE; } else if(safe_str_eq(status, LEAVESTATUS)){ status = OFFLINESTATUS; join = CRMD_JOINSTATE_DOWN; /* clear_shutdown = TRUE; */ } set_bit_inplace(fsa_input_register, R_PEER_DATA); crm_notice("Status update: Client %s/%s now has status [%s] (DC=%s)", node, client, status, AM_I_DC?"true":"false"); if(safe_str_eq(status, ONLINESTATUS)) { /* remove the cached value in case it changed */ crm_debug_2("Uncaching UUID for %s", node); unget_uuid(node); } member = g_hash_table_lookup(crm_peer_cache, node); if(member == NULL) { /* Make sure it is created so crm_update_peer_proc() succeeds */ const char *uuid = get_uuid(node); member = crm_update_peer(0, 0, -1, 0, uuid, node, NULL, NULL); } crm_update_peer_proc(node, crm_proc_crmd, status); if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { return; } else if(fsa_state == S_STOPPING) { return; } if(safe_str_eq(node, fsa_our_dc) && safe_str_eq(status, OFFLINESTATUS)){ /* did our DC leave us */ crm_info("Got client status callback - our DC is dead"); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL); } else if(AM_I_DC == FALSE) { crm_info("Not the DC"); } else { - crm_debug_3("Got client status callback"); - update = create_node_state(node, NULL, NULL, status, join, - NULL, clear_shutdown, __FUNCTION__); - - if(safe_str_eq(status, ONLINESTATUS)){ - crm_xml_add(update, XML_CIB_ATTR_REPLACE, XML_CIB_TAG_LRM","XML_TAG_TRANSIENT_NODEATTRS","); - } - - fsa_cib_anon_update( - XML_CIB_TAG_STATUS, update, cib_scope_local|cib_quorum_override); - free_xml(update); + crm_debug_3("Got client status callback"); - if(safe_str_eq(status, OFFLINESTATUS)) { - erase_node_from_join(node); - check_join_state(fsa_state, __FUNCTION__); - } + if(fsa_cib_conn != NULL && safe_str_eq(status, ONLINESTATUS)) { + erase_status_tag(fsa_our_uname, XML_CIB_TAG_LRM); + erase_status_tag(fsa_our_uname, XML_TAG_TRANSIENT_NODEATTRS); + } + + update = create_node_state( + node, NULL, NULL, status, join, NULL, clear_shutdown, __FUNCTION__); + + fsa_cib_anon_update( + XML_CIB_TAG_STATUS, update, cib_scope_local|cib_quorum_override); + free_xml(update); + + if(safe_str_eq(status, OFFLINESTATUS)) { + erase_node_from_join(node); + check_join_state(fsa_state, __FUNCTION__); + } } trigger_fsa(fsa_source); } void crmd_ipc_connection_destroy(gpointer user_data) { GCHSource *source = NULL; crmd_client_t *client = user_data; /* Calling this function on an _active_ connection results in: * crmd_ipc_connection_destroy (callbacks.c:431) * -> G_main_del_IPC_Channel (GSource.c:478) * -> g_source_unref * -> G_CH_destroy_int (GSource.c:647) * -> crmd_ipc_connection_destroy (callbacks.c:437)\ * * A better alternative is to call G_main_del_IPC_Channel() directly */ if(client == NULL) { crm_debug_4("No client to delete"); return; } crm_debug_2("Disconnecting client %s (%p)", client->table_key, client); source = client->client_source; client->client_source = NULL; if(source != NULL) { crm_debug_3("Deleting %s (%p) from mainloop", client->table_key, source); G_main_del_IPC_Channel(source); } crm_free(client->table_key); crm_free(client->sub_sys); crm_free(client->uuid); crm_free(client); return; } gboolean crmd_client_connect(IPC_Channel *client_channel, gpointer user_data) { crm_debug_3("Invoked"); if (client_channel == NULL) { crm_err("Channel was NULL"); } else if (client_channel->ch_status == IPC_DISCONNECT) { crm_err("Channel was disconnected"); } else { crmd_client_t *blank_client = NULL; crm_debug_3("Channel connected"); crm_malloc0(blank_client, sizeof(crmd_client_t)); CRM_ASSERT(blank_client != NULL); crm_debug_2("Created client: %p", blank_client); client_channel->ops->set_recv_qlen(client_channel, 1024); client_channel->ops->set_send_qlen(client_channel, 1024); blank_client->client_channel = client_channel; blank_client->sub_sys = NULL; blank_client->uuid = NULL; blank_client->table_key = NULL; blank_client->client_source = G_main_add_IPC_Channel( G_PRIORITY_LOW, client_channel, FALSE, crmd_ipc_msg_callback, blank_client, crmd_ipc_connection_destroy); } return TRUE; } #if SUPPORT_HEARTBEAT static gboolean fsa_have_quorum = FALSE; gboolean ccm_dispatch(int fd, gpointer user_data) { int rc = 0; oc_ev_t *ccm_token = (oc_ev_t*)user_data; gboolean was_error = FALSE; crm_debug_3("Invoked"); rc = oc_ev_handle_event(ccm_token); if(rc != 0) { if(is_set(fsa_input_register, R_CCM_DISCONNECTED) == FALSE) { /* we signed out, so this is expected */ register_fsa_input(C_CCM_CALLBACK, I_ERROR, NULL); crm_err("CCM connection appears to have failed: rc=%d.", rc); } was_error = TRUE; } trigger_fsa(fsa_source); return !was_error; } void crmd_ccm_msg_callback( oc_ed_t event, void *cookie, size_t size, const void *data) { gboolean update_cache = FALSE; const oc_ev_membership_t *membership = data; gboolean update_quorum = FALSE; crm_debug_3("Invoked"); CRM_ASSERT(data != NULL); crm_info("Quorum %s after event=%s (id=%d)", ccm_have_quorum(event)?"(re)attained":"lost", ccm_event_name(event), membership->m_instance); if(crm_peer_seq > membership->m_instance) { crm_err("Membership instance ID went backwards! %llu->%d", crm_peer_seq, membership->m_instance); CRM_ASSERT(crm_peer_seq <= membership->m_instance); return; } /* * OC_EV_MS_NEW_MEMBERSHIP: membership with quorum * OC_EV_MS_MS_INVALID: membership without quorum * OC_EV_MS_NOT_PRIMARY: previous membership no longer valid * OC_EV_MS_PRIMARY_RESTORED: previous membership restored * OC_EV_MS_EVICTED: the client is evicted from ccm. */ switch(event) { case OC_EV_MS_NEW_MEMBERSHIP: case OC_EV_MS_INVALID: update_cache = TRUE; update_quorum = TRUE; break; case OC_EV_MS_NOT_PRIMARY: break; case OC_EV_MS_PRIMARY_RESTORED: update_cache = TRUE; crm_peer_seq = membership->m_instance; break; case OC_EV_MS_EVICTED: update_quorum = TRUE; register_fsa_input(C_FSA_INTERNAL, I_STOP, NULL); crm_err("Shutting down after CCM event: %s", ccm_event_name(event)); break; default: crm_err("Unknown CCM event: %d", event); } if(update_quorum) { crm_have_quorum = ccm_have_quorum(event); crm_update_quorum(crm_have_quorum); if(crm_have_quorum == FALSE) { /* did we just loose quorum? */ if(fsa_have_quorum) { crm_info("Quorum lost: %s", ccm_event_name(event)); } } } if(update_cache) { crm_debug_2("Updating cache after event %s", ccm_event_name(event)); do_ccm_update_cache(C_CCM_CALLBACK, fsa_state, event, data, NULL); } else if(event != OC_EV_MS_NOT_PRIMARY) { crm_peer_seq = membership->m_instance; register_fsa_action(A_TE_CANCEL); } oc_ev_callback_done(cookie); return; } #endif void crmd_cib_connection_destroy(gpointer user_data) { crm_debug_3("Invoked"); trigger_fsa(fsa_source); if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Connection to the CIB terminated..."); return; } /* eventually this will trigger a reconnect, not a shutdown */ crm_err("Connection to the CIB terminated..."); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit_inplace(fsa_input_register, R_CIB_CONNECTED); return; } longclock_t fsa_start = 0; longclock_t fsa_stop = 0; longclock_t fsa_diff = 0; gboolean crm_fsa_trigger(gpointer user_data) { unsigned int fsa_diff_ms = 0; if(fsa_diff_max_ms > 0) { fsa_start = time_longclock(); } crm_debug_2("Invoked (queue len: %d)", g_list_length(fsa_message_queue)); s_crmd_fsa(C_FSA_INTERNAL); crm_debug_2("Exited (queue len: %d)", g_list_length(fsa_message_queue)); if(fsa_diff_max_ms > 0) { fsa_stop = time_longclock(); fsa_diff = sub_longclock(fsa_stop, fsa_start); fsa_diff_ms = longclockto_ms(fsa_diff); if(fsa_diff_ms > fsa_diff_max_ms) { crm_err("FSA took %dms to complete", fsa_diff_ms); } else if(fsa_diff_ms > fsa_diff_warn_ms) { crm_warn("FSA took %dms to complete", fsa_diff_ms); } } return TRUE; } diff --git a/crmd/crmd_utils.h b/crmd/crmd_utils.h index 18fed589b1..b3a1184574 100644 --- a/crmd/crmd_utils.h +++ b/crmd/crmd_utils.h @@ -1,95 +1,96 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef CRMD_UTILS__H #define CRMD_UTILS__H #include #include #define CLIENT_EXIT_WAIT 30 extern void process_client_disconnect(crmd_client_t *curr_client); #define fsa_cib_update(section, data, options, call_id) \ if(fsa_cib_conn != NULL) { \ call_id = fsa_cib_conn->cmds->modify( \ fsa_cib_conn, section, data, options); \ \ } else { \ crm_err("No CIB connection available"); \ } #define fsa_cib_anon_update(section, data, options) \ if(fsa_cib_conn != NULL) { \ fsa_cib_conn->cmds->modify( \ fsa_cib_conn, section, data, options); \ \ } else { \ crm_err("No CIB connection available"); \ } extern gboolean fsa_has_quorum; extern int last_peer_update; extern gboolean crm_timer_stop (fsa_timer_t *timer); extern gboolean crm_timer_start(fsa_timer_t *timer); extern gboolean crm_timer_popped(gpointer data); extern xmlNode *create_node_state( const char *uname, const char *ha_state, const char *ccm_state, const char *crmd_state, const char *join_state, const char *exp_state, gboolean clear_shutdown, const char *src); extern void create_node_entry( const char *uuid, const char *uname, const char *type); extern gboolean stop_subsystem ( struct crm_subsystem_s *centry, gboolean force_quit); extern gboolean start_subsystem(struct crm_subsystem_s *centry); extern lrm_op_t *copy_lrm_op(const lrm_op_t *op); extern lrm_rsc_t *copy_lrm_rsc(const lrm_rsc_t *rsc); extern void fsa_dump_actions(long long action, const char *text); extern void fsa_dump_inputs( int log_level, const char *text, long long input_register); extern void update_dc(xmlNode *msg, gboolean assert_same); extern void erase_node_from_join(const char *node); extern void populate_cib_nodes(gboolean with_client_status); extern void crm_update_quorum(gboolean bool); +extern void erase_status_tag(const char *uname, const char *tag); #define start_transition(state) do { \ switch(state) { \ case S_TRANSITION_ENGINE: \ register_fsa_action(A_TE_CANCEL); \ break; \ case S_POLICY_ENGINE: \ case S_IDLE: \ register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL); \ break; \ default: \ crm_debug("NOT starting a new transition in state %s", \ fsa_state2string(fsa_state)); \ break; \ } \ } while(0) #endif diff --git a/crmd/join_dc.c b/crmd/join_dc.c index e23488d1b3..a57007e4ae 100644 --- a/crmd/join_dc.c +++ b/crmd/join_dc.c @@ -1,678 +1,679 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include GHashTable *welcomed_nodes = NULL; GHashTable *integrated_nodes = NULL; GHashTable *finalized_nodes = NULL; GHashTable *confirmed_nodes = NULL; char *max_epoch = NULL; char *max_generation_from = NULL; xmlNode *max_generation_xml = NULL; void initialize_join(gboolean before); gboolean finalize_join_for(gpointer key, gpointer value, gpointer user_data); void finalize_sync_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); void finalize_join(const char *caller); static int current_join_id = 0; unsigned long long saved_ccm_membership_id = 0; static void update_attrd(void) { static IPC_Channel *attrd = NULL; if(attrd == NULL) { crm_info("Connecting to attrd..."); attrd = init_client_ipc_comms_nodispatch(T_ATTRD); } if(attrd != NULL) { xmlNode *update = create_xml_node(NULL, __FUNCTION__); crm_xml_add(update, F_TYPE, T_ATTRD); crm_xml_add(update, F_ORIG, "crmd"); crm_xml_add(update, "task", "refresh"); if(send_ipc_message(attrd, update) == FALSE) { crm_err("attrd refresh failed"); attrd = NULL; } else { crm_debug("sent attrd refresh"); } free_xml(update); } else { crm_info("Couldn't connect to attrd this time"); } } void initialize_join(gboolean before) { /* clear out/reset a bunch of stuff */ crm_debug("join-%d: Initializing join data (flag=%s)", current_join_id, before?"true":"false"); g_hash_table_destroy(welcomed_nodes); g_hash_table_destroy(integrated_nodes); g_hash_table_destroy(finalized_nodes); g_hash_table_destroy(confirmed_nodes); if(before) { if(max_generation_from != NULL) { crm_free(max_generation_from); max_generation_from = NULL; } if(max_generation_xml != NULL) { free_xml(max_generation_xml); max_generation_xml = NULL; } clear_bit_inplace(fsa_input_register, R_HAVE_CIB); clear_bit_inplace(fsa_input_register, R_CIB_ASKED); } welcomed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); integrated_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); finalized_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); confirmed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } void erase_node_from_join(const char *uname) { gboolean w = FALSE, i = FALSE, f = FALSE, c = FALSE; if(uname == NULL) { return; } if(welcomed_nodes != NULL) { w = g_hash_table_remove(welcomed_nodes, uname); } if(integrated_nodes != NULL) { i = g_hash_table_remove(integrated_nodes, uname); } if(finalized_nodes != NULL) { f = g_hash_table_remove(finalized_nodes, uname); } if(confirmed_nodes != NULL) { c = g_hash_table_remove(confirmed_nodes, uname); } crm_info("Removed dead node %s from join calculations:" " welcomed=%d itegrated=%d finalized=%d confirmed=%d", uname, w, i, f, c); } static void join_make_offer(gpointer key, gpointer value, gpointer user_data) { const char *join_to = NULL; const crm_node_t *member = value; CRM_ASSERT(member != NULL); if(crm_is_member_active(member) == FALSE) { return; } join_to = member->uname; if(join_to == NULL) { crm_err("No recipient for welcome message"); return; } erase_node_from_join(join_to); if(saved_ccm_membership_id != crm_peer_seq) { saved_ccm_membership_id = crm_peer_seq; crm_info("Making join offers based on membership %llu", crm_peer_seq); } if(member->processes & crm_proc_crmd) { xmlNode *offer = create_request( CRM_OP_JOIN_OFFER, NULL, join_to, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL); char *join_offered = crm_itoa(current_join_id); crm_xml_add_int(offer, F_CRM_JOIN_ID, current_join_id); /* send the welcome */ crm_debug("join-%d: Sending offer to %s", current_join_id, join_to); send_msg_via_ha(offer); free_xml(offer); g_hash_table_insert( welcomed_nodes, crm_strdup(join_to), join_offered); } else { crm_info("Peer process on %s is not active (yet?): %.8lx %d", join_to, (long)member->processes, g_hash_table_size(crm_peer_cache)); } } /* A_DC_JOIN_OFFER_ALL */ void do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { /* reset everyones status back to down or in_ccm in the CIB * * any nodes that are active in the CIB but not in the CCM list * will be seen as offline by the PE anyway */ current_join_id++; initialize_join(TRUE); /* do_update_cib_nodes(TRUE, __FUNCTION__); */ update_dc(NULL, FALSE); if(cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) { crm_info("A new node joined the cluster"); } g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL); /* dont waste time by invoking the PE yet; */ crm_info("join-%d: Waiting on %d outstanding join acks", current_join_id, g_hash_table_size(welcomed_nodes)); } /* A_DC_JOIN_OFFER_ONE */ void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { crm_node_t *member; ha_msg_input_t *welcome = fsa_typed_data(fsa_dt_ha_msg); const char *join_to = NULL; if(welcome == NULL) { crm_err("Attempt to send welcome message " "without a message to reply to!"); return; } join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM); if(join_to != NULL && (cur_state == S_INTEGRATION || cur_state == S_FINALIZE_JOIN)) { /* note: it _is_ possible that a node will have been * sick or starting up when the original offer was made. * however, it will either re-announce itself in due course * _or_ we can re-store the original offer on the client. */ crm_debug("Re-offering membership to %s...", join_to); } crm_info("join-%d: Processing annouce request from %s in state %s", current_join_id, join_to, fsa_state2string(cur_state)); /* always offer to the DC (ourselves) * this ensures the correct value for max_generation_from */ member = g_hash_table_lookup(crm_peer_cache, fsa_our_uname); join_make_offer(NULL, member, NULL); member = g_hash_table_lookup(crm_peer_cache, join_to); join_make_offer(NULL, member, NULL); /* this was a genuine join request, cancel any existing * transition and invoke the PE */ start_transition(fsa_state); /* dont waste time by invoking the pe yet; */ crm_debug("Waiting on %d outstanding join acks for join-%d", g_hash_table_size(welcomed_nodes), current_join_id); } /* A_DC_JOIN_PROCESS_REQ */ void do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { xmlNode *generation = NULL; int cmp = 0; int join_id = -1; gboolean ack_nack_bool = TRUE; const char *ack_nack = CRMD_JOINSTATE_MEMBER; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *join_from = crm_element_value(join_ack->msg,F_CRM_HOST_FROM); const char *ref = crm_element_value(join_ack->msg,XML_ATTR_REFERENCE); gpointer join_node = g_hash_table_lookup( crm_peer_cache, join_from); crm_debug("Processing req from %s", join_from); generation = join_ack->xml; crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); if(max_generation_xml != NULL && generation != NULL) { cmp = cib_compare_generation(max_generation_xml, generation); } if(join_node == NULL) { crm_err("Node %s is not a member", join_from); ack_nack_bool = FALSE; } else if(generation == NULL) { crm_err("Generation was NULL"); ack_nack_bool = FALSE; } else if(join_id != current_join_id) { crm_debug("Invalid response from %s: join-%d vs. join-%d", join_from, join_id, current_join_id); check_join_state(cur_state, __FUNCTION__); return; } else if(max_generation_xml == NULL) { max_generation_xml = copy_xml(generation); max_generation_from = crm_strdup(join_from); } else if(cmp < 0 || (cmp == 0 && safe_str_eq(join_from, fsa_our_uname))) { crm_debug("%s has a better generation number than" " the current max %s", join_from, max_generation_from); if(max_generation_xml) { crm_log_xml_debug(max_generation_xml, "Max generation"); } crm_log_xml_debug(generation, "Their generation"); crm_free(max_generation_from); free_xml(max_generation_xml); max_generation_from = crm_strdup(join_from); max_generation_xml = copy_xml(join_ack->xml); } if(ack_nack_bool == FALSE) { /* NACK this client */ ack_nack = CRMD_JOINSTATE_DOWN; crm_err("join-%d: NACK'ing node %s (ref %s)", join_id, join_from, ref); } else { crm_debug("join-%d: Welcoming node %s (ref %s)", join_id, join_from, ref); } /* add them to our list of CRMD_STATE_ACTIVE nodes */ g_hash_table_insert( integrated_nodes, crm_strdup(join_from), crm_strdup(ack_nack)); crm_debug("%u nodes have been integrated into join-%d", g_hash_table_size(integrated_nodes), join_id); g_hash_table_remove(welcomed_nodes, join_from); if(check_join_state(cur_state, __FUNCTION__) == FALSE) { /* dont waste time by invoking the PE yet; */ crm_debug("join-%d: Still waiting on %d outstanding offers", join_id, g_hash_table_size(welcomed_nodes)); } } /* A_DC_JOIN_FINALIZE */ void do_dc_join_finalize(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum cib_errors rc = cib_ok; /* This we can do straight away and avoid clients timing us out * while we compute the latest CIB */ crm_debug("Finializing join-%d for %d clients", current_join_id, g_hash_table_size(integrated_nodes)); if(g_hash_table_size(integrated_nodes) == 0) { return; } clear_bit_inplace(fsa_input_register, R_HAVE_CIB); if(max_generation_from == NULL || safe_str_eq(max_generation_from, fsa_our_uname)){ set_bit_inplace(fsa_input_register, R_HAVE_CIB); } if(is_set(fsa_input_register, R_IN_TRANSITION)) { crm_warn("join-%d: We are still in a transition." " Delaying until the TE completes.", current_join_id); crmd_fsa_stall(NULL); return; } if(is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { /* ask for the agreed best CIB */ crm_info("join-%d: Asking %s for its copy of the CIB", current_join_id, crm_str(max_generation_from)); crm_log_xml_debug(max_generation_xml, "Requesting version"); set_bit_inplace(fsa_input_register, R_CIB_ASKED); rc = fsa_cib_conn->cmds->sync_from( fsa_cib_conn, max_generation_from, NULL, cib_quorum_override); fsa_cib_conn->cmds->register_callback( fsa_cib_conn, rc, 60, FALSE, crm_strdup(max_generation_from), "finalize_sync_callback", finalize_sync_callback); return; } else { /* Send _our_ CIB out to everyone */ fsa_cib_conn->cmds->sync_from( fsa_cib_conn, fsa_our_uname, NULL,cib_quorum_override); update_attrd(); } finalize_join(__FUNCTION__); } void finalize_sync_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { CRM_DEV_ASSERT(cib_not_master != rc); clear_bit_inplace(fsa_input_register, R_CIB_ASKED); if(rc != cib_ok) { do_crm_log((rc==cib_old_data?LOG_WARNING:LOG_ERR), "Sync from %s resulted in an error: %s", (char*)user_data, cib_error2string(rc)); /* restart the whole join process */ register_fsa_error_adv( C_FSA_INTERNAL, I_ELECTION_DC,NULL,NULL,__FUNCTION__); } else if(AM_I_DC && fsa_state == S_FINALIZE_JOIN) { finalize_join(__FUNCTION__); update_attrd(); } else { crm_debug("No longer the DC in S_FINALIZE_JOIN: %s/%s", AM_I_DC?"DC":"CRMd", fsa_state2string(fsa_state)); } crm_free(user_data); } void finalize_join(const char *caller) { xmlNode *cib = createEmptyCib(); set_bit_inplace(fsa_input_register, R_HAVE_CIB); clear_bit_inplace(fsa_input_register, R_CIB_ASKED); set_uuid(cib, XML_ATTR_DC_UUID, fsa_our_uname); crm_debug_3("Update %s in the CIB to our uuid: %s", XML_ATTR_DC_UUID, crm_element_value(cib, XML_ATTR_DC_UUID)); fsa_cib_anon_update(NULL, cib, cib_quorum_override); free_xml(cib); crm_debug_3("Syncing to %d clients", g_hash_table_size(finalized_nodes)); /* make sure dc_uuid is re-set to us */ if(check_join_state(fsa_state, caller) == FALSE) { crm_debug("Notifying %d clients of join-%d results", g_hash_table_size(integrated_nodes), current_join_id); g_hash_table_foreach_remove( integrated_nodes, finalize_join_for, NULL); } } static void join_update_complete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { fsa_data_t *msg_data = NULL; if(rc == cib_ok) { crm_debug("Join update %d complete", call_id); check_join_state(fsa_state, __FUNCTION__); } else { crm_err("Join update %d failed", call_id); crm_log_xml(LOG_DEBUG, "failed", msg); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } /* A_DC_JOIN_PROCESS_ACK */ void do_dc_join_ack(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int join_id = -1; int call_id = 0; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *join_id_s = NULL; const char *join_state = NULL; const char *op = crm_element_value(join_ack->msg, F_CRM_TASK); const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); if(safe_str_neq(op, CRM_OP_JOIN_CONFIRM)) { crm_debug("Ignoring op=%s message from %s", op, join_from); return; } crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); join_id_s = crm_element_value(join_ack->msg, F_CRM_JOIN_ID); /* now update them to "member" */ crm_debug_2("Processing ack from %s", join_from); join_state = (const char *) g_hash_table_lookup(finalized_nodes, join_from); if(join_state == NULL) { crm_err("Join not in progress: ignoring join-%d from %s", join_id, join_from); return; } else if(safe_str_neq(join_state, CRMD_JOINSTATE_MEMBER)) { crm_err("Node %s wasnt invited to join the cluster",join_from); g_hash_table_remove(finalized_nodes, join_from); return; } else if(join_id != current_join_id) { crm_err("Invalid response from %s: join-%d vs. join-%d", join_from, join_id, current_join_id); g_hash_table_remove(finalized_nodes, join_from); return; } g_hash_table_remove(finalized_nodes, join_from); if(g_hash_table_lookup(confirmed_nodes, join_from) != NULL) { crm_err("join-%d: hash already contains confirmation from %s", join_id, join_from); } g_hash_table_insert( confirmed_nodes, crm_strdup(join_from), crm_strdup(join_id_s)); crm_info("join-%d: Updating node state to %s for %s", join_id, CRMD_JOINSTATE_MEMBER, join_from); /* update CIB with the current LRM status from the node * We dont need to notify the TE of these updates, a transition will * be started in due time */ + erase_status_tag(join_from, XML_CIB_TAG_LRM); fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml, cib_scope_local|cib_quorum_override, call_id); add_cib_op_callback( fsa_cib_conn, call_id, FALSE, NULL, join_update_complete_callback); crm_debug("join-%d: Registered callback for LRM update %d", join_id, call_id); } gboolean finalize_join_for(gpointer key, gpointer value, gpointer user_data) { const char *join_to = NULL; const char *join_state = NULL; xmlNode *acknak = NULL; if(key == NULL || value == NULL) { return TRUE; } join_to = (const char *)key; join_state = (const char *)value; /* make sure the node exists in the config section */ create_node_entry(join_to, join_to, NORMALNODE); /* send the ack/nack to the node */ acknak = create_request( CRM_OP_JOIN_ACKNAK, NULL, join_to, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL); crm_xml_add_int(acknak, F_CRM_JOIN_ID, current_join_id); /* set the ack/nack */ if(safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { crm_debug("join-%d: ACK'ing join request from %s, state %s", current_join_id, join_to, join_state); crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE); g_hash_table_insert( finalized_nodes, crm_strdup(join_to), crm_strdup(CRMD_JOINSTATE_MEMBER)); } else { crm_warn("join-%d: NACK'ing join request from %s, state %s", current_join_id, join_to, join_state); crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_FALSE); } send_msg_via_ha(acknak); free_xml(acknak); return TRUE; } void ghash_print_node(gpointer key, gpointer value, gpointer user_data); gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source) { crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state)); if(saved_ccm_membership_id != crm_peer_seq) { crm_info("%s: Membership changed since join started: %llu -> %llu", source, saved_ccm_membership_id, crm_peer_seq); register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } else if(cur_state == S_INTEGRATION) { if(g_hash_table_size(welcomed_nodes) == 0) { crm_debug("join-%d: Integration of %d peers complete: %s", current_join_id, g_hash_table_size(integrated_nodes), source); register_fsa_input_before( C_FSA_INTERNAL, I_INTEGRATED, NULL); return TRUE; } } else if(cur_state == S_FINALIZE_JOIN) { if(is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id); return TRUE; } else if(g_hash_table_size(integrated_nodes) == 0 && g_hash_table_size(finalized_nodes) == 0) { crm_debug("join-%d complete: %s", current_join_id, source); register_fsa_input_later( C_FSA_INTERNAL, I_FINALIZED, NULL); } else if(g_hash_table_size(integrated_nodes) != 0 && g_hash_table_size(finalized_nodes) != 0) { char *msg = NULL; crm_err("join-%d: Waiting on %d integrated nodes" " AND %d finalized nodes", current_join_id, g_hash_table_size(integrated_nodes), g_hash_table_size(finalized_nodes)); msg = crm_strdup("Integrated node"); g_hash_table_foreach(integrated_nodes, ghash_print_node, msg); crm_free(msg); msg = crm_strdup("Finalized node"); g_hash_table_foreach(finalized_nodes, ghash_print_node, msg); crm_free(msg); } else if(g_hash_table_size(integrated_nodes) != 0) { crm_debug("join-%d: Still waiting on %d integrated nodes", current_join_id, g_hash_table_size(integrated_nodes)); } else if(g_hash_table_size(finalized_nodes) != 0) { crm_debug("join-%d: Still waiting on %d finalized nodes", current_join_id, g_hash_table_size(finalized_nodes)); } } return FALSE; } diff --git a/crmd/lrm.c b/crmd/lrm.c index 72bccea442..180086d853 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1,1892 +1,1888 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include /* for access */ #include #include #include #include #include #include #include #include #include #include #include struct recurring_op_s { char *rsc_id; char *op_key; int call_id; int interval; gboolean remove; gboolean cancelled; }; char *make_stop_id(const char *rsc, int call_id); void cib_rsc_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); gboolean build_operation_update( xmlNode *rsc_list, lrm_op_t *op, const char *src, int lpc); gboolean build_active_RAs(xmlNode *rsc_list); gboolean is_rsc_active(const char *rsc_id); void do_update_resource(lrm_op_t *op); gboolean process_lrm_event(lrm_op_t *op); void do_lrm_rsc_op(lrm_rsc_t *rsc, const char *operation, xmlNode *msg, xmlNode *request); lrm_op_t *construct_op( xmlNode *rsc_op, const char *rsc_id, const char *operation); void send_direct_ack(const char *to_host, const char *to_sys, lrm_op_t* op, const char *rsc_id); void free_recurring_op(gpointer value); GHashTable *meta_hash = NULL; GHashTable *resources = NULL; GHashTable *pending_ops = NULL; GCHSource *lrm_source = NULL; int num_lrm_register_fails = 0; int max_lrm_register_fails = 30; /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int ret = HA_OK; if(action & A_LRM_DISCONNECT) { if(verify_stopped(cur_state, LOG_INFO) == FALSE) { crmd_fsa_stall(NULL); return; } if(lrm_source) { crm_debug("Removing LRM connection from MainLoop"); if(G_main_del_IPC_Channel(lrm_source) == FALSE) { crm_err("Could not remove LRM connection" " from MainLoop"); } lrm_source = NULL; } if(fsa_lrm_conn) { fsa_lrm_conn->lrm_ops->signoff(fsa_lrm_conn); crm_info("Disconnected from the LRM"); clear_bit_inplace(fsa_input_register, R_LRM_CONNECTED); } /* TODO: Clean up the hashtable */ } if(action & A_LRM_CONNECT) { ret = HA_OK; pending_ops = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, free_recurring_op); resources = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if(NULL == fsa_lrm_conn) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); ret = HA_FAIL; } if(ret == HA_OK) { crm_debug("Connecting to the LRM"); ret = fsa_lrm_conn->lrm_ops->signon( fsa_lrm_conn, CRM_SYSTEM_CRMD); } if(ret != HA_OK) { if(++num_lrm_register_fails < max_lrm_register_fails) { crm_warn("Failed to sign on to the LRM %d" " (%d max) times", num_lrm_register_fails, max_lrm_register_fails); crm_timer_start(wait_timer); crmd_fsa_stall(NULL); return; } } if(ret == HA_OK) { crm_debug_4("LRM: set_lrm_callback..."); ret = fsa_lrm_conn->lrm_ops->set_lrm_callback( fsa_lrm_conn, lrm_op_callback); if(ret != HA_OK) { crm_err("Failed to set LRM callbacks"); } } if(ret != HA_OK) { crm_err("Failed to sign on to the LRM %d" " (max) times", num_lrm_register_fails); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } /* TODO: create a destroy handler that causes * some recovery to happen */ lrm_source = G_main_add_IPC_Channel( G_PRIORITY_LOW, fsa_lrm_conn->lrm_ops->ipcchan(fsa_lrm_conn), FALSE, lrm_dispatch, fsa_lrm_conn, default_ipc_connection_destroy); set_bit_inplace(fsa_input_register, R_LRM_CONNECTED); crm_debug("LRM connection established"); } if(action & ~(A_LRM_CONNECT|A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } static void ghash_print_pending(gpointer key, gpointer value, gpointer user_data) { const char *stop_id = key; int *log_level = user_data; struct recurring_op_s *pending = value; do_crm_log(*log_level, "Pending action: %s (%s)", stop_id, pending->op_key); } static void ghash_print_pending_for_rsc(gpointer key, gpointer value, gpointer user_data) { const char *stop_id = key; char *rsc = user_data; struct recurring_op_s *pending = value; if(safe_str_eq(rsc, pending->rsc_id)) { do_crm_log(LOG_NOTICE, "%sction %s (%s) incomplete at shutdown", pending->interval==0?"A":"Recurring a", stop_id, pending->op_key); } } static void ghash_count_pending(gpointer key, gpointer value, gpointer user_data) { int *counter = user_data; struct recurring_op_s *pending = value; if(pending->interval > 0) { /* Ignore recurring actions in the shutdown calculations */ return; } (*counter)++; } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; GListPtr lrm_list = NULL; crm_debug("Checking for active resources before exit"); if(cur_state == S_TERMINATE) { log_level = LOG_ERR; } g_hash_table_foreach(pending_ops, ghash_count_pending, &counter); if(counter > 0) { rc = FALSE; do_crm_log(log_level, "%d pending LRM operations at shutdown%s", g_hash_table_size(pending_ops), cur_state == S_TERMINATE?"":"... waiting"); if(cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_foreach( pending_ops, ghash_print_pending, &log_level); } goto bail; } if(lrm_source != NULL && fsa_lrm_conn != NULL) { lrm_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn); } slist_iter( rsc_id, char, lrm_list, lpc, if(is_rsc_active(rsc_id) == FALSE) { continue; } crm_err("Resource %s was active at shutdown." " You may ignore this error if it is unmanaged.", rsc_id); g_hash_table_foreach( pending_ops, ghash_print_pending_for_rsc, rsc_id); ); bail: set_bit_inplace(fsa_input_register, R_SENT_RSC_STOP); if(cur_state == S_TERMINATE) { rc = TRUE; } return rc; } static const char * get_rsc_metadata(const char *type, const char *class, const char *provider) { int len = 0; char *key = NULL; char *metadata = NULL; if(meta_hash == NULL) { meta_hash = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } CRM_CHECK(type != NULL, return NULL); CRM_CHECK(class != NULL, return NULL); if(provider == NULL) { provider = "heartbeat"; } len = strlen(type) + strlen(class) + strlen(provider) + 4; crm_malloc0(key, len); sprintf(key, "%s::%s:%s", type, class, provider); key[len-1] = 0; metadata = g_hash_table_lookup(meta_hash, key); if(metadata) { crm_debug_2("Returning cached metadata for %s", key); goto out; } crm_debug("Retreiving metadata for %s", key); metadata = fsa_lrm_conn->lrm_ops->get_rsc_type_metadata( fsa_lrm_conn, class, type, provider); if(metadata) { /* copy the metadata because the LRM likes using * g_alloc instead of cl_malloc */ char *m_copy = crm_strdup(metadata); g_hash_table_insert(meta_hash, key, m_copy); key = NULL; /* prevent it from being free'd */ g_free(metadata); metadata = m_copy; } else { crm_warn("No metadata found for %s", key); } out: crm_free(key); return metadata; } static GListPtr get_rsc_restart_list(lrm_rsc_t *rsc, lrm_op_t *op) { gboolean supported = FALSE; GListPtr restart_list = NULL; const char *value = NULL; const char *metadata_str = get_rsc_metadata( rsc->type, rsc->class, rsc->provider); xmlNode *params = NULL; xmlNode *actions = NULL; xmlNode *metadata = NULL; if(metadata_str == NULL) { return NULL; } metadata = string2xml(metadata_str); if(metadata == NULL) { crm_err("Metadata for %s::%s:%s is not valid XML", rsc->provider, rsc->class, rsc->type); return NULL; } actions = find_xml_node(metadata, "actions", TRUE); xml_child_iter_filter( actions, action, "action", value = crm_element_value(action, "name"); if(safe_str_eq("reload", value)) { supported = TRUE; break; } ); if(supported == FALSE) { goto cleanup; } params = find_xml_node(metadata, "parameters", TRUE); xml_child_iter_filter( params, param, "parameter", value = crm_element_value(param, "unique"); if(crm_is_true(value)) { value = crm_element_value(param, "name"); crm_debug("Attr %s is not reloadable", value); restart_list = g_list_append( restart_list, crm_strdup(value)); } ); cleanup: free_xml(metadata); return restart_list; } static void append_restart_list(xmlNode *update, lrm_op_t *op, const char *version) { int len = 0; char *list = NULL; char *digest = NULL; lrm_rsc_t *rsc = NULL; const char *value = NULL; gboolean non_empty = FALSE; xmlNode *restart = NULL; GListPtr restart_list = NULL; if(op->interval > 0) { /* monitors are not reloadable */ return; } else if(safe_str_neq(CRMD_ACTION_START, op->op_type)) { /* only starts are potentially reloadable */ return; } else if(compare_version("1.0.8", version) > 0) { crm_debug("Caller version %s does not support reloads", version); return; } rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, op->rsc_id); if(rsc == NULL) { crm_info("Resource %s no longer in the LRM", op->rsc_id); return; } restart_list = get_rsc_restart_list(rsc, op); if(restart_list == NULL) { crm_debug("Resource %s does not support reloads", op->rsc_id); return; } restart = create_xml_node(NULL, XML_TAG_PARAMS); slist_iter(param, const char, restart_list, lpc, int start = len; value = g_hash_table_lookup(op->params, param); if(value != NULL) { non_empty = TRUE; crm_xml_add(restart, param, value); } len += strlen(param) + 2; crm_realloc(list, len+1); sprintf(list+start, " %s ", param); ); digest = calculate_xml_digest(restart, TRUE, FALSE); crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list); crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); crm_debug("%s : %s", digest, list); if(non_empty) { crm_log_xml_debug(restart, "restart digest source"); } slist_destroy(char, child, restart_list, crm_free(child)); free_xml(restart); crm_free(digest); crm_free(list); } gboolean build_operation_update( xmlNode *xml_rsc, lrm_op_t *op, const char *src, int lpc) { char *magic = NULL; const char *task = NULL; xmlNode *xml_op = NULL; char *op_id = NULL; char *local_user_data = NULL; const char *caller_version = NULL; char *digest = NULL; xmlNode *args_xml = NULL; xmlNode *args_parent = NULL; CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return FALSE; } crm_debug_2("%s: Updating resouce %s after %s %s op", src, op->rsc_id, op_status2text(op->op_status), op->op_type); if(op->op_status == LRM_OP_CANCELLED) { crm_debug_3("Ignoring cancelled op"); return TRUE; } if(AM_I_DC) { caller_version = CRM_FEATURE_SET; } else if(fsa_our_dc_version != NULL) { caller_version = fsa_our_dc_version; } else { /* there is a small risk in formerly mixed clusters that * it will be sub-optimal. * however with our upgrade policy, the update we send * should still be completely supported anyway */ caller_version = g_hash_table_lookup( op->params, XML_ATTR_CRM_VERSION); crm_warn("Falling back to operation originator version: %s", caller_version); } crm_debug_3("DC version: %s", caller_version); task = op->op_type; /* remap the task name under various scenarios * this makes life easier for the PE when its trying determin the current state */ if(crm_str_eq(task, "reload", TRUE)) { if(op->op_status == LRM_OP_DONE) { task = CRMD_ACTION_START; } else { task = CRMD_ACTION_STATUS; } } else if(crm_str_eq(task, CRMD_ACTION_MIGRATE, TRUE)) { /* if the migrate_from fails it will have enough info to do the right thing */ if(op->op_status == LRM_OP_DONE) { task = CRMD_ACTION_STOP; } else { task = CRMD_ACTION_STATUS; } } else if(op->op_status == LRM_OP_DONE && crm_str_eq(task, CRMD_ACTION_MIGRATED, TRUE)) { task = CRMD_ACTION_START; } if(safe_str_eq(task, CRMD_ACTION_NOTIFY)) { const char *n_type = g_hash_table_lookup( op->params, crm_meta_name("notify_type")); const char *n_task = g_hash_table_lookup( op->params, crm_meta_name("notify_operation")); #if CRM_DEPRECATED_SINCE_2_0_5 if(n_type == NULL) { n_type = g_hash_table_lookup(op->params, "notify_type"); } if(n_task == NULL) { n_task = g_hash_table_lookup(op->params, "notify_operation"); } #endif CRM_DEV_ASSERT(n_type != NULL); CRM_DEV_ASSERT(n_task != NULL); op_id = generate_notify_key(op->rsc_id, n_type, n_task); /* these are not yet allowed to fail */ op->op_status = LRM_OP_DONE; op->rc = 0; } else { op_id = generate_op_key(op->rsc_id, task, op->interval); } xml_op = find_entity(xml_rsc, XML_LRM_TAG_RSC_OP, op_id); if(xml_op != NULL) { crm_log_xml(LOG_DEBUG, "Replacing existing entry", xml_op); } else { xml_op = create_xml_node(xml_rsc, XML_LRM_TAG_RSC_OP); } crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_free(op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); crm_xml_add(xml_op, XML_ATTR_ORIGIN, src); if(op->user_data == NULL) { char *id = crm_itoa(op->call_id); crm_debug("Generating fake transition key for:" " %s_%s_%d %d from %s", op->rsc_id, op->op_type, op->interval, op->call_id, op->app_name); local_user_data = generate_transition_key(-1, 0, 0, id); op->user_data = local_user_data; crm_free(id); } if(compare_version("1.0.3", caller_version) > 0) { magic = generate_transition_magic_v202( op->user_data, op->op_status); } else { magic = generate_transition_magic( op->user_data, op->op_status, op->rc); } crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic); crm_free(magic); switch(op->op_status) { case LRM_OP_PENDING: break; case LRM_OP_CANCELLED: crm_err("What to do here"); break; case LRM_OP_ERROR: case LRM_OP_TIMEOUT: case LRM_OP_NOTSUPPORTED: crm_debug_2("Resource action %s/%s %s: %d", op->rsc_id, task, op_status2text(op->op_status), op->rc); break; case LRM_OP_DONE: break; } crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id); /* set these on 'xml_rsc' too to make life easy for the PE */ crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version); crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc); crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status); crm_xml_add_int(xml_op, XML_LRM_ATTR_INTERVAL, op->interval); if(compare_version("2.1", caller_version) <= 0) { if(op->t_run || op->t_rcchange || op->exec_time || op->queue_time) { crm_debug_2("Timing data (%s_%s_%d): last=%lu change=%lu exec=%lu queue=%lu", op->rsc_id, op->op_type, op->interval, op->t_run, op->t_rcchange, op->exec_time, op->queue_time); - crm_xml_add_int(xml_op, "last_run", op->t_run); - crm_xml_add_int(xml_op, "last_rc_change", op->t_rcchange); - crm_xml_add_int(xml_op, "exec_time", op->exec_time); - crm_xml_add_int(xml_op, "queue_time", op->queue_time); + crm_xml_add_int(xml_op, "last-run", op->t_run); + crm_xml_add_int(xml_op, "last-rc-change", op->t_rcchange); + crm_xml_add_int(xml_op, "exec-time", op->exec_time); + crm_xml_add_int(xml_op, "queue-time", op->queue_time); } } /* this will enable us to later determin that the * resource's parameters have changed and we should force * a restart */ args_parent = NULL; #if CRM_DEPRECATED_SINCE_2_0_4 if(compare_version("1.0.4", caller_version) > 0) { args_parent = xml_op; } #endif args_xml = create_xml_node(args_parent, XML_TAG_PARAMS); g_hash_table_foreach(op->params, hash2field, args_xml); filter_action_parameters(args_xml, caller_version); digest = calculate_xml_digest(args_xml, TRUE, FALSE); if(op->interval == 0 && safe_str_neq(task, CRMD_ACTION_STOP)) { crm_debug("Calculated digest %s for %s (%s)\n", digest, ID(xml_op), crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); crm_log_xml(LOG_DEBUG, "digest:source", args_xml); } crm_xml_add(xml_op, XML_LRM_ATTR_OP_DIGEST, digest); crm_free(digest); if(args_parent == NULL) { free_xml(args_xml); } append_restart_list(xml_op, op, caller_version); if(op->op_status != LRM_OP_DONE && crm_str_eq(op->op_type, CRMD_ACTION_MIGRATED, TRUE)) { const char *host = g_hash_table_lookup( op->params, crm_meta_name("migrate_source_uuid")); crm_xml_add(xml_op, CRMD_ACTION_MIGRATED, host); } if(local_user_data) { crm_free(local_user_data); op->user_data = NULL; } return TRUE; } gboolean is_rsc_active(const char *rsc_id) { GList *op_list = NULL; gboolean active = FALSE; lrm_rsc_t *the_rsc = NULL; state_flag_t cur_state = 0; int max_call_id = -1; if(fsa_lrm_conn == NULL) { return FALSE; } the_rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rsc_id); crm_debug_3("Processing lrm_rsc_t entry %s", rsc_id); if(the_rsc == NULL) { crm_err("NULL resource returned from the LRM"); return FALSE; } op_list = the_rsc->ops->get_cur_state(the_rsc, &cur_state); crm_debug_3("\tcurrent state:%s",cur_state==LRM_RSC_IDLE?"Idle":"Busy"); slist_iter( op, lrm_op_t, op_list, llpc, crm_debug_2("Processing op %s_%d (%d) for %s (status=%d, rc=%d)", op->op_type, op->interval, op->call_id, the_rsc->id, op->op_status, op->rc); CRM_ASSERT(max_call_id <= op->call_id); if(op->rc == EXECRA_OK && safe_str_eq(op->op_type, CRMD_ACTION_STOP)) { active = FALSE; } else if(op->rc == EXECRA_OK && safe_str_eq(op->op_type, CRMD_ACTION_MIGRATE)) { /* a stricter check is too complex... * leave that to the PE */ active = FALSE; } else if(op->rc == EXECRA_NOT_RUNNING) { active = FALSE; } else { active = TRUE; } max_call_id = op->call_id; lrm_free_op(op); ); g_list_free(op_list); lrm_free_rsc(the_rsc); return active; } gboolean build_active_RAs(xmlNode *rsc_list) { GList *op_list = NULL; GList *lrm_list = NULL; gboolean found_op = FALSE; state_flag_t cur_state = 0; if(fsa_lrm_conn == NULL) { return FALSE; } lrm_list = fsa_lrm_conn->lrm_ops->get_all_rscs(fsa_lrm_conn); slist_iter( rid, char, lrm_list, lpc, lrm_rsc_t *the_rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); xmlNode *xml_rsc = create_xml_node( rsc_list, XML_LRM_TAG_RESOURCE); int max_call_id = -1; crm_debug_2("Processing lrm_rsc_t entry %s", rid); if(the_rsc == NULL) { crm_err("NULL resource returned from the LRM"); continue; } crm_xml_add(xml_rsc, XML_ATTR_ID, the_rsc->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, the_rsc->type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, the_rsc->class); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER,the_rsc->provider); op_list = the_rsc->ops->get_cur_state(the_rsc, &cur_state); crm_debug_2("\tcurrent state:%s", cur_state==LRM_RSC_IDLE?"Idle":"Busy"); slist_iter( op, lrm_op_t, op_list, llpc, crm_debug_2("Processing op %s for %s (status=%d, rc=%d)", op->op_type, the_rsc->id, op->op_status, op->rc); if(max_call_id < op->call_id) { build_operation_update( xml_rsc, op, __FUNCTION__, llpc); } else if(max_call_id > op->call_id) { crm_err("Bad call_id in list=%d. Previous call_id=%d", op->call_id, max_call_id); } else { crm_warn("lrm->get_cur_state() returned" " duplicate entries for call_id=%d", op->call_id); } max_call_id = op->call_id; found_op = TRUE; lrm_free_op(op); ); if(found_op == FALSE && g_list_length(op_list) != 0) { crm_err("Could not properly determin last op" " for %s from %d entries", the_rsc->id, g_list_length(op_list)); } g_list_free(op_list); lrm_free_rsc(the_rsc); ); g_list_free(lrm_list); return TRUE; } xmlNode* do_lrm_query(gboolean is_replace) { gboolean shut_down = FALSE; xmlNode *xml_result= NULL; xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; const char *exp_state = CRMD_STATE_ACTIVE; if(is_set(fsa_input_register, R_SHUTDOWN)) { exp_state = CRMD_STATE_INACTIVE; shut_down = TRUE; } xml_state = create_node_state( fsa_our_uname, ACTIVESTATUS, XML_BOOLEAN_TRUE, ONLINESTATUS, CRMD_JOINSTATE_MEMBER, exp_state, !shut_down, __FUNCTION__); xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, fsa_our_uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(rsc_list); - if(is_replace) { - crm_xml_add(xml_state, XML_CIB_ATTR_REPLACE, XML_CIB_TAG_LRM); - } - xml_result = create_cib_fragment(xml_state, XML_CIB_TAG_STATUS); free_xml(xml_state); crm_log_xml_debug_3(xml_state, "Current state of the LRM"); return xml_result; } /* * Remove the rsc from the CIB * * Avoids refreshing the entire LRM section of this host */ #define rsc_template "//"XML_CIB_TAG_STATE"[@uname=\"%s\"]//"XML_LRM_TAG_RESOURCE"[@id=\"%s\"]" static void delete_rsc_entry(const char *rsc_id) { int max = 0; char *rsc_xpath = NULL; CRM_CHECK(rsc_id != NULL, return); max = strlen(rsc_template) + strlen(rsc_id) + strlen(fsa_our_uname) + 1; crm_malloc0(rsc_xpath, max); snprintf(rsc_xpath, max, rsc_template, fsa_our_uname, rsc_id); CRM_CHECK(rsc_id != NULL, return); crm_debug("sync: Sending delete op for %s", rsc_id); fsa_cib_conn->cmds->delete( fsa_cib_conn, rsc_xpath, NULL, cib_quorum_override|cib_xpath); crm_free(rsc_xpath); } /* * Remove the op from the CIB * * Avoids refreshing the entire LRM section of this host */ #define op_template "//"XML_CIB_TAG_STATE"[@uname=\"%s\"]//"XML_LRM_TAG_RESOURCE"[@id=\"%s\"]/"XML_LRM_TAG_RSC_OP"[@id=\"%s\"]" #define op_call_template "//"XML_CIB_TAG_STATE"[@uname=\"%s\"]//"XML_LRM_TAG_RESOURCE"[@id=\"%s\"]/"XML_LRM_TAG_RSC_OP"[@id=\"%s\" and @"XML_LRM_ATTR_CALLID"=\"%d\"]" static void delete_op_entry(lrm_op_t *op, const char *rsc_id, const char *key, int call_id) { xmlNode *xml_top = NULL; if(op != NULL) { xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); crm_debug("async: Sending delete op for %s_%s_%d (call=%d)", op->rsc_id, op->op_type, op->interval, op->call_id); fsa_cib_conn->cmds->delete( fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_quorum_override); } else if (rsc_id != NULL && key != NULL) { int max = 0; char *op_xpath = NULL; if(call_id > 0) { max = strlen(op_call_template) + strlen(rsc_id) + strlen(fsa_our_uname) + strlen(key) + 10; crm_malloc0(op_xpath, max); snprintf(op_xpath, max, op_call_template, fsa_our_uname, rsc_id, key, call_id); } else { max = strlen(op_template) + strlen(rsc_id) + strlen(fsa_our_uname) + strlen(key) + 1; crm_malloc0(op_xpath, max); snprintf(op_xpath, max, op_template, fsa_our_uname, rsc_id, key); } crm_debug("sync: Sending delete op for %s (call=%d)", rsc_id, call_id); fsa_cib_conn->cmds->delete( fsa_cib_conn, op_xpath, NULL, cib_quorum_override|cib_xpath); crm_free(op_xpath); } else { crm_err("Not enough information to delete op entry: rsc=%p key=%p", rsc_id, key); return; } crm_log_xml_debug_2(xml_top, "op:cancel"); free_xml(xml_top); } static gboolean cancel_op(lrm_rsc_t *rsc, const char *key, int op, gboolean remove) { int rc = HA_OK; struct recurring_op_s *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc != NULL, return FALSE); if(key == NULL) { key = make_stop_id(rsc->id, op); } pending = g_hash_table_lookup(pending_ops, key); if(pending) { if(remove && pending->remove == FALSE) { pending->remove = TRUE; crm_debug("Scheduling %s for removal", key); } if(pending->cancelled) { crm_debug("Operation %s already cancelled", key); return TRUE; } pending->cancelled = TRUE; } else { crm_info("No pending op found for %s", key); } crm_debug("Cancelling op %d for %s (%s)", op, rsc->id, key); rc = rsc->ops->cancel_op(rsc, op); if(rc != HA_OK) { crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc->id, key); /* The caller needs to make sure the entry is * removed from the pending_ops list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from pending_ops will block * the node from shutting down */ return FALSE; } return TRUE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrm_rsc_t *rsc; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { struct cancel_data *data = user_data; struct recurring_op_s *op = (struct recurring_op_s*)value; if(safe_str_eq(op->op_key, data->key)) { data->done = TRUE; if (cancel_op(data->rsc, key, op->call_id, data->remove) == FALSE) { return TRUE; } } return FALSE; } static gboolean cancel_op_key(lrm_rsc_t *rsc, const char *key, gboolean remove) { struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; g_hash_table_foreach_remove(pending_ops, cancel_action_by_key, &data); return data.done; } static lrm_rsc_t * get_lrm_resource(xmlNode *resource, xmlNode *op_msg, gboolean do_create) { char rid[64]; lrm_rsc_t *rsc = NULL; const char *short_id = ID(resource); const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG); crm_debug_2("Retrieving %s from the LRM.", short_id); CRM_CHECK(short_id != NULL, return NULL); if(rsc == NULL) { /* check if its already there (short name) */ strncpy(rid, short_id, 64); rid[63] = 0; rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); } if(rsc == NULL && long_id != NULL) { /* try the long name instead */ strncpy(rid, long_id, 64); rid[63] = 0; rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); } if(rsc == NULL && do_create) { /* add it to the LRM */ const char *type = crm_element_value(resource, XML_ATTR_TYPE); const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER); GHashTable *params = xml2list(op_msg); CRM_CHECK(class != NULL, return NULL); CRM_CHECK(type != NULL, return NULL); crm_debug("Adding rsc %s before operation", short_id); strncpy(rid, short_id, 64); rid[63] = 0; #if CRM_DEPRECATED_SINCE_2_0_3 if(op_msg != NULL) { if(g_hash_table_lookup( params, XML_ATTR_CRM_VERSION) == NULL) { g_hash_table_destroy(params); params = xml2list_202(op_msg); } } #endif if(g_hash_table_size(params) == 0) { crm_log_xml_warn(op_msg, "EmptyParams"); } fsa_lrm_conn->lrm_ops->add_rsc( fsa_lrm_conn, rid, class, type, provider, params); rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, rid); g_hash_table_destroy(params); if(rsc == NULL) { fsa_data_t *msg_data = NULL; crm_err("Could not add resource %s to LRM", rid); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } return rsc; } static gboolean lrm_remove_deleted_op( gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; struct recurring_op_s *pending = value; if(safe_str_eq(rsc, pending->rsc_id)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { gboolean done = FALSE; gboolean create_rsc = TRUE; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); crm_op = crm_element_value(input->msg, F_CRM_TASK); from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); if(safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } crm_debug_2("LRM command from: %s", from_sys); if(safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { operation = CRMD_ACTION_DELETE; } else if(safe_str_eq(operation, CRM_OP_LRM_REFRESH)) { crm_op = CRM_OP_LRM_REFRESH; } else if(safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) { crm_info("Failing resource..."); operation = "fail"; } else if(input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } if(safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { enum cib_errors rc = cib_ok; xmlNode *fragment = do_lrm_query(TRUE); crm_info("Forcing a local LRM refresh"); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc); free_xml(fragment); } else if(safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { xmlNode *data = do_lrm_query(FALSE); xmlNode *reply = create_reply(input->msg, data); if(relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_xml(LOG_ERR, "reply", reply); } free_xml(reply); free_xml(data); } else if(safe_str_eq(operation, CRM_OP_PROBED) || safe_str_eq(crm_op, CRM_OP_REPROBE)) { int cib_options = cib_inhibit_notify; const char *probed = XML_BOOLEAN_TRUE; if(safe_str_eq(crm_op, CRM_OP_REPROBE)) { cib_options = cib_none; probed = XML_BOOLEAN_FALSE; } update_attr(fsa_cib_conn, cib_inhibit_notify, XML_CIB_TAG_STATUS, fsa_our_uuid, NULL, NULL, CRM_OP_PROBED, probed, FALSE); } else if(operation != NULL) { lrm_rsc_t *rsc = NULL; xmlNode *params = NULL; xmlNode *xml_rsc = find_xml_node( input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* only the first 16 chars are used by the LRM */ params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); if(safe_str_eq(operation, CRMD_ACTION_DELETE)) { create_rsc = FALSE; } rsc = get_lrm_resource(xml_rsc, input->xml, create_rsc); if(rsc == NULL && create_rsc) { crm_err("Invalid resource definition"); crm_log_xml_warn(input->msg, "bad input"); } else if(rsc == NULL) { lrm_op_t* op = NULL; crm_err("Not creating resource for a %s event: %s", operation, ID(input->xml)); crm_log_xml_warn(input->msg, "bad input"); op = construct_op(input->xml, ID(xml_rsc), operation); op->op_status = LRM_OP_DONE; op->rc = EXECRA_OK; CRM_ASSERT(op != NULL); send_direct_ack(from_host, from_sys, op, ID(xml_rsc)); free_lrm_op(op); } else if(safe_str_eq(operation, CRMD_ACTION_CANCEL)) { lrm_op_t* op = NULL; char *op_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; const char *op_interval = NULL; CRM_CHECK(params != NULL, crm_log_xml_warn(input->xml, "Bad command"); return); op_interval = crm_element_value(params, crm_meta_name("interval")); op_task = crm_element_value(params, crm_meta_name(XML_LRM_ATTR_TASK)); call_id = crm_element_value(params, crm_meta_name(XML_LRM_ATTR_CALLID)); #if CRM_DEPRECATED_SINCE_2_0_5 if(op_interval == NULL) { op_interval = crm_element_value(params, "interval"); } if(op_task == NULL) { op_task = crm_element_value(params, XML_LRM_ATTR_TASK); if(op_task == NULL) { op_task = crm_element_value(params, "task"); } } #endif CRM_CHECK(op_task != NULL, crm_log_xml_warn(input->xml, "Bad command"); return); CRM_CHECK(op_interval != NULL, crm_log_xml_warn(input->xml, "Bad command"); return); op = construct_op(input->xml, rsc->id, op_task); CRM_ASSERT(op != NULL); op_key = generate_op_key( rsc->id,op_task,crm_parse_int(op_interval,"0")); crm_debug("PE requested op %s (call=%s) be cancelled", op_key, call_id?call_id:"NA"); call = crm_parse_int(call_id, "0"); if(call == 0) { /* the normal case when the PE cancels a recurring op */ done = cancel_op_key(rsc, op_key, TRUE); } else { /* the normal case when the PE cancels an orphan op */ done = cancel_op(rsc, NULL, call, TRUE); } if(done == FALSE) { crm_debug("Nothing known about operation %d for %s", call, op_key); delete_op_entry(NULL, rsc->id, op_key, call); /* needed?? surely not otherwise the cancel_op_(_key) wouldn't * have failed in the first place */ g_hash_table_remove(pending_ops, op_key); } op->rc = EXECRA_OK; op->op_status = LRM_OP_DONE; send_direct_ack(from_host, from_sys, op, rsc->id); crm_free(op_key); free_lrm_op(op); } else if(safe_str_eq(operation, CRMD_ACTION_DELETE)) { int rc = HA_OK; lrm_op_t* op = NULL; CRM_ASSERT(rsc != NULL); op = construct_op(input->xml, rsc->id, operation); CRM_ASSERT(op != NULL); op->op_status = LRM_OP_DONE; op->rc = EXECRA_OK; crm_info("Removing resource %s from the LRM", rsc->id); rc = fsa_lrm_conn->lrm_ops->delete_rsc(fsa_lrm_conn, rsc->id); if(rc != HA_OK) { crm_err("Failed to remove resource %s", rsc->id); op->op_status = LRM_OP_ERROR; op->rc = EXECRA_UNKNOWN_ERROR; } delete_rsc_entry(rsc->id); send_direct_ack(from_host, from_sys, op, rsc->id); free_lrm_op(op); g_hash_table_foreach_remove(pending_ops, lrm_remove_deleted_op, rsc->id); if(safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { /* this isn't expected - trigger a new transition */ time_t now = time(NULL); char *now_s = crm_itoa(now); crm_debug("Triggering a refresh after %s deleted %s from the LRM", from_sys, rsc->id); update_attr(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, "last-lrm-refresh", now_s, FALSE); crm_free(now_s); } } else if(rsc != NULL) { do_lrm_rsc_op(rsc, operation, input->xml, input->msg); } lrm_free_rsc(rsc); } else { crm_err("Operation was neither a lrm_query, nor a rsc op. %s", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } lrm_op_t * construct_op(xmlNode *rsc_op, const char *rsc_id, const char *operation) { lrm_op_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; const char *op_interval = NULL; const char *transition = NULL; CRM_DEV_ASSERT(rsc_id != NULL); crm_malloc0(op, sizeof(lrm_op_t)); op->op_type = crm_strdup(operation); op->op_status = LRM_OP_PENDING; op->rc = -1; op->rsc_id = crm_strdup(rsc_id); op->interval = 0; op->timeout = 0; op->start_delay = 0; op->copyparams = 0; op->app_name = crm_strdup(CRM_SYSTEM_CRMD); if(rsc_op == NULL) { CRM_DEV_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); op->user_data = NULL; op->user_data_len = 0; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_insert(op->params, crm_strdup(XML_ATTR_CRM_VERSION), crm_strdup(CRM_FEATURE_SET)); crm_debug_2("Constructed %s op for %s", operation, rsc_id); return op; } op->params = xml2list(rsc_op); #if CRM_DEPRECATED_SINCE_2_0_3 if(g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION) == NULL) { g_hash_table_destroy(op->params); op->params = xml2list_202(rsc_op); } #endif if(op->params == NULL) { CRM_DEV_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); } op_delay = g_hash_table_lookup(op->params, crm_meta_name("start_delay")); op_timeout = g_hash_table_lookup(op->params, crm_meta_name("timeout")); op_interval = g_hash_table_lookup(op->params, crm_meta_name("interval")); #if CRM_DEPRECATED_SINCE_2_0_5 if(op_delay == NULL) { op_delay = g_hash_table_lookup(op->params, "start_delay"); } if(op_timeout == NULL) { op_timeout = g_hash_table_lookup(op->params, "timeout"); } if(op_interval == NULL) { op_interval = g_hash_table_lookup(op->params, "interval"); } #endif op->interval = crm_parse_int(op_interval, "0"); op->timeout = crm_parse_int(op_timeout, "0"); op->start_delay = crm_parse_int(op_delay, "0"); /* sanity */ if(op->interval < 0) { op->interval = 0; } if(op->timeout < 0) { op->timeout = 0; } if(op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = crm_strdup(transition); op->user_data_len = 1+strlen(op->user_data); if(op->interval != 0) { if(safe_str_eq(operation, CRMD_ACTION_START) || safe_str_eq(operation, CRMD_ACTION_STOP)) { crm_err("Start and Stop actions cannot have an interval"); op->interval = 0; } } /* reset the resource's parameters? */ if(op->interval == 0) { if(safe_str_eq(CRMD_ACTION_START, operation) || safe_str_eq(CRMD_ACTION_STATUS, operation)) { op->copyparams = 1; } } crm_debug_2("Constructed %s op for %s: interval=%d", operation, rsc_id, op->interval); return op; } void send_direct_ack(const char *to_host, const char *to_sys, lrm_op_t* op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; xmlNode *fragment; CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return; } if(op->rsc_id == NULL) { CRM_DEV_ASSERT(rsc_id != NULL); op->rsc_id = crm_strdup(rsc_id); } if(to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } update = create_node_state( fsa_our_uname, NULL, NULL, NULL, NULL, NULL, FALSE, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, op, __FUNCTION__, 0); fragment = create_cib_fragment(update, XML_CIB_TAG_STATUS); reply = create_request(CRM_OP_INVOKE_LRM, fragment, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_log_xml_debug_2(update, "ACK Update"); crm_info("ACK'ing resource op %s_%s_%d from %s: %s", op->rsc_id, op->op_type, op->interval, op->user_data, crm_element_value(reply, XML_ATTR_REFERENCE)); if(relay_message(reply, TRUE) == FALSE) { crm_log_xml(LOG_ERR, "Unable to route reply", reply); } free_xml(fragment); free_xml(update); free_xml(reply); } static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { lrm_rsc_t *rsc = user_data; struct recurring_op_s *op = (struct recurring_op_s*)value; if(op->interval != 0 && safe_str_eq(op->rsc_id, rsc->id)) { if (cancel_op(rsc, key, op->call_id, FALSE) == FALSE) { return TRUE; } } return FALSE; } void do_lrm_rsc_op(lrm_rsc_t *rsc, const char *operation, xmlNode *msg, xmlNode *request) { int call_id = 0; char *op_id = NULL; lrm_op_t* op = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; CRM_CHECK(rsc != NULL, return); if(msg != NULL) { transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if(transition == NULL) { crm_log_xml_err(msg, "Missing transition number"); } } op = construct_op(msg, rsc->id, operation); /* stop the monitor before stopping the resource */ if(crm_str_eq(operation, CRMD_ACTION_STOP, TRUE) || crm_str_eq(operation, CRMD_ACTION_DEMOTE, TRUE) || crm_str_eq(operation, CRMD_ACTION_PROMOTE, TRUE) || crm_str_eq(operation, CRMD_ACTION_MIGRATE, TRUE)) { g_hash_table_foreach_remove(pending_ops, stop_recurring_action_by_rsc, rsc); } /* now do the op */ crm_info("Performing op=%s_%s_%d key=%s)", rsc->id, operation, op->interval, transition); if(fsa_state != S_NOT_DC && fsa_state != S_TRANSITION_ENGINE) { if(safe_str_neq(operation, "fail") && safe_str_neq(operation, CRMD_ACTION_STOP)) { crm_info("Discarding attempt to perform action %s on %s" " in state %s", operation, rsc->id, fsa_state2string(fsa_state)); op->rc = 99; op->op_status = LRM_OP_ERROR; send_direct_ack(NULL, NULL, op, rsc->id); free_lrm_op(op); crm_free(op_id); return; } } op_id = generate_op_key(rsc->id, op->op_type, op->interval); if(op->interval > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(rsc, op_id, FALSE); op->target_rc = CHANGED; } else { op->target_rc = EVERYTIME; } g_hash_table_replace(resources,crm_strdup(rsc->id), crm_strdup(op_id)); call_id = rsc->ops->perform_op(rsc, op); if(call_id <= 0) { crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); struct recurring_op_s *pending = NULL; crm_malloc0(pending, sizeof(struct recurring_op_s)); crm_debug("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval = op->interval; pending->op_key = crm_strdup(op_id); pending->rsc_id = crm_strdup(rsc->id); g_hash_table_replace(pending_ops, call_id_s, pending); } crm_free(op_id); free_lrm_op(op); return; } void free_recurring_op(gpointer value) { struct recurring_op_s *op = (struct recurring_op_s*)value; crm_free(op->rsc_id); crm_free(op->op_key); crm_free(op); } void free_lrm_op(lrm_op_t *op) { g_hash_table_destroy(op->params); crm_free(op->user_data); crm_free(op->output); crm_free(op->rsc_id); crm_free(op->op_type); crm_free(op->app_name); crm_free(op); } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, crm_strdup(key), crm_strdup(value)); } lrm_op_t * copy_lrm_op(const lrm_op_t *op) { lrm_op_t *op_copy = NULL; CRM_DEV_ASSERT(op != NULL); if(crm_assert_failed) { return NULL; } CRM_ASSERT(op->rsc_id != NULL); crm_malloc0(op_copy, sizeof(lrm_op_t)); op_copy->op_type = crm_strdup(op->op_type); /* input fields */ op_copy->params = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if(op->params != NULL) { g_hash_table_foreach(op->params, dup_attr, op_copy->params); } op_copy->timeout = op->timeout; op_copy->interval = op->interval; op_copy->target_rc = op->target_rc; /* in the CRM, this is always a string */ if(op->user_data != NULL) { op_copy->user_data = crm_strdup(op->user_data); } /* output fields */ op_copy->op_status = op->op_status; op_copy->rc = op->rc; op_copy->call_id = op->call_id; op_copy->output = NULL; op_copy->rsc_id = crm_strdup(op->rsc_id); if(op->app_name != NULL) { op_copy->app_name = crm_strdup(op->app_name); } if(op->output != NULL) { op_copy->output = crm_strdup(op->output); } return op_copy; } lrm_rsc_t * copy_lrm_rsc(const lrm_rsc_t *rsc) { lrm_rsc_t *rsc_copy = NULL; if(rsc == NULL) { return NULL; } crm_malloc0(rsc_copy, sizeof(lrm_rsc_t)); rsc_copy->id = crm_strdup(rsc->id); rsc_copy->type = crm_strdup(rsc->type); rsc_copy->class = NULL; rsc_copy->provider = NULL; if(rsc->class != NULL) { rsc_copy->class = crm_strdup(rsc->class); } if(rsc->provider != NULL) { rsc_copy->provider = crm_strdup(rsc->provider); } /* GHashTable* params; */ rsc_copy->params = NULL; rsc_copy->ops = NULL; return rsc_copy; } void cib_rsc_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { switch(rc) { case cib_ok: case cib_diff_failed: case cib_diff_resync: crm_debug("Resource update %d complete: rc=%d", call_id, rc); break; default: crm_err("Resource update %d failed: (rc=%d) %s", call_id, rc, cib_error2string(rc)); } } void do_update_resource(lrm_op_t* op) { /* */ int rc = cib_ok; lrm_rsc_t *rsc = NULL; xmlNode *update, *iter; CRM_CHECK(op != NULL, return); update = create_node_state( fsa_our_uname, NULL, NULL, NULL, NULL, NULL, FALSE, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); rsc = fsa_lrm_conn->lrm_ops->get_rsc(fsa_lrm_conn, op->rsc_id); CRM_CHECK(rsc->type != NULL, crm_err("Resource %s has no value for type", op->rsc_id)); CRM_CHECK(rsc->class != NULL, crm_err("Resource %s has no value for class", op->rsc_id)); crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class); crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER,rsc->provider); lrm_free_rsc(rsc); build_operation_update(iter, op, __FUNCTION__, 0); /* make it an asyncronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down then anyway? * (probably because of an internal error) * * Worst case: * we get shot for having resources "running" when the really weren't * * the alternative however means blocking here for too long, which * isnt acceptable */ fsa_cib_update(XML_CIB_TAG_STATUS, update, cib_quorum_override, rc); /* the return code is a call number, not an error code */ crm_debug("Sent resource state update message: %d", rc); fsa_cib_conn->cmds->register_callback( fsa_cib_conn, rc, 60, FALSE, NULL, "cib_rsc_callback", cib_rsc_callback); free_xml(update); } void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data) { CRM_CHECK(FALSE, return); } gboolean process_lrm_event(lrm_op_t *op) { char *op_id = NULL; char *op_key = NULL; int log_level = LOG_ERR; struct recurring_op_s *pending = NULL; CRM_CHECK(op != NULL, return FALSE); CRM_CHECK(op->rsc_id != NULL, return FALSE); op_key = generate_op_key(op->rsc_id, op->op_type, op->interval); switch(op->op_status) { case LRM_OP_ERROR: case LRM_OP_PENDING: case LRM_OP_NOTSUPPORTED: break; case LRM_OP_CANCELLED: log_level = LOG_INFO; break; case LRM_OP_DONE: log_level = LOG_INFO; break; case LRM_OP_TIMEOUT: log_level = LOG_DEBUG_3; crm_err("LRM operation %s (%d) %s (timeout=%dms)", op_key, op->call_id, op_status2text(op->op_status), op->timeout); break; default: crm_err("Mapping unknown status (%d) to ERROR", op->op_status); op->op_status = LRM_OP_ERROR; } if(op->op_status == LRM_OP_ERROR && (op->rc == EXECRA_RUNNING_MASTER || op->rc == EXECRA_NOT_RUNNING)) { /* Leave it up to the TE/PE to decide if this is an error */ op->op_status = LRM_OP_DONE; log_level = LOG_INFO; } do_crm_log(log_level, "LRM operation %s (call=%d, rc=%d) %s %s", op_key, op->call_id, op->rc, op_status2text(op->op_status), op->op_status==LRM_OP_ERROR?execra_code2string(op->rc):""); if(op->op_status == LRM_OP_ERROR && op->output != NULL) { crm_info("Result: %s", op->output); } op_id = make_stop_id(op->rsc_id, op->call_id); pending = g_hash_table_lookup(pending_ops, op_id); if(op->op_status != LRM_OP_CANCELLED) { do_update_resource(op); if(op->interval != 0) { goto out; } } else if(op->interval == 0) { /* no known valid reason for this to happen */ crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id); } else if(pending == NULL) { crm_err("Op %s (call=%d): No 'pending' entry", op_key, op->call_id); } else if(op->user_data == NULL) { crm_err("Op %s (call=%d): No user data", op_key, op->call_id); } else if(pending->remove) { delete_op_entry(op, op->rsc_id, op_key, op->call_id); } else { crm_debug("Op %s (call=%d): no delete event required", op_key, op->call_id); } if(g_hash_table_remove(pending_ops, op_id)) { crm_debug("Op %s (call=%d, stop_id=%s): Confirmed", op_key, op->call_id, op_id); } out: crm_free(op_key); crm_free(op_id); return TRUE; } char * make_stop_id(const char *rsc, int call_id) { char *op_id = NULL; crm_malloc0(op_id, strlen(rsc) + 34); if(op_id != NULL) { snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id); } return op_id; } diff --git a/crmd/te_actions.c b/crmd/te_actions.c index 3182bda4eb..c48fb19e65 100644 --- a/crmd/te_actions.c +++ b/crmd/te_actions.c @@ -1,511 +1,510 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *te_uuid = NULL; void send_rsc_command(crm_action_t *action); extern crm_action_timer_t *transition_timer; static void te_start_action_timer(crm_action_t *action) { crm_malloc0(action->timer, sizeof(crm_action_timer_t)); action->timer->timeout = action->timeout; action->timer->reason = timeout_action_warn; action->timer->action = action; action->timer->source_id = Gmain_timeout_add( action->timer->timeout, action_timer_callback, (void*)action->timer); CRM_ASSERT(action->timer->source_id != 0); } static gboolean te_pseudo_action(crm_graph_t *graph, crm_action_t *pseudo) { crm_info("Pseudo action %d fired and confirmed", pseudo->id); pseudo->confirmed = TRUE; update_graph(graph, pseudo); trigger_graph(); return TRUE; } #if SUPPORT_HEARTBEAT void send_stonith_update(stonith_ops_t * op) { enum cib_errors rc = cib_ok; const char *target = op->node_name; const char *uuid = op->node_uuid; /* zero out the node-status & remove all LRM status info */ xmlNode *node_state = create_xml_node(NULL, XML_CIB_TAG_STATE); CRM_CHECK(op->node_name != NULL, return); CRM_CHECK(op->node_uuid != NULL, return); crm_xml_add(node_state, XML_ATTR_UUID, uuid); crm_xml_add(node_state, XML_ATTR_UNAME, target); crm_xml_add(node_state, XML_CIB_ATTR_HASTATE, DEADSTATUS); crm_xml_add(node_state, XML_CIB_ATTR_INCCM, XML_BOOLEAN_NO); crm_xml_add(node_state, XML_CIB_ATTR_CRMDSTATE, OFFLINESTATUS); crm_xml_add(node_state, XML_CIB_ATTR_JOINSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_CIB_ATTR_EXPSTATE, CRMD_JOINSTATE_DOWN); - crm_xml_add(node_state, XML_CIB_ATTR_REPLACE, XML_CIB_TAG_LRM); crm_xml_add(node_state, XML_ATTR_ORIGIN, __FUNCTION__); rc = fsa_cib_conn->cmds->update( fsa_cib_conn, XML_CIB_TAG_STATUS, node_state, cib_quorum_override|cib_scope_local); if(rc < cib_ok) { crm_err("CIB update failed: %s", cib_error2string(rc)); abort_transition( INFINITY, tg_shutdown, "CIB update failed", node_state); } else { /* delay processing the trigger until the update completes */ - add_cib_op_callback(fsa_cib_conn, rc, FALSE, NULL, cib_fencing_updated); + add_cib_op_callback(fsa_cib_conn, rc, FALSE, crm_strdup(target), cib_fencing_updated); } free_xml(node_state); return; } #endif static gboolean te_fence_node(crm_graph_t *graph, crm_action_t *action) { #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { const char *id = NULL; const char *uuid = NULL; const char *target = NULL; const char *type = NULL; stonith_ops_t * st_op = NULL; id = ID(action->xml); target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); type = g_hash_table_lookup(action->params, crm_meta_name("stonith_action")); CRM_CHECK(id != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); CRM_CHECK(uuid != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); CRM_CHECK(type != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); CRM_CHECK(target != NULL, crm_log_xml_warn(action->xml, "BadAction"); return FALSE); te_log_action(LOG_INFO, "Executing %s fencing operation (%s) on %s (timeout=%d)", type, id, target, transition_graph->transition_timeout / 2); /* Passing NULL means block until we can connect... */ te_connect_stonith(NULL); crm_malloc0(st_op, sizeof(stonith_ops_t)); if(safe_str_eq(type, "poweroff")) { st_op->optype = POWEROFF; } else { st_op->optype = RESET; } st_op->timeout = transition_graph->transition_timeout / 2; st_op->node_name = crm_strdup(target); st_op->node_uuid = crm_strdup(uuid); st_op->private_data = generate_transition_key( transition_graph->id, action->id, 0, te_uuid); CRM_ASSERT(stonithd_input_IPC_channel() != NULL); if (ST_OK != stonithd_node_fence( st_op )) { crm_err("Cannot fence %s: stonithd_node_fence() call failed ", target); } return TRUE; } #endif return FALSE; } static int get_target_rc(crm_action_t *action) { const char *target_rc_s = g_hash_table_lookup( action->params, crm_meta_name(XML_ATTR_TE_TARGET_RC)); if(target_rc_s != NULL) { return crm_parse_int(target_rc_s, "0"); } return 0; } static gboolean te_crm_command(crm_graph_t *graph, crm_action_t *action) { char *value = NULL; char *counter = NULL; xmlNode *cmd = NULL; const char *id = NULL; const char *task = NULL; const char *on_node = NULL; gboolean ret = TRUE; id = ID(action->xml); task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(on_node != NULL && strlen(on_node) != 0, te_log_action(LOG_ERR, "Corrupted command (id=%s) %s: no node", crm_str(id), crm_str(task)); return FALSE); te_log_action(LOG_INFO, "Executing crm-event (%s): %s on %s", crm_str(id), crm_str(task), on_node); cmd = create_request(task, NULL, on_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); counter = generate_transition_key( transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); ret = send_cluster_message(on_node, crm_proc_crmd, cmd, TRUE); crm_free(counter); free_xml(cmd); value = g_hash_table_lookup(action->params, crm_meta_name(XML_ATTR_TE_NOWAIT)); if(ret == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if(crm_is_true(value)) { crm_info("Skipping wait for %d", action->id); action->confirmed = TRUE; update_graph(graph, action); trigger_graph(); } else if(ret && action->timeout > 0) { crm_debug("Setting timer for action %d",action->id); action->timer->reason = timeout_action_warn; te_start_action_timer(action); } return TRUE; } static gboolean te_rsc_command(crm_graph_t *graph, crm_action_t *action) { /* never overwrite stop actions in the CIB with * anything other than completed results * * Writing pending stops makes it look like the * resource is running again */ const char *task = NULL; const char *on_node = NULL; action->executed = FALSE; on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(on_node != NULL && strlen(on_node) != 0, te_log_action(LOG_ERR, "Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task)); return FALSE); send_rsc_command(action); return TRUE; } gboolean cib_action_update(crm_action_t *action, int status) { char *op_id = NULL; char *code = NULL; char *digest = NULL; xmlNode *tmp = NULL; xmlNode *params = NULL; xmlNode *state = NULL; xmlNode *rsc = NULL; xmlNode *xml_op = NULL; xmlNode *action_rsc = NULL; enum cib_errors rc = cib_ok; const char *name = NULL; const char *value = NULL; const char *rsc_id = NULL; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task_uuid = crm_element_value( action->xml, XML_LRM_ATTR_TASK_KEY); const char *target_uuid = crm_element_value( action->xml, XML_LRM_ATTR_TARGET_UUID); int call_options = cib_quorum_override|cib_scope_local; crm_warn("%s %d: %s on %s timed out", crm_element_name(action->xml), action->id, task_uuid, target); action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE); if(action_rsc == NULL) { return FALSE; } rsc_id = ID(action_rsc); CRM_CHECK(rsc_id != NULL, crm_log_xml_err(action->xml, "Bad:action"); return FALSE); code = crm_itoa(status); /* update the CIB */ state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(state, XML_ATTR_UUID, target_uuid); crm_xml_add(state, XML_ATTR_UNAME, target); rsc = create_xml_node(state, XML_CIB_TAG_LRM); crm_xml_add(rsc, XML_ATTR_ID, target_uuid); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); crm_xml_add(rsc, XML_ATTR_ID, rsc_id); name = XML_ATTR_TYPE; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); name = XML_AGENT_ATTR_CLASS; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); name = XML_AGENT_ATTR_PROVIDER; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); xml_op = create_xml_node(rsc, XML_LRM_TAG_RSC_OP); crm_xml_add(xml_op, XML_ATTR_ID, task); op_id = generate_op_key(rsc_id, task, action->interval); crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_free(op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); crm_xml_add(xml_op, XML_LRM_ATTR_OPSTATUS, code); crm_xml_add(xml_op, XML_LRM_ATTR_CALLID, "-1"); crm_xml_add_int(xml_op, XML_LRM_ATTR_INTERVAL, action->interval); crm_xml_add(xml_op, XML_LRM_ATTR_RC, code); crm_xml_add(xml_op, XML_ATTR_ORIGIN, __FUNCTION__); crm_free(code); code = generate_transition_key( transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, code); crm_free(code); code = generate_transition_magic( crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY), status, status); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, code); crm_free(code); tmp = find_xml_node(action->xml, "attributes", TRUE); params = create_xml_node(NULL, XML_TAG_PARAMS); copy_in_properties(params, tmp); filter_action_parameters(params, CRM_FEATURE_SET); digest = calculate_xml_digest(params, TRUE, FALSE); /* info for now as this area has been problematic to debug */ crm_debug("Calculated digest %s for %s (%s)\n", digest, ID(xml_op), crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); crm_log_xml(LOG_DEBUG, "digest:source", params); crm_xml_add(xml_op, XML_LRM_ATTR_OP_DIGEST, digest); crm_free(digest); free_xml(params); crm_debug_3("Updating CIB with \"%s\" (%s): %s %s on %s", status<0?"new action":XML_ATTR_TIMEOUT, crm_element_name(action->xml), crm_str(task), rsc_id, target); rc = fsa_cib_conn->cmds->update( fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options); crm_debug("Updating CIB with %s action %d: %s on %s (call_id=%d)", op_status2text(status), action->id, task_uuid, target, rc); add_cib_op_callback(fsa_cib_conn, rc, FALSE, NULL, cib_action_updated); free_xml(state); action->sent_update = TRUE; if(rc < cib_ok) { return FALSE; } return TRUE; } void send_rsc_command(crm_action_t *action) { xmlNode *cmd = NULL; xmlNode *rsc_op = NULL; char *counter = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *task_uuid = NULL; CRM_ASSERT(action != NULL); CRM_ASSERT(action->xml != NULL); rsc_op = action->xml; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); on_node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET); counter = generate_transition_key( transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter); crm_info("Initiating action %d: %s %s on %s", action->id, task, task_uuid, on_node); crm_free(counter); if(rsc_op != NULL) { crm_log_xml_debug_2(rsc_op, "Performing"); } cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, on_node, CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL); send_cluster_message(on_node, crm_proc_lrmd, cmd, TRUE); free_xml(cmd); action->executed = TRUE; value = g_hash_table_lookup(action->params, crm_meta_name(XML_ATTR_TE_NOWAIT)); if(crm_is_true(value)) { crm_debug("Skipping wait for %d", action->id); action->confirmed = TRUE; update_graph(transition_graph, action); trigger_graph(); } else if(action->timeout > 0) { int action_timeout = (2 * action->timeout) + transition_graph->network_delay; crm_debug_3("Setting timer for action %s", task_uuid); if(transition_graph->transition_timeout < action_timeout) { crm_debug("Action %d:" " Increasing transition %d timeout to %d (2*%d + %d)", action->id, transition_graph->id, action_timeout, action->timeout, transition_graph->network_delay); transition_graph->transition_timeout = action_timeout; } te_start_action_timer(action); } } crm_graph_functions_t te_graph_fns = { te_pseudo_action, te_rsc_command, te_crm_command, te_fence_node }; void notify_crmd(crm_graph_t *graph) { int log_level = LOG_DEBUG; const char *type = "done"; crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state)); stop_te_timer(transition_timer); CRM_CHECK(graph->complete, graph->complete = TRUE); switch(graph->completion_action) { case tg_stop: type = "stop"; /* fall through */ case tg_done: log_level = LOG_INFO; clear_bit_inplace(fsa_input_register, R_IN_TRANSITION); if(fsa_state == S_TRANSITION_ENGINE) { register_fsa_input(C_FSA_INTERNAL, I_TE_SUCCESS, NULL); } break; case tg_restart: type = "restart"; clear_bit_inplace(fsa_input_register, R_IN_TRANSITION); if(fsa_state == S_TRANSITION_ENGINE) { register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL); } else if(fsa_state == S_POLICY_ENGINE) { register_fsa_action(A_PE_INVOKE); } break; case tg_shutdown: crm_info("Exiting after transition"); return; } te_log_action(log_level, "Transition %d status: %s - %s", graph->id, type, crm_str(graph->abort_reason)); graph->abort_reason = NULL; graph->completion_action = tg_done; } diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c index 049d3dc24b..5637a57db6 100644 --- a/crmd/te_callbacks.c +++ b/crmd/te_callbacks.c @@ -1,475 +1,476 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include void te_update_confirm(const char *event, xmlNode *msg); xmlNode *need_abort(xmlNode *update); -void cib_fencing_updated(xmlNode *msg, int call_id, int rc, - xmlNode *output, void *user_data); extern char *te_uuid; gboolean shuttingdown = FALSE; crm_graph_t *transition_graph; GTRIGSource *transition_trigger = NULL; crm_action_timer_t *transition_timer = NULL; void te_update_diff(const char *event, xmlNode *msg) { int rc = -1; const char *op = NULL; const char *set_name = NULL; xmlNode *diff = NULL; xmlNode *aborted = NULL; int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; CRM_CHECK(msg != NULL, return); crm_element_value_int(msg, F_CIB_RC, &rc); if(transition_graph == NULL) { return; } else if(rc < cib_ok) { return; } else if(transition_graph->complete != TRUE && fsa_state != S_IDLE && fsa_state != S_TRANSITION_ENGINE && fsa_state != S_POLICY_ENGINE) { return; } op = crm_element_value(msg, F_CIB_OPERATION); diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); cib_diff_version_details( diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_debug("Processing diff (%s): %d.%d.%d -> %d.%d.%d (%s)", op, diff_del_admin_epoch,diff_del_epoch,diff_del_updates, diff_add_admin_epoch,diff_add_epoch,diff_add_updates, fsa_state2string(fsa_state)); log_cib_diff(LOG_DEBUG_2, diff, op); set_name = "diff-added"; if(diff != NULL) { xmlNode *section = NULL; xmlNode *change_set = find_xml_node(diff, set_name, FALSE); change_set = find_xml_node(change_set, XML_TAG_CIB, FALSE); if(change_set != NULL) { crm_debug_2("Checking status changes"); section=get_object_root(XML_CIB_TAG_STATUS,change_set); } if(section != NULL) { extract_event(section); } crm_debug_2("Checking change set: %s", set_name); aborted = need_abort(change_set); } set_name = "diff-removed"; if(diff != NULL && aborted == NULL) { xmlNode *attrs = NULL; xmlNode *status = NULL; xmlNode *change_set = find_xml_node(diff, set_name, FALSE); change_set = find_xml_node(change_set, XML_TAG_CIB, FALSE); crm_debug_2("Checking change set: %s", set_name); aborted = need_abort(change_set); if(aborted == NULL && change_set != NULL) { status = get_object_root(XML_CIB_TAG_STATUS, change_set); xml_child_iter_filter( status, node_state, XML_CIB_TAG_STATE, attrs = find_xml_node( node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); if(attrs != NULL) { crm_info("Aborting on "XML_TAG_TRANSIENT_NODEATTRS" deletions"); abort_transition(INFINITY, tg_restart, XML_TAG_TRANSIENT_NODEATTRS, attrs); } ); } } if(aborted != NULL) { abort_transition( INFINITY, tg_restart, "Non-status change", NULL); } return; } gboolean process_te_message(xmlNode *msg, xmlNode *xml_data) { xmlNode *xml_obj = NULL; const char *from = crm_element_value(msg, F_ORIG); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); const char *ref = crm_element_value(msg, XML_ATTR_REFERENCE); const char *op = crm_element_value(msg, F_CRM_TASK); const char *type = crm_element_value(msg, F_CRM_MSG_TYPE); crm_debug_2("Processing %s (%s) message", op, ref); crm_log_xml(LOG_DEBUG_3, "ipc", msg); if(op == NULL){ /* error */ } else if(sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_TENGINE) != 0) { crm_debug_2("Bad sys-to %s", crm_str(sys_to)); return FALSE; } else if(safe_str_eq(op, CRM_OP_INVOKE_LRM) && safe_str_eq(sys_from, CRM_SYSTEM_LRMD) /* && safe_str_eq(type, XML_ATTR_RESPONSE) */ ){ xml_obj = xml_data; CRM_CHECK(xml_obj != NULL, crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); CRM_CHECK(xml_obj != NULL, crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); xml_obj = get_object_root(XML_CIB_TAG_STATUS, xml_obj); CRM_CHECK(xml_obj != NULL, crm_log_xml(LOG_ERR, "Invalid (N)ACK", msg); return FALSE); crm_log_xml(LOG_DEBUG_2, "Processing (N)ACK", msg); crm_info("Processing (N)ACK %s from %s", crm_element_value(msg, XML_ATTR_REFERENCE), from); extract_event(xml_obj); } else { crm_err("Unknown command: %s::%s from %s", type, op, sys_from); } crm_debug_3("finished processing message"); return TRUE; } #if SUPPORT_HEARTBEAT void tengine_stonith_callback(stonith_ops_t * op) { const char *allow_fail = NULL; int target_rc = -1; int stonith_id = -1; int transition_id = -1; char *uuid = NULL; crm_action_t *stonith_action = NULL; if(op == NULL) { crm_err("Called with a NULL op!"); return; } crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", op->call_id, op->optype, op->node_name, op->op_result, (char *)op->node_list, op->private_data); /* this will mark the event complete if a match is found */ CRM_CHECK(op->private_data != NULL, return); /* filter out old STONITH actions */ CRM_CHECK(decode_transition_key( op->private_data, &uuid, &transition_id, &stonith_id, &target_rc), crm_err("Invalid event detected"); goto bail; ); if(transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid) || transition_graph->id != transition_id) { crm_info("Ignoring STONITH action initiated outside" " of the current transition"); } stonith_action = get_action(stonith_id, TRUE); if(stonith_action == NULL) { crm_err("Stonith action not matched"); goto bail; } switch(op->op_result) { case STONITH_SUCCEEDED: send_stonith_update(op); break; case STONITH_CANNOT: case STONITH_TIMEOUT: case STONITH_GENERIC: stonith_action->failed = TRUE; allow_fail = g_hash_table_lookup( stonith_action->params, crm_meta_name(XML_ATTR_TE_ALLOWFAIL)); if(FALSE == crm_is_true(allow_fail)) { crm_err("Stonith of %s failed (%d)..." " aborting transition.", op->node_name, op->op_result); abort_transition(INFINITY, tg_restart, "Stonith failed", NULL); } break; default: crm_err("Unsupported action result: %d", op->op_result); abort_transition(INFINITY, tg_restart, "Unsupport Stonith result", NULL); } update_graph(transition_graph, stonith_action); trigger_graph(); bail: crm_free(uuid); return; } void tengine_stonith_connection_destroy(gpointer user_data) { crm_err("Fencing daemon has left us"); stonith_src = NULL; if(stonith_src == NULL) { G_main_set_trigger(stonith_reconnect); } /* cbchan will be garbage at this point, arrange for it to be reset */ set_stonithd_input_IPC_channel_NULL(); return; } gboolean tengine_stonith_dispatch(IPC_Channel *sender, void *user_data) { int lpc = 0; while(stonithd_op_result_ready()) { if (sender->ch_status == IPC_DISCONNECT) { /* The message which was pending for us is that * the IPC status is now IPC_DISCONNECT */ break; } if(ST_FAIL == stonithd_receive_ops_result(FALSE)) { crm_err("stonithd_receive_ops_result() failed"); } else { lpc++; } } crm_debug_2("Processed %d messages", lpc); if (sender->ch_status == IPC_DISCONNECT) { return FALSE; } return TRUE; } #endif void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { - if(rc < cib_ok) { - crm_err("CIB update failed: %s", cib_error2string(rc)); - crm_log_xml_warn(msg, "Failed update"); - } + if(rc < cib_ok) { + crm_err("CIB update failed: %s", cib_error2string(rc)); + crm_log_xml_warn(msg, "Failed update"); + } else { + erase_status_tag(user_data, XML_CIB_TAG_LRM); + } + crm_free(user_data); } void cib_action_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if(rc < cib_ok) { crm_err("Update %d FAILED: %s", call_id, cib_error2string(rc)); } } void cib_failcount_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if(rc < cib_ok) { crm_err("Update %d FAILED: %s", call_id, cib_error2string(rc)); } } gboolean action_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; if(data == NULL) { crm_err("Timer popped with no data"); return FALSE; } timer = (crm_action_timer_t*)data; stop_te_timer(timer); crm_warn("Timer popped (abort_level=%d, complete=%s)", transition_graph->abort_priority, transition_graph->complete?"true":"false"); CRM_CHECK(timer->action != NULL, return FALSE); if(transition_graph->complete) { crm_warn("Ignoring timeout while not in transition"); } else if(timer->reason == timeout_action_warn) { print_action( LOG_WARNING,"Action missed its timeout: ", timer->action); } else { /* fail the action */ cib_action_update(timer->action, LRM_OP_TIMEOUT); } return FALSE; } static int unconfirmed_actions(gboolean send_updates) { int unconfirmed = 0; const char *key = NULL; const char *task = NULL; const char *node = NULL; crm_debug_2("Unconfirmed actions..."); slist_iter( synapse, synapse_t, transition_graph->synapses, lpc, /* lookup event */ slist_iter( action, crm_action_t, synapse->actions, lpc2, if(action->executed == FALSE) { continue; } else if(action->confirmed) { continue; } unconfirmed++; task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); key = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); crm_info("Action %s %d unconfirmed from %s", key, action->id, node); if(action->type != action_type_rsc) { continue; } else if(send_updates == FALSE) { continue; } else if(safe_str_eq(task, "cancel")) { /* we dont need to update the CIB with these */ continue; } else if(safe_str_eq(task, "stop")) { /* *never* update the CIB with these */ continue; } cib_action_update(action, LRM_OP_PENDING); ); ); if(unconfirmed > 0) { crm_warn("Waiting on %d unconfirmed actions", unconfirmed); } return unconfirmed; } gboolean global_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; if(data == NULL) { crm_err("Timer popped with no data"); return FALSE; } timer = (crm_action_timer_t*)data; stop_te_timer(timer); crm_warn("Timer popped (abort_level=%d, complete=%s)", transition_graph->abort_priority, transition_graph->complete?"true":"false"); CRM_CHECK(timer->action == NULL, return FALSE); if(transition_graph->complete) { crm_err("Ignoring timeout while not in transition"); } else if(timer->reason == timeout_abort) { int unconfirmed = unconfirmed_actions(FALSE); crm_warn("Transition abort timeout reached..." " marking transition complete."); transition_graph->complete = TRUE; abort_transition(INFINITY, tg_restart, "Global Timeout", NULL); if(unconfirmed != 0) { crm_warn("Writing %d unconfirmed actions to the CIB", unconfirmed); unconfirmed_actions(TRUE); } } return FALSE; } diff --git a/crmd/utils.c b/crmd/utils.c index a63a93f62d..14861045d1 100644 --- a/crmd/utils.c +++ b/crmd/utils.c @@ -1,1198 +1,1212 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* A_DC_TIMER_STOP, A_DC_TIMER_START, * A_FINALIZE_TIMER_STOP, A_FINALIZE_TIMER_START * A_INTEGRATE_TIMER_STOP, A_INTEGRATE_TIMER_START */ void do_timer_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { gboolean timer_op_ok = TRUE; if(action & A_DC_TIMER_STOP) { timer_op_ok = crm_timer_stop(election_trigger); } else if(action & A_FINALIZE_TIMER_STOP) { timer_op_ok = crm_timer_stop(finalization_timer); } else if(action & A_INTEGRATE_TIMER_STOP) { timer_op_ok = crm_timer_stop(integration_timer); /* } else if(action & A_ELECTION_TIMEOUT_STOP) { */ /* timer_op_ok = crm_timer_stop(election_timeout); */ } /* dont start a timer that wasnt already running */ if(action & A_DC_TIMER_START && timer_op_ok) { crm_timer_start(election_trigger); if(AM_I_DC) { /* there can be only one */ register_fsa_input(cause, I_ELECTION, NULL); } } else if(action & A_FINALIZE_TIMER_START) { crm_timer_start(finalization_timer); } else if(action & A_INTEGRATE_TIMER_START) { crm_timer_start(integration_timer); /* } else if(action & A_ELECTION_TIMEOUT_START) { */ /* crm_timer_start(election_timeout); */ } } static const char * get_timer_desc(fsa_timer_t *timer) { if(timer == election_trigger) { return "Election Trigger"; } else if(timer == election_timeout) { return "Election Timeout"; } else if(timer == shutdown_escalation_timer) { return "Shutdown Escalation"; } else if(timer == integration_timer) { return "Integration Timer"; } else if(timer == finalization_timer) { return "Finalization Timer"; } else if(timer == wait_timer) { return "Wait Timer"; } else if(timer == recheck_timer) { return "PEngine Recheck Timer"; } return "Unknown Timer"; } gboolean crm_timer_popped(gpointer data) { fsa_timer_t *timer = (fsa_timer_t *)data; if(timer == wait_timer || timer == recheck_timer || timer == finalization_timer || timer == election_trigger) { crm_info("%s (%s) just popped!", get_timer_desc(timer), fsa_input2string(timer->fsa_input)); } else { crm_err("%s (%s) just popped!", get_timer_desc(timer), fsa_input2string(timer->fsa_input)); } if(timer->repeat == FALSE) { crm_timer_stop(timer); /* make it _not_ go off again */ } if(timer->fsa_input == I_INTEGRATED) { register_fsa_input_before( C_TIMER_POPPED, timer->fsa_input, NULL); } else if(timer->fsa_input == I_PE_CALC && fsa_state != S_IDLE) { crm_debug("Discarding %s event in state: %s", fsa_input2string(timer->fsa_input), fsa_state2string(fsa_state)); } else if(timer->fsa_input == I_FINALIZED && fsa_state != S_FINALIZE_JOIN) { crm_debug("Discarding %s event in state: %s", fsa_input2string(timer->fsa_input), fsa_state2string(fsa_state)); } else if(timer->fsa_input != I_NULL) { register_fsa_input(C_TIMER_POPPED, timer->fsa_input, NULL); } crm_debug_3("Triggering FSA: %s", __FUNCTION__); G_main_set_trigger(fsa_source); return TRUE; } gboolean crm_timer_start(fsa_timer_t *timer) { const char *timer_desc = get_timer_desc(timer); if(timer->source_id == 0 && timer->period_ms > 0) { timer->source_id = Gmain_timeout_add( timer->period_ms, timer->callback, (void*)timer); CRM_ASSERT(timer->source_id != 0); crm_debug("Started %s (%s:%dms), src=%d", timer_desc, fsa_input2string(timer->fsa_input), timer->period_ms, timer->source_id); } else if(timer->period_ms < 0) { crm_err("Tried to start %s (%s:%dms) with a -ve period", timer_desc, fsa_input2string(timer->fsa_input), timer->period_ms); } else { crm_debug("%s (%s:%dms) already running: src=%d", timer_desc, fsa_input2string(timer->fsa_input), timer->period_ms, timer->source_id); return FALSE; } return TRUE; } gboolean crm_timer_stop(fsa_timer_t *timer) { const char *timer_desc = get_timer_desc(timer); if(timer == NULL) { crm_err("Attempted to stop NULL timer"); return FALSE; } else if(timer->source_id != 0) { crm_debug("Stopping %s (%s:%dms), src=%d", timer_desc, fsa_input2string(timer->fsa_input), timer->period_ms, timer->source_id); Gmain_timeout_remove(timer->source_id); timer->source_id = 0; } else { crm_debug_2("%s (%s:%dms) already stopped", timer_desc, fsa_input2string(timer->fsa_input), timer->period_ms); return FALSE; } return TRUE; } const char * fsa_input2string(enum crmd_fsa_input input) { const char *inputAsText = NULL; switch(input){ case I_NULL: inputAsText = "I_NULL"; break; case I_CIB_OP: inputAsText = "I_CIB_OP"; break; case I_CIB_UPDATE: inputAsText = "I_CIB_UPDATE"; break; case I_DC_TIMEOUT: inputAsText = "I_DC_TIMEOUT"; break; case I_ELECTION: inputAsText = "I_ELECTION"; break; case I_PE_CALC: inputAsText = "I_PE_CALC"; break; case I_RELEASE_DC: inputAsText = "I_RELEASE_DC"; break; case I_ELECTION_DC: inputAsText = "I_ELECTION_DC"; break; case I_ERROR: inputAsText = "I_ERROR"; break; case I_FAIL: inputAsText = "I_FAIL"; break; case I_INTEGRATED: inputAsText = "I_INTEGRATED"; break; case I_FINALIZED: inputAsText = "I_FINALIZED"; break; case I_NODE_JOIN: inputAsText = "I_NODE_JOIN"; break; case I_JOIN_OFFER: inputAsText = "I_JOIN_OFFER"; break; case I_JOIN_REQUEST: inputAsText = "I_JOIN_REQUEST"; break; case I_JOIN_RESULT: inputAsText = "I_JOIN_RESULT"; break; case I_NOT_DC: inputAsText = "I_NOT_DC"; break; case I_RECOVERED: inputAsText = "I_RECOVERED"; break; case I_RELEASE_FAIL: inputAsText = "I_RELEASE_FAIL"; break; case I_RELEASE_SUCCESS: inputAsText = "I_RELEASE_SUCCESS"; break; case I_RESTART: inputAsText = "I_RESTART"; break; case I_PE_SUCCESS: inputAsText = "I_PE_SUCCESS"; break; case I_ROUTER: inputAsText = "I_ROUTER"; break; case I_SHUTDOWN: inputAsText = "I_SHUTDOWN"; break; case I_STARTUP: inputAsText = "I_STARTUP"; break; case I_TE_SUCCESS: inputAsText = "I_TE_SUCCESS"; break; case I_STOP: inputAsText = "I_STOP"; break; case I_DC_HEARTBEAT: inputAsText = "I_DC_HEARTBEAT"; break; case I_WAIT_FOR_EVENT: inputAsText = "I_WAIT_FOR_EVENT"; break; case I_LRM_EVENT: inputAsText = "I_LRM_EVENT"; break; case I_PENDING: inputAsText = "I_PENDING"; break; case I_HALT: inputAsText = "I_HALT"; break; case I_TERMINATE: inputAsText = "I_TERMINATE"; break; case I_ILLEGAL: inputAsText = "I_ILLEGAL"; break; } if(inputAsText == NULL) { crm_err("Input %d is unknown", input); inputAsText = ""; } return inputAsText; } const char * fsa_state2string(enum crmd_fsa_state state) { const char *stateAsText = NULL; switch(state){ case S_IDLE: stateAsText = "S_IDLE"; break; case S_ELECTION: stateAsText = "S_ELECTION"; break; case S_INTEGRATION: stateAsText = "S_INTEGRATION"; break; case S_FINALIZE_JOIN: stateAsText = "S_FINALIZE_JOIN"; break; case S_NOT_DC: stateAsText = "S_NOT_DC"; break; case S_POLICY_ENGINE: stateAsText = "S_POLICY_ENGINE"; break; case S_RECOVERY: stateAsText = "S_RECOVERY"; break; case S_RELEASE_DC: stateAsText = "S_RELEASE_DC"; break; case S_PENDING: stateAsText = "S_PENDING"; break; case S_STOPPING: stateAsText = "S_STOPPING"; break; case S_TERMINATE: stateAsText = "S_TERMINATE"; break; case S_TRANSITION_ENGINE: stateAsText = "S_TRANSITION_ENGINE"; break; case S_STARTING: stateAsText = "S_STARTING"; break; case S_HALT: stateAsText = "S_HALT"; break; case S_ILLEGAL: stateAsText = "S_ILLEGAL"; break; } if(stateAsText == NULL) { crm_err("State %d is unknown", state); stateAsText = ""; } return stateAsText; } const char * fsa_cause2string(enum crmd_fsa_cause cause) { const char *causeAsText = NULL; switch(cause){ case C_UNKNOWN: causeAsText = "C_UNKNOWN"; break; case C_STARTUP: causeAsText = "C_STARTUP"; break; case C_IPC_MESSAGE: causeAsText = "C_IPC_MESSAGE"; break; case C_HA_MESSAGE: causeAsText = "C_HA_MESSAGE"; break; case C_CCM_CALLBACK: causeAsText = "C_CCM_CALLBACK"; break; case C_TIMER_POPPED: causeAsText = "C_TIMER_POPPED"; break; case C_SHUTDOWN: causeAsText = "C_SHUTDOWN"; break; case C_HEARTBEAT_FAILED: causeAsText = "C_HEARTBEAT_FAILED"; break; case C_SUBSYSTEM_CONNECT: causeAsText = "C_SUBSYSTEM_CONNECT"; break; case C_LRM_OP_CALLBACK: causeAsText = "C_LRM_OP_CALLBACK"; break; case C_LRM_MONITOR_CALLBACK: causeAsText = "C_LRM_MONITOR_CALLBACK"; break; case C_CRMD_STATUS_CALLBACK: causeAsText = "C_CRMD_STATUS_CALLBACK"; break; case C_HA_DISCONNECT: causeAsText = "C_HA_DISCONNECT"; break; case C_FSA_INTERNAL: causeAsText = "C_FSA_INTERNAL"; break; case C_ILLEGAL: causeAsText = "C_ILLEGAL"; break; } if(causeAsText == NULL) { crm_err("Cause %d is unknown", cause); causeAsText = ""; } return causeAsText; } const char * fsa_action2string(long long action) { const char *actionAsText = NULL; switch(action){ case A_NOTHING: actionAsText = "A_NOTHING"; break; case A_ELECTION_START: actionAsText = "A_ELECTION_START"; break; case A_READCONFIG: actionAsText = "A_READCONFIG"; break; case O_RELEASE: actionAsText = "O_RELEASE"; break; case A_STARTUP: actionAsText = "A_STARTUP"; break; case A_STARTED: actionAsText = "A_STARTED"; break; case A_HA_CONNECT: actionAsText = "A_HA_CONNECT"; break; case A_HA_DISCONNECT: actionAsText = "A_HA_DISCONNECT"; break; case A_LRM_CONNECT: actionAsText = "A_LRM_CONNECT"; break; case A_LRM_EVENT: actionAsText = "A_LRM_EVENT"; break; case A_LRM_INVOKE: actionAsText = "A_LRM_INVOKE"; break; case A_LRM_DISCONNECT: actionAsText = "A_LRM_DISCONNECT"; break; case A_CL_JOIN_QUERY: actionAsText = "A_CL_JOIN_QUERY"; break; case A_DC_TIMER_STOP: actionAsText = "A_DC_TIMER_STOP"; break; case A_DC_TIMER_START: actionAsText = "A_DC_TIMER_START"; break; case A_INTEGRATE_TIMER_START: actionAsText = "A_INTEGRATE_TIMER_START"; break; case A_INTEGRATE_TIMER_STOP: actionAsText = "A_INTEGRATE_TIMER_STOP"; break; case A_FINALIZE_TIMER_START: actionAsText = "A_FINALIZE_TIMER_START"; break; case A_FINALIZE_TIMER_STOP: actionAsText = "A_FINALIZE_TIMER_STOP"; break; case A_ELECTION_COUNT: actionAsText = "A_ELECTION_COUNT"; break; case A_ELECTION_VOTE: actionAsText = "A_ELECTION_VOTE"; break; case A_ELECTION_CHECK: actionAsText = "A_ELECTION_CHECK"; break; case A_CL_JOIN_ANNOUNCE: actionAsText = "A_CL_JOIN_ANNOUNCE"; break; case A_CL_JOIN_REQUEST: actionAsText = "A_CL_JOIN_REQUEST"; break; case A_CL_JOIN_RESULT: actionAsText = "A_CL_JOIN_RESULT"; break; case A_DC_JOIN_OFFER_ALL: actionAsText = "A_DC_JOIN_OFFER_ALL"; break; case A_DC_JOIN_OFFER_ONE: actionAsText = "A_DC_JOIN_OFFER_ONE"; break; case A_DC_JOIN_PROCESS_REQ: actionAsText = "A_DC_JOIN_PROCESS_REQ"; break; case A_DC_JOIN_PROCESS_ACK: actionAsText = "A_DC_JOIN_PROCESS_ACK"; break; case A_DC_JOIN_FINALIZE: actionAsText = "A_DC_JOIN_FINALIZE"; break; case A_MSG_PROCESS: actionAsText = "A_MSG_PROCESS"; break; case A_MSG_ROUTE: actionAsText = "A_MSG_ROUTE"; break; case A_RECOVER: actionAsText = "A_RECOVER"; break; case A_DC_RELEASE: actionAsText = "A_DC_RELEASE"; break; case A_DC_RELEASED: actionAsText = "A_DC_RELEASED"; break; case A_DC_TAKEOVER: actionAsText = "A_DC_TAKEOVER"; break; case A_SHUTDOWN: actionAsText = "A_SHUTDOWN"; break; case A_SHUTDOWN_REQ: actionAsText = "A_SHUTDOWN_REQ"; break; case A_STOP: actionAsText = "A_STOP "; break; case A_EXIT_0: actionAsText = "A_EXIT_0"; break; case A_EXIT_1: actionAsText = "A_EXIT_1"; break; case A_CCM_CONNECT: actionAsText = "A_CCM_CONNECT"; break; case A_CCM_DISCONNECT: actionAsText = "A_CCM_DISCONNECT"; break; case A_CIB_BUMPGEN: actionAsText = "A_CIB_BUMPGEN"; break; case A_CIB_INVOKE: actionAsText = "A_CIB_INVOKE"; break; case O_CIB_RESTART: actionAsText = "O_CIB_RESTART"; break; case A_CIB_START: actionAsText = "A_CIB_START"; break; case A_CIB_STOP: actionAsText = "A_CIB_STOP"; break; case A_TE_INVOKE: actionAsText = "A_TE_INVOKE"; break; case O_TE_RESTART: actionAsText = "O_TE_RESTART"; break; case A_TE_START: actionAsText = "A_TE_START"; break; case A_TE_STOP: actionAsText = "A_TE_STOP"; break; case A_TE_HALT: actionAsText = "A_TE_HALT"; break; case A_TE_CANCEL: actionAsText = "A_TE_CANCEL"; break; case A_PE_INVOKE: actionAsText = "A_PE_INVOKE"; break; case O_PE_RESTART: actionAsText = "O_PE_RESTART"; break; case A_PE_START: actionAsText = "A_PE_START"; break; case A_PE_STOP: actionAsText = "A_PE_STOP"; break; case A_NODE_BLOCK: actionAsText = "A_NODE_BLOCK"; break; case A_UPDATE_NODESTATUS: actionAsText = "A_UPDATE_NODESTATUS"; break; case A_LOG: actionAsText = "A_LOG "; break; case A_ERROR: actionAsText = "A_ERROR "; break; case A_WARN: actionAsText = "A_WARN "; break; } if(actionAsText == NULL) { crm_err("Action %.16llx is unknown", action); actionAsText = ""; } return actionAsText; } void fsa_dump_inputs(int log_level, const char *text, long long input_register) { if(input_register == A_NOTHING) { return; } if(text == NULL) { text = "Input register contents:"; } if(is_set(input_register, R_THE_DC)) { do_crm_log(log_level, "%s %.16llx (R_THE_DC)", text, R_THE_DC); } if(is_set(input_register, R_STARTING)) { do_crm_log(log_level, "%s %.16llx (R_STARTING)", text, R_STARTING); } if(is_set(input_register, R_SHUTDOWN)) { do_crm_log(log_level, "%s %.16llx (R_SHUTDOWN)", text, R_SHUTDOWN); } if(is_set(input_register, R_STAYDOWN)) { do_crm_log(log_level, "%s %.16llx (R_STAYDOWN)", text, R_STAYDOWN); } if(is_set(input_register, R_JOIN_OK)) { do_crm_log(log_level, "%s %.16llx (R_JOIN_OK)", text, R_JOIN_OK); } if(is_set(input_register, R_READ_CONFIG)) { do_crm_log(log_level, "%s %.16llx (R_READ_CONFIG)", text, R_READ_CONFIG); } if(is_set(input_register, R_INVOKE_PE)) { do_crm_log(log_level, "%s %.16llx (R_INVOKE_PE)", text, R_INVOKE_PE); } if(is_set(input_register, R_CIB_CONNECTED)) { do_crm_log(log_level, "%s %.16llx (R_CIB_CONNECTED)", text, R_CIB_CONNECTED); } if(is_set(input_register, R_PE_CONNECTED)) { do_crm_log(log_level, "%s %.16llx (R_PE_CONNECTED)", text, R_PE_CONNECTED); } if(is_set(input_register, R_TE_CONNECTED)) { do_crm_log(log_level, "%s %.16llx (R_TE_CONNECTED)", text, R_TE_CONNECTED); } if(is_set(input_register, R_LRM_CONNECTED)) { do_crm_log(log_level, "%s %.16llx (R_LRM_CONNECTED)", text, R_LRM_CONNECTED); } if(is_set(input_register, R_CIB_REQUIRED)) { do_crm_log(log_level, "%s %.16llx (R_CIB_REQUIRED)", text, R_CIB_REQUIRED); } if(is_set(input_register, R_PE_REQUIRED)) { do_crm_log(log_level, "%s %.16llx (R_PE_REQUIRED)", text, R_PE_REQUIRED); } if(is_set(input_register, R_TE_REQUIRED)) { do_crm_log(log_level, "%s %.16llx (R_TE_REQUIRED)", text, R_TE_REQUIRED); } if(is_set(input_register, R_REQ_PEND)) { do_crm_log(log_level, "%s %.16llx (R_REQ_PEND)", text, R_REQ_PEND); } if(is_set(input_register, R_PE_PEND)) { do_crm_log(log_level, "%s %.16llx (R_PE_PEND)", text, R_PE_PEND); } if(is_set(input_register, R_TE_PEND)) { do_crm_log(log_level, "%s %.16llx (R_TE_PEND)", text, R_TE_PEND); } if(is_set(input_register, R_RESP_PEND)) { do_crm_log(log_level, "%s %.16llx (R_RESP_PEND)", text, R_RESP_PEND); } if(is_set(input_register, R_CIB_DONE)) { do_crm_log(log_level, "%s %.16llx (R_CIB_DONE)", text, R_CIB_DONE); } if(is_set(input_register, R_HAVE_CIB)) { do_crm_log(log_level, "%s %.16llx (R_HAVE_CIB)", text, R_HAVE_CIB); } if(is_set(input_register, R_CIB_ASKED)) { do_crm_log(log_level, "%s %.16llx (R_CIB_ASKED)", text, R_CIB_ASKED); } if(is_set(input_register, R_CCM_DATA)) { do_crm_log(log_level, "%s %.16llx (R_CCM_DATA)", text, R_CCM_DATA); } if(is_set(input_register, R_PEER_DATA)) { do_crm_log(log_level, "%s %.16llx (R_PEER_DATA)", text, R_PEER_DATA); } if(is_set(input_register, R_IN_RECOVERY)) { do_crm_log(log_level, "%s %.16llx (R_IN_RECOVERY)", text, R_IN_RECOVERY); } } void fsa_dump_actions(long long action, const char *text) { int log_level = LOG_DEBUG_3; if(is_set(action, A_READCONFIG)) { do_crm_log(log_level, "Action %.16llx (A_READCONFIG) %s", A_READCONFIG, text); } if(is_set(action, A_STARTUP)) { do_crm_log(log_level, "Action %.16llx (A_STARTUP) %s", A_STARTUP, text); } if(is_set(action, A_STARTED)) { do_crm_log(log_level, "Action %.16llx (A_STARTED) %s", A_STARTED, text); } if(is_set(action, A_HA_CONNECT)) { do_crm_log(log_level, "Action %.16llx (A_CONNECT) %s", A_HA_CONNECT, text); } if(is_set(action, A_HA_DISCONNECT)) { do_crm_log(log_level, "Action %.16llx (A_DISCONNECT) %s", A_HA_DISCONNECT, text); } if(is_set(action, A_LRM_CONNECT)) { do_crm_log(log_level, "Action %.16llx (A_LRM_CONNECT) %s", A_LRM_CONNECT, text); } if(is_set(action, A_LRM_EVENT)) { do_crm_log(log_level, "Action %.16llx (A_LRM_EVENT) %s", A_LRM_EVENT, text); } if(is_set(action, A_LRM_INVOKE)) { do_crm_log(log_level, "Action %.16llx (A_LRM_INVOKE) %s", A_LRM_INVOKE, text); } if(is_set(action, A_LRM_DISCONNECT)) { do_crm_log(log_level, "Action %.16llx (A_LRM_DISCONNECT) %s", A_LRM_DISCONNECT, text); } if(is_set(action, A_DC_TIMER_STOP)) { do_crm_log(log_level, "Action %.16llx (A_DC_TIMER_STOP) %s", A_DC_TIMER_STOP, text); } if(is_set(action, A_DC_TIMER_START)) { do_crm_log(log_level, "Action %.16llx (A_DC_TIMER_START) %s", A_DC_TIMER_START, text); } if(is_set(action, A_INTEGRATE_TIMER_START)) { do_crm_log(log_level, "Action %.16llx (A_INTEGRATE_TIMER_START) %s", A_INTEGRATE_TIMER_START, text); } if(is_set(action, A_INTEGRATE_TIMER_STOP)) { do_crm_log(log_level, "Action %.16llx (A_INTEGRATE_TIMER_STOP) %s", A_INTEGRATE_TIMER_STOP, text); } if(is_set(action, A_FINALIZE_TIMER_START)) { do_crm_log(log_level, "Action %.16llx (A_FINALIZE_TIMER_START) %s", A_FINALIZE_TIMER_START, text); } if(is_set(action, A_FINALIZE_TIMER_STOP)) { do_crm_log(log_level, "Action %.16llx (A_FINALIZE_TIMER_STOP) %s", A_FINALIZE_TIMER_STOP, text); } if(is_set(action, A_ELECTION_COUNT)) { do_crm_log(log_level, "Action %.16llx (A_ELECTION_COUNT) %s", A_ELECTION_COUNT, text); } if(is_set(action, A_ELECTION_VOTE)) { do_crm_log(log_level, "Action %.16llx (A_ELECTION_VOTE) %s", A_ELECTION_VOTE, text); } if(is_set(action, A_ELECTION_CHECK)) { do_crm_log(log_level, "Action %.16llx (A_ELECTION_CHECK) %s", A_ELECTION_CHECK, text); } if(is_set(action, A_CL_JOIN_ANNOUNCE)) { do_crm_log(log_level, "Action %.16llx (A_CL_JOIN_ANNOUNCE) %s", A_CL_JOIN_ANNOUNCE, text); } if(is_set(action, A_CL_JOIN_REQUEST)) { do_crm_log(log_level, "Action %.16llx (A_CL_JOIN_REQUEST) %s", A_CL_JOIN_REQUEST, text); } if(is_set(action, A_CL_JOIN_RESULT)) { do_crm_log(log_level, "Action %.16llx (A_CL_JOIN_RESULT) %s", A_CL_JOIN_RESULT, text); } if(is_set(action, A_DC_JOIN_OFFER_ALL)) { do_crm_log(log_level, "Action %.16llx (A_DC_JOIN_OFFER_ALL) %s", A_DC_JOIN_OFFER_ALL, text); } if(is_set(action, A_DC_JOIN_OFFER_ONE)) { do_crm_log(log_level, "Action %.16llx (A_DC_JOIN_OFFER_ONE) %s", A_DC_JOIN_OFFER_ONE, text); } if(is_set(action, A_DC_JOIN_PROCESS_REQ)) { do_crm_log(log_level, "Action %.16llx (A_DC_JOIN_PROCESS_REQ) %s", A_DC_JOIN_PROCESS_REQ, text); } if(is_set(action, A_DC_JOIN_PROCESS_ACK)) { do_crm_log(log_level, "Action %.16llx (A_DC_JOIN_PROCESS_ACK) %s", A_DC_JOIN_PROCESS_ACK, text); } if(is_set(action, A_DC_JOIN_FINALIZE)) { do_crm_log(log_level, "Action %.16llx (A_DC_JOIN_FINALIZE) %s", A_DC_JOIN_FINALIZE, text); } if(is_set(action, A_MSG_PROCESS)) { do_crm_log(log_level, "Action %.16llx (A_MSG_PROCESS) %s", A_MSG_PROCESS, text); } if(is_set(action, A_MSG_ROUTE)) { do_crm_log(log_level, "Action %.16llx (A_MSG_ROUTE) %s", A_MSG_ROUTE, text); } if(is_set(action, A_RECOVER)) { do_crm_log(log_level, "Action %.16llx (A_RECOVER) %s", A_RECOVER, text); } if(is_set(action, A_DC_RELEASE)) { do_crm_log(log_level, "Action %.16llx (A_DC_RELEASE) %s", A_DC_RELEASE, text); } if(is_set(action, A_DC_RELEASED)) { do_crm_log(log_level, "Action %.16llx (A_DC_RELEASED) %s", A_DC_RELEASED, text); } if(is_set(action, A_DC_TAKEOVER)) { do_crm_log(log_level, "Action %.16llx (A_DC_TAKEOVER) %s", A_DC_TAKEOVER, text); } if(is_set(action, A_SHUTDOWN)) { do_crm_log(log_level, "Action %.16llx (A_SHUTDOWN) %s", A_SHUTDOWN, text); } if(is_set(action, A_SHUTDOWN_REQ)) { do_crm_log(log_level, "Action %.16llx (A_SHUTDOWN_REQ) %s", A_SHUTDOWN_REQ, text); } if(is_set(action, A_STOP)) { do_crm_log(log_level, "Action %.16llx (A_STOP ) %s", A_STOP , text); } if(is_set(action, A_EXIT_0)) { do_crm_log(log_level, "Action %.16llx (A_EXIT_0) %s", A_EXIT_0, text); } if(is_set(action, A_EXIT_1)) { do_crm_log(log_level, "Action %.16llx (A_EXIT_1) %s", A_EXIT_1, text); } if(is_set(action, A_CCM_CONNECT)) { do_crm_log(log_level, "Action %.16llx (A_CCM_CONNECT) %s", A_CCM_CONNECT, text); } if(is_set(action, A_CCM_DISCONNECT)) { do_crm_log(log_level, "Action %.16llx (A_CCM_DISCONNECT) %s", A_CCM_DISCONNECT, text); } if(is_set(action, A_CIB_BUMPGEN)) { do_crm_log(log_level, "Action %.16llx (A_CIB_BUMPGEN) %s", A_CIB_BUMPGEN, text); } if(is_set(action, A_CIB_INVOKE)) { do_crm_log(log_level, "Action %.16llx (A_CIB_INVOKE) %s", A_CIB_INVOKE, text); } if(is_set(action, A_CIB_START)) { do_crm_log(log_level, "Action %.16llx (A_CIB_START) %s", A_CIB_START, text); } if(is_set(action, A_CIB_STOP)) { do_crm_log(log_level, "Action %.16llx (A_CIB_STOP) %s", A_CIB_STOP, text); } if(is_set(action, A_TE_INVOKE)) { do_crm_log(log_level, "Action %.16llx (A_TE_INVOKE) %s", A_TE_INVOKE, text); } if(is_set(action, A_TE_START)) { do_crm_log(log_level, "Action %.16llx (A_TE_START) %s", A_TE_START, text); } if(is_set(action, A_TE_STOP)) { do_crm_log(log_level, "Action %.16llx (A_TE_STOP) %s", A_TE_STOP, text); } if(is_set(action, A_TE_CANCEL)) { do_crm_log(log_level, "Action %.16llx (A_TE_CANCEL) %s", A_TE_CANCEL, text); } if(is_set(action, A_PE_INVOKE)) { do_crm_log(log_level, "Action %.16llx (A_PE_INVOKE) %s", A_PE_INVOKE, text); } if(is_set(action, A_PE_START)) { do_crm_log(log_level, "Action %.16llx (A_PE_START) %s", A_PE_START, text); } if(is_set(action, A_PE_STOP)) { do_crm_log(log_level, "Action %.16llx (A_PE_STOP) %s", A_PE_STOP, text); } if(is_set(action, A_NODE_BLOCK)) { do_crm_log(log_level, "Action %.16llx (A_NODE_BLOCK) %s", A_NODE_BLOCK, text); } if(is_set(action, A_UPDATE_NODESTATUS)) { do_crm_log(log_level, "Action %.16llx (A_UPDATE_NODESTATUS) %s", A_UPDATE_NODESTATUS, text); } if(is_set(action, A_LOG)) { do_crm_log(log_level, "Action %.16llx (A_LOG ) %s", A_LOG, text); } if(is_set(action, A_ERROR)) { do_crm_log(log_level, "Action %.16llx (A_ERROR ) %s", A_ERROR, text); } if(is_set(action, A_WARN)) { do_crm_log(log_level, "Action %.16llx (A_WARN ) %s", A_WARN, text); } } void create_node_entry(const char *uuid, const char *uname, const char *type) { /* make sure a node entry exists for the new node * * this will add anyone except the first ever node in the cluster * since it will also be the DC which doesnt go through the * join process (with itself). We can include a special case * later if desired. */ xmlNode *tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE); crm_debug_3("Creating node entry for %s", uname); set_uuid(tmp1, XML_ATTR_UUID, uname); crm_xml_add(tmp1, XML_ATTR_UNAME, uname); crm_xml_add(tmp1, XML_ATTR_TYPE, type); fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1, cib_scope_local|cib_quorum_override); free_xml(tmp1); } xmlNode* create_node_state( const char *uname, const char *ha_state, const char *ccm_state, const char *crmd_state, const char *join_state, const char *exp_state, gboolean clear_shutdown, const char *src) { xmlNode *node_state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_debug_2("%s Creating node state entry for %s", src, uname); set_uuid(node_state, XML_ATTR_UUID, uname); if(crm_element_value(node_state, XML_ATTR_UUID) == NULL) { crm_debug("Node %s is not a cluster member", uname); free_xml(node_state); return NULL; } crm_xml_add(node_state, XML_ATTR_UNAME, uname); crm_xml_add(node_state, XML_CIB_ATTR_HASTATE, ha_state); crm_xml_add(node_state, XML_CIB_ATTR_INCCM, ccm_state); crm_xml_add(node_state, XML_CIB_ATTR_CRMDSTATE, crmd_state); crm_xml_add(node_state, XML_CIB_ATTR_JOINSTATE, join_state); crm_xml_add(node_state, XML_CIB_ATTR_EXPSTATE, exp_state); crm_xml_add(node_state, XML_ATTR_ORIGIN, src); if(clear_shutdown) { crm_xml_add(node_state, XML_CIB_ATTR_SHUTDOWN, "0"); #if CRM_DEPRECATED_SINCE_2_0_3 crm_xml_add(node_state, "clear_shutdown", "true"); #endif -/* crm_xml_add(node_state, */ -/* XML_CIB_ATTR_REPLACE, XML_TAG_TRANSIENT_NODEATTRS); */ } crm_log_xml_debug_3(node_state, "created"); return node_state; } extern GHashTable *ipc_clients; void process_client_disconnect(crmd_client_t *curr_client) { struct crm_subsystem_s *the_subsystem = NULL; CRM_CHECK(curr_client != NULL, return); crm_debug_2("received HUP from %s", curr_client->table_key); if (curr_client->sub_sys == NULL) { crm_debug_2("Client hadn't registered with us yet"); } else if (strcasecmp(CRM_SYSTEM_PENGINE, curr_client->sub_sys) == 0) { the_subsystem = pe_subsystem; } else if (strcasecmp(CRM_SYSTEM_TENGINE, curr_client->sub_sys) == 0) { the_subsystem = te_subsystem; } else if (strcasecmp(CRM_SYSTEM_CIB, curr_client->sub_sys) == 0){ the_subsystem = cib_subsystem; } if(the_subsystem != NULL) { the_subsystem->ipc = NULL; the_subsystem->client = NULL; crm_info("Received HUP from %s:[%d]", the_subsystem->name, the_subsystem->pid); } else { /* else that was a transient client */ crm_debug_2("Received HUP from transient client"); } if (curr_client->table_key != NULL) { /* * Key is destroyed below as: * curr_client->table_key * Value is cleaned up by: * crmd_ipc_connection_destroy * which will also call: * G_main_del_IPC_Channel */ g_hash_table_remove(ipc_clients, curr_client->table_key); } } void update_dc(xmlNode *msg, gboolean assert_same) { char *last_dc = fsa_our_dc; const char *dc_version = NULL; const char *welcome_from = NULL; if(msg != NULL) { dc_version = crm_element_value(msg, F_CRM_VERSION); welcome_from = crm_element_value(msg, F_CRM_HOST_FROM); CRM_CHECK(dc_version != NULL, return); CRM_CHECK(welcome_from != NULL, return); if(AM_I_DC && safe_str_neq(welcome_from, fsa_our_uname)) { crm_warn("Not updating DC to %s (%s): we are also a DC", welcome_from, dc_version); return; } if(assert_same) { CRM_CHECK(fsa_our_dc != NULL, ;); CRM_CHECK(safe_str_eq(fsa_our_dc, welcome_from), ;); } } crm_free(fsa_our_dc_version); fsa_our_dc_version = NULL; fsa_our_dc = NULL; if(welcome_from != NULL) { fsa_our_dc = crm_strdup(welcome_from); } if(dc_version != NULL) { fsa_our_dc_version = crm_strdup(dc_version); } if(fsa_our_dc != NULL) { crm_info("Set DC to %s (%s)", crm_str(fsa_our_dc), crm_str(fsa_our_dc_version)); } else if(last_dc != NULL) { crm_info("Unset DC %s", crm_str(last_dc)); } crm_free(last_dc); } + +#define STATUS_PATH_MAX 512 + +void erase_status_tag(const char *uname, const char *tag) +{ + char xpath[STATUS_PATH_MAX]; + int cib_opts = cib_scope_local|cib_quorum_override|cib_xpath; + + if(fsa_cib_conn) { + xmlNode *update = create_xml_node(NULL, tag); + crm_xml_add(update, XML_ATTR_ID, fsa_our_uuid); + snprintf(xpath, STATUS_PATH_MAX, "//node_state[uname=\"%s\"]/%s", uname, tag); + fsa_cib_conn->cmds->replace(fsa_cib_conn, xpath, update, cib_opts); + free_xml(update); + } +}