diff --git a/crm/crmd/cib.c b/crm/crmd/cib.c index 2e614025fb..d39dba4063 100644 --- a/crm/crmd/cib.c +++ b/crm/crmd/cib.c @@ -1,374 +1,382 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include /* for access */ #include #include #include /* for calls to open */ #include /* for calls to open */ #include /* for calls to open */ #include /* for getpwuid */ #include /* for initgroups */ #include /* for getrlimit */ #include /* for getrlimit */ #include #include #include #include #include #include #include #include struct crm_subsystem_s *cib_subsystem = NULL; int cib_retries = 0; static void revision_check_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { int cmp = -1; const char *revision = NULL; crm_data_t *generation = NULL; #if CRM_DEPRECATED_SINCE_2_0_4 if(safe_str_eq(crm_element_name(output), XML_TAG_CIB)) { generation = output; } else { generation = find_xml_node(output, XML_TAG_CIB, TRUE); } #else generation = output; CRM_DEV_ASSERT(safe_str_eq(crm_element_name(generation), XML_TAG_CIB)); #endif if(rc != cib_ok) { fsa_data_t *msg_data = NULL; register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } crm_debug_3("Checking our feature revision is allowed: %s", CIB_FEATURE_SET); revision = crm_element_value(generation, XML_ATTR_CIB_REVISION); cmp = compare_version(revision, CIB_FEATURE_SET); if(cmp > 0) { crm_err("This build (%s) does not support the current" " resource configuration", VERSION); crm_err("We can support up to CIB feature set %s (current=%s)", CIB_FEATURE_SET, revision); crm_err("Shutting down the CRM"); /* go into a stall state */ register_fsa_error_adv( C_FSA_INTERNAL, I_SHUTDOWN, NULL, NULL, __FUNCTION__); return; } revision = crm_element_value(generation, XML_ATTR_CRM_VERSION); cmp = compare_version(revision, CRM_FEATURE_SET); if(cmp > 0) { crm_err("This build (%s) does not support the current" " resource configuration", VERSION); crm_err("We can support up to CRM feature set %s (current=%s)", revision, CRM_FEATURE_SET); crm_err("Shutting down the CRM"); /* go into a stall state */ register_fsa_error_adv( C_FSA_INTERNAL, I_SHUTDOWN, NULL, NULL, __FUNCTION__); return; } } -extern void populate_cib_nodes(ll_cluster_t *hb_cluster); +extern void populate_cib_nodes( + ll_cluster_t *hb_cluster, gboolean with_client_status); static void do_cib_replaced(const char *event, HA_Message *msg) { crm_debug("Updating the CIB after a replace"); - populate_cib_nodes(fsa_cluster_conn); +#ifdef BUG + /* enabling this seems to cause delays (in the order of 3 minutes) + * in messages being received by the node that runs this + * + * no idea why :( + */ + populate_cib_nodes(fsa_cluster_conn, FALSE); +#endif do_update_cib_nodes(AM_I_DC, __FUNCTION__); if(AM_I_DC) { /* start the join process again so we get everyone's LRM status */ register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } } /* A_CIB_STOP, A_CIB_START, A_CIB_RESTART, */ enum crmd_fsa_input do_cib_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { enum crmd_fsa_input result = I_NULL; struct crm_subsystem_s *this_subsys = cib_subsystem; long long stop_actions = A_CIB_STOP; long long start_actions = A_CIB_START; if(action & stop_actions) { crm_info("Disconnecting CIB"); clear_bit_inplace(fsa_input_register, R_CIB_CONNECTED); if(fsa_cib_conn != NULL && fsa_cib_conn->state != cib_disconnected) { fsa_cib_conn->cmds->set_slave( fsa_cib_conn, cib_scope_local); fsa_cib_conn->cmds->signoff(fsa_cib_conn); } } if(action & start_actions) { if(fsa_cib_conn == NULL) { fsa_cib_conn = cib_new(); } if(cur_state == S_STOPPING) { crm_err("Ignoring request to start %s after shutdown", this_subsys->name); return I_NULL; } if(cib_ok != fsa_cib_conn->cmds->signon( fsa_cib_conn, CRM_SYSTEM_CRMD, cib_command)){ crm_debug("Could not connect to the CIB service"); #if 0 } else if(cib_ok != fsa_cib_conn->cmds->set_op_callback( fsa_cib_conn, crmd_cib_op_callback)) { crm_err("Could not set op callback"); #endif } else if(cib_ok != fsa_cib_conn->cmds->set_connection_dnotify( fsa_cib_conn, crmd_cib_connection_destroy)) { crm_err("Could not set dnotify callback"); } else if(cib_ok != fsa_cib_conn->cmds->add_notify_callback( fsa_cib_conn, T_CIB_REPLACE_NOTIFY, do_cib_replaced)) { crm_err("Could not set CIB notification callback"); } else { set_bit_inplace( fsa_input_register, R_CIB_CONNECTED); } if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { cib_retries++; crm_warn("Couldn't complete CIB registration %d" " times... pause and retry", cib_retries); if(cib_retries < 30) { crm_timer_start(wait_timer); crmd_fsa_stall(NULL); } else { crm_err("Could not complete CIB" " registration %d times..." " hard error", cib_retries); register_fsa_error( C_FSA_INTERNAL, I_ERROR, NULL); } } else { int call_id = 0; crm_info("CIB connection established"); call_id = fsa_cib_conn->cmds->query( fsa_cib_conn, NULL, NULL, cib_scope_local); add_cib_op_callback(call_id, FALSE, NULL, revision_check_callback); cib_retries = 0; } } return result; } /* A_CIB_INVOKE, A_CIB_BUMPGEN, A_UPDATE_NODESTATUS */ enum crmd_fsa_input do_cib_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { HA_Message *answer = NULL; enum crmd_fsa_input result = I_NULL; ha_msg_input_t *cib_msg = fsa_typed_data(fsa_dt_ha_msg); const char *sys_from = cl_get_string(cib_msg->msg, F_CRM_SYS_FROM); if(fsa_cib_conn->state == cib_disconnected) { if(cur_state != S_STOPPING) { crm_err("CIB is disconnected"); crm_log_message_adv(LOG_WARNING, "CIB Input", cib_msg->msg); return I_NULL; } crm_info("CIB is disconnected"); crm_log_message_adv(LOG_DEBUG, "CIB Input", cib_msg->msg); return I_NULL; } if(action & A_CIB_INVOKE) { if(safe_str_eq(sys_from, CRM_SYSTEM_CRMD)) { action = A_CIB_INVOKE_LOCAL; } else if(safe_str_eq(sys_from, CRM_SYSTEM_DC)) { action = A_CIB_INVOKE_LOCAL; } } if(action & A_CIB_INVOKE || action & A_CIB_INVOKE_LOCAL) { int call_options = 0; enum cib_errors rc = cib_ok; crm_data_t *cib_frag = NULL; const char *section = NULL; const char *op = cl_get_string(cib_msg->msg, F_CRM_TASK); section = cl_get_string(cib_msg->msg, F_CIB_SECTION); ha_msg_value_int(cib_msg->msg, F_CIB_CALLOPTS, &call_options); crm_log_message(LOG_MSG, cib_msg->msg); crm_log_xml_debug_3(cib_msg->xml, "[CIB update]"); if(op == NULL) { crm_err("Invalid CIB Message"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return I_NULL; } cib_frag = NULL; rc = fsa_cib_conn->cmds->variant_op( fsa_cib_conn, op, NULL, section, cib_msg->xml, &cib_frag, call_options); if(rc < cib_ok || (action & A_CIB_INVOKE)) { answer = create_reply(cib_msg->msg, cib_frag); ha_msg_add(answer,XML_ATTR_RESULT,cib_error2string(rc)); } if(action & A_CIB_INVOKE) { if(relay_message(answer, TRUE) == FALSE) { crm_err("Confused what to do with cib result"); crm_log_message(LOG_ERR, answer); crm_msg_del(answer); result = I_ERROR; } } else if(rc < cib_ok) { ha_msg_input_t *input = NULL; crm_err("Internal CRM/CIB command from %s() failed: %s", msg_data->origin, cib_error2string(rc)); crm_log_message_adv(LOG_WARNING, "CIB Input", cib_msg->msg); crm_log_message_adv(LOG_WARNING, "CIB Reply", answer); input = new_ha_msg_input(answer); register_fsa_input(C_FSA_INTERNAL, I_ERROR, input); crm_msg_del(answer); delete_ha_msg_input(input); } return result; } else { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } return I_NULL; } /* frees fragment as part of delete_ha_msg_input() */ void update_local_cib_adv( crm_data_t *msg_data, gboolean do_now, const char *raised_from) { HA_Message *msg = NULL; ha_msg_input_t *fsa_input = NULL; int call_options = cib_quorum_override|cib_scope_local; CRM_DEV_ASSERT(msg_data != NULL); crm_malloc0(fsa_input, sizeof(ha_msg_input_t)); msg = create_request(CIB_OP_UPDATE, msg_data, NULL, CRM_SYSTEM_CIB, CRM_SYSTEM_CRMD, NULL); ha_msg_add(msg, F_CIB_SECTION, crm_element_value(msg_data, XML_ATTR_SECTION)); ha_msg_add_int(msg, F_CIB_CALLOPTS, call_options); ha_msg_add(msg, "call_origin", raised_from); fsa_input->msg = msg; fsa_input->xml = msg_data; if(AM_I_DC && crm_assert_failed) { /* register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); */ } if(do_now == FALSE) { crm_debug_3("Registering event with FSA"); register_fsa_input_adv(C_FSA_INTERNAL, I_CIB_OP, fsa_input, 0, FALSE, raised_from); } else { fsa_data_t *op_data = NULL; crm_debug_3("Invoking CIB handler directly"); crm_malloc0(op_data, sizeof(fsa_data_t)); op_data->fsa_cause = C_FSA_INTERNAL; op_data->fsa_input = I_CIB_OP; op_data->origin = raised_from; op_data->data = fsa_input; op_data->data_type = fsa_dt_ha_msg; do_cib_invoke(A_CIB_INVOKE_LOCAL, C_FSA_INTERNAL, fsa_state, I_CIB_OP, op_data); crm_free(op_data); crm_debug_3("CIB handler completed"); } crm_debug_3("deleting input"); crm_msg_del(fsa_input->msg); free_xml(fsa_input->xml); crm_free(fsa_input); crm_debug_3("deleted input"); } diff --git a/crm/crmd/control.c b/crm/crmd/control.c index ce6eccf8cd..e47e86e678 100644 --- a/crm/crmd/control.c +++ b/crm/crmd/control.c @@ -1,750 +1,754 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern void crmd_ha_connection_destroy(gpointer user_data); extern gboolean stop_all_resources(void); gboolean crm_shutdown(int nsig, gpointer unused); gboolean register_with_ha(ll_cluster_t *hb_cluster, const char *client_name); -void populate_cib_nodes(ll_cluster_t *hb_cluster); +void populate_cib_nodes(ll_cluster_t *hb_cluster, gboolean with_client_status); GHashTable *ipc_clients = NULL; GTRIGSource *fsa_source = NULL; /* A_HA_CONNECT */ enum crmd_fsa_input do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { gboolean registered = FALSE; if(action & A_HA_DISCONNECT) { if(fsa_cluster_conn != NULL) { set_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); fsa_cluster_conn->llc_ops->signoff( fsa_cluster_conn, FALSE); fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn); fsa_cluster_conn = NULL; } crm_info("Disconnected from Heartbeat"); } if(action & A_HA_CONNECT) { if(fsa_cluster_conn == NULL) { fsa_cluster_conn = ll_cluster_new("heartbeat"); } /* make sure we are disconnected first */ fsa_cluster_conn->llc_ops->signoff(fsa_cluster_conn, FALSE); registered = register_with_ha( fsa_cluster_conn, crm_system_name); if(registered == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return I_NULL; } clear_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); crm_info("Connected to Heartbeat"); } if(action & ~(A_HA_CONNECT|A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } return I_NULL; } /* A_SHUTDOWN */ enum crmd_fsa_input do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int lpc = 0; gboolean continue_shutdown = TRUE; struct crm_subsystem_s *subsystems[] = { pe_subsystem, te_subsystem }; /* just in case */ set_bit_inplace(fsa_input_register, R_SHUTDOWN); for(lpc = 0; lpc < DIMOF(subsystems); lpc++) { struct crm_subsystem_s *a_subsystem = subsystems[lpc]; if(is_set(fsa_input_register, a_subsystem->flag_connected)) { crm_info("Terminating the %s", a_subsystem->name); if(stop_subsystem(a_subsystem, TRUE) == FALSE) { /* its gone... */ crm_err("Faking %s exit", a_subsystem->name); clear_bit_inplace(fsa_input_register, a_subsystem->flag_connected); } continue_shutdown = FALSE; } } if(continue_shutdown == FALSE) { crm_info("Waiting for subsystems to exit"); crmd_fsa_stall(NULL); } else { register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } return I_NULL; } /* A_SHUTDOWN_REQ */ enum crmd_fsa_input do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { HA_Message *msg = NULL; crm_info("Sending shutdown request to DC: %s", crm_str(fsa_our_dc)); msg = create_request( CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); /* set_bit_inplace(fsa_input_register, R_STAYDOWN); */ if(send_request(msg, NULL) == FALSE) { if(AM_I_DC) { crm_info("Processing shutdown locally"); } else { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } return I_NULL; } /* A_EXIT_0, A_EXIT_1 */ enum crmd_fsa_input do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int exit_code = 0; int log_level = LOG_INFO; const char *exit_type = "gracefully"; if(action & A_EXIT_1) { exit_code = 1; log_level = LOG_ERR; exit_type = "forcefully"; } crm_log_maybe(log_level, "Performing %s - %s exiting the CRMd", fsa_action2string(action), exit_type); if(is_set(fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); exit_code = 2; } else if(is_set(fsa_input_register, R_STAYDOWN)) { crm_warn("Inhibiting respawn by Heartbeat"); exit_code = 100; } crm_info("[%s] stopped (%d)", crm_system_name, exit_code); exit(exit_code); return I_NULL; } /* A_STARTUP */ enum crmd_fsa_input do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int was_error = 0; int interval = 1; /* seconds between DC heartbeats */ crm_debug("Registering Signal Handlers"); G_main_add_SignalHandler( G_PRIORITY_HIGH, SIGTERM, crm_shutdown, NULL, NULL); fsa_source = G_main_add_TriggerHandler( G_PRIORITY_HIGH, crm_fsa_trigger, NULL, NULL); ipc_clients = g_hash_table_new(g_str_hash, g_str_equal); if(was_error == 0) { crm_debug("Init server comms"); was_error = init_server_ipc_comms( crm_strdup(CRM_SYSTEM_CRMD), crmd_client_connect, default_ipc_connection_destroy); } if(was_error == 0) { crm_debug("Creating CIB object"); fsa_cib_conn = cib_new(); } /* set up the timers */ crm_malloc0(integration_timer, sizeof(fsa_timer_t)); crm_malloc0(finalization_timer, sizeof(fsa_timer_t)); crm_malloc0(election_trigger, sizeof(fsa_timer_t)); crm_malloc0(election_timeout, sizeof(fsa_timer_t)); crm_malloc0(shutdown_escalation_timer, sizeof(fsa_timer_t)); crm_malloc0(wait_timer, sizeof(fsa_timer_t)); crm_malloc0(recheck_timer, sizeof(fsa_timer_t)); interval = interval * 1000; if(election_trigger != NULL) { election_trigger->source_id = 0; election_trigger->period_ms = -1; election_trigger->fsa_input = I_DC_TIMEOUT; election_trigger->callback = crm_timer_popped; election_trigger->repeat = FALSE; } else { was_error = TRUE; } if(election_timeout != NULL) { election_timeout->source_id = 0; election_timeout->period_ms = -1; election_timeout->fsa_input = I_ELECTION_DC; election_timeout->callback = crm_timer_popped; election_timeout->repeat = FALSE; } else { was_error = TRUE; } if(integration_timer != NULL) { integration_timer->source_id = 0; integration_timer->period_ms = -1; integration_timer->fsa_input = I_INTEGRATED; integration_timer->callback = crm_timer_popped; integration_timer->repeat = FALSE; } else { was_error = TRUE; } if(finalization_timer != NULL) { finalization_timer->source_id = 0; finalization_timer->period_ms = -1; finalization_timer->fsa_input = I_FINALIZED; finalization_timer->callback = crm_timer_popped; finalization_timer->repeat = FALSE; /* for possible enabling... a bug in the join protocol left * a slave in S_PENDING while we think its in S_NOT_DC * * raising I_FINALIZED put us into a transition loop which is * never resolved. * in this loop we continually send probes which the node * NACK's because its in S_PENDING * * if we have nodes where heartbeat is active but the * CRM is not... then this will be handled in the * integration phase */ finalization_timer->fsa_input = I_ELECTION; } else { was_error = TRUE; } if(shutdown_escalation_timer != NULL) { shutdown_escalation_timer->source_id = 0; shutdown_escalation_timer->period_ms = -1; shutdown_escalation_timer->fsa_input = I_STOP; shutdown_escalation_timer->callback = crm_timer_popped; shutdown_escalation_timer->repeat = FALSE; } else { was_error = TRUE; } if(wait_timer != NULL) { wait_timer->source_id = 0; wait_timer->period_ms = 500; wait_timer->fsa_input = I_NULL; wait_timer->callback = crm_timer_popped; wait_timer->repeat = FALSE; } else { was_error = TRUE; } if(recheck_timer != NULL) { recheck_timer->source_id = 0; recheck_timer->period_ms = -1; recheck_timer->fsa_input = I_PE_CALC; recheck_timer->callback = crm_timer_popped; recheck_timer->repeat = FALSE; } else { was_error = TRUE; } /* set up the sub systems */ crm_malloc0(cib_subsystem, sizeof(struct crm_subsystem_s)); crm_malloc0(te_subsystem, sizeof(struct crm_subsystem_s)); crm_malloc0(pe_subsystem, sizeof(struct crm_subsystem_s)); if(cib_subsystem != NULL) { cib_subsystem->pid = -1; cib_subsystem->path = BIN_DIR; cib_subsystem->name = CRM_SYSTEM_CIB; cib_subsystem->command = BIN_DIR"/"CRM_SYSTEM_CIB; cib_subsystem->args = "-VVc"; cib_subsystem->flag_connected = R_CIB_CONNECTED; cib_subsystem->flag_required = R_CIB_REQUIRED; } else { was_error = TRUE; } if(te_subsystem != NULL) { te_subsystem->pid = -1; te_subsystem->path = BIN_DIR; te_subsystem->name = CRM_SYSTEM_TENGINE; te_subsystem->command = BIN_DIR"/"CRM_SYSTEM_TENGINE; te_subsystem->args = NULL; te_subsystem->flag_connected = R_TE_CONNECTED; te_subsystem->flag_required = R_TE_REQUIRED; } else { was_error = TRUE; } if(pe_subsystem != NULL) { pe_subsystem->pid = -1; pe_subsystem->path = BIN_DIR; pe_subsystem->name = CRM_SYSTEM_PENGINE; pe_subsystem->command = BIN_DIR"/"CRM_SYSTEM_PENGINE; pe_subsystem->args = NULL; pe_subsystem->flag_connected = R_PE_CONNECTED; pe_subsystem->flag_required = R_PE_REQUIRED; } else { was_error = TRUE; } if(was_error) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } welcomed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); integrated_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); finalized_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); confirmed_nodes = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); crmd_peer_state = g_hash_table_new_full( g_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); set_sigchld_proctrack(G_PRIORITY_HIGH); return I_NULL; } extern GHashTable *shutdown_ops; /* A_STOP */ enum crmd_fsa_input do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { crm_debug_2("Stopping all remaining local resources"); if(is_set(fsa_input_register, R_LRM_CONNECTED)) { stop_all_resources(); } else { crm_err("Exiting with no LRM connection..." " resources may be active!"); } if(g_hash_table_size(shutdown_ops) > 0) { crm_info("Waiting on %d stop operations to complete", g_hash_table_size(shutdown_ops)); crmd_fsa_stall(NULL); return I_NULL; } return I_NULL; } /* A_STARTED */ enum crmd_fsa_input do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { if(is_set(fsa_input_register, R_CCM_DATA) == FALSE) { crm_info("Delaying start, CCM (%.16llx) not connected", R_CCM_DATA); crmd_fsa_stall(NULL); return I_NULL; } else if(is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) { crm_info("Delaying start, LRM (%.16llx) not connected", R_LRM_CONNECTED); crmd_fsa_stall(NULL); return I_NULL; } else if(is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Delaying start, CIB (%.16llx) not connected", R_CIB_CONNECTED); crmd_fsa_stall(NULL); return I_NULL; } else if(is_set(fsa_input_register, R_READ_CONFIG) == FALSE) { crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); crmd_fsa_stall(NULL); return I_NULL; } else if(is_set(fsa_input_register, R_PEER_DATA) == FALSE) { HA_Message * msg = NULL; /* try reading from HA */ crm_info("Delaying start, Peer data (%.16llx) not recieved", R_PEER_DATA); crm_debug_3("Looking for a HA message"); msg = fsa_cluster_conn->llc_ops->readmsg(fsa_cluster_conn, 0); if(msg != NULL) { crm_debug_3("There was a HA message"); crm_msg_del(msg); } crm_timer_start(wait_timer); crmd_fsa_stall(NULL); return I_NULL; } crm_info("The local CRM is operational"); clear_bit_inplace(fsa_input_register, R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); return I_NULL; } /* A_RECOVER */ enum crmd_fsa_input do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { set_bit_inplace(fsa_input_register, R_IN_RECOVERY); crm_err("Action %s (%.16llx) not supported", fsa_action2string(action), action); register_fsa_input(C_FSA_INTERNAL, I_STOP, NULL); return I_NULL; } static void config_query_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { crm_debug("Call %d : Parsing CIB options", call_id); if(rc != cib_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", cib_error2string(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } #if 0 /* disable until we can do it properly * - most people use the defaults anyway */ crm_data_t *config = output; xml_child_iter_filter( config, iter, XML_CIB_TAG_NVPAIR, const char *name = crm_element_value(iter, XML_NVPAIR_ATTR_NAME); const char *value = crm_element_value(iter, XML_NVPAIR_ATTR_VALUE); if(name == NULL || value == NULL) { continue; } else if(safe_str_eq(name, XML_CONFIG_ATTR_DC_DEADTIME)) { election_trigger->period_ms = crm_get_msec(value); } else if(safe_str_eq(name, XML_CONFIG_ATTR_FORCE_QUIT)) { shutdown_escalation_timer->period_ms = crm_get_msec(value); } else if(safe_str_eq(name, XML_CONFIG_ATTR_RECHECK)) { recheck_timer->period_ms = crm_get_msec(value); } ); #endif set_bit_inplace(fsa_input_register, R_READ_CONFIG); crm_debug_3("Triggering FSA: %s", __FUNCTION__); G_main_set_trigger(fsa_source); } /* A_READCONFIG */ enum crmd_fsa_input do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { int call_id = fsa_cib_conn->cmds->query( fsa_cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local); add_cib_op_callback(call_id, FALSE, NULL, config_query_callback); crm_debug_2("Querying the CIB... call %d", call_id); /* defaults */ - election_trigger->period_ms = crm_get_msec("1min"); + election_trigger->period_ms = crm_get_msec("30s"); election_timeout->period_ms = crm_get_msec("2min"); integration_timer->period_ms = crm_get_msec("3min"); finalization_timer->period_ms = crm_get_msec("3min"); - shutdown_escalation_timer->period_ms = crm_get_msec("5min"); + shutdown_escalation_timer->period_ms = crm_get_msec("10min"); return I_NULL; } gboolean crm_shutdown(int nsig, gpointer unused) { if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) { if(is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating the shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); } else { crm_info("Requesting shutdown"); set_bit_inplace(fsa_input_register, R_SHUTDOWN); register_fsa_input(C_SHUTDOWN,I_SHUTDOWN,NULL); /* cant rely on this... */ crm_timer_start(shutdown_escalation_timer); } } else { crm_info("exit from shutdown"); exit(LSB_EXIT_OK); } return TRUE; } static void default_cib_update_callback(const HA_Message *msg, int call_id, int rc, crm_data_t *output, void *user_data) { if(rc != cib_ok) { fsa_data_t *msg_data = NULL; crm_err("CIB Update failed: %s", cib_error2string(rc)); crm_log_xml_warn(output, "update:failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } void -populate_cib_nodes(ll_cluster_t *hb_cluster) +populate_cib_nodes(ll_cluster_t *hb_cluster, gboolean with_client_status) { int call_id = 0; const char *ha_node = NULL; crm_data_t *cib_node_list = NULL; /* Async get client status information in the cluster */ - crm_debug_3("Requesting an initial dump of CRMD client_status"); - fsa_cluster_conn->llc_ops->client_status( - fsa_cluster_conn, NULL, CRM_SYSTEM_CRMD, -1); - + crm_debug_2("Invoked"); + if(with_client_status) { + crm_debug_3("Requesting an initial dump of CRMD client_status"); + fsa_cluster_conn->llc_ops->client_status( + fsa_cluster_conn, NULL, CRM_SYSTEM_CRMD, -1); + } + crm_info("Requesting the list of configured nodes"); fsa_cluster_conn->llc_ops->init_nodewalk(fsa_cluster_conn); cib_node_list = create_xml_node(NULL, XML_CIB_TAG_NODES); do { const char *ha_node_type = NULL; const char *ha_node_uuid = NULL; crm_data_t *cib_new_node = NULL; ha_node = fsa_cluster_conn->llc_ops->nextnode(fsa_cluster_conn); if(ha_node == NULL) { continue; } ha_node_type = fsa_cluster_conn->llc_ops->node_type( fsa_cluster_conn, ha_node); if(safe_str_neq(NORMALNODE, ha_node_type)) { crm_debug("Node %s: skipping '%s'", ha_node, ha_node_type); continue; } ha_node_uuid = get_uuid(fsa_cluster_conn, ha_node); if(ha_node_uuid == NULL) { crm_warn("Node %s: no uuid found", ha_node); continue; } crm_notice("Node: %s (uuid: %s)", ha_node, ha_node_uuid); cib_new_node = create_xml_node(cib_node_list, XML_CIB_TAG_NODE); crm_xml_add(cib_new_node, XML_ATTR_ID, ha_node_uuid); crm_xml_add(cib_new_node, XML_ATTR_UNAME, ha_node); crm_xml_add(cib_new_node, XML_ATTR_TYPE, ha_node_type); } while(ha_node != NULL); fsa_cluster_conn->llc_ops->end_nodewalk(fsa_cluster_conn); /* Now update the CIB with the list of nodes */ call_id = fsa_cib_conn->cmds->update( fsa_cib_conn, XML_CIB_TAG_NODES, cib_node_list, NULL, cib_scope_local|cib_quorum_override|cib_inhibit_bcast); add_cib_op_callback(call_id, FALSE, NULL, default_cib_update_callback); free_xml(cib_node_list); + crm_debug_2("Complete"); } gboolean register_with_ha(ll_cluster_t *hb_cluster, const char *client_name) { crm_debug("Signing in with Heartbeat"); if (hb_cluster->llc_ops->signon(hb_cluster, client_name)!= HA_OK) { crm_err("Cannot sign on with heartbeat: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_set_ha_options(hb_cluster); crm_debug_3("Be informed of CRM messages"); if (HA_OK != hb_cluster->llc_ops->set_msg_callback( hb_cluster, T_CRM, crmd_ha_msg_callback, hb_cluster)){ crm_err("Cannot set msg callback: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } #if 0 crm_debug_3("Be informed of Node Status changes"); if (HA_OK != hb_cluster->llc_ops->set_nstatus_callback( hb_cluster, crmd_ha_status_callback, hb_cluster)){ crm_err("Cannot set nstatus callback: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } #endif crm_debug_3("Be informed of CRM Client Status changes"); if (HA_OK != hb_cluster->llc_ops->set_cstatus_callback( hb_cluster, crmd_client_status_callback, hb_cluster)) { crm_err("Cannot set cstatus callback: %s", hb_cluster->llc_ops->errmsg(hb_cluster)); return FALSE; } crm_debug_3("Adding channel to mainloop"); G_main_add_IPC_Channel( G_PRIORITY_HIGH, hb_cluster->llc_ops->ipcchan(hb_cluster), FALSE, crmd_ha_msg_dispatch, hb_cluster /* userdata */, crmd_ha_connection_destroy); crm_debug_3("Finding our node name"); if ((fsa_our_uname = hb_cluster->llc_ops->get_mynodeid(hb_cluster)) == NULL) { crm_err("get_mynodeid() failed"); return FALSE; } crm_info("Hostname: %s", fsa_our_uname); crm_debug_3("Finding our node uuid"); fsa_our_uuid = get_uuid(fsa_cluster_conn, fsa_our_uname); if(fsa_our_uuid == NULL) { crm_err("get_uuid_by_name() failed"); return FALSE; } /* copy it so that unget_uuid() doesn't trash the value on us */ fsa_our_uuid = crm_strdup(fsa_our_uuid); crm_info("UUID: %s", fsa_our_uuid); - populate_cib_nodes(hb_cluster); + populate_cib_nodes(hb_cluster, TRUE); return TRUE; }