diff --git a/crmd/callbacks.c b/crmd/callbacks.c index 7f6599d666..c4e7b04a08 100644 --- a/crmd/callbacks.c +++ b/crmd/callbacks.c @@ -1,238 +1,238 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void crmd_ha_connection_destroy(gpointer user_data); /* From join_dc... */ extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); void crmd_ha_connection_destroy(gpointer user_data) { crm_trace("Invoked"); if (is_set(fsa_input_register, R_HA_DISCONNECTED)) { /* we signed out, so this is expected */ crm_info("Heartbeat disconnection complete"); return; } crm_crit("Lost connection to heartbeat service!"); register_fsa_input(C_HA_DISCONNECT, I_ERROR, NULL); trigger_fsa(fsa_source); } void crmd_ha_msg_filter(xmlNode * msg) { if (AM_I_DC) { const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); if (safe_str_eq(sys_from, CRM_SYSTEM_DC)) { const char *from = crm_element_value(msg, F_ORIG); if (safe_str_neq(from, fsa_our_uname)) { int level = LOG_INFO; const char *op = crm_element_value(msg, F_CRM_TASK); /* make sure the election happens NOW */ if (fsa_state != S_ELECTION) { ha_msg_input_t new_input; level = LOG_WARNING; new_input.msg = msg; register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input, __FUNCTION__); } do_crm_log(level, "Another DC detected: %s (op=%s)", from, op); goto done; } } } else { const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); if (safe_str_eq(sys_to, CRM_SYSTEM_DC)) { return; } } /* crm_log_xml_trace("HA[inbound]", msg); */ route_message(C_HA_MESSAGE, msg); done: trigger_fsa(fsa_source); } extern gboolean process_lrm_event(lrmd_event_data_t * op); void lrm_op_callback(lrmd_event_data_t * op) { CRM_CHECK(op != NULL, return); process_lrm_event(op); } static void crmd_proc_update(crm_node_t * member, enum crm_proc_flag client) { const char *status = NULL; CRM_CHECK(member != NULL, return); status = (member->processes & client) ? ONLINESTATUS : OFFLINESTATUS; crm_notice("Status update: Client %s/%s now has status [%s] (DC=%s)", member->uname, peer2text(client), status, AM_I_DC ? "true" : crm_str(fsa_our_dc)); if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { return; } else if (fsa_state == S_STOPPING) { return; } if (safe_str_eq(member->uname, fsa_our_dc) && crm_is_peer_active(member) == FALSE) { /* Did the DC leave us? */ crm_info("Got client status callback - our DC is dead"); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL); } else if (AM_I_DC) { enum crm_proc_flag messaging = crm_proc_plugin | crm_proc_heartbeat | crm_proc_cpg; xmlNode *update = NULL; update = create_node_state(member->uname, (member->processes & messaging) ? ACTIVESTATUS : DEADSTATUS, NULL, status, NULL, NULL, FALSE, __FUNCTION__); fsa_cib_anon_update(XML_CIB_TAG_STATUS, update, cib_scope_local | cib_quorum_override | cib_can_create); free_xml(update); if ((member->processes & client) == 0) { erase_node_from_join(member->uname); check_join_state(fsa_state, __FUNCTION__); fail_incompletable_actions(transition_graph, member->uuid); } else { register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } } trigger_fsa(fsa_source); } void peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { gboolean reset_status_entry = FALSE; uint32_t old = 0; - set_bit_inplace(fsa_input_register, R_PEER_DATA); + set_bit(fsa_input_register, R_PEER_DATA); if (node->uname == NULL) { return; } switch (type) { case crm_status_uname: crm_info("status: %s is now %s", node->uname, node->state); /* reset_status_entry = TRUE; */ /* If we've never seen the node, then it also wont be in the status section */ break; case crm_status_nstate: crm_info("status: %s is now %s (was %s)", node->uname, node->state, (const char *)data); reset_status_entry = TRUE; break; case crm_status_processes: if (data) { old = *(const uint32_t *)data; } if ((node->processes ^ old) & proc_flags) { crmd_proc_update(node, proc_flags); } break; } /* Can this be removed now that do_cl_join_finalize_respond() does the same thing? */ if (AM_I_DC && reset_status_entry && safe_str_eq(CRMD_STATE_ACTIVE, node->state)) { crm_action_t *down = match_down_event(0, node->uname, NULL); erase_status_tag(node->uname, XML_CIB_TAG_LRM, cib_scope_local); erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); if (down) { const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK); if (safe_str_eq(task, CRM_OP_FENCE)) { crm_info("Node return implies stonith of %s (action %d) completed", node->uname, down->id); down->confirmed = TRUE; } } /* TODO: potentially we also want to set XML_CIB_ATTR_JOINSTATE and XML_CIB_ATTR_EXPSTATE here */ } } void crmd_cib_connection_destroy(gpointer user_data) { CRM_CHECK(user_data == fsa_cib_conn,;); crm_trace("Invoked"); trigger_fsa(fsa_source); fsa_cib_conn->state = cib_disconnected; if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Connection to the CIB terminated..."); return; } /* eventually this will trigger a reconnect, not a shutdown */ crm_err("Connection to the CIB terminated..."); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); - clear_bit_inplace(fsa_input_register, R_CIB_CONNECTED); + clear_bit(fsa_input_register, R_CIB_CONNECTED); return; } gboolean crm_fsa_trigger(gpointer user_data) { crm_trace("Invoked (queue len: %d)", g_list_length(fsa_message_queue)); s_crmd_fsa(C_FSA_INTERNAL); crm_trace("Exited (queue len: %d)", g_list_length(fsa_message_queue)); return TRUE; } diff --git a/crmd/cib.c b/crmd/cib.c index b1753122fc..aa5dd279a9 100644 --- a/crmd/cib.c +++ b/crmd/cib.c @@ -1,217 +1,217 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include /* for access */ #include /* for calls to open */ #include /* for calls to open */ #include /* for calls to open */ #include /* for getpwuid */ #include /* for initgroups */ #include /* for getrlimit */ #include /* for getrlimit */ #include #include #include #include #include #include #include #include struct crm_subsystem_s *cib_subsystem = NULL; int cib_retries = 0; static void do_cib_updated(const char *event, xmlNode * msg) { int rc = -1; CRM_CHECK(msg != NULL, return); crm_element_value_int(msg, F_CIB_RC, &rc); if (rc < pcmk_ok) { crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc)); return; } if (get_xpath_object ("//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_CRMCONFIG, msg, LOG_TRACE) != NULL) { mainloop_set_trigger(config_read); } } static void revision_check_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { int cmp = -1; const char *revision = NULL; xmlNode *generation = NULL; if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } generation = output; CRM_CHECK(safe_str_eq(crm_element_name(generation), XML_TAG_CIB), crm_log_xml_err(output, __FUNCTION__); return); crm_trace("Checking our feature revision is allowed: %s", CIB_FEATURE_SET); revision = crm_element_value(generation, XML_ATTR_CRM_VERSION); cmp = compare_version(revision, CRM_FEATURE_SET); if (cmp > 0) { crm_err("This build (%s) does not support the current" " resource configuration", VERSION); crm_err("We can only support up to CRM feature set %s (current=%s)", CRM_FEATURE_SET, revision); crm_err("Shutting down the CRM"); /* go into a stall state */ register_fsa_error_adv(C_FSA_INTERNAL, I_SHUTDOWN, NULL, NULL, __FUNCTION__); return; } } static void do_cib_replaced(const char *event, xmlNode * msg) { crm_debug("Updating the CIB after a replace: DC=%s", AM_I_DC ? "true" : "false"); if (AM_I_DC == FALSE) { return; } else if (fsa_state == S_FINALIZE_JOIN && is_set(fsa_input_register, R_CIB_ASKED)) { /* no need to restart the join - we asked for this replace op */ return; } /* start the join process again so we get everyone's LRM status */ populate_cib_nodes(FALSE); do_update_cib_nodes(TRUE, __FUNCTION__); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } /* A_CIB_STOP, A_CIB_START, A_CIB_RESTART, */ void do_cib_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { struct crm_subsystem_s *this_subsys = cib_subsystem; long long stop_actions = A_CIB_STOP; long long start_actions = A_CIB_START; if (action & stop_actions) { crm_info("Disconnecting CIB"); - clear_bit_inplace(fsa_input_register, R_CIB_CONNECTED); + clear_bit(fsa_input_register, R_CIB_CONNECTED); CRM_ASSERT(fsa_cib_conn != NULL); fsa_cib_conn->cmds->del_notify_callback(fsa_cib_conn, T_CIB_DIFF_NOTIFY, do_cib_updated); if (fsa_cib_conn->state != cib_disconnected) { fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local); fsa_cib_conn->cmds->signoff(fsa_cib_conn); } } if (action & start_actions) { int rc = pcmk_ok; CRM_ASSERT(fsa_cib_conn != NULL); if (cur_state == S_STOPPING) { crm_err("Ignoring request to start %s after shutdown", this_subsys->name); return; } rc = fsa_cib_conn->cmds->signon(fsa_cib_conn, CRM_SYSTEM_CRMD, cib_command_nonblocking); if (rc != pcmk_ok) { /* a short wait that usually avoids stalling the FSA */ sleep(1); rc = fsa_cib_conn->cmds->signon(fsa_cib_conn, CRM_SYSTEM_CRMD, cib_command_nonblocking); } if (rc != pcmk_ok) { crm_info("Could not connect to the CIB service: %s", pcmk_strerror(rc)); } else if (pcmk_ok != fsa_cib_conn->cmds->set_connection_dnotify(fsa_cib_conn, crmd_cib_connection_destroy)) { crm_err("Could not set dnotify callback"); } else if (pcmk_ok != fsa_cib_conn->cmds->add_notify_callback(fsa_cib_conn, T_CIB_REPLACE_NOTIFY, do_cib_replaced)) { crm_err("Could not set CIB notification callback (replace)"); } else if (pcmk_ok != fsa_cib_conn->cmds->add_notify_callback(fsa_cib_conn, T_CIB_DIFF_NOTIFY, do_cib_updated)) { crm_err("Could not set CIB notification callback (update)"); } else { - set_bit_inplace(fsa_input_register, R_CIB_CONNECTED); + set_bit(fsa_input_register, R_CIB_CONNECTED); } if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { cib_retries++; crm_warn("Couldn't complete CIB registration %d" " times... pause and retry", cib_retries); if (cib_retries < 30) { crm_timer_start(wait_timer); crmd_fsa_stall(NULL); } else { crm_err("Could not complete CIB" " registration %d times..." " hard error", cib_retries); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } else { int call_id = 0; crm_info("CIB connection established"); call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, revision_check_callback); cib_retries = 0; } } } diff --git a/crmd/control.c b/crmd/control.c index ce8f54c23f..56d99b0f76 100644 --- a/crmd/control.c +++ b/crmd/control.c @@ -1,1012 +1,1012 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include qb_ipcs_service_t *ipcs = NULL; extern gboolean crm_connect_corosync(void); extern void crmd_ha_connection_destroy(gpointer user_data); void crm_shutdown(int nsig); gboolean crm_read_options(gpointer user_data); gboolean fsa_has_quorum = FALSE; GHashTable *ipc_clients = NULL; crm_trigger_t *fsa_source = NULL; crm_trigger_t *config_read = NULL; /* A_HA_CONNECT */ void do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean registered = FALSE; if (action & A_HA_DISCONNECT) { if (is_openais_cluster()) { crm_peer_destroy(); #if SUPPORT_COROSYNC terminate_ais_connection(); #endif crm_info("Disconnected from OpenAIS"); #if SUPPORT_HEARTBEAT } else if (fsa_cluster_conn != NULL) { - set_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); + set_bit(fsa_input_register, R_HA_DISCONNECTED); fsa_cluster_conn->llc_ops->signoff(fsa_cluster_conn, FALSE); crm_info("Disconnected from Heartbeat"); #endif } } if (action & A_HA_CONNECT) { crm_set_status_callback(&peer_update_callback); if (is_openais_cluster()) { #if SUPPORT_COROSYNC registered = crm_connect_corosync(); #endif } else if (is_heartbeat_cluster()) { #if SUPPORT_HEARTBEAT registered = crm_cluster_connect(&fsa_our_uname, &fsa_our_uuid, crmd_ha_msg_callback, crmd_ha_connection_destroy, &fsa_cluster_conn); #endif } #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { crm_trace("Be informed of Node Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_nstatus_callback(fsa_cluster_conn, crmd_ha_status_callback, fsa_cluster_conn) != HA_OK) { crm_err("Cannot set nstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } crm_trace("Be informed of CRM Client Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_cstatus_callback(fsa_cluster_conn, crmd_client_status_callback, fsa_cluster_conn) != HA_OK) { crm_err("Cannot set cstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } if (registered) { crm_trace("Requesting an initial dump of CRMD client_status"); fsa_cluster_conn->llc_ops->client_status(fsa_cluster_conn, NULL, CRM_SYSTEM_CRMD, -1); } } #endif if (registered == FALSE) { - set_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); + set_bit(fsa_input_register, R_HA_DISCONNECTED); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } - clear_bit_inplace(fsa_input_register, R_HA_DISCONNECTED); + clear_bit(fsa_input_register, R_HA_DISCONNECTED); crm_info("Connected to the cluster"); } if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } /* A_SHUTDOWN */ void do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* just in case */ - set_bit_inplace(fsa_input_register, R_SHUTDOWN); + set_bit(fsa_input_register, R_SHUTDOWN); if (is_heartbeat_cluster()) { if (is_set(fsa_input_register, pe_subsystem->flag_connected)) { crm_info("Terminating the %s", pe_subsystem->name); if (stop_subsystem(pe_subsystem, TRUE) == FALSE) { /* its gone... */ crm_err("Faking %s exit", pe_subsystem->name); - clear_bit_inplace(fsa_input_register, pe_subsystem->flag_connected); + clear_bit(fsa_input_register, pe_subsystem->flag_connected); } else { crm_info("Waiting for subsystems to exit"); crmd_fsa_stall(NULL); } } crm_info("All subsystems stopped, continuing"); } if (stonith_api) { /* Prevent it from comming up again */ - clear_bit_inplace(fsa_input_register, R_ST_REQUIRED); + clear_bit(fsa_input_register, R_ST_REQUIRED); crm_info("Disconnecting STONITH..."); stonith_api->cmds->disconnect(stonith_api); } } /* A_SHUTDOWN_REQ */ void do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *msg = NULL; crm_info("Sending shutdown request to %s", crm_str(fsa_our_dc)); msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); -/* set_bit_inplace(fsa_input_register, R_STAYDOWN); */ +/* set_bit(fsa_input_register, R_STAYDOWN); */ if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } free_xml(msg); } extern crm_ipc_t *attrd_ipc; extern char *max_generation_from; extern xmlNode *max_generation_xml; extern GHashTable *resource_history; extern GHashTable *voted; extern GHashTable *reload_hash; void log_connected_client(gpointer key, gpointer value, gpointer user_data); void log_connected_client(gpointer key, gpointer value, gpointer user_data) { crmd_client_t *client = value; crm_err("%s is still connected at exit", client->table_key); } static void free_mem(fsa_data_t * msg_data) { GListPtr gIter = NULL; crm_ipc_close(attrd_ipc); crm_ipc_destroy(attrd_ipc); g_main_loop_quit(crmd_mainloop); g_main_loop_unref(crmd_mainloop); #if SUPPORT_HEARTBEAT if (fsa_cluster_conn) { fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn); fsa_cluster_conn = NULL; } #endif for(gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) { fsa_data_t *fsa_data = gIter->data; crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); delete_fsa_input(fsa_data); } g_list_free(fsa_message_queue); delete_fsa_input(msg_data); if (ipc_clients) { crm_debug("Number of connected clients: %d", g_hash_table_size(ipc_clients)); /* g_hash_table_foreach(ipc_clients, log_connected_client, NULL); */ g_hash_table_destroy(ipc_clients); } empty_uuid_cache(); crm_peer_destroy(); - clear_bit_inplace(fsa_input_register, R_MEMBERSHIP); + clear_bit(fsa_input_register, R_MEMBERSHIP); if (te_subsystem->client && te_subsystem->client->ipc) { crm_debug("Full destroy: TE"); qb_ipcs_disconnect(te_subsystem->client->ipc); } free(te_subsystem); if (pe_subsystem->client && pe_subsystem->client->ipc) { crm_debug("Full destroy: PE"); qb_ipcs_disconnect(pe_subsystem->client->ipc); } free(pe_subsystem); free(cib_subsystem); if (integrated_nodes) { g_hash_table_destroy(integrated_nodes); } if (finalized_nodes) { g_hash_table_destroy(finalized_nodes); } if (confirmed_nodes) { g_hash_table_destroy(confirmed_nodes); } if (reload_hash) { g_hash_table_destroy(reload_hash); } if (resource_history) { g_hash_table_destroy(resource_history); } if (voted) { g_hash_table_destroy(voted); } cib_delete(fsa_cib_conn); fsa_cib_conn = NULL; if (fsa_lrm_conn) { lrmd_api_delete(fsa_lrm_conn); fsa_lrm_conn = NULL; } free(transition_timer); free(integration_timer); free(finalization_timer); free(election_trigger); free(election_timeout); free(shutdown_escalation_timer); free(wait_timer); free(recheck_timer); free(fsa_our_dc_version); free(fsa_our_uname); free(fsa_our_uuid); free(fsa_our_dc); free(max_generation_from); free_xml(max_generation_xml); crm_xml_cleanup(); } /* A_EXIT_0, A_EXIT_1 */ void do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int exit_code = 0; int log_level = LOG_INFO; const char *exit_type = "gracefully"; if (action & A_EXIT_1) { exit_code = 1; log_level = LOG_ERR; exit_type = "forcefully"; } verify_stopped(cur_state, LOG_ERR); do_crm_log(log_level, "Performing %s - %s exiting the CRMd", fsa_action2string(action), exit_type); if (is_set(fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); exit_code = 2; } if (is_set(fsa_input_register, R_STAYDOWN)) { crm_warn("Inhibiting respawn by Heartbeat"); exit_code = 100; } crm_info("[%s] stopped (%d)", crm_system_name, exit_code); free_mem(msg_data); exit(exit_code); } /* A_STARTUP */ void do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int was_error = 0; int interval = 1; /* seconds between DC heartbeats */ crm_debug("Registering Signal Handlers"); mainloop_add_signal(SIGTERM, crm_shutdown); fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL); config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL); ipc_clients = g_hash_table_new(crm_str_hash, g_str_equal); crm_debug("Creating CIB and LRM objects"); fsa_cib_conn = cib_new(); fsa_lrm_conn = lrmd_api_new(); /* set up the timers */ transition_timer = calloc(1, sizeof(fsa_timer_t)); integration_timer = calloc(1, sizeof(fsa_timer_t)); finalization_timer = calloc(1, sizeof(fsa_timer_t)); election_trigger = calloc(1, sizeof(fsa_timer_t)); election_timeout = calloc(1, sizeof(fsa_timer_t)); shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t)); wait_timer = calloc(1, sizeof(fsa_timer_t)); recheck_timer = calloc(1, sizeof(fsa_timer_t)); interval = interval * 1000; if (election_trigger != NULL) { election_trigger->source_id = 0; election_trigger->period_ms = -1; election_trigger->fsa_input = I_DC_TIMEOUT; election_trigger->callback = crm_timer_popped; election_trigger->repeat = FALSE; } else { was_error = TRUE; } if (election_timeout != NULL) { election_timeout->source_id = 0; election_timeout->period_ms = -1; election_timeout->fsa_input = I_ELECTION_DC; election_timeout->callback = crm_timer_popped; election_timeout->repeat = FALSE; } else { was_error = TRUE; } if (transition_timer != NULL) { transition_timer->source_id = 0; transition_timer->period_ms = -1; transition_timer->fsa_input = I_PE_CALC; transition_timer->callback = crm_timer_popped; transition_timer->repeat = FALSE; } else { was_error = TRUE; } if (integration_timer != NULL) { integration_timer->source_id = 0; integration_timer->period_ms = -1; integration_timer->fsa_input = I_INTEGRATED; integration_timer->callback = crm_timer_popped; integration_timer->repeat = FALSE; } else { was_error = TRUE; } if (finalization_timer != NULL) { finalization_timer->source_id = 0; finalization_timer->period_ms = -1; finalization_timer->fsa_input = I_FINALIZED; finalization_timer->callback = crm_timer_popped; finalization_timer->repeat = FALSE; /* for possible enabling... a bug in the join protocol left * a slave in S_PENDING while we think its in S_NOT_DC * * raising I_FINALIZED put us into a transition loop which is * never resolved. * in this loop we continually send probes which the node * NACK's because its in S_PENDING * * if we have nodes where heartbeat is active but the * CRM is not... then this will be handled in the * integration phase */ finalization_timer->fsa_input = I_ELECTION; } else { was_error = TRUE; } if (shutdown_escalation_timer != NULL) { shutdown_escalation_timer->source_id = 0; shutdown_escalation_timer->period_ms = -1; shutdown_escalation_timer->fsa_input = I_STOP; shutdown_escalation_timer->callback = crm_timer_popped; shutdown_escalation_timer->repeat = FALSE; } else { was_error = TRUE; } if (wait_timer != NULL) { wait_timer->source_id = 0; wait_timer->period_ms = 2000; wait_timer->fsa_input = I_NULL; wait_timer->callback = crm_timer_popped; wait_timer->repeat = FALSE; } else { was_error = TRUE; } if (recheck_timer != NULL) { recheck_timer->source_id = 0; recheck_timer->period_ms = -1; recheck_timer->fsa_input = I_PE_CALC; recheck_timer->callback = crm_timer_popped; recheck_timer->repeat = FALSE; } else { was_error = TRUE; } /* set up the sub systems */ cib_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); te_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); pe_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); if (cib_subsystem != NULL) { cib_subsystem->pid = -1; cib_subsystem->name = CRM_SYSTEM_CIB; cib_subsystem->flag_connected = R_CIB_CONNECTED; cib_subsystem->flag_required = R_CIB_REQUIRED; } else { was_error = TRUE; } if (te_subsystem != NULL) { te_subsystem->pid = -1; te_subsystem->name = CRM_SYSTEM_TENGINE; te_subsystem->flag_connected = R_TE_CONNECTED; te_subsystem->flag_required = R_TE_REQUIRED; } else { was_error = TRUE; } if (pe_subsystem != NULL) { pe_subsystem->pid = -1; pe_subsystem->path = CRM_DAEMON_DIR; pe_subsystem->name = CRM_SYSTEM_PENGINE; pe_subsystem->command = CRM_DAEMON_DIR "/" CRM_SYSTEM_PENGINE; pe_subsystem->args = NULL; pe_subsystem->flag_connected = R_PE_CONNECTED; pe_subsystem->flag_required = R_PE_REQUIRED; } else { was_error = TRUE; } if (was_error == FALSE && is_heartbeat_cluster()) { if(start_subsystem(pe_subsystem) == FALSE) { was_error = TRUE; } } if (was_error) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } welcomed_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); integrated_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); finalized_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); confirmed_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } static int32_t crmd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { crmd_client_t *blank_client = NULL; #if ENABLE_ACL struct group *crm_grp = NULL; #endif crm_trace("Connecting %p for uid=%d gid=%d", c, uid, gid); blank_client = calloc(1, sizeof(crmd_client_t)); CRM_ASSERT(blank_client != NULL); crm_trace("Created client: %p", blank_client); blank_client->ipc = c; blank_client->sub_sys = NULL; blank_client->uuid = NULL; blank_client->table_key = NULL; #if ENABLE_ACL crm_grp = getgrnam(CRM_DAEMON_GROUP); if (crm_grp) { qb_ipcs_connection_auth_set(c, -1, crm_grp->gr_gid, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); } blank_client->user = uid2username(uid); #endif qb_ipcs_context_set(c, blank_client); return 0; } static void crmd_ipc_created(qb_ipcs_connection_t *c) { crm_trace("Client %p connected", c); } static int32_t crmd_ipc_dispatch(qb_ipcs_connection_t *c, void *data, size_t size) { crmd_client_t *client = qb_ipcs_context_get(c); xmlNode *msg = crm_ipcs_recv(c, data, size); xmlNode *ack = create_xml_node(NULL, "ack"); crm_trace("Invoked: %s", client->table_key); crm_ipcs_send(c, ack, FALSE); /* TODO: Do not unconditionally send this */ free_xml(ack); if (msg == NULL) { return 0; } #if ENABLE_ACL determine_request_user(client->user, msg, F_CRM_USER); #endif crm_trace("Processing msg from %s", client->table_key); crm_log_xml_trace(msg, "CRMd[inbound]"); if (crmd_authorize_message(msg, client)) { route_message(C_IPC_MESSAGE, msg); } trigger_fsa(fsa_source); free_xml(msg); return 0; } static int32_t crmd_ipc_closed(qb_ipcs_connection_t *c) { return 0; } static void crmd_ipc_destroy(qb_ipcs_connection_t *c) { crmd_client_t *client = qb_ipcs_context_get(c); if (client == NULL) { crm_trace("No client to delete"); return; } process_client_disconnect(client); crm_trace("Disconnecting client %s (%p)", client->table_key, client); free(client->table_key); free(client->sub_sys); free(client->uuid); free(client->user); free(client); trigger_fsa(fsa_source); } /* A_STOP */ void do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (is_heartbeat_cluster()) { stop_subsystem(pe_subsystem, FALSE); } mainloop_del_ipc_server(ipcs); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* A_STARTED */ void do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static struct qb_ipcs_service_handlers crmd_callbacks = { .connection_accept = crmd_ipc_accept, .connection_created = crmd_ipc_created, .msg_process = crmd_ipc_dispatch, .connection_closed = crmd_ipc_closed, .connection_destroyed = crmd_ipc_destroy }; if (cur_state != S_STARTING) { crm_err("Start cancelled... %s", fsa_state2string(cur_state)); return; } else if (is_set(fsa_input_register, R_MEMBERSHIP) == FALSE) { crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP); crmd_fsa_stall(NULL); return; } else if (is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) { crm_info("Delaying start, LRM not connected (%.16llx)", R_LRM_CONNECTED); crmd_fsa_stall(NULL); return; } else if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED); crmd_fsa_stall(NULL); return; } else if (is_set(fsa_input_register, R_READ_CONFIG) == FALSE) { crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); crmd_fsa_stall(NULL); return; } else if (is_set(fsa_input_register, R_PEER_DATA) == FALSE) { /* try reading from HA */ crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA); #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { HA_Message *msg = NULL; crm_trace("Looking for a HA message"); msg = fsa_cluster_conn->llc_ops->readmsg(fsa_cluster_conn, 0); if (msg != NULL) { crm_trace("There was a HA message"); ha_msg_del(msg); } } #endif crmd_fsa_stall(NULL); return; } crm_debug("Init server comms"); ipcs = mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, &crmd_callbacks); if (ipcs == NULL) { crm_err("Couldn't start IPC server"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } if (stonith_reconnect == NULL) { int dummy; stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, &dummy); } - set_bit_inplace(fsa_input_register, R_ST_REQUIRED); + set_bit(fsa_input_register, R_ST_REQUIRED); mainloop_set_trigger(stonith_reconnect); crm_notice("The local CRM is operational"); - clear_bit_inplace(fsa_input_register, R_STARTING); + clear_bit(fsa_input_register, R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); } /* A_RECOVER */ void do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { - set_bit_inplace(fsa_input_register, R_IN_RECOVERY); + set_bit(fsa_input_register, R_IN_RECOVERY); crm_err("Action %s (%.16llx) not supported", fsa_action2string(action), action); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* *INDENT-OFF* */ pe_cluster_option crmd_opts[] = { /* name, old-name, validate, default, description */ { "dc-version", NULL, "string", NULL, "none", NULL, "Version of Pacemaker on the cluster's DC.", "Includes the hash which identifies the exact Mercurial changeset it was built from. Used for diagnostic purposes." }, { "cluster-infrastructure", NULL, "string", NULL, "heartbeat", NULL, "The messaging stack on which Pacemaker is currently running.", "Used for informational and diagnostic purposes." }, { XML_CONFIG_ATTR_DC_DEADTIME, "dc_deadtime", "time", NULL, "20s", &check_time, "How long to wait for a response from other nodes during startup.", "The \"correct\" value will depend on the speed/load of your network and the type of switches used." }, { XML_CONFIG_ATTR_RECHECK, "cluster_recheck_interval", "time", "Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)", "15min", &check_timer, "Polling interval for time based changes to options, resource parameters and constraints.", "The Cluster is primarily event driven, however the configuration can have elements that change based on time." " To ensure these changes take effect, we can optionally poll the cluster's status for changes." }, { XML_CONFIG_ATTR_ELECTION_FAIL, "election_timeout", "time", NULL, "2min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { XML_CONFIG_ATTR_FORCE_QUIT, "shutdown_escalation", "time", NULL, "20min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-finalization-timeout", NULL, "time", NULL, "30min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-transition-delay", NULL, "time", NULL, "0s", &check_timer, "*** Advanced Use Only ***\nEnabling this option will slow down cluster recovery under all conditions", "Delay cluster recovery for the configured interval to allow for additional/related events to occur.\nUseful if your configuration is sensitive to the order in which ping updates arrive." }, { XML_ATTR_EXPECTED_VOTES, NULL, "integer", NULL, "2", &check_number, "The number of nodes expected to be in the cluster", "Used to calculate quorum in openais based clusters." }, }; /* *INDENT-ON* */ void crmd_metadata(void) { config_metadata("CRM Daemon", "1.0", "CRM Daemon Options", "This is a fake resource that details the options that can be configured for the CRM Daemon.", crmd_opts, DIMOF(crmd_opts)); } static void verify_crmd_options(GHashTable * options) { verify_all_options(options, crmd_opts, DIMOF(crmd_opts)); } static const char * crmd_pref(GHashTable * options, const char *name) { return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name); } static void config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { const char *value = NULL; GHashTable *config_hash = NULL; ha_time_t *now = new_ha_date(TRUE); if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); if (rc == -EACCES || rc == -pcmk_err_dtd_validation) { crm_err("The cluster is mis-configured - shutting down and staying down"); - set_bit_inplace(fsa_input_register, R_STAYDOWN); + set_bit(fsa_input_register, R_STAYDOWN); } goto bail; } crm_debug("Call %d : Parsing CIB options", call_id); config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); unpack_instance_attributes(output, output, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, now); verify_crmd_options(config_hash); value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); election_trigger->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT); shutdown_escalation_timer->period_ms = crm_get_msec(value); crm_debug("Shutdown escalation occurs after: %dms", shutdown_escalation_timer->period_ms); value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL); election_timeout->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK); recheck_timer->period_ms = crm_get_msec(value); crm_debug("Checking for expired actions every %dms", recheck_timer->period_ms); value = crmd_pref(config_hash, "crmd-transition-delay"); transition_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-integration-timeout"); integration_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-finalization-timeout"); finalization_timer->period_ms = crm_get_msec(value); #if SUPPORT_COROSYNC if (is_classic_ais_cluster()) { value = crmd_pref(config_hash, XML_ATTR_EXPECTED_VOTES); crm_debug("Sending expected-votes=%s to corosync", value); send_ais_text(crm_class_quorum, value, TRUE, NULL, crm_msg_ais); } #endif - set_bit_inplace(fsa_input_register, R_READ_CONFIG); + set_bit(fsa_input_register, R_READ_CONFIG); crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); g_hash_table_destroy(config_hash); bail: free_ha_date(now); } gboolean crm_read_options(gpointer user_data) { int call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, config_query_callback); crm_trace("Querying the CIB... call %d", call_id); return TRUE; } /* A_READCONFIG */ void do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { mainloop_set_trigger(config_read); } void crm_shutdown(int nsig) { if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) { if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating the shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); } else { - set_bit_inplace(fsa_input_register, R_SHUTDOWN); + set_bit(fsa_input_register, R_SHUTDOWN); register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); if (shutdown_escalation_timer->period_ms < 1) { const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT); int msec = crm_get_msec(value); crm_debug("Using default shutdown escalation: %dms", msec); shutdown_escalation_timer->period_ms = msec; } /* cant rely on this... */ crm_notice("Requesting shutdown, upper limit is %dms", shutdown_escalation_timer->period_ms); crm_timer_start(shutdown_escalation_timer); } } else { crm_info("exit from shutdown"); exit(EX_OK); } } static void default_cib_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("CIB Update failed: %s", pcmk_strerror(rc)); crm_log_xml_warn(output, "update:failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } #if SUPPORT_HEARTBEAT static void populate_cib_nodes_ha(gboolean with_client_status) { int call_id = 0; const char *ha_node = NULL; xmlNode *cib_node_list = NULL; if (fsa_cluster_conn == NULL) { crm_debug("Not connected"); return; } /* Async get client status information in the cluster */ crm_info("Requesting the list of configured nodes"); fsa_cluster_conn->llc_ops->init_nodewalk(fsa_cluster_conn); cib_node_list = create_xml_node(NULL, XML_CIB_TAG_NODES); do { const char *ha_node_type = NULL; const char *ha_node_uuid = NULL; xmlNode *cib_new_node = NULL; ha_node = fsa_cluster_conn->llc_ops->nextnode(fsa_cluster_conn); if (ha_node == NULL) { continue; } ha_node_type = fsa_cluster_conn->llc_ops->node_type(fsa_cluster_conn, ha_node); if (safe_str_neq(NORMALNODE, ha_node_type)) { crm_debug("Node %s: skipping '%s'", ha_node, ha_node_type); continue; } ha_node_uuid = get_uuid(ha_node); if (ha_node_uuid == NULL) { crm_warn("Node %s: no uuid found", ha_node); continue; } crm_debug("Node: %s (uuid: %s)", ha_node, ha_node_uuid); cib_new_node = create_xml_node(cib_node_list, XML_CIB_TAG_NODE); crm_xml_add(cib_new_node, XML_ATTR_ID, ha_node_uuid); crm_xml_add(cib_new_node, XML_ATTR_UNAME, ha_node); crm_xml_add(cib_new_node, XML_ATTR_TYPE, ha_node_type); } while (ha_node != NULL); fsa_cluster_conn->llc_ops->end_nodewalk(fsa_cluster_conn); /* Now update the CIB with the list of nodes */ fsa_cib_update(XML_CIB_TAG_NODES, cib_node_list, cib_scope_local | cib_quorum_override, call_id, NULL); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, default_cib_update_callback); free_xml(cib_node_list); crm_trace("Complete"); } #endif static void create_cib_node_definition(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; xmlNode *cib_nodes = user_data; xmlNode *cib_new_node = NULL; crm_trace("Creating node entry for %s/%s", node->uname, node->uuid); cib_new_node = create_xml_node(cib_nodes, XML_CIB_TAG_NODE); crm_xml_add(cib_new_node, XML_ATTR_ID, node->uuid); crm_xml_add(cib_new_node, XML_ATTR_UNAME, node->uname); crm_xml_add(cib_new_node, XML_ATTR_TYPE, NORMALNODE); } void populate_cib_nodes(gboolean with_client_status) { int call_id = 0; xmlNode *cib_node_list = NULL; #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { populate_cib_nodes_ha(with_client_status); return; } #endif cib_node_list = create_xml_node(NULL, XML_CIB_TAG_NODES); g_hash_table_foreach(crm_peer_cache, create_cib_node_definition, cib_node_list); fsa_cib_update(XML_CIB_TAG_NODES, cib_node_list, cib_scope_local | cib_quorum_override, call_id, NULL); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, default_cib_update_callback); free_xml(cib_node_list); crm_trace("Complete"); } diff --git a/crmd/corosync.c b/crmd/corosync.c index c2fff68359..90a7f2a3c9 100644 --- a/crmd/corosync.c +++ b/crmd/corosync.c @@ -1,182 +1,182 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern void post_cache_update(int seq); extern void crmd_ha_connection_destroy(gpointer user_data); /* A_HA_CONNECT */ #if SUPPORT_COROSYNC static gboolean crmd_ais_dispatch(AIS_Message * wrapper, char *data, int sender) { int seq = 0; xmlNode *xml = NULL; const char *seq_s = NULL; xml = string2xml(data); if (xml == NULL) { crm_err("Could not parse message content (%d): %.100s", wrapper->header.id, data); return TRUE; } switch (wrapper->header.id) { case crm_class_members: seq_s = crm_element_value(xml, "id"); seq = crm_int_helper(seq_s, NULL); - set_bit_inplace(fsa_input_register, R_PEER_DATA); + set_bit(fsa_input_register, R_PEER_DATA); post_cache_update(seq); /* fall through */ case crm_class_quorum: crm_update_quorum(crm_have_quorum, FALSE); if (AM_I_DC) { const char *votes = crm_element_value(xml, "expected"); if (votes == NULL || check_number(votes) == FALSE) { crm_log_xml_err(xml, "Invalid quorum/membership update"); } else { int rc = update_attr(fsa_cib_conn, cib_quorum_override | cib_scope_local | cib_inhibit_notify, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, XML_ATTR_EXPECTED_VOTES, votes, FALSE); crm_info("Setting expected votes to %s", votes); if (pcmk_ok > rc) { crm_err("Quorum update failed: %s", pcmk_strerror(rc)); } } } break; case crm_class_cluster: crm_xml_add(xml, F_ORIG, wrapper->sender.uname); crm_xml_add_int(xml, F_SEQ, wrapper->id); crmd_ha_msg_filter(xml); break; case crm_class_rmpeer: /* Ignore */ break; case crm_class_notify: case crm_class_nodeid: crm_err("Unexpected message class (%d): %.100s", wrapper->header.id, data); break; default: crm_err("Invalid message class (%d): %.100s", wrapper->header.id, data); } free_xml(xml); return TRUE; } static gboolean crmd_cman_dispatch(unsigned long long seq, gboolean quorate) { crm_update_quorum(quorate, FALSE); post_cache_update(seq); return TRUE; } static void crmd_quorum_destroy(gpointer user_data) { if (is_set(fsa_input_register, R_HA_DISCONNECTED)) { crm_err("connection terminated"); exit(1); } else { crm_info("connection closed"); } } static void crmd_ais_destroy(gpointer user_data) { if (is_set(fsa_input_register, R_HA_DISCONNECTED)) { crm_err("connection terminated"); exit(1); } else { crm_info("connection closed"); } } #if SUPPORT_CMAN static void crmd_cman_destroy(gpointer user_data) { if (is_set(fsa_input_register, R_HA_DISCONNECTED)) { crm_err("connection terminated"); exit(1); } else { crm_info("connection closed"); } } #endif extern gboolean crm_connect_corosync(void); gboolean crm_connect_corosync(void) { gboolean rc = FALSE; if (is_openais_cluster()) { crm_set_status_callback(&peer_update_callback); rc = crm_cluster_connect(&fsa_our_uname, &fsa_our_uuid, crmd_ais_dispatch, crmd_ais_destroy, NULL); } if (rc && is_corosync_cluster()) { init_quorum_connection(crmd_cman_dispatch, crmd_quorum_destroy); } #if SUPPORT_CMAN if (rc && is_cman_cluster()) { init_cman_connection(crmd_cman_dispatch, crmd_cman_destroy); - set_bit_inplace(fsa_input_register, R_MEMBERSHIP); + set_bit(fsa_input_register, R_MEMBERSHIP); } #endif return rc; } #endif diff --git a/crmd/election.c b/crmd/election.c index d2d58fae4e..34fa894b56 100644 --- a/crmd/election.c +++ b/crmd/election.c @@ -1,577 +1,577 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *voted = NULL; uint highest_born_on = -1; static int current_election_id = 1; static int crm_uptime(struct timeval *output) { struct rusage info; int rc = getrusage(RUSAGE_SELF, &info); output->tv_sec = 0; output->tv_usec = 0; if (rc < 0) { crm_perror(LOG_ERR, "Could not calculate the current uptime"); return -1; } output->tv_sec = info.ru_utime.tv_sec; output->tv_usec = info.ru_utime.tv_usec; crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec, (long)info.ru_utime.tv_usec); return 1; } static int crm_compare_age(struct timeval your_age) { struct timeval our_age; if (crm_uptime(&our_age) < 0) { return -1; } /* We want these times to be "significantly" different */ if (our_age.tv_sec > your_age.tv_sec) { crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec); return 1; } else if (our_age.tv_sec < your_age.tv_sec) { crm_debug("Loose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec); return -1; } else if (our_age.tv_usec > your_age.tv_usec) { crm_debug("Win: %ld vs %ld (usec)", (long)our_age.tv_usec, (long)your_age.tv_usec); return 1; } else if (our_age.tv_usec < your_age.tv_usec) { crm_debug("Loose: %ld vs %ld (usec)", (long)our_age.tv_usec, (long)your_age.tv_usec); return -1; } return 0; } /* A_ELECTION_VOTE */ void do_election_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { struct timeval age; xmlNode *vote = NULL; gboolean not_voting = FALSE; /* don't vote if we're in one of these states or wanting to shut down */ switch (cur_state) { case S_STARTING: case S_RECOVERY: case S_STOPPING: case S_TERMINATE: crm_warn("Not voting in election, we're in state %s", fsa_state2string(cur_state)); not_voting = TRUE; break; default: break; } if (not_voting == FALSE) { if (is_set(fsa_input_register, R_STARTING)) { not_voting = TRUE; } } if (not_voting) { if (AM_I_DC) { register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL); } else { register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL); } return; } vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); current_election_id++; crm_xml_add(vote, F_CRM_ELECTION_OWNER, fsa_our_uuid); crm_xml_add_int(vote, F_CRM_ELECTION_ID, current_election_id); crm_uptime(&age); crm_xml_add_int(vote, F_CRM_ELECTION_AGE_S, age.tv_sec); crm_xml_add_int(vote, F_CRM_ELECTION_AGE_US, age.tv_usec); send_cluster_message(NULL, crm_msg_crmd, vote, TRUE); free_xml(vote); crm_debug("Started election %d", current_election_id); if (voted) { g_hash_table_destroy(voted); } voted = NULL; if (cur_state == S_ELECTION || cur_state == S_RELEASE_DC) { crm_timer_start(election_timeout); } else if (cur_state != S_INTEGRATION) { crm_err("Broken? Voting in state %s", fsa_state2string(cur_state)); } return; } char *dc_hb_msg = NULL; int beat_num = 0; gboolean do_dc_heartbeat(gpointer data) { return TRUE; } struct election_data_s { const char *winning_uname; unsigned int winning_bornon; }; static void log_member_uname(gpointer key, gpointer value, gpointer user_data) { const crm_node_t *node = value; if (crm_is_peer_active(node)) { crm_err("%s: %s proc=%.32x", (char *)user_data, (char *)key, node->processes); } } static void log_node(gpointer key, gpointer value, gpointer user_data) { crm_err("%s: %s", (char *)user_data, (char *)key); } void do_election_check(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int voted_size = 0; int num_members = crm_active_peers(); if (voted) { voted_size = g_hash_table_size(voted); } /* in the case of #voted > #members, it is better to * wait for the timeout and give the cluster time to * stabilize */ if (fsa_state != S_ELECTION) { crm_debug("Ignore election check: we not in an election"); } else if (voted_size >= num_members) { /* we won and everyone has voted */ crm_timer_stop(election_timeout); register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL); if (voted_size > num_members) { char *data = NULL; data = strdup("member"); g_hash_table_foreach(crm_peer_cache, log_member_uname, data); free(data); data = strdup("voted"); g_hash_table_foreach(voted, log_node, data); free(data); } crm_debug("Destroying voted hash"); g_hash_table_destroy(voted); voted = NULL; } else { crm_debug("Still waiting on %d non-votes (%d total)", num_members - voted_size, num_members); } return; } #define win_dampen 1 /* in seconds */ #define loss_dampen 2 /* in seconds */ /* A_ELECTION_COUNT */ void do_election_count_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { struct timeval your_age; int age; int election_id = -1; int log_level = LOG_INFO; gboolean use_born_on = FALSE; gboolean done = FALSE; gboolean we_loose = FALSE; const char *op = NULL; const char *vote_from = NULL; const char *your_version = NULL; const char *election_owner = NULL; const char *reason = "unknown"; crm_node_t *our_node = NULL, *your_node = NULL; ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg); static time_t last_election_win = 0; static time_t last_election_loss = 0; /* if the membership copy is NULL we REALLY shouldnt be voting * the question is how we managed to get here. */ CRM_CHECK(msg_data != NULL, return); CRM_CHECK(crm_peer_cache != NULL, return); CRM_CHECK(vote != NULL, crm_err("Bogus data from %s", msg_data->origin); return); CRM_CHECK(vote->msg != NULL, crm_err("Bogus data from %s", msg_data->origin); return); your_age.tv_sec = 0; your_age.tv_usec = 0; op = crm_element_value(vote->msg, F_CRM_TASK); vote_from = crm_element_value(vote->msg, F_CRM_HOST_FROM); your_version = crm_element_value(vote->msg, F_CRM_VERSION); election_owner = crm_element_value(vote->msg, F_CRM_ELECTION_OWNER); crm_element_value_int(vote->msg, F_CRM_ELECTION_ID, &election_id); crm_element_value_int(vote->msg, F_CRM_ELECTION_AGE_S, (int *)&(your_age.tv_sec)); crm_element_value_int(vote->msg, F_CRM_ELECTION_AGE_US, (int *)&(your_age.tv_usec)); CRM_CHECK(vote_from != NULL, vote_from = fsa_our_uname); your_node = crm_get_peer(0, vote_from); our_node = crm_get_peer(0, fsa_our_uname); if (voted == NULL) { crm_debug("Created voted hash"); voted = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } if (is_heartbeat_cluster()) { use_born_on = TRUE; } else if (is_classic_ais_cluster()) { use_born_on = TRUE; } age = crm_compare_age(your_age); if (cur_state == S_STARTING) { reason = "Still starting"; we_loose = TRUE; } else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) { reason = "We are not part of the cluster"; log_level = LOG_ERR; we_loose = TRUE; } else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) { /* Possibly we cached the message in the FSA queue at a point that it wasn't */ reason = "Peer is not part of our cluster"; log_level = LOG_WARNING; done = TRUE; } else if (election_id != current_election_id && crm_str_eq(fsa_our_uuid, election_owner, TRUE)) { log_level = LOG_DEBUG_2; reason = "Superceeded"; done = TRUE; } else if (crm_str_eq(op, CRM_OP_NOVOTE, TRUE)) { char *op_copy = strdup(op); char *uname_copy = strdup(vote_from); CRM_ASSERT(crm_str_eq(fsa_our_uuid, election_owner, TRUE)); /* update the list of nodes that have voted */ g_hash_table_replace(voted, uname_copy, op_copy); reason = "Recorded"; done = TRUE; } else if (crm_str_eq(vote_from, fsa_our_uname, TRUE)) { char *op_copy = strdup(op); char *uname_copy = strdup(vote_from); CRM_ASSERT(crm_str_eq(fsa_our_uuid, election_owner, TRUE)); /* update ourselves in the list of nodes that have voted */ g_hash_table_replace(voted, uname_copy, op_copy); reason = "Recorded"; done = TRUE; } else if (compare_version(your_version, CRM_FEATURE_SET) < 0) { reason = "Version"; we_loose = TRUE; } else if (compare_version(your_version, CRM_FEATURE_SET) > 0) { reason = "Version"; } else if (age < 0) { reason = "Uptime"; we_loose = TRUE; } else if (age > 0) { reason = "Uptime"; /* TODO: Check for y(our) born < 0 */ } else if (use_born_on && your_node->born < our_node->born) { reason = "Born"; we_loose = TRUE; } else if (use_born_on && your_node->born > our_node->born) { reason = "Born"; } else if (fsa_our_uname == NULL) { reason = "Unknown host name"; we_loose = TRUE; } else if (strcasecmp(fsa_our_uname, vote_from) > 0) { reason = "Host name"; we_loose = TRUE; } else { reason = "Host name"; CRM_ASSERT(strcmp(fsa_our_uname, vote_from) != 0); /* cant happen... * } else if(strcasecmp(fsa_our_uname, vote_from) == 0) { * * default... * } else { // strcasecmp(fsa_our_uname, vote_from) < 0 * we win */ } if (done) { do_crm_log(log_level + 1, "Election %d (current: %d, owner: %s): Processed %s from %s (%s)", election_id, current_election_id, election_owner, op, vote_from, reason); } else if (we_loose) { xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote_from, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); do_crm_log(log_level, "Election %d (owner: %s) lost: %s from %s (%s)", election_id, election_owner, op, vote_from, reason); update_dc(NULL); crm_timer_stop(election_timeout); if (fsa_input_register & R_THE_DC) { crm_trace("Give up the DC to %s", vote_from); register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL); } else if (cur_state != S_STARTING) { crm_trace("We werent the DC anyway"); register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL); } crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner); crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id); send_cluster_message(vote_from, crm_msg_crmd, novote, TRUE); free_xml(novote); fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local); last_election_loss = time(NULL); last_election_win = 0; } else { do_crm_log(log_level, "Election %d (owner: %s) pass: %s from %s (%s)", election_id, election_owner, op, vote_from, reason); if (last_election_loss) { time_t tm_now = time(NULL); if (tm_now - last_election_loss < (time_t) loss_dampen) { crm_info("Election %d ignore: We already lost an election less than %ds ago (%s)", election_id, loss_dampen, ctime(&last_election_loss)); update_dc(NULL); return; } last_election_loss = 0; } #if 0 /* Enabling this code can lead to multiple DCs during SimulStart. * Specifically when a node comes up after our last 'win' vote. * * Fixing and enabling this functionality might become important when * we start running realy big clusters, but for now leave it disabled. */ if (last_election_win) { time_t tm_now = time(NULL); if (tm_now - last_election_win < (time_t) win_dampen) { crm_info("Election %d ignore: We already won an election less than %ds ago", election_id, win_dampen); return; } } last_election_win = time(NULL); #endif register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); g_hash_table_destroy(voted); voted = NULL; } } /* A_ELECT_TIMER_START, A_ELECTION_TIMEOUT */ /* we won */ void do_election_timer_ctrl(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { } static void feature_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } /* A_DC_TAKEOVER */ void do_dc_takeover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int rc = pcmk_ok; xmlNode *cib = NULL; GListPtr gIter = NULL; static const char *cluster_type = NULL; if (cluster_type == NULL) { cluster_type = getenv("HA_cluster_type"); } if (cluster_type == NULL) { cluster_type = "Heartbeat"; } crm_info("Taking over DC status for this partition"); - set_bit_inplace(fsa_input_register, R_THE_DC); + set_bit(fsa_input_register, R_THE_DC); for (gIter = stonith_cleanup_list; gIter != NULL; gIter = gIter->next) { char *target = gIter->data; const char *uuid = get_uuid(target); crm_notice("Marking %s, target of a previous stonith action, as clean", target); send_stonith_update(NULL, target, uuid); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; #if SUPPORT_COROSYNC if (is_classic_ais_cluster()) { send_ais_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais); } #endif if (voted != NULL) { crm_trace("Destroying voted hash"); g_hash_table_destroy(voted); voted = NULL; } - set_bit_inplace(fsa_input_register, R_JOIN_OK); - set_bit_inplace(fsa_input_register, R_INVOKE_PE); + set_bit(fsa_input_register, R_JOIN_OK); + set_bit(fsa_input_register, R_INVOKE_PE); fsa_cib_conn->cmds->set_master(fsa_cib_conn, cib_scope_local); cib = create_xml_node(NULL, XML_TAG_CIB); crm_xml_add(cib, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); fsa_cib_update(XML_TAG_CIB, cib, cib_quorum_override, rc, NULL); add_cib_op_callback(fsa_cib_conn, rc, FALSE, NULL, feature_update_callback); update_attr(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "dc-version", VERSION "-" BUILD_VERSION, FALSE); update_attr(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "cluster-infrastructure", cluster_type, FALSE); mainloop_set_trigger(config_read); free_xml(cib); } /* A_DC_RELEASE */ void do_dc_release(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (action & A_DC_RELEASE) { GListPtr gIter = NULL; crm_debug("Releasing the role of DC"); - clear_bit_inplace(fsa_input_register, R_THE_DC); + clear_bit(fsa_input_register, R_THE_DC); for (gIter = stonith_cleanup_list; gIter != NULL; gIter = gIter->next) { char *target = gIter->data; crm_debug("Purging %s from stonith cleanup list", target); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } else if (action & A_DC_RELEASED) { crm_info("DC role released"); #if 0 if (are there errors) { /* we cant stay up if not healthy */ /* or perhaps I_ERROR and go to S_RECOVER? */ result = I_SHUTDOWN; } #endif register_fsa_input(C_FSA_INTERNAL, I_RELEASE_SUCCESS, NULL); } else { crm_err("Unknown action %s", fsa_action2string(action)); } crm_trace("Am I still the DC? %s", AM_I_DC ? XML_BOOLEAN_YES : XML_BOOLEAN_NO); } diff --git a/crmd/fsa.c b/crmd/fsa.c index 2c76821cdd..fd265f684c 100644 --- a/crmd/fsa.c +++ b/crmd/fsa.c @@ -1,688 +1,688 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *fsa_our_dc = NULL; cib_t *fsa_cib_conn = NULL; char *fsa_our_dc_version = NULL; lrmd_t *fsa_lrm_conn; char *fsa_our_uuid = NULL; char *fsa_our_uname = NULL; #if SUPPORT_HEARTBEAT ll_cluster_t *fsa_cluster_conn; #endif fsa_timer_t *wait_timer = NULL; fsa_timer_t *recheck_timer = NULL; fsa_timer_t *election_trigger = NULL; fsa_timer_t *election_timeout = NULL; fsa_timer_t *transition_timer = NULL; fsa_timer_t *integration_timer = NULL; fsa_timer_t *finalization_timer = NULL; fsa_timer_t *shutdown_escalation_timer = NULL; volatile gboolean do_fsa_stall = FALSE; volatile long long fsa_input_register = 0; volatile long long fsa_actions = A_NOTHING; volatile enum crmd_fsa_state fsa_state = S_STARTING; extern uint highest_born_on; extern uint num_join_invites; extern GHashTable *welcomed_nodes; extern GHashTable *finalized_nodes; extern GHashTable *confirmed_nodes; extern GHashTable *integrated_nodes; extern void initialize_join(gboolean before); #define DOT_PREFIX "actions:trace: " #define do_dot_log(fmt, args...) crm_trace( fmt, ##args) long long do_state_transition(long long actions, enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t * msg_data); void dump_rsc_info(void); void dump_rsc_info_callback(const xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); void ghash_print_node(gpointer key, gpointer value, gpointer user_data); void s_crmd_fsa_actions(fsa_data_t * fsa_data); void log_fsa_input(fsa_data_t * stored_msg); void init_dotfile(void); void init_dotfile(void) { do_dot_log(DOT_PREFIX "digraph \"g\" {"); do_dot_log(DOT_PREFIX " size = \"30,30\""); do_dot_log(DOT_PREFIX " graph ["); do_dot_log(DOT_PREFIX " fontsize = \"12\""); do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX " fontcolor = \"black\""); do_dot_log(DOT_PREFIX " bb = \"0,0,398.922306,478.927856\""); do_dot_log(DOT_PREFIX " color = \"black\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX " node ["); do_dot_log(DOT_PREFIX " fontsize = \"12\""); do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX " fontcolor = \"black\""); do_dot_log(DOT_PREFIX " shape = \"ellipse\""); do_dot_log(DOT_PREFIX " color = \"black\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX " edge ["); do_dot_log(DOT_PREFIX " fontsize = \"12\""); do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX " fontcolor = \"black\""); do_dot_log(DOT_PREFIX " color = \"black\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX "// special nodes"); do_dot_log(DOT_PREFIX " \"S_PENDING\" "); do_dot_log(DOT_PREFIX " ["); do_dot_log(DOT_PREFIX " color = \"blue\""); do_dot_log(DOT_PREFIX " fontcolor = \"blue\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX " \"S_TERMINATE\" "); do_dot_log(DOT_PREFIX " ["); do_dot_log(DOT_PREFIX " color = \"red\""); do_dot_log(DOT_PREFIX " fontcolor = \"red\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX "// DC only nodes"); do_dot_log(DOT_PREFIX " \"S_INTEGRATION\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_POLICY_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_TRANSITION_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_RELEASE_DC\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_IDLE\" [ fontcolor = \"green\" ]"); } static void do_fsa_action(fsa_data_t * fsa_data, long long an_action, void (*function) (long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)) { fsa_actions &= ~an_action; crm_trace(DOT_PREFIX "\t// %s", fsa_action2string(an_action)); function(an_action, fsa_data->fsa_cause, fsa_state, fsa_data->fsa_input, fsa_data); } static long long startup_actions = A_STARTUP | A_CIB_START | A_LRM_CONNECT | A_CCM_CONNECT | A_HA_CONNECT | A_READCONFIG | A_STARTED | A_CL_JOIN_QUERY; enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause) { fsa_data_t *fsa_data = NULL; long long register_copy = fsa_input_register; long long new_actions = A_NOTHING; enum crmd_fsa_state last_state = fsa_state; crm_trace("FSA invoked with Cause: %s\tState: %s", fsa_cause2string(cause), fsa_state2string(fsa_state)); do_fsa_stall = FALSE; if (is_message() == FALSE && fsa_actions != A_NOTHING) { /* fake the first message so we can get into the loop */ fsa_data = calloc(1, sizeof(fsa_data_t)); fsa_data->fsa_input = I_NULL; fsa_data->fsa_cause = C_FSA_INTERNAL; fsa_data->origin = __FUNCTION__; fsa_data->data_type = fsa_dt_none; fsa_message_queue = g_list_append(fsa_message_queue, fsa_data); fsa_data = NULL; } while (is_message() && do_fsa_stall == FALSE) { crm_trace("Checking messages (%d remaining)", g_list_length(fsa_message_queue)); fsa_data = get_message(); CRM_CHECK(fsa_data != NULL, continue); log_fsa_input(fsa_data); /* add any actions back to the queue */ fsa_actions |= fsa_data->actions; /* get the next batch of actions */ new_actions = crmd_fsa_actions[fsa_data->fsa_input][fsa_state]; fsa_actions |= new_actions; if (fsa_data->fsa_input != I_NULL && fsa_data->fsa_input != I_ROUTER) { crm_debug("Processing %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); } #ifdef FSA_TRACE if (new_actions != A_NOTHING) { crm_trace("Adding FSA actions %.16llx for %s/%s", new_actions, fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state)); fsa_dump_actions(new_actions, "\tFSA scheduled"); } else if (fsa_data->fsa_input != I_NULL && new_actions == A_NOTHING) { crm_debug("No action specified for input,state (%s,%s)", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state)); } if (fsa_data->actions != A_NOTHING) { crm_trace("Adding input actions %.16llx for %s/%s", new_actions, fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state)); fsa_dump_actions(fsa_data->actions, "\tInput scheduled"); } #endif /* logging : *before* the state is changed */ if (is_set(fsa_actions, A_ERROR)) { do_fsa_action(fsa_data, A_ERROR, do_log); } if (is_set(fsa_actions, A_WARN)) { do_fsa_action(fsa_data, A_WARN, do_log); } if (is_set(fsa_actions, A_LOG)) { do_fsa_action(fsa_data, A_LOG, do_log); } /* update state variables */ last_state = fsa_state; fsa_state = crmd_fsa_state[fsa_data->fsa_input][fsa_state]; /* * Remove certain actions during shutdown */ if (fsa_state == S_STOPPING || ((fsa_input_register & R_SHUTDOWN) == R_SHUTDOWN)) { - clear_bit_inplace(fsa_actions, startup_actions); + clear_bit(fsa_actions, startup_actions); } /* * Hook for change of state. * Allows actions to be added or removed when entering a state */ if (last_state != fsa_state) { fsa_actions = do_state_transition(fsa_actions, last_state, fsa_state, fsa_data); } else { do_dot_log(DOT_PREFIX "\t// FSA input: State=%s \tCause=%s" " \tInput=%s \tOrigin=%s() \tid=%d", fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_input2string(fsa_data->fsa_input), fsa_data->origin, fsa_data->id); } /* start doing things... */ s_crmd_fsa_actions(fsa_data); delete_fsa_input(fsa_data); fsa_data = NULL; } if (g_list_length(fsa_message_queue) > 0 || fsa_actions != A_NOTHING || do_fsa_stall) { crm_debug("Exiting the FSA: queue=%d, fsa_actions=0x%llx, stalled=%s", g_list_length(fsa_message_queue), fsa_actions, do_fsa_stall ? "true" : "false"); } else { crm_trace("Exiting the FSA"); } /* cleanup inputs? */ if (register_copy != fsa_input_register) { long long same = register_copy & fsa_input_register; fsa_dump_inputs(LOG_DEBUG, "Added input:", fsa_input_register ^ same); fsa_dump_inputs(LOG_DEBUG, "Removed input:", register_copy ^ same); } fsa_dump_queue(LOG_DEBUG); return fsa_state; } void s_crmd_fsa_actions(fsa_data_t * fsa_data) { /* * Process actions in order of priority but do only one * action at a time to avoid complicating the ordering. */ CRM_CHECK(fsa_data != NULL, return); while (fsa_actions != A_NOTHING && do_fsa_stall == FALSE) { /* regular action processing in order of action priority * * Make sure all actions that connect to required systems * are performed first */ if (fsa_actions & A_ERROR) { do_fsa_action(fsa_data, A_ERROR, do_log); } else if (fsa_actions & A_WARN) { do_fsa_action(fsa_data, A_WARN, do_log); } else if (fsa_actions & A_LOG) { do_fsa_action(fsa_data, A_LOG, do_log); /* get out of here NOW! before anything worse happens */ } else if (fsa_actions & A_EXIT_1) { do_fsa_action(fsa_data, A_EXIT_1, do_exit); /* sub-system restart */ } else if ((fsa_actions & O_LRM_RECONNECT) == O_LRM_RECONNECT) { do_fsa_action(fsa_data, O_LRM_RECONNECT, do_lrm_control); } else if ((fsa_actions & O_CIB_RESTART) == O_CIB_RESTART) { do_fsa_action(fsa_data, O_CIB_RESTART, do_cib_control); } else if ((fsa_actions & O_PE_RESTART) == O_PE_RESTART) { do_fsa_action(fsa_data, O_PE_RESTART, do_pe_control); } else if ((fsa_actions & O_TE_RESTART) == O_TE_RESTART) { do_fsa_action(fsa_data, O_TE_RESTART, do_te_control); /* essential start tasks */ } else if (fsa_actions & A_STARTUP) { do_fsa_action(fsa_data, A_STARTUP, do_startup); } else if (fsa_actions & A_CIB_START) { do_fsa_action(fsa_data, A_CIB_START, do_cib_control); } else if (fsa_actions & A_HA_CONNECT) { do_fsa_action(fsa_data, A_HA_CONNECT, do_ha_control); } else if (fsa_actions & A_READCONFIG) { do_fsa_action(fsa_data, A_READCONFIG, do_read_config); /* sub-system start/connect */ } else if (fsa_actions & A_LRM_CONNECT) { do_fsa_action(fsa_data, A_LRM_CONNECT, do_lrm_control); } else if (fsa_actions & A_CCM_CONNECT) { #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { do_fsa_action(fsa_data, A_CCM_CONNECT, do_ccm_control); } #endif fsa_actions &= ~A_CCM_CONNECT; } else if (fsa_actions & A_TE_START) { do_fsa_action(fsa_data, A_TE_START, do_te_control); } else if (fsa_actions & A_PE_START) { do_fsa_action(fsa_data, A_PE_START, do_pe_control); /* Timers */ /* else if(fsa_actions & O_DC_TIMER_RESTART) { do_fsa_action(fsa_data, O_DC_TIMER_RESTART, do_timer_control) */ ; } else if (fsa_actions & A_DC_TIMER_STOP) { do_fsa_action(fsa_data, A_DC_TIMER_STOP, do_timer_control); } else if (fsa_actions & A_INTEGRATE_TIMER_STOP) { do_fsa_action(fsa_data, A_INTEGRATE_TIMER_STOP, do_timer_control); } else if (fsa_actions & A_INTEGRATE_TIMER_START) { do_fsa_action(fsa_data, A_INTEGRATE_TIMER_START, do_timer_control); } else if (fsa_actions & A_FINALIZE_TIMER_STOP) { do_fsa_action(fsa_data, A_FINALIZE_TIMER_STOP, do_timer_control); } else if (fsa_actions & A_FINALIZE_TIMER_START) { do_fsa_action(fsa_data, A_FINALIZE_TIMER_START, do_timer_control); /* * Highest priority actions */ } else if (fsa_actions & A_MSG_ROUTE) { do_fsa_action(fsa_data, A_MSG_ROUTE, do_msg_route); } else if (fsa_actions & A_RECOVER) { do_fsa_action(fsa_data, A_RECOVER, do_recover); } else if (fsa_actions & A_CL_JOIN_RESULT) { do_fsa_action(fsa_data, A_CL_JOIN_RESULT, do_cl_join_finalize_respond); } else if (fsa_actions & A_CL_JOIN_REQUEST) { do_fsa_action(fsa_data, A_CL_JOIN_REQUEST, do_cl_join_offer_respond); } else if (fsa_actions & A_SHUTDOWN_REQ) { do_fsa_action(fsa_data, A_SHUTDOWN_REQ, do_shutdown_req); } else if (fsa_actions & A_ELECTION_VOTE) { do_fsa_action(fsa_data, A_ELECTION_VOTE, do_election_vote); } else if (fsa_actions & A_ELECTION_COUNT) { do_fsa_action(fsa_data, A_ELECTION_COUNT, do_election_count_vote); } else if (fsa_actions & A_LRM_EVENT) { do_fsa_action(fsa_data, A_LRM_EVENT, do_lrm_event); /* * High priority actions */ } else if (fsa_actions & A_STARTED) { do_fsa_action(fsa_data, A_STARTED, do_started); } else if (fsa_actions & A_CL_JOIN_QUERY) { do_fsa_action(fsa_data, A_CL_JOIN_QUERY, do_cl_join_query); } else if (fsa_actions & A_DC_TIMER_START) { do_fsa_action(fsa_data, A_DC_TIMER_START, do_timer_control); /* * Medium priority actions */ } else if (fsa_actions & A_DC_TAKEOVER) { do_fsa_action(fsa_data, A_DC_TAKEOVER, do_dc_takeover); } else if (fsa_actions & A_DC_RELEASE) { do_fsa_action(fsa_data, A_DC_RELEASE, do_dc_release); } else if (fsa_actions & A_DC_JOIN_FINAL) { do_fsa_action(fsa_data, A_DC_JOIN_FINAL, do_dc_join_final); } else if (fsa_actions & A_ELECTION_CHECK) { do_fsa_action(fsa_data, A_ELECTION_CHECK, do_election_check); } else if (fsa_actions & A_ELECTION_START) { do_fsa_action(fsa_data, A_ELECTION_START, do_election_vote); } else if (fsa_actions & A_TE_HALT) { do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke); } else if (fsa_actions & A_TE_CANCEL) { do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke); } else if (fsa_actions & A_DC_JOIN_OFFER_ALL) { do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all); } else if (fsa_actions & A_DC_JOIN_OFFER_ONE) { do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ONE, do_dc_join_offer_all); } else if (fsa_actions & A_DC_JOIN_PROCESS_REQ) { do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_REQ, do_dc_join_filter_offer); } else if (fsa_actions & A_DC_JOIN_PROCESS_ACK) { do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_ACK, do_dc_join_ack); /* * Low(er) priority actions * Make sure the CIB is always updated before invoking the * PE, and the PE before the TE */ } else if (fsa_actions & A_DC_JOIN_FINALIZE) { do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize); } else if (fsa_actions & A_LRM_INVOKE) { do_fsa_action(fsa_data, A_LRM_INVOKE, do_lrm_invoke); } else if (fsa_actions & A_PE_INVOKE) { do_fsa_action(fsa_data, A_PE_INVOKE, do_pe_invoke); } else if (fsa_actions & A_TE_INVOKE) { do_fsa_action(fsa_data, A_TE_INVOKE, do_te_invoke); } else if (fsa_actions & A_CL_JOIN_ANNOUNCE) { do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce); /* sub-system stop */ } else if (fsa_actions & A_DC_RELEASED) { do_fsa_action(fsa_data, A_DC_RELEASED, do_dc_release); } else if (fsa_actions & A_PE_STOP) { do_fsa_action(fsa_data, A_PE_STOP, do_pe_control); } else if (fsa_actions & A_TE_STOP) { do_fsa_action(fsa_data, A_TE_STOP, do_te_control); } else if (fsa_actions & A_SHUTDOWN) { do_fsa_action(fsa_data, A_SHUTDOWN, do_shutdown); } else if (fsa_actions & A_LRM_DISCONNECT) { do_fsa_action(fsa_data, A_LRM_DISCONNECT, do_lrm_control); } else if (fsa_actions & A_CCM_DISCONNECT) { #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { do_fsa_action(fsa_data, A_CCM_DISCONNECT, do_ccm_control); } #endif fsa_actions &= ~A_CCM_DISCONNECT; } else if (fsa_actions & A_HA_DISCONNECT) { do_fsa_action(fsa_data, A_HA_DISCONNECT, do_ha_control); } else if (fsa_actions & A_CIB_STOP) { do_fsa_action(fsa_data, A_CIB_STOP, do_cib_control); } else if (fsa_actions & A_STOP) { do_fsa_action(fsa_data, A_STOP, do_stop); /* exit gracefully */ } else if (fsa_actions & A_EXIT_0) { do_fsa_action(fsa_data, A_EXIT_0, do_exit); /* Error checking and reporting */ } else { crm_err("Action %s (0x%llx) not supported ", fsa_action2string(fsa_actions), fsa_actions); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, fsa_data, NULL, __FUNCTION__); } } } void log_fsa_input(fsa_data_t * stored_msg) { crm_trace("Processing queued input %d", stored_msg->id); if (stored_msg->fsa_cause == C_CCM_CALLBACK) { crm_trace("FSA processing CCM callback from %s", stored_msg->origin); } else if (stored_msg->fsa_cause == C_LRM_OP_CALLBACK) { crm_trace("FSA processing LRM callback from %s", stored_msg->origin); } else if (stored_msg->data == NULL) { crm_trace("FSA processing input from %s", stored_msg->origin); } else { ha_msg_input_t *ha_input = fsa_typed_data_adv(stored_msg, fsa_dt_ha_msg, __FUNCTION__); crm_trace("FSA processing XML message from %s", stored_msg->origin); crm_log_xml_trace(ha_input->xml, "FSA message data"); } } long long do_state_transition(long long actions, enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t * msg_data) { int level = LOG_INFO; long long tmp = actions; gboolean clear_recovery_bit = TRUE; enum crmd_fsa_cause cause = msg_data->fsa_cause; enum crmd_fsa_input current_input = msg_data->fsa_input; const char *state_from = fsa_state2string(cur_state); const char *state_to = fsa_state2string(next_state); const char *input = fsa_input2string(current_input); CRM_LOG_ASSERT(cur_state != next_state); do_dot_log(DOT_PREFIX "\t%s -> %s [ label=%s cause=%s origin=%s ]", state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); if(cur_state == S_IDLE || next_state == S_IDLE) { level = LOG_NOTICE; } else if(cur_state == S_NOT_DC || next_state == S_NOT_DC) { level = LOG_NOTICE; } else if(cur_state == S_ELECTION) { level = LOG_NOTICE; } else if(next_state == S_RECOVERY) { level = LOG_WARNING; } do_crm_log(level, "State transition %s -> %s [ input=%s cause=%s origin=%s ]", state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); /* the last two clauses might cause trouble later */ if (election_timeout != NULL && next_state != S_ELECTION && cur_state != S_RELEASE_DC) { crm_timer_stop(election_timeout); /* } else { */ /* crm_timer_start(election_timeout); */ } #if 0 if ((fsa_input_register & R_SHUTDOWN)) { - set_bit_inplace(tmp, A_DC_TIMER_STOP); + set_bit(tmp, A_DC_TIMER_STOP); } #endif if (next_state == S_INTEGRATION) { - set_bit_inplace(tmp, A_INTEGRATE_TIMER_START); + set_bit(tmp, A_INTEGRATE_TIMER_START); } else { - set_bit_inplace(tmp, A_INTEGRATE_TIMER_STOP); + set_bit(tmp, A_INTEGRATE_TIMER_STOP); } if (next_state == S_FINALIZE_JOIN) { - set_bit_inplace(tmp, A_FINALIZE_TIMER_START); + set_bit(tmp, A_FINALIZE_TIMER_START); } else { - set_bit_inplace(tmp, A_FINALIZE_TIMER_STOP); + set_bit(tmp, A_FINALIZE_TIMER_STOP); } if (next_state != S_PENDING) { - set_bit_inplace(tmp, A_DC_TIMER_STOP); + set_bit(tmp, A_DC_TIMER_STOP); } if (next_state != S_ELECTION) { highest_born_on = 0; } if (next_state != S_IDLE) { crm_timer_stop(recheck_timer); } if (cur_state == S_FINALIZE_JOIN && next_state == S_POLICY_ENGINE) { populate_cib_nodes(FALSE); do_update_cib_nodes(TRUE, __FUNCTION__); } switch (next_state) { case S_PENDING: fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local); /* fall through */ case S_ELECTION: crm_trace("Resetting our DC to NULL on transition to %s", fsa_state2string(next_state)); update_dc(NULL); break; case S_NOT_DC: election_trigger->counter = 0; if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("(Re)Issuing shutdown request now" " that we have a new DC"); - set_bit_inplace(tmp, A_SHUTDOWN_REQ); + set_bit(tmp, A_SHUTDOWN_REQ); } CRM_LOG_ASSERT(fsa_our_dc != NULL); if (fsa_our_dc == NULL) { crm_err("Reached S_NOT_DC without a DC" " being recorded"); } break; case S_RECOVERY: clear_recovery_bit = FALSE; break; case S_FINALIZE_JOIN: CRM_LOG_ASSERT(AM_I_DC); if (cause == C_TIMER_POPPED) { crm_warn("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } if (g_hash_table_size(welcomed_nodes) > 0) { char *msg = strdup(" Welcome reply not received from"); crm_warn("%u cluster nodes failed to respond" " to the join offer.", g_hash_table_size(welcomed_nodes)); g_hash_table_foreach(welcomed_nodes, ghash_print_node, msg); free(msg); } else { crm_debug("All %d cluster nodes " "responded to the join offer.", g_hash_table_size(integrated_nodes)); } break; case S_POLICY_ENGINE: election_trigger->counter = 0; CRM_LOG_ASSERT(AM_I_DC); if (cause == C_TIMER_POPPED) { crm_info("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } if (g_hash_table_size(finalized_nodes) > 0) { char *msg = strdup(" Confirm not received from"); crm_err("%u cluster nodes failed to confirm" " their join.", g_hash_table_size(finalized_nodes)); g_hash_table_foreach(finalized_nodes, ghash_print_node, msg); free(msg); } else if (g_hash_table_size(confirmed_nodes) == crm_active_peers()) { crm_debug("All %u cluster nodes are" " eligible to run resources.", crm_active_peers()); } else if (g_hash_table_size(confirmed_nodes) > crm_active_peers()) { crm_err("We have more confirmed nodes than our membership does: %d vs. %d", g_hash_table_size(confirmed_nodes), crm_active_peers()); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } else if (saved_ccm_membership_id != crm_peer_seq) { crm_info("Membership changed: %llu -> %llu - join restart", saved_ccm_membership_id, crm_peer_seq); register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } else { crm_warn("Only %u of %u cluster " "nodes are eligible to run resources - continue %d", g_hash_table_size(confirmed_nodes), crm_active_peers(), g_hash_table_size(welcomed_nodes)); } /* initialize_join(FALSE); */ break; case S_STOPPING: case S_TERMINATE: /* possibly redundant */ - set_bit_inplace(fsa_input_register, R_SHUTDOWN); + set_bit(fsa_input_register, R_SHUTDOWN); break; case S_IDLE: CRM_LOG_ASSERT(AM_I_DC); dump_rsc_info(); if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("(Re)Issuing shutdown request now" " that we are the DC"); - set_bit_inplace(tmp, A_SHUTDOWN_REQ); + set_bit(tmp, A_SHUTDOWN_REQ); } if (recheck_timer->period_ms > 0) { crm_debug("Starting %s", get_timer_desc(recheck_timer)); crm_timer_start(recheck_timer); } break; default: break; } if (clear_recovery_bit && next_state != S_PENDING) { tmp &= ~A_RECOVER; } else if (clear_recovery_bit == FALSE) { tmp |= A_RECOVER; } if (tmp != actions) { /* fsa_dump_actions(actions ^ tmp, "New actions"); */ actions = tmp; } return actions; } void dump_rsc_info(void) { } void ghash_print_node(gpointer key, gpointer value, gpointer user_data) { const char *text = user_data; const char *uname = key; const char *value_s = value; crm_info("%s: %s %s", text, uname, value_s); } diff --git a/crmd/heartbeat.c b/crmd/heartbeat.c index b595a799cc..d5dfdc1b8a 100644 --- a/crmd/heartbeat.c +++ b/crmd/heartbeat.c @@ -1,553 +1,553 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* put these first so that uuid_t is defined without conflicts */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void oc_ev_special(const oc_ev_t *, oc_ev_class_t, int); void ccm_event_detail(const oc_ev_membership_t * oc, oc_ed_t event); gboolean crmd_ha_msg_dispatch(ll_cluster_t * cluster_conn, gpointer user_data); void crmd_ccm_msg_callback(oc_ed_t event, void *cookie, size_t size, const void *data); int ccm_dispatch(gpointer user_data); #define CCM_EVENT_DETAIL 0 #define CCM_EVENT_DETAIL_PARTIAL 0 int (*ccm_api_callback_done) (void *cookie) = NULL; int (*ccm_api_handle_event) (const oc_ev_t * token) = NULL; static gboolean fsa_have_quorum = FALSE; static oc_ev_t *fsa_ev_token; static void *ccm_library = NULL; static int num_ccm_register_fails = 0; static int max_ccm_register_fails = 30; static void ccm_connection_destroy(void *userdata) { } /* A_CCM_CONNECT */ void do_ccm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static struct mainloop_fd_callbacks ccm_fd_callbacks = { .dispatch = ccm_dispatch, .destroy = ccm_connection_destroy, }; if (is_heartbeat_cluster()) { int (*ccm_api_register) (oc_ev_t ** token) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_register", 1); int (*ccm_api_set_callback) (const oc_ev_t * token, oc_ev_class_t class, oc_ev_callback_t * fn, oc_ev_callback_t ** prev_fn) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_set_callback", 1); void (*ccm_api_special) (const oc_ev_t *, oc_ev_class_t, int) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_special", 1); int (*ccm_api_activate) (const oc_ev_t * token, int *fd) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_activate", 1); int (*ccm_api_unregister) (oc_ev_t * token) = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_unregister", 1); if (action & A_CCM_DISCONNECT) { - set_bit_inplace(fsa_input_register, R_CCM_DISCONNECTED); + set_bit(fsa_input_register, R_CCM_DISCONNECTED); (*ccm_api_unregister) (fsa_ev_token); } if (action & A_CCM_CONNECT) { int ret; int fsa_ev_fd; gboolean did_fail = FALSE; crm_trace("Registering with CCM"); - clear_bit_inplace(fsa_input_register, R_CCM_DISCONNECTED); + clear_bit(fsa_input_register, R_CCM_DISCONNECTED); ret = (*ccm_api_register) (&fsa_ev_token); if (ret != 0) { crm_warn("CCM registration failed"); did_fail = TRUE; } if (did_fail == FALSE) { crm_trace("Setting up CCM callbacks"); ret = (*ccm_api_set_callback) (fsa_ev_token, OC_EV_MEMB_CLASS, crmd_ccm_msg_callback, NULL); if (ret != 0) { crm_warn("CCM callback not set"); did_fail = TRUE; } } if (did_fail == FALSE) { (*ccm_api_special) (fsa_ev_token, OC_EV_MEMB_CLASS, 0 /*don't care */ ); crm_trace("Activating CCM token"); ret = (*ccm_api_activate) (fsa_ev_token, &fsa_ev_fd); if (ret != 0) { crm_warn("CCM Activation failed"); did_fail = TRUE; } } if (did_fail) { num_ccm_register_fails++; (*ccm_api_unregister) (fsa_ev_token); if (num_ccm_register_fails < max_ccm_register_fails) { crm_warn("CCM Connection failed" " %d times (%d max)", num_ccm_register_fails, max_ccm_register_fails); crm_timer_start(wait_timer); crmd_fsa_stall(NULL); return; } else { crm_err("CCM Activation failed %d (max) times", num_ccm_register_fails); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return; } } crm_info("CCM connection established... waiting for first callback"); mainloop_add_fd("heartbeat-ccm", fsa_ev_fd, fsa_ev_token, &ccm_fd_callbacks); } } if (action & ~(A_CCM_CONNECT | A_CCM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } void ccm_event_detail(const oc_ev_membership_t * oc, oc_ed_t event) { int lpc; gboolean member = FALSE; member = FALSE; crm_trace("-----------------------"); crm_info("%s: trans=%d, nodes=%d, new=%d, lost=%d n_idx=%d, " "new_idx=%d, old_idx=%d", ccm_event_name(event), oc->m_instance, oc->m_n_member, oc->m_n_in, oc->m_n_out, oc->m_memb_idx, oc->m_in_idx, oc->m_out_idx); # if !CCM_EVENT_DETAIL_PARTIAL for (lpc = 0; lpc < oc->m_n_member; lpc++) { crm_info("\tCURRENT: %s [nodeid=%d, born=%d]", oc->m_array[oc->m_memb_idx + lpc].node_uname, oc->m_array[oc->m_memb_idx + lpc].node_id, oc->m_array[oc->m_memb_idx + lpc].node_born_on); if (safe_str_eq(fsa_our_uname, oc->m_array[oc->m_memb_idx + lpc].node_uname)) { member = TRUE; } } if (member == FALSE) { crm_warn("MY NODE IS NOT IN CCM THE MEMBERSHIP LIST"); } # endif for (lpc = 0; lpc < (int)oc->m_n_in; lpc++) { crm_info("\tNEW: %s [nodeid=%d, born=%d]", oc->m_array[oc->m_in_idx + lpc].node_uname, oc->m_array[oc->m_in_idx + lpc].node_id, oc->m_array[oc->m_in_idx + lpc].node_born_on); } for (lpc = 0; lpc < (int)oc->m_n_out; lpc++) { crm_info("\tLOST: %s [nodeid=%d, born=%d]", oc->m_array[oc->m_out_idx + lpc].node_uname, oc->m_array[oc->m_out_idx + lpc].node_id, oc->m_array[oc->m_out_idx + lpc].node_born_on); } crm_trace("-----------------------"); } /* A_CCM_UPDATE_CACHE */ /* * Take the opportunity to update the node status in the CIB as well */ void do_ccm_update_cache(enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, oc_ed_t event, const oc_ev_membership_t * oc, xmlNode * xml) { unsigned long long instance = 0; unsigned int lpc = 0; if (is_heartbeat_cluster()) { CRM_ASSERT(oc != NULL); instance = oc->m_instance; } CRM_ASSERT(crm_peer_seq <= instance); switch (cur_state) { case S_STOPPING: case S_TERMINATE: case S_HALT: crm_debug("Ignoring %s CCM event %llu, we're in state %s", ccm_event_name(event), instance, fsa_state2string(cur_state)); return; case S_ELECTION: register_fsa_action(A_ELECTION_CHECK); break; default: break; } if (is_heartbeat_cluster()) { ccm_event_detail(oc, event); /*--*-- Recently Dead Member Nodes --*--*/ for (lpc = 0; lpc < oc->m_n_out; lpc++) { crm_update_ccm_node(oc, lpc + oc->m_out_idx, CRM_NODE_LOST, instance); } /*--*-- All Member Nodes --*--*/ for (lpc = 0; lpc < oc->m_n_member; lpc++) { crm_update_ccm_node(oc, lpc + oc->m_memb_idx, CRM_NODE_ACTIVE, instance); } } if (event == OC_EV_MS_EVICTED) { crm_update_peer(__FUNCTION__, 0, 0, 0, -1, 0, fsa_our_uuid, fsa_our_uname, NULL, CRM_NODE_EVICTED); /* todo: drop back to S_PENDING instead */ /* get out... NOW! * * go via the error recovery process so that HA will * restart us if required */ register_fsa_error_adv(cause, I_ERROR, NULL, NULL, __FUNCTION__); } post_cache_update(instance); return; } int ccm_dispatch(gpointer user_data) { int rc = 0; oc_ev_t *ccm_token = (oc_ev_t *) user_data; gboolean was_error = FALSE; crm_trace("Invoked"); if (ccm_api_handle_event == NULL) { ccm_api_handle_event = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_handle_event", 1); } rc = (*ccm_api_handle_event) (ccm_token); if (rc != 0) { if (is_set(fsa_input_register, R_CCM_DISCONNECTED) == FALSE) { /* we signed out, so this is expected */ register_fsa_input(C_CCM_CALLBACK, I_ERROR, NULL); crm_err("CCM connection appears to have failed: rc=%d.", rc); } was_error = TRUE; } trigger_fsa(fsa_source); if(was_error) { return -1; } return 0; } void crmd_ccm_msg_callback(oc_ed_t event, void *cookie, size_t size, const void *data) { gboolean update_cache = FALSE; const oc_ev_membership_t *membership = data; gboolean update_quorum = FALSE; crm_trace("Invoked"); CRM_ASSERT(data != NULL); crm_info("Quorum %s after event=%s (id=%d)", ccm_have_quorum(event) ? "(re)attained" : "lost", ccm_event_name(event), membership->m_instance); if (crm_peer_seq > membership->m_instance) { crm_err("Membership instance ID went backwards! %llu->%d", crm_peer_seq, membership->m_instance); CRM_ASSERT(crm_peer_seq <= membership->m_instance); return; } /* * OC_EV_MS_NEW_MEMBERSHIP: membership with quorum * OC_EV_MS_MS_INVALID: membership without quorum * OC_EV_MS_NOT_PRIMARY: previous membership no longer valid * OC_EV_MS_PRIMARY_RESTORED: previous membership restored * OC_EV_MS_EVICTED: the client is evicted from ccm. */ switch (event) { case OC_EV_MS_NEW_MEMBERSHIP: case OC_EV_MS_INVALID: update_cache = TRUE; update_quorum = TRUE; break; case OC_EV_MS_NOT_PRIMARY: break; case OC_EV_MS_PRIMARY_RESTORED: update_cache = TRUE; crm_peer_seq = membership->m_instance; break; case OC_EV_MS_EVICTED: update_quorum = TRUE; register_fsa_input(C_FSA_INTERNAL, I_STOP, NULL); crm_err("Shutting down after CCM event: %s", ccm_event_name(event)); break; default: crm_err("Unknown CCM event: %d", event); } if (update_quorum) { crm_have_quorum = ccm_have_quorum(event); crm_update_quorum(crm_have_quorum, FALSE); if (crm_have_quorum == FALSE) { /* did we just loose quorum? */ if (fsa_have_quorum) { crm_info("Quorum lost: %s", ccm_event_name(event)); } } } if (update_cache) { crm_trace("Updating cache after event %s", ccm_event_name(event)); do_ccm_update_cache(C_CCM_CALLBACK, fsa_state, event, data, NULL); } else if (event != OC_EV_MS_NOT_PRIMARY) { crm_peer_seq = membership->m_instance; register_fsa_action(A_TE_CANCEL); } if (ccm_api_callback_done == NULL) { ccm_api_callback_done = find_library_function(&ccm_library, CCM_LIBRARY, "oc_ev_callback_done", 1); } (*ccm_api_callback_done) (cookie); return; } void crmd_ha_status_callback(const char *node, const char *status, void *private) { xmlNode *update = NULL; crm_node_t *peer = NULL; crm_notice("Status update: Node %s now has status [%s]", node, status); peer = crm_get_peer(0, node); if (safe_str_eq(status, PINGSTATUS)) { return; } if (safe_str_eq(status, DEADSTATUS)) { /* this node is toast */ crm_update_peer_proc(__FUNCTION__, peer, crm_proc_heartbeat, OFFLINESTATUS); if (AM_I_DC) { update = create_node_state(node, DEADSTATUS, XML_BOOLEAN_NO, OFFLINESTATUS, CRMD_JOINSTATE_DOWN, NULL, TRUE, __FUNCTION__); } } else { crm_update_peer_proc(__FUNCTION__, peer, crm_proc_heartbeat, ONLINESTATUS); if (AM_I_DC) { update = create_node_state(node, ACTIVESTATUS, NULL, NULL, CRMD_JOINSTATE_PENDING, NULL, FALSE, __FUNCTION__); } } trigger_fsa(fsa_source); if (update != NULL) { fsa_cib_anon_update(XML_CIB_TAG_STATUS, update, cib_scope_local | cib_quorum_override | cib_can_create); free_xml(update); } } void crmd_client_status_callback(const char *node, const char *client, const char *status, void *private) { const char *join = NULL; crm_node_t *peer = NULL; gboolean clear_shutdown = FALSE; crm_trace("Invoked"); if (safe_str_neq(client, CRM_SYSTEM_CRMD)) { return; } if (safe_str_eq(status, JOINSTATUS)) { clear_shutdown = TRUE; status = ONLINESTATUS; join = CRMD_JOINSTATE_PENDING; } else if (safe_str_eq(status, LEAVESTATUS)) { status = OFFLINESTATUS; join = CRMD_JOINSTATE_DOWN; /* clear_shutdown = TRUE; */ } - set_bit_inplace(fsa_input_register, R_PEER_DATA); + set_bit(fsa_input_register, R_PEER_DATA); crm_notice("Status update: Client %s/%s now has status [%s] (DC=%s)", node, client, status, AM_I_DC ? "true" : "false"); if (safe_str_eq(status, ONLINESTATUS)) { /* remove the cached value in case it changed */ crm_trace("Uncaching UUID for %s", node); unget_uuid(node); } peer = crm_get_peer(0, node); if (AM_I_DC) { xmlNode *update = NULL; crm_trace("Got client status callback"); update = create_node_state(node, NULL, NULL, status, join, NULL, clear_shutdown, __FUNCTION__); fsa_cib_anon_update(XML_CIB_TAG_STATUS, update, cib_scope_local | cib_quorum_override | cib_can_create); free_xml(update); } crm_update_peer_proc(__FUNCTION__, peer, crm_proc_crmd, status); } void crmd_ha_msg_callback(HA_Message * hamsg, void *private_data) { int level = LOG_DEBUG; crm_node_t *from_node = NULL; xmlNode *msg = convert_ha_message(NULL, hamsg, __FUNCTION__); const char *from = crm_element_value(msg, F_ORIG); const char *op = crm_element_value(msg, F_CRM_TASK); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); CRM_CHECK(from != NULL, crm_log_xml_err(msg, "anon"); goto bail); crm_trace("HA[inbound]: %s from %s", op, from); if (crm_peer_cache == NULL || crm_active_peers() == 0) { crm_debug("Ignoring HA messages until we are" " connected to the CCM (%s op from %s)", op, from); crm_log_xml_trace(msg, "HA[inbound]: Ignore (No CCM)"); goto bail; } from_node = crm_get_peer(0, from); if (crm_is_peer_active(from_node) == FALSE) { if (safe_str_eq(op, CRM_OP_VOTE)) { level = LOG_WARNING; } else if (AM_I_DC && safe_str_eq(op, CRM_OP_JOIN_ANNOUNCE)) { level = LOG_WARNING; } else if (safe_str_eq(sys_from, CRM_SYSTEM_DC)) { level = LOG_WARNING; } do_crm_log(level, "Ignoring HA message (op=%s) from %s: not in our" " membership list (size=%d)", op, from, crm_active_peers()); crm_log_xml_trace(msg, "HA[inbound]: CCM Discard"); } else { crmd_ha_msg_filter(msg); } bail: free_xml(msg); return; } gboolean crmd_ha_msg_dispatch(ll_cluster_t * cluster_conn, gpointer user_data) { IPC_Channel *channel = NULL; gboolean stay_connected = TRUE; crm_trace("Invoked"); if (cluster_conn != NULL) { channel = cluster_conn->llc_ops->ipcchan(cluster_conn); } CRM_CHECK(cluster_conn != NULL,;); CRM_CHECK(channel != NULL,;); if (channel != NULL && IPC_ISRCONN(channel)) { if (cluster_conn->llc_ops->msgready(cluster_conn) == 0) { crm_trace("no message ready yet"); } /* invoke the callbacks but dont block */ cluster_conn->llc_ops->rcvmsg(cluster_conn, 0); } if (channel == NULL || channel->ch_status != IPC_CONNECT) { if (is_set(fsa_input_register, R_HA_DISCONNECTED) == FALSE) { crm_crit("Lost connection to heartbeat service."); } else { crm_info("Lost connection to heartbeat service."); } trigger_fsa(fsa_source); stay_connected = FALSE; } return stay_connected; } diff --git a/crmd/join_dc.c b/crmd/join_dc.c index 298cced4f1..33d137b366 100644 --- a/crmd/join_dc.c +++ b/crmd/join_dc.c @@ -1,657 +1,657 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include GHashTable *welcomed_nodes = NULL; GHashTable *integrated_nodes = NULL; GHashTable *finalized_nodes = NULL; GHashTable *confirmed_nodes = NULL; char *max_epoch = NULL; char *max_generation_from = NULL; xmlNode *max_generation_xml = NULL; void initialize_join(gboolean before); gboolean finalize_join_for(gpointer key, gpointer value, gpointer user_data); void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); static int current_join_id = 0; unsigned long long saved_ccm_membership_id = 0; void initialize_join(gboolean before) { /* clear out/reset a bunch of stuff */ crm_debug("join-%d: Initializing join data (flag=%s)", current_join_id, before ? "true" : "false"); g_hash_table_destroy(welcomed_nodes); g_hash_table_destroy(integrated_nodes); g_hash_table_destroy(finalized_nodes); g_hash_table_destroy(confirmed_nodes); if (before) { if (max_generation_from != NULL) { free(max_generation_from); max_generation_from = NULL; } if (max_generation_xml != NULL) { free_xml(max_generation_xml); max_generation_xml = NULL; } - clear_bit_inplace(fsa_input_register, R_HAVE_CIB); - clear_bit_inplace(fsa_input_register, R_CIB_ASKED); + clear_bit(fsa_input_register, R_HAVE_CIB); + clear_bit(fsa_input_register, R_CIB_ASKED); } welcomed_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); integrated_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); finalized_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); confirmed_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } void erase_node_from_join(const char *uname) { gboolean w = FALSE, i = FALSE, f = FALSE, c = FALSE; if (uname == NULL) { return; } if (welcomed_nodes != NULL) { w = g_hash_table_remove(welcomed_nodes, uname); } if (integrated_nodes != NULL) { i = g_hash_table_remove(integrated_nodes, uname); } if (finalized_nodes != NULL) { f = g_hash_table_remove(finalized_nodes, uname); } if (confirmed_nodes != NULL) { c = g_hash_table_remove(confirmed_nodes, uname); } if (w || i || f || c) { crm_debug("Removed node %s from join calculations:" " welcomed=%d itegrated=%d finalized=%d confirmed=%d", uname, w, i, f, c); } } static void join_make_offer(gpointer key, gpointer value, gpointer user_data) { const char *join_to = NULL; const crm_node_t *member = value; CRM_ASSERT(member != NULL); if (crm_is_peer_active(member) == FALSE) { crm_trace("Not making an offer to %s: not active", member->uname); return; } join_to = member->uname; if (join_to == NULL) { crm_err("No recipient for welcome message"); return; } erase_node_from_join(join_to); if (saved_ccm_membership_id != crm_peer_seq) { saved_ccm_membership_id = crm_peer_seq; crm_info("Making join offers based on membership %llu", crm_peer_seq); } if (crm_is_peer_active(member)) { xmlNode *offer = create_request(CRM_OP_JOIN_OFFER, NULL, join_to, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL); char *join_offered = crm_itoa(current_join_id); crm_xml_add_int(offer, F_CRM_JOIN_ID, current_join_id); /* send the welcome */ crm_debug("join-%d: Sending offer to %s", current_join_id, join_to); send_cluster_message(join_to, crm_msg_crmd, offer, TRUE); free_xml(offer); g_hash_table_insert(welcomed_nodes, strdup(join_to), join_offered); } else { crm_info("Peer process on %s is not active (yet?): %.8lx %d", join_to, (long)member->processes, g_hash_table_size(crm_peer_cache)); } } /* A_DC_JOIN_OFFER_ALL */ void do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* reset everyones status back to down or in_ccm in the CIB * * any nodes that are active in the CIB but not in the CCM list * will be seen as offline by the PE anyway */ current_join_id++; initialize_join(TRUE); /* do_update_cib_nodes(TRUE, __FUNCTION__); */ update_dc(NULL); if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) { crm_info("A new node joined the cluster"); } g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL); /* dont waste time by invoking the PE yet; */ crm_info("join-%d: Waiting on %d outstanding join acks", current_join_id, g_hash_table_size(welcomed_nodes)); } /* A_DC_JOIN_OFFER_ONE */ void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_node_t *member; ha_msg_input_t *welcome = NULL; const char *op = NULL; const char *join_to = NULL; if (msg_data->data) { welcome = fsa_typed_data(fsa_dt_ha_msg); } else { crm_info("A new node joined - wait until it contacts us"); return; } if (welcome == NULL) { crm_err("Attempt to send welcome message without a message to reply to!"); return; } join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM); if (join_to == NULL) { crm_err("Attempt to send welcome message without a host to reply to!"); return; } member = crm_get_peer(0, join_to); if (crm_is_peer_active(member) == FALSE) { crm_err("%s is not a fully active member of our partition", join_to); return; } op = crm_element_value(welcome->msg, F_CRM_TASK); if (join_to != NULL && (cur_state == S_INTEGRATION || cur_state == S_FINALIZE_JOIN)) { /* note: it _is_ possible that a node will have been * sick or starting up when the original offer was made. * however, it will either re-announce itself in due course * _or_ we can re-store the original offer on the client. */ crm_trace("(Re-)offering membership to %s...", join_to); } crm_info("join-%d: Processing %s request from %s in state %s", current_join_id, op, join_to, fsa_state2string(cur_state)); join_make_offer(NULL, member, NULL); /* always offer to the DC (ourselves) * this ensures the correct value for max_generation_from */ member = crm_get_peer(0, fsa_our_uname); join_make_offer(NULL, member, NULL); /* this was a genuine join request, cancel any existing * transition and invoke the PE */ start_transition(fsa_state); /* dont waste time by invoking the pe yet; */ crm_debug("Waiting on %d outstanding join acks for join-%d", g_hash_table_size(welcomed_nodes), current_join_id); } static int compare_int_fields(xmlNode * left, xmlNode * right, const char *field) { const char *elem_l = crm_element_value(left, field); const char *elem_r = crm_element_value(right, field); int int_elem_l = crm_int_helper(elem_l, NULL); int int_elem_r = crm_int_helper(elem_r, NULL); if (int_elem_l < int_elem_r) { return -1; } else if (int_elem_l > int_elem_r) { return 1; } return 0; } /* A_DC_JOIN_PROCESS_REQ */ void do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *generation = NULL; int cmp = 0; int join_id = -1; gboolean ack_nack_bool = TRUE; const char *ack_nack = CRMD_JOINSTATE_MEMBER; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); const char *ref = crm_element_value(join_ack->msg, XML_ATTR_REFERENCE); crm_node_t *join_node = crm_get_peer(0, join_from); crm_debug("Processing req from %s", join_from); generation = join_ack->xml; crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); if (max_generation_xml != NULL && generation != NULL) { int lpc = 0; const char *attributes[] = { XML_ATTR_GENERATION_ADMIN, XML_ATTR_GENERATION, XML_ATTR_NUMUPDATES, }; for (lpc = 0; cmp == 0 && lpc < DIMOF(attributes); lpc++) { cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]); } } if (join_id != current_join_id) { crm_debug("Invalid response from %s: join-%d vs. join-%d", join_from, join_id, current_join_id); check_join_state(cur_state, __FUNCTION__); return; } else if (join_node == NULL || crm_is_peer_active(join_node) == FALSE) { crm_err("Node %s is not a member", join_from); ack_nack_bool = FALSE; } else if (generation == NULL) { crm_err("Generation was NULL"); ack_nack_bool = FALSE; } else if (max_generation_xml == NULL) { max_generation_xml = copy_xml(generation); max_generation_from = strdup(join_from); } else if (cmp < 0 || (cmp == 0 && safe_str_eq(join_from, fsa_our_uname))) { crm_debug("%s has a better generation number than" " the current max %s", join_from, max_generation_from); if (max_generation_xml) { crm_log_xml_debug(max_generation_xml, "Max generation"); } crm_log_xml_debug(generation, "Their generation"); free(max_generation_from); free_xml(max_generation_xml); max_generation_from = strdup(join_from); max_generation_xml = copy_xml(join_ack->xml); } if (ack_nack_bool == FALSE) { /* NACK this client */ ack_nack = CRMD_JOINSTATE_NACK; crm_err("join-%d: NACK'ing node %s (ref %s)", join_id, join_from, ref); } else { crm_debug("join-%d: Welcoming node %s (ref %s)", join_id, join_from, ref); } /* add them to our list of CRMD_STATE_ACTIVE nodes */ g_hash_table_insert(integrated_nodes, strdup(join_from), strdup(ack_nack)); crm_debug("%u nodes have been integrated into join-%d", g_hash_table_size(integrated_nodes), join_id); g_hash_table_remove(welcomed_nodes, join_from); if (check_join_state(cur_state, __FUNCTION__) == FALSE) { /* dont waste time by invoking the PE yet; */ crm_debug("join-%d: Still waiting on %d outstanding offers", join_id, g_hash_table_size(welcomed_nodes)); } } /* A_DC_JOIN_FINALIZE */ void do_dc_join_finalize(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { char *sync_from = NULL; int rc = pcmk_ok; /* This we can do straight away and avoid clients timing us out * while we compute the latest CIB */ crm_debug("Finializing join-%d for %d clients", current_join_id, g_hash_table_size(integrated_nodes)); if (g_hash_table_size(integrated_nodes) == 0) { /* If we don't even have ourself, start again */ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __FUNCTION__); return; } - clear_bit_inplace(fsa_input_register, R_HAVE_CIB); + clear_bit(fsa_input_register, R_HAVE_CIB); if (max_generation_from == NULL || safe_str_eq(max_generation_from, fsa_our_uname)) { - set_bit_inplace(fsa_input_register, R_HAVE_CIB); + set_bit(fsa_input_register, R_HAVE_CIB); } if (is_set(fsa_input_register, R_IN_TRANSITION)) { crm_warn("join-%d: We are still in a transition." " Delaying until the TE completes.", current_join_id); crmd_fsa_stall(NULL); return; } if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { /* ask for the agreed best CIB */ sync_from = strdup(max_generation_from); crm_log_xml_debug(max_generation_xml, "Requesting version"); - set_bit_inplace(fsa_input_register, R_CIB_ASKED); + set_bit(fsa_input_register, R_CIB_ASKED); } else { /* Send _our_ CIB out to everyone */ sync_from = strdup(fsa_our_uname); } crm_info("join-%d: Syncing the CIB from %s to the rest of the cluster", current_join_id, sync_from); rc = fsa_cib_conn->cmds->sync_from(fsa_cib_conn, sync_from, NULL, cib_quorum_override); fsa_cib_conn->cmds->register_callback(fsa_cib_conn, rc, 60, FALSE, sync_from, "finalize_sync_callback", finalize_sync_callback); } void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { CRM_LOG_ASSERT(-EPERM != rc); - clear_bit_inplace(fsa_input_register, R_CIB_ASKED); + clear_bit(fsa_input_register, R_CIB_ASKED); if (rc != pcmk_ok) { do_crm_log((rc == -pcmk_err_old_data ? LOG_WARNING : LOG_ERR), "Sync from %s failed: %s", (char *)user_data, pcmk_strerror(rc)); /* restart the whole join process */ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __FUNCTION__); } else if (AM_I_DC && fsa_state == S_FINALIZE_JOIN) { - set_bit_inplace(fsa_input_register, R_HAVE_CIB); - clear_bit_inplace(fsa_input_register, R_CIB_ASKED); + set_bit(fsa_input_register, R_HAVE_CIB); + clear_bit(fsa_input_register, R_CIB_ASKED); /* make sure dc_uuid is re-set to us */ if (check_join_state(fsa_state, __FUNCTION__) == FALSE) { crm_debug("Notifying %d clients of join-%d results", g_hash_table_size(integrated_nodes), current_join_id); g_hash_table_foreach_remove(integrated_nodes, finalize_join_for, NULL); } } else { crm_debug("No longer the DC in S_FINALIZE_JOIN: %s/%s", AM_I_DC ? "DC" : "CRMd", fsa_state2string(fsa_state)); } free(user_data); } static void join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; if (rc == pcmk_ok) { crm_debug("Join update %d complete", call_id); check_join_state(fsa_state, __FUNCTION__); } else { crm_err("Join update %d failed", call_id); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } /* A_DC_JOIN_PROCESS_ACK */ void do_dc_join_ack(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int join_id = -1; int call_id = 0; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *join_id_s = NULL; const char *join_state = NULL; const char *op = crm_element_value(join_ack->msg, F_CRM_TASK); const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); if (safe_str_neq(op, CRM_OP_JOIN_CONFIRM)) { crm_debug("Ignoring op=%s message from %s", op, join_from); return; } crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); join_id_s = crm_element_value(join_ack->msg, F_CRM_JOIN_ID); /* now update them to "member" */ crm_trace("Processing ack from %s", join_from); join_state = (const char *) g_hash_table_lookup(finalized_nodes, join_from); if (join_state == NULL) { crm_err("Join not in progress: ignoring join-%d from %s", join_id, join_from); return; } else if (safe_str_neq(join_state, CRMD_JOINSTATE_MEMBER)) { crm_err("Node %s wasnt invited to join the cluster", join_from); g_hash_table_remove(finalized_nodes, join_from); return; } else if (join_id != current_join_id) { crm_err("Invalid response from %s: join-%d vs. join-%d", join_from, join_id, current_join_id); g_hash_table_remove(finalized_nodes, join_from); return; } g_hash_table_remove(finalized_nodes, join_from); if (g_hash_table_lookup(confirmed_nodes, join_from) != NULL) { crm_err("join-%d: hash already contains confirmation from %s", join_id, join_from); } g_hash_table_insert(confirmed_nodes, strdup(join_from), strdup(join_id_s)); crm_info("join-%d: Updating node state to %s for %s", join_id, CRMD_JOINSTATE_MEMBER, join_from); /* update CIB with the current LRM status from the node * We dont need to notify the TE of these updates, a transition will * be started in due time */ erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local); fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml, cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, join_update_complete_callback); crm_debug("join-%d: Registered callback for LRM update %d", join_id, call_id); } gboolean finalize_join_for(gpointer key, gpointer value, gpointer user_data) { const char *join_to = NULL; const char *join_state = NULL; xmlNode *acknak = NULL; crm_node_t *join_node = NULL; if (key == NULL || value == NULL) { return TRUE; } join_to = (const char *)key; join_state = (const char *)value; /* make sure the node exists in the config section */ create_node_entry(join_to, join_to, NORMALNODE); join_node = crm_get_peer(0, join_to); if (crm_is_peer_active(join_node) == FALSE) { /* * NACK'ing nodes that the membership layer doesn't know about yet * simply creates more churn * * Better to leave them waiting and let the join restart when * the new membership event comes in * * All other NACKs (due to versions etc) should still be processed */ return TRUE; } /* send the ack/nack to the node */ acknak = create_request(CRM_OP_JOIN_ACKNAK, NULL, join_to, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL); crm_xml_add_int(acknak, F_CRM_JOIN_ID, current_join_id); /* set the ack/nack */ if (safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { crm_debug("join-%d: ACK'ing join request from %s, state %s", current_join_id, join_to, join_state); crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE); g_hash_table_insert(finalized_nodes, strdup(join_to), strdup(CRMD_JOINSTATE_MEMBER)); } else { crm_warn("join-%d: NACK'ing join request from %s, state %s", current_join_id, join_to, join_state); crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_FALSE); } send_cluster_message(join_to, crm_msg_crmd, acknak, TRUE); free_xml(acknak); return TRUE; } void ghash_print_node(gpointer key, gpointer value, gpointer user_data); gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source) { crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state)); if (saved_ccm_membership_id != crm_peer_seq) { crm_debug("%s: Membership changed since join started: %llu -> %llu", source, saved_ccm_membership_id, crm_peer_seq); register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } else if (cur_state == S_INTEGRATION) { if (g_hash_table_size(welcomed_nodes) == 0) { crm_debug("join-%d: Integration of %d peers complete: %s", current_join_id, g_hash_table_size(integrated_nodes), source); register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL); return TRUE; } } else if (cur_state == S_FINALIZE_JOIN) { if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id); return TRUE; } else if (g_hash_table_size(integrated_nodes) == 0 && g_hash_table_size(finalized_nodes) == 0) { crm_debug("join-%d complete: %s", current_join_id, source); register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL); } else if (g_hash_table_size(integrated_nodes) != 0 && g_hash_table_size(finalized_nodes) != 0) { char *msg = NULL; crm_err("join-%d: Waiting on %d integrated nodes" " AND %d finalized nodes", current_join_id, g_hash_table_size(integrated_nodes), g_hash_table_size(finalized_nodes)); msg = strdup("Integrated node"); g_hash_table_foreach(integrated_nodes, ghash_print_node, msg); free(msg); msg = strdup("Finalized node"); g_hash_table_foreach(finalized_nodes, ghash_print_node, msg); free(msg); } else if (g_hash_table_size(integrated_nodes) != 0) { crm_debug("join-%d: Still waiting on %d integrated nodes", current_join_id, g_hash_table_size(integrated_nodes)); } else if (g_hash_table_size(finalized_nodes) != 0) { crm_debug("join-%d: Still waiting on %d finalized nodes", current_join_id, g_hash_table_size(finalized_nodes)); } } return FALSE; } void do_dc_join_final(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_debug("Ensuring DC, quorum and node attributes are up-to-date"); update_attrd(NULL, NULL, NULL, NULL); crm_update_quorum(crm_have_quorum, TRUE); } diff --git a/crmd/lrm.c b/crmd/lrm.c index f682120f62..a593d05e89 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1,2000 +1,2000 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define START_DELAY_THRESHOLD 5 * 60 * 1000 typedef struct resource_history_s { char *id; lrmd_rsc_info_t rsc; lrmd_event_data_t *last; lrmd_event_data_t *failed; GList *recurring_op_list; /* Resources must be stopped using the same * parameters they were started with. This hashtable * holds the parameters that should be used for the next stop * cmd on this resource. */ GHashTable *stop_params; } rsc_history_t; struct recurring_op_s { char *rsc_id; char *op_type; char *op_key; int call_id; int interval; int last_rc; int last_op_status; gboolean remove; gboolean cancelled; }; struct pending_deletion_op_s { char *rsc; ha_msg_input_t *input; }; struct delete_event_s { int rc; const char *rsc; }; GHashTable *resource_history = NULL; GHashTable *pending_ops = NULL; GHashTable *deletion_ops = NULL; int num_lrm_register_fails = 0; int max_lrm_register_fails = 30; gboolean process_lrm_event(lrmd_event_data_t * op); gboolean is_rsc_active(const char *rsc_id); gboolean build_active_RAs(xmlNode * rsc_list); static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); static int delete_rsc_status(const char *rsc_id, int call_options, const char *user_name); static lrmd_event_data_t *construct_op(xmlNode * rsc_op, const char *rsc_id, const char *operation); void do_lrm_rsc_op(lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request); void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id); static void lrm_connection_destroy(void) { if (is_set(fsa_input_register, R_LRM_CONNECTED)) { crm_crit("LRM Connection failed"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); - clear_bit_inplace(fsa_input_register, R_LRM_CONNECTED); + clear_bit(fsa_input_register, R_LRM_CONNECTED); } else { crm_info("LRM Connection disconnected"); } } static void free_deletion_op(gpointer value) { struct pending_deletion_op_s *op = value; free(op->rsc); delete_ha_msg_input(op->input); free(op); } static void free_recurring_op(gpointer value) { struct recurring_op_s *op = (struct recurring_op_s *)value; free(op->rsc_id); free(op->op_type); free(op->op_key); free(op); } static char * make_stop_id(const char *rsc, int call_id) { char *op_id = NULL; op_id = calloc(1, strlen(rsc) + 34); if (op_id != NULL) { snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id); } return op_id; } static void copy_instance_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") == NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } static void copy_meta_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") != NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } static void history_cache_destroy(gpointer data) { rsc_history_t *entry = data; if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } free(entry->rsc.type); free(entry->rsc.class); free(entry->rsc.provider); lrmd_free_event(entry->failed); lrmd_free_event(entry->last); free(entry->id); free(entry); } static void update_history_cache(lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int target_rc = 0; rsc_history_t *entry = NULL; if (op->rsc_deleted) { crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); delete_rsc_status(op->rsc_id, cib_quorum_override, NULL); return; } if (safe_str_eq(op->op_type, RSC_NOTIFY)) { return; } crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type); entry = g_hash_table_lookup(resource_history, op->rsc_id); if (entry == NULL && rsc) { entry = calloc(1, sizeof(rsc_history_t)); entry->id = strdup(op->rsc_id); g_hash_table_insert(resource_history, entry->id, entry); entry->rsc.id = entry->id; entry->rsc.type = strdup(rsc->type); entry->rsc.class = strdup(rsc->class); if (rsc->provider) { entry->rsc.provider = strdup(rsc->provider); } else { entry->rsc.provider = NULL; } } else if (entry == NULL) { crm_info("Resource %s no longer exists, not updating cache", op->rsc_id); return; } target_rc = rsc_op_expected_rc(op); if (op->op_status == PCMK_LRM_OP_CANCELLED) { if (op->interval > 0) { GList *gIter, *gIterNext; crm_trace("Removing cancelled recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIterNext) { lrmd_event_data_t *existing = gIter->data; gIterNext = gIter->next; if (safe_str_eq(op->rsc_id, existing->rsc_id) && safe_str_eq(op->op_type, existing->op_type) && op->interval == existing->interval) { lrmd_free_event(existing); entry->recurring_op_list = g_list_delete_link(entry->recurring_op_list, gIter); } } return; } else { crm_trace("Skipping %s_%s_%d rc=%d, status=%d", op->rsc_id, op->op_type, op->interval, op->rc, op->op_status); } } else if (did_rsc_op_fail(op, target_rc)) { /* We must store failed monitors here * - otherwise the block below will cause them to be forgetten them when a stop happens */ if (entry->failed) { lrmd_free_event(entry->failed); } entry->failed = lrmd_copy_event(op); } else if (op->interval == 0) { if (entry->last) { lrmd_free_event(entry->last); } entry->last = lrmd_copy_event(op); if (op->params && (safe_str_eq(CRMD_ACTION_START, op->op_type) || safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) { if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } entry->stop_params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params); } } if (op->interval > 0) { crm_trace("Adding recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval); entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); } else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) { GList *gIter = entry->recurring_op_list; crm_trace("Dropping %d recurring ops because of: %s_%s_%d", g_list_length(gIter), op->rsc_id, op->op_type, op->interval); for (; gIter != NULL; gIter = gIter->next) { lrmd_free_event(gIter->data); } g_list_free(entry->recurring_op_list); entry->recurring_op_list = NULL; } } /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (fsa_lrm_conn == NULL) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (action & A_LRM_DISCONNECT) { if (verify_stopped(cur_state, LOG_INFO) == FALSE) { if(action == A_LRM_DISCONNECT) { crmd_fsa_stall(NULL); return; } } if (is_set(fsa_input_register, R_LRM_CONNECTED)) { - clear_bit_inplace(fsa_input_register, R_LRM_CONNECTED); + clear_bit(fsa_input_register, R_LRM_CONNECTED); fsa_lrm_conn->cmds->disconnect(fsa_lrm_conn); crm_info("Disconnected from the LRM"); } g_hash_table_destroy(resource_history); resource_history = NULL; g_hash_table_destroy(deletion_ops); deletion_ops = NULL; g_hash_table_destroy(pending_ops); pending_ops = NULL; } if (action & A_LRM_CONNECT) { int ret = pcmk_ok; deletion_ops = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, free_deletion_op); pending_ops = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, free_recurring_op); resource_history = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, history_cache_destroy); crm_debug("Connecting to the LRM"); ret = fsa_lrm_conn->cmds->connect(fsa_lrm_conn, CRM_SYSTEM_CRMD, NULL); if (ret != pcmk_ok) { if (++num_lrm_register_fails < max_lrm_register_fails) { crm_warn("Failed to sign on to the LRM %d" " (%d max) times", num_lrm_register_fails, max_lrm_register_fails); crm_timer_start(wait_timer); crmd_fsa_stall(NULL); return; } } if (ret == pcmk_ok) { crm_trace("LRM: set_lrm_callback..."); fsa_lrm_conn->cmds->set_callback(fsa_lrm_conn, lrm_op_callback); } if (ret != pcmk_ok) { crm_err("Failed to sign on to the LRM %d" " (max) times", num_lrm_register_fails); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } - set_bit_inplace(fsa_input_register, R_LRM_CONNECTED); + set_bit(fsa_input_register, R_LRM_CONNECTED); crm_debug("LRM connection established"); } if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } static void ghash_print_pending(gpointer key, gpointer value, gpointer user_data) { const char *stop_id = key; int *log_level = user_data; struct recurring_op_s *pending = value; do_crm_log(*log_level, "Pending action: %s (%s)", stop_id, pending->op_key); } static void ghash_print_pending_for_rsc(gpointer key, gpointer value, gpointer user_data) { const char *stop_id = key; char *rsc = user_data; struct recurring_op_s *pending = value; if (safe_str_eq(rsc, pending->rsc_id)) { crm_notice("%sction %s (%s) incomplete at shutdown", pending->interval == 0 ? "A" : "Recurring a", stop_id, pending->op_key); } } static void ghash_count_pending(gpointer key, gpointer value, gpointer user_data) { int *counter = user_data; struct recurring_op_s *pending = value; if (pending->interval > 0) { /* Ignore recurring actions in the shutdown calculations */ return; } (*counter)++; } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; GHashTableIter gIter; rsc_history_t *entry = NULL; crm_debug("Checking for active resources before exit"); if (cur_state == S_TERMINATE) { log_level = LOG_ERR; } if (pending_ops) { if (is_set(fsa_input_register, R_LRM_CONNECTED)) { /* Only log/complain about non-recurring actions */ g_hash_table_foreach_remove(pending_ops, stop_recurring_actions, NULL); } g_hash_table_foreach(pending_ops, ghash_count_pending, &counter); } if (counter > 0) { rc = FALSE; do_crm_log(log_level, "%d pending LRM operations at shutdown%s", counter, cur_state == S_TERMINATE ? "" : "... waiting"); if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_foreach(pending_ops, ghash_print_pending, &log_level); } goto bail; } if (resource_history == NULL) { goto bail; } g_hash_table_iter_init(&gIter, resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if (is_rsc_active(entry->id) == FALSE) { continue; } crm_err("Resource %s was active at shutdown." " You may ignore this error if it is unmanaged.", entry->id); g_hash_table_foreach(pending_ops, ghash_print_pending_for_rsc, entry->id); } bail: - set_bit_inplace(fsa_input_register, R_SENT_RSC_STOP); + set_bit(fsa_input_register, R_SENT_RSC_STOP); if (cur_state == S_TERMINATE) { rc = TRUE; } return rc; } static char * get_rsc_metadata(const char *type, const char *class, const char *provider) { char *metadata = NULL; CRM_CHECK(type != NULL, return NULL); CRM_CHECK(class != NULL, return NULL); if (provider == NULL) { provider = "heartbeat"; } crm_trace("Retreiving metadata for %s::%s:%s", type, class, provider); fsa_lrm_conn->cmds->get_metadata(fsa_lrm_conn, class, provider, type, &metadata, 0); if (metadata) { /* copy the metadata because the LRM likes using * g_alloc instead of cl_malloc */ char *m_copy = strdup(metadata); g_free(metadata); metadata = m_copy; } else { crm_warn("No metadata found for %s::%s:%s", type, class, provider); } return metadata; } typedef struct reload_data_s { char *key; char *metadata; time_t last_query; gboolean can_reload; GListPtr restart_list; } reload_data_t; static void g_hash_destroy_reload(gpointer data) { reload_data_t *reload = data; free(reload->key); free(reload->metadata); g_list_free_full(reload->restart_list, free); free(reload); } GHashTable *reload_hash = NULL; static GListPtr get_rsc_restart_list(lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int len = 0; char *key = NULL; char *copy = NULL; const char *value = NULL; const char *provider = NULL; xmlNode *param = NULL; xmlNode *params = NULL; xmlNode *actions = NULL; xmlNode *metadata = NULL; time_t now = time(NULL); reload_data_t *reload = NULL; if (reload_hash == NULL) { reload_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_reload); } provider = rsc->provider; if (provider == NULL) { provider = "heartbeat"; } len = strlen(rsc->type) + strlen(rsc->class) + strlen(provider) + 4; key = malloc( len); snprintf(key, len, "%s::%s:%s", rsc->type, rsc->class, provider); reload = g_hash_table_lookup(reload_hash, key); if (reload && ((now - 9) > reload->last_query) && safe_str_eq(op->op_type, RSC_START)) { reload = NULL; /* re-query */ } if (reload == NULL) { xmlNode *action = NULL; reload = calloc(1, sizeof(reload_data_t)); g_hash_table_replace(reload_hash, key, reload); reload->last_query = now; reload->key = key; key = NULL; reload->metadata = get_rsc_metadata(rsc->type, rsc->class, provider); metadata = string2xml(reload->metadata); if (metadata == NULL) { crm_err("Metadata for %s::%s:%s is not valid XML", rsc->provider, rsc->class, rsc->type); goto cleanup; } actions = find_xml_node(metadata, "actions", TRUE); for (action = __xml_first_child(actions); action != NULL; action = __xml_next(action)) { if (crm_str_eq((const char *)action->name, "action", TRUE)) { value = crm_element_value(action, "name"); if (safe_str_eq("reload", value)) { reload->can_reload = TRUE; break; } } } if (reload->can_reload == FALSE) { goto cleanup; } params = find_xml_node(metadata, "parameters", TRUE); for (param = __xml_first_child(params); param != NULL; param = __xml_next(param)) { if (crm_str_eq((const char *)param->name, "parameter", TRUE)) { value = crm_element_value(param, "unique"); if (crm_is_true(value)) { value = crm_element_value(param, "name"); if (value == NULL) { crm_err("%s: NULL param", key); continue; } crm_debug("Attr %s is not reloadable", value); copy = strdup(value); CRM_CHECK(copy != NULL, continue); reload->restart_list = g_list_append(reload->restart_list, copy); } } } } cleanup: free(key); free_xml(metadata); return reload->restart_list; } static void append_restart_list(lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, xmlNode * update, const char *version) { int len = 0; char *list = NULL; char *digest = NULL; const char *value = NULL; gboolean non_empty = FALSE; xmlNode *restart = NULL; GListPtr restart_list = NULL; GListPtr lpc = NULL; if (op->interval > 0) { /* monitors are not reloadable */ return; } else if (op->params == NULL) { crm_debug("%s has no parameters", ID(update)); return; } else if (rsc == NULL) { return; } else if (crm_str_eq(CRMD_ACTION_STOP, op->op_type, TRUE)) { /* Stopped resources don't need to be reloaded */ return; } else if (compare_version("1.0.8", version) > 0) { /* Caller version does not support reloads */ return; } restart_list = get_rsc_restart_list(rsc, op); if (restart_list == NULL) { /* Resource does not support reloads */ return; } restart = create_xml_node(NULL, XML_TAG_PARAMS); for (lpc = restart_list; lpc != NULL; lpc = lpc->next) { const char *param = (const char *)lpc->data; int start = len; CRM_CHECK(param != NULL, continue); value = g_hash_table_lookup(op->params, param); if (value != NULL) { non_empty = TRUE; crm_xml_add(restart, param, value); } len += strlen(param) + 2; list = realloc(list, len + 1); sprintf(list + start, " %s ", param); } digest = calculate_operation_digest(restart, version); crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list); crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); #if 0 crm_debug("%s: %s, %s", rsc->id, digest, list); if (non_empty) { crm_log_xml_debug(restart, "restart digest source"); } #endif free_xml(restart); free(digest); free(list); } static gboolean build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *src) { int target_rc = 0; xmlNode *xml_op = NULL; const char *caller_version = CRM_FEATURE_SET; if (op == NULL) { return FALSE; } else if (AM_I_DC) { } else if (fsa_our_dc_version != NULL) { caller_version = fsa_our_dc_version; } else if (op->params == NULL) { caller_version = fsa_our_dc_version; } else { /* there is a small risk in formerly mixed clusters that * it will be sub-optimal. * however with our upgrade policy, the update we send * should still be completely supported anyway */ caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); crm_debug("Falling back to operation originator version: %s", caller_version); } target_rc = rsc_op_expected_rc(op); xml_op = create_operation_update(parent, op, caller_version, target_rc, src, LOG_DEBUG); if (xml_op) { append_restart_list(rsc, op, xml_op, caller_version); } return TRUE; } gboolean is_rsc_active(const char *rsc_id) { rsc_history_t *entry = NULL; crm_trace("Processing lrmd_rsc_info_t entry %s", rsc_id); entry = g_hash_table_lookup(resource_history, rsc_id); if (entry == NULL || entry->last == NULL) { return FALSE; } if (entry->last->rc == PCMK_EXECRA_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)) { return FALSE; } else if (entry->last->rc == PCMK_EXECRA_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) { /* a stricter check is too complex... * leave that to the PE */ return FALSE; } else if (entry->last->rc == PCMK_EXECRA_NOT_RUNNING) { return FALSE; } else if (entry->last->interval == 0 && entry->last->rc == PCMK_EXECRA_NOT_CONFIGURED) { /* Badly configured resources can't be reliably stopped */ return FALSE; } return TRUE; } gboolean build_active_RAs(xmlNode * rsc_list) { GHashTableIter iter; rsc_history_t *entry = NULL; g_hash_table_iter_init(&iter, resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { GList *gIter = NULL; xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.class); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider); build_operation_update(xml_rsc, &(entry->rsc), entry->last, __FUNCTION__); build_operation_update(xml_rsc, &(entry->rsc), entry->failed, __FUNCTION__); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { build_operation_update(xml_rsc, &(entry->rsc), gIter->data, __FUNCTION__); } } return FALSE; } xmlNode * do_lrm_query(gboolean is_replace) { gboolean shut_down = FALSE; xmlNode *xml_result = NULL; xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; const char *exp_state = CRMD_STATE_ACTIVE; if (is_set(fsa_input_register, R_SHUTDOWN)) { exp_state = CRMD_STATE_INACTIVE; shut_down = TRUE; } xml_state = create_node_state(fsa_our_uname, ACTIVESTATUS, XML_BOOLEAN_TRUE, ONLINESTATUS, CRMD_JOINSTATE_MEMBER, exp_state, !shut_down, __FUNCTION__); xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, fsa_our_uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(rsc_list); xml_result = create_cib_fragment(xml_state, XML_CIB_TAG_STATUS); crm_log_xml_trace(xml_state, "Current state of the LRM"); free_xml(xml_state); return xml_result; } static void notify_deleted(ha_msg_input_t * input, const char *rsc_id, int rc) { lrmd_event_data_t *op = NULL; const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); crm_info("Notifying %s on %s that %s was%s deleted", from_sys, from_host, rsc_id, rc == pcmk_ok ? "" : " not"); op = construct_op(input->xml, rsc_id, CRMD_ACTION_DELETE); CRM_ASSERT(op != NULL); if (rc == pcmk_ok) { op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_EXECRA_OK; } else { op->op_status = PCMK_LRM_OP_ERROR; op->rc = PCMK_EXECRA_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { /* this isn't expected - trigger a new transition */ time_t now = time(NULL); char *now_s = crm_itoa(now); crm_debug("Triggering a refresh after %s deleted %s from the LRM", from_sys, rsc_id); update_attr(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, FALSE); free(now_s); } } static gboolean lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) { struct delete_event_s *event = user_data; struct pending_deletion_op_s *op = value; if (safe_str_eq(event->rsc, op->rsc)) { notify_deleted(op->input, event->rsc, event->rc); return TRUE; } return FALSE; } static gboolean lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; struct recurring_op_s *pending = value; if (safe_str_eq(rsc, pending->rsc_id)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } /* * Remove the rsc from the CIB * * Avoids refreshing the entire LRM section of this host */ #define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" static int delete_rsc_status(const char *rsc_id, int call_options, const char *user_name) { char *rsc_xpath = NULL; int max = 0; int rc = pcmk_ok; CRM_CHECK(rsc_id != NULL, return -ENXIO); max = strlen(rsc_template) + strlen(rsc_id) + strlen(fsa_our_uname) + 1; rsc_xpath = calloc(1, max); snprintf(rsc_xpath, max, rsc_template, fsa_our_uname, rsc_id); rc = fsa_cib_conn->cmds->delegated_variant_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath, NULL, NULL, call_options | cib_xpath, user_name); free(rsc_xpath); return rc; } static void delete_rsc_entry(ha_msg_input_t * input, const char *rsc_id, GHashTableIter *rsc_gIter, int rc, const char *user_name) { struct delete_event_s event; CRM_CHECK(rsc_id != NULL, return); if (rc == pcmk_ok) { char *rsc_id_copy = strdup(rsc_id); if (rsc_gIter) g_hash_table_iter_remove(rsc_gIter); else g_hash_table_remove(resource_history, rsc_id_copy); crm_debug("sync: Sending delete op for %s", rsc_id_copy); delete_rsc_status(rsc_id_copy, cib_quorum_override, user_name); g_hash_table_foreach_remove(pending_ops, lrm_remove_deleted_op, rsc_id_copy); free(rsc_id_copy); } if (input) { notify_deleted(input, rsc_id, rc); } event.rc = rc; event.rsc = rsc_id; g_hash_table_foreach_remove(deletion_ops, lrm_remove_deleted_rsc, &event); } /* * Remove the op from the CIB * * Avoids refreshing the entire LRM section of this host */ #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s']" #define op_call_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s' and @"XML_LRM_ATTR_CALLID"='%d']" static void delete_op_entry(lrmd_event_data_t * op, const char *rsc_id, const char *key, int call_id) { xmlNode *xml_top = NULL; if (op != NULL) { xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); if(op->interval > 0) { char *op_id = generate_op_key(op->rsc_id, op->op_type, op->interval); /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ crm_xml_add(xml_top, XML_ATTR_ID, op_id); free(op_id); } crm_debug("async: Sending delete op for %s_%s_%d (call=%d)", op->rsc_id, op->op_type, op->interval, op->call_id); fsa_cib_conn->cmds->delete(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_quorum_override); } else if (rsc_id != NULL && key != NULL) { int max = 0; char *op_xpath = NULL; if (call_id > 0) { max = strlen(op_call_template) + strlen(rsc_id) + strlen(fsa_our_uname) + strlen(key) + 10; op_xpath = calloc(1, max); snprintf(op_xpath, max, op_call_template, fsa_our_uname, rsc_id, key, call_id); } else { max = strlen(op_template) + strlen(rsc_id) + strlen(fsa_our_uname) + strlen(key) + 1; op_xpath = calloc(1, max); snprintf(op_xpath, max, op_template, fsa_our_uname, rsc_id, key); } crm_debug("sync: Sending delete op for %s (call=%d)", rsc_id, call_id); fsa_cib_conn->cmds->delete(fsa_cib_conn, op_xpath, NULL, cib_quorum_override | cib_xpath); free(op_xpath); } else { crm_err("Not enough information to delete op entry: rsc=%p key=%p", rsc_id, key); return; } crm_log_xml_trace(xml_top, "op:cancel"); free_xml(xml_top); } void lrm_clear_last_failure(const char *rsc_id) { char *attr = NULL; GHashTableIter iter; rsc_history_t *entry = NULL; attr = generate_op_key(rsc_id, "last_failure", 0); delete_op_entry(NULL, rsc_id, attr, 0); free(attr); if (!resource_history) { return; } g_hash_table_iter_init(&iter, resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { if (safe_str_eq(rsc_id, entry->id)) { lrmd_free_event(entry->failed); entry->failed = NULL; } } } static gboolean cancel_op(const char *rsc_id, const char *key, int op, gboolean remove) { int rc = pcmk_ok; struct recurring_op_s *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); if (key == NULL) { key = make_stop_id(rsc_id, op); } pending = g_hash_table_lookup(pending_ops, key); if (pending) { if (remove && pending->remove == FALSE) { pending->remove = TRUE; crm_debug("Scheduling %s for removal", key); } if (pending->cancelled) { crm_debug("Operation %s already cancelled", key); return TRUE; } pending->cancelled = TRUE; } else { crm_info("No pending op found for %s", key); return TRUE; } crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); rc = fsa_lrm_conn->cmds->cancel(fsa_lrm_conn, pending->rsc_id, pending->op_type, pending->interval); if (rc == pcmk_ok) { crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); } else { crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key); /* The caller needs to make sure the entry is * removed from the pending_ops list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from pending_ops will block * the node from shutting down */ return FALSE; } return TRUE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrmd_rsc_info_t *rsc; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { struct cancel_data *data = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (safe_str_eq(op->op_key, data->key)) { data->done = TRUE; if (cancel_op(data->rsc->id, key, op->call_id, data->remove) == FALSE) { return TRUE; } } return FALSE; } static gboolean cancel_op_key(lrmd_rsc_info_t * rsc, const char *key, gboolean remove) { struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; g_hash_table_foreach_remove(pending_ops, cancel_action_by_key, &data); return data.done; } static lrmd_rsc_info_t * get_lrm_resource(xmlNode * resource, xmlNode * op_msg, gboolean do_create) { lrmd_rsc_info_t *rsc = NULL; const char *id = ID(resource); const char *type = crm_element_value(resource, XML_ATTR_TYPE); const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER); const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG); crm_trace("Retrieving %s from the LRM.", id); CRM_CHECK(id != NULL, return NULL); rsc = fsa_lrm_conn->cmds->get_rsc_info(fsa_lrm_conn, id, 0); if (!rsc && long_id) { rsc = fsa_lrm_conn->cmds->get_rsc_info(fsa_lrm_conn, long_id, 0); } if (!rsc && do_create) { CRM_CHECK(class != NULL, return NULL); CRM_CHECK(type != NULL, return NULL); crm_trace("Adding rsc %s before operation", id); fsa_lrm_conn->cmds->register_rsc(fsa_lrm_conn, id, class, provider, type, 0); rsc = fsa_lrm_conn->cmds->get_rsc_info(fsa_lrm_conn, id, 0); if (!rsc) { fsa_data_t *msg_data = NULL; crm_err("Could not add resource %s to LRM", id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } return rsc; } static void delete_resource(const char *id, lrmd_rsc_info_t * rsc, GHashTableIter *gIter, const char *sys, const char *host, const char *user, ha_msg_input_t * request) { int rc = pcmk_ok; crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host); if (rsc) { rc = fsa_lrm_conn->cmds->unregister_rsc(fsa_lrm_conn, id, 0); } if (rc == pcmk_ok) { crm_trace("Resource '%s' deleted", id); } else if (rc == -EINPROGRESS) { crm_info("Deletion of resource '%s' pending", id); if (request) { struct pending_deletion_op_s *op = NULL; char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE); op = calloc(1, sizeof(struct pending_deletion_op_s)); op->rsc = strdup(rsc->id); op->input = copy_ha_msg_input(request); g_hash_table_insert(deletion_ops, ref, op); } return; } else { crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d", id, sys, user ? user : "internal", host, rc); } delete_rsc_entry(request, id, gIter, rc, user); } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean done = FALSE; gboolean create_rsc = TRUE; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *user_name = NULL; #if ENABLE_ACL user_name = crm_element_value(input->msg, F_CRM_USER); crm_trace("LRM command from user '%s'", user_name); #endif crm_op = crm_element_value(input->msg, F_CRM_TASK); from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } crm_trace("LRM command from: %s", from_sys); if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { operation = CRMD_ACTION_DELETE; } else if (safe_str_eq(operation, CRM_OP_LRM_REFRESH)) { crm_op = CRM_OP_LRM_REFRESH; } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) { rsc_history_t *entry = NULL; lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* The lrmd can not fail a resource, it does not understand the * concept of success or failure in relation to a resource, it simply * executes operations and reports the results. We determine what a failure is. * Becaues of this, if we want to fail a resource we have to fake what we * understand a failure to look like. * * To do this we create a fake lrmd operation event for the resource * we want to fail. We then pass that event to the lrmd client callback * so it will be processed as if it actually came from the lrmd. */ op = construct_op(input->xml, ID(xml_rsc), "asyncmon"); free((char *) op->user_data); op->user_data = NULL; entry = g_hash_table_lookup(resource_history, op->rsc_id); /* Make sure the call id is greater than the last successful operation, * otherwise the failure will not result in a possible recovery of the resource * as it could appear the failure occurred before the successful start */ if (entry && entry->last) { op->call_id = entry->last->call_id + 1; if (op->call_id < 0) { op->call_id = 1; } } op->interval = 0; op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_EXECRA_UNKNOWN_ERROR; CRM_ASSERT(op != NULL); # if ENABLE_ACL if (user_name && is_privileged(user_name) == FALSE) { crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } # endif rsc = get_lrm_resource(xml_rsc, input->xml, create_rsc); if (rsc) { crm_info("Failing resource %s...", rsc->id); process_lrm_event(op); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_EXECRA_OK; lrmd_free_rsc_info(rsc); } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(input->msg, "bad input"); } send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } else if (input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { int rc = pcmk_ok; xmlNode *fragment = do_lrm_query(TRUE); crm_info("Forcing a local LRM refresh"); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); free_xml(fragment); } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { xmlNode *data = do_lrm_query(FALSE); xmlNode *reply = create_reply(input->msg, data); if (relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_xml_err(reply, "reply"); } free_xml(reply); free_xml(data); } else if (safe_str_eq(operation, CRM_OP_PROBED)) { update_attrd(NULL, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name); } else if (safe_str_eq(crm_op, CRM_OP_REPROBE)) { GHashTableIter gIter; rsc_history_t *entry = NULL; crm_notice("Forcing the status of all resources to be redetected"); g_hash_table_iter_init(&gIter, resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { delete_resource(entry->id, &entry->rsc, &gIter, from_sys, from_host, user_name, NULL); } /* Now delete the copy in the CIB */ erase_status_tag(fsa_our_uname, XML_CIB_TAG_LRM, cib_scope_local); /* And finally, _delete_ the value in attrd * Setting it to FALSE results in the PE sending us back here again */ update_attrd(NULL, CRM_OP_PROBED, NULL, user_name); } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; xmlNode *params = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* only the first 16 chars are used by the LRM */ params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { create_rsc = FALSE; } rsc = get_lrm_resource(xml_rsc, input->xml, create_rsc); if (rsc == NULL && create_rsc) { crm_err("Invalid resource definition"); crm_log_xml_warn(input->msg, "bad input"); } else if (rsc == NULL) { lrmd_event_data_t *op = NULL; crm_notice("Not creating resource for a %s event: %s", operation, ID(input->xml)); delete_rsc_entry(input, ID(xml_rsc), NULL, pcmk_ok, user_name); op = construct_op(input->xml, ID(xml_rsc), operation); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_EXECRA_OK; CRM_ASSERT(op != NULL); send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); } else if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) { lrmd_event_data_t *op = NULL; char *op_key = NULL; char *meta_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; const char *op_interval = NULL; CRM_CHECK(params != NULL, crm_log_xml_warn(input->xml, "Bad command"); return); meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL); op_interval = crm_element_value(params, meta_key); free(meta_key); meta_key = crm_meta_name(XML_LRM_ATTR_TASK); op_task = crm_element_value(params, meta_key); free(meta_key); meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); call_id = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(op_task != NULL, crm_log_xml_warn(input->xml, "Bad command"); return); CRM_CHECK(op_interval != NULL, crm_log_xml_warn(input->xml, "Bad command"); return); op = construct_op(input->xml, rsc->id, op_task); CRM_ASSERT(op != NULL); op_key = generate_op_key(rsc->id, op_task, crm_parse_int(op_interval, "0")); crm_debug("PE requested op %s (call=%s) be cancelled", op_key, call_id ? call_id : "NA"); call = crm_parse_int(call_id, "0"); if (call == 0) { /* the normal case when the PE cancels a recurring op */ done = cancel_op_key(rsc, op_key, TRUE); } else { /* the normal case when the PE cancels an orphan op */ done = cancel_op(rsc->id, NULL, call, TRUE); } if (done == FALSE) { crm_debug("Nothing known about operation %d for %s", call, op_key); delete_op_entry(NULL, rsc->id, op_key, call); /* needed?? surely not otherwise the cancel_op_(_key) wouldn't * have failed in the first place */ g_hash_table_remove(pending_ops, op_key); } op->rc = PCMK_EXECRA_OK; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(from_host, from_sys, rsc, op, rsc->id); free(op_key); lrmd_free_event(op); } else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { int cib_rc = pcmk_ok; CRM_ASSERT(rsc != NULL); cib_rc = delete_rsc_status(rsc->id, cib_dryrun | cib_sync_call, user_name); if (cib_rc != pcmk_ok) { lrmd_event_data_t *op = NULL; crm_err ("Attempt of deleting resource status '%s' from CIB for %s (user=%s) on %s failed: (rc=%d) %s", rsc->id, from_sys, user_name ? user_name : "unknown", from_host, cib_rc, pcmk_strerror(cib_rc)); op = construct_op(input->xml, rsc->id, operation); op->op_status = PCMK_LRM_OP_ERROR; if (cib_rc == -EACCES) { op->rc = PCMK_EXECRA_INSUFFICIENT_PRIV; } else { op->rc = PCMK_EXECRA_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc->id); lrmd_free_event(op); return; } delete_resource(rsc->id, rsc, NULL, from_sys, from_host, user_name, input); } else if (rsc != NULL) { do_lrm_rsc_op(rsc, operation, input->xml, input->msg); } lrmd_free_rsc_info(rsc); } else { crm_err("Operation was neither a lrm_query, nor a rsc op. %s", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static lrmd_event_data_t * construct_op(xmlNode * rsc_op, const char *rsc_id, const char *operation) { lrmd_event_data_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; const char *op_interval = NULL; GHashTable *params = NULL; const char *transition = NULL; CRM_LOG_ASSERT(rsc_id != NULL); op = calloc(1, sizeof(lrmd_event_data_t)); op->type = lrmd_event_exec_complete; op->op_type = strdup(operation); op->op_status = PCMK_LRM_OP_PENDING; op->rc = -1; op->rsc_id = strdup(rsc_id); op->interval = 0; op->timeout = 0; op->start_delay = 0; if (rsc_op == NULL) { CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); op->user_data = NULL; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); crm_trace("Constructed %s op for %s", operation, rsc_id); return op; } params = xml2list(rsc_op); g_hash_table_remove(params, CRM_META "_op_target_rc"); op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY); op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT); op_interval = crm_meta_value(params, XML_LRM_ATTR_INTERVAL); op->interval = crm_parse_int(op_interval, "0"); op->timeout = crm_parse_int(op_timeout, "0"); op->start_delay = crm_parse_int(op_delay, "0"); if (safe_str_neq(operation, RSC_STOP)) { op->params = params; } else { rsc_history_t *entry = g_hash_table_lookup(resource_history, rsc_id); /* If we do not have stop parameters cached, use * whatever we are given */ if (!entry || !entry->stop_params) { op->params = params; } else { /* Copy the cached parameter list so that we stop the resource * with the old attributes, not the new ones */ op->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_foreach(params, copy_meta_keys, op->params); g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); g_hash_table_destroy(params); params = NULL; } } /* sanity */ if (op->interval < 0) { op->interval = 0; } if (op->timeout <= 0) { op->timeout = op->interval; } if (op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = strdup(transition); if (op->interval != 0) { if (safe_str_eq(operation, CRMD_ACTION_START) || safe_str_eq(operation, CRMD_ACTION_STOP)) { crm_err("Start and Stop actions cannot have an interval: %d", op->interval); op->interval = 0; } } crm_trace("Constructed %s op for %s: interval=%d", operation, rsc_id, op->interval); return op; } void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; xmlNode *fragment; CRM_CHECK(op != NULL, return); if (op->rsc_id == NULL) { CRM_LOG_ASSERT(rsc_id != NULL); op->rsc_id = strdup(rsc_id); } if (to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } update = create_node_state(fsa_our_uname, NULL, NULL, NULL, NULL, NULL, FALSE, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, __FUNCTION__); fragment = create_cib_fragment(update, XML_CIB_TAG_STATUS); reply = create_request(CRM_OP_INVOKE_LRM, fragment, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_log_xml_trace(update, "ACK Update"); crm_debug("ACK'ing resource op %s_%s_%d from %s: %s", op->rsc_id, op->op_type, op->interval, op->user_data, crm_element_value(reply, XML_ATTR_REFERENCE)); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(fragment); free_xml(update); free_xml(reply); } static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { lrmd_rsc_info_t *rsc = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (op->interval != 0 && safe_str_eq(op->rsc_id, rsc->id)) { if (cancel_op(rsc->id, key, op->call_id, FALSE) == FALSE) { return TRUE; } } return FALSE; } static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct recurring_op_s *op = (struct recurring_op_s *)value; if (op->interval != 0) { remove = cancel_op(op->rsc_id, key, op->call_id, FALSE); } return remove; } void do_lrm_rsc_op(lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request) { int call_id = 0; char *op_id = NULL; lrmd_event_data_t *op = NULL; lrmd_key_value_t *params = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; CRM_CHECK(rsc != NULL, return); if (msg != NULL) { transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if (transition == NULL) { crm_log_xml_err(msg, "Missing transition number"); } } op = construct_op(msg, rsc->id, operation); /* stop the monitor before stopping the resource */ if (crm_str_eq(operation, CRMD_ACTION_STOP, TRUE) || crm_str_eq(operation, CRMD_ACTION_DEMOTE, TRUE) || crm_str_eq(operation, CRMD_ACTION_PROMOTE, TRUE) || crm_str_eq(operation, CRMD_ACTION_MIGRATE, TRUE)) { g_hash_table_foreach_remove(pending_ops, stop_recurring_action_by_rsc, rsc); } /* now do the op */ crm_debug("Performing key=%s op=%s_%s_%d", transition, rsc->id, operation, op->interval); if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE && fsa_state != S_TRANSITION_ENGINE) { if (safe_str_neq(operation, "fail") && safe_str_neq(operation, CRMD_ACTION_STOP)) { crm_info("Discarding attempt to perform action %s on %s" " in state %s", operation, rsc->id, fsa_state2string(fsa_state)); op->rc = 99; op->op_status = PCMK_LRM_OP_ERROR; send_direct_ack(NULL, NULL, rsc, op, rsc->id); lrmd_free_event(op); free(op_id); return; } } op_id = generate_op_key(rsc->id, op->op_type, op->interval); if (op->interval > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(rsc, op_id, FALSE); } if (op->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, op->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { params = lrmd_key_value_add(params, key, value); } } call_id = fsa_lrm_conn->cmds->exec(fsa_lrm_conn, rsc->id, op->op_type, op->user_data, op->interval, op->timeout, op->start_delay, 0, params); if (call_id <= 0) { crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); struct recurring_op_s *pending = NULL; pending = calloc(1, sizeof(struct recurring_op_s)); crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval = op->interval; pending->op_type = strdup(operation); pending->op_key = strdup(op_id); pending->rsc_id = strdup(rsc->id); pending->last_rc = -1; /* All rc are positive, -1 indicates the last rc has not been set. */ pending->last_op_status = -2; g_hash_table_replace(pending_ops, call_id_s, pending); if (op->interval > 0 && op->start_delay > START_DELAY_THRESHOLD) { char *uuid = NULL; int dummy = 0, target_rc = 0; crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc); free(uuid); op->rc = target_rc; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(NULL, NULL, rsc, op, rsc->id); } } free(op_id); lrmd_free_event(op); return; } static void cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { switch (rc) { case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: crm_trace("Resource update %d complete: rc=%d", call_id, rc); break; default: crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc)); } } static int do_update_resource(lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { /* */ int rc = pcmk_ok; xmlNode *update, *iter = NULL; int call_opt = cib_quorum_override; CRM_CHECK(op != NULL, return 0); if (fsa_state == S_ELECTION || fsa_state == S_PENDING) { crm_info("Sending update to local CIB in state: %s", fsa_state2string(fsa_state)); call_opt |= cib_scope_local; } iter = create_xml_node(iter, XML_CIB_TAG_STATUS); update = iter; iter = create_xml_node(iter, XML_CIB_TAG_STATE); set_uuid(iter, XML_ATTR_UUID, fsa_our_uname); crm_xml_add(iter, XML_ATTR_UNAME, fsa_our_uname); crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__); iter = create_xml_node(iter, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, __FUNCTION__); if (rsc) { crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class); crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider); CRM_CHECK(rsc->type != NULL, crm_err("Resource %s has no value for type", op->rsc_id)); CRM_CHECK(rsc->class != NULL, crm_err("Resource %s has no value for class", op->rsc_id)); } else { crm_warn("Resource %s no longer exists in the lrmd", op->rsc_id); goto cleanup; } /* make it an asyncronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down then anyway? * (probably because of an internal error) * * Worst case: * we get shot for having resources "running" when the really weren't * * the alternative however means blocking here for too long, which * isnt acceptable */ fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL); /* the return code is a call number, not an error code */ crm_trace("Sent resource state update message: %d", rc); fsa_cib_conn->cmds->register_callback(fsa_cib_conn, rc, 60, FALSE, NULL, "cib_rsc_callback", cib_rsc_callback); cleanup: free_xml(update); return rc; } void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data) { CRM_CHECK(FALSE, return); } gboolean process_lrm_event(lrmd_event_data_t * op) { char *op_id = NULL; char *op_key = NULL; int update_id = 0; int log_level = LOG_ERR; gboolean removed = FALSE; lrmd_rsc_info_t *rsc = NULL; struct recurring_op_s *pending = NULL; CRM_CHECK(op != NULL, return FALSE); if (op->type == lrmd_event_disconnect) { lrm_connection_destroy(); return TRUE; } else if (op->type != lrmd_event_exec_complete) { return TRUE; } CRM_CHECK(op->rsc_id != NULL, return FALSE); op_id = make_stop_id(op->rsc_id, op->call_id); pending = g_hash_table_lookup(pending_ops, op_id); /* ignore recurring ops that have not changed. */ if (op->interval && pending && (pending->last_rc == op->rc) && (pending->last_op_status == op->op_status)) { free(op_id); return TRUE; } if (pending) { pending->last_rc = op->rc; pending->last_op_status = op->op_status; } op_key = generate_op_key(op->rsc_id, op->op_type, op->interval); rsc = fsa_lrm_conn->cmds->get_rsc_info(fsa_lrm_conn, op->rsc_id, 0); switch (op->op_status) { case PCMK_LRM_OP_ERROR: case PCMK_LRM_OP_PENDING: case PCMK_LRM_OP_NOTSUPPORTED: break; case PCMK_LRM_OP_CANCELLED: log_level = LOG_INFO; break; case PCMK_LRM_OP_DONE: log_level = LOG_NOTICE; break; case PCMK_LRM_OP_TIMEOUT: log_level = LOG_DEBUG_3; crm_err("LRM operation %s (%d) %s (timeout=%dms)", op_key, op->call_id, services_lrm_status_str(op->op_status), op->timeout); break; default: crm_err("Mapping unknown status (%d) to ERROR", op->op_status); op->op_status = PCMK_LRM_OP_ERROR; } if (op->op_status == PCMK_LRM_OP_ERROR && (op->rc == PCMK_EXECRA_RUNNING_MASTER || op->rc == PCMK_EXECRA_NOT_RUNNING)) { /* Leave it up to the TE/PE to decide if this is an error */ op->op_status = PCMK_LRM_OP_DONE; log_level = LOG_INFO; } if (op->op_status != PCMK_LRM_OP_CANCELLED) { if (safe_str_eq(op->op_type, RSC_NOTIFY)) { /* Keep notify ops out of the CIB */ send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else { update_id = do_update_resource(rsc, op); } } else if (op->interval == 0) { /* This will occur when "crm resource cleanup" is called while actions are in-flight */ crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id); send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else if (pending == NULL) { /* Operations that are cancelled may safely be removed * from the pending op list before the lrmd completion event * is received. Only report non-cancelled ops here. */ if (op->op_status != PCMK_LRM_OP_CANCELLED) { crm_err("Op %s (call=%d): No 'pending' entry", op_key, op->call_id); } } else if (op->user_data == NULL) { crm_err("Op %s (call=%d): No user data", op_key, op->call_id); } else if (pending->remove) { delete_op_entry(op, op->rsc_id, op_key, op->call_id); } else { /* Before a stop is called, no need to direct ack */ crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id); } if ((op->interval == 0) && g_hash_table_remove(pending_ops, op_id)) { removed = TRUE; crm_trace("Op %s (call=%d, stop-id=%s): Confirmed", op_key, op->call_id, op_id); } if (op->op_status == PCMK_LRM_OP_DONE) { do_crm_log(log_level, "LRM operation %s (call=%d, rc=%d, cib-update=%d, confirmed=%s) %s", op_key, op->call_id, op->rc, update_id, removed ? "true" : "false", lrmd_event_rc2str(op->rc)); } else { do_crm_log(log_level, "LRM operation %s (call=%d, status=%d, cib-update=%d, confirmed=%s) %s", op_key, op->call_id, op->op_status, update_id, removed ? "true" : "false", services_lrm_status_str(op->op_status)); } if (op->rc != 0 && op->output != NULL) { crm_info("Result: %s", op->output); } else if (op->output != NULL) { crm_debug("Result: %s", op->output); } if (op->rsc_deleted) { crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); delete_rsc_entry(NULL, op->rsc_id, NULL, pcmk_ok, NULL); } /* If a shutdown was escalated while operations were pending, * then the FSA will be stalled right now... allow it to continue */ mainloop_set_trigger(fsa_source); update_history_cache(rsc, op); lrmd_free_rsc_info(rsc); free(op_key); free(op_id); return TRUE; } diff --git a/crmd/membership.c b/crmd/membership.c index 9b47b57b99..4ad3381b62 100644 --- a/crmd/membership.c +++ b/crmd/membership.c @@ -1,254 +1,254 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* put these first so that uuid_t is defined without conflicts */ #include #include #include #include #include #include #include #include #include #include #include #include #include gboolean membership_flux_hack = FALSE; void post_cache_update(int instance); void ghash_update_cib_node(gpointer key, gpointer value, gpointer user_data); int last_peer_update = 0; extern GHashTable *voted; struct update_data_s { const char *state; const char *caller; xmlNode *updates; gboolean overwrite_join; }; extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); static void check_dead_member(const char *uname, GHashTable * members) { CRM_CHECK(uname != NULL, return); if (members != NULL && g_hash_table_lookup(members, uname) != NULL) { crm_err("%s didnt really leave the membership!", uname); return; } erase_node_from_join(uname); if (voted != NULL) { g_hash_table_remove(voted, uname); } if (safe_str_eq(fsa_our_uname, uname)) { crm_err("We're not part of the cluster anymore"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); } else if (AM_I_DC == FALSE && safe_str_eq(uname, fsa_our_dc)) { crm_warn("Our DC node (%s) left the cluster", uname); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } else if (fsa_state == S_INTEGRATION || fsa_state == S_FINALIZE_JOIN) { check_join_state(fsa_state, __FUNCTION__); } } static void reap_dead_nodes(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; if (crm_is_peer_active(node) == FALSE) { check_dead_member(node->uname, NULL); fail_incompletable_actions(transition_graph, node->uuid); } } gboolean ever_had_quorum = FALSE; void post_cache_update(int instance) { xmlNode *no_op = NULL; crm_peer_seq = instance; crm_debug("Updated cache after membership event %d.", instance); g_hash_table_foreach(crm_peer_cache, reap_dead_nodes, NULL); - set_bit_inplace(fsa_input_register, R_MEMBERSHIP); + set_bit(fsa_input_register, R_MEMBERSHIP); if (AM_I_DC) { populate_cib_nodes(FALSE); do_update_cib_nodes(FALSE, __FUNCTION__); } /* * If we lost nodes, we should re-check the election status * Safe to call outside of an election */ register_fsa_action(A_ELECTION_CHECK); /* Membership changed, remind everyone we're here. * This will aid detection of duplicate DCs */ no_op = create_request(CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD, AM_I_DC ? CRM_SYSTEM_DC : CRM_SYSTEM_CRMD, NULL); send_cluster_message(NULL, crm_msg_crmd, no_op, FALSE); free_xml(no_op); } static void crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; last_peer_update = 0; if (rc == pcmk_ok) { crm_trace("Node update %d complete", call_id); } else { crm_err("Node update %d failed", call_id); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } void ghash_update_cib_node(gpointer key, gpointer value, gpointer user_data) { xmlNode *tmp1 = NULL; const char *join = NULL; crm_node_t *node = value; struct update_data_s *data = (struct update_data_s *)user_data; enum crm_proc_flag messaging = crm_proc_plugin | crm_proc_heartbeat | crm_proc_cpg; data->state = XML_BOOLEAN_NO; if (safe_str_eq(node->state, CRM_NODE_ACTIVE)) { data->state = XML_BOOLEAN_YES; } crm_debug("Updating %s: %s (overwrite=%s) hash_size=%d", node->uname, data->state, data->overwrite_join ? "true" : "false", g_hash_table_size(confirmed_nodes)); if (data->overwrite_join) { if ((node->processes & proc_flags) == FALSE) { join = CRMD_JOINSTATE_DOWN; } else { const char *peer_member = g_hash_table_lookup(confirmed_nodes, node->uname); if (peer_member != NULL) { join = CRMD_JOINSTATE_MEMBER; } else { join = CRMD_JOINSTATE_PENDING; } } } tmp1 = create_node_state(node->uname, (node->processes & messaging) ? ACTIVESTATUS : DEADSTATUS, data->state, (node->processes & proc_flags) ? ONLINESTATUS : OFFLINESTATUS, join, NULL, FALSE, data->caller); add_node_copy(data->updates, tmp1); free_xml(tmp1); } void do_update_cib_nodes(gboolean overwrite, const char *caller) { int call_id = 0; int call_options = cib_scope_local | cib_quorum_override; struct update_data_s update_data; xmlNode *fragment = NULL; if (crm_peer_cache == NULL) { /* We got a replace notification before being connected to * the CCM. * So there is no need to update the local CIB with our values * - since we have none. */ return; } else if (AM_I_DC == FALSE) { return; } fragment = create_xml_node(NULL, XML_CIB_TAG_STATUS); update_data.caller = caller; update_data.updates = fragment; update_data.overwrite_join = overwrite; g_hash_table_foreach(crm_peer_cache, ghash_update_cib_node, &update_data); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, call_options, call_id, NULL); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, crmd_node_update_complete); last_peer_update = call_id; free_xml(fragment); } static void cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; if (rc == pcmk_ok) { crm_trace("Quorum update %d complete", call_id); } else { crm_err("Quorum update %d failed", call_id); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } void crm_update_quorum(gboolean quorum, gboolean force_update) { ever_had_quorum |= quorum; if (AM_I_DC && (force_update || fsa_has_quorum != quorum)) { int call_id = 0; xmlNode *update = NULL; int call_options = cib_scope_local | cib_quorum_override; update = create_xml_node(NULL, XML_TAG_CIB); crm_xml_add_int(update, XML_ATTR_HAVE_QUORUM, quorum); set_uuid(update, XML_ATTR_DC_UUID, fsa_our_uname); fsa_cib_update(XML_TAG_CIB, update, call_options, call_id, NULL); crm_debug("Updating quorum status to %s (call=%d)", quorum ? "true" : "false", call_id); add_cib_op_callback(fsa_cib_conn, call_id, FALSE, NULL, cib_quorum_update_complete); free_xml(update); } fsa_has_quorum = quorum; } diff --git a/crmd/messages.c b/crmd/messages.c index 7aa0f5059e..5181b9c887 100644 --- a/crmd/messages.c +++ b/crmd/messages.c @@ -1,984 +1,984 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include GListPtr fsa_message_queue = NULL; extern void crm_shutdown(int nsig); void handle_response(xmlNode * stored_msg); enum crmd_fsa_input handle_request(xmlNode * stored_msg); enum crmd_fsa_input handle_shutdown_request(xmlNode * stored_msg); #ifdef MSG_LOG # define ROUTER_RESULT(x) crm_trace("Router result: %s", x); \ crm_log_xml_trace(msg, "router.log"); #else # define ROUTER_RESULT(x) crm_trace("Router result: %s", x) #endif /* debug only, can wrap all it likes */ int last_data_id = 0; void register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, fsa_data_t * cur_data, void *new_data, const char *raised_from) { /* save the current actions if any */ if (fsa_actions != A_NOTHING) { register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL, I_NULL, cur_data ? cur_data->data : NULL, fsa_actions, TRUE, __FUNCTION__); } /* reset the action list */ fsa_actions = A_NOTHING; /* register the error */ register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from); } int register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, void *data, long long with_actions, gboolean prepend, const char *raised_from) { unsigned old_len = g_list_length(fsa_message_queue); fsa_data_t *fsa_data = NULL; last_data_id++; CRM_CHECK(raised_from != NULL, raised_from = ""); crm_trace("%s %s FSA input %d (%s) (cause=%s) %s data", raised_from, prepend ? "prepended" : "appended", last_data_id, fsa_input2string(input), fsa_cause2string(cause), data ? "with" : "without"); if (input == I_WAIT_FOR_EVENT) { do_fsa_stall = TRUE; crm_debug("Stalling the FSA pending further input: cause=%s", fsa_cause2string(cause)); if (old_len > 0) { crm_warn("%s stalled the FSA with pending inputs", raised_from); fsa_dump_queue(LOG_DEBUG); } if (data == NULL) { - set_bit_inplace(fsa_actions, with_actions); + set_bit(fsa_actions, with_actions); with_actions = A_NOTHING; return 0; } crm_err("%s stalled the FSA with data - this may be broken", raised_from); } if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) { /* no point doing anything */ crm_err("Cannot add entry to queue: no input and no action"); return 0; } fsa_data = calloc(1, sizeof(fsa_data_t)); fsa_data->id = last_data_id; fsa_data->fsa_input = input; fsa_data->fsa_cause = cause; fsa_data->origin = raised_from; fsa_data->data = NULL; fsa_data->data_type = fsa_dt_none; fsa_data->actions = with_actions; if (with_actions != A_NOTHING) { crm_trace("Adding actions %.16llx to input", with_actions); } if (data != NULL) { switch (cause) { case C_FSA_INTERNAL: case C_CRMD_STATUS_CALLBACK: case C_IPC_MESSAGE: case C_HA_MESSAGE: crm_trace("Copying %s data from %s as a HA msg", fsa_cause2string(cause), raised_from); CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL, crm_err("Bogus data from %s", raised_from)); fsa_data->data = copy_ha_msg_input(data); fsa_data->data_type = fsa_dt_ha_msg; break; case C_LRM_OP_CALLBACK: crm_trace("Copying %s data from %s as lrmd_event_data_t", fsa_cause2string(cause), raised_from); fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data); fsa_data->data_type = fsa_dt_lrm; break; case C_CCM_CALLBACK: case C_SUBSYSTEM_CONNECT: case C_LRM_MONITOR_CALLBACK: case C_TIMER_POPPED: case C_SHUTDOWN: case C_HEARTBEAT_FAILED: case C_HA_DISCONNECT: case C_ILLEGAL: case C_UNKNOWN: case C_STARTUP: crm_err("Copying %s data (from %s)" " not yet implemented", fsa_cause2string(cause), raised_from); exit(1); break; } crm_trace("%s data copied", fsa_cause2string(fsa_data->fsa_cause)); } /* make sure to free it properly later */ if (prepend) { crm_trace("Prepending input"); fsa_message_queue = g_list_prepend(fsa_message_queue, fsa_data); } else { fsa_message_queue = g_list_append(fsa_message_queue, fsa_data); } crm_trace("Queue len: %d", g_list_length(fsa_message_queue)); fsa_dump_queue(LOG_DEBUG_2); if (old_len == g_list_length(fsa_message_queue)) { crm_err("Couldnt add message to the queue"); } if (fsa_source) { crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); } return last_data_id; } void fsa_dump_queue(int log_level) { int offset = 0; GListPtr lpc = NULL; for (lpc = fsa_message_queue; lpc != NULL; lpc = lpc->next) { fsa_data_t *data = (fsa_data_t *) lpc->data; do_crm_log_unlikely(log_level, "queue[%d(%d)]: input %s raised by %s()\t(cause=%s)", offset++, data->id, fsa_input2string(data->fsa_input), data->origin, fsa_cause2string(data->fsa_cause)); } } ha_msg_input_t * copy_ha_msg_input(ha_msg_input_t * orig) { ha_msg_input_t *copy = NULL; xmlNodePtr data = NULL; if (orig != NULL) { crm_trace("Copy msg"); data = copy_xml(orig->msg); } else { crm_trace("No message to copy"); } copy = new_ha_msg_input(data); if (orig && orig->msg != NULL) { CRM_CHECK(copy->msg != NULL, crm_err("copy failed")); } return copy; } void delete_fsa_input(fsa_data_t * fsa_data) { lrmd_event_data_t *op = NULL; xmlNode *foo = NULL; if (fsa_data == NULL) { return; } crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause)); if (fsa_data->data != NULL) { switch (fsa_data->data_type) { case fsa_dt_ha_msg: delete_ha_msg_input(fsa_data->data); break; case fsa_dt_xml: foo = fsa_data->data; free_xml(foo); break; case fsa_dt_lrm: op = (lrmd_event_data_t *) fsa_data->data; lrmd_free_event(op); break; case fsa_dt_none: if (fsa_data->data != NULL) { crm_err("Dont know how to free %s data from %s", fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); exit(1); } break; } crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause)); } free(fsa_data); } /* returns the next message */ fsa_data_t * get_message(void) { fsa_data_t *message = g_list_nth_data(fsa_message_queue, 0); fsa_message_queue = g_list_remove(fsa_message_queue, message); crm_trace("Processing input %d", message->id); return message; } /* returns the current head of the FIFO queue */ gboolean is_message(void) { return (g_list_length(fsa_message_queue) > 0); } void * fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller) { void *ret_val = NULL; if (fsa_data == NULL) { crm_err("%s: No FSA data available", caller); } else if (fsa_data->data == NULL) { crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin); } else if (fsa_data->data_type != a_type) { crm_crit( "%s: Message data was the wrong type! %d vs. requested=%d." " Origin: %s", caller, fsa_data->data_type, a_type, fsa_data->origin); CRM_ASSERT(fsa_data->data_type == a_type); } else { ret_val = fsa_data->data; } return ret_val; } /* A_MSG_ROUTE */ void do_msg_route(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); route_message(msg_data->fsa_cause, input->msg); } void route_message(enum crmd_fsa_cause cause, xmlNode * input) { ha_msg_input_t fsa_input; enum crmd_fsa_input result = I_NULL; fsa_input.msg = input; CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return); /* try passing the buck first */ if (relay_message(input, cause == C_IPC_MESSAGE)) { return; } /* handle locally */ result = handle_message(input); /* done or process later? */ switch (result) { case I_NULL: case I_CIB_OP: case I_ROUTER: case I_NODE_JOIN: case I_JOIN_REQUEST: case I_JOIN_RESULT: break; default: /* Defering local processing of message */ register_fsa_input_later(cause, result, &fsa_input); return; } if (result != I_NULL) { /* add to the front of the queue */ register_fsa_input(cause, result, &fsa_input); } } gboolean relay_message(xmlNode * msg, gboolean originated_locally) { int dest = 1; int is_for_dc = 0; int is_for_dcib = 0; int is_for_te = 0; int is_for_crm = 0; int is_for_cib = 0; int is_local = 0; gboolean processing_complete = FALSE; const char *host_to = crm_element_value(msg, F_CRM_HOST_TO); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); const char *type = crm_element_value(msg, F_TYPE); const char *msg_error = NULL; crm_trace("Routing message %s", crm_element_value(msg, XML_ATTR_REFERENCE)); if (msg == NULL) { msg_error = "Cannot route empty message"; } else if (safe_str_eq(CRM_OP_HELLO, crm_element_value(msg, F_CRM_TASK))) { /* quietly ignore */ processing_complete = TRUE; } else if (safe_str_neq(type, T_CRM)) { msg_error = "Bad message type"; } else if (sys_to == NULL) { msg_error = "Bad message destination: no subsystem"; } if (msg_error != NULL) { processing_complete = TRUE; crm_err("%s", msg_error); crm_log_xml_warn(msg, "bad msg"); } if (processing_complete) { return TRUE; } processing_complete = TRUE; is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0); is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0); is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0); is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0); is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0); is_local = 0; if (host_to == NULL || strlen(host_to) == 0) { if (is_for_dc || is_for_te) { is_local = 0; } else if (is_for_crm && originated_locally) { is_local = 0; } else { is_local = 1; } } else if (safe_str_eq(fsa_our_uname, host_to)) { is_local = 1; } if (is_for_dc || is_for_dcib || is_for_te) { if (AM_I_DC && is_for_te) { ROUTER_RESULT("Message result: Local relay"); send_msg_via_ipc(msg, sys_to); } else if (AM_I_DC) { ROUTER_RESULT("Message result: DC/CRMd process"); processing_complete = FALSE; /* more to be done by caller */ } else if (originated_locally && safe_str_neq(sys_from, CRM_SYSTEM_PENGINE) && safe_str_neq(sys_from, CRM_SYSTEM_TENGINE)) { /* Neither the TE or PE should be sending messages * to DC's on other nodes * * By definition, if we are no longer the DC, then * the PE or TE's data should be discarded */ #if SUPPORT_COROSYNC if (is_openais_cluster()) { dest = text2msg_type(sys_to); } #endif ROUTER_RESULT("Message result: External relay to DC"); send_cluster_message(host_to, dest, msg, TRUE); } else { /* discard */ ROUTER_RESULT("Message result: Discard, not DC"); } } else if (is_local && (is_for_crm || is_for_cib)) { ROUTER_RESULT("Message result: CRMd process"); processing_complete = FALSE; /* more to be done by caller */ } else if (is_local) { ROUTER_RESULT("Message result: Local relay"); send_msg_via_ipc(msg, sys_to); } else { #if SUPPORT_COROSYNC if (is_openais_cluster()) { dest = text2msg_type(sys_to); } #endif ROUTER_RESULT("Message result: External relay"); send_cluster_message(host_to, dest, msg, TRUE); } return processing_complete; } static gboolean process_hello_message(xmlNode * hello, char **uuid, char **client_name, char **major_version, char **minor_version) { const char *local_uuid; const char *local_client_name; const char *local_major_version; const char *local_minor_version; *uuid = NULL; *client_name = NULL; *major_version = NULL; *minor_version = NULL; if (hello == NULL) { return FALSE; } local_uuid = crm_element_value(hello, "client_uuid"); local_client_name = crm_element_value(hello, "client_name"); local_major_version = crm_element_value(hello, "major_version"); local_minor_version = crm_element_value(hello, "minor_version"); if (local_uuid == NULL || strlen(local_uuid) == 0) { crm_err("Hello message was not valid (field %s not found)", "uuid"); return FALSE; } else if (local_client_name == NULL || strlen(local_client_name) == 0) { crm_err("Hello message was not valid (field %s not found)", "client name"); return FALSE; } else if (local_major_version == NULL || strlen(local_major_version) == 0) { crm_err("Hello message was not valid (field %s not found)", "major version"); return FALSE; } else if (local_minor_version == NULL || strlen(local_minor_version) == 0) { crm_err("Hello message was not valid (field %s not found)", "minor version"); return FALSE; } *uuid = strdup(local_uuid); *client_name = strdup(local_client_name); *major_version = strdup(local_major_version); *minor_version = strdup(local_minor_version); crm_trace("Hello message ok"); return TRUE; } gboolean crmd_authorize_message(xmlNode * client_msg, crmd_client_t * curr_client) { /* check the best case first */ const char *sys_from = crm_element_value(client_msg, F_CRM_SYS_FROM); char *uuid = NULL; char *client_name = NULL; char *major_version = NULL; char *minor_version = NULL; const char *filtered_from; gpointer table_key = NULL; gboolean auth_result = FALSE; gboolean can_reply = FALSE; /* no-one has registered with this id */ xmlNode *xml = NULL; const char *op = crm_element_value(client_msg, F_CRM_TASK); if (safe_str_neq(CRM_OP_HELLO, op)) { if (sys_from == NULL) { crm_warn("Message [%s] was had no value for %s... discarding", crm_element_value(client_msg, XML_ATTR_REFERENCE), F_CRM_SYS_FROM); return FALSE; } filtered_from = sys_from; /* The CIB can have two names on the DC */ if (strcasecmp(sys_from, CRM_SYSTEM_DCIB) == 0) filtered_from = CRM_SYSTEM_CIB; if (g_hash_table_lookup(ipc_clients, filtered_from) != NULL) { can_reply = TRUE; /* reply can be routed */ } crm_trace("Message reply can%s be routed from %s.", can_reply ? "" : " not", sys_from); if (can_reply == FALSE) { crm_warn("Message [%s] not authorized", crm_element_value(client_msg, XML_ATTR_REFERENCE)); } return can_reply; } crm_trace("received client join msg"); crm_log_xml_trace(client_msg, "join"); xml = get_message_xml(client_msg, F_CRM_DATA); auth_result = process_hello_message(xml, &uuid, &client_name, &major_version, &minor_version); if (auth_result == TRUE) { if (client_name == NULL || uuid == NULL) { crm_err("Bad client details (client_name=%s, uuid=%s)", crm_str(client_name), crm_str(uuid)); auth_result = FALSE; } } if (auth_result == TRUE) { /* check version */ int mav = atoi(major_version); int miv = atoi(minor_version); crm_trace("Checking client version number"); if (mav < 0 || miv < 0) { crm_err("Client version (%d:%d) is not acceptable", mav, miv); auth_result = FALSE; } } table_key = (gpointer) generate_hash_key(client_name, uuid); if (auth_result == TRUE) { xmlNode *xml = NULL; crm_trace("Accepted client %s", crm_str(table_key)); curr_client->table_key = table_key; curr_client->sub_sys = strdup(client_name); curr_client->uuid = strdup(uuid); g_hash_table_insert(ipc_clients, table_key, curr_client->ipc); xml = create_hello_message("n/a", CRM_SYSTEM_CRMD, "0", "1"); crm_ipcs_send(curr_client->ipc, xml, FALSE); free_xml(xml); crm_trace("Updated client list with %s", crm_str(table_key)); crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); } else { free(table_key); crm_warn("Rejected client logon request"); qb_ipcs_disconnect(curr_client->ipc); } free(uuid); free(minor_version); free(major_version); free(client_name); /* hello messages should never be processed further */ return FALSE; } enum crmd_fsa_input handle_message(xmlNode * msg) { const char *type = NULL; CRM_CHECK(msg != NULL, return I_NULL); type = crm_element_value(msg, F_CRM_MSG_TYPE); if (crm_str_eq(type, XML_ATTR_REQUEST, TRUE)) { return handle_request(msg); } else if (crm_str_eq(type, XML_ATTR_RESPONSE, TRUE)) { handle_response(msg); return I_NULL; } crm_err("Unknown message type: %s", type); return I_NULL; } static enum crmd_fsa_input handle_failcount_op(xmlNode * stored_msg) { const char *rsc = NULL; xmlNode *xml_rsc = get_xpath_object("//" XML_CIB_TAG_RESOURCE, stored_msg, LOG_ERR); if (xml_rsc) { rsc = ID(xml_rsc); } if (rsc) { char *attr = NULL; crm_info("Removing failcount for %s", rsc); attr = crm_concat("fail-count", rsc, '-'); update_attrd(NULL, attr, NULL, NULL); free(attr); attr = crm_concat("last-failure", rsc, '-'); update_attrd(NULL, attr, NULL, NULL); free(attr); lrm_clear_last_failure(rsc); } else { crm_log_xml_warn(stored_msg, "invalid failcount op"); } return I_NULL; } enum crmd_fsa_input handle_request(xmlNode * stored_msg) { xmlNode *msg = NULL; const char *op = crm_element_value(stored_msg, F_CRM_TASK); /* Optimize this for the DC - it has the most to do */ if (op == NULL) { crm_log_xml_err(stored_msg, "Bad message"); return I_NULL; } /*========== DC-Only Actions ==========*/ if (AM_I_DC) { if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) { return I_NODE_JOIN; } else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) { return I_JOIN_REQUEST; } else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) { return I_JOIN_RESULT; } else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("Shutting ourselves down (DC)"); return I_STOP; } else if (dc_match) { crm_err("We didnt ask to be shut down, yet our" " TE is telling us too." " Better get out now!"); return I_TERMINATE; } else if (fsa_state != S_STOPPING) { crm_err("Another node is asking us to shutdown" " but we think we're ok."); return I_ELECTION; } } else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { /* a slave wants to shut down */ /* create cib fragment and add to message */ return handle_shutdown_request(stored_msg); } } /*========== common actions ==========*/ if (strcmp(op, CRM_OP_NOVOTE) == 0) { ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__); } else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) { return handle_failcount_op(stored_msg); } else if (strcmp(op, CRM_OP_VOTE) == 0) { /* count the vote and decide what to do after that */ ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__); /* Sometimes we _must_ go into S_ELECTION */ if (fsa_state == S_HALT) { crm_debug("Forcing an election from S_HALT"); return I_ELECTION; #if 0 } else if (AM_I_DC) { /* This is the old way of doing things but what is gained? */ return I_ELECTION; #endif } } else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) { crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); return I_JOIN_OFFER; } else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) { crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); return I_JOIN_RESULT; } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0 || strcmp(op, CRM_OP_LRM_FAIL) == 0 || strcmp(op, CRM_OP_LRM_REFRESH) == 0 || strcmp(op, CRM_OP_REPROBE) == 0) { crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD); return I_ROUTER; } else if (strcmp(op, CRM_OP_NOOP) == 0) { return I_NULL; } else if (strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) { crm_shutdown(SIGTERM); /*return I_SHUTDOWN; */ return I_NULL; /*========== (NOT_DC)-Only Actions ==========*/ } else if (AM_I_DC == FALSE && strcmp(op, CRM_OP_SHUTDOWN) == 0) { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); gboolean dc_match = safe_str_eq(host_from, fsa_our_dc); if (dc_match || fsa_our_dc == NULL) { if (is_set(fsa_input_register, R_SHUTDOWN) == FALSE) { crm_err("We didn't ask to be shut down, yet our" " DC is telling us too."); - set_bit_inplace(fsa_input_register, R_STAYDOWN); + set_bit(fsa_input_register, R_STAYDOWN); return I_STOP; } crm_info("Shutting down"); return I_STOP; } else { crm_warn("Discarding %s op from %s", op, host_from); } } else if (strcmp(op, CRM_OP_PING) == 0) { /* eventually do some stuff to figure out * if we /are/ ok */ const char *sys_to = crm_element_value(stored_msg, F_CRM_SYS_TO); xmlNode *ping = create_xml_node(NULL, XML_CRM_TAG_PING); crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok"); crm_xml_add(ping, XML_PING_ATTR_SYSFROM, sys_to); crm_xml_add(ping, "crmd_state", fsa_state2string(fsa_state)); /* Ok, so technically not so interesting, but CTS needs to see this */ crm_notice("Current ping state: %s", fsa_state2string(fsa_state)); msg = create_reply(stored_msg, ping); relay_message(msg, TRUE); free_xml(ping); free_xml(msg); } else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) { xmlNode *options = get_xpath_object("//"XML_TAG_OPTIONS, stored_msg, LOG_ERR); int id = 0; if (options) { crm_element_value_int(options, XML_ATTR_ID, &id); } if (id) { reap_crm_member(id); } } else { crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node"); crm_log_xml_err(stored_msg, "Unexpected"); } return I_NULL; } void handle_response(xmlNode * stored_msg) { const char *op = crm_element_value(stored_msg, F_CRM_TASK); if (op == NULL) { crm_log_xml_err(stored_msg, "Bad message"); } else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) { /* Check if the PE answer been superceeded by a subsequent request? */ const char *msg_ref = crm_element_value(stored_msg, XML_ATTR_REFERENCE); if (msg_ref == NULL) { crm_err("%s - Ignoring calculation with no reference", op); } else if (safe_str_eq(msg_ref, fsa_pe_ref)) { ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input); crm_trace("Completed: %s...", fsa_pe_ref); } else { crm_info("%s calculation %s is obsolete", op, msg_ref); } } else if (strcmp(op, CRM_OP_VOTE) == 0 || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) { } else { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); crm_err("Unexpected response (op=%s, src=%s) sent to the %s", op, host_from, AM_I_DC ? "DC" : "CRMd"); } } enum crmd_fsa_input handle_shutdown_request(xmlNode * stored_msg) { /* handle here to avoid potential version issues * where the shutdown message/proceedure may have * been changed in later versions. * * This way the DC is always in control of the shutdown */ char *now_s = NULL; time_t now = time(NULL); xmlNode *node_state = NULL; const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); if (host_from == NULL) { /* we're shutting down and the DC */ host_from = fsa_our_uname; } crm_info("Creating shutdown request for %s (state=%s)", host_from, fsa_state2string(fsa_state)); crm_log_xml_trace(stored_msg, "message"); node_state = create_node_state(host_from, NULL, NULL, NULL, NULL, CRMD_STATE_INACTIVE, FALSE, __FUNCTION__); fsa_cib_anon_update(XML_CIB_TAG_STATUS, node_state, cib_quorum_override); crm_log_xml_trace(node_state, "Shutdown update"); free_xml(node_state); now_s = crm_itoa(now); update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s, NULL); free(now_s); /* will be picked up by the TE as long as its running */ return I_NULL; } /* msg is deleted by the time this returns */ extern gboolean process_te_message(xmlNode * msg, xmlNode * xml_data); gboolean send_msg_via_ipc(xmlNode * msg, const char *sys) { gboolean send_ok = TRUE; qb_ipcs_connection_t *client_channel; client_channel = (qb_ipcs_connection_t *) g_hash_table_lookup(ipc_clients, sys); if (crm_element_value(msg, F_CRM_HOST_FROM) == NULL) { crm_xml_add(msg, F_CRM_HOST_FROM, fsa_our_uname); } if (client_channel != NULL) { /* Transient clients such as crmadmin */ send_ok = crm_ipcs_send(client_channel, msg, TRUE); } else if (sys != NULL && strcmp(sys, CRM_SYSTEM_TENGINE) == 0) { xmlNode *data = get_message_xml(msg, F_CRM_DATA); process_te_message(msg, data); } else if (sys != NULL && strcmp(sys, CRM_SYSTEM_LRMD) == 0) { fsa_data_t fsa_data; ha_msg_input_t fsa_input; fsa_input.msg = msg; fsa_input.xml = get_message_xml(msg, F_CRM_DATA); fsa_data.id = 0; fsa_data.actions = 0; fsa_data.data = &fsa_input; fsa_data.fsa_input = I_MESSAGE; fsa_data.fsa_cause = C_IPC_MESSAGE; fsa_data.origin = __FUNCTION__; fsa_data.data_type = fsa_dt_ha_msg; #ifdef FSA_TRACE crm_trace("Invoking action A_LRM_INVOKE (%.16llx)", A_LRM_INVOKE); #endif do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, fsa_state, I_MESSAGE, &fsa_data); } else { crm_err("Unknown Sub-system (%s)... discarding message.", crm_str(sys)); send_ok = FALSE; } return send_ok; } ha_msg_input_t * new_ha_msg_input(xmlNode * orig) { ha_msg_input_t *input_copy = NULL; input_copy = calloc(1, sizeof(ha_msg_input_t)); input_copy->msg = orig; input_copy->xml = get_message_xml(input_copy->msg, F_CRM_DATA); return input_copy; } void delete_ha_msg_input(ha_msg_input_t * orig) { if (orig == NULL) { return; } free_xml(orig->msg); free(orig); } diff --git a/crmd/pengine.c b/crmd/pengine.c index 98f523df19..b866898c14 100644 --- a/crmd/pengine.c +++ b/crmd/pengine.c @@ -1,291 +1,291 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include /* for access */ #include /* for calls to open */ #include /* for calls to open */ #include /* for calls to open */ #include /* for getpwuid */ #include /* for initgroups */ #include /* for getrlimit */ #include /* for getrlimit */ #include #include #include #include #include #include #include #include struct crm_subsystem_s *pe_subsystem = NULL; void do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); static void save_cib_contents(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { char *id = user_data; register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); CRM_CHECK(id != NULL, return); if (rc == pcmk_ok) { int len = 15; char *filename = NULL; len += strlen(id); len += strlen(PE_STATE_DIR); filename = calloc(1, len); CRM_CHECK(filename != NULL, return); sprintf(filename, PE_STATE_DIR "/pe-core-%s.bz2", id); if (write_xml_file(output, filename, TRUE) < 0) { crm_err("Could not save CIB contents after PE crash to %s", filename); } else { crm_notice("Saved CIB contents after PE crash to %s", filename); } free(filename); } free(id); } static void pe_ipc_destroy(gpointer user_data) { - clear_bit_inplace(fsa_input_register, pe_subsystem->flag_connected); + clear_bit(fsa_input_register, pe_subsystem->flag_connected); if (is_set(fsa_input_register, pe_subsystem->flag_required)) { int rc = pcmk_ok; char *uuid_str = crm_generate_uuid(); crm_crit("Connection to the Policy Engine failed (pid=%d, uuid=%s)", pe_subsystem->pid, uuid_str); /* *The PE died... * * Save the current CIB so that we have a chance of * figuring out what killed it. * * Delay raising the I_ERROR until the query below completes or * 5s is up, whichever comes first. * */ rc = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); fsa_cib_conn->cmds->register_callback(fsa_cib_conn, rc, 5, FALSE, uuid_str, "save_cib_contents", save_cib_contents); } else { crm_info("Connection to the Policy Engine released"); } pe_subsystem->pid = -1; pe_subsystem->source = NULL; pe_subsystem->client = NULL; mainloop_set_trigger(fsa_source); return; } static int pe_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata) { xmlNode *msg = string2xml(buffer); if (msg) { route_message(C_IPC_MESSAGE, msg); } free_xml(msg); return 0; } /* A_PE_START, A_PE_STOP, A_TE_RESTART */ void do_pe_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { struct crm_subsystem_s *this_subsys = pe_subsystem; long long stop_actions = A_PE_STOP; long long start_actions = A_PE_START; static struct ipc_client_callbacks pe_callbacks = { .dispatch = pe_ipc_dispatch, .destroy = pe_ipc_destroy }; if (action & stop_actions) { - clear_bit_inplace(fsa_input_register, pe_subsystem->flag_required); + clear_bit(fsa_input_register, pe_subsystem->flag_required); mainloop_del_ipc_client(pe_subsystem->source); pe_subsystem->source = NULL; - clear_bit_inplace(fsa_input_register, pe_subsystem->flag_connected); + clear_bit(fsa_input_register, pe_subsystem->flag_connected); } if ((action & start_actions) && (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE)) { if (cur_state != S_STOPPING) { - set_bit_inplace(fsa_input_register, pe_subsystem->flag_required); + set_bit(fsa_input_register, pe_subsystem->flag_required); pe_subsystem->source = mainloop_add_ipc_client(CRM_SYSTEM_PENGINE, 5*1024*1024/* 5Mb */, NULL, &pe_callbacks); if (pe_subsystem->source == NULL) { crm_warn("Setup of client connection failed, not adding channel to mainloop"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return; } /* if (is_openais_cluster()) { */ /* pe_subsystem->pid = pe_subsystem->ipc->farside_pid; */ /* } */ - set_bit_inplace(fsa_input_register, pe_subsystem->flag_connected); + set_bit(fsa_input_register, pe_subsystem->flag_connected); } else { crm_info("Ignoring request to start %s while shutting down", this_subsys->name); } } } int fsa_pe_query = 0; char *fsa_pe_ref = NULL; /* A_PE_INVOKE */ void do_pe_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (AM_I_DC == FALSE) { crm_err("Not DC: No need to invoke the PE (anymore): %s", fsa_action2string(action)); return; } if (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) { if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Cannot shut down gracefully without the PE"); register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL); } else { crm_info("Waiting for the PE to connect"); crmd_fsa_stall(NULL); register_fsa_action(A_PE_START); } return; } if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { crm_err("Attempted to invoke the PE without a consistent" " copy of the CIB!"); /* start the join from scratch */ register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL); return; } fsa_pe_query = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query, fsa_state2string(fsa_state)); /* Make sure any queued calculations are discarded */ free(fsa_pe_ref); fsa_pe_ref = NULL; fsa_cib_conn->cmds->register_callback(fsa_cib_conn, fsa_pe_query, 60, FALSE, NULL, "do_pe_invoke_callback", do_pe_invoke_callback); } void do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { int sent; xmlNode *cmd = NULL; if (rc != pcmk_ok) { crm_err("Cant retrieve the CIB: %s (call %d)", pcmk_strerror(rc), call_id); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); return; } else if (call_id != fsa_pe_query) { crm_trace("Skipping superceeded CIB query: %d (current=%d)", call_id, fsa_pe_query); return; } else if (AM_I_DC == FALSE || is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) { crm_debug("No need to invoke the PE anymore"); return; } else if (fsa_state != S_POLICY_ENGINE) { crm_debug("Discarding PE request in state: %s", fsa_state2string(fsa_state)); return; } else if (last_peer_update != 0) { crm_debug("Re-asking for the CIB: peer update %d still pending", last_peer_update); sleep(1); register_fsa_action(A_PE_INVOKE); return; } else if (fsa_state != S_POLICY_ENGINE) { crm_err("Invoking PE in state: %s", fsa_state2string(fsa_state)); return; } CRM_LOG_ASSERT(output != NULL); crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid); crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum); if (ever_had_quorum && crm_have_quorum == FALSE) { crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1); } cmd = create_request(CRM_OP_PECALC, output, NULL, CRM_SYSTEM_PENGINE, CRM_SYSTEM_DC, NULL); free(fsa_pe_ref); fsa_pe_ref = crm_element_value_copy(cmd, XML_ATTR_REFERENCE); sent = crm_ipc_send(mainloop_get_ipc_client(pe_subsystem->source), cmd, NULL, 0); if (sent <= 0) { crm_err("Could not contact the pengine: %d", sent); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); } crm_debug("Invoking the PE: query=%d, ref=%s, seq=%llu, quorate=%d", fsa_pe_query, fsa_pe_ref, crm_peer_seq, fsa_has_quorum); free_xml(cmd); } diff --git a/crmd/subsystems.c b/crmd/subsystems.c index d1906c04b8..2f544cea17 100644 --- a/crmd/subsystems.c +++ b/crmd/subsystems.c @@ -1,186 +1,186 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include /* for access */ #include /* for calls to open */ #include /* for calls to open */ #include /* for calls to open */ #include /* for getpwuid */ #include /* for initgroups */ #include #include #include /* for getrlimit */ #include #include #include /* for getrlimit */ #include #include #include #include #include #include #include #include #include static void crmdManagedChildDied(GPid pid, gint status, gpointer user_data) { struct crm_subsystem_s *the_subsystem = user_data; if(WIFSIGNALED(status)) { int signo = WTERMSIG(status); int core = WCOREDUMP(status); crm_notice("Child process %s terminated with signal %d (pid=%d, core=%d)", the_subsystem->name, signo, the_subsystem->pid, core); } else if(WIFEXITED(status)) { int exitcode = WEXITSTATUS(status); do_crm_log(exitcode == 0 ? LOG_INFO : LOG_ERR, "Child process %s exited (pid=%d, rc=%d)", the_subsystem->name, the_subsystem->pid, exitcode); } else { crm_err("Process %s:[%d] exited?", the_subsystem->name, the_subsystem->pid); } } gboolean stop_subsystem(struct crm_subsystem_s *the_subsystem, gboolean force_quit) { int quit_signal = SIGTERM; crm_trace("Stopping sub-system \"%s\"", the_subsystem->name); - clear_bit_inplace(fsa_input_register, the_subsystem->flag_required); + clear_bit(fsa_input_register, the_subsystem->flag_required); if (the_subsystem->pid <= 0) { crm_trace("Client %s not running", the_subsystem->name); return FALSE; } if (is_set(fsa_input_register, the_subsystem->flag_connected) == FALSE) { /* running but not yet connected */ crm_debug("Stopping %s before it had connected", the_subsystem->name); } /* if(force_quit && the_subsystem->sent_kill == FALSE) { quit_signal = SIGKILL; } else if(force_quit) { crm_debug("Already sent -KILL to %s: [%d]", the_subsystem->name, the_subsystem->pid); } */ errno = 0; if (kill(the_subsystem->pid, quit_signal) == 0) { crm_info("Sent -TERM to %s: [%d]", the_subsystem->name, the_subsystem->pid); the_subsystem->sent_kill = TRUE; } else { crm_perror(LOG_ERR, "Sent -TERM to %s: [%d]", the_subsystem->name, the_subsystem->pid); } return TRUE; } gboolean start_subsystem(struct crm_subsystem_s * the_subsystem) { pid_t pid; struct stat buf; int s_res; unsigned int j; struct rlimit oflimits; const char *devnull = "/dev/null"; crm_info("Starting sub-system \"%s\"", the_subsystem->name); if (the_subsystem->pid > 0) { crm_warn("Client %s already running as pid %d", the_subsystem->name, (int)the_subsystem->pid); /* starting a started X is not an error */ return TRUE; } /* * We want to ensure that the exec will succeed before * we bother forking. */ if (access(the_subsystem->path, F_OK | X_OK) != 0) { crm_perror(LOG_ERR, "Cannot (access) exec %s", the_subsystem->path); return FALSE; } s_res = stat(the_subsystem->command, &buf); if (s_res != 0) { crm_perror(LOG_ERR, "Cannot (stat) exec %s", the_subsystem->command); return FALSE; } /* We need to fork so we can make child procs not real time */ switch (pid = fork()) { case -1: crm_err("Cannot fork."); return FALSE; default: /* Parent */ g_child_watch_add(pid, crmdManagedChildDied, the_subsystem); crm_trace("Client %s is has pid: %d", the_subsystem->name, pid); the_subsystem->pid = pid; return TRUE; case 0: /* Child */ /* create a new process group to avoid * being interupted by heartbeat */ setpgid(0, 0); break; } crm_debug("Executing \"%s (%s)\" (pid %d)", the_subsystem->command, the_subsystem->name, (int)getpid()); /* A precautionary measure */ getrlimit(RLIMIT_NOFILE, &oflimits); for (j = 0; j < oflimits.rlim_cur; ++j) { close(j); } (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ { char *opts[] = { strdup(the_subsystem->command), NULL }; (void)execvp(the_subsystem->command, opts); } /* Should not happen */ crm_perror(LOG_ERR, "FATAL: Cannot exec %s", the_subsystem->command); exit(100); /* Suppress respawning */ return TRUE; /* never reached */ } diff --git a/crmd/te_actions.c b/crmd/te_actions.c index 633a78998e..9e3abf63db 100644 --- a/crmd/te_actions.c +++ b/crmd/te_actions.c @@ -1,517 +1,517 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include char *te_uuid = NULL; void send_rsc_command(crm_action_t * action); static void te_start_action_timer(crm_graph_t * graph, crm_action_t * action) { action->timer = calloc(1, sizeof(crm_action_timer_t)); action->timer->timeout = action->timeout; action->timer->reason = timeout_action; action->timer->action = action; action->timer->source_id = g_timeout_add(action->timer->timeout + graph->network_delay, action_timer_callback, (void *)action->timer); CRM_ASSERT(action->timer->source_id != 0); } static gboolean te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo) { crm_debug("Pseudo action %d fired and confirmed", pseudo->id); pseudo->confirmed = TRUE; update_graph(graph, pseudo); trigger_graph(); return TRUE; } void send_stonith_update(crm_action_t * action, const char *target, const char *uuid) { int rc = pcmk_ok; /* zero out the node-status & remove all LRM status info */ xmlNode *node_state = create_xml_node(NULL, XML_CIB_TAG_STATE); CRM_CHECK(target != NULL, return); CRM_CHECK(uuid != NULL, return); crm_xml_add(node_state, XML_ATTR_UUID, uuid); crm_xml_add(node_state, XML_ATTR_UNAME, target); crm_xml_add(node_state, XML_CIB_ATTR_HASTATE, DEADSTATUS); crm_xml_add(node_state, XML_CIB_ATTR_INCCM, XML_BOOLEAN_NO); crm_xml_add(node_state, XML_CIB_ATTR_CRMDSTATE, OFFLINESTATUS); crm_xml_add(node_state, XML_CIB_ATTR_JOINSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_CIB_ATTR_EXPSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(node_state, XML_ATTR_ORIGIN, __FUNCTION__); rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state, cib_quorum_override | cib_scope_local | cib_can_create); /* Delay processing the trigger until the update completes */ crm_debug("Sending fencing update %d for %s", rc, target); add_cib_op_callback(fsa_cib_conn, rc, FALSE, strdup(target), cib_fencing_updated); /* Make sure it sticks */ /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */ erase_status_tag(target, XML_CIB_TAG_LRM, cib_scope_local); erase_status_tag(target, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); free_xml(node_state); /* Make sure the membership cache is accurate */ crm_update_peer(__FUNCTION__, 0, 0, 0, -1, crm_proc_none, uuid, target, NULL, CRM_NODE_LOST); return; } static gboolean te_fence_node(crm_graph_t * graph, crm_action_t * action) { int rc = 0; const char *id = NULL; const char *uuid = NULL; const char *target = NULL; const char *type = NULL; gboolean invalid_action = FALSE; enum stonith_call_options options = st_opt_none; id = ID(action->xml); target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); type = crm_meta_value(action->params, "stonith_action"); CRM_CHECK(id != NULL, invalid_action = TRUE); CRM_CHECK(uuid != NULL, invalid_action = TRUE); CRM_CHECK(type != NULL, invalid_action = TRUE); CRM_CHECK(target != NULL, invalid_action = TRUE); if (invalid_action) { crm_log_xml_warn(action->xml, "BadAction"); return FALSE; } crm_notice( "Executing %s fencing operation (%s) on %s (timeout=%d)", type, id, target, transition_graph->stonith_timeout); /* Passing NULL means block until we can connect... */ te_connect_stonith(NULL); if (confirmed_nodes && g_hash_table_size(confirmed_nodes) == 1) { options |= st_opt_allow_suicide; } rc = stonith_api->cmds->fence(stonith_api, options, target, type, transition_graph->stonith_timeout / 1000); stonith_api->cmds->register_callback(stonith_api, rc, transition_graph->stonith_timeout / 1000, FALSE, generate_transition_key(transition_graph->id, action->id, 0, te_uuid), "tengine_stonith_callback", tengine_stonith_callback); return TRUE; } static int get_target_rc(crm_action_t * action) { const char *target_rc_s = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC); if (target_rc_s != NULL) { return crm_parse_int(target_rc_s, "0"); } return 0; } static gboolean te_crm_command(crm_graph_t * graph, crm_action_t * action) { char *counter = NULL; xmlNode *cmd = NULL; gboolean is_local = FALSE; const char *id = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; gboolean rc = TRUE; gboolean no_wait = FALSE; id = ID(action->xml); task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(on_node != NULL && strlen(on_node) != 0, crm_err( "Corrupted command (id=%s) %s: no node", crm_str(id), crm_str(task)); return FALSE); crm_info( "Executing crm-event (%s): %s on %s%s%s", crm_str(id), crm_str(task), on_node, is_local ? " (local)" : "", no_wait ? " - no waiting" : ""); if (safe_str_eq(on_node, fsa_our_uname)) { is_local = TRUE; } value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); if (crm_is_true(value)) { no_wait = TRUE; } if (is_local && safe_str_eq(task, CRM_OP_SHUTDOWN)) { /* defer until everything else completes */ crm_info( "crm-event (%s) is a local shutdown", crm_str(id)); graph->completion_action = tg_shutdown; graph->abort_reason = "local shutdown"; action->confirmed = TRUE; update_graph(graph, action); trigger_graph(); return TRUE; } cmd = create_request(task, action->xml, on_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); counter = generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); rc = send_cluster_message(on_node, crm_msg_crmd, cmd, TRUE); free(counter); free_xml(cmd); if (rc == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if (no_wait) { action->confirmed = TRUE; update_graph(graph, action); trigger_graph(); } else { if (action->timeout <= 0) { crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %dms instead", action->id, task, on_node, action->timeout, graph->network_delay); action->timeout = graph->network_delay; } te_start_action_timer(graph, action); } return TRUE; } gboolean cib_action_update(crm_action_t * action, int status, int op_rc) { lrmd_event_data_t *op = NULL; xmlNode *state = NULL; xmlNode *rsc = NULL; xmlNode *xml_op = NULL; xmlNode *action_rsc = NULL; int rc = pcmk_ok; const char *name = NULL; const char *value = NULL; const char *rsc_id = NULL; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); int call_options = cib_quorum_override | cib_scope_local; int target_rc = get_target_rc(action); if (status == PCMK_LRM_OP_PENDING) { crm_debug("%s %d: Recording pending operation %s on %s", crm_element_name(action->xml), action->id, task_uuid, target); } else { crm_warn("%s %d: %s on %s timed out", crm_element_name(action->xml), action->id, task_uuid, target); } action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE); if (action_rsc == NULL) { return FALSE; } rsc_id = ID(action_rsc); CRM_CHECK(rsc_id != NULL, crm_log_xml_err(action->xml, "Bad:action"); return FALSE); /* update the CIB */ state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(state, XML_ATTR_UUID, target_uuid); crm_xml_add(state, XML_ATTR_UNAME, target); rsc = create_xml_node(state, XML_CIB_TAG_LRM); crm_xml_add(rsc, XML_ATTR_ID, target_uuid); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); crm_xml_add(rsc, XML_ATTR_ID, rsc_id); name = XML_ATTR_TYPE; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); name = XML_AGENT_ATTR_CLASS; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); name = XML_AGENT_ATTR_PROVIDER; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); op = convert_graph_action(NULL, action, status, op_rc); op->call_id = -1; op->user_data = generate_transition_key(transition_graph->id, action->id, target_rc, te_uuid); xml_op = create_operation_update(rsc, op, CRM_FEATURE_SET, target_rc, __FUNCTION__, LOG_INFO); lrmd_free_event(op); crm_trace("Updating CIB with \"%s\" (%s): %s %s on %s", status < 0 ? "new action" : XML_ATTR_TIMEOUT, crm_element_name(action->xml), crm_str(task), rsc_id, target); crm_log_xml_trace(xml_op, "Op"); rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options); crm_trace("Updating CIB with %s action %d: %s on %s (call_id=%d)", services_lrm_status_str(status), action->id, task_uuid, target, rc); add_cib_op_callback(fsa_cib_conn, rc, FALSE, NULL, cib_action_updated); free_xml(state); action->sent_update = TRUE; if (rc < pcmk_ok) { return FALSE; } return TRUE; } static gboolean te_rsc_command(crm_graph_t * graph, crm_action_t * action) { /* never overwrite stop actions in the CIB with * anything other than completed results * * Writing pending stops makes it look like the * resource is running again */ xmlNode *cmd = NULL; xmlNode *rsc_op = NULL; gboolean rc = TRUE; gboolean no_wait = FALSE; gboolean is_local = FALSE; char *counter = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *task_uuid = NULL; CRM_ASSERT(action != NULL); CRM_ASSERT(action->xml != NULL); action->executed = FALSE; on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(on_node != NULL && strlen(on_node) != 0, crm_err( "Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task)); return FALSE); rsc_op = action->xml; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); on_node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET); counter = generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter); if (safe_str_eq(on_node, fsa_our_uname)) { is_local = TRUE; } value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); if (crm_is_true(value)) { no_wait = TRUE; } crm_info("Initiating action %d: %s %s on %s%s%s", action->id, task, task_uuid, on_node, is_local ? " (local)" : "", no_wait ? " - no waiting" : ""); cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, on_node, CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL); if (is_local) { /* shortcut local resource commands */ ha_msg_input_t data = { .msg = cmd, .xml = rsc_op, }; fsa_data_t msg = { .id = 0, .data = &data, .data_type = fsa_dt_ha_msg, .fsa_input = I_NULL, .fsa_cause = C_FSA_INTERNAL, .actions = A_LRM_INVOKE, .origin = __FUNCTION__, }; do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg); } else { rc = send_cluster_message(on_node, crm_msg_lrmd, cmd, TRUE); } free(counter); free_xml(cmd); action->executed = TRUE; if (rc == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if (no_wait) { action->confirmed = TRUE; update_graph(transition_graph, action); trigger_graph(); } else { if (action->timeout <= 0) { crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %dms instead", action->id, task, task_uuid, on_node, action->timeout, graph->network_delay); action->timeout = graph->network_delay; } te_start_action_timer(graph, action); } value = crm_meta_value(action->params, XML_OP_ATTR_PENDING); if (crm_is_true(value)) { /* write a "pending" entry to the CIB, inhibit notification */ crm_info("Recording pending op %s in the CIB", task_uuid); cib_action_update(action, PCMK_LRM_OP_PENDING, PCMK_EXECRA_STATUS_UNKNOWN); } return TRUE; } crm_graph_functions_t te_graph_fns = { te_pseudo_action, te_rsc_command, te_crm_command, te_fence_node }; void notify_crmd(crm_graph_t * graph) { const char *type = "unknown"; enum crmd_fsa_input event = I_NULL; crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state)); CRM_CHECK(graph->complete, graph->complete = TRUE); switch (graph->completion_action) { case tg_stop: type = "stop"; /* fall through */ case tg_done: type = "done"; if (fsa_state == S_TRANSITION_ENGINE) { event = I_TE_SUCCESS; } break; case tg_restart: type = "restart"; if (fsa_state == S_TRANSITION_ENGINE) { if (transition_timer->period_ms > 0) { crm_timer_stop(transition_timer); crm_timer_start(transition_timer); } else { event = I_PE_CALC; } } else if (fsa_state == S_POLICY_ENGINE) { register_fsa_action(A_PE_INVOKE); } break; case tg_shutdown: type = "shutdown"; if (is_set(fsa_input_register, R_SHUTDOWN)) { event = I_STOP; } else { crm_err("We didn't ask to be shut down, yet our" " PE is telling us too."); event = I_TERMINATE; } } crm_debug( "Transition %d status: %s - %s", graph->id, type, crm_str(graph->abort_reason)); graph->abort_reason = NULL; graph->completion_action = tg_done; - clear_bit_inplace(fsa_input_register, R_IN_TRANSITION); + clear_bit(fsa_input_register, R_IN_TRANSITION); if (event != I_NULL) { register_fsa_input(C_FSA_INTERNAL, event, NULL); } else if (fsa_source) { mainloop_set_trigger(fsa_source); } } diff --git a/crmd/tengine.c b/crmd/tengine.c index 3c3a52c249..e28b8597a2 100644 --- a/crmd/tengine.c +++ b/crmd/tengine.c @@ -1,234 +1,234 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include /* for access */ #include /* for calls to open */ #include /* for calls to open */ #include /* for calls to open */ #include /* for getpwuid */ #include /* for initgroups */ #include /* for getrlimit */ #include /* for getrlimit */ #include #include #include #include #include #include #include #include #include extern crm_graph_functions_t te_graph_fns; struct crm_subsystem_s *te_subsystem = NULL; stonith_t *stonith_api = NULL; static void global_cib_callback(const xmlNode * msg, int callid, int rc, xmlNode * output) { } static crm_graph_t * create_blank_graph(void) { crm_graph_t *a_graph = unpack_graph(NULL, NULL); a_graph->complete = TRUE; a_graph->abort_reason = "DC Takeover"; a_graph->completion_action = tg_restart; return a_graph; } /* A_TE_START, A_TE_STOP, A_TE_RESTART */ void do_te_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean init_ok = TRUE; if (action & A_TE_STOP) { if (transition_graph) { destroy_graph(transition_graph); transition_graph = NULL; } if (fsa_cib_conn) { fsa_cib_conn->cmds->del_notify_callback( fsa_cib_conn, T_CIB_DIFF_NOTIFY, te_update_diff); } - clear_bit_inplace(fsa_input_register, te_subsystem->flag_connected); + clear_bit(fsa_input_register, te_subsystem->flag_connected); crm_info("Transitioner is now inactive"); } if ((action & A_TE_START) == 0) { return; } else if (is_set(fsa_input_register, te_subsystem->flag_connected)) { crm_debug("The transitioner is already active"); return; } else if ((action & A_TE_START) && cur_state == S_STOPPING) { crm_info("Ignoring request to start %s while shutting down", te_subsystem->name); return; } te_uuid = crm_generate_uuid(); crm_info("Registering TE UUID: %s", te_uuid); if (transition_trigger == NULL) { transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL); } if (pcmk_ok != fsa_cib_conn->cmds->add_notify_callback(fsa_cib_conn, T_CIB_DIFF_NOTIFY, te_update_diff)) { crm_err("Could not set CIB notification callback"); init_ok = FALSE; } if (pcmk_ok != fsa_cib_conn->cmds->set_op_callback(fsa_cib_conn, global_cib_callback)) { crm_err("Could not set CIB global callback"); init_ok = FALSE; } if (init_ok) { set_graph_functions(&te_graph_fns); if (transition_graph) { destroy_graph(transition_graph); } /* create a blank one */ crm_debug("Transitioner is now active"); transition_graph = create_blank_graph(); - set_bit_inplace(fsa_input_register, te_subsystem->flag_connected); + set_bit(fsa_input_register, te_subsystem->flag_connected); } } /* A_TE_INVOKE, A_TE_CANCEL */ void do_te_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (AM_I_DC == FALSE || (fsa_state != S_TRANSITION_ENGINE && (action & A_TE_INVOKE))) { crm_notice("No need to invoke the TE (%s) in state %s", fsa_action2string(action), fsa_state2string(fsa_state)); return; } if (action & A_TE_CANCEL) { crm_debug("Cancelling the transition: %s", transition_graph->complete ? "inactive" : "active"); abort_transition(INFINITY, tg_restart, "Peer Cancelled", NULL); if (transition_graph->complete == FALSE) { crmd_fsa_stall(NULL); } } else if (action & A_TE_HALT) { crm_debug("Halting the transition: %s", transition_graph->complete ? "inactive" : "active"); abort_transition(INFINITY, tg_stop, "Peer Halt", NULL); if (transition_graph->complete == FALSE) { crmd_fsa_stall(NULL); } } else if (action & A_TE_INVOKE) { const char *value = NULL; xmlNode *graph_data = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *ref = crm_element_value(input->msg, XML_ATTR_REFERENCE); const char *graph_file = crm_element_value(input->msg, F_CRM_TGRAPH); const char *graph_input = crm_element_value(input->msg, F_CRM_TGRAPH_INPUT); if (graph_file == NULL && input->xml == NULL) { crm_log_xml_err(input->msg, "Bad command"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return; } if (transition_graph->complete == FALSE) { crm_info("Another transition is already active"); abort_transition(INFINITY, tg_restart, "Transition Active", NULL); return; } if (fsa_pe_ref == NULL || safe_str_neq(fsa_pe_ref, ref)) { crm_info("Transition is redundant: %s vs. %s", crm_str(fsa_pe_ref), crm_str(ref)); abort_transition(INFINITY, tg_restart, "Transition Redundant", NULL); } graph_data = input->xml; if (graph_data == NULL && graph_file != NULL) { graph_data = filename2xml(graph_file); } if (is_timer_started(transition_timer)) { crm_debug("The transitioner wait for a transition timer"); return; } CRM_CHECK(graph_data != NULL, crm_err("Input raised by %s is invalid", msg_data->origin); crm_log_xml_err(input->msg, "Bad command"); return); destroy_graph(transition_graph); transition_graph = unpack_graph(graph_data, graph_input); CRM_CHECK(transition_graph != NULL, transition_graph = create_blank_graph(); return); crm_info("Processing graph %d (ref=%s) derived from %s", transition_graph->id, ref, graph_input); value = crm_element_value(graph_data, "failed-stop-offset"); if (value) { free(failed_stop_offset); failed_stop_offset = strdup(value); } value = crm_element_value(graph_data, "failed-start-offset"); if (value) { free(failed_start_offset); failed_start_offset = strdup(value); } trigger_graph(); print_graph(LOG_DEBUG_2, transition_graph); if (graph_data != input->xml) { free_xml(graph_data); } } } diff --git a/include/crm/common/util.h b/include/crm/common/util.h index 570ef55e09..79501c88c2 100644 --- a/include/crm/common/util.h +++ b/include/crm/common/util.h @@ -1,163 +1,103 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRM_COMMON_UTIL__H # define CRM_COMMON_UTIL__H # include # include # include # include # include # include # if SUPPORT_HEARTBEAT # include # else # define NORMALNODE "normal" # define ACTIVESTATUS "active"/* fully functional, and all links are up */ # define DEADSTATUS "dead" /* Status of non-working link or machine */ # define PINGSTATUS "ping" /* Status of a working ping node */ # define JOINSTATUS "join" /* Status when an api client joins */ # define LEAVESTATUS "leave" /* Status when an api client leaves */ # define ONLINESTATUS "online"/* Status of an online client */ # define OFFLINESTATUS "offline" /* Status of an offline client */ # endif char *crm_itoa(int an_int); - -int compare_version(const char *version1, const char *version2); - -void g_hash_destroy_str(gpointer data); - gboolean crm_is_true(const char *s); - int crm_str_to_boolean(const char *s, int *ret); - +int crm_parse_int(const char *text, const char *default_text); long long crm_get_msec(const char *input); unsigned long long crm_get_interval(const char *input); +int char2score(const char *score); +char *score2char(int score); -char *generate_op_key(const char *rsc_id, const char *op_type, int interval); +int compare_version(const char *version1, const char *version2); gboolean parse_op_key(const char *key, char **rsc_id, char **op_type, int *interval); - -char *generate_notify_key(const char *rsc_id, const char *notify_type, const char *op_type); - -char *generate_transition_magic_v202(const char *transition_key, int op_status); - -char *generate_transition_magic(const char *transition_key, int op_status, int op_rc); - -gboolean decode_transition_magic(const char *magic, char **uuid, - int *transition_id, int *action_id, int *op_status, - int *op_rc, int *target_rc); - -char *generate_transition_key(int action, int transition_id, int target_rc, const char *node); - -gboolean decode_transition_key(const char *key, char **uuid, int *action, int *transition_id, int *target_rc); - -gboolean decode_op_key(const char *key, char **rsc_id, char **op_type, int *interval); - +gboolean decode_transition_key( + const char *key, char **uuid, int *action, int *transition_id, int *target_rc); +gboolean decode_transition_magic( + const char *magic, char **uuid, int *transition_id, int *action_id, + int *op_status, int *op_rc, int *target_rc); # define safe_str_eq(a, b) crm_str_eq(a, b, FALSE) - gboolean crm_str_eq(const char *a, const char *b, gboolean use_case); - gboolean safe_str_neq(const char *a, const char *b); -int crm_parse_int(const char *text, const char *default_text); # define crm_atoi(text, default_text) crm_parse_int(text, default_text) void crm_abort(const char *file, const char *function, int line, const char *condition, gboolean do_core, gboolean do_fork); -int char2score(const char *score); -char *score2char(int score); - -# define set_bit(word, bit) word = crm_set_bit(__PRETTY_FUNCTION__, NULL, word, bit) -# define clear_bit(word, bit) word = crm_clear_bit(__PRETTY_FUNCTION__, NULL, word, bit) - -# define set_bit_inplace set_bit -# define clear_bit_inplace clear_bit - -static inline long long -crm_clear_bit(const char *function, const char *target, long long word, long long bit) -{ - long long rc = (word & ~bit); - - if(rc == word) { - /* Unchanged */ - } else if (target) { - crm_trace("Bit 0x%.8llx for %s cleared by %s", bit, target, function); - } else { - crm_trace("Bit 0x%.8llx cleared by %s", bit, function); - } - - return rc; -} - -static inline long long -crm_set_bit(const char *function, const char *target, long long word, long long bit) -{ - long long rc = (word|bit); - - if(rc == word) { - /* Unchanged */ - } else if (target) { - crm_trace("Bit 0x%.8llx for %s set by %s", bit, target, function); - } else { - crm_trace("Bit 0x%.8llx set by %s", bit, function); - } - - return rc; -} - static inline gboolean is_not_set(long long word, long long bit) { return ((word & bit) == 0); } static inline gboolean is_set(long long word, long long bit) { return ((word & bit) == bit); } static inline gboolean is_set_any(long long word, long long bit) { return ((word & bit) != 0); } char *crm_meta_name(const char *field); const char *crm_meta_value(GHashTable * hash, const char *field); int rsc_op_expected_rc(lrmd_event_data_t *event); gboolean did_rsc_op_fail(lrmd_event_data_t *event, int target_rc); char *crm_md5sum(const char *buffer); char *crm_generate_uuid(void); int crm_user_lookup(const char *name, uid_t * uid, gid_t * gid); #endif diff --git a/include/crm/compatibility.h b/include/crm/compatibility.h index 2185e48d4d..8c7b5a3731 100644 --- a/include/crm/compatibility.h +++ b/include/crm/compatibility.h @@ -1,269 +1,271 @@ /* * Copyright (C) 2012 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRM_COMPATIBILITY__H # define CRM_COMPATIBILITY__H # define LOG_DEBUG_2 LOG_TRACE # define LOG_DEBUG_3 LOG_TRACE # define LOG_DEBUG_4 LOG_TRACE # define LOG_DEBUG_5 LOG_TRACE # define LOG_DEBUG_6 LOG_TRACE enum cib_errors { cib_ok = pcmk_ok, cib_operation = -EINVAL, cib_create_msg = -EPROTO, cib_not_connected = -ENOTCONN, cib_not_authorized = -EACCES, cib_send_failed = -ECOMM, cib_reply_failed = -ENOMSG, cib_return_code = -EPROTO, cib_output_data = -ENOMSG, cib_connection = -ENOTCONN, cib_authentication = -EPROTO, cib_missing = -EINVAL, cib_variant = -EPROTONOSUPPORT, CIBRES_MISSING_FIELD = -EINVAL, cib_unknown = -EINVAL, cib_STALE = -ENOKEY, cib_EXISTS = -ENOTUNIQ, cib_NOTEXISTS = -ENXIO, cib_ACTIVATION = -ENODATA, cib_NOOBJECT = -EINVAL, cib_NOPARENT = -EINVAL, cib_NOTSUPPORTED = -EPROTONOSUPPORT, cib_registration_msg = -EPROTO, cib_callback_token = -EPROTO, cib_callback_register = -ECOMM, cib_client_gone = -ECONNRESET, cib_not_master = -EPERM, cib_missing_data = -EINVAL, cib_remote_timeout = -ETIME, cib_no_quorum = -pcmk_err_no_quorum, cib_diff_failed = -pcmk_err_diff_failed, cib_diff_resync = -pcmk_err_diff_resync, cib_old_data = -pcmk_err_old_data, cib_dtd_validation = -pcmk_err_dtd_validation, cib_bad_section = -EINVAL, cib_bad_permissions = -EACCES, cib_invalid_argument = -EINVAL, cib_transform_failed = -pcmk_err_transform_failed, cib_permission_denied = -EACCES, }; enum stonith_errors { stonith_ok = pcmk_ok, stonith_pending = -EINPROGRESS, st_err_generic = -pcmk_err_generic, st_err_internal = -EPROTO, st_err_not_supported = -EPROTONOSUPPORT, st_err_connection = -ENOTCONN, st_err_missing = -EINVAL, st_err_exists = -ENOTUNIQ, st_err_timeout = -ETIME, st_err_ipc = -ECOMM, st_err_peer = -ENOMSG, st_err_unknown_operation = -EOPNOTSUPP, st_err_unknown_device = -ENODEV, st_err_none_available = -EHOSTUNREACH, st_err_signal = -ECONNABORTED, st_err_agent_fork = -ECHILD, st_err_agent_args = -EREMOTEIO, st_err_agent = -ECONNABORTED, st_err_invalid_level = -EINVAL, }; enum lrmd_errors { lrmd_ok = pcmk_ok, lrmd_pending = -EINPROGRESS, lrmd_err_generic = -EPROTONOSUPPORT, lrmd_err_internal = -EPROTO, lrmd_err_connection = -ENOTCONN, lrmd_err_missing = -EINVAL, lrmd_err_ipc = -ECOMM, lrmd_err_peer = -ENOMSG, lrmd_err_unknown_operation = -EOPNOTSUPP, lrmd_err_unknown_rsc = -ENODEV, lrmd_err_no_metadata = -EIO, lrmd_err_stonith_connection = -EUNATCH, lrmd_err_provider_required = -EINVAL, }; #define stonith_error2string pcmk_strerror #define lrmd_error2string pcmk_strerror #define cib_error2string pcmk_strerror static inline void slist_basic_destroy(GListPtr list) { GListPtr gIter = NULL; for (gIter = list; gIter != NULL; gIter = gIter->next) { free(gIter->data); } g_list_free(list); } # define crm_strdup strdup +# define set_bit_inplace set_bit +# define clear_bit_inplace clear_bit # define crm_malloc0(malloc_obj, length) do { \ malloc_obj = malloc(length); \ if(malloc_obj == NULL) { \ crm_err("Failed allocation of %lu bytes", (unsigned long)length); \ CRM_ASSERT(malloc_obj != NULL); \ } \ memset(malloc_obj, 0, length); \ } while(0) # define crm_malloc(malloc_obj, length) do { \ malloc_obj = malloc(length); \ if(malloc_obj == NULL) { \ crm_err("Failed allocation of %lu bytes", (unsigned long)length); \ CRM_ASSERT(malloc_obj != NULL); \ } \ } while(0) # define crm_realloc(realloc_obj, length) do { \ realloc_obj = realloc(realloc_obj, length); \ CRM_ASSERT(realloc_obj != NULL); \ } while(0) #define crm_free(free_obj) do { free(free_obj); free_obj=NULL; } while(0) /* These two child iterator macros are no longer to be used * They exist for compatability reasons and will be removed in a * future release */ # define xml_child_iter(parent, child, code) do { \ if(parent != NULL) { \ xmlNode *child = NULL; \ xmlNode *__crm_xml_iter = parent->children; \ while(__crm_xml_iter != NULL) { \ child = __crm_xml_iter; \ __crm_xml_iter = __crm_xml_iter->next; \ if(child->type == XML_ELEMENT_NODE) { \ code; \ } \ } \ } \ } while(0) # define xml_child_iter_filter(parent, child, filter, code) do { \ if(parent != NULL) { \ xmlNode *child = NULL; \ xmlNode *__crm_xml_iter = parent->children; \ while(__crm_xml_iter != NULL) { \ child = __crm_xml_iter; \ __crm_xml_iter = __crm_xml_iter->next; \ if(child->type == XML_ELEMENT_NODE) { \ if(filter == NULL \ || crm_str_eq(filter, (const char *)child->name, TRUE)) { \ code; \ } \ } \ } \ } \ } while(0) # define xml_prop_iter(parent, prop_name, prop_value, code) do { \ if(parent != NULL) { \ xmlAttrPtr prop_iter = parent->properties; \ const char *prop_name = NULL; \ const char *prop_value = NULL; \ while(prop_iter != NULL) { \ prop_name = (const char *)prop_iter->name; \ prop_value = crm_element_value(parent, prop_name); \ prop_iter = prop_iter->next; \ if(prop_name) { \ code; \ } \ } \ } \ } while(0) # define xml_prop_name_iter(parent, prop_name, code) do { \ if(parent != NULL) { \ xmlAttrPtr prop_iter = parent->properties; \ const char *prop_name = NULL; \ while(prop_iter != NULL) { \ prop_name = (const char *)prop_iter->name; \ prop_iter = prop_iter->next; \ if(prop_name) { \ code; \ } \ } \ } \ } while(0) # define zap_xml_from_parent(parent, xml_obj) free_xml_from_parent(parent, xml_obj); xml_obj = NULL /* Use something like this instead of the next macro: GListPtr gIter = rsc->children; for(; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t*)gIter->data; ... } */ # define slist_destroy(child_type, child, parent, a) do { \ GListPtr __crm_iter_head = parent; \ child_type *child = NULL; \ while(__crm_iter_head != NULL) { \ child = (child_type *) __crm_iter_head->data; \ __crm_iter_head = __crm_iter_head->next; \ { a; } \ } \ g_list_free(parent); \ } while(0) static inline gboolean attrd_update( crm_ipc_t *cluster, char command, const char *host, const char *name, const char *value, const char *section, const char *set, const char *dampen) QB_GNUC_DEPRECATED; static inline gboolean attrd_lazy_update( char command, const char *host, const char *name, const char *value, const char *section, const char *set, const char *dampen) QB_GNUC_DEPRECATED; static inline gboolean attrd_update_no_mainloop( int *connection, char command, const char *host, const char *name, const char *value, const char *section, const char *set, const char *dampen) QB_GNUC_DEPRECATED; static inline gboolean attrd_update(crm_ipc_t *cluster, char command, const char *host, const char *name, const char *value, const char *section, const char *set, const char *dampen) { return attrd_update_delegate(cluster, command, host, name, value, section, set, dampen, NULL); } static inline gboolean attrd_lazy_update(char command, const char *host, const char *name, const char *value, const char *section, const char *set, const char *dampen) { return attrd_update_delegate(NULL, command, host, name, value, section, set, dampen, NULL); } static inline gboolean attrd_update_no_mainloop(int *connection, char command, const char *host, const char *name, const char *value, const char *section, const char *set, const char *dampen) { return attrd_update_delegate(NULL, command, host, name, value, section, set, dampen, NULL); } #endif diff --git a/include/crm_internal.h b/include/crm_internal.h index 988b5d8b79..6c90996e46 100644 --- a/include/crm_internal.h +++ b/include/crm_internal.h @@ -1,207 +1,251 @@ /* crm_internal.h */ /* * Copyright (C) 2006 - 2008 * Andrew Beekhof * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRM_INTERNAL__H # define CRM_INTERNAL__H # include # include # include # include # include # include +# include /* Dynamic loading of libraries */ void *find_library_function(void **handle, const char *lib, const char *fn, int fatal); void *convert_const_pointer(const void *ptr); /* For ACLs */ char *uid2username(uid_t uid); void determine_request_user(char *user, xmlNode * request, const char *field); # if ENABLE_ACL # include static inline gboolean is_privileged(const char *user) { if (user == NULL) { return FALSE; } else if (strcmp(user, CRM_DAEMON_USER) == 0) { return TRUE; } else if (strcmp(user, "root") == 0) { return TRUE; } return FALSE; } # endif /* CLI option processing*/ # ifdef HAVE_GETOPT_H # include # else # define no_argument 0 # define required_argument 1 # endif # define pcmk_option_default 0x00000 # define pcmk_option_hidden 0x00001 # define pcmk_option_paragraph 0x00002 # define pcmk_option_example 0x00004 struct crm_option { /* Fields from 'struct option' in getopt.h */ /* name of long option */ const char *name; /* * one of no_argument, required_argument, and optional_argument: * whether option takes an argument */ int has_arg; /* if not NULL, set *flag to val when option found */ int *flag; /* if flag not NULL, value to set *flag to; else return value */ int val; /* Custom fields */ const char *desc; long flags; }; void crm_set_options(const char *short_options, const char *usage, struct crm_option *long_options, const char *app_desc); int crm_get_option(int argc, char **argv, int *index); int crm_get_option_long(int argc, char **argv, int *index, const char **longname); void crm_help(char cmd, int exit_code); /* Cluster Option Processing */ typedef struct pe_cluster_option_s { const char *name; const char *alt_name; const char *type; const char *values; const char *default_value; gboolean(*is_valid) (const char *); const char *description_short; const char *description_long; } pe_cluster_option; const char *cluster_option(GHashTable * options, gboolean(*validate) (const char *), const char *name, const char *old_name, const char *def_value); const char *get_cluster_pref(GHashTable * options, pe_cluster_option * option_list, int len, const char *name); void config_metadata(const char *name, const char *version, const char *desc_short, const char *desc_long, pe_cluster_option * option_list, int len); void verify_all_options(GHashTable * options, pe_cluster_option * option_list, int len); gboolean check_time(const char *value); gboolean check_timer(const char *value); gboolean check_boolean(const char *value); gboolean check_number(const char *value); /* Shared PE/crmd functionality */ void filter_action_parameters(xmlNode * param_set, const char *version); void filter_reload_parameters(xmlNode * param_set, const char *restart_string); /* Resource operation updates */ xmlNode *create_operation_update(xmlNode * parent, lrmd_event_data_t *event, const char *caller_version, int target_rc, const char *origin, int level); /* char2score */ extern int node_score_red; extern int node_score_green; extern int node_score_yellow; extern int node_score_infinity; /* Assorted convenience functions */ static inline int crm_strlen_zero(const char *s) { return !s || *s == '\0'; } char *generate_series_filename(const char *directory, const char *series, int sequence, gboolean bzip); int get_last_sequence(const char *directory, const char *series); void write_last_sequence(const char *directory, const char *series, int sequence, int max); void crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile); gboolean crm_is_writable(const char *dir, const char *file, const char *user, const char *group, gboolean need_both); +char *generate_op_key(const char *rsc_id, const char *op_type, int interval); +char *generate_notify_key(const char *rsc_id, const char *notify_type, const char *op_type); +char *generate_transition_magic_v202(const char *transition_key, int op_status); +char *generate_transition_magic(const char *transition_key, int op_status, int op_rc); +char *generate_transition_key(int action, int transition_id, int target_rc, const char *node); + +static inline long long +crm_clear_bit(const char *function, const char *target, long long word, long long bit) +{ + long long rc = (word & ~bit); + + if(rc == word) { + /* Unchanged */ + } else if (target) { + crm_trace("Bit 0x%.8llx for %s cleared by %s", bit, target, function); + } else { + crm_trace("Bit 0x%.8llx cleared by %s", bit, function); + } + + return rc; +} + +static inline long long +crm_set_bit(const char *function, const char *target, long long word, long long bit) +{ + long long rc = (word|bit); + + if(rc == word) { + /* Unchanged */ + } else if (target) { + crm_trace("Bit 0x%.8llx for %s set by %s", bit, target, function); + } else { + crm_trace("Bit 0x%.8llx set by %s", bit, function); + } + + return rc; +} + +# define set_bit(word, bit) word = crm_set_bit(__PRETTY_FUNCTION__, NULL, word, bit) +# define clear_bit(word, bit) word = crm_clear_bit(__PRETTY_FUNCTION__, NULL, word, bit) + +void g_hash_destroy_str(gpointer data); + long long crm_int_helper(const char *text, char **end_text); char *crm_concat(const char *prefix, const char *suffix, char join); char *generate_hash_key(const char *crm_msg_reference, const char *sys); xmlNode *crm_recv_remote_msg(void *session, gboolean encrypted); void crm_send_remote_msg(void *session, xmlNode * msg, gboolean encrypted); # define crm_config_err(fmt...) { crm_config_error = TRUE; crm_err(fmt); } # define crm_config_warn(fmt...) { crm_config_warning = TRUE; crm_warn(fmt); } # define attrd_channel T_ATTRD # define F_ATTRD_KEY "attr_key" # define F_ATTRD_ATTRIBUTE "attr_name" # define F_ATTRD_TASK "task" # define F_ATTRD_VALUE "attr_value" # define F_ATTRD_SET "attr_set" # define F_ATTRD_SECTION "attr_section" # define F_ATTRD_DAMPEN "attr_dampening" # define F_ATTRD_IGNORE_LOCALLY "attr_ignore_locally" # define F_ATTRD_HOST "attr_host" # define F_ATTRD_USER "attr_user" # if SUPPORT_COROSYNC # if CS_USES_LIBQB # include # include typedef struct qb_ipc_request_header cs_ipc_header_request_t; typedef struct qb_ipc_response_header cs_ipc_header_response_t; # else # include # include # include static inline int qb_to_cs_error(int a) { return a; } typedef coroipc_request_header_t cs_ipc_header_request_t; typedef coroipc_response_header_t cs_ipc_header_response_t; # endif # else typedef struct { int size __attribute__ ((aligned(8))); int id __attribute__ ((aligned(8))); } __attribute__ ((aligned(8))) cs_ipc_header_request_t; typedef struct { int size __attribute__ ((aligned(8))); int id __attribute__ ((aligned(8))); int error __attribute__ ((aligned(8))); } __attribute__ ((aligned(8))) cs_ipc_header_response_t; # endif #endif /* CRM_INTERNAL__H */ diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index 0fd05bfb29..50b8bd3786 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -1,436 +1,436 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include GHashTable *crm_peer_id_cache = NULL; GHashTable *crm_peer_cache = NULL; unsigned long long crm_peer_seq = 0; gboolean crm_have_quorum = FALSE; gboolean crm_is_peer_active(const crm_node_t * node) { #if SUPPORT_COROSYNC if(is_openais_cluster()) { return crm_is_corosync_peer_active(node); } #endif #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { return crm_is_heartbeat_peer_active(node); } #endif crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type())); return FALSE; } static gboolean crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; crm_node_t *search = user_data; if (search != NULL && node->id != search->id) { return FALSE; } else if (crm_is_peer_active(value) == FALSE) { crm_notice("Removing %s/%u from the membership list", node->uname, node->id); return TRUE; } return FALSE; } guint reap_crm_member(uint32_t id) { int matches = 0; crm_node_t *node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); if (node == NULL) { crm_info("Peer %u is unknown", id); } else if (crm_is_peer_active(node)) { crm_warn("Peer %u/%s is still active", id, node->uname); } else { if (g_hash_table_remove(crm_peer_id_cache, GUINT_TO_POINTER(id))) { crm_notice("Removed dead peer %u from the uuid cache", id); } else { crm_warn("Peer %u/%s was not removed", id, node->uname); } matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, node); crm_notice("Removed %d dead peers with id=%u from the membership list", matches, id); } return matches; } static void crm_count_peer(gpointer key, gpointer value, gpointer user_data) { guint *count = user_data; crm_node_t *node = value; if (crm_is_peer_active(node)) { *count = *count + 1; } } guint crm_active_peers(void) { guint count = 0; g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count); return count; } void destroy_crm_node(gpointer data) { crm_node_t *node = data; crm_trace("Destroying entry for node %u", node->id); free(node->addr); free(node->uname); free(node->state); free(node->uuid); free(node); } void crm_peer_init(void) { static gboolean initialized = FALSE; if (initialized) { return; } initialized = TRUE; crm_peer_destroy(); if (crm_peer_cache == NULL) { crm_peer_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, destroy_crm_node); } if (crm_peer_id_cache == NULL) { crm_peer_id_cache = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); } } void crm_peer_destroy(void) { if (crm_peer_cache != NULL) { g_hash_table_destroy(crm_peer_cache); crm_peer_cache = NULL; } if (crm_peer_id_cache != NULL) { g_hash_table_destroy(crm_peer_id_cache); crm_peer_id_cache = NULL; } } void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL; void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)) { crm_status_callback = dispatch; } static crm_node_t * crm_new_peer(unsigned int id, const char *uname) { crm_node_t *node = NULL; CRM_CHECK(uname != NULL || id > 0, return NULL); crm_peer_init(); crm_debug("Creating entry for node %s/%u", uname, id); node = calloc(1, sizeof(crm_node_t)); node->state = strdup("unknown"); if (id > 0) { node->id = id; crm_info("Node %s now has id: %u", crm_str(uname), id); g_hash_table_replace(crm_peer_id_cache, GUINT_TO_POINTER(node->id), node); } if (uname) { node->uname = strdup(uname); CRM_ASSERT(node->uname != NULL); crm_info("Node %u is now known as %s", id, node->uname); g_hash_table_replace(crm_peer_cache, node->uname, node); if (node->uuid == NULL) { const char *uuid = get_node_uuid(id, node->uname); if (node->uuid) { crm_info("Node %u has uuid %s", id, node->uuid); } else { node->uuid = strdup(uuid); } } if (crm_status_callback) { crm_status_callback(crm_status_uname, node, NULL); } } return node; } crm_node_t * crm_get_peer(unsigned int id, const char *uname) { crm_node_t *node = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (uname != NULL) { node = g_hash_table_lookup(crm_peer_cache, uname); } if (node == NULL && id > 0) { node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); if (node && node->uname && uname) { crm_crit("Node %s and %s share the same cluster node id '%u'!", node->uname, uname, id); /* NOTE: Calling crm_new_peer() means the entry in * crm_peer_id_cache will point to the new entity */ /* TODO: Replace the old uname instead? */ node = crm_new_peer(id, uname); CRM_ASSERT(node->uname != NULL); } } if (node == NULL) { node = crm_new_peer(id, uname); if(uname) { const char *uuid = get_uuid(uname); crm_update_peer(__FUNCTION__, 0, 0, 0, -1, 0, uuid, uname, NULL, NULL); } } CRM_ASSERT(node); if (node && uname && node->uname == NULL) { node->uname = strdup(uname); crm_info("Node %u is now known as %s", id, uname); g_hash_table_insert(crm_peer_cache, node->uname, node); if (crm_status_callback) { crm_status_callback(crm_status_uname, node, NULL); } } if (node && node->uname && node->uuid == NULL) { const char *uuid = get_node_uuid(id, node->uname); if (node->uuid) { crm_info("Node %u has uuid %s", id, node->uuid); } else if (uuid) { node->uuid = strdup(uuid); } } if (node && id > 0 && id != node->id) { g_hash_table_remove(crm_peer_id_cache, GUINT_TO_POINTER(node->id)); g_hash_table_insert(crm_peer_id_cache, GUINT_TO_POINTER(id), node); node->id = id; crm_info("Node %s now has id: %u", crm_str(uname), id); } return node; } crm_node_t * crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes, uint32_t children, const char *uuid, const char *uname, const char *addr, const char *state) { gboolean addr_changed = FALSE; gboolean state_changed = FALSE; gboolean procs_changed = FALSE; gboolean votes_changed = FALSE; crm_node_t *node = NULL; id = get_corosync_id(id, uuid); CRM_CHECK(uname != NULL || id > 0, return NULL); CRM_ASSERT(crm_peer_cache != NULL); CRM_ASSERT(crm_peer_id_cache != NULL); node = crm_get_peer(id, uname); if (node == NULL) { crm_trace("No node found for %d/%s", id, uname); node = crm_new_peer(id, uname); CRM_LOG_ASSERT(node != NULL); if (node == NULL) { crm_err("Insufficient information to create node %d/%s", id, uname); return NULL; } /* do it now so we don't get '(new)' everywhere */ node->votes = votes; node->processes = children; if (addr) { node->addr = strdup(addr); } } if (votes > 0 && node->votes != votes) { votes_changed = TRUE; node->votes = votes; } if (node->uuid == NULL) { if (is_openais_cluster()) { /* Yes, overrule whatever was passed in */ node->uuid = get_corosync_uuid(id, uname); } else if (uuid != NULL) { node->uuid = strdup(uuid); } } if (children > 0 && children != node->processes) { uint32_t last = node->processes; node->processes = children; procs_changed = TRUE; if (crm_status_callback) { crm_status_callback(crm_status_processes, node, &last); } } if (born != 0) { node->born = born; } if (state != NULL && safe_str_neq(node->state, state)) { char *last = node->state; node->state = strdup(state); state_changed = TRUE; if (crm_status_callback) { crm_status_callback(crm_status_nstate, node, last); } free(last); } if (seen != 0 && safe_str_eq(node->state, CRM_NODE_MEMBER)) { node->last_seen = seen; } if (addr != NULL) { if (node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) { addr_changed = TRUE; free(node->addr); node->addr = strdup(addr); } } if (state_changed || addr_changed || votes_changed) { do_crm_log_unlikely(state_changed?LOG_NOTICE:LOG_INFO, "%s: Node %s: id=%u state=%s%s addr=%s%s votes=%d%s born=" U64T " seen=" U64T " proc=%.32x%s", source, node->uname, node->id, node->state, state_changed ? " (new)" : "", node->addr, addr_changed ? " (new)" : "", node->votes, votes_changed ? " (new)" : "", node->born, node->last_seen, node->processes, procs_changed ? " (new)" : ""); } else if (procs_changed) { crm_debug("%s: Node %s: id=%u seen=" U64T " proc=%.32x (new)", source, node->uname, node->id, node->last_seen, node->processes); } return node; } void crm_update_peer_proc(const char *source, crm_node_t *node, uint32_t flag, const char *status) { uint32_t last = 0; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL", source, peer2text(flag), status); return); last = node->processes; if (safe_str_eq(status, ONLINESTATUS)) { if ((node->processes & flag) == 0) { - set_bit_inplace(node->processes, flag); + set_bit(node->processes, flag); changed = TRUE; } } else if (node->processes & flag) { - clear_bit_inplace(node->processes, flag); + clear_bit(node->processes, flag); changed = TRUE; } if (changed) { crm_info("%s: %s.%d.%s is now %s", source, node->uname, node->id, peer2text(flag), status); if (crm_status_callback) { crm_status_callback(crm_status_processes, node, &last); } } else { crm_trace("%s: %s.%d.%s is unchanged %s", source, node->uname, node->id, peer2text(flag), status); } } int crm_terminate_member(int nodeid, const char *uname, void * unused) { /* Always use the synchronous, non-mainloop version */ return stonith_api_kick(nodeid, uname, 120, TRUE); } int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) { return stonith_api_kick(nodeid, uname, 120, TRUE); } diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c index 6a545233f7..939dc2f2c6 100644 --- a/lib/pengine/clone.c +++ b/lib/pengine/clone.c @@ -1,554 +1,554 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #define VARIANT_CLONE 1 #include "./variant.h" void clone_create_notifications(resource_t * rsc, action_t * action, action_t * action_complete, pe_working_set_t * data_set); void force_non_unique_clone(resource_t * rsc, const char *rid, pe_working_set_t * data_set); resource_t *create_child_clone(resource_t * rsc, int sub_id, pe_working_set_t * data_set); static void mark_as_orphan(resource_t * rsc) { GListPtr gIter = rsc->children; set_bit(rsc->flags, pe_rsc_orphan); for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; mark_as_orphan(child); } } static void clear_bit_recursive(resource_t * rsc, unsigned long long flag) { GListPtr gIter = rsc->children; - clear_bit_inplace(rsc->flags, flag); + clear_bit(rsc->flags, flag); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; clear_bit_recursive(child_rsc, flag); } } void force_non_unique_clone(resource_t * rsc, const char *rid, pe_working_set_t * data_set) { if (rsc->variant == pe_clone || rsc->variant == pe_master) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); crm_config_warn("Clones %s contains non-OCF resource %s and so " "can only be used as an anonymous clone. " "Set the " XML_RSC_ATTR_UNIQUE " meta attribute to false", rsc->id, rid); clone_data->clone_node_max = 1; clone_data->clone_max = g_list_length(data_set->nodes); clear_bit_recursive(rsc, pe_rsc_unique); } } resource_t * find_clone_instance(resource_t * rsc, const char *sub_id, pe_working_set_t * data_set) { char *child_id = NULL; resource_t *child = NULL; const char *child_base = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); child_base = ID(clone_data->xml_obj_child); child_id = crm_concat(child_base, sub_id, ':'); child = pe_find_resource(rsc->children, child_id); free(child_id); return child; } resource_t * create_child_clone(resource_t * rsc, int sub_id, pe_working_set_t * data_set) { gboolean as_orphan = FALSE; char *inc_num = NULL; char *inc_max = NULL; resource_t *child_rsc = NULL; xmlNode *child_copy = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); CRM_CHECK(clone_data->xml_obj_child != NULL, return FALSE); if (sub_id < 0) { as_orphan = TRUE; sub_id = clone_data->total_clones; } inc_num = crm_itoa(sub_id); inc_max = crm_itoa(clone_data->clone_max); child_copy = copy_xml(clone_data->xml_obj_child); crm_xml_add(child_copy, XML_RSC_ATTR_INCARNATION, inc_num); if (common_unpack(child_copy, &child_rsc, rsc, data_set) == FALSE) { pe_err("Failed unpacking resource %s", crm_element_value(child_copy, XML_ATTR_ID)); child_rsc = NULL; goto bail; } /* child_rsc->globally_unique = rsc->globally_unique; */ clone_data->total_clones += 1; crm_trace("Setting clone attributes for: %s", child_rsc->id); rsc->children = g_list_append(rsc->children, child_rsc); if (as_orphan) { mark_as_orphan(child_rsc); } add_hash_param(child_rsc->meta, XML_RSC_ATTR_INCARNATION_MAX, inc_max); print_resource(LOG_DEBUG_3, "Added", child_rsc, FALSE); bail: free(inc_num); free(inc_max); return child_rsc; } gboolean master_unpack(resource_t * rsc, pe_working_set_t * data_set) { const char *master_max = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_MASTER_MAX); const char *master_node_max = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_MASTER_NODEMAX); g_hash_table_replace(rsc->meta, strdup("stateful"), strdup(XML_BOOLEAN_TRUE)); if (clone_unpack(rsc, data_set)) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); clone_data->master_max = crm_parse_int(master_max, "1"); clone_data->master_node_max = crm_parse_int(master_node_max, "1"); return TRUE; } return FALSE; } gboolean clone_unpack(resource_t * rsc, pe_working_set_t * data_set) { int lpc = 0; const char *type = NULL; resource_t *self = NULL; int num_xml_children = 0; xmlNode *a_child = NULL; xmlNode *xml_tmp = NULL; xmlNode *xml_self = NULL; xmlNode *xml_obj = rsc->xml; clone_variant_data_t *clone_data = NULL; const char *ordered = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_ORDERED); const char *interleave = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERLEAVE); const char *max_clones = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_MAX); const char *max_clones_node = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_NODEMAX); crm_trace("Processing resource %s...", rsc->id); clone_data = calloc(1, sizeof(clone_variant_data_t)); rsc->variant_opaque = clone_data; clone_data->interleave = FALSE; clone_data->ordered = FALSE; clone_data->active_clones = 0; clone_data->xml_obj_child = NULL; clone_data->clone_node_max = crm_parse_int(max_clones_node, "1"); if (max_clones) { clone_data->clone_max = crm_parse_int(max_clones, "1"); } else if (g_list_length(data_set->nodes) > 0) { clone_data->clone_max = g_list_length(data_set->nodes); } else { clone_data->clone_max = 1; /* Handy during crm_verify */ } if (crm_is_true(interleave)) { clone_data->interleave = TRUE; } if (crm_is_true(ordered)) { clone_data->ordered = TRUE; } if ((rsc->flags & pe_rsc_unique) == 0 && clone_data->clone_node_max > 1) { crm_config_err("Anonymous clones (%s) may only support one copy" " per node", rsc->id); clone_data->clone_node_max = 1; } crm_trace("Options for %s", rsc->id); crm_trace("\tClone max: %d", clone_data->clone_max); crm_trace("\tClone node max: %d", clone_data->clone_node_max); crm_trace("\tClone is unique: %s", is_set(rsc->flags, pe_rsc_unique) ? "true" : "false"); clone_data->xml_obj_child = find_xml_node(xml_obj, XML_CIB_TAG_GROUP, FALSE); if (clone_data->xml_obj_child == NULL) { clone_data->xml_obj_child = find_xml_node(xml_obj, XML_CIB_TAG_RESOURCE, TRUE); for (a_child = __xml_first_child(xml_obj); a_child != NULL; a_child = __xml_next(a_child)) { if (crm_str_eq((const char *)a_child->name, XML_CIB_TAG_RESOURCE, TRUE)) { num_xml_children++; } } } if (clone_data->xml_obj_child == NULL) { crm_config_err("%s has nothing to clone", rsc->id); return FALSE; } for (a_child = __xml_first_child(xml_obj); a_child != NULL; a_child = __xml_next(a_child)) { if (crm_str_eq((const char *)a_child->name, type, TRUE)) { num_xml_children++; } } if (num_xml_children > 1) { crm_config_err("%s has too many children. Only the first (%s) will be cloned.", rsc->id, ID(clone_data->xml_obj_child)); } xml_self = copy_xml(rsc->xml); /* this is a bit of a hack - but simplifies everything else */ xmlNodeSetName(xml_self, ((const xmlChar *)XML_CIB_TAG_RESOURCE)); xml_tmp = find_xml_node(xml_obj, "operations", FALSE); if (xml_tmp != NULL) { add_node_copy(xml_self, xml_tmp); } /* * Make clones ever so slightly sticky by default * * This helps ensure clone instances are not shuffled around the cluster * for no benefit in situations when pre-allocation is not appropriate */ if (g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_STICKINESS) == NULL) { add_hash_param(rsc->meta, XML_RSC_ATTR_STICKINESS, "1"); } if (common_unpack(xml_self, &self, rsc, data_set)) { clone_data->self = self; } else { crm_log_xml_err(xml_self, "Couldnt unpack dummy child"); clone_data->self = self; return FALSE; } crm_trace("\tClone is unique (fixed): %s", is_set(rsc->flags, pe_rsc_unique) ? "true" : "false"); clone_data->notify_confirm = is_set(rsc->flags, pe_rsc_notify); add_hash_param(rsc->meta, XML_RSC_ATTR_UNIQUE, is_set(rsc->flags, pe_rsc_unique) ? XML_BOOLEAN_TRUE : XML_BOOLEAN_FALSE); for (lpc = 0; lpc < clone_data->clone_max; lpc++) { create_child_clone(rsc, lpc, data_set); } if (clone_data->clone_max == 0) { /* create one so that unpack_find_resource() will hook up * any orphans up to the parent correctly */ create_child_clone(rsc, -1, data_set); } crm_trace("Added %d children to resource %s...", clone_data->clone_max, rsc->id); return TRUE; } gboolean clone_active(resource_t * rsc, gboolean all) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; gboolean child_active = child_rsc->fns->active(child_rsc, all); if (all == FALSE && child_active) { return TRUE; } else if (all && child_active == FALSE) { return FALSE; } } if (all) { return TRUE; } else { return FALSE; } } static char * add_list_element(char *list, const char *value) { int len = 0; int last = 0; if (value == NULL) { return list; } if (list) { last = strlen(list); } len = last + 2; /* +1 space, +1 EOS */ len += strlen(value); list = realloc(list, len); sprintf(list + last, " %s", value); return list; } static void short_print(char *list, const char *prefix, const char *type, long options, void *print_data) { if (list) { if (options & pe_print_html) { status_print("
  • "); } status_print("%s%s: [%s ]", prefix, type, list); if (options & pe_print_html) { status_print("
  • \n"); } else if (options & pe_print_suppres_nl) { /* nothing */ } else if ((options & pe_print_printf) || (options & pe_print_ncurses)) { status_print("\n"); } } } static void clone_print_xml(resource_t * rsc, const char *pre_text, long options, void *print_data) { int is_master_slave = rsc->variant == pe_master ? 1 : 0; char *child_text = crm_concat(pre_text, " ", ' '); GListPtr gIter = rsc->children; status_print("%sid); status_print("multi_state=\"%s\" ", is_master_slave ? "true" : "false"); status_print("unique=\"%s\" ", is_set(rsc->flags, pe_rsc_unique) ? "true" : "false"); status_print("managed=\"%s\" ", is_set(rsc->flags, pe_rsc_managed) ? "true" : "false"); status_print("failed=\"%s\" ", is_set(rsc->flags, pe_rsc_failed) ? "true" : "false"); status_print("failure_ignored=\"%s\" ", is_set(rsc->flags, pe_rsc_failure_ignored) ? "true" : "false"); status_print(">\n"); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; child_rsc->fns->print(child_rsc, child_text, options, print_data); } status_print("%s\n", pre_text); free(child_text); } void clone_print(resource_t * rsc, const char *pre_text, long options, void *print_data) { char *child_text = NULL; char *master_list = NULL; char *started_list = NULL; char *stopped_list = NULL; const char *type = "Clone"; GListPtr gIter = rsc->children; clone_variant_data_t *clone_data = NULL; if (pre_text == NULL) { pre_text = " "; } if (options & pe_print_xml) { clone_print_xml(rsc, pre_text, options, print_data); return; } get_clone_variant_data(clone_data, rsc); child_text = crm_concat(pre_text, " ", ' '); if (rsc->variant == pe_master) { type = "Master/Slave"; } status_print("%s%s Set: %s [%s]%s%s", pre_text ? pre_text : "", type, rsc->id, ID(clone_data->xml_obj_child), is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "", is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)"); if (options & pe_print_html) { status_print("\n
      \n"); } else if ((options & pe_print_log) == 0) { status_print("\n"); } for (; gIter != NULL; gIter = gIter->next) { gboolean print_full = FALSE; resource_t *child_rsc = (resource_t *) gIter->data; if (child_rsc->fns->active(child_rsc, FALSE) == FALSE) { /* Inactive clone */ if (is_set(child_rsc->flags, pe_rsc_orphan)) { continue; } else if (is_set(rsc->flags, pe_rsc_unique)) { print_full = TRUE; } else { stopped_list = add_list_element(stopped_list, child_rsc->id); } } else if (is_set(child_rsc->flags, pe_rsc_unique) || is_set(child_rsc->flags, pe_rsc_orphan) || is_set(child_rsc->flags, pe_rsc_managed) == FALSE || is_set(child_rsc->flags, pe_rsc_failed)) { /* Unique, unmanaged or failed clone */ print_full = TRUE; } else if (child_rsc->fns->active(child_rsc, TRUE)) { /* Fully active anonymous clone */ node_t *location = child_rsc->fns->location(child_rsc, NULL, TRUE); if (location) { const char *host = location->details->uname; enum rsc_role_e a_role = child_rsc->fns->state(child_rsc, TRUE); if (a_role > RSC_ROLE_SLAVE) { /* And active on a single node as master */ master_list = add_list_element(master_list, host); } else { /* And active on a single node as started/slave */ started_list = add_list_element(started_list, host); } } else { /* uncolocated group - bleh */ print_full = TRUE; } } else { /* Partially active anonymous clone */ print_full = TRUE; } if (print_full) { if (options & pe_print_html) { status_print("
    • \n"); } child_rsc->fns->print(child_rsc, child_text, options, print_data); if (options & pe_print_html) { status_print("
    • \n"); } } } short_print(master_list, child_text, "Masters", options, print_data); short_print(started_list, child_text, rsc->variant == pe_master ? "Slaves" : "Started", options, print_data); short_print(stopped_list, child_text, "Stopped", options, print_data); free(master_list); free(started_list); free(stopped_list); if (options & pe_print_html) { status_print("
    \n"); } free(child_text); } void clone_free(resource_t * rsc) { GListPtr gIter = rsc->children; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); crm_trace("Freeing %s", rsc->id); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; crm_trace("Freeing child %s", child_rsc->id); free_xml(child_rsc->xml); child_rsc->fns->free(child_rsc); } g_list_free(rsc->children); if (clone_data->self) { free_xml(clone_data->self->xml); clone_data->self->fns->free(clone_data->self); } CRM_ASSERT(clone_data->demote_notify == NULL); CRM_ASSERT(clone_data->stop_notify == NULL); CRM_ASSERT(clone_data->start_notify == NULL); CRM_ASSERT(clone_data->promote_notify == NULL); common_free(rsc); } enum rsc_role_e clone_resource_state(const resource_t * rsc, gboolean current) { enum rsc_role_e clone_role = RSC_ROLE_UNKNOWN; GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; enum rsc_role_e a_role = child_rsc->fns->state(child_rsc, current); if (a_role > clone_role) { clone_role = a_role; } } crm_trace("%s role: %s", rsc->id, role2text(clone_role)); return clone_role; } diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c index aa0646b65e..1279391602 100644 --- a/lib/pengine/complex.c +++ b/lib/pengine/complex.c @@ -1,678 +1,678 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); void populate_hash(xmlNode * nvpair_list, GHashTable * hash, const char **attrs, int attrs_length); resource_object_functions_t resource_class_functions[] = { { native_unpack, native_find_rsc, native_parameter, native_print, native_active, native_resource_state, native_location, native_free}, { group_unpack, native_find_rsc, native_parameter, group_print, group_active, group_resource_state, native_location, group_free}, { clone_unpack, native_find_rsc, native_parameter, clone_print, clone_active, clone_resource_state, native_location, clone_free}, { master_unpack, native_find_rsc, native_parameter, clone_print, clone_active, clone_resource_state, native_location, clone_free} }; enum pe_obj_types get_resource_type(const char *name) { if (safe_str_eq(name, XML_CIB_TAG_RESOURCE)) { return pe_native; } else if (safe_str_eq(name, XML_CIB_TAG_GROUP)) { return pe_group; } else if (safe_str_eq(name, XML_CIB_TAG_INCARNATION)) { return pe_clone; } else if (safe_str_eq(name, XML_CIB_TAG_MASTER)) { return pe_master; } return pe_unknown; } const char * get_resource_typename(enum pe_obj_types type) { switch (type) { case pe_native: return XML_CIB_TAG_RESOURCE; case pe_group: return XML_CIB_TAG_GROUP; case pe_clone: return XML_CIB_TAG_INCARNATION; case pe_master: return XML_CIB_TAG_MASTER; case pe_unknown: return "unknown"; } return ""; } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { add_hash_param(user_data, key, value); } void get_meta_attributes(GHashTable * meta_hash, resource_t * rsc, node_t * node, pe_working_set_t * data_set) { GHashTable *node_hash = NULL; if (node) { node_hash = node->details->attrs; } if (rsc->xml) { xmlAttrPtr xIter = NULL; for (xIter = rsc->xml->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(rsc->xml, prop_name); add_hash_param(meta_hash, prop_name, prop_value); } } unpack_instance_attributes(data_set->input, rsc->xml, XML_TAG_META_SETS, node_hash, meta_hash, NULL, FALSE, data_set->now); /* populate from the regular attributes until the GUI can create * meta attributes */ unpack_instance_attributes(data_set->input, rsc->xml, XML_TAG_ATTR_SETS, node_hash, meta_hash, NULL, FALSE, data_set->now); /* set anything else based on the parent */ if (rsc->parent != NULL) { g_hash_table_foreach(rsc->parent->meta, dup_attr, meta_hash); } /* and finally check the defaults */ unpack_instance_attributes(data_set->input, data_set->rsc_defaults, XML_TAG_META_SETS, node_hash, meta_hash, NULL, FALSE, data_set->now); } void get_rsc_attributes(GHashTable * meta_hash, resource_t * rsc, node_t * node, pe_working_set_t * data_set) { GHashTable *node_hash = NULL; if (node) { node_hash = node->details->attrs; } unpack_instance_attributes(data_set->input, rsc->xml, XML_TAG_ATTR_SETS, node_hash, meta_hash, NULL, FALSE, data_set->now); /* set anything else based on the parent */ if (rsc->parent != NULL) { get_rsc_attributes(meta_hash, rsc->parent, node, data_set); } else { /* and finally check the defaults */ unpack_instance_attributes(data_set->input, data_set->rsc_defaults, XML_TAG_ATTR_SETS, node_hash, meta_hash, NULL, FALSE, data_set->now); } } static char * template_op_key(xmlNode * op) { const char *name = crm_element_value(op, "name"); const char *role = crm_element_value(op, "role"); char *key = NULL; if (role == NULL || crm_str_eq(role, RSC_ROLE_STARTED_S, TRUE) || crm_str_eq(role, RSC_ROLE_SLAVE_S, TRUE)) { role = RSC_ROLE_UNKNOWN_S; } key = crm_concat(name, role, '-'); return key; } static gboolean unpack_template(xmlNode * xml_obj, xmlNode ** expanded_xml, pe_working_set_t * data_set) { xmlNode *cib_resources = NULL; xmlNode *template = NULL; xmlNode *new_xml = NULL; xmlNode *child_xml = NULL; xmlNode *rsc_ops = NULL; xmlNode *template_ops = NULL; const char *template_ref = NULL; const char *id = NULL; if (xml_obj == NULL) { pe_err("No resource object for template unpacking"); return FALSE; } template_ref = crm_element_value(xml_obj, XML_CIB_TAG_RSC_TEMPLATE); if (template_ref == NULL) { return TRUE; } id = ID(xml_obj); if (id == NULL) { pe_err("'%s' object must have a id", crm_element_name(xml_obj)); return FALSE; } if (crm_str_eq(template_ref, id, TRUE)) { pe_err("The resource object '%s' should not reference itself", id); return FALSE; } cib_resources = get_object_root(XML_CIB_TAG_RESOURCES, data_set->input); if (cib_resources == NULL) { pe_err("Cannot get the root of object '%s'", XML_CIB_TAG_RESOURCES); return FALSE; } template = find_entity(cib_resources, XML_CIB_TAG_RSC_TEMPLATE, template_ref); if (template == NULL) { pe_err("No template named '%s'", template_ref); return FALSE; } new_xml = copy_xml(template); xmlNodeSetName(new_xml, xml_obj->name); crm_xml_replace(new_xml, XML_ATTR_ID, id); template_ops = find_xml_node(new_xml, "operations", FALSE); for (child_xml = __xml_first_child(xml_obj); child_xml != NULL; child_xml = __xml_next(child_xml)) { xmlNode *new_child = NULL; new_child = add_node_copy(new_xml, child_xml); if (crm_str_eq((const char *)new_child->name, "operations", TRUE)) { rsc_ops = new_child; } } if (template_ops && rsc_ops) { xmlNode *op = NULL; GHashTable *rsc_ops_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, NULL); for (op = __xml_first_child(rsc_ops); op != NULL; op = __xml_next(op)) { char *key = template_op_key(op); g_hash_table_insert(rsc_ops_hash, key, op); } for (op = __xml_first_child(template_ops); op != NULL; op = __xml_next(op)) { char *key = template_op_key(op); if (g_hash_table_lookup(rsc_ops_hash, key) == NULL) { add_node_copy(rsc_ops, op); } free(key); } if (rsc_ops_hash) { g_hash_table_destroy(rsc_ops_hash); } free_xml_from_parent(NULL, template_ops); } /*free_xml(*expanded_xml); */ *expanded_xml = new_xml; /* Disable multi-level templates for now */ /*if(unpack_template(new_xml, expanded_xml, data_set) == FALSE) { free_xml(*expanded_xml); *expanded_xml = NULL; return FALSE; } */ return TRUE; } static gboolean add_template_rsc(xmlNode * xml_obj, pe_working_set_t * data_set) { const char *template_ref = NULL; const char *id = NULL; xmlNode *rsc_set = NULL; xmlNode *rsc_ref = NULL; if (xml_obj == NULL) { pe_err("No resource object for processing resource list of template"); return FALSE; } template_ref = crm_element_value(xml_obj, XML_CIB_TAG_RSC_TEMPLATE); if (template_ref == NULL) { return TRUE; } id = ID(xml_obj); if (id == NULL) { pe_err("'%s' object must have a id", crm_element_name(xml_obj)); return FALSE; } if (crm_str_eq(template_ref, id, TRUE)) { pe_err("The resource object '%s' should not reference itself", id); return FALSE; } rsc_set = g_hash_table_lookup(data_set->template_rsc_sets, template_ref); if (rsc_set == NULL) { rsc_set = create_xml_node(NULL, XML_CONS_TAG_RSC_SET); crm_xml_add(rsc_set, XML_ATTR_ID, template_ref); g_hash_table_insert(data_set->template_rsc_sets, strdup(template_ref), rsc_set); } rsc_ref = create_xml_node(rsc_set, XML_TAG_RESOURCE_REF); crm_xml_add(rsc_ref, XML_ATTR_ID, id); return TRUE; } gboolean common_unpack(xmlNode * xml_obj, resource_t ** rsc, resource_t * parent, pe_working_set_t * data_set) { xmlNode *expanded_xml = NULL; xmlNode *ops = NULL; resource_t *top = NULL; const char *value = NULL; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); const char *class = crm_element_value(xml_obj, XML_AGENT_ATTR_CLASS); crm_log_xml_trace(xml_obj, "Processing resource input..."); if (id == NULL) { pe_err("Must specify id tag in "); return FALSE; } else if (rsc == NULL) { pe_err("Nowhere to unpack resource into"); return FALSE; } if (unpack_template(xml_obj, &expanded_xml, data_set) == FALSE) { return FALSE; } *rsc = calloc(1, sizeof(resource_t)); if (expanded_xml) { crm_log_xml_trace(expanded_xml, "Expanded resource..."); (*rsc)->xml = expanded_xml; (*rsc)->orig_xml = xml_obj; } else { (*rsc)->xml = xml_obj; (*rsc)->orig_xml = NULL; } (*rsc)->parent = parent; ops = find_xml_node((*rsc)->xml, "operations", FALSE); (*rsc)->ops_xml = expand_idref(ops, data_set->input); (*rsc)->variant = get_resource_type(crm_element_name(xml_obj)); if ((*rsc)->variant == pe_unknown) { pe_err("Unknown resource type: %s", crm_element_name(xml_obj)); free(*rsc); return FALSE; } (*rsc)->parameters = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); (*rsc)->meta = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); (*rsc)->allowed_nodes = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str); (*rsc)->known_on = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str); value = crm_element_value(xml_obj, XML_RSC_ATTR_INCARNATION); if (value) { (*rsc)->id = crm_concat(id, value, ':'); add_hash_param((*rsc)->meta, XML_RSC_ATTR_INCARNATION, value); } else { (*rsc)->id = strdup(id); } if (parent) { (*rsc)->long_name = crm_concat(parent->long_name, (*rsc)->id, ':'); } else { (*rsc)->long_name = strdup((*rsc)->id); } (*rsc)->fns = &resource_class_functions[(*rsc)->variant]; crm_trace("Unpacking resource..."); get_meta_attributes((*rsc)->meta, *rsc, NULL, data_set); (*rsc)->flags = 0; set_bit((*rsc)->flags, pe_rsc_runnable); set_bit((*rsc)->flags, pe_rsc_provisional); if (is_set(data_set->flags, pe_flag_is_managed_default)) { set_bit((*rsc)->flags, pe_rsc_managed); } (*rsc)->rsc_cons = NULL; (*rsc)->rsc_tickets = NULL; (*rsc)->actions = NULL; (*rsc)->role = RSC_ROLE_STOPPED; (*rsc)->next_role = RSC_ROLE_UNKNOWN; (*rsc)->recovery_type = recovery_stop_start; (*rsc)->stickiness = data_set->default_resource_stickiness; (*rsc)->migration_threshold = INFINITY; (*rsc)->failure_timeout = 0; value = g_hash_table_lookup((*rsc)->meta, XML_CIB_ATTR_PRIORITY); (*rsc)->priority = crm_parse_int(value, "0"); (*rsc)->effective_priority = (*rsc)->priority; value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_NOTIFY); if (crm_is_true(value)) { set_bit((*rsc)->flags, pe_rsc_notify); } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_MANAGED); if (value != NULL && safe_str_neq("default", value)) { gboolean bool_value = TRUE; crm_str_to_boolean(value, &bool_value); if (bool_value == FALSE) { clear_bit((*rsc)->flags, pe_rsc_managed); } else { set_bit((*rsc)->flags, pe_rsc_managed); } } if (is_set(data_set->flags, pe_flag_maintenance_mode)) { clear_bit((*rsc)->flags, pe_rsc_managed); } crm_trace("Options for %s", (*rsc)->id); value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_UNIQUE); top = uber_parent(*rsc); if (crm_is_true(value) || top->variant < pe_clone) { set_bit((*rsc)->flags, pe_rsc_unique); } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_RESTART); if (safe_str_eq(value, "restart")) { (*rsc)->restart_type = pe_restart_restart; crm_trace("\tDependency restart handling: restart"); } else { (*rsc)->restart_type = pe_restart_ignore; crm_trace("\tDependency restart handling: ignore"); } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_MULTIPLE); if (safe_str_eq(value, "stop_only")) { (*rsc)->recovery_type = recovery_stop_only; crm_trace("\tMultiple running resource recovery: stop only"); } else if (safe_str_eq(value, "block")) { (*rsc)->recovery_type = recovery_block; crm_trace("\tMultiple running resource recovery: block"); } else { (*rsc)->recovery_type = recovery_stop_start; crm_trace("\tMultiple running resource recovery: stop/start"); } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_STICKINESS); if (value != NULL && safe_str_neq("default", value)) { (*rsc)->stickiness = char2score(value); } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_FAIL_STICKINESS); if (value != NULL && safe_str_neq("default", value)) { (*rsc)->migration_threshold = char2score(value); } else if (value == NULL) { /* Make a best-effort guess at a migration threshold for people with 0.6 configs * try with underscores and hyphens, from both the resource and global defaults section */ value = g_hash_table_lookup((*rsc)->meta, "resource-failure-stickiness"); if (value == NULL) { value = g_hash_table_lookup((*rsc)->meta, "resource_failure_stickiness"); } if (value == NULL) { value = g_hash_table_lookup(data_set->config_hash, "default-resource-failure-stickiness"); } if (value == NULL) { value = g_hash_table_lookup(data_set->config_hash, "default_resource_failure_stickiness"); } if (value) { int fail_sticky = char2score(value); if (fail_sticky == -INFINITY) { (*rsc)->migration_threshold = 1; crm_info ("Set a migration threshold of %d for %s based on a failure-stickiness of %s", (*rsc)->migration_threshold, (*rsc)->id, value); } else if ((*rsc)->stickiness != 0 && fail_sticky != 0) { (*rsc)->migration_threshold = (*rsc)->stickiness / fail_sticky; if ((*rsc)->migration_threshold < 0) { /* Make sure it's positive */ (*rsc)->migration_threshold = 0 - (*rsc)->migration_threshold; } (*rsc)->migration_threshold += 1; crm_info ("Calculated a migration threshold for %s of %d based on a stickiness of %d/%s", (*rsc)->id, (*rsc)->migration_threshold, (*rsc)->stickiness, value); } } } value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_FAIL_TIMEOUT); if (value != NULL) { /* call crm_get_msec() and convert back to seconds */ (*rsc)->failure_timeout = (crm_get_msec(value) / 1000); } get_target_role(*rsc, &((*rsc)->next_role)); crm_trace("\tDesired next state: %s", (*rsc)->next_role != RSC_ROLE_UNKNOWN ? role2text((*rsc)->next_role) : "default"); if ((*rsc)->fns->unpack(*rsc, data_set) == FALSE) { return FALSE; } if (is_set(data_set->flags, pe_flag_symmetric_cluster)) { resource_location(*rsc, NULL, 0, "symmetric_default", data_set); } crm_trace("\tAction notification: %s", is_set((*rsc)->flags, pe_rsc_notify) ? "required" : "not required"); if (safe_str_eq(class, "stonith")) { - set_bit_inplace(data_set->flags, pe_flag_have_stonith_resource); + set_bit(data_set->flags, pe_flag_have_stonith_resource); } (*rsc)->utilization = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); unpack_instance_attributes(data_set->input, (*rsc)->xml, XML_TAG_UTILIZATION, NULL, (*rsc)->utilization, NULL, FALSE, data_set->now); /* data_set->resources = g_list_append(data_set->resources, (*rsc)); */ if (expanded_xml) { if (add_template_rsc(xml_obj, data_set) == FALSE) { return FALSE; } } return TRUE; } void common_update_score(resource_t * rsc, const char *id, int score) { node_t *node = NULL; node = pe_hash_table_lookup(rsc->allowed_nodes, id); if (node != NULL) { crm_trace("Updating score for %s on %s: %d + %d", rsc->id, id, node->weight, score); node->weight = merge_weights(node->weight, score); } if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; common_update_score(child_rsc, id, score); } } } resource_t * uber_parent(resource_t * rsc) { resource_t *parent = rsc; if (parent == NULL) { return NULL; } while (parent->parent != NULL) { parent = parent->parent; } return parent; } void common_free(resource_t * rsc) { if (rsc == NULL) { return; } crm_trace("Freeing %s %d", rsc->id, rsc->variant); g_list_free(rsc->rsc_cons); g_list_free(rsc->rsc_cons_lhs); g_list_free(rsc->rsc_tickets); g_list_free(rsc->dangling_migrations); if (rsc->parameters != NULL) { g_hash_table_destroy(rsc->parameters); } if (rsc->meta != NULL) { g_hash_table_destroy(rsc->meta); } if (rsc->utilization != NULL) { g_hash_table_destroy(rsc->utilization); } if (rsc->orig_xml) { free_xml(rsc->xml); } if (rsc->parent == NULL && is_set(rsc->flags, pe_rsc_orphan)) { if (rsc->orig_xml) { free_xml(rsc->orig_xml); } else { free_xml(rsc->xml); } } if (rsc->running_on) { g_list_free(rsc->running_on); rsc->running_on = NULL; } if (rsc->known_on) { g_hash_table_destroy(rsc->known_on); rsc->known_on = NULL; } if (rsc->actions) { g_list_free(rsc->actions); rsc->actions = NULL; } if (rsc->allowed_nodes) { g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = NULL; } g_list_free(rsc->rsc_location); free(rsc->id); free(rsc->long_name); free(rsc->clone_name); free(rsc->allocated_to); free(rsc->variant_opaque); free(rsc); crm_trace("Resource freed"); } diff --git a/lib/pengine/status.c b/lib/pengine/status.c index 6e7d776e07..a58b312a34 100644 --- a/lib/pengine/status.c +++ b/lib/pengine/status.c @@ -1,287 +1,287 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); #define MEMCHECK_STAGE_0 0 #define check_and_exit(stage) cleanup_calculations(data_set); \ crm_mem_stats(NULL); \ crm_err("Exiting: stage %d", stage); \ exit(1); /* * Unpack everything * At the end you'll have: * - A list of nodes * - A list of resources (each with any dependencies on other resources) * - A list of constraints between resources and nodes * - A list of constraints between start/stop actions * - A list of nodes that need to be stonith'd * - A list of nodes that need to be shutdown * - A list of the possible stop/start actions (without dependencies) */ gboolean cluster_status(pe_working_set_t * data_set) { xmlNode *config = get_object_root(XML_CIB_TAG_CRMCONFIG, data_set->input); xmlNode *cib_nodes = get_object_root(XML_CIB_TAG_NODES, data_set->input); xmlNode *cib_resources = get_object_root(XML_CIB_TAG_RESOURCES, data_set->input); xmlNode *cib_status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); xmlNode *cib_domains = get_object_root(XML_CIB_TAG_DOMAINS, data_set->input); const char *value = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); crm_trace("Beginning unpack"); pe_dataset = data_set; /* reset remaining global variables */ data_set->failed = create_xml_node(NULL, "failed-ops"); if (data_set->input == NULL) { return FALSE; } if (data_set->now == NULL) { data_set->now = new_ha_date(TRUE); } if (data_set->input != NULL && crm_element_value(data_set->input, XML_ATTR_DC_UUID) != NULL) { /* this should always be present */ data_set->dc_uuid = crm_element_value_copy(data_set->input, XML_ATTR_DC_UUID); } - clear_bit_inplace(data_set->flags, pe_flag_have_quorum); + clear_bit(data_set->flags, pe_flag_have_quorum); if (crm_is_true(value)) { - set_bit_inplace(data_set->flags, pe_flag_have_quorum); + set_bit(data_set->flags, pe_flag_have_quorum); } data_set->op_defaults = get_object_root(XML_CIB_TAG_OPCONFIG, data_set->input); data_set->rsc_defaults = get_object_root(XML_CIB_TAG_RSCCONFIG, data_set->input); unpack_config(config, data_set); if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy != no_quorum_ignore) { crm_warn("We do not have quorum" " - fencing and resource management disabled"); } unpack_nodes(cib_nodes, data_set); unpack_domains(cib_domains, data_set); unpack_resources(cib_resources, data_set); unpack_status(cib_status, data_set); - set_bit_inplace(data_set->flags, pe_flag_have_status); + set_bit(data_set->flags, pe_flag_have_status); return TRUE; } static void pe_free_resources(GListPtr resources) { resource_t *rsc = NULL; GListPtr iterator = resources; while (iterator != NULL) { iterator = iterator; rsc = (resource_t *) iterator->data; iterator = iterator->next; rsc->fns->free(rsc); } if (resources != NULL) { g_list_free(resources); } } static void pe_free_actions(GListPtr actions) { GListPtr iterator = actions; while (iterator != NULL) { pe_free_action(iterator->data); iterator = iterator->next; } if (actions != NULL) { g_list_free(actions); } } static void pe_free_nodes(GListPtr nodes) { GListPtr iterator = nodes; while (iterator != NULL) { node_t *node = (node_t *) iterator->data; struct node_shared_s *details = node->details; iterator = iterator->next; crm_trace("deleting node"); print_node("delete", node, FALSE); if (details != NULL) { crm_trace("%s is being deleted", details->uname); if (details->attrs != NULL) { g_hash_table_destroy(details->attrs); } if (details->utilization != NULL) { g_hash_table_destroy(details->utilization); } g_list_free(details->running_rsc); g_list_free(details->allocated_rsc); free(details); } free(node); } if (nodes != NULL) { g_list_free(nodes); } } void cleanup_calculations(pe_working_set_t * data_set) { pe_dataset = NULL; if (data_set == NULL) { return; } - clear_bit_inplace(data_set->flags, pe_flag_have_status); + clear_bit(data_set->flags, pe_flag_have_status); if (data_set->config_hash != NULL) { g_hash_table_destroy(data_set->config_hash); } if (data_set->tickets) { g_hash_table_destroy(data_set->tickets); } if (data_set->template_rsc_sets) { g_hash_table_destroy(data_set->template_rsc_sets); } free(data_set->dc_uuid); crm_trace("deleting resources"); pe_free_resources(data_set->resources); crm_trace("deleting actions"); pe_free_actions(data_set->actions); if (data_set->domains) { g_hash_table_destroy(data_set->domains); } crm_trace("deleting nodes"); pe_free_nodes(data_set->nodes); free_xml(data_set->graph); free_ha_date(data_set->now); free_xml(data_set->input); free_xml(data_set->failed); set_working_set_defaults(data_set); CRM_CHECK(data_set->ordering_constraints == NULL,;); CRM_CHECK(data_set->placement_constraints == NULL,;); } void set_working_set_defaults(pe_working_set_t * data_set) { pe_dataset = data_set; memset(data_set, 0, sizeof(pe_working_set_t)); data_set->order_id = 1; data_set->action_id = 1; data_set->no_quorum_policy = no_quorum_freeze; data_set->flags = 0x0ULL; - set_bit_inplace(data_set->flags, pe_flag_stop_rsc_orphans); - set_bit_inplace(data_set->flags, pe_flag_symmetric_cluster); - set_bit_inplace(data_set->flags, pe_flag_is_managed_default); - set_bit_inplace(data_set->flags, pe_flag_stop_action_orphans); + set_bit(data_set->flags, pe_flag_stop_rsc_orphans); + set_bit(data_set->flags, pe_flag_symmetric_cluster); + set_bit(data_set->flags, pe_flag_is_managed_default); + set_bit(data_set->flags, pe_flag_stop_action_orphans); } resource_t * pe_find_resource(GListPtr rsc_list, const char *id) { unsigned lpc = 0; resource_t *rsc = NULL; resource_t *match = NULL; if (id == NULL) { return NULL; } for (lpc = 0; lpc < g_list_length(rsc_list); lpc++) { rsc = g_list_nth_data(rsc_list, lpc); match = rsc->fns->find_rsc(rsc, id, NULL, pe_find_renamed | pe_find_current); if (match != NULL) { return match; } } crm_trace("No match for %s", id); return NULL; } node_t * pe_find_node_id(GListPtr nodes, const char *id) { GListPtr gIter = nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node && safe_str_eq(node->details->id, id)) { return node; } } /* error */ return NULL; } node_t * pe_find_node(GListPtr nodes, const char *uname) { GListPtr gIter = nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node && safe_str_eq(node->details->uname, uname)) { return node; } } /* error */ return NULL; } diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 24061fc301..d278248fda 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,2420 +1,2420 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_status); #define set_config_flag(data_set, option, flag) do { \ const char *tmp = pe_pref(data_set->config_hash, option); \ if(tmp) { \ if(crm_is_true(tmp)) { \ - set_bit_inplace(data_set->flags, flag); \ + set_bit(data_set->flags, flag); \ } else { \ - clear_bit_inplace(data_set->flags, flag); \ + clear_bit(data_set->flags, flag); \ } \ } \ } while(0) gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, GListPtr next, enum action_fail_response *failed, pe_working_set_t * data_set); static void pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason) { CRM_CHECK(node, return); if (node->details->unclean == FALSE) { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { crm_warn("Node %s will be fenced %s", node->details->uname, reason); } else { crm_warn("Node %s is unclean %s", node->details->uname, reason); } } node->details->unclean = TRUE; } gboolean unpack_config(xmlNode * config, pe_working_set_t * data_set) { const char *value = NULL; GHashTable *config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); data_set->config_hash = config_hash; unpack_instance_attributes(data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, data_set->now); verify_pe_options(data_set->config_hash); set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes); crm_info("Startup probes: %s", is_set(data_set->flags, pe_flag_startup_probes) ? "enabled" : "disabled (dangerous)"); value = pe_pref(data_set->config_hash, "stonith-timeout"); data_set->stonith_timeout = crm_get_msec(value); crm_debug("STONITH timeout: %d", data_set->stonith_timeout); set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled); crm_debug("STONITH of failed nodes is %s", is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); crm_trace("STONITH will %s nodes", data_set->stonith_action); set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything); crm_debug("Stop all active resources: %s", is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false"); set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster); if (is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "default-resource-stickiness"); data_set->default_resource_stickiness = char2score(value); crm_debug("Default stickiness: %d", data_set->default_resource_stickiness); value = pe_pref(data_set->config_hash, "no-quorum-policy"); if (safe_str_eq(value, "ignore")) { data_set->no_quorum_policy = no_quorum_ignore; } else if (safe_str_eq(value, "freeze")) { data_set->no_quorum_policy = no_quorum_freeze; } else if (safe_str_eq(value, "suicide")) { gboolean do_panic = FALSE; crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic); if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { crm_config_err ("Setting no-quorum-policy=suicide makes no sense if stonith-enabled=false"); } if (do_panic && is_set(data_set->flags, pe_flag_stonith_enabled)) { data_set->no_quorum_policy = no_quorum_suicide; } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && do_panic == FALSE) { crm_notice("Resetting no-quorum-policy to 'stop': The cluster has never had quorum"); data_set->no_quorum_policy = no_quorum_stop; } } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of CCM Quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of CCM Quorum: Stop ALL resources"); break; case no_quorum_suicide: crm_notice("On loss of CCM Quorum: Fence all remaining nodes"); break; case no_quorum_ignore: crm_notice("On loss of CCM Quorum: Ignore"); break; } set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans); crm_trace("Orphan resources are %s", is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans); crm_trace("Orphan resource actions are %s", is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop); crm_trace("Stopped resources are removed from the status section: %s", is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false"); set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode); crm_trace("Maintenance mode: %s", is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false"); if (is_set(data_set->flags, pe_flag_maintenance_mode)) { clear_bit(data_set->flags, pe_flag_is_managed_default); } else { set_config_flag(data_set, "is-managed-default", pe_flag_is_managed_default); } crm_trace("By default resources are %smanaged", is_set(data_set->flags, pe_flag_is_managed_default) ? "" : "not "); set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal); crm_trace("Start failures are %s", is_set(data_set->flags, pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount"); node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red")); node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green")); node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow")); crm_info("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s", pe_pref(data_set->config_hash, "node-health-red"), pe_pref(data_set->config_hash, "node-health-yellow"), pe_pref(data_set->config_hash, "node-health-green")); data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy"); crm_trace("Placement strategy: %s", data_set->placement_strategy); return TRUE; } gboolean unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; const char *score = NULL; gboolean unseen_are_unclean = TRUE; const char *blind_faith = pe_pref(data_set->config_hash, "startup-fencing"); if (crm_is_true(blind_faith) == FALSE) { unseen_are_unclean = FALSE; crm_warn("Blind faith: not fencing unseen nodes"); } for (xml_obj = __xml_first_child(xml_nodes); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) { if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) { new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); crm_trace("Processing node %s/%s", uname, id); if (id == NULL) { crm_config_err("Must specify id tag in "); continue; } if (type == NULL) { crm_config_err("Must specify type tag in "); continue; } if (pe_find_node(data_set->nodes, uname) != NULL) { crm_config_warn("Detected multiple node entries with uname=%s" " - this is rarely intended", uname); } new_node = calloc(1, sizeof(node_t)); if (new_node == NULL) { return FALSE; } new_node->weight = char2score(score); new_node->fixed = FALSE; new_node->details = calloc(1, sizeof(struct node_shared_s)); if (new_node->details == NULL) { free(new_node); return FALSE; } crm_trace("Creaing node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->type = node_ping; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->running_rsc = NULL; new_node->details->attrs = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); new_node->details->utilization = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE || unseen_are_unclean == FALSE) { /* blind faith... */ new_node->details->unclean = FALSE; } else { /* all nodes are unclean until we've seen their * status entry */ new_node->details->unclean = TRUE; } if (type == NULL || safe_str_eq(type, "member") || safe_str_eq(type, NORMALNODE)) { new_node->details->type = node_member; } add_node_attrs(xml_obj, new_node, FALSE, data_set); unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_UTILIZATION, NULL, new_node->details->utilization, NULL, FALSE, data_set->now); data_set->nodes = g_list_append(data_set->nodes, new_node); crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); } } return TRUE; } static void g_hash_destroy_node_list(gpointer data) { GListPtr domain = data; g_list_free_full(domain, free); } gboolean unpack_domains(xmlNode * xml_domains, pe_working_set_t * data_set) { const char *id = NULL; GListPtr domain = NULL; xmlNode *xml_node = NULL; xmlNode *xml_domain = NULL; crm_info("Unpacking domains"); data_set->domains = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_node_list); for (xml_domain = __xml_first_child(xml_domains); xml_domain != NULL; xml_domain = __xml_next(xml_domain)) { if (crm_str_eq((const char *)xml_domain->name, XML_CIB_TAG_DOMAIN, TRUE)) { domain = NULL; id = crm_element_value(xml_domain, XML_ATTR_ID); for (xml_node = __xml_first_child(xml_domain); xml_node != NULL; xml_node = __xml_next(xml_node)) { if (crm_str_eq((const char *)xml_node->name, XML_CIB_TAG_NODE, TRUE)) { node_t *copy = NULL; node_t *node = NULL; const char *uname = crm_element_value(xml_node, "name"); const char *score = crm_element_value(xml_node, XML_RULE_ATTR_SCORE); if (uname == NULL) { crm_config_err("Invalid domain %s: Must specify id tag in ", id); continue; } node = pe_find_node(data_set->nodes, uname); if (node == NULL) { node = pe_find_node_id(data_set->nodes, uname); } if (node == NULL) { crm_config_warn("Invalid domain %s: Node %s does not exist", id, uname); continue; } copy = node_copy(node); copy->weight = char2score(score); crm_debug("Adding %s to domain %s with score %s", node->details->uname, id, score); domain = g_list_prepend(domain, copy); } } if (domain) { crm_debug("Created domain %s with %d members", id, g_list_length(domain)); g_hash_table_replace(data_set->domains, strdup(id), domain); } } } return TRUE; } static void destroy_template_rsc_set(gpointer data) { xmlNode *rsc_set = data; free_xml(rsc_set); } gboolean unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; data_set->template_rsc_sets = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_template_rsc_set); for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) { resource_t *new_rsc = NULL; if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) { const char *template_id = ID(xml_obj); if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets, template_id, NULL, NULL) == FALSE) { /* Record the template's ID for the knowledge of its existence anyway. */ g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL); } continue; } crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj)); if (common_unpack(xml_obj, &new_rsc, NULL, data_set)) { data_set->resources = g_list_append(data_set->resources, new_rsc); print_resource(LOG_DEBUG_3, "Added", new_rsc, FALSE); } else { crm_config_err("Failed unpacking %s %s", crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID)); if (new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } } } data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority); if (is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { crm_config_err("Resource start-up disabled since no STONITH resources have been defined"); crm_config_err("Either configure some or disable STONITH with the stonith-enabled option"); crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity"); } return TRUE; } /* The ticket state section: * "/cib/status/tickets/ticket_state" */ static gboolean unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set) { const char *ticket_id = NULL; const char *granted = NULL; const char *last_granted = NULL; const char *standby = NULL; xmlAttrPtr xIter = NULL; ticket_t *ticket = NULL; ticket_id = ID(xml_ticket); if (ticket_id == NULL || strlen(ticket_id) == 0) { return FALSE; } crm_trace("Processing ticket state for %s", ticket_id); ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, data_set); if (ticket == NULL) { return FALSE; } } for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_ticket, prop_name); if(crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) { continue; } g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value)); } granted = g_hash_table_lookup(ticket->state, "granted"); if (granted && crm_is_true(granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", ticket->id); } last_granted = g_hash_table_lookup(ticket->state, "last-granted"); if (last_granted) { ticket->last_granted = crm_parse_int(last_granted, 0); } standby = g_hash_table_lookup(ticket->state, "standby"); if (standby && crm_is_true(standby)) { ticket->standby = TRUE; if (ticket->granted) { crm_info("Granted ticket '%s' is in standby-mode", ticket->id); } } else { ticket->standby = FALSE; } crm_trace("Done with ticket state for %s", ticket_id); return TRUE; } static gboolean unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; for (xml_obj = __xml_first_child(xml_tickets); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) { if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, TRUE) == FALSE) { continue; } unpack_ticket_state(xml_obj, data_set); } return TRUE; } /* Compatibility with the deprecated ticket state section: * "/cib/status/tickets/instance_attributes" */ static void get_ticket_state_legacy(gpointer key, gpointer value, gpointer user_data) { const char *long_key = key; char *state_key = NULL; const char *granted_prefix = "granted-ticket-"; const char *last_granted_prefix = "last-granted-"; static int granted_prefix_strlen = 0; static int last_granted_prefix_strlen = 0; const char *ticket_id = NULL; const char *is_granted = NULL; const char *last_granted = NULL; const char *sep = NULL; ticket_t *ticket = NULL; pe_working_set_t *data_set = user_data; if (granted_prefix_strlen == 0) { granted_prefix_strlen = strlen(granted_prefix); } if (last_granted_prefix_strlen == 0) { last_granted_prefix_strlen = strlen(last_granted_prefix); } if (strstr(long_key, granted_prefix) == long_key) { ticket_id = long_key + granted_prefix_strlen; if (strlen(ticket_id)) { state_key = strdup("granted"); is_granted = value; } } else if (strstr(long_key, last_granted_prefix) == long_key) { ticket_id = long_key + last_granted_prefix_strlen; if (strlen(ticket_id)) { state_key = strdup("last-granted"); last_granted = value; } } else if ((sep = strrchr(long_key, '-'))) { ticket_id = sep + 1; state_key = strndup(long_key, strlen(long_key) - strlen(sep)); } if (ticket_id == NULL || strlen(ticket_id) == 0) { free(state_key); return; } if (state_key == NULL || strlen(state_key) == 0) { free(state_key); return; } ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, data_set); if (ticket == NULL) { free(state_key); return; } } g_hash_table_replace(ticket->state, state_key, strdup(value)); if (is_granted) { if (crm_is_true(is_granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", ticket->id); } } else if (last_granted) { ticket->last_granted = crm_parse_int(last_granted, 0); } } /* remove nodes that are down, stopping */ /* create +ve rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(xmlNode * status, pe_working_set_t * data_set) { const char *id = NULL; const char *uname = NULL; xmlNode *lrm_rsc = NULL; xmlNode *attrs = NULL; xmlNode *state = NULL; xmlNode *node_state = NULL; node_t *this_node = NULL; crm_trace("Beginning unpack"); if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket); } for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) { if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) { xmlNode *xml_tickets = state; GHashTable *state_hash = NULL; /* Compatibility with the deprecated ticket state section: * Unpack the attributes in the deprecated "/cib/status/tickets/instance_attributes" if it exists. */ state_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); unpack_instance_attributes(data_set->input, xml_tickets, XML_TAG_ATTR_SETS, NULL, state_hash, NULL, TRUE, data_set->now); g_hash_table_foreach(state_hash, get_ticket_state_legacy, data_set); if (state_hash) { g_hash_table_destroy(state_hash); } /* Unpack the new "/cib/status/tickets/ticket_state"s */ unpack_tickets_state(xml_tickets, data_set); } if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) { node_state = state; id = crm_element_value(node_state, XML_ATTR_ID); uname = crm_element_value(node_state, XML_ATTR_UNAME); attrs = find_xml_node(node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); crm_trace("Processing node id=%s, uname=%s", id, uname); this_node = pe_find_node_id(data_set->nodes, id); if (uname == NULL) { /* error */ continue; } else if (this_node == NULL) { crm_config_warn("Node %s in status section no longer exists", uname); continue; } /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; add_node_attrs(attrs, this_node, TRUE, data_set); if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } crm_trace("determining node state"); determine_online_status(node_state, this_node, data_set); if (this_node->details->online && data_set->no_quorum_policy == no_quorum_suicide) { /* Everything else should flow from this automatically * At least until the PE becomes able to migrate off healthy resources */ pe_fence_node(data_set, this_node, "because the cluster does not have quorum"); } } } /* Now that we know all node states, we can safely handle migration ops * But, for now, only process healthy nodes * - this is necessary for the logic in bug lf#2508 to function correctly */ for (node_state = __xml_first_child(status); node_state != NULL; node_state = __xml_next(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { continue; } id = crm_element_value(node_state, XML_ATTR_ID); this_node = pe_find_node_id(data_set->nodes, id); if (this_node == NULL) { crm_info("Node %s is unknown", id); continue; } else if (this_node->details->online) { crm_trace("Processing lrm resource entries on healthy node: %s", this_node->details->uname); lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); unpack_lrm_resources(this_node, lrm_rsc, data_set); } } /* Now handle failed nodes - but only if stonith is enabled * * By definition, offline nodes run no resources so there is nothing to do. * Only when stonith is enabled do we need to know what is on the node to * ensure rsc start events happen after the stonith */ for (node_state = __xml_first_child(status); node_state != NULL && is_set(data_set->flags, pe_flag_stonith_enabled); node_state = __xml_next(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { continue; } id = crm_element_value(node_state, XML_ATTR_ID); this_node = pe_find_node_id(data_set->nodes, id); if (this_node == NULL || this_node->details->online) { continue; } else { crm_trace("Processing lrm resource entries on unhealthy node: %s", this_node->details->uname); lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); unpack_lrm_resources(this_node, lrm_rsc, data_set); } } return TRUE; } static gboolean determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state, node_t * this_node) { gboolean online = FALSE; const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE); const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if (ha_state == NULL) { ha_state = DEADSTATUS; } if (!crm_is_true(ccm_state) || safe_str_eq(ha_state, DEADSTATUS)) { crm_trace("Node is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); } else if (safe_str_eq(crm_state, ONLINESTATUS)) { if (safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join_state); } } else if (this_node->details->expected_up == FALSE) { crm_trace("CRMd is down: ha_state=%s, ccm_state=%s", crm_str(ha_state), crm_str(ccm_state)); crm_trace("\tcrm_state=%s, join_state=%s, expected=%s", crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { /* mark it unclean */ pe_fence_node(data_set, this_node, "because it is partially and/or un-expectedly down"); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } return online; } static gboolean determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state, node_t * this_node) { gboolean online = FALSE; gboolean do_terminate = FALSE; const char *join_state = crm_element_value(node_state, XML_CIB_ATTR_JOINSTATE); const char *crm_state = crm_element_value(node_state, XML_CIB_ATTR_CRMDSTATE); const char *ccm_state = crm_element_value(node_state, XML_CIB_ATTR_INCCM); const char *ha_state = crm_element_value(node_state, XML_CIB_ATTR_HASTATE); const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); const char *terminate = g_hash_table_lookup(this_node->details->attrs, "terminate"); if (ha_state == NULL) { ha_state = DEADSTATUS; } if (crm_is_true(terminate)) { do_terminate = TRUE; } else if (terminate != NULL && strlen(terminate) > 0) { /* could be a time() value */ char t = terminate[0]; if (t != '0' && isdigit(t)) { do_terminate = TRUE; } } if (crm_is_true(ccm_state) && safe_str_eq(ha_state, ACTIVESTATUS) && safe_str_eq(crm_state, ONLINESTATUS)) { if (safe_str_eq(join_state, CRMD_JOINSTATE_MEMBER)) { online = TRUE; if (do_terminate) { pe_fence_node(data_set, this_node, "because termination was requested"); } } else if (join_state == exp_state /* == NULL */ ) { crm_info("Node %s is coming up", this_node->details->uname); crm_debug("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else if (safe_str_eq(join_state, CRMD_JOINSTATE_PENDING)) { crm_info("Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; online = TRUE; } else if (safe_str_eq(join_state, CRMD_JOINSTATE_NACK)) { crm_warn("Node %s is not part of the cluster", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; online = TRUE; } else if (safe_str_eq(join_state, exp_state)) { crm_info("Node %s is still coming up: %s", this_node->details->uname, join_state); crm_info("\tha_state=%s, ccm_state=%s, crm_state=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state)); this_node->details->standby = TRUE; this_node->details->pending = TRUE; online = TRUE; } else { crm_warn("Node %s (%s) is un-expectedly down", this_node->details->uname, this_node->details->id); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); pe_fence_node(data_set, this_node, "because it is un-expectedly down"); } } else if (crm_is_true(ccm_state) == FALSE && safe_str_eq(ha_state, DEADSTATUS) && safe_str_eq(crm_state, OFFLINESTATUS) && this_node->details->expected_up == FALSE) { crm_debug("Node %s is down: join_state=%s, expected=%s", this_node->details->uname, crm_str(join_state), crm_str(exp_state)); #if 0 /* While a nice optimization, it causes the cluster to block until the node * comes back online. Which is a serious problem if the cluster software * is not configured to start at boot or stonith is configured to merely * stop the node instead of restart it. * Easily triggered by setting terminate=true for the DC */ } else if (do_terminate) { crm_info("Node %s is %s after forced termination", this_node->details->uname, crm_is_true(ccm_state) ? "coming up" : "going down"); crm_debug("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); if (crm_is_true(ccm_state) == FALSE) { this_node->details->standby = TRUE; this_node->details->pending = TRUE; online = TRUE; } #endif } else if (this_node->details->expected_up) { /* mark it unclean */ pe_fence_node(data_set, this_node, "because it is un-expectedly down"); crm_info("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } else { crm_info("Node %s is down", this_node->details->uname); crm_debug("\tha_state=%s, ccm_state=%s," " crm_state=%s, join_state=%s, expected=%s", crm_str(ha_state), crm_str(ccm_state), crm_str(crm_state), crm_str(join_state), crm_str(exp_state)); } return online; } gboolean determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set) { gboolean online = FALSE; const char *shutdown = NULL; const char *exp_state = crm_element_value(node_state, XML_CIB_ATTR_EXPSTATE); if (this_node == NULL) { crm_config_err("No node to check"); return online; } this_node->details->shutdown = FALSE; this_node->details->expected_up = FALSE; shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN); if (shutdown != NULL && safe_str_neq("0", shutdown)) { this_node->details->shutdown = TRUE; } else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) { this_node->details->expected_up = TRUE; } if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { online = determine_online_status_no_fencing(data_set, node_state, this_node); } else { online = determine_online_status_fencing(data_set, node_state, this_node); } if (online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (online && this_node->details->shutdown) { /* dont run resources here */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (this_node->details->unclean) { pe_proc_warn("Node %s is unclean", this_node->details->uname); } else if (this_node->details->online) { crm_info("Node %s is %s", this_node->details->uname, this_node->details->shutdown ? "shutting down" : this_node->details->pending ? "pending" : this_node->details->standby ? "standby" : "online"); } else { crm_trace("Node %s is offline", this_node->details->uname); } return online; } #define set_char(x) last_rsc_id[lpc] = x; complete = TRUE; char * clone_zero(const char *last_rsc_id) { int lpc = 0; char *zero = NULL; CRM_CHECK(last_rsc_id != NULL, return NULL); if (last_rsc_id != NULL) { lpc = strlen(last_rsc_id); } while (--lpc > 0) { switch (last_rsc_id[lpc]) { case 0: return NULL; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': zero = calloc(1, lpc + 3); memcpy(zero, last_rsc_id, lpc); zero[lpc] = ':'; zero[lpc + 1] = '0'; zero[lpc + 2] = 0; return zero; } } return NULL; } char * increment_clone(char *last_rsc_id) { int lpc = 0; int len = 0; char *tmp = NULL; gboolean complete = FALSE; CRM_CHECK(last_rsc_id != NULL, return NULL); if (last_rsc_id != NULL) { len = strlen(last_rsc_id); } lpc = len - 1; while (complete == FALSE && lpc > 0) { switch (last_rsc_id[lpc]) { case 0: lpc--; break; case '0': set_char('1'); break; case '1': set_char('2'); break; case '2': set_char('3'); break; case '3': set_char('4'); break; case '4': set_char('5'); break; case '5': set_char('6'); break; case '6': set_char('7'); break; case '7': set_char('8'); break; case '8': set_char('9'); break; case '9': last_rsc_id[lpc] = '0'; lpc--; break; case ':': tmp = last_rsc_id; last_rsc_id = calloc(1, len + 2); memcpy(last_rsc_id, tmp, len); last_rsc_id[++lpc] = '1'; last_rsc_id[len] = '0'; last_rsc_id[len + 1] = 0; complete = TRUE; free(tmp); break; default: crm_err("Unexpected char: %c (%d)", last_rsc_id[lpc], lpc); return NULL; break; } } return last_rsc_id; } static int get_clone(char *last_rsc_id) { int clone = 0; int lpc = 0; int len = 0; CRM_CHECK(last_rsc_id != NULL, return -1); if (last_rsc_id != NULL) { len = strlen(last_rsc_id); } lpc = len - 1; while (lpc > 0) { switch (last_rsc_id[lpc]) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': clone += (int)(last_rsc_id[lpc] - '0') * (len - lpc); lpc--; break; case ':': return clone; break; default: crm_err("Unexpected char: %d (%c)", lpc, last_rsc_id[lpc]); return clone; break; } } return -1; } static resource_t * create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set) { resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_debug(xml_rsc, "Orphan resource"); if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) { return NULL; } set_bit(rsc->flags, pe_rsc_orphan); data_set->resources = g_list_append(data_set->resources, rsc); return rsc; } extern resource_t *create_child_clone(resource_t * rsc, int sub_id, pe_working_set_t * data_set); static resource_t * find_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent, const char *rsc_id) { int len = 0; resource_t *rsc = NULL; char *base = clone_zero(rsc_id); char *alt_rsc_id = NULL; CRM_ASSERT(parent != NULL); CRM_ASSERT(parent->variant == pe_clone || parent->variant == pe_master); if (base) { len = strlen(base); } if (len > 0) { base[len - 1] = 0; } crm_trace("Looking for %s on %s in %s %d", rsc_id, node->details->uname, parent->id, is_set(parent->flags, pe_rsc_unique)); if (is_set(parent->flags, pe_rsc_unique)) { crm_trace("Looking for %s", rsc_id); rsc = parent->fns->find_rsc(parent, rsc_id, NULL, pe_find_current); } else { crm_trace("Looking for %s on %s", base, node->details->uname); rsc = parent->fns->find_rsc(parent, base, node, pe_find_partial | pe_find_current); if (rsc != NULL && rsc->running_on) { GListPtr gIter = parent->children; rsc = NULL; crm_trace("Looking for an existing orphan for %s: %s on %s", parent->id, rsc_id, node->details->uname); /* There is already an instance of this _anonymous_ clone active on "node". * * If there is a partially active orphan (only applies to clone groups) on * the same node, use that. * Otherwise create a new (orphaned) instance at "orphan_check:". */ for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; node_t *loc = child->fns->location(child, NULL, TRUE); if (loc && loc->details == node->details) { resource_t *tmp = child->fns->find_rsc(child, base, NULL, pe_find_partial | pe_find_current); if (tmp && tmp->running_on == NULL) { rsc = tmp; break; } } } goto orphan_check; } else if (((resource_t *) parent->children->data)->variant == pe_group) { /* If we're grouped, we need to look for a peer thats active on $node * and use their clone instance number */ resource_t *peer = parent->fns->find_rsc(parent, NULL, node, pe_find_clone | pe_find_current); if (peer && peer->running_on) { char buffer[256]; int clone_num = get_clone(peer->id); snprintf(buffer, 256, "%s%d", base, clone_num); rsc = parent->fns->find_rsc(parent, buffer, node, pe_find_current | pe_find_inactive); if (rsc) { crm_trace("Found someone active: %s on %s, becoming %s", peer->id, ((node_t *) peer->running_on->data)->details->uname, buffer); } } } if (parent->fns->find_rsc(parent, rsc_id, NULL, pe_find_current)) { alt_rsc_id = strdup(rsc_id); } else { alt_rsc_id = clone_zero(rsc_id); } while (rsc == NULL) { rsc = parent->fns->find_rsc(parent, alt_rsc_id, NULL, pe_find_current); if (rsc == NULL) { crm_trace("Unknown resource: %s", alt_rsc_id); break; } if (rsc->running_on == NULL) { crm_trace("Resource %s: just right", alt_rsc_id); break; } crm_trace("Resource %s: already active", alt_rsc_id); alt_rsc_id = increment_clone(alt_rsc_id); rsc = NULL; } } orphan_check: if (rsc == NULL) { /* Create an extra orphan */ resource_t *top = create_child_clone(parent, -1, data_set); crm_debug("Created orphan for %s: %s on %s", parent->id, rsc_id, node->details->uname); rsc = top->fns->find_rsc(top, base, NULL, pe_find_current | pe_find_partial); CRM_ASSERT(rsc != NULL); } free(rsc->clone_name); rsc->clone_name = NULL; if (safe_str_neq(rsc_id, rsc->id)) { crm_info("Internally renamed %s on %s to %s%s", rsc_id, node->details->uname, rsc->id, is_set(rsc->flags, pe_rsc_orphan) ? " (ORPHAN)" : ""); rsc->clone_name = strdup(rsc_id); } free(alt_rsc_id); free(base); return rsc; } static resource_t * unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id, xmlNode * rsc_entry) { resource_t *rsc = NULL; resource_t *clone_parent = NULL; char *alt_rsc_id = strdup(rsc_id); crm_trace("looking for %s", rsc_id); rsc = pe_find_resource(data_set->resources, alt_rsc_id); /* no match */ if (rsc == NULL) { /* Even when clone-max=0, we still create a single :0 orphan to match against */ char *tmp = clone_zero(alt_rsc_id); resource_t *clone0 = pe_find_resource(data_set->resources, tmp); clone_parent = uber_parent(clone0); free(tmp); crm_trace("%s not found: %s", alt_rsc_id, clone_parent ? clone_parent->id : "orphan"); } else { clone_parent = uber_parent(rsc); } if (clone_parent && clone_parent->variant > pe_group) { rsc = find_clone(data_set, node, clone_parent, rsc_id); CRM_ASSERT(rsc != NULL); } free(alt_rsc_id); return rsc; } static resource_t * process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set) { resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } else { GListPtr gIter = NULL; print_resource(LOG_DEBUG_3, "Added orphan", rsc, FALSE); CRM_CHECK(rsc != NULL, return NULL); resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node->details->online && get_failcount(node, rsc, NULL, data_set)) { action_t *clear_op = NULL; action_t *ready = get_pseudo_op(CRM_OP_PROBED, data_set); clear_op = custom_action(rsc, crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_'), CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set); add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); crm_info("Clearing failcount (%d) for orphaned resource %s on %s (%s)", get_failcount(node, rsc, NULL, data_set), rsc->id, node->details->uname, clear_op->uuid); order_actions(clear_op, ready, pe_order_optional); } } } return rsc; } static void process_rsc_state(resource_t * rsc, node_t * node, enum action_fail_response on_fail, xmlNode * migrate_op, pe_working_set_t * data_set) { crm_trace("Resource %s is %s on %s: on_fail=%s", rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail)); /* process current state */ if (rsc->role != RSC_ROLE_UNKNOWN) { resource_t *iter = rsc; while (iter) { if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) { node_t *n = node_copy(node); g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n); } if (is_set(iter->flags, pe_rsc_unique)) { break; } iter = iter->parent; } } if (node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = action_fail_ignore; } switch (on_fail) { case action_fail_ignore: /* nothing to do */ break; case action_fail_fence: /* treat it as if it is still running * but also mark the node as unclean */ pe_fence_node(data_set, node, "to recover from resource failure(s)"); break; case action_fail_standby: node->details->standby = TRUE; node->details->standby_onfail = TRUE; break; case action_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); break; case action_fail_migrate: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set); break; case action_fail_stop: rsc->next_role = RSC_ROLE_STOPPED; break; case action_fail_recover: if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { set_bit(rsc->flags, pe_rsc_failed); stop_action(rsc, node, FALSE); } break; } if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { if (is_set(rsc->flags, pe_rsc_orphan)) { if (is_set(rsc->flags, pe_rsc_managed)) { crm_config_warn("Detected active orphan %s running on %s", rsc->id, node->details->uname); } else { crm_config_warn("Cluster configured not to stop active orphans." " %s must be stopped manually on %s", rsc->id, node->details->uname); } } native_add_running(rsc, node, data_set); if (on_fail != action_fail_ignore) { set_bit(rsc->flags, pe_rsc_failed); } } else if (rsc->clone_name) { crm_trace("Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); free(rsc->clone_name); rsc->clone_name = NULL; } else { char *key = stop_key(rsc); GListPtr possible_matches = find_actions(rsc->actions, key, node); GListPtr gIter = possible_matches; for (; gIter != NULL; gIter = gIter->next) { action_t *stop = (action_t *) gIter->data; stop->flags |= pe_action_optional; } free(key); } } /* create active recurring operations as optional */ static void process_recurring(node_t * node, resource_t * rsc, int start_index, int stop_index, GListPtr sorted_op_list, pe_working_set_t * data_set) { int counter = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; crm_trace("%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; int interval = 0; char *key = NULL; const char *id = ID(rsc_op); const char *interval_s = NULL; counter++; if (node->details->online == FALSE) { crm_trace("Skipping %s/%s: node is offline", rsc->id, node->details->uname); break; /* Need to check if there's a monitor for role="Stopped" */ } else if (start_index < stop_index && counter <= stop_index) { crm_trace("Skipping %s/%s: resource is not active", id, node->details->uname); continue; } else if (counter < start_index) { crm_trace("Skipping %s/%s: old %d", id, node->details->uname, counter); continue; } interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if (interval == 0) { crm_trace("Skipping %s/%s: non-recurring", id, node->details->uname); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (safe_str_eq(status, "-1")) { crm_trace("Skipping %s/%s: status", id, node->details->uname); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = generate_op_key(rsc->id, task, interval); crm_trace("Creating %s/%s", key, node->details->uname); custom_action(rsc, key, task, node, TRUE, TRUE, data_set); } } void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) { int counter = -1; int implied_monitor_start = -1; int implied_master_start = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; *stop_index = -1; *start_index = -1; for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (safe_str_eq(task, CRMD_ACTION_STOP) && safe_str_eq(status, "0")) { *stop_index = counter; } else if (safe_str_eq(task, CRMD_ACTION_START) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { *start_index = counter; } else if ((implied_monitor_start <= *stop_index) && safe_str_eq(task, CRMD_ACTION_STATUS)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if (safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) { implied_monitor_start = counter; } } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) { implied_master_start = counter; } } if (*start_index == -1) { if (implied_master_start != -1) { *start_index = implied_master_start; } else if (implied_monitor_start != -1) { *start_index = implied_monitor_start; } } } static void unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set) { GListPtr gIter = NULL; int stop_index = -1; int start_index = -1; enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; const char *task = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; xmlNode *migrate_op = NULL; xmlNode *rsc_op = NULL; enum action_fail_response on_fail = FALSE; enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; crm_trace("[%s] Processing %s on %s", crm_element_name(rsc_entry), rsc_id, node->details->uname); /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return; } /* find the resource */ rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); if (rsc == NULL) { rsc = process_orphan_resource(rsc_entry, node, data_set); } CRM_ASSERT(rsc != NULL); /* process operations */ saved_role = rsc->role; on_fail = action_fail_ignore; rsc->role = RSC_ROLE_UNKNOWN; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { migrate_op = rsc_op; } unpack_rsc_op(rsc, node, rsc_op, gIter->next, &on_fail, data_set); } /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail, migrate_op, data_set); if (get_target_role(rsc, &req_role)) { if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { crm_debug("%s: Overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); rsc->next_role = req_role; } else if (req_role > rsc->next_role) { crm_info("%s: Not overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } } if (saved_role > rsc->role) { rsc->role = saved_role; } } gboolean unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set) { xmlNode *rsc_entry = NULL; CRM_CHECK(node != NULL, return FALSE); crm_trace("Unpacking resources on %s", node->details->uname); for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next(rsc_entry)) { if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) { unpack_lrm_rsc_state(node, rsc_entry, data_set); } } return TRUE; } static void set_active(resource_t * rsc) { resource_t *top = uber_parent(rsc); if (top && top->variant == pe_master) { rsc->role = RSC_ROLE_SLAVE; } else { rsc->role = RSC_ROLE_STARTED; } } static void set_node_score(gpointer key, gpointer value, gpointer user_data) { node_t *node = value; int *score = user_data; node->weight = *score; } #define STATUS_PATH_MAX 1024 static xmlNode * find_lrm_op(const char *resource, const char *op, const char *node, const char *source, pe_working_set_t * data_set) { int offset = 0; char xpath[STATUS_PATH_MAX]; offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node); offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']", resource); /* Need to check against transition_magic too? */ if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op, source); } else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op, source); } else { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op); } return get_xpath_object(xpath, data_set->input, LOG_DEBUG); } gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, GListPtr next, enum action_fail_response * on_fail, pe_working_set_t * data_set) { int task_id = 0; const char *id = NULL; const char *key = NULL; const char *task = NULL; const char *task_key = NULL; const char *magic = NULL; const char *actual_rc = NULL; /* const char *target_rc = NULL; */ const char *task_status = NULL; const char *interval_s = NULL; const char *op_version = NULL; int interval = 0; int task_status_i = -2; int actual_rc_i = 0; int target_rc = -1; int last_failure = 0; action_t *action = NULL; node_t *effective_node = NULL; resource_t *failed = NULL; gboolean expired = FALSE; gboolean is_probe = FALSE; gboolean clear_past_failure = FALSE; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(node != NULL, return FALSE); CRM_CHECK(xml_op != NULL, return FALSE); id = ID(xml_op); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); task_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); task_status = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); CRM_CHECK(id != NULL, return FALSE); CRM_CHECK(task != NULL, return FALSE); CRM_CHECK(task_status != NULL, return FALSE); task_status_i = crm_parse_int(task_status, NULL); CRM_CHECK(task_status_i <= PCMK_LRM_OP_ERROR, return FALSE); CRM_CHECK(task_status_i >= PCMK_LRM_OP_PENDING, return FALSE); if (safe_str_eq(task, CRMD_ACTION_NOTIFY)) { /* safe to ignore these */ return TRUE; } if (rsc->failure_timeout > 0) { int last_run = 0; if (crm_element_value_int(xml_op, "last-rc-change", &last_run) == 0) { time_t now = get_timet_now(data_set); if (now > (last_run + rsc->failure_timeout)) { expired = TRUE; } } } crm_trace("Unpacking task %s/%s (call_id=%d, status=%s) on %s (role=%s)", id, task, task_id, task_status, node->details->uname, role2text(rsc->role)); interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if (interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; } if (node->details->unclean) { crm_trace("Node %s (where %s is running) is unclean." " Further action depends on the value of the stop's on-fail attribue", node->details->uname, rsc->id); } actual_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); CRM_CHECK(actual_rc != NULL, return FALSE); actual_rc_i = crm_parse_int(actual_rc, NULL); if (key) { int dummy = 0; char *dummy_string = NULL; decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc); free(dummy_string); } if (task_status_i == PCMK_LRM_OP_DONE && target_rc >= 0) { if (target_rc == actual_rc_i) { task_status_i = PCMK_LRM_OP_DONE; } else { task_status_i = PCMK_LRM_OP_ERROR; crm_debug("%s on %s returned %d (%s) instead of the expected value: %d (%s)", id, node->details->uname, actual_rc_i, lrmd_event_rc2str(actual_rc_i), target_rc, lrmd_event_rc2str(target_rc)); } } else if (task_status_i == PCMK_LRM_OP_ERROR) { /* let us decide that */ task_status_i = PCMK_LRM_OP_DONE; } if (task_status_i == PCMK_LRM_OP_NOTSUPPORTED) { actual_rc_i = PCMK_EXECRA_UNIMPLEMENT_FEATURE; } if (task_status_i != actual_rc_i && rsc->failure_timeout > 0 && get_failcount(node, rsc, &last_failure, data_set) == 0) { if (last_failure > 0) { action_t *clear_op = NULL; clear_op = custom_action(rsc, crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_'), CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set); add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); crm_notice("Clearing expired failcount for %s on %s", rsc->id, node->details->uname); } } if (expired && actual_rc_i != PCMK_EXECRA_NOT_RUNNING && actual_rc_i != PCMK_EXECRA_RUNNING_MASTER && actual_rc_i != PCMK_EXECRA_OK) { crm_notice("Ignoring expired failure %s (rc=%d, magic=%s) on %s", id, actual_rc_i, magic, node->details->uname); goto done; } /* we could clean this up significantly except for old LRMs and CRMs that * didnt include target_rc and liked to remap status */ switch (actual_rc_i) { case PCMK_EXECRA_NOT_RUNNING: if (is_probe || target_rc == actual_rc_i) { task_status_i = PCMK_LRM_OP_DONE; rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } else if (safe_str_neq(task, CRMD_ACTION_STOP)) { task_status_i = PCMK_LRM_OP_ERROR; } break; case PCMK_EXECRA_RUNNING_MASTER: if (is_probe) { task_status_i = PCMK_LRM_OP_DONE; crm_notice("Operation %s found resource %s active in master mode on %s", task, rsc->id, node->details->uname); } else if (target_rc == actual_rc_i) { /* nothing to do */ } else if (target_rc >= 0) { task_status_i = PCMK_LRM_OP_ERROR; /* legacy code for pre-0.6.5 operations */ } else if (safe_str_neq(task, CRMD_ACTION_STATUS) || rsc->role != RSC_ROLE_MASTER) { task_status_i = PCMK_LRM_OP_ERROR; if (rsc->role != RSC_ROLE_MASTER) { crm_err("%s reported %s in master mode on %s", id, rsc->id, node->details->uname); } } rsc->role = RSC_ROLE_MASTER; break; case PCMK_EXECRA_FAILED_MASTER: rsc->role = RSC_ROLE_MASTER; task_status_i = PCMK_LRM_OP_ERROR; break; case PCMK_EXECRA_UNIMPLEMENT_FEATURE: if (interval > 0) { task_status_i = PCMK_LRM_OP_NOTSUPPORTED; break; } /* else: fall through */ case PCMK_EXECRA_INSUFFICIENT_PRIV: case PCMK_EXECRA_NOT_INSTALLED: case PCMK_EXECRA_INVALID_PARAM: effective_node = node; /* fall through */ case PCMK_EXECRA_NOT_CONFIGURED: failed = rsc; if (is_not_set(rsc->flags, pe_rsc_unique)) { failed = uber_parent(failed); } do_crm_log(actual_rc_i == PCMK_EXECRA_NOT_INSTALLED ? LOG_NOTICE : LOG_ERR, "Preventing %s from re-starting %s %s: operation %s failed '%s' (rc=%d)", failed->id, effective_node ? "on" : "anywhere in the cluster", effective_node ? effective_node->details->uname : "", task, lrmd_event_rc2str(actual_rc_i), actual_rc_i); resource_location(failed, effective_node, -INFINITY, "hard-error", data_set); if (is_probe) { /* treat these like stops */ task = CRMD_ACTION_STOP; task_status_i = PCMK_LRM_OP_DONE; crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); if (actual_rc_i != PCMK_EXECRA_NOT_INSTALLED || is_set(data_set->flags, pe_flag_symmetric_cluster)) { if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) { add_node_copy(data_set->failed, xml_op); } } } break; case PCMK_EXECRA_OK: if (is_probe && target_rc == 7) { task_status_i = PCMK_LRM_OP_DONE; crm_info("Operation %s found resource %s active on %s", task, rsc->id, node->details->uname); /* legacy code for pre-0.6.5 operations */ } else if (target_rc < 0 && interval > 0 && rsc->role == RSC_ROLE_MASTER) { /* catch status ops that return 0 instead of 8 while they * are supposed to be in master mode */ task_status_i = PCMK_LRM_OP_ERROR; } break; default: if (task_status_i == PCMK_LRM_OP_DONE) { crm_info("Remapping %s (rc=%d) on %s to an ERROR", id, actual_rc_i, node->details->uname); task_status_i = PCMK_LRM_OP_ERROR; } } if (task_status_i == PCMK_LRM_OP_ERROR || task_status_i == PCMK_LRM_OP_TIMEOUT || task_status_i == PCMK_LRM_OP_NOTSUPPORTED) { const char *action_key = task_key ? task_key : id; action = custom_action(rsc, strdup(action_key), task, NULL, TRUE, FALSE, data_set); if (expired) { crm_notice("Ignoring expired failure (calculated) %s (rc=%d, magic=%s) on %s", id, actual_rc_i, magic, node->details->uname); goto done; } else if (action->on_fail == action_fail_ignore) { crm_warn("Remapping %s (rc=%d) on %s to DONE: ignore", id, actual_rc_i, node->details->uname); task_status_i = PCMK_LRM_OP_DONE; set_bit(rsc->flags, pe_rsc_failure_ignored); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) { add_node_copy(data_set->failed, xml_op); } } } switch (task_status_i) { case PCMK_LRM_OP_PENDING: if (safe_str_eq(task, CRMD_ACTION_START)) { set_bit(rsc->flags, pe_rsc_start_pending); set_active(rsc); } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } /* * Intentionally ignoring pending migrate ops here; * haven't decided if we need to do anything special * with them yet... */ break; case PCMK_LRM_OP_DONE: crm_trace("%s/%s completed on %s", rsc->id, task, node->details->uname); if (actual_rc_i == PCMK_EXECRA_NOT_RUNNING) { clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_START)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_STOP)) { rsc->role = RSC_ROLE_STOPPED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* Demote from Master does not clear an error */ rsc->role = RSC_ROLE_SLAVE; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) { /* * The normal sequence is (now): migrate_to(Src) -> migrate_from(Tgt) -> stop(Src) * * So if a migrate_to is followed by a stop, then we dont need to care what * happended on the target node * * Without the stop, we need to look for a successful migrate_from. * This would also imply we're no longer running on the source * * Without the stop, and without a migrate_from op we make sure the resource * gets stopped on both source and target (assuming the target is up) * */ int stop_id = 0; xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id, NULL, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } if (stop_op == NULL || stop_id < task_id) { int from_rc = 0, from_status = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); node_t *target = pe_find_node(data_set->nodes, migrate_target); node_t *source = pe_find_node(data_set->nodes, migrate_source); xmlNode *migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source, data_set); rsc->role = RSC_ROLE_STARTED; /* can be master? */ if (migrate_from) { crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); crm_trace("%s op on %s exited with status=%d, rc=%d", ID(migrate_from), migrate_target, from_status, from_rc); } if (migrate_from && from_rc == PCMK_EXECRA_OK && from_status == PCMK_LRM_OP_DONE) { crm_trace("Detected dangling migration op: %s on %s", ID(xml_op), migrate_source); /* all good * just need to arrange for the stop action to get sent * but _without_ affecting the target somehow */ rsc->role = RSC_ROLE_STOPPED; rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } else if (migrate_from) { /* Failed */ crm_trace("Marking active on %s %p %d", migrate_target, target, target->details->online); if (target && target->details->online) { native_add_running(rsc, target, data_set); } } else { /* Pending or complete but erased */ node_t *target = pe_find_node_id(data_set->nodes, migrate_target); crm_trace("Marking active on %s %p %d", migrate_target, target, target->details->online); if (target && target->details->online) { native_add_running(rsc, target, data_set); if (source && source->details->online) { /* If we make it here we have a partial migration. The migrate_to * has completed but the migrate_from on the target has not. Hold on * to the target and source on the resource. Later on if we detect that * the resource is still going to run on that target, we may continue * the migration */ rsc->partial_migration_target = target; rsc->partial_migration_source = source; } } else { /* Consider it failed here - forces a restart, prevents migration */ - set_bit_inplace(rsc->flags, pe_rsc_failed); + set_bit(rsc->flags, pe_rsc_failed); } } } } else if (rsc->role < RSC_ROLE_STARTED) { /* start, migrate_to and migrate_from will land here */ crm_trace("%s active on %s", rsc->id, node->details->uname); set_active(rsc); } /* clear any previous failure actions */ if (clear_past_failure) { switch (*on_fail) { case action_fail_block: case action_fail_stop: case action_fail_fence: case action_fail_migrate: case action_fail_standby: crm_trace("%s.%s is not cleared by a completed stop", rsc->id, fail2text(*on_fail)); break; case action_fail_ignore: case action_fail_recover: *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } } break; case PCMK_LRM_OP_ERROR: case PCMK_LRM_OP_TIMEOUT: case PCMK_LRM_OP_NOTSUPPORTED: crm_warn("Processing failed op %s on %s: %s (%d)", id, node->details->uname, lrmd_event_rc2str(actual_rc_i), actual_rc_i); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) { add_node_copy(data_set->failed, xml_op); } if (*on_fail < action->on_fail) { *on_fail = action->on_fail; } if (safe_str_eq(task, CRMD_ACTION_STOP)) { resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set); } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { int stop_id = 0; int migrate_id = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_source, NULL, data_set); xmlNode *migrate_op = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, migrate_source, migrate_target, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } if (migrate_op) { crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id); } /* Get our state right */ rsc->role = RSC_ROLE_STARTED; /* can be master? */ if (stop_op == NULL || stop_id < migrate_id) { node_t *source = pe_find_node(data_set->nodes, migrate_source); if (source && source->details->online) { native_add_running(rsc, source, data_set); } } } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) { int stop_id = 0; int migrate_id = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_target, NULL, data_set); xmlNode *migrate_op = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } if (migrate_op) { crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id); } /* Get our state right */ rsc->role = RSC_ROLE_STARTED; /* can be master? */ if (stop_op == NULL || stop_id < migrate_id) { node_t *target = pe_find_node(data_set->nodes, migrate_target); crm_trace("Stop: %p %d, Migrated: %p %d", stop_op, stop_id, migrate_op, migrate_id); if (target && target->details->online) { native_add_running(rsc, target, data_set); } } else if (migrate_op == NULL) { /* Make sure it gets cleaned up, the stop may pre-date the migrate_from */ rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* * staying in role=master ends up putting the PE/TE into a loop * setting role=slave is not dangerous because no master will be * promoted until the failed resource has been fully stopped */ crm_warn("Forcing %s to stop after a failed demote action", rsc->id); rsc->next_role = RSC_ROLE_STOPPED; rsc->role = RSC_ROLE_SLAVE; } else if (compare_version("2.0", op_version) > 0 && safe_str_eq(task, CRMD_ACTION_START)) { crm_warn("Compatibility handling for failed op %s on %s", id, node->details->uname); resource_location(rsc, node, -INFINITY, "__legacy_start__", data_set); } if (rsc->role < RSC_ROLE_STARTED) { set_active(rsc); } crm_trace("Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", rsc->id, role2text(rsc->role), node->details->unclean ? "true" : "false", fail2text(action->on_fail), role2text(action->fail_role)); if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) { rsc->next_role = action->fail_role; } if (action->fail_role == RSC_ROLE_STOPPED) { int score = -INFINITY; crm_err("Making sure %s doesn't come up again", rsc->id); /* make sure it doesnt come up again */ g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = node_hash_from_list(data_set->nodes); g_hash_table_foreach(rsc->allowed_nodes, set_node_score, &score); } pe_free_action(action); action = NULL; break; case PCMK_LRM_OP_CANCELLED: /* do nothing?? */ pe_err("Dont know what to do for cancelled ops yet"); break; } done: crm_trace("Resource %s after %s: role=%s", rsc->id, task, role2text(rsc->role)); pe_free_action(action); return TRUE; } gboolean add_node_attrs(xmlNode * xml_obj, node_t * node, gboolean overwrite, pe_working_set_t * data_set) { g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_UNAME), strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_ID), strdup(node->details->id)); if (safe_str_eq(node->details->id, data_set->dc_uuid)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_DC), strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_DC), strdup(XML_BOOLEAN_FALSE)); } unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, node->details->attrs, NULL, overwrite, data_set->now); return TRUE; } static GListPtr extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter) { int counter = -1; int stop_index = -1; int start_index = -1; xmlNode *rsc_op = NULL; GListPtr gIter = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, XML_ATTR_UNAME, node); op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if (active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; if (start_index < stop_index) { crm_trace("Skipping %s: not active", ID(rsc_entry)); break; } else if (counter < start_index) { crm_trace("Skipping %s: old", ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); } g_list_free(sorted_op_list); return op_list; } GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t * data_set) { GListPtr output = NULL; GListPtr intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE); const char *uname = NULL; node_t *this_node = NULL; xmlNode *node_state = NULL; for (node_state = __xml_first_child(status); node_state != NULL; node_state = __xml_next(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) { uname = crm_element_value(node_state, XML_ATTR_UNAME); if (node != NULL && safe_str_neq(uname, node)) { continue; } this_node = pe_find_node(data_set->nodes, uname); CRM_CHECK(this_node != NULL, continue); determine_online_status(node_state, this_node, data_set); if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ xmlNode *lrm_rsc = NULL; tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); for (lrm_rsc = __xml_first_child(tmp); lrm_rsc != NULL; lrm_rsc = __xml_next(lrm_rsc)) { if (crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) { const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID); if (rsc != NULL && safe_str_neq(rsc_id, rsc)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); } } } } } return output; } diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index ed7b240823..83a62b0256 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -1,1512 +1,1512 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include pe_working_set_t *pe_dataset = NULL; extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); void print_str_str(gpointer key, gpointer value, gpointer user_data); gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); void unpack_operation(action_t * action, xmlNode * xml_obj, pe_working_set_t * data_set); static xmlNode *find_rsc_op_entry_helper(resource_t * rsc, const char *key, gboolean include_disabled); node_t * node_copy(node_t * this_node) { node_t *new_node = NULL; CRM_CHECK(this_node != NULL, return NULL); new_node = calloc(1, sizeof(node_t)); CRM_ASSERT(new_node != NULL); crm_trace("Copying %p (%s) to %p", this_node, this_node->details->uname, new_node); new_node->weight = this_node->weight; new_node->fixed = this_node->fixed; new_node->details = this_node->details; return new_node; } /* any node in list1 or list2 and not in the other gets a score of -INFINITY */ void node_list_exclude(GHashTable * hash, GListPtr list, gboolean merge_scores) { GHashTable *result = hash; node_t *other_node = NULL; GListPtr gIter = list; GHashTableIter iter; node_t *node = NULL; g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { other_node = pe_find_node_id(list, node->details->id); if (other_node == NULL) { node->weight = -INFINITY; } else if (merge_scores) { node->weight = merge_weights(node->weight, other_node->weight); } } for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; other_node = pe_hash_table_lookup(result, node->details->id); if (other_node == NULL) { node_t *new_node = node_copy(node); new_node->weight = -INFINITY; g_hash_table_insert(result, (gpointer) new_node->details->id, new_node); } } } GHashTable * node_hash_from_list(GListPtr list) { GListPtr gIter = list; GHashTable *result = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str); for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; node_t *n = node_copy(node); g_hash_table_insert(result, (gpointer) n->details->id, n); } return result; } GListPtr node_list_dup(GListPtr list1, gboolean reset, gboolean filter) { GListPtr result = NULL; GListPtr gIter = list1; for (; gIter != NULL; gIter = gIter->next) { node_t *new_node = NULL; node_t *this_node = (node_t *) gIter->data; if (filter && this_node->weight < 0) { continue; } new_node = node_copy(this_node); if (reset) { new_node->weight = 0; } if (new_node != NULL) { result = g_list_prepend(result, new_node); } } return result; } static gint sort_node_uname(gconstpointer a, gconstpointer b) { const node_t *node_a = a; const node_t *node_b = b; return strcmp(node_a->details->uname, node_b->details->uname); } void dump_node_scores_worker(int level, const char *file, const char *function, int line, resource_t * rsc, const char *comment, GHashTable * nodes) { GHashTable *hash = nodes; GHashTableIter iter; node_t *node = NULL; if (rsc) { hash = rsc->allowed_nodes; } if (rsc && is_set(rsc->flags, pe_rsc_orphan)) { /* Don't show the allocation scores for orphans */ return; } if (level == 0) { /* For now we want this in sorted order to keep the regression tests happy */ GListPtr gIter = NULL; GListPtr list = g_hash_table_get_values(hash); list = g_list_sort(list, sort_node_uname); gIter = list; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; char *score = score2char(node->weight); if (rsc) { printf("%s: %s allocation score on %s: %s\n", comment, rsc->id, node->details->uname, score); } else { printf("%s: %s = %s\n", comment, node->details->uname, score); } free(score); } g_list_free(list); } else if (hash) { g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { char *score = score2char(node->weight); if (rsc) { do_crm_log_alias(LOG_TRACE, file, function, line, "%s: %s allocation score on %s: %s", comment, rsc->id, node->details->uname, score); } else { do_crm_log_alias(LOG_TRACE, file, function, line + 1, "%s: %s = %s", comment, node->details->uname, score); } free(score); } } if (rsc && rsc->children) { GListPtr gIter = NULL; gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; dump_node_scores_worker(level, file, function, line, child, comment, nodes); } } } static void append_dump_text(gpointer key, gpointer value, gpointer user_data) { char **dump_text = user_data; int len = 0; char *new_text = NULL; len = strlen(*dump_text) + strlen(" ") + strlen(key) + strlen("=") + strlen(value) + 1; new_text = calloc(1, len); sprintf(new_text, "%s %s=%s", *dump_text, (char *)key, (char *)value); free(*dump_text); *dump_text = new_text; } void dump_node_capacity(int level, const char *comment, node_t * node) { int len = 0; char *dump_text = NULL; len = strlen(comment) + strlen(": ") + strlen(node->details->uname) + strlen(" capacity:") + 1; dump_text = calloc(1, len); sprintf(dump_text, "%s: %s capacity:", comment, node->details->uname); g_hash_table_foreach(node->details->utilization, append_dump_text, &dump_text); if (level == 0) { fprintf(stdout, "%s\n", dump_text); } else { crm_trace("%s", dump_text); } free(dump_text); } void dump_rsc_utilization(int level, const char *comment, resource_t * rsc, node_t * node) { int len = 0; char *dump_text = NULL; len = strlen(comment) + strlen(": ") + strlen(rsc->id) + strlen(" utilization on ") + strlen(node->details->uname) + strlen(":") + 1; dump_text = calloc(1, len); sprintf(dump_text, "%s: %s utilization on %s:", comment, rsc->id, node->details->uname); g_hash_table_foreach(rsc->utilization, append_dump_text, &dump_text); if (level == 0) { fprintf(stdout, "%s\n", dump_text); } else { crm_trace("%s", dump_text); } free(dump_text); } gint sort_rsc_index(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t *)a; const resource_t *resource2 = (const resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->sort_index > resource2->sort_index) { return -1; } if (resource1->sort_index < resource2->sort_index) { return 1; } return 0; } gint sort_rsc_priority(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t *)a; const resource_t *resource2 = (const resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->priority > resource2->priority) { return -1; } if (resource1->priority < resource2->priority) { return 1; } return 0; } action_t * custom_action(resource_t * rsc, char *key, const char *task, node_t * on_node, gboolean optional, gboolean save_action, pe_working_set_t * data_set) { action_t *action = NULL; GListPtr possible_matches = NULL; CRM_CHECK(key != NULL, return NULL); CRM_CHECK(task != NULL, return NULL); if (save_action && rsc != NULL) { possible_matches = find_actions(rsc->actions, key, on_node); } if (possible_matches != NULL) { if (g_list_length(possible_matches) > 1) { pe_warn("Action %s for %s on %s exists %d times", task, rsc ? rsc->id : "", on_node ? on_node->details->uname : "", g_list_length(possible_matches)); } action = g_list_nth_data(possible_matches, 0); crm_trace("Found existing action (%d) %s for %s on %s", action->id, task, rsc ? rsc->id : "", on_node ? on_node->details->uname : ""); g_list_free(possible_matches); } if (action == NULL) { if (save_action) { crm_trace("Creating%s action %d: %s for %s on %s", optional ? "" : " manditory", data_set->action_id, key, rsc ? rsc->id : "", on_node ? on_node->details->uname : ""); } action = calloc(1, sizeof(action_t)); if (save_action) { action->id = data_set->action_id++; } else { action->id = 0; } action->rsc = rsc; CRM_ASSERT(task != NULL); action->task = strdup(task); if (on_node) { action->node = node_copy(on_node); } action->uuid = strdup(key); pe_set_action_bit(action, pe_action_failure_is_fatal); pe_set_action_bit(action, pe_action_runnable); if (optional) { pe_set_action_bit(action, pe_action_optional); } else { pe_clear_action_bit(action, pe_action_optional); } /* Implied by calloc()... action->actions_before = NULL; action->actions_after = NULL; action->pseudo = FALSE; action->dumped = FALSE; action->processed = FALSE; action->seen_count = 0; */ action->extra = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free); action->meta = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free); if (save_action) { data_set->actions = g_list_prepend(data_set->actions, action); } if (rsc != NULL) { action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE); unpack_operation(action, action->op_entry, data_set); if (save_action) { rsc->actions = g_list_prepend(rsc->actions, action); } } if (save_action) { crm_trace("Action %d created", action->id); } } if (optional == FALSE) { crm_trace("Action %d (%s) marked manditory", action->id, action->uuid); pe_clear_action_bit(action, pe_action_optional); } if (rsc != NULL) { enum action_tasks a_task = text2task(action->task); int warn_level = LOG_DEBUG_3; if (save_action) { warn_level = LOG_WARNING; } if (is_set(action->flags, pe_action_have_node_attrs) == FALSE && action->node != NULL && action->op_entry != NULL) { pe_set_action_bit(action, pe_action_have_node_attrs); unpack_instance_attributes(data_set->input, action->op_entry, XML_TAG_ATTR_SETS, action->node->details->attrs, action->extra, NULL, FALSE, data_set->now); } if (is_set(action->flags, pe_action_pseudo)) { /* leave untouched */ } else if (action->node == NULL) { pe_clear_action_bit(action, pe_action_runnable); } else if (is_not_set(rsc->flags, pe_rsc_managed) && g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL) == NULL) { crm_debug("Action %s (unmanaged)", action->uuid); pe_set_action_bit(action, pe_action_optional); /* action->runnable = FALSE; */ } else if (action->node->details->online == FALSE) { pe_clear_action_bit(action, pe_action_runnable); do_crm_log(warn_level, "Action %s on %s is unrunnable (offline)", action->uuid, action->node->details->uname); if (is_set(action->rsc->flags, pe_rsc_managed) && save_action && a_task == stop_rsc) { do_crm_log(warn_level, "Marking node %s unclean", action->node->details->uname); action->node->details->unclean = TRUE; } } else if (action->node->details->pending) { pe_clear_action_bit(action, pe_action_runnable); do_crm_log(warn_level, "Action %s on %s is unrunnable (pending)", action->uuid, action->node->details->uname); } else if (action->needs == rsc_req_nothing) { crm_trace("Action %s doesnt require anything", action->uuid); pe_set_action_bit(action, pe_action_runnable); #if 0 /* * No point checking this * - if we dont have quorum we cant stonith anyway */ } else if (action->needs == rsc_req_stonith) { crm_trace("Action %s requires only stonith", action->uuid); action->runnable = TRUE; #endif } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_stop) { pe_clear_action_bit(action, pe_action_runnable); crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid); } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_freeze) { crm_trace("Check resource is already active"); if (rsc->fns->active(rsc, TRUE) == FALSE) { pe_clear_action_bit(action, pe_action_runnable); crm_debug("%s\t%s (cancelled : quorum freeze)", action->node->details->uname, action->uuid); } } else { crm_trace("Action %s is runnable", action->uuid); pe_set_action_bit(action, pe_action_runnable); } if (save_action) { switch (a_task) { case stop_rsc: set_bit(rsc->flags, pe_rsc_stopping); break; case start_rsc: clear_bit(rsc->flags, pe_rsc_starting); if (is_set(action->flags, pe_action_runnable)) { set_bit(rsc->flags, pe_rsc_starting); } break; default: break; } } } free(key); return action; } void unpack_operation(action_t * action, xmlNode * xml_obj, pe_working_set_t * data_set) { int value_i = 0; unsigned long long interval = 0; unsigned long long start_delay = 0; char *value_ms = NULL; const char *class = NULL; const char *value = NULL; const char *field = NULL; CRM_CHECK(action->rsc != NULL, return); unpack_instance_attributes(data_set->input, data_set->op_defaults, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); if (xml_obj) { xmlAttrPtr xIter = NULL; for (xIter = xml_obj->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_obj, prop_name); g_hash_table_replace(action->meta, strdup(prop_name), strdup(prop_value)); } } unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, action->meta, NULL, FALSE, data_set->now); g_hash_table_remove(action->meta, "id"); class = g_hash_table_lookup(action->rsc->meta, "class"); value = g_hash_table_lookup(action->meta, "requires"); if (safe_str_eq(class, "stonith")) { action->needs = rsc_req_nothing; value = "nothing (fencing op)"; } else if (safe_str_eq(value, "nothing")) { action->needs = rsc_req_nothing; } else if (safe_str_eq(value, "quorum")) { action->needs = rsc_req_quorum; } else if (is_set(data_set->flags, pe_flag_stonith_enabled) && safe_str_eq(value, "fencing")) { action->needs = rsc_req_stonith; } else { if (value) { crm_config_err("Invalid value for %s->requires: %s%s", action->rsc->id, value, is_set(data_set->flags, pe_flag_stonith_enabled) ? "" : " (stonith-enabled=false)"); } if (safe_str_eq(action->task, CRMD_ACTION_STATUS) || safe_str_eq(action->task, CRMD_ACTION_NOTIFY)) { action->needs = rsc_req_nothing; value = "nothing (default)"; } else if (data_set->no_quorum_policy == no_quorum_stop && safe_str_neq(action->task, CRMD_ACTION_START)) { action->needs = rsc_req_nothing; value = "nothing (default)"; } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) { action->needs = rsc_req_stonith; value = "fencing (default)"; } else { action->needs = rsc_req_quorum; value = "quorum (default)"; } } crm_trace("\tAction %s requires: %s", action->task, value); value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL); if (safe_str_eq(action->task, CRMD_ACTION_STOP) && safe_str_eq(value, "standby")) { crm_config_err("on-fail=standby is not allowed for stop actions: %s", action->rsc->id); value = NULL; } if (value == NULL) { } else if (safe_str_eq(value, "block")) { action->on_fail = action_fail_block; } else if (safe_str_eq(value, "fence")) { action->on_fail = action_fail_fence; value = "node fencing"; if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { crm_config_err("Specifying on_fail=fence and" " stonith-enabled=false makes no sense"); action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } } else if (safe_str_eq(value, "standby")) { action->on_fail = action_fail_standby; value = "node standby"; } else if (safe_str_eq(value, "ignore") || safe_str_eq(value, "nothing")) { action->on_fail = action_fail_ignore; value = "ignore"; } else if (safe_str_eq(value, "migrate")) { action->on_fail = action_fail_migrate; value = "force migration"; } else if (safe_str_eq(value, "stop")) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } else if (safe_str_eq(value, "restart")) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate)"; } else { pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value); value = NULL; } /* defaults */ if (value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { action->on_fail = action_fail_fence; value = "resource fence (default)"; } else { action->on_fail = action_fail_block; value = "resource block (default)"; } } else if (value == NULL) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate) (default)"; } crm_trace("\t%s failure handling: %s", action->task, value); value = NULL; if (xml_obj != NULL) { value = g_hash_table_lookup(action->meta, "role_after_failure"); } if (value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) { action->fail_role = text2role(value); } /* defaults */ if (action->fail_role == RSC_ROLE_UNKNOWN) { if (safe_str_eq(action->task, CRMD_ACTION_PROMOTE)) { action->fail_role = RSC_ROLE_SLAVE; } else { action->fail_role = RSC_ROLE_STARTED; } } crm_trace("\t%s failure results in: %s", action->task, role2text(action->fail_role)); field = XML_LRM_ATTR_INTERVAL; value = g_hash_table_lookup(action->meta, field); if (value != NULL) { interval = crm_get_interval(value); if (interval > 0) { value_ms = crm_itoa(interval); g_hash_table_replace(action->meta, strdup(field), value_ms); } else { g_hash_table_remove(action->meta, field); } } field = XML_OP_ATTR_START_DELAY; value = g_hash_table_lookup(action->meta, field); if (value != NULL) { value_i = crm_get_msec(value); if (value_i < 0) { value_i = 0; } start_delay = value_i; value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, strdup(field), value_ms); } else if (interval > 0 && g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN)) { char *date_str = NULL; char *date_str_mutable = NULL; ha_time_t *origin = NULL; value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN); date_str = strdup(value); date_str_mutable = date_str; origin = parse_date(&date_str_mutable); free(date_str); if (origin == NULL) { crm_config_err("Operation %s contained an invalid " XML_OP_ATTR_ORIGIN ": %s", ID(xml_obj), value); } else { ha_time_t *delay = NULL; int rc = compare_date(origin, data_set->now); unsigned long long delay_s = 0; while (rc < 0) { add_seconds(origin, interval / 1000); rc = compare_date(origin, data_set->now); } delay = subtract_time(origin, data_set->now); delay_s = date_in_seconds(delay); /* log_date(LOG_DEBUG_5, "delay", delay, ha_log_date|ha_log_time|ha_log_local); */ crm_info("Calculated a start delay of %llus for %s", delay_s, ID(xml_obj)); g_hash_table_replace(action->meta, strdup(XML_OP_ATTR_START_DELAY), crm_itoa(delay_s * 1000)); start_delay = delay_s * 1000; free_ha_date(origin); free_ha_date(delay); } } field = XML_ATTR_TIMEOUT; value = g_hash_table_lookup(action->meta, field); if (value == NULL) { value = pe_pref(data_set->config_hash, "default-action-timeout"); } value_i = crm_get_msec(value); if (value_i < 0) { value_i = 0; } value_i += start_delay; value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, strdup(field), value_ms); } static xmlNode * find_rsc_op_entry_helper(resource_t * rsc, const char *key, gboolean include_disabled) { int number = 0; gboolean do_retry = TRUE; char *local_key = NULL; const char *name = NULL; const char *value = NULL; const char *interval = NULL; char *match_key = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; retry: for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { name = crm_element_value(operation, "name"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "enabled"); if (!include_disabled && value && crm_is_true(value) == FALSE) { continue; } number = crm_get_interval(interval); if (number < 0) { continue; } match_key = generate_op_key(rsc->id, name, number); if (safe_str_eq(key, match_key)) { op = operation; } free(match_key); if (op != NULL) { free(local_key); return op; } } } free(local_key); if (do_retry == FALSE) { return NULL; } do_retry = FALSE; if (strstr(key, CRMD_ACTION_MIGRATE) || strstr(key, CRMD_ACTION_MIGRATED)) { local_key = generate_op_key(rsc->id, "migrate", 0); key = local_key; goto retry; } else if (strstr(key, "_notify_")) { local_key = generate_op_key(rsc->id, "notify", 0); key = local_key; goto retry; } return NULL; } xmlNode * find_rsc_op_entry(resource_t * rsc, const char *key) { return find_rsc_op_entry_helper( rsc, key, FALSE); } void print_node(const char *pre_text, node_t * node, gboolean details) { if (node == NULL) { crm_trace("%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": "); return; } crm_trace("%s%s%sNode %s: (weight=%d, fixed=%s)", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ", node->details == NULL ? "error " : node->details->online ? "" : "Unavailable/Unclean ", node->details->uname, node->weight, node->fixed ? "True" : "False"); if (details && node != NULL && node->details != NULL) { char *pe_mutable = strdup("\t\t"); GListPtr gIter = node->details->running_rsc; crm_trace("\t\t===Node Attributes"); g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable); free(pe_mutable); crm_trace("\t\t=== Resources"); for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; print_resource(LOG_DEBUG_4, "\t\t", rsc, FALSE); } } } /* * Used by the HashTable for-loop */ void print_str_str(gpointer key, gpointer value, gpointer user_data) { crm_trace("%s%s %s ==> %s", user_data == NULL ? "" : (char *)user_data, user_data == NULL ? "" : ": ", (char *)key, (char *)value); } void print_resource(int log_level, const char *pre_text, resource_t * rsc, gboolean details) { long options = pe_print_log; if (rsc == NULL) { do_crm_log(log_level - 1, "%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": "); return; } if (details) { options |= pe_print_details; } rsc->fns->print(rsc, pre_text, options, &log_level); } void pe_free_action(action_t * action) { if (action == NULL) { return; } g_list_free_full(action->actions_before, free); /* action_warpper_t* */ g_list_free_full(action->actions_after, free); /* action_warpper_t* */ if (action->extra) { g_hash_table_destroy(action->extra); } if (action->meta) { g_hash_table_destroy(action->meta); } free(action->task); free(action->uuid); free(action->node); free(action); } GListPtr find_recurring_actions(GListPtr input, node_t * not_on_node) { const char *value = NULL; GListPtr result = NULL; GListPtr gIter = input; CRM_CHECK(input != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; value = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL); if (value == NULL) { /* skip */ } else if (safe_str_eq(value, "0")) { /* skip */ } else if (safe_str_eq(CRMD_ACTION_CANCEL, action->task)) { /* skip */ } else if (not_on_node == NULL) { crm_trace("(null) Found: %s", action->uuid); result = g_list_prepend(result, action); } else if (action->node == NULL) { /* skip */ } else if (action->node->details != not_on_node->details) { crm_trace("Found: %s", action->uuid); result = g_list_prepend(result, action); } } return result; } enum action_tasks get_complex_task(resource_t * rsc, const char *name, gboolean allow_non_atomic) { enum action_tasks task = text2task(name); if (rsc == NULL) { return task; } else if (allow_non_atomic == FALSE || rsc->variant == pe_native) { switch (task) { case stopped_rsc: case started_rsc: case action_demoted: case action_promoted: crm_trace("Folding %s back into its atomic counterpart for %s", name, rsc->id); return task - 1; break; default: break; } } return task; } action_t * find_first_action(GListPtr input, const char *uuid, const char *task, node_t * on_node) { GListPtr gIter = NULL; CRM_CHECK(uuid || task, return NULL); for (gIter = input; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (uuid != NULL && safe_str_neq(uuid, action->uuid)) { continue; } else if (task != NULL && safe_str_neq(task, action->task)) { continue; } else if (on_node == NULL) { return action; } else if (action->node == NULL) { continue; } else if (on_node->details == action->node->details) { return action; } } return NULL; } GListPtr find_actions(GListPtr input, const char *key, node_t * on_node) { GListPtr gIter = input; GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; crm_trace("Matching %s against %s", key, action->uuid); if (safe_str_neq(key, action->uuid)) { continue; } else if (on_node == NULL) { result = g_list_prepend(result, action); } else if (action->node == NULL) { /* skip */ crm_trace("While looking for %s action on %s, " "found an unallocated one. Assigning" " it to the requested node...", key, on_node->details->uname); action->node = node_copy(on_node); result = g_list_prepend(result, action); } else if (on_node->details == action->node->details) { result = g_list_prepend(result, action); } } return result; } GListPtr find_actions_exact(GListPtr input, const char *key, node_t * on_node) { GListPtr gIter = input; GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; crm_trace("Matching %s against %s", key, action->uuid); if (safe_str_neq(key, action->uuid)) { crm_trace("Key mismatch: %s vs. %s", key, action->uuid); continue; } else if (on_node == NULL || action->node == NULL) { crm_trace("on_node=%p, action->node=%p", on_node, action->node); continue; } else if (safe_str_eq(on_node->details->id, action->node->details->id)) { result = g_list_prepend(result, action); } crm_trace("Node mismatch: %s vs. %s", on_node->details->id, action->node->details->id); } return result; } static void resource_node_score(resource_t * rsc, node_t * node, int score, const char *tag) { node_t *match = NULL; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; resource_node_score(child_rsc, node, score, tag); } } crm_trace("Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score); match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (match == NULL) { match = node_copy(node); match->weight = merge_weights(score, node->weight); g_hash_table_insert(rsc->allowed_nodes, (gpointer) match->details->id, match); } match->weight = merge_weights(match->weight, score); } void resource_location(resource_t * rsc, node_t * node, int score, const char *tag, pe_working_set_t * data_set) { if (node != NULL) { resource_node_score(rsc, node, score, tag); } else if (data_set != NULL) { GListPtr gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; resource_node_score(rsc, node, score, tag); } } else { GHashTableIter iter; node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { resource_node_score(rsc, node, score, tag); } } if (node == NULL && score == -INFINITY) { if (rsc->allocated_to) { crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname); free(rsc->allocated_to); rsc->allocated_to = NULL; } } } #define sort_return(an_int, why) do { \ free(a_uuid); \ free(b_uuid); \ crm_trace("%s (%d) %c %s (%d) : %s", \ a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=', \ b_xml_id, b_call_id, why); \ return an_int; \ } while(0) gint sort_op_by_callid(gconstpointer a, gconstpointer b) { int a_call_id = -1; int b_call_id = -1; char *a_uuid = NULL; char *b_uuid = NULL; const xmlNode *xml_a = a; const xmlNode *xml_b = b; const char *a_xml_id = crm_element_value_const(xml_a, XML_ATTR_ID); const char *b_xml_id = crm_element_value_const(xml_b, XML_ATTR_ID); if (safe_str_eq(a_xml_id, b_xml_id)) { /* We have duplicate lrm_rsc_op entries in the status * section which is unliklely to be a good thing * - we can handle it easily enough, but we need to get * to the bottom of why its happening. */ pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id); sort_return(0, "duplicate"); } crm_element_value_const_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id); crm_element_value_const_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id); if (a_call_id == -1 && b_call_id == -1) { /* both are pending ops so it doesnt matter since * stops are never pending */ sort_return(0, "pending"); } else if (a_call_id >= 0 && a_call_id < b_call_id) { sort_return(-1, "call id"); } else if (b_call_id >= 0 && a_call_id > b_call_id) { sort_return(1, "call id"); } else if (b_call_id >= 0 && a_call_id == b_call_id) { /* * The op and last_failed_op are the same * Order on last-rc-change */ int last_a = -1; int last_b = -1; crm_element_value_const_int(xml_a, "last-rc-change", &last_a); crm_element_value_const_int(xml_b, "last-rc-change", &last_b); if (last_a >= 0 && last_a < last_b) { sort_return(-1, "rc-change"); } else if (last_b >= 0 && last_a > last_b) { sort_return(1, "rc-change"); } sort_return(0, "rc-change"); } else { /* One of the inputs is a pending operation * Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other */ int a_id = -1; int b_id = -1; int dummy = -1; const char *a_magic = crm_element_value_const(xml_a, XML_ATTR_TRANSITION_MAGIC); const char *b_magic = crm_element_value_const(xml_b, XML_ATTR_TRANSITION_MAGIC); CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic")); CRM_CHECK(decode_transition_magic(a_magic, &a_uuid, &a_id, &dummy, &dummy, &dummy, &dummy), sort_return(0, "bad magic a")); CRM_CHECK(decode_transition_magic(b_magic, &b_uuid, &b_id, &dummy, &dummy, &dummy, &dummy), sort_return(0, "bad magic b")); /* try and determin the relative age of the operation... * some pending operations (ie. a start) may have been supuerceeded * by a subsequent stop * * [a|b]_id == -1 means its a shutdown operation and _always_ comes last */ if (safe_str_neq(a_uuid, b_uuid) || a_id == b_id) { /* * some of the logic in here may be redundant... * * if the UUID from the TE doesnt match then one better * be a pending operation. * pending operations dont survive between elections and joins * because we query the LRM directly */ if (b_call_id == -1) { sort_return(-1, "transition + call"); } else if (a_call_id == -1) { sort_return(1, "transition + call"); } } else if ((a_id >= 0 && a_id < b_id) || b_id == -1) { sort_return(-1, "transition"); } else if ((b_id >= 0 && a_id > b_id) || a_id == -1) { sort_return(1, "transition"); } } /* we should never end up here */ CRM_CHECK(FALSE, sort_return(0, "default")); } time_t get_timet_now(pe_working_set_t * data_set) { time_t now = 0; if (data_set && data_set->now) { now = data_set->now->tm_now; } if (now == 0) { /* eventually we should convert data_set->now into time_tm * for now, its only triggered by PE regression tests */ now = time(NULL); crm_crit("Defaulting to 'now'"); if (data_set && data_set->now) { data_set->now->tm_now = now; } } return now; } struct fail_search { resource_t *rsc; int count; long long last; char *key; }; static void get_failcount_by_prefix(gpointer key_p, gpointer value, gpointer user_data) { struct fail_search *search = user_data; const char *key = key_p; const char *match = strstr(key, search->key); if (match) { if (strstr(key, "last-failure-") == key && (key + 13) == match) { search->last = crm_int_helper(value, NULL); } else if (strstr(key, "fail-count-") == key && (key + 11) == match) { search->count += char2score(value); } } } int get_failcount(node_t * node, resource_t * rsc, int *last_failure, pe_working_set_t * data_set) { struct fail_search search = { rsc, 0, 0, NULL }; search.key = strdup(rsc->id); if (is_not_set(rsc->flags, pe_rsc_unique)) { int lpc = 0; search.rsc = uber_parent(rsc); /* Strip the clone incarnation */ for (lpc = strlen(search.key); lpc > 0; lpc--) { if (search.key[lpc] == ':') { search.key[lpc + 1] = 0; break; } } g_hash_table_foreach(node->details->attrs, get_failcount_by_prefix, &search); } else { /* Optimize the "normal" case */ char *key = NULL; const char *value = NULL; key = crm_concat("fail-count", rsc->id, '-'); value = g_hash_table_lookup(node->details->attrs, key); search.count = char2score(value); free(key); key = crm_concat("last-failure", rsc->id, '-'); value = g_hash_table_lookup(node->details->attrs, key); search.last = crm_int_helper(value, NULL); free(key); } if (search.count != 0 && search.last != 0 && rsc->failure_timeout) { if (last_failure) { *last_failure = search.last; } if (search.last > 0) { time_t now = get_timet_now(data_set); if (now > (search.last + rsc->failure_timeout)) { crm_notice("Failcount for %s on %s has expired (limit was %ds)", search.rsc->id, node->details->uname, rsc->failure_timeout); search.count = 0; } } } if (search.count != 0) { char *score = score2char(search.count); crm_info("%s has failed %s times on %s", search.rsc->id, score, node->details->uname); free(score); } free(search.key); return search.count; } gboolean get_target_role(resource_t * rsc, enum rsc_role_e * role) { enum rsc_role_e local_role = RSC_ROLE_UNKNOWN; const char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); CRM_CHECK(role != NULL, return FALSE); if (value == NULL || safe_str_eq("started", value) || safe_str_eq("default", value)) { return FALSE; } local_role = text2role(value); if (local_role == RSC_ROLE_UNKNOWN) { crm_config_err("%s: Unknown value for %s: %s", rsc->id, XML_RSC_ATTR_TARGET_ROLE, value); return FALSE; } else if (local_role > RSC_ROLE_STARTED) { if (uber_parent(rsc)->variant == pe_master) { if (local_role > RSC_ROLE_SLAVE) { /* This is what we'd do anyway, just leave the default to avoid messing up the placement algorithm */ return FALSE; } } else { crm_config_err("%s is not part of a master/slave resource, a %s of '%s' makes no sense", rsc->id, XML_RSC_ATTR_TARGET_ROLE, value); return FALSE; } } *role = local_role; return TRUE; } gboolean order_actions(action_t * lh_action, action_t * rh_action, enum pe_ordering order) { GListPtr gIter = NULL; action_wrapper_t *wrapper = NULL; GListPtr list = NULL; if (order == pe_order_none) { return FALSE; } if (lh_action == NULL || rh_action == NULL) { return FALSE; } crm_trace("Ordering Action %s before %s", lh_action->uuid, rh_action->uuid); /* Filter dups, otherwise update_action_states() has too much work to do */ gIter = lh_action->actions_after; for (; gIter != NULL; gIter = gIter->next) { action_wrapper_t *after = (action_wrapper_t *) gIter->data; if (after->action == rh_action && (after->type & order)) { return FALSE; } } wrapper = calloc(1, sizeof(action_wrapper_t)); wrapper->action = rh_action; wrapper->type = order; list = lh_action->actions_after; list = g_list_prepend(list, wrapper); lh_action->actions_after = list; wrapper = NULL; /* order |= pe_order_implies_then; */ /* order ^= pe_order_implies_then; */ wrapper = calloc(1, sizeof(action_wrapper_t)); wrapper->action = lh_action; wrapper->type = order; list = rh_action->actions_before; list = g_list_prepend(list, wrapper); rh_action->actions_before = list; return TRUE; } action_t * get_pseudo_op(const char *name, pe_working_set_t * data_set) { action_t *op = NULL; const char *op_s = name; GListPtr possible_matches = NULL; possible_matches = find_actions(data_set->actions, name, NULL); if (possible_matches != NULL) { if (g_list_length(possible_matches) > 1) { pe_warn("Action %s exists %d times", name, g_list_length(possible_matches)); } op = g_list_nth_data(possible_matches, 0); g_list_free(possible_matches); } else { op = custom_action(NULL, strdup(op_s), op_s, NULL, TRUE, TRUE, data_set); - set_bit_inplace(op->flags, pe_action_pseudo); - set_bit_inplace(op->flags, pe_action_runnable); + set_bit(op->flags, pe_action_pseudo); + set_bit(op->flags, pe_action_runnable); } return op; } void destroy_ticket(gpointer data) { ticket_t *ticket = data; if (ticket->state) { g_hash_table_destroy(ticket->state); } free(ticket->id); free(ticket); } ticket_t * ticket_new(const char *ticket_id, pe_working_set_t * data_set) { ticket_t *ticket = NULL; if (ticket_id == NULL || strlen(ticket_id) == 0) { return NULL; } if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket); } ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = calloc(1, sizeof(ticket_t)); if (ticket == NULL) { crm_err("Cannot allocate ticket '%s'", ticket_id); return NULL; } crm_trace("Creaing ticket entry for %s", ticket_id); ticket->id = strdup(ticket_id); ticket->granted = FALSE; ticket->last_granted = -1; ticket->standby = FALSE; ticket->state = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_insert(data_set->tickets, strdup(ticket->id), ticket); } return ticket; } diff --git a/pengine/allocate.c b/pengine/allocate.c index 304b619ab3..4c05586e21 100644 --- a/pengine/allocate.c +++ b/pengine/allocate.c @@ -1,2269 +1,2269 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_allocate); void set_alloc_actions(pe_working_set_t * data_set); void migrate_reload_madness(pe_working_set_t * data_set); resource_alloc_functions_t resource_class_alloc_functions[] = { { native_merge_weights, native_color, native_create_actions, native_create_probe, native_internal_constraints, native_rsc_colocation_lh, native_rsc_colocation_rh, native_rsc_location, native_action_flags, native_update_actions, native_expand, native_append_meta, }, { group_merge_weights, group_color, group_create_actions, native_create_probe, group_internal_constraints, group_rsc_colocation_lh, group_rsc_colocation_rh, group_rsc_location, group_action_flags, group_update_actions, group_expand, group_append_meta, }, { native_merge_weights, clone_color, clone_create_actions, clone_create_probe, clone_internal_constraints, clone_rsc_colocation_lh, clone_rsc_colocation_rh, clone_rsc_location, clone_action_flags, clone_update_actions, clone_expand, clone_append_meta, }, { native_merge_weights, master_color, master_create_actions, clone_create_probe, master_internal_constraints, clone_rsc_colocation_lh, master_rsc_colocation_rh, clone_rsc_location, clone_action_flags, clone_update_actions, clone_expand, master_append_meta, } }; static gboolean check_rsc_parameters(resource_t * rsc, node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set) { int attr_lpc = 0; gboolean force_restart = FALSE; gboolean delete_resource = FALSE; const char *value = NULL; const char *old_value = NULL; const char *attr_list[] = { XML_ATTR_TYPE, XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER }; for (; attr_lpc < DIMOF(attr_list); attr_lpc++) { value = crm_element_value(rsc->xml, attr_list[attr_lpc]); old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]); if (value == old_value /* ie. NULL */ || crm_str_eq(value, old_value, TRUE)) { continue; } force_restart = TRUE; crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s", rsc->id, node->details->uname, attr_list[attr_lpc], crm_str(old_value), crm_str(value)); } if (force_restart) { /* make sure the restart happens */ stop_action(rsc, node, FALSE); set_bit(rsc->flags, pe_rsc_start_pending); delete_resource = TRUE; } return delete_resource; } static void CancelXmlOp(resource_t * rsc, xmlNode * xml_op, node_t * active_node, const char *reason, pe_working_set_t * data_set) { int interval = 0; action_t *cancel = NULL; char *key = NULL; const char *task = NULL; const char *call_id = NULL; const char *interval_s = NULL; CRM_CHECK(xml_op != NULL, return); CRM_CHECK(active_node != NULL, return); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); /* we need to reconstruct the key because of the way we used to construct resource IDs */ key = generate_op_key(rsc->id, task, interval); crm_info("Action %s on %s will be stopped: %s", key, active_node->details->uname, reason ? reason : "unknown"); cancel = custom_action(rsc, strdup(key), RSC_CANCEL, active_node, FALSE, TRUE, data_set); free(cancel->task); cancel->task = strdup(RSC_CANCEL); add_hash_param(cancel->meta, XML_LRM_ATTR_TASK, task); add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id); add_hash_param(cancel->meta, XML_LRM_ATTR_INTERVAL, interval_s); custom_action_order(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set); free(key); key = NULL; } static gboolean check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op, pe_working_set_t * data_set) { char *key = NULL; int interval = 0; const char *interval_s = NULL; gboolean did_change = FALSE; xmlNode *params_all = NULL; xmlNode *params_restart = NULL; GHashTable *local_rsc_params = NULL; char *digest_all_calc = NULL; const char *digest_all = NULL; const char *restart_list = NULL; const char *digest_restart = NULL; char *digest_restart_calc = NULL; action_t *action = NULL; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); CRM_CHECK(active_node != NULL, return FALSE); if (safe_str_eq(task, RSC_STOP)) { return FALSE; } interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); /* we need to reconstruct the key because of the way we used to construct resource IDs */ key = generate_op_key(rsc->id, task, interval); if (interval > 0) { xmlNode *op_match = NULL; crm_trace("Checking parameters for %s", key); op_match = find_rsc_op_entry(rsc, key); if (op_match == NULL && is_set(data_set->flags, pe_flag_stop_action_orphans)) { CancelXmlOp(rsc, xml_op, active_node, "orphan", data_set); free(key); return TRUE; } else if (op_match == NULL) { crm_debug("Orphan action detected: %s on %s", key, active_node->details->uname); free(key); return TRUE; } } action = custom_action(rsc, key, task, active_node, TRUE, FALSE, data_set); /* key is free'd by custom_action() */ local_rsc_params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); get_rsc_attributes(local_rsc_params, rsc, active_node, data_set); params_all = create_xml_node(NULL, XML_TAG_PARAMS); g_hash_table_foreach(local_rsc_params, hash2field, params_all); g_hash_table_foreach(action->extra, hash2field, params_all); g_hash_table_foreach(rsc->parameters, hash2field, params_all); g_hash_table_foreach(action->meta, hash2metafield, params_all); filter_action_parameters(params_all, op_version); digest_all_calc = calculate_operation_digest(params_all, op_version); digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); restart_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_RESTART); if (interval == 0 && safe_str_eq(task, RSC_STATUS)) { /* Reload based on the start action not a probe */ task = RSC_START; } else if (interval == 0 && safe_str_eq(task, RSC_MIGRATED)) { /* Reload based on the start action not a migrate */ task = RSC_START; } if (digest_restart) { /* Changes that force a restart */ params_restart = copy_xml(params_all); if (restart_list) { filter_reload_parameters(params_restart, restart_list); } digest_restart_calc = calculate_operation_digest(params_restart, op_version); if (safe_str_neq(digest_restart_calc, digest_restart)) { did_change = TRUE; key = generate_op_key(rsc->id, task, interval); crm_log_xml_info(params_restart, "params:restart"); crm_info("Parameters to %s on %s changed: was %s vs. now %s (restart:%s) %s", key, active_node->details->uname, crm_str(digest_restart), digest_restart_calc, op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); goto cleanup; } } if (safe_str_neq(digest_all_calc, digest_all)) { /* Changes that can potentially be handled by a reload */ did_change = TRUE; crm_log_xml_info(params_all, "params:reload"); key = generate_op_key(rsc->id, task, interval); crm_info("Parameters to %s on %s changed: was %s vs. now %s (reload:%s) %s", key, active_node->details->uname, crm_str(digest_all), digest_all_calc, op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); if (interval > 0) { action_t *op = NULL; #if 0 /* Always reload/restart the entire resource */ op = custom_action(rsc, start_key(rsc), RSC_START, NULL, FALSE, TRUE, data_set); update_action_flags(op, pe_action_allow_reload_conversion); #else /* Re-sending the recurring op is sufficient - the old one will be cancelled automatically */ op = custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); custom_action_order(rsc, start_key(rsc), NULL, NULL, NULL, op, pe_order_runnable_left, data_set); #endif } else if (digest_restart) { crm_trace("Reloading '%s' action for resource %s", task, rsc->id); /* Allow this resource to reload - unless something else causes a full restart */ set_bit(rsc->flags, pe_rsc_try_reload); /* Create these for now, it keeps the action IDs the same in the regression outputs */ custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set); } else { crm_trace("Resource %s doesn't know how to reload", rsc->id); /* Re-send the start/demote/promote op * Recurring ops will be detected independantly */ custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); } } cleanup: free_xml(params_all); free_xml(params_restart); free(digest_all_calc); free(digest_restart_calc); g_hash_table_destroy(local_rsc_params); pe_free_action(action); return did_change; } extern gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set); static void check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_working_set_t * data_set) { GListPtr gIter = NULL; int offset = -1; int interval = 0; int stop_index = 0; int start_index = 0; const char *task = NULL; const char *interval_s = NULL; xmlNode *rsc_op = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; gboolean is_probe = FALSE; gboolean did_change = FALSE; CRM_CHECK(node != NULL, return); if (is_set(rsc->flags, pe_rsc_orphan)) { crm_trace("Skipping param check for %s: orphan", rsc->id); return; } else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) { crm_trace("Skipping param check for %s: no longer active on %s", rsc->id, node->details->uname); return; } crm_trace("Processing %s on %s", rsc->id, node->details->uname); if (check_rsc_parameters(rsc, node, rsc_entry, data_set)) { DeleteRsc(rsc, node, FALSE, data_set); } for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { op_list = g_list_prepend(op_list, rsc_op); } } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; offset++; if (start_index < stop_index) { /* stopped */ continue; } else if (offset < start_index) { /* action occurred prior to a start */ continue; } is_probe = FALSE; did_change = FALSE; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if (interval == 0 && safe_str_eq(task, RSC_STATUS)) { is_probe = TRUE; } if (interval > 0 && is_set(data_set->flags, pe_flag_maintenance_mode)) { CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set); } else if (is_probe || safe_str_eq(task, RSC_START) || interval > 0 || safe_str_eq(task, RSC_MIGRATED)) { did_change = check_action_definition(rsc, node, rsc_op, data_set); } if (did_change && get_failcount(node, rsc, NULL, data_set)) { char *key = NULL; action_t *action_clear = NULL; key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0); action_clear = custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set); - set_bit_inplace(action_clear->flags, pe_action_runnable); + set_bit(action_clear->flags, pe_action_runnable); } } g_list_free(sorted_op_list); } static GListPtr find_rsc_list(GListPtr result, resource_t * rsc, const char *id, gboolean renamed_clones, gboolean partial, pe_working_set_t * data_set) { GListPtr gIter = NULL; gboolean match = FALSE; if (id == NULL) { return NULL; } else if (rsc == NULL && data_set) { for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; result = find_rsc_list(result, child, id, renamed_clones, partial, NULL); } return result; } else if (rsc == NULL) { return NULL; } if (partial) { if (strstr(rsc->id, id)) { match = TRUE; } else if (rsc->long_name && strstr(rsc->long_name, id)) { match = TRUE; } else if (renamed_clones && rsc->clone_name && strstr(rsc->clone_name, id)) { match = TRUE; } } else { if (strcmp(rsc->id, id) == 0) { match = TRUE; } else if (rsc->long_name && strcmp(rsc->long_name, id) == 0) { match = TRUE; } else if (renamed_clones && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) { match = TRUE; } } if (match) { result = g_list_prepend(result, rsc); } if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; result = find_rsc_list(result, child, id, renamed_clones, partial, NULL); } } return result; } static void check_actions(pe_working_set_t * data_set) { const char *id = NULL; node_t *node = NULL; xmlNode *lrm_rscs = NULL; xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); xmlNode *node_state = NULL; for (node_state = __xml_first_child(status); node_state != NULL; node_state = __xml_next(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) { id = crm_element_value(node_state, XML_ATTR_ID); lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE); node = pe_find_node_id(data_set->nodes, id); if (node == NULL) { continue; } else if (can_run_resources(node) == FALSE) { crm_trace("Skipping param check for %s: cant run resources", node->details->uname); continue; } crm_trace("Processing node %s", node->details->uname); if (node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { xmlNode *rsc_entry = NULL; for (rsc_entry = __xml_first_child(lrm_rscs); rsc_entry != NULL; rsc_entry = __xml_next(rsc_entry)) { if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) { if (xml_has_children(rsc_entry)) { GListPtr gIter = NULL; GListPtr result = NULL; const char *rsc_id = ID(rsc_entry); CRM_CHECK(rsc_id != NULL, return); result = find_rsc_list(NULL, NULL, rsc_id, TRUE, FALSE, data_set); for (gIter = result; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; check_actions_for(rsc_entry, rsc, node, data_set); } g_list_free(result); } } } } } } } static gboolean apply_placement_constraints(pe_working_set_t * data_set) { GListPtr gIter = NULL; crm_trace("Applying constraints..."); for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) { rsc_to_node_t *cons = (rsc_to_node_t *) gIter->data; cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons); } return TRUE; } static void common_apply_stickiness(resource_t * rsc, node_t * node, pe_working_set_t * data_set) { int fail_count = 0; resource_t *failed = rsc; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; common_apply_stickiness(child_rsc, node, data_set); } return; } if (is_set(rsc->flags, pe_rsc_managed) && rsc->stickiness != 0 && g_list_length(rsc->running_on) == 1) { node_t *current = pe_find_node_id(rsc->running_on, node->details->id); node_t *match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (current == NULL) { } else if (match != NULL || is_set(data_set->flags, pe_flag_symmetric_cluster)) { resource_t *sticky_rsc = rsc; resource_location(sticky_rsc, node, rsc->stickiness, "stickiness", data_set); crm_debug("Resource %s: preferring current location" " (node=%s, weight=%d)", sticky_rsc->id, node->details->uname, rsc->stickiness); } else { GHashTableIter iter; node_t *nIter = NULL; crm_debug("Ignoring stickiness for %s: the cluster is asymmetric" " and node %s is not explicitly allowed", rsc->id, node->details->uname); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&nIter)) { crm_err("%s[%s] = %d", rsc->id, nIter->details->uname, nIter->weight); } } } if (is_not_set(rsc->flags, pe_rsc_unique)) { failed = uber_parent(rsc); } fail_count = get_failcount(node, rsc, NULL, data_set); if (fail_count > 0 && rsc->migration_threshold != 0) { if (rsc->migration_threshold <= fail_count) { resource_location(failed, node, -INFINITY, "__fail_limit__", data_set); crm_warn("Forcing %s away from %s after %d failures (max=%d)", failed->id, node->details->uname, fail_count, rsc->migration_threshold); } else { crm_notice("%s can fail %d more times on %s before being forced off", failed->id, rsc->migration_threshold - fail_count, node->details->uname); } } } static void complex_set_cmds(resource_t * rsc) { GListPtr gIter = rsc->children; rsc->cmds = &resource_class_alloc_functions[rsc->variant]; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; complex_set_cmds(child_rsc); } } void set_alloc_actions(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; complex_set_cmds(rsc); } } static void calculate_system_health(gpointer gKey, gpointer gValue, gpointer user_data) { const char *key = (const char *)gKey; const char *value = (const char *)gValue; int *system_health = (int *)user_data; if (!gKey || !gValue || !user_data) { return; } /* Does it start with #health? */ if (0 == strncmp(key, "#health", 7)) { int score; /* Convert the value into an integer */ score = char2score(value); /* Add it to the running total */ *system_health = merge_weights(score, *system_health); } } static gboolean apply_system_health(pe_working_set_t * data_set) { GListPtr gIter = NULL; const char *health_strategy = pe_pref(data_set->config_hash, "node-health-strategy"); if (health_strategy == NULL || safe_str_eq(health_strategy, "none")) { /* Prevent any accidental health -> score translation */ node_score_red = 0; node_score_yellow = 0; node_score_green = 0; return TRUE; } else if (safe_str_eq(health_strategy, "migrate-on-red")) { /* Resources on nodes which have health values of red are * weighted away from that node. */ node_score_red = -INFINITY; node_score_yellow = 0; node_score_green = 0; } else if (safe_str_eq(health_strategy, "only-green")) { /* Resources on nodes which have health values of red or yellow * are forced away from that node. */ node_score_red = -INFINITY; node_score_yellow = -INFINITY; node_score_green = 0; } else if (safe_str_eq(health_strategy, "progressive")) { /* Same as the above, but use the r/y/g scores provided by the user * Defaults are provided by the pe_prefs table */ } else if (safe_str_eq(health_strategy, "custom")) { /* Requires the admin to configure the rsc_location constaints for * processing the stored health scores */ /* TODO: Check for the existance of appropriate node health constraints */ return TRUE; } else { crm_err("Unknown node health strategy: %s", health_strategy); return FALSE; } crm_info("Applying automated node health strategy: %s", health_strategy); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { int system_health = 0; node_t *node = (node_t *) gIter->data; /* Search through the node hash table for system health entries. */ g_hash_table_foreach(node->details->attrs, calculate_system_health, &system_health); crm_info(" Node %s has an combined system health of %d", node->details->uname, system_health); /* If the health is non-zero, then create a new rsc2node so that the * weight will be added later on. */ if (system_health != 0) { GListPtr gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; rsc2node_new(health_strategy, rsc, system_health, node, data_set); } } } return TRUE; } gboolean stage0(pe_working_set_t * data_set) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); if (data_set->input == NULL) { return FALSE; } if (is_set(data_set->flags, pe_flag_have_status) == FALSE) { crm_trace("Calculating status"); cluster_status(data_set); } set_alloc_actions(data_set); apply_system_health(data_set); unpack_constraints(cib_constraints, data_set); return TRUE; } static void wait_for_probe(resource_t * rsc, const char *action, action_t * probe_complete, pe_working_set_t * data_set) { if (probe_complete == NULL) { return; } if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; wait_for_probe(child, action, probe_complete, data_set); } } else { char *key = generate_op_key(rsc->id, action, 0); custom_action_order(NULL, NULL, probe_complete, rsc, key, NULL, pe_order_optional, data_set); } } /* * Check nodes for resources started outside of the LRM */ gboolean probe_resources(pe_working_set_t * data_set) { action_t *probe_complete = NULL; action_t *probe_node_complete = NULL; GListPtr gIter = NULL; GListPtr gIter2 = NULL; gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; const char *probed = g_hash_table_lookup(node->details->attrs, CRM_OP_PROBED); if (node->details->online == FALSE) { continue; } else if (node->details->unclean) { continue; } else if (probe_complete == NULL) { probe_complete = get_pseudo_op(CRM_OP_PROBED, data_set); } if (probed != NULL && crm_is_true(probed) == FALSE) { action_t *probe_op = custom_action(NULL, strdup(CRM_OP_REPROBE), CRM_OP_REPROBE, node, FALSE, TRUE, data_set); add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); continue; } probe_node_complete = custom_action(NULL, strdup(CRM_OP_PROBED), CRM_OP_PROBED, node, FALSE, TRUE, data_set); if (crm_is_true(probed)) { crm_trace("unset"); update_action_flags(probe_node_complete, pe_action_optional); } else { crm_trace("set"); update_action_flags(probe_node_complete, pe_action_optional | pe_action_clear); } crm_trace("%s - %d", node->details->uname, probe_node_complete->flags & pe_action_optional); probe_node_complete->priority = INFINITY; add_hash_param(probe_node_complete->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); if (node->details->pending) { update_action_flags(probe_node_complete, pe_action_runnable | pe_action_clear); crm_info("Action %s on %s is unrunnable (pending)", probe_node_complete->uuid, probe_node_complete->node->details->uname); } order_actions(probe_node_complete, probe_complete, pe_order_runnable_left /*|pe_order_implies_then */ ); gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; if (rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set)) { update_action_flags(probe_complete, pe_action_optional | pe_action_clear); update_action_flags(probe_node_complete, pe_action_optional | pe_action_clear); wait_for_probe(rsc, RSC_START, probe_complete, data_set); } } } gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; wait_for_probe(rsc, RSC_STOP, probe_complete, data_set); } return TRUE; } /* * Count how many valid nodes we have (so we know the maximum number of * colors we can resolve). * * Apply node constraints (ie. filter the "allowed_nodes" part of resources */ gboolean stage2(pe_working_set_t * data_set) { GListPtr gIter = NULL; crm_trace("Applying placement constraints"); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node == NULL) { /* error */ } else if (node->weight >= 0.0 /* global weight */ && node->details->online && node->details->type == node_member) { data_set->max_valid_nodes++; } } apply_placement_constraints(data_set); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { GListPtr gIter2 = NULL; node_t *node = (node_t *) gIter->data; gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; common_apply_stickiness(rsc, node, data_set); } } return TRUE; } /* * Create internal resource constraints before allocation */ gboolean stage3(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; rsc->cmds->internal_constraints(rsc, data_set); } return TRUE; } /* * Check for orphaned or redefined actions */ gboolean stage4(pe_working_set_t * data_set) { check_actions(data_set); return TRUE; } static gint sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data) { int rc = 0; int r1_weight = -INFINITY; int r2_weight = -INFINITY; const char *reason = "existance"; const GListPtr nodes = (GListPtr) data; resource_t *resource1 = (resource_t *) convert_const_pointer(a); resource_t *resource2 = (resource_t *) convert_const_pointer(b); node_t *node = NULL; GListPtr gIter = NULL; GHashTable *r1_nodes = NULL; GHashTable *r2_nodes = NULL; if (a == NULL && b == NULL) { goto done; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } reason = "priority"; r1_weight = resource1->priority; r2_weight = resource2->priority; if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } reason = "no node list"; if (nodes == NULL) { goto done; } r1_nodes = rsc_merge_weights(resource1, resource1->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); dump_node_scores(LOG_TRACE, NULL, resource1->id, r1_nodes); r2_nodes = rsc_merge_weights(resource2, resource2->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); dump_node_scores(LOG_TRACE, NULL, resource2->id, r2_nodes); /* Current location score */ reason = "current location"; r1_weight = -INFINITY; r2_weight = -INFINITY; if (resource1->running_on) { node = g_list_nth_data(resource1->running_on, 0); node = g_hash_table_lookup(r1_nodes, node->details->id); r1_weight = node->weight; } if (resource2->running_on) { node = g_list_nth_data(resource2->running_on, 0); node = g_hash_table_lookup(r2_nodes, node->details->id); r2_weight = node->weight; } if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } reason = "score"; for (gIter = nodes; gIter != NULL; gIter = gIter->next) { node_t *r1_node = NULL; node_t *r2_node = NULL; node = (node_t *) gIter->data; r1_weight = -INFINITY; if (r1_nodes) { r1_node = g_hash_table_lookup(r1_nodes, node->details->id); } if (r1_node) { r1_weight = r1_node->weight; } r2_weight = -INFINITY; if (r2_nodes) { r2_node = g_hash_table_lookup(r2_nodes, node->details->id); } if (r2_node) { r2_weight = r2_node->weight; } if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } } done: if (r1_nodes) { g_hash_table_destroy(r1_nodes); } if (r2_nodes) { g_hash_table_destroy(r2_nodes); } crm_trace( "%s (%d) %c %s (%d) on %s: %s", resource1->id, r1_weight, rc < 0 ? '>' : rc > 0 ? '<' : '=', resource2->id, r2_weight, node ? node->details->id : "n/a", reason); return rc; } gboolean stage5(pe_working_set_t * data_set) { GListPtr gIter = NULL; if (safe_str_neq(data_set->placement_strategy, "default")) { GListPtr nodes = g_list_copy(data_set->nodes); nodes = g_list_sort_with_data(nodes, sort_node_weight, NULL); data_set->resources = g_list_sort_with_data(data_set->resources, sort_rsc_process_order, nodes); g_list_free(nodes); } gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Original", node); } crm_trace("Allocating services"); /* Take (next) highest resource, assign it and create its actions */ gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; crm_trace("Allocating: %s", rsc->id); rsc->cmds->allocate(rsc, NULL, data_set); } gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Remaining", node); } if (is_set(data_set->flags, pe_flag_startup_probes)) { crm_trace("Calculating needed probes"); /* This code probably needs optimization * ptest -x with 100 nodes, 100 clones and clone-max=100: With probes: ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:292 Check actions ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: do_calculations: pengine.c:299 Allocate resources ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: stage5: allocate.c:881 Allocating services ptest[14781]: 2010/09/27_17:56:49 notice: TRACE: stage5: allocate.c:894 Calculating needed probes ptest[14781]: 2010/09/27_17:56:51 notice: TRACE: stage5: allocate.c:899 Creating actions ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: stage5: allocate.c:905 Creating done ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints 36s ptest[14781]: 2010/09/27_17:57:28 notice: TRACE: do_calculations: pengine.c:320 Create transition graph Without probes: ptest[14637]: 2010/09/27_17:56:21 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:292 Check actions ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: do_calculations: pengine.c:299 Allocate resources ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: stage5: allocate.c:881 Allocating services ptest[14637]: 2010/09/27_17:56:24 notice: TRACE: stage5: allocate.c:899 Creating actions ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: stage5: allocate.c:905 Creating done ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:320 Create transition graph */ probe_resources(data_set); } crm_trace("Creating actions"); gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; rsc->cmds->create_actions(rsc, data_set); } crm_trace("Creating done"); return TRUE; } static gboolean is_managed(const resource_t * rsc) { GListPtr gIter = rsc->children; if (is_set(rsc->flags, pe_rsc_managed)) { return TRUE; } for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; if (is_managed(child_rsc)) { return TRUE; } } return FALSE; } static gboolean any_managed_resouces(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; if (is_managed(rsc)) { return TRUE; } } return FALSE; } /* * Create dependancies for stonith and shutdown operations */ gboolean stage6(pe_working_set_t * data_set) { action_t *dc_down = NULL; action_t *dc_fence = NULL; action_t *stonith_op = NULL; action_t *last_stonith = NULL; gboolean integrity_lost = FALSE; action_t *ready = get_pseudo_op(STONITH_UP, data_set); action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); action_t *done = get_pseudo_op(STONITH_DONE, data_set); gboolean need_stonith = FALSE; GListPtr gIter = data_set->nodes; crm_trace("Processing fencing and shutdown cases"); if (is_set(data_set->flags, pe_flag_stonith_enabled) && (is_set(data_set->flags, pe_flag_have_quorum) || data_set->no_quorum_policy == no_quorum_ignore || data_set->no_quorum_policy == no_quorum_suicide)) { need_stonith = TRUE; } if (need_stonith && any_managed_resouces(data_set) == FALSE) { crm_notice("Delaying fencing operations until there are resources to manage"); need_stonith = FALSE; } for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; stonith_op = NULL; if (node->details->unclean && need_stonith) { pe_warn("Scheduling Node %s for STONITH", node->details->uname); stonith_op = custom_action(NULL, strdup(CRM_OP_FENCE), CRM_OP_FENCE, node, FALSE, TRUE, data_set); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id); add_hash_param(stonith_op->meta, "stonith_action", data_set->stonith_action); stonith_constraints(node, stonith_op, data_set); order_actions(ready, stonith_op, pe_order_runnable_left); order_actions(stonith_op, all_stopped, pe_order_implies_then); - clear_bit_inplace(ready->flags, pe_action_optional); + clear_bit(ready->flags, pe_action_optional); if (node->details->is_dc) { dc_down = stonith_op; dc_fence = stonith_op; } else { if (last_stonith) { order_actions(last_stonith, stonith_op, pe_order_optional); } last_stonith = stonith_op; } } else if (node->details->online && node->details->shutdown) { action_t *down_op = NULL; crm_notice("Scheduling Node %s for shutdown", node->details->uname); down_op = custom_action(NULL, strdup(CRM_OP_SHUTDOWN), CRM_OP_SHUTDOWN, node, FALSE, TRUE, data_set); shutdown_constraints(node, down_op, data_set); add_hash_param(down_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); if (node->details->is_dc) { dc_down = down_op; } } if (node->details->unclean && stonith_op == NULL) { integrity_lost = TRUE; pe_warn("Node %s is unclean!", node->details->uname); } } if (integrity_lost) { if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED"); pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE"); } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE) { crm_notice("Cannot fence unclean nodes until quorum is" " attained (or no-quorum-policy is set to ignore)"); } } if (dc_down != NULL) { GListPtr shutdown_matches = find_actions(data_set->actions, CRM_OP_SHUTDOWN, NULL); crm_trace("Ordering shutdowns before %s on %s (DC)", dc_down->task, dc_down->node->details->uname); add_hash_param(dc_down->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); gIter = shutdown_matches; for (; gIter != NULL; gIter = gIter->next) { action_t *node_stop = (action_t *) gIter->data; if (node_stop->node->details->is_dc) { continue; } crm_debug("Ordering shutdown on %s before %s on %s", node_stop->node->details->uname, dc_down->task, dc_down->node->details->uname); order_actions(node_stop, dc_down, pe_order_optional); } if (last_stonith && dc_down != last_stonith) { order_actions(last_stonith, dc_down, pe_order_optional); } g_list_free(shutdown_matches); } if (last_stonith) { order_actions(last_stonith, done, pe_order_implies_then); } else if (dc_fence) { order_actions(dc_down, done, pe_order_implies_then); } order_actions(ready, done, pe_order_optional); return TRUE; } /* * Determin the sets of independant actions and the correct order for the * actions in each set. * * Mark dependencies of un-runnable actions un-runnable * */ static GListPtr find_actions_by_task(GListPtr actions, resource_t * rsc, const char *original_key) { GListPtr list = NULL; list = find_actions(actions, original_key, NULL); if (list == NULL) { /* we're potentially searching a child of the original resource */ char *key = NULL; char *tmp = NULL; char *task = NULL; int interval = 0; if (parse_op_key(original_key, &tmp, &task, &interval)) { key = generate_op_key(rsc->id, task, interval); /* crm_err("looking up %s instead of %s", key, original_key); */ /* slist_iter(action, action_t, actions, lpc, */ /* crm_err(" - %s", action->uuid)); */ list = find_actions(actions, key, NULL); } else { crm_err("search key: %s", original_key); } free(key); free(tmp); free(task); } return list; } static void rsc_order_then(action_t * lh_action, resource_t * rsc, order_constraint_t * order) { GListPtr gIter = NULL; GListPtr rh_actions = NULL; action_t *rh_action = NULL; enum pe_ordering type = order->type; CRM_CHECK(rsc != NULL, return); CRM_CHECK(order != NULL, return); rh_action = order->rh_action; crm_trace("Processing RH of ordering constraint %d", order->id); if (rh_action != NULL) { rh_actions = g_list_prepend(NULL, rh_action); } else if (rsc != NULL) { rh_actions = find_actions_by_task(rsc->actions, rsc, order->rh_action_task); } if (rh_actions == NULL) { crm_trace("No RH-Side (%s/%s) found for constraint..." " ignoring", rsc->id, order->rh_action_task); if (lh_action) { crm_trace("LH-Side was: %s", lh_action->uuid); } return; } if (lh_action && lh_action->rsc == rsc && is_set(lh_action->flags, pe_action_dangle)) { crm_trace("Detected dangling operation %s -> %s", lh_action->uuid, order->rh_action_task); - clear_bit_inplace(type, pe_order_implies_then); + clear_bit(type, pe_order_implies_then); } gIter = rh_actions; for (; gIter != NULL; gIter = gIter->next) { action_t *rh_action_iter = (action_t *) gIter->data; if (lh_action) { order_actions(lh_action, rh_action_iter, type); } else if (type & pe_order_implies_then) { update_action_flags(rh_action_iter, pe_action_runnable | pe_action_clear); crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, type); } else { crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, type); } } g_list_free(rh_actions); } static void rsc_order_first(resource_t * lh_rsc, order_constraint_t * order, pe_working_set_t * data_set) { GListPtr gIter = NULL; GListPtr lh_actions = NULL; action_t *lh_action = order->lh_action; resource_t *rh_rsc = order->rh_rsc; crm_trace("Processing LH of ordering constraint %d", order->id); CRM_ASSERT(lh_rsc != NULL); if (lh_action != NULL) { lh_actions = g_list_prepend(NULL, lh_action); } else if (lh_action == NULL) { lh_actions = find_actions_by_task(lh_rsc->actions, lh_rsc, order->lh_action_task); } if (lh_actions == NULL && lh_rsc != rh_rsc) { char *key = NULL; char *rsc_id = NULL; char *op_type = NULL; int interval = 0; parse_op_key(order->lh_action_task, &rsc_id, &op_type, &interval); key = generate_op_key(lh_rsc->id, op_type, interval); if (lh_rsc->fns->state(lh_rsc, TRUE) != RSC_ROLE_STOPPED || safe_str_neq(op_type, RSC_STOP)) { crm_trace("No LH-Side (%s/%s) found for constraint %d with %s - creating", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set); lh_actions = g_list_prepend(NULL, lh_action); } else { free(key); crm_trace("No LH-Side (%s/%s) found for constraint %d with %s - ignoring", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); } free(op_type); free(rsc_id); } gIter = lh_actions; for (; gIter != NULL; gIter = gIter->next) { action_t *lh_action_iter = (action_t *) gIter->data; if (rh_rsc == NULL && order->rh_action) { rh_rsc = order->rh_action->rsc; } if (rh_rsc) { rsc_order_then(lh_action_iter, rh_rsc, order); } else if (order->rh_action) { order_actions(lh_action_iter, order->rh_action, order->type); } } g_list_free(lh_actions); } extern gboolean update_action(action_t * action); gboolean stage7(pe_working_set_t * data_set) { GListPtr gIter = NULL; crm_trace("Applying ordering constraints"); /* Don't ask me why, but apparently they need to be processed in * the order they were created in... go figure * * Also g_list_prepend() has horrendous performance characteristics * So we need to use g_list_prepend() and then reverse the list here */ data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints); gIter = data_set->ordering_constraints; for (; gIter != NULL; gIter = gIter->next) { order_constraint_t *order = (order_constraint_t *) gIter->data; resource_t *rsc = order->lh_rsc; crm_trace("Applying ordering constraint: %d", order->id); if (rsc != NULL) { crm_trace("rsc_action-to-*"); rsc_order_first(rsc, order, data_set); continue; } rsc = order->rh_rsc; if (rsc != NULL) { crm_trace("action-to-rsc_action"); rsc_order_then(order->lh_action, rsc, order); } else { crm_trace("action-to-action"); order_actions(order->lh_action, order->rh_action, order->type); } } crm_trace("Updating %d actions", g_list_length(data_set->actions)); gIter = data_set->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; update_action(action); } crm_trace("Processing migrations"); gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; rsc_migrate_reload(rsc, data_set); LogActions(rsc, data_set, FALSE); } return TRUE; } static gint sort_notify_entries(gconstpointer a, gconstpointer b) { int tmp; const notify_entry_t *entry_a = a; const notify_entry_t *entry_b = b; if (entry_a == NULL && entry_b == NULL) { return 0; } if (entry_a == NULL) { return 1; } if (entry_b == NULL) { return -1; } if (entry_a->rsc == NULL && entry_b->rsc == NULL) { return 0; } if (entry_a->rsc == NULL) { return 1; } if (entry_b->rsc == NULL) { return -1; } tmp = strcmp(entry_a->rsc->id, entry_b->rsc->id); if (tmp != 0) { return tmp; } if (entry_a->node == NULL && entry_b->node == NULL) { return 0; } if (entry_a->node == NULL) { return 1; } if (entry_b->node == NULL) { return -1; } return strcmp(entry_a->node->details->id, entry_b->node->details->id); } static void expand_list(GListPtr list, char **rsc_list, char **node_list) { GListPtr gIter = list; const char *uname = NULL; const char *rsc_id = NULL; const char *last_rsc_id = NULL; if (rsc_list) { *rsc_list = NULL; } if (list == NULL) { if (rsc_list) { *rsc_list = strdup(" "); } if (node_list) { *node_list = strdup(" "); } return; } if (node_list) { *node_list = NULL; } for (; gIter != NULL; gIter = gIter->next) { notify_entry_t *entry = (notify_entry_t *) gIter->data; CRM_CHECK(entry != NULL, continue); CRM_CHECK(entry->rsc != NULL, continue); CRM_CHECK(node_list == NULL || entry->node != NULL, continue); uname = NULL; rsc_id = entry->rsc->id; CRM_ASSERT(rsc_id != NULL); /* filter dups */ if (safe_str_eq(rsc_id, last_rsc_id)) { continue; } last_rsc_id = rsc_id; if (rsc_list != NULL) { int existing_len = 0; int len = 2 + strlen(rsc_id); /* +1 space, +1 EOS */ if (rsc_list && *rsc_list) { existing_len = strlen(*rsc_list); } crm_trace("Adding %s (%dc) at offset %d", rsc_id, len - 2, existing_len); *rsc_list = realloc(*rsc_list, len + existing_len); sprintf(*rsc_list + existing_len, "%s ", rsc_id); } if (entry->node != NULL) { uname = entry->node->details->uname; } if (node_list != NULL && uname) { int existing_len = 0; int len = 2 + strlen(uname); if (node_list && *node_list) { existing_len = strlen(*node_list); } crm_trace("Adding %s (%dc) at offset %d", uname, len - 2, existing_len); *node_list = realloc(*node_list, len + existing_len); sprintf(*node_list + existing_len, "%s ", uname); } } } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { add_hash_param(user_data, key, value); } static action_t * pe_notify(resource_t * rsc, node_t * node, action_t * op, action_t * confirm, notify_data_t * n_data, pe_working_set_t * data_set) { char *key = NULL; action_t *trigger = NULL; const char *value = NULL; const char *task = NULL; if (op == NULL || confirm == NULL) { crm_trace("Op=%p confirm=%p", op, confirm); return NULL; } CRM_CHECK(node != NULL, return NULL); if (node->details->online == FALSE) { crm_trace("Skipping notification for %s: node offline", rsc->id); return NULL; } else if (is_set(op->flags, pe_action_runnable) == FALSE) { crm_trace("Skipping notification for %s: not runnable", op->uuid); return NULL; } value = g_hash_table_lookup(op->meta, "notify_type"); task = g_hash_table_lookup(op->meta, "notify_operation"); crm_trace("Creating notify actions for %s: %s (%s-%s)", op->uuid, rsc->id, value, task); key = generate_notify_key(rsc->id, value, task); trigger = custom_action(rsc, key, op->task, node, is_set(op->flags, pe_action_optional), TRUE, data_set); g_hash_table_foreach(op->meta, dup_attr, trigger->meta); g_hash_table_foreach(n_data->keys, dup_attr, trigger->meta); /* pseudo_notify before notify */ crm_trace("Ordering %s before %s (%d->%d)", op->uuid, trigger->uuid, trigger->id, op->id); order_actions(op, trigger, pe_order_optional); order_actions(trigger, confirm, pe_order_optional); return trigger; } static void pe_post_notify(resource_t * rsc, node_t * node, notify_data_t * n_data, pe_working_set_t * data_set) { action_t *notify = NULL; CRM_CHECK(rsc != NULL, return); if (n_data->post == NULL) { return; /* Nothing to do */ } notify = pe_notify(rsc, node, n_data->post, n_data->post_done, n_data, data_set); if (notify != NULL) { notify->priority = INFINITY; } if (n_data->post_done) { GListPtr gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *mon = (action_t *) gIter->data; const char *interval = g_hash_table_lookup(mon->meta, "interval"); if (interval == NULL || safe_str_eq(interval, "0")) { crm_trace("Skipping %s: interval", mon->uuid); continue; } else if (safe_str_eq(mon->task, "cancel")) { crm_trace("Skipping %s: cancel", mon->uuid); continue; } order_actions(n_data->post_done, mon, pe_order_optional); } } } notify_data_t * create_notification_boundaries(resource_t * rsc, const char *action, action_t * start, action_t * end, pe_working_set_t * data_set) { /* Create the pseudo ops that preceed and follow the actual notifications */ /* * Creates two sequences (conditional on start and end being supplied): * pre_notify -> pre_notify_complete -> start, and * end -> post_notify -> post_notify_complete * * 'start' and 'end' may be the same event or ${X} and ${X}ed as per clones */ char *key = NULL; notify_data_t *n_data = NULL; if (is_not_set(rsc->flags, pe_rsc_notify)) { return NULL; } n_data = calloc(1, sizeof(notify_data_t)); n_data->action = action; n_data->keys = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (start) { /* create pre-event notification wrappers */ key = generate_notify_key(rsc->id, "pre", start->task); n_data->pre = custom_action(rsc, key, RSC_NOTIFY, NULL, is_set(start->flags, pe_action_optional), TRUE, data_set); update_action_flags(n_data->pre, pe_action_pseudo); update_action_flags(n_data->pre, pe_action_runnable); add_hash_param(n_data->pre->meta, "notify_type", "pre"); add_hash_param(n_data->pre->meta, "notify_operation", n_data->action); /* create pre_notify_complete */ key = generate_notify_key(rsc->id, "confirmed-pre", start->task); n_data->pre_done = custom_action(rsc, key, RSC_NOTIFIED, NULL, is_set(start->flags, pe_action_optional), TRUE, data_set); update_action_flags(n_data->pre_done, pe_action_pseudo); update_action_flags(n_data->pre_done, pe_action_runnable); add_hash_param(n_data->pre_done->meta, "notify_type", "pre"); add_hash_param(n_data->pre_done->meta, "notify_operation", n_data->action); order_actions(n_data->pre_done, start, pe_order_optional); order_actions(n_data->pre, n_data->pre_done, pe_order_optional); } if (end) { /* create post-event notification wrappers */ key = generate_notify_key(rsc->id, "post", end->task); n_data->post = custom_action(rsc, key, RSC_NOTIFY, NULL, is_set(end->flags, pe_action_optional), TRUE, data_set); n_data->post->priority = INFINITY; update_action_flags(n_data->post, pe_action_pseudo); if (is_set(end->flags, pe_action_runnable)) { update_action_flags(n_data->post, pe_action_runnable); } else { update_action_flags(n_data->post, pe_action_runnable | pe_action_clear); } add_hash_param(n_data->post->meta, "notify_type", "post"); add_hash_param(n_data->post->meta, "notify_operation", n_data->action); /* create post_notify_complete */ key = generate_notify_key(rsc->id, "confirmed-post", end->task); n_data->post_done = custom_action(rsc, key, RSC_NOTIFIED, NULL, is_set(end->flags, pe_action_optional), TRUE, data_set); n_data->post_done->priority = INFINITY; update_action_flags(n_data->post_done, pe_action_pseudo); if (is_set(end->flags, pe_action_runnable)) { update_action_flags(n_data->post_done, pe_action_runnable); } else { update_action_flags(n_data->post_done, pe_action_runnable | pe_action_clear); } add_hash_param(n_data->post_done->meta, "notify_type", "pre"); add_hash_param(n_data->post_done->meta, "notify_operation", n_data->action); order_actions(end, n_data->post, pe_order_implies_then); order_actions(n_data->post, n_data->post_done, pe_order_implies_then); } if (start && end) { order_actions(n_data->pre_done, n_data->post, pe_order_optional); } if (safe_str_eq(action, RSC_STOP)) { action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); order_actions(n_data->post_done, all_stopped, pe_order_optional); } return n_data; } void collect_notification_data(resource_t * rsc, gboolean state, gboolean activity, notify_data_t * n_data) { if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; collect_notification_data(child, state, activity, n_data); } return; } if (state) { notify_entry_t *entry = NULL; entry = calloc(1, sizeof(notify_entry_t)); entry->rsc = rsc; if (rsc->running_on) { /* we only take the first one */ entry->node = rsc->running_on->data; } crm_trace("%s state: %s", rsc->id, role2text(rsc->role)); switch (rsc->role) { case RSC_ROLE_STOPPED: n_data->inactive = g_list_prepend(n_data->inactive, entry); break; case RSC_ROLE_STARTED: n_data->active = g_list_prepend(n_data->active, entry); break; case RSC_ROLE_SLAVE: n_data->slave = g_list_prepend(n_data->slave, entry); break; case RSC_ROLE_MASTER: n_data->master = g_list_prepend(n_data->master, entry); break; default: crm_err("Unsupported notify role"); free(entry); break; } } if (activity) { notify_entry_t *entry = NULL; enum action_tasks task; GListPtr gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *op = (action_t *) gIter->data; if (is_set(op->flags, pe_action_optional) == FALSE && op->node != NULL) { entry = calloc(1, sizeof(notify_entry_t)); entry->node = op->node; entry->rsc = rsc; task = text2task(op->task); switch (task) { case start_rsc: n_data->start = g_list_prepend(n_data->start, entry); break; case stop_rsc: n_data->stop = g_list_prepend(n_data->stop, entry); break; case action_promote: n_data->promote = g_list_prepend(n_data->promote, entry); break; case action_demote: n_data->demote = g_list_prepend(n_data->demote, entry); break; default: free(entry); break; } } } } } gboolean expand_notification_data(notify_data_t * n_data) { /* Expand the notification entries into a key=value hashtable * This hashtable is later used in action2xml() */ gboolean required = FALSE; char *rsc_list = NULL; char *node_list = NULL; if (n_data->stop) { n_data->stop = g_list_sort(n_data->stop, sort_notify_entries); } expand_list(n_data->stop, &rsc_list, &node_list); if (rsc_list != NULL && safe_str_neq(" ", rsc_list)) { if (safe_str_eq(n_data->action, RSC_STOP)) { required = TRUE; } } g_hash_table_insert(n_data->keys, strdup("notify_stop_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_stop_uname"), node_list); if (n_data->start) { n_data->start = g_list_sort(n_data->start, sort_notify_entries); if (rsc_list && safe_str_eq(n_data->action, RSC_START)) { required = TRUE; } } expand_list(n_data->start, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, strdup("notify_start_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_start_uname"), node_list); if (n_data->demote) { n_data->demote = g_list_sort(n_data->demote, sort_notify_entries); if (safe_str_eq(n_data->action, RSC_DEMOTE)) { required = TRUE; } } expand_list(n_data->demote, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, strdup("notify_demote_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_demote_uname"), node_list); if (n_data->promote) { n_data->promote = g_list_sort(n_data->promote, sort_notify_entries); if (safe_str_eq(n_data->action, RSC_PROMOTE)) { required = TRUE; } } expand_list(n_data->promote, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, strdup("notify_promote_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_promote_uname"), node_list); if (n_data->active) { n_data->active = g_list_sort(n_data->active, sort_notify_entries); } expand_list(n_data->active, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, strdup("notify_active_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_active_uname"), node_list); if (n_data->slave) { n_data->slave = g_list_sort(n_data->slave, sort_notify_entries); } expand_list(n_data->slave, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, strdup("notify_slave_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_slave_uname"), node_list); if (n_data->master) { n_data->master = g_list_sort(n_data->master, sort_notify_entries); } expand_list(n_data->master, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, strdup("notify_master_resource"), rsc_list); g_hash_table_insert(n_data->keys, strdup("notify_master_uname"), node_list); if (n_data->inactive) { n_data->inactive = g_list_sort(n_data->inactive, sort_notify_entries); } expand_list(n_data->inactive, &rsc_list, NULL); g_hash_table_insert(n_data->keys, strdup("notify_inactive_resource"), rsc_list); if (required && n_data->pre) { update_action_flags(n_data->pre, pe_action_optional | pe_action_clear); update_action_flags(n_data->pre_done, pe_action_optional | pe_action_clear); } if (required && n_data->post) { update_action_flags(n_data->post, pe_action_optional | pe_action_clear); update_action_flags(n_data->post_done, pe_action_optional | pe_action_clear); } return required; } void create_notifications(resource_t * rsc, notify_data_t * n_data, pe_working_set_t * data_set) { GListPtr gIter = NULL; action_t *stop = NULL; action_t *start = NULL; enum action_tasks task = text2task(n_data->action); if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; create_notifications(child, n_data, data_set); } return; } /* Copy notification details into standard ops */ gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *op = (action_t *) gIter->data; if (is_set(op->flags, pe_action_optional) == FALSE && op->node != NULL) { enum action_tasks t = text2task(op->task); switch (t) { case start_rsc: case stop_rsc: case action_promote: case action_demote: g_hash_table_foreach(n_data->keys, dup_attr, op->meta); break; default: break; } } } crm_trace("Creating notificaitons for: %s.%s (%s->%s)", n_data->action, rsc->id, role2text(rsc->role), role2text(rsc->next_role)); stop = find_first_action(rsc->actions, NULL, RSC_STOP, NULL); start = find_first_action(rsc->actions, NULL, RSC_START, NULL); /* stop / demote */ if (rsc->role != RSC_ROLE_STOPPED) { if (task == stop_rsc || task == action_demote) { gIter = rsc->running_on; for (; gIter != NULL; gIter = gIter->next) { node_t *current_node = (node_t *) gIter->data; pe_notify(rsc, current_node, n_data->pre, n_data->pre_done, n_data, data_set); if (task == action_demote || stop == NULL || is_set(stop->flags, pe_action_optional)) { pe_post_notify(rsc, current_node, n_data, data_set); } } } } /* start / promote */ if (rsc->next_role != RSC_ROLE_STOPPED) { if (rsc->allocated_to == NULL) { pe_proc_err("Next role '%s' but %s is not allocated", role2text(rsc->next_role), rsc->id); } else if (task == start_rsc || task == action_promote) { if (task != start_rsc || start == NULL || is_set(start->flags, pe_action_optional)) { pe_notify(rsc, rsc->allocated_to, n_data->pre, n_data->pre_done, n_data, data_set); } pe_post_notify(rsc, rsc->allocated_to, n_data, data_set); } } } void free_notification_data(notify_data_t * n_data) { if (n_data == NULL) { return; } g_list_free_full(n_data->stop, free); g_list_free_full(n_data->start, free); g_list_free_full(n_data->demote, free); g_list_free_full(n_data->promote, free); g_list_free_full(n_data->master, free); g_list_free_full(n_data->slave, free); g_list_free_full(n_data->active, free); g_list_free_full(n_data->inactive, free); g_hash_table_destroy(n_data->keys); free(n_data); } int transition_id = -1; /* * Create a dependency graph to send to the transitioner (via the CRMd) */ gboolean stage8(pe_working_set_t * data_set) { GListPtr gIter = NULL; const char *value = NULL; transition_id++; crm_trace("Creating transition graph %d.", transition_id); data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); value = pe_pref(data_set->config_hash, "cluster-delay"); crm_xml_add(data_set->graph, "cluster-delay", value); value = pe_pref(data_set->config_hash, "stonith-timeout"); crm_xml_add(data_set->graph, "stonith-timeout", value); crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY"); if (is_set(data_set->flags, pe_flag_start_failure_fatal)) { crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY"); } else { crm_xml_add(data_set->graph, "failed-start-offset", "1"); } value = pe_pref(data_set->config_hash, "batch-limit"); crm_xml_add(data_set->graph, "batch-limit", value); crm_xml_add_int(data_set->graph, "transition_id", transition_id); value = pe_pref(data_set->config_hash, "migration-limit"); if (crm_int_helper(value, NULL) > 0) { crm_xml_add(data_set->graph, "migration-limit", value); } /* errors... slist_iter(action, action_t, action_list, lpc, if(action->optional == FALSE && action->runnable == FALSE) { print_action("Ignoring", action, TRUE); } ); */ gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; crm_trace("processing actions for rsc=%s", rsc->id); rsc->cmds->expand(rsc, data_set); } crm_log_xml_trace(data_set->graph, "created resource-driven action list"); /* catch any non-resource specific actions */ crm_trace("processing non-resource actions"); gIter = data_set->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (action->rsc && action->node && action->node->details->shutdown && is_not_set(data_set->flags, pe_flag_maintenance_mode) && is_not_set(action->flags, pe_action_optional) && is_not_set(action->flags, pe_action_runnable) && crm_str_eq(action->task, RSC_STOP, TRUE) ) { crm_crit("Cannot shut down node '%s' because of %s:%s%s", action->node->details->uname, action->rsc->id, is_not_set(action->rsc->flags, pe_rsc_managed)?" unmanaged":" blocked", is_set(action->rsc->flags, pe_rsc_failed)?" failed":""); } graph_element_from_action(action, data_set); } crm_log_xml_trace(data_set->graph, "created generic action list"); crm_trace("Created transition graph %d.", transition_id); return TRUE; } void cleanup_alloc_calculations(pe_working_set_t * data_set) { if (data_set == NULL) { return; } crm_trace("deleting %d order cons: %p", g_list_length(data_set->ordering_constraints), data_set->ordering_constraints); pe_free_ordering(data_set->ordering_constraints); data_set->ordering_constraints = NULL; crm_trace("deleting %d node cons: %p", g_list_length(data_set->placement_constraints), data_set->placement_constraints); pe_free_rsc_to_node(data_set->placement_constraints); data_set->placement_constraints = NULL; crm_trace("deleting %d inter-resource cons: %p", g_list_length(data_set->colocation_constraints), data_set->colocation_constraints); g_list_free_full(data_set->colocation_constraints, free); data_set->colocation_constraints = NULL; crm_trace("deleting %d ticket deps: %p", g_list_length(data_set->ticket_constraints), data_set->ticket_constraints); g_list_free_full(data_set->ticket_constraints, free); data_set->ticket_constraints = NULL; cleanup_calculations(data_set); } diff --git a/pengine/allocate.h b/pengine/allocate.h index a28b1314ad..6e0f220bb3 100644 --- a/pengine/allocate.h +++ b/pengine/allocate.h @@ -1,193 +1,193 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRM_PENGINE_COMPLEX_ALLOC__H # define CRM_PENGINE_COMPLEX_ALLOC__H # include # include # include # include # include # include typedef struct notify_entry_s { resource_t *rsc; node_t *node; } notify_entry_t; struct resource_alloc_functions_s { GHashTable *(*merge_weights) (resource_t *, const char *, GHashTable *, const char *, int, gboolean, gboolean); node_t *(*allocate) (resource_t *, node_t *, pe_working_set_t *); void (*create_actions) (resource_t *, pe_working_set_t *); gboolean(*create_probe) (resource_t *, node_t *, action_t *, gboolean, pe_working_set_t *); void (*internal_constraints) (resource_t *, pe_working_set_t *); void (*rsc_colocation_lh) (resource_t *, resource_t *, rsc_colocation_t *); void (*rsc_colocation_rh) (resource_t *, resource_t *, rsc_colocation_t *); void (*rsc_location) (resource_t *, rsc_to_node_t *); enum pe_action_flags (*action_flags) (action_t *, node_t *); enum pe_graph_flags (*update_actions) (action_t *, action_t *, node_t *, enum pe_action_flags, enum pe_action_flags, enum pe_ordering); void (*expand) (resource_t *, pe_working_set_t *); void (*append_meta) (resource_t * rsc, xmlNode * xml); }; extern GHashTable *rsc_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr, int factor, enum pe_weights flags); extern GHashTable *native_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr, int factor, gboolean allow_rollback, gboolean only_positive); extern GHashTable *group_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr, int factor, gboolean allow_rollback, gboolean only_positive); extern node_t *native_color(resource_t * rsc, node_t * preferred, pe_working_set_t * data_set); extern void native_create_actions(resource_t * rsc, pe_working_set_t * data_set); extern void native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set); extern void native_rsc_colocation_lh(resource_t * lh_rsc, resource_t * rh_rsc, rsc_colocation_t * constraint); extern void native_rsc_colocation_rh(resource_t * lh_rsc, resource_t * rh_rsc, rsc_colocation_t * constraint); extern void rsc_ticket_constraint(resource_t * lh_rsc, rsc_ticket_t * rsc_ticket, pe_working_set_t * data_set); extern enum pe_action_flags native_action_flags(action_t * action, node_t * node); extern void native_rsc_location(resource_t * rsc, rsc_to_node_t * constraint); extern void native_expand(resource_t * rsc, pe_working_set_t * data_set); extern void native_dump(resource_t * rsc, const char *pre_text, gboolean details); extern void create_notify_element(resource_t * rsc, action_t * op, notify_data_t * n_data, pe_working_set_t * data_set); extern gboolean native_create_probe(resource_t * rsc, node_t * node, action_t * complete, gboolean force, pe_working_set_t * data_set); extern void native_append_meta(resource_t * rsc, xmlNode * xml); extern int group_num_allowed_nodes(resource_t * rsc); extern node_t *group_color(resource_t * rsc, node_t * preferred, pe_working_set_t * data_set); extern void group_create_actions(resource_t * rsc, pe_working_set_t * data_set); extern void group_internal_constraints(resource_t * rsc, pe_working_set_t * data_set); extern void group_rsc_colocation_lh(resource_t * lh_rsc, resource_t * rh_rsc, rsc_colocation_t * constraint); extern void group_rsc_colocation_rh(resource_t * lh_rsc, resource_t * rh_rsc, rsc_colocation_t * constraint); extern enum pe_action_flags group_action_flags(action_t * action, node_t * node); extern void group_rsc_location(resource_t * rsc, rsc_to_node_t * constraint); extern void group_expand(resource_t * rsc, pe_working_set_t * data_set); extern void group_append_meta(resource_t * rsc, xmlNode * xml); extern int clone_num_allowed_nodes(resource_t * rsc); extern node_t *clone_color(resource_t * rsc, node_t * preferred, pe_working_set_t * data_set); extern void clone_create_actions(resource_t * rsc, pe_working_set_t * data_set); extern void clone_internal_constraints(resource_t * rsc, pe_working_set_t * data_set); extern void clone_rsc_colocation_lh(resource_t * lh_rsc, resource_t * rh_rsc, rsc_colocation_t * constraint); extern void clone_rsc_colocation_rh(resource_t * lh_rsc, resource_t * rh_rsc, rsc_colocation_t * constraint); extern void clone_rsc_location(resource_t * rsc, rsc_to_node_t * constraint); extern enum pe_action_flags clone_action_flags(action_t * action, node_t * node); extern void clone_expand(resource_t * rsc, pe_working_set_t * data_set); extern gboolean clone_create_probe(resource_t * rsc, node_t * node, action_t * complete, gboolean force, pe_working_set_t * data_set); extern void clone_append_meta(resource_t * rsc, xmlNode * xml); extern gboolean master_unpack(resource_t * rsc, pe_working_set_t * data_set); extern node_t *master_color(resource_t * rsc, node_t * preferred, pe_working_set_t * data_set); extern void master_create_actions(resource_t * rsc, pe_working_set_t * data_set); extern void master_internal_constraints(resource_t * rsc, pe_working_set_t * data_set); extern void master_rsc_colocation_rh(resource_t * lh_rsc, resource_t * rh_rsc, rsc_colocation_t * constraint); extern void master_append_meta(resource_t * rsc, xmlNode * xml); /* extern resource_object_functions_t resource_variants[]; */ extern resource_alloc_functions_t resource_class_alloc_functions[]; extern gboolean is_active(rsc_to_node_t * cons); extern gboolean native_constraint_violated(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint); extern gboolean unpack_rsc_to_attr(xmlNode * xml_obj, pe_working_set_t * data_set); extern gboolean unpack_rsc_to_node(xmlNode * xml_obj, pe_working_set_t * data_set); extern gboolean unpack_rsc_order(xmlNode * xml_obj, pe_working_set_t * data_set); extern gboolean unpack_rsc_colocation(xmlNode * xml_obj, pe_working_set_t * data_set); extern gboolean unpack_rsc_location(xmlNode * xml_obj, pe_working_set_t * data_set); extern gboolean unpack_rsc_ticket(xmlNode * xml_obj, pe_working_set_t * data_set); extern void LogActions(resource_t * rsc, pe_working_set_t * data_set, gboolean terminal); extern void cleanup_alloc_calculations(pe_working_set_t * data_set); extern notify_data_t *create_notification_boundaries(resource_t * rsc, const char *action, action_t * start, action_t * end, pe_working_set_t * data_set); extern void collect_notification_data(resource_t * rsc, gboolean state, gboolean activity, notify_data_t * n_data); extern gboolean expand_notification_data(notify_data_t * n_data); extern void create_notifications(resource_t * rsc, notify_data_t * n_data, pe_working_set_t * data_set); extern void free_notification_data(notify_data_t * n_data); extern void rsc_migrate_reload(resource_t * rsc, pe_working_set_t * data_set); extern void rsc_stonith_ordering(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set); extern enum pe_graph_flags native_update_actions(action_t * first, action_t * then, node_t * node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type); extern enum pe_graph_flags group_update_actions(action_t * first, action_t * then, node_t * node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type); extern enum pe_graph_flags clone_update_actions(action_t * first, action_t * then, node_t * node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type); static inline gboolean update_action_flags(action_t * action, enum pe_action_flags flags) { gboolean changed = FALSE; gboolean clear = is_set(flags, pe_action_clear); enum pe_action_flags last = action->flags; if (clear) { pe_clear_action_bit(action, flags); } else { pe_set_action_bit(action, flags); } if (last != action->flags) { changed = TRUE; - clear_bit_inplace(flags, pe_action_clear); + clear_bit(flags, pe_action_clear); crm_trace("%s on %s: %sset flags 0x%.6x (was 0x%.6x, now 0x%.6x)", action->uuid, action->node ? action->node->details->uname : "[none]", clear ? "un-" : "", flags, last, action->flags); } return changed; } #endif diff --git a/pengine/graph.c b/pengine/graph.c index 3d3ccc247d..aa05e2223b 100644 --- a/pengine/graph.c +++ b/pengine/graph.c @@ -1,896 +1,896 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include gboolean update_action(action_t * action); gboolean rsc_update_action(action_t * first, action_t * then, enum pe_ordering type); static enum pe_action_flags get_action_flags(action_t * action, node_t * node) { enum pe_action_flags flags = action->flags; if (action->rsc) { flags = action->rsc->cmds->action_flags(action, NULL); if (action->rsc->variant >= pe_clone && node) { /* We only care about activity on $node */ enum pe_action_flags clone_flags = action->rsc->cmds->action_flags(action, node); /* Go to great lengths to ensure the correct value for pe_action_runnable... * * If we are a clone, then for _ordering_ constraints, its only relevant * if we are runnable _anywhere_. * * This only applies to _runnable_ though, and only for ordering constraints. * If this function is ever used during colocation, then we'll need additional logic * * Not very satisfying, but its logical and appears to work well. */ if (is_not_set(clone_flags, pe_action_runnable) && is_set(flags, pe_action_runnable)) { crm_trace("Fixing up runnable flag for %s", action->uuid); - set_bit_inplace(clone_flags, pe_action_runnable); + set_bit(clone_flags, pe_action_runnable); } flags = clone_flags; } } return flags; } static char * convert_non_atomic_uuid(char *old_uuid, resource_t * rsc, gboolean allow_notify, gboolean free_original) { int interval = 0; char *uuid = NULL; char *rid = NULL; char *raw_task = NULL; int task = no_action; crm_trace("Processing %s", old_uuid); if (old_uuid == NULL) { return NULL; } else if (strstr(old_uuid, "notify") != NULL) { goto done; /* no conversion */ } else if (rsc->variant < pe_group) { goto done; /* no conversion */ } CRM_ASSERT(parse_op_key(old_uuid, &rid, &raw_task, &interval)); if (interval > 0) { goto done; /* no conversion */ } task = text2task(raw_task); switch (task) { case stop_rsc: case start_rsc: case action_notify: case action_promote: case action_demote: break; case stopped_rsc: case started_rsc: case action_notified: case action_promoted: case action_demoted: task--; break; case monitor_rsc: case shutdown_crm: case stonith_node: task = no_action; break; default: crm_err("Unknown action: %s", raw_task); task = no_action; break; } if (task != no_action) { if (is_set(rsc->flags, pe_rsc_notify) && allow_notify) { uuid = generate_notify_key(rid, "confirmed-post", task2text(task + 1)); } else { uuid = generate_op_key(rid, task2text(task + 1), 0); } crm_trace("Converted %s -> %s", old_uuid, uuid); } done: if (uuid == NULL) { uuid = strdup(old_uuid); } if (free_original) { free(old_uuid); } free(raw_task); free(rid); return uuid; } static action_t * rsc_expand_action(action_t * action) { action_t *result = action; if (action->rsc && action->rsc->variant >= pe_group) { /* Expand 'start' -> 'started' */ char *uuid = NULL; gboolean notify = FALSE; if (action->rsc->parent == NULL) { /* Only outter-most resources have notification actions */ notify = is_set(action->rsc->flags, pe_rsc_notify); } uuid = convert_non_atomic_uuid(action->uuid, action->rsc, notify, FALSE); if (uuid) { crm_trace("Converting %s to %s %d", action->uuid, uuid, is_set(action->rsc->flags, pe_rsc_notify)); result = find_first_action(action->rsc->actions, uuid, NULL, NULL); if (result == NULL) { crm_err("Couldn't expand %s", action->uuid); result = action; } free(uuid); } } return result; } static enum pe_graph_flags graph_update_action(action_t * first, action_t * then, node_t * node, enum pe_action_flags flags, enum pe_ordering type) { enum pe_graph_flags changed = pe_graph_none; gboolean processed = FALSE; /* TODO: Do as many of these in parallel as possible */ if (type & pe_order_implies_then) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, flags & pe_action_optional, pe_action_optional, pe_order_implies_then); } else if (is_set(flags, pe_action_optional) == FALSE) { if (update_action_flags(then, pe_action_optional | pe_action_clear)) { changed |= pe_graph_updated_then; } } crm_trace("implies right: %s then %s%s", first->uuid, then->uuid, changed?": changed":""); } if ((type & pe_order_restart) && then->rsc) { enum pe_action_flags restart = (pe_action_optional | pe_action_runnable); processed = TRUE; changed |= then->rsc->cmds->update_actions(first, then, node, flags, restart, pe_order_restart); crm_trace("restart: %s then %s%s", first->uuid, then->uuid, changed?": changed":""); } if (type & pe_order_implies_first) { processed = TRUE; if (first->rsc) { changed |= first->rsc->cmds->update_actions(first, then, node, flags, pe_action_optional, pe_order_implies_first); } else if (is_set(flags, pe_action_optional) == FALSE) { if (update_action_flags(first, pe_action_runnable | pe_action_clear)) { changed |= pe_graph_updated_first; } } crm_trace("implies left: %s then %s%s", first->uuid, then->uuid, changed?": changed":""); } if (type & pe_order_implies_first_master) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, flags & pe_action_optional, pe_action_optional, pe_order_implies_first_master); } crm_trace("implies left when right rsc is Master role: %s then %s%s", first->uuid, then->uuid, changed?": changed":""); } if (type & pe_order_one_or_more) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, flags, pe_action_runnable, pe_order_one_or_more); } else if (is_set(flags, pe_action_runnable)) { if (update_action_flags(then, pe_action_runnable)) { changed |= pe_graph_updated_then; } } crm_trace("runnable_one_or_more: %s then %s%s", first->uuid, then->uuid, changed?": changed":""); } if (type & pe_order_runnable_left) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, flags, pe_action_runnable, pe_order_runnable_left); } else if (is_set(flags, pe_action_runnable) == FALSE) { if (update_action_flags(then, pe_action_runnable | pe_action_clear)) { changed |= pe_graph_updated_then; } } crm_trace("runnable: %s then %s%s", first->uuid, then->uuid, changed?": changed":""); } if (type & pe_order_optional) { processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, flags, pe_action_runnable, pe_order_optional); } crm_trace("optional: %s then %s%s", first->uuid, then->uuid, changed?": changed":""); } if (type & pe_order_asymmetrical) { crm_trace("asymmetrical: %s then %s", first->uuid, then->uuid); processed = TRUE; if (then->rsc) { changed |= then->rsc->cmds->update_actions(first, then, node, flags, pe_action_runnable, pe_order_asymmetrical); } } if ((first->flags & pe_action_runnable) && (type & pe_order_implies_then_printed) && (flags & pe_action_optional) == 0) { processed = TRUE; crm_trace("%s implies %s printed", first->uuid, then->uuid); update_action_flags(then, pe_action_print_always); /* dont care about changed */ } if ((type & pe_order_implies_first_printed) && (flags & pe_action_optional) == 0) { processed = TRUE; crm_trace("%s implies %s printed", then->uuid, first->uuid); update_action_flags(first, pe_action_print_always); /* dont care about changed */ } if (processed == FALSE) { crm_trace("Constraint 0x%.6x not applicable", type); } return changed; } gboolean update_action(action_t * then) { GListPtr lpc = NULL; enum pe_graph_flags changed = pe_graph_none; int last_flags = then->flags; crm_trace("Processing %s (%s %s %s)", then->uuid, is_set(then->flags, pe_action_optional) ? "optional" : "required", is_set(then->flags, pe_action_runnable) ? "runnable" : "unrunnable", is_set(then->flags, pe_action_pseudo) ? "pseudo" : then->node ? then->node->details->uname : ""); if (is_set(then->flags, pe_action_requires_any)) { - clear_bit_inplace(then->flags, pe_action_runnable); + clear_bit(then->flags, pe_action_runnable); } for (lpc = then->actions_before; lpc != NULL; lpc = lpc->next) { action_wrapper_t *other = (action_wrapper_t *) lpc->data; action_t *first = other->action; node_t *then_node = then->node; node_t *first_node = first->node; enum pe_action_flags then_flags = 0; enum pe_action_flags first_flags = 0; if (first->rsc && first->rsc->variant == pe_group && safe_str_eq(first->task, RSC_START)) { first_node = first->rsc->fns->location(first->rsc, NULL, FALSE); if (first_node) { crm_trace("First: Found node %s for %s", first_node->details->uname, first->uuid); } } if (then->rsc && then->rsc->variant == pe_group && safe_str_eq(then->task, RSC_START)) { then_node = then->rsc->fns->location(then->rsc, NULL, FALSE); if (then_node) { crm_trace("Then: Found node %s for %s", then_node->details->uname, then->uuid); } } - clear_bit_inplace(changed, pe_graph_updated_first); + clear_bit(changed, pe_graph_updated_first); if (first->rsc != then->rsc && first->rsc != NULL && then->rsc != NULL && first->rsc != then->rsc->parent) { first = rsc_expand_action(first); } if (first != other->action) { crm_trace("Ordering %s afer %s instead of %s", then->uuid, first->uuid, other->action->uuid); } first_flags = get_action_flags(first, then_node); then_flags = get_action_flags(then, first_node); crm_trace("Checking %s (%s %s %s) against %s (%s %s %s) 0x%.6x", then->uuid, is_set(then_flags, pe_action_optional) ? "optional" : "required", is_set(then_flags, pe_action_runnable) ? "runnable" : "unrunnable", is_set(then_flags, pe_action_pseudo) ? "pseudo" : then->node ? then->node->details-> uname : "", first->uuid, is_set(first_flags, pe_action_optional) ? "optional" : "required", is_set(first_flags, pe_action_runnable) ? "runnable" : "unrunnable", is_set(first_flags, pe_action_pseudo) ? "pseudo" : first->node ? first->node->details-> uname : "", other->type); if (first == other->action) { - clear_bit_inplace(first_flags, pe_action_pseudo); + clear_bit(first_flags, pe_action_pseudo); changed |= graph_update_action(first, then, then->node, first_flags, other->type); } else if (order_actions(first, then, other->type)) { /* Start again to get the new actions_before list */ changed |= (pe_graph_updated_then | pe_graph_disable); } if (changed & pe_graph_disable) { crm_trace("Disabled constraint %s -> %s", other->action->uuid, then->uuid); - clear_bit_inplace(changed, pe_graph_disable); + clear_bit(changed, pe_graph_disable); other->type = pe_order_none; } if (changed & pe_graph_updated_first) { GListPtr lpc2 = NULL; crm_trace("Updated %s (first %s %s %s), processing dependants ", first->uuid, is_set(first->flags, pe_action_optional) ? "optional" : "required", is_set(first->flags, pe_action_runnable) ? "runnable" : "unrunnable", is_set(first->flags, pe_action_pseudo) ? "pseudo" : first->node ? first->node->details-> uname : ""); for (lpc2 = first->actions_after; lpc2 != NULL; lpc2 = lpc2->next) { action_wrapper_t *other = (action_wrapper_t *) lpc2->data; update_action(other->action); } update_action(first); } } if (is_set(then->flags, pe_action_requires_any)) { if (last_flags != then->flags) { changed |= pe_graph_updated_then; } else { - clear_bit_inplace(changed, pe_graph_updated_then); + clear_bit(changed, pe_graph_updated_then); } } if (changed & pe_graph_updated_then) { crm_trace("Updated %s (then %s %s %s), processing dependants ", then->uuid, is_set(then->flags, pe_action_optional) ? "optional" : "required", is_set(then->flags, pe_action_runnable) ? "runnable" : "unrunnable", is_set(then->flags, pe_action_pseudo) ? "pseudo" : then->node ? then->node->details-> uname : ""); update_action(then); for (lpc = then->actions_after; lpc != NULL; lpc = lpc->next) { action_wrapper_t *other = (action_wrapper_t *) lpc->data; update_action(other->action); } } return FALSE; } gboolean shutdown_constraints(node_t * node, action_t * shutdown_op, pe_working_set_t * data_set) { /* add the stop to the before lists so it counts as a pre-req * for the shutdown */ GListPtr lpc = NULL; for (lpc = data_set->actions; lpc != NULL; lpc = lpc->next) { action_t *action = (action_t *) lpc->data; if (action->rsc == NULL || action->node == NULL) { continue; } else if(action->node->details != node->details) { continue; } else if(is_set(data_set->flags, pe_flag_maintenance_mode)) { crm_trace("Skipping %s: maintainence mode", action->uuid); continue; } else if(safe_str_neq(action->task, RSC_STOP)) { continue; } else if(is_not_set(action->rsc->flags, pe_rsc_managed) && is_not_set(action->rsc->flags, pe_rsc_block)) { /* * If another action depends on this one, we may still end up blocking */ crm_trace("Skipping %s: unmanaged", action->uuid); continue; } crm_trace("Ordering %s before shutdown on %s", action->uuid, node->details->uname); - clear_bit_inplace(action->flags, pe_action_optional); + clear_bit(action->flags, pe_action_optional); custom_action_order(action->rsc, NULL, action, NULL, strdup(CRM_OP_SHUTDOWN), shutdown_op, pe_order_optional|pe_order_runnable_left, data_set); } return TRUE; } gboolean stonith_constraints(node_t * node, action_t * stonith_op, pe_working_set_t * data_set) { CRM_CHECK(stonith_op != NULL, return FALSE); /* * Make sure the stonith OP occurs before we start any shared resources */ if (stonith_op != NULL) { GListPtr lpc = NULL; for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { resource_t *rsc = (resource_t *) lpc->data; rsc_stonith_ordering(rsc, stonith_op, data_set); } } /* add the stonith OP as a stop pre-req and the mark the stop * as a pseudo op - since its now redundant */ return TRUE; } xmlNode * action2xml(action_t * action, gboolean as_input) { gboolean needs_node_info = TRUE; xmlNode *action_xml = NULL; xmlNode *args_xml = NULL; char *action_id_s = NULL; if (action == NULL) { return NULL; } crm_trace("Dumping action %d as XML", action->id); if (safe_str_eq(action->task, CRM_OP_FENCE)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); /* needs_node_info = FALSE; */ } else if (safe_str_eq(action->task, CRM_OP_SHUTDOWN)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); } else if (safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); } else if (safe_str_eq(action->task, CRM_OP_LRM_REFRESH)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); /* } else if(safe_str_eq(action->task, RSC_PROBED)) { */ /* action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); */ } else if (is_set(action->flags, pe_action_pseudo)) { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_PSEUDO_EVENT); needs_node_info = FALSE; } else { action_xml = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); } action_id_s = crm_itoa(action->id); crm_xml_add(action_xml, XML_ATTR_ID, action_id_s); free(action_id_s); crm_xml_add(action_xml, XML_LRM_ATTR_TASK, action->task); if (action->rsc != NULL && action->rsc->clone_name != NULL) { char *clone_key = NULL; const char *interval_s = g_hash_table_lookup(action->meta, "interval"); int interval = crm_parse_int(interval_s, "0"); if (safe_str_eq(action->task, RSC_NOTIFY)) { const char *n_type = g_hash_table_lookup(action->meta, "notify_type"); const char *n_task = g_hash_table_lookup(action->meta, "notify_operation"); CRM_CHECK(n_type != NULL, crm_err("No notify type value found for %s", action->uuid)); CRM_CHECK(n_task != NULL, crm_err("No notify operation value found for %s", action->uuid)); clone_key = generate_notify_key(action->rsc->clone_name, n_type, n_task); } else { clone_key = generate_op_key(action->rsc->clone_name, action->task, interval); } CRM_CHECK(clone_key != NULL, crm_err("Could not generate a key for %s", action->uuid)); crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, clone_key); crm_xml_add(action_xml, "internal_" XML_LRM_ATTR_TASK_KEY, action->uuid); free(clone_key); } else { crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, action->uuid); } if (needs_node_info && action->node != NULL) { crm_xml_add(action_xml, XML_LRM_ATTR_TARGET, action->node->details->uname); crm_xml_add(action_xml, XML_LRM_ATTR_TARGET_UUID, action->node->details->id); } if (is_set(action->flags, pe_action_failure_is_fatal) == FALSE) { add_hash_param(action->meta, XML_ATTR_TE_ALLOWFAIL, XML_BOOLEAN_TRUE); } if (as_input) { return action_xml; } if (action->rsc) { if (is_set(action->flags, pe_action_pseudo) == FALSE) { int lpc = 0; xmlNode *rsc_xml = create_xml_node(action_xml, crm_element_name(action->rsc->xml)); const char *attr_list[] = { XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER, XML_ATTR_TYPE }; if (action->rsc->clone_name != NULL) { crm_debug("Using clone name %s for %s", action->rsc->clone_name, action->rsc->id); crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name); crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); } else { crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id); crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->long_name); } for (lpc = 0; lpc < DIMOF(attr_list); lpc++) { crm_xml_add(rsc_xml, attr_list[lpc], g_hash_table_lookup(action->rsc->meta, attr_list[lpc])); } } } args_xml = create_xml_node(NULL, XML_TAG_ATTRS); crm_xml_add(args_xml, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); g_hash_table_foreach(action->extra, hash2field, args_xml); if (action->rsc != NULL) { g_hash_table_foreach(action->rsc->parameters, hash2smartfield, args_xml); } g_hash_table_foreach(action->meta, hash2metafield, args_xml); if (action->rsc != NULL) { resource_t *parent = action->rsc; while (parent != NULL) { parent->cmds->append_meta(parent, args_xml); parent = parent->parent; } } else if (safe_str_eq(action->task, CRM_OP_FENCE)) { g_hash_table_foreach(action->node->details->attrs, hash2metafield, args_xml); } sorted_xml(args_xml, action_xml, FALSE); crm_log_xml_trace(action_xml, "dumped action"); free_xml(args_xml); return action_xml; } static gboolean should_dump_action(action_t * action) { CRM_CHECK(action != NULL, return FALSE); if (is_set(action->flags, pe_action_dumped)) { crm_trace( "action %d (%s) was already dumped", action->id, action->uuid); return FALSE; } else if (is_set(action->flags, pe_action_runnable) == FALSE) { crm_trace( "action %d (%s) was not runnable", action->id, action->uuid); return FALSE; } else if (is_set(action->flags, pe_action_optional) && is_set(action->flags, pe_action_print_always) == FALSE) { crm_trace( "action %d (%s) was optional", action->id, action->uuid); return FALSE; } else if (action->rsc != NULL && is_not_set(action->rsc->flags, pe_rsc_managed)) { const char *interval = NULL; interval = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL); /* make sure probes and recurring monitors go through */ if (safe_str_neq(action->task, RSC_STATUS) && interval == NULL) { crm_trace( "action %d (%s) was for an unmanaged resource (%s)", action->id, action->uuid, action->rsc->id); return FALSE; } } if (is_set(action->flags, pe_action_pseudo) || safe_str_eq(action->task, CRM_OP_FENCE) || safe_str_eq(action->task, CRM_OP_SHUTDOWN)) { /* skip the next checks */ return TRUE; } if (action->node == NULL) { pe_err("action %d (%s) was not allocated", action->id, action->uuid); log_action(LOG_DEBUG, "Unallocated action", action, FALSE); return FALSE; } else if (action->node->details->online == FALSE) { pe_err("action %d was (%s) scheduled for offline node", action->id, action->uuid); log_action(LOG_DEBUG, "Action for offline node", action, FALSE); return FALSE; #if 0 /* but this would also affect resources that can be safely * migrated before a fencing op */ } else if (action->node->details->unclean == FALSE) { pe_err("action %d was (%s) scheduled for unclean node", action->id, action->uuid); log_action(LOG_DEBUG, "Action for unclean node", action, FALSE); return FALSE; #endif } return TRUE; } /* lowest to highest */ static gint sort_action_id(gconstpointer a, gconstpointer b) { const action_wrapper_t *action_wrapper2 = (const action_wrapper_t *)a; const action_wrapper_t *action_wrapper1 = (const action_wrapper_t *)b; if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (action_wrapper1->action->id > action_wrapper2->action->id) { return -1; } if (action_wrapper1->action->id < action_wrapper2->action->id) { return 1; } return 0; } static gboolean should_dump_input(int last_action, action_t * action, action_wrapper_t * wrapper) { int type = wrapper->type; type &= ~pe_order_implies_first_printed; type &= ~pe_order_implies_then_printed; type &= ~pe_order_optional; wrapper->state = pe_link_not_dumped; if (last_action == wrapper->action->id) { crm_trace( "Input (%d) %s duplicated for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); wrapper->state = pe_link_dup; return FALSE; } else if (wrapper->type == pe_order_none) { crm_trace( "Input (%d) %s suppressed for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); return FALSE; } else if (is_set(wrapper->action->flags, pe_action_runnable) == FALSE && type == pe_order_none && safe_str_neq(wrapper->action->uuid, CRM_OP_PROBED)) { crm_trace( "Input (%d) %s optional (ordering) for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); return FALSE; } else if (is_set(action->flags, pe_action_pseudo) && (wrapper->type & pe_order_stonith_stop)) { crm_trace( "Input (%d) %s suppressed for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); return FALSE; } else if (wrapper->type == pe_order_load) { crm_trace("check load filter %s.%s -> %s.%s", wrapper->action->uuid, wrapper->action->node->details->uname, action->uuid, action->node->details->uname); if (action->rsc && safe_str_eq(action->task, RSC_MIGRATE)) { /* For migrate_to ops, we care about where it has been * allocated to, not where the action will be executed */ if(wrapper->action->node == NULL || action->rsc->allocated_to == NULL || wrapper->action->node->details != action->rsc->allocated_to->details) { /* Check if the actions are for the same node, ignore otherwise */ crm_trace("load filter - migrate"); wrapper->type = pe_order_none; return FALSE; } } else if (wrapper->action->node == NULL || action->node == NULL || wrapper->action->node->details != action->node->details) { /* Check if the actions are for the same node, ignore otherwise */ crm_trace("load filter - node"); wrapper->type = pe_order_none; return FALSE; } else if(is_set(wrapper->action->flags, pe_action_optional)) { /* Check if the pre-req is optional, ignore if so */ crm_trace("load filter - optional"); wrapper->type = pe_order_none; return FALSE; } } else if (wrapper->action->rsc && wrapper->action->rsc != action->rsc && is_set(wrapper->action->rsc->flags, pe_rsc_failed) && is_not_set(wrapper->action->rsc->flags, pe_rsc_managed) && strstr(wrapper->action->uuid, "_stop_0") && action->rsc && action->rsc->variant >= pe_clone) { crm_warn("Ignoring requirement that %s comeplete before %s:" " unmanaged failed resources cannot prevent clone shutdown", wrapper->action->uuid, action->uuid); return FALSE; } else if (is_set(wrapper->action->flags, pe_action_dumped) || should_dump_action(wrapper->action)) { crm_trace( "Input (%d) %s should be dumped for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); goto dump; #if 0 } else if (is_set(wrapper->action->flags, pe_action_runnable) && is_set(wrapper->action->flags, pe_action_pseudo) && wrapper->action->rsc->variant != pe_native) { crm_crit("Input (%d) %s should be dumped for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); goto dump; #endif } else if (is_set(wrapper->action->flags, pe_action_optional) == TRUE && is_set(wrapper->action->flags, pe_action_print_always) == FALSE) { crm_trace( "Input (%d) %s optional for %s", wrapper->action->id, wrapper->action->uuid, action->uuid); crm_trace( "Input (%d) %s n=%p p=%d r=%d o=%d a=%d f=0x%.6x", wrapper->action->id, wrapper->action->uuid, wrapper->action->node, is_set(wrapper->action->flags, pe_action_pseudo), is_set(wrapper->action->flags, pe_action_runnable), is_set(wrapper->action->flags, pe_action_optional), is_set(wrapper->action->flags, pe_action_print_always), wrapper->type); return FALSE; } dump: crm_trace( "Input (%d) %s n=%p p=%d r=%d o=%d a=%d f=0x%.6x dumped for %s", wrapper->action->id, wrapper->action->uuid, wrapper->action->node, is_set(wrapper->action->flags, pe_action_pseudo), is_set(wrapper->action->flags, pe_action_runnable), is_set(wrapper->action->flags, pe_action_optional), is_set(wrapper->action->flags, pe_action_print_always), wrapper->type, action->uuid); return TRUE; } void graph_element_from_action(action_t * action, pe_working_set_t * data_set) { GListPtr lpc = NULL; int last_action = -1; int synapse_priority = 0; xmlNode *syn = NULL; xmlNode *set = NULL; xmlNode *in = NULL; xmlNode *input = NULL; xmlNode *xml_action = NULL; if (should_dump_action(action) == FALSE) { return; } - set_bit_inplace(action->flags, pe_action_dumped); + set_bit(action->flags, pe_action_dumped); syn = create_xml_node(data_set->graph, "synapse"); set = create_xml_node(syn, "action_set"); in = create_xml_node(syn, "inputs"); crm_xml_add_int(syn, XML_ATTR_ID, data_set->num_synapse); data_set->num_synapse++; if (action->rsc != NULL) { synapse_priority = action->rsc->priority; } if (action->priority > synapse_priority) { synapse_priority = action->priority; } if (synapse_priority > 0) { crm_xml_add_int(syn, XML_CIB_ATTR_PRIORITY, synapse_priority); } xml_action = action2xml(action, FALSE); add_node_nocopy(set, crm_element_name(xml_action), xml_action); action->actions_before = g_list_sort(action->actions_before, sort_action_id); for (lpc = action->actions_before; lpc != NULL; lpc = lpc->next) { action_wrapper_t *wrapper = (action_wrapper_t *) lpc->data; if (should_dump_input(last_action, action, wrapper) == FALSE) { continue; } wrapper->state = pe_link_dumped; CRM_CHECK(last_action < wrapper->action->id,; ); last_action = wrapper->action->id; input = create_xml_node(in, "trigger"); xml_action = action2xml(wrapper->action, TRUE); add_node_nocopy(input, crm_element_name(xml_action), xml_action); } } diff --git a/pengine/group.c b/pengine/group.c index 13ad836cc4..e725647ff8 100644 --- a/pengine/group.c +++ b/pengine/group.c @@ -1,514 +1,514 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #define VARIANT_GROUP 1 #include node_t * group_color(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) { node_t *node = NULL; node_t *group_node = NULL; GListPtr gIter = NULL; group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); if (is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to; } crm_trace("Processing %s", rsc->id); if (is_set(rsc->flags, pe_rsc_allocating)) { crm_debug("Dependency loop detected involving %s", rsc->id); return NULL; } if (group_data->first_child == NULL) { /* nothign to allocate */ clear_bit(rsc->flags, pe_rsc_provisional); return NULL; } set_bit(rsc->flags, pe_rsc_allocating); rsc->role = group_data->first_child->role; group_data->first_child->rsc_cons = g_list_concat(group_data->first_child->rsc_cons, rsc->rsc_cons); rsc->rsc_cons = NULL; group_data->first_child->rsc_cons_lhs = g_list_concat(group_data->first_child->rsc_cons_lhs, rsc->rsc_cons_lhs); rsc->rsc_cons_lhs = NULL; gIter = rsc->rsc_tickets; for (; gIter != NULL; gIter = gIter->next) { rsc_ticket_t *rsc_ticket = (rsc_ticket_t *) gIter->data; if (rsc_ticket->ticket->granted == FALSE || rsc_ticket->ticket->standby) { rsc_ticket_constraint(rsc, rsc_ticket, data_set); } } dump_node_scores(show_scores ? 0 : scores_log_level, rsc, __PRETTY_FUNCTION__, rsc->allowed_nodes); gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; node = child_rsc->cmds->allocate(child_rsc, prefer, data_set); if (group_node == NULL) { group_node = node; } } rsc->next_role = group_data->first_child->next_role; clear_bit(rsc->flags, pe_rsc_allocating); clear_bit(rsc->flags, pe_rsc_provisional); if (group_data->colocated) { return group_node; } return NULL; } void group_update_pseudo_status(resource_t * parent, resource_t * child); void group_create_actions(resource_t * rsc, pe_working_set_t * data_set) { action_t *op = NULL; const char *value = NULL; GListPtr gIter = rsc->children; crm_trace("Creating actions for %s", rsc->id); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; child_rsc->cmds->create_actions(child_rsc, data_set); group_update_pseudo_status(rsc, child_rsc); } op = start_action(rsc, NULL, TRUE /* !group_data->child_starting */ ); - set_bit_inplace(op->flags, pe_action_pseudo | pe_action_runnable); + set_bit(op->flags, pe_action_pseudo | pe_action_runnable); op = custom_action(rsc, started_key(rsc), RSC_STARTED, NULL, TRUE /* !group_data->child_starting */ , TRUE, data_set); - set_bit_inplace(op->flags, pe_action_pseudo | pe_action_runnable); + set_bit(op->flags, pe_action_pseudo | pe_action_runnable); op = stop_action(rsc, NULL, TRUE /* !group_data->child_stopping */ ); - set_bit_inplace(op->flags, pe_action_pseudo | pe_action_runnable); + set_bit(op->flags, pe_action_pseudo | pe_action_runnable); op = custom_action(rsc, stopped_key(rsc), RSC_STOPPED, NULL, TRUE /* !group_data->child_stopping */ , TRUE, data_set); - set_bit_inplace(op->flags, pe_action_pseudo | pe_action_runnable); + set_bit(op->flags, pe_action_pseudo | pe_action_runnable); value = g_hash_table_lookup(rsc->meta, "stateful"); if (crm_is_true(value)) { op = custom_action(rsc, demote_key(rsc), RSC_DEMOTE, NULL, TRUE, TRUE, data_set); - set_bit_inplace(op->flags, pe_action_pseudo); - set_bit_inplace(op->flags, pe_action_runnable); + set_bit(op->flags, pe_action_pseudo); + set_bit(op->flags, pe_action_runnable); op = custom_action(rsc, demoted_key(rsc), RSC_DEMOTED, NULL, TRUE, TRUE, data_set); - set_bit_inplace(op->flags, pe_action_pseudo); - set_bit_inplace(op->flags, pe_action_runnable); + set_bit(op->flags, pe_action_pseudo); + set_bit(op->flags, pe_action_runnable); op = custom_action(rsc, promote_key(rsc), RSC_PROMOTE, NULL, TRUE, TRUE, data_set); - set_bit_inplace(op->flags, pe_action_pseudo); - set_bit_inplace(op->flags, pe_action_runnable); + set_bit(op->flags, pe_action_pseudo); + set_bit(op->flags, pe_action_runnable); op = custom_action(rsc, promoted_key(rsc), RSC_PROMOTED, NULL, TRUE, TRUE, data_set); - set_bit_inplace(op->flags, pe_action_pseudo); - set_bit_inplace(op->flags, pe_action_runnable); + set_bit(op->flags, pe_action_pseudo); + set_bit(op->flags, pe_action_runnable); } } void group_update_pseudo_status(resource_t * parent, resource_t * child) { GListPtr gIter = child->actions; group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, parent); if (group_data->ordered == FALSE) { /* If this group is not ordered, then leave the meta-actions as optional */ return; } if (group_data->child_stopping && group_data->child_starting) { return; } for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (is_set(action->flags, pe_action_optional)) { continue; } if (safe_str_eq(RSC_STOP, action->task) && is_set(action->flags, pe_action_runnable)) { group_data->child_stopping = TRUE; crm_trace("Based on %s the group is stopping", action->uuid); } else if (safe_str_eq(RSC_START, action->task) && is_set(action->flags, pe_action_runnable)) { group_data->child_starting = TRUE; crm_trace("Based on %s the group is starting", action->uuid); } } } void group_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = rsc->children; resource_t *last_rsc = NULL; resource_t *top = uber_parent(rsc); group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); new_rsc_order(rsc, RSC_STOPPED, rsc, RSC_START, pe_order_optional, data_set); new_rsc_order(rsc, RSC_START, rsc, RSC_STARTED, pe_order_runnable_left, data_set); new_rsc_order(rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_runnable_left, data_set); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; int stop = pe_order_none; int stopped = pe_order_implies_then_printed; int start = pe_order_implies_then | pe_order_runnable_left; int started = pe_order_runnable_left | pe_order_implies_then | pe_order_implies_then_printed; child_rsc->cmds->internal_constraints(child_rsc, data_set); if (last_rsc == NULL) { if (group_data->ordered) { stop |= pe_order_optional; stopped = pe_order_implies_then; } } else if (group_data->colocated) { rsc_colocation_new("group:internal_colocation", NULL, INFINITY, child_rsc, last_rsc, NULL, NULL, data_set); } if (top->variant == pe_master) { new_rsc_order(rsc, RSC_DEMOTE, child_rsc, RSC_DEMOTE, stop | pe_order_implies_first_printed, data_set); new_rsc_order(child_rsc, RSC_DEMOTE, rsc, RSC_DEMOTED, stopped, data_set); new_rsc_order(child_rsc, RSC_PROMOTE, rsc, RSC_PROMOTED, started, data_set); new_rsc_order(rsc, RSC_PROMOTE, child_rsc, RSC_PROMOTE, pe_order_implies_first_printed, data_set); } order_start_start(rsc, child_rsc, pe_order_implies_first_printed); order_stop_stop(rsc, child_rsc, stop | pe_order_implies_first_printed); new_rsc_order(child_rsc, RSC_STOP, rsc, RSC_STOPPED, stopped, data_set); new_rsc_order(child_rsc, RSC_START, rsc, RSC_STARTED, started, data_set); if (group_data->ordered == FALSE) { order_start_start(rsc, child_rsc, start | pe_order_implies_first_printed); if (top->variant == pe_master) { new_rsc_order(rsc, RSC_PROMOTE, child_rsc, RSC_PROMOTE, start | pe_order_implies_first_printed, data_set); } } else if (last_rsc != NULL) { child_rsc->restart_type = pe_restart_restart; order_start_start(last_rsc, child_rsc, start); order_stop_stop(child_rsc, last_rsc, pe_order_optional|pe_order_restart); if (top->variant == pe_master) { new_rsc_order(last_rsc, RSC_PROMOTE, child_rsc, RSC_PROMOTE, start, data_set); new_rsc_order(child_rsc, RSC_DEMOTE, last_rsc, RSC_DEMOTE, pe_order_optional, data_set); } } else { /* If anyone in the group is starting, then * pe_order_implies_then will cause _everyone_ in the group * to be sent a start action * But this is safe since starting something that is already * started is required to be "safe" */ int flags = pe_order_none; order_start_start(rsc, child_rsc, flags); if (top->variant == pe_master) { new_rsc_order(rsc, RSC_PROMOTE, child_rsc, RSC_PROMOTE, flags, data_set); } } last_rsc = child_rsc; } if (group_data->ordered && last_rsc != NULL) { int stop_stop_flags = pe_order_implies_then; int stop_stopped_flags = pe_order_optional; order_stop_stop(rsc, last_rsc, stop_stop_flags); new_rsc_order(last_rsc, RSC_STOP, rsc, RSC_STOPPED, stop_stopped_flags, data_set); if (top->variant == pe_master) { new_rsc_order(rsc, RSC_DEMOTE, last_rsc, RSC_DEMOTE, stop_stop_flags, data_set); new_rsc_order(last_rsc, RSC_DEMOTE, rsc, RSC_DEMOTED, stop_stopped_flags, data_set); } } } void group_rsc_colocation_lh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { GListPtr gIter = NULL; group_variant_data_t *group_data = NULL; if (rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", constraint->id); return; } else if (rsc_rh == NULL) { pe_err("rsc_rh was NULL for %s", constraint->id); return; } gIter = rsc_lh->children; crm_trace("Processing constraints from %s", rsc_lh->id); get_group_variant_data(group_data, rsc_lh); if (group_data->colocated) { group_data->first_child->cmds->rsc_colocation_lh(group_data->first_child, rsc_rh, constraint); return; } else if (constraint->score >= INFINITY) { crm_config_err("%s: Cannot perform manditory colocation" " between non-colocated group and %s", rsc_lh->id, rsc_rh->id); return; } for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; child_rsc->cmds->rsc_colocation_lh(child_rsc, rsc_rh, constraint); } } void group_rsc_colocation_rh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { GListPtr gIter = rsc_rh->children; group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc_rh); CRM_CHECK(rsc_lh->variant == pe_native, return); crm_trace("Processing RH of constraint %s", constraint->id); print_resource(LOG_DEBUG_3, "LHS", rsc_lh, TRUE); if (is_set(rsc_rh->flags, pe_rsc_provisional)) { return; } else if (group_data->colocated && group_data->first_child) { if (constraint->score >= INFINITY) { /* Ensure RHS is _fully_ up before can start LHS */ group_data->last_child->cmds->rsc_colocation_rh(rsc_lh, group_data->last_child, constraint); } else { /* A partially active RHS is fine */ group_data->first_child->cmds->rsc_colocation_rh(rsc_lh, group_data->first_child, constraint); } return; } else if (constraint->score >= INFINITY) { crm_config_err("%s: Cannot perform manditory colocation with" " non-colocated group: %s", rsc_lh->id, rsc_rh->id); return; } for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; child_rsc->cmds->rsc_colocation_rh(rsc_lh, child_rsc, constraint); } } enum pe_action_flags group_action_flags(action_t * action, node_t * node) { GListPtr gIter = NULL; enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo); for (gIter = action->rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; enum action_tasks task = get_complex_task(child, action->task, TRUE); const char *task_s = task2text(task); action_t *child_action = find_first_action(child->actions, NULL, task_s, node); if (child_action) { enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node); if (is_set(flags, pe_action_optional) && is_set(child_flags, pe_action_optional) == FALSE) { crm_trace("%s is manditory because of %s", action->uuid, child_action->uuid); - clear_bit_inplace(flags, pe_action_optional); + clear_bit(flags, pe_action_optional); pe_clear_action_bit(action, pe_action_optional); } if (safe_str_neq(task_s, action->task) && is_set(flags, pe_action_runnable) && is_set(child_flags, pe_action_runnable) == FALSE) { crm_trace("%s is not runnable because of %s", action->uuid, child_action->uuid); - clear_bit_inplace(flags, pe_action_runnable); + clear_bit(flags, pe_action_runnable); pe_clear_action_bit(action, pe_action_runnable); } } else if (task != stop_rsc) { crm_trace("%s is not runnable because of %s (not found in %s)", action->uuid, task_s, child->id); - clear_bit_inplace(flags, pe_action_runnable); + clear_bit(flags, pe_action_runnable); } } return flags; } enum pe_graph_flags group_update_actions(action_t * first, action_t * then, node_t * node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type) { GListPtr gIter = then->rsc->children; enum pe_graph_flags changed = pe_graph_none; CRM_ASSERT(then->rsc != NULL); changed |= native_update_actions(first, then, node, flags, filter, type); for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; action_t *child_action = find_first_action(child->actions, NULL, then->task, node); if (child_action) { changed |= child->cmds->update_actions(first, child_action, node, flags, filter, type); } } return changed; } void group_rsc_location(resource_t * rsc, rsc_to_node_t * constraint) { GListPtr gIter = rsc->children; GListPtr saved = constraint->node_list_rh; GListPtr zero = node_list_dup(constraint->node_list_rh, TRUE, FALSE); gboolean reset_scores = TRUE; group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); crm_debug("Processing rsc_location %s for %s", constraint->id, rsc->id); native_rsc_location(rsc, constraint); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; child_rsc->cmds->rsc_location(child_rsc, constraint); if (group_data->colocated && reset_scores) { reset_scores = FALSE; constraint->node_list_rh = zero; } } constraint->node_list_rh = saved; g_list_free_full(zero, free); } void group_expand(resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = rsc->children; group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); crm_trace("Processing actions from %s", rsc->id); CRM_CHECK(rsc != NULL, return); native_expand(rsc, data_set); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; child_rsc->cmds->expand(child_rsc, data_set); } } GHashTable * group_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr, int factor, gboolean allow_rollback, gboolean only_positive) { GListPtr gIter = rsc->rsc_cons_lhs; group_variant_data_t *group_data = NULL; get_group_variant_data(group_data, rsc); if (is_set(rsc->flags, pe_rsc_merging)) { crm_info("Breaking dependency loop with %s at %s", rsc->id, rhs); return nodes; } set_bit(rsc->flags, pe_rsc_merging); nodes = group_data->first_child->cmds->merge_weights(group_data->first_child, rhs, nodes, attr, factor, allow_rollback, only_positive); for (; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; nodes = native_merge_weights(constraint->rsc_lh, rsc->id, nodes, constraint->node_attribute, constraint->score / INFINITY, allow_rollback, only_positive); } clear_bit(rsc->flags, pe_rsc_merging); return nodes; } void group_append_meta(resource_t * rsc, xmlNode * xml) { } diff --git a/pengine/native.c b/pengine/native.c index c97c3a8ac8..ac6434c89c 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -1,3041 +1,3041 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #define DELETE_THEN_REFRESH 1 /* The crmd will remove the resource from the CIB itself, making this redundant */ #define INFINITY_HACK (INFINITY * -100) #define VARIANT_NATIVE 1 #include void native_rsc_colocation_rh_must(resource_t * rsc_lh, gboolean update_lh, resource_t * rsc_rh, gboolean update_rh); void native_rsc_colocation_rh_mustnot(resource_t * rsc_lh, gboolean update_lh, resource_t * rsc_rh, gboolean update_rh); void Recurring(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set); void RecurringOp(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set); void Recurring_Stopped(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set); void RecurringOp_Stopped(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set); void pe_post_notify(resource_t * rsc, node_t * node, action_t * op, notify_data_t * n_data, pe_working_set_t * data_set); gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set); gboolean StopRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean StartRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean DemoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean PromoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean RoleError(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean NullOp(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); /* *INDENT-OFF* */ enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* Current State */ /* Next State: Unknown Stopped Started Slave Master */ /* Unknown */ { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, }, /* Stopped */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, }, /* Started */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Slave */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_UNKNOWN, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Master */ { RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_UNKNOWN, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, }; gboolean (*rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX])(resource_t*,node_t*,gboolean,pe_working_set_t*) = { /* Current State */ /* Next State: Unknown Stopped Started Slave Master */ /* Unknown */ { RoleError, StopRsc, RoleError, RoleError, RoleError, }, /* Stopped */ { RoleError, NullOp, StartRsc, StartRsc, RoleError, }, /* Started */ { RoleError, StopRsc, NullOp, NullOp, PromoteRsc, }, /* Slave */ { RoleError, StopRsc, RoleError, NullOp, PromoteRsc, }, /* Master */ { RoleError, DemoteRsc, RoleError, DemoteRsc, NullOp, }, }; /* *INDENT-ON* */ struct capacity_data { node_t *node; resource_t *rsc; gboolean is_enough; }; static void check_capacity(gpointer key, gpointer value, gpointer user_data) { int required = 0; int remaining = 0; struct capacity_data *data = user_data; required = crm_parse_int(value, "0"); remaining = crm_parse_int(g_hash_table_lookup(data->node->details->utilization, key), "0"); if (required > remaining) { crm_debug("Node %s has no enough %s for resource %s: required=%d remaining=%d", data->node->details->uname, (char *)key, data->rsc->id, required, remaining); data->is_enough = FALSE; } } static gboolean have_enough_capacity(node_t * node, resource_t * rsc) { struct capacity_data data; data.node = node; data.rsc = rsc; data.is_enough = TRUE; g_hash_table_foreach(rsc->utilization, check_capacity, &data); return data.is_enough; } static gboolean native_choose_node(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) { /* 1. Sort by weight 2. color.chosen_node = the node (of those with the highest wieght) with the fewest resources 3. remove color.chosen_node from all other colors */ int alloc_details = scores_log_level + 1; GListPtr nodes = NULL; node_t *chosen = NULL; int lpc = 0; int multiple = 0; int length = 0; gboolean result = FALSE; if (safe_str_neq(data_set->placement_strategy, "default")) { GListPtr gIter = NULL; for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (have_enough_capacity(node, rsc) == FALSE) { crm_debug("Resource %s cannot be allocated to node %s: none of enough capacity", rsc->id, node->details->uname); resource_location(rsc, node, -INFINITY, "__limit_utilization_", data_set); } } dump_node_scores(alloc_details, rsc, "Post-utilization", rsc->allowed_nodes); } length = g_hash_table_size(rsc->allowed_nodes); if (is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to ? TRUE : FALSE; } if (prefer) { chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id); if (chosen && chosen->weight >= 0 && can_run_resources(chosen)) { crm_trace("Using preferred node %s for %s instead of choosing from %d candidates", chosen->details->uname, rsc->id, length); } else if (chosen && chosen->weight < 0) { crm_trace("Preferred node %s for %s was unavailable", chosen->details->uname, rsc->id); chosen = NULL; } else if (chosen && can_run_resources(chosen)) { crm_trace("Preferred node %s for %s was unsuitable", chosen->details->uname, rsc->id); chosen = NULL; } else { crm_trace("Preferred node %s for %s was unknown", prefer->details->uname, rsc->id); } } if (chosen == NULL && rsc->allowed_nodes) { nodes = g_hash_table_get_values(rsc->allowed_nodes); nodes = g_list_sort_with_data(nodes, sort_node_weight, g_list_nth_data(rsc->running_on, 0)); chosen = g_list_nth_data(nodes, 0); crm_trace("Chose node %s for %s from %d candidates", chosen ? chosen->details->uname : "", rsc->id, length); if (chosen && chosen->weight > 0 && can_run_resources(chosen)) { node_t *running = g_list_nth_data(rsc->running_on, 0); if (running && can_run_resources(running) == FALSE) { crm_trace("Current node for %s (%s) can't run resources", rsc->id, running->details->uname); running = NULL; } for (lpc = 1; lpc < length && running; lpc++) { node_t *tmp = g_list_nth_data(nodes, lpc); if (tmp->weight == chosen->weight) { multiple++; if (tmp->details == running->details) { /* prefer the existing node if scores are equal */ chosen = tmp; } } } } } if (multiple > 1) { int log_level = LOG_INFO; char *score = score2char(chosen->weight); if (chosen->weight >= INFINITY) { log_level = LOG_WARNING; } do_crm_log(log_level, "%d nodes with equal score (%s) for" " running %s resources. Chose %s.", multiple, score, rsc->id, chosen->details->uname); free(score); } result = native_assign_node(rsc, nodes, chosen, FALSE); g_list_free(nodes); return result; } static int node_list_attr_score(GHashTable * list, const char *attr, const char *value) { GHashTableIter iter; node_t *node = NULL; int best_score = -INFINITY; const char *best_node = NULL; if (attr == NULL) { attr = "#" XML_ATTR_UNAME; } g_hash_table_iter_init(&iter, list); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { int weight = node->weight; if (can_run_resources(node) == FALSE) { weight = -INFINITY; } if (weight > best_score || best_node == NULL) { const char *tmp = g_hash_table_lookup(node->details->attrs, attr); if (safe_str_eq(value, tmp)) { best_score = weight; best_node = node->details->uname; } } } if (safe_str_neq(attr, "#" XML_ATTR_UNAME)) { crm_info("Best score for %s=%s was %s with %d", attr, value, best_node ? best_node : "", best_score); } return best_score; } static void node_hash_update(GHashTable * list1, GHashTable * list2, const char *attr, int factor, gboolean only_positive) { int score = 0; int new_score = 0; GHashTableIter iter; node_t *node = NULL; if (attr == NULL) { attr = "#" XML_ATTR_UNAME; } g_hash_table_iter_init(&iter, list1); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { CRM_CHECK(node != NULL, continue); score = node_list_attr_score(list2, attr, g_hash_table_lookup(node->details->attrs, attr)); new_score = merge_weights(factor * score, node->weight); if (factor < 0 && score < 0) { /* Negative preference for a node with a negative score * should not become a positive preference * * TODO: Decide if we want to filter only if weight == -INFINITY * */ crm_trace("%s: Filtering %d + %d*%d (factor * score)", node->details->uname, node->weight, factor, score); } else if (node->weight == INFINITY_HACK) { crm_trace("%s: Filtering %d + %d*%d (node < 0)", node->details->uname, node->weight, factor, score); } else if (only_positive && new_score < 0 && node->weight > 0) { node->weight = INFINITY_HACK; crm_trace("%s: Filtering %d + %d*%d (score > 0)", node->details->uname, node->weight, factor, score); } else if (only_positive && new_score < 0 && node->weight == 0) { crm_trace("%s: Filtering %d + %d*%d (score == 0)", node->details->uname, node->weight, factor, score); } else { crm_trace("%s: %d + %d*%d", node->details->uname, node->weight, factor, score); node->weight = new_score; } } } static GHashTable * node_hash_dup(GHashTable * hash) { /* Hack! */ GListPtr list = g_hash_table_get_values(hash); GHashTable *result = node_hash_from_list(list); g_list_free(list); return result; } GHashTable * native_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr, int factor, gboolean allow_rollback, gboolean only_positive) { enum pe_weights flags = pe_weights_none; if (only_positive) { - set_bit_inplace(flags, pe_weights_positive); + set_bit(flags, pe_weights_positive); } if (allow_rollback) { - set_bit_inplace(flags, pe_weights_rollback); + set_bit(flags, pe_weights_rollback); } return rsc_merge_weights(rsc, rhs, nodes, attr, factor, flags); } GHashTable * rsc_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr, int factor, enum pe_weights flags) { GHashTable *work = NULL; int multiplier = 1; if (factor < 0) { multiplier = -1; } if (is_set(rsc->flags, pe_rsc_merging)) { crm_info("%s: Breaking dependency loop at %s", rhs, rsc->id); return nodes; } set_bit(rsc->flags, pe_rsc_merging); if (is_set(flags, pe_weights_init)) { if (rsc->variant == pe_group && rsc->children) { GListPtr last = rsc->children; while (last->next != NULL) { last = last->next; } crm_trace("Merging %s as a group %p %p", rsc->id, rsc->children, last); work = rsc_merge_weights(last->data, rhs, NULL, attr, factor, flags); } else { work = node_hash_dup(rsc->allowed_nodes); } - clear_bit_inplace(flags, pe_weights_init); + clear_bit(flags, pe_weights_init); } else { crm_trace("%s: Combining scores from %s", rhs, rsc->id); work = node_hash_dup(nodes); node_hash_update(work, rsc->allowed_nodes, attr, factor, is_set(flags, pe_weights_positive)); } if (is_set(flags, pe_weights_rollback) && can_run_any(work) == FALSE) { crm_info("%s: Rolling back scores from %s", rhs, rsc->id); g_hash_table_destroy(work); clear_bit(rsc->flags, pe_rsc_merging); return nodes; } if (can_run_any(work)) { GListPtr gIter = NULL; if (is_set(flags, pe_weights_forward)) { gIter = rsc->rsc_cons; } else { gIter = rsc->rsc_cons_lhs; } for (; gIter != NULL; gIter = gIter->next) { resource_t *other = NULL; rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; if (is_set(flags, pe_weights_forward)) { other = constraint->rsc_rh; } else { other = constraint->rsc_lh; } crm_trace("Applying %s (%s)", constraint->id, other->id); work = rsc_merge_weights(other, rhs, work, constraint->node_attribute, multiplier * constraint->score / INFINITY, flags); dump_node_scores(LOG_TRACE, NULL, rhs, work); } } if(is_set(flags, pe_weights_positive)) { node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, work); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->weight == INFINITY_HACK) { node->weight = 1; } } } if (nodes) { g_hash_table_destroy(nodes); } clear_bit(rsc->flags, pe_rsc_merging); return work; } node_t * native_color(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) { GListPtr gIter = NULL; int alloc_details = scores_log_level + 1; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if (rsc->parent && is_not_set(rsc->parent->flags, pe_rsc_allocating)) { /* never allocate children on their own */ crm_debug("Escalating allocation of %s to its parent: %s", rsc->id, rsc->parent->id); rsc->parent->cmds->allocate(rsc->parent, prefer, data_set); } if (is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to; } if (is_set(rsc->flags, pe_rsc_allocating)) { crm_debug("Dependency loop detected involving %s", rsc->id); return NULL; } set_bit(rsc->flags, pe_rsc_allocating); print_resource(alloc_details, "Allocating: ", rsc, FALSE); dump_node_scores(alloc_details, rsc, "Pre-allloc", rsc->allowed_nodes); for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; GHashTable *archive = NULL; resource_t *rsc_rh = constraint->rsc_rh; crm_trace("%s: Pre-Processing %s (%s, %d, %s)", rsc->id, constraint->id, rsc_rh->id, constraint->score, role2text(constraint->role_lh)); if (constraint->role_lh >= RSC_ROLE_MASTER || (constraint->score < 0 && constraint->score > -INFINITY)) { archive = node_hash_dup(rsc->allowed_nodes); } rsc_rh->cmds->allocate(rsc_rh, NULL, data_set); rsc->cmds->rsc_colocation_lh(rsc, rsc_rh, constraint); if (archive && can_run_any(rsc->allowed_nodes) == FALSE) { crm_info("%s: Rolling back scores from %s", rsc->id, rsc_rh->id); g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = archive; archive = NULL; } if (archive) { g_hash_table_destroy(archive); } } dump_node_scores(alloc_details, rsc, "Post-coloc", rsc->allowed_nodes); for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; rsc->allowed_nodes = constraint->rsc_lh->cmds->merge_weights(constraint->rsc_lh, rsc->id, rsc->allowed_nodes, constraint->node_attribute, constraint->score / INFINITY, TRUE, FALSE); } for (gIter = rsc->rsc_tickets; gIter != NULL; gIter = gIter->next) { rsc_ticket_t *rsc_ticket = (rsc_ticket_t *) gIter->data; if (rsc_ticket->ticket->granted == FALSE || rsc_ticket->ticket->standby) { rsc_ticket_constraint(rsc, rsc_ticket, data_set); } } print_resource(LOG_DEBUG_2, "Allocating: ", rsc, FALSE); if (rsc->next_role == RSC_ROLE_STOPPED) { crm_trace("Making sure %s doesn't get allocated", rsc->id); /* make sure it doesnt come up again */ resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE, data_set); } dump_node_scores(show_scores ? 0 : scores_log_level, rsc, __PRETTY_FUNCTION__, rsc->allowed_nodes); if (is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } if (is_not_set(rsc->flags, pe_rsc_managed)) { const char *reason = NULL; node_t *assign_to = NULL; rsc->next_role = rsc->role; if (rsc->running_on == NULL) { reason = "inactive"; } else if (rsc->role == RSC_ROLE_MASTER) { assign_to = rsc->running_on->data; reason = "master"; } else if (is_set(rsc->flags, pe_rsc_failed)) { reason = "failed"; } else { assign_to = rsc->running_on->data; reason = "active"; } crm_info("Unmanaged resource %s allocated to %s: %s", rsc->id, assign_to ? assign_to->details->uname : "'nowhere'", reason); native_assign_node(rsc, NULL, assign_to, TRUE); } else if (is_set(data_set->flags, pe_flag_stop_everything) && safe_str_neq(class, "stonith")) { crm_debug("Forcing %s to stop", rsc->id); native_assign_node(rsc, NULL, NULL, TRUE); } else if (is_set(rsc->flags, pe_rsc_provisional) && native_choose_node(rsc, prefer, data_set)) { crm_trace("Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } else if (rsc->allocated_to == NULL) { if (is_not_set(rsc->flags, pe_rsc_orphan)) { crm_info("Resource %s cannot run anywhere", rsc->id); } else if (rsc->running_on != NULL) { crm_info("Stopping orphan resource %s", rsc->id); } } else { crm_debug("Pre-Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } clear_bit(rsc->flags, pe_rsc_allocating); print_resource(LOG_DEBUG_3, "Allocated ", rsc, TRUE); return rsc->allocated_to; } static gboolean is_op_dup(resource_t * rsc, const char *name, const char *interval) { gboolean dup = FALSE; const char *id = NULL; const char *value = NULL; xmlNode *operation = NULL; for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { value = crm_element_value(operation, "name"); if (safe_str_neq(value, name)) { continue; } value = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if (value == NULL) { value = "0"; } if (safe_str_neq(value, interval)) { continue; } if (id == NULL) { id = ID(operation); } else { crm_config_err("Operation %s is a duplicate of %s", ID(operation), id); crm_config_err ("Do not use the same (name, interval) combination more than once per resource"); dup = TRUE; } } } return dup; } void RecurringOp(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set) { char *key = NULL; const char *name = NULL; const char *value = NULL; const char *interval = NULL; const char *node_uname = NULL; unsigned long long interval_ms = 0; action_t *mon = NULL; gboolean is_optional = TRUE; GListPtr possible_matches = NULL; /* Only process for the operations without role="Stopped" */ value = crm_element_value(operation, "role"); if (value && text2role(value) == RSC_ROLE_STOPPED) { return; } crm_trace("Creating recurring action %s for %s in role %s on %s", ID(operation), rsc->id, role2text(rsc->next_role), node ? node->details->uname : "n/a"); if (node != NULL) { node_uname = node->details->uname; } interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_get_interval(interval); if (interval_ms == 0) { return; } name = crm_element_value(operation, "name"); if (is_op_dup(rsc, name, interval)) { return; } if (safe_str_eq(name, RSC_STOP) || safe_str_eq(name, RSC_START) || safe_str_eq(name, RSC_DEMOTE) || safe_str_eq(name, RSC_PROMOTE) ) { crm_config_err("Invalid recurring action %s wth name: '%s'", ID(operation), name); return; } key = generate_op_key(rsc->id, name, interval_ms); if (find_rsc_op_entry(rsc, key) == NULL) { /* disabled */ free(key); return; } if (start != NULL) { crm_trace("Marking %s %s due to %s", key, is_set(start->flags, pe_action_optional) ? "optional" : "manditory", start->uuid); is_optional = (rsc->cmds->action_flags(start, NULL) & pe_action_optional); } else { crm_trace("Marking %s optional", key); is_optional = TRUE; } /* start a monitor for an already active resource */ possible_matches = find_actions_exact(rsc->actions, key, node); if (possible_matches == NULL) { is_optional = FALSE; crm_trace("Marking %s manditory: not active", key); } else { g_list_free(possible_matches); } if ((rsc->next_role == RSC_ROLE_MASTER && value == NULL) || (value != NULL && text2role(value) != rsc->next_role)) { int log_level = LOG_DEBUG_2; const char *result = "Ignoring"; if (is_optional) { char *local_key = strdup(key); log_level = LOG_INFO; result = "Cancelling"; /* its running : cancel it */ mon = custom_action(rsc, local_key, RSC_CANCEL, node, FALSE, TRUE, data_set); free(mon->task); mon->task = strdup(RSC_CANCEL); add_hash_param(mon->meta, XML_LRM_ATTR_INTERVAL, interval); add_hash_param(mon->meta, XML_LRM_ATTR_TASK, name); local_key = NULL; switch (rsc->role) { case RSC_ROLE_SLAVE: case RSC_ROLE_STARTED: if (rsc->next_role == RSC_ROLE_MASTER) { local_key = promote_key(rsc); } else if (rsc->next_role == RSC_ROLE_STOPPED) { local_key = stop_key(rsc); } break; case RSC_ROLE_MASTER: local_key = demote_key(rsc); break; default: break; } if (local_key) { custom_action_order(rsc, NULL, mon, rsc, local_key, NULL, pe_order_runnable_left, data_set); } mon = NULL; } do_crm_log(log_level, "%s action %s (%s vs. %s)", result, key, value ? value : role2text(RSC_ROLE_SLAVE), role2text(rsc->next_role)); free(key); key = NULL; return; } mon = custom_action(rsc, key, name, node, is_optional, TRUE, data_set); key = mon->uuid; if (is_optional) { crm_trace("%s\t %s (optional)", crm_str(node_uname), mon->uuid); } if (start == NULL || is_set(start->flags, pe_action_runnable) == FALSE) { crm_debug("%s\t %s (cancelled : start un-runnable)", crm_str(node_uname), mon->uuid); update_action_flags(mon, pe_action_runnable | pe_action_clear); } else if (node == NULL || node->details->online == FALSE || node->details->unclean) { crm_debug("%s\t %s (cancelled : no node available)", crm_str(node_uname), mon->uuid); update_action_flags(mon, pe_action_runnable | pe_action_clear); } else if (is_set(mon->flags, pe_action_optional) == FALSE) { crm_info(" Start recurring %s (%llus) for %s on %s", mon->task, interval_ms / 1000, rsc->id, crm_str(node_uname)); } if (rsc->next_role == RSC_ROLE_MASTER) { char *running_master = crm_itoa(PCMK_EXECRA_RUNNING_MASTER); add_hash_param(mon->meta, XML_ATTR_TE_TARGET_RC, running_master); free(running_master); } if (node == NULL || is_set(rsc->flags, pe_rsc_managed)) { custom_action_order(rsc, start_key(rsc), NULL, NULL, strdup(key), mon, pe_order_implies_then | pe_order_runnable_left, data_set); if (rsc->next_role == RSC_ROLE_MASTER) { custom_action_order(rsc, promote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional | pe_order_runnable_left, data_set); } else if (rsc->role == RSC_ROLE_MASTER) { custom_action_order(rsc, demote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional | pe_order_runnable_left, data_set); } } } void Recurring(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set) { if (is_not_set(data_set->flags, pe_flag_maintenance_mode)) { xmlNode *operation = NULL; for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { RecurringOp(rsc, start, node, operation, data_set); } } } } void RecurringOp_Stopped(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set) { char *key = NULL; const char *name = NULL; const char *role = NULL; const char *interval = NULL; const char *node_uname = NULL; unsigned long long interval_ms = 0; GListPtr possible_matches = NULL; GListPtr gIter = NULL; /* TODO: Support of non-unique clone */ if (is_set(rsc->flags, pe_rsc_unique) == FALSE) { return; } /* Only process for the operations with role="Stopped" */ role = crm_element_value(operation, "role"); if (role == NULL || text2role(role) != RSC_ROLE_STOPPED) { return; } crm_trace ("Creating recurring actions %s for %s in role %s on nodes where it'll not be running", ID(operation), rsc->id, role2text(rsc->next_role)); if (node != NULL) { node_uname = node->details->uname; } interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_get_interval(interval); if (interval_ms == 0) { return; } name = crm_element_value(operation, "name"); if (is_op_dup(rsc, name, interval)) { return; } if (safe_str_eq(name, RSC_STOP) || safe_str_eq(name, RSC_START) || safe_str_eq(name, RSC_DEMOTE) || safe_str_eq(name, RSC_PROMOTE) ) { crm_config_err("Invalid recurring action %s wth name: '%s'", ID(operation), name); return; } key = generate_op_key(rsc->id, name, interval_ms); if (find_rsc_op_entry(rsc, key) == NULL) { /* disabled */ free(key); return; } /* if the monitor exists on the node where the resource will be running, cancel it */ if (node != NULL) { possible_matches = find_actions_exact(rsc->actions, key, node); if (possible_matches) { action_t *cancel_op = NULL; char *local_key = strdup(key); g_list_free(possible_matches); cancel_op = custom_action(rsc, local_key, RSC_CANCEL, node, FALSE, TRUE, data_set); free(cancel_op->task); cancel_op->task = strdup(RSC_CANCEL); add_hash_param(cancel_op->meta, XML_LRM_ATTR_INTERVAL, interval); add_hash_param(cancel_op->meta, XML_LRM_ATTR_TASK, name); local_key = NULL; if (rsc->next_role == RSC_ROLE_STARTED || rsc->next_role == RSC_ROLE_SLAVE) { /* rsc->role == RSC_ROLE_STOPPED: cancel the monitor before start */ /* rsc->role == RSC_ROLE_STARTED: for a migration, cancel the monitor on the target node before start */ custom_action_order(rsc, NULL, cancel_op, rsc, start_key(rsc), NULL, pe_order_runnable_left, data_set); } crm_info("Cancel action %s (%s vs. %s) on %s", key, role, role2text(rsc->next_role), crm_str(node_uname)); } } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *stop_node = (node_t *) gIter->data; const char *stop_node_uname = stop_node->details->uname; gboolean is_optional = TRUE; gboolean probe_is_optional = TRUE; gboolean stop_is_optional = TRUE; action_t *stopped_mon = NULL; char *rc_inactive = NULL; GListPtr probe_complete_ops = NULL; GListPtr stop_ops = NULL; GListPtr local_gIter = NULL; char *stop_op_key = NULL; if (node_uname && safe_str_eq(stop_node_uname, node_uname)) { continue; } crm_trace("Creating recurring action %s for %s on %s", ID(operation), rsc->id, crm_str(stop_node_uname)); /* start a monitor for an already stopped resource */ possible_matches = find_actions_exact(rsc->actions, key, stop_node); if (possible_matches == NULL) { crm_trace("Marking %s manditory on %s: not active", key, crm_str(stop_node_uname)); is_optional = FALSE; } else { crm_trace("Marking %s optional on %s: already active", key, crm_str(stop_node_uname)); is_optional = TRUE; g_list_free(possible_matches); } stopped_mon = custom_action(rsc, strdup(key), name, stop_node, is_optional, TRUE, data_set); rc_inactive = crm_itoa(PCMK_EXECRA_NOT_RUNNING); add_hash_param(stopped_mon->meta, XML_ATTR_TE_TARGET_RC, rc_inactive); free(rc_inactive); probe_complete_ops = find_actions(data_set->actions, CRM_OP_PROBED, NULL); for (local_gIter = probe_complete_ops; local_gIter != NULL; local_gIter = local_gIter->next) { action_t *probe_complete = (action_t *) local_gIter->data; if (probe_complete->node == NULL) { if (is_set(probe_complete->flags, pe_action_optional) == FALSE) { probe_is_optional = FALSE; } if (is_set(probe_complete->flags, pe_action_runnable) == FALSE) { crm_debug("%s\t %s (cancelled : probe un-runnable)", crm_str(stop_node_uname), stopped_mon->uuid); update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear); } if (is_set(rsc->flags, pe_rsc_managed)) { custom_action_order(NULL, NULL, probe_complete, NULL, strdup(key), stopped_mon, pe_order_optional, data_set); } break; } } if (probe_complete_ops) { g_list_free(probe_complete_ops); } stop_op_key = stop_key(rsc); stop_ops = find_actions_exact(rsc->actions, stop_op_key, stop_node); for (local_gIter = stop_ops; local_gIter != NULL; local_gIter = local_gIter->next) { action_t *stop = (action_t *) local_gIter->data; if (is_set(stop->flags, pe_action_optional) == FALSE) { stop_is_optional = FALSE; } if (is_set(stop->flags, pe_action_runnable) == FALSE) { crm_debug("%s\t %s (cancelled : stop un-runnable)", crm_str(stop_node_uname), stopped_mon->uuid); update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear); } if (is_set(rsc->flags, pe_rsc_managed)) { custom_action_order(rsc, strdup(stop_op_key), stop, NULL, strdup(key), stopped_mon, pe_order_implies_then | pe_order_runnable_left, data_set); } } if (stop_ops) { g_list_free(stop_ops); } free(stop_op_key); if (is_optional == FALSE && probe_is_optional && stop_is_optional && is_set(rsc->flags, pe_rsc_managed) == FALSE) { crm_trace("Marking %s optional on %s due to unmanaged", key, crm_str(stop_node_uname)); update_action_flags(stopped_mon, pe_action_optional); } if (is_set(stopped_mon->flags, pe_action_optional)) { crm_trace("%s\t %s (optional)", crm_str(stop_node_uname), stopped_mon->uuid); } if (stop_node->details->online == FALSE || stop_node->details->unclean) { crm_debug("%s\t %s (cancelled : no node available)", crm_str(stop_node_uname), stopped_mon->uuid); update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear); } if (is_set(stopped_mon->flags, pe_action_runnable) && is_set(stopped_mon->flags, pe_action_optional) == FALSE) { crm_notice(" Start recurring %s (%llus) for %s on %s", stopped_mon->task, interval_ms / 1000, rsc->id, crm_str(stop_node_uname)); } } free(key); } void Recurring_Stopped(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set) { if (is_not_set(data_set->flags, pe_flag_maintenance_mode)) { xmlNode *operation = NULL; for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { RecurringOp_Stopped(rsc, start, node, operation, data_set); } } } } void native_create_actions(resource_t * rsc, pe_working_set_t * data_set) { action_t *start = NULL; node_t *chosen = NULL; node_t *current = NULL; gboolean need_stop = FALSE; GListPtr gIter = NULL; int num_active_nodes = g_list_length(rsc->running_on); enum rsc_role_e role = RSC_ROLE_UNKNOWN; enum rsc_role_e next_role = RSC_ROLE_UNKNOWN; chosen = rsc->allocated_to; if (chosen != NULL && rsc->next_role == RSC_ROLE_UNKNOWN) { rsc->next_role = RSC_ROLE_STARTED; crm_trace("Fixed next_role: unknown -> %s", role2text(rsc->next_role)); } else if (rsc->next_role == RSC_ROLE_UNKNOWN) { rsc->next_role = RSC_ROLE_STOPPED; crm_trace("Fixed next_role: unknown -> %s", role2text(rsc->next_role)); } crm_trace("Processing state transition for %s: %s->%s", rsc->id, role2text(rsc->role), role2text(rsc->next_role)); if(rsc->running_on) { current = rsc->running_on->data; } get_rsc_attributes(rsc->parameters, rsc, chosen, data_set); for (gIter = rsc->dangling_migrations; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; action_t *stop = stop_action(rsc, current, FALSE); - set_bit_inplace(stop->flags, pe_action_dangle); + set_bit(stop->flags, pe_action_dangle); crm_trace("Forcing a cleanup of %s on %s", rsc->id, current->details->uname); if (is_set(data_set->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, current, FALSE, data_set); } } if (num_active_nodes > 1) { if (num_active_nodes == 2 && chosen && rsc->partial_migration_target && (chosen->details == rsc->partial_migration_target->details)) { /* Here the chosen node is still the migration target from a partial * migration. Attempt to continue the migration instead of recovering * by stopping the resource everywhere and starting it on a single node. */ crm_trace("Will attempt to continue with a partial migration to target %s from %s", rsc->partial_migration_target->details->id, rsc->partial_migration_source->details->id); } else { const char *type = crm_element_value(rsc->xml, XML_ATTR_TYPE); const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); pe_proc_err("Resource %s (%s::%s) is active on %d nodes %s", rsc->id, class, type, num_active_nodes, recovery2text(rsc->recovery_type)); crm_warn("See %s for more information.", "http://clusterlabs.org/wiki/FAQ#Resource_is_Too_Active"); if (rsc->recovery_type == recovery_stop_start) { need_stop = TRUE; } /* If by chance a partial migration is in process, * but the migration target is not chosen still, clear all * partial migration data. */ rsc->partial_migration_source = rsc->partial_migration_target = NULL; } } if (is_set(rsc->flags, pe_rsc_start_pending)) { start = start_action(rsc, chosen, TRUE); - set_bit_inplace(start->flags, pe_action_print_always); + set_bit(start->flags, pe_action_print_always); } if(current && chosen && current->details != chosen->details) { crm_trace("Moving %s", rsc->id); need_stop = TRUE; } else if(is_set(rsc->flags, pe_rsc_failed)) { crm_trace("Recovering %s", rsc->id); need_stop = TRUE; } else if(rsc->role > RSC_ROLE_STARTED && current != NULL && chosen != NULL) { /* Recovery of a promoted resource */ start = start_action(rsc, chosen, TRUE); if(is_set(start->flags, pe_action_optional) == FALSE) { crm_trace("Forced start %s", rsc->id); need_stop = TRUE; } } crm_trace("Creating actions for %s: %s->%s", rsc->id, role2text(rsc->role), role2text(rsc->next_role)); role = rsc->role; /* Potentiall optional steps on brining the resource down and back up to the same level */ while (role != RSC_ROLE_STOPPED) { next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED]; crm_trace("Down: Executing: %s->%s (%s)%s", role2text(role), role2text(next_role), rsc->id, need_stop?" required":""); if (rsc_action_matrix[role][next_role] (rsc, current, !need_stop, data_set) == FALSE) { break; } role = next_role; } while (rsc->role <= rsc->next_role && role != rsc->role) { next_role = rsc_state_matrix[role][rsc->role]; crm_trace("Up: Executing: %s->%s (%s)%s", role2text(role), role2text(next_role), rsc->id, need_stop?" required":""); if (rsc_action_matrix[role][next_role] (rsc, chosen, !need_stop, data_set) == FALSE) { break; } role = next_role; } role = rsc->role; /* Required steps from this role to the next */ while (role != rsc->next_role) { next_role = rsc_state_matrix[role][rsc->next_role]; crm_trace("Role: Executing: %s->%s (%s)", role2text(role), role2text(next_role), rsc->id); if (rsc_action_matrix[role][next_role] (rsc, chosen, FALSE, data_set) == FALSE) { break; } role = next_role; } if (rsc->next_role != RSC_ROLE_STOPPED || is_set(rsc->flags, pe_rsc_managed) == FALSE) { start = start_action(rsc, chosen, TRUE); Recurring(rsc, start, chosen, data_set); Recurring_Stopped(rsc, start, chosen, data_set); } else { Recurring_Stopped(rsc, NULL, NULL, data_set); } } void native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) { /* This function is on the critical path and worth optimizing as much as possible */ resource_t *top = uber_parent(rsc); int type = pe_order_optional | pe_order_implies_then | pe_order_restart; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); custom_action_order(rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, type, data_set); if (top->variant == pe_master) { custom_action_order(rsc, generate_op_key(rsc->id, RSC_DEMOTE, 0), NULL, rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, pe_order_implies_first_master, data_set); custom_action_order(rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, rsc, generate_op_key(rsc->id, RSC_PROMOTE, 0), NULL, pe_order_runnable_left, data_set); } if (is_not_set(rsc->flags, pe_rsc_managed)) { crm_trace("Skipping fencing constraints for unmanaged resource: %s", rsc->id); return; } if (safe_str_neq(class, "stonith")) { action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); custom_action_order(rsc, stop_key(rsc), NULL, NULL, strdup(all_stopped->task), all_stopped, pe_order_implies_then | pe_order_runnable_left, data_set); } if (g_hash_table_size(rsc->utilization) > 0 && safe_str_neq(data_set->placement_strategy, "default")) { GHashTableIter iter; node_t *next = NULL; GListPtr gIter = NULL; crm_trace("Creating utilization constraints for %s - strategy: %s", rsc->id, data_set->placement_strategy); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; char *load_stopped_task = crm_concat(LOAD_STOPPED, current->details->uname, '_'); action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set); if (load_stopped->node == NULL) { load_stopped->node = node_copy(current); update_action_flags(load_stopped, pe_action_optional | pe_action_clear); } custom_action_order(rsc, stop_key(rsc), NULL, NULL, load_stopped_task, load_stopped, pe_order_load, data_set); } g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&next)) { char *load_stopped_task = crm_concat(LOAD_STOPPED, next->details->uname, '_'); action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set); if (load_stopped->node == NULL) { load_stopped->node = node_copy(next); update_action_flags(load_stopped, pe_action_optional | pe_action_clear); } custom_action_order(NULL, strdup(load_stopped_task), load_stopped, rsc, start_key(rsc), NULL, pe_order_load, data_set); custom_action_order(NULL, strdup(load_stopped_task), load_stopped, rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), NULL, pe_order_load, data_set); free(load_stopped_task); } } } void native_rsc_colocation_lh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { if (rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", constraint->id); return; } else if (constraint->rsc_rh == NULL) { pe_err("rsc_rh was NULL for %s", constraint->id); return; } crm_trace("Processing colocation constraint between %s and %s", rsc_lh->id, rsc_rh->id); rsc_rh->cmds->rsc_colocation_rh(rsc_lh, rsc_rh, constraint); } static gboolean filter_colocation_constraint(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { if (constraint->score == 0) { return FALSE; } if (constraint->score > 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh != rsc_lh->next_role) { crm_trace( "LH: Skipping constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } if (constraint->score > 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh != rsc_rh->next_role) { crm_trace( "RH: Skipping constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } if (constraint->score < 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh == rsc_lh->next_role) { crm_trace( "LH: Skipping -ve constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } if (constraint->score < 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh == rsc_rh->next_role) { crm_trace( "RH: Skipping -ve constraint: \"%s\" state filter", role2text(constraint->role_rh)); return FALSE; } return TRUE; } static void colocation_match(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { const char *tmp = NULL; const char *value = NULL; const char *attribute = "#id"; GHashTable *work = NULL; gboolean do_check = FALSE; GHashTableIter iter; node_t *node = NULL; if (constraint->node_attribute != NULL) { attribute = constraint->node_attribute; } if (rsc_rh->allocated_to) { value = g_hash_table_lookup(rsc_rh->allocated_to->details->attrs, attribute); do_check = TRUE; } else if (constraint->score < 0) { /* nothing to do: * anti-colocation with something thats not running */ return; } work = node_hash_dup(rsc_lh->allowed_nodes); g_hash_table_iter_init(&iter, work); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { tmp = g_hash_table_lookup(node->details->attrs, attribute); if (do_check && safe_str_eq(tmp, value)) { if (constraint->score < INFINITY) { crm_trace("%s: %s.%s += %d", constraint->id, rsc_lh->id, node->details->uname, constraint->score); node->weight = merge_weights(constraint->score, node->weight); } } else if (do_check == FALSE || constraint->score >= INFINITY) { crm_trace("%s: %s.%s -= %d (%s)", constraint->id, rsc_lh->id, node->details->uname, constraint->score, do_check ? "failed" : "unallocated"); node->weight = merge_weights(-constraint->score, node->weight); } } if (can_run_any(work) || constraint->score <= -INFINITY || constraint->score >= INFINITY) { g_hash_table_destroy(rsc_lh->allowed_nodes); rsc_lh->allowed_nodes = work; work = NULL; } else { char *score = score2char(constraint->score); crm_info("%s: Rolling back scores from %s (%d, %s)", rsc_lh->id, rsc_rh->id, do_check, score); free(score); } if (work) { g_hash_table_destroy(work); } } void native_rsc_colocation_rh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { crm_trace("%sColocating %s with %s (%s, weight=%d)", constraint->score >= 0 ? "" : "Anti-", rsc_lh->id, rsc_rh->id, constraint->id, constraint->score); if (filter_colocation_constraint(rsc_lh, rsc_rh, constraint) == FALSE) { return; } if (is_set(rsc_rh->flags, pe_rsc_provisional)) { return; } else if (is_not_set(rsc_lh->flags, pe_rsc_provisional)) { /* error check */ struct node_shared_s *details_lh; struct node_shared_s *details_rh; if ((constraint->score > -INFINITY) && (constraint->score < INFINITY)) { return; } details_rh = rsc_rh->allocated_to ? rsc_rh->allocated_to->details : NULL; details_lh = rsc_lh->allocated_to ? rsc_lh->allocated_to->details : NULL; if (constraint->score == INFINITY && details_lh != details_rh) { crm_err("%s and %s are both allocated" " but to different nodes: %s vs. %s", rsc_lh->id, rsc_rh->id, details_lh ? details_lh->uname : "n/a", details_rh ? details_rh->uname : "n/a"); } else if (constraint->score == -INFINITY && details_lh == details_rh) { crm_err("%s and %s are both allocated" " but to the SAME node: %s", rsc_lh->id, rsc_rh->id, details_rh ? details_rh->uname : "n/a"); } return; } else { colocation_match(rsc_lh, rsc_rh, constraint); } } static gboolean filter_rsc_ticket(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket) { if (rsc_ticket->role_lh != RSC_ROLE_UNKNOWN && rsc_ticket->role_lh != rsc_lh->role) { crm_trace( "LH: Skipping constraint: \"%s\" state filter", role2text(rsc_ticket->role_lh)); return FALSE; } return TRUE; } void rsc_ticket_constraint(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket, pe_working_set_t * data_set) { if (rsc_ticket == NULL) { pe_err("rsc_ticket was NULL"); return; } if (rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", rsc_ticket->id); return; } if (rsc_ticket->ticket->granted && rsc_ticket->ticket->standby == FALSE) { return; } if (rsc_lh->children) { GListPtr gIter = rsc_lh->children; crm_trace("Processing ticket dependencies from %s", rsc_lh->id); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; rsc_ticket_constraint(child_rsc, rsc_ticket, data_set); } return; } crm_trace("%s: Processing ticket dependency on %s (%s, %s)", rsc_lh->id, rsc_ticket->ticket->id, rsc_ticket->id, role2text(rsc_ticket->role_lh)); if (rsc_ticket->ticket->granted == FALSE && g_list_length(rsc_lh->running_on) > 0) { GListPtr gIter = NULL; switch (rsc_ticket->loss_policy) { case loss_ticket_stop: resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); break; case loss_ticket_demote: /*Promotion score will be set to -INFINITY in master_promotion_order() */ if (rsc_ticket->role_lh != RSC_ROLE_MASTER) { resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); } break; case loss_ticket_fence: if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) { return; } resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); for (gIter = rsc_lh->running_on; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; crm_warn("Node %s will be fenced for deadman", node->details->uname); node->details->unclean = TRUE; } break; case loss_ticket_freeze: if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) { return; } if (g_list_length(rsc_lh->running_on) > 0) { clear_bit(rsc_lh->flags, pe_rsc_managed); set_bit(rsc_lh->flags, pe_rsc_block); } break; } } else if (rsc_ticket->ticket->granted == FALSE){ if (rsc_ticket->role_lh != RSC_ROLE_MASTER || rsc_ticket->loss_policy == loss_ticket_stop) { resource_location(rsc_lh, NULL, -INFINITY, "__no_ticket__", data_set); } } else if (rsc_ticket->ticket->standby) { if (rsc_ticket->role_lh != RSC_ROLE_MASTER || rsc_ticket->loss_policy == loss_ticket_stop) { resource_location(rsc_lh, NULL, -INFINITY, "__ticket_standby__", data_set); } } } const char *convert_non_atomic_task(char *raw_task, resource_t * rsc, gboolean allow_notify); const char * convert_non_atomic_task(char *raw_task, resource_t * rsc, gboolean allow_notify) { int task = no_action; const char *atomic_task = raw_task; crm_trace("Processing %s for %s", crm_str(raw_task), rsc->id); if (raw_task == NULL) { return NULL; } else if (strstr(raw_task, "notify") != NULL) { goto done; /* no conversion */ } else if (rsc->variant < pe_group) { goto done; /* no conversion */ } task = text2task(raw_task); switch (task) { case stop_rsc: case start_rsc: case action_notify: case action_promote: case action_demote: break; case stopped_rsc: case started_rsc: case action_notified: case action_promoted: case action_demoted: task--; break; case monitor_rsc: case shutdown_crm: case stonith_node: goto done; break; default: crm_trace("Unknown action: %s", raw_task); goto done; break; } if (task != no_action) { if (is_set(rsc->flags, pe_rsc_notify) && allow_notify) { /* atomic_task = generate_notify_key(rid, "confirmed-post", task2text(task+1)); */ crm_err("Not handled"); } else { atomic_task = task2text(task + 1); } crm_trace("Converted %s -> %s", raw_task, atomic_task); } done: return atomic_task; } enum pe_action_flags native_action_flags(action_t * action, node_t * node) { return action->flags; } enum pe_graph_flags native_update_actions(action_t * first, action_t * then, node_t * node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type) { /* flags == get_action_flags(first, then_node) called from update_action() */ enum pe_graph_flags changed = pe_graph_none; enum pe_action_flags then_flags = then->flags; enum pe_action_flags first_flags = first->flags; if (type & pe_order_asymmetrical) { resource_t *then_rsc = then->rsc; enum rsc_role_e then_rsc_role = then_rsc ? then_rsc->fns->state(then_rsc, TRUE) : 0; if (!then_rsc) { /* ignore */ } else if ((then_rsc_role == RSC_ROLE_STOPPED) && safe_str_eq(then->task, RSC_STOP)) { /* ignore... if 'then' is supposed to be stopped after 'first', but * then is already stopped, there is nothing to be done when non-symmetrical. */ } else if ((then_rsc_role == RSC_ROLE_STARTED) && safe_str_eq(then->task, RSC_START)) { /* ignore... if 'then' is supposed to be started after 'first', but * then is already started, there is nothing to be done when non-symmetrical. */ } else if (!(first->flags & pe_action_runnable)) { /* prevent 'then' action from happening if 'first' is not runnable and * 'then' has not yet occurred. */ pe_clear_action_bit(then, pe_action_runnable); pe_clear_action_bit(then, pe_action_optional); crm_trace("Unset optional and runnable on %s", then->uuid); } else { /* ignore... then is allowed to start/stop if it wants to. */ } } if (type & pe_order_implies_first) { if ((filter & pe_action_optional) && (flags & pe_action_optional) == 0) { crm_trace("Unset optional on %s because of %s", first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_optional); } } if (type & pe_order_implies_first_master) { if ((filter & pe_action_optional) && ((then->flags & pe_action_optional) == FALSE) && then->rsc && (then->rsc->role == RSC_ROLE_MASTER)) { - clear_bit_inplace(first->flags, pe_action_optional); + clear_bit(first->flags, pe_action_optional); } } if (is_set(type, pe_order_runnable_left) && is_set(filter, pe_action_runnable) && is_set(then->flags, pe_action_runnable) && is_set(flags, pe_action_runnable) == FALSE) { crm_trace("Unset runnable on %s because of %s", then->uuid, first->uuid); pe_clear_action_bit(then, pe_action_runnable); } if (is_set(type, pe_order_implies_then) && is_set(filter, pe_action_optional) && is_set(then->flags, pe_action_optional) && is_set(flags, pe_action_optional) == FALSE) { crm_trace("Unset optional on %s because of %s", then->uuid, first->uuid); pe_clear_action_bit(then, pe_action_optional); } if (is_set(type, pe_order_restart)) { const char *reason = NULL; CRM_ASSERT(first->rsc && first->rsc->variant == pe_native); CRM_ASSERT(then->rsc && then->rsc->variant == pe_native); if ((filter & pe_action_runnable) && (then->flags & pe_action_runnable) == 0) { reason = "shutdown"; } if ((filter & pe_action_optional) && (then->flags & pe_action_optional) == 0) { reason = "recover"; } if (reason && is_set(first->flags, pe_action_optional) && is_set(first->flags, pe_action_runnable)) { crm_trace("Handling %s: %s -> %s", reason, first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_optional); } if (reason && is_not_set(first->flags, pe_action_optional) && is_not_set(first->flags, pe_action_runnable)) { crm_trace("Handling %s: %s -> %s", reason, first->uuid, then->uuid); pe_clear_action_bit(then, pe_action_runnable); } } if (then_flags != then->flags) { changed |= pe_graph_updated_then; crm_trace("Then: Flags for %s on %s are now 0x%.6x (was 0x%.6x) because of %s 0x%.6x", then->uuid, then->node ? then->node->details->uname : "[none]", then->flags, then_flags, first->uuid, first->flags); } if (first_flags != first->flags) { changed |= pe_graph_updated_first; crm_trace("First: Flags for %s on %s are now 0x%.6x (was 0x%.6x) because of %s 0x%.6x", first->uuid, first->node ? first->node->details->uname : "[none]", first->flags, first_flags, then->uuid, then->flags); } return changed; } void native_rsc_location(resource_t * rsc, rsc_to_node_t * constraint) { GListPtr gIter = NULL; GHashTableIter iter; node_t *node = NULL; if (constraint == NULL) { pe_err("Constraint is NULL"); return; } else if (rsc == NULL) { pe_err("LHS of rsc_to_node (%s) is NULL", constraint->id); return; } crm_trace("Applying %s (%s) to %s", constraint->id, role2text(constraint->role_filter), rsc->id); /* take "lifetime" into account */ if (constraint->role_filter > 0 && constraint->role_filter != rsc->next_role) { crm_debug("Constraint (%s) is not active (role : %s)", constraint->id, role2text(constraint->role_filter)); return; } else if (is_active(constraint) == FALSE) { crm_trace("Constraint (%s) is not active", constraint->id); return; } if (constraint->node_list_rh == NULL) { crm_trace("RHS of constraint %s is NULL", constraint->id); return; } for (gIter = constraint->node_list_rh; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; node_t *other_node = NULL; other_node = (node_t *) pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (other_node != NULL) { crm_trace("%s + %s: %d + %d", node->details->uname, other_node->details->uname, node->weight, other_node->weight); other_node->weight = merge_weights(other_node->weight, node->weight); } else { node_t *new_node = node_copy(node); g_hash_table_insert(rsc->allowed_nodes, (gpointer) new_node->details->id, new_node); } } g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { crm_trace("%s + %s : %d", rsc->id, node->details->uname, node->weight); } } void native_expand(resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = NULL; crm_trace("Processing actions from %s", rsc->id); for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; crm_trace("processing action %d for rsc=%s", action->id, rsc->id); graph_element_from_action(action, data_set); } for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; child_rsc->cmds->expand(child_rsc, data_set); } } void #define log_change(fmt, args...) do { \ if(terminal) { \ printf(" * "fmt"\n", ##args); \ } else { \ crm_notice(fmt, ##args); \ } \ } while(0) LogActions(resource_t * rsc, pe_working_set_t * data_set, gboolean terminal) { node_t *next = NULL; node_t *current = NULL; action_t *stop = NULL; action_t *start = NULL; char *key = NULL; gboolean moving = FALSE; GListPtr possible_matches = NULL; if (rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; LogActions(child_rsc, data_set, terminal); } return; } next = rsc->allocated_to; if (rsc->running_on) { if (g_list_length(rsc->running_on) > 1 && rsc->partial_migration_source) { current = rsc->partial_migration_source; } else { current = rsc->running_on->data; } if (rsc->role == RSC_ROLE_STOPPED) { /* * This can occur when resources are being recovered * We fiddle with the current role in native_create_actions() */ rsc->role = RSC_ROLE_STARTED; } } if (current == NULL && is_set(rsc->flags, pe_rsc_orphan)) { /* Don't log stopped orphans */ return; } if (is_not_set(rsc->flags, pe_rsc_managed) || (current == NULL && next == NULL)) { crm_info("Leave %s\t(%s%s)", rsc->id, role2text(rsc->role), is_not_set(rsc->flags, pe_rsc_managed) ? " unmanaged" : ""); return; } if (current != NULL && next != NULL && safe_str_neq(current->details->id, next->details->id)) { moving = TRUE; } key = stop_key(rsc); possible_matches = find_actions(rsc->actions, key, next); free(key); if (possible_matches) { stop = possible_matches->data; g_list_free(possible_matches); } key = start_key(rsc); possible_matches = find_actions(rsc->actions, key, next); free(key); if (possible_matches) { start = possible_matches->data; g_list_free(possible_matches); } if (rsc->role == rsc->next_role) { key = generate_op_key(rsc->id, RSC_MIGRATED, 0); possible_matches = find_actions(rsc->actions, key, next); free(key); CRM_CHECK(next != NULL,); if (next == NULL) { } else if (possible_matches && current) { log_change("Migrate %s\t(%s %s -> %s)", rsc->id, role2text(rsc->role), current->details->uname, next->details->uname); g_list_free(possible_matches); } else if(is_set(rsc->flags, pe_rsc_reload)) { log_change("Reload %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else if (start == NULL || is_set(start->flags, pe_action_optional)) { crm_info("Leave %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else if (moving && current) { log_change("Move %s\t(%s %s -> %s)", rsc->id, role2text(rsc->role), current->details->uname, next->details->uname); } else if (is_set(rsc->flags, pe_rsc_failed)) { log_change("Recover %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else if (start && is_set(start->flags, pe_action_runnable) == FALSE) { log_change("Stop %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else { log_change("Restart %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } return; } if (rsc->role > RSC_ROLE_SLAVE && rsc->role > rsc->next_role) { CRM_CHECK(current != NULL,); if (current != NULL) { log_change("Demote %s\t(%s -> %s %s)", rsc->id, role2text(rsc->role), role2text(rsc->next_role), current->details->uname); if (stop != NULL && is_not_set(stop->flags, pe_action_optional) && rsc->next_role > RSC_ROLE_STOPPED) { if (is_set(rsc->flags, pe_rsc_failed)) { log_change("Recover %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else if(is_set(rsc->flags, pe_rsc_reload)) { log_change("Reload %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else { log_change("Restart %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } } } } else if (rsc->next_role == RSC_ROLE_STOPPED) { GListPtr gIter = NULL; CRM_CHECK(current != NULL,); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; log_change("Stop %s\t(%s)", rsc->id, node->details->uname); } } if (moving) { log_change("Move %s\t(%s %s -> %s)", rsc->id, role2text(rsc->next_role), current->details->uname, next->details->uname); } if (rsc->role == RSC_ROLE_STOPPED) { gboolean allowed = FALSE; if(start && (start->flags & pe_action_runnable)) { allowed = TRUE; } CRM_CHECK(next != NULL,); if (next != NULL) { log_change("Start %s\t(%s%s)", rsc->id, next->details->uname, allowed?"":" - blocked"); } if(allowed == FALSE) { return; } } if (rsc->next_role > RSC_ROLE_SLAVE && rsc->role < rsc->next_role) { CRM_CHECK(next != NULL,); if (stop != NULL && is_not_set(stop->flags, pe_action_optional) && rsc->role > RSC_ROLE_STOPPED) { if (is_set(rsc->flags, pe_rsc_failed)) { log_change("Recover %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else if(is_set(rsc->flags, pe_rsc_reload)) { log_change("Reload %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else { log_change("Restart %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } } log_change("Promote %s\t(%s -> %s %s)", rsc->id, role2text(rsc->role), role2text(rsc->next_role), next->details->uname); } } gboolean StopRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { GListPtr gIter = NULL; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); crm_trace("%s", rsc->id); if (rsc->next_role == RSC_ROLE_STOPPED && rsc->variant == pe_native && safe_str_eq(class, "stonith")) { action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); custom_action_order(NULL, strdup(all_stopped->task), all_stopped, rsc, stop_key(rsc), NULL, pe_order_optional | pe_order_stonith_stop, data_set); } for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; action_t *stop; if (rsc->partial_migration_target) { if(rsc->partial_migration_target->details == current->details) { crm_trace("Filtered %s -> %s %s", current->details->uname, next->details->uname, rsc->id); continue; } else { crm_trace("Forced on %s %s", current->details->uname, rsc->id); optional = FALSE; } } crm_trace("%s on %s", rsc->id, current->details->uname); stop = stop_action(rsc, current, optional); if(is_not_set(rsc->flags, pe_rsc_managed)) { update_action_flags(stop, pe_action_runnable|pe_action_clear); } if (is_set(data_set->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, current, optional, data_set); } } return TRUE; } gboolean StartRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { action_t *start = NULL; crm_trace("%s on %s %d", rsc->id, next?next->details->uname:"N/A", optional); start = start_action(rsc, next, TRUE); if (is_set(start->flags, pe_action_runnable) && optional == FALSE) { update_action_flags(start, pe_action_optional | pe_action_clear); } return TRUE; } gboolean PromoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { char *key = NULL; GListPtr gIter = NULL; gboolean runnable = TRUE; GListPtr action_list = NULL; crm_trace("%s on %s", rsc->id, next?next->details->uname:"N/A"); CRM_CHECK(next != NULL, return FALSE); key = start_key(rsc); action_list = find_actions_exact(rsc->actions, key, next); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *start = (action_t *) gIter->data; if (is_set(start->flags, pe_action_runnable) == FALSE) { runnable = FALSE; } } g_list_free(action_list); if (runnable) { promote_action(rsc, next, optional); return TRUE; } crm_debug("%s\tPromote %s (canceled)", next->details->uname, rsc->id); key = promote_key(rsc); action_list = find_actions_exact(rsc->actions, key, next); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *promote = (action_t *) gIter->data; update_action_flags(promote, pe_action_runnable | pe_action_clear); } g_list_free(action_list); return TRUE; } gboolean DemoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { GListPtr gIter = NULL; crm_trace("%s", rsc->id); /* CRM_CHECK(rsc->next_role == RSC_ROLE_SLAVE, return FALSE); */ for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; crm_trace("%s on %s", rsc->id, next?next->details->uname:"N/A"); demote_action(rsc, current, optional); } return TRUE; } gboolean RoleError(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { crm_err("%s on %s", rsc->id, next?next->details->uname:"N/A"); CRM_CHECK(FALSE, return FALSE); return FALSE; } gboolean NullOp(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { crm_trace("%s", rsc->id); return FALSE; } gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set) { action_t *delete = NULL; #if DELETE_THEN_REFRESH action_t *refresh = NULL; #endif if (is_set(rsc->flags, pe_rsc_failed)) { crm_trace("Resource %s not deleted from %s: failed", rsc->id, node->details->uname); return FALSE; } else if (node == NULL) { crm_trace("Resource %s not deleted: NULL node", rsc->id); return FALSE; } else if (node->details->unclean || node->details->online == FALSE) { crm_trace("Resource %s not deleted from %s: unrunnable", rsc->id, node->details->uname); return FALSE; } crm_notice("Removing %s from %s", rsc->id, node->details->uname); delete = delete_action(rsc, node, optional); new_rsc_order(rsc, RSC_STOP, rsc, RSC_DELETE, optional ? pe_order_implies_then : pe_order_optional, data_set); new_rsc_order(rsc, RSC_DELETE, rsc, RSC_START, optional ? pe_order_implies_then : pe_order_optional, data_set); #if DELETE_THEN_REFRESH refresh = custom_action(NULL, strdup(CRM_OP_LRM_REFRESH), CRM_OP_LRM_REFRESH, node, FALSE, TRUE, data_set); add_hash_param(refresh->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); order_actions(delete, refresh, pe_order_optional); #endif return TRUE; } #include <../lib/pengine/unpack.h> static node_t * probe_grouped_clone(resource_t * rsc, node_t * node, pe_working_set_t * data_set) { node_t *running = NULL; resource_t *top = uber_parent(rsc); if (running == NULL && is_set(top->flags, pe_rsc_unique) == FALSE) { /* Annoyingly we also need to check any other clone instances * Clumsy, but it will work. * * An alternative would be to update known_on for every peer * during process_rsc_state() * * This code desperately needs optimization * ptest -x with 100 nodes, 100 clones and clone-max=10: * No probes O(25s) * Detection without clone loop O(3m) * Detection with clone loop O(8m) ptest[32211]: 2010/02/18_14:27:55 CRIT: stage5: Probing for unknown resources ptest[32211]: 2010/02/18_14:33:39 CRIT: stage5: Done ptest[32211]: 2010/02/18_14:35:05 CRIT: stage7: Updating action states ptest[32211]: 2010/02/18_14:35:05 CRIT: stage7: Done */ char *clone_id = clone_zero(rsc->id); resource_t *peer = pe_find_resource(top->children, clone_id); while (peer && running == NULL) { running = pe_hash_table_lookup(peer->known_on, node->details->id); if (running != NULL) { /* we already know the status of the resource on this node */ crm_trace("Skipping active clone: %s", rsc->id); free(clone_id); return running; } clone_id = increment_clone(clone_id); peer = pe_find_resource(data_set->resources, clone_id); } free(clone_id); } return running; } gboolean native_create_probe(resource_t * rsc, node_t * node, action_t * complete, gboolean force, pe_working_set_t * data_set) { char *key = NULL; action_t *probe = NULL; node_t *running = NULL; resource_t *top = uber_parent(rsc); static const char *rc_master = NULL; static const char *rc_inactive = NULL; if (rc_inactive == NULL) { rc_inactive = crm_itoa(PCMK_EXECRA_NOT_RUNNING); rc_master = crm_itoa(PCMK_EXECRA_RUNNING_MASTER); } CRM_CHECK(node != NULL, return FALSE); if (force == FALSE && is_not_set(data_set->flags, pe_flag_startup_probes)) { crm_trace("Skipping active resource detection for %s", rsc->id); return FALSE; } if (rsc->children) { GListPtr gIter = NULL; gboolean any_created = FALSE; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; any_created = child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set) || any_created; } return any_created; } if (is_set(rsc->flags, pe_rsc_orphan)) { crm_trace("Skipping orphan: %s", rsc->id); return FALSE; } running = g_hash_table_lookup(rsc->known_on, node->details->id); if (running == NULL && is_set(rsc->flags, pe_rsc_unique) == FALSE) { /* Anonymous clones */ if (rsc->parent == top) { running = g_hash_table_lookup(rsc->parent->known_on, node->details->id); } else { /* Grouped anonymous clones need extra special handling */ running = probe_grouped_clone(rsc, node, data_set); } } if (force == FALSE && running != NULL) { /* we already know the status of the resource on this node */ crm_trace("Skipping active: %s", rsc->id); return FALSE; } key = generate_op_key(rsc->id, RSC_STATUS, 0); probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE, data_set); update_action_flags(probe, pe_action_optional | pe_action_clear); /* * We need to know if it's running_on (not just known_on) this node * to correctly determine the target rc. */ running = pe_find_node_id(rsc->running_on, node->details->id); if (running == NULL) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_inactive); } else if (rsc->role == RSC_ROLE_MASTER) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_master); } crm_debug("Probing %s on %s (%s)", rsc->id, node->details->uname, role2text(rsc->role)); order_actions(probe, complete, pe_order_implies_then); return TRUE; } static void native_start_constraints(resource_t * rsc, action_t * stonith_op, gboolean is_stonith, pe_working_set_t * data_set) { node_t *target = stonith_op ? stonith_op->node : NULL; if (is_stonith) { char *key = start_key(rsc); action_t *ready = get_pseudo_op(STONITH_UP, data_set); crm_trace("Ordering %s action before stonith events", key); custom_action_order(rsc, key, NULL, NULL, strdup(ready->task), ready, pe_order_optional | pe_order_implies_then, data_set); } else { GListPtr gIter = NULL; action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); action_t *stonith_done = get_pseudo_op(STONITH_DONE, data_set); for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (action->needs == rsc_req_stonith) { order_actions(stonith_done, action, pe_order_optional); } else if (target != NULL && safe_str_eq(action->task, RSC_START) && NULL == pe_hash_table_lookup(rsc->known_on, target->details->id)) { /* if known == NULL, then we dont know if * the resource is active on the node * we're about to shoot * * in this case, regardless of action->needs, * the only safe option is to wait until * the node is shot before doing anything * to with the resource * * its analogous to waiting for all the probes * for rscX to complete before starting rscX * * the most likely explaination is that the * DC died and took its status with it */ crm_debug("Ordering %s after %s recovery", action->uuid, target->details->uname); order_actions(all_stopped, action, pe_order_optional | pe_order_runnable_left); } } } } static void native_stop_constraints(resource_t * rsc, action_t * stonith_op, gboolean is_stonith, pe_working_set_t * data_set) { char *key = NULL; GListPtr gIter = NULL; GListPtr action_list = NULL; resource_t *top = uber_parent(rsc); key = stop_key(rsc); action_list = find_actions(rsc->actions, key, stonith_op->node); free(key); /* add the stonith OP as a stop pre-req and the mark the stop * as a pseudo op - since its now redundant */ for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (action->node->details->online && action->node->details->unclean == FALSE && is_set(rsc->flags, pe_rsc_failed)) { continue; } if (is_set(rsc->flags, pe_rsc_failed)) { crm_notice("Stop of failed resource %s is" " implicit after %s is fenced", rsc->id, action->node->details->uname); } else { crm_info("%s is implicit after %s is fenced", action->uuid, action->node->details->uname); } /* the stop would never complete and is * now implied by the stonith operation */ update_action_flags(action, pe_action_pseudo); update_action_flags(action, pe_action_runnable); update_action_flags(action, pe_action_implied_by_stonith); if (is_stonith == FALSE) { action_t *parent_stop = find_first_action(top->actions, NULL, RSC_STOP, NULL); order_actions(stonith_op, action, pe_order_optional); order_actions(stonith_op, parent_stop, pe_order_optional); } if (is_set(rsc->flags, pe_rsc_notify)) { /* Create a second notification that will be delivered * immediately after the node is fenced * * Basic problem: * - C is a clone active on the node to be shot and stopping on another * - R is a resource that depends on C * * + C.stop depends on R.stop * + C.stopped depends on STONITH * + C.notify depends on C.stopped * + C.healthy depends on C.notify * + R.stop depends on C.healthy * * The extra notification here changes * + C.healthy depends on C.notify * into: * + C.healthy depends on C.notify' * + C.notify' depends on STONITH' * thus breaking the loop */ notify_data_t *n_data = create_notification_boundaries(rsc, RSC_STOP, NULL, stonith_op, data_set); crm_info("Creating secondary notification for %s", action->uuid); collect_notification_data(rsc, TRUE, FALSE, n_data); g_hash_table_insert(n_data->keys, strdup("notify_stop_resource"), strdup(rsc->id)); g_hash_table_insert(n_data->keys, strdup("notify_stop_uname"), strdup(action->node->details->uname)); create_notifications(uber_parent(rsc), n_data, data_set); free_notification_data(n_data); } /* From Bug #1601, successful fencing must be an input to a failed resources stop action. However given group(rA, rB) running on nodeX and B.stop has failed, A := stop healthy resource (rA.stop) B := stop failed resource (pseudo operation B.stop) C := stonith nodeX A requires B, B requires C, C requires A This loop would prevent the cluster from making progress. This block creates the "C requires A" dependency and therefore must (at least for now) be disabled. Instead, run the block above and treat all resources on nodeX as B would be (marked as a pseudo op depending on the STONITH). TODO: Break the "A requires B" dependency in update_action() and re-enable this block } else if(is_stonith == FALSE) { crm_info("Moving healthy resource %s" " off %s before fencing", rsc->id, node->details->uname); * stop healthy resources before the * stonith op * custom_action_order( rsc, stop_key(rsc), NULL, NULL,strdup(CRM_OP_FENCE),stonith_op, pe_order_optional, data_set); */ } g_list_free(action_list); key = demote_key(rsc); action_list = find_actions(rsc->actions, key, stonith_op->node); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (action->node->details->online == FALSE || action->node->details->unclean == TRUE || is_set(rsc->flags, pe_rsc_failed)) { if (is_set(rsc->flags, pe_rsc_failed)) { crm_info("Demote of failed resource %s is" " implict after %s is fenced", rsc->id, action->node->details->uname); } else { crm_info("%s is implicit after %s is fenced", action->uuid, action->node->details->uname); } /* the stop would never complete and is * now implied by the stonith operation */ crm_trace("here - 1"); update_action_flags(action, pe_action_pseudo); update_action_flags(action, pe_action_runnable); if (is_stonith == FALSE) { order_actions(stonith_op, action, pe_order_optional); } } } g_list_free(action_list); } void rsc_stonith_ordering(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set) { gboolean is_stonith = FALSE; const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if (rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; rsc_stonith_ordering(child_rsc, stonith_op, data_set); } return; } if (is_not_set(rsc->flags, pe_rsc_managed)) { crm_trace("Skipping fencing constraints for unmanaged resource: %s", rsc->id); return; } if (stonith_op != NULL && safe_str_eq(class, "stonith")) { is_stonith = TRUE; } /* Start constraints */ native_start_constraints(rsc, stonith_op, is_stonith, data_set); /* Stop constraints */ native_stop_constraints(rsc, stonith_op, is_stonith, data_set); } enum stack_activity { stack_stable = 0, stack_starting = 1, stack_stopping = 2, stack_middle = 4, }; static enum stack_activity find_clone_activity_on(resource_t * rsc, resource_t * target, node_t * node, const char *type) { int mode = stack_stable; action_t *active = NULL; if (target->children) { GListPtr gIter = NULL; for (gIter = target->children; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; mode |= find_clone_activity_on(rsc, child, node, type); } return mode; } active = find_first_action(target->actions, NULL, RSC_START, NULL); if (active && is_set(active->flags, pe_action_optional) == FALSE && is_set(active->flags, pe_action_pseudo) == FALSE) { crm_debug("%s: found scheduled %s action (%s)", rsc->id, active->uuid, type); mode |= stack_starting; } active = find_first_action(target->actions, NULL, RSC_STOP, node); if (active && is_set(active->flags, pe_action_optional) == FALSE && is_set(active->flags, pe_action_pseudo) == FALSE) { crm_debug("%s: found scheduled %s action (%s)", rsc->id, active->uuid, type); mode |= stack_stopping; } return mode; } static enum stack_activity check_stack_element(resource_t * rsc, resource_t * other_rsc, const char *type) { resource_t *other_p = uber_parent(other_rsc); if (other_rsc == NULL || other_rsc == rsc) { return stack_stable; } else if (other_p->variant == pe_native) { crm_notice("Cannot migrate %s due to dependency on %s (%s)", rsc->id, other_rsc->id, type); return stack_middle; } else if (other_rsc == rsc->parent) { int mode = 0; GListPtr gIter = NULL; for (gIter = other_rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; if (constraint->score > 0) { mode |= check_stack_element(rsc, constraint->rsc_rh, type); } } return mode; } else if (other_p->variant == pe_group) { crm_notice("Cannot migrate %s due to dependency on group %s (%s)", rsc->id, other_rsc->id, type); return stack_middle; } /* else: >= clone */ /* ## Assumption A depends on clone(B) ## Resource Activity During Move N1 N2 N3 --- --- --- t0 A.stop t1 B.stop B.stop t2 B.start B.start t3 A.start ## Resource Activity During Migration N1 N2 N3 --- --- --- t0 B.start B.start t1 A.stop (1) t2 A.start (2) t3 B.stop B.stop Node 1: Rewritten to be a migrate-to operation Node 2: Rewritten to be a migrate-from operation # Constraints The following constraints already exist in the system. The 'ok' and 'fail' column refers to whether they still hold for migration. a) A.stop -> A.start - ok b) B.stop -> B.start - fail c) A.stop -> B.stop - ok d) B.start -> A.start - ok e) B.stop -> A.start - fail f) A.stop -> B.start - fail ## Scenarios B unchanged - ok B stopping only - fail - possible after fixing 'e' B starting only - fail - possible after fixing 'f' B stoping and starting - fail - constraint 'b' is unfixable B restarting only on N2 - fail - as-per previous only rarer */ /* Only allow migration when the clone is either stable, only starting or only stopping */ return find_clone_activity_on(rsc, other_rsc, NULL, type); } static gboolean at_stack_bottom(resource_t * rsc) { char *key = NULL; action_t *start = NULL; action_t *other = NULL; int mode = stack_stable; GListPtr action_list = NULL; GListPtr gIter = NULL; key = start_key(rsc); action_list = find_actions(rsc->actions, key, NULL); free(key); crm_trace("%s: processing", rsc->id); CRM_CHECK(action_list != NULL, return FALSE); start = action_list->data; g_list_free(action_list); for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; resource_t *target = constraint->rsc_rh; crm_trace("Checking %s: %s == %s (%d)", constraint->id, rsc->id, target->id, constraint->score); if (constraint->score > 0) { mode |= check_stack_element(rsc, target, "coloc"); if (mode & stack_middle) { return FALSE; } else if ((mode & stack_stopping) && (mode & stack_starting)) { crm_notice("Cannot migrate %s due to colocation activity (last was %s)", rsc->id, target->id); return FALSE; } } } for (gIter = start->actions_before; gIter != NULL; gIter = gIter->next) { action_wrapper_t *other_w = (action_wrapper_t *) gIter->data; other = other_w->action; if (other_w->type & pe_order_serialize_only) { crm_trace("%s: depends on %s (serialize ordering)", rsc->id, other->uuid); continue; } crm_trace("%s: Checking %s ordering", rsc->id, other->uuid); if (is_set(other->flags, pe_action_optional) == FALSE) { mode |= check_stack_element(rsc, other->rsc, "order"); if (mode & stack_middle) { return FALSE; } else if ((mode & stack_stopping) && (mode & stack_starting)) { crm_notice("Cannot migrate %s due to ordering activity (last was %s)", rsc->id, other->rsc->id); return FALSE; } } } return TRUE; } static action_t * get_first_named_action(resource_t *rsc, const char *action, gboolean only_valid, node_t *current) { action_t *a = NULL; GListPtr action_list = NULL; char *key = generate_op_key(rsc->id, action, 0); action_list = find_actions(rsc->actions, key, current); if (action_list == NULL || action_list->data == NULL) { crm_trace("%s: no %s action", rsc->id, action); free(key); return NULL; } a = action_list->data; g_list_free(action_list); if(only_valid && is_set(a->flags, pe_action_pseudo)) { crm_trace("%s: pseudo", key); a = NULL; } else if(only_valid && is_not_set(a->flags, pe_action_runnable)) { crm_trace("%s: runnable", key); a = NULL; } free(key); return a; } static void MigrateRsc(resource_t * rsc, action_t *stop, action_t *start, pe_working_set_t * data_set, gboolean partial) { action_t *to = NULL; action_t *from = NULL; action_t *then = NULL; action_t *other = NULL; action_t *done = get_pseudo_op(STONITH_DONE, data_set); GListPtr gIter = NULL; const char *value = g_hash_table_lookup(rsc->meta, XML_OP_ATTR_ALLOW_MIGRATE); crm_trace("%s %s -> %s", rsc->id, stop->node->details->uname, start->node->details->uname); if (crm_is_true(value) == FALSE) { return; } if (rsc->next_role > RSC_ROLE_SLAVE) { crm_trace("%s: resource role: role=%s", rsc->id, role2text(rsc->next_role)); return; } if(start == NULL || stop == NULL) { crm_trace("%s: not exists %p -> %p", rsc->id, stop, start); return; } else if (start->node == NULL || stop->node == NULL) { crm_trace("%s: no node %p -> %p", rsc->id, stop->node, start->node); return; } else if(is_set(stop->flags, pe_action_optional)) { crm_trace("%s: stop action", rsc->id); return; } else if(is_set(start->flags, pe_action_optional)) { crm_trace("%s: start action", rsc->id); return; } else if (stop->node->details == start->node->details) { crm_trace("%s: not moving %p -> %p", rsc->id, stop->node, start->node); return; } else if (at_stack_bottom(rsc) == FALSE) { crm_trace("%s: not at stack bottom", rsc->id); return; } if (partial) { crm_info("Completing partial migration of %s from %s to %s", rsc->id, stop->node ? stop->node->details->uname : "unknown", start->node ? start->node->details->uname : "unknown"); } else { crm_info("Migrating %s from %s to %s", rsc->id, stop->node ? stop->node->details->uname : "unknown", start->node ? start->node->details->uname : "unknown"); } /* Preserve the stop to ensure the end state is sane on that node, * Make the start a pseudo op * Create migrate_to, have it depend on everything the stop did * Create migrate_from * *-> migrate_to -> migrate_from -> stop -> start */ update_action_flags(start, pe_action_pseudo); /* easier than trying to delete it from the graph * but perhaps we should have it run anyway */ if (!partial) { to = custom_action(rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), RSC_MIGRATE, stop->node, FALSE, TRUE, data_set); } from = custom_action(rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), RSC_MIGRATED, start->node, FALSE, TRUE, data_set); /* This is slightly sub-optimal if 'to' fails, but always * run both halves of the migration before terminating the * transition. * * This can be removed if/when we update unpack_rsc_op() to * 'correctly' handle partial migrations. * * Without this, we end up stopping both sides */ from->priority = INFINITY; if (!partial) { order_actions(to, from, pe_order_optional); add_hash_param(to->meta, XML_LRM_ATTR_MIGRATE_SOURCE, stop->node->details->uname); add_hash_param(to->meta, XML_LRM_ATTR_MIGRATE_TARGET, start->node->details->uname); } then = to ? to : from; order_actions(from, stop, pe_order_optional); order_actions(done, then, pe_order_optional); add_hash_param(from->meta, XML_LRM_ATTR_MIGRATE_SOURCE, stop->node->details->uname); add_hash_param(from->meta, XML_LRM_ATTR_MIGRATE_TARGET, start->node->details->uname); /* Create the correct ordering ajustments based on find_clone_activity_on(); */ for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; resource_t *target = constraint->rsc_rh; crm_info("Repairing %s: %s == %s (%d)", constraint->id, rsc->id, target->id, constraint->score); if (constraint->score > 0) { int mode = check_stack_element(rsc, target, "coloc"); action_t *clone_stop = find_first_action(target->actions, NULL, RSC_STOP, NULL); action_t *clone_start = find_first_action(target->actions, NULL, RSC_STARTED, NULL); CRM_ASSERT(clone_stop != NULL); CRM_ASSERT(clone_start != NULL); CRM_ASSERT((mode & stack_middle) == 0); CRM_ASSERT(((mode & stack_stopping) && (mode & stack_starting)) == 0); if (mode & stack_stopping) { #if 0 crm_debug("Creating %s.start -> %s.stop ordering", rsc->id, target->id); order_actions(from, clone_stop, pe_order_optional); #endif GListPtr lpc2 = NULL; for (lpc2 = start->actions_before; lpc2 != NULL; lpc2 = lpc2->next) { action_wrapper_t *other_w = (action_wrapper_t *) lpc2->data; /* Needed if the clone's started pseudo-action ever gets printed in the graph */ if (other_w->action == clone_start) { crm_debug("Breaking %s -> %s ordering", other_w->action->uuid, start->uuid); other_w->type = pe_order_none; } } } else if (mode & stack_starting) { #if 0 crm_debug("Creating %s.started -> %s.stop ordering", target->id, rsc->id); order_actions(clone_start, to, pe_order_optional); #endif GListPtr lpc2 = NULL; for (lpc2 = clone_stop->actions_before; lpc2 != NULL; lpc2 = lpc2->next) { action_wrapper_t *other_w = (action_wrapper_t *) lpc2->data; /* Needed if the clone's stop pseudo-action ever gets printed in the graph */ if (other_w->action == stop) { crm_debug("Breaking %s -> %s ordering", other_w->action->uuid, clone_stop->uuid); other_w->type = pe_order_none; } } } } } #if 0 /* Implied now that start/stop are not morphed into migrate ops */ /* Anything that needed stop to complete, now also needs start to have completed */ for (gIter = stop->actions_after; gIter != NULL; gIter = gIter->next) { action_wrapper_t *other_w = (action_wrapper_t *) gIter->data; other = other_w->action; if (is_set(other->flags, pe_action_optional) || other->rsc != NULL) { continue; } crm_debug("Ordering %s before %s (stop)", from->uuid, other->uuid); order_actions(from, other, other_w->type); } #endif /* migrate 'then' action also needs anything that the stop needed to have completed too */ for (gIter = stop->actions_before; gIter != NULL; gIter = gIter->next) { action_wrapper_t *other_w = (action_wrapper_t *) gIter->data; other = other_w->action; if (other->rsc == NULL) { /* nothing */ } else if (is_set(other->flags, pe_action_optional) || other->rsc == rsc || other->rsc == rsc->parent) { continue; } crm_debug("Ordering %s before %s (stop)", other_w->action->uuid, stop->uuid); order_actions(other, then, other_w->type); } /* migrate 'then' action also needs anything that the start needed to have completed too */ for (gIter = start->actions_before; gIter != NULL; gIter = gIter->next) { action_wrapper_t *other_w = (action_wrapper_t *) gIter->data; other = other_w->action; if (other->rsc == NULL) { /* nothing */ } else if (is_set(other->flags, pe_action_optional) || other->rsc == rsc || other->rsc == rsc->parent) { continue; } crm_debug("Ordering %s before %s (start)", other_w->action->uuid, stop->uuid); order_actions(other, then, other_w->type); } } static void ReloadRsc(resource_t * rsc, action_t *stop, action_t *start, pe_working_set_t * data_set) { action_t *action = NULL; action_t *rewrite = NULL; if(is_not_set(rsc->flags, pe_rsc_try_reload)) { return; } else if(is_not_set(stop->flags, pe_action_optional)) { crm_trace("%s: stop action", rsc->id); return; } else if(is_not_set(start->flags, pe_action_optional)) { crm_trace("%s: start action", rsc->id); return; } crm_trace("%s on %s", rsc->id, stop->node->details->uname); action = get_first_named_action(rsc, RSC_PROMOTE, TRUE, NULL); if (action && is_set(action->flags, pe_action_optional) == FALSE) { update_action_flags(action, pe_action_pseudo); } action = get_first_named_action(rsc, RSC_DEMOTE, TRUE, NULL); if (action && is_set(action->flags, pe_action_optional) == FALSE) { rewrite = action; update_action_flags(stop, pe_action_pseudo); } else { rewrite = start; } crm_info("Rewriting %s of %s on %s as a reload", rewrite->task, rsc->id, stop->node->details->uname); set_bit(rsc->flags, pe_rsc_reload); update_action_flags(rewrite, pe_action_optional|pe_action_clear); free(rewrite->uuid); free(rewrite->task); rewrite->task = strdup("reload"); rewrite->uuid = generate_op_key(rsc->id, rewrite->task, 0); } void rsc_migrate_reload(resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = NULL; action_t *stop = NULL; action_t *start = NULL; gboolean partial = FALSE; if (rsc->children) { for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; rsc_migrate_reload(child_rsc, data_set); } return; } else if (rsc->variant > pe_native) { return; } crm_trace("Processing %s", rsc->id); if (rsc->partial_migration_target) { start = get_first_named_action(rsc, RSC_START, TRUE, rsc->partial_migration_target); stop = get_first_named_action(rsc, RSC_STOP, TRUE, rsc->partial_migration_source); if (start && stop) { partial = TRUE; } } crm_trace("%s %s %p", rsc->id, partial?"partial":"full", stop); if (!partial) { stop = get_first_named_action(rsc, RSC_STOP, TRUE, rsc->running_on ? rsc->running_on->data : NULL); start = get_first_named_action(rsc, RSC_START, TRUE, NULL); } if (is_not_set(rsc->flags, pe_rsc_managed) || is_set(rsc->flags, pe_rsc_failed) || is_set(rsc->flags, pe_rsc_start_pending) || rsc->next_role < RSC_ROLE_STARTED || ((g_list_length(rsc->running_on) != 1) && !partial)) { crm_trace("%s: general resource state: flags=0x%.16llx", rsc->id, rsc->flags); return; } if(stop == NULL) { return; } else if (is_set(stop->flags, pe_action_optional) && is_set(rsc->flags, pe_rsc_try_reload)) { ReloadRsc(rsc, stop, start, data_set); } else if(is_not_set(stop->flags, pe_action_optional)) { MigrateRsc(rsc, stop, start, data_set, partial); } } void native_append_meta(resource_t * rsc, xmlNode * xml) { char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION); if (value) { char *name = NULL; name = crm_meta_name(XML_RSC_ATTR_INCARNATION); crm_xml_add(xml, name, value); free(name); } } diff --git a/pengine/ptest.c b/pengine/ptest.c index 1197d8f5c1..260137fe72 100644 --- a/pengine/ptest.c +++ b/pengine/ptest.c @@ -1,510 +1,510 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if HAVE_LIBXML2 # include #endif gboolean use_stdin = FALSE; gboolean do_simulation = FALSE; gboolean inhibit_exit = FALSE; gboolean all_actions = FALSE; extern xmlNode *do_calculations(pe_working_set_t * data_set, xmlNode * xml_input, ha_time_t * now); extern void cleanup_calculations(pe_working_set_t * data_set); char *use_date = NULL; FILE *dot_strm = NULL; #define DOT_PREFIX "PE_DOT: " /* #define DOT_PREFIX "" */ #define dot_write(fmt...) if(dot_strm != NULL) { \ fprintf(dot_strm, fmt); \ fprintf(dot_strm, "\n"); \ } else { \ crm_debug(DOT_PREFIX""fmt); \ } static void init_dotfile(void) { dot_write(" digraph \"g\" {"); /* dot_write(" size = \"30,30\""); */ /* dot_write(" graph ["); */ /* dot_write(" fontsize = \"12\""); */ /* dot_write(" fontname = \"Times-Roman\""); */ /* dot_write(" fontcolor = \"black\""); */ /* dot_write(" bb = \"0,0,398.922306,478.927856\""); */ /* dot_write(" color = \"black\""); */ /* dot_write(" ]"); */ /* dot_write(" node ["); */ /* dot_write(" fontsize = \"12\""); */ /* dot_write(" fontname = \"Times-Roman\""); */ /* dot_write(" fontcolor = \"black\""); */ /* dot_write(" shape = \"ellipse\""); */ /* dot_write(" color = \"black\""); */ /* dot_write(" ]"); */ /* dot_write(" edge ["); */ /* dot_write(" fontsize = \"12\""); */ /* dot_write(" fontname = \"Times-Roman\""); */ /* dot_write(" fontcolor = \"black\""); */ /* dot_write(" color = \"black\""); */ /* dot_write(" ]"); */ } static char * create_action_name(action_t * action) { char *action_name = NULL; const char *action_host = NULL; if (action->node) { action_host = action->node->details->uname; action_name = crm_concat(action->uuid, action_host, ' '); } else if (is_set(action->flags, pe_action_pseudo)) { action_name = strdup(action->uuid); } else { action_host = ""; action_name = crm_concat(action->uuid, action_host, ' '); } if (safe_str_eq(action->task, RSC_CANCEL)) { char *tmp_action_name = action_name; action_name = crm_concat("Cancel", tmp_action_name, ' '); free(tmp_action_name); } return action_name; } gboolean USE_LIVE_CIB = FALSE; /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "This text"}, {"version", 0, 0, '$', "Version information" }, {"verbose", 0, 0, 'V', "Increase debug output\n"}, {"simulate", 0, 0, 'S', "Simulate the transition's execution to find invalid graphs\n"}, {"show-scores", 0, 0, 's', "Display resource allocation scores"}, {"show-utilization", 0, 0, 'U', "Display utilization information"}, {"all-actions", 0, 0, 'a', "Display all possible actions - even ones not part of the transition graph"}, {"live-check", 0, 0, 'L', "Connect to the CIB and use the current contents as input"}, {"xml-text", 1, 0, 'X', "Retrieve XML from the supplied string"}, {"xml-file", 1, 0, 'x', "Retrieve XML from the named file"}, /* {"xml-pipe", 0, 0, 'p', "Retrieve XML from stdin\n"}, */ {"save-input", 1, 0, 'I', "\tSave the input to the named file"}, {"save-graph", 1, 0, 'G', "\tSave the transition graph (XML format) to the named file"}, {"save-dotfile",1, 0, 'D', "Save the transition graph (DOT format) to the named file\n"}, {0, 0, 0, 0} }; /* *INDENT-ON* */ int main(int argc, char **argv) { GListPtr lpc = NULL; gboolean process = TRUE; gboolean all_good = TRUE; enum transition_status graph_rc = -1; crm_graph_t *transition = NULL; ha_time_t *a_date = NULL; cib_t *cib_conn = NULL; xmlNode *cib_object = NULL; int argerr = 0; int flag; char *msg_buffer = NULL; gboolean optional = FALSE; pe_working_set_t data_set; const char *source = NULL; const char *xml_file = NULL; const char *dot_file = NULL; const char *graph_file = NULL; const char *input_file = NULL; const char *input_xml = NULL; /* disable glib's fancy allocators that can't be free'd */ GMemVTable vtable; vtable.malloc = malloc; vtable.realloc = realloc; vtable.free = free; vtable.calloc = calloc; vtable.try_malloc = malloc; vtable.try_realloc = realloc; g_mem_set_vtable(&vtable); crm_log_cli_init("ptest"); crm_set_options(NULL, "[-?Vv] -[Xxp] {other options}", long_options, "Calculate the cluster's response to the supplied cluster state\n" "\nSuperceeded by crm_simulate and likely to be removed in a future release\n\n"); while (1) { int option_index = 0; flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch (flag) { case 'S': do_simulation = TRUE; break; case 'a': all_actions = TRUE; break; case 'w': inhibit_exit = TRUE; break; case 'X': /*use_stdin = TRUE;*/ input_xml = optarg; break; case 's': show_scores = TRUE; break; case 'U': show_utilization = TRUE; break; case 'x': xml_file = optarg; break; case 'd': use_date = optarg; break; case 'D': dot_file = optarg; break; case 'G': graph_file = optarg; break; case 'I': input_file = optarg; break; case 'V': crm_bump_log_level(); break; case 'L': USE_LIVE_CIB = TRUE; break; case '$': case '?': crm_help(flag, 0); break; default: fprintf(stderr, "Option -%c is not yet supported\n", flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) { printf("%s ", argv[optind++]); } printf("\n"); } if (optind > argc) { ++argerr; } if (argerr) { crm_err("%d errors in option parsing", argerr); crm_help('?', 1); } if (USE_LIVE_CIB) { int rc = pcmk_ok; source = "live cib"; cib_conn = cib_new(); rc = cib_conn->cmds->signon(cib_conn, "ptest", cib_command); if (rc == pcmk_ok) { crm_info("Reading XML from: live cluster"); cib_object = get_cib_copy(cib_conn); } else { fprintf(stderr, "Live CIB query failed: %s\n", pcmk_strerror(rc)); return 3; } if (cib_object == NULL) { fprintf(stderr, "Live CIB query failed: empty result\n"); return 3; } } else if (xml_file != NULL) { source = xml_file; cib_object = filename2xml(xml_file); } else if (use_stdin) { source = "stdin"; cib_object = filename2xml(NULL); } else if (input_xml) { source = "input string"; cib_object = string2xml(input_xml); } if (cib_object == NULL && source) { fprintf(stderr, "Could not parse configuration input from: %s\n", source); return 4; } else if (cib_object == NULL) { fprintf(stderr, "No configuration specified\n"); crm_help('?', 1); } if (get_object_root(XML_CIB_TAG_STATUS, cib_object) == NULL) { create_xml_node(cib_object, XML_CIB_TAG_STATUS); } if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) { free_xml(cib_object); return -ENOKEY; } if (validate_xml(cib_object, NULL, FALSE) != TRUE) { free_xml(cib_object); return -pcmk_err_dtd_validation; } if (input_file != NULL) { FILE *input_strm = fopen(input_file, "w"); if (input_strm == NULL) { crm_perror(LOG_ERR, "Could not open %s for writing", input_file); } else { msg_buffer = dump_xml_formatted(cib_object); if (fprintf(input_strm, "%s\n", msg_buffer) < 0) { crm_perror(LOG_ERR, "Write to %s failed", input_file); } fflush(input_strm); fclose(input_strm); free(msg_buffer); } } if (use_date != NULL) { a_date = parse_date(&use_date); log_date(LOG_WARNING, "Set fake 'now' to", a_date, ha_log_date | ha_log_time); log_date(LOG_WARNING, "Set fake 'now' to (localtime)", a_date, ha_log_date | ha_log_time | ha_log_local); } set_working_set_defaults(&data_set); if (process) { if (show_scores && show_utilization) { fprintf(stdout, "Allocation scores and utilization information:\n"); } else if (show_scores) { fprintf(stdout, "Allocation scores:\n"); } else if (show_utilization) { fprintf(stdout, "Utilization information:\n"); } do_calculations(&data_set, cib_object, a_date); } msg_buffer = dump_xml_formatted(data_set.graph); if (safe_str_eq(graph_file, "-")) { fprintf(stdout, "%s\n", msg_buffer); fflush(stdout); } else if (graph_file != NULL) { FILE *graph_strm = fopen(graph_file, "w"); if (graph_strm == NULL) { crm_perror(LOG_ERR, "Could not open %s for writing", graph_file); } else { if (fprintf(graph_strm, "%s\n\n", msg_buffer) < 0) { crm_perror(LOG_ERR, "Write to %s failed", graph_file); } fflush(graph_strm); fclose(graph_strm); } } free(msg_buffer); if (dot_file != NULL) { dot_strm = fopen(dot_file, "w"); if (dot_strm == NULL) { crm_perror(LOG_ERR, "Could not open %s for writing", dot_file); } } if (dot_strm == NULL) { goto simulate; } init_dotfile(); for (lpc = data_set.actions; lpc != NULL; lpc = lpc->next) { action_t *action = (action_t *) lpc->data; const char *style = "filled"; const char *font = "black"; const char *color = "black"; const char *fill = NULL; char *action_name = create_action_name(action); crm_trace("Action %d: %p", action->id, action); if (is_set(action->flags, pe_action_pseudo)) { font = "orange"; } style = "dashed"; if (is_set(action->flags, pe_action_dumped)) { style = "bold"; color = "green"; } else if (action->rsc != NULL && is_not_set(action->rsc->flags, pe_rsc_managed)) { color = "purple"; if (all_actions == FALSE) { goto dont_write; } } else if (is_set(action->flags, pe_action_optional)) { color = "blue"; if (all_actions == FALSE) { goto dont_write; } } else { color = "red"; CRM_CHECK(is_set(action->flags, pe_action_runnable) == FALSE,; ); } - set_bit_inplace(action->flags, pe_action_dumped); + set_bit(action->flags, pe_action_dumped); dot_write("\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\" %s%s]", action_name, style, color, font, fill ? "fillcolor=" : "", fill ? fill : ""); dont_write: free(action_name); } for (lpc = data_set.actions; lpc != NULL; lpc = lpc->next) { action_t *action = (action_t *) lpc->data; GListPtr lpc2 = NULL; for (lpc2 = action->actions_before; lpc2 != NULL; lpc2 = lpc2->next) { action_wrapper_t *before = (action_wrapper_t *) lpc2->data; char *before_name = NULL; char *after_name = NULL; const char *style = "dashed"; optional = TRUE; if (before->state == pe_link_dumped) { optional = FALSE; style = "bold"; } else if (is_set(action->flags, pe_action_pseudo) && (before->type & pe_order_stonith_stop)) { continue; } else if (before->state == pe_link_dup) { continue; } else if (before->type == pe_order_none) { continue; } else if (is_set(before->action->flags, pe_action_dumped) && is_set(action->flags, pe_action_dumped)) { optional = FALSE; } if (all_actions || optional == FALSE) { before_name = create_action_name(before->action); after_name = create_action_name(action); dot_write("\"%s\" -> \"%s\" [ style = %s]", before_name, after_name, style); free(before_name); free(after_name); } } } dot_write("}"); if (dot_strm != NULL) { fflush(dot_strm); fclose(dot_strm); } simulate: if (do_simulation == FALSE) { goto cleanup; } transition = unpack_graph(data_set.graph, "ptest"); print_graph(LOG_DEBUG, transition); do { graph_rc = run_graph(transition); } while (graph_rc == transition_active); if (graph_rc != transition_complete) { crm_crit("Transition failed: %s", transition_status(graph_rc)); print_graph(LOG_ERR, transition); } destroy_graph(transition); CRM_CHECK(graph_rc == transition_complete, all_good = FALSE; crm_err("An invalid transition was produced")); cleanup: cleanup_alloc_calculations(&data_set); crm_log_deinit(); /* required for MallocDebug.app */ if (inhibit_exit) { GMainLoop *mainloop = g_main_new(FALSE); g_main_run(mainloop); } if (all_good) { return 0; } return graph_rc; } diff --git a/pengine/utils.c b/pengine/utils.c index 61e6718e42..6d4908a883 100644 --- a/pengine/utils.c +++ b/pengine/utils.c @@ -1,508 +1,508 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include void pe_free_ordering(GListPtr constraints) { GListPtr iterator = constraints; while (iterator != NULL) { order_constraint_t *order = iterator->data; iterator = iterator->next; free(order->lh_action_task); free(order->rh_action_task); free(order); } if (constraints != NULL) { g_list_free(constraints); } } void pe_free_rsc_to_node(GListPtr constraints) { GListPtr iterator = constraints; while (iterator != NULL) { rsc_to_node_t *cons = iterator->data; iterator = iterator->next; g_list_free_full(cons->node_list_rh, free); free(cons); } if (constraints != NULL) { g_list_free(constraints); } } rsc_to_node_t * rsc2node_new(const char *id, resource_t * rsc, int node_weight, node_t * foo_node, pe_working_set_t * data_set) { rsc_to_node_t *new_con = NULL; if (rsc == NULL || id == NULL) { pe_err("Invalid constraint %s for rsc=%p", crm_str(id), rsc); return NULL; } else if (foo_node == NULL) { CRM_CHECK(node_weight == 0, return NULL); } new_con = calloc(1, sizeof(rsc_to_node_t)); if (new_con != NULL) { new_con->id = id; new_con->rsc_lh = rsc; new_con->node_list_rh = NULL; new_con->role_filter = RSC_ROLE_UNKNOWN; if (foo_node != NULL) { node_t *copy = node_copy(foo_node); copy->weight = merge_weights(node_weight, foo_node->weight); new_con->node_list_rh = g_list_prepend(NULL, copy); } data_set->placement_constraints = g_list_prepend(data_set->placement_constraints, new_con); rsc->rsc_location = g_list_prepend(rsc->rsc_location, new_con); } return new_con; } gboolean can_run_resources(const node_t * node) { if (node == NULL) { return FALSE; } #if 0 if (node->weight < 0) { return FALSE; } #endif if (node->details->online == FALSE || node->details->shutdown || node->details->unclean || node->details->standby) { crm_trace("%s: online=%d, unclean=%d, standby=%d", node->details->uname, node->details->online, node->details->unclean, node->details->standby); return FALSE; } return TRUE; } struct compare_data { const node_t *node1; const node_t *node2; int result; }; static void do_compare_capacity1(gpointer key, gpointer value, gpointer user_data) { int node1_capacity = 0; int node2_capacity = 0; struct compare_data *data = user_data; node1_capacity = crm_parse_int(value, "0"); node2_capacity = crm_parse_int(g_hash_table_lookup(data->node2->details->utilization, key), "0"); if (node1_capacity > node2_capacity) { data->result--; } else if (node1_capacity < node2_capacity) { data->result++; } } static void do_compare_capacity2(gpointer key, gpointer value, gpointer user_data) { int node1_capacity = 0; int node2_capacity = 0; struct compare_data *data = user_data; if (g_hash_table_lookup_extended(data->node1->details->utilization, key, NULL, NULL)) { return; } node1_capacity = 0; node2_capacity = crm_parse_int(value, "0"); if (node1_capacity > node2_capacity) { data->result--; } else if (node1_capacity < node2_capacity) { data->result++; } } /* rc < 0 if 'node1' has more capacity remaining * rc > 0 if 'node1' has less capacity remaining */ static int compare_capacity(const node_t * node1, const node_t * node2) { struct compare_data data; data.node1 = node1; data.node2 = node2; data.result = 0; g_hash_table_foreach(node1->details->utilization, do_compare_capacity1, &data); g_hash_table_foreach(node2->details->utilization, do_compare_capacity2, &data); return data.result; } /* return -1 if 'a' is more preferred * return 1 if 'b' is more preferred */ gint sort_node_weight(gconstpointer a, gconstpointer b, gpointer data) { const node_t *node1 = (const node_t *)a; const node_t *node2 = (const node_t *)b; const node_t *active = (node_t *) data; int node1_weight = 0; int node2_weight = 0; int result = 0; if (a == NULL) { return 1; } if (b == NULL) { return -1; } node1_weight = node1->weight; node2_weight = node2->weight; if (can_run_resources(node1) == FALSE) { node1_weight = -INFINITY; } if (can_run_resources(node2) == FALSE) { node2_weight = -INFINITY; } if (node1_weight > node2_weight) { crm_trace( "%s (%d) > %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); return -1; } if (node1_weight < node2_weight) { crm_trace( "%s (%d) < %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); return 1; } crm_trace( "%s (%d) == %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); if (safe_str_eq(pe_dataset->placement_strategy, "minimal")) { goto equal; } if (safe_str_eq(pe_dataset->placement_strategy, "balanced")) { result = compare_capacity(node1, node2); if (result != 0) { return result; } } /* now try to balance resources across the cluster */ if (node1->details->num_resources < node2->details->num_resources) { crm_trace( "%s (%d) < %s (%d) : resources", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return -1; } else if (node1->details->num_resources > node2->details->num_resources) { crm_trace( "%s (%d) > %s (%d) : resources", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return 1; } if (active && active->details == node1->details) { crm_trace( "%s (%d) > %s (%d) : active", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return -1; } else if (active && active->details == node2->details) { crm_trace( "%s (%d) > %s (%d) : active", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return 1; } equal: crm_trace( "%s = %s", node1->details->uname, node2->details->uname); return strcmp(node1->details->uname, node2->details->uname); } struct calculate_data { node_t *node; gboolean allocate; }; static void do_calculate_utilization(gpointer key, gpointer value, gpointer user_data) { const char *capacity = NULL; char *remain_capacity = NULL; struct calculate_data *data = user_data; capacity = g_hash_table_lookup(data->node->details->utilization, key); if (capacity) { if (data->allocate) { remain_capacity = crm_itoa(crm_parse_int(capacity, "0") - crm_parse_int(value, "0")); } else { remain_capacity = crm_itoa(crm_parse_int(capacity, "0") + crm_parse_int(value, "0")); } g_hash_table_replace(data->node->details->utilization, strdup(key), remain_capacity); } } /* Specify 'allocate' to TRUE when allocating * Otherwise to FALSE when deallocating */ static void calculate_utilization(node_t * node, resource_t * rsc, gboolean allocate) { struct calculate_data data; data.node = node; data.allocate = allocate; g_hash_table_foreach(rsc->utilization, do_calculate_utilization, &data); if (allocate) { dump_rsc_utilization(show_utilization ? 0 : utilization_log_level, __FUNCTION__, rsc, node); } } void native_deallocate(resource_t * rsc) { if (rsc->allocated_to) { node_t *old = rsc->allocated_to; crm_info("Deallocating %s from %s", rsc->id, old->details->uname); - set_bit_inplace(rsc->flags, pe_rsc_provisional); + set_bit(rsc->flags, pe_rsc_provisional); rsc->allocated_to = NULL; old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc, rsc); old->details->num_resources--; /* old->count--; */ calculate_utilization(old, rsc, FALSE); free(old); } } gboolean native_assign_node(resource_t * rsc, GListPtr nodes, node_t * chosen, gboolean force) { CRM_ASSERT(rsc->variant == pe_native); if (force == FALSE && chosen != NULL && (can_run_resources(chosen) == FALSE || chosen->weight < 0)) { crm_debug("All nodes for resource %s are unavailable" ", unclean or shutting down (%s: %d, %d)", rsc->id, chosen->details->uname, can_run_resources(chosen), chosen->weight); rsc->next_role = RSC_ROLE_STOPPED; chosen = NULL; } /* todo: update the old node for each resource to reflect its * new resource count */ native_deallocate(rsc); clear_bit(rsc->flags, pe_rsc_provisional); if (chosen == NULL) { char *key = NULL; GListPtr gIter = NULL; GListPtr possible_matches = NULL; crm_debug("Could not allocate a node for %s", rsc->id); rsc->next_role = RSC_ROLE_STOPPED; key = generate_op_key(rsc->id, RSC_STOP, 0); possible_matches = find_actions(rsc->actions, key, NULL); for (gIter = possible_matches; gIter != NULL; gIter = gIter->next) { action_t *stop = (action_t *) gIter->data; update_action_flags(stop, pe_action_optional | pe_action_clear); } g_list_free(possible_matches); free(key); key = generate_op_key(rsc->id, RSC_START, 0); possible_matches = find_actions(rsc->actions, key, NULL); for (gIter = possible_matches; gIter != NULL; gIter = gIter->next) { action_t *start = (action_t *) gIter->data; update_action_flags(start, pe_action_runnable | pe_action_clear); } g_list_free(possible_matches); free(key); return FALSE; } crm_debug("Assigning %s to %s", chosen->details->uname, rsc->id); rsc->allocated_to = node_copy(chosen); chosen->details->allocated_rsc = g_list_prepend(chosen->details->allocated_rsc, rsc); chosen->details->num_resources++; chosen->count++; calculate_utilization(chosen, rsc, TRUE); return TRUE; } void log_action(unsigned int log_level, const char *pre_text, action_t * action, gboolean details) { const char *node_uname = NULL; const char *node_uuid = NULL; if (action == NULL) { crm_trace("%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": "); return; } if (is_set(action->flags, pe_action_pseudo)) { node_uname = NULL; node_uuid = NULL; } else if (action->node != NULL) { node_uname = action->node->details->uname; node_uuid = action->node->details->id; } else { node_uname = ""; node_uuid = NULL; } switch (text2task(action->task)) { case stonith_node: case shutdown_crm: crm_trace( "%s%s%sAction %d: %s%s%s%s%s%s", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ", is_set(action->flags, pe_action_pseudo) ? "Pseduo " : is_set(action->flags, pe_action_optional) ? "Optional " : is_set(action->flags, pe_action_runnable) ? is_set(action->flags, pe_action_processed) ? "" : "(Provisional) " : "!!Non-Startable!! ", action->id, action->uuid, node_uname ? "\ton " : "", node_uname ? node_uname : "", node_uuid ? "\t\t(" : "", node_uuid ? node_uuid : "", node_uuid ? ")" : ""); break; default: crm_trace( "%s%s%sAction %d: %s %s%s%s%s%s%s", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ", is_set(action->flags, pe_action_optional) ? "Optional " : is_set(action->flags, pe_action_pseudo) ? "Pseduo " : is_set(action->flags, pe_action_runnable) ? is_set(action->flags, pe_action_processed) ? "" : "(Provisional) " : "!!Non-Startable!! ", action->id, action->uuid, action->rsc?action->rsc->id:"", node_uname ? "\ton " : "", node_uname ? node_uname : "", node_uuid ? "\t\t(" : "", node_uuid ? node_uuid : "", node_uuid ? ")" : ""); break; } if (details) { GListPtr gIter = NULL; crm_trace( "\t\t====== Preceding Actions"); gIter = action->actions_before; for (; gIter != NULL; gIter = gIter->next) { action_wrapper_t *other = (action_wrapper_t *) gIter->data; log_action(log_level + 1, "\t\t", other->action, FALSE); } crm_trace( "\t\t====== Subsequent Actions"); gIter = action->actions_after; for (; gIter != NULL; gIter = gIter->next) { action_wrapper_t *other = (action_wrapper_t *) gIter->data; log_action(log_level + 1, "\t\t", other->action, FALSE); } crm_trace( "\t\t====== End"); } else { crm_trace( "\t\t(seen=%d, before=%d, after=%d)", action->seen_count, g_list_length(action->actions_before), g_list_length(action->actions_after)); } } gboolean can_run_any(GHashTable * nodes) { GHashTableIter iter; node_t *node = NULL; if (nodes == NULL) { return FALSE; } g_hash_table_iter_init(&iter, nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (can_run_resources(node) && node->weight >= 0) { return TRUE; } } return FALSE; } diff --git a/tools/crm_inject.c b/tools/crm_inject.c index 6b88c8038a..caee411262 100644 --- a/tools/crm_inject.c +++ b/tools/crm_inject.c @@ -1,1489 +1,1489 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include cib_t *global_cib = NULL; GListPtr op_fail = NULL; gboolean quiet = FALSE; #define new_node_template "//"XML_CIB_TAG_NODE"[@uname='%s']" #define node_template "//"XML_CIB_TAG_STATE"[@uname='%s']" #define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s']" /* #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s' and @"XML_LRM_ATTR_CALLID"='%d']" */ #define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" #define quiet_log(fmt, args...) do { \ if(quiet == FALSE) { \ printf(fmt , ##args); \ } \ } while(0) extern void cleanup_alloc_calculations(pe_working_set_t * data_set); extern xmlNode *do_calculations(pe_working_set_t * data_set, xmlNode * xml_input, ha_time_t * now); char *use_date = NULL; static ha_time_t * get_date(void) { if (use_date) { char *date_m = use_date; return parse_date(&date_m); } return NULL; } static xmlNode * find_resource(xmlNode * cib_node, const char *resource) { char *xpath = NULL; xmlNode *match = NULL; const char *node = crm_element_value(cib_node, XML_ATTR_UNAME); int max = strlen(rsc_template) + strlen(resource) + strlen(node) + 1; xpath = calloc(1, max); snprintf(xpath, max, rsc_template, node, resource); match = get_xpath_object(xpath, cib_node, LOG_DEBUG_2); free(xpath); return match; } static void create_node_entry(cib_t * cib_conn, char *node) { int rc = pcmk_ok; int max = strlen(new_node_template) + strlen(node) + 1; char *xpath = NULL; xpath = calloc(1, max); snprintf(xpath, max, new_node_template, node); rc = cib_conn->cmds->query(cib_conn, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); if (rc == -ENXIO) { xmlNode *cib_object = create_xml_node(NULL, XML_CIB_TAG_NODE); /* Using node uname as uuid ala corosync/openais */ crm_xml_add(cib_object, XML_ATTR_ID, node); crm_xml_add(cib_object, XML_ATTR_UNAME, node); crm_xml_add(cib_object, XML_ATTR_TYPE, NORMALNODE); cib_conn->cmds->create(cib_conn, XML_CIB_TAG_NODES, cib_object, cib_sync_call | cib_scope_local); /* Not bothering with subsequent query to see if it exists, we'll bomb out later in the call to determine_host... */ free_xml(cib_object); } free(xpath); } static xmlNode * inject_node_state(cib_t * cib_conn, char *node) { int rc = pcmk_ok; int max = strlen(rsc_template) + strlen(node) + 1; char *xpath = NULL; xmlNode *cib_object = NULL; xpath = calloc(1, max); create_node_entry(cib_conn, node); snprintf(xpath, max, node_template, node); rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object, cib_xpath | cib_sync_call | cib_scope_local); if(cib_object && ID(cib_object) == NULL) { crm_err("Detected multiple node_state entries for xpath=%s, bailing", xpath); crm_log_xml_warn(cib_object, "Duplicates"); exit(1); } if (rc == -ENXIO) { char *uuid = NULL; cib_object = create_xml_node(NULL, XML_CIB_TAG_STATE); determine_host(cib_conn, &node, &uuid); crm_xml_add(cib_object, XML_ATTR_UUID, uuid); crm_xml_add(cib_object, XML_ATTR_UNAME, node); cib_conn->cmds->create(cib_conn, XML_CIB_TAG_STATUS, cib_object, cib_sync_call | cib_scope_local); free_xml(cib_object); free(uuid); rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object, cib_xpath | cib_sync_call | cib_scope_local); } free(xpath); CRM_ASSERT(rc == pcmk_ok); return cib_object; } static xmlNode * modify_node(cib_t * cib_conn, char *node, gboolean up) { xmlNode *cib_node = inject_node_state(cib_conn, node); if (up) { crm_xml_add(cib_node, XML_CIB_ATTR_HASTATE, ACTIVESTATUS); crm_xml_add(cib_node, XML_CIB_ATTR_INCCM, XML_BOOLEAN_YES); crm_xml_add(cib_node, XML_CIB_ATTR_CRMDSTATE, ONLINESTATUS); crm_xml_add(cib_node, XML_CIB_ATTR_JOINSTATE, CRMD_JOINSTATE_MEMBER); crm_xml_add(cib_node, XML_CIB_ATTR_EXPSTATE, CRMD_JOINSTATE_MEMBER); } else { crm_xml_add(cib_node, XML_CIB_ATTR_HASTATE, DEADSTATUS); crm_xml_add(cib_node, XML_CIB_ATTR_INCCM, XML_BOOLEAN_NO); crm_xml_add(cib_node, XML_CIB_ATTR_CRMDSTATE, OFFLINESTATUS); crm_xml_add(cib_node, XML_CIB_ATTR_JOINSTATE, CRMD_JOINSTATE_DOWN); crm_xml_add(cib_node, XML_CIB_ATTR_EXPSTATE, CRMD_JOINSTATE_DOWN); } crm_xml_add(cib_node, XML_ATTR_ORIGIN, crm_system_name); return cib_node; } static void inject_transient_attr(xmlNode * cib_node, const char *name, const char *value) { xmlNode *attrs = NULL; xmlNode *container = NULL; xmlNode *nvp = NULL; const char *node_uuid = ID(cib_node); char *nvp_id = crm_concat(name, node_uuid, '-'); crm_info("Injecting attribute %s=%s into %s '%s'", name, value, xmlGetNodePath(cib_node), ID(cib_node)); attrs = first_named_child(cib_node, XML_TAG_TRANSIENT_NODEATTRS); if (attrs == NULL) { attrs = create_xml_node(cib_node, XML_TAG_TRANSIENT_NODEATTRS); crm_xml_add(attrs, XML_ATTR_ID, node_uuid); } container = first_named_child(attrs, XML_TAG_ATTR_SETS); if (container == NULL) { container = create_xml_node(attrs, XML_TAG_ATTR_SETS); crm_xml_add(container, XML_ATTR_ID, node_uuid); } nvp = create_xml_node(container, XML_CIB_TAG_NVPAIR); crm_xml_add(nvp, XML_ATTR_ID, nvp_id); crm_xml_add(nvp, XML_NVPAIR_ATTR_NAME, name); crm_xml_add(nvp, XML_NVPAIR_ATTR_VALUE, value); free(nvp_id); } static xmlNode * inject_resource(xmlNode * cib_node, const char *resource, const char *rclass, const char *rtype, const char *rprovider) { xmlNode *lrm = NULL; xmlNode *container = NULL; xmlNode *cib_resource = NULL; char *xpath = NULL; cib_resource = find_resource(cib_node, resource); if (cib_resource != NULL) { return cib_resource; } /* One day, add query for class, provider, type */ if (rclass == NULL || rtype == NULL) { fprintf(stderr, "Resource %s not found in the status section of %s." " Please supply the class and type to continue\n", resource, ID(cib_node)); return NULL; } else if (safe_str_neq(rclass, "ocf") && safe_str_neq(rclass, "stonith") && safe_str_neq(rclass, "heartbeat") && safe_str_neq(rclass, "lsb")) { fprintf(stderr, "Invalid class for %s: %s\n", resource, rclass); return NULL; } else if (safe_str_eq(rclass, "ocf") && rprovider == NULL) { fprintf(stderr, "Please specify the provider for resource %s\n", resource); return NULL; } xpath = (char *)xmlGetNodePath(cib_node); crm_info("Injecting new resource %s into %s '%s'", resource, xpath, ID(cib_node)); free(xpath); lrm = first_named_child(cib_node, XML_CIB_TAG_LRM); if (lrm == NULL) { const char *node_uuid = ID(cib_node); lrm = create_xml_node(cib_node, XML_CIB_TAG_LRM); crm_xml_add(lrm, XML_ATTR_ID, node_uuid); } container = first_named_child(lrm, XML_LRM_TAG_RESOURCES); if (container == NULL) { container = create_xml_node(lrm, XML_LRM_TAG_RESOURCES); } cib_resource = create_xml_node(container, XML_LRM_TAG_RESOURCE); crm_xml_add(cib_resource, XML_ATTR_ID, resource); crm_xml_add(cib_resource, XML_AGENT_ATTR_CLASS, rclass); crm_xml_add(cib_resource, XML_AGENT_ATTR_PROVIDER, rprovider); crm_xml_add(cib_resource, XML_ATTR_TYPE, rtype); return cib_resource; } static lrmd_event_data_t * create_op(xmlNode * cib_resource, const char *task, int interval, int outcome) { lrmd_event_data_t *op = NULL; xmlNode *xop = NULL; op = calloc(1, sizeof(lrmd_event_data_t)); op->rsc_id = strdup(ID(cib_resource)); op->interval = interval; op->op_type = strdup(task); op->rc = outcome; op->op_status = 0; op->params = NULL; /* TODO: Fill me in */ op->call_id = 0; for (xop = __xml_first_child(cib_resource); xop != NULL; xop = __xml_next(xop)) { int tmp = 0; crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp); if (tmp > op->call_id) { op->call_id = tmp; } } op->call_id++; return op; } static xmlNode * inject_op(xmlNode * cib_resource, lrmd_event_data_t * op, int target_rc) { return create_operation_update(cib_resource, op, CRM_FEATURE_SET, target_rc, crm_system_name, LOG_DEBUG_2); } static void update_failcounts(xmlNode * cib_node, const char *resource, int interval, int rc) { if (rc == 0) { return; } else if (rc == 7 && interval == 0) { return; } else { char *name = NULL; char *now = crm_itoa(time(NULL)); name = crm_concat("fail-count", resource, '-'); inject_transient_attr(cib_node, name, "value++"); name = crm_concat("last-failure", resource, '-'); inject_transient_attr(cib_node, name, now); free(name); free(now); } } static gboolean exec_pseudo_action(crm_graph_t * graph, crm_action_t * action) { const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); action->confirmed = TRUE; quiet_log(" * Pseudo action: %s%s%s\n", task, node?" on ":"", node?node:""); update_graph(graph, action); return TRUE; } static gboolean exec_rsc_action(crm_graph_t * graph, crm_action_t * action) { int rc = 0; GListPtr gIter = NULL; lrmd_event_data_t *op = NULL; int target_outcome = 0; const char *rtype = NULL; const char *rclass = NULL; const char *resource = NULL; const char *rprovider = NULL; const char *target_rc_s = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC); xmlNode *cib_node = NULL; xmlNode *cib_resource = NULL; xmlNode *action_rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); char *node = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET); if (safe_str_eq(crm_element_value(action->xml, "operation"), "probe_complete")) { crm_info("Skipping %s op for %s\n", crm_element_value(action->xml, "operation"), node); goto done; } if (action_rsc == NULL) { crm_log_xml_err(action->xml, "Bad"); free(node); return FALSE; } resource = ID(action_rsc); rclass = crm_element_value(action_rsc, XML_AGENT_ATTR_CLASS); rtype = crm_element_value(action_rsc, XML_ATTR_TYPE); rprovider = crm_element_value(action_rsc, XML_AGENT_ATTR_PROVIDER); if (target_rc_s != NULL) { target_outcome = crm_parse_int(target_rc_s, "0"); } CRM_ASSERT(global_cib->cmds->query(global_cib, NULL, NULL, cib_sync_call | cib_scope_local) == pcmk_ok); cib_node = inject_node_state(global_cib, node); CRM_ASSERT(cib_node != NULL); cib_resource = inject_resource(cib_node, resource, rclass, rtype, rprovider); CRM_ASSERT(cib_resource != NULL); op = convert_graph_action(cib_resource, action, 0, target_outcome); if(op->interval) { quiet_log(" * Resource action: %-15s %s=%d on %s\n", resource, op->op_type, op->interval, node); } else { quiet_log(" * Resource action: %-15s %s on %s\n", resource, op->op_type, node); } for (gIter = op_fail; gIter != NULL; gIter = gIter->next) { char *spec = (char *)gIter->data; char *key = NULL; key = calloc(1, 1 + strlen(spec)); snprintf(key, strlen(spec), "%s_%s_%d@%s=", resource, op->op_type, op->interval, node); if (strncasecmp(key, spec, strlen(key)) == 0) { rc = sscanf(spec, "%*[^=]=%d", (int *) &op->rc); action->failed = TRUE; graph->abort_priority = INFINITY; printf("\tPretending action %d failed with rc=%d\n", action->id, op->rc); update_failcounts(cib_node, resource, op->interval, op->rc); free(key); break; } free(key); } inject_op(cib_resource, op, target_outcome); lrmd_free_event(op); rc = global_cib->cmds->modify(global_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); done: free(node); free_xml(cib_node); action->confirmed = TRUE; update_graph(graph, action); return TRUE; } static gboolean exec_crmd_action(crm_graph_t * graph, crm_action_t * action) { const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); action->confirmed = TRUE; quiet_log(" * Cluster action: %s on %s\n", task, node); update_graph(graph, action); return TRUE; } #define STATUS_PATH_MAX 512 static gboolean exec_stonith_action(crm_graph_t * graph, crm_action_t * action) { int rc = 0; char xpath[STATUS_PATH_MAX]; char *target = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET); xmlNode *cib_node = modify_node(global_cib, target, FALSE); crm_xml_add(cib_node, XML_ATTR_ORIGIN, __FUNCTION__); CRM_ASSERT(cib_node != NULL); quiet_log(" * Fencing %s\n", target); rc = global_cib->cmds->replace(global_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", target, XML_CIB_TAG_LRM); rc = global_cib->cmds->delete(global_cib, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", target, XML_TAG_TRANSIENT_NODEATTRS); rc = global_cib->cmds->delete(global_cib, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local); action->confirmed = TRUE; update_graph(graph, action); free_xml(cib_node); free(target); return TRUE; } static char * add_list_element(char *list, const char *value) { int len = 0; int last = 0; if (value == NULL) { return list; } if (list) { last = strlen(list); } len = last + 2; /* +1 space, +1 EOS */ len += strlen(value); list = realloc(list, len); sprintf(list + last, " %s", value); return list; } static void print_cluster_status(pe_working_set_t * data_set) { char *online_nodes = NULL; char *offline_nodes = NULL; GListPtr gIter = NULL; for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; const char *node_mode = NULL; if (node->details->unclean) { if (node->details->online && node->details->unclean) { node_mode = "UNCLEAN (online)"; } else if (node->details->pending) { node_mode = "UNCLEAN (pending)"; } else { node_mode = "UNCLEAN (offline)"; } } else if (node->details->pending) { node_mode = "pending"; } else if (node->details->standby_onfail && node->details->online) { node_mode = "standby (on-fail)"; } else if (node->details->standby) { if (node->details->online) { node_mode = "standby"; } else { node_mode = "OFFLINE (standby)"; } } else if (node->details->online) { node_mode = "online"; online_nodes = add_list_element(online_nodes, node->details->uname); continue; } else { node_mode = "OFFLINE"; offline_nodes = add_list_element(offline_nodes, node->details->uname); continue; } if (safe_str_eq(node->details->uname, node->details->id)) { printf("Node %s: %s\n", node->details->uname, node_mode); } else { printf("Node %s (%s): %s\n", node->details->uname, node->details->id, node_mode); } } if (online_nodes) { printf("Online: [%s ]\n", online_nodes); free(online_nodes); } if (offline_nodes) { printf("OFFLINE: [%s ]\n", offline_nodes); free(offline_nodes); } fprintf(stdout, "\n"); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; if (is_set(rsc->flags, pe_rsc_orphan) && rsc->role == RSC_ROLE_STOPPED) { continue; } rsc->fns->print(rsc, NULL, pe_print_printf, stdout); } fprintf(stdout, "\n"); } static int run_simulation(pe_working_set_t * data_set) { crm_graph_t *transition = NULL; enum transition_status graph_rc = -1; crm_graph_functions_t exec_fns = { exec_pseudo_action, exec_rsc_action, exec_crmd_action, exec_stonith_action, }; set_graph_functions(&exec_fns); quiet_log("\nExecuting cluster transition:\n"); transition = unpack_graph(data_set->graph, crm_system_name); print_graph(LOG_DEBUG, transition); do { graph_rc = run_graph(transition); } while (graph_rc == transition_active); if (graph_rc != transition_complete) { fprintf(stdout, "Transition failed: %s\n", transition_status(graph_rc)); print_graph(LOG_ERR, transition); } destroy_graph(transition); if (graph_rc != transition_complete) { fprintf(stdout, "An invalid transition was produced\n"); } if (quiet == FALSE) { xmlNode *cib_object = NULL; int rc = global_cib->cmds->query(global_cib, NULL, &cib_object, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); quiet_log("\nRevised cluster status:\n"); cleanup_alloc_calculations(data_set); data_set->input = cib_object; data_set->now = get_date(); cluster_status(data_set); print_cluster_status(data_set); } if (graph_rc != transition_complete) { return graph_rc; } return 0; } static char * create_action_name(action_t * action) { char *action_name = NULL; const char *action_host = NULL; if (action->node) { action_host = action->node->details->uname; action_name = crm_concat(action->uuid, action_host, ' '); } else if (is_set(action->flags, pe_action_pseudo)) { action_name = strdup(action->uuid); } else { action_host = ""; action_name = crm_concat(action->uuid, action_host, ' '); } if (safe_str_eq(action->task, RSC_CANCEL)) { char *tmp_action_name = action_name; action_name = crm_concat("Cancel", tmp_action_name, ' '); free(tmp_action_name); } return action_name; } static void create_dotfile(pe_working_set_t * data_set, const char *dot_file, gboolean all_actions) { GListPtr gIter = NULL; FILE *dot_strm = fopen(dot_file, "w"); if (dot_strm == NULL) { crm_perror(LOG_ERR, "Could not open %s for writing", dot_file); return; } fprintf(dot_strm, " digraph \"g\" {\n"); for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; const char *style = "dashed"; const char *font = "black"; const char *color = "black"; char *action_name = create_action_name(action); crm_trace("Action %d: %p", action->id, action); if (is_set(action->flags, pe_action_pseudo)) { font = "orange"; } if (is_set(action->flags, pe_action_dumped)) { style = "bold"; color = "green"; } else if (action->rsc != NULL && is_not_set(action->rsc->flags, pe_rsc_managed)) { color = "red"; font = "purple"; if (all_actions == FALSE) { goto dont_write; } } else if (is_set(action->flags, pe_action_optional)) { color = "blue"; if (all_actions == FALSE) { goto dont_write; } } else { color = "red"; CRM_CHECK(is_set(action->flags, pe_action_runnable) == FALSE,; ); } - set_bit_inplace(action->flags, pe_action_dumped); + set_bit(action->flags, pe_action_dumped); fprintf(dot_strm, "\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\"]\n", action_name, style, color, font); dont_write: free(action_name); } for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; GListPtr gIter2 = NULL; for (gIter2 = action->actions_before; gIter2 != NULL; gIter2 = gIter2->next) { action_wrapper_t *before = (action_wrapper_t *) gIter2->data; char *before_name = NULL; char *after_name = NULL; const char *style = "dashed"; gboolean optional = TRUE; if (before->state == pe_link_dumped) { optional = FALSE; style = "bold"; } else if (is_set(action->flags, pe_action_pseudo) && (before->type & pe_order_stonith_stop)) { continue; } else if (before->state == pe_link_dup) { continue; } else if (before->type == pe_order_none) { continue; } else if (is_set(before->action->flags, pe_action_dumped) && is_set(action->flags, pe_action_dumped) && before->type != pe_order_load) { optional = FALSE; } if (all_actions || optional == FALSE) { before_name = create_action_name(before->action); after_name = create_action_name(action); fprintf(dot_strm, "\"%s\" -> \"%s\" [ style = %s]\n", before_name, after_name, style); free(before_name); free(after_name); } } } fprintf(dot_strm, "}\n"); if (dot_strm != NULL) { fflush(dot_strm); fclose(dot_strm); } } static int find_ticket_state(cib_t * the_cib, const char * ticket_id, xmlNode ** ticket_state_xml) { int offset = 0; static int xpath_max = 1024; int rc = pcmk_ok; xmlNode *xml_search = NULL; char *xpath_string = NULL; CRM_ASSERT(ticket_state_xml != NULL); *ticket_state_xml = NULL; xpath_string = calloc(1, xpath_max); offset += snprintf(xpath_string + offset, xpath_max - offset, "%s", "/cib/status/tickets"); if (ticket_id) { offset += snprintf(xpath_string + offset, xpath_max - offset, "/%s[@id=\"%s\"]", XML_CIB_TAG_TICKET_STATE, ticket_id); } rc = the_cib->cmds->query(the_cib, xpath_string, &xml_search, cib_sync_call | cib_scope_local | cib_xpath); if (rc != pcmk_ok) { goto bail; } crm_log_xml_debug(xml_search, "Match"); if (xml_has_children(xml_search)) { if (ticket_id) { fprintf(stdout, "Multiple ticket_states match ticket_id=%s\n", ticket_id); } *ticket_state_xml = xml_search; } else { *ticket_state_xml = xml_search; } bail: free(xpath_string); return rc; } static int set_ticket_state_attr(const char *ticket_id, const char *attr_name, const char *attr_value, cib_t * cib, int cib_options) { int rc = pcmk_ok; xmlNode *xml_top = NULL; xmlNode *ticket_state_xml = NULL; rc = find_ticket_state(cib, ticket_id, &ticket_state_xml); if (rc == pcmk_ok) { crm_debug("Found a match state for ticket: id=%s", ticket_id); xml_top = ticket_state_xml; } else if (rc != -ENXIO) { return rc; } else { xmlNode *xml_obj = NULL; xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS); xml_obj = create_xml_node(xml_top, XML_CIB_TAG_TICKETS); ticket_state_xml = create_xml_node(xml_obj, XML_CIB_TAG_TICKET_STATE); crm_xml_add(ticket_state_xml, XML_ATTR_ID, ticket_id); } crm_xml_add(ticket_state_xml, attr_name, attr_value); crm_log_xml_debug(xml_top, "Update"); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, xml_top, cib_options); free_xml(xml_top); return rc; } static void modify_configuration(pe_working_set_t * data_set, const char *quorum, GListPtr node_up, GListPtr node_down, GListPtr node_fail, GListPtr op_inject, GListPtr ticket_grant, GListPtr ticket_revoke, GListPtr ticket_standby, GListPtr ticket_activate) { int rc = pcmk_ok; GListPtr gIter = NULL; xmlNode *cib_op = NULL; xmlNode *cib_node = NULL; xmlNode *cib_resource = NULL; lrmd_event_data_t *op = NULL; if (quorum) { xmlNode *top = create_xml_node(NULL, XML_TAG_CIB); quiet_log(" + Setting quorum: %s\n", quorum); /* crm_xml_add(top, XML_ATTR_DC_UUID, dc_uuid); */ crm_xml_add(top, XML_ATTR_HAVE_QUORUM, quorum); rc = global_cib->cmds->modify(global_cib, NULL, top, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = node_up; gIter != NULL; gIter = gIter->next) { char *node = (char *)gIter->data; quiet_log(" + Bringing node %s online\n", node); cib_node = modify_node(global_cib, node, TRUE); CRM_ASSERT(cib_node != NULL); rc = global_cib->cmds->modify(global_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = node_down; gIter != NULL; gIter = gIter->next) { char *node = (char *)gIter->data; quiet_log(" + Taking node %s offline\n", node); cib_node = modify_node(global_cib, node, FALSE); CRM_ASSERT(cib_node != NULL); rc = global_cib->cmds->modify(global_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = node_fail; gIter != NULL; gIter = gIter->next) { char *node = (char *)gIter->data; quiet_log(" + Failing node %s\n", node); cib_node = modify_node(global_cib, node, TRUE); crm_xml_add(cib_node, XML_CIB_ATTR_INCCM, XML_BOOLEAN_NO); CRM_ASSERT(cib_node != NULL); rc = global_cib->cmds->modify(global_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = ticket_grant; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; quiet_log(" + Granting ticket %s\n", ticket_id); rc = set_ticket_state_attr(ticket_id, "granted", "true", global_cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = ticket_revoke; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; quiet_log(" + Revoking ticket %s\n", ticket_id); rc = set_ticket_state_attr(ticket_id, "granted", "false", global_cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = ticket_standby; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; quiet_log(" + Making ticket %s standby\n", ticket_id); rc = set_ticket_state_attr(ticket_id, "standby", "true", global_cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = ticket_activate; gIter != NULL; gIter = gIter->next) { char *ticket_id = (char *)gIter->data; quiet_log(" + Activating ticket %s\n", ticket_id); rc = set_ticket_state_attr(ticket_id, "standby", "false", global_cib, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } for (gIter = op_inject; gIter != NULL; gIter = gIter->next) { char *spec = (char *)gIter->data; int rc = 0; int outcome = 0; int interval = 0; char *key = NULL; char *node = NULL; char *task = NULL; char *resource = NULL; const char *rtype = NULL; const char *rclass = NULL; const char *rprovider = NULL; resource_t *rsc = NULL; quiet_log(" + Injecting %s into the configuration\n", spec); key = calloc(1, strlen(spec) + 1); node = calloc(1, strlen(spec) + 1); rc = sscanf(spec, "%[^@]@%[^=]=%d", key, node, &outcome); CRM_CHECK(rc == 3, fprintf(stderr, "Invalid operation spec: %s. Only found %d fields\n", spec, rc); continue); parse_op_key(key, &resource, &task, &interval); rsc = pe_find_resource(data_set->resources, resource); if (rsc == NULL) { fprintf(stderr, " - Invalid resource name: %s\n", resource); } else { rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); rprovider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); cib_node = inject_node_state(global_cib, node); CRM_ASSERT(cib_node != NULL); update_failcounts(cib_node, resource, interval, outcome); cib_resource = inject_resource(cib_node, resource, rclass, rtype, rprovider); CRM_ASSERT(cib_resource != NULL); op = create_op(cib_resource, task, interval, outcome); CRM_ASSERT(op != NULL); cib_op = inject_op(cib_resource, op, 0); CRM_ASSERT(cib_op != NULL); lrmd_free_event(op); rc = global_cib->cmds->modify(global_cib, XML_CIB_TAG_STATUS, cib_node, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } free(task); free(node); free(key); } } static void setup_input(const char *input, const char *output) { int rc = pcmk_ok; cib_t *cib_conn = NULL; xmlNode *cib_object = NULL; char *local_output = NULL; if (input == NULL) { /* Use live CIB */ cib_conn = cib_new(); rc = cib_conn->cmds->signon(cib_conn, crm_system_name, cib_command); if (rc == pcmk_ok) { cib_object = get_cib_copy(cib_conn); } cib_conn->cmds->signoff(cib_conn); cib_delete(cib_conn); cib_conn = NULL; if (cib_object == NULL) { fprintf(stderr, "Live CIB query failed: empty result\n"); exit(3); } } else if (safe_str_eq(input, "-")) { cib_object = filename2xml(NULL); } else { cib_object = filename2xml(input); } if (get_object_root(XML_CIB_TAG_STATUS, cib_object) == NULL) { create_xml_node(cib_object, XML_CIB_TAG_STATUS); } if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) { free_xml(cib_object); exit(-ENOKEY); } if (validate_xml(cib_object, NULL, FALSE) != TRUE) { free_xml(cib_object); exit(-pcmk_err_dtd_validation); } if (output == NULL) { char *pid = crm_itoa(getpid()); local_output = get_shadow_file(pid); output = local_output; free(pid); } rc = write_xml_file(cib_object, output, FALSE); free_xml(cib_object); cib_object = NULL; if (rc < 0) { fprintf(stderr, "Could not create '%s': %s\n", output, strerror(errno)); exit(rc); } setenv("CIB_file", output, 1); free(local_output); } /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"quiet", 0, 0, 'Q', "\tDisplay only essentialoutput"}, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"-spacer-", 0, 0, '-', "\nOperations:"}, {"run", 0, 0, 'R', "\tDetermine the cluster's response to the given configuration and status"}, {"simulate", 0, 0, 'S', "Simulate the transition's execution and display the resulting cluster status"}, {"in-place", 0, 0, 'X', "Simulate the transition's execution and store the result back to the input file"}, {"show-scores", 0, 0, 's', "Show allocation scores"}, {"show-utilization", 0, 0, 'U', "Show utilization information"}, {"profile", 1, 0, 'P', "Run all tests in the named directory to create profiling data"}, {"-spacer-", 0, 0, '-', "\nSynthetic Cluster Events:"}, {"node-up", 1, 0, 'u', "\tBring a node online"}, {"node-down", 1, 0, 'd', "\tTake a node offline"}, {"node-fail", 1, 0, 'f', "\tMark a node as failed"}, {"op-inject", 1, 0, 'i', "\t$rsc_$task_$interval@$node=$rc - Inject the specified task before running the simulation"}, {"op-fail", 1, 0, 'F', "\t$rsc_$task_$interval@$node=$rc - Fail the specified task while running the simulation"}, {"set-datetime", 1, 0, 't', "Set date/time"}, {"quorum", 1, 0, 'q', "\tSpecify a value for quorum"}, {"ticket-grant", 1, 0, 'g', "Grant a ticket"}, {"ticket-revoke", 1, 0, 'r', "Revoke a ticket"}, {"ticket-standby", 1, 0, 'b', "Make a ticket standby"}, {"ticket-activate", 1, 0, 'e', "Activate a ticket"}, {"-spacer-", 0, 0, '-', "\nOutput Options:"}, {"save-input", 1, 0, 'I', "\tSave the input configuration to the named file"}, {"save-output", 1, 0, 'O', "Save the output configuration to the named file"}, {"save-graph", 1, 0, 'G', "\tSave the transition graph (XML format) to the named file"}, {"save-dotfile", 1, 0, 'D', "Save the transition graph (DOT format) to the named file"}, {"all-actions", 0, 0, 'a', "\tDisplay all possible actions in the DOT graph - even ones not part of the transition"}, {"-spacer-", 0, 0, '-', "\nData Source:"}, {"live-check", 0, 0, 'L', "\tConnect to the CIB and use the current contents as input"}, {"xml-file", 1, 0, 'x', "\tRetrieve XML from the named file"}, {"xml-pipe", 0, 0, 'p', "\tRetrieve XML from stdin"}, {0, 0, 0, 0} }; /* *INDENT-ON* */ static void profile_one(const char *xml_file) { xmlNode *cib_object = NULL; pe_working_set_t data_set; printf("* Testing %s\n", xml_file); cib_object = filename2xml(xml_file); if (get_object_root(XML_CIB_TAG_STATUS, cib_object) == NULL) { create_xml_node(cib_object, XML_CIB_TAG_STATUS); } if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) { free_xml(cib_object); return; } if (validate_xml(cib_object, NULL, FALSE) != TRUE) { free_xml(cib_object); return; } set_working_set_defaults(&data_set); data_set.input = cib_object; data_set.now = get_date(); do_calculations(&data_set, cib_object, NULL); cleanup_alloc_calculations(&data_set); } #ifndef FILENAME_MAX # define FILENAME_MAX 512 #endif static int profile_all(const char *dir) { struct dirent **namelist; int lpc = 0; int file_num = scandir(dir, &namelist, 0, alphasort); if (file_num > 0) { struct stat prop; char buffer[FILENAME_MAX + 1]; while (file_num--) { if ('.' == namelist[file_num]->d_name[0]) { free(namelist[file_num]); continue; } else if (strstr(namelist[file_num]->d_name, ".xml") == NULL) { free(namelist[file_num]); continue; } lpc++; snprintf(buffer, FILENAME_MAX, "%s/%s", dir, namelist[file_num]->d_name); if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) { profile_one(buffer); } free(namelist[file_num]); } free(namelist); } return lpc; } int main(int argc, char **argv) { int rc = 0; guint modified = 0; gboolean store = FALSE; gboolean process = FALSE; gboolean verbose = FALSE; gboolean simulate = FALSE; gboolean all_actions = FALSE; gboolean have_stdout = FALSE; pe_working_set_t data_set; const char *xml_file = "-"; const char *quorum = NULL; const char *test_dir = NULL; const char *dot_file = NULL; const char *graph_file = NULL; const char *input_file = NULL; const char *output_file = NULL; int flag = 0; int index = 0; int argerr = 0; GListPtr node_up = NULL; GListPtr node_down = NULL; GListPtr node_fail = NULL; GListPtr op_inject = NULL; GListPtr ticket_grant = NULL; GListPtr ticket_revoke = NULL; GListPtr ticket_standby = NULL; GListPtr ticket_activate = NULL; xmlNode *input = NULL; crm_log_cli_init("crm_simulate"); crm_set_options(NULL, "datasource operation [additional options]", long_options, "Tool for simulating the cluster's response to events"); if (argc < 2) { crm_help('?', EX_USAGE); } while (1) { flag = crm_get_option(argc, argv, &index); if (flag == -1) break; switch (flag) { case 'V': verbose = TRUE; if(have_stdout == FALSE) { /* Redirect stderr to stdout so we can grep the output */ have_stdout = TRUE; close(STDERR_FILENO); dup2(STDOUT_FILENO, STDERR_FILENO); } crm_bump_log_level(); break; case '?': case '$': crm_help(flag, EX_OK); break; case 'p': xml_file = "-"; break; case 'Q': quiet = TRUE; break; case 'L': xml_file = NULL; break; case 'x': xml_file = optarg; break; case 'u': modified++; node_up = g_list_append(node_up, optarg); break; case 'd': modified++; node_down = g_list_append(node_down, optarg); break; case 'f': modified++; node_fail = g_list_append(node_fail, optarg); break; case 't': use_date = strdup(optarg); break; case 'i': modified++; op_inject = g_list_append(op_inject, optarg); break; case 'F': process = TRUE; simulate = TRUE; op_fail = g_list_append(op_fail, optarg); break; case 'q': modified++; quorum = optarg; break; case 'g': modified++; ticket_grant = g_list_append(ticket_grant, optarg); break; case 'r': modified++; ticket_revoke = g_list_append(ticket_revoke, optarg); break; case 'b': modified++; ticket_standby = g_list_append(ticket_standby, optarg); break; case 'e': modified++; ticket_activate = g_list_append(ticket_activate, optarg); break; case 'a': all_actions = TRUE; break; case 's': process = TRUE; show_scores = TRUE; break; case 'U': process = TRUE; show_utilization = TRUE; break; case 'S': process = TRUE; simulate = TRUE; break; case 'X': store = TRUE; process = TRUE; simulate = TRUE; break; case 'R': process = TRUE; break; case 'D': process = TRUE; dot_file = optarg; break; case 'G': process = TRUE; graph_file = optarg; break; case 'I': input_file = optarg; break; case 'O': output_file = optarg; break; case 'P': test_dir = optarg; break; default: ++argerr; break; } } if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', EX_USAGE); } if(test_dir != NULL) { return profile_all(test_dir); } setup_input(xml_file, store ? xml_file : output_file); global_cib = cib_new(); global_cib->cmds->signon(global_cib, crm_system_name, cib_command); set_working_set_defaults(&data_set); if (data_set.now != NULL) { quiet_log(" + Setting effective cluster time: %s", use_date); log_date(LOG_WARNING, "Set fake 'now' to", data_set.now, ha_log_date | ha_log_time); } rc = global_cib->cmds->query(global_cib, NULL, &input, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); data_set.input = input; data_set.now = get_date(); cluster_status(&data_set); if (quiet == FALSE) { quiet_log("\nCurrent cluster status:\n"); print_cluster_status(&data_set); } if (modified) { quiet_log("Performing requested modifications\n"); modify_configuration(&data_set, quorum, node_up, node_down, node_fail, op_inject, ticket_grant, ticket_revoke, ticket_standby, ticket_activate); rc = global_cib->cmds->query(global_cib, NULL, &input, cib_sync_call); if (rc != pcmk_ok) { fprintf(stderr, "Could not connect to the CIB for input: %s\n", pcmk_strerror(rc)); goto done; } cleanup_alloc_calculations(&data_set); data_set.now = get_date(); data_set.input = input; } if (input_file != NULL) { rc = write_xml_file(input, input_file, FALSE); if (rc < 0) { fprintf(stderr, "Could not create '%s': %s\n", input_file, strerror(errno)); goto done; } } rc = 0; if (process || simulate) { ha_time_t *local_date = NULL; if (show_scores && show_utilization) { printf("Allocation scores and utilization information:\n"); } else if (show_scores) { fprintf(stdout, "Allocation scores:\n"); } else if (show_utilization) { printf("Utilization information:\n"); } do_calculations(&data_set, input, local_date); input = NULL; /* Don't try and free it twice */ if (graph_file != NULL) { char *msg_buffer = dump_xml_formatted(data_set.graph); FILE *graph_strm = fopen(graph_file, "w"); if (graph_strm == NULL) { crm_perror(LOG_ERR, "Could not open %s for writing", graph_file); } else { if (fprintf(graph_strm, "%s\n\n", msg_buffer) < 0) { crm_perror(LOG_ERR, "Write to %s failed", graph_file); } fflush(graph_strm); fclose(graph_strm); } free(msg_buffer); } if (dot_file != NULL) { create_dotfile(&data_set, dot_file, all_actions); } if (quiet == FALSE) { GListPtr gIter = NULL; quiet_log("%sTransition Summary:\n", show_scores || show_utilization || modified ? "\n" : ""); fflush(stdout); for (gIter = data_set.resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; LogActions(rsc, &data_set, TRUE); } } } if (simulate) { rc = run_simulation(&data_set); } done: cleanup_alloc_calculations(&data_set); global_cib->cmds->signoff(global_cib); cib_delete(global_cib); free(use_date); crm_xml_cleanup(); fflush(stderr); qb_log_fini(); return rc; }