diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c index e5d5f69b08..4b53aaa972 100644 --- a/daemons/controld/controld_schedulerd.c +++ b/daemons/controld/controld_schedulerd.c @@ -1,384 +1,385 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include /* pid_t, sleep, ssize_t */ #include #include #include #include #include #include #include #include /* register_fsa_error_adv */ static mainloop_io_t *pe_subsystem = NULL; /*! * \internal * \brief Close any scheduler connection and free associated memory */ void pe_subsystem_free(void) { if (pe_subsystem) { mainloop_del_ipc_client(pe_subsystem); pe_subsystem = NULL; } } /*! * \internal * \brief Save CIB query result to file, raising FSA error * * \param[in] msg Ignored * \param[in] call_id Call ID of CIB query * \param[in] rc Return code of CIB query * \param[in] output Result of CIB query * \param[in] user_data Unique identifier for filename (will be freed) * * \note This is intended to be called after a scheduler connection fails. */ static void save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { char *id = user_data; register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); CRM_CHECK(id != NULL, return); if (rc == pcmk_ok) { char *filename = crm_strdup_printf(PE_STATE_DIR "/pe-core-%s.bz2", id); if (write_xml_file(output, filename, TRUE) < 0) { crm_err("Could not save Cluster Information Base to %s after scheduler crash", filename); } else { crm_notice("Saved Cluster Information Base to %s after scheduler crash", filename); } free(filename); } } /*! 
* \internal * \brief Respond to scheduler connection failure * * \param[in] user_data Ignored */ static void pe_ipc_destroy(gpointer user_data) { if (is_set(fsa_input_register, R_PE_REQUIRED)) { int rc = pcmk_ok; char *uuid_str = crm_generate_uuid(); crm_crit("Connection to the scheduler failed " CRM_XS " uuid=%s", uuid_str); /* * The scheduler died... * * Save the current CIB so that we have a chance of * figuring out what killed it. * * Delay raising the I_ERROR until the query below completes or * 5s is up, whichever comes first. * */ rc = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); fsa_register_cib_callback(rc, FALSE, uuid_str, save_cib_contents); } else { crm_info("Connection to the scheduler released"); } clear_bit(fsa_input_register, R_PE_CONNECTED); pe_subsystem = NULL; mainloop_set_trigger(fsa_source); return; } /*! * \internal * \brief Handle message from scheduler connection * * \param[in] buffer XML message (will be freed) * \param[in] length Ignored * \param[in] userdata Ignored * * \return 0 */ static int pe_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata) { xmlNode *msg = string2xml(buffer); if (msg) { route_message(C_IPC_MESSAGE, msg); } free_xml(msg); return 0; } /*! * \internal * \brief Make new connection to PE * * \return TRUE on success, FALSE otherwise */ static bool pe_subsystem_new() { static struct ipc_client_callbacks pe_callbacks = { .dispatch = pe_ipc_dispatch, .destroy = pe_ipc_destroy }; pe_subsystem = mainloop_add_ipc_client(CRM_SYSTEM_PENGINE, G_PRIORITY_DEFAULT, 5 * 1024 * 1024 /* 5MB */, NULL, &pe_callbacks); return (pe_subsystem != NULL); } /*! 
* \internal * \brief Send an XML message to the PE * * \param[in] cmd XML message to send * * \return pcmk_ok on success, -errno otherwise */ static int pe_subsystem_send(xmlNode *cmd) { if (pe_subsystem) { int sent = crm_ipc_send(mainloop_get_ipc_client(pe_subsystem), cmd, 0, 0, NULL); if (sent == 0) { sent = -ENODATA; } else if (sent > 0) { sent = pcmk_ok; } return sent; } return -ENOTCONN; } static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); /* A_PE_START, A_PE_STOP, O_PE_RESTART */ void do_pe_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (action & A_PE_STOP) { clear_bit(fsa_input_register, R_PE_REQUIRED); pe_subsystem_free(); clear_bit(fsa_input_register, R_PE_CONNECTED); } if ((action & A_PE_START) && (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE)) { if (cur_state != S_STOPPING) { set_bit(fsa_input_register, R_PE_REQUIRED); if (pe_subsystem_new()) { set_bit(fsa_input_register, R_PE_CONNECTED); } else { crm_warn("Could not connect to scheduler"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } else { crm_info("Ignoring request to connect to scheduler while shutting down"); } } } int fsa_pe_query = 0; char *fsa_pe_ref = NULL; /* A_PE_INVOKE */ void do_pe_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (AM_I_DC == FALSE) { crm_err("Not invoking scheduler because not DC: %s", fsa_action2string(action)); return; } if (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) { if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Cannot shut down gracefully without the scheduler"); register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL); } else { crm_info("Waiting for the scheduler to connect"); crmd_fsa_stall(FALSE); register_fsa_action(A_PE_START); } return; } if (cur_state != S_POLICY_ENGINE) { 
crm_notice("Not invoking scheduler because in state %s", fsa_state2string(cur_state)); return; } if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!"); /* start the join from scratch */ register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL); return; } fsa_pe_query = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query, fsa_state2string(fsa_state)); /* Make sure any queued calculations are discarded */ free(fsa_pe_ref); fsa_pe_ref = NULL; fsa_register_cib_callback(fsa_pe_query, FALSE, NULL, do_pe_invoke_callback); } static void force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value) { int max = 0; int lpc = 0; char *xpath_string = NULL; xmlXPathObjectPtr xpathObj = NULL; xpath_string = crm_strdup_printf("%.128s//%s//nvpair[@name='%.128s']", get_object_path(XML_CIB_TAG_CRMCONFIG), XML_CIB_TAG_PROPSET, attr_name); xpathObj = xpath_search(xml, xpath_string); max = numXpathResults(xpathObj); free(xpath_string); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); crm_trace("Forcing %s/%s = %s", ID(match), attr_name, attr_value); crm_xml_add(match, XML_NVPAIR_ATTR_VALUE, attr_value); } if(max == 0) { xmlNode *configuration = NULL; xmlNode *crm_config = NULL; xmlNode *cluster_property_set = NULL; crm_trace("Creating %s-%s for %s=%s", CIB_OPTIONS_FIRST, attr_name, attr_name, attr_value); configuration = find_entity(xml, XML_CIB_TAG_CONFIGURATION, NULL); if (configuration == NULL) { configuration = create_xml_node(xml, XML_CIB_TAG_CONFIGURATION); } crm_config = find_entity(configuration, XML_CIB_TAG_CRMCONFIG, NULL); if (crm_config == NULL) { crm_config = create_xml_node(configuration, XML_CIB_TAG_CRMCONFIG); } cluster_property_set = find_entity(crm_config, XML_CIB_TAG_PROPSET, NULL); if (cluster_property_set == NULL) { cluster_property_set = 
create_xml_node(crm_config, XML_CIB_TAG_PROPSET); crm_xml_add(cluster_property_set, XML_ATTR_ID, CIB_OPTIONS_FIRST); } xml = create_xml_node(cluster_property_set, XML_CIB_TAG_NVPAIR); crm_xml_set_id(xml, "%s-%s", CIB_OPTIONS_FIRST, attr_name); crm_xml_add(xml, XML_NVPAIR_ATTR_NAME, attr_name); crm_xml_add(xml, XML_NVPAIR_ATTR_VALUE, attr_value); } freeXpathObject(xpathObj); } static void do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { xmlNode *cmd = NULL; pid_t watchdog = pcmk_locate_sbd(); if (rc != pcmk_ok) { crm_err("Could not retrieve the Cluster Information Base: %s " CRM_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); return; } else if (call_id != fsa_pe_query) { crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query); return; } else if (AM_I_DC == FALSE || is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) { crm_debug("No need to invoke the scheduler anymore"); return; } else if (fsa_state != S_POLICY_ENGINE) { crm_debug("Discarding scheduler request in state: %s", fsa_state2string(fsa_state)); return; /* this callback counts as 1 */ } else if (num_cib_op_callbacks() > 1) { crm_debug("Re-asking for the CIB: %d other peer updates still pending", (num_cib_op_callbacks() - 1)); sleep(1); register_fsa_action(A_PE_INVOKE); return; } else if (fsa_state != S_POLICY_ENGINE) { crm_err("Invoking scheduler in state: %s", fsa_state2string(fsa_state)); return; } CRM_LOG_ASSERT(output != NULL); - // Refresh the remote node cache when the scheduler is invoked - crm_remote_peer_cache_refresh(output); + /* Refresh the remote node cache and the known node cache when the + * scheduler is invoked */ + crm_peer_caches_refresh(output); crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid); crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum); force_local_option(output, XML_ATTR_HAVE_WATCHDOG, watchdog?"true":"false"); 
if (ever_had_quorum && crm_have_quorum == FALSE) { crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1); } cmd = create_request(CRM_OP_PECALC, output, NULL, CRM_SYSTEM_PENGINE, CRM_SYSTEM_DC, NULL); free(fsa_pe_ref); fsa_pe_ref = crm_element_value_copy(cmd, XML_ATTR_REFERENCE); rc = pe_subsystem_send(cmd); if (rc < 0) { crm_err("Could not contact the scheduler: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); } crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, quorate=%d", fsa_pe_query, fsa_pe_ref, crm_peer_seq, fsa_has_quorum); free_xml(cmd); } diff --git a/daemons/controld/controld_te_utils.c b/daemons/controld/controld_te_utils.c index 3f538b9bc2..5606ed654a 100644 --- a/daemons/controld/controld_te_utils.c +++ b/daemons/controld/controld_te_utils.c @@ -1,718 +1,718 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include crm_trigger_t *stonith_reconnect = NULL; static crm_trigger_t *stonith_history_sync_trigger = NULL; static mainloop_timer_t *stonith_history_sync_timer = NULL; /* * stonith cleanup list * * If the DC is shot, proper notifications might not go out. * The stonith cleanup list allows the cluster to (re-)send * notifications once a new DC is elected. */ static GListPtr stonith_cleanup_list = NULL; /*! * \internal * \brief Add a node to the stonith cleanup list * * \param[in] target Name of node to add */ void add_stonith_cleanup(const char *target) { stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target)); } /*! 
* \internal * \brief Remove a node from the stonith cleanup list * * \param[in] Name of node to remove */ void remove_stonith_cleanup(const char *target) { GListPtr iter = stonith_cleanup_list; while (iter != NULL) { GListPtr tmp = iter; char *iter_name = tmp->data; iter = iter->next; if (safe_str_eq(target, iter_name)) { crm_trace("Removing %s from the cleanup list", iter_name); stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp); free(iter_name); } } } /*! * \internal * \brief Purge all entries from the stonith cleanup list */ void purge_stonith_cleanup() { if (stonith_cleanup_list) { GListPtr iter = NULL; for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; crm_info("Purging %s from stonith cleanup list", target); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } } /*! * \internal * \brief Send stonith updates for all entries in cleanup list, then purge it */ void execute_stonith_cleanup() { GListPtr iter; for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; crm_node_t *target_node = crm_get_peer(0, target); const char *uuid = crm_peer_uuid(target_node); crm_notice("Marking %s, target of a previous stonith action, as clean", target); send_stonith_update(NULL, target, uuid); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } /* end stonith cleanup list functions */ static gboolean fail_incompletable_stonith(crm_graph_t * graph) { GListPtr lpc = NULL; const char *task = NULL; xmlNode *last_action = NULL; if (graph == NULL) { return FALSE; } for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { GListPtr lpc2 = NULL; synapse_t *synapse = (synapse_t *) lpc->data; if (synapse->confirmed) { continue; } for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) { crm_action_t *action = (crm_action_t *) lpc2->data; if (action->type != action_type_crm || action->confirmed) { continue; } task = 
crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (task && safe_str_eq(task, CRM_OP_FENCE)) { action->failed = TRUE; last_action = action->xml; update_graph(graph, action); crm_notice("Failing action %d (%s): fencer terminated", action->id, ID(action->xml)); } } } if (last_action != NULL) { crm_warn("Fencer failure resulted in unrunnable actions"); abort_for_stonith_failure(tg_restart, NULL, last_action); return TRUE; } return FALSE; } static void tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e) { if (is_set(fsa_input_register, R_ST_REQUIRED)) { crm_crit("Fencing daemon connection failed"); mainloop_set_trigger(stonith_reconnect); } else { crm_info("Fencing daemon disconnected"); } /* cbchan will be garbage at this point, arrange for it to be reset */ if(stonith_api) { stonith_api->state = stonith_disconnected; } if (AM_I_DC) { fail_incompletable_stonith(transition_graph); trigger_graph(); } } char *te_client_id = NULL; #ifdef HAVE_SYS_REBOOT_H # include # include #endif static void tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event) { if(te_client_id == NULL) { te_client_id = crm_strdup_printf("%s.%lu", crm_system_name, (unsigned long) getpid()); } if (st_event == NULL) { crm_err("Notify data not found"); return; } crmd_alert_fencing_op(st_event); if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) { crm_notice("%s was successfully unfenced by %s (at the request of %s)", st_event->target, st_event->executioner ? st_event->executioner : "", st_event->origin); /* TODO: Hook up st_event->device */ return; } else if (safe_str_eq("on", st_event->action)) { crm_err("Unfencing of %s by %s failed: %s (%d)", st_event->target, st_event->executioner ? st_event->executioner : "", pcmk_strerror(st_event->result), st_event->result); return; } else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) { crm_crit("We were allegedly just fenced by %s for %s!", st_event->executioner ? 
st_event->executioner : "", st_event->origin); /* Dumps blackbox if enabled */ qb_log_fini(); /* Try to get the above log message to disk - somehow */ /* Get out ASAP and do not come back up. * * Triggering a reboot is also not the worst idea either since * the rest of the cluster thinks we're safely down */ #ifdef RB_HALT_SYSTEM reboot(RB_HALT_SYSTEM); #endif /* * If reboot() fails or is not supported, coming back up will * probably lead to a situation where the other nodes set our * status to 'lost' because of the fencing callback and will * discard subsequent election votes with: * * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster) * * So just stay dead, something is seriously messed up anyway. * */ exit(CRM_EX_FATAL); // None of our wrappers since we already called qb_log_fini() return; } /* Update the count of stonith failures for this target, in case we become * DC later. The current DC has already updated its fail count in * tengine_stonith_callback(). */ if (!AM_I_DC && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) { if (st_event->result == pcmk_ok) { st_fail_count_reset(st_event->target); } else { st_fail_count_increment(st_event->target); } } crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s " CRM_XS " initiator=%s ref=%s", st_event->target, st_event->result == pcmk_ok ? "" : " not", st_event->action, st_event->executioner ? st_event->executioner : "", (st_event->client_origin? 
st_event->client_origin : ""), pcmk_strerror(st_event->result), st_event->origin, st_event->id); if (st_event->result == pcmk_ok) { - crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_ANY); + crm_node_t *peer = crm_find_known_peer_full(0, st_event->target, CRM_GET_PEER_ANY); const char *uuid = NULL; gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname); if (peer == NULL) { return; } uuid = crm_peer_uuid(peer); crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc); if(AM_I_DC) { /* The DC always sends updates */ send_stonith_update(NULL, st_event->target, uuid); /* @TODO Ideally, at this point, we'd check whether the fenced node * hosted any guest nodes, and call remote_node_down() for them. * Unfortunately, the controller doesn't have a simple, reliable way * to map hosts to guests. It might be possible to track this in the * peer cache via crm_remote_peer_cache_refresh(). For now, we rely * on the PE creating fence pseudo-events for the guests. */ if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) { /* Abort the current transition graph if it wasn't us * that invoked stonith to fence someone */ crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target); abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL); } /* Assume it was our leader if we don't currently have one */ } else if (((fsa_our_dc == NULL) || safe_str_eq(fsa_our_dc, st_event->target)) && !is_set(peer->flags, crm_remote_node)) { crm_notice("Target %s our leader %s (recorded: %s)", fsa_our_dc ? "was" : "may have been", st_event->target, fsa_our_dc ? 
fsa_our_dc : ""); /* Given the CIB resyncing that occurs around elections, * have one node update the CIB now and, if the new DC is different, * have them do so too after the election */ if (we_are_executioner) { send_stonith_update(NULL, st_event->target, uuid); } add_stonith_cleanup(st_event->target); } /* If the target is a remote node, and we host its connection, * immediately fail all monitors so it can be recovered quickly. * The connection won't necessarily drop when a remote node is fenced, * so the failure might not otherwise be detected until the next poke. */ if (is_set(peer->flags, crm_remote_node)) { remote_ra_fail(st_event->target); } crmd_peer_down(peer, TRUE); } } static gboolean do_stonith_history_sync(gpointer user_data) { if (stonith_api && (stonith_api->state != stonith_disconnected)) { stonith_history_t *history = NULL; stonith_api->cmds->history(stonith_api, st_opt_sync_call | st_opt_broadcast, NULL, &history, 5); stonith_history_free(history); return TRUE; } else { crm_info("Skip triggering stonith history-sync as stonith is disconnected"); return FALSE; } } static gboolean stonith_history_sync_set_trigger(gpointer user_data) { mainloop_set_trigger(stonith_history_sync_trigger); return FALSE; } void te_trigger_stonith_history_sync(void) { /* trigger a sync in 5s to give more nodes the * chance to show up so that we don't create * unnecessary stonith-history-sync traffic */ /* as we are finally checking the stonith-connection * in do_stonith_history_sync we should be fine * leaving stonith_history_sync_time & stonith_history_sync_trigger * around */ if (stonith_history_sync_trigger == NULL) { stonith_history_sync_trigger = mainloop_add_trigger(G_PRIORITY_LOW, do_stonith_history_sync, NULL); } if(stonith_history_sync_timer == NULL) { stonith_history_sync_timer = mainloop_timer_add("history_sync", 5000, FALSE, stonith_history_sync_set_trigger, NULL); } crm_info("Fence history will be synchronized cluster-wide within 5 seconds"); 
mainloop_timer_start(stonith_history_sync_timer); } gboolean te_connect_stonith(gpointer user_data) { int lpc = 0; int rc = pcmk_ok; if (stonith_api == NULL) { stonith_api = stonith_api_new(); } if (stonith_api->state != stonith_disconnected) { crm_trace("Still connected"); return TRUE; } for (lpc = 0; lpc < 30; lpc++) { crm_debug("Attempting connection to fencing daemon..."); sleep(1); rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL); if (rc == pcmk_ok) { break; } if (user_data != NULL) { if (is_set(fsa_input_register, R_ST_REQUIRED)) { crm_err("Sign-in failed: triggered a retry"); mainloop_set_trigger(stonith_reconnect); } else { crm_info("Sign-in failed, but no longer required"); } return TRUE; } crm_err("Sign-in failed: pausing and trying again in 2s..."); sleep(1); } CRM_CHECK(rc == pcmk_ok, return TRUE); /* If not, we failed 30 times... just get out */ stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT, tengine_stonith_connection_destroy); stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE, tengine_stonith_notify); crm_trace("Connected"); return TRUE; } gboolean stop_te_timer(crm_action_timer_t * timer) { if (timer == NULL) { return FALSE; } if (timer->source_id != 0) { crm_trace("Stopping action timer"); g_source_remove(timer->source_id); timer->source_id = 0; } else { crm_trace("Action timer was already stopped"); return FALSE; } return TRUE; } gboolean te_graph_trigger(gpointer user_data) { enum transition_status graph_rc = -1; if (transition_graph == NULL) { crm_debug("Nothing to do"); return TRUE; } crm_trace("Invoking graph %d in state %s", transition_graph->id, fsa_state2string(fsa_state)); switch (fsa_state) { case S_STARTING: case S_PENDING: case S_NOT_DC: case S_HALT: case S_ILLEGAL: case S_STOPPING: case S_TERMINATE: return TRUE; break; default: break; } if (transition_graph->complete == FALSE) { int limit = transition_graph->batch_limit; transition_graph->batch_limit = 
throttle_get_total_job_limit(limit); graph_rc = run_graph(transition_graph); transition_graph->batch_limit = limit; /* Restore the configured value */ /* significant overhead... */ /* print_graph(LOG_TRACE, transition_graph); */ if (graph_rc == transition_active) { crm_trace("Transition not yet complete"); return TRUE; } else if (graph_rc == transition_pending) { crm_trace("Transition not yet complete - no actions fired"); return TRUE; } if (graph_rc != transition_complete) { crm_warn("Transition failed: %s", transition_status(graph_rc)); print_graph(LOG_NOTICE, transition_graph); } } crm_debug("Transition %d is now complete", transition_graph->id); transition_graph->complete = TRUE; notify_crmd(transition_graph); return TRUE; } void trigger_graph_processing(const char *fn, int line) { crm_trace("%s:%d - Triggered graph processing", fn, line); mainloop_set_trigger(transition_trigger); } static struct abort_timer_s { bool aborted; guint id; int priority; enum transition_action action; const char *text; } abort_timer = { 0, }; static gboolean abort_timer_popped(gpointer data) { if (AM_I_DC && (abort_timer.aborted == FALSE)) { abort_transition(abort_timer.priority, abort_timer.action, abort_timer.text, NULL); } abort_timer.id = 0; return FALSE; // do not immediately reschedule timer } /*! 
* \internal * \brief Abort transition after delay, if not already aborted in that time * * \param[in] abort_text Must be literal string */ void abort_after_delay(int abort_priority, enum transition_action abort_action, const char *abort_text, guint delay_ms) { if (abort_timer.id) { // Timer already in progress, stop and reschedule g_source_remove(abort_timer.id); } abort_timer.aborted = FALSE; abort_timer.priority = abort_priority; abort_timer.action = abort_action; abort_timer.text = abort_text; abort_timer.id = g_timeout_add(delay_ms, abort_timer_popped, NULL); } void abort_transition_graph(int abort_priority, enum transition_action abort_action, const char *abort_text, xmlNode * reason, const char *fn, int line) { int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; int level = LOG_INFO; xmlNode *diff = NULL; xmlNode *change = NULL; CRM_CHECK(transition_graph != NULL, return); switch (fsa_state) { case S_STARTING: case S_PENDING: case S_NOT_DC: case S_HALT: case S_ILLEGAL: case S_STOPPING: case S_TERMINATE: crm_info("Abort %s suppressed: state=%s (complete=%d)", abort_text, fsa_state2string(fsa_state), transition_graph->complete); return; default: break; } abort_timer.aborted = TRUE; /* Make sure any queued calculations are discarded ASAP */ free(fsa_pe_ref); fsa_pe_ref = NULL; if (transition_graph->complete == FALSE) { if(update_abort_priority(transition_graph, abort_priority, abort_action, abort_text)) { level = LOG_NOTICE; } } if(reason) { xmlNode *search = NULL; for(search = reason; search; search = search->parent) { if (safe_str_eq(XML_TAG_DIFF, TYPE(search))) { diff = search; break; } } if(diff) { xml_patch_versions(diff, add, del); for(search = reason; search; search = search->parent) { if (safe_str_eq(XML_DIFF_CHANGE, TYPE(search))) { change = search; break; } } } } if(reason == NULL) { do_crm_log(level, "Transition %d aborted: %s "CRM_XS" source=%s:%d complete=%s", transition_graph->id, abort_text, fn, line, (transition_graph->complete? 
"true" : "false")); } else if(change == NULL) { char *local_path = xml_get_path(reason); do_crm_log(level, "Transition %d aborted by %s.%s: %s " CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", transition_graph->id, TYPE(reason), ID(reason), abort_text, add[0], add[1], add[2], fn, line, local_path, (transition_graph->complete? "true" : "false")); free(local_path); } else { const char *kind = NULL; const char *op = crm_element_value(change, XML_DIFF_OP); const char *path = crm_element_value(change, XML_DIFF_PATH); if(change == reason) { if(strcmp(op, "create") == 0) { reason = reason->children; } else if(strcmp(op, "modify") == 0) { reason = first_named_child(reason, XML_DIFF_RESULT); if(reason) { reason = reason->children; } } } kind = TYPE(reason); if(strcmp(op, "delete") == 0) { const char *shortpath = strrchr(path, '/'); do_crm_log(level, "Transition %d aborted by deletion of %s: %s " CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", transition_graph->id, (shortpath? (shortpath + 1) : path), abort_text, add[0], add[1], add[2], fn, line, path, (transition_graph->complete? "true" : "false")); } else if (safe_str_eq(XML_CIB_TAG_NVPAIR, kind)) { do_crm_log(level, "Transition %d aborted by %s doing %s %s=%s: %s " CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", transition_graph->id, crm_element_value(reason, XML_ATTR_ID), op, crm_element_value(reason, XML_NVPAIR_ATTR_NAME), crm_element_value(reason, XML_NVPAIR_ATTR_VALUE), abort_text, add[0], add[1], add[2], fn, line, path, (transition_graph->complete? 
"true" : "false")); } else if (safe_str_eq(XML_LRM_TAG_RSC_OP, kind)) { const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC); do_crm_log(level, "Transition %d aborted by operation %s '%s' on %s: %s " CRM_XS " magic=%s cib=%d.%d.%d source=%s:%d complete=%s", transition_graph->id, crm_element_value(reason, XML_LRM_ATTR_TASK_KEY), op, crm_element_value(reason, XML_LRM_ATTR_TARGET), abort_text, magic, add[0], add[1], add[2], fn, line, (transition_graph->complete? "true" : "false")); } else if (safe_str_eq(XML_CIB_TAG_STATE, kind) || safe_str_eq(XML_CIB_TAG_NODE, kind)) { const char *uname = crm_peer_uname(ID(reason)); do_crm_log(level, "Transition %d aborted by %s '%s' on %s: %s " CRM_XS " cib=%d.%d.%d source=%s:%d complete=%s", transition_graph->id, kind, op, (uname? uname : ID(reason)), abort_text, add[0], add[1], add[2], fn, line, (transition_graph->complete? "true" : "false")); } else { const char *id = ID(reason); do_crm_log(level, "Transition %d aborted by %s.%s '%s': %s " CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", transition_graph->id, TYPE(reason), (id? id : ""), (op? op : "change"), abort_text, add[0], add[1], add[2], fn, line, path, (transition_graph->complete? "true" : "false")); } } if (transition_graph->complete) { if (transition_timer->period_ms > 0) { crm_timer_stop(transition_timer); crm_timer_start(transition_timer); } else { register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL); } return; } mainloop_set_trigger(transition_trigger); } diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h index 369f22700c..12bf41ab02 100644 --- a/include/crm/cluster/internal.h +++ b/include/crm/cluster/internal.h @@ -1,332 +1,335 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
*/ #ifndef CRM_CLUSTER_INTERNAL__H # define CRM_CLUSTER_INTERNAL__H # include typedef struct crm_ais_host_s AIS_Host; typedef struct crm_ais_msg_s AIS_Message; struct crm_ais_host_s { uint32_t id; uint32_t pid; gboolean local; enum crm_ais_msg_types type; uint32_t size; char uname[MAX_NAME]; } __attribute__ ((packed)); struct crm_ais_msg_s { cs_ipc_header_response_t header __attribute__ ((aligned(8))); uint32_t id; gboolean is_compressed; AIS_Host host; AIS_Host sender; uint32_t size; uint32_t compressed_size; /* 584 bytes */ char data[0]; } __attribute__ ((packed)); /* *INDENT-OFF* */ enum crm_proc_flag { crm_proc_none = 0x00000001, // Cluster layers crm_proc_cpg = 0x04000000, // Daemons crm_proc_execd = 0x00000010, crm_proc_based = 0x00000100, crm_proc_controld = 0x00000200, crm_proc_attrd = 0x00001000, crm_proc_schedulerd = 0x00010000, crm_proc_fenced = 0x00100000, }; /* *INDENT-ON* */ /*! * \internal * \brief Return the process bit corresponding to the current cluster stack * * \return Process flag if detectable, otherwise 0 */ static inline uint32_t crm_get_cluster_proc() { switch (get_cluster_type()) { case pcmk_cluster_corosync: return crm_proc_cpg; default: break; } return crm_proc_none; } static inline const char * peer2text(enum crm_proc_flag proc) { const char *text = "unknown"; switch (proc) { case crm_proc_none: text = "none"; break; case crm_proc_based: text = "pacemaker-based"; break; case crm_proc_controld: text = "pacemaker-controld"; break; case crm_proc_schedulerd: text = "pacemaker-schedulerd"; break; case crm_proc_execd: text = "pacemaker-execd"; break; case crm_proc_attrd: text = "pacemaker-attrd"; break; case crm_proc_fenced: text = "pacemaker-fenced"; break; case crm_proc_cpg: text = "corosync-cpg"; break; } return text; } static inline const char * ais_dest(const AIS_Host *host) { if (host->local) { return "local"; } else if (host->size > 0) { return host->uname; } else { return ""; } } # define ais_data_len(msg) 
(msg->is_compressed?msg->compressed_size:msg->size) /* typedef enum { CS_OK = 1, CS_ERR_LIBRARY = 2, CS_ERR_VERSION = 3, CS_ERR_INIT = 4, CS_ERR_TIMEOUT = 5, CS_ERR_TRY_AGAIN = 6, CS_ERR_INVALID_PARAM = 7, CS_ERR_NO_MEMORY = 8, CS_ERR_BAD_HANDLE = 9, CS_ERR_BUSY = 10, CS_ERR_ACCESS = 11, CS_ERR_NOT_EXIST = 12, CS_ERR_NAME_TOO_LONG = 13, CS_ERR_EXIST = 14, CS_ERR_NO_SPACE = 15, CS_ERR_INTERRUPT = 16, CS_ERR_NAME_NOT_FOUND = 17, CS_ERR_NO_RESOURCES = 18, CS_ERR_NOT_SUPPORTED = 19, CS_ERR_BAD_OPERATION = 20, CS_ERR_FAILED_OPERATION = 21, CS_ERR_MESSAGE_ERROR = 22, CS_ERR_QUEUE_FULL = 23, CS_ERR_QUEUE_NOT_AVAILABLE = 24, CS_ERR_BAD_FLAGS = 25, CS_ERR_TOO_BIG = 26, CS_ERR_NO_SECTIONS = 27, CS_ERR_CONTEXT_NOT_FOUND = 28, CS_ERR_TOO_MANY_GROUPS = 30, CS_ERR_SECURITY = 100 } cs_error_t; */ static inline const char * ais_error2text(int error) { const char *text = "unknown"; # if SUPPORT_COROSYNC switch (error) { case CS_OK: text = "OK"; break; case CS_ERR_LIBRARY: text = "Library error"; break; case CS_ERR_VERSION: text = "Version error"; break; case CS_ERR_INIT: text = "Initialization error"; break; case CS_ERR_TIMEOUT: text = "Timeout"; break; case CS_ERR_TRY_AGAIN: text = "Try again"; break; case CS_ERR_INVALID_PARAM: text = "Invalid parameter"; break; case CS_ERR_NO_MEMORY: text = "No memory"; break; case CS_ERR_BAD_HANDLE: text = "Bad handle"; break; case CS_ERR_BUSY: text = "Busy"; break; case CS_ERR_ACCESS: text = "Access error"; break; case CS_ERR_NOT_EXIST: text = "Doesn't exist"; break; case CS_ERR_NAME_TOO_LONG: text = "Name too long"; break; case CS_ERR_EXIST: text = "Exists"; break; case CS_ERR_NO_SPACE: text = "No space"; break; case CS_ERR_INTERRUPT: text = "Interrupt"; break; case CS_ERR_NAME_NOT_FOUND: text = "Name not found"; break; case CS_ERR_NO_RESOURCES: text = "No resources"; break; case CS_ERR_NOT_SUPPORTED: text = "Not supported"; break; case CS_ERR_BAD_OPERATION: text = "Bad operation"; break; case CS_ERR_FAILED_OPERATION: text = "Failed 
operation"; break; case CS_ERR_MESSAGE_ERROR: text = "Message error"; break; case CS_ERR_QUEUE_FULL: text = "Queue full"; break; case CS_ERR_QUEUE_NOT_AVAILABLE: text = "Queue not available"; break; case CS_ERR_BAD_FLAGS: text = "Bad flags"; break; case CS_ERR_TOO_BIG: text = "Too big"; break; case CS_ERR_NO_SECTIONS: text = "No sections"; break; } # endif return text; } static inline const char * msg_type2text(enum crm_ais_msg_types type) { const char *text = "unknown"; switch (type) { case crm_msg_none: text = "unknown"; break; case crm_msg_ais: text = "ais"; break; case crm_msg_cib: text = "cib"; break; case crm_msg_crmd: text = "crmd"; break; case crm_msg_pe: text = "pengine"; break; case crm_msg_te: text = "tengine"; break; case crm_msg_lrmd: text = "lrmd"; break; case crm_msg_attrd: text = "attrd"; break; case crm_msg_stonithd: text = "stonithd"; break; case crm_msg_stonith_ng: text = "stonith-ng"; break; } return text; } gboolean check_message_sanity(const AIS_Message * msg, const char *data); # if SUPPORT_COROSYNC gboolean send_cpg_iov(struct iovec * iov); char *get_corosync_uuid(crm_node_t *peer); char *corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid); char *corosync_cluster_name(void); int corosync_cmap_has_config(const char *prefix); gboolean corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml_parent); gboolean send_cluster_message_cs(xmlNode * msg, gboolean local, crm_node_t * node, enum crm_ais_msg_types dest); enum cluster_type_e find_corosync_variant(void); void terminate_cs_connection(crm_cluster_t * cluster); gboolean init_cs_connection(crm_cluster_t * cluster); gboolean init_cs_connection_once(crm_cluster_t * cluster); # endif crm_node_t *crm_update_peer_proc(const char *source, crm_node_t * peer, uint32_t flag, const char *status); crm_node_t *crm_update_peer_state(const char *source, crm_node_t * node, const char *state, int membership); void crm_update_peer_uname(crm_node_t *node, const 
char *uname); void crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected); void crm_reap_unseen_nodes(uint64_t ring_id); gboolean cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)); gboolean node_name_is_valid(const char *key, const char *name); crm_node_t * crm_find_peer_full(unsigned int id, const char *uname, int flags); crm_node_t * crm_find_peer(unsigned int id, const char *uname); +void crm_peer_caches_refresh(xmlNode *cib); +crm_node_t *crm_find_known_peer_full(unsigned int id, const char *uname, int flags); + #endif diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index a487e762a8..c364f39cda 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -1,1003 +1,1180 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #define s_if_plural(i) (((i) == 1)? "" : "s") /* The peer cache remembers cluster nodes that have been seen. * This is managed mostly automatically by libcluster, based on * cluster membership events. * * Because cluster nodes can have conflicting names or UUIDs, * the hash table key is a uniquely generated ID. */ GHashTable *crm_peer_cache = NULL; /* * The remote peer cache tracks pacemaker_remote nodes. While the * value has the same type as the peer cache's, it is tracked separately for * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs, * so the name (which is also the UUID) is used as the hash table key; there * is no equivalent of membership events, so management is not automatic; and * most users of the peer cache need to exclude pacemaker_remote nodes. 
* * That said, using a single cache would be more logical and less error-prone, * so it would be a good idea to merge them one day. * * libcluster provides two avenues for populating the cache: * crm_remote_peer_get() and crm_remote_peer_cache_remove() directly manage it, * while crm_remote_peer_cache_refresh() populates it via the CIB. */ GHashTable *crm_remote_peer_cache = NULL; +GHashTable *crm_known_peer_cache = NULL; + unsigned long long crm_peer_seq = 0; gboolean crm_have_quorum = FALSE; static gboolean crm_autoreap = TRUE; int crm_remote_peer_cache_size(void) { if (crm_remote_peer_cache == NULL) { return 0; } return g_hash_table_size(crm_remote_peer_cache); } /*! * \brief Get a remote node peer cache entry, creating it if necessary * * \param[in] node_name Name of remote node * * \return Cache entry for node on success, NULL (and set errno) otherwise * * \note When creating a new entry, this will leave the node state undetermined, * so the caller should also call crm_update_peer_state() if the state is * known. 
*/ crm_node_t * crm_remote_peer_get(const char *node_name) { crm_node_t *node; if (node_name == NULL) { errno = -EINVAL; return NULL; } /* Return existing cache entry if one exists */ node = g_hash_table_lookup(crm_remote_peer_cache, node_name); if (node) { return node; } /* Allocate a new entry */ node = calloc(1, sizeof(crm_node_t)); if (node == NULL) { return NULL; } /* Populate the essential information */ node->flags = crm_remote_node; node->uuid = strdup(node_name); if (node->uuid == NULL) { free(node); errno = -ENOMEM; return NULL; } /* Add the new entry to the cache */ g_hash_table_replace(crm_remote_peer_cache, node->uuid, node); crm_trace("added %s to remote cache", node_name); /* Update the entry's uname, ensuring peer status callbacks are called */ crm_update_peer_uname(node, node_name); return node; } void crm_remote_peer_cache_remove(const char *node_name) { if (g_hash_table_remove(crm_remote_peer_cache, node_name)) { crm_trace("removed %s from remote peer cache", node_name); } } /*! * \internal * \brief Return node status based on a CIB status entry * * \param[in] node_state XML of node state * * \return CRM_NODE_LOST if XML_NODE_IN_CLUSTER is false in node_state, * CRM_NODE_MEMBER otherwise * \note Unlike most boolean XML attributes, this one defaults to true, for * backward compatibility with older controllers that don't set it. */ static const char * remote_state_from_cib(xmlNode *node_state) { const char *status; status = crm_element_value(node_state, XML_NODE_IN_CLUSTER); if (status && !crm_is_true(status)) { status = CRM_NODE_LOST; } else { status = CRM_NODE_MEMBER; } return status; } /* user data for looping through remote node xpath searches */ struct refresh_data { const char *field; /* XML attribute to check for node name */ gboolean has_state; /* whether to update node state based on XML */ }; /*! 
* \internal * \brief Process one pacemaker_remote node xpath search result * * \param[in] result XML search result * \param[in] user_data what to look for in the XML */ static void remote_cache_refresh_helper(xmlNode *result, void *user_data) { struct refresh_data *data = user_data; const char *remote = crm_element_value(result, data->field); const char *state = NULL; crm_node_t *node; CRM_CHECK(remote != NULL, return); /* Determine node's state, if the result has it */ if (data->has_state) { state = remote_state_from_cib(result); } /* Check whether cache already has entry for node */ node = g_hash_table_lookup(crm_remote_peer_cache, remote); if (node == NULL) { /* Node is not in cache, so add a new entry for it */ node = crm_remote_peer_get(remote); CRM_ASSERT(node); if (state) { crm_update_peer_state(__FUNCTION__, node, state, 0); } } else if (is_set(node->flags, crm_node_dirty)) { /* Node is in cache and hasn't been updated already, so mark it clean */ clear_bit(node->flags, crm_node_dirty); if (state) { crm_update_peer_state(__FUNCTION__, node, state, 0); } } } static void mark_dirty(gpointer key, gpointer value, gpointer user_data) { set_bit(((crm_node_t*)value)->flags, crm_node_dirty); } static gboolean is_dirty(gpointer key, gpointer value, gpointer user_data) { return is_set(((crm_node_t*)value)->flags, crm_node_dirty); } /* search string to find CIB resources entries for guest nodes */ #define XPATH_GUEST_NODE_CONFIG \ "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \ "//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR \ "[@name='" XML_RSC_ATTR_REMOTE_NODE "']" /* search string to find CIB resources entries for remote nodes */ #define XPATH_REMOTE_NODE_CONFIG \ "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \ "[@type='remote'][@provider='pacemaker']" /* search string to find CIB node status entries for pacemaker_remote nodes */ #define XPATH_REMOTE_NODE_STATUS \ "//" XML_TAG_CIB "//" XML_CIB_TAG_STATUS "//" 
XML_CIB_TAG_STATE \ "[@" XML_NODE_IS_REMOTE "='true']" /*! * \brief Repopulate the remote peer cache based on CIB XML * * \param[in] xmlNode CIB XML to parse */ void crm_remote_peer_cache_refresh(xmlNode *cib) { struct refresh_data data; crm_peer_init(); /* First, we mark all existing cache entries as dirty, * so that later we can remove any that weren't in the CIB. * We don't empty the cache, because we need to detect changes in state. */ g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL); /* Look for guest nodes and remote nodes in the status section */ data.field = "id"; data.has_state = TRUE; crm_foreach_xpath_result(cib, XPATH_REMOTE_NODE_STATUS, remote_cache_refresh_helper, &data); /* Look for guest nodes and remote nodes in the configuration section, * because they may have just been added and not have a status entry yet. * In that case, the cached node state will be left NULL, so that the * peer status callback isn't called until we're sure the node started * successfully. */ data.field = "value"; data.has_state = FALSE; crm_foreach_xpath_result(cib, XPATH_GUEST_NODE_CONFIG, remote_cache_refresh_helper, &data); data.field = "id"; data.has_state = FALSE; crm_foreach_xpath_result(cib, XPATH_REMOTE_NODE_CONFIG, remote_cache_refresh_helper, &data); /* Remove all old cache entries that weren't seen in the CIB */ g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL); } gboolean crm_is_peer_active(const crm_node_t * node) { if(node == NULL) { return FALSE; } if (is_set(node->flags, crm_remote_node)) { /* remote nodes are never considered active members. 
This * guarantees they will never be considered for DC membership.*/ return FALSE; } #if SUPPORT_COROSYNC if (is_corosync_cluster()) { return crm_is_corosync_peer_active(node); } #endif crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type())); return FALSE; } static gboolean crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; crm_node_t *search = user_data; if (search == NULL) { return FALSE; } else if (search->id && node->id != search->id) { return FALSE; } else if (search->id == 0 && safe_str_neq(node->uname, search->uname)) { return FALSE; } else if (crm_is_peer_active(value) == FALSE) { crm_info("Removing node with name %s and id %u from membership cache", (node->uname? node->uname : "unknown"), node->id); return TRUE; } return FALSE; } /*! * \brief Remove all peer cache entries matching a node ID and/or uname * * \param[in] id ID of node to remove (or 0 to ignore) * \param[in] name Uname of node to remove (or NULL to ignore) * * \return Number of cache entries removed */ guint reap_crm_member(uint32_t id, const char *name) { int matches = 0; crm_node_t search; if (crm_peer_cache == NULL) { crm_trace("Membership cache not initialized, ignoring purge request"); return 0; } search.id = id; search.uname = name ? strdup(name) : NULL; matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search); if(matches) { crm_notice("Purged %d peer%s with id=%u%s%s from the membership cache", matches, s_if_plural(matches), search.id, (search.uname? " and/or uname=" : ""), (search.uname? search.uname : "")); } else { crm_info("No peers with id=%u%s%s to purge from the membership cache", search.id, (search.uname? " and/or uname=" : ""), (search.uname? 
search.uname : "")); } free(search.uname); return matches; } static void crm_count_peer(gpointer key, gpointer value, gpointer user_data) { guint *count = user_data; crm_node_t *node = value; if (crm_is_peer_active(node)) { *count = *count + 1; } } guint crm_active_peers(void) { guint count = 0; if (crm_peer_cache) { g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count); } return count; } static void destroy_crm_node(gpointer data) { crm_node_t *node = data; crm_trace("Destroying entry for node %u: %s", node->id, node->uname); free(node->uname); free(node->state); free(node->uuid); free(node->expected); free(node); } void crm_peer_init(void) { if (crm_peer_cache == NULL) { crm_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node); } if (crm_remote_peer_cache == NULL) { crm_remote_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, destroy_crm_node); } + + if (crm_known_peer_cache == NULL) { + crm_known_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node); + } } void crm_peer_destroy(void) { if (crm_peer_cache != NULL) { crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache)); g_hash_table_destroy(crm_peer_cache); crm_peer_cache = NULL; } if (crm_remote_peer_cache != NULL) { crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache)); g_hash_table_destroy(crm_remote_peer_cache); crm_remote_peer_cache = NULL; } + + if (crm_known_peer_cache != NULL) { + crm_trace("Destroying known peer cache with %d members", g_hash_table_size(crm_known_peer_cache)); + g_hash_table_destroy(crm_known_peer_cache); + crm_known_peer_cache = NULL; + } + } void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL; /*! 
* \brief Set a client function that will be called after peer status changes * * \param[in] dispatch Pointer to function to use as callback * * \note Previously, client callbacks were responsible for peer cache * management. This is no longer the case, and client callbacks should do * only client-specific handling. Callbacks MUST NOT add or remove entries * in the peer caches. */ void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)) { crm_status_callback = dispatch; } /*! * \brief Tell the library whether to automatically reap lost nodes * * If TRUE (the default), calling crm_update_peer_proc() will also update the * peer state to CRM_NODE_MEMBER or CRM_NODE_LOST, and crm_update_peer_state() * will reap peers whose state changes to anything other than CRM_NODE_MEMBER. * Callers should leave this enabled unless they plan to manage the cache * separately on their own. * * \param[in] autoreap TRUE to enable automatic reaping, FALSE to disable */ void crm_set_autoreap(gboolean autoreap) { crm_autoreap = autoreap; } static void crm_dump_peer_hash(int level, const char *caller) { GHashTableIter iter; const char *id = NULL; crm_node_t *node = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) { do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id); } } static gboolean crm_hash_find_by_data(gpointer key, gpointer value, gpointer user_data) { if(value == user_data) { return TRUE; } return FALSE; } crm_node_t * crm_find_peer_full(unsigned int id, const char *uname, int flags) { crm_node_t *node = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if ((uname != NULL) && (flags & CRM_GET_PEER_REMOTE)) { node = g_hash_table_lookup(crm_remote_peer_cache, uname); } if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) { node = crm_find_peer(id, uname); } return node; } crm_node_t * crm_get_peer_full(unsigned int id, 
const char *uname, int flags) { crm_node_t *node = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (flags & CRM_GET_PEER_REMOTE) { node = g_hash_table_lookup(crm_remote_peer_cache, uname); } if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) { node = crm_get_peer(id, uname); } return node; } crm_node_t * crm_find_peer(unsigned int id, const char *uname) { GHashTableIter iter; crm_node_t *node = NULL; crm_node_t *by_id = NULL; crm_node_t *by_name = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (uname != NULL) { g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node->uname && strcasecmp(node->uname, uname) == 0) { crm_trace("Name match: %s = %p", node->uname, node); by_name = node; break; } } } if (id > 0) { g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node->id == id) { crm_trace("ID match: %u = %p", node->id, node); by_id = node; break; } } } node = by_id; /* Good default */ if(by_id == by_name) { /* Nothing to do if they match (both NULL counts) */ crm_trace("Consistent: %p for %u/%s", by_id, id, uname); } else if(by_id == NULL && by_name) { crm_trace("Only one: %p for %u/%s", by_name, id, uname); if(id && by_name->id) { crm_dump_peer_hash(LOG_WARNING, __FUNCTION__); crm_crit("Node %u and %u share the same name '%s'", id, by_name->id, uname); node = NULL; /* Create a new one */ } else { node = by_name; } } else if(by_name == NULL && by_id) { crm_trace("Only one: %p for %u/%s", by_id, id, uname); if(uname && by_id->uname) { crm_dump_peer_hash(LOG_WARNING, __FUNCTION__); crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct", uname, by_id->uname, id, uname); } } else if(uname && by_id->uname) { if(safe_str_eq(uname, by_id->uname)) { crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id); 
g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name); } else { crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname); crm_dump_peer_hash(LOG_INFO, __FUNCTION__); crm_abort(__FILE__, __FUNCTION__, __LINE__, "member weirdness", TRUE, TRUE); } } else if(id && by_name->id) { crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname); } else { /* Simple merge */ /* Only corosync based clusters use nodeid's * * The functions that call crm_update_peer_state() only know nodeid * so 'by_id' is authorative when merging * * Same for crm_update_peer_proc() */ crm_dump_peer_hash(LOG_DEBUG, __FUNCTION__); crm_info("Merging %p into %p", by_name, by_id); g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name); } return node; } #if SUPPORT_COROSYNC static guint crm_remove_conflicting_peer(crm_node_t *node) { int matches = 0; GHashTableIter iter; crm_node_t *existing_node = NULL; if (node->id == 0 || node->uname == NULL) { return 0; } if (corosync_cmap_has_config("nodelist") != 0) { return 0; } g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) { if (existing_node->id > 0 && existing_node->id != node->id && existing_node->uname != NULL && strcasecmp(existing_node->uname, node->uname) == 0) { if (crm_is_peer_active(existing_node)) { continue; } crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u", existing_node->id, existing_node->uname, node->id); g_hash_table_iter_remove(&iter); matches++; } } return matches; } #endif /* coverity[-alloc] Memory is referenced in one or both hashtables */ crm_node_t * crm_get_peer(unsigned int id, const char *uname) { crm_node_t *node = NULL; char *uname_lookup = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); node = crm_find_peer(id, uname); /* if uname wasn't provided, and find_peer did not turn up a uname based on id. 
* we need to do a lookup of the node name using the id in the cluster membership. */ if ((node == NULL || node->uname == NULL) && (uname == NULL)) { uname_lookup = get_node_name(id); } if (uname_lookup) { uname = uname_lookup; crm_trace("Inferred a name of '%s' for node %u", uname, id); /* try to turn up the node one more time now that we know the uname. */ if (node == NULL) { node = crm_find_peer(id, uname); } } if (node == NULL) { char *uniqueid = crm_generate_uuid(); node = calloc(1, sizeof(crm_node_t)); CRM_ASSERT(node); crm_info("Created entry %s/%p for node %s/%u (%d total)", uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache)); g_hash_table_replace(crm_peer_cache, uniqueid, node); } if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) { crm_info("Node %u is now known as %s", id, uname); } if(id > 0 && node->id == 0) { node->id = id; } if (uname && (node->uname == NULL)) { crm_update_peer_uname(node, uname); } if(node->uuid == NULL) { const char *uuid = crm_peer_uuid(node); if (uuid) { crm_info("Node %u has uuid %s", id, uuid); } else { crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname); } } free(uname_lookup); return node; } /*! * \internal * \brief Update a node's uname * * \param[in] node Node object to update * \param[in] uname New name to set * * \note This function should not be called within a peer cache iteration, * because in some cases it can remove conflicting cache entries, * which would invalidate the iterator. 
*/
void
crm_update_peer_uname(crm_node_t *node, const char *uname)
{
    const char *ch = NULL;

    CRM_CHECK(uname != NULL,
              crm_err("Bug: can't update node name without name"); return);
    CRM_CHECK(node != NULL,
              crm_err("Bug: can't update node name to %s without node", uname);
              return);

    if (safe_str_eq(uname, node->uname)) {
        crm_debug("Node uname '%s' did not change", uname);
        return;
    }

    /* Warn once, at the first ASCII capital found in the new name */
    for (ch = uname; *ch != '\0'; ch++) {
        if ((*ch < 'A') || (*ch > 'Z')) {
            continue;
        }
        crm_warn("Node names with capitals are discouraged, consider changing '%s'",
                 uname);
        break;
    }

    /* Replace the stored name with a private copy of the new one */
    free(node->uname);
    node->uname = strdup(uname);
    CRM_ASSERT(node->uname != NULL);

    /* Let the client know about the rename */
    if (crm_status_callback != NULL) {
        crm_status_callback(crm_status_uname, node, NULL);
    }

#if SUPPORT_COROSYNC
    /* Only cluster (non-remote) nodes are checked for conflicting entries */
    if (is_corosync_cluster() && !is_set(node->flags, crm_remote_node)) {
        crm_remove_conflicting_peer(node);
    }
#endif
}

/*!
 * \internal
 * \brief Update a node's process information (and potentially state)
 *
 * \param[in] source  Caller's function name (for log messages)
 * \param[in] node    Node object to update
 * \param[in] flag    Bitmask of new process information
 * \param[in] status  node status (online, offline, etc.)
 *
 * \return NULL if any node was reaped from peer caches, value of node otherwise
 *
 * \note If this function returns NULL, the supplied node object was likely
 *       freed and should not be used again. This function should not be
 *       called within a cache iteration if reaping is possible, otherwise
 *       reaping could invalidate the iterator.
*/
crm_node_t *
crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
{
    uint32_t last = 0;
    gboolean changed = FALSE;

    CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
                                    source, peer2text(flag), status); return NULL);

    /* Pacemaker doesn't spawn processes on remote nodes */
    if (is_set(node->flags, crm_remote_node)) {
        return node;
    }

    /* Remember the previous process mask; it is handed to the callback */
    last = node->processes;

    if (status == NULL) {
        /* No status given: flag replaces the whole process mask */
        node->processes = flag;
        changed = (node->processes != last)? TRUE : FALSE;

    } else if (safe_str_eq(status, ONLINESTATUS)) {
        if ((node->processes & flag) != flag) {
            set_bit(node->processes, flag);
            changed = TRUE;
        }

    } else if (node->processes & flag) {
        clear_bit(node->processes, flag);
        changed = TRUE;
    }

    if (!changed) {
        crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
                  peer2text(flag), status);
        return node;
    }

    if (status == NULL && flag <= crm_proc_none) {
        crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
                 node->id);
    } else {
        crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
                 peer2text(flag), status);
    }

    /* Call the client callback first, then update the peer state,
     * in case the node will be reaped
     */
    if (crm_status_callback) {
        crm_status_callback(crm_status_processes, node, &last);
    }

    /* The client callback shouldn't touch the peer caches,
     * but as a safety net, bail if the peer cache was destroyed.
     */
    if (crm_peer_cache == NULL) {
        return NULL;
    }

    if (crm_autoreap) {
        /* Membership follows whether the cluster-layer process bit is set */
        const char *new_state = is_set(node->processes, crm_get_cluster_proc())?
                                CRM_NODE_MEMBER : CRM_NODE_LOST;

        node = crm_update_peer_state(__FUNCTION__, node, new_state, 0);
    }
    return node;
}

/*!
 * \brief Update a node's expected join state
 *
 * \param[in] source    Caller's function name (for log messages)
 * \param[in] node      Node object to update
 * \param[in] expected  Node's new expected state (NULL leaves it unchanged)
 */
void
crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected)
{
    char *previous = NULL;

    CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
              return);

    /* Remote nodes don't participate in joins */
    if (is_set(node->flags, crm_remote_node)) {
        return;
    }

    if ((expected == NULL) || !safe_str_neq(node->expected, expected)) {
        crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
                  node->id, expected);
        return;
    }

    /* Keep the old value alive until it has been logged */
    previous = node->expected;
    node->expected = strdup(expected);
    crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
             expected, previous);
    free(previous);
}

/*!
 * \internal
 * \brief Update a node's state and membership information
 *
 * \param[in] source      Caller's function name (for log messages)
 * \param[in] node        Node object to update
 * \param[in] state       Node's new state
 * \param[in] membership  Node's new membership ID
 * \param[in] iter        If not NULL, pointer to node's peer cache iterator
 *
 * \return NULL if any node was reaped, value of node otherwise
 *
 * \note If this function returns NULL, the supplied node object was likely
 *       freed and should not be used again. This function may be called from
 *       within a peer cache iteration if the iterator is supplied.
*/
static crm_node_t *
crm_update_peer_state_iter(const char *source, crm_node_t * node, const char *state, int membership, GHashTableIter *iter)
{
    gboolean is_member;
    char *previous = NULL;

    CRM_CHECK(node != NULL,
              crm_err("Could not set state for unknown host to %s"
                      CRM_XS " source=%s", state, source);
              return NULL);

    is_member = safe_str_eq(state, CRM_NODE_MEMBER);
    if (is_member) {
        /* A member is not lost; record the membership it was seen in */
        node->when_lost = 0;
        if (membership) {
            node->last_seen = membership;
        }
    }

    if ((state == NULL) || !safe_str_neq(node->state, state)) {
        crm_trace("Node %s state is unchanged (%s) " CRM_XS
                  " nodeid=%u source=%s", node->uname, state, node->id, source);
        return node;
    }

    /* Swap in the new state, keeping the old one for the log and callback */
    previous = node->state;
    node->state = strdup(state);
    crm_notice("Node %s state is now %s " CRM_XS
               " nodeid=%u previous=%s source=%s", node->uname, state,
               node->id, (previous? previous : "unknown"), source);
    if (crm_status_callback) {
        crm_status_callback(crm_status_nstate, node, previous);
    }
    free(previous);

    if (!crm_autoreap || is_member || is_set(node->flags, crm_remote_node)) {
        return node;
    }

    /* We only autoreap from the peer cache, not the remote peer cache,
     * because the latter should be managed only by
     * crm_remote_peer_cache_refresh().
     */
    if (iter != NULL) {
        /* Remove via the caller's iterator so its iteration stays valid */
        crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache",
                   node->id, node->uname);
        g_hash_table_iter_remove(iter);

    } else {
        reap_crm_member(node->id, node->uname);
    }
    return NULL;
}

/*!
 * \brief Update a node's state and membership information
 *
 * \param[in] source      Caller's function name (for log messages)
 * \param[in] node        Node object to update
 * \param[in] state       Node's new state
 * \param[in] membership  Node's new membership ID
 *
 * \return NULL if any node was reaped, value of node otherwise
 *
 * \note If this function returns NULL, the supplied node object was likely
 *       freed and should not be used again. This function should not be
 *       called within a cache iteration if reaping is possible,
 *       otherwise reaping could invalidate the iterator.
*/ crm_node_t * crm_update_peer_state(const char *source, crm_node_t * node, const char *state, int membership) { return crm_update_peer_state_iter(source, node, state, membership, NULL); } /*! * \internal * \brief Reap all nodes from cache whose membership information does not match * * \param[in] membership Membership ID of nodes to keep */ void crm_reap_unseen_nodes(uint64_t membership) { GHashTableIter iter; crm_node_t *node = NULL; crm_trace("Reaping unseen nodes..."); g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) { if (node->last_seen != membership) { if (node->state) { /* * Calling crm_update_peer_state_iter() allows us to * remove the node from crm_peer_cache without * invalidating our iterator */ crm_update_peer_state_iter(__FUNCTION__, node, CRM_NODE_LOST, membership, &iter); } else { crm_info("State of node %s[%u] is still unknown", node->uname, node->id); } } } } int crm_terminate_member(int nodeid, const char *uname, void *unused) { /* Always use the synchronous, non-mainloop version */ return stonith_api_kick(nodeid, uname, 120, TRUE); } int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) { return stonith_api_kick(nodeid, uname, 120, TRUE); } + +static crm_node_t * +crm_find_known_peer(const char *id, const char *uname) +{ + GHashTableIter iter; + crm_node_t *node = NULL; + crm_node_t *by_id = NULL; + crm_node_t *by_name = NULL; + + if (uname) { + g_hash_table_iter_init(&iter, crm_known_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + if (node->uname && strcasecmp(node->uname, uname) == 0) { + crm_trace("Name match: %s = %p", node->uname, node); + by_name = node; + break; + } + } + } + + if (id) { + g_hash_table_iter_init(&iter, crm_known_peer_cache); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { + if(strcasecmp(node->uuid, id) == 0) { + crm_trace("ID match: %s= %p", id, node); + by_id = node; + 
break; + } + } + } + + node = by_id; /* Good default */ + if (by_id == by_name) { + /* Nothing to do if they match (both NULL counts) */ + crm_trace("Consistent: %p for %s/%s", by_id, id, uname); + + } else if (by_id == NULL && by_name) { + crm_trace("Only one: %p for %s/%s", by_name, id, uname); + + if (id) { + node = NULL; + + } else { + node = by_name; + } + + } else if (by_name == NULL && by_id) { + crm_trace("Only one: %p for %s/%s", by_id, id, uname); + + if (uname) { + node = NULL; + } + + } else if (uname && by_id->uname + && safe_str_eq(uname, by_id->uname)) { + /* Multiple nodes have the same uname in the CIB. + * Return by_id. */ + + } else if (id && by_name->uuid + && safe_str_eq(id, by_name->uuid)) { + /* Multiple nodes have the same id in the CIB. + * Return by_name. */ + node = by_name; + + } else { + node = NULL; + } + + if (node == NULL) { + crm_debug("Couldn't find node%s%s%s%s", + id? " " : "", + id? id : "", + uname? " with name " : "", + uname? uname : ""); + } + + return node; +} + +static void +known_peer_cache_refresh_helper(xmlNode *xml_node, void *user_data) +{ + const char *id = crm_element_value(xml_node, XML_ATTR_ID); + const char *uname = crm_element_value(xml_node, XML_ATTR_UNAME); + crm_node_t * node = NULL; + + CRM_CHECK(id != NULL && uname !=NULL, return); + node = crm_find_known_peer(id, uname); + + if (node == NULL) { + char *uniqueid = crm_generate_uuid(); + + node = calloc(1, sizeof(crm_node_t)); + CRM_ASSERT(node != NULL); + + node->uname = strdup(uname); + CRM_ASSERT(node->uname != NULL); + + node->uuid = strdup(id); + CRM_ASSERT(node->uuid != NULL); + + g_hash_table_replace(crm_known_peer_cache, uniqueid, node); + + } else if (is_set(node->flags, crm_node_dirty)) { + if (safe_str_neq(uname, node->uname)) { + free(node->uname); + node->uname = strdup(uname); + CRM_ASSERT(node->uname != NULL); + } + + /* Node is in cache and hasn't been updated already, so mark it clean */ + clear_bit(node->flags, crm_node_dirty); + } + +} + 
+#define XPATH_MEMBER_NODE_CONFIG \ + "//" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_NODES \ + "/" XML_CIB_TAG_NODE "[not(@type) or @type='member']" + +static void +crm_known_peer_cache_refresh(xmlNode *cib) +{ + crm_peer_init(); + + g_hash_table_foreach(crm_known_peer_cache, mark_dirty, NULL); + + crm_foreach_xpath_result(cib, XPATH_MEMBER_NODE_CONFIG, + known_peer_cache_refresh_helper, NULL); + + /* Remove all old cache entries that weren't seen in the CIB */ + g_hash_table_foreach_remove(crm_known_peer_cache, is_dirty, NULL); +} + +void +crm_peer_caches_refresh(xmlNode *cib) +{ + crm_remote_peer_cache_refresh(cib); + crm_known_peer_cache_refresh(cib); +} + +crm_node_t * +crm_find_known_peer_full(unsigned int id, const char *uname, int flags) +{ + crm_node_t *node = NULL; + char *id_str = NULL; + + CRM_ASSERT(id > 0 || uname != NULL); + + node = crm_find_peer_full(id, uname, flags); + + if (node || !(flags & CRM_GET_PEER_CLUSTER)) { + return node; + } + + if (id > 0) { + id_str = crm_strdup_printf("%u", id); + } + + node = crm_find_known_peer(id_str, uname); + + free(id_str); + return node; +}