Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F3687429
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
20 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/daemons/controld/controld_membership.c b/daemons/controld/controld_membership.c
index b4515727b0..94cd0caf20 100644
--- a/daemons/controld/controld_membership.c
+++ b/daemons/controld/controld_membership.c
@@ -1,449 +1,444 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
/* put these first so that uuid_t is defined without conflicts */
#include <crm_internal.h>
#include <string.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/cluster/internal.h>
#include <pacemaker-controld.h>
void post_cache_update(int instance);
-int last_peer_update = 0;
-
extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
/*!
 * \internal
 * \brief Peer cache iterator callback that reacts to a node that is not active
 *
 * For an inactive node: reset its join phase, raise an FSA error if the node
 * is ourselves, request an election if it was our DC (and we are not the DC),
 * re-check join state while integrating or finalizing, and fail any
 * transition graph actions that can no longer complete on that node.
 *
 * \param[in] key        Hash table key (unused)
 * \param[in] value      Peer cache entry (crm_node_t *)
 * \param[in] user_data  Unused
 */
static void
reap_dead_nodes(gpointer key, gpointer value, gpointer user_data)
{
    crm_node_t *peer = value;

    if (crm_is_peer_active(peer) != FALSE) {
        return; // Node is still active, nothing to reap
    }

    crm_update_peer_join(__func__, peer, crm_join_none);

    if ((peer != NULL) && (peer->uname != NULL)) {
        if (pcmk__str_eq(fsa_our_uname, peer->uname, pcmk__str_casei)) {
            crm_err("We're not part of the cluster anymore");
            register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);

        } else if ((AM_I_DC == FALSE)
                   && pcmk__str_eq(peer->uname, fsa_our_dc, pcmk__str_casei)) {
            crm_warn("Our DC node (%s) left the cluster", peer->uname);
            register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
        }
    }

    switch (fsa_state) {
        case S_INTEGRATION:
        case S_FINALIZE_JOIN:
            check_join_state(fsa_state, __func__);
            break;

        default:
            break;
    }

    if ((peer != NULL) && (peer->uuid != NULL)) {
        fail_incompletable_actions(transition_graph, peer->uuid);
    }
}
/* Whether quorum has been seen at least once: crm_update_quorum() ORs each
 * reported quorum value into this flag and never clears it.
 */
gboolean ever_had_quorum = FALSE;
/*!
 * \internal
 * \brief React to a refreshed peer cache after a membership event
 *
 * Records the membership instance, handles nodes that are no longer active,
 * refreshes the CIB node entries if we are the DC, schedules an election
 * check, and broadcasts a no-op message to the cluster.
 *
 * \param[in] instance  Membership event identifier to record as crm_peer_seq
 */
void
post_cache_update(int instance)
{
    const char *sender_sys = NULL;
    xmlNode *noop_msg = NULL;

    crm_peer_seq = instance;
    crm_debug("Updated cache after membership event %d.", instance);

    g_hash_table_foreach(crm_peer_cache, reap_dead_nodes, NULL);
    controld_set_fsa_input_flags(R_MEMBERSHIP);

    if (AM_I_DC) {
        populate_cib_nodes(node_update_quick
                           | node_update_cluster
                           | node_update_peer
                           | node_update_expected,
                           __func__);
    }

    /* Nodes may have been lost, so the election status should be re-checked
     * (this is safe to do even when no election is in progress)
     */
    controld_set_fsa_action_flags(A_ELECTION_CHECK);
    trigger_fsa();

    /* Announce ourselves after the membership change, which helps detect
     * duplicate DCs
     */
    sender_sys = AM_I_DC ? CRM_SYSTEM_DC : CRM_SYSTEM_CRMD;
    noop_msg = create_request(CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD,
                              sender_sys, NULL);
    send_cluster_message(NULL, crm_msg_crmd, noop_msg, FALSE);
    free_xml(noop_msg);
}
/*!
 * \internal
 * \brief CIB callback for the node status update sent by populate_cib_nodes()
 *
 * Logs success at trace level. On failure, logs the error and the message XML,
 * then registers an I_ERROR input with the FSA.
 *
 * \param[in] msg        Message XML, logged at debug level on failure
 * \param[in] call_id    CIB call ID; a negative value is reported as the error
 * \param[in] rc         Result code of the CIB operation
 * \param[in] output     Unused
 * \param[in] user_data  Unused
 */
static void
crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
/* NOTE(review): not referenced directly in this function; presumably consumed
 * by the register_fsa_error() macro -- confirm before removing
 */
fsa_data_t *msg_data = NULL;
- last_peer_update = 0;
-
if (rc == pcmk_ok) {
crm_trace("Node update %d complete", call_id);
} else if(call_id < pcmk_ok) {
/* A negative call ID is itself treated as the error code to report */
crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id);
crm_log_xml_debug(msg, "failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
} else {
crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
crm_log_xml_debug(msg, "failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
/*!
 * \internal
 * \brief Build a node state XML element describing a node's current status
 *
 * \param[in,out] node    Node whose state will be used for update
 * \param[in]     flags   Bitmask of node_update_flags selecting what to include
 * \param[in,out] parent  XML element to add the new element to (or NULL)
 * \param[in]     source  Requester of the update (stored as the element's
 *                        XML_ATTR_ORIGIN attribute)
 *
 * \return Newly created node state element, or NULL if the node has no state
 *         yet or no ID could be set for it
 */
xmlNode *
create_node_state_update(crm_node_t *node, int flags, xmlNode *parent,
                         const char *source)
{
    xmlNode *state_xml = NULL;

    if (node->state == NULL) {
        crm_info("Node update for %s cancelled: no state, not seen yet", node->uname);
        return NULL;
    }

    state_xml = create_xml_node(parent, XML_CIB_TAG_STATE);
    if (pcmk_is_set(node->flags, crm_remote_node)) {
        pcmk__xe_set_bool_attr(state_xml, XML_NODE_IS_REMOTE, true);
    }

    // An update without an ID is useless, so discard it
    set_uuid(state_xml, XML_ATTR_UUID, node);
    if (crm_element_value(state_xml, XML_ATTR_UUID) == NULL) {
        crm_info("Node update for %s cancelled: no id", node->uname);
        free_xml(state_xml);
        return NULL;
    }
    crm_xml_add(state_xml, XML_ATTR_UNAME, node->uname);

    if (((flags & node_update_cluster) != 0) && (node->state != NULL)) {
        const bool in_cluster = pcmk__str_eq(node->state, CRM_NODE_MEMBER,
                                             pcmk__str_casei);

        pcmk__xe_set_bool_attr(state_xml, XML_NODE_IN_CLUSTER, in_cluster);
    }

    // Peer, join, and expected state are recorded for cluster nodes only
    if (!pcmk_is_set(node->flags, crm_remote_node)) {
        if ((flags & node_update_peer) != 0) {
            const char *peer_status =
                pcmk_is_set(node->processes, crm_get_cluster_proc())?
                ONLINESTATUS : OFFLINESTATUS;

            crm_xml_add(state_xml, XML_NODE_IS_PEER, peer_status);
        }

        if ((flags & node_update_join) != 0) {
            const char *join_state = (node->join > crm_join_none)?
                                     CRMD_JOINSTATE_MEMBER : CRMD_JOINSTATE_DOWN;

            crm_xml_add(state_xml, XML_NODE_JOIN_STATE, join_state);
        }

        if ((flags & node_update_expected) != 0) {
            crm_xml_add(state_xml, XML_NODE_EXPECTED, node->expected);
        }
    }

    crm_xml_add(state_xml, XML_ATTR_ORIGIN, source);
    return state_xml;
}
/*!
 * \internal
 * \brief CIB callback that logs the outcome of deleting a conflicting node
 *
 * \param[in] msg        Unused
 * \param[in] call_id    Unused
 * \param[in] rc         Result of the deletion (0 is logged at debug level,
 *                       anything else at notice level)
 * \param[in] output     Unused
 * \param[in] user_data  ID of the node that was to be deleted (string)
 */
static void
remove_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
                                 xmlNode * output, void *user_data)
{
    const char *deleted_uuid = user_data;
    const int log_level = (rc == 0)? LOG_DEBUG : LOG_NOTICE;

    do_crm_log_unlikely(log_level,
                        "Deletion of the unknown conflicting node \"%s\": %s (rc=%d)",
                        deleted_uuid, pcmk_strerror(rc), rc);
}
/*!
 * \internal
 * \brief CIB callback that removes CIB nodes conflicting with a known node
 *
 * The query result holds node entries that share a name with a known node but
 * have a different ID. Each such entry that does not match any peer cache
 * entry (by both ID and name) is deleted from the CIB's nodes section, and a
 * node state entry with the same ID and name is deleted from the status
 * section.
 *
 * \param[in] msg        Unused
 * \param[in] call_id    Unused
 * \param[in] rc         Result of the search query
 * \param[in] output     Query result (a single node element, or a parent of
 *                       node elements)
 * \param[in] user_data  ID of the known node the search was done for (string)
 */
static void
search_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
                                 xmlNode * output, void *user_data)
{
    const char *known_uuid = user_data;
    const int delete_opts = cib_scope_local | cib_quorum_override;
    xmlNode *candidate = NULL;

    if (rc != pcmk_ok) {
        // -ENXIO is not reported as a failure
        if (rc != -ENXIO) {
            crm_notice("Searching conflicting nodes for %s failed: %s (%d)",
                       known_uuid, pcmk_strerror(rc), rc);
        }
        return;
    }
    if (output == NULL) {
        return;
    }

    // The result is either a single node element or a container of them
    candidate = pcmk__str_eq(crm_element_name(output), XML_CIB_TAG_NODE,
                             pcmk__str_casei)?
                output : pcmk__xml_first_child(output);

    for (; candidate != NULL; candidate = pcmk__xml_next(candidate)) {
        const char *cib_uuid = NULL;
        const char *cib_uname = NULL;
        GHashTableIter peers;
        crm_node_t *peer = NULL;
        gboolean in_cache = FALSE;
        int rm_call_id = 0;
        xmlNode *state_xml = NULL;

        if (!pcmk__str_eq(crm_element_name(candidate), XML_CIB_TAG_NODE,
                          pcmk__str_casei)) {
            continue;
        }

        cib_uuid = crm_element_value(candidate, XML_ATTR_ID);
        cib_uname = crm_element_value(candidate, XML_ATTR_UNAME);
        if ((cib_uuid == NULL) || (cib_uname == NULL)) {
            continue;
        }

        // Stop at the first peer cache entry matching both ID and name
        g_hash_table_iter_init(&peers, crm_peer_cache);
        while ((in_cache == FALSE)
               && g_hash_table_iter_next(&peers, NULL, (gpointer *) &peer)) {

            if (peer->uuid
                && pcmk__str_eq(peer->uuid, cib_uuid, pcmk__str_casei)
                && peer->uname
                && pcmk__str_eq(peer->uname, cib_uname, pcmk__str_casei)) {
                in_cache = TRUE;
            }
        }
        if (in_cache != FALSE) {
            continue;
        }

        crm_notice("Deleting unknown node %s/%s which has conflicting uname with %s",
                   cib_uuid, cib_uname, known_uuid);

        // Drop the entry from the nodes section ...
        rm_call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_NODES,
                                                candidate, delete_opts);
        fsa_register_cib_callback(rm_call_id, FALSE, strdup(cib_uuid),
                                  remove_conflicting_node_callback);

        // ... and the corresponding entry from the status section
        state_xml = create_xml_node(NULL, XML_CIB_TAG_STATE);
        crm_xml_add(state_xml, XML_ATTR_ID, cib_uuid);
        crm_xml_add(state_xml, XML_ATTR_UNAME, cib_uname);

        rm_call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_STATUS,
                                                state_xml, delete_opts);
        fsa_register_cib_callback(rm_call_id, FALSE, strdup(cib_uuid),
                                  remove_conflicting_node_callback);
        free_xml(state_xml);
    }
}
/*!
 * \internal
 * \brief CIB callback for the nodes section update sent by populate_cib_nodes()
 *
 * Does nothing on success. On failure, logs the error and the message XML,
 * then registers an I_ERROR input with the FSA.
 *
 * \param[in] msg        Message XML, logged at debug level on failure
 * \param[in] call_id    CIB call ID; a negative value is reported as the error
 * \param[in] rc         Result code of the CIB operation
 * \param[in] output     Unused
 * \param[in] user_data  Unused
 */
static void
node_list_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    fsa_data_t *msg_data = NULL;

    if (call_id < pcmk_ok) {
        crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id);

    } else if (rc < pcmk_ok) {
        crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);

    } else {
        return; // Update succeeded
    }

    // Both failure cases are handled identically after logging the cause
    crm_log_xml_debug(msg, "update:failed");
    register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
/*!
 * \internal
 * \brief Write the known cluster nodes (and, on the DC, their states) to the CIB
 *
 * Builds a nodes section and sends it to the CIB as a modification. When the
 * node list comes from the peer cache, a search for CIB node entries with the
 * same name but a different ID is also started for each cached node. If the
 * nodes update was submitted and we are the DC, a status section with a node
 * state entry per cluster peer and per remote peer is sent as well.
 *
 * \param[in] flags   Bitmask of node_update_flags (also passed on to
 *                    create_node_state_update())
 * \param[in] source  Requester of the update (passed on to
 *                    create_node_state_update())
 */
void
populate_cib_nodes(enum node_update_flags flags, const char *source)
{
int call_id = 0;
gboolean from_hashtable = TRUE;
int call_options = cib_scope_local | cib_quorum_override;
xmlNode *node_list = create_xml_node(NULL, XML_CIB_TAG_NODES);
#if SUPPORT_COROSYNC
/* Unless a quick update was requested, ask corosync for the node list; its
 * return value decides whether the peer cache is still consulted below
 */
if (!pcmk_is_set(flags, node_update_quick) && is_corosync_cluster()) {
from_hashtable = pcmk__corosync_add_nodes(node_list);
}
#endif
if (from_hashtable) {
GHashTableIter iter;
crm_node_t *node = NULL;
GString *xpath = NULL;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
xmlNode *new_node = NULL;
if ((node->uuid != NULL) && (node->uname != NULL)) {
crm_trace("Creating node entry for %s/%s", node->uname, node->uuid);
/* A single string buffer is allocated lazily and reused for every node */
if (xpath == NULL) {
xpath = g_string_sized_new(512);
} else {
g_string_truncate(xpath, 0);
}
/* We need both to be valid */
new_node = create_xml_node(node_list, XML_CIB_TAG_NODE);
crm_xml_add(new_node, XML_ATTR_ID, node->uuid);
crm_xml_add(new_node, XML_ATTR_UNAME, node->uname);
/* Search and remove unknown nodes with the conflicting uname from CIB */
/* NOTE(review): uname and uuid are inserted into the XPath without
 * escaping; a quote character in either would alter the expression --
 * confirm whether such values are possible
 */
pcmk__g_strcat(xpath,
"/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION
"/" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE
"[@" XML_ATTR_UNAME "='", node->uname, "']"
"[@" XML_ATTR_ID "!='", node->uuid, "']", NULL);
call_id = fsa_cib_conn->cmds->query(fsa_cib_conn,
(const char *) xpath->str,
NULL,
cib_scope_local | cib_xpath);
/* The callback receives its own copy of the node ID as user data */
fsa_register_cib_callback(call_id, FALSE, strdup(node->uuid),
search_conflicting_node_callback);
}
}
if (xpath != NULL) {
g_string_free(xpath, TRUE);
}
}
crm_trace("Populating <nodes> section from %s", from_hashtable ? "hashtable" : "cluster");
/* call_id is overwritten here with the ID of the nodes section update */
fsa_cib_update(XML_CIB_TAG_NODES, node_list, call_options, call_id, NULL);
fsa_register_cib_callback(call_id, FALSE, NULL, node_list_update_callback);
free_xml(node_list);
if (call_id >= pcmk_ok && crm_peer_cache != NULL && AM_I_DC) {
/*
* There is no need to update the local CIB with our values if
* we've not seen valid membership data
*/
GHashTableIter iter;
crm_node_t *node = NULL;
/* node_list is reused, now as the parent of the node state entries */
node_list = create_xml_node(NULL, XML_CIB_TAG_STATUS);
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
create_node_state_update(node, flags, node_list, source);
}
if (crm_remote_peer_cache) {
g_hash_table_iter_init(&iter, crm_remote_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
create_node_state_update(node, flags, node_list, source);
}
}
fsa_cib_update(XML_CIB_TAG_STATUS, node_list, call_options, call_id, NULL);
fsa_register_cib_callback(call_id, FALSE, NULL, crmd_node_update_complete);
- last_peer_update = call_id;
free_xml(node_list);
}
}
/*!
 * \internal
 * \brief CIB callback for the quorum update sent by crm_update_quorum()
 *
 * \param[in] msg        Message XML, logged at debug level on failure
 * \param[in] call_id    CIB call ID (used for logging)
 * \param[in] rc         Result code of the CIB operation
 * \param[in] output     Unused
 * \param[in] user_data  Unused
 */
static void
cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    fsa_data_t *msg_data = NULL;

    if (rc == pcmk_ok) {
        crm_trace("Quorum update %d complete", call_id);
        return;
    }

    crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
    crm_log_xml_debug(msg, "failed");
    register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
/*!
 * \internal
 * \brief Record a new quorum status and, on the DC, publish it to the CIB
 *
 * Panics if quorum is lost after having been held at least once and
 * no_quorum_suicide_escalation is set. When we are the DC and either the
 * status changed or an update is forced, the CIB is updated and a new
 * transition is requested.
 *
 * \param[in] quorum        Whether the cluster currently has quorum
 * \param[in] force_update  If true, update the CIB even if quorum is unchanged
 */
void
crm_update_quorum(gboolean quorum, gboolean force_update)
{
    ever_had_quorum |= quorum;

    if (ever_had_quorum && !quorum && no_quorum_suicide_escalation) {
        pcmk__panic(__func__);
    }

    if (AM_I_DC && (force_update || (fsa_has_quorum != quorum))) {
        int call_id = 0;
        const int call_options = cib_scope_local | cib_quorum_override;
        xmlNode *cib_xml = create_xml_node(NULL, XML_TAG_CIB);

        crm_xml_add_int(cib_xml, XML_ATTR_HAVE_QUORUM, quorum);
        crm_xml_add(cib_xml, XML_ATTR_DC_UUID, fsa_our_uuid);

        fsa_cib_update(XML_TAG_CIB, cib_xml, call_options, call_id, NULL);
        crm_debug("Updating quorum status to %s (call=%d)",
                  pcmk__btoa(quorum), call_id);
        fsa_register_cib_callback(call_id, FALSE, NULL, cib_quorum_update_complete);
        free_xml(cib_xml);

        /* A quorum change normally leads to a new transition through other
         * activity: gaining quorum via a node join aborts via that join, and
         * losing quorum via a node leaving usually aborts via resource
         * activity and/or fencing.
         *
         * That is not guaranteed, though (for example, quorum may be forced
         * via corosync-cmapctl, or lost because a node in standby shut down
         * cleanly), so explicitly make sure a new transition is triggered.
         */
        if (!quorum) {
            abort_transition(INFINITY, pcmk__graph_restart, "Quorum lost",
                             NULL);
        } else {
            /* Quorum gained: wait briefly before aborting, so that when
             * several nodes join at nearly the same time, the one giving us
             * quorum doesn't get the rest fenced.
             */
            abort_after_delay(INFINITY, pcmk__graph_restart, "Quorum gained",
                              5000);
        }
    }

    fsa_has_quorum = quorum;
}
diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h
index a25a82a803..7e188a87cb 100644
--- a/daemons/controld/controld_utils.h
+++ b/daemons/controld/controld_utils.h
@@ -1,128 +1,127 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef CRMD_UTILS__H
# define CRMD_UTILS__H
# include <crm/crm.h>
# include <crm/common/xml.h>
# include <crm/cib/internal.h> // PCMK__CIB_REQUEST_MODIFY
# include <controld_fsa.h> // fsa_cib_conn
# include <controld_alerts.h>
# define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
/* Submit a modification of a CIB section over the controller's CIB connection
 *
 * section:   name of the CIB section to modify
 * data:      XML to apply
 * options:   CIB call options
 * call_id:   lvalue that receives the result of cib_internal_op(); it is left
 *            untouched if there is no CIB connection
 * user_name: user name passed through to cib_internal_op()
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (for example, as the unbraced body of an if with an else),
 * and every argument is parenthesized in the expansion.
 */
# define fsa_cib_update(section, data, options, call_id, user_name) do { \
        if (fsa_cib_conn != NULL) { \
            (call_id) = cib_internal_op( \
                fsa_cib_conn, PCMK__CIB_REQUEST_MODIFY, NULL, (section), \
                (data), NULL, (options), (user_name)); \
        } else { \
            crm_err("No CIB manager connection available"); \
        } \
    } while (0)
/*!
 * \internal
 * \brief Modify a CIB section without tracking the call's result
 *
 * The call is made with the cib_scope_local, cib_quorum_override, and
 * cib_can_create options. An error is logged if there is no CIB connection.
 *
 * \param[in] section  Name of the CIB section to modify
 * \param[in] data     XML to apply
 */
static inline void
fsa_cib_anon_update(const char *section, xmlNode *data) {
    if (fsa_cib_conn != NULL) {
        const int call_opts = cib_scope_local
                              | cib_quorum_override
                              | cib_can_create;

        fsa_cib_conn->cmds->modify(fsa_cib_conn, section, data, call_opts);
        return;
    }
    crm_err("No CIB connection available");
}
/*!
 * \internal
 * \brief Modify a CIB section without tracking the result, discarding the reply
 *
 * Same as fsa_cib_anon_update(), except that cib_discard_reply is added to the
 * call options. An error is logged if there is no CIB connection.
 *
 * \param[in] section  Name of the CIB section to modify
 * \param[in] data     XML to apply
 */
static inline void
fsa_cib_anon_update_discard_reply(const char *section, xmlNode *data) {
    if (fsa_cib_conn != NULL) {
        const int call_opts = cib_scope_local
                              | cib_quorum_override
                              | cib_can_create
                              | cib_discard_reply;

        fsa_cib_conn->cmds->modify(fsa_cib_conn, section, data, call_opts);
        return;
    }
    crm_err("No CIB connection available");
}
extern gboolean fsa_has_quorum;
extern bool controld_shutdown_lock_enabled;
-extern int last_peer_update;
extern int last_resource_update;
/* Bit flags selecting what populate_cib_nodes() and
 * create_node_state_update() put into a CIB update
 */
enum node_update_flags {
    node_update_none     = 0,

    // If unset on a corosync cluster, the node list is requested from corosync
    node_update_quick    = (1 << 0),

    // Include whether the node is in the cluster
    node_update_cluster  = (1 << 4),

    // Include peer status (cluster nodes only)
    node_update_peer     = (1 << 5),

    // Include join state (cluster nodes only)
    node_update_join     = (1 << 6),

    // Include expected state (cluster nodes only)
    node_update_expected = (1 << 8),

    // Everything that affects node state content (excludes node_update_quick)
    node_update_all      = node_update_cluster
                           | node_update_peer
                           | node_update_join
                           | node_update_expected,
};
crm_exit_t crmd_exit(crm_exit_t exit_code);
_Noreturn void crmd_fast_exit(crm_exit_t exit_code);
void controld_shutdown_schedulerd_ipc(void);
void controld_stop_sched_timer(void);
void controld_free_sched_timer(void);
void controld_expect_sched_reply(char *ref);
void fsa_dump_actions(uint64_t action, const char *text);
void fsa_dump_inputs(int log_level, const char *text, long long input_register);
gboolean update_dc(xmlNode * msg);
void crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase);
xmlNode *create_node_state_update(crm_node_t *node, int flags,
xmlNode *parent, const char *source);
void populate_cib_nodes(enum node_update_flags flags, const char *source);
void crm_update_quorum(gboolean quorum, gboolean force_update);
void controld_close_attrd_ipc(void);
void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node);
void update_attrd_list(GList *attrs, uint32_t opts);
void update_attrd_remote_node_removed(const char *host, const char *user_name);
void update_attrd_clear_failures(const char *host, const char *rsc,
const char *op, const char *interval_spec,
gboolean is_remote_node);
int crmd_join_phase_count(enum crm_join_phase phase);
void crmd_join_phase_log(int level);
void crmd_peer_down(crm_node_t *peer, bool full);
unsigned int cib_op_timeout(void);
bool feature_set_compatible(const char *dc_version, const char *join_version);
bool controld_action_is_recordable(const char *action);
// Selectors for the parts of a node_state entry (values are sequential from 0)
enum controld_section_e {
    controld_section_lrm          = 0,
    controld_section_lrm_unlocked = 1,
    controld_section_attrs        = 2,
    controld_section_all          = 3,
    controld_section_all_unlocked = 4
};
void controld_delete_node_state(const char *uname,
enum controld_section_e section, int options);
int controld_delete_resource_history(const char *rsc_id, const char *node,
const char *user_name, int call_options);
const char *get_node_id(xmlNode *lrm_rsc_op);
/* Convenience macro for registering a CIB callback
 * (assumes that data can be freed with free())
 *
 * id:   CIB call ID the callback is registered for
 * flag: passed through unchanged to register_callback_full()
 *       (NOTE(review): meaning not visible here -- confirm against the CIB API)
 * data: user data handed to the callback; free() is registered as its
 *       destructor, so it must be heap-allocated or NULL
 * fn:   callback function; its identifier is also passed as a string (#fn)
 *
 * Asserts that a CIB connection exists, and uses cib_op_timeout() as the
 * timeout for the registered callback.
 */
# define fsa_register_cib_callback(id, flag, data, fn) do { \
CRM_ASSERT(fsa_cib_conn); \
fsa_cib_conn->cmds->register_callback_full( \
fsa_cib_conn, id, cib_op_timeout(), \
flag, data, #fn, fn, free); \
} while(0)
#endif
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Mon, Apr 21, 7:24 PM (12 h, 40 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1663656
Default Alt Text
(20 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment