/*
 * Ask the heartbeat layer for the UUID belonging to node name 'uname'.
 *
 * \param unused  Ignored; present so the signature parallels get_corosync_uuid()
 * \param uname   Node name to look up
 *
 * \return A newly allocated string holding the textual UUID (the caller owns
 *         and must free() it), or NULL when heartbeat support is compiled
 *         out, there is no heartbeat connection, the lookup fails, memory
 *         cannot be allocated, or heartbeat reports the all-zero UUID.
 */
static char *
get_heartbeat_uuid(uint32_t unused, const char *uname)
{
    char *uuid_calc = NULL;

#if SUPPORT_HEARTBEAT
    cl_uuid_t uuid_raw;
    const char *unknown = "00000000-0000-0000-0000-000000000000";

    if (heartbeat_cluster == NULL) {
        crm_warn("No connection to heartbeat, using uuid=uname");
        return NULL;
    }

    if (uname == NULL) {
        /* Nothing to look up; also keeps NULL away from the %s logs below */
        return NULL;
    }

    if (heartbeat_cluster->llc_ops->get_uuid_by_name(heartbeat_cluster, uname, &uuid_raw) == HA_FAIL) {
        /* Nothing has been allocated yet, so there is nothing to release */
        crm_err("get_uuid_by_name() call failed for host %s", uname);
        return NULL;
    }

    /* NOTE(review): 50 bytes is presumably enough for cl_uuid_unparse()'s
     * textual form (the 'unknown' literal above is 36 characters) - confirm
     */
    uuid_calc = calloc(1, 50);
    if (uuid_calc == NULL) {
        crm_err("Could not allocate memory for the UUID of %s", uname);
        return NULL;
    }
    cl_uuid_unparse(&uuid_raw, uuid_calc);

    if (safe_str_eq(uuid_calc, unknown)) {
        crm_warn("Could not calculate UUID for %s", uname);
        free(uuid_calc);
        return NULL;
    }
#endif
    return uuid_calc;
}
*uuid_pref = NULL; if (uuid_pref == NULL) { uuid_pref = getenv("PCMK_uname_is_uuid"); } if (uuid_pref == NULL) { /* true is legacy mode */ uuid_pref = "false"; } return crm_is_true(uuid_pref); } int get_corosync_id(int id, const char *uuid) { if (id == 0 && !uname_is_uuid() && is_corosync_cluster()) { id = crm_atoi(uuid, "0"); } return id; } char * get_corosync_uuid(uint32_t id, const char *uname) { if (!uname_is_uuid() && is_corosync_cluster()) { if (id <= 0) { /* Try the membership cache... */ crm_node_t *node = g_hash_table_lookup(crm_peer_cache, uname); if (node != NULL) { id = node->id; } } if (id > 0) { - return crm_itoa(id); + int len = 32; + char *buffer = NULL; + + buffer = calloc(1, (len + 1)); + if (buffer != NULL) { + snprintf(buffer, len, "%u", id); + } + + return buffer; + } else { crm_warn("Node %s is not yet known by corosync", uname); } } else if (uname != NULL) { return strdup(uname); } return NULL; } void set_node_uuid(const char *uname, const char *uuid) { CRM_CHECK(uuid != NULL, return); CRM_CHECK(uname != NULL, return); if (crm_uuid_cache == NULL) { crm_uuid_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } g_hash_table_insert(crm_uuid_cache, strdup(uname), strdup(uuid)); } const char * get_node_uuid(uint32_t id, const char *uname) { char *uuid = NULL; enum cluster_type_e type = get_cluster_type(); if (crm_uuid_cache == NULL) { crm_uuid_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } /* avoid blocking heartbeat calls where possible */ if (uname) { uuid = g_hash_table_lookup(crm_uuid_cache, uname); } if (uuid != NULL) { return uuid; } switch (type) { case pcmk_cluster_corosync: uuid = get_corosync_uuid(id, uname); break; case pcmk_cluster_cman: case pcmk_cluster_classic_ais: if (uname) { uuid = strdup(uname); } break; case pcmk_cluster_heartbeat: uuid = get_heartbeat_uuid(id, uname); break; case pcmk_cluster_unknown: case pcmk_cluster_invalid: 
crm_err("Unsupported cluster type"); break; } if (uuid == NULL) { return NULL; } if (uname) { g_hash_table_insert(crm_uuid_cache, strdup(uname), uuid); return g_hash_table_lookup(crm_uuid_cache, uname); } /* Memory leak! */ CRM_LOG_ASSERT(uuid != NULL); return uuid; } gboolean crm_cluster_connect(crm_cluster_t *cluster) { enum cluster_type_e type = get_cluster_type(); crm_notice("Connecting to cluster infrastructure: %s", name_for_cluster_type(type)); #if SUPPORT_COROSYNC if (is_openais_cluster()) { crm_peer_init(); return init_cs_connection(cluster); } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { int rv; CRM_ASSERT(cluster->hb_conn != NULL); /* coverity[var_deref_op] False positive */ if (cluster->hb_conn == NULL) { /* No object passed in, create a new one. */ ll_cluster_t *(*new_cluster) (const char *llctype) = find_library_function(&hb_library, HEARTBEAT_LIBRARY, "ll_cluster_new", 1); cluster->hb_conn = (*new_cluster) ("heartbeat"); /* dlclose(handle); */ } else { /* Object passed in. Disconnect first, then reconnect below. */ cluster->hb_conn->llc_ops->signoff(cluster->hb_conn, FALSE); } /* make sure we are disconnected first with the old object, if any. */ if (heartbeat_cluster && heartbeat_cluster != cluster->hb_conn) { heartbeat_cluster->llc_ops->signoff(heartbeat_cluster, FALSE); } CRM_ASSERT(cluster->hb_conn != NULL); heartbeat_cluster = cluster->hb_conn; rv = register_heartbeat_conn(cluster); if (rv) { /* we'll benefit from a bigger queue length on heartbeat side. * Otherwise, if peers send messages faster than we can consume * them right now, heartbeat messaging layer will kick us out once * it's (small) default queue fills up :( * If we fail to adjust the sendq length, that's not yet fatal, though. 
*/ if (HA_OK != heartbeat_cluster->llc_ops->set_sendq_len(heartbeat_cluster, 1024)) { crm_warn("Cannot set sendq length: %s", heartbeat_cluster->llc_ops->errmsg(heartbeat_cluster)); } } return rv; } #endif crm_info("Unsupported cluster stack: %s", getenv("HA_cluster_type")); return FALSE; } gboolean send_cluster_message(const char *node, enum crm_ais_msg_types service, xmlNode * data, gboolean ordered) { #if SUPPORT_COROSYNC if (is_openais_cluster()) { return send_ais_message(data, FALSE, node, service); } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { return send_ha_message(heartbeat_cluster, data, node, ordered); } #endif return FALSE; } void empty_uuid_cache(void) { if (crm_uuid_cache != NULL) { g_hash_table_destroy(crm_uuid_cache); crm_uuid_cache = NULL; } } void unget_uuid(const char *uname) { if (crm_uuid_cache == NULL) { return; } g_hash_table_remove(crm_uuid_cache, uname); } const char * get_uuid(const char *uname) { return get_node_uuid(0, uname); } const char * get_uname(const char *uuid) { const char *uname = NULL; if (crm_uname_cache == NULL) { crm_uname_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } CRM_CHECK(uuid != NULL, return NULL); /* avoid blocking calls where possible */ uname = g_hash_table_lookup(crm_uname_cache, uuid); if (uname != NULL) { crm_trace("%s = %s (cached)", uuid, uname); return uname; } #if SUPPORT_COROSYNC if (is_openais_cluster()) { if (!uname_is_uuid() && is_corosync_cluster()) { uint32_t id = crm_int_helper(uuid, NULL); crm_node_t *node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); uname = node ? 
node->uname : NULL; } else { uname = uuid; } if (uname) { crm_trace("Storing %s = %s", uuid, uname); g_hash_table_insert(crm_uname_cache, strdup(uuid), strdup(uname)); } } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { if (heartbeat_cluster != NULL && uuid != NULL) { cl_uuid_t uuid_raw; char *hb_uname = NULL; char *uuid_copy = strdup(uuid); cl_uuid_parse(uuid_copy, &uuid_raw); hb_uname = malloc( MAX_NAME); if (heartbeat_cluster->llc_ops->get_name_by_uuid(heartbeat_cluster, &uuid_raw, hb_uname, MAX_NAME) == HA_FAIL) { crm_err("Could not calculate uname for %s", uuid); free(uuid_copy); free(hb_uname); } else { crm_trace("Storing %s = %s", uuid, uname); g_hash_table_insert(crm_uname_cache, uuid_copy, hb_uname); } } } #endif return g_hash_table_lookup(crm_uname_cache, uuid); } void set_uuid(xmlNode * node, const char *attr, const char *uname) { const char *uuid_calc = get_uuid(uname); crm_xml_add(node, attr, uuid_calc); return; } const char * name_for_cluster_type(enum cluster_type_e type) { switch (type) { case pcmk_cluster_classic_ais: return "classic openais (with plugin)"; case pcmk_cluster_cman: return "cman"; case pcmk_cluster_corosync: return "corosync"; case pcmk_cluster_heartbeat: return "heartbeat"; case pcmk_cluster_unknown: return "unknown"; case pcmk_cluster_invalid: return "invalid"; } crm_err("Invalid cluster type: %d", type); return "invalid"; } /* Do not expose these two */ int set_cluster_type(enum cluster_type_e type); static enum cluster_type_e cluster_type = pcmk_cluster_unknown; int set_cluster_type(enum cluster_type_e type) { if (cluster_type == pcmk_cluster_unknown) { crm_info("Cluster type set to: %s", name_for_cluster_type(type)); cluster_type = type; return 0; } else if (cluster_type == type) { return 0; } else if (pcmk_cluster_unknown == type) { cluster_type = type; return 0; } crm_err("Cluster type already set to %s, ignoring %s", name_for_cluster_type(cluster_type), name_for_cluster_type(type)); return -1; } enum cluster_type_e 
get_cluster_type(void) { if (cluster_type == pcmk_cluster_unknown) { const char *cluster = getenv("HA_cluster_type"); cluster_type = pcmk_cluster_invalid; if (cluster) { crm_info("Cluster type is: '%s'", cluster); } else { #if SUPPORT_COROSYNC cluster_type = find_corosync_variant(); if (cluster_type == pcmk_cluster_unknown) { cluster = "heartbeat"; crm_info("Assuming a 'heartbeat' based cluster"); } else { cluster = name_for_cluster_type(cluster_type); crm_info("Detected an active '%s' cluster", cluster); } #else cluster = "heartbeat"; #endif } if (safe_str_eq(cluster, "heartbeat")) { #if SUPPORT_HEARTBEAT cluster_type = pcmk_cluster_heartbeat; #else cluster_type = pcmk_cluster_invalid; #endif } else if (safe_str_eq(cluster, "openais") || safe_str_eq(cluster, "classic openais (with plugin)")) { #if SUPPORT_COROSYNC cluster_type = pcmk_cluster_classic_ais; #else cluster_type = pcmk_cluster_invalid; #endif } else if (safe_str_eq(cluster, "corosync")) { #if SUPPORT_COROSYNC cluster_type = pcmk_cluster_corosync; #else cluster_type = pcmk_cluster_invalid; #endif } else if (safe_str_eq(cluster, "cman")) { #if SUPPORT_CMAN cluster_type = pcmk_cluster_cman; #else cluster_type = pcmk_cluster_invalid; #endif } else { cluster_type = pcmk_cluster_invalid; } if (cluster_type == pcmk_cluster_invalid) { crm_notice ("This installation of Pacemaker does not support the '%s' cluster infrastructure. 
Terminating.", cluster); exit(100); } } return cluster_type; } gboolean is_cman_cluster(void) { return get_cluster_type() == pcmk_cluster_cman; } gboolean is_corosync_cluster(void) { return get_cluster_type() == pcmk_cluster_corosync; } gboolean is_classic_ais_cluster(void) { return get_cluster_type() == pcmk_cluster_classic_ais; } gboolean is_openais_cluster(void) { enum cluster_type_e type = get_cluster_type(); if (type == pcmk_cluster_classic_ais) { return TRUE; } else if (type == pcmk_cluster_corosync) { return TRUE; } else if (type == pcmk_cluster_cman) { return TRUE; } return FALSE; } gboolean is_heartbeat_cluster(void) { return get_cluster_type() == pcmk_cluster_heartbeat; } diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index 356c249804..f1e3f79847 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -1,421 +1,421 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include GHashTable *crm_peer_id_cache = NULL; GHashTable *crm_peer_cache = NULL; unsigned long long crm_peer_seq = 0; gboolean crm_have_quorum = FALSE; gboolean crm_is_peer_active(const crm_node_t * node) { #if SUPPORT_COROSYNC if(is_openais_cluster()) { return crm_is_corosync_peer_active(node); } #endif #if SUPPORT_HEARTBEAT if(is_heartbeat_cluster()) { return crm_is_heartbeat_peer_active(node); } #endif crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type())); return FALSE; } static gboolean crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; crm_node_t *search = user_data; if (search != NULL && node->id != search->id) { return FALSE; } else if (crm_is_peer_active(value) == FALSE) { crm_notice("Removing %s/%u from the membership list", node->uname, node->id); return TRUE; } return FALSE; } guint reap_crm_member(uint32_t id) { int matches = 0; crm_node_t *node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); if (node == NULL) { crm_info("Peer %u is unknown", id); } else if (crm_is_peer_active(node)) { crm_warn("Peer %u/%s is still active", id, node->uname); } else { if (g_hash_table_remove(crm_peer_id_cache, GUINT_TO_POINTER(id))) { crm_notice("Removed dead peer %u from the uuid cache", id); } else { crm_warn("Peer %u/%s was not removed", id, node->uname); } matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, node); crm_notice("Removed %d dead peers with id=%u from the membership list", matches, id); } return matches; } static void crm_count_peer(gpointer key, gpointer value, gpointer user_data) { guint 
*count = user_data; crm_node_t *node = value; if (crm_is_peer_active(node)) { *count = *count + 1; } } guint crm_active_peers(void) { guint count = 0; g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count); return count; } void destroy_crm_node(gpointer data) { crm_node_t *node = data; crm_trace("Destroying entry for node %u", node->id); free(node->addr); free(node->uname); free(node->state); free(node->uuid); free(node); } void crm_peer_init(void) { static gboolean initialized = FALSE; if (initialized) { return; } initialized = TRUE; crm_peer_destroy(); if (crm_peer_cache == NULL) { crm_peer_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, destroy_crm_node); } if (crm_peer_id_cache == NULL) { crm_peer_id_cache = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); } } void crm_peer_destroy(void) { if (crm_peer_cache != NULL) { g_hash_table_destroy(crm_peer_cache); crm_peer_cache = NULL; } if (crm_peer_id_cache != NULL) { g_hash_table_destroy(crm_peer_id_cache); crm_peer_id_cache = NULL; } } void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL; void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)) { crm_status_callback = dispatch; } /* coverity[-alloc] Memory is referenced in one or both hashtables */ crm_node_t * crm_get_peer(unsigned int id, const char *uname) { crm_node_t *node = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (node == NULL && uname != NULL) { node = g_hash_table_lookup(crm_peer_cache, uname); } if (node == NULL && id > 0) { node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); if (node && node->uname && uname) { crm_crit("Node %s and %s share the same cluster node id '%u'!", node->uname, uname, id); /* NOTE: Calling crm_new_peer() means the entry in * crm_peer_id_cache will point to the new entity * * TO-DO: Replace the old uname instead? 
*/ node = NULL; } } if (node == NULL) { crm_debug("Creating entry for node %s/%u", uname, id); node = calloc(1, sizeof(crm_node_t)); CRM_ASSERT(node); } if (id > 0 && node->id != id) { node->id = id; crm_info("Node %s now has id: %u", crm_str(uname), id); g_hash_table_replace(crm_peer_id_cache, GUINT_TO_POINTER(node->id), node); } if (uname && node->uname == NULL) { node->uname = strdup(uname); crm_info("Node %u is now known as %s", id, uname); g_hash_table_replace(crm_peer_cache, node->uname, node); if (crm_status_callback) { crm_status_callback(crm_status_uname, node, NULL); } } if (node && node->uname && node->uuid == NULL) { const char *uuid = get_node_uuid(id, node->uname); if(uuid) { node->uuid = strdup(uuid); crm_info("Node %u has uuid %s", id, node->uuid); } else { crm_warn("Cannot obtain a UUID for node %d/%s", id, node->uname); } } return node; } crm_node_t * crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes, uint32_t children, const char *uuid, const char *uname, const char *addr, const char *state) { #if SUPPORT_PLUGIN gboolean addr_changed = FALSE; gboolean votes_changed = FALSE; #endif crm_node_t *node = NULL; id = get_corosync_id(id, uuid); node = crm_get_peer(id, uname); CRM_ASSERT(node != NULL); if (node->uuid == NULL) { if (is_openais_cluster()) { /* Yes, overrule whatever was passed in */ node->uuid = get_corosync_uuid(id, uname); } else if (uuid != NULL) { node->uuid = strdup(uuid); } } if (children > 0) { crm_update_peer_proc(source, node, children, state); } if (state != NULL) { crm_update_peer_state(source, node, state, seen); } #if SUPPORT_HEARTBEAT if (born != 0) { node->born = born; } #endif #if SUPPORT_PLUGIN /* These were only used by the plugin */ if (born != 0) { node->born = born; } if (votes > 0 && node->votes != votes) { votes_changed = TRUE; node->votes = votes; } if (addr != NULL) { if (node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) { addr_changed = TRUE; 
free(node->addr); node->addr = strdup(addr); } } if (addr_changed || votes_changed) { crm_info("%s: Node %s: id=%u state=%s addr=%s%s votes=%d%s born=" U64T " seen=" U64T " proc=%.32x", source, node->uname, node->id, node->state, node->addr, addr_changed ? " (new)" : "", node->votes, votes_changed ? " (new)" : "", node->born, node->last_seen, node->processes); } #endif return node; } void crm_update_peer_proc(const char *source, crm_node_t *node, uint32_t flag, const char *status) { uint32_t last = 0; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL", source, peer2text(flag), status); return); last = node->processes; if(status == NULL) { node->processes = flag; if(node->processes != last) { changed = TRUE; } } else if (safe_str_eq(status, ONLINESTATUS)) { if ((node->processes & flag) == 0) { set_bit(node->processes, flag); changed = TRUE; } #if SUPPORT_PLUGIN } else if (safe_str_eq(status, CRM_NODE_MEMBER)) { if (flag > 0 && node->processes != flag) { node->processes = flag; changed = TRUE; } #endif } else if (node->processes & flag) { clear_bit(node->processes, flag); changed = TRUE; } if (changed) { if(status == NULL) { crm_info("%s: Node %s[%d] - all processes are now offline", source, node->uname, node->id); } else { crm_info("%s: Node %s[%d] - %s is now %s", source, node->uname, node->id, peer2text(flag), status); } if (crm_status_callback) { crm_status_callback(crm_status_processes, node, &last); } } else { crm_trace("%s: Node %s[%d] - %s is unchanged (%s)", source, node->uname, node->id, peer2text(flag), status); } } void crm_update_peer_expected(const char *source, crm_node_t *node, const char *expected) { char *last = NULL; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected); return); last = node->expected; if (expected != NULL && safe_str_neq(node->expected, expected)) { node->expected = strdup(expected); changed = TRUE; } if (changed) { 
crm_info("%s: Node %s[%d] - expected state is now %s", source, node->uname, node->id, expected); free(last); } else { crm_trace("%s: Node %s[%d] - expected state is unchanged (%s)", source, node->uname, node->id, expected); } } void crm_update_peer_state(const char *source, crm_node_t *node, const char *state, int membership) { char *last = NULL; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set 'state' to %s", source, state); return); last = node->state; if (state != NULL && safe_str_neq(node->state, state)) { node->state = strdup(state); changed = TRUE; } if (membership != 0 && safe_str_eq(node->state, CRM_NODE_MEMBER)) { node->last_seen = membership; } if (changed) { - crm_notice("%s: Node %s[%d] - state is now %s", source, node->uname, node->id, state); + crm_notice("%s: Node %s[%u] - state is now %s", source, node->uname, node->id, state); if (crm_status_callback) { crm_status_callback(crm_status_nstate, node, last); } free(last); } else { - crm_trace("%s: Node %s[%d] - state is unchanged (%s)", source, node->uname, node->id, state); + crm_trace("%s: Node %s[%u] - state is unchanged (%s)", source, node->uname, node->id, state); } } int crm_terminate_member(int nodeid, const char *uname, void * unused) { /* Always use the synchronous, non-mainloop version */ return stonith_api_kick(nodeid, uname, 120, TRUE); } int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) { return stonith_api_kick(nodeid, uname, 120, TRUE); }