Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F1841643
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
58 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/daemons/pacemakerd/pcmkd_corosync.c b/daemons/pacemakerd/pcmkd_corosync.c
index 82bd25702d..113bd614c1 100644
--- a/daemons/pacemakerd/pcmkd_corosync.c
+++ b/daemons/pacemakerd/pcmkd_corosync.c
@@ -1,305 +1,306 @@
/*
* Copyright 2010-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include "pacemakerd.h"
#include <sys/utsname.h>
#include <sys/stat.h> /* for calls to stat() */
#include <libgen.h> /* For basename() and dirname() */
#include <sys/types.h>
#include <pwd.h> /* For getpwname() */
#include <corosync/hdb.h>
#include <corosync/cfg.h>
#include <corosync/cpg.h>
#include <corosync/cmap.h>
#include <crm/cluster/internal.h>
#include <crm/common/ipc.h> /* for crm_ipc_is_authentic_process */
#include <crm/common/mainloop.h>
#include <crm/common/ipc_internal.h> /* PCMK__SPECIAL_PID* */
static corosync_cfg_handle_t cfg_handle = 0;
/* =::=::=::= CFG - Shutdown stuff =::=::=::= */
static void
cfg_shutdown_callback(corosync_cfg_handle_t h, corosync_cfg_shutdown_flags_t flags)
{
crm_info("Corosync wants to shut down: %s",
(flags == COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE) ? "immediate" :
(flags == COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS) ? "forced" : "optional");
/* Never allow corosync to shut down while we're running */
corosync_cfg_replyto_shutdown(h, COROSYNC_CFG_SHUTDOWN_FLAG_NO);
}
static corosync_cfg_callbacks_t cfg_callbacks = {
.corosync_cfg_shutdown_callback = cfg_shutdown_callback,
};
static int
pcmk_cfg_dispatch(gpointer user_data)
{
corosync_cfg_handle_t *handle = (corosync_cfg_handle_t *) user_data;
cs_error_t rc = corosync_cfg_dispatch(*handle, CS_DISPATCH_ALL);
if (rc != CS_OK) {
return -1;
}
return 0;
}
static void
cfg_connection_destroy(gpointer user_data)
{
crm_err("Lost connection to Corosync");
+ corosync_cfg_finalize(cfg_handle);
cfg_handle = 0;
pcmk_shutdown(SIGTERM);
}
gboolean
cluster_disconnect_cfg(void)
{
if (cfg_handle) {
corosync_cfg_finalize(cfg_handle);
cfg_handle = 0;
}
pcmk_shutdown(SIGTERM);
return TRUE;
}
#define cs_repeat(counter, max, code) do { \
code; \
if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \
counter++; \
crm_debug("Retrying Corosync operation after %ds", counter); \
sleep(counter); \
} else { \
break; \
} \
} while(counter < max)
gboolean
cluster_connect_cfg(void)
{
cs_error_t rc;
int fd = -1, retries = 0, rv;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
uint32_t nodeid;
static struct mainloop_fd_callbacks cfg_fd_callbacks = {
.dispatch = pcmk_cfg_dispatch,
.destroy = cfg_connection_destroy,
};
cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks));
if (rc != CS_OK) {
crm_crit("Could not connect to Corosync CFG: %s " CRM_XS " rc=%d",
cs_strerror(rc), rc);
return FALSE;
}
rc = corosync_cfg_fd_get(cfg_handle, &fd);
if (rc != CS_OK) {
crm_crit("Could not get Corosync CFG descriptor: %s " CRM_XS " rc=%d",
cs_strerror(rc), rc);
goto bail;
}
/* CFG provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_crit("Rejecting Corosync CFG provider because process %lld "
"is running as uid %lld gid %lld, not root",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rv < 0) {
crm_crit("Could not authenticate Corosync CFG provider: %s "
CRM_XS " rc=%d", strerror(-rv), -rv);
goto bail;
}
retries = 0;
cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, &nodeid));
if (rc != CS_OK) {
crm_crit("Could not get local node ID from Corosync: %s "
CRM_XS " rc=%d", cs_strerror(rc), rc);
goto bail;
}
crm_debug("Corosync reports local node ID is %lu", (unsigned long) nodeid);
mainloop_add_fd("corosync-cfg", G_PRIORITY_DEFAULT, fd, &cfg_handle, &cfg_fd_callbacks);
return TRUE;
bail:
corosync_cfg_finalize(cfg_handle);
return FALSE;
}
void
pcmkd_shutdown_corosync(void)
{
cs_error_t rc;
if (cfg_handle == 0) {
crm_warn("Unable to shut down Corosync: No connection");
return;
}
crm_info("Asking Corosync to shut down");
rc = corosync_cfg_try_shutdown(cfg_handle,
COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE);
if (rc == CS_OK) {
corosync_cfg_finalize(cfg_handle);
cfg_handle = 0;
} else {
crm_warn("Corosync shutdown failed: %s " CRM_XS " rc=%d",
cs_strerror(rc), rc);
}
}
/* =::=::=::= Configuration =::=::=::= */
static int
get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, char **value,
const char *fallback)
{
int rc = 0, retries = 0;
cs_repeat(retries, 5, rc = cmap_get_string(object_handle, key, value));
if (rc != CS_OK) {
crm_trace("Search for %s failed %d, defaulting to %s", key, rc, fallback);
if (fallback) {
*value = strdup(fallback);
} else {
*value = NULL;
}
}
crm_trace("%s: %s", key, *value);
return rc;
}
gboolean
mcp_read_config(void)
{
cs_error_t rc = CS_OK;
int retries = 0;
cmap_handle_t local_handle;
uint64_t config = 0;
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
enum cluster_type_e stack;
// There can be only one possibility
do {
rc = cmap_initialize(&local_handle);
if (rc != CS_OK) {
retries++;
crm_info("Could not connect to Corosync CMAP: %s (retrying in %ds) "
CRM_XS " rc=%d", cs_strerror(rc), retries, rc);
sleep(retries);
} else {
break;
}
} while (retries < 5);
if (rc != CS_OK) {
crm_crit("Could not connect to Corosync CMAP: %s "
CRM_XS " rc=%d", cs_strerror(rc), rc);
return FALSE;
}
rc = cmap_fd_get(local_handle, &fd);
if (rc != CS_OK) {
crm_crit("Could not get Corosync CMAP descriptor: %s " CRM_XS " rc=%d",
cs_strerror(rc), rc);
cmap_finalize(local_handle);
return FALSE;
}
/* CMAP provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_crit("Rejecting Corosync CMAP provider because process %lld "
"is running as uid %lld gid %lld, not root",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
cmap_finalize(local_handle);
return FALSE;
} else if (rv < 0) {
crm_crit("Could not authenticate Corosync CMAP provider: %s "
CRM_XS " rc=%d", strerror(-rv), -rv);
cmap_finalize(local_handle);
return FALSE;
}
stack = get_cluster_type();
if (stack != pcmk_cluster_corosync) {
crm_crit("Expected corosync stack but detected %s " CRM_XS " stack=%d",
name_for_cluster_type(stack), stack);
return FALSE;
}
crm_info("Reading configuration for %s stack",
name_for_cluster_type(stack));
pcmk__set_env_option("cluster_type", "corosync");
pcmk__set_env_option("quorum_type", "corosync");
// If debug logging is not configured, check whether corosync has it
if (pcmk__env_option("debug") == NULL) {
char *debug_enabled = NULL;
get_config_opt(config, local_handle, "logging.debug", &debug_enabled, "off");
if (crm_is_true(debug_enabled)) {
pcmk__set_env_option("debug", "1");
if (get_crm_log_level() < LOG_DEBUG) {
set_crm_log_level(LOG_DEBUG);
}
} else {
pcmk__set_env_option("debug", "0");
}
free(debug_enabled);
}
if(local_handle){
gid_t gid = 0;
if (pcmk_daemon_user(NULL, &gid) < 0) {
crm_warn("Could not authorize group with Corosync " CRM_XS
" No group found for user %s", CRM_DAEMON_USER);
} else {
char key[PATH_MAX];
snprintf(key, PATH_MAX, "uidgid.gid.%u", gid);
rc = cmap_set_uint8(local_handle, key, 1);
if (rc != CS_OK) {
crm_warn("Could not authorize group with Corosync: %s " CRM_XS
" group=%u rc=%d", ais_error2text(rc), gid, rc);
}
}
}
cmap_finalize(local_handle);
return TRUE;
}
diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c
index f7b81ba567..7d51a8e1cc 100644
--- a/lib/cluster/corosync.c
+++ b/lib/cluster/corosync.c
@@ -1,766 +1,767 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <inttypes.h> /* U64T ~ PRIu64 */
#include <bzlib.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
#include <crm/common/mainloop.h>
#include <sys/utsname.h>
#include <qb/qbipcc.h>
#include <qb/qbutil.h>
#include <corosync/corodefs.h>
#include <corosync/corotypes.h>
#include <corosync/hdb.h>
#include <corosync/cfg.h>
#include <corosync/cmap.h>
#include <corosync/quorum.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc_internal.h> /* PCMK__SPECIAL_PID* */
quorum_handle_t pcmk_quorum_handle = 0;
gboolean(*quorum_app_callback) (unsigned long long seq, gboolean quorate) = NULL;
char *
get_corosync_uuid(crm_node_t *node)
{
if (node && is_corosync_cluster()) {
if (node->id > 0) {
return crm_strdup_printf("%u", node->id);
} else {
crm_info("Node %s is not yet known by corosync", node->uname);
}
}
return NULL;
}
/*
* CFG functionality stolen from node_name() in corosync-quorumtool.c
* This resolves the first address assigned to a node and returns the name or IP address.
*/
char *
corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid)
{
int lpc = 0;
cs_error_t rc = CS_OK;
int retries = 0;
char *name = NULL;
cmap_handle_t local_handle = 0;
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
if (nodeid == 0) {
nodeid = get_local_nodeid(0);
}
if (cmap_handle == 0 && local_handle == 0) {
retries = 0;
crm_trace("Initializing CMAP connection");
do {
rc = cmap_initialize(&local_handle);
if (rc != CS_OK) {
retries++;
crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc),
retries);
sleep(retries);
}
} while (retries < 5 && rc != CS_OK);
if (rc != CS_OK) {
crm_warn("Could not connect to Cluster Configuration Database API, error %s",
cs_strerror(rc));
local_handle = 0;
}
}
if (cmap_handle == 0) {
cmap_handle = local_handle;
rc = cmap_fd_get(cmap_handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CMAP API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* CMAP provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("CMAP provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
strerror(-rv), -rv);
goto bail;
}
}
while (name == NULL && cmap_handle != 0) {
uint32_t id = 0;
char *key = NULL;
key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc);
rc = cmap_get_uint32(cmap_handle, key, &id);
crm_trace("Checking %u vs %u from %s", nodeid, id, key);
free(key);
if (rc != CS_OK) {
break;
}
if (nodeid == id) {
crm_trace("Searching for node name for %u in nodelist.node.%d %s", nodeid, lpc, name);
if (name == NULL) {
key = crm_strdup_printf("nodelist.node.%d.name", lpc);
cmap_get_string(cmap_handle, key, &name);
crm_trace("%s = %s", key, name);
free(key);
}
if (name == NULL) {
key = crm_strdup_printf("nodelist.node.%d.ring0_addr", lpc);
cmap_get_string(cmap_handle, key, &name);
crm_trace("%s = %s", key, name);
if (node_name_is_valid(key, name) == FALSE) {
free(name);
name = NULL;
}
free(key);
}
break;
}
lpc++;
}
bail:
if(local_handle) {
cmap_finalize(local_handle);
}
if (name == NULL) {
crm_info("Unable to get node name for nodeid %u", nodeid);
}
return name;
}
void
terminate_cs_connection(crm_cluster_t *cluster)
{
cluster_disconnect_cpg(cluster);
if (pcmk_quorum_handle) {
quorum_finalize(pcmk_quorum_handle);
pcmk_quorum_handle = 0;
}
crm_notice("Disconnected from Corosync");
}
static int
pcmk_quorum_dispatch(gpointer user_data)
{
int rc = 0;
rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL);
if (rc < 0) {
crm_err("Connection to the Quorum API failed: %d", rc);
+ quorum_finalize(pcmk_quorum_handle);
pcmk_quorum_handle = 0;
return -1;
}
return 0;
}
static void
pcmk_quorum_notification(quorum_handle_t handle,
uint32_t quorate,
uint64_t ring_id, uint32_t view_list_entries, uint32_t * view_list)
{
int i;
GHashTableIter iter;
crm_node_t *node = NULL;
static gboolean init_phase = TRUE;
if (quorate != crm_have_quorum) {
if (quorate) {
crm_notice("Quorum acquired " CRM_XS " membership=%" U64T " members=%lu",
ring_id, (long unsigned int)view_list_entries);
} else {
crm_warn("Quorum lost " CRM_XS " membership=%" U64T " members=%lu",
ring_id, (long unsigned int)view_list_entries);
}
crm_have_quorum = quorate;
} else {
crm_info("Quorum %s " CRM_XS " membership=%" U64T " members=%lu",
(quorate? "retained" : "still lost"), ring_id,
(long unsigned int)view_list_entries);
}
if (view_list_entries == 0 && init_phase) {
crm_info("Corosync membership is still forming, ignoring");
return;
}
init_phase = FALSE;
/* Reset last_seen for all cached nodes so we can tell which ones aren't
* in the view list */
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
node->last_seen = 0;
}
/* Update the peer cache for each node in view list */
for (i = 0; i < view_list_entries; i++) {
uint32_t id = view_list[i];
crm_debug("Member[%d] %u ", i, id);
/* Get this node's peer cache entry (adding one if not already there) */
node = crm_get_peer(id, NULL);
if (node->uname == NULL) {
char *name = corosync_node_name(0, id);
crm_info("Obtaining name for new node %u", id);
node = crm_get_peer(id, name);
free(name);
}
/* Update the node state (including updating last_seen to ring_id) */
crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, ring_id);
}
/* Remove any peer cache entries we didn't update */
crm_reap_unseen_nodes(ring_id);
if (quorum_app_callback) {
quorum_app_callback(ring_id, quorate);
}
}
quorum_callbacks_t quorum_callbacks = {
.quorum_notify_fn = pcmk_quorum_notification,
};
gboolean
cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean),
void (*destroy) (gpointer))
{
cs_error_t rc;
int fd = 0;
int quorate = 0;
uint32_t quorum_type = 0;
struct mainloop_fd_callbacks quorum_fd_callbacks;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
quorum_fd_callbacks.dispatch = pcmk_quorum_dispatch;
quorum_fd_callbacks.destroy = destroy;
crm_debug("Configuring Pacemaker to obtain quorum from Corosync");
rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks, &quorum_type);
if (rc != CS_OK) {
crm_err("Could not connect to the Quorum API: %s (%d)",
cs_strerror(rc), rc);
goto bail;
} else if (quorum_type != QUORUM_SET) {
crm_err("Corosync quorum is not configured");
goto bail;
}
rc = quorum_fd_get(pcmk_quorum_handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the Quorum API connection: %s (%d)",
strerror(rc), rc);
goto bail;
}
/* Quorum provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("Quorum provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
rc = CS_ERR_ACCESS;
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of Quorum provider: %s (%d)",
strerror(-rv), -rv);
rc = CS_ERR_ACCESS;
goto bail;
}
rc = quorum_getquorate(pcmk_quorum_handle, &quorate);
if (rc != CS_OK) {
crm_err("Could not obtain the current Quorum API state: %d", rc);
goto bail;
}
if (quorate) {
crm_notice("Quorum acquired");
} else {
crm_warn("Quorum lost");
}
quorum_app_callback = dispatch;
crm_have_quorum = quorate;
rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES | CS_TRACK_CURRENT);
if (rc != CS_OK) {
crm_err("Could not setup Quorum API notifications: %d", rc);
goto bail;
}
mainloop_add_fd("quorum", G_PRIORITY_HIGH, fd, dispatch, &quorum_fd_callbacks);
corosync_initialize_nodelist(NULL, FALSE, NULL);
bail:
if (rc != CS_OK) {
quorum_finalize(pcmk_quorum_handle);
return FALSE;
}
return TRUE;
}
gboolean
init_cs_connection(crm_cluster_t * cluster)
{
int retries = 0;
while (retries < 5) {
int rc = init_cs_connection_once(cluster);
retries++;
switch (rc) {
case CS_OK:
return TRUE;
case CS_ERR_TRY_AGAIN:
case CS_ERR_QUEUE_FULL:
sleep(retries);
break;
default:
return FALSE;
}
}
crm_err("Could not connect to corosync after %d retries", retries);
return FALSE;
}
gboolean
init_cs_connection_once(crm_cluster_t * cluster)
{
crm_node_t *peer = NULL;
enum cluster_type_e stack = get_cluster_type();
crm_peer_init();
/* Here we just initialize comms */
if (stack != pcmk_cluster_corosync) {
crm_err("Invalid cluster type: %s (%d)", name_for_cluster_type(stack), stack);
return FALSE;
}
if (cluster_connect_cpg(cluster) == FALSE) {
return FALSE;
}
crm_info("Connection to '%s': established", name_for_cluster_type(stack));
cluster->nodeid = get_local_nodeid(0);
if(cluster->nodeid == 0) {
crm_err("Could not establish local nodeid");
return FALSE;
}
cluster->uname = get_node_name(0);
if(cluster->uname == NULL) {
crm_err("Could not establish local node name");
return FALSE;
}
/* Ensure the local node always exists */
peer = crm_get_peer(cluster->nodeid, cluster->uname);
cluster->uuid = get_corosync_uuid(peer);
return TRUE;
}
gboolean
check_message_sanity(const AIS_Message * msg, const char *data)
{
gboolean sane = TRUE;
int dest = msg->host.type;
int tmp_size = msg->header.size - sizeof(AIS_Message);
if (sane && msg->header.size == 0) {
crm_warn("Message with no size");
sane = FALSE;
}
if (sane && msg->header.error != CS_OK) {
crm_warn("Message header contains an error: %d", msg->header.error);
sane = FALSE;
}
if (sane && ais_data_len(msg) != tmp_size) {
crm_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg),
tmp_size);
sane = TRUE;
}
if (sane && ais_data_len(msg) == 0) {
crm_warn("Message with no payload");
sane = FALSE;
}
if (sane && data && msg->is_compressed == FALSE) {
int str_size = strlen(data) + 1;
if (ais_data_len(msg) != str_size) {
int lpc = 0;
crm_warn("Message payload is corrupted: expected %d bytes, got %d",
ais_data_len(msg), str_size);
sane = FALSE;
for (lpc = (str_size - 10); lpc < msg->size; lpc++) {
if (lpc < 0) {
lpc = 0;
}
crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]);
}
}
}
if (sane == FALSE) {
crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%u, compressed=%d, size=%d, total=%d)",
msg->id, ais_dest(&(msg->host)), msg_type2text(dest),
ais_dest(&(msg->sender)), msg_type2text(msg->sender.type),
msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size);
} else {
crm_trace
("Verified message %d: (dest=%s:%s, from=%s:%s.%u, compressed=%d, size=%d, total=%d)",
msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)),
msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed,
ais_data_len(msg), msg->header.size);
}
return sane;
}
enum cluster_type_e
find_corosync_variant(void)
{
int rc = CS_OK;
cmap_handle_t handle;
rc = cmap_initialize(&handle);
switch(rc) {
case CS_OK:
break;
case CS_ERR_SECURITY:
crm_debug("Failed to initialize the cmap API: Permission denied (%d)", rc);
/* It's there, we just can't talk to it.
* Good enough for us to identify as 'corosync'
*/
return pcmk_cluster_corosync;
default:
crm_info("Failed to initialize the cmap API: %s (%d)",
ais_error2text(rc), rc);
return pcmk_cluster_unknown;
}
cmap_finalize(handle);
return pcmk_cluster_corosync;
}
gboolean
crm_is_corosync_peer_active(const crm_node_t * node)
{
if (node == NULL) {
crm_trace("NULL");
return FALSE;
} else if (safe_str_neq(node->state, CRM_NODE_MEMBER)) {
crm_trace("%s: state=%s", node->uname, node->state);
return FALSE;
} else if ((node->processes & crm_proc_cpg) == 0) {
crm_trace("%s: processes=%.16x", node->uname, node->processes);
return FALSE;
}
return TRUE;
}
gboolean
corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml_parent)
{
int lpc = 0;
cs_error_t rc = CS_OK;
int retries = 0;
gboolean any = FALSE;
cmap_handle_t cmap_handle;
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
do {
rc = cmap_initialize(&cmap_handle);
if (rc != CS_OK) {
retries++;
crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc),
retries);
sleep(retries);
}
} while (retries < 5 && rc != CS_OK);
if (rc != CS_OK) {
crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc);
return FALSE;
}
rc = cmap_fd_get(cmap_handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CMAP API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* CMAP provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("CMAP provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
strerror(-rv), -rv);
goto bail;
}
crm_peer_init();
crm_trace("Initializing corosync nodelist");
for (lpc = 0; TRUE; lpc++) {
uint32_t nodeid = 0;
char *name = NULL;
char *key = NULL;
key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc);
rc = cmap_get_uint32(cmap_handle, key, &nodeid);
free(key);
if (rc != CS_OK) {
break;
}
name = corosync_node_name(cmap_handle, nodeid);
if (name != NULL) {
GHashTableIter iter;
crm_node_t *node = NULL;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if(node && node->uname && strcasecmp(node->uname, name) == 0) {
if (node->id && node->id != nodeid) {
crm_crit("Nodes %u and %u share the same name '%s': shutting down", node->id,
nodeid, name);
crm_exit(CRM_EX_FATAL);
}
}
}
}
if (nodeid > 0 || name != NULL) {
crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name);
crm_get_peer(nodeid, name);
}
if (nodeid > 0 && name != NULL) {
any = TRUE;
if (xml_parent) {
xmlNode *node = create_xml_node(xml_parent, XML_CIB_TAG_NODE);
crm_xml_set_id(node, "%u", nodeid);
crm_xml_add(node, XML_ATTR_UNAME, name);
if (force_member) {
crm_xml_add(node, XML_ATTR_TYPE, CRM_NODE_MEMBER);
}
}
}
free(name);
}
bail:
cmap_finalize(cmap_handle);
return any;
}
char *
corosync_cluster_name(void)
{
cmap_handle_t handle;
char *cluster_name = NULL;
cs_error_t rc = CS_OK;
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
rc = cmap_initialize(&handle);
if (rc != CS_OK) {
crm_info("Failed to initialize the cmap API: %s (%d)",
cs_strerror(rc), rc);
return NULL;
}
rc = cmap_fd_get(handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CMAP API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* CMAP provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("CMAP provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
strerror(-rv), -rv);
goto bail;
}
rc = cmap_get_string(handle, "totem.cluster_name", &cluster_name);
if (rc != CS_OK) {
crm_info("Cannot get totem.cluster_name: %s (%d)", cs_strerror(rc), rc);
} else {
crm_debug("cmap totem.cluster_name = '%s'", cluster_name);
}
bail:
cmap_finalize(handle);
return cluster_name;
}
int
corosync_cmap_has_config(const char *prefix)
{
cs_error_t rc = CS_OK;
int retries = 0;
static int found = -1;
cmap_handle_t cmap_handle;
cmap_iter_handle_t iter_handle;
char key_name[CMAP_KEYNAME_MAXLEN + 1];
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
if(found != -1) {
return found;
}
do {
rc = cmap_initialize(&cmap_handle);
if (rc != CS_OK) {
retries++;
crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc),
retries);
sleep(retries);
}
} while (retries < 5 && rc != CS_OK);
if (rc != CS_OK) {
crm_warn("Could not connect to Cluster Configuration Database API: %s (rc=%d)",
cs_strerror(rc), rc);
return -1;
}
rc = cmap_fd_get(cmap_handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CMAP API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* CMAP provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("CMAP provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
strerror(-rv), -rv);
goto bail;
}
rc = cmap_iter_init(cmap_handle, prefix, &iter_handle);
if (rc != CS_OK) {
crm_warn("Failed to initialize iteration for corosync cmap '%s': %s (rc=%d)",
prefix, cs_strerror(rc), rc);
goto bail;
}
found = 0;
while ((rc = cmap_iter_next(cmap_handle, iter_handle, key_name, NULL, NULL)) == CS_OK) {
crm_trace("'%s' is configured in corosync cmap: %s", prefix, key_name);
found++;
break;
}
cmap_iter_finalize(cmap_handle, iter_handle);
bail:
cmap_finalize(cmap_handle);
return found;
}
diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c
index 2befcbcf2d..91c913caec 100644
--- a/lib/cluster/cpg.c
+++ b/lib/cluster/cpg.c
@@ -1,812 +1,813 @@
/*
* Copyright 2004-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <bzlib.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
#include <crm/common/mainloop.h>
#include <sys/utsname.h>
#include <qb/qbipcc.h>
#include <qb/qbutil.h>
#include <corosync/corodefs.h>
#include <corosync/corotypes.h>
#include <corosync/hdb.h>
#include <corosync/cpg.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc_internal.h> /* PCMK__SPECIAL_PID* */
cpg_handle_t pcmk_cpg_handle = 0; /* TODO: Remove, use cluster.cpg_handle */
static bool cpg_evicted = FALSE;
gboolean(*pcmk_cpg_dispatch_fn) (int kind, const char *from, const char *data) = NULL;
#define cs_repeat(counter, max, code) do { \
code; \
if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \
counter++; \
crm_debug("Retrying operation after %ds", counter); \
sleep(counter); \
} else { \
break; \
} \
} while(counter < max)
void
cluster_disconnect_cpg(crm_cluster_t *cluster)
{
pcmk_cpg_handle = 0;
if (cluster->cpg_handle) {
crm_trace("Disconnecting CPG");
cpg_leave(cluster->cpg_handle, &cluster->group);
cpg_finalize(cluster->cpg_handle);
cluster->cpg_handle = 0;
} else {
crm_info("No CPG connection");
}
}
uint32_t get_local_nodeid(cpg_handle_t handle)
{
cs_error_t rc = CS_OK;
int retries = 0;
static uint32_t local_nodeid = 0;
cpg_handle_t local_handle = handle;
cpg_callbacks_t cb = { };
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
if(local_nodeid != 0) {
return local_nodeid;
}
if(handle == 0) {
crm_trace("Creating connection");
cs_repeat(retries, 5, rc = cpg_initialize(&local_handle, &cb));
if (rc != CS_OK) {
crm_err("Could not connect to the CPG API: %s (%d)",
cs_strerror(rc), rc);
return 0;
}
rc = cpg_fd_get(local_handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CPG API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* CPG provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("CPG provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CPG provider: %s (%d)",
strerror(-rv), -rv);
goto bail;
}
}
if (rc == CS_OK) {
retries = 0;
crm_trace("Performing lookup");
cs_repeat(retries, 5, rc = cpg_local_get(local_handle, &local_nodeid));
}
if (rc != CS_OK) {
crm_err("Could not get local node id from the CPG API: %s (%d)", ais_error2text(rc), rc);
}
bail:
if(handle == 0) {
crm_trace("Closing connection");
cpg_finalize(local_handle);
}
crm_debug("Local nodeid is %u", local_nodeid);
return local_nodeid;
}
GListPtr cs_message_queue = NULL;
int cs_message_timer = 0;
static ssize_t crm_cs_flush(gpointer data);
static gboolean
crm_cs_flush_cb(gpointer data)
{
cs_message_timer = 0;
crm_cs_flush(data);
return FALSE;
}
#define CS_SEND_MAX 200
static ssize_t
crm_cs_flush(gpointer data)
{
int sent = 0;
ssize_t rc = 0;
int queue_len = 0;
static unsigned int last_sent = 0;
cpg_handle_t *handle = (cpg_handle_t *)data;
if (*handle == 0) {
crm_trace("Connection is dead");
return pcmk_ok;
}
queue_len = g_list_length(cs_message_queue);
if ((queue_len % 1000) == 0 && queue_len > 1) {
crm_err("CPG queue has grown to %d", queue_len);
} else if (queue_len == CS_SEND_MAX) {
crm_warn("CPG queue has grown to %d", queue_len);
}
if (cs_message_timer) {
/* There is already a timer, wait until it goes off */
crm_trace("Timer active %d", cs_message_timer);
return pcmk_ok;
}
while (cs_message_queue && sent < CS_SEND_MAX) {
struct iovec *iov = cs_message_queue->data;
errno = 0;
rc = cpg_mcast_joined(*handle, CPG_TYPE_AGREED, iov, 1);
if (rc != CS_OK) {
break;
}
sent++;
last_sent++;
crm_trace("CPG message sent, size=%llu",
(unsigned long long) iov->iov_len);
cs_message_queue = g_list_remove(cs_message_queue, iov);
free(iov->iov_base);
free(iov);
}
queue_len -= sent;
if (sent > 1 || cs_message_queue) {
crm_info("Sent %d CPG messages (%d remaining, last=%u): %s (%lld)",
sent, queue_len, last_sent, ais_error2text(rc),
(long long) rc);
} else {
crm_trace("Sent %d CPG messages (%d remaining, last=%u): %s (%lld)",
sent, queue_len, last_sent, ais_error2text(rc),
(long long) rc);
}
if (cs_message_queue) {
uint32_t delay_ms = 100;
if(rc != CS_OK) {
/* Proportionally more if sending failed but cap at 1s */
delay_ms = QB_MIN(1000, CS_SEND_MAX + (10 * queue_len));
}
cs_message_timer = g_timeout_add(delay_ms, crm_cs_flush_cb, data);
}
return rc;
}
gboolean
send_cpg_iov(struct iovec * iov)
{
static unsigned int queued = 0;
queued++;
crm_trace("Queueing CPG message %u (%llu bytes)",
queued, (unsigned long long) iov->iov_len);
cs_message_queue = g_list_append(cs_message_queue, iov);
crm_cs_flush(&pcmk_cpg_handle);
return TRUE;
}
static int
pcmk_cpg_dispatch(gpointer user_data)
{
int rc = 0;
crm_cluster_t *cluster = (crm_cluster_t*) user_data;
rc = cpg_dispatch(cluster->cpg_handle, CS_DISPATCH_ONE);
if (rc != CS_OK) {
crm_err("Connection to the CPG API failed: %s (%d)", ais_error2text(rc), rc);
+ cpg_finalize(cluster->cpg_handle);
cluster->cpg_handle = 0;
return -1;
} else if(cpg_evicted) {
crm_err("Evicted from CPG membership");
return -1;
}
return 0;
}
char *
pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void *content,
uint32_t *kind, const char **from)
{
char *data = NULL;
AIS_Message *msg = (AIS_Message *) content;
if(handle) {
// Do filtering and field massaging
uint32_t local_nodeid = get_local_nodeid(handle);
const char *local_name = get_local_node_name();
if (msg->sender.id > 0 && msg->sender.id != nodeid) {
crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, msg->sender.id);
return NULL;
} else if (msg->host.id != 0 && (local_nodeid != msg->host.id)) {
/* Not for us */
crm_trace("Not for us: %u != %u", msg->host.id, local_nodeid);
return NULL;
} else if (msg->host.size != 0 && safe_str_neq(msg->host.uname, local_name)) {
/* Not for us */
crm_trace("Not for us: %s != %s", msg->host.uname, local_name);
return NULL;
}
msg->sender.id = nodeid;
if (msg->sender.size == 0) {
crm_node_t *peer = crm_get_peer(nodeid, NULL);
if (peer == NULL) {
crm_err("Peer with nodeid=%u is unknown", nodeid);
} else if (peer->uname == NULL) {
crm_err("No uname for peer with nodeid=%u", nodeid);
} else {
crm_notice("Fixing uname for peer with nodeid=%u", nodeid);
msg->sender.size = strlen(peer->uname);
memset(msg->sender.uname, 0, MAX_NAME);
memcpy(msg->sender.uname, peer->uname, msg->sender.size);
}
}
}
crm_trace("Got new%s message (size=%d, %d, %d)",
msg->is_compressed ? " compressed" : "",
ais_data_len(msg), msg->size, msg->compressed_size);
if (kind != NULL) {
*kind = msg->header.id;
}
if (from != NULL) {
*from = msg->sender.uname;
}
if (msg->is_compressed && msg->size > 0) {
int rc = BZ_OK;
char *uncompressed = NULL;
unsigned int new_size = msg->size + 1;
if (check_message_sanity(msg, NULL) == FALSE) {
goto badmsg;
}
crm_trace("Decompressing message data");
uncompressed = calloc(1, new_size);
rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, msg->data, msg->compressed_size, 1, 0);
if (rc != BZ_OK) {
crm_err("Decompression failed: %s " CRM_XS " bzerror=%d",
bz2_strerror(rc), rc);
free(uncompressed);
goto badmsg;
}
CRM_ASSERT(rc == BZ_OK);
CRM_ASSERT(new_size == msg->size);
data = uncompressed;
} else if (check_message_sanity(msg, data) == FALSE) {
goto badmsg;
} else if (safe_str_eq("identify", data)) {
char *pid_s = pcmk__getpid_s();
send_cluster_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais);
free(pid_s);
return NULL;
} else {
data = strdup(msg->data);
}
// Is this necessary?
crm_get_peer(msg->sender.id, msg->sender.uname);
crm_trace("Payload: %.200s", data);
return data;
badmsg:
crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):"
" min=%d, total=%d, size=%d, bz2_size=%d",
msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type),
ais_dest(&(msg->sender)), msg_type2text(msg->sender.type),
msg->sender.pid, (int)sizeof(AIS_Message),
msg->header.size, msg->size, msg->compressed_size);
free(data);
return NULL;
}
static int cmp_member_list_nodeid(const void *first,
const void *second)
{
const struct cpg_address *const a = *((const struct cpg_address **) first),
*const b = *((const struct cpg_address **) second);
if (a->nodeid < b->nodeid) {
return -1;
} else if (a->nodeid > b->nodeid) {
return 1;
}
/* don't bother with "reason" nor "pid" */
return 0;
}
static const char *
cpgreason2str(cpg_reason_t reason)
{
switch (reason) {
case CPG_REASON_JOIN: return " via cpg_join";
case CPG_REASON_LEAVE: return " via cpg_leave";
case CPG_REASON_NODEDOWN: return " via cluster exit";
case CPG_REASON_NODEUP: return " via cluster join";
case CPG_REASON_PROCDOWN: return " for unknown reason";
default: break;
}
return "";
}
static inline const char *
peer_name(crm_node_t *peer)
{
if (peer == NULL) {
return "unknown node";
} else if (peer->uname == NULL) {
return "peer node";
} else {
return peer->uname;
}
}
void
pcmk_cpg_membership(cpg_handle_t handle,
const struct cpg_name *groupName,
const struct cpg_address *member_list, size_t member_list_entries,
const struct cpg_address *left_list, size_t left_list_entries,
const struct cpg_address *joined_list, size_t joined_list_entries)
{
int i;
gboolean found = FALSE;
static int counter = 0;
uint32_t local_nodeid = get_local_nodeid(handle);
const struct cpg_address *key, **sorted;
sorted = malloc(member_list_entries * sizeof(const struct cpg_address *));
CRM_ASSERT(sorted != NULL);
for (size_t iter = 0; iter < member_list_entries; iter++) {
sorted[iter] = member_list + iter;
}
/* so that the cross-matching multiply-subscribed nodes is then cheap */
qsort(sorted, member_list_entries, sizeof(const struct cpg_address *),
cmp_member_list_nodeid);
for (i = 0; i < left_list_entries; i++) {
crm_node_t *peer = crm_find_peer(left_list[i].nodeid, NULL);
const struct cpg_address **rival = NULL;
/* in CPG world, NODE:PROCESS-IN-MEMBERSHIP-OF-G is an 1:N relation
and not playing by this rule may go wild in case of multiple
residual instances of the same pacemaker daemon at the same node
-- we must ensure that the possible local rival(s) won't make us
cry out and bail (e.g. when they quit themselves), since all the
surrounding logic denies this simple fact that the full membership
is discriminated also per the PID of the process beside mere node
ID (and implicitly, group ID); practically, this will be sound in
terms of not preventing progress, since all the CPG joiners are
also API end-point carriers, and that's what matters locally
(who's the winner);
remotely, we will just compare leave_list and member_list and if
the left process has its node retained in member_list (under some
other PID, anyway) we will just ignore it as well
XXX: long-term fix is to establish in-out PID-aware tracking? */
if (peer) {
key = &left_list[i];
rival = bsearch(&key, sorted, member_list_entries,
sizeof(const struct cpg_address *),
cmp_member_list_nodeid);
}
if (rival == NULL) {
crm_info("Group %s event %d: %s (node %u pid %u) left%s",
groupName->value, counter, peer_name(peer),
left_list[i].nodeid, left_list[i].pid,
cpgreason2str(left_list[i].reason));
if (peer) {
crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg,
OFFLINESTATUS);
}
} else if (left_list[i].nodeid == local_nodeid) {
crm_warn("Group %s event %d: duplicate local pid %u left%s",
groupName->value, counter,
left_list[i].pid, cpgreason2str(left_list[i].reason));
} else {
crm_warn("Group %s event %d: "
"%s (node %u) duplicate pid %u left%s (%u remains)",
groupName->value, counter, peer_name(peer),
left_list[i].nodeid, left_list[i].pid,
cpgreason2str(left_list[i].reason), (*rival)->pid);
}
}
free(sorted);
sorted = NULL;
for (i = 0; i < joined_list_entries; i++) {
crm_info("Group %s event %d: node %u pid %u joined%s",
groupName->value, counter, joined_list[i].nodeid,
joined_list[i].pid, cpgreason2str(joined_list[i].reason));
}
for (i = 0; i < member_list_entries; i++) {
crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL);
if (member_list[i].nodeid == local_nodeid
&& member_list[i].pid != getpid()) {
/* see the note above */
crm_warn("Group %s event %d: detected duplicate local pid %u",
groupName->value, counter, member_list[i].pid);
continue;
}
crm_info("Group %s event %d: %s (node %u pid %u) is member",
groupName->value, counter, peer_name(peer),
member_list[i].nodeid, member_list[i].pid);
/* If the caller left auto-reaping enabled, this will also update the
* state to member.
*/
peer = crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS);
if (peer && peer->state && strcmp(peer->state, CRM_NODE_MEMBER)) {
/* The node is a CPG member, but we currently think it's not a
* cluster member. This is possible only if auto-reaping was
* disabled. The node may be joining, and we happened to get the CPG
* notification before the quorum notification; or the node may have
* just died, and we are processing its final messages; or a bug
* has affected the peer cache.
*/
time_t now = time(NULL);
if (peer->when_lost == 0) {
// Track when we first got into this contradictory state
peer->when_lost = now;
} else if (now > (peer->when_lost + 60)) {
// If it persists for more than a minute, update the state
crm_warn("Node %u is member of group %s but was believed offline",
member_list[i].nodeid, groupName->value);
crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, 0);
}
}
if (local_nodeid == member_list[i].nodeid) {
found = TRUE;
}
}
if (!found) {
crm_err("Local node was evicted from group %s", groupName->value);
cpg_evicted = TRUE;
}
counter++;
}
gboolean
cluster_connect_cpg(crm_cluster_t *cluster)
{
cs_error_t rc;
int fd = -1;
int retries = 0;
uint32_t id = 0;
crm_node_t *peer = NULL;
cpg_handle_t handle = 0;
const char *message_name = pcmk_message_name(crm_system_name);
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
struct mainloop_fd_callbacks cpg_fd_callbacks = {
.dispatch = pcmk_cpg_dispatch,
.destroy = cluster->destroy,
};
cpg_callbacks_t cpg_callbacks = {
.cpg_deliver_fn = cluster->cpg.cpg_deliver_fn,
.cpg_confchg_fn = cluster->cpg.cpg_confchg_fn,
/* .cpg_deliver_fn = pcmk_cpg_deliver, */
/* .cpg_confchg_fn = pcmk_cpg_membership, */
};
cpg_evicted = FALSE;
cluster->group.length = 0;
cluster->group.value[0] = 0;
/* group.value is char[128] */
strncpy(cluster->group.value, message_name, 127);
cluster->group.value[127] = 0;
cluster->group.length = 1 + QB_MIN(127, strlen(cluster->group.value));
cs_repeat(retries, 30, rc = cpg_initialize(&handle, &cpg_callbacks));
if (rc != CS_OK) {
crm_err("Could not connect to the CPG API: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
rc = cpg_fd_get(handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CPG API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* CPG provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("CPG provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
rc = CS_ERR_ACCESS;
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CPG provider: %s (%d)",
strerror(-rv), -rv);
rc = CS_ERR_ACCESS;
goto bail;
}
id = get_local_nodeid(handle);
if (id == 0) {
crm_err("Could not get local node id from the CPG API");
goto bail;
}
cluster->nodeid = id;
retries = 0;
cs_repeat(retries, 30, rc = cpg_join(handle, &cluster->group));
if (rc != CS_OK) {
crm_err("Could not join the CPG group '%s': %d", message_name, rc);
goto bail;
}
pcmk_cpg_handle = handle;
cluster->cpg_handle = handle;
mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, cluster, &cpg_fd_callbacks);
bail:
if (rc != CS_OK) {
cpg_finalize(handle);
return FALSE;
}
peer = crm_get_peer(id, NULL);
crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS);
return TRUE;
}
gboolean
send_cluster_message_cs(xmlNode * msg, gboolean local, crm_node_t * node, enum crm_ais_msg_types dest)
{
gboolean rc = TRUE;
char *data = NULL;
data = dump_xml_unformatted(msg);
rc = send_cluster_text(crm_class_cluster, data, local, node, dest);
free(data);
return rc;
}
gboolean
send_cluster_text(enum crm_ais_msg_class msg_class, const char *data,
gboolean local, crm_node_t *node, enum crm_ais_msg_types dest)
{
static int msg_id = 0;
static int local_pid = 0;
static int local_name_len = 0;
static const char *local_name = NULL;
char *target = NULL;
struct iovec *iov;
AIS_Message *msg = NULL;
enum crm_ais_msg_types sender = text2msg_type(crm_system_name);
switch (msg_class) {
case crm_class_cluster:
break;
default:
crm_err("Invalid message class: %d", msg_class);
return FALSE;
}
CRM_CHECK(dest != crm_msg_ais, return FALSE);
if(local_name == NULL) {
local_name = get_local_node_name();
}
if(local_name_len == 0 && local_name) {
local_name_len = strlen(local_name);
}
if (data == NULL) {
data = "";
}
if (local_pid == 0) {
local_pid = getpid();
}
if (sender == crm_msg_none) {
sender = local_pid;
}
msg = calloc(1, sizeof(AIS_Message));
msg_id++;
msg->id = msg_id;
msg->header.id = msg_class;
msg->header.error = CS_OK;
msg->host.type = dest;
msg->host.local = local;
if (node) {
if (node->uname) {
target = strdup(node->uname);
msg->host.size = strlen(node->uname);
memset(msg->host.uname, 0, MAX_NAME);
memcpy(msg->host.uname, node->uname, msg->host.size);
} else {
target = crm_strdup_printf("%u", node->id);
}
msg->host.id = node->id;
} else {
target = strdup("all");
}
msg->sender.id = 0;
msg->sender.type = sender;
msg->sender.pid = local_pid;
msg->sender.size = local_name_len;
memset(msg->sender.uname, 0, MAX_NAME);
if(local_name && msg->sender.size) {
memcpy(msg->sender.uname, local_name, msg->sender.size);
}
msg->size = 1 + strlen(data);
msg->header.size = sizeof(AIS_Message) + msg->size;
if (msg->size < CRM_BZ2_THRESHOLD) {
msg = realloc_safe(msg, msg->header.size);
memcpy(msg->data, data, msg->size);
} else {
char *compressed = NULL;
unsigned int new_size = 0;
char *uncompressed = strdup(data);
if (pcmk__compress(uncompressed, (unsigned int) msg->size, 0,
&compressed, &new_size) == pcmk_rc_ok) {
msg->header.size = sizeof(AIS_Message) + new_size;
msg = realloc_safe(msg, msg->header.size);
memcpy(msg->data, compressed, new_size);
msg->is_compressed = TRUE;
msg->compressed_size = new_size;
} else {
// cppcheck seems not to understand the abort logic in realloc_safe
// cppcheck-suppress memleak
msg = realloc_safe(msg, msg->header.size);
memcpy(msg->data, data, msg->size);
}
free(uncompressed);
free(compressed);
}
iov = calloc(1, sizeof(struct iovec));
iov->iov_base = msg;
iov->iov_len = msg->header.size;
if (msg->compressed_size) {
crm_trace("Queueing CPG message %u to %s (%llu bytes, %d bytes compressed payload): %.200s",
msg->id, target, (unsigned long long) iov->iov_len,
msg->compressed_size, data);
} else {
crm_trace("Queueing CPG message %u to %s (%llu bytes, %d bytes payload): %.200s",
msg->id, target, (unsigned long long) iov->iov_len,
msg->size, data);
}
free(target);
send_cpg_iov(iov);
return TRUE;
}
enum crm_ais_msg_types
text2msg_type(const char *text)
{
int type = crm_msg_none;
CRM_CHECK(text != NULL, return type);
text = pcmk_message_name(text);
if (safe_str_eq(text, "ais")) {
type = crm_msg_ais;
} else if (safe_str_eq(text, CRM_SYSTEM_CIB)) {
type = crm_msg_cib;
} else if (safe_str_eq(text, CRM_SYSTEM_CRMD)
|| safe_str_eq(text, CRM_SYSTEM_DC)) {
type = crm_msg_crmd;
} else if (safe_str_eq(text, CRM_SYSTEM_TENGINE)) {
type = crm_msg_te;
} else if (safe_str_eq(text, CRM_SYSTEM_PENGINE)) {
type = crm_msg_pe;
} else if (safe_str_eq(text, CRM_SYSTEM_LRMD)) {
type = crm_msg_lrmd;
} else if (safe_str_eq(text, CRM_SYSTEM_STONITHD)) {
type = crm_msg_stonithd;
} else if (safe_str_eq(text, "stonith-ng")) {
type = crm_msg_stonith_ng;
} else if (safe_str_eq(text, "attrd")) {
type = crm_msg_attrd;
} else {
/* This will normally be a transient client rather than
* a cluster daemon. Set the type to the pid of the client
*/
int scan_rc = sscanf(text, "%d", &type);
if (scan_rc != 1 || type <= crm_msg_stonith_ng) {
/* Ensure it's sane */
type = crm_msg_none;
}
}
return type;
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Nov 23, 6:54 AM (1 d, 14 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1018336
Default Alt Text
(58 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment