diff --git a/exec/main.c b/exec/main.c
index 85c74ee7..0ca5634a 100644
--- a/exec/main.c
+++ b/exec/main.c
@@ -1,1474 +1,1407 @@
/*
* Copyright (c) 2002-2006 MontaVista Software, Inc.
* Copyright (c) 2006-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* \mainpage Corosync
*
* This is the doxygen generated developer documentation for the Corosync
* project. For more information about Corosync, please see the project
* web site, corosync.org.
*
* \section license License
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "quorum.h"
#include "totemsrp.h"
#include "logconfig.h"
#include "totemconfig.h"
#include "main.h"
#include "sync.h"
#include "timer.h"
#include "util.h"
#include "apidef.h"
#include "service.h"
#include "schedwrk.h"
/*
 * IPC_LOGSYS_SIZE is selected at compile time: a smaller value is used when
 * HAVE_SMALL_MEMORY_FOOTPRINT is defined. Presumably this is a logsys IPC
 * buffer size (name-based; no use is visible in this part of the file).
 *
 * The expansions are parenthesized so the macro keeps its value when it is
 * used inside a larger expression (e.g. as a divisor).
 */
#ifdef HAVE_SMALL_MEMORY_FOOTPRINT
#define IPC_LOGSYS_SIZE (1024*64)
#else
#define IPC_LOGSYS_SIZE (8192*128)
#endif
/*
 * Declare the "corosync" logging system with stderr and syslog output
 * enabled, passing LOG_DAEMON and LOG_INFO, and register this file under
 * the "MAIN" subsystem.
 */
LOGSYS_DECLARE_SYSTEM ("corosync",
LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_OUTPUT_SYSLOG,
LOG_DAEMON,
LOG_INFO);
LOGSYS_DECLARE_SUBSYS ("MAIN");
/* NOTE(review): no use of SERVER_BACKLOG is visible in this part of the file */
#define SERVER_BACKLOG 5
/* Scheduler priority chosen by corosync_setscheduler() (0 until then) */
static int sched_priority = 0;
/* Number of corosync_service[] slots visited by confchg_fn() */
static unsigned int service_count = 32;
static struct totem_logging_configuration totem_logging_configuration;
/* Executive API table; assigned from apidef_get() in main() */
static struct corosync_api_v1 *api = NULL;
/*
 * 1 while service synchronization is running: set by confchg_fn() and
 * cleared by corosync_sync_completed().
 */
static int sync_in_process = 1;
/* Main loop handle, exposed through cs_poll_handle_get() */
static qb_loop_t *corosync_poll_handle;
/* Scheduling parameters filled in by corosync_setscheduler() */
struct sched_param global_sched_param;
/* Handle of the timer that runs corosync_totem_stats_updater() */
static corosync_timer_handle_t corosync_stats_timer_handle;
/* Path of the pid/lock file */
static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid";
/* Address family used when parsing member addresses; defaults to IPv4 */
static int ip_version = AF_INET;
/*
 * Accessor for the main loop handle stored in corosync_poll_handle.
 */
qb_loop_t *cs_poll_handle_get (void)
{
	qb_loop_t *loop = corosync_poll_handle;

	return loop;
}
/*
 * Registers dispatch_fn for the given events on fd with the loop `handle`,
 * always at QB_LOOP_MED priority. `data` is handed to qb_loop_poll_add()
 * unchanged. Returns whatever qb_loop_poll_add() returns.
 */
int cs_poll_dispatch_add (qb_loop_t * handle,
	int fd,
	int events,
	void *data,
	int (*dispatch_fn) (int fd,
		int revents,
		void *data))
{
	int rc;

	rc = qb_loop_poll_add(handle, QB_LOOP_MED, fd, events, data, dispatch_fn);

	return rc;
}
/*
 * Removes fd from the loop `handle`; returns the result of
 * qb_loop_poll_del().
 */
int cs_poll_dispatch_delete(qb_loop_t * handle, int fd)
{
	int rc;

	rc = qb_loop_poll_del(handle, fd);

	return rc;
}
/*
 * Invokes exec_dump_fn of every registered service that provides one,
 * walking all SERVICES_COUNT_MAX slots of corosync_service[].
 */
void corosync_state_dump (void)
{
	int slot;

	for (slot = 0; slot < SERVICES_COUNT_MAX; slot++) {
		if (!corosync_service[slot]) {
			continue;
		}
		if (!corosync_service[slot]->exec_dump_fn) {
			continue;
		}
		corosync_service[slot]->exec_dump_fn ();
	}
}
/*
 * Writes the qb blackbox to "<run dir>/fdata-<local timestamp>-<pid>" and
 * then re-points the "<run dir>/fdata" symlink at that file.
 *
 * A failure to write the blackbox is logged, but the symlink is replaced
 * regardless. A failure to create the symlink is logged as well.
 */
static void corosync_blackbox_write_to_file (void)
{
	char dump_path[PATH_MAX];
	char link_path[PATH_MAX];
	char stamp[PATH_MAX];
	struct tm now_tm;
	time_t now;
	ssize_t rc;

	now = time(NULL);
	localtime_r(&now, &now_tm);
	strftime(stamp, sizeof(stamp), "%Y-%m-%dT%H:%M:%S", &now_tm);

	snprintf(dump_path, sizeof(dump_path), "%s/fdata-%s-%lld",
	    get_run_dir(),
	    stamp,
	    (long long int)getpid());

	rc = qb_log_blackbox_write_to_file(dump_path);
	if (rc < 0) {
		LOGSYS_PERROR(-rc, LOGSYS_LEVEL_ERROR, "Can't store blackbox file");
	}

	/* Drop any previous link before creating the new one */
	snprintf(link_path, PATH_MAX, "%s/fdata", get_run_dir());
	unlink(link_path);

	if (symlink(dump_path, link_path) != -1) {
		return;
	}

	log_printf(LOGSYS_LEVEL_ERROR, "Can't create symlink to '%s' for corosync blackbox file '%s'",
	    dump_path, link_path);
}
/*
 * Completion callback passed to corosync_service_unlink_all(): cancels the
 * statistics timer, stops the main loop and finalizes icmap, in that order.
 */
static void unlink_all_completed (void)
{
	/* No more statistics updates from here on */
	api->timer_delete (corosync_stats_timer_handle);

	qb_loop_stop (corosync_poll_handle);

	icmap_fini();
}
/*
 * Starts shutdown by unlinking all services; unlink_all_completed() is
 * passed as the completion callback.
 */
void corosync_shutdown_request (void)
{
	corosync_service_unlink_all (api, unlink_all_completed);
}
/*
 * Signal callback that dumps the state of all services. Both arguments are
 * ignored; always returns 0.
 */
static int32_t sig_diag_handler (int num, void *data)
{
	(void)num;
	(void)data;

	corosync_state_dump ();

	return 0;
}
/*
 * Signal callback that logs the shutdown and starts unlinking all services,
 * with unlink_all_completed() as the completion callback. Both arguments
 * are ignored; always returns 0.
 */
static int32_t sig_exit_handler (int num, void *data)
{
	(void)num;
	(void)data;

	log_printf(LOGSYS_LEVEL_NOTICE, "Node was shut down by a signal");
	corosync_service_unlink_all (api, unlink_all_completed);

	return 0;
}
/*
 * SIGSEGV handler: restores the default disposition, saves the blackbox,
 * shuts logging down and re-raises the signal so the default action runs.
 *
 * NOTE(review): the blackbox write and qb_log_fini() run in signal context.
 */
static void sigsegv_handler (int num)
{
	(void)num;

	/* Restore the default action first so a second fault cannot recurse here */
	(void)signal (SIGSEGV, SIG_DFL);

	corosync_blackbox_write_to_file ();
	qb_log_fini();

	raise (SIGSEGV);
}
/*
 * Adapter with the (num, data) callback signature that forwards to
 * sigsegv_handler(). `data` is ignored; returns 0.
 */
static int32_t sig_segv_handler (int num, void *data)
{
	(void)data;

	sigsegv_handler(num);

	return 0;
}
/*
 * SIGABRT handler: restores the default disposition, saves the blackbox,
 * shuts logging down and re-raises the signal so the default action runs.
 *
 * NOTE(review): the blackbox write and qb_log_fini() run in signal context.
 */
static void sigabrt_handler (int num)
{
	(void)num;

	/* Restore the default action first so a nested abort cannot recurse here */
	(void)signal (SIGABRT, SIG_DFL);

	corosync_blackbox_write_to_file ();
	qb_log_fini();

	raise (SIGABRT);
}
/*
 * Adapter with the (num, data) callback signature that forwards to
 * sigabrt_handler(). `data` is ignored; returns 0.
 */
static int32_t sig_abrt_handler (int num, void *data)
{
	(void)data;

	sigabrt_handler(num);

	return 0;
}
/* IPv4 loopback address as returned by inet_addr() */
#define LOCALHOST_IP inet_addr("127.0.0.1")
/*
 * Handle used for the totempg group calls in this file (multicast,
 * message reservation, queue level check).
 */
static void *corosync_group_handle;
/* Group descriptor: the one-byte group name "a" */
static struct totempg_group corosync_group = {
.group = "a",
.group_len = 1
};
/*
 * Intentionally a no-op.
 */
static void serialize_lock (void)
{
	/* nothing to do */
}
/*
 * Intentionally a no-op.
 */
static void serialize_unlock (void)
{
	/* nothing to do */
}
/*
 * Completion callback registered with sync_init(): logs that synchronization
 * finished, clears sync_in_process, notifies the IPC layer of the new state,
 * allows IPC connections and acknowledges the transition to totempg.
 */
static void corosync_sync_completed (void)
{
	log_printf (LOGSYS_LEVEL_NOTICE,
	    "Completed service synchronization, ready to provide service.");

	sync_in_process = 0;
	cs_ipcs_sync_state_changed(sync_in_process);
	cs_ipc_allow_connections(1);

	/* Tell totem it may start using the new message queue again */
	totempg_trans_ack();
}
/*
 * Copies the sync callbacks (and the name) of service `service_id` into
 * *callbacks.
 *
 * Returns -1 when no service is registered in that slot, 0 otherwise.
 * A NULL `callbacks` only checks that the service exists.
 */
static int corosync_sync_callbacks_retrieve (
	int service_id,
	struct sync_callbacks *callbacks)
{
	if (corosync_service[service_id] == NULL) {
		return (-1);
	}

	if (callbacks != NULL) {
		callbacks->name = corosync_service[service_id]->name;
		callbacks->sync_init = corosync_service[service_id]->sync_init;
		callbacks->sync_process = corosync_service[service_id]->sync_process;
		callbacks->sync_activate = corosync_service[service_id]->sync_activate;
		callbacks->sync_abort = corosync_service[service_id]->sync_abort;
	}

	return (0);
}
/* Copy of the ring id passed to the most recent confchg_fn() call */
static struct memb_ring_id corosync_ring_id;
/*
 * Records in icmap that node `nodeid` joined the membership.
 *
 * If runtime.totem.pg.mrp.srp.members.<nodeid>.ip already exists, the join
 * counter is incremented. Otherwise the ip key is created from
 * api->totem_ifaces_print() and the counter starts at 1. In both cases the
 * status key is set to "joined" and a debug message is logged.
 */
static void member_object_joined (unsigned int nodeid)
{
	char ip_key[ICMAP_KEYNAME_MAXLEN];
	char join_count_key[ICMAP_KEYNAME_MAXLEN];
	char status_key[ICMAP_KEYNAME_MAXLEN];
	int seen_before;

	snprintf(ip_key, sizeof(ip_key),
	    "runtime.totem.pg.mrp.srp.members.%u.ip", nodeid);
	snprintf(join_count_key, sizeof(join_count_key),
	    "runtime.totem.pg.mrp.srp.members.%u.join_count", nodeid);
	snprintf(status_key, sizeof(status_key),
	    "runtime.totem.pg.mrp.srp.members.%u.status", nodeid);

	seen_before = (icmap_get(ip_key, NULL, NULL, NULL) == CS_OK);

	if (seen_before) {
		icmap_inc(join_count_key);
	} else {
		icmap_set_string(ip_key, (char*)api->totem_ifaces_print (nodeid));
		icmap_set_uint32(join_count_key, 1);
	}
	icmap_set_string(status_key, "joined");

	log_printf (LOGSYS_LEVEL_DEBUG,
	    "Member joined: %s", api->totem_ifaces_print (nodeid));
}
/*
 * Records in icmap that node `nodeid` left the membership by setting
 * runtime.totem.pg.mrp.srp.members.<nodeid>.status to "left", then logs a
 * debug message.
 */
static void member_object_left (unsigned int nodeid)
{
	char status_key[ICMAP_KEYNAME_MAXLEN];

	snprintf(status_key, sizeof(status_key),
	    "runtime.totem.pg.mrp.srp.members.%u.status", nodeid);

	icmap_set_string(status_key, "left");

	log_printf (LOGSYS_LEVEL_DEBUG,
	    "Member left: %s", api->totem_ifaces_print (nodeid));
}
/*
 * Configuration change callback.
 *
 * Marks synchronization as in progress, remembers the new ring id, updates
 * the icmap member objects for every node that left or joined, and forwards
 * the change to the confchg_fn of each registered service. If a
 * synchronization was already running when the change arrived it is aborted.
 * A transitional configuration is then saved, and a regular configuration
 * starts a new synchronization.
 */
static void confchg_fn (
	enum totem_configuration_type configuration_type,
	const unsigned int *member_list, size_t member_list_entries,
	const unsigned int *left_list, size_t left_list_entries,
	const unsigned int *joined_list, size_t joined_list_entries,
	const struct memb_ring_id *ring_id)
{
	size_t node_idx;
	unsigned int svc_idx;
	/* A change that interrupts a running sync must abort it (done below) */
	const int abort_activate = (sync_in_process == 1) ? 1 : 0;

	sync_in_process = 1;
	cs_ipcs_sync_state_changed(sync_in_process);

	memcpy (&corosync_ring_id, ring_id, sizeof (struct memb_ring_id));

	for (node_idx = 0; node_idx < left_list_entries; node_idx++) {
		member_object_left (left_list[node_idx]);
	}
	for (node_idx = 0; node_idx < joined_list_entries; node_idx++) {
		member_object_joined (joined_list[node_idx]);
	}

	/* Hand the configuration change to every service that wants it */
	for (svc_idx = 0; svc_idx < service_count; svc_idx++) {
		if (!corosync_service[svc_idx]) {
			continue;
		}
		if (!corosync_service[svc_idx]->confchg_fn) {
			continue;
		}
		corosync_service[svc_idx]->confchg_fn (configuration_type,
		    member_list, member_list_entries,
		    left_list, left_list_entries,
		    joined_list, joined_list_entries, ring_id);
	}

	if (abort_activate) {
		sync_abort ();
	}

	switch (configuration_type) {
	case TOTEM_CONFIGURATION_TRANSITIONAL:
		sync_save_transitional (member_list, member_list_entries, ring_id);
		break;
	case TOTEM_CONFIGURATION_REGULAR:
		sync_start (member_list, member_list_entries, ring_id);
		break;
	default:
		break;
	}
}
/*
 * Placeholder for dropping privileges; currently does nothing.
 */
static void priv_drop (void)
{
	/* TODO: we are still not dropping privs */
}
/*
 * Detaches the process from its controlling terminal: forks (the parent
 * exits with status 0, the child continues), starts a new session and
 * redirects stdin, stdout and stderr to /dev/null.
 *
 * A fork failure leads to corosync_exit_error(COROSYNC_DONE_FORK). Failure
 * to open or duplicate /dev/null leads to
 * corosync_exit_error(COROSYNC_DONE_STD_TO_NULL_REDIR).
 */
static void corosync_tty_detach (void)
{
	int devnull;

	/*
	 * Disconnect from TTY if this is not a debug run
	 */
	switch (fork ()) {
	case -1:
		corosync_exit_error (COROSYNC_DONE_FORK);
		break;
	case 0:
		/*
		 * child which is disconnected, run this process
		 */
		break;
	default:
		exit (0);
		break;
	}

	/* Create new session */
	(void)setsid();

	/*
	 * Map stdin/out/err to /dev/null.
	 */
	devnull = open("/dev/null", O_RDWR);
	if (devnull == -1) {
		corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR);
	}

	if (dup2(devnull, 0) < 0 || dup2(devnull, 1) < 0
	    || dup2(devnull, 2) < 0) {
		if (devnull > 2) {
			close(devnull);
		}
		corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR);
	}

	/*
	 * If a standard descriptor was closed at startup, open() may have
	 * returned 0, 1 or 2. Closing it then would undo the redirection, so
	 * only the extra descriptor is closed.
	 */
	if (devnull > 2) {
		close(devnull);
	}
}
/*
 * Raises the locked-memory limit to RLIM_INFINITY and locks all current
 * and future pages of the process with mlockall().
 *
 * Both steps are best effort: a failure is logged as a warning and startup
 * continues.
 */
static void corosync_mlockall (void)
{
	int res;
	struct rlimit rlimit;

	rlimit.rlim_cur = RLIM_INFINITY;
	rlimit.rlim_max = RLIM_INFINITY;

#ifndef RLIMIT_MEMLOCK
#define RLIMIT_MEMLOCK RLIMIT_VMEM
#endif

	res = setrlimit (RLIMIT_MEMLOCK, &rlimit);
	if (res == -1) {
		/* Not fatal: mlockall() is still attempted under the current limit */
		LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
		    "Could not increase RLIMIT_MEMLOCK");
	}

	res = mlockall (MCL_CURRENT | MCL_FUTURE);
	if (res == -1) {
		LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
		    "Could not lock memory of service to avoid page faults");
	}
}
static void corosync_totem_stats_updater (void *data)
{
totempg_stats_t * stats;
uint32_t total_mtt_rx_token;
uint32_t total_backlog_calc;
uint32_t total_token_holdtime;
int t, prev, i;
int32_t token_count;
char key_name[ICMAP_KEYNAME_MAXLEN];
stats = api->totem_get_stats();
icmap_set_uint32("runtime.totem.pg.msg_reserved", stats->msg_reserved);
icmap_set_uint32("runtime.totem.pg.msg_queue_avail", stats->msg_queue_avail);
icmap_set_uint64("runtime.totem.pg.mrp.srp.orf_token_tx", stats->mrp->srp->orf_token_tx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.orf_token_rx", stats->mrp->srp->orf_token_rx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_merge_detect_tx", stats->mrp->srp->memb_merge_detect_tx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_merge_detect_rx", stats->mrp->srp->memb_merge_detect_rx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_join_tx", stats->mrp->srp->memb_join_tx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_join_rx", stats->mrp->srp->memb_join_rx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_tx", stats->mrp->srp->mcast_tx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_retx", stats->mrp->srp->mcast_retx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_rx", stats->mrp->srp->mcast_rx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_commit_token_tx", stats->mrp->srp->memb_commit_token_tx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_commit_token_rx", stats->mrp->srp->memb_commit_token_rx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.token_hold_cancel_tx", stats->mrp->srp->token_hold_cancel_tx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.token_hold_cancel_rx", stats->mrp->srp->token_hold_cancel_rx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.operational_entered", stats->mrp->srp->operational_entered);
icmap_set_uint64("runtime.totem.pg.mrp.srp.operational_token_lost", stats->mrp->srp->operational_token_lost);
icmap_set_uint64("runtime.totem.pg.mrp.srp.gather_entered", stats->mrp->srp->gather_entered);
icmap_set_uint64("runtime.totem.pg.mrp.srp.gather_token_lost", stats->mrp->srp->gather_token_lost);
icmap_set_uint64("runtime.totem.pg.mrp.srp.commit_entered", stats->mrp->srp->commit_entered);
icmap_set_uint64("runtime.totem.pg.mrp.srp.commit_token_lost", stats->mrp->srp->commit_token_lost);
icmap_set_uint64("runtime.totem.pg.mrp.srp.recovery_entered", stats->mrp->srp->recovery_entered);
icmap_set_uint64("runtime.totem.pg.mrp.srp.recovery_token_lost", stats->mrp->srp->recovery_token_lost);
icmap_set_uint64("runtime.totem.pg.mrp.srp.consensus_timeouts", stats->mrp->srp->consensus_timeouts);
icmap_set_uint64("runtime.totem.pg.mrp.srp.rx_msg_dropped", stats->mrp->srp->rx_msg_dropped);
icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_gather", stats->mrp->srp->continuous_gather);
icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_sendmsg_failures",
stats->mrp->srp->continuous_sendmsg_failures);
icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure",
stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ? 1 : 0);
if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ||
stats->mrp->srp->continuous_sendmsg_failures > MAX_NO_CONT_SENDMSG_FAILURES) {
log_printf (LOGSYS_LEVEL_WARNING,
"Totem is unable to form a cluster because of an "
"operating system or network fault. The most common "
"cause of this message is that the local firewall is "
"configured improperly.");
icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 1);
} else {
icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 0);
}
for (i = 0; i < stats->mrp->srp->rrp->interface_count; i++) {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.rrp.%u.faulty", i);
icmap_set_uint8(key_name, stats->mrp->srp->rrp->faulty[i]);
}
total_mtt_rx_token = 0;
total_token_holdtime = 0;
total_backlog_calc = 0;
token_count = 0;
t = stats->mrp->srp->latest_token;
while (1) {
if (t == 0)
prev = TOTEM_TOKEN_STATS_MAX - 1;
else
prev = t - 1;
if (prev == stats->mrp->srp->earliest_token)
break;
/* if tx == 0, then dropped token (not ours) */
if (stats->mrp->srp->token[t].tx != 0 ||
(stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx) > 0 ) {
total_mtt_rx_token += (stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx);
total_token_holdtime += (stats->mrp->srp->token[t].tx - stats->mrp->srp->token[t].rx);
total_backlog_calc += stats->mrp->srp->token[t].backlog_calc;
token_count++;
}
t = prev;
}
if (token_count) {
icmap_set_uint32("runtime.totem.pg.mrp.srp.mtt_rx_token", (total_mtt_rx_token / token_count));
icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_token_workload", (total_token_holdtime / token_count));
icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_backlog_calc", (total_backlog_calc / token_count));
}
cs_ipcs_stats_update();
api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL,
corosync_totem_stats_updater,
&corosync_stats_timer_handle);
}
-static void totem_dynamic_notify(
- int32_t event,
- const char *key_name,
- struct icmap_notify_value new_val,
- struct icmap_notify_value old_val,
- void *user_data)
-{
- int res;
- unsigned int ring_no;
- unsigned int member_no;
- struct totem_ip_address member;
- int add_new_member = 0;
- int remove_old_member = 0;
- char tmp_str[ICMAP_KEYNAME_MAXLEN];
-
- res = sscanf(key_name, "nodelist.node.%u.ring%u%s", &member_no, &ring_no, tmp_str);
- if (res != 3)
- return ;
-
- if (strcmp(tmp_str, "_addr") != 0) {
- return;
- }
-
- if (event == ICMAP_TRACK_ADD && new_val.type == ICMAP_VALUETYPE_STRING) {
- add_new_member = 1;
- }
-
- if (event == ICMAP_TRACK_DELETE && old_val.type == ICMAP_VALUETYPE_STRING) {
- remove_old_member = 1;
- }
-
- if (event == ICMAP_TRACK_MODIFY && new_val.type == ICMAP_VALUETYPE_STRING &&
- old_val.type == ICMAP_VALUETYPE_STRING) {
- add_new_member = 1;
- remove_old_member = 1;
- }
-
- if (remove_old_member) {
- log_printf(LOGSYS_LEVEL_DEBUG,
- "removing dynamic member %s for ring %u", (char *)old_val.data, ring_no);
- if (totemip_parse(&member, (char *)old_val.data, ip_version) == 0) {
- totempg_member_remove (&member, ring_no);
- }
- }
-
- if (add_new_member) {
- log_printf(LOGSYS_LEVEL_DEBUG,
- "adding dynamic member %s for ring %u", (char *)new_val.data, ring_no);
- if (totemip_parse(&member, (char *)new_val.data, ip_version) == 0) {
- totempg_member_add (&member, ring_no);
- }
- }
-}
-
-static void corosync_totem_dynamic_init (void)
-{
- icmap_track_t icmap_track = NULL;
-
- icmap_track_add("nodelist.node.",
- ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX,
- totem_dynamic_notify,
- NULL,
- &icmap_track);
-}
-
/*
 * Creates the three averaged token statistics keys with an initial value of
 * 0 and schedules the first corosync_totem_stats_updater() run 1500 ms from
 * now.
 */
static void corosync_totem_stats_init (void)
{
	const char *const zeroed_keys[] = {
		"runtime.totem.pg.mrp.srp.mtt_rx_token",
		"runtime.totem.pg.mrp.srp.avg_token_workload",
		"runtime.totem.pg.mrp.srp.avg_backlog_calc",
	};
	size_t k;

	for (k = 0; k < sizeof(zeroed_keys) / sizeof(zeroed_keys[0]); k++) {
		icmap_set_uint32(zeroed_keys[k], 0);
	}

	/* start stats timer */
	api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL,
	    corosync_totem_stats_updater,
	    &corosync_stats_timer_handle);
}
-
static void deliver_fn (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required)
{
const struct qb_ipc_request_header *header;
int32_t service;
int32_t fn_id;
uint32_t id;
header = msg;
if (endian_conversion_required) {
id = swab32 (header->id);
} else {
id = header->id;
}
/*
* Call the proper executive handler
*/
service = id >> 16;
fn_id = id & 0xffff;
if (!corosync_service[service]) {
return;
}
if (fn_id >= corosync_service[service]->exec_engine_count) {
log_printf(LOGSYS_LEVEL_WARNING, "discarded unknown message %d for service %d (max id %d)",
fn_id, service, corosync_service[service]->exec_engine_count);
return;
}
icmap_fast_inc(service_stats_rx[service][fn_id]);
if (endian_conversion_required) {
assert(corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn != NULL);
corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn
((void *)msg);
}
corosync_service[service]->exec_engine[fn_id].exec_handler_fn
(msg, nodeid);
}
/*
 * Multicasts the iovec to the joined group through corosync_group_handle.
 *
 * The first iovec element must start with a qb_ipc_request_header. Its id
 * is split into service (upper 16 bits) and handler index (lower 16 bits)
 * to bump the matching tx statistic when that service is registered.
 * Returns the result of totempg_groups_mcast_joined().
 */
int main_mcast (
	const struct iovec *iovec,
	unsigned int iov_len,
	unsigned int guarantee)
{
	const struct qb_ipc_request_header *hdr = iovec->iov_base;
	int32_t service = hdr->id >> 16;
	int32_t fn_id = hdr->id & 0xffff;
	int rc;

	if (corosync_service[service]) {
		icmap_fast_inc(service_stats_tx[service][fn_id]);
	}

	rc = totempg_groups_mcast_joined (corosync_group_handle, iovec, iov_len, guarantee);

	return (rc);
}
/*
 * Loads the ring sequence number for `addr` from
 * "<run dir>/ringid_<addr>" into memb_ring_id->seq. If the file is missing
 * or too short, the sequence is reset to 0 and a new file is written.
 * memb_ring_id->rep is always set to a copy of `addr`.
 *
 * Failure to create or fully write the file is logged and ends in
 * corosync_exit_error(COROSYNC_DONE_STORE_RINGID). The error number is
 * captured before close() so the logged cause belongs to the failing call,
 * and a short write is reported as EIO.
 *
 * NOTE(review): umask(0) changes the process umask and is not restored;
 * kept because other code may rely on it.
 */
static void corosync_ring_id_create_or_load (
	struct memb_ring_id *memb_ring_id,
	const struct totem_ip_address *addr)
{
	int fd;
	int res = 0;
	int write_errno;
	char filename[PATH_MAX];

	snprintf (filename, sizeof(filename), "%s/ringid_%s",
	    get_run_dir(), totemip_print (addr));

	fd = open (filename, O_RDONLY, 0700);
	/*
	 * If file can be opened and read, read the ring id
	 */
	if (fd != -1) {
		res = read (fd, &memb_ring_id->seq, sizeof (uint64_t));
		close (fd);
	}

	/*
	 * If file could not be opened or read, create a new ring id
	 */
	if ((fd == -1) || (res != sizeof (uint64_t))) {
		memb_ring_id->seq = 0;
		umask(0);
		fd = open (filename, O_CREAT|O_RDWR, 0700);
		if (fd != -1) {
			res = write (fd, &memb_ring_id->seq, sizeof (uint64_t));
			/* errno is only meaningful for -1; a short write has no errno */
			write_errno = (res == -1) ? errno : EIO;
			close (fd);
			if (res != sizeof (uint64_t)) {
				LOGSYS_PERROR (write_errno, LOGSYS_LEVEL_ERROR,
				    "Couldn't write ringid file '%s'", filename);
				corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
			}
		} else {
			LOGSYS_PERROR (errno, LOGSYS_LEVEL_ERROR,
			    "Couldn't create ringid file '%s'", filename);
			corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
		}
	}

	totemip_copy(&memb_ring_id->rep, addr);
	assert (!totemip_zero_check(&memb_ring_id->rep));
}
/*
 * Writes memb_ring_id->seq to "<run dir>/ringid_<addr>", creating the file
 * if it cannot be opened for writing.
 *
 * Failure to open the file or to write the complete value is logged and
 * ends in corosync_exit_error(COROSYNC_DONE_STORE_RINGID). The error number
 * is captured before close() so the logged cause belongs to the failing
 * write, and a short write is reported as EIO.
 */
static void corosync_ring_id_store (
	const struct memb_ring_id *memb_ring_id,
	const struct totem_ip_address *addr)
{
	char filename[PATH_MAX];
	int fd;
	int res;
	int write_errno;

	snprintf (filename, sizeof(filename), "%s/ringid_%s",
	    get_run_dir(), totemip_print (addr));

	fd = open (filename, O_WRONLY, 0700);
	if (fd == -1) {
		fd = open (filename, O_CREAT|O_RDWR, 0700);
	}
	if (fd == -1) {
		LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR,
		    "Couldn't store new ring id %llx to stable storage",
		    memb_ring_id->seq);
		corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
	}

	log_printf (LOGSYS_LEVEL_DEBUG,
	    "Storing new sequence id for ring %llx", memb_ring_id->seq);

	res = write (fd, &memb_ring_id->seq, sizeof(memb_ring_id->seq));
	/* errno is only meaningful for -1; a short write has no errno */
	write_errno = (res == -1) ? errno : EIO;
	close (fd);

	if (res != sizeof(memb_ring_id->seq)) {
		LOGSYS_PERROR(write_errno, LOGSYS_LEVEL_ERROR,
		    "Couldn't store new ring id %llx to stable storage",
		    memb_ring_id->seq);
		corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
	}
}
/* Timer handle used by corosync_recheck_the_q_level() to re-arm itself */
static qb_loop_timer_handle recheck_the_q_level_timer;

/*
 * Asks totempg to re-evaluate the queue level. While the IPC queue level is
 * still TOTEM_Q_LEVEL_CRITICAL, schedules itself to run again in one
 * millisecond. `data` is unused.
 */
void corosync_recheck_the_q_level(void *data)
{
	(void)data;

	totempg_check_q_level(corosync_group_handle);

	if (cs_ipcs_q_level_get() != TOTEM_Q_LEVEL_CRITICAL) {
		return;
	}

	qb_loop_timer_add(cs_poll_handle_get(), QB_LOOP_MED, 1*QB_TIME_NS_IN_MSEC,
	    NULL, corosync_recheck_the_q_level, &recheck_the_q_level_timer);
}
/*
 * Per-request state shared between corosync_sending_allowed() and
 * corosync_sending_allowed_release().
 */
struct sending_allowed_private_data_struct {
/* Result of totempg_groups_joined_reserve(); -1 when the reservation failed */
int reserved_msgs;
};
/*
 * Decides whether the request `msg` for handler `id` of `service` may be
 * sent now, reserving totem message slots for it in the process.
 *
 * The reservation result is stored in the caller-provided private data so
 * corosync_sending_allowed_release() can give it back later.
 *
 * Returns:
 *   QB_TRUE        sending is allowed
 *   -EINVAL        the reservation call failed
 *   -EHOSTUNREACH  node is not quorate and the service does not allow that
 *   -ENOBUFS       flow control is required and nothing could be reserved
 *   -EINPROGRESS   flow control is required and a sync is in progress
 */
int corosync_sending_allowed (
	unsigned int service,
	unsigned int id,
	const void *msg,
	void *sending_allowed_private_data)
{
	struct sending_allowed_private_data_struct *priv = sending_allowed_private_data;
	struct qb_ipc_request_header *req_header = (struct qb_ipc_request_header *)msg;
	struct iovec reserve_iovec;

	reserve_iovec.iov_base = (char *)req_header;
	reserve_iovec.iov_len = req_header->size;

	priv->reserved_msgs = totempg_groups_joined_reserve (
	    corosync_group_handle,
	    &reserve_iovec, 1);
	if (priv->reserved_msgs == -1) {
		return -EINVAL;
	}

	/* Quorum gate: inquorate nodes may only use services that allow it */
	if (corosync_quorum_is_quorate() != 1 &&
	    corosync_service[service]->allow_inquorate != CS_LIB_ALLOW_INQUORATE) {
		return -EHOSTUNREACH;
	}

	/* Flow control gate */
	if (corosync_service[service]->lib_engine[id].flow_control == CS_LIB_FLOW_CONTROL_NOT_REQUIRED) {
		return (QB_TRUE);
	}
	if (priv->reserved_msgs == 0) {
		return -ENOBUFS;
	}
	if (sync_in_process != 0) {
		return -EINPROGRESS;
	}

	return (QB_TRUE);
}
/*
 * Gives back the message slots reserved by corosync_sending_allowed().
 * Nothing is released when that reservation had failed (-1).
 */
void corosync_sending_allowed_release (void *sending_allowed_private_data)
{
	struct sending_allowed_private_data_struct *priv = sending_allowed_private_data;

	if (priv->reserved_msgs != -1) {
		totempg_groups_joined_release (priv->reserved_msgs);
	}
}
/*
 * Returns 1 when `source` carries this node's id (as reported by
 * totempg_my_nodeid_get()), 0 otherwise. `source` must not be NULL.
 */
int message_source_is_local (const mar_message_source_t *source)
{
	assert (source != NULL);

	return (source->nodeid == totempg_my_nodeid_get ()) ? 1 : 0;
}
/*
 * Initializes *source to describe connection `conn` on the local node: the
 * structure is zeroed, then nodeid is set from totempg_my_nodeid_get() and
 * conn is stored. Neither argument may be NULL.
 */
void message_source_set (
	mar_message_source_t *source,
	void *conn)
{
	assert ((source != NULL) && (conn != NULL));

	memset (source, 0, sizeof (*source));

	source->conn = conn;
	source->nodeid = totempg_my_nodeid_get ();
}
/*
 * State of the scheduler pause detector driven by
 * timer_function_scheduler_timeout().
 */
struct scheduler_pause_timeout_data {
/* Source of token_timeout, re-read on every timer run */
struct totem_config *totem_config;
/* Handle of the currently armed timer */
qb_loop_timer_handle handle;
/* Time of the previous timer run in nanoseconds; 0 before the first run */
unsigned long long tv_prev;
/* Largest gap between two runs, in nanoseconds, that is not reported */
unsigned long long max_tv_diff;
};
/*
 * Self re-arming timer callback that detects scheduling pauses of the main
 * process.
 *
 * `data` points to a struct scheduler_pause_timeout_data. On every run the
 * time since the previous run is compared with the stored threshold and a
 * warning is logged when it is exceeded. The threshold is then recomputed
 * as 80% of the current token timeout, and the timer is re-armed to fire
 * after one third of the token timeout.
 */
static void timer_function_scheduler_timeout (void *data)
{
	struct scheduler_pause_timeout_data *pause_data = data;
	unsigned long long now;
	unsigned long long elapsed;

	now = qb_util_nano_current_get ();

	if (pause_data->tv_prev == 0) {
		/*
		 * First run: there is no previous sample, so pretend no time
		 * has passed
		 */
		pause_data->tv_prev = now;
		pause_data->max_tv_diff = 0;
	}

	elapsed = now - pause_data->tv_prev;
	pause_data->tv_prev = now;

	if (elapsed > pause_data->max_tv_diff) {
		log_printf (LOGSYS_LEVEL_WARNING, "Corosync main process was not scheduled for %0.4f ms "
		    "(threshold is %0.4f ms). Consider token timeout increase.",
		    (float)elapsed / QB_TIME_NS_IN_MSEC, (float)pause_data->max_tv_diff / QB_TIME_NS_IN_MSEC);
	}

	/*
	 * token_timeout may have changed, so derive the next threshold from
	 * its current value
	 */
	pause_data->max_tv_diff = pause_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC * 0.8;

	qb_loop_timer_add (corosync_poll_handle,
	    QB_LOOP_MED,
	    pause_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC / 3,
	    pause_data,
	    timer_function_scheduler_timeout,
	    &pause_data->handle);
}
/*
 * Switches the process to SCHED_RR at the maximum priority reported by
 * sched_get_priority_max() and applies the same policy to the logging
 * thread.
 *
 * If the maximum priority cannot be determined or SCHED_RR cannot be set, a
 * warning is logged and startup continues. If SCHED_RR was set but the
 * logging thread priority could not be raised (or the library lacks
 * qb_log_thread_priority_set), the process exits through
 * corosync_exit_error(COROSYNC_DONE_LOGSETUP).
 *
 * On platforms missing any of the required scheduling calls only a warning
 * is logged.
 */
static void corosync_setscheduler (void)
{
#if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) && defined(HAVE_SCHED_SETSCHEDULER)
int res;
sched_priority = sched_get_priority_max (SCHED_RR);
if (sched_priority != -1) {
global_sched_param.sched_priority = sched_priority;
res = sched_setscheduler (0, SCHED_RR, &global_sched_param);
if (res == -1) {
LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING,
"Could not set SCHED_RR at priority %d",
global_sched_param.sched_priority);
/* Realtime scheduling is unavailable: keep the logging thread at SCHED_OTHER too */
global_sched_param.sched_priority = 0;
#ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
qb_log_thread_priority_set (SCHED_OTHER, 0);
#endif
} else {
/*
* Turn on SCHED_RR in logsys system
*/
#ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
res = qb_log_thread_priority_set (SCHED_RR, sched_priority);
#else
/* Without the library call the priority cannot be raised; treated as failure */
res = -1;
#endif
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR,
"Could not set logsys thread priority."
" Can't continue because of priority inversions.");
corosync_exit_error (COROSYNC_DONE_LOGSETUP);
}
}
} else {
LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
"Could not get maximum scheduler priority");
sched_priority = 0;
}
#else
log_printf(LOGSYS_LEVEL_WARNING,
"The Platform is missing process priority setting features. Leaving at default.");
#endif
}
/*
 * printf-style logging entry point that forwards its arguments, together
 * with the caller-supplied function name, file name and line, to
 * qb_log_from_external_source_va().
 */
static void
_logsys_log_printf(int level, int subsys,
	const char *function_name,
	const char *file_name,
	int file_line,
	const char *format,
	...) __attribute__((format(printf, 6, 7)));

static void
_logsys_log_printf(int level, int subsys,
	const char *function_name,
	const char *file_name,
	int file_line,
	const char *format, ...)
{
	va_list args;

	va_start(args, format);
	qb_log_from_external_source_va(function_name, file_name,
	    format, level, file_line,
	    subsys, args);
	va_end(args);
}
/*
 * icmap tracking callback for the runtime.blackbox.* control keys.
 *
 * A notification for "runtime.blackbox.dump_flight_data" writes the
 * blackbox to a file, and one for "runtime.blackbox.dump_state" dumps the
 * state of all services. The event type and the old and new values are not
 * inspected.
 */
static void fplay_key_change_notify_fn (
	int32_t event,
	const char *key_name,
	struct icmap_notify_value new_val,
	struct icmap_notify_value old_val,
	void *user_data)
{
	const int wants_flight_data =
	    (strcmp(key_name, "runtime.blackbox.dump_flight_data") == 0);
	const int wants_state_dump =
	    (strcmp(key_name, "runtime.blackbox.dump_state") == 0);

	(void)event;
	(void)user_data;

	if (wants_flight_data) {
		fprintf(stderr,"Writetofile\n");
		corosync_blackbox_write_to_file ();
	}

	if (wants_state_dump) {
		fprintf(stderr,"statefump\n");
		corosync_state_dump ();
	}
}
/*
 * Creates the two runtime.blackbox.* control keys with the value "no" and
 * registers fplay_key_change_notify_fn() for add, delete and modify events
 * on each of them.
 */
static void corosync_fplay_control_init (void)
{
	const char *const control_keys[] = {
		"runtime.blackbox.dump_flight_data",
		"runtime.blackbox.dump_state",
	};
	const size_t key_count = sizeof(control_keys) / sizeof(control_keys[0]);
	icmap_track_t track = NULL;
	size_t k;

	for (k = 0; k < key_count; k++) {
		icmap_set_string(control_keys[k], "no");
	}

	/* The track handle is reused because it is not needed afterwards */
	for (k = 0; k < key_count; k++) {
		icmap_track_add(control_keys[k],
		    ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY,
		    fplay_key_change_notify_fn,
		    NULL, &track);
	}
}
/*
 * Marks icmap keys read-only when it either makes no sense for a user to
 * change them (statistics) or a change would not be picked up at runtime
 * (totem.crypto_cipher, ...).
 *
 * Some read-only keys cannot be determined at this stage and are flagged
 * later by other functions (nodelist.local_node_pos, ...).
 */
static void set_icmap_ro_keys_flag (void)
{
	const struct {
		const char *key;
		int is_prefix;
	} ro_keys[] = {
		/* Whole subtrees: internal configuration and runtime statistics */
		{ "internal_configuration.", CS_TRUE },
		{ "runtime.connections.", CS_TRUE },
		{ "runtime.totem.", CS_TRUE },
		{ "runtime.services.", CS_TRUE },
		{ "runtime.config.", CS_TRUE },
		/* Single configuration keys that cannot change while running */
		{ "totem.crypto_cipher", CS_FALSE },
		{ "totem.crypto_hash", CS_FALSE },
		{ "totem.secauth", CS_FALSE },
		{ "totem.ip_version", CS_FALSE },
		{ "totem.rrp_mode", CS_FALSE },
		{ "totem.netmtu", CS_FALSE },
		{ "qb.ipc_type", CS_FALSE },
		{ "config.reload_in_progress", CS_FALSE },
		{ "config.totemconfig_reload_in_progress", CS_FALSE },
	};
	size_t k;

	for (k = 0; k < sizeof(ro_keys) / sizeof(ro_keys[0]); k++) {
		icmap_set_ro_access(ro_keys[k].key, ro_keys[k].is_prefix, CS_TRUE);
	}
}
/*
 * Brings up the service layer: links and initializes the default services,
 * initializes IPC, the totem statistics and the blackbox control keys, and
 * registers the sync callbacks.
 *
 * Failure to initialize the default services is logged and ends in
 * corosync_exit_error(COROSYNC_DONE_INIT_SERVICES).
 */
static void main_service_ready (void)
{
int res;
/*
* This must occur after totempg is initialized because "this_ip" must be set
*/
res = corosync_service_defaults_link_and_init (api);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Could not initialize default services");
corosync_exit_error (COROSYNC_DONE_INIT_SERVICES);
}
cs_ipcs_init();
corosync_totem_stats_init ();
corosync_fplay_control_init ();
- corosync_totem_dynamic_init ();
/* Register the per-service callback lookup and the completion callback */
sync_init (
corosync_sync_callbacks_retrieve,
corosync_sync_completed);
}
static enum e_corosync_done corosync_flock (const char *lockfile, pid_t pid)
{
struct flock lock;
enum e_corosync_done err;
char pid_s[17];
int fd_flag;
int lf;
err = COROSYNC_DONE_EXIT;
lf = open (lockfile, O_WRONLY | O_CREAT, 0640);
if (lf == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create lock file.");
return (COROSYNC_DONE_AQUIRE_LOCK);
}
retry_fcntl:
lock.l_type = F_WRLCK;
lock.l_start = 0;
lock.l_whence = SEEK_SET;
lock.l_len = 0;
if (fcntl (lf, F_SETLK, &lock) == -1) {
switch (errno) {
case EINTR:
goto retry_fcntl;
break;
case EAGAIN:
case EACCES:
log_printf (LOGSYS_LEVEL_ERROR, "Another Corosync instance is already running.");
err = COROSYNC_DONE_ALREADY_RUNNING;
goto error_close;
break;
default:
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't aquire lock. Error was %s",
strerror(errno));
err = COROSYNC_DONE_AQUIRE_LOCK;
goto error_close;
break;
}
}
if (ftruncate (lf, 0) == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't truncate lock file. Error was %s",
strerror (errno));
err = COROSYNC_DONE_AQUIRE_LOCK;
goto error_close_unlink;
}
memset (pid_s, 0, sizeof (pid_s));
snprintf (pid_s, sizeof (pid_s) - 1, "%u\n", pid);
retry_write:
if (write (lf, pid_s, strlen (pid_s)) != strlen (pid_s)) {
if (errno == EINTR) {
goto retry_write;
} else {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't write pid to lock file. "
"Error was %s", strerror (errno));
err = COROSYNC_DONE_AQUIRE_LOCK;
goto error_close_unlink;
}
}
if ((fd_flag = fcntl (lf, F_GETFD, 0)) == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't get close-on-exec flag from lock file. "
"Error was %s", strerror (errno));
err = COROSYNC_DONE_AQUIRE_LOCK;
goto error_close_unlink;
}
fd_flag |= FD_CLOEXEC;
if (fcntl (lf, F_SETFD, fd_flag) == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't set close-on-exec flag to lock file. "
"Error was %s", strerror (errno));
err = COROSYNC_DONE_AQUIRE_LOCK;
goto error_close_unlink;
}
return (err);
error_close_unlink:
unlink (lockfile);
error_close:
close (lf);
return (err);
}
/*
 * Corosync executive entry point.
 *
 * Sequence, as far as this function shows it:
 *  1. parse the command line (-f, -p, -r, -t, -v),
 *  2. optionally change the scheduler, lock memory, install the early
 *     signal handlers,
 *  3. initialize icmap, then read the main, logging and totem configuration
 *     and validate it (with -t the process stops here),
 *  4. fill in the totem callbacks and logging configuration,
 *  5. call corosync_tty_detach() when running in background mode,
 *  6. create the qb loop, register signals, start the log thread and take
 *     the pid lock file,
 *  7. initialize totempg, join the corosync group, drop privileges,
 *  8. run the main loop; once it returns, finalize totempg, destroy the
 *     loop and unlink the lock file.
 *
 * Returns EXIT_SUCCESS for -v and EXIT_FAILURE for an unknown option. Every
 * other path ends in a call to corosync_exit_error(); the trailing
 * "return EXIT_SUCCESS" is only reached if that call returns.
 */
int main (int argc, char **argv, char **envp)
{
const char *error_string;
struct totem_config totem_config;
int res, ch;
int background, setprio, testonly;
struct stat stat_out;
enum e_corosync_done flock_err;
uint64_t totem_config_warnings;
struct scheduler_pause_timeout_data scheduler_pause_timeout_data;
/*
 * Default configuration: run in background, change the scheduler,
 * do a real start rather than a configuration test.
 */
background = 1;
setprio = 1;
testonly = 0;
while ((ch = getopt (argc, argv, "fprtv")) != EOF) {
switch (ch) {
case 'f':
background = 0;
break;
case 'p':
setprio = 0;
break;
case 'r':
/* Same value as the default assigned above. */
setprio = 1;
break;
case 't':
testonly = 1;
break;
case 'v':
printf ("Corosync Cluster Engine, version '%s'\n", VERSION);
printf ("Copyright (c) 2006-2009 Red Hat, Inc.\n");
logsys_system_fini();
return EXIT_SUCCESS;
break;
default:
/* Unknown option: print usage and fail. */
fprintf(stderr, \
"usage:\n"\
" -f : Start application in foreground.\n"\
" -p : Do not set process priority.\n"\
" -t : Test configuration and exit.\n"\
" -r : Set round robin realtime scheduling (default).\n"\
" -v : Display version and SVN revision of Corosync and exit.\n");
logsys_system_fini();
return EXIT_FAILURE;
}
}
/*
* Set round robin realtime scheduling with priority 99
* Lock all memory to avoid page faults which may interrupt
* application healthchecking
*/
if (setprio) {
corosync_setscheduler ();
}
/* Memory locking is attempted regardless of -p. */
corosync_mlockall ();
/*
* Other signals are registered later via qb_loop_signal_add
*/
(void)signal (SIGSEGV, sigsegv_handler);
(void)signal (SIGABRT, sigabrt_handler);
/* SIGPIPE is only ignored when MSG_NOSIGNAL is defined to a non-zero value. */
#if MSG_NOSIGNAL != 0
(void)signal (SIGPIPE, SIG_IGN);
#endif
if (icmap_init() != CS_OK) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't initialize configuration component.");
corosync_exit_error (COROSYNC_DONE_ICMAP);
}
set_icmap_ro_keys_flag();
/*
* Initialize the corosync_api_v1 definition
*/
api = apidef_get ();
/* Parse the main configuration into the global icmap map. */
res = coroparse_configparse(icmap_get_global_map(), &error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
res = corosync_log_config_read (&error_string);
if (res == -1) {
/*
* if we are here, we _must_ flush the logsys queue
* and try to inform that we couldn't read the config.
* this is a desperate attempt before certain death
* and there is no guarantee that we can print to stderr
* nor that logsys is sending the messages where we expect.
*/
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
fprintf(stderr, "%s", error_string);
syslog (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD);
}
/* The start-up banner is not logged when only testing the configuration. */
if (!testonly) {
log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine ('%s'): started and ready to provide service.", VERSION);
log_printf (LOGSYS_LEVEL_INFO, "Corosync built-in features:" PACKAGE_FEATURES "");
}
/*
* Make sure required directory is present
*/
res = stat (get_run_dir(), &stat_out);
if ((res == -1) || (res == 0 && !S_ISDIR(stat_out.st_mode))) {
log_printf (LOGSYS_LEVEL_ERROR, "Required directory not present %s. Please create it.", get_run_dir());
corosync_exit_error (COROSYNC_DONE_DIR_NOT_PRESENT);
}
/* The run directory becomes the working directory of the process. */
res = chdir(get_run_dir());
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Cannot chdir to run directory %s. "
"Please make sure it has correct context and rights.", get_run_dir());
corosync_exit_error (COROSYNC_DONE_DIR_NOT_PRESENT);
}
res = totem_config_read (&totem_config, &error_string, &totem_config_warnings);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
/* Report the non-fatal findings totem_config_read() returned as a bit mask. */
if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_IGNORED) {
log_printf (LOGSYS_LEVEL_WARNING, "member section is used together with nodelist. Members ignored.");
}
if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED) {
log_printf (LOGSYS_LEVEL_WARNING, "member section is deprecated.");
}
if (totem_config_warnings & TOTEM_CONFIG_WARNING_TOTEM_NODEID_IGNORED) {
log_printf (LOGSYS_LEVEL_WARNING, "nodeid appears both in totem section and nodelist. Nodelist one is used.");
}
if (totem_config_warnings != 0) {
log_printf (LOGSYS_LEVEL_WARNING, "Please migrate config file to nodelist.");
}
res = totem_config_keyread (&totem_config, &error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
res = totem_config_validate (&totem_config, &error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
/*
 * With -t the configuration has now been read and validated, so leave
 * with the regular exit code.
 */
if (testonly) {
corosync_exit_error (COROSYNC_DONE_EXIT);
}
ip_version = totem_config.ip_version;
/* Hand the ring id persistence callbacks and logging setup to totem. */
totem_config.totem_memb_ring_id_create_or_load = corosync_ring_id_create_or_load;
totem_config.totem_memb_ring_id_store = corosync_ring_id_store;
totem_config.totem_logging_configuration = totem_logging_configuration;
totem_config.totem_logging_configuration.log_subsys_id = _logsys_subsys_create("TOTEM", "totem,"
"totemmrp.c,totemrrp.c,totemip.c,totemconfig.c,totemcrypto.c,totemsrp.c,"
"totempg.c,totemiba.c,totemudp.c,totemudpu.c,totemnet.c");
totem_config.totem_logging_configuration.log_level_security = LOGSYS_LEVEL_WARNING;
totem_config.totem_logging_configuration.log_level_error = LOGSYS_LEVEL_ERROR;
totem_config.totem_logging_configuration.log_level_warning = LOGSYS_LEVEL_WARNING;
totem_config.totem_logging_configuration.log_level_notice = LOGSYS_LEVEL_NOTICE;
totem_config.totem_logging_configuration.log_level_debug = LOGSYS_LEVEL_DEBUG;
totem_config.totem_logging_configuration.log_level_trace = LOGSYS_LEVEL_TRACE;
totem_config.totem_logging_configuration.log_printf = _logsys_log_printf;
logsys_config_apply();
/*
* Now we are fully initialized.
*/
if (background) {
corosync_tty_detach ();
}
corosync_poll_handle = qb_loop_create ();
/* Only the totem_config member is set; the rest of the structure stays zeroed. */
memset(&scheduler_pause_timeout_data, 0, sizeof(scheduler_pause_timeout_data));
scheduler_pause_timeout_data.totem_config = &totem_config;
timer_function_scheduler_timeout (&scheduler_pause_timeout_data);
/*
 * Signals handled from the main loop.
 * NOTE(review): SIGSEGV and SIGABRT already received handlers through
 * signal() above - confirm that registering them again here is intended.
 */
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW,
SIGUSR2, NULL, sig_diag_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGINT, NULL, sig_exit_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGSEGV, NULL, sig_segv_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGABRT, NULL, sig_abrt_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGQUIT, NULL, sig_exit_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGTERM, NULL, sig_exit_handler, NULL);
if (logsys_thread_start() != 0) {
log_printf (LOGSYS_LEVEL_ERROR, "Can't initialize log thread");
corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD);
}
/*
 * Take the pid lock file for this process. Any result other than
 * COROSYNC_DONE_EXIT is fatal and is used as the exit reason.
 */
if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) != COROSYNC_DONE_EXIT) {
corosync_exit_error (flock_err);
}
/*
* if totempg_initialize doesn't have root privileges, it cannot
* bind to a specific interface. This only matters if
* there is more than one interface in a system, so
* in this case, only a warning is printed
*/
/*
* Join multicast group and setup delivery
* and configuration change functions
*/
totempg_initialize (
corosync_poll_handle,
&totem_config);
totempg_service_ready_register (
main_service_ready);
totempg_groups_initialize (
&corosync_group_handle,
deliver_fn,
confchg_fn);
totempg_groups_join (
corosync_group_handle,
&corosync_group,
1);
/*
* Drop root privileges to user 'corosync'
* TODO: Don't really need full root capabilities;
* needed capabilities are:
* CAP_NET_RAW (bindtodevice)
* CAP_SYS_NICE (setscheduler)
* CAP_IPC_LOCK (mlockall)
*/
priv_drop ();
schedwrk_init (
serialize_lock,
serialize_unlock);
/*
* Start main processing loop
*/
qb_loop_run (corosync_poll_handle);
/*
* Exit was requested
*/
totempg_finalize ();
/*
* free the loop resources
*/
qb_loop_destroy (corosync_poll_handle);
/*
* free up the icmap
* (no call follows this comment in the current code)
*/
/*
* Remove pid lock file
*/
unlink (corosync_lock_file);
corosync_exit_error (COROSYNC_DONE_EXIT);
return EXIT_SUCCESS;
}
diff --git a/exec/totemconfig.c b/exec/totemconfig.c
index 2acee2a9..b678752d 100644
--- a/exec/totemconfig.c
+++ b/exec/totemconfig.c
@@ -1,1404 +1,1535 @@
/*
* Copyright (c) 2002-2005 MontaVista Software, Inc.
* Copyright (c) 2006-2013 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
* Jan Friesse (jfriesse@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "util.h"
#include "totemconfig.h"
/*
 * Built-in defaults and limits for the totem configuration.
 *
 * Most of these are the fallback values totem_volatile_config_read() passes
 * to totem_volatile_config_set_value() when a key is missing from icmap;
 * MINIMUM_TIMEOUT and RRP_PROBLEM_COUNT_THRESHOLD_MIN are the lower bounds
 * enforced by totem_volatile_config_validate().
 */
#define TOKEN_RETRANSMITS_BEFORE_LOSS_CONST	4
#define TOKEN_TIMEOUT				1000
#define TOKEN_COEFFICIENT			650
#define JOIN_TIMEOUT				50
#define MERGE_TIMEOUT				200
#define DOWNCHECK_TIMEOUT			1000
#define FAIL_TO_RECV_CONST			2500
#define SEQNO_UNCHANGED_CONST			30
/*
 * Fully parenthesized so that the macro behaves as a single operand in any
 * expression (for example as the right-hand side of a division).
 */
#define MINIMUM_TIMEOUT				((int)(1000/HZ)*3)
#define MAX_NETWORK_DELAY			50
#define WINDOW_SIZE				50
#define MAX_MESSAGES				17
#define MISS_COUNT_CONST			5
#define RRP_PROBLEM_COUNT_TIMEOUT		2000
#define RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT	10
#define RRP_PROBLEM_COUNT_THRESHOLD_MIN		2
#define RRP_AUTORECOVERY_CHECK_TIMEOUT		1000
#define DEFAULT_PORT				5405

/* Storage behind the error_string pointers handed back to callers. */
static char error_string_response[512];

static void add_totem_config_notification(struct totem_config *totem_config);
/*
 * Translate an icmap key name ("totem.<parameter>") into a pointer to the
 * totem_config field holding that parameter. All the volatile parameters are
 * uint32s, so a single pointer type covers every entry.
 *
 * Returns NULL when param_name is not one of the known keys.
 */
static uint32_t *totem_get_param_by_name(struct totem_config *totem_config, const char *param_name)
{
	const struct {
		const char *key;
		uint32_t *field;
	} known_params[] = {
		{ "totem.token", &totem_config->token_timeout },
		{ "totem.token_retransmit", &totem_config->token_retransmit_timeout },
		{ "totem.hold", &totem_config->token_hold_timeout },
		{ "totem.token_retransmits_before_loss_const", &totem_config->token_retransmits_before_loss_const },
		{ "totem.join", &totem_config->join_timeout },
		{ "totem.send_join", &totem_config->send_join_timeout },
		{ "totem.consensus", &totem_config->consensus_timeout },
		{ "totem.merge", &totem_config->merge_timeout },
		{ "totem.downcheck", &totem_config->downcheck_timeout },
		{ "totem.fail_recv_const", &totem_config->fail_to_recv_const },
		{ "totem.seqno_unchanged_const", &totem_config->seqno_unchanged_const },
		{ "totem.rrp_token_expired_timeout", &totem_config->rrp_token_expired_timeout },
		{ "totem.rrp_problem_count_timeout", &totem_config->rrp_problem_count_timeout },
		{ "totem.rrp_problem_count_threshold", &totem_config->rrp_problem_count_threshold },
		{ "totem.rrp_problem_count_mcast_threshold", &totem_config->rrp_problem_count_mcast_threshold },
		{ "totem.rrp_autorecovery_check_timeout", &totem_config->rrp_autorecovery_check_timeout },
		{ "totem.heartbeat_failures_allowed", &totem_config->heartbeat_failures_allowed },
		{ "totem.max_network_delay", &totem_config->max_network_delay },
		{ "totem.window_size", &totem_config->window_size },
		{ "totem.max_messages", &totem_config->max_messages },
		{ "totem.miss_count_const", &totem_config->miss_count_const },
	};
	size_t idx;

	for (idx = 0; idx < sizeof(known_params) / sizeof(known_params[0]); idx++) {
		if (strcmp(param_name, known_params[idx].key) == 0) {
			return known_params[idx].field;
		}
	}

	return NULL;
}
/*
 * Read key_name from icmap into the matching totem_config field. The default
 * value is stored instead when the key is not found, when key_name equals
 * deleted_key, or when allow_zero_value is false and the value read is zero.
 * The resulting value is then mirrored to "runtime.config.<key_name>".
 *
 * deleted_key may be NULL. A key_name that totem_get_param_by_name() does
 * not know is ignored, and the runtime copy is skipped if its key name would
 * not fit into ICMAP_KEYNAME_MAXLEN.
 */
static void totem_volatile_config_set_value (struct totem_config *totem_config,
	const char *key_name, const char *deleted_key, unsigned int default_value,
	int allow_zero_value)
{
	char runtime_key_name[ICMAP_KEYNAME_MAXLEN];
	uint32_t *param;
	int key_len;

	/* Look the field up once; the lookup returns NULL for unknown keys. */
	param = totem_get_param_by_name(totem_config, key_name);
	if (param == NULL) {
		return ;
	}

	if (icmap_get_uint32(key_name, param) != CS_OK ||
	    (deleted_key != NULL && strcmp(deleted_key, key_name) == 0) ||
	    (!allow_zero_value && *param == 0)) {
		*param = default_value;
	}

	/*
	 * Store totem_config value to cmap runtime section. snprintf bounds
	 * the write; a truncated key must not be stored under a wrong name.
	 */
	key_len = snprintf(runtime_key_name, sizeof(runtime_key_name),
	    "runtime.config.%s", key_name);
	if (key_len < 0 || (size_t)key_len >= sizeof(runtime_key_name)) {
		return ;
	}

	icmap_set_uint32(runtime_key_name, *param);
}
/*
 * Load every volatile totem parameter from cmap into totem_config, falling
 * back to the built-in default when a key does not exist. deleted_key is the
 * name of a key currently being removed from cmap; it is treated as missing
 * even though it can still be read. It may be NULL.
 *
 * The order of the calls matters: several defaults are derived from values
 * stored by earlier calls (token timeout, retransmit timeout, RRP problem
 * count threshold).
 */
static void totem_volatile_config_read (struct totem_config *totem_config, const char *deleted_key)
{
	uint32_t token_coefficient;
	unsigned int derived_default;

	totem_volatile_config_set_value(totem_config, "totem.token_retransmits_before_loss_const", deleted_key,
	    TOKEN_RETRANSMITS_BEFORE_LOSS_CONST, 0);

	totem_volatile_config_set_value(totem_config, "totem.token", deleted_key, TOKEN_TIMEOUT, 0);

	/*
	 * With more than two members on the first interface the token timeout
	 * grows by the coefficient for every additional member.
	 */
	if (totem_config->interface_count > 0 && totem_config->interfaces[0].member_count > 2) {
		token_coefficient = TOKEN_COEFFICIENT;
		icmap_get_uint32("totem.token_coefficient", &token_coefficient);
		totem_config->token_timeout += (totem_config->interfaces[0].member_count - 2) * token_coefficient;

		/*
		 * Store totem_config value to cmap runtime section
		 */
		icmap_set_uint32("runtime.config.totem.token", totem_config->token_timeout);
	}

	totem_volatile_config_set_value(totem_config, "totem.max_network_delay", deleted_key, MAX_NETWORK_DELAY, 0);
	totem_volatile_config_set_value(totem_config, "totem.window_size", deleted_key, WINDOW_SIZE, 0);
	totem_volatile_config_set_value(totem_config, "totem.max_messages", deleted_key, MAX_MESSAGES, 0);
	totem_volatile_config_set_value(totem_config, "totem.miss_count_const", deleted_key, MISS_COUNT_CONST, 0);

	/* Retransmit timeout defaults to a fraction of the token timeout. */
	derived_default = (int)(totem_config->token_timeout / (totem_config->token_retransmits_before_loss_const + 0.2));
	totem_volatile_config_set_value(totem_config, "totem.token_retransmit", deleted_key, derived_default, 0);

	/* Hold timeout defaults to 80% of the retransmit timeout minus 1000/HZ. */
	derived_default = (int)(totem_config->token_retransmit_timeout * 0.8 - (1000/HZ));
	totem_volatile_config_set_value(totem_config, "totem.hold", deleted_key, derived_default, 0);

	totem_volatile_config_set_value(totem_config, "totem.join", deleted_key, JOIN_TIMEOUT, 0);

	/* Consensus timeout defaults to 1.2 times the token timeout. */
	derived_default = (int)(float)(1.2 * totem_config->token_timeout);
	totem_volatile_config_set_value(totem_config, "totem.consensus", deleted_key, derived_default, 0);

	totem_volatile_config_set_value(totem_config, "totem.merge", deleted_key, MERGE_TIMEOUT, 0);
	totem_volatile_config_set_value(totem_config, "totem.downcheck", deleted_key, DOWNCHECK_TIMEOUT, 0);
	totem_volatile_config_set_value(totem_config, "totem.fail_recv_const", deleted_key, FAIL_TO_RECV_CONST, 0);
	totem_volatile_config_set_value(totem_config, "totem.seqno_unchanged_const", deleted_key,
	    SEQNO_UNCHANGED_CONST, 0);

	/* Zero is a legal value for send_join, so it is not replaced. */
	totem_volatile_config_set_value(totem_config, "totem.send_join", deleted_key, 0, 1);

	totem_volatile_config_set_value(totem_config, "totem.rrp_problem_count_timeout", deleted_key,
	    RRP_PROBLEM_COUNT_TIMEOUT, 0);
	totem_volatile_config_set_value(totem_config, "totem.rrp_problem_count_threshold", deleted_key,
	    RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT, 0);

	/* Multicast threshold defaults to ten times the threshold stored just above. */
	derived_default = totem_config->rrp_problem_count_threshold * 10;
	totem_volatile_config_set_value(totem_config, "totem.rrp_problem_count_mcast_threshold", deleted_key,
	    derived_default, 0);

	/* RRP token expiry defaults to the retransmit timeout. */
	derived_default = totem_config->token_retransmit_timeout;
	totem_volatile_config_set_value(totem_config, "totem.rrp_token_expired_timeout", deleted_key,
	    derived_default, 0);

	totem_volatile_config_set_value(totem_config, "totem.rrp_autorecovery_check_timeout", deleted_key,
	    RRP_AUTORECOVERY_CHECK_TIMEOUT, 0);

	/* Zero is a legal value for heartbeat_failures_allowed as well. */
	totem_volatile_config_set_value(totem_config, "totem.heartbeat_failures_allowed", deleted_key, 0, 1);
}
/*
 * Helper for totem_volatile_config_validate(): when value_ms is below
 * MINIMUM_TIMEOUT, write the explanation for parameter `description` into
 * reason and return 1; otherwise leave reason untouched and return 0.
 */
static int totem_timeout_below_minimum (char *reason, size_t reason_len,
	const char *description, unsigned int value_ms)
{
	if (value_ms >= MINIMUM_TIMEOUT) {
		return (0);
	}

	/* The checked value is unsigned, hence %u; MINIMUM_TIMEOUT is an int. */
	snprintf (reason, reason_len,
		"The %s parameter (%u ms) may not be less than (%d ms).",
		description, value_ms, MINIMUM_TIMEOUT);

	return (1);
}

/*
 * Check the volatile totem parameters stored in totem_config against their
 * lower bounds. The checks run in a fixed order and the first violation is
 * reported.
 *
 * Returns 0 when every value is acceptable. Returns -1 on the first
 * violation and points *error_string at a message kept in static storage
 * (overwritten by the next failure).
 */
static int totem_volatile_config_validate (
	struct totem_config *totem_config,
	const char **error_string)
{
	static char local_error_reason[512];
	const size_t reason_len = sizeof(local_error_reason);

	if (totem_timeout_below_minimum (local_error_reason, reason_len,
	    "max_network_delay", totem_config->max_network_delay)) {
		goto parse_error;
	}

	if (totem_timeout_below_minimum (local_error_reason, reason_len,
	    "token timeout", totem_config->token_timeout)) {
		goto parse_error;
	}

	if (totem_timeout_below_minimum (local_error_reason, reason_len,
	    "token retransmit timeout", totem_config->token_retransmit_timeout)) {
		goto parse_error;
	}

	if (totem_timeout_below_minimum (local_error_reason, reason_len,
	    "token hold timeout", totem_config->token_hold_timeout)) {
		goto parse_error;
	}

	if (totem_timeout_below_minimum (local_error_reason, reason_len,
	    "join timeout", totem_config->join_timeout)) {
		goto parse_error;
	}

	if (totem_timeout_below_minimum (local_error_reason, reason_len,
	    "consensus timeout", totem_config->consensus_timeout)) {
		goto parse_error;
	}

	/* Consensus additionally has to be at least as long as join. */
	if (totem_config->consensus_timeout < totem_config->join_timeout) {
		snprintf (local_error_reason, reason_len,
			"The consensus timeout parameter (%u ms) may not be less than join timeout (%u ms).",
			totem_config->consensus_timeout, totem_config->join_timeout);
		goto parse_error;
	}

	if (totem_timeout_below_minimum (local_error_reason, reason_len,
	    "merge timeout", totem_config->merge_timeout)) {
		goto parse_error;
	}

	if (totem_timeout_below_minimum (local_error_reason, reason_len,
	    "downcheck timeout", totem_config->downcheck_timeout)) {
		goto parse_error;
	}

	if (totem_timeout_below_minimum (local_error_reason, reason_len,
	    "RRP problem count timeout", totem_config->rrp_problem_count_timeout)) {
		goto parse_error;
	}

	if (totem_config->rrp_problem_count_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) {
		snprintf (local_error_reason, reason_len,
			"The RRP problem count threshold (%u problem count) may not be less than (%d problem count).",
			totem_config->rrp_problem_count_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN);
		goto parse_error;
	}

	if (totem_config->rrp_problem_count_mcast_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) {
		snprintf (local_error_reason, reason_len,
			"The RRP multicast problem count threshold (%u problem count) may not be less than (%d problem count).",
			totem_config->rrp_problem_count_mcast_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN);
		goto parse_error;
	}

	if (totem_timeout_below_minimum (local_error_reason, reason_len,
	    "RRP token expired timeout", totem_config->rrp_token_expired_timeout)) {
		goto parse_error;
	}

	return 0;

parse_error:
	snprintf (error_string_response, sizeof(error_string_response),
		 "parse error in config: %s\n", local_error_reason);
	*error_string = error_string_response;
	return (-1);
}
/*
 * Work out the cipher and hash names from icmap and store heap copies of
 * them in totem_config->crypto_cipher_type / crypto_hash_type (the previous
 * strings are freed).
 *
 * Defaults are "aes256" and "sha1". "totem.secauth" set to "off" switches
 * both to "none"; "totem.crypto_cipher" and "totem.crypto_hash" then
 * override the respective value when they hold one of the recognized names.
 * Unrecognized values leave the current choice unchanged.
 *
 * Returns 0 on success, or -1 (without touching totem_config) when a cipher
 * is selected but the hash is "none".
 */
static int totem_get_crypto(struct totem_config *totem_config)
{
	static const char *const known_ciphers[] = {
		"none", "aes256", "aes192", "aes128", "3des"
	};
	static const char *const known_hashes[] = {
		"none", "md5", "sha1", "sha256", "sha384", "sha512"
	};
	const char *cipher = "aes256";
	const char *hash = "sha1";
	char *value;
	size_t i;

	if (icmap_get_string("totem.secauth", &value) == CS_OK) {
		if (strcmp (value, "off") == 0) {
			hash = "none";
			cipher = "none";
		}
		free(value);
	}

	if (icmap_get_string("totem.crypto_cipher", &value) == CS_OK) {
		for (i = 0; i < sizeof(known_ciphers) / sizeof(known_ciphers[0]); i++) {
			if (strcmp(value, known_ciphers[i]) == 0) {
				cipher = known_ciphers[i];
				break;
			}
		}
		free(value);
	}

	if (icmap_get_string("totem.crypto_hash", &value) == CS_OK) {
		for (i = 0; i < sizeof(known_hashes) / sizeof(known_hashes[0]); i++) {
			if (strcmp(value, known_hashes[i]) == 0) {
				hash = known_hashes[i];
				break;
			}
		}
		free(value);
	}

	/* Encryption without authentication is rejected. */
	if (strcmp(cipher, "none") != 0 && strcmp(hash, "none") == 0) {
		return -1;
	}

	free(totem_config->crypto_cipher_type);
	free(totem_config->crypto_hash_type);

	totem_config->crypto_cipher_type = strdup(cipher);
	totem_config->crypto_hash_type = strdup(hash);

	return 0;
}
/*
 * Return the address family selected by the "totem.ip_version" icmap key:
 * AF_INET6 for "ipv6", AF_INET for "ipv4", for any other value and when the
 * key cannot be read.
 */
static int totem_config_get_ip_version(void)
{
	char *version_str;
	int family;

	if (icmap_get_string("totem.ip_version", &version_str) != CS_OK) {
		return (AF_INET);
	}

	/* Everything except an explicit "ipv6" ends up as IPv4. */
	family = (strcmp(version_str, "ipv6") == 0) ? AF_INET6 : AF_INET;
	free(version_str);

	return (family);
}
/*
 * Derive a 16-bit cluster id from the cluster name: for every character the
 * running value is shifted left by one bit and the character is added; the
 * low 16 bits of the result are returned.
 *
 * The accumulator is unsigned so that long names wrap around in a defined
 * way instead of overflowing a signed int. The low 16 bits are the same as
 * those of the signed computation wherever that one was well defined.
 */
static uint16_t generate_cluster_id (const char *cluster_name)
{
	const char *pos;
	unsigned int value = 0;

	for (pos = cluster_name; *pos != '\0'; pos++) {
		value <<= 1;
		value += (unsigned int)*pos;
	}

	return (uint16_t)(value & 0xFFFF);
}
/*
 * Build a multicast address from the cluster name and ring number and parse
 * it into res.
 *
 * The 16-bit id is generate_cluster_id(cluster_name) + ringnumber. For an
 * AF_INET bindnet the address is 239.192.<id >> 8>.<id % 0xFF>, for AF_INET6
 * it is ff15::<id in hex>. res is zeroed before the family is examined.
 *
 * Returns -1 when cluster_name is NULL or the bindnet family is neither
 * AF_INET nor AF_INET6; otherwise the result of totemip_parse().
 *
 * NOTE(review): the last IPv4 octet is computed with modulo 255 rather than
 * a 0xFF mask. It is kept as is here because it determines the resulting
 * address - confirm before changing.
 */
static int get_cluster_mcast_addr (
	const char *cluster_name,
	const struct totem_ip_address *bindnet,
	unsigned int ringnumber,
	int ip_version,
	struct totem_ip_address *res)
{
	char mcast_str[INET6_ADDRSTRLEN + 1];
	uint16_t id;

	if (cluster_name == NULL) {
		return (-1);
	}

	id = generate_cluster_id(cluster_name) + ringnumber;
	memset (res, 0, sizeof(*res));

	if (bindnet->family == AF_INET) {
		snprintf(mcast_str, sizeof(mcast_str), "239.192.%d.%d", id >> 8, id % 0xFF);
	} else if (bindnet->family == AF_INET6) {
		snprintf(mcast_str, sizeof(mcast_str), "ff15::%x", id);
	} else {
		/*
		 * Unknown family
		 */
		return (-1);
	}

	return (totemip_parse (res, mcast_str, ip_version));
}
static int find_local_node_in_nodelist(struct totem_config *totem_config)
{
icmap_iter_t iter;
const char *iter_key;
int res = 0;
unsigned int node_pos;
int local_node_pos = -1;
struct totem_ip_address bind_addr;
int interface_up, interface_num;
char tmp_key[ICMAP_KEYNAME_MAXLEN];
char *node_addr_str;
struct totem_ip_address node_addr;
res = totemip_iface_check(&totem_config->interfaces[0].bindnet,
&bind_addr, &interface_up, &interface_num,
totem_config->clear_node_high_bit);
if (res == -1) {
return (-1);
}
iter = icmap_iter_init("nodelist.node.");
while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key);
if (res != 2) {
continue;
}
if (strcmp(tmp_key, "ring0_addr") != 0) {
continue;
}
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", node_pos);
if (icmap_get_string(tmp_key, &node_addr_str) != CS_OK) {
continue;
}
res = totemip_parse (&node_addr, node_addr_str, totem_config->ip_version);
free(node_addr_str);
if (res == -1) {
continue ;
}
if (totemip_equal(&bind_addr, &node_addr)) {
local_node_pos = node_pos;
}
}
icmap_iter_finalize(iter);
return (local_node_pos);
}
-static void put_nodelist_members_to_config(struct totem_config *totem_config)
+/*
+ * Compute the difference between two sets of totem interface arrays and
+ * apply it to totempg.
+ *
+ * For every ring below interface_count, an address that exists in both
+ * set1[ring].member_list and set2[ring].member_list is cleared (set to 0) in
+ * both sets; addresses that exist in only one of them remain untouched.
+ * Afterwards totempg_member_remove is called for everything left in set1 and
+ * totempg_member_add for everything left in set2.
+ *
+ * Both arrays are modified, so callers have to pass copies they can afford
+ * to lose. Entries are compared with memcmp over the whole
+ * struct totem_ip_address, and an all-zero entry is treated as "cleared".
+ */
+static void compute_interfaces_diff(int interface_count,
+ struct totem_interface *set1,
+ struct totem_interface *set2)
+{
+ int ring_no, set1_pos, set2_pos;
+ struct totem_ip_address empty_ip_address;
+
+ memset(&empty_ip_address, 0, sizeof(empty_ip_address));
+
+ for (ring_no = 0; ring_no < interface_count; ring_no++) {
+ for (set1_pos = 0; set1_pos < set1[ring_no].member_count; set1_pos++) {
+ for (set2_pos = 0; set2_pos < set2[ring_no].member_count; set2_pos++) {
+ /*
+ * For the current ring_no clear all set1 items that also
+ * exist in set2 (and the matching set2 items).
+ */
+ if (memcmp(&set1[ring_no].member_list[set1_pos],
+ &set2[ring_no].member_list[set2_pos],
+ sizeof(struct totem_ip_address)) == 0) {
+ memset(&set1[ring_no].member_list[set1_pos], 0,
+ sizeof(struct totem_ip_address));
+ memset(&set2[ring_no].member_list[set2_pos], 0,
+ sizeof(struct totem_ip_address));
+ }
+ }
+ }
+ }
+
+ for (ring_no = 0; ring_no < interface_count; ring_no++) {
+ for (set1_pos = 0; set1_pos < set1[ring_no].member_count; set1_pos++) {
+ /*
+ * Items that remained in set1 do not exist in set2 any longer, so
+ * the node has to be removed.
+ */
+ if (memcmp(&set1[ring_no].member_list[set1_pos], &empty_ip_address, sizeof(empty_ip_address)) != 0) {
+ log_printf(LOGSYS_LEVEL_DEBUG,
+ "removing dynamic member %s for ring %u",
+ totemip_print(&set1[ring_no].member_list[set1_pos]),
+ ring_no);
+
+ totempg_member_remove(&set1[ring_no].member_list[set1_pos], ring_no);
+ }
+ }
+ for (set2_pos = 0; set2_pos < set2[ring_no].member_count; set2_pos++) {
+ /*
+ * Items that remained in set2 did not exist in set1, so this is a
+ * new node and has to be added.
+ */
+ if (memcmp(&set2[ring_no].member_list[set2_pos], &empty_ip_address, sizeof(empty_ip_address)) != 0) {
+ log_printf(LOGSYS_LEVEL_DEBUG,
+ "adding dynamic member %s for ring %u",
+ totemip_print(&set2[ring_no].member_list[set2_pos]),
+ ring_no);
+
+ totempg_member_add(&set2[ring_no].member_list[set2_pos], ring_no);
+ }
+ }
+ }
+}
+
+/*
+ * Rebuild member_list and member_count of the interfaces in totem_config
+ * from the nodelist.node.<N>.ring<R>_addr keys stored in icmap. Only nodes
+ * that have a ring0_addr key are considered.
+ *
+ * When reload is non-zero, the interface array is copied before and after
+ * the rebuild and the two copies are handed to compute_interfaces_diff().
+ *
+ * Keys naming a ring number outside the interfaces array, and addresses that
+ * would not fit into a member list any more, are skipped like every other
+ * unusable key.
+ */
+static void put_nodelist_members_to_config(struct totem_config *totem_config, int reload)
{
icmap_iter_t iter, iter2;
const char *iter_key, *iter_key2;
int res = 0;
unsigned int node_pos;
char tmp_key[ICMAP_KEYNAME_MAXLEN];
char tmp_key2[ICMAP_KEYNAME_MAXLEN];
char *node_addr_str;
int member_count;
unsigned int ringnumber = 0;
int i, j;
+ struct totem_interface *orig_interfaces = NULL;
+ struct totem_interface *new_interfaces = NULL;
+
+ if (reload) {
+ /*
+ * We need to compute diff only for reload. Also for initial configuration
+ * not all totem structures are initialized so corosync will crash during
+ * member_add/remove
+ */
+ orig_interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX);
+ assert(orig_interfaces != NULL);
+ new_interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX);
+ assert(new_interfaces != NULL);
+
+ memcpy(orig_interfaces, totem_config->interfaces, sizeof (struct totem_interface) * INTERFACE_MAX);
+ }
/* Clear out nodelist so we can put the new one in if needed */
for (i = 0; i < totem_config->interface_count; i++) {
for (j = 0; j < PROCESSOR_COUNT_MAX; j++) {
memset(&totem_config->interfaces[i].member_list[j], 0, sizeof(struct totem_ip_address));
}
totem_config->interfaces[i].member_count = 0;
}
iter = icmap_iter_init("nodelist.node.");
while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key);
if (res != 2) {
continue;
}
if (strcmp(tmp_key, "ring0_addr") != 0) {
continue;
}
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.", node_pos);
iter2 = icmap_iter_init(tmp_key);
while ((iter_key2 = icmap_iter_next(iter2, NULL, NULL)) != NULL) {
res = sscanf(iter_key2, "nodelist.node.%u.ring%u%s", &node_pos, &ringnumber, tmp_key2);
if (res != 3 || strcmp(tmp_key2, "_addr") != 0) {
continue;
}
+ /*
+ * ringnumber comes straight from the key name. Refuse values that
+ * would index past the end of the interfaces array.
+ */
+ if (ringnumber >= INTERFACE_MAX) {
+ continue;
+ }
if (icmap_get_string(iter_key2, &node_addr_str) != CS_OK) {
continue;
}
member_count = totem_config->interfaces[ringnumber].member_count;
+ /*
+ * member_list holds PROCESSOR_COUNT_MAX entries; do not write
+ * behind the last one.
+ */
+ if (member_count >= PROCESSOR_COUNT_MAX) {
+ free(node_addr_str);
+ continue;
+ }
res = totemip_parse(&totem_config->interfaces[ringnumber].member_list[member_count],
node_addr_str, totem_config->ip_version);
if (res != -1) {
totem_config->interfaces[ringnumber].member_count++;
}
free(node_addr_str);
}
icmap_iter_finalize(iter2);
}
icmap_iter_finalize(iter);
+
+ if (reload) {
+ memcpy(new_interfaces, totem_config->interfaces, sizeof (struct totem_interface) * INTERFACE_MAX);
+
+ compute_interfaces_diff(totem_config->interface_count, orig_interfaces, new_interfaces);
+
+ free(new_interfaces);
+ free(orig_interfaces);
+ }
+}
+/*
+ * icmap notification callback for changes to the nodelist.
+ * NOTE(review): the registration is outside this part of the file - confirm
+ * which keys and events it is tracked for.
+ *
+ * Nothing is done while config.totemconfig_reload_in_progress is set, and
+ * nothing is done for keys that are not of the form
+ * nodelist.node.<N>.ring<R>_addr. Otherwise the member lists are rebuilt by
+ * put_nodelist_members_to_config() with reload set to 1.
+ *
+ * user_data has to point to the struct totem_config to update. event,
+ * new_val and old_val are not used.
+ */
+static void nodelist_dynamic_notify(
+ int32_t event,
+ const char *key_name,
+ struct icmap_notify_value new_val,
+ struct icmap_notify_value old_val,
+ void *user_data)
+{
+ int res;
+ unsigned int ring_no;
+ unsigned int member_no;
+ char tmp_str[ICMAP_KEYNAME_MAXLEN];
+ uint8_t reloading;
+ struct totem_config *totem_config = (struct totem_config *)user_data;
+
+ /*
+ * If a full reload is in progress then don't do anything until it's done and
+ * can reconfigure it all atomically
+ */
+ if (icmap_get_uint8("config.totemconfig_reload_in_progress", &reloading) == CS_OK && reloading) {
+ return ;
+ }
+
+ res = sscanf(key_name, "nodelist.node.%u.ring%u%s", &member_no, &ring_no, tmp_str); /* member_no and ring_no are parsed only to validate the key shape */
+ if (res != 3)
+ return ;
+
+ if (strcmp(tmp_str, "_addr") != 0) {
+ return;
+ }
+
+ put_nodelist_members_to_config(totem_config, 1);
}
+
/*
* Tries to find node (node_pos) in config nodelist which address matches any
* local interface. Address can be stored in ring0_addr or if ipaddr_key_prefix is not NULL
* key with prefix ipaddr_key is used (there can be multiuple of them)
* This function differs * from find_local_node_in_nodelist because it doesn't need bindnetaddr,
* but doesn't work when bind addr is network address (so IP must be exact
* match).
*
* Returns 1 on success (address was found, node_pos is then correctly set) or 0 on failure.
*/
int totem_config_find_local_addr_in_nodelist(const char *ipaddr_key_prefix, unsigned int *node_pos)
{
struct list_head addrs;
struct totem_ip_if_address *if_addr;
icmap_iter_t iter, iter2;
const char *iter_key, *iter_key2;
struct list_head *list;
const char *ipaddr_key;
int ip_version;
struct totem_ip_address node_addr;
char *node_addr_str;
int node_found = 0;
int res = 0;
char tmp_key[ICMAP_KEYNAME_MAXLEN];
if (totemip_getifaddrs(&addrs) == -1) {
return 0;
}
ip_version = totem_config_get_ip_version();
iter = icmap_iter_init("nodelist.node.");
while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
res = sscanf(iter_key, "nodelist.node.%u.%s", node_pos, tmp_key);
if (res != 2) {
continue;
}
if (strcmp(tmp_key, "ring0_addr") != 0) {
continue;
}
if (icmap_get_string(iter_key, &node_addr_str) != CS_OK) {
continue ;
}
free(node_addr_str);
/*
* ring0_addr found -> let's iterate thru ipaddr_key_prefix
*/
snprintf(tmp_key, sizeof(tmp_key), "nodelist.node.%u.%s", *node_pos,
(ipaddr_key_prefix != NULL ? ipaddr_key_prefix : "ring0_addr"));
iter2 = icmap_iter_init(tmp_key);
while ((iter_key2 = icmap_iter_next(iter2, NULL, NULL)) != NULL) {
/*
* ring0_addr must be exact match, not prefix
*/
ipaddr_key = (ipaddr_key_prefix != NULL ? iter_key2 : tmp_key);
if (icmap_get_string(ipaddr_key, &node_addr_str) != CS_OK) {
continue ;
}
if (totemip_parse(&node_addr, node_addr_str, ip_version) == -1) {
free(node_addr_str);
continue ;
}
free(node_addr_str);
/*
* Try to match ip with if_addrs
*/
node_found = 0;
for (list = addrs.next; list != &addrs; list = list->next) {
if_addr = list_entry(list, struct totem_ip_if_address, list);
if (totemip_equal(&node_addr, &if_addr->ip_addr)) {
node_found = 1;
break;
}
}
if (node_found) {
break ;
}
}
icmap_iter_finalize(iter2);
if (node_found) {
break ;
}
}
icmap_iter_finalize(iter);
totemip_freeifaddrs(&addrs);
return (node_found);
}
/*
 * Derive totem.interface.<ring>.bindnetaddr keys from the nodelist entry of
 * the local node: every nodelist.node.<N>.ring<M>_addr value of that node is
 * copied to totem.interface.<M>.bindnetaddr. Nothing is done when the local
 * node cannot be found in the nodelist. The totem_config argument is not used.
 */
static void config_convert_nodelist_to_interface(struct totem_config *totem_config)
{
	unsigned int local_pos;
	unsigned int ring = 0;
	char node_prefix[ICMAP_KEYNAME_MAXLEN];
	char scratch[ICMAP_KEYNAME_MAXLEN];
	char *addr;
	icmap_iter_t node_iter;
	const char *key;

	if (!totem_config_find_local_addr_in_nodelist(NULL, &local_pos)) {
		return;
	}

	/*
	 * We found the node, so create the interface section
	 */
	snprintf(node_prefix, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.", local_pos);

	node_iter = icmap_iter_init(node_prefix);

	for (key = icmap_iter_next(node_iter, NULL, NULL);
	     key != NULL;
	     key = icmap_iter_next(node_iter, NULL, NULL)) {
		/*
		 * Accept only keys shaped like nodelist.node.<N>.ring<M>_addr
		 */
		if (sscanf(key, "nodelist.node.%u.ring%u%s", &local_pos, &ring, scratch) != 3) {
			continue;
		}
		if (strcmp(scratch, "_addr") != 0) {
			continue;
		}

		if (icmap_get_string(key, &addr) != CS_OK) {
			continue;
		}

		/*
		 * scratch is reused to build the destination key
		 */
		snprintf(scratch, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.bindnetaddr", ring);
		icmap_set_string(scratch, addr);
		free(addr);
	}

	icmap_iter_finalize(node_iter);
}
extern int totem_config_read (
struct totem_config *totem_config,
const char **error_string,
uint64_t *warnings)
{
int res = 0;
char *str;
unsigned int ringnumber = 0;
int member_count = 0;
icmap_iter_t iter, member_iter;
const char *iter_key;
const char *member_iter_key;
char ringnumber_key[ICMAP_KEYNAME_MAXLEN];
char tmp_key[ICMAP_KEYNAME_MAXLEN];
uint8_t u8;
uint16_t u16;
char *cluster_name = NULL;
int i;
int local_node_pos;
int nodeid_set;
*warnings = 0;
memset (totem_config, 0, sizeof (struct totem_config));
totem_config->interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX);
if (totem_config->interfaces == 0) {
*error_string = "Out of memory trying to allocate ethernet interface storage area";
return -1;
}
memset (totem_config->interfaces, 0,
sizeof (struct totem_interface) * INTERFACE_MAX);
strcpy (totem_config->rrp_mode, "none");
icmap_get_uint32("totem.version", (uint32_t *)&totem_config->version);
if (totem_get_crypto(totem_config) != 0) {
*error_string = "crypto_cipher requires crypto_hash with value other than none";
return -1;
}
if (icmap_get_string("totem.rrp_mode", &str) == CS_OK) {
if (strlen(str) >= TOTEM_RRP_MODE_BYTES) {
*error_string = "totem.rrp_mode is too long";
free(str);
return -1;
}
strcpy (totem_config->rrp_mode, str);
free(str);
}
icmap_get_uint32("totem.nodeid", &totem_config->node_id);
totem_config->clear_node_high_bit = 0;
if (icmap_get_string("totem.clear_node_high_bit", &str) == CS_OK) {
if (strcmp (str, "yes") == 0) {
totem_config->clear_node_high_bit = 1;
}
free(str);
}
icmap_get_uint32("totem.threads", &totem_config->threads);
icmap_get_uint32("totem.netmtu", &totem_config->net_mtu);
if (icmap_get_string("totem.cluster_name", &cluster_name) != CS_OK) {
cluster_name = NULL;
}
totem_config->ip_version = totem_config_get_ip_version();
if (icmap_get_string("totem.interface.0.bindnetaddr", &str) != CS_OK) {
/*
* We were not able to find ring 0 bindnet addr. Try to use nodelist informations
*/
config_convert_nodelist_to_interface(totem_config);
} else {
free(str);
}
/*
* Broadcast option is global but set in interface section,
* so reset before processing interfaces.
*/
totem_config->broadcast_use = 0;
iter = icmap_iter_init("totem.interface.");
while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
res = sscanf(iter_key, "totem.interface.%[^.].%s", ringnumber_key, tmp_key);
if (res != 2) {
continue;
}
if (strcmp(tmp_key, "bindnetaddr") != 0) {
continue;
}
member_count = 0;
ringnumber = atoi(ringnumber_key);
if (ringnumber >= INTERFACE_MAX) {
free(cluster_name);
snprintf (error_string_response, sizeof(error_string_response),
"parse error in config: interface ring number %u is bigger than allowed maximum %u\n",
ringnumber, INTERFACE_MAX - 1);
*error_string = error_string_response;
return -1;
}
/*
* Get the bind net address
*/
if (icmap_get_string(iter_key, &str) == CS_OK) {
res = totemip_parse (&totem_config->interfaces[ringnumber].bindnet, str,
totem_config->interfaces[ringnumber].mcast_addr.family);
free(str);
}
/*
* Get interface multicast address
*/
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr", ringnumber);
if (icmap_get_string(tmp_key, &str) == CS_OK) {
res = totemip_parse (&totem_config->interfaces[ringnumber].mcast_addr, str, totem_config->ip_version);
free(str);
} else {
/*
* User not specified address -> autogenerate one from cluster_name key
* (if available)
*/
res = get_cluster_mcast_addr (cluster_name,
&totem_config->interfaces[ringnumber].bindnet,
ringnumber,
totem_config->ip_version,
&totem_config->interfaces[ringnumber].mcast_addr);
}
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.broadcast", ringnumber);
if (icmap_get_string(tmp_key, &str) == CS_OK) {
if (strcmp (str, "yes") == 0) {
totem_config->broadcast_use = 1;
}
free(str);
}
/*
* Get mcast port
*/
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport", ringnumber);
if (icmap_get_uint16(tmp_key, &totem_config->interfaces[ringnumber].ip_port) != CS_OK) {
if (totem_config->broadcast_use) {
totem_config->interfaces[ringnumber].ip_port = DEFAULT_PORT + (2 * ringnumber);
} else {
totem_config->interfaces[ringnumber].ip_port = DEFAULT_PORT;
}
}
/*
* Get the TTL
*/
totem_config->interfaces[ringnumber].ttl = 1;
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.ttl", ringnumber);
if (icmap_get_uint8(tmp_key, &u8) == CS_OK) {
totem_config->interfaces[ringnumber].ttl = u8;
}
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.member.", ringnumber);
member_iter = icmap_iter_init(tmp_key);
while ((member_iter_key = icmap_iter_next(member_iter, NULL, NULL)) != NULL) {
if (member_count == 0) {
if (icmap_get_string("nodelist.node.0.ring0_addr", &str) == CS_OK) {
free(str);
*warnings |= TOTEM_CONFIG_WARNING_MEMBERS_IGNORED;
break;
} else {
*warnings |= TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED;
}
}
if (icmap_get_string(member_iter_key, &str) == CS_OK) {
res = totemip_parse (&totem_config->interfaces[ringnumber].member_list[member_count++],
str, totem_config->ip_version);
}
}
icmap_iter_finalize(member_iter);
totem_config->interfaces[ringnumber].member_count = member_count;
totem_config->interface_count++;
}
icmap_iter_finalize(iter);
/*
* Use broadcast is global, so if set, make sure to fill mcast addr correctly
*/
if (totem_config->broadcast_use) {
for (ringnumber = 0; ringnumber < totem_config->interface_count; ringnumber++) {
totemip_parse (&totem_config->interfaces[ringnumber].mcast_addr,
"255.255.255.255", 0);
}
}
/*
* Store automatically generated items back to icmap
*/
for (i = 0; i < totem_config->interface_count; i++) {
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr", i);
if (icmap_get_string(tmp_key, &str) == CS_OK) {
free(str);
} else {
str = (char *)totemip_print(&totem_config->interfaces[i].mcast_addr);
icmap_set_string(tmp_key, str);
}
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport", i);
if (icmap_get_uint16(tmp_key, &u16) != CS_OK) {
icmap_set_uint16(tmp_key, totem_config->interfaces[i].ip_port);
}
}
totem_config->transport_number = TOTEM_TRANSPORT_UDP;
if (icmap_get_string("totem.transport", &str) == CS_OK) {
if (strcmp (str, "udpu") == 0) {
totem_config->transport_number = TOTEM_TRANSPORT_UDPU;
}
if (strcmp (str, "iba") == 0) {
totem_config->transport_number = TOTEM_TRANSPORT_RDMA;
}
free(str);
}
free(cluster_name);
/*
* Check existence of nodelist
*/
if (icmap_get_string("nodelist.node.0.ring0_addr", &str) == CS_OK) {
free(str);
/*
* find local node
*/
local_node_pos = find_local_node_in_nodelist(totem_config);
if (local_node_pos != -1) {
icmap_set_uint32("nodelist.local_node_pos", local_node_pos);
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", local_node_pos);
nodeid_set = (totem_config->node_id != 0);
if (icmap_get_uint32(tmp_key, &totem_config->node_id) == CS_OK && nodeid_set) {
*warnings |= TOTEM_CONFIG_WARNING_TOTEM_NODEID_IGNORED;
}
/*
* Make localnode ring0_addr read only, so we can be sure that local
* node never changes. If rebinding to other IP would be in future
* supported, this must be changed and handled properly!
*/
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", local_node_pos);
icmap_set_ro_access(tmp_key, 0, 1);
icmap_set_ro_access("nodelist.local_node_pos", 0, 1);
}
- put_nodelist_members_to_config(totem_config);
+ put_nodelist_members_to_config(totem_config, 0);
}
/*
* Get things that might change in the future (and can depend on totem_config->interfaces);
*/
totem_volatile_config_read(totem_config, NULL);
icmap_set_uint8("config.totemconfig_reload_in_progress", 0);
add_totem_config_notification(totem_config);
return 0;
}
int totem_config_validate (
struct totem_config *totem_config,
const char **error_string)
{
static char local_error_reason[512];
char parse_error[512];
const char *error_reason = local_error_reason;
int i, j;
unsigned int interface_max = INTERFACE_MAX;
unsigned int port1, port2;
if (totem_config->interface_count == 0) {
error_reason = "No interfaces defined";
goto parse_error;
}
for (i = 0; i < totem_config->interface_count; i++) {
/*
* Some error checking of parsed data to make sure its valid
*/
struct totem_ip_address null_addr;
memset (&null_addr, 0, sizeof (struct totem_ip_address));
if ((totem_config->transport_number == 0) &&
memcmp (&totem_config->interfaces[i].mcast_addr, &null_addr,
sizeof (struct totem_ip_address)) == 0) {
error_reason = "No multicast address specified";
goto parse_error;
}
if (totem_config->interfaces[i].ip_port == 0) {
error_reason = "No multicast port specified";
goto parse_error;
}
if (totem_config->interfaces[i].ttl > 255) {
error_reason = "Invalid TTL (should be 0..255)";
goto parse_error;
}
if (totem_config->transport_number != TOTEM_TRANSPORT_UDP &&
totem_config->interfaces[i].ttl != 1) {
error_reason = "Can only set ttl on multicast transport types";
goto parse_error;
}
if (totem_config->interfaces[i].mcast_addr.family == AF_INET6 &&
totem_config->node_id == 0) {
error_reason = "An IPV6 network requires that a node ID be specified.";
goto parse_error;
}
if (totem_config->broadcast_use == 0 && totem_config->transport_number == TOTEM_TRANSPORT_UDP) {
if (totem_config->interfaces[i].mcast_addr.family != totem_config->interfaces[i].bindnet.family) {
error_reason = "Multicast address family does not match bind address family";
goto parse_error;
}
if (totemip_is_mcast (&totem_config->interfaces[i].mcast_addr) != 0) {
error_reason = "mcastaddr is not a correct multicast address.";
goto parse_error;
}
}
if (totem_config->interfaces[0].bindnet.family != totem_config->interfaces[i].bindnet.family) {
error_reason = "Not all bind address belong to the same IP family";
goto parse_error;
}
/*
* Ensure mcast address/port differs
*/
if (totem_config->transport_number == TOTEM_TRANSPORT_UDP) {
for (j = i + 1; j < totem_config->interface_count; j++) {
port1 = totem_config->interfaces[i].ip_port;
port2 = totem_config->interfaces[j].ip_port;
if (totemip_equal(&totem_config->interfaces[i].mcast_addr,
&totem_config->interfaces[j].mcast_addr) &&
(((port1 > port2 ? port1 : port2) - (port1 < port2 ? port1 : port2)) <= 1)) {
error_reason = "Interfaces multicast address/port pair must differ";
goto parse_error;
}
}
}
}
if (totem_config->version != 2) {
error_reason = "This totem parser can only parse version 2 configurations.";
goto parse_error;
}
if (totem_volatile_config_validate(totem_config, error_string) == -1) {
return (-1);
}
/*
* RRP values validation
*/
if (strcmp (totem_config->rrp_mode, "none") &&
strcmp (totem_config->rrp_mode, "active") &&
strcmp (totem_config->rrp_mode, "passive")) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The RRP mode \"%s\" specified is invalid. It must be none, active, or passive.\n", totem_config->rrp_mode);
goto parse_error;
}
if (strcmp (totem_config->rrp_mode, "none") == 0) {
interface_max = 1;
}
if (interface_max < totem_config->interface_count) {
snprintf (parse_error, sizeof(parse_error),
"%d is too many configured interfaces for the rrp_mode setting %s.",
totem_config->interface_count,
totem_config->rrp_mode);
error_reason = parse_error;
goto parse_error;
}
if (totem_config->net_mtu == 0) {
totem_config->net_mtu = 1500;
}
return 0;
parse_error:
snprintf (error_string_response, sizeof(error_string_response),
"parse error in config: %s\n", error_reason);
*error_string = error_string_response;
return (-1);
}
/*
 * Read the private key from the file key_location into
 * totem_config->private_key.
 *
 * Exactly sizeof(totem_config->private_key) bytes are expected from a single
 * read(). Returns 0 on success; on failure returns -1 with *error_string
 * pointing at the formatted message in error_string_response.
 */
static int read_keyfile (
	const char *key_location,
	struct totem_config *totem_config,
	const char **error_string)
{
	ssize_t expected_key_len = sizeof (totem_config->private_key);
	char strerror_buf[100];
	const char *reason;
	int keyfd;
	int nread;
	int read_errno;

	keyfd = open (key_location, O_RDONLY);
	if (keyfd == -1) {
		reason = qb_strerror_r(errno, strerror_buf, sizeof(strerror_buf));
		snprintf (error_string_response, sizeof(error_string_response),
			"Could not open %s: %s\n",
			 key_location, reason);
		*error_string = error_string_response;
		return (-1);
	}

	nread = read (keyfd, totem_config->private_key, expected_key_len);
	/*
	 * Remember errno now, close() may overwrite it
	 */
	read_errno = errno;
	close (keyfd);

	if (nread == -1) {
		reason = qb_strerror_r (read_errno, strerror_buf, sizeof(strerror_buf));
		snprintf (error_string_response, sizeof(error_string_response),
			"Could not read %s: %s\n",
			 key_location, reason);
		*error_string = error_string_response;
		return (-1);
	}

	/*
	 * The length is recorded before the short-read check below
	 */
	totem_config->private_key_len = expected_key_len;

	if (nread != expected_key_len) {
		snprintf (error_string_response, sizeof(error_string_response),
			"Could only read %d bits of 1024 bits from %s.\n",
			 nread * 8, key_location);
		*error_string = error_string_response;
		return (-1);
	}

	return 0;
}
/*
 * Load the totem private key into totem_config.
 *
 * Nothing is loaded when both crypto_cipher_type and crypto_hash_type are
 * "none". Otherwise the key is taken from, in order of preference:
 *   1. the file named by the "totem.keyfile" icmap key,
 *   2. the "totem.key" icmap key itself,
 *   3. the file named by $COROSYNC_TOTEM_AUTHKEY_FILE, or
 *      COROSYSCONFDIR "/authkey" when that variable is unset.
 *
 * Returns 0 on success, -1 on failure with *error_string set to
 * error_string_response.
 */
int totem_config_keyread (
	struct totem_config *totem_config,
	const char **error_string)
{
	char *keyfile_path = NULL;
	const char *fallback_path;
	size_t cmap_key_len;
	int rc;

	memset (totem_config->private_key, 0, 128);
	totem_config->private_key_len = 128;

	if (strcmp(totem_config->crypto_cipher_type, "none") == 0 &&
	    strcmp(totem_config->crypto_hash_type, "none") == 0) {
		return (0);
	}

	/* cmap may store the location of the key file */
	if (icmap_get_string("totem.keyfile", &keyfile_path) == CS_OK) {
		rc = read_keyfile(keyfile_path, totem_config, error_string);
		free(keyfile_path);
		if (rc) {
			goto fail;
		}
		return (0);
	}

	/* Or the key itself may be in the cmap */
	if (icmap_get("totem.key", NULL, &cmap_key_len, NULL) == CS_OK) {
		if (cmap_key_len > sizeof (totem_config->private_key)) {
			sprintf(error_string_response, "key is too long");
			goto fail;
		}
		if (icmap_get("totem.key", totem_config->private_key, &cmap_key_len, NULL) != CS_OK) {
			sprintf(error_string_response, "can't store private key");
			goto fail;
		}
		totem_config->private_key_len = cmap_key_len;
		return (0);
	}

	/* In desperation we read the default filename */
	fallback_path = getenv("COROSYNC_TOTEM_AUTHKEY_FILE");
	if (fallback_path == NULL) {
		fallback_path = COROSYSCONFDIR "/authkey";
	}
	rc = read_keyfile(fallback_path, totem_config, error_string);
	if (rc) {
		goto fail;
	}

	return (0);

fail:
	*error_string = error_string_response;
	return (-1);
}
/*
 * Write the timing, window and RRP related members of totem_config to the
 * log at debug level. The configuration is only read, never modified.
 */
static void debug_dump_totem_config(const struct totem_config *totem_config)
{
	const struct totem_config *cfg = totem_config;

	/*
	 * Token and membership timers
	 */
	log_printf(LOGSYS_LEVEL_DEBUG,
	    "Token Timeout (%d ms) retransmit timeout (%d ms)",
	    cfg->token_timeout,
	    cfg->token_retransmit_timeout);

	log_printf(LOGSYS_LEVEL_DEBUG,
	    "token hold (%d ms) retransmits before loss (%d retrans)",
	    cfg->token_hold_timeout,
	    cfg->token_retransmits_before_loss_const);

	log_printf(LOGSYS_LEVEL_DEBUG,
	    "join (%d ms) send_join (%d ms) consensus (%d ms) merge (%d ms)",
	    cfg->join_timeout,
	    cfg->send_join_timeout,
	    cfg->consensus_timeout,
	    cfg->merge_timeout);

	log_printf(LOGSYS_LEVEL_DEBUG,
	    "downcheck (%d ms) fail to recv const (%d msgs)",
	    cfg->downcheck_timeout,
	    cfg->fail_to_recv_const);

	/*
	 * Message flow limits
	 */
	log_printf(LOGSYS_LEVEL_DEBUG,
	    "seqno unchanged const (%d rotations) Maximum network MTU %d",
	    cfg->seqno_unchanged_const,
	    cfg->net_mtu);

	log_printf(LOGSYS_LEVEL_DEBUG,
	    "window size per rotation (%d messages) maximum messages per rotation (%d messages)",
	    cfg->window_size,
	    cfg->max_messages);

	log_printf(LOGSYS_LEVEL_DEBUG,
	    "missed count const (%d messages)",
	    cfg->miss_count_const);

	/*
	 * Redundant ring settings
	 */
	log_printf(LOGSYS_LEVEL_DEBUG,
	    "RRP token expired timeout (%d ms)",
	    cfg->rrp_token_expired_timeout);

	log_printf(LOGSYS_LEVEL_DEBUG,
	    "RRP token problem counter (%d ms)",
	    cfg->rrp_problem_count_timeout);

	log_printf(LOGSYS_LEVEL_DEBUG,
	    "RRP threshold (%d problem count)",
	    cfg->rrp_problem_count_threshold);

	log_printf(LOGSYS_LEVEL_DEBUG,
	    "RRP multicast threshold (%d problem count)",
	    cfg->rrp_problem_count_mcast_threshold);

	log_printf(LOGSYS_LEVEL_DEBUG,
	    "RRP automatic recovery check timeout (%d ms)",
	    cfg->rrp_autorecovery_check_timeout);

	log_printf(LOGSYS_LEVEL_DEBUG,
	    "RRP mode set to %s.",
	    cfg->rrp_mode);

	/*
	 * Heartbeat settings
	 */
	log_printf(LOGSYS_LEVEL_DEBUG,
	    "heartbeat_failures_allowed (%d)",
	    cfg->heartbeat_failures_allowed);

	log_printf(LOGSYS_LEVEL_DEBUG,
	    "max_network_delay (%d ms)",
	    cfg->max_network_delay);
}
/*
 * icmap change callback for keys below "totem.".
 *
 * When the changed key maps to a totem_config parameter (or is
 * totem.token_coefficient) the volatile part of the configuration is re-read,
 * dumped to the debug log and validated. A validation failure is only logged.
 * user_data is the struct totem_config to update; new_val and old_val are
 * not inspected.
 */
static void totem_change_notify(
	int32_t event,
	const char *key_name,
	struct icmap_notify_value new_val,
	struct icmap_notify_value old_val,
	void *user_data)
{
	struct totem_config *totem_config = user_data;
	const char *validate_error;
	const char *removed_key;
	uint32_t *tracked_param;
	uint8_t reload_flag;

	/*
	 * If a full reload is in progress then don't do anything until it's done and
	 * can reconfigure it all atomically
	 */
	if (icmap_get_uint8("config.reload_in_progress", &reload_flag) == CS_OK && reload_flag) {
		return;
	}

	/*
	 * Process change only if changed key is found in totem_config (-> the
	 * lookup is not NULL) or for special key token_coefficient.
	 * token_coefficient key is not stored in totem_config, but it is used
	 * for computation of token timeout.
	 */
	tracked_param = totem_get_param_by_name(totem_config, key_name);
	if (tracked_param == NULL && strcmp(key_name, "totem.token_coefficient") != 0) {
		return;
	}

	/*
	 * Values other than UINT32 are not supported, or needed (yet).
	 * Only a delete event hands the key name on to the volatile reader;
	 * every other event passes NULL.
	 */
	removed_key = (event == ICMAP_TRACK_DELETE) ? key_name : NULL;

	totem_volatile_config_read (totem_config, removed_key);

	log_printf(LOGSYS_LEVEL_DEBUG, "Totem related config key changed. Dumping actual totem config.");
	debug_dump_totem_config(totem_config);

	if (totem_volatile_config_validate(totem_config, &validate_error) == -1) {
		log_printf (LOGSYS_LEVEL_ERROR, "%s", validate_error);
		/*
		 * TODO: Consider corosync exit and/or load defaults for volatile
		 * values. For now, log error seems to be enough
		 */
	}
}
/*
 * icmap change callback for the "config.reload_in_progress" key.
 *
 * A new value of 0 means the reload has finished: the nodelist members and
 * the volatile settings are re-read, the result is dumped and validated
 * (a validation failure is only logged), nodelist.local_node_pos is
 * refreshed and config.totemconfig_reload_in_progress is cleared.
 * Any other new value sets config.totemconfig_reload_in_progress to 1.
 *
 * user_data is the struct totem_config to update; event, key_name and
 * old_val are not inspected.
 */
static void totem_reload_notify(
int32_t event,
const char *key_name,
struct icmap_notify_value new_val,
struct icmap_notify_value old_val,
void *user_data)
{
struct totem_config *totem_config = (struct totem_config *)user_data;
uint32_t local_node_pos;
const char *error_string;
/* Reload has completed */
if (*(uint8_t *)new_val.data == 0) { /* the new value is read as a single byte */
- put_nodelist_members_to_config (totem_config);
+ put_nodelist_members_to_config (totem_config, 1);
totem_volatile_config_read (totem_config, NULL);
log_printf(LOGSYS_LEVEL_DEBUG, "Configuration reloaded. Dumping actual totem config.");
debug_dump_totem_config(totem_config);
if (totem_volatile_config_validate(totem_config, &error_string) == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
/*
* TODO: Consider corosync exit and/or load defaults for volatile
* values. For now, log error seems to be enough
*/
}
/* Reinstate the local_node_pos */
local_node_pos = find_local_node_in_nodelist(totem_config);
/*
* NOTE(review): local_node_pos is unsigned here, so the test below relies
* on -1 being converted to the same unsigned value - confirm the return
* type of find_local_node_in_nodelist.
*/
if (local_node_pos != -1) {
icmap_set_uint32("nodelist.local_node_pos", local_node_pos);
}
icmap_set_uint8("config.totemconfig_reload_in_progress", 0);
} else {
/* Reload is starting (or still running): raise the totem-specific flag */
icmap_set_uint8("config.totemconfig_reload_in_progress", 1);
}
}
/*
 * Register the icmap trackers that keep totem_config in sync with the
 * configuration database. totem_config is handed to every callback as its
 * user data. The return values of icmap_track_add are not checked.
 */
static void add_totem_config_notification(struct totem_config *totem_config)
{
/* Reused for every registration below, so the individual handles are not kept */
icmap_track_t icmap_track;
/* Any add/delete/modify of a key starting with "totem." */
icmap_track_add("totem.",
ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX,
totem_change_notify,
totem_config,
&icmap_track);
/* Start/end of a full configuration reload (exact key, no prefix match) */
icmap_track_add("config.reload_in_progress",
ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY,
totem_reload_notify,
totem_config,
&icmap_track);
+
+ icmap_track_add("nodelist.node.", /* any add/delete/modify of a key starting with "nodelist.node." */
+ ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX,
+ nodelist_dynamic_notify,
+ (void *)totem_config,
+ &icmap_track);
}