diff --git a/exec/main.c b/exec/main.c
index 82fb8087..7dc7724b 100644
--- a/exec/main.c
+++ b/exec/main.c
@@ -1,1429 +1,1429 @@
/*
* Copyright (c) 2002-2006 MontaVista Software, Inc.
* Copyright (c) 2006-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* \mainpage Corosync
*
* This is the doxygen generated developer documentation for the Corosync
* project. For more information about Corosync, please see the project
* web site, corosync.org.
*
* \section license License
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "quorum.h"
#include "totemsrp.h"
#include "logconfig.h"
#include "totemconfig.h"
#include "main.h"
#include "sync.h"
#include "timer.h"
#include "util.h"
#include "apidef.h"
#include "service.h"
#include "schedwrk.h"
#ifdef HAVE_SMALL_MEMORY_FOOTPRINT
#define IPC_LOGSYS_SIZE 1024*64
#else
#define IPC_LOGSYS_SIZE 8192*128
#endif
LOGSYS_DECLARE_SYSTEM ("corosync",
LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_OUTPUT_SYSLOG,
LOG_DAEMON,
LOG_INFO);
LOGSYS_DECLARE_SUBSYS ("MAIN");
#define SERVER_BACKLOG 5
static int sched_priority = 0;
static unsigned int service_count = 32;
static struct totem_logging_configuration totem_logging_configuration;
static struct corosync_api_v1 *api = NULL;
static int sync_in_process = 1;
static qb_loop_t *corosync_poll_handle;
struct sched_param global_sched_param;
static corosync_timer_handle_t corosync_stats_timer_handle;
static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid";
static int ip_version = AF_INET;
qb_loop_t *cs_poll_handle_get (void)
{
return (corosync_poll_handle);
}
int cs_poll_dispatch_add (qb_loop_t * handle,
int fd,
int events,
void *data,
int (*dispatch_fn) (int fd,
int revents,
void *data))
{
return qb_loop_poll_add(handle, QB_LOOP_MED, fd, events, data,
dispatch_fn);
}
int cs_poll_dispatch_delete(qb_loop_t * handle, int fd)
{
return qb_loop_poll_del(handle, fd);
}
void corosync_state_dump (void)
{
int i;
for (i = 0; i < SERVICES_COUNT_MAX; i++) {
if (corosync_service[i] && corosync_service[i]->exec_dump_fn) {
corosync_service[i]->exec_dump_fn ();
}
}
}
static void corosync_blackbox_write_to_file (void)
{
char fname[PATH_MAX];
char fdata_fname[PATH_MAX];
char time_str[PATH_MAX];
struct tm cur_time_tm;
time_t cur_time_t;
ssize_t res;
cur_time_t = time(NULL);
localtime_r(&cur_time_t, &cur_time_tm);
strftime(time_str, PATH_MAX, "%Y-%m-%dT%H:%M:%S", &cur_time_tm);
snprintf(fname, PATH_MAX, "%s/fdata-%s-%lld",
get_run_dir(),
time_str,
(long long int)getpid());
if ((res = qb_log_blackbox_write_to_file(fname)) < 0) {
LOGSYS_PERROR(-res, LOGSYS_LEVEL_ERROR, "Can't store blackbox file");
}
snprintf(fdata_fname, sizeof(fdata_fname), "%s/fdata", get_run_dir());
unlink(fdata_fname);
if (symlink(fname, fdata_fname) == -1) {
log_printf(LOGSYS_LEVEL_ERROR, "Can't create symlink to '%s' for corosync blackbox file '%s'",
fname, fdata_fname);
}
}
static void unlink_all_completed (void)
{
api->timer_delete (corosync_stats_timer_handle);
qb_loop_stop (corosync_poll_handle);
icmap_fini();
}
void corosync_shutdown_request (void)
{
corosync_service_unlink_all (api, unlink_all_completed);
}
static int32_t sig_diag_handler (int num, void *data)
{
corosync_state_dump ();
return 0;
}
static int32_t sig_exit_handler (int num, void *data)
{
log_printf(LOGSYS_LEVEL_NOTICE, "Node was shut down by a signal");
corosync_service_unlink_all (api, unlink_all_completed);
return 0;
}
static void sigsegv_handler (int num)
{
(void)signal (SIGSEGV, SIG_DFL);
corosync_blackbox_write_to_file ();
qb_log_fini();
raise (SIGSEGV);
}
/*
* QB wrapper for real signal handler
*/
static int32_t sig_segv_handler (int num, void *data)
{
sigsegv_handler(num);
return 0;
}
static void sigabrt_handler (int num)
{
(void)signal (SIGABRT, SIG_DFL);
corosync_blackbox_write_to_file ();
qb_log_fini();
raise (SIGABRT);
}
/*
* QB wrapper for real signal handler
*/
static int32_t sig_abrt_handler (int num, void *data)
{
sigabrt_handler(num);
return 0;
}
#define LOCALHOST_IP inet_addr("127.0.0.1")
static void *corosync_group_handle;
static struct totempg_group corosync_group = {
.group = "a",
.group_len = 1
};
static void serialize_lock (void)
{
}
static void serialize_unlock (void)
{
}
static void corosync_sync_completed (void)
{
log_printf (LOGSYS_LEVEL_NOTICE,
"Completed service synchronization, ready to provide service.");
sync_in_process = 0;
cs_ipcs_sync_state_changed(sync_in_process);
cs_ipc_allow_connections(1);
/*
* Inform totem to start using new message queue again
*/
totempg_trans_ack();
}
static int corosync_sync_callbacks_retrieve (
int service_id,
struct sync_callbacks *callbacks)
{
if (corosync_service[service_id] == NULL) {
return (-1);
}
if (callbacks == NULL) {
return (0);
}
callbacks->name = corosync_service[service_id]->name;
callbacks->sync_init = corosync_service[service_id]->sync_init;
callbacks->sync_process = corosync_service[service_id]->sync_process;
callbacks->sync_activate = corosync_service[service_id]->sync_activate;
callbacks->sync_abort = corosync_service[service_id]->sync_abort;
return (0);
}
static struct memb_ring_id corosync_ring_id;
static void member_object_joined (unsigned int nodeid)
{
char member_ip[ICMAP_KEYNAME_MAXLEN];
char member_join_count[ICMAP_KEYNAME_MAXLEN];
char member_status[ICMAP_KEYNAME_MAXLEN];
snprintf(member_ip, ICMAP_KEYNAME_MAXLEN,
"runtime.totem.pg.mrp.srp.members.%u.ip", nodeid);
snprintf(member_join_count, ICMAP_KEYNAME_MAXLEN,
"runtime.totem.pg.mrp.srp.members.%u.join_count", nodeid);
snprintf(member_status, ICMAP_KEYNAME_MAXLEN,
"runtime.totem.pg.mrp.srp.members.%u.status", nodeid);
if (icmap_get(member_ip, NULL, NULL, NULL) == CS_OK) {
icmap_inc(member_join_count);
icmap_set_string(member_status, "joined");
} else {
icmap_set_string(member_ip, (char*)api->totem_ifaces_print (nodeid));
icmap_set_uint32(member_join_count, 1);
icmap_set_string(member_status, "joined");
}
log_printf (LOGSYS_LEVEL_DEBUG,
"Member joined: %s", api->totem_ifaces_print (nodeid));
}
static void member_object_left (unsigned int nodeid)
{
char member_status[ICMAP_KEYNAME_MAXLEN];
snprintf(member_status, ICMAP_KEYNAME_MAXLEN,
"runtime.totem.pg.mrp.srp.members.%u.status", nodeid);
icmap_set_string(member_status, "left");
log_printf (LOGSYS_LEVEL_DEBUG,
"Member left: %s", api->totem_ifaces_print (nodeid));
}
static void confchg_fn (
enum totem_configuration_type configuration_type,
const unsigned int *member_list, size_t member_list_entries,
const unsigned int *left_list, size_t left_list_entries,
const unsigned int *joined_list, size_t joined_list_entries,
const struct memb_ring_id *ring_id)
{
int i;
int abort_activate = 0;
if (sync_in_process == 1) {
abort_activate = 1;
}
sync_in_process = 1;
cs_ipcs_sync_state_changed(sync_in_process);
memcpy (&corosync_ring_id, ring_id, sizeof (struct memb_ring_id));
for (i = 0; i < left_list_entries; i++) {
member_object_left (left_list[i]);
}
for (i = 0; i < joined_list_entries; i++) {
member_object_joined (joined_list[i]);
}
/*
* Call configuration change for all services
*/
for (i = 0; i < service_count; i++) {
if (corosync_service[i] && corosync_service[i]->confchg_fn) {
corosync_service[i]->confchg_fn (configuration_type,
member_list, member_list_entries,
left_list, left_list_entries,
joined_list, joined_list_entries, ring_id);
}
}
if (abort_activate) {
sync_abort ();
}
if (configuration_type == TOTEM_CONFIGURATION_TRANSITIONAL) {
sync_save_transitional (member_list, member_list_entries, ring_id);
}
if (configuration_type == TOTEM_CONFIGURATION_REGULAR) {
sync_start (member_list, member_list_entries, ring_id);
}
}
static void priv_drop (void)
{
return; /* TODO: we are still not dropping privs */
}
static void corosync_tty_detach (void)
{
int devnull;
/*
* Disconnect from TTY if this is not a debug run
*/
switch (fork ()) {
case -1:
corosync_exit_error (COROSYNC_DONE_FORK);
break;
case 0:
/*
* child which is disconnected, run this process
*/
break;
default:
exit (0);
break;
}
/* Create new session */
(void)setsid();
/*
* Map stdin/out/err to /dev/null.
*/
devnull = open("/dev/null", O_RDWR);
if (devnull == -1) {
corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR);
}
if (dup2(devnull, 0) < 0 || dup2(devnull, 1) < 0
|| dup2(devnull, 2) < 0) {
close(devnull);
corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR);
}
close(devnull);
}
static void corosync_mlockall (void)
{
int res;
struct rlimit rlimit;
rlimit.rlim_cur = RLIM_INFINITY;
rlimit.rlim_max = RLIM_INFINITY;
#ifndef RLIMIT_MEMLOCK
#define RLIMIT_MEMLOCK RLIMIT_VMEM
#endif
setrlimit (RLIMIT_MEMLOCK, &rlimit);
res = mlockall (MCL_CURRENT | MCL_FUTURE);
if (res == -1) {
LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
"Could not lock memory of service to avoid page faults");
};
}
static void corosync_totem_stats_updater (void *data)
{
totempg_stats_t * stats;
uint32_t total_mtt_rx_token;
uint32_t total_backlog_calc;
uint32_t total_token_holdtime;
int t, prev, i;
int32_t token_count;
char key_name[ICMAP_KEYNAME_MAXLEN];
stats = api->totem_get_stats();
icmap_set_uint32("runtime.totem.pg.msg_reserved", stats->msg_reserved);
icmap_set_uint32("runtime.totem.pg.msg_queue_avail", stats->msg_queue_avail);
icmap_set_uint64("runtime.totem.pg.mrp.srp.orf_token_tx", stats->mrp->srp->orf_token_tx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.orf_token_rx", stats->mrp->srp->orf_token_rx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_merge_detect_tx", stats->mrp->srp->memb_merge_detect_tx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_merge_detect_rx", stats->mrp->srp->memb_merge_detect_rx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_join_tx", stats->mrp->srp->memb_join_tx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_join_rx", stats->mrp->srp->memb_join_rx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_tx", stats->mrp->srp->mcast_tx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_retx", stats->mrp->srp->mcast_retx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_rx", stats->mrp->srp->mcast_rx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_commit_token_tx", stats->mrp->srp->memb_commit_token_tx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_commit_token_rx", stats->mrp->srp->memb_commit_token_rx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.token_hold_cancel_tx", stats->mrp->srp->token_hold_cancel_tx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.token_hold_cancel_rx", stats->mrp->srp->token_hold_cancel_rx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.operational_entered", stats->mrp->srp->operational_entered);
icmap_set_uint64("runtime.totem.pg.mrp.srp.operational_token_lost", stats->mrp->srp->operational_token_lost);
icmap_set_uint64("runtime.totem.pg.mrp.srp.gather_entered", stats->mrp->srp->gather_entered);
icmap_set_uint64("runtime.totem.pg.mrp.srp.gather_token_lost", stats->mrp->srp->gather_token_lost);
icmap_set_uint64("runtime.totem.pg.mrp.srp.commit_entered", stats->mrp->srp->commit_entered);
icmap_set_uint64("runtime.totem.pg.mrp.srp.commit_token_lost", stats->mrp->srp->commit_token_lost);
icmap_set_uint64("runtime.totem.pg.mrp.srp.recovery_entered", stats->mrp->srp->recovery_entered);
icmap_set_uint64("runtime.totem.pg.mrp.srp.recovery_token_lost", stats->mrp->srp->recovery_token_lost);
icmap_set_uint64("runtime.totem.pg.mrp.srp.consensus_timeouts", stats->mrp->srp->consensus_timeouts);
icmap_set_uint64("runtime.totem.pg.mrp.srp.rx_msg_dropped", stats->mrp->srp->rx_msg_dropped);
icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_gather", stats->mrp->srp->continuous_gather);
icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_sendmsg_failures",
stats->mrp->srp->continuous_sendmsg_failures);
icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure",
stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ? 1 : 0);
if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ||
stats->mrp->srp->continuous_sendmsg_failures > MAX_NO_CONT_SENDMSG_FAILURES) {
log_printf (LOGSYS_LEVEL_WARNING,
"Totem is unable to form a cluster because of an "
"operating system or network fault. The most common "
"cause of this message is that the local firewall is "
"configured improperly.");
icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 1);
} else {
icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 0);
}
for (i = 0; i < stats->mrp->srp->rrp->interface_count; i++) {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.rrp.%u.faulty", i);
icmap_set_uint8(key_name, stats->mrp->srp->rrp->faulty[i]);
}
total_mtt_rx_token = 0;
total_token_holdtime = 0;
total_backlog_calc = 0;
token_count = 0;
t = stats->mrp->srp->latest_token;
while (1) {
if (t == 0)
prev = TOTEM_TOKEN_STATS_MAX - 1;
else
prev = t - 1;
if (prev == stats->mrp->srp->earliest_token)
break;
/* if tx == 0, then dropped token (not ours) */
if (stats->mrp->srp->token[t].tx != 0 ||
(stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx) > 0 ) {
total_mtt_rx_token += (stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx);
total_token_holdtime += (stats->mrp->srp->token[t].tx - stats->mrp->srp->token[t].rx);
total_backlog_calc += stats->mrp->srp->token[t].backlog_calc;
token_count++;
}
t = prev;
}
if (token_count) {
icmap_set_uint32("runtime.totem.pg.mrp.srp.mtt_rx_token", (total_mtt_rx_token / token_count));
icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_token_workload", (total_token_holdtime / token_count));
icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_backlog_calc", (total_backlog_calc / token_count));
}
cs_ipcs_stats_update();
api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL,
corosync_totem_stats_updater,
&corosync_stats_timer_handle);
}
static void corosync_totem_stats_init (void)
{
icmap_set_uint32("runtime.totem.pg.mrp.srp.mtt_rx_token", 0);
icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_token_workload", 0);
icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_backlog_calc", 0);
/* start stats timer */
api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL,
corosync_totem_stats_updater,
&corosync_stats_timer_handle);
}
static void deliver_fn (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required)
{
const struct qb_ipc_request_header *header;
int32_t service;
int32_t fn_id;
uint32_t id;
header = msg;
if (endian_conversion_required) {
id = swab32 (header->id);
} else {
id = header->id;
}
/*
* Call the proper executive handler
*/
service = id >> 16;
fn_id = id & 0xffff;
if (!corosync_service[service]) {
return;
}
if (fn_id >= corosync_service[service]->exec_engine_count) {
log_printf(LOGSYS_LEVEL_WARNING, "discarded unknown message %d for service %d (max id %d)",
fn_id, service, corosync_service[service]->exec_engine_count);
return;
}
icmap_fast_inc(service_stats_rx[service][fn_id]);
if (endian_conversion_required) {
assert(corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn != NULL);
corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn
((void *)msg);
}
corosync_service[service]->exec_engine[fn_id].exec_handler_fn
(msg, nodeid);
}
int main_mcast (
const struct iovec *iovec,
unsigned int iov_len,
unsigned int guarantee)
{
const struct qb_ipc_request_header *req = iovec->iov_base;
int32_t service;
int32_t fn_id;
service = req->id >> 16;
fn_id = req->id & 0xffff;
if (corosync_service[service]) {
icmap_fast_inc(service_stats_tx[service][fn_id]);
}
return (totempg_groups_mcast_joined (corosync_group_handle, iovec, iov_len, guarantee));
}
static void corosync_ring_id_create_or_load (
struct memb_ring_id *memb_ring_id,
const struct totem_ip_address *addr)
{
int fd;
int res = 0;
char filename[PATH_MAX];
snprintf (filename, sizeof(filename), "%s/ringid_%s",
get_run_dir(), totemip_print (addr));
fd = open (filename, O_RDONLY, 0700);
/*
* If file can be opened and read, read the ring id
*/
if (fd != -1) {
res = read (fd, &memb_ring_id->seq, sizeof (uint64_t));
close (fd);
}
/*
* If file could not be opened or read, create a new ring id
*/
if ((fd == -1) || (res != sizeof (uint64_t))) {
memb_ring_id->seq = 0;
umask(0);
fd = open (filename, O_CREAT|O_RDWR, 0700);
if (fd != -1) {
res = write (fd, &memb_ring_id->seq, sizeof (uint64_t));
close (fd);
if (res == -1) {
LOGSYS_PERROR (errno, LOGSYS_LEVEL_ERROR,
"Couldn't write ringid file '%s'", filename);
corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
}
} else {
LOGSYS_PERROR (errno, LOGSYS_LEVEL_ERROR,
"Couldn't create ringid file '%s'", filename);
corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
}
}
totemip_copy(&memb_ring_id->rep, addr);
assert (!totemip_zero_check(&memb_ring_id->rep));
}
static void corosync_ring_id_store (
const struct memb_ring_id *memb_ring_id,
const struct totem_ip_address *addr)
{
char filename[PATH_MAX];
int fd;
int res;
snprintf (filename, sizeof(filename), "%s/ringid_%s",
get_run_dir(), totemip_print (addr));
fd = open (filename, O_WRONLY, 0700);
if (fd == -1) {
fd = open (filename, O_CREAT|O_RDWR, 0700);
}
if (fd == -1) {
LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR,
"Couldn't store new ring id %llx to stable storage",
memb_ring_id->seq);
corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
}
log_printf (LOGSYS_LEVEL_DEBUG,
"Storing new sequence id for ring %llx", memb_ring_id->seq);
res = write (fd, &memb_ring_id->seq, sizeof(memb_ring_id->seq));
close (fd);
if (res != sizeof(memb_ring_id->seq)) {
LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR,
"Couldn't store new ring id %llx to stable storage",
memb_ring_id->seq);
corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
}
}
static qb_loop_timer_handle recheck_the_q_level_timer;
void corosync_recheck_the_q_level(void *data)
{
totempg_check_q_level(corosync_group_handle);
if (cs_ipcs_q_level_get() == TOTEM_Q_LEVEL_CRITICAL) {
qb_loop_timer_add(cs_poll_handle_get(), QB_LOOP_MED, 1*QB_TIME_NS_IN_MSEC,
NULL, corosync_recheck_the_q_level, &recheck_the_q_level_timer);
}
}
struct sending_allowed_private_data_struct {
int reserved_msgs;
};
int corosync_sending_allowed (
unsigned int service,
unsigned int id,
const void *msg,
void *sending_allowed_private_data)
{
struct sending_allowed_private_data_struct *pd =
(struct sending_allowed_private_data_struct *)sending_allowed_private_data;
struct iovec reserve_iovec;
struct qb_ipc_request_header *header = (struct qb_ipc_request_header *)msg;
int sending_allowed;
reserve_iovec.iov_base = (char *)header;
reserve_iovec.iov_len = header->size;
pd->reserved_msgs = totempg_groups_joined_reserve (
corosync_group_handle,
&reserve_iovec, 1);
if (pd->reserved_msgs == -1) {
return -EINVAL;
}
sending_allowed = QB_FALSE;
if (corosync_quorum_is_quorate() == 1 ||
corosync_service[service]->allow_inquorate == CS_LIB_ALLOW_INQUORATE) {
// we are quorate
// now check flow control
if (corosync_service[service]->lib_engine[id].flow_control == CS_LIB_FLOW_CONTROL_NOT_REQUIRED) {
sending_allowed = QB_TRUE;
} else if (pd->reserved_msgs && sync_in_process == 0) {
sending_allowed = QB_TRUE;
} else if (pd->reserved_msgs == 0) {
return -ENOBUFS;
} else /* (sync_in_process) */ {
return -EINPROGRESS;
}
} else {
return -EHOSTUNREACH;
}
return (sending_allowed);
}
void corosync_sending_allowed_release (void *sending_allowed_private_data)
{
struct sending_allowed_private_data_struct *pd =
(struct sending_allowed_private_data_struct *)sending_allowed_private_data;
if (pd->reserved_msgs == -1) {
return;
}
totempg_groups_joined_release (pd->reserved_msgs);
}
int message_source_is_local (const mar_message_source_t *source)
{
int ret = 0;
assert (source != NULL);
if (source->nodeid == totempg_my_nodeid_get ()) {
ret = 1;
}
return ret;
}
void message_source_set (
mar_message_source_t *source,
void *conn)
{
assert ((source != NULL) && (conn != NULL));
memset (source, 0, sizeof (mar_message_source_t));
source->nodeid = totempg_my_nodeid_get ();
source->conn = conn;
}
struct scheduler_pause_timeout_data {
struct totem_config *totem_config;
qb_loop_timer_handle handle;
unsigned long long tv_prev;
unsigned long long max_tv_diff;
};
static void timer_function_scheduler_timeout (void *data)
{
struct scheduler_pause_timeout_data *timeout_data = (struct scheduler_pause_timeout_data *)data;
unsigned long long tv_current;
unsigned long long tv_diff;
tv_current = qb_util_nano_current_get ();
if (timeout_data->tv_prev == 0) {
/*
* Initial call -> just pretent everything is ok
*/
timeout_data->tv_prev = tv_current;
timeout_data->max_tv_diff = 0;
}
tv_diff = tv_current - timeout_data->tv_prev;
timeout_data->tv_prev = tv_current;
if (tv_diff > timeout_data->max_tv_diff) {
log_printf (LOGSYS_LEVEL_WARNING, "Corosync main process was not scheduled for %0.4f ms "
"(threshold is %0.4f ms). Consider token timeout increase.",
(float)tv_diff / QB_TIME_NS_IN_MSEC, (float)timeout_data->max_tv_diff / QB_TIME_NS_IN_MSEC);
}
/*
* Set next threshold, because token_timeout can change
*/
timeout_data->max_tv_diff = timeout_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC * 0.8;
qb_loop_timer_add (corosync_poll_handle,
QB_LOOP_MED,
timeout_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC / 3,
timeout_data,
timer_function_scheduler_timeout,
&timeout_data->handle);
}
static void corosync_setscheduler (void)
{
#if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) && defined(HAVE_SCHED_SETSCHEDULER)
int res;
sched_priority = sched_get_priority_max (SCHED_RR);
if (sched_priority != -1) {
global_sched_param.sched_priority = sched_priority;
res = sched_setscheduler (0, SCHED_RR, &global_sched_param);
if (res == -1) {
LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING,
"Could not set SCHED_RR at priority %d",
global_sched_param.sched_priority);
global_sched_param.sched_priority = 0;
#ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
qb_log_thread_priority_set (SCHED_OTHER, 0);
#endif
} else {
/*
* Turn on SCHED_RR in logsys system
*/
#ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
res = qb_log_thread_priority_set (SCHED_RR, sched_priority);
#else
res = -1;
#endif
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR,
"Could not set logsys thread priority."
" Can't continue because of priority inversions.");
corosync_exit_error (COROSYNC_DONE_LOGSETUP);
}
}
} else {
LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
"Could not get maximum scheduler priority");
sched_priority = 0;
}
#else
log_printf(LOGSYS_LEVEL_WARNING,
"The Platform is missing process priority setting features. Leaving at default.");
#endif
}
/* The basename man page contains scary warnings about
thread-safety and portability, hence this */
static const char *corosync_basename(const char *file_name)
{
char *base;
base = strrchr (file_name, '/');
if (base) {
return base + 1;
}
return file_name;
}
static void
_logsys_log_printf(int level, int subsys,
const char *function_name,
const char *file_name,
int file_line,
const char *format,
...) __attribute__((format(printf, 6, 7)));
static void
_logsys_log_printf(int level, int subsys,
const char *function_name,
const char *file_name,
int file_line,
const char *format, ...)
{
va_list ap;
va_start(ap, format);
qb_log_from_external_source_va(function_name, corosync_basename(file_name),
format, level, file_line,
subsys, ap);
va_end(ap);
}
static void fplay_key_change_notify_fn (
int32_t event,
const char *key_name,
struct icmap_notify_value new_val,
struct icmap_notify_value old_val,
void *user_data)
{
if (strcmp(key_name, "runtime.blackbox.dump_flight_data") == 0) {
fprintf(stderr,"Writetofile\n");
corosync_blackbox_write_to_file ();
}
if (strcmp(key_name, "runtime.blackbox.dump_state") == 0) {
fprintf(stderr,"statefump\n");
corosync_state_dump ();
}
}
static void corosync_fplay_control_init (void)
{
icmap_track_t track = NULL;
icmap_set_string("runtime.blackbox.dump_flight_data", "no");
icmap_set_string("runtime.blackbox.dump_state", "no");
icmap_track_add("runtime.blackbox.dump_flight_data",
ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY,
fplay_key_change_notify_fn,
NULL, &track);
icmap_track_add("runtime.blackbox.dump_state",
ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY,
fplay_key_change_notify_fn,
NULL, &track);
}
/*
* Set RO flag for keys, which ether doesn't make sense to change by user (statistic)
* or which when changed are not reflected by runtime (totem.crypto_cipher, ...).
*
* Also some RO keys cannot be determined in this stage, so they are set later in
* other functions (like nodelist.local_node_pos, ...)
*/
static void set_icmap_ro_keys_flag (void)
{
/*
* Set RO flag for all keys of internal configuration and runtime statistics
*/
icmap_set_ro_access("internal_configuration.", CS_TRUE, CS_TRUE);
icmap_set_ro_access("runtime.connections.", CS_TRUE, CS_TRUE);
icmap_set_ro_access("runtime.totem.", CS_TRUE, CS_TRUE);
icmap_set_ro_access("runtime.services.", CS_TRUE, CS_TRUE);
icmap_set_ro_access("runtime.config.", CS_TRUE, CS_TRUE);
icmap_set_ro_access("uidgid.config.", CS_TRUE, CS_TRUE);
/*
* Set RO flag for constrete keys of configuration which can't be changed
* during runtime
*/
icmap_set_ro_access("totem.crypto_cipher", CS_FALSE, CS_TRUE);
icmap_set_ro_access("totem.crypto_hash", CS_FALSE, CS_TRUE);
icmap_set_ro_access("totem.secauth", CS_FALSE, CS_TRUE);
icmap_set_ro_access("totem.ip_version", CS_FALSE, CS_TRUE);
icmap_set_ro_access("totem.rrp_mode", CS_FALSE, CS_TRUE);
icmap_set_ro_access("totem.transport", CS_FALSE, CS_TRUE);
icmap_set_ro_access("totem.cluster_name", CS_FALSE, CS_TRUE);
icmap_set_ro_access("totem.netmtu", CS_FALSE, CS_TRUE);
icmap_set_ro_access("totem.threads", CS_FALSE, CS_TRUE);
icmap_set_ro_access("totem.version", CS_FALSE, CS_TRUE);
icmap_set_ro_access("totem.nodeid", CS_FALSE, CS_TRUE);
icmap_set_ro_access("totem.clear_node_high_bit", CS_FALSE, CS_TRUE);
icmap_set_ro_access("qb.ipc_type", CS_FALSE, CS_TRUE);
icmap_set_ro_access("config.reload_in_progress", CS_FALSE, CS_TRUE);
icmap_set_ro_access("config.totemconfig_reload_in_progress", CS_FALSE, CS_TRUE);
}
static void main_service_ready (void)
{
int res;
/*
* This must occur after totempg is initialized because "this_ip" must be set
*/
res = corosync_service_defaults_link_and_init (api);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Could not initialize default services");
corosync_exit_error (COROSYNC_DONE_INIT_SERVICES);
}
cs_ipcs_init();
corosync_totem_stats_init ();
corosync_fplay_control_init ();
sync_init (
corosync_sync_callbacks_retrieve,
corosync_sync_completed);
}
static enum e_corosync_done corosync_flock (const char *lockfile, pid_t pid)
{
struct flock lock;
enum e_corosync_done err;
char pid_s[17];
int fd_flag;
int lf;
err = COROSYNC_DONE_EXIT;
lf = open (lockfile, O_WRONLY | O_CREAT, 0640);
if (lf == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create lock file.");
- return (COROSYNC_DONE_AQUIRE_LOCK);
+ return (COROSYNC_DONE_ACQUIRE_LOCK);
}
retry_fcntl:
lock.l_type = F_WRLCK;
lock.l_start = 0;
lock.l_whence = SEEK_SET;
lock.l_len = 0;
if (fcntl (lf, F_SETLK, &lock) == -1) {
switch (errno) {
case EINTR:
goto retry_fcntl;
break;
case EAGAIN:
case EACCES:
log_printf (LOGSYS_LEVEL_ERROR, "Another Corosync instance is already running.");
err = COROSYNC_DONE_ALREADY_RUNNING;
goto error_close;
break;
default:
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't acquire lock. Error was %s",
strerror(errno));
- err = COROSYNC_DONE_AQUIRE_LOCK;
+ err = COROSYNC_DONE_ACQUIRE_LOCK;
goto error_close;
break;
}
}
if (ftruncate (lf, 0) == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't truncate lock file. Error was %s",
strerror (errno));
- err = COROSYNC_DONE_AQUIRE_LOCK;
+ err = COROSYNC_DONE_ACQUIRE_LOCK;
goto error_close_unlink;
}
memset (pid_s, 0, sizeof (pid_s));
snprintf (pid_s, sizeof (pid_s) - 1, "%u\n", pid);
retry_write:
if (write (lf, pid_s, strlen (pid_s)) != strlen (pid_s)) {
if (errno == EINTR) {
goto retry_write;
} else {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't write pid to lock file. "
"Error was %s", strerror (errno));
- err = COROSYNC_DONE_AQUIRE_LOCK;
+ err = COROSYNC_DONE_ACQUIRE_LOCK;
goto error_close_unlink;
}
}
if ((fd_flag = fcntl (lf, F_GETFD, 0)) == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't get close-on-exec flag from lock file. "
"Error was %s", strerror (errno));
- err = COROSYNC_DONE_AQUIRE_LOCK;
+ err = COROSYNC_DONE_ACQUIRE_LOCK;
goto error_close_unlink;
}
fd_flag |= FD_CLOEXEC;
if (fcntl (lf, F_SETFD, fd_flag) == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't set close-on-exec flag to lock file. "
"Error was %s", strerror (errno));
- err = COROSYNC_DONE_AQUIRE_LOCK;
+ err = COROSYNC_DONE_ACQUIRE_LOCK;
goto error_close_unlink;
}
return (err);
error_close_unlink:
unlink (lockfile);
error_close:
close (lf);
return (err);
}
int main (int argc, char **argv, char **envp)
{
const char *error_string;
struct totem_config totem_config;
int res, ch;
int background, setprio, testonly;
struct stat stat_out;
enum e_corosync_done flock_err;
uint64_t totem_config_warnings;
struct scheduler_pause_timeout_data scheduler_pause_timeout_data;
/* default configuration
*/
background = 1;
setprio = 1;
testonly = 0;
while ((ch = getopt (argc, argv, "fprtv")) != EOF) {
switch (ch) {
case 'f':
background = 0;
break;
case 'p':
setprio = 0;
break;
case 'r':
setprio = 1;
break;
case 't':
testonly = 1;
break;
case 'v':
printf ("Corosync Cluster Engine, version '%s'\n", VERSION);
printf ("Copyright (c) 2006-2009 Red Hat, Inc.\n");
logsys_system_fini();
return EXIT_SUCCESS;
break;
default:
fprintf(stderr, \
"usage:\n"\
" -f : Start application in foreground.\n"\
" -p : Do not set process priority.\n"\
" -t : Test configuration and exit.\n"\
" -r : Set round robin realtime scheduling (default).\n"\
" -v : Display version and SVN revision of Corosync and exit.\n");
logsys_system_fini();
return EXIT_FAILURE;
}
}
/*
* Set round robin realtime scheduling with priority 99
* Lock all memory to avoid page faults which may interrupt
* application healthchecking
*/
if (setprio) {
corosync_setscheduler ();
}
corosync_mlockall ();
/*
* Other signals are registered later via qb_loop_signal_add
*/
(void)signal (SIGSEGV, sigsegv_handler);
(void)signal (SIGABRT, sigabrt_handler);
#if MSG_NOSIGNAL != 0
(void)signal (SIGPIPE, SIG_IGN);
#endif
if (icmap_init() != CS_OK) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't initialize configuration component.");
corosync_exit_error (COROSYNC_DONE_ICMAP);
}
set_icmap_ro_keys_flag();
/*
* Initialize the corosync_api_v1 definition
*/
api = apidef_get ();
res = coroparse_configparse(icmap_get_global_map(), &error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
res = corosync_log_config_read (&error_string);
if (res == -1) {
/*
* if we are here, we _must_ flush the logsys queue
* and try to inform that we couldn't read the config.
* this is a desperate attempt before certain death
* and there is no guarantee that we can print to stderr
* nor that logsys is sending the messages where we expect.
*/
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
fprintf(stderr, "%s", error_string);
syslog (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD);
}
if (!testonly) {
log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine ('%s'): started and ready to provide service.", VERSION);
log_printf (LOGSYS_LEVEL_INFO, "Corosync built-in features:" PACKAGE_FEATURES "");
}
/*
* Make sure required directory is present
*/
res = stat (get_run_dir(), &stat_out);
if ((res == -1) || (res == 0 && !S_ISDIR(stat_out.st_mode))) {
log_printf (LOGSYS_LEVEL_ERROR, "Required directory not present %s. Please create it.", get_run_dir());
corosync_exit_error (COROSYNC_DONE_DIR_NOT_PRESENT);
}
res = chdir(get_run_dir());
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Cannot chdir to run directory %s. "
"Please make sure it has correct context and rights.", get_run_dir());
corosync_exit_error (COROSYNC_DONE_DIR_NOT_PRESENT);
}
res = totem_config_read (&totem_config, &error_string, &totem_config_warnings);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_IGNORED) {
log_printf (LOGSYS_LEVEL_WARNING, "member section is used together with nodelist. Members ignored.");
}
if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED) {
log_printf (LOGSYS_LEVEL_WARNING, "member section is deprecated.");
}
if (totem_config_warnings & TOTEM_CONFIG_WARNING_TOTEM_NODEID_IGNORED) {
log_printf (LOGSYS_LEVEL_WARNING, "nodeid appears both in totem section and nodelist. Nodelist one is used.");
}
if (totem_config_warnings != 0) {
log_printf (LOGSYS_LEVEL_WARNING, "Please migrate config file to nodelist.");
}
res = totem_config_keyread (&totem_config, &error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
res = totem_config_validate (&totem_config, &error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
if (testonly) {
corosync_exit_error (COROSYNC_DONE_EXIT);
}
ip_version = totem_config.ip_version;
totem_config.totem_memb_ring_id_create_or_load = corosync_ring_id_create_or_load;
totem_config.totem_memb_ring_id_store = corosync_ring_id_store;
totem_config.totem_logging_configuration = totem_logging_configuration;
totem_config.totem_logging_configuration.log_subsys_id = _logsys_subsys_create("TOTEM", "totem,"
"totemmrp.c,totemrrp.c,totemip.c,totemconfig.c,totemcrypto.c,totemsrp.c,"
"totempg.c,totemiba.c,totemudp.c,totemudpu.c,totemnet.c");
totem_config.totem_logging_configuration.log_level_security = LOGSYS_LEVEL_WARNING;
totem_config.totem_logging_configuration.log_level_error = LOGSYS_LEVEL_ERROR;
totem_config.totem_logging_configuration.log_level_warning = LOGSYS_LEVEL_WARNING;
totem_config.totem_logging_configuration.log_level_notice = LOGSYS_LEVEL_NOTICE;
totem_config.totem_logging_configuration.log_level_debug = LOGSYS_LEVEL_DEBUG;
totem_config.totem_logging_configuration.log_level_trace = LOGSYS_LEVEL_TRACE;
totem_config.totem_logging_configuration.log_printf = _logsys_log_printf;
logsys_config_apply();
/*
* Now we are fully initialized.
*/
if (background) {
corosync_tty_detach ();
}
corosync_poll_handle = qb_loop_create ();
memset(&scheduler_pause_timeout_data, 0, sizeof(scheduler_pause_timeout_data));
scheduler_pause_timeout_data.totem_config = &totem_config;
timer_function_scheduler_timeout (&scheduler_pause_timeout_data);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW,
SIGUSR2, NULL, sig_diag_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGINT, NULL, sig_exit_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGSEGV, NULL, sig_segv_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGABRT, NULL, sig_abrt_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGQUIT, NULL, sig_exit_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGTERM, NULL, sig_exit_handler, NULL);
if (logsys_thread_start() != 0) {
log_printf (LOGSYS_LEVEL_ERROR, "Can't initialize log thread");
corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD);
}
if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) != COROSYNC_DONE_EXIT) {
corosync_exit_error (flock_err);
}
/*
* if totempg_initialize doesn't have root priveleges, it cannot
* bind to a specific interface. This only matters if
* there is more then one interface in a system, so
* in this case, only a warning is printed
*/
/*
* Join multicast group and setup delivery
* and configuration change functions
*/
totempg_initialize (
corosync_poll_handle,
&totem_config);
totempg_service_ready_register (
main_service_ready);
totempg_groups_initialize (
&corosync_group_handle,
deliver_fn,
confchg_fn);
totempg_groups_join (
corosync_group_handle,
&corosync_group,
1);
/*
* Drop root privleges to user 'corosync'
* TODO: Don't really need full root capabilities;
* needed capabilities are:
* CAP_NET_RAW (bindtodevice)
* CAP_SYS_NICE (setscheduler)
* CAP_IPC_LOCK (mlockall)
*/
priv_drop ();
schedwrk_init (
serialize_lock,
serialize_unlock);
/*
* Start main processing loop
*/
qb_loop_run (corosync_poll_handle);
/*
* Exit was requested
*/
totempg_finalize ();
/*
* free the loop resources
*/
qb_loop_destroy (corosync_poll_handle);
/*
* free up the icmap
*/
/*
* Remove pid lock file
*/
unlink (corosync_lock_file);
corosync_exit_error (COROSYNC_DONE_EXIT);
return EXIT_SUCCESS;
}
diff --git a/exec/util.h b/exec/util.h
index 9efd35e5..df5eeb40 100644
--- a/exec/util.h
+++ b/exec/util.h
@@ -1,88 +1,88 @@
/*
* Copyright (c) 2002-2004 MontaVista Software, Inc.
* Copyright (c) 2004 Open Source Development Lab
* Copyright (c) 2006-2011 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com), Mark Haverkamp (markh@osdl.org)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UTIL_H_DEFINED
#define UTIL_H_DEFINED
#include
#include
/**
* Get the time of day and convert to nanoseconds
*/
extern cs_time_t clust_time_now(void);
enum e_corosync_done {
COROSYNC_DONE_EXIT = 0,
COROSYNC_DONE_FORK = 4,
COROSYNC_DONE_LOGCONFIGREAD = 7,
COROSYNC_DONE_MAINCONFIGREAD = 8,
COROSYNC_DONE_LOGSETUP = 9,
COROSYNC_DONE_ICMAP = 12,
COROSYNC_DONE_INIT_SERVICES = 13,
COROSYNC_DONE_FATAL_ERR = 15,
COROSYNC_DONE_DIR_NOT_PRESENT = 16,
- COROSYNC_DONE_AQUIRE_LOCK = 17,
+ COROSYNC_DONE_ACQUIRE_LOCK = 17,
COROSYNC_DONE_ALREADY_RUNNING = 18,
COROSYNC_DONE_STD_TO_NULL_REDIR = 19,
COROSYNC_DONE_SERVICE_ENGINE_INIT = 20,
COROSYNC_DONE_STORE_RINGID = 21,
COROSYNC_DONE_PLOAD = 99
};
/**
* Compare two names. returns non-zero on match.
*/
extern int name_match(cs_name_t *name1, cs_name_t *name2);
#define corosync_exit_error(err) _corosync_exit_error ((err), __FILE__, __LINE__)
extern void _corosync_exit_error (enum e_corosync_done err, const char *file,
unsigned int line) __attribute__((noreturn));
void _corosync_out_of_memory_error (void) __attribute__((noreturn));
extern char *getcs_name_t (cs_name_t *name);
extern void setcs_name_t (cs_name_t *name, char *str);
extern int cs_name_tisEqual (cs_name_t *str1, char *str2);
/**
* Get the short name of a service from the service_id.
*/
const char * short_service_name_get(uint32_t service_id,
char *buf, size_t buf_size);
/*
* Return run directory (ether COROSYNC_RUN_DIR env or LOCALSTATEDIR/lib/corosync)
*/
const char *get_run_dir(void);
#endif /* UTIL_H_DEFINED */
diff --git a/man/corosync-qdevice.8 b/man/corosync-qdevice.8
index fbeb328b..338806ea 100644
--- a/man/corosync-qdevice.8
+++ b/man/corosync-qdevice.8
@@ -1,318 +1,318 @@
.\"/*
.\" * Copyright (C) 2016 Red Hat, Inc.
.\" *
.\" * All rights reserved.
.\" *
.\" * Author: Jan Friesse
.\" *
.\" * This software licensed under BSD license, the text of which follows:
.\" *
.\" * Redistribution and use in source and binary forms, with or without
.\" * modification, are permitted provided that the following conditions are met:
.\" *
.\" * - Redistributions of source code must retain the above copyright notice,
.\" * this list of conditions and the following disclaimer.
.\" * - Redistributions in binary form must reproduce the above copyright notice,
.\" * this list of conditions and the following disclaimer in the documentation
.\" * and/or other materials provided with the distribution.
.\" * - Neither the name of Red Hat, Inc. nor the names of its
.\" * contributors may be used to endorse or promote products derived from this
.\" * software without specific prior written permission.
.\" *
.\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
.\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
.\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
.\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
.\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
.\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
.\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
.\" * THE POSSIBILITY OF SUCH DAMAGE.
.\" */
.TH COROSYNC-QDEVICE 8 2016-06-29
.SH NAME
corosync-qdevice \- QDevice daemon
.SH SYNOPSIS
.B "corosync-qdevice [-dfh] [-S option=value[,option2=value2,...]]"
.SH DESCRIPTION
.B corosync-qdevice
is a daemon running on each node of a cluster. It provides a configured
number of votes to the
quorum subsystem based on a third-party arbitrator's decision. Its primary use
is to allow a cluster to sustain more node failures than standard quorum rules allow.
It is recommended for clusters with an even number of nodes and highly recommended
for 2 node clusters.
.SH OPTIONS
.TP
.B -d
Forcefully turn on debug information without the need to change corosync.conf.
.TP
.B -f
Do not daemonize, run in the foreground.
.TP
.B -h
Show short help text
.TP
.B -S
Set advanced settings described in its own section below. This option
shouldn't be generally used because most of the options are
not safe to change.
.SH CONFIGURATION
.B corosync-qdevice
reads its configuration from corosync.conf file.
The main configuration is within
.B quorum.device
sub-key. Each model also has its own configuration within a
similarly named sub-key.
.TP
.B model
Specifies the model to be used. This parameter is required.
.B corosync-qdevice
is modular and is able to support multiple different models. The model basically
defines what type of arbitrator is used. Currently only
.I net
is supported.
.TP
.B timeout
Specifies how often
.B corosync-qdevice
should call the votequorum_poll function. It is also used by the net model to adjust
its hearbeat timeout. It is recommended that you don't change this value.
Default is 10000.
.TP
.B sync_timeout
Specifies how often
.B corosync-qdevice
should call the votequorum_poll function during a sync phase. It is recommended that you don't change this value.
Default is 30000.
.TP
.B votes
The number of votes provided to the cluster by qdevice. Default is (number_of_nodes - 1) or generally
sum(votes_per_node) - 1.
.PP
.B quorum.device.net
holds the configuration for model 'net'.
.TP
.B tls
Can be one of
.I on, off or required
and specifies if tls should be used.
.I on
means a connection with TLS is attempted first, but if the server doesn't advertise TLS support
then non-TLS will be used.
.I off
is used then TLS is not required and it's then not even tried. This mode is the
only one which doesn't need a properly initialized NSS database.
.I required
means TLS is required and if the server doesn't support TLS, qdevice will
exit with error message. Default is on.
.TP
.B host
Specifies the IP address or host name of the qnetd server to be used. This parameter
is required.
.TP
.B port
Specifies TCP port of qnetd server. Default is 5403.
.TP
.B algorithm
Decision algorithm. Can be one of the
.I ffsplit
or
.I lms.
(actually there are also
.I test
and
.I 2nodelms
, both of which are mainly for developers and shouldn't be used for production clusters). For a
description of what each algorithm means and how the algorithms differ see their individual sections.
Default value is ffsplit.
.TP
.B tie_breaker
can be one of
.I lowest,
.I highest
or valid_node_id (number) values. It's used as a fallback if qdevice has to decide between two or more
equal partitions.
.I lowest
means the partition with the lowest node id is chosen.
.I highest
means the partition with highest node id is chosen. And valid_node_id means that the partition
containing the node with the given node id is chosen.
Default is 'lowest'.
.TP
.B connect_timeout
Timeout when
.B corosync-qdevice
is trying to connect to
.B corosync-qnetd
host. Default is 0.8 * quorum.sync_timeout.
.TP
.B force_ip_version
can be one of
.I 0|4|6
and forces the software to use the given IP version.
.I 0
-(default value) means IPv6 is prefered and IPv4 should be used as a fallback.
+(default value) means IPv6 is preferred and IPv4 should be used as a fallback.
.PP
Logging configuration is within the
.B logging
directive.
.B corosync-qdevice
parses and supports most of the options with exception of
.B to_logfile,
.B logfile
and
.B logfile_priority.
The
.B logger_subsys
sub-directive can be also used if
.B subsys
is set to QDEVICE.
.PP
For
.B corosync-qdevice
to work correctly, the
.B nodelist
directive has to be used and properly configured. Also the net model requires that
.B totem.cluster_name
option is set.
.SH MODEL NET TLS CONFIGURATION
For model net to work using TLS, it's necessary to create the NSS database, import Qnetd
CA certificate, and get/distribute a valid client certificate.
If pcs is used (recommended) the following steps are not needed because pcs does them automatically.
.B corosync-qdevice-net-certutil
is the tool to perform required actions semi-automatically. Please consult the help output of
it and its man page. For a first time configuration it may make sense to start with the
.B -Q
option.
If TLS is not required just edit corosync.conf file and set
.B quorum.device.net.tls
to
.I off.
.SH MODEL NET ALGORITHMS
Algorithms are used to change behavior of how
.B corosync-qnetd
provides votes to a given node/partition. Currently there are two algorithms supported.
.TP
.B ffsplit
This one makes sense only for clusters with even number of nodes. It provides exactly one
vote to the partition with the highest number of active nodes. If there are two exactly similar partitions,
it provides its vote to the partition that has the most clients connected to the qnetd
server. If this number is also equal, then the tie_breaker is used. It is able to transition
its vote if the currently active partition becomes partitioned and a non-active partition
still has at least 50% of the active nodes. Because of this, a vote is not provided
if the qnetd connection is not active.
To use this algorithm it's required to set the number of votes per node to 1 (default)
and the qdevice number of votes has to be also 1. This is achieved by setting
.B quorum.device.votes
key in corosync.conf file to 1.
.TP
.B lms
Last-man-standing. If the node is the only one left in the cluster that can see the
qnetd server then we return a vote.
If more than one node can see the qnetd server but some nodes can't
see each other then the cluster is divided up into 'partitions' based on
their ring_id and this algorithm returns a vote to the largest active partition or,
if there is more than 1 equal partiton, the partition that contains the tie_breaker
node (lowest, highest, etc). For LMS to work, the number
of qdevice votes has to be set to default (so just delete
.B quorum.device.votes
key from corosync.conf).
.SH ADVANCED SETTINGS
Set by using
.B -S
option. The default value is shown in parentheses) Options
beginning with
.B net_
prefix are specific to model net.
.TP
.B lock_file
Lock file location. (/var/run/corosync-qdevice/corosync-qdevice.pid)
.TP
.B local_socket_file
Internal IPC socket file location. (/var/run/corosync-qdevice/corosync-qdevice.sock)
.TP
.B local_socket_backlog
Parameter passed to listen syscall. (10)
.TP
.B max_cs_try_again
How many times to retry the call to a corosync function which has returned CS_ERR_TRY_AGAIN. (10)
.TP
.B votequorum_device_name
Name used for qdevice registration. (Qdevice)
.TP
.B ipc_max_clients
Maximum allowed simultaneous IPC clients. (10)
.TP
.B ipc_max_receive_size
Maximum size of a message received by IPC client. (4096)
.TP
.B ipc_max_send_size
Maximum size of a message allowed to be sent to an IPC client. (65536)
.TP
.B master_wins
Force enable/disable master wins. (default is model)
.TP
.B net_nss_db_dir
NSS database directory. (/etc/corosync/qdevice/net/nssdb)
.TP
.B net_initial_msg_receive_size
Initial (used during connection parameters negotiation)
maximum size of the receive buffer for message (maximum
allowed message size received from qnetd). (32768)
.TP
.B net_initial_msg_send_size
Initial (used during connection parameter negotiation)
maximum size of one send buffer (message) to be sent to server. (32768)
.TP
.B net_min_msg_send_size
Minimum required size of one send buffer (message) to be sent to server. (32768)
.TP
.B net_max_msg_receive_size
Maximum allowed size of receive buffer for a message sent by server. (16777216)
.TP
.B net_max_send_buffers
Maximum number of send buffers. (10)
.TP
.B net_nss_qnetd_cn
Canonical name of qnetd server certificate. (Qnetd Server)
.TP
.B net_nss_client_cert_nickname
NSS nickname of qdevice client certificate. (Cluster Cert)
.TP
.B net_heartbeat_interval_min
Minimum heartbeat timeout accepted by client in ms. (1000)
.TP
.B net_heartbeat_interval_max
Maximum heartbeat timeout accepted by client in ms. (120000)
.TP
.B net_min_connect_timeout
Minimum connection timeout accepted by client in ms. (1000)
.TP
.B net_max_connect_timeout
Maximum connection timeout accepted by client in ms. (120000)
.TP
.B net_test_algorithm_enabled
Enable test algorithm. (if built with --enable-debug on, otherwise off)
.SH SEE ALSO
.BR corosync-qdevice-tool (8)
.BR corosync-qdevice-net-certutil (8)
.BR corosync-qnetd (8)
.BR corosync.conf (5)
.SH AUTHOR
Jan Friesse
.PP
diff --git a/man/corosync-qnetd.8 b/man/corosync-qnetd.8
index 856e6e18..fa582243 100644
--- a/man/corosync-qnetd.8
+++ b/man/corosync-qnetd.8
@@ -1,227 +1,227 @@
.\"/*
.\" * Copyright (C) 2016 Red Hat, Inc.
.\" *
.\" * All rights reserved.
.\" *
.\" * Author: Jan Friesse
.\" *
.\" * This software licensed under BSD license, the text of which follows:
.\" *
.\" * Redistribution and use in source and binary forms, with or without
.\" * modification, are permitted provided that the following conditions are met:
.\" *
.\" * - Redistributions of source code must retain the above copyright notice,
.\" * this list of conditions and the following disclaimer.
.\" * - Redistributions in binary form must reproduce the above copyright notice,
.\" * this list of conditions and the following disclaimer in the documentation
.\" * and/or other materials provided with the distribution.
.\" * - Neither the name of Red Hat, Inc. nor the names of its
.\" * contributors may be used to endorse or promote products derived from this
.\" * software without specific prior written permission.
.\" *
.\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
.\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
.\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
.\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
.\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
.\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
.\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
.\" * THE POSSIBILITY OF SUCH DAMAGE.
.\" */
.TH COROSYNC-QNETD 8 2016-06-29
.SH NAME
corosync-qnetd \- QNet daemon
.SH SYNOPSIS
.B "corosync-qnetd [-46dfhv] [-l listen_addr] [-p listen_port] [-s tls]
.B [-c client_cert_required] [-m max_clients] [-S option=value[,option2=value2,...]]"
.SH DESCRIPTION
.B corosync-qnetd
is a daemon running outside of the cluster with the purpose of providing a vote to the
.B corosync-qdevice
model net. It's designed to support multiple clusters and be almost configuration
and state free. New clusters are handled dynamically and no configuration file exists.
It's also able to run as non-root user - which is recommended. Connection between the
.B corosync-qdevice
model net client can be optionally configured with TLS client certificate checking.
The communication protocol between server and client is designed to be very simple
and allow backwards compatibility.
.SH OPTIONS
.TP
.B -4
and its counterpart
.B -6
are used to force IPv4 or IPv6 communication. The default is to listen on both address families.
.TP
.B -d
Turn on debug logging. By default the messages sent to syslog are purely operational, this
option sends additional debug messages. For even more detail use the
.B -d
parameter twice.
.TP
.B -f
Do not daemonize, run in the foreground.
.TP
.B -h
Show short help text
.TP
.B -v
Show version and supported communication protocol messages/options.
.TP
.B -l
IP address to listen on. By default the daemon listens on all addresses (wildcard).
.TP
.B -p
TCP port to listen on. Default port is 5403.
.TP
.B -s
Determines if TLS should be used and can be one of
.I on/off/required
(the default is
.I on
).
.I on
means TLS is enabled but the client is not required to start TLS,
.I off
means TLS is completely disabled, and
.I required
means TLS is required.
.I on
and
.I required
require the NSS database to be properly initialized by running the
.B corosync-qnetd-certutil
command.
.TP
.B -c
can be set to
.I on/off.
This option only makes sense if TLS is enabled. When
.B -c
is
.I on
a client is required to send its client certificate (default).
.TP
.B -m
Maximum simultaneous clients. The default is 0 which means no limit.
.TP
.B -S
Set advanced settings described in its own section below. This option
shouldn't be generally used because most of the options are
not safe to change.
.SH UNPRIVILEGED USER CONFIGURATION
It's generally recommended to run
.B corosync-qnetd
as a non root user. If you get a package from a distribution its highly
possible that the packager has done all the hard work for you. If the installation
is performed from source code, a few steps have to be taken.
First it's necessary to create an unprivileged user/group. The following commands
can be used (executed as root):
.nf
# groupadd -r coroqnetd
# useradd -r -g coroqnetd -d / -s /sbin/nologin -c "User for corosync-qnetd" coroqnetd
.fi
The next step is to set the correct owner and group on /etc/corosync/qnetd and /var/run/corosync-qnetd
directories.
.nf
# chown -R coroqnetd:coroqnetd /etc/corosync/qnetd /var/run/corosync-qnetd
.fi
Some systems have the /var/run directory on a tmpfs file system which gets discarded after
a reboot. The solution is to use an initscript which takes care of the /var/run/corosync-qnetd
creation and sets the correct owner and permissions. For systems with systemd it's possible
to use a tmpfile.d configuration file (installed by default if systemd is enabled during
corosync compilation).
The last step is to make sure
.B corosync-qnetd
is really executed as an unprivileged user. For initscript systems it's enough to set the
line COROSYNC_QNETD_RUNAS in /etc/(sysconfig|default)/corosync-qnetd file. If the file
is not already installed then use the one provided in the corosync source code
(init/corosync-qnetd.sysconfig.example). For systemd, overwrite/copy the
corosync-qnetd.service unit file and uncomment/change the "User=" directive.
.SH TLS CONFIGURATION
For TLS to work its necessary to create the NSS database. If pcs is used then the following
steps are not needed because pcs does them automatically.
.B corosync-qnetd-certutil
is the tool to perform required actions. Just run:
.nf
# corosync-qnetd-certutil -i
.fi
If TLS is not required then simply edit /etc/(sysconfig|default)/corosync-qnetd or
systemd unit file and add the parameter
.B -s
.I off
in the proper place.
.SH ADVANCED SETTINGS
Set by the
.B -S
-option. The default value is shown in parantheses.
+option. The default value is shown in parentheses.
.TP
.B listen_backlog
Parameter passed to the listen syscall on the network socket. (10)
.TP
.B max_client_send_buffers
Maximum number of send buffers for one client. (32)
.TP
.B max_client_send_size
Maximum size of one send buffer (message) to be sent to a client. (32768)
.TP
.B max_client_receive_size
Maximum size of the receive buffer for a client message (maximum
allowed message size received by client). (32768)
.TP
.B nss_db_dir
NSS database directory. (/etc/corosync/qnetd/nssdb)
.TP
.B cert_nickname
NSS nickname of qnetd server certificate. (QNetd Cert)
.TP
.B heartbeat_interval_min
Minimum heartbeat timeout accepted by server in ms. (1000)
.TP
.B heartbeat_interval_max
Maximum heartbeat timeout accepted by server in ms. (120000)
.TP
.B dpd_enabled
Dead peer detection enabled. (on)
.TP
.B dpd_interval
How often the DPD algorithm detects dead peers in ms. (10000)
.TP
.B lock_file
Lock file location. (/var/run/corosync-qnetd/corosync-qnetd.pid)
.TP
.B local_socket_file
Internal IPC socket file location. (/var/run/corosync-qnetd/corosync-qnetd.sock)
.TP
.B local_socket_backlog
Parameter passed to listen syscall on the local socket. (10)
.TP
.B ipc_max_clients
Maximum allowed simultaneous IPC clients. (10)
.TP
.B ipc_max_receive_size
Maximum size of a message received by IPC client. (4096)
.TP
.B ipc_max_send_size
Maximum size of a message sent to an IPC client. (10485760)
.SH SEE ALSO
.BR corosync-qnetd-tool (8)
.BR corosync-qnetd-certutil (8)
.BR corosync-qdevice (8)
.SH AUTHOR
Jan Friesse
.PP
diff --git a/man/quorum_initialize.3.in b/man/quorum_initialize.3.in
index e55c284a..227c2c08 100644
--- a/man/quorum_initialize.3.in
+++ b/man/quorum_initialize.3.in
@@ -1,113 +1,113 @@
.\"/*
.\" * Copyright (c) 2012 Red Hat, Inc.
.\" *
.\" * All rights reserved.
.\" *
.\" * Author: Fabio M. Di Nitto
.\" *
.\" * This software licensed under BSD license, the text of which follows:
.\" *
.\" * Redistribution and use in source and binary forms, with or without
.\" * modification, are permitted provided that the following conditions are met:
.\" *
.\" * - Redistributions of source code must retain the above copyright notice,
.\" * this list of conditions and the following disclaimer.
.\" * - Redistributions in binary form must reproduce the above copyright notice,
.\" * this list of conditions and the following disclaimer in the documentation
.\" * and/or other materials provided with the distribution.
.\" * - Neither the name of the MontaVista Software, Inc. nor the names of its
.\" * contributors may be used to endorse or promote products derived from this
.\" * software without specific prior written permission.
.\" *
.\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
.\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
.\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
.\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
.\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
.\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
.\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
.\" * THE POSSIBILITY OF SUCH DAMAGE.
.\" */
.TH QUORUM_INITIALIZE 3 @BUILDDATE@ "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
.SH NAME
quorum_initialize \- Create a new connection to the Quorum service
.SH SYNOPSIS
.B #include
.sp
.BI "int quorum_initialize(quorum_handle_t *" handle ", quorum_callbacks_t *" callbacks ", uint32_t *" quorum_type ");"
.SH DESCRIPTION
The
.B quorum_initialize
function is used to initialize a connection to the quorum API.
.PP
Each application may have several connections to the quorum API. Each application
uses the
.I handle
argument to uniquely identify the connection. The
.I handle
argument is then used in other function calls to identify the connection to be used
for communication with the quorum service.
.PP
-Every time the voting configuraton changes (eg a node joins or leave the cluster) or the quorum status change,
+Every time the voting configuration changes (eg a node joins or leave the cluster) or the quorum status change,
the callback is called.
The callback function is described by the following type definitions:
.nf
typedef void (*quorum_notification_fn_t) (
quorum_handle_t handle,
uint32_t quorate,
uint64_t ring_seq,
uint32_t view_list_entries,
uint32_t *view_list
);
.fi
.PP
The
.I callbacks
argument is of the type:
.nf
typedef struct {
quorum_notification_fn_t quorum_notify_fn;
} quorum_callbacks_t;
.fi
.PP
The
.I quorum_type
argument is set to:
.nf
#define QUORUM_FREE 0
#define QUORUM_SET 1
.fi
.PP
.I QUORUM_FREE
value means that no quorum algorithm is loaded and that no callbacks will take place.
.PP
.I QUORUM_SET
value means that one quorum algorithm is configured and that callbacks will take place.
.PP
When a configuration change occurs, the callback
is called from the
.B quorum_dispatch()
function.
.PP
.SH RETURN VALUE
This call returns the CS_OK value if successful, otherwise an error is returned.
.PP
.SH ERRORS
@COMMONIPCERRORS@
.SH "SEE ALSO"
.BR quorum_overview (8),
.BR quorum_finalize (3),
.BR quorum_getquorate (3),
.BR quorum_trackstart (3),
.BR quorum_trackstop (3),
.BR quorum_fd_get (3),
.BR quorum_dispatch (3),
.BR quorum_context_set (3),
.BR quorum_context_get (3)
.PP
diff --git a/man/quorum_trackstart.3.in b/man/quorum_trackstart.3.in
index 044a0480..9e3ff3fc 100644
--- a/man/quorum_trackstart.3.in
+++ b/man/quorum_trackstart.3.in
@@ -1,77 +1,77 @@
.\"/*
.\" * Copyright (c) 2012 Red Hat, Inc.
.\" *
.\" * All rights reserved.
.\" *
.\" * Author: Fabio M. Di Nitto
.\" *
.\" * This software licensed under BSD license, the text of which follows:
.\" *
.\" * Redistribution and use in source and binary forms, with or without
.\" * modification, are permitted provided that the following conditions are met:
.\" *
.\" * - Redistributions of source code must retain the above copyright notice,
.\" * this list of conditions and the following disclaimer.
.\" * - Redistributions in binary form must reproduce the above copyright notice,
.\" * this list of conditions and the following disclaimer in the documentation
.\" * and/or other materials provided with the distribution.
.\" * - Neither the name of the MontaVista Software, Inc. nor the names of its
.\" * contributors may be used to endorse or promote products derived from this
.\" * software without specific prior written permission.
.\" *
.\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
.\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
.\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
.\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
.\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
.\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
.\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
.\" * THE POSSIBILITY OF SUCH DAMAGE.
.\" */
.TH QUORUM_TRACKSTART 3 @BUILDDATE@ "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
.SH NAME
quorum_trackstart \- Enable callbacks notification.
.SH SYNOPSIS
.B #include
.sp
.BI "int quorum_trackstart(quorum_handle_t *" handle ", unsigned int " flags ");"
.SH DESCRIPTION
The
.B quorum_trackstart
function is used to enable callbacks notification from the quorum API.
.PP
-Every time the voting configuraton changes (eg a node joins or leave the cluster)
+Every time the voting configuration changes (eg a node joins or leave the cluster)
or the quorum status change, the notification is queued.
.PP
The notification is dispatched via
.B quorum_dispatch()
function that will execute the callback.
.PP
The
.I flags
argument is defined by one or more of the following values and values can be bitwise-or'd
.nf
#define CS_TRACK_CURRENT 0x01
#define CS_TRACK_CHANGES 0x02
#define CS_TRACK_CHANGES_ONLY 0x04
.fi
.SH RETURN VALUE
This call returns the CS_OK value if successful, otherwise an error is returned.
.PP
.SH ERRORS
@COMMONIPCERRORS@
.SH "SEE ALSO"
.BR quorum_overview (8),
.BR quorum_initialize (3),
.BR quorum_finalize (3),
.BR quorum_getquorate (3),
.BR quorum_trackstop (3),
.BR quorum_fd_get (3),
.BR quorum_dispatch (3),
.BR quorum_context_set (3),
.BR quorum_context_get (3)
.PP
diff --git a/man/votequorum_trackstart.3.in b/man/votequorum_trackstart.3.in
index b440713c..af3310b0 100644
--- a/man/votequorum_trackstart.3.in
+++ b/man/votequorum_trackstart.3.in
@@ -1,83 +1,83 @@
.\"/*
.\" * Copyright (c) 2012 Red Hat, Inc.
.\" *
.\" * All rights reserved.
.\" *
.\" * Author: Fabio M. Di Nitto
.\" *
.\" * This software licensed under BSD license, the text of which follows:
.\" *
.\" * Redistribution and use in source and binary forms, with or without
.\" * modification, are permitted provided that the following conditions are met:
.\" *
.\" * - Redistributions of source code must retain the above copyright notice,
.\" * this list of conditions and the following disclaimer.
.\" * - Redistributions in binary form must reproduce the above copyright notice,
.\" * this list of conditions and the following disclaimer in the documentation
.\" * and/or other materials provided with the distribution.
.\" * - Neither the name of the MontaVista Software, Inc. nor the names of its
.\" * contributors may be used to endorse or promote products derived from this
.\" * software without specific prior written permission.
.\" *
.\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
.\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
.\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
.\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
.\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
.\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
.\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
.\" * THE POSSIBILITY OF SUCH DAMAGE.
.\" */
.TH VOTEQUORUM_TRACKSTART 3 @BUILDDATE@ "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
.SH NAME
votequorum_trackstart \- Enable callbacks notification.
.SH SYNOPSIS
.B #include
.sp
.BI "int votequorum_trackstart(votequorum_handle_t *" handle ", uint64_t " context ", unsigned int " flags ");"
.SH DESCRIPTION
The
.B votequorum_trackstart
function is used to enable callbacks notification from the votequorum API.
.PP
-Every time the voting configuraton changes (eg a node joins or leave the cluster)
+Every time the voting configuration changes (eg a node joins or leave the cluster)
or the quorum status change or the expected votes changes, the notification is queued.
.PP
The notification is dispatched via
.B votequorum_dispatch()
function that will execute the callback.
.PP
The
.I context
option allows one to set a tracking context.
.PP
The
.I flags
argument is defined by one or more of the following values and values can be bitwise-or'd
.nf
#define CS_TRACK_CURRENT 0x01
#define CS_TRACK_CHANGES 0x02
#define CS_TRACK_CHANGES_ONLY 0x04
.fi
.SH RETURN VALUE
This call returns the CS_OK value if successful, otherwise an error is returned.
.PP
.SH ERRORS
@COMMONIPCERRORS@
.SH "SEE ALSO"
.BR votequorum_overview (8),
.BR votequorum_initialize (3),
.BR votequorum_finalize (3),
.BR votequorum_getinfo (3),
.BR votequorum_trackstop (3),
.BR votequorum_fd_get (3),
.BR votequorum_dispatch (3),
.BR votequorum_context_set (3),
.BR votequorum_context_get (3),
.BR votequorum_setexpected (3),
.BR votequorum_setvotes (3)
.PP
diff --git a/qdevices/corosync-qdevice.c b/qdevices/corosync-qdevice.c
index fd932f10..9916c69b 100644
--- a/qdevices/corosync-qdevice.c
+++ b/qdevices/corosync-qdevice.c
@@ -1,261 +1,261 @@
/*
* Copyright (c) 2015-2016 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Jan Friesse (jfriesse@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the Red Hat, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include
#include
#include "dynar.h"
#include "dynar-str.h"
#include "dynar-getopt-lex.h"
#include "qdevice-advanced-settings.h"
#include "qdevice-config.h"
#include "qdevice-cmap.h"
#include "qdevice-ipc.h"
#include "qdevice-log.h"
#include "qdevice-model.h"
#include "qdevice-votequorum.h"
#include "utils.h"
struct qdevice_instance *global_instance;
static void
signal_int_handler(int sig)
{
qdevice_log(LOG_DEBUG, "SIGINT received - closing local unix socket");
qdevice_ipc_close(global_instance);
}
static void
signal_term_handler(int sig)
{
qdevice_log(LOG_DEBUG, "SIGTERM received - closing server socket");
qdevice_ipc_close(global_instance);
}
static void
signal_handlers_register(void)
{
struct sigaction act;
act.sa_handler = signal_int_handler;
sigemptyset(&act.sa_mask);
act.sa_flags = SA_RESTART;
sigaction(SIGINT, &act, NULL);
act.sa_handler = signal_term_handler;
sigemptyset(&act.sa_mask);
act.sa_flags = SA_RESTART;
sigaction(SIGTERM, &act, NULL);
}
static void
usage(void)
{
printf("usage: %s [-dfh] [-S option=value[,option2=value2,...]]\n", QDEVICE_PROGRAM_NAME);
}
static void
cli_parse_long_opt(struct qdevice_advanced_settings *advanced_settings, const char *long_opt)
{
struct dynar_getopt_lex lex;
struct dynar dynar_long_opt;
const char *opt;
const char *val;
int res;
dynar_init(&dynar_long_opt, strlen(long_opt) + 1);
if (dynar_str_cpy(&dynar_long_opt, long_opt) != 0) {
errx(1, "Can't alloc memory for long option");
}
dynar_getopt_lex_init(&lex, &dynar_long_opt);
while (dynar_getopt_lex_token_next(&lex) == 0 && strcmp(dynar_data(&lex.option), "") != 0) {
opt = dynar_data(&lex.option);
val = dynar_data(&lex.value);
res = qdevice_advanced_settings_set(advanced_settings, opt, val);
switch (res) {
case -1:
errx(1, "Unknown option '%s'", opt);
break;
case -2:
errx(1, "Invalid value '%s' for option '%s'", val, opt);
break;
}
}
dynar_getopt_lex_destroy(&lex);
dynar_destroy(&dynar_long_opt);
}
static void
cli_parse(int argc, char * const argv[], int *foreground, int *force_debug,
struct qdevice_advanced_settings *advanced_settings)
{
int ch;
*foreground = 0;
*force_debug = 0;
while ((ch = getopt(argc, argv, "dfhS:")) != -1) {
switch (ch) {
case 'd':
*force_debug = 1;
break;
case 'f':
*foreground = 1;
break;
case 'S':
cli_parse_long_opt(advanced_settings, optarg);
break;
case 'h':
case '?':
usage();
exit(1);
break;
}
}
}
int
main(int argc, char * const argv[])
{
struct qdevice_instance instance;
struct qdevice_advanced_settings advanced_settings;
int foreground;
int force_debug;
int lock_file;
int another_instance_running;
if (qdevice_advanced_settings_init(&advanced_settings) != 0) {
errx(1, "Can't alloc memory for advanced settings");
}
cli_parse(argc, argv, &foreground, &force_debug, &advanced_settings);
qdevice_instance_init(&instance, &advanced_settings);
qdevice_cmap_init(&instance);
qdevice_log_init(&instance, force_debug);
/*
* Daemonize
*/
if (!foreground) {
utils_tty_detach();
}
if ((lock_file = utils_flock(advanced_settings.lock_file, getpid(),
&another_instance_running)) == -1) {
if (another_instance_running) {
qdevice_log(LOG_ERR, "Another instance is running");
} else {
- qdevice_log_err(LOG_ERR, "Can't aquire lock");
+ qdevice_log_err(LOG_ERR, "Can't acquire lock");
}
exit(1);
}
qdevice_log(LOG_DEBUG, "Initializing votequorum");
qdevice_votequorum_init(&instance);
qdevice_log(LOG_DEBUG, "Initializing local socket");
if (qdevice_ipc_init(&instance) != 0) {
return (1);
}
qdevice_log(LOG_DEBUG, "Registering qdevice models");
qdevice_model_register_all();
qdevice_log(LOG_DEBUG, "Configuring qdevice");
if (qdevice_instance_configure_from_cmap(&instance) != 0) {
return (1);
}
qdevice_log(LOG_DEBUG, "Configuring master_wins");
if (qdevice_votequorum_master_wins(&instance, (advanced_settings.master_wins ==
QDEVICE_ADVANCED_SETTINGS_MASTER_WINS_FORCE_ON ? 1 : 0)) != 0) {
return (1);
}
qdevice_log(LOG_DEBUG, "Getting configuration node list");
if (qdevice_cmap_store_config_node_list(&instance) != 0) {
return (1);
}
qdevice_log(LOG_DEBUG, "Initializing qdevice model");
if (qdevice_model_init(&instance) != 0) {
return (1);
}
qdevice_log(LOG_DEBUG, "Initializing cmap tracking");
if (qdevice_cmap_add_track(&instance) != 0) {
return (1);
}
qdevice_log(LOG_DEBUG, "Waiting for ring id");
if (qdevice_votequorum_wait_for_ring_id(&instance) != 0) {
return (1);
}
global_instance = &instance;
signal_handlers_register();
qdevice_log(LOG_DEBUG, "Running qdevice model");
if (qdevice_model_run(&instance) != 0) {
return (1);
}
qdevice_log(LOG_DEBUG, "Removing cmap tracking");
if (qdevice_cmap_del_track(&instance) != 0) {
return (1);
}
qdevice_log(LOG_DEBUG, "Destorying qdevice model");
qdevice_model_destroy(&instance);
qdevice_ipc_destroy(&instance);
qdevice_votequorum_destroy(&instance);
qdevice_cmap_destroy(&instance);
qdevice_log_close(&instance);
qdevice_instance_destroy(&instance);
qdevice_advanced_settings_destroy(&advanced_settings);
return (0);
}
diff --git a/qdevices/qdevice-instance.c b/qdevices/qdevice-instance.c
index 862e0029..75bd5372 100644
--- a/qdevices/qdevice-instance.c
+++ b/qdevices/qdevice-instance.c
@@ -1,99 +1,99 @@
/*
* Copyright (c) 2015-2016 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Jan Friesse (jfriesse@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the Red Hat, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "qdevice-instance.h"
#include "qdevice-log.h"
#include "qdevice-model.h"
int
qdevice_instance_init(struct qdevice_instance *instance,
const struct qdevice_advanced_settings *advanced_settings)
{
memset(instance, 0, sizeof(*instance));
node_list_init(&instance->config_node_list);
instance->vq_last_poll = ((time_t) -1);
instance->advanced_settings = advanced_settings;
return (0);
}
int
qdevice_instance_destroy(struct qdevice_instance *instance)
{
node_list_free(&instance->config_node_list);
return (0);
}
int
qdevice_instance_configure_from_cmap(struct qdevice_instance *instance)
{
char *str;
if (cmap_get_string(instance->cmap_handle, "quorum.device.model", &str) != CS_OK) {
qdevice_log(LOG_ERR, "Can't read quorum.device.model cmap key.");
return (-1);
}
if (qdevice_model_str_to_type(str, &instance->model_type) != 0) {
qdevice_log(LOG_ERR, "Configured device model %s is not supported.", str);
free(str);
return (-1);
}
free(str);
if (cmap_get_uint32(instance->cmap_handle, "runtime.votequorum.this_node_id",
&instance->node_id) != CS_OK) {
- qdevice_log(LOG_ERR, "Unable to retrive this node nodeid.");
+ qdevice_log(LOG_ERR, "Unable to retrieve this node nodeid.");
return (-1);
}
if (cmap_get_uint32(instance->cmap_handle, "quorum.device.timeout", &instance->heartbeat_interval) != CS_OK) {
instance->heartbeat_interval = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT;
}
if (cmap_get_uint32(instance->cmap_handle, "quorum.device.sync_timeout",
&instance->sync_heartbeat_interval) != CS_OK) {
instance->sync_heartbeat_interval = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT;
}
return (0);
}
diff --git a/qdevices/qdevice-net-msg-received.c b/qdevices/qdevice-net-msg-received.c
index b5c2f219..f9148305 100644
--- a/qdevices/qdevice-net-msg-received.c
+++ b/qdevices/qdevice-net-msg-received.c
@@ -1,942 +1,942 @@
/*
* Copyright (c) 2015-2016 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Jan Friesse (jfriesse@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the Red Hat, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "qdevice-log.h"
#include "qdevice-net-algorithm.h"
#include "qdevice-net-cast-vote-timer.h"
#include "qdevice-net-msg-received.h"
#include "qdevice-net-send.h"
#include "qdevice-net-votequorum.h"
#include "qdevice-net-echo-request-timer.h"
#include "msg.h"
#include "utils.h"
/*
* -1 - Incompatible tls combination
* 0 - Don't use TLS
* 1 - Use TLS
*/
static int
qdevice_net_msg_received_check_tls_compatibility(enum tlv_tls_supported server_tls,
enum tlv_tls_supported client_tls)
{
int res;
res = -1;
switch (server_tls) {
case TLV_TLS_UNSUPPORTED:
switch (client_tls) {
case TLV_TLS_UNSUPPORTED: res = 0; break;
case TLV_TLS_SUPPORTED: res = 0; break;
case TLV_TLS_REQUIRED: res = -1; break;
}
break;
case TLV_TLS_SUPPORTED:
switch (client_tls) {
case TLV_TLS_UNSUPPORTED: res = 0; break;
case TLV_TLS_SUPPORTED: res = 1; break;
case TLV_TLS_REQUIRED: res = 1; break;
}
break;
case TLV_TLS_REQUIRED:
switch (client_tls) {
case TLV_TLS_UNSUPPORTED: res = -1; break;
case TLV_TLS_SUPPORTED: res = 1; break;
case TLV_TLS_REQUIRED: res = 1; break;
}
break;
}
return (res);
}
static void
qdevice_net_msg_received_log_msg_decode_error(int ret)
{
switch (ret) {
case -1:
qdevice_log(LOG_WARNING, "Received message with option with invalid length");
break;
case -2:
qdevice_log(LOG_CRIT, "Can't allocate memory");
break;
case -3:
qdevice_log(LOG_WARNING, "Received inconsistent msg (tlv len > msg size)");
break;
case -4:
qdevice_log(LOG_ERR, "Received message with option with invalid value");
break;
default:
- qdevice_log(LOG_ERR, "Unknown error occured when decoding message");
+ qdevice_log(LOG_ERR, "Unknown error occurred when decoding message");
break;
}
}
static int
qdevice_net_msg_received_unexpected_msg(struct qdevice_net_instance *instance,
const struct msg_decoded *msg, const char *msg_str)
{
qdevice_log(LOG_ERR, "Received unexpected %s message. Disconnecting from server",
msg_str);
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_UNEXPECTED_MSG;
return (-1);
}
static int
qdevice_net_msg_received_init(struct qdevice_net_instance *instance,
const struct msg_decoded *msg)
{
return (qdevice_net_msg_received_unexpected_msg(instance, msg, "init"));
}
static int
qdevice_net_msg_received_preinit(struct qdevice_net_instance *instance,
const struct msg_decoded *msg)
{
return (qdevice_net_msg_received_unexpected_msg(instance, msg, "preinit"));
}
static int
qdevice_net_msg_check_seq_number(struct qdevice_net_instance *instance,
const struct msg_decoded *msg)
{
if (!msg->seq_number_set || msg->seq_number != instance->last_msg_seq_num) {
qdevice_log(LOG_ERR, "Received message doesn't contain seq_number or "
"it's not expected one.");
return (-1);
}
return (0);
}
static int
qdevice_net_msg_received_preinit_reply(struct qdevice_net_instance *instance,
const struct msg_decoded *msg)
{
int res;
struct send_buffer_list_entry *send_buffer;
qdevice_log(LOG_DEBUG, "Received preinit reply msg");
if (instance->state != QDEVICE_NET_INSTANCE_STATE_WAITING_PREINIT_REPLY) {
qdevice_log(LOG_ERR, "Received unexpected preinit reply message. "
"Disconnecting from server");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_UNEXPECTED_MSG;
return (-1);
}
if (qdevice_net_msg_check_seq_number(instance, msg) != 0) {
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING;
return (-1);
}
/*
* Check TLS support
*/
if (!msg->tls_supported_set || !msg->tls_client_cert_required_set) {
qdevice_log(LOG_ERR, "Required tls_supported or tls_client_cert_required "
"option is unset");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING;
return (-1);
}
res = qdevice_net_msg_received_check_tls_compatibility(msg->tls_supported, instance->tls_supported);
if (res == -1) {
qdevice_log(LOG_ERR, "Incompatible tls configuration (server %u client %u)",
msg->tls_supported, instance->tls_supported);
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_INCOMPATIBLE_TLS;
return (-1);
} else if (res == 1) {
/*
* Start TLS
*/
send_buffer = send_buffer_list_get_new(&instance->send_buffer_list);
if (send_buffer == NULL) {
qdevice_log(LOG_ERR, "Can't allocate send list buffer for "
"starttls msg");
instance->disconnect_reason =
QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER;
return (-1);
}
instance->last_msg_seq_num++;
if (msg_create_starttls(&send_buffer->buffer, 1,
instance->last_msg_seq_num) == 0) {
qdevice_log(LOG_ERR, "Can't allocate send buffer for starttls msg");
instance->disconnect_reason =
QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER;
send_buffer_list_discard_new(&instance->send_buffer_list, send_buffer);
return (-1);
}
send_buffer_list_put(&instance->send_buffer_list, send_buffer);
instance->state = QDEVICE_NET_INSTANCE_STATE_WAITING_STARTTLS_BEING_SENT;
} else if (res == 0) {
if (qdevice_net_send_init(instance) != 0) {
instance->disconnect_reason =
QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER;
return (-1);
}
}
return (0);
}
static int
qdevice_net_msg_received_init_reply(struct qdevice_net_instance *instance,
const struct msg_decoded *msg)
{
size_t zi;
int res;
int send_config_node_list;
int send_membership_node_list;
int send_quorum_node_list;
enum tlv_vote vote;
struct tlv_ring_id tlv_rid;
enum tlv_quorate quorate;
qdevice_log(LOG_DEBUG, "Received init reply msg");
if (instance->state != QDEVICE_NET_INSTANCE_STATE_WAITING_INIT_REPLY) {
qdevice_log(LOG_ERR, "Received unexpected init reply message. "
"Disconnecting from server");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_UNEXPECTED_MSG;
return (-1);
}
if (qdevice_net_msg_check_seq_number(instance, msg) != 0) {
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING;
return (-1);
}
if (!msg->reply_error_code_set) {
qdevice_log(LOG_ERR, "Received init reply message without error code."
"Disconnecting from server");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING;
return (-1);
}
if (msg->reply_error_code != TLV_REPLY_ERROR_CODE_NO_ERROR) {
qdevice_log(LOG_ERR, "Received init reply message with error code %"PRIu16". "
"Disconnecting from server", msg->reply_error_code);
if (msg->reply_error_code == TLV_REPLY_ERROR_CODE_DUPLICATE_NODE_ID) {
qdevice_log(LOG_ERR, "Duplicate node id may be result of server not yet "
"accepted this node disconnect. Retry again.");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_SERVER_SENT_DUPLICATE_NODE_ID_ERROR;
} else if (msg->reply_error_code == TLV_REPLY_ERROR_CODE_TIE_BREAKER_DIFFERS_FROM_OTHER_NODES) {
qdevice_log(LOG_ERR, "Configured tie-breaker differs in cluster. This may be "
"result of server not yet accepted this node disconnect. Retry again.");
instance->disconnect_reason =
QDEVICE_NET_DISCONNECT_REASON_SERVER_SENT_TIE_BREAKER_DIFFERS_FROM_OTHER_NODES_ERROR;
} else if (msg->reply_error_code == TLV_REPLY_ERROR_CODE_ALGORITHM_DIFFERS_FROM_OTHER_NODES) {
qdevice_log(LOG_ERR, "Configured algorithm differs in cluster. This may be "
"result of server not yet accepted this node disconnect. Retry again.");
instance->disconnect_reason =
QDEVICE_NET_DISCONNECT_REASON_SERVER_SENT_ALGORITHM_DIFFERS_FROM_OTHER_NODES_ERROR;
} else {
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_SERVER_SENT_ERROR;
}
return (-1);
}
if (!msg->server_maximum_request_size_set || !msg->server_maximum_reply_size_set) {
qdevice_log(LOG_ERR, "Required maximum_request_size or maximum_reply_size "
"option is unset");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING;
return (-1);
}
if (msg->supported_messages == NULL || msg->supported_options == NULL) {
qdevice_log(LOG_ERR, "Required supported messages or supported options "
"option is unset");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING;
return (-1);
}
if (msg->supported_decision_algorithms == NULL) {
qdevice_log(LOG_ERR, "Required supported decision algorithms option is unset");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING;
return (-1);
}
if (msg->server_maximum_request_size < instance->advanced_settings->net_min_msg_send_size) {
qdevice_log(LOG_ERR,
"Server accepts maximum %zu bytes message but this client minimum "
"is %zu bytes.", msg->server_maximum_request_size,
instance->advanced_settings->net_min_msg_send_size);
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_INCOMPATIBLE_MSG_SIZE;
return (-1);
}
if (msg->server_maximum_reply_size > instance->advanced_settings->net_max_msg_receive_size) {
qdevice_log(LOG_ERR,
"Server may send message up to %zu bytes message but this client maximum "
"is %zu bytes.", msg->server_maximum_reply_size,
instance->advanced_settings->net_max_msg_receive_size);
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_INCOMPATIBLE_MSG_SIZE;
return (-1);
}
/*
* Change buffer sizes
*/
dynar_set_max_size(&instance->receive_buffer, msg->server_maximum_reply_size);
send_buffer_list_set_max_buffer_size(&instance->send_buffer_list,
msg->server_maximum_request_size);
/*
* Check if server supports decision algorithm we need
*/
res = 0;
for (zi = 0; zi < msg->no_supported_decision_algorithms && !res; zi++) {
if (msg->supported_decision_algorithms[zi] == instance->decision_algorithm) {
res = 1;
}
}
if (!res) {
qdevice_log(LOG_ERR, "Server doesn't support required decision algorithm");
instance->disconnect_reason =
QDEVICE_NET_DISCONNECT_REASON_SERVER_DOESNT_SUPPORT_REQUIRED_ALGORITHM;
return (-1);
}
/*
* Finally fully connected so it's possible to remove connection timer
*/
if (instance->connect_timer != NULL) {
timer_list_delete(&instance->main_timer_list, instance->connect_timer);
instance->connect_timer = NULL;
}
/*
* Server accepted heartbeat interval -> schedule regular sending of echo request
*/
qdevice_net_echo_request_timer_schedule(instance);
send_config_node_list = 1;
send_membership_node_list = 1;
send_quorum_node_list = 1;
vote = TLV_VOTE_WAIT_FOR_REPLY;
if (qdevice_net_algorithm_connected(instance, &send_config_node_list, &send_membership_node_list,
&send_quorum_node_list, &vote) != 0) {
qdevice_log(LOG_DEBUG, "Algorithm returned error. Disconnecting.");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_ALGO_CONNECTED_ERR;
return (-1);
} else {
qdevice_log(LOG_DEBUG, "Algorithm decided to %s config node list, %s membership "
"node list, %s quorum node list and result vote is %s",
(send_config_node_list ? "send" : "not send"),
(send_membership_node_list ? "send" : "not send"),
(send_quorum_node_list ? "send" : "not send"),
tlv_vote_to_str(vote));
}
/*
* Now we can finally really send node list, votequorum node list and update timer
*/
if (send_config_node_list) {
if (qdevice_net_send_config_node_list(instance,
&instance->qdevice_instance_ptr->config_node_list,
instance->qdevice_instance_ptr->config_node_list_version_set,
instance->qdevice_instance_ptr->config_node_list_version, 1) != 0) {
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER;
return (-1);
}
}
if (send_membership_node_list) {
qdevice_net_votequorum_ring_id_to_tlv(&tlv_rid,
&instance->qdevice_instance_ptr->vq_node_list_ring_id);
if (qdevice_net_send_membership_node_list(instance, &tlv_rid,
instance->qdevice_instance_ptr->vq_node_list_entries,
instance->qdevice_instance_ptr->vq_node_list) != 0) {
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER;
return (-1);
}
}
if (send_quorum_node_list) {
quorate = (instance->qdevice_instance_ptr->vq_quorum_quorate ?
TLV_QUORATE_QUORATE : TLV_QUORATE_INQUORATE);
if (qdevice_net_send_quorum_node_list(instance,
quorate,
instance->qdevice_instance_ptr->vq_quorum_node_list_entries,
instance->qdevice_instance_ptr->vq_quorum_node_list) != 0) {
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER;
return (-1);
}
}
if (qdevice_net_cast_vote_timer_update(instance, vote) != 0) {
qdevice_log(LOG_CRIT, "qdevice_net_msg_received_set_option_reply fatal error. "
" Can't update cast vote timer vote");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_SCHEDULE_VOTING_TIMER;
}
instance->state = QDEVICE_NET_INSTANCE_STATE_WAITING_VOTEQUORUM_CMAP_EVENTS;
instance->connected_since_time = time(NULL);
return (0);
}
static int
qdevice_net_msg_received_starttls(struct qdevice_net_instance *instance,
const struct msg_decoded *msg)
{
return (qdevice_net_msg_received_unexpected_msg(instance, msg, "starttls"));
}
static int
qdevice_net_msg_received_server_error(struct qdevice_net_instance *instance,
const struct msg_decoded *msg)
{
if (!msg->reply_error_code_set) {
qdevice_log(LOG_ERR, "Received server error without error code set. "
"Disconnecting from server");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING;
} else {
qdevice_log(LOG_ERR, "Received server error %"PRIu16". "
"Disconnecting from server", msg->reply_error_code);
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_SERVER_SENT_ERROR;
}
return (-1);
}
static int
qdevice_net_msg_received_set_option(struct qdevice_net_instance *instance,
const struct msg_decoded *msg)
{
return (qdevice_net_msg_received_unexpected_msg(instance, msg, "set option"));
}
static int
qdevice_net_msg_received_set_option_reply(struct qdevice_net_instance *instance,
const struct msg_decoded *msg)
{
if (instance->state != QDEVICE_NET_INSTANCE_STATE_WAITING_VOTEQUORUM_CMAP_EVENTS) {
qdevice_log(LOG_ERR, "Received unexpected set option reply message. "
"Disconnecting from server");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_UNEXPECTED_MSG;
return (-1);
}
if (qdevice_net_msg_check_seq_number(instance, msg) != 0) {
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING;
return (-1);
}
qdevice_net_echo_request_timer_schedule(instance);
return (0);
}
static int
qdevice_net_msg_received_echo_request(struct qdevice_net_instance *instance,
const struct msg_decoded *msg)
{
return (qdevice_net_msg_received_unexpected_msg(instance, msg, "echo request"));
}
static int
qdevice_net_msg_received_echo_reply(struct qdevice_net_instance *instance,
const struct msg_decoded *msg)
{
if (!msg->seq_number_set) {
qdevice_log(LOG_ERR, "Received echo reply message doesn't contain seq_number.");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING;
return (-1);
}
if (msg->seq_number != instance->echo_request_expected_msg_seq_num) {
qdevice_log(LOG_WARNING, "Received echo reply message seq_number is not expected one.");
}
if (qdevice_net_algorithm_echo_reply_received(instance, msg->seq_number,
msg->seq_number == instance->echo_request_expected_msg_seq_num) != 0) {
qdevice_log(LOG_DEBUG, "Algorithm returned error. Disconnecting");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_ALGO_ECHO_REPLY_RECEIVED_ERR;
return (-1);
}
instance->echo_reply_received_msg_seq_num = msg->seq_number;
instance->last_echo_reply_received_time = time(NULL);
return (0);
}
static int
qdevice_net_msg_received_node_list(struct qdevice_net_instance *instance,
const struct msg_decoded *msg)
{
return (qdevice_net_msg_received_unexpected_msg(instance, msg, "node list"));
}
static int
qdevice_net_msg_received_node_list_reply(struct qdevice_net_instance *instance,
const struct msg_decoded *msg)
{
const char *str;
enum tlv_vote result_vote;
int res;
int case_processed;
int ring_id_is_valid;
if (instance->state != QDEVICE_NET_INSTANCE_STATE_WAITING_VOTEQUORUM_CMAP_EVENTS) {
qdevice_log(LOG_ERR, "Received unexpected node list reply message. "
"Disconnecting from server");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_UNEXPECTED_MSG;
return (-1);
}
if (!msg->vote_set || !msg->seq_number_set || !msg->node_list_type_set) {
qdevice_log(LOG_ERR, "Received node list reply message without "
"required options. Disconnecting from server");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING;
return (-1);
}
if (!msg->ring_id_set) {
qdevice_log(LOG_ERR, "Received node list reply message "
"without ring id set. Disconnecting from server");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING;
return (-1);
}
str = NULL;
switch (msg->node_list_type) {
case TLV_NODE_LIST_TYPE_INITIAL_CONFIG: str = "initial config"; break;
case TLV_NODE_LIST_TYPE_CHANGED_CONFIG: str = "changed config"; break;
case TLV_NODE_LIST_TYPE_MEMBERSHIP: str ="membership"; break;
case TLV_NODE_LIST_TYPE_QUORUM: str ="quorum"; break;
/*
* Default is not defined intentionally. Compiler shows warning when new node list type
* is added
*/
}
if (str == NULL) {
qdevice_log(LOG_CRIT, "qdevice_net_msg_received_node_list_reply fatal error. "
"Unhandled node_list_type (debug output)");
exit(1);
}
qdevice_log(LOG_DEBUG, "Received %s node list reply", str);
qdevice_log(LOG_DEBUG, " seq = "UTILS_PRI_MSG_SEQ, msg->seq_number);
qdevice_log(LOG_DEBUG, " vote = %s", tlv_vote_to_str(msg->vote));
qdevice_log(LOG_DEBUG, " ring id = ("UTILS_PRI_RING_ID")",
msg->ring_id.node_id, msg->ring_id.seq);
/*
* Call algorithm
*/
result_vote = msg->vote;
if (!tlv_ring_id_eq(&msg->ring_id, &instance->last_sent_ring_id)) {
ring_id_is_valid = 0;
qdevice_log(LOG_DEBUG, "Received node list reply with old ring id.");
} else {
ring_id_is_valid = 1;
}
case_processed = 0;
switch (msg->node_list_type) {
case TLV_NODE_LIST_TYPE_INITIAL_CONFIG:
case TLV_NODE_LIST_TYPE_CHANGED_CONFIG:
case_processed = 1;
res = qdevice_net_algorithm_config_node_list_reply_received(instance,
msg->seq_number, (msg->node_list_type == TLV_NODE_LIST_TYPE_INITIAL_CONFIG),
&msg->ring_id, ring_id_is_valid, &result_vote);
break;
case TLV_NODE_LIST_TYPE_MEMBERSHIP:
case_processed = 1;
res = qdevice_net_algorithm_membership_node_list_reply_received(instance,
msg->seq_number, &msg->ring_id, ring_id_is_valid, &result_vote);
break;
case TLV_NODE_LIST_TYPE_QUORUM:
case_processed = 1;
res = qdevice_net_algorithm_quorum_node_list_reply_received(instance,
msg->seq_number, &msg->ring_id, ring_id_is_valid, &result_vote);
break;
/*
* Default is not defined intentionally. Compiler shows warning when new node list type
* is added
*/
}
if (!case_processed) {
qdevice_log(LOG_CRIT, "qdevice_net_msg_received_node_list_reply fatal error. "
"Unhandled node_list_type (algorithm call)");
exit(1);
}
if (res != 0) {
qdevice_log(LOG_DEBUG, "Algorithm returned error. Disconnecting.");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_ALGO_NODE_LIST_REPLY_ERR;
return (-1);
} else {
qdevice_log(LOG_DEBUG, "Algorithm result vote is %s", tlv_vote_to_str(result_vote));
}
if (qdevice_net_cast_vote_timer_update(instance, result_vote) != 0) {
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_SCHEDULE_VOTING_TIMER;
return (-1);
}
return (0);
}
static int
qdevice_net_msg_received_ask_for_vote(struct qdevice_net_instance *instance,
const struct msg_decoded *msg)
{
return (qdevice_net_msg_received_unexpected_msg(instance, msg, "ask for vote"));
}
static int
qdevice_net_msg_received_ask_for_vote_reply(struct qdevice_net_instance *instance,
const struct msg_decoded *msg)
{
enum tlv_vote result_vote;
int ring_id_is_valid;
if (instance->state != QDEVICE_NET_INSTANCE_STATE_WAITING_VOTEQUORUM_CMAP_EVENTS) {
qdevice_log(LOG_ERR, "Received unexpected ask for vote reply message. "
"Disconnecting from server");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_UNEXPECTED_MSG;
return (-1);
}
if (!msg->vote_set || !msg->seq_number_set || !msg->ring_id_set) {
qdevice_log(LOG_ERR, "Received node list reply message without "
"required options. Disconnecting from server");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING;
return (-1);
}
qdevice_log(LOG_DEBUG, "Received ask for vote reply");
qdevice_log(LOG_DEBUG, " seq = "UTILS_PRI_MSG_SEQ, msg->seq_number);
qdevice_log(LOG_DEBUG, " vote = %s", tlv_vote_to_str(msg->vote));
qdevice_log(LOG_DEBUG, " ring id = ("UTILS_PRI_RING_ID")",
msg->ring_id.node_id, msg->ring_id.seq);
result_vote = msg->vote;
if (!tlv_ring_id_eq(&msg->ring_id, &instance->last_sent_ring_id)) {
ring_id_is_valid = 0;
qdevice_log(LOG_DEBUG, "Received ask for vote reply with old ring id.");
} else {
ring_id_is_valid = 1;
}
if (qdevice_net_algorithm_ask_for_vote_reply_received(instance, msg->seq_number,
&msg->ring_id, ring_id_is_valid, &result_vote) != 0) {
qdevice_log(LOG_DEBUG, "Algorithm returned error. Disconnecting.");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_ALGO_ASK_FOR_VOTE_REPLY_ERR;
return (-1);
} else {
qdevice_log(LOG_DEBUG, "Algorithm result vote is %s", tlv_vote_to_str(result_vote));
}
if (qdevice_net_cast_vote_timer_update(instance, result_vote) != 0) {
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_SCHEDULE_VOTING_TIMER;
return (-1);
}
return (0);
}
static int
qdevice_net_msg_received_vote_info(struct qdevice_net_instance *instance,
const struct msg_decoded *msg)
{
struct send_buffer_list_entry *send_buffer;
enum tlv_vote result_vote;
int ring_id_is_valid;
if (instance->state != QDEVICE_NET_INSTANCE_STATE_WAITING_VOTEQUORUM_CMAP_EVENTS) {
qdevice_log(LOG_ERR, "Received unexpected vote info message. "
"Disconnecting from server");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_UNEXPECTED_MSG;
return (-1);
}
if (!msg->vote_set || !msg->seq_number_set || !msg->ring_id_set) {
qdevice_log(LOG_ERR, "Received node list reply message without "
"required options. Disconnecting from server");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_REQUIRED_OPTION_MISSING;
return (-1);
}
qdevice_log(LOG_DEBUG, "Received vote info");
qdevice_log(LOG_DEBUG, " seq = "UTILS_PRI_MSG_SEQ, msg->seq_number);
qdevice_log(LOG_DEBUG, " vote = %s", tlv_vote_to_str(msg->vote));
qdevice_log(LOG_DEBUG, " ring id = ("UTILS_PRI_RING_ID")",
msg->ring_id.node_id, msg->ring_id.seq);
result_vote = msg->vote;
if (!tlv_ring_id_eq(&msg->ring_id, &instance->last_sent_ring_id)) {
ring_id_is_valid = 0;
qdevice_log(LOG_DEBUG, "Received vote info with old ring id.");
} else {
ring_id_is_valid = 1;
}
if (qdevice_net_algorithm_vote_info_received(instance, msg->seq_number,
&msg->ring_id, ring_id_is_valid, &result_vote) != 0) {
qdevice_log(LOG_DEBUG, "Algorithm returned error. Disconnecting.");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_ALGO_VOTE_INFO_ERR;
return (-1);
} else {
qdevice_log(LOG_DEBUG, "Algorithm result vote is %s", tlv_vote_to_str(result_vote));
}
if (qdevice_net_cast_vote_timer_update(instance, result_vote) != 0) {
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_SCHEDULE_VOTING_TIMER;
return (-1);
}
/*
* Create reply message
*/
send_buffer = send_buffer_list_get_new(&instance->send_buffer_list);
if (send_buffer == NULL) {
qdevice_log(LOG_ERR, "Can't allocate send list buffer for "
"vote info reply msg");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER;
return (-1);
}
if (msg_create_vote_info_reply(&send_buffer->buffer, msg->seq_number) == 0) {
qdevice_log(LOG_ERR, "Can't allocate send buffer for "
"vote info reply list msg");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_CANT_ALLOCATE_MSG_BUFFER;
send_buffer_list_discard_new(&instance->send_buffer_list, send_buffer);
return (-1);
}
send_buffer_list_put(&instance->send_buffer_list, send_buffer);
return (0);
}
static int
qdevice_net_msg_received_vote_info_reply(struct qdevice_net_instance *instance,
const struct msg_decoded *msg)
{
return (qdevice_net_msg_received_unexpected_msg(instance, msg, "vote info reply"));
}
int
qdevice_net_msg_received(struct qdevice_net_instance *instance)
{
struct msg_decoded msg;
int res;
int ret_val;
int msg_processed;
msg_decoded_init(&msg);
res = msg_decode(&instance->receive_buffer, &msg);
if (res != 0) {
/*
* Error occurred. Disconnect.
*/
qdevice_net_msg_received_log_msg_decode_error(res);
qdevice_log(LOG_ERR, "Disconnecting from server");
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_MSG_DECODE_ERROR;
return (-1);
}
ret_val = 0;
msg_processed = 0;
switch (msg.type) {
case MSG_TYPE_INIT:
msg_processed = 1;
ret_val = qdevice_net_msg_received_init(instance, &msg);
break;
case MSG_TYPE_PREINIT:
msg_processed = 1;
ret_val = qdevice_net_msg_received_preinit(instance, &msg);
break;
case MSG_TYPE_PREINIT_REPLY:
msg_processed = 1;
ret_val = qdevice_net_msg_received_preinit_reply(instance, &msg);
break;
case MSG_TYPE_STARTTLS:
msg_processed = 1;
ret_val = qdevice_net_msg_received_starttls(instance, &msg);
break;
case MSG_TYPE_SERVER_ERROR:
msg_processed = 1;
ret_val = qdevice_net_msg_received_server_error(instance, &msg);
break;
case MSG_TYPE_INIT_REPLY:
msg_processed = 1;
ret_val = qdevice_net_msg_received_init_reply(instance, &msg);
break;
case MSG_TYPE_SET_OPTION:
msg_processed = 1;
ret_val = qdevice_net_msg_received_set_option(instance, &msg);
break;
case MSG_TYPE_SET_OPTION_REPLY:
msg_processed = 1;
ret_val = qdevice_net_msg_received_set_option_reply(instance, &msg);
break;
case MSG_TYPE_ECHO_REQUEST:
msg_processed = 1;
ret_val = qdevice_net_msg_received_echo_request(instance, &msg);
break;
case MSG_TYPE_ECHO_REPLY:
msg_processed = 1;
ret_val = qdevice_net_msg_received_echo_reply(instance, &msg);
break;
case MSG_TYPE_NODE_LIST:
msg_processed = 1;
ret_val = qdevice_net_msg_received_node_list(instance, &msg);
break;
case MSG_TYPE_NODE_LIST_REPLY:
msg_processed = 1;
ret_val = qdevice_net_msg_received_node_list_reply(instance, &msg);
break;
case MSG_TYPE_ASK_FOR_VOTE:
msg_processed = 1;
ret_val = qdevice_net_msg_received_ask_for_vote(instance, &msg);
break;
case MSG_TYPE_ASK_FOR_VOTE_REPLY:
msg_processed = 1;
ret_val = qdevice_net_msg_received_ask_for_vote_reply(instance, &msg);
break;
case MSG_TYPE_VOTE_INFO:
msg_processed = 1;
ret_val = qdevice_net_msg_received_vote_info(instance, &msg);
break;
case MSG_TYPE_VOTE_INFO_REPLY:
msg_processed = 1;
ret_val = qdevice_net_msg_received_vote_info_reply(instance, &msg);
break;
/*
* Default is not defined intentionally. Compiler shows warning when msg type is added
*/
}
if (!msg_processed) {
qdevice_log(LOG_ERR, "Received unsupported message %u. "
"Disconnecting from server", msg.type);
instance->disconnect_reason = QDEVICE_NET_DISCONNECT_REASON_UNEXPECTED_MSG;
ret_val = -1;
}
msg_decoded_destroy(&msg);
return (ret_val);
}
diff --git a/qdevices/qnetd-algo-ffsplit.c b/qdevices/qnetd-algo-ffsplit.c
index 01e5f0c7..356bdbf8 100644
--- a/qdevices/qnetd-algo-ffsplit.c
+++ b/qdevices/qnetd-algo-ffsplit.c
@@ -1,813 +1,813 @@
/*
* Copyright (c) 2015-2016 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Jan Friesse (jfriesse@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the Red Hat, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include
#include
#include "qnetd-algo-ffsplit.h"
#include "qnetd-log.h"
#include "qnetd-log-debug.h"
#include "qnetd-cluster-list.h"
#include "qnetd-cluster.h"
#include "qnetd-client-send.h"
enum qnetd_algo_ffsplit_cluster_state {
QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE,
QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_STABLE_MEMBERSHIP,
QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS,
QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS,
};
struct qnetd_algo_ffsplit_cluster_data {
enum qnetd_algo_ffsplit_cluster_state cluster_state;
const struct node_list *quorate_partition_node_list;
};
enum qnetd_algo_ffsplit_client_state {
QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE,
QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK,
QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK,
};
struct qnetd_algo_ffsplit_client_data {
enum qnetd_algo_ffsplit_client_state client_state;
uint32_t vote_info_expected_seq_num;
};
enum tlv_reply_error_code
qnetd_algo_ffsplit_client_init(struct qnetd_client *client)
{
struct qnetd_algo_ffsplit_cluster_data *cluster_data;
struct qnetd_algo_ffsplit_client_data *client_data;
if (qnetd_cluster_size(client->cluster) == 1) {
cluster_data = malloc(sizeof(*cluster_data));
if (cluster_data == NULL) {
qnetd_log(LOG_ERR, "ffsplit: Can't initialize cluster data for client %s",
client->addr_str);
return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR);
}
memset(cluster_data, 0, sizeof(*cluster_data));
cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
cluster_data->quorate_partition_node_list = NULL;
client->cluster->algorithm_data = cluster_data;
}
client_data = malloc(sizeof(*client_data));
if (client_data == NULL) {
qnetd_log(LOG_ERR, "ffsplit: Can't initialize node data for client %s",
client->addr_str);
return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR);
}
memset(client_data, 0, sizeof(*client_data));
client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE;
client->algorithm_data = client_data;
return (TLV_REPLY_ERROR_CODE_NO_ERROR);
}
static int
-qnetd_algo_ffsplit_is_prefered_partition(const struct qnetd_client *client,
+qnetd_algo_ffsplit_is_preferred_partition(const struct qnetd_client *client,
const struct node_list *config_node_list, const struct node_list *membership_node_list)
{
- uint32_t prefered_node_id;
+ uint32_t preferred_node_id;
struct node_list_entry *node_entry;
int case_processed;
- prefered_node_id = 0;
+ preferred_node_id = 0;
case_processed = 0;
switch (client->tie_breaker.mode) {
case TLV_TIE_BREAKER_MODE_LOWEST:
node_entry = TAILQ_FIRST(config_node_list);
- prefered_node_id = node_entry->node_id;
+ preferred_node_id = node_entry->node_id;
TAILQ_FOREACH(node_entry, config_node_list, entries) {
- if (node_entry->node_id < prefered_node_id) {
- prefered_node_id = node_entry->node_id;
+ if (node_entry->node_id < preferred_node_id) {
+ preferred_node_id = node_entry->node_id;
}
}
case_processed = 1;
break;
case TLV_TIE_BREAKER_MODE_HIGHEST:
node_entry = TAILQ_FIRST(config_node_list);
- prefered_node_id = node_entry->node_id;
+ preferred_node_id = node_entry->node_id;
TAILQ_FOREACH(node_entry, config_node_list, entries) {
- if (node_entry->node_id > prefered_node_id) {
- prefered_node_id = node_entry->node_id;
+ if (node_entry->node_id > preferred_node_id) {
+ preferred_node_id = node_entry->node_id;
}
}
case_processed = 1;
break;
case TLV_TIE_BREAKER_MODE_NODE_ID:
- prefered_node_id = client->tie_breaker.node_id;
+ preferred_node_id = client->tie_breaker.node_id;
case_processed = 1;
break;
}
if (!case_processed) {
- qnetd_log(LOG_CRIT, "qnetd_algo_ffsplit_is_prefered_partition unprocessed "
+ qnetd_log(LOG_CRIT, "qnetd_algo_ffsplit_is_preferred_partition unprocessed "
"tie_breaker.mode");
exit(1);
}
- return (node_list_find_node_id(membership_node_list, prefered_node_id) != NULL);
+ return (node_list_find_node_id(membership_node_list, preferred_node_id) != NULL);
}
static int
qnetd_algo_ffsplit_is_membership_stable(const struct qnetd_client *client, int client_leaving,
const struct tlv_ring_id *ring_id, const struct node_list *config_node_list,
const struct node_list *membership_node_list)
{
const struct qnetd_client *iter_client1, *iter_client2;
const struct node_list *config_node_list1, *config_node_list2;
const struct node_list *membership_node_list1, *membership_node_list2;
const struct node_list_entry *iter_node1, *iter_node2;
const struct node_list_entry *iter_node3, *iter_node4;
const struct tlv_ring_id *ring_id1, *ring_id2;
/*
* Test if all active clients share same config list.
*/
TAILQ_FOREACH(iter_client1, &client->cluster->client_list, cluster_entries) {
TAILQ_FOREACH(iter_client2, &client->cluster->client_list, cluster_entries) {
if (iter_client1 == iter_client2) {
continue;
}
if (iter_client1->node_id == client->node_id) {
if (client_leaving) {
continue;
}
config_node_list1 = config_node_list;
} else {
config_node_list1 = &iter_client1->configuration_node_list;
}
if (iter_client2->node_id == client->node_id) {
if (client_leaving) {
continue;
}
config_node_list2 = config_node_list;
} else {
config_node_list2 = &iter_client2->configuration_node_list;
}
/*
* Walk thru all node ids in given config node list...
*/
TAILQ_FOREACH(iter_node1, config_node_list1, entries) {
/*
* ... and try to find given node id in other list
*/
iter_node2 = node_list_find_node_id(config_node_list2, iter_node1->node_id);
if (iter_node2 == NULL) {
/*
* Node with iter_node1->node_id was not found in
* config_node_list2 -> lists doesn't match
*/
return (0);
}
}
}
}
/*
* Test if same partitions share same ring ids and membership node list
*/
TAILQ_FOREACH(iter_client1, &client->cluster->client_list, cluster_entries) {
if (iter_client1->node_id == client->node_id) {
if (client_leaving) {
continue;
}
membership_node_list1 = membership_node_list;
ring_id1 = ring_id;
} else {
membership_node_list1 = &iter_client1->last_membership_node_list;
ring_id1 = &iter_client1->last_ring_id;
}
/*
* Walk thru all memberships nodes
*/
TAILQ_FOREACH(iter_node1, membership_node_list1, entries) {
/*
* try to find client with given node id
*/
iter_client2 = qnetd_cluster_find_client_by_node_id(client->cluster,
iter_node1->node_id);
if (iter_client2 == NULL) {
/*
* Client with given id is not connected
*/
continue;
}
if (iter_client2->node_id == client->node_id) {
if (client_leaving) {
continue;
}
membership_node_list2 = membership_node_list;
ring_id2 = ring_id;
} else {
membership_node_list2 = &iter_client2->last_membership_node_list;
ring_id2 = &iter_client2->last_ring_id;
}
/*
* Compare ring ids
*/
if (!tlv_ring_id_eq(ring_id1, ring_id2)) {
return (0);
}
/*
* Now compare that membership node list equals, so walk thru all
* members ...
*/
TAILQ_FOREACH(iter_node3, membership_node_list1, entries) {
/*
* ... and try to find given node id in other membership node list
*/
iter_node4 = node_list_find_node_id(membership_node_list2, iter_node3->node_id);
if (iter_node4 == NULL) {
/*
* Node with iter_node3->node_id was not found in
* membership_node_list2 -> lists doesn't match
*/
return (0);
}
}
}
}
return (1);
}
static size_t
qnetd_algo_ffsplit_no_active_clients_in_partition(const struct qnetd_client *client,
const struct node_list *membership_node_list)
{
const struct node_list_entry *iter_node;
const struct qnetd_client *iter_client;
size_t res;
res = 0;
if (client == NULL || membership_node_list == NULL) {
return (0);
}
TAILQ_FOREACH(iter_node, membership_node_list, entries) {
iter_client = qnetd_cluster_find_client_by_node_id(client->cluster,
iter_node->node_id);
if (iter_client != NULL) {
res++;
}
}
return (res);
}
/*
* Compares two partitions. Return 1 if client1, config_node_list1, membership_node_list1 is
* "better" than client2, config_node_list2, membership_node_list2
*/
static int
qnetd_algo_ffsplit_partition_cmp(const struct qnetd_client *client1,
const struct node_list *config_node_list1, const struct node_list *membership_node_list1,
const struct qnetd_client *client2,
const struct node_list *config_node_list2, const struct node_list *membership_node_list2)
{
size_t part1_active_clients, part2_active_clients;
int res;
res = -1;
if (node_list_size(config_node_list1) % 2 != 0) {
/*
* Odd clusters never split into 50:50.
*/
if (node_list_size(membership_node_list1) > node_list_size(config_node_list1) / 2) {
res = 1; goto exit_res;
} else {
res = 0; goto exit_res;
}
} else {
if (node_list_size(membership_node_list1) > node_list_size(config_node_list1) / 2) {
res = 1; goto exit_res;
} else if (node_list_size(membership_node_list1) < node_list_size(config_node_list1) / 2) {
res = 0; goto exit_res;
}
/*
* 50:50 split
*/
/*
* Check how many active clients are in partitions
*/
part1_active_clients = qnetd_algo_ffsplit_no_active_clients_in_partition(
client1, membership_node_list1);
part2_active_clients = qnetd_algo_ffsplit_no_active_clients_in_partition(
client2, membership_node_list2);
if (part1_active_clients > part2_active_clients) {
res = 1; goto exit_res;
} else if (part1_active_clients < part2_active_clients) {
res = 0; goto exit_res;
}
/*
* Number of active clients in both partitions equals. Use tie-breaker.
*/
- if (qnetd_algo_ffsplit_is_prefered_partition(client1, config_node_list1,
+ if (qnetd_algo_ffsplit_is_preferred_partition(client1, config_node_list1,
membership_node_list1)) {
res = 1; goto exit_res;
} else {
res = 0; goto exit_res;
}
}
exit_res:
if (res == -1) {
qnetd_log(LOG_CRIT, "qnetd_algo_ffsplit_partition_cmp unhandled case");
exit(1);
/* NOTREACHED */
}
return (res);
}
/*
* Select best partition for given client->cluster.
* If there is no partition which could become quorate, NULL is returned
*/
static const struct node_list *
qnetd_algo_ffsplit_select_partition(const struct qnetd_client *client, int client_leaving,
const struct node_list *config_node_list, const struct node_list *membership_node_list)
{
const struct qnetd_client *iter_client;
const struct qnetd_client *best_client;
const struct node_list *best_config_node_list, *best_membership_node_list;
const struct node_list *iter_config_node_list, *iter_membership_node_list;
best_client = NULL;
best_config_node_list = best_membership_node_list = NULL;
/*
* Get highest score
*/
TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
if (iter_client->node_id == client->node_id) {
if (client_leaving) {
continue;
}
iter_config_node_list = config_node_list;
iter_membership_node_list = membership_node_list;
} else {
iter_config_node_list = &iter_client->configuration_node_list;
iter_membership_node_list = &iter_client->last_membership_node_list;
}
if (qnetd_algo_ffsplit_partition_cmp(iter_client, iter_config_node_list,
iter_membership_node_list, best_client, best_config_node_list,
best_membership_node_list) > 0) {
best_client = iter_client;
best_config_node_list = iter_config_node_list;
best_membership_node_list = iter_membership_node_list;
}
}
return (best_membership_node_list);
}
/*
* Update state of all nodes to match quorate_partition_node_list
*/
static void
qnetd_algo_ffsplit_update_nodes_state(struct qnetd_client *client, int client_leaving,
const struct node_list *quorate_partition_node_list)
{
const struct qnetd_client *iter_client;
struct qnetd_algo_ffsplit_client_data *iter_client_data;
TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
iter_client_data = (struct qnetd_algo_ffsplit_client_data *)iter_client->algorithm_data;
if (iter_client->node_id == client->node_id && client_leaving) {
iter_client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE;
continue;
}
if (quorate_partition_node_list == NULL ||
node_list_find_node_id(quorate_partition_node_list, iter_client->node_id) == NULL) {
iter_client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK;
} else {
iter_client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK;
}
}
}
/*
* Send vote info. If client_leaving is set, client is ignored. if send_acks
* is set, only ACK votes are send (nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK state),
* otherwise only NACK votes are send (nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK state)
*
* Returns number of send votes
*/
static size_t
qnetd_algo_ffsplit_send_votes(struct qnetd_client *client, int client_leaving,
const struct tlv_ring_id *ring_id, int send_acks)
{
size_t sent_votes;
struct qnetd_client *iter_client;
struct qnetd_algo_ffsplit_client_data *iter_client_data;
const struct tlv_ring_id *ring_id_to_send;
enum tlv_vote vote_to_send;
sent_votes = 0;
TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
if (iter_client->node_id == client->node_id) {
if (client_leaving) {
continue;
}
ring_id_to_send = ring_id;
} else {
ring_id_to_send = &iter_client->last_ring_id;
}
iter_client_data = (struct qnetd_algo_ffsplit_client_data *)iter_client->algorithm_data;
vote_to_send = TLV_VOTE_UNDEFINED;
if (send_acks) {
if (iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK) {
vote_to_send = TLV_VOTE_ACK;
}
} else {
if (iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK) {
vote_to_send = TLV_VOTE_NACK;
}
}
if (vote_to_send != TLV_VOTE_UNDEFINED) {
iter_client_data->vote_info_expected_seq_num++;
sent_votes++;
if (qnetd_client_send_vote_info(iter_client,
iter_client_data->vote_info_expected_seq_num, ring_id_to_send,
vote_to_send) == -1) {
client->schedule_disconnect = 1;
}
}
}
return (sent_votes);
}
/*
* Return number of clients in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK state if sending_acks is
* set or number of nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK state if sending_acks is
* not set
*/
static size_t
qnetd_algo_ffsplit_no_clients_in_sending_state(struct qnetd_client *client, int sending_acks)
{
size_t no_clients;
struct qnetd_client *iter_client;
struct qnetd_algo_ffsplit_client_data *iter_client_data;
no_clients = 0;
TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
iter_client_data = (struct qnetd_algo_ffsplit_client_data *)iter_client->algorithm_data;
if (sending_acks &&
iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK) {
no_clients++;
}
if (!sending_acks &&
iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK) {
no_clients++;
}
}
return (no_clients);
}
static enum tlv_vote
qnetd_algo_ffsplit_do(struct qnetd_client *client, int client_leaving,
const struct tlv_ring_id *ring_id, const struct node_list *config_node_list,
const struct node_list *membership_node_list)
{
struct qnetd_algo_ffsplit_cluster_data *cluster_data;
const struct node_list *quorate_partition_node_list;
cluster_data = (struct qnetd_algo_ffsplit_cluster_data *)client->cluster->algorithm_data;
cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_STABLE_MEMBERSHIP;
if (!qnetd_algo_ffsplit_is_membership_stable(client, client_leaving,
ring_id, config_node_list, membership_node_list)) {
/*
* Wait until membership is stable
*/
qnetd_log(LOG_DEBUG, "ffsplit: Membership for cluster %s is not yet stable", client->cluster_name);
return (TLV_VOTE_WAIT_FOR_REPLY);
}
qnetd_log(LOG_DEBUG, "ffsplit: Membership for cluster %s is now stable", client->cluster_name);
quorate_partition_node_list = qnetd_algo_ffsplit_select_partition(client, client_leaving,
config_node_list, membership_node_list);
cluster_data->quorate_partition_node_list = quorate_partition_node_list;
if (quorate_partition_node_list == NULL) {
qnetd_log(LOG_DEBUG, "ffsplit: No quorate partition was selected");
} else {
qnetd_log(LOG_DEBUG, "ffsplit: Quorate partition selected");
qnetd_log_debug_dump_node_list(client, quorate_partition_node_list);
}
qnetd_algo_ffsplit_update_nodes_state(client, client_leaving, quorate_partition_node_list);
cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS;
if (qnetd_algo_ffsplit_send_votes(client, client_leaving, ring_id, 0) == 0) {
qnetd_log(LOG_DEBUG, "ffsplit: No client gets NACK");
/*
* No one gets nack -> send acks
*/
cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS;
if (qnetd_algo_ffsplit_send_votes(client, client_leaving, ring_id, 1) == 0) {
qnetd_log(LOG_DEBUG, "ffsplit: No client gets ACK");
/*
* No one gets acks -> finished
*/
cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
}
}
return (TLV_VOTE_NO_CHANGE);
}
enum tlv_reply_error_code
qnetd_algo_ffsplit_config_node_list_received(struct qnetd_client *client,
uint32_t msg_seq_num, int config_version_set, uint64_t config_version,
const struct node_list *nodes, int initial, enum tlv_vote *result_vote)
{
if (node_list_size(nodes) == 0) {
/*
* Empty node list shouldn't happen
*/
qnetd_log(LOG_ERR, "ffsplit: Received empty config node list for client %s",
client->addr_str);
return (TLV_REPLY_ERROR_CODE_INVALID_CONFIG_NODE_LIST);
}
if (node_list_find_node_id(nodes, client->node_id) == NULL) {
/*
* Current node is not in node list
*/
qnetd_log(LOG_ERR, "ffsplit: Received config node list without client %s",
client->addr_str);
return (TLV_REPLY_ERROR_CODE_INVALID_CONFIG_NODE_LIST);
}
if (initial || node_list_size(&client->last_membership_node_list) == 0) {
/*
* Initial node list -> membership is going to be send by client
*/
*result_vote = TLV_VOTE_ASK_LATER;
} else {
*result_vote = qnetd_algo_ffsplit_do(client, 0, &client->last_ring_id,
nodes, &client->last_membership_node_list);
}
return (TLV_REPLY_ERROR_CODE_NO_ERROR);
}
/*
* Called after client sent membership node list.
* All client fields are already set. Nodes is actual node list.
* msg_seq_num is 32-bit number set by client. If client sent config file version,
* config_version_set is set to 1 and config_version contains valid config file version.
* ring_id and quorate are copied from client votequorum callback.
*
* Function has to return result_vote. This can be one of ack/nack, ask_later (client
* should ask later for a vote) or wait_for_reply (client should wait for reply).
*
* Return TLV_REPLY_ERROR_CODE_NO_ERROR on success, different TLV_REPLY_ERROR_CODE_*
* on failure (error is send back to client)
*/
enum tlv_reply_error_code
qnetd_algo_ffsplit_membership_node_list_received(struct qnetd_client *client,
uint32_t msg_seq_num, const struct tlv_ring_id *ring_id,
const struct node_list *nodes, enum tlv_vote *result_vote)
{
if (node_list_size(nodes) == 0) {
/*
* Empty node list shouldn't happen
*/
qnetd_log(LOG_ERR, "ffsplit: Received empty membership node list for client %s",
client->addr_str);
return (TLV_REPLY_ERROR_CODE_INVALID_MEMBERSHIP_NODE_LIST);
}
if (node_list_find_node_id(nodes, client->node_id) == NULL) {
/*
* Current node is not in node list
*/
qnetd_log(LOG_ERR, "ffsplit: Received membership node list without client %s",
client->addr_str);
return (TLV_REPLY_ERROR_CODE_INVALID_MEMBERSHIP_NODE_LIST);
}
if (node_list_size(&client->configuration_node_list) == 0) {
/*
* Config node list not received -> it's going to be sent later
*/
*result_vote = TLV_VOTE_ASK_LATER;
} else {
*result_vote = qnetd_algo_ffsplit_do(client, 0, ring_id,
&client->configuration_node_list, nodes);
}
return (TLV_REPLY_ERROR_CODE_NO_ERROR);
}
enum tlv_reply_error_code
qnetd_algo_ffsplit_quorum_node_list_received(struct qnetd_client *client,
uint32_t msg_seq_num, enum tlv_quorate quorate, const struct node_list *nodes,
enum tlv_vote *result_vote)
{
/*
* Quorum node list is informative -> no change
*/
*result_vote = TLV_VOTE_NO_CHANGE;
return (TLV_REPLY_ERROR_CODE_NO_ERROR);
}
void
qnetd_algo_ffsplit_client_disconnect(struct qnetd_client *client, int server_going_down)
{
(void)qnetd_algo_ffsplit_do(client, 1, &client->last_ring_id,
&client->configuration_node_list, &client->last_membership_node_list);
free(client->algorithm_data);
if (qnetd_cluster_size(client->cluster) == 1) {
/*
* Last client in the cluster
*/
free(client->cluster->algorithm_data);
}
}
enum tlv_reply_error_code
qnetd_algo_ffsplit_ask_for_vote_received(struct qnetd_client *client, uint32_t msg_seq_num,
enum tlv_vote *result_vote)
{
/*
* Ask for vote is not supported in current algorithm
*/
return (TLV_REPLY_ERROR_CODE_UNSUPPORTED_DECISION_ALGORITHM_MESSAGE);
}
enum tlv_reply_error_code
qnetd_algo_ffsplit_vote_info_reply_received(struct qnetd_client *client, uint32_t msg_seq_num)
{
struct qnetd_algo_ffsplit_cluster_data *cluster_data;
struct qnetd_algo_ffsplit_client_data *client_data;
cluster_data = (struct qnetd_algo_ffsplit_cluster_data *)client->cluster->algorithm_data;
client_data = (struct qnetd_algo_ffsplit_client_data *)client->algorithm_data;
if (client_data->vote_info_expected_seq_num != msg_seq_num) {
qnetd_log(LOG_DEBUG, "ffsplit: Received old vote info reply from client %s",
client->addr_str);
return (TLV_REPLY_ERROR_CODE_NO_ERROR);
}
client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE;
if (cluster_data->cluster_state != QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS &&
cluster_data->cluster_state != QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS) {
return (TLV_REPLY_ERROR_CODE_NO_ERROR);
}
if (cluster_data->cluster_state == QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS) {
if (qnetd_algo_ffsplit_no_clients_in_sending_state(client, 0) == 0) {
qnetd_log(LOG_DEBUG, "ffsplit: All NACK votes sent for cluster %s",
client->cluster_name);
cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS;
if (qnetd_algo_ffsplit_send_votes(client, 0, &client->last_ring_id, 1) == 0) {
qnetd_log(LOG_DEBUG, "ffsplit: No client gets ACK");
/*
* No one gets acks -> finished
*/
cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
}
}
} else {
if (qnetd_algo_ffsplit_no_clients_in_sending_state(client, 1) == 0) {
qnetd_log(LOG_DEBUG, "ffsplit: All ACK votes sent for cluster %s",
client->cluster_name);
cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
}
}
return (TLV_REPLY_ERROR_CODE_NO_ERROR);
}
enum tlv_reply_error_code
qnetd_algo_ffsplit_timer_callback(struct qnetd_client *client, int *reschedule_timer,
int *send_vote, enum tlv_vote *result_vote)
{
return (TLV_REPLY_ERROR_CODE_NO_ERROR);
}
static struct qnetd_algorithm qnetd_algo_ffsplit = {
.init = qnetd_algo_ffsplit_client_init,
.config_node_list_received = qnetd_algo_ffsplit_config_node_list_received,
.membership_node_list_received = qnetd_algo_ffsplit_membership_node_list_received,
.quorum_node_list_received = qnetd_algo_ffsplit_quorum_node_list_received,
.client_disconnect = qnetd_algo_ffsplit_client_disconnect,
.ask_for_vote_received = qnetd_algo_ffsplit_ask_for_vote_received,
.vote_info_reply_received = qnetd_algo_ffsplit_vote_info_reply_received,
.timer_callback = qnetd_algo_ffsplit_timer_callback,
};
enum tlv_reply_error_code qnetd_algo_ffsplit_register()
{
return (qnetd_algorithm_register(TLV_DECISION_ALGORITHM_TYPE_FFSPLIT, &qnetd_algo_ffsplit));
}
diff --git a/qdevices/qnetd-log.c b/qdevices/qnetd-log.c
index 1d046455..6f225aea 100644
--- a/qdevices/qnetd-log.c
+++ b/qdevices/qnetd-log.c
@@ -1,183 +1,183 @@
/*
* Copyright (c) 2015-2016 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Jan Friesse (jfriesse@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the Red Hat, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include
#include
#include
#include "qnet-config.h"
#include "qnetd-log.h"
static int qnetd_log_config_target = 0;
static int qnetd_log_config_debug = 0;
static int qnetd_log_config_priority_bump = 0;
static const char qnetd_log_month_str[][4] = {
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
struct qnetd_log_syslog_prio_to_str_item {
int priority;
const char *priority_str;
};
static struct qnetd_log_syslog_prio_to_str_item qnetd_syslog_prio_to_str_array[] = {
{LOG_EMERG, "emerg"},
{LOG_ALERT, "alert"},
{LOG_CRIT, "crit"},
{LOG_ERR, "error"},
{LOG_WARNING, "warning"},
{LOG_NOTICE, "notice"},
{LOG_INFO, "info"},
{LOG_DEBUG, "debug"},
{-1, NULL}};
void
qnetd_log_init(int target)
{
qnetd_log_config_target = target;
if (qnetd_log_config_target & QNETD_LOG_TARGET_SYSLOG) {
openlog(QNETD_PROGRAM_NAME, LOG_PID, LOG_DAEMON);
}
}
static const char *
qnetd_log_syslog_prio_to_str(int priority)
{
if (priority >= LOG_EMERG && priority <= LOG_DEBUG) {
return (qnetd_syslog_prio_to_str_array[priority].priority_str);
} else {
return ("none");
}
}
void
qnetd_log_vprintf(int priority, const char *format, va_list ap)
{
time_t current_time;
struct tm tm_res;
int final_priority;
va_list ap_copy;
if (priority != LOG_DEBUG || (qnetd_log_config_debug)) {
if (qnetd_log_config_target & QNETD_LOG_TARGET_STDERR) {
current_time = time(NULL);
localtime_r(¤t_time, &tm_res);
fprintf(stderr, "%s %02d %02d:%02d:%02d ",
qnetd_log_month_str[tm_res.tm_mon], tm_res.tm_mday, tm_res.tm_hour,
tm_res.tm_min, tm_res.tm_sec);
fprintf(stderr, "%-7s ", qnetd_log_syslog_prio_to_str(priority));
va_copy(ap_copy, ap);
vfprintf(stderr, format, ap_copy);
va_end(ap_copy);
fprintf(stderr, "\n");
}
if (qnetd_log_config_target & QNETD_LOG_TARGET_SYSLOG) {
final_priority = priority;
if (qnetd_log_config_priority_bump && priority > LOG_INFO) {
final_priority = LOG_INFO;
}
va_copy(ap_copy, ap);
vsyslog(final_priority, format, ap);
va_end(ap_copy);
}
}
}
void
qnetd_log_printf(int priority, const char *format, ...)
{
va_list ap;
va_start(ap, format);
qnetd_log_vprintf(priority, format, ap);
va_end(ap);
}
void
qnetd_log_close(void)
{
if (qnetd_log_config_target & QNETD_LOG_TARGET_SYSLOG) {
closelog();
}
}
void
qnetd_log_set_debug(int enabled)
{
qnetd_log_config_debug = enabled;
}
void
qnetd_log_set_priority_bump(int enabled)
{
qnetd_log_config_priority_bump = enabled;
}
void
qnetd_log_msg_decode_error(int ret)
{
switch (ret) {
case -1:
qnetd_log(LOG_WARNING, "Received message with option with invalid length");
break;
case -2:
qnetd_log(LOG_CRIT, "Can't allocate memory");
break;
case -3:
qnetd_log(LOG_WARNING, "Received inconsistent msg (tlv len > msg size)");
break;
case -4:
qnetd_log(LOG_WARNING, "Received message with option with invalid value");
break;
default:
- qnetd_log(LOG_ERR, "Unknown error occured when decoding message");
+ qnetd_log(LOG_ERR, "Unknown error occurred when decoding message");
break;
}
}