diff --git a/exec/main.c b/exec/main.c
index 7a471a16..fb0486e7 100644
--- a/exec/main.c
+++ b/exec/main.c
@@ -1,1588 +1,1590 @@
/*
* Copyright (c) 2002-2006 MontaVista Software, Inc.
* Copyright (c) 2006-2018 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* \mainpage Corosync
*
* This is the doxygen generated developer documentation for the Corosync
* project. For more information about Corosync, please see the project
* web site, corosync.org.
*
* \section license License
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef HAVE_LIBSYSTEMD
#include
#endif
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "quorum.h"
#include "totemsrp.h"
#include "logconfig.h"
#include "totemconfig.h"
#include "main.h"
#include "sync.h"
#include "timer.h"
#include "util.h"
#include "apidef.h"
#include "service.h"
#include "schedwrk.h"
#include "ipcs_stats.h"
#include "stats.h"
/*
 * Buffer size constant; a smaller value is selected when the build defines
 * HAVE_SMALL_MEMORY_FOOTPRINT.  The replacement lists are parenthesized so
 * that the macro behaves as a single value inside a larger expression
 * (e.g. "x / IPC_LOGSYS_SIZE").
 */
#ifdef HAVE_SMALL_MEMORY_FOOTPRINT
#define IPC_LOGSYS_SIZE (1024*64)
#else
#define IPC_LOGSYS_SIZE (8192*128)
#endif
/*
* LibQB adds a default "*" syslog filter, so syslog_priority has to be set as
* low as possible for the filters applied later in
* _logsys_config_apply_per_file to take effect.
*/
LOGSYS_DECLARE_SYSTEM ("corosync",
LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_OUTPUT_SYSLOG,
LOG_DAEMON,
LOG_EMERG);
/* Declares the "MAIN" log subsystem (presumably the one log_printf uses in this file - confirm in logsys.h) */
LOGSYS_DECLARE_SUBSYS ("MAIN");
#define SERVER_BACKLOG 5
/* Scheduler priority obtained in corosync_set_rr_scheduler(); 0 when it could not be determined */
static int sched_priority = 0;
/* Upper bound used by confchg_fn() when walking corosync_service[] */
static unsigned int service_count = 32;
static struct totem_logging_configuration totem_logging_configuration;
/* API table; assigned from apidef_get() in main() */
static struct corosync_api_v1 *api = NULL;
/* 1 while service synchronization is running: set in confchg_fn(), cleared in corosync_sync_completed() */
static int sync_in_process = 1;
/* Main loop handle, exposed through cs_poll_handle_get() */
static qb_loop_t *corosync_poll_handle;
struct sched_param global_sched_param;
/* Handle of the periodic timer that runs corosync_totem_stats_updater() */
static corosync_timer_handle_t corosync_stats_timer_handle;
static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid";
/* Configuration file path; the default can be replaced with the -c command line option */
static char corosync_config_file[PATH_MAX + 1] = COROSYSCONFDIR "/corosync.conf";
/*
 * Accessor for the main loop handle stored in corosync_poll_handle.
 */
qb_loop_t *cs_poll_handle_get (void)
{
	return corosync_poll_handle;
}
/*
 * Register dispatch_fn for the given events on fd in the loop identified by
 * handle, using QB_LOOP_MED priority.  The result of qb_loop_poll_add() is
 * passed back unchanged.
 */
int cs_poll_dispatch_add (qb_loop_t * handle,
	int fd,
	int events,
	void *data,
	int (*dispatch_fn) (int fd, int revents, void *data))
{
	return qb_loop_poll_add(handle,
		QB_LOOP_MED,
		fd,
		events,
		data,
		dispatch_fn);
}
/*
 * Remove fd from the loop identified by handle; returns the result of
 * qb_loop_poll_del() unchanged.
 */
int cs_poll_dispatch_delete(qb_loop_t * handle, int fd)
{
	return (qb_loop_poll_del(handle, fd));
}
/*
 * Invoke exec_dump_fn of every registered service that provides one.
 */
void corosync_state_dump (void)
{
	int idx = 0;

	while (idx < SERVICES_COUNT_MAX) {
		if (corosync_service[idx]) {
			if (corosync_service[idx]->exec_dump_fn) {
				corosync_service[idx]->exec_dump_fn ();
			}
		}
		idx++;
	}
}
/*
 * Return the path of the configuration file currently stored in
 * corosync_config_file.
 */
const char *corosync_get_config_file(void)
{
	return corosync_config_file;
}
/*
 * Write the libqb blackbox to "<state dir>/fdata-<timestamp>-<pid>" and then
 * re-point the "<state dir>/fdata" symlink at the file just written.
 * Failures are logged; nothing is returned to the caller.
 */
static void corosync_blackbox_write_to_file (void)
{
	char dump_path[PATH_MAX];
	char link_path[PATH_MAX];
	char stamp[PATH_MAX];
	struct tm now_tm;
	time_t now;
	ssize_t rc;
	int len;

	now = time(NULL);
	localtime_r(&now, &now_tm);
	strftime(stamp, PATH_MAX, "%Y-%m-%dT%H:%M:%S", &now_tm);

	len = snprintf(dump_path, PATH_MAX, "%s/fdata-%s-%lld",
		get_state_dir(),
		stamp,
		(long long int)getpid());
	if (len >= PATH_MAX) {
		log_printf(LOGSYS_LEVEL_ERROR, "Can't snprintf blackbox file name");
		return ;
	}

	rc = qb_log_blackbox_write_to_file(dump_path);
	if (rc < 0) {
		LOGSYS_PERROR(-rc, LOGSYS_LEVEL_ERROR, "Can't store blackbox file");
		return ;
	}

	/* Replace any previous "fdata" link with one to the new dump */
	snprintf(link_path, sizeof(link_path), "%s/fdata", get_state_dir());
	unlink(link_path);
	if (symlink(dump_path, link_path) == -1) {
		log_printf(LOGSYS_LEVEL_ERROR, "Can't create symlink to '%s' for corosync blackbox file '%s'",
			dump_path, link_path);
	}
}
/*
 * Completion callback handed to corosync_service_unlink_all(): removes the
 * stats timer, stops the main loop and finalizes icmap, in that order.
 */
static void unlink_all_completed (void)
{
	api->timer_delete(corosync_stats_timer_handle);

	qb_loop_stop(corosync_poll_handle);

	icmap_fini ();
}
/*
 * Start shutdown: unlink all services; unlink_all_completed() is passed as
 * the completion callback.
 */
void corosync_shutdown_request (void)
{
	corosync_service_unlink_all(api, unlink_all_completed);
}
/*
 * Signal callback that dumps the state of all services.  Both arguments are
 * unused; always returns 0.
 */
static int32_t sig_diag_handler (int num, void *data)
{
	(void)num;
	(void)data;

	corosync_state_dump();

	return (0);
}
/*
 * Signal callback that logs the shutdown and starts unlinking all services.
 * Both arguments are unused; always returns 0.
 */
static int32_t sig_exit_handler (int num, void *data)
{
	(void)num;
	(void)data;

	log_printf(LOGSYS_LEVEL_NOTICE, "Node was shut down by a signal");
	corosync_service_unlink_all(api, unlink_all_completed);

	return (0);
}
/*
 * Handler installed for fatal signals: restores the default disposition,
 * stores the blackbox, shuts down logging and re-raises the signal so the
 * default action takes place.
 */
static void sigsegv_handler (int num)
{
	/* Default action first, so the raise() below is not handled here again */
	(void)signal(num, SIG_DFL);

	corosync_blackbox_write_to_file();
	qb_log_fini ();

	raise(num);
}
/* Loopback address as returned by inet_addr() */
#define LOCALHOST_IP inet_addr("127.0.0.1")
/* Group handle passed to the totempg_groups_* and totempg_check_q_level calls in this file */
static void *corosync_group_handle;
/* Totem process group descriptor: one-byte group name "a" */
static struct totempg_group corosync_group = {
.group = "a",
.group_len = 1
};
/*
 * Serialization hook with an empty body.
 */
static void serialize_lock (void)
{
	/* nothing to do */
}
/*
 * Serialization hook with an empty body.
 */
static void serialize_unlock (void)
{
	/* nothing to do */
}
/*
 * Callback registered with sync_init(): clears sync_in_process, publishes
 * the new state to the IPC layer, allows IPC connections and acknowledges
 * the transition to totem.
 */
static void corosync_sync_completed (void)
{
	log_printf(LOGSYS_LEVEL_NOTICE,
		"Completed service synchronization, ready to provide service.");

	sync_in_process = 0;
	cs_ipcs_sync_state_changed (sync_in_process);
	cs_ipc_allow_connections (1);

	/* Inform totem to start using new message queue again */
	totempg_trans_ack ();

#ifdef HAVE_LIBSYSTEMD
	sd_notify(0, "READY=1");
#endif
}
/*
 * Copy the sync callbacks of service service_id into *callbacks.
 *
 * Returns -1 when no service is registered under service_id, 0 otherwise.
 * A NULL callbacks pointer is accepted: nothing is copied and 0 is returned.
 */
static int corosync_sync_callbacks_retrieve (
	int service_id,
	struct sync_callbacks *callbacks)
{
	if (corosync_service[service_id] == NULL) {
		return (-1);
	}

	if (callbacks != NULL) {
		callbacks->name = corosync_service[service_id]->name;
		callbacks->sync_init = corosync_service[service_id]->sync_init;
		callbacks->sync_process = corosync_service[service_id]->sync_process;
		callbacks->sync_activate = corosync_service[service_id]->sync_activate;
		callbacks->sync_abort = corosync_service[service_id]->sync_abort;
	}

	return (0);
}
/* Copy of the ring id received by the most recent confchg_fn() call */
static struct memb_ring_id corosync_ring_id;
/*
 * Update the runtime.members.<nodeid>.* icmap keys for a node that joined.
 *
 * If the ip key already exists the join counter is incremented, otherwise
 * the ip key is created and the counter starts at 1.  In both cases the
 * status key ends up as "joined".
 */
static void member_object_joined (unsigned int nodeid)
{
	char ip_key[ICMAP_KEYNAME_MAXLEN];
	char join_count_key[ICMAP_KEYNAME_MAXLEN];
	char status_key[ICMAP_KEYNAME_MAXLEN];

	snprintf(ip_key, ICMAP_KEYNAME_MAXLEN,
		"runtime.members.%u.ip", nodeid);
	snprintf(join_count_key, ICMAP_KEYNAME_MAXLEN,
		"runtime.members.%u.join_count", nodeid);
	snprintf(status_key, ICMAP_KEYNAME_MAXLEN,
		"runtime.members.%u.status", nodeid);

	if (icmap_get(ip_key, NULL, NULL, NULL) != CS_OK) {
		/* First time this node is seen */
		icmap_set_string(ip_key, (char*)api->totem_ifaces_print (nodeid));
		icmap_set_uint32(join_count_key, 1);
	} else {
		icmap_inc(join_count_key);
	}
	icmap_set_string(status_key, "joined");

	log_printf (LOGSYS_LEVEL_DEBUG,
		"Member joined: %s", api->totem_ifaces_print (nodeid));
}
/*
 * Set runtime.members.<nodeid>.status to "left" and log the departure.
 */
static void member_object_left (unsigned int nodeid)
{
	char status_key[ICMAP_KEYNAME_MAXLEN];

	snprintf(status_key, ICMAP_KEYNAME_MAXLEN,
		"runtime.members.%u.status", nodeid);
	icmap_set_string(status_key, "left");

	log_printf(LOGSYS_LEVEL_DEBUG,
		"Member left: %s", api->totem_ifaces_print (nodeid));
}
/*
 * Totem configuration change callback.
 *
 * Marks synchronization as in progress, records the new ring id, updates the
 * member objects for nodes that left or joined, forwards the change to every
 * service with a confchg_fn and finally drives the sync machinery: a sync
 * that was still running is aborted, a transitional configuration is saved
 * and a regular configuration starts a new sync.
 *
 * The loop counters use the same unsigned types as their bounds (size_t for
 * the list lengths, unsigned int for service_count) so the comparisons do
 * not mix signed and unsigned operands.
 */
static void confchg_fn (
	enum totem_configuration_type configuration_type,
	const unsigned int *member_list, size_t member_list_entries,
	const unsigned int *left_list, size_t left_list_entries,
	const unsigned int *joined_list, size_t joined_list_entries,
	const struct memb_ring_id *ring_id)
{
	size_t n;
	unsigned int svc;
	int abort_activate = 0;

	/* A configuration change during a running sync aborts that sync below */
	if (sync_in_process == 1) {
		abort_activate = 1;
	}
	sync_in_process = 1;
	cs_ipcs_sync_state_changed(sync_in_process);
	memcpy (&corosync_ring_id, ring_id, sizeof (struct memb_ring_id));

	for (n = 0; n < left_list_entries; n++) {
		member_object_left (left_list[n]);
	}
	for (n = 0; n < joined_list_entries; n++) {
		member_object_joined (joined_list[n]);
	}

	/*
	 * Call configuration change for all services
	 */
	for (svc = 0; svc < service_count; svc++) {
		if (corosync_service[svc] && corosync_service[svc]->confchg_fn) {
			corosync_service[svc]->confchg_fn (configuration_type,
				member_list, member_list_entries,
				left_list, left_list_entries,
				joined_list, joined_list_entries, ring_id);
		}
	}

	if (abort_activate) {
		sync_abort ();
	}
	if (configuration_type == TOTEM_CONFIGURATION_TRANSITIONAL) {
		sync_save_transitional (member_list, member_list_entries, ring_id);
	}
	if (configuration_type == TOTEM_CONFIGURATION_REGULAR) {
		sync_start (member_list, member_list_entries, ring_id);
	}
}
/*
 * Placeholder: privileges are currently not dropped.
 */
static void priv_drop (void)
{
	/* TODO: we are still not dropping privs */
}
/*
 * Detach from the controlling terminal: fork (the parent exits), start a new
 * session in the child and redirect stdin/stdout/stderr to /dev/null.
 *
 * Exits through corosync_exit_error() when fork, opening /dev/null or the
 * redirection fails.
 */
static void corosync_tty_detach (void)
{
	int devnull;

	/*
	 * Disconnect from TTY if this is not a debug run
	 */
	switch (fork ()) {
	case -1:
		corosync_exit_error (COROSYNC_DONE_FORK);
		break;
	case 0:
		/*
		 * child which is disconnected, run this process
		 */
		break;
	default:
		exit (0);
		break;
	}

	/* Create new session */
	(void)setsid();

	/*
	 * Map stdin/out/err to /dev/null.
	 */
	devnull = open("/dev/null", O_RDWR);
	if (devnull == -1) {
		corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR);
	}

	if (dup2(devnull, STDIN_FILENO) < 0 || dup2(devnull, STDOUT_FILENO) < 0
	    || dup2(devnull, STDERR_FILENO) < 0) {
		close(devnull);
		corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR);
	}

	/*
	 * open() returns the lowest free descriptor, so when the process was
	 * started with a standard descriptor closed, devnull is itself 0, 1
	 * or 2.  Closing it then would undo the redirection just set up.
	 */
	if (devnull > STDERR_FILENO) {
		close(devnull);
	}
}
/*
 * Raise the locked-memory limit to RLIM_INFINITY and lock all current and
 * future pages.  An mlockall() failure is logged as a warning only; the
 * result of setrlimit() is not checked.
 */
static void corosync_mlockall (void)
{
	struct rlimit unlimited;
	int rc;

	unlimited.rlim_max = RLIM_INFINITY;
	unlimited.rlim_cur = RLIM_INFINITY;

#ifndef RLIMIT_MEMLOCK
#define RLIMIT_MEMLOCK RLIMIT_VMEM
#endif
	setrlimit (RLIMIT_MEMLOCK, &unlimited);

	rc = mlockall (MCL_CURRENT | MCL_FUTURE);
	if (rc != -1) {
		return;
	}

	LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
		"Could not lock memory of service to avoid page faults");
}
/*
 * Periodic timer callback that refreshes derived totem statistics.
 *
 * It flags (and warns about) a node that stays in gather state or keeps
 * failing sendmsg, averages token statistics over the recorded token
 * history, updates time_since_token_last_received, triggers the stats
 * trackers and re-arms itself for another 1500 ms.
 */
static void corosync_totem_stats_updater (void *data)
{
	totempg_stats_t *stats;
	uint32_t sum_mtt_rx_token = 0;
	uint32_t sum_backlog_calc = 0;
	uint32_t sum_token_holdtime = 0;
	int32_t samples = 0;
	int cur, prev;
	const char *reason;

	stats = api->totem_get_stats();

	stats->srp->firewall_enabled_or_nic_failure =
		(stats->srp->continuous_gather > MAX_NO_CONT_GATHER) ? 1 : 0;

	if (stats->srp->continuous_gather <= MAX_NO_CONT_GATHER &&
	    stats->srp->continuous_sendmsg_failures <= MAX_NO_CONT_SENDMSG_FAILURES) {
		stats->srp->firewall_enabled_or_nic_failure = 0;
	} else {
		reason = "";

		if (stats->srp->continuous_sendmsg_failures > MAX_NO_CONT_SENDMSG_FAILURES) {
			reason = "number of multicast sendmsg failures is above threshold";
		}
		/* The gather reason wins when both conditions hold */
		if (stats->srp->continuous_gather > MAX_NO_CONT_GATHER) {
			reason = "totem is continuously in gather state";
		}

		log_printf (LOGSYS_LEVEL_WARNING,
			"Totem is unable to form a cluster because of an "
			"operating system or network fault (reason: %s). The most common "
			"cause of this message is that the local firewall is "
			"configured improperly.", reason);
		stats->srp->firewall_enabled_or_nic_failure = 1;
	}

	/* Walk the token ring buffer backwards from the latest entry */
	cur = stats->srp->latest_token;
	for (;;) {
		prev = (cur == 0) ? (TOTEM_TOKEN_STATS_MAX - 1) : (cur - 1);
		if (prev == stats->srp->earliest_token) {
			break;
		}

		/* if tx == 0, then dropped token (not ours) */
		if (stats->srp->token[cur].tx != 0 ||
		    (stats->srp->token[cur].rx - stats->srp->token[prev].rx) > 0 ) {
			sum_mtt_rx_token += (stats->srp->token[cur].rx - stats->srp->token[prev].rx);
			sum_token_holdtime += (stats->srp->token[cur].tx - stats->srp->token[cur].rx);
			sum_backlog_calc += stats->srp->token[cur].backlog_calc;
			samples++;
		}
		cur = prev;
	}

	if (samples) {
		stats->srp->mtt_rx_token = (sum_mtt_rx_token / samples);
		stats->srp->avg_token_workload = (sum_token_holdtime / samples);
		stats->srp->avg_backlog_calc = (sum_backlog_calc / samples);
	}

	stats->srp->time_since_token_last_received = qb_util_nano_current_get () / QB_TIME_NS_IN_MSEC -
		stats->srp->token[stats->srp->latest_token].rx;

	stats_trigger_trackers();

	api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL,
		corosync_totem_stats_updater,
		&corosync_stats_timer_handle);
}
/*
 * Arm the first 1500 ms timer for corosync_totem_stats_updater(), which
 * re-arms itself on every run.
 */
static void corosync_totem_stats_init (void)
{
	api->timer_add_duration(1500 * MILLI_2_NANO_SECONDS,
		NULL,
		corosync_totem_stats_updater,
		&corosync_stats_timer_handle);
}
static void deliver_fn (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required)
{
const struct qb_ipc_request_header *header;
int32_t service;
int32_t fn_id;
uint32_t id;
header = msg;
if (endian_conversion_required) {
id = swab32 (header->id);
} else {
id = header->id;
}
/*
* Call the proper executive handler
*/
service = id >> 16;
fn_id = id & 0xffff;
if (!corosync_service[service]) {
return;
}
if (fn_id >= corosync_service[service]->exec_engine_count) {
log_printf(LOGSYS_LEVEL_WARNING, "discarded unknown message %d for service %d (max id %d)",
fn_id, service, corosync_service[service]->exec_engine_count);
return;
}
icmap_fast_inc(service_stats_rx[service][fn_id]);
if (endian_conversion_required) {
assert(corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn != NULL);
corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn
((void *)msg);
}
corosync_service[service]->exec_engine[fn_id].exec_handler_fn
(msg, nodeid);
}
/*
 * Multicast iovec to the joined group, after bumping the tx counter of the
 * service/handler pair named in the request header (first iovec element).
 * Returns the result of totempg_groups_mcast_joined().
 */
int main_mcast (
	const struct iovec *iovec,
	unsigned int iov_len,
	unsigned int guarantee)
{
	const struct qb_ipc_request_header *req = iovec->iov_base;
	/* Upper 16 bits: service number, lower 16 bits: handler index */
	int32_t service = req->id >> 16;
	int32_t fn_id = req->id & 0xffff;

	if (corosync_service[service]) {
		icmap_fast_inc(service_stats_tx[service][fn_id]);
	}

	return totempg_groups_mcast_joined (corosync_group_handle, iovec, iov_len, guarantee);
}
/*
 * Load the ring sequence number for nodeid from "<state dir>/ringid_<nodeid>"
 * into memb_ring_id->seq.  When the file is missing or does not contain a
 * full sequence number, the sequence is reset to 0 and the file is
 * (re)created with that value.  memb_ring_id->rep is always set to nodeid.
 *
 * Exits through corosync_exit_error() when the file can neither be created
 * nor written completely.  A short write is treated as a failure, and the
 * error code is captured before close() so the logged reason belongs to the
 * write itself.
 *
 * NOTE(review): umask(0) changes the process umask for everything that runs
 * afterwards; it is kept because later code may depend on it.
 */
static void corosync_ring_id_create_or_load (
	struct memb_ring_id *memb_ring_id,
	unsigned int nodeid)
{
	int fd;
	int saved_errno;
	ssize_t res = 0;
	char filename[PATH_MAX];

	snprintf (filename, sizeof(filename), "%s/ringid_%u",
		get_state_dir(), nodeid);
	fd = open (filename, O_RDONLY, 0700);
	/*
	 * If file can be opened and read, read the ring id
	 */
	if (fd != -1) {
		res = read (fd, &memb_ring_id->seq, sizeof (uint64_t));
		close (fd);
	}
	/*
	 * If file could not be opened or read, create a new ring id
	 */
	if ((fd == -1) || (res != (ssize_t)sizeof (uint64_t))) {
		memb_ring_id->seq = 0;
		umask(0);
		fd = open (filename, O_CREAT|O_RDWR, 0700);
		if (fd != -1) {
			res = write (fd, &memb_ring_id->seq, sizeof (uint64_t));
			/* errno is only meaningful for -1; report a short write as EIO */
			saved_errno = (res == -1) ? errno : EIO;
			close (fd);
			if (res != (ssize_t)sizeof (uint64_t)) {
				LOGSYS_PERROR (saved_errno, LOGSYS_LEVEL_ERROR,
					"Couldn't write ringid file '%s'", filename);

				corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
			}
		} else {
			LOGSYS_PERROR (errno, LOGSYS_LEVEL_ERROR,
				"Couldn't create ringid file '%s'", filename);

			corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
		}
	}

	memb_ring_id->rep = nodeid;
}
/*
 * Store memb_ring_id->seq in "<state dir>/ringid_<nodeid>", creating the
 * file when it does not exist yet.
 *
 * Exits through corosync_exit_error() when the file cannot be opened or the
 * sequence number cannot be written completely.  The error code of the write
 * is captured before close() so that close() cannot replace it.
 */
static void corosync_ring_id_store (
	const struct memb_ring_id *memb_ring_id,
	unsigned int nodeid)
{
	char filename[PATH_MAX];
	int fd;
	int saved_errno;
	ssize_t res;

	snprintf (filename, sizeof(filename), "%s/ringid_%u",
		get_state_dir(), nodeid);

	fd = open (filename, O_WRONLY, 0700);
	if (fd == -1) {
		fd = open (filename, O_CREAT|O_RDWR, 0700);
	}
	if (fd == -1) {
		LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR,
			"Couldn't store new ring id " CS_PRI_RING_ID_SEQ " to stable storage",
			memb_ring_id->seq);

		corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
	}
	log_printf (LOGSYS_LEVEL_DEBUG,
		"Storing new sequence id for ring " CS_PRI_RING_ID_SEQ, memb_ring_id->seq);
	res = write (fd, &memb_ring_id->seq, sizeof(memb_ring_id->seq));
	/* errno is only meaningful for -1; report a short write as EIO */
	saved_errno = (res == -1) ? errno : EIO;
	close (fd);
	if (res != (ssize_t)sizeof(memb_ring_id->seq)) {
		LOGSYS_PERROR(saved_errno, LOGSYS_LEVEL_ERROR,
			"Couldn't store new ring id " CS_PRI_RING_ID_SEQ " to stable storage",
			memb_ring_id->seq);

		corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
	}
}
/* Handle of the timer armed by corosync_recheck_the_q_level() */
static qb_loop_timer_handle recheck_the_q_level_timer;

/*
 * Ask totempg to re-evaluate the queue level; while the level reported by
 * cs_ipcs_q_level_get() is critical, schedule another check in 1 ms.
 */
void corosync_recheck_the_q_level(void *data)
{
	totempg_check_q_level(corosync_group_handle);

	if (cs_ipcs_q_level_get() != TOTEM_Q_LEVEL_CRITICAL) {
		return;
	}

	qb_loop_timer_add(cs_poll_handle_get(),
		QB_LOOP_MED,
		1*QB_TIME_NS_IN_MSEC,
		NULL,
		corosync_recheck_the_q_level,
		&recheck_the_q_level_timer);
}
/*
* State shared between corosync_sending_allowed() and
* corosync_sending_allowed_release().
*/
struct sending_allowed_private_data_struct {
/* Result of totempg_groups_joined_reserve(); -1 means the reservation failed */
int reserved_msgs;
};
/*
 * Decide whether a library request may be sent right now.
 *
 * Space for the message is reserved in totempg and the result is stored in
 * the private data, to be given back by corosync_sending_allowed_release().
 *
 * Returns QB_TRUE when sending is allowed, otherwise a negative errno value:
 *   -EINVAL       reservation failed or id is not a valid lib handler
 *   -EHOSTUNREACH not quorate and the service does not allow inquorate use
 *   -ENOBUFS      flow control is required and nothing could be reserved
 *   -EINPROGRESS  flow control is required and a sync is in progress
 */
int corosync_sending_allowed (
	unsigned int service,
	unsigned int id,
	const void *msg,
	void *sending_allowed_private_data)
{
	struct sending_allowed_private_data_struct *pd = sending_allowed_private_data;
	struct qb_ipc_request_header *header = (struct qb_ipc_request_header *)msg;
	struct iovec reserve_iovec;

	reserve_iovec.iov_base = (char *)header;
	reserve_iovec.iov_len = header->size;

	pd->reserved_msgs = totempg_groups_joined_reserve (
		corosync_group_handle,
		&reserve_iovec, 1);
	if (pd->reserved_msgs == -1) {
		return -EINVAL;
	}

	/* Message ID out of range */
	if (id >= corosync_service[service]->lib_engine_count) {
		return -EINVAL;
	}

	/* Quorum gate */
	if (corosync_quorum_is_quorate() != 1 &&
	    corosync_service[service]->allow_inquorate != CS_LIB_ALLOW_INQUORATE) {
		return -EHOSTUNREACH;
	}

	/* Flow control gate */
	if (corosync_service[service]->lib_engine[id].flow_control == CS_LIB_FLOW_CONTROL_NOT_REQUIRED) {
		return (QB_TRUE);
	}
	if (pd->reserved_msgs == 0) {
		return -ENOBUFS;
	}
	if (sync_in_process != 0) {
		return -EINPROGRESS;
	}

	return (QB_TRUE);
}
/*
 * Give back the reservation recorded by corosync_sending_allowed().  Nothing
 * is released when that reservation had failed (reserved_msgs == -1).
 */
void corosync_sending_allowed_release (void *sending_allowed_private_data)
{
	struct sending_allowed_private_data_struct *pd = sending_allowed_private_data;

	if (pd->reserved_msgs != -1) {
		totempg_groups_joined_release (pd->reserved_msgs);
	}
}
/*
 * Return 1 when source names this node (its nodeid equals
 * totempg_my_nodeid_get()), 0 otherwise.  source must not be NULL.
 */
int message_source_is_local (const mar_message_source_t *source)
{
	assert (source != NULL);

	return (source->nodeid == totempg_my_nodeid_get ()) ? 1 : 0;
}
/*
 * Initialize *source for a message originating on this node: the structure
 * is zeroed, then filled with the local nodeid and the given connection.
 * Neither argument may be NULL.
 */
void message_source_set (
	mar_message_source_t *source,
	void *conn)
{
	assert ((source != NULL) && (conn != NULL));

	memset (source, 0, sizeof (*source));

	source->conn = conn;
	source->nodeid = totempg_my_nodeid_get ();
}
/*
* State of the scheduler pause detector implemented by
* timer_function_scheduler_timeout().
*/
struct scheduler_pause_timeout_data {
/* Source of token_timeout, which is re-read on every timer run */
struct totem_config *totem_config;
/* Handle of the currently armed detector timer */
qb_loop_timer_handle handle;
/* qb_util_nano_current_get() value of the previous run; 0 before the first run */
unsigned long long tv_prev;
/* Largest accepted gap between two runs, in the same unit as tv_prev */
unsigned long long max_tv_diff;
};
/*
* Scheduler pause detector.
*
* Runs as a self re-arming timer (period: token_timeout / 3) and measures the
* time elapsed since its previous run.  When that gap exceeds the threshold
* computed on the previous run (80% of token_timeout) a warning is logged and
* a schedmiss event is recorded in the statistics.
*
* data points to the struct scheduler_pause_timeout_data holding the state.
*/
static void timer_function_scheduler_timeout (void *data)
{
struct scheduler_pause_timeout_data *timeout_data = (struct scheduler_pause_timeout_data *)data;
unsigned long long tv_current;
unsigned long long tv_diff;
tv_current = qb_util_nano_current_get ();
if (timeout_data->tv_prev == 0) {
/*
* Initial call -> just pretend everything is ok
*/
timeout_data->tv_prev = tv_current;
timeout_data->max_tv_diff = 0;
}
/* On the initial call this is 0 and therefore never above the threshold */
tv_diff = tv_current - timeout_data->tv_prev;
timeout_data->tv_prev = tv_current;
if (tv_diff > timeout_data->max_tv_diff) {
log_printf (LOGSYS_LEVEL_WARNING, "Corosync main process was not scheduled for %0.4f ms "
"(threshold is %0.4f ms). Consider token timeout increase.",
(float)tv_diff / QB_TIME_NS_IN_MSEC, (float)timeout_data->max_tv_diff / QB_TIME_NS_IN_MSEC);
/* Record the miss: current time divided by 1000 (presumably ns -> us, confirm) and the delay in ms */
+
+ stats_add_schedmiss_event(tv_current / 1000, (float)tv_diff / QB_TIME_NS_IN_MSEC);
}
/*
* Set next threshold, because token_timeout can change
*/
timeout_data->max_tv_diff = timeout_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC * 0.8;
/* Re-arm with the same state; the handle is stored back into timeout_data */
qb_loop_timer_add (corosync_poll_handle,
QB_LOOP_MED,
timeout_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC / 3,
timeout_data,
timer_function_scheduler_timeout,
&timeout_data->handle);
}
/*
 * Switch the process to SCHED_RR at the maximum priority and give the
 * logging thread the same priority.
 *
 * Returns 0 on success and -1 when the priority could not be determined or
 * set, or when the platform lacks the required functions.  Failing to set
 * the logging thread priority after the process was switched is fatal
 * (corosync_exit_error).
 */
static int corosync_set_rr_scheduler (void)
{
#if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) && defined(HAVE_SCHED_SETSCHEDULER)
	int rc;

	sched_priority = sched_get_priority_max (SCHED_RR);
	if (sched_priority == -1) {
		LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
			"Could not get maximum scheduler priority");
		sched_priority = 0;
		return (-1);
	}

	global_sched_param.sched_priority = sched_priority;
	rc = sched_setscheduler (0, SCHED_RR, &global_sched_param);
	if (rc == -1) {
		LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING,
			"Could not set SCHED_RR at priority %d",
			global_sched_param.sched_priority);

		global_sched_param.sched_priority = 0;
#ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
		qb_log_thread_priority_set (SCHED_OTHER, 0);
#endif
		return (-1);
	}

	/*
	 * Turn on SCHED_RR in logsys system
	 */
#ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
	rc = qb_log_thread_priority_set (SCHED_RR, sched_priority);
#else
	rc = -1;
#endif
	if (rc == -1) {
		log_printf (LOGSYS_LEVEL_ERROR,
			"Could not set logsys thread priority."
			" Can't continue because of priority inversions.");
		corosync_exit_error (COROSYNC_DONE_LOGSETUP);
	}

	return (0);
#else
	log_printf(LOGSYS_LEVEL_WARNING,
		"The Platform is missing process priority setting features. Leaving at default.");

	return (-1);
#endif
}
/*
 * Local replacement for basename(3), whose man page carries warnings about
 * thread-safety and portability.
 *
 * Returns a pointer into file_name: the part after the last '/', or
 * file_name itself when it contains no '/'.
 */
static const char *corosync_basename(const char *file_name)
{
	const char *last_slash = strrchr (file_name, '/');

	return (last_slash != NULL) ? (last_slash + 1) : file_name;
}
/*
 * printf-style logging entry point that forwards to libqb.  Only the base
 * name of file_name is passed on.
 */
static void
_logsys_log_printf(int level, int subsys,
	const char *function_name,
	const char *file_name,
	int file_line,
	const char *format,
	...) __attribute__((format(printf, 6, 7)));

static void
_logsys_log_printf(int level, int subsys,
	const char *function_name,
	const char *file_name,
	int file_line,
	const char *format, ...)
{
	va_list args;

	va_start(args, format);
	qb_log_from_external_source_va(function_name,
		corosync_basename(file_name),
		format,
		level,
		file_line,
		subsys,
		args);
	va_end(args);
}
/*
 * icmap tracking callback for the runtime.blackbox.* control keys: a change
 * of dump_flight_data stores the blackbox, a change of dump_state dumps the
 * state of all services.  Only key_name is inspected.
 */
static void fplay_key_change_notify_fn (
	int32_t event,
	const char *key_name,
	struct icmap_notify_value new_val,
	struct icmap_notify_value old_val,
	void *user_data)
{
	const int is_flight_data =
		(strcmp(key_name, "runtime.blackbox.dump_flight_data") == 0);
	const int is_state =
		(strcmp(key_name, "runtime.blackbox.dump_state") == 0);

	if (is_flight_data) {
		fprintf(stderr,"Writetofile\n");
		corosync_blackbox_write_to_file ();
	}

	if (is_state) {
		fprintf(stderr,"statefump\n");
		corosync_state_dump ();
	}
}
/*
 * Create the runtime.blackbox.* control keys with the value "no" and track
 * every add/delete/modify on them with fplay_key_change_notify_fn().
 */
static void corosync_fplay_control_init (void)
{
	icmap_track_t track = NULL;
	const int32_t all_events =
		ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY;

	icmap_set_string("runtime.blackbox.dump_flight_data", "no");
	icmap_set_string("runtime.blackbox.dump_state", "no");

	icmap_track_add("runtime.blackbox.dump_flight_data", all_events,
		fplay_key_change_notify_fn, NULL, &track);
	icmap_track_add("runtime.blackbox.dump_state", all_events,
		fplay_key_change_notify_fn, NULL, &track);
}
/*
 * icmap tracking callback for runtime.force_gather.
 *
 * When the key holds anything other than "no" (or cannot be read), it is
 * reset to "no" and totem is forced into the GATHER state.  Resetting the key
 * triggers this callback again; that run sees "no" and returns immediately.
 *
 * key_val starts out as NULL so that the free() at the end is well defined
 * when icmap_get_string() fails and never stores a string.
 */
static void force_gather_notify_fn(
	int32_t event,
	const char *key_name,
	struct icmap_notify_value new_val,
	struct icmap_notify_value old_val,
	void *user_data)
{
	char *key_val = NULL;

	if (icmap_get_string(key_name, &key_val) == CS_OK && strcmp(key_val, "no") == 0)
		goto out;

	icmap_set_string("runtime.force_gather", "no");

	if (strcmp(key_name, "runtime.force_gather") == 0) {
		log_printf(LOGSYS_LEVEL_ERROR, "Forcing into GATHER state\n");
		totempg_force_gather();
	}

out:
	free(key_val);
}
/*
 * Create runtime.force_gather with the value "no" and track every
 * add/delete/modify on it with force_gather_notify_fn().
 */
static void corosync_force_gather_init (void)
{
	icmap_track_t track = NULL;

	icmap_set_string("runtime.force_gather", "no");

	icmap_track_add("runtime.force_gather",
		ICMAP_TRACK_MODIFY | ICMAP_TRACK_DELETE | ICMAP_TRACK_ADD,
		force_gather_notify_fn,
		NULL,
		&track);
}
/*
 * Set the RO flag for keys which either do not make sense to be changed by
 * the user (statistics) or whose change is not reflected at runtime
 * (totem.crypto_cipher, ...).
 *
 * Some RO keys cannot be determined at this stage, so they are set later in
 * other functions (like nodelist.local_node_pos, ...).
 */
static void set_icmap_ro_keys_flag (void)
{
	/*
	 * Prefixes: all keys of internal configuration and runtime statistics
	 */
	static const char *const ro_prefixes[] = {
		"internal_configuration.",
		"runtime.services.",
		"runtime.config.",
		"runtime.totem.",
		"uidgid.config.",
		"system.",
		"nodelist.",
	};
	/*
	 * Concrete configuration keys which can't be changed during runtime
	 */
	static const char *const ro_keys[] = {
		"totem.crypto_cipher",
		"totem.crypto_hash",
		"totem.keyfile",
		"totem.key",
		"totem.secauth",
		"totem.ip_version",
		"totem.rrp_mode",
		"totem.transport",
		"totem.cluster_name",
		"totem.netmtu",
		"totem.threads",
		"totem.version",
		"totem.nodeid",
		"totem.clear_node_high_bit",
		"config.reload_in_progress",
		"config.totemconfig_reload_in_progress",
	};
	size_t i;

	for (i = 0; i < sizeof (ro_prefixes) / sizeof (ro_prefixes[0]); i++) {
		icmap_set_ro_access(ro_prefixes[i], CS_TRUE, CS_TRUE);
	}

	for (i = 0; i < sizeof (ro_keys) / sizeof (ro_keys[0]); i++) {
		icmap_set_ro_access(ro_keys[i], CS_FALSE, CS_TRUE);
	}
}
/*
 * Bring up everything that depends on totem being ready: default services,
 * IPC, the stats timer, the blackbox and force-gather control keys and the
 * sync engine.  A failure to initialize the default services is fatal.
 */
static void main_service_ready (void)
{
	/*
	 * This must occur after totempg is initialized because "this_ip" must be set
	 */
	if (corosync_service_defaults_link_and_init (api) == -1) {
		log_printf (LOGSYS_LEVEL_ERROR, "Could not initialize default services");
		corosync_exit_error (COROSYNC_DONE_INIT_SERVICES);
	}

	cs_ipcs_init();
	corosync_totem_stats_init ();
	corosync_fplay_control_init ();
	corosync_force_gather_init ();

	sync_init (corosync_sync_callbacks_retrieve, corosync_sync_completed);
}
/*
* Acquire the single-instance lock: open (creating if needed) lockfile, take
* a write lock on the whole file, replace its content with pid and mark the
* descriptor close-on-exec.
*
* Returns COROSYNC_DONE_EXIT on success, COROSYNC_DONE_ALREADY_RUNNING when
* another process holds the lock and COROSYNC_DONE_ACQUIRE_LOCK on any other
* failure.  On success the descriptor is not closed (presumably so that the
* lock stays held for the lifetime of the process).
*/
static enum e_corosync_done corosync_flock (const char *lockfile, pid_t pid)
{
struct flock lock;
enum e_corosync_done err;
char pid_s[17];
int fd_flag;
int lf;
err = COROSYNC_DONE_EXIT;
lf = open (lockfile, O_WRONLY | O_CREAT, 0640);
if (lf == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create lock file.");
return (COROSYNC_DONE_ACQUIRE_LOCK);
}
retry_fcntl:
/* l_start 0 with l_len 0 covers the whole file */
lock.l_type = F_WRLCK;
lock.l_start = 0;
lock.l_whence = SEEK_SET;
lock.l_len = 0;
if (fcntl (lf, F_SETLK, &lock) == -1) {
switch (errno) {
case EINTR:
goto retry_fcntl;
break;
case EAGAIN:
case EACCES:
/* The lock is held by somebody else: the file is left in place */
log_printf (LOGSYS_LEVEL_ERROR, "Another Corosync instance is already running.");
err = COROSYNC_DONE_ALREADY_RUNNING;
goto error_close;
break;
default:
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't acquire lock. Error was %s",
strerror(errno));
err = COROSYNC_DONE_ACQUIRE_LOCK;
goto error_close;
break;
}
}
/* From here on the lock is ours, so failures also remove the file */
if (ftruncate (lf, 0) == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't truncate lock file. Error was %s",
strerror (errno));
err = COROSYNC_DONE_ACQUIRE_LOCK;
goto error_close_unlink;
}
memset (pid_s, 0, sizeof (pid_s));
snprintf (pid_s, sizeof (pid_s) - 1, "%u\n", pid);
retry_write:
if (write (lf, pid_s, strlen (pid_s)) != strlen (pid_s)) {
if (errno == EINTR) {
goto retry_write;
} else {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't write pid to lock file. "
"Error was %s", strerror (errno));
err = COROSYNC_DONE_ACQUIRE_LOCK;
goto error_close_unlink;
}
}
if ((fd_flag = fcntl (lf, F_GETFD, 0)) == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't get close-on-exec flag from lock file. "
"Error was %s", strerror (errno));
err = COROSYNC_DONE_ACQUIRE_LOCK;
goto error_close_unlink;
}
fd_flag |= FD_CLOEXEC;
if (fcntl (lf, F_SETFD, fd_flag) == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't set close-on-exec flag to lock file. "
"Error was %s", strerror (errno));
err = COROSYNC_DONE_ACQUIRE_LOCK;
goto error_close_unlink;
}
/* Success: err is still COROSYNC_DONE_EXIT and lf stays open */
return (err);
error_close_unlink:
unlink (lockfile);
error_close:
close (lf);
return (err);
}
/*
 * Move the process into the root cpu cgroup by writing its pid into
 * /sys/fs/cgroup/cpu/tasks.
 *
 * Returns 0 when the pid was written and the file closed successfully, or
 * when cpu.rt_runtime_us does not exist (nothing to do).  Returns -1 when
 * the tasks file cannot be opened, written or closed.
 *
 * The pid only counts as stored once fclose() succeeded, because the write
 * goes through a buffered stream.
 */
static int corosync_move_to_root_cgroup(void) {
	FILE *f;
	int res = -1;
	int pid_written;

	/*
	 * /sys/fs/cgroup is hardcoded, because most of Linux distributions are now
	 * using systemd and systemd uses hardcoded path of cgroup mount point.
	 *
	 * This feature is expected to be removed as soon as systemd gets support
	 * for managing RT configuration.
	 */
	f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt");
	if (f == NULL) {
		log_printf(LOGSYS_LEVEL_DEBUG, "cpu.rt_runtime_us doesn't exists -> "
		    "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED");

		res = 0;
		goto exit_res;
	}
	(void)fclose(f);

	f = fopen("/sys/fs/cgroup/cpu/tasks", "w");
	if (f == NULL) {
		log_printf(LOGSYS_LEVEL_WARNING, "Can't open cgroups tasks file for writing");

		goto exit_res;
	}

	pid_written = (fprintf(f, "%jd\n", (intmax_t)getpid()) > 0);
	if (!pid_written) {
		log_printf(LOGSYS_LEVEL_WARNING, "Can't write corosync pid into cgroups tasks file");
	}

	/* The file is closed on both paths; a failed close is a failure too */
	if (fclose(f) != 0) {
		log_printf(LOGSYS_LEVEL_WARNING, "Can't close cgroups tasks file");

		goto exit_res;
	}

	if (pid_written) {
		res = 0;
	}

exit_res:
	return (res);
}
/**
 * Corosync executive entry point.
 *
 * Sequence, as visible below: parse the command line, install the early
 * signal handlers, initialize icmap, read the main configuration, initialize
 * the stats map, read the logging configuration, verify and enter the state
 * directory, read/validate the totem configuration, apply cgroup, scheduler
 * and priority settings, optionally detach from the TTY, lock memory, create
 * the libqb loop, take the pid lock file, bring up totempg, drop privileges
 * and finally run the main loop until exit is requested.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments; options handled are -c, -f, -t and -v
 * @param envp process environment (not referenced in this function)
 * @return EXIT_SUCCESS or EXIT_FAILURE on the early option-handling paths;
 *         most error paths leave through corosync_exit_error() instead
 *         (presumably it does not return - confirm against its definition)
 */
int main (int argc, char **argv, char **envp)
{
const char *error_string;
struct totem_config totem_config;
int res, ch;
int background, sched_rr, prio, testonly, move_to_root_cgroup;
struct stat stat_out;
enum e_corosync_done flock_err;
uint64_t totem_config_warnings;
struct scheduler_pause_timeout_data scheduler_pause_timeout_data;
long int tmpli;
char *ep;
char *tmp_str;
int log_subsys_id_totem;
/*
 * Default configuration: detach into the background and perform a
 * full start (not a configuration test)
 */
background = 1;
testonly = 0;
while ((ch = getopt (argc, argv, "c:ftv")) != EOF) {
switch (ch) {
case 'c':
/*
 * snprintf returns the length it wanted to write, so a result
 * >= the buffer size means the path was truncated
 */
res = snprintf(corosync_config_file, sizeof(corosync_config_file), "%s", optarg);
if (res >= sizeof(corosync_config_file)) {
fprintf (stderr, "Config file path too long.\n");
syslog (LOGSYS_LEVEL_ERROR, "Config file path too long.");
logsys_system_fini();
return EXIT_FAILURE;
}
break;
case 'f':
background = 0;
break;
case 't':
testonly = 1;
break;
case 'v':
printf ("Corosync Cluster Engine, version '%s'\n", VERSION);
printf ("Copyright (c) 2006-2018 Red Hat, Inc.\n");
logsys_system_fini();
return EXIT_SUCCESS;
break;
default:
/* Unknown option: print usage and fail */
fprintf(stderr, \
"usage:\n"\
" -c : Corosync config file path.\n"\
" -f : Start application in foreground.\n"\
" -t : Test configuration and exit.\n"\
" -v : Display version and SVN revision of Corosync and exit.\n");
logsys_system_fini();
return EXIT_FAILURE;
}
}
/*
 * Only the crash signals are hooked here with signal();
 * other signals are registered later via qb_loop_signal_add
 */
(void)signal (SIGSEGV, sigsegv_handler);
(void)signal (SIGABRT, sigsegv_handler);
/* SIGPIPE is ignored only when MSG_NOSIGNAL evaluates to a non-zero value */
#if MSG_NOSIGNAL != 0
(void)signal (SIGPIPE, SIG_IGN);
#endif
if (icmap_init() != CS_OK) {
fprintf (stderr, "Corosync Executive couldn't initialize configuration component.\n");
syslog (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't initialize configuration component.");
corosync_exit_error (COROSYNC_DONE_ICMAP);
}
set_icmap_ro_keys_flag();
/*
 * Initialize the corosync_api_v1 definition
 */
api = apidef_get ();
res = coroparse_configparse(icmap_get_global_map(), &error_string);
if (res == -1) {
/*
 * Logsys can't log properly at this early stage, and we need to get
 * this message out, so it goes to both stderr and syslog
 */
fprintf (stderr, "%s\n", error_string);
syslog (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
if (stats_map_init(api) != CS_OK) {
fprintf (stderr, "Corosync Executive couldn't initialize statistics component.\n");
syslog (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't initialize statistics component.");
corosync_exit_error (COROSYNC_DONE_STATS);
}
res = corosync_log_config_read (&error_string);
if (res == -1) {
/*
 * If we are here, we _must_ flush the logsys queue
 * and try to inform that we couldn't read the config.
 * This is a desperate attempt before certain death
 * and there is no guarantee that we can print to stderr
 * nor that logsys is sending the messages where we expect.
 */
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
fprintf(stderr, "%s", error_string);
syslog (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD);
}
/* The startup banner is suppressed when only testing the configuration */
if (!testonly) {
log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine %s starting up", VERSION);
log_printf (LOGSYS_LEVEL_INFO, "Corosync built-in features:" PACKAGE_FEATURES "");
}
/*
 * Create totem logsys subsys before totem_config_read so log functions can be used
 */
log_subsys_id_totem = _logsys_subsys_create("TOTEM", "totem,"
"totemip.c,totemconfig.c,totemcrypto.c,totemsrp.c,"
"totempg.c,totemudp.c,totemudpu.c,totemnet.c,totemknet.c");
/*
 * Make sure required directory is present (it must exist and be a
 * directory), then make it the working directory
 */
res = stat (get_state_dir(), &stat_out);
if ((res == -1) || (res == 0 && !S_ISDIR(stat_out.st_mode))) {
log_printf (LOGSYS_LEVEL_ERROR, "State directory %s not present. Please create it.", get_state_dir());
corosync_exit_error (COROSYNC_DONE_DIR_NOT_PRESENT);
}
res = chdir(get_state_dir());
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Cannot chdir to state directory %s. "
"Please make sure it has correct context and rights.", get_state_dir());
corosync_exit_error (COROSYNC_DONE_DIR_NOT_PRESENT);
}
res = totem_config_read (&totem_config, &error_string, &totem_config_warnings);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
/*
 * totem_config_warnings is a bit mask; report each warning that was set
 * and, if any was set, add a general migration hint
 */
if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_IGNORED) {
log_printf (LOGSYS_LEVEL_WARNING, "member section is used together with nodelist. Members ignored.");
}
if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED) {
log_printf (LOGSYS_LEVEL_WARNING, "member section is deprecated.");
}
if (totem_config_warnings & TOTEM_CONFIG_WARNING_TOTEM_NODEID_IGNORED) {
log_printf (LOGSYS_LEVEL_WARNING, "nodeid appears both in totem section and nodelist. Nodelist one is used.");
}
if (totem_config_warnings & TOTEM_CONFIG_BINDNETADDR_NODELIST_SET) {
log_printf (LOGSYS_LEVEL_WARNING, "interface section bindnetaddr is used together with nodelist. "
"Nodelist one is going to be used.");
}
if (totem_config_warnings != 0) {
log_printf (LOGSYS_LEVEL_WARNING, "Please migrate config file to nodelist.");
}
res = totem_config_keyread (&totem_config, &error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
res = totem_config_validate (&totem_config, &error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
/*
 * In test-only mode (-t) stop here: the configuration has been read
 * and validated and nothing below is meant to run
 */
if (testonly) {
corosync_exit_error (COROSYNC_DONE_EXIT);
}
/*
 * system.move_to_root_cgroup defaults to enabled; any value other
 * than "yes" disables it
 */
move_to_root_cgroup = 1;
if (icmap_get_string("system.move_to_root_cgroup", &tmp_str) == CS_OK) {
if (strcmp(tmp_str, "yes") != 0) {
move_to_root_cgroup = 0;
}
free(tmp_str);
}
/*
 * Try to move corosync into root cpu cgroup. Failure is not fatal and
 * error is deliberately ignored.
 */
if (move_to_root_cgroup) {
(void)corosync_move_to_root_cgroup();
}
/*
 * system.sched_rr defaults to enabled; any value other than "yes"
 * disables it
 */
sched_rr = 1;
if (icmap_get_string("system.sched_rr", &tmp_str) == CS_OK) {
if (strcmp(tmp_str, "yes") != 0) {
sched_rr = 0;
}
free(tmp_str);
}
/*
 * system.priority: "max" maps to INT_MIN, "min" maps to INT_MAX,
 * anything else must be a complete decimal integer within int range
 */
prio = 0;
if (icmap_get_string("system.priority", &tmp_str) == CS_OK) {
if (strcmp(tmp_str, "max") == 0) {
prio = INT_MIN;
} else if (strcmp(tmp_str, "min") == 0) {
prio = INT_MAX;
} else {
errno = 0;
tmpli = strtol(tmp_str, &ep, 10);
if (errno != 0 || *ep != '\0' || tmpli > INT_MAX || tmpli < INT_MIN) {
log_printf (LOGSYS_LEVEL_ERROR, "Priority value %s is invalid", tmp_str);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
prio = tmpli;
}
free(tmp_str);
}
/*
 * Set round robin realtime scheduling with priority 99.
 * When sched_rr is enabled the configured priority is overridden:
 * on failure prio becomes INT_MIN (the same value "max" maps to),
 * on success prio becomes 0 so that setpriority below is skipped.
 */
if (sched_rr) {
if (corosync_set_rr_scheduler () != 0) {
prio = INT_MIN;
} else {
prio = 0;
}
}
/* A non-zero prio is applied to the calling process group; failure is only a warning */
if (prio != 0) {
if (setpriority(PRIO_PGRP, 0, prio) != 0) {
LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING,
"Could not set priority %d", prio);
}
}
/*
 * Hand the ring id callbacks and the logging configuration to totem.
 * The file-level totem_logging_configuration is copied first and its
 * fields are then overridden one by one.
 */
totem_config.totem_memb_ring_id_create_or_load = corosync_ring_id_create_or_load;
totem_config.totem_memb_ring_id_store = corosync_ring_id_store;
totem_config.totem_logging_configuration = totem_logging_configuration;
totem_config.totem_logging_configuration.log_subsys_id = log_subsys_id_totem;
totem_config.totem_logging_configuration.log_level_security = LOGSYS_LEVEL_WARNING;
totem_config.totem_logging_configuration.log_level_error = LOGSYS_LEVEL_ERROR;
totem_config.totem_logging_configuration.log_level_warning = LOGSYS_LEVEL_WARNING;
totem_config.totem_logging_configuration.log_level_notice = LOGSYS_LEVEL_NOTICE;
totem_config.totem_logging_configuration.log_level_debug = LOGSYS_LEVEL_DEBUG;
totem_config.totem_logging_configuration.log_level_trace = LOGSYS_LEVEL_TRACE;
totem_config.totem_logging_configuration.log_printf = _logsys_log_printf;
logsys_config_apply();
/*
 * Now we are fully initialized.
 * Unless -f was given, detach from the TTY; the blackbox pre/post fork
 * hooks bracket the detach.
 */
if (background) {
logsys_blackbox_prefork();
corosync_tty_detach ();
logsys_blackbox_postfork();
log_printf (LOGSYS_LEVEL_DEBUG, "Corosync TTY detached");
}
/*
 * Lock all memory to avoid page faults which may interrupt
 * application healthchecking
 */
corosync_mlockall ();
corosync_poll_handle = qb_loop_create ();
/*
 * The scheduler pause detection callback is invoked once directly here
 * (presumably it re-arms itself as a timer - confirm in its definition)
 */
memset(&scheduler_pause_timeout_data, 0, sizeof(scheduler_pause_timeout_data));
scheduler_pause_timeout_data.totem_config = &totem_config;
timer_function_scheduler_timeout (&scheduler_pause_timeout_data);
/* SIGUSR2 triggers diagnostics; SIGINT, SIGQUIT and SIGTERM request exit */
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW,
SIGUSR2, NULL, sig_diag_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGINT, NULL, sig_exit_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGQUIT, NULL, sig_exit_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGTERM, NULL, sig_exit_handler, NULL);
if (logsys_thread_start() != 0) {
log_printf (LOGSYS_LEVEL_ERROR, "Can't initialize log thread");
corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD);
}
/* Take the pid lock file; any result other than COROSYNC_DONE_EXIT is an error code */
if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) != COROSYNC_DONE_EXIT) {
corosync_exit_error (flock_err);
}
/*
 * If totempg_initialize doesn't have root privileges, it cannot
 * bind to a specific interface. This only matters if
 * there is more than one interface in a system, so
 * in this case, only a warning is printed
 */
/*
 * Join multicast group and setup delivery
 * and configuration change functions
 */
if (totempg_initialize (
corosync_poll_handle,
&totem_config) != 0) {
log_printf (LOGSYS_LEVEL_ERROR, "Can't initialize TOTEM layer");
corosync_exit_error (COROSYNC_DONE_FATAL_ERR);
}
totempg_service_ready_register (
main_service_ready);
totempg_groups_initialize (
&corosync_group_handle,
deliver_fn,
confchg_fn);
totempg_groups_join (
corosync_group_handle,
&corosync_group,
1);
/*
 * Drop root privileges to user 'corosync'
 * TODO: Don't really need full root capabilities;
 * needed capabilities are:
 * CAP_NET_RAW (bindtodevice)
 * CAP_SYS_NICE (setscheduler)
 * CAP_IPC_LOCK (mlockall)
 */
priv_drop ();
schedwrk_init (
serialize_lock,
serialize_unlock);
/*
 * Start main processing loop
 */
qb_loop_run (corosync_poll_handle);
/*
 * Exit was requested
 */
totempg_finalize ();
/*
 * Free the loop resources
 */
qb_loop_destroy (corosync_poll_handle);
/*
 * Free up the icmap
 * NOTE(review): no icmap teardown call is made at this point in main()
 */
/*
 * Remove pid lock file
 */
unlink (corosync_lock_file);
corosync_exit_error (COROSYNC_DONE_EXIT);
return EXIT_SUCCESS;
}
diff --git a/exec/stats.c b/exec/stats.c
index e89504e6..d5c1cbcd 100644
--- a/exec/stats.c
+++ b/exec/stats.c
@@ -1,672 +1,767 @@
/*
- * Copyright (c) 2017 Red Hat, Inc.
+ * Copyright (c) 2017-2020 Red Hat, Inc.
*
* All rights reserved.
*
* Authors: Christine Caulfield (ccaulfie@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "util.h"
#include "ipcs_stats.h"
#include "stats.h"
LOGSYS_DECLARE_SUBSYS ("STATS");
static qb_map_t *stats_map;
+/* One recorded schedmiss event: the element type of the schedmiss_event[] array */
+struct schedmiss_entry {
+ uint64_t timestamp; /* timestamp stored for the event (units set by the caller - not visible here) */
+ float delay; /* delay stored for the event (units set by the caller - not visible here) */
+};
+#define MAX_SCHEDMISS_EVENTS 10 /* capacity of schedmiss_event[] */
+static struct schedmiss_entry schedmiss_event[MAX_SCHEDMISS_EVENTS]; /* new entries are stored at index 0 */
+static unsigned int highest_schedmiss_event; /* number of entries that currently have keys in the stats trie */
+
+#define SCHEDMISS_PREFIX "stats.schedmiss"
+
/* Convert iterator number to text and a stats pointer */
struct cs_stats_conv {
- enum {STAT_PG, STAT_SRP, STAT_KNET, STAT_KNET_HANDLE, STAT_IPCSC, STAT_IPCSG} type;
+ enum {STAT_PG, STAT_SRP, STAT_KNET, STAT_KNET_HANDLE, STAT_IPCSC, STAT_IPCSG, STAT_SCHEDMISS} type;
const char *name;
const size_t offset;
const icmap_value_types_t value_type;
};
/*
 * Key name -> field offset/type table for the totempg_stats_t structure
 * (the STAT_PG entries of the stats map).
 */
struct cs_stats_conv cs_pg_stats[] = {
{ STAT_PG, "msg_queue_avail", offsetof(totempg_stats_t, msg_queue_avail), ICMAP_VALUETYPE_UINT32},
{ STAT_PG, "msg_reserved", offsetof(totempg_stats_t, msg_reserved), ICMAP_VALUETYPE_UINT32},
};
/*
 * Key name -> field offset/type table for the totemsrp_stats_t structure
 * (the STAT_SRP entries of the stats map). Every key name here matches the
 * name of the field it reads.
 */
struct cs_stats_conv cs_srp_stats[] = {
{ STAT_SRP, "orf_token_tx", offsetof(totemsrp_stats_t, orf_token_tx), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "orf_token_rx", offsetof(totemsrp_stats_t, orf_token_rx), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "memb_merge_detect_tx", offsetof(totemsrp_stats_t, memb_merge_detect_tx), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "memb_merge_detect_rx", offsetof(totemsrp_stats_t, memb_merge_detect_rx), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "memb_join_tx", offsetof(totemsrp_stats_t, memb_join_tx), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "memb_join_rx", offsetof(totemsrp_stats_t, memb_join_rx), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "mcast_tx", offsetof(totemsrp_stats_t, mcast_tx), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "mcast_retx", offsetof(totemsrp_stats_t, mcast_retx), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "mcast_rx", offsetof(totemsrp_stats_t, mcast_rx), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "memb_commit_token_tx", offsetof(totemsrp_stats_t, memb_commit_token_tx), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "memb_commit_token_rx", offsetof(totemsrp_stats_t, memb_commit_token_rx), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "token_hold_cancel_tx", offsetof(totemsrp_stats_t, token_hold_cancel_tx), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "token_hold_cancel_rx", offsetof(totemsrp_stats_t, token_hold_cancel_rx), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "operational_entered", offsetof(totemsrp_stats_t, operational_entered), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "operational_token_lost", offsetof(totemsrp_stats_t, operational_token_lost), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "gather_entered", offsetof(totemsrp_stats_t, gather_entered), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "gather_token_lost", offsetof(totemsrp_stats_t, gather_token_lost), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "commit_entered", offsetof(totemsrp_stats_t, commit_entered), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "commit_token_lost", offsetof(totemsrp_stats_t, commit_token_lost), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "recovery_entered", offsetof(totemsrp_stats_t, recovery_entered), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "recovery_token_lost", offsetof(totemsrp_stats_t, recovery_token_lost), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "consensus_timeouts", offsetof(totemsrp_stats_t, consensus_timeouts), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "rx_msg_dropped", offsetof(totemsrp_stats_t, rx_msg_dropped), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "time_since_token_last_received", offsetof(totemsrp_stats_t, time_since_token_last_received), ICMAP_VALUETYPE_UINT64},
{ STAT_SRP, "continuous_gather", offsetof(totemsrp_stats_t, continuous_gather), ICMAP_VALUETYPE_UINT32},
{ STAT_SRP, "continuous_sendmsg_failures", offsetof(totemsrp_stats_t, continuous_sendmsg_failures), ICMAP_VALUETYPE_UINT32},
{ STAT_SRP, "firewall_enabled_or_nic_failure", offsetof(totemsrp_stats_t, firewall_enabled_or_nic_failure), ICMAP_VALUETYPE_UINT8},
{ STAT_SRP, "mtt_rx_token", offsetof(totemsrp_stats_t, mtt_rx_token), ICMAP_VALUETYPE_UINT32},
{ STAT_SRP, "avg_token_workload", offsetof(totemsrp_stats_t, avg_token_workload), ICMAP_VALUETYPE_UINT32},
{ STAT_SRP, "avg_backlog_calc", offsetof(totemsrp_stats_t, avg_backlog_calc), ICMAP_VALUETYPE_UINT32},
};
/*
 * Key name -> field offset/type table for struct knet_link_status
 * (the STAT_KNET, per-link entries of the stats map).
 *
 * NOTE(review): the "rx_total_retries" key below reads the
 * stats.tx_total_retries field. This looks like a naming slip (every other
 * key matches its field), but the key string is visible to stats consumers,
 * so confirm before renaming it.
 */
struct cs_stats_conv cs_knet_stats[] = {
{ STAT_KNET, "enabled", offsetof(struct knet_link_status, enabled), ICMAP_VALUETYPE_UINT8},
{ STAT_KNET, "connected", offsetof(struct knet_link_status, connected), ICMAP_VALUETYPE_UINT8},
{ STAT_KNET, "mtu", offsetof(struct knet_link_status, mtu), ICMAP_VALUETYPE_UINT32},
{ STAT_KNET, "tx_data_packets", offsetof(struct knet_link_status, stats.tx_data_packets), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "rx_data_packets", offsetof(struct knet_link_status, stats.rx_data_packets), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "tx_data_bytes", offsetof(struct knet_link_status, stats.tx_data_bytes), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "rx_data_bytes", offsetof(struct knet_link_status, stats.rx_data_bytes), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "tx_ping_packets", offsetof(struct knet_link_status, stats.tx_ping_packets), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "rx_ping_packets", offsetof(struct knet_link_status, stats.rx_ping_packets), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "tx_ping_bytes", offsetof(struct knet_link_status, stats.tx_ping_bytes), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "rx_ping_bytes", offsetof(struct knet_link_status, stats.rx_ping_bytes), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "tx_pong_packets", offsetof(struct knet_link_status, stats.tx_pong_packets), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "rx_pong_packets", offsetof(struct knet_link_status, stats.rx_pong_packets), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "tx_pong_bytes", offsetof(struct knet_link_status, stats.tx_pong_bytes), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "rx_pong_bytes", offsetof(struct knet_link_status, stats.rx_pong_bytes), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "tx_pmtu_packets", offsetof(struct knet_link_status, stats.tx_pmtu_packets), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "rx_pmtu_packets", offsetof(struct knet_link_status, stats.rx_pmtu_packets), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "tx_pmtu_bytes", offsetof(struct knet_link_status, stats.tx_pmtu_bytes), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "rx_pmtu_bytes", offsetof(struct knet_link_status, stats.rx_pmtu_bytes), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "tx_total_packets", offsetof(struct knet_link_status, stats.tx_total_packets), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "rx_total_packets", offsetof(struct knet_link_status, stats.rx_total_packets), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "tx_total_bytes", offsetof(struct knet_link_status, stats.tx_total_bytes), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "rx_total_bytes", offsetof(struct knet_link_status, stats.rx_total_bytes), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "tx_total_errors", offsetof(struct knet_link_status, stats.tx_total_errors), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "rx_total_retries", offsetof(struct knet_link_status, stats.tx_total_retries), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET, "tx_pmtu_errors", offsetof(struct knet_link_status, stats.tx_pmtu_errors), ICMAP_VALUETYPE_UINT32},
{ STAT_KNET, "tx_pmtu_retries", offsetof(struct knet_link_status, stats.tx_pmtu_retries), ICMAP_VALUETYPE_UINT32},
{ STAT_KNET, "tx_ping_errors", offsetof(struct knet_link_status, stats.tx_ping_errors), ICMAP_VALUETYPE_UINT32},
{ STAT_KNET, "tx_ping_retries", offsetof(struct knet_link_status, stats.tx_ping_retries), ICMAP_VALUETYPE_UINT32},
{ STAT_KNET, "tx_pong_errors", offsetof(struct knet_link_status, stats.tx_pong_errors), ICMAP_VALUETYPE_UINT32},
{ STAT_KNET, "tx_pong_retries", offsetof(struct knet_link_status, stats.tx_pong_retries), ICMAP_VALUETYPE_UINT32},
{ STAT_KNET, "tx_data_errors", offsetof(struct knet_link_status, stats.tx_data_errors), ICMAP_VALUETYPE_UINT32},
{ STAT_KNET, "tx_data_retries", offsetof(struct knet_link_status, stats.tx_data_retries), ICMAP_VALUETYPE_UINT32},
{ STAT_KNET, "latency_min", offsetof(struct knet_link_status, stats.latency_min), ICMAP_VALUETYPE_UINT32},
{ STAT_KNET, "latency_max", offsetof(struct knet_link_status, stats.latency_max), ICMAP_VALUETYPE_UINT32},
{ STAT_KNET, "latency_ave", offsetof(struct knet_link_status, stats.latency_ave), ICMAP_VALUETYPE_UINT32},
{ STAT_KNET, "latency_samples", offsetof(struct knet_link_status, stats.latency_samples), ICMAP_VALUETYPE_UINT32},
{ STAT_KNET, "down_count", offsetof(struct knet_link_status, stats.down_count), ICMAP_VALUETYPE_UINT32},
{ STAT_KNET, "up_count", offsetof(struct knet_link_status, stats.up_count), ICMAP_VALUETYPE_UINT32},
};
/*
 * Key name -> field offset/type table for struct knet_handle_stats
 * (the STAT_KNET_HANDLE entries of the stats map). Every key name here
 * matches the name of the field it reads.
 */
struct cs_stats_conv cs_knet_handle_stats[] = {
{ STAT_KNET_HANDLE, "tx_uncompressed_packets", offsetof(struct knet_handle_stats, tx_uncompressed_packets), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "tx_compressed_packets", offsetof(struct knet_handle_stats, tx_compressed_packets), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "tx_compressed_original_bytes", offsetof(struct knet_handle_stats, tx_compressed_original_bytes), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "tx_compressed_size_bytes", offsetof(struct knet_handle_stats, tx_compressed_size_bytes), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "tx_compress_time_min", offsetof(struct knet_handle_stats, tx_compress_time_min), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "tx_compress_time_max", offsetof(struct knet_handle_stats, tx_compress_time_max), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "tx_compress_time_ave", offsetof(struct knet_handle_stats, tx_compress_time_ave), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "rx_compressed_packets", offsetof(struct knet_handle_stats, rx_compressed_packets), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "rx_compressed_original_bytes", offsetof(struct knet_handle_stats, rx_compressed_original_bytes), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "rx_compressed_size_bytes", offsetof(struct knet_handle_stats, rx_compressed_size_bytes), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "rx_compress_time_min", offsetof(struct knet_handle_stats, rx_compress_time_min), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "rx_compress_time_max", offsetof(struct knet_handle_stats, rx_compress_time_max), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "rx_compress_time_ave", offsetof(struct knet_handle_stats, rx_compress_time_ave), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "tx_crypt_time_min", offsetof(struct knet_handle_stats, tx_crypt_time_min), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "tx_crypt_time_max", offsetof(struct knet_handle_stats, tx_crypt_time_max), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "tx_crypt_time_ave", offsetof(struct knet_handle_stats, tx_crypt_time_ave), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "tx_crypt_byte_overhead", offsetof(struct knet_handle_stats, tx_crypt_byte_overhead), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "tx_crypt_packets", offsetof(struct knet_handle_stats, tx_crypt_packets), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "rx_crypt_time_min", offsetof(struct knet_handle_stats, rx_crypt_time_min), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "rx_crypt_time_max", offsetof(struct knet_handle_stats, rx_crypt_time_max), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "rx_crypt_time_ave", offsetof(struct knet_handle_stats, rx_crypt_time_ave), ICMAP_VALUETYPE_UINT64},
{ STAT_KNET_HANDLE, "rx_crypt_packets", offsetof(struct knet_handle_stats, rx_crypt_packets), ICMAP_VALUETYPE_UINT64},
};
/*
 * Key name -> field offset/type table for struct ipcs_conn_stats
 * (the STAT_IPCSC, per-connection entries of the stats map). Fields come from
 * two members of that structure, cnx and conn. Some keys are spelled
 * differently from the field they read ("queueing" -> cnx.queuing,
 * "procname" -> cnx.proc_name, "dispatched" -> conn.events,
 * "flow_control" -> conn.flow_control_state). "procname" is the only
 * string-typed entry.
 */
struct cs_stats_conv cs_ipcs_conn_stats[] = {
{ STAT_IPCSC, "queueing", offsetof(struct ipcs_conn_stats, cnx.queuing), ICMAP_VALUETYPE_INT32},
{ STAT_IPCSC, "queued", offsetof(struct ipcs_conn_stats, cnx.queued), ICMAP_VALUETYPE_UINT32},
{ STAT_IPCSC, "invalid_request", offsetof(struct ipcs_conn_stats, cnx.invalid_request), ICMAP_VALUETYPE_UINT64},
{ STAT_IPCSC, "overload", offsetof(struct ipcs_conn_stats, cnx.overload), ICMAP_VALUETYPE_UINT64},
{ STAT_IPCSC, "sent", offsetof(struct ipcs_conn_stats, cnx.sent), ICMAP_VALUETYPE_UINT32},
{ STAT_IPCSC, "procname", offsetof(struct ipcs_conn_stats, cnx.proc_name), ICMAP_VALUETYPE_STRING},
{ STAT_IPCSC, "requests", offsetof(struct ipcs_conn_stats, conn.requests), ICMAP_VALUETYPE_UINT64},
{ STAT_IPCSC, "responses", offsetof(struct ipcs_conn_stats, conn.responses), ICMAP_VALUETYPE_UINT64},
{ STAT_IPCSC, "dispatched", offsetof(struct ipcs_conn_stats, conn.events), ICMAP_VALUETYPE_UINT64},
{ STAT_IPCSC, "send_retries", offsetof(struct ipcs_conn_stats, conn.send_retries), ICMAP_VALUETYPE_UINT64},
{ STAT_IPCSC, "recv_retries", offsetof(struct ipcs_conn_stats, conn.recv_retries), ICMAP_VALUETYPE_UINT64},
{ STAT_IPCSC, "flow_control", offsetof(struct ipcs_conn_stats, conn.flow_control_state), ICMAP_VALUETYPE_UINT32},
{ STAT_IPCSC, "flow_control_count", offsetof(struct ipcs_conn_stats, conn.flow_control_count), ICMAP_VALUETYPE_UINT64},
};
/*
 * Key name -> field offset/type table for struct ipcs_global_stats
 * (the STAT_IPCSG entries of the stats map). The key names carry a
 * "global." prefix that is not part of the field names.
 */
struct cs_stats_conv cs_ipcs_global_stats[] = {
{ STAT_IPCSG, "global.active", offsetof(struct ipcs_global_stats, active), ICMAP_VALUETYPE_UINT64},
{ STAT_IPCSG, "global.closed", offsetof(struct ipcs_global_stats, closed), ICMAP_VALUETYPE_UINT64},
};
/*
 * Key name -> field offset/type table for struct schedmiss_entry
 * (the STAT_SCHEDMISS entries of the stats map): index 0 describes the
 * timestamp field and index 1 the delay field.
 */
+struct cs_stats_conv cs_schedmiss_stats[] = {
+ { STAT_SCHEDMISS, "timestamp", offsetof(struct schedmiss_entry, timestamp), ICMAP_VALUETYPE_UINT64},
+ { STAT_SCHEDMISS, "delay", offsetof(struct schedmiss_entry, delay), ICMAP_VALUETYPE_FLOAT},
+};
/* Element counts of the conversion tables, derived from each table's own element size */
#define NUM_PG_STATS (sizeof(cs_pg_stats) / sizeof(cs_pg_stats[0]))
#define NUM_SRP_STATS (sizeof(cs_srp_stats) / sizeof(cs_srp_stats[0]))
#define NUM_KNET_STATS (sizeof(cs_knet_stats) / sizeof(cs_knet_stats[0]))
#define NUM_KNET_HANDLE_STATS (sizeof(cs_knet_handle_stats) / sizeof(cs_knet_handle_stats[0]))
#define NUM_IPCSC_STATS (sizeof(cs_ipcs_conn_stats) / sizeof(cs_ipcs_conn_stats[0]))
#define NUM_IPCSG_STATS (sizeof(cs_ipcs_global_stats) / sizeof(cs_ipcs_global_stats[0]))
/*
 * What goes in the trie: one item per stats key, stored in stats_map
 * under key_name.
 */
struct stats_item {
char *key_name; /* heap copy of the key (strdup'd on add, freed on removal) */
struct cs_stats_conv * cs_conv; /* conversion table entry describing the value; not owned by the item */
};
/* One of these per tracker registered on the stats map */
struct cs_stats_tracker
{
char *key_name; /* heap copy of the tracked key, or NULL when no key was given */
void *user_data; /* opaque pointer handed back to notify_fn */
int32_t events; /* ICMAP_TRACK_* flags requested for this tracker */
icmap_notify_fn_t notify_fn; /* callback invoked on a tracked event */
uint64_t old_value; /* last value seen for key_name, used to detect modification */
struct qb_list_head list; /* linkage used when walking stats_tracker_list_head */
};
QB_LIST_DECLARE (stats_tracker_list_head);
static const struct corosync_api_v1 *api;
/*
 * Fill in the length, type and/or value of one statistic.
 *
 * conv       conversion table entry describing the statistic
 * stat_array base address of the structure holding the statistic; may be
 *            NULL when only the length/type are wanted
 * value      destination buffer for the raw value, or NULL to skip the copy
 * value_len  receives the value length, or NULL; must not be NULL when
 *            value is given
 * type       receives the icmap value type, or NULL
 *
 * For string statistics the length is the actual string length including the
 * terminating NUL, but only when type, value_len and stat_array are all
 * supplied; otherwise it is the generic length for the value type.
 */
static void stats_map_set_value(struct cs_stats_conv *conv,
				void *stat_array,
				void *value,
				size_t *value_len,
				icmap_value_types_t *type)
{
	if (value_len != NULL) {
		*value_len = icmap_get_valuetype_len(conv->value_type);
	}

	if (type != NULL) {
		*type = conv->value_type;
	}

	/* Strings are measured rather than looked up by type */
	if (type != NULL && conv->value_type == ICMAP_VALUETYPE_STRING &&
	    value_len != NULL && stat_array != NULL) {
		*value_len = strlen((char *)stat_array + conv->offset) + 1;
	}

	if (value == NULL) {
		return;
	}

	assert(value_len != NULL);
	memcpy(value, (char *)stat_array + conv->offset, *value_len);
}
/*
 * Add a key to the stats trie.
 *
 * key      name of the statistic; copied, the caller keeps ownership
 * cs_conv  conversion table entry describing the value behind the key
 *
 * Allocation failures are silently ignored and the key is simply not added.
 * The item is inserted only once both the item and its key copy exist, so a
 * failed strdup never hands a NULL key to qb_map_put and never leaves a
 * nameless item behind.
 */
static void stats_add_entry(const char *key, struct cs_stats_conv *cs_conv)
{
	struct stats_item *item = malloc(sizeof(*item));

	if (item == NULL) {
		return;
	}

	item->key_name = strdup(key);
	if (item->key_name == NULL) {
		free(item);
		return;
	}

	item->cs_conv = cs_conv;
	qb_map_put(stats_map, item->key_name, item);
}
/*
 * Remove a key from the stats trie and release the item stored under it.
 * Unknown keys are ignored.
 */
static void stats_rm_entry(const char *key)
{
	struct stats_item *entry;

	entry = qb_map_get(stats_map, key);
	if (entry == NULL) {
		return;
	}

	/* Unlink from the map first, then free the key copy and the item */
	qb_map_rm(stats_map, entry->key_name);
	free(entry->key_name);
	free(entry);
}
cs_error_t stats_map_init(const struct corosync_api_v1 *corosync_api)
{
int i;
char param[ICMAP_KEYNAME_MAXLEN];
api = corosync_api;
stats_map = qb_trie_create();
if (!stats_map) {
return CS_ERR_INIT;
}
/* Populate the static portions of the trie */
for (i = 0; ics_conv;
switch (statinfo->type) {
case STAT_PG:
pg_stats = api->totem_get_stats();
stats_map_set_value(statinfo, pg_stats, value, value_len, type);
break;
case STAT_SRP:
pg_stats = api->totem_get_stats();
stats_map_set_value(statinfo, pg_stats->srp, value, value_len, type);
break;
case STAT_KNET_HANDLE:
res = totemknet_handle_get_stats(&knet_handle_stats);
if (res != CS_OK) {
return res;
}
stats_map_set_value(statinfo, &knet_handle_stats, value, value_len, type);
break;
case STAT_KNET:
if (sscanf(key_name, "stats.knet.node%d.link%d", &nodeid, &link_no) != 2) {
return CS_ERR_NOT_EXIST;
}
/* Validate node & link IDs */
if (nodeid <= 0 || nodeid > KNET_MAX_HOST ||
link_no < 0 || link_no > KNET_MAX_LINK) {
return CS_ERR_NOT_EXIST;
}
/* Always get the latest stats */
res = totemknet_link_get_status((knet_node_id_t)nodeid, (uint8_t)link_no, &link_status);
if (res != CS_OK) {
return CS_ERR_LIBRARY;
}
stats_map_set_value(statinfo, &link_status, value, value_len, type);
break;
case STAT_IPCSC:
if (sscanf(key_name, "stats.ipcs.service%d.%d.%p", &service_id, &pid, &conn_ptr) != 3) {
return CS_ERR_NOT_EXIST;
}
res = cs_ipcs_get_conn_stats(service_id, pid, conn_ptr, &ipcs_conn_stats);
if (res != CS_OK) {
return res;
}
stats_map_set_value(statinfo, &ipcs_conn_stats, value, value_len, type);
break;
case STAT_IPCSG:
cs_ipcs_get_global_stats(&ipcs_global_stats);
stats_map_set_value(statinfo, &ipcs_global_stats, value, value_len, type);
break;
+ case STAT_SCHEDMISS:
+ if (sscanf(key_name, SCHEDMISS_PREFIX ".%d", &sm_event) != 1) {
+ return CS_ERR_NOT_EXIST;
+ }
+
+ sm_type = strrchr(key_name, '.');
+ if (sm_type == NULL) {
+ return CS_ERR_NOT_EXIST;
+ }
+ sm_type++;
+
+ if (strcmp(sm_type, "timestamp") == 0) {
+ memcpy(value, &schedmiss_event[sm_event].timestamp, sizeof(uint64_t));
+ *value_len = sizeof(uint64_t);
+ *type = ICMAP_VALUETYPE_UINT64;
+ }
+ if (strcmp(sm_type, "delay") == 0) {
+ memcpy(value, &schedmiss_event[sm_event].delay, sizeof(float));
+ *value_len = sizeof(float);
+ *type = ICMAP_VALUETYPE_FLOAT;
+ }
+ break;
default:
return CS_ERR_LIBRARY;
}
return CS_OK;
}
-#define STATS_CLEAR "stats.clear."
-#define STATS_CLEAR_KNET "stats.clear.knet"
-#define STATS_CLEAR_IPC "stats.clear.ipc"
-#define STATS_CLEAR_TOTEM "stats.clear.totem"
-#define STATS_CLEAR_ALL "stats.clear.all"
+static void schedmiss_clear_stats(void)
+{
+ int i;
+ char param[ICMAP_KEYNAME_MAXLEN];
+
+ for (i=0; i=0; i--) {
+ schedmiss_event[i+1].timestamp = schedmiss_event[i].timestamp;
+ schedmiss_event[i+1].delay = schedmiss_event[i].delay;
+ }
+
+ /* New entries are always at the front */
+ schedmiss_event[0].timestamp = timestamp;
+ schedmiss_event[0].delay = delay;
+
+ /* If we've not run off the end then add an entry in the trie for the new 'end' one */
+ if (highest_schedmiss_event < MAX_SCHEDMISS_EVENTS) {
+ sprintf(param, SCHEDMISS_PREFIX ".%i.timestamp", highest_schedmiss_event);
+ stats_add_entry(param, &cs_schedmiss_stats[0]);
+ sprintf(param, SCHEDMISS_PREFIX ".%i.delay", highest_schedmiss_event);
+ stats_add_entry(param, &cs_schedmiss_stats[1]);
+ highest_schedmiss_event++;
+ }
+ /* Notifications get sent by the stats_updater */
+}
+
+#define STATS_CLEAR "stats.clear."
+#define STATS_CLEAR_KNET "stats.clear.knet"
+#define STATS_CLEAR_IPC "stats.clear.ipc"
+#define STATS_CLEAR_TOTEM "stats.clear.totem"
+#define STATS_CLEAR_ALL "stats.clear.all"
+#define STATS_CLEAR_SCHEDMISS "stats.clear.schedmiss"
/*
 * "Set" a key in the stats map. The only supported writes are the
 * stats.clear.* keys, which reset the corresponding group of statistics.
 *
 * key_name   key being written; compared by prefix (strncmp), so any key that
 *            starts with one of the STATS_CLEAR_* names matches
 * value, value_len, type
 *            not consulted by this function
 *
 * Returns CS_OK when at least one group was cleared, CS_ERR_NOT_SUPPORTED
 * when key_name matched none of the clear keys.
 */
cs_error_t stats_map_set(const char *key_name,
const void *value,
size_t value_len,
icmap_value_types_t type)
{
int cleared = 0;
if (strncmp(key_name, STATS_CLEAR_KNET, strlen(STATS_CLEAR_KNET)) == 0) {
totempg_stats_clear(TOTEMPG_STATS_CLEAR_TRANSPORT);
cleared = 1;
}
if (strncmp(key_name, STATS_CLEAR_IPC, strlen(STATS_CLEAR_IPC)) == 0) {
cs_ipcs_clear_stats();
cleared = 1;
}
if (strncmp(key_name, STATS_CLEAR_TOTEM, strlen(STATS_CLEAR_TOTEM)) == 0) {
totempg_stats_clear(TOTEMPG_STATS_CLEAR_TOTEM);
cleared = 1;
}
+ if (strncmp(key_name, STATS_CLEAR_SCHEDMISS, strlen(STATS_CLEAR_SCHEDMISS)) == 0) {
+ schedmiss_clear_stats();
+ cleared = 1;
+ }
/* "all" clears every group handled above */
if (strncmp(key_name, STATS_CLEAR_ALL, strlen(STATS_CLEAR_ALL)) == 0) {
totempg_stats_clear(TOTEMPG_STATS_CLEAR_TRANSPORT | TOTEMPG_STATS_CLEAR_TOTEM);
cs_ipcs_clear_stats();
+ schedmiss_clear_stats();
cleared = 1;
}
if (!cleared) {
return CS_ERR_NOT_SUPPORTED;
}
return CS_OK;
}
/*
 * Adjusting a value by a step is not available on the stats map:
 * always returns CS_ERR_NOT_SUPPORTED, whatever the arguments.
 */
cs_error_t stats_map_adjust_int(const char *key_name, int32_t step)
{
	(void)key_name;
	(void)step;

	return (CS_ERR_NOT_SUPPORTED);
}
/*
 * Deleting a key is not available on the stats map:
 * always returns CS_ERR_NOT_SUPPORTED, whatever the key.
 */
cs_error_t stats_map_delete(const char *key_name)
{
	(void)key_name;

	return (CS_ERR_NOT_SUPPORTED);
}
/*
 * Tell whether a stats key is read-only.
 *
 * Returns 0 for keys starting with the STATS_CLEAR prefix (the 'clear'
 * destinations are the only writable keys) and 1 for everything else.
 */
int stats_map_is_key_ro(const char *key_name)
{
	const int is_clear_key =
	    (strncmp(key_name, STATS_CLEAR, strlen(STATS_CLEAR)) == 0);

	return is_clear_key ? 0 : 1;
}
/*
 * Create an iterator over the stats keys that start with prefix.
 * The result comes straight from qb_map_pref_iter_create on stats_map.
 */
icmap_iter_t stats_map_iter_init(const char *prefix)
{
	icmap_iter_t iter;

	iter = qb_map_pref_iter_create(stats_map, prefix);
	return iter;
}
/*
 * Advance a stats iterator.
 *
 * iter       iterator obtained from stats_map_iter_init
 * value_len  receives the generic length for the key's value type, or NULL
 * type       receives the key's value type, or NULL
 *
 * Returns the next key name, or NULL once the iterator is exhausted. Only the
 * conversion table is consulted; the current value is not read.
 */
const char *stats_map_iter_next(icmap_iter_t iter, size_t *value_len, icmap_value_types_t *type)
{
	struct stats_item *entry;
	const char *key;

	key = qb_map_iter_next(iter, (void **)&entry);
	if (key != NULL) {
		stats_map_set_value(entry->cs_conv, NULL, NULL, value_len, type);
	}

	return key;
}
/*
 * Release an iterator obtained from stats_map_iter_init.
 */
void stats_map_iter_finalize(icmap_iter_t iter)
{
	qb_map_iter_free(iter);
	return;
}
/*
 * Poll every single-key tracker and send an ICMAP_TRACK_MODIFY notification
 * for each key whose value differs from the one remembered in the tracker.
 *
 * Prefix trackers and trackers without a key are skipped here. Keys whose
 * value cannot be read are skipped silently.
 *
 * The callback receives the freshly read value as the new value and the
 * remembered one as the old value, in the same (new, old) argument order that
 * stats_map_notify_fn uses; the remembered value is updated only after the
 * callback returns.
 *
 * NOTE(review): the value buffer is a uint64_t, like tracker->old_value, so
 * this is only sound for values of at most 8 bytes; string-typed keys look
 * like they would not fit - confirm whether they can be tracked this way.
 */
void stats_trigger_trackers()
{
	struct cs_stats_tracker *tracker;
	struct qb_list_head *iter;
	cs_error_t res;
	size_t value_len;
	icmap_value_types_t type;
	uint64_t value;
	struct icmap_notify_value new_val;
	struct icmap_notify_value old_val;

	qb_list_for_each(iter, &stats_tracker_list_head) {
		tracker = qb_list_entry(iter, struct cs_stats_tracker, list);
		if (tracker->events & ICMAP_TRACK_PREFIX || !tracker->key_name ) {
			continue;
		}

		res = stats_map_get(tracker->key_name,
				    &value, &value_len, &type);
		if (res != CS_OK) {
			continue;
		}

		/* Check if it has changed */
		if (memcmp(&value, &tracker->old_value, value_len) == 0) {
			continue;
		}

		new_val.type = old_val.type = type;
		new_val.len = old_val.len = value_len;
		new_val.data = &value;
		old_val.data = &tracker->old_value;

		tracker->notify_fn(ICMAP_TRACK_MODIFY, tracker->key_name,
				   new_val, old_val, tracker->user_data);

		memcpy(&tracker->old_value, &value, value_len);
	}
}
/*
 * Callback from libqb when a key is added/removed.
 *
 * event      libqb event, translated with icmap_qbtt_to_tt before being
 *            passed on
 * key        name of the key the event is about
 * old_value, value
 *            only checked for both being NULL, in which case nothing is done
 * user_data  the struct cs_stats_tracker that registered this callback
 *
 * The current value of the key is read with stats_map_get and handed to the
 * tracker's notify_fn as both the new and the old value.
 */
static void stats_map_notify_fn(uint32_t event, char *key, void *old_value, void *value, void *user_data)
{
struct cs_stats_tracker *tracker = user_data;
struct icmap_notify_value new_val;
struct icmap_notify_value old_val;
/* NOTE(review): stats_map_get is given no buffer size; presumably 64 bytes covers every stat value - confirm */
char new_value[64];
if (value == NULL && old_value == NULL) {
return ;
}
+ /* Ignore schedmiss trackers as the values are read from the circular buffer */
+ if (strncmp(key, SCHEDMISS_PREFIX, strlen(SCHEDMISS_PREFIX)) == 0 ) {
+ return ;
+ }
+
new_val.data = new_value;
if (stats_map_get(key,
&new_value,
&new_val.len,
&new_val.type) != CS_OK) {
log_printf(LOGSYS_LEVEL_WARNING, "get value of notified key %s failed", key);
return ;
}
/*
 * We don't know what the old value was, but as this only tracks
 * ADD & DELETE that is not a concern: the old value is simply
 * reported as a copy of the new one
 */
memcpy(&old_val, &new_val, sizeof(new_val));
tracker->notify_fn(icmap_qbtt_to_tt(event),
key,
new_val,
old_val,
tracker->user_data);
}
/*
 * Register a tracker on the stats map.
 *
 * key_name    - key (or prefix) to track; may be NULL, in which case the
 *               tracker is stored without a key name
 * track_type  - ICMAP_TRACK_* flags
 * notify_fn   - callback stored in the tracker
 * user_data   - opaque pointer stored in the tracker
 * icmap_track - receives the new tracker handle on success
 *
 * Returns CS_OK on success, CS_ERR_NOT_SUPPORTED when ICMAP_TRACK_PREFIX
 * is requested without both ICMAP_TRACK_ADD and ICMAP_TRACK_DELETE,
 * CS_ERR_NO_MEMORY on allocation failure, or the converted libqb error
 * when qb_map_notify_add() fails.
 */
cs_error_t stats_map_track_add(const char *key_name,
int32_t track_type,
icmap_notify_fn_t notify_fn,
void *user_data,
icmap_track_t *icmap_track)
{
struct cs_stats_tracker *tracker;
size_t value_len;
icmap_value_types_t type;
cs_error_t err;
/* We can track adding or deleting a key under a prefix,
   but only when both ADD and DELETE are requested */
if ((track_type & ICMAP_TRACK_PREFIX) &&
(!(track_type & ICMAP_TRACK_DELETE) ||
!(track_type & ICMAP_TRACK_ADD))) {
return CS_ERR_NOT_SUPPORTED;
}
tracker = malloc(sizeof(struct cs_stats_tracker));
if (!tracker) {
return CS_ERR_NO_MEMORY;
}
tracker->notify_fn = notify_fn;
tracker->user_data = user_data;
tracker->events = track_type;
if (key_name) {
/* The tracker keeps its own copy of the key name */
tracker->key_name = strdup(key_name);
if (!tracker->key_name) {
free(tracker);
return CS_ERR_NO_MEMORY;
}
/* Get initial value; start from zero when it cannot be read */
if (stats_map_get(tracker->key_name,
- &tracker->old_value, &value_len, &type) == CS_OK) {
+ &tracker->old_value, &value_len, &type) != CS_OK) {
tracker->old_value = 0ULL;
}
} else {
tracker->key_name = NULL;
tracker->old_value = 0ULL;
}
/* Add/delete trackers can use the qb_map tracking */
if ((track_type & ICMAP_TRACK_ADD) ||
(track_type & ICMAP_TRACK_DELETE)) {
err = qb_map_notify_add(stats_map, tracker->key_name,
stats_map_notify_fn,
icmap_tt_to_qbtt(track_type),
tracker);
if (err != 0) {
/* NOTE(review): tracker->key_name is NULL here when no key_name was given - confirm %s is safe */
log_printf(LOGSYS_LEVEL_ERROR, "creating stats tracker %s failed. %d\n", tracker->key_name, err);
free(tracker->key_name);
free(tracker);
return (qb_to_cs_error(err));
}
}
/* Remember the tracker in the tracker list and hand it back as the opaque handle */
qb_list_add (&tracker->list, &stats_tracker_list_head);
*icmap_track = (icmap_track_t)tracker;
return CS_OK;
}
/*
 * Remove a tracker created by stats_map_track_add() and release it.
 *
 * For ADD/DELETE trackers the libqb notification is removed first; a
 * failure there is logged but does not stop the tracker from being
 * unlinked and freed.
 *
 * Always returns CS_OK.
 */
cs_error_t stats_map_track_delete(icmap_track_t icmap_track)
{
	struct cs_stats_tracker *tracker = (struct cs_stats_tracker *)icmap_track;
	int err;

	if ((tracker->events & ICMAP_TRACK_ADD) ||
	    (tracker->events & ICMAP_TRACK_DELETE)) {
		err = qb_map_notify_del_2(stats_map,
					  tracker->key_name, stats_map_notify_fn,
					  icmap_tt_to_qbtt(tracker->events), tracker);
		if (err) {
			/*
			 * A tracker registered without a key has key_name == NULL;
			 * never hand NULL to a %s conversion.
			 */
			log_printf(LOGSYS_LEVEL_ERROR, "deleting tracker %s failed. %d\n",
				   (tracker->key_name != NULL) ? tracker->key_name : "(null)", err);
		}
	}

	qb_list_del(&tracker->list);
	free(tracker->key_name);
	free(tracker);

	return CS_OK;
}
/*
 * Return the user_data pointer stored in the tracker behind the given
 * handle.
 */
void *stats_map_track_get_user_data(icmap_track_t icmap_track)
{
	return ((struct cs_stats_tracker *)icmap_track)->user_data;
}
/* Called from totemknet to add/remove keys from our map */
void stats_knet_add_member(knet_node_id_t nodeid, uint8_t link_no)
{
int i;
char param[ICMAP_KEYNAME_MAXLEN];
for (i = 0; i.*
+If corosync is not scheduled after the required period of time it will
+log this event and also write an entry to the stats cmap under this key.
+There can be up to 10 entries (0..9) in here; when an 11th event happens
+the earliest will be removed.
+
+These events are always kept in reverse chronological order, so
+stats.schedmiss.0.* is always the latest event kept and stats.schedmiss.9.*
+the oldest. If you want to listen for notifications, it is recommended
+that you listen for changes
+to stats.schedmiss.0.timestamp or stats.schedmiss.0.delay.
+
+.B timestamp
+The time of the event in ms since the Epoch (i.e. time_t * 1000 but with
+valid milliseconds).
+
+.B delay
+The time that corosync was paused (in ms, float value).
+
+
.TP
stats.clear.*
These are write-only keys used to clear the stats for various subsystems.
.B totem
Clears the pg & srp totem stats.
.B knet
Clears the knet stats.
.B ipc
Clears the ipc stats.
+.B schedmiss
+Clears the schedmiss stats.
+
.B all
Clears all of the above stats
.SH DYNAMIC CHANGE USER/GROUP PERMISSION TO USE COROSYNC IPC
This works the same way as in the configuration file, e.g. to add UID 500 use
.br
# corosync-cmapctl -s uidgid.uid.500 u8 1
GID is similar, so to add a GID use
.br
# corosync-cmapctl -s uidgid.gid.500 u8 1
For removal of permissions, simply delete the key
.br
# corosync-cmapctl -d uidgid.gid.500
.SH "SEE ALSO"
.BR corosync_overview (7),
.BR corosync.conf (5),
.BR corosync-cmapctl (8)
diff --git a/tools/corosync-cmapctl.c b/tools/corosync-cmapctl.c
index a4b61bd5..ffca7e1b 100644
--- a/tools/corosync-cmapctl.c
+++ b/tools/corosync-cmapctl.c
@@ -1,980 +1,981 @@
/*
* Copyright (c) 2011-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Jan Friesse (jfriesse@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the Red Hat, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include
#include
#include
#include
#include
#include
#include "../lib/util.h"
/* Fallback for systems whose headers do not define INFTIM; passed as the poll() timeout in track_changes() */
#ifndef INFTIM
#define INFTIM -1
#endif
/* Retry limit for calls that return CS_ERR_TRY_AGAIN (callers sleep one second between attempts) */
#define MAX_TRY_AGAIN 10
/* Operation selected by the command line options and executed by main() */
enum user_action {
ACTION_GET, /* -g: print the value of each named key */
ACTION_SET, /* -s: set a key from name, type and value arguments */
ACTION_DELETE, /* -d: delete each named key */
ACTION_DELETE_PREFIX, /* -D: delete every key under each given prefix */
ACTION_PRINT_PREFIX, /* default: print all keys, or all keys under the given prefixes */
ACTION_TRACK, /* -t / -T: track changes of keys / key prefixes */
ACTION_LOAD, /* -p: apply the settings read from a file */
ACTION_CLEARSTATS, /* -C: write to a stats.clear.* key */
};
/* One entry of the table that maps a type name given on the command line to a cmap value type */
struct name_to_type_item {
const char *name; /* textual type name, e.g. "u32" */
cmap_value_types_t type; /* matching cmap value type */
};
/* Type names accepted by set_key(); searched linearly by convert_name_to_type() */
struct name_to_type_item name_to_type[] = {
{"i8", CMAP_VALUETYPE_INT8},
{"u8", CMAP_VALUETYPE_UINT8},
{"i16", CMAP_VALUETYPE_INT16},
{"u16", CMAP_VALUETYPE_UINT16},
{"i32", CMAP_VALUETYPE_INT32},
{"u32", CMAP_VALUETYPE_UINT32},
{"i64", CMAP_VALUETYPE_INT64},
{"u64", CMAP_VALUETYPE_UINT64},
{"flt", CMAP_VALUETYPE_FLOAT},
{"dbl", CMAP_VALUETYPE_DOUBLE},
{"str", CMAP_VALUETYPE_STRING},
{"bin", CMAP_VALUETYPE_BINARY}};
/* Incremented by -b; when non-zero print_key() also prints the contents of binary values */
int show_binary = 0;
/* Set to 1 by -q; print_key() then skips printing the key name */
int quiet = 0;
static int convert_name_to_type(const char *name)
{
int i;
for (i = 0; i < sizeof(name_to_type) / sizeof(*name_to_type); i++) {
if (strcmp(name, name_to_type[i].name) == 0) {
return (name_to_type[i].type);
}
}
return (-1);
}
/*
 * Print the usage text of corosync-cmapctl to stdout.
 *
 * Always returns 0 so that main() can return the result directly.
 */
static int print_help(void)
{
	printf("\n");
	printf("usage: corosync-cmapctl [-b] [-DdghsqTCt] [-p filename] [-m map] [params...]\n");
	printf("\n");
	printf(" -b show binary values\n");
	printf("\n");
	printf(" -m select map to use\n");
	printf(" The default map is 'icmap' which contains configuration information and some runtime variables used by corosync. \n");
	printf(" A 'stats' map is also available which displays network statistics - in great detail when knet is used as the transport.\n");
	printf("Set key:\n");
	printf(" corosync-cmapctl -s key_name type value\n");
	printf("\n");
	printf(" where type is one of ([i|u][8|16|32|64] | flt | dbl | str | bin)\n");
	printf(" for bin, value is file name (or - for stdin)\n");
	printf("\n");
	printf(" map can be either 'icmap' (the default) which contains corosync\n");
	printf(" configuration information, or 'stats' which contains statistics\n");
	printf(" about the networking and IPC traffic in some detail.\n");
	printf("\n");
	printf("Clear stats:\n");
-	printf(" corosync-cmapctl -C [knet|ipc|totem|all]\n");
+	printf(" corosync-cmapctl -C [knet|ipc|totem|schedmiss|all]\n");
	printf(" The 'stats' map is implied\n");
	printf("\n");
	printf("Load settings from a file:\n");
	printf(" corosync-cmapctl -p filename\n");
	printf("\n");
	printf(" the format of the file is:\n");
	/* The placeholders name the three fields parsed by read_in_config_file() */
	printf(" [^[^]]<key_name>[ <type> <value>]\n");
	printf(" Keys prefixed with single caret ('^') are deleted (see -d).\n");
	printf(" Keys (actually prefixes) prefixed with double caret ('^^') are deleted by prefix (see -D).\n");
	printf(" <type> and <value> are optional (not checked) in above cases.\n");
	printf(" Other keys are set (see -s) so both <type> and <value> are required.\n");
	printf("\n");
	printf("Delete key:\n");
	printf(" corosync-cmapctl -d key_name...\n");
	printf("\n");
	printf("Delete multiple keys with prefix:\n");
	printf(" corosync-cmapctl -D key_prefix...\n");
	printf("\n");
	printf("Get key:\n");
	printf(" corosync-cmapctl [-b] -g key_name...\n");
	printf("\n");
	printf("Quiet mode:\n");
	printf(" corosync-cmapctl [-b] -q -g key_name...\n");
	printf("\n");
	printf("Display all keys:\n");
	printf(" corosync-cmapctl [-b]\n");
	printf("\n");
	printf("Display keys with prefix key_name:\n");
	printf(" corosync-cmapctl [-b] key_name...\n");
	printf("\n");
	printf("Track changes on keys with key_name:\n");
	printf(" corosync-cmapctl [-b] -t key_name\n");
	printf("\n");
	printf("Track changes on keys with key prefix:\n");
	printf(" corosync-cmapctl [-b] -T key_prefix\n");
	printf("\n");

	return (0);
}
/*
 * Write a binary value to stdout in an escaped, printable form.
 *
 * Printable ASCII bytes (0x20..0x7e) are written as they are, a backslash
 * is written as two backslashes and every other byte as \xHH with exactly
 * two upper-case hex digits.
 *
 * value     - bytes to print
 * value_len - number of bytes in value
 */
static void print_binary_key (char *value, size_t value_len)
{
	size_t i;
	unsigned char c;

	for (i = 0; i < value_len; i++) {
		/*
		 * Work on the byte as unsigned: a plain char may be signed, and a
		 * negative value would be sign-extended by the integer promotion
		 * and printed as \xFFFFFFHH.
		 */
		c = (unsigned char)value[i];

		if (c >= ' ' && c < 0x7f && c != '\\') {
			fputc (c, stdout);
		} else {
			if (c == '\\') {
				printf ("\\\\");
			} else {
				printf ("\\x%02X", (unsigned int)c);
			}
		}
	}
}
/*
 * Print one key as "name (type) = value", or only the value in quiet mode.
 *
 * handle    - cmap connection used to fetch the value when value is NULL
 * key_name  - name of the key
 * value_len - length of the value in bytes (only used for binary values)
 * value     - value to print, or NULL to fetch it through the cmap API
 * type      - cmap type of the value
 *
 * A fetch that returns CS_ERR_TRY_AGAIN is repeated after a one second
 * sleep until more than MAX_TRY_AGAIN retries were made. If the value
 * cannot be obtained an error is written to stderr and nothing is printed.
 * The contents of binary values are only printed when show_binary is set.
 */
static void print_key(cmap_handle_t handle,
	const char *key_name,
	size_t value_len,
	const void *value,
	cmap_value_types_t type)
{
	char *str = NULL;
	char *bin_value = NULL;
	cs_error_t err;
	int8_t i8;
	uint8_t u8;
	int16_t i16;
	uint16_t u16;
	int32_t i32;
	uint32_t u32;
	int64_t i64;
	uint64_t u64;
	float flt;
	double dbl;
	int end_loop;
	int no_retries;
	size_t bin_value_len;

	end_loop = 0;
	no_retries = 0;
	err = CS_OK;
	/* For a caller supplied value this is the length to print */
	bin_value_len = value_len;

	while (!end_loop) {
		switch (type) {
		case CMAP_VALUETYPE_INT8:
			if (value == NULL) {
				err = cmap_get_int8(handle, key_name, &i8);
			} else {
				i8 = *((int8_t *)value);
			}
			break;
		case CMAP_VALUETYPE_INT16:
			if (value == NULL) {
				err = cmap_get_int16(handle, key_name, &i16);
			} else {
				i16 = *((int16_t *)value);
			}
			break;
		case CMAP_VALUETYPE_INT32:
			if (value == NULL) {
				err = cmap_get_int32(handle, key_name, &i32);
			} else {
				i32 = *((int32_t *)value);
			}
			break;
		case CMAP_VALUETYPE_INT64:
			if (value == NULL) {
				err = cmap_get_int64(handle, key_name, &i64);
			} else {
				i64 = *((int64_t *)value);
			}
			break;
		case CMAP_VALUETYPE_UINT8:
			if (value == NULL) {
				err = cmap_get_uint8(handle, key_name, &u8);
			} else {
				u8 = *((uint8_t *)value);
			}
			break;
		case CMAP_VALUETYPE_UINT16:
			if (value == NULL) {
				err = cmap_get_uint16(handle, key_name, &u16);
			} else {
				u16 = *((uint16_t *)value);
			}
			break;
		case CMAP_VALUETYPE_UINT32:
			if (value == NULL) {
				err = cmap_get_uint32(handle, key_name, &u32);
			} else {
				u32 = *((uint32_t *)value);
			}
			break;
		case CMAP_VALUETYPE_UINT64:
			if (value == NULL) {
				err = cmap_get_uint64(handle, key_name, &u64);
			} else {
				u64 = *((uint64_t *)value);
			}
			break;
		case CMAP_VALUETYPE_FLOAT:
			if (value == NULL) {
				err = cmap_get_float(handle, key_name, &flt);
			} else {
				flt = *((float *)value);
			}
			break;
		case CMAP_VALUETYPE_DOUBLE:
			if (value == NULL) {
				err = cmap_get_double(handle, key_name, &dbl);
			} else {
				dbl = *((double *)value);
			}
			break;
		case CMAP_VALUETYPE_STRING:
			if (value == NULL) {
				err = cmap_get_string(handle, key_name, &str);
			} else {
				str = (char *)value;
			}
			break;
		case CMAP_VALUETYPE_BINARY:
			if (show_binary) {
				if (value == NULL) {
					/*
					 * Allocate only once: this case runs again on
					 * every CS_ERR_TRY_AGAIN retry. Never ask for
					 * zero bytes, malloc(0) may return NULL.
					 */
					if (bin_value == NULL) {
						bin_value = malloc(value_len > 0 ? value_len : 1);
						if (bin_value == NULL) {
							fprintf(stderr, "Can't alloc memory\n");
							exit(EXIT_FAILURE);
						}
					}
					bin_value_len = value_len;
					err = cmap_get(handle, key_name, bin_value, &bin_value_len, NULL);
				} else {
					bin_value = (char *)value;
				}
			}
			break;
		default:
			break;
		}

		if (err == CS_OK) {
			end_loop = 1;
		} else if (err == CS_ERR_TRY_AGAIN) {
			sleep(1);
			no_retries++;
			if (no_retries > MAX_TRY_AGAIN) {
				end_loop = 1;
			}
		} else {
			end_loop = 1;
		}
	}

	if (err != CS_OK) {
		fprintf(stderr, "Can't get value of %s. Error %s\n", key_name, cs_strerror(err));

		/*
		 * bin_value was newly allocated
		 */
		if (value == NULL) {
			free(bin_value);
		}

		return ;
	}

	if (!quiet) {
		printf("%s (", key_name);
	}

	switch (type) {
	case CMAP_VALUETYPE_INT8:
		if (!quiet) {
			printf("%s) = ", "i8");
		}
		printf("%"PRId8, i8);
		break;
	case CMAP_VALUETYPE_UINT8:
		if (!quiet) {
			printf("%s) = ", "u8");
		}
		printf("%"PRIu8, u8);
		break;
	case CMAP_VALUETYPE_INT16:
		if (!quiet) {
			printf("%s) = ", "i16");
		}
		printf("%"PRId16, i16);
		break;
	case CMAP_VALUETYPE_UINT16:
		if (!quiet) {
			printf("%s) = ", "u16");
		}
		printf("%"PRIu16, u16);
		break;
	case CMAP_VALUETYPE_INT32:
		if (!quiet) {
			printf("%s) = ", "i32");
		}
		printf("%"PRId32, i32);
		break;
	case CMAP_VALUETYPE_UINT32:
		if (!quiet) {
			printf("%s) = ", "u32");
		}
		printf("%"PRIu32, u32);
		break;
	case CMAP_VALUETYPE_INT64:
		if (!quiet) {
			printf("%s) = ", "i64");
		}
		printf("%"PRId64, i64);
		break;
	case CMAP_VALUETYPE_UINT64:
		if (!quiet) {
			printf("%s) = ", "u64");
		}
		printf("%"PRIu64, u64);
		break;
	case CMAP_VALUETYPE_FLOAT:
		if (!quiet) {
			printf("%s) = ", "flt");
		}
		printf("%f", flt);
		break;
	case CMAP_VALUETYPE_DOUBLE:
		if (!quiet) {
			printf("%s) = ", "dbl");
		}
		printf("%lf", dbl);
		break;
	case CMAP_VALUETYPE_STRING:
		if (!quiet) {
			printf("%s) = ", "str");
		}
		printf("%s", str);

		/* cmap_get_string() allocated the string for us */
		if (value == NULL) {
			free(str);
		}
		break;
	case CMAP_VALUETYPE_BINARY:
		/* The type tag closes the "key (" prefix, which quiet mode omits */
		if (!quiet) {
			printf("%s)", "bin");
		}

		if (show_binary) {
			if (!quiet) {
				printf(" = ");
			}
			if (bin_value) {
				/* Print the length cmap_get() reported, not the requested one */
				print_binary_key(bin_value, bin_value_len);
				if (value == NULL) {
					free(bin_value);
				}
			} else {
				printf("*empty*");
			}
		}
		break;
	default:
		break;
	}

	printf("\n");
}
/*
 * Print every key that the cmap iterator yields for the given prefix
 * (prefix may be NULL, as main() does when no prefix is given).
 *
 * Exits the program when the iterator cannot be created. The iteration
 * stops at the first cmap_iter_next() result other than CS_OK.
 */
static void print_iter(cmap_handle_t handle, const char *prefix)
{
	cmap_iter_handle_t it;
	char name[CMAP_KEYNAME_MAXLEN + 1];
	size_t len;
	cmap_value_types_t vtype;
	cs_error_t rc;

	rc = cmap_iter_init(handle, prefix, &it);
	if (rc != CS_OK) {
		fprintf (stderr, "Failed to initialize iteration. Error %s\n", cs_strerror(rc));
		exit (EXIT_FAILURE);
	}

	for (;;) {
		rc = cmap_iter_next(handle, it, name, &len, &vtype);
		if (rc != CS_OK) {
			break;
		}
		/* value == NULL makes print_key() fetch the value itself */
		print_key(handle, name, len, NULL, vtype);
	}

	cmap_iter_finalize(handle, it);
}
/*
 * Delete every key that the cmap iterator yields for the given prefix.
 *
 * Exits the program when the iterator cannot be created. A key that
 * cannot be deleted is reported on stderr and the iteration continues.
 */
static void delete_with_prefix(cmap_handle_t handle, const char *prefix)
{
	cmap_iter_handle_t it;
	char name[CMAP_KEYNAME_MAXLEN + 1];
	size_t len;
	cmap_value_types_t vtype;
	cs_error_t iter_rc;
	cs_error_t del_rc;

	iter_rc = cmap_iter_init(handle, prefix, &it);
	if (iter_rc != CS_OK) {
		fprintf (stderr, "Failed to initialize iteration. Error %s\n", cs_strerror(iter_rc));
		exit (EXIT_FAILURE);
	}

	for (;;) {
		iter_rc = cmap_iter_next(handle, it, name, &len, &vtype);
		if (iter_rc != CS_OK) {
			break;
		}

		del_rc = cmap_delete(handle, name);
		if (del_rc == CS_OK) {
			continue;
		}
		fprintf(stderr, "Can't delete key %s. Error %s\n", name, cs_strerror(del_rc));
	}

	cmap_iter_finalize(handle, it);
}
/*
 * Tracking callback registered by add_track(): prints one line per change.
 *
 * Added and modified keys are printed with their new value, deleted keys
 * with their old value, each preceded by "create> ", "modify> " or
 * "delete> ". Any other event only prints "unknown change> ".
 */
static void cmap_notify_fn(
	cmap_handle_t cmap_handle,
	cmap_track_handle_t cmap_track_handle,
	int32_t event,
	const char *key_name,
	struct cmap_notify_value new_val,
	struct cmap_notify_value old_val,
	void *user_data)
{
	const char *label;
	const struct cmap_notify_value *shown;

	if (event == CMAP_TRACK_ADD) {
		label = "create> ";
		shown = &new_val;
	} else if (event == CMAP_TRACK_DELETE) {
		label = "delete> ";
		shown = &old_val;
	} else if (event == CMAP_TRACK_MODIFY) {
		label = "modify> ";
		shown = &new_val;
	} else {
		printf("unknown change> ");
		return;
	}

	printf("%s", label);
	print_key(cmap_handle, key_name, shown->len, shown->data, shown->type);
}
/*
 * Register cmap_notify_fn() for add, delete and modify events on key_name.
 *
 * prefix - when non-zero, CMAP_TRACK_PREFIX is added to the track type
 *
 * Exits the program when the tracker cannot be registered.
 */
static void add_track(cmap_handle_t handle, const char *key_name, int prefix)
{
	const int32_t all_events = CMAP_TRACK_ADD | CMAP_TRACK_DELETE | CMAP_TRACK_MODIFY;
	const int32_t wanted = prefix ? (all_events | CMAP_TRACK_PREFIX) : all_events;
	cmap_track_handle_t tracker;
	cs_error_t rc;

	rc = cmap_track_add(handle, key_name, wanted, cmap_notify_fn, NULL, &tracker);
	if (rc == CS_OK) {
		return;
	}

	fprintf(stderr, "Failed to add tracking function. Error %s\n", cs_strerror(rc));
	exit (EXIT_FAILURE);
}
/*
 * Dispatch cmap notifications until the user asks to stop.
 *
 * Polls the cmap file descriptor and stdin without a timeout. The loop
 * ends when a line starting with "q" or end of input is read from stdin,
 * when cmap_dispatch() fails, or when poll() does not return a positive
 * count. Exits the program when the cmap file descriptor is unavailable.
 */
static void track_changes(cmap_handle_t handle)
{
	enum { CMAP_SLOT = 0, STDIN_SLOT = 1, SLOT_COUNT = 2 };
	struct pollfd fds[SLOT_COUNT];
	char answer[3];
	int cmap_fd;
	int ready;
	int done = CS_FALSE;
	cs_error_t rc;

	rc = cmap_fd_get(handle, &cmap_fd);
	if (rc != CS_OK) {
		fprintf(stderr, "Failed to get file handle. Error %s\n", cs_strerror(rc));
		exit (EXIT_FAILURE);
	}

	fds[CMAP_SLOT].fd = cmap_fd;
	fds[CMAP_SLOT].events = POLLIN;
	fds[STDIN_SLOT].fd = STDIN_FILENO;
	fds[STDIN_SLOT].events = POLLIN;

	printf("Type \"q\" to finish\n");

	for (;;) {
		fds[CMAP_SLOT].revents = 0;
		fds[STDIN_SLOT].revents = 0;

		ready = poll(fds, SLOT_COUNT, INFTIM);
		if (ready == -1) {
			perror("poll");
		}

		if (fds[STDIN_SLOT].revents & POLLIN) {
			if (fgets(answer, sizeof(answer), stdin) == NULL || answer[0] == 'q') {
				done = CS_TRUE;
			}
		}

		/* Pending notifications are still dispatched in the round that saw "q" */
		if (fds[CMAP_SLOT].revents & POLLIN) {
			rc = cmap_dispatch(handle, CS_DISPATCH_ALL);
			if (rc != CS_OK) {
				fprintf(stderr, "Dispatch error %s\n", cs_strerror(rc));
				done = CS_TRUE;
			}
		}

		if (ready <= 0 || done) {
			break;
		}
	}
}
/*
 * Set a binary key from the contents of a file.
 *
 * handle   - cmap connection
 * key_name - key to set
 * fname    - file to read the value from, or "-" for stdin
 *
 * The whole input is read into memory and stored with cmap_set() as
 * CMAP_VALUETYPE_BINARY. Returns the result of cmap_set(). Exits the
 * program when the file cannot be opened or read, or when memory runs out.
 */
static cs_error_t set_key_bin(cmap_handle_t handle, const char *key_name, const char *fname)
{
	FILE *f;
	char *val;
	char *grown;
	char buf[4096];
	size_t size;
	size_t readed;
	cs_error_t err;

	if (strcmp(fname, "-") == 0) {
		f = stdin;
	} else {
		f = fopen(fname, "rb");
		if (f == NULL) {
			perror("Can't open input file");
			exit(EXIT_FAILURE);
		}
	}

	val = NULL;
	size = 0;

	while ((readed = fread(buf, 1, sizeof(buf), f)) != 0) {
		grown = realloc(val, size + readed);
		if (grown == NULL) {
			fprintf(stderr, "Can't alloc memory\n");
			exit (EXIT_FAILURE);
		}
		val = grown;

		memcpy(val + size, buf, readed);
		size += readed;
	}

	/*
	 * fread() returns 0 both at end of file and on error; do not store a
	 * silently truncated value.
	 */
	if (ferror(f)) {
		perror("Can't read input file");
		exit(EXIT_FAILURE);
	}

	if (f != stdin) {
		fclose(f);
	}

	err = cmap_set(handle, key_name, val, size, CMAP_VALUETYPE_BINARY);

	free(val);

	return (err);
}
/*
 * Set a key from textual type and value.
 *
 * handle      - cmap connection
 * key_name    - key to set
 * key_type_s  - type name, one of the names in the name_to_type table
 * key_value_s - value as text; for the "bin" type the name of a file to
 *               read the value from (see set_key_bin())
 *
 * Integer values are range checked against the requested width. Any
 * failure (missing argument, unknown type, unparsable or out of range
 * value, cmap error) is reported on stderr and ends the program.
 */
static void set_key(cmap_handle_t handle, const char *key_name, const char *key_type_s, const char *key_value_s)
{
	int64_t i64;
	uint64_t u64;
	double dbl;
	float flt;
	cs_error_t err = CS_OK;
	int scanf_res = 0;
	int type_code;
	cmap_value_types_t type;

	/*
	 * The settings file parser passes on whatever strtok() returned, so
	 * any of these may be missing.
	 */
	if (key_name == NULL || key_type_s == NULL || key_value_s == NULL) {
		fprintf(stderr, "Key name, type and value are all required to set a key\n");
		exit (EXIT_FAILURE);
	}

	type_code = convert_name_to_type(key_type_s);
	if (type_code == -1) {
		fprintf(stderr, "Unknown type %s\n", key_type_s);
		exit (EXIT_FAILURE);
	}
	type = type_code;

	switch (type) {
	case CMAP_VALUETYPE_INT8:
	case CMAP_VALUETYPE_INT16:
	case CMAP_VALUETYPE_INT32:
	case CMAP_VALUETYPE_INT64:
		scanf_res = sscanf(key_value_s, "%"PRId64, &i64);
		break;
	case CMAP_VALUETYPE_UINT8:
	case CMAP_VALUETYPE_UINT16:
	case CMAP_VALUETYPE_UINT32:
	case CMAP_VALUETYPE_UINT64:
		scanf_res = sscanf(key_value_s, "%"PRIu64, &u64);
		break;
	case CMAP_VALUETYPE_FLOAT:
		scanf_res = sscanf(key_value_s, "%f", &flt);
		break;
	case CMAP_VALUETYPE_DOUBLE:
		scanf_res = sscanf(key_value_s, "%lf", &dbl);
		break;
	case CMAP_VALUETYPE_STRING:
	case CMAP_VALUETYPE_BINARY:
		/*
		 * Do nothing
		 */
		scanf_res = 1;
		break;
	}

	if (scanf_res != 1) {
		fprintf(stderr, "%s is not valid %s type value\n", key_value_s, key_type_s);
		exit(EXIT_FAILURE);
	}

	/*
	 * We have parsed value, so insert value
	 */
	switch (type) {
	case CMAP_VALUETYPE_INT8:
		if (i64 > INT8_MAX || i64 < INT8_MIN) {
			fprintf(stderr, "%s is not valid i8 integer\n", key_value_s);
			exit(EXIT_FAILURE);
		}
		err = cmap_set_int8(handle, key_name, i64);
		break;
	case CMAP_VALUETYPE_INT16:
		if (i64 > INT16_MAX || i64 < INT16_MIN) {
			fprintf(stderr, "%s is not valid i16 integer\n", key_value_s);
			exit(EXIT_FAILURE);
		}
		err = cmap_set_int16(handle, key_name, i64);
		break;
	case CMAP_VALUETYPE_INT32:
		if (i64 > INT32_MAX || i64 < INT32_MIN) {
			fprintf(stderr, "%s is not valid i32 integer\n", key_value_s);
			exit(EXIT_FAILURE);
		}
		err = cmap_set_int32(handle, key_name, i64);
		break;
	case CMAP_VALUETYPE_INT64:
		err = cmap_set_int64(handle, key_name, i64);
		break;
	case CMAP_VALUETYPE_UINT8:
		if (u64 > UINT8_MAX) {
			fprintf(stderr, "%s is not valid u8 integer\n", key_value_s);
			exit(EXIT_FAILURE);
		}
		err = cmap_set_uint8(handle, key_name, u64);
		break;
	case CMAP_VALUETYPE_UINT16:
		if (u64 > UINT16_MAX) {
			fprintf(stderr, "%s is not valid u16 integer\n", key_value_s);
			exit(EXIT_FAILURE);
		}
		err = cmap_set_uint16(handle, key_name, u64);
		break;
	case CMAP_VALUETYPE_UINT32:
		if (u64 > UINT32_MAX) {
			fprintf(stderr, "%s is not valid u32 integer\n", key_value_s);
			exit(EXIT_FAILURE);
		}
		err = cmap_set_uint32(handle, key_name, u64);
		break;
	case CMAP_VALUETYPE_UINT64:
		err = cmap_set_uint64(handle, key_name, u64);
		break;
	case CMAP_VALUETYPE_FLOAT:
		err = cmap_set_float(handle, key_name, flt);
		break;
	case CMAP_VALUETYPE_DOUBLE:
		err = cmap_set_double(handle, key_name, dbl);
		break;
	case CMAP_VALUETYPE_STRING:
		err = cmap_set_string(handle, key_name, key_value_s);
		break;
	case CMAP_VALUETYPE_BINARY:
		err = set_key_bin(handle, key_name, key_value_s);
		break;
	}

	if (err != CS_OK) {
		fprintf (stderr, "Failed to set key %s. Error %s\n", key_name, cs_strerror(err));
		exit (EXIT_FAILURE);
	}
}
/*
 * Apply the settings found in a file, one per line.
 *
 * Each line has the form
 *     [^[^]]<key_name>[ <type> <value>]
 * A key prefixed with '^' is deleted, a prefix prefixed with '^^' is
 * deleted with delete_with_prefix(), every other line is handed to
 * set_key() and therefore needs both <type> and <value>. Empty lines and
 * lines whose first non-blank character is '#' are ignored.
 *
 * A file that cannot be opened is reported with perror() and nothing is
 * done. A line without type or value is reported on stderr and skipped.
 */
static void read_in_config_file(cmap_handle_t handle, char * filename)
{
	FILE* fh;
	char buf[1024];
	char *line;
	char *key_name;
	char *key_type_s;
	char *key_value_s;

	fh = fopen(filename, "r");
	if (fh == NULL) {
		perror ("Couldn't open file.");
		return;
	}

	while (fgets (buf, sizeof(buf), fh) != NULL) {
		/*
		 * Find the first real character. <ctype.h> functions need an
		 * unsigned char value.
		 */
		line = buf;
		while (isblank ((unsigned char)*line)) {
			line++;
		}

		/*
		 * Ignore comments and lines without real characters, including
		 * a last line that consists of blanks only and has no newline.
		 */
		if (*line == '#' || *line == '\n' || *line == '\0') {
			continue;
		}

		/*
		 * should be:
		 * [^[^]]<key_name>[ <type> <value>]
		 */
		key_name = strtok(line, " \n");
		if (key_name == NULL) {
			continue;
		}

		if (*key_name == '^') {
			key_name++;
			if (*key_name == '^') {
				key_name++;
				delete_with_prefix(handle, key_name);
			} else {
				cs_error_t err;

				err = cmap_delete(handle, key_name);
				if (err != CS_OK) {
					fprintf(stderr, "Can't delete key %s. Error %s\n", key_name, cs_strerror(err));
				}
			}
		} else {
			key_type_s = strtok(NULL, " \n");
			key_value_s = strtok(NULL, " \n");

			/* set_key() must not be given missing fields */
			if (key_type_s == NULL || key_value_s == NULL) {
				fprintf(stderr, "Can't set key %s. Both type and value are required\n", key_name);
				continue;
			}

			set_key(handle, key_name, key_type_s, key_value_s);
		}
	}

	fclose (fh);
}
/*
 * Clear a group of statistics by writing 1 to the key
 * "stats.clear.<clear_opt>".
 *
 * handle    - cmap connection (main() forces the stats map for this action)
 * clear_opt - name of the statistics group, as given to -C
 *
 * Like set_key(), a failure is reported on stderr and ends the program.
 */
static void clear_stats(cmap_handle_t handle, char *clear_opt)
{
	char key_name[CMAP_KEYNAME_MAXLEN + 1];
	cs_error_t err;
	int len;

	/* Bounded formatting: never write past key_name */
	len = snprintf(key_name, sizeof(key_name), "stats.clear.%s", clear_opt);
	if (len < 0 || (size_t)len >= sizeof(key_name)) {
		fprintf(stderr, "Stats name %s is too long\n", clear_opt);
		exit (EXIT_FAILURE);
	}

	err = cmap_set_uint32(handle, key_name, 1);
	if (err != CS_OK) {
		fprintf(stderr, "Failed to clear stats %s. Error %s\n", clear_opt, cs_strerror(err));
		exit (EXIT_FAILURE);
	}
}
/*
 * Entry point of corosync-cmapctl.
 *
 * Parses the options into one action (printing keys is the default),
 * connects to the selected map, performs the action on the remaining
 * arguments and disconnects.
 *
 * Returns 0 on success and EXIT_FAILURE for invalid usage; failures to
 * initialize or finalize the cmap API end the program with EXIT_FAILURE.
 */
int main(int argc, char *argv[])
{
enum user_action action;
int c;
cs_error_t err;
cmap_handle_t handle;
int i;
size_t value_len;
cmap_value_types_t type;
cmap_map_t map = CMAP_MAP_DEFAULT;
int track_prefix;
int map_set = 0;
int no_retries;
char * clear_opt = NULL;
char * settings_file = NULL;
/* Without any action option the keys are printed; tracking is by prefix unless -t is given */
action = ACTION_PRINT_PREFIX;
track_prefix = 1;
while ((c = getopt(argc, argv, "m:hqgsdDtTbp:C:")) != -1) {
switch (c) {
case 'h':
return print_help();
break;
case 'b':
show_binary++;
break;
case 'q':
quiet = 1;
break;
case 'g':
action = ACTION_GET;
break;
case 's':
action = ACTION_SET;
break;
case 'd':
action = ACTION_DELETE;
break;
case 'D':
action = ACTION_DELETE_PREFIX;
break;
case 'p':
settings_file = optarg;
action = ACTION_LOAD;
break;
case 'C':
/* Only the known statistics groups are accepted */
if (strcmp(optarg, "knet") == 0 ||
strcmp(optarg, "totem") == 0 ||
strcmp(optarg, "ipc") == 0 ||
+ strcmp(optarg, "schedmiss") == 0 ||
strcmp(optarg, "all") == 0) {
action = ACTION_CLEARSTATS;
clear_opt = optarg;
/* Force the map to be STATS */
map = CMAP_MAP_STATS;
}
else {
- fprintf(stderr, "argument to -C should be 'knet', 'totem', 'ipc' or 'all'\n");
+ fprintf(stderr, "argument to -C should be 'knet', 'totem', 'ipc', 'schedmiss' or 'all'\n");
return (EXIT_FAILURE);
}
break;
case 't':
/* Track exact key names instead of prefixes */
action = ACTION_TRACK;
track_prefix = 0;
break;
case 'T':
action = ACTION_TRACK;
break;
case 'm':
if (strcmp(optarg, "icmap") == 0 ||
strcmp(optarg, "default") == 0) {
map = CMAP_MAP_ICMAP;
map_set = 1;
}
if (strcmp(optarg, "stats") == 0) {
map = CMAP_MAP_STATS;
map_set = 1;
}
if (!map_set) {
fprintf(stderr, "invalid map name, must be 'default', 'icmap' or 'stats'\n");
return (EXIT_FAILURE);
}
break;
case '?':
return (EXIT_FAILURE);
break;
default:
action = ACTION_PRINT_PREFIX;
break;
}
}
/* From here on argv holds only the non-option arguments (keys, prefixes, values) */
argc -= optind;
argv += optind;
/* Every action except load, clear stats and print needs at least one argument */
if (argc == 0 &&
action != ACTION_LOAD &&
action != ACTION_CLEARSTATS &&
action != ACTION_PRINT_PREFIX) {
fprintf(stderr, "Expected key after options\n");
return (EXIT_FAILURE);
}
/* Connect to the selected map, retrying once per second while asked to try again */
no_retries = 0;
while ((err = cmap_initialize_map(&handle, map)) == CS_ERR_TRY_AGAIN && no_retries++ < MAX_TRY_AGAIN) {
sleep(1);
}
if (err != CS_OK) {
fprintf (stderr, "Failed to initialize the cmap API. Error %s\n", cs_strerror(err));
exit (EXIT_FAILURE);
}
switch (action) {
case ACTION_PRINT_PREFIX:
if (argc == 0) {
print_iter(handle, NULL);
} else {
for (i = 0; i < argc; i++) {
print_iter(handle, argv[i]);
}
}
break;
case ACTION_GET:
for (i = 0; i < argc; i++) {
/* First ask only for length and type; print_key() then fetches the value */
err = cmap_get(handle, argv[i], NULL, &value_len, &type);
if (err == CS_OK) {
print_key(handle, argv[i], value_len, NULL, type);
} else {
fprintf(stderr, "Can't get key %s. Error %s\n", argv[i], cs_strerror(err));
}
}
break;
case ACTION_DELETE:
for (i = 0; i < argc; i++) {
err = cmap_delete(handle, argv[i]);
if (err != CS_OK) {
fprintf(stderr, "Can't delete key %s. Error %s\n", argv[i], cs_strerror(err));
}
}
break;
case ACTION_DELETE_PREFIX:
for (i = 0; i < argc; i++) {
delete_with_prefix(handle, argv[i]);
}
break;
case ACTION_LOAD:
read_in_config_file(handle, settings_file);
break;
case ACTION_TRACK:
for (i = 0; i < argc; i++) {
add_track(handle, argv[i], track_prefix);
}
track_changes(handle);
break;
case ACTION_SET:
if (argc < 3) {
fprintf(stderr, "At least 3 parameters are expected for set\n");
return (EXIT_FAILURE);
}
set_key(handle, argv[0], argv[1], argv[2]);
break;
case ACTION_CLEARSTATS:
clear_stats(handle, clear_opt);
break;
}
err = cmap_finalize(handle);
if (err != CS_OK) {
fprintf (stderr, "Failed to finalize the cmap API. Error %s\n", cs_strerror(err));
exit (EXIT_FAILURE);
}
return (0);
}