diff --git a/exec/main.c b/exec/main.c index 7a471a16..fb0486e7 100644 --- a/exec/main.c +++ b/exec/main.c @@ -1,1588 +1,1590 @@ /* * Copyright (c) 2002-2006 MontaVista Software, Inc. * Copyright (c) 2006-2018 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /** * \mainpage Corosync * * This is the doxygen generated developer documentation for the Corosync * project. For more information about Corosync, please see the project * web site, corosync.org. * * \section license License * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_LIBSYSTEMD #include #endif #include #include #include #include #include #include #include #include #include #include #include #include "quorum.h" #include "totemsrp.h" #include "logconfig.h" #include "totemconfig.h" #include "main.h" #include "sync.h" #include "timer.h" #include "util.h" #include "apidef.h" #include "service.h" #include "schedwrk.h" #include "ipcs_stats.h" #include "stats.h" #ifdef HAVE_SMALL_MEMORY_FOOTPRINT #define IPC_LOGSYS_SIZE 1024*64 #else #define IPC_LOGSYS_SIZE 8192*128 #endif /* * LibQB adds default "*" syslog filter so we have to set syslog_priority as low * as possible so filters applied later in _logsys_config_apply_per_file takes * effect. */ LOGSYS_DECLARE_SYSTEM ("corosync", LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_OUTPUT_SYSLOG, LOG_DAEMON, LOG_EMERG); LOGSYS_DECLARE_SUBSYS ("MAIN"); #define SERVER_BACKLOG 5 static int sched_priority = 0; static unsigned int service_count = 32; static struct totem_logging_configuration totem_logging_configuration; static struct corosync_api_v1 *api = NULL; static int sync_in_process = 1; static qb_loop_t *corosync_poll_handle; struct sched_param global_sched_param; static corosync_timer_handle_t corosync_stats_timer_handle; static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid"; static char corosync_config_file[PATH_MAX + 1] = COROSYSCONFDIR "/corosync.conf"; qb_loop_t *cs_poll_handle_get (void) { return (corosync_poll_handle); } int cs_poll_dispatch_add (qb_loop_t * handle, int fd, int events, void *data, int (*dispatch_fn) (int fd, int revents, void *data)) { return qb_loop_poll_add(handle, QB_LOOP_MED, fd, events, data, dispatch_fn); } int cs_poll_dispatch_delete(qb_loop_t * handle, int fd) { return qb_loop_poll_del(handle, fd); } void corosync_state_dump (void) { int i; for (i = 0; i < SERVICES_COUNT_MAX; i++) { if (corosync_service[i] && corosync_service[i]->exec_dump_fn) { corosync_service[i]->exec_dump_fn (); } } } const char *corosync_get_config_file(void) { return (corosync_config_file); } static void corosync_blackbox_write_to_file (void) { char fname[PATH_MAX]; char fdata_fname[PATH_MAX]; char time_str[PATH_MAX]; struct tm cur_time_tm; time_t cur_time_t; ssize_t res; cur_time_t = time(NULL); localtime_r(&cur_time_t, &cur_time_tm); strftime(time_str, PATH_MAX, "%Y-%m-%dT%H:%M:%S", &cur_time_tm); if (snprintf(fname, PATH_MAX, "%s/fdata-%s-%lld", get_state_dir(), time_str, (long long int)getpid()) >= PATH_MAX) { log_printf(LOGSYS_LEVEL_ERROR, "Can't snprintf blackbox file name"); return ; } if ((res = qb_log_blackbox_write_to_file(fname)) < 0) { LOGSYS_PERROR(-res, LOGSYS_LEVEL_ERROR, "Can't store blackbox file"); return ; } snprintf(fdata_fname, sizeof(fdata_fname), "%s/fdata", get_state_dir()); unlink(fdata_fname); if (symlink(fname, fdata_fname) == -1) { log_printf(LOGSYS_LEVEL_ERROR, "Can't create symlink to '%s' for corosync blackbox file '%s'", fname, fdata_fname); } } static void unlink_all_completed (void) { api->timer_delete (corosync_stats_timer_handle); qb_loop_stop (corosync_poll_handle); icmap_fini(); } void corosync_shutdown_request (void) { corosync_service_unlink_all (api, unlink_all_completed); } static int32_t sig_diag_handler (int num, void *data) { corosync_state_dump (); return 0; } static int32_t sig_exit_handler (int num, void *data) { log_printf(LOGSYS_LEVEL_NOTICE, "Node was shut down by a signal"); corosync_service_unlink_all (api, unlink_all_completed); return 0; } static void sigsegv_handler (int num) { (void)signal (num, SIG_DFL); corosync_blackbox_write_to_file (); qb_log_fini(); raise (num); } #define LOCALHOST_IP inet_addr("127.0.0.1") static void *corosync_group_handle; static struct totempg_group corosync_group = { .group = "a", .group_len = 1 }; static void serialize_lock (void) { } static void serialize_unlock (void) { } static void corosync_sync_completed (void) { log_printf (LOGSYS_LEVEL_NOTICE, "Completed service synchronization, ready to provide service."); sync_in_process = 0; cs_ipcs_sync_state_changed(sync_in_process); cs_ipc_allow_connections(1); /* * Inform totem to start using new message queue again */ totempg_trans_ack(); #ifdef HAVE_LIBSYSTEMD sd_notify (0, "READY=1"); #endif } static int corosync_sync_callbacks_retrieve ( int service_id, struct sync_callbacks *callbacks) { if (corosync_service[service_id] == NULL) { return (-1); } if (callbacks == NULL) { return (0); } callbacks->name = corosync_service[service_id]->name; callbacks->sync_init = corosync_service[service_id]->sync_init; callbacks->sync_process = corosync_service[service_id]->sync_process; callbacks->sync_activate = corosync_service[service_id]->sync_activate; callbacks->sync_abort = corosync_service[service_id]->sync_abort; return (0); } static struct memb_ring_id corosync_ring_id; static void member_object_joined (unsigned int nodeid) { char member_ip[ICMAP_KEYNAME_MAXLEN]; char member_join_count[ICMAP_KEYNAME_MAXLEN]; char member_status[ICMAP_KEYNAME_MAXLEN]; snprintf(member_ip, ICMAP_KEYNAME_MAXLEN, "runtime.members.%u.ip", nodeid); snprintf(member_join_count, ICMAP_KEYNAME_MAXLEN, "runtime.members.%u.join_count", nodeid); snprintf(member_status, ICMAP_KEYNAME_MAXLEN, "runtime.members.%u.status", nodeid); if (icmap_get(member_ip, NULL, NULL, NULL) == CS_OK) { icmap_inc(member_join_count); icmap_set_string(member_status, "joined"); } else { icmap_set_string(member_ip, (char*)api->totem_ifaces_print (nodeid)); icmap_set_uint32(member_join_count, 1); icmap_set_string(member_status, "joined"); } log_printf (LOGSYS_LEVEL_DEBUG, "Member joined: %s", api->totem_ifaces_print (nodeid)); } static void member_object_left (unsigned int nodeid) { char member_status[ICMAP_KEYNAME_MAXLEN]; snprintf(member_status, ICMAP_KEYNAME_MAXLEN, "runtime.members.%u.status", nodeid); icmap_set_string(member_status, "left"); log_printf (LOGSYS_LEVEL_DEBUG, "Member left: %s", api->totem_ifaces_print (nodeid)); } static void confchg_fn ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id) { int i; int abort_activate = 0; if (sync_in_process == 1) { abort_activate = 1; } sync_in_process = 1; cs_ipcs_sync_state_changed(sync_in_process); memcpy (&corosync_ring_id, ring_id, sizeof (struct memb_ring_id)); for (i = 0; i < left_list_entries; i++) { member_object_left (left_list[i]); } for (i = 0; i < joined_list_entries; i++) { member_object_joined (joined_list[i]); } /* * Call configuration change for all services */ for (i = 0; i < service_count; i++) { if (corosync_service[i] && corosync_service[i]->confchg_fn) { corosync_service[i]->confchg_fn (configuration_type, member_list, member_list_entries, left_list, left_list_entries, joined_list, joined_list_entries, ring_id); } } if (abort_activate) { sync_abort (); } if (configuration_type == TOTEM_CONFIGURATION_TRANSITIONAL) { sync_save_transitional (member_list, member_list_entries, ring_id); } if (configuration_type == TOTEM_CONFIGURATION_REGULAR) { sync_start (member_list, member_list_entries, ring_id); } } static void priv_drop (void) { return; /* TODO: we are still not dropping privs */ } static void corosync_tty_detach (void) { int devnull; /* * Disconnect from TTY if this is not a debug run */ switch (fork ()) { case -1: corosync_exit_error (COROSYNC_DONE_FORK); break; case 0: /* * child which is disconnected, run this process */ break; default: exit (0); break; } /* Create new session */ (void)setsid(); /* * Map stdin/out/err to /dev/null. */ devnull = open("/dev/null", O_RDWR); if (devnull == -1) { corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR); } if (dup2(devnull, 0) < 0 || dup2(devnull, 1) < 0 || dup2(devnull, 2) < 0) { close(devnull); corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR); } close(devnull); } static void corosync_mlockall (void) { int res; struct rlimit rlimit; rlimit.rlim_cur = RLIM_INFINITY; rlimit.rlim_max = RLIM_INFINITY; #ifndef RLIMIT_MEMLOCK #define RLIMIT_MEMLOCK RLIMIT_VMEM #endif setrlimit (RLIMIT_MEMLOCK, &rlimit); res = mlockall (MCL_CURRENT | MCL_FUTURE); if (res == -1) { LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING, "Could not lock memory of service to avoid page faults"); }; } static void corosync_totem_stats_updater (void *data) { totempg_stats_t * stats; uint32_t total_mtt_rx_token; uint32_t total_backlog_calc; uint32_t total_token_holdtime; int t, prev; int32_t token_count; const char *cstr; stats = api->totem_get_stats(); stats->srp->firewall_enabled_or_nic_failure = stats->srp->continuous_gather > MAX_NO_CONT_GATHER ? 1 : 0; if (stats->srp->continuous_gather > MAX_NO_CONT_GATHER || stats->srp->continuous_sendmsg_failures > MAX_NO_CONT_SENDMSG_FAILURES) { cstr = ""; if (stats->srp->continuous_sendmsg_failures > MAX_NO_CONT_SENDMSG_FAILURES) { cstr = "number of multicast sendmsg failures is above threshold"; } if (stats->srp->continuous_gather > MAX_NO_CONT_GATHER) { cstr = "totem is continuously in gather state"; } log_printf (LOGSYS_LEVEL_WARNING, "Totem is unable to form a cluster because of an " "operating system or network fault (reason: %s). The most common " "cause of this message is that the local firewall is " "configured improperly.", cstr); stats->srp->firewall_enabled_or_nic_failure = 1; } else { stats->srp->firewall_enabled_or_nic_failure = 0; } total_mtt_rx_token = 0; total_token_holdtime = 0; total_backlog_calc = 0; token_count = 0; t = stats->srp->latest_token; while (1) { if (t == 0) prev = TOTEM_TOKEN_STATS_MAX - 1; else prev = t - 1; if (prev == stats->srp->earliest_token) break; /* if tx == 0, then dropped token (not ours) */ if (stats->srp->token[t].tx != 0 || (stats->srp->token[t].rx - stats->srp->token[prev].rx) > 0 ) { total_mtt_rx_token += (stats->srp->token[t].rx - stats->srp->token[prev].rx); total_token_holdtime += (stats->srp->token[t].tx - stats->srp->token[t].rx); total_backlog_calc += stats->srp->token[t].backlog_calc; token_count++; } t = prev; } if (token_count) { stats->srp->mtt_rx_token = (total_mtt_rx_token / token_count); stats->srp->avg_token_workload = (total_token_holdtime / token_count); stats->srp->avg_backlog_calc = (total_backlog_calc / token_count); } stats->srp->time_since_token_last_received = qb_util_nano_current_get () / QB_TIME_NS_IN_MSEC - stats->srp->token[stats->srp->latest_token].rx; stats_trigger_trackers(); api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL, corosync_totem_stats_updater, &corosync_stats_timer_handle); } static void corosync_totem_stats_init (void) { /* start stats timer */ api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL, corosync_totem_stats_updater, &corosync_stats_timer_handle); } static void deliver_fn ( unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required) { const struct qb_ipc_request_header *header; int32_t service; int32_t fn_id; uint32_t id; header = msg; if (endian_conversion_required) { id = swab32 (header->id); } else { id = header->id; } /* * Call the proper executive handler */ service = id >> 16; fn_id = id & 0xffff; if (!corosync_service[service]) { return; } if (fn_id >= corosync_service[service]->exec_engine_count) { log_printf(LOGSYS_LEVEL_WARNING, "discarded unknown message %d for service %d (max id %d)", fn_id, service, corosync_service[service]->exec_engine_count); return; } icmap_fast_inc(service_stats_rx[service][fn_id]); if (endian_conversion_required) { assert(corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn != NULL); corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn ((void *)msg); } corosync_service[service]->exec_engine[fn_id].exec_handler_fn (msg, nodeid); } int main_mcast ( const struct iovec *iovec, unsigned int iov_len, unsigned int guarantee) { const struct qb_ipc_request_header *req = iovec->iov_base; int32_t service; int32_t fn_id; service = req->id >> 16; fn_id = req->id & 0xffff; if (corosync_service[service]) { icmap_fast_inc(service_stats_tx[service][fn_id]); } return (totempg_groups_mcast_joined (corosync_group_handle, iovec, iov_len, guarantee)); } static void corosync_ring_id_create_or_load ( struct memb_ring_id *memb_ring_id, unsigned int nodeid) { int fd; int res = 0; char filename[PATH_MAX]; snprintf (filename, sizeof(filename), "%s/ringid_%u", get_state_dir(), nodeid); fd = open (filename, O_RDONLY, 0700); /* * If file can be opened and read, read the ring id */ if (fd != -1) { res = read (fd, &memb_ring_id->seq, sizeof (uint64_t)); close (fd); } /* * If file could not be opened or read, create a new ring id */ if ((fd == -1) || (res != sizeof (uint64_t))) { memb_ring_id->seq = 0; umask(0); fd = open (filename, O_CREAT|O_RDWR, 0700); if (fd != -1) { res = write (fd, &memb_ring_id->seq, sizeof (uint64_t)); close (fd); if (res == -1) { LOGSYS_PERROR (errno, LOGSYS_LEVEL_ERROR, "Couldn't write ringid file '%s'", filename); corosync_exit_error (COROSYNC_DONE_STORE_RINGID); } } else { LOGSYS_PERROR (errno, LOGSYS_LEVEL_ERROR, "Couldn't create ringid file '%s'", filename); corosync_exit_error (COROSYNC_DONE_STORE_RINGID); } } memb_ring_id->rep = nodeid; } static void corosync_ring_id_store ( const struct memb_ring_id *memb_ring_id, unsigned int nodeid) { char filename[PATH_MAX]; int fd; int res; snprintf (filename, sizeof(filename), "%s/ringid_%u", get_state_dir(), nodeid); fd = open (filename, O_WRONLY, 0700); if (fd == -1) { fd = open (filename, O_CREAT|O_RDWR, 0700); } if (fd == -1) { LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "Couldn't store new ring id " CS_PRI_RING_ID_SEQ " to stable storage", memb_ring_id->seq); corosync_exit_error (COROSYNC_DONE_STORE_RINGID); } log_printf (LOGSYS_LEVEL_DEBUG, "Storing new sequence id for ring " CS_PRI_RING_ID_SEQ, memb_ring_id->seq); res = write (fd, &memb_ring_id->seq, sizeof(memb_ring_id->seq)); close (fd); if (res != sizeof(memb_ring_id->seq)) { LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "Couldn't store new ring id " CS_PRI_RING_ID_SEQ " to stable storage", memb_ring_id->seq); corosync_exit_error (COROSYNC_DONE_STORE_RINGID); } } static qb_loop_timer_handle recheck_the_q_level_timer; void corosync_recheck_the_q_level(void *data) { totempg_check_q_level(corosync_group_handle); if (cs_ipcs_q_level_get() == TOTEM_Q_LEVEL_CRITICAL) { qb_loop_timer_add(cs_poll_handle_get(), QB_LOOP_MED, 1*QB_TIME_NS_IN_MSEC, NULL, corosync_recheck_the_q_level, &recheck_the_q_level_timer); } } struct sending_allowed_private_data_struct { int reserved_msgs; }; int corosync_sending_allowed ( unsigned int service, unsigned int id, const void *msg, void *sending_allowed_private_data) { struct sending_allowed_private_data_struct *pd = (struct sending_allowed_private_data_struct *)sending_allowed_private_data; struct iovec reserve_iovec; struct qb_ipc_request_header *header = (struct qb_ipc_request_header *)msg; int sending_allowed; reserve_iovec.iov_base = (char *)header; reserve_iovec.iov_len = header->size; pd->reserved_msgs = totempg_groups_joined_reserve ( corosync_group_handle, &reserve_iovec, 1); if (pd->reserved_msgs == -1) { return -EINVAL; } /* Message ID out of range */ if (id >= corosync_service[service]->lib_engine_count) { return -EINVAL; } sending_allowed = QB_FALSE; if (corosync_quorum_is_quorate() == 1 || corosync_service[service]->allow_inquorate == CS_LIB_ALLOW_INQUORATE) { // we are quorate // now check flow control if (corosync_service[service]->lib_engine[id].flow_control == CS_LIB_FLOW_CONTROL_NOT_REQUIRED) { sending_allowed = QB_TRUE; } else if (pd->reserved_msgs && sync_in_process == 0) { sending_allowed = QB_TRUE; } else if (pd->reserved_msgs == 0) { return -ENOBUFS; } else /* (sync_in_process) */ { return -EINPROGRESS; } } else { return -EHOSTUNREACH; } return (sending_allowed); } void corosync_sending_allowed_release (void *sending_allowed_private_data) { struct sending_allowed_private_data_struct *pd = (struct sending_allowed_private_data_struct *)sending_allowed_private_data; if (pd->reserved_msgs == -1) { return; } totempg_groups_joined_release (pd->reserved_msgs); } int message_source_is_local (const mar_message_source_t *source) { int ret = 0; assert (source != NULL); if (source->nodeid == totempg_my_nodeid_get ()) { ret = 1; } return ret; } void message_source_set ( mar_message_source_t *source, void *conn) { assert ((source != NULL) && (conn != NULL)); memset (source, 0, sizeof (mar_message_source_t)); source->nodeid = totempg_my_nodeid_get (); source->conn = conn; } struct scheduler_pause_timeout_data { struct totem_config *totem_config; qb_loop_timer_handle handle; unsigned long long tv_prev; unsigned long long max_tv_diff; }; static void timer_function_scheduler_timeout (void *data) { struct scheduler_pause_timeout_data *timeout_data = (struct scheduler_pause_timeout_data *)data; unsigned long long tv_current; unsigned long long tv_diff; tv_current = qb_util_nano_current_get (); if (timeout_data->tv_prev == 0) { /* * Initial call -> just pretent everything is ok */ timeout_data->tv_prev = tv_current; timeout_data->max_tv_diff = 0; } tv_diff = tv_current - timeout_data->tv_prev; timeout_data->tv_prev = tv_current; if (tv_diff > timeout_data->max_tv_diff) { log_printf (LOGSYS_LEVEL_WARNING, "Corosync main process was not scheduled for %0.4f ms " "(threshold is %0.4f ms). Consider token timeout increase.", (float)tv_diff / QB_TIME_NS_IN_MSEC, (float)timeout_data->max_tv_diff / QB_TIME_NS_IN_MSEC); + + stats_add_schedmiss_event(tv_current / 1000, (float)tv_diff / QB_TIME_NS_IN_MSEC); } /* * Set next threshold, because token_timeout can change */ timeout_data->max_tv_diff = timeout_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC * 0.8; qb_loop_timer_add (corosync_poll_handle, QB_LOOP_MED, timeout_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC / 3, timeout_data, timer_function_scheduler_timeout, &timeout_data->handle); } static int corosync_set_rr_scheduler (void) { int ret_val = 0; #if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) && defined(HAVE_SCHED_SETSCHEDULER) int res; sched_priority = sched_get_priority_max (SCHED_RR); if (sched_priority != -1) { global_sched_param.sched_priority = sched_priority; res = sched_setscheduler (0, SCHED_RR, &global_sched_param); if (res == -1) { LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "Could not set SCHED_RR at priority %d", global_sched_param.sched_priority); global_sched_param.sched_priority = 0; #ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET qb_log_thread_priority_set (SCHED_OTHER, 0); #endif ret_val = -1; } else { /* * Turn on SCHED_RR in logsys system */ #ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET res = qb_log_thread_priority_set (SCHED_RR, sched_priority); #else res = -1; #endif if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Could not set logsys thread priority." " Can't continue because of priority inversions."); corosync_exit_error (COROSYNC_DONE_LOGSETUP); } } } else { LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING, "Could not get maximum scheduler priority"); sched_priority = 0; ret_val = -1; } #else log_printf(LOGSYS_LEVEL_WARNING, "The Platform is missing process priority setting features. Leaving at default."); ret_val = -1; #endif return (ret_val); } /* The basename man page contains scary warnings about thread-safety and portability, hence this */ static const char *corosync_basename(const char *file_name) { char *base; base = strrchr (file_name, '/'); if (base) { return base + 1; } return file_name; } static void _logsys_log_printf(int level, int subsys, const char *function_name, const char *file_name, int file_line, const char *format, ...) __attribute__((format(printf, 6, 7))); static void _logsys_log_printf(int level, int subsys, const char *function_name, const char *file_name, int file_line, const char *format, ...) { va_list ap; va_start(ap, format); qb_log_from_external_source_va(function_name, corosync_basename(file_name), format, level, file_line, subsys, ap); va_end(ap); } static void fplay_key_change_notify_fn ( int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data) { if (strcmp(key_name, "runtime.blackbox.dump_flight_data") == 0) { fprintf(stderr,"Writetofile\n"); corosync_blackbox_write_to_file (); } if (strcmp(key_name, "runtime.blackbox.dump_state") == 0) { fprintf(stderr,"statefump\n"); corosync_state_dump (); } } static void corosync_fplay_control_init (void) { icmap_track_t track = NULL; icmap_set_string("runtime.blackbox.dump_flight_data", "no"); icmap_set_string("runtime.blackbox.dump_state", "no"); icmap_track_add("runtime.blackbox.dump_flight_data", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY, fplay_key_change_notify_fn, NULL, &track); icmap_track_add("runtime.blackbox.dump_state", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY, fplay_key_change_notify_fn, NULL, &track); } static void force_gather_notify_fn( int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data) { char *key_val; if (icmap_get_string(key_name, &key_val) == CS_OK && strcmp(key_val, "no") == 0) goto out; icmap_set_string("runtime.force_gather", "no"); if (strcmp(key_name, "runtime.force_gather") == 0) { log_printf(LOGSYS_LEVEL_ERROR, "Forcing into GATHER state\n"); totempg_force_gather(); } out: free(key_val); } static void corosync_force_gather_init (void) { icmap_track_t track = NULL; icmap_set_string("runtime.force_gather", "no"); icmap_track_add("runtime.force_gather", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY, force_gather_notify_fn, NULL, &track); } /* * Set RO flag for keys, which ether doesn't make sense to change by user (statistic) * or which when changed are not reflected by runtime (totem.crypto_cipher, ...). * * Also some RO keys cannot be determined in this stage, so they are set later in * other functions (like nodelist.local_node_pos, ...) */ static void set_icmap_ro_keys_flag (void) { /* * Set RO flag for all keys of internal configuration and runtime statistics */ icmap_set_ro_access("internal_configuration.", CS_TRUE, CS_TRUE); icmap_set_ro_access("runtime.services.", CS_TRUE, CS_TRUE); icmap_set_ro_access("runtime.config.", CS_TRUE, CS_TRUE); icmap_set_ro_access("runtime.totem.", CS_TRUE, CS_TRUE); icmap_set_ro_access("uidgid.config.", CS_TRUE, CS_TRUE); icmap_set_ro_access("system.", CS_TRUE, CS_TRUE); icmap_set_ro_access("nodelist.", CS_TRUE, CS_TRUE); /* * Set RO flag for constrete keys of configuration which can't be changed * during runtime */ icmap_set_ro_access("totem.crypto_cipher", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.crypto_hash", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.keyfile", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.key", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.secauth", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.ip_version", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.rrp_mode", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.transport", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.cluster_name", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.netmtu", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.threads", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.version", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.nodeid", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.clear_node_high_bit", CS_FALSE, CS_TRUE); icmap_set_ro_access("config.reload_in_progress", CS_FALSE, CS_TRUE); icmap_set_ro_access("config.totemconfig_reload_in_progress", CS_FALSE, CS_TRUE); } static void main_service_ready (void) { int res; /* * This must occur after totempg is initialized because "this_ip" must be set */ res = corosync_service_defaults_link_and_init (api); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Could not initialize default services"); corosync_exit_error (COROSYNC_DONE_INIT_SERVICES); } cs_ipcs_init(); corosync_totem_stats_init (); corosync_fplay_control_init (); corosync_force_gather_init (); sync_init ( corosync_sync_callbacks_retrieve, corosync_sync_completed); } static enum e_corosync_done corosync_flock (const char *lockfile, pid_t pid) { struct flock lock; enum e_corosync_done err; char pid_s[17]; int fd_flag; int lf; err = COROSYNC_DONE_EXIT; lf = open (lockfile, O_WRONLY | O_CREAT, 0640); if (lf == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create lock file."); return (COROSYNC_DONE_ACQUIRE_LOCK); } retry_fcntl: lock.l_type = F_WRLCK; lock.l_start = 0; lock.l_whence = SEEK_SET; lock.l_len = 0; if (fcntl (lf, F_SETLK, &lock) == -1) { switch (errno) { case EINTR: goto retry_fcntl; break; case EAGAIN: case EACCES: log_printf (LOGSYS_LEVEL_ERROR, "Another Corosync instance is already running."); err = COROSYNC_DONE_ALREADY_RUNNING; goto error_close; break; default: log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't acquire lock. Error was %s", strerror(errno)); err = COROSYNC_DONE_ACQUIRE_LOCK; goto error_close; break; } } if (ftruncate (lf, 0) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't truncate lock file. Error was %s", strerror (errno)); err = COROSYNC_DONE_ACQUIRE_LOCK; goto error_close_unlink; } memset (pid_s, 0, sizeof (pid_s)); snprintf (pid_s, sizeof (pid_s) - 1, "%u\n", pid); retry_write: if (write (lf, pid_s, strlen (pid_s)) != strlen (pid_s)) { if (errno == EINTR) { goto retry_write; } else { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't write pid to lock file. " "Error was %s", strerror (errno)); err = COROSYNC_DONE_ACQUIRE_LOCK; goto error_close_unlink; } } if ((fd_flag = fcntl (lf, F_GETFD, 0)) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't get close-on-exec flag from lock file. " "Error was %s", strerror (errno)); err = COROSYNC_DONE_ACQUIRE_LOCK; goto error_close_unlink; } fd_flag |= FD_CLOEXEC; if (fcntl (lf, F_SETFD, fd_flag) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't set close-on-exec flag to lock file. " "Error was %s", strerror (errno)); err = COROSYNC_DONE_ACQUIRE_LOCK; goto error_close_unlink; } return (err); error_close_unlink: unlink (lockfile); error_close: close (lf); return (err); } static int corosync_move_to_root_cgroup(void) { FILE *f; int res = -1; /* * /sys/fs/cgroup is hardcoded, because most of Linux distributions are now * using systemd and systemd uses hardcoded path of cgroup mount point. * * This feature is expected to be removed as soon as systemd gets support * for managing RT configuration. */ f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt"); if (f == NULL) { log_printf(LOGSYS_LEVEL_DEBUG, "cpu.rt_runtime_us doesn't exists -> " "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED"); res = 0; goto exit_res; } (void)fclose(f); f = fopen("/sys/fs/cgroup/cpu/tasks", "w"); if (f == NULL) { log_printf(LOGSYS_LEVEL_WARNING, "Can't open cgroups tasks file for writing"); goto exit_res; } if (fprintf(f, "%jd\n", (intmax_t)getpid()) <= 0) { log_printf(LOGSYS_LEVEL_WARNING, "Can't write corosync pid into cgroups tasks file"); goto close_and_exit_res; } close_and_exit_res: if (fclose(f) != 0) { log_printf(LOGSYS_LEVEL_WARNING, "Can't close cgroups tasks file"); goto exit_res; } exit_res: return (res); } int main (int argc, char **argv, char **envp) { const char *error_string; struct totem_config totem_config; int res, ch; int background, sched_rr, prio, testonly, move_to_root_cgroup; struct stat stat_out; enum e_corosync_done flock_err; uint64_t totem_config_warnings; struct scheduler_pause_timeout_data scheduler_pause_timeout_data; long int tmpli; char *ep; char *tmp_str; int log_subsys_id_totem; /* default configuration */ background = 1; testonly = 0; while ((ch = getopt (argc, argv, "c:ftv")) != EOF) { switch (ch) { case 'c': res = snprintf(corosync_config_file, sizeof(corosync_config_file), "%s", optarg); if (res >= sizeof(corosync_config_file)) { fprintf (stderr, "Config file path too long.\n"); syslog (LOGSYS_LEVEL_ERROR, "Config file path too long."); logsys_system_fini(); return EXIT_FAILURE; } break; case 'f': background = 0; break; case 't': testonly = 1; break; case 'v': printf ("Corosync Cluster Engine, version '%s'\n", VERSION); printf ("Copyright (c) 2006-2018 Red Hat, Inc.\n"); logsys_system_fini(); return EXIT_SUCCESS; break; default: fprintf(stderr, \ "usage:\n"\ " -c : Corosync config file path.\n"\ " -f : Start application in foreground.\n"\ " -t : Test configuration and exit.\n"\ " -v : Display version and SVN revision of Corosync and exit.\n"); logsys_system_fini(); return EXIT_FAILURE; } } /* * Other signals are registered later via qb_loop_signal_add */ (void)signal (SIGSEGV, sigsegv_handler); (void)signal (SIGABRT, sigsegv_handler); #if MSG_NOSIGNAL != 0 (void)signal (SIGPIPE, SIG_IGN); #endif if (icmap_init() != CS_OK) { fprintf (stderr, "Corosync Executive couldn't initialize configuration component.\n"); syslog (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't initialize configuration component."); corosync_exit_error (COROSYNC_DONE_ICMAP); } set_icmap_ro_keys_flag(); /* * Initialize the corosync_api_v1 definition */ api = apidef_get (); res = coroparse_configparse(icmap_get_global_map(), &error_string); if (res == -1) { /* * Logsys can't log properly at this early stage, and we need to get this message out * */ fprintf (stderr, "%s\n", error_string); syslog (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); } if (stats_map_init(api) != CS_OK) { fprintf (stderr, "Corosync Executive couldn't initialize statistics component.\n"); syslog (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't initialize statistics component."); corosync_exit_error (COROSYNC_DONE_STATS); } res = corosync_log_config_read (&error_string); if (res == -1) { /* * if we are here, we _must_ flush the logsys queue * and try to inform that we couldn't read the config. * this is a desperate attempt before certain death * and there is no guarantee that we can print to stderr * nor that logsys is sending the messages where we expect. */ log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); fprintf(stderr, "%s", error_string); syslog (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD); } if (!testonly) { log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine %s starting up", VERSION); log_printf (LOGSYS_LEVEL_INFO, "Corosync built-in features:" PACKAGE_FEATURES ""); } /* * Create totem logsys subsys before totem_config_read so log functions can be used */ log_subsys_id_totem = _logsys_subsys_create("TOTEM", "totem," "totemip.c,totemconfig.c,totemcrypto.c,totemsrp.c," "totempg.c,totemudp.c,totemudpu.c,totemnet.c,totemknet.c"); /* * Make sure required directory is present */ res = stat (get_state_dir(), &stat_out); if ((res == -1) || (res == 0 && !S_ISDIR(stat_out.st_mode))) { log_printf (LOGSYS_LEVEL_ERROR, "State directory %s not present. Please create it.", get_state_dir()); corosync_exit_error (COROSYNC_DONE_DIR_NOT_PRESENT); } res = chdir(get_state_dir()); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Cannot chdir to state directory %s. " "Please make sure it has correct context and rights.", get_state_dir()); corosync_exit_error (COROSYNC_DONE_DIR_NOT_PRESENT); } res = totem_config_read (&totem_config, &error_string, &totem_config_warnings); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); } if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_IGNORED) { log_printf (LOGSYS_LEVEL_WARNING, "member section is used together with nodelist. Members ignored."); } if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED) { log_printf (LOGSYS_LEVEL_WARNING, "member section is deprecated."); } if (totem_config_warnings & TOTEM_CONFIG_WARNING_TOTEM_NODEID_IGNORED) { log_printf (LOGSYS_LEVEL_WARNING, "nodeid appears both in totem section and nodelist. Nodelist one is used."); } if (totem_config_warnings & TOTEM_CONFIG_BINDNETADDR_NODELIST_SET) { log_printf (LOGSYS_LEVEL_WARNING, "interface section bindnetaddr is used together with nodelist. " "Nodelist one is going to be used."); } if (totem_config_warnings != 0) { log_printf (LOGSYS_LEVEL_WARNING, "Please migrate config file to nodelist."); } res = totem_config_keyread (&totem_config, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); } res = totem_config_validate (&totem_config, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); } if (testonly) { corosync_exit_error (COROSYNC_DONE_EXIT); } move_to_root_cgroup = 1; if (icmap_get_string("system.move_to_root_cgroup", &tmp_str) == CS_OK) { if (strcmp(tmp_str, "yes") != 0) { move_to_root_cgroup = 0; } free(tmp_str); } /* * Try to move corosync into root cpu cgroup. Failure is not fatal and * error is deliberately ignored. */ if (move_to_root_cgroup) { (void)corosync_move_to_root_cgroup(); } sched_rr = 1; if (icmap_get_string("system.sched_rr", &tmp_str) == CS_OK) { if (strcmp(tmp_str, "yes") != 0) { sched_rr = 0; } free(tmp_str); } prio = 0; if (icmap_get_string("system.priority", &tmp_str) == CS_OK) { if (strcmp(tmp_str, "max") == 0) { prio = INT_MIN; } else if (strcmp(tmp_str, "min") == 0) { prio = INT_MAX; } else { errno = 0; tmpli = strtol(tmp_str, &ep, 10); if (errno != 0 || *ep != '\0' || tmpli > INT_MAX || tmpli < INT_MIN) { log_printf (LOGSYS_LEVEL_ERROR, "Priority value %s is invalid", tmp_str); corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); } prio = tmpli; } free(tmp_str); } /* * Set round robin realtime scheduling with priority 99 */ if (sched_rr) { if (corosync_set_rr_scheduler () != 0) { prio = INT_MIN; } else { prio = 0; } } if (prio != 0) { if (setpriority(PRIO_PGRP, 0, prio) != 0) { LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "Could not set priority %d", prio); } } totem_config.totem_memb_ring_id_create_or_load = corosync_ring_id_create_or_load; totem_config.totem_memb_ring_id_store = corosync_ring_id_store; totem_config.totem_logging_configuration = totem_logging_configuration; totem_config.totem_logging_configuration.log_subsys_id = log_subsys_id_totem; totem_config.totem_logging_configuration.log_level_security = LOGSYS_LEVEL_WARNING; totem_config.totem_logging_configuration.log_level_error = LOGSYS_LEVEL_ERROR; totem_config.totem_logging_configuration.log_level_warning = LOGSYS_LEVEL_WARNING; totem_config.totem_logging_configuration.log_level_notice = LOGSYS_LEVEL_NOTICE; totem_config.totem_logging_configuration.log_level_debug = LOGSYS_LEVEL_DEBUG; totem_config.totem_logging_configuration.log_level_trace = LOGSYS_LEVEL_TRACE; totem_config.totem_logging_configuration.log_printf = _logsys_log_printf; logsys_config_apply(); /* * Now we are fully initialized. */ if (background) { logsys_blackbox_prefork(); corosync_tty_detach (); logsys_blackbox_postfork(); log_printf (LOGSYS_LEVEL_DEBUG, "Corosync TTY detached"); } /* * Lock all memory to avoid page faults which may interrupt * application healthchecking */ corosync_mlockall (); corosync_poll_handle = qb_loop_create (); memset(&scheduler_pause_timeout_data, 0, sizeof(scheduler_pause_timeout_data)); scheduler_pause_timeout_data.totem_config = &totem_config; timer_function_scheduler_timeout (&scheduler_pause_timeout_data); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW, SIGUSR2, NULL, sig_diag_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGINT, NULL, sig_exit_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGQUIT, NULL, sig_exit_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGTERM, NULL, sig_exit_handler, NULL); if (logsys_thread_start() != 0) { log_printf (LOGSYS_LEVEL_ERROR, "Can't initialize log thread"); corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD); } if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) != COROSYNC_DONE_EXIT) { corosync_exit_error (flock_err); } /* * if totempg_initialize doesn't have root priveleges, it cannot * bind to a specific interface. This only matters if * there is more then one interface in a system, so * in this case, only a warning is printed */ /* * Join multicast group and setup delivery * and configuration change functions */ if (totempg_initialize ( corosync_poll_handle, &totem_config) != 0) { log_printf (LOGSYS_LEVEL_ERROR, "Can't initialize TOTEM layer"); corosync_exit_error (COROSYNC_DONE_FATAL_ERR); } totempg_service_ready_register ( main_service_ready); totempg_groups_initialize ( &corosync_group_handle, deliver_fn, confchg_fn); totempg_groups_join ( corosync_group_handle, &corosync_group, 1); /* * Drop root privleges to user 'corosync' * TODO: Don't really need full root capabilities; * needed capabilities are: * CAP_NET_RAW (bindtodevice) * CAP_SYS_NICE (setscheduler) * CAP_IPC_LOCK (mlockall) */ priv_drop (); schedwrk_init ( serialize_lock, serialize_unlock); /* * Start main processing loop */ qb_loop_run (corosync_poll_handle); /* * Exit was requested */ totempg_finalize (); /* * free the loop resources */ qb_loop_destroy (corosync_poll_handle); /* * free up the icmap */ /* * Remove pid lock file */ unlink (corosync_lock_file); corosync_exit_error (COROSYNC_DONE_EXIT); return EXIT_SUCCESS; } diff --git a/exec/stats.c b/exec/stats.c index e89504e6..d5c1cbcd 100644 --- a/exec/stats.c +++ b/exec/stats.c @@ -1,672 +1,767 @@ /* - * Copyright (c) 2017 Red Hat, Inc. + * Copyright (c) 2017-2020 Red Hat, Inc. * * All rights reserved. * * Authors: Christine Caulfield (ccaulfie@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "util.h" #include "ipcs_stats.h" #include "stats.h" LOGSYS_DECLARE_SUBSYS ("STATS"); static qb_map_t *stats_map; +/* Structure of an element in the schedmiss array */ +struct schedmiss_entry { + uint64_t timestamp; + float delay; +}; +#define MAX_SCHEDMISS_EVENTS 10 +static struct schedmiss_entry schedmiss_event[MAX_SCHEDMISS_EVENTS]; +static unsigned int highest_schedmiss_event; + +#define SCHEDMISS_PREFIX "stats.schedmiss" + /* Convert iterator number to text and a stats pointer */ struct cs_stats_conv { - enum {STAT_PG, STAT_SRP, STAT_KNET, STAT_KNET_HANDLE, STAT_IPCSC, STAT_IPCSG} type; + enum {STAT_PG, STAT_SRP, STAT_KNET, STAT_KNET_HANDLE, STAT_IPCSC, STAT_IPCSG, STAT_SCHEDMISS} type; const char *name; const size_t offset; const icmap_value_types_t value_type; }; struct cs_stats_conv cs_pg_stats[] = { { STAT_PG, "msg_queue_avail", offsetof(totempg_stats_t, msg_queue_avail), ICMAP_VALUETYPE_UINT32}, { STAT_PG, "msg_reserved", offsetof(totempg_stats_t, msg_reserved), ICMAP_VALUETYPE_UINT32}, }; struct cs_stats_conv cs_srp_stats[] = { { STAT_SRP, "orf_token_tx", offsetof(totemsrp_stats_t, orf_token_tx), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "orf_token_rx", offsetof(totemsrp_stats_t, orf_token_rx), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "memb_merge_detect_tx", offsetof(totemsrp_stats_t, memb_merge_detect_tx), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "memb_merge_detect_rx", offsetof(totemsrp_stats_t, memb_merge_detect_rx), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "memb_join_tx", offsetof(totemsrp_stats_t, memb_join_tx), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "memb_join_rx", offsetof(totemsrp_stats_t, memb_join_rx), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "mcast_tx", offsetof(totemsrp_stats_t, mcast_tx), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "mcast_retx", offsetof(totemsrp_stats_t, mcast_retx), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "mcast_rx", offsetof(totemsrp_stats_t, mcast_rx), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "memb_commit_token_tx", offsetof(totemsrp_stats_t, memb_commit_token_tx), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "memb_commit_token_rx", offsetof(totemsrp_stats_t, memb_commit_token_rx), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "token_hold_cancel_tx", offsetof(totemsrp_stats_t, token_hold_cancel_tx), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "token_hold_cancel_rx", offsetof(totemsrp_stats_t, token_hold_cancel_rx), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "operational_entered", offsetof(totemsrp_stats_t, operational_entered), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "operational_token_lost", offsetof(totemsrp_stats_t, operational_token_lost), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "gather_entered", offsetof(totemsrp_stats_t, gather_entered), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "gather_token_lost", offsetof(totemsrp_stats_t, gather_token_lost), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "commit_entered", offsetof(totemsrp_stats_t, commit_entered), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "commit_token_lost", offsetof(totemsrp_stats_t, commit_token_lost), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "recovery_entered", offsetof(totemsrp_stats_t, recovery_entered), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "recovery_token_lost", offsetof(totemsrp_stats_t, recovery_token_lost), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "consensus_timeouts", offsetof(totemsrp_stats_t, consensus_timeouts), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "rx_msg_dropped", offsetof(totemsrp_stats_t, rx_msg_dropped), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "time_since_token_last_received", offsetof(totemsrp_stats_t, time_since_token_last_received), ICMAP_VALUETYPE_UINT64}, { STAT_SRP, "continuous_gather", offsetof(totemsrp_stats_t, continuous_gather), ICMAP_VALUETYPE_UINT32}, { STAT_SRP, "continuous_sendmsg_failures", offsetof(totemsrp_stats_t, continuous_sendmsg_failures), ICMAP_VALUETYPE_UINT32}, { STAT_SRP, "firewall_enabled_or_nic_failure", offsetof(totemsrp_stats_t, firewall_enabled_or_nic_failure), ICMAP_VALUETYPE_UINT8}, { STAT_SRP, "mtt_rx_token", offsetof(totemsrp_stats_t, mtt_rx_token), ICMAP_VALUETYPE_UINT32}, { STAT_SRP, "avg_token_workload", offsetof(totemsrp_stats_t, avg_token_workload), ICMAP_VALUETYPE_UINT32}, { STAT_SRP, "avg_backlog_calc", offsetof(totemsrp_stats_t, avg_backlog_calc), ICMAP_VALUETYPE_UINT32}, }; struct cs_stats_conv cs_knet_stats[] = { { STAT_KNET, "enabled", offsetof(struct knet_link_status, enabled), ICMAP_VALUETYPE_UINT8}, { STAT_KNET, "connected", offsetof(struct knet_link_status, connected), ICMAP_VALUETYPE_UINT8}, { STAT_KNET, "mtu", offsetof(struct knet_link_status, mtu), ICMAP_VALUETYPE_UINT32}, { STAT_KNET, "tx_data_packets", offsetof(struct knet_link_status, stats.tx_data_packets), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "rx_data_packets", offsetof(struct knet_link_status, stats.rx_data_packets), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "tx_data_bytes", offsetof(struct knet_link_status, stats.tx_data_bytes), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "rx_data_bytes", offsetof(struct knet_link_status, stats.rx_data_bytes), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "tx_ping_packets", offsetof(struct knet_link_status, stats.tx_ping_packets), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "rx_ping_packets", offsetof(struct knet_link_status, stats.rx_ping_packets), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "tx_ping_bytes", offsetof(struct knet_link_status, stats.tx_ping_bytes), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "rx_ping_bytes", offsetof(struct knet_link_status, stats.rx_ping_bytes), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "tx_pong_packets", offsetof(struct knet_link_status, stats.tx_pong_packets), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "rx_pong_packets", offsetof(struct knet_link_status, stats.rx_pong_packets), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "tx_pong_bytes", offsetof(struct knet_link_status, stats.tx_pong_bytes), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "rx_pong_bytes", offsetof(struct knet_link_status, stats.rx_pong_bytes), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "tx_pmtu_packets", offsetof(struct knet_link_status, stats.tx_pmtu_packets), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "rx_pmtu_packets", offsetof(struct knet_link_status, stats.rx_pmtu_packets), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "tx_pmtu_bytes", offsetof(struct knet_link_status, stats.tx_pmtu_bytes), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "rx_pmtu_bytes", offsetof(struct knet_link_status, stats.rx_pmtu_bytes), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "tx_total_packets", offsetof(struct knet_link_status, stats.tx_total_packets), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "rx_total_packets", offsetof(struct knet_link_status, stats.rx_total_packets), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "tx_total_bytes", offsetof(struct knet_link_status, stats.tx_total_bytes), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "rx_total_bytes", offsetof(struct knet_link_status, stats.rx_total_bytes), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "tx_total_errors", offsetof(struct knet_link_status, stats.tx_total_errors), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "rx_total_retries", offsetof(struct knet_link_status, stats.tx_total_retries), ICMAP_VALUETYPE_UINT64}, { STAT_KNET, "tx_pmtu_errors", offsetof(struct knet_link_status, stats.tx_pmtu_errors), ICMAP_VALUETYPE_UINT32}, { STAT_KNET, "tx_pmtu_retries", offsetof(struct knet_link_status, stats.tx_pmtu_retries), ICMAP_VALUETYPE_UINT32}, { STAT_KNET, "tx_ping_errors", offsetof(struct knet_link_status, stats.tx_ping_errors), ICMAP_VALUETYPE_UINT32}, { STAT_KNET, "tx_ping_retries", offsetof(struct knet_link_status, stats.tx_ping_retries), ICMAP_VALUETYPE_UINT32}, { STAT_KNET, "tx_pong_errors", offsetof(struct knet_link_status, stats.tx_pong_errors), ICMAP_VALUETYPE_UINT32}, { STAT_KNET, "tx_pong_retries", offsetof(struct knet_link_status, stats.tx_pong_retries), ICMAP_VALUETYPE_UINT32}, { STAT_KNET, "tx_data_errors", offsetof(struct knet_link_status, stats.tx_data_errors), ICMAP_VALUETYPE_UINT32}, { STAT_KNET, "tx_data_retries", offsetof(struct knet_link_status, stats.tx_data_retries), ICMAP_VALUETYPE_UINT32}, { STAT_KNET, "latency_min", offsetof(struct knet_link_status, stats.latency_min), ICMAP_VALUETYPE_UINT32}, { STAT_KNET, "latency_max", offsetof(struct knet_link_status, stats.latency_max), ICMAP_VALUETYPE_UINT32}, { STAT_KNET, "latency_ave", offsetof(struct knet_link_status, stats.latency_ave), ICMAP_VALUETYPE_UINT32}, { STAT_KNET, "latency_samples", offsetof(struct knet_link_status, stats.latency_samples), ICMAP_VALUETYPE_UINT32}, { STAT_KNET, "down_count", offsetof(struct knet_link_status, stats.down_count), ICMAP_VALUETYPE_UINT32}, { STAT_KNET, "up_count", offsetof(struct knet_link_status, stats.up_count), ICMAP_VALUETYPE_UINT32}, }; struct cs_stats_conv cs_knet_handle_stats[] = { { STAT_KNET_HANDLE, "tx_uncompressed_packets", offsetof(struct knet_handle_stats, tx_uncompressed_packets), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "tx_compressed_packets", offsetof(struct knet_handle_stats, tx_compressed_packets), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "tx_compressed_original_bytes", offsetof(struct knet_handle_stats, tx_compressed_original_bytes), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "tx_compressed_size_bytes", offsetof(struct knet_handle_stats, tx_compressed_size_bytes), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "tx_compress_time_min", offsetof(struct knet_handle_stats, tx_compress_time_min), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "tx_compress_time_max", offsetof(struct knet_handle_stats, tx_compress_time_max), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "tx_compress_time_ave", offsetof(struct knet_handle_stats, tx_compress_time_ave), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "rx_compressed_packets", offsetof(struct knet_handle_stats, rx_compressed_packets), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "rx_compressed_original_bytes", offsetof(struct knet_handle_stats, rx_compressed_original_bytes), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "rx_compressed_size_bytes", offsetof(struct knet_handle_stats, rx_compressed_size_bytes), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "rx_compress_time_min", offsetof(struct knet_handle_stats, rx_compress_time_min), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "rx_compress_time_max", offsetof(struct knet_handle_stats, rx_compress_time_max), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "rx_compress_time_ave", offsetof(struct knet_handle_stats, rx_compress_time_ave), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "tx_crypt_time_min", offsetof(struct knet_handle_stats, tx_crypt_time_min), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "tx_crypt_time_max", offsetof(struct knet_handle_stats, tx_crypt_time_max), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "tx_crypt_time_ave", offsetof(struct knet_handle_stats, tx_crypt_time_ave), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "tx_crypt_byte_overhead", offsetof(struct knet_handle_stats, tx_crypt_byte_overhead), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "tx_crypt_packets", offsetof(struct knet_handle_stats, tx_crypt_packets), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "rx_crypt_time_min", offsetof(struct knet_handle_stats, rx_crypt_time_min), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "rx_crypt_time_max", offsetof(struct knet_handle_stats, rx_crypt_time_max), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "rx_crypt_time_ave", offsetof(struct knet_handle_stats, rx_crypt_time_ave), ICMAP_VALUETYPE_UINT64}, { STAT_KNET_HANDLE, "rx_crypt_packets", offsetof(struct knet_handle_stats, rx_crypt_packets), ICMAP_VALUETYPE_UINT64}, }; struct cs_stats_conv cs_ipcs_conn_stats[] = { { STAT_IPCSC, "queueing", offsetof(struct ipcs_conn_stats, cnx.queuing), ICMAP_VALUETYPE_INT32}, { STAT_IPCSC, "queued", offsetof(struct ipcs_conn_stats, cnx.queued), ICMAP_VALUETYPE_UINT32}, { STAT_IPCSC, "invalid_request", offsetof(struct ipcs_conn_stats, cnx.invalid_request), ICMAP_VALUETYPE_UINT64}, { STAT_IPCSC, "overload", offsetof(struct ipcs_conn_stats, cnx.overload), ICMAP_VALUETYPE_UINT64}, { STAT_IPCSC, "sent", offsetof(struct ipcs_conn_stats, cnx.sent), ICMAP_VALUETYPE_UINT32}, { STAT_IPCSC, "procname", offsetof(struct ipcs_conn_stats, cnx.proc_name), ICMAP_VALUETYPE_STRING}, { STAT_IPCSC, "requests", offsetof(struct ipcs_conn_stats, conn.requests), ICMAP_VALUETYPE_UINT64}, { STAT_IPCSC, "responses", offsetof(struct ipcs_conn_stats, conn.responses), ICMAP_VALUETYPE_UINT64}, { STAT_IPCSC, "dispatched", offsetof(struct ipcs_conn_stats, conn.events), ICMAP_VALUETYPE_UINT64}, { STAT_IPCSC, "send_retries", offsetof(struct ipcs_conn_stats, conn.send_retries), ICMAP_VALUETYPE_UINT64}, { STAT_IPCSC, "recv_retries", offsetof(struct ipcs_conn_stats, conn.recv_retries), ICMAP_VALUETYPE_UINT64}, { STAT_IPCSC, "flow_control", offsetof(struct ipcs_conn_stats, conn.flow_control_state), ICMAP_VALUETYPE_UINT32}, { STAT_IPCSC, "flow_control_count", offsetof(struct ipcs_conn_stats, conn.flow_control_count), ICMAP_VALUETYPE_UINT64}, }; struct cs_stats_conv cs_ipcs_global_stats[] = { { STAT_IPCSG, "global.active", offsetof(struct ipcs_global_stats, active), ICMAP_VALUETYPE_UINT64}, { STAT_IPCSG, "global.closed", offsetof(struct ipcs_global_stats, closed), ICMAP_VALUETYPE_UINT64}, }; +struct cs_stats_conv cs_schedmiss_stats[] = { + { STAT_SCHEDMISS, "timestamp", offsetof(struct schedmiss_entry, timestamp), ICMAP_VALUETYPE_UINT64}, + { STAT_SCHEDMISS, "delay", offsetof(struct schedmiss_entry, delay), ICMAP_VALUETYPE_FLOAT}, +}; #define NUM_PG_STATS (sizeof(cs_pg_stats) / sizeof(struct cs_stats_conv)) #define NUM_SRP_STATS (sizeof(cs_srp_stats) / sizeof(struct cs_stats_conv)) #define NUM_KNET_STATS (sizeof(cs_knet_stats) / sizeof(struct cs_stats_conv)) #define NUM_KNET_HANDLE_STATS (sizeof(cs_knet_handle_stats) / sizeof(struct cs_stats_conv)) #define NUM_IPCSC_STATS (sizeof(cs_ipcs_conn_stats) / sizeof(struct cs_stats_conv)) #define NUM_IPCSG_STATS (sizeof(cs_ipcs_global_stats) / sizeof(struct cs_stats_conv)) /* What goes in the trie */ struct stats_item { char *key_name; struct cs_stats_conv * cs_conv; }; /* One of these per tracker */ struct cs_stats_tracker { char *key_name; void *user_data; int32_t events; icmap_notify_fn_t notify_fn; uint64_t old_value; struct qb_list_head list; }; QB_LIST_DECLARE (stats_tracker_list_head); static const struct corosync_api_v1 *api; static void stats_map_set_value(struct cs_stats_conv *conv, void *stat_array, void *value, size_t *value_len, icmap_value_types_t *type) { if (value_len) { *value_len = icmap_get_valuetype_len(conv->value_type); } if (type) { *type = conv->value_type; if ((*type == ICMAP_VALUETYPE_STRING) && value_len && stat_array) { *value_len = strlen((char *)(stat_array) + conv->offset)+1; } } if (value) { assert(value_len != NULL); memcpy(value, (char *)(stat_array) + conv->offset, *value_len); } } static void stats_add_entry(const char *key, struct cs_stats_conv *cs_conv) { struct stats_item *item = malloc(sizeof(struct stats_item)); if (item) { item->cs_conv = cs_conv; item->key_name = strdup(key); qb_map_put(stats_map, item->key_name, item); } } static void stats_rm_entry(const char *key) { struct stats_item *item = qb_map_get(stats_map, key); if (item) { qb_map_rm(stats_map, item->key_name); free(item->key_name); free(item); } } cs_error_t stats_map_init(const struct corosync_api_v1 *corosync_api) { int i; char param[ICMAP_KEYNAME_MAXLEN]; api = corosync_api; stats_map = qb_trie_create(); if (!stats_map) { return CS_ERR_INIT; } /* Populate the static portions of the trie */ for (i = 0; ics_conv; switch (statinfo->type) { case STAT_PG: pg_stats = api->totem_get_stats(); stats_map_set_value(statinfo, pg_stats, value, value_len, type); break; case STAT_SRP: pg_stats = api->totem_get_stats(); stats_map_set_value(statinfo, pg_stats->srp, value, value_len, type); break; case STAT_KNET_HANDLE: res = totemknet_handle_get_stats(&knet_handle_stats); if (res != CS_OK) { return res; } stats_map_set_value(statinfo, &knet_handle_stats, value, value_len, type); break; case STAT_KNET: if (sscanf(key_name, "stats.knet.node%d.link%d", &nodeid, &link_no) != 2) { return CS_ERR_NOT_EXIST; } /* Validate node & link IDs */ if (nodeid <= 0 || nodeid > KNET_MAX_HOST || link_no < 0 || link_no > KNET_MAX_LINK) { return CS_ERR_NOT_EXIST; } /* Always get the latest stats */ res = totemknet_link_get_status((knet_node_id_t)nodeid, (uint8_t)link_no, &link_status); if (res != CS_OK) { return CS_ERR_LIBRARY; } stats_map_set_value(statinfo, &link_status, value, value_len, type); break; case STAT_IPCSC: if (sscanf(key_name, "stats.ipcs.service%d.%d.%p", &service_id, &pid, &conn_ptr) != 3) { return CS_ERR_NOT_EXIST; } res = cs_ipcs_get_conn_stats(service_id, pid, conn_ptr, &ipcs_conn_stats); if (res != CS_OK) { return res; } stats_map_set_value(statinfo, &ipcs_conn_stats, value, value_len, type); break; case STAT_IPCSG: cs_ipcs_get_global_stats(&ipcs_global_stats); stats_map_set_value(statinfo, &ipcs_global_stats, value, value_len, type); break; + case STAT_SCHEDMISS: + if (sscanf(key_name, SCHEDMISS_PREFIX ".%d", &sm_event) != 1) { + return CS_ERR_NOT_EXIST; + } + + sm_type = strrchr(key_name, '.'); + if (sm_type == NULL) { + return CS_ERR_NOT_EXIST; + } + sm_type++; + + if (strcmp(sm_type, "timestamp") == 0) { + memcpy(value, &schedmiss_event[sm_event].timestamp, sizeof(uint64_t)); + *value_len = sizeof(uint64_t); + *type = ICMAP_VALUETYPE_UINT64; + } + if (strcmp(sm_type, "delay") == 0) { + memcpy(value, &schedmiss_event[sm_event].delay, sizeof(float)); + *value_len = sizeof(float); + *type = ICMAP_VALUETYPE_FLOAT; + } + break; default: return CS_ERR_LIBRARY; } return CS_OK; } -#define STATS_CLEAR "stats.clear." -#define STATS_CLEAR_KNET "stats.clear.knet" -#define STATS_CLEAR_IPC "stats.clear.ipc" -#define STATS_CLEAR_TOTEM "stats.clear.totem" -#define STATS_CLEAR_ALL "stats.clear.all" +static void schedmiss_clear_stats(void) +{ + int i; + char param[ICMAP_KEYNAME_MAXLEN]; + + for (i=0; i=0; i--) { + schedmiss_event[i+1].timestamp = schedmiss_event[i].timestamp; + schedmiss_event[i+1].delay = schedmiss_event[i].delay; + } + + /* New entries are always at the front */ + schedmiss_event[0].timestamp = timestamp; + schedmiss_event[0].delay = delay; + + /* If we've not run off the end then add an entry in the trie for the new 'end' one */ + if (highest_schedmiss_event < MAX_SCHEDMISS_EVENTS) { + sprintf(param, SCHEDMISS_PREFIX ".%i.timestamp", highest_schedmiss_event); + stats_add_entry(param, &cs_schedmiss_stats[0]); + sprintf(param, SCHEDMISS_PREFIX ".%i.delay", highest_schedmiss_event); + stats_add_entry(param, &cs_schedmiss_stats[1]); + highest_schedmiss_event++; + } + /* Notifications get sent by the stats_updater */ +} + +#define STATS_CLEAR "stats.clear." +#define STATS_CLEAR_KNET "stats.clear.knet" +#define STATS_CLEAR_IPC "stats.clear.ipc" +#define STATS_CLEAR_TOTEM "stats.clear.totem" +#define STATS_CLEAR_ALL "stats.clear.all" +#define STATS_CLEAR_SCHEDMISS "stats.clear.schedmiss" cs_error_t stats_map_set(const char *key_name, const void *value, size_t value_len, icmap_value_types_t type) { int cleared = 0; if (strncmp(key_name, STATS_CLEAR_KNET, strlen(STATS_CLEAR_KNET)) == 0) { totempg_stats_clear(TOTEMPG_STATS_CLEAR_TRANSPORT); cleared = 1; } if (strncmp(key_name, STATS_CLEAR_IPC, strlen(STATS_CLEAR_IPC)) == 0) { cs_ipcs_clear_stats(); cleared = 1; } if (strncmp(key_name, STATS_CLEAR_TOTEM, strlen(STATS_CLEAR_TOTEM)) == 0) { totempg_stats_clear(TOTEMPG_STATS_CLEAR_TOTEM); cleared = 1; } + if (strncmp(key_name, STATS_CLEAR_SCHEDMISS, strlen(STATS_CLEAR_SCHEDMISS)) == 0) { + schedmiss_clear_stats(); + cleared = 1; + } if (strncmp(key_name, STATS_CLEAR_ALL, strlen(STATS_CLEAR_ALL)) == 0) { totempg_stats_clear(TOTEMPG_STATS_CLEAR_TRANSPORT | TOTEMPG_STATS_CLEAR_TOTEM); cs_ipcs_clear_stats(); + schedmiss_clear_stats(); cleared = 1; } if (!cleared) { return CS_ERR_NOT_SUPPORTED; } return CS_OK; } cs_error_t stats_map_adjust_int(const char *key_name, int32_t step) { return CS_ERR_NOT_SUPPORTED; } cs_error_t stats_map_delete(const char *key_name) { return CS_ERR_NOT_SUPPORTED; } int stats_map_is_key_ro(const char *key_name) { /* It's all read-only apart from the 'clear' destinations */ if (strncmp(key_name, STATS_CLEAR, strlen(STATS_CLEAR)) == 0) { return 0; } else { return 1; } } icmap_iter_t stats_map_iter_init(const char *prefix) { return (qb_map_pref_iter_create(stats_map, prefix)); } const char *stats_map_iter_next(icmap_iter_t iter, size_t *value_len, icmap_value_types_t *type) { const char *res; struct stats_item *item; res = qb_map_iter_next(iter, (void **)&item); if (res == NULL) { return (res); } stats_map_set_value(item->cs_conv, NULL, NULL, value_len, type); return res; } void stats_map_iter_finalize(icmap_iter_t iter) { qb_map_iter_free(iter); } void stats_trigger_trackers() { struct cs_stats_tracker *tracker; struct qb_list_head *iter; cs_error_t res; size_t value_len; icmap_value_types_t type; uint64_t value; struct icmap_notify_value new_val; struct icmap_notify_value old_val; qb_list_for_each(iter, &stats_tracker_list_head) { tracker = qb_list_entry(iter, struct cs_stats_tracker, list); if (tracker->events & ICMAP_TRACK_PREFIX || !tracker->key_name ) { continue; } res = stats_map_get(tracker->key_name, &value, &value_len, &type); /* Check if it has changed */ if ((res == CS_OK) && (memcmp(&value, &tracker->old_value, value_len) != 0)) { old_val.type = new_val.type = type; old_val.len = new_val.len = value_len; old_val.data = new_val.data = &value; tracker->notify_fn(ICMAP_TRACK_MODIFY, tracker->key_name, old_val, new_val, tracker->user_data); memcpy(&tracker->old_value, &value, value_len); } } } /* Callback from libqb when a key is added/removed */ static void stats_map_notify_fn(uint32_t event, char *key, void *old_value, void *value, void *user_data) { struct cs_stats_tracker *tracker = user_data; struct icmap_notify_value new_val; struct icmap_notify_value old_val; char new_value[64]; if (value == NULL && old_value == NULL) { return ; } + /* Ignore schedmiss trackers as the values are read from the circular buffer */ + if (strncmp(key, SCHEDMISS_PREFIX, strlen(SCHEDMISS_PREFIX)) == 0 ) { + return ; + } + new_val.data = new_value; if (stats_map_get(key, &new_value, &new_val.len, &new_val.type) != CS_OK) { log_printf(LOGSYS_LEVEL_WARNING, "get value of notified key %s failed", key); return ; } /* We don't know what the old value was but as this only tracks ADD & DELETE I'm not worried about it */ memcpy(&old_val, &new_val, sizeof(new_val)); tracker->notify_fn(icmap_qbtt_to_tt(event), key, new_val, old_val, tracker->user_data); } cs_error_t stats_map_track_add(const char *key_name, int32_t track_type, icmap_notify_fn_t notify_fn, void *user_data, icmap_track_t *icmap_track) { struct cs_stats_tracker *tracker; size_t value_len; icmap_value_types_t type; cs_error_t err; /* We can track adding or deleting a key under a prefix */ if ((track_type & ICMAP_TRACK_PREFIX) && (!(track_type & ICMAP_TRACK_DELETE) || !(track_type & ICMAP_TRACK_ADD))) { return CS_ERR_NOT_SUPPORTED; } tracker = malloc(sizeof(struct cs_stats_tracker)); if (!tracker) { return CS_ERR_NO_MEMORY; } tracker->notify_fn = notify_fn; tracker->user_data = user_data; tracker->events = track_type; if (key_name) { tracker->key_name = strdup(key_name); if (!tracker->key_name) { free(tracker); return CS_ERR_NO_MEMORY; } /* Get initial value */ if (stats_map_get(tracker->key_name, - &tracker->old_value, &value_len, &type) == CS_OK) { + &tracker->old_value, &value_len, &type) != CS_OK) { tracker->old_value = 0ULL; } } else { tracker->key_name = NULL; tracker->old_value = 0ULL; } /* Add/delete trackers can use the qb_map tracking */ if ((track_type & ICMAP_TRACK_ADD) || (track_type & ICMAP_TRACK_DELETE)) { err = qb_map_notify_add(stats_map, tracker->key_name, stats_map_notify_fn, icmap_tt_to_qbtt(track_type), tracker); if (err != 0) { log_printf(LOGSYS_LEVEL_ERROR, "creating stats tracker %s failed. %d\n", tracker->key_name, err); free(tracker->key_name); free(tracker); return (qb_to_cs_error(err)); } } qb_list_add (&tracker->list, &stats_tracker_list_head); *icmap_track = (icmap_track_t)tracker; return CS_OK; } cs_error_t stats_map_track_delete(icmap_track_t icmap_track) { struct cs_stats_tracker *tracker = (struct cs_stats_tracker *)icmap_track; int err; if ((tracker->events & ICMAP_TRACK_ADD) || (tracker->events & ICMAP_TRACK_DELETE)) { err = qb_map_notify_del_2(stats_map, tracker->key_name, stats_map_notify_fn, icmap_tt_to_qbtt(tracker->events), tracker); if (err) { log_printf(LOGSYS_LEVEL_ERROR, "deleting tracker %s failed. %d\n", tracker->key_name, err); } } qb_list_del(&tracker->list); free(tracker->key_name); free(tracker); return CS_OK; } void *stats_map_track_get_user_data(icmap_track_t icmap_track) { struct cs_stats_tracker *tracker = (struct cs_stats_tracker *)icmap_track; return tracker->user_data; } /* Called from totemknet to add/remove keys from our map */ void stats_knet_add_member(knet_node_id_t nodeid, uint8_t link_no) { int i; char param[ICMAP_KEYNAME_MAXLEN]; for (i = 0; i.* +If corosync is not scheduled after the required period of time it will +log this event and also write an entry to the stats cmap under this key. +There can be up to 10 entries (0..9) in here, when an 11th event happens +the earliest will be removed. + +These events will always be in reverse order, so stats.schedmiss.0.* will +always be the latest event kept and 9 the oldest. If you want to listen +for notifications then you are recommended to listen for changes +to stats.schedmiss.0.timestamp or stats.schedmiss.0.delay. + +.B timestamp +The time of the event in ms since the Epoch (ie time_t * 1000 but with +valid milliseconds). + +.B delay +The time that corosync was paused (in ms, float value). + + .TP stats.clear.* These are write-only keys used to clear the stats for various subsystems .B totem Clears the pg & srp totem stats. .B knet Clears the knet stats .B ipc Clears the ipc stats +.B schedmiss +Clears the schedmiss stats + .B all Clears all of the above stats .SH DYNAMIC CHANGE USER/GROUP PERMISSION TO USE COROSYNC IPC Is the same as in the configuration file. eg: to add UID 500 use .br # corosync-cmapctl -s uidgid.uid.500 u8 1 GID is similar, so to add a GID use .br # corosync-cmapctl -s uidgid.gid.500 u8 1 For removal of permissions, simply delete the key .br # corosync-cmapctl -d uidgid.gid.500 .SH "SEE ALSO" .BR corosync_overview (7), .BR corosync.conf (5), .BR corosync-cmapctl (8) diff --git a/tools/corosync-cmapctl.c b/tools/corosync-cmapctl.c index a4b61bd5..ffca7e1b 100644 --- a/tools/corosync-cmapctl.c +++ b/tools/corosync-cmapctl.c @@ -1,980 +1,981 @@ /* * Copyright (c) 2011-2012 Red Hat, Inc. * * All rights reserved. * * Author: Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the Red Hat, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include "../lib/util.h" #ifndef INFTIM #define INFTIM -1 #endif #define MAX_TRY_AGAIN 10 enum user_action { ACTION_GET, ACTION_SET, ACTION_DELETE, ACTION_DELETE_PREFIX, ACTION_PRINT_PREFIX, ACTION_TRACK, ACTION_LOAD, ACTION_CLEARSTATS, }; struct name_to_type_item { const char *name; cmap_value_types_t type; }; struct name_to_type_item name_to_type[] = { {"i8", CMAP_VALUETYPE_INT8}, {"u8", CMAP_VALUETYPE_UINT8}, {"i16", CMAP_VALUETYPE_INT16}, {"u16", CMAP_VALUETYPE_UINT16}, {"i32", CMAP_VALUETYPE_INT32}, {"u32", CMAP_VALUETYPE_UINT32}, {"i64", CMAP_VALUETYPE_INT64}, {"u64", CMAP_VALUETYPE_UINT64}, {"flt", CMAP_VALUETYPE_FLOAT}, {"dbl", CMAP_VALUETYPE_DOUBLE}, {"str", CMAP_VALUETYPE_STRING}, {"bin", CMAP_VALUETYPE_BINARY}}; int show_binary = 0; int quiet = 0; static int convert_name_to_type(const char *name) { int i; for (i = 0; i < sizeof(name_to_type) / sizeof(*name_to_type); i++) { if (strcmp(name, name_to_type[i].name) == 0) { return (name_to_type[i].type); } } return (-1); } static int print_help(void) { printf("\n"); printf("usage: corosync-cmapctl [-b] [-DdghsqTCt] [-p filename] [-m map] [params...]\n"); printf("\n"); printf(" -b show binary values\n"); printf("\n"); printf(" -m select map to use\n"); printf(" The default map is 'icmap' which contains configuration information and some runtime variables used by corosync. \n"); printf(" A 'stats' map is also available which displays network statistics - in great detail when knet is used as the transport.\n"); printf("Set key:\n"); printf(" corosync-cmapctl -s key_name type value\n"); printf("\n"); printf(" where type is one of ([i|u][8|16|32|64] | flt | dbl | str | bin)\n"); printf(" for bin, value is file name (or - for stdin)\n"); printf("\n"); printf(" map can be either 'icmap' (the default) which contains corosync\n"); printf(" configuration information, or 'stats' which contains statistics\n"); printf(" about the networking and IPC traffic in some detail.\n"); printf("\n"); printf("Clear stats:\n"); - printf(" corosync-cmapctl -C [knet|ipc|totem|all]\n"); + printf(" corosync-cmapctl -C [knet|ipc|totem|schedmiss|all]\n"); printf(" The 'stats' map is implied\n"); printf("\n"); printf("Load settings from a file:\n"); printf(" corosync-cmapctl -p filename\n"); printf("\n"); printf(" the format of the file is:\n"); printf(" [^[^]][ ]\n"); printf(" Keys prefixed with single caret ('^') are deleted (see -d).\n"); printf(" Keys (actually prefixes) prefixed with double caret ('^^') are deleted by prefix (see -D).\n"); printf(" and are optional (not checked) in above cases.\n"); printf(" Other keys are set (see -s) so both and are required.\n"); printf("\n"); printf("Delete key:\n"); printf(" corosync-cmapctl -d key_name...\n"); printf("\n"); printf("Delete multiple keys with prefix:\n"); printf(" corosync-cmapctl -D key_prefix...\n"); printf("\n"); printf("Get key:\n"); printf(" corosync-cmapctl [-b] -g key_name...\n"); printf("\n"); printf("Quiet mode:\n"); printf(" corosync-cmapctl [-b] -q -g key_name...\n"); printf("\n"); printf("Display all keys:\n"); printf(" corosync-cmapctl [-b]\n"); printf("\n"); printf("Display keys with prefix key_name:\n"); printf(" corosync-cmapctl [-b] key_name...\n"); printf("\n"); printf("Track changes on keys with key_name:\n"); printf(" corosync-cmapctl [-b] -t key_name\n"); printf("\n"); printf("Track changes on keys with key prefix:\n"); printf(" corosync-cmapctl [-b] -T key_prefix\n"); printf("\n"); return (0); } static void print_binary_key (char *value, size_t value_len) { size_t i; char c; for (i = 0; i < value_len; i++) { c = value[i]; if (c >= ' ' && c < 0x7f && c != '\\') { fputc (c, stdout); } else { if (c == '\\') { printf ("\\\\"); } else { printf ("\\x%02X", c); } } } } static void print_key(cmap_handle_t handle, const char *key_name, size_t value_len, const void *value, cmap_value_types_t type) { char *str; char *bin_value = NULL; cs_error_t err; int8_t i8; uint8_t u8; int16_t i16; uint16_t u16; int32_t i32; uint32_t u32; int64_t i64; uint64_t u64; float flt; double dbl; int end_loop; int no_retries; size_t bin_value_len; end_loop = 0; no_retries = 0; err = CS_OK; while (!end_loop) { switch (type) { case CMAP_VALUETYPE_INT8: if (value == NULL) { err = cmap_get_int8(handle, key_name, &i8); } else { i8 = *((int8_t *)value); } break; case CMAP_VALUETYPE_INT16: if (value == NULL) { err = cmap_get_int16(handle, key_name, &i16); } else { i16 = *((int16_t *)value); } break; case CMAP_VALUETYPE_INT32: if (value == NULL) { err = cmap_get_int32(handle, key_name, &i32); } else { i32 = *((int32_t *)value); } break; case CMAP_VALUETYPE_INT64: if (value == NULL) { err = cmap_get_int64(handle, key_name, &i64); } else { i64 = *((int64_t *)value); } break; case CMAP_VALUETYPE_UINT8: if (value == NULL) { err = cmap_get_uint8(handle, key_name, &u8); } else { u8 = *((uint8_t *)value); } break; case CMAP_VALUETYPE_UINT16: if (value == NULL) { err = cmap_get_uint16(handle, key_name, &u16); } else { u16 = *((uint16_t *)value); } break; case CMAP_VALUETYPE_UINT32: if (value == NULL) { err = cmap_get_uint32(handle, key_name, &u32); } else { u32 = *((uint32_t *)value); } break; case CMAP_VALUETYPE_UINT64: if (value == NULL) { err = cmap_get_uint64(handle, key_name, &u64); } else { u64 = *((uint64_t *)value); } break; case CMAP_VALUETYPE_FLOAT: if (value == NULL) { err = cmap_get_float(handle, key_name, &flt); } else { flt = *((float *)value); } break; case CMAP_VALUETYPE_DOUBLE: if (value == NULL) { err = cmap_get_double(handle, key_name, &dbl); } else { dbl = *((double *)value); } break; case CMAP_VALUETYPE_STRING: if (value == NULL) { err = cmap_get_string(handle, key_name, &str); } else { str = (char *)value; } break; case CMAP_VALUETYPE_BINARY: if (show_binary) { if (value == NULL) { bin_value = malloc(value_len); if (bin_value == NULL) { fprintf(stderr, "Can't alloc memory\n"); exit(EXIT_FAILURE); } bin_value_len = value_len; err = cmap_get(handle, key_name, bin_value, &bin_value_len, NULL); } else { bin_value = (char *)value; } } break; } if (err == CS_OK) { end_loop = 1; } else if (err == CS_ERR_TRY_AGAIN) { sleep(1); no_retries++; if (no_retries > MAX_TRY_AGAIN) { end_loop = 1; } } else { end_loop = 1; } }; if (err != CS_OK) { fprintf(stderr, "Can't get value of %s. Error %s\n", key_name, cs_strerror(err)); /* * bin_value was newly allocated */ if (bin_value != NULL && value == NULL) { free(bin_value); } return ; } if (!quiet) printf("%s (", key_name); switch (type) { case CMAP_VALUETYPE_INT8: if (!quiet) printf("%s) = %"PRId8, "i8", i8); else printf("%"PRId8, i8); break; case CMAP_VALUETYPE_UINT8: if (!quiet) printf("%s) = %"PRIu8, "u8", u8); else printf("%"PRIu8, u8); break; case CMAP_VALUETYPE_INT16: if (!quiet) printf("%s) = %"PRId16, "i16", i16); else printf("%"PRId16, i16); break; case CMAP_VALUETYPE_UINT16: if (!quiet) printf("%s) = %"PRIu16, "u16", u16); else printf("%"PRIu16, u16); break; case CMAP_VALUETYPE_INT32: if (!quiet) printf("%s) = %"PRId32, "i32", i32); else printf("%"PRId32, i32); break; case CMAP_VALUETYPE_UINT32: if (!quiet) printf("%s) = %"PRIu32, "u32", u32); else printf("%"PRIu32, u32); break; case CMAP_VALUETYPE_INT64: if (!quiet) printf("%s) = %"PRId64, "i64", i64); else printf("%"PRId64, i64); break; case CMAP_VALUETYPE_UINT64: if (!quiet) printf("%s) = %"PRIu64, "u64", u64); else printf("%"PRIu64, u64); break; case CMAP_VALUETYPE_FLOAT: if (!quiet) printf("%s) = %f", "flt", flt); else printf("%f", flt); break; case CMAP_VALUETYPE_DOUBLE: if (!quiet) printf("%s) = %lf", "dbl", dbl); else printf("%lf", dbl); break; case CMAP_VALUETYPE_STRING: if (!quiet) printf("%s) = %s", "str", str); else printf("%s", str); if (value == NULL) { free(str); } break; case CMAP_VALUETYPE_BINARY: printf("%s)", "bin"); if (show_binary) { printf(" = "); if (bin_value) { print_binary_key(bin_value, value_len); if (value == NULL) { free(bin_value); } } else { printf("*empty*"); } } break; } printf("\n"); } static void print_iter(cmap_handle_t handle, const char *prefix) { cmap_iter_handle_t iter_handle; char key_name[CMAP_KEYNAME_MAXLEN + 1]; size_t value_len; cmap_value_types_t type; cs_error_t err; err = cmap_iter_init(handle, prefix, &iter_handle); if (err != CS_OK) { fprintf (stderr, "Failed to initialize iteration. Error %s\n", cs_strerror(err)); exit (EXIT_FAILURE); } while ((err = cmap_iter_next(handle, iter_handle, key_name, &value_len, &type)) == CS_OK) { print_key(handle, key_name, value_len, NULL, type); } cmap_iter_finalize(handle, iter_handle); } static void delete_with_prefix(cmap_handle_t handle, const char *prefix) { cmap_iter_handle_t iter_handle; char key_name[CMAP_KEYNAME_MAXLEN + 1]; size_t value_len; cmap_value_types_t type; cs_error_t err; cs_error_t err2; err = cmap_iter_init(handle, prefix, &iter_handle); if (err != CS_OK) { fprintf (stderr, "Failed to initialize iteration. Error %s\n", cs_strerror(err)); exit (EXIT_FAILURE); } while ((err = cmap_iter_next(handle, iter_handle, key_name, &value_len, &type)) == CS_OK) { err2 = cmap_delete(handle, key_name); if (err2 != CS_OK) { fprintf(stderr, "Can't delete key %s. Error %s\n", key_name, cs_strerror(err2)); } } cmap_iter_finalize(handle, iter_handle); } static void cmap_notify_fn( cmap_handle_t cmap_handle, cmap_track_handle_t cmap_track_handle, int32_t event, const char *key_name, struct cmap_notify_value new_val, struct cmap_notify_value old_val, void *user_data) { switch (event) { case CMAP_TRACK_ADD: printf("create> "); print_key(cmap_handle, key_name, new_val.len, new_val.data, new_val.type); break; case CMAP_TRACK_DELETE: printf("delete> "); print_key(cmap_handle, key_name, old_val.len, old_val.data, old_val.type); break; case CMAP_TRACK_MODIFY: printf("modify> "); print_key(cmap_handle, key_name, new_val.len, new_val.data, new_val.type); break; default: printf("unknown change> "); break; } } static void add_track(cmap_handle_t handle, const char *key_name, int prefix) { cmap_track_handle_t track_handle; int32_t track_type; cs_error_t err; track_type = CMAP_TRACK_ADD | CMAP_TRACK_DELETE | CMAP_TRACK_MODIFY; if (prefix) { track_type |= CMAP_TRACK_PREFIX; } err = cmap_track_add(handle, key_name, track_type, cmap_notify_fn, NULL, &track_handle); if (err != CS_OK) { fprintf(stderr, "Failed to add tracking function. Error %s\n", cs_strerror(err)); exit (EXIT_FAILURE); } } static void track_changes(cmap_handle_t handle) { struct pollfd pfd[2]; int cmap_fd; cs_error_t err; int poll_res; char inbuf[3]; int quit = CS_FALSE; err = cmap_fd_get(handle, &cmap_fd); if (err != CS_OK) { fprintf(stderr, "Failed to get file handle. Error %s\n", cs_strerror(err)); exit (EXIT_FAILURE); } pfd[0].fd = cmap_fd; pfd[1].fd = STDIN_FILENO; pfd[0].events = pfd[1].events = POLLIN; printf("Type \"q\" to finish\n"); do { pfd[0].revents = pfd[1].revents = 0; poll_res = poll(pfd, 2, INFTIM); if (poll_res == -1) { perror("poll"); } if (pfd[1].revents & POLLIN) { if (fgets(inbuf, sizeof(inbuf), stdin) == NULL) { quit = CS_TRUE; } else if (strncmp(inbuf, "q", 1) == 0) { quit = CS_TRUE; } } if (pfd[0].revents & POLLIN) { err = cmap_dispatch(handle, CS_DISPATCH_ALL); if (err != CS_OK) { fprintf(stderr, "Dispatch error %s\n", cs_strerror(err)); quit = CS_TRUE; } } } while (poll_res > 0 && !quit); } static cs_error_t set_key_bin(cmap_handle_t handle, const char *key_name, const char *fname) { FILE *f; char *val; char buf[4096]; size_t size; size_t readed; size_t pos; cs_error_t err; if (strcmp(fname, "-") == 0) { f = stdin; } else { f = fopen(fname, "rb"); if (f == NULL) { perror("Can't open input file"); exit(EXIT_FAILURE); } } val = NULL; size = 0; pos = 0; while ((readed = fread(buf, 1, sizeof(buf), f)) != 0) { size += readed; if ((val = realloc(val, size)) == NULL) { fprintf(stderr, "Can't alloc memory\n"); exit (EXIT_FAILURE); } memcpy(val + pos, buf, readed); pos += readed; } if (f != stdin) { fclose(f); } err = cmap_set(handle, key_name, val, size, CMAP_VALUETYPE_BINARY); free(val); return (err); } static void set_key(cmap_handle_t handle, const char *key_name, const char *key_type_s, const char *key_value_s) { int64_t i64; uint64_t u64; double dbl; float flt; cs_error_t err = CS_OK; int scanf_res = 0; cmap_value_types_t type; if (convert_name_to_type(key_type_s) == -1) { fprintf(stderr, "Unknown type %s\n", key_type_s); exit (EXIT_FAILURE); } type = convert_name_to_type(key_type_s); switch (type) { case CMAP_VALUETYPE_INT8: case CMAP_VALUETYPE_INT16: case CMAP_VALUETYPE_INT32: case CMAP_VALUETYPE_INT64: scanf_res = sscanf(key_value_s, "%"PRId64, &i64); break; case CMAP_VALUETYPE_UINT8: case CMAP_VALUETYPE_UINT16: case CMAP_VALUETYPE_UINT32: case CMAP_VALUETYPE_UINT64: scanf_res = sscanf(key_value_s, "%"PRIu64, &u64); break; case CMAP_VALUETYPE_FLOAT: scanf_res = sscanf(key_value_s, "%f", &flt); break; case CMAP_VALUETYPE_DOUBLE: scanf_res = sscanf(key_value_s, "%lf", &dbl); break; case CMAP_VALUETYPE_STRING: case CMAP_VALUETYPE_BINARY: /* * Do nothing */ scanf_res = 1; break; } if (scanf_res != 1) { fprintf(stderr, "%s is not valid %s type value\n", key_value_s, key_type_s); exit(EXIT_FAILURE); } /* * We have parsed value, so insert value */ switch (type) { case CMAP_VALUETYPE_INT8: if (i64 > INT8_MAX || i64 < INT8_MIN) { fprintf(stderr, "%s is not valid i8 integer\n", key_value_s); exit(EXIT_FAILURE); } err = cmap_set_int8(handle, key_name, i64); break; case CMAP_VALUETYPE_INT16: if (i64 > INT16_MAX || i64 < INT16_MIN) { fprintf(stderr, "%s is not valid i16 integer\n", key_value_s); exit(EXIT_FAILURE); } err = cmap_set_int16(handle, key_name, i64); break; case CMAP_VALUETYPE_INT32: if (i64 > INT32_MAX || i64 < INT32_MIN) { fprintf(stderr, "%s is not valid i32 integer\n", key_value_s); exit(EXIT_FAILURE); } err = cmap_set_int32(handle, key_name, i64); break; case CMAP_VALUETYPE_INT64: err = cmap_set_int64(handle, key_name, i64); break; case CMAP_VALUETYPE_UINT8: if (u64 > UINT8_MAX) { fprintf(stderr, "%s is not valid u8 integer\n", key_value_s); exit(EXIT_FAILURE); } err = cmap_set_uint8(handle, key_name, u64); break; case CMAP_VALUETYPE_UINT16: if (u64 > UINT16_MAX) { fprintf(stderr, "%s is not valid u16 integer\n", key_value_s); exit(EXIT_FAILURE); } err = cmap_set_uint16(handle, key_name, u64); break; case CMAP_VALUETYPE_UINT32: if (u64 > UINT32_MAX) { fprintf(stderr, "%s is not valid u32 integer\n", key_value_s); exit(EXIT_FAILURE); } err = cmap_set_uint32(handle, key_name, u64); break; case CMAP_VALUETYPE_UINT64: err = cmap_set_uint64(handle, key_name, u64); break; case CMAP_VALUETYPE_FLOAT: err = cmap_set_float(handle, key_name, flt); break; case CMAP_VALUETYPE_DOUBLE: err = cmap_set_double(handle, key_name, dbl); break; case CMAP_VALUETYPE_STRING: err = cmap_set_string(handle, key_name, key_value_s); break; case CMAP_VALUETYPE_BINARY: err = set_key_bin(handle, key_name, key_value_s); break; } if (err != CS_OK) { fprintf (stderr, "Failed to set key %s. Error %s\n", key_name, cs_strerror(err)); exit (EXIT_FAILURE); } } static void read_in_config_file(cmap_handle_t handle, char * filename) { int ignore; int c; FILE* fh; char buf[1024]; char * line; char *key_name; char *key_type_s; char *key_value_s; fh = fopen(filename, "r"); if (fh == NULL) { perror ("Couldn't open file."); return; } while (fgets (buf, 1024, fh) != NULL) { /* find the first real character, if it is * a '#' then ignore this line. * else process. * if no real characters then also ignore. */ ignore = 1; for (c = 0; c < 1024; c++) { if (isblank (buf[c])) { continue; } if (buf[c] == '#' || buf[c] == '\n') { ignore = 1; break; } ignore = 0; line = &buf[c]; break; } if (ignore == 1) { continue; } /* * should be: * [^[^]][ ] */ key_name = strtok(line, " \n"); if (key_name && *key_name == '^') { key_name++; if (*key_name == '^') { key_name++; delete_with_prefix(handle, key_name); } else { cs_error_t err; err = cmap_delete(handle, key_name); if (err != CS_OK) { fprintf(stderr, "Can't delete key %s. Error %s\n", key_name, cs_strerror(err)); } } } else { key_type_s = strtok(NULL, " \n"); key_value_s = strtok(NULL, " \n"); set_key(handle, key_name, key_type_s, key_value_s); } } fclose (fh); } static void clear_stats(cmap_handle_t handle, char *clear_opt) { char key_name[CMAP_KEYNAME_MAXLEN + 1]; sprintf(key_name, "stats.clear.%s", clear_opt); cmap_set_uint32(handle, key_name, 1); } int main(int argc, char *argv[]) { enum user_action action; int c; cs_error_t err; cmap_handle_t handle; int i; size_t value_len; cmap_value_types_t type; cmap_map_t map = CMAP_MAP_DEFAULT; int track_prefix; int map_set = 0; int no_retries; char * clear_opt = NULL; char * settings_file = NULL; action = ACTION_PRINT_PREFIX; track_prefix = 1; while ((c = getopt(argc, argv, "m:hqgsdDtTbp:C:")) != -1) { switch (c) { case 'h': return print_help(); break; case 'b': show_binary++; break; case 'q': quiet = 1; break; case 'g': action = ACTION_GET; break; case 's': action = ACTION_SET; break; case 'd': action = ACTION_DELETE; break; case 'D': action = ACTION_DELETE_PREFIX; break; case 'p': settings_file = optarg; action = ACTION_LOAD; break; case 'C': if (strcmp(optarg, "knet") == 0 || strcmp(optarg, "totem") == 0 || strcmp(optarg, "ipc") == 0 || + strcmp(optarg, "schedmiss") == 0 || strcmp(optarg, "all") == 0) { action = ACTION_CLEARSTATS; clear_opt = optarg; /* Force the map to be STATS */ map = CMAP_MAP_STATS; } else { - fprintf(stderr, "argument to -C should be 'knet', 'totem', 'ipc' or 'all'\n"); + fprintf(stderr, "argument to -C should be 'knet', 'totem', 'ipc', 'schedmiss' or 'all'\n"); return (EXIT_FAILURE); } break; case 't': action = ACTION_TRACK; track_prefix = 0; break; case 'T': action = ACTION_TRACK; break; case 'm': if (strcmp(optarg, "icmap") == 0 || strcmp(optarg, "default") == 0) { map = CMAP_MAP_ICMAP; map_set = 1; } if (strcmp(optarg, "stats") == 0) { map = CMAP_MAP_STATS; map_set = 1; } if (!map_set) { fprintf(stderr, "invalid map name, must be 'default', 'icmap' or 'stats'\n"); return (EXIT_FAILURE); } break; case '?': return (EXIT_FAILURE); break; default: action = ACTION_PRINT_PREFIX; break; } } argc -= optind; argv += optind; if (argc == 0 && action != ACTION_LOAD && action != ACTION_CLEARSTATS && action != ACTION_PRINT_PREFIX) { fprintf(stderr, "Expected key after options\n"); return (EXIT_FAILURE); } no_retries = 0; while ((err = cmap_initialize_map(&handle, map)) == CS_ERR_TRY_AGAIN && no_retries++ < MAX_TRY_AGAIN) { sleep(1); } if (err != CS_OK) { fprintf (stderr, "Failed to initialize the cmap API. Error %s\n", cs_strerror(err)); exit (EXIT_FAILURE); } switch (action) { case ACTION_PRINT_PREFIX: if (argc == 0) { print_iter(handle, NULL); } else { for (i = 0; i < argc; i++) { print_iter(handle, argv[i]); } } break; case ACTION_GET: for (i = 0; i < argc; i++) { err = cmap_get(handle, argv[i], NULL, &value_len, &type); if (err == CS_OK) { print_key(handle, argv[i], value_len, NULL, type); } else { fprintf(stderr, "Can't get key %s. Error %s\n", argv[i], cs_strerror(err)); } } break; case ACTION_DELETE: for (i = 0; i < argc; i++) { err = cmap_delete(handle, argv[i]); if (err != CS_OK) { fprintf(stderr, "Can't delete key %s. Error %s\n", argv[i], cs_strerror(err)); } } break; case ACTION_DELETE_PREFIX: for (i = 0; i < argc; i++) { delete_with_prefix(handle, argv[i]); } break; case ACTION_LOAD: read_in_config_file(handle, settings_file); break; case ACTION_TRACK: for (i = 0; i < argc; i++) { add_track(handle, argv[i], track_prefix); } track_changes(handle); break; case ACTION_SET: if (argc < 3) { fprintf(stderr, "At least 3 parameters are expected for set\n"); return (EXIT_FAILURE); } set_key(handle, argv[0], argv[1], argv[2]); break; case ACTION_CLEARSTATS: clear_stats(handle, clear_opt); break; } err = cmap_finalize(handle); if (err != CS_OK) { fprintf (stderr, "Failed to finalize the cmap API. Error %s\n", cs_strerror(err)); exit (EXIT_FAILURE); } return (0); }