diff --git a/libknet/Makefile.am b/libknet/Makefile.am index 55d0978d..695bb8a1 100644 --- a/libknet/Makefile.am +++ b/libknet/Makefile.am @@ -1,167 +1,169 @@ # # Copyright (C) 2010-2020 Red Hat, Inc. All rights reserved. # # Authors: Fabio M. Di Nitto # Federico Simoncelli # # This software licensed under GPL-2.0+ # MAINTAINERCLEANFILES = Makefile.in include $(top_srcdir)/build-aux/check.mk SYMFILE = libknet_exported_syms EXTRA_DIST = $(SYMFILE) SUBDIRS = . tests # libtool -version-info triple (current:revision:age), see https://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html libversion = 2:0:0 # override global LIBS that pulls in lots of cruft we don't need here LIBS = sources = \ common.c \ compat.c \ compress.c \ crypto.c \ handle.c \ handle_api.c \ host.c \ links.c \ links_acl.c \ links_acl_ip.c \ links_acl_loopback.c \ logging.c \ netutils.c \ onwire.c \ + onwire_v1.c \ threads_common.c \ threads_dsthandler.c \ threads_heartbeat.c \ threads_pmtud.c \ threads_rx.c \ threads_tx.c \ transports.c \ transport_common.c \ transport_loopback.c \ transport_udp.c \ transport_sctp.c include_HEADERS = libknet.h pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = libknet.pc noinst_HEADERS = \ common.h \ compat.h \ compress.h \ compress_model.h \ crypto.h \ crypto_model.h \ host.h \ internals.h \ links.h \ links_acl.h \ links_acl_ip.h \ links_acl_loopback.h \ logging.h \ netutils.h \ onwire.h \ + onwire_v1.h \ threads_common.h \ threads_dsthandler.h \ threads_heartbeat.h \ threads_pmtud.h \ threads_rx.h \ threads_tx.h \ transports.h \ transport_common.h \ transport_loopback.h \ transport_udp.h \ transport_sctp.h lib_LTLIBRARIES = libknet.la libknet_la_SOURCES = $(sources) AM_CFLAGS += $(libqb_CFLAGS) libknet_la_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) EXTRA_libknet_la_DEPENDENCIES = $(SYMFILE) libknet_la_LDFLAGS = $(AM_LDFLAGS) \ -Wl,--version-script=$(srcdir)/$(SYMFILE) \ -Wl,-rpath=$(pkglibdir) \ -version-info $(libversion) libknet_la_LIBADD = $(PTHREAD_LIBS) $(dl_LIBS) $(rt_LIBS) $(m_LIBS) # Prepare 
empty value for appending pkglib_LTLIBRARIES = # MODULE_LDFLAGS would mean a target-specific variable for Automake MODULELDFLAGS = $(AM_LDFLAGS) -module -avoid-version -export-dynamic if BUILD_COMPRESS_ZSTD pkglib_LTLIBRARIES += compress_zstd.la compress_zstd_la_LDFLAGS = $(MODULELDFLAGS) compress_zstd_la_CFLAGS = $(AM_CFLAGS) $(libzstd_CFLAGS) compress_zstd_la_LIBADD = $(libzstd_LIBS) endif if BUILD_COMPRESS_ZLIB pkglib_LTLIBRARIES += compress_zlib.la compress_zlib_la_LDFLAGS = $(MODULELDFLAGS) compress_zlib_la_CFLAGS = $(AM_CFLAGS) $(zlib_CFLAGS) compress_zlib_la_LIBADD = $(zlib_LIBS) endif if BUILD_COMPRESS_LZ4 pkglib_LTLIBRARIES += compress_lz4.la compress_lz4hc.la compress_lz4_la_LDFLAGS = $(MODULELDFLAGS) compress_lz4_la_CFLAGS = $(AM_CFLAGS) $(liblz4_CFLAGS) compress_lz4_la_LIBADD = $(liblz4_LIBS) compress_lz4hc_la_LDFLAGS = $(MODULELDFLAGS) compress_lz4hc_la_CFLAGS = $(AM_CFLAGS) $(liblz4_CFLAGS) compress_lz4hc_la_LIBADD = $(liblz4_LIBS) endif if BUILD_COMPRESS_LZO2 pkglib_LTLIBRARIES += compress_lzo2.la compress_lzo2_la_LDFLAGS = $(MODULELDFLAGS) compress_lzo2_la_CFLAGS = $(AM_CFLAGS) $(lzo2_CFLAGS) compress_lzo2_la_LIBADD = $(lzo2_LIBS) endif if BUILD_COMPRESS_LZMA pkglib_LTLIBRARIES += compress_lzma.la compress_lzma_la_LDFLAGS = $(MODULELDFLAGS) compress_lzma_la_CFLAGS = $(AM_CFLAGS) $(liblzma_CFLAGS) compress_lzma_la_LIBADD = $(liblzma_LIBS) endif if BUILD_COMPRESS_BZIP2 pkglib_LTLIBRARIES += compress_bzip2.la compress_bzip2_la_LDFLAGS = $(MODULELDFLAGS) compress_bzip2_la_CFLAGS = $(AM_CFLAGS) $(bzip2_CFLAGS) compress_bzip2_la_LIBADD = $(bzip2_LIBS) endif if BUILD_CRYPTO_NSS pkglib_LTLIBRARIES += crypto_nss.la crypto_nss_la_LDFLAGS = $(MODULELDFLAGS) crypto_nss_la_CFLAGS = $(AM_CFLAGS) $(nss_CFLAGS) crypto_nss_la_LIBADD = $(nss_LIBS) endif if BUILD_CRYPTO_OPENSSL pkglib_LTLIBRARIES += crypto_openssl.la crypto_openssl_la_LDFLAGS = $(MODULELDFLAGS) crypto_openssl_la_CFLAGS = $(AM_CFLAGS) $(openssl_CFLAGS) crypto_openssl_la_LIBADD = $(openssl_LIBS) endif 
diff --git a/libknet/handle.c b/libknet/handle.c index 02b13856..78330e28 100644 --- a/libknet/handle.c +++ b/libknet/handle.c @@ -1,786 +1,806 @@ /* * Copyright (C) 2010-2020 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include #include #include #include #include "internals.h" #include "crypto.h" #include "links.h" #include "compress.h" #include "compat.h" #include "common.h" #include "threads_common.h" #include "threads_heartbeat.h" #include "threads_pmtud.h" #include "threads_dsthandler.h" #include "threads_rx.h" #include "threads_tx.h" #include "transports.h" #include "transport_common.h" #include "logging.h" static pthread_mutex_t handle_config_mutex = PTHREAD_MUTEX_INITIALIZER; pthread_rwlock_t shlib_rwlock; static uint8_t shlib_wrlock_init = 0; static uint32_t knet_ref = 0; static int _init_shlib_tracker(knet_handle_t knet_h) { int savederrno = 0; if (!shlib_wrlock_init) { savederrno = pthread_rwlock_init(&shlib_rwlock, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize shared lib rwlock: %s", strerror(savederrno)); errno = savederrno; return -1; } shlib_wrlock_init = 1; } return 0; } static void _fini_shlib_tracker(void) { if (knet_ref == 0) { pthread_rwlock_destroy(&shlib_rwlock); shlib_wrlock_init = 0; } return; } static int _init_locks(knet_handle_t knet_h) { int savederrno = 0; savederrno = pthread_rwlock_init(&knet_h->global_rwlock, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize list rwlock: %s", strerror(savederrno)); goto exit_fail; } savederrno = pthread_mutex_init(&knet_h->handle_stats_mutex, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize handle stats mutex: %s", strerror(savederrno)); goto exit_fail; } savederrno = pthread_mutex_init(&knet_h->threads_status_mutex, NULL); if (savederrno) { log_err(knet_h, 
KNET_SUB_HANDLE, "Unable to initialize threads status mutex: %s", strerror(savederrno)); goto exit_fail; } savederrno = pthread_mutex_init(&knet_h->pmtud_mutex, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize pmtud mutex: %s", strerror(savederrno)); goto exit_fail; } savederrno = pthread_mutex_init(&knet_h->kmtu_mutex, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize kernel_mtu mutex: %s", strerror(savederrno)); goto exit_fail; } savederrno = pthread_cond_init(&knet_h->pmtud_cond, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize pmtud conditional mutex: %s", strerror(savederrno)); goto exit_fail; } savederrno = pthread_mutex_init(&knet_h->hb_mutex, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize hb_thread mutex: %s", strerror(savederrno)); goto exit_fail; } savederrno = pthread_mutex_init(&knet_h->tx_mutex, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize tx_thread mutex: %s", strerror(savederrno)); goto exit_fail; } savederrno = pthread_mutex_init(&knet_h->backoff_mutex, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize pong timeout backoff mutex: %s", strerror(savederrno)); goto exit_fail; } savederrno = pthread_mutex_init(&knet_h->tx_seq_num_mutex, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize tx_seq_num_mutex mutex: %s", strerror(savederrno)); goto exit_fail; } + savederrno = pthread_mutex_init(&knet_h->onwire_mutex, NULL); + if (savederrno) { + log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize onwire_mutex mutex: %s", + strerror(savederrno)); + goto exit_fail; + } + return 0; exit_fail: errno = savederrno; return -1; } static void _destroy_locks(knet_handle_t knet_h) { pthread_rwlock_destroy(&knet_h->global_rwlock); pthread_mutex_destroy(&knet_h->pmtud_mutex); pthread_mutex_destroy(&knet_h->kmtu_mutex); 
pthread_cond_destroy(&knet_h->pmtud_cond); pthread_mutex_destroy(&knet_h->hb_mutex); pthread_mutex_destroy(&knet_h->tx_mutex); pthread_mutex_destroy(&knet_h->backoff_mutex); pthread_mutex_destroy(&knet_h->tx_seq_num_mutex); pthread_mutex_destroy(&knet_h->threads_status_mutex); pthread_mutex_destroy(&knet_h->handle_stats_mutex); + pthread_mutex_destroy(&knet_h->onwire_mutex); } static int _init_socks(knet_handle_t knet_h) { int savederrno = 0; if (_init_socketpair(knet_h, knet_h->dstsockfd)) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize internal dstsockpair: %s", strerror(savederrno)); goto exit_fail; } return 0; exit_fail: errno = savederrno; return -1; } static void _close_socks(knet_handle_t knet_h) { _close_socketpair(knet_h, knet_h->dstsockfd); } static int _init_buffers(knet_handle_t knet_h) { int savederrno = 0; int i; size_t bufsize; for (i = 0; i < PCKT_FRAG_MAX; i++) { bufsize = ceil((float)KNET_MAX_PACKET_SIZE / (i + 1)) + KNET_HEADER_ALL_SIZE; knet_h->send_to_links_buf[i] = malloc(bufsize); if (!knet_h->send_to_links_buf[i]) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory datafd to link buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->send_to_links_buf[i], 0, bufsize); } for (i = 0; i < PCKT_RX_BUFS; i++) { knet_h->recv_from_links_buf[i] = malloc(KNET_DATABUFSIZE); if (!knet_h->recv_from_links_buf[i]) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for link to datafd buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->recv_from_links_buf[i], 0, KNET_DATABUFSIZE); } knet_h->recv_from_sock_buf = malloc(KNET_DATABUFSIZE); if (!knet_h->recv_from_sock_buf) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for app to datafd buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->recv_from_sock_buf, 0, KNET_DATABUFSIZE); - knet_h->pingbuf = malloc(KNET_HEADER_PING_SIZE); + 
knet_h->pingbuf = malloc(KNET_HEADER_ALL_SIZE); if (!knet_h->pingbuf) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for hearbeat buffer: %s", strerror(savederrno)); goto exit_fail; } - memset(knet_h->pingbuf, 0, KNET_HEADER_PING_SIZE); + memset(knet_h->pingbuf, 0, KNET_HEADER_ALL_SIZE); knet_h->pmtudbuf = malloc(KNET_PMTUD_SIZE_V6 + KNET_HEADER_ALL_SIZE); if (!knet_h->pmtudbuf) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for pmtud buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->pmtudbuf, 0, KNET_PMTUD_SIZE_V6 + KNET_HEADER_ALL_SIZE); for (i = 0; i < PCKT_FRAG_MAX; i++) { bufsize = ceil((float)KNET_MAX_PACKET_SIZE / (i + 1)) + KNET_HEADER_ALL_SIZE + KNET_DATABUFSIZE_CRYPT_PAD; knet_h->send_to_links_buf_crypt[i] = malloc(bufsize); if (!knet_h->send_to_links_buf_crypt[i]) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for crypto datafd to link buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->send_to_links_buf_crypt[i], 0, bufsize); } knet_h->recv_from_links_buf_decrypt = malloc(KNET_DATABUFSIZE_CRYPT); if (!knet_h->recv_from_links_buf_decrypt) { savederrno = errno; log_err(knet_h, KNET_SUB_CRYPTO, "Unable to allocate memory for crypto link to datafd buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->recv_from_links_buf_decrypt, 0, KNET_DATABUFSIZE_CRYPT); knet_h->recv_from_links_buf_crypt = malloc(KNET_DATABUFSIZE_CRYPT); if (!knet_h->recv_from_links_buf_crypt) { savederrno = errno; log_err(knet_h, KNET_SUB_CRYPTO, "Unable to allocate memory for crypto link to datafd buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->recv_from_links_buf_crypt, 0, KNET_DATABUFSIZE_CRYPT); knet_h->pingbuf_crypt = malloc(KNET_DATABUFSIZE_CRYPT); if (!knet_h->pingbuf_crypt) { savederrno = errno; log_err(knet_h, KNET_SUB_CRYPTO, "Unable to allocate memory for crypto hearbeat buffer: %s", 
strerror(savederrno)); goto exit_fail; } memset(knet_h->pingbuf_crypt, 0, KNET_DATABUFSIZE_CRYPT); knet_h->pmtudbuf_crypt = malloc(KNET_DATABUFSIZE_CRYPT); if (!knet_h->pmtudbuf_crypt) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for crypto pmtud buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->pmtudbuf_crypt, 0, KNET_DATABUFSIZE_CRYPT); knet_h->recv_from_links_buf_decompress = malloc(KNET_DATABUFSIZE_COMPRESS); if (!knet_h->recv_from_links_buf_decompress) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for decompress buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->recv_from_links_buf_decompress, 0, KNET_DATABUFSIZE_COMPRESS); knet_h->send_to_links_buf_compress = malloc(KNET_DATABUFSIZE_COMPRESS); if (!knet_h->send_to_links_buf_compress) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for compress buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->send_to_links_buf_compress, 0, KNET_DATABUFSIZE_COMPRESS); memset(knet_h->knet_transport_fd_tracker, 0, sizeof(knet_h->knet_transport_fd_tracker)); for (i = 0; i < KNET_MAX_FDS; i++) { knet_h->knet_transport_fd_tracker[i].transport = KNET_MAX_TRANSPORTS; } return 0; exit_fail: errno = savederrno; return -1; } static void _destroy_buffers(knet_handle_t knet_h) { int i; for (i = 0; i < PCKT_FRAG_MAX; i++) { free(knet_h->send_to_links_buf[i]); free(knet_h->send_to_links_buf_crypt[i]); } for (i = 0; i < PCKT_RX_BUFS; i++) { free(knet_h->recv_from_links_buf[i]); } free(knet_h->recv_from_links_buf_decompress); free(knet_h->send_to_links_buf_compress); free(knet_h->recv_from_sock_buf); free(knet_h->recv_from_links_buf_decrypt); free(knet_h->recv_from_links_buf_crypt); free(knet_h->pingbuf); free(knet_h->pingbuf_crypt); free(knet_h->pmtudbuf); free(knet_h->pmtudbuf_crypt); } static int _init_epolls(knet_handle_t knet_h) { struct epoll_event ev; int savederrno = 0; /* * 
even if the kernel does dynamic allocation with epoll_ctl * we need to reserve one extra for host to host communication */ knet_h->send_to_links_epollfd = epoll_create(KNET_EPOLL_MAX_EVENTS + 1); if (knet_h->send_to_links_epollfd < 0) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to create epoll datafd to link fd: %s", strerror(savederrno)); goto exit_fail; } knet_h->recv_from_links_epollfd = epoll_create(KNET_EPOLL_MAX_EVENTS); if (knet_h->recv_from_links_epollfd < 0) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to create epoll link to datafd fd: %s", strerror(savederrno)); goto exit_fail; } knet_h->dst_link_handler_epollfd = epoll_create(KNET_EPOLL_MAX_EVENTS); if (knet_h->dst_link_handler_epollfd < 0) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to create epoll dst cache fd: %s", strerror(savederrno)); goto exit_fail; } if (_fdset_cloexec(knet_h->send_to_links_epollfd)) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to set CLOEXEC on datafd to link epoll fd: %s", strerror(savederrno)); goto exit_fail; } if (_fdset_cloexec(knet_h->recv_from_links_epollfd)) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to set CLOEXEC on link to datafd epoll fd: %s", strerror(savederrno)); goto exit_fail; } if (_fdset_cloexec(knet_h->dst_link_handler_epollfd)) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to set CLOEXEC on dst cache epoll fd: %s", strerror(savederrno)); goto exit_fail; } memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = knet_h->dstsockfd[0]; if (epoll_ctl(knet_h->dst_link_handler_epollfd, EPOLL_CTL_ADD, knet_h->dstsockfd[0], &ev)) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to add dstsockfd[0] to epoll pool: %s", strerror(savederrno)); goto exit_fail; } return 0; exit_fail: errno = savederrno; return -1; } static void _close_epolls(knet_handle_t knet_h) { struct epoll_event ev; int i; memset(&ev, 0, sizeof(struct 
epoll_event)); for (i = 0; i < KNET_DATAFD_MAX; i++) { if (knet_h->sockfd[i].in_use) { epoll_ctl(knet_h->send_to_links_epollfd, EPOLL_CTL_DEL, knet_h->sockfd[i].sockfd[knet_h->sockfd[i].is_created], &ev); if (knet_h->sockfd[i].sockfd[knet_h->sockfd[i].is_created]) { _close_socketpair(knet_h, knet_h->sockfd[i].sockfd); } } } epoll_ctl(knet_h->dst_link_handler_epollfd, EPOLL_CTL_DEL, knet_h->dstsockfd[0], &ev); close(knet_h->send_to_links_epollfd); close(knet_h->recv_from_links_epollfd); close(knet_h->dst_link_handler_epollfd); } static int _start_threads(knet_handle_t knet_h) { int savederrno = 0; pthread_attr_t attr; set_thread_status(knet_h, KNET_THREAD_PMTUD, KNET_THREAD_REGISTERED); savederrno = pthread_attr_init(&attr); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to init pthread attributes: %s", strerror(savederrno)); goto exit_fail; } savederrno = pthread_attr_setstacksize(&attr, KNET_THREAD_STACK_SIZE); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to set stack size attribute: %s", strerror(savederrno)); goto exit_fail; } savederrno = pthread_create(&knet_h->pmtud_link_handler_thread, &attr, _handle_pmtud_link_thread, (void *) knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to start pmtud link thread: %s", strerror(savederrno)); goto exit_fail; } set_thread_status(knet_h, KNET_THREAD_DST_LINK, KNET_THREAD_REGISTERED); savederrno = pthread_create(&knet_h->dst_link_handler_thread, &attr, _handle_dst_link_handler_thread, (void *) knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to start dst cache thread: %s", strerror(savederrno)); goto exit_fail; } set_thread_status(knet_h, KNET_THREAD_TX, KNET_THREAD_REGISTERED); savederrno = pthread_create(&knet_h->send_to_links_thread, &attr, _handle_send_to_links_thread, (void *) knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to start datafd to link thread: %s", strerror(savederrno)); goto exit_fail; } set_thread_status(knet_h, 
KNET_THREAD_RX, KNET_THREAD_REGISTERED); savederrno = pthread_create(&knet_h->recv_from_links_thread, &attr, _handle_recv_from_links_thread, (void *) knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to start link to datafd thread: %s", strerror(savederrno)); goto exit_fail; } set_thread_status(knet_h, KNET_THREAD_HB, KNET_THREAD_REGISTERED); savederrno = pthread_create(&knet_h->heartbt_thread, &attr, _handle_heartbt_thread, (void *) knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to start heartbeat thread: %s", strerror(savederrno)); goto exit_fail; } savederrno = pthread_attr_destroy(&attr); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to destroy pthread attributes: %s", strerror(savederrno)); /* * Do not return error code. Error is not critical. */ } return 0; exit_fail: errno = savederrno; return -1; } static void _stop_threads(knet_handle_t knet_h) { void *retval; wait_all_threads_status(knet_h, KNET_THREAD_STOPPED); if (knet_h->heartbt_thread) { pthread_cancel(knet_h->heartbt_thread); pthread_join(knet_h->heartbt_thread, &retval); } if (knet_h->send_to_links_thread) { pthread_cancel(knet_h->send_to_links_thread); pthread_join(knet_h->send_to_links_thread, &retval); } if (knet_h->recv_from_links_thread) { pthread_cancel(knet_h->recv_from_links_thread); pthread_join(knet_h->recv_from_links_thread, &retval); } if (knet_h->dst_link_handler_thread) { pthread_cancel(knet_h->dst_link_handler_thread); pthread_join(knet_h->dst_link_handler_thread, &retval); } if (knet_h->pmtud_link_handler_thread) { pthread_cancel(knet_h->pmtud_link_handler_thread); pthread_join(knet_h->pmtud_link_handler_thread, &retval); } } knet_handle_t knet_handle_new(knet_node_id_t host_id, int log_fd, uint8_t default_log_level, uint64_t flags) { knet_handle_t knet_h; int savederrno = 0; struct rlimit cur; if (getrlimit(RLIMIT_NOFILE, &cur) < 0) { return NULL; } if ((log_fd < 0) || ((unsigned int)log_fd >= cur.rlim_max)) { errno = EINVAL; 
return NULL; } /* * validate incoming request */ if ((log_fd) && (default_log_level > KNET_LOG_DEBUG)) { errno = EINVAL; return NULL; } if (flags > KNET_HANDLE_FLAG_PRIVILEGED * 2 - 1) { errno = EINVAL; return NULL; } /* * allocate handle */ knet_h = malloc(sizeof(struct knet_handle)); if (!knet_h) { errno = ENOMEM; return NULL; } memset(knet_h, 0, sizeof(struct knet_handle)); /* * setting up some handle data so that we can use logging * also when initializing the library global locks * and trackers */ knet_h->flags = flags; /* * copy config in place */ knet_h->host_id = host_id; knet_h->logfd = log_fd; if (knet_h->logfd > 0) { memset(&knet_h->log_levels, default_log_level, KNET_MAX_SUBSYSTEMS); } /* * set internal threads time resolutions */ knet_h->threads_timer_res = KNET_THREADS_TIMER_RES; /* * set pmtud default timers */ knet_h->pmtud_interval = KNET_PMTUD_DEFAULT_INTERVAL; /* * set transports reconnect default timers */ knet_h->reconnect_int = KNET_TRANSPORT_DEFAULT_RECONNECT_INTERVAL; /* * Set 'min' stats to the maximum value so the * first value we get is always less */ knet_h->stats.tx_compress_time_min = UINT64_MAX; knet_h->stats.rx_compress_time_min = UINT64_MAX; knet_h->stats.tx_crypt_time_min = UINT64_MAX; knet_h->stats.rx_crypt_time_min = UINT64_MAX; + /* + * set onwire version. See also comments in internals.h + * on why we don´t use constants directly across the code. 
+ */ + + knet_h->onwire_ver = KNET_HEADER_ONWIRE_MIN_VER; + knet_h->onwire_min_ver = KNET_HEADER_ONWIRE_MIN_VER; + knet_h->onwire_max_ver = KNET_HEADER_ONWIRE_MAX_VER; + knet_h->onwire_ver_remap = 0; + + log_info(knet_h, KNET_SUB_HANDLE, "Default onwire version: %u", knet_h->onwire_ver); + /* * init global shlib tracker */ savederrno = pthread_mutex_lock(&handle_config_mutex); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get handle mutex lock: %s", strerror(savederrno)); free(knet_h); knet_h = NULL; errno = savederrno; return NULL; } knet_ref++; if (_init_shlib_tracker(knet_h) < 0) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to init handle tracker: %s", strerror(savederrno)); errno = savederrno; pthread_mutex_unlock(&handle_config_mutex); goto exit_fail; } pthread_mutex_unlock(&handle_config_mutex); /* * init main locking structures */ if (_init_locks(knet_h)) { savederrno = errno; goto exit_fail; } /* * init sockets */ if (_init_socks(knet_h)) { savederrno = errno; goto exit_fail; } /* * allocate packet buffers */ if (_init_buffers(knet_h)) { savederrno = errno; goto exit_fail; } if (compress_init(knet_h)) { savederrno = errno; goto exit_fail; } /* * create epoll fds */ if (_init_epolls(knet_h)) { savederrno = errno; goto exit_fail; } /* * start transports */ if (start_all_transports(knet_h)) { savederrno = errno; goto exit_fail; } /* * start internal threads */ if (_start_threads(knet_h)) { savederrno = errno; goto exit_fail; } wait_all_threads_status(knet_h, KNET_THREAD_STARTED); errno = 0; return knet_h; exit_fail: knet_handle_free(knet_h); errno = savederrno; return NULL; } int knet_handle_free(knet_handle_t knet_h) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (knet_h->host_head != NULL) { savederrno = EBUSY; log_err(knet_h, 
KNET_SUB_HANDLE, "Unable to free handle: host(s) or listener(s) are still active: %s", strerror(savederrno)); pthread_rwlock_unlock(&knet_h->global_rwlock); errno = savederrno; return -1; } knet_h->fini_in_progress = 1; pthread_rwlock_unlock(&knet_h->global_rwlock); _stop_threads(knet_h); stop_all_transports(knet_h); _close_epolls(knet_h); _destroy_buffers(knet_h); _close_socks(knet_h); crypto_fini(knet_h, KNET_MAX_CRYPTO_INSTANCES + 1); /* values above MAX_CRYPTO will release all crypto resources */ compress_fini(knet_h, 1); _destroy_locks(knet_h); free(knet_h); knet_h = NULL; (void)pthread_mutex_lock(&handle_config_mutex); knet_ref--; _fini_shlib_tracker(); pthread_mutex_unlock(&handle_config_mutex); errno = 0; return 0; } diff --git a/libknet/host.c b/libknet/host.c index e9e86eb8..c7290d1b 100644 --- a/libknet/host.c +++ b/libknet/host.c @@ -1,721 +1,804 @@ /* * Copyright (C) 2010-2020 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include #include "host.h" #include "internals.h" #include "logging.h" #include "threads_common.h" static void _host_list_update(knet_handle_t knet_h) { struct knet_host *host; knet_h->host_ids_entries = 0; for (host = knet_h->host_head; host != NULL; host = host->next) { knet_h->host_ids[knet_h->host_ids_entries] = host->host_id; knet_h->host_ids_entries++; } } int knet_host_add(knet_handle_t knet_h, knet_node_id_t host_id) { int savederrno = 0, err = 0; struct knet_host *host = NULL; uint8_t link_idx; if (!knet_h) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HOST, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (knet_h->host_index[host_id]) { err = -1; savederrno = EEXIST; log_err(knet_h, KNET_SUB_HOST, "Unable to add host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; 
} host = malloc(sizeof(struct knet_host)); if (!host) { err = -1; savederrno = errno; log_err(knet_h, KNET_SUB_HOST, "Unable to allocate memory for host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } memset(host, 0, sizeof(struct knet_host)); /* * set host_id */ host->host_id = host_id; + /* + * fill up our own data + */ + + if (knet_h->host_id == host->host_id) { + host->onwire_ver = knet_h->onwire_ver; + host->onwire_max_ver = knet_h->onwire_max_ver; + } + /* * set default host->name to host_id for logging */ snprintf(host->name, KNET_MAX_HOST_LEN, "%u", host_id); /* * initialize links internal data */ for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) { host->link[link_idx].link_id = link_idx; host->link[link_idx].status.stats.latency_min = UINT32_MAX; } /* * add new host to the index */ knet_h->host_index[host_id] = host; /* * add new host to host list */ if (knet_h->host_head) { host->next = knet_h->host_head; } knet_h->host_head = host; _host_list_update(knet_h); exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); if (err < 0) { free(host); } errno = err ? 
savederrno : 0; return err; } int knet_host_remove(knet_handle_t knet_h, knet_node_id_t host_id) { int savederrno = 0, err = 0; struct knet_host *host, *removed; uint8_t link_idx; if (!knet_h) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HOST, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_HOST, "Unable to remove host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } /* * if links are configured we cannot release the host */ for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) { if (host->link[link_idx].configured) { err = -1; savederrno = EBUSY; log_err(knet_h, KNET_SUB_HOST, "Unable to remove host %u, links are still configured: %s", host_id, strerror(savederrno)); goto exit_unlock; } } removed = NULL; /* * removing host from list */ if (knet_h->host_head->host_id == host_id) { removed = knet_h->host_head; knet_h->host_head = removed->next; } else { for (host = knet_h->host_head; host->next != NULL; host = host->next) { if (host->next->host_id == host_id) { removed = host->next; host->next = removed->next; break; } } } knet_h->host_index[host_id] = NULL; free(removed); _host_list_update(knet_h); exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? 
savederrno : 0; return err; } int knet_host_set_name(knet_handle_t knet_h, knet_node_id_t host_id, const char *name) { int savederrno = 0, err = 0; struct knet_host *host; if (!knet_h) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HOST, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (!knet_h->host_index[host_id]) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_HOST, "Unable to find host %u to set name: %s", host_id, strerror(savederrno)); goto exit_unlock; } if (!name) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_HOST, "Unable to set name for host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } if (strlen(name) >= KNET_MAX_HOST_LEN) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_HOST, "Requested name for host %u is too long: %s", host_id, strerror(savederrno)); goto exit_unlock; } for (host = knet_h->host_head; host != NULL; host = host->next) { if (!strncmp(host->name, name, KNET_MAX_HOST_LEN)) { err = -1; savederrno = EEXIST; log_err(knet_h, KNET_SUB_HOST, "Duplicated name found on host_id %u", host->host_id); goto exit_unlock; } } snprintf(knet_h->host_index[host_id]->name, KNET_MAX_HOST_LEN, "%s", name); exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? 
savederrno : 0; return err; } int knet_host_get_name_by_host_id(knet_handle_t knet_h, knet_node_id_t host_id, char *name) { int savederrno = 0, err = 0; if (!knet_h) { errno = EINVAL; return -1; } if (!name) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_HOST, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (!knet_h->host_index[host_id]) { savederrno = EINVAL; err = -1; log_debug(knet_h, KNET_SUB_HOST, "Host %u not found", host_id); goto exit_unlock; } snprintf(name, KNET_MAX_HOST_LEN, "%s", knet_h->host_index[host_id]->name); exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_host_get_id_by_host_name(knet_handle_t knet_h, const char *name, knet_node_id_t *host_id) { int savederrno = 0, err = 0, found = 0; struct knet_host *host; if (!knet_h) { errno = EINVAL; return -1; } if (!name) { errno = EINVAL; return -1; } if (!host_id) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_HOST, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } for (host = knet_h->host_head; host != NULL; host = host->next) { if (!strncmp(name, host->name, KNET_MAX_HOST_LEN)) { found = 1; *host_id = host->host_id; break; } } if (!found) { savederrno = ENOENT; err = -1; } pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? 
savederrno : 0; return err; } int knet_host_get_host_list(knet_handle_t knet_h, knet_node_id_t *host_ids, size_t *host_ids_entries) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } if ((!host_ids) || (!host_ids_entries)) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_HOST, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } memmove(host_ids, knet_h->host_ids, sizeof(knet_h->host_ids)); *host_ids_entries = knet_h->host_ids_entries; pthread_rwlock_unlock(&knet_h->global_rwlock); return 0; } int knet_host_set_policy(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t policy) { int savederrno = 0, err = 0; uint8_t old_policy; if (!knet_h) { errno = EINVAL; return -1; } if (policy > KNET_LINK_POLICY_RR) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HOST, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (!knet_h->host_index[host_id]) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_HOST, "Unable to set name for host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } old_policy = knet_h->host_index[host_id]->link_handler_policy; knet_h->host_index[host_id]->link_handler_policy = policy; if (_host_dstcache_update_async(knet_h, knet_h->host_index[host_id])) { savederrno = errno; err = -1; knet_h->host_index[host_id]->link_handler_policy = old_policy; log_debug(knet_h, KNET_SUB_HOST, "Unable to update switch cache for host %u: %s", host_id, strerror(savederrno)); } log_debug(knet_h, KNET_SUB_HOST, "Host %u has new switching policy: %u", host_id, policy); exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? 
savederrno : 0;
	return err;
}

/*
 * Read the link switching policy of the host indexed at host_id.
 *
 * knet_h  - knet handle
 * host_id - host to query
 * policy  - receives the host's link_handler_policy
 *
 * Runs under the global read lock.
 *
 * Returns 0 on success (errno cleared), -1 on error with errno set to
 * EINVAL (NULL argument or unknown host) or to the
 * pthread_rwlock_rdlock() error code.
 */
int knet_host_get_policy(knet_handle_t knet_h, knet_node_id_t host_id,
			 uint8_t *policy)
{
	int savederrno = 0, err = 0;

	if (!knet_h) {
		errno = EINVAL;
		return -1;
	}

	if (!policy) {
		errno = EINVAL;
		return -1;
	}

	savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock);
	if (savederrno) {
		log_err(knet_h, KNET_SUB_HOST, "Unable to get read lock: %s",
			strerror(savederrno));
		errno = savederrno;
		return -1;
	}

	if (!knet_h->host_index[host_id]) {
		err = -1;
		savederrno = EINVAL;
		log_err(knet_h, KNET_SUB_HOST, "Unable to get policy for host %u: %s",
			host_id, strerror(savederrno));
		goto exit_unlock;
	}

	*policy = knet_h->host_index[host_id]->link_handler_policy;

exit_unlock:
	pthread_rwlock_unlock(&knet_h->global_rwlock);
	errno = err ? savederrno : 0;
	return err;
}

/*
 * Copy the status record of the host indexed at host_id into *status.
 *
 * knet_h  - knet handle
 * host_id - host to query
 * status  - receives a copy of the host's struct knet_host_status
 *
 * Runs under the global read lock.
 *
 * Returns 0 on success (errno cleared), -1 on error with errno set to
 * EINVAL (NULL argument or unknown host) or to the
 * pthread_rwlock_rdlock() error code.
 */
int knet_host_get_status(knet_handle_t knet_h, knet_node_id_t host_id,
			 struct knet_host_status *status)
{
	int savederrno = 0, err = 0;
	struct knet_host *host;

	if (!knet_h) {
		errno = EINVAL;
		return -1;
	}

	if (!status) {
		errno = EINVAL;
		return -1;
	}

	savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock);
	if (savederrno) {
		log_err(knet_h, KNET_SUB_HOST, "Unable to get read lock: %s",
			strerror(savederrno));
		errno = savederrno;
		return -1;
	}

	host = knet_h->host_index[host_id];
	if (!host) {
		err = -1;
		savederrno = EINVAL;
		log_err(knet_h, KNET_SUB_HOST, "Unable to find host %u: %s",
			host_id, strerror(savederrno));
		goto exit_unlock;
	}

	memmove(status, &host->status, sizeof(struct knet_host_status));

exit_unlock:
	pthread_rwlock_unlock(&knet_h->global_rwlock);
	errno = err ?
savederrno : 0; return err; } /* * Install the host status change callback and its private data on the handle while holding the global write lock; passing a NULL host_status_change_notify_fn is logged as "disabled". * Returns 0 with errno cleared on success; -1 with errno set to EINVAL when knet_h is NULL, or to the get_global_wrlock() error code. */ int knet_host_enable_status_change_notify(knet_handle_t knet_h, void *host_status_change_notify_fn_private_data, void (*host_status_change_notify_fn) ( void *private_data, knet_node_id_t host_id, uint8_t reachable, uint8_t remote, uint8_t external)) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HOST, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } knet_h->host_status_change_notify_fn_private_data = host_status_change_notify_fn_private_data; knet_h->host_status_change_notify_fn = host_status_change_notify_fn; if (knet_h->host_status_change_notify_fn) { log_debug(knet_h, KNET_SUB_HOST, "host_status_change_notify_fn enabled"); } else { log_debug(knet_h, KNET_SUB_HOST, "host_status_change_notify_fn disabled"); } pthread_rwlock_unlock(&knet_h->global_rwlock); errno = 0; return 0; } /* * Zero both circular buffers and every defrag buffer of host, and set host->rx_seq_num to rx_seq_num. */ static void _clear_cbuffers(struct knet_host *host, seq_num_t rx_seq_num) { int i; memset(host->circular_buffer, 0, KNET_CBUFFER_SIZE); host->rx_seq_num = rx_seq_num; memset(host->circular_buffer_defrag, 0, KNET_CBUFFER_SIZE); for (i = 0; i < KNET_MAX_LINK; i++) { memset(&host->defrag_buf[i], 0, sizeof(struct knet_host_defrag_buf)); } } /* * Mark as free (in_use = 0) every in-use defrag buffer of host whose pckt_seq is not strictly between tail (seq_num - (KNET_MAX_LINK + 1)) and head (seq_num + 1). * The two branches below handle the case where that window wraps around the seq_num_t range - assumes seq_num_t is an unsigned type, TODO confirm. */ static void _reclaim_old_defrag_bufs(struct knet_host *host, seq_num_t seq_num) { seq_num_t head, tail; /* seq_num boundaries */ int i; head = seq_num + 1; tail = seq_num - (KNET_MAX_LINK + 1); /* * expire old defrag buffers */ for (i = 0; i < KNET_MAX_LINK; i++) { if (host->defrag_buf[i].in_use) { /* * head has done a rollover to 0+ */ if (tail > head) { if ((host->defrag_buf[i].pckt_seq >= head) && (host->defrag_buf[i].pckt_seq <= tail)) { host->defrag_buf[i].in_use = 0; } } else { if ((host->defrag_buf[i].pckt_seq >= head) || (host->defrag_buf[i].pckt_seq <= tail)){ host->defrag_buf[i].in_use = 0; } } } } } /* * check if a given packet seq num is in the circular buffers * defrag_buf = 0
-> use normal cbuf 1 -> use the defrag buffer lookup * clear_buf != 0 -> reset all buffers of host to seq_num (_clear_cbuffers) before the lookup * * when seq_dist (distance between seq_num and host->rx_seq_num as computed below) is smaller than KNET_CBUFFER_SIZE: * returns 1 if the slot of seq_num in the selected buffer is clear, 0 if it is already marked. * for any other seq_num the buffers are cleaned, host->rx_seq_num is moved to seq_num and 1 is returned. * expired defrag buffers are always reclaimed first (_reclaim_old_defrag_bufs). */ int _seq_num_lookup(struct knet_host *host, seq_num_t seq_num, int defrag_buf, int clear_buf) { size_t head, tail; /* circular buffer indexes */ seq_num_t seq_dist; char *dst_cbuf = host->circular_buffer; char *dst_cbuf_defrag = host->circular_buffer_defrag; seq_num_t *dst_seq_num = &host->rx_seq_num; if (clear_buf) { _clear_cbuffers(host, seq_num); } _reclaim_old_defrag_bufs(host, seq_num); if (seq_num < *dst_seq_num) { seq_dist = (SEQ_MAX - seq_num) + *dst_seq_num; } else { seq_dist = *dst_seq_num - seq_num; } head = seq_num % KNET_CBUFFER_SIZE; if (seq_dist < KNET_CBUFFER_SIZE) { /* seq num is in ring buffer */ if (!defrag_buf) { return (dst_cbuf[head] == 0) ? 1 : 0; } else { return (dst_cbuf_defrag[head] == 0) ? 1 : 0; } } else if (seq_dist <= SEQ_MAX - KNET_CBUFFER_SIZE) { /* seq_num is far from the tracked window: wipe both buffers and restart tracking at seq_num */ memset(dst_cbuf, 0, KNET_CBUFFER_SIZE); memset(dst_cbuf_defrag, 0, KNET_CBUFFER_SIZE); *dst_seq_num = seq_num; } /* cleaning up circular buffer */ tail = (*dst_seq_num + 1) % KNET_CBUFFER_SIZE; if (tail > head) { /* range to clear wraps around the end of the buffers: clear [tail, end) and [0, head] */ memset(dst_cbuf + tail, 0, KNET_CBUFFER_SIZE - tail); memset(dst_cbuf, 0, head + 1); memset(dst_cbuf_defrag + tail, 0, KNET_CBUFFER_SIZE - tail); memset(dst_cbuf_defrag, 0, head + 1); } else { memset(dst_cbuf + tail, 0, head - tail + 1); memset(dst_cbuf_defrag + tail, 0, head - tail + 1); } *dst_seq_num = seq_num; return 1; } /* * Mark seq_num as seen: set its slot (seq_num % KNET_CBUFFER_SIZE) to 1 in the normal circular buffer (defrag_buf == 0) or in the defrag one. */ void _seq_num_set(struct knet_host *host, seq_num_t seq_num, int defrag_buf) { if (!defrag_buf) { host->circular_buffer[seq_num % KNET_CBUFFER_SIZE] = 1; } else { host->circular_buffer_defrag[seq_num % KNET_CBUFFER_SIZE] = 1; } return; } /* * Request a destination cache update for host by writing its host_id to knet_h->dstsockfd[1] with a non-blocking sendto(). * Returns 0 when the whole id was written, -1 with errno preserved otherwise. */ int _host_dstcache_update_async(knet_handle_t knet_h, struct knet_host *host) { int savederrno = 0; knet_node_id_t host_id = host->host_id; if (sendto(knet_h->dstsockfd[1], &host_id, sizeof(host_id), MSG_DONTWAIT | MSG_NOSIGNAL, NULL, 0) != sizeof(host_id)) { savederrno = errno; log_debug(knet_h, KNET_SUB_HOST, "Unable to write to dstpipefd[1]: %s", strerror(savederrno));
errno = savederrno; return -1; } return 0; } /* * Rebuild host->active_links / host->active_link_entries from the links that are enabled, connected and have a valid MTU. * With KNET_LINK_POLICY_PASSIVE only the highest priority such link is kept (in active_links[0]); for the other policies all of them are listed. * If host is the local node and knet_h->has_loop_link is set, active_link_entries is forced to 1 and nothing else is done. * When no link is active the host's circular buffers are cleared; when host->status.reachable changes, host_status_change_notify_fn (if set) is invoked. * Always returns 0. */ int _host_dstcache_update_sync(knet_handle_t knet_h, struct knet_host *host) { int link_idx; int best_priority = -1; int reachable = 0; if (knet_h->host_id == host->host_id && knet_h->has_loop_link) { host->active_link_entries = 1; return 0; } host->active_link_entries = 0; for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) { if (host->link[link_idx].status.enabled != 1) /* link is not enabled */ continue; if (host->link[link_idx].status.connected != 1) /* link is not connected */ continue; if (host->link[link_idx].has_valid_mtu != 1) /* link does not have valid MTU */ continue; if (host->link_handler_policy == KNET_LINK_POLICY_PASSIVE) { /* for passive we look for the only active link with higher priority */ if (host->link[link_idx].priority > best_priority) { host->active_links[0] = link_idx; best_priority = host->link[link_idx].priority; } host->active_link_entries = 1; } else { /* for RR and ACTIVE we need to copy all available links */ host->active_links[host->active_link_entries] = link_idx; host->active_link_entries++; } } /* NOTE(review): in passive mode the "best link" message is logged even when no link qualified, in which case active_links[0] was not updated by the loop above */ if (host->link_handler_policy == KNET_LINK_POLICY_PASSIVE) { log_info(knet_h, KNET_SUB_HOST, "host: %u (passive) best link: %u (pri: %u)", host->host_id, host->link[host->active_links[0]].link_id, host->link[host->active_links[0]].priority); } else { log_info(knet_h, KNET_SUB_HOST, "host: %u has %u active links", host->host_id, host->active_link_entries); } /* no active links, we can clean the circular buffers and indexes */ if (!host->active_link_entries) { log_warn(knet_h, KNET_SUB_HOST, "host: %u has no active links", host->host_id); _clear_cbuffers(host, 0); } else { reachable = 1; } /* notify the application only when reachability actually changed */ if (host->status.reachable != reachable) { host->status.reachable = reachable; if (knet_h->host_status_change_notify_fn) { knet_h->host_status_change_notify_fn( knet_h->host_status_change_notify_fn_private_data, host->host_id, host->status.reachable, host->status.remote, host->status.external); } } return 0;
} + +void _handle_onwire_version(knet_handle_t knet_h, struct knet_host *host, struct knet_header *inbuf) +{ + struct knet_host *tmp_host = NULL; + uint8_t onwire_ver = knet_h->onwire_max_ver; + int docallback = 0; + + /* + * record the onwire versions advertised by host in inbuf, then + * recompute the onwire version for this handle: start from our own + * onwire_max_ver, lower it to the smallest usable onwire_max_ver among + * the known hosts (onwire_force_ver, when set, overrides the result) and + * call onwire_ver_notify_fn if knet_h->onwire_ver changed. + * + * data we process here are onwire independent + * we are in a global read only lock context, so it's safe to parse host lists + * and we can change onwire_ver using the dedicated mutex + */ + + /* + * update current host onwire info + */ + host->onwire_ver = inbuf->kh_version; + host->onwire_max_ver = inbuf->kh_max_ver; + + for (tmp_host = knet_h->host_head; tmp_host != NULL; tmp_host = tmp_host->next) { + /* + * do not attempt to change protocol till + * we see all nodes at least once. + */ + if (!tmp_host->onwire_max_ver) { + return; + } + + /* + * ignore nodes where max ver is lower than our min ver + * logged as error by thread_rx, we need to make sure to skip it + * during onwire_ver calculation. + */ + if (tmp_host->onwire_max_ver < knet_h->onwire_min_ver) { + continue; + } + + /* + * use the highest max_ver common to all known nodes + */ + if (tmp_host->onwire_max_ver < onwire_ver) { + onwire_ver = tmp_host->onwire_max_ver; + } + } + + if (pthread_mutex_lock(&knet_h->onwire_mutex)) { + log_debug(knet_h, KNET_SUB_HOST, "Unable to get onwire mutex lock"); + return; + } + + if (knet_h->onwire_force_ver) { + onwire_ver = knet_h->onwire_force_ver; + } + + if (knet_h->onwire_ver != onwire_ver) { + log_debug(knet_h, KNET_SUB_HOST, "node %u updating onwire version to %u", knet_h->host_id, onwire_ver); + knet_h->onwire_ver = onwire_ver; + docallback = 1; + } + + pthread_mutex_unlock(&knet_h->onwire_mutex); + + /* + * do the callback outside of locked context and use cached value + * to avoid blocking on locking + */ + if ((docallback) && + (knet_h->onwire_ver_notify_fn)) { + knet_h->onwire_ver_notify_fn(knet_h->onwire_ver_notify_fn_private_data, + knet_h->onwire_min_ver, + knet_h->onwire_max_ver, + onwire_ver); + } +} diff --git a/libknet/host.h
b/libknet/host.h index 3312c8ba..dba58db8 100644 --- a/libknet/host.h +++ b/libknet/host.h @@ -1,21 +1,23 @@ /* * Copyright (C) 2012-2020 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #ifndef __KNET_HOST_H__ #define __KNET_HOST_H__ #include "internals.h" int _seq_num_lookup(struct knet_host *host, seq_num_t seq_num, int defrag_buf, int clear_buf); void _seq_num_set(struct knet_host *host, seq_num_t seq_num, int defrag_buf); int _host_dstcache_update_async(knet_handle_t knet_h, struct knet_host *host); int _host_dstcache_update_sync(knet_handle_t knet_h, struct knet_host *host); +void _handle_onwire_version(knet_handle_t knet_h, struct knet_host *host, struct knet_header *inbuf); + #endif diff --git a/libknet/internals.h b/libknet/internals.h index b90cdd5a..fe31b04d 100644 --- a/libknet/internals.h +++ b/libknet/internals.h @@ -1,427 +1,445 @@ /* * Copyright (C) 2010-2020 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #ifndef __KNET_INTERNALS_H__ #define __KNET_INTERNALS_H__ /* * NOTE: you shouldn't need to include this header normally */ #include #include #include + #include "libknet.h" #include "onwire.h" #include "compat.h" #include "threads_common.h" #define KNET_DATABUFSIZE KNET_MAX_PACKET_SIZE + KNET_HEADER_ALL_SIZE #define KNET_DATABUFSIZE_CRYPT_PAD 1024 #define KNET_DATABUFSIZE_CRYPT KNET_DATABUFSIZE + KNET_DATABUFSIZE_CRYPT_PAD #define KNET_DATABUFSIZE_COMPRESS_PAD 1024 #define KNET_DATABUFSIZE_COMPRESS KNET_DATABUFSIZE + KNET_DATABUFSIZE_COMPRESS_PAD #define KNET_RING_RCVBUFF 8388608 #define PCKT_FRAG_MAX UINT8_MAX #define PCKT_RX_BUFS 512 #define KNET_EPOLL_MAX_EVENTS KNET_DATAFD_MAX + 1 /* * Size of threads stack. Value is choosen by experimenting, how much is needed * to sucesfully finish test suite, and at the time of writing patch it was * ~300KiB. 
To have some room for future enhancement it is increased * by factor of 3 and rounded. */ #define KNET_THREAD_STACK_SIZE (1024 * 1024) typedef void *knet_transport_link_t; /* per link transport handle */ typedef void *knet_transport_t; /* per knet_h transport handle */ struct knet_transport_ops; /* Forward because of circular dependancy */ struct knet_mmsghdr { struct msghdr msg_hdr; /* Message header */ unsigned int msg_len; /* Number of bytes transmitted */ }; struct knet_link { /* required */ struct sockaddr_storage src_addr; struct sockaddr_storage dst_addr; /* configurable */ unsigned int dynamic; /* see KNET_LINK_DYN_ define above */ uint8_t priority; /* higher priority == preferred for A/P */ unsigned long long ping_interval; /* interval */ unsigned long long pong_timeout; /* timeout */ unsigned long long pong_timeout_adj; /* timeout adjusted for latency */ uint8_t pong_timeout_backoff; /* see link.h for definition */ unsigned int latency_max_samples; /* precision */ uint8_t pong_count; /* how many ping/pong to send/receive before link is up */ uint64_t flags; /* status */ struct knet_link_status status; /* internals */ pthread_mutex_t link_stats_mutex; /* used to update link stats */ uint8_t link_id; uint8_t transport; /* #defined constant from API */ knet_transport_link_t transport_link; /* link_info_t from transport */ int outsock; unsigned int configured:1; /* set to 1 if src/dst have been configured transport initialized on this link*/ unsigned int transport_connected:1; /* set to 1 if lower level transport is connected */ uint8_t received_pong; struct timespec ping_last; /* used by PMTUD thread as temp per-link variables and should always contain the onwire_len value! */ uint32_t proto_overhead; /* IP + UDP/SCTP overhead. 
NOT to be confused with stats.proto_overhead that includes also knet headers and crypto headers */ struct timespec pmtud_last; uint32_t last_ping_size; uint32_t last_good_mtu; uint32_t last_bad_mtu; uint32_t last_sent_mtu; uint32_t last_recv_mtu; uint32_t pmtud_crypto_timeout_multiplier;/* used by PMTUd to adjust timeouts on high loads */ uint8_t has_valid_mtu; }; #define KNET_CBUFFER_SIZE 4096 struct knet_host_defrag_buf { char buf[KNET_DATABUFSIZE]; uint8_t in_use; /* 0 buffer is free, 1 is in use */ seq_num_t pckt_seq; /* identify the pckt we are receiving */ uint8_t frag_recv; /* how many frags did we receive */ uint8_t frag_map[PCKT_FRAG_MAX];/* bitmap of what we received? */ uint8_t last_first; /* special case if we receive the last fragment first */ ssize_t frag_size; /* normal frag size (not the last one) */ ssize_t last_frag_size; /* the last fragment might not be aligned with MTU size */ struct timespec last_update; /* keep time of the last pckt */ }; struct knet_host { /* required */ knet_node_id_t host_id; /* configurable */ uint8_t link_handler_policy; char name[KNET_MAX_HOST_LEN]; /* status */ struct knet_host_status status; + /* + * onwire info + */ + uint8_t onwire_ver; /* node current onwire version */ + uint8_t onwire_max_ver; /* node supports up to this version */ /* internals */ char circular_buffer[KNET_CBUFFER_SIZE]; seq_num_t rx_seq_num; seq_num_t untimed_rx_seq_num; seq_num_t timed_rx_seq_num; uint8_t got_data; /* defrag/reassembly buffers */ struct knet_host_defrag_buf defrag_buf[KNET_MAX_LINK]; char circular_buffer_defrag[KNET_CBUFFER_SIZE]; /* link stuff */ struct knet_link link[KNET_MAX_LINK]; uint8_t active_link_entries; uint8_t active_links[KNET_MAX_LINK]; struct knet_host *next; }; struct knet_sock { int sockfd[2]; /* sockfd[0] will always be application facing * and sockfd[1] internal if sockpair has been created by knet */ int is_socket; /* check if it's a socket for recvmmsg usage */ int is_created; /* knet created this socket and 
has to clean up on exit/del */ int in_use; /* set to 1 if it's use, 0 if free */ int has_error; /* set to 1 if there were errors reading from the sock * and socket has been removed from epoll */ }; struct knet_fd_trackers { uint8_t transport; /* transport type (UDP/SCTP...) */ uint8_t data_type; /* internal use for transport to define what data are associated * with this fd */ void *data; /* pointer to the data */ void *access_list_match_entry_head; /* pointer to access list match_entry list head */ }; #define KNET_MAX_FDS KNET_MAX_HOST * KNET_MAX_LINK * 4 #define KNET_MAX_COMPRESS_METHODS UINT8_MAX #define KNET_MAX_CRYPTO_INSTANCES 2 struct knet_handle_stats_extra { uint64_t tx_crypt_pmtu_packets; uint64_t tx_crypt_pmtu_reply_packets; uint64_t tx_crypt_ping_packets; uint64_t tx_crypt_pong_packets; }; struct knet_handle { knet_node_id_t host_id; unsigned int enabled:1; struct knet_sock sockfd[KNET_DATAFD_MAX + 1]; int logfd; uint8_t log_levels[KNET_MAX_SUBSYSTEMS]; int dstsockfd[2]; int send_to_links_epollfd; int recv_from_links_epollfd; int dst_link_handler_epollfd; uint8_t use_access_lists; /* set to 0 for disable, 1 for enable */ unsigned int pmtud_interval; unsigned int manual_mtu; unsigned int data_mtu; /* contains the max data size that we can send onwire * without frags */ struct knet_host *host_head; struct knet_host *host_index[KNET_MAX_HOST]; knet_transport_t transports[KNET_MAX_TRANSPORTS+1]; struct knet_fd_trackers knet_transport_fd_tracker[KNET_MAX_FDS]; /* track status for each fd handled by transports */ struct knet_handle_stats stats; struct knet_handle_stats_extra stats_extra; pthread_mutex_t handle_stats_mutex; /* used to protect handle stats */ uint32_t reconnect_int; knet_node_id_t host_ids[KNET_MAX_HOST]; size_t host_ids_entries; struct knet_header *recv_from_sock_buf; struct knet_header *send_to_links_buf[PCKT_FRAG_MAX]; struct knet_header *recv_from_links_buf[PCKT_RX_BUFS]; struct knet_header *pingbuf; struct knet_header *pmtudbuf; uint8_t 
threads_status[KNET_THREAD_MAX]; uint8_t threads_flush_queue[KNET_THREAD_MAX]; useconds_t threads_timer_res; pthread_mutex_t threads_status_mutex; pthread_t send_to_links_thread; pthread_t recv_from_links_thread; pthread_t heartbt_thread; pthread_t dst_link_handler_thread; pthread_t pmtud_link_handler_thread; pthread_rwlock_t global_rwlock; /* global config lock */ pthread_mutex_t pmtud_mutex; /* pmtud mutex to handle conditional send/recv + timeout */ pthread_cond_t pmtud_cond; /* conditional for above */ pthread_mutex_t tx_mutex; /* used to protect knet_send_sync and TX thread */ pthread_mutex_t hb_mutex; /* used to protect heartbeat thread and seq_num broadcasting */ pthread_mutex_t backoff_mutex; /* used to protect dst_link->pong_timeout_adj */ pthread_mutex_t kmtu_mutex; /* used to protect kernel_mtu */ + pthread_mutex_t onwire_mutex; /* used to protect onwire version */ + uint8_t onwire_ver; /* currently agreed onwire version across known nodes */ + uint8_t onwire_min_ver; /* min and max are constant and don´t need any mutex protection. 
*/ + uint8_t onwire_max_ver; /* we define them as part of internal handle so that we can mingle with them for testing purposes */ + uint8_t onwire_force_ver; /* manually configure onwire_ver */ + uint8_t onwire_ver_remap; /* when this is on, all mapping will use version 1 for now */ uint32_t kernel_mtu; /* contains the MTU detected by the kernel on a given link */ int pmtud_waiting; int pmtud_running; int pmtud_forcerun; int pmtud_abort; struct crypto_instance *crypto_instance[KNET_MAX_CRYPTO_INSTANCES + 1]; /* store an extra pointer to allow 0|1|2 values without too much magic in the code */ uint8_t crypto_in_use_config; /* crypto config to use for TX */ uint8_t crypto_only; /* allow only crypto (1) or also clear (0) traffic */ size_t sec_block_size; size_t sec_hash_size; size_t sec_salt_size; unsigned char *send_to_links_buf_crypt[PCKT_FRAG_MAX]; unsigned char *recv_from_links_buf_crypt; unsigned char *recv_from_links_buf_decrypt; unsigned char *pingbuf_crypt; unsigned char *pmtudbuf_crypt; int compress_model; int compress_level; size_t compress_threshold; void *compress_int_data[KNET_MAX_COMPRESS_METHODS]; /* for compress method private data */ unsigned char *recv_from_links_buf_decompress; unsigned char *send_to_links_buf_compress; seq_num_t tx_seq_num; pthread_mutex_t tx_seq_num_mutex; uint8_t has_loop_link; uint8_t loop_link; void *dst_host_filter_fn_private_data; int (*dst_host_filter_fn) ( void *private_data, const unsigned char *outdata, ssize_t outdata_len, uint8_t tx_rx, knet_node_id_t this_host_id, knet_node_id_t src_node_id, int8_t *channel, knet_node_id_t *dst_host_ids, size_t *dst_host_ids_entries); void *pmtud_notify_fn_private_data; void (*pmtud_notify_fn) ( void *private_data, unsigned int data_mtu); void *host_status_change_notify_fn_private_data; void (*host_status_change_notify_fn) ( void *private_data, knet_node_id_t host_id, uint8_t reachable, uint8_t remote, uint8_t external); void *link_status_change_notify_fn_private_data; void 
(*link_status_change_notify_fn) ( void *private_data, knet_node_id_t host_id, uint8_t link_id, uint8_t connected, uint8_t remote, uint8_t external); void *sock_notify_fn_private_data; void (*sock_notify_fn) ( void *private_data, int datafd, int8_t channel, uint8_t tx_rx, int error, int errorno); + void *onwire_ver_notify_fn_private_data; + void (*onwire_ver_notify_fn) ( + void *private_data, + uint8_t onwire_min_ver, + uint8_t onwire_max_ver, + uint8_t onwire_ver); int fini_in_progress; uint64_t flags; }; extern pthread_rwlock_t shlib_rwlock; /* global shared lib load lock */ /* * NOTE: every single operation must be implementend * for every protocol. */ /* * for now knet supports only IP protocols (udp/sctp) * in future there might be others like ARP * or TIPC. * keep this around as transport information * to use for access lists and other operations */ #define TRANSPORT_PROTO_LOOPBACK 0 #define TRANSPORT_PROTO_IP_PROTO 1 /* * some transports like SCTP can filter incoming * connections before knet has to process * any packets. * GENERIC_ACL -> packet has to be read and filterted * PROTO_ACL -> transport provides filtering at lower levels * and packet does not need to be processed */ typedef enum { USE_NO_ACL, USE_GENERIC_ACL, USE_PROTO_ACL } transport_acl; /* * make it easier to map values in transports.c */ #define TRANSPORT_PROTO_NOT_CONNECTION_ORIENTED 0 #define TRANSPORT_PROTO_IS_CONNECTION_ORIENTED 1 typedef struct knet_transport_ops { /* * transport generic information */ const char *transport_name; const uint8_t transport_id; const uint8_t built_in; uint8_t transport_protocol; transport_acl transport_acl_type; /* * connection oriented protocols like SCTP * don´t need dst_addr in sendto calls and * on some OSes are considered EINVAL. */ uint8_t transport_is_connection_oriented; uint32_t transport_mtu_overhead; /* * transport init must allocate the new transport * and perform all internal initializations * (threads, lists, etc). 
*/ int (*transport_init)(knet_handle_t knet_h); /* * transport free must releases _all_ resources * allocated by tranport_init */ int (*transport_free)(knet_handle_t knet_h); /* * link operations should take care of all the * sockets and epoll management for a given link/transport set * transport_link_disable should return err = -1 and errno = EBUSY * if listener is still in use, and any other errno in case * the link cannot be disabled. * * set_config/clear_config are invoked in global write lock context */ int (*transport_link_set_config)(knet_handle_t knet_h, struct knet_link *link); int (*transport_link_clear_config)(knet_handle_t knet_h, struct knet_link *link); /* * transport callback for incoming dynamic connections * this is called in global read lock context */ int (*transport_link_dyn_connect)(knet_handle_t knet_h, int sockfd, struct knet_link *link); /* * return the fd to use for access lists */ int (*transport_link_get_acl_fd)(knet_handle_t knet_h, struct knet_link *link); /* * per transport error handling of recvmmsg * (see _handle_recv_from_links comments for details) */ /* * transport_rx_sock_error is invoked when recvmmsg returns <= 0 * * transport_rx_sock_error is invoked with both global_rdlock */ int (*transport_rx_sock_error)(knet_handle_t knet_h, int sockfd, int recv_err, int recv_errno); /* * transport_tx_sock_error is invoked with global_rwlock and * it's invoked when sendto or sendmmsg returns =< 0 * * it should return: * -1 on internal error * 0 ignore error and continue * 1 retry * any sleep or wait action should happen inside the transport code */ int (*transport_tx_sock_error)(knet_handle_t knet_h, int sockfd, int recv_err, int recv_errno); /* * this function is called on _every_ received packet * to verify if the packet is data or internal protocol error handling * * it should return: * -1 on error * 0 packet is not data and we should continue the packet process loop * 1 packet is not data and we should STOP the packet process loop * 2 
packet is data and should be parsed as such * * transport_rx_is_data is invoked with both global_rwlock * and fd_tracker read lock (from RX thread) */ int (*transport_rx_is_data)(knet_handle_t knet_h, int sockfd, struct knet_mmsghdr *msg); /* * this function is called by links.c when a link down event is recorded * to notify the transport that packets are not going through, and give * transport the opportunity to take actions. */ int (*transport_link_is_down)(knet_handle_t knet_h, struct knet_link *link); } knet_transport_ops_t; struct pretty_names { const char *name; uint8_t val; }; #endif diff --git a/libknet/libknet.h b/libknet/libknet.h index b53132ab..ce76ef41 100644 --- a/libknet/libknet.h +++ b/libknet/libknet.h @@ -1,2366 +1,2456 @@ /* * Copyright (C) 2010-2020 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #ifndef __LIBKNET_H__ #define __LIBKNET_H__ #include #include #include #include #include /** * @file libknet.h * @brief kronosnet API include file * @copyright Copyright (C) 2010-2020 Red Hat, Inc. All rights reserved. * * Kronosnet is an advanced VPN system for High Availability applications. */ #define KNET_API_VER 2 /* * libknet limits */ /* * Maximum number of hosts */ typedef uint16_t knet_node_id_t; #define KNET_MAX_HOST 65536 /* * Maximum number of links between 2 hosts */ #define KNET_MAX_LINK 8 /* * Maximum packet size that should be written to datafd * see knet_handle_new for details */ #define KNET_MAX_PACKET_SIZE 65536 /* * Buffers used for pretty logging * host is used to store both ip addresses and hostnames */ #define KNET_MAX_HOST_LEN 256 #define KNET_MAX_PORT_LEN 6 /* * Some notifications can be generated either on TX or RX */ #define KNET_NOTIFY_TX 0 #define KNET_NOTIFY_RX 1 /* * Link flags */ /* * Where possible, set traffic priority to high. 
* On Linux this sets the TOS to INTERACTIVE (6), * see tc-prio(8) for more infomation */ #define KNET_LINK_FLAG_TRAFFICHIPRIO (1ULL << 0) /* * Handle flags */ /* * Use privileged operations during socket setup. */ #define KNET_HANDLE_FLAG_PRIVILEGED (1ULL << 0) /* * threads timer resolution (see knet_handle_set_threads_timer_res below) */ #define KNET_THREADS_TIMER_RES 200000 typedef struct knet_handle *knet_handle_t; /* * Handle structs/API calls */ /** * knet_handle_new * * @brief create a new instance of a knet handle * * host_id - Each host in a knet is identified with a unique * ID. when creating a new handle local host_id * must be specified (0 to UINT16_MAX are all valid). * It is the user's responsibility to check that the value * is unique, or bad things might happen. * * log_fd - Write file descriptor. If set to a value > 0, it will be used * to write log packets from libknet to the application. * Setting to 0 will disable logging from libknet. * It is possible to enable logging at any given time (see logging API). * Make sure to either read from this filedescriptor properly and/or * mark it O_NONBLOCK, otherwise if the fd becomes full, libknet could * block. * It is strongly encouraged to use pipes (ex: pipe(2) or pipe2(2)) for * logging fds due to the atomic nature of writes between fds. * See also libknet test suite for reference and guidance. * * default_log_level - * If logfd is specified, it will initialize all subsystems to log * at default_log_level value. (see logging API) * * flags - bitwise OR of some of the following flags: * KNET_HANDLE_FLAG_PRIVILEGED: use privileged operations setting up the * communication sockets. If disabled, failure to acquire large * enough socket buffers is ignored but logged. Inadequate buffers * lead to poor performance. * * @return * on success, a new knet_handle_t is returned. * on failure, NULL is returned and errno is set. 
* knet-specific errno values: * ENAMETOOLONG - socket buffers couldn't be set big enough and KNET_HANDLE_FLAG_PRIVILEGED was specified * ERANGE - buffer size readback returned unexpected type */ knet_handle_t knet_handle_new(knet_node_id_t host_id, int log_fd, uint8_t default_log_level, uint64_t flags); /** * knet_handle_free * * @brief Destroy a knet handle, free all resources * * knet_h - pointer to knet_handle_t * * @return * knet_handle_free returns * 0 on success * -1 on error and errno is set. */ int knet_handle_free(knet_handle_t knet_h); /** * knet_handle_set_threads_timer_res * * @brief Change internal thread timer resolution * * knet_h - pointer to knet_handle_t * * timeres - some threads inside knet will use usleep(timeres) * to check if any activity has to be performed, or wait * for the next cycle. 'timeres' (expressed in microseconds) * defines this interval, with a default of KNET_THREADS_TIMER_RES * (200000). * The lower this value is, the more often knet will perform * those checks and allows a more (time) precise execution of * some operations (for example ping/pong), at the cost of higher * CPU usage. * Accepted values: * 0 - reset timer res to default * 1 - 999 invalid (as it would cause 100% CPU spinning on some * epoll operations) * 1000 or higher - valid * * Unless you know exactly what you are doing, stay away from * changing the default or seek written and notarized approval * from the knet developer team. * * @return * knet_handle_set_threads_timer_res returns * 0 on success * -1 on error and errno is set. */ int knet_handle_set_threads_timer_res(knet_handle_t knet_h, useconds_t timeres); /** * knet_handle_get_threads_timer_res * * @brief Get internal thread timer resolution * * knet_h - pointer to knet_handle_t * * timeres - current timer res value * * @return * knet_handle_get_threads_timer_res returns * 0 on success and timeres will contain the current value * -1 on error and errno is set.
*/ int knet_handle_get_threads_timer_res(knet_handle_t knet_h, useconds_t *timeres); /** * knet_handle_enable_sock_notify * * @brief Register a callback to receive socket events * * knet_h - pointer to knet_handle_t * * sock_notify_fn_private_data * void pointer to data that can be used to identify * the callback. * * sock_notify_fn * A callback function that is invoked every time * a socket in the datafd pool will report an error (-1) * or an end of read (0) (see socket.7). * This function MUST NEVER block or add substantial delays. * The callback is invoked in an internal unlocked area * to allow calls to knet_handle_add_datafd/knet_handle_remove_datafd * to swap/replace the bad fd. * if both error and errorno are 0, it means that the socket * has received a 0 byte packet (EOF?). * The callback function must either remove the fd from knet * (by calling knet_handle_remove_datafd()) or dup a new fd in its place. * Failure to do this can cause problems. * * @return * knet_handle_enable_sock_notify returns * 0 on success * -1 on error and errno is set. */ int knet_handle_enable_sock_notify(knet_handle_t knet_h, void *sock_notify_fn_private_data, void (*sock_notify_fn) ( void *private_data, int datafd, int8_t channel, uint8_t tx_rx, int error, int errorno)); /* sorry! can't call it errno ;) */ #define KNET_DATAFD_MAX 32 /** * knet_handle_add_datafd * * @brief Install a file descriptor for communication * * IMPORTANT: In order to add datafd to knet, knet_handle_enable_sock_notify * _MUST_ be set and be able to handle both errors (-1) and * 0 bytes read / write from the provided datafd. * On read error (< 0) from datafd, the socket is automatically * removed from polling to avoid spinning on dead sockets. * It is safe to call knet_handle_remove_datafd even on sockets * that have been removed. * * knet_h - pointer to knet_handle_t * * *datafd - read/write file descriptor. * knet will read data here to send to the other hosts * and will write data received from the network.
* Each data packet can be of max size KNET_MAX_PACKET_SIZE! * Applications using knet_send/knet_recv will receive a * proper error if the packet size is not within boundaries. * Applications using their own functions to write to the * datafd should NOT write more than KNET_MAX_PACKET_SIZE. * * Please refer to handle.c on how to set up a socketpair. * * datafd can be 0, and knet_handle_add_datafd will create a properly * populated socket pair the same way as ping_test, or a value * higher than 0. A negative number will return an error. * On exit knet_handle_free will take care to cleanup the * socketpair only if they have been created by knet_handle_add_datafd. * * It is possible to pass either sockets or normal fds. * User provided datafd will be marked as non-blocking and close-on-exec. * * *channel - This value is analogous to the tag in VLAN tagging. * A negative value will auto-allocate a channel. * Setting a value between 0 and 31 will try to allocate that * specific channel (unless already in use). * * It is possible to add up to 32 datafds but be aware that each * one of them must have a receiving end on the other host. * * Example: * hostA channel 0 will be delivered to datafd on hostB channel 0 * hostA channel 1 to hostB channel 1. * * Each channel must have a unique file descriptor. * * If your application could have 2 channels on one host and one * channel on another host, then you can use dst_host_filter * to manipulate channel values on TX and RX. * * @return * knet_handle_add_datafd returns * @retval 0 on success, * *datafd will be populated with a socket if the original value was 0 * or if a specific fd was set, the value is untouched. * *channel will be populated with a channel number if the original value * was negative or the value is untouched if a specific channel * was requested. * * @retval -1 on error and errno is set. * *datafd and *channel are untouched or empty. 
*/ int knet_handle_add_datafd(knet_handle_t knet_h, int *datafd, int8_t *channel); /** * knet_handle_remove_datafd * * @brief Remove a file descriptor from knet * * knet_h - pointer to knet_handle_t * * datafd - file descriptor to remove. * NOTE that if the socket/fd was created by knet_handle_add_datafd, * the socket will be closed by libknet. * * @return * knet_handle_remove_datafd returns * 0 on success * -1 on error and errno is set. */ int knet_handle_remove_datafd(knet_handle_t knet_h, int datafd); /** * knet_handle_get_channel * * @brief Get the channel associated with a file descriptor * * knet_h - pointer to knet_handle_t * * datafd - get the channel associated to this datafd * * *channel - will contain the result * * @return * knet_handle_get_channel returns * @retval 0 on success * and *channel will contain the result * @retval -1 on error and errno is set. * and *channel content is meaningless */ int knet_handle_get_channel(knet_handle_t knet_h, const int datafd, int8_t *channel); /** * knet_handle_get_datafd * * @brief Get the file descriptor associated with a channel * * knet_h - pointer to knet_handle_t * * channel - get the datafd associated to this channel * * *datafd - will contain the result * * @return * knet_handle_get_datafd returns * @retval 0 on success * and *datafd will contain the results * @retval -1 on error and errno is set. * and *datafd content is meaningless */ int knet_handle_get_datafd(knet_handle_t knet_h, const int8_t channel, int *datafd); /** * knet_recv * * @brief Receive data from knet nodes * * knet_h - pointer to knet_handle_t * * buff - pointer to buffer to store the received data * * buff_len - buffer length * * channel - channel number * * @return * knet_recv is a commodity function to wrap iovec operations * around a socket. It returns a call to readv(2). 
*/ ssize_t knet_recv(knet_handle_t knet_h, char *buff, const size_t buff_len, const int8_t channel); /** * knet_send * * @brief Send data to knet nodes * * knet_h - pointer to knet_handle_t * * buff - pointer to the buffer of data to send * * buff_len - length of data to send * * channel - channel number * * @return * knet_send is a commodity function to wrap iovec operations * around a socket. It returns a call to writev(2). */ ssize_t knet_send(knet_handle_t knet_h, const char *buff, const size_t buff_len, const int8_t channel); /** * knet_send_sync * * @brief Synchronously send data to knet nodes * * knet_h - pointer to knet_handle_t * * buff - pointer to the buffer of data to send * * buff_len - length of data to send * * channel - data channel to use (see knet_handle_add_datafd(3)) * * All knet RX/TX operations are async for performance reasons. * There are applications that might need a sync version of data * transmission and receive errors in case of failure to deliver * to another host. * knet_send_sync bypasses the whole TX async layer and delivers * data directly to the link layer, and returns errors accordingly. * knet_send_sync sends only one packet to one host at a time. * It does NOT support multiple destinations or multicast packets. * Decision is still based on dst_host_filter_fn. * * @return * knet_send_sync returns 0 on success and -1 on error. 
* In addition to normal sendmmsg errors, knet_send_sync can fail * due to: * * @retval ECANCELED - data forward is disabled * @retval EFAULT - dst_host_filter fatal error * @retval EINVAL - dst_host_filter did not provide dst_host_ids_entries on unicast pckts * @retval E2BIG - dst_host_filter did return more than one dst_host_ids_entries on unicast pckts * @retval ENOMSG - received unknown message type * @retval EHOSTDOWN - unicast pckt cannot be delivered because dest host is not connected yet * @retval ECHILD - crypto failed * @retval EAGAIN - sendmmsg was unable to send all messages and there was no progress during retry */ int knet_send_sync(knet_handle_t knet_h, const char *buff, const size_t buff_len, const int8_t channel); /** * knet_handle_enable_filter * * @brief install a filter to route packets * * knet_h - pointer to knet_handle_t * * dst_host_filter_fn_private_data * void pointer to data that can be used to identify * the callback. * * dst_host_filter_fn - * is a callback function that is invoked every time * a packet hits datafd (see knet_handle_new(3)). * the function allows users to tell libknet where the * packet has to be delivered. * * const unsigned char *outdata - is a pointer to the * current packet * ssize_t outdata_len - length of the above data * uint8_t tx_rx - filter is called on tx or rx * (KNET_NOTIFY_TX, KNET_NOTIFY_RX) * knet_node_id_t this_host_id - host_id processing the packet * knet_node_id_t src_host_id - host_id that generated the * packet * knet_node_id_t *dst_host_ids - array of KNET_MAX_HOST knet_node_id_t * where to store the destinations * size_t *dst_host_ids_entries - number of hosts to send the message * * dst_host_filter_fn should return * -1 on error, packet is discarded. * 0 packet is unicast and should be sent to dst_host_ids and there are * dst_host_ids_entries in the buffer. * 1 packet is broadcast/multicast and is sent all hosts. * contents of dst_host_ids and dst_host_ids_entries are ignored. 
* * @return * knet_handle_enable_filter returns * 0 on success * -1 on error and errno is set. */ int knet_handle_enable_filter(knet_handle_t knet_h, void *dst_host_filter_fn_private_data, int (*dst_host_filter_fn) ( void *private_data, const unsigned char *outdata, ssize_t outdata_len, uint8_t tx_rx, knet_node_id_t this_host_id, knet_node_id_t src_host_id, int8_t *channel, knet_node_id_t *dst_host_ids, size_t *dst_host_ids_entries)); /** * knet_handle_setfwd * * @brief Start packet forwarding * * knet_h - pointer to knet_handle_t * * enable - set to 1 to allow data forwarding, 0 to disable data forwarding. * * @return * knet_handle_setfwd returns * 0 on success * -1 on error and errno is set. * * By default data forwarding is off and no traffic will pass through knet until * it is set on. */ int knet_handle_setfwd(knet_handle_t knet_h, unsigned int enabled); /** * knet_handle_enable_access_lists * * @brief Enable or disable usage of access lists (default: off) * * knet_h - pointer to knet_handle_t * * enable - set to 1 to use access lists, 0 to disable access_lists. * * @return * knet_handle_enable_access_lists returns * 0 on success * -1 on error and errno is set. * * access lists are bound to links. There are 2 types of links: * 1) point to point, where both source and destinations are well known * at configuration time. * 2) open links, where only the source is known at configuration time. * * knet will automatically generate access lists for point to point links. * * For open links, knet provides 4 API calls to manipulate access lists: * knet_link_add_acl(3), knet_link_rm_acl(3), knet_link_insert_acl(3) * and knet_link_clear_acl(3). * Those API calls will work exclusively on open links as they * are of no use on point to point links. * * knet will not enforce any access list unless specifically enabled by * knet_handle_enable_access_lists(3). 
* * From a security / programming perspective we recommend: * - create the knet handle * - enable access lists * - configure hosts and links * - configure access lists for open links */ int knet_handle_enable_access_lists(knet_handle_t knet_h, unsigned int enabled); #define KNET_PMTUD_DEFAULT_INTERVAL 60 /** * knet_handle_pmtud_setfreq * * @brief Set the interval between PMTUd scans * * knet_h - pointer to knet_handle_t * * interval - define the interval in seconds between PMTUd scans * range from 1 to 86400 (24h) * * @return * knet_handle_pmtud_setfreq returns * 0 on success * -1 on error and errno is set. * * default interval is KNET_PMTUD_DEFAULT_INTERVAL (60). */ int knet_handle_pmtud_setfreq(knet_handle_t knet_h, unsigned int interval); /** * knet_handle_pmtud_getfreq * * @brief Get the interval between PMTUd scans * * knet_h - pointer to knet_handle_t * * interval - pointer where to store the current interval value * * @return * knet_handle_pmtud_getfreq returns * 0 on success * -1 on error and errno is set. */ int knet_handle_pmtud_getfreq(knet_handle_t knet_h, unsigned int *interval); /** * knet_handle_enable_pmtud_notify * * @brief install a callback to receive PMTUd changes * * knet_h - pointer to knet_handle_t * * pmtud_notify_fn_private_data * void pointer to data that can be used to identify * the callback. * * pmtud_notify_fn * is a callback function that is invoked every time * a path MTU size change is detected. * The function allows libknet to notify the user * of data MTU, that's the max value that can be sent * onwire without fragmentation. The data MTU will always * be lower than real link MTU because it accounts for * protocol overhead, knet packet header and (if configured) * crypto overhead. * This function MUST NEVER block or add substantial delays. * * @return * knet_handle_enable_pmtud_notify returns * 0 on success * -1 on error and errno is set.
*/ int knet_handle_enable_pmtud_notify(knet_handle_t knet_h, void *pmtud_notify_fn_private_data, void (*pmtud_notify_fn) ( void *private_data, unsigned int data_mtu)); /** * knet_handle_pmtud_set * * @brief Set the current interface MTU * * knet_h - pointer to knet_handle_t * * iface_mtu - current interface MTU, value 0 to 65535. 0 will * re-enable automatic MTU discovery. * In a setup with multiple interfaces, please specify * the lowest MTU between the selected interfaces. * knet will automatically adjust this value for * all headers overhead and set the correct data_mtu. * data_mtu can be retrieved with knet_handle_pmtud_get(3) - * or applications will receive a pmtud_nofity event + * or applications will receive a pmtud_notify event * if enabled via knet_handle_enable_pmtud_notify(3). * * @return * knet_handle_pmtud_set returns * 0 on success * -1 on error and errno is set. */ int knet_handle_pmtud_set(knet_handle_t knet_h, unsigned int iface_mtu); /** * knet_handle_pmtud_get * * @brief Get the current data MTU * * knet_h - pointer to knet_handle_t * * data_mtu - pointer where to store data_mtu * * @return * knet_handle_pmtud_get returns * 0 on success * -1 on error and errno is set. */ int knet_handle_pmtud_get(knet_handle_t knet_h, unsigned int *data_mtu); #define KNET_MIN_KEY_LEN 128 #define KNET_MAX_KEY_LEN 4096 struct knet_handle_crypto_cfg { char crypto_model[16]; char crypto_cipher_type[16]; char crypto_hash_type[16]; unsigned char private_key[KNET_MAX_KEY_LEN]; unsigned int private_key_len; }; /** * knet_handle_crypto_set_config * * @brief set up packet cryptographic signing & encryption * * knet_h - pointer to knet_handle_t * * knet_handle_crypto_cfg - * pointer to a knet_handle_crypto_cfg structure * * crypto_model should contain the model name. * Currently only "openssl" and "nss" are supported. * Setting to "none" will disable crypto. * * crypto_cipher_type * should contain the cipher algo name. * It can be set to "none" to disable * encryption.
* Currently supported by "nss" model: * "aes128", "aes192" and "aes256". * "openssl" model supports more modes and it strictly * depends on the openssl build. See: EVP_get_cipherbyname * openssl API call for details. * * crypto_hash_type * should contain the hashing algo name. * It can be set to "none" to disable * hashing. * Currently supported by "nss" model: * "md5", "sha1", "sha256", "sha384" and "sha512". * "openssl" model supports more modes and it strictly * depends on the openssl build. See: EVP_get_digestbyname * openssl API call for details. * * private_key will contain the private shared key. * It has to be at least KNET_MIN_KEY_LEN long. * * private_key_len * length of the provided private_key. * * config_num - knet supports 2 concurrent sets of crypto configurations, * to allow runtime change of crypto config and keys. * On RX both configurations will be used sequentially * in an attempt to decrypt/validate a packet (when 2 are available). * Note that this might slow down performance during a reconfiguration. * See also knet_handle_crypto_rx_clear_traffic(3) to enable / disable * processing of clear (unencrypted) traffic. * For TX, the user needs to specify which configuration to use via * knet_handle_crypto_use_config(3). * config_num accepts 0, 1 or 2 as the value. 0 should be used when * all crypto is being disabled. * Calling knet_handle_crypto_set_config(3) twice with * the same config_num will REPLACE the configuration and * NOT activate the second key. If the configuration is currently in use * EBUSY will be returned. See also knet_handle_crypto_use_config(3). * The correct sequence to perform a runtime rekey / reconfiguration * is: * - knet_handle_crypto_set_config(..., 1). -> first time config, will use config1 * - knet_handle_crypto_use_config(..., 1). -> switch TX to config 1 * - knet_handle_crypto_set_config(..., 2). -> install config2 and use it only for RX * - knet_handle_crypto_use_config(..., 2). 
-> switch TX to config 2 * - knet_handle_crypto_set_config(..., 1). -> with a "none"/"none"/"none" configuration to * release the resources previously allocated * The application is responsible for synchronizing calls on the nodes * to make sure the new config is in place before switching the TX configuration. * Failure to do so will result in knet being unable to talk to some of the nodes. * * Implementation notes/current limitations: * - enabling crypto will increase latency as packets have * to be processed. * - enabling crypto might reduce the overall throughput * due to crypto data overhead. * - private/public key encryption/hashing is not currently * planned. * - crypto key must be the same for all hosts in the same * knet instance / configX. * - it is safe to call knet_handle_crypto_set_config multiple times at runtime. * The last config will be used. * IMPORTANT: a call to knet_handle_crypto_set_config can fail due to: * 1) failure to obtain locking * 2) errors initializing the crypto level. * This can happen even in subsequent calls to knet_handle_crypto_set_config(3). * A failure in crypto init will restore the previous crypto configuration if any. * * @return * knet_handle_crypto_set_config returns: * @retval 0 on success * @retval -1 on error and errno is set. * @retval -2 on crypto subsystem initialization error. No errno is provided at the moment (yet). */ int knet_handle_crypto_set_config(knet_handle_t knet_h, struct knet_handle_crypto_cfg *knet_handle_crypto_cfg, uint8_t config_num); #define KNET_CRYPTO_RX_ALLOW_CLEAR_TRAFFIC 0 #define KNET_CRYPTO_RX_DISALLOW_CLEAR_TRAFFIC 1 /** * knet_handle_crypto_rx_clear_traffic * * @brief enable or disable RX processing of clear (unencrypted) traffic * * knet_h - pointer to knet_handle_t * * value - KNET_CRYPTO_RX_ALLOW_CLEAR_TRAFFIC or KNET_CRYPTO_RX_DISALLOW_CLEAR_TRAFFIC * * @return * knet_handle_crypto_rx_clear_traffic returns: * @retval 0 on success * @retval -1 on error and errno is set.
*/ int knet_handle_crypto_rx_clear_traffic(knet_handle_t knet_h, uint8_t value); /** * knet_handle_crypto_use_config * * @brief specify crypto configuration to use for TX * * knet_h - pointer to knet_handle_t * * config_num - 1|2 use configuration 1 or 2, 0 for clear (unencrypted) traffic. * * @return * knet_handle_crypto_use_config returns: * @retval 0 on success * @retval -1 on error and errno is set. */ int knet_handle_crypto_use_config(knet_handle_t knet_h, uint8_t config_num); #define KNET_COMPRESS_THRESHOLD 100 struct knet_handle_compress_cfg { char compress_model[16]; uint32_t compress_threshold; int compress_level; }; /** * knet_handle_compress * * @brief Set up packet compression * * knet_h - pointer to knet_handle_t * * knet_handle_compress_cfg - * pointer to a knet_handle_compress_cfg structure * * compress_model contains the model name. * See "compress_level" for the list of accepted values. * Setting the value to "none" disables compression. * * compress_threshold * tells the transmission thread to NOT compress * any packets that are smaller than the value * indicated. Default 100 bytes. * Set to 0 to reset to the default. * Set to 1 to compress everything. * Max accepted value is KNET_MAX_PACKET_SIZE. * * compress_level is the "level" parameter for most models: * zlib: 0 (no compression), 1 (minimal) .. 9 (max compression). * lz4: 1 (max compression)... 9 (fastest compression). * lz4hc: 1 (min compression) ... LZ4HC_MAX_CLEVEL (16) or LZ4HC_CLEVEL_MAX (12) * depending on the version of lz4hc libknet was built with. * lzma: 0 (minimal) .. 9 (max compression) * bzip2: 1 (minimal) .. 9 (max compression) * For lzo2 it selects the algorithm to use: * 1 : lzo1x_1_compress (default) * 11 : lzo1x_1_11_compress * 12 : lzo1x_1_12_compress * 15 : lzo1x_1_15_compress * 999: lzo1x_999_compress * Other values select the default algorithm. * Please refer to the documentation of the respective * compression library for guidance about setting this * value. 
* * Implementation notes: * - it is possible to enable/disable compression at any time. * - nodes can be using a different compression algorithm at any time. * - knet does NOT implement the compression algorithm directly. it relies * on external libraries for this functionality. Please read * the libraries man pages to figure out which algorithm/compression * level is best for the data you are planning to transmit. * * @return * knet_handle_compress returns * 0 on success * -1 on error and errno is set. EINVAL means that either the model or the * level are not supported. */ int knet_handle_compress(knet_handle_t knet_h, struct knet_handle_compress_cfg *knet_handle_compress_cfg); struct knet_handle_stats { size_t size; uint64_t tx_uncompressed_packets; uint64_t tx_compressed_packets; uint64_t tx_compressed_original_bytes; uint64_t tx_compressed_size_bytes; uint64_t tx_compress_time_ave; uint64_t tx_compress_time_min; uint64_t tx_compress_time_max; uint64_t tx_failed_to_compress; uint64_t tx_unable_to_compress; uint64_t rx_compressed_packets; uint64_t rx_compressed_original_bytes; uint64_t rx_compressed_size_bytes; uint64_t rx_compress_time_ave; uint64_t rx_compress_time_min; uint64_t rx_compress_time_max; uint64_t rx_failed_to_decompress; /* Overhead times, measured in usecs */ uint64_t tx_crypt_packets; uint64_t tx_crypt_byte_overhead; uint64_t tx_crypt_time_ave; uint64_t tx_crypt_time_min; uint64_t tx_crypt_time_max; uint64_t rx_crypt_packets; uint64_t rx_crypt_time_ave; uint64_t rx_crypt_time_min; uint64_t rx_crypt_time_max; }; /** * knet_handle_get_stats * * @brief Get statistics for compression & crypto * * knet_h - pointer to knet_handle_t * * knet_handle_stats * pointer to a knet_handle_stats structure * * struct_size * size of knet_handle_stats structure to allow * for backwards compatibility. libknet will only * copy this much data into the stats structure * so that older callers will not get overflowed if * new fields are added. 
* * @return * 0 on success * -1 on error and errno is set. * */ int knet_handle_get_stats(knet_handle_t knet_h, struct knet_handle_stats *stats, size_t struct_size); /* * Tell knet_handle_clear_stats whether to clear just the handle stats * or all of them. */ #define KNET_CLEARSTATS_HANDLE_ONLY 1 #define KNET_CLEARSTATS_HANDLE_AND_LINK 2 /** * knet_handle_clear_stats * * @brief Clear knet stats, link and/or handle * * knet_h - pointer to knet_handle_t * * clear_option - Which stats to clear, must be one of * * KNET_CLEARSTATS_HANDLE_ONLY or * KNET_CLEARSTATS_HANDLE_AND_LINK * * @return * 0 on success * -1 on error and errno is set. * */ int knet_handle_clear_stats(knet_handle_t knet_h, int clear_option); struct knet_crypto_info { const char *name; /* openssl,nss,etc.. */ uint8_t properties; /* currently unused */ char pad[256]; /* currently unused */ }; /** * knet_get_crypto_list * * @brief Get a list of supported crypto libraries * * crypto_list - array of struct knet_crypto_info * * If NULL then only the number of structs is returned in crypto_list_entries * to allow the caller to allocate sufficient space. * libknet does not allow more than 256 crypto methods at the moment. * it is safe to allocate 256 structs to avoid calling * knet_get_crypto_list twice. * * crypto_list_entries - returns the number of structs in crypto_list * * @return * knet_get_crypto_list returns * 0 on success * -1 on error and errno is set. */ int knet_get_crypto_list(struct knet_crypto_info *crypto_list, size_t *crypto_list_entries); struct knet_compress_info { const char *name; /* bzip2, lz4, etc.. */ uint8_t properties; /* currently unused */ char pad[256]; /* currently unused */ }; /** * knet_get_compress_list * * @brief Get a list of support compression types * * compress_list - array of struct knet_compress_info * * If NULL then only the number of structs is returned in compress_list_entries * to allow the caller to allocate sufficient space. 
* libknet does not allow more than 256 compress methods at the moment. * it is safe to allocate 256 structs to avoid calling * knet_get_compress_list twice. * * compress_list_entries - returns the number of structs in compress_list * * @return * knet_get_compress_list returns * 0 on success * -1 on error and errno is set. */ int knet_get_compress_list(struct knet_compress_info *compress_list, size_t *compress_list_entries); +/** + * knet_handle_enable_onwire_ver_notify + * + * @brief install a callback to receive onwire changes + * + * knet_h - pointer to knet_handle_t + * + * onwire_ver_notify_fn_private_data + * void pointer to data that can be used to identify + * the callback. + * + * onwire_ver_notify_fn + * is a callback function that is invoked every time + * an onwire version change is detected. + * The function allows libknet to notify the user + * of onwire version changes. + * onwire_min_ver - minimum onwire version supported + * onwire_max_ver - maximum onwire version supported + * onwire_ver - currently onwire version in use + * This function MUST NEVER block or add substantial delays. + * + * NOTE: the callback function will be invoked upon install to + * immediately notify the user of the current configuration. + * During startup, it is safer to use onwire_min_ver and + * onwire_ver on subsequent calls. + * + * @return + * knet_handle_enable_onwire_ver_notify returns + * 0 on success + * -1 on error and errno is set. + */ + +int knet_handle_enable_onwire_ver_notify(knet_handle_t knet_h, + void *onwire_ver_notify_fn_private_data, + void (*onwire_ver_notify_fn) ( + void *private_data, + uint8_t onwire_min_ver, + uint8_t onwire_max_ver, + uint8_t onwire_ver)); + +/** + * knet_handle_get_onwire_ver + * + * @brief get onwire protocol version information + * + * knet_h - pointer to knet_handle_t + * + * host_id - see knet_host_add(3) + * + * onwire_min_ver - minimum onwire version supported by local node. + * this value is set to 0 for remote nodes. 
+ * + * onwire_max_ver - maximum onwire version supported by local or + * remote node. + * + * onwire_ver - currently onwire version in use by local or + * remote node. + * + * @return + * knet_handle_get_onwire_ver returns + * 0 on success + * -1 on error and errno is set. + */ + +int knet_handle_get_onwire_ver(knet_handle_t knet_h, + knet_node_id_t host_id, + uint8_t *onwire_min_ver, + uint8_t *onwire_max_ver, + uint8_t *onwire_ver); + +/** + * knet_handle_set_onwire_ver + * + * @brief force onwire protocol version + * + * knet_h - pointer to knet_handle_t + * + * onwire_ver - onwire version to use. + * reset to 0 to allow knet to detect + * automatically the highest version. + * + * @return + * knet_handle_set_onwire_ver returns + * 0 on success + * -1 on error and errno is set. + */ + +int knet_handle_set_onwire_ver(knet_handle_t knet_h, + uint8_t onwire_ver); + /* * host structs/API calls */ /** * knet_host_add * * @brief Add a new host ID to knet * * knet_h - pointer to knet_handle_t * * host_id - each host in a knet is identified with a unique ID * (see also knet_handle_new(3)) * * @return * knet_host_add returns: * 0 on success * -1 on error and errno is set. */ int knet_host_add(knet_handle_t knet_h, knet_node_id_t host_id); /** * knet_host_remove * * @brief Remove a host ID from knet * * knet_h - pointer to knet_handle_t * * host_id - each host in a knet is identified with a unique ID * (see also knet_handle_new(3)) * * @return * knet_host_remove returns: * 0 on success * -1 on error and errno is set. */ int knet_host_remove(knet_handle_t knet_h, knet_node_id_t host_id); /** * knet_host_set_name * * @brief Set the name of a knet host * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * name - this name will be used for pretty logging and eventually * search for hosts (see also knet_host_get_name_by_host_id(3) and knet_host_get_id_by_host_name(3)).
* Only up to KNET_MAX_HOST_LEN - 1 bytes will be accepted and * name has to be unique for each host. * * @return * knet_host_set_name returns: * 0 on success * -1 on error and errno is set. */ int knet_host_set_name(knet_handle_t knet_h, knet_node_id_t host_id, const char *name); /** * knet_host_get_name_by_host_id * * @brief Get the name of a host given its ID * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * name - pointer to a preallocated buffer of at least size KNET_MAX_HOST_LEN * where the current host name will be stored * (as set by knet_host_set_name or default by knet_host_add) * * @return * knet_host_get_name_by_host_id returns: * 0 on success * -1 on error and errno is set (name is left untouched) */ int knet_host_get_name_by_host_id(knet_handle_t knet_h, knet_node_id_t host_id, char *name); /** * knet_host_get_id_by_host_name * * @brief Get the ID of a host given its name * * knet_h - pointer to knet_handle_t * * name - name to lookup, max len KNET_MAX_HOST_LEN * * host_id - where to store the result * * @return * knet_host_get_id_by_host_name returns: * 0 on success * -1 on error and errno is set. */ int knet_host_get_id_by_host_name(knet_handle_t knet_h, const char *name, knet_node_id_t *host_id); /** * knet_host_get_host_list * * @brief Get a list of hosts known to knet * * knet_h - pointer to knet_handle_t * * host_ids - array of at least KNET_MAX_HOST size * * host_ids_entries - * number of entries written to host_ids * * @return * knet_host_get_host_list returns * 0 on success * -1 on error and errno is set.
*/ int knet_host_get_host_list(knet_handle_t knet_h, knet_node_id_t *host_ids, size_t *host_ids_entries); /* * define switching policies */ #define KNET_LINK_POLICY_PASSIVE 0 #define KNET_LINK_POLICY_ACTIVE 1 #define KNET_LINK_POLICY_RR 2 /** * knet_host_set_policy * * @brief Set the switching policy for a host's links * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * policy - there are currently 3 kind of simple switching policies * based on link configuration. * KNET_LINK_POLICY_PASSIVE - the active link with the highest * priority (highest number) will be used. * if one or more active links share * the same priority, the one with * lowest link_id will be used. * * KNET_LINK_POLICY_ACTIVE - all active links will be used * simultaneously to send traffic. * link priority is ignored. * * KNET_LINK_POLICY_RR - round-robin policy, every packet * will be send on a different active * link. * * @return * knet_host_set_policy returns * 0 on success * -1 on error and errno is set. */ int knet_host_set_policy(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t policy); /** * knet_host_get_policy * * @brief Get the switching policy for a host's links * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * policy - will contain the current configured switching policy. * Default is passive when creating a new host. * * @return * knet_host_get_policy returns * 0 on success * -1 on error and errno is set. */ int knet_host_get_policy(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t *policy); /** * knet_host_enable_status_change_notify * * @brief Install a callback to get host status change events * * knet_h - pointer to knet_handle_t * * host_status_change_notify_fn_private_data - * void pointer to data that can be used to identify * the callback * * host_status_change_notify_fn - * is a callback function that is invoked every time * there is a change in the host status. 
* host status is identified by: * - reachable, this host can send/receive data to/from host_id * - remote, 0 if the host_id is connected locally or 1 if * there is one or more knet host(s) in between. * NOTE: re-switching is NOT currently implemented, * but this is ready for future and can avoid * an API/ABI breakage later on. * - external, 0 if the host_id is configured locally or 1 if * it has been added from remote nodes config. * NOTE: dynamic topology is NOT currently implemented, * but this is ready for future and can avoid * an API/ABI breakage later on. * This function MUST NEVER block or add substantial delays. * * @return * knet_host_enable_status_change_notify returns * 0 on success * -1 on error and errno is set. */ int knet_host_enable_status_change_notify(knet_handle_t knet_h, void *host_status_change_notify_fn_private_data, void (*host_status_change_notify_fn) ( void *private_data, knet_node_id_t host_id, uint8_t reachable, uint8_t remote, uint8_t external)); /* * define host status structure for quick lookup * struct is in flux as more stats will be added soon * * reachable host_id can be seen either directly connected * or via another host_id * * remote 0 = node is connected locally, 1 = node is visible * via another host_id * * external 0 = node is configured/known locally, * 1 host_id has been received via another host_id */ struct knet_host_status { uint8_t reachable; uint8_t remote; uint8_t external; /* add host statistics */ }; /** * knet_host_get_status * * @brief Get the status of a host * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * status - pointer to knet_host_status struct * * @return * knet_host_get_status returns * 0 on success * -1 on error and errno is set.
*/ int knet_host_get_status(knet_handle_t knet_h, knet_node_id_t host_id, struct knet_host_status *status); /* * link structs/API calls * * every host allocated/managed by knet_host_* has * KNET_MAX_LINK structures to define the network * paths that connect 2 hosts. * * Each link is identified by a link_id that has a * value between 0 and KNET_MAX_LINK - 1. * * KNOWN LIMITATIONS: * * - let's assume the scenario where two hosts are connected * with any number of links. link_id must match on both sides. * If host_id 0 link_id 0 is configured to connect IP1 to IP2 and * host_id 0 link_id 1 is configured to connect IP3 to IP4, * host_id 1 link_id 0 _must_ connect IP2 to IP1 and likewise * host_id 1 link_id 1 _must_ connect IP4 to IP3. * We might be able to lift this restriction in future, by using * other data to determine src/dst link_id, but for now, deal with it. */ /* * commodity functions to convert strings to sockaddr and vice versa */ /** * knet_strtoaddr * * @brief Convert a hostname string to an address * * host - IPaddr/hostname to convert * be aware only the first IP address will be returned * in case a hostname resolves to multiple IP * * port - port to connect to * * ss - sockaddr_storage where to store the converted data * * sslen - len of the sockaddr_storage * * @return * knet_strtoaddr returns same error codes as getaddrinfo * */ int knet_strtoaddr(const char *host, const char *port, struct sockaddr_storage *ss, socklen_t sslen); /** * knet_addrtostr * * @brief Convert an address to a host name * * ss - sockaddr_storage to convert * * sslen - len of the sockaddr_storage * * addr_buf - buffer where to store the IPaddr/hostname * (recommended size: KNET_MAX_HOST_LEN) * * addr_buf_size - size of addr_buf * * port_buf - buffer where to store the port * (recommended size: KNET_MAX_PORT_LEN) * * port_buf_size - size of port_buf * * @return * knet_addrtostr returns same error codes as getnameinfo */ int knet_addrtostr(const struct sockaddr_storage *ss, socklen_t sslen, char *addr_buf, size_t addr_buf_size, char *port_buf, size_t port_buf_size);
#define KNET_TRANSPORT_LOOPBACK 0 #define KNET_TRANSPORT_UDP 1 #define KNET_TRANSPORT_SCTP 2 #define KNET_MAX_TRANSPORTS UINT8_MAX /* * The Loopback transport is only valid for connections to localhost, the host * with the same node_id specified in knet_handle_new(). Only one link of this * type is allowed. Data sent down a LOOPBACK link will be copied directly from * the knet send datafd to the knet receive datafd so the application must be set * up to take data from that socket at least as often as it is sent or deadlocks * could occur. If used, a LOOPBACK link must be the only link configured to the * local host. */ struct knet_transport_info { const char *name; /* UDP/SCTP/etc... */ uint8_t id; /* value that can be used for link_set_config */ uint8_t properties; /* currently unused */ char pad[256]; /* currently unused */ }; /** * knet_get_transport_list * * @brief Get a list of the transports supported by this build of knet * * transport_list - an array of struct knet_transport_info that must be * at least of size struct knet_transport_info * KNET_MAX_TRANSPORTS * * transport_list_entries - pointer to a size_t where to store how many transports * are available in this build of libknet. * * @return * knet_get_transport_list returns * 0 on success * -1 on error and errno is set. */ int knet_get_transport_list(struct knet_transport_info *transport_list, size_t *transport_list_entries); /** * knet_get_transport_name_by_id * * @brief Get a transport name from its ID number * * transport - one of the KNET_TRANSPORT_xxx constants * * @return * knet_get_transport_name_by_id returns: * * @retval pointer to the name on success or * @retval NULL on error and errno is set.
*/ const char *knet_get_transport_name_by_id(uint8_t transport); /** * knet_get_transport_id_by_name * * @brief Get a transport ID from its name * * name - transport name (UDP/SCTP/etc) * * @return * knet_get_transport_id_by_name returns: * * @retval KNET_MAX_TRANSPORTS on error and errno is set accordingly * @retval KNET_TRANSPORT_xxx on success. */ uint8_t knet_get_transport_id_by_name(const char *name); #define KNET_TRANSPORT_DEFAULT_RECONNECT_INTERVAL 1000 /** * knet_handle_set_transport_reconnect_interval * * @brief Set the interval between transport attempts to reconnect a failed link * * knet_h - pointer to knet_handle_t * * msecs - milliseconds * * @return * knet_handle_set_transport_reconnect_interval returns * 0 on success * -1 on error and errno is set. */ int knet_handle_set_transport_reconnect_interval(knet_handle_t knet_h, uint32_t msecs); /** * knet_handle_get_transport_reconnect_interval * * @brief Get the interval between transport attempts to reconnect a failed link * * knet_h - pointer to knet_handle_t * * msecs - milliseconds * * @return * knet_handle_get_transport_reconnect_interval returns * 0 on success * -1 on error and errno is set. */ int knet_handle_get_transport_reconnect_interval(knet_handle_t knet_h, uint32_t *msecs); /** * knet_link_set_config * * @brief Configure the link to a host * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * transport - one of the KNET_TRANSPORT_xxx constants * * src_addr - sockaddr_storage that can be either IPv4 or IPv6 * * dst_addr - sockaddr_storage that can be either IPv4 or IPv6 * this can be null if we don't know the incoming * IP address/port and the link will remain quiet * till the node on the other end will initiate a * connection * * flags - KNET_LINK_FLAG_* * * @return * knet_link_set_config returns * 0 on success * -1 on error and errno is set.
*/ int knet_link_set_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t transport, struct sockaddr_storage *src_addr, struct sockaddr_storage *dst_addr, uint64_t flags); /** * knet_link_get_config * * @brief Get the link configuration information * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * transport - see knet_link_set_config(3) * * src_addr - sockaddr_storage that can be either IPv4 or IPv6 * * dst_addr - sockaddr_storage that can be either IPv4 or IPv6 * * dynamic - 0 if dst_addr is static or 1 if dst_addr is dynamic. * In case of 1, dst_addr can be NULL and it will be left * untouched. * * flags - KNET_LINK_FLAG_* * * @return * knet_link_get_config returns * 0 on success. * -1 on error and errno is set. */ int knet_link_get_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t *transport, struct sockaddr_storage *src_addr, struct sockaddr_storage *dst_addr, uint8_t *dynamic, uint64_t *flags); /** * knet_link_clear_config * * @brief Clear link information and disconnect the link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * @return * knet_link_clear_config returns * 0 on success. * -1 on error and errno is set. */ int knet_link_clear_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id); /* * Access lists management for open links * see also knet_handle_enable_access_lists(3) */ /** * check_type_t * @brief address type enum for knet access lists * * CHECK_TYPE_ADDRESS is the equivalent of a single entry / IP address. * for example: 10.1.9.3 * and the entry is stored in ss1. ss2 can be NULL. * * CHECK_TYPE_MASK is used to configure network/netmask. * for example: 192.168.0.0/24 * the network is stored in ss1 and the netmask in ss2. * * CHECK_TYPE_RANGE defines a value / range of ip addresses.
* for example: 172.16.0.1-172.16.0.10 * the start is stored in ss1 and the end in ss2. * * Please be aware that the above examples refer only to IP based protocols. * Other protocols might use ss1 and ss2 in slightly different ways. * At the moment knet only supports IP based protocol, though that might change * in the future. */ typedef enum { CHECK_TYPE_ADDRESS, CHECK_TYPE_MASK, CHECK_TYPE_RANGE } check_type_t; /** * check_acceptreject_t * * @brief enum for accept/reject in knet access lists * * accept or reject incoming packets defined in the access list entry */ typedef enum { CHECK_ACCEPT, CHECK_REJECT } check_acceptreject_t; /** * knet_link_add_acl * * @brief Add access list entry to an open link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * ss1 / ss2 / type / acceptreject - see typedef definitions for details * * IMPORTANT: the order in which access lists are added is critical and it * is left to the user to add them in the right order. knet * will not attempt to logically sort them. * * For example: * 1 - accept from 10.0.0.0/8 * 2 - reject from 10.0.0.1/32 * * is not the same as: * * 1 - reject from 10.0.0.1/32 * 2 - accept from 10.0.0.0/8 * * In the first example, rule number 2 will never match because * packets from 10.0.0.1 will be accepted by rule number 1. * * @return * knet_link_add_acl returns * 0 on success. * -1 on error and errno is set. */ int knet_link_add_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, struct sockaddr_storage *ss1, struct sockaddr_storage *ss2, check_type_t type, check_acceptreject_t acceptreject); /** * knet_link_insert_acl * * @brief Insert access list entry to an open link at given index * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * index - insert at position "index" where 0 is the first entry and -1 * appends to the current list. 
* * ss1 / ss2 / type / acceptreject - see typedef definitions for details * * @return * knet_link_insert_acl returns * 0 on success. * -1 on error and errno is set. */ int knet_link_insert_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, int index, struct sockaddr_storage *ss1, struct sockaddr_storage *ss2, check_type_t type, check_acceptreject_t acceptreject); /** * knet_link_rm_acl * * @brief Remove access list entry from an open link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * ss1 / ss2 / type / acceptreject - see typedef definitions for details * * IMPORTANT: the data passed to this API call must match exactly that passed * to knet_link_add_acl(3). * * @return * knet_link_rm_acl returns * 0 on success. * -1 on error and errno is set. */ int knet_link_rm_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, struct sockaddr_storage *ss1, struct sockaddr_storage *ss2, check_type_t type, check_acceptreject_t acceptreject); /** * knet_link_clear_acl * * @brief Remove all access list entries from an open link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * @return * knet_link_clear_acl returns * 0 on success. * -1 on error and errno is set. */ int knet_link_clear_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id); /** * knet_link_set_enable * * @brief Enable traffic on a link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * enabled - 0 disable the link, 1 enable the link * * @return * knet_link_set_enable returns * 0 on success * -1 on error and errno is set. 
*/ int knet_link_set_enable(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, unsigned int enabled); /** * knet_link_get_enable * * @brief Find out whether a link is enabled or not * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * enabled - 0 disable the link, 1 enable the link * * @return * knet_link_get_enable returns * 0 on success * -1 on error and errno is set. */ int knet_link_get_enable(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, unsigned int *enabled); #define KNET_LINK_DEFAULT_PING_INTERVAL 1000 /* 1 second */ #define KNET_LINK_DEFAULT_PING_TIMEOUT 2000 /* 2 seconds */ #define KNET_LINK_DEFAULT_PING_PRECISION 2048 /* samples */ /** * knet_link_set_ping_timers * * @brief Set the ping timers for a link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * interval - specify the ping interval in milliseconds. * * timeout - if no pong is received within this time, * the link is declared dead, in milliseconds. * NOTE: in future it will be possible to set timeout to 0 * for an autocalculated timeout based on interval, pong_count * and latency. The API already accept 0 as value and it will * return ENOSYS / -1. Once the automatic calculation feature * will be implemented, this call will only return EINVAL * for incorrect values. * * precision - how many values of latency are used to calculate * the average link latency (see also knet_link_get_status(3)) * * @return * knet_link_set_ping_timers returns * 0 on success * -1 on error and errno is set. 
*/ int knet_link_set_ping_timers(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, time_t interval, time_t timeout, unsigned int precision); /** * knet_link_get_ping_timers * * @brief Get the ping timers for a link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * interval - ping interval * * timeout - if no pong is received within this time, * the link is declared dead * * precision - how many values of latency are used to calculate * the average link latency (see also knet_link_get_status(3)) * * @return * knet_link_get_ping_timers returns * 0 on success * -1 on error and errno is set. */ int knet_link_get_ping_timers(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, time_t *interval, time_t *timeout, unsigned int *precision); #define KNET_LINK_DEFAULT_PONG_COUNT 5 /** * knet_link_set_pong_count * * @brief Set the pong count for a link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * pong_count - how many valid ping/pongs before a link is marked UP. * default: 5, value should be > 0 * * @return * knet_link_set_pong_count returns * 0 on success * -1 on error and errno is set. */ int knet_link_set_pong_count(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t pong_count); /** * knet_link_get_pong_count * * @brief Get the pong count for a link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * pong_count - how many valid ping/pongs before a link is marked UP. * default: 5, value should be > 0 * * @return * knet_link_get_pong_count returns * 0 on success * -1 on error and errno is set. 
*/ int knet_link_get_pong_count(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t *pong_count); /** * knet_link_set_priority * * @brief Set the priority for a link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * priority - specify the switching priority for this link * see also knet_host_set_policy * * @return * knet_link_set_priority returns * 0 on success * -1 on error and errno is set. */ int knet_link_set_priority(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t priority); /** * knet_link_get_priority * * @brief Get the priority for a link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * priority - gather the switching priority for this link * see also knet_host_set_policy * * @return * knet_link_get_priority returns * 0 on success * -1 on error and errno is set. */ int knet_link_get_priority(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t *priority); /** * knet_link_get_link_list * * @brief Get a list of links connecting a host * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_ids - array of at least KNET_MAX_LINK size * with the list of configured links for a certain host. * * link_ids_entries - * number of entries contained in link_ids * * @return * knet_link_get_link_list returns * 0 on success * -1 on error and errno is set. */ int knet_link_get_link_list(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t *link_ids, size_t *link_ids_entries); /* * define link status structure for quick lookup * * src/dst_{ipaddr,port} strings are filled by * getnameinfo(3) when configuring the link. * if the link is dynamic (see knet_link_set_config(3)) * dst_ipaddr/port will contain ipaddr/port of the currently * connected peer or "Unknown" if it was not possible * to determine the ipaddr/port at runtime. * * enabled see also knet_link_set/get_enable.
* * connected the link is connected to a peer and ping/pong traffic * is flowing. * * dynconnected the link has dynamic ip on the other end, and * we can see the other host is sending pings to us. * * pong_last if the link is down, this value tells us how long * ago this link was active. A value of 0 means that the link * has never been active. * * knet_link_stats structure that contains details statistics for the link */ #define MAX_LINK_EVENTS 16 struct knet_link_stats { /* onwire values */ uint64_t tx_data_packets; uint64_t rx_data_packets; uint64_t tx_data_bytes; uint64_t rx_data_bytes; uint64_t rx_ping_packets; uint64_t tx_ping_packets; uint64_t rx_ping_bytes; uint64_t tx_ping_bytes; uint64_t rx_pong_packets; uint64_t tx_pong_packets; uint64_t rx_pong_bytes; uint64_t tx_pong_bytes; uint64_t rx_pmtu_packets; uint64_t tx_pmtu_packets; uint64_t rx_pmtu_bytes; uint64_t tx_pmtu_bytes; /* Only filled in when requested */ uint64_t tx_total_packets; uint64_t rx_total_packets; uint64_t tx_total_bytes; uint64_t rx_total_bytes; uint64_t tx_total_errors; uint64_t tx_total_retries; uint32_t tx_pmtu_errors; uint32_t tx_pmtu_retries; uint32_t tx_ping_errors; uint32_t tx_ping_retries; uint32_t tx_pong_errors; uint32_t tx_pong_retries; uint32_t tx_data_errors; uint32_t tx_data_retries; /* measured in usecs */ uint32_t latency_min; uint32_t latency_max; uint32_t latency_ave; uint32_t latency_samples; /* how many times the link has been going up/down */ uint32_t down_count; uint32_t up_count; /* * circular buffer of time_t structs collecting the history * of up/down events on this link. * the index indicates current/last event. 
* it is safe to walk back the history by decreasing the index */ time_t last_up_times[MAX_LINK_EVENTS]; time_t last_down_times[MAX_LINK_EVENTS]; int8_t last_up_time_index; int8_t last_down_time_index; /* Always add new stats at the end */ }; struct knet_link_status { size_t size; /* For ABI checking */ char src_ipaddr[KNET_MAX_HOST_LEN]; char src_port[KNET_MAX_PORT_LEN]; char dst_ipaddr[KNET_MAX_HOST_LEN]; char dst_port[KNET_MAX_PORT_LEN]; uint8_t enabled; /* link is configured and admin enabled for traffic */ uint8_t connected; /* link is connected for data (local view) */ uint8_t dynconnected; /* link has been activated by remote dynip */ struct timespec pong_last; unsigned int mtu; /* current detected MTU on this link */ unsigned int proto_overhead; /* contains the size of the IP protocol, knet headers and * crypto headers (if configured). This value is filled in * ONLY after the first PMTUd run on that given link, * and can change if link configuration or crypto configuration * changes at runtime. * WARNING: in general mtu + proto_overhead might or might * not match the output of ifconfig mtu due to crypto * requirements to pad packets to some specific boundaries. */ /* Link statistics */ struct knet_link_stats stats; }; /** * knet_link_get_status * * @brief Get the status (and statistics) for a link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * status - pointer to knet_link_status struct * * struct_size - max size of knet_link_status - allows library to * add fields without ABI change. Returned structure * will be truncated to this length and .size member * indicates the full size. * * @return * knet_link_get_status returns * 0 on success * -1 on error and errno is set. 
*/ int knet_link_get_status(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, struct knet_link_status *status, size_t struct_size); /** * knet_link_enable_status_change_notify * * @brief Install a callback to get link status change events * * knet_h - pointer to knet_handle_t * * link_status_change_notify_fn_private_data - * void pointer to data that can be used to identify * the callback * * link_status_change_notify_fn - * is a callback function that is invoked every time * there is a change in a link status. * link status is identified by: * - connected, 0 if the link has been disconnected, 1 if the link * is connected. * - remote, 0 if the host_id is connected locally or 1 if * there is one or more knet host(s) in between. * NOTE: re-switching is NOT currently implemented, * but this is ready for future and can avoid * an API/ABI breakage later on. * - external, 0 if the host_id is configured locally or 1 if * it has been added from remote nodes config. * NOTE: dynamic topology is NOT currently implemented, * but this is ready for future and can avoid * an API/ABI breakage later on. * This function MUST NEVER block or add substantial delays. * * @return * knet_link_enable_status_change_notify returns * 0 on success * -1 on error and errno is set. */ int knet_link_enable_status_change_notify(knet_handle_t knet_h, void *link_status_change_notify_fn_private_data, void (*link_status_change_notify_fn) ( void *private_data, knet_node_id_t host_id, uint8_t link_id, uint8_t connected, uint8_t remote, uint8_t external)); /* * logging structs/API calls */ /* * libknet is composed of several subsystems. In order * to easily distinguish log messages coming from different * places, each subsystem has its own ID.
* * 0-19 config/management * 20-39 internal threads * 40-59 transports * 60-69 crypto implementations * 70-79 compress implementations */ #define KNET_SUB_COMMON 0 /* common.c */ #define KNET_SUB_HANDLE 1 /* handle.c alloc/dealloc config changes */ #define KNET_SUB_HOST 2 /* host add/del/modify */ #define KNET_SUB_LISTENER 3 /* listeners add/del/modify... */ #define KNET_SUB_LINK 4 /* link add/del/modify */ #define KNET_SUB_TRANSPORT 5 /* Transport common */ #define KNET_SUB_CRYPTO 6 /* crypto.c config generic layer */ #define KNET_SUB_COMPRESS 7 /* compress.c config generic layer */ #define KNET_SUB_FILTER 19 /* allocated for users to log from dst_filter */ #define KNET_SUB_DSTCACHE 20 /* switching thread (destination cache handling) */ #define KNET_SUB_HEARTBEAT 21 /* heartbeat thread */ #define KNET_SUB_PMTUD 22 /* Path MTU Discovery thread */ #define KNET_SUB_TX 23 /* send to link thread */ #define KNET_SUB_RX 24 /* recv from link thread */ #define KNET_SUB_TRANSP_BASE 40 /* Base subsystem ID for transports */ #define KNET_SUB_TRANSP_LOOPBACK (KNET_SUB_TRANSP_BASE + KNET_TRANSPORT_LOOPBACK) #define KNET_SUB_TRANSP_UDP (KNET_SUB_TRANSP_BASE + KNET_TRANSPORT_UDP) #define KNET_SUB_TRANSP_SCTP (KNET_SUB_TRANSP_BASE + KNET_TRANSPORT_SCTP) #define KNET_SUB_NSSCRYPTO 60 /* crypto_nss.c */ #define KNET_SUB_OPENSSLCRYPTO 61 /* crypto_openssl.c */ #define KNET_SUB_ZLIBCOMP 70 /* compress_zlib.c */ #define KNET_SUB_LZ4COMP 71 /* compress_lz4.c */ #define KNET_SUB_LZ4HCCOMP 72 /* compress_lz4hc.c */ #define KNET_SUB_LZO2COMP 73 /* compress_lzo2.c */ #define KNET_SUB_LZMACOMP 74 /* compress_lzma.c */ #define KNET_SUB_BZIP2COMP 75 /* compress_bzip2.c */ #define KNET_SUB_ZSTDCOMP 76 /* compress_zstd.c */ #define KNET_SUB_UNKNOWN UINT8_MAX - 1 #define KNET_MAX_SUBSYSTEMS UINT8_MAX /* * Convert between subsystem IDs and names */ /** * knet_log_get_subsystem_name * * @brief Get a logging system name from its numeric ID * * @return * returns internal name of the subsystem or "common" */ const char
*knet_log_get_subsystem_name(uint8_t subsystem); /** * knet_log_get_subsystem_id * * @brief Get a logging system ID from its name * * @return * returns internal ID of the subsystem or KNET_SUB_COMMON */ uint8_t knet_log_get_subsystem_id(const char *name); /* * 4 log levels are enough for everybody */ #define KNET_LOG_ERR 0 /* unrecoverable errors/conditions */ #define KNET_LOG_WARN 1 /* recoverable errors/conditions */ #define KNET_LOG_INFO 2 /* info, link up/down, config changes.. */ #define KNET_LOG_DEBUG 3 /* * Convert between log level values and names */ /** * knet_log_get_loglevel_name * * @brief Get a logging level name from its numeric ID * * @return * returns internal name of the log level or "ERROR" for unknown values */ const char *knet_log_get_loglevel_name(uint8_t level); /** * knet_log_get_loglevel_id * * @brief Get a logging level ID from its name * * @return * returns internal log level ID or KNET_LOG_ERR for invalid names */ uint8_t knet_log_get_loglevel_id(const char *name); /* * every log message is composed by a text message * and message level/subsystem IDs. * In order to make debugging easier it is possible to send those packets * straight to stdout/stderr (see knet_bench.c stdout option). */ #define KNET_MAX_LOG_MSG_SIZE 254 #if KNET_MAX_LOG_MSG_SIZE > PIPE_BUF #error KNET_MAX_LOG_MSG_SIZE cannot be bigger than PIPE_BUF for guaranteed system atomic writes #endif struct knet_log_msg { char msg[KNET_MAX_LOG_MSG_SIZE]; uint8_t subsystem; /* KNET_SUB_* */ uint8_t msglevel; /* KNET_LOG_* */ knet_handle_t knet_h; /* pointer to the handle generating the log */ }; /** * knet_log_set_loglevel * * @brief Set the logging level for a subsystem * * knet_h - same as above * * subsystem - same as above * * level - same as above * * knet_log_set_loglevel allows fine control of log levels by subsystem. * See also knet_handle_new for defaults. * * @return * knet_log_set_loglevel returns * 0 on success * -1 on error and errno is set. 
*/ int knet_log_set_loglevel(knet_handle_t knet_h, uint8_t subsystem, uint8_t level); /** * knet_log_get_loglevel * * @brief Get the logging level for a subsystem * * knet_h - same as above * * subsystem - same as above * * level - same as above * * @return * knet_log_get_loglevel returns * 0 on success * -1 on error and errno is set. */ int knet_log_get_loglevel(knet_handle_t knet_h, uint8_t subsystem, uint8_t *level); #endif diff --git a/libknet/onwire.c b/libknet/onwire.c index 146baa19..75c42fd9 100644 --- a/libknet/onwire.c +++ b/libknet/onwire.c @@ -1,127 +1,268 @@ /* * Copyright (C) 2019-2020 Red Hat, Inc. All rights reserved. * * Author: Fabio M. Di Nitto * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include "crypto.h" #include "internals.h" #include "logging.h" #include "common.h" #include "transport_udp.h" #include "transport_sctp.h" /* * unencrypted packet looks like: * * | ip | protocol | knet_header | unencrypted data | * | onwire_len | * | proto_overhead | * | data_len | * | app MTU | * * encrypted packet looks like (not to scale): * * | ip | protocol | salt | crypto(knet_header | data) | crypto_data_pad | hash | * | onwire_len | * | proto_overhead | * | data_len | * | app MTU | * * knet_h->sec_block_size is >= 0 if encryption will pad the data * knet_h->sec_salt_size is >= 0 if encryption is enabled * knet_h->sec_hash_size is >= 0 if signing is enabled */ /* * this function takes in the data that we would like to send * and tells us the outgoing onwire data size with crypto and * all the headers adjustment. * calling thread needs to account for protocol overhead. */ size_t calc_data_outlen(knet_handle_t knet_h, size_t inlen) { size_t outlen = inlen, pad_len = 0; if (knet_h->sec_block_size) { /* * if the crypto mechanism requires padding, calculate the padding * and add it back to outlen because that's what the crypto layer * would do. 
*/ pad_len = knet_h->sec_block_size - (outlen % knet_h->sec_block_size); outlen = outlen + pad_len; } return outlen + knet_h->sec_salt_size + knet_h->sec_hash_size; } /* * this function takes in the data that we would like to send * and tells us what is the real maximum data we can send * accounting for headers and crypto * calling thread needs to account for protocol overhead. */ size_t calc_max_data_outlen(knet_handle_t knet_h, size_t inlen) { size_t outlen = inlen, pad_len = 0; if (knet_h->sec_block_size) { /* * drop both salt and hash, that leaves only the crypto data and padding * we need to calculate the padding based on the real encrypted data * that includes the knet_header. */ outlen = outlen - (knet_h->sec_salt_size + knet_h->sec_hash_size); /* * if the crypto mechanism requires padding, calculate the padding * and remove it, to align the data. * NOTE: we need to remove pad_len + 1 because, based on testing, * if we send data that are already aligned to block_size, the * crypto implementations will add another block_size! * so we want to make sure that our data won't add an unnecessary * block_size that we need to remove later. */ pad_len = outlen % knet_h->sec_block_size; outlen = outlen - (pad_len + 1); /* * add both hash and salt size back, similar to padding above, * the crypto layer will add them to the outlen */ outlen = outlen + (knet_h->sec_salt_size + knet_h->sec_hash_size); } /* * drop KNET_HEADER_ALL_SIZE to provide a clean application MTU * and various crypto headers */ outlen = outlen - (KNET_HEADER_ALL_SIZE + knet_h->sec_salt_size + knet_h->sec_hash_size); return outlen; } /* * set the lowest possible value as failsafe for all links. 
* KNET_PMTUD_MIN_MTU_V4 < KNET_PMTUD_MIN_MTU_V6 * KNET_PMTUD_OVERHEAD_V6 > KNET_PMTUD_OVERHEAD_V4 * KNET_PMTUD_SCTP_OVERHEAD > KNET_PMTUD_UDP_OVERHEAD */ size_t calc_min_mtu(knet_handle_t knet_h) { return calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V4 - (KNET_PMTUD_OVERHEAD_V6 + KNET_PMTUD_SCTP_OVERHEAD)); } + +int knet_handle_enable_onwire_ver_notify(knet_handle_t knet_h, + void *onwire_ver_notify_fn_private_data, + void (*onwire_ver_notify_fn) ( + void *private_data, + uint8_t onwire_min_ver, + uint8_t onwire_max_ver, + uint8_t onwire_ver)) +{ + int savederrno = 0; + + if (!knet_h) { + errno = EINVAL; + return -1; + } + + savederrno = get_global_wrlock(knet_h); + if (savederrno) { + log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", + strerror(savederrno)); + errno = savederrno; + return -1; + } + + knet_h->onwire_ver_notify_fn_private_data = onwire_ver_notify_fn_private_data; + knet_h->onwire_ver_notify_fn = onwire_ver_notify_fn; + if (knet_h->onwire_ver_notify_fn) { + log_debug(knet_h, KNET_SUB_HANDLE, "onwire_ver_notify_fn enabled"); + /* + * generate an artificial call to notify the app of what's currently + * happening + */ + knet_h->onwire_ver_notify_fn(knet_h->onwire_ver_notify_fn_private_data, + knet_h->onwire_min_ver, + knet_h->onwire_max_ver, + knet_h->onwire_ver); + } else { + log_debug(knet_h, KNET_SUB_HANDLE, "onwire_ver_notify_fn disabled"); + } + + pthread_rwlock_unlock(&knet_h->global_rwlock); + + return 0; +} + +int knet_handle_get_onwire_ver(knet_handle_t knet_h, + knet_node_id_t host_id, + uint8_t *onwire_min_ver, + uint8_t *onwire_max_ver, + uint8_t *onwire_ver) +{ + int err = 0, savederrno = 0; + struct knet_host *host; + + if (!knet_h) { + errno = EINVAL; + return -1; + } + + if (!onwire_min_ver) { + errno = EINVAL; + return -1; + } + + if (!onwire_max_ver) { + errno = EINVAL; + return -1; + } + + if (!onwire_ver) { + errno = EINVAL; + return -1; + } + + /* + * we need a write lock here so that gathering host onwire info + * 
is not racy (updated by thread_rx) and we can save a mutex_lock + * to gather local node info. + */ + savederrno = get_global_wrlock(knet_h); + if (savederrno) { + log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", + strerror(savederrno)); + errno = savederrno; + return -1; + } + + if (host_id == knet_h->host_id) { + *onwire_min_ver = knet_h->onwire_min_ver; + *onwire_max_ver = knet_h->onwire_max_ver; + *onwire_ver = knet_h->onwire_ver; + } else { + host = knet_h->host_index[host_id]; + if (!host) { + err = -1; + savederrno = EINVAL; + log_err(knet_h, KNET_SUB_HANDLE, "Unable to find host %u: %s", host_id, strerror(savederrno)); + goto out_unlock; + } + *onwire_min_ver = 0; + *onwire_max_ver = host->onwire_max_ver; + *onwire_ver = host->onwire_ver; + } + +out_unlock: + pthread_rwlock_unlock(&knet_h->global_rwlock); + errno = savederrno; + return err; +} + +int knet_handle_set_onwire_ver(knet_handle_t knet_h, + uint8_t onwire_ver) +{ + int savederrno = 0; + + if (!knet_h) { + errno = EINVAL; + return -1; + } + + if ((onwire_ver) && + ((onwire_ver < knet_h->onwire_min_ver) || + (onwire_ver > knet_h->onwire_max_ver))) { + errno = EINVAL; + return -1; + } + + savederrno = get_global_wrlock(knet_h); + if (savederrno) { + log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", + strerror(savederrno)); + errno = savederrno; + return -1; + } + + knet_h->onwire_force_ver = onwire_ver; + + pthread_rwlock_unlock(&knet_h->global_rwlock); + + return 0; +} diff --git a/libknet/onwire.h b/libknet/onwire.h index 1040ea07..2249bfe9 100644 --- a/libknet/onwire.h +++ b/libknet/onwire.h @@ -1,135 +1,166 @@ /* * Copyright (C) 2012-2020 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #ifndef __KNET_ONWIRE_H__ #define __KNET_ONWIRE_H__ +#include + +#include "libknet.h" + /* * data structures to define network packets. 
* Start from knet_header at the bottom */ -#include +/* + * Plan is to support MAX_VER with MIN_VER = MAX_VER - 1 + * but for the sake of not rewriting the world later on, + * let´s make sure we can support a random range of protocol + * versions + */ -#include "libknet.h" +#define KNET_HEADER_ONWIRE_MAX_VER 0x01 /* max onwire protocol supported by this build */ +#define KNET_HEADER_ONWIRE_MIN_VER 0x01 /* min onwire protocol supported by this build */ /* - * typedef uint64_t seq_num_t; - * #define SEQ_MAX UINT64_MAX + * Packet types + * + * adding new DATA types requires the packet to contain + * data_seq_num and frag_num/frag_seq in the current data types. + * + * Changing those data types requires major surgery to thread_tx/thread_rx + * and defrag buffer allocation in knet_host_add. + * + * Also please be aware that frags buffer allocation size is not constant + * so you cannot assume each frag is 64K+. + * (see handle.c) */ -typedef uint16_t seq_num_t; + +#define KNET_HEADER_TYPE_DATA 0x00 /* pure data packet */ + +#define KNET_HEADER_TYPE_PING 0x81 /* heartbeat */ +#define KNET_HEADER_TYPE_PONG 0x82 /* reply to heartbeat */ +#define KNET_HEADER_TYPE_PMTUD 0x83 /* Used to determine Path MTU */ +#define KNET_HEADER_TYPE_PMTUD_REPLY 0x84 /* reply from remote host */ + +/* + * KNET_HEADER_TYPE_DATA + */ + +typedef uint16_t seq_num_t; /* data sequence number required to deduplicate pckts */ #define SEQ_MAX UINT16_MAX -struct knet_header_payload_data { - seq_num_t khp_data_seq_num; /* pckt seq number used to deduplicate pkcts */ +struct knet_header_payload_data_v1 { + seq_num_t khp_data_seq_num; /* pckt seq number used to deduplicate pckts */ uint8_t khp_data_compress; /* identify if user data are compressed */ uint8_t khp_data_pad1; /* make sure to have space in the header to grow features */ uint8_t khp_data_bcast; /* data destination bcast/ucast */ uint8_t khp_data_frag_num; /* number of fragments of this pckt. 
1 is not fragmented */ uint8_t khp_data_frag_seq; /* as above, indicates the frag sequence number */ int8_t khp_data_channel; /* transport channel data for localsock <-> knet <-> localsock mapping */ uint8_t khp_data_userdata[0]; /* pointer to the real user data */ } __attribute__((packed)); -struct knet_header_payload_ping { - uint8_t khp_ping_link; /* source link id */ +#define khp_data_v1_seq_num kh_payload.khp_data_v1.khp_data_seq_num +#define khp_data_v1_frag_num kh_payload.khp_data_v1.khp_data_frag_num +#define khp_data_v1_frag_seq kh_payload.khp_data_v1.khp_data_frag_seq +#define khp_data_v1_userdata kh_payload.khp_data_v1.khp_data_userdata +#define khp_data_v1_bcast kh_payload.khp_data_v1.khp_data_bcast +#define khp_data_v1_channel kh_payload.khp_data_v1.khp_data_channel +#define khp_data_v1_compress kh_payload.khp_data_v1.khp_data_compress + +/* + * KNET_HEADER_TYPE_PING / KNET_HEADER_TYPE_PONG + */ + +struct knet_header_payload_ping_v1 { + uint8_t khp_ping_link; /* changing khp_ping_link requires changes to thread_rx.c + KNET_LINK_DYNIP code handling */ uint32_t khp_ping_time[4]; /* ping timestamp */ seq_num_t khp_ping_seq_num; /* transport host seq_num */ uint8_t khp_ping_timed; /* timed pinged (1) or forced by seq_num (0) */ } __attribute__((packed)); -/* taken from tracepath6 */ +#define khp_ping_v1_link kh_payload.khp_ping_v1.khp_ping_link +#define khp_ping_v1_time kh_payload.khp_ping_v1.khp_ping_time +#define khp_ping_v1_seq_num kh_payload.khp_ping_v1.khp_ping_seq_num +#define khp_ping_v1_timed kh_payload.khp_ping_v1.khp_ping_timed + +/* + * KNET_HEADER_TYPE_PMTUD / KNET_HEADER_TYPE_PMTUD_REPLY + */ + +/* + * taken from tracepath6 + */ #define KNET_PMTUD_SIZE_V4 65535 #define KNET_PMTUD_SIZE_V6 KNET_PMTUD_SIZE_V4 /* * IPv4/IPv6 header size */ #define KNET_PMTUD_OVERHEAD_V4 20 #define KNET_PMTUD_OVERHEAD_V6 40 #define KNET_PMTUD_MIN_MTU_V4 576 #define KNET_PMTUD_MIN_MTU_V6 1280 -struct knet_header_payload_pmtud { - uint8_t khp_pmtud_link; /* source 
link id */ +struct knet_header_payload_pmtud_v1 { + uint8_t khp_pmtud_link; /* link_id */ uint16_t khp_pmtud_size; /* size of the current packet */ uint8_t khp_pmtud_data[0]; /* pointer to empty/random data/fill buffer */ } __attribute__((packed)); +#define khp_pmtud_v1_link kh_payload.khp_pmtud_v1.khp_pmtud_link +#define khp_pmtud_v1_size kh_payload.khp_pmtud_v1.khp_pmtud_size +#define khp_pmtud_v1_data kh_payload.khp_pmtud_v1.khp_pmtud_data + +/* + * PMTUd related functions + */ + +size_t calc_data_outlen(knet_handle_t knet_h, size_t inlen); +size_t calc_max_data_outlen(knet_handle_t knet_h, size_t inlen); +size_t calc_min_mtu(knet_handle_t knet_h); + /* * union to reference possible individual payloads */ union knet_header_payload { - struct knet_header_payload_data khp_data; /* pure data packet struct */ - struct knet_header_payload_ping khp_ping; /* heartbeat packet struct */ - struct knet_header_payload_pmtud khp_pmtud; /* Path MTU discovery packet struct */ + struct knet_header_payload_data_v1 khp_data_v1; /* pure data packet struct */ + struct knet_header_payload_ping_v1 khp_ping_v1; /* heartbeat packet struct */ + struct knet_header_payload_pmtud_v1 khp_pmtud_v1; /* Path MTU discovery packet struct */ } __attribute__((packed)); /* - * starting point + * this header CANNOT change or onwire compat will break! */ -#define KNET_HEADER_VERSION 0x01 /* we currently support only one version */ - -#define KNET_HEADER_TYPE_DATA 0x00 /* pure data packet */ - -#define KNET_HEADER_TYPE_PMSK 0x80 /* packet mask */ -#define KNET_HEADER_TYPE_PING 0x81 /* heartbeat */ -#define KNET_HEADER_TYPE_PONG 0x82 /* reply to heartbeat */ -#define KNET_HEADER_TYPE_PMTUD 0x83 /* Used to determine Path MTU */ -#define KNET_HEADER_TYPE_PMTUD_REPLY 0x84 /* reply from remote host */ - struct knet_header { - uint8_t kh_version; /* pckt format/version */ + uint8_t kh_version; /* this pckt format/version */ uint8_t kh_type; /* from above defines. 
Tells what kind of pckt it is */ knet_node_id_t kh_node; /* host id of the source host for this pckt */ + uint8_t kh_max_ver; /* max version of the protocol supported by this node */ uint8_t kh_pad1; /* make sure to have space in the header to grow features */ - uint8_t kh_pad2; union knet_header_payload kh_payload; /* union of potential data struct based on kh_type */ } __attribute__((packed)); -/* - * commodoty defines to hide structure nesting - * (needs review and cleanup) - */ - -#define khp_data_seq_num kh_payload.khp_data.khp_data_seq_num -#define khp_data_frag_num kh_payload.khp_data.khp_data_frag_num -#define khp_data_frag_seq kh_payload.khp_data.khp_data_frag_seq -#define khp_data_userdata kh_payload.khp_data.khp_data_userdata -#define khp_data_bcast kh_payload.khp_data.khp_data_bcast -#define khp_data_channel kh_payload.khp_data.khp_data_channel -#define khp_data_compress kh_payload.khp_data.khp_data_compress - -#define khp_ping_link kh_payload.khp_ping.khp_ping_link -#define khp_ping_time kh_payload.khp_ping.khp_ping_time -#define khp_ping_seq_num kh_payload.khp_ping.khp_ping_seq_num -#define khp_ping_timed kh_payload.khp_ping.khp_ping_timed - -#define khp_pmtud_link kh_payload.khp_pmtud.khp_pmtud_link -#define khp_pmtud_size kh_payload.khp_pmtud.khp_pmtud_size -#define khp_pmtud_data kh_payload.khp_pmtud.khp_pmtud_data - /* * extra defines to avoid mingling with sizeof() too much */ #define KNET_HEADER_ALL_SIZE sizeof(struct knet_header) #define KNET_HEADER_SIZE (KNET_HEADER_ALL_SIZE - sizeof(union knet_header_payload)) -#define KNET_HEADER_PING_SIZE (KNET_HEADER_SIZE + sizeof(struct knet_header_payload_ping)) -#define KNET_HEADER_PMTUD_SIZE (KNET_HEADER_SIZE + sizeof(struct knet_header_payload_pmtud)) -#define KNET_HEADER_DATA_SIZE (KNET_HEADER_SIZE + sizeof(struct knet_header_payload_data)) - -size_t calc_data_outlen(knet_handle_t knet_h, size_t inlen); -size_t calc_max_data_outlen(knet_handle_t knet_h, size_t inlen); -size_t 
calc_min_mtu(knet_handle_t knet_h); +#define KNET_HEADER_PING_V1_SIZE (KNET_HEADER_SIZE + sizeof(struct knet_header_payload_ping_v1)) +#define KNET_HEADER_PMTUD_V1_SIZE (KNET_HEADER_SIZE + sizeof(struct knet_header_payload_pmtud_v1)) +#define KNET_HEADER_DATA_V1_SIZE (KNET_HEADER_SIZE + sizeof(struct knet_header_payload_data_v1)) #endif diff --git a/libknet/onwire_v1.c b/libknet/onwire_v1.c new file mode 100644 index 00000000..e716fabd --- /dev/null +++ b/libknet/onwire_v1.c @@ -0,0 +1,216 @@ +/* + * Copyright (C) 2020 Red Hat, Inc. All rights reserved. + * + * Authors: Fabio M. Di Nitto + * + * This software licensed under LGPL-2.0+ + */ + +#include "config.h" + +#include +#include +#include +#include +#include +#include + +#include "logging.h" +#include "host.h" +#include "links.h" +#include "onwire_v1.h" + +int prep_ping_v1(knet_handle_t knet_h, struct knet_link *dst_link, uint8_t onwire_ver, struct timespec clock_now, int timed, ssize_t *outlen) +{ + *outlen = KNET_HEADER_PING_V1_SIZE; + + /* preparing ping buffer */ + knet_h->pingbuf->kh_version = onwire_ver; + knet_h->pingbuf->kh_max_ver = knet_h->onwire_max_ver; + knet_h->pingbuf->kh_type = KNET_HEADER_TYPE_PING; + knet_h->pingbuf->kh_node = htons(knet_h->host_id); + knet_h->pingbuf->khp_ping_v1_link = dst_link->link_id; + knet_h->pingbuf->khp_ping_v1_timed = timed; + memmove(&knet_h->pingbuf->khp_ping_v1_time[0], &clock_now, sizeof(struct timespec)); + + if (pthread_mutex_lock(&knet_h->tx_seq_num_mutex)) { + log_debug(knet_h, KNET_SUB_HEARTBEAT, "Unable to get seq mutex lock"); + return -1; + } + knet_h->pingbuf->khp_ping_v1_seq_num = htons(knet_h->tx_seq_num); + pthread_mutex_unlock(&knet_h->tx_seq_num_mutex); + + return 0; +} + +void prep_pong_v1(knet_handle_t knet_h, struct knet_header *inbuf, ssize_t *outlen) +{ + *outlen = KNET_HEADER_PING_V1_SIZE; + inbuf->kh_type = KNET_HEADER_TYPE_PONG; + inbuf->kh_node = htons(knet_h->host_id); +} + +void process_ping_v1(knet_handle_t knet_h, struct knet_host 
*src_host, struct knet_link *src_link, struct knet_header *inbuf, ssize_t len) +{ + int wipe_bufs = 0; + seq_num_t recv_seq_num = ntohs(inbuf->khp_ping_v1_seq_num); + + if (!inbuf->khp_ping_v1_timed) { + /* + * we might be receiving this message from all links, but we want + * to process it only the first time + */ + if (recv_seq_num != src_host->untimed_rx_seq_num) { + /* + * cache the untimed seq num + */ + src_host->untimed_rx_seq_num = recv_seq_num; + /* + * if the host has received data in between + * untimed ping, then we don't need to wipe the bufs + */ + if (src_host->got_data) { + src_host->got_data = 0; + wipe_bufs = 0; + } else { + wipe_bufs = 1; + } + } + _seq_num_lookup(src_host, recv_seq_num, 0, wipe_bufs); + } else { + /* + * pings always arrives in bursts over all the link + * catch the first of them to cache the seq num and + * avoid duplicate processing + */ + if (recv_seq_num != src_host->timed_rx_seq_num) { + src_host->timed_rx_seq_num = recv_seq_num; + + if (recv_seq_num == 0) { + _seq_num_lookup(src_host, recv_seq_num, 0, 1); + } + } + } +} + +void process_pong_v1(knet_handle_t knet_h, struct knet_host *src_host, struct knet_link *src_link, struct knet_header *inbuf, struct timespec *recvtime) +{ + memmove(recvtime, &inbuf->khp_ping_v1_time[0], sizeof(struct timespec)); +} + +struct knet_link *get_link_from_pong_v1(knet_handle_t knet_h, struct knet_host *src_host, struct knet_header *inbuf) +{ + return &src_host->link[inbuf->khp_ping_v1_link]; +} + +void prep_pmtud_v1(knet_handle_t knet_h, struct knet_link *dst_link, uint8_t onwire_ver, size_t onwire_len) +{ + knet_h->pmtudbuf->kh_version = onwire_ver; + knet_h->pmtudbuf->kh_max_ver = knet_h->onwire_max_ver; + knet_h->pmtudbuf->kh_type = KNET_HEADER_TYPE_PMTUD; + knet_h->pmtudbuf->kh_node = htons(knet_h->host_id); + knet_h->pmtudbuf->khp_pmtud_v1_link = dst_link->link_id; + knet_h->pmtudbuf->khp_pmtud_v1_size = onwire_len; +} + +void prep_pmtud_reply_v1(knet_handle_t knet_h, struct knet_header 
*inbuf, ssize_t *outlen) +{ + *outlen = KNET_HEADER_PMTUD_V1_SIZE; + inbuf->kh_type = KNET_HEADER_TYPE_PMTUD_REPLY; + inbuf->kh_node = htons(knet_h->host_id); +} + +void process_pmtud_reply_v1(knet_handle_t knet_h, struct knet_link *src_link, struct knet_header *inbuf) +{ + src_link->last_recv_mtu = inbuf->khp_pmtud_v1_size; +} + +void prep_tx_bufs_v1(knet_handle_t knet_h, + struct knet_header *inbuf, unsigned char *data, size_t inlen, unsigned int temp_data_mtu, + seq_num_t tx_seq_num, int8_t channel, int bcast, int data_compressed, + int *msgs_to_send, struct iovec iov_out[PCKT_FRAG_MAX][2], int *iovcnt_out) +{ + uint8_t frag_idx = 0; + size_t frag_len = inlen; + + /* + * prepare the main header + */ + inbuf->kh_type = KNET_HEADER_TYPE_DATA; + inbuf->kh_version = 1; + inbuf->kh_max_ver = knet_h->onwire_max_ver; + inbuf->kh_node = htons(knet_h->host_id); + + /* + * prepare the data header + */ + inbuf->khp_data_v1_frag_seq = 0; + inbuf->khp_data_v1_bcast = bcast; + inbuf->khp_data_v1_frag_num = ceil((float)inlen / temp_data_mtu); + inbuf->khp_data_v1_channel = channel; + inbuf->khp_data_v1_seq_num = htons(tx_seq_num); + if (data_compressed) { + inbuf->khp_data_v1_compress = knet_h->compress_model; + } else { + inbuf->khp_data_v1_compress = 0; + } + + /* + * handle fragmentation + */ + if (inbuf->khp_data_v1_frag_num > 1) { + while (frag_idx < inbuf->khp_data_v1_frag_num) { + /* + * set the iov_base + */ + iov_out[frag_idx][0].iov_base = (void *)knet_h->send_to_links_buf[frag_idx]; + iov_out[frag_idx][0].iov_len = KNET_HEADER_DATA_V1_SIZE; + iov_out[frag_idx][1].iov_base = data + (temp_data_mtu * frag_idx); + + /* + * set the len + */ + if (frag_len > temp_data_mtu) { + iov_out[frag_idx][1].iov_len = temp_data_mtu; + } else { + iov_out[frag_idx][1].iov_len = frag_len; + } + + /* + * copy the frag info on all buffers + */ + memmove(knet_h->send_to_links_buf[frag_idx], inbuf, KNET_HEADER_DATA_V1_SIZE); + /* + * bump the frag + */ + 
knet_h->send_to_links_buf[frag_idx]->khp_data_v1_frag_seq = frag_idx + 1; + + frag_len = frag_len - temp_data_mtu; + frag_idx++; + } + *iovcnt_out = 2; + } else { + iov_out[frag_idx][0].iov_base = (void *)inbuf; + iov_out[frag_idx][0].iov_len = frag_len + KNET_HEADER_DATA_V1_SIZE; + *iovcnt_out = 1; + } + *msgs_to_send = inbuf->khp_data_v1_frag_num; +} + +unsigned char *get_data_v1(knet_handle_t knet_h, struct knet_header *inbuf) +{ + return inbuf->khp_data_v1_userdata; +} + +void get_data_header_info_v1(knet_handle_t knet_h, struct knet_header *inbuf, + ssize_t *header_size, int8_t *channel, + seq_num_t *seq_num, uint8_t *decompress_type, + uint8_t *frags, uint8_t *frag_seq) +{ + *header_size = KNET_HEADER_DATA_V1_SIZE; + *channel = inbuf->khp_data_v1_channel; + *seq_num = ntohs(inbuf->khp_data_v1_seq_num); + *decompress_type = inbuf->khp_data_v1_compress; + *frags = inbuf->khp_data_v1_frag_num; + *frag_seq = inbuf->khp_data_v1_frag_seq; +} diff --git a/libknet/onwire_v1.h b/libknet/onwire_v1.h new file mode 100644 index 00000000..c0ca636d --- /dev/null +++ b/libknet/onwire_v1.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2020 Red Hat, Inc. All rights reserved. + * + * Authors: Fabio M. 
Di Nitto + * + * This software licensed under LGPL-2.0+ + */ + +#ifndef __KNET_ONWIRE_V1_H__ +#define __KNET_ONWIRE_V1_H__ + +#include + +#include "internals.h" + +int prep_ping_v1(knet_handle_t knet_h, struct knet_link *dst_link, uint8_t onwire_ver, struct timespec clock_now, int timed, ssize_t *outlen); +void prep_pong_v1(knet_handle_t knet_h, struct knet_header *inbuf, ssize_t *outlen); +void process_ping_v1(knet_handle_t knet_h, struct knet_host *src_host, struct knet_link *src_link, struct knet_header *inbuf, ssize_t len); +void process_pong_v1(knet_handle_t knet_h, struct knet_host *src_host, struct knet_link *src_link, struct knet_header *inbuf, struct timespec *recvtime); +struct knet_link *get_link_from_pong_v1(knet_handle_t knet_h, struct knet_host *src_host, struct knet_header *inbuf); + +void prep_pmtud_v1(knet_handle_t knet_h, struct knet_link *dst_link, uint8_t onwire_ver, size_t onwire_len); +void prep_pmtud_reply_v1(knet_handle_t knet_h, struct knet_header *inbuf, ssize_t *outlen); +void process_pmtud_reply_v1(knet_handle_t knet_h, struct knet_link *src_link, struct knet_header *inbuf); + +void prep_tx_bufs_v1(knet_handle_t knet_h, + struct knet_header *inbuf, unsigned char *data, size_t inlen, unsigned int temp_data_mtu, + seq_num_t tx_seq_num, int8_t channel, int bcast, int data_compressed, + int *msgs_to_send, struct iovec iov_out[PCKT_FRAG_MAX][2], int *iovcnt_out); + +unsigned char *get_data_v1(knet_handle_t knet_h, struct knet_header *inbuf); + +void get_data_header_info_v1(knet_handle_t knet_h, struct knet_header *inbuf, + ssize_t *header_size, int8_t *channel, + seq_num_t *seq_num, uint8_t *decompress_type, + uint8_t *frags, uint8_t *frag_seq); +#endif diff --git a/libknet/tests/Makefile.am b/libknet/tests/Makefile.am index aa948218..06faf5b9 100644 --- a/libknet/tests/Makefile.am +++ b/libknet/tests/Makefile.am @@ -1,108 +1,114 @@ # # Copyright (C) 2016-2020 Red Hat, Inc. All rights reserved. # # Authors: Fabio M. 
Di Nitto # # This software licensed under GPL-2.0+ # MAINTAINERCLEANFILES = Makefile.in include $(top_srcdir)/build-aux/check.mk include $(top_srcdir)/libknet/tests/api-check.mk EXTRA_DIST = \ api-test-coverage \ api-check.mk AM_CPPFLAGS = -I$(top_srcdir)/libknet AM_CFLAGS += $(PTHREAD_CFLAGS) $(libqb_CFLAGS) LIBS = $(top_builddir)/libknet/libknet.la \ $(PTHREAD_LIBS) $(dl_LIBS) noinst_HEADERS = \ test-common.h # the order of those tests is NOT random. # some functions can only be tested properly after some dependents # API have been validated upfront. check_PROGRAMS = \ $(api_checks) \ $(int_checks) \ $(fun_checks) int_checks = \ int_links_acl_ip_test \ int_timediff_test fun_checks = \ - fun_config_crypto_test + fun_config_crypto_test \ + fun_onwire_upgrade_test # checks below need to be executed manually # or with a specific environment long_run_checks = \ fun_pmtud_crypto_test benchmarks = \ knet_bench_test noinst_PROGRAMS = \ api_knet_handle_new_limit_test \ pckt_test \ $(benchmarks) \ $(long_run_checks) \ $(check_PROGRAMS) noinst_SCRIPTS = \ api-test-coverage TESTS = $(check_PROGRAMS) if INSTALL_TESTS testsuitedir = $(TESTDIR) testsuite_PROGRAMS = $(noinst_PROGRAMS) endif check-local: check-api-test-coverage check-api-test-coverage: chmod u+x $(top_srcdir)/libknet/tests/api-test-coverage $(top_srcdir)/libknet/tests/api-test-coverage $(top_srcdir) $(top_builddir) pckt_test_SOURCES = pckt_test.c int_links_acl_ip_test_SOURCES = int_links_acl_ip.c \ ../common.c \ ../compat.c \ ../logging.c \ ../netutils.c \ ../threads_common.c \ ../onwire.c \ ../transports.c \ ../transport_common.c \ ../transport_loopback.c \ ../transport_sctp.c \ ../transport_udp.c \ ../links_acl.c \ ../links_acl_ip.c \ ../links_acl_loopback.c int_timediff_test_SOURCES = int_timediff.c knet_bench_test_SOURCES = knet_bench.c \ test-common.c \ ../common.c \ ../logging.c \ ../compat.c \ ../transport_common.c \ ../threads_common.c \ ../onwire.c fun_pmtud_crypto_test_SOURCES = fun_pmtud_crypto.c \
test-common.c \ - ../onwire.c + ../onwire.c \ + ../logging.c \ + ../threads_common.c fun_config_crypto_test_SOURCES = fun_config_crypto.c \ test-common.c + +fun_onwire_upgrade_test_SOURCES = fun_onwire_upgrade.c \ + test-common.c diff --git a/libknet/tests/api-check.mk b/libknet/tests/api-check.mk index a8cb50e9..66c6d0aa 100644 --- a/libknet/tests/api-check.mk +++ b/libknet/tests/api-check.mk @@ -1,298 +1,310 @@ # # Copyright (C) 2016-2020 Red Hat, Inc. All rights reserved. # # Authors: Fabio M. Di Nitto # # This software licensed under GPL-2.0+ # api_checks = \ api_knet_handle_new_test \ api_knet_handle_free_test \ api_knet_handle_compress_test \ api_knet_handle_setfwd_test \ api_knet_handle_enable_access_lists_test \ api_knet_handle_enable_filter_test \ api_knet_handle_enable_sock_notify_test \ api_knet_handle_add_datafd_test \ api_knet_handle_remove_datafd_test \ api_knet_handle_get_channel_test \ api_knet_handle_get_datafd_test \ api_knet_handle_get_stats_test \ api_knet_get_crypto_list_test \ api_knet_get_compress_list_test \ api_knet_handle_clear_stats_test \ api_knet_get_transport_list_test \ api_knet_get_transport_name_by_id_test \ api_knet_get_transport_id_by_name_test \ api_knet_handle_set_transport_reconnect_interval_test \ api_knet_handle_get_transport_reconnect_interval_test \ api_knet_recv_test \ api_knet_send_test \ api_knet_send_crypto_test \ api_knet_send_compress_test \ api_knet_send_sync_test \ api_knet_send_loopback_test \ api_knet_handle_pmtud_setfreq_test \ api_knet_handle_pmtud_getfreq_test \ api_knet_handle_enable_pmtud_notify_test \ api_knet_handle_pmtud_get_test \ api_knet_handle_pmtud_set_test \ api_knet_host_add_test \ api_knet_host_remove_test \ api_knet_host_set_name_test \ api_knet_host_get_name_by_host_id_test \ api_knet_host_get_id_by_host_name_test \ api_knet_host_get_host_list_test \ api_knet_host_set_policy_test \ api_knet_host_get_policy_test \ api_knet_host_get_status_test \ api_knet_host_enable_status_change_notify_test \ 
api_knet_log_get_subsystem_name_test \ api_knet_log_get_subsystem_id_test \ api_knet_log_get_loglevel_name_test \ api_knet_log_get_loglevel_id_test \ api_knet_log_set_loglevel_test \ api_knet_log_get_loglevel_test \ api_knet_strtoaddr_test \ api_knet_addrtostr_test \ api_knet_link_set_config_test \ api_knet_link_clear_config_test \ api_knet_link_get_config_test \ api_knet_link_set_ping_timers_test \ api_knet_link_get_ping_timers_test \ api_knet_link_set_pong_count_test \ api_knet_link_get_pong_count_test \ api_knet_link_set_priority_test \ api_knet_link_get_priority_test \ api_knet_link_set_enable_test \ api_knet_link_get_enable_test \ api_knet_link_get_link_list_test \ api_knet_link_get_status_test \ api_knet_link_enable_status_change_notify_test \ api_knet_handle_set_threads_timer_res_test \ api_knet_handle_get_threads_timer_res_test \ api_knet_link_add_acl_test \ api_knet_link_insert_acl_test \ api_knet_link_rm_acl_test \ api_knet_link_clear_acl_test \ api_knet_handle_crypto_set_config_test \ api_knet_handle_crypto_use_config_test \ - api_knet_handle_crypto_rx_clear_traffic_test + api_knet_handle_crypto_rx_clear_traffic_test \ + api_knet_handle_enable_onwire_ver_notify_test \ + api_knet_handle_get_onwire_ver_test \ + api_knet_handle_set_onwire_ver_test api_knet_handle_new_test_SOURCES = api_knet_handle_new.c \ test-common.c api_knet_handle_free_test_SOURCES = api_knet_handle_free.c \ test-common.c api_knet_handle_new_limit_test_SOURCES = api_knet_handle_new_limit.c \ test-common.c api_knet_handle_compress_test_SOURCES = api_knet_handle_compress.c \ test-common.c api_knet_handle_setfwd_test_SOURCES = api_knet_handle_setfwd.c \ test-common.c api_knet_handle_enable_access_lists_test_SOURCES = api_knet_handle_enable_access_lists.c \ test-common.c api_knet_handle_enable_filter_test_SOURCES = api_knet_handle_enable_filter.c \ test-common.c api_knet_handle_enable_sock_notify_test_SOURCES = api_knet_handle_enable_sock_notify.c \ test-common.c 
api_knet_handle_add_datafd_test_SOURCES = api_knet_handle_add_datafd.c \ test-common.c api_knet_handle_remove_datafd_test_SOURCES = api_knet_handle_remove_datafd.c \ test-common.c api_knet_handle_get_channel_test_SOURCES = api_knet_handle_get_channel.c \ test-common.c api_knet_handle_get_datafd_test_SOURCES = api_knet_handle_get_datafd.c \ test-common.c api_knet_handle_get_stats_test_SOURCES = api_knet_handle_get_stats.c \ test-common.c api_knet_get_crypto_list_test_SOURCES = api_knet_get_crypto_list.c \ test-common.c api_knet_get_compress_list_test_SOURCES = api_knet_get_compress_list.c \ test-common.c api_knet_handle_clear_stats_test_SOURCES = api_knet_handle_clear_stats.c \ test-common.c api_knet_get_transport_list_test_SOURCES = api_knet_get_transport_list.c \ test-common.c api_knet_get_transport_name_by_id_test_SOURCES = api_knet_get_transport_name_by_id.c \ test-common.c api_knet_get_transport_id_by_name_test_SOURCES = api_knet_get_transport_id_by_name.c \ test-common.c api_knet_handle_set_transport_reconnect_interval_test_SOURCES = api_knet_handle_set_transport_reconnect_interval.c \ test-common.c api_knet_handle_get_transport_reconnect_interval_test_SOURCES = api_knet_handle_get_transport_reconnect_interval.c \ test-common.c api_knet_recv_test_SOURCES = api_knet_recv.c \ test-common.c api_knet_send_test_SOURCES = api_knet_send.c \ test-common.c api_knet_send_compress_test_SOURCES = api_knet_send_compress.c \ test-common.c api_knet_send_crypto_test_SOURCES = api_knet_send_crypto.c \ test-common.c api_knet_send_loopback_test_SOURCES = api_knet_send_loopback.c \ test-common.c api_knet_send_sync_test_SOURCES = api_knet_send_sync.c \ test-common.c api_knet_handle_pmtud_setfreq_test_SOURCES = api_knet_handle_pmtud_setfreq.c \ test-common.c api_knet_handle_pmtud_getfreq_test_SOURCES = api_knet_handle_pmtud_getfreq.c \ test-common.c api_knet_handle_enable_pmtud_notify_test_SOURCES = api_knet_handle_enable_pmtud_notify.c \ test-common.c 
api_knet_handle_pmtud_get_test_SOURCES = api_knet_handle_pmtud_get.c \ test-common.c api_knet_handle_pmtud_set_test_SOURCES = api_knet_handle_pmtud_set.c \ test-common.c api_knet_host_add_test_SOURCES = api_knet_host_add.c \ test-common.c api_knet_host_remove_test_SOURCES = api_knet_host_remove.c \ test-common.c api_knet_host_set_name_test_SOURCES = api_knet_host_set_name.c \ test-common.c api_knet_host_get_name_by_host_id_test_SOURCES = api_knet_host_get_name_by_host_id.c \ test-common.c api_knet_host_get_id_by_host_name_test_SOURCES = api_knet_host_get_id_by_host_name.c \ test-common.c api_knet_host_get_host_list_test_SOURCES = api_knet_host_get_host_list.c \ test-common.c api_knet_host_set_policy_test_SOURCES = api_knet_host_set_policy.c \ test-common.c api_knet_host_get_policy_test_SOURCES = api_knet_host_get_policy.c \ test-common.c api_knet_host_get_status_test_SOURCES = api_knet_host_get_status.c \ test-common.c api_knet_host_enable_status_change_notify_test_SOURCES = api_knet_host_enable_status_change_notify.c \ test-common.c api_knet_log_get_subsystem_name_test_SOURCES = api_knet_log_get_subsystem_name.c \ test-common.c api_knet_log_get_subsystem_id_test_SOURCES = api_knet_log_get_subsystem_id.c \ test-common.c api_knet_log_get_loglevel_name_test_SOURCES = api_knet_log_get_loglevel_name.c \ test-common.c api_knet_log_get_loglevel_id_test_SOURCES = api_knet_log_get_loglevel_id.c \ test-common.c api_knet_log_set_loglevel_test_SOURCES = api_knet_log_set_loglevel.c \ test-common.c api_knet_log_get_loglevel_test_SOURCES = api_knet_log_get_loglevel.c \ test-common.c api_knet_strtoaddr_test_SOURCES = api_knet_strtoaddr.c api_knet_addrtostr_test_SOURCES = api_knet_addrtostr.c api_knet_link_set_config_test_SOURCES = api_knet_link_set_config.c \ test-common.c api_knet_link_clear_config_test_SOURCES = api_knet_link_clear_config.c \ test-common.c api_knet_link_get_config_test_SOURCES = api_knet_link_get_config.c \ test-common.c 
api_knet_link_set_ping_timers_test_SOURCES = api_knet_link_set_ping_timers.c \ test-common.c api_knet_link_get_ping_timers_test_SOURCES = api_knet_link_get_ping_timers.c \ test-common.c api_knet_link_set_pong_count_test_SOURCES = api_knet_link_set_pong_count.c \ test-common.c api_knet_link_get_pong_count_test_SOURCES = api_knet_link_get_pong_count.c \ test-common.c api_knet_link_set_priority_test_SOURCES = api_knet_link_set_priority.c \ test-common.c api_knet_link_get_priority_test_SOURCES = api_knet_link_get_priority.c \ test-common.c api_knet_link_set_enable_test_SOURCES = api_knet_link_set_enable.c \ test-common.c api_knet_link_get_enable_test_SOURCES = api_knet_link_get_enable.c \ test-common.c api_knet_link_get_link_list_test_SOURCES = api_knet_link_get_link_list.c \ test-common.c api_knet_link_get_status_test_SOURCES = api_knet_link_get_status.c \ test-common.c api_knet_link_enable_status_change_notify_test_SOURCES = api_knet_link_enable_status_change_notify.c \ test-common.c api_knet_handle_set_threads_timer_res_test_SOURCES = api_knet_handle_set_threads_timer_res.c \ test-common.c api_knet_handle_get_threads_timer_res_test_SOURCES = api_knet_handle_get_threads_timer_res.c \ test-common.c api_knet_link_add_acl_test_SOURCES = api_knet_link_add_acl.c \ test-common.c api_knet_link_insert_acl_test_SOURCES = api_knet_link_insert_acl.c \ test-common.c api_knet_link_rm_acl_test_SOURCES = api_knet_link_rm_acl.c \ test-common.c api_knet_link_clear_acl_test_SOURCES = api_knet_link_clear_acl.c \ test-common.c api_knet_handle_crypto_set_config_test_SOURCES = api_knet_handle_crypto_set_config.c \ test-common.c api_knet_handle_crypto_use_config_test_SOURCES = api_knet_handle_crypto_use_config.c \ test-common.c api_knet_handle_crypto_rx_clear_traffic_test_SOURCES = api_knet_handle_crypto_rx_clear_traffic.c \ test-common.c + +api_knet_handle_enable_onwire_ver_notify_test_SOURCES = api_knet_handle_enable_onwire_ver_notify.c \ + test-common.c + 
+api_knet_handle_get_onwire_ver_test_SOURCES = api_knet_handle_get_onwire_ver.c \ + test-common.c + +api_knet_handle_set_onwire_ver_test_SOURCES = api_knet_handle_set_onwire_ver.c \ + test-common.c diff --git a/libknet/tests/api_knet_handle_enable_onwire_ver_notify.c b/libknet/tests/api_knet_handle_enable_onwire_ver_notify.c new file mode 100644 index 00000000..52c85747 --- /dev/null +++ b/libknet/tests/api_knet_handle_enable_onwire_ver_notify.c @@ -0,0 +1,142 @@ +/* + * Copyright (C) 2020 Red Hat, Inc. All rights reserved. + * + * Authors: Fabio M. Di Nitto + * + * This software licensed under GPL-2.0+ + */ + +#include "config.h" + +#include +#include +#include +#include +#include + +#include "libknet.h" + +#include "internals.h" +#include "test-common.h" + +static int private_data; + +static void onwire_ver_notify(void *priv_data, + uint8_t onwire_min_ver, + uint8_t onwire_max_ver, + uint8_t onwire_ver) +{ + return; +} + +static void test(void) +{ + knet_handle_t knet_h; + int logfds[2]; + + printf("Test knet_handle_enable_onwire_ver_notify incorrect knet_h\n"); + + if ((!knet_handle_enable_onwire_ver_notify(NULL, NULL, onwire_ver_notify)) || (errno != EINVAL)) { + printf("knet_handle_enable_onwire_ver_notify accepted invalid knet_h or returned incorrect error: %s\n", strerror(errno)); + exit(FAIL); + } + + setup_logpipes(logfds); + + knet_h = knet_handle_start(logfds, KNET_LOG_DEBUG); + + printf("Test knet_handle_enable_onwire_ver_notify with no private_data\n"); + + if (knet_handle_enable_onwire_ver_notify(knet_h, NULL, onwire_ver_notify) < 0) { + printf("knet_handle_enable_onwire_ver_notify failed: %s\n", strerror(errno)); + knet_handle_free(knet_h); + flush_logs(logfds[0], stdout); + close_logpipes(logfds); + exit(FAIL); + } + + if (knet_h->onwire_ver_notify_fn_private_data != NULL) { + printf("knet_handle_enable_onwire_ver_notify failed to unset private_data"); + knet_handle_free(knet_h); + flush_logs(logfds[0], stdout); + close_logpipes(logfds); + 
exit(FAIL); + + } + + flush_logs(logfds[0], stdout); + + printf("Test knet_handle_enable_onwire_ver_notify with private_data\n"); + + if (knet_handle_enable_onwire_ver_notify(knet_h, &private_data, NULL) < 0) { + printf("knet_handle_enable_onwire_ver_notify failed: %s\n", strerror(errno)); + knet_handle_free(knet_h); + flush_logs(logfds[0], stdout); + close_logpipes(logfds); + exit(FAIL); + } + + if (knet_h->onwire_ver_notify_fn_private_data != &private_data) { + printf("knet_handle_enable_onwire_ver_notify failed to set private_data"); + knet_handle_free(knet_h); + flush_logs(logfds[0], stdout); + close_logpipes(logfds); + exit(FAIL); + + } + + flush_logs(logfds[0], stdout); + + printf("Test knet_handle_enable_onwire_ver_notify with no onwire_ver_notify fn\n"); + + if (knet_handle_enable_onwire_ver_notify(knet_h, NULL, NULL) < 0) { + printf("knet_handle_enable_onwire_ver_notify failed: %s\n", strerror(errno)); + knet_handle_free(knet_h); + flush_logs(logfds[0], stdout); + close_logpipes(logfds); + exit(FAIL); + } + + if (knet_h->onwire_ver_notify_fn != NULL) { + printf("knet_handle_enable_onwire_ver_notify failed to unset onwire_ver_notify fn"); + knet_handle_free(knet_h); + flush_logs(logfds[0], stdout); + close_logpipes(logfds); + exit(FAIL); + + } + + flush_logs(logfds[0], stdout); + + printf("Test knet_handle_enable_onwire_ver_notify with onwire_ver_notify fn\n"); + + if (knet_handle_enable_onwire_ver_notify(knet_h, NULL, onwire_ver_notify) < 0) { + printf("knet_handle_enable_onwire_ver_notify failed: %s\n", strerror(errno)); + knet_handle_free(knet_h); + flush_logs(logfds[0], stdout); + close_logpipes(logfds); + exit(FAIL); + } + + if (knet_h->onwire_ver_notify_fn != &onwire_ver_notify) { + printf("knet_handle_enable_onwire_ver_notify failed to set onwire_ver_notify fn"); + knet_handle_free(knet_h); + flush_logs(logfds[0], stdout); + close_logpipes(logfds); + exit(FAIL); + + } + + flush_logs(logfds[0], stdout); + + knet_handle_free(knet_h); + 
flush_logs(logfds[0], stdout);
+	close_logpipes(logfds);
+}
+
+int main(int argc, char *argv[])
+{
+	test();
+
+	return PASS;
+}
diff --git a/libknet/tests/api_knet_handle_get_onwire_ver.c b/libknet/tests/api_knet_handle_get_onwire_ver.c
new file mode 100644
index 00000000..310e199e
--- /dev/null
+++ b/libknet/tests/api_knet_handle_get_onwire_ver.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2020 Red Hat, Inc. All rights reserved.
+ *
+ * Authors: Fabio M. Di Nitto
+ *
+ * This software licensed under GPL-2.0+
+ */
+
+#include "config.h"
+
+#include
+#include
+#include
+#include
+#include
+
+#include "libknet.h"
+
+#include "internals.h"
+#include "test-common.h"
+
+/*
+ * Check that knet_handle_get_onwire_ver rejects each invalid argument with
+ * EINVAL and that a valid call reports the values stored in the handle.
+ */
+static void test(void)
+{
+	knet_handle_t knet_h;
+	int logfds[2];
+	uint8_t onwire_min_ver, onwire_max_ver, onwire_ver;
+
+	printf("Test knet_handle_get_onwire_ver incorrect knet_h\n");
+
+	if ((!knet_handle_get_onwire_ver(NULL, 1, &onwire_min_ver, &onwire_max_ver, &onwire_ver)) || (errno != EINVAL)) {
+		printf("knet_handle_get_onwire_ver accepted invalid knet_h or returned incorrect error: %s\n", strerror(errno));
+		exit(FAIL);
+	}
+
+	setup_logpipes(logfds);
+
+	knet_h = knet_handle_start(logfds, KNET_LOG_DEBUG);
+
+	printf("Test knet_handle_get_onwire_ver with invalid host_id\n");
+
+	if ((!knet_handle_get_onwire_ver(knet_h, 199, &onwire_min_ver, &onwire_max_ver, &onwire_ver)) || (errno != EINVAL)) {
+		printf("knet_handle_get_onwire_ver accepted invalid host_id or returned incorrect error: %s\n", strerror(errno));
+		knet_handle_free(knet_h);
+		flush_logs(logfds[0], stdout);
+		close_logpipes(logfds);
+		exit(FAIL);
+	}
+
+	flush_logs(logfds[0], stdout);
+	printf("Test knet_handle_get_onwire_ver with invalid onwire_min_ver\n");
+
+	if ((!knet_handle_get_onwire_ver(knet_h, knet_h->host_id, NULL, &onwire_max_ver, &onwire_ver)) || (errno != EINVAL)) {
+		printf("knet_handle_get_onwire_ver accepted invalid onwire_min_ver or returned incorrect error: %s\n", strerror(errno));
+		knet_handle_free(knet_h);
+		flush_logs(logfds[0], stdout);
+		close_logpipes(logfds);
+		exit(FAIL);
+	}
+
+	flush_logs(logfds[0], stdout);
+
+	printf("Test knet_handle_get_onwire_ver with invalid onwire_max_ver\n");
+
+	if ((!knet_handle_get_onwire_ver(knet_h, knet_h->host_id, &onwire_min_ver, NULL, &onwire_ver)) || (errno != EINVAL)) {
+		printf("knet_handle_get_onwire_ver accepted invalid onwire_max_ver or returned incorrect error: %s\n", strerror(errno));
+		knet_handle_free(knet_h);
+		flush_logs(logfds[0], stdout);
+		close_logpipes(logfds);
+		exit(FAIL);
+	}
+
+	flush_logs(logfds[0], stdout);
+
+	printf("Test knet_handle_get_onwire_ver with invalid onwire_ver\n");
+
+	if ((!knet_handle_get_onwire_ver(knet_h, knet_h->host_id, &onwire_min_ver, &onwire_max_ver, NULL)) || (errno != EINVAL)) {
+		printf("knet_handle_get_onwire_ver accepted invalid onwire_ver or returned incorrect error: %s\n", strerror(errno));
+		knet_handle_free(knet_h);
+		flush_logs(logfds[0], stdout);
+		close_logpipes(logfds);
+		exit(FAIL);
+	}
+
+	flush_logs(logfds[0], stdout);
+
+	printf("Test knet_handle_get_onwire_ver with valid data\n");
+
+	if (knet_handle_get_onwire_ver(knet_h, knet_h->host_id, &onwire_min_ver, &onwire_max_ver, &onwire_ver) < 0) {
+		printf("knet_handle_get_onwire_ver failed with valid data: %s\n", strerror(errno));
+		knet_handle_free(knet_h);
+		flush_logs(logfds[0], stdout);
+		close_logpipes(logfds);
+		exit(FAIL);
+	}
+
+	flush_logs(logfds[0], stdout);
+
+	if (onwire_min_ver != knet_h->onwire_min_ver) {
+		printf("knet_handle_get_onwire_ver returned invalid onwire_min_ver\n");
+		knet_handle_free(knet_h);
+		flush_logs(logfds[0], stdout);
+		close_logpipes(logfds);
+		exit(FAIL);
+	}
+
+	flush_logs(logfds[0], stdout);
+
+	if (onwire_max_ver != knet_h->onwire_max_ver) {
+		printf("knet_handle_get_onwire_ver returned invalid onwire_max_ver\n");
+		knet_handle_free(knet_h);
+		flush_logs(logfds[0], stdout);
+		close_logpipes(logfds);
+		exit(FAIL);
+	}
+
+	flush_logs(logfds[0], stdout);
+
+	if (onwire_ver != knet_h->onwire_ver) {
+		printf("knet_handle_get_onwire_ver returned invalid onwire_ver\n");
+		knet_handle_free(knet_h);
+		flush_logs(logfds[0], stdout);
+		close_logpipes(logfds);
+		exit(FAIL);
+	}
+
+	flush_logs(logfds[0], stdout);
+
+	knet_handle_free(knet_h);
+	flush_logs(logfds[0], stdout);
+	close_logpipes(logfds);
+}
+
+int main(int argc, char *argv[])
+{
+	test();
+
+	return PASS;
+}
diff --git a/libknet/tests/api_knet_handle_set_onwire_ver.c b/libknet/tests/api_knet_handle_set_onwire_ver.c
new file mode 100644
index 00000000..d043284e
--- /dev/null
+++ b/libknet/tests/api_knet_handle_set_onwire_ver.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2020 Red Hat, Inc. All rights reserved.
+ *
+ * Authors: Fabio M. Di Nitto
+ *
+ * This software licensed under GPL-2.0+
+ */
+
+#include "config.h"
+
+#include
+#include
+#include
+#include
+#include
+
+#include "libknet.h"
+
+#include "internals.h"
+#include "test-common.h"
+
+/*
+ * Check that knet_handle_set_onwire_ver rejects versions outside the handle's
+ * min/max range and records accepted values (0 included) in onwire_force_ver.
+ */
+static void test(void)
+{
+	knet_handle_t knet_h;
+	int logfds[2];
+
+	printf("Test knet_handle_set_onwire_ver incorrect knet_h\n");
+
+	if ((!knet_handle_set_onwire_ver(NULL, 1)) || (errno != EINVAL)) {
+		printf("knet_handle_set_onwire_ver accepted invalid knet_h or returned incorrect error: %s\n", strerror(errno));
+		exit(FAIL);
+	}
+
+	setup_logpipes(logfds);
+
+	knet_h = knet_handle_start(logfds, KNET_LOG_DEBUG);
+
+	knet_h->onwire_min_ver = 2;
+	knet_h->onwire_max_ver = 3;
+
+	printf("Test knet_handle_set_onwire_ver with invalid onwire_ver (1)\n");
+
+	if ((!knet_handle_set_onwire_ver(knet_h, 1)) || (errno != EINVAL)) {
+		printf("knet_handle_set_onwire_ver accepted invalid onwire_ver or returned incorrect error: %s\n", strerror(errno));
+		knet_handle_free(knet_h);
+		flush_logs(logfds[0], stdout);
+		close_logpipes(logfds);
+		exit(FAIL);
+	}
+
+	flush_logs(logfds[0], stdout);
+
+	printf("Test knet_handle_set_onwire_ver with invalid onwire_ver (4)\n");
+
+	if ((!knet_handle_set_onwire_ver(knet_h, 4)) || (errno != EINVAL)) {
+		printf("knet_handle_set_onwire_ver accepted invalid onwire_ver or returned incorrect error: %s\n", strerror(errno));
+		knet_handle_free(knet_h);
+		flush_logs(logfds[0], stdout);
+		close_logpipes(logfds);
+		exit(FAIL);
+	}
+
+	flush_logs(logfds[0], stdout);
+
+	printf("Test knet_handle_set_onwire_ver with valid onwire_ver (2)\n");
+
+	if (knet_handle_set_onwire_ver(knet_h, 2) < 0) {
+		printf("knet_handle_set_onwire_ver did not accept valid onwire_ver\n");
+		knet_handle_free(knet_h);
+		flush_logs(logfds[0], stdout);
+		close_logpipes(logfds);
+		exit(FAIL);
+	}
+
+	if (knet_h->onwire_force_ver != 2) {
+		printf("knet_handle_set_onwire_ver did not set correct onwire_ver\n");
+		knet_handle_free(knet_h);
+		flush_logs(logfds[0], stdout);
+		close_logpipes(logfds);
+		exit(FAIL);
+	}
+
+	flush_logs(logfds[0], stdout);
+
+	printf("Test knet_handle_set_onwire_ver reset (0)\n");
+
+	if (knet_handle_set_onwire_ver(knet_h, 0) < 0) {
+		printf("knet_handle_set_onwire_ver did not accept valid onwire_ver\n");
+		knet_handle_free(knet_h);
+		flush_logs(logfds[0], stdout);
+		close_logpipes(logfds);
+		exit(FAIL);
+	}
+
+	flush_logs(logfds[0], stdout);
+
+	if (knet_h->onwire_force_ver != 0) {
+		printf("knet_handle_set_onwire_ver did not set correct onwire_ver\n");
+		knet_handle_free(knet_h);
+		flush_logs(logfds[0], stdout);
+		close_logpipes(logfds);
+		exit(FAIL);
+	}
+
+	flush_logs(logfds[0], stdout);
+
+	knet_handle_free(knet_h);
+	flush_logs(logfds[0], stdout);
+	close_logpipes(logfds);
+}
+
+int main(int argc, char *argv[])
+{
+	test();
+
+	return PASS;
+}
diff --git a/libknet/tests/fun_onwire_upgrade.c b/libknet/tests/fun_onwire_upgrade.c
new file mode 100644
index 00000000..40c4f1b5
--- /dev/null
+++ b/libknet/tests/fun_onwire_upgrade.c
@@ -0,0 +1,373 @@
+/*
+ * Copyright (C) 2020 Red Hat, Inc. All rights reserved.
+ *
+ * Authors: Fabio M.
Di Nitto
+ *
+ * This software licensed under GPL-2.0+
+ */
+
+#include "config.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "libknet.h"
+
+#include "compress.h"
+#include "internals.h"
+#include "netutils.h"
+#include "test-common.h"
+
+#define TESTNODES 3 /* handles are stored at indexes 1..TESTNODES */
+
+static int upgrade_onwire_max_ver(knet_handle_t knet_h, uint8_t min, uint8_t max, int seconds)
+{
+	if (knet_handle_disconnect_links(knet_h) < 0) {
+		return -1;
+	}
+	sleep(seconds); /* presumably lets the disconnect settle before the version range changes */
+	knet_h->onwire_min_ver = min;
+	knet_h->onwire_max_ver = max;
+	if (knet_handle_reconnect_links(knet_h) < 0) {
+		return -1;
+	}
+	sleep(seconds);
+	return 0;
+}
+
+static void onwire_ver_callback_fn(void *private_data, uint8_t onwire_min_ver, uint8_t onwire_max_ver, uint8_t onwire_ver)
+{
+	knet_handle_t knet_h = (knet_handle_t)private_data; /* test() registers the handle itself */
+
+	printf("Received callback from %p: min: %u max: %u current: %u\n", knet_h, onwire_min_ver, onwire_max_ver, onwire_ver);
+}
+
+static void test(void)
+{
+	knet_handle_t knet_h[TESTNODES + 1];
+	int logfds[2];
+	int i,j;
+	int seconds = 5;
+
+	if (is_memcheck() || is_helgrind()) {
+		printf("Test suite is running under valgrind, adjusting wait_for_host timeout\n");
+		seconds = seconds * 16;
+	}
+
+	setup_logpipes(logfds);
+
+	knet_handle_start_nodes(knet_h, TESTNODES, logfds, KNET_LOG_DEBUG);
+
+	flush_logs(logfds[0], stdout);
+
+	for (i = 1; i <= TESTNODES; i++) {
+		knet_h[i]->onwire_ver_remap = 1;
+		if (knet_handle_enable_onwire_ver_notify(knet_h[i], (void *)knet_h[i], onwire_ver_callback_fn) < 0) {
+			printf("Failed to install onwire ver callback\n");
+			knet_handle_stop_nodes(knet_h, TESTNODES);
+			flush_logs(logfds[0], stdout);
+			close_logpipes(logfds);
+			exit(FAIL);
+		}
+	}
+
+	flush_logs(logfds[0], stdout);
+
+	knet_handle_join_nodes(knet_h, TESTNODES, 1, AF_INET, KNET_TRANSPORT_UDP);
+
+	flush_logs(logfds[0], stdout);
+
+	for (i = 1; i <= TESTNODES; i++) {
+		for (j = 1; j <= TESTNODES; j++) {
+			if (j == i) {
+				continue;
+			}
+			if (knet_h[i]->host_index[j]->status.reachable != 1) {
+				knet_handle_stop_nodes(knet_h, TESTNODES);
+				flush_logs(logfds[0], stdout);
+				close_logpipes(logfds);
+				exit(FAIL);
+			}
+			flush_logs(logfds[0], stdout);
+		}
+	}
+
+	printf("Test normal onwire upgrade from %u to %u\n", knet_h[1]->onwire_ver, knet_h[1]->onwire_ver + 1);
+
+	for (i = 1; i <= TESTNODES; i++) {
+		if (upgrade_onwire_max_ver(knet_h[i], knet_h[1]->onwire_ver, knet_h[1]->onwire_ver + 1, seconds) < 0) {
+			knet_handle_stop_nodes(knet_h, TESTNODES);
+			flush_logs(logfds[0], stdout);
+			close_logpipes(logfds);
+			exit(FAIL);
+		}
+		flush_logs(logfds[0], stdout);
+	}
+
+	flush_logs(logfds[0], stdout);
+	sleep(seconds);
+	flush_logs(logfds[0], stdout);
+
+	for (i = 1; i <= TESTNODES; i++) {
+		printf("node %u, onwire: %u min: %u max: %u\n", i, knet_h[i]->onwire_ver, knet_h[i]->onwire_min_ver, knet_h[i]->onwire_max_ver);
+		for (j = 1; j <= TESTNODES; j++) {
+			if (j == i) {
+				continue;
+			}
+			if ((knet_h[i]->host_index[j]->status.reachable != 1) || (knet_h[i]->onwire_ver != knet_h[1]->onwire_max_ver)) {
+				knet_handle_stop_nodes(knet_h, TESTNODES);
+				flush_logs(logfds[0], stdout);
+				close_logpipes(logfds);
+				exit(FAIL);
+			}
+		}
+	}
+
+	flush_logs(logfds[0], stdout);
+	sleep(seconds);
+	flush_logs(logfds[0], stdout);
+
+	printf("Test onwire upgrade from %u to %u (all but one node)\n", knet_h[1]->onwire_ver, knet_h[1]->onwire_ver + 1);
+
+	for (i = 1; i < TESTNODES; i++) {
+		if (upgrade_onwire_max_ver(knet_h[i], knet_h[i]->onwire_ver, knet_h[i]->onwire_ver + 1, seconds) < 0) {
+			knet_handle_stop_nodes(knet_h, TESTNODES);
+			flush_logs(logfds[0], stdout);
+			close_logpipes(logfds);
+			exit(FAIL);
+		}
+		flush_logs(logfds[0], stdout);
+	}
+
+	flush_logs(logfds[0], stdout);
+	sleep(seconds);
+	flush_logs(logfds[0], stdout);
+
+	for (i = 1; i <= TESTNODES; i++) {
+		printf("node %u, onwire: %u min: %u max: %u\n", i, knet_h[i]->onwire_ver, knet_h[i]->onwire_min_ver, knet_h[i]->onwire_max_ver);
+		for (j = 1; j <= TESTNODES; j++) {
+			if (j == i) {
+				continue;
+			}
+			if ((knet_h[i]->host_index[j]->status.reachable != 1) || (knet_h[i]->onwire_ver == knet_h[1]->onwire_max_ver)) {
+				knet_handle_stop_nodes(knet_h, TESTNODES);
+				flush_logs(logfds[0], stdout);
+				close_logpipes(logfds);
+				exit(FAIL);
+			}
+		}
+	}
+
+	flush_logs(logfds[0], stdout);
+	sleep(seconds);
+	flush_logs(logfds[0], stdout);
+
+	printf("Test onwire upgrade from %u to %u (all but one node - phase 2, node should be kicked out and remaining nodes should upgrade)\n", knet_h[1]->onwire_max_ver, knet_h[1]->onwire_max_ver + 1);
+
+	for (i = 1; i < TESTNODES; i++) {
+		if (upgrade_onwire_max_ver(knet_h[i], knet_h[i]->onwire_max_ver, knet_h[i]->onwire_max_ver + 1, seconds) < 0) {
+			knet_handle_stop_nodes(knet_h, TESTNODES);
+			flush_logs(logfds[0], stdout);
+			close_logpipes(logfds);
+			exit(FAIL);
+		}
+		flush_logs(logfds[0], stdout);
+	}
+
+	flush_logs(logfds[0], stdout);
+	sleep(seconds);
+	flush_logs(logfds[0], stdout);
+
+	for (i = 1; i <= TESTNODES; i++) {
+		printf("node %u, onwire: %u min: %u max: %u\n", i, knet_h[i]->onwire_ver, knet_h[i]->onwire_min_ver, knet_h[i]->onwire_max_ver);
+		for (j = 1; j <= TESTNODES; j++) {
+			if (j == i) {
+				continue;
+			}
+
+			if (i == TESTNODES) {
+				/*
+				 * highest node has been kicked out and should not
+				 * be able to reach any other node
+				 */
+				if (knet_h[i]->host_index[j]->status.reachable != 0) {
+					knet_handle_stop_nodes(knet_h, TESTNODES);
+					flush_logs(logfds[0], stdout);
+					close_logpipes(logfds);
+					exit(FAIL);
+				}
+			} else {
+				/*
+				 * all other nodes should detect the highest node unreachable
+				 * and all the remaining nodes reachable
+				 */
+				if (j == TESTNODES) {
+					if (knet_h[i]->host_index[j]->status.reachable != 0) {
+						knet_handle_stop_nodes(knet_h, TESTNODES);
+						flush_logs(logfds[0], stdout);
+						close_logpipes(logfds);
+						exit(FAIL);
+					}
+				} else {
+					if (knet_h[i]->host_index[j]->status.reachable != 1) {
+						knet_handle_stop_nodes(knet_h, TESTNODES);
+						flush_logs(logfds[0], stdout);
+						close_logpipes(logfds);
+						exit(FAIL);
+					}
+				}
+			}
+		}
+	}
+
+	flush_logs(logfds[0], stdout);
+	sleep(seconds);
+	flush_logs(logfds[0], stdout);
+
+	/*
+	 * CHANGE THIS TEST if we decide to support downgrades
+	 */
+	printf("Testing node rejoining one version lower (cluster should reject the node)\n");
+
+	if (upgrade_onwire_max_ver(knet_h[TESTNODES], knet_h[1]->onwire_min_ver - 1, knet_h[1]->onwire_max_ver - 1, seconds) < 0) {
+		knet_handle_stop_nodes(knet_h, TESTNODES);
+		flush_logs(logfds[0], stdout);
+		close_logpipes(logfds);
+		exit(FAIL);
+	}
+
+	/*
+	 * need more time here for membership to settle
+	 */
+	flush_logs(logfds[0], stdout);
+	sleep(seconds);
+	flush_logs(logfds[0], stdout);
+	sleep(seconds);
+	flush_logs(logfds[0], stdout);
+
+	for (i = 1; i <= TESTNODES; i++) {
+		printf("node %u, onwire: %u min: %u max: %u\n", i, knet_h[i]->onwire_ver, knet_h[i]->onwire_min_ver, knet_h[i]->onwire_max_ver);
+		for (j = 1; j <= TESTNODES; j++) {
+			if (j == i) {
+				continue;
+			}
+
+			if (i == TESTNODES) {
+				/*
+				 * highest node has been kicked out and should not
+				 * be able to reach any other node
+				 */
+				if (knet_h[i]->host_index[j]->status.reachable != 0) {
+					knet_handle_stop_nodes(knet_h, TESTNODES);
+					flush_logs(logfds[0], stdout);
+					close_logpipes(logfds);
+					exit(FAIL);
+				}
+			} else {
+				/*
+				 * all other nodes should detect the highest node unreachable
+				 * and all the remaining nodes reachable
+				 */
+				if (j == TESTNODES) {
+					if (knet_h[i]->host_index[j]->status.reachable != 0) {
+						knet_handle_stop_nodes(knet_h, TESTNODES);
+						flush_logs(logfds[0], stdout);
+						close_logpipes(logfds);
+						exit(FAIL);
+					}
+				} else {
+					if (knet_h[i]->host_index[j]->status.reachable != 1) {
+						knet_handle_stop_nodes(knet_h, TESTNODES);
+						flush_logs(logfds[0], stdout);
+						close_logpipes(logfds);
+						exit(FAIL);
+					}
+				}
+			}
+		}
+	}
+
+	printf("Testing node rejoining with proper version (cluster should reform)\n");
+
+	if (upgrade_onwire_max_ver(knet_h[TESTNODES], knet_h[1]->onwire_min_ver, knet_h[1]->onwire_max_ver, seconds) < 0) {
+		knet_handle_stop_nodes(knet_h, TESTNODES);
+		flush_logs(logfds[0], stdout);
+		close_logpipes(logfds);
+		exit(FAIL);
+	}
+
+	/*
+	 * need more time here for membership to settle
+	 */
+	flush_logs(logfds[0], stdout);
+	sleep(seconds);
+	flush_logs(logfds[0], stdout);
+	sleep(seconds);
+	flush_logs(logfds[0], stdout);
+
+	for (i = 1; i <= TESTNODES; i++) {
+		printf("node %u, onwire: %u min: %u max: %u\n", i, knet_h[i]->onwire_ver, knet_h[i]->onwire_min_ver, knet_h[i]->onwire_max_ver);
+		for (j = 1; j <= TESTNODES; j++) {
+			if (j == i) {
+				continue;
+			}
+			if ((knet_h[i]->host_index[j]->status.reachable != 1) || (knet_h[i]->onwire_ver != knet_h[1]->onwire_max_ver)) {
+				knet_handle_stop_nodes(knet_h, TESTNODES);
+				flush_logs(logfds[0], stdout);
+				close_logpipes(logfds);
+				exit(FAIL);
+			}
+		}
+	}
+
+	printf("Testing node force onwire version\n");
+
+	for (i = 1; i <= TESTNODES; i++) {
+		if (knet_handle_set_onwire_ver(knet_h[i], knet_h[i]->onwire_min_ver) < 0) {
+			knet_handle_stop_nodes(knet_h, TESTNODES);
+			flush_logs(logfds[0], stdout);
+			close_logpipes(logfds);
+			exit(FAIL);
+		}
+	}
+
+	/*
+	 * need more time here for membership to settle
+	 */
+	flush_logs(logfds[0], stdout);
+	sleep(seconds);
+	flush_logs(logfds[0], stdout);
+	sleep(seconds);
+	flush_logs(logfds[0], stdout);
+
+	for (i = 1; i <= TESTNODES; i++) {
+		printf("node %u, onwire: %u min: %u max: %u\n", i, knet_h[i]->onwire_ver, knet_h[i]->onwire_min_ver, knet_h[i]->onwire_max_ver);
+		for (j = 1; j <= TESTNODES; j++) {
+			if (j == i) {
+				continue;
+			}
+			if ((knet_h[i]->host_index[j]->status.reachable != 1) || (knet_h[i]->onwire_ver != knet_h[1]->onwire_min_ver)) {
+				knet_handle_stop_nodes(knet_h, TESTNODES);
+				flush_logs(logfds[0], stdout);
+				close_logpipes(logfds);
+				exit(FAIL);
+			}
+		}
+	}
+
+	knet_handle_stop_nodes(knet_h, TESTNODES); /* stop handles before closing their log pipe, as the error paths do */
+	flush_logs(logfds[0], stdout);
+	close_logpipes(logfds);
+}
+
+int main(int argc, char *argv[])
+{
+	test();
+
+	return PASS;
+}
diff --git a/libknet/tests/pckt_test.c b/libknet/tests/pckt_test.c
index 30798f3c..35e73c37 100644
--- a/libknet/tests/pckt_test.c
+++ b/libknet/tests/pckt_test.c
@@ -1,23 +1,23 @@
 /*
  * Copyright (C) 2015-2020 Red Hat, Inc. All rights reserved.
  *
  * Author: Fabio M. Di Nitto
  *
  * This software licensed under GPL-2.0+
  */
 
 #include
 
 #include "onwire.h"
 
 int main(void)
 {
 	printf("\nKronosnet network header size printout:\n\n");
 	printf("KNET_HEADER_ALL_SIZE: %zu\n", KNET_HEADER_ALL_SIZE);
 	printf("KNET_HEADER_SIZE: %zu\n", KNET_HEADER_SIZE);
-	printf("KNET_HEADER_PING_SIZE: %zu (%zu)\n", KNET_HEADER_PING_SIZE, sizeof(struct knet_header_payload_ping));
-	printf("KNET_HEADER_PMTUD_SIZE: %zu (%zu)\n", KNET_HEADER_PMTUD_SIZE, sizeof(struct knet_header_payload_pmtud));
-	printf("KNET_HEADER_DATA_SIZE: %zu (%zu)\n", KNET_HEADER_DATA_SIZE, sizeof(struct knet_header_payload_data));
+	printf("KNET_HEADER_PING_V1_SIZE: %zu (%zu)\n", KNET_HEADER_PING_V1_SIZE, sizeof(struct knet_header_payload_ping_v1)); /* size macro, then sizeof the payload struct alone */
+	printf("KNET_HEADER_PMTUD_V1_SIZE: %zu (%zu)\n", KNET_HEADER_PMTUD_V1_SIZE, sizeof(struct knet_header_payload_pmtud_v1));
+	printf("KNET_HEADER_DATA_V1_SIZE: %zu (%zu)\n", KNET_HEADER_DATA_V1_SIZE, sizeof(struct knet_header_payload_data_v1));
 
 	return 0;
 }
diff --git a/libknet/tests/test-common.c b/libknet/tests/test-common.c
index c379b87a..4ac74629 100644
--- a/libknet/tests/test-common.c
+++ b/libknet/tests/test-common.c
@@ -1,705 +1,785 @@
 /*
  * Copyright (C) 2016-2020 Red Hat, Inc. All rights reserved.
  *
  * Author: Fabio M.
Di Nitto
  *
  * This software licensed under GPL-2.0+
  */
 
 #include "config.h"
 
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 
 #include "libknet.h"
 #include "test-common.h"
 
 static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER;
 static int log_init = 0;
 static pthread_mutex_t log_thread_mutex = PTHREAD_MUTEX_INITIALIZER;
 static pthread_t log_thread;
 static int log_thread_init = 0;
 static int log_fds[2];
 struct log_thread_data {
 	int logfd;
 	FILE *std;
 };
 static struct log_thread_data data;
 
 static int _read_pipe(int fd, char **file, size_t *length)
 {
 	char buf[4096];
 	int n;
 	int done = 0;
 
 	*file = NULL;
 	*length = 0;
 
 	memset(buf, 0, sizeof(buf));
 
 	while (!done) {
 
 		n = read(fd, buf, sizeof(buf));
 
 		if (n < 0) {
 			if (errno == EINTR)
 				continue;
 
 			if (*file)
 				free(*file);
 
 			return n;
 		}
 
 		if (n == 0 && (!*length))
 			return 0;
 
 		if (n == 0)
 			done = 1;
 
 		if (*file)
 			*file = realloc(*file, (*length) + n + done);
 		else
 			*file = malloc(n + done);
 
 		if (!*file)
 			return -1;
 
 		memmove((*file) + (*length), buf, n);
 		*length += (done + n);
 	}
 
 	/* Null terminator */
 	(*file)[(*length) - 1] = 0;
 
 	return 0;
 }
 
 int execute_shell(const char *command, char **error_string)
 {
 	pid_t pid;
 	int status, err = 0;
 	int fd[2];
 	size_t size = 0;
 
 	if ((command == NULL) || (!error_string)) {
 		errno = EINVAL;
 		return FAIL;
 	}
 
 	*error_string = NULL;
 
 	err = pipe(fd);
 	if (err)
 		goto out_clean;
 
 	pid = fork();
 	if (pid < 0) {
 		err = pid;
 		goto out_clean;
 	}
 
 	if (pid) { /* parent */
 
 		close(fd[1]);
 		err = _read_pipe(fd[0], error_string, &size);
 		if (err)
 			goto out_clean0;
 
 		waitpid(pid, &status, 0);
 		if (!WIFEXITED(status)) {
 			err = -1;
 			goto out_clean0;
 		}
 		if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
 			err = WEXITSTATUS(status);
 			goto out_clean0;
 		}
 		goto out_clean0;
 	} else { /* child */
 		close(0);
 		close(1);
 		close(2);
 
 		close(fd[0]);
 		dup2(fd[1], 1);
 		dup2(fd[1], 2);
 		close(fd[1]);
 
 		execlp("/bin/sh", "/bin/sh", "-c", command, NULL);
 		exit(FAIL);
 	}
 
 out_clean:
 	close(fd[1]);
 out_clean0:
 	close(fd[0]);
 
 	return err;
 }
 
 int is_memcheck(void)
 {
 	char *val;
 
 	val = getenv("KNETMEMCHECK");
 
 	if (val) {
 		if (!strncmp(val, "yes", 3)) {
 			return 1;
 		}
 	}
 
 	return 0;
 }
 
 int is_helgrind(void)
 {
 	char *val;
 
 	val = getenv("KNETHELGRIND");
 
 	if (val) {
 		if (!strncmp(val, "yes", 3)) {
 			return 1;
 		}
 	}
 
 	return 0;
 }
 
 void set_scheduler(int policy)
 {
 	struct sched_param sched_param;
 	int err;
 
 	err = sched_get_priority_max(policy);
 	if (err < 0) {
 		printf("Could not get maximum scheduler priority\n");
 		exit(FAIL);
 	}
 
 	sched_param.sched_priority = err;
 	err = sched_setscheduler(0, policy, &sched_param);
 	if (err < 0) {
 		printf("Could not set priority\n");
 		exit(FAIL);
 	}
 
 	return;
 }
 
 int setup_logpipes(int *logfds)
 {
 	if (pipe2(logfds, O_CLOEXEC | O_NONBLOCK) < 0) {
 		printf("Unable to setup logging pipe\n");
 		exit(FAIL);
 	}
 
 	return PASS;
 }
 
 void close_logpipes(int *logfds)
 {
 	close(logfds[0]);
 	logfds[0] = 0;
 	close(logfds[1]);
 	logfds[1] = 0;
 }
 
 void flush_logs(int logfd, FILE *std)
 {
 	struct knet_log_msg msg;
 	int len;
 
 	while (1) {
 		len = read(logfd, &msg, sizeof(msg));
 		if (len != sizeof(msg)) {
 			/*
 			 * clear errno to avoid incorrect propagation
 			 */
 			errno = 0;
 			return;
 		}
 
 		if (!msg.knet_h) {
 			/*
 			 * this is harsh but this function is void
 			 * and it is used also inside log_thread.
 			 * this is the easiest to get out with an error
 			 */
 			fprintf(std, "NO HANDLE INFO IN LOG MSG!!\n");
 			abort();
 		}
 
 		msg.msg[sizeof(msg.msg) - 1] = 0;
 
 		fprintf(std, "[knet: %p]: [%s] %s: %.*s\n",
 			msg.knet_h,
 			knet_log_get_loglevel_name(msg.msglevel),
 			knet_log_get_subsystem_name(msg.subsystem),
 			KNET_MAX_LOG_MSG_SIZE, msg.msg);
 	}
 }
 
 static void *_logthread(void *args)
 {
 	while (1) {
 		int num;
 		struct timeval tv = { 60, 0 };
 		fd_set rfds;
 
 		FD_ZERO(&rfds);
 		FD_SET(data.logfd, &rfds);
 
 		num = select(FD_SETSIZE, &rfds, NULL, NULL, &tv);
 		if (num < 0) {
 			fprintf(data.std, "Unable select over logfd!\nHALTING LOGTHREAD!\n");
 			return NULL;
 		}
 		if (num == 0) {
 			fprintf(data.std, "[knet]: No logs in the last 60 seconds\n");
 			continue;
 		}
 		if (FD_ISSET(data.logfd, &rfds)) {
 			flush_logs(data.logfd, data.std);
 		}
 	}
 }
 
 int start_logthread(int logfd, FILE *std)
 {
 	int savederrno = 0;
 
 	savederrno = pthread_mutex_lock(&log_thread_mutex);
 	if (savederrno) {
 		printf("Unable to get log_thread mutex lock\n");
 		return -1;
 	}
 
 	if (!log_thread_init) {
 		data.logfd = logfd;
 		data.std = std;
 
 		savederrno = pthread_create(&log_thread, 0, _logthread, NULL);
 		if (savederrno) {
 			printf("Unable to start logging thread: %s\n", strerror(savederrno));
 			pthread_mutex_unlock(&log_thread_mutex);
 			return -1;
 		}
 		log_thread_init = 1;
 	}
 
 	pthread_mutex_unlock(&log_thread_mutex);
 	return 0;
 }
 
 int stop_logthread(void)
 {
 	int savederrno = 0;
 	void *retval;
 
 	savederrno = pthread_mutex_lock(&log_thread_mutex);
 	if (savederrno) {
 		printf("Unable to get log_thread mutex lock\n");
 		return -1;
 	}
 
 	if (log_thread_init) {
 		pthread_cancel(log_thread);
 		pthread_join(log_thread, &retval);
 		log_thread_init = 0;
 	}
 
 	pthread_mutex_unlock(&log_thread_mutex);
 	return 0;
 }
 
 static void stop_logging(void)
 {
 	stop_logthread();
 	flush_logs(log_fds[0], stdout);
 	close_logpipes(log_fds);
 }
 
 int start_logging(FILE *std)
 {
 	int savederrno = 0;
 
 	savederrno = pthread_mutex_lock(&log_mutex);
 	if (savederrno) {
 		printf("Unable to get log_mutex lock\n");
 		return -1;
 	}
 
 	if (!log_init) {
 		setup_logpipes(log_fds);
 
 		if (atexit(&stop_logging) != 0) {
 			printf("Unable to register atexit handler to stop logging: %s\n",
 			       strerror(errno));
 			exit(FAIL);
 		}
 
 		if (start_logthread(log_fds[0], std) < 0) {
 			exit(FAIL);
 		}
 
 		log_init = 1;
 	}
 
 	pthread_mutex_unlock(&log_mutex);
 
 	return log_fds[1];
 }
 
 knet_handle_t knet_handle_start(int logfds[2], uint8_t log_level)
 {
 	knet_handle_t knet_h = knet_handle_new(1, logfds[1], log_level, 0);
 
 	if (knet_h) {
 		printf("knet_handle_new at %p\n", knet_h);
 		return knet_h;
 	} else {
 		printf("knet_handle_new failed: %s\n", strerror(errno));
 		flush_logs(logfds[0], stdout);
 		close_logpipes(logfds);
 		exit(FAIL);
 	}
 }
 
+int knet_handle_reconnect_links(knet_handle_t knet_h)
+{
+	size_t i, j;
+	knet_node_id_t host_ids[KNET_MAX_HOST];
+	uint8_t link_ids[KNET_MAX_LINK];
+	size_t host_ids_entries = 0, link_ids_entries = 0;
+	unsigned int enabled;
+
+	if (!knet_h) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	if (knet_host_get_host_list(knet_h, host_ids, &host_ids_entries) < 0) {
+		printf("knet_host_get_host_list failed: %s\n", strerror(errno));
+		return -1;
+	}
+
+	for (i = 0; i < host_ids_entries; i++) { /* enable every link found disabled, host by host */
+		if (knet_link_get_link_list(knet_h, host_ids[i], link_ids, &link_ids_entries)) {
+			printf("knet_link_get_link_list failed: %s\n", strerror(errno));
+			return -1;
+		}
+		for (j = 0; j < link_ids_entries; j++) { /* j indexes link_ids[]; the link id is link_ids[j] */
+			if (knet_link_get_enable(knet_h, host_ids[i], link_ids[j], &enabled)) {
+				printf("knet_link_get_enable failed: %s\n", strerror(errno));
+				return -1;
+			}
+			if (!enabled) {
+				if (knet_link_set_enable(knet_h, host_ids[i], link_ids[j], 1)) {
+					printf("knet_link_set_enable failed: %s\n", strerror(errno));
+					return -1;
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
+int knet_handle_disconnect_links(knet_handle_t knet_h)
+{
+	size_t i, j;
+	knet_node_id_t host_ids[KNET_MAX_HOST];
+	uint8_t link_ids[KNET_MAX_LINK];
+	size_t host_ids_entries = 0, link_ids_entries = 0;
+	unsigned int enabled;
+
+	if (!knet_h) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	if (knet_host_get_host_list(knet_h, host_ids, &host_ids_entries) < 0) {
+		printf("knet_host_get_host_list failed: %s\n", strerror(errno));
+		return -1;
+	}
+
+	for (i = 0; i < host_ids_entries; i++) { /* disable every link found enabled, host by host */
+		if (knet_link_get_link_list(knet_h, host_ids[i], link_ids, &link_ids_entries)) {
+			printf("knet_link_get_link_list failed: %s\n", strerror(errno));
+			return -1;
+		}
+		for (j = 0; j < link_ids_entries; j++) { /* j indexes link_ids[]; the link id is link_ids[j] */
+			if (knet_link_get_enable(knet_h, host_ids[i], link_ids[j], &enabled)) {
+				printf("knet_link_get_enable failed: %s\n", strerror(errno));
+				return -1;
+			}
+			if (enabled) {
+				if (knet_link_set_enable(knet_h, host_ids[i], link_ids[j], 0)) {
+					printf("knet_link_set_enable failed: %s\n", strerror(errno));
+					return -1;
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
 int knet_handle_stop(knet_handle_t knet_h)
 {
 	size_t i, j;
 	knet_node_id_t host_ids[KNET_MAX_HOST];
 	uint8_t link_ids[KNET_MAX_LINK];
 	size_t host_ids_entries = 0, link_ids_entries = 0;
 	unsigned int enabled;
 
 	if (!knet_h) {
 		errno = EINVAL;
 		return -1;
 	}
 
 	if (knet_handle_setfwd(knet_h, 0) < 0) {
 		printf("knet_handle_setfwd failed: %s\n", strerror(errno));
 		return -1;
 	}
 
 	if (knet_host_get_host_list(knet_h, host_ids, &host_ids_entries) < 0) {
 		printf("knet_host_get_host_list failed: %s\n", strerror(errno));
 		return -1;
 	}
 
 	for (i = 0; i < host_ids_entries; i++) {
 		if (knet_link_get_link_list(knet_h, host_ids[i], link_ids, &link_ids_entries)) {
 			printf("knet_link_get_link_list failed: %s\n", strerror(errno));
 			return -1;
 		}
 		for (j = 0; j < link_ids_entries; j++) {
 			if (knet_link_get_enable(knet_h, host_ids[i], link_ids[j], &enabled)) {
 				printf("knet_link_get_enable failed: %s\n", strerror(errno));
 				return -1;
 			}
 			if (enabled) {
 				if (knet_link_set_enable(knet_h, host_ids[i], j, 0)) {
 					printf("knet_link_set_enable failed: %s\n", strerror(errno));
 					return -1;
 				}
 			}
 			printf("clearing config for: %p host: %u link: %zu\n", knet_h, host_ids[i], j);
 			knet_link_clear_config(knet_h, host_ids[i], j);
 		}
 		if (knet_host_remove(knet_h, host_ids[i]) < 0) {
 			printf("knet_host_remove failed: %s\n", strerror(errno));
 			return -1;
 		}
 	}
 
 	if (knet_handle_free(knet_h)) {
 		printf("knet_handle_free failed: %s\n", strerror(errno));
 		return -1;
 	}
 	return 0;
 }
 
 static int _make_local_sockaddr(struct sockaddr_storage *lo, int offset, int family)
 {
 	in_port_t port;
 	char portstr[32];
 
 	if (offset < 0) {
 		/*
 		 * api_knet_link_set_config needs to access the API directly, but
 		 * it does not send any traffic, so it´s safe to ask the kernel
 		 * for a random port.
 		 */
 		port = 0;
 	} else {
 		/* Use the pid if we can. but makes sure its in a sensible range */
 		port = (getpid() + offset) % (65536-1024) + 1024;
 	}
 	sprintf(portstr, "%u", port);
 	memset(lo, 0, sizeof(struct sockaddr_storage));
 	printf("Using port %u\n", port);
 
 	if (family == AF_INET6) {
 		return knet_strtoaddr("::1", portstr, lo, sizeof(struct sockaddr_storage));
 	}
 	return knet_strtoaddr("127.0.0.1", portstr, lo, sizeof(struct sockaddr_storage));
 }
 
 int make_local_sockaddr(struct sockaddr_storage *lo, int offset)
 {
 	return _make_local_sockaddr(lo, offset, AF_INET);
 }
 
 int make_local_sockaddr6(struct sockaddr_storage *lo, int offset)
 {
 	return _make_local_sockaddr(lo, offset, AF_INET6);
 }
 
 int _knet_link_set_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id,
 			  uint8_t transport, uint64_t flags, int family, int dynamic,
 			  struct sockaddr_storage *lo)
 {
 	int err = 0, savederrno = 0;
 	uint32_t port;
 	char portstr[32];
 
 	for (port = 1025; port < 65536; port++) {
 		sprintf(portstr, "%u", port);
 		memset(lo, 0, sizeof(struct sockaddr_storage));
 		if (family == AF_INET6) {
 			err = knet_strtoaddr("::1", portstr, lo, sizeof(struct sockaddr_storage));
 		} else {
 			err = knet_strtoaddr("127.0.0.1", portstr, lo, sizeof(struct sockaddr_storage));
 		}
 		if (err < 0) {
 			printf("Unable to convert loopback to sockaddr: %s\n", strerror(errno));
 			goto out;
 		}
 		errno = 0;
 		if (dynamic) {
 			err = knet_link_set_config(knet_h, host_id, link_id, transport, lo, NULL, flags);
 		} else {
 			err = knet_link_set_config(knet_h, host_id, link_id, transport, lo, lo, flags);
 		}
 		savederrno = errno;
 		if ((err < 0) && (savederrno != EADDRINUSE)) {
 			printf("Unable to configure link: %s\n", strerror(savederrno));
 			goto out;
 		}
 		if (!err) {
 			printf("Using port %u\n", port);
 			goto out;
 		}
 	}
 
 	if (err) {
 		printf("No more ports available\n");
 	}
 out:
 	errno = savederrno;
 	return err;
 }
 
 void test_sleep(knet_handle_t knet_h, int seconds)
 {
 	if (is_memcheck() || is_helgrind()) {
 		printf("Test suite is running under valgrind, adjusting sleep timers\n");
 		seconds = seconds * 16;
 	}
 	sleep(seconds);
 }
 
 int wait_for_host(knet_handle_t knet_h, uint16_t host_id, int seconds, int logfd, FILE *std)
 {
 	int i = 0;
 
 	if (is_memcheck() || is_helgrind()) {
 		printf("Test suite is running under valgrind, adjusting wait_for_host timeout\n");
 		seconds = seconds * 16;
 	}
 
 	while (i < seconds) {
 		flush_logs(logfd, std);
 		if (knet_h->host_index[host_id]->status.reachable == 1) {
 			printf("Waiting for host to settle\n");
 			test_sleep(knet_h, 1);
 			return 0;
 		}
 		printf("waiting host %u to be reachable for %d more seconds\n", host_id, seconds - i);
 		sleep(1);
 		i++;
 	}
 	return -1;
 }
 
 int wait_for_packet(knet_handle_t knet_h, int seconds, int datafd, int logfd, FILE *std)
 {
 	fd_set rfds;
 	struct timeval tv;
 	int err = 0, i = 0;
 
 	if (is_memcheck() || is_helgrind()) {
 		printf("Test suite is running under valgrind, adjusting wait_for_packet timeout\n");
 		seconds = seconds * 16;
 	}
 
 try_again:
 	FD_ZERO(&rfds);
 	FD_SET(datafd, &rfds);
 
 	tv.tv_sec = 1;
 	tv.tv_usec = 0;
 
 	err = select(datafd+1, &rfds, NULL, NULL, &tv);
 	/*
 	 * on slow arches the first call to select can return 0.
 	 * pick an arbitrary 10 times loop (multiplied by waiting seconds)
 	 * before failing.
 	 */
 	if ((!err) && (i < seconds)) {
 		flush_logs(logfd, std);
 		i++;
 		goto try_again;
 	}
 	if ((err > 0) && (FD_ISSET(datafd, &rfds))) {
 		return 0;
 	}
 
 	errno = ETIMEDOUT;
 	return -1;
 }
 
 /*
  * functional tests helpers
  */
 
 void knet_handle_start_nodes(knet_handle_t knet_h[], uint8_t numnodes, int logfds[2], uint8_t log_level)
 {
 	uint8_t i;
 
 	for (i = 1; i <= numnodes; i++) {
 		knet_h[i] = knet_handle_new(i, logfds[1], log_level, 0);
 		if (!knet_h[i]) {
 			printf("failed to create handle: %s\n", strerror(errno));
 			break;
 		} else {
 			printf("knet_h[%u] at %p\n", i, knet_h[i]);
 		}
 	}
 
 	if (i < numnodes) {
 		knet_handle_stop_nodes(knet_h, i);
 		exit(FAIL);
 	}
 
 	return;
 }
 
 void knet_handle_stop_nodes(knet_handle_t knet_h[], uint8_t numnodes)
 {
 	uint8_t i;
 
 	for (i = 1; i <= numnodes; i++) {
 		printf("stopping handle %u at %p\n", i, knet_h[i]);
 		knet_handle_stop(knet_h[i]);
 	}
 
 	return;
 }
 
 void knet_handle_join_nodes(knet_handle_t knet_h[], uint8_t numnodes, uint8_t numlinks, int family, uint8_t transport)
 {
 	uint8_t i, x, j;
 	struct sockaddr_storage src, dst;
 
 	for (i = 1; i <= numnodes; i++) {
 		for (j = 1; j <= numnodes; j++) {
 			/*
 			 * don´t connect to itself
 			 */
 			if (j == i) {
 				continue;
 			}
 
 			printf("host %u adding host: %u\n", i, j);
 
 			if (knet_host_add(knet_h[i], j) < 0) {
 				printf("Unable to add host: %s\n", strerror(errno));
 				knet_handle_stop_nodes(knet_h, numnodes);
 				exit(FAIL);
 			}
 
 			for (x = 0; x < numlinks; x++) {
 				if (family == AF_INET6) {
 					if (make_local_sockaddr6(&src, i + x) < 0) {
 						printf("Unable to convert src to sockaddr: %s\n", strerror(errno));
 						knet_handle_stop_nodes(knet_h, numnodes);
 						exit(FAIL);
 					}
 
 					if (make_local_sockaddr6(&dst, j + x) < 0) {
 						printf("Unable to convert dst to sockaddr: %s\n", strerror(errno));
 						knet_handle_stop_nodes(knet_h, numnodes);
 						exit(FAIL);
 					}
 				} else {
 					if (make_local_sockaddr(&src, i + x) < 0) {
 						printf("Unable to convert src to sockaddr: %s\n", strerror(errno));
 						knet_handle_stop_nodes(knet_h, numnodes);
 						exit(FAIL);
 					}
 
 					if (make_local_sockaddr(&dst, j + x) < 0) {
 						printf("Unable to convert dst to sockaddr: %s\n", strerror(errno));
 						knet_handle_stop_nodes(knet_h, numnodes);
 						exit(FAIL);
 					}
 				}
 
 				printf("joining node %u with node %u via link %u src offset: %u dst offset: %u\n", i, j, x, i+x, j+x);
 
 				if (knet_link_set_config(knet_h[i], j, x, transport, &src, &dst, 0) < 0) {
 					printf("unable to configure link: %s\n", strerror(errno));
 					knet_handle_stop_nodes(knet_h, numnodes);
 					exit(FAIL);
 				}
 
 				if (knet_link_set_enable(knet_h[i], j, x, 1) < 0) {
 					printf("unable to enable link: %s\n", strerror(errno));
 					knet_handle_stop_nodes(knet_h, numnodes);
 					exit(FAIL);
 				}
 			}
 		}
 	}
 
 	for (i = 1; i <= numnodes; i++) {
 		for (j = 1; j <= numnodes; j++) {
 			/*
 			 * don´t wait for self
 			 */
 			if (j == i) {
 				continue;
 			}
 
 			if (wait_for_host(knet_h[i], j, (10 * numnodes) , knet_h[i]->logfd, stdout) < 0) {
 				printf("Cannot connect node %u to node %u: %s\n", i, j, strerror(errno));
 				knet_handle_stop_nodes(knet_h, numnodes);
 				exit(FAIL);
 			}
 		}
 	}
 
 	return;
 }
diff --git a/libknet/tests/test-common.h b/libknet/tests/test-common.h
index cc31e62c..41ac37ae 100644
--- a/libknet/tests/test-common.h
+++ b/libknet/tests/test-common.h
@@ -1,91 +1,93 @@
 /*
  * Copyright (C) 2016-2020 Red Hat, Inc. All rights reserved.
  *
  * Authors: Fabio M.
Di Nitto * * This software licensed under GPL-2.0+ */ #ifndef __KNET_TEST_COMMON_H__ #define __KNET_TEST_COMMON_H__ #include "internals.h" #include /* * error codes from automake test-driver */ #define PASS 0 #define SKIP 77 #define ERROR 99 #define FAIL -1 /* For *BSD compatibility */ #ifndef s6_addr16 #define s6_addr8 __u6_addr.__u6_addr8 #define s6_addr16 __u6_addr.__u6_addr16 #define s6_addr32 __u6_addr.__u6_addr32 #endif /* * common facilities */ int execute_shell(const char *command, char **error_string); int is_memcheck(void); int is_helgrind(void); void set_scheduler(int policy); knet_handle_t knet_handle_start(int logfds[2], uint8_t log_level); /* * consider moving this one as official API */ int knet_handle_stop(knet_handle_t knet_h); /* * knet_link_set_config wrapper required to find a free port */ int _knet_link_set_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t transport, uint64_t flags, int family, int dynamic, struct sockaddr_storage *lo); /* * functional test helpers */ void knet_handle_start_nodes(knet_handle_t knet_h[], uint8_t numnodes, int logfds[2], uint8_t log_level); void knet_handle_stop_nodes(knet_handle_t knet_h[], uint8_t numnodes); void knet_handle_join_nodes(knet_handle_t knet_h[], uint8_t numnodes, uint8_t numlinks, int family, uint8_t transport); +int knet_handle_disconnect_links(knet_handle_t knet_h); +int knet_handle_reconnect_links(knet_handle_t knet_h); /* * high level logging function. * automatically setup logpipes and start/stop logging thread. * * start_logging exit(FAIL) on error or fd to pass to knet_handle_new * and it will install an atexit handle to close logging properly * * WARNING: DO NOT use start_logging for api_ or int_ testing. * while start_logging would work just fine, the output * of the logs is more complex to read because of the way * the thread would interleave the output of printf from api_/int_ testing * with knet logs. 
Functionally speaking you get the exact same logs, * but a lot harder to read due to the thread latency in printing logs. */ int start_logging(FILE *std); int setup_logpipes(int *logfds); void close_logpipes(int *logfds); void flush_logs(int logfd, FILE *std); int start_logthread(int logfd, FILE *std); int stop_logthread(void); int make_local_sockaddr(struct sockaddr_storage *lo, int offset); int make_local_sockaddr6(struct sockaddr_storage *lo, int offset); int wait_for_host(knet_handle_t knet_h, uint16_t host_id, int seconds, int logfd, FILE *std); int wait_for_packet(knet_handle_t knet_h, int seconds, int datafd, int logfd, FILE *std); void test_sleep(knet_handle_t knet_h, int seconds); #endif diff --git a/libknet/threads_heartbeat.c b/libknet/threads_heartbeat.c index 65a8aece..ba039e5d 100644 --- a/libknet/threads_heartbeat.c +++ b/libknet/threads_heartbeat.c @@ -1,238 +1,417 @@ /* * Copyright (C) 2015-2020 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include #include "crypto.h" +#include "host.h" #include "links.h" #include "logging.h" #include "transports.h" #include "threads_common.h" #include "threads_heartbeat.h" +#include "onwire_v1.h" static void _link_down(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link) { memset(&dst_link->pmtud_last, 0, sizeof(struct timespec)); dst_link->received_pong = 0; dst_link->status.pong_last.tv_nsec = 0; dst_link->pong_timeout_backoff = KNET_LINK_PONG_TIMEOUT_BACKOFF; if (dst_link->status.connected == 1) { log_info(knet_h, KNET_SUB_LINK, "host: %u link: %u is down", dst_host->host_id, dst_link->link_id); _link_updown(knet_h, dst_host->host_id, dst_link->link_id, dst_link->status.enabled, 0, 1); } } -static void _handle_check_each(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link, int timed) +static void send_ping(knet_handle_t 
knet_h, struct knet_host *dst_host, struct knet_link *dst_link, int timed) { int err = 0, savederrno = 0, stats_err = 0; int len; - ssize_t outlen = KNET_HEADER_PING_SIZE; + ssize_t outlen; struct timespec clock_now, pong_last; unsigned long long diff_ping; unsigned char *outbuf = (unsigned char *)knet_h->pingbuf; + uint8_t onwire_ver; if (dst_link->transport_connected == 0) { _link_down(knet_h, dst_host, dst_link); return; } /* caching last pong to avoid race conditions */ pong_last = dst_link->status.pong_last; if (clock_gettime(CLOCK_MONOTONIC, &clock_now) != 0) { log_debug(knet_h, KNET_SUB_HEARTBEAT, "Unable to get monotonic clock"); return; } timespec_diff(dst_link->ping_last, clock_now, &diff_ping); if ((diff_ping >= (dst_link->ping_interval * 1000llu)) || (!timed)) { - memmove(&knet_h->pingbuf->khp_ping_time[0], &clock_now, sizeof(struct timespec)); - knet_h->pingbuf->khp_ping_link = dst_link->link_id; - if (pthread_mutex_lock(&knet_h->tx_seq_num_mutex)) { - log_debug(knet_h, KNET_SUB_HEARTBEAT, "Unable to get seq mutex lock"); + if (pthread_mutex_lock(&knet_h->onwire_mutex)) { + log_debug(knet_h, KNET_SUB_HEARTBEAT, "Unable to get onwire mutex lock"); return; } - knet_h->pingbuf->khp_ping_seq_num = htons(knet_h->tx_seq_num); - pthread_mutex_unlock(&knet_h->tx_seq_num_mutex); - knet_h->pingbuf->khp_ping_timed = timed; + onwire_ver = knet_h->onwire_ver; + pthread_mutex_unlock(&knet_h->onwire_mutex); + + if (knet_h->onwire_ver_remap) { + if (prep_ping_v1(knet_h, dst_link, onwire_ver, clock_now, timed, &outlen) < 0) { + return; + } + } else { + switch (onwire_ver) { + case 1: + if (prep_ping_v1(knet_h, dst_link, onwire_ver, clock_now, timed, &outlen) < 0) { + return; + } + break; + default: + log_warn(knet_h, KNET_SUB_HEARTBEAT, "preparing ping onwire version %u not supported", onwire_ver); + return; + break; + } + } if (knet_h->crypto_in_use_config) { if (crypto_encrypt_and_sign(knet_h, (const unsigned char *)knet_h->pingbuf, outlen, knet_h->pingbuf_crypt, 
&outlen) < 0) { log_debug(knet_h, KNET_SUB_HEARTBEAT, "Unable to crypto ping packet"); return; } outbuf = knet_h->pingbuf_crypt; if (pthread_mutex_lock(&knet_h->handle_stats_mutex) < 0) { log_err(knet_h, KNET_SUB_HEARTBEAT, "Unable to get mutex lock"); return; } knet_h->stats_extra.tx_crypt_ping_packets++; pthread_mutex_unlock(&knet_h->handle_stats_mutex); } stats_err = pthread_mutex_lock(&dst_link->link_stats_mutex); if (stats_err) { log_err(knet_h, KNET_SUB_HEARTBEAT, "Unable to get stats mutex lock for host %u link %u: %s", dst_host->host_id, dst_link->link_id, strerror(stats_err)); return; } retry: if (transport_get_connection_oriented(knet_h, dst_link->transport) == TRANSPORT_PROTO_NOT_CONNECTION_ORIENTED) { len = sendto(dst_link->outsock, outbuf, outlen, MSG_DONTWAIT | MSG_NOSIGNAL, (struct sockaddr *) &dst_link->dst_addr, sizeof(struct sockaddr_storage)); } else { len = sendto(dst_link->outsock, outbuf, outlen, MSG_DONTWAIT | MSG_NOSIGNAL, NULL, 0); } savederrno = errno; dst_link->ping_last = clock_now; dst_link->status.stats.tx_ping_packets++; dst_link->status.stats.tx_ping_bytes += outlen; if (len != outlen) { err = transport_tx_sock_error(knet_h, dst_link->transport, dst_link->outsock, len, savederrno); switch(err) { case -1: /* unrecoverable error */ log_debug(knet_h, KNET_SUB_HEARTBEAT, "Unable to send ping (sock: %d) packet (sendto): %d %s. 
recorded src ip: %s src port: %s dst ip: %s dst port: %s", dst_link->outsock, savederrno, strerror(savederrno), dst_link->status.src_ipaddr, dst_link->status.src_port, dst_link->status.dst_ipaddr, dst_link->status.dst_port); dst_link->status.stats.tx_ping_errors++; break; case 0: break; case 1: dst_link->status.stats.tx_ping_retries++; goto retry; break; } } else { dst_link->last_ping_size = outlen; } pthread_mutex_unlock(&dst_link->link_stats_mutex); } timespec_diff(pong_last, clock_now, &diff_ping); if ((pong_last.tv_nsec) && (diff_ping >= (dst_link->pong_timeout_adj * 1000llu))) { _link_down(knet_h, dst_host, dst_link); } } +static void send_pong(knet_handle_t knet_h, struct knet_host *src_host, struct knet_link *src_link, struct knet_header *inbuf) +{ + int err = 0, savederrno = 0, stats_err = 0; + unsigned char *outbuf = (unsigned char *)inbuf; + ssize_t len, outlen; + + if (knet_h->onwire_ver_remap) { + prep_pong_v1(knet_h, inbuf, &outlen); + } else { + switch (inbuf->kh_version) { + case 1: + prep_pong_v1(knet_h, inbuf, &outlen); + break; + default: + log_warn(knet_h, KNET_SUB_HEARTBEAT, "preparing pong onwire version %u not supported", inbuf->kh_version); + return; + break; + } + } + + if (knet_h->crypto_in_use_config) { + if (crypto_encrypt_and_sign(knet_h, + (const unsigned char *)inbuf, + outlen, + knet_h->recv_from_links_buf_crypt, + &outlen) < 0) { + log_debug(knet_h, KNET_SUB_HEARTBEAT, "Unable to encrypt pong packet"); + return; + } + outbuf = knet_h->recv_from_links_buf_crypt; + stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex); + if (stats_err < 0) { + log_err(knet_h, KNET_SUB_HEARTBEAT, "Unable to get mutex lock: %s", strerror(stats_err)); + return; + } + knet_h->stats_extra.tx_crypt_pong_packets++; + pthread_mutex_unlock(&knet_h->handle_stats_mutex); + } + +retry: + if (src_link->transport_connected) { + if (transport_get_connection_oriented(knet_h, src_link->transport) == TRANSPORT_PROTO_NOT_CONNECTION_ORIENTED) { + len = 
sendto(src_link->outsock, outbuf, outlen, MSG_DONTWAIT | MSG_NOSIGNAL, + (struct sockaddr *) &src_link->dst_addr, sizeof(struct sockaddr_storage)); + } else { + len = sendto(src_link->outsock, outbuf, outlen, MSG_DONTWAIT | MSG_NOSIGNAL, NULL, 0); + } + savederrno = errno; + if (len != outlen) { + err = transport_tx_sock_error(knet_h, src_link->transport, src_link->outsock, len, savederrno); + switch(err) { + case -1: /* unrecoverable error */ + log_debug(knet_h, KNET_SUB_HEARTBEAT, + "Unable to send pong reply (sock: %d) packet (sendto): %d %s. recorded src ip: %s src port: %s dst ip: %s dst port: %s", + src_link->outsock, errno, strerror(errno), + src_link->status.src_ipaddr, src_link->status.src_port, + src_link->status.dst_ipaddr, src_link->status.dst_port); + src_link->status.stats.tx_pong_errors++; + break; + case 0: /* ignore error and continue */ + break; + case 1: /* retry to send those same data */ + src_link->status.stats.tx_pong_retries++; + goto retry; + break; + } + } + src_link->status.stats.tx_pong_packets++; + src_link->status.stats.tx_pong_bytes += outlen; + } +} + +void process_ping(knet_handle_t knet_h, struct knet_host *src_host, struct knet_link *src_link, struct knet_header *inbuf, ssize_t len) +{ + src_link->status.stats.rx_ping_packets++; + src_link->status.stats.rx_ping_bytes += len; + + if (knet_h->onwire_ver_remap) { + process_ping_v1(knet_h, src_host, src_link, inbuf, len); + } else { + switch (inbuf->kh_version) { + case 1: + process_ping_v1(knet_h, src_host, src_link, inbuf, len); + break; + default: + log_warn(knet_h, KNET_SUB_HEARTBEAT, "parsing ping onwire version %u not supported", inbuf->kh_version); + return; + break; + } + } + + send_pong(knet_h, src_host, src_link, inbuf); +} + +void process_pong(knet_handle_t knet_h, struct knet_host *src_host, struct knet_link *src_link, struct knet_header *inbuf, ssize_t len) +{ + struct timespec recvtime; + unsigned long long latency_last; + + clock_gettime(CLOCK_MONOTONIC, 
&src_link->status.pong_last); + + src_link->status.stats.rx_pong_packets++; + src_link->status.stats.rx_pong_bytes += len; + + + if (knet_h->onwire_ver_remap) { + process_pong_v1(knet_h, src_host, src_link, inbuf, &recvtime); + } else { + switch (inbuf->kh_version) { + case 1: + process_pong_v1(knet_h, src_host, src_link, inbuf, &recvtime); + break; + default: + log_warn(knet_h, KNET_SUB_HEARTBEAT, "parsing pong onwire version %u not supported", inbuf->kh_version); + return; + break; + } + } + + timespec_diff(recvtime, + src_link->status.pong_last, &latency_last); + + if ((latency_last / 1000llu) > src_link->pong_timeout) { + log_debug(knet_h, KNET_SUB_HEARTBEAT, + "Incoming pong packet from host: %u link: %u has higher latency than pong_timeout. Discarding", + src_host->host_id, src_link->link_id); + } else { + + /* + * in words : ('previous mean' * '(count -1)') + 'new value') / 'count' + */ + + src_link->status.stats.latency_samples++; + + /* + * limit to max_samples (precision) + */ + if (src_link->status.stats.latency_samples >= src_link->latency_max_samples) { + src_link->status.stats.latency_samples = src_link->latency_max_samples; + } + src_link->status.stats.latency_ave = + (((src_link->status.stats.latency_ave * (src_link->status.stats.latency_samples - 1)) + (latency_last / 1000llu)) / src_link->status.stats.latency_samples); + + if (src_link->status.stats.latency_ave < src_link->pong_timeout_adj) { + if (!src_link->status.connected) { + if (src_link->received_pong >= src_link->pong_count) { + log_info(knet_h, KNET_SUB_HEARTBEAT, "host: %u link: %u is up", + src_host->host_id, src_link->link_id); + _link_updown(knet_h, src_host->host_id, src_link->link_id, src_link->status.enabled, 1, 0); + } else { + src_link->received_pong++; + log_debug(knet_h, KNET_SUB_HEARTBEAT, "host: %u link: %u received pong: %u", + src_host->host_id, src_link->link_id, src_link->received_pong); + } + } + } + /* Calculate latency stats */ + if (src_link->status.stats.latency_ave 
> src_link->status.stats.latency_max) { + src_link->status.stats.latency_max = src_link->status.stats.latency_ave; + } + if (src_link->status.stats.latency_ave < src_link->status.stats.latency_min) { + src_link->status.stats.latency_min = src_link->status.stats.latency_ave; + } + } +} + void _send_pings(knet_handle_t knet_h, int timed) { struct knet_host *dst_host; int link_idx; if (pthread_mutex_lock(&knet_h->hb_mutex)) { log_debug(knet_h, KNET_SUB_HEARTBEAT, "Unable to get hb mutex lock"); return; } for (dst_host = knet_h->host_head; dst_host != NULL; dst_host = dst_host->next) { for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) { if ((dst_host->link[link_idx].status.enabled != 1) || (dst_host->link[link_idx].transport == KNET_TRANSPORT_LOOPBACK ) || ((dst_host->link[link_idx].dynamic == KNET_LINK_DYNIP) && (dst_host->link[link_idx].status.dynconnected != 1))) continue; - _handle_check_each(knet_h, dst_host, &dst_host->link[link_idx], timed); + send_ping(knet_h, dst_host, &dst_host->link[link_idx], timed); } } pthread_mutex_unlock(&knet_h->hb_mutex); } static void _adjust_pong_timeouts(knet_handle_t knet_h) { struct knet_host *dst_host; struct knet_link *dst_link; int link_idx; if (pthread_mutex_lock(&knet_h->backoff_mutex)) { log_debug(knet_h, KNET_SUB_HEARTBEAT, "Unable to get backoff_mutex"); return; } for (dst_host = knet_h->host_head; dst_host != NULL; dst_host = dst_host->next) { for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) { if ((dst_host->link[link_idx].status.enabled != 1) || (dst_host->link[link_idx].transport == KNET_TRANSPORT_LOOPBACK ) || ((dst_host->link[link_idx].dynamic == KNET_LINK_DYNIP) && (dst_host->link[link_idx].status.dynconnected != 1))) continue; dst_link = &dst_host->link[link_idx]; if (dst_link->pong_timeout_backoff > 1) { dst_link->pong_timeout_backoff--; } dst_link->pong_timeout_adj = (dst_link->pong_timeout * dst_link->pong_timeout_backoff) + (dst_link->status.stats.latency_ave * KNET_LINK_PONG_TIMEOUT_LAT_MUL); } 
} pthread_mutex_unlock(&knet_h->backoff_mutex); } void *_handle_heartbt_thread(void *data) { knet_handle_t knet_h = (knet_handle_t) data; int i = 1; set_thread_status(knet_h, KNET_THREAD_HB, KNET_THREAD_STARTED); - /* preparing ping buffer */ - knet_h->pingbuf->kh_version = KNET_HEADER_VERSION; - knet_h->pingbuf->kh_type = KNET_HEADER_TYPE_PING; - knet_h->pingbuf->kh_node = htons(knet_h->host_id); - while (!shutdown_in_progress(knet_h)) { usleep(knet_h->threads_timer_res); if (pthread_rwlock_rdlock(&knet_h->global_rwlock) != 0) { log_debug(knet_h, KNET_SUB_HEARTBEAT, "Unable to get read lock"); continue; } /* * _adjust_pong_timeouts should execute approx once a second. */ if ((i % (1000000 / knet_h->threads_timer_res)) == 0) { _adjust_pong_timeouts(knet_h); i = 1; } else { i++; } _send_pings(knet_h, 1); pthread_rwlock_unlock(&knet_h->global_rwlock); } set_thread_status(knet_h, KNET_THREAD_HB, KNET_THREAD_STOPPED); return NULL; } diff --git a/libknet/threads_heartbeat.h b/libknet/threads_heartbeat.h index 11280555..d0646a6e 100644 --- a/libknet/threads_heartbeat.h +++ b/libknet/threads_heartbeat.h @@ -1,16 +1,19 @@ /* * Copyright (C) 2012-2020 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #ifndef __KNET_THREADS_HEARTBEAT_H__ #define __KNET_THREADS_HEARTBEAT_H__ void _send_pings(knet_handle_t knet_h, int timed); void *_handle_heartbt_thread(void *data); +void process_ping(knet_handle_t knet_h, struct knet_host *src_host, struct knet_link *src_link, struct knet_header *inbuf, ssize_t len); +void process_pong(knet_handle_t knet_h, struct knet_host *src_host, struct knet_link *src_link, struct knet_header *inbuf, ssize_t len); + #endif diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c index 9e433290..6bc18af1 100644 --- a/libknet/threads_pmtud.c +++ b/libknet/threads_pmtud.c @@ -1,804 +1,941 @@ /* * Copyright (C) 2015-2020 Red Hat, Inc. All rights reserved. 
* * Authors: Fabio M. Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include "crypto.h" #include "links.h" #include "host.h" #include "logging.h" #include "transports.h" #include "threads_common.h" #include "threads_pmtud.h" +#include "onwire_v1.h" static int _calculate_manual_mtu(knet_handle_t knet_h, struct knet_link *dst_link) { size_t ipproto_overhead_len; /* onwire packet overhead (protocol based) */ switch (dst_link->dst_addr.ss_family) { case AF_INET6: ipproto_overhead_len = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead; break; case AF_INET: ipproto_overhead_len = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead; break; default: log_debug(knet_h, KNET_SUB_PMTUD, "unknown protocol"); return 0; break; } dst_link->status.mtu = calc_max_data_outlen(knet_h, knet_h->manual_mtu - ipproto_overhead_len); return 1; } static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link) { int err, ret, savederrno, mutex_retry_limit, failsafe, use_kernel_mtu, warn_once; uint32_t kernel_mtu; /* record kernel_mtu from EMSGSIZE */ size_t onwire_len; /* current packet onwire size */ size_t ipproto_overhead_len; /* onwire packet overhead (protocol based) */ size_t max_mtu_len; /* max mtu for protocol */ size_t data_len; /* how much data we can send in the packet * generally would be onwire_len - ipproto_overhead_len * needs to be adjusted for crypto */ size_t app_mtu_len; /* real data that we can send onwire */ ssize_t len; /* len of what we were able to sendto onwire */ + uint8_t onwire_ver; struct timespec ts, pmtud_crypto_start_ts, pmtud_crypto_stop_ts; unsigned long long pong_timeout_adj_tmp, timediff; int pmtud_crypto_reduce = 1; unsigned char *outbuf = (unsigned char *)knet_h->pmtudbuf; warn_once = 0; mutex_retry_limit = 0; failsafe = 0; - knet_h->pmtudbuf->khp_pmtud_link = dst_link->link_id; - switch (dst_link->dst_addr.ss_family) { 
case AF_INET6: max_mtu_len = KNET_PMTUD_SIZE_V6; ipproto_overhead_len = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead; break; case AF_INET: max_mtu_len = KNET_PMTUD_SIZE_V4; ipproto_overhead_len = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead; break; default: log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD aborted, unknown protocol"); return -1; break; } dst_link->last_bad_mtu = 0; dst_link->last_good_mtu = dst_link->last_ping_size + ipproto_overhead_len; /* * discovery starts from the top because kernel will * refuse to send packets > current iface mtu. * this saves us some time and network bw. */ onwire_len = max_mtu_len; -restart: + /* + * cache onwire version for this link / run + */ + if (pthread_mutex_lock(&knet_h->onwire_mutex)) { + log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get onwire mutex lock"); + return -1; + } + onwire_ver = knet_h->onwire_ver; + pthread_mutex_unlock(&knet_h->onwire_mutex); +restart: /* * prevent a race when interface mtu is changed _exactly_ during * the discovery process and it's complex to detect. Easier * to wait the next loop. * 30 is not an arbitrary value. To bisect from 576 to 128000 doesn't * take more than 18/19 steps. */ if (failsafe == 30) { log_err(knet_h, KNET_SUB_PMTUD, "Aborting PMTUD process: Too many attempts. MTU might have changed during discovery."); return -1; } else { failsafe++; } /* * common to all packets */ /* * calculate the application MTU based on current onwire_len minus ipproto_overhead_len */ app_mtu_len = calc_max_data_outlen(knet_h, onwire_len - ipproto_overhead_len); /* * recalculate onwire len back that might be different based * on data padding from crypto layer. */ onwire_len = calc_data_outlen(knet_h, app_mtu_len + KNET_HEADER_ALL_SIZE) + ipproto_overhead_len; /* * calculate the size of what we need to send to sendto(2). * see also onwire.c for packet format explanation. 
*/ data_len = app_mtu_len + knet_h->sec_hash_size + knet_h->sec_salt_size + KNET_HEADER_ALL_SIZE; + if (knet_h->onwire_ver_remap) { + prep_pmtud_v1(knet_h, dst_link, onwire_ver, onwire_len); + } else { + switch (onwire_ver) { + case 1: + prep_pmtud_v1(knet_h, dst_link, onwire_ver, onwire_len); + break; + default: + log_warn(knet_h, KNET_SUB_PMTUD, "preparing PMTUD onwire version %u not supported", onwire_ver); + return -1; + break; + } + } + if (knet_h->crypto_in_use_config) { if (data_len < (knet_h->sec_hash_size + knet_h->sec_salt_size) + 1) { log_debug(knet_h, KNET_SUB_PMTUD, "Aborting PMTUD process: link mtu smaller than crypto header detected (link might have been disconnected)"); return -1; } - knet_h->pmtudbuf->khp_pmtud_size = onwire_len; - if (crypto_encrypt_and_sign(knet_h, (const unsigned char *)knet_h->pmtudbuf, data_len - (knet_h->sec_hash_size + knet_h->sec_salt_size), knet_h->pmtudbuf_crypt, (ssize_t *)&data_len) < 0) { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to crypto pmtud packet"); return -1; } outbuf = knet_h->pmtudbuf_crypt; if (pthread_mutex_lock(&knet_h->handle_stats_mutex) < 0) { log_err(knet_h, KNET_SUB_PMTUD, "Unable to get mutex lock"); return -1; } knet_h->stats_extra.tx_crypt_pmtu_packets++; pthread_mutex_unlock(&knet_h->handle_stats_mutex); - } else { - knet_h->pmtudbuf->khp_pmtud_size = onwire_len; } /* link has gone down, aborting pmtud */ if (dst_link->status.connected != 1) { log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD detected host (%u) link (%u) has been disconnected", dst_host->host_id, dst_link->link_id); return -1; } if (dst_link->transport_connected != 1) { log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD detected host (%u) link (%u) has been disconnected", dst_host->host_id, dst_link->link_id); return -1; } if (pthread_mutex_lock(&knet_h->pmtud_mutex) != 0) { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get mutex lock"); return -1; } if (knet_h->pmtud_abort) { pthread_mutex_unlock(&knet_h->pmtud_mutex); errno = EDEADLK; return -1; } 
savederrno = pthread_mutex_lock(&knet_h->tx_mutex); if (savederrno) { pthread_mutex_unlock(&knet_h->pmtud_mutex); log_err(knet_h, KNET_SUB_PMTUD, "Unable to get TX mutex lock: %s", strerror(savederrno)); return -1; } savederrno = pthread_mutex_lock(&dst_link->link_stats_mutex); if (savederrno) { pthread_mutex_unlock(&knet_h->pmtud_mutex); pthread_mutex_unlock(&knet_h->tx_mutex); log_err(knet_h, KNET_SUB_PMTUD, "Unable to get stats mutex lock for host %u link %u: %s", dst_host->host_id, dst_link->link_id, strerror(savederrno)); return -1; } retry: if (transport_get_connection_oriented(knet_h, dst_link->transport) == TRANSPORT_PROTO_NOT_CONNECTION_ORIENTED) { len = sendto(dst_link->outsock, outbuf, data_len, MSG_DONTWAIT | MSG_NOSIGNAL, (struct sockaddr *) &dst_link->dst_addr, sizeof(struct sockaddr_storage)); } else { len = sendto(dst_link->outsock, outbuf, data_len, MSG_DONTWAIT | MSG_NOSIGNAL, NULL, 0); } savederrno = errno; /* * we cannot hold a lock on kmtu_mutex between resetting * knet_h->kernel_mtu here and below where it's used. * use_kernel_mtu tells us if the knet_h->kernel_mtu was * set to 0 and we can trust its value later. 
*/ use_kernel_mtu = 0; if (pthread_mutex_lock(&knet_h->kmtu_mutex) == 0) { use_kernel_mtu = 1; knet_h->kernel_mtu = 0; pthread_mutex_unlock(&knet_h->kmtu_mutex); } kernel_mtu = 0; err = transport_tx_sock_error(knet_h, dst_link->transport, dst_link->outsock, len, savederrno); switch(err) { case -1: /* unrecoverable error */ log_debug(knet_h, KNET_SUB_PMTUD, "Unable to send pmtu packet (sendto): %d %s", savederrno, strerror(savederrno)); pthread_mutex_unlock(&knet_h->tx_mutex); pthread_mutex_unlock(&knet_h->pmtud_mutex); dst_link->status.stats.tx_pmtu_errors++; pthread_mutex_unlock(&dst_link->link_stats_mutex); return -1; + break; case 0: /* ignore error and continue */ break; case 1: /* retry to send those same data */ dst_link->status.stats.tx_pmtu_retries++; goto retry; break; } pthread_mutex_unlock(&knet_h->tx_mutex); if (len != (ssize_t )data_len) { pthread_mutex_unlock(&dst_link->link_stats_mutex); if (savederrno == EMSGSIZE) { /* * we cannot hold a lock on kmtu_mutex between resetting * knet_h->kernel_mtu and here. * use_kernel_mtu tells us if the knet_h->kernel_mtu was * set to 0 previously and we can trust its value now. 
*/ if (use_kernel_mtu) { use_kernel_mtu = 0; if (pthread_mutex_lock(&knet_h->kmtu_mutex) == 0) { kernel_mtu = knet_h->kernel_mtu; pthread_mutex_unlock(&knet_h->kmtu_mutex); } } if (kernel_mtu > 0) { dst_link->last_bad_mtu = kernel_mtu + 1; } else { dst_link->last_bad_mtu = onwire_len; } } else { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to send pmtu packet len: %zu err: %s", onwire_len, strerror(savederrno)); } } else { dst_link->last_sent_mtu = onwire_len; dst_link->last_recv_mtu = 0; dst_link->status.stats.tx_pmtu_packets++; dst_link->status.stats.tx_pmtu_bytes += data_len; pthread_mutex_unlock(&dst_link->link_stats_mutex); if (clock_gettime(CLOCK_REALTIME, &ts) < 0) { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get current time: %s", strerror(errno)); pthread_mutex_unlock(&knet_h->pmtud_mutex); return -1; } /* * non fatal, we can wait the next round to reduce the * multiplier */ if (clock_gettime(CLOCK_MONOTONIC, &pmtud_crypto_start_ts) < 0) { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get current time: %s", strerror(errno)); pmtud_crypto_reduce = 0; } /* * set PMTUd reply timeout to match pong_timeout on a given link * * math: internally pong_timeout is expressed in microseconds, while * the public API exports milliseconds. So careful with the 0's here. * the loop is necessary because we are grabbing the current time just above * and add values to it that could overflow into seconds. 
*/ if (pthread_mutex_lock(&knet_h->backoff_mutex)) { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get backoff_mutex"); pthread_mutex_unlock(&knet_h->pmtud_mutex); return -1; } if (knet_h->crypto_in_use_config) { /* * crypto, under pressure, is a royal PITA */ pong_timeout_adj_tmp = dst_link->pong_timeout_adj * dst_link->pmtud_crypto_timeout_multiplier; } else { pong_timeout_adj_tmp = dst_link->pong_timeout_adj; } ts.tv_sec += pong_timeout_adj_tmp / 1000000; ts.tv_nsec += (((pong_timeout_adj_tmp) % 1000000) * 1000); while (ts.tv_nsec > 1000000000) { ts.tv_sec += 1; ts.tv_nsec -= 1000000000; } pthread_mutex_unlock(&knet_h->backoff_mutex); knet_h->pmtud_waiting = 1; ret = pthread_cond_timedwait(&knet_h->pmtud_cond, &knet_h->pmtud_mutex, &ts); knet_h->pmtud_waiting = 0; if (knet_h->pmtud_abort) { pthread_mutex_unlock(&knet_h->pmtud_mutex); errno = EDEADLK; return -1; } /* * we cannot use shutdown_in_progress in here because * we already hold the read lock */ if (knet_h->fini_in_progress) { pthread_mutex_unlock(&knet_h->pmtud_mutex); log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD aborted. shutdown in progress"); return -1; } if (ret) { if (ret == ETIMEDOUT) { if ((knet_h->crypto_in_use_config) && (dst_link->pmtud_crypto_timeout_multiplier < KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MAX)) { dst_link->pmtud_crypto_timeout_multiplier = dst_link->pmtud_crypto_timeout_multiplier * 2; pmtud_crypto_reduce = 0; log_debug(knet_h, KNET_SUB_PMTUD, "Increasing PMTUd response timeout multiplier to (%u) for host %u link: %u", dst_link->pmtud_crypto_timeout_multiplier, dst_host->host_id, dst_link->link_id); pthread_mutex_unlock(&knet_h->pmtud_mutex); goto restart; } if (!warn_once) { log_warn(knet_h, KNET_SUB_PMTUD, "possible MTU misconfiguration detected. " "kernel is reporting MTU: %u bytes for " "host %u link %u but the other node is " "not acknowledging packets of this size. 
", dst_link->last_sent_mtu, dst_host->host_id, dst_link->link_id); log_warn(knet_h, KNET_SUB_PMTUD, "This can be caused by this node interface MTU " "too big or a network device that does not " "support or has been misconfigured to manage MTU " "of this size, or packet loss. knet will continue " "to run but performances might be affected."); warn_once = 1; } } else { pthread_mutex_unlock(&knet_h->pmtud_mutex); if (mutex_retry_limit == 3) { log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD aborted, unable to get mutex lock"); return -1; } mutex_retry_limit++; goto restart; } } if ((knet_h->crypto_in_use_config) && (pmtud_crypto_reduce == 1) && (dst_link->pmtud_crypto_timeout_multiplier > KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN)) { if (!clock_gettime(CLOCK_MONOTONIC, &pmtud_crypto_stop_ts)) { timespec_diff(pmtud_crypto_start_ts, pmtud_crypto_stop_ts, &timediff); if (((pong_timeout_adj_tmp * 1000) / 2) > timediff) { dst_link->pmtud_crypto_timeout_multiplier = dst_link->pmtud_crypto_timeout_multiplier / 2; log_debug(knet_h, KNET_SUB_PMTUD, "Decreasing PMTUd response timeout multiplier to (%u) for host %u link: %u", dst_link->pmtud_crypto_timeout_multiplier, dst_host->host_id, dst_link->link_id); } } else { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get current time: %s", strerror(errno)); } } if ((dst_link->last_recv_mtu != onwire_len) || (ret)) { dst_link->last_bad_mtu = onwire_len; } else { int found_mtu = 0; if (knet_h->sec_block_size) { if ((onwire_len + knet_h->sec_block_size >= max_mtu_len) || ((dst_link->last_bad_mtu) && (dst_link->last_bad_mtu <= (onwire_len + knet_h->sec_block_size)))) { found_mtu = 1; } } else { if ((onwire_len == max_mtu_len) || ((dst_link->last_bad_mtu) && (dst_link->last_bad_mtu == (onwire_len + 1))) || (dst_link->last_bad_mtu == dst_link->last_good_mtu)) { found_mtu = 1; } } if (found_mtu) { /* * account for IP overhead, knet headers and crypto in PMTU calculation */ dst_link->status.mtu = calc_max_data_outlen(knet_h, onwire_len - 
ipproto_overhead_len); pthread_mutex_unlock(&knet_h->pmtud_mutex); return 0; } dst_link->last_good_mtu = onwire_len; } } if (kernel_mtu) { onwire_len = kernel_mtu; } else { onwire_len = (dst_link->last_good_mtu + dst_link->last_bad_mtu) / 2; } pthread_mutex_unlock(&knet_h->pmtud_mutex); goto restart; } static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link, int force_run) { uint8_t saved_valid_pmtud; unsigned int saved_pmtud; struct timespec clock_now; unsigned long long diff_pmtud, interval; if (clock_gettime(CLOCK_MONOTONIC, &clock_now) != 0) { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get monotonic clock"); return 0; } if (!force_run) { interval = knet_h->pmtud_interval * 1000000000llu; /* nanoseconds */ timespec_diff(dst_link->pmtud_last, clock_now, &diff_pmtud); if (diff_pmtud < interval) { return dst_link->has_valid_mtu; } } /* * status.proto_overhead should include all IP/(UDP|SCTP)/knet headers * * please note that it is not the same as link->proto_overhead that * includes only either UDP or SCTP (at the moment) overhead. 
*/ switch (dst_link->dst_addr.ss_family) { case AF_INET6: dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_hash_size + knet_h->sec_salt_size; break; case AF_INET: dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_hash_size + knet_h->sec_salt_size; break; } saved_pmtud = dst_link->status.mtu; saved_valid_pmtud = dst_link->has_valid_mtu; log_debug(knet_h, KNET_SUB_PMTUD, "Starting PMTUD for host: %u link: %u", dst_host->host_id, dst_link->link_id); errno = 0; if (_handle_check_link_pmtud(knet_h, dst_host, dst_link) < 0) { if (errno == EDEADLK) { log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD for host: %u link: %u has been rescheduled", dst_host->host_id, dst_link->link_id); dst_link->status.mtu = saved_pmtud; dst_link->has_valid_mtu = saved_valid_pmtud; errno = EDEADLK; return dst_link->has_valid_mtu; } dst_link->has_valid_mtu = 0; } else { if (dst_link->status.mtu < calc_min_mtu(knet_h)) { log_info(knet_h, KNET_SUB_PMTUD, "Invalid MTU detected for host: %u link: %u mtu: %u", dst_host->host_id, dst_link->link_id, dst_link->status.mtu); dst_link->has_valid_mtu = 0; } else { dst_link->has_valid_mtu = 1; } if (dst_link->has_valid_mtu) { if ((saved_pmtud) && (saved_pmtud != dst_link->status.mtu)) { log_info(knet_h, KNET_SUB_PMTUD, "PMTUD link change for host: %u link: %u from %u to %u", dst_host->host_id, dst_link->link_id, saved_pmtud, dst_link->status.mtu); } log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD completed for host: %u link: %u current link mtu: %u", dst_host->host_id, dst_link->link_id, dst_link->status.mtu); /* * set pmtud_last, if we can, after we are done with the PMTUd process * because it can take a very long time. 
*/ dst_link->pmtud_last = clock_now; if (!clock_gettime(CLOCK_MONOTONIC, &clock_now)) { dst_link->pmtud_last = clock_now; } } } if (saved_valid_pmtud != dst_link->has_valid_mtu) { _host_dstcache_update_async(knet_h, dst_host); } return dst_link->has_valid_mtu; } void *_handle_pmtud_link_thread(void *data) { knet_handle_t knet_h = (knet_handle_t) data; struct knet_host *dst_host; struct knet_link *dst_link; int link_idx; unsigned int have_mtu; unsigned int lower_mtu; int link_has_mtu; int force_run = 0; set_thread_status(knet_h, KNET_THREAD_PMTUD, KNET_THREAD_STARTED); knet_h->data_mtu = calc_min_mtu(knet_h); - /* preparing pmtu buffer */ - knet_h->pmtudbuf->kh_version = KNET_HEADER_VERSION; - knet_h->pmtudbuf->kh_type = KNET_HEADER_TYPE_PMTUD; - knet_h->pmtudbuf->kh_node = htons(knet_h->host_id); - while (!shutdown_in_progress(knet_h)) { usleep(knet_h->threads_timer_res); if (pthread_mutex_lock(&knet_h->pmtud_mutex) != 0) { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get mutex lock"); continue; } knet_h->pmtud_abort = 0; knet_h->pmtud_running = 1; force_run = knet_h->pmtud_forcerun; knet_h->pmtud_forcerun = 0; pthread_mutex_unlock(&knet_h->pmtud_mutex); if (force_run) { log_debug(knet_h, KNET_SUB_PMTUD, "PMTUd request to rerun has been received"); } if (pthread_rwlock_rdlock(&knet_h->global_rwlock) != 0) { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get read lock"); continue; } lower_mtu = KNET_PMTUD_SIZE_V4; have_mtu = 0; for (dst_host = knet_h->host_head; dst_host != NULL; dst_host = dst_host->next) { for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) { dst_link = &dst_host->link[link_idx]; if ((dst_link->status.enabled != 1) || (dst_link->status.connected != 1) || (dst_host->link[link_idx].transport == KNET_TRANSPORT_LOOPBACK) || (!dst_link->last_ping_size) || ((dst_link->dynamic == KNET_LINK_DYNIP) && (dst_link->status.dynconnected != 1))) continue; if (!knet_h->manual_mtu) { link_has_mtu = _handle_check_pmtud(knet_h, dst_host, dst_link, force_run); if 
(errno == EDEADLK) { goto out_unlock; } if (link_has_mtu) { have_mtu = 1; if (dst_link->status.mtu < lower_mtu) { lower_mtu = dst_link->status.mtu; } } } else { link_has_mtu = _calculate_manual_mtu(knet_h, dst_link); if (link_has_mtu) { have_mtu = 1; if (dst_link->status.mtu < lower_mtu) { lower_mtu = dst_link->status.mtu; } } } } } if (have_mtu) { if (knet_h->data_mtu != lower_mtu) { knet_h->data_mtu = lower_mtu; log_info(knet_h, KNET_SUB_PMTUD, "Global data MTU changed to: %u", knet_h->data_mtu); if (knet_h->pmtud_notify_fn) { knet_h->pmtud_notify_fn(knet_h->pmtud_notify_fn_private_data, knet_h->data_mtu); } } } out_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); if (pthread_mutex_lock(&knet_h->pmtud_mutex) != 0) { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get mutex lock"); } else { knet_h->pmtud_running = 0; pthread_mutex_unlock(&knet_h->pmtud_mutex); } } set_thread_status(knet_h, KNET_THREAD_PMTUD, KNET_THREAD_STOPPED); return NULL; } +static void send_pmtud_reply(knet_handle_t knet_h, struct knet_link *src_link, struct knet_header *inbuf) +{ + int err = 0, savederrno = 0, stats_err = 0; + unsigned char *outbuf = (unsigned char *)inbuf; + ssize_t len, outlen; + + if (knet_h->onwire_ver_remap) { + prep_pmtud_reply_v1(knet_h, inbuf, &outlen); + } else { + switch (inbuf->kh_version) { + case 1: + prep_pmtud_reply_v1(knet_h, inbuf, &outlen); + break; + default: + log_warn(knet_h, KNET_SUB_PMTUD, "preparing PMTUD reply onwire version %u not supported", inbuf->kh_version); + return; + break; + } + } + + if (knet_h->crypto_in_use_config) { + if (crypto_encrypt_and_sign(knet_h, + (const unsigned char *)inbuf, + outlen, + knet_h->recv_from_links_buf_crypt, + &outlen) < 0) { + log_debug(knet_h, KNET_SUB_PMTUD, "Unable to encrypt PMTUd reply packet"); + return; + } + outbuf = knet_h->recv_from_links_buf_crypt; + stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex); + if (stats_err < 0) { + log_err(knet_h, KNET_SUB_PMTUD, "Unable to get mutex lock: %s", 
strerror(stats_err)); + return; + } + knet_h->stats_extra.tx_crypt_pmtu_reply_packets++; + pthread_mutex_unlock(&knet_h->handle_stats_mutex); + } + + savederrno = pthread_mutex_lock(&knet_h->tx_mutex); + if (savederrno) { + log_err(knet_h, KNET_SUB_PMTUD, "Unable to get TX mutex lock: %s", strerror(savederrno)); + return; + } + +retry: + if (src_link->transport_connected) { + if (transport_get_connection_oriented(knet_h, src_link->transport) == TRANSPORT_PROTO_NOT_CONNECTION_ORIENTED) { + len = sendto(src_link->outsock, outbuf, outlen, MSG_DONTWAIT | MSG_NOSIGNAL, + (struct sockaddr *) &src_link->dst_addr, sizeof(struct sockaddr_storage)); + } else { + len = sendto(src_link->outsock, outbuf, outlen, MSG_DONTWAIT | MSG_NOSIGNAL, NULL, 0); + } + savederrno = errno; + if (len != outlen) { + err = transport_tx_sock_error(knet_h, src_link->transport, src_link->outsock, len, savederrno); + stats_err = pthread_mutex_lock(&src_link->link_stats_mutex); + if (stats_err < 0) { + log_err(knet_h, KNET_SUB_PMTUD, "Unable to get mutex lock: %s", strerror(stats_err)); + return; + } + switch(err) { + case -1: /* unrecoverable error */ + log_debug(knet_h, KNET_SUB_PMTUD, + "Unable to send PMTUd reply (sock: %d) packet (sendto): %d %s. 
recorded src ip: %s src port: %s dst ip: %s dst port: %s", + src_link->outsock, errno, strerror(errno), + src_link->status.src_ipaddr, src_link->status.src_port, + src_link->status.dst_ipaddr, src_link->status.dst_port); + + src_link->status.stats.tx_pmtu_errors++; + break; + case 0: /* ignore error and continue */ + src_link->status.stats.tx_pmtu_errors++; + break; + case 1: /* retry to send those same data */ + src_link->status.stats.tx_pmtu_retries++; + pthread_mutex_unlock(&src_link->link_stats_mutex); + goto retry; + break; + } + pthread_mutex_unlock(&src_link->link_stats_mutex); + } + } + pthread_mutex_unlock(&knet_h->tx_mutex); +} + +void process_pmtud(knet_handle_t knet_h, struct knet_link *src_link, struct knet_header *inbuf) +{ + /* + * at the moment we don't need to take any extra + * actions when processing a PMTUd packet, except + * sending a reply + */ + send_pmtud_reply(knet_h, src_link, inbuf); +} + +void process_pmtud_reply(knet_handle_t knet_h, struct knet_link *src_link, struct knet_header *inbuf) +{ + if (pthread_mutex_lock(&knet_h->pmtud_mutex) != 0) { + log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get mutex lock"); + return; + } + + if (knet_h->onwire_ver_remap) { + process_pmtud_reply_v1(knet_h, src_link, inbuf); + } else { + switch (inbuf->kh_version) { + case 1: + process_pmtud_reply_v1(knet_h, src_link, inbuf); + break; + default: + log_warn(knet_h, KNET_SUB_PMTUD, "preparing PMTUD reply onwire version %u not supported", inbuf->kh_version); + goto out_unlock; + break; + } + } + + pthread_cond_signal(&knet_h->pmtud_cond); +out_unlock: + pthread_mutex_unlock(&knet_h->pmtud_mutex); +} + int knet_handle_pmtud_getfreq(knet_handle_t knet_h, unsigned int *interval) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } if (!interval) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { - log_err(knet_h, KNET_SUB_HANDLE, "Unable to get read lock: %s", + log_err(knet_h, 
KNET_SUB_PMTUD, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } *interval = knet_h->pmtud_interval; pthread_rwlock_unlock(&knet_h->global_rwlock); errno = 0; return 0; } int knet_handle_pmtud_setfreq(knet_handle_t knet_h, unsigned int interval) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } if ((!interval) || (interval > 86400)) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { - log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", + log_err(knet_h, KNET_SUB_PMTUD, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } knet_h->pmtud_interval = interval; - log_debug(knet_h, KNET_SUB_HANDLE, "PMTUd interval set to: %u seconds", interval); + log_debug(knet_h, KNET_SUB_PMTUD, "PMTUd interval set to: %u seconds", interval); pthread_rwlock_unlock(&knet_h->global_rwlock); errno = 0; return 0; } int knet_handle_enable_pmtud_notify(knet_handle_t knet_h, void *pmtud_notify_fn_private_data, void (*pmtud_notify_fn) ( void *private_data, unsigned int data_mtu)) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { - log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", + log_err(knet_h, KNET_SUB_PMTUD, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } knet_h->pmtud_notify_fn_private_data = pmtud_notify_fn_private_data; knet_h->pmtud_notify_fn = pmtud_notify_fn; if (knet_h->pmtud_notify_fn) { - log_debug(knet_h, KNET_SUB_HANDLE, "pmtud_notify_fn enabled"); + log_debug(knet_h, KNET_SUB_PMTUD, "pmtud_notify_fn enabled"); } else { - log_debug(knet_h, KNET_SUB_HANDLE, "pmtud_notify_fn disabled"); + log_debug(knet_h, KNET_SUB_PMTUD, "pmtud_notify_fn disabled"); } pthread_rwlock_unlock(&knet_h->global_rwlock); errno = 0; return 0; } int knet_handle_pmtud_set(knet_handle_t knet_h, unsigned int iface_mtu) { int savederrno = 0; if 
(!knet_h) { errno = EINVAL; return -1; } if (iface_mtu > KNET_PMTUD_SIZE_V4) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_PMTUD, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } log_info(knet_h, KNET_SUB_PMTUD, "MTU manually set to: %u", iface_mtu); knet_h->manual_mtu = iface_mtu; force_pmtud_run(knet_h, KNET_SUB_PMTUD, 0); pthread_rwlock_unlock(&knet_h->global_rwlock); errno = 0; return 0; } int knet_handle_pmtud_get(knet_handle_t knet_h, unsigned int *data_mtu) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } if (!data_mtu) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { - log_err(knet_h, KNET_SUB_HANDLE, "Unable to get read lock: %s", + log_err(knet_h, KNET_SUB_PMTUD, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } *data_mtu = knet_h->data_mtu; pthread_rwlock_unlock(&knet_h->global_rwlock); errno = 0; return 0; } diff --git a/libknet/threads_pmtud.h b/libknet/threads_pmtud.h index c2c2c7b7..de12ea33 100644 --- a/libknet/threads_pmtud.h +++ b/libknet/threads_pmtud.h @@ -1,15 +1,18 @@ /* * Copyright (C) 2012-2020 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #ifndef __KNET_THREADS_PMTUD_H__ #define __KNET_THREADS_PMTUD_H__ void *_handle_pmtud_link_thread(void *data); +void process_pmtud(knet_handle_t knet_h, struct knet_link *src_link, struct knet_header *inbuf); +void process_pmtud_reply(knet_handle_t knet_h, struct knet_link *src_link, struct knet_header *inbuf); + #endif diff --git a/libknet/threads_rx.c b/libknet/threads_rx.c index 31df0915..145b8b6c 100644 --- a/libknet/threads_rx.c +++ b/libknet/threads_rx.c @@ -1,1040 +1,957 @@ /* * Copyright (C) 2012-2020 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. 
Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include #include "compat.h" #include "compress.h" #include "crypto.h" #include "host.h" #include "links.h" #include "links_acl.h" #include "logging.h" #include "transports.h" #include "transport_common.h" #include "threads_common.h" #include "threads_heartbeat.h" +#include "threads_pmtud.h" #include "threads_rx.h" #include "netutils.h" +#include "onwire_v1.h" /* * RECV */ /* * return 1 if a > b * return -1 if b > a * return 0 if they are equal */ -static inline int timecmp(struct timespec a, struct timespec b) +static inline int _timecmp(struct timespec a, struct timespec b) { if (a.tv_sec != b.tv_sec) { if (a.tv_sec > b.tv_sec) { return 1; } else { return -1; } } else { if (a.tv_nsec > b.tv_nsec) { return 1; } else if (a.tv_nsec < b.tv_nsec) { return -1; } else { return 0; } } } /* * this functions needs to return an index (0 to 7) * to a knet_host_defrag_buf. (-1 on errors) */ -static int find_pckt_defrag_buf(knet_handle_t knet_h, struct knet_header *inbuf) +static int _find_pckt_defrag_buf(knet_handle_t knet_h, struct knet_host *src_host, seq_num_t seq_num) { - struct knet_host *src_host = knet_h->host_index[inbuf->kh_node]; int i, oldest; /* * check if there is a buffer already in use handling the same seq_num */ for (i = 0; i < KNET_MAX_LINK; i++) { if (src_host->defrag_buf[i].in_use) { - if (src_host->defrag_buf[i].pckt_seq == inbuf->khp_data_seq_num) { + if (src_host->defrag_buf[i].pckt_seq == seq_num) { return i; } } } /* * If there is no buffer that's handling the current seq_num * either it's new or it's been reclaimed already. * check if it's been reclaimed/seen before using the defrag circular * buffer. If the pckt has been seen before, the buffer expired (ETIME) * and there is no point to try to defrag it again. 
*/ - if (!_seq_num_lookup(src_host, inbuf->khp_data_seq_num, 1, 0)) { + if (!_seq_num_lookup(src_host, seq_num, 1, 0)) { errno = ETIME; return -1; } /* * register the pckt as seen */ - _seq_num_set(src_host, inbuf->khp_data_seq_num, 1); + _seq_num_set(src_host, seq_num, 1); /* * see if there is a free buffer */ for (i = 0; i < KNET_MAX_LINK; i++) { if (!src_host->defrag_buf[i].in_use) { return i; } } /* * at this point, there are no free buffers, the pckt is new * and we need to reclaim a buffer, and we will take the one * with the oldest timestamp. It's as good as any. */ oldest = 0; for (i = 0; i < KNET_MAX_LINK; i++) { - if (timecmp(src_host->defrag_buf[i].last_update, src_host->defrag_buf[oldest].last_update) < 0) { + if (_timecmp(src_host->defrag_buf[i].last_update, src_host->defrag_buf[oldest].last_update) < 0) { oldest = i; } } src_host->defrag_buf[oldest].in_use = 0; return oldest; } -static int pckt_defrag(knet_handle_t knet_h, struct knet_header *inbuf, ssize_t *len) +static int _pckt_defrag(knet_handle_t knet_h, struct knet_host *src_host, seq_num_t seq_num, unsigned char *data, ssize_t *len, uint8_t frags, uint8_t frag_seq) { struct knet_host_defrag_buf *defrag_buf; int defrag_buf_idx; - defrag_buf_idx = find_pckt_defrag_buf(knet_h, inbuf); + defrag_buf_idx = _find_pckt_defrag_buf(knet_h, src_host, seq_num); if (defrag_buf_idx < 0) { return 1; } - defrag_buf = &knet_h->host_index[inbuf->kh_node]->defrag_buf[defrag_buf_idx]; + defrag_buf = &src_host->defrag_buf[defrag_buf_idx]; /* * if the buf is not is use, then make sure it's clean */ if (!defrag_buf->in_use) { memset(defrag_buf, 0, sizeof(struct knet_host_defrag_buf)); defrag_buf->in_use = 1; - defrag_buf->pckt_seq = inbuf->khp_data_seq_num; + defrag_buf->pckt_seq = seq_num; } /* * update timestamp on the buffer */ clock_gettime(CLOCK_MONOTONIC, &defrag_buf->last_update); /* * check if we already received this fragment */ - if (defrag_buf->frag_map[inbuf->khp_data_frag_seq]) { + if 
(defrag_buf->frag_map[frag_seq]) { /* * if we have received this fragment and we didn't clear the buffer * it means that we don't have all fragments yet */ return 1; } /* * we need to handle the last packet with gloves due to its different size */ - if (inbuf->khp_data_frag_seq == inbuf->khp_data_frag_num) { + if (frag_seq == frags) { defrag_buf->last_frag_size = *len; /* * in the event when the last packet arrives first, * we still don't know the offset vs the other fragments (based on MTU), * so we store the fragment at the end of the buffer where it's safe * and take a copy of the len so that we can restore its offset later. * remember we can't use the local MTU for this calculation because pMTU * can be asymettric between the same hosts. */ if (!defrag_buf->frag_size) { defrag_buf->last_first = 1; memmove(defrag_buf->buf + (KNET_MAX_PACKET_SIZE - *len), - inbuf->khp_data_userdata, + data, *len); } } else { defrag_buf->frag_size = *len; } if (defrag_buf->frag_size) { - memmove(defrag_buf->buf + ((inbuf->khp_data_frag_seq - 1) * defrag_buf->frag_size), - inbuf->khp_data_userdata, *len); + memmove(defrag_buf->buf + ((frag_seq - 1) * defrag_buf->frag_size), + data, *len); } defrag_buf->frag_recv++; - defrag_buf->frag_map[inbuf->khp_data_frag_seq] = 1; + defrag_buf->frag_map[frag_seq] = 1; /* * check if we received all the fragments */ - if (defrag_buf->frag_recv == inbuf->khp_data_frag_num) { + if (defrag_buf->frag_recv == frags) { /* * special case the last pckt */ if (defrag_buf->last_first) { - memmove(defrag_buf->buf + ((inbuf->khp_data_frag_num - 1) * defrag_buf->frag_size), + memmove(defrag_buf->buf + ((frags - 1) * defrag_buf->frag_size), defrag_buf->buf + (KNET_MAX_PACKET_SIZE - defrag_buf->last_frag_size), defrag_buf->last_frag_size); } /* * recalculate packet lenght */ - *len = ((inbuf->khp_data_frag_num - 1) * defrag_buf->frag_size) + defrag_buf->last_frag_size; + *len = ((frags - 1) * defrag_buf->frag_size) + defrag_buf->last_frag_size; /* * copy the 
pckt back in the user data */ - memmove(inbuf->khp_data_userdata, defrag_buf->buf, *len); + memmove(data, defrag_buf->buf, *len); /* * free this buffer */ defrag_buf->in_use = 0; return 0; } return 1; } -static void _parse_recv_from_links(knet_handle_t knet_h, int sockfd, const struct knet_mmsghdr *msg) +static int _handle_data_stats(knet_handle_t knet_h, struct knet_link *src_link, ssize_t len, uint64_t decrypt_time) +{ + int stats_err; + + /* data stats at the top for consistency with TX */ + src_link->status.stats.rx_data_packets++; + src_link->status.stats.rx_data_bytes += len; + + if (decrypt_time) { + stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex); + if (stats_err < 0) { + log_err(knet_h, KNET_SUB_RX, "Unable to get mutex lock: %s", strerror(stats_err)); + return -1; + } + /* Only update the crypto overhead for data packets. Mainly to be + consistent with TX */ + if (decrypt_time < knet_h->stats.rx_crypt_time_min) { + knet_h->stats.rx_crypt_time_min = decrypt_time; + } + if (decrypt_time > knet_h->stats.rx_crypt_time_max) { + knet_h->stats.rx_crypt_time_max = decrypt_time; + } + knet_h->stats.rx_crypt_time_ave = + (knet_h->stats.rx_crypt_time_ave * knet_h->stats.rx_crypt_packets + + decrypt_time) / (knet_h->stats.rx_crypt_packets+1); + knet_h->stats.rx_crypt_packets++; + pthread_mutex_unlock(&knet_h->handle_stats_mutex); + } + return 0; +} + +static int _decompress_data(knet_handle_t knet_h, uint8_t decompress_type, unsigned char *data, ssize_t *len, ssize_t header_size) +{ + int err = 0, stats_err = 0; + + if (decompress_type) { + ssize_t decmp_outlen = KNET_DATABUFSIZE_COMPRESS; + struct timespec start_time; + struct timespec end_time; + uint64_t decompress_time; + + clock_gettime(CLOCK_MONOTONIC, &start_time); + err = decompress(knet_h, decompress_type, + data, + *len - header_size, + knet_h->recv_from_links_buf_decompress, + &decmp_outlen); + + clock_gettime(CLOCK_MONOTONIC, &end_time); + timespec_diff(start_time, end_time, &decompress_time); 
+ + stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex); + if (stats_err < 0) { + log_err(knet_h, KNET_SUB_RX, "Unable to get mutex lock: %s", strerror(stats_err)); + return -1; + } + + if (!err) { + /* Collect stats */ + if (decompress_time < knet_h->stats.rx_compress_time_min) { + knet_h->stats.rx_compress_time_min = decompress_time; + } + if (decompress_time > knet_h->stats.rx_compress_time_max) { + knet_h->stats.rx_compress_time_max = decompress_time; + } + knet_h->stats.rx_compress_time_ave = + (knet_h->stats.rx_compress_time_ave * knet_h->stats.rx_compressed_packets + + decompress_time) / (knet_h->stats.rx_compressed_packets+1); + + knet_h->stats.rx_compressed_packets++; + knet_h->stats.rx_compressed_original_bytes += decmp_outlen; + knet_h->stats.rx_compressed_size_bytes += *len - KNET_HEADER_SIZE; + + memmove(data, knet_h->recv_from_links_buf_decompress, decmp_outlen); + *len = decmp_outlen + header_size; + } else { + knet_h->stats.rx_failed_to_decompress++; + pthread_mutex_unlock(&knet_h->handle_stats_mutex); + log_warn(knet_h, KNET_SUB_COMPRESS, "Unable to decompress packet (%d): %s", + err, strerror(errno)); + return -1; + } + pthread_mutex_unlock(&knet_h->handle_stats_mutex); + } + return 0; +} + +static int _check_destination(knet_handle_t knet_h, struct knet_header *inbuf, unsigned char *data, ssize_t len, ssize_t header_size, int8_t *channel) { - int err = 0, savederrno = 0, stats_err = 0; - ssize_t outlen; - struct knet_host *src_host; - struct knet_link *src_link; - unsigned long long latency_last; knet_node_id_t dst_host_ids[KNET_MAX_HOST]; size_t dst_host_ids_entries = 0; int bcast = 1; - uint64_t decrypt_time = 0; - struct timespec recvtime; - struct knet_header *inbuf = msg->msg_hdr.msg_iov->iov_base; - unsigned char *outbuf = (unsigned char *)msg->msg_hdr.msg_iov->iov_base; - ssize_t len = msg->msg_len; + size_t host_idx; + int found = 0; + + if (knet_h->dst_host_filter_fn) { + bcast = knet_h->dst_host_filter_fn( + 
knet_h->dst_host_filter_fn_private_data, + data, + len - header_size, + KNET_NOTIFY_RX, + knet_h->host_id, + inbuf->kh_node, + channel, + dst_host_ids, + &dst_host_ids_entries); + if (bcast < 0) { + log_debug(knet_h, KNET_SUB_RX, "Error from dst_host_filter_fn: %d", bcast); + return -1; + } + + if ((!bcast) && (!dst_host_ids_entries)) { + log_debug(knet_h, KNET_SUB_RX, "Message is unicast but no dst_host_ids_entries"); + return -1; + } + + /* check if we are dst for this packet */ + if (!bcast) { + if (dst_host_ids_entries > KNET_MAX_HOST) { + log_debug(knet_h, KNET_SUB_RX, "dst_host_filter_fn returned too many destinations"); + return -1; + } + for (host_idx = 0; host_idx < dst_host_ids_entries; host_idx++) { + if (dst_host_ids[host_idx] == knet_h->host_id) { + found = 1; + break; + } + } + if (!found) { + log_debug(knet_h, KNET_SUB_RX, "Packet is not for us"); + return -1; + } + } + } + return 0; +} + +static int _deliver_data(knet_handle_t knet_h, unsigned char *data, ssize_t len, ssize_t header_size, int8_t channel) +{ struct iovec iov_out[1]; + ssize_t outlen = 0; + + memset(iov_out, 0, sizeof(iov_out)); + +retry: + iov_out[0].iov_base = (void *) data + outlen; + iov_out[0].iov_len = len - (outlen + header_size); + + outlen = writev(knet_h->sockfd[channel].sockfd[knet_h->sockfd[channel].is_created], iov_out, 1); + if ((outlen > 0) && (outlen < (ssize_t)iov_out[0].iov_len)) { + log_debug(knet_h, KNET_SUB_RX, + "Unable to send all data to the application in one go. 
Expected: %zu Sent: %zd\n", + iov_out[0].iov_len, outlen); + goto retry; + } + + if (outlen <= 0) { + knet_h->sock_notify_fn(knet_h->sock_notify_fn_private_data, + knet_h->sockfd[channel].sockfd[0], + channel, + KNET_NOTIFY_RX, + outlen, + errno); + return -1; + } + + if ((size_t)outlen != iov_out[0].iov_len) { + return -1; + } + + return 0; +} + +static void _process_data(knet_handle_t knet_h, struct knet_host *src_host, struct knet_link *src_link, struct knet_header *inbuf, ssize_t len, uint64_t decrypt_time) +{ int8_t channel; - seq_num_t recv_seq_num; - int wipe_bufs = 0; - int try_decrypt = 0, decrypted = 0, i, found_link = 0; + uint8_t decompress_type = 0; + ssize_t header_size; + seq_num_t seq_num; + uint8_t frags, frag_seq; + unsigned char *data; + + if (_handle_data_stats(knet_h, src_link, len, decrypt_time) < 0) { + return; + } + + /* + * register host is sending data. Required to determine if we need + * to reset circular buffers. (see onwire_v1.c) + */ + src_host->got_data = 1; + + if (knet_h->onwire_ver_remap) { + get_data_header_info_v1(knet_h, inbuf, &header_size, &channel, &seq_num, &decompress_type, &frags, &frag_seq); + data = get_data_v1(knet_h, inbuf); + } else { + switch (inbuf->kh_version) { + case 1: + get_data_header_info_v1(knet_h, inbuf, &header_size, &channel, &seq_num, &decompress_type, &frags, &frag_seq); + data = get_data_v1(knet_h, inbuf); + break; + default: + log_warn(knet_h, KNET_SUB_RX, "processing data onwire version %u not supported", inbuf->kh_version); + return; + break; + } + } + + if (!_seq_num_lookup(src_host, seq_num, 0, 0)) { + if (src_host->link_handler_policy != KNET_LINK_POLICY_ACTIVE) { + log_debug(knet_h, KNET_SUB_RX, "Packet has already been delivered"); + } + return; + } + + if (frags > 1) { + /* + * len as received from the socket also includes extra stuff + * that the defrag code doesn't care about. 
So strip it + * here and readd only for repadding once we are done + * defragging + * + * the defrag code assumes that data packets have all the same size + * except the last one that might be smaller. + * + */ + len = len - header_size; + if (_pckt_defrag(knet_h, src_host, seq_num, data, &len, frags, frag_seq)) { + return; + } + len = len + header_size; + } + + if (_decompress_data(knet_h, decompress_type, data, &len, header_size) < 0) { + return; + } + + if (!src_host->status.reachable) { + log_debug(knet_h, KNET_SUB_RX, "Source host %u not reachable yet. Discarding packet.", src_host->host_id); + return; + } + + if (knet_h->enabled != 1) /* data forward is disabled */ + return; + + if (_check_destination(knet_h, inbuf, data, len, header_size, &channel) < 0) { + return; + } + + if (!knet_h->sockfd[channel].in_use) { + log_debug(knet_h, KNET_SUB_RX, + "received packet for channel %d but there is no local sock connected", + channel); + return; + } + + if (_deliver_data(knet_h, data, len, header_size, channel) < 0) { + return; + } + + _seq_num_set(src_host, seq_num, 0); +} + +static struct knet_header *_decrypt_packet(knet_handle_t knet_h, struct knet_header *inbuf, ssize_t *len, uint64_t *decrypt_time) +{ + int try_decrypt = 0; + int i = 0; + struct timespec start_time; + struct timespec end_time; + ssize_t outlen; for (i = 1; i <= KNET_MAX_CRYPTO_INSTANCES; i++) { if (knet_h->crypto_instance[i]) { try_decrypt = 1; break; } } if ((!try_decrypt) && (knet_h->crypto_only == KNET_CRYPTO_RX_DISALLOW_CLEAR_TRAFFIC)) { log_debug(knet_h, KNET_SUB_RX, "RX thread configured to accept only crypto packets, but no crypto configs are configured!"); - return; + return NULL; } if (try_decrypt) { - struct timespec start_time; - struct timespec end_time; - clock_gettime(CLOCK_MONOTONIC, &start_time); if (crypto_authenticate_and_decrypt(knet_h, (unsigned char *)inbuf, - len, + *len, knet_h->recv_from_links_buf_decrypt, &outlen) < 0) { log_debug(knet_h, KNET_SUB_RX, "Unable to 
decrypt/auth packet"); if (knet_h->crypto_only == KNET_CRYPTO_RX_DISALLOW_CLEAR_TRAFFIC) { - return; + return NULL; } log_debug(knet_h, KNET_SUB_RX, "Attempting to process packet as clear data"); } else { clock_gettime(CLOCK_MONOTONIC, &end_time); - timespec_diff(start_time, end_time, &decrypt_time); + timespec_diff(start_time, end_time, decrypt_time); - len = outlen; + *len = outlen; inbuf = (struct knet_header *)knet_h->recv_from_links_buf_decrypt; - decrypted = 1; } } + return inbuf; +} +static int _packet_checks(knet_handle_t knet_h, struct knet_header *inbuf, ssize_t len) +{ if (len < (ssize_t)(KNET_HEADER_SIZE + 1)) { log_debug(knet_h, KNET_SUB_RX, "Packet is too short: %ld", (long)len); - return; + return -1; + } + + /* + * old versions of knet did not advertise max_ver and max_ver is set to 0. + */ + if (!inbuf->kh_max_ver) { + inbuf->kh_max_ver = 1; + } + + /* + * if the node joining max version is lower than the min version + * then we reject the node + */ + if (inbuf->kh_max_ver < knet_h->onwire_min_ver) { + log_warn(knet_h, KNET_SUB_RX, + "Received packet version %u from node %u, lower than currently minimal supported onwire version. Rejecting.", inbuf->kh_version, inbuf->kh_node); + return -1; } - if (inbuf->kh_version != KNET_HEADER_VERSION) { - log_debug(knet_h, KNET_SUB_RX, "Packet version does not match"); + /* + * if the node joining with version higher than our max version + * then we reject the node + */ + if (inbuf->kh_version > knet_h->onwire_max_ver) { + log_warn(knet_h, KNET_SUB_RX, + "Received packet version %u from node %u, higher than currently maximum supported onwire version. Rejecting.", inbuf->kh_version, inbuf->kh_node); + return -1; + } + + /* + * if the node joining with version lower than the current in use version + * then we reject the node + * + * NOTE: should we make this configurable and support downgrades? 
+ */ + if ((!knet_h->onwire_force_ver) && + (inbuf->kh_version < knet_h->onwire_ver) && + (inbuf->kh_max_ver > inbuf->kh_version)) { + log_warn(knet_h, KNET_SUB_RX, + "Received packet version %u from node %u, lower than currently in use onwire version. Rejecting.", inbuf->kh_version, inbuf->kh_node); + return -1; + } + return 0; +} + +static void _handle_dynip(knet_handle_t knet_h, struct knet_host *src_host, struct knet_link *src_link, int sockfd, const struct knet_mmsghdr *msg) +{ + if (src_link->dynamic == KNET_LINK_DYNIP) { + if (cmpaddr(&src_link->dst_addr, msg->msg_hdr.msg_name) != 0) { + log_debug(knet_h, KNET_SUB_RX, "host: %u link: %u appears to have changed ip address", + src_host->host_id, src_link->link_id); + memmove(&src_link->dst_addr, msg->msg_hdr.msg_name, sizeof(struct sockaddr_storage)); + if (knet_addrtostr(&src_link->dst_addr, sockaddr_len(&src_link->dst_addr), + src_link->status.dst_ipaddr, KNET_MAX_HOST_LEN, + src_link->status.dst_port, KNET_MAX_PORT_LEN) != 0) { + log_debug(knet_h, KNET_SUB_RX, "Unable to resolve ???"); + snprintf(src_link->status.dst_ipaddr, KNET_MAX_HOST_LEN - 1, "Unknown!!!"); + snprintf(src_link->status.dst_port, KNET_MAX_PORT_LEN - 1, "??"); + } else { + log_info(knet_h, KNET_SUB_RX, + "host: %u link: %u new connection established from: %s %s", + src_host->host_id, src_link->link_id, + src_link->status.dst_ipaddr, src_link->status.dst_port); + } + } + /* + * transport has already accepted the connection here + * otherwise we would not be receiving packets + */ + transport_link_dyn_connect(knet_h, sockfd, src_link); + } +} + +static void _parse_recv_from_links(knet_handle_t knet_h, int sockfd, const struct knet_mmsghdr *msg) +{ + int savederrno = 0, stats_err = 0; + struct knet_host *src_host; + struct knet_link *src_link; + uint64_t decrypt_time = 0; + struct knet_header *inbuf = msg->msg_hdr.msg_iov->iov_base; + ssize_t len = msg->msg_len; + int i, found_link = 0; + + inbuf = _decrypt_packet(knet_h, inbuf, &len, 
&decrypt_time); + if (!inbuf) { return; } inbuf->kh_node = ntohs(inbuf->kh_node); + + if (_packet_checks(knet_h, inbuf, len) < 0) { + return; + } + + /* + * determine source host + */ src_host = knet_h->host_index[inbuf->kh_node]; if (src_host == NULL) { /* host not found */ log_debug(knet_h, KNET_SUB_RX, "Unable to find source host for this packet"); return; } - if ((inbuf->kh_type & KNET_HEADER_TYPE_PMSK) != 0) { - /* be aware this works only for PING / PONG and PMTUd packets! */ - src_link = src_host->link + - (inbuf->khp_ping_link % KNET_MAX_LINK); - if (src_link->dynamic == KNET_LINK_DYNIP) { - if (cmpaddr(&src_link->dst_addr, msg->msg_hdr.msg_name) != 0) { - log_debug(knet_h, KNET_SUB_RX, "host: %u link: %u appears to have changed ip address", - src_host->host_id, src_link->link_id); - memmove(&src_link->dst_addr, msg->msg_hdr.msg_name, sizeof(struct sockaddr_storage)); - if (knet_addrtostr(&src_link->dst_addr, sockaddr_len(&src_link->dst_addr), - src_link->status.dst_ipaddr, KNET_MAX_HOST_LEN, - src_link->status.dst_port, KNET_MAX_PORT_LEN) != 0) { - log_debug(knet_h, KNET_SUB_RX, "Unable to resolve ???"); - snprintf(src_link->status.dst_ipaddr, KNET_MAX_HOST_LEN - 1, "Unknown!!!"); - snprintf(src_link->status.dst_port, KNET_MAX_PORT_LEN - 1, "??"); - } else { - log_info(knet_h, KNET_SUB_RX, - "host: %u link: %u new connection established from: %s %s", - src_host->host_id, src_link->link_id, - src_link->status.dst_ipaddr, src_link->status.dst_port); - } + /* + * deteremine source link + */ + if (inbuf->kh_type == KNET_HEADER_TYPE_PING) { + _handle_onwire_version(knet_h, src_host, inbuf); + if (knet_h->onwire_ver_remap) { + src_link = get_link_from_pong_v1(knet_h, src_host, inbuf); + } else { + switch (inbuf->kh_version) { + case 1: + src_link = get_link_from_pong_v1(knet_h, src_host, inbuf); + break; + default: + log_warn(knet_h, KNET_SUB_RX, "Parsing ping onwire version %u not supported", inbuf->kh_version); + return; + break; } - /* - * transport has 
already accepted the connection here - * otherwise we would not be receiving packets - */ - transport_link_dyn_connect(knet_h, sockfd, src_link); } - } else { /* data packet */ + _handle_dynip(knet_h, src_host, src_link, sockfd, msg); + } else { /* all other packets */ for (i = 0; i < KNET_MAX_LINK; i++) { src_link = &src_host->link[i]; if (cmpaddr(&src_link->dst_addr, msg->msg_hdr.msg_name) == 0) { found_link = 1; break; } } if (!found_link) { log_debug(knet_h, KNET_SUB_RX, "Unable to determine source link for data packet. Discarding packet."); return; } } stats_err = pthread_mutex_lock(&src_link->link_stats_mutex); if (stats_err) { log_err(knet_h, KNET_SUB_RX, "Unable to get stats mutex lock for host %u link %u: %s", src_host->host_id, src_link->link_id, strerror(savederrno)); return; } switch (inbuf->kh_type) { - case KNET_HEADER_TYPE_DATA: - - /* data stats at the top for consistency with TX */ - src_link->status.stats.rx_data_packets++; - src_link->status.stats.rx_data_bytes += len; - - if (decrypted) { - stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex); - if (stats_err < 0) { - pthread_mutex_unlock(&src_link->link_stats_mutex); - log_err(knet_h, KNET_SUB_RX, "Unable to get mutex lock: %s", strerror(stats_err)); - return; - } - /* Only update the crypto overhead for data packets. Mainly to be - consistent with TX */ - if (decrypt_time < knet_h->stats.rx_crypt_time_min) { - knet_h->stats.rx_crypt_time_min = decrypt_time; - } - if (decrypt_time > knet_h->stats.rx_crypt_time_max) { - knet_h->stats.rx_crypt_time_max = decrypt_time; - } - knet_h->stats.rx_crypt_time_ave = - (knet_h->stats.rx_crypt_time_ave * knet_h->stats.rx_crypt_packets + - decrypt_time) / (knet_h->stats.rx_crypt_packets+1); - knet_h->stats.rx_crypt_packets++; - pthread_mutex_unlock(&knet_h->handle_stats_mutex); - } - - if (!src_host->status.reachable) { - pthread_mutex_unlock(&src_link->link_stats_mutex); - log_debug(knet_h, KNET_SUB_RX, "Source host %u not reachable yet. 
Discarding packet.", src_host->host_id); - return; - } - - inbuf->khp_data_seq_num = ntohs(inbuf->khp_data_seq_num); - channel = inbuf->khp_data_channel; - src_host->got_data = 1; - - if (!_seq_num_lookup(src_host, inbuf->khp_data_seq_num, 0, 0)) { + case KNET_HEADER_TYPE_DATA: + _process_data(knet_h, src_host, src_link, inbuf, len, decrypt_time); + break; + case KNET_HEADER_TYPE_PING: + process_ping(knet_h, src_host, src_link, inbuf, len); + break; + case KNET_HEADER_TYPE_PONG: + process_pong(knet_h, src_host, src_link, inbuf, len); + break; + case KNET_HEADER_TYPE_PMTUD: + src_link->status.stats.rx_pmtu_packets++; + src_link->status.stats.rx_pmtu_bytes += len; + /* Unlock so we don't deadlock with tx_mutex */ pthread_mutex_unlock(&src_link->link_stats_mutex); - if (src_host->link_handler_policy != KNET_LINK_POLICY_ACTIVE) { - log_debug(knet_h, KNET_SUB_RX, "Packet has already been delivered"); - } - return; - } - - if (inbuf->khp_data_frag_num > 1) { - /* - * len as received from the socket also includes extra stuff - * that the defrag code doesn't care about. 
So strip it - * here and readd only for repadding once we are done - * defragging - */ - len = len - KNET_HEADER_DATA_SIZE; - if (pckt_defrag(knet_h, inbuf, &len)) { - pthread_mutex_unlock(&src_link->link_stats_mutex); - return; - } - len = len + KNET_HEADER_DATA_SIZE; - } - - if (inbuf->khp_data_compress) { - ssize_t decmp_outlen = KNET_DATABUFSIZE_COMPRESS; - struct timespec start_time; - struct timespec end_time; - uint64_t compress_time; - - clock_gettime(CLOCK_MONOTONIC, &start_time); - err = decompress(knet_h, inbuf->khp_data_compress, - (const unsigned char *)inbuf->khp_data_userdata, - len - KNET_HEADER_DATA_SIZE, - knet_h->recv_from_links_buf_decompress, - &decmp_outlen); - - stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex); - if (stats_err < 0) { - pthread_mutex_unlock(&src_link->link_stats_mutex); - log_err(knet_h, KNET_SUB_RX, "Unable to get mutex lock: %s", strerror(stats_err)); - return; - } - - clock_gettime(CLOCK_MONOTONIC, &end_time); - timespec_diff(start_time, end_time, &compress_time); - - if (!err) { - /* Collect stats */ - if (compress_time < knet_h->stats.rx_compress_time_min) { - knet_h->stats.rx_compress_time_min = compress_time; - } - if (compress_time > knet_h->stats.rx_compress_time_max) { - knet_h->stats.rx_compress_time_max = compress_time; - } - knet_h->stats.rx_compress_time_ave = - (knet_h->stats.rx_compress_time_ave * knet_h->stats.rx_compressed_packets + - compress_time) / (knet_h->stats.rx_compressed_packets+1); - - knet_h->stats.rx_compressed_packets++; - knet_h->stats.rx_compressed_original_bytes += decmp_outlen; - knet_h->stats.rx_compressed_size_bytes += len - KNET_HEADER_SIZE; - - memmove(inbuf->khp_data_userdata, knet_h->recv_from_links_buf_decompress, decmp_outlen); - len = decmp_outlen + KNET_HEADER_DATA_SIZE; - } else { - knet_h->stats.rx_failed_to_decompress++; - pthread_mutex_unlock(&knet_h->handle_stats_mutex); - pthread_mutex_unlock(&src_link->link_stats_mutex); - log_warn(knet_h, KNET_SUB_COMPRESS, 
"Unable to decompress packet (%d): %s", - err, strerror(errno)); - return; - } - pthread_mutex_unlock(&knet_h->handle_stats_mutex); - } - - if (knet_h->enabled != 1) /* data forward is disabled */ + process_pmtud(knet_h, src_link, inbuf); + return; /* Don't need to unlock link_stats_mutex */ break; - - if (knet_h->dst_host_filter_fn) { - size_t host_idx; - int found = 0; - - bcast = knet_h->dst_host_filter_fn( - knet_h->dst_host_filter_fn_private_data, - (const unsigned char *)inbuf->khp_data_userdata, - len - KNET_HEADER_DATA_SIZE, - KNET_NOTIFY_RX, - knet_h->host_id, - inbuf->kh_node, - &channel, - dst_host_ids, - &dst_host_ids_entries); - if (bcast < 0) { - pthread_mutex_unlock(&src_link->link_stats_mutex); - log_debug(knet_h, KNET_SUB_RX, "Error from dst_host_filter_fn: %d", bcast); - return; - } - - if ((!bcast) && (!dst_host_ids_entries)) { - pthread_mutex_unlock(&src_link->link_stats_mutex); - log_debug(knet_h, KNET_SUB_RX, "Message is unicast but no dst_host_ids_entries"); - return; - } - - /* check if we are dst for this packet */ - if (!bcast) { - if (dst_host_ids_entries > KNET_MAX_HOST) { - pthread_mutex_unlock(&src_link->link_stats_mutex); - log_debug(knet_h, KNET_SUB_RX, "dst_host_filter_fn returned too many destinations"); - return; - } - for (host_idx = 0; host_idx < dst_host_ids_entries; host_idx++) { - if (dst_host_ids[host_idx] == knet_h->host_id) { - found = 1; - break; - } - } - if (!found) { - pthread_mutex_unlock(&src_link->link_stats_mutex); - log_debug(knet_h, KNET_SUB_RX, "Packet is not for us"); - return; - } - } - } - - if (!knet_h->sockfd[channel].in_use) { + case KNET_HEADER_TYPE_PMTUD_REPLY: + src_link->status.stats.rx_pmtu_packets++; + src_link->status.stats.rx_pmtu_bytes += len; + /* pmtud_mutex can't be acquired while we hold a link_stats_mutex (ordering) */ pthread_mutex_unlock(&src_link->link_stats_mutex); - log_debug(knet_h, KNET_SUB_RX, - "received packet for channel %d but there is no local sock connected", - channel); + 
process_pmtud_reply(knet_h, src_link, inbuf); return; - } - - outlen = 0; - memset(iov_out, 0, sizeof(iov_out)); - -retry: - iov_out[0].iov_base = (void *) inbuf->khp_data_userdata + outlen; - iov_out[0].iov_len = len - (outlen + KNET_HEADER_DATA_SIZE); - - outlen = writev(knet_h->sockfd[channel].sockfd[knet_h->sockfd[channel].is_created], iov_out, 1); - if ((outlen > 0) && (outlen < (ssize_t)iov_out[0].iov_len)) { - log_debug(knet_h, KNET_SUB_RX, - "Unable to send all data to the application in one go. Expected: %zu Sent: %zd\n", - iov_out[0].iov_len, outlen); - goto retry; - } - - if (outlen <= 0) { - knet_h->sock_notify_fn(knet_h->sock_notify_fn_private_data, - knet_h->sockfd[channel].sockfd[0], - channel, - KNET_NOTIFY_RX, - outlen, - errno); + break; + default: pthread_mutex_unlock(&src_link->link_stats_mutex); return; - } - if ((size_t)outlen == iov_out[0].iov_len) { - _seq_num_set(src_host, inbuf->khp_data_seq_num, 0); - } - break; - case KNET_HEADER_TYPE_PING: - outlen = KNET_HEADER_PING_SIZE; - inbuf->kh_type = KNET_HEADER_TYPE_PONG; - inbuf->kh_node = htons(knet_h->host_id); - recv_seq_num = ntohs(inbuf->khp_ping_seq_num); - src_link->status.stats.rx_ping_packets++; - src_link->status.stats.rx_ping_bytes += len; - - wipe_bufs = 0; - - if (!inbuf->khp_ping_timed) { - /* - * we might be receiving this message from all links, but we want - * to process it only the first time - */ - if (recv_seq_num != src_host->untimed_rx_seq_num) { - /* - * cache the untimed seq num - */ - src_host->untimed_rx_seq_num = recv_seq_num; - /* - * if the host has received data in between - * untimed ping, then we don't need to wipe the bufs - */ - if (src_host->got_data) { - src_host->got_data = 0; - wipe_bufs = 0; - } else { - wipe_bufs = 1; - } - } - _seq_num_lookup(src_host, recv_seq_num, 0, wipe_bufs); - } else { - /* - * pings always arrives in bursts over all the link - * catch the first of them to cache the seq num and - * avoid duplicate processing - */ - if 
(recv_seq_num != src_host->timed_rx_seq_num) { - src_host->timed_rx_seq_num = recv_seq_num; - - if (recv_seq_num == 0) { - _seq_num_lookup(src_host, recv_seq_num, 0, 1); - } - } - } - - if (knet_h->crypto_in_use_config) { - if (crypto_encrypt_and_sign(knet_h, - (const unsigned char *)inbuf, - outlen, - knet_h->recv_from_links_buf_crypt, - &outlen) < 0) { - log_debug(knet_h, KNET_SUB_RX, "Unable to encrypt pong packet"); - break; - } - outbuf = knet_h->recv_from_links_buf_crypt; - stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex); - if (stats_err < 0) { - log_err(knet_h, KNET_SUB_RX, "Unable to get mutex lock: %s", strerror(stats_err)); - break; - } - knet_h->stats_extra.tx_crypt_pong_packets++; - pthread_mutex_unlock(&knet_h->handle_stats_mutex); - } - -retry_pong: - if (src_link->transport_connected) { - if (transport_get_connection_oriented(knet_h, src_link->transport) == TRANSPORT_PROTO_NOT_CONNECTION_ORIENTED) { - len = sendto(src_link->outsock, outbuf, outlen, MSG_DONTWAIT | MSG_NOSIGNAL, - (struct sockaddr *) &src_link->dst_addr, sizeof(struct sockaddr_storage)); - } else { - len = sendto(src_link->outsock, outbuf, outlen, MSG_DONTWAIT | MSG_NOSIGNAL, NULL, 0); - } - savederrno = errno; - if (len != outlen) { - err = transport_tx_sock_error(knet_h, src_link->transport, src_link->outsock, len, savederrno); - switch(err) { - case -1: /* unrecoverable error */ - log_debug(knet_h, KNET_SUB_RX, - "Unable to send pong reply (sock: %d) packet (sendto): %d %s. 
recorded src ip: %s src port: %s dst ip: %s dst port: %s", - src_link->outsock, errno, strerror(errno), - src_link->status.src_ipaddr, src_link->status.src_port, - src_link->status.dst_ipaddr, src_link->status.dst_port); - src_link->status.stats.tx_pong_errors++; - break; - case 0: /* ignore error and continue */ - break; - case 1: /* retry to send those same data */ - src_link->status.stats.tx_pong_retries++; - goto retry_pong; - break; - } - } - src_link->status.stats.tx_pong_packets++; - src_link->status.stats.tx_pong_bytes += outlen; - } - break; - case KNET_HEADER_TYPE_PONG: - src_link->status.stats.rx_pong_packets++; - src_link->status.stats.rx_pong_bytes += len; - clock_gettime(CLOCK_MONOTONIC, &src_link->status.pong_last); - - memmove(&recvtime, &inbuf->khp_ping_time[0], sizeof(struct timespec)); - timespec_diff(recvtime, - src_link->status.pong_last, &latency_last); - - if ((latency_last / 1000llu) > src_link->pong_timeout) { - log_debug(knet_h, KNET_SUB_RX, - "Incoming pong packet from host: %u link: %u has higher latency than pong_timeout. 
Discarding", - src_host->host_id, src_link->link_id); - } else { - - /* - * in words : ('previous mean' * '(count -1)') + 'new value') / 'count' - */ - - src_link->status.stats.latency_samples++; - - /* - * limit to max_samples (precision) - */ - if (src_link->status.stats.latency_samples >= src_link->latency_max_samples) { - src_link->status.stats.latency_samples = src_link->latency_max_samples; - } - src_link->status.stats.latency_ave = - (((src_link->status.stats.latency_ave * (src_link->status.stats.latency_samples - 1)) + (latency_last / 1000llu)) / src_link->status.stats.latency_samples); - - if (src_link->status.stats.latency_ave < src_link->pong_timeout_adj) { - if (!src_link->status.connected) { - if (src_link->received_pong >= src_link->pong_count) { - log_info(knet_h, KNET_SUB_RX, "host: %u link: %u is up", - src_host->host_id, src_link->link_id); - _link_updown(knet_h, src_host->host_id, src_link->link_id, src_link->status.enabled, 1, 0); - } else { - src_link->received_pong++; - log_debug(knet_h, KNET_SUB_RX, "host: %u link: %u received pong: %u", - src_host->host_id, src_link->link_id, src_link->received_pong); - } - } - } - /* Calculate latency stats */ - if (src_link->status.stats.latency_ave > src_link->status.stats.latency_max) { - src_link->status.stats.latency_max = src_link->status.stats.latency_ave; - } - if (src_link->status.stats.latency_ave < src_link->status.stats.latency_min) { - src_link->status.stats.latency_min = src_link->status.stats.latency_ave; - } - } - break; - case KNET_HEADER_TYPE_PMTUD: - src_link->status.stats.rx_pmtu_packets++; - src_link->status.stats.rx_pmtu_bytes += len; - outlen = KNET_HEADER_PMTUD_SIZE; - inbuf->kh_type = KNET_HEADER_TYPE_PMTUD_REPLY; - inbuf->kh_node = htons(knet_h->host_id); - - if (knet_h->crypto_in_use_config) { - if (crypto_encrypt_and_sign(knet_h, - (const unsigned char *)inbuf, - outlen, - knet_h->recv_from_links_buf_crypt, - &outlen) < 0) { - log_debug(knet_h, KNET_SUB_RX, "Unable to encrypt 
PMTUd reply packet"); - break; - } - outbuf = knet_h->recv_from_links_buf_crypt; - stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex); - if (stats_err < 0) { - log_err(knet_h, KNET_SUB_RX, "Unable to get mutex lock: %s", strerror(stats_err)); - break; - } - knet_h->stats_extra.tx_crypt_pmtu_reply_packets++; - pthread_mutex_unlock(&knet_h->handle_stats_mutex); - } - - /* Unlock so we don't deadlock with tx_mutex */ - pthread_mutex_unlock(&src_link->link_stats_mutex); - - savederrno = pthread_mutex_lock(&knet_h->tx_mutex); - if (savederrno) { - log_err(knet_h, KNET_SUB_RX, "Unable to get TX mutex lock: %s", strerror(savederrno)); - goto out_pmtud; - } -retry_pmtud: - if (src_link->transport_connected) { - if (transport_get_connection_oriented(knet_h, src_link->transport) == TRANSPORT_PROTO_NOT_CONNECTION_ORIENTED) { - len = sendto(src_link->outsock, outbuf, outlen, MSG_DONTWAIT | MSG_NOSIGNAL, - (struct sockaddr *) &src_link->dst_addr, sizeof(struct sockaddr_storage)); - } else { - len = sendto(src_link->outsock, outbuf, outlen, MSG_DONTWAIT | MSG_NOSIGNAL, NULL, 0); - } - savederrno = errno; - if (len != outlen) { - err = transport_tx_sock_error(knet_h, src_link->transport, src_link->outsock, len, savederrno); - stats_err = pthread_mutex_lock(&src_link->link_stats_mutex); - if (stats_err < 0) { - log_err(knet_h, KNET_SUB_RX, "Unable to get mutex lock: %s", strerror(stats_err)); - break; - } - switch(err) { - case -1: /* unrecoverable error */ - log_debug(knet_h, KNET_SUB_RX, - "Unable to send PMTUd reply (sock: %d) packet (sendto): %d %s. 
recorded src ip: %s src port: %s dst ip: %s dst port: %s", - src_link->outsock, errno, strerror(errno), - src_link->status.src_ipaddr, src_link->status.src_port, - src_link->status.dst_ipaddr, src_link->status.dst_port); - - src_link->status.stats.tx_pmtu_errors++; - break; - case 0: /* ignore error and continue */ - src_link->status.stats.tx_pmtu_errors++; - break; - case 1: /* retry to send those same data */ - src_link->status.stats.tx_pmtu_retries++; - pthread_mutex_unlock(&src_link->link_stats_mutex); - goto retry_pmtud; - break; - } - pthread_mutex_unlock(&src_link->link_stats_mutex); - } - } - pthread_mutex_unlock(&knet_h->tx_mutex); -out_pmtud: - return; /* Don't need to unlock link_stats_mutex */ - case KNET_HEADER_TYPE_PMTUD_REPLY: - src_link->status.stats.rx_pmtu_packets++; - src_link->status.stats.rx_pmtu_bytes += len; - - /* pmtud_mutex can't be acquired while we hold a link_stats_mutex (ordering) */ - pthread_mutex_unlock(&src_link->link_stats_mutex); - - if (pthread_mutex_lock(&knet_h->pmtud_mutex) != 0) { - log_debug(knet_h, KNET_SUB_RX, "Unable to get mutex lock"); break; - } - src_link->last_recv_mtu = inbuf->khp_pmtud_size; - pthread_cond_signal(&knet_h->pmtud_cond); - pthread_mutex_unlock(&knet_h->pmtud_mutex); - return; - default: - pthread_mutex_unlock(&src_link->link_stats_mutex); - return; } pthread_mutex_unlock(&src_link->link_stats_mutex); } static void _handle_recv_from_links(knet_handle_t knet_h, int sockfd, struct knet_mmsghdr *msg) { int err, savederrno; int i, msg_recv, transport; if (pthread_rwlock_rdlock(&knet_h->global_rwlock) != 0) { log_debug(knet_h, KNET_SUB_RX, "Unable to get global read lock"); return; } if (_is_valid_fd(knet_h, sockfd) < 1) { /* * this is normal if a fd got an event and before we grab the read lock * and the link is removed by another thread */ goto exit_unlock; } transport = knet_h->knet_transport_fd_tracker[sockfd].transport; /* * reset msg_namelen to buffer size because after recvmmsg * each msg_namelen 
will contain sizeof sockaddr_in or sockaddr_in6 */ for (i = 0; i < PCKT_RX_BUFS; i++) { msg[i].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage); } msg_recv = _recvmmsg(sockfd, &msg[0], PCKT_RX_BUFS, MSG_DONTWAIT | MSG_NOSIGNAL); savederrno = errno; /* * WARNING: man page for recvmmsg is wrong. Kernel implementation here: * recvmmsg can return: * -1 on error * 0 if the previous run of recvmmsg recorded an error on the socket * N number of messages (see exception below). * * If there is an error from recvmsg after receiving a frame or more, the recvmmsg * loop is interrupted, error recorded in the socket (getsockopt(SO_ERROR) and * it will be visibile in the next run. * * Need to be careful how we handle errors at this stage. * * error messages need to be handled on a per transport/protocol base * at this point we have different layers of error handling * - msg_recv < 0 -> error from this run * msg_recv = 0 -> error from previous run and error on socket needs to be cleared * - per-transport message data * example: msg[i].msg_hdr.msg_flags & MSG_NOTIFICATION or msg_len for SCTP == EOF, * but for UDP it is perfectly legal to receive a 0 bytes message.. go figure * - NOTE: on SCTP MSG_NOTIFICATION we get msg_recv == PCKT_FRAG_MAX messages and no * errno set. That means the error api needs to be able to abort the loop below. 
*/ if (msg_recv <= 0) { transport_rx_sock_error(knet_h, transport, sockfd, msg_recv, savederrno); goto exit_unlock; } for (i = 0; i < msg_recv; i++) { err = transport_rx_is_data(knet_h, transport, sockfd, &msg[i]); /* * TODO: make this section silent once we are confident * all protocols packet handlers are good */ switch(err) { case KNET_TRANSPORT_RX_ERROR: /* on error */ log_debug(knet_h, KNET_SUB_RX, "Transport reported error parsing packet"); goto exit_unlock; break; case KNET_TRANSPORT_RX_NOT_DATA_CONTINUE: /* packet is not data and we should continue the packet process loop */ log_debug(knet_h, KNET_SUB_RX, "Transport reported no data, continue"); break; case KNET_TRANSPORT_RX_NOT_DATA_STOP: /* packet is not data and we should STOP the packet process loop */ log_debug(knet_h, KNET_SUB_RX, "Transport reported no data, stop"); goto exit_unlock; break; case KNET_TRANSPORT_RX_IS_DATA: /* packet is data and should be parsed as such */ /* * processing incoming packets vs access lists */ if ((knet_h->use_access_lists) && (transport_get_acl_type(knet_h, transport) == USE_GENERIC_ACL)) { if (!check_validate(knet_h, sockfd, transport, msg[i].msg_hdr.msg_name)) { char src_ipaddr[KNET_MAX_HOST_LEN]; char src_port[KNET_MAX_PORT_LEN]; memset(src_ipaddr, 0, KNET_MAX_HOST_LEN); memset(src_port, 0, KNET_MAX_PORT_LEN); if (knet_addrtostr(msg[i].msg_hdr.msg_name, sockaddr_len(msg[i].msg_hdr.msg_name), src_ipaddr, KNET_MAX_HOST_LEN, src_port, KNET_MAX_PORT_LEN) < 0) { log_debug(knet_h, KNET_SUB_RX, "Packet rejected: unable to resolve host/port"); } else { log_debug(knet_h, KNET_SUB_RX, "Packet rejected from %s/%s", src_ipaddr, src_port); } /* * continue processing the other packets */ continue; } } _parse_recv_from_links(knet_h, sockfd, &msg[i]); break; case KNET_TRANSPORT_RX_OOB_DATA_CONTINUE: log_debug(knet_h, KNET_SUB_RX, "Transport is processing sock OOB data, continue"); break; case KNET_TRANSPORT_RX_OOB_DATA_STOP: log_debug(knet_h, KNET_SUB_RX, "Transport has completed 
processing sock OOB data, stop"); goto exit_unlock; break; } } exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); } void *_handle_recv_from_links_thread(void *data) { int i, nev; knet_handle_t knet_h = (knet_handle_t) data; struct epoll_event events[KNET_EPOLL_MAX_EVENTS]; struct sockaddr_storage address[PCKT_RX_BUFS]; struct knet_mmsghdr msg[PCKT_RX_BUFS]; struct iovec iov_in[PCKT_RX_BUFS]; set_thread_status(knet_h, KNET_THREAD_RX, KNET_THREAD_STARTED); memset(&msg, 0, sizeof(msg)); memset(&events, 0, sizeof(events)); for (i = 0; i < PCKT_RX_BUFS; i++) { iov_in[i].iov_base = (void *)knet_h->recv_from_links_buf[i]; iov_in[i].iov_len = KNET_DATABUFSIZE; memset(&msg[i].msg_hdr, 0, sizeof(struct msghdr)); msg[i].msg_hdr.msg_name = &address[i]; msg[i].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage); msg[i].msg_hdr.msg_iov = &iov_in[i]; msg[i].msg_hdr.msg_iovlen = 1; } while (!shutdown_in_progress(knet_h)) { nev = epoll_wait(knet_h->recv_from_links_epollfd, events, KNET_EPOLL_MAX_EVENTS, knet_h->threads_timer_res / 1000); /* * the RX threads only need to notify that there has been at least * one successful run after queue flush has been requested. 
* See setfwd in handle.c */ if (get_thread_flush_queue(knet_h, KNET_THREAD_RX) == KNET_THREAD_QUEUE_FLUSH) { set_thread_flush_queue(knet_h, KNET_THREAD_RX, KNET_THREAD_QUEUE_FLUSHED); } /* * we use timeout to detect if thread is shutting down */ if (nev == 0) { continue; } for (i = 0; i < nev; i++) { _handle_recv_from_links(knet_h, events[i].data.fd, msg); } } set_thread_status(knet_h, KNET_THREAD_RX, KNET_THREAD_STOPPED); return NULL; } ssize_t knet_recv(knet_handle_t knet_h, char *buff, const size_t buff_len, const int8_t channel) { int savederrno = 0; ssize_t err = 0; struct iovec iov_in; if (!knet_h) { errno = EINVAL; return -1; } if (buff == NULL) { errno = EINVAL; return -1; } if (buff_len <= 0) { errno = EINVAL; return -1; } if (buff_len > KNET_MAX_PACKET_SIZE) { errno = EINVAL; return -1; } if (channel < 0) { errno = EINVAL; return -1; } if (channel >= KNET_DATAFD_MAX) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (!knet_h->sockfd[channel].in_use) { savederrno = EINVAL; err = -1; goto out_unlock; } memset(&iov_in, 0, sizeof(iov_in)); iov_in.iov_base = (void *)buff; iov_in.iov_len = buff_len; err = readv(knet_h->sockfd[channel].sockfd[0], &iov_in, 1); savederrno = errno; out_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } diff --git a/libknet/threads_tx.c b/libknet/threads_tx.c index 922082b7..74c5ea43 100644 --- a/libknet/threads_tx.c +++ b/libknet/threads_tx.c @@ -1,883 +1,986 @@ /* * Copyright (C) 2012-2020 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. 
Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #include "config.h" -#include #include #include #include #include #include #include "compat.h" #include "compress.h" #include "crypto.h" #include "host.h" #include "link.h" #include "logging.h" #include "transports.h" #include "transport_common.h" #include "threads_common.h" #include "threads_heartbeat.h" #include "threads_tx.h" #include "netutils.h" +#include "onwire_v1.h" /* * SEND */ static int _dispatch_to_links(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_mmsghdr *msg, int msgs_to_send) { int link_idx, msg_idx, sent_msgs, prev_sent, progress; int err = 0, savederrno = 0, locked = 0; unsigned int i; struct knet_mmsghdr *cur; struct knet_link *cur_link; for (link_idx = 0; link_idx < dst_host->active_link_entries; link_idx++) { prev_sent = 0; progress = 1; locked = 0; cur_link = &dst_host->link[dst_host->active_links[link_idx]]; if (cur_link->transport == KNET_TRANSPORT_LOOPBACK) { continue; } savederrno = pthread_mutex_lock(&cur_link->link_stats_mutex); if (savederrno) { log_err(knet_h, KNET_SUB_TX, "Unable to get stats mutex lock for host %u link %u: %s", dst_host->host_id, cur_link->link_id, strerror(savederrno)); continue; } locked = 1; msg_idx = 0; while (msg_idx < msgs_to_send) { msg[msg_idx].msg_hdr.msg_name = &cur_link->dst_addr; /* Cast for Linux/BSD compatibility */ for (i=0; i<(unsigned int)msg[msg_idx].msg_hdr.msg_iovlen; i++) { cur_link->status.stats.tx_data_bytes += msg[msg_idx].msg_hdr.msg_iov[i].iov_len; } cur_link->status.stats.tx_data_packets++; msg_idx++; } retry: cur = &msg[prev_sent]; sent_msgs = _sendmmsg(dst_host->link[dst_host->active_links[link_idx]].outsock, transport_get_connection_oriented(knet_h, dst_host->link[dst_host->active_links[link_idx]].transport), &cur[0], msgs_to_send - prev_sent, MSG_DONTWAIT | MSG_NOSIGNAL); savederrno = errno; err = transport_tx_sock_error(knet_h, dst_host->link[dst_host->active_links[link_idx]].transport, 
dst_host->link[dst_host->active_links[link_idx]].outsock, sent_msgs, savederrno); switch(err) { case -1: /* unrecoverable error */ cur_link->status.stats.tx_data_errors++; goto out_unlock; break; case 0: /* ignore error and continue */ break; case 1: /* retry to send those same data */ cur_link->status.stats.tx_data_retries++; goto retry; break; } prev_sent = prev_sent + sent_msgs; if ((sent_msgs >= 0) && (prev_sent < msgs_to_send)) { if ((sent_msgs) || (progress)) { if (sent_msgs) { progress = 1; } else { progress = 0; } #ifdef DEBUG log_debug(knet_h, KNET_SUB_TX, "Unable to send all (%d/%d) data packets to host %s (%u) link %s:%s (%u)", sent_msgs, msg_idx, dst_host->name, dst_host->host_id, dst_host->link[dst_host->active_links[link_idx]].status.dst_ipaddr, dst_host->link[dst_host->active_links[link_idx]].status.dst_port, dst_host->link[dst_host->active_links[link_idx]].link_id); #endif goto retry; } if (!progress) { savederrno = EAGAIN; err = -1; goto out_unlock; } } if ((dst_host->link_handler_policy == KNET_LINK_POLICY_RR) && (dst_host->active_link_entries > 1)) { uint8_t cur_link_id = dst_host->active_links[0]; memmove(&dst_host->active_links[0], &dst_host->active_links[1], KNET_MAX_LINK - 1); dst_host->active_links[dst_host->active_link_entries - 1] = cur_link_id; break; } pthread_mutex_unlock(&cur_link->link_stats_mutex); locked = 0; } out_unlock: if (locked) { pthread_mutex_unlock(&cur_link->link_stats_mutex); } errno = savederrno; return err; } -static int _parse_recv_from_sock(knet_handle_t knet_h, size_t inlen, int8_t channel, int is_sync) +static int _dispatch_to_local(knet_handle_t knet_h, unsigned char *data, size_t inlen, int8_t channel) { - size_t outlen, frag_len; - struct knet_host *dst_host; - knet_node_id_t dst_host_ids_temp[KNET_MAX_HOST]; - size_t dst_host_ids_entries_temp = 0; - knet_node_id_t dst_host_ids[KNET_MAX_HOST]; - size_t dst_host_ids_entries = 0; - int bcast = 1; - struct iovec iov_out[PCKT_FRAG_MAX][2]; - int iovcnt_out = 2; - 
uint8_t frag_idx; - unsigned int temp_data_mtu; - size_t host_idx; - int send_mcast = 0; - struct knet_header *inbuf; - int savederrno = 0; - int err = 0; - seq_num_t tx_seq_num; - struct knet_mmsghdr msg[PCKT_FRAG_MAX]; - int msgs_to_send, msg_idx; - unsigned int i; - int j; - int send_local = 0; - int data_compressed = 0; - size_t uncrypted_frag_size; - int stats_locked = 0, stats_err = 0; + int err = 0, savederrno = 0; + const unsigned char *buf = data; + ssize_t buflen = inlen; + struct knet_link *local_link = knet_h->host_index[knet_h->host_id]->link; - inbuf = knet_h->recv_from_sock_buf; - - if (knet_h->enabled != 1) { - log_debug(knet_h, KNET_SUB_TX, "Received data packet but forwarding is disabled"); - savederrno = ECANCELED; - err = -1; - goto out_unlock; +local_retry: + err = write(knet_h->sockfd[channel].sockfd[knet_h->sockfd[channel].is_created], buf, buflen); + savederrno = errno; + if (err < 0) { + log_err(knet_h, KNET_SUB_TRANSP_LOOPBACK, "send local failed. error=%s\n", strerror(errno)); + local_link->status.stats.tx_data_errors++; + goto out; } - - /* - * move this into a separate function to expand on - * extra switching rules - */ - switch(inbuf->kh_type) { - case KNET_HEADER_TYPE_DATA: - if (knet_h->dst_host_filter_fn) { - bcast = knet_h->dst_host_filter_fn( - knet_h->dst_host_filter_fn_private_data, - (const unsigned char *)inbuf->khp_data_userdata, - inlen, - KNET_NOTIFY_TX, - knet_h->host_id, - knet_h->host_id, - &channel, - dst_host_ids_temp, - &dst_host_ids_entries_temp); - if (bcast < 0) { - log_debug(knet_h, KNET_SUB_TX, "Error from dst_host_filter_fn: %d", bcast); - savederrno = EFAULT; - err = -1; - goto out_unlock; - } - - if ((!bcast) && (!dst_host_ids_entries_temp)) { - log_debug(knet_h, KNET_SUB_TX, "Message is unicast but no dst_host_ids_entries"); - savederrno = EINVAL; - err = -1; - goto out_unlock; - } - - if ((!bcast) && - (dst_host_ids_entries_temp > KNET_MAX_HOST)) { - log_debug(knet_h, KNET_SUB_TX, "dst_host_filter_fn 
returned too many destinations"); - savederrno = EINVAL; - err = -1; - goto out_unlock; - } - } - - /* Send to localhost if appropriate and enabled */ - if (knet_h->has_loop_link) { - send_local = 0; - if (bcast) { - send_local = 1; - } else { - for (i=0; i< dst_host_ids_entries_temp; i++) { - if (dst_host_ids_temp[i] == knet_h->host_id) { - send_local = 1; - } - } - } - if (send_local) { - const unsigned char *buf = inbuf->khp_data_userdata; - ssize_t buflen = inlen; - struct knet_link *local_link; - - local_link = knet_h->host_index[knet_h->host_id]->link; - - local_retry: - err = write(knet_h->sockfd[channel].sockfd[knet_h->sockfd[channel].is_created], buf, buflen); - if (err < 0) { - log_err(knet_h, KNET_SUB_TRANSP_LOOPBACK, "send local failed. error=%s\n", strerror(errno)); - local_link->status.stats.tx_data_errors++; - } - if (err > 0 && err < buflen) { - log_debug(knet_h, KNET_SUB_TRANSP_LOOPBACK, "send local incomplete=%d bytes of %zu\n", err, inlen); - local_link->status.stats.tx_data_retries++; - buf += err; - buflen -= err; - goto local_retry; - } - if (err == buflen) { - local_link->status.stats.tx_data_packets++; - local_link->status.stats.tx_data_bytes += inlen; - } - } - } - break; - default: - log_warn(knet_h, KNET_SUB_TX, "Receiving unknown messages from socket"); - savederrno = ENOMSG; - err = -1; - goto out_unlock; - break; + if (err > 0 && err < buflen) { + log_debug(knet_h, KNET_SUB_TRANSP_LOOPBACK, "send local incomplete=%d bytes of %zu\n", err, inlen); + local_link->status.stats.tx_data_retries++; + buf += err; + buflen -= err; + goto local_retry; } - - if (is_sync) { - if ((bcast) || - ((!bcast) && (dst_host_ids_entries_temp > 1))) { - log_debug(knet_h, KNET_SUB_TX, "knet_send_sync is only supported with unicast packets for one destination"); - savederrno = E2BIG; - err = -1; - goto out_unlock; - } + if (err == buflen) { + local_link->status.stats.tx_data_packets++; + local_link->status.stats.tx_data_bytes += inlen; } +out: + errno = 
savederrno; + return err; +} - /* - * check destinations hosts before spending time - * in fragmenting/encrypting packets to save - * time processing data for unreachable hosts. - * for unicast, also remap the destination data - * to skip unreachable hosts. - */ - - if (!bcast) { - dst_host_ids_entries = 0; - for (host_idx = 0; host_idx < dst_host_ids_entries_temp; host_idx++) { - dst_host = knet_h->host_index[dst_host_ids_temp[host_idx]]; - if (!dst_host) { - continue; - } - if (!(dst_host->host_id == knet_h->host_id && - knet_h->has_loop_link) && - dst_host->status.reachable) { - dst_host_ids[dst_host_ids_entries] = dst_host_ids_temp[host_idx]; - dst_host_ids_entries++; - } - } - if (!dst_host_ids_entries) { - savederrno = EHOSTDOWN; - err = -1; - goto out_unlock; - } - } else { - send_mcast = 0; - for (dst_host = knet_h->host_head; dst_host != NULL; dst_host = dst_host->next) { - if (!(dst_host->host_id == knet_h->host_id && - knet_h->has_loop_link) && - dst_host->status.reachable) { - send_mcast = 1; - break; - } - } - if (!send_mcast) { - savederrno = EHOSTDOWN; - err = -1; - goto out_unlock; - } - } +static int _prep_tx_bufs(knet_handle_t knet_h, + struct knet_header *inbuf, uint8_t onwire_ver, + unsigned char *data, size_t inlen, + seq_num_t tx_seq_num, int8_t channel, int bcast, int data_compressed, + int *msgs_to_send, struct iovec iov_out[PCKT_FRAG_MAX][2], int *iovcnt_out) +{ + int err = 0, savederrno = 0; + unsigned int temp_data_mtu; if (!knet_h->data_mtu) { /* * using MIN_MTU_V4 for data mtu is not completely accurate but safe enough */ log_debug(knet_h, KNET_SUB_TX, "Received data packet but data MTU is still unknown." " Packet might not be delivered." 
" Assuming minimum IPv4 MTU (%d)", KNET_PMTUD_MIN_MTU_V4); temp_data_mtu = KNET_PMTUD_MIN_MTU_V4; } else { /* * take a copy of the mtu to avoid value changing under * our feet while we are sending a fragmented pckt */ temp_data_mtu = knet_h->data_mtu; } + if (knet_h->onwire_ver_remap) { + prep_tx_bufs_v1(knet_h, inbuf, data, inlen, temp_data_mtu, tx_seq_num, channel, bcast, data_compressed, msgs_to_send, iov_out, iovcnt_out); + } else { + switch (onwire_ver) { + case 1: + prep_tx_bufs_v1(knet_h, inbuf, data, inlen, temp_data_mtu, tx_seq_num, channel, bcast, data_compressed, msgs_to_send, iov_out, iovcnt_out); + break; + default: /* this should never hit as filters are in place in the calling functions */ + log_warn(knet_h, KNET_SUB_TX, "preparing data onwire version %u not supported", onwire_ver); + savederrno = EINVAL; + err = -1; + goto out; + break; + } + } + +out: + errno = savederrno; + return err; +} + +static int _compress_data(knet_handle_t knet_h, unsigned char* data, size_t *inlen, int *data_compressed) +{ + int err = 0, savederrno = 0; + int stats_locked = 0, stats_err = 0; + size_t cmp_outlen = KNET_DATABUFSIZE_COMPRESS; + struct timespec start_time; + struct timespec end_time; + uint64_t compress_time; + /* * compress data */ - if ((knet_h->compress_model > 0) && (inlen > knet_h->compress_threshold)) { - size_t cmp_outlen = KNET_DATABUFSIZE_COMPRESS; - struct timespec start_time; - struct timespec end_time; - uint64_t compress_time; - - clock_gettime(CLOCK_MONOTONIC, &start_time); - err = compress(knet_h, - (const unsigned char *)inbuf->khp_data_userdata, inlen, - knet_h->send_to_links_buf_compress, (ssize_t *)&cmp_outlen); + if (knet_h->compress_model > 0) { + if (*inlen > knet_h->compress_threshold) { + clock_gettime(CLOCK_MONOTONIC, &start_time); + err = compress(knet_h, + data, *inlen, + knet_h->send_to_links_buf_compress, (ssize_t *)&cmp_outlen); - savederrno = errno; + savederrno = errno; + clock_gettime(CLOCK_MONOTONIC, &end_time); + 
timespec_diff(start_time, end_time, &compress_time); - stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex); - if (stats_err < 0) { - log_err(knet_h, KNET_SUB_TX, "Unable to get mutex lock: %s", strerror(stats_err)); - err = -1; - savederrno = stats_err; - goto out_unlock; - } - stats_locked = 1; - /* Collect stats */ - clock_gettime(CLOCK_MONOTONIC, &end_time); - timespec_diff(start_time, end_time, &compress_time); + stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex); + /* pthread_mutex_lock() reports failure with a positive error number, so test for non-zero */ + if (stats_err) { + log_err(knet_h, KNET_SUB_TX, "Unable to get mutex lock: %s", strerror(stats_err)); + err = -1; + savederrno = stats_err; + goto out; + } + stats_locked = 1; + /* Collect stats */ - if (compress_time < knet_h->stats.tx_compress_time_min) { - knet_h->stats.tx_compress_time_min = compress_time; - } - if (compress_time > knet_h->stats.tx_compress_time_max) { - knet_h->stats.tx_compress_time_max = compress_time; - } - knet_h->stats.tx_compress_time_ave = - (unsigned long long)(knet_h->stats.tx_compress_time_ave * knet_h->stats.tx_compressed_packets + - compress_time) / (knet_h->stats.tx_compressed_packets+1); - if (err < 0) { - knet_h->stats.tx_failed_to_compress++; - log_warn(knet_h, KNET_SUB_COMPRESS, "Compression failed (%d): %s", err, strerror(savederrno)); - } else { - knet_h->stats.tx_compressed_packets++; - knet_h->stats.tx_compressed_original_bytes += inlen; - knet_h->stats.tx_compressed_size_bytes += cmp_outlen; - - if (cmp_outlen < inlen) { - memmove(inbuf->khp_data_userdata, knet_h->send_to_links_buf_compress, cmp_outlen); - inlen = cmp_outlen; - data_compressed = 1; + if (compress_time < knet_h->stats.tx_compress_time_min) { + knet_h->stats.tx_compress_time_min = compress_time; + } + if (compress_time > knet_h->stats.tx_compress_time_max) { + knet_h->stats.tx_compress_time_max = compress_time; + } + knet_h->stats.tx_compress_time_ave = + (unsigned long long)(knet_h->stats.tx_compress_time_ave * knet_h->stats.tx_compressed_packets + + compress_time) /
(knet_h->stats.tx_compressed_packets+1); + if (err < 0) { + knet_h->stats.tx_failed_to_compress++; + log_warn(knet_h, KNET_SUB_COMPRESS, "Compression failed (%d): %s", err, strerror(savederrno)); } else { - knet_h->stats.tx_unable_to_compress++; + knet_h->stats.tx_compressed_packets++; + knet_h->stats.tx_compressed_original_bytes += *inlen; + knet_h->stats.tx_compressed_size_bytes += cmp_outlen; + + if (cmp_outlen < *inlen) { + memmove(data, knet_h->send_to_links_buf_compress, cmp_outlen); + *inlen = cmp_outlen; + *data_compressed = 1; + } else { + knet_h->stats.tx_unable_to_compress++; + } } } - } - if (!stats_locked) { - stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex); - if (stats_err < 0) { - log_err(knet_h, KNET_SUB_TX, "Unable to get mutex lock: %s", strerror(stats_err)); - err = -1; - savederrno = stats_err; - goto out_unlock; + if (!*data_compressed) { + if (!stats_locked) { + stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex); + /* pthread_mutex_lock() reports failure with a positive error number, so test for non-zero */ + if (stats_err) { + log_err(knet_h, KNET_SUB_TX, "Unable to get mutex lock: %s", strerror(stats_err)); + err = -1; + savederrno = stats_err; + goto out; + } + stats_locked = 1; + } + knet_h->stats.tx_uncompressed_packets++; + } + if (stats_locked) { + pthread_mutex_unlock(&knet_h->handle_stats_mutex); } } - if (knet_h->compress_model > 0 && !data_compressed) { - knet_h->stats.tx_uncompressed_packets++; - } - pthread_mutex_unlock(&knet_h->handle_stats_mutex); - stats_locked = 0; - /* - * prepare the outgoing buffers - */ +out: + errno = savederrno; + return err; +} - frag_len = inlen; - frag_idx = 0; +static int _encrypt_bufs(knet_handle_t knet_h, int msgs_to_send, struct iovec iov_out[PCKT_FRAG_MAX][2], int *iovcnt_out) +{ + int err = 0, savederrno = 0, stats_err = 0; + struct timespec start_time; + struct timespec end_time; + uint64_t crypt_time; + uint8_t frag_idx = 0; + size_t outlen, uncrypted_frag_size; + int j; - inbuf->khp_data_bcast = bcast; - inbuf->khp_data_frag_num = ceil((float)inlen /
temp_data_mtu); - inbuf->khp_data_channel = channel; - if (data_compressed) { - inbuf->khp_data_compress = knet_h->compress_model; - } else { - inbuf->khp_data_compress = 0; + if (knet_h->crypto_in_use_config) { + while (frag_idx < msgs_to_send) { + clock_gettime(CLOCK_MONOTONIC, &start_time); + if (crypto_encrypt_and_signv( + knet_h, + iov_out[frag_idx], *iovcnt_out, + knet_h->send_to_links_buf_crypt[frag_idx], + (ssize_t *)&outlen) < 0) { + log_debug(knet_h, KNET_SUB_TX, "Unable to encrypt packet"); + savederrno = ECHILD; + err = -1; + goto out; + } + clock_gettime(CLOCK_MONOTONIC, &end_time); + timespec_diff(start_time, end_time, &crypt_time); + + stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex); + /* pthread_mutex_lock() reports failure with a positive error number, so test for non-zero */ + if (stats_err) { + log_err(knet_h, KNET_SUB_TX, "Unable to get mutex lock: %s", strerror(stats_err)); + err = -1; + savederrno = stats_err; + goto out; + } + + if (crypt_time < knet_h->stats.tx_crypt_time_min) { + knet_h->stats.tx_crypt_time_min = crypt_time; + } + if (crypt_time > knet_h->stats.tx_crypt_time_max) { + knet_h->stats.tx_crypt_time_max = crypt_time; + } + knet_h->stats.tx_crypt_time_ave = + (knet_h->stats.tx_crypt_time_ave * knet_h->stats.tx_crypt_packets + + crypt_time) / (knet_h->stats.tx_crypt_packets+1); + + uncrypted_frag_size = 0; + for (j=0; j < *iovcnt_out; j++) { + uncrypted_frag_size += iov_out[frag_idx][j].iov_len; + } + knet_h->stats.tx_crypt_byte_overhead += (outlen - uncrypted_frag_size); + knet_h->stats.tx_crypt_packets++; + pthread_mutex_unlock(&knet_h->handle_stats_mutex); + + iov_out[frag_idx][0].iov_base = knet_h->send_to_links_buf_crypt[frag_idx]; + iov_out[frag_idx][0].iov_len = outlen; + frag_idx++; + } + *iovcnt_out = 1; } +out: + errno = savederrno; + return err; +} + +static int _get_tx_seq_num(knet_handle_t knet_h, seq_num_t *tx_seq_num) +{ + int savederrno = 0; - if (pthread_mutex_lock(&knet_h->tx_seq_num_mutex)) { + savederrno = pthread_mutex_lock(&knet_h->tx_seq_num_mutex); + if (savederrno) { log_debug(knet_h,
KNET_SUB_TX, "Unable to get seq mutex lock"); - goto out_unlock; + errno = savederrno; + return -1; } + knet_h->tx_seq_num++; /* * force seq_num 0 to detect a node that has crashed and rejoining * the knet instance. seq_num 0 will clear the buffers in the RX * thread */ if (knet_h->tx_seq_num == 0) { knet_h->tx_seq_num++; } /* * cache the value in locked context */ - tx_seq_num = knet_h->tx_seq_num; - inbuf->khp_data_seq_num = htons(knet_h->tx_seq_num); + *tx_seq_num = knet_h->tx_seq_num; pthread_mutex_unlock(&knet_h->tx_seq_num_mutex); /* * forcefully broadcast a ping to all nodes every SEQ_MAX / 8 * pckts. * this solves 2 problems: * 1) on TX socket overloads we generate extra pings to keep links alive * 2) in 3+ nodes setup, where all the traffic is flowing between node 1 and 2, * node 3+ will be able to keep in sync on the TX seq_num even without * receiving traffic or pings in betweens. This avoids issues with * rollover of the circular buffer */ - if (tx_seq_num % (SEQ_MAX / 8) == 0) { + if (*tx_seq_num % (SEQ_MAX / 8) == 0) { _send_pings(knet_h, 0); } + return 0; +} - if (inbuf->khp_data_frag_num > 1) { - while (frag_idx < inbuf->khp_data_frag_num) { - /* - * set the iov_base - */ - iov_out[frag_idx][0].iov_base = (void *)knet_h->send_to_links_buf[frag_idx]; - iov_out[frag_idx][0].iov_len = KNET_HEADER_DATA_SIZE; - iov_out[frag_idx][1].iov_base = inbuf->khp_data_userdata + (temp_data_mtu * frag_idx); - /* - * set the len - */ - if (frag_len > temp_data_mtu) { - iov_out[frag_idx][1].iov_len = temp_data_mtu; - } else { - iov_out[frag_idx][1].iov_len = frag_len; - } +static int _get_data_dests(knet_handle_t knet_h, unsigned char* data, size_t inlen, + int8_t *channel, int *bcast, int *send_local, + knet_node_id_t *dst_host_ids, size_t *dst_host_ids_entries, + int is_sync) +{ + int err = 0, savederrno = 0; + knet_node_id_t dst_host_ids_temp[KNET_MAX_HOST]; /* store destinations from filter */ + size_t dst_host_ids_entries_temp = 0; + size_t 
dst_host_ids_entries_temp2 = 0; /* workaround gcc here */ + struct knet_host *dst_host; + size_t host_idx; - /* - * copy the frag info on all buffers - */ - knet_h->send_to_links_buf[frag_idx]->kh_type = inbuf->kh_type; - knet_h->send_to_links_buf[frag_idx]->khp_data_seq_num = inbuf->khp_data_seq_num; - knet_h->send_to_links_buf[frag_idx]->khp_data_frag_num = inbuf->khp_data_frag_num; - knet_h->send_to_links_buf[frag_idx]->khp_data_bcast = inbuf->khp_data_bcast; - knet_h->send_to_links_buf[frag_idx]->khp_data_channel = inbuf->khp_data_channel; - knet_h->send_to_links_buf[frag_idx]->khp_data_compress = inbuf->khp_data_compress; - - frag_len = frag_len - temp_data_mtu; - frag_idx++; + if (knet_h->dst_host_filter_fn) { + *bcast = knet_h->dst_host_filter_fn( + knet_h->dst_host_filter_fn_private_data, + data, + inlen, + KNET_NOTIFY_TX, + knet_h->host_id, + knet_h->host_id, + channel, + dst_host_ids_temp, + &dst_host_ids_entries_temp); + if (*bcast < 0) { + log_debug(knet_h, KNET_SUB_TX, "Error from dst_host_filter_fn: %d", *bcast); + savederrno = EFAULT; + err = -1; + goto out; } - iovcnt_out = 2; - } else { - iov_out[frag_idx][0].iov_base = (void *)inbuf; - iov_out[frag_idx][0].iov_len = frag_len + KNET_HEADER_DATA_SIZE; - iovcnt_out = 1; - } - if (knet_h->crypto_in_use_config) { - struct timespec start_time; - struct timespec end_time; - uint64_t crypt_time; + if ((!*bcast) && (!dst_host_ids_entries_temp)) { + log_debug(knet_h, KNET_SUB_TX, "Message is unicast but no dst_host_ids_entries"); + savederrno = EINVAL; + err = -1; + goto out; + } - frag_idx = 0; - while (frag_idx < inbuf->khp_data_frag_num) { - clock_gettime(CLOCK_MONOTONIC, &start_time); - if (crypto_encrypt_and_signv( - knet_h, - iov_out[frag_idx], iovcnt_out, - knet_h->send_to_links_buf_crypt[frag_idx], - (ssize_t *)&outlen) < 0) { - log_debug(knet_h, KNET_SUB_TX, "Unable to encrypt packet"); - savederrno = ECHILD; - err = -1; - goto out_unlock; - } - clock_gettime(CLOCK_MONOTONIC, &end_time); - 
timespec_diff(start_time, end_time, &crypt_time); + if ((!*bcast) && + (dst_host_ids_entries_temp > KNET_MAX_HOST)) { + log_debug(knet_h, KNET_SUB_TX, "dst_host_filter_fn returned too many destinations"); + savederrno = EINVAL; + err = -1; + goto out; + } - stats_err = pthread_mutex_lock(&knet_h->handle_stats_mutex); - if (stats_err < 0) { - log_err(knet_h, KNET_SUB_TX, "Unable to get mutex lock: %s", strerror(stats_err)); + if (is_sync) { + if ((*bcast) || + ((!*bcast) && (dst_host_ids_entries_temp > 1))) { + log_debug(knet_h, KNET_SUB_TX, "knet_send_sync is only supported with unicast packets for one destination"); + savederrno = E2BIG; err = -1; - savederrno = stats_err; - goto out_unlock; + goto out; } + } + } - if (crypt_time < knet_h->stats.tx_crypt_time_min) { - knet_h->stats.tx_crypt_time_min = crypt_time; + /* + * check destinations hosts before spending time + * in fragmenting/encrypting packets to save + * time processing data for unreachable hosts. + * for unicast, also remap the destination data + * to skip unreachable hosts. 
+ */ + + if (!*bcast) { + *dst_host_ids_entries = dst_host_ids_entries_temp2; + for (host_idx = 0; host_idx < dst_host_ids_entries_temp; host_idx++) { + dst_host = knet_h->host_index[dst_host_ids_temp[host_idx]]; + if (!dst_host) { + continue; } - if (crypt_time > knet_h->stats.tx_crypt_time_max) { - knet_h->stats.tx_crypt_time_max = crypt_time; + if ((dst_host->host_id == knet_h->host_id) && + (knet_h->has_loop_link)) { + *send_local = 1; } - knet_h->stats.tx_crypt_time_ave = - (knet_h->stats.tx_crypt_time_ave * knet_h->stats.tx_crypt_packets + - crypt_time) / (knet_h->stats.tx_crypt_packets+1); - - uncrypted_frag_size = 0; - for (j=0; j < iovcnt_out; j++) { - uncrypted_frag_size += iov_out[frag_idx][j].iov_len; + if (!((dst_host->host_id == knet_h->host_id) && + (knet_h->has_loop_link)) && + dst_host->status.reachable) { + dst_host_ids[dst_host_ids_entries_temp2] = dst_host_ids_temp[host_idx]; + dst_host_ids_entries_temp2++; } - knet_h->stats.tx_crypt_byte_overhead += (outlen - uncrypted_frag_size); - knet_h->stats.tx_crypt_packets++; - pthread_mutex_unlock(&knet_h->handle_stats_mutex); - - iov_out[frag_idx][0].iov_base = knet_h->send_to_links_buf_crypt[frag_idx]; - iov_out[frag_idx][0].iov_len = outlen; - frag_idx++; } - iovcnt_out = 1; + if ((!dst_host_ids_entries_temp2) && (!*send_local)) { + savederrno = EHOSTDOWN; + err = -1; + goto out; + } + *dst_host_ids_entries = dst_host_ids_entries_temp2; + } else { + *bcast = 0; + *send_local = 0; + for (dst_host = knet_h->host_head; dst_host != NULL; dst_host = dst_host->next) { + if ((dst_host->host_id == knet_h->host_id) && + (knet_h->has_loop_link)) { + *send_local = 1; + } + if (!(dst_host->host_id == knet_h->host_id && + knet_h->has_loop_link) && + dst_host->status.reachable) { + *bcast = 1; + } + } + if ((!*bcast) && (!*send_local)) { + savederrno = EHOSTDOWN; + err = -1; + goto out; + } } - memset(&msg, 0, sizeof(msg)); +out: + errno = savederrno; + return err; +} - msgs_to_send = inbuf->khp_data_frag_num; 
+static int _prep_and_send_msgs(knet_handle_t knet_h, int bcast, knet_node_id_t *dst_host_ids, size_t dst_host_ids_entries, int msgs_to_send, struct iovec iov_out[PCKT_FRAG_MAX][2], int iovcnt_out) +{ + int err = 0, savederrno = 0; + struct knet_host *dst_host; + struct knet_mmsghdr msg[PCKT_FRAG_MAX]; + int msg_idx; + size_t host_idx; + + memset(&msg, 0, sizeof(msg)); msg_idx = 0; while (msg_idx < msgs_to_send) { msg[msg_idx].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage); msg[msg_idx].msg_hdr.msg_iov = &iov_out[msg_idx][0]; msg[msg_idx].msg_hdr.msg_iovlen = iovcnt_out; msg_idx++; } if (!bcast) { for (host_idx = 0; host_idx < dst_host_ids_entries; host_idx++) { dst_host = knet_h->host_index[dst_host_ids[host_idx]]; err = _dispatch_to_links(knet_h, dst_host, &msg[0], msgs_to_send); savederrno = errno; if (err) { - goto out_unlock; + goto out; } } } else { for (dst_host = knet_h->host_head; dst_host != NULL; dst_host = dst_host->next) { if (dst_host->status.reachable) { err = _dispatch_to_links(knet_h, dst_host, &msg[0], msgs_to_send); savederrno = errno; if (err) { - goto out_unlock; + goto out; } } } } -out_unlock: +out: errno = savederrno; return err; } -static void _handle_send_to_links(knet_handle_t knet_h, struct msghdr *msg, int sockfd, int8_t channel, int type) +static int _parse_recv_from_sock(knet_handle_t knet_h, size_t inlen, int8_t channel, uint8_t onwire_ver, int is_sync) +{ + int err = 0, savederrno = 0; + struct knet_header *inbuf = knet_h->recv_from_sock_buf; /* all TX packets are stored here regardless of the onwire */ + unsigned char *data; /* onwire neutrual pointer to data to send */ + int data_compressed = 0; /* track data compression to fill the header */ + seq_num_t tx_seq_num; + + int bcast = 1; /* assume all packets are to be broadcasted unless filter tells us differently */ + knet_node_id_t dst_host_ids[KNET_MAX_HOST]; /* store destinations from filter */ + size_t dst_host_ids_entries = 0; + int send_local = 0; /* send packets to 
loopback */ + + struct iovec iov_out[PCKT_FRAG_MAX][2]; + int iovcnt_out = 2; + int msgs_to_send = 0; + + if (knet_h->enabled != 1) { + log_debug(knet_h, KNET_SUB_TX, "Received data packet but forwarding is disabled"); + savederrno = ECANCELED; + err = -1; + goto out; + } + + if (knet_h->onwire_ver_remap) { + data = get_data_v1(knet_h, inbuf); + } else { + switch (onwire_ver) { + case 1: + data = get_data_v1(knet_h, inbuf); + break; + default: /* this should never hit as filters are in place in the calling functions */ + log_warn(knet_h, KNET_SUB_TX, "preparing data onwire version %u not supported", onwire_ver); + savederrno = EINVAL; + err = -1; + goto out; + break; + } + } + + err = _get_data_dests(knet_h, data, inlen, + &channel, &bcast, &send_local, + dst_host_ids, &dst_host_ids_entries, + is_sync); + if (err < 0) { + savederrno = errno; + goto out; + } + + /* Send to localhost if appropriate and enabled */ + if (send_local) { + err = _dispatch_to_local(knet_h, data, inlen, channel); + if (err < 0) { + savederrno = errno; + goto out; + } + } + + err = _compress_data(knet_h, data, &inlen, &data_compressed); + if (err < 0) { + savederrno = errno; + goto out; + } + + err = _get_tx_seq_num(knet_h, &tx_seq_num); + if (err < 0) { + savederrno = errno; + goto out; + } + + err = _prep_tx_bufs(knet_h, inbuf, onwire_ver, data, inlen, tx_seq_num, channel, bcast, data_compressed, &msgs_to_send, iov_out, &iovcnt_out); + if (err < 0) { + savederrno = errno; + goto out; + } + + err = _encrypt_bufs(knet_h, msgs_to_send, iov_out, &iovcnt_out); + if (err < 0) { + savederrno = errno; + goto out; + } + + err = _prep_and_send_msgs(knet_h, bcast, dst_host_ids, dst_host_ids_entries, msgs_to_send, iov_out, iovcnt_out); + if (err < 0) { + savederrno = errno; + goto out; + } + +out: + errno = savederrno; + return err; +} + +static void _handle_send_to_links(knet_handle_t knet_h, int sockfd, uint8_t onwire_ver, int8_t channel) { ssize_t inlen = 0; int savederrno = 0, docallback = 0; + 
struct iovec iov_in; + struct msghdr msg; + struct sockaddr_storage address; + + memset(&iov_in, 0, sizeof(iov_in)); + + if (knet_h->onwire_ver_remap) { + iov_in.iov_base = (void *)get_data_v1(knet_h, knet_h->recv_from_sock_buf); + iov_in.iov_len = KNET_MAX_PACKET_SIZE; + } else { + switch (onwire_ver) { + case 1: + iov_in.iov_base = (void *)get_data_v1(knet_h, knet_h->recv_from_sock_buf); + iov_in.iov_len = KNET_MAX_PACKET_SIZE; + break; + default: + log_warn(knet_h, KNET_SUB_TX, "preparing data onwire version %u not supported", onwire_ver); + break; + } + } + + memset(&msg, 0, sizeof(struct msghdr)); + msg.msg_name = &address; + msg.msg_namelen = sizeof(struct sockaddr_storage); + msg.msg_iov = &iov_in; + msg.msg_iovlen = 1; if ((channel >= 0) && (channel < KNET_DATAFD_MAX) && (!knet_h->sockfd[channel].is_socket)) { - inlen = readv(sockfd, msg->msg_iov, 1); + inlen = readv(sockfd, msg.msg_iov, 1); } else { - inlen = recvmsg(sockfd, msg, MSG_DONTWAIT | MSG_NOSIGNAL); - if (msg->msg_flags & MSG_TRUNC) { + inlen = recvmsg(sockfd, &msg, MSG_DONTWAIT | MSG_NOSIGNAL); + if (msg.msg_flags & MSG_TRUNC) { log_warn(knet_h, KNET_SUB_TX, "Received truncated message from sock %d. 
Discarding", sockfd); return; } } if (inlen == 0) { savederrno = 0; docallback = 1; } else if (inlen < 0) { struct epoll_event ev; savederrno = errno; docallback = 1; memset(&ev, 0, sizeof(struct epoll_event)); if (epoll_ctl(knet_h->send_to_links_epollfd, EPOLL_CTL_DEL, knet_h->sockfd[channel].sockfd[knet_h->sockfd[channel].is_created], &ev)) { log_err(knet_h, KNET_SUB_TX, "Unable to del datafd %d from linkfd epoll pool: %s", knet_h->sockfd[channel].sockfd[0], strerror(savederrno)); } else { knet_h->sockfd[channel].has_error = 1; } } else { - knet_h->recv_from_sock_buf->kh_type = type; - _parse_recv_from_sock(knet_h, inlen, channel, 0); + _parse_recv_from_sock(knet_h, inlen, channel, onwire_ver, 0); } if (docallback) { knet_h->sock_notify_fn(knet_h->sock_notify_fn_private_data, knet_h->sockfd[channel].sockfd[0], channel, KNET_NOTIFY_TX, inlen, savederrno); } } void *_handle_send_to_links_thread(void *data) { knet_handle_t knet_h = (knet_handle_t) data; struct epoll_event events[KNET_EPOLL_MAX_EVENTS]; - int i, nev, type; + int i, nev; int flush, flush_queue_limit; int8_t channel; - struct iovec iov_in; - struct msghdr msg; - struct sockaddr_storage address; + uint8_t onwire_ver; set_thread_status(knet_h, KNET_THREAD_TX, KNET_THREAD_STARTED); memset(&events, 0, sizeof(events)); - memset(&iov_in, 0, sizeof(iov_in)); - iov_in.iov_base = (void *)knet_h->recv_from_sock_buf->khp_data_userdata; - iov_in.iov_len = KNET_MAX_PACKET_SIZE; - - memset(&msg, 0, sizeof(struct msghdr)); - msg.msg_name = &address; - msg.msg_namelen = sizeof(struct sockaddr_storage); - msg.msg_iov = &iov_in; - msg.msg_iovlen = 1; - - knet_h->recv_from_sock_buf->kh_version = KNET_HEADER_VERSION; - knet_h->recv_from_sock_buf->khp_data_frag_seq = 0; - knet_h->recv_from_sock_buf->kh_node = htons(knet_h->host_id); - - for (i = 0; i < PCKT_FRAG_MAX; i++) { - knet_h->send_to_links_buf[i]->kh_version = KNET_HEADER_VERSION; - knet_h->send_to_links_buf[i]->khp_data_frag_seq = i + 1; - 
knet_h->send_to_links_buf[i]->kh_node = htons(knet_h->host_id); - } flush_queue_limit = 0; while (!shutdown_in_progress(knet_h)) { nev = epoll_wait(knet_h->send_to_links_epollfd, events, KNET_EPOLL_MAX_EVENTS + 1, knet_h->threads_timer_res / 1000); flush = get_thread_flush_queue(knet_h, KNET_THREAD_TX); /* * we use timeout to detect if thread is shutting down */ if (nev == 0) { /* * ideally we want to communicate that we are done flushing * the queue when we have an epoll timeout event */ if (flush == KNET_THREAD_QUEUE_FLUSH) { set_thread_flush_queue(knet_h, KNET_THREAD_TX, KNET_THREAD_QUEUE_FLUSHED); flush_queue_limit = 0; } continue; } /* * fall back in case the TX sockets will continue receive traffic * and we do not hit an epoll timeout. * * allow up to a 100 loops to flush queues, then we give up. * there might be more clean ways to do it by checking the buffer queue * on each socket, but we have tons of sockets and calculations can go wrong. * Also, why would you disable data forwarding and still send packets? 
*/ if (flush == KNET_THREAD_QUEUE_FLUSH) { if (flush_queue_limit >= 100) { log_debug(knet_h, KNET_SUB_TX, "Timeout flushing the TX queue, expect packet loss"); set_thread_flush_queue(knet_h, KNET_THREAD_TX, KNET_THREAD_QUEUE_FLUSHED); flush_queue_limit = 0; } else { flush_queue_limit++; } } else { flush_queue_limit = 0; } if (pthread_rwlock_rdlock(&knet_h->global_rwlock) != 0) { log_debug(knet_h, KNET_SUB_TX, "Unable to get read lock"); continue; } + if (pthread_mutex_lock(&knet_h->onwire_mutex)) { + log_debug(knet_h, KNET_SUB_TX, "Unable to get onwire mutex lock"); + goto out_unlock; + } + onwire_ver = knet_h->onwire_ver; + pthread_mutex_unlock(&knet_h->onwire_mutex); + for (i = 0; i < nev; i++) { - type = KNET_HEADER_TYPE_DATA; for (channel = 0; channel < KNET_DATAFD_MAX; channel++) { if ((knet_h->sockfd[channel].in_use) && (knet_h->sockfd[channel].sockfd[knet_h->sockfd[channel].is_created] == events[i].data.fd)) { break; } } if (channel >= KNET_DATAFD_MAX) { log_debug(knet_h, KNET_SUB_TX, "No available channels"); continue; /* channel not found */ } if (pthread_mutex_lock(&knet_h->tx_mutex) != 0) { log_debug(knet_h, KNET_SUB_TX, "Unable to get mutex lock"); continue; } - _handle_send_to_links(knet_h, &msg, events[i].data.fd, channel, type); + _handle_send_to_links(knet_h, events[i].data.fd, onwire_ver, channel); pthread_mutex_unlock(&knet_h->tx_mutex); } - +out_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); } set_thread_status(knet_h, KNET_THREAD_TX, KNET_THREAD_STOPPED); return NULL; } int knet_send_sync(knet_handle_t knet_h, const char *buff, const size_t buff_len, const int8_t channel) { int savederrno = 0, err = 0; + uint8_t onwire_ver; if (!knet_h) { errno = EINVAL; return -1; } if (buff == NULL) { errno = EINVAL; return -1; } if (buff_len <= 0) { errno = EINVAL; return -1; } if (buff_len > KNET_MAX_PACKET_SIZE) { errno = EINVAL; return -1; } if (channel < 0) { errno = EINVAL; return -1; } if (channel >= KNET_DATAFD_MAX) { errno = EINVAL; return -1; 
} savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_TX, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (!knet_h->sockfd[channel].in_use) { savederrno = EINVAL; err = -1; goto out; } + /* + * keep the lock error and flag the failure: with err left at 0 + * the caller would be told the packet was sent + */ + savederrno = pthread_mutex_lock(&knet_h->onwire_mutex); + if (savederrno) { + log_debug(knet_h, KNET_SUB_TX, "Unable to get onwire mutex lock"); + err = -1; + goto out; + } + onwire_ver = knet_h->onwire_ver; + pthread_mutex_unlock(&knet_h->onwire_mutex); + savederrno = pthread_mutex_lock(&knet_h->tx_mutex); if (savederrno) { log_err(knet_h, KNET_SUB_TX, "Unable to get TX mutex lock: %s", strerror(savederrno)); err = -1; goto out; } - knet_h->recv_from_sock_buf->kh_type = KNET_HEADER_TYPE_DATA; - memmove(knet_h->recv_from_sock_buf->khp_data_userdata, buff, buff_len); - err = _parse_recv_from_sock(knet_h, buff_len, channel, 1); + if (knet_h->onwire_ver_remap) { + memmove(get_data_v1(knet_h, knet_h->recv_from_sock_buf), buff, buff_len); + } else { + switch (onwire_ver) { + case 1: + memmove(get_data_v1(knet_h, knet_h->recv_from_sock_buf), buff, buff_len); + break; + default: + log_warn(knet_h, KNET_SUB_TX, "preparing sync data onwire version %u not supported", onwire_ver); + /* nothing was queued: report EINVAL instead of returning success */ + savederrno = EINVAL; + err = -1; + goto out_tx; + break; + } + } + + err = _parse_recv_from_sock(knet_h, buff_len, channel, onwire_ver, 1); savederrno = errno; +out_tx: pthread_mutex_unlock(&knet_h->tx_mutex); - out: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ?
savederrno : 0; return err; } ssize_t knet_send(knet_handle_t knet_h, const char *buff, const size_t buff_len, const int8_t channel) { int savederrno = 0; ssize_t err = 0; struct iovec iov_out[1]; if (!knet_h) { errno = EINVAL; return -1; } if (buff == NULL) { errno = EINVAL; return -1; } if (buff_len <= 0) { errno = EINVAL; return -1; } if (buff_len > KNET_MAX_PACKET_SIZE) { errno = EINVAL; return -1; } if (channel < 0) { errno = EINVAL; return -1; } if (channel >= KNET_DATAFD_MAX) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (!knet_h->sockfd[channel].in_use) { savederrno = EINVAL; err = -1; goto out_unlock; } memset(iov_out, 0, sizeof(iov_out)); iov_out[0].iov_base = (void *)buff; iov_out[0].iov_len = buff_len; err = writev(knet_h->sockfd[channel].sockfd[0], iov_out, 1); savederrno = errno; out_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } diff --git a/man/Makefile.am b/man/Makefile.am index 29a59714..cb5c7aad 100644 --- a/man/Makefile.am +++ b/man/Makefile.am @@ -1,148 +1,151 @@ # # Copyright (C) 2017-2020 Red Hat, Inc. All rights reserved. # # Authors: Fabio M. 
Di Nitto # Federico Simoncelli # # This software licensed under GPL-2.0+ # MAINTAINERCLEANFILES = Makefile.in include $(top_srcdir)/build-aux/check.mk EXTRA_DIST = \ api-to-man-page-coverage if BUILD_MAN knet_man3_MANS = \ knet_addrtostr.3 \ knet_handle_add_datafd.3 \ knet_handle_clear_stats.3 \ knet_handle_compress.3 \ knet_handle_enable_filter.3 \ knet_handle_enable_pmtud_notify.3 \ knet_handle_enable_sock_notify.3 \ knet_handle_free.3 \ knet_handle_get_channel.3 \ knet_get_compress_list.3 \ knet_get_crypto_list.3 \ knet_handle_get_datafd.3 \ knet_handle_get_stats.3 \ knet_get_transport_id_by_name.3 \ knet_get_transport_list.3 \ knet_get_transport_name_by_id.3 \ knet_handle_get_transport_reconnect_interval.3 \ knet_handle_new.3 \ knet_handle_pmtud_get.3 \ knet_handle_pmtud_set.3 \ knet_handle_pmtud_getfreq.3 \ knet_handle_pmtud_setfreq.3 \ knet_handle_remove_datafd.3 \ knet_handle_setfwd.3 \ knet_handle_set_transport_reconnect_interval.3 \ knet_host_add.3 \ knet_host_enable_status_change_notify.3 \ knet_host_get_host_list.3 \ knet_host_get_id_by_host_name.3 \ knet_host_get_name_by_host_id.3 \ knet_host_get_policy.3 \ knet_host_get_status.3 \ knet_host_remove.3 \ knet_host_set_name.3 \ knet_host_set_policy.3 \ knet_link_clear_config.3 \ knet_link_get_config.3 \ knet_link_get_enable.3 \ knet_link_get_link_list.3 \ knet_link_get_ping_timers.3 \ knet_link_get_pong_count.3 \ knet_link_get_priority.3 \ knet_link_get_status.3 \ knet_link_set_config.3 \ knet_link_set_enable.3 \ knet_link_set_ping_timers.3 \ knet_link_set_pong_count.3 \ knet_link_set_priority.3 \ knet_log_get_loglevel.3 \ knet_log_get_loglevel_id.3 \ knet_log_get_loglevel_name.3 \ knet_log_get_subsystem_id.3 \ knet_log_get_subsystem_name.3 \ knet_log_set_loglevel.3 \ knet_recv.3 \ knet_send.3 \ knet_send_sync.3 \ knet_strtoaddr.3 \ knet_handle_set_threads_timer_res.3 \ knet_handle_get_threads_timer_res.3 \ knet_link_enable_status_change_notify.3 \ knet_handle_enable_access_lists.3 \ knet_link_add_acl.3 \ 
knet_link_insert_acl.3 \ knet_link_rm_acl.3 \ knet_link_clear_acl.3 \ knet_handle_crypto_set_config.3 \ knet_handle_crypto_use_config.3 \ - knet_handle_crypto_rx_clear_traffic.3 + knet_handle_crypto_rx_clear_traffic.3 \ + knet_handle_enable_onwire_ver_notify.3 \ + knet_handle_get_onwire_ver.3 \ + knet_handle_set_onwire_ver.3 if BUILD_LIBNOZZLE nozzle_man3_MANS = \ nozzle_add_ip.3 \ nozzle_close.3 \ nozzle_del_ip.3 \ nozzle_get_fd.3 \ nozzle_get_handle_by_name.3 \ nozzle_get_ips.3 \ nozzle_get_mac.3 \ nozzle_get_mtu.3 \ nozzle_get_name_by_handle.3 \ nozzle_open.3 \ nozzle_reset_mac.3 \ nozzle_reset_mtu.3 \ nozzle_run_updown.3 \ nozzle_set_down.3 \ nozzle_set_mac.3 \ nozzle_set_mtu.3 \ nozzle_set_up.3 endif man3_MANS = $(knet_man3_MANS) $(nozzle_man3_MANS) $(MANS): doxyfile-knet.stamp doxyfile-nozzle.stamp # export LSAN_OPTIONS unconditionally for now. # there is no urgency to fix doxygen2man for leaks or bad memory access # since it's a one-shot tool and doesn't affect runtime. doxyfile-knet.stamp: Doxyfile-knet $(top_srcdir)/libknet/libknet.h $(DOXYGEN) Doxyfile-knet LSAN_OPTIONS="exitcode=0" $(DOXYGEN2MAN) -m -P -o $(builddir) -s 3 -p @PACKAGE_NAME@ -H "Kronosnet Programmer's Manual" \ $$($(UTC_DATE_AT)$(SOURCE_EPOCH) +"-D %F -Y %Y") -d $(builddir)/xml-knet/ libknet_8h.xml touch doxyfile-knet.stamp doxyfile-nozzle.stamp: Doxyfile-nozzle $(top_srcdir)/libnozzle/libnozzle.h if BUILD_LIBNOZZLE $(DOXYGEN) Doxyfile-nozzle LSAN_OPTIONS="exitcode=0" $(DOXYGEN2MAN) -m -P -o $(builddir) -s 3 -p @PACKAGE_NAME@ -H "Kronosnet Programmer's Manual" \ $$($(UTC_DATE_AT)$(SOURCE_EPOCH) +"-D %F -Y %Y") -d $(builddir)/xml-nozzle/ libnozzle_8h.xml endif touch doxyfile-nozzle.stamp noinst_SCRIPTS = api-to-man-page-coverage check-local: check-api-to-man-page-coverage-libknet check-api-to-man-page-coverage-libnozzle check-api-to-man-page-coverage-libnozzle: if BUILD_LIBNOZZLE $(srcdir)/api-to-man-page-coverage $(top_srcdir) nozzle endif check-api-to-man-page-coverage-libknet: 
$(srcdir)/api-to-man-page-coverage $(top_srcdir) knet endif clean-local: rm -rf doxyfile*.stamp xml* *.3