diff --git a/.gitignore b/.gitignore index 4c954dfa..1bcf390e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,59 +1,60 @@ *.o *.a *.la *.lo *.pc *.tar.* *.sha256* stamp-h1 Makefile.in Makefile .deps .libs .version .dirstamp # build-aux/release.mk related litter /.tarball-version /tag-* aclocal.m4 autoconf autoheader autom4te.cache automake compile config.* configure* debian/changelog debian/kronosnetd.postinst debian/patches depcomp install-sh libtoolize ltmain.sh m4/libtool.m4 m4/lt~obsolete.m4 m4/ltoptions.m4 m4/ltsugar.m4 m4/ltversion.m4 missing libtool autoscan.log init/kronosnetd init/kronosnetd.service kronosnetd/kronosnetd kronosnetd/knet-keygen kronosnetd/kronosnetd.logrotate kronosnet.spec *.swp *_test *_bench test-driver *.trs *.log Doxyfile* doxyfile*.stamp xml-*/ doxyxml *.3 +cov* diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..ab7a66d9 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,62 @@ +sudo: false + +language: c + +compiler: + - gcc + +env: + global: + # The next declaration is the encrypted COVERITY_SCAN_TOKEN, created + # via the "travis encrypt" command using the project repo's public key + - secure: "UzfmWmjTx8Vq4DBlcbvXiEqWfDoPkoQ63dYgWrdlpvp+JTCb+XYMa/KAt0v8+U2IhixudI6Vuq5ztGFqFL8jnNFHOqfjIqTtdxi5Hen8vRymyqar963HOOhlXQ9+XN6+IztqmJx7jVI26O7m+Pt+CTzhaz8u2eh3yTYq6pIQ0Cs=" + +git: + depth: false + +before_install: + - echo -n | openssl s_client -connect scan.coverity.com:443 | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' | sudo tee -a /etc/ssl/certs/ca-certificates.crt + - ./autogen.sh + +script: if [ "${COVERITY_SCAN_BRANCH}" != 1 ]; then ./configure && make ; fi + +branches: + only: + - coverity_scan + +addons: + coverity_scan: + project: + name: "kronosnet/kronosnet" + description: "Kronosnet, often referred to as knet, is a network abstraction layer designed for High Availability use cases, where redundancy, security, fault tolerance and fast fail-over are the core requirements of your application." 
+ notification_email: fdinitto@redhat.com + build_command_prepend: "./configure" + build_command: "make" + branch_pattern: coverity_scan + apt: + packages: + - build-essential + - libtool-bin + - make + - git + - gcc + - clang + - autoconf + - autotools-dev + - libtool + - libnss3-dev + - libnspr4-dev + - libssl-dev + - pkg-config + - zlib1g-dev + - liblz4-dev + - liblzo2-dev + - liblzma-dev + - libbz2-dev + - libsctp-dev + - libqb-dev + - libxml2-dev + - doxygen + - libzstd-dev + - libnl-3-dev + - libnl-route-3-dev diff --git a/Makefile.am b/Makefile.am index dc5f8a5e..b7d0205a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,220 +1,220 @@ # # Copyright (C) 2010-2019 Red Hat, Inc. All rights reserved. # # Author: Fabio M. Di Nitto # # This software licensed under GPL-2.0+ # MAINTAINERCLEANFILES = Makefile.in aclocal.m4 configure depcomp \ config.guess config.sub missing install-sh \ ltmain.sh compile config.h.in config.h.in~ \ autoscan.log configure.scan test-driver \ m4/libtool.m4 m4/lt~obsolete.m4 m4/ltoptions.m4 \ m4/ltsugar.m4 m4/ltversion.m4 include $(top_srcdir)/build-aux/check.mk AUTOMAKE_OPTIONS = foreign ACLOCAL_AMFLAGS = -I m4 SPEC = $(PACKAGE_NAME).spec TARGZFILE = $(PACKAGE_NAME)-$(VERSION).tar.gz EXTRA_DIST = autogen.sh .version \ NOTES_TO_PACKAGE_MAINTAINERS \ $(SPEC).in build-aux SUBDIRS = init libnozzle libknet kronosnetd if BUILD_MAN SUBDIRS += man endif if BUILD_POC SUBDIRS += poc-code endif dist_doc_DATA = \ COPYING.applications \ COPYING.libraries \ COPYRIGHT \ README.licence \ README all-local: $(SPEC) clean-local: - rm -f $(SPEC) + rm -rf $(SPEC) cov* distclean-local: rm -f $(PACKAGE_NAME)-*.tar.* $(PACKAGE_NAME)-*.sha256* tag-* ## make rpm/srpm section. 
$(SPEC): $(SPEC).in .version config.status rm -f $@-t $@ date="`LC_ALL=C $(UTC_DATE_AT)$(SOURCE_EPOCH) "+%a %b %d %Y"`" && \ if [ -f $(abs_srcdir)/.tarball-version ]; then \ gitver="`cat $(abs_srcdir)/.tarball-version`" && \ rpmver=$$gitver && \ alphatag="" && \ dirty="" && \ numcomm="0"; \ elif [ "`git log -1 --pretty=format:x . 2>&1`" = "x" ]; then \ gitver="`GIT_DIR=$(abs_srcdir)/.git git describe --abbrev=4 --match='v*' HEAD 2>/dev/null`" && \ rpmver=`echo $$gitver | sed -e "s/^v//" -e "s/-.*//g"` && \ alphatag=`echo $$gitver | sed -e "s/.*-//" -e "s/^g//"` && \ vtag=`echo $$gitver | sed -e "s/-.*//g"` && \ numcomm=`GIT_DIR=$(abs_srcdir)/.git git rev-list $$vtag..HEAD | wc -l` && \ cd $(abs_srcdir) && \ git update-index --refresh > /dev/null 2>&1 || true && \ dirty=`git diff-index --name-only HEAD 2>/dev/null` && cd - 2>/dev/null; \ else \ gitver="`cd $(abs_srcdir); build-aux/git-version-gen .tarball-version .gitarchivever`" && \ rpmver=$$gitver && \ alphatag="" && \ dirty="" && \ numcomm="0"; \ fi && \ if [ -n "$$dirty" ]; then dirty="dirty"; else dirty=""; fi && \ if [ "$$numcomm" = "0" ]; then \ sed \ -e "s#@version@#$$rpmver#g" \ -e "s#%glo.*alpha.*##g" \ -e "s#%glo.*numcomm.*##g" \ -e "s#@dirty@#$$dirty#g" \ -e "s#@date@#$$date#g" \ $(abs_srcdir)/$@.in > $@-t; \ else \ sed \ -e "s#@version@#$$rpmver#g" \ -e "s#@alphatag@#$$alphatag#g" \ -e "s#@numcomm@#$$numcomm#g" \ -e "s#@dirty@#$$dirty#g" \ -e "s#@date@#$$date#g" \ $(abs_srcdir)/$@.in > $@-t; \ fi; \ if [ -z "$$dirty" ]; then sed -i -e "s#%glo.*dirty.*##g" $@-t; fi if BUILD_SCTP sed -i -e "s#@sctp@#bcond_without#g" $@-t else sed -i -e "s#@sctp@#bcond_with#g" $@-t endif if BUILD_CRYPTO_NSS sed -i -e "s#@nss@#bcond_without#g" $@-t else sed -i -e "s#@nss@#bcond_with#g" $@-t endif if BUILD_CRYPTO_OPENSSL sed -i -e "s#@openssl@#bcond_without#g" $@-t else sed -i -e "s#@openssl@#bcond_with#g" $@-t endif if BUILD_COMPRESS_ZLIB sed -i -e "s#@zlib@#bcond_without#g" $@-t else sed -i -e "s#@zlib@#bcond_with#g" $@-t 
endif if BUILD_COMPRESS_LZ4 sed -i -e "s#@lz4@#bcond_without#g" $@-t else sed -i -e "s#@lz4@#bcond_with#g" $@-t endif if BUILD_COMPRESS_LZO2 sed -i -e "s#@lzo2@#bcond_without#g" $@-t else sed -i -e "s#@lzo2@#bcond_with#g" $@-t endif if BUILD_COMPRESS_LZMA sed -i -e "s#@lzma@#bcond_without#g" $@-t else sed -i -e "s#@lzma@#bcond_with#g" $@-t endif if BUILD_COMPRESS_BZIP2 sed -i -e "s#@bzip2@#bcond_without#g" $@-t else sed -i -e "s#@bzip2@#bcond_with#g" $@-t endif if BUILD_COMPRESS_ZSTD sed -i -e "s#@zstd@#bcond_without#g" $@-t else sed -i -e "s#@zstd@#bcond_with#g" $@-t endif if BUILD_KRONOSNETD sed -i -e "s#@kronosnetd@#bcond_without#g" $@-t else sed -i -e "s#@kronosnetd@#bcond_with#g" $@-t endif if BUILD_LIBNOZZLE sed -i -e "s#@libnozzle@#bcond_without#g" $@-t else sed -i -e "s#@libnozzle@#bcond_with#g" $@-t endif if BUILD_RUNAUTOGEN sed -i -e "s#@runautogen@#bcond_without#g" $@-t else sed -i -e "s#@runautogen@#bcond_with#g" $@-t endif if OVERRIDE_RPM_DEBUGINFO sed -i -e "s#@overriderpmdebuginfo@#bcond_without#g" $@-t else sed -i -e "s#@overriderpmdebuginfo@#bcond_with#g" $@-t endif if BUILD_RPM_DEBUGINFO sed -i -e "s#@rpmdebuginfo@#bcond_without#g" $@-t else sed -i -e "s#@rpmdebuginfo@#bcond_with#g" $@-t endif if BUILD_MAN sed -i -e "s#@buildman@#bcond_without#g" $@-t else sed -i -e "s#@buildman@#bcond_with#g" $@-t endif if INSTALL_TESTS sed -i -e "s#@installtests@#bcond_without#g" $@-t else sed -i -e "s#@installtests@#bcond_with#g" $@-t endif sed -i -e "s#@defaultadmgroup@#$(DEFAULTADMGROUP)#g" $@-t chmod a-w $@-t mv $@-t $@ rm -f $@-t* $(TARGZFILE): $(MAKE) dist RPMBUILDOPTS = --define "_sourcedir $(abs_builddir)" \ --define "_specdir $(abs_builddir)" \ --define "_builddir $(abs_builddir)" \ --define "_srcrpmdir $(abs_builddir)" \ --define "_rpmdir $(abs_builddir)" srpm: clean $(MAKE) $(SPEC) $(TARGZFILE) rpmbuild $(RPMBUILDOPTS) --nodeps -bs $(SPEC) rpm: clean $(MAKE) $(SPEC) $(TARGZFILE) rpmbuild $(RPMBUILDOPTS) -ba $(SPEC) # release/versioning BUILT_SOURCES = 
.version .version: echo $(VERSION) > $@-t && mv $@-t $@ dist-hook: gen-ChangeLog echo $(VERSION) > $(distdir)/.tarball-version echo $(SOURCE_EPOCH) > $(distdir)/source_epoch gen_start_date = 2000-01-01 .PHONY: gen-ChangeLog gen-ChangeLog: if test -d $(abs_srcdir)/.git; then \ LC_ALL=C $(top_srcdir)/build-aux/gitlog-to-changelog \ --since=$(gen_start_date) > $(distdir)/cl-t; \ rm -f $(distdir)/ChangeLog; \ mv $(distdir)/cl-t $(distdir)/ChangeLog; \ fi diff --git a/build-aux/check.mk b/build-aux/check.mk index f42e5528..f372968b 100644 --- a/build-aux/check.mk +++ b/build-aux/check.mk @@ -1,28 +1,46 @@ # # Copyright (C) 2012-2019 Red Hat, Inc. All rights reserved. # # Author: Fabio M. Di Nitto # # This software licensed under GPL-2.0+ # VALGRIND = $(VALGRIND_EXEC) -q --error-exitcode=127 --gen-suppressions=all MEMCHECK = $(VALGRIND) --track-fds=yes --leak-check=full --alignment=16 --suppressions=$(abs_top_srcdir)/build-aux/knet_valgrind_memcheck.supp HELGRIND = $(VALGRIND) --tool=helgrind --suppressions=$(abs_top_srcdir)/build-aux/knet_valgrind_helgrind.supp check-memcheck: $(check_PROGRAMS) if HAS_VALGRIND export KNETMEMCHECK=yes && \ $(MAKE) check LOG_COMPILE="libtool --mode=execute $(MEMCHECK)" else @echo valgrind not available on this platform endif check-helgrind: $(check_PROGRAMS) if HAS_VALGRIND export KNETHELGRIND=yes && \ $(MAKE) check LOG_COMPILE="libtool --mode=execute $(HELGRIND)" else @echo valgrind not available on this platform endif + +check-covscan: +if HAS_COVBUILD + rm -rf $(abs_top_builddir)/cov* + $(MAKE) -C $(abs_top_builddir) clean + $(COVBUILD_EXEC) --dir=$(abs_top_builddir)/cov $(MAKE) -C $(abs_top_builddir) +if HAS_COVANALYZE + $(COVANALYZE_EXEC) --dir=$(abs_top_builddir)/cov --wait-for-license $(covoptions) +if HAS_COVFORMATERRORS + $(COVFORMATERRORS_EXEC) --dir=$(abs_top_builddir)/cov --emacs-style > $(abs_top_builddir)/cov.output.txt + $(COVFORMATERRORS_EXEC) --dir=$(abs_top_builddir)/cov --html-output $(abs_top_builddir)/cov.html +endif 
+else + @echo directory $(abs_top_builddir)/cov ready to be uploaded to https://scan.coverity.com +endif +else + @echo cov-build not available on this platform +endif diff --git a/configure.ac b/configure.ac index e9625922..e430aeb8 100644 --- a/configure.ac +++ b/configure.ac @@ -1,483 +1,496 @@ # # Copyright (C) 2010-2019 Red Hat, Inc. All rights reserved. # # Authors: Fabio M. Di Nitto # Federico Simoncelli # # This software licensed under GPL-2.0+ # # -*- Autoconf -*- # Process this file with autoconf to produce a configure script. # AC_PREREQ([2.63]) AC_INIT([kronosnet], m4_esyscmd([build-aux/git-version-gen .tarball-version .gitarchivever]), [devel@lists.kronosnet.org]) # Don't let AC_PROC_CC (invoked by AC_USE_SYSTEM_EXTENSIONS) replace # undefined CFLAGS with -g -O2, overriding our special OPT_CFLAGS. : ${CFLAGS=""} AC_USE_SYSTEM_EXTENSIONS AM_INIT_AUTOMAKE([1.13 dist-bzip2 dist-xz color-tests -Wno-portability subdir-objects]) LT_PREREQ([2.2.6]) # --enable-new-dtags: Use RUNPATH instead of RPATH. # It is necessary to have this done before libtool does linker detection. 
# See also: https://github.com/kronosnet/kronosnet/issues/107 # --as-needed: Modern systems have builtin ceil() making -lm superfluous but # AC_SEARCH_LIBS can't detect this because it tests with a false prototype AX_CHECK_LINK_FLAG([-Wl,--enable-new-dtags], [AM_LDFLAGS=-Wl,--enable-new-dtags], [AC_MSG_ERROR(["Linker support for --enable-new-dtags is required"])]) AX_CHECK_LINK_FLAG([-Wl,--as-needed], [AM_LDFLAGS="$AM_LDFLAGS -Wl,--as-needed"]) AC_SUBST([AM_LDFLAGS]) saved_LDFLAGS="$LDFLAGS" LDFLAGS="$AM_LDFLAGS $LDFLAGS" LT_INIT LDFLAGS="$saved_LDFLAGS" AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_SRCDIR([kronosnetd/main.c]) AC_CONFIG_HEADERS([config.h]) AC_CANONICAL_HOST AC_LANG([C]) systemddir=${prefix}/lib/systemd/system if test "$prefix" = "NONE"; then prefix="/usr" if test "$localstatedir" = "\${prefix}/var"; then localstatedir="/var" fi if test "$sysconfdir" = "\${prefix}/etc"; then sysconfdir="/etc" fi if test "$systemddir" = "NONE/lib/systemd/system"; then systemddir=/lib/systemd/system fi if test "$libdir" = "\${exec_prefix}/lib"; then if test -e /usr/lib64; then libdir="/usr/lib64" else libdir="/usr/lib" fi fi fi AC_PROG_AWK AC_PROG_GREP AC_PROG_SED AC_PROG_CPP AC_PROG_CC AC_PROG_CC_C99 if test "x$ac_cv_prog_cc_c99" = "xno"; then AC_MSG_ERROR(["C99 support is required"]) fi AC_PROG_LN_S AC_PROG_INSTALL AC_PROG_MAKE_SET PKG_PROG_PKG_CONFIG AC_CHECK_PROGS([VALGRIND_EXEC], [valgrind]) AM_CONDITIONAL([HAS_VALGRIND], [test x$VALGRIND_EXEC != "x"]) +AC_CHECK_PROGS([COVBUILD_EXEC], [cov-build]) +AM_CONDITIONAL([HAS_COVBUILD], [test x$COVBUILD_EXEC != "x"]) + +AC_CHECK_PROGS([COVANALYZE_EXEC], [cov-analyze]) +AM_CONDITIONAL([HAS_COVANALYZE], [test x$COVANALYZE_EXEC != "x"]) + +AC_CHECK_PROGS([COVFORMATERRORS_EXEC], [cov-format-errors]) +AM_CONDITIONAL([HAS_COVFORMATERRORS], [test x$COVFORMATERRORS_EXEC != "x"]) + # KNET_OPTION_DEFINES(stem,type,detection code) # stem: enters name of option, Automake conditional and preprocessor define # type: compress or crypto, 
determines where the default comes from AC_DEFUN([KNET_OPTION_DEFINES],[ AC_ARG_ENABLE([$2-$1],[AS_HELP_STRING([--disable-$2-$1],[disable libknet $1 support])],, [enable_$2_$1="$enable_$2_all"]) AM_CONDITIONAL([BUILD_]m4_toupper([$2_$1]),[test "x$enable_$2_$1" = xyes]) if test "x$enable_$2_$1" = xyes; then $3 fi AC_DEFINE_UNQUOTED([WITH_]m4_toupper([$2_$1]), [`test "x$enable_$2_$1" != xyes; echo $?`], $1 $2 [built in]) ]) AC_ARG_ENABLE([man], [AS_HELP_STRING([--disable-man],[disable man page creation])],, [ enable_man="yes" ]) AM_CONDITIONAL([BUILD_MAN], [test x$enable_man = xyes]) AC_ARG_ENABLE([libknet-sctp], [AS_HELP_STRING([--disable-libknet-sctp],[disable libknet SCTP support])],, [ enable_libknet_sctp="yes" ]) AM_CONDITIONAL([BUILD_SCTP], [test x$enable_libknet_sctp = xyes]) AC_ARG_ENABLE([crypto-all], [AS_HELP_STRING([--disable-crypto-all],[disable libknet all crypto modules support])],, [ enable_crypto_all="yes" ]) KNET_OPTION_DEFINES([nss],[crypto],[PKG_CHECK_MODULES([nss], [nss])]) KNET_OPTION_DEFINES([openssl],[crypto],[ PKG_CHECK_MODULES([openssl],[libcrypto < 1.1], [AC_DEFINE_UNQUOTED([BUILDCRYPTOOPENSSL10], [1], [openssl 1.0 crypto])], [PKG_CHECK_MODULES([openssl],[libcrypto >= 1.1], [AC_DEFINE_UNQUOTED([BUILDCRYPTOOPENSSL11], [1], [openssl 1.1 crypto])])]) ]) AC_ARG_ENABLE([compress-all], [AS_HELP_STRING([--disable-compress-all],[disable libknet all compress modules support])],, [ enable_compress_all="yes" ]) KNET_OPTION_DEFINES([zstd],[compress],[PKG_CHECK_MODULES([libzstd], [libzstd])]) KNET_OPTION_DEFINES([zlib],[compress],[PKG_CHECK_MODULES([zlib], [zlib])]) KNET_OPTION_DEFINES([lz4],[compress],[PKG_CHECK_MODULES([liblz4], [liblz4])]) KNET_OPTION_DEFINES([lzo2],[compress],[ PKG_CHECK_MODULES([lzo2], [lzo2], [# work around broken pkg-config file in v2.10 AC_SUBST([lzo2_CFLAGS],[`echo $lzo2_CFLAGS | sed 's,/lzo *, ,'`])], [AC_CHECK_HEADERS([lzo/lzo1x.h], [AC_CHECK_LIB([lzo2], [lzo1x_decompress_safe], [AC_SUBST([lzo2_LIBS], [-llzo2])])], 
[AC_MSG_ERROR(["missing required lzo/lzo1x.h header"])])]) ]) KNET_OPTION_DEFINES([lzma],[compress],[PKG_CHECK_MODULES([liblzma], [liblzma])]) KNET_OPTION_DEFINES([bzip2],[compress],[ PKG_CHECK_MODULES([bzip2], [bzip2],, [AC_CHECK_HEADERS([bzlib.h], [AC_CHECK_LIB([bz2], [BZ2_bzBuffToBuffCompress], [AC_SUBST([bzip2_LIBS], [-lbz2])])], [AC_MSG_ERROR(["missing required bzlib.h"])])]) ]) AC_ARG_ENABLE([install-tests], [AS_HELP_STRING([--enable-install-tests],[install tests])],, [ enable_install_tests="no" ]) AM_CONDITIONAL([INSTALL_TESTS], [test x$enable_install_tests = xyes]) AC_ARG_ENABLE([poc], [AS_HELP_STRING([--enable-poc],[enable building poc code])],, [ enable_poc="no" ]) AM_CONDITIONAL([BUILD_POC], [test x$enable_poc = xyes]) AC_ARG_ENABLE([kronosnetd], [AS_HELP_STRING([--enable-kronosnetd],[Kronosnetd support])],, [ enable_kronosnetd="no" ]) AM_CONDITIONAL([BUILD_KRONOSNETD], [test x$enable_kronosnetd = xyes]) AC_ARG_ENABLE([runautogen], [AS_HELP_STRING([--enable-runautogen],[run autogen.sh])],, [ enable_runautogen="no" ]) AM_CONDITIONAL([BUILD_RUNAUTOGEN], [test x$enable_runautogen = xyes]) override_rpm_debuginfo_option="yes" AC_ARG_ENABLE([rpm-debuginfo], [AS_HELP_STRING([--enable-rpm-debuginfo],[build debuginfo packages])],, [ enable_rpm_debuginfo="no", override_rpm_debuginfo_option="no" ]) AM_CONDITIONAL([BUILD_RPM_DEBUGINFO], [test x$enable_rpm_debuginfo = xyes]) AM_CONDITIONAL([OVERRIDE_RPM_DEBUGINFO], [test x$override_rpm_debuginfo_option = xyes]) AC_ARG_ENABLE([libnozzle], [AS_HELP_STRING([--enable-libnozzle],[libnozzle support])],, [ enable_libnozzle="yes" ]) if test "x$enable_kronosnetd" = xyes; then enable_libnozzle=yes fi AM_CONDITIONAL([BUILD_LIBNOZZLE], [test x$enable_libnozzle = xyes]) # Checks for libraries. 
AX_PTHREAD(,[AC_MSG_ERROR([POSIX threads support is required])]) saved_LIBS="$LIBS" LIBS= AC_SEARCH_LIBS([ceil], [m], , [AC_MSG_ERROR([ceil not found])]) AC_SUBST([m_LIBS], [$LIBS]) LIBS= AC_SEARCH_LIBS([clock_gettime], [rt], , [AC_MSG_ERROR([clock_gettime not found])]) AC_SUBST([rt_LIBS], [$LIBS]) LIBS= AC_SEARCH_LIBS([dlopen], [dl dld], , [AC_MSG_ERROR([dlopen not found])]) AC_SUBST([dl_LIBS], [$LIBS]) LIBS="$saved_LIBS" +# Check RTLD_DI_ORIGIN (not declared by musl; glibc has it as an enum so cannot use ifdef) +AC_CHECK_DECL([RTLD_DI_ORIGIN], [AC_DEFINE([HAVE_RTLD_DI_ORIGIN], 1, + [define when RTLD_DI_ORIGIN is declared])], ,[[#include <dlfcn.h>]]) + # OS detection AC_MSG_CHECKING([for os in ${host_os}]) case "$host_os" in *linux*) AC_DEFINE_UNQUOTED([KNET_LINUX], [1], [Compiling for Linux platform]) AC_MSG_RESULT([Linux]) ;; *bsd*) AC_DEFINE_UNQUOTED([KNET_BSD], [1], [Compiling for BSD platform]) AC_MSG_RESULT([BSD]) ;; *) AC_MSG_ERROR([Unsupported OS? hmmmm]) ;; esac # Checks for header files. AC_CHECK_HEADERS([sys/epoll.h]) AC_CHECK_FUNCS([kevent]) # if neither sys/epoll.h nor kevent are present, we should fail. if test "x$ac_cv_header_sys_epoll_h" = xno && test "x$ac_cv_func_kevent" = xno; then AC_MSG_ERROR([Both epoll and kevent unavailable on this OS]) fi if test "x$ac_cv_header_sys_epoll_h" = xyes && test "x$ac_cv_func_kevent" = xyes; then AC_MSG_ERROR([Both epoll and kevent available on this OS, please contact the maintainers to fix the code]) fi if test "x$enable_libknet_sctp" = xyes; then AC_CHECK_HEADERS([netinet/sctp.h],, [AC_MSG_ERROR(["missing required SCTP headers"])]) fi # Checks for typedefs, structures, and compiler characteristics. 
AC_C_INLINE AC_TYPE_PID_T AC_TYPE_SIZE_T AC_TYPE_SSIZE_T AC_TYPE_UINT8_T AC_TYPE_UINT16_T AC_TYPE_UINT32_T AC_TYPE_UINT64_T AC_TYPE_INT8_T AC_TYPE_INT16_T AC_TYPE_INT32_T AC_TYPE_INT64_T if test "x$enable_man" = "xyes"; then AC_ARG_VAR([DOXYGEN], [override doxygen executable]) AC_CHECK_PROGS([DOXYGEN], [doxygen], [no]) if test "x$DOXYGEN" = xno; then AC_MSG_ERROR(["Doxygen command not found"]) fi # required by doxyxml to build man pages dynamically # Don't let AC_PROC_CC (invoked by AX_PROG_CC_FOR_BUILD) replace # undefined CFLAGS_FOR_BUILD with -g -O2, overriding our special OPT_CFLAGS. : ${CFLAGS_FOR_BUILD=""} AX_PROG_CC_FOR_BUILD saved_PKG_CONFIG="$PKG_CONFIG" saved_ac_cv_path_PKG_CONFIG="$ac_cv_path_PKG_CONFIG" unset PKG_CONFIG ac_cv_path_PKG_CONFIG AC_PATH_PROG([PKG_CONFIG], [pkg-config]) PKG_CHECK_MODULES([libqb_BUILD], [libqb]) PKG_CHECK_MODULES([libxml_BUILD], [libxml-2.0]) PKG_CONFIG="$saved_PKG_CONFIG" ac_cv_path_PKG_CONFIG="$saved_ac_cv_path_PKG_CONFIG" fi # checks for libnozzle if test "x$enable_libnozzle" = xyes; then if `echo $host_os | grep -q linux`; then PKG_CHECK_MODULES([libnl], [libnl-3.0]) PKG_CHECK_MODULES([libnlroute], [libnl-route-3.0 >= 3.3], [], [PKG_CHECK_MODULES([libnlroute], [libnl-route-3.0 < 3.3], [AC_DEFINE_UNQUOTED([LIBNL3_WORKAROUND], [1], [Enable libnl < 3.3 build workaround])], [])]) fi fi # checks for kronosnetd if test "x$enable_kronosnetd" = xyes; then AC_CHECK_HEADERS([security/pam_appl.h], [AC_CHECK_LIB([pam], [pam_start], [AC_SUBST([pam_LIBS], [-lpam])], [AC_MSG_ERROR([Unable to find LinuxPAM devel files])])]) AC_CHECK_HEADERS([security/pam_misc.h], [AC_CHECK_LIB([pam_misc], [misc_conv], [AC_SUBST([pam_misc_LIBS], [-lpam_misc])], [AC_MSG_ERROR([Unable to find LinuxPAM MISC devel files])])]) PKG_CHECK_MODULES([libqb], [libqb]) AC_CHECK_LIB([qb], [qb_log_thread_priority_set], [have_qb_log_thread_priority_set="yes"], [have_qb_log_thread_priority_set="no"]) if test "x${have_qb_log_thread_priority_set}" = xyes; then 
AC_DEFINE_UNQUOTED([HAVE_QB_LOG_THREAD_PRIORITY_SET], [1], [have qb_log_thread_priority_set]) fi fi # local options AC_ARG_ENABLE([debug], [AS_HELP_STRING([--enable-debug],[enable debug build])]) AC_ARG_WITH([testdir], [AS_HELP_STRING([--with-testdir=DIR],[path to /usr/lib../kronosnet/tests/ dir where to install the test suite])], [ TESTDIR="$withval" ], [ TESTDIR="$libdir/kronosnet/tests" ]) AC_ARG_WITH([initdefaultdir], [AS_HELP_STRING([--with-initdefaultdir=DIR],[path to /etc/sysconfig or /etc/default dir])], [ INITDEFAULTDIR="$withval" ], [ INITDEFAULTDIR="$sysconfdir/default" ]) AC_ARG_WITH([initddir], [AS_HELP_STRING([--with-initddir=DIR],[path to init script directory])], [ INITDDIR="$withval" ], [ INITDDIR="$sysconfdir/init.d" ]) AC_ARG_WITH([systemddir], [AS_HELP_STRING([--with-systemddir=DIR],[path to systemd unit files directory])], [ SYSTEMDDIR="$withval" ], [ SYSTEMDDIR="$systemddir" ]) AC_ARG_WITH([syslogfacility], [AS_HELP_STRING([--with-syslogfacility=FACILITY],[default syslog facility])], [ SYSLOGFACILITY="$withval" ], [ SYSLOGFACILITY="LOG_DAEMON" ]) AC_ARG_WITH([sysloglevel], [AS_HELP_STRING([--with-sysloglevel=LEVEL],[default syslog level])], [ SYSLOGLEVEL="$withval" ], [ SYSLOGLEVEL="LOG_INFO" ]) AC_ARG_WITH([defaultadmgroup], [AS_HELP_STRING([--with-defaultadmgroup=GROUP], [define PAM group. Users part of this group will be allowed to configure kronosnet. 
Others will only receive read-only rights.])], [ DEFAULTADMGROUP="$withval" ], [ DEFAULTADMGROUP="kronosnetadm" ]) ## random vars LOGDIR=${localstatedir}/log/ RUNDIR=${localstatedir}/run/ DEFAULT_CONFIG_DIR=${sysconfdir}/kronosnet ## do subst AC_SUBST([TESTDIR]) AC_SUBST([DEFAULT_CONFIG_DIR]) AC_SUBST([INITDEFAULTDIR]) AC_SUBST([INITDDIR]) AC_SUBST([SYSTEMDDIR]) AC_SUBST([LOGDIR]) AC_SUBST([DEFAULTADMGROUP]) AC_DEFINE_UNQUOTED([DEFAULT_CONFIG_DIR], ["$(eval echo ${DEFAULT_CONFIG_DIR})"], [Default config directory]) AC_DEFINE_UNQUOTED([DEFAULT_CONFIG_FILE], ["$(eval echo ${DEFAULT_CONFIG_DIR}/kronosnetd.conf)"], [Default config file]) AC_DEFINE_UNQUOTED([LOGDIR], ["$(eval echo ${LOGDIR})"], [Default logging directory]) AC_DEFINE_UNQUOTED([DEFAULT_LOG_FILE], ["$(eval echo ${LOGDIR}/kronosnetd.log)"], [Default log file]) AC_DEFINE_UNQUOTED([RUNDIR], ["$(eval echo ${RUNDIR})"], [Default run directory]) AC_DEFINE_UNQUOTED([SYSLOGFACILITY], [$(eval echo ${SYSLOGFACILITY})], [Default syslog facility]) AC_DEFINE_UNQUOTED([SYSLOGLEVEL], [$(eval echo ${SYSLOGLEVEL})], [Default syslog level]) AC_DEFINE_UNQUOTED([DEFAULTADMGROUP], ["$(eval echo ${DEFAULTADMGROUP})"], [Default admin group]) # debug build stuff if test "x${enable_debug}" = xyes; then AC_DEFINE_UNQUOTED([DEBUG], [1], [Compiling Debugging code]) OPT_CFLAGS="-O0" else OPT_CFLAGS="-O3" fi # gdb flags if test "x${GCC}" = xyes; then GDB_FLAGS="-ggdb3" else GDB_FLAGS="-g" fi DEFAULT_CFLAGS="-Werror -Wall -Wextra" # manual overrides # generates too much noise for stub APIs UNWANTED_CFLAGS="-Wno-unused-parameter" AC_SUBST([AM_CFLAGS],["$OPT_CFLAGS $GDB_FLAGS $DEFAULT_CFLAGS $UNWANTED_CFLAGS"]) AX_PROG_DATE AS_IF([test "$ax_cv_prog_date_gnu_date:$ax_cv_prog_date_gnu_utc" = yes:yes], [UTC_DATE_AT="date -u -d@"], [AS_IF([test "x$ax_cv_prog_date_bsd_date" = xyes], [UTC_DATE_AT="date -u -r"], [AC_MSG_ERROR([date utility unable to convert epoch to UTC])])]) AC_SUBST([UTC_DATE_AT]) AC_ARG_VAR([SOURCE_EPOCH],[last modification 
date of the source]) AC_MSG_NOTICE([trying to determine source epoch]) AC_MSG_CHECKING([for source epoch in \$SOURCE_EPOCH]) AS_IF([test -n "$SOURCE_EPOCH"], [AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no]) AC_MSG_CHECKING([for source epoch in source_epoch file]) AS_IF([test -e "$srcdir/source_epoch"], [read SOURCE_EPOCH <"$srcdir/source_epoch" AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no]) AC_MSG_CHECKING([for source epoch baked in by gitattributes export-subst]) SOURCE_EPOCH='$Format:%at$' # template for rewriting by git-archive AS_CASE([$SOURCE_EPOCH], [?Format:*], # was not rewritten [AC_MSG_RESULT([no]) AC_MSG_CHECKING([for source epoch in \$SOURCE_DATE_EPOCH]) AS_IF([test "x$SOURCE_DATE_EPOCH" != x], [SOURCE_EPOCH="$SOURCE_DATE_EPOCH" AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no]) AC_MSG_CHECKING([whether git log can provide a source epoch]) SOURCE_EPOCH=f${SOURCE_EPOCH#\$F} # convert into git log --pretty format SOURCE_EPOCH=$(cd "$srcdir" && git log -1 --pretty=${SOURCE_EPOCH%$} 2>/dev/null) AS_IF([test -n "$SOURCE_EPOCH"], [AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no, using current time and breaking reproducibility]) SOURCE_EPOCH=$(date +%s)])])], [AC_MSG_RESULT([yes])] )]) ]) AC_MSG_NOTICE([using source epoch $($UTC_DATE_AT$SOURCE_EPOCH +'%F %T %Z')]) AC_CONFIG_FILES([ Makefile init/Makefile libnozzle/Makefile libnozzle/libnozzle.pc libnozzle/tests/Makefile kronosnetd/Makefile kronosnetd/kronosnetd.logrotate libknet/Makefile libknet/libknet.pc libknet/tests/Makefile man/Makefile man/Doxyfile-knet man/Doxyfile-nozzle poc-code/Makefile poc-code/iov-hash/Makefile ]) if test "x$VERSION" = "xUNKNOWN"; then AC_MSG_ERROR([m4_text_wrap([ configure was unable to determine the source tree's current version. This generally happens when using git archive (or the github download button) generated tarball/zip file. 
In order to workaround this issue, either use git clone https://github.com/kronosnet/kronosnet.git or use an official release tarball, available at https://kronosnet.org/releases/. Alternatively you can add a compatible version in a .tarball-version file at the top of the source tree, wipe your autom4te.cache dir and generated configure, and rerun autogen.sh. ], [ ], [ ], [76])]) fi AC_OUTPUT diff --git a/libknet/common.c b/libknet/common.c index 30e537e5..ed8ac899 100644 --- a/libknet/common.c +++ b/libknet/common.c @@ -1,156 +1,186 @@ /* * Copyright (C) 2010-2019 Red Hat, Inc. All rights reserved. * * Author: Fabio M. Di Nitto * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include +#include +#include #include #include #include #include #include #include #include "logging.h" #include "common.h" int _fdset_cloexec(int fd) { int fdflags; fdflags = fcntl(fd, F_GETFD, 0); if (fdflags < 0) return -1; fdflags |= FD_CLOEXEC; if (fcntl(fd, F_SETFD, fdflags) < 0) return -1; return 0; } int _fdset_nonblock(int fd) { int fdflags; fdflags = fcntl(fd, F_GETFL, 0); if (fdflags < 0) return -1; fdflags |= O_NONBLOCK; if (fcntl(fd, F_SETFL, fdflags) < 0) return -1; return 0; } +static int get_lib_dir(void *lib_handle, char dir[MAXPATHLEN]) +{ + int res; +#ifndef HAVE_RTLD_DI_ORIGIN + struct link_map *lm; + char l_name[MAXPATHLEN]; +#endif + +#ifdef HAVE_RTLD_DI_ORIGIN + res = dlinfo(lib_handle, RTLD_DI_ORIGIN, dir); +#else + /* + * musl libc doesn't support RTLD_DI_ORIGIN + */ + res = dlinfo(lib_handle, RTLD_DI_LINKMAP, &lm); + if (res == 0) { + snprintf(l_name, sizeof(l_name), "%s", lm->l_name); + snprintf(dir, MAXPATHLEN, "%s", dirname(l_name)); + } +#endif + + return res; +} + static void *open_lib(knet_handle_t knet_h, const char *libname, int extra_flags) { void *ret = NULL; char *error = NULL; char dir[MAXPATHLEN], path[MAXPATHLEN * 2], link[MAXPATHLEN]; struct stat sb; /* * clear any pending error */ dlerror(); ret = 
dlopen(libname, RTLD_NOW | RTLD_GLOBAL | extra_flags); - error = dlerror(); - if (error != NULL) { - log_err(knet_h, KNET_SUB_COMMON, "unable to dlopen %s: %s", - libname, error); + if (!ret) { + error = dlerror(); + if (error) { + log_err(knet_h, KNET_SUB_COMMON, "unable to dlopen %s: %s", libname, error); + } else { + log_err(knet_h, KNET_SUB_COMMON, "unable to dlopen %s: unknown error", libname); + } errno = EAGAIN; return NULL; } memset(dir, 0, sizeof(dir)); memset(link, 0, sizeof(link)); memset(path, 0, sizeof(path)); - if (dlinfo(ret, RTLD_DI_ORIGIN, &dir) < 0) { + if (get_lib_dir(ret, dir) < 0) { /* * should we dlclose and return error? */ error = dlerror(); log_warn(knet_h, KNET_SUB_COMMON, "unable to dlinfo %s: %s", libname, error); } else { snprintf(path, sizeof(path), "%s/%s", dir, libname); log_info(knet_h, KNET_SUB_COMMON, "%s has been loaded from %s", libname, path); /* * try to resolve the library and check if it is a symlink and to where. * we can't prevent symlink attacks but at least we can log where the library * has been loaded from */ if (lstat(path, &sb) < 0) { log_debug(knet_h, KNET_SUB_COMMON, "Unable to stat %s: %s", path, strerror(errno)); goto out; } if (S_ISLNK(sb.st_mode)) { if (readlink(path, link, sizeof(link)-1) < 0) { log_debug(knet_h, KNET_SUB_COMMON, "Unable to readlink %s: %s", path, strerror(errno)); goto out; } + link[sizeof(link) - 1] = 0; /* * symlink is relative to the directory */ if (link[0] != '/') { snprintf(path, sizeof(path), "%s/%s", dir, link); log_info(knet_h, KNET_SUB_COMMON, "%s/%s is a symlink to %s", dir, libname, path); } else { log_info(knet_h, KNET_SUB_COMMON, "%s/%s is a symlink to %s", dir, libname, link); } } } out: return ret; } void *load_module(knet_handle_t knet_h, const char *type, const char *name) { void *module, *ops; log_msg_t **log_msg_sym; char soname[MAXPATHLEN], opsname[MAXPATHLEN]; snprintf (soname, sizeof soname, "%s_%s.so", type, name); module = open_lib(knet_h, soname, 0); if (!module) { 
return NULL; } log_msg_sym = dlsym (module, "log_msg"); if (!log_msg_sym) { log_err (knet_h, KNET_SUB_COMMON, "unable to map symbol 'log_msg' in module %s: %s", soname, dlerror ()); errno = EINVAL; return NULL; } *log_msg_sym = log_msg; snprintf (opsname, sizeof opsname, "%s_model", type); ops = dlsym (module, opsname); if (!ops) { log_err (knet_h, KNET_SUB_COMMON, "unable to map symbol 'model' in module %s: %s", soname, dlerror ()); errno = EINVAL; return NULL; } return ops; } diff --git a/libknet/compat.c b/libknet/compat.c index e808f332..2e73c9fc 100644 --- a/libknet/compat.c +++ b/libknet/compat.c @@ -1,114 +1,114 @@ /* * Copyright (C) 2016-2019 Red Hat, Inc. All rights reserved. * * Author: Jan Friesse * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include "compat.h" #ifndef HAVE_SYS_EPOLL_H #ifdef HAVE_KEVENT /* for FreeBSD which has kevent instead of epoll */ #include #include #include -#include +#include static int32_t _poll_to_filter_(int32_t event) { int32_t out = 0; if (event & POLLIN) out |= EVFILT_READ; if (event & POLLOUT) out |= EVFILT_WRITE; return out; } int epoll_create(int size) { return kqueue(); } int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event) { int ret = 0; struct kevent ke; short filters = _poll_to_filter_(event->events); switch (op) { /* The kevent man page says that EV_ADD also does MOD */ case EPOLL_CTL_ADD: case EPOLL_CTL_MOD: EV_SET(&ke, fd, filters, EV_ADD | EV_ENABLE, 0, 0, event->data.ptr); break; case EPOLL_CTL_DEL: EV_SET(&ke, fd, filters, EV_DELETE, 0, 0, event->data.ptr); break; default: errno = EINVAL; return -1; } ret = kevent(epfd, &ke, 1, NULL, 0, NULL); return ret; } int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout_ms) { struct kevent kevents[maxevents]; struct timespec timeout = { 0, 0 }; struct timespec *timeout_ptr = &timeout; uint32_t revents; int event_count; int i; int returned_events; if (timeout_ms != -1) { 
timeout.tv_sec = timeout_ms/1000; timeout.tv_nsec += (timeout_ms % 1000) * 1000000ULL; } else { timeout_ptr = NULL; } event_count = kevent(epfd, NULL, 0, kevents, maxevents, timeout_ptr); if (event_count == -1) { return -1; } returned_events = 0; for (i = 0; i < event_count; i++) { revents = 0; if (kevents[i].flags & EV_ERROR) { revents |= POLLERR; } if (kevents[i].flags & EV_EOF) { revents |= POLLHUP; } if (kevents[i].filter == EVFILT_READ) { revents |= POLLIN; } if (kevents[i].filter == EVFILT_WRITE) { revents |= POLLOUT; } events[returned_events].events = revents; events[returned_events].data.ptr = kevents[i].udata; returned_events++; } return returned_events; } #endif /* HAVE_KEVENT */ #endif /* HAVE_SYS_EPOLL_H */ diff --git a/libknet/compress.c b/libknet/compress.c index 24755c77..20645a96 100644 --- a/libknet/compress.c +++ b/libknet/compress.c @@ -1,484 +1,513 @@ /* * Copyright (C) 2017-2019 Red Hat, Inc. All rights reserved. * * Author: Fabio M. Di Nitto * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include #include "internals.h" #include "compress.h" #include "compress_model.h" #include "logging.h" #include "threads_common.h" #include "common.h" /* * internal module switch data */ /* * DO NOT CHANGE MODEL_ID HERE OR ONWIRE COMPATIBILITY * WILL BREAK! * * Always add new items before the last NULL. 
*/ -static compress_model_t compress_modules_cmds[] = { +static compress_model_t compress_modules_cmds[KNET_MAX_COMPRESS_METHODS + 1] = { { "none" , 0, 0, 0, NULL }, { "zlib" , 1, WITH_COMPRESS_ZLIB , 0, NULL }, { "lz4" , 2, WITH_COMPRESS_LZ4 , 0, NULL }, { "lz4hc", 3, WITH_COMPRESS_LZ4 , 0, NULL }, { "lzo2" , 4, WITH_COMPRESS_LZO2 , 0, NULL }, { "lzma" , 5, WITH_COMPRESS_LZMA , 0, NULL }, { "bzip2", 6, WITH_COMPRESS_BZIP2, 0, NULL }, { "zstd" , 7, WITH_COMPRESS_ZSTD, 0, NULL }, - { NULL, 255, 0, 0, NULL } + { NULL, KNET_MAX_COMPRESS_METHODS, 0, 0, NULL } }; static int max_model = 0; static struct timespec last_load_failure; static int compress_get_model(const char *model) { int idx = 0; while (compress_modules_cmds[idx].model_name != NULL) { if (!strcmp(compress_modules_cmds[idx].model_name, model)) { return compress_modules_cmds[idx].model_id; } idx++; } return -1; } static int compress_get_max_model(void) { int idx = 0; while (compress_modules_cmds[idx].model_name != NULL) { idx++; } return idx - 1; } static int compress_is_valid_model(int compress_model) { int idx = 0; while (compress_modules_cmds[idx].model_name != NULL) { if ((compress_model == compress_modules_cmds[idx].model_id) && (compress_modules_cmds[idx].built_in == 1)) { return 0; } idx++; } return -1; } static int val_level( knet_handle_t knet_h, int compress_model, int compress_level) { if (compress_modules_cmds[compress_model].ops->val_level != NULL) { return compress_modules_cmds[compress_model].ops->val_level(knet_h, compress_level); } return 0; } /* * compress_check_lib_is_init needs to be invoked in a locked context! 
*/ static int compress_check_lib_is_init(knet_handle_t knet_h, int cmp_model) { /* * lack of a .is_init function means that the module does not require * init per handle so we use a fake reference in the compress_int_data * to identify that we already increased the libref for this handle */ if (compress_modules_cmds[cmp_model].loaded == 1) { if (compress_modules_cmds[cmp_model].ops->is_init == NULL) { if (knet_h->compress_int_data[cmp_model] != NULL) { return 1; } } else { if (compress_modules_cmds[cmp_model].ops->is_init(knet_h, cmp_model) == 1) { return 1; } } } return 0; } /* * compress_load_lib should _always_ be invoked in write lock context */ static int compress_load_lib(knet_handle_t knet_h, int cmp_model, int rate_limit) { struct timespec clock_now; unsigned long long timediff; /* * checking again for paranoia and because * compress_check_lib_is_init is usually invoked in read context * and we need to switch from read to write locking in between. * another thread might have init the library in the meantime */ if (compress_check_lib_is_init(knet_h, cmp_model)) { return 0; } /* * due to the fact that decompress can load libraries * on demand, depending on the compress model selected * on other nodes, it is possible for an attacker * to send crafted packets to attempt to load libraries * at random in a DoS fashion. * If there is an error loading a library, then we want * to rate_limit a retry to reload the library every X * seconds to avoid a lock DoS that could greatly slow * down libknet. 
*/ if (rate_limit) { if ((last_load_failure.tv_sec != 0) || (last_load_failure.tv_nsec != 0)) { clock_gettime(CLOCK_MONOTONIC, &clock_now); timespec_diff(last_load_failure, clock_now, &timediff); if (timediff < 10000000000) { errno = EAGAIN; return -1; } } } if (compress_modules_cmds[cmp_model].loaded == 0) { compress_modules_cmds[cmp_model].ops = load_module (knet_h, "compress", compress_modules_cmds[cmp_model].model_name); if (!compress_modules_cmds[cmp_model].ops) { clock_gettime(CLOCK_MONOTONIC, &last_load_failure); return -1; } if (compress_modules_cmds[cmp_model].ops->abi_ver != KNET_COMPRESS_MODEL_ABI) { log_err(knet_h, KNET_SUB_COMPRESS, "ABI mismatch loading module %s. knet ver: %d, module ver: %d", compress_modules_cmds[cmp_model].model_name, KNET_COMPRESS_MODEL_ABI, compress_modules_cmds[cmp_model].ops->abi_ver); errno = EINVAL; return -1; } compress_modules_cmds[cmp_model].loaded = 1; } if (compress_modules_cmds[cmp_model].ops->init != NULL) { if (compress_modules_cmds[cmp_model].ops->init(knet_h, cmp_model) < 0) { return -1; } } else { knet_h->compress_int_data[cmp_model] = (void *)&"1"; } return 0; } static int compress_lib_test(knet_handle_t knet_h) { int savederrno = 0; unsigned char src[KNET_DATABUFSIZE]; unsigned char dst[KNET_DATABUFSIZE_COMPRESS]; ssize_t dst_comp_len = KNET_DATABUFSIZE_COMPRESS, dst_decomp_len = KNET_DATABUFSIZE; unsigned int i; + int request_level; memset(src, 0, KNET_DATABUFSIZE); memset(dst, 0, KNET_DATABUFSIZE_COMPRESS); /* * NOTE: we cannot use compress and decompress API calls due to locking * so we need to call directly into the modules */ if (compress_modules_cmds[knet_h->compress_model].ops->compress(knet_h, src, KNET_DATABUFSIZE, dst, &dst_comp_len) < 0) { savederrno = errno; log_err(knet_h, KNET_SUB_COMPRESS, "Unable to compress test buffer. 
Please check your compression settings: %s", strerror(savederrno)); errno = savederrno; return -1; + } else if ((long unsigned int)dst_comp_len >= KNET_DATABUFSIZE) { + /* + * compress not effective, try again using default compression level when available + */ + request_level = knet_h->compress_level; + log_warn(knet_h, KNET_SUB_COMPRESS, + "Requested compression level (%d) did not generate any compressed data (source: %zu destination: %zu)", + request_level, sizeof(src), dst_comp_len); + + if ((!compress_modules_cmds[knet_h->compress_model].ops->get_default_level()) || + ((knet_h->compress_level = compress_modules_cmds[knet_h->compress_model].ops->get_default_level()) == KNET_COMPRESS_UNKNOWN_DEFAULT)) { + log_err(knet_h, KNET_SUB_COMPRESS, "compression %s does not provide a default value", + compress_modules_cmds[knet_h->compress_model].model_name); + errno = EINVAL; + return -1; + } else { + memset(src, 0, KNET_DATABUFSIZE); + memset(dst, 0, KNET_DATABUFSIZE_COMPRESS); + dst_comp_len = KNET_DATABUFSIZE_COMPRESS; + if (compress_modules_cmds[knet_h->compress_model].ops->compress(knet_h, src, KNET_DATABUFSIZE, dst, &dst_comp_len) < 0) { + savederrno = errno; + log_err(knet_h, KNET_SUB_COMPRESS, "Unable to compress with default compression level: %s", strerror(savederrno)); + errno = savederrno; + return -1; + } + log_warn(knet_h, KNET_SUB_COMPRESS, "Requested compression level (%d) did not work, switching to default (%d)", + request_level, knet_h->compress_level); + } } if (compress_modules_cmds[knet_h->compress_model].ops->decompress(knet_h, dst, dst_comp_len, src, &dst_decomp_len) < 0) { savederrno = errno; log_err(knet_h, KNET_SUB_COMPRESS, "Unable to decompress test buffer. 
Please check your compression settings: %s", strerror(savederrno)); errno = savederrno; return -1; } for (i = 0; i < KNET_DATABUFSIZE; i++) { if (src[i] != 0) { log_err(knet_h, KNET_SUB_COMPRESS, "Decompressed buffer contains incorrect data"); errno = EINVAL; return -1; } } return 0; } int compress_init( knet_handle_t knet_h) { max_model = compress_get_max_model(); if (max_model > KNET_MAX_COMPRESS_METHODS) { log_err(knet_h, KNET_SUB_COMPRESS, "Too many compress methods defined in compress.c."); errno = EINVAL; return -1; } memset(&last_load_failure, 0, sizeof(struct timespec)); return 0; } int compress_cfg( knet_handle_t knet_h, struct knet_handle_compress_cfg *knet_handle_compress_cfg) { int savederrno = 0, err = 0; int cmp_model; cmp_model = compress_get_model(knet_handle_compress_cfg->compress_model); if (cmp_model < 0) { log_err(knet_h, KNET_SUB_COMPRESS, "compress model %s not supported", knet_handle_compress_cfg->compress_model); errno = EINVAL; return -1; } log_debug(knet_h, KNET_SUB_COMPRESS, "Initizializing compress module [%s/%d/%u]", knet_handle_compress_cfg->compress_model, knet_handle_compress_cfg->compress_level, knet_handle_compress_cfg->compress_threshold); if (cmp_model > 0) { if (compress_modules_cmds[cmp_model].built_in == 0) { log_err(knet_h, KNET_SUB_COMPRESS, "compress model %s support has not been built in. 
Please contact your vendor or fix the build", knet_handle_compress_cfg->compress_model); errno = EINVAL; return -1; } if (knet_handle_compress_cfg->compress_threshold > KNET_MAX_PACKET_SIZE) { log_err(knet_h, KNET_SUB_COMPRESS, "compress threshold cannot be higher than KNET_MAX_PACKET_SIZE (%d).", KNET_MAX_PACKET_SIZE); errno = EINVAL; return -1; } if (knet_handle_compress_cfg->compress_threshold == 0) { knet_h->compress_threshold = KNET_COMPRESS_THRESHOLD; log_debug(knet_h, KNET_SUB_COMPRESS, "resetting compression threshold to default (%d)", KNET_COMPRESS_THRESHOLD); } else { knet_h->compress_threshold = knet_handle_compress_cfg->compress_threshold; } savederrno = pthread_rwlock_rdlock(&shlib_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_COMPRESS, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (!compress_check_lib_is_init(knet_h, cmp_model)) { /* * need to switch to write lock, load the lib, and return with a write lock * this is not racy because compress_load_lib is written idempotent. 
*/ pthread_rwlock_unlock(&shlib_rwlock); savederrno = pthread_rwlock_wrlock(&shlib_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_COMPRESS, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (compress_load_lib(knet_h, cmp_model, 0) < 0) { savederrno = errno; log_err(knet_h, KNET_SUB_COMPRESS, "Unable to load library: %s", strerror(savederrno)); err = -1; goto out_unlock; } } if (val_level(knet_h, cmp_model, knet_handle_compress_cfg->compress_level) < 0) { log_err(knet_h, KNET_SUB_COMPRESS, "compress level %d not supported for model %s", knet_handle_compress_cfg->compress_level, knet_handle_compress_cfg->compress_model); savederrno = EINVAL; err = -1; goto out_unlock; } knet_h->compress_model = cmp_model; knet_h->compress_level = knet_handle_compress_cfg->compress_level; if (compress_lib_test(knet_h) < 0) { savederrno = errno; err = -1; goto out_unlock; } out_unlock: pthread_rwlock_unlock(&shlib_rwlock); } if (err) { knet_h->compress_model = 0; knet_h->compress_level = 0; } errno = savederrno; return err; } void compress_fini( knet_handle_t knet_h, int all) { int savederrno = 0; int idx = 0; savederrno = pthread_rwlock_wrlock(&shlib_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_COMPRESS, "Unable to get write lock: %s", strerror(savederrno)); return; } - while (compress_modules_cmds[idx].model_name != NULL) { - if ((idx < KNET_MAX_COMPRESS_METHODS) && /* check idx first so we don't read bad data */ + while (idx < KNET_MAX_COMPRESS_METHODS) { + if ((compress_modules_cmds[idx].model_name != NULL) && (compress_modules_cmds[idx].built_in == 1) && (compress_modules_cmds[idx].loaded == 1) && (compress_modules_cmds[idx].model_id > 0) && (knet_h->compress_int_data[idx] != NULL)) { if ((all) || (compress_modules_cmds[idx].model_id == knet_h->compress_model)) { if (compress_modules_cmds[idx].ops->fini != NULL) { compress_modules_cmds[idx].ops->fini(knet_h, idx); } else { knet_h->compress_int_data[idx] = NULL; } } } idx++; } 
pthread_rwlock_unlock(&shlib_rwlock); return; } /* * compress does not require compress_check_lib_is_init * because it's protected by compress_cfg */ int compress( knet_handle_t knet_h, const unsigned char *buf_in, const ssize_t buf_in_len, unsigned char *buf_out, ssize_t *buf_out_len) { return compress_modules_cmds[knet_h->compress_model].ops->compress(knet_h, buf_in, buf_in_len, buf_out, buf_out_len); } int decompress( knet_handle_t knet_h, int compress_model, const unsigned char *buf_in, const ssize_t buf_in_len, unsigned char *buf_out, ssize_t *buf_out_len) { int savederrno = 0, err = 0; if (compress_model > max_model) { log_err(knet_h, KNET_SUB_COMPRESS, "Received packet with unknown compress model %d", compress_model); errno = EINVAL; return -1; } if (compress_is_valid_model(compress_model) < 0) { log_err(knet_h, KNET_SUB_COMPRESS, "Received packet compressed with %s but support is not built in this version of libknet. Please contact your distribution vendor or fix the build.", compress_modules_cmds[compress_model].model_name); errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&shlib_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_COMPRESS, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (!compress_check_lib_is_init(knet_h, compress_model)) { /* * need to switch to write lock, load the lib, and return with a write lock * this is not racy because compress_load_lib is written idempotent. 
*/ pthread_rwlock_unlock(&shlib_rwlock); savederrno = pthread_rwlock_wrlock(&shlib_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_COMPRESS, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (compress_load_lib(knet_h, compress_model, 1) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_COMPRESS, "Unable to load library: %s", strerror(savederrno)); goto out_unlock; } } err = compress_modules_cmds[compress_model].ops->decompress(knet_h, buf_in, buf_in_len, buf_out, buf_out_len); savederrno = errno; out_unlock: pthread_rwlock_unlock(&shlib_rwlock); errno = savederrno; return err; } int knet_get_compress_list(struct knet_compress_info *compress_list, size_t *compress_list_entries) { int err = 0; int idx = 0; int outidx = 0; if (!compress_list_entries) { errno = EINVAL; return -1; } while (compress_modules_cmds[idx].model_name != NULL) { if (compress_modules_cmds[idx].built_in) { if (compress_list) { compress_list[outidx].name = compress_modules_cmds[idx].model_name; } outidx++; } idx++; } *compress_list_entries = outidx; if (!err) errno = 0; return err; } diff --git a/libknet/compress_bzip2.c b/libknet/compress_bzip2.c index 5a972fff..68f5280c 100644 --- a/libknet/compress_bzip2.c +++ b/libknet/compress_bzip2.c @@ -1,114 +1,126 @@ /* * Copyright (C) 2017-2019 Red Hat, Inc. All rights reserved. * * Author: Fabio M. 
Di Nitto * * This software licensed under LGPL-2.0+ */ #define KNET_MODULE #include "config.h" #include #include #include "logging.h" #include "compress_model.h" +#ifdef BZIP2_COMPRESS_LEVEL +#define KNET_COMPRESS_DEFAULT BZIP2_COMPRESS_LEVEL +#else +#define KNET_COMPRESS_DEFAULT KNET_COMPRESS_UNKNOWN_DEFAULT +#endif + static int bzip2_compress( knet_handle_t knet_h, const unsigned char *buf_in, const ssize_t buf_in_len, unsigned char *buf_out, ssize_t *buf_out_len) { int err = 0; int savederrno = 0; unsigned int destLen = KNET_DATABUFSIZE_COMPRESS; err = BZ2_bzBuffToBuffCompress((char *)buf_out, &destLen, (char *)buf_in, buf_in_len, knet_h->compress_level, 0, 0); switch(err) { case BZ_OK: *buf_out_len = destLen; break; case BZ_MEM_ERROR: log_err(knet_h, KNET_SUB_BZIP2COMP, "bzip2 compress has not enough memory"); savederrno = ENOMEM; err = -1; break; case BZ_OUTBUFF_FULL: log_err(knet_h, KNET_SUB_BZIP2COMP, "bzip2 unable to compress source in destination buffer"); savederrno = E2BIG; err = -1; break; default: log_err(knet_h, KNET_SUB_BZIP2COMP, "bzip2 compress unknown error %d", err); savederrno = EINVAL; err = -1; break; } errno = savederrno; return err; } static int bzip2_decompress( knet_handle_t knet_h, const unsigned char *buf_in, const ssize_t buf_in_len, unsigned char *buf_out, ssize_t *buf_out_len) { int err = 0; int savederrno = 0; unsigned int destLen = KNET_DATABUFSIZE_COMPRESS; err = BZ2_bzBuffToBuffDecompress((char *)buf_out, &destLen, (char *)buf_in, buf_in_len, 0, 0); switch(err) { case BZ_OK: *buf_out_len = destLen; break; case BZ_MEM_ERROR: log_err(knet_h, KNET_SUB_BZIP2COMP, "bzip2 decompress has not enough memory"); savederrno = ENOMEM; err = -1; break; case BZ_OUTBUFF_FULL: log_err(knet_h, KNET_SUB_BZIP2COMP, "bzip2 unable to decompress source in destination buffer"); savederrno = E2BIG; err = -1; break; case BZ_DATA_ERROR: case BZ_DATA_ERROR_MAGIC: case BZ_UNEXPECTED_EOF: log_err(knet_h, KNET_SUB_BZIP2COMP, "bzip2 decompress detected input 
data corruption"); savederrno = EINVAL; err = -1; break; default: log_err(knet_h, KNET_SUB_BZIP2COMP, "bzip2 decompress unknown error %d", err); savederrno = EINVAL; err = -1; break; } errno = savederrno; return err; } +static int bzip2_get_default_level() +{ + return KNET_COMPRESS_DEFAULT; +} + compress_ops_t compress_model = { KNET_COMPRESS_MODEL_ABI, NULL, NULL, NULL, NULL, bzip2_compress, - bzip2_decompress + bzip2_decompress, + bzip2_get_default_level }; diff --git a/libknet/compress_lz4.c b/libknet/compress_lz4.c index 60aa1966..db0fa514 100644 --- a/libknet/compress_lz4.c +++ b/libknet/compress_lz4.c @@ -1,91 +1,104 @@ /* * Copyright (C) 2017-2019 Red Hat, Inc. All rights reserved. * * Author: Fabio M. Di Nitto * * This software licensed under LGPL-2.0+ */ #define KNET_MODULE +#define ACCELERATION_DEFAULT 1 /* lz4 default compression level from lz4.c */ #include "config.h" #include #include #include "logging.h" #include "compress_model.h" +#ifdef LZ4_COMPRESS_DEFAULT +#define KNET_COMPRESS_DEFAULT LZ4_COMPRESS_DEFAULT +#else +#define KNET_COMPRESS_DEFAULT KNET_COMPRESS_UNKNOWN_DEFAULT +#endif + static int lz4_compress( knet_handle_t knet_h, const unsigned char *buf_in, const ssize_t buf_in_len, unsigned char *buf_out, ssize_t *buf_out_len) { int lzerr = 0, err = 0; int savederrno = 0; lzerr = LZ4_compress_fast((const char *)buf_in, (char *)buf_out, buf_in_len, KNET_DATABUFSIZE_COMPRESS, knet_h->compress_level); /* * data compressed */ if (lzerr > 0) { *buf_out_len = lzerr; } /* * unable to compress */ if (lzerr == 0) { *buf_out_len = buf_in_len; } /* * lz4 internal error */ if (lzerr < 0) { log_err(knet_h, KNET_SUB_LZ4COMP, "lz4 compression error: %d", lzerr); savederrno = EINVAL; err = -1; } errno = savederrno; return err; } static int lz4_decompress( knet_handle_t knet_h, const unsigned char *buf_in, const ssize_t buf_in_len, unsigned char *buf_out, ssize_t *buf_out_len) { int lzerr = 0, err = 0; int savederrno = 0; lzerr = LZ4_decompress_safe((const char 
*)buf_in, (char *)buf_out, buf_in_len, KNET_DATABUFSIZE); if (lzerr < 0) { log_err(knet_h, KNET_SUB_LZ4COMP, "lz4 decompression error: %d", lzerr); savederrno = EINVAL; err = -1; } if (lzerr > 0) { *buf_out_len = lzerr; } errno = savederrno; return err; } +static int lz4_get_default_level() +{ + return KNET_COMPRESS_DEFAULT; +} + compress_ops_t compress_model = { KNET_COMPRESS_MODEL_ABI, NULL, NULL, NULL, NULL, lz4_compress, - lz4_decompress + lz4_decompress, + lz4_get_default_level }; diff --git a/libknet/compress_lz4hc.c b/libknet/compress_lz4hc.c index 781bf12f..963ad66c 100644 --- a/libknet/compress_lz4hc.c +++ b/libknet/compress_lz4hc.c @@ -1,102 +1,113 @@ /* * Copyright (C) 2017-2019 Red Hat, Inc. All rights reserved. * * Author: Fabio M. Di Nitto * * This software licensed under LGPL-2.0+ */ #define KNET_MODULE #include "config.h" #include #include #include #include "logging.h" #include "compress_model.h" +#ifdef LZ4HC_CLEVEL_DEFAULT +#define KNET_COMPRESS_DEFAULT LZ4HC_CLEVEL_DEFAULT /* lz4hc default compression level from lz4hc.h */ +#else +#define KNET_COMPRESS_DEFAULT KNET_COMPRESS_UNKNOWN_DEFAULT +#endif #ifdef LZ4HC_CLEVEL_MAX #define KNET_LZ4HC_MAX LZ4HC_CLEVEL_MAX #endif #ifdef LZ4HC_MAX_CLEVEL #define KNET_LZ4HC_MAX LZ4HC_MAX_CLEVEL #endif #ifndef KNET_LZ4HC_MAX /* * older releases of lz4 do not define LZ4HC_CLEVEL range. * According to lz4hc.h, any value between 0 and 16 is valid. * We defalt to 16 based on the comments in the include file * from older versions. 
*/ #define KNET_LZ4HC_MAX 16 #endif static int lz4hc_compress( knet_handle_t knet_h, const unsigned char *buf_in, const ssize_t buf_in_len, unsigned char *buf_out, ssize_t *buf_out_len) { int lzerr = 0, err = 0; int savederrno = 0; lzerr = LZ4_compress_HC((const char *)buf_in, (char *)buf_out, buf_in_len, KNET_DATABUFSIZE_COMPRESS, knet_h->compress_level); /* * data compressed */ if (lzerr > 0) { *buf_out_len = lzerr; } /* * unable to compress */ if (lzerr <= 0) { log_err(knet_h, KNET_SUB_LZ4HCCOMP, "lz4hc compression error: %d", lzerr); savederrno = EINVAL; err = -1; } errno = savederrno; return err; } /* This is a straight copy from compress_lz4.c */ static int lz4_decompress( knet_handle_t knet_h, const unsigned char *buf_in, const ssize_t buf_in_len, unsigned char *buf_out, ssize_t *buf_out_len) { int lzerr = 0, err = 0; int savederrno = 0; lzerr = LZ4_decompress_safe((const char *)buf_in, (char *)buf_out, buf_in_len, KNET_DATABUFSIZE); if (lzerr < 0) { log_err(knet_h, KNET_SUB_LZ4COMP, "lz4 decompression error: %d", lzerr); savederrno = EINVAL; err = -1; } if (lzerr > 0) { *buf_out_len = lzerr; } errno = savederrno; return err; } +static int lz4hc_get_default_level() +{ + return KNET_COMPRESS_DEFAULT; +} + compress_ops_t compress_model = { KNET_COMPRESS_MODEL_ABI, NULL, NULL, NULL, NULL, lz4hc_compress, - lz4_decompress + lz4_decompress, + lz4hc_get_default_level }; diff --git a/libknet/compress_lzma.c b/libknet/compress_lzma.c index 7fdd1782..b4fd05c3 100644 --- a/libknet/compress_lzma.c +++ b/libknet/compress_lzma.c @@ -1,125 +1,137 @@ /* * Copyright (C) 2017-2019 Red Hat, Inc. All rights reserved. * * Author: Fabio M. 
Di Nitto * * This software licensed under LGPL-2.0+ */ #define KNET_MODULE #include "config.h" #include #include #include "logging.h" #include "compress_model.h" +#ifdef LZMA_PRESET_DEFAULT +#define KNET_COMPRESS_DEFAULT LZMA_PRESET_DEFAULT /* lzma default compression level from lzma.h */ +#else +#define KNET_COMPRESS_DEFAULT KNET_COMPRESS_UNKNOWN_DEFAULT +#endif + static int lzma_compress( knet_handle_t knet_h, const unsigned char *buf_in, const ssize_t buf_in_len, unsigned char *buf_out, ssize_t *buf_out_len) { int err = 0; int savederrno = 0; size_t out_pos = 0; lzma_ret ret = 0; ret = lzma_easy_buffer_encode(knet_h->compress_level, LZMA_CHECK_NONE, NULL, (const uint8_t *)buf_in, buf_in_len, (uint8_t *)buf_out, &out_pos, KNET_DATABUFSIZE_COMPRESS); switch(ret) { case LZMA_OK: *buf_out_len = out_pos; break; case LZMA_MEM_ERROR: log_err(knet_h, KNET_SUB_LZMACOMP, "lzma compress memory allocation failed"); savederrno = ENOMEM; err = -1; break; case LZMA_MEMLIMIT_ERROR: log_err(knet_h, KNET_SUB_LZMACOMP, "lzma compress requires higher memory boundaries (see lzma_memlimit_set)"); savederrno = ENOMEM; err = -1; break; case LZMA_PROG_ERROR: log_err(knet_h, KNET_SUB_LZMACOMP, "lzma compress has been called with incorrect options"); savederrno = EINVAL; err = -1; break; default: log_err(knet_h, KNET_SUB_LZMACOMP, "lzma compress unknown error %u", ret); savederrno = EINVAL; err = -1; break; } errno = savederrno; return err; } static int lzma_decompress( knet_handle_t knet_h, const unsigned char *buf_in, const ssize_t buf_in_len, unsigned char *buf_out, ssize_t *buf_out_len) { int err = 0; int savederrno = 0; uint64_t memlimit = UINT64_MAX; /* disable lzma internal memlimit check */ size_t out_pos = 0, in_pos = 0; lzma_ret ret = 0; ret = lzma_stream_buffer_decode(&memlimit, 0, NULL, (const uint8_t *)buf_in, &in_pos, buf_in_len, (uint8_t *)buf_out, &out_pos, KNET_DATABUFSIZE_COMPRESS); switch(ret) { case LZMA_OK: *buf_out_len = out_pos; break; case LZMA_MEM_ERROR: 
log_err(knet_h, KNET_SUB_LZMACOMP, "lzma decompress memory allocation failed"); savederrno = ENOMEM; err = -1; break; case LZMA_MEMLIMIT_ERROR: log_err(knet_h, KNET_SUB_LZMACOMP, "lzma decompress requires higher memory boundaries (see lzma_memlimit_set)"); savederrno = ENOMEM; err = -1; break; case LZMA_DATA_ERROR: case LZMA_FORMAT_ERROR: log_err(knet_h, KNET_SUB_LZMACOMP, "lzma decompress invalid data received"); savederrno = EINVAL; err = -1; break; case LZMA_PROG_ERROR: log_err(knet_h, KNET_SUB_LZMACOMP, "lzma decompress has been called with incorrect options"); savederrno = EINVAL; err = -1; break; default: log_err(knet_h, KNET_SUB_LZMACOMP, "lzma decompress unknown error %u", ret); savederrno = EINVAL; err = -1; break; } errno = savederrno; return err; } +static int lzma_get_default_level() +{ + return KNET_COMPRESS_DEFAULT; +} + compress_ops_t compress_model = { KNET_COMPRESS_MODEL_ABI, NULL, NULL, NULL, NULL, lzma_compress, - lzma_decompress + lzma_decompress, + lzma_get_default_level }; diff --git a/libknet/compress_lzo2.c b/libknet/compress_lzo2.c index 12066ed9..72dac824 100644 --- a/libknet/compress_lzo2.c +++ b/libknet/compress_lzo2.c @@ -1,165 +1,178 @@ /* * Copyright (C) 2017-2019 Red Hat, Inc. All rights reserved. * * Author: Fabio M. 
Di Nitto * * This software licensed under LGPL-2.0+ */ #define KNET_MODULE +#define LZO2_COMPRESS_DEFAULT 1 #include "config.h" #include #include #include #include #include "logging.h" #include "compress_model.h" +#ifdef LZO2_COMPRESS_DEFAULT +#define KNET_COMPRESS_DEFAULT LZO2_COMPRESS_DEFAULT +#else +#define KNET_COMPRESS_DEFAULT KNET_COMPRESS_UNKNOWN_DEFAULT +#endif + static int lzo2_is_init( knet_handle_t knet_h, int method_idx) { if (knet_h->compress_int_data[method_idx]) { return 1; } return 0; } static int lzo2_init( knet_handle_t knet_h, int method_idx) { /* * LZO1X_999_MEM_COMPRESS is the highest amount of memory lzo2 can use */ if (!knet_h->compress_int_data[method_idx]) { knet_h->compress_int_data[method_idx] = malloc(LZO1X_999_MEM_COMPRESS); if (!knet_h->compress_int_data[method_idx]) { log_err(knet_h, KNET_SUB_LZO2COMP, "lzo2 unable to allocate work memory"); errno = ENOMEM; return -1; } memset(knet_h->compress_int_data[method_idx], 0, LZO1X_999_MEM_COMPRESS); } return 0; } static void lzo2_fini( knet_handle_t knet_h, int method_idx) { if (knet_h->compress_int_data[method_idx]) { free(knet_h->compress_int_data[method_idx]); knet_h->compress_int_data[method_idx] = NULL; } return; } static int lzo2_val_level( knet_handle_t knet_h, int compress_level) { switch(compress_level) { case 1: log_debug(knet_h, KNET_SUB_LZO2COMP, "lzo2 will use lzo1x_1_compress internal compress method"); break; case 11: log_debug(knet_h, KNET_SUB_LZO2COMP, "lzo2 will use lzo1x_1_11_compress internal compress method"); break; case 12: log_debug(knet_h, KNET_SUB_LZO2COMP, "lzo2 will use lzo1x_1_12_compress internal compress method"); break; case 15: log_debug(knet_h, KNET_SUB_LZO2COMP, "lzo2 will use lzo1x_1_15_compress internal compress method"); break; case 999: log_debug(knet_h, KNET_SUB_LZO2COMP, "lzo2 will use lzo1x_999_compress internal compress method"); break; default: log_warn(knet_h, KNET_SUB_LZO2COMP, "Unknown lzo2 internal compress method. 
lzo1x_1_compress will be used as default fallback"); break; } return 0; } static int lzo2_compress( knet_handle_t knet_h, const unsigned char *buf_in, const ssize_t buf_in_len, unsigned char *buf_out, ssize_t *buf_out_len) { int savederrno = 0, lzerr = 0, err = 0; lzo_uint cmp_len; switch(knet_h->compress_level) { case 1: lzerr = lzo1x_1_compress(buf_in, buf_in_len, buf_out, &cmp_len, knet_h->compress_int_data[knet_h->compress_model]); break; case 11: lzerr = lzo1x_1_11_compress(buf_in, buf_in_len, buf_out, &cmp_len, knet_h->compress_int_data[knet_h->compress_model]); break; case 12: lzerr = lzo1x_1_12_compress(buf_in, buf_in_len, buf_out, &cmp_len, knet_h->compress_int_data[knet_h->compress_model]); break; case 15: lzerr = lzo1x_1_15_compress(buf_in, buf_in_len, buf_out, &cmp_len, knet_h->compress_int_data[knet_h->compress_model]); break; case 999: lzerr = lzo1x_999_compress(buf_in, buf_in_len, buf_out, &cmp_len, knet_h->compress_int_data[knet_h->compress_model]); break; default: lzerr = lzo1x_1_compress(buf_in, buf_in_len, buf_out, &cmp_len, knet_h->compress_int_data[knet_h->compress_model]); break; } if (lzerr != LZO_E_OK) { log_err(knet_h, KNET_SUB_LZO2COMP, "lzo2 internal compression error"); savederrno = EAGAIN; err = -1; } else { *buf_out_len = cmp_len; } errno = savederrno; return err; } static int lzo2_decompress( knet_handle_t knet_h, const unsigned char *buf_in, const ssize_t buf_in_len, unsigned char *buf_out, ssize_t *buf_out_len) { int lzerr = 0, err = 0; int savederrno = 0; lzo_uint decmp_len; lzerr = lzo1x_decompress(buf_in, buf_in_len, buf_out, &decmp_len, NULL); if (lzerr != LZO_E_OK) { log_err(knet_h, KNET_SUB_LZO2COMP, "lzo2 internal decompression error"); savederrno = EAGAIN; err = -1; } else { *buf_out_len = decmp_len; } errno = savederrno; return err; } +static int lzo2_get_default_level() +{ + return KNET_COMPRESS_DEFAULT; +} + compress_ops_t compress_model = { KNET_COMPRESS_MODEL_ABI, lzo2_is_init, lzo2_init, lzo2_fini, lzo2_val_level, 
lzo2_compress, - lzo2_decompress + lzo2_decompress, + lzo2_get_default_level }; diff --git a/libknet/compress_model.h b/libknet/compress_model.h index e69e4915..57cf8115 100644 --- a/libknet/compress_model.h +++ b/libknet/compress_model.h @@ -1,91 +1,97 @@ /* * Copyright (C) 2017-2019 Red Hat, Inc. All rights reserved. * * Author: Fabio M. Di Nitto * * This software licensed under LGPL-2.0+ */ #ifndef __KNET_COMPRESS_MODEL_H__ #define __KNET_COMPRESS_MODEL_H__ #include "internals.h" -#define KNET_COMPRESS_MODEL_ABI 1 +#define KNET_COMPRESS_MODEL_ABI 2 +#define KNET_COMPRESS_UNKNOWN_DEFAULT (-2) typedef struct { uint8_t abi_ver; /* * some libs need special init and handling of buffers etc. * is_init is called in shlib_rwlock read only context to see if * the module has been initialized within this knet_handle. * Providing is_init is optional. A module that does not export * an is_init and if the associated shared library is already loaded * is treated as "does not require init". */ int (*is_init) (knet_handle_t knet_h, int method_idx); /* * init is called when the library requires special init handling, * such as memory allocation and such. * init is invoked in shlib_rwlock write only context when * the module exports this function. * It is optional to provide an init function if the module * does not require any init. */ int (*init) (knet_handle_t knet_h, int method_idx); /* * fini is invoked only on knet_handle_free in a write only context. * It is optional to provide this function if the module * does not require any finalization */ void (*fini) (knet_handle_t knet_h, int method_idx); /* * runtime config validation and compress/decompress */ /* * val_level is called upon compress configuration changes * to make sure that the requested compress_level is valid * within the context of a given module. */ int (*val_level)(knet_handle_t knet_h, int compress_level); /* * required functions * * hopefully those 2 don't require any explanation.... 
/*
 * Compress buf_in (buf_in_len bytes) into buf_out using zlib's compress2()
 * at the handle's configured compress_level.
 *
 * On entry *buf_out_len is the capacity of buf_out; on return it holds the
 * length reported by zlib.
 *
 * Returns 0 on success. On failure returns -1 and sets errno:
 *   ENOMEM  - Z_MEM_ERROR
 *   ENOBUFS - Z_BUF_ERROR
 *   EINVAL  - Z_STREAM_ERROR or any return code not listed above
 */
static int zlib_compress(
	knet_handle_t knet_h,
	const unsigned char *buf_in,
	const ssize_t buf_in_len,
	unsigned char *buf_out,
	ssize_t *buf_out_len)
{
	int zerr = 0, err = 0;
	int savederrno = 0;
	uLongf destLen = *buf_out_len;

	zerr = compress2(buf_out, &destLen,
			 buf_in, buf_in_len,
			 knet_h->compress_level);

	*buf_out_len = destLen;

	switch(zerr) {
		case Z_OK:
			err = 0;
			savederrno = 0;
			break;
		case Z_MEM_ERROR:
			log_err(knet_h, KNET_SUB_ZLIBCOMP, "zlib compress mem error");
			err = -1;
			savederrno = ENOMEM;
			break;
		case Z_BUF_ERROR:
			log_err(knet_h, KNET_SUB_ZLIBCOMP, "zlib compress buf error");
			err = -1;
			savederrno = ENOBUFS;
			break;
		case Z_STREAM_ERROR:
			log_err(knet_h, KNET_SUB_ZLIBCOMP, "zlib compress stream error");
			err = -1;
			savederrno = EINVAL;
			break;
		default:
			/*
			 * An unrecognised return code is still a failure: never
			 * let the caller treat the output buffer as valid.
			 */
			log_err(knet_h, KNET_SUB_ZLIBCOMP, "zlib unknown compress error: %d", zerr);
			err = -1;
			savederrno = EINVAL;
			break;
	}

	errno = savederrno;
	return err;
}

/*
 * Decompress buf_in (buf_in_len bytes) into buf_out using zlib's
 * uncompress().
 *
 * On entry *buf_out_len is the capacity of buf_out; on return it holds the
 * length reported by zlib.
 *
 * Returns 0 on success. On failure returns -1 and sets errno:
 *   ENOMEM  - Z_MEM_ERROR
 *   ENOBUFS - Z_BUF_ERROR
 *   EINVAL  - Z_DATA_ERROR or any return code not listed above
 */
static int zlib_decompress(
	knet_handle_t knet_h,
	const unsigned char *buf_in,
	const ssize_t buf_in_len,
	unsigned char *buf_out,
	ssize_t *buf_out_len)
{
	int zerr = 0, err = 0;
	int savederrno = 0;
	uLongf destLen = *buf_out_len;

	zerr = uncompress(buf_out, &destLen,
			  buf_in, buf_in_len);

	*buf_out_len = destLen;

	switch(zerr) {
		case Z_OK:
			err = 0;
			savederrno = 0;
			break;
		case Z_MEM_ERROR:
			log_err(knet_h, KNET_SUB_ZLIBCOMP, "zlib decompress mem error");
			err = -1;
			savederrno = ENOMEM;
			break;
		case Z_BUF_ERROR:
			log_err(knet_h, KNET_SUB_ZLIBCOMP, "zlib decompress buf error");
			err = -1;
			savederrno = ENOBUFS;
			break;
		case Z_DATA_ERROR:
			log_err(knet_h, KNET_SUB_ZLIBCOMP, "zlib decompress data error");
			err = -1;
			savederrno = EINVAL;
			break;
		default:
			/*
			 * An unrecognised return code is still a failure: never
			 * let the caller treat the output buffer as valid.
			 */
			log_err(knet_h, KNET_SUB_ZLIBCOMP, "zlib unknown decompress error: %d", zerr);
			err = -1;
			savederrno = EINVAL;
			break;
	}

	errno = savederrno;
	return err;
}
Di Nitto * * This software licensed under LGPL-2.0+ */ #define KNET_MODULE #include "config.h" #include #include #include #include #include "logging.h" #include "compress_model.h" +#ifdef ZSTD_CLEVEL_DEFAULT +#define KNET_COMPRESS_DEFAULT ZSTD_CLEVEL_DEFAULT /* zstd default compression level from zstd.h */ +#else +#define KNET_COMPRESS_DEFAULT KNET_COMPRESS_UNKNOWN_DEFAULT +#endif + struct zstd_ctx { ZSTD_CCtx* cctx; ZSTD_DCtx* dctx; }; static int zstd_is_init( knet_handle_t knet_h, int method_idx) { if (knet_h->compress_int_data[method_idx]) { return 1; } return 0; } static void zstd_fini( knet_handle_t knet_h, int method_idx) { struct zstd_ctx *zstd_ctx = knet_h->compress_int_data[knet_h->compress_model]; if (zstd_ctx) { if (zstd_ctx->cctx) { ZSTD_freeCCtx(zstd_ctx->cctx); } if (zstd_ctx->dctx) { ZSTD_freeDCtx(zstd_ctx->dctx); } free(knet_h->compress_int_data[method_idx]); knet_h->compress_int_data[method_idx] = NULL; } return; } static int zstd_init( knet_handle_t knet_h, int method_idx) { struct zstd_ctx *zstd_ctx; int err = 0; if (!knet_h->compress_int_data[method_idx]) { zstd_ctx = malloc(sizeof(struct zstd_ctx)); if (!zstd_ctx) { errno = ENOMEM; return -1; } memset(zstd_ctx, 0, sizeof(struct zstd_ctx)); + knet_h->compress_int_data[method_idx] = zstd_ctx; + zstd_ctx->cctx = ZSTD_createCCtx(); if (!zstd_ctx->cctx) { log_err(knet_h, KNET_SUB_ZSTDCOMP, "Unable to create compression context"); err = -1; goto out_err; } zstd_ctx->dctx = ZSTD_createDCtx(); if (!zstd_ctx->dctx) { log_err(knet_h, KNET_SUB_ZSTDCOMP, "Unable to create decompression context"); err = -1; goto out_err; } - - knet_h->compress_int_data[method_idx] = zstd_ctx; } out_err: if (err) { zstd_fini(knet_h, method_idx); } return err; } static int zstd_compress( knet_handle_t knet_h, const unsigned char *buf_in, const ssize_t buf_in_len, unsigned char *buf_out, ssize_t *buf_out_len) { struct zstd_ctx *zstd_ctx = knet_h->compress_int_data[knet_h->compress_model]; size_t compress_size; compress_size = 
ZSTD_compressCCtx(zstd_ctx->cctx, buf_out, *buf_out_len, buf_in, buf_in_len, knet_h->compress_level); if (ZSTD_isError(compress_size)) { log_err(knet_h, KNET_SUB_ZSTDCOMP, "error compressing packet: %s", ZSTD_getErrorName(compress_size)); /* * ZSTD has lots of internal errors that are not easy to map * to standard errnos. Use a generic one for now */ errno = EINVAL; return -1; } *buf_out_len = compress_size; return 0; } static int zstd_decompress( knet_handle_t knet_h, const unsigned char *buf_in, const ssize_t buf_in_len, unsigned char *buf_out, ssize_t *buf_out_len) { struct zstd_ctx *zstd_ctx = knet_h->compress_int_data[knet_h->compress_model]; size_t decompress_size; decompress_size = ZSTD_decompressDCtx(zstd_ctx->dctx, buf_out, *buf_out_len, buf_in, buf_in_len); if (ZSTD_isError(decompress_size)) { log_err(knet_h, KNET_SUB_ZSTDCOMP, "error decompressing packet: %s", ZSTD_getErrorName(decompress_size)); /* * ZSTD has lots of internal errors that are not easy to map * to standard errnos. Use a generic one for now */ errno = EINVAL; return -1; } *buf_out_len = decompress_size; return 0; } +static int zstd_get_default_level() +{ + return KNET_COMPRESS_DEFAULT; +} + compress_ops_t compress_model = { KNET_COMPRESS_MODEL_ABI, zstd_is_init, zstd_init, zstd_fini, NULL, zstd_compress, - zstd_decompress + zstd_decompress, + zstd_get_default_level }; diff --git a/libknet/crypto.c b/libknet/crypto.c index afa4f88c..2c4d5f5c 100644 --- a/libknet/crypto.c +++ b/libknet/crypto.c @@ -1,235 +1,235 @@ /* * Copyright (C) 2012-2019 Red Hat, Inc. All rights reserved. * * Author: Fabio M. 
Di Nitto * * This software licensed under LGPL-2.0+ */ #include "config.h" -#include +#include #include #include #include #include #include "crypto.h" #include "crypto_model.h" #include "internals.h" #include "logging.h" #include "common.h" /* * internal module switch data */ static crypto_model_t crypto_modules_cmds[] = { { "nss", WITH_CRYPTO_NSS, 0, NULL }, { "openssl", WITH_CRYPTO_OPENSSL, 0, NULL }, { NULL, 0, 0, NULL } }; static int crypto_get_model(const char *model) { int idx = 0; while (crypto_modules_cmds[idx].model_name != NULL) { if (!strcmp(crypto_modules_cmds[idx].model_name, model)) return idx; idx++; } return -1; } /* * exported API */ int crypto_encrypt_and_sign ( knet_handle_t knet_h, const unsigned char *buf_in, const ssize_t buf_in_len, unsigned char *buf_out, ssize_t *buf_out_len) { return crypto_modules_cmds[knet_h->crypto_instance->model].ops->crypt(knet_h, buf_in, buf_in_len, buf_out, buf_out_len); } int crypto_encrypt_and_signv ( knet_handle_t knet_h, const struct iovec *iov_in, int iovcnt_in, unsigned char *buf_out, ssize_t *buf_out_len) { return crypto_modules_cmds[knet_h->crypto_instance->model].ops->cryptv(knet_h, iov_in, iovcnt_in, buf_out, buf_out_len); } int crypto_authenticate_and_decrypt ( knet_handle_t knet_h, const unsigned char *buf_in, const ssize_t buf_in_len, unsigned char *buf_out, ssize_t *buf_out_len) { return crypto_modules_cmds[knet_h->crypto_instance->model].ops->decrypt(knet_h, buf_in, buf_in_len, buf_out, buf_out_len); } int crypto_init( knet_handle_t knet_h, struct knet_handle_crypto_cfg *knet_handle_crypto_cfg) { int err = 0, savederrno = 0; int model = 0; struct crypto_instance *current = NULL, *new = NULL; current = knet_h->crypto_instance; model = crypto_get_model(knet_handle_crypto_cfg->crypto_model); if (model < 0) { log_err(knet_h, KNET_SUB_CRYPTO, "model %s not supported", knet_handle_crypto_cfg->crypto_model); return -1; } if (crypto_modules_cmds[model].built_in == 0) { log_err(knet_h, KNET_SUB_CRYPTO, "this 
version of libknet was built without %s support. Please contact your vendor or fix the build.", knet_handle_crypto_cfg->crypto_model); return -1; } savederrno = pthread_rwlock_wrlock(&shlib_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_CRYPTO, "Unable to get write lock: %s", strerror(savederrno)); return -1; } if (!crypto_modules_cmds[model].loaded) { crypto_modules_cmds[model].ops = load_module (knet_h, "crypto", crypto_modules_cmds[model].model_name); if (!crypto_modules_cmds[model].ops) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_CRYPTO, "Unable to load %s lib", crypto_modules_cmds[model].model_name); goto out; } if (crypto_modules_cmds[model].ops->abi_ver != KNET_CRYPTO_MODEL_ABI) { savederrno = EINVAL; err = -1; log_err(knet_h, KNET_SUB_CRYPTO, "ABI mismatch loading module %s. knet ver: %d, module ver: %d", crypto_modules_cmds[model].model_name, KNET_CRYPTO_MODEL_ABI, crypto_modules_cmds[model].ops->abi_ver); goto out; } crypto_modules_cmds[model].loaded = 1; } log_debug(knet_h, KNET_SUB_CRYPTO, "Initizializing crypto module [%s/%s/%s]", knet_handle_crypto_cfg->crypto_model, knet_handle_crypto_cfg->crypto_cipher_type, knet_handle_crypto_cfg->crypto_hash_type); new = malloc(sizeof(struct crypto_instance)); if (!new) { savederrno = ENOMEM; err = -1; log_err(knet_h, KNET_SUB_CRYPTO, "Unable to allocate memory for crypto instance"); goto out; } /* * if crypto_modules_cmds.ops->init fails, it is expected that * it will clean everything by itself. * crypto_modules_cmds.ops->fini is not invoked on error. 
*/ new->model = model; if (crypto_modules_cmds[model].ops->init(knet_h, new, knet_handle_crypto_cfg)) { savederrno = errno; err = -1; goto out; } out: if (!err) { knet_h->crypto_instance = new; knet_h->sec_block_size = new->sec_block_size; knet_h->sec_hash_size = new->sec_hash_size; knet_h->sec_salt_size = new->sec_salt_size; log_debug(knet_h, KNET_SUB_CRYPTO, "Hash size: %zu salt size: %zu block size: %zu", knet_h->sec_hash_size, knet_h->sec_salt_size, knet_h->sec_block_size); if (current) { if (crypto_modules_cmds[current->model].ops->fini != NULL) { crypto_modules_cmds[current->model].ops->fini(knet_h, current); } free(current); } } else { if (new) { free(new); } } pthread_rwlock_unlock(&shlib_rwlock); errno = err ? savederrno : 0; return err; } void crypto_fini( knet_handle_t knet_h) { int savederrno = 0; savederrno = pthread_rwlock_wrlock(&shlib_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_CRYPTO, "Unable to get write lock: %s", strerror(savederrno)); return; } if (knet_h->crypto_instance) { if (crypto_modules_cmds[knet_h->crypto_instance->model].ops->fini != NULL) { crypto_modules_cmds[knet_h->crypto_instance->model].ops->fini(knet_h, knet_h->crypto_instance); } free(knet_h->crypto_instance); knet_h->sec_block_size = 0; knet_h->sec_hash_size = 0; knet_h->sec_salt_size = 0; knet_h->crypto_instance = NULL; } pthread_rwlock_unlock(&shlib_rwlock); return; } int knet_get_crypto_list(struct knet_crypto_info *crypto_list, size_t *crypto_list_entries) { int err = 0; int idx = 0; int outidx = 0; if (!crypto_list_entries) { errno = EINVAL; return -1; } while (crypto_modules_cmds[idx].model_name != NULL) { if (crypto_modules_cmds[idx].built_in) { if (crypto_list) { crypto_list[outidx].name = crypto_modules_cmds[idx].model_name; } outidx++; } idx++; } *crypto_list_entries = outidx; if (!err) errno = 0; return err; } diff --git a/libknet/handle.c b/libknet/handle.c index 4221bee5..2b11e0f3 100644 --- a/libknet/handle.c +++ b/libknet/handle.c @@ -1,1729 +1,1753 @@ 
/* * Copyright (C) 2010-2019 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include #include #include #include #include #include "internals.h" #include "crypto.h" #include "links.h" #include "compress.h" #include "compat.h" #include "common.h" #include "threads_common.h" #include "threads_heartbeat.h" #include "threads_pmtud.h" #include "threads_dsthandler.h" #include "threads_rx.h" #include "threads_tx.h" #include "transports.h" #include "transport_common.h" #include "logging.h" static pthread_mutex_t handle_config_mutex = PTHREAD_MUTEX_INITIALIZER; pthread_rwlock_t shlib_rwlock; static uint8_t shlib_wrlock_init = 0; static uint32_t knet_ref = 0; static int _init_shlib_tracker(knet_handle_t knet_h) { int savederrno = 0; if (!shlib_wrlock_init) { savederrno = pthread_rwlock_init(&shlib_rwlock, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize shared lib rwlock: %s", strerror(savederrno)); errno = savederrno; return -1; } shlib_wrlock_init = 1; } return 0; } static void _fini_shlib_tracker(void) { if (knet_ref == 0) { pthread_rwlock_destroy(&shlib_rwlock); shlib_wrlock_init = 0; } return; } static int _init_locks(knet_handle_t knet_h) { int savederrno = 0; savederrno = pthread_rwlock_init(&knet_h->global_rwlock, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize list rwlock: %s", strerror(savederrno)); goto exit_fail; } savederrno = pthread_mutex_init(&knet_h->threads_status_mutex, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize threads status mutex: %s", strerror(savederrno)); goto exit_fail; } savederrno = pthread_mutex_init(&knet_h->pmtud_mutex, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize pmtud mutex: %s", strerror(savederrno)); goto exit_fail; } savederrno = 
pthread_mutex_init(&knet_h->kmtu_mutex, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize kernel_mtu mutex: %s", strerror(savederrno)); goto exit_fail; } savederrno = pthread_cond_init(&knet_h->pmtud_cond, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize pmtud conditional mutex: %s", strerror(savederrno)); goto exit_fail; } savederrno = pthread_mutex_init(&knet_h->hb_mutex, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize hb_thread mutex: %s", strerror(savederrno)); goto exit_fail; } savederrno = pthread_mutex_init(&knet_h->tx_mutex, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize tx_thread mutex: %s", strerror(savederrno)); goto exit_fail; } savederrno = pthread_mutex_init(&knet_h->backoff_mutex, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize pong timeout backoff mutex: %s", strerror(savederrno)); goto exit_fail; } savederrno = pthread_mutex_init(&knet_h->tx_seq_num_mutex, NULL); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize tx_seq_num_mutex mutex: %s", strerror(savederrno)); goto exit_fail; } return 0; exit_fail: errno = savederrno; return -1; } static void _destroy_locks(knet_handle_t knet_h) { pthread_rwlock_destroy(&knet_h->global_rwlock); pthread_mutex_destroy(&knet_h->pmtud_mutex); pthread_mutex_destroy(&knet_h->kmtu_mutex); pthread_cond_destroy(&knet_h->pmtud_cond); pthread_mutex_destroy(&knet_h->hb_mutex); pthread_mutex_destroy(&knet_h->tx_mutex); pthread_mutex_destroy(&knet_h->backoff_mutex); pthread_mutex_destroy(&knet_h->tx_seq_num_mutex); pthread_mutex_destroy(&knet_h->threads_status_mutex); } static int _init_socks(knet_handle_t knet_h) { int savederrno = 0; if (_init_socketpair(knet_h, knet_h->hostsockfd)) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize internal hostsockpair: %s", strerror(savederrno)); goto exit_fail; } if 
(_init_socketpair(knet_h, knet_h->dstsockfd)) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to initialize internal dstsockpair: %s", strerror(savederrno)); goto exit_fail; } return 0; exit_fail: errno = savederrno; return -1; } static void _close_socks(knet_handle_t knet_h) { _close_socketpair(knet_h, knet_h->dstsockfd); _close_socketpair(knet_h, knet_h->hostsockfd); } static int _init_buffers(knet_handle_t knet_h) { int savederrno = 0; int i; size_t bufsize; for (i = 0; i < PCKT_FRAG_MAX; i++) { bufsize = ceil((float)KNET_MAX_PACKET_SIZE / (i + 1)) + KNET_HEADER_ALL_SIZE; knet_h->send_to_links_buf[i] = malloc(bufsize); if (!knet_h->send_to_links_buf[i]) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory datafd to link buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->send_to_links_buf[i], 0, bufsize); } for (i = 0; i < PCKT_RX_BUFS; i++) { knet_h->recv_from_links_buf[i] = malloc(KNET_DATABUFSIZE); if (!knet_h->recv_from_links_buf[i]) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for link to datafd buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->recv_from_links_buf[i], 0, KNET_DATABUFSIZE); } knet_h->recv_from_sock_buf = malloc(KNET_DATABUFSIZE); if (!knet_h->recv_from_sock_buf) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for app to datafd buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->recv_from_sock_buf, 0, KNET_DATABUFSIZE); knet_h->pingbuf = malloc(KNET_HEADER_PING_SIZE); if (!knet_h->pingbuf) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for hearbeat buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->pingbuf, 0, KNET_HEADER_PING_SIZE); - knet_h->pmtudbuf = malloc(KNET_PMTUD_SIZE_V6); + knet_h->pmtudbuf = malloc(KNET_PMTUD_SIZE_V6 + KNET_HEADER_ALL_SIZE); if (!knet_h->pmtudbuf) { savederrno = errno; log_err(knet_h, 
KNET_SUB_HANDLE, "Unable to allocate memory for pmtud buffer: %s", strerror(savederrno)); goto exit_fail; } - memset(knet_h->pmtudbuf, 0, KNET_PMTUD_SIZE_V6); + memset(knet_h->pmtudbuf, 0, KNET_PMTUD_SIZE_V6 + KNET_HEADER_ALL_SIZE); for (i = 0; i < PCKT_FRAG_MAX; i++) { bufsize = ceil((float)KNET_MAX_PACKET_SIZE / (i + 1)) + KNET_HEADER_ALL_SIZE + KNET_DATABUFSIZE_CRYPT_PAD; knet_h->send_to_links_buf_crypt[i] = malloc(bufsize); if (!knet_h->send_to_links_buf_crypt[i]) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for crypto datafd to link buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->send_to_links_buf_crypt[i], 0, bufsize); } knet_h->recv_from_links_buf_decrypt = malloc(KNET_DATABUFSIZE_CRYPT); if (!knet_h->recv_from_links_buf_decrypt) { savederrno = errno; log_err(knet_h, KNET_SUB_CRYPTO, "Unable to allocate memory for crypto link to datafd buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->recv_from_links_buf_decrypt, 0, KNET_DATABUFSIZE_CRYPT); knet_h->recv_from_links_buf_crypt = malloc(KNET_DATABUFSIZE_CRYPT); if (!knet_h->recv_from_links_buf_crypt) { savederrno = errno; log_err(knet_h, KNET_SUB_CRYPTO, "Unable to allocate memory for crypto link to datafd buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->recv_from_links_buf_crypt, 0, KNET_DATABUFSIZE_CRYPT); knet_h->pingbuf_crypt = malloc(KNET_DATABUFSIZE_CRYPT); if (!knet_h->pingbuf_crypt) { savederrno = errno; log_err(knet_h, KNET_SUB_CRYPTO, "Unable to allocate memory for crypto hearbeat buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->pingbuf_crypt, 0, KNET_DATABUFSIZE_CRYPT); knet_h->pmtudbuf_crypt = malloc(KNET_DATABUFSIZE_CRYPT); if (!knet_h->pmtudbuf_crypt) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for crypto pmtud buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->pmtudbuf_crypt, 0, KNET_DATABUFSIZE_CRYPT); 
knet_h->recv_from_links_buf_decompress = malloc(KNET_DATABUFSIZE_COMPRESS); if (!knet_h->recv_from_links_buf_decompress) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for decompress buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->recv_from_links_buf_decompress, 0, KNET_DATABUFSIZE_COMPRESS); knet_h->send_to_links_buf_compress = malloc(KNET_DATABUFSIZE_COMPRESS); if (!knet_h->send_to_links_buf_compress) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for compress buffer: %s", strerror(savederrno)); goto exit_fail; } memset(knet_h->send_to_links_buf_compress, 0, KNET_DATABUFSIZE_COMPRESS); memset(knet_h->knet_transport_fd_tracker, 0, sizeof(knet_h->knet_transport_fd_tracker)); for (i = 0; i < KNET_MAX_FDS; i++) { knet_h->knet_transport_fd_tracker[i].transport = KNET_MAX_TRANSPORTS; } return 0; exit_fail: errno = savederrno; return -1; } static void _destroy_buffers(knet_handle_t knet_h) { int i; for (i = 0; i < PCKT_FRAG_MAX; i++) { free(knet_h->send_to_links_buf[i]); free(knet_h->send_to_links_buf_crypt[i]); } for (i = 0; i < PCKT_RX_BUFS; i++) { free(knet_h->recv_from_links_buf[i]); } free(knet_h->recv_from_links_buf_decompress); free(knet_h->send_to_links_buf_compress); free(knet_h->recv_from_sock_buf); free(knet_h->recv_from_links_buf_decrypt); free(knet_h->recv_from_links_buf_crypt); free(knet_h->pingbuf); free(knet_h->pingbuf_crypt); free(knet_h->pmtudbuf); free(knet_h->pmtudbuf_crypt); } static int _init_epolls(knet_handle_t knet_h) { struct epoll_event ev; int savederrno = 0; /* * even if the kernel does dynamic allocation with epoll_ctl * we need to reserve one extra for host to host communication */ knet_h->send_to_links_epollfd = epoll_create(KNET_EPOLL_MAX_EVENTS + 1); if (knet_h->send_to_links_epollfd < 0) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to create epoll datafd to link fd: %s", strerror(savederrno)); goto exit_fail; } 
knet_h->recv_from_links_epollfd = epoll_create(KNET_EPOLL_MAX_EVENTS); if (knet_h->recv_from_links_epollfd < 0) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to create epoll link to datafd fd: %s", strerror(savederrno)); goto exit_fail; } knet_h->dst_link_handler_epollfd = epoll_create(KNET_EPOLL_MAX_EVENTS); if (knet_h->dst_link_handler_epollfd < 0) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to create epoll dst cache fd: %s", strerror(savederrno)); goto exit_fail; } if (_fdset_cloexec(knet_h->send_to_links_epollfd)) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to set CLOEXEC on datafd to link epoll fd: %s", strerror(savederrno)); goto exit_fail; } if (_fdset_cloexec(knet_h->recv_from_links_epollfd)) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to set CLOEXEC on link to datafd epoll fd: %s", strerror(savederrno)); goto exit_fail; } if (_fdset_cloexec(knet_h->dst_link_handler_epollfd)) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to set CLOEXEC on dst cache epoll fd: %s", strerror(savederrno)); goto exit_fail; } memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = knet_h->hostsockfd[0]; if (epoll_ctl(knet_h->send_to_links_epollfd, EPOLL_CTL_ADD, knet_h->hostsockfd[0], &ev)) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to add hostsockfd[0] to epoll pool: %s", strerror(savederrno)); goto exit_fail; } memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = knet_h->dstsockfd[0]; if (epoll_ctl(knet_h->dst_link_handler_epollfd, EPOLL_CTL_ADD, knet_h->dstsockfd[0], &ev)) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to add dstsockfd[0] to epoll pool: %s", strerror(savederrno)); goto exit_fail; } return 0; exit_fail: errno = savederrno; return -1; } static void _close_epolls(knet_handle_t knet_h) { struct epoll_event ev; int i; memset(&ev, 0, sizeof(struct epoll_event)); for (i = 0; i < KNET_DATAFD_MAX; 
i++) { if (knet_h->sockfd[i].in_use) { epoll_ctl(knet_h->send_to_links_epollfd, EPOLL_CTL_DEL, knet_h->sockfd[i].sockfd[knet_h->sockfd[i].is_created], &ev); if (knet_h->sockfd[i].sockfd[knet_h->sockfd[i].is_created]) { _close_socketpair(knet_h, knet_h->sockfd[i].sockfd); } } } epoll_ctl(knet_h->send_to_links_epollfd, EPOLL_CTL_DEL, knet_h->hostsockfd[0], &ev); epoll_ctl(knet_h->dst_link_handler_epollfd, EPOLL_CTL_DEL, knet_h->dstsockfd[0], &ev); close(knet_h->send_to_links_epollfd); close(knet_h->recv_from_links_epollfd); close(knet_h->dst_link_handler_epollfd); } static int _start_threads(knet_handle_t knet_h) { int savederrno = 0; + pthread_attr_t attr; set_thread_status(knet_h, KNET_THREAD_PMTUD, KNET_THREAD_REGISTERED); - savederrno = pthread_create(&knet_h->pmtud_link_handler_thread, 0, + + savederrno = pthread_attr_init(&attr); + if (savederrno) { + log_err(knet_h, KNET_SUB_HANDLE, "Unable to init pthread attributes: %s", + strerror(savederrno)); + goto exit_fail; + } + savederrno = pthread_attr_setstacksize(&attr, KNET_THREAD_STACK_SIZE); + if (savederrno) { + log_err(knet_h, KNET_SUB_HANDLE, "Unable to set stack size attribute: %s", + strerror(savederrno)); + goto exit_fail; + } + + savederrno = pthread_create(&knet_h->pmtud_link_handler_thread, &attr, _handle_pmtud_link_thread, (void *) knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to start pmtud link thread: %s", strerror(savederrno)); goto exit_fail; } set_thread_status(knet_h, KNET_THREAD_DST_LINK, KNET_THREAD_REGISTERED); - savederrno = pthread_create(&knet_h->dst_link_handler_thread, 0, + savederrno = pthread_create(&knet_h->dst_link_handler_thread, &attr, _handle_dst_link_handler_thread, (void *) knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to start dst cache thread: %s", strerror(savederrno)); goto exit_fail; } set_thread_status(knet_h, KNET_THREAD_TX, KNET_THREAD_REGISTERED); - savederrno = pthread_create(&knet_h->send_to_links_thread, 0, + savederrno = 
pthread_create(&knet_h->send_to_links_thread, &attr, _handle_send_to_links_thread, (void *) knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to start datafd to link thread: %s", strerror(savederrno)); goto exit_fail; } set_thread_status(knet_h, KNET_THREAD_RX, KNET_THREAD_REGISTERED); - savederrno = pthread_create(&knet_h->recv_from_links_thread, 0, + savederrno = pthread_create(&knet_h->recv_from_links_thread, &attr, _handle_recv_from_links_thread, (void *) knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to start link to datafd thread: %s", strerror(savederrno)); goto exit_fail; } set_thread_status(knet_h, KNET_THREAD_HB, KNET_THREAD_REGISTERED); - savederrno = pthread_create(&knet_h->heartbt_thread, 0, + savederrno = pthread_create(&knet_h->heartbt_thread, &attr, _handle_heartbt_thread, (void *) knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to start heartbeat thread: %s", strerror(savederrno)); goto exit_fail; } + savederrno = pthread_attr_destroy(&attr); + if (savederrno) { + log_err(knet_h, KNET_SUB_HANDLE, "Unable to destroy pthread attributes: %s", + strerror(savederrno)); + /* + * Do not return error code. Error is not critical. 
+ */ + } + return 0; exit_fail: errno = savederrno; return -1; } static void _stop_threads(knet_handle_t knet_h) { void *retval; wait_all_threads_status(knet_h, KNET_THREAD_STOPPED); if (knet_h->heartbt_thread) { pthread_cancel(knet_h->heartbt_thread); pthread_join(knet_h->heartbt_thread, &retval); } if (knet_h->send_to_links_thread) { pthread_cancel(knet_h->send_to_links_thread); pthread_join(knet_h->send_to_links_thread, &retval); } if (knet_h->recv_from_links_thread) { pthread_cancel(knet_h->recv_from_links_thread); pthread_join(knet_h->recv_from_links_thread, &retval); } if (knet_h->dst_link_handler_thread) { pthread_cancel(knet_h->dst_link_handler_thread); pthread_join(knet_h->dst_link_handler_thread, &retval); } if (knet_h->pmtud_link_handler_thread) { pthread_cancel(knet_h->pmtud_link_handler_thread); pthread_join(knet_h->pmtud_link_handler_thread, &retval); } } knet_handle_t knet_handle_new_ex(knet_node_id_t host_id, int log_fd, uint8_t default_log_level, uint64_t flags) { knet_handle_t knet_h; int savederrno = 0; struct rlimit cur; if (getrlimit(RLIMIT_NOFILE, &cur) < 0) { return NULL; } if ((log_fd < 0) || ((unsigned int)log_fd >= cur.rlim_max)) { errno = EINVAL; return NULL; } /* * validate incoming request */ if ((log_fd) && (default_log_level > KNET_LOG_DEBUG)) { errno = EINVAL; return NULL; } if (flags > KNET_HANDLE_FLAG_PRIVILEGED * 2 - 1) { errno = EINVAL; return NULL; } /* * allocate handle */ knet_h = malloc(sizeof(struct knet_handle)); if (!knet_h) { errno = ENOMEM; return NULL; } memset(knet_h, 0, sizeof(struct knet_handle)); /* * setting up some handle data so that we can use logging * also when initializing the library global locks * and trackers */ knet_h->flags = flags; /* * copy config in place */ knet_h->host_id = host_id; knet_h->logfd = log_fd; if (knet_h->logfd > 0) { memset(&knet_h->log_levels, default_log_level, KNET_MAX_SUBSYSTEMS); } /* * set pmtud default timers */ knet_h->pmtud_interval = KNET_PMTUD_DEFAULT_INTERVAL; /* * set 
transports reconnect default timers */ knet_h->reconnect_int = KNET_TRANSPORT_DEFAULT_RECONNECT_INTERVAL; /* * Set 'min' stats to the maximum value so the * first value we get is always less */ knet_h->stats.tx_compress_time_min = UINT64_MAX; knet_h->stats.rx_compress_time_min = UINT64_MAX; knet_h->stats.tx_crypt_time_min = UINT64_MAX; knet_h->stats.rx_crypt_time_min = UINT64_MAX; /* * init global shlib tracker */ savederrno = pthread_mutex_lock(&handle_config_mutex); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get handle mutex lock: %s", strerror(savederrno)); free(knet_h); knet_h = NULL; errno = savederrno; return NULL; } knet_ref++; if (_init_shlib_tracker(knet_h) < 0) { savederrno = errno; log_err(knet_h, KNET_SUB_HANDLE, "Unable to init handles traceker: %s", strerror(savederrno)); errno = savederrno; goto exit_fail; } pthread_mutex_unlock(&handle_config_mutex); /* * init main locking structures */ if (_init_locks(knet_h)) { savederrno = errno; goto exit_fail; } /* * init sockets */ if (_init_socks(knet_h)) { savederrno = errno; goto exit_fail; } /* * allocate packet buffers */ if (_init_buffers(knet_h)) { savederrno = errno; goto exit_fail; } if (compress_init(knet_h)) { savederrno = errno; goto exit_fail; } /* * create epoll fds */ if (_init_epolls(knet_h)) { savederrno = errno; goto exit_fail; } /* * start transports */ if (start_all_transports(knet_h)) { savederrno = errno; goto exit_fail; } /* * start internal threads */ if (_start_threads(knet_h)) { savederrno = errno; goto exit_fail; } wait_all_threads_status(knet_h, KNET_THREAD_STARTED); errno = 0; return knet_h; exit_fail: knet_handle_free(knet_h); errno = savederrno; return NULL; } knet_handle_t knet_handle_new(knet_node_id_t host_id, int log_fd, uint8_t default_log_level) { return knet_handle_new_ex(host_id, log_fd, default_log_level, KNET_HANDLE_FLAG_PRIVILEGED); } int knet_handle_free(knet_handle_t knet_h) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } 
savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (knet_h->host_head != NULL) { savederrno = EBUSY; log_err(knet_h, KNET_SUB_HANDLE, "Unable to free handle: host(s) or listener(s) are still active: %s", strerror(savederrno)); pthread_rwlock_unlock(&knet_h->global_rwlock); errno = savederrno; return -1; } knet_h->fini_in_progress = 1; pthread_rwlock_unlock(&knet_h->global_rwlock); _stop_threads(knet_h); stop_all_transports(knet_h); _close_epolls(knet_h); _destroy_buffers(knet_h); _close_socks(knet_h); crypto_fini(knet_h); compress_fini(knet_h, 1); _destroy_locks(knet_h); free(knet_h); knet_h = NULL; (void)pthread_mutex_lock(&handle_config_mutex); knet_ref--; _fini_shlib_tracker(); pthread_mutex_unlock(&handle_config_mutex); errno = 0; return 0; } int knet_handle_enable_sock_notify(knet_handle_t knet_h, void *sock_notify_fn_private_data, void (*sock_notify_fn) ( void *private_data, int datafd, int8_t channel, uint8_t tx_rx, int error, int errorno)) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } if (!sock_notify_fn) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } knet_h->sock_notify_fn_private_data = sock_notify_fn_private_data; knet_h->sock_notify_fn = sock_notify_fn; log_debug(knet_h, KNET_SUB_HANDLE, "sock_notify_fn enabled"); pthread_rwlock_unlock(&knet_h->global_rwlock); return 0; } int knet_handle_add_datafd(knet_handle_t knet_h, int *datafd, int8_t *channel) { int err = 0, savederrno = 0; int i; struct epoll_event ev; if (!knet_h) { errno = EINVAL; return -1; } if (datafd == NULL) { errno = EINVAL; return -1; } if (channel == NULL) { errno = EINVAL; return -1; } if (*channel >= KNET_DATAFD_MAX) { errno = EINVAL; return -1; } savederrno = 
get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (!knet_h->sock_notify_fn) { log_err(knet_h, KNET_SUB_HANDLE, "Adding datafd requires sock notify callback enabled!"); savederrno = EINVAL; err = -1; goto out_unlock; } if (*datafd > 0) { for (i = 0; i < KNET_DATAFD_MAX; i++) { if ((knet_h->sockfd[i].in_use) && (knet_h->sockfd[i].sockfd[0] == *datafd)) { log_err(knet_h, KNET_SUB_HANDLE, "requested datafd: %d already exist in index: %d", *datafd, i); savederrno = EEXIST; err = -1; goto out_unlock; } } } /* * auto allocate a channel */ if (*channel < 0) { for (i = 0; i < KNET_DATAFD_MAX; i++) { if (!knet_h->sockfd[i].in_use) { *channel = i; break; } } if (*channel < 0) { savederrno = EBUSY; err = -1; goto out_unlock; } } else { if (knet_h->sockfd[*channel].in_use) { savederrno = EBUSY; err = -1; goto out_unlock; } } knet_h->sockfd[*channel].is_created = 0; knet_h->sockfd[*channel].is_socket = 0; knet_h->sockfd[*channel].has_error = 0; if (*datafd > 0) { int sockopt; socklen_t sockoptlen = sizeof(sockopt); if (_fdset_cloexec(*datafd)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_HANDLE, "Unable to set CLOEXEC on datafd: %s", strerror(savederrno)); goto out_unlock; } if (_fdset_nonblock(*datafd)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_HANDLE, "Unable to set NONBLOCK on datafd: %s", strerror(savederrno)); goto out_unlock; } knet_h->sockfd[*channel].sockfd[0] = *datafd; knet_h->sockfd[*channel].sockfd[1] = 0; if (!getsockopt(knet_h->sockfd[*channel].sockfd[0], SOL_SOCKET, SO_TYPE, &sockopt, &sockoptlen)) { knet_h->sockfd[*channel].is_socket = 1; } } else { if (_init_socketpair(knet_h, knet_h->sockfd[*channel].sockfd)) { savederrno = errno; err = -1; goto out_unlock; } knet_h->sockfd[*channel].is_created = 1; knet_h->sockfd[*channel].is_socket = 1; *datafd = knet_h->sockfd[*channel].sockfd[0]; } memset(&ev, 0, sizeof(struct 
epoll_event)); ev.events = EPOLLIN; ev.data.fd = knet_h->sockfd[*channel].sockfd[knet_h->sockfd[*channel].is_created]; if (epoll_ctl(knet_h->send_to_links_epollfd, EPOLL_CTL_ADD, knet_h->sockfd[*channel].sockfd[knet_h->sockfd[*channel].is_created], &ev)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_HANDLE, "Unable to add datafd %d to linkfd epoll pool: %s", knet_h->sockfd[*channel].sockfd[knet_h->sockfd[*channel].is_created], strerror(savederrno)); if (knet_h->sockfd[*channel].is_created) { _close_socketpair(knet_h, knet_h->sockfd[*channel].sockfd); } goto out_unlock; } knet_h->sockfd[*channel].in_use = 1; out_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_handle_remove_datafd(knet_handle_t knet_h, int datafd) { int err = 0, savederrno = 0; int8_t channel = -1; int i; struct epoll_event ev; if (!knet_h) { errno = EINVAL; return -1; } if (datafd <= 0) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } for (i = 0; i < KNET_DATAFD_MAX; i++) { if ((knet_h->sockfd[i].in_use) && (knet_h->sockfd[i].sockfd[0] == datafd)) { channel = i; break; } } if (channel < 0) { savederrno = EINVAL; err = -1; goto out_unlock; } if (!knet_h->sockfd[channel].has_error) { memset(&ev, 0, sizeof(struct epoll_event)); if (epoll_ctl(knet_h->send_to_links_epollfd, EPOLL_CTL_DEL, knet_h->sockfd[channel].sockfd[knet_h->sockfd[channel].is_created], &ev)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_HANDLE, "Unable to del datafd %d from linkfd epoll pool: %s", knet_h->sockfd[channel].sockfd[0], strerror(savederrno)); goto out_unlock; } } if (knet_h->sockfd[channel].is_created) { _close_socketpair(knet_h, knet_h->sockfd[channel].sockfd); } memset(&knet_h->sockfd[channel], 0, sizeof(struct knet_sock)); out_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); 
errno = err ? savederrno : 0; return err; } int knet_handle_get_datafd(knet_handle_t knet_h, const int8_t channel, int *datafd) { int err = 0, savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } if ((channel < 0) || (channel >= KNET_DATAFD_MAX)) { errno = EINVAL; return -1; } if (datafd == NULL) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (!knet_h->sockfd[channel].in_use) { savederrno = EINVAL; err = -1; goto out_unlock; } *datafd = knet_h->sockfd[channel].sockfd[0]; out_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_handle_get_channel(knet_handle_t knet_h, const int datafd, int8_t *channel) { int err = 0, savederrno = 0; int i; if (!knet_h) { errno = EINVAL; return -1; } if (datafd <= 0) { errno = EINVAL; return -1; } if (channel == NULL) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } *channel = -1; for (i = 0; i < KNET_DATAFD_MAX; i++) { if ((knet_h->sockfd[i].in_use) && (knet_h->sockfd[i].sockfd[0] == datafd)) { *channel = i; break; } } if (*channel < 0) { savederrno = EINVAL; err = -1; goto out_unlock; } out_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? 
savederrno : 0; return err; } int knet_handle_enable_filter(knet_handle_t knet_h, void *dst_host_filter_fn_private_data, int (*dst_host_filter_fn) ( void *private_data, const unsigned char *outdata, ssize_t outdata_len, uint8_t tx_rx, knet_node_id_t this_host_id, knet_node_id_t src_node_id, int8_t *channel, knet_node_id_t *dst_host_ids, size_t *dst_host_ids_entries)) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } knet_h->dst_host_filter_fn_private_data = dst_host_filter_fn_private_data; knet_h->dst_host_filter_fn = dst_host_filter_fn; if (knet_h->dst_host_filter_fn) { log_debug(knet_h, KNET_SUB_HANDLE, "dst_host_filter_fn enabled"); } else { log_debug(knet_h, KNET_SUB_HANDLE, "dst_host_filter_fn disabled"); } pthread_rwlock_unlock(&knet_h->global_rwlock); errno = 0; return 0; } int knet_handle_setfwd(knet_handle_t knet_h, unsigned int enabled) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } if (enabled > 1) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (enabled) { knet_h->enabled = enabled; log_debug(knet_h, KNET_SUB_HANDLE, "Data forwarding is enabled"); } else { /* * notify TX and RX threads to flush the queues */ if (set_thread_flush_queue(knet_h, KNET_THREAD_TX, KNET_THREAD_QUEUE_FLUSH) < 0) { log_debug(knet_h, KNET_SUB_HANDLE, "Unable to request queue flushing for TX thread"); } if (set_thread_flush_queue(knet_h, KNET_THREAD_RX, KNET_THREAD_QUEUE_FLUSH) < 0) { log_debug(knet_h, KNET_SUB_HANDLE, "Unable to request queue flushing for RX thread"); } } pthread_rwlock_unlock(&knet_h->global_rwlock); /* * when disabling data forward, we need to give time to TX and RX * to flush the queues. 
* * the TX thread is the main leader here. When there is no more * data in the TX queue, we will also close traffic for RX. */ if (!enabled) { /* * this usleep might be unnecessary, but wait_all_threads_flush_queue * adds extra locking delay. * * allow all threads to run free without extra locking interference * and then we switch to a more active wait in case the scheduler * has decided to delay one thread or another */ usleep(KNET_THREADS_TIMERES * 2); wait_all_threads_flush_queue(knet_h); /* * all threads have done flushing the queue, we can stop data forwarding */ savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } knet_h->enabled = enabled; log_debug(knet_h, KNET_SUB_HANDLE, "Data forwarding is disabled"); pthread_rwlock_unlock(&knet_h->global_rwlock); } errno = 0; return 0; } int knet_handle_enable_access_lists(knet_handle_t knet_h, unsigned int enabled) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } if (enabled > 1) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } knet_h->use_access_lists = enabled; if (enabled) { log_debug(knet_h, KNET_SUB_HANDLE, "Links access lists are enabled"); } else { log_debug(knet_h, KNET_SUB_HANDLE, "Links access lists are disabled"); } pthread_rwlock_unlock(&knet_h->global_rwlock); errno = 0; return 0; } int knet_handle_pmtud_getfreq(knet_handle_t knet_h, unsigned int *interval) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } if (!interval) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } *interval = knet_h->pmtud_interval; 
pthread_rwlock_unlock(&knet_h->global_rwlock); errno = 0; return 0; } int knet_handle_pmtud_setfreq(knet_handle_t knet_h, unsigned int interval) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } if ((!interval) || (interval > 86400)) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } knet_h->pmtud_interval = interval; log_debug(knet_h, KNET_SUB_HANDLE, "PMTUd interval set to: %u seconds", interval); pthread_rwlock_unlock(&knet_h->global_rwlock); errno = 0; return 0; } int knet_handle_enable_pmtud_notify(knet_handle_t knet_h, void *pmtud_notify_fn_private_data, void (*pmtud_notify_fn) ( void *private_data, unsigned int data_mtu)) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } knet_h->pmtud_notify_fn_private_data = pmtud_notify_fn_private_data; knet_h->pmtud_notify_fn = pmtud_notify_fn; if (knet_h->pmtud_notify_fn) { log_debug(knet_h, KNET_SUB_HANDLE, "pmtud_notify_fn enabled"); } else { log_debug(knet_h, KNET_SUB_HANDLE, "pmtud_notify_fn disabled"); } pthread_rwlock_unlock(&knet_h->global_rwlock); errno = 0; return 0; } int knet_handle_pmtud_set(knet_handle_t knet_h, unsigned int iface_mtu) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } if (iface_mtu > KNET_PMTUD_SIZE_V4) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_PMTUD, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } log_info(knet_h, KNET_SUB_PMTUD, "MTU manually set to: %u", iface_mtu); knet_h->manual_mtu = iface_mtu; force_pmtud_run(knet_h, KNET_SUB_PMTUD, 0); 
pthread_rwlock_unlock(&knet_h->global_rwlock); errno = 0; return 0; } int knet_handle_pmtud_get(knet_handle_t knet_h, unsigned int *data_mtu) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } if (!data_mtu) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } *data_mtu = knet_h->data_mtu; pthread_rwlock_unlock(&knet_h->global_rwlock); errno = 0; return 0; } int knet_handle_crypto(knet_handle_t knet_h, struct knet_handle_crypto_cfg *knet_handle_crypto_cfg) { int savederrno = 0; int err = 0; if (!knet_h) { errno = EINVAL; return -1; } if (!knet_handle_crypto_cfg) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if ((!strncmp("none", knet_handle_crypto_cfg->crypto_model, 4)) || ((!strncmp("none", knet_handle_crypto_cfg->crypto_cipher_type, 4)) && (!strncmp("none", knet_handle_crypto_cfg->crypto_hash_type, 4)))) { crypto_fini(knet_h); log_debug(knet_h, KNET_SUB_CRYPTO, "crypto is not enabled"); err = 0; goto exit_unlock; } if (knet_handle_crypto_cfg->private_key_len < KNET_MIN_KEY_LEN) { log_debug(knet_h, KNET_SUB_CRYPTO, "private key len too short (min %d): %u", KNET_MIN_KEY_LEN, knet_handle_crypto_cfg->private_key_len); savederrno = EINVAL; err = -1; goto exit_unlock; } if (knet_handle_crypto_cfg->private_key_len > KNET_MAX_KEY_LEN) { log_debug(knet_h, KNET_SUB_CRYPTO, "private key len too long (max %d): %u", KNET_MAX_KEY_LEN, knet_handle_crypto_cfg->private_key_len); savederrno = EINVAL; err = -1; goto exit_unlock; } err = crypto_init(knet_h, knet_handle_crypto_cfg); if (err) { err = -2; savederrno = errno; } exit_unlock: if (!err) { force_pmtud_run(knet_h, KNET_SUB_CRYPTO, 1); } 
pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_handle_compress(knet_handle_t knet_h, struct knet_handle_compress_cfg *knet_handle_compress_cfg) { int savederrno = 0; int err = 0; if (!knet_h) { errno = EINVAL; return -1; } if (!knet_handle_compress_cfg) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } compress_fini(knet_h, 0); err = compress_cfg(knet_h, knet_handle_compress_cfg); savederrno = errno; pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } ssize_t knet_recv(knet_handle_t knet_h, char *buff, const size_t buff_len, const int8_t channel) { int savederrno = 0; ssize_t err = 0; struct iovec iov_in; if (!knet_h) { errno = EINVAL; return -1; } if (buff == NULL) { errno = EINVAL; return -1; } if (buff_len <= 0) { errno = EINVAL; return -1; } if (buff_len > KNET_MAX_PACKET_SIZE) { errno = EINVAL; return -1; } if (channel < 0) { errno = EINVAL; return -1; } if (channel >= KNET_DATAFD_MAX) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (!knet_h->sockfd[channel].in_use) { savederrno = EINVAL; err = -1; goto out_unlock; } memset(&iov_in, 0, sizeof(iov_in)); iov_in.iov_base = (void *)buff; iov_in.iov_len = buff_len; err = readv(knet_h->sockfd[channel].sockfd[0], &iov_in, 1); savederrno = errno; out_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? 
savederrno : 0; return err; } ssize_t knet_send(knet_handle_t knet_h, const char *buff, const size_t buff_len, const int8_t channel) { int savederrno = 0; ssize_t err = 0; struct iovec iov_out[1]; if (!knet_h) { errno = EINVAL; return -1; } if (buff == NULL) { errno = EINVAL; return -1; } if (buff_len <= 0) { errno = EINVAL; return -1; } if (buff_len > KNET_MAX_PACKET_SIZE) { errno = EINVAL; return -1; } if (channel < 0) { errno = EINVAL; return -1; } if (channel >= KNET_DATAFD_MAX) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (!knet_h->sockfd[channel].in_use) { savederrno = EINVAL; err = -1; goto out_unlock; } memset(iov_out, 0, sizeof(iov_out)); iov_out[0].iov_base = (void *)buff; iov_out[0].iov_len = buff_len; err = writev(knet_h->sockfd[channel].sockfd[0], iov_out, 1); savederrno = errno; out_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_handle_get_stats(knet_handle_t knet_h, struct knet_handle_stats *stats, size_t struct_size) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } if (!stats) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (struct_size > sizeof(struct knet_handle_stats)) { struct_size = sizeof(struct knet_handle_stats); } memmove(stats, &knet_h->stats, struct_size); /* * TX crypt stats only count the data packets sent, so add in the ping/pong/pmtud figures * RX is OK as it counts them before they are sorted. 
*/ stats->tx_crypt_packets += knet_h->stats_extra.tx_crypt_ping_packets + knet_h->stats_extra.tx_crypt_pong_packets + knet_h->stats_extra.tx_crypt_pmtu_packets + knet_h->stats_extra.tx_crypt_pmtu_reply_packets; /* Tell the caller our full size in case they have an old version */ stats->size = sizeof(struct knet_handle_stats); pthread_rwlock_unlock(&knet_h->global_rwlock); return 0; } int knet_handle_clear_stats(knet_handle_t knet_h, int clear_option) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } if (clear_option != KNET_CLEARSTATS_HANDLE_ONLY && clear_option != KNET_CLEARSTATS_HANDLE_AND_LINK) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } memset(&knet_h->stats, 0, sizeof(struct knet_handle_stats)); memset(&knet_h->stats_extra, 0, sizeof(struct knet_handle_stats_extra)); if (clear_option == KNET_CLEARSTATS_HANDLE_AND_LINK) { _link_clear_stats(knet_h); } pthread_rwlock_unlock(&knet_h->global_rwlock); return 0; } diff --git a/libknet/internals.h b/libknet/internals.h index e0be5770..4fa12543 100644 --- a/libknet/internals.h +++ b/libknet/internals.h @@ -1,568 +1,578 @@ /* * Copyright (C) 2010-2019 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. 
Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #ifndef __KNET_INTERNALS_H__ #define __KNET_INTERNALS_H__ /* * NOTE: you shouldn't need to include this header normally */ #include #include "libknet.h" #include "onwire.h" #include "compat.h" #include "threads_common.h" #define KNET_DATABUFSIZE KNET_MAX_PACKET_SIZE + KNET_HEADER_ALL_SIZE #define KNET_DATABUFSIZE_CRYPT_PAD 1024 #define KNET_DATABUFSIZE_CRYPT KNET_DATABUFSIZE + KNET_DATABUFSIZE_CRYPT_PAD #define KNET_DATABUFSIZE_COMPRESS_PAD 1024 #define KNET_DATABUFSIZE_COMPRESS KNET_DATABUFSIZE + KNET_DATABUFSIZE_COMPRESS_PAD #define KNET_RING_RCVBUFF 8388608 #define PCKT_FRAG_MAX UINT8_MAX #define PCKT_RX_BUFS 512 -#define KNET_EPOLL_MAX_EVENTS KNET_DATAFD_MAX +#define KNET_EPOLL_MAX_EVENTS KNET_DATAFD_MAX + 1 + +#define KNET_INTERNAL_DATA_CHANNEL KNET_DATAFD_MAX + +/* + * Size of threads stack. Value is chosen by experimenting, how much is needed + * to successfully finish test suite, and at the time of writing patch it was + * ~300KiB. To have some room for future enhancement it is increased + * by factor of 3 and rounded.
+ */ +#define KNET_THREAD_STACK_SIZE (1024 * 1024) typedef void *knet_transport_link_t; /* per link transport handle */ typedef void *knet_transport_t; /* per knet_h transport handle */ struct knet_transport_ops; /* Forward because of circular dependancy */ struct knet_mmsghdr { struct msghdr msg_hdr; /* Message header */ unsigned int msg_len; /* Number of bytes transmitted */ }; struct knet_link { /* required */ struct sockaddr_storage src_addr; struct sockaddr_storage dst_addr; /* configurable */ unsigned int dynamic; /* see KNET_LINK_DYN_ define above */ uint8_t priority; /* higher priority == preferred for A/P */ unsigned long long ping_interval; /* interval */ unsigned long long pong_timeout; /* timeout */ unsigned long long pong_timeout_adj; /* timeout adjusted for latency */ uint8_t pong_timeout_backoff; /* see link.h for definition */ unsigned int latency_fix; /* precision */ uint8_t pong_count; /* how many ping/pong to send/receive before link is up */ uint64_t flags; /* status */ struct knet_link_status status; /* internals */ uint8_t link_id; uint8_t transport; /* #defined constant from API */ knet_transport_link_t transport_link; /* link_info_t from transport */ int outsock; unsigned int configured:1; /* set to 1 if src/dst have been configured transport initialized on this link*/ unsigned int transport_connected:1; /* set to 1 if lower level transport is connected */ unsigned int latency_exp; uint8_t received_pong; struct timespec ping_last; /* used by PMTUD thread as temp per-link variables and should always contain the onwire_len value! */ uint32_t proto_overhead; /* IP + UDP/SCTP overhead. 
NOT to be confused with stats.proto_overhead that includes also knet headers and crypto headers */ struct timespec pmtud_last; uint32_t last_ping_size; uint32_t last_good_mtu; uint32_t last_bad_mtu; uint32_t last_sent_mtu; uint32_t last_recv_mtu; uint32_t pmtud_crypto_timeout_multiplier;/* used by PMTUd to adjust timeouts on high loads */ uint8_t has_valid_mtu; }; #define KNET_CBUFFER_SIZE 4096 struct knet_host_defrag_buf { char buf[KNET_DATABUFSIZE]; uint8_t in_use; /* 0 buffer is free, 1 is in use */ seq_num_t pckt_seq; /* identify the pckt we are receiving */ uint8_t frag_recv; /* how many frags did we receive */ uint8_t frag_map[PCKT_FRAG_MAX];/* bitmap of what we received? */ uint8_t last_first; /* special case if we receive the last fragment first */ - uint16_t frag_size; /* normal frag size (not the last one) */ - uint16_t last_frag_size; /* the last fragment might not be aligned with MTU size */ + ssize_t frag_size; /* normal frag size (not the last one) */ + ssize_t last_frag_size; /* the last fragment might not be aligned with MTU size */ struct timespec last_update; /* keep time of the last pckt */ }; struct knet_host { /* required */ knet_node_id_t host_id; /* configurable */ uint8_t link_handler_policy; char name[KNET_MAX_HOST_LEN]; /* status */ struct knet_host_status status; /* internals */ char circular_buffer[KNET_CBUFFER_SIZE]; seq_num_t rx_seq_num; seq_num_t untimed_rx_seq_num; seq_num_t timed_rx_seq_num; uint8_t got_data; /* defrag/reassembly buffers */ struct knet_host_defrag_buf defrag_buf[KNET_MAX_LINK]; char circular_buffer_defrag[KNET_CBUFFER_SIZE]; /* link stuff */ struct knet_link link[KNET_MAX_LINK]; uint8_t active_link_entries; uint8_t active_links[KNET_MAX_LINK]; struct knet_host *next; }; struct knet_sock { int sockfd[2]; /* sockfd[0] will always be application facing * and sockfd[1] internal if sockpair has been created by knet */ int is_socket; /* check if it's a socket for recvmmsg usage */ int is_created; /* knet created this 
socket and has to clean up on exit/del */ int in_use; /* set to 1 if it's use, 0 if free */ int has_error; /* set to 1 if there were errors reading from the sock * and socket has been removed from epoll */ }; struct knet_fd_trackers { uint8_t transport; /* transport type (UDP/SCTP...) */ uint8_t data_type; /* internal use for transport to define what data are associated * with this fd */ void *data; /* pointer to the data */ void *access_list_match_entry_head; /* pointer to access list match_entry list head */ }; #define KNET_MAX_FDS KNET_MAX_HOST * KNET_MAX_LINK * 4 #define KNET_MAX_COMPRESS_METHODS UINT8_MAX struct knet_handle_stats_extra { uint64_t tx_crypt_pmtu_packets; uint64_t tx_crypt_pmtu_reply_packets; uint64_t tx_crypt_ping_packets; uint64_t tx_crypt_pong_packets; }; struct knet_handle { knet_node_id_t host_id; unsigned int enabled:1; - struct knet_sock sockfd[KNET_DATAFD_MAX]; + struct knet_sock sockfd[KNET_DATAFD_MAX + 1]; int logfd; uint8_t log_levels[KNET_MAX_SUBSYSTEMS]; int hostsockfd[2]; int dstsockfd[2]; int send_to_links_epollfd; int recv_from_links_epollfd; int dst_link_handler_epollfd; uint8_t use_access_lists; /* set to 0 for disable, 1 for enable */ unsigned int pmtud_interval; unsigned int manual_mtu; unsigned int data_mtu; /* contains the max data size that we can send onwire * without frags */ struct knet_host *host_head; struct knet_host *host_index[KNET_MAX_HOST]; knet_transport_t transports[KNET_MAX_TRANSPORTS+1]; struct knet_fd_trackers knet_transport_fd_tracker[KNET_MAX_FDS]; /* track status for each fd handled by transports */ struct knet_handle_stats stats; struct knet_handle_stats_extra stats_extra; uint32_t reconnect_int; knet_node_id_t host_ids[KNET_MAX_HOST]; size_t host_ids_entries; struct knet_header *recv_from_sock_buf; struct knet_header *send_to_links_buf[PCKT_FRAG_MAX]; struct knet_header *recv_from_links_buf[PCKT_RX_BUFS]; struct knet_header *pingbuf; struct knet_header *pmtudbuf; uint8_t threads_status[KNET_THREAD_MAX]; 
uint8_t threads_flush_queue[KNET_THREAD_MAX]; pthread_mutex_t threads_status_mutex; pthread_t send_to_links_thread; pthread_t recv_from_links_thread; pthread_t heartbt_thread; pthread_t dst_link_handler_thread; pthread_t pmtud_link_handler_thread; pthread_rwlock_t global_rwlock; /* global config lock */ pthread_mutex_t pmtud_mutex; /* pmtud mutex to handle conditional send/recv + timeout */ pthread_cond_t pmtud_cond; /* conditional for above */ pthread_mutex_t tx_mutex; /* used to protect knet_send_sync and TX thread */ pthread_mutex_t hb_mutex; /* used to protect heartbeat thread and seq_num broadcasting */ pthread_mutex_t backoff_mutex; /* used to protect dst_link->pong_timeout_adj */ pthread_mutex_t kmtu_mutex; /* used to protect kernel_mtu */ uint32_t kernel_mtu; /* contains the MTU detected by the kernel on a given link */ int pmtud_waiting; int pmtud_running; int pmtud_forcerun; int pmtud_abort; struct crypto_instance *crypto_instance; size_t sec_block_size; size_t sec_hash_size; size_t sec_salt_size; unsigned char *send_to_links_buf_crypt[PCKT_FRAG_MAX]; unsigned char *recv_from_links_buf_crypt; unsigned char *recv_from_links_buf_decrypt; unsigned char *pingbuf_crypt; unsigned char *pmtudbuf_crypt; int compress_model; int compress_level; size_t compress_threshold; void *compress_int_data[KNET_MAX_COMPRESS_METHODS]; /* for compress method private data */ unsigned char *recv_from_links_buf_decompress; unsigned char *send_to_links_buf_compress; seq_num_t tx_seq_num; pthread_mutex_t tx_seq_num_mutex; uint8_t has_loop_link; uint8_t loop_link; void *dst_host_filter_fn_private_data; int (*dst_host_filter_fn) ( void *private_data, const unsigned char *outdata, ssize_t outdata_len, uint8_t tx_rx, knet_node_id_t this_host_id, knet_node_id_t src_node_id, int8_t *channel, knet_node_id_t *dst_host_ids, size_t *dst_host_ids_entries); void *pmtud_notify_fn_private_data; void (*pmtud_notify_fn) ( void *private_data, unsigned int data_mtu); void 
*host_status_change_notify_fn_private_data; void (*host_status_change_notify_fn) ( void *private_data, knet_node_id_t host_id, uint8_t reachable, uint8_t remote, uint8_t external); void *sock_notify_fn_private_data; void (*sock_notify_fn) ( void *private_data, int datafd, int8_t channel, uint8_t tx_rx, int error, int errorno); int fini_in_progress; uint64_t flags; }; extern pthread_rwlock_t shlib_rwlock; /* global shared lib load lock */ /* * NOTE: every single operation must be implementend * for every protocol. */ /* * for now knet supports only IP protocols (udp/sctp) * in future there might be others like ARP * or TIPC. * keep this around as transport information * to use for access lists and other operations */ #define TRANSPORT_PROTO_LOOPBACK 0 #define TRANSPORT_PROTO_IP_PROTO 1 /* * some transports like SCTP can filter incoming * connections before knet has to process * any packets. * GENERIC_ACL -> packet has to be read and filterted * PROTO_ACL -> transport provides filtering at lower levels * and packet does not need to be processed */ typedef enum { USE_NO_ACL, USE_GENERIC_ACL, USE_PROTO_ACL } transport_acl; /* * make it easier to map values in transports.c */ #define TRANSPORT_PROTO_NOT_CONNECTION_ORIENTED 0 #define TRANSPORT_PROTO_IS_CONNECTION_ORIENTED 1 typedef struct knet_transport_ops { /* * transport generic information */ const char *transport_name; const uint8_t transport_id; const uint8_t built_in; uint8_t transport_protocol; transport_acl transport_acl_type; /* * connection oriented protocols like SCTP * don´t need dst_addr in sendto calls and * on some OSes are considered EINVAL. */ uint8_t transport_is_connection_oriented; uint32_t transport_mtu_overhead; /* * transport init must allocate the new transport * and perform all internal initializations * (threads, lists, etc). 
*/ int (*transport_init)(knet_handle_t knet_h); /* * transport free must releases _all_ resources * allocated by tranport_init */ int (*transport_free)(knet_handle_t knet_h); /* * link operations should take care of all the * sockets and epoll management for a given link/transport set * transport_link_disable should return err = -1 and errno = EBUSY * if listener is still in use, and any other errno in case * the link cannot be disabled. * * set_config/clear_config are invoked in global write lock context */ int (*transport_link_set_config)(knet_handle_t knet_h, struct knet_link *link); int (*transport_link_clear_config)(knet_handle_t knet_h, struct knet_link *link); /* * transport callback for incoming dynamic connections * this is called in global read lock context */ int (*transport_link_dyn_connect)(knet_handle_t knet_h, int sockfd, struct knet_link *link); /* * return the fd to use for access lists */ int (*transport_link_get_acl_fd)(knet_handle_t knet_h, struct knet_link *link); /* * per transport error handling of recvmmsg * (see _handle_recv_from_links comments for details) */ /* * transport_rx_sock_error is invoked when recvmmsg returns <= 0 * * transport_rx_sock_error is invoked with both global_rdlock */ int (*transport_rx_sock_error)(knet_handle_t knet_h, int sockfd, int recv_err, int recv_errno); /* * transport_tx_sock_error is invoked with global_rwlock and * it's invoked when sendto or sendmmsg returns =< 0 * * it should return: * -1 on internal error * 0 ignore error and continue * 1 retry * any sleep or wait action should happen inside the transport code */ int (*transport_tx_sock_error)(knet_handle_t knet_h, int sockfd, int recv_err, int recv_errno); /* * this function is called on _every_ received packet * to verify if the packet is data or internal protocol error handling * * it should return: * -1 on error * 0 packet is not data and we should continue the packet process loop * 1 packet is not data and we should STOP the packet process loop * 2 
packet is data and should be parsed as such * * transport_rx_is_data is invoked with both global_rwlock * and fd_tracker read lock (from RX thread) */ int (*transport_rx_is_data)(knet_handle_t knet_h, int sockfd, struct knet_mmsghdr *msg); } knet_transport_ops_t; socklen_t sockaddr_len(const struct sockaddr_storage *ss); struct pretty_names { const char *name; uint8_t val; }; /** * This is a kernel style list implementation. * * @author Steven Dake */ struct knet_list_head { struct knet_list_head *next; struct knet_list_head *prev; }; /** * @def KNET_LIST_DECLARE() * Declare and initialize a list head. */ #define KNET_LIST_DECLARE(name) \ struct knet_list_head name = { &(name), &(name) } #define KNET_INIT_LIST_HEAD(ptr) do { \ (ptr)->next = (ptr); (ptr)->prev = (ptr); \ } while (0) /** * Initialize the list entry. * * Points next and prev pointers to head. * @param head pointer to the list head */ static inline void knet_list_init(struct knet_list_head *head) { head->next = head; head->prev = head; } /** * Add this element to the list. * * @param element the new element to insert. * @param head pointer to the list head */ static inline void knet_list_add(struct knet_list_head *element, struct knet_list_head *head) { head->next->prev = element; element->next = head->next; element->prev = head; head->next = element; } /** * Add to the list (but at the end of the list). * * @param element pointer to the element to add * @param head pointer to the list head * @see knet_list_add() */ static inline void knet_list_add_tail(struct knet_list_head *element, struct knet_list_head *head) { head->prev->next = element; element->next = head; element->prev = head->prev; head->prev = element; } /** * Delete an entry from the list. 
* * @param _remove the list item to remove */ static inline void knet_list_del(struct knet_list_head *_remove) { _remove->next->prev = _remove->prev; _remove->prev->next = _remove->next; } /** * Replace old entry by new one * @param old: the element to be replaced * @param new: the new element to insert */ static inline void knet_list_replace(struct knet_list_head *old, struct knet_list_head *new) { new->next = old->next; new->next->prev = new; new->prev = old->prev; new->prev->next = new; } /** * Tests whether list is the last entry in list head * @param list: the entry to test * @param head: the head of the list * @return boolean true/false */ static inline int knet_list_is_last(const struct knet_list_head *list, const struct knet_list_head *head) { return list->next == head; } /** * A quick test to see if the list is empty (pointing to it's self). * @param head pointer to the list head * @return boolean true/false */ static inline int32_t knet_list_empty(const struct knet_list_head *head) { return head->next == head; } /** * Get the struct for this entry * @param ptr: the &struct list_head pointer. * @param type: the type of the struct this is embedded in. * @param member: the name of the list_struct within the struct. */ #define knet_list_entry(ptr,type,member)\ ((type *)((char *)(ptr)-(char*)(&((type *)0)->member))) /** * Get the first element from a list * @param ptr: the &struct list_head pointer. * @param type: the type of the struct this is embedded in. * @param member: the name of the list_struct within the struct. */ #define knet_list_first_entry(ptr, type, member) \ knet_list_entry((ptr)->next, type, member) /** * Iterate over a list * @param pos: the &struct list_head to use as a loop counter. * @param head: the head for your list. */ #define knet_list_for_each(pos, head) \ for (pos = (head)->next; pos != (head); pos = pos->next) /** * Iterate over a list backwards * @param pos: the &struct list_head to use as a loop counter. 
* @param head: the head for your list. */ #define knet_list_for_each_reverse(pos, head) \ for (pos = (head)->prev; pos != (head); pos = pos->prev) /** * Iterate over a list safe against removal of list entry * @param pos: the &struct list_head to use as a loop counter. * @param n: another &struct list_head to use as temporary storage * @param head: the head for your list. */ #define knet_list_for_each_safe(pos, n, head) \ for (pos = (head)->next, n = pos->next; pos != (head); \ pos = n, n = pos->next) /** * Iterate over list of given type * @param pos: the type * to use as a loop counter. * @param head: the head for your list. * @param member: the name of the list_struct within the struct. */ #define knet_list_for_each_entry(pos, head, member) \ for (pos = knet_list_entry((head)->next, typeof(*pos), member); \ &pos->member != (head); \ pos = knet_list_entry(pos->member.next, typeof(*pos), member)) #endif diff --git a/libknet/libknet.h b/libknet/libknet.h index 11350dba..6a94a78c 100644 --- a/libknet/libknet.h +++ b/libknet/libknet.h @@ -1,2208 +1,2208 @@ /* * Copyright (C) 2010-2019 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #ifndef __LIBKNET_H__ #define __LIBKNET_H__ #include <stdint.h> #include <time.h> #include <netinet/in.h> #include <unistd.h> #include <limits.h> /** * @file libknet.h * @brief kronosnet API include file * @copyright Copyright (C) 2010-2019 Red Hat, Inc. All rights reserved. * * Kronosnet is an advanced VPN system for High Availability applications.
*/ #define KNET_API_VER 1 /* * libknet limits */ /* * Maximum number of hosts */ typedef uint16_t knet_node_id_t; #define KNET_MAX_HOST 65536 /* * Maximum number of links between 2 hosts */ #define KNET_MAX_LINK 8 /* * Maximum packet size that should be written to datafd * see knet_handle_new for details */ #define KNET_MAX_PACKET_SIZE 65536 /* * Buffers used for pretty logging * host is used to store both ip addresses and hostnames */ #define KNET_MAX_HOST_LEN 256 #define KNET_MAX_PORT_LEN 6 /* * Some notifications can be generated either on TX or RX */ #define KNET_NOTIFY_TX 0 #define KNET_NOTIFY_RX 1 /* * Link flags */ /* * Where possible, set traffic priority to high. * On Linux this sets the TOS to INTERACTIVE (6), * see tc-prio(8) for more infomation */ #define KNET_LINK_FLAG_TRAFFICHIPRIO (1ULL << 0) /* * Handle flags */ /* * Use privileged operations during socket setup. */ #define KNET_HANDLE_FLAG_PRIVILEGED (1ULL << 0) typedef struct knet_handle *knet_handle_t; /* * Handle structs/API calls */ /** * knet_handle_new_ex * * @brief create a new instance of a knet handle * * host_id - Each host in a knet is identified with a unique * ID. when creating a new handle local host_id * must be specified (0 to UINT16_MAX are all valid). * It is the user's responsibility to check that the value * is unique, or bad things might happen. * * log_fd - Write file descriptor. If set to a value > 0, it will be used * to write log packets from libknet to the application. * Setting to 0 will disable logging from libknet. * It is possible to enable logging at any given time (see logging API). * Make sure to either read from this filedescriptor properly and/or * mark it O_NONBLOCK, otherwise if the fd becomes full, libknet could * block. * It is strongly encouraged to use pipes (ex: pipe(2) or pipe2(2)) for * logging fds due to the atomic nature of writes between fds. * See also libknet test suite for reference and guidance. 
* * default_log_level - * If logfd is specified, it will initialize all subsystems to log * at default_log_level value. (see logging API) * * flags - bitwise OR of some of the following flags: * KNET_HANDLE_FLAG_PRIVILEGED: use privileged operations setting up the * communication sockets. If disabled, failure to acquire large * enough socket buffers is ignored but logged. Inadequate buffers * lead to poor performance. * * @return * on success, a new knet_handle_t is returned. * on failure, NULL is returned and errno is set. * knet-specific errno values: * ENAMETOOLONG - socket buffers couldn't be set big enough and KNET_HANDLE_FLAG_PRIVILEGED was specified * ERANGE - buffer size readback returned unexpected type */ knet_handle_t knet_handle_new_ex(knet_node_id_t host_id, int log_fd, uint8_t default_log_level, uint64_t flags); /** * knet_handle_new * * @brief knet_handle_new_ex with flags = KNET_HANDLE_FLAG_PRIVILEGED. */ knet_handle_t knet_handle_new(knet_node_id_t host_id, int log_fd, uint8_t default_log_level); /** * knet_handle_free * * @brief Destroy a knet handle, free all resources * * knet_h - pointer to knet_handle_t * * @return * knet_handle_free returns * 0 on success * -1 on error and errno is set. */ int knet_handle_free(knet_handle_t knet_h); /** * knet_handle_enable_sock_notify * * @brief Register a callback to receive socket events * * knet_h - pointer to knet_handle_t * * sock_notify_fn_private_data * void pointer to data that can be used to identify * the callback. * * sock_notify_fn * A callback function that is invoked every time * a socket in the datafd pool will report an error (-1) * or an end of read (0) (see socket.7). * This function MUST NEVER block or add substantial delays. * The callback is invoked in an internal unlocked area * to allow calls to knet_handle_add_datafd/knet_handle_remove_datafd * to swap/replace the bad fd. * if both err and errno are 0, it means that the socket * has received a 0 byte packet (EOF?). 
* The callback function must either remove the fd from knet * (by calling knet_handle_remove_datafd()) or dup a new fd in its place. * Failure to do this can cause problems. * * @return * knet_handle_enable_sock_notify returns * 0 on success * -1 on error and errno is set. */ int knet_handle_enable_sock_notify(knet_handle_t knet_h, void *sock_notify_fn_private_data, void (*sock_notify_fn) ( void *private_data, int datafd, int8_t channel, uint8_t tx_rx, int error, int errorno)); /* sorry! can't call it errno ;) */ #define KNET_DATAFD_MAX 32 /** * knet_handle_add_datafd * * @brief Install a file descriptor for communication * * IMPORTANT: In order to add datafd to knet, knet_handle_enable_sock_notify * _MUST_ be set and be able to handle both errors (-1) and * 0 bytes read / write from the provided datafd. * On read error (< 0) from datafd, the socket is automatically * removed from polling to avoid spinning on dead sockets. * It is safe to call knet_handle_remove_datafd even on sockets * that have been removed. * * knet_h - pointer to knet_handle_t * * *datafd - read/write file descriptor. * knet will read data here to send to the other hosts * and will write data received from the network. * Each data packet can be of max size KNET_MAX_PACKET_SIZE! * Applications using knet_send/knet_recv will receive a * proper error if the packet size is not within boundaries. * Applications using their own functions to write to the * datafd should NOT write more than KNET_MAX_PACKET_SIZE. * * Please refer to handle.c on how to set up a socketpair. * * datafd can be 0, and knet_handle_add_datafd will create a properly * populated socket pair the same way as ping_test, or a value * higher than 0. A negative number will return an error. * On exit knet_handle_free will take care to cleanup the * socketpair only if they have been created by knet_handle_add_datafd. * * It is possible to pass either sockets or normal fds.
* User provided datafd will be marked as non-blocking and close-on-exec. * * *channel - This value is analogous to the tag in VLAN tagging. * A negative value will auto-allocate a channel. * Setting a value between 0 and 31 will try to allocate that * specific channel (unless already in use). * * It is possible to add up to 32 datafds but be aware that each * one of them must have a receiving end on the other host. * * Example: * hostA channel 0 will be delivered to datafd on hostB channel 0 * hostA channel 1 to hostB channel 1. * * Each channel must have a unique file descriptor. * * If your application could have 2 channels on one host and one * channel on another host, then you can use dst_host_filter * to manipulate channel values on TX and RX. * * @return * knet_handle_add_datafd returns * @retval 0 on success, * *datafd will be populated with a socket if the original value was 0 * or if a specific fd was set, the value is untouched. * *channel will be populated with a channel number if the original value * was negative or the value is untouched if a specific channel * was requested. * * @retval -1 on error and errno is set. * *datafd and *channel are untouched or empty. */ int knet_handle_add_datafd(knet_handle_t knet_h, int *datafd, int8_t *channel); /** * knet_handle_remove_datafd * * @brief Remove a file descriptor from knet * * knet_h - pointer to knet_handle_t * * datafd - file descriptor to remove. * NOTE that if the socket/fd was created by knet_handle_add_datafd, * the socket will be closed by libknet. * * @return * knet_handle_remove_datafd returns * 0 on success * -1 on error and errno is set. 
*/ int knet_handle_remove_datafd(knet_handle_t knet_h, int datafd); /** * knet_handle_get_channel * * @brief Get the channel associated with a file descriptor * * knet_h - pointer to knet_handle_t * * datafd - get the channel associated to this datafd * * *channel - will contain the result * * @return * knet_handle_get_channel returns * @retval 0 on success * and *channel will contain the result * @retval -1 on error and errno is set. * and *channel content is meaningless */ int knet_handle_get_channel(knet_handle_t knet_h, const int datafd, int8_t *channel); /** * knet_handle_get_datafd * * @brief Get the file descriptor associated with a channel * * knet_h - pointer to knet_handle_t * * channel - get the datafd associated to this channel * * *datafd - will contain the result * * @return * knet_handle_get_datafd returns * @retval 0 on success * and *datafd will contain the results * @retval -1 on error and errno is set. * and *datafd content is meaningless */ int knet_handle_get_datafd(knet_handle_t knet_h, const int8_t channel, int *datafd); /** * knet_recv * * @brief Receive data from knet nodes * * knet_h - pointer to knet_handle_t * * buff - pointer to buffer to store the received data * * buff_len - buffer length * * channel - channel number * * @return * knet_recv is a commodity function to wrap iovec operations * around a socket. It returns a call to readv(2). */ ssize_t knet_recv(knet_handle_t knet_h, char *buff, const size_t buff_len, const int8_t channel); /** * knet_send * * @brief Send data to knet nodes * * knet_h - pointer to knet_handle_t * * buff - pointer to the buffer of data to send * * buff_len - length of data to send * * channel - channel number * * @return * knet_send is a commodity function to wrap iovec operations * around a socket. It returns a call to writev(2). 
*/ ssize_t knet_send(knet_handle_t knet_h, const char *buff, const size_t buff_len, const int8_t channel); /** * knet_send_sync * * @brief Synchronously send data to knet nodes * * knet_h - pointer to knet_handle_t * * buff - pointer to the buffer of data to send * * buff_len - length of data to send * * channel - data channel to use (see knet_handle_add_datafd(3)) * * All knet RX/TX operations are async for performance reasons. * There are applications that might need a sync version of data * transmission and receive errors in case of failure to deliver * to another host. * knet_send_sync bypasses the whole TX async layer and delivers * data directly to the link layer, and returns errors accordingly. * knet_send_sync sends only one packet to one host at a time. * It does NOT support multiple destinations or multicast packets. * Decision is still based on dst_host_filter_fn. * * @return * knet_send_sync returns 0 on success and -1 on error. * In addition to normal sendmmsg errors, knet_send_sync can fail * due to: * * @retval ECANCELED - data forward is disabled * @retval EFAULT - dst_host_filter fatal error * @retval EINVAL - dst_host_filter did not provide dst_host_ids_entries on unicast pckts * @retval E2BIG - dst_host_filter did return more than one dst_host_ids_entries on unicast pckts * @retval ENOMSG - received unknown message type * @retval EHOSTDOWN - unicast pckt cannot be delivered because dest host is not connected yet * @retval ECHILD - crypto failed * @retval EAGAIN - sendmmsg was unable to send all messages and there was no progress during retry */ int knet_send_sync(knet_handle_t knet_h, const char *buff, const size_t buff_len, const int8_t channel); /** * knet_handle_enable_filter * * @brief install a filter to route packets * * knet_h - pointer to knet_handle_t * * dst_host_filter_fn_private_data * void pointer to data that can be used to identify * the callback. 
* * dst_host_filter_fn - * is a callback function that is invoked every time * a packet hits datafd (see knet_handle_new(3)). * the function allows users to tell libknet where the * packet has to be delivered. * * const unsigned char *outdata - is a pointer to the * current packet * ssize_t outdata_len - length of the above data * uint8_t tx_rx - filter is called on tx or rx * (KNET_NOTIFY_TX, KNET_NOTIFY_RX) * knet_node_id_t this_host_id - host_id processing the packet * knet_node_id_t src_host_id - host_id that generated the * packet * knet_node_id_t *dst_host_ids - array of KNET_MAX_HOST knet_node_id_t * where to store the destinations * size_t *dst_host_ids_entries - number of hosts to send the message * * dst_host_filter_fn should return * -1 on error, packet is discarded. * 0 packet is unicast and should be sent to dst_host_ids and there are * dst_host_ids_entries in the buffer. * 1 packet is broadcast/multicast and is sent all hosts. * contents of dst_host_ids and dst_host_ids_entries are ignored. * (see also kronosnetd/etherfilter.* for an example that filters based * on ether protocol) * * @return * knet_handle_enable_filter returns * 0 on success * -1 on error and errno is set. */ int knet_handle_enable_filter(knet_handle_t knet_h, void *dst_host_filter_fn_private_data, int (*dst_host_filter_fn) ( void *private_data, const unsigned char *outdata, ssize_t outdata_len, uint8_t tx_rx, knet_node_id_t this_host_id, knet_node_id_t src_host_id, int8_t *channel, knet_node_id_t *dst_host_ids, size_t *dst_host_ids_entries)); /** * knet_handle_setfwd * * @brief Start packet forwarding * * knet_h - pointer to knet_handle_t * * enable - set to 1 to allow data forwarding, 0 to disable data forwarding. * * @return * knet_handle_setfwd returns * 0 on success * -1 on error and errno is set. * * By default data forwarding is off and no traffic will pass through knet until * it is set on. 
*/ int knet_handle_setfwd(knet_handle_t knet_h, unsigned int enabled); /** * knet_handle_enable_access_lists * * @brief Enable or disable usage of access lists (default: off) * * knet_h - pointer to knet_handle_t * * enable - set to 1 to use access lists, 0 to disable access_lists. * * @return * knet_handle_enable_access_lists returns * 0 on success * -1 on error and errno is set. * * access lists are bound to links. There are 2 types of links: * 1) point to point, where both source and destinations are well known * at configuration time. * 2) open links, where only the source is known at configuration time. * * knet will automatically generate access lists for point to point links. * * For open links, knet provides 4 API calls to manipulate access lists: * knet_link_add_acl(3), knet_link_rm_acl(3), knet_link_insert_acl(3) * and knet_link_clear_acl(3). * Those API calls will work exclusively on open links as they * are of no use on point to point links. * * knet will not enforce any access list unless specifically enabled by * knet_handle_enable_access_lists(3). * * From a security / programming perspective we recommend: * - create the knet handle * - enable access lists * - configure hosts and links * - configure access lists for open links */ int knet_handle_enable_access_lists(knet_handle_t knet_h, unsigned int enabled); #define KNET_PMTUD_DEFAULT_INTERVAL 60 /** * knet_handle_pmtud_setfreq * * @brief Set the interval between PMTUd scans * * knet_h - pointer to knet_handle_t * * interval - define the interval in seconds between PMTUd scans * range from 1 to 86400 (24h) * * @return * knet_handle_pmtud_setfreq returns * 0 on success * -1 on error and errno is set. * * default interval is 60. 
*/ int knet_handle_pmtud_setfreq(knet_handle_t knet_h, unsigned int interval); /** * knet_handle_pmtud_getfreq * * @brief Get the interval between PMTUd scans * * knet_h - pointer to knet_handle_t * * interval - pointer where to store the current interval value * * @return * knet_handle_pmtud_getfreq returns * 0 on success * -1 on error and errno is set. */ int knet_handle_pmtud_getfreq(knet_handle_t knet_h, unsigned int *interval); /** * knet_handle_enable_pmtud_notify * * @brief install a callback to receive PMTUd changes * * knet_h - pointer to knet_handle_t * * pmtud_notify_fn_private_data * void pointer to data that can be used to identify * the callback. * * pmtud_notify_fn * is a callback function that is invoked every time * a path MTU size change is detected. * The function allows libknet to notify the user * of data MTU, that's the max value that can be sent * on the wire without fragmentation. The data MTU will always * be lower than real link MTU because it accounts for * protocol overhead, knet packet header and (if configured) * crypto overhead. * This function MUST NEVER block or add substantial delays. * * @return * knet_handle_enable_pmtud_notify returns * 0 on success * -1 on error and errno is set. */ int knet_handle_enable_pmtud_notify(knet_handle_t knet_h, void *pmtud_notify_fn_private_data, void (*pmtud_notify_fn) ( void *private_data, unsigned int data_mtu)); /** * knet_handle_pmtud_set * * @brief Set the current interface MTU * * knet_h - pointer to knet_handle_t * * iface_mtu - current interface MTU, value 0 to 65535. 0 will * re-enable automatic MTU discovery. * In a setup with multiple interfaces, please specify * the lowest MTU between the selected interfaces. * knet will automatically adjust this value for * all headers overhead and set the correct data_mtu. * data_mtu can be retrieved with knet_handle_pmtud_get(3) * or applications will receive a pmtud_notify event * if enabled via knet_handle_enable_pmtud_notify(3).
* * @return * knet_handle_pmtud_set returns * 0 on success * -1 on error and errno is set. */ int knet_handle_pmtud_set(knet_handle_t knet_h, unsigned int iface_mtu); /** * knet_handle_pmtud_get * * @brief Get the current data MTU * * knet_h - pointer to knet_handle_t * * data_mtu - pointer where to store data_mtu * * @return * knet_handle_pmtud_get returns * 0 on success * -1 on error and errno is set. */ int knet_handle_pmtud_get(knet_handle_t knet_h, unsigned int *data_mtu); #define KNET_MIN_KEY_LEN 128 #define KNET_MAX_KEY_LEN 4096 struct knet_handle_crypto_cfg { char crypto_model[16]; char crypto_cipher_type[16]; char crypto_hash_type[16]; unsigned char private_key[KNET_MAX_KEY_LEN]; unsigned int private_key_len; }; /** * knet_handle_crypto * * @brief set up packet cryptographic signing & encryption * * knet_h - pointer to knet_handle_t * * knet_handle_crypto_cfg - * pointer to a knet_handle_crypto_cfg structure * * crypto_model should contain the model name. * Currently only "openssl" and "nss" are supported. * Setting to "none" will disable crypto. * * crypto_cipher_type * should contain the cipher algo name. * It can be set to "none" to disable * encryption. * Currently supported by "nss" model: * "aes128", "aes192" and "aes256". * "openssl" model supports more modes and it strictly * depends on the openssl build. See: EVP_get_cipherbyname * openssl API call for details. * * crypto_hash_type * should contain the hashing algo name. * It can be set to "none" to disable * hashing. * Currently supported by "nss" model: * "md5", "sha1", "sha256", "sha384" and "sha512". * "openssl" model supports more modes and it strictly * depends on the openssl build. See: EVP_get_digestbyname * openssl API call for details. * * private_key will contain the private shared key. * It has to be at least KNET_MIN_KEY_LEN long. * * private_key_len * length of the provided private_key. 
* * Implementation notes/current limitations: * - enabling crypto will increase latency as packets have * to be processed. * - enabling crypto might reduce the overall throughput * due to crypto data overhead. * - re-keying is not implemented yet. * - private/public key encryption/hashing is not currently * planned. * - crypto key must be the same for all hosts in the same * knet instance. * - it is safe to call knet_handle_crypto multiple times at runtime. * The last config will be used. * IMPORTANT: a call to knet_handle_crypto can fail due to: * 1) failure to obtain locking * 2) errors initializing the crypto level. * This can happen even in subsequent calls to knet_handle_crypto. * A failure in crypto init will restore the previous crypto configuration. * * @return * knet_handle_crypto returns: * @retval 0 on success * @retval -1 on error and errno is set. * @retval -2 on crypto subsystem initialization error. No errno is provided at the moment (yet). */ int knet_handle_crypto(knet_handle_t knet_h, struct knet_handle_crypto_cfg *knet_handle_crypto_cfg); #define KNET_COMPRESS_THRESHOLD 100 struct knet_handle_compress_cfg { char compress_model[16]; uint32_t compress_threshold; int compress_level; }; /** * knet_handle_compress * * @brief Set up packet compression * * knet_h - pointer to knet_handle_t * * knet_handle_compress_cfg - * pointer to a knet_handle_compress_cfg structure * * compress_model contains the model name. * See "compress_level" for the list of accepted values. * Setting the value to "none" disables compression. * * compress_threshold * tells the transmission thread to NOT compress * any packets that are smaller than the value * indicated. Default 100 bytes. * Set to 0 to reset to the default. * Set to 1 to compress everything. * Max accepted value is KNET_MAX_PACKET_SIZE. * * compress_level is the "level" parameter for most models: * zlib: 0 (no compression), 1 (minimal) .. 9 (max compression). * lz4: 1 (max compression)...
9 (fastest compression). * lz4hc: 1 (min compression) ... LZ4HC_MAX_CLEVEL (16) or LZ4HC_CLEVEL_MAX (12) * depending on the version of lz4hc libknet was built with. * lzma: 0 (minimal) .. 9 (max compression) * bzip2: 1 (minimal) .. 9 (max compression) * For lzo2 it selects the algorithm to use: * 1 : lzo1x_1_compress (default) * 11 : lzo1x_1_11_compress * 12 : lzo1x_1_12_compress * 15 : lzo1x_1_15_compress * 999: lzo1x_999_compress * Other values select the default algorithm. * Please refer to the documentation of the respective * compression library for guidance about setting this * value. * * Implementation notes: * - it is possible to enable/disable compression at any time. * - nodes can be using a different compression algorithm at any time. * - knet does NOT implement the compression algorithm directly. it relies * on external libraries for this functionality. Please read * the libraries man pages to figure out which algorithm/compression * level is best for the data you are planning to transmit. * * @return * knet_handle_compress returns * 0 on success * -1 on error and errno is set. EINVAL means that either the model or the * level are not supported. 
*/ int knet_handle_compress(knet_handle_t knet_h, struct knet_handle_compress_cfg *knet_handle_compress_cfg); struct knet_handle_stats { size_t size; uint64_t tx_uncompressed_packets; uint64_t tx_compressed_packets; uint64_t tx_compressed_original_bytes; uint64_t tx_compressed_size_bytes; uint64_t tx_compress_time_ave; uint64_t tx_compress_time_min; uint64_t tx_compress_time_max; uint64_t rx_compressed_packets; uint64_t rx_compressed_original_bytes; uint64_t rx_compressed_size_bytes; uint64_t rx_compress_time_ave; uint64_t rx_compress_time_min; uint64_t rx_compress_time_max; /* Overhead times, measured in usecs */ uint64_t tx_crypt_packets; uint64_t tx_crypt_byte_overhead; uint64_t tx_crypt_time_ave; uint64_t tx_crypt_time_min; uint64_t tx_crypt_time_max; uint64_t rx_crypt_packets; uint64_t rx_crypt_time_ave; uint64_t rx_crypt_time_min; uint64_t rx_crypt_time_max; }; /** * knet_handle_get_stats * * @brief Get statistics for compression & crypto * * knet_h - pointer to knet_handle_t * * knet_handle_stats * pointer to a knet_handle_stats structure * * struct_size * size of knet_handle_stats structure to allow * for backwards compatibility. libknet will only * copy this much data into the stats structure * so that older callers will not get overflowed if * new fields are added. * * @return * 0 on success * -1 on error and errno is set. * */ int knet_handle_get_stats(knet_handle_t knet_h, struct knet_handle_stats *stats, size_t struct_size); /* * Tell knet_handle_clear_stats whether to clear just the handle stats * or all of them. */ #define KNET_CLEARSTATS_HANDLE_ONLY 1 #define KNET_CLEARSTATS_HANDLE_AND_LINK 2 /** * knet_handle_clear_stats * * @brief Clear knet stats, link and/or handle * * knet_h - pointer to knet_handle_t * * clear_option - Which stats to clear, must be one of * * KNET_CLEARSTATS_HANDLE_ONLY or * KNET_CLEARSTATS_HANDLE_AND_LINK * * @return * 0 on success * -1 on error and errno is set. 
* */ int knet_handle_clear_stats(knet_handle_t knet_h, int clear_option); struct knet_crypto_info { const char *name; /* openssl,nss,etc.. */ uint8_t properties; /* currently unused */ char pad[256]; /* currently unused */ }; /** * knet_get_crypto_list * * @brief Get a list of supported crypto libraries * * crypto_list - array of struct knet_crypto_info * * If NULL then only the number of structs is returned in crypto_list_entries * to allow the caller to allocate sufficient space. * libknet does not allow more than 256 crypto methods at the moment. * it is safe to allocate 256 structs to avoid calling * knet_get_crypto_list twice. * * crypto_list_entries - returns the number of structs in crypto_list * * @return * knet_get_crypto_list returns * 0 on success * -1 on error and errno is set. */ int knet_get_crypto_list(struct knet_crypto_info *crypto_list, size_t *crypto_list_entries); struct knet_compress_info { const char *name; /* bzip2, lz4, etc.. */ uint8_t properties; /* currently unused */ char pad[256]; /* currently unused */ }; /** * knet_get_compress_list * * @brief Get a list of support compression types * * compress_list - array of struct knet_compress_info * * If NULL then only the number of structs is returned in compress_list_entries * to allow the caller to allocate sufficient space. * libknet does not allow more than 256 compress methods at the moment. * it is safe to allocate 256 structs to avoid calling * knet_get_compress_list twice. * * compress_list_entries - returns the number of structs in compress_list * * @return * knet_get_compress_list returns * 0 on success * -1 on error and errno is set. 
*/ int knet_get_compress_list(struct knet_compress_info *compress_list, size_t *compress_list_entries); /* * host structs/API calls */ /** * knet_host_add * * @brief Add a new host ID to knet * * knet_h - pointer to knet_handle_t * * host_id - each host in a knet is identified with a unique ID * (see also knet_handle_new(3)) * * @return * knet_host_add returns: * 0 on success * -1 on error and errno is set. */ int knet_host_add(knet_handle_t knet_h, knet_node_id_t host_id); /** * knet_host_remove * * @brief Remove a host ID from knet * * knet_h - pointer to knet_handle_t * * host_id - each host in a knet is identified with a unique ID * (see also knet_handle_new(3)) * * @return * knet_host_remove returns: * 0 on success * -1 on error and errno is set. */ int knet_host_remove(knet_handle_t knet_h, knet_node_id_t host_id); /** * knet_host_set_name * * @brief Set the name of a knet host * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * name - this name will be used for pretty logging and eventually * search for hosts (see also knet_host_get_name_by_host_id(3) and knet_host_get_id_by_host_name(3)). * Only up to KNET_MAX_HOST_LEN - 1 bytes will be accepted and * name has to be unique for each host. * * @return * knet_host_set_name returns: * 0 on success * -1 on error and errno is set.
*/ int knet_host_set_name(knet_handle_t knet_h, knet_node_id_t host_id, const char *name); /** * knet_host_get_name_by_host_id * * @brief Get the name of a host given its ID * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * name - pointer to a preallocated buffer of at least size KNET_MAX_HOST_LEN * where the current host name will be stored * (as set by knet_host_set_name or default by knet_host_add) * * @return * knet_host_get_name_by_host_id returns: * 0 on success * -1 on error and errno is set (name is left untouched) */ int knet_host_get_name_by_host_id(knet_handle_t knet_h, knet_node_id_t host_id, char *name); /** * knet_host_get_id_by_host_name * * @brief Get the ID of a host given its name * * knet_h - pointer to knet_handle_t * * name - name to lookup, max len KNET_MAX_HOST_LEN * * host_id - where to store the result * * @return * knet_host_get_id_by_host_name returns: * 0 on success * -1 on error and errno is set. */ int knet_host_get_id_by_host_name(knet_handle_t knet_h, const char *name, knet_node_id_t *host_id); /** * knet_host_get_host_list * * @brief Get a list of hosts known to knet * * knet_h - pointer to knet_handle_t * * host_ids - array of at least KNET_MAX_HOST size * * host_ids_entries - * number of entries written to host_ids * * @return * knet_host_get_host_list returns * 0 on success * -1 on error and errno is set. */ int knet_host_get_host_list(knet_handle_t knet_h, knet_node_id_t *host_ids, size_t *host_ids_entries); /* * define switching policies */ #define KNET_LINK_POLICY_PASSIVE 0 #define KNET_LINK_POLICY_ACTIVE 1 #define KNET_LINK_POLICY_RR 2 /** * knet_host_set_policy * * @brief Set the switching policy for a host's links * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * policy - there are currently 3 kinds of simple switching policies * based on link configuration.
- * KNET_LINK_POLICY_PASSIVE - the active link with the lowest + * KNET_LINK_POLICY_PASSIVE - the active link with the highest * priority will be used. * if one or more active links share * the same priority, the one with * lowest link_id will be used. * * KNET_LINK_POLICY_ACTIVE - all active links will be used * simultaneously to send traffic. * link priority is ignored. * * KNET_LINK_POLICY_RR - round-robin policy, every packet * will be send on a different active * link. * * @return * knet_host_set_policy returns * 0 on success * -1 on error and errno is set. */ int knet_host_set_policy(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t policy); /** * knet_host_get_policy * * @brief Get the switching policy for a host's links * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * policy - will contain the current configured switching policy. * Default is passive when creating a new host. * * @return * knet_host_get_policy returns * 0 on success * -1 on error and errno is set. */ int knet_host_get_policy(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t *policy); /** * knet_host_enable_status_change_notify * * @brief Install a callback to get host status change events * * knet_h - pointer to knet_handle_t * * host_status_change_notify_fn_private_data - * void pointer to data that can be used to identify * the callback * * host_status_change_notify_fn - * is a callback function that is invoked every time * there is a change in the host status. * host status is identified by: * - reachable, this host can send/receive data to/from host_id * - remote, 0 if the host_id is connected locally or 1 if * the there is one or more knet host(s) in between. * NOTE: re-switching is NOT currently implemented, * but this is ready for future and can avoid * an API/ABI breakage later on. * - external, 0 if the host_id is configured locally or 1 if * it has been added from remote nodes config. 
* NOTE: dynamic topology is NOT currently implemented, * but this is ready for future and can avoid * an API/ABI breakage later on. * This function MUST NEVER block or add substantial delays. * * @return * knet_host_enable_status_change_notify returns * 0 on success * -1 on error and errno is set. */ int knet_host_enable_status_change_notify(knet_handle_t knet_h, void *host_status_change_notify_fn_private_data, void (*host_status_change_notify_fn) ( void *private_data, knet_node_id_t host_id, uint8_t reachable, uint8_t remote, uint8_t external)); /* * define host status structure for quick lookup * struct is in flux as more stats will be added soon * * reachable host_id can be seen either directly connected * or via another host_id * * remote 0 = node is connected locally, 1 is visible * via another host_id * * external 0 = node is configured/known locally, * 1 host_id has been received via another host_id */ struct knet_host_status { uint8_t reachable; uint8_t remote; uint8_t external; /* add host statistics */ }; /** * knet_host_get_status * * @brief Get the status of a host * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * status - pointer to knet_host_status struct * * @return * knet_host_get_status returns * 0 on success * -1 on error and errno is set. */ int knet_host_get_status(knet_handle_t knet_h, knet_node_id_t host_id, struct knet_host_status *status); /* * link structs/API calls * * every host allocated/managed by knet_host_* has * KNET_MAX_LINK structures to define the network * paths that connect 2 hosts. * * Each link is identified by a link_id that has a * value between 0 and KNET_MAX_LINK - 1. * * KNOWN LIMITATIONS: * * - let's assume the scenario where two hosts are connected * with any number of links. link_id must match on both sides.
* If host_id 0 link_id 0 is configured to connect IP1 to IP2 and * host_id 0 link_id 1 is configured to connect IP3 to IP4, * host_id 1 link_id 0 _must_ connect IP2 to IP1 and likewise * host_id 1 link_id 1 _must_ connect IP4 to IP3. * We might be able to lift this restriction in future, by using * other data to determine src/dst link_id, but for now, deal with it. */ /* * commodity functions to convert strings to sockaddr and vice versa */ /** * knet_strtoaddr * * @brief Convert a hostname string to an address * * host - IPaddr/hostname to convert * be aware only the first IP address will be returned * in case a hostname resolves to multiple IP * * port - port to connect to * * ss - sockaddr_storage where to store the converted data * * sslen - len of the sockaddr_storage * * @return * knet_strtoaddr returns same error codes as getaddrinfo * */ int knet_strtoaddr(const char *host, const char *port, struct sockaddr_storage *ss, socklen_t sslen); /** * knet_addrtostr * * @brief Convert an address to a host name * * ss - sockaddr_storage to convert * * sslen - len of the sockaddr_storage * * addr_buf - buffer where to store the IPaddr/hostname * (recommended size: KNET_MAX_HOST_LEN) * * addr_buf_size - size of addr_buf * * port_buf - buffer where to store the port * (recommended size: KNET_MAX_PORT_LEN) * * port_buf_size - size of port_buf * * @return * knet_addrtostr returns same error codes as getnameinfo */ int knet_addrtostr(const struct sockaddr_storage *ss, socklen_t sslen, char *addr_buf, size_t addr_buf_size, char *port_buf, size_t port_buf_size); #define KNET_TRANSPORT_LOOPBACK 0 #define KNET_TRANSPORT_UDP 1 #define KNET_TRANSPORT_SCTP 2 #define KNET_MAX_TRANSPORTS UINT8_MAX /* * The Loopback transport is only valid for connections to localhost, the host * with the same node_id specified in knet_handle_new(). Only one link of this * type is allowed.
Data sent down a LOOPBACK link will be copied directly from * the knet send datafd to the knet receive datafd so the application must be set * up to take data from that socket at least as often as it is sent or deadlocks * could occur. If used, a LOOPBACK link must be the only link configured to the * local host. */ struct knet_transport_info { const char *name; /* UDP/SCTP/etc... */ uint8_t id; /* value that can be used for link_set_config */ uint8_t properties; /* currently unused */ char pad[256]; /* currently unused */ }; /** * knet_get_transport_list * * @brief Get a list of the transports supported by this build of knet * * transport_list - an array of struct knet_transport_info that must be * at least of size struct knet_transport_info * KNET_MAX_TRANSPORTS * * transport_list_entries - pointer to a size_t where to store how many transports * are available in this build of libknet. * * @return * knet_get_transport_list returns * 0 on success * -1 on error and errno is set. */ int knet_get_transport_list(struct knet_transport_info *transport_list, size_t *transport_list_entries); /** * knet_get_transport_name_by_id * * @brief Get a transport name from its ID number * * transport - one of the KNET_TRANSPORT_xxx constants * * @return * knet_get_transport_name_by_id returns: * * @retval pointer to the name on success or * @retval NULL on error and errno is set. */ const char *knet_get_transport_name_by_id(uint8_t transport); /** * knet_get_transport_id_by_name * * @brief Get a transport ID from its name * * name - transport name (UDP/SCTP/etc) * * @return * knet_get_transport_id_by_name returns: * * @retval KNET_MAX_TRANSPORTS on error and errno is set accordingly * @retval KNET_TRANSPORT_xxx on success.
*/ uint8_t knet_get_transport_id_by_name(const char *name); #define KNET_TRANSPORT_DEFAULT_RECONNECT_INTERVAL 1000 /** * knet_handle_set_transport_reconnect_interval * * @brief Set the interval between transport attempts to reconnect a failed link * * knet_h - pointer to knet_handle_t * * msecs - milliseconds * * @return * knet_handle_set_transport_reconnect_interval returns * 0 on success * -1 on error and errno is set. */ int knet_handle_set_transport_reconnect_interval(knet_handle_t knet_h, uint32_t msecs); /** * knet_handle_get_transport_reconnect_interval * * @brief Get the interval between transport attempts to reconnect a failed link * * knet_h - pointer to knet_handle_t * * msecs - milliseconds * * @return * knet_handle_get_transport_reconnect_interval returns * 0 on success * -1 on error and errno is set. */ int knet_handle_get_transport_reconnect_interval(knet_handle_t knet_h, uint32_t *msecs); /** * knet_link_set_config * * @brief Configure the link to a host * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * transport - one of the KNET_TRANSPORT_xxx constants * * src_addr - sockaddr_storage that can be either IPv4 or IPv6 * * dst_addr - sockaddr_storage that can be either IPv4 or IPv6 * this can be null if we don't know the incoming * IP address/port and the link will remain quiet * till the node on the other end will initiate a * connection * * flags - KNET_LINK_FLAG_* * * @return * knet_link_set_config returns * 0 on success * -1 on error and errno is set. 
*/ int knet_link_set_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t transport, struct sockaddr_storage *src_addr, struct sockaddr_storage *dst_addr, uint64_t flags); /** * knet_link_get_config * * @brief Get the link configuration information * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * transport - see knet_link_set_config(3) * * src_addr - sockaddr_storage that can be either IPv4 or IPv6 * * dst_addr - sockaddr_storage that can be either IPv4 or IPv6 * * dynamic - 0 if dst_addr is static or 1 if dst_addr is dynamic. * In case of 1, dst_addr can be NULL and it will be left * untouched. * * flags - KNET_LINK_FLAG_* * * @return * knet_link_get_config returns * 0 on success. * -1 on error and errno is set. */ int knet_link_get_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t *transport, struct sockaddr_storage *src_addr, struct sockaddr_storage *dst_addr, uint8_t *dynamic, uint64_t *flags); /** * knet_link_clear_config * * @brief Clear link information and disconnect the link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * @return * knet_link_clear_config returns * 0 on success. * -1 on error and errno is set. */ int knet_link_clear_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id); /* * Access lists management for open links * see also knet_handle_enable_access_lists(3) */ /** * check_type_t * @brief address type enum for knet access lists * * CHECK_TYPE_ADDRESS is the equivalent of a single entry / IP address. * for example: 10.1.9.3 * and the entry is stored in ss1. ss2 can be NULL. * * CHECK_TYPE_MASK is used to configure network/netmask. * for example: 192.168.0.0/24 * the network is stored in ss1 and the netmask in ss2. * * CHECK_TYPE_RANGE defines a value / range of ip addresses.
* for example: 172.16.0.1-172.16.0.10 * the start is stored in ss1 and the end in ss2. * * Please be aware that the above examples refer only to IP based protocols. * Other protocols might use ss1 and ss2 in slightly different ways. * At the moment knet only supports IP based protocol, though that might change * in the future. */ typedef enum { CHECK_TYPE_ADDRESS, CHECK_TYPE_MASK, CHECK_TYPE_RANGE } check_type_t; /** * check_acceptreject_t * * @brief enum for accept/reject in knet access lists * * accept or reject incoming packets defined in the access list entry */ typedef enum { CHECK_ACCEPT, CHECK_REJECT } check_acceptreject_t; /** * knet_link_add_acl * * @brief Add access list entry to an open link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * ss1 / ss2 / type / acceptreject - see typedef definitions for details * * IMPORTANT: the order in which access lists are added is critical and it * is left to the user to add them in the right order. knet * will not attempt to logically sort them. * * For example: * 1 - accept from 10.0.0.0/8 * 2 - reject from 10.0.0.1/32 * * is not the same as: * * 1 - reject from 10.0.0.1/32 * 2 - accept from 10.0.0.0/8 * * In the first example, rule number 2 will never match because * packets from 10.0.0.1 will be accepted by rule number 1. * * @return * knet_link_add_acl returns * 0 on success. * -1 on error and errno is set. */ int knet_link_add_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, struct sockaddr_storage *ss1, struct sockaddr_storage *ss2, check_type_t type, check_acceptreject_t acceptreject); /** * knet_link_insert_acl * * @brief Insert access list entry to an open link at given index * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * index - insert at position "index" where 0 is the first entry and -1 * appends to the current list. 
* * ss1 / ss2 / type / acceptreject - see typedef definitions for details * * @return * knet_link_insert_acl returns * 0 on success. * -1 on error and errno is set. */ int knet_link_insert_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, int index, struct sockaddr_storage *ss1, struct sockaddr_storage *ss2, check_type_t type, check_acceptreject_t acceptreject); /** * knet_link_rm_acl * * @brief Remove access list entry from an open link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * ss1 / ss2 / type / acceptreject - see typedef definitions for details * * IMPORTANT: the data passed to this API call must match exactly that passed * to knet_link_add_acl(3). * * @return * knet_link_rm_acl returns * 0 on success. * -1 on error and errno is set. */ int knet_link_rm_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, struct sockaddr_storage *ss1, struct sockaddr_storage *ss2, check_type_t type, check_acceptreject_t acceptreject); /** * knet_link_clear_acl * * @brief Remove all access list entries from an open link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * @return * knet_link_clear_acl returns * 0 on success. * -1 on error and errno is set. */ int knet_link_clear_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id); /** * knet_link_set_enable * * @brief Enable traffic on a link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * enabled - 0 disable the link, 1 enable the link * * @return * knet_link_set_enable returns * 0 on success * -1 on error and errno is set. 
*/ int knet_link_set_enable(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, unsigned int enabled); /** * knet_link_get_enable * * @brief Find out whether a link is enabled or not * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * enabled - 0 disable the link, 1 enable the link * * @return * knet_link_get_enable returns * 0 on success * -1 on error and errno is set. */ int knet_link_get_enable(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, unsigned int *enabled); #define KNET_LINK_DEFAULT_PING_INTERVAL 1000 /* 1 second */ #define KNET_LINK_DEFAULT_PING_TIMEOUT 2000 /* 2 seconds */ #define KNET_LINK_DEFAULT_PING_PRECISION 2048 /* samples */ /** * knet_link_set_ping_timers * * @brief Set the ping timers for a link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * interval - specify the ping interval in milliseconds. * * timeout - if no pong is received within this time, * the link is declared dead, in milliseconds. * NOTE: in future it will be possible to set timeout to 0 * for an autocalculated timeout based on interval, pong_count * and latency. The API already accept 0 as value and it will * return ENOSYS / -1. Once the automatic calculation feature * will be implemented, this call will only return EINVAL * for incorrect values. * * precision - how many values of latency are used to calculate * the average link latency (see also knet_link_get_status(3)) * * @return * knet_link_set_ping_timers returns * 0 on success * -1 on error and errno is set. 
*/ int knet_link_set_ping_timers(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, time_t interval, time_t timeout, unsigned int precision); /** * knet_link_get_ping_timers * * @brief Get the ping timers for a link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * interval - ping interval * * timeout - if no pong is received within this time, * the link is declared dead * * precision - how many values of latency are used to calculate * the average link latency (see also knet_link_get_status(3)) * * @return * knet_link_get_ping_timers returns * 0 on success * -1 on error and errno is set. */ int knet_link_get_ping_timers(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, time_t *interval, time_t *timeout, unsigned int *precision); #define KNET_LINK_DEFAULT_PONG_COUNT 5 /** * knet_link_set_pong_count * * @brief Set the pong count for a link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * pong_count - how many valid ping/pongs before a link is marked UP. * default: 5, value should be > 0 * * @return * knet_link_set_pong_count returns * 0 on success * -1 on error and errno is set. */ int knet_link_set_pong_count(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t pong_count); /** * knet_link_get_pong_count * * @brief Get the pong count for a link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * pong_count - how many valid ping/pongs before a link is marked UP. * default: 5, value should be > 0 * * @return * knet_link_get_pong_count returns * 0 on success * -1 on error and errno is set. 
*/ int knet_link_get_pong_count(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t *pong_count); /** * knet_link_set_priority * * @brief Set the priority for a link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * priority - specify the switching priority for this link * see also knet_host_set_policy * * @return * knet_link_set_priority returns * 0 on success * -1 on error and errno is set. */ int knet_link_set_priority(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t priority); /** * knet_link_get_priority * * @brief Get the priority for a link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * priority - gather the switching priority for this link * see also knet_host_set_policy * * @return * knet_link_get_priority returns * 0 on success * -1 on error and errno is set. */ int knet_link_get_priority(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t *priority); /** * knet_link_get_link_list * * @brief Get a list of links connecting a host * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_ids - array of at least KNET_MAX_LINK size * with the list of configured links for a certain host. * * link_ids_entries - * number of entries contained in link_ids * * @return * knet_link_get_link_list returns * 0 on success * -1 on error and errno is set. */ int knet_link_get_link_list(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t *link_ids, size_t *link_ids_entries); /* * define link status structure for quick lookup * * src/dst_{ipaddr,port} strings are filled by * getnameinfo(3) when configuring the link. * if the link is dynamic (see knet_link_set_config(3)) * dst_ipaddr/port will contain ipaddr/port of the currently * connected peer or "Unknown" if it was not possible * to determine the ipaddr/port at runtime. * * enabled see also knet_link_set/get_enable.
* * connected the link is connected to a peer and ping/pong traffic * is flowing. * * dynconnected the link has dynamic ip on the other end, and * we can see the other host is sending pings to us. * * latency average latency of this link * see also knet_link_set/get_timeout. * * pong_last if the link is down, this value tells us how long * ago this link was active. A value of 0 means that the link * has never been active. * * knet_link_stats structure that contains details statistics for the link */ #define MAX_LINK_EVENTS 16 struct knet_link_stats { /* onwire values */ uint64_t tx_data_packets; uint64_t rx_data_packets; uint64_t tx_data_bytes; uint64_t rx_data_bytes; uint64_t rx_ping_packets; uint64_t tx_ping_packets; uint64_t rx_ping_bytes; uint64_t tx_ping_bytes; uint64_t rx_pong_packets; uint64_t tx_pong_packets; uint64_t rx_pong_bytes; uint64_t tx_pong_bytes; uint64_t rx_pmtu_packets; uint64_t tx_pmtu_packets; uint64_t rx_pmtu_bytes; uint64_t tx_pmtu_bytes; /* Only filled in when requested */ uint64_t tx_total_packets; uint64_t rx_total_packets; uint64_t tx_total_bytes; uint64_t rx_total_bytes; uint64_t tx_total_errors; uint64_t tx_total_retries; uint32_t tx_pmtu_errors; uint32_t tx_pmtu_retries; uint32_t tx_ping_errors; uint32_t tx_ping_retries; uint32_t tx_pong_errors; uint32_t tx_pong_retries; uint32_t tx_data_errors; uint32_t tx_data_retries; /* measured in usecs */ uint32_t latency_min; uint32_t latency_max; uint32_t latency_ave; uint32_t latency_samples; /* how many times the link has been going up/down */ uint32_t down_count; uint32_t up_count; /* * circular buffer of time_t structs collecting the history * of up/down events on this link. * the index indicates current/last event. 
* it is safe to walk back the history by decreasing the index */ time_t last_up_times[MAX_LINK_EVENTS]; time_t last_down_times[MAX_LINK_EVENTS]; int8_t last_up_time_index; int8_t last_down_time_index; /* Always add new stats at the end */ }; struct knet_link_status { size_t size; /* For ABI checking */ char src_ipaddr[KNET_MAX_HOST_LEN]; char src_port[KNET_MAX_PORT_LEN]; char dst_ipaddr[KNET_MAX_HOST_LEN]; char dst_port[KNET_MAX_PORT_LEN]; uint8_t enabled; /* link is configured and admin enabled for traffic */ uint8_t connected; /* link is connected for data (local view) */ uint8_t dynconnected; /* link has been activated by remote dynip */ unsigned long long latency; /* average latency computed by fix/exp */ struct timespec pong_last; unsigned int mtu; /* current detected MTU on this link */ unsigned int proto_overhead; /* contains the size of the IP protocol, knet headers and * crypto headers (if configured). This value is filled in * ONLY after the first PMTUd run on that given link, * and can change if link configuration or crypto configuration * changes at runtime. * WARNING: in general mtu + proto_overhead might or might * not match the output of ifconfig mtu due to crypto * requirements to pad packets to some specific boundaries. */ /* Link statistics */ struct knet_link_stats stats; }; /** * knet_link_get_status * * @brief Get the status (and statistics) for a link * * knet_h - pointer to knet_handle_t * * host_id - see knet_host_add(3) * * link_id - see knet_link_set_config(3) * * status - pointer to knet_link_status struct * * struct_size - max size of knet_link_status - allows library to * add fields without ABI change. Returned structure * will be truncated to this length and .size member * indicates the full size. * * @return * knet_link_get_status returns * 0 on success * -1 on error and errno is set. 
*/ int knet_link_get_status(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, struct knet_link_status *status, size_t struct_size); /* * logging structs/API calls */ /* * libknet is composed of several subsystems. In order * to easily distinguish log messages coming from different * places, each subsystem has its own ID. * * 0-19 config/management * 20-39 internal threads * 40-59 transports * 60-69 crypto implementations */ #define KNET_SUB_COMMON 0 /* common.c */ #define KNET_SUB_HANDLE 1 /* handle.c alloc/dealloc config changes */ #define KNET_SUB_HOST 2 /* host add/del/modify */ #define KNET_SUB_LISTENER 3 /* listeners add/del/modify... */ #define KNET_SUB_LINK 4 /* link add/del/modify */ #define KNET_SUB_TRANSPORT 5 /* Transport common */ #define KNET_SUB_CRYPTO 6 /* crypto.c config generic layer */ #define KNET_SUB_COMPRESS 7 /* compress.c config generic layer */ #define KNET_SUB_FILTER 19 /* allocated for users to log from dst_filter */ #define KNET_SUB_DSTCACHE 20 /* switching thread (destination cache handling) */ #define KNET_SUB_HEARTBEAT 21 /* heartbeat thread */ #define KNET_SUB_PMTUD 22 /* Path MTU Discovery thread */ #define KNET_SUB_TX 23 /* send to link thread */ #define KNET_SUB_RX 24 /* recv from link thread */ #define KNET_SUB_TRANSP_BASE 40 /* Base log level for transports */ #define KNET_SUB_TRANSP_LOOPBACK (KNET_SUB_TRANSP_BASE + KNET_TRANSPORT_LOOPBACK) #define KNET_SUB_TRANSP_UDP (KNET_SUB_TRANSP_BASE + KNET_TRANSPORT_UDP) #define KNET_SUB_TRANSP_SCTP (KNET_SUB_TRANSP_BASE + KNET_TRANSPORT_SCTP) #define KNET_SUB_NSSCRYPTO 60 /* nsscrypto.c */ #define KNET_SUB_OPENSSLCRYPTO 61 /* opensslcrypto.c */ #define KNET_SUB_ZLIBCOMP 70 /* compress_zlib.c */ #define KNET_SUB_LZ4COMP 71 /* compress_lz4.c */ #define KNET_SUB_LZ4HCCOMP 72 /* compress_lz4.c */ #define KNET_SUB_LZO2COMP 73 /* compress_lzo.c */ #define KNET_SUB_LZMACOMP 74 /* compress_lzma.c */ #define KNET_SUB_BZIP2COMP 75 /* compress_bzip2.c */ #define KNET_SUB_ZSTDCOMP 76 /* 
compress_zstd.c */ #define KNET_SUB_UNKNOWN UINT8_MAX - 1 #define KNET_MAX_SUBSYSTEMS UINT8_MAX /* * Convert between subsystem IDs and names */ /** * knet_log_get_subsystem_name * * @brief Get a logging system name from its numeric ID * * @return * returns internal name of the subsystem or "common" */ const char *knet_log_get_subsystem_name(uint8_t subsystem); /** * knet_log_get_subsystem_id * * @brief Get a logging system ID from its name * * @return * returns internal ID of the subsystem or KNET_SUB_COMMON */ uint8_t knet_log_get_subsystem_id(const char *name); /* * 4 log levels are enough for everybody */ #define KNET_LOG_ERR 0 /* unrecoverable errors/conditions */ #define KNET_LOG_WARN 1 /* recoverable errors/conditions */ #define KNET_LOG_INFO 2 /* info, link up/down, config changes.. */ #define KNET_LOG_DEBUG 3 /* * Convert between log level values and names */ /** * knet_log_get_loglevel_name * * @brief Get a logging level name from its numeric ID * * @return * returns internal name of the log level or "ERROR" for unknown values */ const char *knet_log_get_loglevel_name(uint8_t level); /** * knet_log_get_loglevel_id * * @brief Get a logging level ID from its name * * @return * returns internal log level ID or KNET_LOG_ERR for invalid names */ uint8_t knet_log_get_loglevel_id(const char *name); /* * every log message is composed by a text message * and message level/subsystem IDs. * In order to make debugging easier it is possible to send those packets * straight to stdout/stderr (see knet_bench.c stdout option). 
*/ #define KNET_MAX_LOG_MSG_SIZE 254 #if KNET_MAX_LOG_MSG_SIZE > PIPE_BUF #error KNET_MAX_LOG_MSG_SIZE cannot be bigger than PIPE_BUF for guaranteed system atomic writes #endif struct knet_log_msg { char msg[KNET_MAX_LOG_MSG_SIZE]; uint8_t subsystem; /* KNET_SUB_* */ uint8_t msglevel; /* KNET_LOG_* */ }; /** * knet_log_set_loglevel * * @brief Set the logging level for a subsystem * * knet_h - same as above * * subsystem - same as above * * level - same as above * * knet_log_set_loglevel allows fine control of log levels by subsystem. * See also knet_handle_new for defaults. * * @return * knet_log_set_loglevel returns * 0 on success * -1 on error and errno is set. */ int knet_log_set_loglevel(knet_handle_t knet_h, uint8_t subsystem, uint8_t level); /** * knet_log_get_loglevel * * @brief Get the logging level for a subsystem * * knet_h - same as above * * subsystem - same as above * * level - same as above * * @return * knet_log_get_loglevel returns * 0 on success * -1 on error and errno is set. */ int knet_log_get_loglevel(knet_handle_t knet_h, uint8_t subsystem, uint8_t *level); #endif diff --git a/libknet/links.c b/libknet/links.c index cd18d9cc..f7eccc3f 100644 --- a/libknet/links.c +++ b/libknet/links.c @@ -1,1519 +1,1519 @@ /* * Copyright (C) 2012-2019 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. 
Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include "internals.h" #include "logging.h" #include "links.h" #include "transports.h" #include "host.h" #include "threads_common.h" #include "links_acl.h" int _link_updown(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, unsigned int enabled, unsigned int connected) { struct knet_link *link = &knet_h->host_index[host_id]->link[link_id]; if ((link->status.enabled == enabled) && (link->status.connected == connected)) return 0; link->status.enabled = enabled; link->status.connected = connected; _host_dstcache_update_async(knet_h, knet_h->host_index[host_id]); if ((link->status.dynconnected) && (!link->status.connected)) link->status.dynconnected = 0; if (connected) { time(&link->status.stats.last_up_times[link->status.stats.last_up_time_index]); link->status.stats.up_count++; - if (++link->status.stats.last_up_time_index > MAX_LINK_EVENTS) { + if (++link->status.stats.last_up_time_index >= MAX_LINK_EVENTS) { link->status.stats.last_up_time_index = 0; } } else { time(&link->status.stats.last_down_times[link->status.stats.last_down_time_index]); link->status.stats.down_count++; - if (++link->status.stats.last_down_time_index > MAX_LINK_EVENTS) { + if (++link->status.stats.last_down_time_index >= MAX_LINK_EVENTS) { link->status.stats.last_down_time_index = 0; } } return 0; } void _link_clear_stats(knet_handle_t knet_h) { struct knet_host *host; struct knet_link *link; uint32_t host_id; uint8_t link_id; for (host_id = 0; host_id < KNET_MAX_HOST; host_id++) { host = knet_h->host_index[host_id]; if (!host) { continue; } for (link_id = 0; link_id < KNET_MAX_LINK; link_id++) { link = &host->link[link_id]; memset(&link->status.stats, 0, sizeof(struct knet_link_stats)); } } } int knet_link_set_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t transport, struct sockaddr_storage *src_addr, struct 
sockaddr_storage *dst_addr, uint64_t flags) { int savederrno = 0, err = 0, i; struct knet_host *host; struct knet_link *link; if (!knet_h) { errno = EINVAL; return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (!src_addr) { errno = EINVAL; return -1; } if (dst_addr && (src_addr->ss_family != dst_addr->ss_family)) { log_err(knet_h, KNET_SUB_LINK, "Source address family does not match destination address family"); errno = EINVAL; return -1; } if (transport >= KNET_MAX_TRANSPORTS) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (transport == KNET_TRANSPORT_LOOPBACK && knet_h->host_id != host_id) { log_err(knet_h, KNET_SUB_LINK, "Cannot create loopback link to remote node"); err = -1; savederrno = EINVAL; goto exit_unlock; } if (knet_h->host_id == host_id && knet_h->has_loop_link) { log_err(knet_h, KNET_SUB_LINK, "Cannot create more than 1 link when loopback is active"); err = -1; savederrno = EINVAL; goto exit_unlock; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } if (transport == KNET_TRANSPORT_LOOPBACK && knet_h->host_id == host_id) { for (i=0; ilink[i].configured) { log_err(knet_h, KNET_SUB_LINK, "Cannot add loopback link when other links are already configured."); err = -1; savederrno = EINVAL; goto exit_unlock; } } } link = &host->link[link_id]; if (link->configured != 0) { err =-1; savederrno = EBUSY; log_err(knet_h, KNET_SUB_LINK, "Host %u link %u is currently configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } if (link->status.enabled != 0) { err =-1; savederrno = EBUSY; log_err(knet_h, KNET_SUB_LINK, "Host %u link %u is currently in use: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } 
memmove(&link->src_addr, src_addr, sizeof(struct sockaddr_storage)); err = knet_addrtostr(src_addr, sizeof(struct sockaddr_storage), link->status.src_ipaddr, KNET_MAX_HOST_LEN, link->status.src_port, KNET_MAX_PORT_LEN); if (err) { if (err == EAI_SYSTEM) { savederrno = errno; log_warn(knet_h, KNET_SUB_LINK, "Unable to resolve host: %u link: %u source addr/port: %s", host_id, link_id, strerror(savederrno)); } else { savederrno = EINVAL; log_warn(knet_h, KNET_SUB_LINK, "Unable to resolve host: %u link: %u source addr/port: %s", host_id, link_id, gai_strerror(err)); } err = -1; goto exit_unlock; } if (!dst_addr) { link->dynamic = KNET_LINK_DYNIP; } else { link->dynamic = KNET_LINK_STATIC; memmove(&link->dst_addr, dst_addr, sizeof(struct sockaddr_storage)); err = knet_addrtostr(dst_addr, sizeof(struct sockaddr_storage), link->status.dst_ipaddr, KNET_MAX_HOST_LEN, link->status.dst_port, KNET_MAX_PORT_LEN); if (err) { if (err == EAI_SYSTEM) { savederrno = errno; log_warn(knet_h, KNET_SUB_LINK, "Unable to resolve host: %u link: %u destination addr/port: %s", host_id, link_id, strerror(savederrno)); } else { savederrno = EINVAL; log_warn(knet_h, KNET_SUB_LINK, "Unable to resolve host: %u link: %u destination addr/port: %s", host_id, link_id, gai_strerror(err)); } err = -1; goto exit_unlock; } } link->pmtud_crypto_timeout_multiplier = KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN; link->pong_count = KNET_LINK_DEFAULT_PONG_COUNT; link->has_valid_mtu = 0; link->ping_interval = KNET_LINK_DEFAULT_PING_INTERVAL * 1000; /* microseconds */ link->pong_timeout = KNET_LINK_DEFAULT_PING_TIMEOUT * 1000; /* microseconds */ link->pong_timeout_backoff = KNET_LINK_PONG_TIMEOUT_BACKOFF; link->pong_timeout_adj = link->pong_timeout * link->pong_timeout_backoff; /* microseconds */ link->latency_fix = KNET_LINK_DEFAULT_PING_PRECISION; link->latency_exp = KNET_LINK_DEFAULT_PING_PRECISION - \ ((link->ping_interval * KNET_LINK_DEFAULT_PING_PRECISION) / 8000000); link->flags = flags; if 
(transport_link_set_config(knet_h, link, transport) < 0) { savederrno = errno; err = -1; goto exit_unlock; } /* * we can only configure default access lists if we know both endpoints * and the protocol uses GENERIC_ACL, otherwise the protocol has * to setup their own access lists above in transport_link_set_config. */ if ((transport_get_acl_type(knet_h, transport) == USE_GENERIC_ACL) && (link->dynamic == KNET_LINK_STATIC)) { log_debug(knet_h, KNET_SUB_LINK, "Configuring default access lists for host: %u link: %u socket: %d", host_id, link_id, link->outsock); if ((check_add(knet_h, link->outsock, transport, -1, &link->dst_addr, &link->dst_addr, CHECK_TYPE_ADDRESS, CHECK_ACCEPT) < 0) && (errno != EEXIST)) { log_warn(knet_h, KNET_SUB_LINK, "Failed to configure default access lists for host: %u link: %u", host_id, link_id); savederrno = errno; err = -1; goto exit_unlock; } } link->configured = 1; log_debug(knet_h, KNET_SUB_LINK, "host: %u link: %u is configured", host_id, link_id); if (transport == KNET_TRANSPORT_LOOPBACK) { knet_h->has_loop_link = 1; knet_h->loop_link = link_id; host->status.reachable = 1; link->status.mtu = KNET_PMTUD_SIZE_V6; } else { /* * calculate the minimum MTU that is safe to use, * based on RFCs and that each network device should * be able to support without any troubles */ if (link->dynamic == KNET_LINK_STATIC) { /* * with static link we can be more precise than using * the generic calc_min_mtu() */ switch (link->dst_addr.ss_family) { case AF_INET6: link->status.mtu = calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V6 - (KNET_PMTUD_OVERHEAD_V6 + link->proto_overhead)); break; case AF_INET: link->status.mtu = calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V4 - (KNET_PMTUD_OVERHEAD_V4 + link->proto_overhead)); break; } } else { /* * for dynamic links we start with the minimum MTU * possible and PMTUd will kick in immediately * after connection status is 1 */ link->status.mtu = calc_min_mtu(knet_h); } link->has_valid_mtu = 1; } exit_unlock: 
pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_link_get_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t *transport, struct sockaddr_storage *src_addr, struct sockaddr_storage *dst_addr, uint8_t *dynamic, uint64_t *flags) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!knet_h) { errno = EINVAL; return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (!src_addr) { errno = EINVAL; return -1; } if (!dynamic) { errno = EINVAL; return -1; } if (!transport) { errno = EINVAL; return -1; } if (!flags) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } if ((link->dynamic == KNET_LINK_STATIC) && (!dst_addr)) { savederrno = EINVAL; err = -1; goto exit_unlock; } memmove(src_addr, &link->src_addr, sizeof(struct sockaddr_storage)); *transport = link->transport; *flags = link->flags; if (link->dynamic == KNET_LINK_STATIC) { *dynamic = 0; memmove(dst_addr, &link->dst_addr, sizeof(struct sockaddr_storage)); } else { *dynamic = 1; } exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? 
savederrno : 0; return err; } int knet_link_clear_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; int sock; uint8_t transport; if (!knet_h) { errno = EINVAL; return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (link->configured != 1) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } if (link->status.enabled != 0) { err = -1; savederrno = EBUSY; log_err(knet_h, KNET_SUB_LINK, "Host %u link %u is currently in use: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } /* * remove well known access lists here. * After the transport has done clearing the config, * then we can remove any leftover access lists if the link * is no longer in use. */ if ((transport_get_acl_type(knet_h, link->transport) == USE_GENERIC_ACL) && (link->dynamic == KNET_LINK_STATIC)) { if ((check_rm(knet_h, link->outsock, link->transport, &link->dst_addr, &link->dst_addr, CHECK_TYPE_ADDRESS, CHECK_ACCEPT) < 0) && (errno != ENOENT)) { err = -1; savederrno = errno; log_err(knet_h, KNET_SUB_LINK, "Host %u link %u: unable to remove default access list", host_id, link_id); goto exit_unlock; } } /* * cache it for later as we don't know if the transport * will clear link info during clear_config. 
*/ sock = link->outsock; transport = link->transport; if ((transport_link_clear_config(knet_h, link) < 0) && (errno != EBUSY)) { savederrno = errno; err = -1; goto exit_unlock; } /* * remove any other access lists when the socket is no * longer in use by the transport. */ if ((transport_get_acl_type(knet_h, link->transport) == USE_GENERIC_ACL) && (knet_h->knet_transport_fd_tracker[sock].transport == KNET_MAX_TRANSPORTS)) { check_rmall(knet_h, sock, transport); } memset(link, 0, sizeof(struct knet_link)); link->link_id = link_id; if (knet_h->has_loop_link && host_id == knet_h->host_id && link_id == knet_h->loop_link) { knet_h->has_loop_link = 0; if (host->active_link_entries == 0) { host->status.reachable = 0; } } log_debug(knet_h, KNET_SUB_LINK, "host: %u link: %u config has been wiped", host_id, link_id); exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_link_set_enable(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, unsigned int enabled) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!knet_h) { errno = EINVAL; return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (enabled > 1) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } if (link->status.enabled == enabled) { err = 0; goto exit_unlock; } err = _link_updown(knet_h, host_id, link_id, enabled, link->status.connected); 
savederrno = errno; if (enabled) { goto exit_unlock; } log_debug(knet_h, KNET_SUB_LINK, "host: %u link: %u is disabled", host_id, link_id); exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_link_get_enable(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, unsigned int *enabled) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!knet_h) { errno = EINVAL; return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (!enabled) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } *enabled = link->status.enabled; exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? 
savederrno : 0; return err; } int knet_link_set_pong_count(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t pong_count) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!knet_h) { errno = EINVAL; return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (pong_count < 1) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } link->pong_count = pong_count; log_debug(knet_h, KNET_SUB_LINK, "host: %u link: %u pong count update: %u", host_id, link_id, link->pong_count); exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? 
savederrno : 0; return err; } int knet_link_get_pong_count(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t *pong_count) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!knet_h) { errno = EINVAL; return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (!pong_count) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } *pong_count = link->pong_count; exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? 
savederrno : 0; return err; } int knet_link_set_ping_timers(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, time_t interval, time_t timeout, unsigned int precision) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!knet_h) { errno = EINVAL; return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (!interval) { errno = EINVAL; return -1; } if (!timeout) { errno = ENOSYS; return -1; } if (!precision) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } link->ping_interval = interval * 1000; /* microseconds */ link->pong_timeout = timeout * 1000; /* microseconds */ link->latency_fix = precision; link->latency_exp = precision - \ ((link->ping_interval * precision) / 8000000); log_debug(knet_h, KNET_SUB_LINK, "host: %u link: %u timeout update - interval: %llu timeout: %llu precision: %u", host_id, link_id, link->ping_interval, link->pong_timeout, precision); exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? 
savederrno : 0; return err; } int knet_link_get_ping_timers(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, time_t *interval, time_t *timeout, unsigned int *precision) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!knet_h) { errno = EINVAL; return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (!interval) { errno = EINVAL; return -1; } if (!timeout) { errno = EINVAL; return -1; } if (!precision) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } *interval = link->ping_interval / 1000; /* microseconds */ *timeout = link->pong_timeout / 1000; *precision = link->latency_fix; exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? 
savederrno : 0; return err; } int knet_link_set_priority(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t priority) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; uint8_t old_priority; if (!knet_h) { errno = EINVAL; return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } old_priority = link->priority; if (link->priority == priority) { err = 0; goto exit_unlock; } link->priority = priority; if (_host_dstcache_update_sync(knet_h, host)) { savederrno = errno; log_debug(knet_h, KNET_SUB_LINK, "Unable to update link priority (host: %u link: %u priority: %u): %s", host_id, link_id, link->priority, strerror(savederrno)); link->priority = old_priority; err = -1; goto exit_unlock; } log_debug(knet_h, KNET_SUB_LINK, "host: %u link: %u priority set to: %u", host_id, link_id, link->priority); exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? 
savederrno : 0; return err; } int knet_link_get_priority(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, uint8_t *priority) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!knet_h) { errno = EINVAL; return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (!priority) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } *priority = link->priority; exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? 
savederrno : 0; return err; } int knet_link_get_link_list(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t *link_ids, size_t *link_ids_entries) { int savederrno = 0, err = 0, i, count = 0; struct knet_host *host; struct knet_link *link; if (!knet_h) { errno = EINVAL; return -1; } if (!link_ids) { errno = EINVAL; return -1; } if (!link_ids_entries) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } for (i = 0; i < KNET_MAX_LINK; i++) { link = &host->link[i]; if (!link->configured) { continue; } link_ids[count] = i; count++; } *link_ids_entries = count; exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? 
savederrno : 0; return err; } int knet_link_get_status(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, struct knet_link_status *status, size_t struct_size) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!knet_h) { errno = EINVAL; return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } if (!status) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } memmove(status, &link->status, struct_size); /* Calculate totals - no point in doing this on-the-fly */ status->stats.rx_total_packets = status->stats.rx_data_packets + status->stats.rx_ping_packets + status->stats.rx_pong_packets + status->stats.rx_pmtu_packets; status->stats.tx_total_packets = status->stats.tx_data_packets + status->stats.tx_ping_packets + status->stats.tx_pong_packets + status->stats.tx_pmtu_packets; status->stats.rx_total_bytes = status->stats.rx_data_bytes + status->stats.rx_ping_bytes + status->stats.rx_pong_bytes + status->stats.rx_pmtu_bytes; status->stats.tx_total_bytes = status->stats.tx_data_bytes + status->stats.tx_ping_bytes + status->stats.tx_pong_bytes + status->stats.tx_pmtu_bytes; status->stats.tx_total_errors = status->stats.tx_data_errors + status->stats.tx_ping_errors + status->stats.tx_pong_errors + status->stats.tx_pmtu_errors; status->stats.tx_total_retries = status->stats.tx_data_retries + status->stats.tx_ping_retries + status->stats.tx_pong_retries + 
status->stats.tx_pmtu_retries; /* Tell the caller our full size in case they have an old version */ status->size = sizeof(struct knet_link_status); exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? savederrno : 0; return err; } int knet_link_add_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, struct sockaddr_storage *ss1, struct sockaddr_storage *ss2, check_type_t type, check_acceptreject_t acceptreject) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!knet_h) { errno = EINVAL; return -1; } if (!ss1) { errno = EINVAL; return -1; } if ((type != CHECK_TYPE_ADDRESS) && (type != CHECK_TYPE_MASK) && (type != CHECK_TYPE_RANGE)) { errno = EINVAL; return -1; } if ((acceptreject != CHECK_ACCEPT) && (acceptreject != CHECK_REJECT)) { errno = EINVAL; return -1; } if ((type != CHECK_TYPE_ADDRESS) && (!ss2)) { errno = EINVAL; return -1; } if ((type == CHECK_TYPE_RANGE) && (ss1->ss_family != ss2->ss_family)) { errno = EINVAL; return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } if (link->dynamic != KNET_LINK_DYNIP) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is a point to point connection: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } err = check_add(knet_h, transport_link_get_acl_fd(knet_h, link), link->transport, -1, ss1, ss2, type, acceptreject); 
savederrno = errno; exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = savederrno; return err; } int knet_link_insert_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, int index, struct sockaddr_storage *ss1, struct sockaddr_storage *ss2, check_type_t type, check_acceptreject_t acceptreject) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!knet_h) { errno = EINVAL; return -1; } if (!ss1) { errno = EINVAL; return -1; } if ((type != CHECK_TYPE_ADDRESS) && (type != CHECK_TYPE_MASK) && (type != CHECK_TYPE_RANGE)) { errno = EINVAL; return -1; } if ((acceptreject != CHECK_ACCEPT) && (acceptreject != CHECK_REJECT)) { errno = EINVAL; return -1; } if ((type != CHECK_TYPE_ADDRESS) && (!ss2)) { errno = EINVAL; return -1; } if ((type == CHECK_TYPE_RANGE) && (ss1->ss_family != ss2->ss_family)) { errno = EINVAL; return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } if (link->dynamic != KNET_LINK_DYNIP) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is a point to point connection: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } err = check_add(knet_h, transport_link_get_acl_fd(knet_h, link), link->transport, index, ss1, ss2, type, acceptreject); savederrno = errno; exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = savederrno; return err; } int 
knet_link_rm_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id, struct sockaddr_storage *ss1, struct sockaddr_storage *ss2, check_type_t type, check_acceptreject_t acceptreject) { int savederrno = 0, err = 0; struct knet_host *host; struct knet_link *link; if (!knet_h) { errno = EINVAL; return -1; } if (!ss1) { errno = EINVAL; return -1; } if ((type != CHECK_TYPE_ADDRESS) && (type != CHECK_TYPE_MASK) && (type != CHECK_TYPE_RANGE)) { errno = EINVAL; return -1; } if ((acceptreject != CHECK_ACCEPT) && (acceptreject != CHECK_REJECT)) { errno = EINVAL; return -1; } if ((type != CHECK_TYPE_ADDRESS) && (!ss2)) { errno = EINVAL; return -1; } if ((type == CHECK_TYPE_RANGE) && (ss1->ss_family != ss2->ss_family)) { errno = EINVAL; return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } if (link->dynamic != KNET_LINK_DYNIP) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is a point to point connection: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } err = check_rm(knet_h, transport_link_get_acl_fd(knet_h, link), link->transport, ss1, ss2, type, acceptreject); savederrno = errno; exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = savederrno; return err; } int knet_link_clear_acl(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id) { int savederrno = 0, err = 0; struct knet_host *host; struct 
knet_link *link; if (!knet_h) { errno = EINVAL; return -1; } if (link_id >= KNET_MAX_LINK) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } host = knet_h->host_index[host_id]; if (!host) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "Unable to find host %u: %s", host_id, strerror(savederrno)); goto exit_unlock; } link = &host->link[link_id]; if (!link->configured) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is not configured: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } if (link->dynamic != KNET_LINK_DYNIP) { err = -1; savederrno = EINVAL; log_err(knet_h, KNET_SUB_LINK, "host %u link %u is a point to point connection: %s", host_id, link_id, strerror(savederrno)); goto exit_unlock; } check_rmall(knet_h, transport_link_get_acl_fd(knet_h, link), link->transport); exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = savederrno; return err; } diff --git a/libknet/links_acl_ip.c b/libknet/links_acl_ip.c index e479bbdb..4692afa5 100644 --- a/libknet/links_acl_ip.c +++ b/libknet/links_acl_ip.c @@ -1,305 +1,310 @@ /* * Copyright (C) 2016-2019 Red Hat, Inc. All rights reserved. * * Author: Christine Caulfield * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include #include #include "internals.h" #include "logging.h" #include "transports.h" #include "links_acl.h" #include "links_acl_ip.h" struct ip_acl_match_entry { check_type_t type; check_acceptreject_t acceptreject; struct sockaddr_storage addr1; /* Actual IP address, mask top or low IP */ struct sockaddr_storage addr2; /* high IP address or address bitmask */ struct ip_acl_match_entry *next; }; /* * s6_addr32 is not defined in BSD userland, only kernel. 
* definition is the same as linux and it works fine for * what we need. */ #ifndef s6_addr32 #define s6_addr32 __u6_addr.__u6_addr32 #endif /* * IPv4 See if the address we have matches the current match entry */ static int ip_matches_v4(struct sockaddr_storage *checkip, struct ip_acl_match_entry *match_entry) { struct sockaddr_in *ip_to_check; struct sockaddr_in *match1; struct sockaddr_in *match2; ip_to_check = (struct sockaddr_in *)checkip; match1 = (struct sockaddr_in *)&match_entry->addr1; match2 = (struct sockaddr_in *)&match_entry->addr2; switch(match_entry->type) { case CHECK_TYPE_ADDRESS: if (ip_to_check->sin_addr.s_addr == match1->sin_addr.s_addr) return 1; break; case CHECK_TYPE_MASK: if ((ip_to_check->sin_addr.s_addr & match2->sin_addr.s_addr) == match1->sin_addr.s_addr) return 1; break; case CHECK_TYPE_RANGE: if ((ntohl(ip_to_check->sin_addr.s_addr) >= ntohl(match1->sin_addr.s_addr)) && (ntohl(ip_to_check->sin_addr.s_addr) <= ntohl(match2->sin_addr.s_addr))) return 1; break; } return 0; } /* * Compare two IPv6 addresses */ static int ip6addr_cmp(struct in6_addr *a, struct in6_addr *b) { uint64_t a_high, a_low; uint64_t b_high, b_low; a_high = ((uint64_t)htonl(a->s6_addr32[0]) << 32) | (uint64_t)htonl(a->s6_addr32[1]); a_low = ((uint64_t)htonl(a->s6_addr32[2]) << 32) | (uint64_t)htonl(a->s6_addr32[3]); b_high = ((uint64_t)htonl(b->s6_addr32[0]) << 32) | (uint64_t)htonl(b->s6_addr32[1]); b_low = ((uint64_t)htonl(b->s6_addr32[2]) << 32) | (uint64_t)htonl(b->s6_addr32[3]); if (a_high > b_high) return 1; if (a_high < b_high) return -1; if (a_low > b_low) return 1; if (a_low < b_low) return -1; return 0; } /* * IPv6 See if the address we have matches the current match entry */ static int ip_matches_v6(struct sockaddr_storage *checkip, struct ip_acl_match_entry *match_entry) { struct sockaddr_in6 *ip_to_check; struct sockaddr_in6 *match1; struct sockaddr_in6 *match2; int i; ip_to_check = (struct sockaddr_in6 *)checkip; match1 = (struct sockaddr_in6 
*)&match_entry->addr1; match2 = (struct sockaddr_in6 *)&match_entry->addr2; switch(match_entry->type) { case CHECK_TYPE_ADDRESS: if (!memcmp(ip_to_check->sin6_addr.s6_addr32, match1->sin6_addr.s6_addr32, sizeof(struct in6_addr))) return 1; break; case CHECK_TYPE_MASK: /* * Note that this little loop will quit early if there is a non-match so the * comparison might look backwards compared to the IPv4 one */ for (i=sizeof(struct in6_addr)/4-1; i>=0; i--) { if ((ip_to_check->sin6_addr.s6_addr32[i] & match2->sin6_addr.s6_addr32[i]) != match1->sin6_addr.s6_addr32[i]) return 0; } return 1; case CHECK_TYPE_RANGE: if ((ip6addr_cmp(&ip_to_check->sin6_addr, &match1->sin6_addr) >= 0) && (ip6addr_cmp(&ip_to_check->sin6_addr, &match2->sin6_addr) <= 0)) return 1; break; } return 0; } int ipcheck_validate(void *fd_tracker_match_entry_head, struct sockaddr_storage *checkip) { struct ip_acl_match_entry **match_entry_head = (struct ip_acl_match_entry **)fd_tracker_match_entry_head; struct ip_acl_match_entry *match_entry = *match_entry_head; int (*match_fn)(struct sockaddr_storage *checkip, struct ip_acl_match_entry *match_entry); if (checkip->ss_family == AF_INET){ match_fn = ip_matches_v4; } else { match_fn = ip_matches_v6; } while (match_entry) { if (match_fn(checkip, match_entry)) { if (match_entry->acceptreject == CHECK_ACCEPT) return 1; else return 0; } match_entry = match_entry->next; } return 0; /* Default reject */ } /* * Routines to manuipulate access lists */ void ipcheck_rmall(void *fd_tracker_match_entry_head) { struct ip_acl_match_entry **match_entry_head = (struct ip_acl_match_entry **)fd_tracker_match_entry_head; struct ip_acl_match_entry *next_match_entry; struct ip_acl_match_entry *match_entry = *match_entry_head; while (match_entry) { next_match_entry = match_entry->next; free(match_entry); match_entry = next_match_entry; } *match_entry_head = NULL; } static struct ip_acl_match_entry *ipcheck_findmatch(struct ip_acl_match_entry **match_entry_head, struct 
sockaddr_storage *ss1, struct sockaddr_storage *ss2, check_type_t type, check_acceptreject_t acceptreject) { struct ip_acl_match_entry *match_entry = *match_entry_head; while (match_entry) { if ((!memcmp(&match_entry->addr1, ss1, sizeof(struct sockaddr_storage))) && (!memcmp(&match_entry->addr2, ss2, sizeof(struct sockaddr_storage))) && (match_entry->type == type) && (match_entry->acceptreject == acceptreject)) { return match_entry; } match_entry = match_entry->next; } return NULL; } int ipcheck_rmip(void *fd_tracker_match_entry_head, struct sockaddr_storage *ss1, struct sockaddr_storage *ss2, check_type_t type, check_acceptreject_t acceptreject) { struct ip_acl_match_entry **match_entry_head = (struct ip_acl_match_entry **)fd_tracker_match_entry_head; struct ip_acl_match_entry *next_match_entry = NULL; struct ip_acl_match_entry *rm_match_entry; struct ip_acl_match_entry *match_entry = *match_entry_head; rm_match_entry = ipcheck_findmatch(match_entry_head, ss1, ss2, type, acceptreject); if (!rm_match_entry) { errno = ENOENT; return -1; } while (match_entry) { next_match_entry = match_entry->next; /* * we are removing the list head, be careful */ if (rm_match_entry == match_entry) { *match_entry_head = next_match_entry; free(match_entry); break; } /* * the next one is the one we need to remove */ if (rm_match_entry == next_match_entry) { match_entry->next = next_match_entry->next; free(next_match_entry); break; } match_entry = next_match_entry; } return 0; } int ipcheck_addip(void *fd_tracker_match_entry_head, int index, struct sockaddr_storage *ss1, struct sockaddr_storage *ss2, check_type_t type, check_acceptreject_t acceptreject) { struct ip_acl_match_entry **match_entry_head = (struct ip_acl_match_entry **)fd_tracker_match_entry_head; struct ip_acl_match_entry *new_match_entry; struct ip_acl_match_entry *match_entry = *match_entry_head; int i = 0; if (ipcheck_findmatch(match_entry_head, ss1, ss2, type, acceptreject) != NULL) { errno = EEXIST; return -1; } 
new_match_entry = malloc(sizeof(struct ip_acl_match_entry)); if (!new_match_entry) { return -1; } memmove(&new_match_entry->addr1, ss1, sizeof(struct sockaddr_storage)); memmove(&new_match_entry->addr2, ss2, sizeof(struct sockaddr_storage)); new_match_entry->type = type; new_match_entry->acceptreject = acceptreject; new_match_entry->next = NULL; if (match_entry) { /* * special case for index 0, since we need to update * the head of the list */ if (index == 0) { *match_entry_head = new_match_entry; new_match_entry->next = match_entry; } else { /* * find the end of the list or stop at "index" */ - while ((match_entry->next) || (i < index)) { + + while (match_entry->next) { match_entry = match_entry->next; + if (i == index) { + break; + } i++; } + /* * insert if there are more entries in the list */ if (match_entry->next) { new_match_entry->next = match_entry->next; } /* * add if we are at the end */ match_entry->next = new_match_entry; } } else { /* * first entry in the list */ *match_entry_head = new_match_entry; } return 0; } diff --git a/libknet/logging.c b/libknet/logging.c index 2efee1b9..378eef59 100644 --- a/libknet/logging.c +++ b/libknet/logging.c @@ -1,248 +1,248 @@ /* * Copyright (C) 2010-2019 Red Hat, Inc. All rights reserved. * * Author: Fabio M. 
Di Nitto * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include #include #include #include "internals.h" #include "logging.h" #include "threads_common.h" -static struct pretty_names subsystem_names[] = +static struct pretty_names subsystem_names[KNET_MAX_SUBSYSTEMS] = { { "common", KNET_SUB_COMMON }, { "handle", KNET_SUB_HANDLE }, { "host", KNET_SUB_HOST }, { "listener", KNET_SUB_LISTENER }, { "link", KNET_SUB_LINK }, { "transport", KNET_SUB_TRANSPORT }, { "crypto", KNET_SUB_CRYPTO }, { "compress", KNET_SUB_COMPRESS }, { "filter", KNET_SUB_FILTER }, { "dstcache", KNET_SUB_DSTCACHE }, { "heartbeat", KNET_SUB_HEARTBEAT }, { "pmtud", KNET_SUB_PMTUD }, { "tx", KNET_SUB_TX }, { "rx", KNET_SUB_RX }, { "loopback", KNET_SUB_TRANSP_LOOPBACK }, { "udp", KNET_SUB_TRANSP_UDP }, { "sctp", KNET_SUB_TRANSP_SCTP }, { "nsscrypto", KNET_SUB_NSSCRYPTO }, { "opensslcrypto", KNET_SUB_OPENSSLCRYPTO }, { "zlibcomp", KNET_SUB_ZLIBCOMP }, { "lz4comp", KNET_SUB_LZ4COMP }, { "lz4hccomp", KNET_SUB_LZ4HCCOMP }, { "lzo2comp", KNET_SUB_LZO2COMP }, { "lzmacomp", KNET_SUB_LZMACOMP }, { "bzip2comp", KNET_SUB_BZIP2COMP }, { "zstdcomp", KNET_SUB_ZSTDCOMP }, { "unknown", KNET_SUB_UNKNOWN } /* unknown MUST always be last in this array */ }; const char *knet_log_get_subsystem_name(uint8_t subsystem) { unsigned int i; for (i = 0; i < KNET_MAX_SUBSYSTEMS; i++) { if (subsystem_names[i].val == KNET_SUB_UNKNOWN) { break; } if (subsystem_names[i].val == subsystem) { errno = 0; return subsystem_names[i].name; } } return "unknown"; } uint8_t knet_log_get_subsystem_id(const char *name) { unsigned int i; for (i = 0; i < KNET_MAX_SUBSYSTEMS; i++) { if (subsystem_names[i].val == KNET_SUB_UNKNOWN) { break; } if (strcasecmp(name, subsystem_names[i].name) == 0) { errno = 0; return subsystem_names[i].val; } } return KNET_SUB_UNKNOWN; } static int is_valid_subsystem(uint8_t subsystem) { unsigned int i; for (i = 0; i < KNET_MAX_SUBSYSTEMS; i++) { if ((subsystem != 
KNET_SUB_UNKNOWN) && (subsystem_names[i].val == KNET_SUB_UNKNOWN)) { break; } if (subsystem_names[i].val == subsystem) { return 0; } } return -1; } -static struct pretty_names loglevel_names[] = +static struct pretty_names loglevel_names[KNET_LOG_DEBUG + 1] = { { "ERROR", KNET_LOG_ERR }, { "WARNING", KNET_LOG_WARN }, { "info", KNET_LOG_INFO }, { "debug", KNET_LOG_DEBUG } }; const char *knet_log_get_loglevel_name(uint8_t level) { unsigned int i; for (i = 0; i <= KNET_LOG_DEBUG; i++) { if (loglevel_names[i].val == level) { errno = 0; return loglevel_names[i].name; } } return "ERROR"; } uint8_t knet_log_get_loglevel_id(const char *name) { unsigned int i; for (i = 0; i <= KNET_LOG_DEBUG; i++) { if (strcasecmp(name, loglevel_names[i].name) == 0) { errno = 0; return loglevel_names[i].val; } } return KNET_LOG_ERR; } int knet_log_set_loglevel(knet_handle_t knet_h, uint8_t subsystem, uint8_t level) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } if (is_valid_subsystem(subsystem) < 0) { errno = EINVAL; return -1; } if (level > KNET_LOG_DEBUG) { errno = EINVAL; return -1; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, subsystem, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } knet_h->log_levels[subsystem] = level; pthread_rwlock_unlock(&knet_h->global_rwlock); errno = 0; return 0; } int knet_log_get_loglevel(knet_handle_t knet_h, uint8_t subsystem, uint8_t *level) { int savederrno = 0; if (!knet_h) { errno = EINVAL; return -1; } if (is_valid_subsystem(subsystem) < 0) { errno = EINVAL; return -1; } if (!level) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, subsystem, "Unable to get write lock: %s", strerror(savederrno)); errno = savederrno; return -1; } *level = knet_h->log_levels[subsystem]; pthread_rwlock_unlock(&knet_h->global_rwlock); errno = 0; return 0; } void log_msg(knet_handle_t knet_h, uint8_t subsystem, 
uint8_t msglevel, const char *fmt, ...) { va_list ap; struct knet_log_msg msg; size_t byte_cnt = 0; int len; if ((!knet_h) || (subsystem == KNET_MAX_SUBSYSTEMS) || (msglevel > knet_h->log_levels[subsystem])) return; if (knet_h->logfd <= 0) goto out; memset(&msg, 0, sizeof(struct knet_log_msg)); msg.subsystem = subsystem; msg.msglevel = msglevel; va_start(ap, fmt); #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wformat-nonliteral" #endif vsnprintf(msg.msg, sizeof(msg.msg), fmt, ap); #ifdef __clang__ #pragma clang diagnostic pop #endif va_end(ap); while (byte_cnt < sizeof(struct knet_log_msg)) { len = write(knet_h->logfd, &msg, sizeof(struct knet_log_msg) - byte_cnt); if (len <= 0) { goto out; } byte_cnt += len; } out: return; } diff --git a/libknet/onwire.c b/libknet/onwire.c index 143ac4b7..e3fd293b 100644 --- a/libknet/onwire.c +++ b/libknet/onwire.c @@ -1,127 +1,127 @@ /* * Copyright (C) 2019 Red Hat, Inc. All rights reserved. * * Author: Fabio M. Di Nitto * * This software licensed under LGPL-2.0+ */ #include "config.h" -#include +#include #include #include #include "crypto.h" #include "internals.h" #include "logging.h" #include "common.h" #include "transport_udp.h" #include "transport_sctp.h" /* * unencrypted packet looks like: * * | ip | protocol | knet_header | unencrypted data | * | onwire_len | * | proto_overhead | * | data_len | * | app MTU | * * encrypted packet looks like (not to scale): * * | ip | protocol | salt | crypto(knet_header | data) | crypto_data_pad | hash | * | onwire_len | * | proto_overhead | * | data_len | * | app MTU | * * knet_h->sec_block_size is >= 0 if encryption will pad the data * knet_h->sec_salt_size is >= 0 if encryption is enabled * knet_h->sec_hash_size is >= 0 if signing is enabled */ /* * this function takes in the data that we would like to send * and tells us the outgoing onwire data size with crypto and * all the headers adjustment. * calling thread needs to account for protocol overhead. 
*/
size_t calc_data_outlen(knet_handle_t knet_h, size_t inlen)
{
	size_t padded_len = inlen;

	/*
	 * A non-zero block size means the crypto layer pads the payload.
	 * Mirror that here: grow the length to the next block boundary.
	 * An input that is already aligned still gains one full block,
	 * because the pad is computed as block - (len % block).
	 */
	if (knet_h->sec_block_size) {
		padded_len += knet_h->sec_block_size - (padded_len % knet_h->sec_block_size);
	}

	/* salt and hash are added on top of the (padded) payload */
	return padded_len + knet_h->sec_salt_size + knet_h->sec_hash_size;
}

/*
 * Given the room available on the wire (inlen), work out the largest
 * payload that can be handed over once crypto framing and the knet
 * headers have been subtracted.
 * The caller is still responsible for the protocol overhead.
 */
size_t calc_max_data_outlen(knet_handle_t knet_h, size_t inlen)
{
	size_t max_len = inlen;

	if (knet_h->sec_block_size) {
		size_t remainder;

		/*
		 * Take salt and hash out first, so the alignment below is
		 * computed on the encrypted part only (knet_header + data
		 * + padding).
		 */
		max_len -= (knet_h->sec_salt_size + knet_h->sec_hash_size);

		/*
		 * Trim the bytes that stick out past the last block boundary,
		 * plus one more. Per the original note (based on testing),
		 * data that is exactly block aligned makes the crypto
		 * implementations append a whole extra block, so the result
		 * is kept one byte short of alignment.
		 */
		remainder = max_len % knet_h->sec_block_size;
		max_len -= (remainder + 1);

		/*
		 * Put salt and hash back: the crypto layer adds them to the
		 * outgoing length in the same way.
		 */
		max_len += (knet_h->sec_salt_size + knet_h->sec_hash_size);
	}

	/*
	 * What is left after removing the knet headers and the crypto
	 * salt/hash is the clean application MTU.
	 */
	return max_len - (KNET_HEADER_ALL_SIZE + knet_h->sec_salt_size + knet_h->sec_hash_size);
}

/*
 * set the lowest possible value as failsafe for all links.
* KNET_PMTUD_MIN_MTU_V4 < KNET_PMTUD_MIN_MTU_V6 * KNET_PMTUD_OVERHEAD_V6 > KNET_PMTUD_OVERHEAD_V4 * KNET_PMTUD_SCTP_OVERHEAD > KNET_PMTUD_UDP_OVERHEAD */ size_t calc_min_mtu(knet_handle_t knet_h) { return calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V4 - (KNET_PMTUD_OVERHEAD_V6 + KNET_PMTUD_SCTP_OVERHEAD)); } diff --git a/libknet/tests/api_knet_handle_compress.c b/libknet/tests/api_knet_handle_compress.c index 40b6f397..509bdf6c 100644 --- a/libknet/tests/api_knet_handle_compress.c +++ b/libknet/tests/api_knet_handle_compress.c @@ -1,181 +1,214 @@ /* * Copyright (C) 2017-2019 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. Di Nitto * * This software licensed under GPL-2.0+ */ #include "config.h" #include #include #include #include #include #include "libknet.h" #include "internals.h" #include "test-common.h" static void test(void) { knet_handle_t knet_h; int logfds[2]; struct knet_handle_compress_cfg knet_handle_compress_cfg; memset(&knet_handle_compress_cfg, 0, sizeof(struct knet_handle_compress_cfg)); printf("Test knet_handle_compress incorrect knet_h\n"); if ((!knet_handle_compress(NULL, &knet_handle_compress_cfg)) || (errno != EINVAL)) { printf("knet_handle_compress accepted invalid knet_h or returned incorrect error: %s\n", strerror(errno)); exit(FAIL); } setup_logpipes(logfds); knet_h = knet_handle_start(logfds, KNET_LOG_DEBUG); flush_logs(logfds[0], stdout); printf("Test knet_handle_compress with invalid cfg\n"); if ((!knet_handle_compress(knet_h, NULL)) || (errno != EINVAL)) { printf("knet_handle_compress accepted invalid cfg or returned incorrect error: %s\n", strerror(errno)); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } flush_logs(logfds[0], stdout); printf("Test knet_handle_compress with un-initialized cfg\n"); memset(&knet_handle_compress_cfg, 0, sizeof(struct knet_handle_compress_cfg)); if ((!knet_handle_compress(knet_h, &knet_handle_compress_cfg)) || (errno != EINVAL)) { 
printf("knet_handle_compress accepted invalid un-initialized cfg\n"); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } flush_logs(logfds[0], stdout); printf("Test knet_handle_compress with none compress model (disable compress)\n"); memset(&knet_handle_compress_cfg, 0, sizeof(struct knet_handle_compress_cfg)); strncpy(knet_handle_compress_cfg.compress_model, "none", sizeof(knet_handle_compress_cfg.compress_model) - 1); if (knet_handle_compress(knet_h, &knet_handle_compress_cfg) != 0) { printf("knet_handle_compress did not accept none compress mode cfg\n"); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } flush_logs(logfds[0], stdout); +#if WITH_COMPRESS_BZIP2 > 0 + printf("Test knet_handle_compress with bzip2 (no default) with negative level (-3)\n"); + memset(&knet_handle_compress_cfg, 0, sizeof(struct knet_handle_compress_cfg)); + strncpy(knet_handle_compress_cfg.compress_model, "bzip2", sizeof(knet_handle_compress_cfg.compress_model) - 1); + knet_handle_compress_cfg.compress_level = -3; + + if((!knet_handle_compress(knet_h, &knet_handle_compress_cfg)) || (errno != EINVAL)) { + printf("knet_handle_compress accepted invalid (-3) compress level and for bzip2, which is no default defined\n"); + knet_handle_free(knet_h); + flush_logs(logfds[0], stdout); + close_logpipes(logfds); + exit(FAIL); + } + + flush_logs(logfds[0], stdout); +#endif + + printf("Test knet_handle_compress with zlib compress and not effective compression level (0)\n"); + + memset(&knet_handle_compress_cfg, 0, sizeof(struct knet_handle_compress_cfg)); + strncpy(knet_handle_compress_cfg.compress_model, "zlib", sizeof(knet_handle_compress_cfg.compress_model) - 1); + knet_handle_compress_cfg.compress_level = 0; + + if((knet_handle_compress(knet_h, &knet_handle_compress_cfg)) || (errno == EINVAL)) { + printf("knet_handle_compress failed to compress with default compression level\n"); + knet_handle_free(knet_h); 
+ flush_logs(logfds[0], stdout); + close_logpipes(logfds); + exit(FAIL); + } + + flush_logs(logfds[0], stdout); + printf("Test knet_handle_compress with zlib compress and negative level (-2)\n"); memset(&knet_handle_compress_cfg, 0, sizeof(struct knet_handle_compress_cfg)); strncpy(knet_handle_compress_cfg.compress_model, "zlib", sizeof(knet_handle_compress_cfg.compress_model) - 1); knet_handle_compress_cfg.compress_level = -2; if ((!knet_handle_compress(knet_h, &knet_handle_compress_cfg)) || (errno != EINVAL)) { printf("knet_handle_compress accepted invalid (-2) compress level for zlib\n"); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } flush_logs(logfds[0], stdout); printf("Test knet_handle_compress with zlib compress and excessive compress level\n"); memset(&knet_handle_compress_cfg, 0, sizeof(struct knet_handle_compress_cfg)); strncpy(knet_handle_compress_cfg.compress_model, "zlib", sizeof(knet_handle_compress_cfg.compress_model) - 1); knet_handle_compress_cfg.compress_level = 10; if ((!knet_handle_compress(knet_h, &knet_handle_compress_cfg)) || (errno != EINVAL)) { printf("knet_handle_compress accepted invalid (10) compress level for zlib\n"); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } flush_logs(logfds[0], stdout); printf("Test knet_handle_compress with zlib compress and excessive compress threshold\n"); memset(&knet_handle_compress_cfg, 0, sizeof(struct knet_handle_compress_cfg)); strncpy(knet_handle_compress_cfg.compress_model, "zlib", sizeof(knet_handle_compress_cfg.compress_model) - 1); knet_handle_compress_cfg.compress_level = 1; knet_handle_compress_cfg.compress_threshold = KNET_MAX_PACKET_SIZE +1; if ((!knet_handle_compress(knet_h, &knet_handle_compress_cfg)) || (errno != EINVAL)) { printf("knet_handle_compress accepted invalid compress threshold\n"); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } 
flush_logs(logfds[0], stdout); printf("Test knet_handle_compress with zlib compress model normal compress level and threshold\n"); memset(&knet_handle_compress_cfg, 0, sizeof(struct knet_handle_compress_cfg)); strncpy(knet_handle_compress_cfg.compress_model, "zlib", sizeof(knet_handle_compress_cfg.compress_model) - 1); knet_handle_compress_cfg.compress_level = 1; knet_handle_compress_cfg.compress_threshold = 64; if (knet_handle_compress(knet_h, &knet_handle_compress_cfg) != 0) { printf("knet_handle_compress did not accept zlib compress mode with compress level 1 cfg\n"); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } flush_logs(logfds[0], stdout); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); } int main(int argc, char *argv[]) { struct knet_compress_info compress_list[16]; size_t compress_list_entries; size_t i; memset(compress_list, 0, sizeof(compress_list)); if (knet_get_compress_list(compress_list, &compress_list_entries) < 0) { printf("knet_get_compress_list failed: %s\n", strerror(errno)); return FAIL; } if (compress_list_entries == 0) { printf("no compression modules detected. Skipping\n"); return SKIP; } for (i=0; i < compress_list_entries; i++) { if (!strcmp(compress_list[i].name, "zlib")) { test(); return PASS; } } printf("WARNING: zlib support not builtin the library. Unable to test/verify internal compress API calls\n"); return SKIP; } diff --git a/libknet/tests/api_knet_handle_pmtud_set.c b/libknet/tests/api_knet_handle_pmtud_set.c index 7a7ffb34..44043688 100644 --- a/libknet/tests/api_knet_handle_pmtud_set.c +++ b/libknet/tests/api_knet_handle_pmtud_set.c @@ -1,226 +1,226 @@ /* * Copyright (C) 2016-2019 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. 
Di Nitto * * This software licensed under GPL-2.0+ */ #include "config.h" #include #include #include #include #include #include "libknet.h" #include "internals.h" #include "test-common.h" static int private_data; static void sock_notify(void *pvt_data, int datafd, int8_t channel, uint8_t tx_rx, int error, int errorno) { return; } static void test(void) { knet_handle_t knet_h; int logfds[2]; unsigned int iface_mtu = 0, data_mtu; int datafd = 0; int8_t channel = 0; struct sockaddr_storage lo; if (make_local_sockaddr(&lo, 0) < 0) { printf("Unable to convert loopback to sockaddr: %s\n", strerror(errno)); exit(FAIL); } printf("Test knet_handle_pmtud_set incorrect knet_h\n"); if ((!knet_handle_pmtud_set(NULL, iface_mtu)) || (errno != EINVAL)) { printf("knet_handle_pmtud_set accepted invalid knet_h or returned incorrect error: %s\n", strerror(errno)); exit(FAIL); } setup_logpipes(logfds); knet_h = knet_handle_start(logfds, KNET_LOG_DEBUG); flush_logs(logfds[0], stdout); iface_mtu = KNET_PMTUD_SIZE_V4 + 1; printf("Test knet_handle_pmtud_set with wrong iface_mtu\n"); if ((!knet_handle_pmtud_set(knet_h, iface_mtu)) || (errno != EINVAL)) { printf("knet_handle_pmtud_set accepted invalid data_mtu or returned incorrect error: %s\n", strerror(errno)); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } if (knet_handle_enable_sock_notify(knet_h, &private_data, sock_notify) < 0) { printf("knet_handle_enable_sock_notify failed: %s\n", strerror(errno)); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } datafd = 0; channel = -1; if (knet_handle_add_datafd(knet_h, &datafd, &channel) < 0) { printf("knet_handle_add_datafd failed: %s\n", strerror(errno)); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } if (knet_host_add(knet_h, 1) < 0) { printf("knet_host_add failed: %s\n", strerror(errno)); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); 
close_logpipes(logfds); exit(FAIL); } if (knet_link_set_config(knet_h, 1, 0, KNET_TRANSPORT_UDP, &lo, &lo, 0) < 0) { printf("Unable to configure link: %s\n", strerror(errno)); knet_host_remove(knet_h, 1); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } if (knet_link_set_pong_count(knet_h, 1, 0, 1) < 0) { printf("knet_link_set_pong_count failed: %s\n", strerror(errno)); knet_host_remove(knet_h, 1); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } if (knet_link_set_enable(knet_h, 1, 0, 1) < 0) { printf("knet_link_set_enable failed: %s\n", strerror(errno)); knet_link_clear_config(knet_h, 1, 0); knet_host_remove(knet_h, 1); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } if (wait_for_host(knet_h, 1, 4, logfds[0], stdout) < 0) { printf("timeout waiting for host to be reachable"); knet_link_set_enable(knet_h, 1, 0, 0); knet_link_clear_config(knet_h, 1, 0); knet_host_remove(knet_h, 1); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } flush_logs(logfds[0], stdout); if (knet_handle_pmtud_get(knet_h, &data_mtu) < 0) { printf("knet_handle_pmtud_get failed error: %s\n", strerror(errno)); knet_link_set_enable(knet_h, 1, 0, 0); knet_link_clear_config(knet_h, 1, 0); knet_host_remove(knet_h, 1); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } /* * 28 = IP (20) + UDP (8) */ iface_mtu = data_mtu + 28 + KNET_HEADER_ALL_SIZE - 64; printf("Test knet_handle_pmtud_set with iface_mtu %u\n", iface_mtu); if (knet_handle_pmtud_set(knet_h, iface_mtu) < 0) { printf("knet_handle_pmtud_set failed error: %s\n", strerror(errno)); knet_link_set_enable(knet_h, 1, 0, 0); knet_link_clear_config(knet_h, 1, 0); knet_host_remove(knet_h, 1); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } /* * wait for PMTUd to pick up the change 
*/ sleep(1); flush_logs(logfds[0], stdout); if (knet_h->data_mtu != data_mtu - 64) { printf("knet_handle_pmtud_set failed to set the value\n"); knet_link_set_enable(knet_h, 1, 0, 0); knet_link_clear_config(knet_h, 1, 0); knet_host_remove(knet_h, 1); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } printf("Test knet_handle_pmtud_set with iface_mtu 0\n"); if (knet_handle_pmtud_set(knet_h, 0) < 0) { printf("knet_handle_pmtud_set failed error: %s\n", strerror(errno)); knet_link_set_enable(knet_h, 1, 0, 0); knet_link_clear_config(knet_h, 1, 0); knet_host_remove(knet_h, 1); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } /* * wait for PMTUd to pick up the change */ - sleep(1); + sleep(2); flush_logs(logfds[0], stdout); if (knet_h->data_mtu != data_mtu) { printf("knet_handle_pmtud_set failed to redetect MTU\n"); knet_link_set_enable(knet_h, 1, 0, 0); knet_link_clear_config(knet_h, 1, 0); knet_host_remove(knet_h, 1); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } knet_link_set_enable(knet_h, 1, 0, 0); knet_link_clear_config(knet_h, 1, 0); knet_host_remove(knet_h, 1); knet_handle_free(knet_h); flush_logs(logfds[0], stdout); close_logpipes(logfds); } int main(int argc, char *argv[]) { test(); return PASS; } diff --git a/libknet/tests/knet_bench.c b/libknet/tests/knet_bench.c index dfe5238e..dc04239c 100644 --- a/libknet/tests/knet_bench.c +++ b/libknet/tests/knet_bench.c @@ -1,1306 +1,1311 @@ /* * Copyright (C) 2016-2019 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. 
Di Nitto * * This software licensed under GPL-2.0+ */ #include "config.h" #include #include #include #include #include #include #include #include #include #include "libknet.h" #include "compat.h" #include "internals.h" #include "netutils.h" #include "transport_common.h" #include "threads_common.h" #include "test-common.h" #define MAX_NODES 128 static int senderid = -1; static int thisnodeid = -1; static knet_handle_t knet_h; static int datafd = 0; static int8_t channel = 0; static int globallistener = 0; static int continous = 0; static int show_stats = 0; static struct sockaddr_storage allv4; static struct sockaddr_storage allv6; static int broadcast_test = 1; static pthread_t rx_thread = (pthread_t)NULL; static char *rx_buf[PCKT_FRAG_MAX]; static int wait_for_perf_rx = 0; static char *compresscfg = NULL; static char *cryptocfg = NULL; static int machine_output = 0; static int use_access_lists = 0; static int bench_shutdown_in_progress = 0; static pthread_mutex_t shutdown_mutex = PTHREAD_MUTEX_INITIALIZER; #define TEST_PING 0 #define TEST_PING_AND_DATA 1 #define TEST_PERF_BY_SIZE 2 #define TEST_PERF_BY_TIME 3 static int test_type = TEST_PING; #define TEST_START 2 #define TEST_STOP 4 #define TEST_COMPLETE 6 #define ONE_GIGABYTE 1073741824 static uint64_t perf_by_size_size = 1 * ONE_GIGABYTE; static uint64_t perf_by_time_secs = 10; struct node { int nodeid; int links; uint8_t transport[KNET_MAX_LINK]; struct sockaddr_storage address[KNET_MAX_LINK]; }; static void print_help(void) { printf("knet_bench usage:\n"); printf(" -h print this help (no really)\n"); printf(" -d enable debug logs (default INFO)\n"); printf(" -f enable use of access lists (default: off)\n"); printf(" -c [implementation]:[crypto]:[hashing] crypto configuration. (default disabled)\n"); printf(" Example: -c nss:aes128:sha1\n"); printf(" -z [implementation]:[level]:[threshold] compress configuration. 
(default disabled)\n"); printf(" Example: -z zlib:5:100\n"); printf(" -p [active|passive|rr] (default: passive)\n"); printf(" -P [UDP|SCTP] (default: UDP) protocol (transport) to use for all links\n"); printf(" -t [nodeid] This nodeid (required)\n"); printf(" -n [nodeid],[proto]/[link1_ip],[link2_..] Other nodes information (at least one required)\n"); printf(" Example: -n 1,192.168.8.1,SCTP/3ffe::8:1,UDP/172...\n"); printf(" can be repeated up to %d and should contain also the localnode info\n", MAX_NODES); printf(" -b [port] baseport (default: 50000)\n"); printf(" -l enable global listener on 0.0.0.0/:: (default: off, incompatible with -o)\n"); printf(" -o enable baseport offset per nodeid\n"); printf(" -m change PMTUd interval in seconds (default: 60)\n"); printf(" -w dont wait for all nodes to be up before starting the test (default: wait)\n"); printf(" -T [ping|ping_data|perf-by-size|perf-by-time]\n"); printf(" test type (default: ping)\n"); printf(" ping: will wait for all hosts to join the knet network, sleep 5 seconds and quit\n"); printf(" ping_data: will wait for all hosts to join the knet network, sends some data to all nodes and quit\n"); printf(" perf-by-size: will wait for all hosts to join the knet network,\n"); printf(" perform a series of benchmarks by transmitting a known\n"); printf(" size/quantity of packets and measuring the time, then quit\n"); printf(" perf-by-time: will wait for all hosts to join the knet network,\n"); printf(" perform a series of benchmarks by transmitting a known\n"); printf(" size of packets for a given amount of time (10 seconds)\n"); printf(" and measuring the quantity of data transmitted, then quit\n"); printf(" -s nodeid that will generate traffic for benchmarks\n"); printf(" -S [size|seconds] when used in combination with -T perf-by-size it indicates how many GB of traffic to generate for the test. 
(default: 1GB)\n"); printf(" when used in combination with -T perf-by-time it indicates how many Seconds of traffic to generate for the test. (default: 10 seconds)\n"); printf(" -C repeat the test continously (default: off)\n"); printf(" -X[XX] show stats at the end of the run (default: 1)\n"); printf(" 1: show handle stats, 2: show summary link stats\n"); printf(" 3: show detailed link stats\n"); printf(" -a enable machine parsable output (default: off).\n"); } static void parse_nodes(char *nodesinfo[MAX_NODES], int onidx, int port, struct node nodes[MAX_NODES], int *thisidx) { int i; char *temp = NULL; char port_str[10]; memset(port_str, 0, sizeof(port_str)); sprintf(port_str, "%d", port); for (i = 0; i < onidx; i++) { nodes[i].nodeid = atoi(strtok(nodesinfo[i], ",")); if ((nodes[i].nodeid < 0) || (nodes[i].nodeid > KNET_MAX_HOST)) { printf("Invalid nodeid: %d (0 - %d)\n", nodes[i].nodeid, KNET_MAX_HOST); exit(FAIL); } if (thisnodeid == nodes[i].nodeid) { *thisidx = i; } while((temp = strtok(NULL, ","))) { char *slash = NULL; uint8_t transport; if (nodes[i].links == KNET_MAX_LINK) { printf("Too many links configured. 
Max %d\n", KNET_MAX_LINK); exit(FAIL); } slash = strstr(temp, "/"); if (slash) { memset(slash, 0, 1); transport = knet_get_transport_id_by_name(temp); if (transport == KNET_MAX_TRANSPORTS) { printf("Unknown transport: %s\n", temp); exit(FAIL); } nodes[i].transport[nodes[i].links] = transport; temp = slash + 1; } else { nodes[i].transport[nodes[i].links] = KNET_TRANSPORT_UDP; } if (knet_strtoaddr(temp, port_str, &nodes[i].address[nodes[i].links], sizeof(struct sockaddr_storage)) < 0) { printf("Unable to convert %s to sockaddress\n", temp); exit(FAIL); } nodes[i].links++; } } if (knet_strtoaddr("0.0.0.0", port_str, &allv4, sizeof(struct sockaddr_storage)) < 0) { printf("Unable to convert 0.0.0.0 to sockaddress\n"); exit(FAIL); } if (knet_strtoaddr("::", port_str, &allv6, sizeof(struct sockaddr_storage)) < 0) { printf("Unable to convert :: to sockaddress\n"); exit(FAIL); } for (i = 1; i < onidx; i++) { if (nodes[0].links != nodes[i].links) { printf("knet_bench does not support unbalanced link configuration\n"); exit(FAIL); } } return; } static int private_data; static void sock_notify(void *pvt_data, int local_datafd, int8_t local_channel, uint8_t tx_rx, int error, int errorno) { printf("[info]: error (%d - %d - %s) from socket: %d\n", error, errorno, strerror(errno), local_datafd); return; } static int ping_dst_host_filter(void *pvt_data, const unsigned char *outdata, ssize_t outdata_len, uint8_t tx_rx, knet_node_id_t this_host_id, knet_node_id_t src_host_id, int8_t *dst_channel, knet_node_id_t *dst_host_ids, size_t *dst_host_ids_entries) { if (broadcast_test) { return 1; } if (tx_rx == KNET_NOTIFY_TX) { memmove(&dst_host_ids[0], outdata, 2); } else { dst_host_ids[0] = this_host_id; } *dst_host_ids_entries = 1; return 0; } static void setup_knet(int argc, char *argv[]) { int logfd = 0; int rv; char *policystr = NULL, *protostr = NULL; char *othernodeinfo[MAX_NODES]; struct node nodes[MAX_NODES]; int thisidx = -1; int onidx = 0; int debug = KNET_LOG_INFO; int port = 
50000, portoffset = 0; int thisport = 0, otherport = 0; int thisnewport = 0, othernewport = 0; struct sockaddr_in *so_in; struct sockaddr_in6 *so_in6; struct sockaddr_storage *src; int i, link_idx, allnodesup = 0; int policy = KNET_LINK_POLICY_PASSIVE, policyfound = 0; int protocol = KNET_TRANSPORT_UDP, protofound = 0; int wait = 1; int pmtud_interval = 60; struct knet_handle_crypto_cfg knet_handle_crypto_cfg; char *cryptomodel = NULL, *cryptotype = NULL, *cryptohash = NULL; struct knet_handle_compress_cfg knet_handle_compress_cfg; memset(nodes, 0, sizeof(nodes)); while ((rv = getopt(argc, argv, "aCT:S:s:ldfom:wb:t:n:c:p:X::P:z:h")) != EOF) { switch(rv) { case 'h': print_help(); exit(PASS); break; case 'a': machine_output = 1; break; case 'd': debug = KNET_LOG_DEBUG; break; case 'f': use_access_lists = 1; break; case 'c': if (cryptocfg) { printf("Error: -c can only be specified once\n"); exit(FAIL); } cryptocfg = optarg; break; case 'p': if (policystr) { printf("Error: -p can only be specified once\n"); exit(FAIL); } - policystr = optarg; - if (!strcmp(policystr, "active")) { - policy = KNET_LINK_POLICY_ACTIVE; - policyfound = 1; - } - /* - * we can't use rr because clangs can't compile - * an array of 3 strings, one of which is 2 bytes long - */ - if (!strcmp(policystr, "round-robin")) { - policy = KNET_LINK_POLICY_RR; - policyfound = 1; - } - if (!strcmp(policystr, "passive")) { - policy = KNET_LINK_POLICY_PASSIVE; - policyfound = 1; + if (optarg) { + policystr = optarg; + if (!strcmp(policystr, "active")) { + policy = KNET_LINK_POLICY_ACTIVE; + policyfound = 1; + } + /* + * we can't use rr because clangs can't compile + * an array of 3 strings, one of which is 2 bytes long + */ + if (!strcmp(policystr, "round-robin")) { + policy = KNET_LINK_POLICY_RR; + policyfound = 1; + } + if (!strcmp(policystr, "passive")) { + policy = KNET_LINK_POLICY_PASSIVE; + policyfound = 1; + } } if (!policyfound) { printf("Error: invalid policy %s specified. 
-p accepts active|passive|rr\n", policystr); exit(FAIL); } break; case 'P': if (protostr) { printf("Error: -P can only be specified once\n"); exit(FAIL); } - protostr = optarg; - if (!strcmp(protostr, "UDP")) { - protocol = KNET_TRANSPORT_UDP; - protofound = 1; - } - if (!strcmp(protostr, "SCTP")) { - protocol = KNET_TRANSPORT_SCTP; - protofound = 1; + if (optarg) { + protostr = optarg; + if (!strcmp(protostr, "UDP")) { + protocol = KNET_TRANSPORT_UDP; + protofound = 1; + } + if (!strcmp(protostr, "SCTP")) { + protocol = KNET_TRANSPORT_SCTP; + protofound = 1; + } } if (!protofound) { printf("Error: invalid protocol %s specified. -P accepts udp|sctp\n", policystr); exit(FAIL); } break; case 't': if (thisnodeid >= 0) { printf("Error: -t can only be specified once\n"); exit(FAIL); } thisnodeid = atoi(optarg); if ((thisnodeid < 0) || (thisnodeid > 65536)) { printf("Error: -t nodeid out of range %d (1 - 65536)\n", thisnodeid); exit(FAIL); } break; case 'n': if (onidx == MAX_NODES) { printf("Error: too many other nodes. 
Max %d\n", MAX_NODES); exit(FAIL); } othernodeinfo[onidx] = optarg; onidx++; break; case 'b': port = atoi(optarg); if ((port < 1) || (port > 65536)) { printf("Error: port %d out of range (1 - 65536)\n", port); exit(FAIL); } break; case 'o': if (globallistener) { printf("Error: -l cannot be used with -o\n"); exit(FAIL); } portoffset = 1; break; case 'm': pmtud_interval = atoi(optarg); if (pmtud_interval < 1) { printf("Error: pmtud interval %d out of range (> 0)\n", pmtud_interval); exit(FAIL); } break; case 'l': if (portoffset) { printf("Error: -o cannot be used with -l\n"); exit(FAIL); } globallistener = 1; break; case 'w': wait = 0; break; case 's': if (senderid >= 0) { printf("Error: -s can only be specified once\n"); exit(FAIL); } senderid = atoi(optarg); if ((senderid < 0) || (senderid > 65536)) { printf("Error: -s nodeid out of range %d (1 - 65536)\n", senderid); exit(FAIL); } break; case 'T': - if (!strcmp("ping", optarg)) { - test_type = TEST_PING; - } - if (!strcmp("ping_data", optarg)) { - test_type = TEST_PING_AND_DATA; - } - if (!strcmp("perf-by-size", optarg)) { - test_type = TEST_PERF_BY_SIZE; - } - if (!strcmp("perf-by-time", optarg)) { - test_type = TEST_PERF_BY_TIME; + if (optarg) { + if (!strcmp("ping", optarg)) { + test_type = TEST_PING; + } + if (!strcmp("ping_data", optarg)) { + test_type = TEST_PING_AND_DATA; + } + if (!strcmp("perf-by-size", optarg)) { + test_type = TEST_PERF_BY_SIZE; + } + if (!strcmp("perf-by-time", optarg)) { + test_type = TEST_PERF_BY_TIME; + } + } else { + printf("Error: -T requires an option\n"); + exit(FAIL); } break; case 'S': perf_by_size_size = (uint64_t)atoi(optarg) * ONE_GIGABYTE; perf_by_time_secs = (uint64_t)atoi(optarg); break; case 'C': continous = 1; break; case 'X': if (optarg) { show_stats = atoi(optarg); } else { show_stats = 1; } break; case 'z': if (compresscfg) { printf("Error: -c can only be specified once\n"); exit(FAIL); } compresscfg = optarg; break; default: break; } } if (thisnodeid < 0) { 
printf("Who am I?!? missing -t from command line?\n"); exit(FAIL); } if (onidx < 1) { printf("no other nodes configured?!? missing -n from command line\n"); exit(FAIL); } parse_nodes(othernodeinfo, onidx, port, nodes, &thisidx); if (thisidx < 0) { printf("no config for this node found\n"); exit(FAIL); } if (senderid >= 0) { for (i=0; i < onidx; i++) { if (senderid == nodes[i].nodeid) { break; } } if (i == onidx) { printf("Unable to find senderid in nodelist\n"); exit(FAIL); } } if (((test_type == TEST_PERF_BY_SIZE) || (test_type == TEST_PERF_BY_TIME)) && (senderid < 0)) { printf("Error: performance test requires -s to be set (for now)\n"); exit(FAIL); } logfd = start_logging(stdout); knet_h = knet_handle_new(thisnodeid, logfd, debug); if (!knet_h) { printf("Unable to knet_handle_new: %s\n", strerror(errno)); exit(FAIL); } if (knet_handle_enable_access_lists(knet_h, use_access_lists) < 0) { printf("Unable to knet_handle_enable_access_lists: %s\n", strerror(errno)); exit(FAIL); } if (cryptocfg) { memset(&knet_handle_crypto_cfg, 0, sizeof(knet_handle_crypto_cfg)); cryptomodel = strtok(cryptocfg, ":"); cryptotype = strtok(NULL, ":"); cryptohash = strtok(NULL, ":"); if (cryptomodel) { strncpy(knet_handle_crypto_cfg.crypto_model, cryptomodel, sizeof(knet_handle_crypto_cfg.crypto_model) - 1); } if (cryptotype) { strncpy(knet_handle_crypto_cfg.crypto_cipher_type, cryptotype, sizeof(knet_handle_crypto_cfg.crypto_cipher_type) - 1); } if (cryptohash) { strncpy(knet_handle_crypto_cfg.crypto_hash_type, cryptohash, sizeof(knet_handle_crypto_cfg.crypto_hash_type) - 1); } knet_handle_crypto_cfg.private_key_len = KNET_MAX_KEY_LEN; if (knet_handle_crypto(knet_h, &knet_handle_crypto_cfg)) { printf("Unable to init crypto\n"); exit(FAIL); } } if (compresscfg) { memset(&knet_handle_compress_cfg, 0, sizeof(struct knet_handle_compress_cfg)); snprintf(knet_handle_compress_cfg.compress_model, 16, "%s", strtok(compresscfg, ":")); knet_handle_compress_cfg.compress_level = atoi(strtok(NULL, 
":")); knet_handle_compress_cfg.compress_threshold = atoi(strtok(NULL, ":")); if (knet_handle_compress(knet_h, &knet_handle_compress_cfg)) { printf("Unable to configure compress\n"); exit(FAIL); } } if (knet_handle_enable_sock_notify(knet_h, &private_data, sock_notify) < 0) { printf("knet_handle_enable_sock_notify failed: %s\n", strerror(errno)); knet_handle_free(knet_h); exit(FAIL); } datafd = 0; channel = -1; if (knet_handle_add_datafd(knet_h, &datafd, &channel) < 0) { printf("knet_handle_add_datafd failed: %s\n", strerror(errno)); knet_handle_free(knet_h); exit(FAIL); } if (knet_handle_pmtud_setfreq(knet_h, pmtud_interval) < 0) { printf("knet_handle_pmtud_setfreq failed: %s\n", strerror(errno)); knet_handle_free(knet_h); exit(FAIL); } for (i=0; i < onidx; i++) { if (i == thisidx) { continue; } if (knet_host_add(knet_h, nodes[i].nodeid) < 0) { printf("knet_host_add failed: %s\n", strerror(errno)); exit(FAIL); } if (knet_host_set_policy(knet_h, nodes[i].nodeid, policy) < 0) { printf("knet_host_set_policy failed: %s\n", strerror(errno)); exit(FAIL); } for (link_idx = 0; link_idx < nodes[i].links; link_idx++) { if (portoffset) { if (nodes[thisidx].address[link_idx].ss_family == AF_INET) { so_in = (struct sockaddr_in *)&nodes[thisidx].address[link_idx]; thisport = ntohs(so_in->sin_port); thisnewport = thisport + nodes[i].nodeid; so_in->sin_port = (htons(thisnewport)); so_in = (struct sockaddr_in *)&nodes[i].address[link_idx]; otherport = ntohs(so_in->sin_port); othernewport = otherport + nodes[thisidx].nodeid; so_in->sin_port = (htons(othernewport)); } else { so_in6 = (struct sockaddr_in6 *)&nodes[thisidx].address[link_idx]; thisport = ntohs(so_in6->sin6_port); thisnewport = thisport + nodes[i].nodeid; so_in6->sin6_port = (htons(thisnewport)); so_in6 = (struct sockaddr_in6 *)&nodes[i].address[link_idx]; otherport = ntohs(so_in6->sin6_port); othernewport = otherport + nodes[thisidx].nodeid; so_in6->sin6_port = (htons(othernewport)); } } if (!globallistener) { src = 
&nodes[thisidx].address[link_idx]; } else { if (nodes[thisidx].address[link_idx].ss_family == AF_INET) { src = &allv4; } else { src = &allv6; } } /* * -P overrides per link protocol configuration */ if (protofound) { nodes[i].transport[link_idx] = protocol; } if (knet_link_set_config(knet_h, nodes[i].nodeid, link_idx, nodes[i].transport[link_idx], src, &nodes[i].address[link_idx], 0) < 0) { printf("Unable to configure link: %s\n", strerror(errno)); exit(FAIL); } if (portoffset) { if (nodes[thisidx].address[link_idx].ss_family == AF_INET) { so_in = (struct sockaddr_in *)&nodes[thisidx].address[link_idx]; so_in->sin_port = (htons(thisport)); so_in = (struct sockaddr_in *)&nodes[i].address[link_idx]; so_in->sin_port = (htons(otherport)); } else { so_in6 = (struct sockaddr_in6 *)&nodes[thisidx].address[link_idx]; so_in6->sin6_port = (htons(thisport)); so_in6 = (struct sockaddr_in6 *)&nodes[i].address[link_idx]; so_in6->sin6_port = (htons(otherport)); } } if (knet_link_set_enable(knet_h, nodes[i].nodeid, link_idx, 1) < 0) { printf("knet_link_set_enable failed: %s\n", strerror(errno)); exit(FAIL); } if (knet_link_set_ping_timers(knet_h, nodes[i].nodeid, link_idx, 1000, 10000, 2048) < 0) { printf("knet_link_set_ping_timers failed: %s\n", strerror(errno)); exit(FAIL); } if (knet_link_set_pong_count(knet_h, nodes[i].nodeid, link_idx, 2) < 0) { printf("knet_link_set_pong_count failed: %s\n", strerror(errno)); exit(FAIL); } } } if (knet_handle_enable_filter(knet_h, NULL, ping_dst_host_filter)) { printf("Unable to enable dst_host_filter: %s\n", strerror(errno)); exit(FAIL); } if (knet_handle_setfwd(knet_h, 1) < 0) { printf("knet_handle_setfwd failed: %s\n", strerror(errno)); exit(FAIL); } if (wait) { while(!allnodesup) { allnodesup = 1; for (i=0; i < onidx; i++) { if (i == thisidx) { continue; } if (knet_h->host_index[nodes[i].nodeid]->status.reachable != 1) { printf("[info]: waiting host %d to be reachable\n", nodes[i].nodeid); allnodesup = 0; } } if (!allnodesup) { sleep(1); 
} } sleep(1); } } static void *_rx_thread(void *args) { int rx_epoll; struct epoll_event ev; struct epoll_event events[KNET_EPOLL_MAX_EVENTS]; struct sockaddr_storage address[PCKT_FRAG_MAX]; struct knet_mmsghdr msg[PCKT_FRAG_MAX]; struct iovec iov_in[PCKT_FRAG_MAX]; int i, msg_recv; struct timespec clock_start, clock_end; unsigned long long time_diff = 0; uint64_t rx_pkts = 0; uint64_t rx_bytes = 0; unsigned int current_pckt_size = 0; for (i = 0; i < PCKT_FRAG_MAX; i++) { rx_buf[i] = malloc(KNET_MAX_PACKET_SIZE); if (!rx_buf[i]) { printf("RXT: Unable to malloc!\nHALTING RX THREAD!\n"); return NULL; } memset(rx_buf[i], 0, KNET_MAX_PACKET_SIZE); iov_in[i].iov_base = (void *)rx_buf[i]; iov_in[i].iov_len = KNET_MAX_PACKET_SIZE; memset(&msg[i].msg_hdr, 0, sizeof(struct msghdr)); msg[i].msg_hdr.msg_name = &address[i]; msg[i].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage); msg[i].msg_hdr.msg_iov = &iov_in[i]; msg[i].msg_hdr.msg_iovlen = 1; } rx_epoll = epoll_create(KNET_EPOLL_MAX_EVENTS + 1); if (rx_epoll < 0) { printf("RXT: Unable to create epoll!\nHALTING RX THREAD!\n"); return NULL; } memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = datafd; if (epoll_ctl(rx_epoll, EPOLL_CTL_ADD, datafd, &ev)) { printf("RXT: Unable to add datafd to epoll\nHALTING RX THREAD!\n"); return NULL; } memset(&clock_start, 0, sizeof(clock_start)); memset(&clock_end, 0, sizeof(clock_start)); while (!bench_shutdown_in_progress) { if (epoll_wait(rx_epoll, events, KNET_EPOLL_MAX_EVENTS, 1) >= 1) { msg_recv = _recvmmsg(datafd, &msg[0], PCKT_FRAG_MAX, MSG_DONTWAIT | MSG_NOSIGNAL); if (msg_recv < 0) { printf("[info]: RXT: error from recvmmsg: %s\n", strerror(errno)); } switch(test_type) { case TEST_PING_AND_DATA: for (i = 0; i < msg_recv; i++) { if (msg[i].msg_len == 0) { printf("[info]: RXT: received 0 bytes message?\n"); } printf("[info]: received %u bytes message: %s\n", msg[i].msg_len, (char *)msg[i].msg_hdr.msg_iov->iov_base); } break; case TEST_PERF_BY_TIME: 
case TEST_PERF_BY_SIZE: for (i = 0; i < msg_recv; i++) { if (msg[i].msg_len < 64) { if (msg[i].msg_len == 0) { printf("[info]: RXT: received 0 bytes message?\n"); } if (msg[i].msg_len == TEST_START) { if (clock_gettime(CLOCK_MONOTONIC, &clock_start) != 0) { printf("[info]: unable to get start time!\n"); } } if (msg[i].msg_len == TEST_STOP) { double average_rx_mbytes; double average_rx_pkts; double time_diff_sec; if (clock_gettime(CLOCK_MONOTONIC, &clock_end) != 0) { printf("[info]: unable to get end time!\n"); } timespec_diff(clock_start, clock_end, &time_diff); /* * adjust for sleep(2) between sending the last data and TEST_STOP */ time_diff = time_diff - 2000000000llu; /* * convert to seconds */ time_diff_sec = (double)time_diff / 1000000000llu; average_rx_mbytes = (double)((rx_bytes / time_diff_sec) / (1024 * 1024)); average_rx_pkts = (double)(rx_pkts / time_diff_sec); if (!machine_output) { printf("[perf] execution time: %8.4f secs Average speed: %8.4f MB/sec %8.4f pckts/sec (size: %u total: %" PRIu64 ")\n", time_diff_sec, average_rx_mbytes, average_rx_pkts, current_pckt_size, rx_pkts); } else { printf("[perf],%.4f,%u,%" PRIu64 ",%.4f,%.4f\n", time_diff_sec, current_pckt_size, rx_pkts, average_rx_mbytes, average_rx_pkts); } rx_pkts = 0; rx_bytes = 0; current_pckt_size = 0; } if (msg[i].msg_len == TEST_COMPLETE) { wait_for_perf_rx = 1; } continue; } rx_pkts++; rx_bytes = rx_bytes + msg[i].msg_len; current_pckt_size = msg[i].msg_len; } break; } } } epoll_ctl(rx_epoll, EPOLL_CTL_DEL, datafd, &ev); close(rx_epoll); return NULL; } static void setup_data_txrx_common(void) { if (!rx_thread) { if (knet_handle_enable_filter(knet_h, NULL, ping_dst_host_filter)) { printf("Unable to enable dst_host_filter: %s\n", strerror(errno)); exit(FAIL); } printf("[info]: setting up rx thread\n"); if (pthread_create(&rx_thread, 0, _rx_thread, NULL)) { printf("Unable to start rx thread\n"); exit(FAIL); } } } static void stop_rx_thread(void) { void *retval; int i; if (rx_thread) { 
printf("[info]: shutting down rx thread\n"); sleep(2); pthread_cancel(rx_thread); pthread_join(rx_thread, &retval); for (i = 0; i < PCKT_FRAG_MAX; i ++) { free(rx_buf[i]); } } } static void send_ping_data(void) { char buf[65535]; ssize_t len; memset(&buf, 0, sizeof(buf)); snprintf(buf, sizeof(buf), "Hello world!"); if (compresscfg) { len = sizeof(buf); } else { len = strlen(buf); } if (knet_send(knet_h, buf, len, channel) != len) { printf("[info]: Error sending hello world: %s\n", strerror(errno)); } sleep(1); } static int send_messages(struct knet_mmsghdr *msg, int msgs_to_send) { int sent_msgs, prev_sent, progress, total_sent; total_sent = 0; sent_msgs = 0; prev_sent = 0; progress = 1; retry: errno = 0; sent_msgs = _sendmmsg(datafd, 0, &msg[0], msgs_to_send, MSG_NOSIGNAL); if (sent_msgs < 0) { if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) { usleep(KNET_THREADS_TIMERES / 16); goto retry; } printf("[info]: Unable to send messages: %s\n", strerror(errno)); return -1; } total_sent = total_sent + sent_msgs; if ((sent_msgs >= 0) && (sent_msgs < msgs_to_send)) { if ((sent_msgs) || (progress)) { msgs_to_send = msgs_to_send - sent_msgs; prev_sent = prev_sent + sent_msgs; if (sent_msgs) { progress = 1; } else { progress = 0; } goto retry; } if (!progress) { printf("[info]: Unable to send more messages after retry\n"); } } return total_sent; } static int setup_send_buffers_common(struct knet_mmsghdr *msg, struct iovec *iov_out, char *tx_buf[]) { int i; for (i = 0; i < PCKT_FRAG_MAX; i++) { tx_buf[i] = malloc(KNET_MAX_PACKET_SIZE); if (!tx_buf[i]) { printf("TXT: Unable to malloc!\n"); return -1; } memset(tx_buf[i], 0, KNET_MAX_PACKET_SIZE); iov_out[i].iov_base = (void *)tx_buf[i]; memset(&msg[i].msg_hdr, 0, sizeof(struct msghdr)); msg[i].msg_hdr.msg_iov = &iov_out[i]; msg[i].msg_hdr.msg_iovlen = 1; } return 0; } static void send_perf_data_by_size(void) { char *tx_buf[PCKT_FRAG_MAX]; struct knet_mmsghdr msg[PCKT_FRAG_MAX]; struct iovec iov_out[PCKT_FRAG_MAX]; char 
ctrl_message[16]; int sent_msgs; int i; uint64_t total_pkts_to_tx; uint64_t packets_to_send; uint32_t packetsize = 64; setup_send_buffers_common(msg, iov_out, tx_buf); while (packetsize <= KNET_MAX_PACKET_SIZE) { for (i = 0; i < PCKT_FRAG_MAX; i++) { iov_out[i].iov_len = packetsize; } total_pkts_to_tx = perf_by_size_size / packetsize; printf("[info]: testing with %u packet size. total bytes to transfer: %" PRIu64 " (%" PRIu64 " packets)\n", packetsize, perf_by_size_size, total_pkts_to_tx); memset(ctrl_message, 0, sizeof(ctrl_message)); knet_send(knet_h, ctrl_message, TEST_START, channel); while (total_pkts_to_tx > 0) { if (total_pkts_to_tx >= PCKT_FRAG_MAX) { packets_to_send = PCKT_FRAG_MAX; } else { packets_to_send = total_pkts_to_tx; } sent_msgs = send_messages(&msg[0], packets_to_send); if (sent_msgs < 0) { printf("Something went wrong, aborting\n"); exit(FAIL); } total_pkts_to_tx = total_pkts_to_tx - sent_msgs; } sleep(2); knet_send(knet_h, ctrl_message, TEST_STOP, channel); if (packetsize == KNET_MAX_PACKET_SIZE) { break; } /* * Use a multiplier that can always divide properly a GB * into smaller chunks without worry about boundaries */ packetsize *= 4; if (packetsize > KNET_MAX_PACKET_SIZE) { packetsize = KNET_MAX_PACKET_SIZE; } } knet_send(knet_h, ctrl_message, TEST_COMPLETE, channel); for (i = 0; i < PCKT_FRAG_MAX; i++) { free(tx_buf[i]); } } /* For sorting the node list into order */ static int node_compare(const void *aptr, const void *bptr) { uint16_t a,b; a = *(uint16_t *)aptr; b = *(uint16_t *)bptr; return a > b; } static void display_stats(int level) { struct knet_handle_stats handle_stats; struct knet_link_status link_status; struct knet_link_stats total_link_stats; knet_node_id_t host_list[KNET_MAX_HOST]; uint8_t link_list[KNET_MAX_LINK]; - int res; unsigned int i,j; size_t num_hosts, num_links; - res = knet_handle_get_stats(knet_h, &handle_stats, sizeof(handle_stats)); - if (res) { + if (knet_handle_get_stats(knet_h, &handle_stats, 
sizeof(handle_stats)) < 0) { perror("[info]: failed to get knet handle stats"); return; } + if (compresscfg || cryptocfg) { printf("\n"); printf("[stat]: handle stats\n"); printf("[stat]: ------------\n"); if (compresscfg) { printf("[stat]: tx_uncompressed_packets: %" PRIu64 "\n", handle_stats.tx_uncompressed_packets); printf("[stat]: tx_compressed_packets: %" PRIu64 "\n", handle_stats.tx_compressed_packets); printf("[stat]: tx_compressed_original_bytes: %" PRIu64 "\n", handle_stats.tx_compressed_original_bytes); printf("[stat]: tx_compressed_size_bytes: %" PRIu64 "\n", handle_stats.tx_compressed_size_bytes ); printf("[stat]: tx_compress_time_ave: %" PRIu64 "\n", handle_stats.tx_compress_time_ave); printf("[stat]: tx_compress_time_min: %" PRIu64 "\n", handle_stats.tx_compress_time_min); printf("[stat]: tx_compress_time_max: %" PRIu64 "\n", handle_stats.tx_compress_time_max); printf("[stat]: rx_compressed_packets: %" PRIu64 "\n", handle_stats.rx_compressed_packets); printf("[stat]: rx_compressed_original_bytes: %" PRIu64 "\n", handle_stats.rx_compressed_original_bytes); printf("[stat]: rx_compressed_size_bytes: %" PRIu64 "\n", handle_stats.rx_compressed_size_bytes); printf("[stat]: rx_compress_time_ave: %" PRIu64 "\n", handle_stats.rx_compress_time_ave); printf("[stat]: rx_compress_time_min: %" PRIu64 "\n", handle_stats.rx_compress_time_min); printf("[stat]: rx_compress_time_max: %" PRIu64 "\n", handle_stats.rx_compress_time_max); printf("\n"); } if (cryptocfg) { printf("[stat]: tx_crypt_packets: %" PRIu64 "\n", handle_stats.tx_crypt_packets); printf("[stat]: tx_crypt_byte_overhead: %" PRIu64 "\n", handle_stats.tx_crypt_byte_overhead); printf("[stat]: tx_crypt_time_ave: %" PRIu64 "\n", handle_stats.tx_crypt_time_ave); printf("[stat]: tx_crypt_time_min: %" PRIu64 "\n", handle_stats.tx_crypt_time_min); printf("[stat]: tx_crypt_time_max: %" PRIu64 "\n", handle_stats.tx_crypt_time_max); printf("[stat]: rx_crypt_packets: %" PRIu64 "\n", handle_stats.rx_crypt_packets); 
printf("[stat]: rx_crypt_time_ave: %" PRIu64 "\n", handle_stats.rx_crypt_time_ave); printf("[stat]: rx_crypt_time_min: %" PRIu64 "\n", handle_stats.rx_crypt_time_min); printf("[stat]: rx_crypt_time_max: %" PRIu64 "\n", handle_stats.rx_crypt_time_max); printf("\n"); } } if (level < 2) { return; } memset(&total_link_stats, 0, sizeof(struct knet_link_stats)); - res = knet_host_get_host_list(knet_h, host_list, &num_hosts); - if (res) { + if (knet_host_get_host_list(knet_h, host_list, &num_hosts) < 0) { perror("[info]: cannot get host list for stats"); return; } /* Print in host ID order */ qsort(host_list, num_hosts, sizeof(uint16_t), node_compare); for (j=0; j 2) { printf("\n"); printf("[stat]: Node %d Link %d\n", host_list[j], link_list[i]); printf("[stat]: tx_data_packets: %" PRIu64 "\n", link_status.stats.tx_data_packets); printf("[stat]: rx_data_packets: %" PRIu64 "\n", link_status.stats.rx_data_packets); printf("[stat]: tx_data_bytes: %" PRIu64 "\n", link_status.stats.tx_data_bytes); printf("[stat]: rx_data_bytes: %" PRIu64 "\n", link_status.stats.rx_data_bytes); printf("[stat]: rx_ping_packets: %" PRIu64 "\n", link_status.stats.rx_ping_packets); printf("[stat]: tx_ping_packets: %" PRIu64 "\n", link_status.stats.tx_ping_packets); printf("[stat]: rx_ping_bytes: %" PRIu64 "\n", link_status.stats.rx_ping_bytes); printf("[stat]: tx_ping_bytes: %" PRIu64 "\n", link_status.stats.tx_ping_bytes); printf("[stat]: rx_pong_packets: %" PRIu64 "\n", link_status.stats.rx_pong_packets); printf("[stat]: tx_pong_packets: %" PRIu64 "\n", link_status.stats.tx_pong_packets); printf("[stat]: rx_pong_bytes: %" PRIu64 "\n", link_status.stats.rx_pong_bytes); printf("[stat]: tx_pong_bytes: %" PRIu64 "\n", link_status.stats.tx_pong_bytes); printf("[stat]: rx_pmtu_packets: %" PRIu64 "\n", link_status.stats.rx_pmtu_packets); printf("[stat]: tx_pmtu_packets: %" PRIu64 "\n", link_status.stats.tx_pmtu_packets); printf("[stat]: rx_pmtu_bytes: %" PRIu64 "\n", link_status.stats.rx_pmtu_bytes); 
printf("[stat]: tx_pmtu_bytes: %" PRIu64 "\n", link_status.stats.tx_pmtu_bytes); printf("[stat]: tx_total_packets: %" PRIu64 "\n", link_status.stats.tx_total_packets); printf("[stat]: rx_total_packets: %" PRIu64 "\n", link_status.stats.rx_total_packets); printf("[stat]: tx_total_bytes: %" PRIu64 "\n", link_status.stats.tx_total_bytes); printf("[stat]: rx_total_bytes: %" PRIu64 "\n", link_status.stats.rx_total_bytes); printf("[stat]: tx_total_errors: %" PRIu64 "\n", link_status.stats.tx_total_errors); printf("[stat]: tx_total_retries: %" PRIu64 "\n", link_status.stats.tx_total_retries); printf("[stat]: tx_pmtu_errors: %" PRIu32 "\n", link_status.stats.tx_pmtu_errors); printf("[stat]: tx_pmtu_retries: %" PRIu32 "\n", link_status.stats.tx_pmtu_retries); printf("[stat]: tx_ping_errors: %" PRIu32 "\n", link_status.stats.tx_ping_errors); printf("[stat]: tx_ping_retries: %" PRIu32 "\n", link_status.stats.tx_ping_retries); printf("[stat]: tx_pong_errors: %" PRIu32 "\n", link_status.stats.tx_pong_errors); printf("[stat]: tx_pong_retries: %" PRIu32 "\n", link_status.stats.tx_pong_retries); printf("[stat]: tx_data_errors: %" PRIu32 "\n", link_status.stats.tx_data_errors); printf("[stat]: tx_data_retries: %" PRIu32 "\n", link_status.stats.tx_data_retries); printf("[stat]: latency_min: %" PRIu32 "\n", link_status.stats.latency_min); printf("[stat]: latency_max: %" PRIu32 "\n", link_status.stats.latency_max); printf("[stat]: latency_ave: %" PRIu32 "\n", link_status.stats.latency_ave); printf("[stat]: latency_samples: %" PRIu32 "\n", link_status.stats.latency_samples); printf("[stat]: down_count: %" PRIu32 "\n", link_status.stats.down_count); printf("[stat]: up_count: %" PRIu32 "\n", link_status.stats.up_count); } } } printf("\n"); printf("[stat]: Total link stats\n"); printf("[stat]: ----------------\n"); printf("[stat]: tx_data_packets: %" PRIu64 "\n", total_link_stats.tx_data_packets); printf("[stat]: rx_data_packets: %" PRIu64 "\n", total_link_stats.rx_data_packets); 
printf("[stat]: tx_data_bytes: %" PRIu64 "\n", total_link_stats.tx_data_bytes); printf("[stat]: rx_data_bytes: %" PRIu64 "\n", total_link_stats.rx_data_bytes); printf("[stat]: rx_ping_packets: %" PRIu64 "\n", total_link_stats.rx_ping_packets); printf("[stat]: tx_ping_packets: %" PRIu64 "\n", total_link_stats.tx_ping_packets); printf("[stat]: rx_ping_bytes: %" PRIu64 "\n", total_link_stats.rx_ping_bytes); printf("[stat]: tx_ping_bytes: %" PRIu64 "\n", total_link_stats.tx_ping_bytes); printf("[stat]: rx_pong_packets: %" PRIu64 "\n", total_link_stats.rx_pong_packets); printf("[stat]: tx_pong_packets: %" PRIu64 "\n", total_link_stats.tx_pong_packets); printf("[stat]: rx_pong_bytes: %" PRIu64 "\n", total_link_stats.rx_pong_bytes); printf("[stat]: tx_pong_bytes: %" PRIu64 "\n", total_link_stats.tx_pong_bytes); printf("[stat]: rx_pmtu_packets: %" PRIu64 "\n", total_link_stats.rx_pmtu_packets); printf("[stat]: tx_pmtu_packets: %" PRIu64 "\n", total_link_stats.tx_pmtu_packets); printf("[stat]: rx_pmtu_bytes: %" PRIu64 "\n", total_link_stats.rx_pmtu_bytes); printf("[stat]: tx_pmtu_bytes: %" PRIu64 "\n", total_link_stats.tx_pmtu_bytes); printf("[stat]: tx_total_packets: %" PRIu64 "\n", total_link_stats.tx_total_packets); printf("[stat]: rx_total_packets: %" PRIu64 "\n", total_link_stats.rx_total_packets); printf("[stat]: tx_total_bytes: %" PRIu64 "\n", total_link_stats.tx_total_bytes); printf("[stat]: rx_total_bytes: %" PRIu64 "\n", total_link_stats.rx_total_bytes); printf("[stat]: tx_total_errors: %" PRIu64 "\n", total_link_stats.tx_total_errors); printf("[stat]: tx_total_retries: %" PRIu64 "\n", total_link_stats.tx_total_retries); printf("[stat]: tx_pmtu_errors: %" PRIu32 "\n", total_link_stats.tx_pmtu_errors); printf("[stat]: tx_pmtu_retries: %" PRIu32 "\n", total_link_stats.tx_pmtu_retries); printf("[stat]: tx_ping_errors: %" PRIu32 "\n", total_link_stats.tx_ping_errors); printf("[stat]: tx_ping_retries: %" PRIu32 "\n", total_link_stats.tx_ping_retries); printf("[stat]: 
tx_pong_errors: %" PRIu32 "\n", total_link_stats.tx_pong_errors); printf("[stat]: tx_pong_retries: %" PRIu32 "\n", total_link_stats.tx_pong_retries); printf("[stat]: tx_data_errors: %" PRIu32 "\n", total_link_stats.tx_data_errors); printf("[stat]: tx_data_retries: %" PRIu32 "\n", total_link_stats.tx_data_retries); printf("[stat]: down_count: %" PRIu32 "\n", total_link_stats.down_count); printf("[stat]: up_count: %" PRIu32 "\n", total_link_stats.up_count); } static void send_perf_data_by_time(void) { char *tx_buf[PCKT_FRAG_MAX]; struct knet_mmsghdr msg[PCKT_FRAG_MAX]; struct iovec iov_out[PCKT_FRAG_MAX]; char ctrl_message[16]; int sent_msgs; int i; uint32_t packetsize = 64; struct timespec clock_start, clock_end; unsigned long long time_diff = 0; setup_send_buffers_common(msg, iov_out, tx_buf); memset(&clock_start, 0, sizeof(clock_start)); memset(&clock_end, 0, sizeof(clock_start)); while (packetsize <= KNET_MAX_PACKET_SIZE) { for (i = 0; i < PCKT_FRAG_MAX; i++) { iov_out[i].iov_len = packetsize; } printf("[info]: testing with %u bytes packet size for %" PRIu64 " seconds.\n", packetsize, perf_by_time_secs); memset(ctrl_message, 0, sizeof(ctrl_message)); knet_send(knet_h, ctrl_message, TEST_START, channel); if (clock_gettime(CLOCK_MONOTONIC, &clock_start) != 0) { printf("[info]: unable to get start time!\n"); } time_diff = 0; while (time_diff < (perf_by_time_secs * 1000000000llu)) { sent_msgs = send_messages(&msg[0], PCKT_FRAG_MAX); if (sent_msgs < 0) { printf("Something went wrong, aborting\n"); exit(FAIL); } if (clock_gettime(CLOCK_MONOTONIC, &clock_end) != 0) { printf("[info]: unable to get end time!\n"); } timespec_diff(clock_start, clock_end, &time_diff); } sleep(2); knet_send(knet_h, ctrl_message, TEST_STOP, channel); if (packetsize == KNET_MAX_PACKET_SIZE) { break; } /* * Use a multiplier that can always divide properly a GB * into smaller chunks without worry about boundaries */ packetsize *= 4; if (packetsize > KNET_MAX_PACKET_SIZE) { packetsize = 
KNET_MAX_PACKET_SIZE; } } knet_send(knet_h, ctrl_message, TEST_COMPLETE, channel); for (i = 0; i < PCKT_FRAG_MAX; i++) { free(tx_buf[i]); } } static void cleanup_all(void) { if (pthread_mutex_lock(&shutdown_mutex)) { return; } if (bench_shutdown_in_progress) { pthread_mutex_unlock(&shutdown_mutex); return; } bench_shutdown_in_progress = 1; pthread_mutex_unlock(&shutdown_mutex); if (rx_thread) { stop_rx_thread(); } knet_handle_stop(knet_h); } static void sigint_handler(int signum) { printf("[info]: cleaning up... got signal: %d\n", signum); cleanup_all(); exit(PASS); } int main(int argc, char *argv[]) { if (signal(SIGINT, sigint_handler) == SIG_ERR) { printf("Unable to configure SIGINT handler\n"); exit(FAIL); } setup_knet(argc, argv); setup_data_txrx_common(); sleep(5); restart: switch(test_type) { default: case TEST_PING: /* basic ping, no data */ sleep(5); break; case TEST_PING_AND_DATA: send_ping_data(); break; case TEST_PERF_BY_SIZE: if (senderid == thisnodeid) { send_perf_data_by_size(); } else { printf("[info]: waiting for perf rx thread to finish\n"); while(!wait_for_perf_rx) { sleep(1); } } break; case TEST_PERF_BY_TIME: if (senderid == thisnodeid) { send_perf_data_by_time(); } else { printf("[info]: waiting for perf rx thread to finish\n"); while(!wait_for_perf_rx) { sleep(1); } } break; } if (continous) { goto restart; } if (show_stats) { display_stats(show_stats); } cleanup_all(); return PASS; } diff --git a/libknet/tests/test-common.c b/libknet/tests/test-common.c index a7d6c5cc..4bf58c61 100644 --- a/libknet/tests/test-common.c +++ b/libknet/tests/test-common.c @@ -1,527 +1,526 @@ /* * Copyright (C) 2016-2019 Red Hat, Inc. All rights reserved. * * Author: Fabio M. 
Di Nitto * * This software licensed under GPL-2.0+ */ #include "config.h" #include #include #include #include #include #include #include #include #include #include #include "libknet.h" #include "test-common.h" static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER; static int log_init = 0; static pthread_mutex_t log_thread_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_t log_thread; static int log_thread_init = 0; static int log_fds[2]; struct log_thread_data { int logfd; FILE *std; }; static struct log_thread_data data; static pthread_mutex_t shutdown_mutex = PTHREAD_MUTEX_INITIALIZER; static int stop_in_progress = 0; static int _read_pipe(int fd, char **file, size_t *length) { char buf[4096]; int n; int done = 0; *file = NULL; *length = 0; memset(buf, 0, sizeof(buf)); while (!done) { n = read(fd, buf, sizeof(buf)); if (n < 0) { if (errno == EINTR) continue; if (*file) free(*file); return n; } if (n == 0 && (!*length)) return 0; if (n == 0) done = 1; if (*file) *file = realloc(*file, (*length) + n + done); else *file = malloc(n + done); if (!*file) return -1; memmove((*file) + (*length), buf, n); *length += (done + n); } /* Null terminator */ (*file)[(*length) - 1] = 0; return 0; } int execute_shell(const char *command, char **error_string) { pid_t pid; int status, err = 0; int fd[2]; size_t size = 0; if ((command == NULL) || (!error_string)) { errno = EINVAL; return FAIL; } *error_string = NULL; err = pipe(fd); if (err) goto out_clean; pid = fork(); if (pid < 0) { err = pid; goto out_clean; } if (pid) { /* parent */ close(fd[1]); err = _read_pipe(fd[0], error_string, &size); if (err) goto out_clean0; waitpid(pid, &status, 0); if (!WIFEXITED(status)) { err = -1; goto out_clean0; } if (WIFEXITED(status) && WEXITSTATUS(status) != 0) { err = WEXITSTATUS(status); goto out_clean0; } goto out_clean0; } else { /* child */ close(0); close(1); close(2); close(fd[0]); dup2(fd[1], 1); dup2(fd[1], 2); close(fd[1]); execlp("/bin/sh", "/bin/sh", "-c", command, NULL); 
exit(FAIL); } out_clean: close(fd[1]); out_clean0: close(fd[0]); return err; } int is_memcheck(void) { char *val; val = getenv("KNETMEMCHECK"); if (val) { if (!strncmp(val, "yes", 3)) { return 1; } } return 0; } int is_helgrind(void) { char *val; val = getenv("KNETHELGRIND"); if (val) { if (!strncmp(val, "yes", 3)) { return 1; } } return 0; } void set_scheduler(int policy) { struct sched_param sched_param; int err; err = sched_get_priority_max(policy); if (err < 0) { printf("Could not get maximum scheduler priority\n"); exit(FAIL); } sched_param.sched_priority = err; err = sched_setscheduler(0, policy, &sched_param); if (err < 0) { printf("Could not set priority\n"); exit(FAIL); } return; } int setup_logpipes(int *logfds) { if (pipe2(logfds, O_CLOEXEC | O_NONBLOCK) < 0) { printf("Unable to setup logging pipe\n"); exit(FAIL); } return PASS; } void close_logpipes(int *logfds) { close(logfds[0]); logfds[0] = 0; close(logfds[1]); logfds[1] = 0; } void flush_logs(int logfd, FILE *std) { + struct knet_log_msg msg; + int len; + while (1) { - struct knet_log_msg msg; - - for (size_t bytes_read = 0; bytes_read < sizeof(msg); ) { - int len = read(logfd, &msg + bytes_read, - sizeof(msg) - bytes_read); - if (len <= 0) { - /* - * clear errno to avoid incorrect propagation - */ - errno = 0; - return; - } - bytes_read += len; + len = read(logfd, &msg, sizeof(msg)); + if (len != sizeof(msg)) { + /* + * clear errno to avoid incorrect propagation + */ + errno = 0; + return; } + msg.msg[sizeof(msg.msg) - 1] = 0; + fprintf(std, "[knet]: [%s] %s: %.*s\n", knet_log_get_loglevel_name(msg.msglevel), knet_log_get_subsystem_name(msg.subsystem), KNET_MAX_LOG_MSG_SIZE, msg.msg); } } static void *_logthread(void *args) { while (1) { int num; struct timeval tv = { 60, 0 }; fd_set rfds; FD_ZERO(&rfds); FD_SET(data.logfd, &rfds); num = select(FD_SETSIZE, &rfds, NULL, NULL, &tv); if (num < 0) { fprintf(data.std, "Unable select over logfd!\nHALTING LOGTHREAD!\n"); return NULL; } if (num == 0) { 
fprintf(data.std, "[knet]: No logs in the last 60 seconds\n"); continue; } if (FD_ISSET(data.logfd, &rfds)) { flush_logs(data.logfd, data.std); } } } int start_logthread(int logfd, FILE *std) { int savederrno = 0; savederrno = pthread_mutex_lock(&log_thread_mutex); if (savederrno) { printf("Unable to get log_thread mutex lock\n"); return -1; } if (!log_thread_init) { data.logfd = logfd; data.std = std; savederrno = pthread_create(&log_thread, 0, _logthread, NULL); if (savederrno) { printf("Unable to start logging thread: %s\n", strerror(savederrno)); pthread_mutex_unlock(&log_thread_mutex); return -1; } log_thread_init = 1; } pthread_mutex_unlock(&log_thread_mutex); return 0; } int stop_logthread(void) { int savederrno = 0; void *retval; savederrno = pthread_mutex_lock(&log_thread_mutex); if (savederrno) { printf("Unable to get log_thread mutex lock\n"); return -1; } if (log_thread_init) { pthread_cancel(log_thread); pthread_join(log_thread, &retval); log_thread_init = 0; } pthread_mutex_unlock(&log_thread_mutex); return 0; } static void stop_logging(void) { stop_logthread(); flush_logs(log_fds[0], stdout); close_logpipes(log_fds); } int start_logging(FILE *std) { int savederrno = 0; savederrno = pthread_mutex_lock(&log_mutex); if (savederrno) { printf("Unable to get log_mutex lock\n"); return -1; } if (!log_init) { setup_logpipes(log_fds); if (atexit(&stop_logging) != 0) { printf("Unable to register atexit handler to stop logging: %s\n", strerror(errno)); exit(FAIL); } if (start_logthread(log_fds[0], std) < 0) { exit(FAIL); } log_init = 1; } pthread_mutex_unlock(&log_mutex); return log_fds[1]; } knet_handle_t knet_handle_start(int logfds[2], uint8_t log_level) { knet_handle_t knet_h = knet_handle_new_ex(1, logfds[1], log_level, 0); if (knet_h) { return knet_h; } else { printf("knet_handle_new failed: %s\n", strerror(errno)); flush_logs(logfds[0], stdout); close_logpipes(logfds); exit(FAIL); } } int knet_handle_stop(knet_handle_t knet_h) { int savederrno; size_t i, 
j; knet_node_id_t host_ids[KNET_MAX_HOST]; uint8_t link_ids[KNET_MAX_LINK]; size_t host_ids_entries = 0, link_ids_entries = 0; struct knet_link_status status; savederrno = pthread_mutex_lock(&shutdown_mutex); if (savederrno) { printf("Unable to get shutdown mutex lock\n"); return -1; } if (stop_in_progress) { pthread_mutex_unlock(&shutdown_mutex); errno = EINVAL; return -1; } stop_in_progress = 1; pthread_mutex_unlock(&shutdown_mutex); if (!knet_h) { errno = EINVAL; return -1; } if (knet_handle_setfwd(knet_h, 0) < 0) { printf("knet_handle_setfwd failed: %s\n", strerror(errno)); return -1; } if (knet_host_get_host_list(knet_h, host_ids, &host_ids_entries) < 0) { printf("knet_host_get_host_list failed: %s\n", strerror(errno)); return -1; } for (i = 0; i < host_ids_entries; i++) { if (knet_link_get_link_list(knet_h, host_ids[i], link_ids, &link_ids_entries)) { printf("knet_link_get_link_list failed: %s\n", strerror(errno)); return -1; } for (j = 0; j < link_ids_entries; j++) { if (knet_link_get_status(knet_h, host_ids[i], link_ids[j], &status, sizeof(struct knet_link_status))) { printf("knet_link_get_status failed: %s\n", strerror(errno)); return -1; } if (status.enabled) { if (knet_link_set_enable(knet_h, host_ids[i], j, 0)) { printf("knet_link_set_enable failed: %s\n", strerror(errno)); return -1; } } knet_link_clear_config(knet_h, host_ids[i], j); } if (knet_host_remove(knet_h, host_ids[i]) < 0) { printf("knet_host_remove failed: %s\n", strerror(errno)); return -1; } } if (knet_handle_free(knet_h)) { printf("knet_handle_free failed: %s\n", strerror(errno)); return -1; } return 0; } static int _make_local_sockaddr(struct sockaddr_storage *lo, uint16_t offset, int family) { uint32_t port; char portstr[32]; /* Use the pid if we can. 
but makes sure its in a sensible range */ port = (uint32_t)getpid() + offset; if (port < 1024) { port += 1024; } if (port > 65536) { port = port & 0xFFFF; } sprintf(portstr, "%u", port); memset(lo, 0, sizeof(struct sockaddr_storage)); printf("Using port %u\n", port); if (family == AF_INET6) { return knet_strtoaddr("::1", portstr, lo, sizeof(struct sockaddr_storage)); } return knet_strtoaddr("127.0.0.1", portstr, lo, sizeof(struct sockaddr_storage)); } int make_local_sockaddr(struct sockaddr_storage *lo, uint16_t offset) { return _make_local_sockaddr(lo, offset, AF_INET); } int make_local_sockaddr6(struct sockaddr_storage *lo, uint16_t offset) { return _make_local_sockaddr(lo, offset, AF_INET6); } int wait_for_host(knet_handle_t knet_h, uint16_t host_id, int seconds, int logfd, FILE *std) { int i = 0; if (is_memcheck() || is_helgrind()) { printf("Test suite is running under valgrind, adjusting wait_for_host timeout\n"); seconds = seconds * 16; } while (i < seconds) { flush_logs(logfd, std); if (knet_h->host_index[host_id]->status.reachable == 1) { return 0; } printf("waiting host %u to be reachable for %d more seconds\n", host_id, seconds - i); sleep(1); i++; } return -1; } int wait_for_packet(knet_handle_t knet_h, int seconds, int datafd, int logfd, FILE *std) { fd_set rfds; struct timeval tv; int err = 0, i = 0; if (is_memcheck() || is_helgrind()) { printf("Test suite is running under valgrind, adjusting wait_for_packet timeout\n"); seconds = seconds * 16; } try_again: FD_ZERO(&rfds); FD_SET(datafd, &rfds); tv.tv_sec = 1; tv.tv_usec = 0; err = select(datafd+1, &rfds, NULL, NULL, &tv); /* * on slow arches the first call to select can return 0. * pick an arbitrary 10 times loop (multiplied by waiting seconds) * before failing. 
*/ if ((!err) && (i < seconds)) { flush_logs(logfd, std); i++; goto try_again; } if ((err > 0) && (FD_ISSET(datafd, &rfds))) { return 0; } return -1; } diff --git a/libknet/threads_heartbeat.c b/libknet/threads_heartbeat.c index 8f8a7ecb..fa1f30c4 100644 --- a/libknet/threads_heartbeat.c +++ b/libknet/threads_heartbeat.c @@ -1,225 +1,225 @@ /* * Copyright (C) 2015-2019 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include #include "crypto.h" #include "links.h" #include "logging.h" #include "transports.h" #include "threads_common.h" #include "threads_heartbeat.h" static void _link_down(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link) { memset(&dst_link->pmtud_last, 0, sizeof(struct timespec)); dst_link->received_pong = 0; dst_link->status.pong_last.tv_nsec = 0; dst_link->pong_timeout_backoff = KNET_LINK_PONG_TIMEOUT_BACKOFF; if (dst_link->status.connected == 1) { log_info(knet_h, KNET_SUB_LINK, "host: %u link: %u is down", dst_host->host_id, dst_link->link_id); _link_updown(knet_h, dst_host->host_id, dst_link->link_id, dst_link->status.enabled, 0); } } static void _handle_check_each(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link, int timed) { int err = 0, savederrno = 0; int len; ssize_t outlen = KNET_HEADER_PING_SIZE; struct timespec clock_now, pong_last; unsigned long long diff_ping; unsigned char *outbuf = (unsigned char *)knet_h->pingbuf; if (dst_link->transport_connected == 0) { _link_down(knet_h, dst_host, dst_link); return; } /* caching last pong to avoid race conditions */ pong_last = dst_link->status.pong_last; if (clock_gettime(CLOCK_MONOTONIC, &clock_now) != 0) { log_debug(knet_h, KNET_SUB_HEARTBEAT, "Unable to get monotonic clock"); return; } timespec_diff(dst_link->ping_last, clock_now, &diff_ping); if ((diff_ping >= (dst_link->ping_interval * 1000llu)) 
|| (!timed)) { memmove(&knet_h->pingbuf->khp_ping_time[0], &clock_now, sizeof(struct timespec)); knet_h->pingbuf->khp_ping_link = dst_link->link_id; if (pthread_mutex_lock(&knet_h->tx_seq_num_mutex)) { log_debug(knet_h, KNET_SUB_HEARTBEAT, "Unable to get seq mutex lock"); return; } knet_h->pingbuf->khp_ping_seq_num = htons(knet_h->tx_seq_num); pthread_mutex_unlock(&knet_h->tx_seq_num_mutex); knet_h->pingbuf->khp_ping_timed = timed; if (knet_h->crypto_instance) { if (crypto_encrypt_and_sign(knet_h, (const unsigned char *)knet_h->pingbuf, outlen, knet_h->pingbuf_crypt, &outlen) < 0) { log_debug(knet_h, KNET_SUB_HEARTBEAT, "Unable to crypto ping packet"); return; } outbuf = knet_h->pingbuf_crypt; knet_h->stats_extra.tx_crypt_ping_packets++; } retry: if (transport_get_connection_oriented(knet_h, dst_link->transport) == TRANSPORT_PROTO_NOT_CONNECTION_ORIENTED) { len = sendto(dst_link->outsock, outbuf, outlen, MSG_DONTWAIT | MSG_NOSIGNAL, (struct sockaddr *) &dst_link->dst_addr, sizeof(struct sockaddr_storage)); } else { len = sendto(dst_link->outsock, outbuf, outlen, MSG_DONTWAIT | MSG_NOSIGNAL, NULL, 0); } savederrno = errno; dst_link->ping_last = clock_now; dst_link->status.stats.tx_ping_packets++; dst_link->status.stats.tx_ping_bytes += outlen; if (len != outlen) { err = transport_tx_sock_error(knet_h, dst_link->transport, dst_link->outsock, len, savederrno); switch(err) { case -1: /* unrecoverable error */ log_debug(knet_h, KNET_SUB_HEARTBEAT, "Unable to send ping (sock: %d) packet (sendto): %d %s. 
recorded src ip: %s src port: %s dst ip: %s dst port: %s", dst_link->outsock, savederrno, strerror(savederrno), dst_link->status.src_ipaddr, dst_link->status.src_port, dst_link->status.dst_ipaddr, dst_link->status.dst_port); dst_link->status.stats.tx_ping_errors++; break; case 0: break; case 1: dst_link->status.stats.tx_ping_retries++; goto retry; break; } } else { dst_link->last_ping_size = outlen; } } timespec_diff(pong_last, clock_now, &diff_ping); if ((pong_last.tv_nsec) && (diff_ping >= (dst_link->pong_timeout_adj * 1000llu))) { _link_down(knet_h, dst_host, dst_link); } } void _send_pings(knet_handle_t knet_h, int timed) { struct knet_host *dst_host; int link_idx; if (pthread_mutex_lock(&knet_h->hb_mutex)) { log_debug(knet_h, KNET_SUB_HEARTBEAT, "Unable to get hb mutex lock"); return; } for (dst_host = knet_h->host_head; dst_host != NULL; dst_host = dst_host->next) { for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) { if ((dst_host->link[link_idx].status.enabled != 1) || (dst_host->link[link_idx].transport == KNET_TRANSPORT_LOOPBACK ) || ((dst_host->link[link_idx].dynamic == KNET_LINK_DYNIP) && (dst_host->link[link_idx].status.dynconnected != 1))) continue; _handle_check_each(knet_h, dst_host, &dst_host->link[link_idx], timed); } } pthread_mutex_unlock(&knet_h->hb_mutex); } static void _adjust_pong_timeouts(knet_handle_t knet_h) { struct knet_host *dst_host; struct knet_link *dst_link; int link_idx; if (pthread_mutex_lock(&knet_h->backoff_mutex)) { log_debug(knet_h, KNET_SUB_HEARTBEAT, "Unable to get backoff_mutex"); return; } for (dst_host = knet_h->host_head; dst_host != NULL; dst_host = dst_host->next) { for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) { if ((dst_host->link[link_idx].status.enabled != 1) || (dst_host->link[link_idx].transport == KNET_TRANSPORT_LOOPBACK ) || ((dst_host->link[link_idx].dynamic == KNET_LINK_DYNIP) && (dst_host->link[link_idx].status.dynconnected != 1))) continue; dst_link = &dst_host->link[link_idx]; if 
(dst_link->pong_timeout_backoff > 1) { dst_link->pong_timeout_backoff--; } - dst_link->pong_timeout_adj = (dst_link->pong_timeout * dst_link->pong_timeout_backoff) + (dst_link->status.stats.latency_max * KNET_LINK_PONG_TIMEOUT_LAT_MUL); + dst_link->pong_timeout_adj = (dst_link->pong_timeout * dst_link->pong_timeout_backoff) + (dst_link->status.latency * KNET_LINK_PONG_TIMEOUT_LAT_MUL); } } pthread_mutex_unlock(&knet_h->backoff_mutex); } void *_handle_heartbt_thread(void *data) { knet_handle_t knet_h = (knet_handle_t) data; int i = 1; set_thread_status(knet_h, KNET_THREAD_HB, KNET_THREAD_STARTED); /* preparing ping buffer */ knet_h->pingbuf->kh_version = KNET_HEADER_VERSION; knet_h->pingbuf->kh_type = KNET_HEADER_TYPE_PING; knet_h->pingbuf->kh_node = htons(knet_h->host_id); while (!shutdown_in_progress(knet_h)) { usleep(KNET_THREADS_TIMERES); if (pthread_rwlock_rdlock(&knet_h->global_rwlock) != 0) { log_debug(knet_h, KNET_SUB_HEARTBEAT, "Unable to get read lock"); continue; } /* * _adjust_pong_timeouts should execute approx once a second. */ if ((i % (1000000 / KNET_THREADS_TIMERES)) == 0) { _adjust_pong_timeouts(knet_h); i = 1; } else { i++; } _send_pings(knet_h, 1); pthread_rwlock_unlock(&knet_h->global_rwlock); } set_thread_status(knet_h, KNET_THREAD_HB, KNET_THREAD_STOPPED); return NULL; } diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c index ebd5b4b0..75d51962 100644 --- a/libknet/threads_pmtud.c +++ b/libknet/threads_pmtud.c @@ -1,612 +1,616 @@ /* * Copyright (C) 2015-2019 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. 
Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include "crypto.h" #include "links.h" #include "host.h" #include "logging.h" #include "transports.h" #include "threads_common.h" #include "threads_pmtud.h" static int _calculate_manual_mtu(knet_handle_t knet_h, struct knet_link *dst_link) { size_t ipproto_overhead_len; /* onwire packet overhead (protocol based) */ switch (dst_link->dst_addr.ss_family) { case AF_INET6: ipproto_overhead_len = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead; break; case AF_INET: ipproto_overhead_len = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead; break; default: log_debug(knet_h, KNET_SUB_PMTUD, "unknown protocol"); return 0; break; } dst_link->status.mtu = calc_max_data_outlen(knet_h, knet_h->manual_mtu - ipproto_overhead_len); return 1; } static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link) { int err, ret, savederrno, mutex_retry_limit, failsafe, use_kernel_mtu, warn_once; uint32_t kernel_mtu; /* record kernel_mtu from EMSGSIZE */ size_t onwire_len; /* current packet onwire size */ size_t ipproto_overhead_len; /* onwire packet overhead (protocol based) */ size_t max_mtu_len; /* max mtu for protocol */ size_t data_len; /* how much data we can send in the packet * generally would be onwire_len - ipproto_overhead_len * needs to be adjusted for crypto */ size_t app_mtu_len; /* real data that we can send onwire */ ssize_t len; /* len of what we were able to sendto onwire */ struct timespec ts, pmtud_crypto_start_ts, pmtud_crypto_stop_ts; unsigned long long pong_timeout_adj_tmp, timediff; int pmtud_crypto_reduce = 1; unsigned char *outbuf = (unsigned char *)knet_h->pmtudbuf; warn_once = 0; mutex_retry_limit = 0; failsafe = 0; knet_h->pmtudbuf->khp_pmtud_link = dst_link->link_id; switch (dst_link->dst_addr.ss_family) { case AF_INET6: max_mtu_len = KNET_PMTUD_SIZE_V6; ipproto_overhead_len = 
KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead; break; case AF_INET: max_mtu_len = KNET_PMTUD_SIZE_V4; ipproto_overhead_len = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead; break; default: log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD aborted, unknown protocol"); return -1; break; } dst_link->last_bad_mtu = 0; dst_link->last_good_mtu = dst_link->last_ping_size + ipproto_overhead_len; /* * discovery starts from the top because kernel will * refuse to send packets > current iface mtu. * this saves us some time and network bw. */ onwire_len = max_mtu_len; restart: /* * prevent a race when interface mtu is changed _exactly_ during * the discovery process and it's complex to detect. Easier * to wait the next loop. * 30 is not an arbitrary value. To bisect from 576 to 128000 doesn't * take more than 18/19 steps. */ if (failsafe == 30) { log_err(knet_h, KNET_SUB_PMTUD, "Aborting PMTUD process: Too many attempts. MTU might have changed during discovery."); return -1; } else { failsafe++; } /* * common to all packets */ /* * calculate the application MTU based on current onwire_len minus ipproto_overhead_len */ app_mtu_len = calc_max_data_outlen(knet_h, onwire_len - ipproto_overhead_len); /* * recalculate onwire len back that might be different based * on data padding from crypto layer. */ onwire_len = calc_data_outlen(knet_h, app_mtu_len + KNET_HEADER_ALL_SIZE) + ipproto_overhead_len; /* * calculate the size of what we need to send to sendto(2). * see also onwire.c for packet format explanation. 
*/ data_len = app_mtu_len + knet_h->sec_hash_size + knet_h->sec_salt_size + KNET_HEADER_ALL_SIZE; if (knet_h->crypto_instance) { if (data_len < (knet_h->sec_hash_size + knet_h->sec_salt_size) + 1) { log_debug(knet_h, KNET_SUB_PMTUD, "Aborting PMTUD process: link mtu smaller than crypto header detected (link might have been disconnected)"); return -1; } knet_h->pmtudbuf->khp_pmtud_size = onwire_len; if (crypto_encrypt_and_sign(knet_h, (const unsigned char *)knet_h->pmtudbuf, data_len - (knet_h->sec_hash_size + knet_h->sec_salt_size), knet_h->pmtudbuf_crypt, (ssize_t *)&data_len) < 0) { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to crypto pmtud packet"); return -1; } outbuf = knet_h->pmtudbuf_crypt; knet_h->stats_extra.tx_crypt_pmtu_packets++; } else { knet_h->pmtudbuf->khp_pmtud_size = onwire_len; } /* link has gone down, aborting pmtud */ if (dst_link->status.connected != 1) { log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD detected host (%u) link (%u) has been disconnected", dst_host->host_id, dst_link->link_id); return -1; } if (dst_link->transport_connected != 1) { log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD detected host (%u) link (%u) has been disconnected", dst_host->host_id, dst_link->link_id); return -1; } if (pthread_mutex_lock(&knet_h->pmtud_mutex) != 0) { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get mutex lock"); return -1; } if (knet_h->pmtud_abort) { pthread_mutex_unlock(&knet_h->pmtud_mutex); errno = EDEADLK; return -1; } savederrno = pthread_mutex_lock(&knet_h->tx_mutex); if (savederrno) { log_err(knet_h, KNET_SUB_PMTUD, "Unable to get TX mutex lock: %s", strerror(savederrno)); return -1; } retry: if (transport_get_connection_oriented(knet_h, dst_link->transport) == TRANSPORT_PROTO_NOT_CONNECTION_ORIENTED) { len = sendto(dst_link->outsock, outbuf, data_len, MSG_DONTWAIT | MSG_NOSIGNAL, (struct sockaddr *) &dst_link->dst_addr, sizeof(struct sockaddr_storage)); } else { len = sendto(dst_link->outsock, outbuf, data_len, MSG_DONTWAIT | MSG_NOSIGNAL, NULL, 0); } 
savederrno = errno; /* * we cannot hold a lock on kmtu_mutex between resetting * knet_h->kernel_mtu here and below where it's used. * use_kernel_mtu tells us if the knet_h->kernel_mtu was * set to 0 and we can trust its value later. */ use_kernel_mtu = 0; if (pthread_mutex_lock(&knet_h->kmtu_mutex) == 0) { use_kernel_mtu = 1; knet_h->kernel_mtu = 0; pthread_mutex_unlock(&knet_h->kmtu_mutex); } kernel_mtu = 0; err = transport_tx_sock_error(knet_h, dst_link->transport, dst_link->outsock, len, savederrno); switch(err) { case -1: /* unrecoverable error */ log_debug(knet_h, KNET_SUB_PMTUD, "Unable to send pmtu packet (sendto): %d %s", savederrno, strerror(savederrno)); pthread_mutex_unlock(&knet_h->tx_mutex); pthread_mutex_unlock(&knet_h->pmtud_mutex); dst_link->status.stats.tx_pmtu_errors++; return -1; case 0: /* ignore error and continue */ break; case 1: /* retry to send those same data */ dst_link->status.stats.tx_pmtu_retries++; goto retry; break; } pthread_mutex_unlock(&knet_h->tx_mutex); if (len != (ssize_t )data_len) { if (savederrno == EMSGSIZE) { /* * we cannot hold a lock on kmtu_mutex between resetting * knet_h->kernel_mtu and here. * use_kernel_mtu tells us if the knet_h->kernel_mtu was * set to 0 previously and we can trust its value now. 
*/ if (use_kernel_mtu) { use_kernel_mtu = 0; if (pthread_mutex_lock(&knet_h->kmtu_mutex) == 0) { kernel_mtu = knet_h->kernel_mtu; pthread_mutex_unlock(&knet_h->kmtu_mutex); } } if (kernel_mtu > 0) { dst_link->last_bad_mtu = kernel_mtu + 1; } else { dst_link->last_bad_mtu = onwire_len; } } else { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to send pmtu packet len: %zu err: %s", onwire_len, strerror(savederrno)); } } else { dst_link->last_sent_mtu = onwire_len; dst_link->last_recv_mtu = 0; dst_link->status.stats.tx_pmtu_packets++; dst_link->status.stats.tx_pmtu_bytes += data_len; if (clock_gettime(CLOCK_REALTIME, &ts) < 0) { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get current time: %s", strerror(errno)); pthread_mutex_unlock(&knet_h->pmtud_mutex); return -1; } /* * non fatal, we can wait the next round to reduce the * multiplier */ if (clock_gettime(CLOCK_MONOTONIC, &pmtud_crypto_start_ts) < 0) { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get current time: %s", strerror(errno)); pmtud_crypto_reduce = 0; } /* * set PMTUd reply timeout to match pong_timeout on a given link * * math: internally pong_timeout is expressed in microseconds, while * the public API exports milliseconds. So careful with the 0's here. * the loop is necessary because we are grabbing the current time just above * and add values to it that could overflow into seconds. 
*/ if (pthread_mutex_lock(&knet_h->backoff_mutex)) { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get backoff_mutex"); pthread_mutex_unlock(&knet_h->pmtud_mutex); return -1; } if (knet_h->crypto_instance) { /* * crypto, under pressure, is a royal PITA */ pong_timeout_adj_tmp = dst_link->pong_timeout_adj * dst_link->pmtud_crypto_timeout_multiplier; } else { pong_timeout_adj_tmp = dst_link->pong_timeout_adj; } ts.tv_sec += pong_timeout_adj_tmp / 1000000; ts.tv_nsec += (((pong_timeout_adj_tmp) % 1000000) * 1000); while (ts.tv_nsec > 1000000000) { ts.tv_sec += 1; ts.tv_nsec -= 1000000000; } pthread_mutex_unlock(&knet_h->backoff_mutex); knet_h->pmtud_waiting = 1; ret = pthread_cond_timedwait(&knet_h->pmtud_cond, &knet_h->pmtud_mutex, &ts); knet_h->pmtud_waiting = 0; if (knet_h->pmtud_abort) { pthread_mutex_unlock(&knet_h->pmtud_mutex); errno = EDEADLK; return -1; } - if (shutdown_in_progress(knet_h)) { + /* + * we cannot use shutdown_in_progress in here because + * we already hold the read lock + */ + if (knet_h->fini_in_progress) { pthread_mutex_unlock(&knet_h->pmtud_mutex); log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD aborted. shutdown in progress"); return -1; } if (ret) { if (ret == ETIMEDOUT) { if ((knet_h->crypto_instance) && (dst_link->pmtud_crypto_timeout_multiplier < KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MAX)) { dst_link->pmtud_crypto_timeout_multiplier = dst_link->pmtud_crypto_timeout_multiplier * 2; pmtud_crypto_reduce = 0; log_debug(knet_h, KNET_SUB_PMTUD, "Increasing PMTUd response timeout multiplier to (%u) for host %u link: %u", dst_link->pmtud_crypto_timeout_multiplier, dst_host->host_id, dst_link->link_id); pthread_mutex_unlock(&knet_h->pmtud_mutex); goto restart; } if (!warn_once) { log_warn(knet_h, KNET_SUB_PMTUD, "possible MTU misconfiguration detected. " "kernel is reporting MTU: %u bytes for " "host %u link %u but the other node is " "not acknowledging packets of this size. 
", dst_link->last_sent_mtu, dst_host->host_id, dst_link->link_id); log_warn(knet_h, KNET_SUB_PMTUD, "This can be caused by this node interface MTU " "too big or a network device that does not " "support or has been misconfigured to manage MTU " "of this size, or packet loss. knet will continue " "to run but performances might be affected."); warn_once = 1; } } else { pthread_mutex_unlock(&knet_h->pmtud_mutex); if (mutex_retry_limit == 3) { log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD aborted, unable to get mutex lock"); return -1; } mutex_retry_limit++; goto restart; } } if ((knet_h->crypto_instance) && (pmtud_crypto_reduce == 1) && (dst_link->pmtud_crypto_timeout_multiplier > KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN)) { if (!clock_gettime(CLOCK_MONOTONIC, &pmtud_crypto_stop_ts)) { timespec_diff(pmtud_crypto_start_ts, pmtud_crypto_stop_ts, &timediff); if (((pong_timeout_adj_tmp * 1000) / 2) > timediff) { dst_link->pmtud_crypto_timeout_multiplier = dst_link->pmtud_crypto_timeout_multiplier / 2; log_debug(knet_h, KNET_SUB_PMTUD, "Decreasing PMTUd response timeout multiplier to (%u) for host %u link: %u", dst_link->pmtud_crypto_timeout_multiplier, dst_host->host_id, dst_link->link_id); } } else { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get current time: %s", strerror(errno)); } } if ((dst_link->last_recv_mtu != onwire_len) || (ret)) { dst_link->last_bad_mtu = onwire_len; } else { int found_mtu = 0; if (knet_h->sec_block_size) { if ((onwire_len + knet_h->sec_block_size >= max_mtu_len) || ((dst_link->last_bad_mtu) && (dst_link->last_bad_mtu <= (onwire_len + knet_h->sec_block_size)))) { found_mtu = 1; } } else { if ((onwire_len == max_mtu_len) || ((dst_link->last_bad_mtu) && (dst_link->last_bad_mtu == (onwire_len + 1))) || (dst_link->last_bad_mtu == dst_link->last_good_mtu)) { found_mtu = 1; } } if (found_mtu) { /* * account for IP overhead, knet headers and crypto in PMTU calculation */ dst_link->status.mtu = calc_max_data_outlen(knet_h, onwire_len - 
ipproto_overhead_len); pthread_mutex_unlock(&knet_h->pmtud_mutex); return 0; } dst_link->last_good_mtu = onwire_len; } } if (kernel_mtu) { onwire_len = kernel_mtu; } else { onwire_len = (dst_link->last_good_mtu + dst_link->last_bad_mtu) / 2; } pthread_mutex_unlock(&knet_h->pmtud_mutex); goto restart; } static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link, int force_run) { uint8_t saved_valid_pmtud; unsigned int saved_pmtud; struct timespec clock_now; unsigned long long diff_pmtud, interval; if (clock_gettime(CLOCK_MONOTONIC, &clock_now) != 0) { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get monotonic clock"); return 0; } if (!force_run) { interval = knet_h->pmtud_interval * 1000000000llu; /* nanoseconds */ timespec_diff(dst_link->pmtud_last, clock_now, &diff_pmtud); if (diff_pmtud < interval) { return dst_link->has_valid_mtu; } } /* * status.proto_overhead should include all IP/(UDP|SCTP)/knet headers * * please note that it is not the same as link->proto_overhead that * includes only either UDP or SCTP (at the moment) overhead. 
*/ switch (dst_link->dst_addr.ss_family) { case AF_INET6: dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_hash_size + knet_h->sec_salt_size; break; case AF_INET: dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_hash_size + knet_h->sec_salt_size; break; } saved_pmtud = dst_link->status.mtu; saved_valid_pmtud = dst_link->has_valid_mtu; log_debug(knet_h, KNET_SUB_PMTUD, "Starting PMTUD for host: %u link: %u", dst_host->host_id, dst_link->link_id); errno = 0; if (_handle_check_link_pmtud(knet_h, dst_host, dst_link) < 0) { if (errno == EDEADLK) { log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD for host: %u link: %u has been rescheduled", dst_host->host_id, dst_link->link_id); dst_link->status.mtu = saved_pmtud; dst_link->has_valid_mtu = saved_valid_pmtud; errno = EDEADLK; return dst_link->has_valid_mtu; } dst_link->has_valid_mtu = 0; } else { dst_link->has_valid_mtu = 1; if (dst_link->has_valid_mtu) { if ((saved_pmtud) && (saved_pmtud != dst_link->status.mtu)) { log_info(knet_h, KNET_SUB_PMTUD, "PMTUD link change for host: %u link: %u from %u to %u", dst_host->host_id, dst_link->link_id, saved_pmtud, dst_link->status.mtu); } log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD completed for host: %u link: %u current link mtu: %u", dst_host->host_id, dst_link->link_id, dst_link->status.mtu); /* * set pmtud_last, if we can, after we are done with the PMTUd process * because it can take a very long time. 
*/ dst_link->pmtud_last = clock_now; if (!clock_gettime(CLOCK_MONOTONIC, &clock_now)) { dst_link->pmtud_last = clock_now; } } } if (saved_valid_pmtud != dst_link->has_valid_mtu) { - _host_dstcache_update_sync(knet_h, dst_host); + _host_dstcache_update_async(knet_h, dst_host); } return dst_link->has_valid_mtu; } void *_handle_pmtud_link_thread(void *data) { knet_handle_t knet_h = (knet_handle_t) data; struct knet_host *dst_host; struct knet_link *dst_link; int link_idx; unsigned int have_mtu; unsigned int lower_mtu; int link_has_mtu; int force_run = 0; set_thread_status(knet_h, KNET_THREAD_PMTUD, KNET_THREAD_STARTED); knet_h->data_mtu = calc_min_mtu(knet_h); /* preparing pmtu buffer */ knet_h->pmtudbuf->kh_version = KNET_HEADER_VERSION; knet_h->pmtudbuf->kh_type = KNET_HEADER_TYPE_PMTUD; knet_h->pmtudbuf->kh_node = htons(knet_h->host_id); while (!shutdown_in_progress(knet_h)) { usleep(KNET_THREADS_TIMERES); if (pthread_mutex_lock(&knet_h->pmtud_mutex) != 0) { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get mutex lock"); continue; } knet_h->pmtud_abort = 0; knet_h->pmtud_running = 1; force_run = knet_h->pmtud_forcerun; knet_h->pmtud_forcerun = 0; pthread_mutex_unlock(&knet_h->pmtud_mutex); if (force_run) { log_debug(knet_h, KNET_SUB_PMTUD, "PMTUd request to rerun has been received"); } if (pthread_rwlock_rdlock(&knet_h->global_rwlock) != 0) { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get read lock"); continue; } lower_mtu = KNET_PMTUD_SIZE_V4; have_mtu = 0; for (dst_host = knet_h->host_head; dst_host != NULL; dst_host = dst_host->next) { for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) { dst_link = &dst_host->link[link_idx]; if ((dst_link->status.enabled != 1) || (dst_link->status.connected != 1) || (dst_host->link[link_idx].transport == KNET_TRANSPORT_LOOPBACK) || (!dst_link->last_ping_size) || ((dst_link->dynamic == KNET_LINK_DYNIP) && (dst_link->status.dynconnected != 1))) continue; if (!knet_h->manual_mtu) { link_has_mtu = _handle_check_pmtud(knet_h, 
dst_host, dst_link, force_run); if (errno == EDEADLK) { goto out_unlock; } if (link_has_mtu) { have_mtu = 1; if (dst_link->status.mtu < lower_mtu) { lower_mtu = dst_link->status.mtu; } } } else { link_has_mtu = _calculate_manual_mtu(knet_h, dst_link); if (link_has_mtu) { have_mtu = 1; if (dst_link->status.mtu < lower_mtu) { lower_mtu = dst_link->status.mtu; } } } } } if (have_mtu) { if (knet_h->data_mtu != lower_mtu) { knet_h->data_mtu = lower_mtu; log_info(knet_h, KNET_SUB_PMTUD, "Global data MTU changed to: %u", knet_h->data_mtu); if (knet_h->pmtud_notify_fn) { knet_h->pmtud_notify_fn(knet_h->pmtud_notify_fn_private_data, knet_h->data_mtu); } } } out_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); if (pthread_mutex_lock(&knet_h->pmtud_mutex) != 0) { log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get mutex lock"); } else { knet_h->pmtud_running = 0; pthread_mutex_unlock(&knet_h->pmtud_mutex); } } set_thread_status(knet_h, KNET_THREAD_PMTUD, KNET_THREAD_STOPPED); return NULL; } diff --git a/libknet/threads_rx.c b/libknet/threads_rx.c index f4cc1642..e8fe264a 100644 --- a/libknet/threads_rx.c +++ b/libknet/threads_rx.c @@ -1,888 +1,896 @@ /* * Copyright (C) 2012-2019 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. 
Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include #include "compat.h" #include "compress.h" #include "crypto.h" #include "host.h" #include "links.h" #include "links_acl.h" #include "logging.h" #include "transports.h" #include "transport_common.h" #include "threads_common.h" #include "threads_heartbeat.h" #include "threads_rx.h" #include "netutils.h" /* * RECV */ /* * return 1 if a > b * return -1 if b > a * return 0 if they are equal */ static inline int timecmp(struct timespec a, struct timespec b) { if (a.tv_sec != b.tv_sec) { if (a.tv_sec > b.tv_sec) { return 1; } else { return -1; } } else { if (a.tv_nsec > b.tv_nsec) { return 1; } else if (a.tv_nsec < b.tv_nsec) { return -1; } else { return 0; } } } /* * this functions needs to return an index (0 to 7) * to a knet_host_defrag_buf. (-1 on errors) */ static int find_pckt_defrag_buf(knet_handle_t knet_h, struct knet_header *inbuf) { struct knet_host *src_host = knet_h->host_index[inbuf->kh_node]; int i, oldest; /* * check if there is a buffer already in use handling the same seq_num */ for (i = 0; i < KNET_MAX_LINK; i++) { if (src_host->defrag_buf[i].in_use) { if (src_host->defrag_buf[i].pckt_seq == inbuf->khp_data_seq_num) { return i; } } } /* * If there is no buffer that's handling the current seq_num * either it's new or it's been reclaimed already. * check if it's been reclaimed/seen before using the defrag circular * buffer. If the pckt has been seen before, the buffer expired (ETIME) * and there is no point to try to defrag it again. 
*/ if (!_seq_num_lookup(src_host, inbuf->khp_data_seq_num, 1, 0)) { errno = ETIME; return -1; } /* * register the pckt as seen */ _seq_num_set(src_host, inbuf->khp_data_seq_num, 1); /* * see if there is a free buffer */ for (i = 0; i < KNET_MAX_LINK; i++) { if (!src_host->defrag_buf[i].in_use) { return i; } } /* * at this point, there are no free buffers, the pckt is new * and we need to reclaim a buffer, and we will take the one * with the oldest timestamp. It's as good as any. */ oldest = 0; for (i = 0; i < KNET_MAX_LINK; i++) { if (timecmp(src_host->defrag_buf[i].last_update, src_host->defrag_buf[oldest].last_update) < 0) { oldest = i; } } src_host->defrag_buf[oldest].in_use = 0; return oldest; } static int pckt_defrag(knet_handle_t knet_h, struct knet_header *inbuf, ssize_t *len) { struct knet_host_defrag_buf *defrag_buf; int defrag_buf_idx; defrag_buf_idx = find_pckt_defrag_buf(knet_h, inbuf); if (defrag_buf_idx < 0) { if (errno == ETIME) { log_debug(knet_h, KNET_SUB_RX, "Defrag buffer expired"); } return 1; } defrag_buf = &knet_h->host_index[inbuf->kh_node]->defrag_buf[defrag_buf_idx]; /* * if the buf is not is use, then make sure it's clean */ if (!defrag_buf->in_use) { memset(defrag_buf, 0, sizeof(struct knet_host_defrag_buf)); defrag_buf->in_use = 1; defrag_buf->pckt_seq = inbuf->khp_data_seq_num; } /* * update timestamp on the buffer */ clock_gettime(CLOCK_MONOTONIC, &defrag_buf->last_update); /* * check if we already received this fragment */ if (defrag_buf->frag_map[inbuf->khp_data_frag_seq]) { /* * if we have received this fragment and we didn't clear the buffer * it means that we don't have all fragments yet */ return 1; } /* * we need to handle the last packet with gloves due to its different size */ if (inbuf->khp_data_frag_seq == inbuf->khp_data_frag_num) { defrag_buf->last_frag_size = *len; /* * in the event when the last packet arrives first, * we still don't know the offset vs the other fragments (based on MTU), * so we store the fragment at the 
end of the buffer where it's safe * and take a copy of the len so that we can restore its offset later. * remember we can't use the local MTU for this calculation because pMTU * can be asymettric between the same hosts. */ if (!defrag_buf->frag_size) { defrag_buf->last_first = 1; memmove(defrag_buf->buf + (KNET_MAX_PACKET_SIZE - *len), inbuf->khp_data_userdata, *len); } } else { defrag_buf->frag_size = *len; } memmove(defrag_buf->buf + ((inbuf->khp_data_frag_seq - 1) * defrag_buf->frag_size), inbuf->khp_data_userdata, *len); defrag_buf->frag_recv++; defrag_buf->frag_map[inbuf->khp_data_frag_seq] = 1; /* * check if we received all the fragments */ if (defrag_buf->frag_recv == inbuf->khp_data_frag_num) { /* * special case the last pckt */ if (defrag_buf->last_first) { memmove(defrag_buf->buf + ((inbuf->khp_data_frag_num - 1) * defrag_buf->frag_size), defrag_buf->buf + (KNET_MAX_PACKET_SIZE - defrag_buf->last_frag_size), defrag_buf->last_frag_size); } /* * recalculate packet lenght */ *len = ((inbuf->khp_data_frag_num - 1) * defrag_buf->frag_size) + defrag_buf->last_frag_size; /* * copy the pckt back in the user data */ memmove(inbuf->khp_data_userdata, defrag_buf->buf, *len); /* * free this buffer */ defrag_buf->in_use = 0; return 0; } return 1; } static void _parse_recv_from_links(knet_handle_t knet_h, int sockfd, const struct knet_mmsghdr *msg) { int err = 0, savederrno = 0; ssize_t outlen; struct knet_host *src_host; struct knet_link *src_link; unsigned long long latency_last; knet_node_id_t dst_host_ids[KNET_MAX_HOST]; size_t dst_host_ids_entries = 0; int bcast = 1; int was_decrypted = 0; uint64_t crypt_time = 0; struct timespec recvtime; struct knet_header *inbuf = msg->msg_hdr.msg_iov->iov_base; unsigned char *outbuf = (unsigned char *)msg->msg_hdr.msg_iov->iov_base; ssize_t len = msg->msg_len; struct knet_hostinfo *knet_hostinfo; struct iovec iov_out[1]; int8_t channel; struct sockaddr_storage pckt_src; seq_num_t recv_seq_num; int wipe_bufs = 0; if 
(knet_h->crypto_instance) { struct timespec start_time; struct timespec end_time; clock_gettime(CLOCK_MONOTONIC, &start_time); if (crypto_authenticate_and_decrypt(knet_h, (unsigned char *)inbuf, len, knet_h->recv_from_links_buf_decrypt, &outlen) < 0) { log_debug(knet_h, KNET_SUB_RX, "Unable to decrypt/auth packet"); return; } clock_gettime(CLOCK_MONOTONIC, &end_time); timespec_diff(start_time, end_time, &crypt_time); if (crypt_time < knet_h->stats.rx_crypt_time_min) { knet_h->stats.rx_crypt_time_min = crypt_time; } if (crypt_time > knet_h->stats.rx_crypt_time_max) { knet_h->stats.rx_crypt_time_max = crypt_time; } len = outlen; inbuf = (struct knet_header *)knet_h->recv_from_links_buf_decrypt; was_decrypted++; } if (len < (ssize_t)(KNET_HEADER_SIZE + 1)) { log_debug(knet_h, KNET_SUB_RX, "Packet is too short: %ld", (long)len); return; } if (inbuf->kh_version != KNET_HEADER_VERSION) { log_debug(knet_h, KNET_SUB_RX, "Packet version does not match"); return; } inbuf->kh_node = ntohs(inbuf->kh_node); src_host = knet_h->host_index[inbuf->kh_node]; if (src_host == NULL) { /* host not found */ log_debug(knet_h, KNET_SUB_RX, "Unable to find source host for this packet"); return; } src_link = NULL; src_link = src_host->link + (inbuf->khp_ping_link % KNET_MAX_LINK); if ((inbuf->kh_type & KNET_HEADER_TYPE_PMSK) != 0) { if (src_link->dynamic == KNET_LINK_DYNIP) { /* * cpyaddrport will only copy address and port of the incoming * packet and strip extra bits such as flow and scopeid */ cpyaddrport(&pckt_src, msg->msg_hdr.msg_name); if (cmpaddr(&src_link->dst_addr, sockaddr_len(&src_link->dst_addr), &pckt_src, sockaddr_len(&pckt_src)) != 0) { log_debug(knet_h, KNET_SUB_RX, "host: %u link: %u appears to have changed ip address", src_host->host_id, src_link->link_id); memmove(&src_link->dst_addr, &pckt_src, sizeof(struct sockaddr_storage)); if (knet_addrtostr(&src_link->dst_addr, sockaddr_len(msg->msg_hdr.msg_name), src_link->status.dst_ipaddr, KNET_MAX_HOST_LEN, 
src_link->status.dst_port, KNET_MAX_PORT_LEN) != 0) { log_debug(knet_h, KNET_SUB_RX, "Unable to resolve ???"); snprintf(src_link->status.dst_ipaddr, KNET_MAX_HOST_LEN - 1, "Unknown!!!"); snprintf(src_link->status.dst_port, KNET_MAX_PORT_LEN - 1, "??"); } else { log_info(knet_h, KNET_SUB_RX, "host: %u link: %u new connection established from: %s %s", src_host->host_id, src_link->link_id, src_link->status.dst_ipaddr, src_link->status.dst_port); } } /* * transport has already accepted the connection here * otherwise we would not be receiving packets */ transport_link_dyn_connect(knet_h, sockfd, src_link); } } switch (inbuf->kh_type) { case KNET_HEADER_TYPE_HOST_INFO: case KNET_HEADER_TYPE_DATA: /* * TODO: should we accept data even if we can't reply to the other node? * how would that work with SCTP and guaranteed delivery? */ if (!src_host->status.reachable) { log_debug(knet_h, KNET_SUB_RX, "Source host %u not reachable yet", src_host->host_id); //return; } inbuf->khp_data_seq_num = ntohs(inbuf->khp_data_seq_num); channel = inbuf->khp_data_channel; src_host->got_data = 1; if (src_link) { src_link->status.stats.rx_data_packets++; src_link->status.stats.rx_data_bytes += len; } if (!_seq_num_lookup(src_host, inbuf->khp_data_seq_num, 0, 0)) { if (src_host->link_handler_policy != KNET_LINK_POLICY_ACTIVE) { log_debug(knet_h, KNET_SUB_RX, "Packet has already been delivered"); } return; } if (inbuf->khp_data_frag_num > 1) { /* * len as received from the socket also includes extra stuff * that the defrag code doesn't care about. 
So strip it * here and readd only for repadding once we are done * defragging */ len = len - KNET_HEADER_DATA_SIZE; if (pckt_defrag(knet_h, inbuf, &len)) { return; } len = len + KNET_HEADER_DATA_SIZE; } if (inbuf->khp_data_compress) { ssize_t decmp_outlen = KNET_DATABUFSIZE_COMPRESS; struct timespec start_time; struct timespec end_time; uint64_t compress_time; clock_gettime(CLOCK_MONOTONIC, &start_time); err = decompress(knet_h, inbuf->khp_data_compress, (const unsigned char *)inbuf->khp_data_userdata, len - KNET_HEADER_DATA_SIZE, knet_h->recv_from_links_buf_decompress, &decmp_outlen); if (!err) { /* Collect stats */ clock_gettime(CLOCK_MONOTONIC, &end_time); timespec_diff(start_time, end_time, &compress_time); if (compress_time < knet_h->stats.rx_compress_time_min) { knet_h->stats.rx_compress_time_min = compress_time; } if (compress_time > knet_h->stats.rx_compress_time_max) { knet_h->stats.rx_compress_time_max = compress_time; } knet_h->stats.rx_compress_time_ave = (knet_h->stats.rx_compress_time_ave * knet_h->stats.rx_compressed_packets + compress_time) / (knet_h->stats.rx_compressed_packets+1); knet_h->stats.rx_compressed_packets++; knet_h->stats.rx_compressed_original_bytes += decmp_outlen; knet_h->stats.rx_compressed_size_bytes += len - KNET_HEADER_SIZE; memmove(inbuf->khp_data_userdata, knet_h->recv_from_links_buf_decompress, decmp_outlen); len = decmp_outlen + KNET_HEADER_DATA_SIZE; } else { log_warn(knet_h, KNET_SUB_COMPRESS, "Unable to decompress packet (%d): %s", err, strerror(errno)); return; } } if (inbuf->kh_type == KNET_HEADER_TYPE_DATA) { if (knet_h->enabled != 1) /* data forward is disabled */ break; /* Only update the crypto overhead for data packets. 
Mainly to be consistent with TX */ knet_h->stats.rx_crypt_time_ave = (knet_h->stats.rx_crypt_time_ave * knet_h->stats.rx_crypt_packets + crypt_time) / (knet_h->stats.rx_crypt_packets+1); knet_h->stats.rx_crypt_packets++; if (knet_h->dst_host_filter_fn) { size_t host_idx; int found = 0; bcast = knet_h->dst_host_filter_fn( knet_h->dst_host_filter_fn_private_data, (const unsigned char *)inbuf->khp_data_userdata, len - KNET_HEADER_DATA_SIZE, KNET_NOTIFY_RX, knet_h->host_id, inbuf->kh_node, &channel, dst_host_ids, &dst_host_ids_entries); if (bcast < 0) { log_debug(knet_h, KNET_SUB_RX, "Error from dst_host_filter_fn: %d", bcast); return; } if ((!bcast) && (!dst_host_ids_entries)) { log_debug(knet_h, KNET_SUB_RX, "Message is unicast but no dst_host_ids_entries"); return; } /* check if we are dst for this packet */ if (!bcast) { if (dst_host_ids_entries > KNET_MAX_HOST) { log_debug(knet_h, KNET_SUB_RX, "dst_host_filter_fn returned too many destinations"); return; } for (host_idx = 0; host_idx < dst_host_ids_entries; host_idx++) { if (dst_host_ids[host_idx] == knet_h->host_id) { found = 1; break; } } if (!found) { log_debug(knet_h, KNET_SUB_RX, "Packet is not for us"); return; } } } } if (inbuf->kh_type == KNET_HEADER_TYPE_DATA) { if (!knet_h->sockfd[channel].in_use) { log_debug(knet_h, KNET_SUB_RX, "received packet for channel %d but there is no local sock connected", channel); return; } memset(iov_out, 0, sizeof(iov_out)); iov_out[0].iov_base = (void *) inbuf->khp_data_userdata; iov_out[0].iov_len = len - KNET_HEADER_DATA_SIZE; outlen = writev(knet_h->sockfd[channel].sockfd[knet_h->sockfd[channel].is_created], iov_out, 1); if (outlen <= 0) { knet_h->sock_notify_fn(knet_h->sock_notify_fn_private_data, knet_h->sockfd[channel].sockfd[0], channel, KNET_NOTIFY_RX, outlen, errno); return; } if ((size_t)outlen == iov_out[0].iov_len) { _seq_num_set(src_host, inbuf->khp_data_seq_num, 0); } } else { /* HOSTINFO */ knet_hostinfo = (struct knet_hostinfo *)inbuf->khp_data_userdata; if 
(knet_hostinfo->khi_bcast == KNET_HOSTINFO_UCAST) { knet_hostinfo->khi_dst_node_id = ntohs(knet_hostinfo->khi_dst_node_id); } if (!_seq_num_lookup(src_host, inbuf->khp_data_seq_num, 0, 0)) { return; } _seq_num_set(src_host, inbuf->khp_data_seq_num, 0); switch(knet_hostinfo->khi_type) { case KNET_HOSTINFO_TYPE_LINK_UP_DOWN: break; case KNET_HOSTINFO_TYPE_LINK_TABLE: break; default: log_warn(knet_h, KNET_SUB_RX, "Receiving unknown host info message from host %u", src_host->host_id); break; } } break; case KNET_HEADER_TYPE_PING: outlen = KNET_HEADER_PING_SIZE; inbuf->kh_type = KNET_HEADER_TYPE_PONG; inbuf->kh_node = htons(knet_h->host_id); recv_seq_num = ntohs(inbuf->khp_ping_seq_num); src_link->status.stats.rx_ping_packets++; src_link->status.stats.rx_ping_bytes += len; wipe_bufs = 0; if (!inbuf->khp_ping_timed) { /* * we might be receiving this message from all links, but we want * to process it only the first time */ if (recv_seq_num != src_host->untimed_rx_seq_num) { /* * cache the untimed seq num */ src_host->untimed_rx_seq_num = recv_seq_num; /* * if the host has received data in between * untimed ping, then we don't need to wipe the bufs */ if (src_host->got_data) { src_host->got_data = 0; wipe_bufs = 0; } else { wipe_bufs = 1; } } _seq_num_lookup(src_host, recv_seq_num, 0, wipe_bufs); } else { /* * pings always arrives in bursts over all the link * catch the first of them to cache the seq num and * avoid duplicate processing */ if (recv_seq_num != src_host->timed_rx_seq_num) { src_host->timed_rx_seq_num = recv_seq_num; if (recv_seq_num == 0) { _seq_num_lookup(src_host, recv_seq_num, 0, 1); } } } if (knet_h->crypto_instance) { if (crypto_encrypt_and_sign(knet_h, (const unsigned char *)inbuf, outlen, knet_h->recv_from_links_buf_crypt, &outlen) < 0) { log_debug(knet_h, KNET_SUB_RX, "Unable to encrypt pong packet"); break; } outbuf = knet_h->recv_from_links_buf_crypt; knet_h->stats_extra.tx_crypt_pong_packets++; } retry_pong: if 
(transport_get_connection_oriented(knet_h, src_link->transport) == TRANSPORT_PROTO_NOT_CONNECTION_ORIENTED) { len = sendto(src_link->outsock, outbuf, outlen, MSG_DONTWAIT | MSG_NOSIGNAL, (struct sockaddr *) &src_link->dst_addr, sizeof(struct sockaddr_storage)); } else { len = sendto(src_link->outsock, outbuf, outlen, MSG_DONTWAIT | MSG_NOSIGNAL, NULL, 0); } savederrno = errno; if (len != outlen) { err = transport_tx_sock_error(knet_h, src_link->transport, src_link->outsock, len, savederrno); switch(err) { case -1: /* unrecoverable error */ log_debug(knet_h, KNET_SUB_RX, "Unable to send pong reply (sock: %d) packet (sendto): %d %s. recorded src ip: %s src port: %s dst ip: %s dst port: %s", src_link->outsock, errno, strerror(errno), src_link->status.src_ipaddr, src_link->status.src_port, src_link->status.dst_ipaddr, src_link->status.dst_port); src_link->status.stats.tx_pong_errors++; break; case 0: /* ignore error and continue */ break; case 1: /* retry to send those same data */ src_link->status.stats.tx_pong_retries++; goto retry_pong; break; } } src_link->status.stats.tx_pong_packets++; src_link->status.stats.tx_pong_bytes += outlen; break; case KNET_HEADER_TYPE_PONG: src_link->status.stats.rx_pong_packets++; src_link->status.stats.rx_pong_bytes += len; clock_gettime(CLOCK_MONOTONIC, &src_link->status.pong_last); memmove(&recvtime, &inbuf->khp_ping_time[0], sizeof(struct timespec)); timespec_diff(recvtime, src_link->status.pong_last, &latency_last); - src_link->status.latency = - ((src_link->status.latency * src_link->latency_exp) + - ((latency_last / 1000llu) * - (src_link->latency_fix - src_link->latency_exp))) / - src_link->latency_fix; - - if (src_link->status.latency < src_link->pong_timeout_adj) { - if (!src_link->status.connected) { - if (src_link->received_pong >= src_link->pong_count) { - log_info(knet_h, KNET_SUB_RX, "host: %u link: %u is up", - src_host->host_id, src_link->link_id); - _link_updown(knet_h, src_host->host_id, src_link->link_id, 
src_link->status.enabled, 1); - } else { - src_link->received_pong++; - log_debug(knet_h, KNET_SUB_RX, "host: %u link: %u received pong: %u", - src_host->host_id, src_link->link_id, src_link->received_pong); + if ((latency_last / 1000llu) > src_link->pong_timeout) { + log_debug(knet_h, KNET_SUB_RX, + "Incoming pong packet from host: %u link: %u has higher latency than pong_timeout. Discarding", + src_host->host_id, src_link->link_id); + } else { + src_link->status.latency = + ((src_link->status.latency * src_link->latency_exp) + + ((latency_last / 1000llu) * + (src_link->latency_fix - src_link->latency_exp))) / + src_link->latency_fix; + + if (src_link->status.latency < src_link->pong_timeout_adj) { + if (!src_link->status.connected) { + if (src_link->received_pong >= src_link->pong_count) { + log_info(knet_h, KNET_SUB_RX, "host: %u link: %u is up", + src_host->host_id, src_link->link_id); + _link_updown(knet_h, src_host->host_id, src_link->link_id, src_link->status.enabled, 1); + } else { + src_link->received_pong++; + log_debug(knet_h, KNET_SUB_RX, "host: %u link: %u received pong: %u", + src_host->host_id, src_link->link_id, src_link->received_pong); + } } } + /* Calculate latency stats */ + if (src_link->status.latency > src_link->status.stats.latency_max) { + src_link->status.stats.latency_max = src_link->status.latency; + } + if (src_link->status.latency < src_link->status.stats.latency_min) { + src_link->status.stats.latency_min = src_link->status.latency; + } + src_link->status.stats.latency_ave = + (src_link->status.stats.latency_ave * src_link->status.stats.latency_samples + + src_link->status.latency) / (src_link->status.stats.latency_samples+1); + src_link->status.stats.latency_samples++; } - /* Calculate latency stats */ - if (src_link->status.latency > src_link->status.stats.latency_max) { - src_link->status.stats.latency_max = src_link->status.latency; - } - if (src_link->status.latency < src_link->status.stats.latency_min) { - 
src_link->status.stats.latency_min = src_link->status.latency; - } - src_link->status.stats.latency_ave = - (src_link->status.stats.latency_ave * src_link->status.stats.latency_samples + - src_link->status.latency) / (src_link->status.stats.latency_samples+1); - src_link->status.stats.latency_samples++; - break; case KNET_HEADER_TYPE_PMTUD: src_link->status.stats.rx_pmtu_packets++; src_link->status.stats.rx_pmtu_bytes += len; outlen = KNET_HEADER_PMTUD_SIZE; inbuf->kh_type = KNET_HEADER_TYPE_PMTUD_REPLY; inbuf->kh_node = htons(knet_h->host_id); if (knet_h->crypto_instance) { if (crypto_encrypt_and_sign(knet_h, (const unsigned char *)inbuf, outlen, knet_h->recv_from_links_buf_crypt, &outlen) < 0) { log_debug(knet_h, KNET_SUB_RX, "Unable to encrypt PMTUd reply packet"); break; } outbuf = knet_h->recv_from_links_buf_crypt; knet_h->stats_extra.tx_crypt_pmtu_reply_packets++; } savederrno = pthread_mutex_lock(&knet_h->tx_mutex); if (savederrno) { log_err(knet_h, KNET_SUB_RX, "Unable to get TX mutex lock: %s", strerror(savederrno)); goto out_pmtud; } retry_pmtud: if (transport_get_connection_oriented(knet_h, src_link->transport) == TRANSPORT_PROTO_NOT_CONNECTION_ORIENTED) { len = sendto(src_link->outsock, outbuf, outlen, MSG_DONTWAIT | MSG_NOSIGNAL, (struct sockaddr *) &src_link->dst_addr, sizeof(struct sockaddr_storage)); } else { len = sendto(src_link->outsock, outbuf, outlen, MSG_DONTWAIT | MSG_NOSIGNAL, NULL, 0); } savederrno = errno; if (len != outlen) { err = transport_tx_sock_error(knet_h, src_link->transport, src_link->outsock, len, savederrno); switch(err) { case -1: /* unrecoverable error */ log_debug(knet_h, KNET_SUB_RX, "Unable to send PMTUd reply (sock: %d) packet (sendto): %d %s. 
recorded src ip: %s src port: %s dst ip: %s dst port: %s", src_link->outsock, errno, strerror(errno), src_link->status.src_ipaddr, src_link->status.src_port, src_link->status.dst_ipaddr, src_link->status.dst_port); src_link->status.stats.tx_pmtu_errors++; break; case 0: /* ignore error and continue */ src_link->status.stats.tx_pmtu_errors++; break; case 1: /* retry to send those same data */ src_link->status.stats.tx_pmtu_retries++; goto retry_pmtud; break; } } pthread_mutex_unlock(&knet_h->tx_mutex); out_pmtud: break; case KNET_HEADER_TYPE_PMTUD_REPLY: src_link->status.stats.rx_pmtu_packets++; src_link->status.stats.rx_pmtu_bytes += len; if (pthread_mutex_lock(&knet_h->pmtud_mutex) != 0) { log_debug(knet_h, KNET_SUB_RX, "Unable to get mutex lock"); break; } src_link->last_recv_mtu = inbuf->khp_pmtud_size; pthread_cond_signal(&knet_h->pmtud_cond); pthread_mutex_unlock(&knet_h->pmtud_mutex); break; default: return; } } static void _handle_recv_from_links(knet_handle_t knet_h, int sockfd, struct knet_mmsghdr *msg) { int err, savederrno; int i, msg_recv, transport; if (pthread_rwlock_rdlock(&knet_h->global_rwlock) != 0) { log_debug(knet_h, KNET_SUB_RX, "Unable to get global read lock"); return; } if (_is_valid_fd(knet_h, sockfd) < 1) { /* * this is normal if a fd got an event and before we grab the read lock * and the link is removed by another thread */ goto exit_unlock; } transport = knet_h->knet_transport_fd_tracker[sockfd].transport; /* * reset msg_namelen to buffer size because after recvmmsg * each msg_namelen will contain sizeof sockaddr_in or sockaddr_in6 */ for (i = 0; i < PCKT_RX_BUFS; i++) { msg[i].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage); } msg_recv = _recvmmsg(sockfd, &msg[0], PCKT_RX_BUFS, MSG_DONTWAIT | MSG_NOSIGNAL); savederrno = errno; /* * WARNING: man page for recvmmsg is wrong. 
Kernel implementation here: * recvmmsg can return: * -1 on error * 0 if the previous run of recvmmsg recorded an error on the socket * N number of messages (see exception below). * * If there is an error from recvmsg after receiving a frame or more, the recvmmsg * loop is interrupted, error recorded in the socket (getsockopt(SO_ERROR) and * it will be visibile in the next run. * * Need to be careful how we handle errors at this stage. * * error messages need to be handled on a per transport/protocol base * at this point we have different layers of error handling * - msg_recv < 0 -> error from this run * msg_recv = 0 -> error from previous run and error on socket needs to be cleared * - per-transport message data * example: msg[i].msg_hdr.msg_flags & MSG_NOTIFICATION or msg_len for SCTP == EOF, * but for UDP it is perfectly legal to receive a 0 bytes message.. go figure * - NOTE: on SCTP MSG_NOTIFICATION we get msg_recv == PCKT_FRAG_MAX messages and no * errno set. That means the error api needs to be able to abort the loop below. 
*/ if (msg_recv <= 0) { transport_rx_sock_error(knet_h, transport, sockfd, msg_recv, savederrno); goto exit_unlock; } for (i = 0; i < msg_recv; i++) { err = transport_rx_is_data(knet_h, transport, sockfd, &msg[i]); /* * TODO: make this section silent once we are confident * all protocols packet handlers are good */ switch(err) { case -1: /* on error */ log_debug(knet_h, KNET_SUB_RX, "Transport reported error parsing packet"); goto exit_unlock; break; case 0: /* packet is not data and we should continue the packet process loop */ log_debug(knet_h, KNET_SUB_RX, "Transport reported no data, continue"); break; case 1: /* packet is not data and we should STOP the packet process loop */ log_debug(knet_h, KNET_SUB_RX, "Transport reported no data, stop"); goto exit_unlock; break; case 2: /* packet is data and should be parsed as such */ /* * processing incoming packets vs access lists */ if ((knet_h->use_access_lists) && (transport_get_acl_type(knet_h, transport) == USE_GENERIC_ACL)) { if (!check_validate(knet_h, sockfd, transport, msg[i].msg_hdr.msg_name)) { char src_ipaddr[KNET_MAX_HOST_LEN]; char src_port[KNET_MAX_PORT_LEN]; memset(src_ipaddr, 0, KNET_MAX_HOST_LEN); memset(src_port, 0, KNET_MAX_PORT_LEN); - knet_addrtostr(msg[i].msg_hdr.msg_name, sockaddr_len(msg[i].msg_hdr.msg_name), - src_ipaddr, KNET_MAX_HOST_LEN, - src_port, KNET_MAX_PORT_LEN); + if (knet_addrtostr(msg[i].msg_hdr.msg_name, sockaddr_len(msg[i].msg_hdr.msg_name), + src_ipaddr, KNET_MAX_HOST_LEN, + src_port, KNET_MAX_PORT_LEN) < 0) { - log_debug(knet_h, KNET_SUB_RX, "Packet rejected from %s/%s", src_ipaddr, src_port); + log_debug(knet_h, KNET_SUB_RX, "Packet rejected: unable to resolve host/port"); + } else { + log_debug(knet_h, KNET_SUB_RX, "Packet rejected from %s/%s", src_ipaddr, src_port); + } /* * continue processing the other packets */ continue; } } _parse_recv_from_links(knet_h, sockfd, &msg[i]); break; } } exit_unlock: pthread_rwlock_unlock(&knet_h->global_rwlock); } void 
*_handle_recv_from_links_thread(void *data) { int i, nev; knet_handle_t knet_h = (knet_handle_t) data; struct epoll_event events[KNET_EPOLL_MAX_EVENTS]; struct sockaddr_storage address[PCKT_RX_BUFS]; struct knet_mmsghdr msg[PCKT_RX_BUFS]; struct iovec iov_in[PCKT_RX_BUFS]; set_thread_status(knet_h, KNET_THREAD_RX, KNET_THREAD_STARTED); memset(&msg, 0, sizeof(msg)); for (i = 0; i < PCKT_RX_BUFS; i++) { iov_in[i].iov_base = (void *)knet_h->recv_from_links_buf[i]; iov_in[i].iov_len = KNET_DATABUFSIZE; memset(&msg[i].msg_hdr, 0, sizeof(struct msghdr)); msg[i].msg_hdr.msg_name = &address[i]; msg[i].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage); msg[i].msg_hdr.msg_iov = &iov_in[i]; msg[i].msg_hdr.msg_iovlen = 1; } while (!shutdown_in_progress(knet_h)) { nev = epoll_wait(knet_h->recv_from_links_epollfd, events, KNET_EPOLL_MAX_EVENTS, KNET_THREADS_TIMERES / 1000); /* * the RX threads only need to notify that there has been at least * one successful run after queue flush has been requested. * See setfwd in handle.c */ if (get_thread_flush_queue(knet_h, KNET_THREAD_RX) == KNET_THREAD_QUEUE_FLUSH) { set_thread_flush_queue(knet_h, KNET_THREAD_RX, KNET_THREAD_QUEUE_FLUSHED); } /* * we use timeout to detect if thread is shutting down */ if (nev == 0) { continue; } for (i = 0; i < nev; i++) { _handle_recv_from_links(knet_h, events[i].data.fd, msg); } } set_thread_status(knet_h, KNET_THREAD_RX, KNET_THREAD_STOPPED); return NULL; } diff --git a/libknet/threads_tx.c b/libknet/threads_tx.c index 3969b109..1d954d6e 100644 --- a/libknet/threads_tx.c +++ b/libknet/threads_tx.c @@ -1,783 +1,788 @@ /* * Copyright (C) 2012-2019 Red Hat, Inc. All rights reserved. * * Authors: Fabio M. 
Di Nitto * Federico Simoncelli * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include #include #include "compat.h" #include "compress.h" #include "crypto.h" #include "host.h" #include "link.h" #include "logging.h" #include "transports.h" #include "transport_common.h" #include "threads_common.h" #include "threads_heartbeat.h" #include "threads_tx.h" #include "netutils.h" /* * SEND */ static int _dispatch_to_links(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_mmsghdr *msg, int msgs_to_send) { int link_idx, msg_idx, sent_msgs, prev_sent, progress; int err = 0, savederrno = 0; unsigned int i; struct knet_mmsghdr *cur; struct knet_link *cur_link; for (link_idx = 0; link_idx < dst_host->active_link_entries; link_idx++) { prev_sent = 0; progress = 1; cur_link = &dst_host->link[dst_host->active_links[link_idx]]; if (cur_link->transport == KNET_TRANSPORT_LOOPBACK) { continue; } msg_idx = 0; while (msg_idx < msgs_to_send) { msg[msg_idx].msg_hdr.msg_name = &cur_link->dst_addr; /* Cast for Linux/BSD compatibility */ for (i=0; i<(unsigned int)msg[msg_idx].msg_hdr.msg_iovlen; i++) { cur_link->status.stats.tx_data_bytes += msg[msg_idx].msg_hdr.msg_iov[i].iov_len; } cur_link->status.stats.tx_data_packets++; msg_idx++; } retry: cur = &msg[prev_sent]; sent_msgs = _sendmmsg(dst_host->link[dst_host->active_links[link_idx]].outsock, transport_get_connection_oriented(knet_h, dst_host->link[dst_host->active_links[link_idx]].transport), &cur[0], msgs_to_send - prev_sent, MSG_DONTWAIT | MSG_NOSIGNAL); savederrno = errno; err = transport_tx_sock_error(knet_h, dst_host->link[dst_host->active_links[link_idx]].transport, dst_host->link[dst_host->active_links[link_idx]].outsock, sent_msgs, savederrno); switch(err) { case -1: /* unrecoverable error */ cur_link->status.stats.tx_data_errors++; goto out_unlock; break; case 0: /* ignore error and continue */ break; case 1: /* retry to send those same data */ 
cur_link->status.stats.tx_data_retries++; goto retry; break; } prev_sent = prev_sent + sent_msgs; if ((sent_msgs >= 0) && (prev_sent < msgs_to_send)) { if ((sent_msgs) || (progress)) { if (sent_msgs) { progress = 1; } else { progress = 0; } #ifdef DEBUG log_debug(knet_h, KNET_SUB_TX, "Unable to send all (%d/%d) data packets to host %s (%u) link %s:%s (%u)", sent_msgs, msg_idx, dst_host->name, dst_host->host_id, dst_host->link[dst_host->active_links[link_idx]].status.dst_ipaddr, dst_host->link[dst_host->active_links[link_idx]].status.dst_port, dst_host->link[dst_host->active_links[link_idx]].link_id); #endif goto retry; } if (!progress) { savederrno = EAGAIN; err = -1; goto out_unlock; } } if ((dst_host->link_handler_policy == KNET_LINK_POLICY_RR) && (dst_host->active_link_entries > 1)) { uint8_t cur_link_id = dst_host->active_links[0]; memmove(&dst_host->active_links[0], &dst_host->active_links[1], KNET_MAX_LINK - 1); dst_host->active_links[dst_host->active_link_entries - 1] = cur_link_id; break; } } out_unlock: errno = savederrno; return err; } static int _parse_recv_from_sock(knet_handle_t knet_h, size_t inlen, int8_t channel, int is_sync) { size_t outlen, frag_len; struct knet_host *dst_host; knet_node_id_t dst_host_ids_temp[KNET_MAX_HOST]; size_t dst_host_ids_entries_temp = 0; knet_node_id_t dst_host_ids[KNET_MAX_HOST]; size_t dst_host_ids_entries = 0; int bcast = 1; struct knet_hostinfo *knet_hostinfo; struct iovec iov_out[PCKT_FRAG_MAX][2]; int iovcnt_out = 2; uint8_t frag_idx; unsigned int temp_data_mtu; size_t host_idx; int send_mcast = 0; struct knet_header *inbuf; int savederrno = 0; int err = 0; seq_num_t tx_seq_num; struct knet_mmsghdr msg[PCKT_FRAG_MAX]; int msgs_to_send, msg_idx; unsigned int i; int j; int send_local = 0; int data_compressed = 0; size_t uncrypted_frag_size; inbuf = knet_h->recv_from_sock_buf; if ((knet_h->enabled != 1) && (inbuf->kh_type != KNET_HEADER_TYPE_HOST_INFO)) { /* data forward is disabled */ log_debug(knet_h, KNET_SUB_TX, 
"Received data packet but forwarding is disabled"); savederrno = ECANCELED; err = -1; goto out_unlock; } /* * move this into a separate function to expand on * extra switching rules */ switch(inbuf->kh_type) { case KNET_HEADER_TYPE_DATA: if (knet_h->dst_host_filter_fn) { bcast = knet_h->dst_host_filter_fn( knet_h->dst_host_filter_fn_private_data, (const unsigned char *)inbuf->khp_data_userdata, inlen, KNET_NOTIFY_TX, knet_h->host_id, knet_h->host_id, &channel, dst_host_ids_temp, &dst_host_ids_entries_temp); if (bcast < 0) { log_debug(knet_h, KNET_SUB_TX, "Error from dst_host_filter_fn: %d", bcast); savederrno = EFAULT; err = -1; goto out_unlock; } if ((!bcast) && (!dst_host_ids_entries_temp)) { log_debug(knet_h, KNET_SUB_TX, "Message is unicast but no dst_host_ids_entries"); savederrno = EINVAL; err = -1; goto out_unlock; } if ((!bcast) && (dst_host_ids_entries_temp > KNET_MAX_HOST)) { log_debug(knet_h, KNET_SUB_TX, "dst_host_filter_fn returned too many destinations"); savederrno = EINVAL; err = -1; goto out_unlock; } } /* Send to localhost if appropriate and enabled */ if (knet_h->has_loop_link) { send_local = 0; if (bcast) { send_local = 1; } else { for (i=0; i< dst_host_ids_entries_temp; i++) { if (dst_host_ids_temp[i] == knet_h->host_id) { send_local = 1; } } } if (send_local) { const unsigned char *buf = inbuf->khp_data_userdata; ssize_t buflen = inlen; struct knet_link *local_link; local_link = knet_h->host_index[knet_h->host_id]->link; local_retry: err = write(knet_h->sockfd[channel].sockfd[knet_h->sockfd[channel].is_created], buf, buflen); if (err < 0) { log_err(knet_h, KNET_SUB_TRANSP_LOOPBACK, "send local failed. 
error=%s\n", strerror(errno)); local_link->status.stats.tx_data_errors++; } if (err > 0 && err < buflen) { log_debug(knet_h, KNET_SUB_TRANSP_LOOPBACK, "send local incomplete=%d bytes of %zu\n", err, inlen); local_link->status.stats.tx_data_retries++; buf += err; buflen -= err; - usleep(KNET_THREADS_TIMERES / 16); goto local_retry; } if (err == buflen) { local_link->status.stats.tx_data_packets++; local_link->status.stats.tx_data_bytes += inlen; } } } break; case KNET_HEADER_TYPE_HOST_INFO: knet_hostinfo = (struct knet_hostinfo *)inbuf->khp_data_userdata; if (knet_hostinfo->khi_bcast == KNET_HOSTINFO_UCAST) { bcast = 0; dst_host_ids_temp[0] = knet_hostinfo->khi_dst_node_id; dst_host_ids_entries_temp = 1; knet_hostinfo->khi_dst_node_id = htons(knet_hostinfo->khi_dst_node_id); } break; default: log_warn(knet_h, KNET_SUB_TX, "Receiving unknown messages from socket"); savederrno = ENOMSG; err = -1; goto out_unlock; break; } if (is_sync) { if ((bcast) || ((!bcast) && (dst_host_ids_entries_temp > 1))) { log_debug(knet_h, KNET_SUB_TX, "knet_send_sync is only supported with unicast packets for one destination"); savederrno = E2BIG; err = -1; goto out_unlock; } } /* * check destinations hosts before spending time * in fragmenting/encrypting packets to save * time processing data for unreachable hosts. * for unicast, also remap the destination data * to skip unreachable hosts. 
*/ if (!bcast) { dst_host_ids_entries = 0; for (host_idx = 0; host_idx < dst_host_ids_entries_temp; host_idx++) { dst_host = knet_h->host_index[dst_host_ids_temp[host_idx]]; if (!dst_host) { continue; } if (!(dst_host->host_id == knet_h->host_id && knet_h->has_loop_link) && dst_host->status.reachable) { dst_host_ids[dst_host_ids_entries] = dst_host_ids_temp[host_idx]; dst_host_ids_entries++; } } if (!dst_host_ids_entries) { savederrno = EHOSTDOWN; err = -1; goto out_unlock; } } else { send_mcast = 0; for (dst_host = knet_h->host_head; dst_host != NULL; dst_host = dst_host->next) { if (!(dst_host->host_id == knet_h->host_id && knet_h->has_loop_link) && dst_host->status.reachable) { send_mcast = 1; break; } } if (!send_mcast) { savederrno = EHOSTDOWN; err = -1; goto out_unlock; } } if (!knet_h->data_mtu) { /* * using MIN_MTU_V4 for data mtu is not completely accurate but safe enough */ log_debug(knet_h, KNET_SUB_TX, "Received data packet but data MTU is still unknown." " Packet might not be delivered." 
" Assuming minimum IPv4 MTU (%d)", KNET_PMTUD_MIN_MTU_V4); temp_data_mtu = KNET_PMTUD_MIN_MTU_V4; } else { /* * take a copy of the mtu to avoid value changing under * our feet while we are sending a fragmented pckt */ temp_data_mtu = knet_h->data_mtu; } /* * compress data */ if ((knet_h->compress_model > 0) && (inlen > knet_h->compress_threshold)) { size_t cmp_outlen = KNET_DATABUFSIZE_COMPRESS; struct timespec start_time; struct timespec end_time; uint64_t compress_time; clock_gettime(CLOCK_MONOTONIC, &start_time); err = compress(knet_h, (const unsigned char *)inbuf->khp_data_userdata, inlen, knet_h->send_to_links_buf_compress, (ssize_t *)&cmp_outlen); if (err < 0) { log_warn(knet_h, KNET_SUB_COMPRESS, "Compression failed (%d): %s", err, strerror(errno)); } else { /* Collect stats */ clock_gettime(CLOCK_MONOTONIC, &end_time); timespec_diff(start_time, end_time, &compress_time); if (compress_time < knet_h->stats.tx_compress_time_min) { knet_h->stats.tx_compress_time_min = compress_time; } if (compress_time > knet_h->stats.tx_compress_time_max) { knet_h->stats.tx_compress_time_max = compress_time; } knet_h->stats.tx_compress_time_ave = (unsigned long long)(knet_h->stats.tx_compress_time_ave * knet_h->stats.tx_compressed_packets + compress_time) / (knet_h->stats.tx_compressed_packets+1); knet_h->stats.tx_compressed_packets++; knet_h->stats.tx_compressed_original_bytes += inlen; knet_h->stats.tx_compressed_size_bytes += cmp_outlen; if (cmp_outlen < inlen) { memmove(inbuf->khp_data_userdata, knet_h->send_to_links_buf_compress, cmp_outlen); inlen = cmp_outlen; data_compressed = 1; } } } if (knet_h->compress_model > 0 && !data_compressed) { knet_h->stats.tx_uncompressed_packets++; } /* * prepare the outgoing buffers */ frag_len = inlen; frag_idx = 0; inbuf->khp_data_bcast = bcast; inbuf->khp_data_frag_num = ceil((float)inlen / temp_data_mtu); inbuf->khp_data_channel = channel; if (data_compressed) { inbuf->khp_data_compress = knet_h->compress_model; } else { 
inbuf->khp_data_compress = 0; } if (pthread_mutex_lock(&knet_h->tx_seq_num_mutex)) { log_debug(knet_h, KNET_SUB_TX, "Unable to get seq mutex lock"); goto out_unlock; } knet_h->tx_seq_num++; /* * force seq_num 0 to detect a node that has crashed and rejoining * the knet instance. seq_num 0 will clear the buffers in the RX * thread */ if (knet_h->tx_seq_num == 0) { knet_h->tx_seq_num++; } /* * cache the value in locked context */ tx_seq_num = knet_h->tx_seq_num; inbuf->khp_data_seq_num = htons(knet_h->tx_seq_num); pthread_mutex_unlock(&knet_h->tx_seq_num_mutex); /* * forcefully broadcast a ping to all nodes every SEQ_MAX / 8 * pckts. * this solves 2 problems: * 1) on TX socket overloads we generate extra pings to keep links alive * 2) in 3+ nodes setup, where all the traffic is flowing between node 1 and 2, * node 3+ will be able to keep in sync on the TX seq_num even without * receiving traffic or pings in betweens. This avoids issues with * rollover of the circular buffer */ if (tx_seq_num % (SEQ_MAX / 8) == 0) { _send_pings(knet_h, 0); } if (inbuf->khp_data_frag_num > 1) { while (frag_idx < inbuf->khp_data_frag_num) { /* * set the iov_base */ iov_out[frag_idx][0].iov_base = (void *)knet_h->send_to_links_buf[frag_idx]; iov_out[frag_idx][0].iov_len = KNET_HEADER_DATA_SIZE; iov_out[frag_idx][1].iov_base = inbuf->khp_data_userdata + (temp_data_mtu * frag_idx); /* * set the len */ if (frag_len > temp_data_mtu) { iov_out[frag_idx][1].iov_len = temp_data_mtu; } else { iov_out[frag_idx][1].iov_len = frag_len; } /* * copy the frag info on all buffers */ knet_h->send_to_links_buf[frag_idx]->kh_type = inbuf->kh_type; knet_h->send_to_links_buf[frag_idx]->khp_data_seq_num = inbuf->khp_data_seq_num; knet_h->send_to_links_buf[frag_idx]->khp_data_frag_num = inbuf->khp_data_frag_num; knet_h->send_to_links_buf[frag_idx]->khp_data_bcast = inbuf->khp_data_bcast; knet_h->send_to_links_buf[frag_idx]->khp_data_channel = inbuf->khp_data_channel; 
knet_h->send_to_links_buf[frag_idx]->khp_data_compress = inbuf->khp_data_compress; frag_len = frag_len - temp_data_mtu; frag_idx++; } iovcnt_out = 2; } else { iov_out[frag_idx][0].iov_base = (void *)inbuf; iov_out[frag_idx][0].iov_len = frag_len + KNET_HEADER_DATA_SIZE; iovcnt_out = 1; } if (knet_h->crypto_instance) { struct timespec start_time; struct timespec end_time; uint64_t crypt_time; frag_idx = 0; while (frag_idx < inbuf->khp_data_frag_num) { clock_gettime(CLOCK_MONOTONIC, &start_time); if (crypto_encrypt_and_signv( knet_h, iov_out[frag_idx], iovcnt_out, knet_h->send_to_links_buf_crypt[frag_idx], (ssize_t *)&outlen) < 0) { log_debug(knet_h, KNET_SUB_TX, "Unable to encrypt packet"); savederrno = ECHILD; err = -1; goto out_unlock; } clock_gettime(CLOCK_MONOTONIC, &end_time); timespec_diff(start_time, end_time, &crypt_time); if (crypt_time < knet_h->stats.tx_crypt_time_min) { knet_h->stats.tx_crypt_time_min = crypt_time; } if (crypt_time > knet_h->stats.tx_crypt_time_max) { knet_h->stats.tx_crypt_time_max = crypt_time; } knet_h->stats.tx_crypt_time_ave = (knet_h->stats.tx_crypt_time_ave * knet_h->stats.tx_crypt_packets + crypt_time) / (knet_h->stats.tx_crypt_packets+1); uncrypted_frag_size = 0; for (j=0; j < iovcnt_out; j++) { uncrypted_frag_size += iov_out[frag_idx][j].iov_len; } knet_h->stats.tx_crypt_byte_overhead += (outlen - uncrypted_frag_size); knet_h->stats.tx_crypt_packets++; iov_out[frag_idx][0].iov_base = knet_h->send_to_links_buf_crypt[frag_idx]; iov_out[frag_idx][0].iov_len = outlen; frag_idx++; } iovcnt_out = 1; } memset(&msg, 0, sizeof(msg)); msgs_to_send = inbuf->khp_data_frag_num; msg_idx = 0; while (msg_idx < msgs_to_send) { msg[msg_idx].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage); msg[msg_idx].msg_hdr.msg_iov = &iov_out[msg_idx][0]; msg[msg_idx].msg_hdr.msg_iovlen = iovcnt_out; msg_idx++; } if (!bcast) { for (host_idx = 0; host_idx < dst_host_ids_entries; host_idx++) { dst_host = knet_h->host_index[dst_host_ids[host_idx]]; err = 
_dispatch_to_links(knet_h, dst_host, &msg[0], msgs_to_send); savederrno = errno; if (err) { goto out_unlock; } } } else { for (dst_host = knet_h->host_head; dst_host != NULL; dst_host = dst_host->next) { if (dst_host->status.reachable) { err = _dispatch_to_links(knet_h, dst_host, &msg[0], msgs_to_send); savederrno = errno; if (err) { goto out_unlock; } } } } out_unlock: errno = savederrno; return err; } int knet_send_sync(knet_handle_t knet_h, const char *buff, const size_t buff_len, const int8_t channel) { int savederrno = 0, err = 0; if (!knet_h) { errno = EINVAL; return -1; } if (buff == NULL) { errno = EINVAL; return -1; } if (buff_len <= 0) { errno = EINVAL; return -1; } if (buff_len > KNET_MAX_PACKET_SIZE) { errno = EINVAL; return -1; } if (channel < 0) { errno = EINVAL; return -1; } if (channel >= KNET_DATAFD_MAX) { errno = EINVAL; return -1; } savederrno = pthread_rwlock_rdlock(&knet_h->global_rwlock); if (savederrno) { log_err(knet_h, KNET_SUB_TX, "Unable to get read lock: %s", strerror(savederrno)); errno = savederrno; return -1; } if (!knet_h->sockfd[channel].in_use) { savederrno = EINVAL; err = -1; goto out; } savederrno = pthread_mutex_lock(&knet_h->tx_mutex); if (savederrno) { log_err(knet_h, KNET_SUB_TX, "Unable to get TX mutex lock: %s", strerror(savederrno)); err = -1; goto out; } knet_h->recv_from_sock_buf->kh_type = KNET_HEADER_TYPE_DATA; memmove(knet_h->recv_from_sock_buf->khp_data_userdata, buff, buff_len); err = _parse_recv_from_sock(knet_h, buff_len, channel, 1); savederrno = errno; pthread_mutex_unlock(&knet_h->tx_mutex); out: pthread_rwlock_unlock(&knet_h->global_rwlock); errno = err ? 
savederrno : 0; return err; } static void _handle_send_to_links(knet_handle_t knet_h, struct msghdr *msg, int sockfd, int8_t channel, int type) { ssize_t inlen = 0; int savederrno = 0, docallback = 0; if ((channel >= 0) && (channel < KNET_DATAFD_MAX) && (!knet_h->sockfd[channel].is_socket)) { inlen = readv(sockfd, msg->msg_iov, 1); } else { inlen = recvmsg(sockfd, msg, MSG_DONTWAIT | MSG_NOSIGNAL); } if (inlen == 0) { savederrno = 0; docallback = 1; } else if (inlen < 0) { struct epoll_event ev; savederrno = errno; docallback = 1; memset(&ev, 0, sizeof(struct epoll_event)); - if (epoll_ctl(knet_h->send_to_links_epollfd, - EPOLL_CTL_DEL, knet_h->sockfd[channel].sockfd[knet_h->sockfd[channel].is_created], &ev)) { - log_err(knet_h, KNET_SUB_TX, "Unable to del datafd %d from linkfd epoll pool: %s", - knet_h->sockfd[channel].sockfd[0], strerror(savederrno)); - } else { - knet_h->sockfd[channel].has_error = 1; + if (channel != KNET_INTERNAL_DATA_CHANNEL) { + if (epoll_ctl(knet_h->send_to_links_epollfd, + EPOLL_CTL_DEL, knet_h->sockfd[channel].sockfd[knet_h->sockfd[channel].is_created], &ev)) { + log_err(knet_h, KNET_SUB_TX, "Unable to del datafd %d from linkfd epoll pool: %s", + knet_h->sockfd[channel].sockfd[0], strerror(savederrno)); + } else { + knet_h->sockfd[channel].has_error = 1; + } } + /* + * TODO: add error handling for KNET_INTERNAL_DATA_CHANNEL + * once we add support for internal knet communication + */ } else { knet_h->recv_from_sock_buf->kh_type = type; _parse_recv_from_sock(knet_h, inlen, channel, 0); } - if (docallback) { + if ((docallback) && (channel != KNET_INTERNAL_DATA_CHANNEL)) { knet_h->sock_notify_fn(knet_h->sock_notify_fn_private_data, knet_h->sockfd[channel].sockfd[0], channel, KNET_NOTIFY_TX, inlen, savederrno); } } void *_handle_send_to_links_thread(void *data) { knet_handle_t knet_h = (knet_handle_t) data; struct epoll_event events[KNET_EPOLL_MAX_EVENTS]; int i, nev, type; int flush, flush_queue_limit; int8_t channel; struct iovec iov_in; 
struct msghdr msg; struct sockaddr_storage address; set_thread_status(knet_h, KNET_THREAD_TX, KNET_THREAD_STARTED); memset(&iov_in, 0, sizeof(iov_in)); iov_in.iov_base = (void *)knet_h->recv_from_sock_buf->khp_data_userdata; iov_in.iov_len = KNET_MAX_PACKET_SIZE; memset(&msg, 0, sizeof(struct msghdr)); msg.msg_name = &address; msg.msg_namelen = sizeof(struct sockaddr_storage); msg.msg_iov = &iov_in; msg.msg_iovlen = 1; knet_h->recv_from_sock_buf->kh_version = KNET_HEADER_VERSION; knet_h->recv_from_sock_buf->khp_data_frag_seq = 0; knet_h->recv_from_sock_buf->kh_node = htons(knet_h->host_id); for (i = 0; i < PCKT_FRAG_MAX; i++) { knet_h->send_to_links_buf[i]->kh_version = KNET_HEADER_VERSION; knet_h->send_to_links_buf[i]->khp_data_frag_seq = i + 1; knet_h->send_to_links_buf[i]->kh_node = htons(knet_h->host_id); } flush_queue_limit = 0; while (!shutdown_in_progress(knet_h)) { nev = epoll_wait(knet_h->send_to_links_epollfd, events, KNET_EPOLL_MAX_EVENTS + 1, KNET_THREADS_TIMERES / 1000); flush = get_thread_flush_queue(knet_h, KNET_THREAD_TX); /* * we use timeout to detect if thread is shutting down */ if (nev == 0) { /* * ideally we want to communicate that we are done flushing * the queue when we have an epoll timeout event */ if (flush == KNET_THREAD_QUEUE_FLUSH) { set_thread_flush_queue(knet_h, KNET_THREAD_TX, KNET_THREAD_QUEUE_FLUSHED); flush_queue_limit = 0; } continue; } /* * fall back in case the TX sockets will continue receive traffic * and we do not hit an epoll timeout. * * allow up to a 100 loops to flush queues, then we give up. * there might be more clean ways to do it by checking the buffer queue * on each socket, but we have tons of sockets and calculations can go wrong. * Also, why would you disable data forwarding and still send packets? 
*/ if (flush == KNET_THREAD_QUEUE_FLUSH) { if (flush_queue_limit >= 100) { log_debug(knet_h, KNET_SUB_TX, "Timeout flushing the TX queue, expect packet loss"); set_thread_flush_queue(knet_h, KNET_THREAD_TX, KNET_THREAD_QUEUE_FLUSHED); flush_queue_limit = 0; } else { flush_queue_limit++; } } else { flush_queue_limit = 0; } if (pthread_rwlock_rdlock(&knet_h->global_rwlock) != 0) { log_debug(knet_h, KNET_SUB_TX, "Unable to get read lock"); continue; } for (i = 0; i < nev; i++) { if (events[i].data.fd == knet_h->hostsockfd[0]) { type = KNET_HEADER_TYPE_HOST_INFO; - channel = -1; + channel = KNET_INTERNAL_DATA_CHANNEL; } else { type = KNET_HEADER_TYPE_DATA; for (channel = 0; channel < KNET_DATAFD_MAX; channel++) { if ((knet_h->sockfd[channel].in_use) && (knet_h->sockfd[channel].sockfd[knet_h->sockfd[channel].is_created] == events[i].data.fd)) { break; } } if (channel >= KNET_DATAFD_MAX) { log_debug(knet_h, KNET_SUB_TX, "No available channels"); continue; /* channel not found */ } } if (pthread_mutex_lock(&knet_h->tx_mutex) != 0) { log_debug(knet_h, KNET_SUB_TX, "Unable to get mutex lock"); continue; } _handle_send_to_links(knet_h, &msg, events[i].data.fd, channel, type); pthread_mutex_unlock(&knet_h->tx_mutex); } pthread_rwlock_unlock(&knet_h->global_rwlock); } set_thread_status(knet_h, KNET_THREAD_TX, KNET_THREAD_STOPPED); return NULL; } diff --git a/libknet/transport_sctp.c b/libknet/transport_sctp.c index d97d6f98..505c1f29 100644 --- a/libknet/transport_sctp.c +++ b/libknet/transport_sctp.c @@ -1,1547 +1,1584 @@ /* * Copyright (C) 2016-2019 Red Hat, Inc. All rights reserved. 
* * Author: Christine Caulfield * * This software licensed under LGPL-2.0+ */ #include "config.h" #include <string.h> #include <unistd.h> #include <errno.h> #include <pthread.h> #include <sys/types.h> #include <sys/socket.h> #include <stdlib.h> +#include <assert.h> #include "compat.h" #include "host.h" #include "links.h" #include "links_acl.h" #include "links_acl_ip.h" #include "logging.h" #include "common.h" #include "transport_common.h" #include "threads_common.h" #ifdef HAVE_NETINET_SCTP_H #include <netinet/sctp.h> #include "transport_sctp.h" typedef struct sctp_handle_info { struct knet_list_head listen_links_list; struct knet_list_head connect_links_list; int connect_epollfd; int connectsockfd[2]; int listen_epollfd; int listensockfd[2]; pthread_t connect_thread; pthread_t listen_thread; socklen_t event_subscribe_kernel_size; char *event_subscribe_buffer; } sctp_handle_info_t; /* * use by fd_tracker data type */ #define SCTP_NO_LINK_INFO 0 #define SCTP_LISTENER_LINK_INFO 1 #define SCTP_ACCEPTED_LINK_INFO 2 #define SCTP_CONNECT_LINK_INFO 3 /* * this value is per listener */ #define MAX_ACCEPTED_SOCKS 256 typedef struct sctp_listen_link_info { struct knet_list_head list; int listen_sock; int accepted_socks[MAX_ACCEPTED_SOCKS]; struct sockaddr_storage src_address; int on_listener_epoll; int on_rx_epoll; } sctp_listen_link_info_t; typedef struct sctp_accepted_link_info { char mread_buf[KNET_DATABUFSIZE]; ssize_t mread_len; sctp_listen_link_info_t *link_info; } sctp_accepted_link_info_t ; typedef struct sctp_connect_link_info { struct knet_list_head list; sctp_listen_link_info_t *listener; struct knet_link *link; struct sockaddr_storage dst_address; int connect_sock; int on_connected_epoll; int on_rx_epoll; int close_sock; } sctp_connect_link_info_t; /* * socket handling functions * * those functions do NOT perform locking. 
locking * should be handled in the right context from callers */ /* * sockets are removed from rx_epoll from callers * see also error handling functions */ static int _close_connect_socket(knet_handle_t knet_h, struct knet_link *kn_link) { int err = 0, savederrno = 0; sctp_connect_link_info_t *info = kn_link->transport_link; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; struct epoll_event ev; if (info->on_connected_epoll) { memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLOUT; ev.data.fd = info->connect_sock; if (epoll_ctl(handle_info->connect_epollfd, EPOLL_CTL_DEL, info->connect_sock, &ev)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove connected socket from the epoll pool: %s", strerror(errno)); goto exit_error; } info->on_connected_epoll = 0; } exit_error: if (info->connect_sock != -1) { if (_set_fd_tracker(knet_h, info->connect_sock, KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, NULL) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set fd tracker: %s", strerror(savederrno)); goto exit_error; } close(info->connect_sock); info->connect_sock = -1; } errno = savederrno; return err; } static int _enable_sctp_notifications(knet_handle_t knet_h, int sock, const char *type) { int err = 0, savederrno = 0; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; if (setsockopt(sock, IPPROTO_SCTP, SCTP_EVENTS, handle_info->event_subscribe_buffer, handle_info->event_subscribe_kernel_size) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to enable %s events: %s", type, strerror(savederrno)); } errno = savederrno; return err; } static int _configure_sctp_socket(knet_handle_t knet_h, int sock, struct sockaddr_storage *address, uint64_t flags, const char *type) { int err = 0, savederrno = 0; int value; int level; #ifdef SOL_SCTP level = SOL_SCTP; #else level = IPPROTO_SCTP; #endif if 
(_configure_transport_socket(knet_h, sock, address, flags, type) < 0) { savederrno = errno; err = -1; goto exit_error; } value = 1; if (setsockopt(sock, level, SCTP_NODELAY, &value, sizeof(value)) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSPORT, "Unable to set sctp nodelay: %s", strerror(savederrno)); goto exit_error; } if (_enable_sctp_notifications(knet_h, sock, type) < 0) { savederrno = errno; err = -1; } exit_error: errno = savederrno; return err; } static int _reconnect_socket(knet_handle_t knet_h, struct knet_link *kn_link) { int err = 0, savederrno = 0; sctp_connect_link_info_t *info = kn_link->transport_link; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; struct epoll_event ev; if (connect(info->connect_sock, (struct sockaddr *)&kn_link->dst_addr, sockaddr_len(&kn_link->dst_addr)) < 0) { if ((errno != EALREADY) && (errno != EINPROGRESS) && (errno != EISCONN)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to connect SCTP socket %d: %s", info->connect_sock, strerror(savederrno)); goto exit_error; } } if (!info->on_connected_epoll) { memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLOUT; ev.data.fd = info->connect_sock; if (epoll_ctl(handle_info->connect_epollfd, EPOLL_CTL_ADD, info->connect_sock, &ev)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to add send/recv to epoll pool: %s", strerror(savederrno)); goto exit_error; } info->on_connected_epoll = 1; } exit_error: errno = savederrno; return err; } static int _create_connect_socket(knet_handle_t knet_h, struct knet_link *kn_link) { int err = 0, savederrno = 0; sctp_connect_link_info_t *info = kn_link->transport_link; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; struct epoll_event ev; int connect_sock; connect_sock = socket(kn_link->dst_addr.ss_family, SOCK_STREAM, IPPROTO_SCTP); if (connect_sock < 0) { savederrno = errno; err = -1; log_err(knet_h, 
KNET_SUB_TRANSP_SCTP, "Unable to create send/recv socket: %s", strerror(savederrno)); goto exit_error; } if (_configure_sctp_socket(knet_h, connect_sock, &kn_link->dst_addr, kn_link->flags, "SCTP connect") < 0) { savederrno = errno; err = -1; goto exit_error; } if (_set_fd_tracker(knet_h, connect_sock, KNET_TRANSPORT_SCTP, SCTP_CONNECT_LINK_INFO, info) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set fd tracker: %s", strerror(savederrno)); goto exit_error; } info->connect_sock = connect_sock; info->close_sock = 0; if (_reconnect_socket(knet_h, kn_link) < 0) { savederrno = errno; err = -1; goto exit_error; } exit_error: if (err) { if (info->on_connected_epoll) { epoll_ctl(handle_info->connect_epollfd, EPOLL_CTL_DEL, connect_sock, &ev); } if (connect_sock >= 0) { close(connect_sock); } } errno = savederrno; return err; } +static void _lock_sleep_relock(knet_handle_t knet_h) +{ + int i = 0; + + /* Don't hold onto the lock while sleeping */ + pthread_rwlock_unlock(&knet_h->global_rwlock); + + while (i < 5) { + usleep(KNET_THREADS_TIMERES / 16); + if (!pthread_rwlock_rdlock(&knet_h->global_rwlock)) { + /* + * lock acquired, we can go out + */ + return; + } else { + log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to get read lock!"); + i++; + } + } + /* + * time to crash! 
if we cannot re-acquire the lock + * there is no easy way out of this one + */ + assert(0); +} + int sctp_transport_tx_sock_error(knet_handle_t knet_h, int sockfd, int recv_err, int recv_errno) { sctp_connect_link_info_t *connect_info = knet_h->knet_transport_fd_tracker[sockfd].data; sctp_accepted_link_info_t *accepted_info = knet_h->knet_transport_fd_tracker[sockfd].data; sctp_listen_link_info_t *listen_info; if (recv_err < 0) { switch (knet_h->knet_transport_fd_tracker[sockfd].data_type) { case SCTP_CONNECT_LINK_INFO: if (connect_info->link->transport_connected == 0) { return -1; } break; case SCTP_ACCEPTED_LINK_INFO: listen_info = accepted_info->link_info; if (listen_info->listen_sock != sockfd) { if (listen_info->on_rx_epoll == 0) { return -1; } } break; } if (recv_errno == EAGAIN) { #ifdef DEBUG log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Sock: %d is overloaded. Slowing TX down", sockfd); #endif - /* Don't hold onto the lock while sleeping */ - pthread_rwlock_unlock(&knet_h->global_rwlock); - usleep(KNET_THREADS_TIMERES / 16); - pthread_rwlock_rdlock(&knet_h->global_rwlock); + _lock_sleep_relock(knet_h); return 1; } return -1; } return 0; } /* * socket error management functions * * both called with global read lock. * * NOTE: we need to remove the fd from the epoll as soon as possible * even before we notify the respective thread to take care of it * because scheduling can make it so that this thread will overload * and the threads supposed to take care of the error will never * be able to take action. * we CANNOT handle FDs here diretly (close/reconnect/etc) due * to locking context. We need to delegate that to their respective * management threads within global write lock. 
* * this function is called from: * - RX thread with recv_err <= 0 directly on recvmmsg error * - transport_rx_is_data when msg_len == 0 (recv_err = 1) * - transport_rx_is_data on notification (recv_err = 2) * * basically this small abouse of recv_err is to detect notifications * generated by sockets created by listen(). */ int sctp_transport_rx_sock_error(knet_handle_t knet_h, int sockfd, int recv_err, int recv_errno) { struct epoll_event ev; sctp_connect_link_info_t *connect_info = knet_h->knet_transport_fd_tracker[sockfd].data; sctp_accepted_link_info_t *accepted_info = knet_h->knet_transport_fd_tracker[sockfd].data; sctp_listen_link_info_t *listen_info; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; switch (knet_h->knet_transport_fd_tracker[sockfd].data_type) { case SCTP_CONNECT_LINK_INFO: /* * all connect link have notifications enabled * and we accept only data from notification and * generic recvmmsg errors. * * Errors generated by msg_len 0 can be ignored because * they follow a notification (double notification) */ if (recv_err != 1) { connect_info->link->transport_connected = 0; if (connect_info->on_rx_epoll) { memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = sockfd; if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_DEL, sockfd, &ev)) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove EOFed socket from epoll pool: %s", strerror(errno)); return -1; } connect_info->on_rx_epoll = 0; } log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Notifying connect thread that sockfd %d received an error", sockfd); if (sendto(handle_info->connectsockfd[1], &sockfd, sizeof(int), MSG_DONTWAIT | MSG_NOSIGNAL, NULL, 0) != sizeof(int)) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to notify connect thread: %s", strerror(errno)); } } break; case SCTP_ACCEPTED_LINK_INFO: listen_info = accepted_info->link_info; if (listen_info->listen_sock != sockfd) { if (recv_err != 1) { if (listen_info->on_rx_epoll) { 
memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = sockfd; if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_DEL, sockfd, &ev)) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove EOFed socket from epoll pool: %s", strerror(errno)); return -1; } listen_info->on_rx_epoll = 0; } log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Notifying listen thread that sockfd %d received an error", sockfd); if (sendto(handle_info->listensockfd[1], &sockfd, sizeof(int), MSG_DONTWAIT | MSG_NOSIGNAL, NULL, 0) != sizeof(int)) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to notify listen thread: %s", strerror(errno)); } } } else { /* * this means the listen() socket has generated * a notification. now what? :-) */ log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received stray notification for listen() socket %d", sockfd); } break; default: log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received unknown notification? %d", sockfd); break; } /* * Under RX pressure we need to give time to IPC to pick up the message */ - /* Don't hold onto the lock while sleeping */ - pthread_rwlock_unlock(&knet_h->global_rwlock); - usleep(KNET_THREADS_TIMERES / 2); - pthread_rwlock_rdlock(&knet_h->global_rwlock); + _lock_sleep_relock(knet_h); return 0; } /* * NOTE: sctp_transport_rx_is_data is called with global rdlock * delegate any FD error management to sctp_transport_rx_sock_error * and keep this code to parsing incoming data only */ int sctp_transport_rx_is_data(knet_handle_t knet_h, int sockfd, struct knet_mmsghdr *msg) { size_t i; struct iovec *iov = msg->msg_hdr.msg_iov; size_t iovlen = msg->msg_hdr.msg_iovlen; struct sctp_assoc_change *sac; union sctp_notification *snp; sctp_accepted_link_info_t *info = knet_h->knet_transport_fd_tracker[sockfd].data; if (!(msg->msg_hdr.msg_flags & MSG_NOTIFICATION)) { if (msg->msg_len == 0) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "received 0 bytes len packet: %d", sockfd); /* * NOTE: with event notification enabled, we receive error 
twice: * 1) from the event notification * 2) followed by a 0 byte msg_len * * This is generally not a problem if not for causing extra * handling for the same issue. Should we drop notifications * and keep the code generic (handle all errors via msg_len = 0) * or keep the duplication as safety measure, or drop msg_len = 0 * handling (what about sockets without events enabled?) */ sctp_transport_rx_sock_error(knet_h, sockfd, 1, 0); return 1; } /* * missing MSG_EOR has to be treated as a short read * from the socket and we need to fill in the mread buf * while we wait for MSG_EOR */ if (!(msg->msg_hdr.msg_flags & MSG_EOR)) { /* * copy the incoming data into mread_buf + mread_len (incremental) * and increase mread_len */ memmove(info->mread_buf + info->mread_len, iov->iov_base, msg->msg_len); info->mread_len = info->mread_len + msg->msg_len; return 0; } /* * got EOR. * if mread_len is > 0 we are completing a packet from short reads * complete reassembling the packet in mread_buf, copy it back in the iov * and set the iov/msg len numbers (size) correctly */ if (info->mread_len) { /* * add last fragment to mread_buf */ memmove(info->mread_buf + info->mread_len, iov->iov_base, msg->msg_len); info->mread_len = info->mread_len + msg->msg_len; /* * move all back into the iovec */ memmove(iov->iov_base, info->mread_buf, info->mread_len); msg->msg_len = info->mread_len; info->mread_len = 0; } return 2; } if (!(msg->msg_hdr.msg_flags & MSG_EOR)) { return 1; } for (i=0; i< iovlen; i++) { snp = iov[i].iov_base; switch (snp->sn_header.sn_type) { case SCTP_ASSOC_CHANGE: log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp assoc change"); sac = &snp->sn_assoc_change; if (sac->sac_state == SCTP_COMM_LOST) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp assoc change: comm_lost"); sctp_transport_rx_sock_error(knet_h, sockfd, 2, 0); } break; case SCTP_SHUTDOWN_EVENT: log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp shutdown event"); sctp_transport_rx_sock_error(knet_h, 
sockfd, 2, 0); break; case SCTP_SEND_FAILED: log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp send failed"); break; case SCTP_PEER_ADDR_CHANGE: log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp peer addr change"); break; case SCTP_REMOTE_ERROR: log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] sctp remote error"); break; default: log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "[event] unknown sctp event type: %hu\n", snp->sn_header.sn_type); break; } } return 0; } /* * connect / outgoing socket management thread */ /* * _handle_connected_sctp* are called with a global write lock * from the connect_thread */ static void _handle_connected_sctp(knet_handle_t knet_h, int connect_sock) { int err; struct epoll_event ev; unsigned int status, len = sizeof(status); sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; sctp_connect_link_info_t *info = knet_h->knet_transport_fd_tracker[connect_sock].data; struct knet_link *kn_link = info->link; err = getsockopt(connect_sock, SOL_SOCKET, SO_ERROR, &status, &len); if (err) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP getsockopt() on connecting socket %d failed: %s", connect_sock, strerror(errno)); return; } if (info->close_sock) { if (_close_connect_socket(knet_h, kn_link) < 0) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to close sock %d from _handle_connected_sctp: %s", connect_sock, strerror(errno)); return; } info->close_sock = 0; if (_create_connect_socket(knet_h, kn_link) < 0) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to recreate connecting sock! 
%s", strerror(errno)); return; } } if (status) { log_info(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP connect on %d to %s port %s failed: %s", connect_sock, kn_link->status.dst_ipaddr, kn_link->status.dst_port, strerror(status)); /* * No need to create a new socket if connect failed, * just retry connect */ _reconnect_socket(knet_h, info->link); return; } /* * Connected - Remove us from the connect epoll */ memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLOUT; ev.data.fd = connect_sock; if (epoll_ctl(handle_info->connect_epollfd, EPOLL_CTL_DEL, connect_sock, &ev)) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove connected socket %d from epoll pool: %s", connect_sock, strerror(errno)); } info->on_connected_epoll = 0; kn_link->transport_connected = 1; kn_link->outsock = info->connect_sock; memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = connect_sock; if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_ADD, connect_sock, &ev)) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to add connected socket to epoll pool: %s", strerror(errno)); } info->on_rx_epoll = 1; log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP handler fd %d now connected to %s port %s", connect_sock, kn_link->status.dst_ipaddr, kn_link->status.dst_port); } static void _handle_connected_sctp_errors(knet_handle_t knet_h) { int sockfd = -1; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; sctp_connect_link_info_t *info; if (recv(handle_info->connectsockfd[0], &sockfd, sizeof(int), MSG_DONTWAIT | MSG_NOSIGNAL) != sizeof(int)) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Short read on connectsockfd"); return; } if (_is_valid_fd(knet_h, sockfd) < 1) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received stray notification for connected socket fd error"); return; } + /* + * revalidate sockfd + */ + if ((sockfd < 0) || (sockfd >= KNET_MAX_FDS)) { + return; + } + log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Processing connected error on socket: %d", 
sockfd); info = knet_h->knet_transport_fd_tracker[sockfd].data; info->close_sock = 1; info->link->transport_connected = 0; _reconnect_socket(knet_h, info->link); } static void *_sctp_connect_thread(void *data) { int savederrno; int i, nev; knet_handle_t knet_h = (knet_handle_t) data; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; struct epoll_event events[KNET_EPOLL_MAX_EVENTS]; set_thread_status(knet_h, KNET_THREAD_SCTP_CONN, KNET_THREAD_STARTED); while (!shutdown_in_progress(knet_h)) { nev = epoll_wait(handle_info->connect_epollfd, events, KNET_EPOLL_MAX_EVENTS, KNET_THREADS_TIMERES / 1000); /* * we use timeout to detect if thread is shutting down */ if (nev == 0) { continue; } if (nev < 0) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP connect handler EPOLL ERROR: %s", strerror(errno)); continue; } /* * Sort out which FD has a connection */ savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to get write lock: %s", strerror(savederrno)); continue; } /* * minor optimization: deduplicate events * * in some cases we can receive multiple notifcations * of the same FD having issues or need handling. * It's enough to process it once even tho it's safe * to handle them multiple times. */ for (i = 0; i < nev; i++) { if (events[i].data.fd == handle_info->connectsockfd[0]) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received notification from rx_error for connected socket"); _handle_connected_sctp_errors(knet_h); } else { if (_is_valid_fd(knet_h, events[i].data.fd) == 1) { _handle_connected_sctp(knet_h, events[i].data.fd); } else { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received stray notification for dead fd %d\n", events[i].data.fd); } } } pthread_rwlock_unlock(&knet_h->global_rwlock); /* * this thread can generate events for itself. 
* we need to sleep in between loops to allow other threads * to be scheduled */ usleep(knet_h->reconnect_int * 1000); } set_thread_status(knet_h, KNET_THREAD_SCTP_CONN, KNET_THREAD_STOPPED); return NULL; } /* * listen/incoming connections management thread */ /* * Listener received a new connection * called with a write lock from main thread */ static void _handle_incoming_sctp(knet_handle_t knet_h, int listen_sock) { int err = 0, savederrno = 0; int new_fd; int i = -1; sctp_listen_link_info_t *info = knet_h->knet_transport_fd_tracker[listen_sock].data; struct epoll_event ev; struct sockaddr_storage ss; socklen_t sock_len = sizeof(ss); char addr_str[KNET_MAX_HOST_LEN]; char port_str[KNET_MAX_PORT_LEN]; sctp_accepted_link_info_t *accept_info = NULL; new_fd = accept(listen_sock, (struct sockaddr *)&ss, &sock_len); if (new_fd < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Incoming: accept error: %s", strerror(errno)); goto exit_error; } if (knet_addrtostr(&ss, sizeof(ss), addr_str, KNET_MAX_HOST_LEN, port_str, KNET_MAX_PORT_LEN) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Incoming: unable to gather socket info"); goto exit_error; } log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Incoming: received connection from: %s port: %s", addr_str, port_str); if (knet_h->use_access_lists) { if (!check_validate(knet_h, listen_sock, KNET_TRANSPORT_SCTP, &ss)) { savederrno = EINVAL; log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Connection rejected from %s/%s", addr_str, port_str); close(new_fd); errno = savederrno; return; } } /* * Keep a track of all accepted FDs */ for (i=0; i<MAX_ACCEPTED_SOCKS; i++) { if (info->accepted_socks[i] == -1) { info->accepted_socks[i] = new_fd; break; } } if (i == MAX_ACCEPTED_SOCKS) { errno = EBUSY; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Incoming: too many connections!"); goto exit_error; } if (_configure_common_socket(knet_h, new_fd, 0, "SCTP incoming") < 0) { /* Inherit flags from listener? 
*/ savederrno = errno; err = -1; goto exit_error; } if (_enable_sctp_notifications(knet_h, new_fd, "Incoming connection") < 0) { savederrno = errno; err = -1; goto exit_error; } accept_info = malloc(sizeof(sctp_accepted_link_info_t)); if (!accept_info) { savederrno = errno; err = -1; goto exit_error; } memset(accept_info, 0, sizeof(sctp_accepted_link_info_t)); accept_info->link_info = info; if (_set_fd_tracker(knet_h, new_fd, KNET_TRANSPORT_SCTP, SCTP_ACCEPTED_LINK_INFO, accept_info) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set fd tracker: %s", strerror(errno)); goto exit_error; } memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = new_fd; if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_ADD, new_fd, &ev)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Incoming: unable to add accepted socket %d to epoll pool: %s", new_fd, strerror(errno)); goto exit_error; } info->on_rx_epoll = 1; log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Incoming: accepted new fd %d for %s/%s (listen fd: %d). 
index: %d", new_fd, addr_str, port_str, info->listen_sock, i); exit_error: if (err) { - if ((i >= 0) || (i < MAX_ACCEPTED_SOCKS)) { + if ((i >= 0) && (i < MAX_ACCEPTED_SOCKS)) { info->accepted_socks[i] = -1; } _set_fd_tracker(knet_h, new_fd, KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, NULL); free(accept_info); - close(new_fd); + if (new_fd >= 0) { + close(new_fd); + } } errno = savederrno; return; } /* * Listen thread received a notification of a bad socket that needs closing * called with a write lock from main thread */ static void _handle_listen_sctp_errors(knet_handle_t knet_h) { int sockfd = -1; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; sctp_accepted_link_info_t *accept_info; sctp_listen_link_info_t *info; struct knet_host *host; int link_idx; int i; if (recv(handle_info->listensockfd[0], &sockfd, sizeof(int), MSG_DONTWAIT | MSG_NOSIGNAL) != sizeof(int)) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Short read on listensockfd"); return; } if (_is_valid_fd(knet_h, sockfd) < 1) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received stray notification for listen socket fd error"); return; } + /* + * revalidate sockfd + */ + if ((sockfd < 0) || (sockfd >= KNET_MAX_FDS)) { + return; + } + log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Processing listen error on socket: %d", sockfd); accept_info = knet_h->knet_transport_fd_tracker[sockfd].data; info = accept_info->link_info; /* * clear all links using this accepted socket as * outbound dynamically connected socket */ for (host = knet_h->host_head; host != NULL; host = host->next) { for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) { if ((host->link[link_idx].dynamic == KNET_LINK_DYNIP) && (host->link[link_idx].outsock == sockfd)) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Found dynamic connection on host %d link %d (%d)", host->host_id, link_idx, sockfd); host->link[link_idx].status.dynconnected = 0; host->link[link_idx].transport_connected = 0; host->link[link_idx].outsock = 0; 
memset(&host->link[link_idx].dst_addr, 0, sizeof(struct sockaddr_storage)); } } } for (i=0; iaccepted_socks[i]) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Closing accepted socket %d", sockfd); _set_fd_tracker(knet_h, sockfd, KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, NULL); info->accepted_socks[i] = -1; free(accept_info); close(sockfd); break; /* Keeps covscan happy */ } } } static void *_sctp_listen_thread(void *data) { int savederrno; int i, nev; knet_handle_t knet_h = (knet_handle_t) data; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; struct epoll_event events[KNET_EPOLL_MAX_EVENTS]; set_thread_status(knet_h, KNET_THREAD_SCTP_LISTEN, KNET_THREAD_STARTED); while (!shutdown_in_progress(knet_h)) { nev = epoll_wait(handle_info->listen_epollfd, events, KNET_EPOLL_MAX_EVENTS, KNET_THREADS_TIMERES / 1000); /* * we use timeout to detect if thread is shutting down */ if (nev == 0) { continue; } if (nev < 0) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP listen handler EPOLL ERROR: %s", strerror(errno)); continue; } savederrno = get_global_wrlock(knet_h); if (savederrno) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to get write lock: %s", strerror(savederrno)); continue; } /* * Sort out which FD has an incoming connection */ for (i = 0; i < nev; i++) { if (events[i].data.fd == handle_info->listensockfd[0]) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received notification from rx_error for listener/accepted socket"); _handle_listen_sctp_errors(knet_h); } else { if (_is_valid_fd(knet_h, events[i].data.fd) == 1) { _handle_incoming_sctp(knet_h, events[i].data.fd); } else { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Received listen notification from invalid socket"); } } } pthread_rwlock_unlock(&knet_h->global_rwlock); } set_thread_status(knet_h, KNET_THREAD_SCTP_LISTEN, KNET_THREAD_STOPPED); return NULL; } /* * sctp_link_listener_start/stop are called in global write lock * context from set_config and clear_config. 
*/ static sctp_listen_link_info_t *sctp_link_listener_start(knet_handle_t knet_h, struct knet_link *kn_link) { int err = 0, savederrno = 0; int listen_sock = -1; struct epoll_event ev; sctp_listen_link_info_t *info = NULL; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; /* * Only allocate a new listener if src address is different */ knet_list_for_each_entry(info, &handle_info->listen_links_list, list) { if (memcmp(&info->src_address, &kn_link->src_addr, sizeof(struct sockaddr_storage)) == 0) { if ((check_add(knet_h, info->listen_sock, KNET_TRANSPORT_SCTP, -1, &kn_link->dst_addr, &kn_link->dst_addr, CHECK_TYPE_ADDRESS, CHECK_ACCEPT) < 0) && (errno != EEXIST)) { return NULL; } return info; } } info = malloc(sizeof(sctp_listen_link_info_t)); if (!info) { err = -1; goto exit_error; } memset(info, 0, sizeof(sctp_listen_link_info_t)); memset(info->accepted_socks, -1, sizeof(info->accepted_socks)); memmove(&info->src_address, &kn_link->src_addr, sizeof(struct sockaddr_storage)); listen_sock = socket(kn_link->src_addr.ss_family, SOCK_STREAM, IPPROTO_SCTP); if (listen_sock < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to create listener socket: %s", strerror(savederrno)); goto exit_error; } if (_configure_sctp_socket(knet_h, listen_sock, &kn_link->src_addr, kn_link->flags, "SCTP listener") < 0) { savederrno = errno; err = -1; goto exit_error; } if (bind(listen_sock, (struct sockaddr *)&kn_link->src_addr, sockaddr_len(&kn_link->src_addr)) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to bind listener socket: %s", strerror(savederrno)); goto exit_error; } if (listen(listen_sock, 5) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to listen on listener socket: %s", strerror(savederrno)); goto exit_error; } if (_set_fd_tracker(knet_h, listen_sock, KNET_TRANSPORT_SCTP, SCTP_LISTENER_LINK_INFO, info) < 0) { savederrno = errno; err = -1; 
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set fd tracker: %s", strerror(savederrno)); goto exit_error; } if ((check_add(knet_h, listen_sock, KNET_TRANSPORT_SCTP, -1, &kn_link->dst_addr, &kn_link->dst_addr, CHECK_TYPE_ADDRESS, CHECK_ACCEPT) < 0) && (errno != EEXIST)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to configure default access lists: %s", strerror(savederrno)); goto exit_error; } memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = listen_sock; if (epoll_ctl(handle_info->listen_epollfd, EPOLL_CTL_ADD, listen_sock, &ev)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to add listener to epoll pool: %s", strerror(savederrno)); goto exit_error; } info->on_listener_epoll = 1; info->listen_sock = listen_sock; knet_list_add(&info->list, &handle_info->listen_links_list); log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Listening on fd %d for %s:%s", listen_sock, kn_link->status.src_ipaddr, kn_link->status.src_port); exit_error: if (err) { - if (info->on_listener_epoll) { + if ((info) && (info->on_listener_epoll)) { epoll_ctl(handle_info->listen_epollfd, EPOLL_CTL_DEL, listen_sock, &ev); } - check_rmall(knet_h, listen_sock, KNET_TRANSPORT_SCTP); if (listen_sock >= 0) { + check_rmall(knet_h, listen_sock, KNET_TRANSPORT_SCTP); close(listen_sock); } if (info) { free(info); info = NULL; } } errno = savederrno; return info; } static int sctp_link_listener_stop(knet_handle_t knet_h, struct knet_link *kn_link) { int err = 0, savederrno = 0; int found = 0, i; struct knet_host *host; int link_idx; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; sctp_connect_link_info_t *this_link_info = kn_link->transport_link; sctp_listen_link_info_t *info = this_link_info->listener; sctp_connect_link_info_t *link_info; struct epoll_event ev; for (host = knet_h->host_head; host != NULL; host = host->next) { for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) { if 
(&host->link[link_idx] == kn_link) continue; link_info = host->link[link_idx].transport_link; if ((link_info) && (link_info->listener == info)) { found = 1; break; } } } if ((check_rm(knet_h, info->listen_sock, KNET_TRANSPORT_SCTP, &kn_link->dst_addr, &kn_link->dst_addr, CHECK_TYPE_ADDRESS, CHECK_ACCEPT) < 0) && (errno != ENOENT)) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove default access lists for %d", info->listen_sock); } if (found) { this_link_info->listener = NULL; log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP listener socket %d still in use", info->listen_sock); savederrno = EBUSY; err = -1; goto exit_error; } if (info->on_listener_epoll) { memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = info->listen_sock; if (epoll_ctl(handle_info->listen_epollfd, EPOLL_CTL_DEL, info->listen_sock, &ev)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove listener to epoll pool: %s", strerror(savederrno)); goto exit_error; } info->on_listener_epoll = 0; } if (_set_fd_tracker(knet_h, info->listen_sock, KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, NULL) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set fd tracker: %s", strerror(savederrno)); goto exit_error; } check_rmall(knet_h, info->listen_sock, KNET_TRANSPORT_SCTP); close(info->listen_sock); for (i=0; i< MAX_ACCEPTED_SOCKS; i++) { if (info->accepted_socks[i] > -1) { memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = info->accepted_socks[i]; if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_DEL, info->accepted_socks[i], &ev)) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove EOFed socket from epoll pool: %s", strerror(errno)); } info->on_rx_epoll = 0; free(knet_h->knet_transport_fd_tracker[info->accepted_socks[i]].data); close(info->accepted_socks[i]); if (_set_fd_tracker(knet_h, info->accepted_socks[i], KNET_MAX_TRANSPORTS, SCTP_NO_LINK_INFO, NULL) < 0) { savederrno = 
errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set fd tracker: %s", strerror(savederrno)); goto exit_error; } info->accepted_socks[i] = -1; } } knet_list_del(&info->list); free(info); this_link_info->listener = NULL; exit_error: errno = savederrno; return err; } /* * Links config/clear. Both called with global wrlock from link_set_config/clear_config */ int sctp_transport_link_set_config(knet_handle_t knet_h, struct knet_link *kn_link) { int savederrno = 0, err = 0; sctp_connect_link_info_t *info; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; info = malloc(sizeof(sctp_connect_link_info_t)); if (!info) { goto exit_error; } memset(info, 0, sizeof(sctp_connect_link_info_t)); kn_link->transport_link = info; info->link = kn_link; memmove(&info->dst_address, &kn_link->dst_addr, sizeof(struct sockaddr_storage)); info->on_connected_epoll = 0; info->connect_sock = -1; info->listener = sctp_link_listener_start(knet_h, kn_link); if (!info->listener) { savederrno = errno; err = -1; goto exit_error; } if (kn_link->dynamic == KNET_LINK_STATIC) { if (_create_connect_socket(knet_h, kn_link) < 0) { savederrno = errno; err = -1; goto exit_error; } kn_link->outsock = info->connect_sock; } knet_list_add(&info->list, &handle_info->connect_links_list); exit_error: if (err) { if (info) { - if (info->connect_sock) { + if (info->connect_sock >= 0) { close(info->connect_sock); } if (info->listener) { sctp_link_listener_stop(knet_h, kn_link); } kn_link->transport_link = NULL; free(info); } } errno = savederrno; return err; } /* * called with global wrlock */ int sctp_transport_link_clear_config(knet_handle_t knet_h, struct knet_link *kn_link) { int err = 0, savederrno = 0; sctp_connect_link_info_t *info; struct epoll_event ev; if (!kn_link) { errno = EINVAL; return -1; } info = kn_link->transport_link; if (!info) { errno = EINVAL; return -1; } if ((sctp_link_listener_stop(knet_h, kn_link) <0) && (errno != EBUSY)) { savederrno = errno; err = -1; 
log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove listener trasport: %s", strerror(savederrno)); goto exit_error; } if (info->on_rx_epoll) { memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = info->connect_sock; if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_DEL, info->connect_sock, &ev)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to remove connected socket from epoll pool: %s", strerror(savederrno)); goto exit_error; } info->on_rx_epoll = 0; } if (_close_connect_socket(knet_h, kn_link) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to close connected socket: %s", strerror(savederrno)); goto exit_error; } knet_list_del(&info->list); free(info); kn_link->transport_link = NULL; exit_error: errno = savederrno; return err; } /* * transport_free and transport_init are * called only from knet_handle_new and knet_handle_free. * all resources (hosts/links) should have been already freed at this point * and they are called in a write locked context, hence they * don't need their own locking. */ int sctp_transport_free(knet_handle_t knet_h) { sctp_handle_info_t *handle_info; void *thread_status; struct epoll_event ev; if (!knet_h->transports[KNET_TRANSPORT_SCTP]) { errno = EINVAL; return -1; } handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; /* * keep it here while we debug list usage and such */ if (!knet_list_empty(&handle_info->listen_links_list)) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Internal error. listen links list is not empty"); } if (!knet_list_empty(&handle_info->connect_links_list)) { log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Internal error. 
connect links list is not empty"); } if (handle_info->listen_thread) { pthread_cancel(handle_info->listen_thread); pthread_join(handle_info->listen_thread, &thread_status); } if (handle_info->connect_thread) { pthread_cancel(handle_info->connect_thread); pthread_join(handle_info->connect_thread, &thread_status); } if (handle_info->listensockfd[0] >= 0) { memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = handle_info->listensockfd[0]; epoll_ctl(handle_info->listen_epollfd, EPOLL_CTL_DEL, handle_info->listensockfd[0], &ev); } if (handle_info->connectsockfd[0] >= 0) { memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = handle_info->connectsockfd[0]; epoll_ctl(handle_info->connect_epollfd, EPOLL_CTL_DEL, handle_info->connectsockfd[0], &ev); } _close_socketpair(knet_h, handle_info->connectsockfd); _close_socketpair(knet_h, handle_info->listensockfd); if (handle_info->listen_epollfd >= 0) { close(handle_info->listen_epollfd); } if (handle_info->connect_epollfd >= 0) { close(handle_info->connect_epollfd); } free(handle_info->event_subscribe_buffer); free(handle_info); knet_h->transports[KNET_TRANSPORT_SCTP] = NULL; return 0; } static int _sctp_subscribe_init(knet_handle_t knet_h) { int test_socket, savederrno; sctp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_SCTP]; char dummy_events[100]; struct sctp_event_subscribe *events; /* Below we set the first 6 fields of this expanding struct. * SCTP_EVENTS is deprecated, but SCTP_EVENT is not available * on Linux; on the other hand, FreeBSD and old Linux does not * accept small transfers, so we can't simply use this minimum * everywhere. Thus we query and store the native size. 
*/ const unsigned int subscribe_min = 6; test_socket = socket(PF_INET, SOCK_STREAM, IPPROTO_SCTP); if (test_socket < 0) { if (errno == EPROTONOSUPPORT) { log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "SCTP not supported, skipping initialization"); return 0; } savederrno = errno; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to create test socket: %s", strerror(savederrno)); return savederrno; } handle_info->event_subscribe_kernel_size = sizeof dummy_events; if (getsockopt(test_socket, IPPROTO_SCTP, SCTP_EVENTS, &dummy_events, &handle_info->event_subscribe_kernel_size)) { close(test_socket); savederrno = errno; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to query kernel size of struct sctp_event_subscribe: %s", strerror(savederrno)); return savederrno; } close(test_socket); if (handle_info->event_subscribe_kernel_size < subscribe_min) { savederrno = ERANGE; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "No kernel support for the necessary notifications: struct sctp_event_subscribe is %u bytes, %u needed", handle_info->event_subscribe_kernel_size, subscribe_min); return savederrno; } events = malloc(handle_info->event_subscribe_kernel_size); if (!events) { savederrno = errno; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Failed to allocate event subscribe buffer: %s", strerror(savederrno)); return savederrno; } memset(events, 0, handle_info->event_subscribe_kernel_size); events->sctp_data_io_event = 1; events->sctp_association_event = 1; events->sctp_address_event = 1; events->sctp_send_failure_event = 1; events->sctp_peer_error_event = 1; events->sctp_shutdown_event = 1; handle_info->event_subscribe_buffer = (char *)events; log_debug(knet_h, KNET_SUB_TRANSP_SCTP, "Size of struct sctp_event_subscribe is %u in kernel, %zu in user space", handle_info->event_subscribe_kernel_size, sizeof(struct sctp_event_subscribe)); return 0; } int sctp_transport_init(knet_handle_t knet_h) { int err = 0, savederrno = 0; sctp_handle_info_t *handle_info; struct epoll_event ev; if 
(knet_h->transports[KNET_TRANSPORT_SCTP]) { errno = EEXIST; return -1; } handle_info = malloc(sizeof(sctp_handle_info_t)); if (!handle_info) { return -1; } memset(handle_info, 0,sizeof(sctp_handle_info_t)); knet_h->transports[KNET_TRANSPORT_SCTP] = handle_info; savederrno = _sctp_subscribe_init(knet_h); if (savederrno) { err = -1; goto exit_fail; } knet_list_init(&handle_info->listen_links_list); knet_list_init(&handle_info->connect_links_list); handle_info->listen_epollfd = epoll_create(KNET_EPOLL_MAX_EVENTS + 1); if (handle_info->listen_epollfd < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to create epoll listen fd: %s", strerror(savederrno)); goto exit_fail; } if (_fdset_cloexec(handle_info->listen_epollfd)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set CLOEXEC on listen_epollfd: %s", strerror(savederrno)); goto exit_fail; } handle_info->connect_epollfd = epoll_create(KNET_EPOLL_MAX_EVENTS + 1); if (handle_info->connect_epollfd < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to create epoll connect fd: %s", strerror(savederrno)); goto exit_fail; } if (_fdset_cloexec(handle_info->connect_epollfd)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to set CLOEXEC on connect_epollfd: %s", strerror(savederrno)); goto exit_fail; } if (_init_socketpair(knet_h, handle_info->connectsockfd) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to init connect socketpair: %s", strerror(savederrno)); goto exit_fail; } memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = handle_info->connectsockfd[0]; if (epoll_ctl(handle_info->connect_epollfd, EPOLL_CTL_ADD, handle_info->connectsockfd[0], &ev)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to add connectsockfd[0] to connect epoll pool: %s", strerror(savederrno)); goto exit_fail; } if 
(_init_socketpair(knet_h, handle_info->listensockfd) < 0) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to init listen socketpair: %s", strerror(savederrno)); goto exit_fail; } memset(&ev, 0, sizeof(struct epoll_event)); ev.events = EPOLLIN; ev.data.fd = handle_info->listensockfd[0]; if (epoll_ctl(handle_info->listen_epollfd, EPOLL_CTL_ADD, handle_info->listensockfd[0], &ev)) { savederrno = errno; err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to add listensockfd[0] to listen epoll pool: %s", strerror(savederrno)); goto exit_fail; } /* * Start connect & listener threads */ set_thread_status(knet_h, KNET_THREAD_SCTP_LISTEN, KNET_THREAD_REGISTERED); savederrno = pthread_create(&handle_info->listen_thread, 0, _sctp_listen_thread, (void *) knet_h); if (savederrno) { err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to start sctp listen thread: %s", strerror(savederrno)); goto exit_fail; } set_thread_status(knet_h, KNET_THREAD_SCTP_CONN, KNET_THREAD_REGISTERED); savederrno = pthread_create(&handle_info->connect_thread, 0, _sctp_connect_thread, (void *) knet_h); if (savederrno) { err = -1; log_err(knet_h, KNET_SUB_TRANSP_SCTP, "Unable to start sctp connect thread: %s", strerror(savederrno)); goto exit_fail; } exit_fail: if (err < 0) { sctp_transport_free(knet_h); } errno = savederrno; return err; } int sctp_transport_link_dyn_connect(knet_handle_t knet_h, int sockfd, struct knet_link *kn_link) { kn_link->outsock = sockfd; kn_link->status.dynconnected = 1; kn_link->transport_connected = 1; return 0; } int sctp_transport_link_get_acl_fd(knet_handle_t knet_h, struct knet_link *kn_link) { sctp_connect_link_info_t *this_link_info = kn_link->transport_link; sctp_listen_link_info_t *info = this_link_info->listener; return info->listen_sock; } #endif diff --git a/libnozzle/internals.h b/libnozzle/internals.h index c9192a81..ab870ef7 100644 --- a/libnozzle/internals.h +++ b/libnozzle/internals.h @@ -1,69 +1,69 @@ /* * Copyright (C) 
2017-2019 Red Hat, Inc. All rights reserved. * * Author: Fabio M. Di Nitto * * This software licensed under LGPL-2.0+ */ #ifndef __NOZZLE_INTERNALS_H__ #define __NOZZLE_INTERNALS_H__ #include "config.h" #ifdef KNET_LINUX #include #endif #include #include "libnozzle.h" struct nozzle_lib_config { struct nozzle_iface *head; int ioctlfd; #ifdef KNET_LINUX struct nl_sock *nlsock; #endif }; #define MACADDR_CHAR_MAX 18 /* * 11 = post-down.d * 1 = / */ #define UPDOWN_PATH_MAX PATH_MAX - 11 - 1 - IFNAMSIZ struct nozzle_iface { - char name[IFNAMSIZ]; /* interface name */ + char name[IFNAMSIZ - 1]; /* interface name */ int fd; /* interface fd */ int up; /* interface status 0 is down, 1 is up */ /* * extra data */ struct nozzle_ip *ip; /* configured ip addresses */ /* * default MAC address assigned by the kernel at creation time */ char default_mac[MACADDR_CHAR_MAX + 1]; int default_mtu; /* MTU assigned by the kernel at creation time */ int current_mtu; /* MTU configured by libnozzle user */ int hasupdown; /* interface has up/down path to scripts configured */ char updownpath[UPDOWN_PATH_MAX]; /* path to up/down scripts if configured */ struct nozzle_iface *next; }; #define ifname ifr.ifr_name int execute_bin_sh_command(const char *command, char **error_string); int find_ip(nozzle_t nozzle, const char *ipaddr, const char *prefix, struct nozzle_ip **ip, struct nozzle_ip **ip_prev); char *generate_v4_broadcast(const char *ipaddr, const char *prefix); #endif diff --git a/libnozzle/libnozzle.c b/libnozzle/libnozzle.c index 15863ecf..3a1f827c 100644 --- a/libnozzle/libnozzle.c +++ b/libnozzle/libnozzle.c @@ -1,1208 +1,1225 @@ /* * Copyright (C) 2010-2019 Red Hat, Inc. All rights reserved. * * Author: Fabio M. 
Di Nitto * * This software licensed under LGPL-2.0+ */ #include "config.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KNET_LINUX #include /* * libnl3 < 3.3 includes kernel headers directly * causing conflicts with net/if.h included above */ #ifdef LIBNL3_WORKAROUND #define _LINUX_IF_H 1 #endif #include #include #include #include #endif #ifdef KNET_BSD #include #endif #include "libnozzle.h" #include "internals.h" /* * internal functions are all _unlocked_ * locking should be handled at external API functions */ static int lib_init = 0; static struct nozzle_lib_config lib_cfg; static pthread_mutex_t config_mutex = PTHREAD_MUTEX_INITIALIZER; /* * internal helpers */ static void lib_fini(void) { if (lib_cfg.head == NULL) { #ifdef KNET_LINUX nl_close(lib_cfg.nlsock); nl_socket_free(lib_cfg.nlsock); #endif close(lib_cfg.ioctlfd); lib_init = 0; } } static int is_valid_nozzle(const nozzle_t nozzle) { nozzle_t temp; if (!nozzle) { return 0; } if (!lib_init) { return 0; } temp = lib_cfg.head; while (temp != NULL) { if (nozzle == temp) return 1; temp = temp->next; } return 0; } static void destroy_iface(nozzle_t nozzle) { #ifdef KNET_BSD struct ifreq ifr; #endif if (!nozzle) return; - if (nozzle->fd) + if (nozzle->fd >= 0) close(nozzle->fd); #ifdef KNET_BSD memset(&ifr, 0, sizeof(struct ifreq)); strncpy(ifname, nozzle->name, IFNAMSIZ); ioctl(lib_cfg.ioctlfd, SIOCIFDESTROY, &ifr); #endif free(nozzle); lib_fini(); return; } static int get_iface_mtu(const nozzle_t nozzle) { int err = 0, savederrno = 0; struct ifreq ifr; memset(&ifr, 0, sizeof(struct ifreq)); strncpy(ifname, nozzle->name, IFNAMSIZ); err = ioctl(lib_cfg.ioctlfd, SIOCGIFMTU, &ifr); if (err) { savederrno = errno; goto out_clean; } err = ifr.ifr_mtu; out_clean: errno = savederrno; return err; } static int get_iface_mac(const nozzle_t nozzle, char **ether_addr) { int err = 0, savederrno = 0; struct 
ifreq ifr; char mac[MACADDR_CHAR_MAX]; #ifdef KNET_BSD struct ifaddrs *ifap = NULL; struct ifaddrs *ifa; int found = 0; #endif memset(&mac, 0, MACADDR_CHAR_MAX); memset(&ifr, 0, sizeof(struct ifreq)); strncpy(ifname, nozzle->name, IFNAMSIZ); #ifdef KNET_LINUX err = ioctl(lib_cfg.ioctlfd, SIOCGIFHWADDR, &ifr); if (err) { savederrno = errno; goto out_clean; } ether_ntoa_r((struct ether_addr *)ifr.ifr_hwaddr.sa_data, mac); #endif #ifdef KNET_BSD /* * there is no ioctl to get the ether address of an interface on FreeBSD * (not to be confused with hwaddr). Use workaround described here: * https://lists.freebsd.org/pipermail/freebsd-hackers/2004-June/007394.html */ err = getifaddrs(&ifap); if (err < 0) { savederrno = errno; goto out_clean; } ifa = ifap; while (ifa) { if (!strncmp(nozzle->name, ifa->ifa_name, IFNAMSIZ)) { found = 1; break; } ifa=ifa->ifa_next; } if (found) { ether_ntoa_r((struct ether_addr *)LLADDR((struct sockaddr_dl *)ifa->ifa_addr), mac); } else { errno = EINVAL; err = -1; } freeifaddrs(ifap); if (err) { goto out_clean; } #endif *ether_addr = strdup(mac); if (!*ether_addr) { savederrno = errno; err = -1; } out_clean: errno = savederrno; return err; } #define IP_ADD 1 #define IP_DEL 2 static int _set_ip(nozzle_t nozzle, int command, const char *ipaddr, const char *prefix, int secondary) { int fam; char *broadcast = NULL; int err = 0; #ifdef KNET_LINUX struct rtnl_addr *addr = NULL; struct nl_addr *local_addr = NULL; struct nl_addr *bcast_addr = NULL; struct nl_cache *cache = NULL; int ifindex; #endif #ifdef KNET_BSD char cmdline[4096]; char proto[6]; char *error_string = NULL; #endif if (!strchr(ipaddr, ':')) { fam = AF_INET; broadcast = generate_v4_broadcast(ipaddr, prefix); if (!broadcast) { errno = EINVAL; return -1; } } else { fam = AF_INET6; } #ifdef KNET_LINUX addr = rtnl_addr_alloc(); if (!addr) { errno = ENOMEM; - return -1; + err = -1; + goto out; } if (rtnl_link_alloc_cache(lib_cfg.nlsock, AF_UNSPEC, &cache) < 0) { errno = ENOMEM; err = -1; 
goto out; } ifindex = rtnl_link_name2i(cache, nozzle->name); if (ifindex == 0) { errno = ENOENT; err = -1; goto out; } rtnl_addr_set_ifindex(addr, ifindex); if (nl_addr_parse(ipaddr, fam, &local_addr) < 0) { errno = EINVAL; err = -1; goto out; } if (rtnl_addr_set_local(addr, local_addr) < 0) { errno = EINVAL; err = -1; goto out; } if (broadcast) { if (nl_addr_parse(broadcast, fam, &bcast_addr) < 0) { errno = EINVAL; err = -1; goto out; } if (rtnl_addr_set_broadcast(addr, bcast_addr) < 0) { errno = EINVAL; err = -1; goto out; } } rtnl_addr_set_prefixlen(addr, atoi(prefix)); if (command == IP_ADD) { if (rtnl_addr_add(lib_cfg.nlsock, addr, 0) < 0) { errno = EINVAL; err = -1; goto out; } } else { if (rtnl_addr_delete(lib_cfg.nlsock, addr, 0) < 0) { errno = EINVAL; err = -1; goto out; } } out: if (addr) { rtnl_addr_put(addr); } if (local_addr) { nl_addr_put(local_addr); } if (bcast_addr) { nl_addr_put(bcast_addr); } if (cache) { nl_cache_put(cache); } if (broadcast) { free(broadcast); } return err; #endif #ifdef KNET_BSD /* * TODO: port to use ioctl and such, drop shell forking here */ memset(cmdline, 0, sizeof(cmdline)); if (fam == AF_INET) { snprintf(proto, sizeof(proto), "inet"); } else { snprintf(proto, sizeof(proto), "inet6"); } if (command == IP_ADD) { snprintf(cmdline, sizeof(cmdline)-1, "ifconfig %s %s %s/%s", nozzle->name, proto, ipaddr, prefix); if (broadcast) { snprintf(cmdline + strlen(cmdline), sizeof(cmdline) - strlen(cmdline) -1, " broadcast %s", broadcast); } if ((secondary) && (fam == AF_INET)) { snprintf(cmdline + strlen(cmdline), sizeof(cmdline) - strlen(cmdline) -1, " alias"); } } else { snprintf(cmdline, sizeof(cmdline)-1, "ifconfig %s %s %s/%s delete", nozzle->name, proto, ipaddr, prefix); } if (broadcast) { free(broadcast); } /* * temporary workaround as we port libnozzle to BSD ioctl * for IP address management */ err = execute_bin_sh_command(cmdline, &error_string); if (error_string) { free(error_string); error_string = NULL; } return err; 
#endif } /* * Exported public API */ nozzle_t nozzle_open(char *devname, size_t devname_size, const char *updownpath) { int savederrno = 0; nozzle_t nozzle = NULL; char *temp_mac = NULL; #ifdef KNET_LINUX struct ifreq ifr; #endif #ifdef KNET_BSD uint16_t i; long int nozzlenum = 0; char curnozzle[IFNAMSIZ]; #endif if (devname == NULL) { errno = EINVAL; return NULL; } if (devname_size < IFNAMSIZ) { errno = EINVAL; return NULL; } /* Need to allow space for trailing NUL */ if (strlen(devname) >= IFNAMSIZ) { errno = E2BIG; return NULL; } #ifdef KNET_BSD /* * BSD does not support named devices like Linux * but it is possible to force a nozzleX device number * where X is 0 to 255. */ if (strlen(devname)) { if (strncmp(devname, "tap", 3)) { errno = EINVAL; return NULL; } errno = 0; nozzlenum = strtol(devname+3, NULL, 10); if (errno) { errno = EINVAL; return NULL; } if ((nozzlenum < 0) || (nozzlenum > 255)) { errno = EINVAL; return NULL; } } #endif if (updownpath) { /* only absolute paths */ if (updownpath[0] != '/') { errno = EINVAL; return NULL; } if (strlen(updownpath) >= UPDOWN_PATH_MAX) { errno = E2BIG; return NULL; } } savederrno = pthread_mutex_lock(&config_mutex); if (savederrno) { errno = savederrno; return NULL; } if (!lib_init) { lib_cfg.head = NULL; #ifdef KNET_LINUX lib_cfg.nlsock = nl_socket_alloc(); if (!lib_cfg.nlsock) { savederrno = errno; goto out_error; } if (nl_connect(lib_cfg.nlsock, NETLINK_ROUTE) < 0) { savederrno = EBUSY; goto out_error; } lib_cfg.ioctlfd = socket(AF_INET, SOCK_STREAM, 0); #endif #ifdef KNET_BSD lib_cfg.ioctlfd = socket(AF_LOCAL, SOCK_DGRAM, 0); #endif if (lib_cfg.ioctlfd < 0) { savederrno = errno; goto out_error; } lib_init = 1; } nozzle = malloc(sizeof(struct nozzle_iface)); if (!nozzle) { savederrno = ENOMEM; goto out_error; } memset(nozzle, 0, sizeof(struct nozzle_iface)); #ifdef KNET_BSD if (!strlen(devname)) { + /* + * FreeBSD 13 kernel has changed how the tap module + * works and tap0 cannot be removed from the system. 
+ * This means that tap0 settings are never reset to default + * and nozzle cannot control the default state of the device + * when taking over. + * nozzle expects some parameters to be default when opening + * a tap device (such as random mac address, default MTU, no + * other attributes, etc.) + * + * For 13 and higher, simply skip tap0 as usable device. + */ +#if __FreeBSD__ >= 13 + for (i = 1; i < 256; i++) { +#else for (i = 0; i < 256; i++) { +#endif snprintf(curnozzle, sizeof(curnozzle) - 1, "/dev/tap%u", i); nozzle->fd = open(curnozzle, O_RDWR); savederrno = errno; if (nozzle->fd > 0) { break; } } snprintf(curnozzle, sizeof(curnozzle) -1 , "tap%u", i); } else { snprintf(curnozzle, sizeof(curnozzle) - 1, "/dev/%s", devname); nozzle->fd = open(curnozzle, O_RDWR); savederrno = errno; snprintf(curnozzle, sizeof(curnozzle) - 1, "%s", devname); } if (nozzle->fd < 0) { savederrno = EBUSY; goto out_error; } strncpy(devname, curnozzle, IFNAMSIZ); - strncpy(nozzle->name, curnozzle, IFNAMSIZ); + memmove(nozzle->name, curnozzle, IFNAMSIZ - 1); #endif #ifdef KNET_LINUX if ((nozzle->fd = open("/dev/net/tun", O_RDWR)) < 0) { savederrno = errno; goto out_error; } memset(&ifr, 0, sizeof(struct ifreq)); memmove(ifname, devname, IFNAMSIZ); ifr.ifr_flags = IFF_TAP | IFF_NO_PI; if (ioctl(nozzle->fd, TUNSETIFF, &ifr) < 0) { savederrno = errno; goto out_error; } if ((strlen(devname) > 0) && (strcmp(devname, ifname) != 0)) { savederrno = EBUSY; goto out_error; } strncpy(devname, ifname, IFNAMSIZ); - strncpy(nozzle->name, ifname, IFNAMSIZ); + memmove(nozzle->name, ifname, IFNAMSIZ - 1); #endif nozzle->default_mtu = get_iface_mtu(nozzle); if (nozzle->default_mtu < 0) { savederrno = errno; goto out_error; } if (get_iface_mac(nozzle, &temp_mac) < 0) { savederrno = errno; goto out_error; } strncpy(nozzle->default_mac, temp_mac, 18); free(temp_mac); if (updownpath) { int len = strlen(updownpath); strcpy(nozzle->updownpath, updownpath); if (nozzle->updownpath[len-1] != '/') { 
nozzle->updownpath[len] = '/'; } nozzle->hasupdown = 1; } nozzle->next = lib_cfg.head; lib_cfg.head = nozzle; pthread_mutex_unlock(&config_mutex); errno = savederrno; return nozzle; out_error: destroy_iface(nozzle); pthread_mutex_unlock(&config_mutex); errno = savederrno; return NULL; } int nozzle_close(nozzle_t nozzle) { int err = 0, savederrno = 0; nozzle_t temp = lib_cfg.head; nozzle_t prev = lib_cfg.head; struct nozzle_ip *ip, *ip_next; savederrno = pthread_mutex_lock(&config_mutex); if (savederrno) { errno = savederrno; return -1; } if (!is_valid_nozzle(nozzle)) { savederrno = EINVAL; err = -1; goto out_clean; } while ((temp) && (temp != nozzle)) { prev = temp; temp = temp->next; } if (nozzle == prev) { lib_cfg.head = nozzle->next; } else { prev->next = nozzle->next; } ip = nozzle->ip; while (ip) { ip_next = ip->next; free(ip); ip = ip_next; } destroy_iface(nozzle); out_clean: pthread_mutex_unlock(&config_mutex); errno = savederrno; return err; } int nozzle_run_updown(const nozzle_t nozzle, uint8_t action, char **exec_string) { int err = 0, savederrno = 0; char command[PATH_MAX]; const char *action_str = NULL; struct stat sb; if (action > NOZZLE_POSTDOWN) { errno = EINVAL; return -1; } if (!exec_string) { errno = EINVAL; return -1; } savederrno = pthread_mutex_lock(&config_mutex); if (savederrno) { errno = savederrno; return -1; } if (!is_valid_nozzle(nozzle)) { savederrno = EINVAL; err = -1; goto out_clean; } if (!nozzle->hasupdown) { savederrno = EINVAL; err = -1; goto out_clean; } switch(action) { case NOZZLE_PREUP: action_str = "pre-up.d"; break; case NOZZLE_UP: action_str = "up.d"; break; case NOZZLE_DOWN: action_str = "down.d"; break; case NOZZLE_POSTDOWN: action_str = "post-down.d"; break; } memset(command, 0, PATH_MAX); snprintf(command, PATH_MAX, "%s/%s/%s", nozzle->updownpath, action_str, nozzle->name); err = stat(command, &sb); if (err) { savederrno = errno; goto out_clean; } /* * clear errno from previous calls as there is no errno * returned from 
execute_bin_sh_command */ savederrno = 0; err = execute_bin_sh_command(command, exec_string); if (err) { err = -2; } out_clean: pthread_mutex_unlock(&config_mutex); errno = savederrno; return err; } int nozzle_set_up(nozzle_t nozzle) { int err = 0, savederrno = 0; struct ifreq ifr; savederrno = pthread_mutex_lock(&config_mutex); if (savederrno) { errno = savederrno; return -1; } if (!is_valid_nozzle(nozzle)) { savederrno = EINVAL; err = -1; goto out_clean; } if (nozzle->up) { goto out_clean; } memset(&ifr, 0, sizeof(struct ifreq)); strncpy(ifname, nozzle->name, IFNAMSIZ); err = ioctl(lib_cfg.ioctlfd, SIOCGIFFLAGS, &ifr); if (err) { savederrno = errno; goto out_clean; } ifr.ifr_flags |= IFF_UP | IFF_RUNNING; err = ioctl(lib_cfg.ioctlfd, SIOCSIFFLAGS, &ifr); if (err) { savederrno = errno; goto out_clean; } nozzle->up = 1; out_clean: pthread_mutex_unlock(&config_mutex); errno = savederrno; return err; } int nozzle_set_down(nozzle_t nozzle) { int err = 0, savederrno = 0; struct ifreq ifr; savederrno = pthread_mutex_lock(&config_mutex); if (savederrno) { errno = savederrno; return -1; } if (!is_valid_nozzle(nozzle)) { savederrno = EINVAL; err = -1; goto out_clean; } if (!nozzle->up) { goto out_clean; } memset(&ifr, 0, sizeof(struct ifreq)); strncpy(ifname, nozzle->name, IFNAMSIZ); err = ioctl(lib_cfg.ioctlfd, SIOCGIFFLAGS, &ifr); if (err) { savederrno = errno; goto out_clean; } ifr.ifr_flags &= ~IFF_UP; err = ioctl(lib_cfg.ioctlfd, SIOCSIFFLAGS, &ifr); if (err) { savederrno = errno; goto out_clean; } nozzle->up = 0; out_clean: pthread_mutex_unlock(&config_mutex); errno = savederrno; return err; } int nozzle_get_mtu(const nozzle_t nozzle) { int err = 0, savederrno = 0; savederrno = pthread_mutex_lock(&config_mutex); if (savederrno) { errno = savederrno; return -1; } if (!is_valid_nozzle(nozzle)) { savederrno = EINVAL; err = -1; goto out_clean; } err = get_iface_mtu(nozzle); savederrno = errno; out_clean: pthread_mutex_unlock(&config_mutex); errno = savederrno; return 
err; } int nozzle_get_mac(const nozzle_t nozzle, char **ether_addr) { int err = 0, savederrno = 0; if (!ether_addr) { errno = EINVAL; return -1; } savederrno = pthread_mutex_lock(&config_mutex); if (savederrno) { errno = savederrno; return -1; } if (!is_valid_nozzle(nozzle)) { savederrno = EINVAL; err = -1; goto out_clean; } err = get_iface_mac(nozzle, ether_addr); out_clean: pthread_mutex_unlock(&config_mutex); errno = savederrno; return err; } int nozzle_set_mac(nozzle_t nozzle, const char *ether_addr) { int err = 0, savederrno = 0; struct ifreq ifr; if (!ether_addr) { errno = EINVAL; return -1; } savederrno = pthread_mutex_lock(&config_mutex); if (savederrno) { errno = savederrno; return -1; } if (!is_valid_nozzle(nozzle)) { savederrno = EINVAL; err = -1; goto out_clean; } memset(&ifr, 0, sizeof(struct ifreq)); strncpy(ifname, nozzle->name, IFNAMSIZ); #ifdef KNET_LINUX err = ioctl(lib_cfg.ioctlfd, SIOCGIFHWADDR, &ifr); if (err) { savederrno = errno; goto out_clean; } memmove(ifr.ifr_hwaddr.sa_data, ether_aton(ether_addr), ETH_ALEN); err = ioctl(lib_cfg.ioctlfd, SIOCSIFHWADDR, &ifr); savederrno = errno; #endif #ifdef KNET_BSD err = ioctl(lib_cfg.ioctlfd, SIOCGIFADDR, &ifr); if (err) { savederrno = errno; goto out_clean; } memmove(ifr.ifr_addr.sa_data, ether_aton(ether_addr), ETHER_ADDR_LEN); ifr.ifr_addr.sa_len = ETHER_ADDR_LEN; err = ioctl(lib_cfg.ioctlfd, SIOCSIFLLADDR, &ifr); savederrno = errno; #endif out_clean: pthread_mutex_unlock(&config_mutex); errno = savederrno; return err; } int nozzle_reset_mac(nozzle_t nozzle) { return nozzle_set_mac(nozzle, nozzle->default_mac); } nozzle_t nozzle_get_handle_by_name(const char *devname) { int savederrno = 0; nozzle_t nozzle; if ((devname == NULL) || (strlen(devname) > IFNAMSIZ)) { errno = EINVAL; return NULL; } savederrno = pthread_mutex_lock(&config_mutex); if (savederrno) { errno = savederrno; return NULL; } nozzle = lib_cfg.head; while (nozzle != NULL) { if (!strcmp(devname, nozzle->name)) break; nozzle = 
nozzle->next; } if (!nozzle) { savederrno = ENOENT; } pthread_mutex_unlock(&config_mutex); errno = savederrno; return nozzle; } const char *nozzle_get_name_by_handle(const nozzle_t nozzle) { int savederrno = 0; char *name = NULL; savederrno = pthread_mutex_lock(&config_mutex); if (savederrno) { errno = savederrno; return NULL; } if (!is_valid_nozzle(nozzle)) { savederrno = ENOENT; goto out_clean; } name = nozzle->name; out_clean: pthread_mutex_unlock(&config_mutex); errno = savederrno; return name; } int nozzle_get_fd(const nozzle_t nozzle) { int fd = -1, savederrno = 0; savederrno = pthread_mutex_lock(&config_mutex); if (savederrno) { errno = savederrno; return -1; } if (!is_valid_nozzle(nozzle)) { savederrno = ENOENT; fd = -1; goto out_clean; } fd = nozzle->fd; out_clean: pthread_mutex_unlock(&config_mutex); errno = savederrno; return fd; } int nozzle_set_mtu(nozzle_t nozzle, const int mtu) { int err = 0, savederrno = 0; struct nozzle_ip *tmp_ip; struct ifreq ifr; if (!mtu) { errno = EINVAL; return -1; } savederrno = pthread_mutex_lock(&config_mutex); if (savederrno) { errno = savederrno; return -1; } if (!is_valid_nozzle(nozzle)) { savederrno = EINVAL; err = -1; goto out_clean; } err = nozzle->current_mtu = get_iface_mtu(nozzle); if (err < 0) { savederrno = errno; goto out_clean; } memset(&ifr, 0, sizeof(struct ifreq)); strncpy(ifname, nozzle->name, IFNAMSIZ); ifr.ifr_mtu = mtu; err = ioctl(lib_cfg.ioctlfd, SIOCSIFMTU, &ifr); if (err) { savederrno = errno; goto out_clean; } if ((nozzle->current_mtu < 1280) && (mtu >= 1280)) { tmp_ip = nozzle->ip; while(tmp_ip) { if (tmp_ip->domain == AF_INET6) { err = _set_ip(nozzle, IP_ADD, tmp_ip->ipaddr, tmp_ip->prefix, 0); if (err) { savederrno = errno; err = -1; goto out_clean; } } tmp_ip = tmp_ip->next; } } out_clean: pthread_mutex_unlock(&config_mutex); errno = savederrno; return err; } int nozzle_reset_mtu(nozzle_t nozzle) { return nozzle_set_mtu(nozzle, nozzle->default_mtu); } int nozzle_add_ip(nozzle_t nozzle, const 
char *ipaddr, const char *prefix) { int err = 0, savederrno = 0; int found = 0; struct nozzle_ip *ip = NULL, *ip_prev = NULL, *ip_last = NULL; int secondary = 0; if ((!ipaddr) || (!prefix)) { errno = EINVAL; return -1; } savederrno = pthread_mutex_lock(&config_mutex); if (savederrno) { errno = savederrno; return -1; } if (!is_valid_nozzle(nozzle)) { savederrno = EINVAL; err = -1; goto out_clean; } found = find_ip(nozzle, ipaddr, prefix, &ip, &ip_prev); if (found) { goto out_clean; } ip = malloc(sizeof(struct nozzle_ip)); if (!ip) { savederrno = errno; err = -1 ; goto out_clean; } memset(ip, 0, sizeof(struct nozzle_ip)); strncpy(ip->ipaddr, ipaddr, IPADDR_CHAR_MAX); strncpy(ip->prefix, prefix, PREFIX_CHAR_MAX); if (!strchr(ip->ipaddr, ':')) { ip->domain = AF_INET; } else { ip->domain = AF_INET6; } /* * if user asks for an IPv6 address, but MTU < 1280 * store the IP and bring it up later if and when MTU > 1280 */ if ((ip->domain == AF_INET6) && (get_iface_mtu(nozzle) < 1280)) { err = 0; } else { if (nozzle->ip) { secondary = 1; } err = _set_ip(nozzle, IP_ADD, ipaddr, prefix, secondary); savederrno = errno; } if (err) { free(ip); goto out_clean; } if (nozzle->ip) { ip_last = nozzle->ip; while (ip_last->next != NULL) { ip_last = ip_last->next; } ip_last->next = ip; } else { nozzle->ip = ip; } out_clean: pthread_mutex_unlock(&config_mutex); errno = savederrno; return err; } int nozzle_del_ip(nozzle_t nozzle, const char *ipaddr, const char *prefix) { int err = 0, savederrno = 0; int found = 0; struct nozzle_ip *ip = NULL, *ip_prev = NULL; if ((!ipaddr) || (!prefix)) { errno = EINVAL; return -1; } savederrno = pthread_mutex_lock(&config_mutex); if (savederrno) { errno = savederrno; return -1; } if (!is_valid_nozzle(nozzle)) { savederrno = EINVAL; err = -1; goto out_clean; } found = find_ip(nozzle, ipaddr, prefix, &ip, &ip_prev); if (!found) { goto out_clean; } /* * if user asks for an IPv6 address, but MTU < 1280 * the IP might not be configured on the interface and we 
only need to * remove it from our internal database */ if ((ip->domain == AF_INET6) && (get_iface_mtu(nozzle) < 1280)) { err = 0; } else { err = _set_ip(nozzle, IP_DEL, ipaddr, prefix, 0); savederrno = errno; } if (!err) { if (ip == ip_prev) { nozzle->ip = ip->next; } else { ip_prev->next = ip->next; } free(ip); } out_clean: pthread_mutex_unlock(&config_mutex); errno = savederrno; return err; } int nozzle_get_ips(const nozzle_t nozzle, struct nozzle_ip **nozzle_ip) { int err = 0, savederrno = 0; if (!nozzle_ip) { errno = EINVAL; return -1; } savederrno = pthread_mutex_lock(&config_mutex); if (savederrno) { errno = savederrno; return -1; } if (!is_valid_nozzle(nozzle)) { err = -1; savederrno = EINVAL; goto out_clean; } *nozzle_ip = nozzle->ip; out_clean: pthread_mutex_unlock(&config_mutex); errno = savederrno; return err; } diff --git a/libnozzle/tests/api_nozzle_get_name_by_handle.c b/libnozzle/tests/api_nozzle_get_name_by_handle.c index 0fe9eb4f..c05e4486 100644 --- a/libnozzle/tests/api_nozzle_get_name_by_handle.c +++ b/libnozzle/tests/api_nozzle_get_name_by_handle.c @@ -1,74 +1,78 @@ /* * Copyright (C) 2018-2019 Red Hat, Inc. All rights reserved. * * Author: Fabio M. 
Di Nitto * * This software licensed under GPL-2.0+ */ #include "config.h" #include #include #include #include #include "test-common.h" static int test(void) { char device_name[2*IFNAMSIZ]; const char *device_name_tmp; size_t size = IFNAMSIZ; nozzle_t nozzle; int err = 0; printf("Testing get name by handle\n"); memset(device_name, 0, size); nozzle = nozzle_open(device_name, size, NULL); if (!nozzle) { printf("Unable to init %s\n", device_name); return -1; } device_name_tmp = nozzle_get_name_by_handle(nozzle); - if ((!device_name_tmp) && (errno != ENOENT)) { - printf("Unable to get name by handle\n"); + if (!device_name_tmp) { + if (errno != ENOENT) { + printf("Unable to get name by handle\n"); + } else { + printf("received incorrect errno!\n"); + } err = -1; goto out_clean; } if (strcmp(device_name, device_name_tmp)) { printf("get name by handle returned different names for the same handle\n"); err = -1; goto out_clean; } printf("Testing error conditions\n"); device_name_tmp = nozzle_get_name_by_handle(NULL); if ((device_name_tmp) || (errno != ENOENT)) { printf("get name by handle returned wrong error\n"); err = -1; goto out_clean; } out_clean: if (nozzle) { nozzle_close(nozzle); } return err; } int main(void) { need_root(); if (test() < 0) return FAIL; return PASS; } diff --git a/libnozzle/tests/api_nozzle_set_mac.c b/libnozzle/tests/api_nozzle_set_mac.c index 244e866f..abb96364 100644 --- a/libnozzle/tests/api_nozzle_set_mac.c +++ b/libnozzle/tests/api_nozzle_set_mac.c @@ -1,162 +1,158 @@ /* * Copyright (C) 2018-2019 Red Hat, Inc. All rights reserved. * * Author: Fabio M. 
Di Nitto * * This software licensed under GPL-2.0+ */ #include "config.h" #include #include #include #include #include #include #include #include #include #ifdef KNET_LINUX #include #include #endif #ifdef KNET_BSD #include #endif #include "test-common.h" static int test(void) { char device_name[IFNAMSIZ]; size_t size = IFNAMSIZ; int err=0; nozzle_t nozzle; - char *original_mac = NULL, *current_mac = NULL, *temp_mac = NULL, *err_mac = NULL; + char *original_mac = NULL, *current_mac = NULL, *temp_mac = NULL; struct ether_addr *orig_mac, *cur_mac, *tmp_mac; printf("Testing set MAC\n"); memset(device_name, 0, size); nozzle = nozzle_open(device_name, size, NULL); if (!nozzle) { printf("Unable to init %s\n", device_name); return -1; } printf("Get current MAC\n"); if (nozzle_get_mac(nozzle, &original_mac) < 0) { printf("Unable to get current MAC address.\n"); err = -1; goto out_clean; } orig_mac = ether_aton(original_mac); if (nozzle_get_mac(nozzle, ¤t_mac) < 0) { printf("Unable to get current MAC address.\n"); err = -1; goto out_clean; } printf("Current MAC: %s\n", current_mac); printf("Setting MAC: 00:01:01:01:01:01\n"); if (nozzle_set_mac(nozzle, "00:01:01:01:01:01") < 0) { printf("Unable to set current MAC address.\n"); err = -1; goto out_clean; } if (nozzle_get_mac(nozzle, &temp_mac) < 0) { printf("Unable to get current MAC address.\n"); err = -1; goto out_clean; } printf("Current MAC: %s\n", temp_mac); cur_mac = ether_aton(current_mac); tmp_mac = ether_aton(temp_mac); printf("Comparing MAC addresses\n"); if (memcmp(cur_mac, tmp_mac, sizeof(struct ether_addr))) { printf("Mac addresses are not the same?!\n"); err = -1; goto out_clean; } printf("Testing reset_mac\n"); if (nozzle_reset_mac(nozzle) < 0) { printf("Unable to reset mac address\n"); err = -1; goto out_clean; } - if (current_mac) + if (current_mac) { free(current_mac); + current_mac = NULL; + } if (nozzle_get_mac(nozzle, ¤t_mac) < 0) { printf("Unable to get current MAC address.\n"); err = -1; goto out_clean; 
} cur_mac = ether_aton(current_mac); if (memcmp(cur_mac, orig_mac, sizeof(struct ether_addr))) { printf("Mac addresses are not the same?!\n"); err = -1; goto out_clean; } printf("Testing ERROR conditions\n"); printf("Pass NULL to set_mac (pass1)\n"); errno = 0; if ((nozzle_set_mac(nozzle, NULL) >= 0) || (errno != EINVAL)) { printf("Something is wrong in nozzle_set_mac sanity checks\n"); err = -1; goto out_clean; } printf("Pass NULL to set_mac (pass2)\n"); errno = 0; - if ((nozzle_set_mac(NULL, err_mac) >= 0) || (errno != EINVAL)) { + if ((nozzle_set_mac(NULL, current_mac) >= 0) || (errno != EINVAL)) { printf("Something is wrong in nozzle_set_mac sanity checks\n"); err = -1; goto out_clean; } out_clean: - if (err_mac) { - printf("Something managed to set err_mac!\n"); - err = -1; - free(err_mac); - } - if (current_mac) free(current_mac); if (temp_mac) free(temp_mac); if (original_mac) free(original_mac); if (nozzle) { nozzle_close(nozzle); } return err; } int main(void) { need_root(); if (test() < 0) return FAIL; return PASS; } diff --git a/libnozzle/tests/test-common.c b/libnozzle/tests/test-common.c index b36be79c..3afd2ec8 100644 --- a/libnozzle/tests/test-common.c +++ b/libnozzle/tests/test-common.c @@ -1,159 +1,159 @@ /* * Copyright (C) 2018-2019 Red Hat, Inc. All rights reserved. * * Author: Fabio M. 
Di Nitto * * This software licensed under GPL-2.0+ */ #include "config.h" #include #include #include #include #include #include #include #include #include "test-common.h" void need_root(void) { if (geteuid() != 0) { printf("This test requires root privileges\n"); exit(SKIP); } } int test_iface(char *name, size_t size, const char *updownpath) { nozzle_t nozzle; nozzle=nozzle_open(name, size, updownpath); if (!nozzle) { printf("Unable to open nozzle.\n"); return -1; } printf("Created interface: %s\n", name); if (is_if_in_system(name) > 0) { printf("Found interface %s on the system\n", name); } else { printf("Unable to find interface %s on the system\n", name); } if (!nozzle_get_handle_by_name(name)) { printf("Unable to find interface %s in nozzle db\n", name); } else { printf("Found interface %s in nozzle db\n", name); } nozzle_close(nozzle); if (is_if_in_system(name) == 0) printf("Successfully removed interface %s from the system\n", name); return 0; } int is_if_in_system(char *name) { struct ifaddrs *ifap = NULL; struct ifaddrs *ifa; int found = 0; if (getifaddrs(&ifap) < 0) { printf("Unable to get interface list.\n"); return -1; } ifa = ifap; while (ifa) { if (!strncmp(name, ifa->ifa_name, IFNAMSIZ)) { found = 1; break; } ifa=ifa->ifa_next; } freeifaddrs(ifap); return found; } int get_random_byte(void) { pid_t mypid; uint8_t *pid; uint8_t randombyte = 0; uint8_t i; if (sizeof(pid_t) < 4) { printf("pid_t is smaller than 4 bytes?\n"); exit(77); } mypid = getpid(); pid = (uint8_t *)&mypid; for (i = 0; i < sizeof(pid_t); i++) { if (pid[i] == 0) { pid[i] = 128; } } randombyte = pid[1]; return randombyte; } void make_local_ips(char *testipv4_1, char *testipv4_2, char *testipv6_1, char *testipv6_2) { pid_t mypid; uint8_t *pid; uint8_t i; memset(testipv4_1, 0, IPBUFSIZE); memset(testipv4_2, 0, IPBUFSIZE); memset(testipv6_1, 0, IPBUFSIZE); memset(testipv6_2, 0, IPBUFSIZE); mypid = getpid(); pid = (uint8_t *)&mypid; for (i = 0; i < sizeof(pid_t); i++) { - if (pid[i] == 0) { 
+ if ((pid[i] == 0) || (pid[i] == 255)) { pid[i] = 128; } } snprintf(testipv4_1, IPBUFSIZE - 1, "127.%u.%u.%u", pid[1], pid[2], pid[0]); snprintf(testipv4_2, IPBUFSIZE - 1, "127.%u.%d.%u", pid[1], pid[2]+1, pid[0]); snprintf(testipv6_1, IPBUFSIZE - 1, "fd%x:%x%x::1", pid[1], pid[2], pid[0]); snprintf(testipv6_2, IPBUFSIZE - 1, "fd%x:%x%x:1::1", pid[1], pid[2], pid[0]); }