Page MenuHomeClusterLabs Projects

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/configure.ac b/configure.ac
index 7705a306..58d46c65 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,802 +1,804 @@
# -*- Autoconf -*-
# Process this file with autoconf to produce a configure script.
# bootstrap / init
dnl The package version is not hard-coded: it is whatever the
dnl build-aux/git-version-gen script prints when this file is processed.
AC_PREREQ([2.61])
AC_INIT([corosync],
m4_esyscmd([build-aux/git-version-gen .tarball-version .gitarchivever]),
[users@clusterlabs.org])
AC_USE_SYSTEM_EXTENSIONS
AM_INIT_AUTOMAKE([foreign 1.11])
LT_PREREQ([2.2.6])
LT_INIT
AM_SILENT_RULES([yes])
AC_CONFIG_SRCDIR([lib/cpg.c])
AC_CONFIG_HEADER([include/corosync/config.h])
AC_CONFIG_MACRO_DIR([m4])
AC_CANONICAL_HOST
AC_LANG([C])
dnl WITH_LIST is substituted with an empty default here; the feature checks
dnl further down append "--with <feature>" entries to the shell variable.
AC_SUBST(WITH_LIST, [""])
# Enable inter-library dependencies (on unless --disable-interlib-deps is
# given). The outcome is stored in the link_all_deplibs and
# link_all_deplibs_CXX shell variables (presumably read by libtool - confirm).
AC_ARG_ENABLE(interlib-deps,
[AS_HELP_STRING([--disable-interlib-deps ],[disable inter-library dependencies (might break builds)])],
[enable_interlib_deps="$enableval"],
[enable_interlib_deps="yes"])
AC_MSG_NOTICE([enable inter-library dependencies: $enable_interlib_deps])
# Compare with the portable "=" operator: "==" is a shell extension that
# strictly POSIX test implementations reject.
if test "${enable_interlib_deps}" = "yes"; then
link_all_deplibs=yes
link_all_deplibs_CXX=yes
else
link_all_deplibs=no
link_all_deplibs_CXX=no
fi
dnl Fix default variables - "prefix" variable if not specified.
dnl A prefix equal to NONE is treated as "not given": it becomes /usr and the
dnl state, config, systemd and library directories are moved to system-wide
dnl locations, but only while they still hold their unexpanded default text.
dnl systemddir is computed before prefix is fixed up, so with an unset prefix
dnl it holds NONE/lib/systemd/system until the check below rewrites it.
systemddir=${prefix}/lib/systemd/system
if test "$prefix" = "NONE"; then
prefix="/usr"
dnl Fix "localstatedir" variable if not specified
if test "$localstatedir" = "\${prefix}/var"; then
localstatedir="/var"
fi
dnl Fix "sysconfdir" variable if not specified
if test "$sysconfdir" = "\${prefix}/etc"; then
sysconfdir="/etc"
fi
dnl Fix "systemddir" when it was derived from the unset prefix above
if test "$systemddir" = "NONE/lib/systemd/system"; then
systemddir=/lib/systemd/system
fi
dnl Fix "libdir" variable if not specified; /usr/lib64 wins when it exists
if test "$libdir" = "\${exec_prefix}/lib"; then
if test -e /usr/lib64; then
libdir="/usr/lib64"
else
libdir="/usr/lib"
fi
fi
fi
# Report whether configure runs inside the source tree. When srcdir is "."
# the BUILDING_IN_PLACE symbol is additionally defined in the config header.
if test "$srcdir" = "."; then
AC_MSG_NOTICE([building in place srcdir:$srcdir])
AC_DEFINE([BUILDING_IN_PLACE], 1, [building in place])
else
AC_MSG_NOTICE([building out of tree srcdir:$srcdir])
fi
# Checks for programs.
# check stolen from gnulib/m4/gnu-make.m4
# The probe runs $MAKE (or "make" when MAKE is unset) with --version plus a
# bogus target; a non-zero exit status is taken to mean it is not GNU make.
if ! ${MAKE-make} --version /cannot/make/this >/dev/null 2>&1; then
AC_MSG_ERROR([you don't seem to have GNU make; it is required])
fi
dnl Pull in corosync-default.m4 (silently skipped when the file is absent).
sinclude(corosync-default.m4)
AC_PROG_CC
AC_PROG_CC_C99
# A compiler without C99 support is a hard error.
if test "x$ac_cv_prog_cc_c99" = "xno"; then
AC_MSG_ERROR(["C99 support is required"])
fi
AC_PROG_INSTALL
AC_PROG_LN_S
AC_PROG_MAKE_SET
AC_PROG_SED
# Optional tools: each variable stays empty when the program is not found.
# GROFF and AUGTOOL feed automake conditionals near the end of this file.
AC_CHECK_PROGS([GROFF], [groff])
AC_CHECK_PROGS([PKGCONFIG], [pkg-config])
AC_CHECK_PROGS([AUGTOOL], [augtool])
AC_CHECK_PROGS([DOT], [dot])
AC_CHECK_PROGS([DOXYGEN], [doxygen])
AC_CHECK_PROGS([AWK], [awk])
AC_PATH_PROG([BASHPATH], [bash])
# Checks for compiler characteristics.
AC_PROG_GCC_TRADITIONAL
AC_C_CONST
AC_C_INLINE
AC_C_VOLATILE
# Checks for header files.
AC_HEADER_DIRENT
AC_HEADER_STDC
AC_HEADER_SYS_WAIT
AC_CHECK_HEADERS([arpa/inet.h fcntl.h limits.h netdb.h netinet/in.h stdint.h \
stdlib.h string.h sys/ioctl.h sys/param.h sys/socket.h \
sys/time.h syslog.h unistd.h sys/types.h getopt.h malloc.h \
utmpx.h ifaddrs.h stddef.h sys/file.h sys/uio.h])
# Check entries in specific structs
# Each probe defines its HAVE_* symbol to 1 only when the member exists;
# nothing is done (and nothing fails) when the member is missing.
AC_CHECK_MEMBER([struct sockaddr_in.sin_len],
[AC_DEFINE_UNQUOTED([HAVE_SOCK_SIN_LEN], [1], [sockaddr_in needs sin_len])],
[], [[#include <netinet/in.h>]])
AC_CHECK_MEMBER([struct sockaddr_in6.sin6_len],
[AC_DEFINE_UNQUOTED([HAVE_SOCK_SIN6_LEN], [1], [sockaddr_in6 needs sin6_len])],
[], [[#include <netinet/in.h>]])
AC_CHECK_MEMBER([struct msghdr.msg_control],
[AC_DEFINE_UNQUOTED([HAVE_MSGHDR_CONTROL], [1], [msghdr has msg_control])],
[], [[#include <sys/socket.h>]])
AC_CHECK_MEMBER([struct msghdr.msg_controllen],
[AC_DEFINE_UNQUOTED([HAVE_MSGHDR_CONTROLLEN], [1], [msghdr has msg_controllen])],
[], [[#include <sys/socket.h>]])
AC_CHECK_MEMBER([struct msghdr.msg_flags],
[AC_DEFINE_UNQUOTED([HAVE_MSGHDR_FLAGS], [1], [msghdr has msg_flags])],
[], [[#include <sys/socket.h>]])
AC_CHECK_MEMBER([struct msghdr.msg_accrights],
[AC_DEFINE_UNQUOTED([HAVE_MSGHDR_ACCRIGHTS], [1], [msghdr has msg_accrights])],
[], [[#include <sys/socket.h>]])
AC_CHECK_MEMBER([struct msghdr.msg_accrightslen],
[AC_DEFINE_UNQUOTED([HAVE_MSGHDR_ACCRIGHTSLEN], [1], [msghdr has msg_accrightslen])],
[], [[#include <sys/socket.h>]])
# Checks for typedefs.
AC_TYPE_UID_T
AC_TYPE_INT16_T
AC_TYPE_INT32_T
AC_TYPE_INT64_T
AC_TYPE_INT8_T
AC_TYPE_UINT16_T
AC_TYPE_UINT32_T
AC_TYPE_UINT64_T
AC_TYPE_UINT8_T
AC_TYPE_SIZE_T
AC_TYPE_SSIZE_T
# Checks for libraries.
# libqb is looked up through pkg-config with no fallback action. Its flags
# are added to CPPFLAGS/LIBS only for the two optional-symbol probes below;
# the saved values are restored once the probes are done.
SAVE_CPPFLAGS="$CPPFLAGS"
SAVE_LIBS="$LIBS"
PKG_CHECK_MODULES([LIBQB], [libqb])
CPPFLAGS="$CPPFLAGS $LIBQB_CFLAGS"
LIBS="$LIBS $LIBQB_LIBS"
AC_CHECK_LIB([qb], [qb_log_thread_priority_set], \
have_qb_log_thread_priority_set="yes", \
have_qb_log_thread_priority_set="no")
if test "x${have_qb_log_thread_priority_set}" = xyes; then
AC_DEFINE_UNQUOTED([HAVE_QB_LOG_THREAD_PRIORITY_SET], 1, [have qb_log_thread_priority_set])
fi
AC_CHECK_LIB([qb], [qb_log_file_reopen], \
have_qb_log_file_reopen="yes", \
have_qb_log_file_reopen="no")
if test "x${have_qb_log_file_reopen}" = xyes; then
AC_DEFINE_UNQUOTED([HAVE_QB_LOG_FILE_REOPEN], 1, [have qb_log_file_reopen])
fi
AM_CONDITIONAL(HAVE_QB_LOG_FILE_REOPEN, test x$have_qb_log_file_reopen = xyes)
CPPFLAGS="$SAVE_CPPFLAGS"
LIBS="$SAVE_LIBS"
AC_CHECK_LIB([pthread], [pthread_create])
AC_CHECK_LIB([socket], [socket])
PKG_CHECK_MODULES([knet],[libknet])
AC_CHECK_LIB([nsl], [t_open])
AC_CHECK_LIB([rt], [sched_getscheduler])
# HAVE_CRC32 is declared in both outcomes, so the automake conditional is
# always defined whether or not zlib provides crc32.
AC_CHECK_LIB([z], [crc32],
AM_CONDITIONAL([HAVE_CRC32], true),
AM_CONDITIONAL([HAVE_CRC32], false))
# this hack is necessary to check for symbols on out of tree builds
# but it is as horrible as it gets and in theory users should be
# invoking ./configure with proper LIBRARY_PATH set.
# The knet link flags are prepended to LIBS for the optional-symbol probes
# only; LIBS is restored from OLDLIBS afterwards. Each probe defines its
# HAVE_KNET_* symbol when the function is found and does nothing otherwise.
OLDLIBS="$LIBS"
LIBS="$knet_LIBS $LIBS"
AC_CHECK_LIB([knet],[knet_handle_enable_access_lists],
[AC_DEFINE_UNQUOTED([HAVE_KNET_ACCESS_LIST], 1, [have knet access list])])
AC_CHECK_LIB([knet],[knet_handle_crypto_set_config],
[AC_DEFINE_UNQUOTED([HAVE_KNET_CRYPTO_RECONF], 1, [have knet crypto reconfig support])])
+AC_CHECK_LIB([knet],[knet_handle_get_onwire_ver],
+ [AC_DEFINE_UNQUOTED([HAVE_KNET_ONWIRE_VER], 1, [have knet onwire versioning])])
LIBS="$OLDLIBS"
# Checks for library functions.
# None of these probes has a failure action here: a missing function only
# leaves the corresponding HAVE_* symbol undefined.
AC_FUNC_ALLOCA
AC_FUNC_CLOSEDIR_VOID
AC_FUNC_ERROR_AT_LINE
AC_FUNC_FORK
AC_FUNC_MALLOC
AC_FUNC_MEMCMP
AC_FUNC_MMAP
AC_FUNC_REALLOC
AC_FUNC_SELECT_ARGTYPES
AC_FUNC_VPRINTF
AC_CHECK_FUNCS([alarm alphasort atexit bzero dup2 endgrent endpwent fdatasync \
fcntl getcwd getpeerucred getpeereid gettimeofday inet_ntoa \
memmove memset mkdir scandir select socket strcasecmp strchr \
strdup strerror strrchr strspn strstr pthread_setschedparam \
sched_get_priority_max sched_setscheduler getifaddrs \
clock_gettime ftruncate gethostname localtime_r munmap strtol])
dnl Files generated from their .in templates when configure finishes:
dnl one Makefile per build directory plus the Doxygen configuration.
AC_CONFIG_FILES([Makefile
exec/Makefile
include/Makefile
init/Makefile
lib/Makefile
common_lib/Makefile
man/Makefile
pkgconfig/Makefile
test/Makefile
tools/Makefile
conf/Makefile
vqsim/Makefile
Doxyfile
conf/logrotate/Makefile])
### Local business
dnl ===============================================
dnl Functions / global M4 variables
dnl ===============================================
dnl Global list of LIB names. Every import appends ",VARNAME,UPCASED_NAME",
dnl so the list always starts with an empty element; the summary at the end
dnl of this file drops it with one m4_shift before printing.
m4_define([local_soname_list], [])dnl
dnl Upcase parameter
m4_define([local_upcase], [translit([$*], [a-z], [A-Z])])dnl
dnl M4 macro that reads lib/lib$1.verso (newlines stripped), assigns the text
dnl to the shell variable <UPCASED $1>_SONAME, substitutes that variable and
dnl records it in local_soname_list.
m4_define([LIB_SONAME_IMPORT],[dnl
m4_define([local_libname], local_upcase($1)[_SONAME])dnl
m4_define([local_soname], translit(m4_sinclude(lib/lib$1.verso), [
], []))dnl
local_libname="local_soname"dnl
m4_define([local_soname_list], m4_defn([local_soname_list])[,]local_libname[,]local_upcase($1))dnl
AC_SUBST(local_libname)dnl
])dnl
dnl M4 macro for print padspaces (used in LIB_MSG_RESULT). It takes 2 arguments, length of string to pad and desired
dnl (padded) length. It recurses, emitting one space per step, until the
dnl desired length no longer exceeds the string length.
m4_define([m4_printpadspace],[ifelse(m4_eval([$2 - $1 < 1]),[1],,[ ][m4_printpadspace([$1],m4_eval([$2 - 1]))])])dnl
dnl Show AC_MSG_RESULT for specific libraries. Arguments come in pairs
dnl (shell variable name, display name); the macro prints one line for the
dnl first pair and recurses on the rest until a single argument is left.
m4_define([LIB_MSG_RESULT], [ifelse([$#], [1], ,[dnl
AC_MSG_RESULT([ $2 Library SONAME m4_printpadspace(len($2),8) = ${$1}])
LIB_MSG_RESULT(m4_shift(m4_shift($@)))dnl
])])dnl
# ===============================================
# Helpers
# ===============================================
## check if the compiler supports -Werror -Wunknown-warning-option
# When it does, both flags are remembered in unknown_warnings_as_errors and
# added to every later flag probe; otherwise that variable is left empty.
AC_MSG_CHECKING([whether $CC supports -Wunknown-warning-option -Werror])
BACKUP="$CPPFLAGS"
CPPFLAGS="$CPPFLAGS -Werror -Wunknown-warning-option"
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([])],
[unknown_warnings_as_errors='-Wunknown-warning-option -Werror'; AC_MSG_RESULT([yes])],
[unknown_warnings_as_errors=''; AC_MSG_RESULT([no])])
CPPFLAGS="$BACKUP"
## helper for CC stuff
# Shell function: compiles an empty program with the given flags appended to
# CPPFLAGS (plus unknown_warnings_as_errors) and returns 0 when compilation
# succeeds, 1 otherwise. CPPFLAGS is restored before returning. BACKUP and RC
# are plain globals, so callers must not rely on their previous values.
cc_supports_flag() {
BACKUP="$CPPFLAGS"
CPPFLAGS="$CPPFLAGS $@ $unknown_warnings_as_errors"
AC_MSG_CHECKING([whether $CC supports "$@"])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([])],
[RC=0; AC_MSG_RESULT([yes])],
[RC=1; AC_MSG_RESULT([no])])
CPPFLAGS="$BACKUP"
return $RC
}
## cleanup
# NOTE(review): the default-directory fix-up near the top of this file
# already rewrites a prefix of NONE to /usr, so the NONE case below looks
# unreachable unless prefix is reset in between - confirm.
AC_MSG_NOTICE(Sanitizing prefix: ${prefix})
case $prefix in
NONE) prefix=/usr/local;;
esac
AC_MSG_NOTICE(Sanitizing exec_prefix: ${exec_prefix})
case $exec_prefix in
dnl For consistency with Corosync, map NONE->$prefix
dnl NOTE(review): the second pattern matches the literal word "prefix", not
dnl the value of the prefix variable - confirm this is intended.
NONE) exec_prefix=$prefix;;
prefix) exec_prefix=$prefix;;
esac
## local defines
# PACKAGE_FEATURES accumulates one word per enabled feature and ends up both
# in the config header and in the final summary.
PACKAGE_FEATURES=""
LINT_FLAGS="-weak -unrecog +posixlib +ignoresigns -fcnuse \
-badflag -D__gnuc_va_list=va_list -D__attribute\(x\)="
# default libraries SONAME
SOMAJOR="5"
SOMINOR="0"
SOMICRO="0"
SONAME="${SOMAJOR}.${SOMINOR}.${SOMICRO}"
# specific libraries SONAME
# Each call sets and substitutes <NAME>_SONAME from lib/lib<name>.verso.
LIB_SONAME_IMPORT([cfg])
LIB_SONAME_IMPORT([cpg])
LIB_SONAME_IMPORT([quorum])
LIB_SONAME_IMPORT([sam])
LIB_SONAME_IMPORT([votequorum])
LIB_SONAME_IMPORT([cmap])
# local options
# The feature tests further down read the enable_<name> variables that the
# option macro sets itself. The shell variable "default" assigned by several
# options is not read anywhere in the visible part of this file; where it
# sits in the third argument it only runs when the option IS given.
AC_ARG_ENABLE([ansi],
[ --enable-ansi : force to build with ANSI standards. ],
[ default="no" ])
AC_ARG_ENABLE([fatal-warnings],
[ --enable-fatal-warnings : enable fatal warnings. ],
[ default="no" ])
AC_ARG_ENABLE([debug],
[ --enable-debug : enable debug build. ],
[ default="no" ])
# SANITIZERS holds the raw comma separated list; it is parsed further down.
AC_ARG_WITH([sanitizers],
[AS_HELP_STRING([--with-sanitizers=...,...],
[enable SANitizer build, do *NOT* use for production. Only ASAN/UBSAN/TSAN are currently supported])],
[ SANITIZERS="$withval" ],
[ SANITIZERS="" ])
# The secure build is the only feature here that defaults to "yes".
AC_ARG_ENABLE([secure-build],
[ --enable-secure-build : enable PIE/RELRO build. ],
[],
[enable_secure_build="yes"])
AC_ARG_ENABLE([user-flags],
[ --enable-user-flags : rely on user environment. ],
[ default="no" ])
AC_ARG_ENABLE([coverage],
[ --enable-coverage : coverage analysis of the codebase. ],
[ default="no" ])
AC_ARG_ENABLE([small-memory-footprint],
[ --enable-small-memory-footprint : Use small message queues and small messages sizes. ],
[ default="no" ])
AC_ARG_ENABLE([dbus],
[ --enable-dbus : dbus events. ],,
[ enable_dbus="no" ])
AC_ARG_ENABLE([monitoring],
[ --enable-monitoring : resource monitoring ],,
[ default="no" ])
AM_CONDITIONAL(BUILD_MONITORING, test x$enable_monitoring = xyes)
AC_ARG_ENABLE([watchdog],
[ --enable-watchdog : Watchdog support ],,
[ default="no" ])
AM_CONDITIONAL(BUILD_WATCHDOG, test x$enable_watchdog = xyes)
AC_ARG_ENABLE([augeas],
[ --enable-augeas : Install the augeas lens for corosync.conf ],,
[ enable_augeas="no" ])
AM_CONDITIONAL(INSTALL_AUGEAS, test x$enable_augeas = xyes)
AC_ARG_ENABLE([systemd],
[ --enable-systemd : Install systemd service files],,
[ enable_systemd="no" ])
AM_CONDITIONAL(INSTALL_SYSTEMD, test x$enable_systemd = xyes)
# Installation directories. INITCONFIGDIR keeps the unexpanded
# sysconfdir reference (single quotes), whereas the other defaults below are
# expanded with the current shell values of their base directories.
AC_ARG_WITH([initconfigdir],
[AS_HELP_STRING([--with-initconfigdir=DIR],
[configuration directory @<:@SYSCONFDIR/sysconfig@:>@])],
[INITCONFIGDIR="$withval"],
[INITCONFIGDIR='${sysconfdir}/sysconfig'])
AC_SUBST([INITCONFIGDIR])
AC_ARG_WITH([initddir],
[ --with-initddir=DIR : path to init script directory. ],
[ INITDDIR="$withval" ],
[ INITDDIR="$sysconfdir/init.d" ])
AC_ARG_WITH([systemddir],
[ --with-systemddir=DIR : path to systemd unit files directory. ],
[ SYSTEMDDIR="$withval" ],
[ SYSTEMDDIR="$systemddir" ])
AC_ARG_WITH([logdir],
[ --with-logdir=DIR : the base directory for corosync logging files. ],
[ LOGDIR="$withval" ],
[ LOGDIR="$localstatedir/log/cluster" ])
dnl Base directory for logrotate.d files: the value given with
dnl --with-logrotatedir=DIR, or $sysconfdir/logrotate.d by default.
AC_ARG_WITH([logrotatedir],
[ --with-logrotatedir=DIR : the base directory for logrotate.d files. ],
[ LOGROTATEDIR="$withval" ],
[ LOGROTATEDIR="$sysconfdir/logrotate.d" ])
# Remaining optional features; all are off unless explicitly enabled.
# For snmp the "default" assignment sits in the action-if-given slot and the
# variable is not read in the visible part of this file.
AC_ARG_ENABLE([snmp],
[ --enable-snmp : SNMP protocol support ],
[ default="no" ])
AC_ARG_ENABLE([xmlconf],
[ --enable-xmlconf : XML configuration support ],,
[ enable_xmlconf="no" ])
AM_CONDITIONAL(INSTALL_XMLCONF, test x$enable_xmlconf = xyes)
AC_ARG_ENABLE([vqsim],
[ --enable-vqsim : Quorum simulator support ],,
[ enable_vqsim="no" ])
AM_CONDITIONAL(BUILD_VQSIM, test x$enable_vqsim = xyes)
AC_ARG_ENABLE([nozzle],
[ --enable-nozzle : Support for nozzle ],,
[ enable_nozzle="no" ])
# *FLAGS handling goes here
# Remember what the user passed in the environment; the final CFLAGS,
# CPPFLAGS and LDFLAGS are rebuilt from these values later on.
ENV_CFLAGS="$CFLAGS"
ENV_CPPFLAGS="$CPPFLAGS"
ENV_LDFLAGS="$LDFLAGS"
# debug build stuff
# A debug build defines DEBUG and disables optimisation; otherwise -O3.
if test "x${enable_debug}" = xyes; then
AC_DEFINE_UNQUOTED([DEBUG], [1], [Compiling Debugging code])
OPT_CFLAGS="-O0"
PACKAGE_FEATURES="$PACKAGE_FEATURES debug"
else
OPT_CFLAGS="-O3"
fi
# gdb flags
# -ggdb3 is used only when the compiler was detected as GCC.
if test "x${GCC}" = xyes; then
GDB_FLAGS="-ggdb3"
else
GDB_FLAGS="-g"
fi
# --- ASAN/UBSAN/TSAN (see man gcc) ---
# when using SANitizers, we need to pass the -fsanitize..
# to both CFLAGS and LDFLAGS. The CFLAGS/LDFLAGS must be
# specified as first in the list or there will be runtime
# issues (for example user has to LD_PRELOAD asan for it to work
# properly).
# The comma separated list is split on commas and every entry is matched in
# lower or upper case. A missing runtime library is a hard error.
# NOTE(review): entries other than asan/ubsan/tsan match no case branch and
# are silently ignored.
if test -n "${SANITIZERS}"; then
SANITIZERS=$(echo $SANITIZERS | sed -e 's/,/ /g')
for SANITIZER in $SANITIZERS; do
case $SANITIZER in
asan|ASAN)
SANITIZERS_CFLAGS="$SANITIZERS_CFLAGS -fsanitize=address"
SANITIZERS_LDFLAGS="$SANITIZERS_LDFLAGS -fsanitize=address -lasan"
AC_CHECK_LIB([asan],[main],,AC_MSG_ERROR([Unable to find libasan]))
;;
ubsan|UBSAN)
SANITIZERS_CFLAGS="$SANITIZERS_CFLAGS -fsanitize=undefined"
SANITIZERS_LDFLAGS="$SANITIZERS_LDFLAGS -fsanitize=undefined -lubsan"
AC_CHECK_LIB([ubsan],[main],,AC_MSG_ERROR([Unable to find libubsan]))
;;
tsan|TSAN)
SANITIZERS_CFLAGS="$SANITIZERS_CFLAGS -fsanitize=thread"
SANITIZERS_LDFLAGS="$SANITIZERS_LDFLAGS -fsanitize=thread -ltsan"
AC_CHECK_LIB([tsan],[main],,AC_MSG_ERROR([Unable to find libtsan]))
;;
esac
done
fi
# Look for dbus-1
# Pattern shared by the feature blocks below: probe the dependency, define a
# HAVE_* symbol, then append the feature name to PACKAGE_FEATURES and (for
# most of them) a "--with <feature>" entry to WITH_LIST.
if test "x${enable_dbus}" = xyes; then
PKG_CHECK_MODULES([DBUS],[dbus-1])
AC_DEFINE_UNQUOTED([HAVE_DBUS], 1, [have dbus])
PACKAGE_FEATURES="$PACKAGE_FEATURES dbus"
WITH_LIST="$WITH_LIST --with dbus"
fi
# Monitoring needs libstatgrab. The second probe only detects version 0.90
# or newer; the TMP_VARIABLE assignment is a placeholder not-found action
# (presumably there so that an older version is not treated as an error).
if test "x${enable_monitoring}" = xyes; then
PKG_CHECK_MODULES([statgrab], [libstatgrab])
PKG_CHECK_MODULES([statgrabge090], [libstatgrab >= 0.90],
AC_DEFINE_UNQUOTED([HAVE_LIBSTATGRAB_GE_090], 1, [have libstatgrab >= 0.90]),
TMP_VARIABLE=1)
AC_DEFINE_UNQUOTED([HAVE_MONITORING], 1, [have resource monitoring])
PACKAGE_FEATURES="$PACKAGE_FEATURES monitoring"
WITH_LIST="$WITH_LIST --with monitoring"
fi
# Watchdog support fails hard when either Linux header is missing.
if test "x${enable_watchdog}" = xyes; then
AC_CHECK_HEADER([linux/watchdog.h], [], [AC_MSG_ERROR([watchdog requires linux/watchdog.h])])
AC_CHECK_HEADER([linux/reboot.h], [], [AC_MSG_ERROR([watchdog requires linux/reboot.h])])
AC_DEFINE_UNQUOTED([HAVE_WATCHDOG], 1, [have watchdog])
PACKAGE_FEATURES="$PACKAGE_FEATURES watchdog"
WITH_LIST="$WITH_LIST --with watchdog"
fi
# augeas only records the feature name; nothing is probed and WITH_LIST is
# left untouched.
if test "x${enable_augeas}" = xyes; then
PACKAGE_FEATURES="$PACKAGE_FEATURES augeas"
fi
if test "x${enable_systemd}" = xyes; then
PKG_CHECK_MODULES([libsystemd], [libsystemd])
AC_DEFINE([HAVE_LIBSYSTEMD], [1], [have systemd interface library])
PACKAGE_FEATURES="$PACKAGE_FEATURES systemd"
WITH_LIST="$WITH_LIST --with systemd"
fi
if test "x${enable_xmlconf}" = xyes; then
PACKAGE_FEATURES="$PACKAGE_FEATURES xmlconf"
WITH_LIST="$WITH_LIST --with xmlconf"
fi
# vqsim: missing readline headers only produce a warning. vqsim_readline is
# assigned here but not read in the visible part of this file; the
# VQSIM_READLINE conditional is driven by the header cache variable instead
# and is declared outside the if so that it is always defined.
if test "x${enable_vqsim}" = xyes; then
vqsim_readline=no
AC_CHECK_HEADERS([readline/readline.h readline/history.h],
[],
AC_MSG_WARN([vqsim will lack readline support]))
PACKAGE_FEATURES="$PACKAGE_FEATURES vqsim"
WITH_LIST="$WITH_LIST --with vqsim"
fi
AM_CONDITIONAL(VQSIM_READLINE, [test "x${ac_cv_header_readline_readline_h}" = xyes])
# Look for nozzle
if test "x${enable_nozzle}" = xyes; then
PKG_CHECK_MODULES([nozzle],[libnozzle])
AC_DEFINE_UNQUOTED([HAVE_LIBNOZZLE], 1, [have nozzle])
PACKAGE_FEATURES="$PACKAGE_FEATURES nozzle"
WITH_LIST="$WITH_LIST --with nozzle"
fi
# SNMP support. do_snmp stays 0 unless --enable-snmp is given and every
# probe below succeeds; it drives the BUILD_SNMP conditional declared after
# the block and the INSTALL_MIB conditional near the end of this file.
do_snmp=0
if test "x${enable_snmp}" = xyes; then
AC_PATH_PROGS([SNMPCONFIG], [net-snmp-config])
if test "x${SNMPCONFIG}" != "x"; then
# Include path and link flags are taken from net-snmp-config.
AC_MSG_CHECKING([for snmp includes])
SNMP_PREFIX=`$SNMPCONFIG --prefix`
SNMP_INCLUDES="-I$SNMP_PREFIX/include"
AC_MSG_RESULT([$SNMP_INCLUDES])
AC_MSG_CHECKING([for snmp libraries])
SNMP_LIBS=`$SNMPCONFIG --libs`
AC_MSG_RESULT([$SNMP_LIBS])
AC_SUBST([SNMP_LIBS])
# The header probe runs with the SNMP include path temporarily in CFLAGS.
saveCFLAGS="$CFLAGS"
CFLAGS="$CFLAGS $SNMP_INCLUDES"
AC_CHECK_HEADERS([net-snmp/net-snmp-config.h])
CFLAGS="$saveCFLAGS"
if test "x${ac_cv_header_net_snmp_net_snmp_config_h}" != "xyes"; then
AC_MSG_ERROR([Unable to use net-snmp/net-snmp-config.h])
fi
# Transport lookup: prefer netsnmp_transport_open_client and fall back to
# netsnmp_tdomain_transport; having neither is fatal.
# NOTE(review): NETSNMP_NEW_SUPPORT is not assigned anywhere in the visible
# part of this file, so NETSNMPV54 is presumably defined with an empty value.
savedLibs=$LIBS
LIBS="$LIBS $SNMP_LIBS"
AC_CHECK_FUNCS([netsnmp_transport_open_client])
if test $ac_cv_func_netsnmp_transport_open_client != yes; then
AC_CHECK_FUNCS([netsnmp_tdomain_transport])
if test $ac_cv_func_netsnmp_tdomain_transport != yes; then
AC_MSG_ERROR([No usable SNMP client transport implementation found])
fi
else
AC_DEFINE_UNQUOTED([NETSNMPV54], $NETSNMP_NEW_SUPPORT, [have net-snmp5.4 over])
fi
LIBS=$savedLibs
do_snmp=1
PACKAGE_FEATURES="$PACKAGE_FEATURES snmp"
WITH_LIST="$WITH_LIST --with snmp"
AC_DEFINE_UNQUOTED([ENABLE_SNMP], $do_snmp, [Build in support for sending SNMP traps])
else
AC_MSG_ERROR([You need the net_snmp development package to continue.])
fi
fi
AM_CONDITIONAL(BUILD_SNMP, test "${do_snmp}" = "1")
# extra warnings
# Every name in WARNLIST is tried as -W<name> through cc_supports_flag and
# kept in EXTRA_WARNINGS only when the compiler accepts it.
EXTRA_WARNINGS=""
WARNLIST="
all
shadow
missing-prototypes
missing-declarations
strict-prototypes
pointer-arith
write-strings
cast-align
bad-function-cast
missing-format-attribute
format=2
format-security
format-nonliteral
no-long-long
unsigned-char
no-strict-aliasing
"
for j in $WARNLIST; do
if cc_supports_flag -W$j; then
EXTRA_WARNINGS="$EXTRA_WARNINGS -W$j";
fi
done
# Coverage: needs both flags accepted by the compiler; it also forces -O0.
if test "x${enable_coverage}" = xyes && \
cc_supports_flag -ftest-coverage && \
cc_supports_flag -fprofile-arcs ; then
AC_MSG_NOTICE([Enabling Coverage (enable -O0 by default)])
OPT_CFLAGS="-O0"
COVERAGE_CFLAGS="-ftest-coverage -fprofile-arcs"
COVERAGE_LDFLAGS="-ftest-coverage -fprofile-arcs"
PACKAGE_FEATURES="$PACKAGE_FEATURES coverage"
else
COVERAGE_CFLAGS=""
COVERAGE_LDFLAGS=""
fi
if test "x${enable_small_memory_footprint}" = xyes ; then
AC_DEFINE_UNQUOTED([HAVE_SMALL_MEMORY_FOOTPRINT], 1, [have small_memory_footprint])
PACKAGE_FEATURES="$PACKAGE_FEATURES small-memory-footprint"
fi
# ANSI mode: the probe tests -std=iso9899:199409, while the flags actually
# added are -ansi -DANSI_ONLY.
if test "x${enable_ansi}" = xyes && \
cc_supports_flag -std=iso9899:199409 ; then
AC_MSG_NOTICE([Enabling ANSI Compatibility])
ANSI_CPPFLAGS="-ansi -DANSI_ONLY"
PACKAGE_FEATURES="$PACKAGE_FEATURES ansi"
else
ANSI_CPPFLAGS=""
fi
if test "x${enable_fatal_warnings}" = xyes && \
cc_supports_flag -Werror ; then
AC_MSG_NOTICE([Enabling Fatal Warnings (-Werror)])
WERROR_CFLAGS="-Werror"
PACKAGE_FEATURES="$PACKAGE_FEATURES fatal-warnings"
else
WERROR_CFLAGS=""
fi
# don't add additional cflags
# With --enable-user-flags the optimisation, debug and warning flags computed
# above are dropped so that only the user supplied flags remain of those.
if test "x${enable_user_flags}" = xyes; then
OPT_CFLAGS=""
GDB_FLAGS=""
EXTRA_WARNINGS=""
fi
# Hardening flags (enabled by default). Every probe below builds and runs a
# tiny program with the candidate flags, caches the answer in an ap_cv_*
# variable and assumes "yes" when cross compiling (last probe argument).
if test "x${enable_secure_build}" = xyes; then
# stolen from apache configure snippet
AC_CACHE_CHECK([whether $CC accepts PIE flags], [ap_cv_cc_pie], [
save_CFLAGS=$CFLAGS
save_LDFLAGS=$LDFLAGS
CFLAGS="$CFLAGS -fPIE"
LDFLAGS="$LDFLAGS -pie"
AC_TRY_RUN([static int foo[30000]; int main () { return 0; }],
[ap_cv_cc_pie=yes], [ap_cv_cc_pie=no], [ap_cv_cc_pie=yes])
CFLAGS=$save_CFLAGS
LDFLAGS=$save_LDFLAGS
])
if test "$ap_cv_cc_pie" = "yes"; then
SEC_FLAGS="$SEC_FLAGS -fPIE"
SEC_LDFLAGS="$SEC_LDFLAGS -pie"
PACKAGE_FEATURES="$PACKAGE_FEATURES pie"
fi
# similar to above
AC_CACHE_CHECK([whether $CC accepts RELRO flags], [ap_cv_cc_relro], [
save_LDFLAGS=$LDFLAGS
LDFLAGS="$LDFLAGS -Wl,-z,relro"
AC_TRY_RUN([static int foo[30000]; int main () { return 0; }],
[ap_cv_cc_relro=yes], [ap_cv_cc_relro=no], [ap_cv_cc_relro=yes])
LDFLAGS=$save_LDFLAGS
])
if test "$ap_cv_cc_relro" = "yes"; then
SEC_LDFLAGS="$SEC_LDFLAGS -Wl,-z,relro"
PACKAGE_FEATURES="$PACKAGE_FEATURES relro"
fi
AC_CACHE_CHECK([whether $CC accepts BINDNOW flags], [ap_cv_cc_bindnow], [
save_LDFLAGS=$LDFLAGS
LDFLAGS="$LDFLAGS -Wl,-z,now"
AC_TRY_RUN([static int foo[30000]; int main () { return 0; }],
[ap_cv_cc_bindnow=yes], [ap_cv_cc_bindnow=no], [ap_cv_cc_bindnow=yes])
LDFLAGS=$save_LDFLAGS
])
if test "$ap_cv_cc_bindnow" = "yes"; then
SEC_LDFLAGS="$SEC_LDFLAGS -Wl,-z,now"
PACKAGE_FEATURES="$PACKAGE_FEATURES bindnow"
fi
fi
# The two linker probes below run regardless of the secure-build setting.
# The as-needed result is applied when the final LDFLAGS are assembled.
AC_CACHE_CHECK([whether $CC accepts "--as-needed"], [ap_cv_cc_as_needed], [
save_LDFLAGS=$LDFLAGS
LDFLAGS="$LDFLAGS -Wl,--as-needed"
AC_TRY_RUN([static int foo[30000]; int main () { return 0; }],
[ap_cv_cc_as_needed=yes], [ap_cv_cc_as_needed=no], [ap_cv_cc_as_needed=yes])
LDFLAGS=$save_LDFLAGS
])
# The version-script probe writes a throw-away conftest.versions file and
# removes it again once the test program has been tried.
AC_CACHE_CHECK([whether $CC accepts "--version-script"], [ap_cv_cc_version_script], [
save_LDFLAGS=$LDFLAGS
LDFLAGS="$LDFLAGS -Wl,--version-script=conftest.versions"
echo "CONFTEST { };" >conftest.versions
AC_TRY_RUN([static int foo[30000]; int main () { return 0; }],
[ap_cv_cc_version_script=yes], [ap_cv_cc_version_script=no], [ap_cv_cc_version_script=yes])
rm -f conftest.versions
LDFLAGS=$save_LDFLAGS
])
# VERSCRIPT_LDFLAGS is substituted either way. When supported it carries a
# make-time expression (note the escaped dollar signs) that relies on a
# get_libname make function defined outside this file.
if test "$ap_cv_cc_version_script" = "yes"; then
AC_SUBST(VERSCRIPT_LDFLAGS, ["-Wl,--version-script=\$(srcdir)/lib\$(call get_libname,\$<).versions"])
else
AC_SUBST(VERSCRIPT_LDFLAGS, [""])
fi
# define global include dirs
# The dollar signs are escaped so that make, not configure, expands the
# top_builddir and top_srcdir references.
INCLUDE_DIRS="$INCLUDE_DIRS -I\$(top_builddir)/include -I\$(top_srcdir)/include"
INCLUDE_DIRS="$INCLUDE_DIRS -I\$(top_builddir)/include/corosync -I\$(top_srcdir)/include/corosync"
# final build of *FLAGS
# Sanitizer flags come first in CFLAGS and LDFLAGS, ahead of the flags saved
# from the environment, as required by the sanitizer notes earlier on.
CFLAGS="$SANITIZERS_CFLAGS $ENV_CFLAGS $lt_prog_compiler_pic $SEC_FLAGS $OPT_CFLAGS $GDB_FLAGS \
$COVERAGE_CFLAGS $EXTRA_WARNINGS \
$WERROR_CFLAGS $LIBQB_CFLAGS \
$SNMP_INCLUDES"
CPPFLAGS="$ENV_CPPFLAGS $ANSI_CPPFLAGS $INCLUDE_DIRS"
LDFLAGS="$SANITIZERS_LDFLAGS $ENV_LDFLAGS $lt_prog_compiler_pic $SEC_LDFLAGS $COVERAGE_LDFLAGS"
if test "$ap_cv_cc_as_needed" = "yes"; then
LDFLAGS="$LDFLAGS -Wl,--as-needed"
fi
# substitute what we need:
AC_SUBST([BASHPATH])
AC_SUBST([INITDDIR])
AC_SUBST([SYSTEMDDIR])
AC_SUBST([LOGDIR])
AC_SUBST([LOGROTATEDIR])
AC_SUBST([SOMAJOR])
AC_SUBST([SOMINOR])
AC_SUBST([SOMICRO])
AC_SUBST([SONAME])
# Conditionals derived from the feature results and from the presence of the
# optional augtool and groff programs.
AM_CONDITIONAL(INSTALL_MIB, test "${do_snmp}" = "1")
AM_CONDITIONAL(INSTALL_DBUSCONF, test "${enable_dbus}" = "yes")
AM_CONDITIONAL(AUGTOOL, test -n "${AUGTOOL}")
AM_CONDITIONAL(BUILD_HTML_DOCS, test -n "${GROFF}")
AC_SUBST([LINT_FLAGS])
# The directory values are passed through "eval echo" before being written
# to the config header, which expands one level of variable references.
AC_DEFINE_UNQUOTED([LOCALSTATEDIR], "$(eval echo ${localstatedir})", [localstate directory])
COROSYSCONFDIR=${sysconfdir}/corosync
AC_SUBST([COROSYSCONFDIR])
AC_DEFINE_UNQUOTED([COROSYSCONFDIR], "$(eval echo ${COROSYSCONFDIR})", [corosync config directory])
AC_DEFINE_UNQUOTED([PACKAGE_FEATURES], "${PACKAGE_FEATURES}", [corosync built-in features])
AC_OUTPUT
# Human readable summary printed once the output files have been generated:
# first the installation layout and enabled features, then the build flags.
AC_MSG_RESULT([])
AC_MSG_RESULT([$PACKAGE configuration:])
AC_MSG_RESULT([ Version = ${VERSION}])
AC_MSG_RESULT([ Prefix = ${prefix}])
AC_MSG_RESULT([ Executables = ${sbindir}])
AC_MSG_RESULT([ Man pages = ${mandir}])
AC_MSG_RESULT([ Doc dir = ${docdir}])
AC_MSG_RESULT([ Libraries = ${libdir}])
AC_MSG_RESULT([ Header files = ${includedir}])
AC_MSG_RESULT([ Arch-independent files = ${datadir}])
AC_MSG_RESULT([ State information = ${localstatedir}])
AC_MSG_RESULT([ System configuration = ${sysconfdir}])
AC_MSG_RESULT([ System init.d directory = ${INITDDIR}])
AC_MSG_RESULT([ System systemd directory = ${SYSTEMDDIR}])
AC_MSG_RESULT([ Log directory = ${LOGDIR}])
AC_MSG_RESULT([ Log rotate directory = ${LOGROTATEDIR}])
AC_MSG_RESULT([ corosync config dir = ${COROSYSCONFDIR}])
AC_MSG_RESULT([ init config directory = ${INITCONFIGDIR}])
AC_MSG_RESULT([ Features =${PACKAGE_FEATURES}])
AC_MSG_RESULT([])
AC_MSG_RESULT([$PACKAGE build info:])
AC_MSG_RESULT([ Library SONAME = ${SONAME}])
dnl One line per imported library; the m4_shift drops the empty first
dnl element that local_soname_list always starts with.
LIB_MSG_RESULT(m4_shift(local_soname_list))dnl
AC_MSG_RESULT([ Default optimization = ${OPT_CFLAGS}])
AC_MSG_RESULT([ Default debug options = ${GDB_FLAGS}])
# The warning flags are collected in EXTRA_WARNINGS (plural).
AC_MSG_RESULT([ Extra compiler warnings = ${EXTRA_WARNINGS}])
AC_MSG_RESULT([ Env. defined CFLAG = ${ENV_CFLAGS}])
AC_MSG_RESULT([ Env. defined CPPFLAGS = ${ENV_CPPFLAGS}])
AC_MSG_RESULT([ Env. defined LDFLAGS = ${ENV_LDFLAGS}])
AC_MSG_RESULT([ ANSI defined CPPFLAGS = ${ANSI_CPPFLAGS}])
AC_MSG_RESULT([ Coverage CFLAGS = ${COVERAGE_CFLAGS}])
AC_MSG_RESULT([ Coverage LDFLAGS = ${COVERAGE_LDFLAGS}])
AC_MSG_RESULT([ Fatal War. CFLAGS = ${WERROR_CFLAGS}])
AC_MSG_RESULT([ Final CFLAGS = ${CFLAGS}])
AC_MSG_RESULT([ Final CPPFLAGS = ${CPPFLAGS}])
AC_MSG_RESULT([ Final LDFLAGS = ${LDFLAGS}])
diff --git a/exec/cfg.c b/exec/cfg.c
index 75b644ab..c300cc8f 100644
--- a/exec/cfg.c
+++ b/exec/cfg.c
@@ -1,1307 +1,1373 @@
/*
* Copyright (c) 2005-2006 MontaVista Software, Inc.
* Copyright (c) 2006-2018 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <stddef.h>
#include <limits.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
#include <corosync/corotypes.h>
#include <qb/qbipc_common.h>
#include <corosync/cfg.h>
#include <qb/qblist.h>
#include <corosync/mar_gen.h>
#include <corosync/totem/totemip.h>
#include <corosync/totem/totem.h>
#include <corosync/ipc_cfg.h>
#include <corosync/logsys.h>
#include <corosync/coroapi.h>
#include <corosync/icmap.h>
#include <corosync/corodefs.h>
#include "totemconfig.h"
+#include "totemknet.h"
#include "service.h"
#include "main.h"
LOGSYS_DECLARE_SUBSYS ("CFG");
/*
 * Identifiers of the CFG executive messages. The numeric values follow the
 * same order as the handlers listed in cfg_exec_engine[] (0 = ringreenable
 * ... 4 = crypto reconfig), so both must be kept in sync.
 */
enum cfg_message_req_types {
MESSAGE_REQ_EXEC_CFG_RINGREENABLE = 0,
MESSAGE_REQ_EXEC_CFG_KILLNODE = 1,
MESSAGE_REQ_EXEC_CFG_SHUTDOWN = 2,
MESSAGE_REQ_EXEC_CFG_RELOAD_CONFIG = 3,
MESSAGE_REQ_EXEC_CFG_CRYPTO_RECONFIG = 4
};
/* Default shutdown timeout (presumably in seconds - confirm at the use site) */
#define DEFAULT_SHUTDOWN_TIMEOUT 5
/* List head initialised in cfg_exec_init_fn(); its users are outside this view */
static struct qb_list_head trackers_list;
/*
 * Variables controlling a requested shutdown
 * (shutdown_yes / shutdown_no / shutdown_expected presumably count the
 * replies received and expected - verify against the code that uses them)
 */
static corosync_timer_handle_t shutdown_timer;
static struct cfg_info *shutdown_con;
static uint32_t shutdown_flags;
static int shutdown_yes;
static int shutdown_no;
static int shutdown_expected;
/*
 * Per-connection private data of the CFG service: cfg_service_engine sets
 * private_data_size to sizeof(struct cfg_info).
 */
struct cfg_info
{
struct qb_list_head list;
void *conn;
void *tracker_conn;
/* Answer given to a shutdown request; UNKNOWN is the first (zero) value */
enum {SHUTDOWN_REPLY_UNKNOWN, SHUTDOWN_REPLY_YES, SHUTDOWN_REPLY_NO} shutdown_reply;
};
/*
 * Forward declarations: service callbacks first, then the executive message
 * handlers (message + sending nodeid), then the library request handlers
 * (connection + request message).
 */
static void cfg_confchg_fn (
enum totem_configuration_type configuration_type,
const unsigned int *member_list, size_t member_list_entries,
const unsigned int *left_list, size_t left_list_entries,
const unsigned int *joined_list, size_t joined_list_entries,
const struct memb_ring_id *ring_id);
static char *cfg_exec_init_fn (struct corosync_api_v1 *corosync_api_v1);
/* Set from the argument of cfg_exec_init_fn() */
static struct corosync_api_v1 *api;
static int cfg_lib_init_fn (void *conn);
static int cfg_lib_exit_fn (void *conn);
/* Executive message handlers, referenced from cfg_exec_engine[] */
static void message_handler_req_exec_cfg_ringreenable (
const void *message,
unsigned int nodeid);
static void message_handler_req_exec_cfg_killnode (
const void *message,
unsigned int nodeid);
static void message_handler_req_exec_cfg_shutdown (
const void *message,
unsigned int nodeid);
static void message_handler_req_exec_cfg_reload_config (
const void *message,
unsigned int nodeid);
static void message_handler_req_exec_cfg_reconfig_crypto (
const void *message,
unsigned int nodeid);
static void exec_cfg_killnode_endian_convert (void *msg);
/* Library request handlers, referenced from cfg_lib_engine[] */
static void message_handler_req_lib_cfg_ringstatusget (
void *conn,
const void *msg);
+static void message_handler_req_lib_cfg_nodestatusget (
+ void *conn,
+ const void *msg);
+
static void message_handler_req_lib_cfg_ringreenable (
void *conn,
const void *msg);
static void message_handler_req_lib_cfg_killnode (
void *conn,
const void *msg);
static void message_handler_req_lib_cfg_tryshutdown (
void *conn,
const void *msg);
static void message_handler_req_lib_cfg_replytoshutdown (
void *conn,
const void *msg);
static void message_handler_req_lib_cfg_get_node_addrs (
void *conn,
const void *msg);
static void message_handler_req_lib_cfg_local_get (
void *conn,
const void *msg);
static void message_handler_req_lib_cfg_reload_config (
void *conn,
const void *msg);
static void message_handler_req_lib_cfg_reopen_log_files (
void *conn,
const void *msg);
/*
 * Service Handler Definition
 *
 * Table of library request handlers. The numbered comments give each
 * entry's array index (presumably the request id used by the library side -
 * confirm against corosync/ipc_cfg.h), so new handlers must be appended at
 * the end, as done for nodestatusget (9).
 */
static struct corosync_lib_handler cfg_lib_engine[] =
{
{ /* 0 */
.lib_handler_fn = message_handler_req_lib_cfg_ringstatusget,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 1 */
.lib_handler_fn = message_handler_req_lib_cfg_ringreenable,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 2 */
.lib_handler_fn = message_handler_req_lib_cfg_killnode,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 3 */
.lib_handler_fn = message_handler_req_lib_cfg_tryshutdown,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 4 */
.lib_handler_fn = message_handler_req_lib_cfg_replytoshutdown,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 5 */
.lib_handler_fn = message_handler_req_lib_cfg_get_node_addrs,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 6 */
.lib_handler_fn = message_handler_req_lib_cfg_local_get,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 7 */
.lib_handler_fn = message_handler_req_lib_cfg_reload_config,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 8 */
.lib_handler_fn = message_handler_req_lib_cfg_reopen_log_files,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
+ },
+ { /* 9 */
+ .lib_handler_fn = message_handler_req_lib_cfg_nodestatusget,
+ .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
}
};
/*
 * Table of executive message handlers, in the same order as the values of
 * enum cfg_message_req_types. Only the killnode entry registers an endian
 * conversion function.
 */
static struct corosync_exec_handler cfg_exec_engine[] =
{
{ /* 0 */
.exec_handler_fn = message_handler_req_exec_cfg_ringreenable,
},
{ /* 1 */
.exec_handler_fn = message_handler_req_exec_cfg_killnode,
.exec_endian_convert_fn = exec_cfg_killnode_endian_convert
},
{ /* 2 */
.exec_handler_fn = message_handler_req_exec_cfg_shutdown,
},
{ /* 3 */
.exec_handler_fn = message_handler_req_exec_cfg_reload_config,
},
{ /* 4 */
.exec_handler_fn = message_handler_req_exec_cfg_reconfig_crypto,
}
};
/*
 * Exports the interface for the service
 *
 * Descriptor tying together the callbacks and handler tables of the CFG
 * service. Both *_count members are derived from the array sizes, so they
 * follow automatically when a handler is added to either table.
 */
struct corosync_service_engine cfg_service_engine = {
.name = "corosync configuration service",
.id = CFG_SERVICE,
.priority = 1,
.private_data_size = sizeof(struct cfg_info),
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED,
.allow_inquorate = CS_LIB_ALLOW_INQUORATE,
.lib_init_fn = cfg_lib_init_fn,
.lib_exit_fn = cfg_lib_exit_fn,
.lib_engine = cfg_lib_engine,
.lib_engine_count = sizeof (cfg_lib_engine) / sizeof (struct corosync_lib_handler),
.exec_init_fn = cfg_exec_init_fn,
.exec_engine = cfg_exec_engine,
.exec_engine_count = sizeof (cfg_exec_engine) / sizeof (struct corosync_exec_handler),
.confchg_fn = cfg_confchg_fn
};
/*
 * Accessor for the configuration service: hands out the address of the
 * statically defined cfg_service_engine descriptor.
 */
struct corosync_service_engine *cfg_get_service_engine_ver0 (void)
{
	struct corosync_service_engine *engine = &cfg_service_engine;

	return (engine);
}
/*
 * Executive message layouts used by this service. Every message starts with
 * a qb_ipc_request_header and every member is aligned to 8 bytes.
 */

/* Ring re-enable request: header plus the originating message source. */
struct req_exec_cfg_ringreenable {
struct qb_ipc_request_header header __attribute__((aligned(8)));
mar_message_source_t source __attribute__((aligned(8)));
};
/* Config reload request: 'source' identifies the IPC connection that asked for it. */
struct req_exec_cfg_reload_config {
struct qb_ipc_request_header header __attribute__((aligned(8)));
mar_message_source_t source __attribute__((aligned(8)));
};
/* Crypto reconfiguration step: 'phase' carries a CRYPTO_RECONFIG_PHASE_* value. */
struct req_exec_cfg_crypto_reconfig {
struct qb_ipc_request_header header __attribute__((aligned(8)));
mar_uint32_t phase __attribute__((aligned(8)));
};
/* Kill request: 'nodeid' is the node to be killed, 'reason' the text logged by it. */
struct req_exec_cfg_killnode {
struct qb_ipc_request_header header __attribute__((aligned(8)));
mar_uint32_t nodeid __attribute__((aligned(8)));
mar_name_t reason __attribute__((aligned(8)));
};
/* Shutdown announcement: carries no payload beyond the header. */
struct req_exec_cfg_shutdown {
struct qb_ipc_request_header header __attribute__((aligned(8)));
};
/* IMPL */
/*
 * Executive-side initialisation of the service: remembers the corosync API
 * table in the file-scope 'api' pointer and resets the list of shutdown
 * trackers. Always returns NULL.
 */
static char *cfg_exec_init_fn (
	struct corosync_api_v1 *corosync_api_v1)
{
	char *init_error = NULL;

	qb_list_init(&trackers_list);
	api = corosync_api_v1;

	return (init_error);
}
/*
 * Configuration change callback registered in cfg_service_engine.
 * The service takes no action on membership changes, so the body is empty.
 */
static void cfg_confchg_fn (
	enum totem_configuration_type configuration_type,
	const unsigned int *member_list,
	size_t member_list_entries,
	const unsigned int *left_list,
	size_t left_list_entries,
	const unsigned int *joined_list,
	size_t joined_list_entries,
	const struct memb_ring_id *ring_id)
{
	return;
}
/*
 * Tell other nodes we are shutting down
 *
 * Builds a MESSAGE_REQ_EXEC_CFG_SHUTDOWN request for the CFG service and
 * multicasts it with the TOTEM_SAFE guarantee. Always returns 0.
 */
static int send_shutdown(void)
{
	struct req_exec_cfg_shutdown req_exec_cfg_shutdown;
	struct iovec iovec;
	int mcast_res;

	ENTER();

	req_exec_cfg_shutdown.header.size =
		sizeof (struct req_exec_cfg_shutdown);
	req_exec_cfg_shutdown.header.id = SERVICE_ID_MAKE (CFG_SERVICE,
		MESSAGE_REQ_EXEC_CFG_SHUTDOWN);

	iovec.iov_base = (char *)&req_exec_cfg_shutdown;
	iovec.iov_len = sizeof (struct req_exec_cfg_shutdown);

	/*
	 * The multicast must not live inside assert(): with NDEBUG defined the
	 * assert() argument is not evaluated and the message would never be sent.
	 */
	mcast_res = api->totem_mcast (&iovec, 1, TOTEM_SAFE);
	assert (mcast_res == 0);
	(void)mcast_res;

	LEAVE();
	return 0;
}
static void send_test_shutdown(void *only_conn, void *exclude_conn, int status)
{
struct res_lib_cfg_testshutdown res_lib_cfg_testshutdown;
struct qb_list_head *iter;
ENTER();
res_lib_cfg_testshutdown.header.size = sizeof(struct res_lib_cfg_testshutdown);
res_lib_cfg_testshutdown.header.id = MESSAGE_RES_CFG_TESTSHUTDOWN;
res_lib_cfg_testshutdown.header.error = status;
res_lib_cfg_testshutdown.flags = shutdown_flags;
if (only_conn) {
TRACE1("sending testshutdown to only %p", only_conn);
api->ipc_dispatch_send(only_conn, &res_lib_cfg_testshutdown,
sizeof(res_lib_cfg_testshutdown));
} else {
qb_list_for_each(iter, &trackers_list) {
struct cfg_info *ci = qb_list_entry(iter, struct cfg_info, list);
if (ci->conn != exclude_conn) {
TRACE1("sending testshutdown to %p", ci->tracker_conn);
api->ipc_dispatch_send(ci->tracker_conn, &res_lib_cfg_testshutdown,
sizeof(res_lib_cfg_testshutdown));
}
}
}
LEAVE();
}
static void check_shutdown_status(void)
{
ENTER();
/*
* Shutdown client might have gone away
*/
if (!shutdown_con) {
LEAVE();
return;
}
/*
* All replies safely gathered in ?
*/
if (shutdown_yes + shutdown_no >= shutdown_expected) {
struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown;
api->timer_delete(shutdown_timer);
if (shutdown_yes >= shutdown_expected ||
shutdown_flags == CFG_SHUTDOWN_FLAG_REGARDLESS) {
TRACE1("shutdown confirmed");
res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown);
res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN;
res_lib_cfg_tryshutdown.header.error = CS_OK;
/*
* Tell originator that shutdown was confirmed
*/
api->ipc_response_send(shutdown_con->conn, &res_lib_cfg_tryshutdown,
sizeof(res_lib_cfg_tryshutdown));
shutdown_con = NULL;
/*
* Tell other nodes we are going down
*/
send_shutdown();
}
else {
TRACE1("shutdown cancelled");
res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown);
res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN;
res_lib_cfg_tryshutdown.header.error = CS_ERR_BUSY;
/*
* Tell originator that shutdown was cancelled
*/
api->ipc_response_send(shutdown_con->conn, &res_lib_cfg_tryshutdown,
sizeof(res_lib_cfg_tryshutdown));
shutdown_con = NULL;
}
log_printf(LOGSYS_LEVEL_DEBUG, "shutdown decision is: (yes count: %d, no count: %d) flags=%x",
shutdown_yes, shutdown_no, shutdown_flags);
}
LEAVE();
}
/*
 * Timer callback: the reply window for a shutdown request has expired
 * without every tracker answering.
 *
 * The "no" counter is forced to shutdown_expected so that
 * check_shutdown_status() reaches a decision, then all trackers are notified
 * with CS_ERR_TIMEOUT. 'arg' is not used.
 */
static void shutdown_timer_fn(void *arg)
{
	ENTER();

	/* Everybody who has not decided yet is counted as "NO" */
	shutdown_no = shutdown_expected;

	check_shutdown_status();

	send_test_shutdown(NULL, NULL, CS_ERR_TIMEOUT);

	LEAVE();
}
/*
 * Detach a connection's cfg_info from any shutdown bookkeeping.
 *
 * When 'ci' is the shutdown originator the session is cancelled (originator
 * cleared, timer deleted). When 'ci' is linked into a list it is unlinked,
 * its previous vote is withdrawn if a shutdown is still pending, and its
 * departure is counted as a YES before the status is re-evaluated.
 */
static void remove_ci_from_shutdown(struct cfg_info *ci)
{
	ENTER();

	/*
	 * The controlling shutdown process has quit: cancel the session
	 */
	if (shutdown_con == ci) {
		shutdown_con = NULL;
		api->timer_delete(shutdown_timer);
	}

	if (qb_list_empty(&ci->list)) {
		/* Not linked anywhere, nothing more to undo */
		LEAVE();
		return;
	}

	qb_list_del(&ci->list);
	qb_list_init(&ci->list);

	/*
	 * Withdraw the vote this connection had cast
	 */
	if (shutdown_con) {
		if (ci->shutdown_reply == SHUTDOWN_REPLY_YES) {
			shutdown_yes--;
		}
		if (ci->shutdown_reply == SHUTDOWN_REPLY_NO) {
			shutdown_no--;
		}
	}

	/*
	 * Leaving counts as an implicit YES to shutdown
	 */
	ci->shutdown_reply = SHUTDOWN_REPLY_YES;
	shutdown_yes++;

	check_shutdown_status();

	LEAVE();
}
/*
 * Library connection teardown: removes the connection's private cfg_info
 * from the shutdown bookkeeping. Always returns 0.
 */
int cfg_lib_exit_fn (void *conn)
{
	struct cfg_info *conn_info;

	conn_info = (struct cfg_info *)api->ipc_private_data_get (conn);

	ENTER();
	remove_ci_from_shutdown(conn_info);
	LEAVE();

	return (0);
}
/*
 * Library connection setup: initialises the list head embedded in the
 * connection's private cfg_info. Always returns 0.
 */
static int cfg_lib_init_fn (void *conn)
{
	struct cfg_info *conn_info;

	conn_info = (struct cfg_info *)api->ipc_private_data_get (conn);

	ENTER();
	qb_list_init(&conn_info->list);
	LEAVE();

	return (0);
}
/*
* Executive message handlers
*/
/*
 * Executive handler for ring re-enable messages. Apart from the
 * ENTER()/LEAVE() tracing the message is ignored.
 */
static void message_handler_req_exec_cfg_ringreenable (
	const void *message,
	unsigned int nodeid)
{
	ENTER();

	/* nothing to do */

	LEAVE();
}
/*
 * Endian conversion callback for req_exec_cfg_killnode messages.
 *
 * Converts, in place, every multi-byte payload field of the message:
 * the target node id and the reason name.
 *
 * msg - pointer to a struct req_exec_cfg_killnode
 */
static void exec_cfg_killnode_endian_convert (void *msg)
{
	struct req_exec_cfg_killnode *req_exec_cfg_killnode =
		(struct req_exec_cfg_killnode *)msg;

	ENTER();

	/*
	 * The handler compares nodeid with the local node id, so it has to be
	 * converted as well; otherwise a sender of the other endianness would
	 * address the wrong node.
	 */
	swab_mar_uint32_t(&req_exec_cfg_killnode->nodeid);
	swab_mar_name_t(&req_exec_cfg_killnode->reason);

	LEAVE();
}
/*
 * Executive handler for kill requests.
 *
 * Every node logs the request at debug level. The node whose id matches the
 * request's nodeid logs who killed it and why, then terminates through
 * corosync_fatal_error(COROSYNC_FATAL_ERROR_EXIT).
 *
 * message - struct req_exec_cfg_killnode
 * nodeid  - id of the node that sent the request
 */
static void message_handler_req_exec_cfg_killnode (
	const void *message,
	unsigned int nodeid)
{
	const struct req_exec_cfg_killnode *kill_req = message;
	cs_name_t reason;

	ENTER();

	log_printf(LOGSYS_LEVEL_DEBUG, "request to kill node " CS_PRI_NODE_ID " (us=" CS_PRI_NODE_ID ")",
		kill_req->nodeid, api->totem_nodeid_get());

	if (kill_req->nodeid != api->totem_nodeid_get()) {
		/* Somebody else is the target */
		LEAVE();
		return;
	}

	marshall_from_mar_name_t(&reason, &kill_req->reason);
	log_printf(LOGSYS_LEVEL_NOTICE, "Killed by node " CS_PRI_NODE_ID " : %s",
		nodeid, reason.value);
	corosync_fatal_error(COROSYNC_FATAL_ERROR_EXIT);

	LEAVE();
}
/*
 * Self shutdown
 *
 * Executive handler for shutdown announcements. Every node logs which node
 * was shut down; only the node that sent the message asks corosync to shut
 * down.
 */
static void message_handler_req_exec_cfg_shutdown (
	const void *message,
	unsigned int nodeid)
{
	int sent_by_us;

	ENTER();

	log_printf(LOGSYS_LEVEL_NOTICE, "Node " CS_PRI_NODE_ID " was shut down by sysadmin", nodeid);

	sent_by_us = (nodeid == api->totem_nodeid_get());
	if (sent_by_us) {
		api->shutdown_request();
	}

	LEAVE();
}
/*
 * strcmp() variant that tolerates NULL arguments.
 *
 * A NULL string sorts before any non-NULL string and two NULLs compare
 * equal; when both arguments are non-NULL the result is that of strcmp().
 */
static int nullcheck_strcmp(const char* left, const char *right)
{
	if (left == NULL) {
		return (right == NULL) ? 0 : -1;
	}

	if (right == NULL) {
		return 1;
	}

	return strcmp(left, right);
}
/*
 * Drop 'key_name' from temp_map when its value there differs from the value
 * in the global map, and tell the user that this entry cannot be changed at
 * run-time. The notice is only logged when the deletion succeeded.
 */
static void delete_and_notify_if_changed(icmap_map_t temp_map, const char *key_name)
{
	if (icmap_key_value_eq(temp_map, key_name, icmap_get_global_map(), key_name)) {
		/* Unchanged, nothing to remove */
		return;
	}

	if (icmap_delete_r(temp_map, key_name) != CS_OK) {
		return;
	}

	log_printf(LOGSYS_LEVEL_NOTICE, "Modified entry '%s' in corosync.conf cannot be changed at run-time", key_name);
}
/*
 * Remove from temp_map any keys of the new corosync.conf whose values differ
 * from the running configuration but cannot be changed at run time. A log
 * message is issued for each entry the user tried to change.
 *
 * The crypto related keys are only treated as read-only when
 * HAVE_KNET_CRYPTO_RECONF is not defined.
 *
 * Add more here as needed.
 */
static void remove_ro_entries(icmap_map_t temp_map)
{
	static const char *const ro_keys[] = {
#ifndef HAVE_KNET_CRYPTO_RECONF
		"totem.secauth",
		"totem.crypto_hash",
		"totem.crypto_cipher",
		"totem.keyfile",
		"totem.key",
#endif
		"totem.version",
		"totem.threads",
		"totem.ip_version",
		"totem.rrp_mode",
		"totem.netmtu",
		"totem.interface.ringnumber",
		"totem.interface.bindnetaddr",
		"totem.interface.mcastaddr",
		"totem.interface.broadcast",
		"totem.interface.mcastport",
		"totem.interface.ttl",
		"totem.transport",
		"totem.cluster_name",
		"quorum.provider",
		"system.move_to_root_cgroup",
		"system.sched_rr",
		"system.priority",
		"system.qb_ipc_type",
		"system.state_dir"
	};
	size_t idx;

	/* Keys are processed in table order */
	for (idx = 0; idx < sizeof(ro_keys) / sizeof(ro_keys[0]); idx++) {
		delete_and_notify_if_changed(temp_map, ro_keys[idx]);
	}
}
/*
 * Remove entries that exist in the global map, but not in the temp_map, this will
 * cause delete notifications to be sent to any listeners.
 *
 * Both maps are walked in parallel, restricted to keys starting with 'prefix':
 * one iterator over the global map (old) and one over temp_map (new).
 *
 * NOTE: This routine depends entirely on the keys returned by the iterators
 * being in alpha-sorted order.
 */
static void remove_deleted_entries(icmap_map_t temp_map, const char *prefix)
{
icmap_iter_t old_iter;
icmap_iter_t new_iter;
const char *old_key, *new_key;
int ret;
old_iter = icmap_iter_init(prefix);
new_iter = icmap_iter_init_r(temp_map, prefix);
old_key = icmap_iter_next(old_iter, NULL, NULL);
new_key = icmap_iter_next(new_iter, NULL, NULL);
/* Loop until both iterators are exhausted (a NULL key marks exhaustion here) */
while (old_key || new_key) {
/* nullcheck_strcmp orders a NULL key before any non-NULL key */
ret = nullcheck_strcmp(old_key, new_key);
if ((ret < 0 && old_key) || !new_key) {
/*
* new_key is greater, a line (or more) has been deleted
* Continue until old is >= new
*/
do {
/* Remove it from icmap & send notifications */
icmap_delete(old_key);
old_key = icmap_iter_next(old_iter, NULL, NULL);
ret = nullcheck_strcmp(old_key, new_key);
} while (ret < 0 && old_key);
}
else if ((ret > 0 && new_key) || !old_key) {
/*
* old_key is greater, a line (or more) has been added
* Continue until new is >= old
*
* we don't need to do anything special with this like tell
* icmap. That will happen when we copy the values over
*/
do {
new_key = icmap_iter_next(new_iter, NULL, NULL);
ret = nullcheck_strcmp(old_key, new_key);
} while (ret > 0 && new_key);
}
/* Keys match (ret may have been updated by a branch above): advance both sides */
if (ret == 0) {
new_key = icmap_iter_next(new_iter, NULL, NULL);
old_key = icmap_iter_next(old_iter, NULL, NULL);
}
}
icmap_iter_finalize(new_iter);
icmap_iter_finalize(old_iter);
}
/*
 * Reload configuration file
 *
 * Executive handler run on every node when a reload is requested:
 *  1. parse the configuration into a temporary icmap,
 *  2. remove keys that vanished from the global map and strip read-only
 *     keys from the temporary map,
 *  3. build and validate a new totem_config from the temporary map,
 *  4. copy the new keys into the live map and hand the new totem
 *     configuration to totempg,
 *  5. publish the result in "config.reload_status", and
 *  6. answer the requesting IPC client when the request originated on this
 *     node.
 *
 * message - struct req_exec_cfg_reload_config
 * nodeid  - id of the node that requested the reload
 */
static void message_handler_req_exec_cfg_reload_config (
	const void *message,
	unsigned int nodeid)
{
	const struct req_exec_cfg_reload_config *req_exec_cfg_reload_config = message;
	struct res_lib_cfg_reload_config res_lib_cfg_reload_config;
	struct totem_config new_config;
	icmap_map_t temp_map;
	const char *error_string;
	int res = CS_OK;

	ENTER();

	log_printf(LOGSYS_LEVEL_NOTICE, "Config reload requested by node " CS_PRI_NODE_ID, nodeid);

	/*
	 * NOTE(review): this flag is only cleared under reload_fini, so it
	 * stays set when one of the two early failures below is taken -
	 * confirm whether that is intended.
	 */
	icmap_set_uint8("config.totemconfig_reload_in_progress", 1);

	/* Make sure there is no rubbish in this that might be checked, even on error */
	memset(&new_config, 0, sizeof(new_config));

	/*
	 * Set up a new hashtable as a staging area.
	 */
	if ((res = icmap_init_r(&temp_map)) != CS_OK) {
		log_printf(LOGSYS_LEVEL_ERROR, "Unable to create temporary icmap. config file reload cancelled\n");
		goto reload_fini_nomap;
	}

	/*
	 * Load new config into the temporary map
	 */
	res = coroparse_configparse(temp_map, &error_string);
	if (res == -1) {
		log_printf (LOGSYS_LEVEL_ERROR, "Unable to reload config file: %s", error_string);
		res = CS_ERR_INVALID_PARAM;
		goto reload_fini_nofree;
	}

	/* Signal start of the reload process */
	icmap_set_uint8("config.reload_in_progress", 1);

	/* Detect deleted entries and remove them from the main icmap hashtable */
	remove_deleted_entries(temp_map, "logging.");
	remove_deleted_entries(temp_map, "totem.");
	remove_deleted_entries(temp_map, "nodelist.");
	remove_deleted_entries(temp_map, "quorum.");
	remove_deleted_entries(temp_map, "uidgid.config.");
	remove_deleted_entries(temp_map, "nozzle.");

	/* Remove entries that cannot be changed */
	remove_ro_entries(temp_map);

	/* Take a copy of the current setup so we can check what has changed */
	memset(&new_config, 0, sizeof(new_config));
	new_config.orig_interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX);
	assert(new_config.orig_interfaces != NULL);

	totempg_get_config(&new_config);
	new_config.crypto_changed = 0;

	new_config.interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX);
	assert(new_config.interfaces != NULL);
	memset(new_config.interfaces, 0, sizeof (struct totem_interface) * INTERFACE_MAX);

	/* For UDP[U] the configuration on link0 is static (apart from the nodelist) and only read at
	   startup. So preserve it here */
	if ( (new_config.transport_number == TOTEM_TRANSPORT_UDP) ||
	     (new_config.transport_number == TOTEM_TRANSPORT_UDPU)) {
		memcpy(&new_config.interfaces[0], &new_config.orig_interfaces[0],
		       sizeof(struct totem_interface));
	}

	/* Calculate new node and interface definitions */
	if (totemconfig_configure_new_params(&new_config, temp_map, &error_string) == -1) {
		log_printf (LOGSYS_LEVEL_ERROR, "Cannot configure new interface definitions: %s\n", error_string);
		res = CS_ERR_INVALID_PARAM;
		goto reload_fini;
	}

	/* Read from temp_map into new_config */
	totem_volatile_config_read(&new_config, temp_map, NULL);

	/* Get updated crypto parameters. Will set a flag in new_config if things have changed */
	if (totem_reread_crypto_config(&new_config, temp_map, &error_string) == -1) {
		log_printf (LOGSYS_LEVEL_ERROR, "Crypto configuration is not valid: %s\n", error_string);
		res = CS_ERR_INVALID_PARAM;
		goto reload_fini;
	}

	/* Validate dynamic parameters */
	if (totem_volatile_config_validate(&new_config, temp_map, &error_string) == -1) {
		log_printf (LOGSYS_LEVEL_ERROR, "Configuration is not valid: %s\n", error_string);
		res = CS_ERR_INVALID_PARAM;
		goto reload_fini;
	}

	/* Save this here so we can get at it for the later phases of crypto change */
	if (new_config.crypto_changed) {
#ifndef HAVE_KNET_CRYPTO_RECONF
		new_config.crypto_changed = 0;
		log_printf (LOGSYS_LEVEL_ERROR, "Crypto reconfiguration is not supported by the linked version of knet\n");
		res = CS_ERR_INVALID_PARAM;
		goto reload_fini;
#endif
	}

	/*
	 * Copy new keys into live config.
	 */
	if ( (res = icmap_copy_map(icmap_get_global_map(), temp_map)) != CS_OK) {
		log_printf (LOGSYS_LEVEL_ERROR, "Error making new config live. cmap database may be inconsistent\n");
		/* Return res from icmap */
		goto reload_fini;
	}

	/* Copy into live system */
	totempg_put_config(&new_config);
	totemconfig_commit_new_params(&new_config, temp_map);

reload_fini:
	/*
	 * Released here rather than only on the success path, so that the
	 * error exits above no longer leak the interface array. Every jump to
	 * this label happens after the allocation.
	 */
	free(new_config.interfaces);

	/* All done - let clients know */
	icmap_set_int32("config.reload_status", res);
	icmap_set_uint8("config.totemconfig_reload_in_progress", 0);
	icmap_set_uint8("config.reload_in_progress", 0);

	/* Finished with the temporary storage */
	free(new_config.orig_interfaces);

reload_fini_nofree:
	icmap_fini_r(temp_map);

reload_fini_nomap:

	/* If crypto was changed, now it's loaded on all nodes we can enable it.
	 * Each node sends its own PHASE message so we're not relying on the leader
	 * node to survive the transition
	 */
	if (new_config.crypto_changed) {
		struct req_exec_cfg_crypto_reconfig req_exec_cfg_crypto_reconfig;
		struct iovec iovec;
		int mcast_res;

		req_exec_cfg_crypto_reconfig.header.size =
			sizeof (struct req_exec_cfg_crypto_reconfig);
		req_exec_cfg_crypto_reconfig.header.id = SERVICE_ID_MAKE (CFG_SERVICE,
			MESSAGE_REQ_EXEC_CFG_CRYPTO_RECONFIG);
		req_exec_cfg_crypto_reconfig.phase = CRYPTO_RECONFIG_PHASE_ACTIVATE;

		iovec.iov_base = (char *)&req_exec_cfg_crypto_reconfig;
		iovec.iov_len = sizeof (struct req_exec_cfg_crypto_reconfig);

		/*
		 * Keep the multicast out of assert(): with NDEBUG defined the
		 * assert() argument is not evaluated and nothing would be sent.
		 */
		mcast_res = api->totem_mcast (&iovec, 1, TOTEM_SAFE);
		assert (mcast_res == 0);
		(void)mcast_res;
	}

	/* All done, return result to the caller if it was on this system */
	if (nodeid == api->totem_nodeid_get()) {
		res_lib_cfg_reload_config.header.size = sizeof(res_lib_cfg_reload_config);
		res_lib_cfg_reload_config.header.id = MESSAGE_RES_CFG_RELOAD_CONFIG;
		res_lib_cfg_reload_config.header.error = res;
		api->ipc_response_send(req_exec_cfg_reload_config->source.conn,
				       &res_lib_cfg_reload_config,
				       sizeof(res_lib_cfg_reload_config));
		/* Drop the reference on the requesting connection */
		api->ipc_refcnt_dec(req_exec_cfg_reload_config->source.conn);
	}

	LEAVE();
}
/* Handle the phases of crypto reload
 * The first time we are called is after the new crypto config has been loaded
 * but not activated.
 *
 * 1 - activate the new crypto configuration
 * 2 - clear out the old configuration
 *
 * Only messages sent by this node itself are acted upon. After the phase
 * named in the message has been carried out, a message for
 * CRYPTO_RECONFIG_PHASE_CLEANUP is multicast unless that phase was already
 * reached.
 */
static void message_handler_req_exec_cfg_reconfig_crypto (
	const void *message,
	unsigned int nodeid)
{
	const struct req_exec_cfg_crypto_reconfig *req_exec_cfg_crypto_reconfig = message;

	/* Got our own reconfig message */
	if (nodeid == api->totem_nodeid_get()) {
		log_printf (LOGSYS_LEVEL_DEBUG, "Crypto reconfiguration phase %d", req_exec_cfg_crypto_reconfig->phase);

		/* Do the deed */
		totempg_crypto_reconfigure_phase(req_exec_cfg_crypto_reconfig->phase);

		/* Move to the next phase if not finished */
		if (req_exec_cfg_crypto_reconfig->phase < CRYPTO_RECONFIG_PHASE_CLEANUP) {
			struct req_exec_cfg_crypto_reconfig req_exec_cfg_crypto_reconfig2;
			struct iovec iovec;
			int mcast_res;

			req_exec_cfg_crypto_reconfig2.header.size =
				sizeof (struct req_exec_cfg_crypto_reconfig);
			req_exec_cfg_crypto_reconfig2.header.id = SERVICE_ID_MAKE (CFG_SERVICE,
				MESSAGE_REQ_EXEC_CFG_CRYPTO_RECONFIG);
			req_exec_cfg_crypto_reconfig2.phase = CRYPTO_RECONFIG_PHASE_CLEANUP;

			iovec.iov_base = (char *)&req_exec_cfg_crypto_reconfig2;
			iovec.iov_len = sizeof (struct req_exec_cfg_crypto_reconfig);

			/*
			 * Keep the multicast out of assert(): with NDEBUG defined
			 * the assert() argument is not evaluated and the cleanup
			 * phase would never be requested.
			 */
			mcast_res = api->totem_mcast (&iovec, 1, TOTEM_SAFE);
			assert (mcast_res == 0);
			(void)mcast_res;
		}
	}
}
/*
* Library Interface Implementation
*/
/*
 * Library handler: report name and status of the local node's interfaces.
 *
 * For each interface returned by totem_ifaces_get() the response carries
 * "<interface id> <address>" as the name and the corresponding status string.
 * When a name or status string does not fit, the loop stops and the response
 * is sent with CS_ERR_NAME_TOO_LONG.
 *
 * conn - IPC connection the response is sent to
 * msg  - request (not inspected)
 */
static void message_handler_req_lib_cfg_ringstatusget (
	void *conn,
	const void *msg)
{
	struct res_lib_cfg_ringstatusget res_lib_cfg_ringstatusget;
	struct totem_ip_address interfaces[INTERFACE_MAX];
	/* Initialised so that a failing totem_ifaces_get() cannot leave them indeterminate */
	unsigned int iface_count = 0;
	char **status = NULL;
	const char *totem_ip_string;
	char ifname[CFG_INTERFACE_NAME_MAX_LEN];
	unsigned int iface_ids[INTERFACE_MAX];
	unsigned int i;
	cs_error_t res = CS_OK;

	ENTER();

	/*
	 * Clear the whole response first: unused name/status slots (and
	 * everything when we bail out early) would otherwise carry
	 * uninitialised stack contents to the client.
	 */
	memset(&res_lib_cfg_ringstatusget, 0, sizeof(res_lib_cfg_ringstatusget));

	res_lib_cfg_ringstatusget.header.id = MESSAGE_RES_CFG_RINGSTATUSGET;
	res_lib_cfg_ringstatusget.header.size = sizeof (struct res_lib_cfg_ringstatusget);

	api->totem_ifaces_get (
		api->totem_nodeid_get(),
		iface_ids,
		interfaces,
		INTERFACE_MAX,
		&status,
		&iface_count);

	assert(iface_count <= CFG_MAX_INTERFACES);

	res_lib_cfg_ringstatusget.interface_count = iface_count;

	for (i = 0; i < iface_count; i++) {
		totem_ip_string
		  = (const char *)api->totem_ip_print (&interfaces[i]);

		if (!totem_ip_string) {
			totem_ip_string="";
		}

		/* Allow for i/f number at the start */
		if (strlen(totem_ip_string) >= CFG_INTERFACE_NAME_MAX_LEN-3) {
			log_printf(LOGSYS_LEVEL_ERROR, "String representation of interface %u is too long", i);
			res = CS_ERR_NAME_TOO_LONG;
			goto send_response;
		}
		/* iface_ids[] is unsigned, so print it with %u */
		snprintf(ifname, sizeof(ifname), "%u %s", iface_ids[i], totem_ip_string);

		if (strlen(status[i]) >= CFG_INTERFACE_STATUS_MAX_LEN) {
			log_printf(LOGSYS_LEVEL_ERROR, "Status string for interface %u is too long", i);
			res = CS_ERR_NAME_TOO_LONG;
			goto send_response;
		}

		/* Both lengths were checked above, so the copies stay in bounds */
		strcpy ((char *)&res_lib_cfg_ringstatusget.interface_status[i],
			status[i]);
		strcpy ((char *)&res_lib_cfg_ringstatusget.interface_name[i],
			ifname);
	}

send_response:
	res_lib_cfg_ringstatusget.header.error = res;
	api->ipc_response_send (
		conn,
		&res_lib_cfg_ringstatusget,
		sizeof (struct res_lib_cfg_ringstatusget));

	LEAVE();
}
+
+/*
+ * Library handler: return the totem status of one node.
+ *
+ * Only requests whose version equals TOTEM_NODE_STATUS_STRUCTURE_VERSION are
+ * served; any other version is answered with CS_ERR_NOT_SUPPORTED. The value
+ * returned by totempg_nodestatus_get() is passed back in the header's error
+ * field and the status is only copied into the response when it is 0.
+ *
+ * conn - IPC connection the response is sent to
+ * msg  - struct req_lib_cfg_nodestatusget
+ */
+static void message_handler_req_lib_cfg_nodestatusget (
+	void *conn,
+	const void *msg)
+{
+	struct res_lib_cfg_nodestatusget res_lib_cfg_nodestatusget;
+	const struct req_lib_cfg_nodestatusget *req_lib_cfg_nodestatusget = msg;
+	struct totem_node_status node_status;
+	cs_error_t res = CS_OK;
+	int i;
+
+	ENTER();
+
+	/*
+	 * Prepare the response header for every outcome. It used to be filled
+	 * in only for a supported version, so the CS_ERR_NOT_SUPPORTED reply
+	 * went out with an indeterminate id and size. Zeroing the body keeps
+	 * uninitialised stack contents away from the client on error replies.
+	 */
+	memset(&res_lib_cfg_nodestatusget, 0, sizeof(res_lib_cfg_nodestatusget));
+	res_lib_cfg_nodestatusget.header.id = MESSAGE_RES_CFG_NODESTATUSGET;
+	res_lib_cfg_nodestatusget.header.size = sizeof (struct res_lib_cfg_nodestatusget);
+
+	/* Currently only one structure version supported */
+	if (req_lib_cfg_nodestatusget->version == TOTEM_NODE_STATUS_STRUCTURE_VERSION)
+	{
+		memset(&node_status, 0, sizeof(node_status));
+		res = totempg_nodestatus_get(req_lib_cfg_nodestatusget->nodeid,
+					     &node_status);
+		if (res == 0) {
+			res_lib_cfg_nodestatusget.node_status.nodeid = req_lib_cfg_nodestatusget->nodeid;
+			res_lib_cfg_nodestatusget.node_status.version = node_status.version;
+			res_lib_cfg_nodestatusget.node_status.reachable = node_status.reachable;
+			res_lib_cfg_nodestatusget.node_status.remote = node_status.remote;
+			res_lib_cfg_nodestatusget.node_status.external = node_status.external;
+			res_lib_cfg_nodestatusget.node_status.onwire_min = node_status.onwire_min;
+			res_lib_cfg_nodestatusget.node_status.onwire_max = node_status.onwire_max;
+			res_lib_cfg_nodestatusget.node_status.onwire_ver= node_status.onwire_ver;
+
+			/* Copy the per-link state field by field */
+			for (i=0; i < KNET_MAX_LINK; i++) {
+				res_lib_cfg_nodestatusget.node_status.link_status[i].enabled = node_status.link_status[i].enabled;
+				res_lib_cfg_nodestatusget.node_status.link_status[i].connected = node_status.link_status[i].connected;
+				res_lib_cfg_nodestatusget.node_status.link_status[i].dynconnected = node_status.link_status[i].dynconnected;
+				res_lib_cfg_nodestatusget.node_status.link_status[i].mtu = node_status.link_status[i].mtu;
+				memcpy(res_lib_cfg_nodestatusget.node_status.link_status[i].src_ipaddr,
+				       node_status.link_status[i].src_ipaddr, CFG_MAX_HOST_LEN);
+				memcpy(res_lib_cfg_nodestatusget.node_status.link_status[i].dst_ipaddr,
+				       node_status.link_status[i].dst_ipaddr, CFG_MAX_HOST_LEN);
+			}
+		}
+	} else {
+		res = CS_ERR_NOT_SUPPORTED;
+	}
+
+	res_lib_cfg_nodestatusget.header.error = res;
+	api->ipc_response_send (
+		conn,
+		&res_lib_cfg_nodestatusget,
+		sizeof (struct res_lib_cfg_nodestatusget));
+
+	LEAVE();
+}
+
+
static void message_handler_req_lib_cfg_ringreenable (
void *conn,
const void *msg)
{
struct res_lib_cfg_ringreenable res_lib_cfg_ringreenable;
ENTER();
res_lib_cfg_ringreenable.header.id = MESSAGE_RES_CFG_RINGREENABLE;
res_lib_cfg_ringreenable.header.size = sizeof (struct res_lib_cfg_ringreenable);
res_lib_cfg_ringreenable.header.error = CS_ERR_NOT_SUPPORTED;
api->ipc_response_send (
conn, &res_lib_cfg_ringreenable,
sizeof (struct res_lib_cfg_ringreenable));
LEAVE();
}
static void message_handler_req_lib_cfg_killnode (
void *conn,
const void *msg)
{
const struct req_lib_cfg_killnode *req_lib_cfg_killnode = msg;
struct res_lib_cfg_killnode res_lib_cfg_killnode;
struct req_exec_cfg_killnode req_exec_cfg_killnode;
struct iovec iovec;
char key_name[ICMAP_KEYNAME_MAXLEN];
char tmp_key[ICMAP_KEYNAME_MAXLEN + 1];
icmap_map_t map;
icmap_iter_t iter;
const char *iter_key;
uint32_t nodeid;
char *status_str = NULL;
int match_nodeid_flag = 0;
cs_error_t error = CS_OK;
ENTER();
map = icmap_get_global_map();
iter = icmap_iter_init_r(map, "runtime.members.");
while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
if (sscanf(iter_key, "runtime.members.%u.%s", &nodeid, key_name) != 2) {
continue;
}
if (strcmp(key_name, "status") != 0) {
continue;
}
if (nodeid != req_lib_cfg_killnode->nodeid) {
continue;
}
match_nodeid_flag = 1;
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "runtime.members.%u.status", nodeid);
if (icmap_get_string_r(map, tmp_key, &status_str) != CS_OK) {
error = CS_ERR_LIBRARY;
goto send_response;
}
if (strcmp(status_str, "joined") != 0) {
error = CS_ERR_NOT_EXIST;
goto send_response;
}
break;
}
if (!match_nodeid_flag) {
error = CS_ERR_NOT_EXIST;
goto send_response;
}
req_exec_cfg_killnode.header.size =
sizeof (struct req_exec_cfg_killnode);
req_exec_cfg_killnode.header.id = SERVICE_ID_MAKE (CFG_SERVICE,
MESSAGE_REQ_EXEC_CFG_KILLNODE);
req_exec_cfg_killnode.nodeid = req_lib_cfg_killnode->nodeid;
marshall_to_mar_name_t(&req_exec_cfg_killnode.reason, &req_lib_cfg_killnode->reason);
iovec.iov_base = (char *)&req_exec_cfg_killnode;
iovec.iov_len = sizeof (struct req_exec_cfg_killnode);
(void)api->totem_mcast (&iovec, 1, TOTEM_SAFE);
send_response:
res_lib_cfg_killnode.header.size = sizeof(struct res_lib_cfg_killnode);
res_lib_cfg_killnode.header.id = MESSAGE_RES_CFG_KILLNODE;
res_lib_cfg_killnode.header.error = error;
api->ipc_response_send(conn, &res_lib_cfg_killnode,
sizeof(res_lib_cfg_killnode));
free(status_str);
icmap_iter_finalize(iter);
LEAVE();
}
static void message_handler_req_lib_cfg_tryshutdown (
void *conn,
const void *msg)
{
struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn);
const struct req_lib_cfg_tryshutdown *req_lib_cfg_tryshutdown = msg;
struct qb_list_head *iter;
ENTER();
if (req_lib_cfg_tryshutdown->flags == CFG_SHUTDOWN_FLAG_IMMEDIATE) {
struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown;
/*
* Tell other nodes
*/
send_shutdown();
res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown);
res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN;
res_lib_cfg_tryshutdown.header.error = CS_OK;
api->ipc_response_send(conn, &res_lib_cfg_tryshutdown,
sizeof(res_lib_cfg_tryshutdown));
LEAVE();
return;
}
/*
* Shutdown in progress, return an error
*/
if (shutdown_con) {
struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown;
res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown);
res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN;
res_lib_cfg_tryshutdown.header.error = CS_ERR_EXIST;
api->ipc_response_send(conn, &res_lib_cfg_tryshutdown,
sizeof(res_lib_cfg_tryshutdown));
LEAVE();
return;
}
ci->conn = conn;
shutdown_con = (struct cfg_info *)api->ipc_private_data_get (conn);
shutdown_flags = req_lib_cfg_tryshutdown->flags;
shutdown_yes = 0;
shutdown_no = 0;
/*
* Count the number of listeners
*/
shutdown_expected = 0;
qb_list_for_each(iter, &trackers_list) {
struct cfg_info *testci = qb_list_entry(iter, struct cfg_info, list);
/*
* It is assumed that we will allow shutdown
*/
if (testci != ci) {
testci->shutdown_reply = SHUTDOWN_REPLY_UNKNOWN;
shutdown_expected++;
}
}
/*
* If no-one is listening for events then we can just go down now
*/
if (shutdown_expected == 0) {
struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown;
res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown);
res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN;
res_lib_cfg_tryshutdown.header.error = CS_OK;
/*
* Tell originator that shutdown was confirmed
*/
api->ipc_response_send(conn, &res_lib_cfg_tryshutdown,
sizeof(res_lib_cfg_tryshutdown));
send_shutdown();
LEAVE();
return;
}
else {
unsigned int shutdown_timeout = DEFAULT_SHUTDOWN_TIMEOUT;
/*
* Look for a shutdown timeout in configuration map
*/
icmap_get_uint32("cfg.shutdown_timeout", &shutdown_timeout);
/*
* Start the timer. If we don't get a full set of replies before this goes
* off we'll cancel the shutdown
*/
api->timer_add_duration((unsigned long long)shutdown_timeout*1000000000, NULL,
shutdown_timer_fn, &shutdown_timer);
/*
* Tell the users we would like to shut down
*/
send_test_shutdown(NULL, conn, CS_OK);
}
/*
* We don't sent a reply to the caller here.
* We send it when we know if we can shut down or not
*/
LEAVE();
}
static void message_handler_req_lib_cfg_replytoshutdown (
void *conn,
const void *msg)
{
struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn);
const struct req_lib_cfg_replytoshutdown *req_lib_cfg_replytoshutdown = msg;
struct res_lib_cfg_replytoshutdown res_lib_cfg_replytoshutdown;
int status = CS_OK;
ENTER();
if (!shutdown_con) {
status = CS_ERR_ACCESS;
goto exit_fn;
}
if (req_lib_cfg_replytoshutdown->response) {
shutdown_yes++;
ci->shutdown_reply = SHUTDOWN_REPLY_YES;
}
else {
shutdown_no++;
ci->shutdown_reply = SHUTDOWN_REPLY_NO;
}
check_shutdown_status();
exit_fn:
res_lib_cfg_replytoshutdown.header.error = status;
res_lib_cfg_replytoshutdown.header.id = MESSAGE_RES_CFG_REPLYTOSHUTDOWN;
res_lib_cfg_replytoshutdown.header.size = sizeof(res_lib_cfg_replytoshutdown);
api->ipc_response_send(conn, &res_lib_cfg_replytoshutdown,
sizeof(res_lib_cfg_replytoshutdown));
LEAVE();
}
/*
 * Library handler: return the interface addresses of a node.
 *
 * A nodeid of 0 in the request means the local node. Only interfaces whose
 * address has a non-zero family are copied into the response; num_addrs
 * reports how many were copied, while header.size is computed from the total
 * number of interfaces reported by totem_ifaces_get(). When no interfaces
 * are available the response error is CS_ERR_NOT_EXIST.
 *
 * conn - IPC connection the response is sent to
 * msg  - struct req_lib_cfg_get_node_addrs
 */
static void message_handler_req_lib_cfg_get_node_addrs (void *conn,
	const void *msg)
{
	struct totem_ip_address node_ifs[INTERFACE_MAX];
	unsigned int iface_ids[INTERFACE_MAX];
	char buf[PIPE_BUF];
	char **status;
	unsigned int num_interfaces = 0;
	struct sockaddr_storage *ss;
	int ret = CS_OK;
	unsigned int i;
	int live_addrs = 0;
	const struct req_lib_cfg_get_node_addrs *req_lib_cfg_get_node_addrs = msg;
	struct res_lib_cfg_get_node_addrs *res_lib_cfg_get_node_addrs = (struct res_lib_cfg_get_node_addrs *)buf;
	unsigned int nodeid = req_lib_cfg_get_node_addrs->nodeid;
	char *addr_buf;

	/*
	 * Clear the response buffer: family/num_addrs are not assigned on the
	 * failure path and address slots of skipped interfaces are never
	 * written, yet all of them are covered by header.size and would
	 * otherwise be sent as uninitialised stack contents.
	 */
	memset(buf, 0, sizeof(buf));

	if (nodeid == 0)
		nodeid = api->totem_nodeid_get();

	if (api->totem_ifaces_get(nodeid, iface_ids, node_ifs, INTERFACE_MAX, &status, &num_interfaces)) {
		ret = CS_ERR_EXIST;
		num_interfaces = 0;
	}

	res_lib_cfg_get_node_addrs->header.size = sizeof(struct res_lib_cfg_get_node_addrs) + (num_interfaces * TOTEMIP_ADDRLEN);
	res_lib_cfg_get_node_addrs->header.id = MESSAGE_RES_CFG_GET_NODE_ADDRS;
	res_lib_cfg_get_node_addrs->header.error = ret;
	if (num_interfaces) {
		res_lib_cfg_get_node_addrs->family = node_ifs[0].family;
		for (i = 0, addr_buf = (char *)res_lib_cfg_get_node_addrs->addrs;
		    i < num_interfaces; i++) {
			ss = (struct sockaddr_storage *)&node_ifs[i].addr;
			/* Skip interfaces without an address family */
			if (ss->ss_family) {
				memcpy(addr_buf, node_ifs[i].addr, TOTEMIP_ADDRLEN);
				live_addrs++;
				addr_buf += TOTEMIP_ADDRLEN;
			}
		}
		res_lib_cfg_get_node_addrs->num_addrs = live_addrs;
	} else {
		res_lib_cfg_get_node_addrs->header.error = CS_ERR_NOT_EXIST;
	}
	api->ipc_response_send(conn, res_lib_cfg_get_node_addrs, res_lib_cfg_get_node_addrs->header.size);
}
static void message_handler_req_lib_cfg_local_get (void *conn, const void *msg)
{
struct res_lib_cfg_local_get res_lib_cfg_local_get;
res_lib_cfg_local_get.header.size = sizeof(res_lib_cfg_local_get);
res_lib_cfg_local_get.header.id = MESSAGE_RES_CFG_LOCAL_GET;
res_lib_cfg_local_get.header.error = CS_OK;
res_lib_cfg_local_get.local_nodeid = api->totem_nodeid_get ();
api->ipc_response_send(conn, &res_lib_cfg_local_get,
sizeof(res_lib_cfg_local_get));
}
/*
 * Library handler: a client asks for the configuration to be reloaded.
 *
 * The request is turned into a MESSAGE_REQ_EXEC_CFG_RELOAD_CONFIG executive
 * message, tagged with the requesting connection as its source, and
 * multicast. The connection's reference count is incremented here and no
 * response is sent from this function.
 *
 * conn - IPC connection of the requesting client
 * msg  - request (not inspected)
 */
static void message_handler_req_lib_cfg_reload_config (void *conn, const void *msg)
{
	struct req_exec_cfg_reload_config req_exec_cfg_reload_config;
	struct iovec iovec;
	int mcast_res;

	ENTER();

	req_exec_cfg_reload_config.header.size =
		sizeof (struct req_exec_cfg_reload_config);
	req_exec_cfg_reload_config.header.id = SERVICE_ID_MAKE (CFG_SERVICE,
		MESSAGE_REQ_EXEC_CFG_RELOAD_CONFIG);
	api->ipc_source_set (&req_exec_cfg_reload_config.source, conn);
	api->ipc_refcnt_inc(conn);

	iovec.iov_base = (char *)&req_exec_cfg_reload_config;
	iovec.iov_len = sizeof (struct req_exec_cfg_reload_config);

	/*
	 * Keep the multicast out of assert(): with NDEBUG defined the assert()
	 * argument is not evaluated and the reload would never be requested.
	 */
	mcast_res = api->totem_mcast (&iovec, 1, TOTEM_SAFE);
	assert (mcast_res == 0);
	(void)mcast_res;

	LEAVE();
}
static void message_handler_req_lib_cfg_reopen_log_files (void *conn, const void *msg)
{
struct res_lib_cfg_reopen_log_files res_lib_cfg_reopen_log_files;
cs_error_t res;
ENTER();
log_printf(LOGSYS_LEVEL_DEBUG, "Reopening logging files\n");
res = logsys_reopen_log_files();
res_lib_cfg_reopen_log_files.header.size = sizeof(res_lib_cfg_reopen_log_files);
res_lib_cfg_reopen_log_files.header.id = MESSAGE_RES_CFG_REOPEN_LOG_FILES;
res_lib_cfg_reopen_log_files.header.error = res;
api->ipc_response_send(conn,
&res_lib_cfg_reopen_log_files,
sizeof(res_lib_cfg_reopen_log_files));
LEAVE();
}
diff --git a/exec/totemknet.c b/exec/totemknet.c
index 0834e8e4..772752c5 100644
--- a/exec/totemknet.c
+++ b/exec/totemknet.c
@@ -1,2129 +1,2206 @@
/*
* Copyright (c) 2016-2020 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Christine Caulfield (ccaulfie@redhat.com)
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <assert.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netdb.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <net/ethernet.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <time.h>
#include <sys/time.h>
#include <sys/poll.h>
#include <sys/uio.h>
#include <limits.h>
#include <qb/qbdefs.h>
#include <qb/qbloop.h>
#ifdef HAVE_LIBNOZZLE
#include <libgen.h>
#include <libnozzle.h>
#endif
#include <corosync/sq.h>
#include <corosync/swab.h>
#include <corosync/logsys.h>
#include <corosync/icmap.h>
#include <corosync/totem/totemip.h>
#include "totemknet.h"
#include "main.h"
#include "util.h"
#include <libknet.h>
#include <corosync/totem/totemstats.h>
#ifndef MSG_NOSIGNAL
#define MSG_NOSIGNAL 0
#endif
#ifdef HAVE_LIBNOZZLE
static int setup_nozzle(void *knet_context);
#endif
/* Should match that used by cfg */
#define CFG_INTERFACE_STATUS_MAX_LEN 512
/*
 * Per-transport state for the knet backend. One of these is allocated by
 * totemknet_initialize() and handed back to callers as the opaque
 * "knet_context" pointer.
 */
struct totemknet_instance {
struct crypto_instance *crypto_inst;
/* qb main loop the log pipe and knet data fd are registered with */
qb_loop_t *poll_handle;
/* Handle returned by knet_handle_new(); passed to every knet call */
knet_handle_t knet_handle;
int link_mode;
/* Opaque pointer passed back as the first argument of the callbacks below */
void *context;
/* Called by data_deliver_fn() for every message received from knet */
void (*totemknet_deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len,
const struct sockaddr_storage *system_from);
/* Called by the netif check timer once per configured interface */
void (*totemknet_iface_change_fn) (
void *context,
const struct totem_ip_address *iface_address,
unsigned int link_no);
/* Stored at initialization; the only visible call site is commented out */
void (*totemknet_mtu_changed) (
void *context,
int net_mtu);
void (*totemknet_target_set_completed) (void *context);
/*
* Function and data used to log messages
*/
int totemknet_log_level_security;
int totemknet_log_level_error;
int totemknet_log_level_warning;
int totemknet_log_level_notice;
int totemknet_log_level_debug;
/* Subsystem id used for messages originating in this file */
int totemknet_subsys_id;
/* Subsystem id used for messages forwarded from libknet ("KNET") */
int knet_subsys_id;
void (*totemknet_log_printf) (
int level,
int subsys,
const char *function,
const char *file,
int line,
const char *format,
...)__attribute__((format(printf, 6, 7)));
void *knet_context;
/* Receive buffer that data_deliver_fn() reads each packet into */
char iov_buffer[KNET_MAX_PACKET_SIZE];
/* Per-interface status strings filled in by totemknet_ifaces_get() */
char *link_status[INTERFACE_MAX];
/* Per-interface bind address, tagged with our node id */
struct totem_ip_address my_ids[INTERFACE_MAX];
/* Per-interface port copied from the totem configuration */
uint16_t ip_port[INTERFACE_MAX];
/* Node id of the local node (totem_config->node_id) */
int our_nodeid;
int loopback_link;
struct totem_config *totem_config;
struct totem_ip_address token_target;
qb_loop_timer_handle timer_netif_check_timeout;
qb_loop_timer_handle timer_merge_detect_timeout;
/* Cleared by mcast_sendmsg() once a message has gone to all nodes */
int send_merge_detect_message;
/* Incremented by mcast_sendmsg() for each message sent to all nodes */
unsigned int merge_detect_messages_sent_before_timeout;
/* Pipe carrying libknet log records: [1] is given to knet, [0] is polled */
int logpipes[2];
/* Data fd obtained from knet_handle_add_datafd(); used by sendmsg/recvmsg */
int knet_fd;
/* Serialises calls to totemknet_log_printf made via knet_log_printf_lock */
pthread_mutex_t log_mutex;
#ifdef HAVE_LIBNOZZLE
char *nozzle_name;
char *nozzle_ipaddr;
char *nozzle_prefix;
char *nozzle_macaddr;
nozzle_t nozzle_handle;
#endif
};
/* Awkward. But needed to get stats from knet */
struct totemknet_instance *global_instance;
/*
 * A message buffer paired with the instance it belongs to.
 * NOTE(review): no user of this struct is visible in this part of the file;
 * confirm it is still needed.
 */
struct work_item {
const void *msg;
unsigned int msg_len;
struct totemknet_instance *instance;
};
int totemknet_member_list_rebind_ip (
void *knet_context);
static int totemknet_configure_compression (
void *knet_context,
struct totem_config *totem_config);
static void totemknet_start_merge_detect_timeout(
void *knet_context);
static void totemknet_stop_merge_detect_timeout(
void *knet_context);
static void log_flush_messages (
void *knet_context);
/*
 * Reset a freshly allocated instance to all-zeroes and create the mutex
 * that guards log output.
 */
static void totemknet_instance_initialize (struct totemknet_instance *instance)
{
	int mutex_rc;

	memset (instance, 0, sizeof (*instance));

	mutex_rc = pthread_mutex_init(&instance->log_mutex, NULL);

	/*
	 * Nothing sensible can be done if the mutex cannot be created.
	 */
	assert(mutex_rc == 0);
}
/*
 * Logging helpers. All of them expect a variable named "instance"
 * (struct totemknet_instance *) to be in scope at the point of use.
 *
 * NOTE(review): the first three macros end in "while (0);" - the trailing
 * semicolon means "if (x) knet_log_printf(...); else ..." will not compile.
 * Confirm no call site depends on it before removing.
 */

/*
 * Call instance->totemknet_log_printf with the given location arguments
 * while holding instance->log_mutex.
 */
#define knet_log_printf_lock(level, subsys, function, file, line, format, args...) \
do { \
(void)pthread_mutex_lock(&instance->log_mutex); \
instance->totemknet_log_printf ( \
level, subsys, function, file, line, \
(const char *)format, ##args); \
(void)pthread_mutex_unlock(&instance->log_mutex); \
} while (0);
/*
 * Log a message from this file under the totemknet subsystem id, tagged
 * with the caller's function, file and line.
 */
#define knet_log_printf(level, format, args...) \
do { \
knet_log_printf_lock ( \
level, instance->totemknet_subsys_id, \
__FUNCTION__, __FILE__, __LINE__, \
(const char *)format, ##args); \
} while (0);
/*
 * Log a message forwarded from libknet under the knet subsystem id; the
 * file name is reported as "libknet.h".
 */
#define libknet_log_printf(level, format, args...) \
do { \
knet_log_printf_lock ( \
level, instance->knet_subsys_id, \
__FUNCTION__, "libknet.h", __LINE__, \
(const char *)format, ##args); \
} while (0);
/*
 * Log fmt followed by ": <error text> (<err_num>)", where the error text
 * comes from qb_strerror_r(err_num). Unlike the macros above this one calls
 * totemknet_log_printf directly, without taking log_mutex.
 */
#define KNET_LOGSYS_PERROR(err_num, level, fmt, args...) \
do { \
char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \
const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \
instance->totemknet_log_printf ( \
level, instance->totemknet_subsys_id, \
__FUNCTION__, __FILE__, __LINE__, \
fmt ": %s (%d)", ##args, _error_ptr, err_num); \
} while(0)
#ifdef HAVE_LIBNOZZLE
/*
 * Return 1 when the least significant bit of the first octet of the MAC
 * address is set (group/multicast address), 0 otherwise.
 */
static inline int is_ether_addr_multicast(const uint8_t *addr)
{
	const uint8_t group_bit = 0x01;

	return ((addr[0] & group_bit) != 0) ? 1 : 0;
}
/*
 * Return 1 when all six octets of the MAC address are zero, 0 otherwise.
 */
static inline int is_ether_addr_zero(const uint8_t *addr)
{
	int octet;

	for (octet = 0; octet < 6; octet++) {
		if (addr[octet] != 0) {
			return 0;
		}
	}

	return 1;
}
/*
 * Destination filter for ethernet frames.
 *
 * Looks at the destination MAC of the frame in outdata:
 *  - all-zero address: returns -1
 *  - multicast address: returns 1
 *  - anything else: the last two octets of the MAC, read as a big-endian
 *    16-bit number, are stored as the single entry of dst_host_ids and
 *    0 is returned.
 */
static int ether_host_filter_fn(void *private_data,
				const unsigned char *outdata,
				ssize_t outdata_len,
				uint8_t tx_rx,
				knet_node_id_t this_host_id,
				knet_node_id_t src_host_id,
				int8_t *channel,
				knet_node_id_t *dst_host_ids,
				size_t *dst_host_ids_entries)
{
	struct ether_header *frame = (struct ether_header *)outdata;
	uint8_t *dest = (uint8_t *)frame->ether_dhost;
	uint16_t wire_id;

	if (is_ether_addr_zero(dest)) {
		return -1;
	}

	if (is_ether_addr_multicast(dest)) {
		return 1;
	}

	/* Octets 4 and 5 carry the node id in network byte order */
	memcpy(&wire_id, dest + 4, sizeof(wire_id));
	dst_host_ids[0] = ntohs(wire_id);
	*dst_host_ids_entries = 1;

	return 0;
}
#endif
/*
 * Destination filter installed with knet_handle_enable_filter().
 *
 * With libnozzle support, traffic on any channel other than 0 is handed to
 * ether_host_filter_fn(). Otherwise the totem header decides:
 * a non-zero target_nodeid makes the message unicast to that node
 * (returns 0), a zero target_nodeid makes it multicast (returns 1).
 */
static int dst_host_filter_callback_fn(void *private_data,
				       const unsigned char *outdata,
				       ssize_t outdata_len,
				       uint8_t tx_rx,
				       knet_node_id_t this_host_id,
				       knet_node_id_t src_host_id,
				       int8_t *channel,
				       knet_node_id_t *dst_host_ids,
				       size_t *dst_host_ids_entries)
{
	struct totem_message_header *totem_hdr = (struct totem_message_header *)outdata;

#ifdef HAVE_LIBNOZZLE
	if (*channel != 0) {
		return ether_host_filter_fn(private_data,
					    outdata, outdata_len,
					    tx_rx,
					    this_host_id, src_host_id,
					    channel,
					    dst_host_ids,
					    dst_host_ids_entries);
	}
#endif

	if (!totem_hdr->target_nodeid) {
		/* multicast message */
		*dst_host_ids_entries = 0;
		return 1;
	}

	/* unicast message */
	dst_host_ids[0] = totem_hdr->target_nodeid;
	*dst_host_ids_entries = 1;
	return 0;
}
/*
 * Socket notification callback registered with
 * knet_handle_enable_sock_notify(). The data fd is removed from knet when
 * error is 0, or when error is -1 with an errno other than EAGAIN.
 */
static void socket_error_callback_fn(void *private_data, int datafd, int8_t channel, uint8_t tx_rx, int error, int errorno)
{
	struct totemknet_instance *instance = (struct totemknet_instance *)private_data;
	int hard_failure;

	knet_log_printf (LOGSYS_LEVEL_DEBUG, "Knet socket ERROR notification called: txrx=%d, error=%d, errorno=%d", tx_rx, error, errorno);

	hard_failure = (error == -1 && errorno != EAGAIN);
	if (hard_failure || error == 0) {
		knet_handle_remove_datafd(instance->knet_handle, datafd);
	}
}
/*
 * Host status change callback registered with
 * knet_host_enable_status_change_notify(). Currently it only logs the node
 * id and its reachable flag at debug level; remote and external are unused.
 */
static void host_change_callback_fn(void *private_data, knet_node_id_t host_id, uint8_t reachable, uint8_t remote, uint8_t external)
{
struct totemknet_instance *instance = (struct totemknet_instance *)private_data;
// TODO: what? if anything.
knet_log_printf (LOGSYS_LEVEL_DEBUG, "Knet host change callback. nodeid: " CS_PRI_NODE_ID " reachable: %d", host_id, reachable);
}
/*
 * Path MTU change callback registered with
 * knet_handle_enable_pmtud_notify(). The new value is only logged; it is
 * deliberately not forwarded through totemknet_mtu_changed.
 * NOTE(review): data_mtu is unsigned but is printed with %d.
 */
static void pmtu_change_callback_fn(void *private_data, unsigned int data_mtu)
{
struct totemknet_instance *instance = (struct totemknet_instance *)private_data;
knet_log_printf (LOGSYS_LEVEL_DEBUG, "Knet pMTU change: %d", data_mtu);
/* We don't need to tell corosync the actual knet MTU */
// instance->totemknet_mtu_changed(instance->context, data_mtu);
}
/*
 * Stub: ignores all of its arguments and always returns 0.
 */
int totemknet_crypto_set (
void *knet_context,
const char *cipher_type,
const char *hash_type)
{
return (0);
}
/*
 * Send msg to a single node.
 *
 * The target node id is written into the totem header at the start of msg
 * (so the buffer is modified even though it is passed as const), then the
 * buffer is written to the knet data fd. A failed send is only logged at
 * debug level.
 */
static inline void ucast_sendmsg (
	struct totemknet_instance *instance,
	struct totem_ip_address *system_to,
	const void *msg,
	unsigned int msg_len)
{
	struct totem_message_header *totem_hdr = (struct totem_message_header *)msg;
	struct iovec payload;
	struct msghdr out_msg;
	int sent = 0;

	/* Address the message; the knet destination filter reads this field */
	totem_hdr->target_nodeid = system_to->nodeid;

	payload.iov_base = (void *)msg;
	payload.iov_len = msg_len;

	/*
	 * Build unicast message
	 */
	memset(&out_msg, 0, sizeof(out_msg));
	out_msg.msg_iov = (void *)&payload;
	out_msg.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
	out_msg.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
	out_msg.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
	out_msg.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
	out_msg.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
	out_msg.msg_accrightslen = 0;
#endif

	/*
	 * Transmit unicast message
	 * An error here is recovered by totemsrp
	 */
	sent = sendmsg (instance->knet_fd, &out_msg, MSG_NOSIGNAL);
	if (sent < 0) {
		KNET_LOGSYS_PERROR (errno, instance->totemknet_log_level_debug,
			"sendmsg(ucast) failed (non-critical)");
	}
}
/*
 * Send msg to all nodes.
 *
 * The target node id in the totem header at the start of msg is cleared
 * (so the buffer is modified even though it is passed as const), then the
 * buffer is written to the knet data fd. A failed or short send is logged
 * at debug level.
 *
 * When only_active is 0, or a merge detect message is pending, the
 * merge-detect counter is incremented and the pending flag is cleared.
 */
static inline void mcast_sendmsg (
	struct totemknet_instance *instance,
	const void *msg,
	unsigned int msg_len,
	int only_active)
{
	int res;
	struct totem_message_header *header = (struct totem_message_header *)msg;
	struct msghdr msg_mcast;
	struct iovec iovec;

	iovec.iov_base = (void *)msg;
	iovec.iov_len = msg_len;

	header->target_nodeid = 0;

	/*
	 * Build multicast message
	 */
	memset(&msg_mcast, 0, sizeof(msg_mcast));
	msg_mcast.msg_iov = (void *)&iovec;
	msg_mcast.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
	msg_mcast.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
	msg_mcast.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
	msg_mcast.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
	msg_mcast.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
	msg_mcast.msg_accrightslen = 0;
#endif

//	log_printf (LOGSYS_LEVEL_DEBUG, "totemknet: mcast_sendmsg. only_active=%d, len=%d", only_active, msg_len);

	res = sendmsg (instance->knet_fd, &msg_mcast, MSG_NOSIGNAL);
	/*
	 * Test for failure explicitly: comparing a negative int directly with
	 * the unsigned msg_len promotes -1 to UINT_MAX, so the error case
	 * would never be reported.
	 */
	if (res < 0 || (unsigned int)res < msg_len) {
		knet_log_printf (LOGSYS_LEVEL_DEBUG, "totemknet: mcast_send sendmsg returned %d", res);
	}

	if (!only_active || instance->send_merge_detect_message) {
		/*
		 * Current message was sent to all nodes
		 */
		instance->merge_detect_messages_sent_before_timeout++;
		instance->send_merge_detect_message = 0;
	}
}
/*
 * qsort() comparator for arrays of 16-bit node ids, ascending order.
 *
 * Returns a negative value, zero or a positive value when the first id is
 * less than, equal to or greater than the second, as qsort() requires.
 * (Returning only 0/1 would report "less than" as "equal" and leave the
 * resulting order unspecified.)
 */
static int node_compare(const void *aptr, const void *bptr)
{
	const uint16_t a = *(const uint16_t *)aptr;
	const uint16_t b = *(const uint16_t *)bptr;

	return (a > b) - (a < b);
}
#ifndef OWN_INDEX_NONE
#define OWN_INDEX_NONE -1
#endif
+/*
+ * Fill *node_status with knet's view of node "nodeid": host reachability,
+ * (when available) on-wire protocol versions, and the status of every link
+ * that is configured in corosync.
+ *
+ * Link entries are stored at the knet link id, so node_status->link_status[n]
+ * always describes link n even when lower-numbered links are unconfigured.
+ * Entries for links that are skipped or whose status cannot be read are left
+ * untouched.
+ *
+ * Returns CS_ERR_NOT_EXIST when knet is not in use, CS_ERR_INVALID_PARAM when
+ * node_status is NULL, -1 when a knet host-level query fails, otherwise the
+ * result of the last knet call made (0 on success).
+ */
+int totemknet_nodestatus_get (
+	void *knet_context,
+	unsigned int nodeid,
+	struct totem_node_status *node_status)
+{
+	size_t i;
+	int res = 0;
+	struct knet_link_status link_status;
+	struct totemknet_instance *instance = (struct totemknet_instance *)knet_context;
+	struct knet_host_status knet_host_status;
+	uint8_t link_list[KNET_MAX_LINK];
+	size_t num_links;
+
+	if (!instance->knet_handle) {
+		return CS_ERR_NOT_EXIST; /* Not using knet */
+	}
+
+	if (!node_status) {
+		return CS_ERR_INVALID_PARAM;
+	}
+
+	res = knet_host_get_status(instance->knet_handle,
+				   nodeid,
+				   &knet_host_status);
+	if (res) {
+		knet_log_printf (LOGSYS_LEVEL_WARNING, "knet_host_get_status(" CS_PRI_NODE_ID ") failed: %d", nodeid, res);
+		return (-1);
+	}
+	node_status->nodeid = nodeid;
+	node_status->reachable = knet_host_status.reachable;
+	node_status->remote = knet_host_status.remote;
+	node_status->external = knet_host_status.external;
+
+#ifdef HAVE_KNET_ONWIRE_VER
+	res = knet_handle_get_onwire_ver(instance->knet_handle,
+					 nodeid,
+					 &node_status->onwire_min,
+					 &node_status->onwire_max,
+					 &node_status->onwire_ver);
+	if (res) {
+		knet_log_printf (LOGSYS_LEVEL_WARNING, "knet_handle_get_onwire_ver(" CS_PRI_NODE_ID ") failed: %d", nodeid, res);
+		return (-1);
+	}
+#endif
+	/* Get link info */
+	res = knet_link_get_link_list(instance->knet_handle,
+				      nodeid, link_list, &num_links);
+	if (res) {
+		knet_log_printf (LOGSYS_LEVEL_WARNING, "knet_link_get_link_list(" CS_PRI_NODE_ID ") failed: %d", nodeid, res);
+		return (-1);
+	}
+
+	for (i = 0; i < num_links; i++) {
+		const uint8_t link_id = link_list[i];
+
+		/* Links corosync does not use (e.g. loopback-only) are not reported */
+		if (!instance->totem_config->interfaces[link_id].configured) {
+			continue;
+		}
+		res = knet_link_get_status(instance->knet_handle,
+					   nodeid,
+					   link_id,
+					   &link_status,
+					   sizeof(link_status));
+		if (res == 0) {
+			/*
+			 * Index by link id, not by position in link_list, so a
+			 * skipped link does not shift the following ones.
+			 */
+			node_status->link_status[link_id].enabled = link_status.enabled;
+			node_status->link_status[link_id].connected = link_status.connected;
+			node_status->link_status[link_id].dynconnected = link_status.dynconnected;
+			node_status->link_status[link_id].mtu = link_status.mtu;
+			memcpy(node_status->link_status[link_id].src_ipaddr, link_status.src_ipaddr, KNET_MAX_HOST_LEN);
+			memcpy(node_status->link_status[link_id].dst_ipaddr, link_status.dst_ipaddr, KNET_MAX_HOST_LEN);
+		} else {
+			knet_log_printf (LOGSYS_LEVEL_WARNING, "knet_link_get_status(" CS_PRI_NODE_ID ", %d) failed: %d", nodeid, link_id, res);
+		}
+	}
+	return res;
+}
+
+
+
/*
 * Report the number of interfaces and, optionally, a status string per link.
 *
 * *iface_count is always set to INTERFACE_MAX. When status is non-NULL,
 * *status is pointed at instance->link_status, an array of strings (one per
 * link) holding one character per known host, in ascending node id order:
 *   'n'      - the local node
 *   'd'      - default, no status was obtained for that host on that link
 *   '0'..'7' - '0' + (enabled | connected<<1 | dynconnected<<2)
 *   '?'      - knet_link_get_status() failed
 *
 * Returns -1 when the host list or a link list cannot be read, otherwise
 * the result of the last knet call made (0 on success).
 */
int totemknet_ifaces_get (void *knet_context,
char ***status,
unsigned int *iface_count)
{
struct totemknet_instance *instance = (struct totemknet_instance *)knet_context;
struct knet_link_status link_status;
knet_node_id_t host_list[KNET_MAX_HOST];
uint8_t link_list[KNET_MAX_LINK];
size_t num_hosts;
size_t num_links;
size_t link_idx;
int i,j;
char *ptr;
int res = 0;
/*
* Don't do the whole 'link_info' bit if the caller just wants
* a count of interfaces.
*/
if (status) {
int own_idx = OWN_INDEX_NONE;
res = knet_host_get_host_list(instance->knet_handle,
host_list, &num_hosts);
if (res) {
return (-1);
}
/* Sort by node id so the character position of a host is stable */
qsort(host_list, num_hosts, sizeof(uint16_t), node_compare);
/* Find our own position in the sorted list, if we are in it */
for (j=0; j<num_hosts; j++) {
if (host_list[j] == instance->our_nodeid) {
own_idx = j;
break;
}
}
/* Reset every status string: 'd' everywhere, 'n' for us, cut at num_hosts */
for (i=0; i<INTERFACE_MAX; i++) {
memset(instance->link_status[i], 'd', CFG_INTERFACE_STATUS_MAX_LEN-1);
if (own_idx != OWN_INDEX_NONE) {
instance->link_status[i][own_idx] = 'n';
}
instance->link_status[i][num_hosts] = '\0';
}
/* This is all a bit "inside-out" because "status" is a set of strings per link
* and knet orders things by host
*/
for (j=0; j<num_hosts; j++) {
/* Our own column was already set to 'n' above */
if (own_idx != OWN_INDEX_NONE && j == own_idx) {
continue ;
}
res = knet_link_get_link_list(instance->knet_handle,
host_list[j], link_list, &num_links);
if (res) {
return (-1);
}
/* link_idx counts configured links only, so status rows are packed */
link_idx = 0;
for (i=0; i < num_links; i++) {
/*
* Skip over links that are unconfigured to corosync. This is basically
* link0 if corosync isn't using it for comms, as we will still
* have it set up for loopback.
*/
if (!instance->totem_config->interfaces[link_list[i]].configured) {
continue;
}
ptr = instance->link_status[link_idx++];
res = knet_link_get_status(instance->knet_handle,
host_list[j],
link_list[i],
&link_status,
sizeof(link_status));
if (res == 0) {
/* Encode the three link flags as one digit character */
ptr[j] = '0' + (link_status.enabled |
link_status.connected<<1 |
link_status.dynconnected<<2);
}
else {
knet_log_printf (LOGSYS_LEVEL_ERROR,
"totemknet_ifaces_get: Cannot get link status: %s", strerror(errno));
ptr[j] = '?';
}
}
}
*status = instance->link_status;
}
*iface_count = INTERFACE_MAX;
return (res);
}
/*
 * Shut the knet transport down.
 *
 * Removes the log pipe and data fd from the poll loop, disables forwarding
 * so knet flushes its send queue, then disables and clears every link and
 * removes every host before freeing the knet handle. Failures of individual
 * steps are logged and the teardown continues; failing to list hosts or
 * links jumps straight to freeing the handle.
 *
 * Returns the result of knet_handle_free().
 */
int totemknet_finalize (
void *knet_context)
{
struct totemknet_instance *instance = (struct totemknet_instance *)knet_context;
int res = 0;
int i,j;
static knet_node_id_t nodes[KNET_MAX_HOST]; /* static to save stack */
uint8_t links[KNET_MAX_LINK];
size_t num_nodes;
size_t num_links;
knet_log_printf(LOG_DEBUG, "totemknet: finalize");
/* Stop receiving poll callbacks before knet is torn down */
qb_loop_poll_del (instance->poll_handle, instance->logpipes[0]);
qb_loop_poll_del (instance->poll_handle, instance->knet_fd);
/*
* Disable forwarding to make knet flush send queue. This ensures that the LEAVE message will be sent.
*/
res = knet_handle_setfwd(instance->knet_handle, 0);
if (res) {
knet_log_printf (LOGSYS_LEVEL_CRIT, "totemknet: knet_handle_setfwd failed: %s", strerror(errno));
}
res = knet_host_get_host_list(instance->knet_handle, nodes, &num_nodes);
if (res) {
knet_log_printf (LOGSYS_LEVEL_ERROR, "Cannot get knet node list for shutdown: %s", strerror(errno));
/* Crash out anyway */
goto finalise_error;
}
/* Tidily shut down all nodes & links. */
for (i=0; i<num_nodes; i++) {
res = knet_link_get_link_list(instance->knet_handle, nodes[i], links, &num_links);
if (res) {
knet_log_printf (LOGSYS_LEVEL_ERROR, "Cannot get knet link list for node " CS_PRI_NODE_ID ": %s", nodes[i], strerror(errno));
goto finalise_error;
}
/* Each link is disabled first and has its configuration cleared second */
for (j=0; j<num_links; j++) {
res = knet_link_set_enable(instance->knet_handle, nodes[i], links[j], 0);
if (res) {
knet_log_printf (LOGSYS_LEVEL_ERROR, "totemknet: knet_link_set_enable(node " CS_PRI_NODE_ID ", link %d) failed: %s", nodes[i], links[j], strerror(errno));
}
res = knet_link_clear_config(instance->knet_handle, nodes[i], links[j]);
if (res) {
knet_log_printf (LOGSYS_LEVEL_ERROR, "totemknet: knet_link_clear_config(node " CS_PRI_NODE_ID ", link %d) failed: %s", nodes[i], links[j], strerror(errno));
}
}
res = knet_host_remove(instance->knet_handle, nodes[i]);
if (res) {
knet_log_printf (LOGSYS_LEVEL_ERROR, "totemknet: knet_host_remove(node " CS_PRI_NODE_ID ") failed: %s", nodes[i], strerror(errno));
}
}
finalise_error:
res = knet_handle_free(instance->knet_handle);
if (res) {
knet_log_printf (LOGSYS_LEVEL_CRIT, "totemknet: knet_handle_free failed: %s", strerror(errno));
}
totemknet_stop_merge_detect_timeout(instance);
/* Drain whatever knet wrote to the log pipe before the handle was freed */
log_flush_messages(instance);
/*
* Error is deliberately ignored
*/
(void)pthread_mutex_destroy(&instance->log_mutex);
return (res);
}
/*
 * Poll callback for the read end of the knet log pipe.
 *
 * Reads up to four struct knet_log_msg records in one go and forwards each
 * one to the corosync log under the knet subsystem, mapping the knet level
 * to the matching LOGSYS level. Records with a level other than
 * ERR/WARN/INFO/DEBUG are dropped.
 *
 * Only complete records are processed: a failed read (negative length) or
 * trailing bytes shorter than one record are ignored rather than being
 * interpreted from uninitialised buffer contents.
 *
 * Always returns 0.
 */
static int log_deliver_fn (
	int fd,
	int revents,
	void *data)
{
	struct totemknet_instance *instance = (struct totemknet_instance *)data;
	char buffer[sizeof(struct knet_log_msg)*4];
	char *bufptr = buffer;
	ssize_t done = 0;
	ssize_t len;

	len = read(fd, buffer, sizeof(buffer));
	while (len - done >= (ssize_t)sizeof(struct knet_log_msg)) {
		struct knet_log_msg *msg = (struct knet_log_msg *)bufptr;

		switch (msg->msglevel) {
		case KNET_LOG_ERR:
			libknet_log_printf (LOGSYS_LEVEL_ERROR, "%s: %s",
					    knet_log_get_subsystem_name(msg->subsystem),
					    msg->msg);
			break;
		case KNET_LOG_WARN:
			libknet_log_printf (LOGSYS_LEVEL_WARNING, "%s: %s",
					    knet_log_get_subsystem_name(msg->subsystem),
					    msg->msg);
			break;
		case KNET_LOG_INFO:
			libknet_log_printf (LOGSYS_LEVEL_INFO, "%s: %s",
					    knet_log_get_subsystem_name(msg->subsystem),
					    msg->msg);
			break;
		case KNET_LOG_DEBUG:
			libknet_log_printf (LOGSYS_LEVEL_DEBUG, "%s: %s",
					    knet_log_get_subsystem_name(msg->subsystem),
					    msg->msg);
			break;
		}
		bufptr += sizeof(struct knet_log_msg);
		done += sizeof(struct knet_log_msg);
	}
	return 0;
}
/*
 * Poll callback for the knet data fd.
 *
 * Receives one packet into instance->iov_buffer without blocking and hands
 * it to totemknet_deliver_fn together with the sender address. Nothing is
 * delivered when the receive fails or returns no data, or when the packet
 * was truncated (in which case an error is logged and the packet dropped).
 *
 * Always returns 0.
 */
static int data_deliver_fn (
	int fd,
	int revents,
	void *data)
{
	struct totemknet_instance *instance = (struct totemknet_instance *)data;
	struct msghdr msg_hdr;
	struct iovec iov_recv;
	struct sockaddr_storage system_from;
	ssize_t msg_len;
	int truncated_packet;

	iov_recv.iov_base = instance->iov_buffer;
	iov_recv.iov_len = KNET_MAX_PACKET_SIZE;

	/* Start from a clean header so no field is left uninitialised */
	memset(&msg_hdr, 0, sizeof(msg_hdr));
	msg_hdr.msg_name = &system_from;
	msg_hdr.msg_namelen = sizeof (struct sockaddr_storage);
	msg_hdr.msg_iov = &iov_recv;
	msg_hdr.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
	msg_hdr.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
	msg_hdr.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
	msg_hdr.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
	msg_hdr.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
	msg_hdr.msg_accrightslen = 0;
#endif

	msg_len = recvmsg (fd, &msg_hdr, MSG_NOSIGNAL | MSG_DONTWAIT);
	if (msg_len <= 0) {
		return (0);
	}

	truncated_packet = 0;

#ifdef HAVE_MSGHDR_FLAGS
	if (msg_hdr.msg_flags & MSG_TRUNC) {
		truncated_packet = 1;
	}
#else
	/*
	 * We don't have MSGHDR_FLAGS, but we can (hopefully) safely make assumption that
	 * if msg_len == KNET_MAX_PACKET_SIZE then packet is truncated
	 */
	if (msg_len == KNET_MAX_PACKET_SIZE) {
		truncated_packet = 1;
	}
#endif

	if (truncated_packet) {
		knet_log_printf(instance->totemknet_log_level_error,
				"Received too big message. This may be because something bad is happening "
				"on the network (attack?), or you tried join more nodes than corosync is "
				"compiled with (%u) or bug in the code (bad estimation of "
				"the KNET_MAX_PACKET_SIZE). Dropping packet.", PROCESSOR_COUNT_MAX);
		return (0);
	}

	/*
	 * Handle incoming message
	 */
	instance->totemknet_deliver_fn (
		instance->context,
		instance->iov_buffer,
		msg_len,
		&system_from);

	return (0);
}
/*
 * Timer callback: invoke the interface-change callback once for every
 * interface that is marked configured, passing its local address and its
 * link number.
 */
static void timer_function_netif_check_timeout (
	void *data)
{
	struct totemknet_instance *instance = (struct totemknet_instance *)data;
	int link_no = 0;

	while (link_no < INTERFACE_MAX) {
		if (instance->totem_config->interfaces[link_no].configured) {
			instance->totemknet_iface_change_fn (instance->context,
							     &instance->my_ids[link_no],
							     link_no);
		}
		link_no++;
	}
}
/*
 * Push the configured block_unlisted_ips setting into knet's access list
 * switch. A failure is logged as a warning. Does nothing when knet was
 * built without access list support.
 */
static void knet_set_access_list_config(struct totemknet_instance *instance)
{
#ifdef HAVE_KNET_ACCESS_LIST
	const uint32_t acl_enabled = instance->totem_config->block_unlisted_ips;

	knet_log_printf (LOGSYS_LEVEL_DEBUG, "knet_enable access list: %d", acl_enabled);

	if (knet_handle_enable_access_lists(instance->knet_handle, acl_enabled) != 0) {
		KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_handle_enable_access_lists failed");
	}
#endif
}
/* NOTE: this relies on the fact that totem_reload_notify() is called first */
/*
 * icmap change callback: re-apply the runtime-tunable knet settings.
 *
 * Does nothing while a full configuration reload is in progress. Otherwise
 * it refreshes the access list switch and the PMTUd interval, then pushes
 * ping timers, pong count and priority of every configured link to every
 * remote host known to knet. Individual failures are logged and the
 * remaining settings are still applied; if the host list itself cannot be
 * read, the per-link step is skipped.
 */
static void totemknet_refresh_config(
	int32_t event,
	const char *key_name,
	struct icmap_notify_value new_val,
	struct icmap_notify_value old_val,
	void *user_data)
{
	uint8_t reloading;
	uint32_t value;
	uint32_t link_no;
	size_t num_nodes;
	knet_node_id_t host_ids[KNET_MAX_HOST];
	size_t i;
	int err;
	struct totemknet_instance *instance = (struct totemknet_instance *)user_data;

	ENTER();

	/*
	 * If a full reload is in progress then don't do anything until it's done and
	 * can reconfigure it all atomically
	 */
	if (icmap_get_uint8("config.totemconfig_reload_in_progress", &reloading) == CS_OK && reloading) {
		LEAVE();
		return;
	}

	knet_set_access_list_config(instance);

	if (icmap_get_uint32("totem.knet_pmtud_interval", &value) == CS_OK) {

		instance->totem_config->knet_pmtud_interval = value;
		knet_log_printf (LOGSYS_LEVEL_DEBUG, "knet_pmtud_interval now %d", value);
		err = knet_handle_pmtud_setfreq(instance->knet_handle, instance->totem_config->knet_pmtud_interval);
		if (err) {
			KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_handle_pmtud_setfreq failed");
		}
	}

	/* Configure link parameters for each node */
	err = knet_host_get_host_list(instance->knet_handle, host_ids, &num_nodes);
	if (err != 0) {
		KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_host_get_host_list failed");
		/* host_ids/num_nodes are not valid, so there is nothing to iterate */
		LEAVE();
		return;
	}

	for (i = 0; i < num_nodes; i++) {
		/* Links to ourselves are not tuned */
		if (host_ids[i] == instance->our_nodeid) {
			continue;
		}

		for (link_no = 0; link_no < INTERFACE_MAX; link_no++) {
			if (!instance->totem_config->interfaces[link_no].configured) {
				continue;
			}

			err = knet_link_set_ping_timers(instance->knet_handle, host_ids[i], link_no,
							instance->totem_config->interfaces[link_no].knet_ping_interval,
							instance->totem_config->interfaces[link_no].knet_ping_timeout,
							instance->totem_config->interfaces[link_no].knet_ping_precision);
			if (err) {
				KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_ping_timers for node " CS_PRI_NODE_ID " link %d failed", host_ids[i], link_no);
			}
			err = knet_link_set_pong_count(instance->knet_handle, host_ids[i], link_no,
						       instance->totem_config->interfaces[link_no].knet_pong_count);
			if (err) {
				KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_pong_count for node " CS_PRI_NODE_ID " link %d failed",host_ids[i], link_no);
			}
			err = knet_link_set_priority(instance->knet_handle, host_ids[i], link_no,
						     instance->totem_config->interfaces[link_no].knet_link_priority);
			if (err) {
				KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_priority for node " CS_PRI_NODE_ID " link %d failed", host_ids[i], link_no);
			}
		}
	}

	LEAVE();
}
/*
 * Register totemknet_refresh_config() as the icmap change callback for
 * every key under "totem." (add, delete and modify) and for the
 * "config.totemconfig_reload_in_progress" key (add and modify).
 * The results of the registrations and the returned tracker handles are
 * not kept.
 */
static void totemknet_add_config_notifications(struct totemknet_instance *instance)
{
	icmap_track_t reload_tracker = NULL;
	icmap_track_t totem_tracker = NULL;

	ENTER();

	icmap_track_add("totem.",
		ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX,
		totemknet_refresh_config, instance, &totem_tracker);

	icmap_track_add("config.totemconfig_reload_in_progress",
		ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY,
		totemknet_refresh_config, instance, &reload_tracker);

	LEAVE();
}
static int totemknet_is_crypto_enabled(const struct totemknet_instance *instance)
{
return (!(strcmp(instance->totem_config->crypto_cipher_type, "none") == 0 &&
strcmp(instance->totem_config->crypto_hash_type, "none") == 0));
}
/*
 * Hand the crypto settings from the totem configuration to knet.
 *
 * With HAVE_KNET_CRYPTO_RECONF the settings are stored under
 * totem_config->crypto_index (they are activated elsewhere), and clear
 * traffic is explicitly allowed first when crypto is being switched off.
 * Without it the settings are applied directly with knet_handle_crypto().
 *
 * Returns the result of the knet call: 0 on success, -1 or -2 on failure
 * (both are logged).
 */
static int totemknet_set_knet_crypto(struct totemknet_instance *instance)
{
	struct knet_handle_crypto_cfg crypto_cfg;
	int res;

	/* Do not hand uninitialised stack bytes to knet */
	memset(&crypto_cfg, 0, sizeof(crypto_cfg));

	/* These have already been validated */
	memcpy(crypto_cfg.crypto_model, instance->totem_config->crypto_model, sizeof(crypto_cfg.crypto_model));
	/* Each copy is bounded by the size of its own destination field */
	memcpy(crypto_cfg.crypto_cipher_type, instance->totem_config->crypto_cipher_type, sizeof(crypto_cfg.crypto_cipher_type));
	memcpy(crypto_cfg.crypto_hash_type, instance->totem_config->crypto_hash_type, sizeof(crypto_cfg.crypto_hash_type));
	memcpy(crypto_cfg.private_key, instance->totem_config->private_key, instance->totem_config->private_key_len);
	crypto_cfg.private_key_len = instance->totem_config->private_key_len;

#ifdef HAVE_KNET_CRYPTO_RECONF

	knet_log_printf(LOGSYS_LEVEL_DEBUG, "Configuring crypto %s/%s/%s on index %d",
			crypto_cfg.crypto_model,
			crypto_cfg.crypto_cipher_type,
			crypto_cfg.crypto_hash_type,
			instance->totem_config->crypto_index
		);

	/* If crypto is being disabled we need to explicitly allow cleartext traffic in knet */
	if (!totemknet_is_crypto_enabled(instance)) {
		res = knet_handle_crypto_rx_clear_traffic(instance->knet_handle, KNET_CRYPTO_RX_ALLOW_CLEAR_TRAFFIC);
		if (res) {
			knet_log_printf(LOGSYS_LEVEL_ERROR, "knet_handle_crypto_rx_clear_traffic(ALLOW) failed %s", strerror(errno));
		}
	}

	/* use_config will be called later when all nodes are synced */
	res = knet_handle_crypto_set_config(instance->knet_handle, &crypto_cfg, instance->totem_config->crypto_index);
	if (res == -1) {
		knet_log_printf(LOGSYS_LEVEL_ERROR, "knet_handle_crypto_set_config (index %d) failed: %s", instance->totem_config->crypto_index, strerror(errno));
		goto exit_error;
	}
	if (res == -2) {
		knet_log_printf(LOGSYS_LEVEL_ERROR, "knet_handle_crypto_set_config (index %d) failed: -2", instance->totem_config->crypto_index);
		goto exit_error;
	}
#else
	knet_log_printf(LOGSYS_LEVEL_DEBUG, "Configuring crypto %s/%s/%s",
			crypto_cfg.crypto_model,
			crypto_cfg.crypto_cipher_type,
			crypto_cfg.crypto_hash_type
		);

	res = knet_handle_crypto(instance->knet_handle, &crypto_cfg);
	if (res == -1) {
		knet_log_printf(LOGSYS_LEVEL_ERROR, "knet_handle_crypto failed: %s", strerror(errno));
		goto exit_error;
	}
	if (res == -2) {
		knet_log_printf(LOGSYS_LEVEL_ERROR, "knet_handle_crypto failed: -2");
		goto exit_error;
	}
#endif

exit_error:
	return res;
}
/*
* Create an instance
*/
int totemknet_initialize (
qb_loop_t *poll_handle,
void **knet_context,
struct totem_config *totem_config,
totemsrp_stats_t *stats,
void *context,
void (*deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len,
const struct sockaddr_storage *system_from),
void (*iface_change_fn) (
void *context,
const struct totem_ip_address *iface_address,
unsigned int link_no),
void (*mtu_changed) (
void *context,
int net_mtu),
void (*target_set_completed) (
void *context))
{
struct totemknet_instance *instance;
int8_t channel=0;
int res;
int i;
instance = malloc (sizeof (struct totemknet_instance));
if (instance == NULL) {
return (-1);
}
totemknet_instance_initialize (instance);
instance->totem_config = totem_config;
/*
* Configure logging
*/
instance->totemknet_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security;
instance->totemknet_log_level_error = totem_config->totem_logging_configuration.log_level_error;
instance->totemknet_log_level_warning = totem_config->totem_logging_configuration.log_level_warning;
instance->totemknet_log_level_notice = totem_config->totem_logging_configuration.log_level_notice;
instance->totemknet_log_level_debug = totem_config->totem_logging_configuration.log_level_debug;
instance->totemknet_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
instance->totemknet_log_printf = totem_config->totem_logging_configuration.log_printf;
instance->knet_subsys_id = _logsys_subsys_create("KNET", "libknet.h");
/*
* Initialize local variables for totemknet
*/
instance->our_nodeid = instance->totem_config->node_id;
for (i=0; i< INTERFACE_MAX; i++) {
totemip_copy(&instance->my_ids[i], &totem_config->interfaces[i].bindnet);
instance->my_ids[i].nodeid = instance->our_nodeid;
instance->ip_port[i] = totem_config->interfaces[i].ip_port;
/* Needed for totemsrp */
totem_config->interfaces[i].boundto.nodeid = instance->our_nodeid;
}
instance->poll_handle = poll_handle;
instance->context = context;
instance->totemknet_deliver_fn = deliver_fn;
instance->totemknet_iface_change_fn = iface_change_fn;
instance->totemknet_mtu_changed = mtu_changed;
instance->totemknet_target_set_completed = target_set_completed;
instance->loopback_link = 0;
res = pipe(instance->logpipes);
if (res == -1) {
KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_CRIT, "failed to create pipe for instance->logpipes");
goto exit_error;
}
if (fcntl(instance->logpipes[0], F_SETFL, O_NONBLOCK) == -1 ||
fcntl(instance->logpipes[1], F_SETFL, O_NONBLOCK) == -1) {
KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_CRIT, "failed to set O_NONBLOCK flag for instance->logpipes");
goto exit_error;
}
#if !defined(KNET_API_VER) || (KNET_API_VER == 1)
instance->knet_handle = knet_handle_new(instance->totem_config->node_id, instance->logpipes[1], KNET_LOG_DEBUG);
#endif
#if KNET_API_VER == 2
instance->knet_handle = knet_handle_new(instance->totem_config->node_id, instance->logpipes[1], KNET_LOG_DEBUG, KNET_HANDLE_FLAG_PRIVILEGED);
#endif
if (!instance->knet_handle) {
KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_CRIT, "knet_handle_new failed");
goto exit_error;
}
knet_set_access_list_config(instance);
res = knet_handle_pmtud_setfreq(instance->knet_handle, instance->totem_config->knet_pmtud_interval);
if (res) {
KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_handle_pmtud_setfreq failed");
}
res = knet_handle_enable_filter(instance->knet_handle, instance, dst_host_filter_callback_fn);
if (res) {
KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_handle_enable_filter failed");
}
res = knet_handle_enable_sock_notify(instance->knet_handle, instance, socket_error_callback_fn);
if (res) {
KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_handle_enable_sock_notify failed");
}
res = knet_host_enable_status_change_notify(instance->knet_handle, instance, host_change_callback_fn);
if (res) {
KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_host_enable_status_change_notify failed");
}
res = knet_handle_enable_pmtud_notify(instance->knet_handle, instance, pmtu_change_callback_fn);
if (res) {
KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "knet_handle_enable_pmtud_notify failed");
}
global_instance = instance;
/* Get an fd into knet */
instance->knet_fd = 0;
res = knet_handle_add_datafd(instance->knet_handle, &instance->knet_fd, &channel);
if (res) {
knet_log_printf(LOG_DEBUG, "knet_handle_add_datafd failed: %s", strerror(errno));
goto exit_error;
}
/* Enable crypto if requested */
#ifdef HAVE_KNET_CRYPTO_RECONF
if (totemknet_is_crypto_enabled(instance)) {
res = totemknet_set_knet_crypto(instance);
if (res == 0) {
res = knet_handle_crypto_use_config(instance->knet_handle, totem_config->crypto_index);
if (res) {
knet_log_printf(LOG_DEBUG, "knet_handle_crypto_use_config failed: %s", strerror(errno));
goto exit_error;
}
} else {
knet_log_printf(LOG_DEBUG, "Failed to set up knet crypto");
goto exit_error;
}
res = knet_handle_crypto_rx_clear_traffic(instance->knet_handle, KNET_CRYPTO_RX_DISALLOW_CLEAR_TRAFFIC);
if (res) {
knet_log_printf(LOG_DEBUG, "knet_handle_crypto_rx_clear_traffic (DISALLOW) failed: %s", strerror(errno));
goto exit_error;
}
} else {
res = knet_handle_crypto_rx_clear_traffic(instance->knet_handle, KNET_CRYPTO_RX_ALLOW_CLEAR_TRAFFIC);
if (res) {
knet_log_printf(LOG_DEBUG, "knet_handle_crypto_rx_clear_traffic (ALLOW) failed: %s", strerror(errno));
goto exit_error;
}
}
#else
if (totemknet_is_crypto_enabled(instance)) {
res = totemknet_set_knet_crypto(instance);
if (res) {
knet_log_printf(LOG_DEBUG, "Failed to set up knet crypto");
goto exit_error;
}
}
#endif
/* Set up compression */
if (strcmp(totem_config->knet_compression_model, "none") != 0) {
/* Not fatal, but will log */
(void)totemknet_configure_compression(knet_context, totem_config);
}
knet_handle_setfwd(instance->knet_handle, 1);
instance->link_mode = KNET_LINK_POLICY_PASSIVE;
if (strcmp(instance->totem_config->link_mode, "active")==0) {
instance->link_mode = KNET_LINK_POLICY_ACTIVE;
}
if (strcmp(instance->totem_config->link_mode, "rr")==0) {
instance->link_mode = KNET_LINK_POLICY_RR;
}
for (i=0; i<INTERFACE_MAX; i++) {
instance->link_status[i] = malloc(CFG_INTERFACE_STATUS_MAX_LEN);
if (!instance->link_status[i]) {
goto exit_error;
}
}
qb_loop_poll_add (instance->poll_handle,
QB_LOOP_MED,
instance->logpipes[0],
POLLIN, instance, log_deliver_fn);
qb_loop_poll_add (instance->poll_handle,
QB_LOOP_HIGH,
instance->knet_fd,
POLLIN, instance, data_deliver_fn);
/*
* Upper layer isn't ready to receive message because it hasn't
* initialized yet. Add short timer to check the interfaces.
*/
qb_loop_timer_add (instance->poll_handle,
QB_LOOP_MED,
100*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
totemknet_start_merge_detect_timeout(instance);
/* Start listening for config changes */
totemknet_add_config_notifications(instance);
/* Add stats keys to icmap */
stats_knet_add_handle();
knet_log_printf (LOGSYS_LEVEL_INFO, "totemknet initialized");
*knet_context = instance;
return (0);
exit_error:
log_flush_messages(instance);
free(instance);
return (-1);
}
/*
 * Allocate one message buffer for the knet transport.
 *
 * The buffer is KNET_MAX_PACKET_SIZE + 512 bytes long; the extra 512 bytes
 * leave room for a struct mcast in front of an encapsulated message.
 *
 * Returns whatever malloc() returns, i.e. NULL when the allocation fails.
 */
void *totemknet_buffer_alloc (void)
{
	/* Need to have space for a message AND a struct mcast */
	const size_t buffer_bytes = KNET_MAX_PACKET_SIZE + 512;

	return malloc(buffer_bytes);
}
/*
 * Release a buffer with free().
 *
 * ptr may be NULL, in which case free() does nothing.
 */
void totemknet_buffer_release (void *ptr)
{
	/*
	 * Plain call rather than "return free (ptr);": ISO C does not allow a
	 * return statement with an expression in a function returning void.
	 */
	free (ptr);
}
/*
 * Processor-count hook of the transport interface.
 *
 * Neither argument is used by this implementation; it always reports success.
 *
 * Returns 0.
 */
int totemknet_processor_count_set (
	void *knet_context,
	int processor_count)
{
	(void)knet_context;
	(void)processor_count;

	return 0;
}
/*
 * Receive-flush hook of the transport interface.
 *
 * This implementation does nothing with knet_context and always succeeds.
 *
 * Returns 0.
 */
int totemknet_recv_flush (void *knet_context)
{
	(void)knet_context;

	return 0;
}
/*
 * Send-flush hook of the transport interface.
 *
 * This implementation does nothing with knet_context and always succeeds.
 *
 * Returns 0.
 */
int totemknet_send_flush (void *knet_context)
{
	(void)knet_context;

	return 0;
}
/*
 * Send a token message to the node currently stored in
 * instance->token_target.
 *
 * knet_context: the struct totemknet_instance for this transport.
 * msg, msg_len: message bytes handed to ucast_sendmsg().
 *
 * Returns 0 unconditionally; no result from ucast_sendmsg() is reported
 * back to the caller.
 */
int totemknet_token_send (
	void *knet_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemknet_instance *instance = knet_context;

	ucast_sendmsg (instance, &instance->token_target, msg, msg_len);

	return 0;
}
/*
 * Multicast a message through mcast_sendmsg() with its last argument
 * set to 0.
 *
 * knet_context: the struct totemknet_instance for this transport.
 * msg, msg_len: message bytes handed to mcast_sendmsg().
 *
 * Returns 0 unconditionally.
 */
int totemknet_mcast_flush_send (
	void *knet_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemknet_instance *instance = knet_context;

	mcast_sendmsg (instance, msg, msg_len, 0);

	return 0;
}
/*
 * Multicast a message through mcast_sendmsg() with its last argument
 * set to 1.
 *
 * knet_context: the struct totemknet_instance for this transport.
 * msg, msg_len: message bytes handed to mcast_sendmsg().
 *
 * Returns 0 unconditionally.
 */
int totemknet_mcast_noflush_send (
	void *knet_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemknet_instance *instance = knet_context;

	mcast_sendmsg (instance, msg, msg_len, 1);

	return 0;
}
/*
 * Interface-check hook: only emits a debug log line.
 *
 * Returns 0 unconditionally.
 */
extern int totemknet_iface_check (void *knet_context)
{
	/*
	 * NOTE(review): "instance" is not referenced directly below; it looks
	 * like the knet_log_printf() macro picks it up by name - confirm before
	 * renaming or removing it.
	 */
	struct totemknet_instance *instance = knet_context;

	knet_log_printf(LOG_DEBUG, "totemknet: iface_check");

	return 0;
}
/*
 * MTU-adjust hook: logs totem_config->net_mtu at debug level and leaves
 * the configuration untouched.
 */
extern void totemknet_net_mtu_adjust (void *knet_context, struct totem_config *totem_config)
{
	/*
	 * NOTE(review): "instance" appears to be consumed by the
	 * knet_log_printf() macro - confirm before renaming or removing it.
	 */
	struct totemknet_instance *instance = knet_context;

	knet_log_printf(LOG_DEBUG, "totemknet: Returning MTU of %d", totem_config->net_mtu);
}
/*
 * Record the node that future token sends go to, then notify the upper
 * layer through the target_set_completed callback.
 *
 * knet_context: the struct totemknet_instance for this transport.
 * nodeid:       stored into instance->token_target.nodeid.
 *
 * Returns 0 unconditionally.
 */
int totemknet_token_target_set (
	void *knet_context,
	unsigned int nodeid)
{
	struct totemknet_instance *instance = knet_context;

	instance->token_target.nodeid = nodeid;

	/* The callback receives the opaque context registered at initialization */
	instance->totemknet_target_set_completed (instance->context);

	return 0;
}
/*
 * Drain every message that is currently readable on instance->knet_fd,
 * discarding the data (it is read into instance->iov_buffer and ignored).
 *
 * The loop never blocks: poll() is called with a zero timeout and recvmsg()
 * with MSG_DONTWAIT.
 *
 * Returns:
 *   0  nothing was readable,
 *   1  the last recvmsg() succeeded,
 *  -1  the last recvmsg() failed.
 */
extern int totemknet_recv_mcast_empty (
	void *knet_context)
{
	struct totemknet_instance *instance = (struct totemknet_instance *)knet_context;
	ssize_t res;	/* recvmsg() returns ssize_t, not unsigned int */
	struct sockaddr_storage system_from;
	struct msghdr msg_hdr;
	struct iovec iov_recv;
	struct pollfd ufd;
	int nfds;
	int msg_processed = 0;

	iov_recv.iov_base = instance->iov_buffer;
	iov_recv.iov_len = KNET_MAX_PACKET_SIZE;

	/*
	 * Zero the whole header instead of assigning the optional members one
	 * by one under HAVE_MSGHDR_* guards: this covers msg_control,
	 * msg_controllen, msg_flags and the legacy msg_accrights members on
	 * every platform, whichever of them exist.
	 */
	memset(&msg_hdr, 0, sizeof(msg_hdr));
	msg_hdr.msg_name = &system_from;
	msg_hdr.msg_iov = &iov_recv;
	msg_hdr.msg_iovlen = 1;

	for (;;) {
		ufd.fd = instance->knet_fd;
		ufd.events = POLLIN;
		ufd.revents = 0;

		nfds = poll (&ufd, 1, 0);
		if (nfds != 1) {
			/* Nothing left to read (or poll failed) */
			break;
		}
		if (!(ufd.revents & POLLIN)) {
			/*
			 * Only an error/hangup condition is pending. Nothing can be
			 * consumed, so looping again would spin forever.
			 */
			break;
		}

		/* msg_namelen is value-result: reset it before every call */
		msg_hdr.msg_namelen = sizeof (struct sockaddr_storage);

		res = recvmsg (instance->knet_fd, &msg_hdr, MSG_NOSIGNAL | MSG_DONTWAIT);
		msg_processed = (res != -1) ? 1 : -1;
	}

	return (msg_processed);
}
/*
 * Store the local address and port for one link.
 *
 * knet_context: the struct totemknet_instance for this transport.
 * local_addr:   copied into instance->my_ids[iface_no].
 * ip_port:      stored into instance->ip_port[iface_no].
 * iface_no:     index into both arrays; it is not range-checked here.
 *
 * Returns 0 unconditionally.
 */
int totemknet_iface_set (void *knet_context,
	const struct totem_ip_address *local_addr,
	unsigned short ip_port,
	unsigned int iface_no)
{
	struct totemknet_instance *instance = knet_context;

	/* Record both per-link values first, then report them */
	totemip_copy(&instance->my_ids[iface_no], local_addr);
	instance->ip_port[iface_no] = ip_port;

	knet_log_printf(LOG_INFO, "Configured link number %d: local addr: %s, port=%d", iface_no, totemip_print(local_addr), ip_port);

	return (0);
}
/*
 * Register one member/link pair with knet.
 *
 * Steps, in order: add the host to knet if it is not already in knet's host
 * list, set the host's link policy, configure the link (loopback transport
 * when the member is this node, the configured transport otherwise), set
 * link priority, optionally set ping timers and pong count, enable the link
 * and register the link with the stats module.
 *
 * knet_context: the struct totemknet_instance for this transport.
 * local:        local address used for the link.
 * member:       remote address; member->nodeid identifies the knet host.
 * link_no:      link index; forced to 0 for the loopback link.
 *
 * Returns 0 on success (including when the loopback link already exists),
 * -1 when reading the host list, adding the host, setting the policy,
 * configuring the link or enabling the link fails. Failures to set the
 * priority, ping timers or pong count are logged only.
 */
int totemknet_member_add (
void *knet_context,
const struct totem_ip_address *local,
const struct totem_ip_address *member,
int link_no)
{
struct totemknet_instance *instance = (struct totemknet_instance *)knet_context;
int err;
/* Note: the port is looked up with link_no as passed in, i.e. before the loopback case below may reset link_no to 0 */
int port = instance->ip_port[link_no];
struct sockaddr_storage remote_ss;
struct sockaddr_storage local_ss;
int addrlen;
int i;
int host_found = 0;
knet_node_id_t host_ids[KNET_MAX_HOST];
size_t num_host_ids;
/* Only create 1 loopback link and use link 0 */
if (member->nodeid == instance->our_nodeid) {
if (!instance->loopback_link) {
link_no = 0;
instance->loopback_link = 1;
} else {
/* Already done */
return 0;
}
}
knet_log_printf (LOGSYS_LEVEL_DEBUG, "knet: member_add: " CS_PRI_NODE_ID " (%s), link=%d", member->nodeid, totemip_print(member), link_no);
knet_log_printf (LOGSYS_LEVEL_DEBUG, "knet: local: " CS_PRI_NODE_ID " (%s)", local->nodeid, totemip_print(local));
/* Only add the host if it doesn't already exist in knet */
err = knet_host_get_host_list(instance->knet_handle, host_ids, &num_host_ids);
if (err) {
KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_host_get_host_list");
return -1;
}
/* Linear scan of the hosts knet already knows about */
for (i=0; i<num_host_ids; i++) {
if (host_ids[i] == member->nodeid) {
host_found = 1;
}
}
if (!host_found) {
err = knet_host_add(instance->knet_handle, member->nodeid);
/* A failure with errno EEXIST is tolerated; err stays non-zero in that case */
if (err != 0 && errno != EEXIST) {
KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_host_add");
return -1;
}
} else {
knet_log_printf (LOGSYS_LEVEL_DEBUG, "nodeid " CS_PRI_NODE_ID " already added", member->nodeid);
}
/* err is 0 here if the host was already listed or was just added; the policy is skipped after a tolerated EEXIST failure */
if (err == 0) {
if (knet_host_set_policy(instance->knet_handle, member->nodeid, instance->link_mode)) {
KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_set_policy failed");
return -1;
}
}
memset(&local_ss, 0, sizeof(local_ss));
memset(&remote_ss, 0, sizeof(remote_ss));
/* Casts to remove const */
totemip_totemip_to_sockaddr_convert((struct totem_ip_address *)member, port, &remote_ss, &addrlen);
totemip_totemip_to_sockaddr_convert((struct totem_ip_address *)local, port, &local_ss, &addrlen);
/* Our own node gets the loopback transport, every other node the transport configured for this link */
if (member->nodeid == instance->our_nodeid) {
knet_log_printf (LOGSYS_LEVEL_DEBUG, "knet: loopback link is %d\n", link_no);
err = knet_link_set_config(instance->knet_handle, member->nodeid, link_no,
KNET_TRANSPORT_LOOPBACK,
&local_ss, &remote_ss, KNET_LINK_FLAG_TRAFFICHIPRIO);
}
else {
err = knet_link_set_config(instance->knet_handle, member->nodeid, link_no,
instance->totem_config->interfaces[link_no].knet_transport,
&local_ss, &remote_ss, KNET_LINK_FLAG_TRAFFICHIPRIO);
}
if (err) {
KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_config failed");
return -1;
}
knet_log_printf (LOGSYS_LEVEL_DEBUG, "knet: member_add: Setting link prio to %d",
instance->totem_config->interfaces[link_no].knet_link_priority);
/* A priority failure is logged but does not abort the member add */
err = knet_link_set_priority(instance->knet_handle, member->nodeid, link_no,
instance->totem_config->interfaces[link_no].knet_link_priority);
if (err) {
KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_priority for nodeid " CS_PRI_NODE_ID ", link %d failed", member->nodeid, link_no);
}
/* ping timeouts maybe 0 here for a newly added interface so we leave this till later, it will
get done in totemknet_refresh_config */
if (instance->totem_config->interfaces[link_no].knet_ping_interval != 0) {
/* Ping timer and pong count failures are logged only */
err = knet_link_set_ping_timers(instance->knet_handle, member->nodeid, link_no,
instance->totem_config->interfaces[link_no].knet_ping_interval,
instance->totem_config->interfaces[link_no].knet_ping_timeout,
instance->totem_config->interfaces[link_no].knet_ping_precision);
if (err) {
KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_ping_timers for nodeid " CS_PRI_NODE_ID ", link %d failed", member->nodeid, link_no);
}
err = knet_link_set_pong_count(instance->knet_handle, member->nodeid, link_no,
instance->totem_config->interfaces[link_no].knet_pong_count);
if (err) {
KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_pong_count for nodeid " CS_PRI_NODE_ID ", link %d failed", member->nodeid, link_no);
}
}
/* Enabling the link is the last knet step; failure here is fatal for this call */
err = knet_link_set_enable(instance->knet_handle, member->nodeid, link_no, 1);
if (err) {
KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set_enable for nodeid " CS_PRI_NODE_ID ", link %d failed", member->nodeid, link_no);
return -1;
}
/* register stats */
stats_knet_add_member(member->nodeid, link_no);
return (0);
}
/*
 * Remove one link of a member from knet and, if that was the member's last
 * link, remove the host as well.
 *
 * knet_context: the struct totemknet_instance for this transport.
 * token_target: member whose link is removed (token_target->nodeid).
 * link_no:      link index to remove.
 *
 * Returns 0 when the member is this node (nothing is removed), when the
 * link was removed but other links remain, or when the remaining links
 * cannot be listed. Otherwise returns the failing knet call's result, or
 * the result of knet_host_remove() when the host is removed.
 */
int totemknet_member_remove (
	void *knet_context,
	const struct totem_ip_address *token_target,
	int link_no)
{
	struct totemknet_instance *instance = (struct totemknet_instance *)knet_context;
	uint8_t remaining_links[KNET_MAX_LINK];
	size_t remaining_count;
	int rc;

	knet_log_printf (LOGSYS_LEVEL_DEBUG, "knet: member_remove: " CS_PRI_NODE_ID ", link=%d", token_target->nodeid, link_no);

	/* Don't remove the link with the loopback on it until we shut down */
	if (token_target->nodeid == instance->our_nodeid) {
		return 0;
	}

	/* Tidy stats */
	stats_knet_del_member(token_target->nodeid, link_no);

	/* Take the link down before its configuration can be cleared */
	rc = knet_link_set_enable(instance->knet_handle, token_target->nodeid, link_no, 0);
	if (rc != 0) {
		KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_set enable(off) for nodeid " CS_PRI_NODE_ID ", link %d failed", token_target->nodeid, link_no);
		return rc;
	}

	rc = knet_link_clear_config(instance->knet_handle, token_target->nodeid, link_no);
	if (rc != 0) {
		KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_link_clear_config for nodeid " CS_PRI_NODE_ID ", link %d failed", token_target->nodeid, link_no);
		return rc;
	}

	/* If this was the last link, then remove the node */
	if (knet_link_get_link_list(instance->knet_handle,
				    token_target->nodeid, remaining_links, &remaining_count) != 0) {
		return (0); /* not really failure */
	}

	if (remaining_count != 0) {
		return 0;
	}

	return knet_host_remove(instance->knet_handle, token_target->nodeid);
}
/*
 * Rebind-IP hook of the transport interface.
 *
 * This implementation does nothing with knet_context and always succeeds.
 *
 * Returns 0.
 */
int totemknet_member_list_rebind_ip (
	void *knet_context)
{
	(void)knet_context;

	return 0;
}
/*
 * Push the compression settings from totem_config into knet.
 *
 * knet_context: the struct totemknet_instance for this transport.
 * totem_config: source of knet_compression_model, _threshold and _level.
 *
 * Returns the result of knet_handle_compress() (non-zero is logged as an
 * error), or -1 if the model name does not fit knet's buffer.
 */
static int totemknet_configure_compression (
	void *knet_context,
	struct totem_config *totem_config)
{
	struct totemknet_instance *instance = (struct totemknet_instance *)knet_context;
	struct knet_handle_compress_cfg compress_cfg;
	int res = 0;

	assert(strlen(totem_config->knet_compression_model) < sizeof(compress_cfg.compress_model));

	/*
	 * The assert above vanishes when built with NDEBUG, which would leave
	 * the strcpy() below unchecked. Repeat the test at run time so an
	 * over-long model name can never overflow compress_model.
	 */
	if (strlen(totem_config->knet_compression_model) >= sizeof(compress_cfg.compress_model)) {
		knet_log_printf (LOGSYS_LEVEL_ERROR, "knet compression model name is too long");
		return -1;
	}

	/* Start from a fully defined structure before filling in the members */
	memset(&compress_cfg, 0, sizeof(compress_cfg));
	strcpy(compress_cfg.compress_model, totem_config->knet_compression_model);
	compress_cfg.compress_threshold = totem_config->knet_compression_threshold;
	compress_cfg.compress_level = totem_config->knet_compression_level;

	res = knet_handle_compress(instance->knet_handle, &compress_cfg);
	if (res) {
		KNET_LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, "knet_handle_compress failed");
	}
	return res;
}
int totemknet_reconfigure (
void *knet_context,
struct totem_config *totem_config)
{
struct totemknet_instance *instance = (struct totemknet_instance *)knet_context;
int res = 0;
(void)totemknet_configure_compression(knet_context, totem_config);
#ifdef HAVE_LIBNOZZLE
/* Set up nozzle device(s). Return code is ignored, because inability
* configure nozzle is not fatal problem, errors are logged and
* there is not much else we can do */
(void)setup_nozzle(instance);
#endif
if (totem_config->crypto_changed) {
/* Flip crypto_index */
totem_config->crypto_index = 3-totem_config->crypto_index;
res = totemknet_set_knet_crypto(instance);
knet_log_printf(LOG_INFO, "kronosnet crypto reconfigured on index %d: %s/%s/%s", totem_config->crypto_index,
totem_config->crypto_model,
totem_config->crypto_cipher_type,
totem_config->crypto_hash_type);
}
return (res);
}
/*
 * Run one phase of a cluster-wide crypto reconfiguration.
 *
 * CRYPTO_RECONFIG_PHASE_ACTIVATE switches knet to the crypto config at
 * totem_config->crypto_index (or to config 0 when crypto is disabled).
 * CRYPTO_RECONFIG_PHASE_CLEANUP overwrites the other config slot
 * (3 - crypto_index) with "none" and, when crypto is enabled, stops
 * accepting clear traffic.
 *
 * Without HAVE_KNET_CRYPTO_RECONF the function does nothing.
 *
 * Returns 0 unconditionally; knet failures are logged only.
 */
int totemknet_crypto_reconfigure_phase (
	void *knet_context,
	struct totem_config *totem_config,
	cfg_message_crypto_reconfig_phase_t phase)
{
#ifdef HAVE_KNET_CRYPTO_RECONF
	int res;
	int config_to_use;
	int config_to_clear;
	struct knet_handle_crypto_cfg crypto_cfg;
	struct totemknet_instance *instance = (struct totemknet_instance *)knet_context;

	knet_log_printf(LOGSYS_LEVEL_DEBUG, "totemknet_crypto_reconfigure_phase %d, index=%d\n", phase, totem_config->crypto_index);

	switch (phase) {
	case CRYPTO_RECONFIG_PHASE_ACTIVATE:
		config_to_use = totem_config->crypto_index;
		if (!totemknet_is_crypto_enabled(instance)) {
			config_to_use = 0; /* we are clearing it */
		}

		/* Enable the new config on this node */
		res = knet_handle_crypto_use_config(instance->knet_handle, config_to_use);
		if (res == -1) {
			knet_log_printf(LOGSYS_LEVEL_ERROR, "knet_handle_crypto_use_config %d failed: %s", config_to_use, strerror(errno));
		}
		break;

	case CRYPTO_RECONFIG_PHASE_CLEANUP:
		/*
		 * All nodes should now have the new config. Clear the old one out
		 * OR disable crypto entirely if that's what the new config insists on.
		 */
		config_to_clear = 3-totem_config->crypto_index;
		knet_log_printf(LOGSYS_LEVEL_DEBUG, "Clearing old knet crypto config %d\n", config_to_clear);

		/*
		 * Zero the whole structure first so that the members not set
		 * below (the private key and its length) are never handed to
		 * knet uninitialised.
		 */
		memset(&crypto_cfg, 0, sizeof(crypto_cfg));
		strcpy(crypto_cfg.crypto_model, "none");
		strcpy(crypto_cfg.crypto_cipher_type, "none");
		strcpy(crypto_cfg.crypto_hash_type, "none");
		res = knet_handle_crypto_set_config(instance->knet_handle, &crypto_cfg, config_to_clear);
		if (res == -1) {
			knet_log_printf(LOGSYS_LEVEL_ERROR, "knet_handle_crypto_set_config to clear index %d failed: %s", config_to_clear, strerror(errno));
		}
		if (res == -2) {
			knet_log_printf(LOGSYS_LEVEL_ERROR, "knet_handle_crypto_set_config to clear index %d failed: -2", config_to_clear);
		}

		/* If crypto is enabled then disable all cleartext reception */
		if (totemknet_is_crypto_enabled(instance)) {
			res = knet_handle_crypto_rx_clear_traffic(instance->knet_handle, KNET_CRYPTO_RX_DISALLOW_CLEAR_TRAFFIC);
			if (res) {
				knet_log_printf(LOGSYS_LEVEL_ERROR, "knet_handle_crypto_rx_clear_traffic(DISALLOW) failed %s", strerror(errno));
			}
		}
	}
#endif
	return 0;
}
/*
 * Ask knet to clear its statistics, passing
 * KNET_CLEARSTATS_HANDLE_AND_LINK as the scope.
 *
 * The result of knet_handle_clear_stats() is deliberately ignored.
 */
void totemknet_stats_clear (
	void *knet_context)
{
	struct totemknet_instance *instance = knet_context;

	(void) knet_handle_clear_stats(instance->knet_handle, KNET_CLEARSTATS_HANDLE_AND_LINK);
}
/*
 * For the stats module: fetch the knet status of one link of one node,
 * using the file-level global_instance.
 *
 * node, link_no: identify the link.
 * status:        filled in by knet_link_get_status().
 *
 * Returns CS_OK on success, CS_ERR_NOT_EXIST when there is no knet instance
 * or link_no is not below INTERFACE_MAX, otherwise a CS_ERR_* code derived
 * from errno (EINVAL, EBUSY and EDEADLK have dedicated codes, everything
 * else is CS_ERR_LIBRARY).
 */
int totemknet_link_get_status (
	knet_node_id_t node, uint8_t link_no,
	struct knet_link_status *status)
{
	int knet_errno;

	/* We are probably not using knet */
	if (!global_instance) {
		return CS_ERR_NOT_EXIST;
	}

	/* Invalid link number */
	if (link_no >= INTERFACE_MAX) {
		return CS_ERR_NOT_EXIST;
	}

	if (knet_link_get_status(global_instance->knet_handle, node, link_no, status, sizeof(struct knet_link_status)) == 0) {
		return (CS_OK);
	}

	/* Translate the knet failure into a corosync error code */
	knet_errno = errno;
	if (knet_errno == EINVAL) {
		return (CS_ERR_INVALID_PARAM);
	}
	if (knet_errno == EBUSY) {
		return (CS_ERR_BUSY);
	}
	if (knet_errno == EDEADLK) {
		return (CS_ERR_TRY_AGAIN);
	}
	return (CS_ERR_LIBRARY);
}
/*
 * Fetch the handle-wide knet statistics, using the file-level
 * global_instance.
 *
 * stats: filled in by knet_handle_get_stats().
 *
 * Returns CS_OK on success, CS_ERR_NOT_EXIST when there is no knet
 * instance, otherwise qb_to_cs_error(-errno).
 */
int totemknet_handle_get_stats (
	struct knet_handle_stats *stats)
{
	/* We are probably not using knet */
	if (!global_instance) {
		return CS_ERR_NOT_EXIST;
	}

	if (knet_handle_get_stats(global_instance->knet_handle, stats, sizeof(struct knet_handle_stats)) == 0) {
		return CS_OK;
	}

	return (qb_to_cs_error(-errno));
}
/*
 * Timer callback for merge detection.
 *
 * If no merge-detect message was counted since the previous expiry, flag
 * that one should be sent. The counter is then reset and the timer is
 * started again.
 *
 * data: the struct totemknet_instance the timer was registered with.
 */
static void timer_function_merge_detect_timeout (
	void *data)
{
	struct totemknet_instance *instance = data;
	const int none_sent = (instance->merge_detect_messages_sent_before_timeout == 0);

	if (none_sent) {
		instance->send_merge_detect_message = 1;
	}

	/* Start counting afresh for the next period */
	instance->merge_detect_messages_sent_before_timeout = 0;

	totemknet_start_merge_detect_timeout(instance);
}
/*
 * Schedule timer_function_merge_detect_timeout() on the instance's poll
 * loop.
 *
 * The delay is twice totem_config->merge_timeout, scaled by
 * QB_TIME_NS_IN_MSEC. The timer handle is stored in
 * instance->timer_merge_detect_timeout.
 */
static void totemknet_start_merge_detect_timeout(
	void *knet_context)
{
	struct totemknet_instance *instance = knet_context;

	qb_loop_timer_add(
		instance->poll_handle,
		QB_LOOP_MED,
		instance->totem_config->merge_timeout * 2 * QB_TIME_NS_IN_MSEC,
		(void *)instance,
		timer_function_merge_detect_timeout,
		&instance->timer_merge_detect_timeout);
}
/*
 * Cancel the merge-detect timer whose handle is stored in
 * instance->timer_merge_detect_timeout.
 */
static void totemknet_stop_merge_detect_timeout(
	void *knet_context)
{
	struct totemknet_instance *instance = knet_context;

	qb_loop_timer_del(
		instance->poll_handle,
		instance->timer_merge_detect_timeout);
}
/*
 * Deliver every log message that is already waiting on the read end of the
 * instance's log pipe.
 *
 * Polls with a zero timeout, so it never blocks. It stops as soon as
 * nothing is readable or log_deliver_fn() returns non-zero.
 */
static void log_flush_messages (void *knet_context)
{
	struct totemknet_instance *instance = knet_context;
	struct pollfd pfd;

	for (;;) {
		pfd.fd = instance->logpipes[0];
		pfd.events = POLLIN;
		pfd.revents = 0;

		if (poll(&pfd, 1, 0) <= 0) {
			break;
		}
		if (!(pfd.revents & POLLIN)) {
			break;
		}
		if (log_deliver_fn(instance->logpipes[0], POLLIN, instance) != 0) {
			break;
		}
	}
}
#ifdef HAVE_LIBNOZZLE
#define NOZZLE_NAME "nozzle.name"
#define NOZZLE_IPADDR "nozzle.ipaddr"
#define NOZZLE_PREFIX "nozzle.ipprefix"
#define NOZZLE_MACADDR "nozzle.macaddr"
#define NOZZLE_CHANNEL 1
/*
 * Build the path of the nozzle up/down script directory: the directory of
 * the file named by corosync_get_config_file(), with "/updown.d" appended.
 *
 * Returns a pointer to a static buffer (overwritten by the next call), or
 * NULL if either path does not fit its buffer.
 */
static char *get_nozzle_script_dir(void *knet_context)
{
	/*
	 * NOTE(review): "instance" appears to be consumed by the
	 * knet_log_printf() macro - confirm before renaming or removing it.
	 */
	struct totemknet_instance *instance = (struct totemknet_instance *)knet_context;
	static char updown_dirname[PATH_MAX + FILENAME_MAX + 1];
	char conf_path[PATH_MAX + FILENAME_MAX + 1];
	const char *conf_dir;
	int len;

	/* dirname() may modify its argument, so work on a private copy */
	len = snprintf(conf_path, sizeof(conf_path), "%s",
		       corosync_get_config_file());
	if (len >= sizeof(conf_path)) {
		knet_log_printf (LOGSYS_LEVEL_DEBUG, "nozzle up/down path too long");
		return NULL;
	}

	conf_dir = dirname(conf_path);

	len = snprintf(updown_dirname, sizeof(updown_dirname), "%s/%s",
		       conf_dir, "updown.d");
	if (len >= sizeof(updown_dirname)) {
		knet_log_printf (LOGSYS_LEVEL_DEBUG, "nozzle up/down path too long");
		return NULL;
	}

	return updown_dirname;
}
/*
 * Run the nozzle up/down script of the given type and log any failure.
 *
 * Deliberately doesn't return the status as the caller doesn't care.
 * The result is logged though.
 *
 * instance: transport instance owning nozzle_handle.
 * type:     action passed to nozzle_run_updown().
 * typename: human-readable action name used in log messages.
 */
static void run_nozzle_script(struct totemknet_instance *instance, int type, const char *typename)
{
	int res;
	/*
	 * Start from NULL: nozzle_run_updown() can fail before it stores
	 * anything here, and the script may produce no output at all.
	 */
	char *exec_string = NULL;

	res = nozzle_run_updown(instance->nozzle_handle, type, &exec_string);
	if (res == -1 && errno != ENOENT) {
		knet_log_printf (LOGSYS_LEVEL_INFO, "exec nozzle %s script failed: %s", typename, strerror(errno));
	} else if (res == -2) {
		knet_log_printf (LOGSYS_LEVEL_INFO, "nozzle %s script failed", typename);
		if (exec_string) {
			knet_log_printf (LOGSYS_LEVEL_INFO, "%s", exec_string);
		}
	}

	/* The string is allocated by libnozzle; the caller has to free it */
	free(exec_string);
}
/*
 * Reparse IP address to add in our node ID
 * IPv6 addresses must end in '::'
 * IPv4 addresses must just be valid
 * '/xx' lengths are optional for IPv6, mandatory for IPv4
 *
 * instance:    transport instance (used for logging).
 * input_addr:  configured address; anything containing "::" is handled as
 *              IPv6, everything else as IPv4.
 * prefix:      prefix length as a decimal string; must be 8..64 for IPv6
 *              and 8..30 for IPv4.
 * nodeid:      node ID merged into the host part of the address.
 * output_addr: receives the resulting address string.
 * output_len:  size of output_addr in bytes.
 *
 * Returns 0 on success with the modified address in output_addr, -1 on a
 * bad prefix, an unparsable IPv4 address or a result that does not fit
 * output_addr (an error is logged in each case).
 */
static int reparse_nozzle_ip_address(struct totemknet_instance *instance,
				     const char *input_addr,
				     const char *prefix, int nodeid,
				     char *output_addr, size_t output_len)
{
	char *coloncolon;
	int bits;
	int max_prefix = 64;
	int written;
	uint32_t nodeid_mask;
	uint32_t addr_mask;
	uint32_t masked_nodeid;
	struct in_addr *addr;
	struct totem_ip_address totemip;

	coloncolon = strstr(input_addr, "::");
	if (!coloncolon) {
		max_prefix = 30;
	}

	bits = atoi(prefix);
	if (bits < 8 || bits > max_prefix) {
		knet_log_printf(LOGSYS_LEVEL_ERROR, "nozzle IP address prefix must be >= 8 and <= %d (got %d)", max_prefix, bits);
		return -1;
	}

	/* IPv6 is easy: keep everything before the "::" and append the node ID in hex */
	if (coloncolon) {
		/*
		 * Bounded write: the configured address is arbitrary text, so
		 * the result must be checked against output_len rather than
		 * copied blindly.
		 */
		written = snprintf(output_addr, output_len, "%.*s::%x",
				   (int)(coloncolon - input_addr), input_addr,
				   (unsigned int)nodeid);
		if (written < 0 || (size_t)written >= output_len) {
			knet_log_printf(LOGSYS_LEVEL_ERROR, "nozzle IPv6 address is too long");
			return -1;
		}
		return 0;
	}

	/* For IPv4 we need to parse the address into binary, mask off the required bits,
	 * add in the masked_nodeid and 'print' it out again
	 */
	nodeid_mask = ((uint32_t)1 << (32 - bits)) - 1;	/* low (32 - bits) bits set */
	addr_mask = UINT32_MAX ^ nodeid_mask;
	masked_nodeid = nodeid & nodeid_mask;

	if (totemip_parse(&totemip, input_addr, AF_INET)) {
		knet_log_printf(LOGSYS_LEVEL_ERROR, "Failed to parse IPv4 nozzle IP address");
		return -1;
	}

	/* s_addr is manipulated in network byte order, hence the htonl() on both masks */
	addr = (struct in_addr *)&totemip.addr;
	addr->s_addr &= htonl(addr_mask);
	addr->s_addr |= htonl(masked_nodeid);

	if (inet_ntop(AF_INET, addr, output_addr, output_len) == NULL) {
		knet_log_printf(LOGSYS_LEVEL_ERROR, "Failed to format IPv4 nozzle IP address: %s", strerror(errno));
		return -1;
	}
	return 0;
}
/*
 * Open and configure the nozzle device and attach it to knet.
 *
 * Steps, in order: open the device, set its MAC address, derive the local
 * IP address from ipaddr/prefix and our node ID, add that address, register
 * the device fd with knet on NOZZLE_CHANNEL, run the pre-up script, bring
 * the interface up and run the up script.
 *
 * knet_context: the struct totemknet_instance for this transport.
 * name:         requested interface name (at most IFNAMSIZ bytes are used).
 * ipaddr:       configured address, see reparse_nozzle_ip_address().
 * prefix:       prefix length as a decimal string.
 * macaddr:      MAC address string passed to nozzle_set_mac().
 *
 * Returns 0 on success with instance->nozzle_handle set. On failure
 * returns -1; the device, if it had been opened, is closed again and
 * instance->nozzle_handle is reset to NULL.
 */
static int create_nozzle_device(void *knet_context, const char *name,
				const char *ipaddr, const char *prefix,
				const char *macaddr)
{
	struct totemknet_instance *instance = (struct totemknet_instance *)knet_context;
	char device_name[IFNAMSIZ+1];
	size_t size = IFNAMSIZ;
	int8_t channel = NOZZLE_CHANNEL;
	nozzle_t nozzle_dev;
	int nozzle_fd;
	int res;
	char *updown_dir;
	char parsed_ipaddr[INET6_ADDRSTRLEN];

	/*
	 * Clear the whole buffer, including the extra byte, so the name stays
	 * NUL-terminated even when strncpy() copies a full IFNAMSIZ bytes.
	 */
	memset(device_name, 0, sizeof(device_name));
	strncpy(device_name, name, size);

	updown_dir = get_nozzle_script_dir(knet_context);
	/* updown_dir is NULL when the path was too long; never pass NULL to %s */
	knet_log_printf (LOGSYS_LEVEL_INFO, "nozzle script dir is %s", updown_dir ? updown_dir : "(null)");

	nozzle_dev = nozzle_open(device_name, size, updown_dir);
	if (!nozzle_dev) {
		knet_log_printf (LOGSYS_LEVEL_ERROR, "Unable to init nozzle device %s: %s", device_name, strerror(errno));
		return -1;
	}
	instance->nozzle_handle = nozzle_dev;

	if (nozzle_set_mac(nozzle_dev, macaddr) < 0) {
		/* Report the address that was actually rejected */
		knet_log_printf (LOGSYS_LEVEL_ERROR, "Unable to add set nozzle MAC to %s: %s", macaddr, strerror(errno));
		goto out_clean;
	}

	if (reparse_nozzle_ip_address(instance, ipaddr, prefix, instance->our_nodeid, parsed_ipaddr, sizeof(parsed_ipaddr))) {
		/* Prints its own errors */
		goto out_clean;
	}
	knet_log_printf (LOGSYS_LEVEL_INFO, "Local nozzle IP address is %s / %d", parsed_ipaddr, atoi(prefix));
	if (nozzle_add_ip(nozzle_dev, parsed_ipaddr, prefix) < 0) {
		knet_log_printf (LOGSYS_LEVEL_ERROR, "Unable to add set nozzle IP addr to %s/%s: %s", parsed_ipaddr, prefix, strerror(errno));
		goto out_clean;
	}

	nozzle_fd = nozzle_get_fd(nozzle_dev);
	knet_log_printf (LOGSYS_LEVEL_INFO, "Opened '%s' on fd %d", device_name, nozzle_fd);

	res = knet_handle_add_datafd(instance->knet_handle, &nozzle_fd, &channel);
	if (res != 0) {
		knet_log_printf (LOGSYS_LEVEL_ERROR, "Unable to add nozzle FD to knet: %s", strerror(errno));
		goto out_clean;
	}

	run_nozzle_script(instance, NOZZLE_PREUP, "pre-up");

	res = nozzle_set_up(nozzle_dev);
	if (res != 0) {
		knet_log_printf (LOGSYS_LEVEL_ERROR, "Unable to set nozzle interface UP: %s", strerror(errno));
		goto out_clean;
	}
	run_nozzle_script(instance, NOZZLE_UP, "up");

	return 0;

out_clean:
	nozzle_close(nozzle_dev);
	/* The device is gone: don't leave a stale handle behind for later reloads */
	instance->nozzle_handle = NULL;
	return -1;
}
/*
 * Detach the nozzle device from knet and close it.
 *
 * Steps, in order: look up and remove the data fd registered on
 * NOZZLE_CHANNEL, run the down script, set the interface down, run the
 * post-down script and close the device.
 *
 * Returns 0 on success, after which instance->nozzle_handle is NULL.
 * Returns -1 as soon as one of the knet/nozzle calls fails; the remaining
 * steps are skipped and nozzle_handle is left untouched.
 */
static int remove_nozzle_device(void *knet_context)
{
	struct totemknet_instance *instance = (struct totemknet_instance *)knet_context;
	int res;
	int datafd;

	res = knet_handle_get_datafd(instance->knet_handle, NOZZLE_CHANNEL, &datafd);
	if (res != 0) {
		knet_log_printf (LOGSYS_LEVEL_ERROR, "Can't find datafd for channel %d: %s", NOZZLE_CHANNEL, strerror(errno));
		return -1;
	}

	res = knet_handle_remove_datafd(instance->knet_handle, datafd);
	if (res != 0) {
		knet_log_printf (LOGSYS_LEVEL_ERROR, "Can't remove datafd for nozzle channel %d: %s", NOZZLE_CHANNEL, strerror(errno));
		return -1;
	}

	run_nozzle_script(instance, NOZZLE_DOWN, "pre-down");
	res = nozzle_set_down(instance->nozzle_handle);
	if (res != 0) {
		knet_log_printf (LOGSYS_LEVEL_ERROR, "Can't set nozzle device down: %s", strerror(errno));
		return -1;
	}
	run_nozzle_script(instance, NOZZLE_POSTDOWN, "post-down");

	res = nozzle_close(instance->nozzle_handle);
	if (res != 0) {
		knet_log_printf (LOGSYS_LEVEL_ERROR, "Can't close nozzle device: %s", strerror(errno));
		return -1;
	}

	/*
	 * The handle is no longer valid. Clearing it stops later reloads
	 * from treating the device as still present and trying to remove
	 * it a second time.
	 */
	instance->nozzle_handle = NULL;

	knet_log_printf (LOGSYS_LEVEL_INFO, "Removed nozzle device");
	return 0;
}
/*
 * Forget the recorded nozzle configuration: free the four saved strings
 * (name, IP address, prefix, MAC address) and reset each pointer to NULL.
 *
 * The nozzle device handle itself is not touched.
 */
static void free_nozzle(struct totemknet_instance *instance)
{
	free(instance->nozzle_name);
	instance->nozzle_name = NULL;

	free(instance->nozzle_ipaddr);
	instance->nozzle_ipaddr = NULL;

	free(instance->nozzle_prefix);
	instance->nozzle_prefix = NULL;

	free(instance->nozzle_macaddr);
	instance->nozzle_macaddr = NULL;
}
/*
 * Create, reconfigure or remove the nozzle device according to the
 * nozzle.* keys in icmap.
 *
 * - nozzle.name missing and a device exists: the device is removed.
 * - nozzle.name missing otherwise: nothing to do.
 * - name/ipaddr/prefix/macaddr equal to the recorded values: nothing to do.
 * - otherwise any recorded device is removed and a new one is created; the
 *   values are recorded only if the creation succeeded.
 *
 * The last two bytes of the MAC address are replaced with the low 16 bits
 * of our node ID.
 *
 * Returns the result of create_nozzle_device() when a device was created,
 * -1 in every other case (including "nothing to do").
 */
static int setup_nozzle(void *knet_context)
{
	struct totemknet_instance *instance = (struct totemknet_instance *)knet_context;
	char *ipaddr_str = NULL;
	char *name_str = NULL;
	char *prefix_str = NULL;
	char *macaddr_str = NULL;
	char mac[32];
	int name_res;
	int macaddr_res;
	int res = -1;

	/*
	 * Return value ignored on purpose. icmap_get_string changes
	 * ipaddr_str/prefix_str only on success.
	 */
	(void)icmap_get_string(NOZZLE_IPADDR, &ipaddr_str);
	(void)icmap_get_string(NOZZLE_PREFIX, &prefix_str);
	macaddr_res = icmap_get_string(NOZZLE_MACADDR, &macaddr_str);
	name_res = icmap_get_string(NOZZLE_NAME, &name_str);

	/* Is it being removed? */
	if (name_res == CS_ERR_NOT_EXIST && instance->nozzle_handle) {
		remove_nozzle_device(instance);
		free_nozzle(instance);
		goto out_free;
	}

	if (!name_str) {
		/* no nozzle */
		goto out_free;
	}

	if (!ipaddr_str) {
		knet_log_printf (LOGSYS_LEVEL_ERROR, "No IP address supplied for Nozzle device");
		goto out_free;
	}

	if (!prefix_str) {
		knet_log_printf (LOGSYS_LEVEL_ERROR, "No prefix supplied for Nozzle IP address");
		goto out_free;
	}

	/* A MAC address string "xx:xx:xx:xx:xx:xx" is exactly 17 characters */
	if (macaddr_str && strlen(macaddr_str) != 17) {
		knet_log_printf (LOGSYS_LEVEL_ERROR, "macaddr for nozzle device is not in the correct format '%s'", macaddr_str);
		goto out_free;
	}
	if (!macaddr_str) {
		/* Default; a string literal, so it must not be freed (see out_free) */
		macaddr_str = (char*)"54:54:01:00:00:00";
	}

	if (instance->nozzle_name &&
	    (strcmp(name_str, instance->nozzle_name) == 0) &&
	    (strcmp(ipaddr_str, instance->nozzle_ipaddr) == 0) &&
	    (strcmp(prefix_str, instance->nozzle_prefix) == 0) &&
	    (instance->nozzle_macaddr == NULL ||
	     strcmp(macaddr_str, instance->nozzle_macaddr) == 0)) {
		/* Nothing has changed */
		knet_log_printf (LOGSYS_LEVEL_DEBUG, "Nozzle device info not changed");
		goto out_free;
	}

	/*
	 * Add nodeid into MAC address: keep the first four bytes ("xx:xx:xx:xx:",
	 * 12 characters) and replace the last two. Both halves are masked to
	 * one byte so node IDs above 0xFFFF still give a well-formed address.
	 */
	memcpy(mac, macaddr_str, 12);
	snprintf(mac+12, sizeof(mac) - 13, "%02x:%02x",
		 (instance->our_nodeid >> 8) & 0xFF,
		 instance->our_nodeid & 0xFF);
	knet_log_printf (LOGSYS_LEVEL_INFO, "Local nozzle MAC address is %s", mac);

	if (name_res == CS_OK && name_str) {
		/* Reconfigure */
		if (instance->nozzle_name) {
			remove_nozzle_device(instance);
			free_nozzle(instance);
		}

		res = create_nozzle_device(knet_context, name_str, ipaddr_str, prefix_str,
					   mac);
		if (res != 0) {
			/*
			 * No device was created, so record nothing. With no
			 * recorded name the next reload retries from scratch
			 * instead of reporting "not changed".
			 */
			goto out_free;
		}

		instance->nozzle_name = strdup(name_str);
		instance->nozzle_ipaddr = strdup(ipaddr_str);
		instance->nozzle_prefix = strdup(prefix_str);
		instance->nozzle_macaddr = strdup(macaddr_str);
		if (!instance->nozzle_name || !instance->nozzle_ipaddr ||
		    !instance->nozzle_prefix) {
			knet_log_printf (LOGSYS_LEVEL_ERROR, "strdup failed in nozzle allocation");
			/*
			 * This 'free' will cause a complete reconfigure of the device next time we reload
			 * but will also let the current device keep working until then.
			 * remove_nozzle_device() only needs the nozzle_handle, which is not
			 * one of the strdup'ed strings.
			 */
			free_nozzle(instance);
		}
	}

out_free:
	free(name_str);
	free(ipaddr_str);
	free(prefix_str);
	/* Only free the MAC string when it came from icmap, not the literal default */
	if (macaddr_res == CS_OK) {
		free(macaddr_str);
	}

	return res;
}
#endif // HAVE_LIBNOZZLE
diff --git a/exec/totemknet.h b/exec/totemknet.h
index 3957b7f2..30068747 100644
--- a/exec/totemknet.h
+++ b/exec/totemknet.h
@@ -1,154 +1,157 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2011 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TOTEMKNET_H_DEFINED
#define TOTEMKNET_H_DEFINED
#include <sys/types.h>
#include <sys/socket.h>
#include <qb/qbloop.h>
#include <corosync/totem/totem.h>
/**
* Create an instance
*/
extern int totemknet_initialize (
qb_loop_t *poll_handle,
void **knet_context,
struct totem_config *totem_config,
totemsrp_stats_t *stats,
void *context,
void (*deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len,
const struct sockaddr_storage *system_from),
void (*iface_change_fn) (
void *context,
const struct totem_ip_address *iface_address,
unsigned int ring_no),
void (*mtu_changed) (
void *context,
int net_mtu),
void (*target_set_completed) (
void *context));
extern void *totemknet_buffer_alloc (void);
extern void totemknet_buffer_release (void *ptr);
extern int totemknet_processor_count_set (
void *knet_context,
int processor_count);
extern int totemknet_token_send (
void *knet_context,
const void *msg,
unsigned int msg_len);
extern int totemknet_mcast_flush_send (
void *knet_context,
const void *msg,
unsigned int msg_len);
extern int totemknet_mcast_noflush_send (
void *knet_context,
const void *msg,
unsigned int msg_len);
extern int totemknet_recv_flush (void *knet_context);
extern int totemknet_send_flush (void *knet_context);
extern int totemknet_iface_check (void *knet_context);
extern int totemknet_finalize (void *knet_context);
extern void totemknet_net_mtu_adjust (void *knet_context, struct totem_config *totem_config);
+extern int totemknet_nodestatus_get (void *knet_context, unsigned int nodeid,
+ struct totem_node_status *node_status);
+
extern int totemknet_ifaces_get (void *net_context,
char ***status,
unsigned int *iface_count);
extern int totemknet_iface_set (void *net_context,
const struct totem_ip_address *local_addr,
unsigned short ip_port,
unsigned int iface_no);
extern int totemknet_token_target_set (
void *knet_context,
unsigned int nodeid);
extern int totemknet_crypto_set (
void *knet_context,
const char *cipher_type,
const char *hash_type);
extern int totemknet_recv_mcast_empty (
void *knet_context);
extern int totemknet_member_add (
void *knet_context,
const struct totem_ip_address *local,
const struct totem_ip_address *member,
int ring_no);
extern int totemknet_member_remove (
void *knet_context,
const struct totem_ip_address *member,
int ring_no);
extern int totemknet_member_set_active (
void *knet_context,
const struct totem_ip_address *member_ip,
int active);
extern int totemknet_reconfigure (
void *knet_context,
struct totem_config *totem_config);
extern int totemknet_crypto_reconfigure_phase (
void *knet_context,
struct totem_config *totem_config,
cfg_message_crypto_reconfig_phase_t phase);
extern void totemknet_stats_clear (
void *knet_context);
#endif /* TOTEMKNET_H_DEFINED */
diff --git a/exec/totemnet.c b/exec/totemnet.c
index ae44dbf8..a4b90a3d 100644
--- a/exec/totemnet.c
+++ b/exec/totemnet.c
@@ -1,607 +1,628 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2018 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <assert.h>
#include <totemudp.h>
#include <totemudpu.h>
#include <totemknet.h>
#include <totemnet.h>
#include <qb/qbloop.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/logsys.h>
/*
 * Per-transport dispatch table.
 *
 * Each totemnet_*() wrapper in this file calls the member of the same name
 * on instance->transport, passing instance->transport_context as the first
 * argument (buffer_alloc and buffer_release take no context).
 *
 * Members that the wrappers test for NULL before calling, and which an
 * entry may therefore leave unset: member_add, member_remove,
 * member_set_active, crypto_reconfigure_phase, stats_clear.
 * Every other member that has a wrapper is called unconditionally.
 */
struct transport {
/* Name logged by totemnet_instance_initialize() */
const char *name;
/* Creates the transport; stores its private context in *transport_instance */
int (*initialize) (
qb_loop_t *loop_pt,
void **transport_instance,
struct totem_config *totem_config,
totemsrp_stats_t *stats,
void *context,
void (*deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len,
const struct sockaddr_storage *system_from),
void (*iface_change_fn) (
void *context,
const struct totem_ip_address *iface_address,
unsigned int ring_no),
void (*mtu_changed) (
void *context,
int net_mtu),
void (*target_set_completed) (
void *context));
void *(*buffer_alloc) (void);
void (*buffer_release) (void *ptr);
int (*processor_count_set) (
void *transport_context,
int processor_count);
int (*token_send) (
void *transport_context,
const void *msg,
unsigned int msg_len);
int (*mcast_flush_send) (
void *transport_context,
const void *msg,
unsigned int msg_len);
int (*mcast_noflush_send) (
void *transport_context,
const void *msg,
unsigned int msg_len);
int (*recv_flush) (void *transport_context);
int (*send_flush) (void *transport_context);
int (*iface_check) (void *transport_context);
int (*finalize) (void *transport_context);
/* Returns void; totemnet_net_mtu_adjust() always reports 0 to its caller */
void (*net_mtu_adjust) (void *transport_context, struct totem_config *totem_config);
/* NOTE(review): no entry of transport_entries[] sets this member and no wrapper in this file calls it */
const char *(*iface_print) (void *transport_context);
int (*ifaces_get) (
void *transport_context,
char ***status,
unsigned int *iface_count);
/* Called unconditionally by totemnet_nodestatus_get(), so every transport must provide it */
+ int (*nodestatus_get) (
+ void *transport_context,
+ unsigned int nodeid,
+ struct totem_node_status *node_status);
+
int (*token_target_set) (
void *transport_context,
unsigned int nodeid);
int (*crypto_set) (
void *transport_context,
const char *cipher_type,
const char *hash_type);
int (*recv_mcast_empty) (
void *transport_context);
int (*iface_set) (
void *transport_context,
const struct totem_ip_address *local,
unsigned short ip_port,
unsigned int ring_no);
/* Optional: wrapper returns 0 when unset */
int (*member_add) (
void *transport_context,
const struct totem_ip_address *local,
const struct totem_ip_address *member,
int ring_no);
/* Optional: wrapper returns 0 when unset */
int (*member_remove) (
void *transport_context,
const struct totem_ip_address *member,
int ring_no);
/* Optional: wrapper returns 0 when unset */
int (*member_set_active) (
void *transport_context,
const struct totem_ip_address *member,
int active);
int (*reconfigure) (
void *net_context,
struct totem_config *totem_config);
/* Optional: wrapper returns 0 when unset */
int (*crypto_reconfigure_phase) (
void *net_context,
struct totem_config *totem_config,
cfg_message_crypto_reconfig_phase_t phase);
/* Optional: wrapper does nothing when unset */
void (*stats_clear) (
void *net_context);
};
/*
 * Table of available transports.
 *
 * totemnet_instance_initialize() indexes this array directly with
 * config->transport_number, so the order of the entries is significant.
 * Members not named in an initializer are zero-initialized (NULL); the
 * wrappers for member_set_active, crypto_reconfigure_phase and stats_clear
 * rely on that to skip transports that do not implement them.
 */
struct transport transport_entries[] = {
/* Index 0: UDP multicast (no stats_clear, no member_set_active) */
{
.name = "UDP/IP Multicast",
.initialize = totemudp_initialize,
.buffer_alloc = totemudp_buffer_alloc,
.buffer_release = totemudp_buffer_release,
.processor_count_set = totemudp_processor_count_set,
.token_send = totemudp_token_send,
.mcast_flush_send = totemudp_mcast_flush_send,
.mcast_noflush_send = totemudp_mcast_noflush_send,
.recv_flush = totemudp_recv_flush,
.send_flush = totemudp_send_flush,
.iface_set = totemudp_iface_set,
.iface_check = totemudp_iface_check,
.finalize = totemudp_finalize,
.net_mtu_adjust = totemudp_net_mtu_adjust,
.ifaces_get = totemudp_ifaces_get,
+ .nodestatus_get = totemudp_nodestatus_get,
.token_target_set = totemudp_token_target_set,
.crypto_set = totemudp_crypto_set,
.recv_mcast_empty = totemudp_recv_mcast_empty,
.member_add = totemudp_member_add,
.member_remove = totemudp_member_remove,
.reconfigure = totemudp_reconfigure,
.crypto_reconfigure_phase = NULL
},
/* Index 1: UDP unicast (no stats_clear, no member_set_active) */
{
.name = "UDP/IP Unicast",
.initialize = totemudpu_initialize,
.buffer_alloc = totemudpu_buffer_alloc,
.buffer_release = totemudpu_buffer_release,
.processor_count_set = totemudpu_processor_count_set,
.token_send = totemudpu_token_send,
.mcast_flush_send = totemudpu_mcast_flush_send,
.mcast_noflush_send = totemudpu_mcast_noflush_send,
.recv_flush = totemudpu_recv_flush,
.send_flush = totemudpu_send_flush,
.iface_set = totemudpu_iface_set,
.iface_check = totemudpu_iface_check,
.finalize = totemudpu_finalize,
.net_mtu_adjust = totemudpu_net_mtu_adjust,
.ifaces_get = totemudpu_ifaces_get,
+ .nodestatus_get = totemudpu_nodestatus_get,
.token_target_set = totemudpu_token_target_set,
.crypto_set = totemudpu_crypto_set,
.recv_mcast_empty = totemudpu_recv_mcast_empty,
.member_add = totemudpu_member_add,
.member_remove = totemudpu_member_remove,
.reconfigure = totemudpu_reconfigure,
.crypto_reconfigure_phase = NULL
},
/* Index 2: Kronosnet; the only entry providing crypto_reconfigure_phase and stats_clear */
{
.name = "Kronosnet",
.initialize = totemknet_initialize,
.buffer_alloc = totemknet_buffer_alloc,
.buffer_release = totemknet_buffer_release,
.processor_count_set = totemknet_processor_count_set,
.token_send = totemknet_token_send,
.mcast_flush_send = totemknet_mcast_flush_send,
.mcast_noflush_send = totemknet_mcast_noflush_send,
.recv_flush = totemknet_recv_flush,
.send_flush = totemknet_send_flush,
.iface_set = totemknet_iface_set,
.iface_check = totemknet_iface_check,
.finalize = totemknet_finalize,
.net_mtu_adjust = totemknet_net_mtu_adjust,
.ifaces_get = totemknet_ifaces_get,
+ .nodestatus_get = totemknet_nodestatus_get,
.token_target_set = totemknet_token_target_set,
.crypto_set = totemknet_crypto_set,
.recv_mcast_empty = totemknet_recv_mcast_empty,
.member_add = totemknet_member_add,
.member_remove = totemknet_member_remove,
.reconfigure = totemknet_reconfigure,
.crypto_reconfigure_phase = totemknet_crypto_reconfigure_phase,
.stats_clear = totemknet_stats_clear
}
};
/*
 * State behind the opaque net_context handle handed out by
 * totemnet_initialize(): the selected transport, that transport's own
 * context, and the logging hook used by the log_printf() macro.
 */
struct totemnet_instance {
/* Filled in by transport->initialize(); passed back on every dispatch */
void *transport_context;
/* Points at one element of transport_entries[] */
struct transport *transport;
/* Copied from config->totem_logging_configuration.log_printf */
void (*totemnet_log_printf) (
int level,
int subsys,
const char *function,
const char *file,
int line,
const char *format,
...)__attribute__((format(printf, 6, 7)));
/* Copied from config->totem_logging_configuration.log_subsys_id */
int totemnet_subsys_id;
};
/*
 * Log through the callback stored in the totemnet instance.
 *
 * Requires a variable named `instance` (struct totemnet_instance *) to be
 * in scope at the point of use.
 *
 * The do/while (0) wrapper deliberately has NO trailing semicolon: the
 * caller's own `;` terminates the statement, which keeps the macro usable
 * as the body of an unbraced if/else.
 */
#define log_printf(level, format, args...)		\
do {							\
	instance->totemnet_log_printf (			\
		level,					\
		instance->totemnet_subsys_id,		\
		__FUNCTION__, __FILE__, __LINE__,	\
		(const char *)format, ##args);		\
} while (0)
static void totemnet_instance_initialize (
struct totemnet_instance *instance,
struct totem_config *config)
{
int transport;
instance->totemnet_log_printf = config->totem_logging_configuration.log_printf;
instance->totemnet_subsys_id = config->totem_logging_configuration.log_subsys_id;
transport = config->transport_number;
log_printf (LOGSYS_LEVEL_NOTICE,
"Initializing transport (%s).", transport_entries[transport].name);
instance->transport = &transport_entries[transport];
}
/*
 * Hand the cipher and hash type names to the active transport's
 * crypto_set handler and return that handler's result.
 */
int totemnet_crypto_set (
	void *net_context,
	const char *cipher_type,
	const char *hash_type)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->crypto_set (net->transport_context,
		cipher_type, hash_type));
}
/*
 * Run the active transport's finalize handler and return its result.
 *
 * NOTE(review): the totemnet_instance allocated by totemnet_initialize()
 * is not released here - confirm whether that is intentional.
 */
int totemnet_finalize (
	void *net_context)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->finalize (net->transport_context));
}
/*
 * Allocate a totemnet instance, bind it to the transport selected in
 * totem_config and initialize that transport.
 *
 * On success stores the new instance in *net_context and returns 0.
 * Returns -1 if the allocation fails or the transport's initialize
 * handler returns -1; in the latter case the instance is freed and
 * *net_context is left untouched.
 */
int totemnet_initialize (
	qb_loop_t *loop_pt,
	void **net_context,
	struct totem_config *totem_config,
	totemsrp_stats_t *stats,
	void *context,
	void (*deliver_fn) (
		void *context,
		const void *msg,
		unsigned int msg_len,
		const struct sockaddr_storage *system_from),
	void (*iface_change_fn) (
		void *context,
		const struct totem_ip_address *iface_address,
		unsigned int ring_no),
	void (*mtu_changed) (
		void *context,
		int net_mtu),
	void (*target_set_completed) (
		void *context))
{
	struct totemnet_instance *instance;
	/* Signed, matching the handler's int return, so the -1 test needs no conversion */
	int res;

	instance = malloc (sizeof (struct totemnet_instance));
	if (instance == NULL) {
		return (-1);
	}
	totemnet_instance_initialize (instance, totem_config);

	res = instance->transport->initialize (loop_pt,
		&instance->transport_context, totem_config, stats,
		context, deliver_fn, iface_change_fn, mtu_changed, target_set_completed);
	if (res == -1) {
		goto error_destroy;
	}

	*net_context = instance;
	return (0);

error_destroy:
	free (instance);
	return (-1);
}
/*
 * Obtain a buffer from the active transport's buffer_alloc handler.
 * Asserts that both the instance and its transport are non-NULL.
 */
void *totemnet_buffer_alloc (void *net_context)
{
	struct totemnet_instance *instance;

	instance = net_context;
	assert (instance != NULL);
	assert (instance->transport != NULL);

	return (instance->transport->buffer_alloc ());
}
/*
 * Give a buffer back through the active transport's buffer_release handler.
 * Asserts that both the instance and its transport are non-NULL.
 */
void totemnet_buffer_release (void *net_context, void *ptr)
{
	struct totemnet_instance *instance;

	instance = net_context;
	assert (instance != NULL);
	assert (instance->transport != NULL);

	instance->transport->buffer_release (ptr);
}
/*
 * Pass processor_count to the active transport's processor_count_set
 * handler and return its result.
 */
int totemnet_processor_count_set (
	void *net_context,
	int processor_count)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->processor_count_set (net->transport_context,
		processor_count));
}
/*
 * Invoke the active transport's recv_flush handler and return its result.
 */
int totemnet_recv_flush (void *net_context)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->recv_flush (net->transport_context));
}
/*
 * Invoke the active transport's send_flush handler and return its result.
 */
int totemnet_send_flush (void *net_context)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->send_flush (net->transport_context));
}
/*
 * Forward msg/msg_len to the active transport's token_send handler and
 * return its result.
 */
int totemnet_token_send (
	void *net_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->token_send (net->transport_context, msg, msg_len));
}
/*
 * Forward msg/msg_len to the active transport's mcast_flush_send handler
 * and return its result.
 */
int totemnet_mcast_flush_send (
	void *net_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->mcast_flush_send (net->transport_context, msg, msg_len));
}
/*
 * Forward msg/msg_len to the active transport's mcast_noflush_send handler
 * and return its result.
 */
int totemnet_mcast_noflush_send (
	void *net_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->mcast_noflush_send (net->transport_context, msg, msg_len));
}
/*
 * Invoke the active transport's iface_check handler and return its result.
 */
extern int totemnet_iface_check (void *net_context)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->iface_check (net->transport_context));
}
/*
 * Invoke the active transport's net_mtu_adjust handler.
 * The handler returns void, so this wrapper always returns 0.
 */
extern int totemnet_net_mtu_adjust (void *net_context, struct totem_config *totem_config)
{
	struct totemnet_instance *net = net_context;

	net->transport->net_mtu_adjust (net->transport_context, totem_config);

	return (0);
}
/*
 * Forward an interface address, port and interface number to the active
 * transport's iface_set handler and return its result.
 */
int totemnet_iface_set (void *net_context,
	const struct totem_ip_address *interface_addr,
	unsigned short ip_port,
	unsigned int iface_no)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->iface_set (net->transport_context,
		interface_addr, ip_port, iface_no));
}
+/*
+ * Ask the active transport's nodestatus_get handler to fill in
+ * node_status for nodeid, and return the handler's result.
+ *
+ * The handler is called unconditionally, so every transport must set it.
+ */
+extern int totemnet_nodestatus_get (
+	void *net_context,
+	unsigned int nodeid,
+	struct totem_node_status *node_status)
+{
+	struct totemnet_instance *instance = (struct totemnet_instance *)net_context;
+	/* Signed, so a negative handler result is returned unchanged */
+	int res;
+
+	res = instance->transport->nodestatus_get (instance->transport_context, nodeid, node_status);
+
+	return (res);
+}
+
/*
 * Forward to the active transport's ifaces_get handler, which receives the
 * status and iface_count output pointers, and return its result.
 */
int totemnet_ifaces_get (
	void *net_context,
	char ***status,
	unsigned int *iface_count)
{
	struct totemnet_instance *instance = (struct totemnet_instance *)net_context;
	/* Signed, so a negative handler result is returned unchanged */
	int res;

	res = instance->transport->ifaces_get (instance->transport_context, status, iface_count);

	return (res);
}
/*
 * Pass nodeid to the active transport's token_target_set handler and
 * return its result.
 */
int totemnet_token_target_set (
	void *net_context,
	unsigned int nodeid)
{
	struct totemnet_instance *instance = (struct totemnet_instance *)net_context;
	/* Signed, so a negative handler result is returned unchanged */
	int res;

	res = instance->transport->token_target_set (instance->transport_context, nodeid);

	return (res);
}
/*
 * Invoke the active transport's recv_mcast_empty handler and return its
 * result.
 */
extern int totemnet_recv_mcast_empty (
	void *net_context)
{
	struct totemnet_instance *instance = (struct totemnet_instance *)net_context;
	/* Signed, so a negative handler result is returned unchanged */
	int res;

	res = instance->transport->recv_mcast_empty (instance->transport_context);

	return (res);
}
/*
 * Forward a member addition to the active transport.
 *
 * Returns the member_add handler's result, or 0 when the transport does
 * not provide that handler.
 */
extern int totemnet_member_add (
	void *net_context,
	const struct totem_ip_address *local,
	const struct totem_ip_address *member,
	int ring_no)
{
	struct totemnet_instance *instance = (struct totemnet_instance *)net_context;
	/* Signed, so a negative handler result is returned unchanged */
	int res = 0;

	if (instance->transport->member_add) {
		res = instance->transport->member_add (
			instance->transport_context,
			local,
			member,
			ring_no);
	}

	return (res);
}
/*
 * Forward a member removal to the active transport.
 *
 * Returns the member_remove handler's result, or 0 when the transport
 * does not provide that handler.
 */
extern int totemnet_member_remove (
	void *net_context,
	const struct totem_ip_address *member,
	int ring_no)
{
	struct totemnet_instance *instance = (struct totemnet_instance *)net_context;
	/* Signed, so a negative handler result is returned unchanged */
	int res = 0;

	if (instance->transport->member_remove) {
		res = instance->transport->member_remove (
			instance->transport_context,
			member,
			ring_no);
	}

	return (res);
}
/*
 * Forward a member active-state change to the active transport.
 *
 * Returns the member_set_active handler's result, or 0 when the transport
 * does not provide that handler (none of the entries in
 * transport_entries[] currently sets it).
 */
int totemnet_member_set_active (
	void *net_context,
	const struct totem_ip_address *member,
	int active)
{
	struct totemnet_instance *instance = (struct totemnet_instance *)net_context;
	/* Signed, so a negative handler result is returned unchanged */
	int res = 0;

	if (instance->transport->member_set_active) {
		res = instance->transport->member_set_active (
			instance->transport_context,
			member,
			active);
	}

	return (res);
}
/*
 * Pass totem_config to the active transport's reconfigure handler and
 * return its result. The handler is called unconditionally.
 */
int totemnet_reconfigure (
	void *net_context,
	struct totem_config *totem_config)
{
	struct totemnet_instance *instance = (struct totemnet_instance *)net_context;
	/* Signed, so a negative handler result is returned unchanged */
	int res;

	res = instance->transport->reconfigure (
		instance->transport_context,
		totem_config);

	return (res);
}
/*
 * Forward a crypto reconfiguration phase to the active transport.
 *
 * Returns the crypto_reconfigure_phase handler's result, or 0 when the
 * transport does not provide that handler (the UDP and UDPU entries set
 * it to NULL).
 */
int totemnet_crypto_reconfigure_phase (
	void *net_context,
	struct totem_config *totem_config,
	cfg_message_crypto_reconfig_phase_t phase)
{
	struct totemnet_instance *instance = (struct totemnet_instance *)net_context;
	/* Signed, so a negative handler result is returned unchanged */
	int res = 0;

	if (instance->transport->crypto_reconfigure_phase) {
		res = instance->transport->crypto_reconfigure_phase (
			instance->transport_context,
			totem_config, phase);
	}

	return (res);
}
/*
 * Invoke the active transport's stats_clear handler, if it has one.
 * Does nothing for transports that leave the handler unset.
 */
void totemnet_stats_clear (
	void *net_context)
{
	struct totemnet_instance *net = net_context;

	if (net->transport->stats_clear == NULL) {
		return;
	}

	net->transport->stats_clear (net->transport_context);
}
diff --git a/exec/totemnet.h b/exec/totemnet.h
index 46c1dd8d..c6a99235 100644
--- a/exec/totemnet.h
+++ b/exec/totemnet.h
@@ -1,161 +1,166 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2007, 2009 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file
* Totem Network interface - also does encryption/decryption
*
* depends on poll abstraction, POSIX, IPV4
*/
#ifndef TOTEMNET_H_DEFINED
#define TOTEMNET_H_DEFINED
#include <sys/types.h>
#include <sys/socket.h>
#include <corosync/totem/totem.h>
#define TOTEMNET_NOFLUSH 0
#define TOTEMNET_FLUSH 1
/**
* Create an instance
*/
extern int totemnet_initialize (
qb_loop_t *poll_handle,
void **net_context,
struct totem_config *totem_config,
totemsrp_stats_t *stats,
void *context,
void (*deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len,
const struct sockaddr_storage *system_from),
void (*iface_change_fn) (
void *context,
const struct totem_ip_address *iface_address,
unsigned int iface_no),
void (*mtu_changed) (
void *context,
int net_mtu),
void (*target_set_completed) (
void *context));
extern void *totemnet_buffer_alloc (void *net_context);
extern void totemnet_buffer_release (void *net_context, void *ptr);
extern int totemnet_processor_count_set (
void *net_context,
int processor_count);
extern int totemnet_token_send (
void *net_context,
const void *msg,
unsigned int msg_len);
extern int totemnet_mcast_flush_send (
void *net_context,
const void *msg,
unsigned int msg_len);
extern int totemnet_mcast_noflush_send (
void *net_context,
const void *msg,
unsigned int msg_len);
extern int totemnet_recv_flush (void *net_context);
extern int totemnet_send_flush (void *net_context);
extern int totemnet_iface_set (void *net_context,
const struct totem_ip_address *interface_addr,
unsigned short ip_port,
unsigned int iface_no);
extern int totemnet_iface_check (void *net_context);
extern int totemnet_finalize (void *net_context);
extern int totemnet_net_mtu_adjust (void *net_context, struct totem_config *totem_config);
extern int totemnet_reconfigure (void *net_context, struct totem_config *totem_config);
extern int totemnet_crypto_reconfigure_phase (void *net_context, struct totem_config *totem_config, cfg_message_crypto_reconfig_phase_t phase);
extern void totemnet_stats_clear (void *net_context);
extern const char *totemnet_iface_print (void *net_context);
+extern int totemnet_nodestatus_get (
+ void *net_context,
+ unsigned int nodeid,
+ struct totem_node_status *node_status);
+
extern int totemnet_ifaces_get (
void *net_context,
char ***status,
unsigned int *iface_count);
extern int totemnet_token_target_set (
void *net_context,
unsigned int target_nodeid);
extern int totemnet_crypto_set (
void *net_context,
const char *cipher_type,
const char *hash_type);
extern int totemnet_recv_mcast_empty (
void *net_context);
extern int totemnet_member_add (
void *net_context,
const struct totem_ip_address *local,
const struct totem_ip_address *member,
int ring_no);
extern int totemnet_member_remove (
void *net_context,
const struct totem_ip_address *member,
int ring_no);
extern int totemnet_member_set_active (
void *net_context,
const struct totem_ip_address *member,
int active);
#endif /* TOTEMNET_H_DEFINED */
diff --git a/exec/totempg.c b/exec/totempg.c
index 7b1f755e..a2484323 100644
--- a/exec/totempg.c
+++ b/exec/totempg.c
@@ -1,1613 +1,1620 @@
/*
* Copyright (c) 2003-2005 MontaVista Software, Inc.
* Copyright (c) 2005 OSDL.
* Copyright (c) 2006-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
* Author: Mark Haverkamp (markh@osdl.org)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* FRAGMENTATION AND PACKING ALGORITHM:
*
* Assemble the entire message into one buffer
* if full fragment
* store fragment into lengths list
* for each full fragment
* multicast fragment
* set length and fragment fields of pg message
* store remaining multicast into head of fragmentation data and set lens field
*
* If a message exceeds the maximum packet size allowed by the totem
* single ring protocol, the protocol could lose forward progress.
* Statically calculating the allowed data amount doesn't work because
* the amount of data allowed depends on the number of fragments in
* each message. In this implementation, the maximum fragment size
* is dynamically calculated for each fragment added to the message.
* It is possible for a message to be two bytes short of the maximum
* packet size. This occurs when a message or collection of
* messages + the mcast header + the lens are two bytes short of the
* end of the packet. Since another len field consumes two bytes, the
* len field would consume the rest of the packet without room for data.
*
* One optimization would be to forgo the final len field and determine
* it from the size of the udp datagram. Then this condition would no
* longer occur.
*/
/*
* ASSEMBLY AND UNPACKING ALGORITHM:
*
* copy incoming packet into assembly data buffer indexed by current
* location of end of fragment
*
* if not fragmented
* deliver all messages in assembly data buffer
* else
* if msg_count > 1 and fragmented
* deliver all messages except last message in assembly data buffer
* copy last fragmented section to start of assembly data buffer
* else
* if msg_count = 1 and fragmented
* do nothing
*
*/
#include <config.h>
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/uio.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <pthread.h>
#include <errno.h>
#include <limits.h>
#include <corosync/swab.h>
#include <qb/qblist.h>
#include <qb/qbloop.h>
#include <qb/qbipcs.h>
#include <corosync/totem/totempg.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/logsys.h>
#include "util.h"
#include "totemsrp.h"
/*
 * Leading header of a totempg multicast; embedded as the first member of
 * struct totempg_mcast.
 */
struct totempg_mcast_header {
short version;
short type;
};
#if !(defined(__i386__) || defined(__x86_64__))
/*
* Alignment is needed on architectures other than i386 or x86_64
*/
#define TOTEMPG_NEED_ALIGN 1
#endif
/*
* totempg_mcast structure
*
* header: Identify the mcast.
* fragmented: Set if this message continues into next message
* continuation: Set if this message is a continuation from last message
* msg_count Indicates how many packed messages are contained
* in the mcast.
* Also, the size of each packed message and the messages themselves are
* appended to the end of this structure when sent.
*/
/*
 * Fixed-size prefix of a totempg multicast. Its size is what
 * TOTEMPG_PACKET_SIZE subtracts from the configured net_mtu; the
 * per-message lengths and the message data follow it and are not members
 * of this struct.
 */
struct totempg_mcast {
struct totempg_mcast_header header;
unsigned char fragmented;
unsigned char continuation;
unsigned short msg_count;
/*
* short msg_len[msg_count];
*/
/*
* data for messages
*/
};
/*
* Maximum packet size for totem pg messages
*/
/*
 * Expands to an expression, not a constant: totempg_totem_config is
 * dereferenced at every use, so the pointer must be set before the macro
 * is evaluated.
 */
#define TOTEMPG_PACKET_SIZE (totempg_totem_config->net_mtu - \
sizeof (struct totempg_mcast))
/*
* Local variables used for packing small messages
*/
static unsigned short mcast_packed_msg_lens[FRAME_SIZE_MAX];
static int mcast_packed_msg_count = 0;
static int totempg_reserved = 1;
static unsigned int totempg_size_limit;
static totem_queue_level_changed_fn totem_queue_level_changed = NULL;
static uint32_t totempg_threaded_mode = 0;
static void *totemsrp_context;
/*
* Function and data used to log messages
*/
static int totempg_log_level_security;
static int totempg_log_level_error;
static int totempg_log_level_warning;
static int totempg_log_level_notice;
static int totempg_log_level_debug;
static int totempg_subsys_id;
static void (*totempg_log_printf) (
int level,
int subsys,
const char *function,
const char *file,
int line,
const char *format, ...) __attribute__((format(printf, 6, 7)));
struct totem_config *totempg_totem_config;
static totempg_stats_t totempg_stats;
/*
 * Per-assembly state flag. assembly_ref() sets it to THROW_AWAY_INACTIVE
 * whenever an assembly is taken from the free list or newly allocated.
 * NOTE(review): presumably ACTIVE means incoming fragments are being
 * discarded - confirm against the code that sets it.
 */
enum throw_away_mode {
THROW_AWAY_INACTIVE,
THROW_AWAY_ACTIVE
};
/*
 * Reassembly buffer for one node. Instances live on exactly one of
 * assembly_list_inuse, assembly_list_inuse_trans or assembly_list_free
 * (see assembly_ref() and assembly_deref()).
 */
struct assembly {
/* Node this buffer is assigned to; the key assembly_ref() searches for */
unsigned int nodeid;
unsigned char data[MESSAGE_SIZE_MAX+KNET_MAX_PACKET_SIZE];
/* Reset to 0 by assembly_ref() on (re)assignment */
int index;
/* Reset to 0 by assembly_ref() on (re)assignment */
unsigned char last_frag_num;
enum throw_away_mode throw_away_mode;
/* Link into one of the assembly lists */
struct qb_list_head list;
};
static void assembly_deref (struct assembly *assembly);
static int callback_token_received_fn (enum totem_callback_token_type type,
const void *data);
QB_LIST_DECLARE(assembly_list_inuse);
/*
* Free list is used both for transitional and operational assemblies
*/
QB_LIST_DECLARE(assembly_list_free);
QB_LIST_DECLARE(assembly_list_inuse_trans);
QB_LIST_DECLARE(totempg_groups_list);
/*
* Staging buffer for packed messages. Messages are staged in this buffer
* before sending. Multiple messages may fit which cuts down on the
* number of mcasts sent. If a message doesn't completely fit, then
* the mcast header has a fragment bit set that says that there are more
* data to follow. fragment_size is an index into the buffer. It indicates
* the size of message data and where to place new message data.
* fragment_continuation indicates whether the first packed message in
* the buffer is a continuation of a previously packed fragment.
*/
static unsigned char *fragmentation_data;
static int fragment_size = 0;
static int fragment_continuation = 0;
static int totempg_waiting_transack = 0;
/*
 * One registered group consumer: its message-delivery and
 * configuration-change callbacks plus the set of groups it belongs to.
 * NOTE(review): presumably linked into totempg_groups_list through
 * `list` - confirm against the code that initializes it.
 */
struct totempg_group_instance {
/* Callback receiving a message together with the originating nodeid */
void (*deliver_fn) (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required);
/* Callback receiving member/left/joined lists and the ring id */
void (*confchg_fn) (
enum totem_configuration_type configuration_type,
const unsigned int *member_list, size_t member_list_entries,
const unsigned int *left_list, size_t left_list_entries,
const unsigned int *joined_list, size_t joined_list_entries,
const struct memb_ring_id *ring_id);
/* Array of groups with groups_cnt entries */
struct totempg_group *groups;
int groups_cnt;
int32_t q_level;
struct qb_list_head list;
};
static unsigned char next_fragment = 1;
static pthread_mutex_t totempg_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t callback_token_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t mcast_msg_mutex = PTHREAD_MUTEX_INITIALIZER;
/*
 * Log through the file-level totempg_log_printf hook, tagging the message
 * with totempg_subsys_id and the call site's function, file and line.
 *
 * NOTE(review): the expansion ends in "while (0);" - with the caller's own
 * semicolon this yields an extra empty statement, so the macro cannot be
 * used as the body of an unbraced if that has an else. Dropping the
 * trailing semicolon is the usual fix, but every call site in this file
 * must first be checked to end in ';'.
 */
#define log_printf(level, format, args...) \
do { \
totempg_log_printf(level, \
totempg_subsys_id, \
__FUNCTION__, __FILE__, __LINE__, \
format, ##args); \
} while (0);
static int msg_count_send_ok (int msg_count);
static int byte_count_send_ok (int byte_count);
/*
 * Record the new waiting_trans_ack value in totempg_waiting_transack,
 * which assembly_ref() uses to choose between the normal and the
 * transitional in-use assembly list, and log the change.
 */
static void totempg_waiting_trans_ack_cb (int waiting_trans_ack)
{
	/* %d, not %u: the argument is a signed int and the logger is printf-checked */
	log_printf(LOG_DEBUG, "waiting_trans_ack changed to %d", waiting_trans_ack);
	totempg_waiting_transack = waiting_trans_ack;
}
static struct assembly *assembly_ref (unsigned int nodeid)
{
struct assembly *assembly;
struct qb_list_head *list;
struct qb_list_head *active_assembly_list_inuse;
if (totempg_waiting_transack) {
active_assembly_list_inuse = &assembly_list_inuse_trans;
} else {
active_assembly_list_inuse = &assembly_list_inuse;
}
/*
* Search inuse list for node id and return assembly buffer if found
*/
qb_list_for_each(list, active_assembly_list_inuse) {
assembly = qb_list_entry (list, struct assembly, list);
if (nodeid == assembly->nodeid) {
return (assembly);
}
}
/*
* Nothing found in inuse list get one from free list if available
*/
if (qb_list_empty (&assembly_list_free) == 0) {
assembly = qb_list_first_entry (&assembly_list_free, struct assembly, list);
qb_list_del (&assembly->list);
qb_list_add (&assembly->list, active_assembly_list_inuse);
assembly->nodeid = nodeid;
assembly->index = 0;
assembly->last_frag_num = 0;
assembly->throw_away_mode = THROW_AWAY_INACTIVE;
return (assembly);
}
/*
* Nothing available in inuse or free list, so allocate a new one
*/
assembly = malloc (sizeof (struct assembly));
/*
* TODO handle memory allocation failure here
*/
assert (assembly);
assembly->nodeid = nodeid;
assembly->data[0] = 0;
assembly->index = 0;
assembly->last_frag_num = 0;
assembly->throw_away_mode = THROW_AWAY_INACTIVE;
qb_list_init (&assembly->list);
qb_list_add (&assembly->list, active_assembly_list_inuse);
return (assembly);
}
/*
 * Release an assembly buffer: unlink it from whichever list it is on and
 * put it on assembly_list_free for reuse by assembly_ref.
 */
static void assembly_deref (struct assembly *assembly)
{
qb_list_del (&assembly->list);
qb_list_add (&assembly->list, &assembly_list_free);
}
static void assembly_deref_from_normal_and_trans (int nodeid)
{
int j;
struct qb_list_head *list, *tmp_iter;
struct qb_list_head *active_assembly_list_inuse;
struct assembly *assembly;
for (j = 0; j < 2; j++) {
if (j == 0) {
active_assembly_list_inuse = &assembly_list_inuse;
} else {
active_assembly_list_inuse = &assembly_list_inuse_trans;
}
qb_list_for_each_safe(list, tmp_iter, active_assembly_list_inuse) {
assembly = qb_list_entry (list, struct assembly, list);
if (nodeid == assembly->nodeid) {
qb_list_del (&assembly->list);
qb_list_add (&assembly->list, &assembly_list_free);
}
}
}
}
/*
 * Handle a configuration change: release the assembly buffers of every node
 * in left_list, then forward the change unchanged to the confchg_fn of each
 * registered group instance that has one.
 */
static inline void app_confchg_fn (
	enum totem_configuration_type configuration_type,
	const unsigned int *member_list, size_t member_list_entries,
	const unsigned int *left_list, size_t left_list_entries,
	const unsigned int *joined_list, size_t joined_list_entries,
	const struct memb_ring_id *ring_id)
{
	/* size_t index: left_list_entries is a size_t, avoid a signed/unsigned compare */
	size_t i;
	struct totempg_group_instance *instance;
	struct qb_list_head *list;

	/*
	 * For every leaving processor, add to free list
	 * This also has the side effect of clearing out the dataset
	 * In the leaving processor's assembly buffer.
	 */
	for (i = 0; i < left_list_entries; i++) {
		assembly_deref_from_normal_and_trans (left_list[i]);
	}

	qb_list_for_each(list, &totempg_groups_list) {
		instance = qb_list_entry (list, struct totempg_group_instance, list);

		if (instance->confchg_fn) {
			instance->confchg_fn (
				configuration_type,
				member_list,
				member_list_entries,
				left_list,
				left_list_entries,
				joined_list,
				joined_list_entries,
				ring_id);
		}
	}
}
/*
 * Byte-swap, in place, the group header at the start of msg: the leading
 * unsigned short holding the number of groups, followed by one unsigned
 * short length per group.
 *
 * The group count comes from the message itself, so it is clamped to the
 * number of unsigned shorts that actually fit in msg_len; a corrupt count
 * can therefore not make the swap run past the end of the buffer.
 */
static inline void group_endian_convert (
	void *msg,
	int msg_len)
{
	unsigned short *group_len;
	unsigned int group_cnt;
	unsigned int max_group_cnt;
	unsigned int i;
	char *aligned_msg;

	/*
	 * Not even room for the group count: nothing to convert
	 */
	if (msg_len < (int)sizeof (unsigned short)) {
		return;
	}

#ifdef TOTEMPG_NEED_ALIGN
	/*
	 * Align data structure for not i386 or x86_64
	 */
	if ((size_t)msg % 4 != 0) {
		aligned_msg = alloca(msg_len);
		memcpy(aligned_msg, msg, msg_len);
	} else {
		aligned_msg = msg;
	}
#else
	aligned_msg = msg;
#endif

	group_len = (unsigned short *)aligned_msg;
	group_len[0] = swab16(group_len[0]);

	/*
	 * Never touch more length slots than the buffer holds
	 */
	group_cnt = group_len[0];
	max_group_cnt = (unsigned int)((size_t)msg_len / sizeof (unsigned short)) - 1;
	if (group_cnt > max_group_cnt) {
		group_cnt = max_group_cnt;
	}

	for (i = 1; i <= group_cnt; i++) {
		group_len[i] = swab16(group_len[i]);
	}

	/*
	 * Copy the converted header back when a temporary aligned copy was used
	 */
	if (aligned_msg != msg) {
		memcpy(msg, aligned_msg, msg_len);
	}
}
/*
 * Decide whether the message in iovec (exactly one element) is addressed to
 * any of the groups in groups_b.
 *
 * The message starts with a header of unsigned shorts (group count followed
 * by one length per group) and then the group names back to back.
 *
 * On return *adjust_iovec holds the number of bytes (header plus all group
 * names) the caller must skip to reach the payload. It is 0 when the
 * header is malformed.
 *
 * Returns 1 on a match, 0 otherwise. A header or name list that does not
 * fit inside iovec->iov_len is treated as "no match" instead of being read
 * past the end of the buffer.
 */
static inline int group_matches (
	struct iovec *iovec,
	unsigned int iov_len,
	struct totempg_group *groups_b,
	unsigned int group_b_cnt,
	unsigned int *adjust_iovec)
{
	unsigned short *group_len;
	char *group_name;
	size_t header_len;
	size_t names_end;
	unsigned int group_cnt;
	unsigned int i;
	unsigned int j;
#ifdef TOTEMPG_NEED_ALIGN
	struct iovec iovec_aligned = { NULL, 0 };
#endif

	assert (iov_len == 1);

	*adjust_iovec = 0;

	/*
	 * The group count itself must be present
	 */
	if (iovec->iov_len < sizeof (unsigned short)) {
		return (0);
	}

#ifdef TOTEMPG_NEED_ALIGN
	/*
	 * Align data structure for not i386 or x86_64
	 */
	if ((size_t)iovec->iov_base % 4 != 0) {
		iovec_aligned.iov_base = alloca(iovec->iov_len);
		memcpy(iovec_aligned.iov_base, iovec->iov_base, iovec->iov_len);
		iovec_aligned.iov_len = iovec->iov_len;
		iovec = &iovec_aligned;
	}
#endif

	group_len = (unsigned short *)iovec->iov_base;
	group_cnt = group_len[0];

	/*
	 * The whole length table must fit before it is read
	 */
	header_len = sizeof (unsigned short) * ((size_t)group_cnt + 1);
	if (header_len > iovec->iov_len) {
		return (0);
	}

	/*
	 * Calculate amount to adjust the iovec by before delivering to app,
	 * rejecting the message as soon as the names would overrun the buffer
	 */
	names_end = header_len;
	for (i = 1; i <= group_cnt; i++) {
		names_end += group_len[i];
		if (names_end > iovec->iov_len) {
			return (0);
		}
	}
	*adjust_iovec = (unsigned int)names_end;

	/*
	 * Determine if this message should be delivered to this instance
	 */
	group_name = ((char *)iovec->iov_base) + header_len;
	for (i = 1; i <= group_cnt; i++) {
		for (j = 0; j < group_b_cnt; j++) {
			if ((group_len[i] == groups_b[j].group_len) &&
				(memcmp (groups_b[j].group, group_name, group_len[i]) == 0)) {
				return (1);
			}
		}
		group_name += group_len[i];
	}

	return (0);
}
/*
 * Deliver one fully assembled message to every registered group instance
 * whose group list matches the message's group header.
 *
 * The group header is byte-swapped in place first when
 * endian_conversion_required is set. Each matching instance receives the
 * payload that follows the group header and names, together with the
 * unchanged endian_conversion_required flag.
 */
static inline void app_deliver_fn (
	unsigned int nodeid,
	void *msg,
	unsigned int msg_len,
	int endian_conversion_required)
{
	struct totempg_group_instance *instance;
	struct iovec stripped_iovec;
	unsigned int adjust_iovec;
	struct iovec *iovec;
	struct qb_list_head *list;
	struct iovec aligned_iovec = { NULL, 0 };

	if (endian_conversion_required) {
		group_endian_convert (msg, msg_len);
	}

	/*
	 * TODO: segmentation/assembly need to be redesigned to provide aligned access
	 * in all cases to avoid memory copies on non386 archs. Probably broke backwards
	 * compatibility
	 */

#ifdef TOTEMPG_NEED_ALIGN
	/*
	 * Align data structure for not i386 or x86_64
	 */
	aligned_iovec.iov_base = alloca(msg_len);
	aligned_iovec.iov_len = msg_len;
	memcpy(aligned_iovec.iov_base, msg, msg_len);
#else
	aligned_iovec.iov_base = msg;
	aligned_iovec.iov_len = msg_len;
#endif

	iovec = &aligned_iovec;

	qb_list_for_each(list, &totempg_groups_list) {
		instance = qb_list_entry (list, struct totempg_group_instance, list);

		/*
		 * Besides matching, the header must not claim more bytes than the
		 * message holds, otherwise the payload length below would wrap.
		 */
		if (group_matches (iovec, 1, instance->groups, instance->groups_cnt, &adjust_iovec) &&
			adjust_iovec <= iovec->iov_len) {
			stripped_iovec.iov_len = iovec->iov_len - adjust_iovec;
			stripped_iovec.iov_base = (char *)iovec->iov_base + adjust_iovec;

#ifdef TOTEMPG_NEED_ALIGN
			/*
			 * Align data structure for not i386 or x86_64.
			 * The address is tested as a whole: the previous form
			 * "ptr + adjust_iovec % 4 != 0" applied % to adjust_iovec
			 * only and compared a pointer with 0, so it was always true.
			 */
			if (((size_t)stripped_iovec.iov_base % 4) != 0) {
				/*
				 * Deal with misalignment
				 */
				stripped_iovec.iov_base =
					alloca (stripped_iovec.iov_len);
				memcpy (stripped_iovec.iov_base,
					 (char *)iovec->iov_base + adjust_iovec,
					stripped_iovec.iov_len);
			}
#endif
			instance->deliver_fn (
				nodeid,
				stripped_iovec.iov_base,
				stripped_iovec.iov_len,
				endian_conversion_required);
		}
	}
}
/*
 * Configuration change callback registered with totemsrp_initialize.
 * Passes every argument through unchanged to app_confchg_fn.
 */
static void totempg_confchg_fn (
enum totem_configuration_type configuration_type,
const unsigned int *member_list, size_t member_list_entries,
const unsigned int *left_list, size_t left_list_entries,
const unsigned int *joined_list, size_t joined_list_entries,
const struct memb_ring_id *ring_id)
{
// TODO optimize this
app_confchg_fn (configuration_type,
member_list, member_list_entries,
left_list, left_list_entries,
joined_list, joined_list_entries,
ring_id);
}
/*
 * Delivery callback registered with totemsrp_initialize.
 *
 * A received packet consists of a struct totempg_mcast, then msg_count
 * unsigned short lengths, then the packed message data. The data is
 * appended to the sender's assembly buffer; complete messages are handed to
 * app_deliver_fn and a trailing fragment, if any, is kept in the assembly
 * buffer until its continuation arrives.
 *
 * Packets that are shorter than their headers, or whose length does not
 * equal the header size plus the sum of the message lengths, are logged
 * and ignored.
 */
static void totempg_deliver_fn (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required)
{
struct totempg_mcast *mcast;
unsigned short *msg_lens;
int i;
struct assembly *assembly;
char header[FRAME_SIZE_MAX];
int msg_count;
int continuation;
int start;
const char *data;
int datasize;
struct iovec iov_delv;
size_t expected_msg_len;
/* NOTE(review): the assembly buffer is looked up before the packet is validated, so it stays on the in-use list on the early returns below */
assembly = assembly_ref (nodeid);
assert (assembly);
if (msg_len < sizeof(struct totempg_mcast)) {
log_printf(LOG_WARNING,
"Message (totempg_mcast) received from node " CS_PRI_NODE_ID " is too short... Ignoring.", nodeid);
return ;
}
/*
* Assemble the header into one block of data and
* assemble the packet contents into one block of data to simplify delivery
*/
/* The cast drops const: msg_count is byte-swapped in place inside the caller's buffer */
mcast = (struct totempg_mcast *)msg;
if (endian_conversion_required) {
mcast->msg_count = swab16 (mcast->msg_count);
}
msg_count = mcast->msg_count;
/* datasize = fixed header plus the table of per-message lengths */
datasize = sizeof (struct totempg_mcast) +
msg_count * sizeof (unsigned short);
if (msg_len < datasize) {
log_printf(LOG_WARNING,
"Message (totempg_mcast datasize) received from node " CS_PRI_NODE_ID
" is too short... Ignoring.", nodeid);
return ;
}
/* NOTE(review): assumes datasize never exceeds FRAME_SIZE_MAX, the size of header[] - confirm against the transport's maximum packet size */
memcpy (header, msg, datasize);
data = msg;
/* The length table is read, and byte-swapped if needed, in the local header copy */
msg_lens = (unsigned short *) (header + sizeof (struct totempg_mcast));
expected_msg_len = datasize;
for (i = 0; i < mcast->msg_count; i++) {
if (endian_conversion_required) {
msg_lens[i] = swab16 (msg_lens[i]);
}
expected_msg_len += msg_lens[i];
}
if (msg_len != expected_msg_len) {
log_printf(LOG_WARNING,
"Message (totempg_mcast) received from node " CS_PRI_NODE_ID
" doesn't have expected length of %zu (has %u) bytes... Ignoring.",
nodeid, expected_msg_len, msg_len);
return ;
}
/* Capacity of the assembly buffer is enforced only by this assert */
assert((assembly->index+msg_len) < sizeof(assembly->data));
/* Append the packed message data (everything after the headers) to the assembly buffer */
memcpy (&assembly->data[assembly->index], &data[datasize],
msg_len - datasize);
/*
* If the last message in the buffer is a fragment, then we
* can't deliver it. We'll first deliver the full messages
* then adjust the assembly buffer so we can add the rest of the
* fragment when it arrives.
*/
msg_count = mcast->fragmented ? mcast->msg_count - 1 : mcast->msg_count;
continuation = mcast->continuation;
/* First deliverable message: previously buffered fragment bytes plus the first packed message */
iov_delv.iov_base = (void *)&assembly->data[0];
iov_delv.iov_len = assembly->index + msg_lens[0];
/*
* Make sure that if this message is a continuation, that it
* matches the sequence number of the previous fragment.
* Also, if the first packed message is a continuation
* of a previous message, but the assembly buffer
* is empty, then we need to discard it since we can't
* assemble a complete message. Likewise, if this message isn't a
* continuation and the assembly buffer is empty, we have to discard
* the continued message.
*/
start = 0;
if (assembly->throw_away_mode == THROW_AWAY_ACTIVE) {
/* Throw away the first msg block */
/* NOTE(review): msg_lens[1] is read below without checking that the packet carries a second message - confirm */
if (mcast->fragmented == 0 || mcast->fragmented == 1) {
assembly->throw_away_mode = THROW_AWAY_INACTIVE;
assembly->index += msg_lens[0];
iov_delv.iov_base = (void *)&assembly->data[assembly->index];
iov_delv.iov_len = msg_lens[1];
start = 1;
}
} else
if (assembly->throw_away_mode == THROW_AWAY_INACTIVE) {
if (continuation == assembly->last_frag_num) {
assembly->last_frag_num = mcast->fragmented;
/* Deliver each complete message and advance index past it */
for (i = start; i < msg_count; i++) {
app_deliver_fn(nodeid, iov_delv.iov_base, iov_delv.iov_len,
endian_conversion_required);
assembly->index += msg_lens[i];
iov_delv.iov_base = (void *)&assembly->data[assembly->index];
if (i < (msg_count - 1)) {
iov_delv.iov_len = msg_lens[i + 1];
}
}
} else {
log_printf (LOG_DEBUG, "fragmented continuation %u is not equal to assembly last_frag_num %u",
continuation, assembly->last_frag_num);
assembly->throw_away_mode = THROW_AWAY_ACTIVE;
}
}
if (mcast->fragmented == 0) {
/*
* End of messages, dereference assembly struct
*/
assembly->last_frag_num = 0;
assembly->index = 0;
assembly_deref (assembly);
} else {
/*
* Message is fragmented, keep around assembly list
*/
/* With more than one packed message, the trailing fragment is moved to the start of the buffer */
if (mcast->msg_count > 1) {
memmove (&assembly->data[0],
&assembly->data[assembly->index],
msg_lens[msg_count]);
assembly->index = 0;
}
assembly->index += msg_lens[msg_count];
}
}
/*
* Totem Process Group Abstraction
* depends on poll abstraction, POSIX, IPV4
*/
/* Handle filled in by totemsrp_callback_token_create in totempg_initialize */
void *callback_token_received_handle;
/*
 * Token-received callback registered in totempg_initialize.
 *
 * Flushes whatever messages are currently staged in fragmentation_data as
 * one multicast, then resets the staging state. Does nothing when no
 * message is staged or when totemsrp_avail reports no room. Both
 * parameters are unused. Always returns 0.
 */
int callback_token_received_fn (enum totem_callback_token_type type,
	const void *data)
{
	struct totempg_mcast mcast;
	struct iovec iovecs[3];

	if (totempg_threaded_mode == 1) {
		pthread_mutex_lock (&mcast_msg_mutex);
	}

	/*
	 * Nothing staged, or no room in the send queue: leave state untouched.
	 * totemsrp_avail is only consulted when something is staged.
	 */
	if (mcast_packed_msg_count == 0 ||
		totemsrp_avail(totemsrp_context) == 0) {
		goto unlock_exit;
	}

	/*
	 * Zero the whole header first, as mcast_msg does, so that no
	 * uninitialised stack bytes are handed to totemsrp_mcast.
	 */
	memset(&mcast, 0, sizeof(mcast));
	mcast.header.version = 0;
	mcast.header.type = 0;
	mcast.fragmented = 0;

	/*
	 * Was the first message in this buffer a continuation of a
	 * fragmented message?
	 */
	mcast.continuation = fragment_continuation;
	fragment_continuation = 0;

	mcast.msg_count = mcast_packed_msg_count;

	iovecs[0].iov_base = (void *)&mcast;
	iovecs[0].iov_len = sizeof (struct totempg_mcast);
	iovecs[1].iov_base = (void *)mcast_packed_msg_lens;
	iovecs[1].iov_len = mcast_packed_msg_count * sizeof (unsigned short);
	iovecs[2].iov_base = (void *)&fragmentation_data[0];
	iovecs[2].iov_len = fragment_size;
	(void)totemsrp_mcast (totemsrp_context, iovecs, 3, 0);

	mcast_packed_msg_count = 0;
	fragment_size = 0;

unlock_exit:
	if (totempg_threaded_mode == 1) {
		pthread_mutex_unlock (&mcast_msg_mutex);
	}
	return (0);
}
/*
* Initialize the totem process group abstraction
*/
/*
 * Set up the process group layer on top of totemsrp.
 *
 * Copies the logging configuration out of totem_config, allocates the
 * staging buffer, initializes totemsrp with the totempg callbacks,
 * registers callback_token_received_fn as a token-received callback,
 * computes totempg_size_limit and initializes the list of group instances.
 *
 * Returns -1 when the staging buffer cannot be allocated, otherwise the
 * result of totemsrp_initialize. When totemsrp_initialize returns -1 the
 * staging buffer is released again.
 */
int totempg_initialize (
	qb_loop_t *poll_handle,
	struct totem_config *totem_config)
{
	int res;

	totempg_totem_config = totem_config;
	totempg_log_level_security = totem_config->totem_logging_configuration.log_level_security;
	totempg_log_level_error = totem_config->totem_logging_configuration.log_level_error;
	totempg_log_level_warning = totem_config->totem_logging_configuration.log_level_warning;
	totempg_log_level_notice = totem_config->totem_logging_configuration.log_level_notice;
	totempg_log_level_debug = totem_config->totem_logging_configuration.log_level_debug;
	totempg_log_printf = totem_config->totem_logging_configuration.log_printf;
	totempg_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;

	fragmentation_data = malloc (TOTEMPG_PACKET_SIZE);
	if (fragmentation_data == 0) {
		return (-1);
	}

	totemsrp_net_mtu_adjust (totem_config);

	res = totemsrp_initialize (
		poll_handle,
		&totemsrp_context,
		totem_config,
		&totempg_stats,
		totempg_deliver_fn,
		totempg_confchg_fn,
		totempg_waiting_trans_ack_cb);

	if (res == -1) {
		/*
		 * Nothing has been registered that uses the staging buffer yet,
		 * so give it back instead of leaking it.
		 */
		free (fragmentation_data);
		fragmentation_data = NULL;
		goto error_exit;
	}

	totemsrp_callback_token_create (
		totemsrp_context,
		&callback_token_received_handle,
		TOTEM_CALLBACK_TOKEN_RECEIVED,
		0,
		callback_token_received_fn,
		0);

	totempg_size_limit = (totemsrp_avail(totemsrp_context) - 1) *
		(totempg_totem_config->net_mtu -
		sizeof (struct totempg_mcast) - 16);

	qb_list_init (&totempg_groups_list);

error_exit:
	return (res);
}
/*
 * Shut down the underlying totemsrp instance, holding totempg_mutex in
 * threaded mode.
 * NOTE(review): fragmentation_data allocated by totempg_initialize is not
 * released here - confirm whether that is intentional.
 */
void totempg_finalize (void)
{
if (totempg_threaded_mode == 1) {
pthread_mutex_lock (&totempg_mutex);
}
totemsrp_finalize (totemsrp_context);
if (totempg_threaded_mode == 1) {
pthread_mutex_unlock (&totempg_mutex);
}
}
/*
* Multicast a message
*/
/*
 * Pack the message described by iovec_in into the staging buffer and send
 * it with totemsrp_mcast whenever the buffer fills up.
 *
 * Data that fits with room to spare is only staged (it is sent later, by a
 * subsequent call or by callback_token_received_fn). Data that fills or
 * overflows the buffer is sent immediately, marked as fragmented when more
 * of the same message follows.
 *
 * Returns 0 on success, and -1 when byte_count_send_ok refuses the message
 * or totemsrp_mcast returns -1. Holds mcast_msg_mutex in threaded mode.
 */
static int mcast_msg (
struct iovec *iovec_in,
unsigned int iov_len,
int guarantee)
{
int res = 0;
struct totempg_mcast mcast;
struct iovec iovecs[3];
struct iovec iovec[64];
int i;
int dest, src;
int max_packet_size = 0;
int copy_len = 0;
int copy_base = 0;
int total_size = 0;
if (totempg_threaded_mode == 1) {
pthread_mutex_lock (&mcast_msg_mutex);
}
totemsrp_event_signal (totemsrp_context, TOTEM_EVENT_NEW_MSG, 1);
/*
* Remove zero length iovectors from the list
*/
/* The local iovec[] copy has 64 slots; the limit is enforced only by this assert */
assert (iov_len < 64);
for (dest = 0, src = 0; src < iov_len; src++) {
if (iovec_in[src].iov_len) {
memcpy (&iovec[dest++], &iovec_in[src],
sizeof (struct iovec));
}
}
iov_len = dest;
/* Room left for data once the length slots of already packed messages plus this one are accounted for */
max_packet_size = TOTEMPG_PACKET_SIZE -
(sizeof (unsigned short) * (mcast_packed_msg_count + 1));
mcast_packed_msg_lens[mcast_packed_msg_count] = 0;
/*
* Check if we would overwrite new message queue
*/
for (i = 0; i < iov_len; i++) {
total_size += iovec[i].iov_len;
}
if (byte_count_send_ok (total_size + sizeof(unsigned short) *
(mcast_packed_msg_count)) == 0) {
if (totempg_threaded_mode == 1) {
pthread_mutex_unlock (&mcast_msg_mutex);
}
return(-1);
}
memset(&mcast, 0, sizeof(mcast));
mcast.header.version = 0;
/* i only advances once an iovec is fully consumed; copy_base tracks progress inside the current one */
for (i = 0; i < iov_len; ) {
mcast.fragmented = 0;
mcast.continuation = fragment_continuation;
copy_len = iovec[i].iov_len - copy_base;
/*
* If it all fits with room left over, copy it in.
* We need to leave at least sizeof(short) + 1 bytes in the
* fragment_buffer on exit so that max_packet_size + fragment_size
* doesn't exceed the size of the fragment_buffer on the next call.
*/
/* NOTE(review): the test uses the full iov_len rather than copy_len (the part not yet sent) - confirm this is intended */
if ((iovec[i].iov_len + fragment_size) <
(max_packet_size - sizeof (unsigned short))) {
memcpy (&fragmentation_data[fragment_size],
(char *)iovec[i].iov_base + copy_base, copy_len);
fragment_size += copy_len;
mcast_packed_msg_lens[mcast_packed_msg_count] += copy_len;
next_fragment = 1;
copy_len = 0;
copy_base = 0;
i++;
continue;
/*
* If it just fits or is too big, then send out what fits.
*/
} else {
unsigned char *data_ptr;
copy_len = min(copy_len, max_packet_size - fragment_size);
/* A chunk that fills a whole packet is sent straight from the caller's buffer, otherwise from the staging buffer */
if( copy_len == max_packet_size )
data_ptr = (unsigned char *)iovec[i].iov_base + copy_base;
else {
data_ptr = fragmentation_data;
}
/* The chunk is copied into the staging buffer in both cases */
memcpy (&fragmentation_data[fragment_size],
(unsigned char *)iovec[i].iov_base + copy_base, copy_len);
mcast_packed_msg_lens[mcast_packed_msg_count] += copy_len;
/*
* if we're not on the last iovec or the iovec is too large to
* fit, then indicate a fragment. This also means that the next
* message will have the continuation of this one.
*/
if ((i < (iov_len - 1)) ||
((copy_base + copy_len) < iovec[i].iov_len)) {
/* next_fragment is an unsigned char; skip 0 because 0 means "not fragmented" */
if (!next_fragment) {
next_fragment++;
}
fragment_continuation = next_fragment;
mcast.fragmented = next_fragment++;
assert(fragment_continuation != 0);
assert(mcast.fragmented != 0);
} else {
fragment_continuation = 0;
}
/*
* assemble the message and send it
*/
mcast.msg_count = ++mcast_packed_msg_count;
iovecs[0].iov_base = (void *)&mcast;
iovecs[0].iov_len = sizeof(struct totempg_mcast);
iovecs[1].iov_base = (void *)mcast_packed_msg_lens;
iovecs[1].iov_len = mcast_packed_msg_count *
sizeof(unsigned short);
iovecs[2].iov_base = (void *)data_ptr;
iovecs[2].iov_len = fragment_size + copy_len;
assert (totemsrp_avail(totemsrp_context) > 0);
res = totemsrp_mcast (totemsrp_context, iovecs, 3, guarantee);
/* NOTE(review): on failure the packing counters incremented above are not reset before leaving - confirm */
if (res == -1) {
goto error_exit;
}
/*
* Recalculate counts and indexes for the next.
*/
mcast_packed_msg_lens[0] = 0;
mcast_packed_msg_count = 0;
fragment_size = 0;
max_packet_size = TOTEMPG_PACKET_SIZE - (sizeof(unsigned short));
/*
* If the iovec all fit, go to the next iovec
*/
if ((copy_base + copy_len) == iovec[i].iov_len) {
copy_len = 0;
copy_base = 0;
i++;
/*
* Continue with the rest of the current iovec.
*/
} else {
copy_base += copy_len;
}
}
}
/*
* Bump only if we added message data. This may be zero if
* the last buffer just fit into the fragmentation_data buffer
* and we were at the last iovec.
*/
if (mcast_packed_msg_lens[mcast_packed_msg_count]) {
mcast_packed_msg_count++;
}
error_exit:
if (totempg_threaded_mode == 1) {
pthread_mutex_unlock (&mcast_msg_mutex);
}
return (res);
}
/*
* Determine if a message of msg_size could be queued
*/
/*
 * Report whether msg_count more messages fit in the send queue once the
 * already reserved slots are subtracted. Also records the current queue
 * availability in totempg_stats.msg_queue_avail.
 *
 * Returns 1 when there is room, 0 otherwise.
 */
static int msg_count_send_ok (
	int msg_count)
{
	int queue_avail;

	queue_avail = totemsrp_avail (totemsrp_context);
	totempg_stats.msg_queue_avail = queue_avail;

	if ((queue_avail - totempg_reserved) > msg_count) {
		return (1);
	}
	return (0);
}
/*
 * Report whether the send queue currently has room for byte_count bytes.
 * The byte count is converted to a number of packets using the per-packet
 * payload size (net_mtu minus the totempg_mcast header minus 16).
 *
 * Returns non-zero when enough queue slots are available.
 */
static int byte_count_send_ok (
	int byte_count)
{
	unsigned int packets_needed;
	int queue_avail;

	queue_avail = totemsrp_avail (totemsrp_context);

	packets_needed = (byte_count / (totempg_totem_config->net_mtu - sizeof (struct totempg_mcast) - 16)) + 1;

	return (queue_avail >= packets_needed);
}
/*
 * Reserve enough send queue slots for a message of msg_size bytes.
 * Adds the computed packet count to totempg_reserved, mirrors the new
 * total into totempg_stats.msg_reserved and returns the packet count.
 */
static int send_reserve (
	int msg_size)
{
	unsigned int packets_needed;

	packets_needed = (msg_size / (totempg_totem_config->net_mtu - sizeof (struct totempg_mcast) - 16)) + 1;

	totempg_reserved += packets_needed;
	totempg_stats.msg_reserved = totempg_reserved;

	return (packets_needed);
}
/*
 * Give back msg_count previously reserved send queue slots and mirror the
 * new total into totempg_stats.msg_reserved.
 */
static void send_release (
int msg_count)
{
totempg_reserved -= msg_count;
totempg_stats.msg_reserved = totempg_reserved;
}
/*
 * Unless built for a small memory footprint, replace MESSAGE_QUEUE_MAX with
 * a value derived at run time from the configured net_mtu.
 */
#ifndef HAVE_SMALL_MEMORY_FOOTPRINT
#undef MESSAGE_QUEUE_MAX
#define MESSAGE_QUEUE_MAX ((4 * MESSAGE_SIZE_MAX) / totempg_totem_config->net_mtu)
#endif /* HAVE_SMALL_MEMORY_FOOTPRINT */
/*
 * Percentage of the send queue in use: 100 minus the share of
 * MESSAGE_QUEUE_MAX that is still available after subtracting the reserved
 * slots.
 */
static uint32_t q_level_precent_used(void)
{
return (100 - (((totemsrp_avail(totemsrp_context) - totempg_reserved) * 100) / MESSAGE_QUEUE_MAX));
}
/*
 * Register a token callback with totemsrp, holding callback_token_mutex in
 * threaded mode. All arguments are passed through unchanged.
 *
 * Returns the result of totemsrp_callback_token_create.
 */
int totempg_callback_token_create (
	void **handle_out,
	enum totem_callback_token_type type,
	int delete,
	int (*callback_fn) (enum totem_callback_token_type type, const void *),
	const void *data)
{
	/* int, matching the function's return type, so a negative status survives */
	int res;

	if (totempg_threaded_mode == 1) {
		pthread_mutex_lock (&callback_token_mutex);
	}
	res = totemsrp_callback_token_create (totemsrp_context, handle_out, type, delete,
		callback_fn, data);
	if (totempg_threaded_mode == 1) {
		pthread_mutex_unlock (&callback_token_mutex);
	}
	return (res);
}
/*
 * Remove a token callback previously registered with totemsrp, holding
 * callback_token_mutex in threaded mode.
 */
void totempg_callback_token_destroy (
void *handle_out)
{
if (totempg_threaded_mode == 1) {
pthread_mutex_lock (&callback_token_mutex);
}
totemsrp_callback_token_destroy (totemsrp_context, handle_out);
if (totempg_threaded_mode == 1) {
pthread_mutex_unlock (&callback_token_mutex);
}
}
/*
* vi: set autoindent tabstop=4 shiftwidth=4 :
*/
/*
 * Create a new group instance with the given delivery and configuration
 * change callbacks, no joined groups and queue level QB_LOOP_MED, and link
 * it into totempg_groups_list.
 *
 * On success stores the instance in *totempg_groups_instance and returns 0.
 * Returns -1, leaving *totempg_groups_instance untouched, when the
 * allocation fails.
 */
int totempg_groups_initialize (
	void **totempg_groups_instance,
	void (*deliver_fn) (
		unsigned int nodeid,
		const void *msg,
		unsigned int msg_len,
		int endian_conversion_required),
	void (*confchg_fn) (
		enum totem_configuration_type configuration_type,
		const unsigned int *member_list, size_t member_list_entries,
		const unsigned int *left_list, size_t left_list_entries,
		const unsigned int *joined_list, size_t joined_list_entries,
		const struct memb_ring_id *ring_id))
{
	struct totempg_group_instance *new_instance;
	int status = -1;

	if (totempg_threaded_mode == 1) {
		pthread_mutex_lock (&totempg_mutex);
	}

	new_instance = malloc (sizeof (struct totempg_group_instance));
	if (new_instance != NULL) {
		new_instance->deliver_fn = deliver_fn;
		new_instance->confchg_fn = confchg_fn;
		new_instance->groups = 0;
		new_instance->groups_cnt = 0;
		new_instance->q_level = QB_LOOP_MED;
		qb_list_init (&new_instance->list);
		qb_list_add (&new_instance->list, &totempg_groups_list);
		status = 0;
	}

	if (totempg_threaded_mode == 1) {
		pthread_mutex_unlock (&totempg_mutex);
	}

	/* The out parameter is only written on success, after the lock is dropped */
	if (status == 0) {
		*totempg_groups_instance = new_instance;
	}
	return (status);
}
/*
 * Append group_cnt groups to the set joined by the instance.
 *
 * The instance's group array is grown with realloc and the new entries are
 * copied to its end. Returns 0 on success. Returns -1, leaving the
 * instance unchanged, when realloc returns NULL.
 */
int totempg_groups_join (
	void *totempg_groups_instance,
	const struct totempg_group *groups,
	size_t group_cnt)
{
	struct totempg_group_instance *inst = (struct totempg_group_instance *)totempg_groups_instance;
	struct totempg_group *grown;
	int status = -1;

	if (totempg_threaded_mode == 1) {
		pthread_mutex_lock (&totempg_mutex);
	}

	grown = realloc (inst->groups,
		sizeof (struct totempg_group) *
		(inst->groups_cnt + group_cnt));

	if (grown != 0) {
		/* New entries go right after the existing ones */
		memcpy (&grown[inst->groups_cnt],
			groups, group_cnt * sizeof (struct totempg_group));
		inst->groups = grown;
		inst->groups_cnt += group_cnt;
		status = 0;
	}

	if (totempg_threaded_mode == 1) {
		pthread_mutex_unlock (&totempg_mutex);
	}
	return (status);
}
/*
 * Placeholder: takes and releases totempg_mutex in threaded mode but does
 * not remove any group from the instance. All three parameters are unused.
 * Always returns 0.
 */
int totempg_groups_leave (
void *totempg_groups_instance,
const struct totempg_group *groups,
size_t group_cnt)
{
if (totempg_threaded_mode == 1) {
pthread_mutex_lock (&totempg_mutex);
}
if (totempg_threaded_mode == 1) {
pthread_mutex_unlock (&totempg_mutex);
}
return (0);
}
/* Sizes used for the on-stack group_len[] and iovec_mcast[] arrays of the mcast functions below */
#define MAX_IOVECS_FROM_APP 32
#define MAX_GROUPS_PER_MSG 32
/*
 * Multicast a message to all groups the instance has joined.
 *
 * Builds the group header (group count plus one length per group), followed
 * by the group names and the caller's iovecs, and hands the result to
 * mcast_msg. Holds totempg_mutex in threaded mode.
 *
 * Returns the result of mcast_msg, or -1 without sending when the number of
 * groups or iovecs does not fit the fixed-size arrays used here or the
 * 64-entry limit asserted by mcast_msg.
 */
int totempg_groups_mcast_joined (
	void *totempg_groups_instance,
	const struct iovec *iovec,
	unsigned int iov_len,
	int guarantee)
{
	struct totempg_group_instance *instance = (struct totempg_group_instance *)totempg_groups_instance;
	unsigned short group_len[MAX_GROUPS_PER_MSG + 1];
	struct iovec iovec_mcast[MAX_GROUPS_PER_MSG + 1 + MAX_IOVECS_FROM_APP];
	unsigned int i;
	unsigned int groups_cnt;
	int res;

	if (totempg_threaded_mode == 1) {
		pthread_mutex_lock (&totempg_mutex);
	}

	/*
	 * Refuse anything that would overrun group_len[] or iovec_mcast[], or
	 * trip the iovec limit asserted inside mcast_msg.
	 */
	if (instance->groups_cnt < 0 ||
		instance->groups_cnt > MAX_GROUPS_PER_MSG ||
		iov_len > MAX_IOVECS_FROM_APP ||
		(iov_len + instance->groups_cnt + 1) >= 64) {
		res = -1;
		goto unlock_exit;
	}
	groups_cnt = (unsigned int)instance->groups_cnt;

	/*
	 * Build group_len structure and the iovec_mcast structure
	 */
	group_len[0] = groups_cnt;
	for (i = 0; i < groups_cnt; i++) {
		group_len[i + 1] = instance->groups[i].group_len;
		iovec_mcast[i + 1].iov_len = instance->groups[i].group_len;
		iovec_mcast[i + 1].iov_base = (void *) instance->groups[i].group;
	}
	iovec_mcast[0].iov_len = (groups_cnt + 1) * sizeof (unsigned short);
	iovec_mcast[0].iov_base = group_len;

	/* The caller's data follows the header and the group names */
	for (i = 0; i < iov_len; i++) {
		iovec_mcast[i + groups_cnt + 1].iov_len = iovec[i].iov_len;
		iovec_mcast[i + groups_cnt + 1].iov_base = iovec[i].iov_base;
	}

	res = mcast_msg (iovec_mcast, iov_len + groups_cnt + 1, guarantee);

unlock_exit:
	if (totempg_threaded_mode == 1) {
		pthread_mutex_unlock (&totempg_mutex);
	}
	return (res);
}
/*
 * Recompute the instance's queue level from the percentage of the send
 * queue in use and invoke totem_queue_level_changed, when one is
 * registered, if the level changed.
 *
 * Bands: >= 75 critical, < 30 low, 41..49 good, 61..69 high. Any other
 * percentage keeps the current level.
 */
static void check_q_level(
	void *totempg_groups_instance)
{
	struct totempg_group_instance *inst = (struct totempg_group_instance *)totempg_groups_instance;
	int32_t previous_level = inst->q_level;
	int32_t used = q_level_precent_used();
	int32_t next_level = previous_level;

	if (used >= 75) {
		next_level = TOTEM_Q_LEVEL_CRITICAL;
	} else if (used < 30) {
		next_level = TOTEM_Q_LEVEL_LOW;
	} else if (used > 40 && used < 50) {
		next_level = TOTEM_Q_LEVEL_GOOD;
	} else if (used > 60 && used < 70) {
		next_level = TOTEM_Q_LEVEL_HIGH;
	}

	inst->q_level = next_level;

	if (totem_queue_level_changed && previous_level != next_level) {
		totem_queue_level_changed(next_level);
	}
}
/*
 * Public entry point for check_q_level: re-evaluates the queue level of the
 * given group instance.
 */
void totempg_check_q_level(
void *totempg_groups_instance)
{
struct totempg_group_instance *instance = (struct totempg_group_instance *)totempg_groups_instance;
check_q_level(instance);
}
/*
 * Try to reserve send queue space for a message to the joined groups.
 *
 * The size is the sum of the joined group name lengths and the iovec
 * lengths. Returns -1 when that size reaches totempg_size_limit, 0 when
 * the queue currently has no room, otherwise the number of slots reserved
 * by send_reserve. The instance's queue level is re-evaluated in every
 * case. Holds totempg_mutex and mcast_msg_mutex in threaded mode.
 */
int totempg_groups_joined_reserve (
	void *totempg_groups_instance,
	const struct iovec *iovec,
	unsigned int iov_len)
{
	struct totempg_group_instance *inst = (struct totempg_group_instance *)totempg_groups_instance;
	unsigned int total = 0;
	unsigned int idx;
	unsigned int reserved;

	if (totempg_threaded_mode == 1) {
		pthread_mutex_lock (&totempg_mutex);
		pthread_mutex_lock (&mcast_msg_mutex);
	}

	for (idx = 0; idx < inst->groups_cnt; idx++) {
		total += inst->groups[idx].group_len;
	}
	for (idx = 0; idx < iov_len; idx++) {
		total += iovec[idx].iov_len;
	}

	if (total >= totempg_size_limit) {
		reserved = -1;
	} else if (byte_count_send_ok (total)) {
		reserved = send_reserve (total);
	} else {
		reserved = 0;
	}

	check_q_level(inst);

	if (totempg_threaded_mode == 1) {
		pthread_mutex_unlock (&mcast_msg_mutex);
		pthread_mutex_unlock (&totempg_mutex);
	}
	return (reserved);
}
/*
 * Release msg_count previously reserved send queue slots via send_release,
 * holding totempg_mutex and mcast_msg_mutex in threaded mode.
 * Always returns 0.
 */
int totempg_groups_joined_release (int msg_count)
{
if (totempg_threaded_mode == 1) {
pthread_mutex_lock (&totempg_mutex);
pthread_mutex_lock (&mcast_msg_mutex);
}
send_release (msg_count);
if (totempg_threaded_mode == 1) {
pthread_mutex_unlock (&mcast_msg_mutex);
pthread_mutex_unlock (&totempg_mutex);
}
return 0;
}
/*
 * Multicast a message to an explicit list of groups.
 *
 * Builds the group header (group count plus one length per group), followed
 * by the group names and the caller's iovecs, and hands the result to
 * mcast_msg. totempg_groups_instance is not used. Holds totempg_mutex in
 * threaded mode.
 *
 * Returns the result of mcast_msg, or -1 without sending when groups_cnt or
 * iov_len does not fit the fixed-size arrays used here or the 64-entry
 * limit asserted by mcast_msg.
 */
int totempg_groups_mcast_groups (
	void *totempg_groups_instance,
	int guarantee,
	const struct totempg_group *groups,
	size_t groups_cnt,
	const struct iovec *iovec,
	unsigned int iov_len)
{
	unsigned short group_len[MAX_GROUPS_PER_MSG + 1];
	struct iovec iovec_mcast[MAX_GROUPS_PER_MSG + 1 + MAX_IOVECS_FROM_APP];
	size_t i;
	int res;

	if (totempg_threaded_mode == 1) {
		pthread_mutex_lock (&totempg_mutex);
	}

	/*
	 * Refuse anything that would overrun group_len[] or iovec_mcast[], or
	 * trip the iovec limit asserted inside mcast_msg.
	 */
	if (groups_cnt > MAX_GROUPS_PER_MSG ||
		iov_len > MAX_IOVECS_FROM_APP ||
		(iov_len + groups_cnt + 1) >= 64) {
		res = -1;
		goto unlock_exit;
	}

	/*
	 * Build group_len structure and the iovec_mcast structure
	 */
	group_len[0] = groups_cnt;
	for (i = 0; i < groups_cnt; i++) {
		group_len[i + 1] = groups[i].group_len;
		iovec_mcast[i + 1].iov_len = groups[i].group_len;
		iovec_mcast[i + 1].iov_base = (void *) groups[i].group;
	}
	iovec_mcast[0].iov_len = (groups_cnt + 1) * sizeof (unsigned short);
	iovec_mcast[0].iov_base = group_len;

	/* The caller's data follows the header and the group names */
	for (i = 0; i < iov_len; i++) {
		iovec_mcast[i + groups_cnt + 1].iov_len = iovec[i].iov_len;
		iovec_mcast[i + groups_cnt + 1].iov_base = iovec[i].iov_base;
	}

	res = mcast_msg (iovec_mcast, iov_len + groups_cnt + 1, guarantee);

unlock_exit:
	if (totempg_threaded_mode == 1) {
		pthread_mutex_unlock (&totempg_mutex);
	}
	return (res);
}
/*
* Returns -1 if error, 0 if can't send, 1 if can send the message
*/
/*
 * Sum the group name lengths and the iovec lengths and pass the total to
 * msg_count_send_ok, returning its answer. totempg_groups_instance is not
 * used. Holds totempg_mutex in threaded mode.
 *
 * NOTE(review): a byte total is handed to a helper whose parameter is a
 * message count - confirm this is intended.
 */
int totempg_groups_send_ok_groups (
	void *totempg_groups_instance,
	const struct totempg_group *groups,
	size_t groups_cnt,
	const struct iovec *iovec,
	unsigned int iov_len)
{
	unsigned int total = 0;
	unsigned int idx;
	unsigned int answer;

	if (totempg_threaded_mode == 1) {
		pthread_mutex_lock (&totempg_mutex);
	}

	idx = 0;
	while (idx < groups_cnt) {
		total += groups[idx].group_len;
		idx++;
	}

	idx = 0;
	while (idx < iov_len) {
		total += iovec[idx].iov_len;
		idx++;
	}

	answer = msg_count_send_ok (total);

	if (totempg_threaded_mode == 1) {
		pthread_mutex_unlock (&totempg_mutex);
	}
	return (answer);
}
/*
 * Forward an interface address/port assignment for interface iface_no to
 * totemsrp_iface_set and return its result.
 */
int totempg_iface_set (
	struct totem_ip_address *interface_addr,
	unsigned short ip_port,
	unsigned int iface_no)
{
	return (totemsrp_iface_set (
		totemsrp_context,
		interface_addr,
		ip_port,
		iface_no));
}
+int totempg_nodestatus_get (unsigned int nodeid, /* fetch the status of nodeid from totemsrp */
+ struct totem_node_status *node_status)
+{
+ memset(node_status, 0, sizeof(struct totem_node_status)); /* start from an all-zero struct before totemsrp fills it */
+ return totemsrp_nodestatus_get (totemsrp_context, nodeid, node_status); /* result is passed straight back to the caller */
+}
+
/*
 * Forward an interface query for nodeid to totemsrp_ifaces_get, passing
 * every argument through unchanged, and return its result.
 */
int totempg_ifaces_get (
	unsigned int nodeid,
	unsigned int *interface_id,
	struct totem_ip_address *interfaces,
	unsigned int interfaces_size,
	char ***status,
	unsigned int *iface_count)
{
	return (totemsrp_ifaces_get (
		totemsrp_context,
		nodeid,
		interface_id,
		interfaces,
		interfaces_size,
		status,
		iface_count));
}
/* Forward an event of the given type and value to totemsrp_event_signal */
void totempg_event_signal (enum totem_event_type type, int value)
{
totemsrp_event_signal (totemsrp_context, type, value);
}
/* Return the address of the file-level totempg_stats object (not a copy) */
void* totempg_get_stats (void)
{
return &totempg_stats;
}
/*
 * Forward the cipher and hash type names to totemsrp_crypto_set and return
 * its result.
 */
int totempg_crypto_set (
	const char *cipher_type,
	const char *hash_type)
{
	return (totemsrp_crypto_set (totemsrp_context, cipher_type, hash_type));
}
/* Size limit passed to snprintf when formatting a single interface entry */
#define ONE_IFACE_LEN 63
/*
 * Build a printable list of the interfaces of nodeid, one
 * "r(<index>) ip(<address>) " entry per interface with a non-zero family.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call, or a fixed error string when totempg_ifaces_get returns -1.
 */
const char *totempg_ifaces_print (unsigned int nodeid)
{
	static char iface_string[256 * INTERFACE_MAX];
	char one_iface[ONE_IFACE_LEN+1];
	struct totem_ip_address interfaces[INTERFACE_MAX];
	unsigned int iface_count;
	unsigned int iface_ids[INTERFACE_MAX];
	unsigned int i;
	size_t used = 0;
	size_t entry_len;
	int res;

	iface_string[0] = '\0';

	/* One query is enough; the result used to be fetched twice */
	res = totempg_ifaces_get (nodeid, iface_ids, interfaces, INTERFACE_MAX, NULL, &iface_count);
	if (res == -1) {
		return ("no interface found for nodeid");
	}

	/*
	 * Never index past the local arrays, whatever count was reported
	 */
	if (iface_count > INTERFACE_MAX) {
		iface_count = INTERFACE_MAX;
	}

	for (i = 0; i < iface_count; i++) {
		if (!interfaces[i].family) {
			continue;
		}
		/* i is unsigned, so %u */
		snprintf (one_iface, ONE_IFACE_LEN,
			"r(%u) ip(%s) ",
			i, totemip_print (&interfaces[i]));

		/*
		 * Bounded append: stop rather than overflow the static buffer
		 */
		entry_len = strlen (one_iface);
		if (used + entry_len >= sizeof (iface_string)) {
			break;
		}
		memcpy (iface_string + used, one_iface, entry_len + 1);
		used += entry_len;
	}
	return (iface_string);
}
/* Return whatever totemsrp_my_nodeid_get reports for the local totemsrp context */
unsigned int totempg_my_nodeid_get (void)
{
return (totemsrp_my_nodeid_get(totemsrp_context));
}
/* Return whatever totemsrp_my_family_get reports for the local totemsrp context */
int totempg_my_family_get (void)
{
return (totemsrp_my_family_get(totemsrp_context));
}
/* Pass the totem_service_ready callback on to totemsrp_service_ready_register */
extern void totempg_service_ready_register (
void (*totem_service_ready) (void))
{
totemsrp_service_ready_register (totemsrp_context, totem_service_ready);
}
/* Store fn as the callback check_q_level invokes when an instance's queue level changes */
void totempg_queue_level_register_callback (totem_queue_level_changed_fn fn)
{
totem_queue_level_changed = fn;
}
/* Forward to totemsrp_member_add for the given member address and ring; returns its result */
extern int totempg_member_add (
const struct totem_ip_address *member,
int ring_no)
{
return totemsrp_member_add (totemsrp_context, member, ring_no);
}
/* Forward to totemsrp_member_remove for the given member address and ring; returns its result */
extern int totempg_member_remove (
const struct totem_ip_address *member,
int ring_no)
{
return totemsrp_member_remove (totemsrp_context, member, ring_no);
}
/* Call totemsrp_reconfigure with the totem_config saved by totempg_initialize; returns its result */
extern int totempg_reconfigure (void)
{
return totemsrp_reconfigure (totemsrp_context, totempg_totem_config);
}
/* Forward the requested crypto reconfiguration phase, with the saved totem_config, to totemsrp; returns its result */
extern int totempg_crypto_reconfigure_phase (cfg_message_crypto_reconfig_phase_t phase)
{
return totemsrp_crypto_reconfigure_phase (totemsrp_context, totempg_totem_config, phase);
}
/*
 * Reset statistics. When TOTEMPG_STATS_CLEAR_TOTEM is set in flags the
 * totempg counters msg_reserved and msg_queue_avail are zeroed; flags is
 * then passed on to totemsrp_stats_clear in every case.
 */
extern void totempg_stats_clear (int flags)
{
	if (flags & TOTEMPG_STATS_CLEAR_TOTEM) {
		totempg_stats.msg_reserved = 0;
		totempg_stats.msg_queue_avail = 0;
	}

	/*
	 * Plain call: ISO C does not allow "return expr;" in a function
	 * returning void.
	 */
	totemsrp_stats_clear (totemsrp_context, flags);
}
/* Turn on mutex locking in this file (totempg_threaded_mode = 1) and enable threaded mode in totemsrp */
void totempg_threaded_mode_enable (void)
{
totempg_threaded_mode = 1;
totemsrp_threaded_mode_enable (totemsrp_context);
}
/* Forward to totemsrp_trans_ack */
void totempg_trans_ack (void)
{
totemsrp_trans_ack (totemsrp_context);
}
/* Forward to totemsrp_force_gather */
void totempg_force_gather (void)
{
totemsrp_force_gather(totemsrp_context);
}
/*
 * Copy the active totem configuration into *config.
 *
 * Assumes ->orig_interfaces is already allocated by the caller, with room
 * for INTERFACE_MAX entries. The caller's orig_interfaces pointer survives
 * the struct copy and receives a copy of the active interfaces[] array;
 * config->interfaces is set to NULL.
 */
void totempg_get_config(struct totem_config *config)
{
/* Save the caller's buffer pointer: the struct copy below overwrites it */
struct totem_interface *temp_if = config->orig_interfaces;
memcpy(config, totempg_totem_config, sizeof(struct totem_config));
config->orig_interfaces = temp_if;
memcpy(config->orig_interfaces, totempg_totem_config->interfaces, sizeof(struct totem_interface) * INTERFACE_MAX);
config->interfaces = NULL;
}
/*
 * Copy *config over the active totem configuration.
 *
 * The contents of config->interfaces (INTERFACE_MAX entries) are copied
 * into the existing interfaces[] array, and the active config keeps
 * pointing at that same array after the struct copy.
 * NOTE(review): config->interfaces must be non-NULL here, whereas
 * totempg_get_config sets it to NULL - confirm callers fill it in first.
 */
void totempg_put_config(struct totem_config *config)
{
struct totem_interface *temp_if = totempg_totem_config->interfaces;
/* Preserve the existing interfaces[] array as transports might have pointers saved */
memcpy(totempg_totem_config->interfaces, config->interfaces, sizeof(struct totem_interface) * INTERFACE_MAX);
memcpy(totempg_totem_config, config, sizeof(struct totem_config));
totempg_totem_config->interfaces = temp_if;
}
diff --git a/exec/totemsrp.c b/exec/totemsrp.c
index 0dadf521..949d367b 100644
--- a/exec/totemsrp.c
+++ b/exec/totemsrp.c
@@ -1,5214 +1,5235 @@
/*
* Copyright (c) 2003-2006 MontaVista Software, Inc.
* Copyright (c) 2006-2018 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* The first version of this code was based upon Yair Amir's PhD thesis:
* http://www.cs.jhu.edu/~yairamir/phd.ps (ch4,5).
*
* The current version of totemsrp implements the Totem protocol specified in:
* http://citeseer.ist.psu.edu/amir95totem.html
*
* The deviations from the above published protocols are:
* - token hold mode where token doesn't rotate on unused ring - reduces cpu
* usage on 1.6ghz xeon from 35% to less than 0.1% as measured by top
*/
#include <config.h>
#include <assert.h>
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netdb.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <sched.h>
#include <time.h>
#include <sys/time.h>
#include <sys/poll.h>
#include <sys/uio.h>
#include <limits.h>
#include <qb/qblist.h>
#include <qb/qbdefs.h>
#include <qb/qbutil.h>
#include <qb/qbloop.h>
#include <corosync/swab.h>
#include <corosync/sq.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/logsys.h>
#include "totemsrp.h"
#include "totemnet.h"
#include "icmap.h"
#include "totemconfig.h"
#include "cs_queue.h"
/* IPv4 loopback address; expands to an inet_addr() call at each use. */
#define LOCALHOST_IP inet_addr("127.0.0.1")
/* Size passed to sq_init() for both the regular and recovery sort queues. */
#define QUEUE_RTR_ITEMS_SIZE_MAX 16384 /* allow 16384 retransmit items */
/* Size passed to cs_queue_init() for retrans_message_queue. */
#define RETRANS_MESSAGE_QUEUE_SIZE_MAX 16384 /* allow 16384 messages to be queued */
#define RECEIVED_MESSAGE_QUEUE_SIZE_MAX 500 /* allow 500 messages to be queued */
#define MAXIOVS 5
#define RETRANSMIT_ENTRIES_MAX 30
/* Also the size of the orf_token_retransmit buffer in struct totemsrp_instance. */
#define TOKEN_SIZE_MAX 64000 /* bytes */
#define LEAVE_DUMMY_NODEID 0
/*
 * SRP address.
 *
 * Identifies a processor by its node id only; srp_addr_equal() compares two
 * addresses on this single field.
 */
struct srp_addr {
unsigned int nodeid;
};
/*
 * Rollover handling:
 * SEQNO_START_MSG is the starting sequence number after a new configuration.
 * This should remain zero, unless testing overflow, in which case
 * 0x7ffff000 and 0xfffff000 are good starting values.
 *
 * SEQNO_START_TOKEN is the starting sequence number after a new configuration
 * for a token. This should remain zero, unless testing overflow, in which
 * case 0x7fffff00 or 0xffffff00 are good starting values.
 */
#define SEQNO_START_MSG 0x0
#define SEQNO_START_TOKEN 0x0
/*
 * These can be used to test different rollover points
 * #define SEQNO_START_MSG 0xfffffe00
 * #define SEQNO_START_TOKEN 0xfffffe00
 */
/*
* These can be used to test the error recovery algorithms
* #define TEST_DROP_ORF_TOKEN_PERCENTAGE 30
* #define TEST_DROP_COMMIT_TOKEN_PERCENTAGE 30
* #define TEST_DROP_MCAST_PERCENTAGE 50
* #define TEST_RECOVERY_MSG_COUNT 300
*/
/*
 * Endian marker value.
 *
 * Incoming messages are compared against it to determine whether their
 * byte order differs from ours - if so they are converted.
 * NOTE(review): presumably senders store this value in the message header;
 * confirm against the send/receive code.
 *
 * Do not change.
 */
#define ENDIAN_LOCAL 0xff22
/*
 * Kinds of totemsrp message. The numeric values 0..5 are in the same order
 * as the entries of totemsrp_message_handlers.handler_functions.
 */
enum message_type {
MESSAGE_TYPE_ORF_TOKEN = 0, /* Ordering, Reliability, Flow (ORF) control Token */
MESSAGE_TYPE_MCAST = 1, /* ring ordered multicast message */
MESSAGE_TYPE_MEMB_MERGE_DETECT = 2, /* merge rings if there are available rings */
MESSAGE_TYPE_MEMB_JOIN = 3, /* membership join message */
MESSAGE_TYPE_MEMB_COMMIT_TOKEN = 4, /* membership commit token */
MESSAGE_TYPE_TOKEN_HOLD_CANCEL = 5, /* cancel the holding of the token */
};
/*
 * Encapsulation marker for a message.
 * NOTE(review): presumably carried in the message header - confirm at the
 * use sites, which are outside this section.
 */
enum encapsulation_type {
MESSAGE_ENCAPSULATED = 1,
MESSAGE_NOT_ENCAPSULATED = 2
};
/*
 * New membership algorithm local variables
 *
 * One entry of totemsrp_instance.consensus_list: a processor address plus a
 * flag. NOTE(review): 'set' presumably records that consensus was reached
 * for 'addr' - confirm in the memb_consensus_* helpers.
 */
struct consensus_list_item {
struct srp_addr addr;
int set;
};
/*
 * One registered token callback: the function, the token event type it is
 * registered for, and an opaque data pointer.
 * NOTE(review): 'list' presumably links the entry into one of the
 * token_callback_*_listhead lists and 'delete' presumably requests removal
 * after the callback runs - confirm in totemsrp_callback_token_create().
 */
struct token_callback_instance {
struct qb_list_head list;
int (*callback_fn) (enum totem_callback_token_type type, const void *);
enum totem_callback_token_type callback_type;
int delete;
void *data;
};
/*
 * Pair of integers named for the multicast and token channels.
 * NOTE(review): presumably socket descriptors; no user is visible in this
 * section - confirm before relying on it.
 */
struct totemsrp_socket {
int mcast;
int token;
};
/*
 * Packed header of a multicast message (see MESSAGE_TYPE_MCAST).
 * The structure is packed, so field order and sizes must not change.
 * totempg_mtu_changed() subtracts twice its size from the network MTU.
 */
struct mcast {
struct totem_message_header header;
struct srp_addr system_from;
unsigned int seq;
int this_seqno;
struct memb_ring_id ring_id;
unsigned int node_id;
int guarantee;
} __attribute__((packed));
/*
 * Packed element of orf_token.rtr_list: a ring id and a sequence number.
 * NOTE(review): presumably identifies one message to be retransmitted -
 * confirm in the token handling code.
 */
struct rtr_item {
struct memb_ring_id ring_id;
unsigned int seq;
}__attribute__((packed));
/*
 * Packed layout of the ORF (Ordering, Reliability, Flow control) token.
 * rtr_list is a zero-length trailing array; rtr_list_entries presumably
 * gives the number of rtr_item elements that follow - TODO confirm.
 */
struct orf_token {
struct totem_message_header header;
unsigned int seq;
unsigned int token_seq;
unsigned int aru;
unsigned int aru_addr;
struct memb_ring_id ring_id;
unsigned int backlog;
unsigned int fcc;
int retrans_flg;
int rtr_list_entries;
struct rtr_item rtr_list[0];
}__attribute__((packed));
/*
 * Packed fixed part of a membership join message (MESSAGE_TYPE_MEMB_JOIN).
 * end_of_memb_join is a zero-length marker for where the variable part
 * described below begins.
 */
struct memb_join {
struct totem_message_header header;
struct srp_addr system_from;
unsigned int proc_list_entries;
unsigned int failed_list_entries;
unsigned long long ring_seq;
unsigned char end_of_memb_join[0];
/*
* These parts of the data structure are dynamic:
* struct srp_addr proc_list[];
* struct srp_addr failed_list[];
*/
} __attribute__((packed));
/*
 * Packed layout of a merge-detect message
 * (MESSAGE_TYPE_MEMB_MERGE_DETECT): sender address and ring id.
 */
struct memb_merge_detect {
struct totem_message_header header;
struct srp_addr system_from;
struct memb_ring_id ring_id;
} __attribute__((packed));
/*
 * Packed layout of a token-hold-cancel message
 * (MESSAGE_TYPE_TOKEN_HOLD_CANCEL): header plus ring id.
 */
struct token_hold_cancel {
struct totem_message_header header;
struct memb_ring_id ring_id;
} __attribute__((packed));
/*
 * Packed per-member record; the dynamic memb_list[] part of
 * struct memb_commit_token is an array of these.
 */
struct memb_commit_token_memb_entry {
struct memb_ring_id ring_id;
unsigned int aru;
unsigned int high_delivered;
unsigned int received_flg;
}__attribute__((packed));
/*
 * Packed fixed part of the membership commit token
 * (MESSAGE_TYPE_MEMB_COMMIT_TOKEN). end_of_commit_token is a zero-length
 * marker for where the variable part described below begins.
 */
struct memb_commit_token {
struct totem_message_header header;
unsigned int token_seq;
struct memb_ring_id ring_id;
unsigned int retrans_flg;
int memb_index;
int addr_entries;
unsigned char end_of_commit_token[0];
/*
* These parts of the data structure are dynamic:
*
* struct srp_addr addr[PROCESSOR_COUNT_MAX];
* struct memb_commit_token_memb_entry memb_list[PROCESSOR_COUNT_MAX];
*/
}__attribute__((packed));
/*
 * Element type of the cs_queue message queues (new_message_queue,
 * new_message_queue_trans, retrans_message_queue): a pointer to the
 * message and its length.
 */
struct message_item {
struct mcast *mcast;
unsigned int msg_len;
};
/*
 * Element type of the regular and recovery sort queues: a pointer to the
 * message and its length.
 */
struct sort_queue_item {
struct mcast *mcast;
unsigned int msg_len;
};
/*
 * Membership protocol state of an instance (totemsrp_instance.memb_state).
 * totemsrp_instance_initialize() starts an instance in
 * MEMB_STATE_OPERATIONAL.
 */
enum memb_state {
MEMB_STATE_OPERATIONAL = 1,
MEMB_STATE_GATHER = 2,
MEMB_STATE_COMMIT = 3,
MEMB_STATE_RECOVERY = 4
};
/*
 * Complete state of one totemsrp instance.
 *
 * Allocated by totemsrp_initialize(), zero-filled and given its non-zero
 * defaults by totemsrp_instance_initialize(), and released by
 * totemsrp_finalize(). The log_printf()/LOGSYS_PERROR() macros read the
 * logging members through a local variable named 'instance'.
 */
struct totemsrp_instance {
int iface_changes;
int failed_to_recv;
/*
* Flow control mcasts and remcasts on last and current orf_token
*/
int fcc_remcast_last;
int fcc_mcast_last;
int fcc_remcast_current;
struct consensus_list_item consensus_list[PROCESSOR_COUNT_MAX];
int consensus_list_entries;
/* Index into totem_config->interfaces[] used for the bound address/node id */
int lowest_active_if;
struct srp_addr my_id;
struct totem_ip_address my_addrs[INTERFACE_MAX];
/*
* Membership lists; each my_*_list has a matching my_*_entries count below
*/
struct srp_addr my_proc_list[PROCESSOR_COUNT_MAX];
struct srp_addr my_failed_list[PROCESSOR_COUNT_MAX];
struct srp_addr my_new_memb_list[PROCESSOR_COUNT_MAX];
struct srp_addr my_trans_memb_list[PROCESSOR_COUNT_MAX];
struct srp_addr my_memb_list[PROCESSOR_COUNT_MAX];
struct srp_addr my_deliver_memb_list[PROCESSOR_COUNT_MAX];
struct srp_addr my_left_memb_list[PROCESSOR_COUNT_MAX];
unsigned int my_leave_memb_list[PROCESSOR_COUNT_MAX];
int my_proc_list_entries;
int my_failed_list_entries;
int my_new_memb_entries;
int my_trans_memb_entries;
int my_memb_entries;
int my_deliver_memb_entries;
int my_left_memb_entries;
int my_leave_memb_entries;
struct memb_ring_id my_ring_id;
struct memb_ring_id my_old_ring_id;
int my_aru_count;
int my_merge_detect_timeout_outstanding;
unsigned int my_last_aru;
int my_seq_unchanged;
int my_received_flg;
unsigned int my_high_seq_received;
unsigned int my_install_seq;
int my_rotation_counter;
int my_set_retrans_flg;
int my_retrans_flg_count;
unsigned int my_high_ring_delivered;
/* Computed in totemsrp_initialize() when use_heartbeat is set */
int heartbeat_timeout;
/*
* Queues used to order, deliver, and recover messages
*/
struct cs_queue new_message_queue;
struct cs_queue new_message_queue_trans;
struct cs_queue retrans_message_queue;
struct sq regular_sort_queue;
struct sq recovery_sort_queue;
/*
* Received up to and including
*/
unsigned int my_aru;
unsigned int my_high_delivered;
struct qb_list_head token_callback_received_listhead;
struct qb_list_head token_callback_sent_listhead;
char orf_token_retransmit[TOKEN_SIZE_MAX];
int orf_token_retransmit_size;
unsigned int my_token_seq;
/*
* Timers
*/
qb_loop_timer_handle timer_pause_timeout;
qb_loop_timer_handle timer_orf_token_timeout;
qb_loop_timer_handle timer_orf_token_warning;
qb_loop_timer_handle timer_orf_token_retransmit_timeout;
qb_loop_timer_handle timer_orf_token_hold_retransmit_timeout;
qb_loop_timer_handle timer_merge_detect_timeout;
qb_loop_timer_handle memb_timer_state_gather_join_timeout;
qb_loop_timer_handle memb_timer_state_gather_consensus_timeout;
qb_loop_timer_handle memb_timer_state_commit_timeout;
qb_loop_timer_handle timer_heartbeat_timeout;
/*
* Function and data used to log messages
*/
int totemsrp_log_level_security;
int totemsrp_log_level_error;
int totemsrp_log_level_warning;
int totemsrp_log_level_notice;
int totemsrp_log_level_debug;
int totemsrp_log_level_trace;
int totemsrp_subsys_id;
void (*totemsrp_log_printf) (
int level,
int subsys,
const char *function,
const char *file,
int line,
const char *format, ...)__attribute__((format(printf, 6, 7)));;
enum memb_state memb_state;
//TODO struct srp_addr next_memb;
qb_loop_t *totemsrp_poll_handle;
struct totem_ip_address mcast_address;
/*
* Callbacks supplied by the caller of totemsrp_initialize()
*/
void (*totemsrp_deliver_fn) (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required);
void (*totemsrp_confchg_fn) (
enum totem_configuration_type configuration_type,
const unsigned int *member_list, size_t member_list_entries,
const unsigned int *left_list, size_t left_list_entries,
const unsigned int *joined_list, size_t joined_list_entries,
const struct memb_ring_id *ring_id);
void (*totemsrp_service_ready_fn) (void);
void (*totemsrp_waiting_trans_ack_cb_fn) (
int waiting_trans_ack);
/*
* Ring id load/store hooks, copied from totem_config in totemsrp_initialize()
*/
void (*memb_ring_id_create_or_load) (
struct memb_ring_id *memb_ring_id,
unsigned int nodeid);
void (*memb_ring_id_store) (
const struct memb_ring_id *memb_ring_id,
unsigned int nodeid);
int global_seqno;
int my_token_held;
unsigned long long token_ring_id_seq;
unsigned int last_released;
unsigned int set_aru;
int old_ring_state_saved;
int old_ring_state_aru;
unsigned int old_ring_state_high_seq_received;
unsigned int my_last_seq;
struct timeval tv_old;
void *totemnet_context;
struct totem_config *totem_config;
unsigned int use_heartbeat;
unsigned int my_trc;
unsigned int my_pbl;
unsigned int my_cbl;
/* Nanosecond timestamp; pause_flush() compares it with the current time */
uint64_t pause_timestamp;
/* Set by totemsrp_instance_initialize() to point at commit_token_storage */
struct memb_commit_token *commit_token;
totemsrp_stats_t stats;
uint32_t orf_token_discard;
uint32_t originated_orf_token;
uint32_t threaded_mode_enabled;
uint32_t waiting_trans_ack;
int flushing;
/* Handles of the two statistics callbacks registered in totemsrp_initialize() */
void * token_recv_event_handle;
void * token_sent_event_handle;
char commit_token_storage[40000];
};
/*
 * Table of per-message-type handler functions plus the number of entries.
 * Every handler takes the instance, the raw message, its length and a flag
 * saying whether endian conversion is needed, and returns an int.
 */
struct message_handlers {
int count;
int (*handler_functions[6]) (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
};
/*
 * Reason codes passed to memb_state_gather_enter(). Each value indexes
 * gather_state_from_desc[]. TOTEMSRP_GSFROM_MAX must stay equal to the
 * highest real value because gsfrom_to_msg() uses it as its bounds check.
 */
enum gather_state_from {
TOTEMSRP_GSFROM_CONSENSUS_TIMEOUT = 0,
TOTEMSRP_GSFROM_GATHER_MISSING1 = 1,
TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_OPERATIONAL_STATE = 2,
TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED = 3,
TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_COMMIT_STATE = 4,
TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_RECOVERY_STATE = 5,
TOTEMSRP_GSFROM_FAILED_TO_RECEIVE = 6,
TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_OPERATIONAL_STATE = 7,
TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_GATHER_STATE = 8,
TOTEMSRP_GSFROM_MERGE_DURING_OPERATIONAL_STATE = 9,
TOTEMSRP_GSFROM_MERGE_DURING_GATHER_STATE = 10,
TOTEMSRP_GSFROM_MERGE_DURING_JOIN = 11,
TOTEMSRP_GSFROM_JOIN_DURING_OPERATIONAL_STATE = 12,
TOTEMSRP_GSFROM_JOIN_DURING_COMMIT_STATE = 13,
TOTEMSRP_GSFROM_JOIN_DURING_RECOVERY = 14,
TOTEMSRP_GSFROM_INTERFACE_CHANGE = 15,
TOTEMSRP_GSFROM_MAX = TOTEMSRP_GSFROM_INTERFACE_CHANGE,
};
/*
 * Human-readable text for each enum gather_state_from value, keyed by
 * designated initialisers. Read by gsfrom_to_msg(). A value added to the
 * enum needs an entry here as well.
 */
const char* gather_state_from_desc [] = {
[TOTEMSRP_GSFROM_CONSENSUS_TIMEOUT] = "consensus timeout",
[TOTEMSRP_GSFROM_GATHER_MISSING1] = "MISSING",
[TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_OPERATIONAL_STATE] = "The token was lost in the OPERATIONAL state.",
[TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED] = "The consensus timeout expired.",
[TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_COMMIT_STATE] = "The token was lost in the COMMIT state.",
[TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_RECOVERY_STATE] = "The token was lost in the RECOVERY state.",
[TOTEMSRP_GSFROM_FAILED_TO_RECEIVE] = "failed to receive",
[TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_OPERATIONAL_STATE] = "foreign message in operational state",
[TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_GATHER_STATE] = "foreign message in gather state",
[TOTEMSRP_GSFROM_MERGE_DURING_OPERATIONAL_STATE] = "merge during operational state",
[TOTEMSRP_GSFROM_MERGE_DURING_GATHER_STATE] = "merge during gather state",
[TOTEMSRP_GSFROM_MERGE_DURING_JOIN] = "merge during join",
[TOTEMSRP_GSFROM_JOIN_DURING_OPERATIONAL_STATE] = "join during operational state",
[TOTEMSRP_GSFROM_JOIN_DURING_COMMIT_STATE] = "join during commit state",
[TOTEMSRP_GSFROM_JOIN_DURING_RECOVERY] = "join during recovery",
[TOTEMSRP_GSFROM_INTERFACE_CHANGE] = "interface change",
};
/*
* forward decls
*/
/*
 * Message handlers, one per enum message_type value; all share the
 * signature required by struct message_handlers.
 */
static int message_handler_orf_token (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
static int message_handler_mcast (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
static int message_handler_memb_merge_detect (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
static int message_handler_memb_join (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
static int message_handler_memb_commit_token (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
static int message_handler_token_hold_cancel (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
/*
 * Instance setup, address helpers and membership/token machinery
 */
static void totemsrp_instance_initialize (struct totemsrp_instance *instance);
static void srp_addr_to_nodeid (
struct totemsrp_instance *instance,
unsigned int *nodeid_out,
struct srp_addr *srp_addr_in,
unsigned int entries);
static int srp_addr_equal (const struct srp_addr *a, const struct srp_addr *b);
static void memb_leave_message_send (struct totemsrp_instance *instance);
static void token_callbacks_execute (struct totemsrp_instance *instance, enum totem_callback_token_type type);
static void memb_state_gather_enter (struct totemsrp_instance *instance, enum gather_state_from gather_from);
static void messages_deliver_to_app (struct totemsrp_instance *instance, int skip, unsigned int end_point);
static int orf_token_mcast (struct totemsrp_instance *instance, struct orf_token *oken,
int fcc_mcasts_allowed);
static void messages_free (struct totemsrp_instance *instance, unsigned int token_aru);
static void memb_ring_id_set (struct totemsrp_instance *instance,
const struct memb_ring_id *ring_id);
static void target_set_completed (void *context);
static void memb_state_commit_token_update (struct totemsrp_instance *instance);
static void memb_state_commit_token_target_set (struct totemsrp_instance *instance);
static int memb_state_commit_token_send (struct totemsrp_instance *instance);
static int memb_state_commit_token_send_recovery (struct totemsrp_instance *instance, struct memb_commit_token *memb_commit_token);
static void memb_state_commit_token_create (struct totemsrp_instance *instance);
static int token_hold_cancel_send (struct totemsrp_instance *instance);
/*
 * Endian conversion helpers, one per message layout
 */
static void orf_token_endian_convert (const struct orf_token *in, struct orf_token *out);
static void memb_commit_token_endian_convert (const struct memb_commit_token *in, struct memb_commit_token *out);
static void memb_join_endian_convert (const struct memb_join *in, struct memb_join *out);
static void mcast_endian_convert (const struct mcast *in, struct mcast *out);
static void memb_merge_detect_endian_convert (
const struct memb_merge_detect *in,
struct memb_merge_detect *out);
static struct srp_addr srp_addr_endian_convert (struct srp_addr in);
/*
 * Timer callbacks; each receives an opaque data pointer
 */
static void timer_function_orf_token_timeout (void *data);
static void timer_function_orf_token_warning (void *data);
static void timer_function_pause_timeout (void *data);
static void timer_function_heartbeat_timeout (void *data);
static void timer_function_token_retransmit_timeout (void *data);
static void timer_function_token_hold_retransmit_timeout (void *data);
static void timer_function_merge_detect_timeout (void *data);
static void *totemsrp_buffer_alloc (struct totemsrp_instance *instance);
static void totemsrp_buffer_release (struct totemsrp_instance *instance, void *ptr);
static const char* gsfrom_to_msg(enum gather_state_from gsfrom);
/*
 * Callbacks handed to totemnet_initialize() by totemsrp_initialize()
 */
void main_deliver_fn (
void *context,
const void *msg,
unsigned int msg_len,
const struct sockaddr_storage *system_from);
void main_iface_change_fn (
void *context,
const struct totem_ip_address *iface_address,
unsigned int iface_no);
/*
 * Handler table: six entries, listed in the numeric order of
 * enum message_type (the per-entry comments name the matching value).
 */
struct message_handlers totemsrp_message_handlers = {
6,
{
message_handler_orf_token, /* MESSAGE_TYPE_ORF_TOKEN */
message_handler_mcast, /* MESSAGE_TYPE_MCAST */
message_handler_memb_merge_detect, /* MESSAGE_TYPE_MEMB_MERGE_DETECT */
message_handler_memb_join, /* MESSAGE_TYPE_MEMB_JOIN */
message_handler_memb_commit_token, /* MESSAGE_TYPE_MEMB_COMMIT_TOKEN */
message_handler_token_hold_cancel /* MESSAGE_TYPE_TOKEN_HOLD_CANCEL */
}
};
/*
 * Log a printf-style message through the instance's configured logger.
 *
 * Requires a variable named 'instance' (struct totemsrp_instance *) in the
 * calling scope. The subsystem id, function, file and line are supplied
 * automatically.
 *
 * The definition deliberately does NOT end in a semicolon: the caller's own
 * ';' completes the do/while(0) statement, so the macro behaves as a single
 * statement and can be used safely as the body of an if that has an else.
 */
#define log_printf(level, format, args...)		\
do {							\
	instance->totemsrp_log_printf (			\
		level, instance->totemsrp_subsys_id,	\
		__FUNCTION__, __FILE__, __LINE__,	\
		format, ##args);			\
} while (0)
/*
 * Log a message followed by ": <error text> (<error number>)" and a newline.
 *
 * err_num is turned into text with qb_strerror_r() using a local buffer of
 * LOGSYS_MAX_PERROR_MSG_LEN bytes. fmt must be a string literal because it
 * is concatenated with the suffix at compile time. Requires a variable
 * named 'instance' in the calling scope.
 */
#define LOGSYS_PERROR(err_num, level, fmt, args...) \
do { \
char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \
const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \
instance->totemsrp_log_printf ( \
level, instance->totemsrp_subsys_id, \
__FUNCTION__, __FILE__, __LINE__, \
fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \
} while(0)
/*
 * Translate a gather-state reason code into its descriptive text.
 *
 * Returns the matching gather_state_from_desc[] entry, or "UNKNOWN" for a
 * value above TOTEMSRP_GSFROM_MAX.
 */
static const char* gsfrom_to_msg(enum gather_state_from gsfrom)
{
	/* Guard first: anything past the last known reason has no text. */
	if (gsfrom > TOTEMSRP_GSFROM_MAX) {
		return "UNKNOWN";
	}

	return gather_state_from_desc[gsfrom];
}
/*
 * Put a freshly allocated instance into its starting state: zero the whole
 * structure, then set every member whose initial value is not zero.
 */
static void totemsrp_instance_initialize (struct totemsrp_instance *instance)
{
	memset (instance, 0, sizeof (*instance));

	/* Empty token callback lists */
	qb_list_init (&instance->token_callback_received_listhead);
	qb_list_init (&instance->token_callback_sent_listhead);

	/* Membership state */
	instance->memb_state = MEMB_STATE_OPERATIONAL;
	instance->my_received_flg = 1;
	instance->waiting_trans_ack = 1;

	/* Sequence numbers */
	instance->my_token_seq = SEQNO_START_TOKEN - 1;
	instance->my_aru = SEQNO_START_MSG;
	instance->my_high_seq_received = SEQNO_START_MSG;
	instance->my_high_delivered = SEQNO_START_MSG;
	instance->set_aru = -1;

	/* Token bookkeeping */
	instance->orf_token_discard = 0;
	instance->originated_orf_token = 0;

	/* The commit token lives in the instance's own storage buffer */
	instance->commit_token = (struct memb_commit_token *)instance->commit_token_storage;
}
/*
 * Detect a process pause and, if one happened, flush pending multicast
 * input.
 *
 * A pause is assumed when more than half of the configured token timeout
 * (in ms) has elapsed since instance->pause_timestamp. In that case
 * totemnet_recv_mcast_empty() is called repeatedly until it returns
 * something other than -1.
 *
 * Returns 0 when no pause was detected, otherwise the last result of
 * totemnet_recv_mcast_empty().
 */
static int pause_flush (struct totemsrp_instance *instance)
{
	uint64_t now_msec;
	uint64_t timestamp_msec;
	int res = 0;

	now_msec = (qb_util_nano_current_get () / QB_TIME_NS_IN_MSEC);
	timestamp_msec = instance->pause_timestamp / QB_TIME_NS_IN_MSEC;

	if ((now_msec - timestamp_msec) > (instance->totem_config->token_timeout / 2)) {
		/* The argument is cast to unsigned int, so it is printed with %u */
		log_printf (instance->totemsrp_log_level_notice,
			"Process pause detected for %u ms, flushing membership messages.", (unsigned int)(now_msec - timestamp_msec));
		/*
		 * -1 indicates an error from recvmsg
		 */
		do {
			res = totemnet_recv_mcast_empty (instance->totemnet_context);
		} while (res == -1);
	}

	return (res);
}
/*
 * Token callback that records token receive/send times in the instance's
 * statistics ring (stats.token[], TOTEM_TOKEN_STATS_MAX slots).
 *
 * On TOTEM_CALLBACK_TOKEN_RECEIVED the "latest" index advances (wrapping to
 * 0), the oldest slot is dropped and cleared if the ring is full, and the
 * receive time is stored with tx reset to 0. For any other type the send
 * time is stored in the current "latest" slot.
 *
 * The time is the current nanosecond clock divided down to milliseconds and
 * truncated to 32 bits. Always returns 0.
 */
static int token_event_stats_collector (enum totem_callback_token_type type, const void *void_instance)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)void_instance;
	unsigned long long nano_secs = qb_util_nano_current_get ();
	uint32_t time_now = (nano_secs / QB_TIME_NS_IN_MSEC);
	const int last_slot = TOTEM_TOKEN_STATS_MAX - 1;

	if (type != TOTEM_CALLBACK_TOKEN_RECEIVED) {
		/* Token sent: stamp the slot opened by the matching receive */
		instance->stats.token[instance->stats.latest_token].tx = time_now;
		return 0;
	}

	/* incr latest token the index */
	if (instance->stats.latest_token == last_slot) {
		instance->stats.latest_token = 0;
	} else {
		instance->stats.latest_token++;
	}

	if (instance->stats.earliest_token == instance->stats.latest_token) {
		/* we have filled up the array, start overwriting */
		if (instance->stats.earliest_token == last_slot) {
			instance->stats.earliest_token = 0;
		} else {
			instance->stats.earliest_token++;
		}

		instance->stats.token[instance->stats.earliest_token].rx = 0;
		instance->stats.token[instance->stats.earliest_token].tx = 0;
		instance->stats.token[instance->stats.earliest_token].backlog_calc = 0;
	}

	instance->stats.token[instance->stats.latest_token].rx = time_now;
	instance->stats.token[instance->stats.latest_token].tx = 0; /* in case we drop the token */

	return 0;
}
/*
 * MTU-change callback registered with totemnet_initialize().
 *
 * Stores the new MTU, reduced by twice the size of struct mcast, in
 * totem_config->net_mtu and logs both the raw and the adjusted value at
 * debug level. context is the totemsrp instance.
 */
static void totempg_mtu_changed(void *context, int net_mtu)
{
	struct totemsrp_instance *instance = context;

	instance->totem_config->net_mtu = net_mtu - 2 * sizeof (struct mcast);

	log_printf (
		instance->totemsrp_log_level_debug,
		"Net MTU changed to %d, new value is %d",
		net_mtu,
		instance->totem_config->net_mtu);
}
/*
* Exported interfaces
*/
/*
 * Create and initialise a totemsrp instance.
 *
 * poll_handle             - main loop handle, stored and passed to totemnet
 * srp_context             - receives the new instance on success
 * totem_config            - configuration; the pointer is kept by the instance
 * stats                   - stats->srp is pointed at the instance's statistics
 * deliver_fn              - stored as the message delivery callback
 * confchg_fn              - stored as the configuration change callback
 * waiting_trans_ack_cb_fn - stored, and invoked once with 1 during setup
 *
 * Returns 0 on success. Returns -1 if the instance cannot be allocated or
 * if totemnet_initialize() fails.
 */
int totemsrp_initialize (
	qb_loop_t *poll_handle,
	void **srp_context,
	struct totem_config *totem_config,
	totempg_stats_t *stats,
	void (*deliver_fn) (
		unsigned int nodeid,
		const void *msg,
		unsigned int msg_len,
		int endian_conversion_required),
	void (*confchg_fn) (
		enum totem_configuration_type configuration_type,
		const unsigned int *member_list, size_t member_list_entries,
		const unsigned int *left_list, size_t left_list_entries,
		const unsigned int *joined_list, size_t joined_list_entries,
		const struct memb_ring_id *ring_id),
	void (*waiting_trans_ack_cb_fn) (
		int waiting_trans_ack))
{
	struct totemsrp_instance *instance;
	int res;

	instance = malloc (sizeof (struct totemsrp_instance));
	if (instance == NULL) {
		goto error_exit;
	}

	totemsrp_instance_initialize (instance);

	instance->totemsrp_waiting_trans_ack_cb_fn = waiting_trans_ack_cb_fn;
	instance->totemsrp_waiting_trans_ack_cb_fn (1);

	stats->srp = &instance->stats;
	instance->stats.latest_token = 0;
	instance->stats.earliest_token = 0;

	instance->totem_config = totem_config;

	/*
	 * Configure logging
	 */
	instance->totemsrp_log_level_security = totem_config->totem_logging_configuration.log_level_security;
	instance->totemsrp_log_level_error = totem_config->totem_logging_configuration.log_level_error;
	instance->totemsrp_log_level_warning = totem_config->totem_logging_configuration.log_level_warning;
	instance->totemsrp_log_level_notice = totem_config->totem_logging_configuration.log_level_notice;
	instance->totemsrp_log_level_debug = totem_config->totem_logging_configuration.log_level_debug;
	instance->totemsrp_log_level_trace = totem_config->totem_logging_configuration.log_level_trace;
	instance->totemsrp_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
	instance->totemsrp_log_printf = totem_config->totem_logging_configuration.log_printf;

	/*
	 * Configure totem store and load functions
	 */
	instance->memb_ring_id_create_or_load = totem_config->totem_memb_ring_id_create_or_load;
	instance->memb_ring_id_store = totem_config->totem_memb_ring_id_store;

	/*
	 * Initialize local variables for totemsrp
	 */
	totemip_copy (&instance->mcast_address, &totem_config->interfaces[instance->lowest_active_if].mcast_addr);

	/*
	 * Display totem configuration
	 */
	log_printf (instance->totemsrp_log_level_debug,
		"Token Timeout (%d ms) retransmit timeout (%d ms)",
		totem_config->token_timeout, totem_config->token_retransmit_timeout);
	if (totem_config->token_warning) {
		uint32_t token_warning_ms = totem_config->token_warning * totem_config->token_timeout / 100;
		log_printf(instance->totemsrp_log_level_debug,
			"Token warning every %d ms (%d%% of Token Timeout)",
			token_warning_ms, totem_config->token_warning);
		/*
		 * Logged at the instance's configured debug level, like every
		 * other message in this function.
		 */
		if (token_warning_ms < totem_config->token_retransmit_timeout) {
			log_printf (instance->totemsrp_log_level_debug,
				"The token warning interval (%d ms) is less than the token retransmit timeout (%d ms) "
				"which can lead to spurious token warnings. Consider increasing the token_warning parameter.",
				token_warning_ms, totem_config->token_retransmit_timeout);
		}
	} else {
		log_printf(instance->totemsrp_log_level_debug,
			"Token warnings disabled");
	}
	log_printf (instance->totemsrp_log_level_debug,
		"token hold (%d ms) retransmits before loss (%d retrans)",
		totem_config->token_hold_timeout, totem_config->token_retransmits_before_loss_const);
	log_printf (instance->totemsrp_log_level_debug,
		"join (%d ms) send_join (%d ms) consensus (%d ms) merge (%d ms)",
		totem_config->join_timeout,
		totem_config->send_join_timeout,
		totem_config->consensus_timeout,
		totem_config->merge_timeout);
	log_printf (instance->totemsrp_log_level_debug,
		"downcheck (%d ms) fail to recv const (%d msgs)",
		totem_config->downcheck_timeout, totem_config->fail_to_recv_const);
	log_printf (instance->totemsrp_log_level_debug,
		"seqno unchanged const (%d rotations) Maximum network MTU %d", totem_config->seqno_unchanged_const, totem_config->net_mtu);
	log_printf (instance->totemsrp_log_level_debug,
		"window size per rotation (%d messages) maximum messages per rotation (%d messages)",
		totem_config->window_size, totem_config->max_messages);
	log_printf (instance->totemsrp_log_level_debug,
		"missed count const (%d messages)",
		totem_config->miss_count_const);
	log_printf (instance->totemsrp_log_level_debug,
		"send threads (%d threads)", totem_config->threads);
	log_printf (instance->totemsrp_log_level_debug,
		"heartbeat_failures_allowed (%d)", totem_config->heartbeat_failures_allowed);
	log_printf (instance->totemsrp_log_level_debug,
		"max_network_delay (%d ms)", totem_config->max_network_delay);

	cs_queue_init (&instance->retrans_message_queue, RETRANS_MESSAGE_QUEUE_SIZE_MAX,
		sizeof (struct message_item), instance->threaded_mode_enabled);

	sq_init (&instance->regular_sort_queue,
		QUEUE_RTR_ITEMS_SIZE_MAX, sizeof (struct sort_queue_item), 0);

	sq_init (&instance->recovery_sort_queue,
		QUEUE_RTR_ITEMS_SIZE_MAX, sizeof (struct sort_queue_item), 0);

	instance->totemsrp_poll_handle = poll_handle;
	instance->totemsrp_deliver_fn = deliver_fn;
	instance->totemsrp_confchg_fn = confchg_fn;
	instance->use_heartbeat = 1;

	timer_function_pause_timeout (instance);

	if ( totem_config->heartbeat_failures_allowed == 0 ) {
		log_printf (instance->totemsrp_log_level_debug,
			"HeartBeat is Disabled. To enable set heartbeat_failures_allowed > 0");
		instance->use_heartbeat = 0;
	}

	if (instance->use_heartbeat) {
		instance->heartbeat_timeout
			= (totem_config->heartbeat_failures_allowed) * totem_config->token_retransmit_timeout
				+ totem_config->max_network_delay;

		if (instance->heartbeat_timeout >= totem_config->token_timeout) {
			log_printf (instance->totemsrp_log_level_debug,
				"total heartbeat_timeout (%d ms) is not less than token timeout (%d ms)",
				instance->heartbeat_timeout,
				totem_config->token_timeout);
			log_printf (instance->totemsrp_log_level_debug,
				"heartbeat_timeout = heartbeat_failures_allowed * token_retransmit_timeout + max_network_delay");
			log_printf (instance->totemsrp_log_level_debug,
				"heartbeat timeout should be less than the token timeout. Heartbeat is disabled!!");
			instance->use_heartbeat = 0;
		}
		else {
			log_printf (instance->totemsrp_log_level_debug,
				"total heartbeat_timeout (%d ms)", instance->heartbeat_timeout);
		}
	}

	res = totemnet_initialize (
		poll_handle,
		&instance->totemnet_context,
		totem_config,
		stats->srp,
		instance,
		main_deliver_fn,
		main_iface_change_fn,
		totempg_mtu_changed,
		target_set_completed);
	if (res == -1) {
		/*
		 * NOTE(review): the instance and its queues are not released on
		 * this path. timer_function_pause_timeout() has already been
		 * handed the instance, so freeing it here would need that to be
		 * undone first - confirm before adding cleanup.
		 */
		goto error_exit;
	}

	instance->my_id.nodeid = instance->totem_config->interfaces[instance->lowest_active_if].boundto.nodeid;

	/*
	 * Must have net_mtu adjusted by totemnet_initialize first
	 */
	cs_queue_init (&instance->new_message_queue,
		MESSAGE_QUEUE_MAX,
		sizeof (struct message_item), instance->threaded_mode_enabled);

	cs_queue_init (&instance->new_message_queue_trans,
		MESSAGE_QUEUE_MAX,
		sizeof (struct message_item), instance->threaded_mode_enabled);

	/* Register the statistics collector for both token events */
	totemsrp_callback_token_create (instance,
		&instance->token_recv_event_handle,
		TOTEM_CALLBACK_TOKEN_RECEIVED,
		0,
		token_event_stats_collector,
		instance);
	totemsrp_callback_token_create (instance,
		&instance->token_sent_event_handle,
		TOTEM_CALLBACK_TOKEN_SENT,
		0,
		token_event_stats_collector,
		instance);

	*srp_context = instance;
	return (0);

error_exit:
	return (-1);
}
/*
 * Shut an instance down: send the leave message, finalize the network
 * layer, free all message and sort queues, then free the instance itself.
 * srp_context must not be used afterwards.
 */
void totemsrp_finalize (
	void *srp_context)
{
	struct totemsrp_instance *instance = srp_context;

	/* Announce the departure while the network layer is still up */
	memb_leave_message_send (instance);
	totemnet_finalize (instance->totemnet_context);

	/* Message queues */
	cs_queue_free (&instance->new_message_queue);
	cs_queue_free (&instance->new_message_queue_trans);
	cs_queue_free (&instance->retrans_message_queue);

	/* Sort queues */
	sq_free (&instance->regular_sort_queue);
	sq_free (&instance->recovery_sort_queue);

	free (instance);
}
+int totemsrp_nodestatus_get (
+ void *srp_context,
+ unsigned int nodeid,
+ struct totem_node_status *node_status)
+{
+ struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
+ int i;
+ /* Fills node_status for nodeid: stamps the structure version, marks the
+  * node reachable when it appears in my_proc_list, then lets
+  * totemnet_nodestatus_get() fill in the rest and supply the return value. */
+ node_status->version = TOTEM_NODE_STATUS_STRUCTURE_VERSION;
+ /* 'reachable' is only ever set to 1 below, never cleared.
+  * NOTE(review): presumably the caller zeroes node_status first - confirm. */
+ /* Fill in 'reachable' here as the lower level UDP[u] layers don't know */
+ for (i = 0; i < instance->my_proc_list_entries; i++) {
+ if (instance->my_proc_list[i].nodeid == nodeid) {
+ node_status->reachable = 1;
+ }
+ }
+ /* The return value comes straight from the totemnet layer */
+ return totemnet_nodestatus_get(instance->totemnet_context, nodeid, node_status);
+}
+
+
/*
 * Return configured interfaces. interfaces is an array of totem_ip addresses allocated by the caller,
 * with interfaces_size number of items; interface_id receives the interface number of each returned
 * address and is indexed the same way. iface_count is the final number of entries filled in by this
 * function.
 *
 * Function returns 0 on success, otherwise if the interfaces array is not big enough, -2 is returned
 * and iface_count holds the number of entries that did fit.
 *
 * NOTE: when no configured interface matches nodeid, this implementation returns 0 with
 * *iface_count set to 0 (it does not return -1).
 */
int totemsrp_ifaces_get (
	void *srp_context,
	unsigned int nodeid,
	unsigned int *interface_id,
	struct totem_ip_address *interfaces,
	unsigned int interfaces_size,
	char ***status,
	unsigned int *iface_count)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
	struct totem_ip_address *iface_ptr = interfaces;
	int res = 0;
	int i,n;
	unsigned int num_ifs = 0;

	memset(interfaces, 0, sizeof(struct totem_ip_address) * interfaces_size);
	*iface_count = INTERFACE_MAX;

	/* Stop scanning as soon as the caller's arrays are found to be too small */
	for (i = 0; i < INTERFACE_MAX && res == 0; i++) {
		for (n = 0; n < instance->totem_config->interfaces[i].member_count; n++) {
			if (!instance->totem_config->interfaces[i].configured ||
			    instance->totem_config->interfaces[i].member_list[n].nodeid != nodeid) {
				continue;
			}

			/* Check for room BEFORE writing so nothing lands past the arrays */
			if (num_ifs >= interfaces_size) {
				res = -2;
				break;
			}

			memcpy(iface_ptr, &instance->totem_config->interfaces[i].member_list[n], sizeof(struct totem_ip_address));
			interface_id[num_ifs] = i;
			iface_ptr++;
			num_ifs++;
		}
	}

	/* Called for 'status'; the count it stores is replaced with ours below */
	totemnet_ifaces_get(instance->totemnet_context, status, iface_count);
	*iface_count = num_ifs;

	return (res);
}
/*
 * Pass the requested cipher and hash type on to the totemnet layer and
 * return its result unchanged.
 */
int totemsrp_crypto_set (
	void *srp_context,
	const char *cipher_type,
	const char *hash_type)
{
	struct totemsrp_instance *srp = srp_context;

	return (totemnet_crypto_set (srp->totemnet_context, cipher_type, hash_type));
}
/*
 * Return the node id stored in this instance's own address (my_id).
 */
unsigned int totemsrp_my_nodeid_get (
	void *srp_context)
{
	struct totemsrp_instance *srp = srp_context;

	return (srp->my_id.nodeid);
}
/*
 * Return the address family of the address the lowest active interface
 * is bound to.
 */
int totemsrp_my_family_get (
	void *srp_context)
{
	struct totemsrp_instance *srp = srp_context;

	return (srp->totem_config->interfaces[srp->lowest_active_if].boundto.family);
}
/*
* Set operations for use by the membership algorithm
*/
/*
 * Two addresses are equal when their node ids match.
 * Returns 1 when equal, 0 otherwise.
 */
static int srp_addr_equal (const struct srp_addr *a, const struct srp_addr *b)
{
	return (a->nodeid == b->nodeid) ? 1 : 0;
}
/*
 * Copy the node id of each of the first 'entries' addresses in srp_addr_in
 * into nodeid_out. The instance argument is not used.
 */
static void srp_addr_to_nodeid (
	struct totemsrp_instance *instance,
	unsigned int *nodeid_out,
	struct srp_addr *srp_addr_in,
	unsigned int entries)
{
	unsigned int idx = 0;

	while (idx < entries) {
		nodeid_out[idx] = srp_addr_in[idx].nodeid;
		idx++;
	}
}
static struct srp_addr srp_addr_endian_convert (struct srp_addr in)
{
struct srp_addr res;
res.nodeid = swab32 (in.nodeid);
return (res);
}
/*
 * Empty the consensus list by resetting its entry count; the stored
 * entries themselves are left untouched.
 */
static void memb_consensus_reset (struct totemsrp_instance *instance)
{
	instance->consensus_list_entries = 0;
}
/*
 * Set difference: store in out_list every entry of one_list that does not
 * appear in two_list, preserving one_list order. *out_list_entries receives
 * the number of entries written.
 */
static void memb_set_subtract (
	struct srp_addr *out_list, int *out_list_entries,
	struct srp_addr *one_list, int one_list_entries,
	struct srp_addr *two_list, int two_list_entries)
{
	int one_idx;
	int two_idx;

	*out_list_entries = 0;

	for (one_idx = 0; one_idx < one_list_entries; one_idx++) {
		/* Look for the current entry in the list being subtracted */
		for (two_idx = 0; two_idx < two_list_entries; two_idx++) {
			if (srp_addr_equal (&one_list[one_idx], &two_list[two_idx])) {
				break;
			}
		}

		/* The scan ran to the end: the entry is not in two_list, keep it */
		if (two_idx == two_list_entries) {
			out_list[*out_list_entries] = one_list[one_idx];
			*out_list_entries = *out_list_entries + 1;
		}
	}
}
/*
 * Set consensus for a specific processor.
 *
 * If the processor already has an entry in the consensus list that entry is
 * updated, otherwise a new entry is appended to the list.
 */
static void memb_consensus_set (
	struct totemsrp_instance *instance,
	const struct srp_addr *addr)
{
	int slot;
	int is_new;

	/* Find the processor's slot; ends at the first free slot when absent */
	for (slot = 0; slot < instance->consensus_list_entries; slot++) {
		if (srp_addr_equal(addr, &instance->consensus_list[slot].addr)) {
			break; /* found entry */
		}
	}
	is_new = (slot == instance->consensus_list_entries);

	instance->consensus_list[slot].addr = *addr;
	instance->consensus_list[slot].set = 1;

	if (is_new) {
		instance->consensus_list_entries++;
	}
}
/*
 * Is consensus set for a specific processor.
 *
 * Returns the 'set' flag of the processor's consensus list entry, or 0 when
 * the processor has no entry.
 */
static int memb_consensus_isset (
	struct totemsrp_instance *instance,
	const struct srp_addr *addr)
{
	int idx = 0;

	while (idx < instance->consensus_list_entries) {
		if (srp_addr_equal (addr, &instance->consensus_list[idx].addr)) {
			return (instance->consensus_list[idx].set);
		}
		idx++;
	}

	return (0);
}
/*
 * Is consensus agreed upon based upon consensus database.
 *
 * Consensus is agreed when every processor in my_proc_list that is not in
 * my_failed_list has its consensus flag set. Returns 1 when agreed, 0 otherwise.
 */
static int memb_consensus_agreed (
	struct totemsrp_instance *instance)
{
	struct srp_addr token_memb[PROCESSOR_COUNT_MAX];
	int token_memb_entries = 0;
	int agreed = 1;
	int idx;

	/* token_memb = my_proc_list - my_failed_list */
	memb_set_subtract (token_memb, &token_memb_entries,
		instance->my_proc_list, instance->my_proc_list_entries,
		instance->my_failed_list, instance->my_failed_list_entries);

	for (idx = 0; idx < token_memb_entries; idx++) {
		if (memb_consensus_isset (instance, &token_memb[idx]) == 0) {
			agreed = 0;
			break;
		}
	}

	/*
	 * When both nodes agreed on our failure we don't care how many proc
	 * list items are left because we will create a single ring anyway.
	 * In every other case at least one member must remain.
	 */
	if (!agreed || instance->failed_to_recv != 1) {
		assert (token_memb_entries >= 1);
	}

	return (agreed);
}
/*
 * Collect into no_consensus_list every processor of my_proc_list whose
 * consensus flag is not set; *no_consensus_list_entries receives the count.
 *
 * comparison_list and comparison_list_entries are accepted but not used;
 * the scan always runs over instance->my_proc_list.
 */
static void memb_consensus_notset (
	struct totemsrp_instance *instance,
	struct srp_addr *no_consensus_list,
	int *no_consensus_list_entries,
	struct srp_addr *comparison_list,
	int comparison_list_entries)
{
	int idx;

	*no_consensus_list_entries = 0;

	for (idx = 0; idx < instance->my_proc_list_entries; idx++) {
		if (memb_consensus_isset (instance, &instance->my_proc_list[idx]) != 0) {
			continue;
		}
		no_consensus_list[*no_consensus_list_entries] = instance->my_proc_list[idx];
		*no_consensus_list_entries = *no_consensus_list_entries + 1;
	}
}
/*
 * Is set1 equal to set2. Entries can be in different orders.
 *
 * Returns 1 when both sets have the same number of entries and every entry
 * of set2 is present in set1, 0 otherwise.
 */
static int memb_set_equal (
	struct srp_addr *set1, int set1_entries,
	struct srp_addr *set2, int set2_entries)
{
	int idx1;
	int idx2;

	if (set1_entries != set2_entries) {
		return (0);
	}

	for (idx2 = 0; idx2 < set2_entries; idx2++) {
		for (idx1 = 0; idx1 < set1_entries; idx1++) {
			if (srp_addr_equal (&set1[idx1], &set2[idx2])) {
				break;
			}
		}
		/* Scan reached the end: this set2 entry is missing from set1 */
		if (idx1 == set1_entries) {
			return (0);
		}
	}

	return (1);
}
/*
 * Is subset fully contained in fullset.
 *
 * Returns 1 when every entry of subset is present in fullset, 0 otherwise.
 * A subset with more entries than fullset is rejected immediately.
 */
static int memb_set_subset (
	const struct srp_addr *subset, int subset_entries,
	const struct srp_addr *fullset, int fullset_entries)
{
	int i;
	int j;
	int found = 0;

	if (subset_entries > fullset_entries) {
		return (0);
	}

	for (i = 0; i < subset_entries; i++) {
		for (j = 0; j < fullset_entries; j++) {
			if (srp_addr_equal (&subset[i], &fullset[j])) {
				found = 1;
				/* One match is enough; no need to scan the rest of fullset */
				break;
			}
		}
		if (found == 0) {
			return (0);
		}
		found = 0;
	}

	return (1);
}
/*
 * Merge subset into fullset taking care not to add duplicates.
 *
 * Entries of subset that are not yet in fullset are appended to it and
 * *fullset_entries is increased accordingly.
 */
static void memb_set_merge (
	const struct srp_addr *subset, int subset_entries,
	struct srp_addr *fullset, int *fullset_entries)
{
	int sub_idx;
	int full_idx;

	for (sub_idx = 0; sub_idx < subset_entries; sub_idx++) {
		for (full_idx = 0; full_idx < *fullset_entries; full_idx++) {
			if (srp_addr_equal (&fullset[full_idx], &subset[sub_idx])) {
				break;
			}
		}

		/* Not present yet: append it to fullset */
		if (full_idx == *fullset_entries) {
			fullset[*fullset_entries] = subset[sub_idx];
			*fullset_entries = *fullset_entries + 1;
		}
	}
}
/*
 * Intersection of set1 and set2, restricted by ring id.
 *
 * For every entry of set2, the first matching entry of set1 is looked up;
 * it is stored in 'and' only when the ring id recorded for that set1 entry
 * (set1_ring_ids) is byte-for-byte equal to old_ring_id. *and_entries
 * receives the number of entries stored.
 */
static void memb_set_and_with_ring_id (
	struct srp_addr *set1,
	struct memb_ring_id *set1_ring_ids,
	int set1_entries,
	struct srp_addr *set2,
	int set2_entries,
	struct memb_ring_id *old_ring_id,
	struct srp_addr *and,
	int *and_entries)
{
	int idx1;
	int idx2;

	*and_entries = 0;

	for (idx2 = 0; idx2 < set2_entries; idx2++) {
		for (idx1 = 0; idx1 < set1_entries; idx1++) {
			if (!srp_addr_equal (&set1[idx1], &set2[idx2])) {
				continue;
			}

			/* Only the first match is considered, whatever its ring id */
			if (memcmp (&set1_ring_ids[idx1], old_ring_id, sizeof (struct memb_ring_id)) == 0) {
				and[*and_entries] = set1[idx1];
				*and_entries = *and_entries + 1;
			}
			break;
		}
	}
}
/*
 * Log the node ids of 'list' as one comma separated line at the given level,
 * labelled with 'string'. The id list is built in a 512 byte buffer; ids
 * that no longer fit are left out (the logged entry count is still
 * list_entries).
 */
static void memb_set_log(
	struct totemsrp_instance *instance,
	int level,
	const char *string,
	struct srp_addr *list,
	int list_entries)
{
	char list_str[512];
	char item[32];
	size_t used = 0;
	size_t item_len;
	int idx;

	memset(list_str, 0, sizeof(list_str));

	for (idx = 0; idx < list_entries; idx++) {
		/* Every id but the first is preceded by a comma */
		if (idx > 0) {
			snprintf(item, sizeof(item), "," CS_PRI_NODE_ID, list[idx].nodeid);
		} else {
			snprintf(item, sizeof(item), CS_PRI_NODE_ID, list[idx].nodeid);
		}

		item_len = strlen(item);
		if (used + item_len >= sizeof(list_str)) {
			break ;
		}

		/* Append the item including its terminating NUL */
		memcpy(list_str + used, item, item_len + 1);
		used += item_len;
	}

	log_printf(level, "List '%s' contains %d entries: %s", string, list_entries, list_str);
}
/*
 * Forget all recorded LEAVE node ids: reset the entry count and zero the list.
 */
static void my_leave_memb_clear(
	struct totemsrp_instance *instance)
{
	instance->my_leave_memb_entries = 0;
	memset(instance->my_leave_memb_list, 0, sizeof(instance->my_leave_memb_list));
}
/*
 * Look nodeid up in the list of recorded LEAVE node ids.
 * Returns nodeid when it is in the list, 0 otherwise.
 */
static unsigned int my_leave_memb_match(
	struct totemsrp_instance *instance,
	unsigned int nodeid)
{
	int idx;

	for (idx = 0; idx < instance->my_leave_memb_entries; idx++) {
		if (instance->my_leave_memb_list[idx] == nodeid) {
			return nodeid;
		}
	}

	return 0;
}
/*
 * Record nodeid in the list of LEAVE node ids unless it is already there.
 * When the list has no room left, a warning is logged and the id is dropped.
 */
static void my_leave_memb_set(
	struct totemsrp_instance *instance,
	unsigned int nodeid)
{
	int idx;

	/* Nothing to do when the node id is already recorded */
	for (idx = 0; idx < instance->my_leave_memb_entries; idx++) {
		if (instance->my_leave_memb_list[idx] == nodeid) {
			return;
		}
	}

	if (instance->my_leave_memb_entries >= (PROCESSOR_COUNT_MAX - 1)) {
		log_printf (instance->totemsrp_log_level_warning,
			"Cannot set LEAVE nodeid=" CS_PRI_NODE_ID, nodeid);
		return;
	}

	instance->my_leave_memb_list[instance->my_leave_memb_entries] = nodeid;
	instance->my_leave_memb_entries++;
}
/*
 * Obtain a buffer from the totemnet layer of this instance.
 * instance must not be NULL (asserted).
 */
static void *totemsrp_buffer_alloc (struct totemsrp_instance *instance)
{
	void *buffer;

	assert (instance != NULL);
	buffer = totemnet_buffer_alloc (instance->totemnet_context);

	return buffer;
}
/*
 * Hand the buffer ptr back to the totemnet layer of this instance.
 * instance must not be NULL (asserted).
 */
static void totemsrp_buffer_release (struct totemsrp_instance *instance, void *ptr)
{
	assert (instance != NULL);

	totemnet_buffer_release (instance->totemnet_context, ptr);
}
static void reset_token_retransmit_timeout (struct totemsrp_instance *instance)
{
int32_t res;
qb_loop_timer_del (instance->totemsrp_poll_handle,
instance->timer_orf_token_retransmit_timeout);
res = qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->totem_config->token_retransmit_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_token_retransmit_timeout,
&instance->timer_orf_token_retransmit_timeout);
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "reset_token_retransmit_timeout - qb_loop_timer_add error : %d", res);
}
}
static void start_merge_detect_timeout (struct totemsrp_instance *instance)
{
int32_t res;
if (instance->my_merge_detect_timeout_outstanding == 0) {
res = qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->totem_config->merge_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_merge_detect_timeout,
&instance->timer_merge_detect_timeout);
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "start_merge_detect_timeout - qb_loop_timer_add error : %d", res);
}
instance->my_merge_detect_timeout_outstanding = 1;
}
}
/*
 * Delete the merge detect timer and mark it as no longer outstanding.
 */
static void cancel_merge_detect_timeout (struct totemsrp_instance *instance)
{
	qb_loop_timer_del (instance->totemsrp_poll_handle,
		instance->timer_merge_detect_timeout);

	instance->my_merge_detect_timeout_outstanding = 0;
}
/*
 * ring_state_* is used to save and restore the sort queue
 * state when a recovery operation fails (and enters gather)
 */

/*
 * Save the current ring id, aru and high seq received as the "old ring"
 * state. Does nothing when a state is already saved.
 */
static void old_ring_state_save (struct totemsrp_instance *instance)
{
	if (instance->old_ring_state_saved != 0) {
		/* Keep the state that was saved first */
		return;
	}

	instance->old_ring_state_saved = 1;

	memcpy (&instance->my_old_ring_id, &instance->my_ring_id,
		sizeof (struct memb_ring_id));
	instance->old_ring_state_aru = instance->my_aru;
	instance->old_ring_state_high_seq_received = instance->my_high_seq_received;

	log_printf (instance->totemsrp_log_level_debug,
		"Saving state aru %x high seq received %x",
		instance->my_aru, instance->my_high_seq_received);
}
/*
 * Restore my_aru and my_high_seq_received from the saved old ring state
 * and log the restored values at debug level.
 */
static void old_ring_state_restore (struct totemsrp_instance *instance)
{
	instance->my_aru = instance->old_ring_state_aru;
	instance->my_high_seq_received = instance->old_ring_state_high_seq_received;

	log_printf (instance->totemsrp_log_level_debug,
		"Restoring instance->my_aru %x my high seq received %x",
		instance->my_aru, instance->my_high_seq_received);
}
/*
 * Mark the old ring state as not saved so that the next
 * old_ring_state_save() stores a fresh copy.
 */
static void old_ring_state_reset (struct totemsrp_instance *instance)
{
	log_printf (instance->totemsrp_log_level_debug,
		"Resetting old ring state");

	instance->old_ring_state_saved = 0;
}
static void reset_pause_timeout (struct totemsrp_instance *instance)
{
int32_t res;
qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_pause_timeout);
res = qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->totem_config->token_timeout * QB_TIME_NS_IN_MSEC / 5,
(void *)instance,
timer_function_pause_timeout,
&instance->timer_pause_timeout);
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "reset_pause_timeout - qb_loop_timer_add error : %d", res);
}
}
static void reset_token_warning (struct totemsrp_instance *instance) {
int32_t res;
qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_warning);
res = qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->totem_config->token_warning * instance->totem_config->token_timeout / 100 * QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_orf_token_warning,
&instance->timer_orf_token_warning);
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "reset_token_warning - qb_loop_timer_add error : %d", res);
}
}
static void reset_token_timeout (struct totemsrp_instance *instance) {
int32_t res;
qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_timeout);
res = qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->totem_config->token_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_orf_token_timeout,
&instance->timer_orf_token_timeout);
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "reset_token_timeout - qb_loop_timer_add error : %d", res);
}
if (instance->totem_config->token_warning)
reset_token_warning(instance);
}
static void reset_heartbeat_timeout (struct totemsrp_instance *instance) {
int32_t res;
qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_heartbeat_timeout);
res = qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->heartbeat_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_heartbeat_timeout,
&instance->timer_heartbeat_timeout);
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "reset_heartbeat_timeout - qb_loop_timer_add error : %d", res);
}
}
/*
 * Delete the token warning timer.
 */
static void cancel_token_warning (struct totemsrp_instance *instance)
{
	qb_loop_timer_del (instance->totemsrp_poll_handle,
		instance->timer_orf_token_warning);
}
/*
 * Delete the token timeout timer and, when token_warning is configured
 * (non-zero), the token warning timer too.
 */
static void cancel_token_timeout (struct totemsrp_instance *instance)
{
	qb_loop_timer_del (instance->totemsrp_poll_handle,
		instance->timer_orf_token_timeout);

	if (instance->totem_config->token_warning) {
		cancel_token_warning(instance);
	}
}
/*
 * Delete the heartbeat timer.
 */
static void cancel_heartbeat_timeout (struct totemsrp_instance *instance)
{
	qb_loop_timer_del (instance->totemsrp_poll_handle,
		instance->timer_heartbeat_timeout);
}
/*
 * Delete the token retransmit timer.
 */
static void cancel_token_retransmit_timeout (struct totemsrp_instance *instance)
{
	qb_loop_timer_del (instance->totemsrp_poll_handle,
		instance->timer_orf_token_retransmit_timeout);
}
static void start_token_hold_retransmit_timeout (struct totemsrp_instance *instance)
{
int32_t res;
res = qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->totem_config->token_hold_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_token_hold_retransmit_timeout,
&instance->timer_orf_token_hold_retransmit_timeout);
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "start_token_hold_retransmit_timeout - qb_loop_timer_add error : %d", res);
}
}
/*
 * Delete the token hold retransmit timer.
 */
static void cancel_token_hold_retransmit_timeout (struct totemsrp_instance *instance)
{
	qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_hold_retransmit_timeout);
}
/*
 * Handle expiry of the consensus timeout.
 *
 * When consensus is agreed, the consensus list is reset to contain only this
 * processor and the token timeout is re-armed. Otherwise every processor
 * without consensus is merged into my_failed_list and the gather state is
 * entered again.
 */
static void memb_state_consensus_timeout_expired (
	struct totemsrp_instance *instance)
{
	struct srp_addr no_consensus_list[PROCESSOR_COUNT_MAX];
	int no_consensus_list_entries;

	instance->stats.consensus_timeouts++;

	if (memb_consensus_agreed (instance) == 0) {
		/* Treat the processors we have no consensus from as failed */
		memb_consensus_notset (
			instance,
			no_consensus_list,
			&no_consensus_list_entries,
			instance->my_proc_list,
			instance->my_proc_list_entries);

		memb_set_merge (no_consensus_list, no_consensus_list_entries,
			instance->my_failed_list, &instance->my_failed_list_entries);

		memb_state_gather_enter (instance, TOTEMSRP_GSFROM_CONSENSUS_TIMEOUT);
		return;
	}

	memb_consensus_reset (instance);
	memb_consensus_set (instance, &instance->my_id);
	reset_token_timeout (instance); // REVIEWED
}
static void memb_join_message_send (struct totemsrp_instance *instance);
static void memb_merge_detect_transmit (struct totemsrp_instance *instance);
/*
 * Timers used for various states of the membership algorithm
 */

/*
 * Pause timer callback: record the current time in pause_timestamp and
 * re-arm the pause timer. data is the totemsrp instance.
 */
static void timer_function_pause_timeout (void *data)
{
	struct totemsrp_instance *srp = data;

	srp->pause_timestamp = qb_util_nano_current_get ();

	reset_pause_timeout (srp);
}
/*
 * Handle loss of the token in the RECOVERY state: restore the saved old
 * ring state, enter the gather state and count the event in the statistics.
 */
static void memb_recovery_state_token_loss (struct totemsrp_instance *instance)
{
	old_ring_state_restore (instance);

	memb_state_gather_enter (instance,
		TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_RECOVERY_STATE);

	instance->stats.recovery_token_lost++;
}
/*
 * Token warning timer callback. data is the totemsrp instance.
 *
 * While token_warning is configured (non-zero), log how many milliseconds
 * have passed since the rx timestamp of the latest token statistics entry
 * and re-arm the warning timer; otherwise cancel the warning timer.
 */
static void timer_function_orf_token_warning (void *data)
{
	struct totemsrp_instance *instance = data;
	uint64_t tv_diff;

	/* need to protect against the case where token_warning is set to 0 dynamically */
	if (instance->totem_config->token_warning) {
		tv_diff = qb_util_nano_current_get () / QB_TIME_NS_IN_MSEC -
			instance->stats.token[instance->stats.latest_token].rx;
		/* The value is passed as unsigned int, so it must be printed with %u */
		log_printf (instance->totemsrp_log_level_notice,
			"Token has not been received in %u ms ", (unsigned int) tv_diff);
		reset_token_warning(instance);
	} else {
		cancel_token_warning(instance);
	}
}
/*
 * Token timeout timer callback (also invoked directly from the heartbeat
 * timeout callback). data is the totemsrp instance.
 *
 * What happens depends on the current membership state; every state ends up
 * (re)entering the gather state and bumps its own "token lost" statistic.
 */
static void timer_function_orf_token_timeout (void *data)
{
struct totemsrp_instance *instance = data;
switch (instance->memb_state) {
case MEMB_STATE_OPERATIONAL:
log_printf (instance->totemsrp_log_level_debug,
"The token was lost in the OPERATIONAL state.");
log_printf (instance->totemsrp_log_level_notice,
"A processor failed, forming new configuration:"
" token timed out (%ums), waiting %ums for consensus.",
instance->totem_config->token_timeout,
instance->totem_config->consensus_timeout);
/* The interface check is requested before gather is entered */
totemnet_iface_check (instance->totemnet_context);
memb_state_gather_enter (instance, TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_OPERATIONAL_STATE);
instance->stats.operational_token_lost++;
break;
case MEMB_STATE_GATHER:
log_printf (instance->totemsrp_log_level_debug,
"The consensus timeout expired (%ums).",
instance->totem_config->consensus_timeout);
/*
 * Run the consensus timeout handling first, then enter gather
 * (again) with the "consensus timeout expired" reason.
 */
memb_state_consensus_timeout_expired (instance);
memb_state_gather_enter (instance, TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED);
instance->stats.gather_token_lost++;
break;
case MEMB_STATE_COMMIT:
log_printf (instance->totemsrp_log_level_debug,
"The token was lost in the COMMIT state.");
memb_state_gather_enter (instance, TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_COMMIT_STATE);
instance->stats.commit_token_lost++;
break;
case MEMB_STATE_RECOVERY:
log_printf (instance->totemsrp_log_level_debug,
"The token was lost in the RECOVERY state.");
/* Restores the old ring state and enters gather */
memb_recovery_state_token_loss (instance);
instance->orf_token_discard = 1;
break;
}
}
/*
 * Heartbeat timer callback: log the expiry together with the current
 * membership state and run the token timeout handling. data is the
 * totemsrp instance.
 */
static void timer_function_heartbeat_timeout (void *data)
{
	struct totemsrp_instance *instance = data;

	log_printf (instance->totemsrp_log_level_debug,
		"HeartBeat Timer expired Invoking token loss mechanism in state %d ", instance->memb_state);

	timer_function_orf_token_timeout (instance);
}
/*
 * Join timeout callback used while gathering. data is the totemsrp instance.
 *
 * In the GATHER and COMMIT states a join message is sent and the join timer
 * is re-armed with totem_config->join_timeout. Being called in the
 * OPERATIONAL or RECOVERY state is treated as a programming error.
 */
static void memb_timer_function_state_gather (void *data)
{
struct totemsrp_instance *instance = data;
int32_t res;
switch (instance->memb_state) {
case MEMB_STATE_OPERATIONAL:
case MEMB_STATE_RECOVERY:
assert (0); /* this should never happen */
break;
case MEMB_STATE_GATHER:
case MEMB_STATE_COMMIT:
memb_join_message_send (instance);
/*
* Restart the join timeout
*/
qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout);
res = qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->totem_config->join_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
memb_timer_function_state_gather,
&instance->memb_timer_state_gather_join_timeout);
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "memb_timer_function_state_gather - qb_loop_timer_add error : %d", res);
}
break;
}
}
/*
 * Consensus timer callback: run the consensus timeout handling.
 * data is the totemsrp instance.
 */
static void memb_timer_function_gather_consensus_timeout (void *data)
{
	memb_state_consensus_timeout_expired ((struct totemsrp_instance *)data);
}
/*
 * Move messages from the recovery sort queue into the regular sort queue.
 *
 * Every recovery queue item with sequence number SEQNO_START_MSG + 1 up to
 * my_aru is examined. Only encapsulated messages are considered; the inner
 * (old ring) message is added to the regular sort queue when its ring id
 * equals my_old_ring_id and its sequence number is not already in use there.
 * old_ring_state_high_seq_received is raised to the highest sequence number
 * added.
 */
static void deliver_messages_from_recovery_to_regular (struct totemsrp_instance *instance)
{
unsigned int i;
struct sort_queue_item *recovery_message_item;
struct sort_queue_item regular_message_item;
unsigned int range = 0;
int res;
void *ptr;
struct mcast *mcast;
log_printf (instance->totemsrp_log_level_debug,
"recovery to regular %x-%x", SEQNO_START_MSG + 1, instance->my_aru);
range = instance->my_aru - SEQNO_START_MSG;
/*
* Move messages from recovery to regular sort queue
*/
// todo should i be initialized to 0 or 1 ?
for (i = 1; i <= range; i++) {
res = sq_item_get (&instance->recovery_sort_queue,
i + SEQNO_START_MSG, &ptr);
/* Sequence numbers with no item in the recovery queue are skipped */
if (res != 0) {
continue;
}
recovery_message_item = ptr;
/*
* Convert recovery message into regular message
*/
mcast = recovery_message_item->mcast;
if (mcast->header.encapsulated == MESSAGE_ENCAPSULATED) {
/*
* Message is a recovery message encapsulated
* in a new ring message
*/
/* The inner message starts right after the outer struct mcast header */
regular_message_item.mcast =
(struct mcast *)(((char *)recovery_message_item->mcast) + sizeof (struct mcast));
regular_message_item.msg_len =
recovery_message_item->msg_len - sizeof (struct mcast);
mcast = regular_message_item.mcast;
} else {
/*
* TODO this case shouldn't happen
*/
continue;
}
log_printf (instance->totemsrp_log_level_debug,
"comparing if ring id is for this processors old ring seqno " CS_PRI_RING_ID_SEQ,
(uint64_t)mcast->seq);
/*
* Only add this message to the regular sort
* queue if it was originated with the same ring
* id as the previous ring
*/
if (memcmp (&instance->my_old_ring_id, &mcast->ring_id,
sizeof (struct memb_ring_id)) == 0) {
res = sq_item_inuse (&instance->regular_sort_queue, mcast->seq);
if (res == 0) {
sq_item_add (&instance->regular_sort_queue,
&regular_message_item, mcast->seq);
/* Track the highest old ring sequence number seen so far */
if (sq_lt_compare (instance->old_ring_state_high_seq_received, mcast->seq)) {
instance->old_ring_state_high_seq_received = mcast->seq;
}
}
} else {
log_printf (instance->totemsrp_log_level_debug,
"-not adding msg with seq no " CS_PRI_RING_ID_SEQ, (uint64_t)mcast->seq);
}
}
}
/*
* Change states in the state machine of the membership algorithm
*/
static void memb_state_operational_enter (struct totemsrp_instance *instance)
{
struct srp_addr joined_list[PROCESSOR_COUNT_MAX];
int joined_list_entries = 0;
unsigned int aru_save;
unsigned int joined_list_totemip[PROCESSOR_COUNT_MAX];
unsigned int trans_memb_list_totemip[PROCESSOR_COUNT_MAX];
unsigned int new_memb_list_totemip[PROCESSOR_COUNT_MAX];
unsigned int left_list[PROCESSOR_COUNT_MAX];
unsigned int i;
unsigned int res;
char left_node_msg[1024];
char joined_node_msg[1024];
char failed_node_msg[1024];
instance->originated_orf_token = 0;
memb_consensus_reset (instance);
old_ring_state_reset (instance);
deliver_messages_from_recovery_to_regular (instance);
log_printf (instance->totemsrp_log_level_trace,
"Delivering to app %x to %x",
instance->my_high_delivered + 1, instance->old_ring_state_high_seq_received);
aru_save = instance->my_aru;
instance->my_aru = instance->old_ring_state_aru;
messages_deliver_to_app (instance, 0, instance->old_ring_state_high_seq_received);
/*
* Calculate joined and left list
*/
memb_set_subtract (instance->my_left_memb_list,
&instance->my_left_memb_entries,
instance->my_memb_list, instance->my_memb_entries,
instance->my_trans_memb_list, instance->my_trans_memb_entries);
memb_set_subtract (joined_list, &joined_list_entries,
instance->my_new_memb_list, instance->my_new_memb_entries,
instance->my_trans_memb_list, instance->my_trans_memb_entries);
/*
* Install new membership
*/
instance->my_memb_entries = instance->my_new_memb_entries;
memcpy (&instance->my_memb_list, instance->my_new_memb_list,
sizeof (struct srp_addr) * instance->my_memb_entries);
instance->last_released = 0;
instance->my_set_retrans_flg = 0;
/*
* Deliver transitional configuration to application
*/
srp_addr_to_nodeid (instance, left_list, instance->my_left_memb_list,
instance->my_left_memb_entries);
srp_addr_to_nodeid (instance, trans_memb_list_totemip,
instance->my_trans_memb_list, instance->my_trans_memb_entries);
instance->totemsrp_confchg_fn (TOTEM_CONFIGURATION_TRANSITIONAL,
trans_memb_list_totemip, instance->my_trans_memb_entries,
left_list, instance->my_left_memb_entries,
0, 0, &instance->my_ring_id);
instance->waiting_trans_ack = 1;
instance->totemsrp_waiting_trans_ack_cb_fn (1);
// TODO we need to filter to ensure we only deliver those
// messages which are part of instance->my_deliver_memb
messages_deliver_to_app (instance, 1, instance->old_ring_state_high_seq_received);
instance->my_aru = aru_save;
/*
* Deliver regular configuration to application
*/
srp_addr_to_nodeid (instance, new_memb_list_totemip,
instance->my_new_memb_list, instance->my_new_memb_entries);
srp_addr_to_nodeid (instance, joined_list_totemip, joined_list,
joined_list_entries);
instance->totemsrp_confchg_fn (TOTEM_CONFIGURATION_REGULAR,
new_memb_list_totemip, instance->my_new_memb_entries,
0, 0,
joined_list_totemip, joined_list_entries, &instance->my_ring_id);
/*
* The recovery sort queue now becomes the regular
* sort queue. It is necessary to copy the state
* into the regular sort queue.
*/
sq_copy (&instance->regular_sort_queue, &instance->recovery_sort_queue);
instance->my_last_aru = SEQNO_START_MSG;
/* When making my_proc_list smaller, ensure that the
* now non-used entries are zero-ed out. There are some suspect
* assert's that assume that there is always 2 entries in the list.
* These fail when my_proc_list is reduced to 1 entry (and the
* valid [0] entry is the same as the 'unused' [1] entry).
*/
memset(instance->my_proc_list, 0,
sizeof (struct srp_addr) * instance->my_proc_list_entries);
instance->my_proc_list_entries = instance->my_new_memb_entries;
memcpy (instance->my_proc_list, instance->my_new_memb_list,
sizeof (struct srp_addr) * instance->my_memb_entries);
instance->my_failed_list_entries = 0;
/*
* TODO Not exactly to spec
*
* At the entry to this function all messages without a gap are
* deliered.
*
* This code throw away messages from the last gap in the sort queue
* to my_high_seq_received
*
* What should really happen is we should deliver all messages up to
* a gap, then delier the transitional configuration, then deliver
* the messages between the first gap and my_high_seq_received, then
* deliver a regular configuration, then deliver the regular
* configuration
*
* Unfortunately totempg doesn't appear to like this operating mode
* which needs more inspection
*/
i = instance->my_high_seq_received + 1;
do {
void *ptr;
i -= 1;
res = sq_item_get (&instance->regular_sort_queue, i, &ptr);
if (i == 0) {
break;
}
} while (res);
instance->my_high_delivered = i;
for (i = 0; i <= instance->my_high_delivered; i++) {
void *ptr;
res = sq_item_get (&instance->regular_sort_queue, i, &ptr);
if (res == 0) {
struct sort_queue_item *regular_message;
regular_message = ptr;
free (regular_message->mcast);
}
}
sq_items_release (&instance->regular_sort_queue, instance->my_high_delivered);
instance->last_released = instance->my_high_delivered;
if (joined_list_entries) {
int sptr = 0;
sptr += snprintf(joined_node_msg, sizeof(joined_node_msg)-sptr, " joined:");
for (i=0; i< joined_list_entries; i++) {
sptr += snprintf(joined_node_msg+sptr, sizeof(joined_node_msg)-sptr, " " CS_PRI_NODE_ID, joined_list_totemip[i]);
}
}
else {
joined_node_msg[0] = '\0';
}
if (instance->my_left_memb_entries) {
int sptr = 0;
int sptr2 = 0;
sptr += snprintf(left_node_msg, sizeof(left_node_msg)-sptr, " left:");
for (i=0; i< instance->my_left_memb_entries; i++) {
sptr += snprintf(left_node_msg+sptr, sizeof(left_node_msg)-sptr, " " CS_PRI_NODE_ID, left_list[i]);
}
for (i=0; i< instance->my_left_memb_entries; i++) {
if (my_leave_memb_match(instance, left_list[i]) == 0) {
if (sptr2 == 0) {
sptr2 += snprintf(failed_node_msg, sizeof(failed_node_msg)-sptr2, " failed:");
}
sptr2 += snprintf(failed_node_msg+sptr2, sizeof(left_node_msg)-sptr2, " " CS_PRI_NODE_ID, left_list[i]);
}
}
if (sptr2 == 0) {
failed_node_msg[0] = '\0';
}
}
else {
left_node_msg[0] = '\0';
failed_node_msg[0] = '\0';
}
my_leave_memb_clear(instance);
log_printf (instance->totemsrp_log_level_debug,
"entering OPERATIONAL state.");
log_printf (instance->totemsrp_log_level_notice,
"A new membership (" CS_PRI_RING_ID ") was formed. Members%s%s",
instance->my_ring_id.rep,
(uint64_t)instance->my_ring_id.seq,
joined_node_msg,
left_node_msg);
if (strlen(failed_node_msg)) {
log_printf (instance->totemsrp_log_level_notice,
"Failed to receive the leave message.%s",
failed_node_msg);
}
instance->memb_state = MEMB_STATE_OPERATIONAL;
instance->stats.operational_entered++;
instance->stats.continuous_gather = 0;
instance->my_received_flg = 1;
reset_pause_timeout (instance);
/*
* Save ring id information from this configuration to determine
* which processors are transitioning from old regular configuration
* in to new regular configuration on the next configuration change
*/
memcpy (&instance->my_old_ring_id, &instance->my_ring_id,
sizeof (struct memb_ring_id));
return;
}
/*
 * Enter the GATHER state.
 *
 * This processor is merged into my_proc_list, a join message is sent, the
 * join and consensus timers are restarted, the token related timers are
 * cancelled and the consensus list is reset to contain only this processor.
 * gather_from records why gather was entered; it is logged and, for the
 * "consensus timeout expired" reason, counted in stats.continuous_gather.
 */
static void memb_state_gather_enter (
struct totemsrp_instance *instance,
enum gather_state_from gather_from)
{
int32_t res;
instance->orf_token_discard = 1;
instance->originated_orf_token = 0;
/* Make sure our own address is part of the processor list */
memb_set_merge (
&instance->my_id, 1,
instance->my_proc_list, &instance->my_proc_list_entries);
memb_join_message_send (instance);
/*
* Restart the join timeout
*/
qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout);
res = qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->totem_config->join_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
memb_timer_function_state_gather,
&instance->memb_timer_state_gather_join_timeout);
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "memb_state_gather_enter - qb_loop_timer_add error(1) : %d", res);
}
/*
* Restart the consensus timeout
*/
qb_loop_timer_del (instance->totemsrp_poll_handle,
instance->memb_timer_state_gather_consensus_timeout);
res = qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->totem_config->consensus_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
memb_timer_function_gather_consensus_timeout,
&instance->memb_timer_state_gather_consensus_timeout);
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "memb_state_gather_enter - qb_loop_timer_add error(2) : %d", res);
}
/*
* Cancel the token loss and token retransmission timeouts
*/
cancel_token_retransmit_timeout (instance); // REVIEWED
cancel_token_timeout (instance); // REVIEWED
cancel_merge_detect_timeout (instance);
/* Start a fresh consensus round in which only we have agreed so far */
memb_consensus_reset (instance);
memb_consensus_set (instance, &instance->my_id);
log_printf (instance->totemsrp_log_level_debug,
"entering GATHER state from %d(%s).",
gather_from, gsfrom_to_msg(gather_from));
instance->memb_state = MEMB_STATE_GATHER;
instance->stats.gather_entered++;
if (gather_from == TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED) {
/*
* State 3 means gather, so we are continuously gathering.
*/
instance->stats.continuous_gather++;
}
return;
}
static void timer_function_token_retransmit_timeout (void *data);
/*
 * Callback invoked with the totemsrp instance as context: send the commit token.
 */
static void target_set_completed (
	void *context)
{
	struct totemsrp_instance *srp = context;

	memb_state_commit_token_send (srp);
}
/*
 * Enter the COMMIT state.
 *
 * The old ring state is saved, the commit token is updated and its target
 * set, the gather join/consensus timers are deleted, the ring id from the
 * commit token is installed and stored, the token retransmit and token
 * timeout timers are re-armed and the flow control variables are cleared.
 */
static void memb_state_commit_enter (
struct totemsrp_instance *instance)
{
old_ring_state_save (instance);
memb_state_commit_token_update (instance);
memb_state_commit_token_target_set (instance);
/* The gather timers are deleted and their handles cleared */
qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout);
instance->memb_timer_state_gather_join_timeout = 0;
qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_consensus_timeout);
instance->memb_timer_state_gather_consensus_timeout = 0;
/* Install the ring id carried by the commit token and store it */
memb_ring_id_set (instance, &instance->commit_token->ring_id);
instance->memb_ring_id_store (&instance->my_ring_id, instance->my_id.nodeid);
instance->token_ring_id_seq = instance->my_ring_id.seq;
log_printf (instance->totemsrp_log_level_debug,
"entering COMMIT state.");
instance->memb_state = MEMB_STATE_COMMIT;
reset_token_retransmit_timeout (instance); // REVIEWED
reset_token_timeout (instance); // REVIEWED
instance->stats.commit_entered++;
instance->stats.continuous_gather = 0;
/*
* reset all flow control variables since we are starting a new ring
*/
instance->my_trc = 0;
instance->my_pbl = 0;
instance->my_cbl = 0;
/*
* commit token sent after callback that token target has been set
*/
}
/*
 * Transition the membership state machine into MEMB_STATE_RECOVERY.
 *
 * instance     - totemsrp instance being moved to RECOVERY
 * commit_token - commit token; the member address array starts at
 *                end_of_commit_token and is followed by the per-member
 *                entry array (ring id, aru, high_delivered, received_flg)
 *
 * Steps, in order:
 *  - reset the recovery sort queue and the retransmit message queue
 *  - forward the commit token (memb_state_commit_token_send_recovery)
 *  - tell totemnet how many processors are in the new configuration
 *  - compute the transitional membership into my_trans_memb_list
 *  - if some member that is also in the transitional membership has
 *    received_flg == 0, copy the old ring messages in the range
 *    (low_ring_aru, old_ring_state_high_seq_received] from the regular
 *    sort queue into retrans_message_queue, each wrapped in a fresh
 *    mcast header marked MESSAGE_ENCAPSULATED
 *  - reset the sequence bookkeeping to SEQNO_START_MSG, restart the
 *    token timers and record the new state
 */
static void memb_state_recovery_enter (
struct totemsrp_instance *instance,
struct memb_commit_token *commit_token)
{
int i;
/* Stays 1 unless a transitional member reports received_flg == 0. */
int local_received_flg = 1;
unsigned int low_ring_aru;
unsigned int range = 0;
unsigned int messages_originated = 0;
const struct srp_addr *addr;
struct memb_commit_token_memb_entry *memb_list;
struct memb_ring_id my_new_memb_ring_id_list[PROCESSOR_COUNT_MAX];
/* Member addresses come first; the per-member entries follow them. */
addr = (const struct srp_addr *)commit_token->end_of_commit_token;
memb_list = (struct memb_commit_token_memb_entry *)(addr + commit_token->addr_entries);
log_printf (instance->totemsrp_log_level_debug,
"entering RECOVERY state.");
instance->orf_token_discard = 0;
instance->my_high_ring_delivered = 0;
sq_reinit (&instance->recovery_sort_queue, SEQNO_START_MSG);
cs_queue_reinit (&instance->retrans_message_queue);
/* Starting point; lowered below by members of the old ring. */
low_ring_aru = instance->old_ring_state_high_seq_received;
memb_state_commit_token_send_recovery (instance, commit_token);
instance->my_token_seq = SEQNO_START_TOKEN - 1;
/*
 * Build regular configuration
 */
totemnet_processor_count_set (
instance->totemnet_context,
commit_token->addr_entries);
/*
 * Build transitional configuration
 */
/*
 * NOTE(review): this loop is bounded by my_new_memb_entries while the
 * later loops use commit_token->addr_entries; presumably the two are
 * equal at this point - confirm against the caller.
 */
for (i = 0; i < instance->my_new_memb_entries; i++) {
memcpy (&my_new_memb_ring_id_list[i],
&memb_list[i].ring_id,
sizeof (struct memb_ring_id));
}
/*
 * NOTE(review): presumably keeps the members common to the new and the
 * old membership whose previous ring id equals my_old_ring_id - verify
 * against memb_set_and_with_ring_id.
 */
memb_set_and_with_ring_id (
instance->my_new_memb_list,
my_new_memb_ring_id_list,
instance->my_new_memb_entries,
instance->my_memb_list,
instance->my_memb_entries,
&instance->my_old_ring_id,
instance->my_trans_memb_list,
&instance->my_trans_memb_entries);
for (i = 0; i < instance->my_trans_memb_entries; i++) {
log_printf (instance->totemsrp_log_level_debug,
"TRANS [%d] member " CS_PRI_NODE_ID ":", i, instance->my_trans_memb_list[i].nodeid);
}
/* Dump every commit token entry at debug level. */
for (i = 0; i < instance->my_new_memb_entries; i++) {
log_printf (instance->totemsrp_log_level_debug,
"position [%d] member " CS_PRI_NODE_ID ":", i, addr[i].nodeid);
log_printf (instance->totemsrp_log_level_debug,
"previous ringid (" CS_PRI_RING_ID ")",
memb_list[i].ring_id.rep, (uint64_t)memb_list[i].ring_id.seq);
log_printf (instance->totemsrp_log_level_debug,
"aru %x high delivered %x received flag %d",
memb_list[i].aru,
memb_list[i].high_delivered,
memb_list[i].received_flg);
// assert (totemip_print (&memb_list[i].ring_id.rep) != 0);
}
/*
 * Determine if any received flag is false
 */
/*
 * On the first such member the deliver membership becomes a copy of the
 * transitional membership and the search stops.
 */
for (i = 0; i < commit_token->addr_entries; i++) {
if (memb_set_subset (&instance->my_new_memb_list[i], 1,
instance->my_trans_memb_list, instance->my_trans_memb_entries) &&
memb_list[i].received_flg == 0) {
instance->my_deliver_memb_entries = instance->my_trans_memb_entries;
memcpy (instance->my_deliver_memb_list, instance->my_trans_memb_list,
sizeof (struct srp_addr) * instance->my_trans_memb_entries);
local_received_flg = 0;
break;
}
}
if (local_received_flg == 1) {
goto no_originate;
} /* Else originate messages if we should */
/*
 * Calculate my_low_ring_aru, instance->my_high_ring_delivered for the transitional membership
 */
/*
 * Only entries that are in the deliver membership and whose ring id is
 * byte-equal to my_old_ring_id contribute.
 */
for (i = 0; i < commit_token->addr_entries; i++) {
if (memb_set_subset (&instance->my_new_memb_list[i], 1,
instance->my_deliver_memb_list,
instance->my_deliver_memb_entries) &&
memcmp (&instance->my_old_ring_id,
&memb_list[i].ring_id,
sizeof (struct memb_ring_id)) == 0) {
if (sq_lt_compare (memb_list[i].aru, low_ring_aru)) {
low_ring_aru = memb_list[i].aru;
}
if (sq_lt_compare (instance->my_high_ring_delivered, memb_list[i].high_delivered)) {
instance->my_high_ring_delivered = memb_list[i].high_delivered;
}
}
}
/*
 * Copy all old ring messages to instance->retrans_message_queue
 */
range = instance->old_ring_state_high_seq_received - low_ring_aru;
if (range == 0) {
/*
 * No messages to copy
 */
goto no_originate;
}
assert (range < QUEUE_RTR_ITEMS_SIZE_MAX);
log_printf (instance->totemsrp_log_level_debug,
"copying all old ring messages from %x-%x.",
low_ring_aru + 1, instance->old_ring_state_high_seq_received);
for (i = 1; i <= range; i++) {
struct sort_queue_item *sort_queue_item;
struct message_item message_item;
void *ptr;
int res;
res = sq_item_get (&instance->regular_sort_queue,
low_ring_aru + i, &ptr);
/* Sequence numbers with no stored item are skipped. */
if (res != 0) {
continue;
}
sort_queue_item = ptr;
messages_originated++;
memset (&message_item, 0, sizeof (struct message_item));
// TODO LEAK
message_item.mcast = totemsrp_buffer_alloc (instance);
assert (message_item.mcast);
/* Fresh outer header; the old message is appended right after it. */
memset(message_item.mcast, 0, sizeof (struct mcast));
message_item.mcast->header.magic = TOTEM_MH_MAGIC;
message_item.mcast->header.version = TOTEM_MH_VERSION;
message_item.mcast->header.type = MESSAGE_TYPE_MCAST;
message_item.mcast->system_from = instance->my_id;
message_item.mcast->header.encapsulated = MESSAGE_ENCAPSULATED;
message_item.mcast->header.nodeid = instance->my_id.nodeid;
assert (message_item.mcast->header.nodeid);
memcpy (&message_item.mcast->ring_id, &instance->my_ring_id,
sizeof (struct memb_ring_id));
message_item.msg_len = sort_queue_item->msg_len + sizeof (struct mcast);
/*
 * NOTE(review): no visible check that msg_len + sizeof (struct mcast)
 * fits the buffer returned by totemsrp_buffer_alloc - confirm its size.
 */
memcpy (((char *)message_item.mcast) + sizeof (struct mcast),
sort_queue_item->mcast,
sort_queue_item->msg_len);
cs_queue_item_add (&instance->retrans_message_queue, &message_item);
}
log_printf (instance->totemsrp_log_level_debug,
"Originated %d messages in RECOVERY.", messages_originated);
goto originated;
no_originate:
log_printf (instance->totemsrp_log_level_debug,
"Did not need to originate any messages in recovery.");
originated:
/* Both paths end here: restart sequence bookkeeping for the new ring. */
instance->my_aru = SEQNO_START_MSG;
instance->my_aru_count = 0;
instance->my_seq_unchanged = 0;
instance->my_high_seq_received = SEQNO_START_MSG;
instance->my_install_seq = SEQNO_START_MSG;
instance->last_released = SEQNO_START_MSG;
reset_token_timeout (instance); // REVIEWED
reset_token_retransmit_timeout (instance); // REVIEWED
instance->memb_state = MEMB_STATE_RECOVERY;
instance->stats.recovery_entered++;
instance->stats.continuous_gather = 0;
return;
}
/*
 * Event notification entry point.
 *
 * `type` and `value` are accepted but not inspected: every event simply
 * cancels a held token (a no-op when the token is not being held).
 */
void totemsrp_event_signal (void *srp_context, enum totem_event_type type, int value)
{
	token_hold_cancel_send ((struct totemsrp_instance *)srp_context);
}
int totemsrp_mcast (
void *srp_context,
struct iovec *iovec,
unsigned int iov_len,
int guarantee)
{
struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
int i;
struct message_item message_item;
char *addr;
unsigned int addr_idx;
struct cs_queue *queue_use;
if (instance->waiting_trans_ack) {
queue_use = &instance->new_message_queue_trans;
} else {
queue_use = &instance->new_message_queue;
}
if (cs_queue_is_full (queue_use)) {
log_printf (instance->totemsrp_log_level_debug, "queue full");
return (-1);
}
memset (&message_item, 0, sizeof (struct message_item));
/*
* Allocate pending item
*/
message_item.mcast = totemsrp_buffer_alloc (instance);
if (message_item.mcast == 0) {
goto error_mcast;
}
/*
* Set mcast header
*/
memset(message_item.mcast, 0, sizeof (struct mcast));
message_item.mcast->header.magic = TOTEM_MH_MAGIC;
message_item.mcast->header.version = TOTEM_MH_VERSION;
message_item.mcast->header.type = MESSAGE_TYPE_MCAST;
message_item.mcast->header.encapsulated = MESSAGE_NOT_ENCAPSULATED;
message_item.mcast->header.nodeid = instance->my_id.nodeid;
assert (message_item.mcast->header.nodeid);
message_item.mcast->guarantee = guarantee;
message_item.mcast->system_from = instance->my_id;
addr = (char *)message_item.mcast;
addr_idx = sizeof (struct mcast);
for (i = 0; i < iov_len; i++) {
memcpy (&addr[addr_idx], iovec[i].iov_base, iovec[i].iov_len);
addr_idx += iovec[i].iov_len;
}
message_item.msg_len = addr_idx;
log_printf (instance->totemsrp_log_level_trace, "mcasted message added to pending queue");
instance->stats.mcast_tx++;
cs_queue_item_add (queue_use, &message_item);
return (0);
error_mcast:
return (-1);
}
/*
 * Report how much room is left for new messages, as computed by
 * cs_queue_avail on the queue currently in use (the "trans" queue while
 * instance->waiting_trans_ack is set, the normal one otherwise).
 */
int totemsrp_avail (void *srp_context)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
	int room;

	cs_queue_avail (
		instance->waiting_trans_ack ?
			&instance->new_message_queue_trans :
			&instance->new_message_queue,
		&room);

	return (room);
}
/*
* ORF Token Management
*/
/*
 * Re-multicast the message stored under `seq`, if this node has it.
 *
 * The recovery sort queue is consulted in RECOVERY state, the regular
 * sort queue otherwise.
 *
 * Returns 0 when the message was handed to totemnet, -1 when `seq` is
 * outside the sort queue range or no item is stored for it.
 */
static int orf_token_remcast (
	struct totemsrp_instance *instance,
	int seq)
{
	struct sq *queue;
	struct sort_queue_item *stored;
	void *slot;

	queue = (instance->memb_state == MEMB_STATE_RECOVERY) ?
		&instance->recovery_sort_queue :
		&instance->regular_sort_queue;

	if (sq_in_range (queue, seq) == 0) {
		log_printf (instance->totemsrp_log_level_debug, "sq not in range");
		return (-1);
	}

	/* Nothing stored at seq: the caller keeps the retransmit request. */
	if (sq_item_get (queue, seq, &slot) != 0) {
		return (-1);
	}

	stored = slot;
	totemnet_mcast_noflush_send (
		instance->totemnet_context,
		stored->mcast,
		stored->msg_len);

	return (0);
}
/*
 * Release regular-ring messages that no longer need to be retained.
 *
 * The release point starts at token_aru and is replaced by
 * instance->my_last_aru and then instance->my_high_delivered whenever
 * sq_lt_compare ranks those before it. Nothing happens if the resulting
 * point precedes instance->last_released; otherwise every sequence
 * number in (last_released, release point] is released and
 * last_released is advanced.
 */
static void messages_free (
	struct totemsrp_instance *instance,
	unsigned int token_aru)
{
	unsigned int release_to = token_aru;
	unsigned int count;
	unsigned int step;

	if (sq_lt_compare (instance->my_last_aru, release_to)) {
		release_to = instance->my_last_aru;
	}
	if (sq_lt_compare (instance->my_high_delivered, release_to)) {
		release_to = instance->my_high_delivered;
	}

	/* Never go back before a point that was already released. */
	if (sq_lt_compare (release_to, instance->last_released)) {
		return;
	}

	count = release_to - instance->last_released;
	assert (count < QUEUE_RTR_ITEMS_SIZE_MAX);

	for (step = 1; step <= count; step++) {
		void *slot;

		/* Give the buffer back only if an item is actually stored. */
		if (sq_item_get (&instance->regular_sort_queue,
			instance->last_released + step, &slot) == 0) {
			totemsrp_buffer_release (instance,
				((struct sort_queue_item *)slot)->mcast);
		}
		sq_items_release (&instance->regular_sort_queue,
			instance->last_released + step);
	}
	instance->last_released += count;

	/* Log only when at least one sequence number was released. */
	if (count != 0) {
		log_printf (instance->totemsrp_log_level_trace,
			"releasing messages up to and including %x", release_to);
	}
}
/*
 * Advance instance->my_aru over the run of consecutively stored
 * messages that directly follows it, stopping at the first hole or at
 * instance->my_high_seq_received, whichever comes first.
 *
 * The recovery sort queue is used in RECOVERY state, the regular sort
 * queue otherwise.
 */
static void update_aru (
	struct totemsrp_instance *instance)
{
	struct sq *queue;
	unsigned int base = instance->my_aru;
	unsigned int span = instance->my_high_seq_received - instance->my_aru;
	unsigned int contiguous = 0;
	void *slot;

	queue = (instance->memb_state == MEMB_STATE_RECOVERY) ?
		&instance->recovery_sort_queue :
		&instance->regular_sort_queue;

	/* Count stored items until a gap is found. */
	while (contiguous < span &&
	       sq_item_get (queue, base + contiguous + 1, &slot) == 0) {
		contiguous++;
	}

	instance->my_aru += contiguous;
}
/*
* Multicasts pending messages onto the ring (requires orf_token possession)
*/
static int orf_token_mcast (
struct totemsrp_instance *instance,
struct orf_token *token,
int fcc_mcasts_allowed)
{
struct message_item *message_item = 0;
struct cs_queue *mcast_queue;
struct sq *sort_queue;
struct sort_queue_item sort_queue_item;
struct mcast *mcast;
unsigned int fcc_mcast_current;
if (instance->memb_state == MEMB_STATE_RECOVERY) {
mcast_queue = &instance->retrans_message_queue;
sort_queue = &instance->recovery_sort_queue;
reset_token_retransmit_timeout (instance); // REVIEWED
} else {
if (instance->waiting_trans_ack) {
mcast_queue = &instance->new_message_queue_trans;
} else {
mcast_queue = &instance->new_message_queue;
}
sort_queue = &instance->regular_sort_queue;
}
for (fcc_mcast_current = 0; fcc_mcast_current < fcc_mcasts_allowed; fcc_mcast_current++) {
if (cs_queue_is_empty (mcast_queue)) {
break;
}
message_item = (struct message_item *)cs_queue_item_get (mcast_queue);
message_item->mcast->seq = ++token->seq;
message_item->mcast->this_seqno = instance->global_seqno++;
/*
* Build IO vector
*/
memset (&sort_queue_item, 0, sizeof (struct sort_queue_item));
sort_queue_item.mcast = message_item->mcast;
sort_queue_item.msg_len = message_item->msg_len;
mcast = sort_queue_item.mcast;
memcpy (&mcast->ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id));
/*
* Add message to retransmit queue
*/
sq_item_add (sort_queue, &sort_queue_item, message_item->mcast->seq);
totemnet_mcast_noflush_send (
instance->totemnet_context,
message_item->mcast,
message_item->msg_len);
/*
* Delete item from pending queue
*/
cs_queue_item_remove (mcast_queue);
/*
* If messages mcasted, deliver any new messages to totempg
*/
instance->my_high_seq_received = token->seq;
}
update_aru (instance);
/*
* Return 1 if more messages are available for single node clusters
*/
return (fcc_mcast_current);
}
/*
 * Re-multicast messages named in the orf_token's retransmit list
 * (requires possession of the orf_token) and update that list with the
 * retransmits this processor itself still needs.
 *
 * instance    - totemsrp instance
 * orf_token   - token whose rtr_list / rtr_list_entries are modified
 * fcc_allowed - in: how many messages may be re-multicast;
 *               out: reduced by the number actually re-multicast
 *
 * Returns the number of messages re-multicast
 * (instance->fcc_remcast_current).
 */
static int orf_token_rtr (
	struct totemsrp_instance *instance,
	struct orf_token *orf_token,
	unsigned int *fcc_allowed)
{
	unsigned int res;
	unsigned int i, j;
	unsigned int found;
	struct sq *sort_queue;
	struct rtr_item *rtr_list;
	unsigned int range = 0;
	char retransmit_msg[1024];
	size_t msg_used;
	int written;

	if (instance->memb_state == MEMB_STATE_RECOVERY) {
		sort_queue = &instance->recovery_sort_queue;
	} else {
		sort_queue = &instance->regular_sort_queue;
	}

	rtr_list = &orf_token->rtr_list[0];

	if (orf_token->rtr_list_entries) {
		log_printf (instance->totemsrp_log_level_debug,
			"Retransmit List %d", orf_token->rtr_list_entries);

		/*
		 * Build the log line with bounded writes. Nothing in this
		 * function limits rtr_list_entries, so the line is truncated
		 * rather than allowed to run past retransmit_msg.
		 */
		strcpy (retransmit_msg, "Retransmit List: ");
		msg_used = strlen (retransmit_msg);
		for (i = 0; i < orf_token->rtr_list_entries; i++) {
			written = snprintf (&retransmit_msg[msg_used],
				sizeof (retransmit_msg) - msg_used,
				"%x ", rtr_list[i].seq);
			if (written < 0 ||
			    (size_t)written >= sizeof (retransmit_msg) - msg_used) {
				/* Out of room: drop the partial entry and stop. */
				retransmit_msg[msg_used] = '\0';
				break;
			}
			msg_used += (size_t)written;
		}
		log_printf (instance->totemsrp_log_level_notice,
			"%s", retransmit_msg);
	}

	/*
	 * Retransmit messages on orf_token's RTR list from RTR queue
	 */
	for (instance->fcc_remcast_current = 0, i = 0;
		instance->fcc_remcast_current < *fcc_allowed && i < orf_token->rtr_list_entries;) {

		/*
		 * If this retransmit request isn't from this configuration,
		 * try next rtr entry
		 */
		if (memcmp (&rtr_list[i].ring_id, &instance->my_ring_id,
			sizeof (struct memb_ring_id)) != 0) {

			i += 1;
			continue;
		}

		res = orf_token_remcast (instance, rtr_list[i].seq);
		if (res == 0) {
			/*
			 * Multicasted message, so no need to copy to new retransmit list.
			 * The entry is removed by shifting the tail down, so i is
			 * deliberately not advanced.
			 */
			orf_token->rtr_list_entries -= 1;
			assert (orf_token->rtr_list_entries >= 0);
			memmove (&rtr_list[i], &rtr_list[i + 1],
				sizeof (struct rtr_item) * (orf_token->rtr_list_entries - i));

			instance->stats.mcast_retx++;
			instance->fcc_remcast_current++;
		} else {
			i += 1;
		}
	}
	*fcc_allowed = *fcc_allowed - instance->fcc_remcast_current;

	/*
	 * Add messages to retransmit to RTR list
	 * but only retry if there is room in the retransmit list
	 */
	range = orf_token->seq - instance->my_aru;
	assert (range < QUEUE_RTR_ITEMS_SIZE_MAX);

	for (i = 1; (orf_token->rtr_list_entries < RETRANSMIT_ENTRIES_MAX) &&
		(i <= range); i++) {

		/*
		 * Ensure message is within the sort queue range
		 */
		res = sq_in_range (sort_queue, instance->my_aru + i);
		if (res == 0) {
			break;
		}

		/*
		 * Find if a message is missing from this processor
		 */
		res = sq_item_inuse (sort_queue, instance->my_aru + i);
		if (res == 0) {
			/*
			 * Determine how many times we have missed receiving
			 * this sequence number. sq_item_miss_count increments
			 * a counter for the sequence number. The miss count
			 * will be returned and compared. This allows time for
			 * delayed multicast messages to be received before
			 * declaring the message is missing and requesting a
			 * retransmit.
			 */
			res = sq_item_miss_count (sort_queue, instance->my_aru + i);
			if (res < instance->totem_config->miss_count_const) {
				continue;
			}

			/*
			 * Determine if missing message is already in retransmit list
			 */
			found = 0;
			for (j = 0; j < orf_token->rtr_list_entries; j++) {
				if (instance->my_aru + i == rtr_list[j].seq) {
					found = 1;
					break;
				}
			}
			if (found == 0) {
				/*
				 * Missing message not found in current retransmit list so add it
				 */
				memcpy (&rtr_list[orf_token->rtr_list_entries].ring_id,
					&instance->my_ring_id, sizeof (struct memb_ring_id));
				rtr_list[orf_token->rtr_list_entries].seq = instance->my_aru + i;
				orf_token->rtr_list_entries++;
			}
		}
	}
	return (instance->fcc_remcast_current);
}
/*
 * Send again the token copy saved in instance->orf_token_retransmit
 * (orf_token_retransmit_size bytes) through totemnet.
 */
static void token_retransmit (struct totemsrp_instance *instance)
{
	totemnet_token_send (
		instance->totemnet_context,
		instance->orf_token_retransmit,
		instance->orf_token_retransmit_size);
}
/*
 * Token retransmit timer callback; `data` is the totemsrp instance.
 *
 * In COMMIT, OPERATIONAL and RECOVERY state the saved token is sent
 * again and the retransmit timer is re-armed. In GATHER state nothing
 * is done.
 */
static void timer_function_token_retransmit_timeout (void *data)
{
	struct totemsrp_instance *instance = data;

	if (instance->memb_state == MEMB_STATE_COMMIT ||
	    instance->memb_state == MEMB_STATE_OPERATIONAL ||
	    instance->memb_state == MEMB_STATE_RECOVERY) {
		token_retransmit (instance);
		reset_token_retransmit_timeout (instance); // REVIEWED
	}
}
/*
 * Token-hold retransmit timer callback; `data` is the totemsrp instance.
 *
 * The saved token is sent again only in OPERATIONAL and RECOVERY state;
 * GATHER and COMMIT do nothing. Unlike the plain token retransmit
 * timer, this callback does not re-arm any timer itself.
 */
static void timer_function_token_hold_retransmit_timeout (void *data)
{
	struct totemsrp_instance *instance = data;

	if (instance->memb_state == MEMB_STATE_OPERATIONAL ||
	    instance->memb_state == MEMB_STATE_RECOVERY) {
		token_retransmit (instance);
	}
}
/*
 * Merge-detect timer callback; `data` is the totemsrp instance.
 *
 * Always clears my_merge_detect_timeout_outstanding. A merge-detect
 * message is transmitted only in OPERATIONAL state and only when this
 * node is the ring representative (my_ring_id.rep equals our node id).
 */
static void timer_function_merge_detect_timeout(void *data)
{
	struct totemsrp_instance *instance = data;

	instance->my_merge_detect_timeout_outstanding = 0;

	if (instance->memb_state != MEMB_STATE_OPERATIONAL) {
		return;
	}

	if (instance->my_ring_id.rep == instance->my_id.nodeid) {
		memb_merge_detect_transmit (instance);
	}
}
/*
 * Send orf_token to the next member (requires possession of the token).
 *
 * The token is stamped with our node id and a copy - header plus
 * rtr_list_entries retransmit items - is always saved in
 * instance->orf_token_retransmit for later retransmission. It is put on
 * the wire only when forward_token is non-zero.
 *
 * Always returns 0.
 */
static int token_send (
	struct totemsrp_instance *instance,
	struct orf_token *orf_token,
	int forward_token)
{
	unsigned int wire_size;

	wire_size = sizeof (struct orf_token) +
		(orf_token->rtr_list_entries * sizeof (struct rtr_item));

	orf_token->header.nodeid = instance->my_id.nodeid;

	/* Keep a copy so the retransmit timers can resend it. */
	memcpy (instance->orf_token_retransmit, orf_token, wire_size);
	instance->orf_token_retransmit_size = wire_size;
	assert (orf_token->header.nodeid);

	if (forward_token != 0) {
		totemnet_token_send (instance->totemnet_context,
			orf_token,
			wire_size);
	}

	return (0);
}
static int token_hold_cancel_send (struct totemsrp_instance *instance)
{
struct token_hold_cancel token_hold_cancel;
/*
* Only cancel if the token is currently held
*/
if (instance->my_token_held == 0) {
return (0);
}
instance->my_token_held = 0;
/*
* Build message
*/
token_hold_cancel.header.magic = TOTEM_MH_MAGIC;
token_hold_cancel.header.version = TOTEM_MH_VERSION;
token_hold_cancel.header.type = MESSAGE_TYPE_TOKEN_HOLD_CANCEL;
token_hold_cancel.header.encapsulated = 0;
token_hold_cancel.header.nodeid = instance->my_id.nodeid;
memcpy (&token_hold_cancel.ring_id, &instance->my_ring_id,
sizeof (struct memb_ring_id));
assert (token_hold_cancel.header.nodeid);
instance->stats.token_hold_cancel_tx++;
totemnet_mcast_flush_send (instance->totemnet_context, &token_hold_cancel,
sizeof (struct token_hold_cancel));
return (0);
}
/*
 * Build the first orf_token of a ring and send it.
 *
 * The token starts at SEQNO_START_MSG / SEQNO_START_TOKEN with
 * aru = SEQNO_START_MSG - 1, an empty retransmit list and zeroed flow
 * control fields. The retransmit flag, mirrored in
 * instance->my_set_retrans_flg, is set exactly when
 * instance->retrans_message_queue still holds messages.
 *
 * Returns the result of token_send.
 */
static int orf_token_send_initial (struct totemsrp_instance *instance)
{
	struct orf_token orf_token;
	int retrans_pending;

	orf_token.header.magic = TOTEM_MH_MAGIC;
	orf_token.header.version = TOTEM_MH_VERSION;
	orf_token.header.type = MESSAGE_TYPE_ORF_TOKEN;
	orf_token.header.encapsulated = 0;
	orf_token.header.nodeid = instance->my_id.nodeid;
	assert (orf_token.header.nodeid);

	orf_token.seq = SEQNO_START_MSG;
	orf_token.token_seq = SEQNO_START_TOKEN;
	instance->stats.orf_token_tx++;

	/*
	 * The flag is decided here once; earlier unconditional assignments
	 * were always overwritten and have been dropped.
	 */
	retrans_pending =
		(cs_queue_is_empty (&instance->retrans_message_queue) == 1) ? 0 : 1;
	orf_token.retrans_flg = retrans_pending;
	instance->my_set_retrans_flg = retrans_pending;

	orf_token.aru = SEQNO_START_MSG - 1;
	orf_token.aru_addr = instance->my_id.nodeid;

	memcpy (&orf_token.ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id));
	orf_token.fcc = 0;
	orf_token.backlog = 0;

	orf_token.rtr_list_entries = 0;

	return (token_send (instance, &orf_token, 1));
}
static void memb_state_commit_token_update (
struct totemsrp_instance *instance)
{
struct srp_addr *addr;
struct memb_commit_token_memb_entry *memb_list;
unsigned int high_aru;
unsigned int i;
addr = (struct srp_addr *)instance->commit_token->end_of_commit_token;
memb_list = (struct memb_commit_token_memb_entry *)(addr + instance->commit_token->addr_entries);
memcpy (instance->my_new_memb_list, addr,
sizeof (struct srp_addr) * instance->commit_token->addr_entries);
instance->my_new_memb_entries = instance->commit_token->addr_entries;
memcpy (&memb_list[instance->commit_token->memb_index].ring_id,
&instance->my_old_ring_id, sizeof (struct memb_ring_id));
memb_list[instance->commit_token->memb_index].aru = instance->old_ring_state_aru;
/*
* TODO high delivered is really instance->my_aru, but with safe this
* could change?
*/
instance->my_received_flg =
(instance->my_aru == instance->my_high_seq_received);
memb_list[instance->commit_token->memb_index].received_flg = instance->my_received_flg;
memb_list[instance->commit_token->memb_index].high_delivered = instance->my_high_delivered;
/*
* find high aru up to current memb_index for all matching ring ids
* if any ring id matching memb_index has aru less then high aru set
* received flag for that entry to false
*/
high_aru = memb_list[instance->commit_token->memb_index].aru;
for (i = 0; i <= instance->commit_token->memb_index; i++) {
if (memcmp (&memb_list[instance->commit_token->memb_index].ring_id,
&memb_list[i].ring_id,
sizeof (struct memb_ring_id)) == 0) {
if (sq_lt_compare (high_aru, memb_list[i].aru)) {
high_aru = memb_list[i].aru;
}
}
}
for (i = 0; i <= instance->commit_token->memb_index; i++) {
if (memcmp (&memb_list[instance->commit_token->memb_index].ring_id,
&memb_list[i].ring_id,
sizeof (struct memb_ring_id)) == 0) {
if (sq_lt_compare (memb_list[i].aru, high_aru)) {
memb_list[i].received_flg = 0;
if (i == instance->commit_token->memb_index) {
instance->my_received_flg = 0;
}
}
}
}
instance->commit_token->header.nodeid = instance->my_id.nodeid;
instance->commit_token->memb_index += 1;
assert (instance->commit_token->memb_index <= instance->commit_token->addr_entries);
assert (instance->commit_token->header.nodeid);
}
static void memb_state_commit_token_target_set (
struct totemsrp_instance *instance)
{
struct srp_addr *addr;
addr = (struct srp_addr *)instance->commit_token->end_of_commit_token;
/* Totemnet just looks at the node id */
totemnet_token_target_set (
instance->totemnet_context,
addr[instance->commit_token->memb_index %
instance->commit_token->addr_entries].nodeid);
}
/*
 * Send the given commit token to the current token target.
 *
 * Increments the token's token_seq, stamps it with our node id, keeps a
 * copy in instance->orf_token_retransmit so it can be resent, sends it
 * and re-arms the token retransmit timer.
 *
 * Always returns 0.
 */
static int memb_state_commit_token_send_recovery (
	struct totemsrp_instance *instance,
	struct memb_commit_token *commit_token)
{
	/* Each member contributes one address and one entry record. */
	const size_t per_member_size =
		sizeof (struct srp_addr) +
		sizeof (struct memb_commit_token_memb_entry);
	unsigned int wire_size;

	commit_token->token_seq++;
	commit_token->header.nodeid = instance->my_id.nodeid;

	wire_size = sizeof (struct memb_commit_token) +
		(per_member_size * commit_token->addr_entries);

	/* Keep a copy for retransmission if necessary. */
	memcpy (instance->orf_token_retransmit, commit_token, wire_size);
	instance->orf_token_retransmit_size = wire_size;

	instance->stats.memb_commit_token_tx++;

	totemnet_token_send (instance->totemnet_context,
		commit_token,
		wire_size);

	/* Have the commit token retransmitted in case it is lost. */
	reset_token_retransmit_timeout (instance);

	return (0);
}
/*
 * Send the instance's own commit token (instance->commit_token) to the
 * current token target.
 *
 * This is the same sequence memb_state_commit_token_send_recovery runs
 * on an explicit token - bump token_seq, stamp our node id, save a
 * retransmit copy, send, re-arm the retransmit timer - so the work is
 * delegated to it.
 *
 * Always returns 0.
 */
static int memb_state_commit_token_send (
	struct totemsrp_instance *instance)
{
	return (memb_state_commit_token_send_recovery (instance,
		instance->commit_token));
}
/*
 * Tell whether this node has the smallest node id among the entries
 * that memb_set_subtract produces from my_proc_list and my_failed_list.
 *
 * Returns 1 if our node id is that smallest id, 0 otherwise. The
 * resulting set must not be empty (asserted).
 */
static int memb_lowest_in_config (struct totemsrp_instance *instance)
{
	struct srp_addr candidates[PROCESSOR_COUNT_MAX];
	int candidate_count = 0;
	unsigned int smallest;
	int idx;

	memb_set_subtract (candidates, &candidate_count,
		instance->my_proc_list, instance->my_proc_list_entries,
		instance->my_failed_list, instance->my_failed_list_entries);

	/* The representative is the member with the smallest identifier. */
	assert(candidate_count > 0);

	smallest = candidates[0].nodeid;
	for (idx = candidate_count - 1; idx > 0; idx--) {
		if (candidates[idx].nodeid < smallest) {
			smallest = candidates[idx].nodeid;
		}
	}

	return (smallest == instance->my_id.nodeid);
}
/*
 * qsort-style comparator ordering struct srp_addr elements by nodeid,
 * ascending. Returns -1, 0 or 1.
 */
static int srp_addr_compare (const void *a, const void *b)
{
	const struct srp_addr *lhs = (const struct srp_addr *)a;
	const struct srp_addr *rhs = (const struct srp_addr *)b;

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
	return ((lhs->nodeid > rhs->nodeid) - (lhs->nodeid < rhs->nodeid));
}
static void memb_state_commit_token_create (
struct totemsrp_instance *instance)
{
struct srp_addr token_memb[PROCESSOR_COUNT_MAX];
struct srp_addr *addr;
struct memb_commit_token_memb_entry *memb_list;
int token_memb_entries = 0;
log_printf (instance->totemsrp_log_level_debug,
"Creating commit token because I am the rep.");
memb_set_subtract (token_memb, &token_memb_entries,
instance->my_proc_list, instance->my_proc_list_entries,
instance->my_failed_list, instance->my_failed_list_entries);
memset (instance->commit_token, 0, sizeof (struct memb_commit_token));
instance->commit_token->header.magic = TOTEM_MH_MAGIC;
instance->commit_token->header.version = TOTEM_MH_VERSION;
instance->commit_token->header.type = MESSAGE_TYPE_MEMB_COMMIT_TOKEN;
instance->commit_token->header.encapsulated = 0;
instance->commit_token->header.nodeid = instance->my_id.nodeid;
assert (instance->commit_token->header.nodeid);
instance->commit_token->ring_id.rep = instance->my_id.nodeid;
instance->commit_token->ring_id.seq = instance->token_ring_id_seq + 4;
/*
* This qsort is necessary to ensure the commit token traverses
* the ring in the proper order
*/
qsort (token_memb, token_memb_entries, sizeof (struct srp_addr),
srp_addr_compare);
instance->commit_token->memb_index = 0;
instance->commit_token->addr_entries = token_memb_entries;
addr = (struct srp_addr *)instance->commit_token->end_of_commit_token;
memb_list = (struct memb_commit_token_memb_entry *)(addr + instance->commit_token->addr_entries);
memcpy (addr, token_memb,
token_memb_entries * sizeof (struct srp_addr));
memset (memb_list, 0,
sizeof (struct memb_commit_token_memb_entry) * token_memb_entries);
}
/*
 * Multicast a join message carrying our processor list followed by our
 * failed list.
 *
 * The message is assembled in a local buffer; if header plus both lists
 * would not fit, an error is logged and nothing is sent. When
 * totem_config->send_join_timeout is non-zero the call sleeps for a
 * random time below send_join_timeout * 1000 microseconds before
 * sending.
 */
static void memb_join_message_send (struct totemsrp_instance *instance)
{
	char memb_join_data[40000];
	struct memb_join *memb_join = (struct memb_join *)memb_join_data;
	char *cursor = (char *)memb_join;
	unsigned int offset;
	size_t msg_len;

	memb_join->header.magic = TOTEM_MH_MAGIC;
	memb_join->header.version = TOTEM_MH_VERSION;
	memb_join->header.type = MESSAGE_TYPE_MEMB_JOIN;
	memb_join->header.encapsulated = 0;
	memb_join->header.nodeid = instance->my_id.nodeid;
	assert (memb_join->header.nodeid);

	/* Refuse to build a message larger than the local buffer. */
	msg_len = sizeof(struct memb_join) +
		((instance->my_proc_list_entries + instance->my_failed_list_entries) * sizeof(struct srp_addr));
	if (msg_len > sizeof(memb_join_data)) {
		log_printf (instance->totemsrp_log_level_error,
			"memb_join_message too long. Ignoring message.");
		return ;
	}

	memb_join->ring_seq = instance->my_ring_id.seq;
	memb_join->proc_list_entries = instance->my_proc_list_entries;
	memb_join->failed_list_entries = instance->my_failed_list_entries;
	memb_join->system_from = instance->my_id;

	/* Processor list directly after the fixed part ... */
	offset = sizeof (struct memb_join);
	memcpy (cursor + offset,
		instance->my_proc_list,
		instance->my_proc_list_entries * sizeof (struct srp_addr));
	offset += instance->my_proc_list_entries * sizeof (struct srp_addr);

	/* ... then the failed list. */
	memcpy (cursor + offset,
		instance->my_failed_list,
		instance->my_failed_list_entries * sizeof (struct srp_addr));
	offset += instance->my_failed_list_entries * sizeof (struct srp_addr);

	if (instance->totem_config->send_join_timeout) {
		usleep (random() % (instance->totem_config->send_join_timeout * 1000));
	}

	instance->stats.memb_join_tx++;

	totemnet_mcast_flush_send (
		instance->totemnet_context,
		memb_join,
		offset);
}
/*
 * Multicast a leave notification, encoded as a join message.
 *
 * This node is first merged into instance->my_failed_list (that change
 * persists even if the message turns out too long to send). The
 * processor list placed in the message is my_proc_list with this node
 * subtracted, and the header node id is LEAVE_DUMMY_NODEID instead of
 * our own id. If header plus both lists would not fit the local buffer
 * an error is logged and nothing is sent. When
 * totem_config->send_join_timeout is non-zero the call sleeps for a
 * random time below send_join_timeout * 1000 microseconds before
 * sending.
 */
static void memb_leave_message_send (struct totemsrp_instance *instance)
{
	char memb_join_data[40000];
	struct memb_join *memb_join = (struct memb_join *)memb_join_data;
	char *addr;
	unsigned int addr_idx;
	/*
	 * Initialised like the other memb_set_subtract callers in this file
	 * so the count never depends on the callee resetting it.
	 */
	int active_memb_entries = 0;
	struct srp_addr active_memb[PROCESSOR_COUNT_MAX];
	size_t msg_len;

	log_printf (instance->totemsrp_log_level_debug,
		"sending join/leave message");

	/*
	 * add us to the failed list, and remove us from
	 * the members list
	 */
	memb_set_merge(
		&instance->my_id, 1,
		instance->my_failed_list, &instance->my_failed_list_entries);

	memb_set_subtract (active_memb, &active_memb_entries,
		instance->my_proc_list, instance->my_proc_list_entries,
		&instance->my_id, 1);

	/* Refuse to build a message larger than the local buffer. */
	msg_len = sizeof(struct memb_join) +
		((active_memb_entries + instance->my_failed_list_entries) * sizeof(struct srp_addr));
	if (msg_len > sizeof(memb_join_data)) {
		log_printf (instance->totemsrp_log_level_error,
			"memb_leave message too long. Ignoring message.");
		return ;
	}

	memb_join->header.magic = TOTEM_MH_MAGIC;
	memb_join->header.version = TOTEM_MH_VERSION;
	memb_join->header.type = MESSAGE_TYPE_MEMB_JOIN;
	memb_join->header.encapsulated = 0;
	memb_join->header.nodeid = LEAVE_DUMMY_NODEID;

	memb_join->ring_seq = instance->my_ring_id.seq;
	memb_join->proc_list_entries = active_memb_entries;
	memb_join->failed_list_entries = instance->my_failed_list_entries;
	memb_join->system_from = instance->my_id;

	// TODO: CC Maybe use the actual join send routine.
	/*
	 * Append the remaining-member list, then the failed list, directly
	 * after the fixed part of the join message.
	 */
	addr = (char *)memb_join;
	addr_idx = sizeof (struct memb_join);
	memcpy (&addr[addr_idx],
		active_memb,
		active_memb_entries *
			sizeof (struct srp_addr));
	addr_idx +=
		active_memb_entries *
		sizeof (struct srp_addr);
	memcpy (&addr[addr_idx],
		instance->my_failed_list,
		instance->my_failed_list_entries *
			sizeof (struct srp_addr));
	addr_idx +=
		instance->my_failed_list_entries *
		sizeof (struct srp_addr);

	if (instance->totem_config->send_join_timeout) {
		usleep (random() % (instance->totem_config->send_join_timeout * 1000));
	}
	instance->stats.memb_join_tx++;

	totemnet_mcast_flush_send (
		instance->totemnet_context,
		memb_join,
		addr_idx);
}
static void memb_merge_detect_transmit (struct totemsrp_instance *instance)
{
struct memb_merge_detect memb_merge_detect;
memb_merge_detect.header.magic = TOTEM_MH_MAGIC;
memb_merge_detect.header.version = TOTEM_MH_VERSION;
memb_merge_detect.header.type = MESSAGE_TYPE_MEMB_MERGE_DETECT;
memb_merge_detect.header.encapsulated = 0;
memb_merge_detect.header.nodeid = instance->my_id.nodeid;
memb_merge_detect.system_from = instance->my_id;
memcpy (&memb_merge_detect.ring_id, &instance->my_ring_id,
sizeof (struct memb_ring_id));
assert (memb_merge_detect.header.nodeid);
instance->stats.memb_merge_detect_tx++;
totemnet_mcast_flush_send (instance->totemnet_context,
&memb_merge_detect,
sizeof (struct memb_merge_detect));
}
/*
 * Overwrite instance->my_ring_id with a byte-for-byte copy of *ring_id.
 */
static void memb_ring_id_set (
	struct totemsrp_instance *instance,
	const struct memb_ring_id *ring_id)
{
	/* Ring ids are compared with memcmp elsewhere, so copy raw bytes. */
	memcpy (&instance->my_ring_id,
		ring_id,
		sizeof (struct memb_ring_id));
}
/*
 * Register a token callback.
 *
 * A held token is cancelled first. A callback record is then allocated,
 * returned through *handle_out and linked onto the "received" or "sent"
 * callback list according to `type`; for any other `type` the record is
 * initialised but linked onto neither list.
 *
 * Returns 0 on success, -1 if the record cannot be allocated (in which
 * case *handle_out is left untouched).
 */
int totemsrp_callback_token_create (
	void *srp_context,
	void **handle_out,
	enum totem_callback_token_type type,
	int delete,
	int (*callback_fn) (enum totem_callback_token_type type, const void *),
	const void *data)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
	struct token_callback_instance *record;

	token_hold_cancel_send (instance);

	record = malloc (sizeof (struct token_callback_instance));
	if (record == 0) {
		return (-1);
	}
	*handle_out = (void *)record;

	qb_list_init (&record->list);
	record->callback_fn = callback_fn;
	record->data = (void *) data;
	record->callback_type = type;
	record->delete = delete;

	if (type == TOTEM_CALLBACK_TOKEN_RECEIVED) {
		qb_list_add (&record->list, &instance->token_callback_received_listhead);
	} else if (type == TOTEM_CALLBACK_TOKEN_SENT) {
		qb_list_add (&record->list, &instance->token_callback_sent_listhead);
	}

	return (0);
}
/*
 * Unlink and free a callback handle created by
 * totemsrp_callback_token_create, then clear the caller's handle pointer.
 * Does nothing when *handle_out is already NULL. srp_context is unused.
 */
void totemsrp_callback_token_destroy (void *srp_context, void **handle_out)
{
	struct token_callback_instance *cb;

	cb = (struct token_callback_instance *)*handle_out;
	if (cb == NULL) {
		return;
	}

	qb_list_del (&cb->list);
	free (cb);
	*handle_out = NULL;
}
/*
 * Run every callback registered for the given token event.
 *
 * A callback whose delete flag is 1 is unlinked before it runs; if it
 * returns -1 it is put back on the list so it is retried on the next token,
 * otherwise its handle is freed.
 */
static void token_callbacks_execute (
	struct totemsrp_instance *instance,
	enum totem_callback_token_type type)
{
	struct qb_list_head *iter, *next;
	struct qb_list_head *head = 0;
	struct token_callback_instance *cb;
	int rc;
	int del_flag;

	if (type == TOTEM_CALLBACK_TOKEN_RECEIVED) {
		head = &instance->token_callback_received_listhead;
	} else if (type == TOTEM_CALLBACK_TOKEN_SENT) {
		head = &instance->token_callback_sent_listhead;
	} else {
		assert (0);
	}

	qb_list_for_each_safe(iter, next, head) {
		cb = qb_list_entry (iter, struct token_callback_instance, list);

		/* Latch the flag before the call; cb is not read again afterwards */
		del_flag = cb->delete;
		if (del_flag == 1) {
			qb_list_del (iter);
		}

		rc = cb->callback_fn (cb->callback_type, cb->data);

		if (del_flag == 1 && rc == -1) {
			/* Callback failed to execute, try it again on the next token */
			qb_list_add (iter, head);
		} else if (del_flag) {
			free (cb);
		}
	}
}
/*
* Flow control functions
*/
/*
 * Return the number of entries waiting in the queue that is relevant for the
 * current membership state, and record that value in the stats slot of the
 * latest token.
 *
 *   OPERATIONAL -> new_message_queue_trans while waiting_trans_ack is set,
 *                  new_message_queue otherwise
 *   RECOVERY    -> retrans_message_queue
 *   otherwise   -> 0
 */
static unsigned int backlog_get (struct totemsrp_instance *instance)
{
	struct cs_queue *active_queue = NULL;
	unsigned int pending = 0;

	switch (instance->memb_state) {
	case MEMB_STATE_OPERATIONAL:
		active_queue = instance->waiting_trans_ack ?
			&instance->new_message_queue_trans :
			&instance->new_message_queue;
		break;
	case MEMB_STATE_RECOVERY:
		active_queue = &instance->retrans_message_queue;
		break;
	default:
		break;
	}

	if (active_queue != NULL) {
		pending = cs_queue_used (active_queue);
	}

	instance->stats.token[instance->stats.latest_token].backlog_calc = pending;

	return (pending);
}
/*
 * Flow control: work out how many messages this node may transmit on the
 * current token.
 *
 * The result starts at max_messages, is capped by what remains of the window
 * (window_size - token->fcc) and, when there is a backlog on the ring, by
 * this node's share of the window (window_size * my_pbl / total backlog).
 * Also refreshes instance->my_cbl from backlog_get().
 */
static int fcc_calculate (
	struct totemsrp_instance *instance,
	struct orf_token *token)
{
	unsigned int allowed = instance->totem_config->max_messages;
	unsigned int backlog_share;

	if (instance->totem_config->window_size - token->fcc < allowed) {
		allowed = instance->totem_config->window_size - token->fcc;
	}

	instance->my_cbl = backlog_get (instance);

	/*
	 * Without any backlog the share computation would divide by zero,
	 * so the window-based limit is final in that case.
	 */
	if ((token->backlog + instance->my_cbl - instance->my_pbl) == 0) {
		return (allowed);
	}

	backlog_share = (instance->totem_config->window_size * instance->my_pbl) /
		(token->backlog + instance->my_cbl - instance->my_pbl);
	if (backlog_share > 0 && backlog_share < allowed) {
		allowed = backlog_share;
	}

	return (allowed);
}
/*
 * Don't overflow the RTR sort queue.
 *
 * Sets *transmits_allowed to 0 when
 *   last_released + QUEUE_RTR_ITEMS_SIZE_MAX - *transmits_allowed - window_size
 * compares (sq_lt_compare) as lower than the token's seq.
 */
static void fcc_rtr_limit (
	struct totemsrp_instance *instance,
	struct orf_token *token,
	unsigned int *transmits_allowed)
{
	int headroom = QUEUE_RTR_ITEMS_SIZE_MAX;

	/* The queue must be able to hold a full window plus our own burst */
	headroom -= (*transmits_allowed + instance->totem_config->window_size);
	assert (headroom >= 0);

	if (!sq_lt_compare (instance->last_released +
		QUEUE_RTR_ITEMS_SIZE_MAX - *transmits_allowed -
		instance->totem_config->window_size,
		token->seq)) {
		return;
	}

	*transmits_allowed = 0;
}
/*
 * Fold this node's latest transmit count and backlog into the token's flow
 * control fields, replacing the contribution it made on the previous
 * rotation (my_trc / my_pbl), then remember the new contribution.
 */
static void fcc_token_update (
	struct totemsrp_instance *instance,
	struct orf_token *token,
	unsigned int msgs_transmitted)
{
	/* Transmit counter: swap the old contribution for the new one */
	token->fcc += msgs_transmitted - instance->my_trc;
	instance->my_trc = msgs_transmitted;

	/* Backlog: same scheme, using the current and previous backlog */
	token->backlog += instance->my_cbl - instance->my_pbl;
	instance->my_pbl = instance->my_cbl;
}
/*
* Sanity checkers
*/
static int check_orf_token_sanity(
const struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed)
{
int rtr_entries;
const struct orf_token *token = (const struct orf_token *)msg;
size_t required_len;
if (msg_len < sizeof(struct orf_token)) {
log_printf (instance->totemsrp_log_level_security,
"Received orf_token message is too short... ignoring.");
return (-1);
}
if (endian_conversion_needed) {
rtr_entries = swab32(token->rtr_list_entries);
} else {
rtr_entries = token->rtr_list_entries;
}
required_len = sizeof(struct orf_token) + rtr_entries * sizeof(struct rtr_item);
if (msg_len < required_len) {
log_printf (instance->totemsrp_log_level_security,
"Received orf_token message is too short... ignoring.");
return (-1);
}
return (0);
}
/*
 * Validate the size of a received MCAST message: it must be at least as
 * large as struct mcast. msg and endian_conversion_needed are not inspected.
 *
 * Returns 0 when the message is acceptable, -1 when it must be ignored.
 */
static int check_mcast_sanity(
	struct totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed)
{
	if (msg_len >= sizeof(struct mcast)) {
		return (0);
	}

	log_printf (instance->totemsrp_log_level_security,
		"Received mcast message is too short... ignoring.");

	return (-1);
}
/*
 * Validate the size of a received MEMB_MERGE_DETECT message: it must be at
 * least as large as struct memb_merge_detect. msg and
 * endian_conversion_needed are not inspected.
 *
 * Returns 0 when the message is acceptable, -1 when it must be ignored.
 */
static int check_memb_merge_detect_sanity(
	struct totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed)
{
	if (msg_len >= sizeof(struct memb_merge_detect)) {
		return (0);
	}

	log_printf (instance->totemsrp_log_level_security,
		"Received memb_merge_detect message is too short... ignoring.");

	return (-1);
}
/*
 * Validate the size of a received MEMB_JOIN message.
 *
 * msg                      - raw message as received
 * msg_len                  - number of valid bytes in msg
 * endian_conversion_needed - non-zero when the two list counters must be
 *                            byte-swapped before they are interpreted
 *
 * The message must hold the fixed struct memb_join followed by
 * proc_list_entries + failed_list_entries srp_addr records. The counters
 * come from the network, so the test is phrased as "does each list fit into
 * the bytes that follow the header" instead of adding and multiplying the
 * counters: the sum of two 32-bit counters can wrap around and make a
 * forged message look long enough.
 *
 * Returns 0 when the message is acceptable, -1 when it must be ignored.
 */
static int check_memb_join_sanity(
	struct totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed)
{
	const struct memb_join *mj_msg = (const struct memb_join *)msg;
	unsigned int proc_list_entries;
	unsigned int failed_list_entries;
	size_t list_capacity;

	if (msg_len < sizeof(struct memb_join)) {
		log_printf (instance->totemsrp_log_level_security,
			"Received memb_join message is too short... ignoring.");
		return (-1);
	}

	proc_list_entries = mj_msg->proc_list_entries;
	failed_list_entries = mj_msg->failed_list_entries;
	if (endian_conversion_needed) {
		proc_list_entries = swab32(proc_list_entries);
		failed_list_entries = swab32(failed_list_entries);
	}

	/* Number of srp_addr records that fit behind the fixed header */
	list_capacity = (msg_len - sizeof(struct memb_join)) / sizeof(struct srp_addr);

	if (proc_list_entries > list_capacity ||
	    failed_list_entries > list_capacity - proc_list_entries) {
		log_printf (instance->totemsrp_log_level_security,
			"Received memb_join message is too short... ignoring.");
		return (-1);
	}

	return (0);
}
/*
 * Validate the size of a received MEMB_COMMIT_TOKEN message.
 *
 * msg                      - raw message as received
 * msg_len                  - number of valid bytes in msg
 * endian_conversion_needed - non-zero when addr_entries must be byte-swapped
 *                            before it is interpreted
 *
 * The message must hold the fixed struct memb_commit_token followed by
 * addr_entries pairs of (srp_addr, memb_commit_token_memb_entry).
 *
 * Returns 0 when the message is acceptable, -1 when it must be ignored.
 * A message shorter than the fixed header is reported as -1 like every
 * other failure; returning success there would let the caller read
 * addr_entries from beyond the received data.
 */
static int check_memb_commit_token_sanity(
	struct totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed)
{
	const struct memb_commit_token *mct_msg = (const struct memb_commit_token *)msg;
	unsigned int addr_entries;
	size_t entry_len;

	if (msg_len < sizeof(struct memb_commit_token)) {
		log_printf (instance->totemsrp_log_level_security,
			"Received memb_commit_token message is too short... ignoring.");
		return (-1);
	}

	addr_entries = mct_msg->addr_entries;
	if (endian_conversion_needed) {
		addr_entries = swab32(addr_entries);
	}

	/*
	 * Compare against how many entries fit behind the header rather than
	 * multiplying the untrusted counter, which could wrap size_t.
	 */
	entry_len = sizeof(struct srp_addr) + sizeof(struct memb_commit_token_memb_entry);
	if (addr_entries > (msg_len - sizeof(struct memb_commit_token)) / entry_len) {
		log_printf (instance->totemsrp_log_level_security,
			"Received memb_commit_token message is too short... ignoring.");
		return (-1);
	}

	return (0);
}
/*
 * Validate the size of a received TOKEN_HOLD_CANCEL message: it must be at
 * least as large as struct token_hold_cancel. msg and
 * endian_conversion_needed are not inspected.
 *
 * Returns 0 when the message is acceptable, -1 when it must be ignored.
 */
static int check_token_hold_cancel_sanity(
	struct totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed)
{
	if (msg_len >= sizeof(struct token_hold_cancel)) {
		return (0);
	}

	log_printf (instance->totemsrp_log_level_security,
		"Received token_hold_cancel message is too short... ignoring.");

	return (-1);
}
/*
* Message Handlers
*/
/*
 * Timestamp of the previous timing sample, as returned by
 * qb_util_nano_current_get(). Used by the GIVEINFO debug code in
 * message_handler_orf_token to log the time between tokens and the time a
 * token was held.
 */
unsigned long long int tv_old;
/*
 * message handler called when TOKEN message type received
 *
 * Outline:
 *  - drop the token if it fails the size check or tokens are being discarded
 *  - byte-swap it if needed and copy it into local storage
 *  - update the merge-detect / token-hold bookkeeping
 *  - flush pending receives, then decide whether the token is held
 *  - unless in COMMIT state: discard foreign or retransmitted tokens,
 *    otherwise run the received-callbacks, retransmit and multicast within
 *    the flow-control limits, update aru tracking, detect "failed to
 *    receive", drive the recovery -> operational transition and send the
 *    token on
 *  - reset or cancel the heartbeat timeout
 *
 * Every path returns 0.
 */
static int message_handler_orf_token (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed)
{
/* token_storage holds the working copy; token_convert the byte-swapped one */
char token_storage[1500];
char token_convert[1500];
struct orf_token *token = NULL;
int forward_token;
unsigned int transmits_allowed;
unsigned int mcasted_retransmit;
unsigned int mcasted_regular;
unsigned int last_aru;
#ifdef GIVEINFO
unsigned long long tv_current;
unsigned long long tv_diff;
tv_current = qb_util_nano_current_get ();
tv_diff = tv_current - tv_old;
tv_old = tv_current;
log_printf (instance->totemsrp_log_level_debug,
"Time since last token %0.4f ms", ((float)tv_diff) / 1000000.0);
#endif
/* Malformed tokens are silently ignored */
if (check_orf_token_sanity(instance, msg, msg_len, endian_conversion_needed) == -1) {
return (0);
}
if (instance->orf_token_discard) {
return (0);
}
#ifdef TEST_DROP_ORF_TOKEN_PERCENTAGE
if (random()%100 < TEST_DROP_ORF_TOKEN_PERCENTAGE) {
return (0);
}
#endif
/* From here on msg refers to a token in host byte order */
if (endian_conversion_needed) {
orf_token_endian_convert ((struct orf_token *)msg,
(struct orf_token *)token_convert);
msg = (struct orf_token *)token_convert;
}
/*
 * Make copy of token and retransmit list in case we have
 * to flush incoming messages from the kernel queue
 */
/*
 * NOTE(review): the second memcpy always copies RETRANSMIT_ENTRIES_MAX
 * items, independent of msg_len and rtr_list_entries - confirm that the
 * source buffer is always at least that large and that the copy fits in
 * token_storage.
 */
token = (struct orf_token *)token_storage;
memcpy (token, msg, sizeof (struct orf_token));
memcpy (&token->rtr_list[0], (char *)msg + sizeof (struct orf_token),
sizeof (struct rtr_item) * RETRANSMIT_ENTRIES_MAX);
/*
 * Handle merge detection timeout
 */
/* An unchanged seq means no new messages since the last token we saw */
if (token->seq == instance->my_last_seq) {
start_merge_detect_timeout (instance);
instance->my_seq_unchanged += 1;
} else {
cancel_merge_detect_timeout (instance);
cancel_token_hold_retransmit_timeout (instance);
instance->my_seq_unchanged = 0;
}
instance->my_last_seq = token->seq;
#ifdef TEST_RECOVERY_MSG_COUNT
if (instance->memb_state == MEMB_STATE_OPERATIONAL && token->seq > TEST_RECOVERY_MSG_COUNT) {
return (0);
}
#endif
/* The flushing flag is visible to handlers run during the flush (memb_join_process checks it) */
instance->flushing = 1;
totemnet_recv_flush (instance->totemnet_context);
instance->flushing = 0;
/*
 * Determine if we should hold (in reality drop) the token
 */
/* The ring rep uses '>' against seqno_unchanged_const, other nodes use '>=' */
instance->my_token_held = 0;
if (instance->my_ring_id.rep == instance->my_id.nodeid &&
instance->my_seq_unchanged > instance->totem_config->seqno_unchanged_const) {
instance->my_token_held = 1;
} else {
if (instance->my_ring_id.rep != instance->my_id.nodeid &&
instance->my_seq_unchanged >= instance->totem_config->seqno_unchanged_const) {
instance->my_token_held = 1;
}
}
/*
 * Hold onto token when there is no activity on ring and
 * this processor is the ring rep
 */
forward_token = 1;
if (instance->my_ring_id.rep == instance->my_id.nodeid) {
if (instance->my_token_held) {
forward_token = 0;
}
}
switch (instance->memb_state) {
case MEMB_STATE_COMMIT:
/* Discard token */
break;
case MEMB_STATE_OPERATIONAL:
messages_free (instance, token->aru);
/*
 * Do NOT add break, this case should also execute code in gather case.
 */
case MEMB_STATE_GATHER:
/*
 * DO NOT add break, we use different free mechanism in recovery state
 */
case MEMB_STATE_RECOVERY:
/*
 * Discard tokens from another configuration
 */
if (memcmp (&token->ring_id, &instance->my_ring_id,
sizeof (struct memb_ring_id)) != 0) {
if ((forward_token)
&& instance->use_heartbeat) {
reset_heartbeat_timeout(instance);
}
else {
cancel_heartbeat_timeout(instance);
}
return (0); /* discard token */
}
/*
 * Discard retransmitted tokens
 */
if (sq_lte_compare (token->token_seq, instance->my_token_seq)) {
return (0); /* discard token */
}
/*
 * Token is valid so trigger callbacks
 */
token_callbacks_execute (instance, TOTEM_CALLBACK_TOKEN_RECEIVED);
/* Keep the aru of the previous token for the aru-count check below */
last_aru = instance->my_last_aru;
instance->my_last_aru = token->aru;
transmits_allowed = fcc_calculate (instance, token);
mcasted_retransmit = orf_token_rtr (instance, token, &transmits_allowed);
/* Pending or performed retransmits override the decision to hold the token */
if (instance->my_token_held == 1 &&
(token->rtr_list_entries > 0 || mcasted_retransmit > 0)) {
instance->my_token_held = 0;
forward_token = 1;
}
fcc_rtr_limit (instance, token, &transmits_allowed);
mcasted_regular = orf_token_mcast (instance, token, transmits_allowed);
/*
if (mcasted_regular) {
printf ("mcasted regular %d\n", mcasted_regular);
printf ("token seq %d\n", token->seq);
}
*/
fcc_token_update (instance, token, mcasted_retransmit +
mcasted_regular);
/*
 * Put our own aru on the token when it is lower than the token's, when we
 * were the node that last set it, or when nobody has set it
 */
if (sq_lt_compare (instance->my_aru, token->aru) ||
instance->my_id.nodeid == token->aru_addr ||
token->aru_addr == 0) {
token->aru = instance->my_aru;
if (token->aru == token->seq) {
token->aru_addr = 0;
} else {
token->aru_addr = instance->my_id.nodeid;
}
}
/* Count consecutive tokens on which the aru did not advance */
if (token->aru == last_aru && token->aru_addr != 0) {
instance->my_aru_count += 1;
} else {
instance->my_aru_count = 0;
}
/*
 * We really don't follow specification there. In specification, OTHER nodes
 * detect failure of one node (based on aru_count) and my_id IS NEVER added
 * to failed list (so node never mark itself as failed)
 */
if (instance->my_aru_count > instance->totem_config->fail_to_recv_const &&
token->aru_addr == instance->my_id.nodeid) {
log_printf (instance->totemsrp_log_level_error,
"FAILED TO RECEIVE");
instance->failed_to_recv = 1;
memb_set_merge (&instance->my_id, 1,
instance->my_failed_list,
&instance->my_failed_list_entries);
memb_state_gather_enter (instance, TOTEMSRP_GSFROM_FAILED_TO_RECEIVE);
} else {
/* Remember the accepted token_seq, then advance it for the next holder */
instance->my_token_seq = token->token_seq;
token->token_seq += 1;
if (instance->memb_state == MEMB_STATE_RECOVERY) {
/*
 * instance->my_aru == instance->my_high_seq_received means this processor
 * has recovered all messages it can recover
 * (ie: its retrans queue is empty)
 */
/* my_set_retrans_flg records that this node set the flag, so only it clears it */
if (cs_queue_is_empty (&instance->retrans_message_queue) == 0) {
if (token->retrans_flg == 0) {
token->retrans_flg = 1;
instance->my_set_retrans_flg = 1;
}
} else
if (token->retrans_flg == 1 && instance->my_set_retrans_flg) {
token->retrans_flg = 0;
instance->my_set_retrans_flg = 0;
}
log_printf (instance->totemsrp_log_level_debug,
"token retrans flag is %d my set retrans flag%d retrans queue empty %d count %d, aru %x",
token->retrans_flg, instance->my_set_retrans_flg,
cs_queue_is_empty (&instance->retrans_message_queue),
instance->my_retrans_flg_count, token->aru);
/* Count consecutive tokens seen with the retransmit flag clear */
if (token->retrans_flg == 0) {
instance->my_retrans_flg_count += 1;
} else {
instance->my_retrans_flg_count = 0;
}
if (instance->my_retrans_flg_count == 2) {
instance->my_install_seq = token->seq;
}
log_printf (instance->totemsrp_log_level_debug,
"install seq %x aru %x high seq received %x",
instance->my_install_seq, instance->my_aru, instance->my_high_seq_received);
if (instance->my_retrans_flg_count >= 2 &&
instance->my_received_flg == 0 &&
sq_lte_compare (instance->my_install_seq, instance->my_aru)) {
instance->my_received_flg = 1;
instance->my_deliver_memb_entries = instance->my_trans_memb_entries;
memcpy (instance->my_deliver_memb_list, instance->my_trans_memb_list,
sizeof (struct totem_ip_address) * instance->my_trans_memb_entries);
}
if (instance->my_retrans_flg_count >= 3 &&
sq_lte_compare (instance->my_install_seq, token->aru)) {
instance->my_rotation_counter += 1;
} else {
instance->my_rotation_counter = 0;
}
/* Two qualifying rotations in a row complete recovery */
if (instance->my_rotation_counter == 2) {
log_printf (instance->totemsrp_log_level_debug,
"retrans flag count %x token aru %x install seq %x aru %x %x",
instance->my_retrans_flg_count, token->aru, instance->my_install_seq,
instance->my_aru, token->seq);
memb_state_operational_enter (instance);
instance->my_rotation_counter = 0;
instance->my_retrans_flg_count = 0;
}
}
totemnet_send_flush (instance->totemnet_context);
token_send (instance, token, forward_token);
#ifdef GIVEINFO
tv_current = qb_util_nano_current_get ();
tv_diff = tv_current - tv_old;
tv_old = tv_current;
log_printf (instance->totemsrp_log_level_debug,
"I held %0.4f ms",
((float)tv_diff) / 1000000.0);
#endif
if (instance->memb_state == MEMB_STATE_OPERATIONAL) {
messages_deliver_to_app (instance, 0,
instance->my_high_seq_received);
}
/*
 * Deliver messages after token has been transmitted
 * to improve performance
 */
reset_token_timeout (instance); // REVIEWED
reset_token_retransmit_timeout (instance); // REVIEWED
if (instance->my_id.nodeid == instance->my_ring_id.rep &&
instance->my_token_held == 1) {
start_token_hold_retransmit_timeout (instance);
}
token_callbacks_execute (instance, TOTEM_CALLBACK_TOKEN_SENT);
}
break;
}
/* Heartbeat timeout is only kept running when heartbeat is in use and the token was forwarded */
if ((forward_token)
&& instance->use_heartbeat) {
reset_heartbeat_timeout(instance);
}
else {
cancel_heartbeat_timeout(instance);
}
return (0);
}
/*
 * Hand messages from the regular sort queue to the application callback
 * (totemsrp_deliver_fn) in sequence order, starting right after
 * my_high_delivered and going up to end_point.
 *
 * skip == 0: stop at the first sequence number that is missing (a hole).
 * skip != 0: step over holes, and over messages whose sender is not in
 *            my_deliver_memb_list, while still advancing my_high_delivered.
 *
 * my_high_delivered is advanced for every sequence number processed.
 */
static void messages_deliver_to_app (
struct totemsrp_instance *instance,
int skip,
unsigned int end_point)
{
struct sort_queue_item *sort_queue_item_p;
unsigned int i;
int res;
struct mcast *mcast_in;
struct mcast mcast_header;
unsigned int range = 0;
int endian_conversion_required;
unsigned int my_high_delivered_stored = 0;
struct srp_addr aligned_system_from;
/* Number of sequence numbers between the last delivered one and end_point */
range = end_point - instance->my_high_delivered;
if (range) {
log_printf (instance->totemsrp_log_level_trace,
"Delivering %x to %x", instance->my_high_delivered,
end_point);
}
assert (range < QUEUE_RTR_ITEMS_SIZE_MAX);
/* Fixed base for the loop; my_high_delivered itself changes inside it */
my_high_delivered_stored = instance->my_high_delivered;
/*
 * Deliver messages in order from rtr queue to pending delivery queue
 */
for (i = 1; i <= range; i++) {
void *ptr = 0;
/*
 * If out of range of sort queue, stop assembly
 */
res = sq_in_range (&instance->regular_sort_queue,
my_high_delivered_stored + i);
if (res == 0) {
break;
}
res = sq_item_get (&instance->regular_sort_queue,
my_high_delivered_stored + i, &ptr);
/*
 * If hole, stop assembly
 */
if (res != 0 && skip == 0) {
break;
}
instance->my_high_delivered = my_high_delivered_stored + i;
/* Hole while skipping: nothing to deliver for this sequence number */
if (res != 0) {
continue;
}
sort_queue_item_p = ptr;
mcast_in = sort_queue_item_p->mcast;
assert (mcast_in != (struct mcast *)0xdeadbeef);
/* A magic mismatch means the stored message is in the other byte order */
endian_conversion_required = 0;
if (mcast_in->header.magic != TOTEM_MH_MAGIC) {
endian_conversion_required = 1;
mcast_endian_convert (mcast_in, &mcast_header);
} else {
memcpy (&mcast_header, mcast_in, sizeof (struct mcast));
}
aligned_system_from = mcast_header.system_from;
/*
 * Skip messages not originated in instance->my_deliver_memb
 */
if (skip &&
memb_set_subset (&aligned_system_from,
1,
instance->my_deliver_memb_list,
instance->my_deliver_memb_entries) == 0) {
instance->my_high_delivered = my_high_delivered_stored + i;
continue;
}
/*
 * Message found
 */
log_printf (instance->totemsrp_log_level_trace,
"Delivering MCAST message with seq %x to pending delivery queue",
mcast_header.seq);
/*
 * Message is locally originated multicast
 */
/* The payload starts right after the mcast header of the stored message */
instance->totemsrp_deliver_fn (
mcast_header.header.nodeid,
((char *)sort_queue_item_p->mcast) + sizeof (struct mcast),
sort_queue_item_p->msg_len - sizeof (struct mcast),
endian_conversion_required);
}
}
/*
 * recv message handler called when MCAST message type received
 *
 * Outline:
 *  - ignore messages that fail the size check
 *  - read the header (byte-swapped if needed) and pick the recovery or the
 *    regular sort queue depending on header.encapsulated
 *  - a message from another ring is handled per membership state (it may
 *    trigger a move to gather state) and is never queued
 *  - otherwise a copy of the message is stored in the sort queue unless its
 *    seq is out of range or already present, the aru is updated and, in
 *    operational state, deliverable messages are passed to the application
 *
 * Returns 0, except -1 when no buffer could be allocated for the copy.
 */
static int message_handler_mcast (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed)
{
struct sort_queue_item sort_queue_item;
struct sq *sort_queue;
struct mcast mcast_header;
struct srp_addr aligned_system_from;
if (check_mcast_sanity(instance, msg, msg_len, endian_conversion_needed) == -1) {
return (0);
}
/* Work on a local copy of the header in host byte order; msg itself is left untouched */
if (endian_conversion_needed) {
mcast_endian_convert (msg, &mcast_header);
} else {
memcpy (&mcast_header, msg, sizeof (struct mcast));
}
if (mcast_header.header.encapsulated == MESSAGE_ENCAPSULATED) {
sort_queue = &instance->recovery_sort_queue;
} else {
sort_queue = &instance->regular_sort_queue;
}
assert (msg_len <= FRAME_SIZE_MAX);
#ifdef TEST_DROP_MCAST_PERCENTAGE
if (random()%100 < TEST_DROP_MCAST_PERCENTAGE) {
return (0);
}
#endif
/*
 * If the message is foreign execute the switch below
 */
if (memcmp (&instance->my_ring_id, &mcast_header.ring_id,
sizeof (struct memb_ring_id)) != 0) {
aligned_system_from = mcast_header.system_from;
switch (instance->memb_state) {
case MEMB_STATE_OPERATIONAL:
memb_set_merge (
&aligned_system_from, 1,
instance->my_proc_list, &instance->my_proc_list_entries);
memb_state_gather_enter (instance, TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_OPERATIONAL_STATE);
break;
case MEMB_STATE_GATHER:
/* Only re-enter gather when the sender is not already in my_proc_list */
if (!memb_set_subset (
&aligned_system_from,
1,
instance->my_proc_list,
instance->my_proc_list_entries)) {
memb_set_merge (&aligned_system_from, 1,
instance->my_proc_list, &instance->my_proc_list_entries);
memb_state_gather_enter (instance, TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_GATHER_STATE);
return (0);
}
break;
case MEMB_STATE_COMMIT:
/* discard message */
instance->stats.rx_msg_dropped++;
break;
case MEMB_STATE_RECOVERY:
/* discard message */
instance->stats.rx_msg_dropped++;
break;
}
return (0);
}
log_printf (instance->totemsrp_log_level_trace,
"Received ringid (" CS_PRI_RING_ID ") seq %x",
mcast_header.ring_id.rep,
(uint64_t)mcast_header.ring_id.seq,
mcast_header.seq);
/*
 * Add mcast message to rtr queue if not already in rtr queue
 * otherwise free io vectors
 */
if (msg_len > 0 && msg_len <= FRAME_SIZE_MAX &&
sq_in_range (sort_queue, mcast_header.seq) &&
sq_item_inuse (sort_queue, mcast_header.seq) == 0) {
/*
 * Allocate new multicast memory block
 */
// TODO LEAK
sort_queue_item.mcast = totemsrp_buffer_alloc (instance);
if (sort_queue_item.mcast == NULL) {
return (-1); /* error here is corrected by the algorithm */
}
/* The raw message is stored as received, not the converted header */
memcpy (sort_queue_item.mcast, msg, msg_len);
sort_queue_item.msg_len = msg_len;
if (sq_lt_compare (instance->my_high_seq_received,
mcast_header.seq)) {
instance->my_high_seq_received = mcast_header.seq;
}
sq_item_add (sort_queue, &sort_queue_item, mcast_header.seq);
}
update_aru (instance);
if (instance->memb_state == MEMB_STATE_OPERATIONAL) {
messages_deliver_to_app (instance, 0, instance->my_high_seq_received);
}
/* TODO remove from retrans message queue for old ring in recovery state */
return (0);
}
static int message_handler_memb_merge_detect (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed)
{
struct memb_merge_detect memb_merge_detect;
struct srp_addr aligned_system_from;
if (check_memb_merge_detect_sanity(instance, msg, msg_len, endian_conversion_needed) == -1) {
return (0);
}
if (endian_conversion_needed) {
memb_merge_detect_endian_convert (msg, &memb_merge_detect);
} else {
memcpy (&memb_merge_detect, msg,
sizeof (struct memb_merge_detect));
}
/*
* do nothing if this is a merge detect from this configuration
*/
if (memcmp (&instance->my_ring_id, &memb_merge_detect.ring_id,
sizeof (struct memb_ring_id)) == 0) {
return (0);
}
aligned_system_from = memb_merge_detect.system_from;
/*
* Execute merge operation
*/
switch (instance->memb_state) {
case MEMB_STATE_OPERATIONAL:
memb_set_merge (&aligned_system_from, 1,
instance->my_proc_list, &instance->my_proc_list_entries);
memb_state_gather_enter (instance, TOTEMSRP_GSFROM_MERGE_DURING_OPERATIONAL_STATE);
break;
case MEMB_STATE_GATHER:
if (!memb_set_subset (
&aligned_system_from,
1,
instance->my_proc_list,
instance->my_proc_list_entries)) {
memb_set_merge (&aligned_system_from, 1,
instance->my_proc_list, &instance->my_proc_list_entries);
memb_state_gather_enter (instance, TOTEMSRP_GSFROM_MERGE_DURING_GATHER_STATE);
return (0);
}
break;
case MEMB_STATE_COMMIT:
/* do nothing in commit */
break;
case MEMB_STATE_RECOVERY:
/* do nothing in recovery */
break;
}
return (0);
}
/*
 * Process a join message (already in host byte order) while in operational
 * or gather state.
 *
 * The message carries the sender's processor list followed by its failed
 * list. Four cases are distinguished:
 *  1. both lists equal ours: record the sender's consensus and, once
 *     consensus is agreed, create the commit token and enter commit state
 *     (if failed_to_recv is set, with a membership of only this node;
 *     otherwise only when memb_lowest_in_config() is true)
 *  2. both lists are subsets of ours: nothing new
 *  3. the sender is in our failed list: ignored
 *  4. otherwise: merge the new information into our lists and (re)enter
 *     gather state
 * Finally, if gather was not entered and the node is still operational, it
 * enters gather state.
 *
 * JOIN messages received while instance->flushing is set are discarded.
 * For LEAVE messages (header.nodeid == LEAVE_DUMMY_NODEID) the nodeid of
 * the last failed-list entry is passed to my_leave_memb_set().
 */
static void memb_join_process (
struct totemsrp_instance *instance,
const struct memb_join *memb_join)
{
struct srp_addr *proc_list;
struct srp_addr *failed_list;
int gather_entered = 0;
int fail_minus_memb_entries = 0;
struct srp_addr fail_minus_memb[PROCESSOR_COUNT_MAX];
struct srp_addr aligned_system_from;
/* The two lists are stored back to back right after the fixed part */
proc_list = (struct srp_addr *)memb_join->end_of_memb_join;
failed_list = proc_list + memb_join->proc_list_entries;
aligned_system_from = memb_join->system_from;
log_printf(instance->totemsrp_log_level_trace, "memb_join_process");
memb_set_log(instance, instance->totemsrp_log_level_trace,
"proclist", proc_list, memb_join->proc_list_entries);
memb_set_log(instance, instance->totemsrp_log_level_trace,
"faillist", failed_list, memb_join->failed_list_entries);
memb_set_log(instance, instance->totemsrp_log_level_trace,
"my_proclist", instance->my_proc_list, instance->my_proc_list_entries);
memb_set_log(instance, instance->totemsrp_log_level_trace,
"my_faillist", instance->my_failed_list, instance->my_failed_list_entries);
if (memb_join->header.type == MESSAGE_TYPE_MEMB_JOIN) {
if (instance->flushing) {
/* Received during a receive flush: log, note a leaving node, then drop */
if (memb_join->header.nodeid == LEAVE_DUMMY_NODEID) {
log_printf (instance->totemsrp_log_level_warning,
"Discarding LEAVE message during flush, nodeid=" CS_PRI_NODE_ID,
memb_join->failed_list_entries > 0 ? failed_list[memb_join->failed_list_entries - 1 ].nodeid : LEAVE_DUMMY_NODEID);
if (memb_join->failed_list_entries > 0) {
my_leave_memb_set(instance, failed_list[memb_join->failed_list_entries - 1 ].nodeid);
}
} else {
log_printf (instance->totemsrp_log_level_warning,
"Discarding JOIN message during flush, nodeid=" CS_PRI_NODE_ID, memb_join->header.nodeid);
}
return;
} else {
if (memb_join->header.nodeid == LEAVE_DUMMY_NODEID) {
log_printf (instance->totemsrp_log_level_debug,
"Received LEAVE message from " CS_PRI_NODE_ID, memb_join->failed_list_entries > 0 ? failed_list[memb_join->failed_list_entries - 1 ].nodeid : LEAVE_DUMMY_NODEID);
if (memb_join->failed_list_entries > 0) {
my_leave_memb_set(instance, failed_list[memb_join->failed_list_entries - 1 ].nodeid);
}
}
}
}
/* Case 1: the sender's view is identical to ours */
if (memb_set_equal (proc_list,
memb_join->proc_list_entries,
instance->my_proc_list,
instance->my_proc_list_entries) &&
memb_set_equal (failed_list,
memb_join->failed_list_entries,
instance->my_failed_list,
instance->my_failed_list_entries)) {
/* LEAVE messages do not count towards consensus */
if (memb_join->header.nodeid != LEAVE_DUMMY_NODEID) {
memb_consensus_set (instance, &aligned_system_from);
}
/* After a failed-to-receive, commit with a membership of only this node */
if (memb_consensus_agreed (instance) && instance->failed_to_recv == 1) {
instance->failed_to_recv = 0;
instance->my_proc_list[0] = instance->my_id;
instance->my_proc_list_entries = 1;
instance->my_failed_list_entries = 0;
memb_state_commit_token_create (instance);
memb_state_commit_enter (instance);
return;
}
if (memb_consensus_agreed (instance) &&
memb_lowest_in_config (instance)) {
memb_state_commit_token_create (instance);
memb_state_commit_enter (instance);
} else {
goto out;
}
} else
/* Case 2: the sender knows nothing we do not already know */
if (memb_set_subset (proc_list,
memb_join->proc_list_entries,
instance->my_proc_list,
instance->my_proc_list_entries) &&
memb_set_subset (failed_list,
memb_join->failed_list_entries,
instance->my_failed_list,
instance->my_failed_list_entries)) {
goto out;
} else
/* Case 3: the sender is in our failed list */
if (memb_set_subset (&aligned_system_from, 1,
instance->my_failed_list, instance->my_failed_list_entries)) {
goto out;
} else {
/* Case 4: new information - merge it and restart gathering */
memb_set_merge (proc_list,
memb_join->proc_list_entries,
instance->my_proc_list, &instance->my_proc_list_entries);
/* A sender that lists us as failed is put on our failed list in turn */
if (memb_set_subset (
&instance->my_id, 1,
failed_list, memb_join->failed_list_entries)) {
memb_set_merge (
&aligned_system_from, 1,
instance->my_failed_list, &instance->my_failed_list_entries);
} else {
/* The sender's failed list is only considered when the sender is in my_memb_list */
if (memb_set_subset (
&aligned_system_from, 1,
instance->my_memb_list,
instance->my_memb_entries)) {
/*
 * NOTE(review): the else branch below looks unreachable - the case of the
 * sender being in my_failed_list was already handled above and
 * my_failed_list has not been modified since - confirm.
 */
if (memb_set_subset (
&aligned_system_from, 1,
instance->my_failed_list,
instance->my_failed_list_entries) == 0) {
memb_set_merge (failed_list,
memb_join->failed_list_entries,
instance->my_failed_list, &instance->my_failed_list_entries);
} else {
memb_set_subtract (fail_minus_memb,
&fail_minus_memb_entries,
failed_list,
memb_join->failed_list_entries,
instance->my_memb_list,
instance->my_memb_entries);
memb_set_merge (fail_minus_memb,
fail_minus_memb_entries,
instance->my_failed_list,
&instance->my_failed_list_entries);
}
}
}
memb_state_gather_enter (instance, TOTEMSRP_GSFROM_MERGE_DURING_JOIN);
gather_entered = 1;
}
out:
/* A node that is still operational after a join enters gather state */
if (gather_entered == 0 &&
instance->memb_state == MEMB_STATE_OPERATIONAL) {
memb_state_gather_enter (instance, TOTEMSRP_GSFROM_JOIN_DURING_OPERATIONAL_STATE);
}
}
/*
 * Convert a join message received in the opposite byte order.
 *
 * Reads from *in and writes the host byte order result to *out, including
 * the processor list and the failed list that follow the fixed part. The
 * caller must provide room behind *out for both lists.
 */
static void memb_join_endian_convert (const struct memb_join *in, struct memb_join *out)
{
	struct srp_addr *src_proc;
	struct srp_addr *src_failed;
	struct srp_addr *dst_proc;
	struct srp_addr *dst_failed;
	int idx;

	/* Fixed-size part of the message */
	out->header.magic = TOTEM_MH_MAGIC;
	out->header.version = TOTEM_MH_VERSION;
	out->header.type = in->header.type;
	out->header.nodeid = swab32 (in->header.nodeid);
	out->system_from = srp_addr_endian_convert(in->system_from);
	out->proc_list_entries = swab32 (in->proc_list_entries);
	out->failed_list_entries = swab32 (in->failed_list_entries);
	out->ring_seq = swab64 (in->ring_seq);

	/* Processor list: starts right behind the fixed part */
	src_proc = (struct srp_addr *)in->end_of_memb_join;
	dst_proc = (struct srp_addr *)out->end_of_memb_join;
	for (idx = 0; idx < out->proc_list_entries; idx++) {
		dst_proc[idx] = srp_addr_endian_convert (src_proc[idx]);
	}

	/* Failed list: follows the processor list (converted count is used) */
	src_failed = src_proc + out->proc_list_entries;
	dst_failed = dst_proc + out->proc_list_entries;
	for (idx = 0; idx < out->failed_list_entries; idx++) {
		dst_failed[idx] = srp_addr_endian_convert (src_failed[idx]);
	}
}
/*
 * Convert a commit token received in the opposite byte order.
 *
 * Reads from *in and writes the host byte order result to *out. The fixed
 * part is followed by addr_entries addresses and then addr_entries member
 * entries; a member entry is only converted when its ring_id.rep is
 * non-zero, otherwise the corresponding output entry is left untouched.
 */
static void memb_commit_token_endian_convert (const struct memb_commit_token *in, struct memb_commit_token *out)
{
	struct srp_addr *src_addr = (struct srp_addr *)in->end_of_commit_token;
	struct srp_addr *dst_addr = (struct srp_addr *)out->end_of_commit_token;
	struct memb_commit_token_memb_entry *src_memb;
	struct memb_commit_token_memb_entry *dst_memb;
	int idx;

	/* Fixed-size part of the token */
	out->header.magic = TOTEM_MH_MAGIC;
	out->header.version = TOTEM_MH_VERSION;
	out->header.type = in->header.type;
	out->header.nodeid = swab32 (in->header.nodeid);
	out->token_seq = swab32 (in->token_seq);
	out->ring_id.rep = swab32(in->ring_id.rep);
	out->ring_id.seq = swab64 (in->ring_id.seq);
	out->retrans_flg = swab32 (in->retrans_flg);
	out->memb_index = swab32 (in->memb_index);
	out->addr_entries = swab32 (in->addr_entries);

	/* Member entries start behind the address array (converted count) */
	src_memb = (struct memb_commit_token_memb_entry *)(src_addr + out->addr_entries);
	dst_memb = (struct memb_commit_token_memb_entry *)(dst_addr + out->addr_entries);

	for (idx = 0; idx < out->addr_entries; idx++) {
		dst_addr[idx] = srp_addr_endian_convert (src_addr[idx]);

		/* Only convert the memb entry if it has been set */
		if (src_memb[idx].ring_id.rep == 0) {
			continue;
		}
		dst_memb[idx].ring_id.rep = swab32(src_memb[idx].ring_id.rep);
		dst_memb[idx].ring_id.seq = swab64 (src_memb[idx].ring_id.seq);
		dst_memb[idx].aru = swab32 (src_memb[idx].aru);
		dst_memb[idx].high_delivered = swab32 (src_memb[idx].high_delivered);
		dst_memb[idx].received_flg = swab32 (src_memb[idx].received_flg);
	}
}
static void orf_token_endian_convert (const struct orf_token *in, struct orf_token *out)
{
int i;
out->header.magic = TOTEM_MH_MAGIC;
out->header.version = TOTEM_MH_VERSION;
out->header.type = in->header.type;
out->header.nodeid = swab32 (in->header.nodeid);
out->seq = swab32 (in->seq);
out->token_seq = swab32 (in->token_seq);
out->aru = swab32 (in->aru);
out->ring_id.rep = swab32(in->ring_id.rep);
out->aru_addr = swab32(in->aru_addr);
out->ring_id.seq = swab64 (in->ring_id.seq);
out->fcc = swab32 (in->fcc);
out->backlog = swab32 (in->backlog);
out->retrans_flg = swab32 (in->retrans_flg);
out->rtr_list_entries = swab32 (in->rtr_list_entries);
for (i = 0; i < out->rtr_list_entries; i++) {
out->rtr_list[i].ring_id.rep = swab32(in->rtr_list[i].ring_id.rep);
out->rtr_list[i].ring_id.seq = swab64 (in->rtr_list[i].ring_id.seq);
out->rtr_list[i].seq = swab32 (in->rtr_list[i].seq);
}
}
/*
 * Convert the header of a multicast message received in the opposite byte
 * order. Reads from *in and writes the host byte order header to *out; the
 * payload that follows the header is not touched.
 */
static void mcast_endian_convert (const struct mcast *in, struct mcast *out)
{
	/* Message header */
	out->header.magic = TOTEM_MH_MAGIC;
	out->header.version = TOTEM_MH_VERSION;
	out->header.type = in->header.type;
	out->header.encapsulated = in->header.encapsulated;
	out->header.nodeid = swab32 (in->header.nodeid);

	/* Origin of the message */
	out->system_from = srp_addr_endian_convert(in->system_from);
	out->node_id = swab32 (in->node_id);

	/* Ring identity */
	out->ring_id.rep = swab32(in->ring_id.rep);
	out->ring_id.seq = swab64 (in->ring_id.seq);

	/* Ordering information */
	out->seq = swab32 (in->seq);
	out->this_seqno = swab32 (in->this_seqno);
	out->guarantee = swab32 (in->guarantee);
}
/*
 * Convert a merge detect message received in the opposite byte order.
 * Reads from *in and writes the host byte order result to *out.
 */
static void memb_merge_detect_endian_convert (
	const struct memb_merge_detect *in,
	struct memb_merge_detect *out)
{
	/* Message header */
	out->header.magic = TOTEM_MH_MAGIC;
	out->header.version = TOTEM_MH_VERSION;
	out->header.type = in->header.type;
	out->header.nodeid = swab32 (in->header.nodeid);

	/* Sender and the ring it belongs to */
	out->system_from = srp_addr_endian_convert (in->system_from);
	out->ring_id.rep = swab32(in->ring_id.rep);
	out->ring_id.seq = swab64 (in->ring_id.seq);
}
static int ignore_join_under_operational (
struct totemsrp_instance *instance,
const struct memb_join *memb_join)
{
struct srp_addr *proc_list;
struct srp_addr *failed_list;
unsigned long long ring_seq;
struct srp_addr aligned_system_from;
proc_list = (struct srp_addr *)memb_join->end_of_memb_join;
failed_list = proc_list + memb_join->proc_list_entries;
ring_seq = memb_join->ring_seq;
aligned_system_from = memb_join->system_from;
if (memb_set_subset (&instance->my_id, 1,
failed_list, memb_join->failed_list_entries)) {
return (1);
}
/*
* In operational state, my_proc_list is exactly the same as
* my_memb_list.
*/
if ((memb_set_subset (&aligned_system_from, 1,
instance->my_memb_list, instance->my_memb_entries)) &&
(ring_seq < instance->my_ring_id.seq)) {
return (1);
}
return (0);
}
/*
 * Handle a received membership join message.
 *
 * instance                 - protocol instance the message was delivered to
 * msg / msg_len            - raw message as received
 * endian_conversion_needed - non-zero when the sender uses the opposite
 *                            byte order
 *
 * Messages failing check_memb_join_sanity() are dropped.  Otherwise the
 * message is (optionally) converted to host order, token_ring_id_seq is
 * raised to the message's ring_seq if that is higher, and the message is
 * processed according to the current membership state.  Always returns 0.
 */
static int message_handler_memb_join (
	struct totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed)
{
	const struct memb_join *memb_join;
	struct memb_join *memb_join_convert;
	struct srp_addr aligned_system_from;

	if (check_memb_join_sanity(instance, msg, msg_len, endian_conversion_needed) == -1) {
		return (0);
	}

	if (endian_conversion_needed) {
		/*
		 * Reserve stack space only for messages that passed the sanity
		 * check and really need conversion.  Memory obtained from
		 * alloca() stays valid until this function returns.
		 */
		memb_join_convert = alloca (msg_len);
		memb_join_endian_convert (msg, memb_join_convert);
		memb_join = memb_join_convert;
	} else {
		memb_join = msg;
	}

	/* Local copy of the sender address used by the subset checks below */
	aligned_system_from = memb_join->system_from;

	/*
	 * If the process paused because it wasn't scheduled in a timely
	 * fashion, flush the join messages because they may be queued
	 * entries
	 */
	if (pause_flush (instance)) {
		return (0);
	}

	if (instance->token_ring_id_seq < memb_join->ring_seq) {
		instance->token_ring_id_seq = memb_join->ring_seq;
	}

	switch (instance->memb_state) {
	case MEMB_STATE_OPERATIONAL:
		if (!ignore_join_under_operational (instance, memb_join)) {
			memb_join_process (instance, memb_join);
		}
		break;

	case MEMB_STATE_GATHER:
		memb_join_process (instance, memb_join);
		break;

	case MEMB_STATE_COMMIT:
		/* Only joins from members of the new configuration with a
		 * ring sequence not older than ours restart the gather */
		if (memb_set_subset (&aligned_system_from,
			1,
			instance->my_new_memb_list,
			instance->my_new_memb_entries) &&
			memb_join->ring_seq >= instance->my_ring_id.seq) {

			memb_join_process (instance, memb_join);
			memb_state_gather_enter (instance, TOTEMSRP_GSFROM_JOIN_DURING_COMMIT_STATE);
		}
		break;

	case MEMB_STATE_RECOVERY:
		if (memb_set_subset (&aligned_system_from,
			1,
			instance->my_new_memb_list,
			instance->my_new_memb_entries) &&
			memb_join->ring_seq >= instance->my_ring_id.seq) {

			memb_join_process (instance, memb_join);
			memb_recovery_state_token_loss (instance);
			memb_state_gather_enter (instance, TOTEMSRP_GSFROM_JOIN_DURING_RECOVERY);
		}
		break;
	}
	return (0);
}
/*
 * Handle a received membership commit token.
 *
 * instance                 - protocol instance the message was delivered to
 * msg / msg_len            - raw message as received
 * endian_conversion_needed - non-zero when the sender uses the opposite
 *                            byte order
 *
 * Tokens failing check_memb_commit_token_sanity() are dropped.  A private
 * host-order copy of the token is then made on the stack and acted upon
 * according to the current membership state.  Always returns 0.
 */
static int message_handler_memb_commit_token (
	struct totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed)
{
	struct memb_commit_token *memb_commit_token_convert;
	struct memb_commit_token *memb_commit_token;
	struct srp_addr sub[PROCESSOR_COUNT_MAX];
	int sub_entries;

	struct srp_addr *addr;

	log_printf (instance->totemsrp_log_level_debug,
		"got commit token");

	if (check_memb_commit_token_sanity(instance, msg, msg_len, endian_conversion_needed) == -1) {
		return (0);
	}

	/*
	 * Reserve stack space only after the message passed the sanity check.
	 * Memory obtained from alloca() stays valid until this function
	 * returns.
	 */
	memb_commit_token_convert = alloca (msg_len);

	if (endian_conversion_needed) {
		memb_commit_token_endian_convert (msg, memb_commit_token_convert);
	} else {
		memcpy (memb_commit_token_convert, msg, msg_len);
	}
	memb_commit_token = memb_commit_token_convert;
	/* The address list starts right after the fixed part of the token */
	addr = (struct srp_addr *)memb_commit_token->end_of_commit_token;

#ifdef TEST_DROP_COMMIT_TOKEN_PERCENTAGE
	if (random()%100 < TEST_DROP_COMMIT_TOKEN_PERCENTAGE) {
		return (0);
	}
#endif
	switch (instance->memb_state) {
	case MEMB_STATE_OPERATIONAL:
		/* discard token */
		break;

	case MEMB_STATE_GATHER:
		/* Accept the token only if it lists exactly (proc list - failed list)
		 * and announces a ring sequence newer than ours */
		memb_set_subtract (sub, &sub_entries,
			instance->my_proc_list, instance->my_proc_list_entries,
			instance->my_failed_list, instance->my_failed_list_entries);

		if (memb_set_equal (addr,
			memb_commit_token->addr_entries,
			sub,
			sub_entries) &&

			memb_commit_token->ring_id.seq > instance->my_ring_id.seq) {
			memcpy (instance->commit_token, memb_commit_token, msg_len);
			memb_state_commit_enter (instance);
		}
		break;

	case MEMB_STATE_COMMIT:
		/*
		 * If retransmitted commit tokens are sent on this ring
		 * filter them out and only enter recovery once the
		 * commit token has traversed the array.  This is
		 * determined by :
		 * memb_commit_token->memb_index == memb_commit_token->addr_entries
		 */
		if (memb_commit_token->ring_id.seq == instance->my_ring_id.seq &&
			memb_commit_token->memb_index == memb_commit_token->addr_entries) {
			memb_state_recovery_enter (instance, memb_commit_token);
		}
		break;

	case MEMB_STATE_RECOVERY:
		if (instance->my_id.nodeid == instance->my_ring_id.rep) {

			/* Filter out duplicated tokens */
			if (instance->originated_orf_token) {
				break;
			}

			instance->originated_orf_token = 1;

			log_printf (instance->totemsrp_log_level_debug,
				"Sending initial ORF token");

			// TODO convert instead of initiate
			orf_token_send_initial (instance);
			reset_token_timeout (instance); // REVIEWED
			reset_token_retransmit_timeout (instance); // REVIEWED
		}
		break;
	}
	return (0);
}
/*
 * Handle a received token-hold-cancel message.
 *
 * Messages failing check_token_hold_cancel_sanity() or carrying a ring id
 * different from ours are ignored.  Otherwise my_seq_unchanged is reset and,
 * when this node is the ring representative, the token retransmit timeout
 * handler is run immediately.  Always returns 0.
 */
static int message_handler_token_hold_cancel (
	struct totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed)
{
	const struct token_hold_cancel *cancel_msg = msg;
	int same_ring;

	if (check_token_hold_cancel_sanity(instance, msg, msg_len, endian_conversion_needed) == -1) {
		return (0);
	}

	same_ring = (memcmp (&cancel_msg->ring_id, &instance->my_ring_id,
		sizeof (struct memb_ring_id)) == 0);
	if (!same_ring) {
		return (0);
	}

	instance->my_seq_unchanged = 0;
	if (instance->my_ring_id.rep == instance->my_id.nodeid) {
		timer_function_token_retransmit_timeout (instance);
	}

	return (0);
}
/*
 * Guess which software sent a message whose magic number is not ours.
 * The caller must have verified that the message is at least as long as
 * struct totem_message_header.  Returns a constant description string that
 * is only used for logging.
 */
static const char *guess_bad_magic_sender (const void *msg)
{
	const struct totem_message_header *hdr = msg;
	const char *first_byte = msg;

	if (hdr->magic == 0xFFFF) {
		/* Corosync 2.2 used header with two UINT8_MAX */
		return ("Corosync 2.2");
	}

	if (hdr->magic == 0xFEFE) {
		/* Corosync 2.3+ used header with two UINT8_MAX - 1 */
		return ("Corosync 2.3+");
	}

	if (first_byte[0] == 0x01) {
		/* Knet has stable1 with first byte of message == 1 */
		return ("unencrypted Kronosnet");
	}

	if (first_byte[0] >= 0 && first_byte[0] <= 5) {
		/*
		 * Unencrypted Corosync 1.x/OpenAIS has first byte
		 * 0-5. Collision with Knet (but still worth the try)
		 */
		return ("unencrypted Corosync 2.0/2.1/1.x/OpenAIS");
	}

	/*
	 * Encrypted Kronosnet packet has a hash at the end of
	 * the packet and nothing specific at the beginning of the
	 * packet (just encrypted data).
	 * Encrypted Corosync 1.x/OpenAIS is quite similar but hash_digest
	 * is in the beginning of the packet.
	 *
	 * So it's not possible to reliably detect either of them.
	 */
	return ("encrypted Kronosnet/Corosync 2.0/2.1/1.x/OpenAIS or unknown");
}

/*
 * Validate the common header of a received message.
 *
 * Checks, in order, that the message is long enough to hold a
 * struct totem_message_header, that the magic number equals TOTEM_MH_MAGIC
 * in either byte order, and that the version equals TOTEM_MH_VERSION.
 * Each failure is logged at security level.
 *
 * Returns 0 when the header is acceptable and -1 otherwise.
 */
static int check_message_header_validity(
	void *context,
	const void *msg,
	unsigned int msg_len,
	const struct sockaddr_storage *system_from)
{
	struct totemsrp_instance *instance = context;
	const struct totem_message_header *hdr = msg;
	int magic_known;

	if (msg_len < sizeof (struct totem_message_header)) {
		log_printf (instance->totemsrp_log_level_security,
			"Message received from %s is too short... Ignoring %u.",
			totemip_sa_print((struct sockaddr *)system_from), (unsigned int)msg_len);
		return (-1);
	}

	magic_known = (hdr->magic == TOTEM_MH_MAGIC ||
		hdr->magic == swab16(TOTEM_MH_MAGIC));
	if (!magic_known) {
		/*
		 * We've received either Knet, old version of Corosync,
		 * or something else. Do some guessing to display (hopefully)
		 * helpful message
		 */
		log_printf(instance->totemsrp_log_level_security,
			"Message received from %s has bad magic number (probably sent by %s).. Ignoring",
			totemip_sa_print((struct sockaddr *)system_from),
			guess_bad_magic_sender (msg));
		return (-1);
	}

	if (hdr->version != TOTEM_MH_VERSION) {
		log_printf(instance->totemsrp_log_level_security,
			"Message received from %s has unsupported version %u... Ignoring",
			totemip_sa_print((struct sockaddr *)system_from),
			hdr->version);
		return (-1);
	}

	return (0);
}
/*
 * Entry point for every message handed up by the transport.
 *
 * The header is validated first; invalid messages are silently returned
 * from here (check_message_header_validity() does the logging).  The
 * per-type receive counter is then bumped and the message is dispatched
 * through totemsrp_message_handlers.  Messages with an unknown type are
 * logged, counted in rx_msg_dropped and discarded.
 */
void main_deliver_fn (
	void *context,
	const void *msg,
	unsigned int msg_len,
	const struct sockaddr_storage *system_from)
{
	struct totemsrp_instance *instance = context;
	const struct totem_message_header *hdr = msg;
	int swab_needed;

	if (check_message_header_validity(context, msg, msg_len, system_from) == -1) {
		return ;
	}

	/* Account for the message by type; unknown types are dropped here */
	switch (hdr->type) {
	case MESSAGE_TYPE_ORF_TOKEN:
		instance->stats.orf_token_rx++;
		break;
	case MESSAGE_TYPE_MCAST:
		instance->stats.mcast_rx++;
		break;
	case MESSAGE_TYPE_MEMB_MERGE_DETECT:
		instance->stats.memb_merge_detect_rx++;
		break;
	case MESSAGE_TYPE_MEMB_JOIN:
		instance->stats.memb_join_rx++;
		break;
	case MESSAGE_TYPE_MEMB_COMMIT_TOKEN:
		instance->stats.memb_commit_token_rx++;
		break;
	case MESSAGE_TYPE_TOKEN_HOLD_CANCEL:
		instance->stats.token_hold_cancel_rx++;
		break;
	default:
		log_printf (instance->totemsrp_log_level_security,
			"Message received from %s has wrong type... ignoring %d.\n",
			totemip_sa_print((struct sockaddr *)system_from),
			(int)hdr->type);
		instance->stats.rx_msg_dropped++;
		return;
	}

	/*
	 * A magic number that is valid but not equal to ours in host order
	 * means the sender uses the opposite byte order.
	 */
	swab_needed = (hdr->magic != TOTEM_MH_MAGIC);

	/*
	 * Handle incoming message
	 */
	totemsrp_message_handlers.handler_functions[(int)hdr->type] (
		instance,
		msg,
		msg_len,
		swab_needed);
}
/*
 * Record the address of interface "iface_no" in my_addrs and forward the
 * interface address and port to the totemnet layer.
 *
 * Returns the result of totemnet_iface_set().
 */
int totemsrp_iface_set (
	void *context,
	const struct totem_ip_address *interface_addr,
	unsigned short ip_port,
	unsigned int iface_no)
{
	struct totemsrp_instance *srp = context;

	totemip_copy(&srp->my_addrs[iface_no], interface_addr);

	return (totemnet_iface_set (
		srp->totemnet_context,
		interface_addr,
		ip_port,
		iface_no));
}
/*
 * Interface change callback.
 * Contrary to its name, this only gets called when the interface is enabled.
 *
 * context    - the totemsrp instance
 * iface_addr - address of the interface that came up
 * iface_no   - index of that interface
 *
 * The address is stored in my_addrs and, if no node id is set yet, the
 * node id is taken from it.  On the first call the ring id is created or
 * loaded and the service-ready callback (if registered) is invoked.  Once
 * the number of calls reaches the number of configured interfaces, the new
 * parameters are committed and the gather state is entered.
 */
void main_iface_change_fn (
	void *context,
	const struct totem_ip_address *iface_addr,
	unsigned int iface_no)
{
	struct totemsrp_instance *instance = context;
	int num_interfaces;
	int i;

	if (!instance->my_id.nodeid) {
		instance->my_id.nodeid = iface_addr->nodeid;
	}
	totemip_copy (&instance->my_addrs[iface_no], iface_addr);

	if (instance->iface_changes++ == 0) {
		instance->memb_ring_id_create_or_load (&instance->my_ring_id, instance->my_id.nodeid);
		/*
		 * Increase the ring_id sequence number. This doesn't follow specification.
		 * Solves problem with restarted leader node (node with lowest nodeid) before
		 * rest of the cluster forms new membership and guarantees unique ring_id for
		 * new singleton configuration.
		 */
		instance->my_ring_id.seq++;

		instance->token_ring_id_seq = instance->my_ring_id.seq;
		log_printf (
			instance->totemsrp_log_level_debug,
			"Created or loaded sequence id " CS_PRI_RING_ID " for this ring.",
			instance->my_ring_id.rep,
			(uint64_t)instance->my_ring_id.seq);

		if (instance->totemsrp_service_ready_fn) {
			instance->totemsrp_service_ready_fn ();
		}

	}

	/* Count the interfaces that are configured at all */
	num_interfaces = 0;
	for (i = 0; i < INTERFACE_MAX; i++) {
		if (instance->totem_config->interfaces[i].configured) {
			num_interfaces++;
		}
	}

	if (instance->iface_changes >= num_interfaces) {
		/* We need to clear orig_interfaces so that 'commit' diffs against nothing */
		instance->totem_config->orig_interfaces = calloc (INTERFACE_MAX, sizeof (struct totem_interface));
		assert(instance->totem_config->orig_interfaces != NULL);

		totemconfig_commit_new_params(instance->totem_config, icmap_get_global_map());

		memb_state_gather_enter (instance, TOTEMSRP_GSFROM_INTERFACE_CHANGE);

		/*
		 * The zeroed array was only needed for the commit above.  Reset
		 * the pointer so that totem_config does not keep referring to
		 * freed memory.
		 */
		free(instance->totem_config->orig_interfaces);
		instance->totem_config->orig_interfaces = NULL;
	}
}
/*
 * Reduce the configured network MTU by the size of two struct mcast
 * headers.
 */
void totemsrp_net_mtu_adjust (struct totem_config *totem_config)
{
	totem_config->net_mtu = totem_config->net_mtu - 2 * sizeof (struct mcast);
}
/*
 * Register the callback stored in totemsrp_service_ready_fn.  It is invoked
 * by main_iface_change_fn() on the first interface change.
 */
void totemsrp_service_ready_register (
	void *context,
	void (*totem_service_ready) (void))
{
	struct totemsrp_instance *srp = context;

	srp->totemsrp_service_ready_fn = totem_service_ready;
}
/*
 * Add a member on interface "iface_no" by forwarding to
 * totemnet_member_add() together with our own address on that interface.
 * Returns the totemnet result.
 */
int totemsrp_member_add (
	void *context,
	const struct totem_ip_address *member,
	int iface_no)
{
	struct totemsrp_instance *srp = context;

	return (totemnet_member_add (srp->totemnet_context,
		&srp->my_addrs[iface_no], member, iface_no));
}
/*
 * Remove a member on interface "iface_no" by forwarding to
 * totemnet_member_remove().  Returns the totemnet result.
 */
int totemsrp_member_remove (
	void *context,
	const struct totem_ip_address *member,
	int iface_no)
{
	struct totemsrp_instance *srp = context;

	return (totemnet_member_remove (srp->totemnet_context, member, iface_no));
}
/*
 * Set the threaded_mode_enabled flag of the instance.
 */
void totemsrp_threaded_mode_enable (void *context)
{
	struct totemsrp_instance *srp = context;

	srp->threaded_mode_enabled = 1;
}
/*
 * Clear the waiting_trans_ack flag and report the new value (0) through
 * the waiting_trans_ack callback.
 */
void totemsrp_trans_ack (void *context)
{
	struct totemsrp_instance *srp = context;

	srp->waiting_trans_ack = 0;
	srp->totemsrp_waiting_trans_ack_cb_fn (0);
}
/*
 * Forward a reconfiguration request to the totemnet layer and return its
 * result.
 */
int totemsrp_reconfigure (void *context, struct totem_config *totem_config)
{
	struct totemsrp_instance *srp = context;

	return (totemnet_reconfigure (srp->totemnet_context, totem_config));
}
/*
 * Forward one phase of a crypto reconfiguration to the totemnet layer and
 * return its result.
 */
int totemsrp_crypto_reconfigure_phase (void *context, struct totem_config *totem_config, cfg_message_crypto_reconfig_phase_t phase)
{
	struct totemsrp_instance *srp = context;

	return (totemnet_crypto_reconfigure_phase (srp->totemnet_context,
		totem_config, phase));
}
/*
 * Zero the instance statistics.  When TOTEMPG_STATS_CLEAR_TRANSPORT is set
 * in "flags", the totemnet statistics are cleared as well.
 */
void totemsrp_stats_clear (void *context, int flags)
{
	struct totemsrp_instance *srp = context;
	int clear_transport = ((flags & TOTEMPG_STATS_CLEAR_TRANSPORT) != 0);

	memset(&srp->stats, 0, sizeof(totemsrp_stats_t));

	if (clear_transport) {
		totemnet_stats_clear (srp->totemnet_context);
	}
}
/*
 * Run the ORF token timeout handler right away for the given instance.
 */
void totemsrp_force_gather (void *context)
{
	void *timer_data = context;

	timer_function_orf_token_timeout(timer_data);
}
diff --git a/exec/totemsrp.h b/exec/totemsrp.h
index c8c1c45c..49e00955 100644
--- a/exec/totemsrp.h
+++ b/exec/totemsrp.h
@@ -1,165 +1,168 @@
/*
* Copyright (c) 2003-2005 MontaVista Software, Inc.
* Copyright (c) 2006-2011 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file
* Totem Single Ring Protocol
*
* depends on poll abstraction, POSIX, IPV4
*/
#ifndef TOTEMSRP_H_DEFINED
#define TOTEMSRP_H_DEFINED
#include <corosync/totem/totem.h>
#include <qb/qbloop.h>
/**
* Create a protocol instance
*/
int totemsrp_initialize (
qb_loop_t *poll_handle,
void **srp_context,
struct totem_config *totem_config,
totempg_stats_t *stats,
void (*deliver_fn) (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required),
void (*confchg_fn) (
enum totem_configuration_type configuration_type,
const unsigned int *member_list, size_t member_list_entries,
const unsigned int *left_list, size_t left_list_entries,
const unsigned int *joined_list, size_t joined_list_entries,
const struct memb_ring_id *ring_id),
void (*waiting_trans_ack_cb_fn) (
int waiting_trans_ack));
void totemsrp_finalize (void *srp_context);
/**
* Multicast a message
*/
int totemsrp_mcast (
void *srp_context,
struct iovec *iovec,
unsigned int iov_len,
int priority);
/**
* Return number of available messages that can be queued
*/
int totemsrp_avail (void *srp_context);
int totemsrp_callback_token_create (
void *srp_context,
void **handle_out,
enum totem_callback_token_type type,
int delete,
int (*callback_fn) (enum totem_callback_token_type type, const void *),
const void *data);
void totemsrp_callback_token_destroy (
void *srp_context,
void **handle_out);
void totemsrp_event_signal (void *srp_context, enum totem_event_type type, int value);
extern void totemsrp_net_mtu_adjust (struct totem_config *totem_config);
+extern int totemsrp_nodestatus_get (void *srp_context, unsigned int nodeid,
+ struct totem_node_status *node_status);
+
extern int totemsrp_ifaces_get (
void *srp_context,
unsigned int nodeid,
unsigned int *interface_id,
struct totem_ip_address *interfaces,
unsigned int interfaces_size,
char ***status,
unsigned int *iface_count);
extern unsigned int totemsrp_my_nodeid_get (
void *srp_context);
extern int totemsrp_my_family_get (
void *srp_context);
extern int totemsrp_crypto_set (
void *srp_context,
const char *cipher_type,
const char *hash_type);
void totemsrp_service_ready_register (
void *srp_context,
void (*totem_service_ready) (void));
extern int totemsrp_iface_set (
void *srp_context,
const struct totem_ip_address *interface_addr,
unsigned short ip_port,
unsigned int iface_no);
extern int totemsrp_member_add (
void *srp_context,
const struct totem_ip_address *member,
int ring_no);
extern int totemsrp_member_remove (
void *srp_context,
const struct totem_ip_address *member,
int ring_no);
void totemsrp_threaded_mode_enable (
void *srp_context);
void totemsrp_trans_ack (
void *srp_context);
int totemsrp_reconfigure (
void *context,
struct totem_config *totem_config);
int totemsrp_crypto_reconfigure_phase (
void *context,
struct totem_config *totem_config,
cfg_message_crypto_reconfig_phase_t phase);
void totemsrp_stats_clear (
void *srp_context, int flags);
void totemsrp_force_gather (
void *context);
#endif /* TOTEMSRP_H_DEFINED */
diff --git a/exec/totemudp.c b/exec/totemudp.c
index 749fc7e8..fd3215b5 100644
--- a/exec/totemudp.c
+++ b/exec/totemudp.c
@@ -1,1520 +1,1549 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2018 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <assert.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netdb.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <sched.h>
#include <time.h>
#include <sys/time.h>
#include <sys/poll.h>
#include <sys/uio.h>
#include <limits.h>
#include <corosync/sq.h>
#include <corosync/swab.h>
#include <qb/qbdefs.h>
#include <qb/qbloop.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/logsys.h>
#include "totemudp.h"
#include "util.h"
#ifndef MSG_NOSIGNAL
#define MSG_NOSIGNAL 0
#endif
#define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX)
#define NETIF_STATE_REPORT_UP 1
#define NETIF_STATE_REPORT_DOWN 2
#define BIND_STATE_UNBOUND 0
#define BIND_STATE_REGULAR 1
#define BIND_STATE_LOOPBACK 2
/*
 * A member address together with a list link.
 * NOTE(review): presumably linked into totemudp_instance.member_list -- confirm.
 */
struct totemudp_member {
/* List linkage */
struct qb_list_head list;
/* Address of the member */
struct totem_ip_address member;
};
/*
 * File descriptors owned by one totemudp instance.  The cleanup code treats
 * a descriptor as open only when its value is greater than 0.
 */
struct totemudp_socket {
/* Removed from the qb loop and closed on cleanup */
int mcast_recv;
/* Descriptor passed to sendmsg() for both unicast and multicast messages */
int mcast_send;
/* Removed from the qb loop and closed on cleanup.
 * NOTE(review): presumably the socket tokens are received on -- confirm */
int token;
/*
 * Socket used for local multicast delivery. We don't rely on multicast
 * loop and rather this UNIX DGRAM socket is used. Socket is created by
 * socketpair call and they are used in same way as pipe (so [0] is read
 * end and [1] is write end)
 */
int local_mcast_loop[2];
};
/*
 * State of one totemudp transport instance.  The whole structure is zeroed
 * by totemudp_instance_initialize() before individual fields are set.
 */
struct totemudp_instance {
/* Loop handle used for poll and timer registration */
qb_loop_t *totemudp_poll_handle;
/* Interface description; supplies ip_port, bindnet and boundto */
struct totem_interface *totem_interface;
/* Bitmask of NETIF_STATE_REPORT_* values; initialized with both bits set */
int netif_state_report;
/* One of the BIND_STATE_* values */
int netif_bind_state;
/* Opaque pointer passed as first argument to totemudp_deliver_fn */
void *context;
/* Called by net_deliver_fn() for every received datagram that is not truncated */
void (*totemudp_deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len,
const struct sockaddr_storage *system_from);
/* NOTE(review): presumably invoked when the bound interface changes -- confirm */
void (*totemudp_iface_change_fn) (
void *context,
const struct totem_ip_address *iface_address,
unsigned int ring_no);
/* NOTE(review): presumably invoked after the token target was set -- confirm */
void (*totemudp_target_set_completed) (void *context);
/*
 * Function and data used to log messages
 */
int totemudp_log_level_security;
int totemudp_log_level_error;
int totemudp_log_level_warning;
int totemudp_log_level_notice;
int totemudp_log_level_debug;
int totemudp_subsys_id;
void (*totemudp_log_printf) (
int level,
int subsys,
const char *function,
const char *file,
int line,
const char *format,
...)__attribute__((format(printf, 6, 7)));
void *udp_context;
/* List head; initialized empty by totemudp_instance_initialize() */
struct qb_list_head member_list;
/* Receive buffer used by net_deliver_fn() while flushing != 1 */
char iov_buffer[UDP_RECEIVE_FRAME_SIZE_MAX];
/* Receive buffer used by net_deliver_fn() while flushing == 1 */
char iov_buffer_flush[UDP_RECEIVE_FRAME_SIZE_MAX];
/* iovec describing iov_buffer */
struct iovec totemudp_iov_recv;
/* iovec describing iov_buffer_flush */
struct iovec totemudp_iov_recv_flush;
struct totemudp_socket totemudp_sockets;
/* Destination address of mcast_sendmsg() */
struct totem_ip_address mcast_address;
int stats_sent;
/* Sum of the byte counts returned by recvmsg() in net_deliver_fn() */
int stats_recv;
int stats_delv;
int stats_remcasts;
int stats_orf_token;
struct timeval stats_tv_start;
struct totem_ip_address my_id;
int firstrun;
/* Handle of the timer running timer_function_netif_check_timeout() */
qb_loop_timer_handle timer_netif_check_timeout;
/* Initialized to 1 by totemudp_instance_initialize() */
unsigned int my_memb_entries;
/* When 1, net_deliver_fn() receives into the flush buffer */
int flushing;
struct totem_config *totem_config;
/* Statistics block; continuous_sendmsg_failures is maintained by mcast_sendmsg() */
totemsrp_stats_t *stats;
struct totem_ip_address token_target;
};
/*
 * A message pointer and length paired with the owning instance.
 * NOTE(review): no user of this structure is visible in this part of the
 * file; it may be a leftover -- confirm before relying on it.
 */
struct work_item {
const void *msg;
unsigned int msg_len;
struct totemudp_instance *instance;
};
static int totemudp_build_sockets (
struct totemudp_instance *instance,
struct totem_ip_address *bindnet_address,
struct totem_ip_address *mcastaddress,
struct totemudp_socket *sockets,
struct totem_ip_address *bound_to);
static struct totem_ip_address localhost;
/*
 * Put a totemudp instance into its initial state: everything zeroed, both
 * interface state reports enabled, the two receive iovecs pointing at their
 * full-size buffers, one member and an empty member list.
 */
static void totemudp_instance_initialize (struct totemudp_instance *instance)
{
	memset (instance, 0, sizeof (*instance));

	instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN;

	/* Regular receive buffer */
	instance->totemudp_iov_recv.iov_base = instance->iov_buffer;
	instance->totemudp_iov_recv.iov_len = sizeof (instance->iov_buffer);

	/* Receive buffer used while flushing */
	instance->totemudp_iov_recv_flush.iov_base = instance->iov_buffer_flush;
	instance->totemudp_iov_recv_flush.iov_len = sizeof (instance->iov_buffer_flush);

	/*
	 * There is always at least 1 processor
	 */
	instance->my_memb_entries = 1;

	qb_list_init (&instance->member_list);
}
/*
 * Log a printf-style message through the instance's logging callback.
 *
 * Expects a variable named "instance" (pointing to a struct
 * totemudp_instance) to be in scope at the call site.  The subsystem id,
 * function name, file and line are filled in automatically.
 *
 * The expansion is a single statement without a trailing semicolon, so the
 * caller's own ';' terminates it and the macro can be used safely as the
 * body of an unbraced if/else.
 */
#define log_printf(level, format, args...)				\
do {									\
	instance->totemudp_log_printf (					\
		level, instance->totemudp_subsys_id,			\
		__FUNCTION__, __FILE__, __LINE__,			\
		(const char *)format, ##args);				\
} while (0)
/*
 * Log "fmt" followed by ": <error text> (<err_num>)" and a newline at the
 * given level.  The error text is obtained from qb_strerror_r() using a
 * local buffer of LOGSYS_MAX_PERROR_MSG_LEN bytes.
 *
 * Expects a variable named "instance" to be in scope at the call site.
 * "fmt" must be a string literal because it is concatenated with the
 * suffix at compile time.
 */
#define LOGSYS_PERROR(err_num, level, fmt, args...) \
do { \
char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \
const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \
instance->totemudp_log_printf ( \
level, instance->totemudp_subsys_id, \
__FUNCTION__, __FILE__, __LINE__, \
fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \
} while(0)
/*
 * Crypto configuration hook of the UDP transport.  None of the arguments
 * is used; the function always returns 0.
 */
int totemudp_crypto_set (
	void *udp_context,
	const char *cipher_type,
	const char *hash_type)
{
	/* Arguments are intentionally unused */
	(void)udp_context;
	(void)cipher_type;
	(void)hash_type;

	return (0);
}
/*
 * Send one message to a single node.
 *
 * The destination is "system_to" combined with the interface's ip_port and
 * the message goes out through the mcast_send socket.  A failing sendmsg()
 * is only logged at debug level.
 */
static inline void ucast_sendmsg (
	struct totemudp_instance *instance,
	struct totem_ip_address *system_to,
	const void *msg,
	unsigned int msg_len)
{
	struct msghdr hdr;
	struct sockaddr_storage dest;
	struct iovec payload;
	int dest_len;
	int rc = 0;

	/* Single iovec covering the whole message */
	payload.iov_base = (void*)msg;
	payload.iov_len = msg_len;

	/*
	 * Build unicast message
	 */
	memset(&hdr, 0, sizeof(hdr));
	totemip_totemip_to_sockaddr_convert(system_to,
		instance->totem_interface->ip_port, &dest, &dest_len);
	hdr.msg_name = &dest;
	hdr.msg_namelen = dest_len;
	hdr.msg_iov = (void *)&payload;
	hdr.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
	hdr.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
	hdr.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
	hdr.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
	hdr.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
	hdr.msg_accrightslen = 0;
#endif

	/*
	 * Transmit unicast message
	 * An error here is recovered by totemsrp
	 */
	rc = sendmsg (instance->totemudp_sockets.mcast_send, &hdr,
		MSG_NOSIGNAL);
	if (rc < 0) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
			"sendmsg(ucast) failed (non-critical)");
	}
}
/*
 * Send one message to the multicast group and to the local loopback
 * socket pair.
 *
 * The network copy goes to mcast_address/ip_port through the mcast_send
 * socket; its outcome drives stats->continuous_sendmsg_failures (incremented
 * on failure, reset on success).  The same message is then written to
 * local_mcast_loop[1] without a destination address.  Failures are only
 * logged at debug level.
 */
static inline void mcast_sendmsg (
	struct totemudp_instance *instance,
	const void *msg,
	unsigned int msg_len)
{
	struct msghdr hdr;
	struct iovec payload;
	struct sockaddr_storage group;
	int group_len;
	int rc = 0;

	/* Single iovec covering the whole message */
	payload.iov_base = (void *)msg;
	payload.iov_len = msg_len;

	/*
	 * Build multicast message
	 */
	totemip_totemip_to_sockaddr_convert(&instance->mcast_address,
		instance->totem_interface->ip_port, &group, &group_len);
	memset(&hdr, 0, sizeof(hdr));
	hdr.msg_name = &group;
	hdr.msg_namelen = group_len;
	hdr.msg_iov = (void *)&payload;
	hdr.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
	hdr.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
	hdr.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
	hdr.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
	hdr.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
	hdr.msg_accrightslen = 0;
#endif

	/*
	 * Transmit multicast message
	 * An error here is recovered by totemsrp
	 */
	rc = sendmsg (instance->totemudp_sockets.mcast_send, &hdr,
		MSG_NOSIGNAL);
	if (rc >= 0) {
		instance->stats->continuous_sendmsg_failures = 0;
	} else {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
			"sendmsg(mcast) failed (non-critical)");
		instance->stats->continuous_sendmsg_failures++;
	}

	/*
	 * Transmit multicast message to local unix mcast loop
	 * An error here is recovered by totemsrp
	 */
	hdr.msg_name = NULL;
	hdr.msg_namelen = 0;

	rc = sendmsg (instance->totemudp_sockets.local_mcast_loop[1], &hdr,
		MSG_NOSIGNAL);
	if (rc < 0) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
			"sendmsg(local mcast loop) failed (non-critical)");
	}
}
int totemudp_finalize (
void *udp_context)
{
struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
int res = 0;
if (instance->totemudp_sockets.mcast_recv > 0) {
qb_loop_poll_del (instance->totemudp_poll_handle,
instance->totemudp_sockets.mcast_recv);
close (instance->totemudp_sockets.mcast_recv);
}
if (instance->totemudp_sockets.mcast_send > 0) {
close (instance->totemudp_sockets.mcast_send);
}
if (instance->totemudp_sockets.local_mcast_loop[0] > 0) {
qb_loop_poll_del (instance->totemudp_poll_handle,
instance->totemudp_sockets.local_mcast_loop[0]);
close (instance->totemudp_sockets.local_mcast_loop[0]);
close (instance->totemudp_sockets.local_mcast_loop[1]);
}
if (instance->totemudp_sockets.token > 0) {
qb_loop_poll_del (instance->totemudp_poll_handle,
instance->totemudp_sockets.token);
close (instance->totemudp_sockets.token);
}
return (res);
}
/*
 * Poll callback: receive one datagram from "fd" and hand it to the
 * registered deliver function.
 *
 * Only designed to work with a message with one iov.
 *
 * fd      - descriptor that became readable
 * revents - poll events (unused)
 * data    - the totemudp instance
 *
 * While instance->flushing is 1 the flush buffer is used for receiving,
 * otherwise the regular one.  Receive errors and truncated datagrams are
 * dropped (truncation is logged).  Always returns 0.
 */
static int net_deliver_fn (
	int fd,
	int revents,
	void *data)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)data;
	struct msghdr msg_recv;
	struct iovec *iovec;
	struct sockaddr_storage system_from;
	int bytes_received;
	int truncated_packet;

	if (instance->flushing == 1) {
		iovec = &instance->totemudp_iov_recv_flush;
	} else {
		iovec = &instance->totemudp_iov_recv;
	}

	/*
	 * Receive datagram
	 */
	msg_recv.msg_name = &system_from;
	msg_recv.msg_namelen = sizeof (struct sockaddr_storage);
	msg_recv.msg_iov = iovec;
	msg_recv.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
	msg_recv.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
	msg_recv.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
	msg_recv.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
	msg_recv.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
	msg_recv.msg_accrightslen = 0;
#endif

	bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT);
	if (bytes_received == -1) {
		return (0);
	} else {
		instance->stats_recv += bytes_received;
	}

	truncated_packet = 0;

#ifdef HAVE_MSGHDR_FLAGS
	if (msg_recv.msg_flags & MSG_TRUNC) {
		truncated_packet = 1;
	}
#else
	/*
	 * We don't have MSGHDR_FLAGS, but we can (hopefully) safely make assumption that
	 * if bytes_received == UDP_RECEIVE_FRAME_SIZE_MAX then packet is truncated
	 */
	if (bytes_received == UDP_RECEIVE_FRAME_SIZE_MAX) {
		truncated_packet = 1;
	}
#endif

	if (truncated_packet) {
		/* Adjacent literals are concatenated as-is, so each fragment
		 * must carry its own separating space */
		log_printf (instance->totemudp_log_level_error,
			"Received too big message. This may be because something bad is happening "
			"on the network (attack?), or you tried join more nodes than corosync is "
			"compiled with (%u) or bug in the code (bad estimation of "
			"the UDP_RECEIVE_FRAME_SIZE_MAX). Dropping packet.", PROCESSOR_COUNT_MAX);
		return (0);
	}

	/* Temporarily shrink the iovec to the received size for delivery */
	iovec->iov_len = bytes_received;

	/*
	 * Handle incoming message
	 */
	instance->totemudp_deliver_fn (
		instance->context,
		iovec->iov_base,
		iovec->iov_len,
		&system_from);

	/* Restore the full buffer size for the next receive */
	iovec->iov_len = UDP_RECEIVE_FRAME_SIZE_MAX;
	return (0);
}
/*
 * Thin wrapper around totemip_iface_check() that supplies the configured
 * clear_node_high_bit setting.  Returns the totemip_iface_check() result.
 */
static int netif_determine (
	struct totemudp_instance *instance,
	struct totem_ip_address *bindnet,
	struct totem_ip_address *bound_to,
	int *interface_up,
	int *interface_num)
{
	return (totemip_iface_check (bindnet, bound_to,
		interface_up, interface_num,
		instance->totem_config->clear_node_high_bit));
}
/*
* Timer callback that re-evaluates the bound network interface.
*
* If the interface is up, the sockets for totem are built. If the interface is down
* this function is requeued in the timer list to retry building the sockets later.
*
* data is the struct totemudp_instance that was registered with the timer.
*/
static void timer_function_netif_check_timeout (
void *data)
{
struct totemudp_instance *instance = (struct totemudp_instance *)data;
int interface_up;
int interface_num;
struct totem_ip_address *bind_address;
/*
* Look up the current state of the configured bind network. The outcome is
* read back from interface_up below.
* NOTE(review): the return value is ignored, so a failed lookup is not
* detected here - confirm the outputs are always written by the callee.
*/
netif_determine (instance,
&instance->totem_interface->bindnet,
&instance->totem_interface->boundto,
&interface_up, &interface_num);
/*
* Nothing changed, so only re-arm the check timer and return. That is the
* case when the interface is still down while we are already in loopback
* mode, or when it is still up while regularly bound as the only member.
*/
if ((instance->netif_bind_state == BIND_STATE_LOOPBACK &&
interface_up == 0) ||
(instance->my_memb_entries == 1 &&
instance->netif_bind_state == BIND_STATE_REGULAR &&
interface_up == 1)) {
/*
* Check again after downcheck_timeout milliseconds
*/
qb_loop_timer_add (instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
return;
}
/*
* The bind state has to change: tear down the existing sockets first.
* NOTE(review): the "> 0" tests skip a descriptor whose value is 0 - confirm
* that 0 is used as the "not open" marker for these fields.
*/
if (instance->totemudp_sockets.mcast_recv > 0) {
qb_loop_poll_del (instance->totemudp_poll_handle,
instance->totemudp_sockets.mcast_recv);
close (instance->totemudp_sockets.mcast_recv);
}
if (instance->totemudp_sockets.mcast_send > 0) {
/* mcast_send is never added to the poll loop in this function, so it is only closed */
close (instance->totemudp_sockets.mcast_send);
}
if (instance->totemudp_sockets.local_mcast_loop[0] > 0) {
/* only end [0] of the pair is polled; both ends are closed */
qb_loop_poll_del (instance->totemudp_poll_handle,
instance->totemudp_sockets.local_mcast_loop[0]);
close (instance->totemudp_sockets.local_mcast_loop[0]);
close (instance->totemudp_sockets.local_mcast_loop[1]);
}
if (instance->totemudp_sockets.token > 0) {
qb_loop_poll_del (instance->totemudp_poll_handle,
instance->totemudp_sockets.token);
close (instance->totemudp_sockets.token);
}
if (interface_up == 0) {
/*
* Interface is not up
*/
instance->netif_bind_state = BIND_STATE_LOOPBACK;
bind_address = &localhost;
/*
* Add a timer to retry building interfaces and request memb_gather_enter
*/
qb_loop_timer_add (instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
} else {
/*
* Interface is up
*/
instance->netif_bind_state = BIND_STATE_REGULAR;
bind_address = &instance->totem_interface->bindnet;
}
/*
* Create and bind the multicast and unicast sockets
* NOTE(review): the result is discarded. totemudp_build_sockets() exits the
* process when socket creation fails, but returns -1 when its own interface
* lookup fails; in that case the poll registrations below use whatever
* descriptors are currently stored - confirm this cannot happen in practice.
*/
(void)totemudp_build_sockets (instance,
&instance->mcast_address,
bind_address,
&instance->totemudp_sockets,
&instance->totem_interface->boundto);
/*
* Register the three receiving descriptors with the poll loop; all of them
* are dispatched to net_deliver_fn.
*/
qb_loop_poll_add (
instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totemudp_sockets.mcast_recv,
POLLIN, instance, net_deliver_fn);
qb_loop_poll_add (
instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totemudp_sockets.local_mcast_loop[0],
POLLIN, instance, net_deliver_fn);
qb_loop_poll_add (
instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totemudp_sockets.token,
POLLIN, instance, net_deliver_fn);
totemip_copy (&instance->my_id, &instance->totem_interface->boundto);
/*
* This reports changes in the interface to the user and totemsrp.
* netif_state_report holds the transition that should be reported next:
* after reporting "up" it is set to REPORT_DOWN and vice versa, so each
* transition is reported once.
*/
if (instance->netif_bind_state == BIND_STATE_REGULAR) {
if (instance->netif_state_report & NETIF_STATE_REPORT_UP) {
log_printf (instance->totemudp_log_level_notice,
"The network interface [%s] is now up.",
totemip_print (&instance->totem_interface->boundto));
instance->netif_state_report = NETIF_STATE_REPORT_DOWN;
instance->totemudp_iface_change_fn (instance->context, &instance->my_id, 0);
}
/*
* Add a timer to check for interface going down in single membership
*/
if (instance->my_memb_entries == 1) {
qb_loop_timer_add (instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
}
} else {
if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) {
log_printf (instance->totemudp_log_level_notice,
"The network interface is down.");
instance->totemudp_iface_change_fn (instance->context, &instance->my_id, 0);
}
/* unlike the "up" branch, this is assigned whether or not a report was made */
instance->netif_state_report = NETIF_STATE_REPORT_UP;
}
}
/*
 * Set the priority of the given socket to 6 (TC_PRIO_INTERACTIVE) to ensure
 * that our messages don't get queued behind anything else.
 *
 * A setsockopt() failure is only logged at warning level. Where SO_PRIORITY
 * is not defined the function body is empty.
 */
static void totemudp_traffic_control_set(struct totemudp_instance *instance, int sock)
{
#ifdef SO_PRIORITY
	int prio = 6; /* TC_PRIO_INTERACTIVE */
	int rc;

	rc = setsockopt(sock, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(int));
	if (rc != 0) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set traffic priority");
	}
#endif
}
/*
* Create and configure every socket stored in *sockets:
*   - mcast_recv: receives group traffic; bound last, to mcast_address:ip_port
*   - local_mcast_loop[2]: an AF_UNIX datagram socket pair
*   - mcast_send: sends group traffic; bound to bound_to:(ip_port - 1)
*   - token: unicast socket; bound to bound_to:ip_port
*
* All sockets are made non-blocking. Depending on the configuration the
* function then either enables SO_BROADCAST or joins the multicast group,
* turns multicast loopback off, sets the multicast TTL / hop limit and selects
* the outgoing multicast interface.
*
* interface_num is used as the interface index for the IPv6 options only.
*
* Returns 0 on success and -1 as soon as any step fails. Sockets that were
* already created are not closed on the error paths of this function.
*/
static int totemudp_build_sockets_ip (
struct totemudp_instance *instance,
struct totem_ip_address *mcast_address,
struct totem_ip_address *bindnet_address,
struct totemudp_socket *sockets,
struct totem_ip_address *bound_to,
int interface_num)
{
struct sockaddr_storage sockaddr;
struct ipv6_mreq mreq6;
struct ip_mreq mreq;
struct sockaddr_storage mcast_ss, boundto_ss;
/* typed views onto mcast_ss / boundto_ss, used once those have been filled in */
struct sockaddr_in6 *mcast_sin6 = (struct sockaddr_in6 *)&mcast_ss;
struct sockaddr_in *mcast_sin = (struct sockaddr_in *)&mcast_ss;
struct sockaddr_in *boundto_sin = (struct sockaddr_in *)&boundto_ss;
unsigned int sendbuf_size;
unsigned int recvbuf_size;
/* one length is shared by all SO_RCVBUF / SO_SNDBUF calls; both sizes are unsigned int */
unsigned int optlen = sizeof (sendbuf_size);
unsigned int retries;
int addrlen;
int res;
int flag;
/* one-byte option value used for the IPv4 multicast loop and TTL options */
uint8_t sflag;
int i;
/*
* Create multicast recv socket
*/
sockets->mcast_recv = socket (bindnet_address->family, SOCK_DGRAM, 0);
if (sockets->mcast_recv == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"socket() failed");
return (-1);
}
totemip_nosigpipe (sockets->mcast_recv);
res = fcntl (sockets->mcast_recv, F_SETFL, O_NONBLOCK);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Could not set non-blocking operation on multicast socket");
return (-1);
}
/*
* Force reuse
*/
flag = 1;
if ( setsockopt(sockets->mcast_recv, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"setsockopt(SO_REUSEADDR) failed");
return (-1);
}
/*
* Create local multicast loop socket
* NOTE(review): the failing call here is socketpair(), but the message
* logged below says "socket() failed".
*/
if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sockets->local_mcast_loop) == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"socket() failed");
return (-1);
}
/* both ends of the pair get the same treatment as the other sockets */
for (i = 0; i < 2; i++) {
totemip_nosigpipe (sockets->local_mcast_loop[i]);
res = fcntl (sockets->local_mcast_loop[i], F_SETFL, O_NONBLOCK);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Could not set non-blocking operation on multicast socket");
return (-1);
}
}
/*
* Setup mcast send socket
*/
sockets->mcast_send = socket (bindnet_address->family, SOCK_DGRAM, 0);
if (sockets->mcast_send == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"socket() failed");
return (-1);
}
totemip_nosigpipe (sockets->mcast_send);
res = fcntl (sockets->mcast_send, F_SETFL, O_NONBLOCK);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Could not set non-blocking operation on multicast socket");
return (-1);
}
/*
* Force reuse
*/
flag = 1;
if ( setsockopt(sockets->mcast_send, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"setsockopt(SO_REUSEADDR) failed");
return (-1);
}
/*
* The send socket is bound to the port directly below the configured one
*/
totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port - 1,
&sockaddr, &addrlen);
/*
* Retry a failing bind() until more than BIND_MAX_RETRIES attempts have
* failed, sleeping BIND_RETRIES_INTERVAL * retries (poll() timeout, i.e.
* milliseconds) between attempts. Every failure is logged.
*/
retries = 0;
while (1) {
res = bind (sockets->mcast_send, (struct sockaddr *)&sockaddr, addrlen);
if (res == 0) {
break;
}
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Unable to bind the socket to send multicast packets");
if (++retries > BIND_MAX_RETRIES) {
break;
}
/*
* Wait for a while
*/
(void)poll(NULL, 0, BIND_RETRIES_INTERVAL * retries);
}
if (res == -1) {
return (-1);
}
/*
* Setup unicast socket
*/
sockets->token = socket (bindnet_address->family, SOCK_DGRAM, 0);
if (sockets->token == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"socket() failed");
return (-1);
}
totemip_nosigpipe (sockets->token);
res = fcntl (sockets->token, F_SETFL, O_NONBLOCK);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Could not set non-blocking operation on token socket");
return (-1);
}
/*
* Force reuse
*/
flag = 1;
if ( setsockopt(sockets->token, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"setsockopt(SO_REUSEADDR) failed");
return (-1);
}
/*
* Bind to unicast socket used for token send/receives
* This has the side effect of binding to the correct interface
*/
totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &sockaddr, &addrlen);
/* same bounded retry scheme as for the send socket above */
retries = 0;
while (1) {
res = bind (sockets->token, (struct sockaddr *)&sockaddr, addrlen);
if (res == 0) {
break;
}
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Unable to bind UDP unicast socket");
if (++retries > BIND_MAX_RETRIES) {
break;
}
/*
* Wait for a while
*/
(void)poll(NULL, 0, BIND_RETRIES_INTERVAL * retries);
}
if (res == -1) {
return (-1);
}
recvbuf_size = MCAST_SOCKET_BUFFER_SIZE;
sendbuf_size = MCAST_SOCKET_BUFFER_SIZE;
/*
* Set buffer sizes to avoid overruns
* Note that these failures are logged at debug level only, yet still abort
* the whole setup with -1.
*/
res = setsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
"Unable to set SO_RCVBUF size on UDP mcast socket");
return (-1);
}
res = setsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
"Unable to set SO_SNDBUF size on UDP mcast socket");
return (-1);
}
res = setsockopt (sockets->local_mcast_loop[0], SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
"Unable to set SO_RCVBUF size on UDP local mcast loop socket");
return (-1);
}
res = setsockopt (sockets->local_mcast_loop[1], SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
"Unable to set SO_SNDBUF size on UDP local mcast loop socket");
return (-1);
}
/*
* Read the sizes back and log them; a failing getsockopt() is silently
* skipped. These calls overwrite recvbuf_size / sendbuf_size.
*/
res = getsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, &optlen);
if (res == 0) {
log_printf (instance->totemudp_log_level_debug,
"Receive multicast socket recv buffer size (%d bytes).", recvbuf_size);
}
res = getsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, &optlen);
if (res == 0) {
log_printf (instance->totemudp_log_level_debug,
"Transmit multicast socket send buffer size (%d bytes).", sendbuf_size);
}
res = getsockopt (sockets->local_mcast_loop[0], SOL_SOCKET, SO_RCVBUF, &recvbuf_size, &optlen);
if (res == 0) {
log_printf (instance->totemudp_log_level_debug,
"Local receive multicast loop socket recv buffer size (%d bytes).", recvbuf_size);
}
res = getsockopt (sockets->local_mcast_loop[1], SOL_SOCKET, SO_SNDBUF, &sendbuf_size, &optlen);
if (res == 0) {
log_printf (instance->totemudp_log_level_debug,
"Local transmit multicast loop socket send buffer size (%d bytes).", sendbuf_size);
}
/*
* Join group membership on socket
* (or, when broadcast_use is set, enable SO_BROADCAST on the send and
* receive sockets instead of joining a group)
*/
totemip_totemip_to_sockaddr_convert(mcast_address, instance->totem_interface->ip_port, &mcast_ss, &addrlen);
totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &boundto_ss, &addrlen);
if (instance->totem_config->broadcast_use == 1) {
unsigned int broadcast = 1;
if ((setsockopt(sockets->mcast_recv, SOL_SOCKET,
SO_BROADCAST, &broadcast, sizeof (broadcast))) == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"setting broadcast option failed");
return (-1);
}
if ((setsockopt(sockets->mcast_send, SOL_SOCKET,
SO_BROADCAST, &broadcast, sizeof (broadcast))) == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"setting broadcast option failed");
return (-1);
}
} else {
switch (bindnet_address->family) {
case AF_INET:
/* IPv4: the interface is selected by the bound address */
memset(&mreq, 0, sizeof(mreq));
mreq.imr_multiaddr.s_addr = mcast_sin->sin_addr.s_addr;
mreq.imr_interface.s_addr = boundto_sin->sin_addr.s_addr;
res = setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_ADD_MEMBERSHIP,
&mreq, sizeof (mreq));
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"join ipv4 multicast group failed");
return (-1);
}
break;
case AF_INET6:
/* IPv6: the interface is selected by its index */
memset(&mreq6, 0, sizeof(mreq6));
memcpy(&mreq6.ipv6mr_multiaddr, &mcast_sin6->sin6_addr, sizeof(struct in6_addr));
mreq6.ipv6mr_interface = interface_num;
res = setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_JOIN_GROUP,
&mreq6, sizeof (mreq6));
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"join ipv6 multicast group failed");
return (-1);
}
break;
}
}
/*
* Turn off multicast loopback
* The IPv4 option is passed as a single byte (sflag), the IPv6 one as an int
* (flag).
* NOTE(review): for a family other than AF_INET / AF_INET6 neither case
* runs and the check below tests the value res already held.
*/
flag = 0;
switch ( bindnet_address->family ) {
case AF_INET:
sflag = 0;
res = setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_LOOP,
&sflag, sizeof (sflag));
break;
case AF_INET6:
res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
&flag, sizeof (flag));
}
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Unable to turn off multicast loopback");
return (-1);
}
/*
* Set multicast packets TTL
* Every family other than AF_INET6 takes the IPv4 branch, where the
* configured ttl is narrowed to one byte.
*/
flag = instance->totem_interface->ttl;
if (bindnet_address->family == AF_INET6) {
res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
&flag, sizeof (flag));
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"set mcast v6 TTL failed");
return (-1);
}
} else {
sflag = flag;
res = setsockopt(sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_TTL,
&sflag, sizeof(sflag));
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"set mcast v4 TTL failed");
return (-1);
}
}
/*
* Bind to a specific interface for multicast send and receive
*/
switch ( bindnet_address->family ) {
case AF_INET:
if (setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_IF,
&boundto_sin->sin_addr, sizeof (boundto_sin->sin_addr)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"cannot select interface for multicast packets (send)");
return (-1);
}
if (setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_MULTICAST_IF,
&boundto_sin->sin_addr, sizeof (boundto_sin->sin_addr)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"cannot select interface for multicast packets (recv)");
return (-1);
}
break;
case AF_INET6:
if (setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_IF,
&interface_num, sizeof (interface_num)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"cannot select interface for multicast packets (send v6)");
return (-1);
}
if (setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_MULTICAST_IF,
&interface_num, sizeof (interface_num)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"cannot select interface for multicast packets (recv v6)");
return (-1);
}
break;
}
/*
* Bind to multicast socket used for multicast receives
* This needs to happen after all of the multicast setsockopt() calls
* as the kernel seems to only put them into effect (for IPV6) when bind()
* is called.
*/
totemip_totemip_to_sockaddr_convert(mcast_address,
instance->totem_interface->ip_port, &sockaddr, &addrlen);
/* same bounded retry scheme as for the other two bind() calls */
retries = 0;
while (1) {
res = bind (sockets->mcast_recv, (struct sockaddr *)&sockaddr, addrlen);
if (res == 0) {
break;
}
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Unable to bind the socket to receive multicast packets");
if (++retries > BIND_MAX_RETRIES) {
break;
}
/*
* Wait for a while
*/
(void)poll(NULL, 0, BIND_RETRIES_INTERVAL * retries);
}
if (res == -1) {
return (-1);
}
return 0;
}
/*
 * Look up the interface for bindnet_address and build all totemudp sockets
 * on it.
 *
 * Returns -1 when the interface lookup fails. When the sockets themselves
 * cannot be created the error is logged and the process exits with
 * EXIT_FAILURE. Otherwise the token socket gets its traffic priority raised
 * and the result of totemudp_build_sockets_ip() is returned.
 */
static int totemudp_build_sockets (
	struct totemudp_instance *instance,
	struct totem_ip_address *mcast_address,
	struct totem_ip_address *bindnet_address,
	struct totemudp_socket *sockets,
	struct totem_ip_address *bound_to)
{
	int iface_index;
	int iface_is_up;
	int lookup_rc;
	int build_rc;

	/*
	 * Determine the ip address bound to and the interface name
	 */
	lookup_rc = netif_determine (instance, bindnet_address, bound_to,
		&iface_is_up, &iface_index);
	if (lookup_rc == -1) {
		return (-1);
	}

	totemip_copy(&instance->my_id, bound_to);

	build_rc = totemudp_build_sockets_ip (instance, mcast_address,
		bindnet_address, sockets, bound_to, iface_index);
	if (build_rc == -1) {
		/* if we get here, corosync won't work anyway, so better leaving than faking to work */
		LOGSYS_PERROR (errno, instance->totemudp_log_level_error,
			"Unable to create sockets, exiting");
		exit(EXIT_FAILURE);
	}

	/* We only send out of the token socket */
	totemudp_traffic_control_set(instance, sockets->token);

	return build_rc;
}
/*
* Totem Network interface
* depends on poll abstraction, POSIX, IPV4
*/
/*
 * Create an instance
 *
 * Allocates a totemudp instance, copies the logging settings and the
 * callbacks into it, selects interface 0 of totem_config and schedules a
 * first interface check 100 ms from now. The new instance is handed back
 * through *udp_context.
 *
 * The mtu_changed callback is accepted but not stored by this function.
 *
 * Returns 0 on success, -1 when the instance cannot be allocated.
 */
int totemudp_initialize (
	qb_loop_t *poll_handle,
	void **udp_context,
	struct totem_config *totem_config,
	totemsrp_stats_t *stats,
	void *context,
	void (*deliver_fn) (
		void *context,
		const void *msg,
		unsigned int msg_len,
		const struct sockaddr_storage *system_from),
	void (*iface_change_fn) (
		void *context,
		const struct totem_ip_address *iface_address,
		unsigned int ring_no),
	void (*mtu_changed) (
		void *context,
		int net_mtu),
	void (*target_set_completed) (
		void *context))
{
	struct totemudp_instance *instance;

	instance = malloc (sizeof (*instance));
	if (instance == NULL) {
		return (-1);
	}
	totemudp_instance_initialize (instance);

	/*
	 * Remember what the caller handed us
	 */
	instance->totem_config = totem_config;
	instance->stats = stats;
	instance->totemudp_poll_handle = poll_handle;
	instance->context = context;
	instance->totemudp_deliver_fn = deliver_fn;
	instance->totemudp_iface_change_fn = iface_change_fn;
	instance->totemudp_target_set_completed = target_set_completed;

	/*
	 * Configure logging; the security level is a fixed value and is not
	 * read from totem_config
	 */
	instance->totemudp_log_level_security = 1;
	instance->totemudp_log_level_error = totem_config->totem_logging_configuration.log_level_error;
	instance->totemudp_log_level_warning = totem_config->totem_logging_configuration.log_level_warning;
	instance->totemudp_log_level_notice = totem_config->totem_logging_configuration.log_level_notice;
	instance->totemudp_log_level_debug = totem_config->totem_logging_configuration.log_level_debug;
	instance->totemudp_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
	instance->totemudp_log_printf = totem_config->totem_logging_configuration.log_printf;

	/*
	 * Initialize local variables for totemudp
	 */
	instance->totem_interface = &totem_config->interfaces[0];
	instance->totem_interface->bindnet.nodeid = totem_config->node_id;
	totemip_copy (&instance->mcast_address, &instance->totem_interface->mcast_addr);
	memset (instance->iov_buffer, 0, UDP_RECEIVE_FRAME_SIZE_MAX);

	/*
	 * Prepare the file-level localhost address for the multicast address
	 * family and tag it with our node id
	 */
	totemip_localhost (instance->mcast_address.family, &localhost);
	localhost.nodeid = totem_config->node_id;

	/*
	 * RRP layer isn't ready to receive message because it hasn't
	 * initialized yet. Add short timer to check the interfaces.
	 */
	qb_loop_timer_add (instance->totemudp_poll_handle,
		QB_LOOP_MED,
		100*QB_TIME_NS_IN_MSEC,
		(void *)instance,
		timer_function_netif_check_timeout,
		&instance->timer_netif_check_timeout);

	*udp_context = instance;
	return (0);
}
/*
 * Allocate one frame buffer of FRAME_SIZE_MAX bytes with malloc().
 * Returns NULL when the allocation fails.
 */
void *totemudp_buffer_alloc (void)
{
	void *frame = malloc (FRAME_SIZE_MAX);

	return (frame);
}
/*
 * Release a buffer obtained from totemudp_buffer_alloc().
 * Passing NULL is allowed, as with free().
 */
void totemudp_buffer_release (void *ptr)
{
	/*
	 * Plain call: ISO C does not allow a return statement with an
	 * expression in a function whose return type is void.
	 */
	free (ptr);
}
/*
 * Record the current number of members and adjust the interface check timer:
 * the pending timer is always cancelled, and it is armed again (with the
 * configured downcheck_timeout) only when we are the single member.
 *
 * Always returns 0.
 */
int totemudp_processor_count_set (
	void *udp_context,
	int processor_count)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;

	instance->my_memb_entries = processor_count;

	qb_loop_timer_del (instance->totemudp_poll_handle,
		instance->timer_netif_check_timeout);

	if (processor_count != 1) {
		return (0);
	}

	qb_loop_timer_add (instance->totemudp_poll_handle,
		QB_LOOP_MED,
		instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
		(void *)instance,
		timer_function_netif_check_timeout,
		&instance->timer_netif_check_timeout);

	return (0);
}
/*
 * Deliver everything that is currently readable, first on the multicast
 * receive socket and then on the receiving end of the local multicast loop.
 * instance->flushing is set to 1 for the duration of the call.
 *
 * Each socket is polled with a zero timeout and handed to net_deliver_fn()
 * for as long as poll() reports exactly one ready descriptor.
 *
 * Always returns 0.
 */
int totemudp_recv_flush (void *udp_context)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
	struct pollfd pfd;
	int ready;
	int pass;
	int fd;

	instance->flushing = 1;

	for (pass = 0; pass < 2; pass++) {
		/* the descriptor is looked up at the start of each pass */
		fd = (pass == 0) ?
			instance->totemudp_sockets.mcast_recv :
			instance->totemudp_sockets.local_mcast_loop[0];
		assert(fd != -1);

		for (;;) {
			pfd.fd = fd;
			pfd.events = POLLIN;
			ready = poll (&pfd, 1, 0);
			if (ready != 1) {
				break;
			}
			if (pfd.revents & POLLIN) {
				net_deliver_fn (fd, pfd.revents, instance);
			}
		}
	}

	instance->flushing = 0;

	return (0);
}
/*
 * Nothing to flush in this function: the context is not used and the
 * result is always 0.
 */
int totemudp_send_flush (void *udp_context)
{
	(void)udp_context;

	return (0);
}
/*
 * Send msg (msg_len bytes) via ucast_sendmsg() to the instance's current
 * token target.
 *
 * Always returns 0; no send result is reported back to the caller.
 */
int totemudp_token_send (
	void *udp_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;

	ucast_sendmsg (instance, &instance->token_target, msg, msg_len);

	return (0);
}
/*
 * Hand msg (msg_len bytes) to mcast_sendmsg().
 *
 * Always returns 0; no send result is reported back to the caller.
 */
int totemudp_mcast_flush_send (
	void *udp_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;

	mcast_sendmsg (instance, msg, msg_len);

	return (0);
}
/*
 * Hand msg (msg_len bytes) to mcast_sendmsg(). In this transport the
 * implementation is the same as totemudp_mcast_flush_send().
 *
 * Always returns 0; no send result is reported back to the caller.
 */
int totemudp_mcast_noflush_send (
	void *udp_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;

	mcast_sendmsg (instance, msg, msg_len);

	return (0);
}
/*
 * Run the interface check right away by calling the timer callback
 * directly with this instance.
 *
 * Always returns 0.
 */
extern int totemudp_iface_check (void *udp_context)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;

	timer_function_netif_check_timeout (instance);

	return (0);
}
+/*
+ * Fill in link 0 of node_status for the member whose node id is nodeid.
+ *
+ * The member list is searched for matching entries; for a match the link is
+ * reported as enabled only while the instance is regularly bound,
+ * "connected" mirrors node_status->reachable, and the MTU and the member's
+ * printed address are copied in. When no member matches, node_status is
+ * left untouched.
+ *
+ * Always returns 0.
+ */
+int totemudp_nodestatus_get (void *udp_context, unsigned int nodeid,
+			     struct totem_node_status *node_status)
+{
+	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
+	struct qb_list_head *list;
+	struct totemudp_member *member;
+
+	qb_list_for_each(list, &(instance->member_list)) {
+		member = qb_list_entry (list,
+			struct totemudp_member,
+			list);
+
+		if (member->member.nodeid == nodeid) {
+			node_status->nodeid = nodeid;
+			/* reachable is filled in by totemsrp */
+			if (instance->netif_bind_state == BIND_STATE_REGULAR) {
+				node_status->link_status[0].enabled = 1;
+			} else {
+				node_status->link_status[0].enabled = 0;
+			}
+			node_status->link_status[0].connected = node_status->reachable;
+			node_status->link_status[0].mtu = instance->totem_config->net_mtu;
+			strncpy(node_status->link_status[0].src_ipaddr, totemip_print(&member->member), KNET_MAX_HOST_LEN-1);
+			/*
+			 * strncpy() does not terminate the destination when the
+			 * source fills all KNET_MAX_HOST_LEN-1 bytes, so do it
+			 * explicitly.
+			 * NOTE(review): assumes src_ipaddr holds KNET_MAX_HOST_LEN
+			 * bytes, as the length passed above suggests - confirm.
+			 */
+			node_status->link_status[0].src_ipaddr[KNET_MAX_HOST_LEN-1] = '\0';
+		}
+	}
+	return (0);
+}
+
/*
 * Report the interface status table. The table is a static array whose
 * first entry is "OK"; it is handed out through *status when status is not
 * NULL. *iface_count is always set to 1.
 *
 * Always returns 0.
 */
int totemudp_ifaces_get (
	void *net_context,
	char ***status,
	unsigned int *iface_count)
{
	static char *statuses[INTERFACE_MAX] = {(char*)"OK"};

	if (status != NULL) {
		*status = statuses;
	}

	*iface_count = 1;

	return (0);
}
/*
 * Reduce totem_config->net_mtu by the value totemip_udpip_header_size()
 * returns for the address family of interface 0's bind network.
 */
extern void totemudp_net_mtu_adjust (void *udp_context, struct totem_config *totem_config)
{
	totem_config->net_mtu = totem_config->net_mtu -
		totemip_udpip_header_size(totem_config->interfaces[0].bindnet.family);
}
/*
 * Make the member with the given node id the token target.
 *
 * The first member whose nodeid matches has its address copied into
 * instance->token_target, after which the target_set_completed callback is
 * invoked. When no member matches, nothing is changed and the callback is
 * not called.
 *
 * Always returns 0.
 */
int totemudp_token_target_set (
	void *udp_context,
	unsigned int nodeid)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
	struct qb_list_head *iter;
	struct totemudp_member *candidate;

	qb_list_for_each(iter, &(instance->member_list)) {
		candidate = qb_list_entry (iter, struct totemudp_member, list);

		if (candidate->member.nodeid == nodeid) {
			memcpy (&instance->token_target, &candidate->member,
				sizeof (struct totem_ip_address));

			instance->totemudp_target_set_completed (instance->context);
			break;
		}
	}

	return (0);
}
/*
 * Drain the multicast receive socket and the receiving end of the local
 * multicast loop without delivering anything: datagrams are read into the
 * instance's totemudp_iov_recv_flush buffer and dropped.
 *
 * Each socket is polled with a zero timeout and read for as long as poll()
 * reports exactly one ready descriptor.
 *
 * Returns 0 when nothing was read, otherwise the outcome of the last
 * recvmsg() call: 1 when it succeeded, -1 when it failed.
 */
extern int totemudp_recv_mcast_empty (
	void *udp_context)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
	/* recvmsg() returns ssize_t; keep it signed so the -1 test is exact */
	ssize_t res;
	struct sockaddr_storage system_from;
	struct msghdr msg_recv;
	struct pollfd ufd;
	int nfds;
	int msg_processed = 0;
	int i;
	int sock;

	/*
	 * Receive datagram
	 */
	msg_recv.msg_name = &system_from;
	msg_recv.msg_namelen = sizeof (struct sockaddr_storage);
	msg_recv.msg_iov = &instance->totemudp_iov_recv_flush;
	msg_recv.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
	msg_recv.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
	msg_recv.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
	msg_recv.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
	msg_recv.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
	msg_recv.msg_accrightslen = 0;
#endif

	for (i = 0; i < 2; i++) {
		sock = -1;
		if (i == 0) {
			sock = instance->totemudp_sockets.mcast_recv;
		}
		if (i == 1) {
			sock = instance->totemudp_sockets.local_mcast_loop[0];
		}
		assert(sock != -1);

		do {
			ufd.fd = sock;
			ufd.events = POLLIN;
			nfds = poll (&ufd, 1, 0);
			if (nfds == 1 && ufd.revents & POLLIN) {
				res = recvmsg (sock, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT);
				if (res != -1) {
					msg_processed = 1;
				} else {
					msg_processed = -1;
				}
			}
		} while (nfds == 1);
	}

	return (msg_processed);
}
/*
 * Append a new entry for the given member address to the instance's member
 * list. The entry is zero-initialised and receives a copy of *member.
 *
 * Returns 0 on success, -1 when the entry cannot be allocated.
 */
int totemudp_member_add (
	void *udp_context,
	const struct totem_ip_address *local,
	const struct totem_ip_address *member,
	int ring_no)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
	struct totemudp_member *entry;

	/* calloc() hands back the entry already zeroed */
	entry = calloc (1, sizeof (*entry));
	if (entry == NULL) {
		return (-1);
	}

	qb_list_init (&entry->list);
	qb_list_add_tail (&entry->list, &instance->member_list);
	memcpy (&entry->member, member, sizeof (struct totem_ip_address));

	return (0);
}
/*
 * Remove the member whose address equals *token_target from the instance's
 * member list and release the entry that totemudp_member_add() allocated
 * for it. Only the first matching entry is removed.
 *
 * Always returns 0, also when no such member exists.
 */
int totemudp_member_remove (
	void *udp_context,
	const struct totem_ip_address *token_target,
	int ring_no)
{
	int found = 0;
	struct qb_list_head *list;
	struct totemudp_member *member = NULL;
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;

	/*
	 * Find the member to remove
	 */
	qb_list_for_each(list, &(instance->member_list)) {
		member = qb_list_entry (list,
			struct totemudp_member,
			list);
		if (totemip_compare (token_target, &member->member)==0) {
			found = 1;
			break;
		}
	}

	/*
	 * Delete the member from the list and free it; without the free()
	 * every removed member would be leaked. token_target is not used
	 * after this point.
	 */
	if (found) {
		qb_list_del (list);
		free (member);
	}

	return (0);
}
/*
 * Not supported: none of the arguments are used and the call always
 * fails with -1.
 */
int totemudp_iface_set (void *net_context,
	const struct totem_ip_address *local_addr,
	unsigned short ip_port,
	unsigned int iface_no)
{
	(void)net_context;
	(void)local_addr;
	(void)ip_port;
	(void)iface_no;

	/* Not supported */
	return (-1);
}
/*
 * Not supported: neither argument is used and the call always fails
 * with -1.
 */
int totemudp_reconfigure (
	void *udp_context,
	struct totem_config *totem_config)
{
	(void)udp_context;
	(void)totem_config;

	/* Not supported */
	return (-1);
}
diff --git a/exec/totemudp.h b/exec/totemudp.h
index d4a01f64..7d2abcd9 100644
--- a/exec/totemudp.h
+++ b/exec/totemudp.h
@@ -1,141 +1,144 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2011 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TOTEMUDP_H_DEFINED
#define TOTEMUDP_H_DEFINED
#include <sys/types.h>
#include <sys/socket.h>
#include <qb/qbloop.h>
#include <corosync/totem/totem.h>
/**
* Create an instance
*
* On success the new instance is returned through udp_context and 0 is
* returned; -1 is returned when the instance cannot be allocated.
*/
extern int totemudp_initialize (
qb_loop_t* poll_handle,
void **udp_context,
struct totem_config *totem_config,
totemsrp_stats_t *stats,
void *context,
void (*deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len,
const struct sockaddr_storage *system_from),
void (*iface_change_fn) (
void *context,
const struct totem_ip_address *iface_address,
unsigned int ring_no),
void (*mtu_changed) (
void *context,
int net_mtu),
void (*target_set_completed) (
void *context));
/**
* Allocate a frame buffer with malloc(); may return NULL
*/
extern void *totemudp_buffer_alloc (void);
/**
* Release a buffer obtained from totemudp_buffer_alloc()
*/
extern void totemudp_buffer_release (void *ptr);
/**
* Record the number of members; the interface check timer is re-armed only
* when the count is 1. Always returns 0.
*/
extern int totemudp_processor_count_set (
void *udp_context,
int processor_count);
/**
* Send a message to the current token target. Always returns 0.
*/
extern int totemudp_token_send (
void *udp_context,
const void *msg,
unsigned int msg_len);
/**
* Send a multicast message. Always returns 0.
*/
extern int totemudp_mcast_flush_send (
void *udp_context,
const void *msg,
unsigned int msg_len);
/**
* Send a multicast message; implemented identically to
* totemudp_mcast_flush_send(). Always returns 0.
*/
extern int totemudp_mcast_noflush_send (
void *udp_context,
const void *msg,
unsigned int msg_len);
/**
* Fill in link 0 of node_status for the member with the given node id.
* Always returns 0.
*/
+extern int totemudp_nodestatus_get (void *net_context, unsigned int nodeid,
+ struct totem_node_status *node_status);
+
/**
* Report the interface status table: a single "OK" entry, with *iface_count
* set to 1. status may be NULL. Always returns 0.
*/
extern int totemudp_ifaces_get (void *net_context,
char ***status,
unsigned int *iface_count);
/**
* Deliver everything currently readable on the multicast receive socket and
* the local multicast loop. Always returns 0.
*/
extern int totemudp_recv_flush (void *udp_context);
/**
* No-op in this transport. Always returns 0.
*/
extern int totemudp_send_flush (void *udp_context);
/**
* Not supported by this transport. Always returns -1.
*/
extern int totemudp_iface_set (void *net_context,
const struct totem_ip_address *local_addr,
unsigned short ip_port,
unsigned int iface_no);
/**
* Run the network interface check immediately. Always returns 0.
*/
extern int totemudp_iface_check (void *udp_context);
extern int totemudp_finalize (void *udp_context);
/**
* Subtract the UDP/IP header size for interface 0's address family from
* totem_config->net_mtu
*/
extern void totemudp_net_mtu_adjust (void *udp_context, struct totem_config *totem_config);
/**
* Make the member with the given node id the token target and invoke the
* target_set_completed callback; nothing happens when no member matches.
* Always returns 0.
*/
extern int totemudp_token_target_set (
void *udp_context,
unsigned int nodeid);
extern int totemudp_crypto_set (
void *udp_context,
const char *cipher_type,
const char *hash_type);
/**
* Read and drop everything pending on the multicast receive socket and the
* local multicast loop. Returns 0 when nothing was read, otherwise 1 or -1
* depending on whether the last read succeeded.
*/
extern int totemudp_recv_mcast_empty (
void *udp_context);
/**
* Append a member address to the member list. Returns 0, or -1 when the
* list entry cannot be allocated.
*/
extern int totemudp_member_add (
void *udpu_context,
const struct totem_ip_address *local,
const struct totem_ip_address *member,
int ring_no);
/**
* Remove the first member with a matching address from the member list.
* Always returns 0.
*/
extern int totemudp_member_remove (
void *udpu_context,
const struct totem_ip_address *member,
int ring_no);
/**
* Not supported by this transport. Always returns -1.
*/
extern int totemudp_reconfigure (
void *udp_context,
struct totem_config *totem_config);
#endif /* TOTEMUDP_H_DEFINED */
diff --git a/exec/totemudpu.c b/exec/totemudpu.c
index 914a3285..d095d46d 100644
--- a/exec/totemudpu.c
+++ b/exec/totemudpu.c
@@ -1,1424 +1,1452 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2018 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <assert.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netdb.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <sched.h>
#include <time.h>
#include <sys/time.h>
#include <sys/poll.h>
#include <sys/uio.h>
#include <limits.h>
#include <qb/qblist.h>
#include <qb/qbdefs.h>
#include <qb/qbloop.h>
#include <corosync/sq.h>
#include <corosync/swab.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/logsys.h>
#include "totemudpu.h"
#include "util.h"
/*
 * Fallback for platforms whose headers do not define MSG_NOSIGNAL: the flag
 * then expands to 0 and has no effect on the sendmsg()/recvmsg() calls.
 */
#ifndef MSG_NOSIGNAL
#define MSG_NOSIGNAL 0
#endif
/*
 * Buffer size requested (SO_RCVBUF / SO_SNDBUF) for the token socket, the
 * per-member sending sockets and the local loop socket pair.
 */
#define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * UDP_RECEIVE_FRAME_SIZE_MAX)
/*
 * Bit flags kept in totemudpu_instance.netif_state_report; a transition is
 * only logged/reported when its bit is set.
 */
#define NETIF_STATE_REPORT_UP 1
#define NETIF_STATE_REPORT_DOWN 2
/*
 * Values of totemudpu_instance.netif_bind_state. UNBOUND (0) is the state
 * right after the instance has been zero-initialized.
 */
#define BIND_STATE_UNBOUND 0
#define BIND_STATE_REGULAR 1
#define BIND_STATE_LOOPBACK 2
/*
 * One UDPU peer, linked into totemudpu_instance.member_list.
 */
struct totemudpu_member {
/* Linkage into the instance's member_list */
struct qb_list_head list;
/* Address of the peer; "multicast" messages are sent to it individually */
struct totem_ip_address member;
/* Sending socket from totemudpu_create_sending_socket(), -1 when creation failed */
int fd;
/*
 * Set to 1 when the member is added. Non-flush sends skip members with
 * active == 0 unless a merge detect message is due.
 */
int active;
};
/*
 * Complete state of one UDPU transport instance. It is allocated by
 * totemudpu_initialize() and passed around as the opaque udpu_context.
 */
struct totemudpu_instance {
/* qb loop on which the sockets are polled and the timers are scheduled */
qb_loop_t *totemudpu_poll_handle;
/* Points at totem_config->interfaces[0] */
struct totem_interface *totem_interface;
/* Bitmask of NETIF_STATE_REPORT_* : which interface transitions are still to be reported */
int netif_state_report;
/* One of BIND_STATE_* */
int netif_bind_state;
/* Opaque pointer handed back to the callbacks below */
void *context;
/* Called by net_deliver_fn() for every accepted datagram */
void (*totemudpu_deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len,
const struct sockaddr_storage *system_from);
/* Called when the interface is reported as up or down */
void (*totemudpu_iface_change_fn) (
void *context,
const struct totem_ip_address *iface_address,
unsigned int ring_no);
/* Called after totemudpu_token_target_set() has stored a new token target */
void (*totemudpu_target_set_completed) (void *context);
/*
 * Function and data used to log messages
 */
int totemudpu_log_level_security;
int totemudpu_log_level_error;
int totemudpu_log_level_warning;
int totemudpu_log_level_notice;
int totemudpu_log_level_debug;
int totemudpu_subsys_id;
void (*totemudpu_log_printf) (
int level,
int subsys,
const char *function,
const char *file,
int line,
const char *format,
...)__attribute__((format(printf, 6, 7)));
void *udpu_context;
/* Receive buffer; totemudpu_iov_recv is set up to point at it */
char iov_buffer[UDP_RECEIVE_FRAME_SIZE_MAX];
struct iovec totemudpu_iov_recv;
/* List of struct totemudpu_member */
struct qb_list_head member_list;
int stats_sent;
/* Sum of the byte counts returned by recvmsg() in net_deliver_fn() */
int stats_recv;
int stats_delv;
int stats_remcasts;
int stats_orf_token;
struct timeval stats_tv_start;
/* Copy of the address the interface is bound to (totem_interface->boundto) */
struct totem_ip_address my_id;
int firstrun;
/* Timer running timer_function_netif_check_timeout() */
qb_loop_timer_handle timer_netif_check_timeout;
/* Processor count as last set by totemudpu_processor_count_set(); starts at 1 */
unsigned int my_memb_entries;
struct totem_config *totem_config;
totemsrp_stats_t *stats;
/* Destination used by totemudpu_token_send() */
struct totem_ip_address token_target;
/* Bound UDP socket: receives all traffic and is used to send the token */
int token_socket;
/* AF_UNIX datagram socket pair: [0] is polled for input, [1] is written to while not regularly bound */
int local_loop_sock[2];
/* Timer running timer_function_merge_detect_timeout() */
qb_loop_timer_handle timer_merge_detect_timeout;
/* When non-zero the next non-flush send also goes to inactive members */
int send_merge_detect_message;
/* Number of sends that reached all members since the merge detect timer last fired */
unsigned int merge_detect_messages_sent_before_timeout;
};
/*
 * A message buffer together with the instance it belongs to.
 * NOTE(review): not referenced anywhere else in this file -- presumably a
 * leftover; confirm before removing.
 */
struct work_item {
const void *msg;
unsigned int msg_len;
struct totemudpu_instance *instance;
};
/*
 * Forward declarations of functions that are used before their definition.
 */
static int totemudpu_build_sockets (
struct totemudpu_instance *instance,
struct totem_ip_address *bindnet_address,
struct totem_ip_address *bound_to);
static int totemudpu_create_sending_socket(
void *udpu_context,
const struct totem_ip_address *member);
/* Not static, unlike the other functions declared here */
int totemudpu_member_list_rebind_ip (
void *udpu_context);
static void totemudpu_start_merge_detect_timeout(
void *udpu_context);
static void totemudpu_stop_merge_detect_timeout(
void *udpu_context);
/*
 * Put a freshly allocated instance into its default state: everything zeroed,
 * an empty member list, one processor, both interface transitions reportable
 * and the receive iovec pointing at the instance's own frame buffer.
 */
static void totemudpu_instance_initialize (struct totemudpu_instance *instance)
{
	memset (instance, 0, sizeof (*instance));

	qb_list_init (&instance->member_list);

	/*
	 * There is always at least 1 processor
	 */
	instance->my_memb_entries = 1;

	/* Report the first "up" as well as the first "down" transition */
	instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN;

	/* Datagrams are received straight into iov_buffer */
	instance->totemudpu_iov_recv.iov_len = UDP_RECEIVE_FRAME_SIZE_MAX;
	instance->totemudpu_iov_recv.iov_base = instance->iov_buffer;
}
/*
 * Log a message through the logging callback of the variable named
 * "instance", which must be in scope at the call site. Function, file and
 * line of the call site are passed along automatically.
 *
 * The expansion intentionally does NOT end with a semicolon: the caller's own
 * ';' completes the do/while statement, so "log_printf (...);" is exactly one
 * statement and stays valid inside an unbraced if/else.
 */
#define log_printf(level, format, args...) \
do { \
	instance->totemudpu_log_printf ( \
		level, instance->totemudpu_subsys_id, \
		__FUNCTION__, __FILE__, __LINE__, \
		(const char *)format, ##args); \
} while (0)
/*
 * Log "fmt" followed by ": <error text> (<err_num>)" through the logging
 * callback of the variable named "instance" in the calling scope. The error
 * text is obtained with qb_strerror_r() into a local buffer of
 * LOGSYS_MAX_PERROR_MSG_LEN bytes. "fmt" must be a string literal because it
 * is concatenated with ": %s (%d)" at compile time.
 */
#define LOGSYS_PERROR(err_num, level, fmt, args...) \
do { \
char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \
const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \
instance->totemudpu_log_printf ( \
level, instance->totemudpu_subsys_id, \
__FUNCTION__, __FILE__, __LINE__, \
fmt ": %s (%d)", ##args, _error_ptr, err_num); \
} while(0)
/*
 * Crypto configuration entry point of the UDPU transport.
 *
 * The arguments are not used: any cipher/hash combination is accepted and
 * nothing is configured.
 *
 * Returns 0 always.
 */
int totemudpu_crypto_set (
	void *udpu_context,
	const char *cipher_type,
	const char *hash_type)
{
	(void)udpu_context;
	(void)cipher_type;
	(void)hash_type;

	return 0;
}
/*
 * Send one datagram to a single node.
 *
 * When the interface is regularly bound the message is sent from the token
 * socket to system_to on the interface's ip_port. Otherwise it is written,
 * without a destination address, to the sending end of the local loop socket
 * pair. A failing sendmsg() is only logged at debug level; recovery is left
 * to totemsrp.
 */
static inline void ucast_sendmsg (
	struct totemudpu_instance *instance,
	struct totem_ip_address *system_to,
	const void *msg,
	unsigned int msg_len)
{
	struct sockaddr_storage dest_addr;
	struct msghdr hdr;
	struct iovec payload;
	int dest_addr_len;
	int out_fd;
	int regular_bind;

	payload.iov_base = (void *)msg;
	payload.iov_len = msg_len;

	totemip_totemip_to_sockaddr_convert(system_to,
		instance->totem_interface->ip_port, &dest_addr, &dest_addr_len);

	regular_bind = (instance->netif_bind_state == BIND_STATE_REGULAR);

	/*
	 * Build unicast message
	 */
	memset(&hdr, 0, sizeof(hdr));
	hdr.msg_iov = (void *)&payload;
	hdr.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
	hdr.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
	hdr.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
	hdr.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
	hdr.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
	hdr.msg_accrightslen = 0;
#endif

	if (regular_bind) {
		out_fd = instance->token_socket;
		hdr.msg_name = &dest_addr;
		hdr.msg_namelen = dest_addr_len;
	} else {
		/* The local loop socket pair needs no destination address */
		out_fd = instance->local_loop_sock[1];
		hdr.msg_name = NULL;
		hdr.msg_namelen = 0;
	}

	/*
	 * Transmit unicast message
	 * An error here is recovered by totemsrp
	 */
	if (sendmsg (out_fd, &hdr, MSG_NOSIGNAL) < 0) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug,
			"sendmsg(ucast) failed (non-critical)");
	}
}
/*
 * Emulate a multicast send with unicast datagrams.
 *
 * While the interface is not regularly bound the message is written once to
 * the sending end of the local loop socket pair. Otherwise it is sent to every
 * member through that member's own socket. With only_active != 0 inactive
 * members are skipped unless a merge detect message is due.
 *
 * Whenever the message went to all members (flush send, or merge detect was
 * due) the merge detect counter is incremented and the "due" flag is cleared.
 * A failing sendmsg() is only logged at debug level.
 */
static inline void mcast_sendmsg (
	struct totemudpu_instance *instance,
	const void *msg,
	unsigned int msg_len,
	int only_active)
{
	struct sockaddr_storage dest_addr;
	struct msghdr hdr;
	struct iovec payload;
	struct qb_list_head *iter;
	struct totemudpu_member *peer;
	int dest_addr_len;

	payload.iov_base = (void *)msg;
	payload.iov_len = msg_len;

	memset(&hdr, 0, sizeof(hdr));

	if (instance->netif_bind_state != BIND_STATE_REGULAR) {
		/*
		 * Transmit multicast message to local unix mcast loop
		 * An error here is recovered by totemsrp
		 */
		hdr.msg_name = NULL;
		hdr.msg_namelen = 0;
		hdr.msg_iov = (void *)&payload;
		hdr.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
		hdr.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
		hdr.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
		hdr.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
		hdr.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
		hdr.msg_accrightslen = 0;
#endif
		if (sendmsg (instance->local_loop_sock[1], &hdr, MSG_NOSIGNAL) < 0) {
			LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug,
				"sendmsg(local mcast loop) failed (non-critical)");
		}
		return;
	}

	/*
	 * Regularly bound: one datagram per member
	 */
	qb_list_for_each(iter, &(instance->member_list)) {
		peer = qb_list_entry (iter, struct totemudpu_member, list);

		/*
		 * Skip the member if this is not a "flush" message, the member
		 * is inactive and the merge detect timeout has not expired yet.
		 */
		if (only_active && !peer->active && !instance->send_merge_detect_message) {
			continue;
		}

		totemip_totemip_to_sockaddr_convert(&peer->member,
			instance->totem_interface->ip_port, &dest_addr, &dest_addr_len);

		hdr.msg_name = &dest_addr;
		hdr.msg_namelen = dest_addr_len;
		hdr.msg_iov = (void *)&payload;
		hdr.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
		hdr.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
		hdr.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
		hdr.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
		hdr.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
		hdr.msg_accrightslen = 0;
#endif

		/*
		 * An error here is recovered by totemsrp
		 */
		if (sendmsg (peer->fd, &hdr, MSG_NOSIGNAL) < 0) {
			LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug,
				"sendmsg(mcast) failed (non-critical)");
		}
	}

	if (!only_active || instance->send_merge_detect_message) {
		/*
		 * Current message was sent to all nodes
		 */
		instance->merge_detect_messages_sent_before_timeout++;
		instance->send_merge_detect_message = 0;
	}
}
/*
 * Shut the transport down: unregister and close the token socket and the
 * local loop socket pair (each only when its descriptor is > 0) and stop the
 * merge detect timer.
 *
 * Returns 0 always.
 */
int totemudpu_finalize (
	void *udpu_context)
{
	struct totemudpu_instance *instance = udpu_context;
	int *loop_pair = instance->local_loop_sock;

	if (instance->token_socket > 0) {
		qb_loop_poll_del (instance->totemudpu_poll_handle,
			instance->token_socket);
		close (instance->token_socket);
	}

	if (loop_pair[0] > 0) {
		/* Only the receiving end is registered with the loop */
		qb_loop_poll_del (instance->totemudpu_poll_handle, loop_pair[0]);
		close (loop_pair[0]);
		close (loop_pair[1]);
	}

	totemudpu_stop_merge_detect_timeout(instance);

	return 0;
}
/*
 * Look up the member whose address equals the socket address "sa"
 * (compared with totemip_sa_equal()).
 *
 * Returns the first matching member, or NULL when there is none.
 */
static struct totemudpu_member *find_member_by_sockaddr(
	const void *udpu_context,
	const struct sockaddr *sa)
{
	const struct totemudpu_instance *instance = udpu_context;
	struct qb_list_head *iter;
	struct totemudpu_member *candidate;

	qb_list_for_each(iter, &(instance->member_list)) {
		candidate = qb_list_entry (iter, struct totemudpu_member, list);
		if (totemip_sa_equal(&candidate->member, sa)) {
			return (candidate);
		}
	}

	return (NULL);
}
/*
 * Poll callback for the token socket and the local loop socket: receive one
 * datagram from fd and hand it to the deliver callback.
 *
 * The datagram is dropped (and nothing is delivered) when
 *  - recvmsg() fails,
 *  - it did not fit into the receive buffer (truncated), or
 *  - block_unlisted_ips is set and the sender is not in the member list.
 *
 * fd      descriptor that became readable
 * revents poll events (unused)
 * data    the struct totemudpu_instance the descriptor belongs to
 *
 * Returns 0 always.
 */
static int net_deliver_fn (
	int fd,
	int revents,
	void *data)
{
	struct totemudpu_instance *instance = (struct totemudpu_instance *)data;
	struct msghdr msg_recv;
	struct iovec *iovec;
	struct sockaddr_storage system_from;
	int bytes_received;
	int truncated_packet;

	iovec = &instance->totemudpu_iov_recv;

	/*
	 * Receive datagram
	 *
	 * The header is zeroed first (as the send paths do) so that no field is
	 * left uninitialized when one of the HAVE_MSGHDR_* macros is undefined.
	 */
	memset (&msg_recv, 0, sizeof (msg_recv));
	msg_recv.msg_name = &system_from;
	msg_recv.msg_namelen = sizeof (struct sockaddr_storage);
	msg_recv.msg_iov = iovec;
	msg_recv.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
	msg_recv.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
	msg_recv.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
	msg_recv.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
	msg_recv.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
	msg_recv.msg_accrightslen = 0;
#endif

	bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT);
	if (bytes_received == -1) {
		return (0);
	} else {
		instance->stats_recv += bytes_received;
	}

	truncated_packet = 0;

#ifdef HAVE_MSGHDR_FLAGS
	if (msg_recv.msg_flags & MSG_TRUNC) {
		truncated_packet = 1;
	}
#else
	/*
	 * We don't have MSGHDR_FLAGS, but we can (hopefully) safely make assumption that
	 * if bytes_received == UDP_RECEIVE_FRAME_SIZE_MAX then packet is truncated
	 */
	if (bytes_received == UDP_RECEIVE_FRAME_SIZE_MAX) {
		truncated_packet = 1;
	}
#endif

	if (truncated_packet) {
		/* Each literal ends with a space so the concatenated message reads correctly */
		log_printf (instance->totemudpu_log_level_error,
			"Received too big message. This may be because something bad is happening "
			"on the network (attack?), or you tried join more nodes than corosync is "
			"compiled with (%u) or bug in the code (bad estimation of "
			"the UDP_RECEIVE_FRAME_SIZE_MAX). Dropping packet.", PROCESSOR_COUNT_MAX);
		return (0);
	}

	if (instance->totem_config->block_unlisted_ips &&
	    find_member_by_sockaddr(instance, (const struct sockaddr *)&system_from) == NULL) {
		log_printf(instance->totemudpu_log_level_debug, "Packet rejected from %s",
			totemip_sa_print((const struct sockaddr *)&system_from));
		return (0);
	}

	/* Expose only the received bytes to the deliver callback */
	iovec->iov_len = bytes_received;

	/*
	 * Handle incoming message
	 */
	instance->totemudpu_deliver_fn (
		instance->context,
		iovec->iov_base,
		iovec->iov_len,
		&system_from);

	/* Restore the full buffer size for the next receive */
	iovec->iov_len = UDP_RECEIVE_FRAME_SIZE_MAX;
	return (0);
}
/*
 * Thin wrapper around totemip_iface_check() that supplies the
 * clear_node_high_bit setting from the instance's configuration.
 *
 * Returns whatever totemip_iface_check() returns.
 */
static int netif_determine (
	struct totemudpu_instance *instance,
	struct totem_ip_address *bindnet,
	struct totem_ip_address *bound_to,
	int *interface_up,
	int *interface_num)
{
	return (totemip_iface_check (bindnet, bound_to,
		interface_up, interface_num,
		instance->totem_config->clear_node_high_bit));
}
/*
 * Timer callback that makes the token socket follow the state of the
 * configured network interface and reports up/down transitions.
 *
 * If the interface is up, the sockets for totem are built. If the interface is down
 * this function is requeued in the timer list to retry building the sockets later.
 *
 * data: the struct totemudpu_instance to check.
 */
static void timer_function_netif_check_timeout (
void *data)
{
struct totemudpu_instance *instance = (struct totemudpu_instance *)data;
int interface_up;
int interface_num;
/*
 * Determine the current state of the bind interface; no socket is created
 * by this call.
 * NOTE(review): the return value is ignored, so interface_up is presumably
 * always written by totemip_iface_check() -- confirm.
 */
netif_determine (instance,
&instance->totem_interface->bindnet,
&instance->totem_interface->boundto,
&interface_up, &interface_num);
/*
 * Nothing has changed: either we are already in loopback mode and the
 * interface is still down, or we are the only member, regularly bound, and
 * the interface is still up. Re-arm the timer to check again and return.
 */
if ((instance->netif_bind_state == BIND_STATE_LOOPBACK &&
interface_up == 0) ||
(instance->my_memb_entries == 1 &&
instance->netif_bind_state == BIND_STATE_REGULAR &&
interface_up == 1)) {
qb_loop_timer_add (instance->totemudpu_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
/*
 * The timer added above is what later detects a downed regular interface
 */
return;
}
/*
 * The bind state is about to change: drop the current token socket, a new
 * one is built below
 */
if (instance->token_socket > 0) {
qb_loop_poll_del (instance->totemudpu_poll_handle,
instance->token_socket);
close (instance->token_socket);
instance->token_socket = -1;
}
if (interface_up == 0) {
/*
 * Interface down while never bound before (state is still UNBOUND):
 * treated as fatal
 */
if (instance->netif_bind_state == BIND_STATE_UNBOUND) {
log_printf (instance->totemudpu_log_level_error,
"One of your ip addresses are now bound to localhost. "
"Corosync would not work correctly.");
exit(COROSYNC_DONE_FATAL_ERR);
}
/*
 * Interface is not up
 */
instance->netif_bind_state = BIND_STATE_LOOPBACK;
/*
 * Add a timer to retry building interfaces and request memb_gather_enter
 */
qb_loop_timer_add (instance->totemudpu_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
} else {
/*
 * Interface is up
 */
instance->netif_bind_state = BIND_STATE_REGULAR;
}
/*
 * Create and bind the token socket and rebind the member sending sockets.
 * This is done in both states; the token socket is only polled when
 * regularly bound.
 */
totemudpu_build_sockets (instance,
&instance->totem_interface->bindnet,
&instance->totem_interface->boundto);
if (instance->netif_bind_state == BIND_STATE_REGULAR) {
qb_loop_poll_add (instance->totemudpu_poll_handle,
QB_LOOP_MED,
instance->token_socket,
POLLIN, instance, net_deliver_fn);
}
totemip_copy (&instance->my_id, &instance->totem_interface->boundto);
/*
 * This reports changes in the interface to the user and totemsrp.
 * netif_state_report makes sure each transition is reported only once.
 */
if (instance->netif_bind_state == BIND_STATE_REGULAR) {
if (instance->netif_state_report & NETIF_STATE_REPORT_UP) {
log_printf (instance->totemudpu_log_level_notice,
"The network interface [%s] is now up.",
totemip_print (&instance->totem_interface->boundto));
instance->netif_state_report = NETIF_STATE_REPORT_DOWN;
instance->totemudpu_iface_change_fn (instance->context, &instance->my_id, 0);
}
/*
 * Add a timer to check for interface going down in single membership
 */
if (instance->my_memb_entries == 1) {
qb_loop_timer_add (instance->totemudpu_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
}
} else {
if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) {
log_printf (instance->totemudpu_log_level_notice,
"The network interface is down.");
instance->totemudpu_iface_change_fn (instance->context, &instance->my_id, 0);
}
instance->netif_state_report = NETIF_STATE_REPORT_UP;
}
}
/*
 * Raise the priority of "sock" to INTERACTIVE so that our messages do not get
 * queued behind anything else. This is a no-op on platforms without
 * SO_PRIORITY; a failing setsockopt() is only logged as a warning.
 */
static void totemudpu_traffic_control_set(struct totemudpu_instance *instance, int sock)
{
#ifdef SO_PRIORITY
	int interactive_prio = 6; /* TC_PRIO_INTERACTIVE */

	if (setsockopt(sock, SOL_SOCKET, SO_PRIORITY,
	    &interactive_prio, sizeof(interactive_prio)) != 0) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"Could not set traffic priority");
	}
#endif
}
/*
 * Create the token socket: a non-blocking UDP socket bound to bound_to on the
 * interface's ip_port, with a large receive buffer.
 *
 * A failing bind() is retried, waiting BIND_RETRIES_INTERVAL * attempt
 * milliseconds in between, until more than BIND_MAX_RETRIES attempts have
 * failed. Failure to enlarge the receive buffer is logged but not fatal.
 *
 * Returns 0 on success, -1 when the socket cannot be created, made
 * non-blocking or bound.
 */
static int totemudpu_build_sockets_ip (
	struct totemudpu_instance *instance,
	struct totem_ip_address *bindnet_address,
	struct totem_ip_address *bound_to,
	int interface_num)
{
	struct sockaddr_storage bind_addr;
	int bind_addr_len;
	int rc;
	unsigned int rcvbuf_bytes = MCAST_SOCKET_BUFFER_SIZE;
	unsigned int attempts;

	/*
	 * Setup unicast socket
	 */
	instance->token_socket = socket (bindnet_address->family, SOCK_DGRAM, 0);
	if (instance->token_socket == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"socket() failed");
		return (-1);
	}

	totemip_nosigpipe (instance->token_socket);

	if (fcntl (instance->token_socket, F_SETFL, O_NONBLOCK) == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"Could not set non-blocking operation on token socket");
		return (-1);
	}

	/*
	 * Bind to unicast socket used for token send/receives
	 * This has the side effect of binding to the correct interface
	 */
	totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port,
		&bind_addr, &bind_addr_len);

	for (attempts = 1; ; attempts++) {
		rc = bind (instance->token_socket, (struct sockaddr *)&bind_addr, bind_addr_len);
		if (rc == 0) {
			break;
		}

		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"bind token socket failed");

		if (attempts > BIND_MAX_RETRIES) {
			break;
		}

		/*
		 * Wait for a while, longer after every failed attempt
		 */
		(void)poll(NULL, 0, BIND_RETRIES_INTERVAL * attempts);
	}

	if (rc == -1) {
		return (-1);
	}

	/*
	 * the token_socket can receive many messages. Allow a large number
	 * of receive messages on this socket
	 */
	rc = setsockopt (instance->token_socket, SOL_SOCKET, SO_RCVBUF,
		&rcvbuf_bytes, sizeof (rcvbuf_bytes));
	if (rc == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_notice,
			"Could not set recvbuf size");
	}

	return 0;
}
+/*
+ * Fill in the status of the (single) UDPU link for the member with the given
+ * nodeid. node_status is left untouched when no member has that nodeid.
+ *
+ * node_status->reachable is only read here (it is filled in by totemsrp) and
+ * is copied into link_status[0].connected.
+ *
+ * Returns 0 always.
+ */
+int totemudpu_nodestatus_get (void *udpu_context, unsigned int nodeid,
+	struct totem_node_status *node_status)
+{
+	struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
+	struct qb_list_head *list;
+	struct totemudpu_member *member;
+
+	qb_list_for_each(list, &(instance->member_list)) {
+		member = qb_list_entry (list,
+			struct totemudpu_member,
+			list);
+
+		if (member->member.nodeid == nodeid) {
+			node_status->nodeid = nodeid;
+			/* reachable is filled in by totemsrp */
+			if (instance->netif_bind_state == BIND_STATE_REGULAR) {
+				node_status->link_status[0].enabled = 1;
+			} else {
+				node_status->link_status[0].enabled = 0;
+			}
+			node_status->link_status[0].connected = node_status->reachable;
+			node_status->link_status[0].mtu = instance->totem_config->net_mtu;
+			strncpy(node_status->link_status[0].src_ipaddr, totemip_print(&member->member), KNET_MAX_HOST_LEN-1);
+			/*
+			 * strncpy() does not terminate the destination when the source
+			 * is KNET_MAX_HOST_LEN-1 characters or longer, so terminate it
+			 * explicitly.
+			 * NOTE(review): assumes src_ipaddr holds at least
+			 * KNET_MAX_HOST_LEN bytes, as the copy length above implies -- confirm.
+			 */
+			node_status->link_status[0].src_ipaddr[KNET_MAX_HOST_LEN-1] = '\0';
+		}
+	}
+	return (0);
+}
+
/*
 * Report the interface status of the UDPU transport: always exactly one
 * interface whose status string is "OK".
 *
 * status       optional; when not NULL it receives a pointer to a static
 *              array of status strings
 * iface_count  receives 1
 *
 * Returns 0 always.
 */
int totemudpu_ifaces_get (
	void *net_context,
	char ***status,
	unsigned int *iface_count)
{
	static char *ok_statuses[INTERFACE_MAX] = {(char*)"OK"};

	if (status != NULL) {
		*status = ok_statuses;
	}

	*iface_count = 1;

	return (0);
}
/*
 * Create the local loop socket pair (AF_UNIX datagram sockets) that carries
 * traffic while the instance is not regularly bound: both ends are made
 * non-blocking, [0] gets a large receive buffer and [1] a large send buffer.
 *
 * Returns 0 on success. On failure -1 is returned, both descriptors are
 * closed again and local_loop_sock[] is set to -1, so that nothing leaks when
 * the caller gives up on the instance.
 */
static int totemudpu_build_local_sockets(
	struct totemudpu_instance *instance)
{
	int i;
	unsigned int sendbuf_size;
	unsigned int recvbuf_size;
	unsigned int optlen = sizeof (sendbuf_size);
	int res;

	/*
	 * Create local multicast loop socket
	 */
	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, instance->local_loop_sock) == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"socketpair() failed");
		return (-1);
	}

	for (i = 0; i < 2; i++) {
		totemip_nosigpipe (instance->local_loop_sock[i]);
		res = fcntl (instance->local_loop_sock[i], F_SETFL, O_NONBLOCK);
		if (res == -1) {
			LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
				"Could not set non-blocking operation on multicast socket");
			goto error_close_pair;
		}
	}

	recvbuf_size = MCAST_SOCKET_BUFFER_SIZE;
	sendbuf_size = MCAST_SOCKET_BUFFER_SIZE;

	res = setsockopt (instance->local_loop_sock[0], SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen);
	if (res == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug,
			"Unable to set SO_RCVBUF size on UDP local mcast loop socket");
		goto error_close_pair;
	}
	res = setsockopt (instance->local_loop_sock[1], SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen);
	if (res == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug,
			"Unable to set SO_SNDBUF size on UDP local mcast loop socket");
		goto error_close_pair;
	}

	/*
	 * Log the sizes actually in effect; a failing getsockopt() is ignored.
	 * The sizes are unsigned, hence %u.
	 */
	res = getsockopt (instance->local_loop_sock[0], SOL_SOCKET, SO_RCVBUF, &recvbuf_size, &optlen);
	if (res == 0) {
		log_printf (instance->totemudpu_log_level_debug,
			"Local receive multicast loop socket recv buffer size (%u bytes).", recvbuf_size);
	}

	res = getsockopt (instance->local_loop_sock[1], SOL_SOCKET, SO_SNDBUF, &sendbuf_size, &optlen);
	if (res == 0) {
		log_printf (instance->totemudpu_log_level_debug,
			"Local transmit multicast loop socket send buffer size (%u bytes).", sendbuf_size);
	}

	return (0);

error_close_pair:
	/* The error has already been logged (errno was read before any close()) */
	for (i = 0; i < 2; i++) {
		close (instance->local_loop_sock[i]);
		instance->local_loop_sock[i] = -1;
	}
	return (-1);
}
/*
 * (Re)build all sockets that depend on the bind address: determine the
 * address to bind to, create the token socket, raise its priority and give
 * every member a fresh sending socket.
 *
 * Returns -1 when the interface cannot be determined, otherwise the (zero)
 * result of building the token socket. When the token socket cannot be built
 * the process exits.
 */
static int totemudpu_build_sockets (
	struct totemudpu_instance *instance,
	struct totem_ip_address *bindnet_address,
	struct totem_ip_address *bound_to)
{
	int iface_index;
	int iface_is_up;
	int rc;

	/*
	 * Determine the ip address bound to and the interface name
	 */
	rc = netif_determine (instance, bindnet_address, bound_to,
		&iface_is_up, &iface_index);
	if (rc == -1) {
		return (-1);
	}

	totemip_copy(&instance->my_id, bound_to);

	rc = totemudpu_build_sockets_ip (instance,
		bindnet_address, bound_to, iface_index);
	if (rc == -1) {
		/* if we get here, corosync won't work anyway, so better leaving than faking to work */
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_error,
			"Unable to create sockets, exiting");
		exit(EXIT_FAILURE);
	}

	/* We only send out of the token socket */
	totemudpu_traffic_control_set(instance, instance->token_socket);

	/*
	 * Rebind all members to new ips
	 */
	totemudpu_member_list_rebind_ip(instance);

	return rc;
}
/*
 * Totem Network interface
 * depends on poll abstraction, POSIX, IPV4
 */
/*
 * Create an instance of the UDPU transport.
 *
 * The instance is allocated and initialized, the local loop socket pair is
 * created and registered with the poll loop, and two timers are started: the
 * first interface check after 100 ms and the merge detect timer. The
 * mtu_changed callback is accepted but not used.
 *
 * On success *udpu_context receives the new instance and 0 is returned.
 * Returns -1 when the allocation or the local socket setup fails.
 */
int totemudpu_initialize (
	qb_loop_t *poll_handle,
	void **udpu_context,
	struct totem_config *totem_config,
	totemsrp_stats_t *stats,
	void *context,
	void (*deliver_fn) (
		void *context,
		const void *msg,
		unsigned int msg_len,
		const struct sockaddr_storage *system_from),
	void (*iface_change_fn) (
		void *context,
		const struct totem_ip_address *iface_address,
		unsigned int ring_no),
	void (*mtu_changed) (
		void *context,
		int net_mtu),
	void (*target_set_completed) (
		void *context))
{
	struct totemudpu_instance *instance;

	instance = malloc (sizeof (*instance));
	if (instance == NULL) {
		return (-1);
	}

	totemudpu_instance_initialize (instance);

	instance->stats = stats;
	instance->totem_config = totem_config;

	/*
	 * Logging setup: levels, subsystem id and print function come from the
	 * totem configuration; the security level is fixed to 1
	 */
	instance->totemudpu_log_printf = totem_config->totem_logging_configuration.log_printf;
	instance->totemudpu_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
	instance->totemudpu_log_level_security = 1;
	instance->totemudpu_log_level_error = totem_config->totem_logging_configuration.log_level_error;
	instance->totemudpu_log_level_warning = totem_config->totem_logging_configuration.log_level_warning;
	instance->totemudpu_log_level_notice = totem_config->totem_logging_configuration.log_level_notice;
	instance->totemudpu_log_level_debug = totem_config->totem_logging_configuration.log_level_debug;

	/*
	 * Callbacks and the context they are invoked with
	 */
	instance->context = context;
	instance->totemudpu_deliver_fn = deliver_fn;
	instance->totemudpu_iface_change_fn = iface_change_fn;
	instance->totemudpu_target_set_completed = target_set_completed;

	/*
	 * Remaining instance state
	 */
	instance->totemudpu_poll_handle = poll_handle;
	instance->totem_interface = &totem_config->interfaces[0];
	instance->totem_interface->bindnet.nodeid = instance->totem_config->node_id;
	memset (instance->iov_buffer, 0, UDP_RECEIVE_FRAME_SIZE_MAX);

	/*
	 * Create static local mcast sockets
	 */
	if (totemudpu_build_local_sockets(instance) == -1) {
		free(instance);
		return (-1);
	}

	qb_loop_poll_add (
		instance->totemudpu_poll_handle,
		QB_LOOP_MED,
		instance->local_loop_sock[0],
		POLLIN, instance, net_deliver_fn);

	/*
	 * RRP layer isn't ready to receive message because it hasn't
	 * initialized yet. Add short timer to check the interfaces.
	 */
	qb_loop_timer_add (instance->totemudpu_poll_handle,
		QB_LOOP_MED,
		100*QB_TIME_NS_IN_MSEC,
		(void *)instance,
		timer_function_netif_check_timeout,
		&instance->timer_netif_check_timeout);

	totemudpu_start_merge_detect_timeout((void*)instance);

	*udpu_context = instance;
	return (0);
}
/*
 * Allocate one message buffer of FRAME_SIZE_MAX bytes.
 *
 * Returns the buffer, or NULL when malloc() fails. Release it with
 * totemudpu_buffer_release().
 */
void *totemudpu_buffer_alloc (void)
{
	void *frame = malloc (FRAME_SIZE_MAX);

	return (frame);
}
/*
 * Release a buffer obtained from totemudpu_buffer_alloc().
 * Passing NULL is allowed and does nothing (free() semantics).
 */
void totemudpu_buffer_release (void *ptr)
{
	/* A void function must not return an expression, so no "return free (...)" */
	free (ptr);
}
/*
 * Record the current number of processors in the membership.
 *
 * The interface check timer is cancelled in any case and started again (with
 * the configured downcheck timeout) only when this node is the sole member.
 *
 * Returns 0 always.
 */
int totemudpu_processor_count_set (
	void *udpu_context,
	int processor_count)
{
	struct totemudpu_instance *instance = udpu_context;

	instance->my_memb_entries = processor_count;

	qb_loop_timer_del (instance->totemudpu_poll_handle,
		instance->timer_netif_check_timeout);

	if (processor_count != 1) {
		return (0);
	}

	qb_loop_timer_add (instance->totemudpu_poll_handle,
		QB_LOOP_MED,
		instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
		(void *)instance,
		timer_function_netif_check_timeout,
		&instance->timer_netif_check_timeout);

	return (0);
}
/*
 * Receive-flush entry point; this transport has nothing to flush.
 *
 * Returns 0 always.
 */
int totemudpu_recv_flush (void *udpu_context)
{
	(void)udpu_context;

	return 0;
}
/*
 * Send-flush entry point; this transport has nothing to flush.
 *
 * Returns 0 always.
 */
int totemudpu_send_flush (void *udpu_context)
{
	(void)udpu_context;

	return 0;
}
/*
 * Send the token message to the current token target (see
 * totemudpu_token_target_set()).
 *
 * Returns 0 always; send errors are only logged by ucast_sendmsg().
 */
int totemudpu_token_send (
	void *udpu_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemudpu_instance *instance = udpu_context;

	ucast_sendmsg (instance, &instance->token_target, msg, msg_len);

	return (0);
}
/*
 * Send a "flush" message to every member, active or not
 * (mcast_sendmsg() with only_active = 0).
 *
 * Returns 0 always.
 */
int totemudpu_mcast_flush_send (
	void *udpu_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemudpu_instance *instance = udpu_context;

	mcast_sendmsg (instance, msg, msg_len, 0);

	return (0);
}
/*
 * Send a regular (non-flush) message: mcast_sendmsg() with only_active = 1,
 * so inactive members are skipped unless a merge detect message is due.
 *
 * Returns 0 always.
 */
int totemudpu_mcast_noflush_send (
	void *udpu_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemudpu_instance *instance = udpu_context;

	mcast_sendmsg (instance, msg, msg_len, 1);

	return (0);
}
/*
 * Run the interface check immediately instead of waiting for its timer.
 *
 * Returns 0 always.
 */
extern int totemudpu_iface_check (void *udpu_context)
{
	struct totemudpu_instance *instance = udpu_context;

	timer_function_netif_check_timeout (instance);

	return (0);
}
/*
 * Reduce totem_config->net_mtu by the UDP/IP header size of the address
 * family of interface 0. udpu_context is not used.
 */
extern void totemudpu_net_mtu_adjust (void *udpu_context, struct totem_config *totem_config)
{
	const int bind_family = totem_config->interfaces[0].bindnet.family;

	totem_config->net_mtu -= totemip_udpip_header_size(bind_family);
}
/*
 * Make the member with the given nodeid the destination of future token
 * sends and notify the upper layer through target_set_completed.
 * Nothing happens (and no callback is made) when no member has that nodeid.
 *
 * Returns 0 always.
 */
int totemudpu_token_target_set (
	void *udpu_context,
	unsigned int nodeid)
{
	struct totemudpu_instance *instance = udpu_context;
	struct qb_list_head *iter;
	struct totemudpu_member *peer;

	qb_list_for_each(iter, &(instance->member_list)) {
		peer = qb_list_entry (iter, struct totemudpu_member, list);

		if (peer->member.nodeid != nodeid) {
			continue;
		}

		/* First match wins */
		memcpy (&instance->token_target, &peer->member,
			sizeof (struct totem_ip_address));
		instance->totemudpu_target_set_completed (instance->context);
		break;
	}

	return (0);
}
/*
 * Drain the receive queues: read and discard every datagram pending on the
 * token socket (only when regularly bound) and on the receiving end of the
 * local loop socket pair. The data lands in the instance's receive buffer and
 * is not delivered.
 *
 * Returns 0 when nothing was pending, 1 when the last recvmsg() succeeded and
 * -1 when the last recvmsg() failed.
 */
extern int totemudpu_recv_mcast_empty (
	void *udpu_context)
{
	struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
	ssize_t res;
	struct sockaddr_storage system_from;
	struct msghdr msg_recv;
	struct pollfd ufd;
	int nfds, i;
	int msg_processed = 0;
	int sock;

	/*
	 * Receive datagram
	 *
	 * Zero the header first so no field stays uninitialized when one of the
	 * HAVE_MSGHDR_* macros is undefined.
	 */
	memset (&msg_recv, 0, sizeof (msg_recv));
	msg_recv.msg_name = &system_from;
	msg_recv.msg_namelen = sizeof (struct sockaddr_storage);
	msg_recv.msg_iov = &instance->totemudpu_iov_recv;
	msg_recv.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
	msg_recv.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
	msg_recv.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
	msg_recv.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
	msg_recv.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
	msg_recv.msg_accrightslen = 0;
#endif

	for (i = 0; i < 2; i++) {
		sock = -1;
		if (i == 0) {
			if (instance->netif_bind_state == BIND_STATE_REGULAR) {
				sock = instance->token_socket;
			} else {
				/* The token socket is not in use while in loopback mode */
				continue;
			}
		}
		if (i == 1) {
			sock = instance->local_loop_sock[0];
		}
		assert(sock != -1);

		for (;;) {
			ufd.fd = sock;
			ufd.events = POLLIN;
			ufd.revents = 0;
			nfds = poll (&ufd, 1, 0);

			/*
			 * Stop when nothing is readable. This includes poll()
			 * reporting the descriptor with only POLLERR/POLLHUP/POLLNVAL
			 * set: no recvmsg() would run then, so retrying could
			 * never make progress and the loop would spin forever.
			 */
			if (nfds != 1 || !(ufd.revents & POLLIN)) {
				break;
			}

			/* recvmsg() overwrites msg_namelen with the sender's address length */
			msg_recv.msg_namelen = sizeof (struct sockaddr_storage);

			res = recvmsg (sock, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT);
			if (res != -1) {
				msg_processed = 1;
			} else {
				msg_processed = -1;
			}
		}
	}

	return (msg_processed);
}
/*
 * Create the socket used to send "multicast" traffic to one member: a
 * non-blocking UDP socket of the member's address family, with a large send
 * buffer, bound to our own address (my_id) on an ephemeral port.
 *
 * Failure to enlarge the send buffer is logged but tolerated.
 *
 * Returns the new descriptor, or -1 on failure (nothing is left open).
 */
static int totemudpu_create_sending_socket(
	void *udpu_context,
	const struct totem_ip_address *member)
{
	struct totemudpu_instance *instance = udpu_context;
	struct sockaddr_storage local_addr;
	int local_addr_len;
	unsigned int sndbuf_bytes = MCAST_SOCKET_BUFFER_SIZE;
	int new_fd;

	new_fd = socket (member->family, SOCK_DGRAM, 0);
	if (new_fd == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"Could not create socket for new member");
		return (-1);
	}

	totemip_nosigpipe (new_fd);

	if (fcntl (new_fd, F_SETFL, O_NONBLOCK) == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"Could not set non-blocking operation on token socket");
		close(new_fd);
		return (-1);
	}

	/*
	 * These sockets are used to send multicast messages, so their buffers
	 * should be large
	 */
	if (setsockopt (new_fd, SOL_SOCKET, SO_SNDBUF,
	    &sndbuf_bytes, sizeof (sndbuf_bytes)) == -1) {
		/*
		 * Fail in setting sendbuf size is not fatal -> don't exit
		 */
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_notice,
			"Could not set sendbuf size");
	}

	/*
	 * Bind to sending interface
	 */
	totemip_totemip_to_sockaddr_convert(&instance->my_id, 0, &local_addr, &local_addr_len);
	if (bind (new_fd, (struct sockaddr *)&local_addr, local_addr_len) == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"bind token socket failed");
		close(new_fd);
		return (-1);
	}

	return (new_fd);
}
/*
 * Changing the interface at run time is not supported by this transport.
 *
 * Returns -1 always; no argument is used.
 */
int totemudpu_iface_set (void *net_context,
	const struct totem_ip_address *local_addr,
	unsigned short ip_port,
	unsigned int iface_no)
{
	(void)net_context;
	(void)local_addr;
	(void)ip_port;
	(void)iface_no;

	return -1;
}
/*
 * Add a peer to the member list.
 *
 * The new member is appended to the list, marked active and given its own
 * sending socket. A failure to create that socket is not reported here; the
 * member then simply carries fd == -1. "local" and "ring_no" are not used.
 *
 * Returns 0 on success, -1 when out of memory.
 */
int totemudpu_member_add (
	void *udpu_context,
	const struct totem_ip_address *local,
	const struct totem_ip_address *member,
	int ring_no)
{
	struct totemudpu_instance *instance = udpu_context;
	struct totemudpu_member *entry;

	/* calloc() hands back zeroed memory */
	entry = calloc (1, sizeof (*entry));
	if (entry == NULL) {
		return (-1);
	}

	log_printf (LOGSYS_LEVEL_NOTICE, "adding new UDPU member {%s}",
		totemip_print(member));

	qb_list_init (&entry->list);
	qb_list_add_tail (&entry->list, &instance->member_list);

	memcpy (&entry->member, member, sizeof (struct totem_ip_address));
	entry->fd = totemudpu_create_sending_socket(udpu_context, member);
	entry->active = 1;

	return (0);
}
/*
 * Remove the member whose address equals token_target: its sending socket is
 * closed, it is unlinked from the member list and its memory is released.
 * Nothing happens when no such member exists. "ring_no" is not used.
 *
 * Returns 0 always.
 */
int totemudpu_member_remove (
	void *udpu_context,
	const struct totem_ip_address *token_target,
	int ring_no)
{
	struct qb_list_head *list;
	struct totemudpu_member *member;
	struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;

	/*
	 * Find the member to remove and close its socket
	 */
	qb_list_for_each(list, &(instance->member_list)) {
		member = qb_list_entry (list,
			struct totemudpu_member,
			list);

		if (totemip_compare (token_target, &member->member) != 0) {
			continue;
		}

		log_printf(LOGSYS_LEVEL_NOTICE,
			"removing UDPU member {%s}",
			totemip_print(&member->member));

		if (member->fd > 0) {
			log_printf(LOGSYS_LEVEL_DEBUG,
				"Closing socket to: {%s}",
				totemip_print(&member->member));
			qb_loop_poll_del (instance->totemudpu_poll_handle,
				member->fd);
			close (member->fd);
		}

		/*
		 * Delete the member from the list and free it; it was allocated
		 * in totemudpu_member_add() and nothing else keeps a pointer to
		 * it (the token target is stored by value). Leave the loop right
		 * away: the iterator points into the node that was just freed.
		 */
		qb_list_del (&member->list);
		free (member);
		break;
	}

	return (0);
}
/*
 * Give every member a fresh sending socket bound to the current local
 * address: the old socket (if its descriptor is > 0) is closed and replaced
 * by the result of totemudpu_create_sending_socket(), which may be -1.
 *
 * Returns 0 always.
 */
int totemudpu_member_list_rebind_ip (
	void *udpu_context)
{
	struct totemudpu_instance *instance = udpu_context;
	struct qb_list_head *iter;
	struct totemudpu_member *peer;

	qb_list_for_each(iter, &(instance->member_list)) {
		peer = qb_list_entry (iter, struct totemudpu_member, list);

		if (peer->fd > 0) {
			close (peer->fd);
		}

		peer->fd = totemudpu_create_sending_socket(udpu_context, &peer->member);
	}

	return (0);
}
/*
 * Merge detect timer callback.
 *
 * If no message reached all members since the timer last fired, flag that the
 * next non-flush send must go to inactive members too. Then reset the counter
 * and re-arm the timer.
 */
static void timer_function_merge_detect_timeout (
	void *data)
{
	struct totemudpu_instance *instance = data;
	const int nothing_sent_to_all =
		(instance->merge_detect_messages_sent_before_timeout == 0);

	if (nothing_sent_to_all) {
		instance->send_merge_detect_message = 1;
	}

	instance->merge_detect_messages_sent_before_timeout = 0;

	totemudpu_start_merge_detect_timeout(instance);
}
/*
 * Arm the merge-detect timer.
 *
 * Registers timer_function_merge_detect_timeout on the instance's qb loop
 * with QB_LOOP_MED, using a duration of twice totem_config->merge_timeout
 * multiplied by QB_TIME_NS_IN_MSEC. The instance is passed as the callback
 * data and the timer handle is stored in timer_merge_detect_timeout.
 * The result of qb_loop_timer_add() is not checked.
 */
static void totemudpu_start_merge_detect_timeout(
	void *udpu_context)
{
	struct totemudpu_instance *udpu = (struct totemudpu_instance *)udpu_context;

	qb_loop_timer_add(
		udpu->totemudpu_poll_handle,
		QB_LOOP_MED,
		udpu->totem_config->merge_timeout * 2 * QB_TIME_NS_IN_MSEC,
		(void *)udpu,
		timer_function_merge_detect_timeout,
		&udpu->timer_merge_detect_timeout);
}
/*
 * Cancel the merge-detect timer by handing the stored timer handle to
 * qb_loop_timer_del(). The result of that call is not checked.
 */
static void totemudpu_stop_merge_detect_timeout(
	void *udpu_context)
{
	struct totemudpu_instance *udpu = (struct totemudpu_instance *)udpu_context;

	qb_loop_timer_del(
		udpu->totemudpu_poll_handle,
		udpu->timer_merge_detect_timeout);
}
/*
 * Reconfiguration entry point of the UDPU transport.
 *
 * Neither argument is examined: reconfiguration is not supported here,
 * so the function unconditionally reports failure with -1.
 */
int totemudpu_reconfigure (
	void *udpu_context,
	struct totem_config *totem_config)
{
	const int not_supported = -1;

	return (not_supported);
}
diff --git a/exec/totemudpu.h b/exec/totemudpu.h
index 47ee4772..07e63459 100644
--- a/exec/totemudpu.h
+++ b/exec/totemudpu.h
@@ -1,141 +1,144 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2011 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TOTEMUDPU_H_DEFINED
#define TOTEMUDPU_H_DEFINED
#include <sys/types.h>
#include <sys/socket.h>
#include <qb/qbloop.h>
#include <corosync/totem/totem.h>
/**
* Create an instance
*
* Parameters, as far as the signature shows:
*  - poll_handle: libqb main loop handle (qb_loop_t *).
*  - udpu_context: pointer to a void *; presumably receives the new
*    instance that the other totemudpu_* calls take as their context
*    argument (TODO confirm in totemudpu.c).
*  - totem_config, stats: totem configuration and totemsrp statistics.
*  - context: opaque pointer; presumably handed back as the first argument
*    of the callbacks below (TODO confirm).
*  - deliver_fn: callback taking a message, its length and a socket
*    address named system_from.
*  - iface_change_fn: callback taking an interface address and a ring number.
*  - mtu_changed: callback taking a network MTU value.
*  - target_set_completed: callback taking only the context.
*
* The meaning of the int result is not visible in this header.
*/
extern int totemudpu_initialize (
qb_loop_t *poll_handle,
void **udpu_context,
struct totem_config *totem_config,
totemsrp_stats_t *stats,
void *context,
void (*deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len,
const struct sockaddr_storage *system_from),
void (*iface_change_fn) (
void *context,
const struct totem_ip_address *iface_address,
unsigned int ring_no),
void (*mtu_changed) (
void *context,
int net_mtu),
void (*target_set_completed) (
void *context));
/*
 * Buffer handling: totemudpu_buffer_alloc() returns a void * and
 * totemudpu_buffer_release() takes one.
 * NOTE(review): buffer size and ownership rules are not visible in this
 * header; see totemudpu.c.
 */
extern void *totemudpu_buffer_alloc (void);
extern void totemudpu_buffer_release (void *ptr);
/* Pass a processor count to the instance. */
extern int totemudpu_processor_count_set (
void *udpu_context,
int processor_count);
/*
 * The three send functions below share one shape: the instance context,
 * a pointer to the message and the message length.
 */
extern int totemudpu_token_send (
void *udpu_context,
const void *msg,
unsigned int msg_len);
extern int totemudpu_mcast_flush_send (
void *udpu_context,
const void *msg,
unsigned int msg_len);
extern int totemudpu_mcast_noflush_send (
void *udpu_context,
const void *msg,
unsigned int msg_len);
/*
 * Status queries. totemudpu_nodestatus_get() takes a node id and a
 * caller-supplied struct totem_node_status (presumably filled in by the
 * call - TODO confirm in totemudpu.c). totemudpu_ifaces_get() takes
 * pointers for a status string array and an interface count.
 */
+extern int totemudpu_nodestatus_get (void *net_context, unsigned int nodeid,
+ struct totem_node_status *node_status);
+
extern int totemudpu_ifaces_get (void *net_context,
char ***status,
unsigned int *iface_count);
/*
 * Flush, interface, lifetime and tuning calls. All of them operate on the
 * instance passed as the first (void *) argument.
 * NOTE(review): return value conventions are not visible in this header.
 */
extern int totemudpu_recv_flush (void *udpu_context);
extern int totemudpu_send_flush (void *udpu_context);
/* Takes the local address, IP port and interface number of one interface. */
extern int totemudpu_iface_set (void *net_context,
const struct totem_ip_address *local_addr,
unsigned short ip_port,
unsigned int iface_no);
extern int totemudpu_iface_check (void *udpu_context);
extern int totemudpu_finalize (void *udpu_context);
extern void totemudpu_net_mtu_adjust (void *udpu_context, struct totem_config *totem_config);
/* Takes the node id of the token target. */
extern int totemudpu_token_target_set (
void *udpu_context,
unsigned int nodeid);
/* Takes cipher and hash type names as strings. */
extern int totemudpu_crypto_set (
void *udpu_context,
const char *cipher_type,
const char *hash_type);
extern int totemudpu_recv_mcast_empty (
void *udpu_context);
/*
 * Member list maintenance and reconfiguration.
 *
 * NOTE(review): the visible part of totemudpu_member_add() in
 * totemudpu.c returns -1 when allocating the member record fails and 0
 * at its end; earlier exits, if any, should be checked there.
 */
extern int totemudpu_member_add (
void *udpu_context,
const struct totem_ip_address *local,
const struct totem_ip_address *member,
int ring_no);
/* The definition in totemudpu.c returns 0 whether or not a member matched. */
extern int totemudpu_member_remove (
void *udpu_context,
const struct totem_ip_address *member,
int ring_no);
/* Not supported by this transport: the definition in totemudpu.c always returns -1. */
extern int totemudpu_reconfigure (
void *udpu_context,
struct totem_config *totem_config);
#endif /* TOTEMUDPU_H_DEFINED */
diff --git a/include/corosync/cfg.h b/include/corosync/cfg.h
index fa967b0a..c9cd06d0 100644
--- a/include/corosync/cfg.h
+++ b/include/corosync/cfg.h
@@ -1,249 +1,286 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2013 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef COROSYNC_CFG_H_DEFINED
#define COROSYNC_CFG_H_DEFINED
#include <netinet/in.h>
#include <corosync/corotypes.h>
/**
 * 64-bit handle type used by the corosync_cfg_* functions declared in
 * this header.
 */
typedef uint64_t corosync_cfg_handle_t;
/**
* Shutdown types.
*
* Passed to corosync_cfg_try_shutdown() and to the shutdown callback.
*/
typedef enum {
/**
* REQUEST is the normal shutdown.
* Other daemons will be consulted.
*/
COROSYNC_CFG_SHUTDOWN_FLAG_REQUEST = 0,
/**
* REGARDLESS will tell other daemons but ignore their opinions.
*/
COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS = 1,
/**
* IMMEDIATE will shut down straight away
* (but still tell other nodes).
*/
COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE = 2,
} corosync_cfg_shutdown_flags_t;
/**
* @brief enum corosync_cfg_shutdown_reply_flags_t
*
* Answer values accepted by corosync_cfg_replyto_shutdown().
*/
typedef enum {
COROSYNC_CFG_SHUTDOWN_FLAG_NO = 0,
COROSYNC_CFG_SHUTDOWN_FLAG_YES = 1,
} corosync_cfg_shutdown_reply_flags_t;
/**
* @brief corosync_cfg_shutdown_callback_t callback
*
* Function pointer type taking the CFG handle and the shutdown flags.
*/
typedef void (*corosync_cfg_shutdown_callback_t) (
corosync_cfg_handle_t cfg_handle,
corosync_cfg_shutdown_flags_t flags);
/**
* @brief struct corosync_cfg_callbacks_t
*
* Callback table passed to corosync_cfg_initialize(). It currently holds
* a single entry, the shutdown callback.
*/
typedef struct {
corosync_cfg_shutdown_callback_t corosync_cfg_shutdown_callback;
} corosync_cfg_callbacks_t;
/**
* A node address. This is a complete sockaddr_in[6]
*
* To explain:
* If you cast the address field to a 'struct sockaddr', the sa_family field
* will be AF_INET or AF_INET6. Armed with that knowledge you can then
* cast it to a sockaddr_in or sockaddr_in6 and pull out the address.
* No other sockaddr fields are valid.
* Also, you must ignore any part of the sockaddr beyond the length supplied
*
* The address buffer is sized as sizeof(struct sockaddr_in6).
* This type is the element type of the addrs array taken by
* corosync_cfg_get_node_addrs().
*/
typedef struct
{
int address_length; /**< @todo FIXME: set but never used */
char address[sizeof(struct sockaddr_in6)];
} corosync_cfg_node_address_t;
/*
* Interfaces
*/
#ifdef __cplusplus
extern "C" {
#endif
/**
* @brief corosync_cfg_initialize
* @param cfg_handle pointer to caller-provided handle storage (presumably
*        filled in on success - TODO confirm in lib/cfg.c)
* @param cfg_callbacks callback table, see corosync_cfg_callbacks_t
* @return a cs_error_t status code
*/
cs_error_t
corosync_cfg_initialize (
corosync_cfg_handle_t *cfg_handle,
const corosync_cfg_callbacks_t *cfg_callbacks);
/**
* @brief corosync_cfg_fd_get
* @param cfg_handle CFG handle
* @param selection_fd pointer to an int32_t (presumably receives a file
*        descriptor for the connection - TODO confirm)
* @return a cs_error_t status code
*/
cs_error_t
corosync_cfg_fd_get (
corosync_cfg_handle_t cfg_handle,
int32_t *selection_fd);
/**
* @brief corosync_cfg_dispatch
* @param cfg_handle CFG handle
* @param dispatch_flags a cs_dispatch_flags_t value
* @return a cs_error_t status code
*/
cs_error_t
corosync_cfg_dispatch (
corosync_cfg_handle_t cfg_handle,
cs_dispatch_flags_t dispatch_flags);
/**
* @brief corosync_cfg_finalize
* @param cfg_handle CFG handle
* @return a cs_error_t status code
*/
cs_error_t
corosync_cfg_finalize (
corosync_cfg_handle_t cfg_handle);
/**
* @brief corosync_cfg_ring_status_get
* @param cfg_handle CFG handle
* @param interface_names pointer to an array of strings
* @param status pointer to an array of strings
* @param interface_count pointer to an unsigned int
* @return a cs_error_t status code
*
* NOTE(review): who allocates and frees the two string arrays is not
* visible in this header; check lib/cfg.c before relying on it.
*/
cs_error_t
corosync_cfg_ring_status_get (
corosync_cfg_handle_t cfg_handle,
char ***interface_names,
char ***status,
unsigned int *interface_count);
/*
 * Node status reporting, used by corosync_cfg_node_status_get().
 *
 * struct corosync_knet_node_status is embedded by value in
 * struct res_lib_cfg_nodestatusget (ipc_cfg.h), so the order and size of
 * the fields below are part of that IPC message layout.
 */
+#define CFG_NODE_STATUS_STRUCT_VERSION 1
+#define CFG_MAX_HOST_LEN 256
+#define CFG_MAX_LINKS 8
/* Status of one link; src_ipaddr and dst_ipaddr are CFG_MAX_HOST_LEN bytes each. */
+struct corosync_knet_link_status {
+ uint8_t enabled; /* link is configured and admin enabled for traffic */
+ uint8_t connected; /* link is connected for data (local view) */
+ uint8_t dynconnected; /* link has been activated by remote dynip */
+ unsigned int mtu; /* current detected MTU on this link */
+ char src_ipaddr[CFG_MAX_HOST_LEN];
+ char dst_ipaddr[CFG_MAX_HOST_LEN];
+};
+
/*
 * Status of one node, with CFG_MAX_LINKS link_status entries.
 * NOTE(review): version is presumably CFG_NODE_STATUS_STRUCT_VERSION - confirm.
 */
+struct corosync_knet_node_status {
+ uint32_t version;
+ unsigned int nodeid;
+ uint8_t reachable;
+ uint8_t remote;
+ uint8_t external;
+ uint8_t onwire_min;
+ uint8_t onwire_max;
+ uint8_t onwire_ver;
+ struct corosync_knet_link_status link_status[CFG_MAX_LINKS];
+};
+
+/**
+ * @brief corosync_cfg_node_status_get
+ * @param cfg_handle CFG handle
+ * @param nodeid id of the node to query
+ * @param node_status caller-provided struct corosync_knet_node_status
+ *        (presumably filled in on success - TODO confirm in lib/cfg.c)
+ * @return a cs_error_t status code
+ */
+cs_error_t
+corosync_cfg_node_status_get (
+ corosync_cfg_handle_t cfg_handle,
+ unsigned int nodeid,
+ struct corosync_knet_node_status *node_status);
+
/**
* @brief corosync_cfg_kill_node
* @param cfg_handle CFG handle
* @param nodeid id of the node concerned
* @param reason string giving the reason
* @return a cs_error_t status code
*/
cs_error_t
corosync_cfg_kill_node (
corosync_cfg_handle_t cfg_handle,
unsigned int nodeid,
const char *reason);
/**
* @brief corosync_cfg_try_shutdown
* @param cfg_handle CFG handle
* @param flags one of the corosync_cfg_shutdown_flags_t values
*        (REQUEST, REGARDLESS or IMMEDIATE)
* @return a cs_error_t status code
*/
cs_error_t
corosync_cfg_try_shutdown (
corosync_cfg_handle_t cfg_handle,
corosync_cfg_shutdown_flags_t flags);
/**
* @brief corosync_cfg_replyto_shutdown
* @param cfg_handle CFG handle
* @param flags COROSYNC_CFG_SHUTDOWN_FLAG_NO or COROSYNC_CFG_SHUTDOWN_FLAG_YES
* @return a cs_error_t status code
*/
cs_error_t
corosync_cfg_replyto_shutdown (
corosync_cfg_handle_t cfg_handle,
corosync_cfg_shutdown_reply_flags_t flags);
/**
* @brief corosync_cfg_get_node_addrs
* @param cfg_handle CFG handle
* @param nodeid id of the node to query
* @param max_addrs presumably the capacity of addrs, in entries (TODO confirm)
* @param num_addrs pointer to an int (presumably receives the number of
*        entries written - TODO confirm)
* @param addrs array of corosync_cfg_node_address_t
* @return a cs_error_t status code
*/
cs_error_t
corosync_cfg_get_node_addrs (
corosync_cfg_handle_t cfg_handle,
unsigned int nodeid,
size_t max_addrs,
int *num_addrs,
corosync_cfg_node_address_t *addrs);
/**
* @brief corosync_cfg_local_get
* @param handle CFG handle
* @param local_nodeid pointer to an unsigned int for the local node id
* @return a cs_error_t status code
*/
cs_error_t
corosync_cfg_local_get (
corosync_cfg_handle_t handle,
unsigned int *local_nodeid);
/**
* @brief corosync_cfg_reload_config
* @param handle CFG handle
* @return a cs_error_t status code
*/
cs_error_t corosync_cfg_reload_config (
corosync_cfg_handle_t handle);
/**
* @brief Reopen logging files
* @param handle CFG service handle
* @return CS_OK on success, CS_ERR_NOT_SUPPORTED if reopening of logging files is not available,
* otherwise one of common errors.
*/
cs_error_t corosync_cfg_reopen_log_files (
corosync_cfg_handle_t handle);
#ifdef __cplusplus
}
#endif
#endif /* COROSYNC_CFG_H_DEFINED */
diff --git a/include/corosync/ipc_cfg.h b/include/corosync/ipc_cfg.h
index 79da02e0..b4ac9fc5 100644
--- a/include/corosync/ipc_cfg.h
+++ b/include/corosync/ipc_cfg.h
@@ -1,263 +1,282 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2009-2013 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef IPC_CFG_H_DEFINED
#define IPC_CFG_H_DEFINED
#include <netinet/in.h>
#include <corosync/corotypes.h>
#include <corosync/mar_gen.h>
/*
 * Per-entry sizes of the interface_name and interface_status arrays in
 * struct res_lib_cfg_ringstatusget.
 */
#define CFG_INTERFACE_NAME_MAX_LEN 128
#define CFG_INTERFACE_STATUS_MAX_LEN 512
/*
* To keep future ABI compatibility, this value
* is intentionally bigger than INTERFACE_MAX
*/
#define CFG_MAX_INTERFACES 16
/**
* @brief The req_lib_cfg_types enum
*
* Numeric ids of the CFG request messages. Every value is explicit and
* the new id (NODESTATUSGET = 9) is appended after the existing ones, so
* the previously assigned ids keep their values.
*/
enum req_lib_cfg_types {
MESSAGE_REQ_CFG_RINGSTATUSGET = 0,
MESSAGE_REQ_CFG_RINGREENABLE = 1,
MESSAGE_REQ_CFG_KILLNODE = 2,
MESSAGE_REQ_CFG_TRYSHUTDOWN = 3,
MESSAGE_REQ_CFG_REPLYTOSHUTDOWN = 4,
MESSAGE_REQ_CFG_GET_NODE_ADDRS = 5,
MESSAGE_REQ_CFG_LOCAL_GET = 6,
MESSAGE_REQ_CFG_RELOAD_CONFIG = 7,
- MESSAGE_REQ_CFG_REOPEN_LOG_FILES = 8
+ MESSAGE_REQ_CFG_REOPEN_LOG_FILES = 8,
+ MESSAGE_REQ_CFG_NODESTATUSGET = 9
};
/**
* @brief The res_lib_cfg_types enum
*
* Numeric ids of the CFG response messages. They are numbered
* independently of the request ids (KILLNODE is 2 as a request and 8 as
* a response). The new id (NODESTATUSGET = 16) is appended at the end.
*/
enum res_lib_cfg_types {
MESSAGE_RES_CFG_RINGSTATUSGET = 0,
MESSAGE_RES_CFG_RINGREENABLE = 1,
MESSAGE_RES_CFG_STATETRACKSTART = 2,
MESSAGE_RES_CFG_STATETRACKSTOP = 3,
MESSAGE_RES_CFG_ADMINISTRATIVESTATESET = 4,
MESSAGE_RES_CFG_ADMINISTRATIVESTATEGET = 5,
MESSAGE_RES_CFG_SERVICELOAD = 6,
MESSAGE_RES_CFG_SERVICEUNLOAD = 7,
MESSAGE_RES_CFG_KILLNODE = 8,
MESSAGE_RES_CFG_TRYSHUTDOWN = 9,
MESSAGE_RES_CFG_TESTSHUTDOWN = 10,
MESSAGE_RES_CFG_GET_NODE_ADDRS = 11,
MESSAGE_RES_CFG_LOCAL_GET = 12,
MESSAGE_RES_CFG_REPLYTOSHUTDOWN = 13,
MESSAGE_RES_CFG_RELOAD_CONFIG = 14,
- MESSAGE_RES_CFG_REOPEN_LOG_FILES = 15
+ MESSAGE_RES_CFG_REOPEN_LOG_FILES = 15,
+ MESSAGE_RES_CFG_NODESTATUSGET = 16
};
/**
* @brief The req_lib_cfg_ringstatusget struct
*
* Carries only the IPC request header.
*/
struct req_lib_cfg_ringstatusget {
struct qb_ipc_request_header header __attribute__((aligned(8)));
};
/**
* @brief The res_lib_cfg_ringstatusget struct
*
* Holds an interface count plus fixed-size name and status buffers for
* up to CFG_MAX_INTERFACES interfaces. Every member is 8-byte aligned.
*/
struct res_lib_cfg_ringstatusget {
struct qb_ipc_response_header header __attribute__((aligned(8)));
mar_uint32_t interface_count __attribute__((aligned(8)));
char interface_name[CFG_MAX_INTERFACES][CFG_INTERFACE_NAME_MAX_LEN] __attribute__((aligned(8)));
char interface_status[CFG_MAX_INTERFACES][CFG_INTERFACE_STATUS_MAX_LEN] __attribute__((aligned(8)));
};
+/**
+ * @brief The req_lib_cfg_nodestatusget struct
+ *
+ * Carries the id of the node to query and a structure version number.
+ * NOTE(review): version is presumably CFG_NODE_STATUS_STRUCT_VERSION
+ * from corosync/cfg.h - confirm against the sender.
+ */
+struct req_lib_cfg_nodestatusget {
+ struct qb_ipc_request_header header __attribute__((aligned(8)));
+ unsigned int nodeid __attribute__((aligned(8)));
+ mar_uint32_t version __attribute__((aligned(8)));
+};
+
+/**
+ * @brief The res_lib_cfg_nodestatusget struct
+ *
+ * Embeds struct corosync_knet_node_status by value.
+ * NOTE(review): that struct is defined in corosync/cfg.h, which is not
+ * among the headers included at the top of this file, so includers
+ * apparently have to include corosync/cfg.h first - confirm.
+ */
+struct res_lib_cfg_nodestatusget {
+ struct qb_ipc_response_header header __attribute__((aligned(8)));
+ struct corosync_knet_node_status node_status __attribute__((aligned(8)));
+};
+
/**
* @brief The req_lib_cfg_ringreenable struct
*
* Carries only the IPC request header.
*/
struct req_lib_cfg_ringreenable {
struct qb_ipc_request_header header __attribute__((aligned(8)));
};
/**
* @brief The res_lib_cfg_ringreenable struct
*
* Carries only the IPC response header.
*/
struct res_lib_cfg_ringreenable {
struct qb_ipc_response_header header __attribute__((aligned(8)));
};
/**
* @brief The req_lib_cfg_killnode struct
*
* Carries a node id and a reason held in a cs_name_t.
*/
struct req_lib_cfg_killnode {
struct qb_ipc_request_header header __attribute__((aligned(8)));
unsigned int nodeid __attribute__((aligned(8)));
cs_name_t reason __attribute__((aligned(8)));
};
/**
* @brief The res_lib_cfg_killnode struct
*
* Carries only the IPC response header.
*/
struct res_lib_cfg_killnode {
struct qb_ipc_response_header header __attribute__((aligned(8)));
};
/**
* @brief The req_lib_cfg_tryshutdown struct
*
* NOTE(review): unlike most payload fields in this header, flags has no
* aligned(8) attribute.
*/
struct req_lib_cfg_tryshutdown {
struct qb_ipc_request_header header __attribute__((aligned(8)));
unsigned int flags;
};
/**
* @brief The res_lib_cfg_tryshutdown struct
*
* Carries only the IPC response header.
*/
struct res_lib_cfg_tryshutdown {
struct qb_ipc_response_header header __attribute__((aligned(8)));
};
/**
* @brief The req_lib_cfg_replytoshutdown struct
*
* NOTE(review): response has no aligned(8) attribute.
*/
struct req_lib_cfg_replytoshutdown {
struct qb_ipc_request_header header __attribute__((aligned(8)));
unsigned int response;
};
/**
* @brief The res_lib_cfg_replytoshutdown struct
*
* Carries only the IPC response header.
*/
struct res_lib_cfg_replytoshutdown {
struct qb_ipc_response_header header __attribute__((aligned(8)));
};
/**
* @brief The res_lib_cfg_testshutdown struct
*
* This header declares no matching request struct for it.
*/
struct res_lib_cfg_testshutdown {
struct qb_ipc_response_header header __attribute__((aligned(8)));
unsigned int flags;
};
/**
* @brief The req_lib_cfg_get_node_addrs struct
*
* Carries the id of the node whose addresses are requested.
*/
struct req_lib_cfg_get_node_addrs {
struct qb_ipc_request_header header __attribute__((aligned(8)));
unsigned int nodeid;
};
/**
* @brief The res_lib_cfg_get_node_addrs struct
*
* Variable-length reply: addrs is a flexible array member that follows
* the family and num_addrs fields.
*/
struct res_lib_cfg_get_node_addrs {
struct qb_ipc_response_header header __attribute__((aligned(8)));
unsigned int family;
unsigned int num_addrs;
/* array of TOTEMIP_ADDRLEN items */
char addrs[];
};
/**
* @brief The req_lib_cfg_local_get struct
*
* Carries only the IPC request header.
*/
struct req_lib_cfg_local_get {
struct qb_ipc_request_header header __attribute__((aligned(8)));
};
/**
* @brief The res_lib_cfg_local_get struct
*
* Carries the local node id as a mar_uint32_t.
*/
struct res_lib_cfg_local_get {
struct qb_ipc_response_header header __attribute__((aligned(8)));
mar_uint32_t local_nodeid __attribute__((aligned(8)));
};
/**
* @brief The req_lib_cfg_reload_config struct
*
* Carries only the IPC request header.
*/
struct req_lib_cfg_reload_config {
struct qb_ipc_request_header header __attribute__((aligned(8)));
};
/**
* @brief The res_lib_cfg_reload_config struct
*
* Carries only the IPC response header.
*/
struct res_lib_cfg_reload_config {
struct qb_ipc_response_header header __attribute__((aligned(8)));
};
/**
* @brief The req_lib_cfg_reopen_log_files struct
*
* Carries only the IPC request header.
*/
struct req_lib_cfg_reopen_log_files {
struct qb_ipc_request_header header __attribute__((aligned(8)));
};
/**
* @brief The res_lib_cfg_reopen_log_files struct
*
* Carries only the IPC response header.
*/
struct res_lib_cfg_reopen_log_files {
struct qb_ipc_response_header header __attribute__((aligned(8)));
};
/**
* @brief corosync_administrative_target_t enum
*
* Four administrative target kinds, numbered 0 to 3.
*/
typedef enum {
AIS_AMF_ADMINISTRATIVETARGET_SERVICEUNIT = 0,
AIS_AMF_ADMINISTRATIVETARGET_SERVICEGROUP = 1,
AIS_AMF_ADMINISTRATIVETARGET_COMPONENTSERVICEINSTANCE = 2,
AIS_AMF_ADMINISTRATIVETARGET_NODE = 3
} corosync_administrative_target_t;
/**
* @brief corosync_administrative_state_t enum
*
* Three administrative states, numbered 0 to 2.
*/
typedef enum {
AIS_AMF_ADMINISTRATIVESTATE_UNLOCKED = 0,
AIS_AMF_ADMINISTRATIVESTATE_LOCKED = 1,
AIS_AMF_ADMINISTRATIVESTATE_STOPPING = 2
} corosync_administrative_state_t;
/**
* @brief corosync_shutdown_flags_t enum
*
* The values are numerically identical to the
* COROSYNC_CFG_SHUTDOWN_FLAG_* constants in corosync/cfg.h
* (REQUEST = 0, REGARDLESS = 1, IMMEDIATE = 2).
*/
typedef enum {
CFG_SHUTDOWN_FLAG_REQUEST = 0,
CFG_SHUTDOWN_FLAG_REGARDLESS = 1,
CFG_SHUTDOWN_FLAG_IMMEDIATE = 2,
} corosync_shutdown_flags_t;
#endif /* IPC_CFG_H_DEFINED */
diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h
index 6f43527a..8b166566 100644
--- a/include/corosync/totem/totem.h
+++ b/include/corosync/totem/totem.h
@@ -1,273 +1,292 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2012 Red Hat, Inc.
*
* Author: Steven Dake (sdake@redhat.com)
*
* All rights reserved.
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TOTEM_H_DEFINED
#define TOTEM_H_DEFINED
#include "totemip.h"
#include <libknet.h>
#include <corosync/hdb.h>
#include <corosync/totem/totemstats.h>
/*
 * Compile-time size limits, selected by HAVE_SMALL_MEMORY_FOOTPRINT.
 *
 * MESSAGE_SIZE_MAX is wrapped in parentheses so that it behaves as a
 * single value in any expression. Without them, x / MESSAGE_SIZE_MAX
 * would be evaluated as (x / 1024) * 1024.
 *
 * In the default configuration MESSAGE_QUEUE_MAX is not a constant: it
 * expands to an expression that reads totem_config->net_mtu, so a
 * variable named totem_config must be in scope wherever it is used.
 */
#ifdef HAVE_SMALL_MEMORY_FOOTPRINT
#define PROCESSOR_COUNT_MAX 16
#define MESSAGE_SIZE_MAX (1024*64)
#define MESSAGE_QUEUE_MAX 512
#else
#define PROCESSOR_COUNT_MAX 384
#define MESSAGE_SIZE_MAX (1024*1024) /* (1MB) */
#define MESSAGE_QUEUE_MAX ((4 * MESSAGE_SIZE_MAX) / totem_config->net_mtu)
#endif /* HAVE_SMALL_MEMORY_FOOTPRINT */
/* Frame size limit, taken directly from libknet's KNET_MAX_PACKET_SIZE. */
#define FRAME_SIZE_MAX KNET_MAX_PACKET_SIZE
/* Size of the string buffers in struct totem_config (crypto and compression names). */
#define CONFIG_STRING_LEN_MAX 128
/*
* Estimation of required buffer size for totemudp and totemudpu - it should be at least
* sizeof(memb_join) + PROCESSOR_MAX * 2 * sizeof(srp_addr))
* if we want to support PROCESSOR_MAX nodes, but because we don't have
* srp_addr and memb_join, we have to use estimation.
* TODO: Consider moving srp_addr/memb_join into totem headers instead of totemsrp.c
*
* The macro refers to INTERFACE_MAX, which is defined a few lines below;
* this works because the name is only expanded where the macro is used.
*/
#define UDP_RECEIVE_FRAME_SIZE_MAX (PROCESSOR_COUNT_MAX * (INTERFACE_MAX * 2 * sizeof(struct totem_ip_address)) + 1024)
#define TRANSMITS_ALLOWED 16
#define SEND_THREADS_MAX 16
/* This must be <= KNET_MAX_LINK */
#define INTERFACE_MAX 8
/* NOTE(review): the unit of BIND_RETRIES_INTERVAL is not visible in this header. */
#define BIND_MAX_RETRIES 10
#define BIND_RETRIES_INTERVAL 100
/**
* Maximum number of continuous gather states
*/
#define MAX_NO_CONT_GATHER 3
/*
* Maximum number of consecutive failures returned by the sendmsg call
*/
#define MAX_NO_CONT_SENDMSG_FAILURES 30
/*
 * Settings of one totem interface: its addresses, port and TTL, a
 * "configured" flag, the knet link parameters and a member address list.
 *
 * member_list is a fixed array of PROCESSOR_COUNT_MAX entries.
 * NOTE(review): member_count presumably gives the number of valid
 * entries in member_list - confirm against the code that fills it.
 */
struct totem_interface {
struct totem_ip_address bindnet;
struct totem_ip_address boundto;
struct totem_ip_address mcast_addr;
struct totem_ip_address local_ip;
uint16_t ip_port;
uint16_t ttl;
uint8_t configured;
int member_count;
int knet_link_priority;
int knet_ping_interval;
int knet_ping_timeout;
int knet_ping_precision;
int knet_pong_count;
int knet_transport;
struct totem_ip_address member_list[PROCESSOR_COUNT_MAX];
};
/*
 * Logging hooks handed to the totem code: a printf-style log function,
 * one numeric value per log level and a subsystem id.
 *
 * The format attribute tells the compiler that argument 6 of log_printf
 * is the printf format string and that the variadic arguments start at
 * argument 7.
 */
struct totem_logging_configuration {
void (*log_printf) (
int level,
int subsys,
const char *function_name,
const char *file_name,
int file_line,
const char *format,
...) __attribute__((format(printf, 6, 7)));
int log_level_security;
int log_level_error;
int log_level_warning;
int log_level_notice;
int log_level_debug;
int log_level_trace;
int log_subsys_id;
};
/*
* COrosync TOtem. Also used as an endian_detector.
*/
#define TOTEM_MH_MAGIC 0xC070
#define TOTEM_MH_VERSION 0x03
/*
 * Header of a totem message. The struct is packed, so no padding is
 * inserted between the fields.
 * NOTE(review): magic and version presumably carry TOTEM_MH_MAGIC and
 * TOTEM_MH_VERSION - confirm against the code that builds the header.
 */
struct totem_message_header {
unsigned short magic;
char version;
char type;
char encapsulated;
unsigned int nodeid;
unsigned int target_nodeid;
} __attribute__((packed));
/* Private key length limits, taken from libknet's key length limits. */
enum {
TOTEM_PRIVATE_KEY_LEN_MIN = KNET_MIN_KEY_LEN,
TOTEM_PRIVATE_KEY_LEN_MAX = KNET_MAX_KEY_LEN
};
/* Size of the link_mode buffer in struct totem_config. */
enum { TOTEM_LINK_MODE_BYTES = 64 };
/* Transport selector; stored in totem_config.transport_number. */
typedef enum {
TOTEM_TRANSPORT_UDP = 0,
TOTEM_TRANSPORT_UDPU = 1,
TOTEM_TRANSPORT_KNET = 2
} totem_transport_t;
/* Marker macro; defined with no value next to struct memb_ring_id. */
#define MEMB_RING_ID
/* Ring identifier made of a rep and a seq field; packed, so no padding between them. */
struct memb_ring_id {
unsigned int rep;
unsigned long long seq;
} __attribute__((packed));
/* Phase argument of totempg_crypto_reconfigure_phase(). */
typedef enum {
CRYPTO_RECONFIG_PHASE_ACTIVATE = 1,
CRYPTO_RECONFIG_PHASE_CLEANUP = 2,
} cfg_message_crypto_reconfig_phase_t;
/*
 * Aggregate of the totem settings: interface tables, node id, key
 * material, protocol timeouts and constants, logging hooks, MTU, crypto
 * and compression selection, transport and IP version, and two function
 * pointers for creating/loading and storing a ring id.
 *
 * The default MESSAGE_QUEUE_MAX macro reads the net_mtu field through a
 * variable named totem_config.
 */
struct totem_config {
int version;
/*
* network
*/
struct totem_interface *interfaces;
struct totem_interface *orig_interfaces; /* for reload */
unsigned int node_id;
unsigned int clear_node_high_bit;
unsigned int knet_pmtud_interval;
/*
* key information
*/
unsigned char private_key[TOTEM_PRIVATE_KEY_LEN_MAX];
unsigned int private_key_len;
/*
* Totem configuration parameters
*/
unsigned int token_timeout;
unsigned int token_warning;
unsigned int token_retransmit_timeout;
unsigned int token_hold_timeout;
unsigned int token_retransmits_before_loss_const;
unsigned int join_timeout;
unsigned int send_join_timeout;
unsigned int consensus_timeout;
unsigned int merge_timeout;
unsigned int downcheck_timeout;
unsigned int fail_to_recv_const;
unsigned int seqno_unchanged_const;
char link_mode[TOTEM_LINK_MODE_BYTES];
struct totem_logging_configuration totem_logging_configuration;
unsigned int net_mtu;
unsigned int threads;
unsigned int heartbeat_failures_allowed;
unsigned int max_network_delay;
unsigned int window_size;
unsigned int max_messages;
unsigned int broadcast_use;
char crypto_model[CONFIG_STRING_LEN_MAX];
char crypto_cipher_type[CONFIG_STRING_LEN_MAX];
char crypto_hash_type[CONFIG_STRING_LEN_MAX];
int crypto_index; /* Num of crypto config currently loaded into knet ( 1 or 2 ) */
int crypto_changed; /* Has crypto changed since last time? (it's expensive to reload) */
char knet_compression_model[CONFIG_STRING_LEN_MAX];
uint32_t knet_compression_threshold;
int knet_compression_level;
totem_transport_t transport_number;
unsigned int miss_count_const;
enum totem_ip_version_enum ip_version;
unsigned int block_unlisted_ips;
/* Hook taking a writable ring id and a node id. */
void (*totem_memb_ring_id_create_or_load) (
struct memb_ring_id *memb_ring_id,
unsigned int nodeid);
/* Hook taking a read-only ring id and a node id. */
void (*totem_memb_ring_id_store) (
const struct memb_ring_id *memb_ring_id,
unsigned int nodeid);
};
+/*
+ * Node status returned from the API
+ * Usually the same as the cfg version (except for
+ * link_status)
+ *
+ * Here link_status holds KNET_MAX_LINK entries of libknet's
+ * struct knet_link_status, whereas struct corosync_knet_node_status in
+ * corosync/cfg.h holds CFG_MAX_LINKS entries of its own link struct.
+ * NOTE(review): code converting between the two presumably has to bound
+ * the copy by the smaller of the two counts - confirm.
+ */
+#define TOTEM_NODE_STATUS_STRUCTURE_VERSION 1
+struct totem_node_status {
+ uint32_t version; /* Structure version */
+ unsigned int nodeid;
+ uint8_t reachable;
+ uint8_t remote;
+ uint8_t external;
+ uint8_t onwire_min;
+ uint8_t onwire_max;
+ uint8_t onwire_ver;
+ struct knet_link_status link_status[KNET_MAX_LINK];
+};
+
+
/* Marker macro; defined with no value next to enum totem_configuration_type. */
#define TOTEM_CONFIGURATION_TYPE
/* Kind of configuration; the enumerators take the implicit values 0 and 1. */
enum totem_configuration_type {
TOTEM_CONFIGURATION_REGULAR,
TOTEM_CONFIGURATION_TRANSITIONAL
};
/* Marker macro; defined with no value next to enum totem_callback_token_type. */
#define TOTEM_CALLBACK_TOKEN_TYPE
/* Token callback kind, taken by totempg_callback_token_create(). */
enum totem_callback_token_type {
TOTEM_CALLBACK_TOKEN_RECEIVED = 1,
TOTEM_CALLBACK_TOKEN_SENT = 2
};
/* Event kind, taken by totempg_event_signal(). */
enum totem_event_type {
TOTEM_EVENT_DELIVERY_CONGESTED,
TOTEM_EVENT_NEW_MSG,
};
#endif /* TOTEM_H_DEFINED */
diff --git a/include/corosync/totem/totempg.h b/include/corosync/totem/totempg.h
index af9bf71f..d63540cf 100644
--- a/include/corosync/totem/totempg.h
+++ b/include/corosync/totem/totempg.h
@@ -1,207 +1,210 @@
/*
* Copyright (c) 2003-2005 MontaVista Software, Inc.
* Copyright (c) 2006-2011 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file
* Totem Single Ring Protocol
*
* depends on poll abstraction, POSIX, IPV4
*/
#ifndef TOTEMPG_H_DEFINED
#define TOTEMPG_H_DEFINED
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/types.h>
#include <netinet/in.h>
#include "totem.h"
#include <qb/qbloop.h>
/* A group name given as a pointer plus a length in bytes. */
struct totempg_group {
const void *group;
size_t group_len;
};
/*
 * NOTE(review): presumably the values for the "guarantee" argument of
 * the totempg_groups_mcast_* functions - confirm in totempg.c.
 */
#define TOTEMPG_AGREED 0
#define TOTEMPG_SAFE 1
/**
* Initialize the totem process groups abstraction
*
* Takes the libqb main loop handle and the totem configuration.
*/
extern int totempg_initialize (
qb_loop_t* poll_handle,
struct totem_config *totem_config
);
extern void totempg_finalize (void);
/*
 * Token callbacks: create takes a callback kind, a "delete" flag, the
 * callback function and its data, and hands a handle back through
 * handle_out; destroy takes such a handle.
 */
extern int totempg_callback_token_create (void **handle_out,
enum totem_callback_token_type type,
int delete,
int (*callback_fn) (enum totem_callback_token_type type, const void *),
const void *data);
extern void totempg_callback_token_destroy (void *handle);
/**
* Initialize a groups instance
*
* deliver_fn takes the sending node id, the message, its length and a
* flag saying whether endian conversion is required.
* confchg_fn takes the configuration type, the member, left and joined
* node-id lists with their entry counts, and the ring id.
*/
extern int totempg_groups_initialize (
void **instance,
void (*deliver_fn) (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required),
void (*confchg_fn) (
enum totem_configuration_type configuration_type,
const unsigned int *member_list, size_t member_list_entries,
const unsigned int *left_list, size_t left_list_entries,
const unsigned int *joined_list, size_t joined_list_entries,
const struct memb_ring_id *ring_id));
extern int totempg_groups_finalize (void *instance);
/* Join and leave take an array of groups and its element count. */
extern int totempg_groups_join (
void *instance,
const struct totempg_group *groups,
size_t group_cnt);
extern int totempg_groups_leave (
void *instance,
const struct totempg_group *groups,
size_t group_cnt);
/* Messages are passed as an iovec array plus its element count. */
extern int totempg_groups_mcast_joined (
void *instance,
const struct iovec *iovec,
unsigned int iov_len,
int guarantee);
extern int totempg_groups_joined_reserve (
void *instance,
const struct iovec *iovec,
unsigned int iov_len);
extern int totempg_groups_joined_release (
int msg_count);
extern int totempg_groups_mcast_groups (
void *instance,
int guarantee,
const struct totempg_group *groups,
size_t groups_cnt,
const struct iovec *iovec,
unsigned int iov_len);
extern int totempg_groups_send_ok_groups (
void *instance,
const struct totempg_group *groups,
size_t groups_cnt,
const struct iovec *iovec,
unsigned int iov_len);
extern int totempg_ifaces_get (
unsigned int nodeid,
unsigned int *interface_id,
struct totem_ip_address *interfaces,
unsigned int interfaces_size,
char ***status,
unsigned int *iface_count);
+extern int totempg_nodestatus_get (unsigned int nodeid,
+ struct totem_node_status *node_status);
+
extern void* totempg_get_stats (void);
void totempg_event_signal (enum totem_event_type type, int value);
extern const char *totempg_ifaces_print (unsigned int nodeid);
extern unsigned int totempg_my_nodeid_get (void);
extern int totempg_my_family_get (void);
extern int totempg_crypto_set (const char *cipher_type, const char *hash_type);
extern void totempg_service_ready_register (
void (*totem_service_ready) (void));
extern int totempg_iface_set (
struct totem_ip_address *interface_addr,
unsigned short ip_port,
unsigned int iface_no);
extern int totempg_member_add (
const struct totem_ip_address *member,
int ring_no);
extern int totempg_member_remove (
const struct totem_ip_address *member,
int ring_no);
/**
 * Queue level value handed to a totem_queue_level_changed_fn callback.
 *
 * NOTE(review): the thresholds that separate LOW / GOOD / HIGH / CRITICAL
 * are not visible in this header - see the implementation of
 * totempg_check_q_level() for the actual boundaries.
 */
enum totem_q_level {
TOTEM_Q_LEVEL_LOW,
TOTEM_Q_LEVEL_GOOD,
TOTEM_Q_LEVEL_HIGH,
TOTEM_Q_LEVEL_CRITICAL
};
void totempg_check_q_level(void *instance);
typedef void (*totem_queue_level_changed_fn) (enum totem_q_level level);
extern void totempg_queue_level_register_callback (totem_queue_level_changed_fn);
extern void totempg_threaded_mode_enable (void);
extern void totempg_trans_ack (void);
extern int totempg_reconfigure (void);
extern int totempg_crypto_reconfigure_phase (cfg_message_crypto_reconfig_phase_t phase);
extern void totempg_force_gather (void);
extern void totempg_get_config(struct totem_config *config);
extern void totempg_put_config(struct totem_config *config);
#ifdef __cplusplus
}
#endif
#endif /* TOTEMPG_H_DEFINED */
diff --git a/lib/cfg.c b/lib/cfg.c
index 8a01c589..16ce6be5 100644
--- a/lib/cfg.c
+++ b/lib/cfg.c
@@ -1,686 +1,736 @@
/*
* Copyright (c) 2002-2005 MontaVista Software, Inc.
- * Copyright (c) 2006-2018 Red Hat, Inc.
+ * Copyright (c) 2006-2020 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <pthread.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/select.h>
#include <sys/un.h>
#include <sys/uio.h>
#include <qb/qbipcc.h>
#include <corosync/corotypes.h>
#include <corosync/corodefs.h>
#include <corosync/hdb.h>
#include <corosync/cfg.h>
#include <corosync/ipc_cfg.h>
#include "util.h"
/*
* Data structure for instance data
*/
struct cfg_inst {
qb_ipcc_connection_t *c;
corosync_cfg_callbacks_t callbacks;
cs_name_t comp_name;
int comp_registered;
int finalize;
};
/*
* All instances in one database
*/
static void cfg_inst_free (void *inst);
DECLARE_HDB_DATABASE (cfg_hdb, cfg_inst_free);
/*
* Implementation
*/
/**
 * Create a new cfg library instance and connect it to the "cfg" IPC service.
 *
 * @param cfg_handle    out: receives the handle of the new instance
 * @param cfg_callbacks optional callback table; copied into the instance
 *                      when non-NULL
 * @return CS_OK on success, otherwise the converted hdb / IPC error. On
 *         failure any handle that was created is destroyed again.
 */
cs_error_t
corosync_cfg_initialize (
	corosync_cfg_handle_t *cfg_handle,
	const corosync_cfg_callbacks_t *cfg_callbacks)
{
	struct cfg_inst *inst;
	cs_error_t rc;

	rc = hdb_error_to_cs (hdb_handle_create (&cfg_hdb, sizeof (struct cfg_inst), cfg_handle));
	if (rc != CS_OK) {
		/* Nothing was created, nothing to undo */
		return (rc);
	}

	rc = hdb_error_to_cs (hdb_handle_get (&cfg_hdb, *cfg_handle, (void *)&inst));
	if (rc != CS_OK) {
		(void)hdb_handle_destroy (&cfg_hdb, *cfg_handle);
		return (rc);
	}

	inst->finalize = 0;
	inst->c = qb_ipcc_connect ("cfg", IPC_REQUEST_SIZE);
	if (inst->c == NULL) {
		/* Capture errno before the cleanup calls can disturb it */
		rc = qb_to_cs_error(-errno);
		(void)hdb_handle_put (&cfg_hdb, *cfg_handle);
		(void)hdb_handle_destroy (&cfg_hdb, *cfg_handle);
		return (rc);
	}

	if (cfg_callbacks != NULL) {
		memcpy (&inst->callbacks, cfg_callbacks, sizeof (corosync_cfg_callbacks_t));
	}

	(void)hdb_handle_put (&cfg_hdb, *cfg_handle);

	return (CS_OK);
}
/**
 * Retrieve the file descriptor of the instance's IPC connection.
 *
 * @param cfg_handle   handle of the cfg instance
 * @param selection_fd out: filled in by qb_ipcc_fd_get()
 * @return converted hdb error if the handle lookup fails, otherwise the
 *         converted result of qb_ipcc_fd_get()
 */
cs_error_t
corosync_cfg_fd_get (
	corosync_cfg_handle_t cfg_handle,
	int32_t *selection_fd)
{
	struct cfg_inst *inst;
	cs_error_t rc;

	rc = hdb_error_to_cs (hdb_handle_get (&cfg_hdb, cfg_handle, (void *)&inst));
	if (rc == CS_OK) {
		rc = qb_to_cs_error (qb_ipcc_fd_get (inst->c, selection_fd));
		(void)hdb_handle_put (&cfg_hdb, cfg_handle);
	}

	return (rc);
}
/**
 * Receive events from the cfg IPC connection and hand them to the callbacks
 * registered for the instance.
 *
 * @param cfg_handle     handle of the cfg instance
 * @param dispatch_flags CS_DISPATCH_ALL and CS_DISPATCH_ONE_NONBLOCKING poll
 *                       with a zero timeout; any other value waits
 *                       indefinitely for an event
 * @return CS_OK on success; CS_ERR_TRY_AGAIN is passed through unchanged for
 *         CS_DISPATCH_ONE_NONBLOCKING; CS_ERR_BAD_HANDLE if the instance was
 *         finalized while dispatching; CS_ERR_LIBRARY if an event with an
 *         unknown id was received; otherwise the converted IPC/hdb error.
 *
 * Every exit path taken after the handle lookup succeeded releases the
 * handle reference again (including the unknown-message path, which
 * previously returned without calling hdb_handle_put()).
 */
cs_error_t
corosync_cfg_dispatch (
	corosync_cfg_handle_t cfg_handle,
	cs_dispatch_flags_t dispatch_flags)
{
	int timeout = -1;
	cs_error_t error;
	int cont = 1; /* always continue do loop except when set to 0 */
	struct cfg_inst *cfg_inst;
	struct res_lib_cfg_testshutdown *res_lib_cfg_testshutdown;
	corosync_cfg_callbacks_t callbacks;
	struct qb_ipc_response_header *dispatch_data;
	char dispatch_buf[IPC_DISPATCH_SIZE];

	error = hdb_error_to_cs (hdb_handle_get (&cfg_hdb, cfg_handle,
		(void *)&cfg_inst));
	if (error != CS_OK) {
		return (error);
	}

	/*
	 * Timeout instantly for CS_DISPATCH_ONE_NONBLOCKING or CS_DISPATCH_ALL and
	 * wait indefinitely for CS_DISPATCH_ONE or CS_DISPATCH_BLOCKING
	 */
	if (dispatch_flags == CS_DISPATCH_ALL || dispatch_flags == CS_DISPATCH_ONE_NONBLOCKING) {
		timeout = 0;
	}

	dispatch_data = (struct qb_ipc_response_header *)dispatch_buf;
	do {
		error = qb_to_cs_error (qb_ipcc_event_recv (
			cfg_inst->c,
			dispatch_buf,
			IPC_DISPATCH_SIZE,
			timeout));
		if (error == CS_ERR_BAD_HANDLE) {
			/* Reported to the caller as success, as before */
			error = CS_OK;
			goto error_put;
		}
		if (error == CS_ERR_TRY_AGAIN) {
			if (dispatch_flags == CS_DISPATCH_ONE_NONBLOCKING) {
				/*
				 * Don't mask error
				 */
				goto error_put;
			}
			error = CS_OK;
			if (dispatch_flags == CS_DISPATCH_ALL) {
				break; /* exit do while cont is 1 loop */
			} else {
				continue; /* next poll */
			}
		}
		if (error != CS_OK) {
			goto error_put;
		}

		/*
		 * Make copy of callbacks, message data, unlock instance, and call callback
		 * A risk of this dispatch method is that the callback routines may
		 * operate at the same time that cfgFinalize has been called in another thread.
		 */
		memcpy (&callbacks, &cfg_inst->callbacks, sizeof (corosync_cfg_callbacks_t));

		/*
		 * Dispatch incoming response
		 */
		switch (dispatch_data->id) {
		case MESSAGE_RES_CFG_TESTSHUTDOWN:
			if (callbacks.corosync_cfg_shutdown_callback == NULL) {
				break;
			}

			res_lib_cfg_testshutdown = (struct res_lib_cfg_testshutdown *)dispatch_data;
			callbacks.corosync_cfg_shutdown_callback(cfg_handle, res_lib_cfg_testshutdown->flags);
			break;
		default:
			/*
			 * Unknown message: report it, but still drop the handle
			 * reference taken at the top of the function.
			 */
			error = CS_ERR_LIBRARY;
			goto error_put;
		}

		if (cfg_inst->finalize) {
			/*
			 * If the finalize has been called then get out of the dispatch.
			 */
			error = CS_ERR_BAD_HANDLE;
			goto error_put;
		}

		/*
		 * Determine if more messages should be processed
		 */
		if (dispatch_flags == CS_DISPATCH_ONE || dispatch_flags == CS_DISPATCH_ONE_NONBLOCKING) {
			cont = 0;
		}
	} while (cont);

error_put:
	(void)hdb_handle_put (&cfg_hdb, cfg_handle);

	return (error);
}
/*
 * Instance clean-up routine registered with cfg_hdb through
 * DECLARE_HDB_DATABASE: closes the instance's IPC connection.
 */
static void cfg_inst_free (void *inst)
{
	qb_ipcc_disconnect(((struct cfg_inst *)inst)->c);
}
/**
 * Shut down a cfg instance.
 *
 * @param cfg_handle handle of the cfg instance
 * @return converted hdb error if the handle lookup fails,
 *         CS_ERR_BAD_HANDLE if finalize was already started for this
 *         instance, otherwise CS_OK
 */
cs_error_t
corosync_cfg_finalize (
	corosync_cfg_handle_t cfg_handle)
{
	struct cfg_inst *inst;
	cs_error_t rc;

	rc = hdb_error_to_cs(hdb_handle_get (&cfg_hdb, cfg_handle, (void *)&inst));
	if (rc != CS_OK) {
		return (rc);
	}

	if (inst->finalize) {
		/* Another thread has already started finalizing */
		rc = CS_ERR_BAD_HANDLE;
	} else {
		inst->finalize = 1;
		(void)hdb_handle_destroy (&cfg_hdb, cfg_handle);
	}

	/* Drop the reference taken by hdb_handle_get() above */
	(void)hdb_handle_put (&cfg_hdb, cfg_handle);

	return (rc);
}
/**
 * Query the interface names and interface status strings carried in the
 * MESSAGE_REQ_CFG_RINGSTATUSGET reply.
 *
 * @param cfg_handle      handle of the cfg instance
 * @param interface_names out: malloc()ed array of strdup()ed names
 * @param status          out: malloc()ed array of strdup()ed status strings
 * @param interface_count out: number of entries in both arrays
 * @return CS_OK on success (the caller then owns both arrays and every
 *         string in them), CS_ERR_NO_MEMORY if an allocation failed (all
 *         partial allocations are released), or the converted hdb/IPC error.
 *
 * The handle reference is released on every path after the lookup
 * succeeded; the first allocation failure used to return without doing so.
 */
cs_error_t
corosync_cfg_ring_status_get (
	corosync_cfg_handle_t cfg_handle,
	char ***interface_names,
	char ***status,
	unsigned int *interface_count)
{
	struct cfg_inst *cfg_inst;
	struct req_lib_cfg_ringstatusget req_lib_cfg_ringstatusget;
	struct res_lib_cfg_ringstatusget res_lib_cfg_ringstatusget;
	unsigned int i, j;
	cs_error_t error;
	struct iovec iov;

	error = hdb_error_to_cs(hdb_handle_get (&cfg_hdb, cfg_handle, (void *)&cfg_inst));
	if (error != CS_OK) {
		return (error);
	}

	req_lib_cfg_ringstatusget.header.size = sizeof (struct req_lib_cfg_ringstatusget);
	req_lib_cfg_ringstatusget.header.id = MESSAGE_REQ_CFG_RINGSTATUSGET;

	iov.iov_base = (void *)&req_lib_cfg_ringstatusget;
	iov.iov_len = sizeof (struct req_lib_cfg_ringstatusget);

	error = qb_to_cs_error (qb_ipcc_sendv_recv(cfg_inst->c,
		&iov,
		1,
		&res_lib_cfg_ringstatusget,
		sizeof (struct res_lib_cfg_ringstatusget), CS_IPC_TIMEOUT_MS));
	if (error != CS_OK) {
		goto exit_handle_put;
	}

	*interface_count = res_lib_cfg_ringstatusget.interface_count;
	*interface_names = malloc (sizeof (char *) * *interface_count);
	if (*interface_names == NULL) {
		/* Must still release the handle, so no direct return here */
		error = CS_ERR_NO_MEMORY;
		goto exit_handle_put;
	}
	memset (*interface_names, 0, sizeof (char *) * *interface_count);

	*status = malloc (sizeof (char *) * *interface_count);
	if (*status == NULL) {
		error = CS_ERR_NO_MEMORY;
		goto error_free_interface_names_array;
	}
	memset (*status, 0, sizeof (char *) * *interface_count);

	for (i = 0; i < res_lib_cfg_ringstatusget.interface_count; i++) {
		(*(interface_names))[i] = strdup (res_lib_cfg_ringstatusget.interface_name[i]);
		if ((*(interface_names))[i] == NULL) {
			error = CS_ERR_NO_MEMORY;
			goto error_free_interface_names;
		}
	}

	for (i = 0; i < res_lib_cfg_ringstatusget.interface_count; i++) {
		(*(status))[i] = strdup (res_lib_cfg_ringstatusget.interface_status[i]);
		if ((*(status))[i] == NULL) {
			error = CS_ERR_NO_MEMORY;
			goto error_free_status;
		}
	}
	goto exit_handle_put;

error_free_status:
	/* Only the first i status strings were duplicated */
	for (j = 0; j < i; j++) {
		free ((*(status))[j]);
	}
	/* All names were duplicated by now, so free every one of them below */
	i = *interface_count;

error_free_interface_names:
	for (j = 0; j < i; j++) {
		free ((*(interface_names))[j]);
	}

	free (*status);

error_free_interface_names_array:
	free (*interface_names);

exit_handle_put:
	(void)hdb_handle_put (&cfg_hdb, cfg_handle);

	return (error);
}
+/**
+ * Ask corosync for the status of one node.
+ *
+ * @param cfg_handle  handle of the cfg instance
+ * @param nodeid      node to query
+ * @param node_status out: filled from the reply when the IPC call succeeds
+ * @return CS_ERR_INVALID_PARAM if node_status is NULL, the converted
+ *         hdb/IPC error if the request could not be made,
+ *         CS_ERR_NOT_SUPPORTED if the reply carries a structure version
+ *         other than CFG_NODE_STATUS_STRUCT_VERSION, otherwise CS_OK.
+ *
+ * The reply is only inspected when the IPC call succeeded; on failure the
+ * reply buffer is uninitialized and the IPC error is returned unchanged.
+ */
+cs_error_t
+corosync_cfg_node_status_get (
+	corosync_cfg_handle_t cfg_handle,
+	unsigned int nodeid,
+	struct corosync_knet_node_status *node_status)
+{
+	struct cfg_inst *cfg_inst;
+	struct req_lib_cfg_nodestatusget req_lib_cfg_nodestatusget;
+	struct res_lib_cfg_nodestatusget res_lib_cfg_nodestatusget;
+	cs_error_t error;
+	struct iovec iov;
+
+	if (!node_status) {
+		return (CS_ERR_INVALID_PARAM);
+	}
+
+	error = hdb_error_to_cs(hdb_handle_get (&cfg_hdb, cfg_handle, (void *)&cfg_inst));
+	if (error != CS_OK) {
+		return (error);
+	}
+
+	req_lib_cfg_nodestatusget.header.size = sizeof (struct req_lib_cfg_nodestatusget);
+	req_lib_cfg_nodestatusget.header.id = MESSAGE_REQ_CFG_NODESTATUSGET;
+	req_lib_cfg_nodestatusget.nodeid = nodeid;
+	req_lib_cfg_nodestatusget.version = CFG_NODE_STATUS_STRUCT_VERSION;
+
+	iov.iov_base = (void *)&req_lib_cfg_nodestatusget;
+	iov.iov_len = sizeof (struct req_lib_cfg_nodestatusget);
+
+	error = qb_to_cs_error (qb_ipcc_sendv_recv(cfg_inst->c,
+		&iov,
+		1,
+		&res_lib_cfg_nodestatusget,
+		sizeof (struct res_lib_cfg_nodestatusget), CS_IPC_TIMEOUT_MS));
+
+	if (error == CS_OK) {
+		memcpy(node_status, &res_lib_cfg_nodestatusget.node_status, sizeof(struct corosync_knet_node_status));
+
+		/* corosync sent us something we don't really understand.
+		   - we might need to revisit this in the case of future structure versions */
+		if (res_lib_cfg_nodestatusget.node_status.version != CFG_NODE_STATUS_STRUCT_VERSION) {
+			error = CS_ERR_NOT_SUPPORTED;
+		}
+	}
+
+	(void)hdb_handle_put (&cfg_hdb, cfg_handle);
+
+	return (error);
+}
+
/**
 * Ask corosync to kill a node.
 *
 * @param cfg_handle handle of the cfg instance
 * @param nodeid     node to kill
 * @param reason     NUL-terminated reason text, shorter than
 *                   CS_MAX_NAME_LENGTH
 * @return CS_ERR_INVALID_PARAM if reason is NULL, CS_ERR_NAME_TOO_LONG if
 *         it does not fit, the converted hdb/IPC error if the request could
 *         not be made, otherwise the error code from the reply header.
 *
 * The reply header is only read when the IPC call itself succeeded; the
 * transport error used to be overwritten unconditionally with the contents
 * of a reply buffer that may never have been filled in.
 */
cs_error_t
corosync_cfg_kill_node (
	corosync_cfg_handle_t cfg_handle,
	unsigned int nodeid,
	const char *reason)
{
	struct cfg_inst *cfg_inst;
	struct req_lib_cfg_killnode req_lib_cfg_killnode;
	struct res_lib_cfg_killnode res_lib_cfg_killnode;
	cs_error_t error;
	struct iovec iov;
	size_t reason_len;

	if (reason == NULL) {
		return (CS_ERR_INVALID_PARAM);
	}

	reason_len = strlen(reason);
	if (reason_len >= CS_MAX_NAME_LENGTH) {
		return (CS_ERR_NAME_TOO_LONG);
	}

	error = hdb_error_to_cs (hdb_handle_get (&cfg_hdb, cfg_handle,
		(void *)&cfg_inst));
	if (error != CS_OK) {
		return (error);
	}

	req_lib_cfg_killnode.header.id = MESSAGE_REQ_CFG_KILLNODE;
	req_lib_cfg_killnode.header.size = sizeof (struct req_lib_cfg_killnode);
	req_lib_cfg_killnode.nodeid = nodeid;
	/* Length was bounded above, so the copy cannot overrun reason.value */
	strcpy((char *)req_lib_cfg_killnode.reason.value, reason);
	req_lib_cfg_killnode.reason.length = reason_len + 1;

	iov.iov_base = (void *)&req_lib_cfg_killnode;
	iov.iov_len = sizeof (struct req_lib_cfg_killnode);

	error = qb_to_cs_error (qb_ipcc_sendv_recv (cfg_inst->c,
		&iov,
		1,
		&res_lib_cfg_killnode,
		sizeof (struct res_lib_cfg_killnode), CS_IPC_TIMEOUT_MS));

	(void)hdb_handle_put (&cfg_hdb, cfg_handle);

	return (error == CS_OK ? res_lib_cfg_killnode.header.error : error);
}
/**
 * Send a MESSAGE_REQ_CFG_TRYSHUTDOWN request carrying the given flags.
 *
 * @param cfg_handle handle of the cfg instance
 * @param flags      value placed in the request's flags field
 * @return converted hdb/IPC error if the request could not be made,
 *         otherwise the error code from the reply header
 */
cs_error_t
corosync_cfg_try_shutdown (
	corosync_cfg_handle_t cfg_handle,
	corosync_cfg_shutdown_flags_t flags)
{
	struct cfg_inst *inst;
	struct req_lib_cfg_tryshutdown request;
	struct res_lib_cfg_tryshutdown response;
	struct iovec iov;
	cs_error_t rc;

	rc = hdb_error_to_cs(hdb_handle_get (&cfg_hdb, cfg_handle,
		(void *)&inst));
	if (rc != CS_OK) {
		return (rc);
	}

	request.header.id = MESSAGE_REQ_CFG_TRYSHUTDOWN;
	request.header.size = sizeof (struct req_lib_cfg_tryshutdown);
	request.flags = flags;

	iov.iov_base = (void *)&request;
	iov.iov_len = sizeof (request);

	rc = qb_to_cs_error (qb_ipcc_sendv_recv (inst->c,
		&iov,
		1,
		&response,
		sizeof (struct res_lib_cfg_tryshutdown), CS_IPC_TIMEOUT_MS));

	(void)hdb_handle_put (&cfg_hdb, cfg_handle);

	/* Only trust the reply header when the exchange itself worked */
	if (rc == CS_OK) {
		rc = response.header.error;
	}

	return (rc);
}
/**
 * Send a MESSAGE_REQ_CFG_REPLYTOSHUTDOWN request carrying the given
 * response value.
 *
 * @param cfg_handle handle of the cfg instance
 * @param response   value placed in the request's response field
 * @return converted hdb error if the handle lookup fails, otherwise the
 *         converted result of the IPC exchange
 *
 * The handle reference taken for the call is released before returning;
 * it used to be leaked on every invocation.
 */
cs_error_t
corosync_cfg_replyto_shutdown (
	corosync_cfg_handle_t cfg_handle,
	corosync_cfg_shutdown_reply_flags_t response)
{
	struct cfg_inst *cfg_inst;
	struct req_lib_cfg_replytoshutdown req_lib_cfg_replytoshutdown;
	struct res_lib_cfg_replytoshutdown res_lib_cfg_replytoshutdown;
	struct iovec iov;
	cs_error_t error;

	error = hdb_error_to_cs(hdb_handle_get (&cfg_hdb, cfg_handle,
		(void *)&cfg_inst));
	if (error != CS_OK) {
		return (error);
	}

	req_lib_cfg_replytoshutdown.header.id = MESSAGE_REQ_CFG_REPLYTOSHUTDOWN;
	req_lib_cfg_replytoshutdown.header.size = sizeof (struct req_lib_cfg_replytoshutdown);
	req_lib_cfg_replytoshutdown.response = response;

	iov.iov_base = (void *)&req_lib_cfg_replytoshutdown;
	iov.iov_len = sizeof (struct req_lib_cfg_replytoshutdown);

	error = qb_to_cs_error (qb_ipcc_sendv_recv (cfg_inst->c,
		&iov,
		1,
		&res_lib_cfg_replytoshutdown,
		sizeof (struct res_lib_cfg_replytoshutdown), CS_IPC_TIMEOUT_MS));

	/* Balance the hdb_handle_get() above */
	(void)hdb_handle_put (&cfg_hdb, cfg_handle);

	return (error);
}
/**
 * Fetch the addresses of a node and convert them to socket addresses.
 *
 * @param cfg_handle handle of the cfg instance
 * @param nodeid     node whose addresses are requested
 * @param max_addrs  number of entries available in addrs
 * @param num_addrs  out: set to the address count reported in the reply,
 *                   which is not clamped to max_addrs
 * @param addrs      out: at most max_addrs entries are written
 * @return converted hdb/IPC error if the request could not be made,
 *         otherwise the error code from the reply header (also stored in
 *         errno)
 *
 * Only the family and address members of each sockaddr are written here;
 * the remaining members are left as the caller provided them.
 */
cs_error_t corosync_cfg_get_node_addrs (
corosync_cfg_handle_t cfg_handle,
unsigned int nodeid,
size_t max_addrs,
int *num_addrs,
corosync_cfg_node_address_t *addrs)
{
cs_error_t error;
struct req_lib_cfg_get_node_addrs req_lib_cfg_get_node_addrs;
struct res_lib_cfg_get_node_addrs *res_lib_cfg_get_node_addrs;
struct cfg_inst *cfg_inst;
int addrlen = 0;
int i;
struct iovec iov;
const char *addr_buf;
char response_buf[IPC_RESPONSE_SIZE];
/* All-zero reference block used to recognise an empty address slot */
char zeroes[sizeof(struct sockaddr_storage)];
error = hdb_error_to_cs(hdb_handle_get (&cfg_hdb, cfg_handle,
(void *)&cfg_inst));
if (error != CS_OK) {
return (error);
}
memset(zeroes, 0, sizeof(zeroes));
req_lib_cfg_get_node_addrs.header.size = sizeof (req_lib_cfg_get_node_addrs);
req_lib_cfg_get_node_addrs.header.id = MESSAGE_REQ_CFG_GET_NODE_ADDRS;
req_lib_cfg_get_node_addrs.nodeid = nodeid;
iov.iov_base = (char *)&req_lib_cfg_get_node_addrs;
iov.iov_len = sizeof (req_lib_cfg_get_node_addrs);
error = qb_to_cs_error (qb_ipcc_sendv_recv (
cfg_inst->c,
&iov, 1,
response_buf, IPC_RESPONSE_SIZE, CS_IPC_TIMEOUT_MS));
res_lib_cfg_get_node_addrs = (struct res_lib_cfg_get_node_addrs *)response_buf;
if (error != CS_OK) {
goto error_put;
}
/* addrlen stays 0 when the reply's family is neither AF_INET nor AF_INET6 */
if (res_lib_cfg_get_node_addrs->family == AF_INET)
addrlen = sizeof(struct sockaddr_in);
if (res_lib_cfg_get_node_addrs->family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
/* Walk the packed address slots in the reply, TOTEMIP_ADDRLEN bytes apart */
for (i = 0, addr_buf = (char *)res_lib_cfg_get_node_addrs->addrs;
i < max_addrs && i<res_lib_cfg_get_node_addrs->num_addrs;
i++, addr_buf += TOTEMIP_ADDRLEN) {
struct sockaddr_in *in;
struct sockaddr_in6 *in6;
addrs[i].address_length = addrlen;
/*
 * NOTE(review): the memcmp() calls below compare addrlen bytes (the size
 * of a sockaddr structure), while the slots are TOTEMIP_ADDRLEN bytes
 * apart. If TOTEMIP_ADDRLEN is smaller than sizeof(struct sockaddr_in6)
 * the comparison reads into the following slot - confirm against the
 * definition of TOTEMIP_ADDRLEN.
 */
if (res_lib_cfg_get_node_addrs->family == AF_INET) {
in = (struct sockaddr_in *)addrs[i].address;
if (memcmp(addr_buf, zeroes, addrlen) == 0) {
/* All-zero slot: mark it as unused with family 0 */
in->sin_family = 0;
} else {
in->sin_family = AF_INET;
}
memcpy(&in->sin_addr, addr_buf, sizeof(struct in_addr));
}
if (res_lib_cfg_get_node_addrs->family == AF_INET6) {
in6 = (struct sockaddr_in6 *)addrs[i].address;
if (memcmp(addr_buf, zeroes, addrlen) == 0) {
/* All-zero slot: mark it as unused with family 0 */
in6->sin6_family = 0;
} else {
in6->sin6_family = AF_INET6;
}
memcpy(&in6->sin6_addr, addr_buf, sizeof(struct in6_addr));
}
}
*num_addrs = res_lib_cfg_get_node_addrs->num_addrs;
/* The reply's own error code is only consulted after the conversion above */
errno = error = res_lib_cfg_get_node_addrs->header.error;
error_put:
hdb_handle_put (&cfg_hdb, cfg_handle);
return (error);
}
/**
 * Retrieve the node id of the local node.
 *
 * @param handle       handle of the cfg instance
 * @param local_nodeid out: written from the reply when the IPC exchange
 *                     succeeds
 * @return converted hdb/IPC error if the request could not be made,
 *         otherwise the error code from the reply header
 */
cs_error_t corosync_cfg_local_get (
	corosync_cfg_handle_t handle,
	unsigned int *local_nodeid)
{
	struct cfg_inst *inst;
	struct req_lib_cfg_local_get request;
	struct res_lib_cfg_local_get response;
	struct iovec iov;
	cs_error_t rc;

	rc = hdb_error_to_cs(hdb_handle_get (&cfg_hdb, handle, (void *)&inst));
	if (rc != CS_OK) {
		return (rc);
	}

	request.header.size = sizeof (struct qb_ipc_request_header);
	request.header.id = MESSAGE_REQ_CFG_LOCAL_GET;

	iov.iov_base = (void *)&request;
	iov.iov_len = sizeof (struct req_lib_cfg_local_get);

	rc = qb_to_cs_error (qb_ipcc_sendv_recv (
		inst->c,
		&iov,
		1,
		&response,
		sizeof (struct res_lib_cfg_local_get), CS_IPC_TIMEOUT_MS));

	if (rc == CS_OK) {
		rc = response.header.error;
		*local_nodeid = response.local_nodeid;
	}

	(void)hdb_handle_put (&cfg_hdb, handle);

	return (rc);
}
/**
 * Send a MESSAGE_REQ_CFG_RELOAD_CONFIG request to corosync.
 *
 * @param handle handle of the cfg instance
 * @return converted hdb/IPC error if the request could not be made,
 *         otherwise the error code from the reply header
 */
cs_error_t corosync_cfg_reload_config (
	corosync_cfg_handle_t handle)
{
	struct cfg_inst *inst;
	struct req_lib_cfg_reload_config request;
	struct res_lib_cfg_reload_config response;
	struct iovec iov;
	cs_error_t rc;

	rc = hdb_error_to_cs(hdb_handle_get (&cfg_hdb, handle, (void *)&inst));
	if (rc != CS_OK) {
		return (rc);
	}

	request.header.size = sizeof (struct qb_ipc_request_header);
	request.header.id = MESSAGE_REQ_CFG_RELOAD_CONFIG;

	iov.iov_base = (void *)&request;
	iov.iov_len = sizeof (struct req_lib_cfg_reload_config);

	rc = qb_to_cs_error (qb_ipcc_sendv_recv (
		inst->c,
		&iov,
		1,
		&response,
		sizeof (struct res_lib_cfg_reload_config), CS_IPC_TIMEOUT_MS));

	if (rc == CS_OK) {
		rc = response.header.error;
	}

	(void)hdb_handle_put (&cfg_hdb, handle);

	return (rc);
}
/**
 * Send a MESSAGE_REQ_CFG_REOPEN_LOG_FILES request to corosync.
 *
 * @param handle handle of the cfg instance
 * @return converted hdb/IPC error if the request could not be made,
 *         otherwise the error code from the reply header
 */
cs_error_t corosync_cfg_reopen_log_files (
	corosync_cfg_handle_t handle)
{
	struct cfg_inst *inst;
	struct req_lib_cfg_reopen_log_files request;
	struct res_lib_cfg_reopen_log_files response;
	struct iovec iov;
	cs_error_t rc;

	rc = hdb_error_to_cs(hdb_handle_get (&cfg_hdb, handle, (void *)&inst));
	if (rc != CS_OK) {
		return (rc);
	}

	request.header.size = sizeof (struct qb_ipc_request_header);
	request.header.id = MESSAGE_REQ_CFG_REOPEN_LOG_FILES;

	iov.iov_base = (void *)&request;
	iov.iov_len = sizeof (struct req_lib_cfg_reopen_log_files);

	rc = qb_to_cs_error (qb_ipcc_sendv_recv (
		inst->c,
		&iov,
		1,
		&response,
		sizeof (struct res_lib_cfg_reopen_log_files), CS_IPC_TIMEOUT_MS));

	if (rc == CS_OK) {
		rc = response.header.error;
	}

	(void)hdb_handle_put (&cfg_hdb, handle);

	return (rc);
}
diff --git a/lib/libcfg.versions b/lib/libcfg.versions
index a87727c0..8fba9184 100644
--- a/lib/libcfg.versions
+++ b/lib/libcfg.versions
@@ -1,17 +1,18 @@
# Version and symbol export for libcfg.so
COROSYNC_CFG_0.82 {
global:
corosync_cfg_initialize;
corosync_cfg_fd_get;
corosync_cfg_dispatch;
corosync_cfg_finalize;
corosync_cfg_administrative_state_get;
corosync_cfg_administrative_state_set;
corosync_cfg_track;
corosync_cfg_track_stop;
corosync_cfg_ring_status_get;
+ corosync_cfg_node_status_get;
corosync_cfg_ring_reenable;
corosync_cfg_service_load;
corosync_cfg_service_unload;
};
diff --git a/lib/libcfg.verso b/lib/libcfg.verso
index a3fcc712..0ee843cc 100644
--- a/lib/libcfg.verso
+++ b/lib/libcfg.verso
@@ -1 +1 @@
-7.1.0
+7.2.0
diff --git a/man/corosync-cfgtool.8 b/man/corosync-cfgtool.8
index 4ec074ad..007cbbe3 100644
--- a/man/corosync-cfgtool.8
+++ b/man/corosync-cfgtool.8
@@ -1,104 +1,130 @@
.\"
.\" * Copyright (C) 2010-2020 Red Hat, Inc.
.\" *
.\" * All rights reserved.
.\" *
.\" * Author: Angus Salkeld <asalkeld@redhat.com>
.\" *
.\" * This software licensed under BSD license, the text of which follows:
.\" *
.\" * Redistribution and use in source and binary forms, with or without
.\" * modification, are permitted provided that the following conditions are met:
.\" *
.\" * - Redistributions of source code must retain the above copyright notice,
.\" * this list of conditions and the following disclaimer.
.\" * - Redistributions in binary form must reproduce the above copyright notice,
.\" * this list of conditions and the following disclaimer in the documentation
.\" * and/or other materials provided with the distribution.
.\" * - Neither the name of the MontaVista Software, Inc. nor the names of its
.\" * contributors may be used to endorse or promote products derived from this
.\" * software without specific prior written permission.
.\" *
.\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
.\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
.\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
.\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
.\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
.\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
.\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
.\" * THE POSSIBILITY OF SUCH DAMAGE.
.\" */
.TH "COROSYNC-CFGTOOL" "8" "2020-02-10" "" ""
.SH "NAME"
corosync-cfgtool \- An administrative tool for corosync.
.SH "SYNOPSIS"
-.B corosync\-cfgtool [[\-i IP_address] [\-b] \-s] [\-R] [\-L] [\-k nodeid] [\-a nodeid] [\-h] [\-H]
+.B corosync\-cfgtool [[\-i IP_address] [\-b] \-s] [\-n] [\-R] [\-L] [\-k nodeid] [\-a nodeid] [\-h] [\-H]
.SH "DESCRIPTION"
.B corosync\-cfgtool
A tool for displaying and configuring active parameters within corosync.
.SH "OPTIONS"
.TP
.B -i
Finds only information about the specified interface IP address or link id with -s.
.TP
.B -s
Displays the status of the current links on this node for UDP/UDPU, with extended status
for KNET.
After each link, the nodes on that link are displayed in order with their status,
-for example there are 3 nodes with KNET transportation:
+for example there are 3 nodes with KNET transport:
LINK ID 0
addr = 192.168.100.80
status:
nodeid 1: localhost
nodeid 2: connected
nodeid 3: connected
.TP
.B -b
-Displays the brief status of the current links on this node (KNET only) when used
+Displays the brief status of the current links on this node when used
with "-s". If any interfaces are faulty, 1 is returned by the binary. If all interfaces
are active 0 is returned to the shell.
After each link, the nodes on that link are displayed in order with their status
encoded into a single digit, or characters 'n', 'd' and '?' with special meaning.
1=link enabled, 2=link connected, So a 3 in a node position indicates that the
link is both enabled and connected. Status represented by character 'n' is used for
-localhost link. Character '?' means that Crosync was unable to get status of link from knet (log
+localhost link. Character '?' means that Corosync was unable to get status of link from knet (log
should contain more information). Character 'd' shouldn't appear and it means that Corosync
was unable to configure a link and it is result of some error which should have been logged.
The output will be:
LINK ID 0
addr = 192.168.100.80
status = n33
.TP
+.B -n
+Displays the status of the current nodes in the system with their link status(es).
+.P
+.nf
+Local node ID 1, transport knet
+nodeid: 2 reachable onwire (min/max/cur): 0, 1, 1
+ LINK: 0 (192.168.1.101->192.168.1.102) enabled connected mtu: 1397
+ LINK: 1 (192.168.4.1->192.168.4.2) enabled mtu: 469
+ LINK: 2 (192.168.9.1->192.168.9.2) enabled mtu: 469
+.fi
+.P
+Only reachable nodes are displayed so "reachable" should always be there.
+.br
+'onwire' versions are the knet on-wire versions that are supported/in use (where appropriate).
+.br
+IP addresses are the local and remote IP addresses (for UDP[U] only the local IP address is shown)
+.br
+enabled - means the link has been brought up
+.br
+connected - means that the link is connected to the remote node
+.br
+dynconnected - is not currently implemented
+.br
+mtu - shows the size of data packets. Should be the link packet size less a small amount
+for protocol overheads and encryption
+.TP
.B -R
Tell all instances of corosync in this cluster to reload corosync.conf.
Running corosync-cfgtool -R where nodes are running different versions
of corosync (including minor versions) is unsupported and may result in undefined
behaviour.
.TP
.B -L
Tell corosync to reopen all logging files. In contrast to other subcommands,
nothing is displayed on terminal if call is successful.
.TP
.B -k
Kill a node identified by node id.
.TP
.B -a
Display the IP address(es) of a node.
.TP
.B -h
Print basic usage.
.TP
.B -H
Shutdown corosync cleanly on this node.
.SH "SEE ALSO"
.BR corosync_overview (7),
.SH "AUTHOR"
Angus Salkeld
.PP
diff --git a/tools/corosync-cfgtool.c b/tools/corosync-cfgtool.c
index d920960b..c4f23f79 100644
--- a/tools/corosync-cfgtool.c
+++ b/tools/corosync-cfgtool.c
@@ -1,539 +1,542 @@
/*
* Copyright (c) 2006-2020 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake <sdake@redhat.com>
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/select.h>
#include <sys/un.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <limits.h>
#include <corosync/corotypes.h>
#include <corosync/totem/totem.h>
#include <corosync/cfg.h>
#include <corosync/cmap.h>
#include "util.h"
/*
 * Evaluate `code` and store its value in `result`. While that value is
 * CS_ERR_TRY_AGAIN, sleep for one second and evaluate `code` again; stop
 * after `max` attempts (at least one attempt is always made). Note that a
 * one second sleep also follows the final failed attempt.
 *
 * `code` is re-evaluated on every attempt and the arguments are expanded
 * without parentheses, so pass simple expressions only. The macro declares
 * a local named `counter`, which must not appear in the arguments.
 */
#define cs_repeat(result, max, code) \
do { \
int counter = 0; \
do { \
result = code; \
if (result == CS_ERR_TRY_AGAIN) { \
sleep(1); \
counter++; \
} else { \
break; \
} \
} while (counter < max); \
} while (0)
/*
 * Actions this tool can carry out; ACTION_NOOP (0) means no action.
 *
 * NOTE(review): ACTION_SHUTDOW looks like a misspelling of ACTION_SHUTDOWN.
 * Renaming it requires updating every user of the enumerator, which lies
 * outside this excerpt.
 */
enum user_action {
ACTION_NOOP=0,
ACTION_LINKSTATUS_GET,
+ ACTION_NODESTATUS_GET,
ACTION_RELOAD_CONFIG,
ACTION_REOPEN_LOG_FILES,
ACTION_SHUTDOW,
ACTION_SHOWADDR,
ACTION_KILL_NODE,
};
/*
 * Three-way comparison of two uint32_t node ids, usable as a qsort()
 * comparator: returns a negative value if *aptr < *bptr, zero if they are
 * equal and a positive value if *aptr > *bptr.
 *
 * The previous `a > b` form could only return 0 or 1, i.e. it reported
 * "equal" for a < b, which breaks the ordering contract sort routines
 * rely on.
 */
static int node_compare(const void *aptr, const void *bptr)
{
	uint32_t a = *(const uint32_t *)aptr;
	uint32_t b = *(const uint32_t *)bptr;

	/* Avoids the overflow a plain subtraction could produce */
	return (a > b) - (a < b);
}
/*
 * Status printer for corosync-cfgtool, shown here in unified-diff form:
 * lines starting with '-' belong to the old linkstatusget_do(), lines
 * starting with '+' to the new nodestatusget_do(), unmarked lines are
 * shared by both.
 *
 * New version, in order:
 *  - initializes a cfg handle and a cmap handle (exits on failure);
 *  - reads "totem.transport" from cmap, defaulting the printed transport
 *    name to "knet" when the key cannot be read;
 *  - fetches the local node id;
 *  - walks the "nodelist.node." cmap keys, keeps those that are exactly
 *    "nodelist.node.<n>.nodeid", and collects their values in nodeid_list;
 *  - sorts nodeid_list, finalizes cmap and prints the local node id;
 *  - for ACTION_NODESTATUS_GET prints one entry per reachable remote node
 *    with its enabled links, otherwise prints one entry per link (brief or
 *    detailed depending on `brief`).
 *
 * Returns rc, which the new lines initialize to EXIT_SUCCESS, or
 * EXIT_FAILURE when the local node id cannot be obtained.
 *
 * NOTE(review): local_nodeid_index / other_nodeid_index are recorded as
 *   positions in nodeid_list *before* qsort(), but are later used to index
 *   node_info[], which is filled in sorted order - confirm they still point
 *   at the intended nodes when cmap iteration order differs from nodeid order.
 * NOTE(review): nodeid_list[s++] has no bound check against KNET_MAX_HOST.
 * NOTE(review): node_info[s] is a variable-length array; s can be 0 when no
 *   nodelist entries are found.
 * NOTE(review): on the '+' lines rc is never set to EXIT_FAILURE after
 *   initialization (the old '-' lines did so for disconnected links).
 * NOTE(review): strdup("knet") is not checked for NULL, and node ids are
 *   printed with %d although they are unsigned.
 */
static int
-linkstatusget_do (char *interface_name, int brief)
+nodestatusget_do (enum user_action action, int brief)
{
cs_error_t result;
corosync_cfg_handle_t handle;
cmap_handle_t cmap_handle;
- unsigned int interface_count;
- char **interface_names;
- char **interface_status;
- uint32_t nodeid_list[KNET_MAX_HOST];
char iter_key[CMAP_KEYNAME_MAXLEN];
- unsigned int i;
cmap_iter_handle_t iter;
+ unsigned int local_nodeid;
+ unsigned int local_nodeid_index=0;
+ unsigned int other_nodeid_index=0;
unsigned int nodeid;
int nodeid_match_guard;
cmap_value_types_t type;
size_t value_len;
- int rc = EXIT_SUCCESS;
- int len, s = 0, t;
- char stat_ch;
char *str;
- totem_transport_t transport_number = TOTEM_TRANSPORT_KNET;
- int no_match = 1;
+ char *transport_str = NULL;
+ uint32_t nodeid_list[KNET_MAX_HOST];
+ int s = 0;
+ int rc = EXIT_SUCCESS;
+ int transport_number = TOTEM_TRANSPORT_KNET;
+ int i,j;
+ struct corosync_knet_node_status node_status;
- printf ("Printing link status.\n");
result = corosync_cfg_initialize (&handle, NULL);
if (result != CS_OK) {
fprintf (stderr, "Could not initialize corosync configuration API error %d\n", result);
exit (EXIT_FAILURE);
}
+
result = cmap_initialize (&cmap_handle);
if (result != CS_OK) {
fprintf (stderr, "Could not initialize corosync cmap API error %d\n", result);
exit (EXIT_FAILURE);
}
result = cmap_get_string(cmap_handle, "totem.transport", &str);
if (result == CS_OK) {
if (strcmp (str, "udpu") == 0) {
transport_number = TOTEM_TRANSPORT_UDPU;
}
if (strcmp (str, "udp") == 0) {
transport_number = TOTEM_TRANSPORT_UDP;
}
- free(str);
+ transport_str = str;
+ }
+ if (!transport_str) {
+ transport_str = strdup("knet"); /* It's the default */
+ }
+
+ result = corosync_cfg_local_get(handle, &local_nodeid);
+ if (result != CS_OK) {
+ fprintf (stderr, "Could not get the local node id, the error is: %d\n", result);
+ free(transport_str);
+ cmap_finalize(cmap_handle);
+ corosync_cfg_finalize(handle);
+ return EXIT_FAILURE;
}
/* Get a list of nodes. We do it this way rather than using votequorum as cfgtool
* needs to be independent of quorum type
*/
result = cmap_iter_init(cmap_handle, "nodelist.node.", &iter);
if (result != CS_OK) {
fprintf (stderr, "Could not get nodelist from cmap. error %d\n", result);
+ free(transport_str);
+ cmap_finalize(cmap_handle);
+ corosync_cfg_finalize(handle);
exit (EXIT_FAILURE);
}
while ((cmap_iter_next(cmap_handle, iter, iter_key, &value_len, &type)) == CS_OK) {
nodeid_match_guard = 0;
if (sscanf(iter_key, "nodelist.node.%*u.nodeid%n", &nodeid_match_guard) != 0) {
continue;
}
/* check for exact match */
if (nodeid_match_guard != strlen(iter_key)) {
continue;
}
if (cmap_get_uint32(cmap_handle, iter_key, &nodeid) == CS_OK) {
+ if (nodeid == local_nodeid) {
+ local_nodeid_index = s;
+ } else {
+ /* Bit of an odd one this. but local node only uses one link (of course, to itself)
+ so if we want to know which links are active across the cluster we need to look
+ at another node (any other) node's link list */
+ other_nodeid_index = s;
+ }
nodeid_list[s++] = nodeid;
}
}
-
- /* totemknet returns nodes in nodeid order - even though it doesn't tell us
- what the nodeid is. So sort our node list and we can then look up
- knet node pos to get an actual nodeid.
- Yep, I really should have totally rewritten the cfg interface for this.
- */
+ /* It's nice to have these in nodeid order */
qsort(nodeid_list, s, sizeof(uint32_t), node_compare);
- result = corosync_cfg_local_get(handle, &nodeid);
- if (result != CS_OK) {
- fprintf (stderr, "Could not get the local node id, the error is: %d\n", result);
+ cmap_finalize(cmap_handle);
+
+ printf ("Local node ID " CS_PRI_NODE_ID ", transport %s\n", local_nodeid, transport_str);
+
+ /* If node status requested then do print node-based info */
+ if (action == ACTION_NODESTATUS_GET) {
+ for (i=0; i<s; i++) {
+ result = corosync_cfg_node_status_get(handle, nodeid_list[i], &node_status);
+ if (result == CS_OK) {
+ /* Only display node info if it is reachable (and not us) */
+ if (node_status.reachable && node_status.nodeid != local_nodeid) {
+ printf("nodeid: %d", node_status.nodeid);
+ printf(" reachable");
+ if (node_status.remote) {
+ printf(" remote");
+ }
+ if (node_status.external) {
+ printf(" external");
+ }
+#ifdef HAVE_KNET_ONWIRE_VER
+ if (transport_number == TOTEM_TRANSPORT_KNET) {
+ printf(" onwire (min/max/cur): %d, %d, %d",
+ node_status.onwire_min,
+ node_status.onwire_max,
+ node_status.onwire_ver);
+ }
+#endif
+ printf("\n");
+ for (j=0; j<CFG_MAX_LINKS; j++) {
+ if (node_status.link_status[j].enabled) {
+ printf(" LINK: %d", j);
+ printf(" (%s%s%s)",
+ node_status.link_status[j].src_ipaddr,
+ transport_number==TOTEM_TRANSPORT_KNET?"->":"",
+ node_status.link_status[j].dst_ipaddr);
+ if (node_status.link_status[j].enabled) {
+ printf(" enabled");
+ }
+ if (node_status.link_status[j].connected) {
+ printf(" connected");
+ }
+ if (node_status.link_status[j].dynconnected) {
+ printf(" dynconnected");
+ }
+ printf(" mtu: %d\n", node_status.link_status[j].mtu);
+ }
+ }
+ printf("\n");
+ }
+ }
+ }
}
+ /* Print in link order */
else {
- printf ("Local node ID " CS_PRI_NODE_ID "\n", nodeid);
- }
+ struct corosync_knet_node_status node_info[s];
+ memset(node_info, 0, sizeof(node_info));
- result = corosync_cfg_ring_status_get (handle,
- &interface_names,
- &interface_status,
- &interface_count);
- if (result != CS_OK) {
- fprintf (stderr, "Could not get the link status, the error is: %d\n", result);
- } else {
- for (i = 0; i < interface_count; i++) {
- char *cur_iface_name_space = strchr(interface_names[i], ' ');
- int show_current_iface;
-
- s = 0;
- /*
- * Interface_name is "<linkid> <IP address>"
- * separate them out
- */
- if (!cur_iface_name_space) {
- continue;
- }
- *cur_iface_name_space = '\0';
-
- show_current_iface = 1;
- if (interface_name != NULL && interface_name[0] != '\0' &&
- strcmp(interface_name, interface_names[i]) != 0 &&
- strcmp(interface_name, cur_iface_name_space + 1) != 0) {
- show_current_iface = 0;
+ for (i=0; i<s; i++) {
+ result = corosync_cfg_node_status_get(handle, nodeid_list[i], &node_info[i]);
+ if (result != CS_OK) {
+ fprintf (stderr, "Could not get the node status for nodeid %d, the error is: %d\n", nodeid_list[i], result);
}
+ }
- if (show_current_iface) {
- no_match = 0;
- printf ("LINK ID %s\n", interface_names[i]);
- printf ("\taddr\t= %s\n", cur_iface_name_space + 1);
- /*
- * UDP(U) interface_status is always OK and doesn't contain
- * detailed information (only knet does).
- */
- if ((!brief) && (transport_number == TOTEM_TRANSPORT_KNET)) {
- len = strlen(interface_status[i]);
- printf ("\tstatus:\n");
- while (s < len) {
- nodeid = nodeid_list[s];
- printf("\t\tnodeid %2d:\t", nodeid);
- stat_ch = interface_status[i][s];
-
- /* Set return code to 1 if status is not localhost or connected. */
- if (rc == EXIT_SUCCESS) {
- if ((stat_ch != 'n') && (stat_ch != '3')) {
- rc = EXIT_FAILURE;
- }
+ for (i=0; i<CFG_MAX_LINKS; i++) {
+ if (node_info[other_nodeid_index].link_status[i].enabled) {
+ printf("LINK ID %d\n", i);
+ printf("\taddr\t= %s\n", node_info[other_nodeid_index].link_status[i].src_ipaddr);
+ if (brief) {
+ printf("\tstatus\t= ");
+ for (j=0; j<s; j++) {
+ char status = (node_info[j].link_status[i].enabled |
+ (node_info[j].link_status[i].connected << 1)) + '0';
+ if (status == '0') {
+ status = 'n';
}
-
- if (stat_ch >= '0' && stat_ch <= '9') {
- t = stat_ch - '0';
-
- /*
- * bit 0 - enabled
- * bit 1 - connected
- * bit 2 - dynconnected
- */
- if (t & 0x2) {
+ printf("%c", status);
+ }
+ printf("\n");
+ } else {
+ printf("\tstatus:\n");
+ for (j=0; j<s; j++) {
+ printf("\t\tnodeid: %3d:\t", node_info[j].nodeid);
+ if (j == local_nodeid_index) {
+ printf("localhost");
+ } else {
+ if (node_info[j].link_status[i].connected) {
printf("connected");
} else {
printf("disconnected");
}
-
- if (!(t & 0x1)) {
- printf(" (not enabled)");
- }
- printf("\n");
- } else if (stat_ch == 'n') {
- printf("localhost\n");
- } else if (stat_ch == '?') {
- printf("knet error\n");
- } else if (stat_ch == 'd') {
- printf("config error\n");
- } else {
- printf("can't decode status character '%c'\n", stat_ch);
- }
- s++;
- }
- } else {
- printf ("\tstatus\t= %s\n", interface_status[i]);
-
- /* Set return code to 1 if status is not localhost or connected. */
- if ((rc == EXIT_SUCCESS) && (transport_number == TOTEM_TRANSPORT_KNET)) {
- len = strlen(interface_status[i]);
- while (s < len) {
- stat_ch = interface_status[i][s];
- if ((stat_ch != 'n') && (stat_ch != '3')) {
- rc = EXIT_FAILURE;
- break;
- }
- s++;
}
+ printf("\n");
}
}
}
}
-
- /* No match for value of -i option */
- if (no_match) {
- rc = EXIT_FAILURE;
- fprintf(stderr, "Can't match any IP address or link id\n");
- }
-
- for (i = 0; i < interface_count; i++) {
- free(interface_status[i]);
- free(interface_names[i]);
- }
- free(interface_status);
- free(interface_names);
}
-
- (void)cmap_finalize (cmap_handle);
- (void)corosync_cfg_finalize (handle);
+ free(transport_str);
+ corosync_cfg_finalize(handle);
return rc;
}
static int reload_config_do (void)
{
cs_error_t result;
corosync_cfg_handle_t handle;
int rc;
rc = EXIT_SUCCESS;
printf ("Reloading corosync.conf...\n");
result = corosync_cfg_initialize (&handle, NULL);
if (result != CS_OK) {
fprintf (stderr, "Could not initialize corosync configuration API error %s\n", cs_strerror(result));
exit (EXIT_FAILURE);
}
result = corosync_cfg_reload_config (handle);
if (result != CS_OK) {
fprintf (stderr, "Could not reload configuration. Error %s\n", cs_strerror(result));
rc = (int)result;
}
else {
printf ("Done\n");
}
(void)corosync_cfg_finalize (handle);
return (rc);
}
static int reopen_log_files_do (void)
{
cs_error_t result;
corosync_cfg_handle_t handle;
int rc;
rc = EXIT_SUCCESS;
result = corosync_cfg_initialize (&handle, NULL);
if (result != CS_OK) {
fprintf (stderr, "Could not initialize corosync configuration API error %s\n", cs_strerror(result));
exit (EXIT_FAILURE);
}
result = corosync_cfg_reopen_log_files (handle);
if (result != CS_OK) {
fprintf (stderr, "Could not reopen corosync logging files. Error %s\n", cs_strerror(result));
rc = (int)result;
}
(void)corosync_cfg_finalize (handle);
return (rc);
}
/*
 * Request a clean shutdown of the local corosync instance.
 *
 * Exits the process with EXIT_FAILURE when the cfg API cannot be
 * initialized. The shutdown request is retried through cs_repeat() (limit
 * argument 30) while the API answers CS_ERR_TRY_AGAIN; a final failure is
 * reported on stderr only - nothing is returned to the caller.
 */
static void shutdown_do(void)
{
	corosync_cfg_callbacks_t cbs;
	corosync_cfg_handle_t cfg;
	cs_error_t err;

	/* No shutdown callback is registered by this tool. */
	cbs.corosync_cfg_shutdown_callback = NULL;

	err = corosync_cfg_initialize (&cfg, &cbs);
	if (err != CS_OK) {
		fprintf (stderr, "Could not initialize corosync configuration API error %d\n", err);
		exit (EXIT_FAILURE);
	}

	printf ("Shutting down corosync\n");

	cs_repeat(err, 30, corosync_cfg_try_shutdown (cfg, COROSYNC_CFG_SHUTDOWN_FLAG_REQUEST));
	if (err != CS_OK) {
		fprintf (stderr, "Could not shutdown (error = %d)\n", err);
	}

	(void)corosync_cfg_finalize (cfg);
}
static int showaddrs_do(unsigned int nodeid)
{
cs_error_t result;
corosync_cfg_handle_t handle;
int numaddrs;
int i;
int rc = EXIT_SUCCESS;
corosync_cfg_node_address_t addrs[INTERFACE_MAX];
result = corosync_cfg_initialize (&handle, NULL);
if (result != CS_OK) {
fprintf (stderr, "Could not initialize corosync configuration API error %d\n", result);
exit (EXIT_FAILURE);
}
if (corosync_cfg_get_node_addrs(handle, nodeid, INTERFACE_MAX, &numaddrs, addrs) == CS_OK) {
for (i=0; i<numaddrs; i++) {
char buf[INET6_ADDRSTRLEN];
struct sockaddr_storage *ss = (struct sockaddr_storage *)addrs[i].address;
struct sockaddr_in *sin = (struct sockaddr_in *)addrs[i].address;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addrs[i].address;
void *saddr;
if (!ss->ss_family) {
continue;
}
if (ss->ss_family == AF_INET6) {
saddr = &sin6->sin6_addr;
} else {
saddr = &sin->sin_addr;
}
inet_ntop(ss->ss_family, saddr, buf, sizeof(buf));
if (i != 0) {
printf(" ");
}
printf("%s", buf);
}
printf("\n");
} else {
fprintf (stderr, "Could not get node address for nodeid %d\n", nodeid);
rc = EXIT_FAILURE;
}
(void)corosync_cfg_finalize (handle);
return rc;
}
static void killnode_do(unsigned int nodeid)
{
cs_error_t result;
corosync_cfg_handle_t handle;
printf ("Killing node " CS_PRI_NODE_ID "\n", nodeid);
result = corosync_cfg_initialize (&handle, NULL);
if (result != CS_OK) {
fprintf (stderr, "Could not initialize corosync configuration API error %d\n", result);
exit (EXIT_FAILURE);
}
result = corosync_cfg_kill_node (handle, nodeid, "Killed by corosync-cfgtool");
if (result != CS_OK) {
fprintf (stderr, "Could not kill node (error = %s)\n", cs_strerror(result));
exit(EXIT_FAILURE);
}
(void)corosync_cfg_finalize (handle);
}
/*
 * Print the command-line synopsis and one line of help per option to stdout.
 * Shown in unified-diff form: '-' lines are the old help text, '+' lines
 * the new one.
 *
 * NOTE(review): the synopsis line does not mention the -n option that the
 *   '+' lines document.
 * NOTE(review): -i is still advertised here; confirm it is still honoured
 *   by the status code after this change.
 */
static void usage_do (void)
{
printf ("corosync-cfgtool [[-i <interface ip>] [-b] -s] [-R] [-L] [-k nodeid] [-a nodeid] [-h] [-H]\n\n");
printf ("A tool for displaying and configuring active parameters within corosync.\n");
printf ("options:\n");
printf ("\t-i\tFinds only information about the specified interface IP address or link id when used with -s..\n");
- printf ("\t-s\tDisplays the status of the current links on this node(UDP/UDPU), with extended status for KNET.\n");
- printf ("\t-b\tDisplays the brief status of the current links on this node when used with -s.(KNET only)\n");
+ printf ("\t-s\tDisplays the status of the current links on this node.\n");
+ printf ("\t-n\tDisplays the status of the connected nodes and their links.\n");
+ printf ("\t-b\tDisplays the brief status of the current links on this node when used with -s.\n");
printf ("\t-R\tTell all instances of corosync in this cluster to reload corosync.conf.\n");
printf ("\t-L\tTell corosync to reopen all logging files.\n");
printf ("\t-k\tKill a node identified by node id.\n");
printf ("\t-a\tDisplay the IP address(es) of a node\n");
printf ("\t-h\tPrint basic usage.\n");
printf ("\t-H\tShutdown corosync cleanly on this node.\n");
}
/*
 * Entry point: parse the command line with getopt(), remember the requested
 * action, then dispatch to the matching *_do() helper.
 * Shown in unified-diff form ('-' old lines, '+' new lines).
 *
 * Each action option overwrites `action`, so the last one given wins.
 * -k and -a require a node id between 1 and UINT_MAX (validated with
 * util_strtonum()); an invalid value exits with EXIT_FAILURE. An option
 * getopt() rejects ('?') makes main return EXIT_FAILURE. With no action
 * (including -h alone) the usage text is printed.
 *
 * Returns the helper's result for the status, reload, reopen and showaddr
 * actions, and the initial EXIT_SUCCESS otherwise (kill, shutdown and
 * usage do not update rc).
 *
 * NOTE(review): the option string accepts 'r', but there is no case for it,
 *   so it falls into the default branch and is ignored.
 * NOTE(review): -i is still parsed into interface_name, but on the '+' lines
 *   interface_name is no longer passed to the status function.
 */
int main (int argc, char *argv[]) {
- const char *options = "i:sbrRLk:a:hH";
+ const char *options = "i:snbrRLk:a:hH";
int opt;
unsigned int nodeid = 0;
char interface_name[128] = "";
int rc = EXIT_SUCCESS;
enum user_action action = ACTION_NOOP;
int brief = 0;
long long int l;
while ( (opt = getopt(argc, argv, options)) != -1 ) {
switch (opt) {
case 'i':
/* strncpy() may leave the buffer unterminated; terminate explicitly */
strncpy(interface_name, optarg, sizeof(interface_name));
interface_name[sizeof(interface_name) - 1] = '\0';
break;
case 's':
action = ACTION_LINKSTATUS_GET;
break;
+ case 'n':
+ action = ACTION_NODESTATUS_GET;
+ break;
case 'b':
brief = 1;
break;
case 'R':
action = ACTION_RELOAD_CONFIG;
break;
case 'L':
action = ACTION_REOPEN_LOG_FILES;
break;
case 'k':
if (util_strtonum(optarg, 1, UINT_MAX, &l) == -1) {
fprintf(stderr, "The nodeid was not valid, try a positive number\n");
exit(EXIT_FAILURE);
}
nodeid = l;
action = ACTION_KILL_NODE;
break;
case 'H':
action = ACTION_SHUTDOW;
break;
case 'a':
if (util_strtonum(optarg, 1, UINT_MAX, &l) == -1) {
fprintf(stderr, "The nodeid was not valid, try a positive number\n");
exit(EXIT_FAILURE);
}
nodeid = l;
action = ACTION_SHOWADDR;
break;
case '?':
return (EXIT_FAILURE);
break;
case 'h':
default:
break;
}
}
/* Dispatch on the last action selected above */
switch(action) {
case ACTION_LINKSTATUS_GET:
- rc = linkstatusget_do(interface_name, brief);
+ rc = nodestatusget_do(action, brief);
+ break;
+ case ACTION_NODESTATUS_GET:
+ rc = nodestatusget_do(action, brief);
break;
case ACTION_RELOAD_CONFIG:
rc = reload_config_do();
break;
case ACTION_REOPEN_LOG_FILES:
rc = reopen_log_files_do();
break;
case ACTION_KILL_NODE:
killnode_do(nodeid);
break;
case ACTION_SHUTDOW:
shutdown_do();
break;
case ACTION_SHOWADDR:
rc = showaddrs_do(nodeid);
break;
case ACTION_NOOP:
default:
usage_do();
break;
}
return (rc);
}

File Metadata

Mime Type
text/x-diff
Expires
Mon, Feb 24, 11:53 PM (15 h, 24 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1464537
Default Alt Text
(523 KB)

Event Timeline