Page MenuHomeClusterLabs Projects

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/configure.ac b/configure.ac
index 81fc91bd..f3e414bb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,780 +1,773 @@
# -*- Autoconf -*-
# Process this file with autoconf to produce a configure script.
# bootstrap / init
AC_PREREQ([2.61])
AC_INIT([corosync],
m4_esyscmd([build-aux/git-version-gen .tarball-version]),
[users@clusterlabs.org])
AC_USE_SYSTEM_EXTENSIONS
AM_INIT_AUTOMAKE([foreign 1.11])
LT_PREREQ([2.2.6])
LT_INIT
AM_SILENT_RULES([yes])
AC_CONFIG_SRCDIR([lib/cpg.c])
AC_CONFIG_HEADER([include/corosync/config.h])
AC_CONFIG_MACRO_DIR([m4])
AC_CANONICAL_HOST
AC_LANG([C])
AC_SUBST(WITH_LIST, [""])
# Inter-library dependencies: enabled unless --disable-interlib-deps is
# given. The choice is recorded in the link_all_deplibs and
# link_all_deplibs_CXX shell variables.
# NOTE(review): these look like libtool configuration variables -- confirm
# they are still honoured by the libtool version in use.
AC_ARG_ENABLE([interlib-deps],
	[AS_HELP_STRING([--disable-interlib-deps ],[disable inter-library dependencies (might break builds)])],
	[enable_interlib_deps="$enableval"],
	[enable_interlib_deps="yes"])
AC_MSG_NOTICE([enable inter-library dependencies: $enable_interlib_deps])
# POSIX test(1) only defines "=" for string equality; the "==" spelling is a
# shell extension and fails when /bin/sh is a strict POSIX shell.
if test "${enable_interlib_deps}" = "yes"; then
	link_all_deplibs=yes
	link_all_deplibs_CXX=yes
else
	link_all_deplibs=no
	link_all_deplibs_CXX=no
fi
dnl Fix default variables - "prefix" variable if not specified
if test "$prefix" = "NONE"; then
prefix="/usr"
dnl Fix "localstatedir" variable if not specified
if test "$localstatedir" = "\${prefix}/var"; then
localstatedir="/var"
fi
dnl Fix "sysconfdir" variable if not specified
if test "$sysconfdir" = "\${prefix}/etc"; then
sysconfdir="/etc"
fi
dnl Fix "libdir" variable if not specified
if test "$libdir" = "\${exec_prefix}/lib"; then
if test -e /usr/lib64; then
libdir="/usr/lib64"
else
libdir="/usr/lib"
fi
fi
fi
if test "$srcdir" = "."; then
AC_MSG_NOTICE([building in place srcdir:$srcdir])
AC_DEFINE([BUILDING_IN_PLACE], 1, [building in place])
else
AC_MSG_NOTICE([building out of tree srcdir:$srcdir])
fi
# Checks for programs.
# check stolen from gnulib/m4/gnu-make.m4
if ! ${MAKE-make} --version /cannot/make/this >/dev/null 2>&1; then
AC_MSG_ERROR([you don't seem to have GNU make; it is required])
fi
sinclude(corosync-default.m4)
AC_PROG_CC
AC_PROG_INSTALL
AC_PROG_LN_S
AC_PROG_MAKE_SET
AC_PROG_SED
AC_CHECK_PROGS([GROFF], [groff])
AC_CHECK_PROGS([PKGCONFIG], [pkg-config])
AC_CHECK_PROGS([AUGTOOL], [augtool])
AC_CHECK_PROGS([DOT], [dot])
AC_CHECK_PROGS([DOXYGEN], [doxygen])
AC_CHECK_PROGS([AWK], [awk])
AC_CHECK_PROGS([SED], [sed])
AC_PATH_PROG([BASHPATH], [bash])
# Checks for compiler characteristics.
AC_PROG_GCC_TRADITIONAL
AC_C_CONST
AC_C_INLINE
AC_C_VOLATILE
# Checks for header files.
AC_HEADER_DIRENT
AC_HEADER_STDC
AC_HEADER_SYS_WAIT
AC_CHECK_HEADERS([arpa/inet.h fcntl.h limits.h netdb.h netinet/in.h stdint.h \
stdlib.h string.h sys/ioctl.h sys/param.h sys/socket.h \
sys/time.h syslog.h unistd.h sys/types.h getopt.h malloc.h \
utmpx.h ifaddrs.h stddef.h sys/file.h sys/uio.h])
# Check entries in specific structs
AC_CHECK_MEMBER([struct sockaddr_in.sin_len],
[AC_DEFINE_UNQUOTED([HAVE_SOCK_SIN_LEN], [1], [sockaddr_in needs sin_len])],
[], [[#include <netinet/in.h>]])
AC_CHECK_MEMBER([struct sockaddr_in6.sin6_len],
[AC_DEFINE_UNQUOTED([HAVE_SOCK_SIN6_LEN], [1], [sockaddr_in6 needs sin6_len])],
[], [[#include <netinet/in.h>]])
AC_CHECK_MEMBER([struct msghdr.msg_control],
[AC_DEFINE_UNQUOTED([HAVE_MSGHDR_CONTROL], [1], [msghdr has msg_control])],
[], [[#include <sys/socket.h>]])
AC_CHECK_MEMBER([struct msghdr.msg_controllen],
[AC_DEFINE_UNQUOTED([HAVE_MSGHDR_CONTROLLEN], [1], [msghdr has msg_controllen])],
[], [[#include <sys/socket.h>]])
AC_CHECK_MEMBER([struct msghdr.msg_flags],
[AC_DEFINE_UNQUOTED([HAVE_MSGHDR_FLAGS], [1], [msghdr has msg_flags])],
[], [[#include <sys/socket.h>]])
AC_CHECK_MEMBER([struct msghdr.msg_accrights],
[AC_DEFINE_UNQUOTED([HAVE_MSGHDR_ACCRIGHTS], [1], [msghdr has msg_accrights])],
[], [[#include <sys/socket.h>]])
AC_CHECK_MEMBER([struct msghdr.msg_accrightslen],
[AC_DEFINE_UNQUOTED([HAVE_MSGHDR_ACCRIGHTSLEN], [1], [msghdr has msg_accrightslen])],
[], [[#include <sys/socket.h>]])
# Checks for typedefs.
AC_TYPE_UID_T
AC_TYPE_INT16_T
AC_TYPE_INT32_T
AC_TYPE_INT64_T
AC_TYPE_INT8_T
AC_TYPE_UINT16_T
AC_TYPE_UINT32_T
AC_TYPE_UINT64_T
AC_TYPE_UINT8_T
AC_TYPE_SIZE_T
AC_TYPE_SSIZE_T
# Checks for libraries.
PKG_CHECK_MODULES([nss],[nss])
SAVE_CPPFLAGS="$CPPFLAGS"
SAVE_LIBS="$LIBS"
PKG_CHECK_MODULES([LIBQB], [libqb])
CPPFLAGS="$CPPFLAGS $LIBQB_CFLAGS"
LIBS="$LIBS $LIBQB_LIBS"
AC_CHECK_LIB([qb], [qb_log_thread_priority_set], \
have_qb_log_thread_priority_set="yes", \
have_qb_log_thread_priority_set="no")
if test "x${have_qb_log_thread_priority_set}" = xyes; then
AC_DEFINE_UNQUOTED([HAVE_QB_LOG_THREAD_PRIORITY_SET], 1, [have qb_log_thread_priority_set])
fi
CPPFLAGS="$SAVE_CPPFLAGS"
LIBS="$SAVE_LIBS"
AC_CHECK_LIB([pthread], [pthread_create])
AC_CHECK_LIB([socket], [socket])
+AC_CHECK_LIB([knet], [knet_handle_new])
AC_CHECK_LIB([nsl], [t_open])
AC_CHECK_LIB([rt], [sched_getscheduler])
AC_CHECK_LIB([z], [crc32],
AM_CONDITIONAL([BUILD_CPGHUM], true),
AM_CONDITIONAL([BUILD_CPGHUM], false))
# Checks for library functions.
AC_FUNC_ALLOCA
AC_FUNC_CLOSEDIR_VOID
AC_FUNC_ERROR_AT_LINE
AC_FUNC_FORK
AC_FUNC_MALLOC
AC_FUNC_MEMCMP
AC_FUNC_MMAP
AC_FUNC_REALLOC
AC_FUNC_SELECT_ARGTYPES
AC_FUNC_VPRINTF
AC_CHECK_FUNCS([alarm alphasort atexit bzero dup2 endgrent endpwent fdatasync \
fcntl getcwd getpeerucred getpeereid gettimeofday inet_ntoa \
memmove memset mkdir scandir select socket strcasecmp strchr \
strdup strerror strrchr strspn strstr pthread_setschedparam \
sched_get_priority_max sched_setscheduler getifaddrs \
clock_gettime ftruncate gethostname localtime_r munmap strtol])
AC_CONFIG_FILES([Makefile
exec/Makefile
include/Makefile
init/Makefile
lib/Makefile
common_lib/Makefile
man/Makefile
pkgconfig/Makefile
test/Makefile
cts/Makefile
cts/agents/Makefile
cts/CTSvars.py
tools/Makefile
conf/Makefile
qdevices/Makefile
Doxyfile
conf/logrotate/Makefile
conf/tmpfiles.d/Makefile])
### Local business
dnl ===============================================
dnl Functions / global M4 variables
dnl ===============================================
dnl Accumulator for the per-library SONAME entries. Every LIB_SONAME_IMPORT
dnl call appends ",<NAME>_SONAME,<NAME>" to it, so the list starts with an
dnl empty element that the caller has to drop with m4_shift.
m4_define([local_soname_list], [])dnl
dnl Upper-case all arguments (a-z is mapped to A-Z with translit).
m4_define([local_upcase], [translit([$*], [a-z], [A-Z])])dnl
dnl LIB_SONAME_IMPORT(name): read lib/lib<name>.verso when autoconf runs, strip
dnl the newlines from its contents, assign the result to the shell variable
dnl <NAME>_SONAME, AC_SUBST that variable and record it in local_soname_list.
m4_define([LIB_SONAME_IMPORT],[dnl
m4_define([local_libname], local_upcase($1)[_SONAME])dnl
m4_define([local_soname], translit(m4_sinclude(lib/lib$1.verso), [
], []))dnl
local_libname="local_soname"dnl
m4_define([local_soname_list], m4_defn([local_soname_list])[,]local_libname[,]local_upcase($1))dnl
AC_SUBST(local_libname)dnl
])dnl
dnl m4_printpadspace(len, width): expands to one space per recursion step for
dnl as long as width - len is at least 1, i.e. the padding that brings a string
dnl of length len up to width columns. Used by LIB_MSG_RESULT.
m4_define([m4_printpadspace],[ifelse(m4_eval([$2 - $1 < 1]),[1],,[ ][m4_printpadspace([$1],m4_eval([$2 - 1]))])])dnl
dnl LIB_MSG_RESULT(var, NAME, ...): walk the argument list two entries at a
dnl time and emit one AC_MSG_RESULT line per pair, showing NAME and the value
dnl of shell variable var. The recursion ends once a single argument is left.
m4_define([LIB_MSG_RESULT], [ifelse([$#], [1], ,[dnl
AC_MSG_RESULT([ $2 Library SONAME m4_printpadspace(len($2),8) = ${$1}])
LIB_MSG_RESULT(m4_shift(m4_shift($@)))dnl
])])dnl
# ===============================================
# Helpers
# ===============================================
## check if the compiler supports -Werror -Wunknown-warning-option
AC_MSG_CHECKING([whether $CC supports -Wunknown-warning-option -Werror])
BACKUP="$CPPFLAGS"
CPPFLAGS="$CPPFLAGS -Werror -Wunknown-warning-option"
AC_PREPROC_IFELSE([AC_LANG_PROGRAM([])],
[unknown_warnings_as_errors='-Wunknown-warning-option -Werror'; AC_MSG_RESULT([yes])],
[unknown_warnings_as_errors=''; AC_MSG_RESULT([no])])
CPPFLAGS="$BACKUP"
## helper for CC stuff
## cc_supports_flag FLAG: returns 0 when the preprocessor check below passes
## with FLAG and the contents of unknown_warnings_as_errors appended to
## CPPFLAGS, and 1 otherwise. CPPFLAGS is restored before returning.
## Only the preprocessor is run (AC_PREPROC_IFELSE), not a full compile.
## BACKUP and RC are ordinary shell variables, so every call overwrites them.
## NOTE(review): all callers in this file pass a single flag; the positional
## parameters are spliced into a quoted string, so behaviour with several
## arguments should be confirmed before relying on it.
cc_supports_flag() {
BACKUP="$CPPFLAGS"
CPPFLAGS="$CPPFLAGS $@ $unknown_warnings_as_errors"
AC_MSG_CHECKING([whether $CC supports "$@"])
AC_PREPROC_IFELSE([AC_LANG_PROGRAM([])],
[RC=0; AC_MSG_RESULT([yes])],
[RC=1; AC_MSG_RESULT([no])])
CPPFLAGS="$BACKUP"
return $RC
}
## cleanup
AC_MSG_NOTICE(Sanitizing prefix: ${prefix})
case $prefix in
NONE) prefix=/usr/local;;
esac
AC_MSG_NOTICE(Sanitizing exec_prefix: ${exec_prefix})
case $exec_prefix in
dnl For consistency with Corosync, map NONE->$prefix
NONE) exec_prefix=$prefix;;
prefix) exec_prefix=$prefix;;
esac
## local defines
PACKAGE_FEATURES=""
LINT_FLAGS="-weak -unrecog +posixlib +ignoresigns -fcnuse \
-badflag -D__gnuc_va_list=va_list -D__attribute\(x\)="
# default libraries SONAME
SOMAJOR="5"
SOMINOR="0"
SOMICRO="0"
SONAME="${SOMAJOR}.${SOMINOR}.${SOMICRO}"
# specific libraries SONAME
LIB_SONAME_IMPORT([cfg])
LIB_SONAME_IMPORT([cpg])
LIB_SONAME_IMPORT([quorum])
LIB_SONAME_IMPORT([sam])
LIB_SONAME_IMPORT([votequorum])
LIB_SONAME_IMPORT([cmap])
# local options
AC_ARG_ENABLE([ansi],
[ --enable-ansi : force to build with ANSI standards. ],
[ default="no" ])
AC_ARG_ENABLE([fatal-warnings],
[ --enable-fatal-warnings : enable fatal warnings. ],
[ default="no" ])
AC_ARG_ENABLE([debug],
[ --enable-debug : enable debug build. ],
[ default="no" ])
AC_ARG_ENABLE([secure-build],
[ --enable-secure-build : enable PIE/RELRO build. ],
[],
[enable_secure_build="yes"])
AC_ARG_ENABLE([user-flags],
[ --enable-user-flags : rely on user environment. ],
[ default="no" ])
AC_ARG_ENABLE([coverage],
[ --enable-coverage : coverage analysis of the codebase. ],
[ default="no" ])
AC_ARG_ENABLE([small-memory-footprint],
[ --enable-small-memory-footprint : Use small message queues and small messages sizes. ],
[ default="no" ])
AC_ARG_ENABLE([dbus],
[ --enable-dbus : dbus events. ],,
[ enable_dbus="no" ])
AC_ARG_ENABLE([testagents],
[ --enable-testagents : Install Test Agents. ],,
[ default="no" ])
AC_ARG_ENABLE([rdma],
[ --enable-rdma : Infiniband RDMA transport support ],,
[ enable_rdma="no" ])
AM_CONDITIONAL(BUILD_RDMA, test x$enable_rdma = xyes)
AC_ARG_ENABLE([monitoring],
[ --enable-monitoring : resource monitoring ],,
[ default="no" ])
AM_CONDITIONAL(BUILD_MONITORING, test x$enable_monitoring = xyes)
AC_ARG_ENABLE([watchdog],
[ --enable-watchdog : Watchdog support ],,
[ default="no" ])
AM_CONDITIONAL(BUILD_WATCHDOG, test x$enable_watchdog = xyes)
AC_ARG_ENABLE([augeas],
[ --enable-augeas : Install the augeas lens for corosync.conf ],,
[ enable_augeas="no" ])
AM_CONDITIONAL(INSTALL_AUGEAS, test x$enable_augeas = xyes)
AC_ARG_ENABLE([systemd],
[ --enable-systemd : Install systemd service files],,
[ enable_systemd="no" ])
AM_CONDITIONAL(INSTALL_SYSTEMD, test x$enable_systemd = xyes)
AC_ARG_ENABLE([upstart],
[ --enable-upstart : Install upstart service files],,
[ enable_upstart="no" ])
AM_CONDITIONAL(INSTALL_UPSTART, test x$enable_upstart = xyes)
AC_ARG_WITH([initddir],
[ --with-initddir=DIR : path to init script directory. ],
[ INITDDIR="$withval" ],
[ INITDDIR="$sysconfdir/init.d" ])
AC_ARG_WITH([systemddir],
[ --with-systemddir=DIR : path to systemd unit files directory. ],
[ SYSTEMDDIR="$withval" ],
[ SYSTEMDDIR="/lib/systemd/system" ])
AC_ARG_WITH([upstartdir],
[ --with-upstartdir=DIR : path to upstart config files directory. ],
[ UPSTARTDIR="$withval" ],
[ UPSTARTDIR="$sysconfdir/init" ])
AC_ARG_WITH([initwrappersdir],
[ --with-initwrappersdir=DIR : path to init wrappers files directory. ],
[ INITWRAPPERSDIR="$withval" ],
[ INITWRAPPERSDIR="$datarootdir/corosync" ])
AC_ARG_WITH([logdir],
[ --with-logdir=DIR : the base directory for corosync logging files. ],
[ LOGDIR="$withval" ],
[ LOGDIR="$localstatedir/log/cluster" ])
# Directory that receives the logrotate snippet; defaults to
# $sysconfdir/logrotate.d unless --with-logrotatedir=DIR is given.
AC_ARG_WITH([logrotatedir],
[ --with-logrotatedir=DIR : the base directory for logrotate.d files. ],
[ LOGROTATEDIR="$withval" ],
[ LOGROTATEDIR="$sysconfdir/logrotate.d" ])
AC_ARG_WITH([tmpfilesdir],
[ --with-tmpfilesdir=DIR : path to tmpfiles.d configuration files directory. ],
[ TMPFILESDIR="$withval" ],
[ TMPFILESDIR="/lib/tmpfiles.d" ])
AC_ARG_ENABLE([snmp],
[ --enable-snmp : SNMP protocol support ],
[ default="no" ])
AC_ARG_ENABLE([xmlconf],
[ --enable-xmlconf : XML configuration support ],,
[ enable_xmlconf="no" ])
AM_CONDITIONAL(INSTALL_XMLCONF, test x$enable_xmlconf = xyes)
AC_ARG_ENABLE([qdevices],
[ --enable-qdevices : Quorum devices support ],,
[ enable_qdevices="no" ])
AM_CONDITIONAL(BUILD_QDEVICES, test x$enable_qdevices = xyes)
AC_ARG_ENABLE([qnetd],
[ --enable-qnetd : Quorum Net Daemon support ],,
[ enable_qnetd="no" ])
AM_CONDITIONAL(BUILD_QNETD, test x$enable_qnetd = xyes)
# *FLAGS handling goes here
ENV_CFLAGS="$CFLAGS"
ENV_CPPFLAGS="$CPPFLAGS"
ENV_LDFLAGS="$LDFLAGS"
# debug build stuff
if test "x${enable_debug}" = xyes; then
AC_DEFINE_UNQUOTED([DEBUG], [1], [Compiling Debugging code])
OPT_CFLAGS="-O0"
PACKAGE_FEATURES="$PACKAGE_FEATURES debug"
else
OPT_CFLAGS="-O3"
fi
# gdb flags
if test "x${GCC}" = xyes; then
GDB_FLAGS="-ggdb3"
else
GDB_FLAGS="-g"
fi
# Look for dbus-1
if test "x${enable_dbus}" = xyes; then
PKG_CHECK_MODULES([DBUS],[dbus-1])
AC_DEFINE_UNQUOTED([HAVE_DBUS], 1, [have dbus])
PACKAGE_FEATURES="$PACKAGE_FEATURES dbus"
WITH_LIST="$WITH_LIST --with dbus"
fi
if test "x${enable_testagents}" = xyes; then
AC_DEFINE_UNQUOTED([HAVE_TESTAGENTS], 1, [have testagents])
PACKAGE_FEATURES="$PACKAGE_FEATURES testagents"
WITH_LIST="$WITH_LIST --with testagents"
fi
-if test "x${enable_rdma}" = xyes; then
- PKG_CHECK_MODULES([rdmacm],[rdmacm])
- PKG_CHECK_MODULES([ibverbs],[ibverbs])
- AC_DEFINE_UNQUOTED([HAVE_RDMA], 1, [have rdmacm])
- PACKAGE_FEATURES="$PACKAGE_FEATURES rdma"
- WITH_LIST="$WITH_LIST --with rdma"
-fi
-
if test "x${enable_monitoring}" = xyes; then
PKG_CHECK_MODULES([statgrab], [libstatgrab])
PKG_CHECK_MODULES([statgrabge090], [libstatgrab >= 0.90],
AC_DEFINE_UNQUOTED([HAVE_LIBSTATGRAB_GE_090], 1, [have libstatgrab >= 0.90]),
TMP_VARIABLE=1)
AC_DEFINE_UNQUOTED([HAVE_MONITORING], 1, [have resource monitoring])
PACKAGE_FEATURES="$PACKAGE_FEATURES monitoring"
WITH_LIST="$WITH_LIST --with monitoring"
fi
if test "x${enable_watchdog}" = xyes; then
AC_CHECK_HEADER([linux/watchdog.h], [], [AC_MSG_ERROR([watchdog requires linux/watchdog.h])])
AC_CHECK_HEADER([linux/reboot.h], [], [AC_MSG_ERROR([watchdog requires linux/reboot.h])])
AC_DEFINE_UNQUOTED([HAVE_WATCHDOG], 1, [have watchdog])
PACKAGE_FEATURES="$PACKAGE_FEATURES watchdog"
WITH_LIST="$WITH_LIST --with watchdog"
fi
if test "x${enable_augeas}" = xyes; then
PACKAGE_FEATURES="$PACKAGE_FEATURES augeas"
fi
if test "x${enable_systemd}" = xyes; then
PACKAGE_FEATURES="$PACKAGE_FEATURES systemd"
WITH_LIST="$WITH_LIST --with systemd"
fi
if test "x${enable_upstart}" = xyes; then
PACKAGE_FEATURES="$PACKAGE_FEATURES upstart"
WITH_LIST="$WITH_LIST --with upstart"
fi
if test "x${enable_xmlconf}" = xyes; then
PACKAGE_FEATURES="$PACKAGE_FEATURES xmlconf"
WITH_LIST="$WITH_LIST --with xmlconf"
fi
if test "x${enable_qdevices}" = xyes; then
PACKAGE_FEATURES="$PACKAGE_FEATURES qdevices"
fi
if test "x${enable_qnetd}" = xyes; then
PACKAGE_FEATURES="$PACKAGE_FEATURES qnetd"
fi
do_snmp=0
if test "x${enable_snmp}" = xyes; then
AC_PATH_PROGS([SNMPCONFIG], [net-snmp-config])
if test "x${SNMPCONFIG}" != "x"; then
AC_MSG_CHECKING([for snmp includes])
SNMP_PREFIX=`$SNMPCONFIG --prefix`
SNMP_INCLUDES="-I$SNMP_PREFIX/include"
AC_MSG_RESULT([$SNMP_INCLUDES])
AC_MSG_CHECKING([for snmp libraries])
SNMP_LIBS=`$SNMPCONFIG --libs`
AC_MSG_RESULT([$SNMP_LIBS])
AC_SUBST([SNMP_LIBS])
saveCFLAGS="$CFLAGS"
CFLAGS="$CFLAGS $SNMP_INCLUDES"
AC_CHECK_HEADERS([net-snmp/net-snmp-config.h])
CFLAGS="$saveCFLAGS"
if test "x${ac_cv_header_net_snmp_net_snmp_config_h}" != "xyes"; then
AC_MSG_ERROR([Unable to use net-snmp/net-snmp-config.h])
fi
savedLibs=$LIBS
LIBS="$LIBS $SNMP_LIBS"
AC_CHECK_FUNCS([netsnmp_transport_open_client])
if test $ac_cv_func_netsnmp_transport_open_client != yes; then
AC_CHECK_FUNCS([netsnmp_tdomain_transport])
if test $ac_cv_func_netsnmp_tdomain_transport != yes; then
AC_MSG_ERROR([No usable SNMP client transport implementation found])
fi
else
AC_DEFINE_UNQUOTED([NETSNMPV54], $NETSNMP_NEW_SUPPORT, [have net-snmp5.4 over])
fi
LIBS=$savedLibs
do_snmp=1
PACKAGE_FEATURES="$PACKAGE_FEATURES snmp"
WITH_LIST="$WITH_LIST --with snmp"
AC_DEFINE_UNQUOTED([ENABLE_SNMP], $do_snmp, [Build in support for sending SNMP traps])
else
AC_MSG_ERROR([You need the net_snmp development package to continue.])
fi
fi
AM_CONDITIONAL(BUILD_SNMP, test "${do_snmp}" = "1")
# extra warnings
EXTRA_WARNINGS=""
WARNLIST="
all
shadow
missing-prototypes
missing-declarations
strict-prototypes
declaration-after-statement
pointer-arith
write-strings
cast-align
bad-function-cast
missing-format-attribute
format=2
format-security
format-nonliteral
no-long-long
unsigned-char
gnu89-inline
no-strict-aliasing
"
for j in $WARNLIST; do
if cc_supports_flag -W$j; then
EXTRA_WARNINGS="$EXTRA_WARNINGS -W$j";
fi
done
if test "x${enable_coverage}" = xyes && \
cc_supports_flag -ftest-coverage && \
cc_supports_flag -fprofile-arcs ; then
AC_MSG_NOTICE([Enabling Coverage (enable -O0 by default)])
OPT_CFLAGS="-O0"
COVERAGE_CFLAGS="-ftest-coverage -fprofile-arcs"
COVERAGE_LDFLAGS="-ftest-coverage -fprofile-arcs"
PACKAGE_FEATURES="$PACKAGE_FEATURES coverage"
else
COVERAGE_CFLAGS=""
COVERAGE_LDFLAGS=""
fi
if test "x${enable_small_memory_footprint}" = xyes ; then
AC_DEFINE_UNQUOTED([HAVE_SMALL_MEMORY_FOOTPRINT], 1, [have small_memory_footprint])
PACKAGE_FEATURES="$PACKAGE_FEATURES small-memory-footprint"
fi
if test "x${enable_ansi}" = xyes && \
cc_supports_flag -std=iso9899:199409 ; then
AC_MSG_NOTICE([Enabling ANSI Compatibility])
ANSI_CPPFLAGS="-ansi -DANSI_ONLY"
PACKAGE_FEATURES="$PACKAGE_FEATURES ansi"
else
ANSI_CPPFLAGS=""
fi
if test "x${enable_fatal_warnings}" = xyes && \
cc_supports_flag -Werror ; then
AC_MSG_NOTICE([Enabling Fatal Warnings (-Werror)])
WERROR_CFLAGS="-Werror"
PACKAGE_FEATURES="$PACKAGE_FEATURES fatal-warnings"
else
WERROR_CFLAGS=""
fi
# don't add additional cflags
if test "x${enable_user_flags}" = xyes; then
OPT_CFLAGS=""
GDB_FLAGS=""
EXTRA_WARNINGS=""
fi
if test "x${enable_secure_build}" = xyes; then
# stolen from apache configure snippet
AC_CACHE_CHECK([whether $CC accepts PIE flags], [ap_cv_cc_pie], [
save_CFLAGS=$CFLAGS
save_LDFLAGS=$LDFLAGS
CFLAGS="$CFLAGS -fPIE"
LDFLAGS="$LDFLAGS -pie"
AC_TRY_RUN([static int foo[30000]; int main () { return 0; }],
[ap_cv_cc_pie=yes], [ap_cv_cc_pie=no], [ap_cv_cc_pie=yes])
CFLAGS=$save_CFLAGS
LDFLAGS=$save_LDFLAGS
])
if test "$ap_cv_cc_pie" = "yes"; then
SEC_FLAGS="$SEC_FLAGS -fPIE"
SEC_LDFLAGS="$SEC_LDFLAGS -pie"
PACKAGE_FEATURES="$PACKAGE_FEATURES pie"
fi
# similar to above
AC_CACHE_CHECK([whether $CC accepts RELRO flags], [ap_cv_cc_relro], [
save_LDFLAGS=$LDFLAGS
LDFLAGS="$LDFLAGS -Wl,-z,relro"
AC_TRY_RUN([static int foo[30000]; int main () { return 0; }],
[ap_cv_cc_relro=yes], [ap_cv_cc_relro=no], [ap_cv_cc_relro=yes])
LDFLAGS=$save_LDFLAGS
])
if test "$ap_cv_cc_relro" = "yes"; then
SEC_LDFLAGS="$SEC_LDFLAGS -Wl,-z,relro"
PACKAGE_FEATURES="$PACKAGE_FEATURES relro"
fi
AC_CACHE_CHECK([whether $CC accepts BINDNOW flags], [ap_cv_cc_bindnow], [
save_LDFLAGS=$LDFLAGS
LDFLAGS="$LDFLAGS -Wl,-z,now"
AC_TRY_RUN([static int foo[30000]; int main () { return 0; }],
[ap_cv_cc_bindnow=yes], [ap_cv_cc_bindnow=no], [ap_cv_cc_bindnow=yes])
LDFLAGS=$save_LDFLAGS
])
if test "$ap_cv_cc_bindnow" = "yes"; then
SEC_LDFLAGS="$SEC_LDFLAGS -Wl,-z,now"
PACKAGE_FEATURES="$PACKAGE_FEATURES bindnow"
fi
fi
AC_CACHE_CHECK([whether $CC accepts "--as-needed"], [ap_cv_cc_as_needed], [
save_LDFLAGS=$LDFLAGS
LDFLAGS="$LDFLAGS -Wl,--as-needed"
AC_TRY_RUN([static int foo[30000]; int main () { return 0; }],
[ap_cv_cc_as_needed=yes], [ap_cv_cc_as_needed=no], [ap_cv_cc_as_needed=yes])
LDFLAGS=$save_LDFLAGS
])
AC_CACHE_CHECK([whether $CC accepts "--version-script"], [ap_cv_cc_version_script], [
save_LDFLAGS=$LDFLAGS
LDFLAGS="$LDFLAGS -Wl,--version-script=conftest.versions"
echo "CONFTEST { };" >conftest.versions
AC_TRY_RUN([static int foo[30000]; int main () { return 0; }],
[ap_cv_cc_version_script=yes], [ap_cv_cc_version_script=no], [ap_cv_cc_version_script=yes])
rm -f conftest.versions
LDFLAGS=$save_LDFLAGS
])
if test "$ap_cv_cc_version_script" = "yes"; then
AC_SUBST(VERSCRIPT_LDFLAGS, ["-Wl,--version-script=\$(srcdir)/lib\$(call get_libname,\$<).versions"])
else
AC_SUBST(VERSCRIPT_LDFLAGS, [""])
fi
# define global include dirs
INCLUDE_DIRS="$INCLUDE_DIRS -I\$(top_builddir)/include -I\$(top_srcdir)/include"
INCLUDE_DIRS="$INCLUDE_DIRS -I\$(top_builddir)/include/corosync -I\$(top_srcdir)/include/corosync"
# final build of *FLAGS
CFLAGS="$ENV_CFLAGS $lt_prog_compiler_pic $SEC_FLAGS $OPT_CFLAGS $GDB_FLAGS \
$COVERAGE_CFLAGS $EXTRA_WARNINGS \
$WERROR_CFLAGS $NSS_CFLAGS $LIBQB_CFLAGS \
$SNMP_INCLUDES"
CPPFLAGS="$ENV_CPPFLAGS $ANSI_CPPFLAGS $INCLUDE_DIRS"
LDFLAGS="$ENV_LDFLAGS $lt_prog_compiler_pic $SEC_LDFLAGS $COVERAGE_LDFLAGS"
if test "$ap_cv_cc_as_needed" = "yes"; then
LDFLAGS="$LDFLAGS -Wl,--as-needed"
fi
# substitute what we need:
AC_SUBST([BASHPATH])
AC_SUBST([INITDDIR])
AC_SUBST([SYSTEMDDIR])
AC_SUBST([UPSTARTDIR])
INITWRAPPERSDIR=$(eval echo ${INITWRAPPERSDIR})
AC_SUBST([INITWRAPPERSDIR])
AC_SUBST([LOGDIR])
AC_SUBST([LOGROTATEDIR])
AC_SUBST([TMPFILESDIR])
AC_SUBST([SOMAJOR])
AC_SUBST([SOMINOR])
AC_SUBST([SOMICRO])
AC_SUBST([SONAME])
AM_CONDITIONAL(INSTALL_TESTAGENTS, test "${enable_testagents}" = "yes")
AM_CONDITIONAL(INSTALL_MIB, test "${do_snmp}" = "1")
AM_CONDITIONAL(INSTALL_DBUSCONF, test "${enable_dbus}" = "yes")
AM_CONDITIONAL(AUGTOOL, test -n "${AUGTOOL}")
AC_SUBST([NSS_LDFLAGS])
AM_CONDITIONAL(BUILD_HTML_DOCS, test -n "${GROFF}")
AC_SUBST([LINT_FLAGS])
AC_DEFINE_UNQUOTED([LOCALSTATEDIR], "$(eval echo ${localstatedir})", [localstate directory])
COROSYSCONFDIR=${sysconfdir}/corosync
AC_SUBST([COROSYSCONFDIR])
AC_DEFINE_UNQUOTED([COROSYSCONFDIR], "$(eval echo ${COROSYSCONFDIR})", [corosync config directory])
AC_DEFINE_UNQUOTED([PACKAGE_FEATURES], "${PACKAGE_FEATURES}", [corosync built-in features])
AC_OUTPUT
AC_MSG_RESULT([])
AC_MSG_RESULT([$PACKAGE configuration:])
AC_MSG_RESULT([ Version = ${VERSION}])
AC_MSG_RESULT([ Prefix = ${prefix}])
AC_MSG_RESULT([ Executables = ${sbindir}])
AC_MSG_RESULT([ Man pages = ${mandir}])
AC_MSG_RESULT([ Doc dir = ${docdir}])
AC_MSG_RESULT([ Libraries = ${libdir}])
AC_MSG_RESULT([ Header files = ${includedir}])
AC_MSG_RESULT([ Arch-independent files = ${datadir}])
AC_MSG_RESULT([ State information = ${localstatedir}])
AC_MSG_RESULT([ System configuration = ${sysconfdir}])
AC_MSG_RESULT([ System init.d directory = ${INITDDIR}])
AC_MSG_RESULT([ System systemd directory = ${SYSTEMDDIR}])
AC_MSG_RESULT([ System upstart directory = ${UPSTARTDIR}])
AC_MSG_RESULT([ System init wraps dir = ${INITWRAPPERSDIR}])
AC_MSG_RESULT([ System tmpfiles.d = ${TMPFILESDIR}])
AC_MSG_RESULT([ Log directory = ${LOGDIR}])
AC_MSG_RESULT([ Log rotate directory = ${LOGROTATEDIR}])
AC_MSG_RESULT([ corosync config dir = ${COROSYSCONFDIR}])
AC_MSG_RESULT([ Features =${PACKAGE_FEATURES}])
AC_MSG_RESULT([])
# Build-related part of the configuration summary. The debug and warning
# lines print GDB_FLAGS and EXTRA_WARNINGS, the variables that are actually
# assembled into the final CFLAGS earlier in this script.
AC_MSG_RESULT([$PACKAGE build info:])
AC_MSG_RESULT([ Library SONAME = ${SONAME}])
LIB_MSG_RESULT(m4_shift(local_soname_list))dnl
AC_MSG_RESULT([ Default optimization = ${OPT_CFLAGS}])
AC_MSG_RESULT([ Default debug options = ${GDB_FLAGS}])
AC_MSG_RESULT([ Extra compiler warnings = ${EXTRA_WARNINGS}])
AC_MSG_RESULT([ Env. defined CFLAG = ${ENV_CFLAGS}])
AC_MSG_RESULT([ Env. defined CPPFLAGS = ${ENV_CPPFLAGS}])
AC_MSG_RESULT([ Env. defined LDFLAGS = ${ENV_LDFLAGS}])
AC_MSG_RESULT([ ANSI defined CPPFLAGS = ${ANSI_CPPFLAGS}])
AC_MSG_RESULT([ Coverage CFLAGS = ${COVERAGE_CFLAGS}])
AC_MSG_RESULT([ Coverage LDFLAGS = ${COVERAGE_LDFLAGS}])
AC_MSG_RESULT([ Fatal War. CFLAGS = ${WERROR_CFLAGS}])
AC_MSG_RESULT([ Final CFLAGS = ${CFLAGS}])
AC_MSG_RESULT([ Final CPPFLAGS = ${CPPFLAGS}])
AC_MSG_RESULT([ Final LDFLAGS = ${LDFLAGS}])
diff --git a/exec/Makefile.am b/exec/Makefile.am
index 9ca77202..27a6d600 100644
--- a/exec/Makefile.am
+++ b/exec/Makefile.am
@@ -1,82 +1,79 @@
# Copyright (c) 2009 Red Hat, Inc.
#
# Authors: Andrew Beekhof
# Steven Dake (sdake@redhat.com)
#
# This software licensed under BSD license, the text of which follows:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# - Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# - Neither the name of the MontaVista Software, Inc. nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGE.
MAINTAINERCLEANFILES = Makefile.in
noinst_HEADERS = apidef.h cs_queue.h logconfig.h main.h \
quorum.h service.h timer.h totemconfig.h \
- totemmrp.h totemnet.h totemudp.h totemiba.h \
- totemrrp.h totemudpu.h totemsrp.h util.h vsf.h \
+ totemnet.h totemudp.h \
+ totemudpu.h totemsrp.h util.h vsf.h \
schedwrk.h sync.h fsm.h votequorum.h vsf_ykd.h \
- totemcrypto.h
+ totemknet.h
TOTEM_SRC = totemip.c totemnet.c totemudp.c \
- totemudpu.c totemrrp.c totemsrp.c totemmrp.c \
- totempg.c totemcrypto.c
+ totemudpu.c totemsrp.c \
+ totempg.c totemknet.c
-if BUILD_RDMA
-TOTEM_SRC += totemiba.c
-endif
lib_LTLIBRARIES = libtotem_pg.la
libtotem_pg_la_SOURCES = $(TOTEM_SRC)
libtotem_pg_la_CFLAGS = $(nss_CFLAGS) $(rdmacm_CFLAGS) $(ibverbs_CFLAGS)
libtotem_pg_la_LDFLAGS = -version-number $(subst .,:,$(SONAME))
libtotem_pg_la_LIBADD = -lpthread $(LIBQB_LIBS) $(nss_LIBS) \
- $(rdmacm_LIBS) $(ibverbs_LIBS)
+ $(rdmacm_LIBS) $(ibverbs_LIBS) -lknet
sbin_PROGRAMS = corosync
corosync_SOURCES = vsf_ykd.c coroparse.c vsf_quorum.c sync.c \
logsys.c cfg.c cmap.c cpg.c pload.c \
votequorum.c util.c schedwrk.c main.c \
apidef.c quorum.c icmap.c timer.c \
ipc_glue.c service.c logconfig.c totemconfig.c
if BUILD_MONITORING
corosync_SOURCES += mon.c
endif
if BUILD_WATCHDOG
corosync_SOURCES += wd.c
endif
corosync_CPPFLAGS = -DLOGCONFIG_USE_ICMAP=1
corosync_CFLAGS = $(statgrab_CFLAGS)
corosync_LDADD = libtotem_pg.la ../common_lib/libcorosync_common.la \
$(LIBQB_LIBS) $(statgrab_LIBS)
corosync_DEPENDENCIES = libtotem_pg.la ../common_lib/libcorosync_common.la
lint:
-splint $(LINT_FLAGS) $(CPPFLAGS) $(CFLAGS) *.c
diff --git a/exec/coroparse.c b/exec/coroparse.c
index 8296681f..16e2d148 100644
--- a/exec/coroparse.c
+++ b/exec/coroparse.c
@@ -1,1340 +1,1400 @@
/*
* Copyright (c) 2006-2013 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Patrick Caulfield (pcaulfie@redhat.com)
* Jan Friesse (jfriesse@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/un.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <dirent.h>
#include <limits.h>
#include <stddef.h>
#include <grp.h>
#include <pwd.h>
#include <corosync/list.h>
#include <qb/qbutil.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/logsys.h>
#include <corosync/icmap.h>
#include "main.h"
#include "util.h"
/*
 * Kind of event handed to a parser_cb_f callback through its "type"
 * argument.
 * NOTE(review): judging by the names these mark the start/end of parsing,
 * the start/end of a section and a single key/value item -- confirm against
 * parse_section().
 */
enum parser_cb_type {
PARSER_CB_START,
PARSER_CB_END,
PARSER_CB_SECTION_START,
PARSER_CB_SECTION_END,
PARSER_CB_ITEM,
};
/*
 * Parser state value. It is passed by value into parse_section() and by
 * pointer to every parser_cb_f callback.
 * NOTE(review): each enumerator presumably names the configuration section
 * currently being parsed (totem, interface, nodelist, ...) -- confirm
 * against the callback implementation.
 */
enum main_cp_cb_data_state {
MAIN_CP_CB_DATA_STATE_NORMAL,
MAIN_CP_CB_DATA_STATE_TOTEM,
MAIN_CP_CB_DATA_STATE_INTERFACE,
MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS,
MAIN_CP_CB_DATA_STATE_UIDGID,
MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON,
MAIN_CP_CB_DATA_STATE_MEMBER,
MAIN_CP_CB_DATA_STATE_QUORUM,
MAIN_CP_CB_DATA_STATE_QDEVICE,
MAIN_CP_CB_DATA_STATE_NODELIST,
MAIN_CP_CB_DATA_STATE_NODELIST_NODE,
MAIN_CP_CB_DATA_STATE_PLOAD,
MAIN_CP_CB_DATA_STATE_QB,
MAIN_CP_CB_DATA_STATE_RESOURCES,
MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM,
MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS,
MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM_MEMUSED,
MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS_MEMUSED
};
/*
 * Signature of the callback supplied to parse_section().
 *
 * path         - read-only path string
 * key, value   - mutable strings for the current item
 * state        - in/out parser state; the callback receives a pointer and
 *                may therefore change it
 * type         - which parser event is being reported
 * error_string - out parameter for an error message
 * config_map   - icmap map handed through from the caller
 * user_data    - opaque pointer handed through from the caller
 *
 * NOTE(review): the meaning of the int return value and which arguments are
 * valid for which event type are not visible here -- confirm against
 * parse_section().
 */
typedef int (*parser_cb_f)(const char *path,
char *key,
char *value,
enum main_cp_cb_data_state *state,
enum parser_cb_type type,
const char **error_string,
icmap_map_t config_map,
void *user_data);
/*
 * One key/value string pair that can be chained into a list through the
 * embedded list_head.
 * NOTE(review): ownership of key and value (who allocates and frees them)
 * is not visible here -- confirm at the point of use.
 */
struct key_value_list_item {
char *key;
char *value;
struct list_head list;
};
/*
 * Working data collected while the configuration file is parsed.
 * NOTE(review): presumably passed to the parser callback through its
 * user_data pointer -- confirm at the call site. logger_subsys_items_head
 * and member_items_head are list heads; the element type they link is not
 * visible in this part of the file.
 */
struct main_cp_cb_data {
- int ringnumber;
+ int linknumber;
char *bindnetaddr;
char *mcastaddr;
char *broadcast;
int mcastport;
int ttl;
+ int knet_link_priority;
+ int knet_ping_interval;
+ int knet_ping_timeout;
+ int knet_ping_precision;
struct list_head logger_subsys_items_head;
char *subsys;
char *logging_daemon_name;
struct list_head member_items_head;
int node_number;
int ring0_addr_added;
};
static int read_config_file_into_icmap(
const char **error_string, icmap_map_t config_map);
static char error_string_response[512];
/*
 * Resolve a user given either as a decimal uid or as a user name.
 *
 * req_user: NUL-terminated user specification.
 *
 * Returns the numeric uid on success. On failure returns -1 and stores a
 * human-readable message in error_string_response.
 */
static int uid_determine (const char *req_user)
{
	int pw_uid = 0;
	struct passwd passwd;
	struct passwd* pwdptr = &passwd;
	struct passwd* temp_pwd_pt;
	char *pwdbuffer;
	int pwdlinelen, rc;
	long int id;
	char *ep;

	/*
	 * A purely numeric string is taken as the uid itself. ep == req_user
	 * means strtol() consumed nothing (e.g. an empty string); that must not
	 * be silently accepted as uid 0, so it falls through to the name lookup.
	 */
	id = strtol(req_user, &ep, 10);
	if (ep != req_user && *ep == '\0' && id >= 0 && id <= UINT_MAX) {
		return (id);
	}

	pwdlinelen = sysconf (_SC_GETPW_R_SIZE_MAX);
	if (pwdlinelen == -1) {
		pwdlinelen = 256;
	}

	pwdbuffer = malloc (pwdlinelen);
	if (pwdbuffer == NULL) {
		snprintf (error_string_response, sizeof (error_string_response),
			"Out of memory while looking up user");
		return (-1);
	}

	while ((rc = getpwnam_r (req_user, pwdptr, pwdbuffer, pwdlinelen, &temp_pwd_pt)) == ERANGE) {
		char *n;
		int new_len = pwdlinelen * 2;

		/*
		 * Only report a larger buffer to getpwnam_r() once it really has
		 * been grown. If the size limit is hit or realloc() fails, stop
		 * retrying; rc stays ERANGE and is reported below.
		 */
		if (new_len > 32678) {
			break;
		}
		n = realloc (pwdbuffer, new_len);
		if (n == NULL) {
			break;
		}
		pwdbuffer = n;
		pwdlinelen = new_len;
	}

	if (rc != 0) {
		free (pwdbuffer);
		snprintf (error_string_response, sizeof (error_string_response),
			"getpwnam_r(): %s", strerror(rc));
		return (-1);
	}
	if (temp_pwd_pt == NULL) {
		free (pwdbuffer);
		/* req_user has no length limit, so the write must be bounded. */
		snprintf (error_string_response, sizeof (error_string_response),
			"The '%s' user is not found in /etc/passwd, please read the documentation.",
			req_user);
		return (-1);
	}
	pw_uid = passwd.pw_uid;
	free (pwdbuffer);

	return pw_uid;
}
/*
 * Resolve a group specification to a numeric gid.
 *
 * req_group is either a non-negative decimal number, which is returned as
 * is, or a group name that is looked up with getgrnam_r().
 *
 * Returns the gid on success. On failure -1 is returned and a description of
 * the problem is left in error_string_response.
 */
static int gid_determine (const char *req_group)
{
	int corosync_gid = 0;
	struct group group;
	struct group * grpptr = &group;
	struct group * temp_grp_pt;
	char *grpbuffer;
	int grplinelen, rc;
	long int id;
	char *ep;

	/*
	 * Purely numeric input is taken as the gid itself. Require at least
	 * one digit so that an empty value is not silently mapped to gid 0.
	 */
	id = strtol(req_group, &ep, 10);
	if (ep != req_group && *ep == '\0' && id >= 0 && id <= UINT_MAX) {
		return (id);
	}

	grplinelen = sysconf (_SC_GETGR_R_SIZE_MAX);
	if (grplinelen == -1) {
		grplinelen = 256;
	}

	grpbuffer = malloc (grplinelen);
	if (grpbuffer == NULL) {
		snprintf (error_string_response, sizeof(error_string_response),
		    "getgrnam_r(): %s", strerror(ENOMEM));
		return (-1);
	}

	/*
	 * ERANGE means the scratch buffer was too small. Grow it and retry,
	 * but only announce the larger size once the buffer really has it;
	 * if it cannot grow (size cap reached or realloc failed) give up and
	 * report ERANGE instead of retrying with a length the buffer lacks.
	 */
	while ((rc = getgrnam_r (req_group, grpptr, grpbuffer, grplinelen, &temp_grp_pt)) == ERANGE) {
		char *n;
		int grown_len = grplinelen * 2;

		if (grown_len > 32678) {
			break;
		}
		n = realloc (grpbuffer, grown_len);
		if (n == NULL) {
			break;
		}
		grpbuffer = n;
		grplinelen = grown_len;
	}

	if (rc != 0) {
		free (grpbuffer);
		snprintf (error_string_response, sizeof(error_string_response),
		    "getgrnam_r(): %s", strerror(rc));
		return (-1);
	}

	/* rc == 0 with a NULL result means "no such group" */
	if (temp_grp_pt == NULL) {
		free (grpbuffer);
		/* req_group comes from the config file; bound the copy */
		snprintf (error_string_response, sizeof(error_string_response),
		    "The '%s' group is not found in /etc/group, please read the documentation.",
		    req_group);
		return (-1);
	}

	corosync_gid = group.gr_gid;
	free (grpbuffer);

	return corosync_gid;
}
/*
 * Find the first occurrence of byte in haystack and return a pointer to the
 * first character behind it that is neither a space nor a tab.
 * Returns NULL when byte does not occur in haystack.
 */
static char *strchr_rs (const char *haystack, int byte)
{
	const char *pos = strchr (haystack, byte);

	if (pos == NULL) {
		return (NULL);
	}

	/* step over the matched byte itself, then over any blanks */
	for (pos++; *pos == ' ' || *pos == '\t'; pos++) {
		continue;
	}

	return ((char *) pos);
}
/*
 * Load the configuration into config_map by delegating to
 * read_config_file_into_icmap(). Any non-zero result of that call is
 * reported as -1; success is reported as 0. error_string is passed through.
 */
int coroparse_configparse (icmap_map_t config_map, const char **error_string)
{
	int failed = read_config_file_into_icmap(error_string, config_map);

	return (failed ? -1 : 0);
}
/*
 * Trim a string in place.
 *
 * Leading spaces and tabs are skipped; trailing spaces and tabs are cut off.
 * When remove_colon_and_brace is non-zero, trailing ':' and '{' characters
 * are cut off as well. The first remaining character is never removed.
 *
 * Returns a pointer into string at the first non-blank character; the buffer
 * is modified by writing a terminator after the last kept character.
 */
static char *remove_whitespace(char *string, int remove_colon_and_brace)
{
	char *start;
	char *end;

	start = string;
	while (*start == ' ' || *start == '\t')
		start++;

	/* Nothing left to trim; also avoids forming a pointer before start */
	if (*start == '\0')
		return start;

	end = start + strlen(start) - 1;
	while (end > start &&
	    (*end == ' ' || *end == '\t' ||
	    (remove_colon_and_brace && (*end == ':' || *end == '{'))))
		end--;

	/*
	 * Always terminate after the last kept character, including the case
	 * where trimming stopped at the very first one (e.g. "a:" -> "a").
	 */
	*(end + 1) = '\0';

	return start;
}
/*
 * Recursive-descent reader for one section of a configuration file.
 *
 * Reads lines from fp until the section's closing brace (or end of file for
 * the top level) and reports what it finds through parser_cb:
 *   - "name {"     -> PARSER_CB_SECTION_START, then a recursive call that
 *                     consumes the nested section
 *   - "key: value" -> PARSER_CB_ITEM
 *   - "}"          -> PARSER_CB_SECTION_END and return to the caller
 * Blank lines and lines whose first non-blank character is '#' are skipped.
 * When called with an empty path it additionally emits PARSER_CB_START
 * before reading and PARSER_CB_END after the last line.
 *
 * path      - dotted path of the section being read ("" at the top level)
 * depth     - nesting level; 0 for the top level
 * state     - parser state for this section, passed by value so changes made
 *             while reading a nested section do not leak back to the parent
 * user_data - handed unchanged to parser_cb
 *
 * Returns 0 on success. Returns -1 on error; the explicit parser errors set
 * *error_string here, callback failures rely on the callback having set it.
 */
static int parse_section(FILE *fp,
char *path,
const char **error_string,
int depth,
enum main_cp_cb_data_state state,
parser_cb_f parser_cb,
icmap_map_t config_map,
void *user_data)
{
/* NOTE(review): fgets() hands back longer physical lines in several pieces - confirm this is acceptable */
char line[512];
int i;
char *loc;
int ignore_line;
char new_keyname[ICMAP_KEYNAME_MAXLEN];
/* Only the outermost invocation announces the start of parsing (result is not checked) */
if (strcmp(path, "") == 0) {
parser_cb("", NULL, NULL, &state, PARSER_CB_START, error_string, config_map, user_data);
}
while (fgets (line, sizeof (line), fp)) {
/* Strip the line ending: a trailing "\n" and then a trailing "\r" */
if (strlen(line) > 0) {
if (line[strlen(line) - 1] == '\n')
line[strlen(line) - 1] = '\0';
if (strlen (line) > 0 && line[strlen(line) - 1] == '\r')
line[strlen(line) - 1] = '\0';
}
/*
 * Clear out trailing spaces and tabs
 */
for (i = strlen (line) - 1; i > -1; i--) {
if (line[i] == '\t' || line[i] == ' ') {
line[i] = '\0';
} else {
break;
}
}
/* A line is kept only if its first non-blank character exists and is not '#' */
ignore_line = 1;
for (i = 0; i < strlen (line); i++) {
if (line[i] != '\t' && line[i] != ' ') {
if (line[i] != '#')
ignore_line = 0;
break;
}
}
/*
 * Skip comments and empty lines
 */
if (ignore_line) {
continue;
}
/* New section ? */
if ((loc = strchr_rs (line, '{'))) {
/* Trim the section name in place (also drops the trailing '{' and blanks) */
char *section = remove_whitespace(line, 1);
enum main_cp_cb_data_state newstate;
/* loc points behind the '{' and any blanks after it; terminate the line one character before that */
loc--;
*loc = '\0';
/* +1 accounts for the '.' separator; keeps the strcpy/strcat below inside new_keyname */
if (strlen(path) + strlen(section) + 1 >= ICMAP_KEYNAME_MAXLEN) {
*error_string = "parser error: Start of section makes total cmap path too long";
return -1;
}
strcpy(new_keyname, path);
if (strcmp(path, "") != 0) {
strcat(new_keyname, ".");
}
strcat(new_keyname, section);
/* Only use the new state for items further down the stack */
newstate = state;
if (!parser_cb(new_keyname, NULL, NULL, &newstate, PARSER_CB_SECTION_START, error_string, config_map, user_data)) {
return -1;
}
/* The recursive call consumes everything up to the matching '}' */
if (parse_section(fp, new_keyname, error_string, depth + 1, newstate, parser_cb, config_map, user_data))
return -1;
continue ;
}
/* New key/value */
if ((loc = strchr_rs (line, ':'))) {
char *key;
char *value;
/* loc points at the value; cut the line one character before it so the key part is its own string */
*(loc-1) = '\0';
key = remove_whitespace(line, 1);
value = remove_whitespace(loc, 0);
/* +1 accounts for the '.' separator; keeps the strcpy/strcat below inside new_keyname */
if (strlen(path) + strlen(key) + 1 >= ICMAP_KEYNAME_MAXLEN) {
*error_string = "parser error: New key makes total cmap path too long";
return -1;
}
strcpy(new_keyname, path);
if (strcmp(path, "") != 0) {
strcat(new_keyname, ".");
}
strcat(new_keyname, key);
if (!parser_cb(new_keyname, key, value, &state, PARSER_CB_ITEM, error_string, config_map, user_data)) {
return -1;
}
continue ;
}
/* End of the current section */
if (strchr_rs (line, '}')) {
if (depth == 0) {
*error_string = "parser error: Unexpected closing brace";
return -1;
}
if (!parser_cb(path, NULL, NULL, &state, PARSER_CB_SECTION_END, error_string, config_map, user_data)) {
return -1;
}
return 0;
}
}
/* End of file: a non-empty path means the section being read was never closed */
if (strcmp(path, "") != 0) {
*error_string = "parser error: Missing closing brace";
return -1;
}
/* Top level finished; announce the end of parsing (result is not checked) */
if (strcmp(path, "") == 0) {
parser_cb("", NULL, NULL, &state, PARSER_CB_END, error_string, config_map, user_data);
}
return 0;
}
/*
 * Look up the representable range of an integer icmap value type.
 *
 * For ICMAP_VALUETYPE_[U]INT[8|16|32] the inclusive bounds are stored in
 * *min_val and *max_val and 0 is returned. For every other type -1 is
 * returned and the output parameters are left untouched.
 */
static int safe_atoq_range(icmap_value_types_t value_type, long long int *min_val, long long int *max_val)
{
	long long int lower;
	long long int upper;

	switch (value_type) {
	case ICMAP_VALUETYPE_INT8:
		lower = INT8_MIN;
		upper = INT8_MAX;
		break;
	case ICMAP_VALUETYPE_UINT8:
		lower = 0;
		upper = UINT8_MAX;
		break;
	case ICMAP_VALUETYPE_INT16:
		lower = INT16_MIN;
		upper = INT16_MAX;
		break;
	case ICMAP_VALUETYPE_UINT16:
		lower = 0;
		upper = UINT16_MAX;
		break;
	case ICMAP_VALUETYPE_INT32:
		lower = INT32_MIN;
		upper = INT32_MAX;
		break;
	case ICMAP_VALUETYPE_UINT32:
		lower = 0;
		upper = UINT32_MAX;
		break;
	default:
		/* unsupported type: outputs are deliberately not written */
		return (-1);
	}

	*min_val = lower;
	*max_val = upper;

	return (0);
}
/*
 * Parse the decimal string str into *res, checking that the value fits the
 * integer type named by target_type. Only ICMAP_VALUETYPE_[U]INT[8|16|32]
 * are supported (see safe_atoq_range()).
 *
 * Returns 0 on success. Returns -1 - leaving *res unmodified - when the
 * string is not a complete number, overflows long long, target_type is not
 * supported, or the value lies outside the range of target_type.
 */
static int safe_atoq(const char *str, long long int *res, icmap_value_types_t target_type)
{
	long long int lower, upper;
	long long int parsed;
	char *stop;

	errno = 0;
	parsed = strtoll(str, &stop, 10);

	/* overflow, no digits at all, or trailing characters after the number */
	if (errno == ERANGE || stop == str || *stop != '\0') {
		return (-1);
	}

	if (safe_atoq_range(target_type, &lower, &upper) != 0) {
		return (-1);
	}

	if (parsed < lower || parsed > upper) {
		return (-1);
	}

	*res = parsed;

	return (0);
}
/*
 * Parse the decimal string str into the unsigned 64-bit style value *res.
 *
 * Returns 0 on success. Returns -1 - leaving *res unmodified - when the
 * string is empty, is not a complete number, is negative, or does not fit
 * into unsigned long long.
 */
static int str_to_ull(const char *str, unsigned long long int *res)
{
	unsigned long long int val;
	char *endptr;

	/*
	 * strtoull() accepts a leading '-' and returns the negated value
	 * converted to unsigned (so "-1" would become ULLONG_MAX without any
	 * error). Reject a minus sign behind the white space strtoull() skips.
	 */
	if (str[strspn(str, " \t\n\v\f\r")] == '-') {
		return (-1);
	}

	errno = 0;
	val = strtoull(str, &endptr, 10);
	if (errno == ERANGE) {
		return (-1);
	}

	/* no digits consumed at all */
	if (endptr == str) {
		return (-1);
	}

	/* trailing characters after the number */
	if (*endptr != '\0') {
		return (-1);
	}

	*res = val;

	return (0);
}
/*
 * Parser callback for the main configuration file; read_config_file_into_icmap()
 * hands it to parse_section() together with a struct main_cp_cb_data as user_data.
 *
 * What happens per event type:
 *   PARSER_CB_START         - zero the user data and set the state to NORMAL
 *   PARSER_CB_END           - nothing
 *   PARSER_CB_ITEM          - depending on *state, validate/convert the value and
 *                             either store it in config_map right away or keep it
 *                             in the user data until the section ends; every item
 *                             not handled specially is stored as a string under path
 *   PARSER_CB_SECTION_START - pick the state for the section named by path and
 *                             reset the scratch fields that section uses
 *   PARSER_CB_SECTION_END   - write the data collected for interface,
 *                             logger_subsys and logging_daemon sections to
 *                             config_map and release it; for the resources.*
 *                             states, set *state to the enclosing section's state
 *
 * Returns 1 on success and 0 on failure, in which case *error_string points to
 * a message (a string literal, error_string_response, or the function's own
 * static buffer, which is overwritten by the next conversion error).
 */
static int main_config_parser_cb(const char *path,
char *key,
char *value,
enum main_cp_cb_data_state *state,
enum parser_cb_type type,
const char **error_string,
icmap_map_t config_map,
void *user_data)
{
int ii;
long long int val;
long long int min_val, max_val;
/* val_type is set before every safe_atoq() call so atoi_error can report the expected range */
icmap_value_types_t val_type = ICMAP_VALUETYPE_BINARY;
unsigned long long int ull;
/* Non-zero while the current item still has to be stored as a plain string */
int add_as_string;
char key_name[ICMAP_KEYNAME_MAXLEN];
static char formated_err[256];
struct main_cp_cb_data *data = (struct main_cp_cb_data *)user_data;
struct key_value_list_item *kv_item;
struct list_head *iter, *iter_next;
int uid, gid;
switch (type) {
case PARSER_CB_START:
memset(data, 0, sizeof(struct main_cp_cb_data));
*state = MAIN_CP_CB_DATA_STATE_NORMAL;
break;
case PARSER_CB_END:
break;
case PARSER_CB_ITEM:
add_as_string = 1;
switch (*state) {
case MAIN_CP_CB_DATA_STATE_NORMAL:
break;
case MAIN_CP_CB_DATA_STATE_PLOAD:
if ((strcmp(path, "pload.count") == 0) ||
(strcmp(path, "pload.size") == 0)) {
val_type = ICMAP_VALUETYPE_UINT32;
if (safe_atoq(value, &val, val_type) != 0) {
goto atoi_error;
}
icmap_set_uint32_r(config_map, path, val);
add_as_string = 0;
}
break;
case MAIN_CP_CB_DATA_STATE_QUORUM:
/* 32-bit unsigned quorum settings */
if ((strcmp(path, "quorum.expected_votes") == 0) ||
(strcmp(path, "quorum.votes") == 0) ||
(strcmp(path, "quorum.last_man_standing_window") == 0) ||
(strcmp(path, "quorum.leaving_timeout") == 0)) {
val_type = ICMAP_VALUETYPE_UINT32;
if (safe_atoq(value, &val, val_type) != 0) {
goto atoi_error;
}
icmap_set_uint32_r(config_map, path, val);
add_as_string = 0;
}
/* 8-bit unsigned quorum settings */
if ((strcmp(path, "quorum.two_node") == 0) ||
(strcmp(path, "quorum.expected_votes_tracking") == 0) ||
(strcmp(path, "quorum.allow_downscale") == 0) ||
(strcmp(path, "quorum.wait_for_all") == 0) ||
(strcmp(path, "quorum.auto_tie_breaker") == 0) ||
(strcmp(path, "quorum.last_man_standing") == 0)) {
val_type = ICMAP_VALUETYPE_UINT8;
if (safe_atoq(value, &val, val_type) != 0) {
goto atoi_error;
}
icmap_set_uint8_r(config_map, path, val);
add_as_string = 0;
}
break;
case MAIN_CP_CB_DATA_STATE_QDEVICE:
if ((strcmp(path, "quorum.device.timeout") == 0) ||
(strcmp(path, "quorum.device.sync_timeout") == 0) ||
(strcmp(path, "quorum.device.votes") == 0)) {
val_type = ICMAP_VALUETYPE_UINT32;
if (safe_atoq(value, &val, val_type) != 0) {
goto atoi_error;
}
icmap_set_uint32_r(config_map, path, val);
add_as_string = 0;
}
if ((strcmp(path, "quorum.device.master_wins") == 0)) {
val_type = ICMAP_VALUETYPE_UINT8;
if (safe_atoq(value, &val, val_type) != 0) {
goto atoi_error;
}
icmap_set_uint8_r(config_map, path, val);
add_as_string = 0;
}
break;
case MAIN_CP_CB_DATA_STATE_TOTEM:
/* totem keys that are stored as 32-bit unsigned integers */
if ((strcmp(path, "totem.version") == 0) ||
(strcmp(path, "totem.nodeid") == 0) ||
(strcmp(path, "totem.threads") == 0) ||
(strcmp(path, "totem.token") == 0) ||
(strcmp(path, "totem.token_coefficient") == 0) ||
(strcmp(path, "totem.token_retransmit") == 0) ||
(strcmp(path, "totem.hold") == 0) ||
(strcmp(path, "totem.token_retransmits_before_loss_const") == 0) ||
(strcmp(path, "totem.join") == 0) ||
(strcmp(path, "totem.send_join") == 0) ||
(strcmp(path, "totem.consensus") == 0) ||
(strcmp(path, "totem.merge") == 0) ||
(strcmp(path, "totem.downcheck") == 0) ||
(strcmp(path, "totem.fail_recv_const") == 0) ||
(strcmp(path, "totem.seqno_unchanged_const") == 0) ||
(strcmp(path, "totem.rrp_token_expired_timeout") == 0) ||
(strcmp(path, "totem.rrp_problem_count_timeout") == 0) ||
(strcmp(path, "totem.rrp_problem_count_threshold") == 0) ||
(strcmp(path, "totem.rrp_problem_count_mcast_threshold") == 0) ||
(strcmp(path, "totem.rrp_autorecovery_check_timeout") == 0) ||
(strcmp(path, "totem.heartbeat_failures_allowed") == 0) ||
(strcmp(path, "totem.max_network_delay") == 0) ||
(strcmp(path, "totem.window_size") == 0) ||
(strcmp(path, "totem.max_messages") == 0) ||
(strcmp(path, "totem.miss_count_const") == 0) ||
(strcmp(path, "totem.netmtu") == 0)) {
val_type = ICMAP_VALUETYPE_UINT32;
if (safe_atoq(value, &val, val_type) != 0) {
goto atoi_error;
}
icmap_set_uint32_r(config_map,path, val);
add_as_string = 0;
}
if (strcmp(path, "totem.config_version") == 0) {
/* NOTE(review): on failure atoi_error runs with val_type from an earlier call (initially BINARY, which safe_atoq_range() rejects) */
if (str_to_ull(value, &ull) != 0) {
goto atoi_error;
}
icmap_set_uint64_r(config_map, path, ull);
add_as_string = 0;
}
/* The enumerated settings below are only validated; they are still stored as strings */
if (strcmp(path, "totem.ip_version") == 0) {
if ((strcmp(value, "ipv4") != 0) &&
(strcmp(value, "ipv6") != 0)) {
*error_string = "Invalid ip_version type";
return (0);
}
}
if (strcmp(path, "totem.crypto_type") == 0) {
if ((strcmp(value, "nss") != 0) &&
(strcmp(value, "aes256") != 0) &&
(strcmp(value, "aes192") != 0) &&
(strcmp(value, "aes128") != 0) &&
(strcmp(value, "3des") != 0)) {
*error_string = "Invalid crypto type";
return (0);
}
}
if (strcmp(path, "totem.crypto_cipher") == 0) {
if ((strcmp(value, "none") != 0) &&
(strcmp(value, "aes256") != 0) &&
(strcmp(value, "aes192") != 0) &&
(strcmp(value, "aes128") != 0) &&
(strcmp(value, "3des") != 0)) {
*error_string = "Invalid cipher type";
return (0);
}
}
if (strcmp(path, "totem.crypto_hash") == 0) {
if ((strcmp(value, "none") != 0) &&
(strcmp(value, "md5") != 0) &&
(strcmp(value, "sha1") != 0) &&
(strcmp(value, "sha256") != 0) &&
(strcmp(value, "sha384") != 0) &&
(strcmp(value, "sha512") != 0)) {
*error_string = "Invalid hash type";
return (0);
}
}
break;
case MAIN_CP_CB_DATA_STATE_QB:
if (strcmp(path, "qb.ipc_type") == 0) {
if ((strcmp(value, "native") != 0) &&
(strcmp(value, "shm") != 0) &&
(strcmp(value, "socket") != 0)) {
*error_string = "Invalid qb ipc_type";
return (0);
}
}
break;
case MAIN_CP_CB_DATA_STATE_INTERFACE:
/*
 * Interface values are only remembered here; they are written to the map
 * at section end, when the link number that forms part of the key is known.
 */
- if (strcmp(path, "totem.interface.ringnumber") == 0) {
+ if (strcmp(path, "totem.interface.linknumber") == 0) {
val_type = ICMAP_VALUETYPE_UINT8;
if (safe_atoq(value, &val, val_type) != 0) {
goto atoi_error;
}
- data->ringnumber = val;
+ data->linknumber = val;
add_as_string = 0;
}
/* NOTE(review): strdup() results are not checked here and a repeated key overwrites the previous copy without freeing it */
if (strcmp(path, "totem.interface.bindnetaddr") == 0) {
data->bindnetaddr = strdup(value);
add_as_string = 0;
}
if (strcmp(path, "totem.interface.mcastaddr") == 0) {
data->mcastaddr = strdup(value);
add_as_string = 0;
}
if (strcmp(path, "totem.interface.broadcast") == 0) {
data->broadcast = strdup(value);
add_as_string = 0;
}
if (strcmp(path, "totem.interface.mcastport") == 0) {
val_type = ICMAP_VALUETYPE_UINT16;
if (safe_atoq(value, &val, val_type) != 0) {
goto atoi_error;
}
data->mcastport = val;
add_as_string = 0;
}
if (strcmp(path, "totem.interface.ttl") == 0) {
val_type = ICMAP_VALUETYPE_UINT8;
if (safe_atoq(value, &val, val_type) != 0) {
goto atoi_error;
}
data->ttl = val;
add_as_string = 0;
}
+ if (strcmp(path, "totem.interface.knet_link_priority") == 0) {
+ val_type = ICMAP_VALUETYPE_UINT8;
+ if (safe_atoq(value, &val, val_type) != 0) {
+ goto atoi_error;
+ }
+ data->knet_link_priority = val;
+ add_as_string = 0;
+ }
/*
 * NOTE(review): the three knet_ping_* values are validated as UINT32 but kept
 * in int fields that are later tested with "> -1"; values above INT_MAX
 * presumably end up negative and would then be dropped - confirm.
 */
+ if (strcmp(path, "totem.interface.knet_ping_interval") == 0) {
+ val_type = ICMAP_VALUETYPE_UINT32;
+ if (safe_atoq(value, &val, val_type) != 0) {
+ goto atoi_error;
+ }
+ data->knet_ping_interval = val;
+ add_as_string = 0;
+ }
+ if (strcmp(path, "totem.interface.knet_ping_timeout") == 0) {
+ val_type = ICMAP_VALUETYPE_UINT32;
+ if (safe_atoq(value, &val, val_type) != 0) {
+ goto atoi_error;
+ }
+ data->knet_ping_timeout = val;
+ add_as_string = 0;
+ }
+ if (strcmp(path, "totem.interface.knet_ping_precision") == 0) {
+ val_type = ICMAP_VALUETYPE_UINT32;
+ if (safe_atoq(value, &val, val_type) != 0) {
+ goto atoi_error;
+ }
+ data->knet_ping_precision = val;
+ add_as_string = 0;
+ }
break;
case MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS:
/* "subsys" names the subsystem; all other keys are queued until the section ends */
if (strcmp(key, "subsys") == 0) {
data->subsys = strdup(value);
if (data->subsys == NULL) {
*error_string = "Can't alloc memory";
return (0);
}
} else {
kv_item = malloc(sizeof(*kv_item));
if (kv_item == NULL) {
*error_string = "Can't alloc memory";
return (0);
}
memset(kv_item, 0, sizeof(*kv_item));
kv_item->key = strdup(key);
kv_item->value = strdup(value);
/* NOTE(review): if only one of the two strdup() calls failed, the other copy is not freed here */
if (kv_item->key == NULL || kv_item->value == NULL) {
free(kv_item);
*error_string = "Can't alloc memory";
return (0);
}
list_init(&kv_item->list);
list_add(&kv_item->list, &data->logger_subsys_items_head);
}
add_as_string = 0;
break;
case MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON:
/* "subsys" and "name" identify the target; all other keys are queued until the section ends */
if (strcmp(key, "subsys") == 0) {
data->subsys = strdup(value);
if (data->subsys == NULL) {
*error_string = "Can't alloc memory";
return (0);
}
} else if (strcmp(key, "name") == 0) {
data->logging_daemon_name = strdup(value);
if (data->logging_daemon_name == NULL) {
*error_string = "Can't alloc memory";
return (0);
}
} else {
kv_item = malloc(sizeof(*kv_item));
if (kv_item == NULL) {
*error_string = "Can't alloc memory";
return (0);
}
memset(kv_item, 0, sizeof(*kv_item));
kv_item->key = strdup(key);
kv_item->value = strdup(value);
if (kv_item->key == NULL || kv_item->value == NULL) {
free(kv_item);
*error_string = "Can't alloc memory";
return (0);
}
list_init(&kv_item->list);
list_add(&kv_item->list, &data->logger_subsys_items_head);
}
add_as_string = 0;
break;
case MAIN_CP_CB_DATA_STATE_UIDGID:
/* Each allowed uid/gid becomes its own key "uidgid.config.{uid,gid}.<number>" with value 1 */
if (strcmp(key, "uid") == 0) {
uid = uid_determine(value);
if (uid == -1) {
*error_string = error_string_response;
return (0);
}
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.config.uid.%u",
uid);
icmap_set_uint8_r(config_map, key_name, 1);
add_as_string = 0;
} else if (strcmp(key, "gid") == 0) {
gid = gid_determine(value);
if (gid == -1) {
*error_string = error_string_response;
return (0);
}
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.config.gid.%u",
gid);
icmap_set_uint8_r(config_map, key_name, 1);
add_as_string = 0;
} else {
*error_string = "uidgid: Only uid and gid are allowed items";
return (0);
}
break;
case MAIN_CP_CB_DATA_STATE_MEMBER:
/* Member addresses are queued and numbered when the interface section ends */
if (strcmp(key, "memberaddr") != 0) {
*error_string = "Only memberaddr is allowed in member section";
return (0);
}
kv_item = malloc(sizeof(*kv_item));
if (kv_item == NULL) {
*error_string = "Can't alloc memory";
return (0);
}
memset(kv_item, 0, sizeof(*kv_item));
kv_item->key = strdup(key);
kv_item->value = strdup(value);
if (kv_item->key == NULL || kv_item->value == NULL) {
free(kv_item);
*error_string = "Can't alloc memory";
return (0);
}
list_init(&kv_item->list);
list_add(&kv_item->list, &data->member_items_head);
add_as_string = 0;
break;
case MAIN_CP_CB_DATA_STATE_NODELIST:
break;
case MAIN_CP_CB_DATA_STATE_NODELIST_NODE:
/* Node keys are stored under an index taken from the running node counter */
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.%s", data->node_number, key);
if ((strcmp(key, "nodeid") == 0) ||
(strcmp(key, "quorum_votes") == 0)) {
val_type = ICMAP_VALUETYPE_UINT32;
if (safe_atoq(value, &val, val_type) != 0) {
goto atoi_error;
}
icmap_set_uint32_r(config_map, key_name, val);
add_as_string = 0;
}
if (strcmp(key, "ring0_addr") == 0) {
data->ring0_addr_added = 1;
}
/* Store under the indexed key here and clear the flag so the generic store below does not add the un-indexed path */
if (add_as_string) {
icmap_set_string_r(config_map, key_name, value);
add_as_string = 0;
}
break;
case MAIN_CP_CB_DATA_STATE_RESOURCES:
if (strcmp(key, "watchdog_timeout") == 0) {
val_type = ICMAP_VALUETYPE_UINT32;
if (safe_atoq(value, &val, val_type) != 0) {
goto atoi_error;
}
icmap_set_uint32_r(config_map,path, val);
add_as_string = 0;
}
break;
case MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM:
case MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM_MEMUSED:
if (strcmp(key, "poll_period") == 0) {
if (str_to_ull(value, &ull) != 0) {
goto atoi_error;
}
icmap_set_uint64_r(config_map,path, ull);
add_as_string = 0;
}
break;
case MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS:
case MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS_MEMUSED:
if (strcmp(key, "poll_period") == 0) {
if (str_to_ull(value, &ull) != 0) {
goto atoi_error;
}
icmap_set_uint64_r(config_map,path, ull);
add_as_string = 0;
}
break;
}
/* Default handling: anything not consumed above is stored verbatim as a string */
if (add_as_string) {
icmap_set_string_r(config_map, path, value);
}
break;
case PARSER_CB_SECTION_START:
/* Select the state for the section that is being entered; unknown sections leave *state unchanged */
if (strcmp(path, "totem.interface") == 0) {
*state = MAIN_CP_CB_DATA_STATE_INTERFACE;
- data->ringnumber = 0;
+ data->linknumber = 0;
data->mcastport = -1;
data->ttl = -1;
+ data->knet_link_priority = -1;
+ data->knet_ping_interval = -1;
+ data->knet_ping_timeout = -1;
+ data->knet_ping_precision = -1;
list_init(&data->member_items_head);
};
if (strcmp(path, "totem") == 0) {
*state = MAIN_CP_CB_DATA_STATE_TOTEM;
};
if (strcmp(path, "qb") == 0) {
*state = MAIN_CP_CB_DATA_STATE_QB;
}
if (strcmp(path, "logging.logger_subsys") == 0) {
*state = MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS;
list_init(&data->logger_subsys_items_head);
data->subsys = NULL;
}
if (strcmp(path, "logging.logging_daemon") == 0) {
*state = MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON;
list_init(&data->logger_subsys_items_head);
data->subsys = NULL;
data->logging_daemon_name = NULL;
}
if (strcmp(path, "uidgid") == 0) {
*state = MAIN_CP_CB_DATA_STATE_UIDGID;
}
if (strcmp(path, "totem.interface.member") == 0) {
*state = MAIN_CP_CB_DATA_STATE_MEMBER;
}
if (strcmp(path, "quorum") == 0) {
*state = MAIN_CP_CB_DATA_STATE_QUORUM;
}
if (strcmp(path, "quorum.device") == 0) {
*state = MAIN_CP_CB_DATA_STATE_QDEVICE;
}
if (strcmp(path, "nodelist") == 0) {
*state = MAIN_CP_CB_DATA_STATE_NODELIST;
data->node_number = 0;
}
if (strcmp(path, "nodelist.node") == 0) {
*state = MAIN_CP_CB_DATA_STATE_NODELIST_NODE;
data->ring0_addr_added = 0;
}
if (strcmp(path, "resources") == 0) {
*state = MAIN_CP_CB_DATA_STATE_RESOURCES;
}
if (strcmp(path, "resources.system") == 0) {
*state = MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM;
}
if (strcmp(path, "resources.system.memory_used") == 0) {
*state = MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM_MEMUSED;
}
if (strcmp(path, "resources.process") == 0) {
*state = MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS;
}
if (strcmp(path, "resources.process.memory_used") == 0) {
*state = MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS_MEMUSED;
}
break;
case PARSER_CB_SECTION_END:
switch (*state) {
case MAIN_CP_CB_DATA_STATE_INTERFACE:
/*
 * Create new interface section: every collected value is written under
 * "totem.interface.<linknumber>.<key>"; values that were never set are skipped
 */
if (data->bindnetaddr != NULL) {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.bindnetaddr",
- data->ringnumber);
+ data->linknumber);
icmap_set_string_r(config_map, key_name, data->bindnetaddr);
free(data->bindnetaddr);
data->bindnetaddr = NULL;
}
if (data->mcastaddr != NULL) {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr",
- data->ringnumber);
+ data->linknumber);
icmap_set_string_r(config_map, key_name, data->mcastaddr);
free(data->mcastaddr);
data->mcastaddr = NULL;
}
if (data->broadcast != NULL) {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.broadcast",
- data->ringnumber);
+ data->linknumber);
icmap_set_string_r(config_map, key_name, data->broadcast);
free(data->broadcast);
data->broadcast = NULL;
}
if (data->mcastport > -1) {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport",
- data->ringnumber);
+ data->linknumber);
icmap_set_uint16_r(config_map, key_name, data->mcastport);
}
if (data->ttl > -1) {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.ttl",
- data->ringnumber);
+ data->linknumber);
icmap_set_uint8_r(config_map, key_name, data->ttl);
}
+ if (data->knet_link_priority > -1) {
+ snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_link_priority",
+ data->linknumber);
+ icmap_set_uint8_r(config_map, key_name, data->knet_link_priority);
+ }
+ if (data->knet_ping_interval > -1) {
+ snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_ping_interval",
+ data->linknumber);
+ icmap_set_uint32_r(config_map, key_name, data->knet_ping_interval);
+ }
+ if (data->knet_ping_timeout > -1) {
+ snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_ping_timeout",
+ data->linknumber);
+ icmap_set_uint32_r(config_map, key_name, data->knet_ping_timeout);
+ }
+ if (data->knet_ping_precision > -1) {
+ snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_ping_precision",
+ data->linknumber);
+ icmap_set_uint32_r(config_map, key_name, data->knet_ping_precision);
+ }
/* Flush the queued member addresses, numbering them in list order, and free each item */
ii = 0;
for (iter = data->member_items_head.next;
iter != &data->member_items_head; iter = iter_next) {
kv_item = list_entry(iter, struct key_value_list_item, list);
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.member.%u",
- data->ringnumber, ii);
+ data->linknumber, ii);
icmap_set_string_r(config_map, key_name, kv_item->value);
/* fetch the successor before the item is freed */
iter_next = iter->next;
free(kv_item->value);
free(kv_item->key);
free(kv_item);
ii++;
}
break;
case MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS:
/* NOTE(review): on this early return the queued items are not freed */
if (data->subsys == NULL) {
*error_string = "No subsys key in logger_subsys directive";
return (0);
}
/* Write every queued key as "logging.logger_subsys.<subsys>.<key>" and free the item */
for (iter = data->logger_subsys_items_head.next;
iter != &data->logger_subsys_items_head; iter = iter_next) {
kv_item = list_entry(iter, struct key_value_list_item, list);
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logger_subsys.%s.%s",
data->subsys, kv_item->key);
icmap_set_string_r(config_map, key_name, kv_item->value);
iter_next = iter->next;
free(kv_item->value);
free(kv_item->key);
free(kv_item);
}
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logger_subsys.%s.subsys",
data->subsys);
icmap_set_string_r(config_map, key_name, data->subsys);
free(data->subsys);
break;
case MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON:
if (data->logging_daemon_name == NULL) {
*error_string = "No name key in logging_daemon directive";
return (0);
}
/*
 * Key prefix depends on the daemon name and on whether a subsys was given:
 *   name "corosync", no subsys -> logging.<key>
 *   name "corosync", subsys    -> logging.logger_subsys.<subsys>.<key>
 *   other name, no subsys      -> logging.logging_daemon.<name>.<key>
 *   other name, subsys         -> logging.logging_daemon.<name>.<subsys>.<key>
 */
for (iter = data->logger_subsys_items_head.next;
iter != &data->logger_subsys_items_head; iter = iter_next) {
kv_item = list_entry(iter, struct key_value_list_item, list);
if (data->subsys == NULL) {
if (strcmp(data->logging_daemon_name, "corosync") == 0) {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN,
"logging.%s",
kv_item->key);
} else {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN,
"logging.logging_daemon.%s.%s",
data->logging_daemon_name, kv_item->key);
}
} else {
if (strcmp(data->logging_daemon_name, "corosync") == 0) {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN,
"logging.logger_subsys.%s.%s",
data->subsys,
kv_item->key);
} else {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN,
"logging.logging_daemon.%s.%s.%s",
data->logging_daemon_name, data->subsys,
kv_item->key);
}
}
icmap_set_string_r(config_map, key_name, kv_item->value);
iter_next = iter->next;
free(kv_item->value);
free(kv_item->key);
free(kv_item);
}
/* Finally record the identifying name/subsys keys under the same prefix */
if (data->subsys == NULL) {
if (strcmp(data->logging_daemon_name, "corosync") != 0) {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.name",
data->logging_daemon_name);
icmap_set_string_r(config_map, key_name, data->logging_daemon_name);
}
} else {
if (strcmp(data->logging_daemon_name, "corosync") == 0) {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logger_subsys.%s.subsys",
data->subsys);
icmap_set_string_r(config_map, key_name, data->subsys);
} else {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.%s.subsys",
data->logging_daemon_name, data->subsys);
icmap_set_string_r(config_map, key_name, data->subsys);
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.%s.name",
data->logging_daemon_name, data->subsys);
icmap_set_string_r(config_map, key_name, data->logging_daemon_name);
}
}
free(data->subsys);
free(data->logging_daemon_name);
break;
case MAIN_CP_CB_DATA_STATE_NODELIST_NODE:
/* Every node must have provided ring0_addr; then advance the index for the next node */
if (!data->ring0_addr_added) {
*error_string = "No ring0_addr specified for node";
return (0);
}
data->node_number++;
break;
case MAIN_CP_CB_DATA_STATE_NORMAL:
case MAIN_CP_CB_DATA_STATE_PLOAD:
case MAIN_CP_CB_DATA_STATE_UIDGID:
case MAIN_CP_CB_DATA_STATE_MEMBER:
case MAIN_CP_CB_DATA_STATE_QUORUM:
case MAIN_CP_CB_DATA_STATE_QDEVICE:
case MAIN_CP_CB_DATA_STATE_NODELIST:
case MAIN_CP_CB_DATA_STATE_TOTEM:
case MAIN_CP_CB_DATA_STATE_QB:
break;
/* For the resources.* states, *state is set to the state of the enclosing section */
case MAIN_CP_CB_DATA_STATE_RESOURCES:
*state = MAIN_CP_CB_DATA_STATE_NORMAL;
break;
case MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM:
*state = MAIN_CP_CB_DATA_STATE_RESOURCES;
break;
case MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM_MEMUSED:
*state = MAIN_CP_CB_DATA_STATE_RESOURCES_SYSTEM;
break;
case MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS:
*state = MAIN_CP_CB_DATA_STATE_RESOURCES;
break;
case MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS_MEMUSED:
*state = MAIN_CP_CB_DATA_STATE_RESOURCES_PROCESS;
break;
}
break;
}
return (1);
/* Common exit for failed integer conversions: report the expected range of val_type */
atoi_error:
min_val = max_val = 0;
/*
 * This is really an assert, because either the developer didn't set val_type
 * correctly or we've got here after some nasty memory overwrite.
 * NOTE(review): the range lookup is performed inside assert(), so a build with
 * NDEBUG would skip it and report the range as (0..0) - confirm.
 */
assert(safe_atoq_range(val_type, &min_val, &max_val) == 0);
snprintf(formated_err, sizeof(formated_err),
"Value of key \"%s\" is expected to be integer in range (%lld..%lld), but \"%s\" was given",
key, min_val, max_val, value);
*error_string = formated_err;
return (0);
}
/*
 * Parser callback for the files read by read_uidgid_files_into_icmap().
 *
 * Only a "uidgid" section containing "uid" and "gid" items is accepted.
 * Every accepted item is resolved to a number and recorded in config_map as
 * key "uidgid.config.uid.<n>" or "uidgid.config.gid.<n>" with value 1.
 *
 * Returns 1 on success and 0 on failure with *error_string set. key, state
 * and user_data are not used.
 */
static int uidgid_config_parser_cb(const char *path,
	char *key,
	char *value,
	enum main_cp_cb_data_state *state,
	enum parser_cb_type type,
	const char **error_string,
	icmap_map_t config_map,
	void *user_data)
{
	char key_name[ICMAP_KEYNAME_MAXLEN];
	int uid, gid;

	if (type == PARSER_CB_SECTION_START) {
		/* no section other than "uidgid" may be opened */
		if (strcmp(path, "uidgid") != 0) {
			*error_string = "uidgid: Can't add subsection different than uidgid";
			return (0);
		}
		return (1);
	}

	if (type != PARSER_CB_ITEM) {
		/* start, end and section-end events need no work */
		return (1);
	}

	if (strcmp(path, "uidgid.uid") == 0) {
		uid = uid_determine(value);
		if (uid == -1) {
			*error_string = error_string_response;
			return (0);
		}
		snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.config.uid.%u",
		    uid);
	} else if (strcmp(path, "uidgid.gid") == 0) {
		gid = gid_determine(value);
		if (gid == -1) {
			*error_string = error_string_response;
			return (0);
		}
		snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.config.gid.%u",
		    gid);
	} else {
		*error_string = "uidgid: Only uid and gid are allowed items";
		return (0);
	}

	/* both accepted item kinds are stored the same way */
	icmap_set_uint8_r(config_map, key_name, 1);

	return (1);
}
/*
 * Parse every regular file in COROSYSCONFDIR "/uidgid.d" with
 * uidgid_config_parser_cb() and store the result in config_map.
 *
 * A missing or unreadable directory, entries that cannot be stat()ed or
 * opened, and entries that are not regular files are skipped silently.
 *
 * Returns 0 on success, or the non-zero result of parse_section() for the
 * first file that fails to parse (error_string is then set by the parser).
 */
static int read_uidgid_files_into_icmap(
	const char **error_string,
	icmap_map_t config_map)
{
	FILE *fp;
	const char *dirname;
	DIR *dp;
	struct dirent *dirent;
	struct dirent *entry;
	char filename[PATH_MAX + FILENAME_MAX + 1];
	int res = 0;
	size_t len;
	int return_code;
	struct stat stat_buf;
	enum main_cp_cb_data_state state = MAIN_CP_CB_DATA_STATE_NORMAL;
	char key_name[ICMAP_KEYNAME_MAXLEN];

	dirname = COROSYSCONFDIR "/uidgid.d";
	dp = opendir (dirname);

	if (dp == NULL)
		return 0;

	/* buffer for readdir_r(), sized for the longest possible entry name */
	len = offsetof(struct dirent, d_name) + FILENAME_MAX + 1;

	entry = malloc(len);
	if (entry == NULL) {
		res = 0;
		goto error_exit;
	}

	for (;;) {
		/*
		 * Check the return code before looking at dirent: on failure
		 * readdir_r() is not required to have stored anything in it.
		 */
		dirent = NULL;
		return_code = readdir_r(dp, entry, &dirent);
		if (return_code != 0 || dirent == NULL) {
			break;
		}

		snprintf(filename, sizeof (filename), "%s/%s", dirname, dirent->d_name);

		/*
		 * The stat() result is intentionally kept out of res: an entry
		 * that cannot be examined is skipped and must not turn into
		 * the return value of this function.
		 */
		if (stat (filename, &stat_buf) != 0 || !S_ISREG(stat_buf.st_mode)) {
			continue;
		}

		fp = fopen (filename, "r");
		if (fp == NULL) {
			continue;
		}

		key_name[0] = 0;

		res = parse_section(fp, key_name, error_string, 0, state, uidgid_config_parser_cb, config_map, NULL);

		fclose (fp);

		if (res != 0) {
			goto error_exit;
		}
	}

error_exit:
	free (entry);
	closedir(dp);

	return res;
}
/*
 * Read the main configuration file and load it into config_map.
 *
 * The file name is taken from the COROSYNC_MAIN_CONFIG_FILE environment
 * variable, falling back to COROSYSCONFDIR "/corosync.conf". After the main
 * file has been parsed successfully the uidgid.d files are read as well.
 *
 * Returns 0 on success, in which case *error_string points to a success
 * message. Returns -1 when the file cannot be opened, otherwise the non-zero
 * result of the failing parse step; *error_string then describes the error.
 */
static int read_config_file_into_icmap(
	const char **error_string,
	icmap_map_t config_map)
{
	struct main_cp_cb_data cb_data;
	enum main_cp_cb_data_state initial_state = MAIN_CP_CB_DATA_STATE_NORMAL;
	char root_path[ICMAP_KEYNAME_MAXLEN];
	char errno_text[100];
	const char *errno_msg;
	const char *conf_name;
	FILE *conf;
	int rc;

	conf_name = getenv ("COROSYNC_MAIN_CONFIG_FILE");
	if (conf_name == NULL) {
		conf_name = COROSYSCONFDIR "/corosync.conf";
	}

	conf = fopen (conf_name, "r");
	if (conf == NULL) {
		errno_msg = qb_strerror_r(errno, errno_text, sizeof(errno_text));
		snprintf (error_string_response, sizeof(error_string_response),
		    "Can't read file %s reason = (%s)",
		    conf_name, errno_msg);
		*error_string = error_string_response;
		return -1;
	}

	/* the top-level section has an empty path */
	root_path[0] = 0;

	rc = parse_section(conf, root_path, error_string, 0, initial_state, main_config_parser_cb, config_map, &cb_data);
	fclose(conf);
	if (rc != 0) {
		return rc;
	}

	rc = read_uidgid_files_into_icmap(error_string, config_map);
	if (rc != 0) {
		return rc;
	}

	snprintf (error_string_response, sizeof(error_string_response),
	    "Successfully read main configuration file '%s'.", conf_name);
	*error_string = error_string_response;

	return rc;
}
diff --git a/exec/main.c b/exec/main.c
index 7dc7724b..2a286d85 100644
--- a/exec/main.c
+++ b/exec/main.c
@@ -1,1429 +1,1425 @@
/*
* Copyright (c) 2002-2006 MontaVista Software, Inc.
* Copyright (c) 2006-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* \mainpage Corosync
*
* This is the doxygen generated developer documentation for the Corosync
* project. For more information about Corosync, please see the project
* web site, <a href="http://www.corosync.org">corosync.org</a>.
*
* \section license License
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <pthread.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <signal.h>
#include <sched.h>
#include <time.h>
#include <semaphore.h>
#include <string.h>
#include <qb/qbdefs.h>
#include <qb/qblog.h>
#include <qb/qbloop.h>
#include <qb/qbutil.h>
#include <qb/qbipcs.h>
#include <corosync/swab.h>
#include <corosync/corotypes.h>
#include <corosync/corodefs.h>
#include <corosync/list.h>
#include <corosync/totem/totempg.h>
#include <corosync/logsys.h>
#include <corosync/icmap.h>
#include "quorum.h"
#include "totemsrp.h"
#include "logconfig.h"
#include "totemconfig.h"
#include "main.h"
#include "sync.h"
#include "timer.h"
#include "util.h"
#include "apidef.h"
#include "service.h"
#include "schedwrk.h"
/*
 * IPC_LOGSYS_SIZE: a smaller value is selected when the build defines
 * HAVE_SMALL_MEMORY_FOOTPRINT. The products are parenthesized so the macro
 * keeps its value when expanded inside a larger expression (for example
 * "x % IPC_LOGSYS_SIZE" or "x / IPC_LOGSYS_SIZE").
 */
#ifdef HAVE_SMALL_MEMORY_FOOTPRINT
#define IPC_LOGSYS_SIZE			(1024*64)
#else
#define IPC_LOGSYS_SIZE			(8192*128)
#endif
/*
 * Logging setup for this translation unit: declares the "corosync" log
 * system (stderr + syslog output, LOG_DAEMON facility, LOG_INFO level) and
 * the "MAIN" subsystem used by log_printf() calls in this file.
 */
LOGSYS_DECLARE_SYSTEM ("corosync",
LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_OUTPUT_SYSLOG,
LOG_DAEMON,
LOG_INFO);
LOGSYS_DECLARE_SUBSYS ("MAIN");
#define SERVER_BACKLOG 5
/* Priority obtained in corosync_setscheduler(); reset to 0 when it cannot be determined. */
static int sched_priority = 0;
/* Upper bound of the service index loop in confchg_fn(). */
static unsigned int service_count = 32;
static struct totem_logging_configuration totem_logging_configuration;
/* API table handed to services and used for timers/totem helpers in this file. */
static struct corosync_api_v1 *api = NULL;
/* 1 from a configuration change until corosync_sync_completed() clears it. */
static int sync_in_process = 1;
/* Main loop handle; exposed through cs_poll_handle_get(). */
static qb_loop_t *corosync_poll_handle;
/* Scheduling parameters filled in by corosync_setscheduler(). */
struct sched_param global_sched_param;
/* Handle of the periodic timer that runs corosync_totem_stats_updater(). */
static corosync_timer_handle_t corosync_stats_timer_handle;
/* NOTE(review): presumably the path passed to corosync_flock(); the use site is outside this view. */
static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid";
static int ip_version = AF_INET;
/*
 * Accessor for the file-local main loop handle (corosync_poll_handle).
 */
qb_loop_t *cs_poll_handle_get (void)
{
	qb_loop_t *loop = corosync_poll_handle;

	return loop;
}
/*
 * Register dispatch_fn to be called for `events` on `fd` in the given loop.
 * The registration always uses QB_LOOP_MED priority; the return value is
 * whatever qb_loop_poll_add() reports.
 */
int cs_poll_dispatch_add (qb_loop_t * handle,
	int fd,
	int events,
	void *data,
	int (*dispatch_fn) (int fd, int revents, void *data))
{
	return qb_loop_poll_add(
		handle,
		QB_LOOP_MED,
		fd,
		events,
		data,
		dispatch_fn);
}
/*
 * Remove the poll registration of `fd` from the given loop; returns the
 * result of qb_loop_poll_del().
 */
int cs_poll_dispatch_delete(qb_loop_t * handle, int fd)
{
	int rc = qb_loop_poll_del(handle, fd);

	return rc;
}
/*
 * Invoke the exec_dump_fn callback of every registered service that
 * provides one.
 */
void corosync_state_dump (void)
{
	int slot;

	for (slot = 0; slot < SERVICES_COUNT_MAX; slot++) {
		/* Skip empty slots and services without a dump callback. */
		if (!corosync_service[slot]) {
			continue;
		}
		if (!corosync_service[slot]->exec_dump_fn) {
			continue;
		}
		corosync_service[slot]->exec_dump_fn ();
	}
}
/*
 * Write the libqb blackbox to "<run_dir>/fdata-<timestamp>-<pid>" and make
 * "<run_dir>/fdata" a symlink to that file.
 *
 * If a file name does not fit into its buffer, or the blackbox cannot be
 * written, the function logs the problem and returns early so that an
 * existing "fdata" symlink is not replaced by one pointing at a file that
 * was never created.
 */
static void corosync_blackbox_write_to_file (void)
{
	char fname[PATH_MAX];
	char fdata_fname[PATH_MAX];
	char time_str[PATH_MAX];
	struct tm cur_time_tm;
	time_t cur_time_t;
	ssize_t res;
	int len;

	cur_time_t = time(NULL);
	localtime_r(&cur_time_t, &cur_time_tm);

	strftime(time_str, sizeof(time_str), "%Y-%m-%dT%H:%M:%S", &cur_time_tm);

	len = snprintf(fname, sizeof(fname), "%s/fdata-%s-%lld",
	    get_run_dir(),
	    time_str,
	    (long long int)getpid());
	if (len < 0 || (size_t)len >= sizeof(fname)) {
		log_printf(LOGSYS_LEVEL_ERROR, "Can't snprintf blackbox file name");
		return;
	}

	if ((res = qb_log_blackbox_write_to_file(fname)) < 0) {
		LOGSYS_PERROR(-res, LOGSYS_LEVEL_ERROR, "Can't store blackbox file");
		/* Nothing was written: keep the previous symlink intact. */
		return;
	}

	len = snprintf(fdata_fname, sizeof(fdata_fname), "%s/fdata", get_run_dir());
	if (len < 0 || (size_t)len >= sizeof(fdata_fname)) {
		log_printf(LOGSYS_LEVEL_ERROR, "Can't snprintf blackbox symlink name");
		return;
	}

	/* Replace any previous symlink; a failing unlink (e.g. ENOENT) is fine. */
	unlink(fdata_fname);
	if (symlink(fname, fdata_fname) == -1) {
		log_printf(LOGSYS_LEVEL_ERROR, "Can't create symlink to '%s' for corosync blackbox file '%s'",
		    fname, fdata_fname);
	}
}
/*
 * Completion callback passed to corosync_service_unlink_all(): removes the
 * statistics timer, stops the main loop and finalizes icmap, in that order.
 */
static void unlink_all_completed (void)
{
	/* Stop periodic statistics updates first. */
	api->timer_delete (corosync_stats_timer_handle);

	/* Ask the main loop to stop. */
	qb_loop_stop (corosync_poll_handle);

	/* Finalize icmap last. */
	icmap_fini();
}
/*
 * Start shutdown: unlink all services and run unlink_all_completed() when
 * that is done.
 */
void corosync_shutdown_request (void)
{
	corosync_service_unlink_all (
		api,
		unlink_all_completed);
}
/*
 * Loop signal callback: dump the state of all services. Both arguments are
 * unused; always returns 0.
 */
static int32_t sig_diag_handler (int num, void *data)
{
	int32_t rc = 0;

	corosync_state_dump ();
	return rc;
}
/*
 * Loop signal callback: log the shutdown and unlink all services, finishing
 * with unlink_all_completed(). Both arguments are unused; always returns 0.
 */
static int32_t sig_exit_handler (int num, void *data)
{
	int32_t rc = 0;

	log_printf(LOGSYS_LEVEL_NOTICE, "Node was shut down by a signal");
	corosync_service_unlink_all (
		api,
		unlink_all_completed);

	return rc;
}
/*
 * SIGSEGV handler. Restores the default disposition first so the raise()
 * at the end (or a second fault inside this handler) terminates the process
 * through the default action, then stores the blackbox, shuts logging down
 * and re-raises the signal.
 */
static void sigsegv_handler (int num)
{
(void)signal (SIGSEGV, SIG_DFL);
corosync_blackbox_write_to_file ();
qb_log_fini();
raise (SIGSEGV);
}
/*
 * Adapter with the qb loop signal callback signature that forwards to the
 * plain SIGSEGV handler. `data` is unused; always returns 0.
 */
static int32_t sig_segv_handler (int num, void *data)
{
	int32_t rc = 0;

	sigsegv_handler(num);
	return rc;
}
/*
 * SIGABRT handler. Restores the default disposition first so the raise()
 * at the end terminates the process through the default action, then stores
 * the blackbox, shuts logging down and re-raises the signal.
 */
static void sigabrt_handler (int num)
{
(void)signal (SIGABRT, SIG_DFL);
corosync_blackbox_write_to_file ();
qb_log_fini();
raise (SIGABRT);
}
/*
 * Adapter with the qb loop signal callback signature that forwards to the
 * plain SIGABRT handler. `data` is unused; always returns 0.
 */
static int32_t sig_abrt_handler (int num, void *data)
{
	int32_t rc = 0;

	sigabrt_handler(num);
	return rc;
}
/* Loopback address as returned by inet_addr(). */
#define LOCALHOST_IP inet_addr("127.0.0.1")
/*
 * Handle used for the totempg group calls in this file (multicast, message
 * reservation, queue level checks).
 */
static void *corosync_group_handle;
/* Group descriptor: one-byte group name "a". */
static struct totempg_group corosync_group = {
.group = "a",
.group_len = 1
};
/*
 * Intentionally empty: this function performs no locking.
 */
static void serialize_lock (void)
{
	/* nothing to do */
}
/*
 * Intentionally empty: this function performs no unlocking.
 */
static void serialize_unlock (void)
{
	/* nothing to do */
}
/*
 * Callback registered with sync_init(): runs when service synchronization
 * has finished. Clears the sync-in-progress flag, tells the IPC layer about
 * the new state, re-enables IPC connections and acknowledges the transition
 * to totem.
 */
static void corosync_sync_completed (void)
{
	log_printf (LOGSYS_LEVEL_NOTICE,
		"Completed service synchronization, ready to provide service.");

	/* Synchronization is over; propagate the new state to IPC. */
	sync_in_process = 0;
	cs_ipcs_sync_state_changed(sync_in_process);
	cs_ipc_allow_connections(1);

	/* Inform totem to start using new message queue again */
	totempg_trans_ack();
}
/*
 * Copy the synchronization callbacks of service `service_id` into
 * `callbacks`.
 *
 * Returns -1 when no service is registered in that slot, 0 otherwise. When
 * `callbacks` is NULL nothing is copied and the call only reports whether
 * the service exists.
 */
static int corosync_sync_callbacks_retrieve (
	int service_id,
	struct sync_callbacks *callbacks)
{
	if (!corosync_service[service_id]) {
		return (-1);
	}

	if (callbacks) {
		callbacks->name = corosync_service[service_id]->name;
		callbacks->sync_init = corosync_service[service_id]->sync_init;
		callbacks->sync_process = corosync_service[service_id]->sync_process;
		callbacks->sync_activate = corosync_service[service_id]->sync_activate;
		callbacks->sync_abort = corosync_service[service_id]->sync_abort;
	}

	return (0);
}
/* Copy of the ring id passed to the most recent confchg_fn() call. */
static struct memb_ring_id corosync_ring_id;
/*
 * Record in icmap that node `nodeid` joined.
 *
 * If the member's "ip" key already exists, its join counter is incremented;
 * otherwise the "ip" key is created from totem_ifaces_print() and the join
 * counter starts at 1. In both cases the status key is set to "joined".
 */
static void member_object_joined (unsigned int nodeid)
{
	char ip_key[ICMAP_KEYNAME_MAXLEN];
	char join_count_key[ICMAP_KEYNAME_MAXLEN];
	char status_key[ICMAP_KEYNAME_MAXLEN];
	int already_known;

	snprintf(ip_key, ICMAP_KEYNAME_MAXLEN,
		"runtime.totem.pg.mrp.srp.members.%u.ip", nodeid);
	snprintf(join_count_key, ICMAP_KEYNAME_MAXLEN,
		"runtime.totem.pg.mrp.srp.members.%u.join_count", nodeid);
	snprintf(status_key, ICMAP_KEYNAME_MAXLEN,
		"runtime.totem.pg.mrp.srp.members.%u.status", nodeid);

	already_known = (icmap_get(ip_key, NULL, NULL, NULL) == CS_OK);

	if (!already_known) {
		/* First time this member is seen: create its keys. */
		icmap_set_string(ip_key, (char*)api->totem_ifaces_print (nodeid));
		icmap_set_uint32(join_count_key, 1);
	} else {
		icmap_inc(join_count_key);
	}
	icmap_set_string(status_key, "joined");

	log_printf (LOGSYS_LEVEL_DEBUG,
		"Member joined: %s", api->totem_ifaces_print (nodeid));
}
/*
 * Record in icmap that node `nodeid` left by setting its status key to
 * "left", and log the event at debug level.
 */
static void member_object_left (unsigned int nodeid)
{
	char status_key[ICMAP_KEYNAME_MAXLEN];

	snprintf(status_key, ICMAP_KEYNAME_MAXLEN,
		"runtime.totem.pg.mrp.srp.members.%u.status", nodeid);
	icmap_set_string(status_key, "left");

	log_printf (LOGSYS_LEVEL_DEBUG,
		"Member left: %s", api->totem_ifaces_print (nodeid));
}
/*
 * Configuration change callback.
 *
 * Marks synchronization as in progress, remembers the new ring id, updates
 * the per-member icmap entries for nodes that left or joined, forwards the
 * change to every service that has a confchg_fn, and finally drives the
 * sync engine: a sync that was still running is aborted, a transitional
 * configuration is saved, a regular configuration starts a new sync.
 */
static void confchg_fn (
	enum totem_configuration_type configuration_type,
	const unsigned int *member_list, size_t member_list_entries,
	const unsigned int *left_list, size_t left_list_entries,
	const unsigned int *joined_list, size_t joined_list_entries,
	const struct memb_ring_id *ring_id)
{
	int idx;
	int abort_activate;

	/* A change arriving while a sync is running means that sync must be aborted. */
	abort_activate = (sync_in_process == 1) ? 1 : 0;

	sync_in_process = 1;
	cs_ipcs_sync_state_changed(sync_in_process);
	memcpy (&corosync_ring_id, ring_id, sizeof (struct memb_ring_id));

	for (idx = 0; idx < left_list_entries; idx++) {
		member_object_left (left_list[idx]);
	}
	for (idx = 0; idx < joined_list_entries; idx++) {
		member_object_joined (joined_list[idx]);
	}

	/* Call configuration change for all services */
	for (idx = 0; idx < service_count; idx++) {
		if (!corosync_service[idx] || !corosync_service[idx]->confchg_fn) {
			continue;
		}
		corosync_service[idx]->confchg_fn (configuration_type,
			member_list, member_list_entries,
			left_list, left_list_entries,
			joined_list, joined_list_entries, ring_id);
	}

	if (abort_activate) {
		sync_abort ();
	}

	switch (configuration_type) {
	case TOTEM_CONFIGURATION_TRANSITIONAL:
		sync_save_transitional (member_list, member_list_entries, ring_id);
		break;
	case TOTEM_CONFIGURATION_REGULAR:
		sync_start (member_list, member_list_entries, ring_id);
		break;
	default:
		break;
	}
}
/*
 * Placeholder: privileges are not dropped yet, so this does nothing.
 */
static void priv_drop (void)
{
	/* TODO: we are still not dropping privs */
}
/*
 * Detach from the controlling terminal: fork (the parent exits with 0),
 * create a new session in the child and redirect stdin/stdout/stderr to
 * /dev/null.
 *
 * Exits through corosync_exit_error() when fork() fails or the redirection
 * cannot be set up.
 *
 * The /dev/null descriptor is only closed when it is not itself one of the
 * standard descriptors. If the process was started with stdin, stdout or
 * stderr closed, open() can return 0, 1 or 2, and closing it would undo the
 * redirection that was just established.
 */
static void corosync_tty_detach (void)
{
	int devnull;

	/*
	 * Disconnect from TTY if this is not a debug run
	 */
	switch (fork ()) {
	case -1:
		corosync_exit_error (COROSYNC_DONE_FORK);
		break;
	case 0:
		/*
		 * child which is disconnected, run this process
		 */
		break;
	default:
		exit (0);
		break;
	}

	/* Create new session */
	(void)setsid();

	/*
	 * Map stdin/out/err to /dev/null.
	 */
	devnull = open("/dev/null", O_RDWR);
	if (devnull == -1) {
		corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR);
	}

	if (dup2(devnull, STDIN_FILENO) < 0 || dup2(devnull, STDOUT_FILENO) < 0
	    || dup2(devnull, STDERR_FILENO) < 0) {
		if (devnull > STDERR_FILENO) {
			close(devnull);
		}
		corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR);
	}

	if (devnull > STDERR_FILENO) {
		close(devnull);
	}
}
/*
 * Raise the locked-memory resource limit to infinity and lock all current
 * and future pages of the process into memory.
 *
 * Failures are not fatal. Each one is logged as a warning: a failed
 * setrlimit() used to be silent, which left a later mlockall() failure
 * without any hint at its cause.
 */
static void corosync_mlockall (void)
{
	int res;
	struct rlimit rlimit;

	rlimit.rlim_cur = RLIM_INFINITY;
	rlimit.rlim_max = RLIM_INFINITY;

	/* Fall back to RLIMIT_VMEM on platforms that do not define RLIMIT_MEMLOCK. */
#ifndef RLIMIT_MEMLOCK
#define RLIMIT_MEMLOCK RLIMIT_VMEM
#endif

	res = setrlimit (RLIMIT_MEMLOCK, &rlimit);
	if (res == -1) {
		LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
			"Could not increase RLIMIT_MEMLOCK");
	}

	/* Still try to lock memory: the existing limit may be sufficient. */
	res = mlockall (MCL_CURRENT | MCL_FUTURE);
	if (res == -1) {
		LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
			"Could not lock memory of service to avoid page faults");
	}
}
/*
 * Timer callback that publishes totem statistics to icmap.
 *
 * Fetches the statistics through api->totem_get_stats(), stores the counters
 * under keys with the "runtime.totem.pg." prefix, logs a warning and raises
 * the firewall_enabled_or_nic_failure flag when the continuous gather or
 * sendmsg failure counters exceed their limits, and derives averages from the
 * token history. It then calls cs_ipcs_stats_update() and re-arms itself via
 * api->timer_add_duration(). `data` is unused.
 */
static void corosync_totem_stats_updater (void *data)
{
totempg_stats_t * stats;
uint32_t total_mtt_rx_token;
uint32_t total_backlog_calc;
uint32_t total_token_holdtime;
- int t, prev, i;
+ int t, prev;
int32_t token_count;
- char key_name[ICMAP_KEYNAME_MAXLEN];
stats = api->totem_get_stats();
icmap_set_uint32("runtime.totem.pg.msg_reserved", stats->msg_reserved);
icmap_set_uint32("runtime.totem.pg.msg_queue_avail", stats->msg_queue_avail);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.orf_token_tx", stats->mrp->srp->orf_token_tx);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.orf_token_rx", stats->mrp->srp->orf_token_rx);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_merge_detect_tx", stats->mrp->srp->memb_merge_detect_tx);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_merge_detect_rx", stats->mrp->srp->memb_merge_detect_rx);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_join_tx", stats->mrp->srp->memb_join_tx);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_join_rx", stats->mrp->srp->memb_join_rx);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_tx", stats->mrp->srp->mcast_tx);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_retx", stats->mrp->srp->mcast_retx);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_rx", stats->mrp->srp->mcast_rx);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_commit_token_tx", stats->mrp->srp->memb_commit_token_tx);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_commit_token_rx", stats->mrp->srp->memb_commit_token_rx);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.token_hold_cancel_tx", stats->mrp->srp->token_hold_cancel_tx);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.token_hold_cancel_rx", stats->mrp->srp->token_hold_cancel_rx);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.operational_entered", stats->mrp->srp->operational_entered);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.operational_token_lost", stats->mrp->srp->operational_token_lost);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.gather_entered", stats->mrp->srp->gather_entered);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.gather_token_lost", stats->mrp->srp->gather_token_lost);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.commit_entered", stats->mrp->srp->commit_entered);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.commit_token_lost", stats->mrp->srp->commit_token_lost);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.recovery_entered", stats->mrp->srp->recovery_entered);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.recovery_token_lost", stats->mrp->srp->recovery_token_lost);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.consensus_timeouts", stats->mrp->srp->consensus_timeouts);
- icmap_set_uint64("runtime.totem.pg.mrp.srp.rx_msg_dropped", stats->mrp->srp->rx_msg_dropped);
- icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_gather", stats->mrp->srp->continuous_gather);
- icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_sendmsg_failures",
- stats->mrp->srp->continuous_sendmsg_failures);
-
- icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure",
- stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ? 1 : 0);
-
- if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ||
- stats->mrp->srp->continuous_sendmsg_failures > MAX_NO_CONT_SENDMSG_FAILURES) {
+ icmap_set_uint64("runtime.totem.pg.srp.orf_token_tx", stats->srp->orf_token_tx);
+ icmap_set_uint64("runtime.totem.pg.srp.orf_token_rx", stats->srp->orf_token_rx);
+ icmap_set_uint64("runtime.totem.pg.srp.memb_merge_detect_tx", stats->srp->memb_merge_detect_tx);
+ icmap_set_uint64("runtime.totem.pg.srp.memb_merge_detect_rx", stats->srp->memb_merge_detect_rx);
+ icmap_set_uint64("runtime.totem.pg.srp.memb_join_tx", stats->srp->memb_join_tx);
+ icmap_set_uint64("runtime.totem.pg.srp.memb_join_rx", stats->srp->memb_join_rx);
+ icmap_set_uint64("runtime.totem.pg.srp.mcast_tx", stats->srp->mcast_tx);
+ icmap_set_uint64("runtime.totem.pg.srp.mcast_retx", stats->srp->mcast_retx);
+ icmap_set_uint64("runtime.totem.pg.srp.mcast_rx", stats->srp->mcast_rx);
+ icmap_set_uint64("runtime.totem.pg.srp.memb_commit_token_tx", stats->srp->memb_commit_token_tx);
+ icmap_set_uint64("runtime.totem.pg.srp.memb_commit_token_rx", stats->srp->memb_commit_token_rx);
+ icmap_set_uint64("runtime.totem.pg.srp.token_hold_cancel_tx", stats->srp->token_hold_cancel_tx);
+ icmap_set_uint64("runtime.totem.pg.srp.token_hold_cancel_rx", stats->srp->token_hold_cancel_rx);
+ icmap_set_uint64("runtime.totem.pg.srp.operational_entered", stats->srp->operational_entered);
+ icmap_set_uint64("runtime.totem.pg.srp.operational_token_lost", stats->srp->operational_token_lost);
+ icmap_set_uint64("runtime.totem.pg.srp.gather_entered", stats->srp->gather_entered);
+ icmap_set_uint64("runtime.totem.pg.srp.gather_token_lost", stats->srp->gather_token_lost);
+ icmap_set_uint64("runtime.totem.pg.srp.commit_entered", stats->srp->commit_entered);
+ icmap_set_uint64("runtime.totem.pg.srp.commit_token_lost", stats->srp->commit_token_lost);
+ icmap_set_uint64("runtime.totem.pg.srp.recovery_entered", stats->srp->recovery_entered);
+ icmap_set_uint64("runtime.totem.pg.srp.recovery_token_lost", stats->srp->recovery_token_lost);
+ icmap_set_uint64("runtime.totem.pg.srp.consensus_timeouts", stats->srp->consensus_timeouts);
+ icmap_set_uint64("runtime.totem.pg.srp.rx_msg_dropped", stats->srp->rx_msg_dropped);
+ icmap_set_uint32("runtime.totem.pg.srp.continuous_gather", stats->srp->continuous_gather);
+ icmap_set_uint32("runtime.totem.pg.srp.continuous_sendmsg_failures",
+ stats->srp->continuous_sendmsg_failures);
+
+ icmap_set_uint8("runtime.totem.pg.srp.firewall_enabled_or_nic_failure",
+ stats->srp->continuous_gather > MAX_NO_CONT_GATHER ? 1 : 0);
+
+ if (stats->srp->continuous_gather > MAX_NO_CONT_GATHER ||
+ stats->srp->continuous_sendmsg_failures > MAX_NO_CONT_SENDMSG_FAILURES) {
log_printf (LOGSYS_LEVEL_WARNING,
"Totem is unable to form a cluster because of an "
"operating system or network fault. The most common "
"cause of this message is that the local firewall is "
"configured improperly.");
- icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 1);
+ icmap_set_uint8("runtime.totem.pg.srp.firewall_enabled_or_nic_failure", 1);
} else {
- icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 0);
+ icmap_set_uint8("runtime.totem.pg.srp.firewall_enabled_or_nic_failure", 0);
}
- for (i = 0; i < stats->mrp->srp->rrp->interface_count; i++) {
- snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.rrp.%u.faulty", i);
- icmap_set_uint8(key_name, stats->mrp->srp->rrp->faulty[i]);
- }
total_mtt_rx_token = 0;
total_token_holdtime = 0;
total_backlog_calc = 0;
token_count = 0;
/*
 * Walk the token history backwards from latest_token, wrapping from index 0
 * to TOTEM_TOKEN_STATS_MAX - 1, and stop once the preceding slot is
 * earliest_token.
 */
- t = stats->mrp->srp->latest_token;
+ t = stats->srp->latest_token;
while (1) {
if (t == 0)
prev = TOTEM_TOKEN_STATS_MAX - 1;
else
prev = t - 1;
- if (prev == stats->mrp->srp->earliest_token)
+ if (prev == stats->srp->earliest_token)
break;
/* if tx == 0, then dropped token (not ours) */
- if (stats->mrp->srp->token[t].tx != 0 ||
- (stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx) > 0 ) {
- total_mtt_rx_token += (stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx);
- total_token_holdtime += (stats->mrp->srp->token[t].tx - stats->mrp->srp->token[t].rx);
- total_backlog_calc += stats->mrp->srp->token[t].backlog_calc;
+ if (stats->srp->token[t].tx != 0 ||
+ (stats->srp->token[t].rx - stats->srp->token[prev].rx) > 0 ) {
+ total_mtt_rx_token += (stats->srp->token[t].rx - stats->srp->token[prev].rx);
+ total_token_holdtime += (stats->srp->token[t].tx - stats->srp->token[t].rx);
+ total_backlog_calc += stats->srp->token[t].backlog_calc;
token_count++;
}
t = prev;
}
/* Averages are only published when at least one token sample was counted. */
if (token_count) {
- icmap_set_uint32("runtime.totem.pg.mrp.srp.mtt_rx_token", (total_mtt_rx_token / token_count));
- icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_token_workload", (total_token_holdtime / token_count));
- icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_backlog_calc", (total_backlog_calc / token_count));
+ icmap_set_uint32("runtime.totem.pg.srp.mtt_rx_token", (total_mtt_rx_token / token_count));
+ icmap_set_uint32("runtime.totem.pg.srp.avg_token_workload", (total_token_holdtime / token_count));
+ icmap_set_uint32("runtime.totem.pg.srp.avg_backlog_calc", (total_backlog_calc / token_count));
}
cs_ipcs_stats_update();
/* Re-arm the timer so this function runs again. */
api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL,
corosync_totem_stats_updater,
&corosync_stats_timer_handle);
}
/*
 * Initialize the averaged token statistics keys in icmap to 0 and schedule
 * the first run of corosync_totem_stats_updater().
 */
static void corosync_totem_stats_init (void)
{
- icmap_set_uint32("runtime.totem.pg.mrp.srp.mtt_rx_token", 0);
- icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_token_workload", 0);
- icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_backlog_calc", 0);
+ icmap_set_uint32("runtime.totem.pg.srp.mtt_rx_token", 0);
+ icmap_set_uint32("runtime.totem.pg.srp.avg_token_workload", 0);
+ icmap_set_uint32("runtime.totem.pg.srp.avg_backlog_calc", 0);
/* start stats timer */
api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL,
corosync_totem_stats_updater,
&corosync_stats_timer_handle);
}
static void deliver_fn (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required)
{
const struct qb_ipc_request_header *header;
int32_t service;
int32_t fn_id;
uint32_t id;
header = msg;
if (endian_conversion_required) {
id = swab32 (header->id);
} else {
id = header->id;
}
/*
* Call the proper executive handler
*/
service = id >> 16;
fn_id = id & 0xffff;
if (!corosync_service[service]) {
return;
}
if (fn_id >= corosync_service[service]->exec_engine_count) {
log_printf(LOGSYS_LEVEL_WARNING, "discarded unknown message %d for service %d (max id %d)",
fn_id, service, corosync_service[service]->exec_engine_count);
return;
}
icmap_fast_inc(service_stats_rx[service][fn_id]);
if (endian_conversion_required) {
assert(corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn != NULL);
corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn
((void *)msg);
}
corosync_service[service]->exec_engine[fn_id].exec_handler_fn
(msg, nodeid);
}
/*
 * Multicast a message to the joined totem group.
 *
 * The first iovec element is read as a qb_ipc_request_header; when the
 * addressed service is registered, its transmit counter for that message id
 * is incremented before sending. Returns the result of
 * totempg_groups_mcast_joined().
 */
int main_mcast (
	const struct iovec *iovec,
	unsigned int iov_len,
	unsigned int guarantee)
{
	const struct qb_ipc_request_header *req = iovec->iov_base;
	int32_t service = req->id >> 16;
	int32_t fn_id = req->id & 0xffff;

	if (corosync_service[service]) {
		icmap_fast_inc(service_stats_tx[service][fn_id]);
	}

	return (totempg_groups_mcast_joined (
		corosync_group_handle,
		iovec,
		iov_len,
		guarantee));
}
/*
 * Load the ring sequence number for `addr` from "<run_dir>/ringid_<addr>",
 * or create that file with sequence 0 when it cannot be opened or does not
 * contain a full 64-bit value. The representative address of the ring id is
 * set to `addr`.
 *
 * Exits through corosync_exit_error(COROSYNC_DONE_STORE_RINGID) when the
 * file can neither be created nor completely written. A short write is
 * treated as a failure, matching corosync_ring_id_store(); accepting it
 * would leave a truncated file behind. errno is captured before close() so
 * the logged error belongs to the failing call.
 */
static void corosync_ring_id_create_or_load (
	struct memb_ring_id *memb_ring_id,
	const struct totem_ip_address *addr)
{
	int fd;
	int res = 0;
	int saved_errno;
	char filename[PATH_MAX];

	snprintf (filename, sizeof(filename), "%s/ringid_%s",
		get_run_dir(), totemip_print (addr));
	fd = open (filename, O_RDONLY, 0700);
	/*
	 * If file can be opened and read, read the ring id
	 */
	if (fd != -1) {
		res = read (fd, &memb_ring_id->seq, sizeof (uint64_t));
		close (fd);
	}
	/*
	 * If file could not be opened or read, create a new ring id
	 */
	if ((fd == -1) || (res != sizeof (uint64_t))) {
		memb_ring_id->seq = 0;
		umask(0);
		fd = open (filename, O_CREAT|O_RDWR, 0700);
		if (fd != -1) {
			res = write (fd, &memb_ring_id->seq, sizeof (uint64_t));
			saved_errno = errno;
			close (fd);
			if (res != sizeof (uint64_t)) {
				LOGSYS_PERROR (saved_errno, LOGSYS_LEVEL_ERROR,
					"Couldn't write ringid file '%s'", filename);
				corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
			}
		} else {
			LOGSYS_PERROR (errno, LOGSYS_LEVEL_ERROR,
				"Couldn't create ringid file '%s'", filename);
			corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
		}
	}

	totemip_copy(&memb_ring_id->rep, addr);
	assert (!totemip_zero_check(&memb_ring_id->rep));
}
/*
 * Persist the ring sequence number to "<run_dir>/ringid_<addr>".
 *
 * The file is opened for writing and created if that fails. When it cannot
 * be opened at all, or the sequence number is not written completely, the
 * error is logged and the process exits through
 * corosync_exit_error(COROSYNC_DONE_STORE_RINGID).
 */
static void corosync_ring_id_store (
	const struct memb_ring_id *memb_ring_id,
	const struct totem_ip_address *addr)
{
	char path[PATH_MAX];
	int ring_fd;
	int written;

	snprintf (path, sizeof(path), "%s/ringid_%s",
		get_run_dir(), totemip_print (addr));

	/* Prefer the existing file; fall back to creating it. */
	ring_fd = open (path, O_WRONLY, 0700);
	if (ring_fd == -1) {
		ring_fd = open (path, O_CREAT|O_RDWR, 0700);
	}
	if (ring_fd == -1) {
		LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR,
			"Couldn't store new ring id %llx to stable storage",
			memb_ring_id->seq);
		corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
	}

	log_printf (LOGSYS_LEVEL_DEBUG,
		"Storing new sequence id for ring %llx", memb_ring_id->seq);

	written = write (ring_fd, &memb_ring_id->seq, sizeof(memb_ring_id->seq));
	close (ring_fd);

	if (written != sizeof(memb_ring_id->seq)) {
		LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR,
			"Couldn't store new ring id %llx to stable storage",
			memb_ring_id->seq);
		corosync_exit_error (COROSYNC_DONE_STORE_RINGID);
	}
}
/* Timer handle used by corosync_recheck_the_q_level() to re-arm itself. */
static qb_loop_timer_handle recheck_the_q_level_timer;

/*
 * Re-evaluate the totem queue level. While the IPC layer still reports
 * TOTEM_Q_LEVEL_CRITICAL, the check is scheduled again on the main loop.
 * `data` is unused.
 */
void corosync_recheck_the_q_level(void *data)
{
	totempg_check_q_level(corosync_group_handle);

	if (cs_ipcs_q_level_get() != TOTEM_Q_LEVEL_CRITICAL) {
		return;
	}

	qb_loop_timer_add(cs_poll_handle_get(), QB_LOOP_MED, 1*QB_TIME_NS_IN_MSEC,
		NULL, corosync_recheck_the_q_level, &recheck_the_q_level_timer);
}
/*
 * Per-request state shared between corosync_sending_allowed() and
 * corosync_sending_allowed_release().
 */
struct sending_allowed_private_data_struct {
/* Result of totempg_groups_joined_reserve(); -1 means the reservation failed. */
int reserved_msgs;
};
/*
 * Decide whether a library request may be sent right now.
 *
 * Reserves totem message space for the request and stores the reservation
 * in the caller-provided private data, so that
 * corosync_sending_allowed_release() can release it later.
 *
 * Returns QB_TRUE when sending is allowed, otherwise a negative errno:
 *   -EINVAL       the reservation call failed
 *   -EHOSTUNREACH not quorate and the service does not allow inquorate use
 *   -ENOBUFS      flow control is required and nothing could be reserved
 *   -EINPROGRESS  flow control is required and synchronization is running
 */
int corosync_sending_allowed (
	unsigned int service,
	unsigned int id,
	const void *msg,
	void *sending_allowed_private_data)
{
	struct sending_allowed_private_data_struct *pd =
		(struct sending_allowed_private_data_struct *)sending_allowed_private_data;
	struct qb_ipc_request_header *header = (struct qb_ipc_request_header *)msg;
	struct iovec reserve_iovec;

	reserve_iovec.iov_base = (char *)header;
	reserve_iovec.iov_len = header->size;

	pd->reserved_msgs = totempg_groups_joined_reserve (
		corosync_group_handle,
		&reserve_iovec, 1);
	if (pd->reserved_msgs == -1) {
		return -EINVAL;
	}

	/* Quorum gate: inquorate nodes may only use services that allow it. */
	if (corosync_quorum_is_quorate() != 1 &&
	    corosync_service[service]->allow_inquorate != CS_LIB_ALLOW_INQUORATE) {
		return -EHOSTUNREACH;
	}

	/* Requests without flow control are always allowed from here on. */
	if (corosync_service[service]->lib_engine[id].flow_control == CS_LIB_FLOW_CONTROL_NOT_REQUIRED) {
		return (QB_TRUE);
	}

	if (pd->reserved_msgs == 0) {
		return -ENOBUFS;
	}
	if (sync_in_process != 0) {
		return -EINPROGRESS;
	}

	return (QB_TRUE);
}
/*
 * Release the message reservation made by corosync_sending_allowed().
 * Nothing is released when that reservation failed (reserved_msgs == -1).
 */
void corosync_sending_allowed_release (void *sending_allowed_private_data)
{
	struct sending_allowed_private_data_struct *pd =
		(struct sending_allowed_private_data_struct *)sending_allowed_private_data;

	if (pd->reserved_msgs != -1) {
		totempg_groups_joined_release (pd->reserved_msgs);
	}
}
/*
 * Return 1 when `source` carries this node's id (as reported by
 * totempg_my_nodeid_get()), 0 otherwise. `source` must not be NULL.
 */
int message_source_is_local (const mar_message_source_t *source)
{
	assert (source != NULL);

	return (source->nodeid == totempg_my_nodeid_get ()) ? 1 : 0;
}
/*
 * Initialize `source`: zero it, then store this node's id and the given
 * connection pointer. Neither argument may be NULL.
 */
void message_source_set (
	mar_message_source_t *source,
	void *conn)
{
	assert ((source != NULL) && (conn != NULL));

	memset (source, 0, sizeof (mar_message_source_t));

	source->nodeid = totempg_my_nodeid_get ();
	source->conn = conn;
}
/*
 * State of the periodic check in timer_function_scheduler_timeout().
 */
struct scheduler_pause_timeout_data {
/* Source of token_timeout, re-read on every timer run. */
struct totem_config *totem_config;
/* Handle of the currently armed timer. */
qb_loop_timer_handle handle;
/* Timestamp from qb_util_nano_current_get() at the previous run; 0 before the first run. */
unsigned long long tv_prev;
/* Threshold for the gap between two runs above which a warning is logged. */
unsigned long long max_tv_diff;
};
/*
 * Periodic timer callback that detects long scheduling pauses.
 *
 * Measures the time since its previous run and logs a warning when that
 * exceeds the stored threshold. The threshold is then recomputed as 0.8 of
 * the current token timeout, and the timer is re-armed to fire after a
 * third of the token timeout.
 */
static void timer_function_scheduler_timeout (void *data)
{
	struct scheduler_pause_timeout_data *timeout_data = (struct scheduler_pause_timeout_data *)data;
	unsigned long long now;
	unsigned long long elapsed;

	now = qb_util_nano_current_get ();

	/* Initial call -> just pretend everything is ok */
	if (timeout_data->tv_prev == 0) {
		timeout_data->tv_prev = now;
		timeout_data->max_tv_diff = 0;
	}

	elapsed = now - timeout_data->tv_prev;
	timeout_data->tv_prev = now;

	if (elapsed > timeout_data->max_tv_diff) {
		log_printf (LOGSYS_LEVEL_WARNING, "Corosync main process was not scheduled for %0.4f ms "
			"(threshold is %0.4f ms). Consider token timeout increase.",
			(float)elapsed / QB_TIME_NS_IN_MSEC, (float)timeout_data->max_tv_diff / QB_TIME_NS_IN_MSEC);
	}

	/* Set next threshold, because token_timeout can change */
	timeout_data->max_tv_diff = timeout_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC * 0.8;

	qb_loop_timer_add (corosync_poll_handle,
		QB_LOOP_MED,
		timeout_data->totem_config->token_timeout * QB_TIME_NS_IN_MSEC / 3,
		timeout_data,
		timer_function_scheduler_timeout,
		&timeout_data->handle);
}
/*
 * Switch the process to SCHED_RR at the maximum priority and give the
 * logging thread a matching priority.
 *
 * Failing to get the maximum priority or to set the scheduler only produces
 * a warning. Failing to set the logging thread priority after SCHED_RR was
 * enabled is fatal (COROSYNC_DONE_LOGSETUP). On platforms lacking the needed
 * functions only a warning is logged.
 */
static void corosync_setscheduler (void)
{
#if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) && defined(HAVE_SCHED_SETSCHEDULER)
int res;
sched_priority = sched_get_priority_max (SCHED_RR);
if (sched_priority != -1) {
global_sched_param.sched_priority = sched_priority;
res = sched_setscheduler (0, SCHED_RR, &global_sched_param);
if (res == -1) {
LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING,
"Could not set SCHED_RR at priority %d",
global_sched_param.sched_priority);
/* Stay on the default policy: reset the stored priority and the log thread. */
global_sched_param.sched_priority = 0;
#ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
qb_log_thread_priority_set (SCHED_OTHER, 0);
#endif
} else {
/*
* Turn on SCHED_RR in logsys system
*/
#ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
res = qb_log_thread_priority_set (SCHED_RR, sched_priority);
#else
/* Without qb_log_thread_priority_set() this path is always treated as a failure. */
res = -1;
#endif
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR,
"Could not set logsys thread priority."
" Can't continue because of priority inversions.");
corosync_exit_error (COROSYNC_DONE_LOGSETUP);
}
}
} else {
LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
"Could not get maximum scheduler priority");
sched_priority = 0;
}
#else
log_printf(LOGSYS_LEVEL_WARNING,
"The Platform is missing process priority setting features. Leaving at default.");
#endif
}
/*
 * Local replacement for basename(3), whose man page warns about
 * thread-safety and portability problems.
 *
 * Returns a pointer into file_name just past its last '/', or file_name
 * itself when it contains no '/'. The input is never modified.
 */
static const char *corosync_basename(const char *file_name)
{
	const char *last_slash = strrchr (file_name, '/');

	return (last_slash != NULL) ? last_slash + 1 : file_name;
}
/*
 * printf-style logging helper that forwards a message with an explicit
 * source location to libqb. Only the base name of file_name is passed on.
 * The prototype carries the printf format attribute so the compiler can
 * check format/argument agreement.
 */
static void
_logsys_log_printf(int level, int subsys,
		const char *function_name,
		const char *file_name,
		int file_line,
		const char *format,
		...) __attribute__((format(printf, 6, 7)));

static void
_logsys_log_printf(int level, int subsys,
		const char *function_name,
		const char *file_name,
		int file_line,
		const char *format, ...)
{
	va_list args;

	va_start(args, format);
	qb_log_from_external_source_va(
		function_name,
		corosync_basename(file_name),
		format,
		level,
		file_line,
		subsys,
		args);
	va_end(args);
}
/*
 * icmap notification callback for the blackbox control keys.
 *
 * A change of "runtime.blackbox.dump_flight_data" writes the blackbox to a
 * file; a change of "runtime.blackbox.dump_state" dumps the state of all
 * services. The event type and the old/new values are not inspected.
 */
static void fplay_key_change_notify_fn (
	int32_t event,
	const char *key_name,
	struct icmap_notify_value new_val,
	struct icmap_notify_value old_val,
	void *user_data)
{
	int is_flight_data_key;
	int is_state_key;

	is_flight_data_key = (strcmp(key_name, "runtime.blackbox.dump_flight_data") == 0);
	if (is_flight_data_key) {
		fprintf(stderr,"Writetofile\n");
		corosync_blackbox_write_to_file ();
	}

	is_state_key = (strcmp(key_name, "runtime.blackbox.dump_state") == 0);
	if (is_state_key) {
		fprintf(stderr,"statefump\n");
		corosync_state_dump ();
	}
}
/*
 * Create the two blackbox control keys with the value "no" and register
 * fplay_key_change_notify_fn() for add, delete and modify events on each.
 */
static void corosync_fplay_control_init (void)
{
	static const char *const control_keys[] = {
		"runtime.blackbox.dump_flight_data",
		"runtime.blackbox.dump_state"
	};
	const size_t key_count = sizeof (control_keys) / sizeof (control_keys[0]);
	icmap_track_t track = NULL;
	size_t k;

	/* Create both keys first ... */
	for (k = 0; k < key_count; k++) {
		icmap_set_string(control_keys[k], "no");
	}

	/* ... then start tracking them. */
	for (k = 0; k < key_count; k++) {
		icmap_track_add(control_keys[k],
			ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY,
			fplay_key_change_notify_fn,
			NULL, &track);
	}
}
/*
* Set the RO flag for keys which either don't make sense for the user to change
* (statistics) or whose changes are not reflected at runtime (totem.crypto_cipher, ...).
*
* Some RO keys cannot be determined at this stage, so they are set later in
* other functions (like nodelist.local_node_pos, ...)
*/
/*
 * Mark icmap keys as read-only: first a set of key prefixes (internal
 * configuration and runtime statistics), then a list of individual
 * configuration keys that cannot be changed at runtime.
 */
static void set_icmap_ro_keys_flag (void)
{
	/* Prefixes: every key below them becomes read-only. */
	static const char *const ro_prefixes[] = {
		"internal_configuration.",
		"runtime.connections.",
		"runtime.totem.",
		"runtime.services.",
		"runtime.config.",
		"uidgid.config."
	};
	/* Concrete keys whose runtime modification is not supported. */
	static const char *const ro_keys[] = {
		"totem.crypto_cipher",
		"totem.crypto_hash",
		"totem.secauth",
		"totem.ip_version",
		"totem.rrp_mode",
		"totem.transport",
		"totem.cluster_name",
		"totem.netmtu",
		"totem.threads",
		"totem.version",
		"totem.nodeid",
		"totem.clear_node_high_bit",
		"qb.ipc_type",
		"config.reload_in_progress",
		"config.totemconfig_reload_in_progress"
	};
	size_t n;

	for (n = 0; n < sizeof (ro_prefixes) / sizeof (ro_prefixes[0]); n++) {
		icmap_set_ro_access(ro_prefixes[n], CS_TRUE, CS_TRUE);
	}

	for (n = 0; n < sizeof (ro_keys) / sizeof (ro_keys[0]); n++) {
		icmap_set_ro_access(ro_keys[n], CS_FALSE, CS_TRUE);
	}
}
/*
 * Bring up everything that depends on totem being initialized: link and
 * initialize the default services (fatal on failure), then IPC, the totem
 * statistics timer, the blackbox control keys and the sync engine.
 */
static void main_service_ready (void)
{
	int rc;

	/*
	 * This must occur after totempg is initialized because "this_ip" must be set
	 */
	rc = corosync_service_defaults_link_and_init (api);
	if (rc == -1) {
		log_printf (LOGSYS_LEVEL_ERROR, "Could not initialize default services");
		corosync_exit_error (COROSYNC_DONE_INIT_SERVICES);
	}

	cs_ipcs_init();
	corosync_totem_stats_init ();
	corosync_fplay_control_init ();

	sync_init (corosync_sync_callbacks_retrieve, corosync_sync_completed);
}
/*
 * Acquire the daemon lock file and record `pid` in it.
 *
 * Opens (creating if needed) `lockfile`, takes a non-blocking write lock on
 * it with fcntl(F_SETLK), truncates it, writes the pid followed by a
 * newline and marks the descriptor close-on-exec. On success the descriptor
 * is intentionally left open and COROSYNC_DONE_EXIT is returned.
 *
 * Returns COROSYNC_DONE_ALREADY_RUNNING when the lock is held elsewhere
 * (EAGAIN/EACCES), and COROSYNC_DONE_ACQUIRE_LOCK for every other failure.
 * Failures after the lock was obtained also unlink the lock file.
 */
static enum e_corosync_done corosync_flock (const char *lockfile, pid_t pid)
{
struct flock lock;
enum e_corosync_done err;
char pid_s[17];
int fd_flag;
int lf;
err = COROSYNC_DONE_EXIT;
lf = open (lockfile, O_WRONLY | O_CREAT, 0640);
if (lf == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create lock file.");
return (COROSYNC_DONE_ACQUIRE_LOCK);
}
/* Lock request: write lock starting at offset 0 with l_len 0. */
retry_fcntl:
lock.l_type = F_WRLCK;
lock.l_start = 0;
lock.l_whence = SEEK_SET;
lock.l_len = 0;
if (fcntl (lf, F_SETLK, &lock) == -1) {
switch (errno) {
case EINTR:
/* Interrupted by a signal: try again. */
goto retry_fcntl;
break;
case EAGAIN:
case EACCES:
log_printf (LOGSYS_LEVEL_ERROR, "Another Corosync instance is already running.");
err = COROSYNC_DONE_ALREADY_RUNNING;
goto error_close;
break;
default:
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't acquire lock. Error was %s",
strerror(errno));
err = COROSYNC_DONE_ACQUIRE_LOCK;
goto error_close;
break;
}
}
/* The lock is held from here on: discard any stale content before writing the pid. */
if (ftruncate (lf, 0) == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't truncate lock file. Error was %s",
strerror (errno));
err = COROSYNC_DONE_ACQUIRE_LOCK;
goto error_close_unlink;
}
memset (pid_s, 0, sizeof (pid_s));
snprintf (pid_s, sizeof (pid_s) - 1, "%u\n", pid);
retry_write:
if (write (lf, pid_s, strlen (pid_s)) != strlen (pid_s)) {
if (errno == EINTR) {
goto retry_write;
} else {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't write pid to lock file. "
"Error was %s", strerror (errno));
err = COROSYNC_DONE_ACQUIRE_LOCK;
goto error_close_unlink;
}
}
/* Add FD_CLOEXEC to the descriptor flags. */
if ((fd_flag = fcntl (lf, F_GETFD, 0)) == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't get close-on-exec flag from lock file. "
"Error was %s", strerror (errno));
err = COROSYNC_DONE_ACQUIRE_LOCK;
goto error_close_unlink;
}
fd_flag |= FD_CLOEXEC;
if (fcntl (lf, F_SETFD, fd_flag) == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't set close-on-exec flag to lock file. "
"Error was %s", strerror (errno));
err = COROSYNC_DONE_ACQUIRE_LOCK;
goto error_close_unlink;
}
/* Success: return without closing lf. */
return (err);
/* Failure paths: remove the lock file only when this process held the lock. */
error_close_unlink:
unlink (lockfile);
error_close:
close (lf);
return (err);
}
/*
 * Corosync executive entry point.
 *
 * Parses the command line, sets up scheduling/memory locking and signal
 * handlers, reads and validates the configuration, optionally detaches from
 * the terminal, takes the pid lock file, initializes totem and then runs the
 * qb main loop until exit is requested.
 *
 * Command line switches (see the getopt string below):
 *   -f  run in foreground (do not detach)
 *   -p  do not change process priority
 *   -r  request realtime scheduling; setprio already defaults to 1, so this
 *       only matters when it follows an earlier -p
 *   -t  test the configuration and exit
 *   -v  print version and exit
 */
int main (int argc, char **argv, char **envp)
{
const char *error_string;
struct totem_config totem_config;
int res, ch;
int background, setprio, testonly;
struct stat stat_out;
enum e_corosync_done flock_err;
uint64_t totem_config_warnings;
struct scheduler_pause_timeout_data scheduler_pause_timeout_data;
/* default configuration
*/
background = 1;
setprio = 1;
testonly = 0;
while ((ch = getopt (argc, argv, "fprtv")) != EOF) {
switch (ch) {
case 'f':
background = 0;
break;
case 'p':
setprio = 0;
break;
case 'r':
setprio = 1;
break;
case 't':
testonly = 1;
break;
case 'v':
printf ("Corosync Cluster Engine, version '%s'\n", VERSION);
printf ("Copyright (c) 2006-2009 Red Hat, Inc.\n");
logsys_system_fini();
return EXIT_SUCCESS;
break;
default:
fprintf(stderr, \
"usage:\n"\
" -f : Start application in foreground.\n"\
" -p : Do not set process priority.\n"\
" -t : Test configuration and exit.\n"\
" -r : Set round robin realtime scheduling (default).\n"\
" -v : Display version and SVN revision of Corosync and exit.\n");
logsys_system_fini();
return EXIT_FAILURE;
}
}
/*
* Set round robin realtime scheduling with priority 99
* Lock all memory to avoid page faults which may interrupt
* application healthchecking
*
* Only the scheduler change is conditional on setprio; memory locking
* is always requested.
*/
if (setprio) {
corosync_setscheduler ();
}
corosync_mlockall ();
/*
* Other signals are registered later via qb_loop_signal_add
*/
(void)signal (SIGSEGV, sigsegv_handler);
(void)signal (SIGABRT, sigabrt_handler);
#if MSG_NOSIGNAL != 0
(void)signal (SIGPIPE, SIG_IGN);
#endif
if (icmap_init() != CS_OK) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't initialize configuration component.");
corosync_exit_error (COROSYNC_DONE_ICMAP);
}
set_icmap_ro_keys_flag();
/*
* Initialize the corosync_api_v1 definition
*/
api = apidef_get ();
res = coroparse_configparse(icmap_get_global_map(), &error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
res = corosync_log_config_read (&error_string);
if (res == -1) {
/*
* if we are here, we _must_ flush the logsys queue
* and try to inform that we couldn't read the config.
* this is a desperate attempt before certain death
* and there is no guarantee that we can print to stderr
* nor that logsys is sending the messages where we expect.
*/
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
fprintf(stderr, "%s", error_string);
syslog (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD);
}
if (!testonly) {
log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine ('%s'): started and ready to provide service.", VERSION);
log_printf (LOGSYS_LEVEL_INFO, "Corosync built-in features:" PACKAGE_FEATURES "");
}
/*
* Make sure required directory is present
*/
res = stat (get_run_dir(), &stat_out);
if ((res == -1) || (res == 0 && !S_ISDIR(stat_out.st_mode))) {
log_printf (LOGSYS_LEVEL_ERROR, "Required directory not present %s. Please create it.", get_run_dir());
corosync_exit_error (COROSYNC_DONE_DIR_NOT_PRESENT);
}
res = chdir(get_run_dir());
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Cannot chdir to run directory %s. "
"Please make sure it has correct context and rights.", get_run_dir());
corosync_exit_error (COROSYNC_DONE_DIR_NOT_PRESENT);
}
res = totem_config_read (&totem_config, &error_string, &totem_config_warnings);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
/*
* Report non-fatal configuration warnings collected by totem_config_read
*/
if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_IGNORED) {
log_printf (LOGSYS_LEVEL_WARNING, "member section is used together with nodelist. Members ignored.");
}
if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED) {
log_printf (LOGSYS_LEVEL_WARNING, "member section is deprecated.");
}
if (totem_config_warnings & TOTEM_CONFIG_WARNING_TOTEM_NODEID_IGNORED) {
log_printf (LOGSYS_LEVEL_WARNING, "nodeid appears both in totem section and nodelist. Nodelist one is used.");
}
if (totem_config_warnings != 0) {
log_printf (LOGSYS_LEVEL_WARNING, "Please migrate config file to nodelist.");
}
res = totem_config_keyread (&totem_config, &error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
res = totem_config_validate (&totem_config, &error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
/*
* In test-only mode the configuration has been read and validated at
* this point, so leave through the regular exit path.
*/
if (testonly) {
corosync_exit_error (COROSYNC_DONE_EXIT);
}
ip_version = totem_config.ip_version;
totem_config.totem_memb_ring_id_create_or_load = corosync_ring_id_create_or_load;
totem_config.totem_memb_ring_id_store = corosync_ring_id_store;
totem_config.totem_logging_configuration = totem_logging_configuration;
totem_config.totem_logging_configuration.log_subsys_id = _logsys_subsys_create("TOTEM", "totem,"
"totemmrp.c,totemrrp.c,totemip.c,totemconfig.c,totemcrypto.c,totemsrp.c,"
- "totempg.c,totemiba.c,totemudp.c,totemudpu.c,totemnet.c");
+ "totempg.c,totemiba.c,totemudp.c,totemudpu.c,totemnet.c,totemknet.c");
totem_config.totem_logging_configuration.log_level_security = LOGSYS_LEVEL_WARNING;
totem_config.totem_logging_configuration.log_level_error = LOGSYS_LEVEL_ERROR;
totem_config.totem_logging_configuration.log_level_warning = LOGSYS_LEVEL_WARNING;
totem_config.totem_logging_configuration.log_level_notice = LOGSYS_LEVEL_NOTICE;
totem_config.totem_logging_configuration.log_level_debug = LOGSYS_LEVEL_DEBUG;
totem_config.totem_logging_configuration.log_level_trace = LOGSYS_LEVEL_TRACE;
totem_config.totem_logging_configuration.log_printf = _logsys_log_printf;
+
logsys_config_apply();
/*
* Now we are fully initialized.
*/
if (background) {
corosync_tty_detach ();
}
corosync_poll_handle = qb_loop_create ();
memset(&scheduler_pause_timeout_data, 0, sizeof(scheduler_pause_timeout_data));
scheduler_pause_timeout_data.totem_config = &totem_config;
timer_function_scheduler_timeout (&scheduler_pause_timeout_data);
/*
* Route the remaining signals through the qb main loop
*/
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW,
SIGUSR2, NULL, sig_diag_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGINT, NULL, sig_exit_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGSEGV, NULL, sig_segv_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGABRT, NULL, sig_abrt_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGQUIT, NULL, sig_exit_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGTERM, NULL, sig_exit_handler, NULL);
if (logsys_thread_start() != 0) {
log_printf (LOGSYS_LEVEL_ERROR, "Can't initialize log thread");
corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD);
}
/*
* The lock file is taken with getpid() after the optional tty detach
* above, so the pid passed here is the one of the running process.
*/
if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) != COROSYNC_DONE_EXIT) {
corosync_exit_error (flock_err);
}
/*
* if totempg_initialize doesn't have root privileges, it cannot
* bind to a specific interface. This only matters if
* there is more than one interface in a system, so
* in this case, only a warning is printed
*/
/*
* Join multicast group and setup delivery
* and configuration change functions
*/
totempg_initialize (
corosync_poll_handle,
&totem_config);
totempg_service_ready_register (
main_service_ready);
totempg_groups_initialize (
&corosync_group_handle,
deliver_fn,
confchg_fn);
totempg_groups_join (
corosync_group_handle,
&corosync_group,
1);
/*
* Drop root privileges to user 'corosync'
* TODO: Don't really need full root capabilities;
* needed capabilities are:
* CAP_NET_RAW (bindtodevice)
* CAP_SYS_NICE (setscheduler)
* CAP_IPC_LOCK (mlockall)
*/
priv_drop ();
schedwrk_init (
serialize_lock,
serialize_unlock);
/*
* Start main processing loop
*/
qb_loop_run (corosync_poll_handle);
/*
* Exit was requested
*/
totempg_finalize ();
/*
* free the loop resources
*/
qb_loop_destroy (corosync_poll_handle);
/*
* free up the icmap
*
* NOTE(review): no icmap cleanup call follows this comment in the
* visible code - confirm whether one is expected here.
*/
/*
* Remove pid lock file
*/
unlink (corosync_lock_file);
corosync_exit_error (COROSYNC_DONE_EXIT);
return EXIT_SUCCESS;
}
diff --git a/exec/totemconfig.c b/exec/totemconfig.c
index 84a828db..7b44d047 100644
--- a/exec/totemconfig.c
+++ b/exec/totemconfig.c
@@ -1,1660 +1,1653 @@
/*
* Copyright (c) 2002-2005 MontaVista Software, Inc.
* Copyright (c) 2006-2013 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
* Jan Friesse (jfriesse@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/param.h>
#include <corosync/swab.h>
#include <corosync/list.h>
#include <qb/qbdefs.h>
#include <corosync/totem/totem.h>
#include <corosync/config.h>
#include <corosync/logsys.h>
#include <corosync/icmap.h>
#include "util.h"
#include "totemconfig.h"
/*
 * Built-in defaults for the totem parameters; they are applied when the
 * matching "totem.*" key is missing from the configuration. Timeouts are in
 * milliseconds.
 */
#define TOKEN_RETRANSMITS_BEFORE_LOSS_CONST 4
#define TOKEN_TIMEOUT 1000
/* Added to the token timeout for every member beyond the second one */
#define TOKEN_COEFFICIENT 650
#define JOIN_TIMEOUT 50
#define MERGE_TIMEOUT 200
#define DOWNCHECK_TIMEOUT 1000
#define FAIL_TO_RECV_CONST 2500
#define SEQNO_UNCHANGED_CONST 30
/*
 * Lower bound accepted for any timeout. The expansion is fully parenthesized
 * so the macro stays a single operand in every expression it appears in
 * (e.g. x / MINIMUM_TIMEOUT).
 */
#define MINIMUM_TIMEOUT ((int)(1000/HZ)*3)
#define MAX_NETWORK_DELAY 50
#define WINDOW_SIZE 50
#define MAX_MESSAGES 17
#define MISS_COUNT_CONST 5
-#define RRP_PROBLEM_COUNT_TIMEOUT 2000
-#define RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT 10
-#define RRP_PROBLEM_COUNT_THRESHOLD_MIN 2
-#define RRP_AUTORECOVERY_CHECK_TIMEOUT 1000
+
+/* These currently match the defaults in libknet.h */
+#define KNET_PING_INTERVAL 1000
+#define KNET_PING_TIMEOUT 2000
+#define KNET_PING_PRECISION 2048
/* NOTE(review): presumably the default totem network port - confirm against its users */
#define DEFAULT_PORT 5405
/*
 * File-wide buffer holding the last error message; functions in this file
 * hand it back to callers through their error_string out-parameter.
 */
static char error_string_response[512];
static void add_totem_config_notification(struct totem_config *totem_config);
/*
 * Map an icmap key name ("totem.*") to the address of the matching field in
 * totem_config.
 *
 * All the volatile parameters are uint32s, luckily, so a single pointer type
 * covers every entry. Returns NULL when param_name is not one of the keys
 * listed below.
 */
static uint32_t *totem_get_param_by_name(struct totem_config *totem_config, const char *param_name)
{
if (strcmp(param_name, "totem.token") == 0)
return &totem_config->token_timeout;
if (strcmp(param_name, "totem.token_retransmit") == 0)
return &totem_config->token_retransmit_timeout;
if (strcmp(param_name, "totem.hold") == 0)
return &totem_config->token_hold_timeout;
if (strcmp(param_name, "totem.token_retransmits_before_loss_const") == 0)
return &totem_config->token_retransmits_before_loss_const;
if (strcmp(param_name, "totem.join") == 0)
return &totem_config->join_timeout;
if (strcmp(param_name, "totem.send_join") == 0)
return &totem_config->send_join_timeout;
if (strcmp(param_name, "totem.consensus") == 0)
return &totem_config->consensus_timeout;
if (strcmp(param_name, "totem.merge") == 0)
return &totem_config->merge_timeout;
if (strcmp(param_name, "totem.downcheck") == 0)
return &totem_config->downcheck_timeout;
if (strcmp(param_name, "totem.fail_recv_const") == 0)
return &totem_config->fail_to_recv_const;
if (strcmp(param_name, "totem.seqno_unchanged_const") == 0)
return &totem_config->seqno_unchanged_const;
- if (strcmp(param_name, "totem.rrp_token_expired_timeout") == 0)
- return &totem_config->rrp_token_expired_timeout;
- if (strcmp(param_name, "totem.rrp_problem_count_timeout") == 0)
- return &totem_config->rrp_problem_count_timeout;
- if (strcmp(param_name, "totem.rrp_problem_count_threshold") == 0)
- return &totem_config->rrp_problem_count_threshold;
- if (strcmp(param_name, "totem.rrp_problem_count_mcast_threshold") == 0)
- return &totem_config->rrp_problem_count_mcast_threshold;
- if (strcmp(param_name, "totem.rrp_autorecovery_check_timeout") == 0)
- return &totem_config->rrp_autorecovery_check_timeout;
if (strcmp(param_name, "totem.heartbeat_failures_allowed") == 0)
return &totem_config->heartbeat_failures_allowed;
if (strcmp(param_name, "totem.max_network_delay") == 0)
return &totem_config->max_network_delay;
if (strcmp(param_name, "totem.window_size") == 0)
return &totem_config->window_size;
if (strcmp(param_name, "totem.max_messages") == 0)
return &totem_config->max_messages;
if (strcmp(param_name, "totem.miss_count_const") == 0)
return &totem_config->miss_count_const;
/* Unknown key: callers must be prepared for NULL */
return NULL;
}
/*
* Read key_name from icmap. If key is not found or key_name == delete_key or if allow_zero is false
* and readed value is zero, default value is used and stored into totem_config.
*/
static void totem_volatile_config_set_value (struct totem_config *totem_config,
const char *key_name, const char *deleted_key, unsigned int default_value,
int allow_zero_value)
{
char runtime_key_name[ICMAP_KEYNAME_MAXLEN];
if (icmap_get_uint32(key_name, totem_get_param_by_name(totem_config, key_name)) != CS_OK ||
(deleted_key != NULL && strcmp(deleted_key, key_name) == 0) ||
(!allow_zero_value && *totem_get_param_by_name(totem_config, key_name) == 0)) {
*totem_get_param_by_name(totem_config, key_name) = default_value;
}
/*
* Store totem_config value to cmap runtime section
*/
if (strlen("runtime.config.") + strlen(key_name) >= ICMAP_KEYNAME_MAXLEN) {
/*
* This shouldn't happen
*/
return ;
}
strcpy(runtime_key_name, "runtime.config.");
strcat(runtime_key_name, key_name);
icmap_set_uint32(runtime_key_name, *totem_get_param_by_name(totem_config, key_name));
}
/*
 * Read config values from cmap and store them into totem_config. If a key
 * doesn't exist, its default value is stored. deleted_key is the name of the
 * key being processed by a delete operation from cmap; it is considered
 * non-existing even if it can still be read. Can be NULL.
 *
 * The order of the calls matters: several defaults are computed from values
 * stored by earlier calls (token_retransmit from token and
 * token_retransmits_before_loss_const, hold from token_retransmit, consensus
 * from token).
 */
static void totem_volatile_config_read (struct totem_config *totem_config, const char *deleted_key)
{
uint32_t u32;
totem_volatile_config_set_value(totem_config, "totem.token_retransmits_before_loss_const", deleted_key,
TOKEN_RETRANSMITS_BEFORE_LOSS_CONST, 0);
totem_volatile_config_set_value(totem_config, "totem.token", deleted_key, TOKEN_TIMEOUT, 0);
/*
 * With more than two members on interface 0 the token timeout is extended
 * by token_coefficient for every member beyond the second one.
 */
if (totem_config->interface_count > 0 && totem_config->interfaces[0].member_count > 2) {
u32 = TOKEN_COEFFICIENT;
/* Return value ignored: presumably u32 keeps the default when the key is absent - TODO confirm */
icmap_get_uint32("totem.token_coefficient", &u32);
totem_config->token_timeout += (totem_config->interfaces[0].member_count - 2) * u32;
/*
* Store totem_config value to cmap runtime section
*/
icmap_set_uint32("runtime.config.totem.token", totem_config->token_timeout);
}
totem_volatile_config_set_value(totem_config, "totem.max_network_delay", deleted_key, MAX_NETWORK_DELAY, 0);
totem_volatile_config_set_value(totem_config, "totem.window_size", deleted_key, WINDOW_SIZE, 0);
totem_volatile_config_set_value(totem_config, "totem.max_messages", deleted_key, MAX_MESSAGES, 0);
totem_volatile_config_set_value(totem_config, "totem.miss_count_const", deleted_key, MISS_COUNT_CONST, 0);
totem_volatile_config_set_value(totem_config, "totem.token_retransmit", deleted_key,
(int)(totem_config->token_timeout / (totem_config->token_retransmits_before_loss_const + 0.2)), 0);
totem_volatile_config_set_value(totem_config, "totem.hold", deleted_key,
(int)(totem_config->token_retransmit_timeout * 0.8 - (1000/HZ)), 0);
totem_volatile_config_set_value(totem_config, "totem.join", deleted_key, JOIN_TIMEOUT, 0);
totem_volatile_config_set_value(totem_config, "totem.consensus", deleted_key,
(int)(float)(1.2 * totem_config->token_timeout), 0);
totem_volatile_config_set_value(totem_config, "totem.merge", deleted_key, MERGE_TIMEOUT, 0);
totem_volatile_config_set_value(totem_config, "totem.downcheck", deleted_key, DOWNCHECK_TIMEOUT, 0);
totem_volatile_config_set_value(totem_config, "totem.fail_recv_const", deleted_key, FAIL_TO_RECV_CONST, 0);
totem_volatile_config_set_value(totem_config, "totem.seqno_unchanged_const", deleted_key,
SEQNO_UNCHANGED_CONST, 0);
/* send_join and heartbeat_failures_allowed accept zero (allow_zero_value = 1) */
totem_volatile_config_set_value(totem_config, "totem.send_join", deleted_key, 0, 1);
- totem_volatile_config_set_value(totem_config, "totem.rrp_problem_count_timeout", deleted_key,
- RRP_PROBLEM_COUNT_TIMEOUT, 0);
-
- totem_volatile_config_set_value(totem_config, "totem.rrp_problem_count_threshold", deleted_key,
- RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT, 0);
-
- totem_volatile_config_set_value(totem_config, "totem.rrp_problem_count_mcast_threshold", deleted_key,
- totem_config->rrp_problem_count_threshold * 10, 0);
-
- totem_volatile_config_set_value(totem_config, "totem.rrp_token_expired_timeout", deleted_key,
- totem_config->token_retransmit_timeout, 0);
-
- totem_volatile_config_set_value(totem_config, "totem.rrp_autorecovery_check_timeout", deleted_key,
- RRP_AUTORECOVERY_CHECK_TIMEOUT, 0);
-
totem_volatile_config_set_value(totem_config, "totem.heartbeat_failures_allowed", deleted_key, 0, 1);
}
/*
 * Check the volatile timeouts stored in totem_config against their lower
 * bounds.
 *
 * Returns 0 when every check passes. On the first failing check it returns
 * -1 and points *error_string at error_string_response, which then holds
 * "parse error in config: <reason>". Both message buffers are static, so the
 * returned text is overwritten by the next failing call.
 */
static int totem_volatile_config_validate (
struct totem_config *totem_config,
const char **error_string)
{
static char local_error_reason[512];
const char *error_reason = local_error_reason;
if (totem_config->max_network_delay < MINIMUM_TIMEOUT) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The max_network_delay parameter (%d ms) may not be less than (%d ms).",
totem_config->max_network_delay, MINIMUM_TIMEOUT);
goto parse_error;
}
if (totem_config->token_timeout < MINIMUM_TIMEOUT) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The token timeout parameter (%d ms) may not be less than (%d ms).",
totem_config->token_timeout, MINIMUM_TIMEOUT);
goto parse_error;
}
if (totem_config->token_retransmit_timeout < MINIMUM_TIMEOUT) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The token retransmit timeout parameter (%d ms) may not be less than (%d ms).",
totem_config->token_retransmit_timeout, MINIMUM_TIMEOUT);
goto parse_error;
}
if (totem_config->token_hold_timeout < MINIMUM_TIMEOUT) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The token hold timeout parameter (%d ms) may not be less than (%d ms).",
totem_config->token_hold_timeout, MINIMUM_TIMEOUT);
goto parse_error;
}
if (totem_config->join_timeout < MINIMUM_TIMEOUT) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The join timeout parameter (%d ms) may not be less than (%d ms).",
totem_config->join_timeout, MINIMUM_TIMEOUT);
goto parse_error;
}
if (totem_config->consensus_timeout < MINIMUM_TIMEOUT) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The consensus timeout parameter (%d ms) may not be less than (%d ms).",
totem_config->consensus_timeout, MINIMUM_TIMEOUT);
goto parse_error;
}
/* Consensus must additionally be at least as long as the join timeout */
if (totem_config->consensus_timeout < totem_config->join_timeout) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The consensus timeout parameter (%d ms) may not be less than join timeout (%d ms).",
totem_config->consensus_timeout, totem_config->join_timeout);
goto parse_error;
}
if (totem_config->merge_timeout < MINIMUM_TIMEOUT) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The merge timeout parameter (%d ms) may not be less than (%d ms).",
totem_config->merge_timeout, MINIMUM_TIMEOUT);
goto parse_error;
}
if (totem_config->downcheck_timeout < MINIMUM_TIMEOUT) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The downcheck timeout parameter (%d ms) may not be less than (%d ms).",
totem_config->downcheck_timeout, MINIMUM_TIMEOUT);
goto parse_error;
}
- if (totem_config->rrp_problem_count_timeout < MINIMUM_TIMEOUT) {
- snprintf (local_error_reason, sizeof(local_error_reason),
- "The RRP problem count timeout parameter (%d ms) may not be less than (%d ms).",
- totem_config->rrp_problem_count_timeout, MINIMUM_TIMEOUT);
- goto parse_error;
- }
-
- if (totem_config->rrp_problem_count_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) {
- snprintf (local_error_reason, sizeof(local_error_reason),
- "The RRP problem count threshold (%d problem count) may not be less than (%d problem count).",
- totem_config->rrp_problem_count_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN);
- goto parse_error;
- }
- if (totem_config->rrp_problem_count_mcast_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) {
- snprintf (local_error_reason, sizeof(local_error_reason),
- "The RRP multicast problem count threshold (%d problem count) may not be less than (%d problem count).",
- totem_config->rrp_problem_count_mcast_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN);
- goto parse_error;
- }
-
- if (totem_config->rrp_token_expired_timeout < MINIMUM_TIMEOUT) {
- snprintf (local_error_reason, sizeof(local_error_reason),
- "The RRP token expired timeout parameter (%d ms) may not be less than (%d ms).",
- totem_config->rrp_token_expired_timeout, MINIMUM_TIMEOUT);
- goto parse_error;
- }
-
return 0;
parse_error:
/* Wrap the specific reason into the message handed back to the caller */
snprintf (error_string_response, sizeof(error_string_response),
"parse error in config: %s\n", error_reason);
*error_string = error_string_response;
return (-1);
}
/*
 * Read "totem.crypto_cipher" and "totem.crypto_hash" from icmap and store
 * freshly strdup'ed copies of the selected names in totem_config, freeing
 * the previously stored strings.
 *
 * Values that do not match one of the names tested below are ignored and the
 * current selection is kept. Returns -1, leaving totem_config untouched,
 * when a cipher is selected while the hash is "none"; returns 0 otherwise.
 *
 * NOTE(review): the strdup results are not checked for NULL.
 */
static int totem_get_crypto(struct totem_config *totem_config)
{
char *str;
const char *tmp_cipher;
const char *tmp_hash;
- tmp_hash = "sha1";
- tmp_cipher = "aes256";
-
- if (icmap_get_string("totem.secauth", &str) == CS_OK) {
- if (strcmp (str, "off") == 0) {
- tmp_hash = "none";
- tmp_cipher = "none";
- }
- free(str);
- }
+ tmp_hash = "none";
+ tmp_cipher = "none";
if (icmap_get_string("totem.crypto_cipher", &str) == CS_OK) {
if (strcmp(str, "none") == 0) {
tmp_cipher = "none";
}
if (strcmp(str, "aes256") == 0) {
tmp_cipher = "aes256";
}
if (strcmp(str, "aes192") == 0) {
tmp_cipher = "aes192";
}
if (strcmp(str, "aes128") == 0) {
tmp_cipher = "aes128";
}
/* NOTE(review): the added check below repeats the "aes256" test made above */
+ if (strcmp(str, "aes256") == 0) {
+ tmp_cipher = "aes256";
+ }
if (strcmp(str, "3des") == 0) {
tmp_cipher = "3des";
}
free(str);
}
if (icmap_get_string("totem.crypto_hash", &str) == CS_OK) {
if (strcmp(str, "none") == 0) {
tmp_hash = "none";
}
if (strcmp(str, "md5") == 0) {
tmp_hash = "md5";
}
if (strcmp(str, "sha1") == 0) {
tmp_hash = "sha1";
}
if (strcmp(str, "sha256") == 0) {
tmp_hash = "sha256";
}
if (strcmp(str, "sha384") == 0) {
tmp_hash = "sha384";
}
if (strcmp(str, "sha512") == 0) {
tmp_hash = "sha512";
}
free(str);
}
/* A cipher without a hash is rejected */
if ((strcmp(tmp_cipher, "none") != 0) &&
(strcmp(tmp_hash, "none") == 0)) {
return -1;
}
free(totem_config->crypto_cipher_type);
free(totem_config->crypto_hash_type);
totem_config->crypto_cipher_type = strdup(tmp_cipher);
totem_config->crypto_hash_type = strdup(tmp_hash);
return 0;
}
/*
 * Translate the "totem.ip_version" icmap key into an address family.
 * Returns AF_INET6 for "ipv6" and AF_INET for "ipv4", for any other value,
 * or when the key cannot be read.
 */
static int totem_config_get_ip_version(void)
{
	char *version_str;
	int family = AF_INET;

	if (icmap_get_string("totem.ip_version", &version_str) != CS_OK) {
		return (family);
	}

	if (strcmp(version_str, "ipv6") == 0) {
		family = AF_INET6;
	} else if (strcmp(version_str, "ipv4") == 0) {
		family = AF_INET;
	}

	/* icmap_get_string handed us an allocated copy */
	free(version_str);

	return (family);
}
/*
 * Derive a 16-bit cluster id from the cluster name: every character is
 * added to an accumulator that is shifted left by one bit beforehand, and
 * the low 16 bits of the result are returned.
 *
 * The accumulator is unsigned so the repeated shift wraps modulo 2^32
 * instead of overflowing a signed int on long names; the low 16 bits are the
 * same as with the signed computation wherever that one was well defined.
 */
static uint16_t generate_cluster_id (const char *cluster_name)
{
	size_t i;
	/* Measure the name once instead of on every loop iteration */
	size_t name_len = strlen(cluster_name);
	unsigned int value = 0;

	for (i = 0; i < name_len; i++) {
		value <<= 1;
		value += cluster_name[i];
	}

	return (value & 0xFFFF);
}
/*
 * Build a multicast address from the cluster name and link number and parse
 * it into res.
 *
 * The 16-bit id is generate_cluster_id(cluster_name) plus the link number.
 * It is formatted as "239.192.<a>.<b>" for AF_INET or "ff15::<id>" for
 * AF_INET6. Returns -1 when cluster_name is NULL or ip_version is neither
 * family; otherwise returns the result of totemip_parse.
 */
static int get_cluster_mcast_addr (
const char *cluster_name,
- unsigned int ringnumber,
+ unsigned int linknumber,
int ip_version,
struct totem_ip_address *res)
{
uint16_t clusterid;
char addr[INET6_ADDRSTRLEN + 1];
int err;
if (cluster_name == NULL) {
return (-1);
}
- clusterid = generate_cluster_id(cluster_name) + ringnumber;
+ clusterid = generate_cluster_id(cluster_name) + linknumber;
memset (res, 0, sizeof(*res));
switch (ip_version) {
case AF_INET:
/*
 * NOTE(review): "% 0xFF" yields 0..254 rather than the low byte
 * ("& 0xFF"). Changing it would alter the address derived for existing
 * cluster names - confirm the intent before touching it.
 */
snprintf(addr, sizeof(addr), "239.192.%d.%d", clusterid >> 8, clusterid % 0xFF);
break;
case AF_INET6:
snprintf(addr, sizeof(addr), "ff15::%x", clusterid);
break;
default:
/*
* Unknown family
*/
return (-1);
}
err = totemip_parse (res, addr, ip_version);
return (err);
}
static unsigned int generate_nodeid_for_duplicate_test(
struct totem_config *totem_config,
char *addr)
{
unsigned int nodeid;
struct totem_ip_address totemip;
/* AF_INET hard-coded here because auto-generated nodeids
are only for IPv4 */
if (totemip_parse(&totemip, addr, AF_INET) != 0)
return -1;
memcpy (&nodeid, &totemip.addr, sizeof (unsigned int));
#if __BYTE_ORDER == __LITTLE_ENDIAN
nodeid = swab32 (nodeid);
#endif
if (totem_config->clear_node_high_bit) {
nodeid &= 0x7FFFFFFF;
}
return nodeid;
}
/*
 * Verify that no node id is used by two entries of the nodelist.
 *
 * Every "nodelist.node.N.ring0_addr" entry is compared with the entries at
 * lower positions. When an entry has no explicit nodeid, the id that would
 * be generated from its ring0_addr is used instead; entries whose address
 * cannot be turned into an id are skipped.
 *
 * Returns 0 when no duplicate is found. Otherwise returns -1, logs the
 * problem and points *error_string at error_string_response.
 */
static int check_for_duplicate_nodeids(
	struct totem_config *totem_config,
	const char **error_string)
{
	icmap_iter_t iter;
	icmap_iter_t subiter;
	const char *iter_key;
	int res = 0;
	int retval = 0;
	char tmp_key[ICMAP_KEYNAME_MAXLEN];
	char *ring0_addr=NULL;
	char *ring0_addr1=NULL;
	unsigned int node_pos;
	unsigned int node_pos1;
	unsigned int nodeid;
	unsigned int nodeid1;
	int autogenerated;

	iter = icmap_iter_init("nodelist.node.");
	while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
		res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key);
		if (res != 2) {
			continue;
		}

		if (strcmp(tmp_key, "ring0_addr") != 0) {
			continue;
		}

		snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", node_pos);
		autogenerated = 0;
		ring0_addr = NULL;
		if (icmap_get_uint32(tmp_key, &nodeid) != CS_OK) {

			snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", node_pos);
			if (icmap_get_string(tmp_key, &ring0_addr) != CS_OK) {
				continue;
			}

			/* Generate nodeid so we can check that auto-generated nodeids don't clash either */
			nodeid = generate_nodeid_for_duplicate_test(totem_config, ring0_addr);
			if (nodeid == -1) {
				/* The address string is ours to release */
				free(ring0_addr);
				ring0_addr = NULL;
				continue;
			}
			autogenerated = 1;
		}

		node_pos1 = 0;
		subiter = icmap_iter_init("nodelist.node.");
		while (((iter_key = icmap_iter_next(subiter, NULL, NULL)) != NULL) && (node_pos1 < node_pos)) {
			res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos1, tmp_key);
			if ((res != 2) || (node_pos1 >= node_pos)) {
				continue;
			}

			if (strcmp(tmp_key, "ring0_addr") != 0) {
				continue;
			}

			snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", node_pos1);
			if (icmap_get_uint32(tmp_key, &nodeid1) != CS_OK) {

				snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", node_pos1);
				if (icmap_get_string(tmp_key, &ring0_addr1) != CS_OK) {
					continue;
				}
				nodeid1 = generate_nodeid_for_duplicate_test(totem_config, ring0_addr1);
				/* Only the generated id is needed from here on */
				free(ring0_addr1);
				ring0_addr1 = NULL;
				if (nodeid1 == -1) {
					continue;
				}
			}

			if (nodeid == nodeid1) {
				retval = -1;
				snprintf (error_string_response, sizeof(error_string_response),
					  "Nodeid %u%s%s%s appears twice in corosync.conf", nodeid,
					  autogenerated?"(autogenerated from ":"",
					  autogenerated?ring0_addr:"",
					  autogenerated?")":"");
				/*
				 * The message embeds text taken from the configuration,
				 * so it must never be used as the format string itself.
				 */
				log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string_response);
				*error_string = error_string_response;
				break;
			}
		}
		icmap_iter_finalize(subiter);

		/* Done with this entry; free(NULL) is a no-op for explicit nodeids */
		free(ring0_addr);
		ring0_addr = NULL;
	}
	icmap_iter_finalize(iter);
	return retval;
}
static int find_local_node_in_nodelist(struct totem_config *totem_config)
{
icmap_iter_t iter;
const char *iter_key;
int res = 0;
unsigned int node_pos;
int local_node_pos = -1;
struct totem_ip_address bind_addr;
int interface_up, interface_num;
char tmp_key[ICMAP_KEYNAME_MAXLEN];
char *node_addr_str;
struct totem_ip_address node_addr;
res = totemip_iface_check(&totem_config->interfaces[0].bindnet,
&bind_addr, &interface_up, &interface_num,
totem_config->clear_node_high_bit);
if (res == -1) {
return (-1);
}
iter = icmap_iter_init("nodelist.node.");
while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key);
if (res != 2) {
continue;
}
if (strcmp(tmp_key, "ring0_addr") != 0) {
continue;
}
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", node_pos);
if (icmap_get_string(tmp_key, &node_addr_str) != CS_OK) {
continue;
}
res = totemip_parse (&node_addr, node_addr_str, totem_config->ip_version);
free(node_addr_str);
if (res == -1) {
continue ;
}
if (totemip_equal(&bind_addr, &node_addr)) {
local_node_pos = node_pos;
}
}
icmap_iter_finalize(iter);
return (local_node_pos);
}
/*
* Compute difference between two set of totem interface arrays. set1 and set2
* are changed so for same ring, ip existing in both set1 and set2 are cleared
* (set to 0), and ips which are only in set1 or set2 remains untouched.
* totempg_node_add/remove is called.
*/
static void compute_interfaces_diff(int interface_count,
struct totem_interface *set1,
struct totem_interface *set2)
{
int ring_no, set1_pos, set2_pos;
struct totem_ip_address empty_ip_address;
memset(&empty_ip_address, 0, sizeof(empty_ip_address));
for (ring_no = 0; ring_no < interface_count; ring_no++) {
for (set1_pos = 0; set1_pos < set1[ring_no].member_count; set1_pos++) {
for (set2_pos = 0; set2_pos < set2[ring_no].member_count; set2_pos++) {
/*
* For current ring_no remove all set1 items existing
* in set2
*/
if (memcmp(&set1[ring_no].member_list[set1_pos],
&set2[ring_no].member_list[set2_pos],
sizeof(struct totem_ip_address)) == 0) {
memset(&set1[ring_no].member_list[set1_pos], 0,
sizeof(struct totem_ip_address));
memset(&set2[ring_no].member_list[set2_pos], 0,
sizeof(struct totem_ip_address));
}
}
}
}
for (ring_no = 0; ring_no < interface_count; ring_no++) {
for (set1_pos = 0; set1_pos < set1[ring_no].member_count; set1_pos++) {
/*
* All items which remained in set1 doesn't exists in set2 any longer so
* node has to be removed.
*/
if (memcmp(&set1[ring_no].member_list[set1_pos], &empty_ip_address, sizeof(empty_ip_address)) != 0) {
log_printf(LOGSYS_LEVEL_DEBUG,
"removing dynamic member %s for ring %u",
totemip_print(&set1[ring_no].member_list[set1_pos]),
ring_no);
totempg_member_remove(&set1[ring_no].member_list[set1_pos], ring_no);
}
}
for (set2_pos = 0; set2_pos < set2[ring_no].member_count; set2_pos++) {
/*
* All items which remained in set2 doesn't existed in set1 so this is no node
* and has to be added.
*/
if (memcmp(&set2[ring_no].member_list[set2_pos], &empty_ip_address, sizeof(empty_ip_address)) != 0) {
log_printf(LOGSYS_LEVEL_DEBUG,
"adding dynamic member %s for ring %u",
totemip_print(&set2[ring_no].member_list[set2_pos]),
ring_no);
totempg_member_add(&set2[ring_no].member_list[set2_pos], ring_no);
}
}
}
}
/*
 * Rebuild the per-interface member lists of totem_config from the
 * "nodelist.node.N.ringX_addr" icmap keys.
 *
 * The member lists of the first interface_count interfaces are cleared,
 * then every ringX_addr value of every node that has a ring0_addr key is
 * parsed with totemip_parse() and appended to the member list of the
 * interface selected by the number X taken from the key name.
 *
 * reload - when non-zero, the interface table is copied before and after
 *          the rebuild and both copies are handed to
 *          compute_interfaces_diff().
 */
static void put_nodelist_members_to_config(struct totem_config *totem_config, int reload)
{
icmap_iter_t iter, iter2;
const char *iter_key, *iter_key2;
int res = 0;
unsigned int node_pos;
char tmp_key[ICMAP_KEYNAME_MAXLEN];
char tmp_key2[ICMAP_KEYNAME_MAXLEN];
char *node_addr_str;
int member_count;
- unsigned int ringnumber = 0;
+ unsigned int linknumber = 0;
int i, j;
struct totem_interface *orig_interfaces = NULL;
struct totem_interface *new_interfaces = NULL;
if (reload) {
/*
* We need to compute diff only for reload. Also for initial configuration
* not all totem structures are initialized so corosync will crash during
* member_add/remove
*/
orig_interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX);
assert(orig_interfaces != NULL);
new_interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX);
assert(new_interfaces != NULL);
memcpy(orig_interfaces, totem_config->interfaces, sizeof (struct totem_interface) * INTERFACE_MAX);
}
/* Clear out nodelist so we can put the new one in if needed */
for (i = 0; i < totem_config->interface_count; i++) {
for (j = 0; j < PROCESSOR_COUNT_MAX; j++) {
memset(&totem_config->interfaces[i].member_list[j], 0, sizeof(struct totem_ip_address));
}
totem_config->interfaces[i].member_count = 0;
}
/* Outer pass: visit each node once, keyed on the presence of its ring0_addr entry */
iter = icmap_iter_init("nodelist.node.");
while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key);
if (res != 2) {
continue;
}
if (strcmp(tmp_key, "ring0_addr") != 0) {
continue;
}
/* Inner pass: walk every key of this node and pick out the ringX_addr ones */
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.", node_pos);
iter2 = icmap_iter_init(tmp_key);
while ((iter_key2 = icmap_iter_next(iter2, NULL, NULL)) != NULL) {
/*
 * NOTE(review): the result of the nodeid lookup below is ignored (the
 * if-body is empty), so nodeid is stored further down without having
 * been set when the key is missing. tmp_key is also the buffer iter2
 * was initialised from - confirm that icmap_iter_init() keeps its own
 * copy of the prefix.
 */
- res = sscanf(iter_key2, "nodelist.node.%u.ring%u%s", &node_pos, &ringnumber, tmp_key2);
+ unsigned int nodeid;
+
+ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", node_pos);
+ if (icmap_get_uint32(tmp_key, &nodeid) != CS_OK) {
+ }
+
+ res = sscanf(iter_key2, "nodelist.node.%u.ring%u%s", &node_pos, &linknumber, tmp_key2);
if (res != 3 || strcmp(tmp_key2, "_addr") != 0) {
continue;
}
if (icmap_get_string(iter_key2, &node_addr_str) != CS_OK) {
continue;
}
/*
 * NOTE(review): the number parsed from the key name is used as an index
 * into interfaces[] (and member_count into member_list[]) without a
 * range check in this function - TODO confirm it is validated elsewhere.
 */
- member_count = totem_config->interfaces[ringnumber].member_count;
+ member_count = totem_config->interfaces[linknumber].member_count;
- res = totemip_parse(&totem_config->interfaces[ringnumber].member_list[member_count],
+ res = totemip_parse(&totem_config->interfaces[linknumber].member_list[member_count],
node_addr_str, totem_config->ip_version);
/* Only count the slot as used when the address parsed successfully */
if (res != -1) {
- totem_config->interfaces[ringnumber].member_count++;
+ totem_config->interfaces[linknumber].member_list[member_count].nodeid = nodeid;
+ totem_config->interfaces[linknumber].member_count++;
}
free(node_addr_str);
}
icmap_iter_finalize(iter2);
}
icmap_iter_finalize(iter);
if (reload) {
/* Compare the table saved on entry with the freshly built one */
memcpy(new_interfaces, totem_config->interfaces, sizeof (struct totem_interface) * INTERFACE_MAX);
compute_interfaces_diff(totem_config->interface_count, orig_interfaces, new_interfaces);
free(new_interfaces);
free(orig_interfaces);
}
}
/*
 * icmap tracking callback for keys below "nodelist.node.".
 *
 * Does nothing while "config.totemconfig_reload_in_progress" is set, so
 * that a full reload can reconfigure everything atomically once it is
 * done. Otherwise, when the changed key has the form
 * "nodelist.node.<n>.ring<m>_addr", the member lists are rebuilt through
 * put_nodelist_members_to_config() in reload mode.
 *
 * user_data is the struct totem_config registered with the tracker;
 * event, new_val and old_val are not used.
 */
static void nodelist_dynamic_notify(
	int32_t event,
	const char *key_name,
	struct icmap_notify_value new_val,
	struct icmap_notify_value old_val,
	void *user_data)
{
	struct totem_config *cfg = (struct totem_config *)user_data;
	char suffix[ICMAP_KEYNAME_MAXLEN];
	unsigned int node_idx;
	unsigned int link_idx;
	uint8_t reload_flag;
	int is_ring_addr_key;

	if (icmap_get_uint8("config.totemconfig_reload_in_progress", &reload_flag) == CS_OK &&
	    reload_flag) {
		return;
	}

	/* The key must parse completely and end in exactly "_addr" */
	is_ring_addr_key =
	    (sscanf(key_name, "nodelist.node.%u.ring%u%s", &node_idx, &link_idx, suffix) == 3) &&
	    (strcmp(suffix, "_addr") == 0);

	if (is_ring_addr_key) {
		put_nodelist_members_to_config(cfg, 1);
	}
}
/*
* Tries to find node (node_pos) in config nodelist which address matches any
* local interface. Address can be stored in ring0_addr or if ipaddr_key_prefix is not NULL
* key with prefix ipaddr_key is used (there can be multiuple of them)
* This function differs * from find_local_node_in_nodelist because it doesn't need bindnetaddr,
* but doesn't work when bind addr is network address (so IP must be exact
* match).
*
* Returns 1 on success (address was found, node_pos is then correctly set) or 0 on failure.
*/
int totem_config_find_local_addr_in_nodelist(const char *ipaddr_key_prefix, unsigned int *node_pos)
{
struct list_head addrs;
struct totem_ip_if_address *if_addr;
icmap_iter_t iter, iter2;
const char *iter_key, *iter_key2;
struct list_head *list;
const char *ipaddr_key;
int ip_version;
struct totem_ip_address node_addr;
char *node_addr_str;
int node_found = 0;
int res = 0;
char tmp_key[ICMAP_KEYNAME_MAXLEN];
if (totemip_getifaddrs(&addrs) == -1) {
return 0;
}
ip_version = totem_config_get_ip_version();
iter = icmap_iter_init("nodelist.node.");
while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
res = sscanf(iter_key, "nodelist.node.%u.%s", node_pos, tmp_key);
if (res != 2) {
continue;
}
if (strcmp(tmp_key, "ring0_addr") != 0) {
continue;
}
if (icmap_get_string(iter_key, &node_addr_str) != CS_OK) {
continue ;
}
free(node_addr_str);
/*
* ring0_addr found -> let's iterate thru ipaddr_key_prefix
*/
snprintf(tmp_key, sizeof(tmp_key), "nodelist.node.%u.%s", *node_pos,
(ipaddr_key_prefix != NULL ? ipaddr_key_prefix : "ring0_addr"));
iter2 = icmap_iter_init(tmp_key);
while ((iter_key2 = icmap_iter_next(iter2, NULL, NULL)) != NULL) {
/*
* ring0_addr must be exact match, not prefix
*/
ipaddr_key = (ipaddr_key_prefix != NULL ? iter_key2 : tmp_key);
if (icmap_get_string(ipaddr_key, &node_addr_str) != CS_OK) {
continue ;
}
if (totemip_parse(&node_addr, node_addr_str, ip_version) == -1) {
free(node_addr_str);
continue ;
}
free(node_addr_str);
/*
* Try to match ip with if_addrs
*/
node_found = 0;
for (list = addrs.next; list != &addrs; list = list->next) {
if_addr = list_entry(list, struct totem_ip_if_address, list);
if (totemip_equal(&node_addr, &if_addr->ip_addr)) {
node_found = 1;
break;
}
}
if (node_found) {
break ;
}
}
icmap_iter_finalize(iter2);
if (node_found) {
break ;
}
}
icmap_iter_finalize(iter);
totemip_freeifaddrs(&addrs);
return (node_found);
}
/*
 * Derive totem.interface.N.bindnetaddr keys from the nodelist.
 *
 * If totem_config_find_local_addr_in_nodelist() finds a node whose
 * ring0_addr matches a local interface, every "ringN_addr" value of that
 * node is written to icmap as "totem.interface.N.bindnetaddr".
 * Nothing is changed when no such node is found.
 *
 * The totem_config argument is not referenced in the body.
 */
static void config_convert_nodelist_to_interface(struct totem_config *totem_config)
{
int res = 0;
unsigned int node_pos;
char tmp_key[ICMAP_KEYNAME_MAXLEN];
char tmp_key2[ICMAP_KEYNAME_MAXLEN];
char *node_addr_str;
- unsigned int ringnumber = 0;
+ unsigned int linknumber = 0;
icmap_iter_t iter;
const char *iter_key;
if (totem_config_find_local_addr_in_nodelist(NULL, &node_pos)) {
/*
* We found node, so create interface section
*/
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.", node_pos);
iter = icmap_iter_init(tmp_key);
while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
/* Accept only keys of the exact form nodelist.node.<n>.ring<m>_addr */
- res = sscanf(iter_key, "nodelist.node.%u.ring%u%s", &node_pos, &ringnumber, tmp_key2);
+ res = sscanf(iter_key, "nodelist.node.%u.ring%u%s", &node_pos, &linknumber, tmp_key2);
if (res != 3 || strcmp(tmp_key2, "_addr") != 0) {
continue ;
}
if (icmap_get_string(iter_key, &node_addr_str) != CS_OK) {
continue;
}
/* tmp_key2 is reused here as the destination key name */
- snprintf(tmp_key2, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.bindnetaddr", ringnumber);
+ snprintf(tmp_key2, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.bindnetaddr", linknumber);
icmap_set_string(tmp_key2, node_addr_str);
free(node_addr_str);
}
icmap_iter_finalize(iter);
}
}
/*
 * Fill *totem_config from the icmap configuration database.
 *
 * The structure is zeroed and its interface table allocated
 * (INTERFACE_MAX entries). Global totem.* keys and every
 * totem.interface.N.* section that has a bindnetaddr key are then read,
 * generated mcastaddr/mcastport values are stored back to icmap, the
 * local node is looked up in the nodelist (when one exists), and finally
 * the volatile values are read and the change notifications registered.
 *
 * totem_config - structure to populate
 * error_string - set to a description of the problem on failure
 * warnings     - cleared on entry, then OR-ed with
 *                TOTEM_CONFIG_WARNING_* flags
 *
 * Returns 0 on success, -1 on failure.
 *
 * NOTE(review): the error returns after the malloc() below do not free
 * totem_config->interfaces - confirm whether callers clean up.
 */
extern int totem_config_read (
struct totem_config *totem_config,
const char **error_string,
uint64_t *warnings)
{
int res = 0;
char *str;
- unsigned int ringnumber = 0;
+ unsigned int linknumber = 0;
int member_count = 0;
icmap_iter_t iter, member_iter;
const char *iter_key;
const char *member_iter_key;
- char ringnumber_key[ICMAP_KEYNAME_MAXLEN];
+ char linknumber_key[ICMAP_KEYNAME_MAXLEN];
char tmp_key[ICMAP_KEYNAME_MAXLEN];
uint8_t u8;
uint16_t u16;
+ uint32_t u32;
char *cluster_name = NULL;
int i;
int local_node_pos;
int nodeid_set;
*warnings = 0;
memset (totem_config, 0, sizeof (struct totem_config));
totem_config->interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX);
if (totem_config->interfaces == 0) {
*error_string = "Out of memory trying to allocate ethernet interface storage area";
return -1;
}
memset (totem_config->interfaces, 0,
sizeof (struct totem_interface) * INTERFACE_MAX);
- strcpy (totem_config->rrp_mode, "none");
+ strcpy (totem_config->link_mode, "passive");
icmap_get_uint32("totem.version", (uint32_t *)&totem_config->version);
if (totem_get_crypto(totem_config) != 0) {
*error_string = "crypto_cipher requires crypto_hash with value other than none";
return -1;
}
/* The length is checked before the copy so the strcpy() below cannot overrun */
- if (icmap_get_string("totem.rrp_mode", &str) == CS_OK) {
- if (strlen(str) >= TOTEM_RRP_MODE_BYTES) {
- *error_string = "totem.rrp_mode is too long";
+ if (icmap_get_string("totem.link_mode", &str) == CS_OK) {
+ if (strlen(str) >= TOTEM_LINK_MODE_BYTES) {
+ *error_string = "totem.link_mode is too long";
free(str);
return -1;
}
- strcpy (totem_config->rrp_mode, str);
+ strcpy (totem_config->link_mode, str);
free(str);
}
icmap_get_uint32("totem.nodeid", &totem_config->node_id);
totem_config->clear_node_high_bit = 0;
if (icmap_get_string("totem.clear_node_high_bit", &str) == CS_OK) {
if (strcmp (str, "yes") == 0) {
totem_config->clear_node_high_bit = 1;
}
free(str);
}
icmap_get_uint32("totem.threads", &totem_config->threads);
icmap_get_uint32("totem.netmtu", &totem_config->net_mtu);
if (icmap_get_string("totem.cluster_name", &cluster_name) != CS_OK) {
cluster_name = NULL;
}
totem_config->ip_version = totem_config_get_ip_version();
if (icmap_get_string("totem.interface.0.bindnetaddr", &str) != CS_OK) {
/*
* We were not able to find ring 0 bindnet addr. Try to use nodelist informations
*/
config_convert_nodelist_to_interface(totem_config);
} else {
free(str);
}
/*
* Broadcast option is global but set in interface section,
* so reset before processing interfaces.
*/
totem_config->broadcast_use = 0;
/* One pass per interface section: each section is recognised by its bindnetaddr key */
iter = icmap_iter_init("totem.interface.");
while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
- res = sscanf(iter_key, "totem.interface.%[^.].%s", ringnumber_key, tmp_key);
+ res = sscanf(iter_key, "totem.interface.%[^.].%s", linknumber_key, tmp_key);
if (res != 2) {
continue;
}
if (strcmp(tmp_key, "bindnetaddr") != 0) {
continue;
}
member_count = 0;
- ringnumber = atoi(ringnumber_key);
+ linknumber = atoi(linknumber_key);
- if (ringnumber >= INTERFACE_MAX) {
+ if (linknumber >= INTERFACE_MAX) {
free(cluster_name);
snprintf (error_string_response, sizeof(error_string_response),
"parse error in config: interface ring number %u is bigger than allowed maximum %u\n",
- ringnumber, INTERFACE_MAX - 1);
+ linknumber, INTERFACE_MAX - 1);
*error_string = error_string_response;
return -1;
}
/*
* Get the bind net address
*/
/*
 * NOTE(review): res is assigned from totemip_parse() here and below but
 * is never examined in this function.
 */
if (icmap_get_string(iter_key, &str) == CS_OK) {
- res = totemip_parse (&totem_config->interfaces[ringnumber].bindnet, str,
+ res = totemip_parse (&totem_config->interfaces[linknumber].bindnet, str,
totem_config->ip_version);
free(str);
}
/*
* Get interface multicast address
*/
- snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr", ringnumber);
+ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr", linknumber);
if (icmap_get_string(tmp_key, &str) == CS_OK) {
- res = totemip_parse (&totem_config->interfaces[ringnumber].mcast_addr, str, totem_config->ip_version);
+ res = totemip_parse (&totem_config->interfaces[linknumber].mcast_addr, str, totem_config->ip_version);
free(str);
} else {
/*
* User not specified address -> autogenerate one from cluster_name key
* (if available). Return code is intentionally ignored, because
* udpu doesn't need mcastaddr and validity of mcastaddr for udp is
* checked later anyway.
*/
(void)get_cluster_mcast_addr (cluster_name,
- ringnumber,
+ linknumber,
totem_config->ip_version,
- &totem_config->interfaces[ringnumber].mcast_addr);
+ &totem_config->interfaces[linknumber].mcast_addr);
}
/* A "yes" on any interface switches broadcast on globally */
- snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.broadcast", ringnumber);
+ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.broadcast", linknumber);
if (icmap_get_string(tmp_key, &str) == CS_OK) {
if (strcmp (str, "yes") == 0) {
totem_config->broadcast_use = 1;
}
free(str);
}
/*
* Get mcast port
*/
- snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport", ringnumber);
- if (icmap_get_uint16(tmp_key, &totem_config->interfaces[ringnumber].ip_port) != CS_OK) {
+ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport", linknumber);
+ if (icmap_get_uint16(tmp_key, &totem_config->interfaces[linknumber].ip_port) != CS_OK) {
if (totem_config->broadcast_use) {
- totem_config->interfaces[ringnumber].ip_port = DEFAULT_PORT + (2 * ringnumber);
+ totem_config->interfaces[linknumber].ip_port = DEFAULT_PORT + (2 * linknumber);
} else {
- totem_config->interfaces[ringnumber].ip_port = DEFAULT_PORT;
+ totem_config->interfaces[linknumber].ip_port = DEFAULT_PORT;
}
}
/*
* Get the TTL
*/
- totem_config->interfaces[ringnumber].ttl = 1;
+ totem_config->interfaces[linknumber].ttl = 1;
+
+ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.ttl", linknumber);
+
+ if (icmap_get_uint8(tmp_key, &u8) == CS_OK) {
+ totem_config->interfaces[linknumber].ttl = u8;
+ }
- snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.ttl", ringnumber);
+ /*
+ * Get the knet link params
+ */
+ totem_config->interfaces[linknumber].knet_link_priority = 1;
+ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_link_priority", linknumber);
if (icmap_get_uint8(tmp_key, &u8) == CS_OK) {
- totem_config->interfaces[ringnumber].ttl = u8;
+ totem_config->interfaces[linknumber].knet_link_priority = u8;
+ }
+
+ totem_config->interfaces[linknumber].knet_ping_interval = KNET_PING_INTERVAL;
+ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_ping_interval", linknumber);
+ if (icmap_get_uint32(tmp_key, &u32) == CS_OK) {
+ totem_config->interfaces[linknumber].knet_ping_interval = u32;
+ }
+ totem_config->interfaces[linknumber].knet_ping_timeout = KNET_PING_TIMEOUT;
+ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_ping_timeout", linknumber);
+ if (icmap_get_uint32(tmp_key, &u32) == CS_OK) {
+ totem_config->interfaces[linknumber].knet_ping_timeout = u32;
+ }
+ totem_config->interfaces[linknumber].knet_ping_precision = KNET_PING_PRECISION;
+ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.knet_ping_precision", linknumber);
+ if (icmap_get_uint32(tmp_key, &u32) == CS_OK) {
+ totem_config->interfaces[linknumber].knet_ping_precision = u32;
}
- snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.member.", ringnumber);
+ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.member.", linknumber);
member_iter = icmap_iter_init(tmp_key);
while ((member_iter_key = icmap_iter_next(member_iter, NULL, NULL)) != NULL) {
/*
 * On the first member key: if a nodelist exists the interface member
 * entries are ignored (with a warning), otherwise they are used but
 * flagged as deprecated.
 */
if (member_count == 0) {
if (icmap_get_string("nodelist.node.0.ring0_addr", &str) == CS_OK) {
free(str);
*warnings |= TOTEM_CONFIG_WARNING_MEMBERS_IGNORED;
break;
} else {
*warnings |= TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED;
}
}
/*
 * NOTE(review): str obtained here is not freed in this branch, unlike
 * the other icmap_get_string() call sites in this function, and
 * member_count is not bounded against the size of member_list.
 */
if (icmap_get_string(member_iter_key, &str) == CS_OK) {
- res = totemip_parse (&totem_config->interfaces[ringnumber].member_list[member_count++],
+ res = totemip_parse (&totem_config->interfaces[linknumber].member_list[member_count++],
str, totem_config->ip_version);
}
}
icmap_iter_finalize(member_iter);
- totem_config->interfaces[ringnumber].member_count = member_count;
+ totem_config->interfaces[linknumber].member_count = member_count;
totem_config->interface_count++;
}
icmap_iter_finalize(iter);
/*
* Use broadcast is global, so if set, make sure to fill mcast addr correctly
*/
if (totem_config->broadcast_use) {
- for (ringnumber = 0; ringnumber < totem_config->interface_count; ringnumber++) {
- totemip_parse (&totem_config->interfaces[ringnumber].mcast_addr,
+ for (linknumber = 0; linknumber < totem_config->interface_count; linknumber++) {
+ totemip_parse (&totem_config->interfaces[linknumber].mcast_addr,
"255.255.255.255", 0);
}
}
/*
* Store automatically generated items back to icmap
*/
for (i = 0; i < totem_config->interface_count; i++) {
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr", i);
if (icmap_get_string(tmp_key, &str) == CS_OK) {
free(str);
} else {
str = (char *)totemip_print(&totem_config->interfaces[i].mcast_addr);
icmap_set_string(tmp_key, str);
}
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport", i);
if (icmap_get_uint16(tmp_key, &u16) != CS_OK) {
icmap_set_uint16(tmp_key, totem_config->interfaces[i].ip_port);
}
}
/* Start from the default transport, then override it when totem.transport names a known one */
- totem_config->transport_number = TOTEM_TRANSPORT_UDP;
+ totem_config->transport_number = TOTEM_TRANSPORT_KNET;
if (icmap_get_string("totem.transport", &str) == CS_OK) {
if (strcmp (str, "udpu") == 0) {
totem_config->transport_number = TOTEM_TRANSPORT_UDPU;
}
- if (strcmp (str, "iba") == 0) {
- totem_config->transport_number = TOTEM_TRANSPORT_RDMA;
+ if (strcmp (str, "udp") == 0) {
+ totem_config->transport_number = TOTEM_TRANSPORT_UDP;
+ }
+
+ if (strcmp (str, "knet") == 0) {
+ totem_config->transport_number = TOTEM_TRANSPORT_KNET;
}
+
free(str);
}
free(cluster_name);
/*
* Check existence of nodelist
*/
if (icmap_get_string("nodelist.node.0.ring0_addr", &str) == CS_OK) {
free(str);
/*
* find local node
*/
local_node_pos = find_local_node_in_nodelist(totem_config);
if (local_node_pos != -1) {
icmap_set_uint32("nodelist.local_node_pos", local_node_pos);
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", local_node_pos);
/* Remember whether totem.nodeid was given so an overriding nodelist nodeid can be reported */
nodeid_set = (totem_config->node_id != 0);
if (icmap_get_uint32(tmp_key, &totem_config->node_id) == CS_OK && nodeid_set) {
*warnings |= TOTEM_CONFIG_WARNING_TOTEM_NODEID_IGNORED;
}
/*
* Make localnode ring0_addr read only, so we can be sure that local
* node never changes. If rebinding to other IP would be in future
* supported, this must be changed and handled properly!
*/
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", local_node_pos);
icmap_set_ro_access(tmp_key, 0, 1);
icmap_set_ro_access("nodelist.local_node_pos", 0, 1);
}
put_nodelist_members_to_config(totem_config, 0);
}
/*
* Get things that might change in the future (and can depend on totem_config->interfaces);
*/
totem_volatile_config_read(totem_config, NULL);
icmap_set_uint8("config.totemconfig_reload_in_progress", 0);
add_totem_config_notification(totem_config);
return 0;
}
/*
 * Sanity-check an already populated totem_config.
 *
 * Returns 0 when the configuration is acceptable, -1 otherwise.
 * For the checks made directly in this function, *error_string is pointed
 * at error_string_response, formatted as "parse error in config: <reason>\n".
 * Failures reported by totem_volatile_config_validate() or
 * check_for_duplicate_nodeids() return -1 straight away; error_string is
 * passed to those helpers.
 *
 * Side effect: a net_mtu of 0 is replaced by 1500.
 */
int totem_config_validate (
struct totem_config *totem_config,
const char **error_string)
{
static char local_error_reason[512];
char parse_error[512];
const char *error_reason = local_error_reason;
int i, j;
unsigned int interface_max = INTERFACE_MAX;
unsigned int port1, port2;
if (totem_config->interface_count == 0) {
error_reason = "No interfaces defined";
goto parse_error;
}
for (i = 0; i < totem_config->interface_count; i++) {
/*
* Some error checking of parsed data to make sure its valid
*/
struct totem_ip_address null_addr;
memset (&null_addr, 0, sizeof (struct totem_ip_address));
/* NOTE(review): the literal 0 is presumably TOTEM_TRANSPORT_UDP - confirm against the enum */
if ((totem_config->transport_number == 0) &&
memcmp (&totem_config->interfaces[i].mcast_addr, &null_addr,
sizeof (struct totem_ip_address)) == 0) {
error_reason = "No multicast address specified";
goto parse_error;
}
if (totem_config->interfaces[i].ip_port == 0) {
error_reason = "No multicast port specified";
goto parse_error;
}
if (totem_config->interfaces[i].ttl > 255) {
error_reason = "Invalid TTL (should be 0..255)";
goto parse_error;
}
if (totem_config->transport_number != TOTEM_TRANSPORT_UDP &&
totem_config->interfaces[i].ttl != 1) {
error_reason = "Can only set ttl on multicast transport types";
goto parse_error;
}
+ if (totem_config->interfaces[i].knet_link_priority > 255) {
+ error_reason = "Invalid link priority (should be 0..255)";
+ goto parse_error;
+ }
+ if (totem_config->transport_number != TOTEM_TRANSPORT_KNET &&
+ totem_config->interfaces[i].knet_link_priority != 1) {
+ error_reason = "Can only set link priority on knet transport type";
+ goto parse_error;
+ }
if (totem_config->interfaces[i].mcast_addr.family == AF_INET6 &&
totem_config->node_id == 0) {
error_reason = "An IPV6 network requires that a node ID be specified.";
goto parse_error;
}
if (totem_config->broadcast_use == 0 && totem_config->transport_number == TOTEM_TRANSPORT_UDP) {
if (totem_config->interfaces[i].mcast_addr.family != totem_config->interfaces[i].bindnet.family) {
error_reason = "Multicast address family does not match bind address family";
goto parse_error;
}
/* A non-zero result from totemip_is_mcast() is treated as "not multicast" here */
if (totemip_is_mcast (&totem_config->interfaces[i].mcast_addr) != 0) {
error_reason = "mcastaddr is not a correct multicast address.";
goto parse_error;
}
}
if (totem_config->interfaces[0].bindnet.family != totem_config->interfaces[i].bindnet.family) {
error_reason = "Not all bind address belong to the same IP family";
goto parse_error;
}
/*
* Ensure mcast address/port differs
*/
if (totem_config->transport_number == TOTEM_TRANSPORT_UDP) {
for (j = i + 1; j < totem_config->interface_count; j++) {
port1 = totem_config->interfaces[i].ip_port;
port2 = totem_config->interfaces[j].ip_port;
/* Same address and ports that are equal or adjacent (|port1 - port2| <= 1) are rejected */
if (totemip_equal(&totem_config->interfaces[i].mcast_addr,
&totem_config->interfaces[j].mcast_addr) &&
(((port1 > port2 ? port1 : port2) - (port1 < port2 ? port1 : port2)) <= 1)) {
error_reason = "Interfaces multicast address/port pair must differ";
goto parse_error;
}
}
}
}
if (totem_config->version != 2) {
error_reason = "This totem parser can only parse version 2 configurations.";
goto parse_error;
}
if (totem_volatile_config_validate(totem_config, error_string) == -1) {
return (-1);
}
if (check_for_duplicate_nodeids(totem_config, error_string) == -1) {
return (-1);
}
/*
- * RRP values validation
+ * KNET Link values validation
*/
/* error_reason still points at local_error_reason here, so the formatted text below is what gets reported */
- if (strcmp (totem_config->rrp_mode, "none") &&
- strcmp (totem_config->rrp_mode, "active") &&
- strcmp (totem_config->rrp_mode, "passive")) {
+ if (strcmp (totem_config->link_mode, "active") &&
+ strcmp (totem_config->link_mode, "rr") &&
+ strcmp (totem_config->link_mode, "passive")) {
snprintf (local_error_reason, sizeof(local_error_reason),
- "The RRP mode \"%s\" specified is invalid. It must be none, active, or passive.\n", totem_config->rrp_mode);
+ "The Knet link mode \"%s\" specified is invalid. It must be active, passive or rr.\n", totem_config->link_mode);
goto parse_error;
}
- if (strcmp (totem_config->rrp_mode, "none") == 0) {
+ /* Only Knet does multiple interfaces */
+ if (totem_config->transport_number != TOTEM_TRANSPORT_KNET) {
interface_max = 1;
}
+
if (interface_max < totem_config->interface_count) {
snprintf (parse_error, sizeof(parse_error),
- "%d is too many configured interfaces for the rrp_mode setting %s.",
- totem_config->interface_count,
- totem_config->rrp_mode);
+ "%d is too many configured interfaces for non-Knet transport.",
+ totem_config->interface_count);
error_reason = parse_error;
goto parse_error;
}
+ /* Only knet allows crypto */
+ if (totem_config->transport_number != TOTEM_TRANSPORT_KNET) {
+ if ((strcmp(totem_config->crypto_cipher_type, "none") != 0) ||
+ (strcmp(totem_config->crypto_hash_type, "none") != 0)) {
+
+ snprintf (parse_error, sizeof(parse_error),
+ "crypto_cipher & crypto_hash are only valid for the Knet transport.");
+ error_reason = parse_error;
+ goto parse_error;
+ }
+ }
+
if (totem_config->net_mtu == 0) {
totem_config->net_mtu = 1500;
}
return 0;
/* Common failure exit: wrap the selected reason into the shared response buffer */
parse_error:
snprintf (error_string_response, sizeof(error_string_response),
"parse error in config: %s\n", error_reason);
*error_string = error_string_response;
return (-1);
}
/*
 * Load the private key from the file key_location into
 * totem_config->private_key.
 *
 * A single read() must deliver exactly sizeof(totem_config->private_key)
 * bytes; anything shorter is reported as an error.
 *
 * key_location - path of the key file
 * totem_config - receives private_key and private_key_len
 * error_string - on failure, pointed at error_string_response, which
 *                holds the description of the problem
 *
 * Returns 0 on success, -1 on failure.
 */
static int read_keyfile (
	const char *key_location,
	struct totem_config *totem_config,
	const char **error_string)
{
	int fd;
	/* read() returns ssize_t; keep the full width instead of narrowing to int */
	ssize_t res;
	ssize_t expected_key_len = sizeof (totem_config->private_key);
	int saved_errno;
	char error_str[100];
	const char *error_ptr;

	fd = open (key_location, O_RDONLY);
	if (fd == -1) {
		error_ptr = qb_strerror_r(errno, error_str, sizeof(error_str));
		snprintf (error_string_response, sizeof(error_string_response),
			"Could not open %s: %s\n",
			key_location, error_ptr);
		goto parse_error;
	}

	res = read (fd, totem_config->private_key, expected_key_len);
	/* close() may overwrite errno, so capture the read() error first */
	saved_errno = errno;
	close (fd);

	if (res == -1) {
		error_ptr = qb_strerror_r (saved_errno, error_str, sizeof(error_str));
		snprintf (error_string_response, sizeof(error_string_response),
			"Could not read %s: %s\n",
			key_location, error_ptr);
		goto parse_error;
	}

	totem_config->private_key_len = expected_key_len;

	if (res != expected_key_len) {
		/*
		 * The expected size is derived from the key buffer rather than
		 * hard-coded, so the message stays correct if the buffer size
		 * changes.
		 */
		snprintf (error_string_response, sizeof(error_string_response),
			"Could only read %zd bits of %zd bits from %s.\n",
			res * 8, expected_key_len * 8, key_location);
		goto parse_error;
	}

	return 0;

parse_error:
	*error_string = error_string_response;
	return (-1);
}
/*
 * Obtain the totem private key.
 *
 * The key buffer is cleared first. When both crypto_cipher_type and
 * crypto_hash_type are "none" nothing else is done. Otherwise the key is
 * taken from, in order of preference:
 *   1. the file named by the icmap key "totem.keyfile",
 *   2. the binary value of the icmap key "totem.key",
 *   3. the file named by the COROSYNC_TOTEM_AUTHKEY_FILE environment
 *      variable, or COROSYSCONFDIR "/authkey" when that is unset.
 *
 * Returns 0 on success; on failure returns -1 with *error_string pointed
 * at error_string_response.
 */
int totem_config_keyread (
	struct totem_config *totem_config,
	const char **error_string)
{
	int got_key = 0;
	char *key_location = NULL;
	int res;
	size_t key_len;

	/*
	 * Size everything from the buffer itself (as read_keyfile and the
	 * length check below already do) instead of a literal 128.
	 */
	memset (totem_config->private_key, 0, sizeof (totem_config->private_key));
	totem_config->private_key_len = sizeof (totem_config->private_key);

	if (strcmp(totem_config->crypto_cipher_type, "none") == 0 &&
	    strcmp(totem_config->crypto_hash_type, "none") == 0) {
		return (0);
	}

	/* cmap may store the location of the key file */
	if (icmap_get_string("totem.keyfile", &key_location) == CS_OK) {
		res = read_keyfile(key_location, totem_config, error_string);
		free(key_location);
		if (res) {
			goto key_error;
		}
		got_key = 1;
	} else { /* Or the key itself may be in the cmap */
		/* First call only queries the stored length */
		if (icmap_get("totem.key", NULL, &key_len, NULL) == CS_OK) {
			if (key_len > sizeof (totem_config->private_key)) {
				snprintf(error_string_response, sizeof(error_string_response),
					"key is too long");
				goto key_error;
			}
			if (icmap_get("totem.key", totem_config->private_key, &key_len, NULL) == CS_OK) {
				totem_config->private_key_len = key_len;
				got_key = 1;
			} else {
				snprintf(error_string_response, sizeof(error_string_response),
					"can't store private key");
				goto key_error;
			}
		}
	}

	/* In desperation we read the default filename */
	if (!got_key) {
		const char *filename = getenv("COROSYNC_TOTEM_AUTHKEY_FILE");
		if (!filename)
			filename = COROSYSCONFDIR "/authkey";
		res = read_keyfile(filename, totem_config, error_string);
		if (res)
			goto key_error;
	}

	return (0);

key_error:
	*error_string = error_string_response;
	return (-1);
}
/*
 * Write the timeout, counter and sizing values held in totem_config to
 * the log at LOGSYS_LEVEL_DEBUG. The structure is only read.
 */
static void debug_dump_totem_config(const struct totem_config *totem_config)
{
log_printf(LOGSYS_LEVEL_DEBUG, "Token Timeout (%d ms) retransmit timeout (%d ms)",
totem_config->token_timeout, totem_config->token_retransmit_timeout);
log_printf(LOGSYS_LEVEL_DEBUG, "token hold (%d ms) retransmits before loss (%d retrans)",
totem_config->token_hold_timeout, totem_config->token_retransmits_before_loss_const);
log_printf(LOGSYS_LEVEL_DEBUG, "join (%d ms) send_join (%d ms) consensus (%d ms) merge (%d ms)",
totem_config->join_timeout, totem_config->send_join_timeout, totem_config->consensus_timeout,
totem_config->merge_timeout);
log_printf(LOGSYS_LEVEL_DEBUG, "downcheck (%d ms) fail to recv const (%d msgs)",
totem_config->downcheck_timeout, totem_config->fail_to_recv_const);
log_printf(LOGSYS_LEVEL_DEBUG,
"seqno unchanged const (%d rotations) Maximum network MTU %d",
totem_config->seqno_unchanged_const, totem_config->net_mtu);
log_printf(LOGSYS_LEVEL_DEBUG,
"window size per rotation (%d messages) maximum messages per rotation (%d messages)",
totem_config->window_size, totem_config->max_messages);
log_printf(LOGSYS_LEVEL_DEBUG, "missed count const (%d messages)", totem_config->miss_count_const);
- log_printf(LOGSYS_LEVEL_DEBUG, "RRP token expired timeout (%d ms)",
- totem_config->rrp_token_expired_timeout);
- log_printf(LOGSYS_LEVEL_DEBUG, "RRP token problem counter (%d ms)",
- totem_config->rrp_problem_count_timeout);
- log_printf(LOGSYS_LEVEL_DEBUG, "RRP threshold (%d problem count)",
- totem_config->rrp_problem_count_threshold);
- log_printf(LOGSYS_LEVEL_DEBUG, "RRP multicast threshold (%d problem count)",
- totem_config->rrp_problem_count_mcast_threshold);
- log_printf(LOGSYS_LEVEL_DEBUG, "RRP automatic recovery check timeout (%d ms)",
- totem_config->rrp_autorecovery_check_timeout);
- log_printf(LOGSYS_LEVEL_DEBUG, "RRP mode set to %s.",
- totem_config->rrp_mode);
log_printf(LOGSYS_LEVEL_DEBUG, "heartbeat_failures_allowed (%d)",
totem_config->heartbeat_failures_allowed);
log_printf(LOGSYS_LEVEL_DEBUG, "max_network_delay (%d ms)", totem_config->max_network_delay);
}
/*
 * icmap tracking callback for keys below "totem.".
 *
 * Ignored while "config.reload_in_progress" is set, so that a full
 * reload can reconfigure everything atomically once it is done. A change
 * is processed only when totem_get_param_by_name() knows the key, or
 * when the key is "totem.token_coefficient" (not stored in totem_config
 * but used for the computation of the token timeout).
 *
 * Processing means: re-read the volatile values (passing the key name as
 * the deleted key for ICMAP_TRACK_DELETE events, NULL otherwise), dump
 * them to the debug log and validate them. A validation failure is only
 * logged.
 *
 * user_data is the struct totem_config registered with the tracker;
 * new_val and old_val are not used.
 */
static void totem_change_notify(
	int32_t event,
	const char *key_name,
	struct icmap_notify_value new_val,
	struct icmap_notify_value old_val,
	void *user_data)
{
	struct totem_config *cfg = (struct totem_config *)user_data;
	const char *validate_error;
	const char *removed_key;
	uint8_t reload_active;
	int is_known_param;

	if (icmap_get_uint8("config.reload_in_progress", &reload_active) == CS_OK && reload_active) {
		return;
	}

	is_known_param = (totem_get_param_by_name(cfg, key_name) != NULL);
	if (!is_known_param && strcmp(key_name, "totem.token_coefficient") != 0) {
		return;
	}

	/*
	 * Values other than UINT32 are not supported, or needed (yet).
	 * Only a delete event carries a key name into the re-read.
	 */
	removed_key = (event == ICMAP_TRACK_DELETE) ? key_name : NULL;

	totem_volatile_config_read (cfg, removed_key);

	log_printf(LOGSYS_LEVEL_DEBUG, "Totem related config key changed. Dumping actual totem config.");
	debug_dump_totem_config(cfg);

	if (totem_volatile_config_validate(cfg, &validate_error) == -1) {
		/*
		 * TODO: Consider corosync exit and/or load defaults for volatile
		 * values. For now, log error seems to be enough
		 */
		log_printf (LOGSYS_LEVEL_ERROR, "%s", validate_error);
	}
}
/*
 * icmap tracking callback for "config.reload_in_progress".
 *
 * The new value is read as a uint8_t:
 *   - non-zero (reload started): "config.totemconfig_reload_in_progress"
 *     is set to 1;
 *   - zero (reload completed): the nodelist members are rebuilt in reload
 *     mode, the volatile values are re-read, dumped and validated (a
 *     validation failure is only logged), "nodelist.local_node_pos" is
 *     refreshed when the local node is found, and
 *     "config.totemconfig_reload_in_progress" is cleared.
 *
 * user_data is the struct totem_config registered with the tracker;
 * event, key_name and old_val are not used.
 */
static void totem_reload_notify(
	int32_t event,
	const char *key_name,
	struct icmap_notify_value new_val,
	struct icmap_notify_value old_val,
	void *user_data)
{
	struct totem_config *totem_config = (struct totem_config *)user_data;
	/*
	 * Signed, like the local_node_pos in totem_config_read, so that the
	 * -1 "not found" comparison below is a plain signed comparison.
	 */
	int local_node_pos;
	const char *error_string;

	/* Reload has completed */
	if (*(uint8_t *)new_val.data == 0) {
		put_nodelist_members_to_config (totem_config, 1);
		totem_volatile_config_read (totem_config, NULL);
		log_printf(LOGSYS_LEVEL_DEBUG, "Configuration reloaded. Dumping actual totem config.");
		debug_dump_totem_config(totem_config);
		if (totem_volatile_config_validate(totem_config, &error_string) == -1) {
			log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
			/*
			 * TODO: Consider corosync exit and/or load defaults for volatile
			 * values. For now, log error seems to be enough
			 */
		}

		/* Reinstate the local_node_pos */
		local_node_pos = find_local_node_in_nodelist(totem_config);
		if (local_node_pos != -1) {
			icmap_set_uint32("nodelist.local_node_pos", local_node_pos);
		}

		icmap_set_uint8("config.totemconfig_reload_in_progress", 0);
	} else {
		icmap_set_uint8("config.totemconfig_reload_in_progress", 1);
	}
}
/*
 * Register the icmap trackers that keep totem_config up to date:
 *   - every key below "totem."          -> totem_change_notify
 *   - the key "config.reload_in_progress" -> totem_reload_notify
 *   - every key below "nodelist.node."  -> nodelist_dynamic_notify
 *
 * totem_config is handed to each callback as its user data. All three
 * registrations write their handle into the same local variable, so the
 * handles are not kept, and the return values are not checked.
 */
static void add_totem_config_notification(struct totem_config *totem_config)
{
	/* Event mask shared by the two prefix trackers */
	const int32_t prefix_events =
	    ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX;
	icmap_track_t track_handle;

	icmap_track_add("totem.", prefix_events,
		totem_change_notify, totem_config, &track_handle);

	icmap_track_add("config.reload_in_progress",
		ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY,
		totem_reload_notify, totem_config, &track_handle);

	icmap_track_add("nodelist.node.", prefix_events,
		nodelist_dynamic_notify, (void *)totem_config, &track_handle);
}
diff --git a/exec/totemcrypto.c b/exec/totemcrypto.c
deleted file mode 100644
index a97ba62f..00000000
--- a/exec/totemcrypto.c
+++ /dev/null
@@ -1,831 +0,0 @@
-/*
- * Copyright (c) 2006-2012 Red Hat, Inc.
- *
- * All rights reserved.
- *
- * Author: Steven Dake (sdake@redhat.com)
- * Christine Caulfield (ccaulfie@redhat.com)
- * Jan Friesse (jfriesse@redhat.com)
- * Fabio M. Di Nitto (fdinitto@redhat.com)
- *
- * This software licensed under BSD license, the text of which follows:
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- * - Neither the name of the MontaVista Software, Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#include <nss.h>
-#include <pk11pub.h>
-#include <pkcs11.h>
-#include <prerror.h>
-#include <blapit.h>
-#include <hasht.h>
-
-#define LOGSYS_UTILS_ONLY 1
-#include <corosync/logsys.h>
-#include <corosync/totem/totem.h>
-#include "totemcrypto.h"
-
-/*
- * define onwire crypto header
- */
-
-struct crypto_config_header {
- uint8_t crypto_cipher_type;
- uint8_t crypto_hash_type;
- uint8_t __pad0;
- uint8_t __pad1;
-} __attribute__((packed));
-
-/*
- * crypto definitions and conversion tables
- */
-
-#define SALT_SIZE 16
-
-/*
- * This are defined in new NSS. For older one, we will define our own
- */
-#ifndef AES_256_KEY_LENGTH
-#define AES_256_KEY_LENGTH 32
-#endif
-
-#ifndef AES_192_KEY_LENGTH
-#define AES_192_KEY_LENGTH 24
-#endif
-
-#ifndef AES_128_KEY_LENGTH
-#define AES_128_KEY_LENGTH 16
-#endif
-
-/*
- * while CRYPTO_CIPHER_TYPE_2_X are not a real cipher at all,
- * we still allocate a value for them because we use crypto_crypt_t
- * internally and we don't want overlaps
- */
-
-enum crypto_crypt_t {
- CRYPTO_CIPHER_TYPE_NONE = 0,
- CRYPTO_CIPHER_TYPE_AES256 = 1,
- CRYPTO_CIPHER_TYPE_AES192 = 2,
- CRYPTO_CIPHER_TYPE_AES128 = 3,
- CRYPTO_CIPHER_TYPE_3DES = 4,
- CRYPTO_CIPHER_TYPE_2_3 = UINT8_MAX - 1,
- CRYPTO_CIPHER_TYPE_2_2 = UINT8_MAX
-};
-
-CK_MECHANISM_TYPE cipher_to_nss[] = {
- 0, /* CRYPTO_CIPHER_TYPE_NONE */
- CKM_AES_CBC_PAD, /* CRYPTO_CIPHER_TYPE_AES256 */
- CKM_AES_CBC_PAD, /* CRYPTO_CIPHER_TYPE_AES192 */
- CKM_AES_CBC_PAD, /* CRYPTO_CIPHER_TYPE_AES128 */
- CKM_DES3_CBC_PAD /* CRYPTO_CIPHER_TYPE_3DES */
-};
-
-size_t cipher_key_len[] = {
- 0, /* CRYPTO_CIPHER_TYPE_NONE */
- AES_256_KEY_LENGTH, /* CRYPTO_CIPHER_TYPE_AES256 */
- AES_192_KEY_LENGTH, /* CRYPTO_CIPHER_TYPE_AES192 */
- AES_128_KEY_LENGTH, /* CRYPTO_CIPHER_TYPE_AES128 */
- 24 /* CRYPTO_CIPHER_TYPE_3DES - no magic in nss headers */
-};
-
-size_t cypher_block_len[] = {
- 0, /* CRYPTO_CIPHER_TYPE_NONE */
- AES_BLOCK_SIZE, /* CRYPTO_CIPHER_TYPE_AES256 */
- AES_BLOCK_SIZE, /* CRYPTO_CIPHER_TYPE_AES192 */
- AES_BLOCK_SIZE, /* CRYPTO_CIPHER_TYPE_AES128 */
- 0 /* CRYPTO_CIPHER_TYPE_3DES */
-};
-
-/*
- * hash definitions and conversion tables
- */
-
-/*
- * while CRYPTO_HASH_TYPE_2_X are not a real hash mechanism at all,
- * we still allocate a value for them because we use crypto_hash_t
- * internally and we don't want overlaps
- */
-
-enum crypto_hash_t {
- CRYPTO_HASH_TYPE_NONE = 0,
- CRYPTO_HASH_TYPE_MD5 = 1,
- CRYPTO_HASH_TYPE_SHA1 = 2,
- CRYPTO_HASH_TYPE_SHA256 = 3,
- CRYPTO_HASH_TYPE_SHA384 = 4,
- CRYPTO_HASH_TYPE_SHA512 = 5,
- CRYPTO_HASH_TYPE_2_3 = UINT8_MAX - 1,
- CRYPTO_HASH_TYPE_2_2 = UINT8_MAX
-};
-
-CK_MECHANISM_TYPE hash_to_nss[] = {
- 0, /* CRYPTO_HASH_TYPE_NONE */
- CKM_MD5_HMAC, /* CRYPTO_HASH_TYPE_MD5 */
- CKM_SHA_1_HMAC, /* CRYPTO_HASH_TYPE_SHA1 */
- CKM_SHA256_HMAC, /* CRYPTO_HASH_TYPE_SHA256 */
- CKM_SHA384_HMAC, /* CRYPTO_HASH_TYPE_SHA384 */
- CKM_SHA512_HMAC /* CRYPTO_HASH_TYPE_SHA512 */
-};
-
-size_t hash_len[] = {
- 0, /* CRYPTO_HASH_TYPE_NONE */
- MD5_LENGTH, /* CRYPTO_HASH_TYPE_MD5 */
- SHA1_LENGTH, /* CRYPTO_HASH_TYPE_SHA1 */
- SHA256_LENGTH, /* CRYPTO_HASH_TYPE_SHA256 */
- SHA384_LENGTH, /* CRYPTO_HASH_TYPE_SHA384 */
- SHA512_LENGTH /* CRYPTO_HASH_TYPE_SHA512 */
-};
-
-size_t hash_block_len[] = {
- 0, /* CRYPTO_HASH_TYPE_NONE */
- MD5_BLOCK_LENGTH, /* CRYPTO_HASH_TYPE_MD5 */
- SHA1_BLOCK_LENGTH, /* CRYPTO_HASH_TYPE_SHA1 */
- SHA256_BLOCK_LENGTH, /* CRYPTO_HASH_TYPE_SHA256 */
- SHA384_BLOCK_LENGTH, /* CRYPTO_HASH_TYPE_SHA384 */
- SHA512_BLOCK_LENGTH /* CRYPTO_HASH_TYPE_SHA512 */
-};
-
-struct crypto_instance {
- PK11SymKey *nss_sym_key;
- PK11SymKey *nss_sym_key_sign;
-
- unsigned char private_key[1024];
-
- unsigned int private_key_len;
-
- enum crypto_crypt_t crypto_cipher_type;
-
- enum crypto_hash_t crypto_hash_type;
-
- unsigned int crypto_header_size;
-
- void (*log_printf_func) (
- int level,
- int subsys,
- const char *function,
- const char *file,
- int line,
- const char *format,
- ...)__attribute__((format(printf, 6, 7)));
-
- int log_level_security;
- int log_level_notice;
- int log_level_error;
- int log_subsys_id;
-};
-
-#define log_printf(level, format, args...) \
-do { \
- instance->log_printf_func ( \
- level, instance->log_subsys_id, \
- __FUNCTION__, __FILE__, __LINE__, \
- (const char *)format, ##args); \
-} while (0);
-
-/*
- * crypt/decrypt functions
- */
-
-static int string_to_crypto_cipher_type(const char* crypto_cipher_type)
-{
- if (strcmp(crypto_cipher_type, "none") == 0) {
- return CRYPTO_CIPHER_TYPE_NONE;
- } else if (strcmp(crypto_cipher_type, "aes256") == 0) {
- return CRYPTO_CIPHER_TYPE_AES256;
- } else if (strcmp(crypto_cipher_type, "aes192") == 0) {
- return CRYPTO_CIPHER_TYPE_AES192;
- } else if (strcmp(crypto_cipher_type, "aes128") == 0) {
- return CRYPTO_CIPHER_TYPE_AES128;
- } else if (strcmp(crypto_cipher_type, "3des") == 0) {
- return CRYPTO_CIPHER_TYPE_3DES;
- }
- return CRYPTO_CIPHER_TYPE_AES256;
-}
-
-static int init_nss_crypto(struct crypto_instance *instance)
-{
- PK11SlotInfo* crypt_slot = NULL;
- SECItem crypt_param;
-
- if (!cipher_to_nss[instance->crypto_cipher_type]) {
- return 0;
- }
-
- crypt_param.type = siBuffer;
- crypt_param.data = instance->private_key;
- crypt_param.len = cipher_key_len[instance->crypto_cipher_type];
-
- crypt_slot = PK11_GetBestSlot(cipher_to_nss[instance->crypto_cipher_type], NULL);
- if (crypt_slot == NULL) {
- log_printf(instance->log_level_security, "Unable to find security slot (err %d)",
- PR_GetError());
- return -1;
- }
-
- instance->nss_sym_key = PK11_ImportSymKey(crypt_slot,
- cipher_to_nss[instance->crypto_cipher_type],
- PK11_OriginUnwrap, CKA_ENCRYPT|CKA_DECRYPT,
- &crypt_param, NULL);
- if (instance->nss_sym_key == NULL) {
- log_printf(instance->log_level_security, "Failure to import key into NSS (err %d)",
- PR_GetError());
- return -1;
- }
-
- PK11_FreeSlot(crypt_slot);
-
- return 0;
-}
-
-static int encrypt_nss(
- struct crypto_instance *instance,
- const unsigned char *buf_in,
- const size_t buf_in_len,
- unsigned char *buf_out,
- size_t *buf_out_len)
-{
- PK11Context* crypt_context = NULL;
- SECItem crypt_param;
- SECItem *nss_sec_param = NULL;
- int tmp1_outlen = 0;
- unsigned int tmp2_outlen = 0;
- unsigned char *salt = buf_out;
- unsigned char *data = buf_out + SALT_SIZE;
- int err = -1;
-
- if (!cipher_to_nss[instance->crypto_cipher_type]) {
- memcpy(buf_out, buf_in, buf_in_len);
- *buf_out_len = buf_in_len;
- return 0;
- }
-
- if (PK11_GenerateRandom (salt, SALT_SIZE) != SECSuccess) {
- log_printf(instance->log_level_security,
- "Failure to generate a random number %d",
- PR_GetError());
- goto out;
- }
-
- crypt_param.type = siBuffer;
- crypt_param.data = salt;
- crypt_param.len = SALT_SIZE;
-
- nss_sec_param = PK11_ParamFromIV (cipher_to_nss[instance->crypto_cipher_type],
- &crypt_param);
- if (nss_sec_param == NULL) {
- log_printf(instance->log_level_security,
- "Failure to set up PKCS11 param (err %d)",
- PR_GetError());
- goto out;
- }
-
- /*
- * Create cipher context for encryption
- */
- crypt_context = PK11_CreateContextBySymKey (cipher_to_nss[instance->crypto_cipher_type],
- CKA_ENCRYPT,
- instance->nss_sym_key,
- nss_sec_param);
- if (!crypt_context) {
- log_printf(instance->log_level_security,
- "PK11_CreateContext failed (encrypt) crypt_type=%d (err %d)",
- (int)cipher_to_nss[instance->crypto_cipher_type],
- PR_GetError());
- goto out;
- }
-
- if (PK11_CipherOp(crypt_context, data,
- &tmp1_outlen,
- FRAME_SIZE_MAX - instance->crypto_header_size,
- (unsigned char *)buf_in, buf_in_len) != SECSuccess) {
- log_printf(instance->log_level_security,
- "PK11_CipherOp failed (encrypt) crypt_type=%d (err %d)",
- (int)cipher_to_nss[instance->crypto_cipher_type],
- PR_GetError());
- goto out;
- }
-
- if (PK11_DigestFinal(crypt_context, data + tmp1_outlen,
- &tmp2_outlen, FRAME_SIZE_MAX - tmp1_outlen) != SECSuccess) {
- log_printf(instance->log_level_security,
- "PK11_DigestFinal failed (encrypt) crypt_type=%d (err %d)",
- (int)cipher_to_nss[instance->crypto_cipher_type],
- PR_GetError());
- goto out;
-
- }
-
- *buf_out_len = tmp1_outlen + tmp2_outlen + SALT_SIZE;
-
- err = 0;
-
-out:
- if (crypt_context) {
- PK11_DestroyContext(crypt_context, PR_TRUE);
- }
- if (nss_sec_param) {
- SECITEM_FreeItem(nss_sec_param, PR_TRUE);
- }
- return err;
-}
-
-static int decrypt_nss (
- struct crypto_instance *instance,
- unsigned char *buf,
- int *buf_len)
-{
- PK11Context* decrypt_context = NULL;
- SECItem decrypt_param;
- int tmp1_outlen = 0;
- unsigned int tmp2_outlen = 0;
- unsigned char *salt = buf;
- unsigned char *data = salt + SALT_SIZE;
- int datalen = *buf_len - SALT_SIZE;
- unsigned char outbuf[FRAME_SIZE_MAX];
- int outbuf_len;
- int err = -1;
-
- if (!cipher_to_nss[instance->crypto_cipher_type]) {
- return 0;
- }
-
- /* Create cipher context for decryption */
- decrypt_param.type = siBuffer;
- decrypt_param.data = salt;
- decrypt_param.len = SALT_SIZE;
-
- decrypt_context = PK11_CreateContextBySymKey(cipher_to_nss[instance->crypto_cipher_type],
- CKA_DECRYPT,
- instance->nss_sym_key, &decrypt_param);
- if (!decrypt_context) {
- log_printf(instance->log_level_security,
- "PK11_CreateContext (decrypt) failed (err %d)",
- PR_GetError());
- goto out;
- }
-
- if (PK11_CipherOp(decrypt_context, outbuf, &tmp1_outlen,
- sizeof(outbuf), data, datalen) != SECSuccess) {
- log_printf(instance->log_level_security,
- "PK11_CipherOp (decrypt) failed (err %d)",
- PR_GetError());
- goto out;
- }
-
- if (PK11_DigestFinal(decrypt_context, outbuf + tmp1_outlen, &tmp2_outlen,
- sizeof(outbuf) - tmp1_outlen) != SECSuccess) {
- log_printf(instance->log_level_security,
- "PK11_DigestFinal (decrypt) failed (err %d)",
- PR_GetError());
- goto out;
- }
-
- outbuf_len = tmp1_outlen + tmp2_outlen;
-
- memset(buf, 0, *buf_len);
- memcpy(buf, outbuf, outbuf_len);
-
- *buf_len = outbuf_len;
-
- err = 0;
-
-out:
- if (decrypt_context) {
- PK11_DestroyContext(decrypt_context, PR_TRUE);
- }
-
- return err;
-}
-
-
-/*
- * hash/hmac/digest functions
- */
-
-static int string_to_crypto_hash_type(const char* crypto_hash_type)
-{
- if (strcmp(crypto_hash_type, "none") == 0) {
- return CRYPTO_HASH_TYPE_NONE;
- } else if (strcmp(crypto_hash_type, "md5") == 0) {
- return CRYPTO_HASH_TYPE_MD5;
- } else if (strcmp(crypto_hash_type, "sha1") == 0) {
- return CRYPTO_HASH_TYPE_SHA1;
- } else if (strcmp(crypto_hash_type, "sha256") == 0) {
- return CRYPTO_HASH_TYPE_SHA256;
- } else if (strcmp(crypto_hash_type, "sha384") == 0) {
- return CRYPTO_HASH_TYPE_SHA384;
- } else if (strcmp(crypto_hash_type, "sha512") == 0) {
- return CRYPTO_HASH_TYPE_SHA512;
- }
-
- return CRYPTO_HASH_TYPE_SHA1;
-}
-
-static int init_nss_hash(struct crypto_instance *instance)
-{
- PK11SlotInfo* hash_slot = NULL;
- SECItem hash_param;
-
- if (!hash_to_nss[instance->crypto_hash_type]) {
- return 0;
- }
-
- hash_param.type = siBuffer;
- hash_param.data = instance->private_key;
- hash_param.len = instance->private_key_len;
-
- hash_slot = PK11_GetBestSlot(hash_to_nss[instance->crypto_hash_type], NULL);
- if (hash_slot == NULL) {
- log_printf(instance->log_level_security, "Unable to find security slot (err %d)",
- PR_GetError());
- return -1;
- }
-
- instance->nss_sym_key_sign = PK11_ImportSymKey(hash_slot,
- hash_to_nss[instance->crypto_hash_type],
- PK11_OriginUnwrap, CKA_SIGN,
- &hash_param, NULL);
- if (instance->nss_sym_key_sign == NULL) {
- log_printf(instance->log_level_security, "Failure to import key into NSS (err %d)",
- PR_GetError());
- return -1;
- }
-
- PK11_FreeSlot(hash_slot);
-
- return 0;
-}
-
-static int calculate_nss_hash(
- struct crypto_instance *instance,
- const unsigned char *buf,
- const size_t buf_len,
- unsigned char *hash)
-{
- PK11Context* hash_context = NULL;
- SECItem hash_param;
- unsigned int hash_tmp_outlen = 0;
- unsigned char hash_block[hash_block_len[instance->crypto_hash_type]];
- int err = -1;
-
- /* Now do the digest */
- hash_param.type = siBuffer;
- hash_param.data = 0;
- hash_param.len = 0;
-
- hash_context = PK11_CreateContextBySymKey(hash_to_nss[instance->crypto_hash_type],
- CKA_SIGN,
- instance->nss_sym_key_sign,
- &hash_param);
-
- if (!hash_context) {
- log_printf(instance->log_level_security,
- "PK11_CreateContext failed (hash) hash_type=%d (err %d)",
- (int)hash_to_nss[instance->crypto_hash_type],
- PR_GetError());
- goto out;
- }
-
- if (PK11_DigestBegin(hash_context) != SECSuccess) {
- log_printf(instance->log_level_security,
- "PK11_DigestBegin failed (hash) hash_type=%d (err %d)",
- (int)hash_to_nss[instance->crypto_hash_type],
- PR_GetError());
- goto out;
- }
-
- if (PK11_DigestOp(hash_context,
- buf,
- buf_len) != SECSuccess) {
- log_printf(instance->log_level_security,
- "PK11_DigestOp failed (hash) hash_type=%d (err %d)",
- (int)hash_to_nss[instance->crypto_hash_type],
- PR_GetError());
- goto out;
- }
-
- if (PK11_DigestFinal(hash_context,
- hash_block,
- &hash_tmp_outlen,
- hash_block_len[instance->crypto_hash_type]) != SECSuccess) {
- log_printf(instance->log_level_security,
- "PK11_DigestFinale failed (hash) hash_type=%d (err %d)",
- (int)hash_to_nss[instance->crypto_hash_type],
- PR_GetError());
- goto out;
- }
-
- memcpy(hash, hash_block, hash_len[instance->crypto_hash_type]);
- err = 0;
-
-out:
- if (hash_context) {
- PK11_DestroyContext(hash_context, PR_TRUE);
- }
-
- return err;
-}
-
-/*
- * global/glue nss functions
- */
-
-static int init_nss_db(struct crypto_instance *instance)
-{
- if ((!cipher_to_nss[instance->crypto_cipher_type]) &&
- (!hash_to_nss[instance->crypto_hash_type])) {
- return 0;
- }
-
- if (NSS_NoDB_Init(".") != SECSuccess) {
- log_printf(instance->log_level_security, "NSS DB initialization failed (err %d)",
- PR_GetError());
- return -1;
- }
-
- return 0;
-}
-
-static int init_nss(struct crypto_instance *instance,
- const char *crypto_cipher_type,
- const char *crypto_hash_type)
-{
- log_printf(instance->log_level_notice,
- "Initializing transmit/receive security (NSS) crypto: %s hash: %s",
- crypto_cipher_type, crypto_hash_type);
-
- if (init_nss_db(instance) < 0) {
- return -1;
- }
-
- if (init_nss_crypto(instance) < 0) {
- return -1;
- }
-
- if (init_nss_hash(instance) < 0) {
- return -1;
- }
-
- return 0;
-}
-
-static int encrypt_and_sign_nss_2_3 (
- struct crypto_instance *instance,
- const unsigned char *buf_in,
- const size_t buf_in_len,
- unsigned char *buf_out,
- size_t *buf_out_len)
-{
- if (encrypt_nss(instance,
- buf_in, buf_in_len,
- buf_out + sizeof(struct crypto_config_header), buf_out_len) < 0) {
- return -1;
- }
-
- *buf_out_len += sizeof(struct crypto_config_header);
-
- if (hash_to_nss[instance->crypto_hash_type]) {
- if (calculate_nss_hash(instance, buf_out, *buf_out_len, buf_out + *buf_out_len) < 0) {
- return -1;
- }
- *buf_out_len += hash_len[instance->crypto_hash_type];
- }
-
- return 0;
-}
-
-static int authenticate_nss_2_3 (
- struct crypto_instance *instance,
- unsigned char *buf,
- int *buf_len)
-{
- if (hash_to_nss[instance->crypto_hash_type]) {
- unsigned char tmp_hash[hash_len[instance->crypto_hash_type]];
- int datalen = *buf_len - hash_len[instance->crypto_hash_type];
-
- if (calculate_nss_hash(instance, buf, datalen, tmp_hash) < 0) {
- return -1;
- }
-
- if (memcmp(tmp_hash, buf + datalen, hash_len[instance->crypto_hash_type]) != 0) {
- log_printf(instance->log_level_error, "Digest does not match");
- return -1;
- }
- *buf_len = datalen;
- }
-
- return 0;
-}
-
-static int decrypt_nss_2_3 (
- struct crypto_instance *instance,
- unsigned char *buf,
- int *buf_len)
-{
- *buf_len -= sizeof(struct crypto_config_header);
-
- if (decrypt_nss(instance, buf + sizeof(struct crypto_config_header), buf_len) < 0) {
- return -1;
- }
-
- return 0;
-}
-
-/*
- * exported API
- */
-
-size_t crypto_sec_header_size(
- const char *crypto_cipher_type,
- const char *crypto_hash_type)
-{
- int crypto_cipher = string_to_crypto_cipher_type(crypto_cipher_type);
- int crypto_hash = string_to_crypto_hash_type(crypto_hash_type);
- size_t hdr_size = 0;
- int block_size = 0;
-
- hdr_size = sizeof(struct crypto_config_header);
-
- if (crypto_hash) {
- hdr_size += hash_len[crypto_hash];
- }
-
- if (crypto_cipher) {
- hdr_size += SALT_SIZE;
- if (cypher_block_len[crypto_cipher]) {
- block_size = cypher_block_len[crypto_cipher];
- } else {
- block_size = PK11_GetBlockSize(crypto_cipher, NULL);
- if (block_size < 0) {
- /*
- * failsafe. we can potentially lose up to 63
- * byte per packet, but better than fragmenting
- */
- block_size = 64;
- }
- }
- hdr_size += (block_size * 2);
- }
-
- return hdr_size;
-}
-
-/*
- * 2.0 packet format:
- * crypto_cipher_type | crypto_hash_type | __pad0 | __pad1 | hash | salt | data
- * only data is encrypted, hash only covers salt + data
- *
- * 2.2/2.3 packet format
- * fake_crypto_cipher_type | fake_crypto_hash_type | __pad0 | __pad1 | salt | data | hash
- * only data is encrypted, hash covers the whole packet
- *
- * we need to leave fake_* unencrypted for older versions of corosync to reject the packets,
- * we need to leave __pad0|1 unencrypted for performance reasons (saves at least 2 memcpy and
- * and extra buffer but values are hashed and verified.
- */
-
-int crypto_encrypt_and_sign (
- struct crypto_instance *instance,
- const unsigned char *buf_in,
- const size_t buf_in_len,
- unsigned char *buf_out,
- size_t *buf_out_len)
-{
- struct crypto_config_header *cch = (struct crypto_config_header *)buf_out;
- int err;
-
- cch->crypto_cipher_type = CRYPTO_CIPHER_TYPE_2_3;
- cch->crypto_hash_type = CRYPTO_HASH_TYPE_2_3;
- cch->__pad0 = 0;
- cch->__pad1 = 0;
-
- err = encrypt_and_sign_nss_2_3(instance,
- buf_in, buf_in_len,
- buf_out, buf_out_len);
-
- return err;
-}
-
-int crypto_authenticate_and_decrypt (struct crypto_instance *instance,
- unsigned char *buf,
- int *buf_len)
-{
- struct crypto_config_header *cch = (struct crypto_config_header *)buf;
-
- if (cch->crypto_cipher_type != CRYPTO_CIPHER_TYPE_2_3) {
- log_printf(instance->log_level_security,
- "Incoming packet has different crypto type. Rejecting");
- return -1;
- }
-
- if (cch->crypto_hash_type != CRYPTO_HASH_TYPE_2_3) {
- log_printf(instance->log_level_security,
- "Incoming packet has different hash type. Rejecting");
- return -1;
- }
-
- /*
- * authenticate packet first
- */
-
- if (authenticate_nss_2_3(instance, buf, buf_len) != 0) {
- return -1;
- }
-
- /*
- * now we can "trust" the padding bytes/future features
- */
-
- if ((cch->__pad0 != 0) || (cch->__pad1 != 0)) {
- log_printf(instance->log_level_security,
- "Incoming packet appears to have features not supported by this version of corosync. Rejecting");
- return -1;
- }
-
- /*
- * decrypt
- */
-
- if (decrypt_nss_2_3(instance, buf, buf_len) != 0) {
- return -1;
- }
-
- /*
- * invalidate config header and kill it
- */
- cch = NULL;
- memmove(buf, buf + sizeof(struct crypto_config_header), *buf_len);
-
- return 0;
-}
-
-struct crypto_instance *crypto_init(
- const unsigned char *private_key,
- unsigned int private_key_len,
- const char *crypto_cipher_type,
- const char *crypto_hash_type,
- void (*log_printf_func) (
- int level,
- int subsys,
- const char *function,
- const char *file,
- int line,
- const char *format,
- ...)__attribute__((format(printf, 6, 7))),
- int log_level_security,
- int log_level_notice,
- int log_level_error,
- int log_subsys_id)
-{
- struct crypto_instance *instance;
- instance = malloc(sizeof(*instance));
- if (instance == NULL) {
- return (NULL);
- }
- memset(instance, 0, sizeof(struct crypto_instance));
-
- memcpy(instance->private_key, private_key, private_key_len);
- instance->private_key_len = private_key_len;
-
- instance->crypto_cipher_type = string_to_crypto_cipher_type(crypto_cipher_type);
- instance->crypto_hash_type = string_to_crypto_hash_type(crypto_hash_type);
-
- instance->crypto_header_size = crypto_sec_header_size(crypto_cipher_type, crypto_hash_type);
-
- instance->log_printf_func = log_printf_func;
- instance->log_level_security = log_level_security;
- instance->log_level_notice = log_level_notice;
- instance->log_level_error = log_level_error;
- instance->log_subsys_id = log_subsys_id;
-
- if (init_nss(instance, crypto_cipher_type, crypto_hash_type) < 0) {
- free(instance);
- return(NULL);
- }
-
- return (instance);
-}
diff --git a/exec/totemcrypto.h b/exec/totemcrypto.h
deleted file mode 100644
index 7c06c391..00000000
--- a/exec/totemcrypto.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2006-2012 Red Hat, Inc.
- *
- * All rights reserved.
- *
- * Author: Steven Dake (sdake@redhat.com)
- * Christine Caulfield (ccaulfie@redhat.com)
- * Jan Friesse (jfriesse@redhat.com)
- *
- * This software licensed under BSD license, the text of which follows:
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- * - Neither the name of the MontaVista Software, Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef TOTEMCRYPTO_H_DEFINED
-#define TOTEMCRYPTO_H_DEFINED
-
-#include <sys/types.h>
-
-struct crypto_instance;
-
-extern size_t crypto_sec_header_size(
- const char *crypto_cipher_type,
- const char *crypto_hash_type);
-
-extern int crypto_authenticate_and_decrypt (
- struct crypto_instance *instance,
- unsigned char *buf,
- int *buf_len);
-
-extern int crypto_encrypt_and_sign (
- struct crypto_instance *instance,
- const unsigned char *buf_in,
- const size_t buf_in_len,
- unsigned char *buf_out,
- size_t *buf_out_len);
-
-extern struct crypto_instance *crypto_init(
- const unsigned char *private_key,
- unsigned int private_key_len,
- const char *crypto_cipher_type,
- const char *crypto_hash_type,
- void (*log_printf_func) (
- int level,
- int subsys,
- const char *function,
- const char *file,
- int line,
- const char *format,
- ...)__attribute__((format(printf, 6, 7))),
- int log_level_security,
- int log_level_notice,
- int log_level_error,
- int log_subsys_id);
-
-#endif /* TOTEMCRYPTO_H_DEFINED */
diff --git a/exec/totemiba.c b/exec/totemiba.c
deleted file mode 100644
index b22c3c5e..00000000
--- a/exec/totemiba.c
+++ /dev/null
@@ -1,1642 +0,0 @@
-/*
- * Copyright (c) 2009-2012 Red Hat, Inc.
- *
- * All rights reserved.
- *
- * Author: Steven Dake (sdake@redhat.com)
-
- * This software licensed under BSD license, the text of which follows:
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- * - Neither the name of the MontaVista Software, Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <config.h>
-
-#include <assert.h>
-#include <pthread.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <netdb.h>
-#include <sys/un.h>
-#include <sys/ioctl.h>
-#include <sys/param.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <errno.h>
-#include <sched.h>
-#include <time.h>
-#include <sys/time.h>
-#include <sys/poll.h>
-#include <limits.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netdb.h>
-#include <rdma/rdma_cma.h>
-#include <assert.h>
-#include <errno.h>
-
-#include <corosync/sq.h>
-#include <corosync/list.h>
-#include <corosync/hdb.h>
-#include <corosync/swab.h>
-
-#include <qb/qbdefs.h>
-#include <qb/qbloop.h>
-#define LOGSYS_UTILS_ONLY 1
-#include <corosync/logsys.h>
-#include "totemiba.h"
-
-#define COMPLETION_QUEUE_ENTRIES 100
-
-#define TOTAL_READ_POSTS 100
-
-#define MAX_MTU_SIZE 4096
-
-#define MCAST_REJOIN_MSEC 100
-
-struct totemiba_instance {
- struct sockaddr bind_addr;
-
- struct sockaddr send_token_bind_addr;
-
- struct sockaddr mcast_addr;
-
- struct sockaddr token_addr;
-
- struct sockaddr local_mcast_bind_addr;
-
- struct totem_interface *totem_interface;
-
- struct totem_config *totem_config;
-
- totemsrp_stats_t *stats;
-
- void (*totemiba_iface_change_fn) (
- void *context,
- const struct totem_ip_address *iface_address);
-
- void (*totemiba_deliver_fn) (
- void *context,
- const void *msg,
- unsigned int msg_len);
-
- void (*totemiba_target_set_completed) (
- void *context);
-
- void *rrp_context;
-
- qb_loop_timer_handle timer_netif_check_timeout;
-
- qb_loop_t *totemiba_poll_handle;
-
- struct totem_ip_address my_id;
-
- struct rdma_event_channel *mcast_channel;
-
- struct rdma_cm_id *mcast_cma_id;
-
- struct ibv_pd *mcast_pd;
-
- struct sockaddr mcast_dest_addr;
-
- uint32_t mcast_qpn;
-
- uint32_t mcast_qkey;
-
- struct ibv_ah *mcast_ah;
-
- struct ibv_comp_channel *mcast_send_completion_channel;
-
- struct ibv_comp_channel *mcast_recv_completion_channel;
-
- struct ibv_cq *mcast_send_cq;
-
- struct ibv_cq *mcast_recv_cq;
-
- int recv_token_accepted;
-
- struct rdma_event_channel *recv_token_channel;
-
- struct rdma_event_channel *listen_recv_token_channel;
-
- struct rdma_cm_id *listen_recv_token_cma_id;
-
- struct rdma_cm_id *recv_token_cma_id;
-
- struct ibv_pd *recv_token_pd;
-
- struct sockaddr recv_token_dest_addr;
-
- struct ibv_comp_channel *recv_token_send_completion_channel;
-
- struct ibv_comp_channel *recv_token_recv_completion_channel;
-
- struct ibv_cq *recv_token_send_cq;
-
- struct ibv_cq *recv_token_recv_cq;
-
- int send_token_bound;
-
- struct rdma_event_channel *send_token_channel;
-
- struct rdma_cm_id *send_token_cma_id;
-
- struct ibv_pd *send_token_pd;
-
- struct sockaddr send_token_dest_addr;
-
- uint32_t send_token_qpn;
-
- uint32_t send_token_qkey;
-
- struct ibv_ah *send_token_ah;
-
- struct ibv_comp_channel *send_token_send_completion_channel;
-
- struct ibv_comp_channel *send_token_recv_completion_channel;
-
- struct ibv_cq *send_token_send_cq;
-
- struct ibv_cq *send_token_recv_cq;
-
- void (*totemiba_log_printf) (
- int level,
- int subsys,
- const char *function,
- const char *file,
- int line,
- const char *format,
- ...)__attribute__((format(printf, 6, 7)));
-
-
- int totemiba_subsys_id;
-
- struct list_head mcast_send_buf_free;
-
- struct list_head token_send_buf_free;
-
- struct list_head mcast_send_buf_head;
-
- struct list_head token_send_buf_head;
-
- struct list_head recv_token_recv_buf_head;
-
- int mcast_seen_joined;
-
- qb_loop_timer_handle mcast_rejoin;
-};
-union u {
- uint64_t wr_id;
- void *v;
-};
-
-#define log_printf(level, format, args...) \
-do { \
- instance->totemiba_log_printf ( \
- level, \
- instance->totemiba_subsys_id, \
- __FUNCTION__, __FILE__, __LINE__, \
- (const char *)format, ##args); \
-} while (0);
-
-struct recv_buf {
- struct list_head list_all;
- struct ibv_recv_wr recv_wr;
- struct ibv_sge sge;
- struct ibv_mr *mr;
- char buffer[MAX_MTU_SIZE + sizeof (struct ibv_grh)];
-};
-
-struct send_buf {
- struct list_head list_free;
- struct list_head list_all;
- struct ibv_mr *mr;
- char buffer[MAX_MTU_SIZE];
-};
-
-static hdb_handle_t
-void2wrid (void *v) { union u u; u.v = v; return u.wr_id; }
-
-static void *
-wrid2void (uint64_t wr_id) { union u u; u.wr_id = wr_id; return u.v; }
-
-static void totemiba_instance_initialize (struct totemiba_instance *instance)
-{
- memset (instance, 0, sizeof (struct totemiba_instance));
- list_init (&instance->mcast_send_buf_free);
- list_init (&instance->token_send_buf_free);
- list_init (&instance->mcast_send_buf_head);
- list_init (&instance->token_send_buf_head);
- list_init (&instance->recv_token_recv_buf_head);
-}
-
-static inline struct send_buf *mcast_send_buf_get (
- struct totemiba_instance *instance)
-{
- struct send_buf *send_buf;
-
- if (list_empty (&instance->mcast_send_buf_free) == 0) {
- send_buf = list_entry (instance->mcast_send_buf_free.next, struct send_buf, list_free);
- list_del (&send_buf->list_free);
- return (send_buf);
- }
-
- send_buf = malloc (sizeof (struct send_buf));
- if (send_buf == NULL) {
- return (NULL);
- }
- send_buf->mr = ibv_reg_mr (instance->mcast_pd,
- send_buf->buffer,
- MAX_MTU_SIZE, IBV_ACCESS_LOCAL_WRITE);
- if (send_buf->mr == NULL) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't register memory range");
- free (send_buf);
- return (NULL);
- }
- list_init (&send_buf->list_all);
- list_add_tail (&send_buf->list_all, &instance->mcast_send_buf_head);
-
- return (send_buf);
-}
-
-static inline void mcast_send_buf_put (
- struct totemiba_instance *instance,
- struct send_buf *send_buf)
-{
- list_init (&send_buf->list_free);
- list_add_tail (&send_buf->list_free, &instance->mcast_send_buf_free);
-}
-
-static inline struct send_buf *token_send_buf_get (
- struct totemiba_instance *instance)
-{
- struct send_buf *send_buf;
-
- if (list_empty (&instance->token_send_buf_free) == 0) {
- send_buf = list_entry (instance->token_send_buf_free.next, struct send_buf, list_free);
- list_del (&send_buf->list_free);
- return (send_buf);
- }
-
- send_buf = malloc (sizeof (struct send_buf));
- if (send_buf == NULL) {
- return (NULL);
- }
- send_buf->mr = ibv_reg_mr (instance->send_token_pd,
- send_buf->buffer,
- MAX_MTU_SIZE, IBV_ACCESS_LOCAL_WRITE);
- if (send_buf->mr == NULL) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't register memory range");
- free (send_buf);
- return (NULL);
- }
- list_init (&send_buf->list_all);
- list_add_tail (&send_buf->list_all, &instance->token_send_buf_head);
-
- return (send_buf);
-}
-
-static inline void token_send_buf_destroy (struct totemiba_instance *instance)
-{
- struct list_head *list;
- struct send_buf *send_buf;
-
- for (list = instance->token_send_buf_head.next; list != &instance->token_send_buf_head;) {
- send_buf = list_entry (list, struct send_buf, list_all);
- list = list->next;
- ibv_dereg_mr (send_buf->mr);
- free (send_buf);
- }
-
- list_init (&instance->token_send_buf_free);
- list_init (&instance->token_send_buf_head);
-}
-
-static inline void token_send_buf_put (
- struct totemiba_instance *instance,
- struct send_buf *send_buf)
-{
- list_init (&send_buf->list_free);
- list_add_tail (&send_buf->list_free, &instance->token_send_buf_free);
-}
-
-static inline struct recv_buf *recv_token_recv_buf_create (
- struct totemiba_instance *instance)
-{
- struct recv_buf *recv_buf;
-
- recv_buf = malloc (sizeof (struct recv_buf));
- if (recv_buf == NULL) {
- return (NULL);
- }
-
- recv_buf->mr = ibv_reg_mr (instance->recv_token_pd, &recv_buf->buffer,
- MAX_MTU_SIZE + sizeof (struct ibv_grh),
- IBV_ACCESS_LOCAL_WRITE);
-
- recv_buf->recv_wr.next = NULL;
- recv_buf->recv_wr.sg_list = &recv_buf->sge;
- recv_buf->recv_wr.num_sge = 1;
- recv_buf->recv_wr.wr_id = (uintptr_t)recv_buf;
-
- recv_buf->sge.length = MAX_MTU_SIZE + sizeof (struct ibv_grh);
- recv_buf->sge.lkey = recv_buf->mr->lkey;
- recv_buf->sge.addr = (uintptr_t)recv_buf->buffer;
-
- list_init (&recv_buf->list_all);
- list_add (&recv_buf->list_all, &instance->recv_token_recv_buf_head);
- return (recv_buf);
-}
-
-static inline int recv_token_recv_buf_post (struct totemiba_instance *instance, struct recv_buf *recv_buf)
-{
- struct ibv_recv_wr *fail_recv;
- int res;
-
- res = ibv_post_recv (instance->recv_token_cma_id->qp, &recv_buf->recv_wr, &fail_recv);
-
- return (res);
-}
-
-static inline void recv_token_recv_buf_post_initial (struct totemiba_instance *instance)
-{
- struct recv_buf *recv_buf;
- unsigned int i;
-
- for (i = 0; i < TOTAL_READ_POSTS; i++) {
- recv_buf = recv_token_recv_buf_create (instance);
-
- recv_token_recv_buf_post (instance, recv_buf);
- }
-}
-
-static inline void recv_token_recv_buf_post_destroy (
- struct totemiba_instance *instance)
-{
- struct recv_buf *recv_buf;
- struct list_head *list;
-
- for (list = instance->recv_token_recv_buf_head.next;
- list != &instance->recv_token_recv_buf_head;) {
-
- recv_buf = list_entry (list, struct recv_buf, list_all);
- list = list->next;
- ibv_dereg_mr (recv_buf->mr);
- free (recv_buf);
- }
- list_init (&instance->recv_token_recv_buf_head);
-}
-
-static inline struct recv_buf *mcast_recv_buf_create (struct totemiba_instance *instance)
-{
- struct recv_buf *recv_buf;
- struct ibv_mr *mr;
-
- recv_buf = malloc (sizeof (struct recv_buf));
- if (recv_buf == NULL) {
- return (NULL);
- }
-
- mr = ibv_reg_mr (instance->mcast_pd, &recv_buf->buffer,
- MAX_MTU_SIZE + sizeof (struct ibv_grh),
- IBV_ACCESS_LOCAL_WRITE);
-
- recv_buf->recv_wr.next = NULL;
- recv_buf->recv_wr.sg_list = &recv_buf->sge;
- recv_buf->recv_wr.num_sge = 1;
- recv_buf->recv_wr.wr_id = (uintptr_t)recv_buf;
-
- recv_buf->sge.length = MAX_MTU_SIZE + sizeof (struct ibv_grh);
- recv_buf->sge.lkey = mr->lkey;
- recv_buf->sge.addr = (uintptr_t)recv_buf->buffer;
-
- return (recv_buf);
-}
-
-static inline int mcast_recv_buf_post (struct totemiba_instance *instance, struct recv_buf *recv_buf)
-{
- struct ibv_recv_wr *fail_recv;
- int res;
-
- res = ibv_post_recv (instance->mcast_cma_id->qp, &recv_buf->recv_wr, &fail_recv);
-
- return (res);
-}
-
-static inline void mcast_recv_buf_post_initial (struct totemiba_instance *instance)
-{
- struct recv_buf *recv_buf;
- unsigned int i;
-
- for (i = 0; i < TOTAL_READ_POSTS; i++) {
- recv_buf = mcast_recv_buf_create (instance);
-
- mcast_recv_buf_post (instance, recv_buf);
- }
-}
-
-static inline void iba_deliver_fn (struct totemiba_instance *instance, uint64_t wr_id, uint32_t bytes)
-{
- const char *addr;
- const struct recv_buf *recv_buf;
-
- recv_buf = wrid2void(wr_id);
- addr = &recv_buf->buffer[sizeof (struct ibv_grh)];
-
- bytes -= sizeof (struct ibv_grh);
- instance->totemiba_deliver_fn (instance->rrp_context, addr, bytes);
-}
-
-static int mcast_cq_send_event_fn (int fd, int events, void *context)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)context;
- struct ibv_wc wc[32];
- struct ibv_cq *ev_cq;
- void *ev_ctx;
- int res;
- int i;
-
- ibv_get_cq_event (instance->mcast_send_completion_channel, &ev_cq, &ev_ctx);
- ibv_ack_cq_events (ev_cq, 1);
- res = ibv_req_notify_cq (ev_cq, 0);
-
- res = ibv_poll_cq (instance->mcast_send_cq, 32, wc);
- if (res > 0) {
- for (i = 0; i < res; i++) {
- mcast_send_buf_put (instance, wrid2void(wc[i].wr_id));
- }
- }
-
- return (0);
-}
-
-static int mcast_cq_recv_event_fn (int fd, int events, void *context)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)context;
- struct ibv_wc wc[64];
- struct ibv_cq *ev_cq;
- void *ev_ctx;
- int res;
- int i;
-
- ibv_get_cq_event (instance->mcast_recv_completion_channel, &ev_cq, &ev_ctx);
- ibv_ack_cq_events (ev_cq, 1);
- res = ibv_req_notify_cq (ev_cq, 0);
-
- res = ibv_poll_cq (instance->mcast_recv_cq, 64, wc);
- if (res > 0) {
- for (i = 0; i < res; i++) {
- iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
- mcast_recv_buf_post (instance, wrid2void(wc[i].wr_id));
- }
- }
-
- return (0);
-}
-
-static void mcast_rejoin (void *data)
-{
- int res;
- struct totemiba_instance *instance = (struct totemiba_instance *)data;
-
- res = rdma_leave_multicast (instance->mcast_cma_id, &instance->mcast_addr);
- if (instance->mcast_ah) {
- ibv_destroy_ah (instance->mcast_ah);
- instance->mcast_ah = 0;
- }
-
- res = rdma_join_multicast (instance->mcast_cma_id, &instance->mcast_addr, instance);
- if (res != 0) {
- log_printf (LOGSYS_LEVEL_DEBUG,
- "rdma_join_multicast failed, errno=%d, rejoining in %u ms",
- errno,
- MCAST_REJOIN_MSEC);
- qb_loop_timer_add (instance->totemiba_poll_handle,
- QB_LOOP_MED,
- MCAST_REJOIN_MSEC * QB_TIME_NS_IN_MSEC,
- (void *)instance,
- mcast_rejoin,
- &instance->mcast_rejoin);
- }
-}
-
-static int mcast_rdma_event_fn (int fd, int events, void *context)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)context;
- struct rdma_cm_event *event;
-
- int res;
-
- res = rdma_get_cm_event (instance->mcast_channel, &event);
- if (res != 0) {
- return (0);
- }
-
- switch (event->event) {
- /*
- * occurs when we resolve the multicast address
- */
- case RDMA_CM_EVENT_ADDR_RESOLVED:
- res = rdma_join_multicast (instance->mcast_cma_id, &instance->mcast_addr, instance);
- usleep(1000);
- if (res == 0) break;
- case RDMA_CM_EVENT_MULTICAST_ERROR:
- log_printf (LOGSYS_LEVEL_ERROR, "multicast error, trying to rejoin in %u ms", MCAST_REJOIN_MSEC);
- qb_loop_timer_add (instance->totemiba_poll_handle,
- QB_LOOP_MED,
- MCAST_REJOIN_MSEC * QB_TIME_NS_IN_MSEC,
- (void *)instance,
- mcast_rejoin,
- &instance->mcast_rejoin);
- break;
- /*
- * occurs when the CM joins the multicast group
- */
- case RDMA_CM_EVENT_MULTICAST_JOIN:
- instance->mcast_qpn = event->param.ud.qp_num;
- instance->mcast_qkey = event->param.ud.qkey;
- instance->mcast_ah = ibv_create_ah (instance->mcast_pd, &event->param.ud.ah_attr);
-
- if (instance->mcast_seen_joined == 0) {
- log_printf (LOGSYS_LEVEL_DEBUG, "joining mcast 1st time, running callbacks");
- instance->totemiba_iface_change_fn (instance->rrp_context, &instance->my_id);
- instance->mcast_seen_joined=1;
- }
- log_printf (LOGSYS_LEVEL_NOTICE, "Joined multicast!");
- break;
- case RDMA_CM_EVENT_ADDR_ERROR:
- case RDMA_CM_EVENT_ROUTE_ERROR:
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
- break;
- default:
- log_printf (LOGSYS_LEVEL_ERROR, "default %d", event->event);
- break;
- }
-
- rdma_ack_cm_event (event);
- return (0);
-}
-
-static int recv_token_cq_send_event_fn (
- int fd,
- int revents,
- void *context)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)context;
- struct ibv_wc wc[32];
- struct ibv_cq *ev_cq;
- void *ev_ctx;
- int res;
- int i;
-
- ibv_get_cq_event (instance->recv_token_send_completion_channel, &ev_cq, &ev_ctx);
- ibv_ack_cq_events (ev_cq, 1);
- res = ibv_req_notify_cq (ev_cq, 0);
-
- res = ibv_poll_cq (instance->recv_token_send_cq, 32, wc);
- if (res > 0) {
- for (i = 0; i < res; i++) {
- iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
- ibv_dereg_mr (wrid2void(wc[i].wr_id));
- }
- }
-
- return (0);
-}
-
-static int recv_token_cq_recv_event_fn (int fd, int events, void *context)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)context;
- struct ibv_wc wc[32];
- struct ibv_cq *ev_cq;
- void *ev_ctx;
- int res;
- int i;
-
- ibv_get_cq_event (instance->recv_token_recv_completion_channel, &ev_cq, &ev_ctx);
- ibv_ack_cq_events (ev_cq, 1);
- res = ibv_req_notify_cq (ev_cq, 0);
-
- res = ibv_poll_cq (instance->recv_token_recv_cq, 32, wc);
- if (res > 0) {
- for (i = 0; i < res; i++) {
- iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
- recv_token_recv_buf_post (instance, wrid2void(wc[i].wr_id));
- }
- }
-
- return (0);
-}
-
-static int recv_token_accept_destroy (struct totemiba_instance *instance)
-{
- if (instance->recv_token_accepted == 0) {
- return (0);
- }
-
- qb_loop_poll_del (
- instance->totemiba_poll_handle,
- instance->recv_token_recv_completion_channel->fd);
-
- qb_loop_poll_del (
- instance->totemiba_poll_handle,
- instance->recv_token_send_completion_channel->fd);
-
- rdma_destroy_qp (instance->recv_token_cma_id);
-
- recv_token_recv_buf_post_destroy (instance);
-
- ibv_destroy_cq (instance->recv_token_send_cq);
-
- ibv_destroy_cq (instance->recv_token_recv_cq);
-
- ibv_destroy_comp_channel (instance->recv_token_send_completion_channel);
-
- ibv_destroy_comp_channel (instance->recv_token_recv_completion_channel);
-
- ibv_dealloc_pd (instance->recv_token_pd);
-
- rdma_destroy_id (instance->recv_token_cma_id);
-
- return (0);
-}
-
-static int recv_token_accept_setup (struct totemiba_instance *instance)
-{
- struct ibv_qp_init_attr init_qp_attr;
- int res = 0;
-
- /*
- * Allocate the protection domain
- */
- instance->recv_token_pd = ibv_alloc_pd (instance->recv_token_cma_id->verbs);
-
- /*
- * Create a completion channel
- */
- instance->recv_token_recv_completion_channel = ibv_create_comp_channel (instance->recv_token_cma_id->verbs);
- if (instance->recv_token_recv_completion_channel == NULL) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel");
- return (-1);
- }
-
- /*
- * Create the completion queue
- */
- instance->recv_token_recv_cq = ibv_create_cq (instance->recv_token_cma_id->verbs,
- COMPLETION_QUEUE_ENTRIES, instance,
- instance->recv_token_recv_completion_channel, 0);
- if (instance->recv_token_recv_cq == NULL) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue");
- return (-1);
- }
- res = ibv_req_notify_cq (instance->recv_token_recv_cq, 0);
- if (res != 0) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't request notifications of the completion queue");
- return (-1);
- }
-
- /*
- * Create a completion channel
- */
- instance->recv_token_send_completion_channel = ibv_create_comp_channel (instance->recv_token_cma_id->verbs);
- if (instance->recv_token_send_completion_channel == NULL) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel");
- return (-1);
- }
-
- /*
- * Create the completion queue
- */
- instance->recv_token_send_cq = ibv_create_cq (instance->recv_token_cma_id->verbs,
- COMPLETION_QUEUE_ENTRIES, instance,
- instance->recv_token_send_completion_channel, 0);
- if (instance->recv_token_send_cq == NULL) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue");
- return (-1);
- }
- res = ibv_req_notify_cq (instance->recv_token_send_cq, 0);
- if (res != 0) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't request notifications of the completion queue");
- return (-1);
- }
- memset (&init_qp_attr, 0, sizeof (struct ibv_qp_init_attr));
- init_qp_attr.cap.max_send_wr = 50;
- init_qp_attr.cap.max_recv_wr = TOTAL_READ_POSTS;
- init_qp_attr.cap.max_send_sge = 1;
- init_qp_attr.cap.max_recv_sge = 1;
- init_qp_attr.qp_context = instance;
- init_qp_attr.sq_sig_all = 0;
- init_qp_attr.qp_type = IBV_QPT_UD;
- init_qp_attr.send_cq = instance->recv_token_send_cq;
- init_qp_attr.recv_cq = instance->recv_token_recv_cq;
- res = rdma_create_qp (instance->recv_token_cma_id, instance->recv_token_pd,
- &init_qp_attr);
- if (res != 0) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't create queue pair");
- return (-1);
- }
-
- recv_token_recv_buf_post_initial (instance);
-
- qb_loop_poll_add (
- instance->totemiba_poll_handle,
- QB_LOOP_MED,
- instance->recv_token_recv_completion_channel->fd,
- POLLIN, instance, recv_token_cq_recv_event_fn);
-
- qb_loop_poll_add (
- instance->totemiba_poll_handle,
- QB_LOOP_MED,
- instance->recv_token_send_completion_channel->fd,
- POLLIN, instance, recv_token_cq_send_event_fn);
-
- instance->recv_token_accepted = 1;
-
- return (res);
-};
-
-static int recv_token_rdma_event_fn (int fd, int events, void *context)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)context;
- struct rdma_cm_event *event;
- struct rdma_conn_param conn_param;
-
- int res;
-
- res = rdma_get_cm_event (instance->listen_recv_token_channel, &event);
- if (res != 0) {
- return (0);
- }
-
- switch (event->event) {
- case RDMA_CM_EVENT_CONNECT_REQUEST:
- recv_token_accept_destroy (instance);
-
- instance->recv_token_cma_id = event->id;
- recv_token_accept_setup (instance);
- memset (&conn_param, 0, sizeof (struct rdma_conn_param));
- conn_param.qp_num = instance->recv_token_cma_id->qp->qp_num;
- res = rdma_accept (instance->recv_token_cma_id, &conn_param);
- break;
- default:
- log_printf (LOGSYS_LEVEL_ERROR, "default %d", event->event);
- break;
- }
-
- res = rdma_ack_cm_event (event);
- return (0);
-}
-
-static int send_token_cq_send_event_fn (int fd, int events, void *context)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)context;
- struct ibv_wc wc[32];
- struct ibv_cq *ev_cq;
- void *ev_ctx;
- int res;
- int i;
-
- ibv_get_cq_event (instance->send_token_send_completion_channel, &ev_cq, &ev_ctx);
- ibv_ack_cq_events (ev_cq, 1);
- res = ibv_req_notify_cq (ev_cq, 0);
-
- res = ibv_poll_cq (instance->send_token_send_cq, 32, wc);
- if (res > 0) {
- for (i = 0; i < res; i++) {
- token_send_buf_put (instance, wrid2void(wc[i].wr_id));
- }
- }
-
- return (0);
-}
-
-static int send_token_cq_recv_event_fn (int fd, int events, void *context)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)context;
- struct ibv_wc wc[32];
- struct ibv_cq *ev_cq;
- void *ev_ctx;
- int res;
- int i;
-
- ibv_get_cq_event (instance->send_token_recv_completion_channel, &ev_cq, &ev_ctx);
- ibv_ack_cq_events (ev_cq, 1);
- res = ibv_req_notify_cq (ev_cq, 0);
-
- res = ibv_poll_cq (instance->send_token_recv_cq, 32, wc);
- if (res > 0) {
- for (i = 0; i < res; i++) {
- iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
- }
- }
-
- return (0);
-}
-
-static int send_token_rdma_event_fn (int fd, int events, void *context)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)context;
- struct rdma_cm_event *event;
- struct rdma_conn_param conn_param;
-
- int res;
-
- res = rdma_get_cm_event (instance->send_token_channel, &event);
- if (res != 0) {
- return (0);
- }
-
- switch (event->event) {
- /*
- * occurs when we resolve the multicast address
- */
- case RDMA_CM_EVENT_ADDR_RESOLVED:
- res = rdma_resolve_route (instance->send_token_cma_id, 2000);
- break;
- /*
- * occurs when the CM joins the multicast group
- */
- case RDMA_CM_EVENT_ROUTE_RESOLVED:
- memset (&conn_param, 0, sizeof (struct rdma_conn_param));
- conn_param.private_data = NULL;
- conn_param.private_data_len = 0;
- res = rdma_connect (instance->send_token_cma_id, &conn_param);
- break;
- case RDMA_CM_EVENT_ESTABLISHED:
- instance->send_token_qpn = event->param.ud.qp_num;
- instance->send_token_qkey = event->param.ud.qkey;
- instance->send_token_ah = ibv_create_ah (instance->send_token_pd, &event->param.ud.ah_attr);
- instance->totemiba_target_set_completed (instance->rrp_context);
- break;
-
- case RDMA_CM_EVENT_ADDR_ERROR:
- case RDMA_CM_EVENT_ROUTE_ERROR:
- case RDMA_CM_EVENT_MULTICAST_ERROR:
- log_printf (LOGSYS_LEVEL_ERROR,
- "send_token_rdma_event_fn multicast error");
- break;
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
- break;
- case RDMA_CM_EVENT_UNREACHABLE:
- log_printf (LOGSYS_LEVEL_ERROR,
- "send_token_rdma_event_fn unreachable");
- break;
- default:
- log_printf (LOGSYS_LEVEL_ERROR,
- "send_token_rdma_event_fn unknown event %d",
- event->event);
- break;
- }
-
- rdma_ack_cm_event (event);
- return (0);
-}
-
-static int send_token_bind (struct totemiba_instance *instance)
-{
- int res;
- struct ibv_qp_init_attr init_qp_attr;
-
- instance->send_token_channel = rdma_create_event_channel();
- if (instance->send_token_channel == NULL) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't create rdma channel");
- return (-1);
- }
-
- res = rdma_create_id (instance->send_token_channel,
- &instance->send_token_cma_id, NULL, RDMA_PS_UDP);
- if (res) {
- log_printf (LOGSYS_LEVEL_ERROR, "error creating send_token_cma_id");
- return (-1);
- }
-
- res = rdma_bind_addr (instance->send_token_cma_id,
- &instance->send_token_bind_addr);
- if (res) {
- log_printf (LOGSYS_LEVEL_ERROR, "error doing rdma_bind_addr for send token");
- return (-1);
- }
-
- /*
- * Resolve the send_token address into a GUID
- */
- res = rdma_resolve_addr (instance->send_token_cma_id,
- &instance->bind_addr, &instance->token_addr, 2000);
- if (res) {
- log_printf (LOGSYS_LEVEL_ERROR, "error resolving send token address %d %d", res, errno);
- return (-1);
- }
-
- /*
- * Allocate the protection domain
- */
- instance->send_token_pd = ibv_alloc_pd (instance->send_token_cma_id->verbs);
-
- /*
- * Create a completion channel
- */
- instance->send_token_recv_completion_channel = ibv_create_comp_channel (instance->send_token_cma_id->verbs);
- if (instance->send_token_recv_completion_channel == NULL) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel");
- return (-1);
- }
-
- /*
- * Create the completion queue
- */
- instance->send_token_recv_cq = ibv_create_cq (instance->send_token_cma_id->verbs,
- COMPLETION_QUEUE_ENTRIES, instance,
- instance->send_token_recv_completion_channel, 0);
- if (instance->send_token_recv_cq == NULL) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue");
- return (-1);
- }
- res = ibv_req_notify_cq (instance->send_token_recv_cq, 0);
- if (res != 0) {
- log_printf (LOGSYS_LEVEL_ERROR,
- "couldn't request notifications of the completion queue");
- return (-1);
- }
-
- /*
- * Create a completion channel
- */
- instance->send_token_send_completion_channel =
- ibv_create_comp_channel (instance->send_token_cma_id->verbs);
-
- if (instance->send_token_send_completion_channel == NULL) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel");
- return (-1);
- }
-
- /*
- * Create the completion queue
- */
- instance->send_token_send_cq = ibv_create_cq (
- instance->send_token_cma_id->verbs,
- COMPLETION_QUEUE_ENTRIES, instance,
- instance->send_token_send_completion_channel, 0);
- if (instance->send_token_send_cq == NULL) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue");
- return (-1);
- }
-
- res = ibv_req_notify_cq (instance->send_token_send_cq, 0);
- if (res != 0) {
- log_printf (LOGSYS_LEVEL_ERROR,
- "couldn't request notifications of the completion queue");
- return (-1);
- }
- memset (&init_qp_attr, 0, sizeof (struct ibv_qp_init_attr));
- init_qp_attr.cap.max_send_wr = 50;
- init_qp_attr.cap.max_recv_wr = TOTAL_READ_POSTS;
- init_qp_attr.cap.max_send_sge = 1;
- init_qp_attr.cap.max_recv_sge = 1;
- init_qp_attr.qp_context = instance;
- init_qp_attr.sq_sig_all = 0;
- init_qp_attr.qp_type = IBV_QPT_UD;
- init_qp_attr.send_cq = instance->send_token_send_cq;
- init_qp_attr.recv_cq = instance->send_token_recv_cq;
- res = rdma_create_qp (instance->send_token_cma_id,
- instance->send_token_pd, &init_qp_attr);
- if (res != 0) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't create queue pair");
- return (-1);
- }
-
- qb_loop_poll_add (
- instance->totemiba_poll_handle,
- QB_LOOP_MED,
- instance->send_token_recv_completion_channel->fd,
- POLLIN, instance, send_token_cq_recv_event_fn);
-
- qb_loop_poll_add (
- instance->totemiba_poll_handle,
- QB_LOOP_MED,
- instance->send_token_send_completion_channel->fd,
- POLLIN, instance, send_token_cq_send_event_fn);
-
- qb_loop_poll_add (
- instance->totemiba_poll_handle,
- QB_LOOP_MED,
- instance->send_token_channel->fd,
- POLLIN, instance, send_token_rdma_event_fn);
-
- instance->send_token_bound = 1;
- return (0);
-}
-
-static int send_token_unbind (struct totemiba_instance *instance)
-{
- if (instance->send_token_bound == 0) {
- return (0);
- }
-
- qb_loop_poll_del (
- instance->totemiba_poll_handle,
- instance->send_token_recv_completion_channel->fd);
- qb_loop_poll_del (
- instance->totemiba_poll_handle,
- instance->send_token_send_completion_channel->fd);
- qb_loop_poll_del (
- instance->totemiba_poll_handle,
- instance->send_token_channel->fd);
-
- if(instance->send_token_ah)
- {
- ibv_destroy_ah(instance->send_token_ah);
- instance->send_token_ah = 0;
- }
-
- rdma_destroy_qp (instance->send_token_cma_id);
- ibv_destroy_cq (instance->send_token_send_cq);
- ibv_destroy_cq (instance->send_token_recv_cq);
- ibv_destroy_comp_channel (instance->send_token_send_completion_channel);
- ibv_destroy_comp_channel (instance->send_token_recv_completion_channel);
- token_send_buf_destroy (instance);
- ibv_dealloc_pd (instance->send_token_pd);
- rdma_destroy_id (instance->send_token_cma_id);
- rdma_destroy_event_channel (instance->send_token_channel);
- return (0);
-}
-
-static int recv_token_bind (struct totemiba_instance *instance)
-{
- int res;
- struct ibv_port_attr port_attr;
-
- instance->listen_recv_token_channel = rdma_create_event_channel();
- if (instance->listen_recv_token_channel == NULL) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't create rdma channel");
- return (-1);
- }
-
- res = rdma_create_id (instance->listen_recv_token_channel,
- &instance->listen_recv_token_cma_id, NULL, RDMA_PS_UDP);
- if (res) {
- log_printf (LOGSYS_LEVEL_ERROR, "error creating recv_token_cma_id");
- return (-1);
- }
-
- res = rdma_bind_addr (instance->listen_recv_token_cma_id,
- &instance->bind_addr);
- if (res) {
- log_printf (LOGSYS_LEVEL_ERROR, "error doing rdma_bind_addr for recv token");
- return (-1);
- }
-
- /*
- * Determine active_mtu of port and compare it with the configured one (160 is aproximation of all totem
- * structures.
- *
- * TODO: Implement MTU discovery also for IP and handle MTU correctly for all structures inside totemsrp,
- * crypto, ...
- */
- res = ibv_query_port (instance->listen_recv_token_cma_id->verbs, instance->listen_recv_token_cma_id->port_num, &port_attr);
- if ( (1 << (port_attr.active_mtu + 7)) < instance->totem_config->net_mtu + 160) {
- log_printf (LOGSYS_LEVEL_ERROR, "requested net_mtu is %d and is larger than the active port mtu %d\n",\
- instance->totem_config->net_mtu + 160, (1 << (port_attr.active_mtu + 7)));
- return (-1);
- }
-
- /*
- * Resolve the recv_token address into a GUID
- */
- res = rdma_listen (instance->listen_recv_token_cma_id, 10);
- if (res) {
- log_printf (LOGSYS_LEVEL_ERROR, "error listening %d %d", res, errno);
- return (-1);
- }
-
- qb_loop_poll_add (
- instance->totemiba_poll_handle,
- QB_LOOP_MED,
- instance->listen_recv_token_channel->fd,
- POLLIN, instance, recv_token_rdma_event_fn);
-
- return (0);
-}
-
-static int mcast_bind (struct totemiba_instance *instance)
-{
- int res;
- struct ibv_qp_init_attr init_qp_attr;
-
- instance->mcast_channel = rdma_create_event_channel();
- if (instance->mcast_channel == NULL) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't create rdma channel");
- return (-1);
- }
-
- res = rdma_create_id (instance->mcast_channel, &instance->mcast_cma_id, NULL, RDMA_PS_UDP);
- if (res) {
- log_printf (LOGSYS_LEVEL_ERROR, "error creating mcast_cma_id");
- return (-1);
- }
-
- res = rdma_bind_addr (instance->mcast_cma_id, &instance->local_mcast_bind_addr);
- if (res) {
- log_printf (LOGSYS_LEVEL_ERROR, "error doing rdma_bind_addr for mcast");
- return (-1);
- }
-
- /*
- * Resolve the multicast address into a GUID
- */
- res = rdma_resolve_addr (instance->mcast_cma_id, &instance->local_mcast_bind_addr,
- &instance->mcast_addr, 5000);
- if (res) {
- log_printf (LOGSYS_LEVEL_ERROR, "error resolving multicast address %d %d", res, errno);
- return (-1);
- }
-
- /*
- * Allocate the protection domain
- */
- instance->mcast_pd = ibv_alloc_pd (instance->mcast_cma_id->verbs);
-
- /*
- * Create a completion channel
- */
- instance->mcast_recv_completion_channel = ibv_create_comp_channel (instance->mcast_cma_id->verbs);
- if (instance->mcast_recv_completion_channel == NULL) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel");
- return (-1);
- }
-
- /*
- * Create the completion queue
- */
- instance->mcast_recv_cq = ibv_create_cq (instance->mcast_cma_id->verbs,
- COMPLETION_QUEUE_ENTRIES, instance,
- instance->mcast_recv_completion_channel, 0);
- if (instance->mcast_recv_cq == NULL) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue");
- return (-1);
- }
- res = ibv_req_notify_cq (instance->mcast_recv_cq, 0);
- if (res != 0) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't request notifications of the completion queue");
- return (-1);
- }
-
- /*
- * Create a completion channel
- */
- instance->mcast_send_completion_channel = ibv_create_comp_channel (instance->mcast_cma_id->verbs);
- if (instance->mcast_send_completion_channel == NULL) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel");
- return (-1);
- }
-
- /*
- * Create the completion queue
- */
- instance->mcast_send_cq = ibv_create_cq (instance->mcast_cma_id->verbs,
- COMPLETION_QUEUE_ENTRIES, instance,
- instance->mcast_send_completion_channel, 0);
- if (instance->mcast_send_cq == NULL) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue");
- return (-1);
- }
- res = ibv_req_notify_cq (instance->mcast_send_cq, 0);
- if (res != 0) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't request notifications of the completion queue");
- return (-1);
- }
- memset (&init_qp_attr, 0, sizeof (struct ibv_qp_init_attr));
- init_qp_attr.cap.max_send_wr = 50;
- init_qp_attr.cap.max_recv_wr = TOTAL_READ_POSTS;
- init_qp_attr.cap.max_send_sge = 1;
- init_qp_attr.cap.max_recv_sge = 1;
- init_qp_attr.qp_context = instance;
- init_qp_attr.sq_sig_all = 0;
- init_qp_attr.qp_type = IBV_QPT_UD;
- init_qp_attr.send_cq = instance->mcast_send_cq;
- init_qp_attr.recv_cq = instance->mcast_recv_cq;
- res = rdma_create_qp (instance->mcast_cma_id, instance->mcast_pd,
- &init_qp_attr);
- if (res != 0) {
- log_printf (LOGSYS_LEVEL_ERROR, "couldn't create queue pair");
- return (-1);
- }
-
- mcast_recv_buf_post_initial (instance);
-
- qb_loop_poll_add (
- instance->totemiba_poll_handle,
- QB_LOOP_MED,
- instance->mcast_recv_completion_channel->fd,
- POLLIN, instance, mcast_cq_recv_event_fn);
-
- qb_loop_poll_add (
- instance->totemiba_poll_handle,
- QB_LOOP_MED,
- instance->mcast_send_completion_channel->fd,
- POLLIN, instance, mcast_cq_send_event_fn);
-
- qb_loop_poll_add (
- instance->totemiba_poll_handle,
- QB_LOOP_MED,
- instance->mcast_channel->fd,
- POLLIN, instance, mcast_rdma_event_fn);
-
- return (0);
-}
-
-static void timer_function_netif_check_timeout (
- void *data)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)data;
- int res;
- int interface_up;
- int interface_num;
- int addr_len;
-
- totemip_iface_check (&instance->totem_interface->bindnet,
- &instance->totem_interface->boundto, &interface_up, &interface_num, instance->totem_config->clear_node_high_bit);
-
- totemip_totemip_to_sockaddr_convert(&instance->totem_interface->boundto,
- instance->totem_interface->ip_port, (struct sockaddr_storage *)&instance->bind_addr,
- &addr_len);
-
- totemip_totemip_to_sockaddr_convert(&instance->totem_interface->boundto,
- 0, (struct sockaddr_storage *)&instance->send_token_bind_addr,
- &addr_len);
-
- totemip_totemip_to_sockaddr_convert(&instance->totem_interface->boundto,
- 0, (struct sockaddr_storage *)&instance->local_mcast_bind_addr,
- &addr_len);
-
- totemip_totemip_to_sockaddr_convert(&instance->totem_interface->boundto,
- instance->totem_interface->ip_port, (struct sockaddr_storage *)&instance->my_id,
- &addr_len);
-
- totemip_sockaddr_to_totemip_convert(
- (const struct sockaddr_storage *)&instance->bind_addr,
- &instance->my_id);
-
- memcpy (&instance->my_id, &instance->totem_interface->boundto,
- sizeof (struct totem_ip_address));
-
- totemip_totemip_to_sockaddr_convert(&instance->totem_interface->mcast_addr,
- instance->totem_interface->ip_port,
- (struct sockaddr_storage *)&instance->mcast_addr, &addr_len);
-
- res = recv_token_bind (instance);
-
- res = mcast_bind (instance);
-}
-
-int totemiba_crypto_set (
- void *iba_context,
- const char *cipher_type,
- const char *hash_type)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
- int res = 0;
-
- instance = NULL;
-
- return (res);
-}
-
-int totemiba_finalize (
- void *iba_context)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
- int res = 0;
-
- instance = NULL;
-
- return (res);
-}
-
-/*
- * Create an instance
- */
-int totemiba_initialize (
- qb_loop_t *qb_poll_handle,
- void **iba_context,
- struct totem_config *totem_config,
- totemsrp_stats_t *stats,
- int interface_no,
- void *context,
-
- void (*deliver_fn) (
- void *context,
- const void *msg,
- unsigned int msg_len),
-
- void (*iface_change_fn) (
- void *context,
- const struct totem_ip_address *iface_address),
-
- void (*target_set_completed) (
- void *context))
-{
- struct totemiba_instance *instance;
- int res = 0;
-
- instance = malloc (sizeof (struct totemiba_instance));
- if (instance == NULL) {
- return (-1);
- }
-
- totemiba_instance_initialize (instance);
-
- instance->totem_interface = &totem_config->interfaces[interface_no];
-
- instance->totemiba_poll_handle = qb_poll_handle;
-
- instance->totem_interface->bindnet.nodeid = totem_config->node_id;
-
- instance->totemiba_deliver_fn = deliver_fn;
-
- instance->totemiba_target_set_completed = target_set_completed;
-
- instance->totemiba_iface_change_fn = iface_change_fn;
-
- instance->totem_config = totem_config;
- instance->stats = stats;
-
- instance->rrp_context = context;
-
- qb_loop_timer_add (instance->totemiba_poll_handle,
- QB_LOOP_MED,
- 100*QB_TIME_NS_IN_MSEC,
- (void *)instance,
- timer_function_netif_check_timeout,
- &instance->timer_netif_check_timeout);
-
- instance->totemiba_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
- instance->totemiba_log_printf = totem_config->totem_logging_configuration.log_printf;
-
- *iba_context = instance;
- return (res);
-}
-
-void *totemiba_buffer_alloc (void)
-{
- return malloc (MAX_MTU_SIZE);
-}
-
-void totemiba_buffer_release (void *ptr)
-{
- return free (ptr);
-}
-
-int totemiba_processor_count_set (
- void *iba_context,
- int processor_count)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
- int res = 0;
-
- instance = NULL;
-
- return (res);
-}
-
-int totemiba_recv_flush (void *iba_context)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
- int res = 0;
-
- instance = NULL;
-
- return (res);
-}
-
-int totemiba_send_flush (void *iba_context)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
- int res = 0;
-
- instance = NULL;
-
- return (res);
-}
-
-int totemiba_token_send (
- void *iba_context,
- const void *ms,
- unsigned int msg_len)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
- int res = 0;
- struct ibv_send_wr send_wr, *failed_send_wr;
- struct ibv_sge sge;
- void *msg;
- struct send_buf *send_buf;
-
- send_buf = token_send_buf_get (instance);
- if (send_buf == NULL) {
- return (-1);
- }
- msg = send_buf->buffer;
- memcpy (msg, ms, msg_len);
-
- send_wr.next = NULL;
- send_wr.sg_list = &sge;
- send_wr.num_sge = 1;
- send_wr.opcode = IBV_WR_SEND;
- send_wr.send_flags = IBV_SEND_SIGNALED;
- send_wr.wr_id = void2wrid(send_buf);
- send_wr.imm_data = 0;
- send_wr.wr.ud.ah = instance->send_token_ah;
- send_wr.wr.ud.remote_qpn = instance->send_token_qpn;
- send_wr.wr.ud.remote_qkey = instance->send_token_qkey;
-
- sge.length = msg_len;
- sge.lkey = send_buf->mr->lkey;
- sge.addr = (uintptr_t)msg;
-
- if(instance->send_token_ah != 0 && instance->send_token_bound)
- res = ibv_post_send (instance->send_token_cma_id->qp, &send_wr, &failed_send_wr);
-
- return (res);
-}
-
-int totemiba_mcast_flush_send (
- void *iba_context,
- const void *ms,
- unsigned int msg_len)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
- int res = 0;
- struct ibv_send_wr send_wr, *failed_send_wr;
- struct ibv_sge sge;
- void *msg;
- struct send_buf *send_buf;
-
- send_buf = mcast_send_buf_get (instance);
- if (send_buf == NULL) {
- return (-1);
- }
-
- msg = send_buf->buffer;
- memcpy (msg, ms, msg_len);
- send_wr.next = NULL;
- send_wr.sg_list = &sge;
- send_wr.num_sge = 1;
- send_wr.opcode = IBV_WR_SEND;
- send_wr.send_flags = IBV_SEND_SIGNALED;
- send_wr.wr_id = void2wrid(send_buf);
- send_wr.imm_data = 0;
- send_wr.wr.ud.ah = instance->mcast_ah;
- send_wr.wr.ud.remote_qpn = instance->mcast_qpn;
- send_wr.wr.ud.remote_qkey = instance->mcast_qkey;
-
- sge.length = msg_len;
- sge.lkey = send_buf->mr->lkey;
- sge.addr = (uintptr_t)msg;
-
- if (instance->mcast_ah != 0) {
- res = ibv_post_send (instance->mcast_cma_id->qp, &send_wr, &failed_send_wr);
- }
-
- return (res);
-}
-
-int totemiba_mcast_noflush_send (
- void *iba_context,
- const void *ms,
- unsigned int msg_len)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
- int res = 0;
- struct ibv_send_wr send_wr, *failed_send_wr;
- struct ibv_sge sge;
- void *msg;
- struct send_buf *send_buf;
-
- send_buf = mcast_send_buf_get (instance);
- if (send_buf == NULL) {
- return (-1);
- }
-
- msg = send_buf->buffer;
- memcpy (msg, ms, msg_len);
- send_wr.next = NULL;
- send_wr.sg_list = &sge;
- send_wr.num_sge = 1;
- send_wr.opcode = IBV_WR_SEND;
- send_wr.send_flags = IBV_SEND_SIGNALED;
- send_wr.wr_id = void2wrid(send_buf);
- send_wr.imm_data = 0;
- send_wr.wr.ud.ah = instance->mcast_ah;
- send_wr.wr.ud.remote_qpn = instance->mcast_qpn;
- send_wr.wr.ud.remote_qkey = instance->mcast_qkey;
-
- sge.length = msg_len;
- sge.lkey = send_buf->mr->lkey;
- sge.addr = (uintptr_t)msg;
-
- if (instance->mcast_ah != 0) {
- res = ibv_post_send (instance->mcast_cma_id->qp, &send_wr, &failed_send_wr);
- }
-
- return (res);
-}
-
-extern int totemiba_iface_check (void *iba_context)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
- int res = 0;
-
- instance = NULL;
-
- return (res);
-}
-
-extern void totemiba_net_mtu_adjust (void *iba_context, struct totem_config *totem_config)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
- instance = NULL;
-}
-
-const char *totemiba_iface_print (void *iba_context) {
- struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
-
- const char *ret_char;
-
- ret_char = totemip_print (&instance->my_id);
-
- return (ret_char);
-}
-
-int totemiba_iface_get (
- void *iba_context,
- struct totem_ip_address *addr)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
- int res = 0;
-
- memcpy (addr, &instance->my_id, sizeof (struct totem_ip_address));
-
- return (res);
-}
-
-int totemiba_token_target_set (
- void *iba_context,
- const struct totem_ip_address *token_target)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
- int res = 0;
- int addr_len = 16;
-
- totemip_totemip_to_sockaddr_convert((struct totem_ip_address *)token_target,
- instance->totem_interface->ip_port, (struct sockaddr_storage *)&instance->token_addr,
- &addr_len);
-
- res = send_token_unbind (instance);
-
- res = send_token_bind (instance);
-
- return (res);
-}
-
-extern int totemiba_recv_mcast_empty (
- void *iba_context)
-{
- struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
- int res = 0;
-
- instance = NULL;
-
- return (res);
-}
-
diff --git a/exec/totemiba.h b/exec/totemiba.h
deleted file mode 100644
index 7e7a689a..00000000
--- a/exec/totemiba.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 2009-2011 Red Hat, Inc.
- *
- * All rights reserved.
- *
- * Author: Steven Dake (sdake@redhat.com)
- *
- * This software licensed under BSD license, the text of which follows:
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- * - Neither the name of the MontaVista Software, Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef TOTEMIBA_H_DEFINED
-#define TOTEMIBA_H_DEFINED
-
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <corosync/hdb.h>
-#include <qb/qbloop.h>
-
-#include <corosync/totem/totem.h>
-
-/**
- * Create an instance
- */
-extern int totemiba_initialize (
- qb_loop_t* qb_poll_handle,
- void **iba_handle,
- struct totem_config *totem_config,
- totemsrp_stats_t *stats,
- int interface_no,
- void *context,
-
- void (*deliver_fn) (
- void *context,
- const void *msg,
- unsigned int msg_len),
-
- void (*iface_change_fn) (
- void *context,
- const struct totem_ip_address *iface_address),
-
- void (*target_set_completed) (
- void *context));
-
-extern void *totemiba_buffer_alloc (void);
-
-extern void totemiba_buffer_release (void *ptr);
-
-extern int totemiba_processor_count_set (
- void *iba_context,
- int processor_count);
-
-extern int totemiba_token_send (
- void *iba_context,
- const void *msg,
- unsigned int msg_len);
-
-extern int totemiba_mcast_flush_send (
- void *iba_context,
- const void *msg,
- unsigned int msg_len);
-
-extern int totemiba_mcast_noflush_send (
- void *iba_context,
- const void *msg,
- unsigned int msg_len);
-
-extern int totemiba_recv_flush (void *iba_context);
-
-extern int totemiba_send_flush (void *iba_context);
-
-extern int totemiba_iface_check (void *iba_context);
-
-extern int totemiba_finalize (void *iba_context);
-
-extern void totemiba_net_mtu_adjust (void *iba_context, struct totem_config *totem_config);
-
-extern const char *totemiba_iface_print (void *iba_context);
-
-extern int totemiba_iface_get (
- void *iba_context,
- struct totem_ip_address *addr);
-
-extern int totemiba_token_target_set (
- void *iba_context,
- const struct totem_ip_address *token_target);
-
-extern int totemiba_crypto_set (
- void *iba_context,
- const char *cipher_type,
- const char *hash_type);
-
-extern int totemiba_recv_mcast_empty (
- void *iba_context);
-
-#endif /* TOTEMIBA_H_DEFINED */
diff --git a/exec/totemmrp.c b/exec/totemmrp.c
deleted file mode 100644
index 6166f32c..00000000
--- a/exec/totemmrp.c
+++ /dev/null
@@ -1,286 +0,0 @@
-/*
- * Copyright (c) 2005 MontaVista Software, Inc.
- * Copyright (c) 2006-2007, 2009 Red Hat, Inc.
- *
- * All rights reserved.
- *
- * Author: Steven Dake (sdake@redhat.com)
- *
- * This software licensed under BSD license, the text of which follows:
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- * - Neither the name of the MontaVista Software, Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <config.h>
-
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <netdb.h>
-#include <sys/un.h>
-#include <sys/ioctl.h>
-#include <sys/param.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <errno.h>
-#include <sched.h>
-#include <time.h>
-#include <sys/time.h>
-#include <sys/poll.h>
-
-#include <corosync/totem/totem.h>
-#include <qb/qbloop.h>
-
-#include "totemmrp.h"
-#include "totemsrp.h"
-
-void *totemsrp_context;
-
-void totemmrp_deliver_fn (
- unsigned int nodeid,
- const void *msg,
- unsigned int msg_len,
- int endian_conversion_required);
-
-void totemmrp_confchg_fn (
- enum totem_configuration_type configuration_type,
- const unsigned int *member_list, size_t member_list_entries,
- const unsigned int *left_list, size_t left_list_entries,
- const unsigned int *joined_list, size_t joined_list_entries,
- const struct memb_ring_id *ring_id);
-
-void (*pg_deliver_fn) (
- unsigned int nodeid,
- const void *msg,
- unsigned int msg_len,
- int endian_conversion_required) = 0;
-
-void (*pg_confchg_fn) (
- enum totem_configuration_type configuration_type,
- const unsigned int *member_list, size_t member_list_entries,
- const unsigned int *left_list, size_t left_list_entries,
- const unsigned int *joined_list, size_t joined_list_entries,
- const struct memb_ring_id *ring_id) = 0;
-
-void totemmrp_deliver_fn (
- unsigned int nodeid,
- const void *msg,
- unsigned int msg_len,
- int endian_conversion_required)
-{
- pg_deliver_fn (nodeid, msg, msg_len, endian_conversion_required);
-}
-
-void totemmrp_confchg_fn (
- enum totem_configuration_type configuration_type,
- const unsigned int *member_list, size_t member_list_entries,
- const unsigned int *left_list, size_t left_list_entries,
- const unsigned int *joined_list, size_t joined_list_entries,
- const struct memb_ring_id *ring_id)
-{
- pg_confchg_fn (configuration_type,
- member_list, member_list_entries,
- left_list, left_list_entries,
- joined_list, joined_list_entries,
- ring_id);
-}
-
-/*
- * Initialize the totem multiple ring protocol
- */
-int totemmrp_initialize (
- qb_loop_t *poll_handle,
- struct totem_config *totem_config,
- totempg_stats_t *stats,
-
- void (*deliver_fn) (
- unsigned int nodeid,
- const void *msg,
- unsigned int msg_len,
- int endian_conversion_required),
- void (*confchg_fn) (
- enum totem_configuration_type configuration_type,
- const unsigned int *member_list, size_t member_list_entries,
- const unsigned int *left_list, size_t left_list_entries,
- const unsigned int *joined_list, size_t joined_list_entries,
- const struct memb_ring_id *ring_id),
- void (*waiting_trans_ack_cb_fn) (
- int waiting_trans_ack))
-{
- int result;
- pg_deliver_fn = deliver_fn;
- pg_confchg_fn = confchg_fn;
-
- stats->mrp = calloc (sizeof(totemmrp_stats_t), 1);
- result = totemsrp_initialize (
- poll_handle,
- &totemsrp_context,
- totem_config,
- stats->mrp,
- totemmrp_deliver_fn,
- totemmrp_confchg_fn,
- waiting_trans_ack_cb_fn);
-
- return (result);
-}
-
-void totemmrp_finalize (void)
-{
- totemsrp_finalize (totemsrp_context);
-}
-
-/*
- * Multicast a message
- */
-int totemmrp_mcast (
- struct iovec *iovec,
- unsigned int iov_len,
- int priority)
-{
- return totemsrp_mcast (totemsrp_context, iovec, iov_len, priority);
-}
-
-/*
- * Return number of available messages that can be queued
- */
-int totemmrp_avail (void)
-{
- return (totemsrp_avail (totemsrp_context));
-}
-
-int totemmrp_callback_token_create (
- void **handle_out,
- enum totem_callback_token_type type,
- int delete,
- int (*callback_fn) (enum totem_callback_token_type type, const void *),
- const void *data)
-{
- return totemsrp_callback_token_create (totemsrp_context, handle_out, type, delete, callback_fn, data);
-}
-
-void totemmrp_callback_token_destroy (
- void *handle_out)
-{
- totemsrp_callback_token_destroy (totemsrp_context, handle_out);
-}
-
-void totemmrp_event_signal (enum totem_event_type type, int value)
-{
- totemsrp_event_signal (totemsrp_context, type, value);
-}
-
-int totemmrp_ifaces_get (
- unsigned int nodeid,
- struct totem_ip_address *interfaces,
- unsigned int interfaces_size,
- char ***status,
- unsigned int *iface_count)
-{
- int res;
-
- res = totemsrp_ifaces_get (
- totemsrp_context,
- nodeid,
- interfaces,
- interfaces_size,
- status,
- iface_count);
-
- return (res);
-}
-
-int totemmrp_crypto_set (
- const char *cipher_type,
- const char *hash_type)
-{
- return totemsrp_crypto_set (totemsrp_context,
- cipher_type,
- hash_type);
-}
-
-unsigned int totemmrp_my_nodeid_get (void)
-{
- return (totemsrp_my_nodeid_get (totemsrp_context));
-}
-
-int totemmrp_my_family_get (void)
-{
- return (totemsrp_my_family_get (totemsrp_context));
-}
-
-extern int totemmrp_ring_reenable (void)
-{
- int res;
-
- res = totemsrp_ring_reenable (
- totemsrp_context);
-
- return (res);
-}
-
-extern void totemmrp_service_ready_register (
- void (*totem_service_ready) (void))
-{
- totemsrp_service_ready_register (
- totemsrp_context,
- totem_service_ready);
-}
-
-int totemmrp_member_add (
- const struct totem_ip_address *member,
- int ring_no)
-{
- int res;
-
- res = totemsrp_member_add (totemsrp_context, member, ring_no);
-
- return (res);
-}
-
-int totemmrp_member_remove (
- const struct totem_ip_address *member,
- int ring_no)
-{
- int res;
-
- res = totemsrp_member_remove (totemsrp_context, member, ring_no);
-
- return (res);
-}
-
-void totemmrp_threaded_mode_enable (void)
-{
- totemsrp_threaded_mode_enable (totemsrp_context);
-}
-
-void totemmrp_trans_ack (void)
-{
- totemsrp_trans_ack (totemsrp_context);
-}
diff --git a/exec/totemmrp.h b/exec/totemmrp.h
deleted file mode 100644
index 2988c8f0..00000000
--- a/exec/totemmrp.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2005 MontaVista Software, Inc.
- * Copyright (c) 2006-2011 Red Hat, Inc.
- *
- * All rights reserved.
- *
- * Author: Steven Dake (sdake@redhat.com)
- *
- * This software licensed under BSD license, the text of which follows:
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- * - Neither the name of the MontaVista Software, Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/**
- * @file
- * Totem Single Ring Protocol
- *
- * depends on poll abstraction, POSIX, IPV4
- */
-
-#ifndef TOTEMMRP_H_DEFINED
-#define TOTEMMRP_H_DEFINED
-
-#include <corosync/totem/totem.h>
-
-/**
- * Initialize the logger
- */
-extern void totemmrp_log_printf_init (
- void (*log_printf) (int , char *, ...),
- int log_level_security,
- int log_level_error,
- int log_level_warning,
- int log_level_notice,
- int log_level_debug);
-
-/**
- * Initialize the group messaging interface
- */
-extern int totemmrp_initialize (
- qb_loop_t *poll_handle,
- struct totem_config *totem_config,
- totempg_stats_t *stats,
-
- void (*deliver_fn) (
- unsigned int nodeid,
- const void *msg,
- unsigned int msg_len,
- int endian_conversion_required),
- void (*confchg_fn) (
- enum totem_configuration_type configuration_type,
- const unsigned int *member_list, size_t member_list_entries,
- const unsigned int *left_list, size_t left_list_entries,
- const unsigned int *joined_list, size_t joined_list_entries,
- const struct memb_ring_id *ring_id),
- void (*waiting_trans_ack_cb_fn) (
- int waiting_trans_ack));
-
-extern void totemmrp_finalize (void);
-
-/**
- * Multicast a message
- */
-extern int totemmrp_mcast (
- struct iovec *iovec,
- unsigned int iov_len,
- int priority);
-
-/**
- * Return number of available messages that can be queued
- */
-extern int totemmrp_avail (void);
-
-extern int totemmrp_callback_token_create (
- void **handle_out,
- enum totem_callback_token_type type,
- int delete,
- int (*callback_fn) (enum totem_callback_token_type type, const void *),
- const void *data);
-
-extern void totemmrp_callback_token_destroy (
- void *handle_out);
-
-extern void totemmrp_event_signal (enum totem_event_type type, int value);
-
-extern int totemmrp_ifaces_get (
- unsigned int nodeid,
- struct totem_ip_address *interfaces,
- unsigned int interfaces_size,
- char ***status,
- unsigned int *iface_count);
-
-extern unsigned int totemmrp_my_nodeid_get (void);
-
-extern int totemmrp_my_family_get (void);
-
-extern int totemmrp_crypto_set (const char *cipher_type, const char *hash_type);
-
-extern int totemmrp_ring_reenable (void);
-
-extern void totemmrp_service_ready_register (
- void (*totem_service_ready) (void));
-
-extern int totemmrp_member_add (
- const struct totem_ip_address *member,
- int ring_no);
-
-extern int totemmrp_member_remove (
- const struct totem_ip_address *member,
- int ring_no);
-
-void totemmrp_threaded_mode_enable (void);
-
-void totemmrp_trans_ack (void);
-
-#endif /* TOTEMMRP_H_DEFINED */
diff --git a/exec/totemnet.c b/exec/totemnet.c
index 059ae77b..c24fdbc6 100644
--- a/exec/totemnet.c
+++ b/exec/totemnet.c
@@ -1,518 +1,525 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <assert.h>
#ifdef HAVE_RDMA
#include <totemiba.h>
#endif
#include <totemudp.h>
#include <totemudpu.h>
+#include <totemknet.h>
#include <totemnet.h>
#include <qb/qbloop.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/logsys.h>
struct transport {
const char *name;
-
+
int (*initialize) (
qb_loop_t *loop_pt,
void **transport_instance,
struct totem_config *totem_config,
totemsrp_stats_t *stats,
- int interface_no,
void *context,
void (*deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len),
void (*iface_change_fn) (
void *context,
- const struct totem_ip_address *iface_address),
+ const struct totem_ip_address *iface_address,
+ unsigned int ring_no),
+
+ void (*mtu_changed) (
+ void *context,
+ int net_mtu),
void (*target_set_completed) (
void *context));
void *(*buffer_alloc) (void);
void (*buffer_release) (void *ptr);
int (*processor_count_set) (
void *transport_context,
int processor_count);
int (*token_send) (
void *transport_context,
const void *msg,
unsigned int msg_len);
int (*mcast_flush_send) (
void *transport_context,
const void *msg,
unsigned int msg_len);
int (*mcast_noflush_send) (
void *transport_context,
const void *msg,
unsigned int msg_len);
int (*recv_flush) (void *transport_context);
int (*send_flush) (void *transport_context);
int (*iface_check) (void *transport_context);
int (*finalize) (void *transport_context);
void (*net_mtu_adjust) (void *transport_context, struct totem_config *totem_config);
const char *(*iface_print) (void *transport_context);
- int (*iface_get) (
+ int (*ifaces_get) (
void *transport_context,
- struct totem_ip_address *addr);
+ char ***status,
+ unsigned int *iface_count);
int (*token_target_set) (
void *transport_context,
const struct totem_ip_address *token_target);
int (*crypto_set) (
void *transport_context,
const char *cipher_type,
const char *hash_type);
int (*recv_mcast_empty) (
void *transport_context);
int (*member_add) (
void *transport_context,
- const struct totem_ip_address *member);
+ const struct totem_ip_address *local,
+ const struct totem_ip_address *member,
+ int ring_no);
int (*member_remove) (
void *transport_context,
- const struct totem_ip_address *member);
+ const struct totem_ip_address *member,
+ int ring_no);
int (*member_set_active) (
void *transport_context,
const struct totem_ip_address *member,
int active);
};
struct transport transport_entries[] = {
{
.name = "UDP/IP Multicast",
.initialize = totemudp_initialize,
.buffer_alloc = totemudp_buffer_alloc,
.buffer_release = totemudp_buffer_release,
.processor_count_set = totemudp_processor_count_set,
.token_send = totemudp_token_send,
.mcast_flush_send = totemudp_mcast_flush_send,
.mcast_noflush_send = totemudp_mcast_noflush_send,
.recv_flush = totemudp_recv_flush,
.send_flush = totemudp_send_flush,
.iface_check = totemudp_iface_check,
.finalize = totemudp_finalize,
.net_mtu_adjust = totemudp_net_mtu_adjust,
- .iface_print = totemudp_iface_print,
- .iface_get = totemudp_iface_get,
+ .ifaces_get = totemudp_ifaces_get,
.token_target_set = totemudp_token_target_set,
.crypto_set = totemudp_crypto_set,
.recv_mcast_empty = totemudp_recv_mcast_empty
},
{
.name = "UDP/IP Unicast",
.initialize = totemudpu_initialize,
.buffer_alloc = totemudpu_buffer_alloc,
.buffer_release = totemudpu_buffer_release,
.processor_count_set = totemudpu_processor_count_set,
.token_send = totemudpu_token_send,
.mcast_flush_send = totemudpu_mcast_flush_send,
.mcast_noflush_send = totemudpu_mcast_noflush_send,
.recv_flush = totemudpu_recv_flush,
.send_flush = totemudpu_send_flush,
.iface_check = totemudpu_iface_check,
.finalize = totemudpu_finalize,
.net_mtu_adjust = totemudpu_net_mtu_adjust,
- .iface_print = totemudpu_iface_print,
- .iface_get = totemudpu_iface_get,
+ .ifaces_get = totemudpu_ifaces_get,
.token_target_set = totemudpu_token_target_set,
.crypto_set = totemudpu_crypto_set,
.recv_mcast_empty = totemudpu_recv_mcast_empty,
.member_add = totemudpu_member_add,
.member_remove = totemudpu_member_remove,
- .member_set_active = totemudpu_member_set_active
},
-#ifdef HAVE_RDMA
{
- .name = "Infiniband/IP",
- .initialize = totemiba_initialize,
- .buffer_alloc = totemiba_buffer_alloc,
- .buffer_release = totemiba_buffer_release,
- .processor_count_set = totemiba_processor_count_set,
- .token_send = totemiba_token_send,
- .mcast_flush_send = totemiba_mcast_flush_send,
- .mcast_noflush_send = totemiba_mcast_noflush_send,
- .recv_flush = totemiba_recv_flush,
- .send_flush = totemiba_send_flush,
- .iface_check = totemiba_iface_check,
- .finalize = totemiba_finalize,
- .net_mtu_adjust = totemiba_net_mtu_adjust,
- .iface_print = totemiba_iface_print,
- .iface_get = totemiba_iface_get,
- .token_target_set = totemiba_token_target_set,
- .crypto_set = totemiba_crypto_set,
- .recv_mcast_empty = totemiba_recv_mcast_empty
+ .name = "Kronosnet",
+ .initialize = totemknet_initialize,
+ .buffer_alloc = totemknet_buffer_alloc,
+ .buffer_release = totemknet_buffer_release,
+ .processor_count_set = totemknet_processor_count_set,
+ .token_send = totemknet_token_send,
+ .mcast_flush_send = totemknet_mcast_flush_send,
+ .mcast_noflush_send = totemknet_mcast_noflush_send,
+ .recv_flush = totemknet_recv_flush,
+ .send_flush = totemknet_send_flush,
+ .iface_check = totemknet_iface_check,
+ .finalize = totemknet_finalize,
+ .net_mtu_adjust = totemknet_net_mtu_adjust,
+ .ifaces_get = totemknet_ifaces_get,
+ .token_target_set = totemknet_token_target_set,
+ .crypto_set = totemknet_crypto_set,
+ .recv_mcast_empty = totemknet_recv_mcast_empty,
+ .member_add = totemknet_member_add,
+ .member_remove = totemknet_member_remove,
}
-#endif
};
-
+
struct totemnet_instance {
void *transport_context;
struct transport *transport;
-
void (*totemnet_log_printf) (
int level,
int subsys,
const char *function,
const char *file,
int line,
const char *format,
...)__attribute__((format(printf, 6, 7)));
int totemnet_subsys_id;
};
#define log_printf(level, format, args...) \
do { \
instance->totemnet_log_printf ( \
level, \
instance->totemnet_subsys_id, \
__FUNCTION__, __FILE__, __LINE__, \
(const char *)format, ##args); \
} while (0);
static void totemnet_instance_initialize (
struct totemnet_instance *instance,
struct totem_config *config)
{
int transport;
instance->totemnet_log_printf = config->totem_logging_configuration.log_printf;
instance->totemnet_subsys_id = config->totem_logging_configuration.log_subsys_id;
transport = config->transport_number;
log_printf (LOGSYS_LEVEL_NOTICE,
"Initializing transport (%s).", transport_entries[transport].name);
instance->transport = &transport_entries[transport];
}
int totemnet_crypto_set (
void *net_context,
const char *cipher_type,
const char *hash_type)
{
struct totemnet_instance *instance = (struct totemnet_instance *)net_context;
int res = 0;
res = instance->transport->crypto_set (instance->transport_context,
cipher_type, hash_type);
return res;
}
int totemnet_finalize (
void *net_context)
{
struct totemnet_instance *instance = (struct totemnet_instance *)net_context;
int res = 0;
res = instance->transport->finalize (instance->transport_context);
return (res);
}
int totemnet_initialize (
qb_loop_t *loop_pt,
void **net_context,
struct totem_config *totem_config,
totemsrp_stats_t *stats,
- int interface_no,
void *context,
void (*deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len),
void (*iface_change_fn) (
void *context,
- const struct totem_ip_address *iface_address),
+ const struct totem_ip_address *iface_address,
+ unsigned int ring_no),
+
+ void (*mtu_changed) (
+ void *context,
+ int net_mtu),
void (*target_set_completed) (
void *context))
{
struct totemnet_instance *instance;
unsigned int res;
instance = malloc (sizeof (struct totemnet_instance));
if (instance == NULL) {
return (-1);
}
totemnet_instance_initialize (instance, totem_config);
res = instance->transport->initialize (loop_pt,
&instance->transport_context, totem_config, stats,
- interface_no, context, deliver_fn, iface_change_fn, target_set_completed);
+ context, deliver_fn, iface_change_fn, mtu_changed, target_set_completed);
if (res == -1) {
goto error_destroy;
}
*net_context = instance;
return (0);
error_destroy:
free (instance);
return (-1);
}
/*
 * Obtain a buffer from the active transport's buffer_alloc hook.
 * Asserts that the instance and its transport are set.
 */
void *totemnet_buffer_alloc (void *net_context)
{
	struct totemnet_instance *instance;

	instance = net_context;

	assert (instance != NULL);
	assert (instance->transport != NULL);

	return (instance->transport->buffer_alloc ());
}
/*
 * Give a buffer back through the active transport's buffer_release hook.
 * Asserts that the instance and its transport are set.
 */
void totemnet_buffer_release (void *net_context, void *ptr)
{
	struct totemnet_instance *instance;

	instance = net_context;

	assert (instance != NULL);
	assert (instance->transport != NULL);

	instance->transport->buffer_release (ptr);
}
/*
 * Hand the processor count to the transport's processor_count_set hook
 * and return the hook's result.
 */
int totemnet_processor_count_set (
	void *net_context,
	int processor_count)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->processor_count_set (
		net->transport_context,
		processor_count));
}
/*
 * Call the transport's recv_flush hook and return its result.
 */
int totemnet_recv_flush (void *net_context)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->recv_flush (net->transport_context));
}
/*
 * Call the transport's send_flush hook and return its result.
 */
int totemnet_send_flush (void *net_context)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->send_flush (net->transport_context));
}
/*
 * Pass a token message of msg_len bytes to the transport's token_send
 * hook and return the hook's result.
 */
int totemnet_token_send (
	void *net_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->token_send (
		net->transport_context,
		msg,
		msg_len));
}
/*
 * Pass a message of msg_len bytes to the transport's mcast_flush_send
 * hook and return the hook's result.
 */
int totemnet_mcast_flush_send (
	void *net_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->mcast_flush_send (
		net->transport_context,
		msg,
		msg_len));
}
/*
 * Pass a message of msg_len bytes to the transport's mcast_noflush_send
 * hook and return the hook's result.
 */
int totemnet_mcast_noflush_send (
	void *net_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->mcast_noflush_send (
		net->transport_context,
		msg,
		msg_len));
}
/*
 * Call the transport's iface_check hook and return its result.
 */
extern int totemnet_iface_check (void *net_context)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->iface_check (net->transport_context));
}
/*
 * Let the transport's net_mtu_adjust hook update totem_config.
 * Always returns 0; nothing from the hook is propagated to the caller.
 */
extern int totemnet_net_mtu_adjust (void *net_context, struct totem_config *totem_config)
{
	struct totemnet_instance *net = net_context;

	net->transport->net_mtu_adjust (net->transport_context, totem_config);

	return (0);
}
-const char *totemnet_iface_print (void *net_context) {
- struct totemnet_instance *instance = (struct totemnet_instance *)net_context;
- const char *ret_char;
-
- ret_char = instance->transport->iface_print (instance->transport_context);
- return (ret_char);
-}
-
-int totemnet_iface_get (
/*
 * Query the transport's ifaces_get hook, passing the caller's status and
 * iface_count out-parameters straight through, and return the hook's
 * result.
 */
+int totemnet_ifaces_get (
void *net_context,
- struct totem_ip_address *addr)
+ char ***status,
+ unsigned int *iface_count)
{
struct totemnet_instance *instance = (struct totemnet_instance *)net_context;
unsigned int res;
- res = instance->transport->iface_get (instance->transport_context, addr);
-
+ res = instance->transport->ifaces_get (instance->transport_context, status, iface_count);
+
return (res);
}
/*
 * Tell the transport which address the token should be sent to by
 * calling its token_target_set hook; returns the hook's result.
 */
int totemnet_token_target_set (
	void *net_context,
	const struct totem_ip_address *token_target)
{
	struct totemnet_instance *net = net_context;
	unsigned int rc;

	rc = net->transport->token_target_set (
		net->transport_context,
		token_target);

	return (rc);
}
/*
 * Call the transport's recv_mcast_empty hook and return its result.
 */
extern int totemnet_recv_mcast_empty (
	void *net_context)
{
	struct totemnet_instance *net = net_context;
	unsigned int rc;

	rc = net->transport->recv_mcast_empty (net->transport_context);

	return (rc);
}
/*
 * Add a member through the transport's member_add hook.
 *
 * The hook is optional: when the transport does not provide it nothing
 * is called and 0 is returned; otherwise the hook's result is returned.
 */
extern int totemnet_member_add (
void *net_context,
- const struct totem_ip_address *member)
+ const struct totem_ip_address *local,
+ const struct totem_ip_address *member,
+ int ring_no)
{
struct totemnet_instance *instance = (struct totemnet_instance *)net_context;
unsigned int res = 0;
if (instance->transport->member_add) {
res = instance->transport->member_add (
instance->transport_context,
- member);
+ local,
+ member,
+ ring_no);
}
return (res);
}
/*
 * Remove a member through the transport's member_remove hook.
 *
 * The hook is optional: when the transport does not provide it nothing
 * is called and 0 is returned; otherwise the hook's result is returned.
 */
extern int totemnet_member_remove (
void *net_context,
- const struct totem_ip_address *member)
+ const struct totem_ip_address *member,
+ int ring_no)
{
struct totemnet_instance *instance = (struct totemnet_instance *)net_context;
unsigned int res = 0;
if (instance->transport->member_remove) {
res = instance->transport->member_remove (
instance->transport_context,
- member);
+ member,
+ ring_no);
}
return (res);
}
/*
 * Mark a member active/inactive through the transport's optional
 * member_set_active hook. Returns 0 when the transport has no such hook,
 * otherwise the hook's result.
 */
int totemnet_member_set_active (
	void *net_context,
	const struct totem_ip_address *member,
	int active)
{
	struct totemnet_instance *net = net_context;
	unsigned int rc = 0;

	if (!net->transport->member_set_active) {
		return (rc);
	}

	rc = net->transport->member_set_active (
		net->transport_context,
		member,
		active);

	return (rc);
}
diff --git a/exec/totemnet.h b/exec/totemnet.h
index cede2147..6f944b8a 100644
--- a/exec/totemnet.h
+++ b/exec/totemnet.h
@@ -1,141 +1,149 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2007, 2009 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file
* Totem Network interface - also does encryption/decryption
*
* depends on poll abstraction, POSIX, IPV4
*/
#ifndef TOTEMNET_H_DEFINED
#define TOTEMNET_H_DEFINED
#include <sys/types.h>
#include <sys/socket.h>
#include <corosync/totem/totem.h>
#define TOTEMNET_NOFLUSH 0
#define TOTEMNET_FLUSH 1
/**
* Create an instance
*/
extern int totemnet_initialize (
qb_loop_t *poll_handle,
void **net_context,
struct totem_config *totem_config,
totemsrp_stats_t *stats,
- int interface_no,
void *context,
void (*deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len),
void (*iface_change_fn) (
void *context,
- const struct totem_ip_address *iface_address),
+ const struct totem_ip_address *iface_address,
+ unsigned int iface_no),
+
+ void (*mtu_changed) (
+ void *context,
+ int net_mtu),
void (*target_set_completed) (
void *context));
extern void *totemnet_buffer_alloc (void *net_context);
extern void totemnet_buffer_release (void *net_context, void *ptr);
extern int totemnet_processor_count_set (
void *net_context,
int processor_count);
extern int totemnet_token_send (
void *net_context,
const void *msg,
unsigned int msg_len);
extern int totemnet_mcast_flush_send (
void *net_context,
const void *msg,
unsigned int msg_len);
extern int totemnet_mcast_noflush_send (
void *net_context,
const void *msg,
unsigned int msg_len);
extern int totemnet_recv_flush (void *net_context);
extern int totemnet_send_flush (void *net_context);
extern int totemnet_iface_check (void *net_context);
extern int totemnet_finalize (void *net_context);
extern int totemnet_net_mtu_adjust (void *net_context, struct totem_config *totem_config);
/*
 * NOTE(review): the definition of totemnet_iface_print is deleted from
 * totemnet.c in this same change, so this prototype looks stale --
 * confirm and drop it if nothing else defines the function.
 */
extern const char *totemnet_iface_print (void *net_context);
-extern int totemnet_iface_get (
+extern int totemnet_ifaces_get (
void *net_context,
- struct totem_ip_address *addr);
+ char ***status,
+ unsigned int *iface_count);
extern int totemnet_token_target_set (
void *net_context,
const struct totem_ip_address *token_target);
extern int totemnet_crypto_set (
void *net_context,
const char *cipher_type,
const char *hash_type);
extern int totemnet_recv_mcast_empty (
void *net_context);
extern int totemnet_member_add (
void *net_context,
- const struct totem_ip_address *member);
+ const struct totem_ip_address *local,
+ const struct totem_ip_address *member,
+ int ring_no);
extern int totemnet_member_remove (
void *net_context,
- const struct totem_ip_address *member);
+ const struct totem_ip_address *member,
+ int ring_no);
extern int totemnet_member_set_active (
void *net_context,
const struct totem_ip_address *member,
int active);
#endif /* TOTEMNET_H_DEFINED */
diff --git a/exec/totempg.c b/exec/totempg.c
index 0b467827..927afa85 100644
--- a/exec/totempg.c
+++ b/exec/totempg.c
@@ -1,1534 +1,1537 @@
/*
* Copyright (c) 2003-2005 MontaVista Software, Inc.
* Copyright (c) 2005 OSDL.
* Copyright (c) 2006-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
* Author: Mark Haverkamp (markh@osdl.org)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* FRAGMENTATION AND PACKING ALGORITHM:
*
* Assemble the entire message into one buffer
* if full fragment
* store fragment into lengths list
* for each full fragment
* multicast fragment
* set length and fragment fields of pg message
* store remaining multicast into head of fragmentation data and set lens field
*
* If a message exceeds the maximum packet size allowed by the totem
* single ring protocol, the protocol could lose forward progress.
* Statically calculating the allowed data amount doesn't work because
* the amount of data allowed depends on the number of fragments in
* each message. In this implementation, the maximum fragment size
* is dynamically calculated for each fragment added to the message.
* It is possible for a message to be two bytes short of the maximum
* packet size. This occurs when a message or collection of
* messages + the mcast header + the lens are two bytes short of the
* end of the packet. Since another len field consumes two bytes, the
* len field would consume the rest of the packet without room for data.
*
* One optimization would be to forgo the final len field and determine
* it from the size of the udp datagram. Then this condition would no
* longer occur.
*/
/*
* ASSEMBLY AND UNPACKING ALGORITHM:
*
* copy incoming packet into assembly data buffer indexed by current
* location of end of fragment
*
* if not fragmented
* deliver all messages in assembly data buffer
* else
* if msg_count > 1 and fragmented
* deliver all messages except last message in assembly data buffer
* copy last fragmented section to start of assembly data buffer
* else
* if msg_count = 1 and fragmented
* do nothing
*
*/
#include <config.h>
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/uio.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <pthread.h>
#include <errno.h>
#include <limits.h>
#include <corosync/swab.h>
#include <corosync/list.h>
#include <qb/qbloop.h>
#include <qb/qbipcs.h>
#include <corosync/totem/totempg.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/logsys.h>
-#include "totemmrp.h"
#include "totemsrp.h"
/*
 * Smaller of two values.
 *
 * Both arguments and the whole expansion are parenthesized so the macro
 * composes safely inside larger expressions (e.g. "x - min(a, b)").
 * Each argument may still be evaluated more than once, so do not pass
 * expressions with side effects.
 */
#define min(a,b) (((a) < (b)) ? (a) : (b))
/*
 * Leading header of struct totempg_mcast. Both fields are written as 0
 * by the sender in callback_token_received_fn.
 */
struct totempg_mcast_header {
short version;
short type;
};
#if !(defined(__i386__) || defined(__x86_64__))
/*
* Alignment is needed on architectures other than i386 or x86_64
*/
#define TOTEMPG_NEED_ALIGN 1
#endif
/*
* totempg_mcast structure
*
* header: Identify the mcast.
* fragmented: Set if this message continues into next message
* continuation: Set if this message is a continuation from last message
* msg_count Indicates how many packed messages are contained
* in the mcast.
* Also, the size of each packed message and the messages themselves are
* appended to the end of this structure when sent.
*/
struct totempg_mcast {
struct totempg_mcast_header header;
unsigned char fragmented;
unsigned char continuation;
unsigned short msg_count;
/*
* short msg_len[msg_count];
*/
/*
* data for messages
*/
};
/*
* Maximum packet size for totem pg messages
*/
#define TOTEMPG_PACKET_SIZE (totempg_totem_config->net_mtu - \
sizeof (struct totempg_mcast))
/*
* Local variables used for packing small messages
*/
static unsigned short mcast_packed_msg_lens[FRAME_SIZE_MAX];
static int mcast_packed_msg_count = 0;
static int totempg_reserved = 1;
static unsigned int totempg_size_limit;
static totem_queue_level_changed_fn totem_queue_level_changed = NULL;
static uint32_t totempg_threaded_mode = 0;
+static void *totemsrp_context;
+
/*
* Function and data used to log messages
*/
static int totempg_log_level_security;
static int totempg_log_level_error;
static int totempg_log_level_warning;
static int totempg_log_level_notice;
static int totempg_log_level_debug;
static int totempg_subsys_id;
static void (*totempg_log_printf) (
int level,
int subsys,
const char *function,
const char *file,
int line,
const char *format, ...) __attribute__((format(printf, 6, 7)));
struct totem_config *totempg_totem_config;
static totempg_stats_t totempg_stats;
/*
 * Per-assembly discard state used by totempg_deliver_fn.
 * ACTIVE is entered when a packet's continuation number does not match
 * the assembly's last_frag_num; it is reset to INACTIVE when the next
 * packet for that assembly is processed.
 */
enum throw_away_mode {
THROW_AWAY_INACTIVE,
THROW_AWAY_ACTIVE
};
/*
 * Reassembly buffer for messages received from one node.
 */
struct assembly {
/* Node this buffer is tracking; matched in assembly_ref. */
unsigned int nodeid;
/* Bytes of the message(s) being assembled. */
unsigned char data[MESSAGE_SIZE_MAX];
/* Offset into data where the next received payload is copied. */
int index;
/* Compared against the continuation field of the next packet. */
unsigned char last_frag_num;
enum throw_away_mode throw_away_mode;
/* Linkage on one of the inuse lists or on assembly_list_free. */
struct list_head list;
};
static void assembly_deref (struct assembly *assembly);
static int callback_token_received_fn (enum totem_callback_token_type type,
const void *data);
DECLARE_LIST_INIT(assembly_list_inuse);
/*
* Free list is used both for transitional and operational assemblies
*/
DECLARE_LIST_INIT(assembly_list_free);
DECLARE_LIST_INIT(assembly_list_inuse_trans);
DECLARE_LIST_INIT(totempg_groups_list);
/*
* Staging buffer for packed messages. Messages are staged in this buffer
* before sending. Multiple messages may fit which cuts down on the
* number of mcasts sent. If a message doesn't completely fit, then
* the mcast header has a fragment bit set that says that there are more
* data to follow. fragment_size is an index into the buffer. It indicates
* the size of message data and where to place new message data.
* fragment_continuation indicates whether the first packed message in
* the buffer is a continuation of a previously packed fragment.
*/
static unsigned char *fragmentation_data;
static int fragment_size = 0;
static int fragment_continuation = 0;
static int totempg_waiting_transack = 0;
/*
 * One registered consumer of the process-group layer, linked on
 * totempg_groups_list.
 */
struct totempg_group_instance {
/* Called by app_deliver_fn for each message whose groups match. */
void (*deliver_fn) (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required);
/* Called by app_confchg_fn on configuration changes; may be NULL. */
void (*confchg_fn) (
enum totem_configuration_type configuration_type,
const unsigned int *member_list, size_t member_list_entries,
const unsigned int *left_list, size_t left_list_entries,
const unsigned int *joined_list, size_t joined_list_entries,
const struct memb_ring_id *ring_id);
/* Groups this instance receives messages for (see group_matches). */
struct totempg_group *groups;
int groups_cnt;
/* NOTE(review): presumably the last reported queue level -- confirm against its users. */
int32_t q_level;
struct list_head list;
};
static unsigned char next_fragment = 1;
static pthread_mutex_t totempg_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t callback_token_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t mcast_msg_mutex = PTHREAD_MUTEX_INITIALIZER;
/*
 * Log through the configured totempg logging callback, tagging the
 * message with the totempg subsystem id and the call site.
 *
 * There is deliberately no semicolon after "while (0)": the caller's own
 * semicolon terminates the statement, so the macro expands to exactly one
 * statement and can be used in an unbraced if/else.
 */
#define log_printf(level, format, args...) \
do { \
	totempg_log_printf(level, \
		totempg_subsys_id, \
		__FUNCTION__, __FILE__, __LINE__, \
		format, ##args); \
} while (0)
static int msg_count_send_ok (int msg_count);
static int byte_count_send_ok (int byte_count);
/*
 * Callback handed to totemsrp_initialize: records the new
 * waiting_trans_ack state, which assembly_ref uses to choose between the
 * normal and the transitional in-use assembly list.
 */
static void totempg_waiting_trans_ack_cb (int waiting_trans_ack)
{
	/* %d matches the signed int argument (the old %u did not). */
	log_printf(LOG_DEBUG, "waiting_trans_ack changed to %d", waiting_trans_ack);
	totempg_waiting_transack = waiting_trans_ack;
}
static struct assembly *assembly_ref (unsigned int nodeid)
{
struct assembly *assembly;
struct list_head *list;
struct list_head *active_assembly_list_inuse;
if (totempg_waiting_transack) {
active_assembly_list_inuse = &assembly_list_inuse_trans;
} else {
active_assembly_list_inuse = &assembly_list_inuse;
}
/*
* Search inuse list for node id and return assembly buffer if found
*/
for (list = active_assembly_list_inuse->next;
list != active_assembly_list_inuse;
list = list->next) {
assembly = list_entry (list, struct assembly, list);
if (nodeid == assembly->nodeid) {
return (assembly);
}
}
/*
* Nothing found in inuse list get one from free list if available
*/
if (list_empty (&assembly_list_free) == 0) {
assembly = list_entry (assembly_list_free.next, struct assembly, list);
list_del (&assembly->list);
list_add (&assembly->list, active_assembly_list_inuse);
assembly->nodeid = nodeid;
assembly->index = 0;
assembly->last_frag_num = 0;
assembly->throw_away_mode = THROW_AWAY_INACTIVE;
return (assembly);
}
/*
* Nothing available in inuse or free list, so allocate a new one
*/
assembly = malloc (sizeof (struct assembly));
/*
* TODO handle memory allocation failure here
*/
assert (assembly);
assembly->nodeid = nodeid;
assembly->data[0] = 0;
assembly->index = 0;
assembly->last_frag_num = 0;
assembly->throw_away_mode = THROW_AWAY_INACTIVE;
list_init (&assembly->list);
list_add (&assembly->list, active_assembly_list_inuse);
return (assembly);
}
/*
 * Unlink an assembly from whichever list it is on and put it on the
 * free list for later reuse.
 */
static void assembly_deref (struct assembly *assembly)
{
	struct list_head *node = &assembly->list;

	list_del (node);
	list_add (node, &assembly_list_free);
}
static void assembly_deref_from_normal_and_trans (int nodeid)
{
int j;
struct list_head *list, *list_next;
struct list_head *active_assembly_list_inuse;
struct assembly *assembly;
for (j = 0; j < 2; j++) {
if (j == 0) {
active_assembly_list_inuse = &assembly_list_inuse;
} else {
active_assembly_list_inuse = &assembly_list_inuse_trans;
}
for (list = active_assembly_list_inuse->next;
list != active_assembly_list_inuse;
list = list_next) {
list_next = list->next;
assembly = list_entry (list, struct assembly, list);
if (nodeid == assembly->nodeid) {
list_del (&assembly->list);
list_add (&assembly->list, &assembly_list_free);
}
}
}
}
/*
 * Handle a configuration change: release the assembly buffers of every
 * node in left_list, then forward the change unchanged to each registered
 * group instance that has a confchg_fn.
 */
static inline void app_confchg_fn (
	enum totem_configuration_type configuration_type,
	const unsigned int *member_list, size_t member_list_entries,
	const unsigned int *left_list, size_t left_list_entries,
	const unsigned int *joined_list, size_t joined_list_entries,
	const struct memb_ring_id *ring_id)
{
	struct totempg_group_instance *group;
	struct list_head *pos;
	int left_idx;

	/*
	 * Returning a departed node's buffers to the free list also
	 * discards whatever was partially assembled for it.
	 */
	left_idx = 0;
	while (left_idx < left_list_entries) {
		assembly_deref_from_normal_and_trans (left_list[left_idx]);
		left_idx++;
	}

	for (pos = totempg_groups_list.next;
		pos != &totempg_groups_list;
		pos = pos->next) {

		group = list_entry (pos, struct totempg_group_instance, list);
		if (!group->confchg_fn) {
			continue;
		}

		group->confchg_fn (
			configuration_type,
			member_list, member_list_entries,
			left_list, left_list_entries,
			joined_list, joined_list_entries,
			ring_id);
	}
}
/*
 * Byte-swap, in place, the group header at the start of msg: the leading
 * 16-bit group count followed by that many 16-bit group lengths.
 *
 * When TOTEMPG_NEED_ALIGN is defined and msg is not 4-byte aligned, the
 * swap is done on a stack copy which is then copied back over msg.
 */
static inline void group_endian_convert (
	void *msg,
	int msg_len)
{
	char *work = msg;
	unsigned short *lens;
	int idx;

#ifdef TOTEMPG_NEED_ALIGN
	/*
	 * Align data structure for not i386 or x86_64
	 */
	if ((size_t)msg % 4 != 0) {
		work = alloca(msg_len);
		memcpy(work, msg, msg_len);
	}
#endif

	lens = (unsigned short *)work;

	/* Entry 0 is the count; entries 1..count are the lengths. */
	lens[0] = swab16(lens[0]);
	for (idx = 1; idx <= lens[0]; idx++) {
		lens[idx] = swab16(lens[idx]);
	}

	if (work != msg) {
		memcpy(msg, work, msg_len);
	}
}
/*
 * Decide whether the message in iovec (exactly one element) names any of
 * the groups in groups_b.
 *
 * The message starts with a 16-bit group count, that many 16-bit group
 * name lengths, and then the group names back to back. *adjust_iovec is
 * always set to the total size of that header plus names, i.e. the amount
 * to skip to reach the payload.
 *
 * Returns 1 on the first matching group, 0 when none match.
 */
static inline int group_matches (
	struct iovec *iovec,
	unsigned int iov_len,
	struct totempg_group *groups_b,
	unsigned int group_b_cnt,
	unsigned int *adjust_iovec)
{
	unsigned short *lens;
	char *name_cursor;
	int name_idx;
	int cand_idx;
#ifdef TOTEMPG_NEED_ALIGN
	struct iovec iovec_aligned = { NULL, 0 };
#endif

	assert (iov_len == 1);

#ifdef TOTEMPG_NEED_ALIGN
	/*
	 * Align data structure for not i386 or x86_64
	 */
	if ((size_t)iovec->iov_base % 4 != 0) {
		iovec_aligned.iov_len = iovec->iov_len;
		iovec_aligned.iov_base = alloca(iovec->iov_len);
		memcpy(iovec_aligned.iov_base, iovec->iov_base, iovec->iov_len);
		iovec = &iovec_aligned;
	}
#endif

	lens = (unsigned short *)iovec->iov_base;

	/*
	 * Names start right after the count and the length table
	 */
	name_cursor = ((char *)iovec->iov_base) +
		sizeof (unsigned short) * (lens[0] + 1);

	/*
	 * Calculate amount to adjust the iovec by before delivering to app
	 */
	*adjust_iovec = sizeof (unsigned short) * (lens[0] + 1);
	for (name_idx = 1; name_idx <= lens[0]; name_idx++) {
		*adjust_iovec += lens[name_idx];
	}

	/*
	 * Determine if this message should be delivered to this instance
	 */
	for (name_idx = 1; name_idx <= lens[0]; name_idx++) {
		for (cand_idx = 0; cand_idx < group_b_cnt; cand_idx++) {
			if (lens[name_idx] != groups_b[cand_idx].group_len) {
				continue;
			}
			if (memcmp (groups_b[cand_idx].group, name_cursor,
				lens[name_idx]) == 0) {
				return (1);
			}
		}
		name_cursor += lens[name_idx];
	}

	return (0);
}
/*
 * Deliver one assembled message to every registered group instance whose
 * groups match the groups named in the message header.
 *
 * nodeid                     - passed through to each deliver_fn
 * msg, msg_len               - message, starting with its group header
 * endian_conversion_required - when non-zero the group header is byte
 *                              swapped in place first; the flag is also
 *                              forwarded to each deliver_fn
 *
 * Each matching instance receives the message with the group header and
 * group names stripped off.
 */
static inline void app_deliver_fn (
	unsigned int nodeid,
	void *msg,
	unsigned int msg_len,
	int endian_conversion_required)
{
	struct totempg_group_instance *instance;
	struct iovec stripped_iovec;
	unsigned int adjust_iovec;
	struct iovec *iovec;
	struct list_head *list;
	struct iovec aligned_iovec = { NULL, 0 };

	if (endian_conversion_required) {
		group_endian_convert (msg, msg_len);
	}

	/*
	 * TODO: segmentation/assembly need to be redesigned to provide aligned access
	 * in all cases to avoid memory copies on non386 archs. Probably broke backwards
	 * compatibility
	 */
#ifdef TOTEMPG_NEED_ALIGN
	/*
	 * Align data structure for not i386 or x86_64
	 */
	aligned_iovec.iov_base = alloca(msg_len);
	aligned_iovec.iov_len = msg_len;
	memcpy(aligned_iovec.iov_base, msg, msg_len);
#else
	aligned_iovec.iov_base = msg;
	aligned_iovec.iov_len = msg_len;
#endif

	iovec = &aligned_iovec;

	for (list = totempg_groups_list.next;
		list != &totempg_groups_list;
		list = list->next) {

		instance = list_entry (list, struct totempg_group_instance, list);
		if (group_matches (iovec, 1, instance->groups, instance->groups_cnt, &adjust_iovec)) {
			stripped_iovec.iov_len = iovec->iov_len - adjust_iovec;
			stripped_iovec.iov_base = (char *)iovec->iov_base + adjust_iovec;

#ifdef TOTEMPG_NEED_ALIGN
			/*
			 * Align data structure for not i386 or x86_64.
			 *
			 * The alignment of the payload ADDRESS is what matters.
			 * '%' binds tighter than '+', so the sum must be
			 * converted to an integer before taking the remainder;
			 * otherwise the test compares a pointer with 0 and is
			 * always true.
			 */
			if ((size_t)((char *)iovec->iov_base + adjust_iovec) % 4 != 0) {
				/*
				 * Deal with misalignment
				 */
				stripped_iovec.iov_base =
					alloca (stripped_iovec.iov_len);
				memcpy (stripped_iovec.iov_base,
					 (char *)iovec->iov_base + adjust_iovec,
					stripped_iovec.iov_len);
			}
#endif
			instance->deliver_fn (
				nodeid,
				stripped_iovec.iov_base,
				stripped_iovec.iov_len,
				endian_conversion_required);
		}
	}
}
/*
 * Configuration-change callback registered with totemsrp; hands every
 * argument on to app_confchg_fn unchanged.
 */
static void totempg_confchg_fn (
	enum totem_configuration_type configuration_type,
	const unsigned int *member_list, size_t member_list_entries,
	const unsigned int *left_list, size_t left_list_entries,
	const unsigned int *joined_list, size_t joined_list_entries,
	const struct memb_ring_id *ring_id)
{
	/* TODO optimize this */
	app_confchg_fn (
		configuration_type,
		member_list,
		member_list_entries,
		left_list,
		left_list_entries,
		joined_list,
		joined_list_entries,
		ring_id);
}
/*
 * Delivery callback registered with totemsrp: unpack one received mcast.
 *
 * The packet consists of a struct totempg_mcast, msg_count 16-bit message
 * lengths and then the packed message data. The data is appended to the
 * sending node's assembly buffer; every complete message is passed to
 * app_deliver_fn, and a trailing fragment (mcast->fragmented set) is kept
 * at the start of the assembly buffer until its continuation arrives.
 */
static void totempg_deliver_fn (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required)
{
struct totempg_mcast *mcast;
unsigned short *msg_lens;
int i;
struct assembly *assembly;
char header[FRAME_SIZE_MAX];
int msg_count;
int continuation;
int start;
const char *data;
int datasize;
struct iovec iov_delv;
assembly = assembly_ref (nodeid);
assert (assembly);
/*
* Assemble the header into one block of data and
* assemble the packet contents into one block of data to simplify delivery
*/
/* NOTE(review): msg is const, but msg_count is swapped in place through this cast. */
mcast = (struct totempg_mcast *)msg;
if (endian_conversion_required) {
mcast->msg_count = swab16 (mcast->msg_count);
}
msg_count = mcast->msg_count;
/*
 * NOTE(review): datasize comes from the sender-supplied msg_count and is
 * not checked here against msg_len or sizeof (header) before the copies
 * below -- confirm the sizes are validated by the caller.
 */
datasize = sizeof (struct totempg_mcast) +
msg_count * sizeof (unsigned short);
memcpy (header, msg, datasize);
data = msg;
/* The length table is converted in the local header copy, not in msg. */
msg_lens = (unsigned short *) (header + sizeof (struct totempg_mcast));
if (endian_conversion_required) {
for (i = 0; i < mcast->msg_count; i++) {
msg_lens[i] = swab16 (msg_lens[i]);
}
}
/* Append the packed message data after whatever is already assembled. */
memcpy (&assembly->data[assembly->index], &data[datasize],
msg_len - datasize);
/*
* If the last message in the buffer is a fragment, then we
* can't deliver it. We'll first deliver the full messages
* then adjust the assembly buffer so we can add the rest of the
* fragment when it arrives.
*/
msg_count = mcast->fragmented ? mcast->msg_count - 1 : mcast->msg_count;
continuation = mcast->continuation;
/* The first deliverable message spans the kept fragment plus msg_lens[0]. */
iov_delv.iov_base = (void *)&assembly->data[0];
iov_delv.iov_len = assembly->index + msg_lens[0];
/*
* Make sure that if this message is a continuation, that it
* matches the sequence number of the previous fragment.
* Also, if the first packed message is a continuation
* of a previous message, but the assembly buffer
* is empty, then we need to discard it since we can't
* assemble a complete message. Likewise, if this message isn't a
* continuation and the assembly buffer is empty, we have to discard
* the continued message.
*/
start = 0;
if (assembly->throw_away_mode == THROW_AWAY_ACTIVE) {
/* Throw away the first msg block */
if (mcast->fragmented == 0 || mcast->fragmented == 1) {
assembly->throw_away_mode = THROW_AWAY_INACTIVE;
assembly->index += msg_lens[0];
iov_delv.iov_base = (void *)&assembly->data[assembly->index];
iov_delv.iov_len = msg_lens[1];
start = 1;
}
} else
if (assembly->throw_away_mode == THROW_AWAY_INACTIVE) {
if (continuation == assembly->last_frag_num) {
assembly->last_frag_num = mcast->fragmented;
/* Deliver each complete message and advance past it. */
for (i = start; i < msg_count; i++) {
app_deliver_fn(nodeid, iov_delv.iov_base, iov_delv.iov_len,
endian_conversion_required);
assembly->index += msg_lens[i];
iov_delv.iov_base = (void *)&assembly->data[assembly->index];
if (i < (msg_count - 1)) {
iov_delv.iov_len = msg_lens[i + 1];
}
}
} else {
log_printf (LOG_DEBUG, "fragmented continuation %u is not equal to assembly last_frag_num %u",
continuation, assembly->last_frag_num);
assembly->throw_away_mode = THROW_AWAY_ACTIVE;
}
}
if (mcast->fragmented == 0) {
/*
* End of messages, dereference assembly struct
*/
assembly->last_frag_num = 0;
assembly->index = 0;
assembly_deref (assembly);
} else {
/*
* Message is fragmented, keep around assembly list
*/
if (mcast->msg_count > 1) {
/* Slide the trailing fragment down to the start of the buffer. */
memmove (&assembly->data[0],
&assembly->data[assembly->index],
msg_lens[msg_count]);
assembly->index = 0;
}
assembly->index += msg_lens[msg_count];
}
}
/*
* Totem Process Group Abstraction
* depends on poll abstraction, POSIX, IPV4
*/
void *callback_token_received_handle;
/*
 * Token-received callback: flush the staged packed messages as one mcast.
 *
 * Does nothing when no messages are staged or when the avail check
 * reports 0. Otherwise sends the totempg_mcast header, the table of
 * packed message lengths and the staged data as three iovecs, then resets
 * the staging state. mcast_msg_mutex is held for the duration when
 * threaded mode is enabled. Always returns 0; the type and data
 * arguments are not used.
 */
int callback_token_received_fn (enum totem_callback_token_type type,
const void *data)
{
struct totempg_mcast mcast;
struct iovec iovecs[3];
if (totempg_threaded_mode == 1) {
pthread_mutex_lock (&mcast_msg_mutex);
}
if (mcast_packed_msg_count == 0) {
if (totempg_threaded_mode == 1) {
pthread_mutex_unlock (&mcast_msg_mutex);
}
return (0);
}
- if (totemmrp_avail() == 0) {
+ if (totemsrp_avail(totemsrp_context) == 0) {
if (totempg_threaded_mode == 1) {
pthread_mutex_unlock (&mcast_msg_mutex);
}
return (0);
}
mcast.header.version = 0;
mcast.header.type = 0;
mcast.fragmented = 0;
/*
* Was the first message in this buffer a continuation of a
* fragmented message?
*/
mcast.continuation = fragment_continuation;
fragment_continuation = 0;
mcast.msg_count = mcast_packed_msg_count;
iovecs[0].iov_base = (void *)&mcast;
iovecs[0].iov_len = sizeof (struct totempg_mcast);
iovecs[1].iov_base = (void *)mcast_packed_msg_lens;
iovecs[1].iov_len = mcast_packed_msg_count * sizeof (unsigned short);
iovecs[2].iov_base = (void *)&fragmentation_data[0];
iovecs[2].iov_len = fragment_size;
/* The send result is deliberately discarded. */
- (void)totemmrp_mcast (iovecs, 3, 0);
+ (void)totemsrp_mcast (totemsrp_context, iovecs, 3, 0);
mcast_packed_msg_count = 0;
fragment_size = 0;
if (totempg_threaded_mode == 1) {
pthread_mutex_unlock (&mcast_msg_mutex);
}
return (0);
}
/*
* Initialize the totem process group abstraction
*/
/*
 * Copies the logging configuration out of totem_config, allocates the
 * staging buffer, initializes the underlying totem layer, registers the
 * token-received callback and computes totempg_size_limit.
 *
 * Returns -1 if the staging buffer cannot be allocated, otherwise the
 * result of the underlying initialize call.
 */
int totempg_initialize (
qb_loop_t *poll_handle,
struct totem_config *totem_config)
{
int res;
totempg_totem_config = totem_config;
totempg_log_level_security = totem_config->totem_logging_configuration.log_level_security;
totempg_log_level_error = totem_config->totem_logging_configuration.log_level_error;
totempg_log_level_warning = totem_config->totem_logging_configuration.log_level_warning;
totempg_log_level_notice = totem_config->totem_logging_configuration.log_level_notice;
totempg_log_level_debug = totem_config->totem_logging_configuration.log_level_debug;
totempg_log_printf = totem_config->totem_logging_configuration.log_printf;
totempg_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
/* Sized from net_mtu as it is before the adjustment below. */
fragmentation_data = malloc (TOTEMPG_PACKET_SIZE);
if (fragmentation_data == 0) {
return (-1);
}
totemsrp_net_mtu_adjust (totem_config);
/*
 * NOTE(review): res is not checked before the calls that follow, and
 * fragmentation_data is not released when initialization fails --
 * confirm this is intended.
 */
- res = totemmrp_initialize (
+ res = totemsrp_initialize (
poll_handle,
+ &totemsrp_context,
totem_config,
&totempg_stats,
totempg_deliver_fn,
totempg_confchg_fn,
totempg_waiting_trans_ack_cb);
- totemmrp_callback_token_create (
+ totemsrp_callback_token_create (
+ totemsrp_context,
&callback_token_received_handle,
TOTEM_CALLBACK_TOKEN_RECEIVED,
0,
callback_token_received_fn,
0);
- totempg_size_limit = (totemmrp_avail() - 1) *
+ totempg_size_limit = (totemsrp_avail(totemsrp_context) - 1) *
(totempg_totem_config->net_mtu -
sizeof (struct totempg_mcast) - 16);
list_init (&totempg_groups_list);
return (res);
}
/*
 * Finalize the underlying totem layer, holding totempg_mutex around the
 * call when threaded mode is enabled.
 */
void totempg_finalize (void)
{
if (totempg_threaded_mode == 1) {
pthread_mutex_lock (&totempg_mutex);
}
- totemmrp_finalize ();
+ totemsrp_finalize (totemsrp_context);
if (totempg_threaded_mode == 1) {
pthread_mutex_unlock (&totempg_mutex);
}
}
/*
 * Multicast a message
 *
 * Packs the non-empty entries of iovec_in into fragmentation_data and hands a
 * packet to the underlying totem layer each time the buffer cannot take the
 * next piece with room to spare. Data left in the buffer on return stays
 * there (fragment_size, mcast_packed_msg_lens and mcast_packed_msg_count are
 * file-level state shared between calls).
 *
 * iovec_in  - message pieces; zero-length entries are dropped
 * iov_len   - number of entries in iovec_in (asserted to be < 64)
 * guarantee - passed through unchanged to the underlying mcast call
 *
 * Returns -1 if byte_count_send_ok rejects the total size or the underlying
 * mcast call returns -1; otherwise the value of the last mcast call, or 0 when
 * no packet was sent. When threaded mode is enabled the whole function runs
 * with mcast_msg_mutex held.
 */
static int mcast_msg (
struct iovec *iovec_in,
unsigned int iov_len,
int guarantee)
{
int res = 0;
struct totempg_mcast mcast;
struct iovec iovecs[3];
struct iovec iovec[64];
int i;
int dest, src;
int max_packet_size = 0;
int copy_len = 0;
int copy_base = 0;
int total_size = 0;
if (totempg_threaded_mode == 1) {
pthread_mutex_lock (&mcast_msg_mutex);
}
- totemmrp_event_signal (TOTEM_EVENT_NEW_MSG, 1);
+ totemsrp_event_signal (totemsrp_context, TOTEM_EVENT_NEW_MSG, 1);
/*
 * Remove zero length iovectors from the list
 */
assert (iov_len < 64);
for (dest = 0, src = 0; src < iov_len; src++) {
if (iovec_in[src].iov_len) {
memcpy (&iovec[dest++], &iovec_in[src],
sizeof (struct iovec));
}
}
iov_len = dest;
/*
 * Room for data: packet size minus one unsigned short length slot for each
 * message already packed plus one for this message.
 */
max_packet_size = TOTEMPG_PACKET_SIZE -
(sizeof (unsigned short) * (mcast_packed_msg_count + 1));
mcast_packed_msg_lens[mcast_packed_msg_count] = 0;
/*
 * Check if we would overwrite new message queue
 */
for (i = 0; i < iov_len; i++) {
total_size += iovec[i].iov_len;
}
if (byte_count_send_ok (total_size + sizeof(unsigned short) *
(mcast_packed_msg_count)) == 0) {
if (totempg_threaded_mode == 1) {
pthread_mutex_unlock (&mcast_msg_mutex);
}
return(-1);
}
mcast.header.version = 0;
/* i advances only once the current iovec has been fully consumed. */
for (i = 0; i < iov_len; ) {
mcast.fragmented = 0;
mcast.continuation = fragment_continuation;
/* copy_base is the number of bytes of iovec[i] already consumed. */
copy_len = iovec[i].iov_len - copy_base;
/*
 * If it all fits with room left over, copy it in.
 * We need to leave at least sizeof(short) + 1 bytes in the
 * fragment_buffer on exit so that max_packet_size + fragment_size
 * doesn't exceed the size of the fragment_buffer on the next call.
 */
if ((copy_len + fragment_size) <
(max_packet_size - sizeof (unsigned short))) {
memcpy (&fragmentation_data[fragment_size],
(char *)iovec[i].iov_base + copy_base, copy_len);
fragment_size += copy_len;
mcast_packed_msg_lens[mcast_packed_msg_count] += copy_len;
next_fragment = 1;
copy_len = 0;
copy_base = 0;
i++;
continue;
/*
 * If it just fits or is too big, then send out what fits.
 */
} else {
unsigned char *data_ptr;
copy_len = min(copy_len, max_packet_size - fragment_size);
/*
 * A piece that fills a whole packet is sent straight from the caller's
 * buffer; anything else is sent from fragmentation_data.
 */
if( copy_len == max_packet_size )
data_ptr = (unsigned char *)iovec[i].iov_base + copy_base;
else {
data_ptr = fragmentation_data;
memcpy (&fragmentation_data[fragment_size],
(unsigned char *)iovec[i].iov_base + copy_base, copy_len);
}
/*
 * NOTE(review): this copy repeats the one in the else branch above and
 * also runs when data_ptr points at the caller's buffer - confirm it is
 * still required.
 */
memcpy (&fragmentation_data[fragment_size],
(unsigned char *)iovec[i].iov_base + copy_base, copy_len);
mcast_packed_msg_lens[mcast_packed_msg_count] += copy_len;
/*
 * if we're not on the last iovec or the iovec is too large to
 * fit, then indicate a fragment. This also means that the next
 * message will have the continuation of this one.
 */
if ((i < (iov_len - 1)) ||
((copy_base + copy_len) < iovec[i].iov_len)) {
/* Keep the fragment number non-zero; the asserts below rely on it. */
if (!next_fragment) {
next_fragment++;
}
fragment_continuation = next_fragment;
mcast.fragmented = next_fragment++;
assert(fragment_continuation != 0);
assert(mcast.fragmented != 0);
} else {
fragment_continuation = 0;
}
/*
 * assemble the message and send it
 */
/* Packet layout: mcast header, table of packed message lengths, data. */
mcast.msg_count = ++mcast_packed_msg_count;
iovecs[0].iov_base = (void *)&mcast;
iovecs[0].iov_len = sizeof(struct totempg_mcast);
iovecs[1].iov_base = (void *)mcast_packed_msg_lens;
iovecs[1].iov_len = mcast_packed_msg_count *
sizeof(unsigned short);
iovecs[2].iov_base = (void *)data_ptr;
iovecs[2].iov_len = max_packet_size;
- assert (totemmrp_avail() > 0);
- res = totemmrp_mcast (iovecs, 3, guarantee);
+ assert (totemsrp_avail(totemsrp_context) > 0);
+ res = totemsrp_mcast (totemsrp_context, iovecs, 3, guarantee);
if (res == -1) {
goto error_exit;
}
/*
 * Recalculate counts and indexes for the next.
 */
mcast_packed_msg_lens[0] = 0;
mcast_packed_msg_count = 0;
fragment_size = 0;
max_packet_size = TOTEMPG_PACKET_SIZE - (sizeof(unsigned short));
/*
 * If the iovec all fit, go to the next iovec
 */
if ((copy_base + copy_len) == iovec[i].iov_len) {
copy_len = 0;
copy_base = 0;
i++;
/*
 * Continue with the rest of the current iovec.
 */
} else {
copy_base += copy_len;
}
}
}
/*
 * Bump only if we added message data. This may be zero if
 * the last buffer just fit into the fragmentation_data buffer
 * and we were at the last iovec.
 */
if (mcast_packed_msg_lens[mcast_packed_msg_count]) {
mcast_packed_msg_count++;
}
error_exit:
if (totempg_threaded_mode == 1) {
pthread_mutex_unlock (&mcast_msg_mutex);
}
return (res);
}
/*
 * Determine whether msg_count messages could be queued.
 *
 * Queries the number of available queue slots from the underlying totem
 * layer, records it in totempg_stats.msg_queue_avail, and returns non-zero
 * when the slots left after subtracting totempg_reserved exceed msg_count.
 */
static int msg_count_send_ok (
int msg_count)
{
int avail = 0;
- avail = totemmrp_avail ();
+ avail = totemsrp_avail (totemsrp_context);
totempg_stats.msg_queue_avail = avail;
return ((avail - totempg_reserved) > msg_count);
}
/*
 * Determine whether byte_count bytes could be queued.
 *
 * Converts byte_count into a message count by dividing by the per-packet byte
 * budget (net_mtu minus the totempg_mcast header minus 16) and adding one,
 * then returns non-zero when at least that many queue slots are available.
 * Unlike msg_count_send_ok, totempg_reserved is not subtracted here.
 */
static int byte_count_send_ok (
int byte_count)
{
unsigned int msg_count = 0;
int avail = 0;
- avail = totemmrp_avail ();
+ avail = totemsrp_avail (totemsrp_context);
msg_count = (byte_count / (totempg_totem_config->net_mtu - sizeof (struct totempg_mcast) - 16)) + 1;
return (avail >= msg_count);
}
/*
 * Reserve queue slots for a message of msg_size bytes.
 *
 * The slot count is msg_size divided by the per-packet byte budget (net_mtu
 * minus the totempg_mcast header minus 16), plus one. The count is added to
 * totempg_reserved, mirrored into totempg_stats.msg_reserved, and returned.
 */
static int send_reserve (
	int msg_size)
{
	unsigned int slots;

	slots = (msg_size / (totempg_totem_config->net_mtu - sizeof (struct totempg_mcast) - 16)) + 1;

	totempg_reserved = totempg_reserved + slots;
	totempg_stats.msg_reserved = totempg_reserved;

	return (slots);
}
/*
 * Give back msg_count previously reserved queue slots and mirror the new
 * total into totempg_stats.msg_reserved.
 */
static void send_release (
	int msg_count)
{
	totempg_reserved = totempg_reserved - msg_count;
	totempg_stats.msg_reserved = totempg_reserved;
}
/*
 * Unless HAVE_SMALL_MEMORY_FOOTPRINT is defined, replace MESSAGE_QUEUE_MAX
 * with a value derived from the configured net_mtu. The replacement reads
 * totempg_totem_config, so it is evaluated at each point of use.
 */
#ifndef HAVE_SMALL_MEMORY_FOOTPRINT
#undef MESSAGE_QUEUE_MAX
#define MESSAGE_QUEUE_MAX ((4 * MESSAGE_SIZE_MAX) / totempg_totem_config->net_mtu)
#endif /* HAVE_SMALL_MEMORY_FOOTPRINT */
/*
 * Return the percentage of the message queue in use: 100 minus the share of
 * MESSAGE_QUEUE_MAX that is still available after subtracting
 * totempg_reserved.
 */
static uint32_t q_level_precent_used(void)
{
- return (100 - (((totemmrp_avail() - totempg_reserved) * 100) / MESSAGE_QUEUE_MAX));
+ return (100 - (((totemsrp_avail(totemsrp_context) - totempg_reserved) * 100) / MESSAGE_QUEUE_MAX));
}
/*
 * Register a token callback with the underlying totem layer.
 *
 * All arguments are forwarded unchanged; the result of the underlying call is
 * returned. When threaded mode is enabled the call is made with
 * callback_token_mutex held.
 */
int totempg_callback_token_create (
void **handle_out,
enum totem_callback_token_type type,
int delete,
int (*callback_fn) (enum totem_callback_token_type type, const void *),
const void *data)
{
unsigned int res;
if (totempg_threaded_mode == 1) {
pthread_mutex_lock (&callback_token_mutex);
}
- res = totemmrp_callback_token_create (handle_out, type, delete,
+ res = totemsrp_callback_token_create (totemsrp_context, handle_out, type, delete,
callback_fn, data);
if (totempg_threaded_mode == 1) {
pthread_mutex_unlock (&callback_token_mutex);
}
return (res);
}
/*
 * Remove a token callback by forwarding handle_out to the underlying totem
 * layer. When threaded mode is enabled the call is made with
 * callback_token_mutex held.
 */
void totempg_callback_token_destroy (
void *handle_out)
{
if (totempg_threaded_mode == 1) {
pthread_mutex_lock (&callback_token_mutex);
}
- totemmrp_callback_token_destroy (handle_out);
+ totemsrp_callback_token_destroy (totemsrp_context, handle_out);
if (totempg_threaded_mode == 1) {
pthread_mutex_unlock (&callback_token_mutex);
}
}
/*
* vi: set autoindent tabstop=4 shiftwidth=4 :
*/
/*
 * Create a group instance and link it into totempg_groups_list.
 *
 * totempg_groups_instance - receives the new instance on success
 * deliver_fn              - stored in the instance as its delivery callback
 * confchg_fn              - stored in the instance as its configuration
 *                           change callback
 *
 * The instance starts with no groups and q_level set to QB_LOOP_MED.
 * Returns 0 on success, or -1 if the instance cannot be allocated (in which
 * case *totempg_groups_instance is left untouched). When threaded mode is
 * enabled the allocation and list insertion run with totempg_mutex held.
 */
int totempg_groups_initialize (
	void **totempg_groups_instance,
	void (*deliver_fn) (
		unsigned int nodeid,
		const void *msg,
		unsigned int msg_len,
		int endian_conversion_required),
	void (*confchg_fn) (
		enum totem_configuration_type configuration_type,
		const unsigned int *member_list, size_t member_list_entries,
		const unsigned int *left_list, size_t left_list_entries,
		const unsigned int *joined_list, size_t joined_list_entries,
		const struct memb_ring_id *ring_id))
{
	struct totempg_group_instance *created;
	int rc = -1;

	if (totempg_threaded_mode == 1) {
		pthread_mutex_lock (&totempg_mutex);
	}

	created = malloc (sizeof (struct totempg_group_instance));
	if (created != NULL) {
		created->deliver_fn = deliver_fn;
		created->confchg_fn = confchg_fn;
		created->groups = 0;
		created->groups_cnt = 0;
		created->q_level = QB_LOOP_MED;
		list_init (&created->list);
		list_add (&created->list, &totempg_groups_list);
		rc = 0;
	}

	if (totempg_threaded_mode == 1) {
		pthread_mutex_unlock (&totempg_mutex);
	}

	/* Publish the handle only after the lock has been dropped. */
	if (rc == 0) {
		*totempg_groups_instance = created;
	}
	return (rc);
}
/*
 * Append group_cnt entries from groups to the instance's joined-group array.
 *
 * totempg_groups_instance - instance pointer as produced by
 *                           totempg_groups_initialize
 * groups                  - descriptors to append; each struct totempg_group
 *                           is copied by value
 * group_cnt               - number of entries in groups
 *
 * Returns 0 on success (including the no-op case group_cnt == 0) or ENOMEM
 * when the array cannot be grown; on failure the existing array is left
 * unchanged. When threaded mode is enabled the update runs with
 * totempg_mutex held.
 */
int totempg_groups_join (
	void *totempg_groups_instance,
	const struct totempg_group *groups,
	size_t group_cnt)
{
	struct totempg_group_instance *instance = (struct totempg_group_instance *)totempg_groups_instance;
	struct totempg_group *new_groups;
	unsigned int res = 0;

	if (totempg_threaded_mode == 1) {
		pthread_mutex_lock (&totempg_mutex);
	}

	/*
	 * Nothing to append. Leave before touching realloc/memcpy: with no
	 * groups joined yet the requested size would be zero, for which realloc
	 * may legitimately return NULL (which would be misreported as ENOMEM),
	 * and memcpy must not be handed a null pointer even for a zero length.
	 */
	if (group_cnt == 0) {
		goto error_exit;
	}

	new_groups = realloc (instance->groups,
		sizeof (struct totempg_group) *
		(instance->groups_cnt + group_cnt));
	if (new_groups == 0) {
		res = ENOMEM;
		goto error_exit;
	}

	/* Copy the new descriptors behind the ones already joined. */
	memcpy (&new_groups[instance->groups_cnt],
		groups, group_cnt * sizeof (struct totempg_group));
	instance->groups = new_groups;
	instance->groups_cnt += group_cnt;

error_exit:
	if (totempg_threaded_mode == 1) {
		pthread_mutex_unlock (&totempg_mutex);
	}
	return (res);
}
/*
 * Leave groups - currently a stub.
 *
 * None of the arguments are used: the function only takes and releases
 * totempg_mutex (when threaded mode is enabled) and returns 0. The
 * instance's group array is not modified.
 */
int totempg_groups_leave (
void *totempg_groups_instance,
const struct totempg_group *groups,
size_t group_cnt)
{
if (totempg_threaded_mode == 1) {
pthread_mutex_lock (&totempg_mutex);
}
if (totempg_threaded_mode == 1) {
pthread_mutex_unlock (&totempg_mutex);
}
return (0);
}
#define MAX_IOVECS_FROM_APP 32
#define MAX_GROUPS_PER_MSG 32
/*
 * Multicast a message to every group the instance has joined.
 *
 * Builds one iovec array laid out as: [0] a table of unsigned shorts (group
 * count followed by each group's name length), [1..groups_cnt] the group
 * names, then the caller's iovecs, and passes it to mcast_msg.
 *
 * totempg_groups_instance - instance whose joined groups address the message
 * iovec, iov_len          - caller's message pieces
 * guarantee               - passed through to mcast_msg
 *
 * Returns the result of mcast_msg, or -1 without sending when the joined
 * group count exceeds MAX_GROUPS_PER_MSG or the combined vector would not
 * fit the on-stack arrays. When threaded mode is enabled the function runs
 * with totempg_mutex held.
 */
int totempg_groups_mcast_joined (
	void *totempg_groups_instance,
	const struct iovec *iovec,
	unsigned int iov_len,
	int guarantee)
{
	struct totempg_group_instance *instance = (struct totempg_group_instance *)totempg_groups_instance;
	unsigned short group_len[MAX_GROUPS_PER_MSG + 1];
	struct iovec iovec_mcast[MAX_GROUPS_PER_MSG + 1 + MAX_IOVECS_FROM_APP];
	size_t groups_cnt;
	size_t i;
	unsigned int res;

	if (totempg_threaded_mode == 1) {
		pthread_mutex_lock (&totempg_mutex);
	}

	groups_cnt = instance->groups_cnt;

	/*
	 * Refuse input that would write past group_len[] or iovec_mcast[].
	 * Only calls that previously overflowed the stack are rejected: the
	 * iovec limit is whatever room the group entries leave in iovec_mcast[].
	 */
	if (groups_cnt > MAX_GROUPS_PER_MSG ||
	    iov_len > (MAX_GROUPS_PER_MSG + MAX_IOVECS_FROM_APP) - groups_cnt) {
		res = -1;
		goto error_exit;
	}

	/*
	 * Build group_len structure and the iovec_mcast structure
	 */
	group_len[0] = groups_cnt;
	for (i = 0; i < groups_cnt; i++) {
		group_len[i + 1] = instance->groups[i].group_len;
		iovec_mcast[i + 1].iov_len = instance->groups[i].group_len;
		iovec_mcast[i + 1].iov_base = (void *) instance->groups[i].group;
	}
	iovec_mcast[0].iov_len = (groups_cnt + 1) * sizeof (unsigned short);
	iovec_mcast[0].iov_base = group_len;

	/* The caller's pieces follow the header table and the group names. */
	for (i = 0; i < iov_len; i++) {
		iovec_mcast[i + groups_cnt + 1].iov_len = iovec[i].iov_len;
		iovec_mcast[i + groups_cnt + 1].iov_base = iovec[i].iov_base;
	}

	res = mcast_msg (iovec_mcast, iov_len + groups_cnt + 1, guarantee);

error_exit:
	if (totempg_threaded_mode == 1) {
		pthread_mutex_unlock (&totempg_mutex);
	}
	return (res);
}
/*
 * Re-evaluate the instance's queue level from the current queue usage and
 * invoke totem_queue_level_changed (if registered) when the level moved.
 *
 * Usage bands: >= 75 critical, < 30 low, strictly between 40 and 50 good,
 * strictly between 60 and 70 high. Any other usage value keeps the level
 * the instance already had.
 */
static void check_q_level(
	void *totempg_groups_instance)
{
	struct totempg_group_instance *instance = (struct totempg_group_instance *)totempg_groups_instance;
	int32_t previous = instance->q_level;
	int32_t used = q_level_precent_used();
	int32_t next = previous;

	if (used >= 75) {
		next = TOTEM_Q_LEVEL_CRITICAL;
	} else if (used < 30) {
		next = TOTEM_Q_LEVEL_LOW;
	} else if (used > 40 && used < 50) {
		next = TOTEM_Q_LEVEL_GOOD;
	} else if (used > 60 && used < 70) {
		next = TOTEM_Q_LEVEL_HIGH;
	}

	if (next != previous) {
		instance->q_level = next;
	}

	if (totem_queue_level_changed && previous != instance->q_level) {
		totem_queue_level_changed(instance->q_level);
	}
}
/*
 * Public entry point for the queue level check: hands the instance to
 * check_q_level.
 */
void totempg_check_q_level(
	void *totempg_groups_instance)
{
	struct totempg_group_instance *group_instance;

	group_instance = (struct totempg_group_instance *)totempg_groups_instance;
	check_q_level(group_instance);
}
/*
 * Try to reserve queue space for a message addressed to the joined groups.
 *
 * The size considered is the sum of the joined groups' name lengths plus the
 * lengths of the caller's iovecs.
 *
 * Returns -1 when that size reaches totempg_size_limit, 0 when
 * byte_count_send_ok reports the queue cannot take it, otherwise the slot
 * count returned by send_reserve. check_q_level is run on the instance in
 * every case. When threaded mode is enabled the function holds totempg_mutex
 * and mcast_msg_mutex, taken in that order.
 */
int totempg_groups_joined_reserve (
	void *totempg_groups_instance,
	const struct iovec *iovec,
	unsigned int iov_len)
{
	struct totempg_group_instance *instance = (struct totempg_group_instance *)totempg_groups_instance;
	unsigned int total_bytes = 0;
	unsigned int idx;
	unsigned int reserved = 0;

	if (totempg_threaded_mode == 1) {
		pthread_mutex_lock (&totempg_mutex);
		pthread_mutex_lock (&mcast_msg_mutex);
	}

	for (idx = 0; idx < instance->groups_cnt; idx++) {
		total_bytes += instance->groups[idx].group_len;
	}
	for (idx = 0; idx < iov_len; idx++) {
		total_bytes += iovec[idx].iov_len;
	}

	if (total_bytes >= totempg_size_limit) {
		/* Too large to ever be sent. */
		reserved = -1;
	} else if (byte_count_send_ok (total_bytes)) {
		reserved = send_reserve (total_bytes);
	} else {
		reserved = 0;
	}

	check_q_level(instance);

	if (totempg_threaded_mode == 1) {
		pthread_mutex_unlock (&mcast_msg_mutex);
		pthread_mutex_unlock (&totempg_mutex);
	}
	return (reserved);
}
/*
 * Release msg_count previously reserved queue slots via send_release.
 *
 * Always returns 0. When threaded mode is enabled the release runs with
 * totempg_mutex and mcast_msg_mutex held, taken in that order and released
 * in reverse.
 */
int totempg_groups_joined_release (int msg_count)
{
if (totempg_threaded_mode == 1) {
pthread_mutex_lock (&totempg_mutex);
pthread_mutex_lock (&mcast_msg_mutex);
}
send_release (msg_count);
if (totempg_threaded_mode == 1) {
pthread_mutex_unlock (&mcast_msg_mutex);
pthread_mutex_unlock (&totempg_mutex);
}
return 0;
}
/*
 * Multicast a message to an explicit list of groups.
 *
 * Builds one iovec array laid out as: [0] a table of unsigned shorts (group
 * count followed by each group's name length), [1..groups_cnt] the group
 * names, then the caller's iovecs, and passes it to mcast_msg.
 *
 * totempg_groups_instance - not used by this function
 * guarantee               - passed through to mcast_msg
 * groups, groups_cnt      - destination groups
 * iovec, iov_len          - caller's message pieces
 *
 * Returns the result of mcast_msg, or -1 without sending when groups_cnt
 * exceeds MAX_GROUPS_PER_MSG or the combined vector would not fit the
 * on-stack arrays. When threaded mode is enabled the function runs with
 * totempg_mutex held.
 */
int totempg_groups_mcast_groups (
	void *totempg_groups_instance,
	int guarantee,
	const struct totempg_group *groups,
	size_t groups_cnt,
	const struct iovec *iovec,
	unsigned int iov_len)
{
	unsigned short group_len[MAX_GROUPS_PER_MSG + 1];
	struct iovec iovec_mcast[MAX_GROUPS_PER_MSG + 1 + MAX_IOVECS_FROM_APP];
	size_t i;
	unsigned int res;

	if (totempg_threaded_mode == 1) {
		pthread_mutex_lock (&totempg_mutex);
	}

	/*
	 * Refuse input that would write past group_len[] or iovec_mcast[].
	 * Only calls that previously overflowed the stack are rejected: the
	 * iovec limit is whatever room the group entries leave in iovec_mcast[].
	 */
	if (groups_cnt > MAX_GROUPS_PER_MSG ||
	    iov_len > (MAX_GROUPS_PER_MSG + MAX_IOVECS_FROM_APP) - groups_cnt) {
		res = -1;
		goto error_exit;
	}

	/*
	 * Build group_len structure and the iovec_mcast structure
	 */
	group_len[0] = groups_cnt;
	for (i = 0; i < groups_cnt; i++) {
		group_len[i + 1] = groups[i].group_len;
		iovec_mcast[i + 1].iov_len = groups[i].group_len;
		iovec_mcast[i + 1].iov_base = (void *) groups[i].group;
	}
	iovec_mcast[0].iov_len = (groups_cnt + 1) * sizeof (unsigned short);
	iovec_mcast[0].iov_base = group_len;

	/* The caller's pieces follow the header table and the group names. */
	for (i = 0; i < iov_len; i++) {
		iovec_mcast[i + groups_cnt + 1].iov_len = iovec[i].iov_len;
		iovec_mcast[i + groups_cnt + 1].iov_base = iovec[i].iov_base;
	}

	res = mcast_msg (iovec_mcast, iov_len + groups_cnt + 1, guarantee);

error_exit:
	if (totempg_threaded_mode == 1) {
		pthread_mutex_unlock (&totempg_mutex);
	}
	return (res);
}
/*
 * Report whether a message for the given groups can currently be sent.
 *
 * Sums the group name lengths and the iovec lengths and returns the verdict
 * of msg_count_send_ok for that total: 1 if the message can be sent, 0 if
 * not. No path in this function returns -1.
 *
 * NOTE(review): the total is a byte count, while msg_count_send_ok names its
 * parameter msg_count - confirm which unit is intended.
 *
 * totempg_groups_instance is not used. When threaded mode is enabled the
 * function runs with totempg_mutex held.
 */
int totempg_groups_send_ok_groups (
	void *totempg_groups_instance,
	const struct totempg_group *groups,
	size_t groups_cnt,
	const struct iovec *iovec,
	unsigned int iov_len)
{
	unsigned int total = 0;
	unsigned int n;
	unsigned int verdict;

	if (totempg_threaded_mode == 1) {
		pthread_mutex_lock (&totempg_mutex);
	}

	for (n = 0; n < groups_cnt; n++) {
		total += groups[n].group_len;
	}
	for (n = 0; n < iov_len; n++) {
		total += iovec[n].iov_len;
	}

	verdict = msg_count_send_ok (total);

	if (totempg_threaded_mode == 1) {
		pthread_mutex_unlock (&totempg_mutex);
	}
	return (verdict);
}
/*
 * Look up the interfaces of nodeid by forwarding every argument to the
 * underlying totem layer; its result is returned unchanged.
 */
int totempg_ifaces_get (
unsigned int nodeid,
struct totem_ip_address *interfaces,
unsigned int interfaces_size,
char ***status,
unsigned int *iface_count)
{
int res;
- res = totemmrp_ifaces_get (
+ res = totemsrp_ifaces_get (
+ totemsrp_context,
nodeid,
interfaces,
interfaces_size,
status,
iface_count);
return (res);
}
/*
 * Forward an event of the given type and value to the underlying totem layer.
 */
void totempg_event_signal (enum totem_event_type type, int value)
{
- totemmrp_event_signal (type, value);
+ totemsrp_event_signal (totemsrp_context, type, value);
}
/*
 * Return the address of the file-level totempg_stats object as an untyped
 * pointer.
 */
void* totempg_get_stats (void)
{
	void *stats = &totempg_stats;

	return (stats);
}
/*
 * Forward the cipher and hash type names to the underlying totem layer and
 * return its result.
 */
int totempg_crypto_set (
const char *cipher_type,
const char *hash_type)
{
int res;
- res = totemmrp_crypto_set (cipher_type, hash_type);
+ res = totemsrp_crypto_set (totemsrp_context, cipher_type, hash_type);
return (res);
}
/*
 * Ask the underlying totem layer to re-enable its ring and return that
 * call's result.
 */
int totempg_ring_reenable (void)
{
int res;
- res = totemmrp_ring_reenable ();
+ res = totemsrp_ring_reenable (totemsrp_context);
return (res);
}
/* Maximum number of bytes snprintf may write for one formatted interface. */
#define ONE_IFACE_LEN 63
/*
 * Format the interfaces of nodeid as a string of "r(<index>) ip(<address>) "
 * entries.
 *
 * Returns a pointer to a static buffer that is overwritten by the next call,
 * or the literal "no interface found for nodeid" when the first lookup
 * returns -1.
 */
const char *totempg_ifaces_print (unsigned int nodeid)
{
static char iface_string[256 * INTERFACE_MAX];
char one_iface[ONE_IFACE_LEN+1];
struct totem_ip_address interfaces[INTERFACE_MAX];
- char **status;
unsigned int iface_count;
unsigned int i;
int res;
iface_string[0] = '\0';
- res = totempg_ifaces_get (nodeid, interfaces, INTERFACE_MAX, &status, &iface_count);
+ res = totempg_ifaces_get (nodeid, interfaces, INTERFACE_MAX, NULL, &iface_count);
if (res == -1) {
return ("no interface found for nodeid");
}
/*
 * NOTE(review): this repeats the lookup above with identical arguments and
 * its result is not checked - confirm the second call is needed.
 */
- res = totempg_ifaces_get (nodeid, interfaces, INTERFACE_MAX, &status, &iface_count);
+ res = totempg_ifaces_get (nodeid, interfaces, INTERFACE_MAX, NULL, &iface_count);
for (i = 0; i < iface_count; i++) {
/* snprintf truncates each entry to fit one_iface before it is appended. */
snprintf (one_iface, ONE_IFACE_LEN,
"r(%d) ip(%s) ",
i, totemip_print (&interfaces[i]));
strcat (iface_string, one_iface);
}
return (iface_string);
}
/*
 * Return the local node id as reported by the underlying totem layer.
 */
unsigned int totempg_my_nodeid_get (void)
{
- return (totemmrp_my_nodeid_get());
+ return (totemsrp_my_nodeid_get(totemsrp_context));
}
/*
 * Return the local address family as reported by the underlying totem layer.
 */
int totempg_my_family_get (void)
{
- return (totemmrp_my_family_get());
+ return (totemsrp_my_family_get(totemsrp_context));
}
/*
 * Register totem_service_ready with the underlying totem layer.
 */
extern void totempg_service_ready_register (
void (*totem_service_ready) (void))
{
- totemmrp_service_ready_register (totem_service_ready);
+ totemsrp_service_ready_register (totemsrp_context, totem_service_ready);
}
/*
 * Store fn in the file-level totem_queue_level_changed pointer, replacing any
 * previously registered callback. check_q_level calls it when an instance's
 * queue level changes.
 */
void totempg_queue_level_register_callback (totem_queue_level_changed_fn fn)
{
totem_queue_level_changed = fn;
}
/*
 * Forward a member-add request for the given address and ring number to the
 * underlying totem layer and return its result.
 */
extern int totempg_member_add (
const struct totem_ip_address *member,
int ring_no)
{
- return totemmrp_member_add (member, ring_no);
+ return totemsrp_member_add (totemsrp_context, member, ring_no);
}
/*
 * Forward a member-remove request for the given address and ring number to
 * the underlying totem layer and return its result.
 */
extern int totempg_member_remove (
const struct totem_ip_address *member,
int ring_no)
{
- return totemmrp_member_remove (member, ring_no);
+ return totemsrp_member_remove (totemsrp_context, member, ring_no);
}
/*
 * Switch this file to threaded mode (totempg_threaded_mode = 1, which makes
 * the functions here take their mutexes) and enable threaded mode in the
 * underlying totem layer.
 */
void totempg_threaded_mode_enable (void)
{
totempg_threaded_mode = 1;
- totemmrp_threaded_mode_enable ();
+ totemsrp_threaded_mode_enable (totemsrp_context);
}
/*
 * Forward a transitional acknowledgement to the underlying totem layer.
 */
void totempg_trans_ack (void)
{
- totemmrp_trans_ack ();
+ totemsrp_trans_ack (totemsrp_context);
}
diff --git a/exec/totemrrp.c b/exec/totemrrp.c
deleted file mode 100644
index 0d9a58d5..00000000
--- a/exec/totemrrp.c
+++ /dev/null
@@ -1,2335 +0,0 @@
-/*
- * Copyright (c) 2005 MontaVista Software, Inc.
- * Copyright (c) 2006-2012 Red Hat, Inc.
- *
- * All rights reserved.
- *
- * Author: Steven Dake (sdake@redhat.com)
- *
- * This software licensed under BSD license, the text of which follows:
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- * - Neither the name of the MontaVista Software, Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <config.h>
-
-#include <assert.h>
-#include <pthread.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <netdb.h>
-#include <sys/un.h>
-#include <sys/ioctl.h>
-#include <sys/param.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <errno.h>
-#include <sched.h>
-#include <time.h>
-#include <sys/time.h>
-#include <sys/poll.h>
-#include <limits.h>
-
-#include <corosync/sq.h>
-#include <corosync/list.h>
-#include <corosync/swab.h>
-#include <qb/qbdefs.h>
-#include <qb/qbloop.h>
-#define LOGSYS_UTILS_ONLY 1
-#include <corosync/logsys.h>
-
-#include "totemnet.h"
-#include "totemrrp.h"
-
-void rrp_deliver_fn (
- void *context,
- const void *msg,
- unsigned int msg_len);
-
-void rrp_iface_change_fn (
- void *context,
- const struct totem_ip_address *iface_addr);
-
-struct totemrrp_instance;
-struct passive_instance {
- struct totemrrp_instance *rrp_instance;
- unsigned int *faulty;
- unsigned int *token_recv_count;
- unsigned int *mcast_recv_count;
- unsigned char token[15000];
- unsigned int token_len;
- qb_loop_timer_handle timer_expired_token;
- qb_loop_timer_handle timer_problem_decrementer;
- void *totemrrp_context;
- unsigned int token_xmit_iface;
- unsigned int msg_xmit_iface;
-};
-
-struct active_instance {
- struct totemrrp_instance *rrp_instance;
- unsigned int *faulty;
- unsigned int *last_token_recv;
- unsigned int *counter_problems;
- unsigned char token[15000];
- unsigned int token_len;
- unsigned int last_token_seq;
- qb_loop_timer_handle timer_expired_token;
- qb_loop_timer_handle timer_problem_decrementer;
- void *totemrrp_context;
-};
-
-struct rrp_algo {
- const char *name;
-
- void * (*initialize) (
- struct totemrrp_instance *rrp_instance,
- int interface_count);
-
- void (*mcast_recv) (
- struct totemrrp_instance *instance,
- unsigned int iface_no,
- void *context,
- const void *msg,
- unsigned int msg_len);
-
- void (*mcast_noflush_send) (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len);
-
- void (*mcast_flush_send) (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len);
-
- void (*token_recv) (
- struct totemrrp_instance *instance,
- unsigned int iface_no,
- void *context,
- const void *msg,
- unsigned int msg_len,
- unsigned int token_seqid);
-
- void (*token_send) (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len);
-
- void (*recv_flush) (
- struct totemrrp_instance *instance);
-
- void (*send_flush) (
- struct totemrrp_instance *instance);
-
- void (*iface_check) (
- struct totemrrp_instance *instance);
-
- void (*processor_count_set) (
- struct totemrrp_instance *instance,
- unsigned int processor_count);
-
- void (*token_target_set) (
- struct totemrrp_instance *instance,
- struct totem_ip_address *token_target,
- unsigned int iface_no);
-
- void (*ring_reenable) (
- struct totemrrp_instance *instance,
- unsigned int iface_no);
-
- int (*mcast_recv_empty) (
- struct totemrrp_instance *instance);
-
- int (*member_add) (
- struct totemrrp_instance *instance,
- const struct totem_ip_address *member,
- unsigned int iface_no);
-
- int (*member_remove) (
- struct totemrrp_instance *instance,
- const struct totem_ip_address *member,
- unsigned int iface_no);
-
- void (*membership_changed) (
- struct totemrrp_instance *instance,
- enum totem_configuration_type configuration_type,
- const struct srp_addr *member_list, size_t member_list_entries,
- const struct srp_addr *left_list, size_t left_list_entries,
- const struct srp_addr *joined_list, size_t joined_list_entries,
- const struct memb_ring_id *ring_id);
-};
-
-#define STATUS_STR_LEN 512
-struct totemrrp_instance {
- qb_loop_t *poll_handle;
-
- struct totem_interface *interfaces;
-
- struct rrp_algo *rrp_algo;
-
- void *context;
-
- char *status[INTERFACE_MAX];
-
- void (*totemrrp_deliver_fn) (
- void *context,
- const void *msg,
- unsigned int msg_len);
-
- void (*totemrrp_iface_change_fn) (
- void *context,
- const struct totem_ip_address *iface_addr,
- unsigned int iface_no);
-
- void (*totemrrp_token_seqid_get) (
- const void *msg,
- unsigned int *seqid,
- unsigned int *token_is);
-
- void (*totemrrp_target_set_completed) (
- void *context);
-
- unsigned int (*totemrrp_msgs_missing) (void);
-
- /*
- * Function and data used to log messages
- */
- int totemrrp_log_level_security;
-
- int totemrrp_log_level_error;
-
- int totemrrp_log_level_warning;
-
- int totemrrp_log_level_notice;
-
- int totemrrp_log_level_debug;
-
- int totemrrp_subsys_id;
-
- void (*totemrrp_log_printf) (
- int level,
- int subsys,
- const char *function,
- const char *file,
- int line,
- const char *format, ...)__attribute__((format(printf, 6, 7)));
-
- void **net_handles;
-
- void *rrp_algo_instance;
-
- int interface_count;
-
- int processor_count;
-
- int my_nodeid;
-
- struct totem_config *totem_config;
-
- void *deliver_fn_context[INTERFACE_MAX];
-
- qb_loop_timer_handle timer_active_test_ring_timeout[INTERFACE_MAX];
-
- totemrrp_stats_t stats;
-};
-
-static void stats_set_interface_faulty(struct totemrrp_instance *rrp_instance,
- unsigned int iface_no, int is_faulty);
-
-/*
- * None Replication Forward Declerations
- */
-static void none_mcast_recv (
- struct totemrrp_instance *instance,
- unsigned int iface_no,
- void *context,
- const void *msg,
- unsigned int msg_len);
-
-static void none_mcast_noflush_send (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len);
-
-static void none_mcast_flush_send (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len);
-
-static void none_token_recv (
- struct totemrrp_instance *instance,
- unsigned int iface_no,
- void *context,
- const void *msg,
- unsigned int msg_len,
- unsigned int token_seqid);
-
-static void none_token_send (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len);
-
-static void none_recv_flush (
- struct totemrrp_instance *instance);
-
-static void none_send_flush (
- struct totemrrp_instance *instance);
-
-static void none_iface_check (
- struct totemrrp_instance *instance);
-
-static void none_processor_count_set (
- struct totemrrp_instance *instance,
- unsigned int processor_count_set);
-
-static void none_token_target_set (
- struct totemrrp_instance *instance,
- struct totem_ip_address *token_target,
- unsigned int iface_no);
-
-static void none_ring_reenable (
- struct totemrrp_instance *instance,
- unsigned int iface_no);
-
-static int none_mcast_recv_empty (
- struct totemrrp_instance *instance);
-
-static int none_member_add (
- struct totemrrp_instance *instance,
- const struct totem_ip_address *member,
- unsigned int iface_no);
-
-static int none_member_remove (
- struct totemrrp_instance *instance,
- const struct totem_ip_address *member,
- unsigned int iface_no);
-
-static void none_membership_changed (
- struct totemrrp_instance *instance,
- enum totem_configuration_type configuration_type,
- const struct srp_addr *member_list, size_t member_list_entries,
- const struct srp_addr *left_list, size_t left_list_entries,
- const struct srp_addr *joined_list, size_t joined_list_entries,
- const struct memb_ring_id *ring_id);
-
-/*
- * Passive Replication Forward Declerations
- */
-static void *passive_instance_initialize (
- struct totemrrp_instance *rrp_instance,
- int interface_count);
-
-static void passive_mcast_recv (
- struct totemrrp_instance *instance,
- unsigned int iface_no,
- void *context,
- const void *msg,
- unsigned int msg_len);
-
-static void passive_mcast_noflush_send (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len);
-
-static void passive_mcast_flush_send (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len);
-
-static void passive_monitor (
- struct totemrrp_instance *rrp_instance,
- unsigned int iface_no,
- int is_token_recv_count);
-
-static void passive_token_recv (
- struct totemrrp_instance *instance,
- unsigned int iface_no,
- void *context,
- const void *msg,
- unsigned int msg_len,
- unsigned int token_seqid);
-
-static void passive_token_send (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len);
-
-static void passive_recv_flush (
- struct totemrrp_instance *instance);
-
-static void passive_send_flush (
- struct totemrrp_instance *instance);
-
-static void passive_iface_check (
- struct totemrrp_instance *instance);
-
-static void passive_processor_count_set (
- struct totemrrp_instance *instance,
- unsigned int processor_count_set);
-
-static void passive_token_target_set (
- struct totemrrp_instance *instance,
- struct totem_ip_address *token_target,
- unsigned int iface_no);
-
-static void passive_ring_reenable (
- struct totemrrp_instance *instance,
- unsigned int iface_no);
-
-static int passive_mcast_recv_empty (
- struct totemrrp_instance *instance);
-
-static int passive_member_add (
- struct totemrrp_instance *instance,
- const struct totem_ip_address *member,
- unsigned int iface_no);
-
-static int passive_member_remove (
- struct totemrrp_instance *instance,
- const struct totem_ip_address *member,
- unsigned int iface_no);
-
-static void passive_membership_changed (
- struct totemrrp_instance *instance,
- enum totem_configuration_type configuration_type,
- const struct srp_addr *member_list, size_t member_list_entries,
- const struct srp_addr *left_list, size_t left_list_entries,
- const struct srp_addr *joined_list, size_t joined_list_entries,
- const struct memb_ring_id *ring_id);
-
-/*
- * Active Replication Forward Definitions
- */
-static void *active_instance_initialize (
- struct totemrrp_instance *rrp_instance,
- int interface_count);
-
-static void active_mcast_recv (
- struct totemrrp_instance *instance,
- unsigned int iface_no,
- void *context,
- const void *msg,
- unsigned int msg_len);
-
-static void active_mcast_noflush_send (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len);
-
-static void active_mcast_flush_send (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len);
-
-static void active_token_recv (
- struct totemrrp_instance *instance,
- unsigned int iface_no,
- void *context,
- const void *msg,
- unsigned int msg_len,
- unsigned int token_seqid);
-
-static void active_token_send (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len);
-
-static void active_recv_flush (
- struct totemrrp_instance *instance);
-
-static void active_send_flush (
- struct totemrrp_instance *instance);
-
-static void active_iface_check (
- struct totemrrp_instance *instance);
-
-static void active_processor_count_set (
- struct totemrrp_instance *instance,
- unsigned int processor_count_set);
-
-static void active_token_target_set (
- struct totemrrp_instance *instance,
- struct totem_ip_address *token_target,
- unsigned int iface_no);
-
-static void active_ring_reenable (
- struct totemrrp_instance *instance,
- unsigned int iface_no);
-
-static int active_mcast_recv_empty (
- struct totemrrp_instance *instance);
-
-static int active_member_add (
- struct totemrrp_instance *instance,
- const struct totem_ip_address *member,
- unsigned int iface_no);
-
-static int active_member_remove (
- struct totemrrp_instance *instance,
- const struct totem_ip_address *member,
- unsigned int iface_no);
-
-static void active_membership_changed (
- struct totemrrp_instance *instance,
- enum totem_configuration_type configuration_type,
- const struct srp_addr *member_list, size_t member_list_entries,
- const struct srp_addr *left_list, size_t left_list_entries,
- const struct srp_addr *joined_list, size_t joined_list_entries,
- const struct memb_ring_id *ring_id);
-
-static void active_timer_expired_token_start (
- struct active_instance *active_instance);
-
-static void active_timer_expired_token_cancel (
- struct active_instance *active_instance);
-
-static void active_timer_problem_decrementer_start (
- struct active_instance *active_instance);
-
-static void active_timer_problem_decrementer_cancel (
- struct active_instance *active_instance);
-
-/*
- * 0-5 reserved for totemsrp.c
- */
-#define MESSAGE_TYPE_RING_TEST_ACTIVE 6
-#define MESSAGE_TYPE_RING_TEST_ACTIVATE 7
-
-#define ENDIAN_LOCAL 0xff22
-
-/*
- * Rollover handling:
- *
- * ARR_SEQNO_START_TOKEN is the starting sequence number of last seen sequence
- * for a token for active redundand ring. This should remain zero, unless testing
- * overflow in which case 07fffff00 or 0xffffff00 are good starting values.
- * It should be same as on defined in totemsrp.c
- */
-
-#define ARR_SEQNO_START_TOKEN 0x0
-
-/*
- * These can be used ot test different rollover points
- * #define ARR_SEQNO_START_MSG 0xfffffe00
- */
-
-/*
- * Threshold value when recv_count for passive rrp should be adjusted.
- * Set this value to some smaller for testing of adjusting proper
- * functionality. Also keep in mind that this value must be smaller
- * then rrp_problem_count_threshold
- */
-#define PASSIVE_RECV_COUNT_THRESHOLD (INT_MAX / 2)
-
-struct message_header {
- char type;
- char encapsulated;
- unsigned short endian_detector;
- int ring_number;
- int nodeid_activator;
-} __attribute__((packed));
-
-struct deliver_fn_context {
- struct totemrrp_instance *instance;
- void *context;
- int iface_no;
-};
-
-struct rrp_algo none_algo = {
- .name = "none",
- .initialize = NULL,
- .mcast_recv = none_mcast_recv,
- .mcast_noflush_send = none_mcast_noflush_send,
- .mcast_flush_send = none_mcast_flush_send,
- .token_recv = none_token_recv,
- .token_send = none_token_send,
- .recv_flush = none_recv_flush,
- .send_flush = none_send_flush,
- .iface_check = none_iface_check,
- .processor_count_set = none_processor_count_set,
- .token_target_set = none_token_target_set,
- .ring_reenable = none_ring_reenable,
- .mcast_recv_empty = none_mcast_recv_empty,
- .member_add = none_member_add,
- .member_remove = none_member_remove,
- .membership_changed = none_membership_changed
-};
-
-struct rrp_algo passive_algo = {
- .name = "passive",
- .initialize = passive_instance_initialize,
- .mcast_recv = passive_mcast_recv,
- .mcast_noflush_send = passive_mcast_noflush_send,
- .mcast_flush_send = passive_mcast_flush_send,
- .token_recv = passive_token_recv,
- .token_send = passive_token_send,
- .recv_flush = passive_recv_flush,
- .send_flush = passive_send_flush,
- .iface_check = passive_iface_check,
- .processor_count_set = passive_processor_count_set,
- .token_target_set = passive_token_target_set,
- .ring_reenable = passive_ring_reenable,
- .mcast_recv_empty = passive_mcast_recv_empty,
- .member_add = passive_member_add,
- .member_remove = passive_member_remove,
- .membership_changed = passive_membership_changed
-};
-
-struct rrp_algo active_algo = {
- .name = "active",
- .initialize = active_instance_initialize,
- .mcast_recv = active_mcast_recv,
- .mcast_noflush_send = active_mcast_noflush_send,
- .mcast_flush_send = active_mcast_flush_send,
- .token_recv = active_token_recv,
- .token_send = active_token_send,
- .recv_flush = active_recv_flush,
- .send_flush = active_send_flush,
- .iface_check = active_iface_check,
- .processor_count_set = active_processor_count_set,
- .token_target_set = active_token_target_set,
- .ring_reenable = active_ring_reenable,
- .mcast_recv_empty = active_mcast_recv_empty,
- .member_add = active_member_add,
- .member_remove = active_member_remove,
- .membership_changed = active_membership_changed
-};
-
-struct rrp_algo *rrp_algos[] = {
- &none_algo,
- &passive_algo,
- &active_algo
-};
-
-#define RRP_ALGOS_COUNT 3
-
-#define log_printf(level, format, args...) \
-do { \
- rrp_instance->totemrrp_log_printf ( \
- level, rrp_instance->totemrrp_subsys_id, \
- __FUNCTION__, __FILE__, __LINE__, \
- format, ##args); \
-} while (0);
-
-static void stats_set_interface_faulty(struct totemrrp_instance *rrp_instance,
- unsigned int iface_no, int is_faulty)
-{
- rrp_instance->stats.faulty[iface_no] = (is_faulty ? 1 : 0);
-}
-
-static void test_active_msg_endian_convert(const struct message_header *in, struct message_header *out)
-{
- out->type = in->type;
- out->encapsulated = in->encapsulated;
- out->endian_detector = ENDIAN_LOCAL;
- out->ring_number = swab32 (in->ring_number);
- out->nodeid_activator = swab32(in->nodeid_activator);
-}
-
-static void timer_function_test_ring_timeout (void *context)
-{
- struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context;
- struct totemrrp_instance *rrp_instance = deliver_fn_context->instance;
- unsigned int *faulty = NULL;
- int iface_no = deliver_fn_context->iface_no;
- struct message_header msg = {
- .type = MESSAGE_TYPE_RING_TEST_ACTIVE,
- .endian_detector = ENDIAN_LOCAL,
- };
-
- if (strcmp(rrp_instance->totem_config->rrp_mode, "active") == 0)
- faulty = ((struct active_instance *)(rrp_instance->rrp_algo_instance))->faulty;
- if (strcmp(rrp_instance->totem_config->rrp_mode, "passive") == 0)
- faulty = ((struct passive_instance *)(rrp_instance->rrp_algo_instance))->faulty;
-
- assert (faulty != NULL);
-
- if (faulty[iface_no] == 1) {
- msg.ring_number = iface_no;
- msg.nodeid_activator = rrp_instance->my_nodeid;
- totemnet_token_send (
- rrp_instance->net_handles[iface_no],
- &msg, sizeof (struct message_header));
- qb_loop_timer_add (rrp_instance->poll_handle,
- QB_LOOP_MED,
- rrp_instance->totem_config->rrp_autorecovery_check_timeout*QB_TIME_NS_IN_MSEC,
- (void *)deliver_fn_context,
- timer_function_test_ring_timeout,
- &rrp_instance->timer_active_test_ring_timeout[iface_no]);
- }
-}
-
-/*
- * None Replication Implementation
- */
-
-static void none_mcast_recv (
- struct totemrrp_instance *rrp_instance,
- unsigned int iface_no,
- void *context,
- const void *msg,
- unsigned int msg_len)
-{
- rrp_instance->totemrrp_deliver_fn (
- context,
- msg,
- msg_len);
-}
-
-static void none_mcast_flush_send (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len)
-{
- totemnet_mcast_flush_send (instance->net_handles[0], msg, msg_len);
-}
-
-static void none_mcast_noflush_send (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len)
-{
- totemnet_mcast_noflush_send (instance->net_handles[0], msg, msg_len);
-}
-
-static void none_token_recv (
- struct totemrrp_instance *rrp_instance,
- unsigned int iface_no,
- void *context,
- const void *msg,
- unsigned int msg_len,
- unsigned int token_seq)
-{
- rrp_instance->totemrrp_deliver_fn (
- context,
- msg,
- msg_len);
-}
-
-static void none_token_send (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len)
-{
- totemnet_token_send (
- instance->net_handles[0],
- msg, msg_len);
-}
-
-static void none_recv_flush (struct totemrrp_instance *instance)
-{
- totemnet_recv_flush (instance->net_handles[0]);
-}
-
-static void none_send_flush (struct totemrrp_instance *instance)
-{
- totemnet_send_flush (instance->net_handles[0]);
-}
-
-static void none_iface_check (struct totemrrp_instance *instance)
-{
- totemnet_iface_check (instance->net_handles[0]);
-}
-
-static void none_processor_count_set (
- struct totemrrp_instance *instance,
- unsigned int processor_count)
-{
- totemnet_processor_count_set (instance->net_handles[0],
- processor_count);
-}
-
-static void none_token_target_set (
- struct totemrrp_instance *instance,
- struct totem_ip_address *token_target,
- unsigned int iface_no)
-{
- totemnet_token_target_set (instance->net_handles[0], token_target);
-}
-
-static void none_ring_reenable (
- struct totemrrp_instance *instance,
- unsigned int iface_no)
-{
- /*
- * No operation
- */
-}
-
-static int none_mcast_recv_empty (
- struct totemrrp_instance *instance)
-{
- int res;
-
- res = totemnet_recv_mcast_empty (instance->net_handles[0]);
-
- return (res);
-}
-
-static int none_member_add (
- struct totemrrp_instance *instance,
- const struct totem_ip_address *member,
- unsigned int iface_no)
-{
- int res;
- res = totemnet_member_add (instance->net_handles[0], member);
- return (res);
-}
-
-static int none_member_remove (
- struct totemrrp_instance *instance,
- const struct totem_ip_address *member,
- unsigned int iface_no)
-{
- int res;
- res = totemnet_member_remove (instance->net_handles[0], member);
- return (res);
-}
-
-static void none_membership_changed (
- struct totemrrp_instance *rrp_instance,
- enum totem_configuration_type configuration_type,
- const struct srp_addr *member_list, size_t member_list_entries,
- const struct srp_addr *left_list, size_t left_list_entries,
- const struct srp_addr *joined_list, size_t joined_list_entries,
- const struct memb_ring_id *ring_id)
-{
- int i;
-
- for (i = 0; i < left_list_entries; i++) {
- if (left_list->no_addrs < 1 ||
- (left_list[i].addr[0].family != AF_INET && left_list[i].addr[0].family != AF_INET6)) {
- log_printf(rrp_instance->totemrrp_log_level_error,
- "Membership left list contains incorrect address. "
- "This is sign of misconfiguration between nodes!");
- } else {
- totemnet_member_set_active(rrp_instance->net_handles[0],
- &left_list[i].addr[0], 0);
- }
- }
-
- for (i = 0; i < joined_list_entries; i++) {
- if (joined_list->no_addrs < 1 ||
- (joined_list[i].addr[0].family != AF_INET && joined_list[i].addr[0].family != AF_INET6)) {
- log_printf(rrp_instance->totemrrp_log_level_error,
- "Membership join list contains incorrect address. "
- "This is sign of misconfiguration between nodes!");
- } else {
- totemnet_member_set_active(rrp_instance->net_handles[0],
- &joined_list[i].addr[0], 1);
- }
- }
-}
-
-/*
- * Passive Replication Implementation
- */
-void *passive_instance_initialize (
- struct totemrrp_instance *rrp_instance,
- int interface_count)
-{
- struct passive_instance *instance;
- int i;
-
- instance = malloc (sizeof (struct passive_instance));
- if (instance == 0) {
- goto error_exit;
- }
- memset (instance, 0, sizeof (struct passive_instance));
-
- instance->faulty = malloc (sizeof (int) * interface_count);
- if (instance->faulty == 0) {
- free (instance);
- instance = 0;
- goto error_exit;
- }
- memset (instance->faulty, 0, sizeof (int) * interface_count);
-
- for (i = 0; i < interface_count; i++) {
- stats_set_interface_faulty (rrp_instance, i, 0);
- }
-
- instance->token_recv_count = malloc (sizeof (int) * interface_count);
- if (instance->token_recv_count == 0) {
- free (instance->faulty);
- free (instance);
- instance = 0;
- goto error_exit;
- }
- memset (instance->token_recv_count, 0, sizeof (int) * interface_count);
-
- instance->mcast_recv_count = malloc (sizeof (int) * interface_count);
- if (instance->mcast_recv_count == 0) {
- free (instance->token_recv_count);
- free (instance->faulty);
- free (instance);
- instance = 0;
- goto error_exit;
- }
- memset (instance->mcast_recv_count, 0, sizeof (int) * interface_count);
-
-error_exit:
- return ((void *)instance);
-}
-
-static void timer_function_passive_token_expired (void *context)
-{
- struct passive_instance *passive_instance = (struct passive_instance *)context;
- struct totemrrp_instance *rrp_instance = passive_instance->rrp_instance;
-
- rrp_instance->totemrrp_deliver_fn (
- passive_instance->totemrrp_context,
- passive_instance->token,
- passive_instance->token_len);
-}
-
-/* TODO
-static void timer_function_passive_problem_decrementer (void *context)
-{
-// struct passive_instance *passive_instance = (struct passive_instance *)context;
-// struct totemrrp_instance *rrp_instance = passive_instance->rrp_instance;
-
-}
-*/
-
-
-static void passive_timer_expired_token_start (
- struct passive_instance *passive_instance)
-{
- qb_loop_timer_add (
- passive_instance->rrp_instance->poll_handle,
- QB_LOOP_MED,
- passive_instance->rrp_instance->totem_config->rrp_token_expired_timeout*QB_TIME_NS_IN_MSEC,
- (void *)passive_instance,
- timer_function_passive_token_expired,
- &passive_instance->timer_expired_token);
-}
-
-static void passive_timer_expired_token_cancel (
- struct passive_instance *passive_instance)
-{
- qb_loop_timer_del (
- passive_instance->rrp_instance->poll_handle,
- passive_instance->timer_expired_token);
-}
-
-/*
-static void passive_timer_problem_decrementer_start (
- struct passive_instance *passive_instance)
-{
- qb_loop_timer_add (
- QB_LOOP_MED,
- passive_instance->rrp_instance->poll_handle,
- passive_instance->rrp_instance->totem_config->rrp_problem_count_timeout*QB_TIME_NS_IN_MSEC,
- (void *)passive_instance,
- timer_function_passive_problem_decrementer,
- &passive_instance->timer_problem_decrementer);
-}
-
-static void passive_timer_problem_decrementer_cancel (
- struct passive_instance *passive_instance)
-{
- qb_loop_timer_del (
- passive_instance->rrp_instance->poll_handle,
- passive_instance->timer_problem_decrementer);
-}
-*/
-
-/*
- * Monitor function implementation from rrp paper.
- * rrp_instance is passive rrp instance, iface_no is interface with received messgae/token and
- * is_token_recv_count is boolean variable which donates if message is token (>1) or regular
- * message (= 0)
- */
-static void passive_monitor (
- struct totemrrp_instance *rrp_instance,
- unsigned int iface_no,
- int is_token_recv_count)
-{
- struct passive_instance *passive_instance = (struct passive_instance *)rrp_instance->rrp_algo_instance;
- unsigned int *recv_count;
- unsigned int max;
- unsigned int i;
- unsigned int min_all, min_active;
- unsigned int threshold;
-
- /*
- * Monitor for failures
- */
- if (is_token_recv_count) {
- recv_count = passive_instance->token_recv_count;
- threshold = rrp_instance->totem_config->rrp_problem_count_threshold;
- } else {
- recv_count = passive_instance->mcast_recv_count;
- threshold = rrp_instance->totem_config->rrp_problem_count_mcast_threshold;
- }
-
- recv_count[iface_no] += 1;
-
- max = 0;
- for (i = 0; i < rrp_instance->interface_count; i++) {
- if (max < recv_count[i]) {
- max = recv_count[i];
- }
- }
-
- /*
- * Max is larger then threshold -> start adjusting process
- */
- if (max > PASSIVE_RECV_COUNT_THRESHOLD) {
- min_all = min_active = recv_count[iface_no];
-
- for (i = 0; i < rrp_instance->interface_count; i++) {
- if (recv_count[i] < min_all) {
- min_all = recv_count[i];
- }
-
- if (passive_instance->faulty[i] == 0 &&
- recv_count[i] < min_active) {
- min_active = recv_count[i];
- }
- }
-
- if (min_all > 0) {
- /*
- * There is one or more faulty device with recv_count > 0
- */
- for (i = 0; i < rrp_instance->interface_count; i++) {
- recv_count[i] -= min_all;
- }
- } else {
- /*
- * No faulty device with recv_count > 0, adjust only active
- * devices
- */
- for (i = 0; i < rrp_instance->interface_count; i++) {
- if (passive_instance->faulty[i] == 0) {
- recv_count[i] -= min_active;
- }
- }
- }
-
- /*
- * Find again max
- */
- max = 0;
-
- for (i = 0; i < rrp_instance->interface_count; i++) {
- if (max < recv_count[i]) {
- max = recv_count[i];
- }
- }
- }
-
- for (i = 0; i < rrp_instance->interface_count; i++) {
- if ((passive_instance->faulty[i] == 0) &&
- (max - recv_count[i] > threshold)) {
- passive_instance->faulty[i] = 1;
-
- qb_loop_timer_add (rrp_instance->poll_handle,
- QB_LOOP_MED,
- rrp_instance->totem_config->rrp_autorecovery_check_timeout*QB_TIME_NS_IN_MSEC,
- rrp_instance->deliver_fn_context[i],
- timer_function_test_ring_timeout,
- &rrp_instance->timer_active_test_ring_timeout[i]);
-
- stats_set_interface_faulty (rrp_instance, i, passive_instance->faulty[i]);
-
- snprintf (rrp_instance->status[i], STATUS_STR_LEN,
- "Marking ringid %u interface %s FAULTY",
- i,
- totemnet_iface_print (rrp_instance->net_handles[i]));
- log_printf (
- rrp_instance->totemrrp_log_level_error,
- "%s",
- rrp_instance->status[i]);
- }
- }
-}
-
-static void passive_mcast_recv (
- struct totemrrp_instance *rrp_instance,
- unsigned int iface_no,
- void *context,
- const void *msg,
- unsigned int msg_len)
-{
- struct passive_instance *passive_instance = (struct passive_instance *)rrp_instance->rrp_algo_instance;
-
- rrp_instance->totemrrp_deliver_fn (
- context,
- msg,
- msg_len);
-
- if (rrp_instance->totemrrp_msgs_missing() == 0 &&
- passive_instance->timer_expired_token) {
- /*
- * Delivers the last token
- */
- rrp_instance->totemrrp_deliver_fn (
- passive_instance->totemrrp_context,
- passive_instance->token,
- passive_instance->token_len);
- passive_timer_expired_token_cancel (passive_instance);
- }
-
- passive_monitor (rrp_instance, iface_no, 0);
-}
-
-static void passive_mcast_flush_send (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len)
-{
- struct passive_instance *passive_instance = (struct passive_instance *)instance->rrp_algo_instance;
- int i = 0;
-
- do {
- passive_instance->msg_xmit_iface = (passive_instance->msg_xmit_iface + 1) % instance->interface_count;
- i++;
- } while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1));
-
- if (i <= instance->interface_count) {
- totemnet_mcast_flush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len);
- }
-}
-
-static void passive_mcast_noflush_send (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len)
-{
- struct passive_instance *passive_instance = (struct passive_instance *)instance->rrp_algo_instance;
- int i = 0;
-
- do {
- passive_instance->msg_xmit_iface = (passive_instance->msg_xmit_iface + 1) % instance->interface_count;
- i++;
- } while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1));
-
- if (i <= instance->interface_count) {
- totemnet_mcast_noflush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len);
- }
-}
-
-static void passive_token_recv (
- struct totemrrp_instance *rrp_instance,
- unsigned int iface_no,
- void *context,
- const void *msg,
- unsigned int msg_len,
- unsigned int token_seq)
-{
- struct passive_instance *passive_instance = (struct passive_instance *)rrp_instance->rrp_algo_instance;
-
- passive_instance->totemrrp_context = context; // this should be in totemrrp_instance ? TODO
-
- if (rrp_instance->totemrrp_msgs_missing() == 0) {
- rrp_instance->totemrrp_deliver_fn (
- context,
- msg,
- msg_len);
- } else {
- memcpy (passive_instance->token, msg, msg_len);
- passive_timer_expired_token_start (passive_instance);
-
- }
-
- passive_monitor (rrp_instance, iface_no, 1);
-}
-
-static void passive_token_send (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len)
-{
- struct passive_instance *passive_instance = (struct passive_instance *)instance->rrp_algo_instance;
- int i = 0;
-
- do {
- passive_instance->token_xmit_iface = (passive_instance->token_xmit_iface + 1) % instance->interface_count;
- i++;
- } while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->token_xmit_iface] == 1));
-
- if (i <= instance->interface_count) {
- totemnet_token_send (
- instance->net_handles[passive_instance->token_xmit_iface],
- msg, msg_len);
- }
-
-}
-
-static void passive_recv_flush (struct totemrrp_instance *instance)
-{
- struct passive_instance *rrp_algo_instance = (struct passive_instance *)instance->rrp_algo_instance;
- unsigned int i;
-
- for (i = 0; i < instance->interface_count; i++) {
- if (rrp_algo_instance->faulty[i] == 0) {
-
- totemnet_recv_flush (instance->net_handles[i]);
- }
- }
-}
-
-static void passive_send_flush (struct totemrrp_instance *instance)
-{
- struct passive_instance *rrp_algo_instance = (struct passive_instance *)instance->rrp_algo_instance;
- unsigned int i;
-
- for (i = 0; i < instance->interface_count; i++) {
- if (rrp_algo_instance->faulty[i] == 0) {
-
- totemnet_send_flush (instance->net_handles[i]);
- }
- }
-}
-
-static void passive_iface_check (struct totemrrp_instance *instance)
-{
- struct passive_instance *rrp_algo_instance = (struct passive_instance *)instance->rrp_algo_instance;
- unsigned int i;
-
- for (i = 0; i < instance->interface_count; i++) {
- if (rrp_algo_instance->faulty[i] == 0) {
-
- totemnet_iface_check (instance->net_handles[i]);
- }
- }
-}
-
-static void passive_processor_count_set (
- struct totemrrp_instance *instance,
- unsigned int processor_count)
-{
- struct passive_instance *rrp_algo_instance = (struct passive_instance *)instance->rrp_algo_instance;
- unsigned int i;
-
- for (i = 0; i < instance->interface_count; i++) {
- if (rrp_algo_instance->faulty[i] == 0) {
-
- totemnet_processor_count_set (instance->net_handles[i],
- processor_count);
- }
- }
-}
-
-static void passive_token_target_set (
- struct totemrrp_instance *instance,
- struct totem_ip_address *token_target,
- unsigned int iface_no)
-{
- totemnet_token_target_set (instance->net_handles[iface_no], token_target);
-}
-
-static int passive_mcast_recv_empty (
- struct totemrrp_instance *instance)
-{
- int res;
- int msgs_emptied = 0;
- int i;
-
- for (i = 0; i < instance->interface_count; i++) {
- res = totemnet_recv_mcast_empty (instance->net_handles[i]);
- if (res == -1) {
- return (-1);
- }
- if (res == 1) {
- msgs_emptied = 1;
- }
- }
-
- return (msgs_emptied);
-}
-
-static int passive_member_add (
- struct totemrrp_instance *instance,
- const struct totem_ip_address *member,
- unsigned int iface_no)
-{
- int res;
- res = totemnet_member_add (instance->net_handles[iface_no], member);
- return (res);
-}
-
-static int passive_member_remove (
- struct totemrrp_instance *instance,
- const struct totem_ip_address *member,
- unsigned int iface_no)
-{
- int res;
- res = totemnet_member_remove (instance->net_handles[iface_no], member);
- return (res);
-}
-
-static void passive_membership_changed (
- struct totemrrp_instance *rrp_instance,
- enum totem_configuration_type configuration_type,
- const struct srp_addr *member_list, size_t member_list_entries,
- const struct srp_addr *left_list, size_t left_list_entries,
- const struct srp_addr *joined_list, size_t joined_list_entries,
- const struct memb_ring_id *ring_id)
-{
- int i;
- int interface;
-
- for (interface = 0; interface < rrp_instance->interface_count; interface++) {
- for (i = 0; i < left_list_entries; i++) {
- if (left_list->no_addrs < interface + 1 ||
- (left_list[i].addr[interface].family != AF_INET &&
- left_list[i].addr[interface].family != AF_INET6)) {
- log_printf(rrp_instance->totemrrp_log_level_error,
- "Membership left list contains incorrect address. "
- "This is sign of misconfiguration between nodes!");
- } else {
- totemnet_member_set_active(rrp_instance->net_handles[interface],
- &left_list[i].addr[interface], 0);
- }
- }
-
- for (i = 0; i < joined_list_entries; i++) {
- if (joined_list->no_addrs < interface + 1 ||
- (joined_list[i].addr[interface].family != AF_INET &&
- joined_list[i].addr[interface].family != AF_INET6)) {
- log_printf(rrp_instance->totemrrp_log_level_error,
- "Membership join list contains incorrect address. "
- "This is sign of misconfiguration between nodes!");
- } else {
- totemnet_member_set_active(rrp_instance->net_handles[interface],
- &joined_list[i].addr[interface], 1);
- }
- }
- }
-}
-
-static void passive_ring_reenable (
- struct totemrrp_instance *instance,
- unsigned int iface_no)
-{
- struct passive_instance *rrp_algo_instance = (struct passive_instance *)instance->rrp_algo_instance;
- int i;
-
- memset (rrp_algo_instance->mcast_recv_count, 0, sizeof (unsigned int) *
- instance->interface_count);
- memset (rrp_algo_instance->token_recv_count, 0, sizeof (unsigned int) *
- instance->interface_count);
-
- if (iface_no == instance->interface_count) {
- memset (rrp_algo_instance->faulty, 0, sizeof (unsigned int) *
- instance->interface_count);
- for (i = 0; i < instance->interface_count; i++) {
- stats_set_interface_faulty (instance, i, 0);
- }
- } else {
- rrp_algo_instance->faulty[iface_no] = 0;
- stats_set_interface_faulty (instance, iface_no, 0);
- }
-}
-
-/*
- * Active Replication Implementation
- */
-void *active_instance_initialize (
- struct totemrrp_instance *rrp_instance,
- int interface_count)
-{
- struct active_instance *instance;
- int i;
-
- instance = malloc (sizeof (struct active_instance));
- if (instance == 0) {
- goto error_exit;
- }
- memset (instance, 0, sizeof (struct active_instance));
-
- instance->faulty = malloc (sizeof (int) * interface_count);
- if (instance->faulty == 0) {
- free (instance);
- instance = 0;
- goto error_exit;
- }
- memset (instance->faulty, 0, sizeof (unsigned int) * interface_count);
-
- for (i = 0; i < interface_count; i++) {
- stats_set_interface_faulty (rrp_instance, i, 0);
- }
-
- instance->last_token_recv = malloc (sizeof (int) * interface_count);
- if (instance->last_token_recv == 0) {
- free (instance->faulty);
- free (instance);
- instance = 0;
- goto error_exit;
- }
- memset (instance->last_token_recv, 0, sizeof (unsigned int) * interface_count);
-
- instance->counter_problems = malloc (sizeof (int) * interface_count);
- if (instance->counter_problems == 0) {
- free (instance->last_token_recv);
- free (instance->faulty);
- free (instance);
- instance = 0;
- goto error_exit;
- }
- memset (instance->counter_problems, 0, sizeof (unsigned int) * interface_count);
-
- instance->timer_expired_token = 0;
-
- instance->timer_problem_decrementer = 0;
-
- instance->rrp_instance = rrp_instance;
-
- instance->last_token_seq = ARR_SEQNO_START_TOKEN - 1;
-
-error_exit:
- return ((void *)instance);
-}
-static void timer_function_active_problem_decrementer (void *context)
-{
- struct active_instance *active_instance = (struct active_instance *)context;
- struct totemrrp_instance *rrp_instance = active_instance->rrp_instance;
- unsigned int problem_found = 0;
- unsigned int i;
-
- for (i = 0; i < rrp_instance->interface_count; i++) {
- if (active_instance->counter_problems[i] > 0) {
- problem_found = 1;
- active_instance->counter_problems[i] -= 1;
- if (active_instance->counter_problems[i] == 0) {
- snprintf (rrp_instance->status[i], STATUS_STR_LEN,
- "ring %d active with no faults", i);
- } else {
- snprintf (rrp_instance->status[i], STATUS_STR_LEN,
- "Decrementing problem counter for iface %s to [%d of %d]",
- totemnet_iface_print (rrp_instance->net_handles[i]),
- active_instance->counter_problems[i],
- rrp_instance->totem_config->rrp_problem_count_threshold);
- }
- log_printf (
- rrp_instance->totemrrp_log_level_warning,
- "%s",
- rrp_instance->status[i]);
- }
- }
- if (problem_found) {
- active_timer_problem_decrementer_start (active_instance);
- } else {
- active_instance->timer_problem_decrementer = 0;
- }
-}
-
-static void timer_function_active_token_expired (void *context)
-{
- struct active_instance *active_instance = (struct active_instance *)context;
- struct totemrrp_instance *rrp_instance = active_instance->rrp_instance;
- unsigned int i;
-
- for (i = 0; i < rrp_instance->interface_count; i++) {
- if (active_instance->last_token_recv[i] == 0) {
- active_instance->counter_problems[i] += 1;
-
- if (active_instance->timer_problem_decrementer == 0) {
- active_timer_problem_decrementer_start (active_instance);
- }
- snprintf (rrp_instance->status[i], STATUS_STR_LEN,
- "Incrementing problem counter for seqid %d iface %s to [%d of %d]",
- active_instance->last_token_seq,
- totemnet_iface_print (rrp_instance->net_handles[i]),
- active_instance->counter_problems[i],
- rrp_instance->totem_config->rrp_problem_count_threshold);
- log_printf (
- rrp_instance->totemrrp_log_level_warning,
- "%s",
- rrp_instance->status[i]);
- }
- }
- for (i = 0; i < rrp_instance->interface_count; i++) {
- if (active_instance->counter_problems[i] >= rrp_instance->totem_config->rrp_problem_count_threshold &&
- active_instance->faulty[i] == 0) {
- active_instance->faulty[i] = 1;
-
- qb_loop_timer_add (rrp_instance->poll_handle,
- QB_LOOP_MED,
- rrp_instance->totem_config->rrp_autorecovery_check_timeout*QB_TIME_NS_IN_MSEC,
- rrp_instance->deliver_fn_context[i],
- timer_function_test_ring_timeout,
- &rrp_instance->timer_active_test_ring_timeout[i]);
-
- stats_set_interface_faulty (rrp_instance, i, active_instance->faulty[i]);
-
- snprintf (rrp_instance->status[i], STATUS_STR_LEN,
- "Marking seqid %d ringid %u interface %s FAULTY",
- active_instance->last_token_seq,
- i,
- totemnet_iface_print (rrp_instance->net_handles[i]));
- log_printf (
- rrp_instance->totemrrp_log_level_error,
- "%s",
- rrp_instance->status[i]);
- active_timer_problem_decrementer_cancel (active_instance);
- }
- }
-
- rrp_instance->totemrrp_deliver_fn (
- active_instance->totemrrp_context,
- active_instance->token,
- active_instance->token_len);
-}
-
-static void active_timer_expired_token_start (
- struct active_instance *active_instance)
-{
- qb_loop_timer_add (
- active_instance->rrp_instance->poll_handle,
- QB_LOOP_MED,
- active_instance->rrp_instance->totem_config->rrp_token_expired_timeout*QB_TIME_NS_IN_MSEC,
- (void *)active_instance,
- timer_function_active_token_expired,
- &active_instance->timer_expired_token);
-}
-
-static void active_timer_expired_token_cancel (
- struct active_instance *active_instance)
-{
- qb_loop_timer_del (
- active_instance->rrp_instance->poll_handle,
- active_instance->timer_expired_token);
-}
-
-static void active_timer_problem_decrementer_start (
- struct active_instance *active_instance)
-{
- qb_loop_timer_add (
- active_instance->rrp_instance->poll_handle,
- QB_LOOP_MED,
- active_instance->rrp_instance->totem_config->rrp_problem_count_timeout*QB_TIME_NS_IN_MSEC,
- (void *)active_instance,
- timer_function_active_problem_decrementer,
- &active_instance->timer_problem_decrementer);
-}
-
-static void active_timer_problem_decrementer_cancel (
- struct active_instance *active_instance)
-{
- qb_loop_timer_del (
- active_instance->rrp_instance->poll_handle,
- active_instance->timer_problem_decrementer);
- active_instance->timer_problem_decrementer = 0;
-}
-
-
-/*
- * active replication
- */
-static void active_mcast_recv (
- struct totemrrp_instance *instance,
- unsigned int iface_no,
- void *context,
- const void *msg,
- unsigned int msg_len)
-{
- instance->totemrrp_deliver_fn (
- context,
- msg,
- msg_len);
-}
-
-static void active_mcast_flush_send (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len)
-{
- int i;
- struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance;
-
- for (i = 0; i < instance->interface_count; i++) {
- if (rrp_algo_instance->faulty[i] == 0) {
- totemnet_mcast_flush_send (instance->net_handles[i], msg, msg_len);
- }
- }
-}
-
-static void active_mcast_noflush_send (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len)
-{
- int i;
- struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance;
-
- for (i = 0; i < instance->interface_count; i++) {
- if (rrp_algo_instance->faulty[i] == 0) {
- totemnet_mcast_noflush_send (instance->net_handles[i], msg, msg_len);
- }
- }
-}
-
-static void active_token_recv (
- struct totemrrp_instance *rrp_instance,
- unsigned int iface_no,
- void *context,
- const void *msg,
- unsigned int msg_len,
- unsigned int token_seq)
-{
- int i;
- struct active_instance *active_instance = (struct active_instance *)rrp_instance->rrp_algo_instance;
-
- active_instance->totemrrp_context = context;
- if (sq_lt_compare (active_instance->last_token_seq, token_seq)) {
- memcpy (active_instance->token, msg, msg_len);
- active_instance->token_len = msg_len;
- for (i = 0; i < rrp_instance->interface_count; i++) {
- active_instance->last_token_recv[i] = 0;
- }
-
- active_instance->last_token_recv[iface_no] = 1;
- active_timer_expired_token_start (active_instance);
- }
-
- /*
- * This doesn't follow spec because the spec assumes we will know
- * when token resets occur.
- */
- active_instance->last_token_seq = token_seq;
-
- if (token_seq == active_instance->last_token_seq) {
- active_instance->last_token_recv[iface_no] = 1;
- for (i = 0; i < rrp_instance->interface_count; i++) {
- if ((active_instance->last_token_recv[i] == 0) &&
- active_instance->faulty[i] == 0) {
- return; /* don't deliver token */
- }
- }
- active_timer_expired_token_cancel (active_instance);
-
- rrp_instance->totemrrp_deliver_fn (
- context,
- msg,
- msg_len);
- }
-}
-
-static void active_token_send (
- struct totemrrp_instance *instance,
- const void *msg,
- unsigned int msg_len)
-{
- struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance;
- int i;
-
- for (i = 0; i < instance->interface_count; i++) {
- if (rrp_algo_instance->faulty[i] == 0) {
- totemnet_token_send (
- instance->net_handles[i],
- msg, msg_len);
-
- }
- }
-}
-
-static void active_recv_flush (struct totemrrp_instance *instance)
-{
- struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance;
- unsigned int i;
-
- for (i = 0; i < instance->interface_count; i++) {
- if (rrp_algo_instance->faulty[i] == 0) {
-
- totemnet_recv_flush (instance->net_handles[i]);
- }
- }
-}
-
-static void active_send_flush (struct totemrrp_instance *instance)
-{
- struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance;
- unsigned int i;
-
- for (i = 0; i < instance->interface_count; i++) {
- if (rrp_algo_instance->faulty[i] == 0) {
-
- totemnet_send_flush (instance->net_handles[i]);
- }
- }
-}
-
-static int active_member_add (
- struct totemrrp_instance *instance,
- const struct totem_ip_address *member,
- unsigned int iface_no)
-{
- int res;
- res = totemnet_member_add (instance->net_handles[iface_no], member);
- return (res);
-}
-
-static int active_member_remove (
- struct totemrrp_instance *instance,
- const struct totem_ip_address *member,
- unsigned int iface_no)
-{
- int res;
- res = totemnet_member_remove (instance->net_handles[iface_no], member);
- return (res);
-}
-
-static void active_membership_changed (
- struct totemrrp_instance *rrp_instance,
- enum totem_configuration_type configuration_type,
- const struct srp_addr *member_list, size_t member_list_entries,
- const struct srp_addr *left_list, size_t left_list_entries,
- const struct srp_addr *joined_list, size_t joined_list_entries,
- const struct memb_ring_id *ring_id)
-{
- int i;
- int interface;
-
- for (interface = 0; interface < rrp_instance->interface_count; interface++) {
- for (i = 0; i < left_list_entries; i++) {
- if (left_list->no_addrs < interface + 1 ||
- (left_list[i].addr[interface].family != AF_INET &&
- left_list[i].addr[interface].family != AF_INET6)) {
- log_printf(rrp_instance->totemrrp_log_level_error,
- "Membership left list contains incorrect address. "
- "This is sign of misconfiguration between nodes!");
- } else {
- totemnet_member_set_active(rrp_instance->net_handles[interface],
- &left_list[i].addr[interface], 0);
- }
- }
-
- for (i = 0; i < joined_list_entries; i++) {
- if (joined_list->no_addrs < interface + 1 ||
- (joined_list[i].addr[interface].family != AF_INET &&
- joined_list[i].addr[interface].family != AF_INET6)) {
- log_printf(rrp_instance->totemrrp_log_level_error,
- "Membership join list contains incorrect address. "
- "This is sign of misconfiguration between nodes!");
- } else {
- totemnet_member_set_active(rrp_instance->net_handles[interface],
- &joined_list[i].addr[interface], 1);
- }
- }
- }
-}
-
-static void active_iface_check (struct totemrrp_instance *instance)
-{
- struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance;
- unsigned int i;
-
- for (i = 0; i < instance->interface_count; i++) {
- if (rrp_algo_instance->faulty[i] == 0) {
-
- totemnet_iface_check (instance->net_handles[i]);
- }
- }
-}
-
-static void active_processor_count_set (
- struct totemrrp_instance *instance,
- unsigned int processor_count)
-{
- struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance;
- unsigned int i;
-
- for (i = 0; i < instance->interface_count; i++) {
- if (rrp_algo_instance->faulty[i] == 0) {
-
- totemnet_processor_count_set (instance->net_handles[i],
- processor_count);
- }
- }
-}
-
-static void active_token_target_set (
- struct totemrrp_instance *instance,
- struct totem_ip_address *token_target,
- unsigned int iface_no)
-{
- totemnet_token_target_set (instance->net_handles[iface_no], token_target);
-}
-
-static int active_mcast_recv_empty (
- struct totemrrp_instance *instance)
-{
- int res;
- int msgs_emptied = 0;
- int i;
-
- for (i = 0; i < instance->interface_count; i++) {
- res = totemnet_recv_mcast_empty (instance->net_handles[i]);
- if (res == -1) {
- return (-1);
- }
- if (res == 1) {
- msgs_emptied = 1;
- }
- }
-
- return (msgs_emptied);
-}
-
-static void active_ring_reenable (
- struct totemrrp_instance *instance,
- unsigned int iface_no)
-{
- struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance;
- int i;
-
- if (iface_no == instance->interface_count) {
- memset (rrp_algo_instance->last_token_recv, 0, sizeof (unsigned int) *
- instance->interface_count);
- memset (rrp_algo_instance->faulty, 0, sizeof (unsigned int) *
- instance->interface_count);
- memset (rrp_algo_instance->counter_problems, 0, sizeof (unsigned int) *
- instance->interface_count);
-
- for (i = 0; i < instance->interface_count; i++) {
- stats_set_interface_faulty (instance, i, 0);
- }
- } else {
- rrp_algo_instance->last_token_recv[iface_no] = 0;
- rrp_algo_instance->faulty[iface_no] = 0;
- rrp_algo_instance->counter_problems[iface_no] = 0;
-
- stats_set_interface_faulty (instance, iface_no, 0);
- }
-}
-
-static void totemrrp_instance_initialize (struct totemrrp_instance *instance)
-{
- memset (instance, 0, sizeof (struct totemrrp_instance));
-}
-
-static int totemrrp_algorithm_set (
- struct totem_config *totem_config,
- struct totemrrp_instance *instance)
-{
- unsigned int res = -1;
- unsigned int i;
-
- for (i = 0; i < RRP_ALGOS_COUNT; i++) {
- if (strcmp (totem_config->rrp_mode, rrp_algos[i]->name) == 0) {
- instance->rrp_algo = rrp_algos[i];
- if (rrp_algos[i]->initialize) {
- instance->rrp_algo_instance = rrp_algos[i]->initialize (
- instance,
- totem_config->interface_count);
- }
- res = 0;
- break;
- }
- }
- for (i = 0; i < totem_config->interface_count; i++) {
- instance->status[i] = malloc (STATUS_STR_LEN+1);
- snprintf (instance->status[i], STATUS_STR_LEN,
- "ring %d active with no faults", i);
- }
- return (res);
-}
-
-void rrp_deliver_fn (
- void *context,
- const void *msg,
- unsigned int msg_len)
-{
- unsigned int token_seqid;
- unsigned int token_is;
-
- struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context;
- struct totemrrp_instance *rrp_instance = deliver_fn_context->instance;
- const struct message_header *hdr = msg;
- struct message_header tmp_msg, activate_msg;
-
- memset(&tmp_msg, 0, sizeof(struct message_header));
- memset(&activate_msg, 0, sizeof(struct message_header));
-
- rrp_instance->totemrrp_token_seqid_get (
- msg,
- &token_seqid,
- &token_is);
-
- if (hdr->type == MESSAGE_TYPE_RING_TEST_ACTIVE) {
- log_printf (
- rrp_instance->totemrrp_log_level_debug,
- "received message requesting test of ring now active");
-
- if (hdr->endian_detector != ENDIAN_LOCAL) {
- test_active_msg_endian_convert(hdr, &tmp_msg);
- hdr = &tmp_msg;
- }
-
- if (hdr->nodeid_activator == rrp_instance->my_nodeid) {
- /*
- * Send an activate message
- */
- activate_msg.type = MESSAGE_TYPE_RING_TEST_ACTIVATE;
- activate_msg.endian_detector = ENDIAN_LOCAL;
- activate_msg.ring_number = hdr->ring_number;
- activate_msg.nodeid_activator = rrp_instance->my_nodeid;
- totemnet_token_send (
- rrp_instance->net_handles[deliver_fn_context->iface_no],
- &activate_msg, sizeof (struct message_header));
- } else {
- /*
- * Send a ring test message
- */
- totemnet_token_send (
- rrp_instance->net_handles[deliver_fn_context->iface_no],
- msg, msg_len);
- }
- } else
- if (hdr->type == MESSAGE_TYPE_RING_TEST_ACTIVATE) {
-
- if (hdr->endian_detector != ENDIAN_LOCAL) {
- test_active_msg_endian_convert(hdr, &tmp_msg);
- hdr = &tmp_msg;
- }
-
- log_printf (
- rrp_instance->totemrrp_log_level_debug,
- "Received ring test activate message for ring %d sent by node %u",
- hdr->ring_number,
- hdr->nodeid_activator);
-
- if (rrp_instance->stats.faulty[deliver_fn_context->iface_no]) {
- log_printf (rrp_instance->totemrrp_log_level_notice,
- "Automatically recovered ring %d", hdr->ring_number);
- }
-
- totemrrp_ring_reenable (rrp_instance, deliver_fn_context->iface_no);
- if (hdr->nodeid_activator != rrp_instance->my_nodeid) {
- totemnet_token_send (
- rrp_instance->net_handles[deliver_fn_context->iface_no],
- msg, msg_len);
- }
- } else
- if (token_is) {
- /*
- * Deliver to the token receiver for this rrp algorithm
- */
- rrp_instance->rrp_algo->token_recv (
- rrp_instance,
- deliver_fn_context->iface_no,
- deliver_fn_context->context,
- msg,
- msg_len,
- token_seqid);
- } else {
- /*
- * Deliver to the mcast receiver for this rrp algorithm
- */
- rrp_instance->rrp_algo->mcast_recv (
- rrp_instance,
- deliver_fn_context->iface_no,
- deliver_fn_context->context,
- msg,
- msg_len);
- }
-}
-
-void rrp_iface_change_fn (
- void *context,
- const struct totem_ip_address *iface_addr)
-{
- struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context;
-
- deliver_fn_context->instance->my_nodeid = iface_addr->nodeid;
- deliver_fn_context->instance->totemrrp_iface_change_fn (
- deliver_fn_context->context,
- iface_addr,
- deliver_fn_context->iface_no);
-}
-
-int totemrrp_finalize (
- void *rrp_context)
-{
- struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context;
- int i;
-
- for (i = 0; i < instance->interface_count; i++) {
- totemnet_finalize (instance->net_handles[i]);
- }
- free (instance->net_handles);
- free (instance);
- return (0);
-}
-
-static void rrp_target_set_completed (void *context)
-{
- struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context;
-
- deliver_fn_context->instance->totemrrp_target_set_completed (deliver_fn_context->context);
-}
-
-/*
- * Totem Redundant Ring interface
- * depends on poll abstraction, POSIX, IPV4
- */
-
-/*
- * Create an instance
- */
-int totemrrp_initialize (
- qb_loop_t *poll_handle,
- void **rrp_context,
- struct totem_config *totem_config,
- totemsrp_stats_t *stats,
- void *context,
-
- void (*deliver_fn) (
- void *context,
- const void *msg,
- unsigned int msg_len),
-
- void (*iface_change_fn) (
- void *context,
- const struct totem_ip_address *iface_addr,
- unsigned int iface_no),
-
- void (*token_seqid_get) (
- const void *msg,
- unsigned int *seqid,
- unsigned int *token_is),
-
- unsigned int (*msgs_missing) (void),
-
- void (*target_set_completed) (void *context))
-{
- struct totemrrp_instance *instance;
- unsigned int res;
- int i;
-
- instance = malloc (sizeof (struct totemrrp_instance));
- if (instance == 0) {
- return (-1);
- }
-
- totemrrp_instance_initialize (instance);
-
- instance->totem_config = totem_config;
- stats->rrp = &instance->stats;
- instance->stats.interface_count = totem_config->interface_count;
- instance->stats.faulty = calloc(instance->stats.interface_count, sizeof(uint8_t));
-
- res = totemrrp_algorithm_set (
- instance->totem_config,
- instance);
- if (res == -1) {
- goto error_destroy;
- }
-
- /*
- * Configure logging
- */
- instance->totemrrp_log_level_security = totem_config->totem_logging_configuration.log_level_security;
- instance->totemrrp_log_level_error = totem_config->totem_logging_configuration.log_level_error;
- instance->totemrrp_log_level_warning = totem_config->totem_logging_configuration.log_level_warning;
- instance->totemrrp_log_level_notice = totem_config->totem_logging_configuration.log_level_notice;
- instance->totemrrp_log_level_debug = totem_config->totem_logging_configuration.log_level_debug;
- instance->totemrrp_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
- instance->totemrrp_log_printf = totem_config->totem_logging_configuration.log_printf;
-
- instance->interfaces = totem_config->interfaces;
-
- instance->poll_handle = poll_handle;
-
- instance->totemrrp_deliver_fn = deliver_fn;
-
- instance->totemrrp_iface_change_fn = iface_change_fn;
-
- instance->totemrrp_token_seqid_get = token_seqid_get;
-
- instance->totemrrp_target_set_completed = target_set_completed;
-
- instance->totemrrp_msgs_missing = msgs_missing;
-
- instance->interface_count = totem_config->interface_count;
-
- instance->net_handles = malloc (sizeof (void *) * totem_config->interface_count);
-
- instance->context = context;
-
- instance->poll_handle = poll_handle;
-
-
- for (i = 0; i < totem_config->interface_count; i++) {
- struct deliver_fn_context *deliver_fn_context;
-
- deliver_fn_context = malloc (sizeof (struct deliver_fn_context));
- assert (deliver_fn_context);
- deliver_fn_context->instance = instance;
- deliver_fn_context->context = context;
- deliver_fn_context->iface_no = i;
- instance->deliver_fn_context[i] = (void *)deliver_fn_context;
-
- totemnet_initialize (
- poll_handle,
- &instance->net_handles[i],
- totem_config,
- stats,
- i,
- (void *)deliver_fn_context,
- rrp_deliver_fn,
- rrp_iface_change_fn,
- rrp_target_set_completed);
-
- totemnet_net_mtu_adjust (instance->net_handles[i], totem_config);
- }
-
- *rrp_context = instance;
- return (0);
-
-error_destroy:
- free (instance);
- return (res);
-}
-
-void *totemrrp_buffer_alloc (void *rrp_context)
-{
- struct totemrrp_instance *instance = rrp_context;
- assert (instance != NULL);
- return totemnet_buffer_alloc (instance->net_handles[0]);
-}
-
-void totemrrp_buffer_release (void *rrp_context, void *ptr)
-{
- struct totemrrp_instance *instance = rrp_context;
- assert (instance != NULL);
- totemnet_buffer_release (instance->net_handles[0], ptr);
-}
-
-int totemrrp_processor_count_set (
- void *rrp_context,
- unsigned int processor_count)
-{
- struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context;
- instance->rrp_algo->processor_count_set (instance, processor_count);
-
- instance->processor_count = processor_count;
-
- return (0);
-}
-
-int totemrrp_token_target_set (
- void *rrp_context,
- struct totem_ip_address *addr,
- unsigned int iface_no)
-{
- struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context;
- instance->rrp_algo->token_target_set (instance, addr, iface_no);
-
- return (0);
-}
-int totemrrp_recv_flush (void *rrp_context)
-{
- struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context;
-
- instance->rrp_algo->recv_flush (instance);
-
- return (0);
-}
-
-int totemrrp_send_flush (void *rrp_context)
-{
- struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context;
- instance->rrp_algo->send_flush (instance);
-
- return (0);
-}
-
-int totemrrp_token_send (
- void *rrp_context,
- const void *msg,
- unsigned int msg_len)
-{
- struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context;
- instance->rrp_algo->token_send (instance, msg, msg_len);
-
- return (0);
-}
-
-int totemrrp_mcast_flush_send (
- void *rrp_context,
- const void *msg,
- unsigned int msg_len)
-{
- struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context;
- int res = 0;
-
-// TODO this needs to return the result
- instance->rrp_algo->mcast_flush_send (instance, msg, msg_len);
-
- return (res);
-}
-
-int totemrrp_mcast_noflush_send (
- void *rrp_context,
- const void *msg,
- unsigned int msg_len)
-{
- struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context;
- /*
- * merge detects go out through mcast_flush_send so it is safe to
- * flush these messages if we are only one processor. This avoids
- * an encryption/hmac and decryption/hmac
- */
- if (instance->processor_count > 1) {
-
-// TODO this needs to return the result
- instance->rrp_algo->mcast_noflush_send (instance, msg, msg_len);
- }
-
- return (0);
-}
-
-int totemrrp_iface_check (void *rrp_context)
-{
- struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context;
-
- instance->rrp_algo->iface_check (instance);
-
- return (0);
-}
-
-int totemrrp_ifaces_get (
- void *rrp_context,
- char ***status,
- unsigned int *iface_count)
-{
- struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context;
- *status = instance->status;
-
- if (iface_count) {
- *iface_count = instance->interface_count;
- }
-
- return (0);
-}
-
-int totemrrp_crypto_set (
- void *rrp_context,
- const char *cipher_type,
- const char *hash_type)
-{
- struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context;
- int res;
-
- res = totemnet_crypto_set(instance->net_handles[0], cipher_type, hash_type);
-
- return (res);
-}
-
-
-/*
- * iface_no indicates the interface number [0, ..., interface_count-1] of the
- * specific ring which will be reenabled. We specify iface_no == interface_count
- * means reenabling all the rings.
- */
-int totemrrp_ring_reenable (
- void *rrp_context,
- unsigned int iface_no)
-{
- struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context;
- int res = 0;
- unsigned int i;
-
- instance->rrp_algo->ring_reenable (instance, iface_no);
-
- if (iface_no == instance->interface_count) {
- for (i = 0; i < instance->interface_count; i++) {
- snprintf (instance->status[i], STATUS_STR_LEN,
- "ring %d active with no faults", i);
- }
- } else {
- snprintf (instance->status[iface_no], STATUS_STR_LEN,
- "ring %d active with no faults", iface_no);
- }
-
- return (res);
-}
-
-extern int totemrrp_mcast_recv_empty (
- void *rrp_context)
-{
- struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context;
- int res;
-
- res = instance->rrp_algo->mcast_recv_empty (instance);
-
- return (res);
-}
-
-int totemrrp_member_add (
- void *rrp_context,
- const struct totem_ip_address *member,
- int iface_no)
-{
- struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context;
- int res;
-
- res = instance->rrp_algo->member_add (instance, member, iface_no);
-
- return (res);
-}
-
-int totemrrp_member_remove (
- void *rrp_context,
- const struct totem_ip_address *member,
- int iface_no)
-{
- struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context;
- int res;
-
- res = instance->rrp_algo->member_remove (instance, member, iface_no);
-
- return (res);
-}
-
-void totemrrp_membership_changed (
- void *rrp_context,
- enum totem_configuration_type configuration_type,
- const struct srp_addr *member_list, size_t member_list_entries,
- const struct srp_addr *left_list, size_t left_list_entries,
- const struct srp_addr *joined_list, size_t joined_list_entries,
- const struct memb_ring_id *ring_id)
-{
- struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context;
-
- instance->rrp_algo->membership_changed (instance,
- configuration_type,
- member_list, member_list_entries,
- left_list, left_list_entries,
- joined_list, joined_list_entries,
- ring_id);
-}
diff --git a/exec/totemrrp.h b/exec/totemrrp.h
deleted file mode 100644
index ca570519..00000000
--- a/exec/totemrrp.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Copyright (c) 2005 MontaVista Software, Inc.
- * Copyright (c) 2006-2007, 2009 Red Hat, Inc.
- *
- * All rights reserved.
- *
- * Author: Steven Dake (sdake@redhat.com)
- *
- * This software licensed under BSD license, the text of which follows:
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- * - Neither the name of the MontaVista Software, Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/**
- * @file
- * Totem Network interface - also does encryption/decryption
- *
- * depends on poll abstraction, POSIX, IPV4
- */
-
-#ifndef TOTEMRRP_H_DEFINED
-#define TOTEMRRP_H_DEFINED
-
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <qb/qbloop.h>
-#include <corosync/totem/totem.h>
-
-#define TOTEMRRP_NOFLUSH 0
-#define TOTEMRRP_FLUSH 1
-
-/*
- * SRP address. Used mainly in totemsrp.c, but we need it here to inform RRP about
- * membership change.
- */
-struct srp_addr {
- uint8_t no_addrs;
- struct totem_ip_address addr[INTERFACE_MAX];
-};
-
-/**
- * Create an instance
- */
-extern int totemrrp_initialize (
- qb_loop_t *poll_handle,
- void **rrp_context,
- struct totem_config *totem_config,
- totemsrp_stats_t *stats,
- void *context,
-
- void (*deliver_fn) (
- void *context,
- const void *msg,
- unsigned int msg_len),
-
- void (*iface_change_fn) (
- void *context,
- const struct totem_ip_address *iface_addr,
- unsigned int iface_no),
-
- void (*token_seqid_get) (
- const void *msg,
- unsigned int *seqid,
- unsigned int *token_is),
-
- unsigned int (*msgs_missing) (void),
-
- void (*target_set_completed) (
- void *context)
- );
-
-extern void *totemrrp_buffer_alloc (
- void *rrp_context);
-
-extern void totemrrp_buffer_release (
- void *rrp_context,
- void *ptr);
-
-extern int totemrrp_processor_count_set (
- void *rrp_context,
- unsigned int processor_count);
-
-extern int totemrrp_token_send (
- void *rrp_context,
- const void *msg,
- unsigned int msg_len);
-
-extern int totemrrp_mcast_noflush_send (
- void *rrp_context,
- const void *msg,
- unsigned int msg_len);
-
-extern int totemrrp_mcast_flush_send (
- void *rrp_context,
- const void *msg,
- unsigned int msg_len);
-
-extern int totemrrp_recv_flush (
- void *rrp_context);
-
-extern int totemrrp_send_flush (
- void *rrp_context);
-
-extern int totemrrp_token_target_set (
- void *rrp_context,
- struct totem_ip_address *target,
- unsigned int iface_no);
-
-extern int totemrrp_iface_check (void *rrp_context);
-
-extern int totemrrp_finalize (void *rrp_context);
-
-extern int totemrrp_ifaces_get (
- void *rrp_context,
- char ***status,
- unsigned int *iface_count);
-
-extern int totemrrp_crypto_set (
- void *rrp_context,
- const char *cipher_type,
- const char *hash_type);
-
-extern int totemrrp_ring_reenable (
- void *rrp_context,
- unsigned int iface_no);
-
-extern int totemrrp_mcast_recv_empty (
- void *rrp_context);
-
-extern int totemrrp_member_add (
- void *net_context,
- const struct totem_ip_address *member,
- int iface_no);
-
-extern int totemrrp_member_remove (
- void *net_context,
- const struct totem_ip_address *member,
- int iface_no);
-
-extern void totemrrp_membership_changed (
- void *rrp_context,
- enum totem_configuration_type configuration_type,
- const struct srp_addr *member_list, size_t member_list_entries,
- const struct srp_addr *left_list, size_t left_list_entries,
- const struct srp_addr *joined_list, size_t joined_list_entries,
- const struct memb_ring_id *ring_id);
-
-#endif /* TOTEMRRP_H_DEFINED */
diff --git a/exec/totemsrp.c b/exec/totemsrp.c
index f80f8e41..3cee4576 100644
--- a/exec/totemsrp.c
+++ b/exec/totemsrp.c
@@ -1,4849 +1,4784 @@
/*
* Copyright (c) 2003-2006 MontaVista Software, Inc.
* Copyright (c) 2006-2009 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* The first version of this code was based upon Yair Amir's PhD thesis:
* http://www.cs.jhu.edu/~yairamir/phd.ps) (ch4,5).
*
* The current version of totemsrp implements the Totem protocol specified in:
* http://citeseer.ist.psu.edu/amir95totem.html
*
* The deviations from the above published protocols are:
* - encryption of message contents with nss
* - authentication of meessage contents with SHA1/HMAC
* - token hold mode where token doesn't rotate on unused ring - reduces cpu
* usage on 1.6ghz xeon from 35% to less then .1 % as measured by top
*/
#include <config.h>
#include <assert.h>
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netdb.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <sched.h>
#include <time.h>
#include <sys/time.h>
#include <sys/poll.h>
#include <sys/uio.h>
#include <limits.h>
#include <qb/qbdefs.h>
#include <qb/qbutil.h>
#include <qb/qbloop.h>
#include <corosync/swab.h>
#include <corosync/sq.h>
#include <corosync/list.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/logsys.h>
#include "totemsrp.h"
-#include "totemrrp.h"
#include "totemnet.h"
#include "cs_queue.h"
#define LOCALHOST_IP inet_addr("127.0.0.1")
#define QUEUE_RTR_ITEMS_SIZE_MAX 16384 /* allow 16384 retransmit items */
#define RETRANS_MESSAGE_QUEUE_SIZE_MAX 16384 /* allow 500 messages to be queued */
#define RECEIVED_MESSAGE_QUEUE_SIZE_MAX 500 /* allow 500 messages to be queued */
#define MAXIOVS 5
#define RETRANSMIT_ENTRIES_MAX 30
#define TOKEN_SIZE_MAX 64000 /* bytes */
#define LEAVE_DUMMY_NODEID 0
+/*
+ * SRP address.
+ * CC: TODO: Can we remove IP address from this and just use nodeids?
+ */
+struct srp_addr {
+ uint8_t no_addrs;
+ struct totem_ip_address addr[INTERFACE_MAX];
+};
+
/*
* Rollover handling:
* SEQNO_START_MSG is the starting sequence number after a new configuration
* This should remain zero, unless testing overflow in which case
* 0x7ffff000 and 0xfffff000 are good starting values.
*
* SEQNO_START_TOKEN is the starting sequence number after a new configuration
* for a token. This should remain zero, unless testing overflow in which
* case 07fffff00 or 0xffffff00 are good starting values.
*/
#define SEQNO_START_MSG 0x0
#define SEQNO_START_TOKEN 0x0
/*
* These can be used ot test different rollover points
* #define SEQNO_START_MSG 0xfffffe00
* #define SEQNO_START_TOKEN 0xfffffe00
*/
/*
* These can be used to test the error recovery algorithms
* #define TEST_DROP_ORF_TOKEN_PERCENTAGE 30
* #define TEST_DROP_COMMIT_TOKEN_PERCENTAGE 30
* #define TEST_DROP_MCAST_PERCENTAGE 50
* #define TEST_RECOVERY_MSG_COUNT 300
*/
/*
* we compare incoming messages to determine if their endian is
* different - if so convert them
*
* do not change
*/
#define ENDIAN_LOCAL 0xff22
enum message_type {
MESSAGE_TYPE_ORF_TOKEN = 0, /* Ordering, Reliability, Flow (ORF) control Token */
MESSAGE_TYPE_MCAST = 1, /* ring ordered multicast message */
MESSAGE_TYPE_MEMB_MERGE_DETECT = 2, /* merge rings if there are available rings */
MESSAGE_TYPE_MEMB_JOIN = 3, /* membership join message */
MESSAGE_TYPE_MEMB_COMMIT_TOKEN = 4, /* membership commit token */
MESSAGE_TYPE_TOKEN_HOLD_CANCEL = 5, /* cancel the holding of the token */
};
enum encapsulation_type {
MESSAGE_ENCAPSULATED = 1,
MESSAGE_NOT_ENCAPSULATED = 2
};
/*
* New membership algorithm local variables
*/
struct consensus_list_item {
struct srp_addr addr;
int set;
};
struct token_callback_instance {
struct list_head list;
int (*callback_fn) (enum totem_callback_token_type type, const void *);
enum totem_callback_token_type callback_type;
int delete;
void *data;
};
struct totemsrp_socket {
int mcast;
int token;
};
-struct message_header {
- char type;
- char encapsulated;
- unsigned short endian_detector;
- unsigned int nodeid;
-} __attribute__((packed));
-
-
struct mcast {
- struct message_header header;
+ struct totem_message_header header;
struct srp_addr system_from;
unsigned int seq;
int this_seqno;
struct memb_ring_id ring_id;
unsigned int node_id;
int guarantee;
} __attribute__((packed));
struct rtr_item {
struct memb_ring_id ring_id;
unsigned int seq;
}__attribute__((packed));
struct orf_token {
- struct message_header header;
+ struct totem_message_header header;
unsigned int seq;
unsigned int token_seq;
unsigned int aru;
unsigned int aru_addr;
struct memb_ring_id ring_id;
unsigned int backlog;
unsigned int fcc;
int retrans_flg;
int rtr_list_entries;
struct rtr_item rtr_list[0];
}__attribute__((packed));
struct memb_join {
- struct message_header header;
+ struct totem_message_header header;
struct srp_addr system_from;
unsigned int proc_list_entries;
unsigned int failed_list_entries;
unsigned long long ring_seq;
unsigned char end_of_memb_join[0];
/*
* These parts of the data structure are dynamic:
* struct srp_addr proc_list[];
* struct srp_addr failed_list[];
*/
} __attribute__((packed));
struct memb_merge_detect {
- struct message_header header;
+ struct totem_message_header header;
struct srp_addr system_from;
struct memb_ring_id ring_id;
} __attribute__((packed));
struct token_hold_cancel {
- struct message_header header;
+ struct totem_message_header header;
struct memb_ring_id ring_id;
} __attribute__((packed));
struct memb_commit_token_memb_entry {
struct memb_ring_id ring_id;
unsigned int aru;
unsigned int high_delivered;
unsigned int received_flg;
}__attribute__((packed));
struct memb_commit_token {
- struct message_header header;
+ struct totem_message_header header;
unsigned int token_seq;
struct memb_ring_id ring_id;
unsigned int retrans_flg;
int memb_index;
int addr_entries;
unsigned char end_of_commit_token[0];
/*
* These parts of the data structure are dynamic:
*
* struct srp_addr addr[PROCESSOR_COUNT_MAX];
* struct memb_commit_token_memb_entry memb_list[PROCESSOR_COUNT_MAX];
*/
}__attribute__((packed));
struct message_item {
struct mcast *mcast;
unsigned int msg_len;
};
struct sort_queue_item {
struct mcast *mcast;
unsigned int msg_len;
};
enum memb_state {
MEMB_STATE_OPERATIONAL = 1,
MEMB_STATE_GATHER = 2,
MEMB_STATE_COMMIT = 3,
MEMB_STATE_RECOVERY = 4
};
struct totemsrp_instance {
int iface_changes;
int failed_to_recv;
/*
* Flow control mcasts and remcasts on last and current orf_token
*/
int fcc_remcast_last;
int fcc_mcast_last;
int fcc_remcast_current;
struct consensus_list_item consensus_list[PROCESSOR_COUNT_MAX];
int consensus_list_entries;
struct srp_addr my_id;
struct srp_addr my_proc_list[PROCESSOR_COUNT_MAX];
struct srp_addr my_failed_list[PROCESSOR_COUNT_MAX];
struct srp_addr my_new_memb_list[PROCESSOR_COUNT_MAX];
struct srp_addr my_trans_memb_list[PROCESSOR_COUNT_MAX];
struct srp_addr my_memb_list[PROCESSOR_COUNT_MAX];
struct srp_addr my_deliver_memb_list[PROCESSOR_COUNT_MAX];
struct srp_addr my_left_memb_list[PROCESSOR_COUNT_MAX];
unsigned int my_leave_memb_list[PROCESSOR_COUNT_MAX];
-
+
int my_proc_list_entries;
int my_failed_list_entries;
int my_new_memb_entries;
int my_trans_memb_entries;
int my_memb_entries;
int my_deliver_memb_entries;
int my_left_memb_entries;
-
+
int my_leave_memb_entries;
struct memb_ring_id my_ring_id;
struct memb_ring_id my_old_ring_id;
int my_aru_count;
int my_merge_detect_timeout_outstanding;
unsigned int my_last_aru;
int my_seq_unchanged;
int my_received_flg;
unsigned int my_high_seq_received;
unsigned int my_install_seq;
int my_rotation_counter;
int my_set_retrans_flg;
int my_retrans_flg_count;
unsigned int my_high_ring_delivered;
int heartbeat_timeout;
/*
* Queues used to order, deliver, and recover messages
*/
struct cs_queue new_message_queue;
struct cs_queue new_message_queue_trans;
struct cs_queue retrans_message_queue;
struct sq regular_sort_queue;
struct sq recovery_sort_queue;
/*
* Received up to and including
*/
unsigned int my_aru;
unsigned int my_high_delivered;
struct list_head token_callback_received_listhead;
struct list_head token_callback_sent_listhead;
char orf_token_retransmit[TOKEN_SIZE_MAX];
int orf_token_retransmit_size;
unsigned int my_token_seq;
/*
* Timers
*/
qb_loop_timer_handle timer_pause_timeout;
qb_loop_timer_handle timer_orf_token_timeout;
qb_loop_timer_handle timer_orf_token_retransmit_timeout;
qb_loop_timer_handle timer_orf_token_hold_retransmit_timeout;
qb_loop_timer_handle timer_merge_detect_timeout;
qb_loop_timer_handle memb_timer_state_gather_join_timeout;
qb_loop_timer_handle memb_timer_state_gather_consensus_timeout;
qb_loop_timer_handle memb_timer_state_commit_timeout;
qb_loop_timer_handle timer_heartbeat_timeout;
/*
* Function and data used to log messages
*/
int totemsrp_log_level_security;
int totemsrp_log_level_error;
int totemsrp_log_level_warning;
int totemsrp_log_level_notice;
int totemsrp_log_level_debug;
int totemsrp_log_level_trace;
int totemsrp_subsys_id;
void (*totemsrp_log_printf) (
int level,
int sybsys,
const char *function,
const char *file,
int line,
const char *format, ...)__attribute__((format(printf, 6, 7)));;
enum memb_state memb_state;
//TODO struct srp_addr next_memb;
qb_loop_t *totemsrp_poll_handle;
struct totem_ip_address mcast_address;
void (*totemsrp_deliver_fn) (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required);
void (*totemsrp_confchg_fn) (
enum totem_configuration_type configuration_type,
const unsigned int *member_list, size_t member_list_entries,
const unsigned int *left_list, size_t left_list_entries,
const unsigned int *joined_list, size_t joined_list_entries,
const struct memb_ring_id *ring_id);
void (*totemsrp_service_ready_fn) (void);
void (*totemsrp_waiting_trans_ack_cb_fn) (
int waiting_trans_ack);
void (*memb_ring_id_create_or_load) (
struct memb_ring_id *memb_ring_id,
const struct totem_ip_address *addr);
void (*memb_ring_id_store) (
const struct memb_ring_id *memb_ring_id,
const struct totem_ip_address *addr);
int global_seqno;
int my_token_held;
unsigned long long token_ring_id_seq;
unsigned int last_released;
unsigned int set_aru;
int old_ring_state_saved;
int old_ring_state_aru;
unsigned int old_ring_state_high_seq_received;
unsigned int my_last_seq;
struct timeval tv_old;
- void *totemrrp_context;
+ void *totemnet_context;
struct totem_config *totem_config;
unsigned int use_heartbeat;
unsigned int my_trc;
unsigned int my_pbl;
unsigned int my_cbl;
uint64_t pause_timestamp;
struct memb_commit_token *commit_token;
totemsrp_stats_t stats;
uint32_t orf_token_discard;
uint32_t originated_orf_token;
uint32_t threaded_mode_enabled;
uint32_t waiting_trans_ack;
int flushing;
-
+
void * token_recv_event_handle;
void * token_sent_event_handle;
char commit_token_storage[40000];
};
/*
 * Table of message handler callbacks.
 *
 * count             - number of entries in handler_functions
 * handler_functions - up to 6 handlers; each receives the instance, the raw
 *                     message, its length and a flag telling whether endian
 *                     conversion is needed
 */
struct message_handlers {
int count;
int (*handler_functions[6]) (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
};
/*
 * Reason codes passed to memb_state_gather_enter().
 * The values are used as indexes into gather_state_from_desc[], so every
 * value added here needs a matching description there.
 * TOTEMSRP_GSFROM_MAX aliases the highest valid value and is the upper bound
 * checked by gsfrom_to_msg().
 */
enum gather_state_from {
TOTEMSRP_GSFROM_CONSENSUS_TIMEOUT = 0,
TOTEMSRP_GSFROM_GATHER_MISSING1 = 1,
TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_OPERATIONAL_STATE = 2,
TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED = 3,
TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_COMMIT_STATE = 4,
TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_RECOVERY_STATE = 5,
TOTEMSRP_GSFROM_FAILED_TO_RECEIVE = 6,
TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_OPERATIONAL_STATE = 7,
TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_GATHER_STATE = 8,
TOTEMSRP_GSFROM_MERGE_DURING_OPERATIONAL_STATE = 9,
TOTEMSRP_GSFROM_MERGE_DURING_GATHER_STATE = 10,
TOTEMSRP_GSFROM_MERGE_DURING_JOIN = 11,
TOTEMSRP_GSFROM_JOIN_DURING_OPERATIONAL_STATE = 12,
TOTEMSRP_GSFROM_JOIN_DURING_COMMIT_STATE = 13,
TOTEMSRP_GSFROM_JOIN_DURING_RECOVERY = 14,
TOTEMSRP_GSFROM_INTERFACE_CHANGE = 15,
TOTEMSRP_GSFROM_MAX = TOTEMSRP_GSFROM_INTERFACE_CHANGE,
};
/*
 * Human readable description for each enum gather_state_from value, indexed
 * by the enum value through designated initialisers. Looked up by
 * gsfrom_to_msg().
 */
const char* gather_state_from_desc [] = {
[TOTEMSRP_GSFROM_CONSENSUS_TIMEOUT] = "consensus timeout",
[TOTEMSRP_GSFROM_GATHER_MISSING1] = "MISSING",
[TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_OPERATIONAL_STATE] = "The token was lost in the OPERATIONAL state.",
[TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED] = "The consensus timeout expired.",
[TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_COMMIT_STATE] = "The token was lost in the COMMIT state.",
[TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_RECOVERY_STATE] = "The token was lost in the RECOVERY state.",
[TOTEMSRP_GSFROM_FAILED_TO_RECEIVE] = "failed to receive",
[TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_OPERATIONAL_STATE] = "foreign message in operational state",
[TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_GATHER_STATE] = "foreign message in gather state",
[TOTEMSRP_GSFROM_MERGE_DURING_OPERATIONAL_STATE] = "merge during operational state",
[TOTEMSRP_GSFROM_MERGE_DURING_GATHER_STATE] = "merge during gather state",
[TOTEMSRP_GSFROM_MERGE_DURING_JOIN] = "merge during join",
[TOTEMSRP_GSFROM_JOIN_DURING_OPERATIONAL_STATE] = "join during operational state",
[TOTEMSRP_GSFROM_JOIN_DURING_COMMIT_STATE] = "join during commit state",
[TOTEMSRP_GSFROM_JOIN_DURING_RECOVERY] = "join during recovery",
[TOTEMSRP_GSFROM_INTERFACE_CHANGE] = "interface change",
};
/*
* forward decls
*/
static int message_handler_orf_token (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
static int message_handler_mcast (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
static int message_handler_memb_merge_detect (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
static int message_handler_memb_join (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
static int message_handler_memb_commit_token (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
static int message_handler_token_hold_cancel (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
static void totemsrp_instance_initialize (struct totemsrp_instance *instance);
-static unsigned int main_msgs_missing (void);
-
-static void main_token_seqid_get (
- const void *msg,
- unsigned int *seqid,
- unsigned int *token_is);
-
static void srp_addr_copy (struct srp_addr *dest, const struct srp_addr *src);
static void srp_addr_to_nodeid (
unsigned int *nodeid_out,
struct srp_addr *srp_addr_in,
unsigned int entries);
static int srp_addr_equal (const struct srp_addr *a, const struct srp_addr *b);
static void memb_leave_message_send (struct totemsrp_instance *instance);
static void token_callbacks_execute (struct totemsrp_instance *instance, enum totem_callback_token_type type);
static void memb_state_gather_enter (struct totemsrp_instance *instance, enum gather_state_from gather_from);
static void messages_deliver_to_app (struct totemsrp_instance *instance, int skip, unsigned int end_point);
static int orf_token_mcast (struct totemsrp_instance *instance, struct orf_token *oken,
int fcc_mcasts_allowed);
static void messages_free (struct totemsrp_instance *instance, unsigned int token_aru);
static void memb_ring_id_set (struct totemsrp_instance *instance,
const struct memb_ring_id *ring_id);
static void target_set_completed (void *context);
static void memb_state_commit_token_update (struct totemsrp_instance *instance);
static void memb_state_commit_token_target_set (struct totemsrp_instance *instance);
static int memb_state_commit_token_send (struct totemsrp_instance *instance);
static int memb_state_commit_token_send_recovery (struct totemsrp_instance *instance, struct memb_commit_token *memb_commit_token);
static void memb_state_commit_token_create (struct totemsrp_instance *instance);
static int token_hold_cancel_send (struct totemsrp_instance *instance);
static void orf_token_endian_convert (const struct orf_token *in, struct orf_token *out);
static void memb_commit_token_endian_convert (const struct memb_commit_token *in, struct memb_commit_token *out);
static void memb_join_endian_convert (const struct memb_join *in, struct memb_join *out);
static void mcast_endian_convert (const struct mcast *in, struct mcast *out);
static void memb_merge_detect_endian_convert (
const struct memb_merge_detect *in,
struct memb_merge_detect *out);
static void srp_addr_copy_endian_convert (struct srp_addr *out, const struct srp_addr *in);
static void timer_function_orf_token_timeout (void *data);
static void timer_function_pause_timeout (void *data);
static void timer_function_heartbeat_timeout (void *data);
static void timer_function_token_retransmit_timeout (void *data);
static void timer_function_token_hold_retransmit_timeout (void *data);
static void timer_function_merge_detect_timeout (void *data);
static void *totemsrp_buffer_alloc (struct totemsrp_instance *instance);
static void totemsrp_buffer_release (struct totemsrp_instance *instance, void *ptr);
static const char* gsfrom_to_msg(enum gather_state_from gsfrom);
void main_deliver_fn (
void *context,
const void *msg,
unsigned int msg_len);
void main_iface_change_fn (
void *context,
const struct totem_ip_address *iface_address,
unsigned int iface_no);
/*
 * The handler table for this module: six handlers, listed in the order of
 * the MESSAGE_TYPE_* constant named next to each entry.
 * NOTE(review): the table position presumably equals the numeric message
 * type value - confirm against the MESSAGE_TYPE_* definitions.
 */
struct message_handlers totemsrp_message_handlers = {
6,
{
message_handler_orf_token, /* MESSAGE_TYPE_ORF_TOKEN */
message_handler_mcast, /* MESSAGE_TYPE_MCAST */
message_handler_memb_merge_detect, /* MESSAGE_TYPE_MEMB_MERGE_DETECT */
message_handler_memb_join, /* MESSAGE_TYPE_MEMB_JOIN */
message_handler_memb_commit_token, /* MESSAGE_TYPE_MEMB_COMMIT_TOKEN */
message_handler_token_hold_cancel /* MESSAGE_TYPE_TOKEN_HOLD_CANCEL */
}
};
/*
 * Log a printf-style message through the instance's logging callback,
 * tagging it with the subsystem id and the calling function, file and line.
 *
 * A variable named `instance` (struct totemsrp_instance *) must be in scope
 * at the call site.
 *
 * The expansion deliberately ends WITHOUT a semicolon: the caller's own ';'
 * completes the do/while statement, so the macro behaves like a single
 * statement and can be used safely in an unbraced if/else.
 */
#define log_printf(level, format, args...) \
do { \
	instance->totemsrp_log_printf ( \
		level, instance->totemsrp_subsys_id, \
		__FUNCTION__, __FILE__, __LINE__, \
		format, ##args); \
} while (0)
/*
 * Log a printf-style message followed by ": <error text> (<err_num>)" and a
 * newline through the instance's logging callback.
 *
 * err_num is turned into text with qb_strerror_r() using a local buffer of
 * LOGSYS_MAX_PERROR_MSG_LEN bytes. fmt must be a string literal because it is
 * concatenated with the suffix at compile time. A variable named `instance`
 * must be in scope at the call site.
 */
#define LOGSYS_PERROR(err_num, level, fmt, args...) \
do { \
char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \
const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \
instance->totemsrp_log_printf ( \
level, instance->totemsrp_subsys_id, \
__FUNCTION__, __FILE__, __LINE__, \
fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \
} while(0)
/*
 * Map a gather_state_from reason code to its description string.
 * Codes greater than TOTEMSRP_GSFROM_MAX yield "UNKNOWN"; all other codes are
 * looked up in gather_state_from_desc[].
 */
static const char* gsfrom_to_msg(enum gather_state_from gsfrom)
{
	return (gsfrom > TOTEMSRP_GSFROM_MAX)
		? "UNKNOWN"
		: gather_state_from_desc[gsfrom];
}
/*
 * Put a totemsrp instance into its initial state: zero the whole structure,
 * then set every field whose starting value is not zero (the two explicit
 * zero assignments are kept for clarity).
 */
static void totemsrp_instance_initialize (struct totemsrp_instance *instance)
{
	memset (instance, 0, sizeof (struct totemsrp_instance));

	/* Token callback lists start out empty */
	list_init (&instance->token_callback_received_listhead);
	list_init (&instance->token_callback_sent_listhead);

	/* Membership state and identity */
	instance->memb_state = MEMB_STATE_OPERATIONAL;
	instance->my_id.no_addrs = INTERFACE_MAX;
	instance->my_received_flg = 1;
	instance->waiting_trans_ack = 1;

	/* Sequence numbers */
	instance->my_token_seq = SEQNO_START_TOKEN - 1;
	instance->my_aru = SEQNO_START_MSG;
	instance->my_high_seq_received = SEQNO_START_MSG;
	instance->my_high_delivered = SEQNO_START_MSG;
	instance->set_aru = -1;

	/* Token bookkeeping */
	instance->orf_token_discard = 0;
	instance->originated_orf_token = 0;

	/* The commit token lives in the storage embedded in the instance */
	instance->commit_token = (struct memb_commit_token *)instance->commit_token_storage;
}
-static void main_token_seqid_get (
- const void *msg,
- unsigned int *seqid,
- unsigned int *token_is)
-{
- const struct orf_token *token = msg;
-
- *seqid = 0;
- *token_is = 0;
- if (token->header.type == MESSAGE_TYPE_ORF_TOKEN) {
- *seqid = token->token_seq;
- *token_is = 1;
- }
-}
-
-static unsigned int main_msgs_missing (void)
-{
-// TODO
- return (0);
-}
-
static int pause_flush (struct totemsrp_instance *instance)
{
uint64_t now_msec;
uint64_t timestamp_msec;
int res = 0;
now_msec = (qb_util_nano_current_get () / QB_TIME_NS_IN_MSEC);
timestamp_msec = instance->pause_timestamp / QB_TIME_NS_IN_MSEC;
if ((now_msec - timestamp_msec) > (instance->totem_config->token_timeout / 2)) {
log_printf (instance->totemsrp_log_level_notice,
"Process pause detected for %d ms, flushing membership messages.", (unsigned int)(now_msec - timestamp_msec));
/*
* -1 indicates an error from recvmsg
*/
do {
- res = totemrrp_mcast_recv_empty (instance->totemrrp_context);
+ res = totemnet_recv_mcast_empty (instance->totemnet_context);
} while (res == -1);
}
return (res);
}
/*
 * Token callback that records token receive/send times (in milliseconds) in
 * instance->stats.token[].
 *
 * For TOTEM_CALLBACK_TOKEN_RECEIVED the latest_token index is advanced,
 * wrapping to 0 after TOTEM_TOKEN_STATS_MAX - 1. If it then equals
 * earliest_token, earliest_token is advanced the same way and the slot it
 * lands on is cleared. The latest slot gets the receive time and a zeroed
 * send time. For any other callback type only the send time of the latest
 * slot is written.
 *
 * void_instance is the struct totemsrp_instance. Always returns 0.
 */
static int token_event_stats_collector (enum totem_callback_token_type type, const void *void_instance)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)void_instance;
	unsigned long long nano_secs = qb_util_nano_current_get ();
	uint32_t time_now = (nano_secs / QB_TIME_NS_IN_MSEC);

	if (type != TOTEM_CALLBACK_TOKEN_RECEIVED) {
		instance->stats.token[instance->stats.latest_token].tx = time_now;
		return 0;
	}

	/* Advance the index of the latest token, wrapping at the end */
	instance->stats.latest_token =
		(instance->stats.latest_token == (TOTEM_TOKEN_STATS_MAX - 1))
		? 0
		: instance->stats.latest_token + 1;

	if (instance->stats.earliest_token == instance->stats.latest_token) {
		/* The array is full: move the earliest index on and clear that slot */
		instance->stats.earliest_token =
			(instance->stats.earliest_token == (TOTEM_TOKEN_STATS_MAX - 1))
			? 0
			: instance->stats.earliest_token + 1;

		instance->stats.token[instance->stats.earliest_token].rx = 0;
		instance->stats.token[instance->stats.earliest_token].tx = 0;
		instance->stats.token[instance->stats.earliest_token].backlog_calc = 0;
	}

	instance->stats.token[instance->stats.latest_token].rx = time_now;
	/* tx stays 0 in case we drop the token */
	instance->stats.token[instance->stats.latest_token].tx = 0;

	return 0;
}
+static void totempg_mtu_changed(void *context, int net_mtu)
+{
+ struct totemsrp_instance *instance = context;
+
+ instance->totem_config->net_mtu = net_mtu - sizeof (struct mcast);
+
+ log_printf (instance->totemsrp_log_level_debug,
+ "Net MTU changed to %d, new value is %d",
+ net_mtu, instance->totem_config->net_mtu);
+}
+
/*
* Exported interfaces
*/
int totemsrp_initialize (
qb_loop_t *poll_handle,
void **srp_context,
struct totem_config *totem_config,
- totemmrp_stats_t *stats,
+ totempg_stats_t *stats,
void (*deliver_fn) (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required),
void (*confchg_fn) (
enum totem_configuration_type configuration_type,
const unsigned int *member_list, size_t member_list_entries,
const unsigned int *left_list, size_t left_list_entries,
const unsigned int *joined_list, size_t joined_list_entries,
const struct memb_ring_id *ring_id),
void (*waiting_trans_ack_cb_fn) (
int waiting_trans_ack))
{
struct totemsrp_instance *instance;
instance = malloc (sizeof (struct totemsrp_instance));
if (instance == NULL) {
goto error_exit;
}
totemsrp_instance_initialize (instance);
instance->totemsrp_waiting_trans_ack_cb_fn = waiting_trans_ack_cb_fn;
instance->totemsrp_waiting_trans_ack_cb_fn (1);
stats->srp = &instance->stats;
instance->stats.latest_token = 0;
instance->stats.earliest_token = 0;
instance->totem_config = totem_config;
/*
* Configure logging
*/
instance->totemsrp_log_level_security = totem_config->totem_logging_configuration.log_level_security;
instance->totemsrp_log_level_error = totem_config->totem_logging_configuration.log_level_error;
instance->totemsrp_log_level_warning = totem_config->totem_logging_configuration.log_level_warning;
instance->totemsrp_log_level_notice = totem_config->totem_logging_configuration.log_level_notice;
instance->totemsrp_log_level_debug = totem_config->totem_logging_configuration.log_level_debug;
instance->totemsrp_log_level_trace = totem_config->totem_logging_configuration.log_level_trace;
instance->totemsrp_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
instance->totemsrp_log_printf = totem_config->totem_logging_configuration.log_printf;
/*
* Configure totem store and load functions
*/
instance->memb_ring_id_create_or_load = totem_config->totem_memb_ring_id_create_or_load;
instance->memb_ring_id_store = totem_config->totem_memb_ring_id_store;
/*
* Initialize local variables for totemsrp
*/
totemip_copy (&instance->mcast_address, &totem_config->interfaces[0].mcast_addr);
/*
* Display totem configuration
*/
log_printf (instance->totemsrp_log_level_debug,
"Token Timeout (%d ms) retransmit timeout (%d ms)",
totem_config->token_timeout, totem_config->token_retransmit_timeout);
log_printf (instance->totemsrp_log_level_debug,
"token hold (%d ms) retransmits before loss (%d retrans)",
totem_config->token_hold_timeout, totem_config->token_retransmits_before_loss_const);
log_printf (instance->totemsrp_log_level_debug,
"join (%d ms) send_join (%d ms) consensus (%d ms) merge (%d ms)",
totem_config->join_timeout,
totem_config->send_join_timeout,
totem_config->consensus_timeout,
totem_config->merge_timeout);
log_printf (instance->totemsrp_log_level_debug,
"downcheck (%d ms) fail to recv const (%d msgs)",
totem_config->downcheck_timeout, totem_config->fail_to_recv_const);
log_printf (instance->totemsrp_log_level_debug,
"seqno unchanged const (%d rotations) Maximum network MTU %d", totem_config->seqno_unchanged_const, totem_config->net_mtu);
log_printf (instance->totemsrp_log_level_debug,
"window size per rotation (%d messages) maximum messages per rotation (%d messages)",
totem_config->window_size, totem_config->max_messages);
log_printf (instance->totemsrp_log_level_debug,
"missed count const (%d messages)",
totem_config->miss_count_const);
log_printf (instance->totemsrp_log_level_debug,
"send threads (%d threads)", totem_config->threads);
- log_printf (instance->totemsrp_log_level_debug,
- "RRP token expired timeout (%d ms)",
- totem_config->rrp_token_expired_timeout);
- log_printf (instance->totemsrp_log_level_debug,
- "RRP token problem counter (%d ms)",
- totem_config->rrp_problem_count_timeout);
- log_printf (instance->totemsrp_log_level_debug,
- "RRP threshold (%d problem count)",
- totem_config->rrp_problem_count_threshold);
- log_printf (instance->totemsrp_log_level_debug,
- "RRP multicast threshold (%d problem count)",
- totem_config->rrp_problem_count_mcast_threshold);
- log_printf (instance->totemsrp_log_level_debug,
- "RRP automatic recovery check timeout (%d ms)",
- totem_config->rrp_autorecovery_check_timeout);
- log_printf (instance->totemsrp_log_level_debug,
- "RRP mode set to %s.", instance->totem_config->rrp_mode);
log_printf (instance->totemsrp_log_level_debug,
"heartbeat_failures_allowed (%d)", totem_config->heartbeat_failures_allowed);
log_printf (instance->totemsrp_log_level_debug,
"max_network_delay (%d ms)", totem_config->max_network_delay);
cs_queue_init (&instance->retrans_message_queue, RETRANS_MESSAGE_QUEUE_SIZE_MAX,
sizeof (struct message_item), instance->threaded_mode_enabled);
sq_init (&instance->regular_sort_queue,
QUEUE_RTR_ITEMS_SIZE_MAX, sizeof (struct sort_queue_item), 0);
sq_init (&instance->recovery_sort_queue,
QUEUE_RTR_ITEMS_SIZE_MAX, sizeof (struct sort_queue_item), 0);
instance->totemsrp_poll_handle = poll_handle;
instance->totemsrp_deliver_fn = deliver_fn;
instance->totemsrp_confchg_fn = confchg_fn;
instance->use_heartbeat = 1;
timer_function_pause_timeout (instance);
if ( totem_config->heartbeat_failures_allowed == 0 ) {
log_printf (instance->totemsrp_log_level_debug,
"HeartBeat is Disabled. To enable set heartbeat_failures_allowed > 0");
instance->use_heartbeat = 0;
}
if (instance->use_heartbeat) {
instance->heartbeat_timeout
= (totem_config->heartbeat_failures_allowed) * totem_config->token_retransmit_timeout
+ totem_config->max_network_delay;
if (instance->heartbeat_timeout >= totem_config->token_timeout) {
log_printf (instance->totemsrp_log_level_debug,
"total heartbeat_timeout (%d ms) is not less than token timeout (%d ms)",
instance->heartbeat_timeout,
totem_config->token_timeout);
log_printf (instance->totemsrp_log_level_debug,
"heartbeat_timeout = heartbeat_failures_allowed * token_retransmit_timeout + max_network_delay");
log_printf (instance->totemsrp_log_level_debug,
"heartbeat timeout should be less than the token timeout. Heartbeat is disabled!!");
instance->use_heartbeat = 0;
}
else {
log_printf (instance->totemsrp_log_level_debug,
"total heartbeat_timeout (%d ms)", instance->heartbeat_timeout);
}
}
- totemrrp_initialize (
+ totemnet_initialize (
poll_handle,
- &instance->totemrrp_context,
+ &instance->totemnet_context,
totem_config,
stats->srp,
instance,
main_deliver_fn,
main_iface_change_fn,
- main_token_seqid_get,
- main_msgs_missing,
+ totempg_mtu_changed,
target_set_completed);
/*
- * Must have net_mtu adjusted by totemrrp_initialize first
+ * Must have net_mtu adjusted by totemnet_initialize first
*/
cs_queue_init (&instance->new_message_queue,
MESSAGE_QUEUE_MAX,
sizeof (struct message_item), instance->threaded_mode_enabled);
cs_queue_init (&instance->new_message_queue_trans,
MESSAGE_QUEUE_MAX,
sizeof (struct message_item), instance->threaded_mode_enabled);
totemsrp_callback_token_create (instance,
&instance->token_recv_event_handle,
TOTEM_CALLBACK_TOKEN_RECEIVED,
0,
token_event_stats_collector,
instance);
totemsrp_callback_token_create (instance,
&instance->token_sent_event_handle,
TOTEM_CALLBACK_TOKEN_SENT,
0,
token_event_stats_collector,
instance);
*srp_context = instance;
return (0);
error_exit:
return (-1);
}
void totemsrp_finalize (
void *srp_context)
{
struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
-
memb_leave_message_send (instance);
- totemrrp_finalize (instance->totemrrp_context);
+ totemnet_finalize (instance->totemnet_context);
cs_queue_free (&instance->new_message_queue);
cs_queue_free (&instance->new_message_queue_trans);
cs_queue_free (&instance->retrans_message_queue);
sq_free (&instance->regular_sort_queue);
sq_free (&instance->recovery_sort_queue);
free (instance);
}
/*
* Return configured interfaces. interfaces is an array of totem_ip addresses allocated by the caller,
* with interfaces_size number of items. iface_count is the final number of interfaces filled by this
* function.
*
* Function returns 0 on success, otherwise if interfaces array is not big enough, -2 is returned,
* and if interface was not found, -1 is returned.
*/
int totemsrp_ifaces_get (
void *srp_context,
unsigned int nodeid,
struct totem_ip_address *interfaces,
unsigned int interfaces_size,
char ***status,
unsigned int *iface_count)
{
struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
int res = 0;
unsigned int found = 0;
unsigned int i;
for (i = 0; i < instance->my_memb_entries; i++) {
if (instance->my_memb_list[i].addr[0].nodeid == nodeid) {
found = 1;
break;
}
}
if (found) {
*iface_count = instance->totem_config->interface_count;
if (interfaces_size >= *iface_count) {
memcpy (interfaces, instance->my_memb_list[i].addr,
sizeof (struct totem_ip_address) * *iface_count);
} else {
res = -2;
}
goto finish;
}
for (i = 0; i < instance->my_left_memb_entries; i++) {
if (instance->my_left_memb_list[i].addr[0].nodeid == nodeid) {
found = 1;
break;
}
}
if (found) {
*iface_count = instance->totem_config->interface_count;
if (interfaces_size >= *iface_count) {
memcpy (interfaces, instance->my_left_memb_list[i].addr,
sizeof (struct totem_ip_address) * *iface_count);
} else {
res = -2;
}
} else {
res = -1;
}
finish:
- totemrrp_ifaces_get (instance->totemrrp_context, status, NULL);
+ totemnet_ifaces_get(instance->totemnet_context, status, iface_count);
return (res);
}
int totemsrp_crypto_set (
void *srp_context,
const char *cipher_type,
const char *hash_type)
{
struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
int res;
- res = totemrrp_crypto_set(instance->totemrrp_context, cipher_type, hash_type);
+ res = totemnet_crypto_set(instance->totemnet_context, cipher_type, hash_type);
return (res);
}
/*
 * Return the node id of the address that interface 0 of the totem
 * configuration is bound to.
 */
unsigned int totemsrp_my_nodeid_get (
	void *srp_context)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;

	return (instance->totem_config->interfaces[0].boundto.nodeid);
}
/*
 * Return the address family of the address that interface 0 of the totem
 * configuration is bound to.
 */
int totemsrp_my_family_get (
	void *srp_context)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;

	return (instance->totem_config->interfaces[0].boundto.family);
}
-
int totemsrp_ring_reenable (
void *srp_context)
{
- struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
-
- totemrrp_ring_reenable (instance->totemrrp_context,
- instance->totem_config->interface_count);
-
- return (0);
+ return (0);
}
/*
* Set operations for use by the membership algorithm
*/
/*
 * Compare two srp addresses. Only the first address entry (addr[0]) of each
 * is compared, using totemip_equal().
 * Returns 1 when they are equal, 0 otherwise.
 */
static int srp_addr_equal (const struct srp_addr *a, const struct srp_addr *b)
{
	if (totemip_equal (&a->addr[0], &b->addr[0]) == 0) {
		return (0);
	}
	return (1);
}
/*
 * Copy src into dest: the address count and all INTERFACE_MAX address
 * entries.
 */
static void srp_addr_copy (struct srp_addr *dest, const struct srp_addr *src)
{
	unsigned int iface = 0;

	dest->no_addrs = src->no_addrs;

	while (iface < INTERFACE_MAX) {
		totemip_copy (&dest->addr[iface], &src->addr[iface]);
		iface++;
	}
}
/*
 * Fill nodeid_out[0..entries-1] with the node id of the first address of the
 * corresponding entry in srp_addr_in.
 */
static void srp_addr_to_nodeid (
	unsigned int *nodeid_out,
	struct srp_addr *srp_addr_in,
	unsigned int entries)
{
	unsigned int idx = 0;

	while (idx < entries) {
		nodeid_out[idx] = srp_addr_in[idx].addr[0].nodeid;
		idx++;
	}
}
/*
 * Copy all INTERFACE_MAX address entries from in to out, converting each
 * with totemip_copy_endian_convert().
 * NOTE(review): unlike srp_addr_copy(), out->no_addrs is not written here -
 * confirm that callers set it themselves.
 */
static void srp_addr_copy_endian_convert (struct srp_addr *out, const struct srp_addr *in)
{
	int iface = 0;

	while (iface < INTERFACE_MAX) {
		totemip_copy_endian_convert (&out->addr[iface], &in->addr[iface]);
		iface++;
	}
}
/*
 * Empty the consensus list by resetting its entry count; the stored entries
 * themselves are not cleared.
 */
static void memb_consensus_reset (struct totemsrp_instance *instance)
{
	instance->consensus_list_entries = 0;
}
/*
 * Set difference: write to out_list every entry of one_list that has no
 * equal entry (per srp_addr_equal) in two_list, keeping one_list order.
 * *out_list_entries receives the number of entries written.
 */
static void memb_set_subtract (
	struct srp_addr *out_list, int *out_list_entries,
	struct srp_addr *one_list, int one_list_entries,
	struct srp_addr *two_list, int two_list_entries)
{
	int one_idx;
	int two_idx;

	*out_list_entries = 0;

	for (one_idx = 0; one_idx < one_list_entries; one_idx++) {
		/* Stop at the first entry of two_list equal to one_list[one_idx] */
		for (two_idx = 0; two_idx < two_list_entries; two_idx++) {
			if (srp_addr_equal (&one_list[one_idx], &two_list[two_idx])) {
				break;
			}
		}

		/* The scan ran to completion, so the entry is not in two_list */
		if (two_idx >= two_list_entries) {
			srp_addr_copy (&out_list[*out_list_entries], &one_list[one_idx]);
			*out_list_entries += 1;
		}
	}
}
/*
* Set consensus for a specific processor
*/
static void memb_consensus_set (
struct totemsrp_instance *instance,
const struct srp_addr *addr)
{
int found = 0;
int i;
if (addr->addr[0].nodeid == LEAVE_DUMMY_NODEID)
return;
for (i = 0; i < instance->consensus_list_entries; i++) {
if (srp_addr_equal(addr, &instance->consensus_list[i].addr)) {
found = 1;
break; /* found entry */
}
}
srp_addr_copy (&instance->consensus_list[i].addr, addr);
instance->consensus_list[i].set = 1;
if (found == 0) {
instance->consensus_list_entries++;
}
return;
}
/*
 * Report whether consensus is set for a specific processor.
 * Returns the `set` flag of the first consensus list entry equal to addr,
 * or 0 when addr is not in the list.
 */
static int memb_consensus_isset (
	struct totemsrp_instance *instance,
	const struct srp_addr *addr)
{
	int idx = 0;

	while (idx < instance->consensus_list_entries) {
		if (srp_addr_equal (addr, &instance->consensus_list[idx].addr)) {
			return (instance->consensus_list[idx].set);
		}
		idx++;
	}

	return (0);
}
/*
 * Decide from the consensus database whether consensus has been reached.
 *
 * The candidate membership is my_proc_list minus my_failed_list. Returns 1
 * when consensus is set for every candidate, 0 otherwise.
 */
static int memb_consensus_agreed (
	struct totemsrp_instance *instance)
{
	struct srp_addr token_memb[PROCESSOR_COUNT_MAX];
	int token_memb_entries = 0;
	int idx;
	int agreed;

	memb_set_subtract (token_memb, &token_memb_entries,
		instance->my_proc_list, instance->my_proc_list_entries,
		instance->my_failed_list, instance->my_failed_list_entries);

	/* Look for the first candidate without consensus */
	for (idx = 0; idx < token_memb_entries; idx++) {
		if (memb_consensus_isset (instance, &token_memb[idx]) == 0) {
			break;
		}
	}
	agreed = (idx >= token_memb_entries);

	/*
	 * When both nodes agreed on our failure we don't care how many proc list
	 * items are left, because a single ring is created anyway. In every other
	 * case the candidate membership must not be empty.
	 */
	if (!(agreed && instance->failed_to_recv == 1)) {
		assert (token_memb_entries >= 1);
	}

	return (agreed);
}
/*
 * Collect into no_consensus_list every entry of my_proc_list for which
 * consensus is not set; *no_consensus_list_entries receives the count.
 * comparison_list and comparison_list_entries are accepted but not used.
 */
static void memb_consensus_notset (
	struct totemsrp_instance *instance,
	struct srp_addr *no_consensus_list,
	int *no_consensus_list_entries,
	struct srp_addr *comparison_list,
	int comparison_list_entries)
{
	int proc_idx;

	*no_consensus_list_entries = 0;

	for (proc_idx = 0; proc_idx < instance->my_proc_list_entries; proc_idx++) {
		if (memb_consensus_isset (instance, &instance->my_proc_list[proc_idx]) != 0) {
			continue;
		}
		srp_addr_copy (&no_consensus_list[*no_consensus_list_entries],
			&instance->my_proc_list[proc_idx]);
		*no_consensus_list_entries += 1;
	}
}
/*
 * Are set1 and set2 equal? Entries may appear in different orders.
 * Returns 1 when both sets have the same number of entries and every entry
 * of set2 has an equal entry in set1, 0 otherwise.
 */
static int memb_set_equal (
	struct srp_addr *set1, int set1_entries,
	struct srp_addr *set2, int set2_entries)
{
	int idx2;
	int idx1;

	if (set1_entries != set2_entries) {
		return (0);
	}

	for (idx2 = 0; idx2 < set2_entries; idx2++) {
		for (idx1 = 0; idx1 < set1_entries; idx1++) {
			if (srp_addr_equal (&set1[idx1], &set2[idx2])) {
				break;
			}
		}
		/* Scanned all of set1 without a match */
		if (idx1 >= set1_entries) {
			return (0);
		}
	}

	return (1);
}
/*
 * Is subset fully contained in fullset?
 * Returns 1 when every entry of subset has an equal entry (per
 * srp_addr_equal) in fullset, 0 otherwise. A subset with more entries than
 * fullset is rejected immediately.
 */
static int memb_set_subset (
	const struct srp_addr *subset, int subset_entries,
	const struct srp_addr *fullset, int fullset_entries)
{
	int i;
	int j;
	int found;

	if (subset_entries > fullset_entries) {
		return (0);
	}

	for (i = 0; i < subset_entries; i++) {
		found = 0;
		for (j = 0; j < fullset_entries; j++) {
			if (srp_addr_equal (&subset[i], &fullset[j])) {
				found = 1;
				/* One match is enough; no need to scan the rest of fullset */
				break;
			}
		}
		if (found == 0) {
			return (0);
		}
	}

	return (1);
}
/*
 * Merge subset into fullset, taking care not to add duplicates.
 * Entries of subset that have no equal entry in fullset are appended and
 * *fullset_entries is incremented for each of them.
 * NOTE(review): no capacity check is made here; the caller presumably
 * guarantees fullset has room - confirm.
 */
static void memb_set_merge (
	const struct srp_addr *subset, int subset_entries,
	struct srp_addr *fullset, int *fullset_entries)
{
	int sub_idx;
	int full_idx;

	for (sub_idx = 0; sub_idx < subset_entries; sub_idx++) {
		for (full_idx = 0; full_idx < *fullset_entries; full_idx++) {
			if (srp_addr_equal (&fullset[full_idx], &subset[sub_idx])) {
				break;
			}
		}

		/* Ran off the end of fullset, so this entry is new */
		if (full_idx >= *fullset_entries) {
			srp_addr_copy (&fullset[*fullset_entries], &subset[sub_idx]);
			*fullset_entries += 1;
		}
	}
}
/*
 * Intersect set1 and set2, restricted to one ring id.
 *
 * For each entry of set2, the first equal entry of set1 is located; it is
 * copied to `and` only if its ring id (set1_ring_ids at the same index) is
 * byte-identical to *old_ring_id. *and_entries receives the number of entries
 * written.
 */
static void memb_set_and_with_ring_id (
	struct srp_addr *set1,
	struct memb_ring_id *set1_ring_ids,
	int set1_entries,
	struct srp_addr *set2,
	int set2_entries,
	struct memb_ring_id *old_ring_id,
	struct srp_addr *and,
	int *and_entries)
{
	int idx2;
	int idx1;

	*and_entries = 0;

	for (idx2 = 0; idx2 < set2_entries; idx2++) {
		for (idx1 = 0; idx1 < set1_entries; idx1++) {
			if (!srp_addr_equal (&set1[idx1], &set2[idx2])) {
				continue;
			}

			/* Only the first match is considered, whatever its ring id */
			if (memcmp (&set1_ring_ids[idx1], old_ring_id,
			    sizeof (struct memb_ring_id)) == 0) {
				srp_addr_copy (&and[*and_entries], &set1[idx1]);
				*and_entries += 1;
			}
			break;
		}
	}
}
#ifdef CODE_COVERAGE
/*
 * Debug helper, built only when CODE_COVERAGE is defined: print every entry
 * of list to stdout, one line per address with its interface index, printed
 * address and family.
 */
static void memb_set_print (
	char *string,
	struct srp_addr *list,
	int list_entries)
{
	int entry;
	int iface;

	printf ("List '%s' contains %d entries:\n", string, list_entries);

	for (entry = 0; entry < list_entries; entry++) {
		struct srp_addr *item = &list[entry];

		printf ("Address %d with %d rings\n", entry, item->no_addrs);
		for (iface = 0; iface < item->no_addrs; iface++) {
			printf ("\tiface %d %s\n", iface, totemip_print (&item->addr[iface]));
			printf ("\tfamily %d\n", item->addr[iface].family);
		}
	}
}
#endif
/*
 * Forget all recorded leaving node ids: reset the entry count and zero the
 * whole my_leave_memb_list array.
 */
static void my_leave_memb_clear(
	struct totemsrp_instance *instance)
{
	instance->my_leave_memb_entries = 0;
	memset(instance->my_leave_memb_list, 0, sizeof(instance->my_leave_memb_list));
}
/*
 * Look up nodeid in the list of leaving node ids.
 * Returns nodeid when it is in the list and 0 otherwise, so a nodeid of 0
 * cannot be distinguished from "not found".
 */
static unsigned int my_leave_memb_match(
	struct totemsrp_instance *instance,
	unsigned int nodeid)
{
	int idx = 0;

	while (idx < instance->my_leave_memb_entries) {
		if (instance->my_leave_memb_list[idx] == nodeid) {
			return nodeid;
		}
		idx++;
	}

	return 0;
}
/*
 * Record nodeid in the list of leaving node ids.
 *
 * A nodeid that is already in the list is not added again. When the list
 * cannot take another entry a warning is logged and the nodeid is dropped.
 *
 * NOTE(review): the capacity check stops at PROCESSOR_COUNT_MAX - 1 entries
 * although the array has PROCESSOR_COUNT_MAX slots; this is kept as found -
 * confirm whether the last slot is reserved on purpose.
 */
static void my_leave_memb_set(
	struct totemsrp_instance *instance,
	unsigned int nodeid)
{
	int i;

	for (i = 0; i < instance->my_leave_memb_entries; i++) {
		if (instance->my_leave_memb_list[i] == nodeid) {
			/* Already recorded */
			return;
		}
	}

	if (instance->my_leave_memb_entries < (PROCESSOR_COUNT_MAX - 1)) {
		instance->my_leave_memb_list[instance->my_leave_memb_entries] = nodeid;
		instance->my_leave_memb_entries++;
	} else {
		/* nodeid is unsigned: %u keeps large ids from printing as negative */
		log_printf (instance->totemsrp_log_level_warning,
			"Cannot set LEAVE nodeid=%u", nodeid);
	}
}
/*
 * Obtain a message buffer for this instance from the underlying transport
 * layer and return whatever that allocator returns.
 * instance must not be NULL (asserted).
 */
static void *totemsrp_buffer_alloc (struct totemsrp_instance *instance)
{
assert (instance != NULL);
- return totemrrp_buffer_alloc (instance->totemrrp_context);
+ return totemnet_buffer_alloc (instance->totemnet_context);
}
/*
 * Hand ptr back to the underlying transport layer's buffer release
 * routine.  instance must not be NULL (asserted).
 */
static void totemsrp_buffer_release (struct totemsrp_instance *instance, void *ptr)
{
assert (instance != NULL);
- totemrrp_buffer_release (instance->totemrrp_context, ptr);
+ totemnet_buffer_release (instance->totemnet_context, ptr);
}
static void reset_token_retransmit_timeout (struct totemsrp_instance *instance)
{
int32_t res;
qb_loop_timer_del (instance->totemsrp_poll_handle,
instance->timer_orf_token_retransmit_timeout);
res = qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->totem_config->token_retransmit_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_token_retransmit_timeout,
&instance->timer_orf_token_retransmit_timeout);
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "reset_token_retransmit_timeout - qb_loop_timer_add error : %d", res);
}
}
static void start_merge_detect_timeout (struct totemsrp_instance *instance)
{
int32_t res;
if (instance->my_merge_detect_timeout_outstanding == 0) {
res = qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->totem_config->merge_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_merge_detect_timeout,
&instance->timer_merge_detect_timeout);
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "start_merge_detect_timeout - qb_loop_timer_add error : %d", res);
}
instance->my_merge_detect_timeout_outstanding = 1;
}
}
/*
 * Delete the merge-detect timer and clear its outstanding flag.
 */
static void cancel_merge_detect_timeout (struct totemsrp_instance *instance)
{
	qb_loop_timer_del (
		instance->totemsrp_poll_handle,
		instance->timer_merge_detect_timeout);

	instance->my_merge_detect_timeout_outstanding = 0;
}
/*
* ring_state_* is used to save and restore the sort queue
* state when a recovery operation fails (and enters gather)
*/
/*
 * Snapshot the current ring id, aru and high-seq-received into the
 * old_ring_state_* fields.  Only the first call after a reset takes the
 * snapshot; later calls are no-ops while old_ring_state_saved is set.
 */
static void old_ring_state_save (struct totemsrp_instance *instance)
{
	if (instance->old_ring_state_saved != 0) {
		return;
	}

	instance->old_ring_state_saved = 1;

	/* memcpy keeps the copy byte-exact, matching the memcmp-based ring id checks */
	memcpy (&instance->my_old_ring_id, &instance->my_ring_id,
		sizeof (struct memb_ring_id));

	instance->old_ring_state_aru = instance->my_aru;
	instance->old_ring_state_high_seq_received = instance->my_high_seq_received;

	log_printf (instance->totemsrp_log_level_debug,
		"Saving state aru %x high seq received %x",
		instance->my_aru, instance->my_high_seq_received);
}
/*
 * Copy the saved aru and high-seq-received values back into the live
 * instance fields and log the restored values.
 */
static void old_ring_state_restore (struct totemsrp_instance *instance)
{
	instance->my_aru = instance->old_ring_state_aru;
	instance->my_high_seq_received =
		instance->old_ring_state_high_seq_received;

	log_printf (instance->totemsrp_log_level_debug,
		"Restoring instance->my_aru %x my high seq received %x",
		instance->my_aru,
		instance->my_high_seq_received);
}
/*
 * Clear the "saved" flag so that the next old_ring_state_save() call
 * takes a fresh snapshot.
 */
static void old_ring_state_reset (struct totemsrp_instance *instance)
{
	log_printf (instance->totemsrp_log_level_debug, "Resetting old ring state");

	instance->old_ring_state_saved = 0;
}
static void reset_pause_timeout (struct totemsrp_instance *instance)
{
int32_t res;
qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_pause_timeout);
res = qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->totem_config->token_timeout * QB_TIME_NS_IN_MSEC / 5,
(void *)instance,
timer_function_pause_timeout,
&instance->timer_pause_timeout);
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "reset_pause_timeout - qb_loop_timer_add error : %d", res);
}
}
static void reset_token_timeout (struct totemsrp_instance *instance) {
int32_t res;
qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_timeout);
res = qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->totem_config->token_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_orf_token_timeout,
&instance->timer_orf_token_timeout);
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "reset_token_timeout - qb_loop_timer_add error : %d", res);
}
}
static void reset_heartbeat_timeout (struct totemsrp_instance *instance) {
int32_t res;
qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_heartbeat_timeout);
res = qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->heartbeat_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_heartbeat_timeout,
&instance->timer_heartbeat_timeout);
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "reset_heartbeat_timeout - qb_loop_timer_add error : %d", res);
}
}
/*
 * Delete the token loss timer.
 */
static void cancel_token_timeout (struct totemsrp_instance *instance)
{
	qb_loop_timer_del (
		instance->totemsrp_poll_handle,
		instance->timer_orf_token_timeout);
}
/*
 * Delete the heartbeat timer.
 */
static void cancel_heartbeat_timeout (struct totemsrp_instance *instance)
{
	qb_loop_timer_del (
		instance->totemsrp_poll_handle,
		instance->timer_heartbeat_timeout);
}
/*
 * Delete the token retransmit timer.
 */
static void cancel_token_retransmit_timeout (struct totemsrp_instance *instance)
{
	qb_loop_timer_del (
		instance->totemsrp_poll_handle,
		instance->timer_orf_token_retransmit_timeout);
}
static void start_token_hold_retransmit_timeout (struct totemsrp_instance *instance)
{
int32_t res;
res = qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->totem_config->token_hold_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_token_hold_retransmit_timeout,
&instance->timer_orf_token_hold_retransmit_timeout);
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "start_token_hold_retransmit_timeout - qb_loop_timer_add error : %d", res);
}
}
/*
 * Delete the token-hold retransmit timer.
 */
static void cancel_token_hold_retransmit_timeout (struct totemsrp_instance *instance)
{
	qb_loop_timer_del (
		instance->totemsrp_poll_handle,
		instance->timer_orf_token_hold_retransmit_timeout);
}
/*
 * Handle expiry of the consensus timeout.
 *
 * The consensus_timeouts statistic is always bumped.  Without agreement,
 * the processors that have not reached consensus are merged into the
 * failed list and GATHER is re-entered.  With agreement, the consensus
 * state is restarted with only this processor set and the token timeout
 * is re-armed.
 */
static void memb_state_consensus_timeout_expired (
	struct totemsrp_instance *instance)
{
	struct srp_addr pending_list[PROCESSOR_COUNT_MAX];
	int pending_entries;

	instance->stats.consensus_timeouts++;

	if (!memb_consensus_agreed (instance)) {
		memb_consensus_notset (
			instance,
			pending_list,
			&pending_entries,
			instance->my_proc_list,
			instance->my_proc_list_entries);

		memb_set_merge (pending_list, pending_entries,
			instance->my_failed_list,
			&instance->my_failed_list_entries);

		memb_state_gather_enter (instance, TOTEMSRP_GSFROM_CONSENSUS_TIMEOUT);
		return;
	}

	memb_consensus_reset (instance);
	memb_consensus_set (instance, &instance->my_id);
	reset_token_timeout (instance);
}
static void memb_join_message_send (struct totemsrp_instance *instance);
static void memb_merge_detect_transmit (struct totemsrp_instance *instance);
/*
* Timers used for various states of the membership algorithm
*/
/*
 * Pause timer callback: stamp the current time into pause_timestamp and
 * re-arm the pause timer.  data is the totemsrp instance.
 */
static void timer_function_pause_timeout (void *data)
{
	struct totemsrp_instance *srp = data;

	srp->pause_timestamp = qb_util_nano_current_get ();

	reset_pause_timeout (srp);
}
/*
 * Token loss while in RECOVERY: restore the saved old-ring aru and
 * high-seq values, re-enter GATHER and count the event.
 */
static void memb_recovery_state_token_loss (struct totemsrp_instance *instance)
{
	old_ring_state_restore (instance);

	memb_state_gather_enter (
		instance,
		TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_RECOVERY_STATE);

	instance->stats.recovery_token_lost++;
}
/*
 * Token loss timer callback.  data is the totemsrp instance.
 *
 * What happens depends on the current membership state; every branch ends
 * up (re-)entering GATHER and, except for RECOVERY which counts inside
 * memb_recovery_state_token_loss(), bumps the matching *_token_lost
 * statistic here.
 */
static void timer_function_orf_token_timeout (void *data)
{
struct totemsrp_instance *instance = data;
switch (instance->memb_state) {
case MEMB_STATE_OPERATIONAL:
log_printf (instance->totemsrp_log_level_debug,
"The token was lost in the OPERATIONAL state.");
log_printf (instance->totemsrp_log_level_notice,
"A processor failed, forming new configuration.");
/* Ask the transport to check its interface before gathering */
- totemrrp_iface_check (instance->totemrrp_context);
+ totemnet_iface_check (instance->totemnet_context);
memb_state_gather_enter (instance, TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_OPERATIONAL_STATE);
instance->stats.operational_token_lost++;
break;
case MEMB_STATE_GATHER:
log_printf (instance->totemsrp_log_level_debug,
"The consensus timeout expired.");
/*
 * Note: memb_state_consensus_timeout_expired() may itself enter GATHER
 * (no-agreement path); GATHER is then entered again unconditionally below.
 */
memb_state_consensus_timeout_expired (instance);
memb_state_gather_enter (instance, TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED);
instance->stats.gather_token_lost++;
break;
case MEMB_STATE_COMMIT:
log_printf (instance->totemsrp_log_level_debug,
"The token was lost in the COMMIT state.");
memb_state_gather_enter (instance, TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_COMMIT_STATE);
instance->stats.commit_token_lost++;
break;
case MEMB_STATE_RECOVERY:
log_printf (instance->totemsrp_log_level_debug,
"The token was lost in the RECOVERY state.");
memb_recovery_state_token_loss (instance);
/* Set the discard flag again after the gather transition */
instance->orf_token_discard = 1;
break;
}
}
/*
 * Heartbeat timer callback: log the current membership state and then
 * run the same handling as a token loss timeout.
 */
static void timer_function_heartbeat_timeout (void *data)
{
	struct totemsrp_instance *srp = data;

	log_printf (srp->totemsrp_log_level_debug,
		"HeartBeat Timer expired Invoking token loss mechanism in state %d ",
		srp->memb_state);

	timer_function_orf_token_timeout(data);
}
static void memb_timer_function_state_gather (void *data)
{
struct totemsrp_instance *instance = data;
int32_t res;
switch (instance->memb_state) {
case MEMB_STATE_OPERATIONAL:
case MEMB_STATE_RECOVERY:
assert (0); /* this should never happen */
break;
case MEMB_STATE_GATHER:
case MEMB_STATE_COMMIT:
memb_join_message_send (instance);
/*
* Restart the join timeout
`*/
qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout);
res = qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->totem_config->join_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
memb_timer_function_state_gather,
&instance->memb_timer_state_gather_join_timeout);
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "memb_timer_function_state_gather - qb_loop_timer_add error : %d", res);
}
break;
}
}
/*
 * Consensus timer callback: forwards to the consensus-timeout handler.
 * data is the totemsrp instance.
 */
static void memb_timer_function_gather_consensus_timeout (void *data)
{
	memb_state_consensus_timeout_expired ((struct totemsrp_instance *)data);
}
static void deliver_messages_from_recovery_to_regular (struct totemsrp_instance *instance)
{
unsigned int i;
struct sort_queue_item *recovery_message_item;
struct sort_queue_item regular_message_item;
unsigned int range = 0;
int res;
void *ptr;
struct mcast *mcast;
log_printf (instance->totemsrp_log_level_debug,
"recovery to regular %x-%x", SEQNO_START_MSG + 1, instance->my_aru);
range = instance->my_aru - SEQNO_START_MSG;
/*
* Move messages from recovery to regular sort queue
*/
// todo should i be initialized to 0 or 1 ?
for (i = 1; i <= range; i++) {
res = sq_item_get (&instance->recovery_sort_queue,
i + SEQNO_START_MSG, &ptr);
if (res != 0) {
continue;
}
recovery_message_item = ptr;
/*
* Convert recovery message into regular message
*/
mcast = recovery_message_item->mcast;
if (mcast->header.encapsulated == MESSAGE_ENCAPSULATED) {
/*
* Message is a recovery message encapsulated
* in a new ring message
*/
regular_message_item.mcast =
(struct mcast *)(((char *)recovery_message_item->mcast) + sizeof (struct mcast));
regular_message_item.msg_len =
recovery_message_item->msg_len - sizeof (struct mcast);
mcast = regular_message_item.mcast;
} else {
/*
* TODO this case shouldn't happen
*/
continue;
}
log_printf (instance->totemsrp_log_level_debug,
"comparing if ring id is for this processors old ring seqno %d",
mcast->seq);
/*
* Only add this message to the regular sort
* queue if it was originated with the same ring
* id as the previous ring
*/
if (memcmp (&instance->my_old_ring_id, &mcast->ring_id,
sizeof (struct memb_ring_id)) == 0) {
res = sq_item_inuse (&instance->regular_sort_queue, mcast->seq);
if (res == 0) {
sq_item_add (&instance->regular_sort_queue,
&regular_message_item, mcast->seq);
if (sq_lt_compare (instance->old_ring_state_high_seq_received, mcast->seq)) {
instance->old_ring_state_high_seq_received = mcast->seq;
}
}
} else {
log_printf (instance->totemsrp_log_level_debug,
"-not adding msg with seq no %x", mcast->seq);
}
}
}
/*
* Change states in the state machine of the membership algorithm
*/
/*
 * Enter the OPERATIONAL state.
 *
 * In order: moves old-ring messages from the recovery queue to the regular
 * queue and delivers them, computes the left and joined lists, installs
 * the new membership, delivers the transitional and then the regular
 * configuration change to the application, turns the recovery sort queue
 * into the regular one, releases already-delivered messages and finally
 * logs the new membership.
 *
 * The " joined:", " left:" and " failed:" log fragments are built in
 * fixed-size buffers; node ids that do not fit are dropped (the text is
 * truncated) instead of being written past the end of the buffer.
 */
static void memb_state_operational_enter (struct totemsrp_instance *instance)
{
	struct srp_addr joined_list[PROCESSOR_COUNT_MAX];
	int joined_list_entries = 0;
	unsigned int aru_save;
	unsigned int joined_list_totemip[PROCESSOR_COUNT_MAX];
	unsigned int trans_memb_list_totemip[PROCESSOR_COUNT_MAX];
	unsigned int new_memb_list_totemip[PROCESSOR_COUNT_MAX];
	unsigned int left_list[PROCESSOR_COUNT_MAX];
	unsigned int i;
	unsigned int res;
	char left_node_msg[1024];
	char joined_node_msg[1024];
	char failed_node_msg[1024];
	instance->originated_orf_token = 0;
	memb_consensus_reset (instance);
	old_ring_state_reset (instance);
	deliver_messages_from_recovery_to_regular (instance);
	log_printf (instance->totemsrp_log_level_trace,
		"Delivering to app %x to %x",
		instance->my_high_delivered + 1, instance->old_ring_state_high_seq_received);
	/*
	 * Deliver old-ring messages relative to the old ring's aru; the
	 * current aru is put back after the second delivery pass below.
	 */
	aru_save = instance->my_aru;
	instance->my_aru = instance->old_ring_state_aru;
	messages_deliver_to_app (instance, 0, instance->old_ring_state_high_seq_received);
	/*
	 * Calculate joined and left list
	 */
	memb_set_subtract (instance->my_left_memb_list,
		&instance->my_left_memb_entries,
		instance->my_memb_list, instance->my_memb_entries,
		instance->my_trans_memb_list, instance->my_trans_memb_entries);
	memb_set_subtract (joined_list, &joined_list_entries,
		instance->my_new_memb_list, instance->my_new_memb_entries,
		instance->my_trans_memb_list, instance->my_trans_memb_entries);
	/*
	 * Install new membership
	 */
	instance->my_memb_entries = instance->my_new_memb_entries;
	memcpy (&instance->my_memb_list, instance->my_new_memb_list,
		sizeof (struct srp_addr) * instance->my_memb_entries);
	instance->last_released = 0;
	instance->my_set_retrans_flg = 0;
- /*
- * Inform RRP about transitional change
- */
- totemrrp_membership_changed (
- instance->totemrrp_context,
- TOTEM_CONFIGURATION_TRANSITIONAL,
- instance->my_trans_memb_list, instance->my_trans_memb_entries,
- instance->my_left_memb_list, instance->my_left_memb_entries,
- NULL, 0,
- &instance->my_ring_id);
	/*
	 * Deliver transitional configuration to application
	 */
	srp_addr_to_nodeid (left_list, instance->my_left_memb_list,
		instance->my_left_memb_entries);
	srp_addr_to_nodeid (trans_memb_list_totemip,
		instance->my_trans_memb_list, instance->my_trans_memb_entries);
	instance->totemsrp_confchg_fn (TOTEM_CONFIGURATION_TRANSITIONAL,
		trans_memb_list_totemip, instance->my_trans_memb_entries,
		left_list, instance->my_left_memb_entries,
		0, 0, &instance->my_ring_id);
	instance->waiting_trans_ack = 1;
	instance->totemsrp_waiting_trans_ack_cb_fn (1);
	// TODO we need to filter to ensure we only deliver those
	// messages which are part of instance->my_deliver_memb
	messages_deliver_to_app (instance, 1, instance->old_ring_state_high_seq_received);
	instance->my_aru = aru_save;
- /*
- * Inform RRP about regular membership change
- */
- totemrrp_membership_changed (
- instance->totemrrp_context,
- TOTEM_CONFIGURATION_REGULAR,
- instance->my_new_memb_list, instance->my_new_memb_entries,
- NULL, 0,
- joined_list, joined_list_entries,
- &instance->my_ring_id);
	/*
	 * Deliver regular configuration to application
	 */
	srp_addr_to_nodeid (new_memb_list_totemip,
		instance->my_new_memb_list, instance->my_new_memb_entries);
	srp_addr_to_nodeid (joined_list_totemip, joined_list,
		joined_list_entries);
	instance->totemsrp_confchg_fn (TOTEM_CONFIGURATION_REGULAR,
		new_memb_list_totemip, instance->my_new_memb_entries,
		0, 0,
		joined_list_totemip, joined_list_entries, &instance->my_ring_id);
	/*
	 * The recovery sort queue now becomes the regular
	 * sort queue. It is necessary to copy the state
	 * into the regular sort queue.
	 */
	sq_copy (&instance->regular_sort_queue, &instance->recovery_sort_queue);
	instance->my_last_aru = SEQNO_START_MSG;
	/* When making my_proc_list smaller, ensure that the
	 * now non-used entries are zero-ed out. There are some suspect
	 * assert's that assume that there is always 2 entries in the list.
	 * These fail when my_proc_list is reduced to 1 entry (and the
	 * valid [0] entry is the same as the 'unused' [1] entry).
	 */
	memset(instance->my_proc_list, 0,
		sizeof (struct srp_addr) * instance->my_proc_list_entries);
	instance->my_proc_list_entries = instance->my_new_memb_entries;
	memcpy (instance->my_proc_list, instance->my_new_memb_list,
		sizeof (struct srp_addr) * instance->my_memb_entries);
	instance->my_failed_list_entries = 0;
	/*
	 * TODO Not exactly to spec
	 *
	 * At the entry to this function all messages without a gap are
	 * delivered.
	 *
	 * This code throws away messages from the last gap in the sort queue
	 * to my_high_seq_received
	 *
	 * What should really happen is we should deliver all messages up to
	 * a gap, then deliver the transitional configuration, then deliver
	 * the messages between the first gap and my_high_seq_received, then
	 * deliver a regular configuration, then deliver the regular
	 * configuration
	 *
	 * Unfortunately totempg doesn't appear to like this operating mode
	 * which needs more inspection
	 */
	/* Walk down from my_high_seq_received to the highest item present */
	i = instance->my_high_seq_received + 1;
	do {
		void *ptr;
		i -= 1;
		res = sq_item_get (&instance->regular_sort_queue, i, &ptr);
		if (i == 0) {
			break;
		}
	} while (res);
	instance->my_high_delivered = i;
	/* Free every message up to that point and release its queue slot */
	for (i = 0; i <= instance->my_high_delivered; i++) {
		void *ptr;
		res = sq_item_get (&instance->regular_sort_queue, i, &ptr);
		if (res == 0) {
			struct sort_queue_item *regular_message;
			regular_message = ptr;
			free (regular_message->mcast);
		}
	}
	sq_items_release (&instance->regular_sort_queue, instance->my_high_delivered);
	instance->last_released = instance->my_high_delivered;
	/*
	 * Build the log fragments.  snprintf returns the length it WOULD
	 * have written, so the running offset can exceed the buffer size on
	 * truncation; each loop therefore stops as soon as the buffer is
	 * full, before the offset is used to form a pointer or a size.
	 */
	if (joined_list_entries) {
		int sptr = 0;
		sptr += snprintf(joined_node_msg, sizeof(joined_node_msg)-sptr, " joined:");
		for (i=0; i< joined_list_entries &&
			sptr < (int)sizeof(joined_node_msg) - 1; i++) {
			sptr += snprintf(joined_node_msg+sptr, sizeof(joined_node_msg)-sptr, " %u", joined_list_totemip[i]);
		}
	}
	else {
		joined_node_msg[0] = '\0';
	}
	if (instance->my_left_memb_entries) {
		int sptr = 0;
		int sptr2 = 0;
		sptr += snprintf(left_node_msg, sizeof(left_node_msg)-sptr, " left:");
		for (i=0; i< instance->my_left_memb_entries &&
			sptr < (int)sizeof(left_node_msg) - 1; i++) {
			sptr += snprintf(left_node_msg+sptr, sizeof(left_node_msg)-sptr, " %u", left_list[i]);
		}
		/* Nodes that left without a recorded leave message count as failed */
		for (i=0; i< instance->my_left_memb_entries &&
			sptr2 < (int)sizeof(failed_node_msg) - 1; i++) {
			if (my_leave_memb_match(instance, left_list[i]) == 0) {
				if (sptr2 == 0) {
					sptr2 += snprintf(failed_node_msg, sizeof(failed_node_msg)-sptr2, " failed:");
				}
				sptr2 += snprintf(failed_node_msg+sptr2, sizeof(failed_node_msg)-sptr2, " %u", left_list[i]);
- }
+ }
		}
		if (sptr2 == 0) {
			failed_node_msg[0] = '\0';
		}
	}
	else {
		left_node_msg[0] = '\0';
		failed_node_msg[0] = '\0';
	}
	my_leave_memb_clear(instance);
	log_printf (instance->totemsrp_log_level_debug,
		"entering OPERATIONAL state.");
	log_printf (instance->totemsrp_log_level_notice,
		"A new membership (%s:%lld) was formed. Members%s%s",
		totemip_print (&instance->my_ring_id.rep),
		instance->my_ring_id.seq,
		joined_node_msg,
		left_node_msg);
	if (strlen(failed_node_msg)) {
		log_printf (instance->totemsrp_log_level_notice,
			"Failed to receive the leave message.%s",
			failed_node_msg);
	}
	instance->memb_state = MEMB_STATE_OPERATIONAL;
	instance->stats.operational_entered++;
	instance->stats.continuous_gather = 0;
	instance->my_received_flg = 1;
	reset_pause_timeout (instance);
	/*
	 * Save ring id information from this configuration to determine
	 * which processors are transitioning from old regular configuration
	 * in to new regular configuration on the next configuration change
	 */
	memcpy (&instance->my_old_ring_id, &instance->my_ring_id,
		sizeof (struct memb_ring_id));
	return;
}
static void memb_state_gather_enter (
struct totemsrp_instance *instance,
enum gather_state_from gather_from)
{
int32_t res;
instance->orf_token_discard = 1;
instance->originated_orf_token = 0;
memb_set_merge (
&instance->my_id, 1,
instance->my_proc_list, &instance->my_proc_list_entries);
memb_join_message_send (instance);
/*
* Restart the join timeout
*/
qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout);
res = qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->totem_config->join_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
memb_timer_function_state_gather,
&instance->memb_timer_state_gather_join_timeout);
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "memb_state_gather_enter - qb_loop_timer_add error(1) : %d", res);
}
/*
* Restart the consensus timeout
*/
qb_loop_timer_del (instance->totemsrp_poll_handle,
instance->memb_timer_state_gather_consensus_timeout);
res = qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->totem_config->consensus_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
memb_timer_function_gather_consensus_timeout,
&instance->memb_timer_state_gather_consensus_timeout);
if (res != 0) {
log_printf(instance->totemsrp_log_level_error, "memb_state_gather_enter - qb_loop_timer_add error(2) : %d", res);
}
/*
* Cancel the token loss and token retransmission timeouts
*/
cancel_token_retransmit_timeout (instance); // REVIEWED
cancel_token_timeout (instance); // REVIEWED
cancel_merge_detect_timeout (instance);
memb_consensus_reset (instance);
memb_consensus_set (instance, &instance->my_id);
log_printf (instance->totemsrp_log_level_debug,
"entering GATHER state from %d(%s).",
gather_from, gsfrom_to_msg(gather_from));
instance->memb_state = MEMB_STATE_GATHER;
instance->stats.gather_entered++;
if (gather_from == TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED) {
/*
* State 3 means gather, so we are continuously gathering.
*/
instance->stats.continuous_gather++;
}
return;
}
static void timer_function_token_retransmit_timeout (void *data);
/*
 * Callback run once the token target has been set: sends the commit
 * token.  context is the totemsrp instance.
 */
static void target_set_completed (
	void *context)
{
	memb_state_commit_token_send ((struct totemsrp_instance *)context);
}
/*
 * Enter the COMMIT state.
 *
 * Saves the old ring state, updates the commit token and sets its
 * target, stops the gather timers, adopts and stores the ring id from
 * the commit token, arms the token timers and clears the flow-control
 * counters.  The commit token itself is sent later, from the callback
 * fired once the token target has been set.
 */
static void memb_state_commit_enter (
	struct totemsrp_instance *instance)
{
	old_ring_state_save (instance);

	memb_state_commit_token_update (instance);
	memb_state_commit_token_target_set (instance);

	/* The gather-phase timers are no longer needed */
	qb_loop_timer_del (instance->totemsrp_poll_handle,
		instance->memb_timer_state_gather_join_timeout);
	instance->memb_timer_state_gather_join_timeout = 0;

	qb_loop_timer_del (instance->totemsrp_poll_handle,
		instance->memb_timer_state_gather_consensus_timeout);
	instance->memb_timer_state_gather_consensus_timeout = 0;

	/* Adopt the ring id carried by the commit token and store it */
	memb_ring_id_set (instance, &instance->commit_token->ring_id);
	instance->memb_ring_id_store (&instance->my_ring_id,
		&instance->my_id.addr[0]);
	instance->token_ring_id_seq = instance->my_ring_id.seq;

	log_printf (instance->totemsrp_log_level_debug, "entering COMMIT state.");

	instance->memb_state = MEMB_STATE_COMMIT;

	reset_token_retransmit_timeout (instance);
	reset_token_timeout (instance);

	instance->stats.commit_entered++;
	instance->stats.continuous_gather = 0;

	/* A new ring starts with all flow control variables at zero */
	instance->my_trc = 0;
	instance->my_pbl = 0;
	instance->my_cbl = 0;
}
/*
 * Enter the RECOVERY state using the given commit token.
 *
 * Reinitializes the recovery sort queue and the retransmit queue, forwards
 * the commit token, derives the transitional membership and, if any
 * transitional member reports received_flg == 0, re-originates the
 * old-ring messages above the lowest aru as encapsulated messages on the
 * retransmit queue.  Finally resets the sequence bookkeeping to
 * SEQNO_START_MSG, re-arms the token timers and switches memb_state.
 */
static void memb_state_recovery_enter (
struct totemsrp_instance *instance,
struct memb_commit_token *commit_token)
{
int i;
int local_received_flg = 1;
unsigned int low_ring_aru;
unsigned int range = 0;
unsigned int messages_originated = 0;
const struct srp_addr *addr;
struct memb_commit_token_memb_entry *memb_list;
struct memb_ring_id my_new_memb_ring_id_list[PROCESSOR_COUNT_MAX];
/*
 * The address array starts at end_of_commit_token; the per-member entries
 * start right after addr_entries addresses.
 */
addr = (const struct srp_addr *)commit_token->end_of_commit_token;
memb_list = (struct memb_commit_token_memb_entry *)(addr + commit_token->addr_entries);
log_printf (instance->totemsrp_log_level_debug,
"entering RECOVERY state.");
instance->orf_token_discard = 0;
instance->my_high_ring_delivered = 0;
sq_reinit (&instance->recovery_sort_queue, SEQNO_START_MSG);
cs_queue_reinit (&instance->retrans_message_queue);
low_ring_aru = instance->old_ring_state_high_seq_received;
memb_state_commit_token_send_recovery (instance, commit_token);
instance->my_token_seq = SEQNO_START_TOKEN - 1;
/*
* Build regular configuration
*/
- totemrrp_processor_count_set (
- instance->totemrrp_context,
+ totemnet_processor_count_set (
+ instance->totemnet_context,
commit_token->addr_entries);
/*
* Build transitional configuration
*/
for (i = 0; i < instance->my_new_memb_entries; i++) {
memcpy (&my_new_memb_ring_id_list[i],
&memb_list[i].ring_id,
sizeof (struct memb_ring_id));
}
/*
 * Transitional membership = members present in both the new and the
 * current membership whose commit token entry carries my_old_ring_id.
 */
memb_set_and_with_ring_id (
instance->my_new_memb_list,
my_new_memb_ring_id_list,
instance->my_new_memb_entries,
instance->my_memb_list,
instance->my_memb_entries,
&instance->my_old_ring_id,
instance->my_trans_memb_list,
&instance->my_trans_memb_entries);
for (i = 0; i < instance->my_trans_memb_entries; i++) {
log_printf (instance->totemsrp_log_level_debug,
"TRANS [%d] member %s:", i, totemip_print (&instance->my_trans_memb_list[i].addr[0]));
}
for (i = 0; i < instance->my_new_memb_entries; i++) {
log_printf (instance->totemsrp_log_level_debug,
"position [%d] member %s:", i, totemip_print (&addr[i].addr[0]));
log_printf (instance->totemsrp_log_level_debug,
"previous ring seq %llx rep %s",
memb_list[i].ring_id.seq,
totemip_print (&memb_list[i].ring_id.rep));
log_printf (instance->totemsrp_log_level_debug,
"aru %x high delivered %x received flag %d",
memb_list[i].aru,
memb_list[i].high_delivered,
memb_list[i].received_flg);
// assert (totemip_print (&memb_list[i].ring_id.rep) != 0);
}
/*
* Determine if any received flag is false
*/
for (i = 0; i < commit_token->addr_entries; i++) {
if (memb_set_subset (&instance->my_new_memb_list[i], 1,
instance->my_trans_memb_list, instance->my_trans_memb_entries) &&
memb_list[i].received_flg == 0) {
/* The deliver membership becomes a copy of the transitional one */
instance->my_deliver_memb_entries = instance->my_trans_memb_entries;
memcpy (instance->my_deliver_memb_list, instance->my_trans_memb_list,
sizeof (struct srp_addr) * instance->my_trans_memb_entries);
local_received_flg = 0;
break;
}
}
if (local_received_flg == 1) {
goto no_originate;
} /* Else originate messages if we should */
/*
* Calculate my_low_ring_aru, instance->my_high_ring_delivered for the transitional membership
*/
for (i = 0; i < commit_token->addr_entries; i++) {
if (memb_set_subset (&instance->my_new_memb_list[i], 1,
instance->my_deliver_memb_list,
instance->my_deliver_memb_entries) &&
memcmp (&instance->my_old_ring_id,
&memb_list[i].ring_id,
sizeof (struct memb_ring_id)) == 0) {
if (sq_lt_compare (memb_list[i].aru, low_ring_aru)) {
low_ring_aru = memb_list[i].aru;
}
if (sq_lt_compare (instance->my_high_ring_delivered, memb_list[i].high_delivered)) {
instance->my_high_ring_delivered = memb_list[i].high_delivered;
}
}
}
/*
* Copy all old ring messages to instance->retrans_message_queue
*/
range = instance->old_ring_state_high_seq_received - low_ring_aru;
if (range == 0) {
/*
* No messages to copy
*/
goto no_originate;
}
assert (range < QUEUE_RTR_ITEMS_SIZE_MAX);
log_printf (instance->totemsrp_log_level_debug,
"copying all old ring messages from %x-%x.",
low_ring_aru + 1, instance->old_ring_state_high_seq_received);
for (i = 1; i <= range; i++) {
struct sort_queue_item *sort_queue_item;
struct message_item message_item;
void *ptr;
int res;
res = sq_item_get (&instance->regular_sort_queue,
low_ring_aru + i, &ptr);
if (res != 0) {
/* Sequence number not present in the regular queue */
continue;
}
sort_queue_item = ptr;
messages_originated++;
memset (&message_item, 0, sizeof (struct message_item));
// TODO LEAK
message_item.mcast = totemsrp_buffer_alloc (instance);
assert (message_item.mcast);
/*
 * Build a new-ring mcast header and append the complete old message
 * behind it (encapsulation).
 */
message_item.mcast->header.type = MESSAGE_TYPE_MCAST;
srp_addr_copy (&message_item.mcast->system_from, &instance->my_id);
message_item.mcast->header.encapsulated = MESSAGE_ENCAPSULATED;
message_item.mcast->header.nodeid = instance->my_id.addr[0].nodeid;
assert (message_item.mcast->header.nodeid);
message_item.mcast->header.endian_detector = ENDIAN_LOCAL;
memcpy (&message_item.mcast->ring_id, &instance->my_ring_id,
sizeof (struct memb_ring_id));
message_item.msg_len = sort_queue_item->msg_len + sizeof (struct mcast);
memcpy (((char *)message_item.mcast) + sizeof (struct mcast),
sort_queue_item->mcast,
sort_queue_item->msg_len);
cs_queue_item_add (&instance->retrans_message_queue, &message_item);
}
log_printf (instance->totemsrp_log_level_debug,
"Originated %d messages in RECOVERY.", messages_originated);
goto originated;
no_originate:
log_printf (instance->totemsrp_log_level_debug,
"Did not need to originate any messages in recovery.");
originated:
/* The new ring starts counting from SEQNO_START_MSG */
instance->my_aru = SEQNO_START_MSG;
instance->my_aru_count = 0;
instance->my_seq_unchanged = 0;
instance->my_high_seq_received = SEQNO_START_MSG;
instance->my_install_seq = SEQNO_START_MSG;
instance->last_released = SEQNO_START_MSG;
reset_token_timeout (instance); // REVIEWED
reset_token_retransmit_timeout (instance); // REVIEWED
instance->memb_state = MEMB_STATE_RECOVERY;
instance->stats.recovery_entered++;
instance->stats.continuous_gather = 0;
return;
}
/*
 * Event notification entry point.  Whatever the event type or value,
 * the only action taken is to send a token hold cancel.
 */
void totemsrp_event_signal (void *srp_context, enum totem_event_type type, int value)
{
	token_hold_cancel_send ((struct totemsrp_instance *)srp_context);
}
int totemsrp_mcast (
void *srp_context,
struct iovec *iovec,
unsigned int iov_len,
int guarantee)
{
struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
int i;
struct message_item message_item;
char *addr;
unsigned int addr_idx;
struct cs_queue *queue_use;
if (instance->waiting_trans_ack) {
queue_use = &instance->new_message_queue_trans;
} else {
queue_use = &instance->new_message_queue;
}
if (cs_queue_is_full (queue_use)) {
log_printf (instance->totemsrp_log_level_debug, "queue full");
return (-1);
}
memset (&message_item, 0, sizeof (struct message_item));
/*
* Allocate pending item
*/
message_item.mcast = totemsrp_buffer_alloc (instance);
if (message_item.mcast == 0) {
goto error_mcast;
}
/*
* Set mcast header
*/
memset(message_item.mcast, 0, sizeof (struct mcast));
message_item.mcast->header.type = MESSAGE_TYPE_MCAST;
message_item.mcast->header.endian_detector = ENDIAN_LOCAL;
message_item.mcast->header.encapsulated = MESSAGE_NOT_ENCAPSULATED;
message_item.mcast->header.nodeid = instance->my_id.addr[0].nodeid;
assert (message_item.mcast->header.nodeid);
message_item.mcast->guarantee = guarantee;
srp_addr_copy (&message_item.mcast->system_from, &instance->my_id);
addr = (char *)message_item.mcast;
addr_idx = sizeof (struct mcast);
for (i = 0; i < iov_len; i++) {
memcpy (&addr[addr_idx], iovec[i].iov_base, iovec[i].iov_len);
addr_idx += iovec[i].iov_len;
}
message_item.msg_len = addr_idx;
log_printf (instance->totemsrp_log_level_trace, "mcasted message added to pending queue");
instance->stats.mcast_tx++;
cs_queue_item_add (queue_use, &message_item);
return (0);
error_mcast:
return (-1);
}
/*
* Determine if there is room to queue a new message
*/
/*
 * Report the value produced by cs_queue_avail() for the pending message
 * queue currently in use (the "trans" queue while waiting_trans_ack is
 * set).
 */
int totemsrp_avail (void *srp_context)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
	struct cs_queue *active_queue;
	int free_slots;

	active_queue = instance->waiting_trans_ack ?
		&instance->new_message_queue_trans :
		&instance->new_message_queue;

	cs_queue_avail (active_queue, &free_slots);

	return (free_slots);
}
/*
* ORF Token Management
*/
/*
* Recast message to mcast group if it is available
*/
/*
 * Multicast again the message stored under sequence number seq.
 *
 * The recovery sort queue is used while in RECOVERY, the regular sort
 * queue otherwise.
 *
 * Returns 0 when the message was handed to the transport, -1 when seq is
 * outside the queue's range or no item is stored for it.
 */
static int orf_token_remcast (
struct totemsrp_instance *instance,
int seq)
{
struct sort_queue_item *sort_queue_item;
int res;
void *ptr;
struct sq *sort_queue;
if (instance->memb_state == MEMB_STATE_RECOVERY) {
sort_queue = &instance->recovery_sort_queue;
} else {
sort_queue = &instance->regular_sort_queue;
}
res = sq_in_range (sort_queue, seq);
if (res == 0) {
log_printf (instance->totemsrp_log_level_debug, "sq not in range");
return (-1);
}
/*
* Get RTR item at seq, if not available, return
*/
res = sq_item_get (sort_queue, seq, &ptr);
if (res != 0) {
return -1;
}
sort_queue_item = ptr;
/* Resend the stored message unchanged via the no-flush send call */
- totemrrp_mcast_noflush_send (
- instance->totemrrp_context,
+ totemnet_mcast_noflush_send (
+ instance->totemnet_context,
sort_queue_item->mcast,
sort_queue_item->msg_len);
return (0);
}
/*
* Free all freeable messages from ring
*/
/*
 * Release messages from the regular sort queue.
 *
 * The release point is the lowest (per sq_lt_compare) of token_aru,
 * my_last_aru and my_high_delivered.  Nothing happens when that point
 * lies before last_released.  Otherwise every sequence number after
 * last_released up to the release point has its buffer (if present)
 * returned and its queue slot released, and last_released is advanced.
 */
static void messages_free (
	struct totemsrp_instance *instance,
	unsigned int token_aru)
{
	unsigned int upto = token_aru;
	unsigned int count;
	unsigned int step;

	if (sq_lt_compare (instance->my_last_aru, upto)) {
		upto = instance->my_last_aru;
	}
	if (sq_lt_compare (instance->my_high_delivered, upto)) {
		upto = instance->my_high_delivered;
	}

	/*
	 * Never release before a point that has already been released
	 */
	if (sq_lt_compare (upto, instance->last_released)) {
		return;
	}

	count = upto - instance->last_released;
	assert (count < QUEUE_RTR_ITEMS_SIZE_MAX);

	/*
	 * Release retransmit list items if group aru indicates they are transmitted
	 */
	for (step = 1; step <= count; step++) {
		void *item;

		if (sq_item_get (&instance->regular_sort_queue,
			instance->last_released + step, &item) == 0) {
			struct sort_queue_item *stored = item;

			totemsrp_buffer_release (instance, stored->mcast);
		}
		sq_items_release (&instance->regular_sort_queue,
			instance->last_released + step);
	}

	instance->last_released += count;

	/* Only log when at least one slot was released */
	if (count > 0) {
		log_printf (instance->totemsrp_log_level_trace,
			"releasing messages up to and including %x", upto);
	}
}
/*
 * Advance my_aru across the run of consecutively present sequence numbers
 * that follows it, stopping at the first hole or at my_high_seq_received.
 * The recovery sort queue is scanned in MEMB_STATE_RECOVERY, the regular
 * sort queue otherwise.
 */
static void update_aru (
	struct totemsrp_instance *instance)
{
	struct sq *sort_queue;
	unsigned int base_aru;
	unsigned int span;
	unsigned int contiguous = 0;
	void *unused_item;

	sort_queue = (instance->memb_state == MEMB_STATE_RECOVERY) ?
		&instance->recovery_sort_queue :
		&instance->regular_sort_queue;

	span = instance->my_high_seq_received - instance->my_aru;
	base_aru = instance->my_aru;

	/*
	 * Count present items; a failed lookup marks a hole and ends the scan
	 */
	while (contiguous < span &&
		sq_item_get (sort_queue, base_aru + contiguous + 1, &unused_item) == 0) {
		contiguous++;
	}

	instance->my_aru += contiguous;
}
/*
 * Multicasts pending messages onto the ring (requires orf_token possession)
 *
 * Takes up to fcc_mcasts_allowed messages from the pending queue selected
 * by the membership state, stamps each with the next token sequence number
 * and the current ring id, stores it in the matching sort queue and sends
 * it with the non-flushing multicast send.
 *
 * Returns the number of messages multicast by this call.
 */
static int orf_token_mcast (
struct totemsrp_instance *instance,
struct orf_token *token,
int fcc_mcasts_allowed)
{
struct message_item *message_item = 0;
struct cs_queue *mcast_queue;
struct sq *sort_queue;
struct sort_queue_item sort_queue_item;
struct mcast *mcast;
unsigned int fcc_mcast_current;
/*
 * Recovery state drains the retransmit queue into the recovery sort queue;
 * otherwise the (transitional or regular) new message queue feeds the
 * regular sort queue
 */
if (instance->memb_state == MEMB_STATE_RECOVERY) {
mcast_queue = &instance->retrans_message_queue;
sort_queue = &instance->recovery_sort_queue;
reset_token_retransmit_timeout (instance); // REVIEWED
} else {
if (instance->waiting_trans_ack) {
mcast_queue = &instance->new_message_queue_trans;
} else {
mcast_queue = &instance->new_message_queue;
}
sort_queue = &instance->regular_sort_queue;
}
for (fcc_mcast_current = 0; fcc_mcast_current < fcc_mcasts_allowed; fcc_mcast_current++) {
if (cs_queue_is_empty (mcast_queue)) {
break;
}
message_item = (struct message_item *)cs_queue_item_get (mcast_queue);
message_item->mcast->seq = ++token->seq;
message_item->mcast->this_seqno = instance->global_seqno++;
/*
 * Build the sort queue item describing this message
 */
memset (&sort_queue_item, 0, sizeof (struct sort_queue_item));
sort_queue_item.mcast = message_item->mcast;
sort_queue_item.msg_len = message_item->msg_len;
mcast = sort_queue_item.mcast;
memcpy (&mcast->ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id));
/*
 * Add message to retransmit queue
 */
sq_item_add (sort_queue, &sort_queue_item, message_item->mcast->seq);
- totemrrp_mcast_noflush_send (
- instance->totemrrp_context,
+ totemnet_mcast_noflush_send (
+ instance->totemnet_context,
message_item->mcast,
message_item->msg_len);
/*
 * Delete item from pending queue
 */
cs_queue_item_remove (mcast_queue);
/*
 * Record the token sequence as the highest sequence number received
 */
instance->my_high_seq_received = token->seq;
}
update_aru (instance);
/*
 * Return the number of messages multicast in this call
 */
return (fcc_mcast_current);
}
/*
 * Remulticasts messages in orf_token's retransmit list (requires orf_token)
 * Modifies orf_token's rtr to include retransmits required by this process
 *
 * instance     totemsrp instance owning the sort queues
 * orf_token    token whose rtr_list is serviced and then extended in place
 * fcc_allowed  in: number of sends permitted; out: reduced by the number
 *              of messages remulticast here
 *
 * Returns the number of messages remulticast.
 */
static int orf_token_rtr (
	struct totemsrp_instance *instance,
	struct orf_token *orf_token,
	unsigned int *fcc_allowed)
{
	unsigned int res;
	unsigned int i, j;
	unsigned int found;
	struct sq *sort_queue;
	struct rtr_item *rtr_list;
	unsigned int range = 0;
	char retransmit_msg[1024];

	if (instance->memb_state == MEMB_STATE_RECOVERY) {
		sort_queue = &instance->recovery_sort_queue;
	} else {
		sort_queue = &instance->regular_sort_queue;
	}

	rtr_list = &orf_token->rtr_list[0];

	if (orf_token->rtr_list_entries) {
		size_t msg_used;
		int written;

		log_printf (instance->totemsrp_log_level_debug,
			"Retransmit List %d", orf_token->rtr_list_entries);

		/*
		 * Build the log line with bounded writes: the entry count comes
		 * from the token, so an unbounded strcat could run past the end
		 * of retransmit_msg.  Output is truncated once the buffer is full.
		 */
		retransmit_msg[0] = '\0';
		written = snprintf (retransmit_msg, sizeof (retransmit_msg),
			"Retransmit List: ");
		msg_used = (written > 0) ? (size_t)written : 0;

		for (i = 0; i < orf_token->rtr_list_entries; i++) {
			size_t msg_left = sizeof (retransmit_msg) - msg_used;

			if (msg_left <= 1) {
				break;
			}
			written = snprintf (&retransmit_msg[msg_used], msg_left,
				"%x ", rtr_list[i].seq);
			if (written < 0 || (size_t)written >= msg_left) {
				/* Entry did not fit completely; stop appending */
				break;
			}
			msg_used += (size_t)written;
		}
		log_printf (instance->totemsrp_log_level_notice,
			"%s", retransmit_msg);
	}

	/*
	 * Retransmit messages on orf_token's RTR list from RTR queue
	 */
	for (instance->fcc_remcast_current = 0, i = 0;
		instance->fcc_remcast_current < *fcc_allowed && i < orf_token->rtr_list_entries;) {

		/*
		 * If this retransmit request isn't from this configuration,
		 * try next rtr entry
		 */
		if (memcmp (&rtr_list[i].ring_id, &instance->my_ring_id,
			sizeof (struct memb_ring_id)) != 0) {

			i += 1;
			continue;
		}

		res = orf_token_remcast (instance, rtr_list[i].seq);
		if (res == 0) {
			/*
			 * Multicasted message, so no need to copy to new retransmit list
			 */
			orf_token->rtr_list_entries -= 1;
			assert (orf_token->rtr_list_entries >= 0);
			/* Close the gap; i is not advanced so the shifted entry is examined next */
			memmove (&rtr_list[i], &rtr_list[i + 1],
				sizeof (struct rtr_item) * (orf_token->rtr_list_entries - i));

			instance->stats.mcast_retx++;
			instance->fcc_remcast_current++;
		} else {
			i += 1;
		}
	}
	*fcc_allowed = *fcc_allowed - instance->fcc_remcast_current;

	/*
	 * Add messages to retransmit to RTR list
	 * but only retry if there is room in the retransmit list
	 */
	range = orf_token->seq - instance->my_aru;
	assert (range < QUEUE_RTR_ITEMS_SIZE_MAX);

	for (i = 1; (orf_token->rtr_list_entries < RETRANSMIT_ENTRIES_MAX) &&
		(i <= range); i++) {

		/*
		 * Ensure message is within the sort queue range
		 */
		res = sq_in_range (sort_queue, instance->my_aru + i);
		if (res == 0) {
			break;
		}

		/*
		 * Find if a message is missing from this processor
		 */
		res = sq_item_inuse (sort_queue, instance->my_aru + i);
		if (res == 0) {
			/*
			 * Determine how many times we have missed receiving
			 * this sequence number.  sq_item_miss_count increments
			 * a counter for the sequence number.  The miss count
			 * will be returned and compared.  This allows time for
			 * delayed multicast messages to be received before
			 * declaring the message is missing and requesting a
			 * retransmit.
			 */
			res = sq_item_miss_count (sort_queue, instance->my_aru + i);
			if (res < instance->totem_config->miss_count_const) {
				continue;
			}

			/*
			 * Determine if missing message is already in retransmit list
			 */
			found = 0;
			for (j = 0; j < orf_token->rtr_list_entries; j++) {
				if (instance->my_aru + i == rtr_list[j].seq) {
					found = 1;
					break;
				}
			}
			if (found == 0) {
				/*
				 * Missing message not found in current retransmit list so add it
				 */
				memcpy (&rtr_list[orf_token->rtr_list_entries].ring_id,
					&instance->my_ring_id, sizeof (struct memb_ring_id));
				rtr_list[orf_token->rtr_list_entries].seq = instance->my_aru + i;
				orf_token->rtr_list_entries++;
			}
		}
	}
	return (instance->fcc_remcast_current);
}
/*
 * Re-send the saved copy of the last token (orf_token_retransmit, of
 * orf_token_retransmit_size bytes) through the transport token send.
 */
static void token_retransmit (struct totemsrp_instance *instance)
{
- totemrrp_token_send (instance->totemrrp_context,
+ totemnet_token_send (instance->totemnet_context,
instance->orf_token_retransmit,
instance->orf_token_retransmit_size);
}
/*
 * Token retransmit timer callback.
 *
 * In the commit, operational and recovery states the saved token is sent
 * again and the retransmit timer is re-armed.  Nothing happens in the
 * gather state.
 */
static void timer_function_token_retransmit_timeout (void *data)
{
	struct totemsrp_instance *instance = data;

	if (instance->memb_state == MEMB_STATE_COMMIT ||
		instance->memb_state == MEMB_STATE_OPERATIONAL ||
		instance->memb_state == MEMB_STATE_RECOVERY) {

		token_retransmit (instance);
		reset_token_retransmit_timeout (instance);
	}
}
/*
 * Token hold retransmit timer callback.
 *
 * The saved token is sent again only in the operational and recovery
 * states; the gather and commit states do nothing.  The timer is not
 * re-armed here.
 */
static void timer_function_token_hold_retransmit_timeout (void *data)
{
	struct totemsrp_instance *instance = data;

	if (instance->memb_state == MEMB_STATE_OPERATIONAL ||
		instance->memb_state == MEMB_STATE_RECOVERY) {

		token_retransmit (instance);
	}
}
/*
 * Merge detect timer callback.
 *
 * Clears the "timeout outstanding" flag, then sends a merge detect message
 * when the instance is operational and its first address equals the ring
 * representative.  All other states do nothing further.
 */
static void timer_function_merge_detect_timeout(void *data)
{
	struct totemsrp_instance *instance = data;

	instance->my_merge_detect_timeout_outstanding = 0;

	if (instance->memb_state != MEMB_STATE_OPERATIONAL) {
		return;
	}

	if (totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0])) {
		memb_merge_detect_transmit (instance);
	}
}
/*
 * Send orf_token to next member (requires orf_token)
 *
 * Stamps the token with this node's id and always saves a copy (header
 * plus rtr_list_entries retransmit items) in orf_token_retransmit so it
 * can be re-sent later.  The token is only put on the wire when
 * forward_token is non-zero.
 *
 * Returns 0 in every visible path; the result of the send call is not
 * captured.
 */
static int token_send (
struct totemsrp_instance *instance,
struct orf_token *orf_token,
int forward_token)
{
int res = 0;
unsigned int orf_token_size;
orf_token_size = sizeof (struct orf_token) +
(orf_token->rtr_list_entries * sizeof (struct rtr_item));
orf_token->header.nodeid = instance->my_id.addr[0].nodeid;
/*
 * NOTE(review): orf_token_size is not checked against the capacity of
 * orf_token_retransmit here - confirm the buffer is large enough
 */
memcpy (instance->orf_token_retransmit, orf_token, orf_token_size);
instance->orf_token_retransmit_size = orf_token_size;
assert (orf_token->header.nodeid);
/*
 * Token is being held: keep the saved copy but do not transmit
 */
if (forward_token == 0) {
return (0);
}
- totemrrp_token_send (instance->totemrrp_context,
+ totemnet_token_send (instance->totemnet_context,
orf_token,
orf_token_size);
return (res);
}
/*
 * Multicast a token hold cancel message if this instance currently holds
 * the token, and clear my_token_held.  Does nothing when the token is not
 * held.  Always returns 0.
 */
static int token_hold_cancel_send (struct totemsrp_instance *instance)
{
struct token_hold_cancel token_hold_cancel;
/*
 * Only cancel if the token is currently held
 */
if (instance->my_token_held == 0) {
return (0);
}
instance->my_token_held = 0;
/*
 * Build message
 */
token_hold_cancel.header.type = MESSAGE_TYPE_TOKEN_HOLD_CANCEL;
token_hold_cancel.header.endian_detector = ENDIAN_LOCAL;
token_hold_cancel.header.encapsulated = 0;
token_hold_cancel.header.nodeid = instance->my_id.addr[0].nodeid;
memcpy (&token_hold_cancel.ring_id, &instance->my_ring_id,
sizeof (struct memb_ring_id));
assert (token_hold_cancel.header.nodeid);
instance->stats.token_hold_cancel_tx++;
- totemrrp_mcast_flush_send (instance->totemrrp_context, &token_hold_cancel,
+ totemnet_mcast_flush_send (instance->totemnet_context, &token_hold_cancel,
sizeof (struct token_hold_cancel));
return (0);
}
static int orf_token_send_initial (struct totemsrp_instance *instance)
{
struct orf_token orf_token;
int res;
orf_token.header.type = MESSAGE_TYPE_ORF_TOKEN;
orf_token.header.endian_detector = ENDIAN_LOCAL;
orf_token.header.encapsulated = 0;
orf_token.header.nodeid = instance->my_id.addr[0].nodeid;
assert (orf_token.header.nodeid);
orf_token.seq = SEQNO_START_MSG;
orf_token.token_seq = SEQNO_START_TOKEN;
orf_token.retrans_flg = 1;
instance->my_set_retrans_flg = 1;
instance->stats.orf_token_tx++;
if (cs_queue_is_empty (&instance->retrans_message_queue) == 1) {
orf_token.retrans_flg = 0;
instance->my_set_retrans_flg = 0;
} else {
orf_token.retrans_flg = 1;
instance->my_set_retrans_flg = 1;
}
orf_token.aru = 0;
orf_token.aru = SEQNO_START_MSG - 1;
orf_token.aru_addr = instance->my_id.addr[0].nodeid;
memcpy (&orf_token.ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id));
orf_token.fcc = 0;
orf_token.backlog = 0;
orf_token.rtr_list_entries = 0;
res = token_send (instance, &orf_token, 1);
return (res);
}
static void memb_state_commit_token_update (
struct totemsrp_instance *instance)
{
struct srp_addr *addr;
struct memb_commit_token_memb_entry *memb_list;
unsigned int high_aru;
unsigned int i;
addr = (struct srp_addr *)instance->commit_token->end_of_commit_token;
memb_list = (struct memb_commit_token_memb_entry *)(addr + instance->commit_token->addr_entries);
memcpy (instance->my_new_memb_list, addr,
sizeof (struct srp_addr) * instance->commit_token->addr_entries);
instance->my_new_memb_entries = instance->commit_token->addr_entries;
memcpy (&memb_list[instance->commit_token->memb_index].ring_id,
&instance->my_old_ring_id, sizeof (struct memb_ring_id));
memb_list[instance->commit_token->memb_index].aru = instance->old_ring_state_aru;
/*
* TODO high delivered is really instance->my_aru, but with safe this
* could change?
*/
instance->my_received_flg =
(instance->my_aru == instance->my_high_seq_received);
memb_list[instance->commit_token->memb_index].received_flg = instance->my_received_flg;
memb_list[instance->commit_token->memb_index].high_delivered = instance->my_high_delivered;
/*
* find high aru up to current memb_index for all matching ring ids
* if any ring id matching memb_index has aru less then high aru set
* received flag for that entry to false
*/
high_aru = memb_list[instance->commit_token->memb_index].aru;
for (i = 0; i <= instance->commit_token->memb_index; i++) {
if (memcmp (&memb_list[instance->commit_token->memb_index].ring_id,
&memb_list[i].ring_id,
sizeof (struct memb_ring_id)) == 0) {
if (sq_lt_compare (high_aru, memb_list[i].aru)) {
high_aru = memb_list[i].aru;
}
}
}
for (i = 0; i <= instance->commit_token->memb_index; i++) {
if (memcmp (&memb_list[instance->commit_token->memb_index].ring_id,
&memb_list[i].ring_id,
sizeof (struct memb_ring_id)) == 0) {
if (sq_lt_compare (memb_list[i].aru, high_aru)) {
memb_list[i].received_flg = 0;
if (i == instance->commit_token->memb_index) {
instance->my_received_flg = 0;
}
}
}
}
instance->commit_token->header.nodeid = instance->my_id.addr[0].nodeid;
instance->commit_token->memb_index += 1;
assert (instance->commit_token->memb_index <= instance->commit_token->addr_entries);
assert (instance->commit_token->header.nodeid);
}
/*
 * Point the transport's token target at the commit token member selected
 * by memb_index (taken modulo addr_entries, so it wraps to the first
 * member after the last one).
 */
static void memb_state_commit_token_target_set (
struct totemsrp_instance *instance)
{
struct srp_addr *addr;
- unsigned int i;
addr = (struct srp_addr *)instance->commit_token->end_of_commit_token;
- for (i = 0; i < instance->totem_config->interface_count; i++) {
- totemrrp_token_target_set (
- instance->totemrrp_context,
- &addr[instance->commit_token->memb_index %
- instance->commit_token->addr_entries].addr[i],
- i);
- }
+ /* Totemnet just looks at the node id */
+ totemnet_token_target_set (
+ instance->totemnet_context,
+ &addr[instance->commit_token->memb_index %
+ instance->commit_token->addr_entries].addr[0]);
}
/*
 * Send the caller-supplied commit token onward.
 *
 * Increments the token's token_seq, stamps it with this node's id, saves a
 * copy for retransmission, sends it through the transport token send and
 * re-arms the token retransmit timer.  Always returns 0.
 */
static int memb_state_commit_token_send_recovery (
struct totemsrp_instance *instance,
struct memb_commit_token *commit_token)
{
unsigned int commit_token_size;
commit_token->token_seq++;
commit_token->header.nodeid = instance->my_id.addr[0].nodeid;
/*
 * Size is the fixed header plus one srp_addr and one member entry per
 * address carried in the token
 */
commit_token_size = sizeof (struct memb_commit_token) +
((sizeof (struct srp_addr) +
sizeof (struct memb_commit_token_memb_entry)) * commit_token->addr_entries);
/*
 * Make a copy for retransmission if necessary
 * NOTE(review): commit_token_size is not checked against the capacity of
 * orf_token_retransmit here - confirm the buffer is large enough
 */
memcpy (instance->orf_token_retransmit, commit_token, commit_token_size);
instance->orf_token_retransmit_size = commit_token_size;
instance->stats.memb_commit_token_tx++;
- totemrrp_token_send (instance->totemrrp_context,
+ totemnet_token_send (instance->totemnet_context,
commit_token,
commit_token_size);
/*
 * Request retransmission of the commit token in case it is lost
 */
reset_token_retransmit_timeout (instance);
return (0);
}
/*
 * Send the instance's own commit token onward.
 *
 * Same steps as memb_state_commit_token_send_recovery, but operating on
 * instance->commit_token: increment token_seq, stamp the node id, save a
 * copy for retransmission, send, and re-arm the token retransmit timer.
 * Always returns 0.
 */
static int memb_state_commit_token_send (
struct totemsrp_instance *instance)
{
unsigned int commit_token_size;
instance->commit_token->token_seq++;
instance->commit_token->header.nodeid = instance->my_id.addr[0].nodeid;
/*
 * Size is the fixed header plus one srp_addr and one member entry per
 * address carried in the token
 */
commit_token_size = sizeof (struct memb_commit_token) +
((sizeof (struct srp_addr) +
sizeof (struct memb_commit_token_memb_entry)) * instance->commit_token->addr_entries);
/*
 * Make a copy for retransmission if necessary
 */
memcpy (instance->orf_token_retransmit, instance->commit_token, commit_token_size);
instance->orf_token_retransmit_size = commit_token_size;
instance->stats.memb_commit_token_tx++;
- totemrrp_token_send (instance->totemrrp_context,
+ totemnet_token_send (instance->totemnet_context,
instance->commit_token,
commit_token_size);
/*
 * Request retransmission of the commit token in case it is lost
 */
reset_token_retransmit_timeout (instance);
return (0);
}
/*
 * Determine whether this processor has the lowest address among the
 * processors that are in my_proc_list but not in my_failed_list.
 *
 * Returns non-zero when the lowest first-interface address of that set
 * compares equal to this processor's first address, 0 otherwise.  An empty
 * set yields 0.
 */
static int memb_lowest_in_config (struct totemsrp_instance *instance)
{
	struct srp_addr token_memb[PROCESSOR_COUNT_MAX];
	int token_memb_entries = 0;
	int i;
	struct totem_ip_address *lowest_addr;

	memb_set_subtract (token_memb, &token_memb_entries,
		instance->my_proc_list, instance->my_proc_list_entries,
		instance->my_failed_list, instance->my_failed_list_entries);

	/*
	 * With no surviving members token_memb[0] was never written, so
	 * comparing against it would read uninitialized stack memory
	 */
	if (token_memb_entries <= 0) {
		return (0);
	}

	/*
	 * find representative by searching for smallest identifier
	 *
	 * lowest_addr aliases the first element of the local copy, which is
	 * overwritten whenever a smaller address is found
	 */
	lowest_addr = &token_memb[0].addr[0];
	for (i = 1; i < token_memb_entries; i++) {
		if (totemip_compare(lowest_addr, &token_memb[i].addr[0]) > 0) {
			totemip_copy (lowest_addr, &token_memb[i].addr[0]);
		}
	}
	return (totemip_compare (lowest_addr, &instance->my_id.addr[0]) == 0);
}
/*
 * qsort()-style comparator for struct srp_addr elements: orders them by
 * the totemip_compare() result of their first interface address.
 */
static int srp_addr_compare (const void *a, const void *b)
{
	const struct srp_addr *lhs = a;
	const struct srp_addr *rhs = b;
	const struct totem_ip_address *lhs_ip = &lhs->addr[0];
	const struct totem_ip_address *rhs_ip = &rhs->addr[0];

	return (totemip_compare (lhs_ip, rhs_ip));
}
static void memb_state_commit_token_create (
struct totemsrp_instance *instance)
{
struct srp_addr token_memb[PROCESSOR_COUNT_MAX];
struct srp_addr *addr;
struct memb_commit_token_memb_entry *memb_list;
int token_memb_entries = 0;
log_printf (instance->totemsrp_log_level_debug,
"Creating commit token because I am the rep.");
memb_set_subtract (token_memb, &token_memb_entries,
instance->my_proc_list, instance->my_proc_list_entries,
instance->my_failed_list, instance->my_failed_list_entries);
memset (instance->commit_token, 0, sizeof (struct memb_commit_token));
instance->commit_token->header.type = MESSAGE_TYPE_MEMB_COMMIT_TOKEN;
instance->commit_token->header.endian_detector = ENDIAN_LOCAL;
instance->commit_token->header.encapsulated = 0;
instance->commit_token->header.nodeid = instance->my_id.addr[0].nodeid;
assert (instance->commit_token->header.nodeid);
totemip_copy(&instance->commit_token->ring_id.rep, &instance->my_id.addr[0]);
instance->commit_token->ring_id.seq = instance->token_ring_id_seq + 4;
/*
* This qsort is necessary to ensure the commit token traverses
* the ring in the proper order
*/
qsort (token_memb, token_memb_entries, sizeof (struct srp_addr),
srp_addr_compare);
instance->commit_token->memb_index = 0;
instance->commit_token->addr_entries = token_memb_entries;
addr = (struct srp_addr *)instance->commit_token->end_of_commit_token;
memb_list = (struct memb_commit_token_memb_entry *)(addr + instance->commit_token->addr_entries);
memcpy (addr, token_memb,
token_memb_entries * sizeof (struct srp_addr));
memset (memb_list, 0,
sizeof (struct memb_commit_token_memb_entry) * token_memb_entries);
}
/*
 * Build a join message carrying this processor's proc list and failed
 * list and multicast it with the flushing multicast send.
 *
 * When send_join_timeout is configured the call first sleeps for a random
 * time below send_join_timeout * 1000 microseconds.
 */
static void memb_join_message_send (struct totemsrp_instance *instance)
{
/*
 * NOTE(review): no bounds check of the two lists against this 40000 byte
 * buffer is visible here - confirm the lists always fit
 */
char memb_join_data[40000];
struct memb_join *memb_join = (struct memb_join *)memb_join_data;
char *addr;
unsigned int addr_idx;
memb_join->header.type = MESSAGE_TYPE_MEMB_JOIN;
memb_join->header.endian_detector = ENDIAN_LOCAL;
memb_join->header.encapsulated = 0;
memb_join->header.nodeid = instance->my_id.addr[0].nodeid;
assert (memb_join->header.nodeid);
memb_join->ring_seq = instance->my_ring_id.seq;
memb_join->proc_list_entries = instance->my_proc_list_entries;
memb_join->failed_list_entries = instance->my_failed_list_entries;
srp_addr_copy (&memb_join->system_from, &instance->my_id);
/*
 * Append the proc list and then the failed list directly after the fixed
 * part of the join message; addr_idx ends up as the total message length
 */
addr = (char *)memb_join;
addr_idx = sizeof (struct memb_join);
memcpy (&addr[addr_idx],
instance->my_proc_list,
instance->my_proc_list_entries *
sizeof (struct srp_addr));
addr_idx +=
instance->my_proc_list_entries *
sizeof (struct srp_addr);
memcpy (&addr[addr_idx],
instance->my_failed_list,
instance->my_failed_list_entries *
sizeof (struct srp_addr));
addr_idx +=
instance->my_failed_list_entries *
sizeof (struct srp_addr);
/*
 * Random delay before sending; this blocks the caller
 */
if (instance->totem_config->send_join_timeout) {
usleep (random() % (instance->totem_config->send_join_timeout * 1000));
}
instance->stats.memb_join_tx++;
- totemrrp_mcast_flush_send (
- instance->totemrrp_context,
+ totemnet_mcast_flush_send (
+ instance->totemnet_context,
memb_join,
addr_idx);
}
/*
 * Announce that this processor is leaving.
 *
 * Merges this processor into my_failed_list, builds a join message whose
 * proc list is my_proc_list without this processor and whose sender node
 * id is LEAVE_DUMMY_NODEID, and multicasts it with the flushing multicast
 * send.  When send_join_timeout is configured the call first sleeps for a
 * random time below send_join_timeout * 1000 microseconds.
 */
static void memb_leave_message_send (struct totemsrp_instance *instance)
{
/*
 * NOTE(review): no bounds check of the two lists against this 40000 byte
 * buffer is visible here - confirm the lists always fit
 */
char memb_join_data[40000];
struct memb_join *memb_join = (struct memb_join *)memb_join_data;
char *addr;
unsigned int addr_idx;
int active_memb_entries;
struct srp_addr active_memb[PROCESSOR_COUNT_MAX];
log_printf (instance->totemsrp_log_level_debug,
"sending join/leave message");
/*
 * add us to the failed list, and remove us from
 * the members list
 */
memb_set_merge(
&instance->my_id, 1,
instance->my_failed_list, &instance->my_failed_list_entries);
memb_set_subtract (active_memb, &active_memb_entries,
instance->my_proc_list, instance->my_proc_list_entries,
&instance->my_id, 1);
memb_join->header.type = MESSAGE_TYPE_MEMB_JOIN;
memb_join->header.endian_detector = ENDIAN_LOCAL;
memb_join->header.encapsulated = 0;
memb_join->header.nodeid = LEAVE_DUMMY_NODEID;
memb_join->ring_seq = instance->my_ring_id.seq;
memb_join->proc_list_entries = active_memb_entries;
memb_join->failed_list_entries = instance->my_failed_list_entries;
srp_addr_copy (&memb_join->system_from, &instance->my_id);
memb_join->system_from.addr[0].nodeid = LEAVE_DUMMY_NODEID;
// TODO: CC Maybe use the actual join send routine.
/*
 * Append the active member list and then the failed list directly after
 * the fixed part of the join message; addr_idx ends up as the total
 * message length
 */
addr = (char *)memb_join;
addr_idx = sizeof (struct memb_join);
memcpy (&addr[addr_idx],
active_memb,
active_memb_entries *
sizeof (struct srp_addr));
addr_idx +=
active_memb_entries *
sizeof (struct srp_addr);
memcpy (&addr[addr_idx],
instance->my_failed_list,
instance->my_failed_list_entries *
sizeof (struct srp_addr));
addr_idx +=
instance->my_failed_list_entries *
sizeof (struct srp_addr);
/*
 * Random delay before sending; this blocks the caller
 */
if (instance->totem_config->send_join_timeout) {
usleep (random() % (instance->totem_config->send_join_timeout * 1000));
}
instance->stats.memb_join_tx++;
- totemrrp_mcast_flush_send (
- instance->totemrrp_context,
+ totemnet_mcast_flush_send (
+ instance->totemnet_context,
memb_join,
addr_idx);
}
/*
 * Build a merge detect message carrying this processor's address and the
 * current ring id, and multicast it with the flushing multicast send.
 */
static void memb_merge_detect_transmit (struct totemsrp_instance *instance)
{
struct memb_merge_detect memb_merge_detect;
memb_merge_detect.header.type = MESSAGE_TYPE_MEMB_MERGE_DETECT;
memb_merge_detect.header.endian_detector = ENDIAN_LOCAL;
memb_merge_detect.header.encapsulated = 0;
memb_merge_detect.header.nodeid = instance->my_id.addr[0].nodeid;
srp_addr_copy (&memb_merge_detect.system_from, &instance->my_id);
memcpy (&memb_merge_detect.ring_id, &instance->my_ring_id,
sizeof (struct memb_ring_id));
assert (memb_merge_detect.header.nodeid);
instance->stats.memb_merge_detect_tx++;
- totemrrp_mcast_flush_send (instance->totemrrp_context,
+ totemnet_mcast_flush_send (instance->totemnet_context,
&memb_merge_detect,
sizeof (struct memb_merge_detect));
}
static void memb_ring_id_set (
struct totemsrp_instance *instance,
const struct memb_ring_id *ring_id)
{
memcpy (&instance->my_ring_id, ring_id, sizeof (struct memb_ring_id));
}
/*
 * Register a callback to be run when the token is received or sent.
 *
 * srp_context  totemsrp instance
 * handle_out   receives the newly allocated callback handle
 * type         TOTEM_CALLBACK_TOKEN_RECEIVED or TOTEM_CALLBACK_TOKEN_SENT;
 *              any other value leaves the handle unlinked
 * delete       stored in the handle and consulted when callbacks execute
 * callback_fn  function to invoke
 * data         opaque pointer handed to callback_fn
 *
 * A token hold cancel is sent before anything else.  Returns 0 on
 * success, -1 if the handle could not be allocated.
 */
int totemsrp_callback_token_create (
	void *srp_context,
	void **handle_out,
	enum totem_callback_token_type type,
	int delete,
	int (*callback_fn) (enum totem_callback_token_type type, const void *),
	const void *data)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
	struct token_callback_instance *entry;

	token_hold_cancel_send (instance);

	entry = malloc (sizeof (struct token_callback_instance));
	if (entry == NULL) {
		return (-1);
	}

	*handle_out = (void *)entry;

	list_init (&entry->list);
	entry->callback_fn = callback_fn;
	entry->data = (void *) data;
	entry->callback_type = type;
	entry->delete = delete;

	if (type == TOTEM_CALLBACK_TOKEN_RECEIVED) {
		list_add (&entry->list, &instance->token_callback_received_listhead);
	} else if (type == TOTEM_CALLBACK_TOKEN_SENT) {
		list_add (&entry->list, &instance->token_callback_sent_listhead);
	}

	return (0);
}
/*
 * Unlink and free a callback handle created by
 * totemsrp_callback_token_create, then clear the caller's handle.
 * A handle that is already 0 is left untouched.
 */
void totemsrp_callback_token_destroy (void *srp_context, void **handle_out)
{
	struct token_callback_instance *entry;

	if (*handle_out == 0) {
		return;
	}

	entry = (struct token_callback_instance *)*handle_out;
	list_del (&entry->list);
	free (entry);
	*handle_out = 0;
}
/*
 * Run every callback registered for the given token event.
 *
 * An entry whose delete field is 1 is unlinked before its callback runs.
 * If that callback returns -1 the entry is put back on the list so it is
 * tried again on the next token; otherwise an entry with a non-zero delete
 * field is freed.
 */
static void token_callbacks_execute (
	struct totemsrp_instance *instance,
	enum totem_callback_token_type type)
{
	struct list_head *head = 0;
	struct list_head *cursor;
	struct list_head *following;
	struct token_callback_instance *entry;
	int one_shot;
	int rc;

	switch (type) {
	case TOTEM_CALLBACK_TOKEN_RECEIVED:
		head = &instance->token_callback_received_listhead;
		break;
	case TOTEM_CALLBACK_TOKEN_SENT:
		head = &instance->token_callback_sent_listhead;
		break;
	default:
		assert (0);
	}

	cursor = head->next;
	while (cursor != head) {
		entry = list_entry (cursor, struct token_callback_instance, list);

		/* Remember the successor before the entry can be unlinked */
		following = cursor->next;

		one_shot = entry->delete;
		if (one_shot == 1) {
			list_del (cursor);
		}

		rc = entry->callback_fn (
			entry->callback_type,
			entry->data);

		/*
		 * This callback failed to execute, try it again on the next token
		 */
		if (rc == -1 && one_shot == 1) {
			list_add (cursor, head);
		} else if (one_shot) {
			free (entry);
		}

		cursor = following;
	}
}
/*
* Flow control functions
*/
/*
 * Return the number of entries in use in the queue that feeds multicasts
 * in the current state: the transitional or regular new-message queue
 * when operational, the retransmit queue in recovery, and 0 in any other
 * state.  The value is also stored in the stats slot of the latest token.
 */
static unsigned int backlog_get (struct totemsrp_instance *instance)
{
	struct cs_queue *source = NULL;
	unsigned int pending = 0;

	switch (instance->memb_state) {
	case MEMB_STATE_OPERATIONAL:
		source = instance->waiting_trans_ack ?
			&instance->new_message_queue_trans :
			&instance->new_message_queue;
		break;
	case MEMB_STATE_RECOVERY:
		source = &instance->retrans_message_queue;
		break;
	default:
		break;
	}

	if (source != NULL) {
		pending = cs_queue_used (source);
	}

	instance->stats.token[instance->stats.latest_token].backlog_calc = pending;

	return (pending);
}
/*
 * Compute how many messages may be transmitted on this token visit.
 *
 * Starts from max_messages, caps it at window_size - token->fcc, refreshes
 * my_cbl from backlog_get(), and caps the result again at this
 * processor's share of the window when the combined backlog is non-zero.
 */
static int fcc_calculate (
	struct totemsrp_instance *instance,
	struct orf_token *token)
{
	unsigned int allowed = instance->totem_config->max_messages;
	unsigned int backlog_share;

	if (allowed > instance->totem_config->window_size - token->fcc) {
		allowed = instance->totem_config->window_size - token->fcc;
	}

	instance->my_cbl = backlog_get (instance);

	/*
	 * Only do backlog calculation if there is a backlog otherwise
	 * we would result in div by zero
	 */
	if ((token->backlog + instance->my_cbl - instance->my_pbl) == 0) {
		return (allowed);
	}

	backlog_share = (instance->totem_config->window_size * instance->my_pbl) /
		(token->backlog + instance->my_cbl - instance->my_pbl);
	if (backlog_share > 0 && allowed > backlog_share) {
		allowed = backlog_share;
	}

	return (allowed);
}
/*
 * don't overflow the RTR sort queue
 *
 * Sets *transmits_allowed to 0 when sq_lt_compare() reports that
 * last_released + QUEUE_RTR_ITEMS_SIZE_MAX - *transmits_allowed -
 * window_size is lower than the token's sequence number.
 */
static void fcc_rtr_limit (
	struct totemsrp_instance *instance,
	struct orf_token *token,
	unsigned int *transmits_allowed)
{
	int headroom = QUEUE_RTR_ITEMS_SIZE_MAX;

	/* Sanity check: allowance plus window must fit into the RTR queue */
	headroom -= (*transmits_allowed + instance->totem_config->window_size);
	assert (headroom >= 0);

	if (!sq_lt_compare (instance->last_released +
		QUEUE_RTR_ITEMS_SIZE_MAX - *transmits_allowed -
		instance->totem_config->window_size,
		token->seq)) {

		return;
	}

	*transmits_allowed = 0;
}
/*
 * Fold this visit's flow control numbers into the token.
 *
 * token->fcc and token->backlog are adjusted by the difference between
 * this visit's values (msgs_transmitted, my_cbl) and the previous visit's
 * (my_trc, my_pbl); the current values then become the previous ones.
 */
static void fcc_token_update (
	struct totemsrp_instance *instance,
	struct orf_token *token,
	unsigned int msgs_transmitted)
{
	/* Transmit count */
	token->fcc += msgs_transmitted - instance->my_trc;
	instance->my_trc = msgs_transmitted;

	/* Backlog */
	token->backlog += instance->my_cbl - instance->my_pbl;
	instance->my_pbl = instance->my_cbl;
}
/*
* Message Handlers
*/
/*
 * Timestamp of the previous measurement point; only read and written by
 * the GIVEINFO diagnostics in message_handler_orf_token
 */
unsigned long long int tv_old;
/*
 * message handler called when TOKEN message type received
 *
 * Copies the token locally, updates merge detection and token hold state,
 * flushes pending receives, runs the "token received" callbacks, and then
 * (unless the token is discarded) services retransmits, multicasts new
 * messages, updates flow control and aru fields, and forwards or holds the
 * token.
 *
 * instance                  totemsrp instance
 * msg                       received token
 * msg_len                   length of msg; not consulted in this function
 * endian_conversion_needed  non-zero if msg must be byte-swapped first
 *
 * Returns 0 in every path.
 */
static int message_handler_orf_token (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed)
{
char token_storage[1500];
char token_convert[1500];
struct orf_token *token = NULL;
int forward_token;
unsigned int transmits_allowed;
unsigned int mcasted_retransmit;
unsigned int mcasted_regular;
unsigned int last_aru;
#ifdef GIVEINFO
unsigned long long tv_current;
unsigned long long tv_diff;
tv_current = qb_util_nano_current_get ();
tv_diff = tv_current - tv_old;
tv_old = tv_current;
log_printf (instance->totemsrp_log_level_debug,
"Time since last token %0.4f ms", ((float)tv_diff) / 1000000.0);
#endif
if (instance->orf_token_discard) {
return (0);
}
#ifdef TEST_DROP_ORF_TOKEN_PERCENTAGE
if (random()%100 < TEST_DROP_ORF_TOKEN_PERCENTAGE) {
return (0);
}
#endif
if (endian_conversion_needed) {
orf_token_endian_convert ((struct orf_token *)msg,
(struct orf_token *)token_convert);
msg = (struct orf_token *)token_convert;
}
/*
 * Make copy of token and retransmit list in case we have
 * to flush incoming messages from the kernel queue
 *
 * NOTE(review): the copy always reads RETRANSMIT_ENTRIES_MAX rtr items
 * from msg and neither msg_len nor rtr_list_entries is validated here -
 * confirm the caller guarantees enough readable bytes
 */
token = (struct orf_token *)token_storage;
memcpy (token, msg, sizeof (struct orf_token));
memcpy (&token->rtr_list[0], (char *)msg + sizeof (struct orf_token),
sizeof (struct rtr_item) * RETRANSMIT_ENTRIES_MAX);
/*
 * Handle merge detection timeout
 */
if (token->seq == instance->my_last_seq) {
start_merge_detect_timeout (instance);
instance->my_seq_unchanged += 1;
} else {
cancel_merge_detect_timeout (instance);
cancel_token_hold_retransmit_timeout (instance);
instance->my_seq_unchanged = 0;
}
instance->my_last_seq = token->seq;
#ifdef TEST_RECOVERY_MSG_COUNT
if (instance->memb_state == MEMB_STATE_OPERATIONAL && token->seq > TEST_RECOVERY_MSG_COUNT) {
return (0);
}
#endif
/*
 * The flushing flag is raised only for the duration of the receive flush
 */
instance->flushing = 1;
- totemrrp_recv_flush (instance->totemrrp_context);
+ totemnet_recv_flush (instance->totemnet_context);
instance->flushing = 0;
/*
 * Determine if we should hold (in reality drop) the token
 *
 * The ring rep needs my_seq_unchanged to exceed seqno_unchanged_const,
 * every other processor only needs to reach it
 */
instance->my_token_held = 0;
if (totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0]) &&
instance->my_seq_unchanged > instance->totem_config->seqno_unchanged_const) {
instance->my_token_held = 1;
} else
if (!totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0]) &&
instance->my_seq_unchanged >= instance->totem_config->seqno_unchanged_const) {
instance->my_token_held = 1;
}
/*
 * Hold onto token when there is no activity on ring and
 * this processor is the ring rep
 */
forward_token = 1;
if (totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0])) {
if (instance->my_token_held) {
forward_token = 0;
}
}
token_callbacks_execute (instance, TOTEM_CALLBACK_TOKEN_RECEIVED);
switch (instance->memb_state) {
case MEMB_STATE_COMMIT:
/* Discard token */
break;
case MEMB_STATE_OPERATIONAL:
messages_free (instance, token->aru);
/*
 * Do NOT add break, this case should also execute code in gather case.
 */
case MEMB_STATE_GATHER:
/*
 * DO NOT add break, we use different free mechanism in recovery state
 */
case MEMB_STATE_RECOVERY:
/*
 * Discard tokens from another configuration
 */
if (memcmp (&token->ring_id, &instance->my_ring_id,
sizeof (struct memb_ring_id)) != 0) {
if ((forward_token)
&& instance->use_heartbeat) {
reset_heartbeat_timeout(instance);
}
else {
cancel_heartbeat_timeout(instance);
}
return (0); /* discard token */
}
/*
 * Discard retransmitted tokens
 */
if (sq_lte_compare (token->token_seq, instance->my_token_seq)) {
return (0); /* discard token */
}
last_aru = instance->my_last_aru;
instance->my_last_aru = token->aru;
transmits_allowed = fcc_calculate (instance, token);
mcasted_retransmit = orf_token_rtr (instance, token, &transmits_allowed);
/*
 * Outstanding or serviced retransmits mean the ring is not idle, so
 * stop holding the token
 */
if (instance->my_token_held == 1 &&
(token->rtr_list_entries > 0 || mcasted_retransmit > 0)) {
instance->my_token_held = 0;
forward_token = 1;
}
fcc_rtr_limit (instance, token, &transmits_allowed);
mcasted_regular = orf_token_mcast (instance, token, transmits_allowed);
/*
if (mcasted_regular) {
printf ("mcasted regular %d\n", mcasted_regular);
printf ("token seq %d\n", token->seq);
}
*/
fcc_token_update (instance, token, mcasted_retransmit +
mcasted_regular);
/*
 * Take over the token aru when ours is lower, when we set it on the
 * previous rotation, or when nobody currently owns it
 */
if (sq_lt_compare (instance->my_aru, token->aru) ||
instance->my_id.addr[0].nodeid == token->aru_addr ||
token->aru_addr == 0) {
token->aru = instance->my_aru;
if (token->aru == token->seq) {
token->aru_addr = 0;
} else {
token->aru_addr = instance->my_id.addr[0].nodeid;
}
}
/*
 * Count consecutive token visits on which the aru did not move
 */
if (token->aru == last_aru && token->aru_addr != 0) {
instance->my_aru_count += 1;
} else {
instance->my_aru_count = 0;
}
/*
 * We really don't follow specification there. In specification, OTHER nodes
 * detect failure of one node (based on aru_count) and my_id IS NEVER added
 * to failed list (so node never mark itself as failed)
 */
if (instance->my_aru_count > instance->totem_config->fail_to_recv_const &&
token->aru_addr == instance->my_id.addr[0].nodeid) {
log_printf (instance->totemsrp_log_level_error,
"FAILED TO RECEIVE");
instance->failed_to_recv = 1;
memb_set_merge (&instance->my_id, 1,
instance->my_failed_list,
&instance->my_failed_list_entries);
memb_state_gather_enter (instance, TOTEMSRP_GSFROM_FAILED_TO_RECEIVE);
} else {
instance->my_token_seq = token->token_seq;
token->token_seq += 1;
if (instance->memb_state == MEMB_STATE_RECOVERY) {
/*
 * instance->my_aru == instance->my_high_seq_received means this processor
 * has recovered all messages it can recover
 * (ie: its retrans queue is empty)
 */
if (cs_queue_is_empty (&instance->retrans_message_queue) == 0) {
if (token->retrans_flg == 0) {
token->retrans_flg = 1;
instance->my_set_retrans_flg = 1;
}
} else
if (token->retrans_flg == 1 && instance->my_set_retrans_flg) {
token->retrans_flg = 0;
instance->my_set_retrans_flg = 0;
}
log_printf (instance->totemsrp_log_level_debug,
"token retrans flag is %d my set retrans flag%d retrans queue empty %d count %d, aru %x",
token->retrans_flg, instance->my_set_retrans_flg,
cs_queue_is_empty (&instance->retrans_message_queue),
instance->my_retrans_flg_count, token->aru);
if (token->retrans_flg == 0) {
instance->my_retrans_flg_count += 1;
} else {
instance->my_retrans_flg_count = 0;
}
if (instance->my_retrans_flg_count == 2) {
instance->my_install_seq = token->seq;
}
log_printf (instance->totemsrp_log_level_debug,
"install seq %x aru %x high seq received %x",
instance->my_install_seq, instance->my_aru, instance->my_high_seq_received);
if (instance->my_retrans_flg_count >= 2 &&
instance->my_received_flg == 0 &&
sq_lte_compare (instance->my_install_seq, instance->my_aru)) {
instance->my_received_flg = 1;
instance->my_deliver_memb_entries = instance->my_trans_memb_entries;
memcpy (instance->my_deliver_memb_list, instance->my_trans_memb_list,
sizeof (struct totem_ip_address) * instance->my_trans_memb_entries);
}
if (instance->my_retrans_flg_count >= 3 &&
sq_lte_compare (instance->my_install_seq, token->aru)) {
instance->my_rotation_counter += 1;
} else {
instance->my_rotation_counter = 0;
}
/*
 * Two qualifying rotations in a row complete recovery
 */
if (instance->my_rotation_counter == 2) {
log_printf (instance->totemsrp_log_level_debug,
"retrans flag count %x token aru %x install seq %x aru %x %x",
instance->my_retrans_flg_count, token->aru, instance->my_install_seq,
instance->my_aru, token->seq);
memb_state_operational_enter (instance);
instance->my_rotation_counter = 0;
instance->my_retrans_flg_count = 0;
}
}
- totemrrp_send_flush (instance->totemrrp_context);
+ totemnet_send_flush (instance->totemnet_context);
token_send (instance, token, forward_token);
#ifdef GIVEINFO
tv_current = qb_util_nano_current_get ();
tv_diff = tv_current - tv_old;
tv_old = tv_current;
log_printf (instance->totemsrp_log_level_debug,
"I held %0.4f ms",
((float)tv_diff) / 1000000.0);
#endif
/*
 * Deliver messages after token has been transmitted
 * to improve performance
 */
if (instance->memb_state == MEMB_STATE_OPERATIONAL) {
messages_deliver_to_app (instance, 0,
instance->my_high_seq_received);
}
reset_token_timeout (instance); // REVIEWED
reset_token_retransmit_timeout (instance); // REVIEWED
if (totemip_equal(&instance->my_id.addr[0], &instance->my_ring_id.rep) &&
instance->my_token_held == 1) {
start_token_hold_retransmit_timeout (instance);
}
token_callbacks_execute (instance, TOTEM_CALLBACK_TOKEN_SENT);
}
break;
}
/*
 * The heartbeat timer is re-armed only when the token was forwarded and
 * heartbeat is in use, and cancelled otherwise
 */
if ((forward_token)
&& instance->use_heartbeat) {
reset_heartbeat_timeout(instance);
}
else {
cancel_heartbeat_timeout(instance);
}
return (0);
}
static void messages_deliver_to_app (
struct totemsrp_instance *instance,
int skip,
unsigned int end_point)
{
struct sort_queue_item *sort_queue_item_p;
unsigned int i;
int res;
struct mcast *mcast_in;
struct mcast mcast_header;
unsigned int range = 0;
int endian_conversion_required;
unsigned int my_high_delivered_stored = 0;
range = end_point - instance->my_high_delivered;
if (range) {
log_printf (instance->totemsrp_log_level_trace,
"Delivering %x to %x", instance->my_high_delivered,
end_point);
}
assert (range < QUEUE_RTR_ITEMS_SIZE_MAX);
my_high_delivered_stored = instance->my_high_delivered;
/*
* Deliver messages in order from rtr queue to pending delivery queue
*/
for (i = 1; i <= range; i++) {
void *ptr = 0;
/*
* If out of range of sort queue, stop assembly
*/
res = sq_in_range (&instance->regular_sort_queue,
my_high_delivered_stored + i);
if (res == 0) {
break;
}
res = sq_item_get (&instance->regular_sort_queue,
my_high_delivered_stored + i, &ptr);
/*
* If hole, stop assembly
*/
if (res != 0 && skip == 0) {
break;
}
instance->my_high_delivered = my_high_delivered_stored + i;
if (res != 0) {
continue;
}
sort_queue_item_p = ptr;
mcast_in = sort_queue_item_p->mcast;
assert (mcast_in != (struct mcast *)0xdeadbeef);
endian_conversion_required = 0;
if (mcast_in->header.endian_detector != ENDIAN_LOCAL) {
endian_conversion_required = 1;
mcast_endian_convert (mcast_in, &mcast_header);
} else {
memcpy (&mcast_header, mcast_in, sizeof (struct mcast));
}
/*
* Skip messages not originated in instance->my_deliver_memb
*/
if (skip &&
memb_set_subset (&mcast_header.system_from,
1,
instance->my_deliver_memb_list,
instance->my_deliver_memb_entries) == 0) {
instance->my_high_delivered = my_high_delivered_stored + i;
continue;
}
/*
* Message found
*/
log_printf (instance->totemsrp_log_level_trace,
"Delivering MCAST message with seq %x to pending delivery queue",
mcast_header.seq);
/*
* Message is locally originated multicast
*/
instance->totemsrp_deliver_fn (
mcast_header.header.nodeid,
((char *)sort_queue_item_p->mcast) + sizeof (struct mcast),
sort_queue_item_p->msg_len - sizeof (struct mcast),
endian_conversion_required);
}
}
/*
 * Receive handler called when a message of type MCAST arrives.
 *
 * instance                  protocol instance
 * msg                       received frame, starting with struct mcast
 * msg_len                   number of bytes available at msg
 * endian_conversion_needed  non-zero when the sender's byte order differs
 *
 * Returns 0 when the message was handled or deliberately ignored, -1 when
 * no buffer could be allocated to store it.
 */
static int message_handler_mcast (
	struct totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed)
{
	struct sort_queue_item sort_queue_item;
	struct sq *sort_queue;
	struct mcast mcast_header;

	/*
	 * The header is copied out of msg below, so a frame shorter than
	 * struct mcast has to be rejected before anything reads from it.
	 */
	if (msg_len < sizeof (struct mcast)) {
		log_printf (instance->totemsrp_log_level_security,
			"Received mcast message is too short... ignoring %u.",
			(unsigned int)msg_len);
		instance->stats.rx_msg_dropped++;
		return (0);
	}

	if (endian_conversion_needed) {
		mcast_endian_convert (msg, &mcast_header);
	} else {
		memcpy (&mcast_header, msg, sizeof (struct mcast));
	}

	/* Encapsulated messages go to the recovery queue, all others to the regular one. */
	if (mcast_header.header.encapsulated == MESSAGE_ENCAPSULATED) {
		sort_queue = &instance->recovery_sort_queue;
	} else {
		sort_queue = &instance->regular_sort_queue;
	}

	assert (msg_len <= FRAME_SIZE_MAX);

#ifdef TEST_DROP_MCAST_PERCENTAGE
	if (random()%100 < TEST_DROP_MCAST_PERCENTAGE) {
		return (0);
	}
#endif

	/*
	 * If the message is foreign execute the switch below
	 */
	if (memcmp (&instance->my_ring_id, &mcast_header.ring_id,
		sizeof (struct memb_ring_id)) != 0) {

		switch (instance->memb_state) {
		case MEMB_STATE_OPERATIONAL:
			memb_set_merge (
				&mcast_header.system_from, 1,
				instance->my_proc_list, &instance->my_proc_list_entries);
			memb_state_gather_enter (instance, TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_OPERATIONAL_STATE);
			break;

		case MEMB_STATE_GATHER:
			/* Only re-enter gather when the sender is not yet in my_proc_list. */
			if (!memb_set_subset (
				&mcast_header.system_from,
				1,
				instance->my_proc_list,
				instance->my_proc_list_entries)) {

				memb_set_merge (&mcast_header.system_from, 1,
					instance->my_proc_list, &instance->my_proc_list_entries);
				memb_state_gather_enter (instance, TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_GATHER_STATE);
				return (0);
			}
			break;

		case MEMB_STATE_COMMIT:
			/* discard message */
			instance->stats.rx_msg_dropped++;
			break;

		case MEMB_STATE_RECOVERY:
			/* discard message */
			instance->stats.rx_msg_dropped++;
			break;
		}
		return (0);
	}

	log_printf (instance->totemsrp_log_level_trace,
		"Received ringid(%s:%lld) seq %x",
		totemip_print (&mcast_header.ring_id.rep),
		mcast_header.ring_id.seq,
		mcast_header.seq);

	/*
	 * Add mcast message to rtr queue if not already in rtr queue
	 * otherwise free io vectors
	 */
	if (msg_len > 0 && msg_len <= FRAME_SIZE_MAX &&
		sq_in_range (sort_queue, mcast_header.seq) &&
		sq_item_inuse (sort_queue, mcast_header.seq) == 0) {

		/*
		 * Allocate new multicast memory block
		 */
// TODO LEAK
		sort_queue_item.mcast = totemsrp_buffer_alloc (instance);
		if (sort_queue_item.mcast == NULL) {
			return (-1); /* error here is corrected by the algorithm */
		}
		memcpy (sort_queue_item.mcast, msg, msg_len);
		sort_queue_item.msg_len = msg_len;

		if (sq_lt_compare (instance->my_high_seq_received,
			mcast_header.seq)) {
			instance->my_high_seq_received = mcast_header.seq;
		}

		sq_item_add (sort_queue, &sort_queue_item, mcast_header.seq);
	}

	update_aru (instance);
	if (instance->memb_state == MEMB_STATE_OPERATIONAL) {
		messages_deliver_to_app (instance, 0, instance->my_high_seq_received);
	}

/* TODO remove from retrans message queue for old ring in recovery state */
	return (0);
}
/*
 * Receive handler called when a MEMB_MERGE_DETECT message arrives.
 *
 * A merge detect carrying a ring id different from my_ring_id adds the
 * sender to my_proc_list and moves the instance into the gather state
 * (from operational, or from gather when the sender was not yet listed).
 * Commit and recovery states ignore it.
 *
 * instance                  protocol instance
 * msg                       received frame, starting with struct memb_merge_detect
 * msg_len                   number of bytes available at msg
 * endian_conversion_needed  non-zero when the sender's byte order differs
 *
 * Always returns 0.
 */
static int message_handler_memb_merge_detect (
	struct totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed)
{
	struct memb_merge_detect memb_merge_detect;

	/*
	 * The whole structure is copied out of msg below; reject frames that
	 * are too short to contain it.
	 */
	if (msg_len < sizeof (struct memb_merge_detect)) {
		log_printf (instance->totemsrp_log_level_security,
			"Received merge detect message is too short... ignoring %u.",
			(unsigned int)msg_len);
		instance->stats.rx_msg_dropped++;
		return (0);
	}

	if (endian_conversion_needed) {
		memb_merge_detect_endian_convert (msg, &memb_merge_detect);
	} else {
		memcpy (&memb_merge_detect, msg,
			sizeof (struct memb_merge_detect));
	}

	/*
	 * do nothing if this is a merge detect from this configuration
	 */
	if (memcmp (&instance->my_ring_id, &memb_merge_detect.ring_id,
		sizeof (struct memb_ring_id)) == 0) {

		return (0);
	}

	/*
	 * Execute merge operation
	 */
	switch (instance->memb_state) {
	case MEMB_STATE_OPERATIONAL:
		memb_set_merge (&memb_merge_detect.system_from, 1,
			instance->my_proc_list, &instance->my_proc_list_entries);
		memb_state_gather_enter (instance, TOTEMSRP_GSFROM_MERGE_DURING_OPERATIONAL_STATE);
		break;

	case MEMB_STATE_GATHER:
		/* Only restart gather for a sender that is not already in my_proc_list. */
		if (!memb_set_subset (
			&memb_merge_detect.system_from,
			1,
			instance->my_proc_list,
			instance->my_proc_list_entries)) {

			memb_set_merge (&memb_merge_detect.system_from, 1,
				instance->my_proc_list, &instance->my_proc_list_entries);
			memb_state_gather_enter (instance, TOTEMSRP_GSFROM_MERGE_DURING_GATHER_STATE);
			return (0);
		}
		break;

	case MEMB_STATE_COMMIT:
		/* do nothing in commit */
		break;

	case MEMB_STATE_RECOVERY:
		/* do nothing in recovery */
		break;
	}
	return (0);
}
/*
 * Fold one received join message into the local view of the membership.
 *
 * The sender's processor list and failed list are compared with
 * my_proc_list / my_failed_list:
 *  - both equal: record the sender's consensus and, once consensus is
 *    agreed, possibly create the commit token and enter commit;
 *  - both subsets of the local lists, or the sender is already in
 *    my_failed_list: nothing new is learnt;
 *  - otherwise: merge the new information and re-enter gather.
 * In every path that did not enter gather, an instance that is still in
 * the operational state is moved to gather at the end.
 *
 * instance   protocol instance
 * memb_join  join message; the code reads it directly, so it is presumably
 *            already in host byte order - TODO confirm against callers
 */
static void memb_join_process (
struct totemsrp_instance *instance,
const struct memb_join *memb_join)
{
struct srp_addr *proc_list;
struct srp_addr *failed_list;
int gather_entered = 0;
int fail_minus_memb_entries = 0;
struct srp_addr fail_minus_memb[PROCESSOR_COUNT_MAX];
/* The processor list starts at end_of_memb_join; the failed list follows it directly. */
proc_list = (struct srp_addr *)memb_join->end_of_memb_join;
failed_list = proc_list + memb_join->proc_list_entries;
/*
memb_set_print ("proclist", proc_list, memb_join->proc_list_entries);
memb_set_print ("faillist", failed_list, memb_join->failed_list_entries);
memb_set_print ("my_proclist", instance->my_proc_list, instance->my_proc_list_entries);
memb_set_print ("my_faillist", instance->my_failed_list, instance->my_failed_list_entries);
-*/
if (memb_join->header.type == MESSAGE_TYPE_MEMB_JOIN) {
/* While flushing, every join/leave is discarded; a leave still records the leaving node. */
if (instance->flushing) {
/* A header nodeid of LEAVE_DUMMY_NODEID marks a leave; the leaving node is the last failed-list entry. */
if (memb_join->header.nodeid == LEAVE_DUMMY_NODEID) {
log_printf (instance->totemsrp_log_level_warning,
- "Discarding LEAVE message during flush, nodeid=%u",
+ "Discarding LEAVE message during flush, nodeid=%u",
memb_join->failed_list_entries > 0 ? failed_list[memb_join->failed_list_entries - 1 ].addr[0].nodeid : LEAVE_DUMMY_NODEID);
if (memb_join->failed_list_entries > 0) {
my_leave_memb_set(instance, failed_list[memb_join->failed_list_entries - 1 ].addr[0].nodeid);
}
} else {
log_printf (instance->totemsrp_log_level_warning,
"Discarding JOIN message during flush, nodeid=%d", memb_join->header.nodeid);
}
return;
} else {
/* Not flushing: note the leaving node and continue with normal processing. */
if (memb_join->header.nodeid == LEAVE_DUMMY_NODEID) {
log_printf (instance->totemsrp_log_level_debug,
"Received LEAVE message from %u", memb_join->failed_list_entries > 0 ? failed_list[memb_join->failed_list_entries - 1 ].addr[0].nodeid : LEAVE_DUMMY_NODEID);
if (memb_join->failed_list_entries > 0) {
my_leave_memb_set(instance, failed_list[memb_join->failed_list_entries - 1 ].addr[0].nodeid);
}
}
}
-
+
}
/* Case 1: the sender's lists are identical to the local ones. */
if (memb_set_equal (proc_list,
memb_join->proc_list_entries,
instance->my_proc_list,
instance->my_proc_list_entries) &&
memb_set_equal (failed_list,
memb_join->failed_list_entries,
instance->my_failed_list,
instance->my_failed_list_entries)) {
memb_consensus_set (instance, &memb_join->system_from);
/* With failed_to_recv set, agreed consensus restarts with a proc list holding only my_id. */
if (memb_consensus_agreed (instance) && instance->failed_to_recv == 1) {
instance->failed_to_recv = 0;
srp_addr_copy (&instance->my_proc_list[0],
&instance->my_id);
instance->my_proc_list_entries = 1;
instance->my_failed_list_entries = 0;
memb_state_commit_token_create (instance);
memb_state_commit_enter (instance);
return;
}
/* Otherwise the commit token is created only when memb_lowest_in_config() holds for this instance. */
if (memb_consensus_agreed (instance) &&
memb_lowest_in_config (instance)) {
memb_state_commit_token_create (instance);
memb_state_commit_enter (instance);
} else {
goto out;
}
} else
/* Case 2: the sender's lists are subsets of the local ones - nothing to merge. */
if (memb_set_subset (proc_list,
memb_join->proc_list_entries,
instance->my_proc_list,
instance->my_proc_list_entries) &&
memb_set_subset (failed_list,
memb_join->failed_list_entries,
instance->my_failed_list,
instance->my_failed_list_entries)) {
goto out;
} else
/* Case 3: the sender is already in my_failed_list - its message is not merged. */
if (memb_set_subset (&memb_join->system_from, 1,
instance->my_failed_list, instance->my_failed_list_entries)) {
goto out;
} else {
/* Case 4: new information - merge it and restart gather. */
memb_set_merge (proc_list,
memb_join->proc_list_entries,
instance->my_proc_list, &instance->my_proc_list_entries);
/* If the sender lists this node as failed, list the sender as failed in return. */
if (memb_set_subset (
&instance->my_id, 1,
failed_list, memb_join->failed_list_entries)) {
memb_set_merge (
&memb_join->system_from, 1,
instance->my_failed_list, &instance->my_failed_list_entries);
} else {
/* The sender's failed list is only taken over when the sender is in my_memb_list. */
if (memb_set_subset (
&memb_join->system_from, 1,
instance->my_memb_list,
instance->my_memb_entries)) {
if (memb_set_subset (
&memb_join->system_from, 1,
instance->my_failed_list,
instance->my_failed_list_entries) == 0) {
memb_set_merge (failed_list,
memb_join->failed_list_entries,
instance->my_failed_list, &instance->my_failed_list_entries);
} else {
/*
 * NOTE(review): this branch looks unreachable - the same
 * "sender in my_failed_list" test already diverted to out
 * in case 3, and my_failed_list is not modified in between.
 * Confirm before relying on it.
 */
memb_set_subtract (fail_minus_memb,
&fail_minus_memb_entries,
failed_list,
memb_join->failed_list_entries,
instance->my_memb_list,
instance->my_memb_entries);
memb_set_merge (fail_minus_memb,
fail_minus_memb_entries,
instance->my_failed_list,
&instance->my_failed_list_entries);
}
}
}
memb_state_gather_enter (instance, TOTEMSRP_GSFROM_MERGE_DURING_JOIN);
gather_entered = 1;
}
/* Common exit: an operational instance that has not entered gather yet does so now. */
out:
if (gather_entered == 0 &&
instance->memb_state == MEMB_STATE_OPERATIONAL) {
memb_state_gather_enter (instance, TOTEMSRP_GSFROM_JOIN_DURING_OPERATIONAL_STATE);
}
}
/*
 * Produce a host-byte-order copy of a join message.
 *
 * in   join message in the sender's byte order
 * out  destination; must have room for the fixed part plus both address
 *      lists that follow end_of_memb_join
 *
 * The entry counts are converted first because they determine where the
 * failed list starts and how many addresses are converted.
 */
static void memb_join_endian_convert (const struct memb_join *in, struct memb_join *out)
{
	struct srp_addr *src_proc;
	struct srp_addr *src_failed;
	struct srp_addr *dst_proc;
	struct srp_addr *dst_failed;
	int idx;

	/* Fixed-size part of the message. */
	out->header.type = in->header.type;
	out->header.endian_detector = ENDIAN_LOCAL;
	out->header.nodeid = swab32 (in->header.nodeid);
	srp_addr_copy_endian_convert (&out->system_from, &in->system_from);
	out->proc_list_entries = swab32 (in->proc_list_entries);
	out->failed_list_entries = swab32 (in->failed_list_entries);
	out->ring_seq = swab64 (in->ring_seq);

	/* Both lists trail the structure: processor list first, failed list after it. */
	src_proc = (struct srp_addr *)in->end_of_memb_join;
	dst_proc = (struct srp_addr *)out->end_of_memb_join;
	src_failed = src_proc + out->proc_list_entries;
	dst_failed = dst_proc + out->proc_list_entries;

	idx = 0;
	while (idx < out->proc_list_entries) {
		srp_addr_copy_endian_convert (dst_proc + idx, src_proc + idx);
		idx++;
	}

	idx = 0;
	while (idx < out->failed_list_entries) {
		srp_addr_copy_endian_convert (dst_failed + idx, src_failed + idx);
		idx++;
	}
}
static void memb_commit_token_endian_convert (const struct memb_commit_token *in, struct memb_commit_token *out)
{
int i;
struct srp_addr *in_addr = (struct srp_addr *)in->end_of_commit_token;
struct srp_addr *out_addr = (struct srp_addr *)out->end_of_commit_token;
struct memb_commit_token_memb_entry *in_memb_list;
struct memb_commit_token_memb_entry *out_memb_list;
out->header.type = in->header.type;
out->header.endian_detector = ENDIAN_LOCAL;
out->header.nodeid = swab32 (in->header.nodeid);
out->token_seq = swab32 (in->token_seq);
totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep);
out->ring_id.seq = swab64 (in->ring_id.seq);
out->retrans_flg = swab32 (in->retrans_flg);
out->memb_index = swab32 (in->memb_index);
out->addr_entries = swab32 (in->addr_entries);
in_memb_list = (struct memb_commit_token_memb_entry *)(in_addr + out->addr_entries);
out_memb_list = (struct memb_commit_token_memb_entry *)(out_addr + out->addr_entries);
for (i = 0; i < out->addr_entries; i++) {
srp_addr_copy_endian_convert (&out_addr[i], &in_addr[i]);
/*
* Only convert the memb entry if it has been set
*/
if (in_memb_list[i].ring_id.rep.family != 0) {
totemip_copy_endian_convert (&out_memb_list[i].ring_id.rep,
&in_memb_list[i].ring_id.rep);
out_memb_list[i].ring_id.seq =
swab64 (in_memb_list[i].ring_id.seq);
out_memb_list[i].aru = swab32 (in_memb_list[i].aru);
out_memb_list[i].high_delivered = swab32 (in_memb_list[i].high_delivered);
out_memb_list[i].received_flg = swab32 (in_memb_list[i].received_flg);
}
}
}
/*
 * Produce a host-byte-order copy of an ORF token, including every entry
 * of its retransmit list.
 *
 * in   token in the sender's byte order
 * out  destination token
 */
static void orf_token_endian_convert (const struct orf_token *in, struct orf_token *out)
{
	int idx;

	/* Message header. */
	out->header.type = in->header.type;
	out->header.endian_detector = ENDIAN_LOCAL;
	out->header.nodeid = swab32 (in->header.nodeid);

	/* Scalar token fields. */
	out->seq = swab32 (in->seq);
	out->token_seq = swab32 (in->token_seq);
	out->aru = swab32 (in->aru);
	totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep);
	out->aru_addr = swab32(in->aru_addr);
	out->ring_id.seq = swab64 (in->ring_id.seq);
	out->fcc = swab32 (in->fcc);
	out->backlog = swab32 (in->backlog);
	out->retrans_flg = swab32 (in->retrans_flg);
	out->rtr_list_entries = swab32 (in->rtr_list_entries);

	/* Retransmit list; its length is the count converted just above. */
	idx = 0;
	while (idx < out->rtr_list_entries) {
		totemip_copy_endian_convert(&out->rtr_list[idx].ring_id.rep, &in->rtr_list[idx].ring_id.rep);
		out->rtr_list[idx].ring_id.seq = swab64 (in->rtr_list[idx].ring_id.seq);
		out->rtr_list[idx].seq = swab32 (in->rtr_list[idx].seq);
		idx++;
	}
}
/*
 * Produce a host-byte-order copy of a multicast message header.
 * Only the struct mcast fields are converted; no payload is touched here.
 *
 * in   header in the sender's byte order
 * out  destination header
 */
static void mcast_endian_convert (const struct mcast *in, struct mcast *out)
{
	/* Single-byte header fields are copied as they are; the detector is reset to local order. */
	out->header.type = in->header.type;
	out->header.encapsulated = in->header.encapsulated;
	out->header.endian_detector = ENDIAN_LOCAL;
	out->header.nodeid = swab32 (in->header.nodeid);

	/* Sequence numbers and flags. */
	out->seq = swab32 (in->seq);
	out->this_seqno = swab32 (in->this_seqno);
	out->node_id = swab32 (in->node_id);
	out->guarantee = swab32 (in->guarantee);

	/* Ring identity and originator address. */
	totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep);
	out->ring_id.seq = swab64 (in->ring_id.seq);
	srp_addr_copy_endian_convert (&out->system_from, &in->system_from);
}
/*
 * Produce a host-byte-order copy of a merge detect message.
 *
 * in   message in the sender's byte order
 * out  destination message
 */
static void memb_merge_detect_endian_convert (
	const struct memb_merge_detect *in,
	struct memb_merge_detect *out)
{
	/* Header: type is copied, nodeid swapped, detector reset to local order. */
	out->header.endian_detector = ENDIAN_LOCAL;
	out->header.type = in->header.type;
	out->header.nodeid = swab32 (in->header.nodeid);

	/* Ring identity, then the originator address. */
	out->ring_id.seq = swab64 (in->ring_id.seq);
	totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep);
	srp_addr_copy_endian_convert (&out->system_from, &in->system_from);
}
/*
 * Decide whether a join message received in the operational state can be
 * ignored.
 *
 * Returns 1 (ignore) when the sender lists this node in its failed list,
 * or when the sender is in my_memb_list and the message carries a ring
 * sequence lower than my_ring_id.seq. Returns 0 otherwise.
 */
static int ignore_join_under_operational (
	struct totemsrp_instance *instance,
	const struct memb_join *memb_join)
{
	/* The failed list follows the processor list after the fixed part. */
	struct srp_addr *sender_proc_list = (struct srp_addr *)memb_join->end_of_memb_join;
	struct srp_addr *sender_failed_list = sender_proc_list + memb_join->proc_list_entries;
	unsigned long long sender_ring_seq = memb_join->ring_seq;

	/* The sender lists this node as failed. */
	if (memb_set_subset (&instance->my_id, 1,
		sender_failed_list, memb_join->failed_list_entries)) {
		return (1);
	}

	/*
	 * In operational state, my_proc_list is exactly the same as
	 * my_memb_list.
	 */
	if (!memb_set_subset (&memb_join->system_from, 1,
		instance->my_memb_list, instance->my_memb_entries)) {
		return (0);
	}

	/* Sender is a current member: ignore only joins with a lower ring sequence. */
	return ((sender_ring_seq < instance->my_ring_id.seq) ? 1 : 0);
}
static int message_handler_memb_join (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed)
{
const struct memb_join *memb_join;
struct memb_join *memb_join_convert = alloca (msg_len);
if (endian_conversion_needed) {
memb_join = memb_join_convert;
memb_join_endian_convert (msg, memb_join_convert);
} else {
memb_join = msg;
}
/*
* If the process paused because it wasn't scheduled in a timely
* fashion, flush the join messages because they may be queued
* entries
*/
if (pause_flush (instance)) {
return (0);
}
if (instance->token_ring_id_seq < memb_join->ring_seq) {
instance->token_ring_id_seq = memb_join->ring_seq;
}
switch (instance->memb_state) {
case MEMB_STATE_OPERATIONAL:
if (!ignore_join_under_operational (instance, memb_join)) {
memb_join_process (instance, memb_join);
}
break;
case MEMB_STATE_GATHER:
memb_join_process (instance, memb_join);
break;
case MEMB_STATE_COMMIT:
if (memb_set_subset (&memb_join->system_from,
1,
instance->my_new_memb_list,
instance->my_new_memb_entries) &&
memb_join->ring_seq >= instance->my_ring_id.seq) {
memb_join_process (instance, memb_join);
memb_state_gather_enter (instance, TOTEMSRP_GSFROM_JOIN_DURING_COMMIT_STATE);
}
break;
case MEMB_STATE_RECOVERY:
if (memb_set_subset (&memb_join->system_from,
1,
instance->my_new_memb_list,
instance->my_new_memb_entries) &&
memb_join->ring_seq >= instance->my_ring_id.seq) {
memb_join_process (instance, memb_join);
memb_recovery_state_token_loss (instance);
memb_state_gather_enter (instance, TOTEMSRP_GSFROM_JOIN_DURING_RECOVERY);
}
break;
}
return (0);
}
static int message_handler_memb_commit_token (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed)
{
struct memb_commit_token *memb_commit_token_convert = alloca (msg_len);
struct memb_commit_token *memb_commit_token;
struct srp_addr sub[PROCESSOR_COUNT_MAX];
int sub_entries;
struct srp_addr *addr;
log_printf (instance->totemsrp_log_level_debug,
"got commit token");
if (endian_conversion_needed) {
memb_commit_token_endian_convert (msg, memb_commit_token_convert);
} else {
memcpy (memb_commit_token_convert, msg, msg_len);
}
memb_commit_token = memb_commit_token_convert;
addr = (struct srp_addr *)memb_commit_token->end_of_commit_token;
#ifdef TEST_DROP_COMMIT_TOKEN_PERCENTAGE
if (random()%100 < TEST_DROP_COMMIT_TOKEN_PERCENTAGE) {
return (0);
}
#endif
switch (instance->memb_state) {
case MEMB_STATE_OPERATIONAL:
/* discard token */
break;
case MEMB_STATE_GATHER:
memb_set_subtract (sub, &sub_entries,
instance->my_proc_list, instance->my_proc_list_entries,
instance->my_failed_list, instance->my_failed_list_entries);
if (memb_set_equal (addr,
memb_commit_token->addr_entries,
sub,
sub_entries) &&
memb_commit_token->ring_id.seq > instance->my_ring_id.seq) {
memcpy (instance->commit_token, memb_commit_token, msg_len);
memb_state_commit_enter (instance);
}
break;
case MEMB_STATE_COMMIT:
/*
* If retransmitted commit tokens are sent on this ring
* filter them out and only enter recovery once the
* commit token has traversed the array. This is
* determined by :
* memb_commit_token->memb_index == memb_commit_token->addr_entries) {
*/
if (memb_commit_token->ring_id.seq == instance->my_ring_id.seq &&
memb_commit_token->memb_index == memb_commit_token->addr_entries) {
memb_state_recovery_enter (instance, memb_commit_token);
}
break;
case MEMB_STATE_RECOVERY:
if (totemip_equal (&instance->my_id.addr[0], &instance->my_ring_id.rep)) {
/* Filter out duplicated tokens */
if (instance->originated_orf_token) {
break;
}
instance->originated_orf_token = 1;
log_printf (instance->totemsrp_log_level_debug,
"Sending initial ORF token");
// TODO convert instead of initiate
orf_token_send_initial (instance);
reset_token_timeout (instance); // REVIEWED
reset_token_retransmit_timeout (instance); // REVIEWED
}
break;
}
return (0);
}
static int message_handler_token_hold_cancel (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed)
{
const struct token_hold_cancel *token_hold_cancel = msg;
if (memcmp (&token_hold_cancel->ring_id, &instance->my_ring_id,
sizeof (struct memb_ring_id)) == 0) {
instance->my_seq_unchanged = 0;
if (totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0])) {
timer_function_token_retransmit_timeout (instance);
}
}
return (0);
}
/*
 * Entry point for every frame handed up by the transport.
 *
 * Frames shorter than the common message header are logged and ignored.
 * For a known message type the matching receive counter is incremented
 * and the frame is passed to that type's handler; an unknown type is
 * logged, counted as dropped and ignored.
 *
 * context  the struct totemsrp_instance the frame is delivered to
 * msg      received frame
 * msg_len  number of bytes available at msg
 */
void main_deliver_fn (
void *context,
const void *msg,
unsigned int msg_len)
{
struct totemsrp_instance *instance = context;
- const struct message_header *message_header = msg;
+ const struct totem_message_header *message_header = msg;
- if (msg_len < sizeof (struct message_header)) {
+ if (msg_len < sizeof (struct totem_message_header)) {
log_printf (instance->totemsrp_log_level_security,
"Received message is too short... ignoring %u.",
(unsigned int)msg_len);
return;
}
/* Per-type receive statistics; this switch also rejects types without a handler. */
switch (message_header->type) {
case MESSAGE_TYPE_ORF_TOKEN:
instance->stats.orf_token_rx++;
break;
case MESSAGE_TYPE_MCAST:
instance->stats.mcast_rx++;
break;
case MESSAGE_TYPE_MEMB_MERGE_DETECT:
instance->stats.memb_merge_detect_rx++;
break;
case MESSAGE_TYPE_MEMB_JOIN:
instance->stats.memb_join_rx++;
break;
case MESSAGE_TYPE_MEMB_COMMIT_TOKEN:
instance->stats.memb_commit_token_rx++;
break;
case MESSAGE_TYPE_TOKEN_HOLD_CANCEL:
instance->stats.token_hold_cancel_rx++;
break;
default:
/* The security log entry is the only report; nothing is written to stdout. */
log_printf (instance->totemsrp_log_level_security, "Type of received message is wrong... ignoring %d.\n", (int)message_header->type);
instance->stats.rx_msg_dropped++;
return;
}
/*
 * Handle incoming message
 */
/* The last argument tells the handler whether the sender's byte order differs from the local one. */
totemsrp_message_handlers.handler_functions[(int)message_header->type] (
instance,
msg,
msg_len,
message_header->endian_detector != ENDIAN_LOCAL);
}
/*
 * Callback run when the address of interface iface_no becomes known or
 * changes.
 *
 * The new address is stored in my_id and in the first my_memb_list entry.
 * On the very first change the ring id is created or loaded and the
 * service-ready callback (if registered) is invoked. The configured
 * members of the interface are then added, and once at least
 * interface_count changes have been seen the instance enters gather.
 *
 * context     the struct totemsrp_instance
 * iface_addr  address now bound on the interface
 * iface_no    index of the interface
 */
void main_iface_change_fn (
	void *context,
	const struct totem_ip_address *iface_addr,
	unsigned int iface_no)
{
	struct totemsrp_instance *instance = context;
	int first_change;
	int member_idx;

	totemip_copy (&instance->my_id.addr[iface_no], iface_addr);
	assert (instance->my_id.addr[iface_no].nodeid);

	totemip_copy (&instance->my_memb_list[0].addr[iface_no], iface_addr);

	/* Remember whether this is the first change before counting it. */
	first_change = (instance->iface_changes == 0);
	instance->iface_changes++;

	if (first_change) {
		instance->memb_ring_id_create_or_load (&instance->my_ring_id,
			&instance->my_id.addr[0]);
		instance->token_ring_id_seq = instance->my_ring_id.seq;
		log_printf (
			instance->totemsrp_log_level_debug,
			"Created or loaded sequence id %llx.%s for this ring.",
			instance->my_ring_id.seq,
			totemip_print (&instance->my_ring_id.rep));

		if (instance->totemsrp_service_ready_fn) {
			instance->totemsrp_service_ready_fn ();
		}
	}

	/* Register every member configured for this interface. */
	member_idx = 0;
	while (member_idx < instance->totem_config->interfaces[iface_no].member_count) {
		totemsrp_member_add (instance,
			&instance->totem_config->interfaces[iface_no].member_list[member_idx],
			iface_no);
		member_idx++;
	}

	if (instance->iface_changes >= instance->totem_config->interface_count) {
		memb_state_gather_enter (instance, TOTEMSRP_GSFROM_INTERFACE_CHANGE);
	}
}
/*
 * Reduce the configured net_mtu by the size of struct mcast.
 */
void totemsrp_net_mtu_adjust (struct totem_config *totem_config)
{
	totem_config->net_mtu = totem_config->net_mtu - sizeof (struct mcast);
}
/*
 * Store the callback kept in totemsrp_service_ready_fn.
 *
 * context              the struct totemsrp_instance
 * totem_service_ready  callback to store
 */
void totemsrp_service_ready_register (
	void *context,
	void (*totem_service_ready) (void))
{
	struct totemsrp_instance *instance = context;

	instance->totemsrp_service_ready_fn = totem_service_ready;
}
/*
 * Add a member address on one link by forwarding the request to the
 * transport layer.
 *
 * The -/+ lines record this change's switch from totemrrp_member_add to
 * totemnet_member_add, which additionally receives this node's own
 * address for the link (my_id.addr[link_no]).
 *
 * context  the struct totemsrp_instance
 * member   address of the member to add
 * link_no  index of the link (previously named ring_no)
 *
 * Returns the result of the transport call unchanged.
 */
int totemsrp_member_add (
void *context,
const struct totem_ip_address *member,
- int ring_no)
+ int link_no)
{
struct totemsrp_instance *instance = (struct totemsrp_instance *)context;
int res;
- res = totemrrp_member_add (instance->totemrrp_context, member, ring_no);
+ res = totemnet_member_add (instance->totemnet_context, &instance->my_id.addr[link_no], member, link_no);
return (res);
}
/*
 * Remove a member address from one link by forwarding the request to the
 * transport layer.
 *
 * The -/+ lines record this change's switch from totemrrp_member_remove
 * to totemnet_member_remove.
 *
 * context  the struct totemsrp_instance
 * member   address of the member to remove
 * link_no  index of the link (previously named ring_no)
 *
 * Returns the result of the transport call unchanged.
 */
int totemsrp_member_remove (
void *context,
const struct totem_ip_address *member,
- int ring_no)
+ int link_no)
{
struct totemsrp_instance *instance = (struct totemsrp_instance *)context;
int res;
- res = totemrrp_member_remove (instance->totemrrp_context, member, ring_no);
+ res = totemnet_member_remove (instance->totemnet_context, member, link_no);
return (res);
}
/*
 * Set the instance's threaded_mode_enabled flag to 1.
 *
 * context  the struct totemsrp_instance
 */
void totemsrp_threaded_mode_enable (void *context)
{
	struct totemsrp_instance *instance = context;

	instance->threaded_mode_enabled = 1;
}
/*
 * Clear the waiting_trans_ack flag and report the new state (0) through
 * the waiting_trans_ack callback.
 *
 * context  the struct totemsrp_instance
 */
void totemsrp_trans_ack (void *context)
{
	struct totemsrp_instance *instance = context;

	instance->waiting_trans_ack = 0;
	instance->totemsrp_waiting_trans_ack_cb_fn (0);
}
diff --git a/exec/totemsrp.h b/exec/totemsrp.h
index 185276f2..4930c937 100644
--- a/exec/totemsrp.h
+++ b/exec/totemsrp.h
@@ -1,146 +1,146 @@
/*
* Copyright (c) 2003-2005 MontaVista Software, Inc.
* Copyright (c) 2006-2011 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file
* Totem Single Ring Protocol
*
* depends on poll abstraction, POSIX, IPV4
*/
#ifndef TOTEMSRP_H_DEFINED
#define TOTEMSRP_H_DEFINED
#include <corosync/totem/totem.h>
#include <qb/qbloop.h>
/**
* Create a protocol instance
*/
int totemsrp_initialize (
qb_loop_t *poll_handle,
void **srp_context,
struct totem_config *totem_config,
- totemmrp_stats_t *stats,
+ totempg_stats_t *stats,
void (*deliver_fn) (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required),
void (*confchg_fn) (
enum totem_configuration_type configuration_type,
const unsigned int *member_list, size_t member_list_entries,
const unsigned int *left_list, size_t left_list_entries,
const unsigned int *joined_list, size_t joined_list_entries,
const struct memb_ring_id *ring_id),
void (*waiting_trans_ack_cb_fn) (
int waiting_trans_ack));
void totemsrp_finalize (void *srp_context);
/**
* Multicast a message
*/
int totemsrp_mcast (
void *srp_context,
struct iovec *iovec,
unsigned int iov_len,
int priority);
/**
* Return number of available messages that can be queued
*/
int totemsrp_avail (void *srp_context);
int totemsrp_callback_token_create (
void *srp_context,
void **handle_out,
enum totem_callback_token_type type,
int delete,
int (*callback_fn) (enum totem_callback_token_type type, const void *),
const void *data);
void totemsrp_callback_token_destroy (
void *srp_context,
void **handle_out);
void totemsrp_event_signal (void *srp_context, enum totem_event_type type, int value);
extern void totemsrp_net_mtu_adjust (struct totem_config *totem_config);
extern int totemsrp_ifaces_get (
void *srp_context,
unsigned int nodeid,
struct totem_ip_address *interfaces,
unsigned int interfaces_size,
char ***status,
unsigned int *iface_count);
extern unsigned int totemsrp_my_nodeid_get (
void *srp_context);
extern int totemsrp_my_family_get (
void *srp_context);
extern int totemsrp_crypto_set (
void *srp_context,
const char *cipher_type,
const char *hash_type);
extern int totemsrp_ring_reenable (
void *srp_context);
/**
 * Register the callback stored as the instance's service-ready function.
 * The implementation in exec/totemsrp.c only stores the pointer; it is
 * invoked from main_iface_change_fn on the first interface change.
 */
void totemsrp_service_ready_register (
void *srp_context,
void (*totem_service_ready) (void));
/**
 * Add a member address on the link/ring with index ring_no.
 * Returns the result of the underlying transport call.
 */
extern int totemsrp_member_add (
void *srp_context,
const struct totem_ip_address *member,
int ring_no);
/**
 * Remove a member address from the link/ring with index ring_no.
 * Returns the result of the underlying transport call.
 */
extern int totemsrp_member_remove (
void *srp_context,
const struct totem_ip_address *member,
int ring_no);
/**
 * Set the instance's threaded_mode_enabled flag.
 */
void totemsrp_threaded_mode_enable (
void *srp_context);
/**
 * Clear the instance's waiting_trans_ack flag and invoke the
 * waiting_trans_ack callback with 0.
 */
void totemsrp_trans_ack (
void *srp_context);
#endif /* TOTEMSRP_H_DEFINED */
diff --git a/exec/totemudp.c b/exec/totemudp.c
index 31d05704..c103007d 100644
--- a/exec/totemudp.c
+++ b/exec/totemudp.c
@@ -1,1429 +1,1366 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <assert.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netdb.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <sched.h>
#include <time.h>
#include <sys/time.h>
#include <sys/poll.h>
#include <sys/uio.h>
#include <limits.h>
#include <corosync/sq.h>
#include <corosync/swab.h>
#include <corosync/list.h>
#include <qb/qbdefs.h>
#include <qb/qbloop.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/logsys.h>
#include "totemudp.h"
#include "util.h"
-#include "totemcrypto.h"
#include <nss.h>
#include <pk11pub.h>
#include <pkcs11.h>
#include <prerror.h>
/* Fallback for platforms whose headers do not define MSG_NOSIGNAL: the flag becomes a no-op */
#ifndef MSG_NOSIGNAL
#define MSG_NOSIGNAL 0
#endif
/* Size requested for SO_RCVBUF / SO_SNDBUF in totemudp_build_sockets_ip */
#define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX)
/* Bit flags kept in netif_state_report: which interface transitions may still be reported */
#define NETIF_STATE_REPORT_UP 1
#define NETIF_STATE_REPORT_DOWN 2
/* Values of netif_bind_state */
#define BIND_STATE_UNBOUND 0
#define BIND_STATE_REGULAR 1
#define BIND_STATE_LOOPBACK 2
/* First byte of a datagram that net_deliver_fn drops while a flush is in progress */
#define MESSAGE_TYPE_MEMB_JOIN 3
/*
 * File descriptors owned by one totemudp instance.
 */
struct totemudp_socket {
/* Bound to the multicast address; polled with net_deliver_fn */
int mcast_recv;
/* Transmit socket used by both ucast_sendmsg and mcast_sendmsg */
int mcast_send;
/* Bound to the interface address on ip_port; polled with net_deliver_fn */
int token;
/*
* Socket used for local multicast delivery. We don't rely on multicast
* loop and rather this UNIX DGRAM socket is used. Socket is created by
* socketpair call and they are used in same way as pipe (so [0] is read
* end and [1] is write end)
*/
int local_mcast_loop[2];
};
/*
 * State of one totemudp transport instance: event loop handle, sockets,
 * receive buffers, upper-layer callbacks, logging configuration and the
 * bookkeeping used to track whether the network interface is up.
 */
struct totemudp_instance {
- struct crypto_instance *crypto_inst;
-
qb_loop_t *totemudp_poll_handle;
struct totem_interface *totem_interface;
/* Bitmask of NETIF_STATE_REPORT_* values */
int netif_state_report;
/* One of the BIND_STATE_* values */
int netif_bind_state;
/* Opaque pointer passed back as first argument of the callbacks below */
void *context;
/* Called by net_deliver_fn for every accepted datagram */
void (*totemudp_deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len);
/* Called from timer_function_netif_check_timeout when an up/down change is reported */
void (*totemudp_iface_change_fn) (
void *context,
- const struct totem_ip_address *iface_address);
+ const struct totem_ip_address *iface_address,
+ unsigned int ring_no);
void (*totemudp_target_set_completed) (void *context);
/*
* Function and data used to log messages
*/
int totemudp_log_level_security;
int totemudp_log_level_error;
int totemudp_log_level_warning;
int totemudp_log_level_notice;
int totemudp_log_level_debug;
int totemudp_subsys_id;
void (*totemudp_log_printf) (
int level,
int subsys,
const char *function,
const char *file,
int line,
const char *format,
...)__attribute__((format(printf, 6, 7)));
void *udp_context;
/* Backing storage for totemudp_iov_recv (normal) and totemudp_iov_recv_flush (while flushing) */
char iov_buffer[FRAME_SIZE_MAX];
char iov_buffer_flush[FRAME_SIZE_MAX];
struct iovec totemudp_iov_recv;
struct iovec totemudp_iov_recv_flush;
struct totemudp_socket totemudp_sockets;
struct totem_ip_address mcast_address;
int stats_sent;
/* Running total of bytes returned by recvmsg in net_deliver_fn */
int stats_recv;
int stats_delv;
int stats_remcasts;
int stats_orf_token;
struct timeval stats_tv_start;
/* Copy of the address the sockets are bound to */
struct totem_ip_address my_id;
int firstrun;
qb_loop_timer_handle timer_netif_check_timeout;
/* Set by totemudp_processor_count_set; starts at 1 */
unsigned int my_memb_entries;
/* 1 while totemudp_recv_flush is draining the receive sockets */
int flushing;
struct totem_config *totem_config;
totemsrp_stats_t *stats;
/* Destination used by totemudp_token_send */
struct totem_ip_address token_target;
};
/*
 * Pairs a message buffer and its length with the instance it belongs to.
 * NOTE(review): no use of this type is visible in this part of the file.
 */
struct work_item {
const void *msg;
unsigned int msg_len;
struct totemudp_instance *instance;
};
/*
 * Forward declaration. Parameter names follow the definition: the multicast
 * address comes first, then the address to bind to.
 */
static int totemudp_build_sockets (
	struct totemudp_instance *instance,
	struct totem_ip_address *mcast_address,
	struct totem_ip_address *bindnet_address,
	struct totemudp_socket *sockets,
	struct totem_ip_address *bound_to);
/*
 * Loopback address, filled in by totemudp_initialize and used as the bind
 * address while the configured interface is down. File-scope, so it is shared
 * by every instance created in this process.
 */
static struct totem_ip_address localhost;
/*
 * Put a freshly allocated instance into its default state: everything zeroed,
 * both interface transitions reportable, the two receive iovecs pointing at
 * their FRAME_SIZE_MAX buffers, and a membership of one.
 */
static void totemudp_instance_initialize (struct totemudp_instance *instance)
{
	struct iovec *recv_iov = &instance->totemudp_iov_recv;
	struct iovec *flush_iov = &instance->totemudp_iov_recv_flush;

	memset (instance, 0, sizeof (*instance));

	instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN;

	/* Regular receive buffer */
	recv_iov->iov_base = instance->iov_buffer;
	recv_iov->iov_len = FRAME_SIZE_MAX;

	/* Receive buffer used while flushing */
	flush_iov->iov_base = instance->iov_buffer_flush;
	flush_iov->iov_len = FRAME_SIZE_MAX;

	/*
	 * There is always at least 1 processor
	 */
	instance->my_memb_entries = 1;
}
/*
 * Log through the callback configured on the instance. A variable named
 * `instance` must be in scope at the call site.
 *
 * The body is a do { } while (0) WITHOUT a trailing semicolon, so that
 * `log_printf (...);` expands to exactly one statement and can be used in an
 * unbraced if/else. The caller supplies the terminating semicolon.
 */
#define log_printf(level, format, args...) \
do { \
	instance->totemudp_log_printf ( \
		level, instance->totemudp_subsys_id, \
		__FUNCTION__, __FILE__, __LINE__, \
		(const char *)format, ##args); \
} while (0)
/*
 * Log `fmt` followed by ": <error text> (<err_num>)" through the instance's
 * log callback. `fmt` must be a string literal because it is concatenated
 * with the suffix at compile time. A variable named `instance` must be in
 * scope at the call site. The error text comes from qb_strerror_r.
 */
#define LOGSYS_PERROR(err_num, level, fmt, args...) \
do { \
char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \
const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \
instance->totemudp_log_printf ( \
level, instance->totemudp_subsys_id, \
__FUNCTION__, __FILE__, __LINE__, \
fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \
} while(0)
/*
 * Crypto reconfiguration entry point. All three arguments are ignored and
 * success (0) is always returned.
 */
int totemudp_crypto_set (
	void *udp_context,
	const char *cipher_type,
	const char *hash_type)
{
	(void)udp_context;
	(void)cipher_type;
	(void)hash_type;

	return 0;
}
/*
 * Send one datagram to a single node.
 *
 * instance  - transport instance; its mcast_send socket is used to transmit
 * system_to - destination address; the port is the interface's ip_port
 * msg       - payload, transmitted directly from the caller's buffer
 * msg_len   - payload length in bytes
 *
 * Nothing is returned: a sendmsg failure is only logged at debug level.
 */
static inline void ucast_sendmsg (
struct totemudp_instance *instance,
struct totem_ip_address *system_to,
const void *msg,
unsigned int msg_len)
{
struct msghdr msg_ucast;
int res = 0;
- size_t buf_out_len;
- unsigned char buf_out[FRAME_SIZE_MAX];
struct sockaddr_storage sockaddr;
struct iovec iovec;
int addrlen;
- /*
- * Encrypt and digest the message
- */
- if (crypto_encrypt_and_sign (
- instance->crypto_inst,
- (const unsigned char *)msg,
- msg_len,
- buf_out,
- &buf_out_len) != 0) {
- log_printf(LOGSYS_LEVEL_CRIT, "Error encrypting/signing packet (non-critical)");
- return;
- }
-
- iovec.iov_base = (void *)buf_out;
- iovec.iov_len = buf_out_len;
/* The cast only drops const for the iovec; sendmsg does not modify the buffer */
+ iovec.iov_base = (void*)msg;
+ iovec.iov_len = msg_len;
/*
* Build unicast message
*/
memset(&msg_ucast, 0, sizeof(msg_ucast));
totemip_totemip_to_sockaddr_convert(system_to,
instance->totem_interface->ip_port, &sockaddr, &addrlen);
msg_ucast.msg_name = &sockaddr;
msg_ucast.msg_namelen = addrlen;
msg_ucast.msg_iov = (void *)&iovec;
msg_ucast.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
msg_ucast.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
msg_ucast.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
msg_ucast.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
msg_ucast.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
msg_ucast.msg_accrightslen = 0;
#endif
/*
* Transmit unicast message
* An error here is recovered by totemsrp
*/
res = sendmsg (instance->totemudp_sockets.mcast_send, &msg_ucast,
MSG_NOSIGNAL);
if (res < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
"sendmsg(ucast) failed (non-critical)");
}
}
/*
 * Send one datagram to the multicast address and to the local loop socket.
 *
 * instance - transport instance; mcast_address and ip_port give the destination
 * msg      - payload, transmitted directly from the caller's buffer
 * msg_len  - payload length in bytes
 *
 * The same msghdr is sent twice: first on mcast_send to the group, then with
 * the destination cleared on local_mcast_loop[1]. Nothing is returned. The
 * result of the network send is recorded in
 * stats->continuous_sendmsg_failures; both failures are logged at debug level.
 */
static inline void mcast_sendmsg (
struct totemudp_instance *instance,
const void *msg,
unsigned int msg_len)
{
struct msghdr msg_mcast;
int res = 0;
- size_t buf_out_len;
- unsigned char buf_out[FRAME_SIZE_MAX];
struct iovec iovec;
struct sockaddr_storage sockaddr;
int addrlen;
- /*
- * Encrypt and digest the message
- */
- if (crypto_encrypt_and_sign (
- instance->crypto_inst,
- (const unsigned char *)msg,
- msg_len,
- buf_out,
- &buf_out_len) != 0) {
- log_printf(LOGSYS_LEVEL_CRIT, "Error encrypting/signing packet (non-critical)");
- return;
- }
-
- iovec.iov_base = (void *)&buf_out;
- iovec.iov_len = buf_out_len;
+ iovec.iov_base = (void *)msg;
+ iovec.iov_len = msg_len;
/*
* Build multicast message
*/
totemip_totemip_to_sockaddr_convert(&instance->mcast_address,
instance->totem_interface->ip_port, &sockaddr, &addrlen);
memset(&msg_mcast, 0, sizeof(msg_mcast));
msg_mcast.msg_name = &sockaddr;
msg_mcast.msg_namelen = addrlen;
msg_mcast.msg_iov = (void *)&iovec;
msg_mcast.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
msg_mcast.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
msg_mcast.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
msg_mcast.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
msg_mcast.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
msg_mcast.msg_accrightslen = 0;
#endif
/*
* Transmit multicast message
* An error here is recovered by totemsrp
*/
res = sendmsg (instance->totemudp_sockets.mcast_send, &msg_mcast,
MSG_NOSIGNAL);
if (res < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
"sendmsg(mcast) failed (non-critical)");
/* Count consecutive failures; the counter is reset by the next successful send */
instance->stats->continuous_sendmsg_failures++;
} else {
instance->stats->continuous_sendmsg_failures = 0;
}
/*
* Transmit multicast message to local unix mcast loop
* An error here is recovered by totemsrp
*/
/* The loop socket comes from socketpair(), so no destination address is given */
msg_mcast.msg_name = NULL;
msg_mcast.msg_namelen = 0;
res = sendmsg (instance->totemudp_sockets.local_mcast_loop[1], &msg_mcast,
MSG_NOSIGNAL);
if (res < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
"sendmsg(local mcast loop) failed (non-critical)");
}
}
int totemudp_finalize (
void *udp_context)
{
struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
int res = 0;
if (instance->totemudp_sockets.mcast_recv > 0) {
qb_loop_poll_del (instance->totemudp_poll_handle,
instance->totemudp_sockets.mcast_recv);
close (instance->totemudp_sockets.mcast_recv);
}
if (instance->totemudp_sockets.mcast_send > 0) {
close (instance->totemudp_sockets.mcast_send);
}
if (instance->totemudp_sockets.local_mcast_loop[0] > 0) {
qb_loop_poll_del (instance->totemudp_poll_handle,
instance->totemudp_sockets.local_mcast_loop[0]);
close (instance->totemudp_sockets.local_mcast_loop[0]);
close (instance->totemudp_sockets.local_mcast_loop[1]);
}
if (instance->totemudp_sockets.token > 0) {
qb_loop_poll_del (instance->totemudp_poll_handle,
instance->totemudp_sockets.token);
close (instance->totemudp_sockets.token);
}
return (res);
}
/*
* Poll callback: receive one datagram from fd and hand it to the upper layer.
* Only designed to work with a message with one iov
*
* fd      - readable socket
* revents - poll events (not examined here)
* data    - the struct totemudp_instance registered with the poll loop
*
* Always returns 0. A recvmsg failure is silently ignored. While a flush is
* in progress the flush buffer is used and datagrams whose first byte is
* MESSAGE_TYPE_MEMB_JOIN are dropped instead of delivered.
*/
static int net_deliver_fn (
int fd,
int revents,
void *data)
{
struct totemudp_instance *instance = (struct totemudp_instance *)data;
struct msghdr msg_recv;
struct iovec *iovec;
struct sockaddr_storage system_from;
int bytes_received;
- int res = 0;
char *message_type;
if (instance->flushing == 1) {
iovec = &instance->totemudp_iov_recv_flush;
} else {
iovec = &instance->totemudp_iov_recv;
}
/*
* Receive datagram
*/
/* NOTE(review): msg_recv is not zeroed first; fields whose HAVE_MSGHDR_* macro is undefined stay uninitialized - confirm this is intended */
msg_recv.msg_name = &system_from;
msg_recv.msg_namelen = sizeof (struct sockaddr_storage);
msg_recv.msg_iov = iovec;
msg_recv.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
msg_recv.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
msg_recv.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
msg_recv.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
msg_recv.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
msg_recv.msg_accrightslen = 0;
#endif
bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT);
if (bytes_received == -1) {
return (0);
} else {
instance->stats_recv += bytes_received;
}
- /*
- * Authenticate and if authenticated, decrypt datagram
- */
- res = crypto_authenticate_and_decrypt (instance->crypto_inst, iovec->iov_base, &bytes_received);
- if (res == -1) {
- log_printf (instance->totemudp_log_level_security, "Received message has invalid digest... ignoring.");
- log_printf (instance->totemudp_log_level_security,
- "Invalid packet data");
- iovec->iov_len = FRAME_SIZE_MAX;
- return 0;
- }
/* Temporarily shrink the iovec to the received size; it is restored to FRAME_SIZE_MAX on every exit path below */
iovec->iov_len = bytes_received;
/*
* Drop all non-mcast messages (more specifically join
* messages should be dropped)
*/
message_type = (char *)iovec->iov_base;
if (instance->flushing == 1 && *message_type == MESSAGE_TYPE_MEMB_JOIN) {
log_printf(instance->totemudp_log_level_warning, "JOIN or LEAVE message was thrown away during flush operation.");
iovec->iov_len = FRAME_SIZE_MAX;
return (0);
}
/*
* Handle incoming message
*/
instance->totemudp_deliver_fn (
instance->context,
iovec->iov_base,
iovec->iov_len);
iovec->iov_len = FRAME_SIZE_MAX;
return (0);
}
/*
 * Thin wrapper around totemip_iface_check that supplies the
 * clear_node_high_bit setting from the instance's totem configuration.
 * Returns whatever totemip_iface_check returns.
 */
static int netif_determine (
	struct totemudp_instance *instance,
	struct totem_ip_address *bindnet,
	struct totem_ip_address *bound_to,
	int *interface_up,
	int *interface_num)
{
	return totemip_iface_check (
		bindnet,
		bound_to,
		interface_up,
		interface_num,
		instance->totem_config->clear_node_high_bit);
}
/*
* If the interface is up, the sockets for totem are built. If the interface is down
* this function is requeued in the timer list to retry building the sockets later.
*
* data - the struct totemudp_instance the timer was armed for.
*
* Sequence: query the interface state; if nothing changed, re-arm the timer
* and return; otherwise close the current sockets, rebuild them bound either
* to the configured network or to localhost, register them with the poll
* loop, and report the up/down transition through totemudp_iface_change_fn.
*/
static void timer_function_netif_check_timeout (
void *data)
{
struct totemudp_instance *instance = (struct totemudp_instance *)data;
int interface_up;
int interface_num;
struct totem_ip_address *bind_address;
/*
* Build sockets for every interface
*/
/* NOTE(review): the return value is ignored and interface_up is read below - confirm totemip_iface_check always sets it */
netif_determine (instance,
&instance->totem_interface->bindnet,
&instance->totem_interface->boundto,
&interface_up, &interface_num);
/*
* If the network interface isn't back up and we are already
* in loopback mode, add timer to check again and return
*/
/* The second half of the condition covers a single-member ring whose regular interface is still up */
if ((instance->netif_bind_state == BIND_STATE_LOOPBACK &&
interface_up == 0) ||
(instance->my_memb_entries == 1 &&
instance->netif_bind_state == BIND_STATE_REGULAR &&
interface_up == 1)) {
qb_loop_timer_add (instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
/*
* Add a timer to check for a downed regular interface
*/
return;
}
/* State changed: drop the existing sockets before rebuilding them */
if (instance->totemudp_sockets.mcast_recv > 0) {
qb_loop_poll_del (instance->totemudp_poll_handle,
instance->totemudp_sockets.mcast_recv);
close (instance->totemudp_sockets.mcast_recv);
}
if (instance->totemudp_sockets.mcast_send > 0) {
close (instance->totemudp_sockets.mcast_send);
}
if (instance->totemudp_sockets.local_mcast_loop[0] > 0) {
qb_loop_poll_del (instance->totemudp_poll_handle,
instance->totemudp_sockets.local_mcast_loop[0]);
close (instance->totemudp_sockets.local_mcast_loop[0]);
close (instance->totemudp_sockets.local_mcast_loop[1]);
}
if (instance->totemudp_sockets.token > 0) {
qb_loop_poll_del (instance->totemudp_poll_handle,
instance->totemudp_sockets.token);
close (instance->totemudp_sockets.token);
}
if (interface_up == 0) {
/*
* Interface is not up
*/
instance->netif_bind_state = BIND_STATE_LOOPBACK;
bind_address = &localhost;
/*
* Add a timer to retry building interfaces and request memb_gather_enter
*/
qb_loop_timer_add (instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
} else {
/*
* Interface is up
*/
instance->netif_bind_state = BIND_STATE_REGULAR;
bind_address = &instance->totem_interface->bindnet;
}
/*
* Create and bind the multicast and unicast sockets
*/
/* NOTE(review): the result is discarded, so the poll registrations below run even if socket creation failed */
(void)totemudp_build_sockets (instance,
&instance->mcast_address,
bind_address,
&instance->totemudp_sockets,
&instance->totem_interface->boundto);
qb_loop_poll_add (
instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totemudp_sockets.mcast_recv,
POLLIN, instance, net_deliver_fn);
qb_loop_poll_add (
instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totemudp_sockets.local_mcast_loop[0],
POLLIN, instance, net_deliver_fn);
qb_loop_poll_add (
instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totemudp_sockets.token,
POLLIN, instance, net_deliver_fn);
totemip_copy (&instance->my_id, &instance->totem_interface->boundto);
/*
* This reports changes in the interface to the user and totemsrp
*/
if (instance->netif_bind_state == BIND_STATE_REGULAR) {
if (instance->netif_state_report & NETIF_STATE_REPORT_UP) {
log_printf (instance->totemudp_log_level_notice,
"The network interface [%s] is now up.",
totemip_print (&instance->totem_interface->boundto));
/* After reporting "up", only a "down" transition is reported next */
instance->netif_state_report = NETIF_STATE_REPORT_DOWN;
- instance->totemudp_iface_change_fn (instance->context, &instance->my_id);
+ instance->totemudp_iface_change_fn (instance->context, &instance->my_id, 0);
}
/*
* Add a timer to check for interface going down in single membership
*/
if (instance->my_memb_entries == 1) {
qb_loop_timer_add (instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
}
} else {
if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) {
log_printf (instance->totemudp_log_level_notice,
"The network interface is down.");
- instance->totemudp_iface_change_fn (instance->context, &instance->my_id);
+ instance->totemudp_iface_change_fn (instance->context, &instance->my_id, 0);
}
instance->netif_state_report = NETIF_STATE_REPORT_UP;
}
}
/*
 * Raise the priority of `sock` to INTERACTIVE so that our messages are not
 * queued behind other traffic. Compiled to a no-op where SO_PRIORITY is not
 * available. A failure is logged as a warning and otherwise ignored.
 */
static void totemudp_traffic_control_set(struct totemudp_instance *instance, int sock)
{
#ifdef SO_PRIORITY
	int prio = 6; /* TC_PRIO_INTERACTIVE */
	int rc;

	rc = setsockopt(sock, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio));
	if (rc != 0) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set traffic priority");
	}
#endif
}
/*
 * Create and configure every socket of one totemudp instance.
 *
 * instance        - owning instance (logging, port, TTL, broadcast setting)
 * mcast_address   - multicast group to join and to bind the receive socket to
 * bindnet_address - only its address family is used here
 * sockets         - receives the created descriptors
 * bound_to        - local address the send and token sockets are bound to
 * interface_num   - interface index used for the IPv6 multicast options
 *
 * Returns 0 on success and -1 as soon as any step fails; the failing step is
 * logged. Descriptors created before the failure are left in `sockets`.
 *
 * The order of operations matters: the multicast receive socket is bound
 * last, after all multicast options have been set (see the comment at the
 * final bind).
 */
static int totemudp_build_sockets_ip (
	struct totemudp_instance *instance,
	struct totem_ip_address *mcast_address,
	struct totem_ip_address *bindnet_address,
	struct totemudp_socket *sockets,
	struct totem_ip_address *bound_to,
	int interface_num)
{
	struct sockaddr_storage sockaddr;
	struct ipv6_mreq mreq6;
	struct ip_mreq mreq;
	struct sockaddr_storage mcast_ss, boundto_ss;
	struct sockaddr_in6 *mcast_sin6 = (struct sockaddr_in6 *)&mcast_ss;
	struct sockaddr_in *mcast_sin = (struct sockaddr_in *)&mcast_ss;
	struct sockaddr_in *boundto_sin = (struct sockaddr_in *)&boundto_ss;
	unsigned int sendbuf_size;
	unsigned int recvbuf_size;
	/* getsockopt() takes a socklen_t *, which is not unsigned int on every platform */
	socklen_t optlen = sizeof (sendbuf_size);
	int addrlen;
	int res;
	int flag;
	uint8_t sflag;
	int i;

	/*
	 * Create multicast recv socket
	 */
	sockets->mcast_recv = socket (bindnet_address->family, SOCK_DGRAM, 0);
	if (sockets->mcast_recv == -1) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
			"socket() failed");
		return (-1);
	}

	totemip_nosigpipe (sockets->mcast_recv);
	res = fcntl (sockets->mcast_recv, F_SETFL, O_NONBLOCK);
	if (res == -1) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
			"Could not set non-blocking operation on multicast socket");
		return (-1);
	}

	/*
	 * Force reuse
	 */
	flag = 1;
	if ( setsockopt(sockets->mcast_recv, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
			"setsockopt(SO_REUSEADDR) failed");
		return (-1);
	}

	/*
	 * Create local multicast loop socket
	 */
	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sockets->local_mcast_loop) == -1) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
			"socketpair() failed");
		return (-1);
	}

	for (i = 0; i < 2; i++) {
		totemip_nosigpipe (sockets->local_mcast_loop[i]);
		res = fcntl (sockets->local_mcast_loop[i], F_SETFL, O_NONBLOCK);
		if (res == -1) {
			LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
				"Could not set non-blocking operation on multicast socket");
			return (-1);
		}
	}

	/*
	 * Setup mcast send socket
	 */
	sockets->mcast_send = socket (bindnet_address->family, SOCK_DGRAM, 0);
	if (sockets->mcast_send == -1) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
			"socket() failed");
		return (-1);
	}

	totemip_nosigpipe (sockets->mcast_send);
	res = fcntl (sockets->mcast_send, F_SETFL, O_NONBLOCK);
	if (res == -1) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
			"Could not set non-blocking operation on multicast socket");
		return (-1);
	}

	/*
	 * Force reuse
	 */
	flag = 1;
	if ( setsockopt(sockets->mcast_send, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
			"setsockopt(SO_REUSEADDR) failed");
		return (-1);
	}

	/* The send socket is bound one port below the configured ip_port */
	totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port - 1,
		&sockaddr, &addrlen);
	res = bind (sockets->mcast_send, (struct sockaddr *)&sockaddr, addrlen);
	if (res == -1) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
			"Unable to bind the socket to send multicast packets");
		return (-1);
	}

	/*
	 * Setup unicast socket
	 */
	sockets->token = socket (bindnet_address->family, SOCK_DGRAM, 0);
	if (sockets->token == -1) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
			"socket() failed");
		return (-1);
	}

	totemip_nosigpipe (sockets->token);
	res = fcntl (sockets->token, F_SETFL, O_NONBLOCK);
	if (res == -1) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
			"Could not set non-blocking operation on token socket");
		return (-1);
	}

	/*
	 * Force reuse
	 */
	flag = 1;
	if ( setsockopt(sockets->token, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
			"setsockopt(SO_REUSEADDR) failed");
		return (-1);
	}

	/*
	 * Bind to unicast socket used for token send/receives
	 * This has the side effect of binding to the correct interface
	 */
	totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &sockaddr, &addrlen);
	res = bind (sockets->token, (struct sockaddr *)&sockaddr, addrlen);
	if (res == -1) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
			"Unable to bind UDP unicast socket");
		return (-1);
	}

	recvbuf_size = MCAST_SOCKET_BUFFER_SIZE;
	sendbuf_size = MCAST_SOCKET_BUFFER_SIZE;
	/*
	 * Set buffer sizes to avoid overruns
	 */
	res = setsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen);
	if (res == -1) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
			"Unable to set SO_RCVBUF size on UDP mcast socket");
		return (-1);
	}
	res = setsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen);
	if (res == -1) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
			"Unable to set SO_SNDBUF size on UDP mcast socket");
		return (-1);
	}
	res = setsockopt (sockets->local_mcast_loop[0], SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen);
	if (res == -1) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
			"Unable to set SO_RCVBUF size on UDP local mcast loop socket");
		return (-1);
	}
	res = setsockopt (sockets->local_mcast_loop[1], SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen);
	if (res == -1) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
			"Unable to set SO_SNDBUF size on UDP local mcast loop socket");
		return (-1);
	}

	/* Read the sizes back for the debug log only; a getsockopt failure here is not an error */
	res = getsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, &optlen);
	if (res == 0) {
		log_printf (instance->totemudp_log_level_debug,
			"Receive multicast socket recv buffer size (%u bytes).", recvbuf_size);
	}

	res = getsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, &optlen);
	if (res == 0) {
		log_printf (instance->totemudp_log_level_debug,
			"Transmit multicast socket send buffer size (%u bytes).", sendbuf_size);
	}

	res = getsockopt (sockets->local_mcast_loop[0], SOL_SOCKET, SO_RCVBUF, &recvbuf_size, &optlen);
	if (res == 0) {
		log_printf (instance->totemudp_log_level_debug,
			"Local receive multicast loop socket recv buffer size (%u bytes).", recvbuf_size);
	}

	res = getsockopt (sockets->local_mcast_loop[1], SOL_SOCKET, SO_SNDBUF, &sendbuf_size, &optlen);
	if (res == 0) {
		log_printf (instance->totemudp_log_level_debug,
			"Local transmit multicast loop socket send buffer size (%u bytes).", sendbuf_size);
	}

	/*
	 * Join group membership on socket
	 */
	totemip_totemip_to_sockaddr_convert(mcast_address, instance->totem_interface->ip_port, &mcast_ss, &addrlen);
	totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &boundto_ss, &addrlen);

	if (instance->totem_config->broadcast_use == 1) {
		unsigned int broadcast = 1;

		if ((setsockopt(sockets->mcast_recv, SOL_SOCKET,
			SO_BROADCAST, &broadcast, sizeof (broadcast))) == -1) {
			LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
				"setting broadcast option failed");
			return (-1);
		}
		if ((setsockopt(sockets->mcast_send, SOL_SOCKET,
			SO_BROADCAST, &broadcast, sizeof (broadcast))) == -1) {
			LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
				"setting broadcast option failed");
			return (-1);
		}
	} else {
		switch (bindnet_address->family) {
			case AF_INET:
			memset(&mreq, 0, sizeof(mreq));
			mreq.imr_multiaddr.s_addr = mcast_sin->sin_addr.s_addr;
			mreq.imr_interface.s_addr = boundto_sin->sin_addr.s_addr;
			res = setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_ADD_MEMBERSHIP,
				&mreq, sizeof (mreq));
			if (res == -1) {
				LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
					"join ipv4 multicast group failed");
				return (-1);
			}
			break;
			case AF_INET6:
			memset(&mreq6, 0, sizeof(mreq6));
			memcpy(&mreq6.ipv6mr_multiaddr, &mcast_sin6->sin6_addr, sizeof(struct in6_addr));
			mreq6.ipv6mr_interface = interface_num;

			res = setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_JOIN_GROUP,
				&mreq6, sizeof (mreq6));
			if (res == -1) {
				LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
					"join ipv6 multicast group failed");
				return (-1);
			}
			break;
		}
	}

	/*
	 * Turn off multicast loopback
	 */
	flag = 0;
	switch ( bindnet_address->family ) {
		case AF_INET:
		/* The IPv4 option is passed as a single byte, the IPv6 one as an int */
		sflag = 0;
		res = setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_LOOP,
			&sflag, sizeof (sflag));
		break;
		case AF_INET6:
		res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
			&flag, sizeof (flag));
	}
	if (res == -1) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
			"Unable to turn off multicast loopback");
		return (-1);
	}

	/*
	 * Set multicast packets TTL
	 */
	flag = instance->totem_interface->ttl;
	if (bindnet_address->family == AF_INET6) {
		res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
			&flag, sizeof (flag));
		if (res == -1) {
			LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
				"set mcast v6 TTL failed");
			return (-1);
		}
	} else {
		sflag = flag;
		res = setsockopt(sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_TTL,
			&sflag, sizeof(sflag));
		if (res == -1) {
			LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
				"set mcast v4 TTL failed");
			return (-1);
		}
	}

	/*
	 * Bind to a specific interface for multicast send and receive
	 */
	switch ( bindnet_address->family ) {
		case AF_INET:
		if (setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_IF,
			&boundto_sin->sin_addr, sizeof (boundto_sin->sin_addr)) < 0) {
			LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
				"cannot select interface for multicast packets (send)");
			return (-1);
		}
		if (setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_MULTICAST_IF,
			&boundto_sin->sin_addr, sizeof (boundto_sin->sin_addr)) < 0) {
			LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
				"cannot select interface for multicast packets (recv)");
			return (-1);
		}
		break;
		case AF_INET6:
		if (setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_IF,
			&interface_num, sizeof (interface_num)) < 0) {
			LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
				"cannot select interface for multicast packets (send v6)");
			return (-1);
		}
		if (setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_MULTICAST_IF,
			&interface_num, sizeof (interface_num)) < 0) {
			LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
				"cannot select interface for multicast packets (recv v6)");
			return (-1);
		}
		break;
	}

	/*
	 * Bind to multicast socket used for multicast receives
	 * This needs to happen after all of the multicast setsockopt() calls
	 * as the kernel seems to only put them into effect (for IPV6) when bind()
	 * is called.
	 */
	totemip_totemip_to_sockaddr_convert(mcast_address,
		instance->totem_interface->ip_port, &sockaddr, &addrlen);
	res = bind (sockets->mcast_recv, (struct sockaddr *)&sockaddr, addrlen);
	if (res == -1) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
			"Unable to bind the socket to receive multicast packets");
		return (-1);
	}
	return 0;
}
/*
 * Resolve the local interface for `bindnet_address` and build all sockets
 * of the instance on it.
 *
 * instance        - owning instance; its my_id is updated to the bound address
 * mcast_address   - multicast group address
 * bindnet_address - network (or loopback address) to bind to
 * sockets         - receives the created descriptors
 * bound_to        - receives the concrete local address that was selected
 *
 * Returns 0 on success, -1 if the interface lookup or the socket setup
 * failed. Traffic priority is applied to the token socket only after a
 * successful build, so setsockopt() is never issued on a descriptor that is
 * -1 or left over from an earlier, already closed set of sockets.
 */
static int totemudp_build_sockets (
	struct totemudp_instance *instance,
	struct totem_ip_address *mcast_address,
	struct totem_ip_address *bindnet_address,
	struct totemudp_socket *sockets,
	struct totem_ip_address *bound_to)
{
	int interface_num;
	int interface_up;
	int res;

	/*
	 * Determine the ip address bound to and the interface name
	 */
	res = netif_determine (instance,
		bindnet_address,
		bound_to,
		&interface_up,
		&interface_num);

	if (res == -1) {
		return (-1);
	}

	totemip_copy(&instance->my_id, bound_to);

	res = totemudp_build_sockets_ip (instance, mcast_address,
		bindnet_address, sockets, bound_to, interface_num);

	if (res == 0) {
		totemudp_traffic_control_set(instance, sockets->token);
	}

	return res;
}
/*
- * Totem Network interface - also does encryption/decryption
+ * Totem Network interface
* depends on poll abstraction, POSIX, IPV4
*/
/*
* Create an instance
*
* Allocates and initializes a struct totemudp_instance, copies the logging
* configuration and callbacks into it, and schedules the first interface
* check 100 ms later. No sockets are created here; that happens in
* timer_function_netif_check_timeout.
*
* On success stores the instance in *udp_context and returns 0. Returns -1
* if the allocation fails.
*/
int totemudp_initialize (
qb_loop_t *poll_handle,
void **udp_context,
struct totem_config *totem_config,
totemsrp_stats_t *stats,
- int interface_no,
+
void *context,
void (*deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len),
void (*iface_change_fn) (
void *context,
- const struct totem_ip_address *iface_address),
+ const struct totem_ip_address *iface_address,
+ unsigned int ring_no),
+
/* NOTE(review): mtu_changed is accepted but not stored or called anywhere in this function */
+ void (*mtu_changed) (
+ void *context,
+ int net_mtu),
void (*target_set_completed) (
void *context))
{
struct totemudp_instance *instance;
instance = malloc (sizeof (struct totemudp_instance));
if (instance == NULL) {
return (-1);
}
totemudp_instance_initialize (instance);
instance->totem_config = totem_config;
instance->stats = stats;
/*
* Configure logging
*/
instance->totemudp_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security;
instance->totemudp_log_level_error = totem_config->totem_logging_configuration.log_level_error;
instance->totemudp_log_level_warning = totem_config->totem_logging_configuration.log_level_warning;
instance->totemudp_log_level_notice = totem_config->totem_logging_configuration.log_level_notice;
instance->totemudp_log_level_debug = totem_config->totem_logging_configuration.log_level_debug;
instance->totemudp_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
instance->totemudp_log_printf = totem_config->totem_logging_configuration.log_printf;
- /*
- * Initialize random number generator for later use to generate salt
- */
- instance->crypto_inst = crypto_init (totem_config->private_key,
- totem_config->private_key_len,
- totem_config->crypto_cipher_type,
- totem_config->crypto_hash_type,
- instance->totemudp_log_printf,
- instance->totemudp_log_level_security,
- instance->totemudp_log_level_notice,
- instance->totemudp_log_level_error,
- instance->totemudp_subsys_id);
- if (instance->crypto_inst == NULL) {
- free(instance);
- return (-1);
- }
/*
* Initialize local variables for totemudp
*/
- instance->totem_interface = &totem_config->interfaces[interface_no];
+ instance->totem_interface = &totem_config->interfaces[0];
totemip_copy (&instance->mcast_address, &instance->totem_interface->mcast_addr);
memset (instance->iov_buffer, 0, FRAME_SIZE_MAX);
instance->totemudp_poll_handle = poll_handle;
instance->totem_interface->bindnet.nodeid = instance->totem_config->node_id;
instance->context = context;
instance->totemudp_deliver_fn = deliver_fn;
instance->totemudp_iface_change_fn = iface_change_fn;
instance->totemudp_target_set_completed = target_set_completed;
/* `localhost` is a file-scope static, so this overwrites it for every instance */
totemip_localhost (instance->mcast_address.family, &localhost);
localhost.nodeid = instance->totem_config->node_id;
/*
* RRP layer isn't ready to receive message because it hasn't
* initialized yet. Add short timer to check the interfaces.
*/
qb_loop_timer_add (instance->totemudp_poll_handle,
QB_LOOP_MED,
100*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
*udp_context = instance;
return (0);
}
/*
 * Allocate one frame-sized buffer (FRAME_SIZE_MAX bytes) with malloc.
 * Returns NULL when the allocation fails; release the buffer with
 * totemudp_buffer_release.
 */
void *totemudp_buffer_alloc (void)
{
	void *frame = malloc (FRAME_SIZE_MAX);

	return frame;
}
/*
 * Release a buffer obtained from totemudp_buffer_alloc. Passing NULL is a
 * no-op, as with free().
 */
void totemudp_buffer_release (void *ptr)
{
	/* Plain call: ISO C does not allow `return <expr>;` in a void function */
	free (ptr);
}
/*
 * Record the current membership size.
 *
 * The pending interface-check timer is always cancelled. It is re-armed with
 * the downcheck timeout only when this node is alone (processor_count == 1).
 * Always returns 0.
 */
int totemudp_processor_count_set (
	void *udp_context,
	int processor_count)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;

	instance->my_memb_entries = processor_count;

	qb_loop_timer_del (instance->totemudp_poll_handle,
		instance->timer_netif_check_timeout);

	if (processor_count != 1) {
		return (0);
	}

	qb_loop_timer_add (instance->totemudp_poll_handle,
		QB_LOOP_MED,
		instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
		(void *)instance,
		timer_function_netif_check_timeout,
		&instance->timer_netif_check_timeout);

	return (0);
}
/*
 * Drain everything currently queued on the multicast receive socket and on
 * the read end of the local multicast loop.
 *
 * While draining, instance->flushing is 1, which makes net_deliver_fn use
 * the flush buffer and drop join messages. Always returns 0.
 *
 * Each socket is drained only while poll() reports POLLIN. If poll() reports
 * the descriptor as ready for another reason (POLLERR, POLLHUP, POLLNVAL),
 * there is nothing to read and the loop must stop; continuing to poll would
 * return the same result forever.
 */
int totemudp_recv_flush (void *udp_context)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
	struct pollfd ufd;
	int nfds;
	int i;
	int sock;

	instance->flushing = 1;

	for (i = 0; i < 2; i++) {
		sock = (i == 0) ?
			instance->totemudp_sockets.mcast_recv :
			instance->totemudp_sockets.local_mcast_loop[0];

		assert(sock != -1);

		for (;;) {
			ufd.fd = sock;
			ufd.events = POLLIN;
			ufd.revents = 0;

			nfds = poll (&ufd, 1, 0);
			if (nfds != 1 || (ufd.revents & POLLIN) == 0) {
				break;
			}
			net_deliver_fn (sock, ufd.revents, instance);
		}
	}

	instance->flushing = 0;

	return (0);
}
/*
 * Nothing is buffered on the send side, so there is nothing to flush.
 * The context is ignored; always returns 0.
 */
int totemudp_send_flush (void *udp_context)
{
	(void)udp_context;

	return (0);
}
/*
 * Send `msg` to the node stored in the instance's token_target.
 * Always returns 0; transmit errors are handled inside ucast_sendmsg.
 */
int totemudp_token_send (
	void *udp_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemudp_instance *udp = (struct totemudp_instance *)udp_context;

	ucast_sendmsg (udp, &udp->token_target, msg, msg_len);

	return (0);
}
/*
 * Multicast `msg` through mcast_sendmsg.
 * Always returns 0; transmit errors are handled inside mcast_sendmsg.
 */
int totemudp_mcast_flush_send (
	void *udp_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemudp_instance *udp = (struct totemudp_instance *)udp_context;

	mcast_sendmsg (udp, msg, msg_len);

	return (0);
}
/*
 * Multicast `msg` through mcast_sendmsg; in this transport the body is the
 * same as totemudp_mcast_flush_send.
 * Always returns 0; transmit errors are handled inside mcast_sendmsg.
 */
int totemudp_mcast_noflush_send (
	void *udp_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemudp_instance *udp = (struct totemudp_instance *)udp_context;

	mcast_sendmsg (udp, msg, msg_len);

	return (0);
}
/*
 * Run the network interface check right away by calling the timer callback
 * directly instead of waiting for the timer to fire.
 *
 * Always returns 0.
 */
extern int totemudp_iface_check (void *udp_context)
{
	struct totemudp_instance *udp = (struct totemudp_instance *)udp_context;

	timer_function_netif_check_timeout (udp);

	return (0);
}
/*
 * Diff hunk: totemudp_ifaces_get() is added, totemudp_iface_print() and
 * totemudp_iface_get() are removed, and totemudp_net_mtu_adjust() is
 * re-added below without the crypto_sec_header_size() term.
 *
 * totemudp_ifaces_get() always reports one interface: *iface_count is set
 * to 1 and, when status is non-NULL, *status is pointed at a static array
 * whose first entry is the string "OK".  net_context is not used by the
 * added body.  Returns 0.
 */
-extern void totemudp_net_mtu_adjust (void *udp_context, struct totem_config *totem_config)
+int totemudp_ifaces_get (
+ void *net_context,
+ char ***status,
+ unsigned int *iface_count)
{
+ static char *statuses[INTERFACE_MAX] = {(char*)"OK"};
- assert(totem_config->interface_count > 0);
-
- totem_config->net_mtu -= crypto_sec_header_size(totem_config->crypto_cipher_type,
- totem_config->crypto_hash_type) +
- totemip_udpip_header_size(totem_config->interfaces[0].bindnet.family);
-}
-
-const char *totemudp_iface_print (void *udp_context) {
- struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
- const char *ret_char;
-
- ret_char = totemip_print (&instance->my_id);
+ if (status) {
+ *status = statuses;
+ }
+ *iface_count = 1;
- return (ret_char);
+ return (0);
}
/*
 * New totemudp_net_mtu_adjust(): asserts that at least one interface is
 * configured, then lowers totem_config->net_mtu by the value that
 * totemip_udpip_header_size() returns for interface 0's address family.
 * udp_context is not used.
 */
-int totemudp_iface_get (
- void *udp_context,
- struct totem_ip_address *addr)
+extern void totemudp_net_mtu_adjust (void *udp_context, struct totem_config *totem_config)
{
- struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
- int res = 0;
- memcpy (addr, &instance->my_id, sizeof (struct totem_ip_address));
+ assert(totem_config->interface_count > 0);
- return (res);
+ totem_config->net_mtu -= totemip_udpip_header_size(totem_config->interfaces[0].bindnet.family);
}
/*
 * Remember token_target as the destination used by totemudp_token_send(),
 * then invoke the target_set_completed callback with the caller's context.
 *
 * Always returns 0.
 */
int totemudp_token_target_set (
	void *udp_context,
	const struct totem_ip_address *token_target)
{
	struct totemudp_instance *udp = (struct totemudp_instance *)udp_context;

	memcpy (&udp->token_target,
		token_target,
		sizeof (struct totem_ip_address));

	udp->totemudp_target_set_completed (udp->context);

	return (0);
}
/*
 * Read and discard every datagram that is already readable on the multicast
 * receive socket and then on local_mcast_loop[0].  Data is read into the
 * instance's flush iovec and is not delivered anywhere.
 *
 * Returns 0 when no recvmsg() call was made, 1 when the most recent
 * recvmsg() succeeded and -1 when the most recent recvmsg() failed.
 */
extern int totemudp_recv_mcast_empty (
	void *udp_context)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
	/*
	 * Signed so the -1 error return of recvmsg() is compared as -1 rather
	 * than relying on unsigned wrap-around; matches net_deliver_fn().
	 */
	int res;
	struct sockaddr_storage system_from;
	struct msghdr msg_recv;
	struct pollfd ufd;
	int nfds;
	int msg_processed = 0;
	int i;
	int sock;

	/*
	 * Receive datagram.  Zero the header first so members that are not
	 * covered by an enabled HAVE_MSGHDR_* macro are never left
	 * uninitialised.
	 */
	memset (&msg_recv, 0, sizeof (msg_recv));
	msg_recv.msg_name = &system_from;
	msg_recv.msg_namelen = sizeof (struct sockaddr_storage);
	msg_recv.msg_iov = &instance->totemudp_iov_recv_flush;
	msg_recv.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
	msg_recv.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
	msg_recv.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
	msg_recv.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
	msg_recv.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
	msg_recv.msg_accrightslen = 0;
#endif

	for (i = 0; i < 2; i++) {
		sock = -1;
		if (i == 0) {
			sock = instance->totemudp_sockets.mcast_recv;
		}
		if (i == 1) {
			sock = instance->totemudp_sockets.local_mcast_loop[0];
		}
		assert(sock != -1);

		/* Zero-timeout poll: keep reading only while data is pending. */
		do {
			ufd.fd = sock;
			ufd.events = POLLIN;
			nfds = poll (&ufd, 1, 0);
			if (nfds == 1 && ufd.revents & POLLIN) {
				res = recvmsg (sock, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT);
				if (res != -1) {
					msg_processed = 1;
				} else {
					msg_processed = -1;
				}
			}
		} while (nfds == 1);
	}

	return (msg_processed);
}
diff --git a/exec/totemudp.h b/exec/totemudp.h
index 697307a9..67b70da7 100644
--- a/exec/totemudp.h
+++ b/exec/totemudp.h
@@ -1,118 +1,120 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2011 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TOTEMUDP_H_DEFINED
#define TOTEMUDP_H_DEFINED
#include <sys/types.h>
#include <sys/socket.h>
#include <qb/qbloop.h>
#include <corosync/totem/totem.h>
/**
 * Create an instance.
 *
 * On success the new instance is stored in *udp_context and 0 is returned.
 * In this diff the interface_no parameter is removed, iface_change_fn gains
 * a ring_no argument and an mtu_changed callback parameter is added.
 */
extern int totemudp_initialize (
qb_loop_t* poll_handle,
void **udp_context,
struct totem_config *totem_config,
totemsrp_stats_t *stats,
- int interface_no,
void *context,
void (*deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len),
void (*iface_change_fn) (
void *context,
- const struct totem_ip_address *iface_address),
+ const struct totem_ip_address *iface_address,
+ unsigned int ring_no),
+
+ void (*mtu_changed) (
+ void *context,
+ int net_mtu),
void (*target_set_completed) (
void *context));
/* Returns malloc (FRAME_SIZE_MAX); NULL when the allocation fails. */
extern void *totemudp_buffer_alloc (void);
/* Releases ptr with free(). */
extern void totemudp_buffer_release (void *ptr);
/*
 * Stores processor_count and re-arms the interface check timer only when
 * processor_count is 1.  Returns 0.
 */
extern int totemudp_processor_count_set (
void *udp_context,
int processor_count);
/* Sends msg to the address set by totemudp_token_target_set().  Returns 0. */
extern int totemudp_token_send (
void *udp_context,
const void *msg,
unsigned int msg_len);
/* Passes msg to the multicast send path.  Returns 0. */
extern int totemudp_mcast_flush_send (
void *udp_context,
const void *msg,
unsigned int msg_len);
/* Same implementation as totemudp_mcast_flush_send().  Returns 0. */
extern int totemudp_mcast_noflush_send (
void *udp_context,
const void *msg,
unsigned int msg_len);
/*
 * Added in this diff: sets *iface_count to 1 and, when status is non-NULL,
 * points *status at a static array whose first entry is "OK".  Returns 0.
 */
+extern int totemudp_ifaces_get (void *net_context,
+ char ***status,
+ unsigned int *iface_count);
+
/* Delivers all datagrams that are already readable.  Returns 0. */
extern int totemudp_recv_flush (void *udp_context);
/* Performs no work.  Returns 0. */
extern int totemudp_send_flush (void *udp_context);
/* Runs the network interface check immediately.  Returns 0. */
extern int totemudp_iface_check (void *udp_context);
extern int totemudp_finalize (void *udp_context);
/*
 * Lowers totem_config->net_mtu by the header size reported by
 * totemip_udpip_header_size() for interface 0's address family.
 */
extern void totemudp_net_mtu_adjust (void *udp_context, struct totem_config *totem_config);
-extern const char *totemudp_iface_print (void *udp_context);
-
-extern int totemudp_iface_get (
- void *udp_context,
- struct totem_ip_address *addr);
-
/*
 * Copies *token_target into the instance and then calls the
 * target_set_completed callback.  Returns 0.
 */
extern int totemudp_token_target_set (
void *udp_context,
const struct totem_ip_address *token_target);
extern int totemudp_crypto_set (
void *udp_context,
const char *cipher_type,
const char *hash_type);
/*
 * Reads and discards all datagrams that are already readable.  Returns 0
 * when nothing was read, 1 when the last read succeeded, -1 when it failed.
 */
extern int totemudp_recv_mcast_empty (
void *udp_context);
#endif /* TOTEMUDP_H_DEFINED */
diff --git a/exec/totemudpu.c b/exec/totemudpu.c
index 037f82b4..8c0c074c 100644
--- a/exec/totemudpu.c
+++ b/exec/totemudpu.c
@@ -1,1286 +1,1187 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <assert.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netdb.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <sched.h>
#include <time.h>
#include <sys/time.h>
#include <sys/poll.h>
#include <sys/uio.h>
#include <limits.h>
#include <qb/qbdefs.h>
#include <qb/qbloop.h>
#include <corosync/sq.h>
#include <corosync/list.h>
#include <corosync/swab.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/logsys.h>
#include "totemudpu.h"
#include "util.h"
-#include "totemcrypto.h"
#include <nss.h>
#include <pk11pub.h>
#include <pkcs11.h>
#include <prerror.h>
/*
 * Fallback for platforms that do not define MSG_NOSIGNAL: with the value 0
 * it can still be OR-ed into send/recv flags without any effect.
 */
#ifndef MSG_NOSIGNAL
#define MSG_NOSIGNAL 0
#endif
/* Requested SO_RCVBUF size for the token socket. */
#define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX)
/*
 * Bit flags kept in netif_state_report: they select which interface state
 * change (up / down) is reported the next time it is observed.
 */
#define NETIF_STATE_REPORT_UP 1
#define NETIF_STATE_REPORT_DOWN 2
/*
 * Values of netif_bind_state.  BIND_STATE_UNBOUND is 0, which is the value
 * the field has after the instance is zeroed by memset().
 */
#define BIND_STATE_UNBOUND 0
#define BIND_STATE_REGULAR 1
#define BIND_STATE_LOOPBACK 2
/*
 * One entry of an instance's member_list; mcast_sendmsg() sends one copy of
 * every message to each entry.
 */
struct totemudpu_member {
/* Linkage into totemudpu_instance.member_list. */
struct list_head list;
/* Address converted to the sendmsg() destination for this member. */
struct totem_ip_address member;
/* Socket descriptor mcast_sendmsg() passes to sendmsg() for this member. */
int fd;
/* When 0, "only active" sends skip this member unless a merge detect message is due. */
int active;
};
/*
 * State of one UDPU transport instance.  totemudpu_instance_initialize()
 * zeroes the whole structure and then sets the few non-zero defaults.
 * (Diff: the crypto_inst member is removed and the interface change
 * callback gains a ring_no argument.)
 */
struct totemudpu_instance {
- struct crypto_instance *crypto_inst;
-
/* Event loop used for all timers and socket polling of this instance. */
qb_loop_t *totemudpu_poll_handle;
/* Points at totem_config->interfaces[0] after totemudpu_initialize(). */
struct totem_interface *totem_interface;
/* Mask of NETIF_STATE_REPORT_* flags. */
int netif_state_report;
/* One of the BIND_STATE_* values. */
int netif_bind_state;
/* Opaque pointer handed back to every callback below. */
void *context;
/* Called by net_deliver_fn() for each received datagram. */
void (*totemudpu_deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len);
/* Called when the interface is reported up or down. */
void (*totemudpu_iface_change_fn) (
void *context,
- const struct totem_ip_address *iface_address);
+ const struct totem_ip_address *iface_address,
+ unsigned int ring_no);
/* Called at the end of totemudpu_token_target_set(). */
void (*totemudpu_target_set_completed) (void *context);
/*
* Function and data used to log messages
*/
int totemudpu_log_level_security;
int totemudpu_log_level_error;
int totemudpu_log_level_warning;
int totemudpu_log_level_notice;
int totemudpu_log_level_debug;
int totemudpu_subsys_id;
void (*totemudpu_log_printf) (
int level,
int subsys,
const char *function,
const char *file,
int line,
const char *format,
...)__attribute__((format(printf, 6, 7)));
void *udpu_context;
/* Receive buffer; totemudpu_iov_recv is set up to point at it. */
char iov_buffer[FRAME_SIZE_MAX];
struct iovec totemudpu_iov_recv;
/* List of struct totemudpu_member entries. */
struct list_head member_list;
int stats_sent;
/* Incremented by the byte count of every successful recvmsg() in net_deliver_fn(). */
int stats_recv;
int stats_delv;
int stats_remcasts;
int stats_orf_token;
struct timeval stats_tv_start;
/* Copy of the address the instance is currently bound to. */
struct totem_ip_address my_id;
int firstrun;
qb_loop_timer_handle timer_netif_check_timeout;
/* Processor count; starts at 1 and is updated by totemudpu_processor_count_set(). */
unsigned int my_memb_entries;
struct totem_config *totem_config;
totemsrp_stats_t *stats;
/* Destination used by totemudpu_token_send(). */
struct totem_ip_address token_target;
/* Bound socket used for receiving and for unicast sends. */
int token_socket;
qb_loop_timer_handle timer_merge_detect_timeout;
/* When non-zero, the next "only active" multicast also goes to inactive members. */
int send_merge_detect_message;
/* Counts multicasts that were sent to all members. */
unsigned int merge_detect_messages_sent_before_timeout;
};
/*
 * A message (pointer and length) paired with the instance it belongs to.
 * NOTE(review): no user of this struct is visible in this part of the file;
 * confirm it is still needed.
 */
struct work_item {
const void *msg;
unsigned int msg_len;
struct totemudpu_instance *instance;
};
/*
 * Forward declarations for functions that are used before their definition.
 */
static int totemudpu_build_sockets (
struct totemudpu_instance *instance,
struct totem_ip_address *bindnet_address,
struct totem_ip_address *bound_to);
static int totemudpu_create_sending_socket(
void *udpu_context,
const struct totem_ip_address *member);
/* Not static: this one has external linkage. */
int totemudpu_member_list_rebind_ip (
void *udpu_context);
static void totemudpu_start_merge_detect_timeout(
void *udpu_context);
static void totemudpu_stop_merge_detect_timeout(
void *udpu_context);
/*
 * Loopback address used as bind address while the interface is down.  It is
 * file-scope, so it is shared by every instance; totemudpu_initialize()
 * fills it in.
 */
static struct totem_ip_address localhost;
/*
 * Put a freshly allocated instance into its default state: everything is
 * zeroed, both interface state reports are enabled, the receive iovec is
 * pointed at the embedded buffer and the member list is emptied.
 */
static void totemudpu_instance_initialize (struct totemudpu_instance *instance)
{
	struct iovec *recv_iov = &instance->totemudpu_iov_recv;

	memset (instance, 0, sizeof (*instance));

	/* Report the first "up" as well as the first "down" transition. */
	instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN;

	recv_iov->iov_base = instance->iov_buffer;
	recv_iov->iov_len = FRAME_SIZE_MAX;

	/* There is always at least one processor. */
	instance->my_memb_entries = 1;

	list_init (&instance->member_list);
}
/*
 * Log through the instance's logging callback, adding the subsystem id and
 * the call site (function, file, line).
 *
 * Requires a variable named "instance" in the calling scope.
 *
 * The definition deliberately does NOT end in a semicolon: the caller's own
 * ';' completes the do/while statement, so the macro behaves like a single
 * statement and can be used in an unbraced if/else.
 */
#define log_printf(level, format, args...) \
do { \
	instance->totemudpu_log_printf ( \
		level, instance->totemudpu_subsys_id, \
		__FUNCTION__, __FILE__, __LINE__, \
		(const char *)format, ##args); \
} while (0)
/*
 * Log fmt/args followed by ": <error text> (<err_num>)", where the error
 * text comes from qb_strerror_r(err_num, ...).
 *
 * Requires a variable named "instance" in the calling scope.  fmt must be a
 * string literal because it is concatenated with ": %s (%d)" at compile time.
 */
#define LOGSYS_PERROR(err_num, level, fmt, args...) \
do { \
char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \
const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \
instance->totemudpu_log_printf ( \
level, instance->totemudpu_subsys_id, \
__FUNCTION__, __FILE__, __LINE__, \
fmt ": %s (%d)", ##args, _error_ptr, err_num); \
} while(0)
/*
 * Crypto configuration hook.  None of the arguments are used; the call
 * always returns 0.
 */
int totemudpu_crypto_set (
	void *udpu_context,
	const char *cipher_type,
	const char *hash_type)
{
	(void)udpu_context;
	(void)cipher_type;
	(void)hash_type;

	return 0;
}
/*
 * Send msg (msg_len bytes) as one datagram from the token socket to
 * system_to, using the interface's configured port.
 *
 * A sendmsg() failure is only logged at debug level; nothing is reported to
 * the caller.  (Diff: the encrypt-and-sign step is removed, so the caller's
 * buffer is now sent as is.)
 */
static inline void ucast_sendmsg (
struct totemudpu_instance *instance,
struct totem_ip_address *system_to,
const void *msg,
unsigned int msg_len)
{
struct msghdr msg_ucast;
int res = 0;
- size_t buf_out_len;
- unsigned char buf_out[FRAME_SIZE_MAX];
struct sockaddr_storage sockaddr;
struct iovec iovec;
int addrlen;
- /*
- * Encrypt and digest the message
- */
- if (crypto_encrypt_and_sign (
- instance->crypto_inst,
- (const unsigned char *)msg,
- msg_len,
- buf_out,
- &buf_out_len) != 0) {
- log_printf(LOGSYS_LEVEL_CRIT, "Error encrypting/signing packet (non-critical)");
- return;
- }
-
- iovec.iov_base = (void *)buf_out;
- iovec.iov_len = buf_out_len;
/* The cast drops const only because iov_base is not const-qualified. */
+ iovec.iov_base = (void *)msg;
+ iovec.iov_len = msg_len;
/*
* Build unicast message
*/
totemip_totemip_to_sockaddr_convert(system_to,
instance->totem_interface->ip_port, &sockaddr, &addrlen);
memset(&msg_ucast, 0, sizeof(msg_ucast));
msg_ucast.msg_name = &sockaddr;
msg_ucast.msg_namelen = addrlen;
msg_ucast.msg_iov = (void *)&iovec;
msg_ucast.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
msg_ucast.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
msg_ucast.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
msg_ucast.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
msg_ucast.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
msg_ucast.msg_accrightslen = 0;
#endif
/*
* Transmit unicast message
* An error here is recovered by totemsrp
*/
res = sendmsg (instance->token_socket, &msg_ucast, MSG_NOSIGNAL);
if (res < 0) {
LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug,
"sendmsg(ucast) failed (non-critical)");
}
}
/*
 * Emulate a multicast by sending msg (msg_len bytes) once to every entry of
 * instance->member_list, each through that member's own socket.
 *
 * only_active: when non-zero, inactive members are skipped unless
 * instance->send_merge_detect_message is set.
 *
 * A sendmsg() failure is only logged at debug level and the loop continues
 * with the next member.  (Diff: the encrypt-and-sign step is removed, so the
 * caller's buffer is now sent as is.)
 */
static inline void mcast_sendmsg (
struct totemudpu_instance *instance,
const void *msg,
unsigned int msg_len,
int only_active)
{
struct msghdr msg_mcast;
int res = 0;
- size_t buf_out_len;
- unsigned char buf_out[FRAME_SIZE_MAX];
struct iovec iovec;
struct sockaddr_storage sockaddr;
int addrlen;
struct list_head *list;
struct totemudpu_member *member;
- /*
- * Encrypt and digest the message
- */
- if (crypto_encrypt_and_sign (
- instance->crypto_inst,
- (const unsigned char *)msg,
- msg_len,
- buf_out,
- &buf_out_len) != 0) {
- log_printf(LOGSYS_LEVEL_CRIT, "Error encrypting/signing packet (non-critical)");
- return;
- }
-
- iovec.iov_base = (void *)buf_out;
- iovec.iov_len = buf_out_len;
+ iovec.iov_base = (void *)msg;
+ iovec.iov_len = msg_len;
memset(&msg_mcast, 0, sizeof(msg_mcast));
/*
* Build multicast message
*/
for (list = instance->member_list.next;
list != &instance->member_list;
list = list->next) {
member = list_entry (list,
struct totemudpu_member,
list);
/*
* Skip this member when the message is not a "flush" message, the
* member is inactive and the timeout for sending a merge detect
* message has not expired yet.
*/
if (only_active && !member->active && !instance->send_merge_detect_message)
continue ;
totemip_totemip_to_sockaddr_convert(&member->member,
instance->totem_interface->ip_port, &sockaddr, &addrlen);
msg_mcast.msg_name = &sockaddr;
msg_mcast.msg_namelen = addrlen;
msg_mcast.msg_iov = (void *)&iovec;
msg_mcast.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
msg_mcast.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
msg_mcast.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
msg_mcast.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
msg_mcast.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
msg_mcast.msg_accrightslen = 0;
#endif
/*
* Transmit multicast message
* An error here is recovered by totemsrp
*/
res = sendmsg (member->fd, &msg_mcast, MSG_NOSIGNAL);
if (res < 0) {
LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug,
"sendmsg(mcast) failed (non-critical)");
}
}
if (!only_active || instance->send_merge_detect_message) {
/*
* The current message was sent to all nodes: count it and clear the
* pending merge detect request.
*/
instance->merge_detect_messages_sent_before_timeout++;
instance->send_merge_detect_message = 0;
}
}
/*
 * Shut the instance down: when a token socket is open (descriptor > 0) it is
 * removed from the poll loop and closed, then the merge detect timer is
 * stopped.
 *
 * Always returns 0.
 */
int totemudpu_finalize (
	void *udpu_context)
{
	struct totemudpu_instance *udpu = (struct totemudpu_instance *)udpu_context;
	const int token_fd = udpu->token_socket;

	if (token_fd > 0) {
		qb_loop_poll_del (udpu->totemudpu_poll_handle, token_fd);
		close (token_fd);
	}

	totemudpu_stop_merge_detect_timeout(udpu);

	return (0);
}
/*
 * Poll callback for the token socket: read one datagram from fd into the
 * instance's receive buffer and pass it to the deliver callback.
 *
 * data is the struct totemudpu_instance registered with the poll loop;
 * revents is not examined.  Always returns 0, including when recvmsg()
 * fails, in which case nothing is delivered.
 * (Diff: the authenticate-and-decrypt step is removed.)
 */
static int net_deliver_fn (
int fd,
int revents,
void *data)
{
struct totemudpu_instance *instance = (struct totemudpu_instance *)data;
struct msghdr msg_recv;
struct iovec *iovec;
struct sockaddr_storage system_from;
int bytes_received;
- int res = 0;
iovec = &instance->totemudpu_iov_recv;
/*
* Receive datagram
*/
msg_recv.msg_name = &system_from;
msg_recv.msg_namelen = sizeof (struct sockaddr_storage);
msg_recv.msg_iov = iovec;
msg_recv.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
msg_recv.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
msg_recv.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
msg_recv.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
msg_recv.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
msg_recv.msg_accrightslen = 0;
#endif
bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT);
if (bytes_received == -1) {
return (0);
} else {
instance->stats_recv += bytes_received;
}
- /*
- * Authenticate and if authenticated, decrypt datagram
- */
-
- res = crypto_authenticate_and_decrypt (instance->crypto_inst, iovec->iov_base, &bytes_received);
- if (res == -1) {
- log_printf (instance->totemudpu_log_level_security, "Received message has invalid digest... ignoring.");
- log_printf (instance->totemudpu_log_level_security,
- "Invalid packet data");
- iovec->iov_len = FRAME_SIZE_MAX;
- return 0;
- }
/* Temporarily shrink the iovec to the datagram size for the callback. */
iovec->iov_len = bytes_received;
/*
* Handle incoming message
*/
instance->totemudpu_deliver_fn (
instance->context,
iovec->iov_base,
iovec->iov_len);
/* Restore the full buffer size for the next receive. */
iovec->iov_len = FRAME_SIZE_MAX;
return (0);
}
/*
 * Thin wrapper around totemip_iface_check() that supplies the instance's
 * clear_node_high_bit setting.  The result of totemip_iface_check() is
 * returned unchanged.
 */
static int netif_determine (
	struct totemudpu_instance *instance,
	struct totem_ip_address *bindnet,
	struct totem_ip_address *bound_to,
	int *interface_up,
	int *interface_num)
{
	return (totemip_iface_check (bindnet, bound_to,
		interface_up, interface_num,
		instance->totem_config->clear_node_high_bit));
}
/*
 * Timer callback that checks the state of the network interface.
 *
 * When the state is unchanged (still down while bound to loopback, or still
 * up while bound regularly with a single processor) the timer is simply
 * re-armed.  Otherwise the token socket is closed and rebuilt, bound to the
 * loopback address when the interface is down or to the configured bindnet
 * address when it is up, and the change is logged and reported through the
 * interface change callback.
 *
 * data is the struct totemudpu_instance.
 * (Diff: the interface change callback is now called with ring number 0.)
 */
static void timer_function_netif_check_timeout (
void *data)
{
struct totemudpu_instance *instance = (struct totemudpu_instance *)data;
int interface_up;
int interface_num;
struct totem_ip_address *bind_address;
/*
* Build sockets for every interface
*/
/* The return value is not checked here. */
netif_determine (instance,
&instance->totem_interface->bindnet,
&instance->totem_interface->boundto,
&interface_up, &interface_num);
/*
* If the network interface isn't back up and we are already
* in loopback mode, add timer to check again and return
*/
if ((instance->netif_bind_state == BIND_STATE_LOOPBACK &&
interface_up == 0) ||
(instance->my_memb_entries == 1 &&
instance->netif_bind_state == BIND_STATE_REGULAR &&
interface_up == 1)) {
qb_loop_timer_add (instance->totemudpu_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
/*
* The timer added above also covers checking for a downed regular
* interface.
*/
return;
}
/* The state changed: drop the old socket before building a new one. */
if (instance->token_socket > 0) {
qb_loop_poll_del (instance->totemudpu_poll_handle,
instance->token_socket);
close (instance->token_socket);
}
if (interface_up == 0) {
/*
* Interface is not up
*/
instance->netif_bind_state = BIND_STATE_LOOPBACK;
bind_address = &localhost;
/*
* Add a timer to retry building interfaces and request memb_gather_enter
*/
qb_loop_timer_add (instance->totemudpu_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
} else {
/*
* Interface is up
*/
instance->netif_bind_state = BIND_STATE_REGULAR;
bind_address = &instance->totem_interface->bindnet;
}
/*
* Create and bind the token socket
*/
/* The return value is not checked here. */
totemudpu_build_sockets (instance,
bind_address,
&instance->totem_interface->boundto);
qb_loop_poll_add (instance->totemudpu_poll_handle,
QB_LOOP_MED,
instance->token_socket,
POLLIN, instance, net_deliver_fn);
totemip_copy (&instance->my_id, &instance->totem_interface->boundto);
/*
* This reports changes in the interface to the user and totemsrp
*/
if (instance->netif_bind_state == BIND_STATE_REGULAR) {
if (instance->netif_state_report & NETIF_STATE_REPORT_UP) {
log_printf (instance->totemudpu_log_level_notice,
"The network interface [%s] is now up.",
totemip_print (&instance->totem_interface->boundto));
/* "Up" has been reported; only report the next "down". */
instance->netif_state_report = NETIF_STATE_REPORT_DOWN;
- instance->totemudpu_iface_change_fn (instance->context, &instance->my_id);
+ instance->totemudpu_iface_change_fn (instance->context, &instance->my_id, 0);
}
/*
* Add a timer to check for interface going down in single membership
*/
if (instance->my_memb_entries == 1) {
qb_loop_timer_add (instance->totemudpu_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
}
} else {
if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) {
log_printf (instance->totemudpu_log_level_notice,
"The network interface is down.");
- instance->totemudpu_iface_change_fn (instance->context, &instance->my_id);
+ instance->totemudpu_iface_change_fn (instance->context, &instance->my_id, 0);
}
/* "Down" has been handled; only report the next "up". */
instance->netif_state_report = NETIF_STATE_REPORT_UP;
}
}
/*
 * Raise the priority of sock to INTERACTIVE so that our messages are not
 * queued behind other traffic.
 *
 * Compiles to an empty function on platforms without SO_PRIORITY.  A
 * setsockopt() failure is logged as a warning and otherwise ignored.
 */
static void totemudpu_traffic_control_set(struct totemudpu_instance *instance, int sock)
{
#ifdef SO_PRIORITY
	int interactive_prio = 6; /* TC_PRIO_INTERACTIVE */
	int rc;

	rc = setsockopt(sock, SOL_SOCKET, SO_PRIORITY,
		&interactive_prio, sizeof(interactive_prio));
	if (rc != 0) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"Could not set traffic priority");
	}
#endif
}
/*
 * Create instance->token_socket: a non-blocking UDP socket of
 * bindnet_address's family, bound to bound_to and the interface's port, with
 * an enlarged receive buffer.
 *
 * interface_num is not used.
 *
 * Returns 0 on success and -1 when socket(), fcntl() or bind() fails (each
 * failure is logged as a warning).  Failing to enlarge the receive buffer is
 * logged as a notice and does not fail the call.
 */
static int totemudpu_build_sockets_ip (
	struct totemudpu_instance *instance,
	struct totem_ip_address *bindnet_address,
	struct totem_ip_address *bound_to,
	int interface_num)
{
	struct sockaddr_storage bind_sa;
	int bind_sa_len;
	unsigned int rcvbuf_bytes;

	/*
	 * Setup unicast socket
	 */
	instance->token_socket = socket (bindnet_address->family, SOCK_DGRAM, 0);
	if (instance->token_socket == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"socket() failed");
		return (-1);
	}

	totemip_nosigpipe (instance->token_socket);

	if (fcntl (instance->token_socket, F_SETFL, O_NONBLOCK) == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"Could not set non-blocking operation on token socket");
		return (-1);
	}

	/*
	 * Bind to unicast socket used for token send/receives
	 * This has the side effect of binding to the correct interface
	 */
	totemip_totemip_to_sockaddr_convert(bound_to,
		instance->totem_interface->ip_port, &bind_sa, &bind_sa_len);
	if (bind (instance->token_socket,
		(struct sockaddr *)&bind_sa, bind_sa_len) == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"bind token socket failed");
		return (-1);
	}

	/*
	 * The token_socket can receive many messages, so ask for a large
	 * receive buffer on it.
	 */
	rcvbuf_bytes = MCAST_SOCKET_BUFFER_SIZE;
	if (setsockopt (instance->token_socket, SOL_SOCKET, SO_RCVBUF,
		&rcvbuf_bytes, sizeof (rcvbuf_bytes)) == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_notice,
			"Could not set recvbuf size");
	}

	return 0;
}
/*
 * Added by this diff.  Always reports one interface: *iface_count is set to
 * 1 and, when status is non-NULL, *status is pointed at a static array whose
 * first entry is the string "OK".  net_context is not used.  Returns 0.
 */
+int totemudpu_ifaces_get (
+ void *net_context,
+ char ***status,
+ unsigned int *iface_count)
+{
+ static char *statuses[INTERFACE_MAX] = {(char*)"OK"};
+
+ if (status) {
+ *status = statuses;
+ }
+ *iface_count = 1;
+
+ return (0);
+}
+
+
/*
 * Resolve the address to bind to, build the token socket on it and re-bind
 * all known members.
 *
 * Returns -1 without touching any socket when netif_determine() fails.
 * Otherwise returns the result of totemudpu_build_sockets_ip(); the traffic
 * priority and the member re-bind are applied whatever that result is.
 */
static int totemudpu_build_sockets (
	struct totemudpu_instance *instance,
	struct totem_ip_address *bindnet_address,
	struct totem_ip_address *bound_to)
{
	int iface_num;
	int iface_up;
	int rc;

	/*
	 * Determine the ip address bound to and the interface name
	 */
	if (netif_determine (instance, bindnet_address, bound_to,
		&iface_up, &iface_num) == -1) {
		return (-1);
	}

	totemip_copy(&instance->my_id, bound_to);

	rc = totemudpu_build_sockets_ip (instance,
		bindnet_address, bound_to, iface_num);

	/* We only send out of the token socket */
	totemudpu_traffic_control_set(instance, instance->token_socket);

	/*
	 * Rebind all members to new ips
	 */
	totemudpu_member_list_rebind_ip(instance);

	return rc;
}
/*
- * Totem Network interface - also does encryption/decryption
+ * Totem Network interface
* depends on poll abstraction, POSIX, IPV4
*/
/*
 * Create an instance.
 *
 * Allocates and initialises a struct totemudpu_instance, copies the logging
 * configuration and callbacks into it, binds it to
 * totem_config->interfaces[0], schedules the first interface check 100 ms
 * from now and starts the merge detect timeout.
 *
 * Returns 0 and stores the instance in *udpu_context on success, or -1 when
 * the allocation fails.
 *
 * The mtu_changed callback is accepted but not stored by this function.
 * (Diff: interface_no and the crypto initialisation are removed.)
 */
int totemudpu_initialize (
qb_loop_t *poll_handle,
void **udpu_context,
struct totem_config *totem_config,
totemsrp_stats_t *stats,
- int interface_no,
void *context,
void (*deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len),
void (*iface_change_fn) (
void *context,
- const struct totem_ip_address *iface_address),
+ const struct totem_ip_address *iface_address,
+ unsigned int ring_no),
+
+ void (*mtu_changed) (
+ void *context,
+ int net_mtu),
void (*target_set_completed) (
void *context))
{
struct totemudpu_instance *instance;
instance = malloc (sizeof (struct totemudpu_instance));
if (instance == NULL) {
return (-1);
}
totemudpu_instance_initialize (instance);
instance->totem_config = totem_config;
instance->stats = stats;
/*
* Configure logging
*/
/* The security level is hard-coded to 1 instead of being read from the configuration. */
instance->totemudpu_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security;
instance->totemudpu_log_level_error = totem_config->totem_logging_configuration.log_level_error;
instance->totemudpu_log_level_warning = totem_config->totem_logging_configuration.log_level_warning;
instance->totemudpu_log_level_notice = totem_config->totem_logging_configuration.log_level_notice;
instance->totemudpu_log_level_debug = totem_config->totem_logging_configuration.log_level_debug;
instance->totemudpu_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
instance->totemudpu_log_printf = totem_config->totem_logging_configuration.log_printf;
- /*
- * Initialize random number generator for later use to generate salt
- */
- instance->crypto_inst = crypto_init (totem_config->private_key,
- totem_config->private_key_len,
- totem_config->crypto_cipher_type,
- totem_config->crypto_hash_type,
- instance->totemudpu_log_printf,
- instance->totemudpu_log_level_security,
- instance->totemudpu_log_level_notice,
- instance->totemudpu_log_level_error,
- instance->totemudpu_subsys_id);
- if (instance->crypto_inst == NULL) {
- free(instance);
- return (-1);
- }
/*
* Initialize local variables for totemudpu
*/
- instance->totem_interface = &totem_config->interfaces[interface_no];
+ instance->totem_interface = &totem_config->interfaces[0];
memset (instance->iov_buffer, 0, FRAME_SIZE_MAX);
instance->totemudpu_poll_handle = poll_handle;
instance->totem_interface->bindnet.nodeid = instance->totem_config->node_id;
instance->context = context;
instance->totemudpu_deliver_fn = deliver_fn;
instance->totemudpu_iface_change_fn = iface_change_fn;
instance->totemudpu_target_set_completed = target_set_completed;
/* The file-scope loopback address is always created as AF_INET. */
totemip_localhost (AF_INET, &localhost);
localhost.nodeid = instance->totem_config->node_id;
/*
* RRP layer isn't ready to receive message because it hasn't
* initialized yet. Add short timer to check the interfaces.
*/
qb_loop_timer_add (instance->totemudpu_poll_handle,
QB_LOOP_MED,
100*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
totemudpu_start_merge_detect_timeout(instance);
*udpu_context = instance;
return (0);
}
/*
 * Allocate one buffer of FRAME_SIZE_MAX bytes.
 *
 * Returns whatever malloc() returns, i.e. NULL when the allocation fails.
 */
void *totemudpu_buffer_alloc (void)
{
	void *frame_buf = malloc (FRAME_SIZE_MAX);

	return (frame_buf);
}
/*
 * Release a buffer with free().
 *
 * ptr may be NULL, in which case free() does nothing.  The function has no
 * return value, so free() is called as a plain statement: a void function
 * must not "return" an expression in ISO C.
 */
void totemudpu_buffer_release (void *ptr)
{
	free (ptr);
}
/*
 * Record the current number of processors and (re)arm the interface check.
 *
 * The pending netif check timer is always removed.  It is armed again, with
 * the configured downcheck timeout, only when exactly one processor remains.
 *
 * Always returns 0.
 */
int totemudpu_processor_count_set (
	void *udpu_context,
	int processor_count)
{
	struct totemudpu_instance *udpu = (struct totemudpu_instance *)udpu_context;

	udpu->my_memb_entries = processor_count;

	qb_loop_timer_del (udpu->totemudpu_poll_handle,
		udpu->timer_netif_check_timeout);

	if (processor_count != 1) {
		return (0);
	}

	qb_loop_timer_add (udpu->totemudpu_poll_handle,
		QB_LOOP_MED,
		udpu->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
		(void *)udpu,
		timer_function_netif_check_timeout,
		&udpu->timer_netif_check_timeout);

	return (0);
}
/*
 * Receive-side flush hook.  No work is performed; the call always returns 0.
 */
int totemudpu_recv_flush (void *udpu_context)
{
	(void)udpu_context;

	return (0);
}
/*
 * Send-side flush hook.  No work is performed; the call always returns 0.
 */
int totemudpu_send_flush (void *udpu_context)
{
	(void)udpu_context;

	return (0);
}
/*
 * Send msg (msg_len bytes) to the stored token target via ucast_sendmsg().
 *
 * Always returns 0; ucast_sendmsg() has no return value to propagate.
 */
int totemudpu_token_send (
	void *udpu_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemudpu_instance *udpu = (struct totemudpu_instance *)udpu_context;

	ucast_sendmsg (udpu, &udpu->token_target, msg, msg_len);

	return (0);
}
/*
 * Send msg (msg_len bytes) to every member, active or not: mcast_sendmsg()
 * is called with only_active == 0.
 *
 * Always returns 0.
 */
int totemudpu_mcast_flush_send (
	void *udpu_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemudpu_instance *udpu = (struct totemudpu_instance *)udpu_context;

	mcast_sendmsg (udpu, msg, msg_len, 0);

	return (0);
}
/*
 * Send msg (msg_len bytes) with only_active == 1: mcast_sendmsg() skips
 * inactive members unless a merge detect message is due.
 *
 * Always returns 0.
 */
int totemudpu_mcast_noflush_send (
	void *udpu_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemudpu_instance *udpu = (struct totemudpu_instance *)udpu_context;

	mcast_sendmsg (udpu, msg, msg_len, 1);

	return (0);
}
/*
 * Run the network interface check right away by calling the timer callback
 * directly instead of waiting for the timer to fire.
 *
 * Always returns 0.
 */
extern int totemudpu_iface_check (void *udpu_context)
{
	struct totemudpu_instance *udpu = (struct totemudpu_instance *)udpu_context;

	timer_function_netif_check_timeout (udpu);

	return (0);
}
/*
 * Asserts that at least one interface is configured, then lowers
 * totem_config->net_mtu by the value that totemip_udpip_header_size()
 * returns for interface 0's address family.  udpu_context is not used.
 *
 * Diff hunk: the crypto_sec_header_size() term is dropped from the
 * adjustment, and totemudpu_iface_print() and totemudpu_iface_get() are
 * removed.
 */
extern void totemudpu_net_mtu_adjust (void *udpu_context, struct totem_config *totem_config)
{
assert(totem_config->interface_count > 0);
- totem_config->net_mtu -= crypto_sec_header_size(totem_config->crypto_cipher_type,
- totem_config->crypto_hash_type) +
- totemip_udpip_header_size(totem_config->interfaces[0].bindnet.family);
-}
-
-const char *totemudpu_iface_print (void *udpu_context) {
- struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
- const char *ret_char;
-
- ret_char = totemip_print (&instance->my_id);
-
- return (ret_char);
+ totem_config->net_mtu -= totemip_udpip_header_size(totem_config->interfaces[0].bindnet.family);
}
-int totemudpu_iface_get (
- void *udpu_context,
- struct totem_ip_address *addr)
-{
- struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
- int res = 0;
-
- memcpy (addr, &instance->my_id, sizeof (struct totem_ip_address));
-
- return (res);
-}
/*
 * Record a new token target and signal completion.
 *
 * *token_target is copied into instance->token_target, after which the
 * instance's totemudpu_target_set_completed callback is invoked with
 * instance->context.
 *
 * Always returns 0.
 */
int totemudpu_token_target_set (
	void *udpu_context,
	const struct totem_ip_address *token_target)
{
	struct totemudpu_instance *instance = udpu_context;

	memcpy (&instance->token_target,
		token_target,
		sizeof (struct totem_ip_address));

	instance->totemudpu_target_set_completed (instance->context);

	return (0);
}
/*
 * Drain every datagram currently queued on the instance's token socket.
 *
 * The socket is polled with a zero timeout; while it is readable one
 * datagram at a time is received into instance->totemudpu_iov_recv. The
 * received data is not processed any further by this function.
 *
 * Returns:
 *   0  nothing was readable,
 *   1  the last receive succeeded,
 *  -1  the last receive failed, or poll() flagged the descriptor without
 *      POLLIN (error / hang-up / invalid descriptor).
 */
extern int totemudpu_recv_mcast_empty (
	void *udpu_context)
{
	struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
	/* recvmsg() returns ssize_t; keep it signed so the -1 test is exact */
	ssize_t res;
	struct sockaddr_storage system_from;
	struct msghdr msg_recv;
	struct pollfd ufd;
	int nfds;
	int msg_processed = 0;

	/*
	 * Receive datagram
	 */
	msg_recv.msg_name = &system_from;
	msg_recv.msg_namelen = sizeof (struct sockaddr_storage);
	msg_recv.msg_iov = &instance->totemudpu_iov_recv;
	msg_recv.msg_iovlen = 1;
#ifdef HAVE_MSGHDR_CONTROL
	msg_recv.msg_control = 0;
#endif
#ifdef HAVE_MSGHDR_CONTROLLEN
	msg_recv.msg_controllen = 0;
#endif
#ifdef HAVE_MSGHDR_FLAGS
	msg_recv.msg_flags = 0;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTS
	msg_recv.msg_accrights = NULL;
#endif
#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
	msg_recv.msg_accrightslen = 0;
#endif

	for (;;) {
		ufd.fd = instance->token_socket;
		ufd.events = POLLIN;
		ufd.revents = 0;

		nfds = poll (&ufd, 1, 0);
		if (nfds != 1) {
			/* Nothing pending (0) or poll() itself failed (-1) */
			break;
		}

		if ((ufd.revents & POLLIN) == 0) {
			/*
			 * poll() also counts descriptors that only carry
			 * POLLERR/POLLHUP/POLLNVAL. There is nothing to read
			 * in that case, so polling again would spin forever.
			 */
			msg_processed = -1;
			break;
		}

		/* msg_namelen is value-result: restore it before every receive */
		msg_recv.msg_namelen = sizeof (struct sockaddr_storage);

		res = recvmsg (instance->token_socket, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT);
		msg_processed = (res != -1) ? 1 : -1;
	}

	return (msg_processed);
}
/*
 * Create a non-blocking datagram socket for sending to one member.
 *
 * The socket uses member->family, gets its send buffer raised to
 * MCAST_SOCKET_BUFFER_SIZE (a failure here is only logged) and is bound to
 * the instance's own address (instance->my_id) with port argument 0.
 *
 * Returns the new descriptor, or -1 when socket creation, switching to
 * non-blocking mode or bind() fails; the descriptor is closed on the two
 * latter paths.
 */
static int totemudpu_create_sending_socket(
	void *udpu_context,
	const struct totem_ip_address *member)
{
	/* Keep the name "instance": the logging macros are used with it in scope */
	struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
	struct sockaddr_storage bind_addr;
	unsigned int sndbuf_bytes;
	int bind_addr_len;
	int sock;

	sock = socket (member->family, SOCK_DGRAM, 0);
	if (sock == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"Could not create socket for new member");
		return (-1);
	}

	totemip_nosigpipe (sock);

	if (fcntl (sock, F_SETFL, O_NONBLOCK) == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"Could not set non-blocking operation on token socket");
		close (sock);
		return (-1);
	}

	/*
	 * The socket carries multicast-style traffic, so ask for a large
	 * send buffer. Not getting it is not fatal.
	 */
	sndbuf_bytes = MCAST_SOCKET_BUFFER_SIZE;
	if (setsockopt (sock, SOL_SOCKET, SO_SNDBUF,
		&sndbuf_bytes, sizeof (sndbuf_bytes)) == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_notice,
			"Could not set sendbuf size");
	}

	/*
	 * Bind to the sending interface
	 */
	totemip_totemip_to_sockaddr_convert(&instance->my_id, 0, &bind_addr, &bind_addr_len);
	if (bind (sock, (struct sockaddr *)&bind_addr, bind_addr_len) == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"bind token socket failed");
		close (sock);
		return (-1);
	}

	return (sock);
}
/*
 * Register a new UDPU member and create its sending socket.
 *
 * A struct totemudpu_member is allocated, zeroed, appended to
 * instance->member_list and filled with a copy of *member. This hunk
 * changes the initial value of its active flag from 0 to 1.
 *
 * The local and ring_no parameters introduced by this hunk are not
 * referenced in the body.
 *
 * Returns 0 on success, -1 when the allocation fails.
 *
 * NOTE(review): the result of totemudpu_create_sending_socket() is stored
 * unchecked, so a member whose socket could not be created stays in the
 * list with fd == -1 and 0 is still returned - confirm this is intended.
 */
int totemudpu_member_add (
void *udpu_context,
- const struct totem_ip_address *member)
+ const struct totem_ip_address *local,
+ const struct totem_ip_address *member,
+ int ring_no)
{
struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
struct totemudpu_member *new_member;
new_member = malloc (sizeof (struct totemudpu_member));
if (new_member == NULL) {
return (-1);
}
memset(new_member, 0, sizeof(*new_member));
log_printf (LOGSYS_LEVEL_NOTICE, "adding new UDPU member {%s}",
totemip_print(member));
list_init (&new_member->list);
list_add_tail (&new_member->list, &instance->member_list);
memcpy (&new_member->member, member, sizeof (struct totem_ip_address));
new_member->fd = totemudpu_create_sending_socket(udpu_context, member);
- new_member->active = 0;
+ new_member->active = 1;
return (0);
}
/*
 * Remove the member whose address equals *token_target.
 *
 * The member list is searched with totemip_compare(); on the first match
 * the member's descriptor (if > 0) is removed from the poll loop and
 * closed, and the list entry is unlinked.
 *
 * The ring_no parameter introduced by this hunk is not referenced in the
 * body. Always returns 0, including when no member matched.
 *
 * NOTE(review): the entry is unlinked with list_del() but the
 * struct totemudpu_member itself is not freed here, although
 * totemudpu_member_add() obtains it from malloc() - looks like a leak,
 * confirm whether ownership is released elsewhere.
 * NOTE(review): the "fd > 0" test means descriptor 0 would never be
 * closed; "fd >= 0" may have been intended - confirm.
 */
int totemudpu_member_remove (
void *udpu_context,
- const struct totem_ip_address *token_target)
+ const struct totem_ip_address *token_target,
+ int ring_no)
{
int found = 0;
struct list_head *list;
struct totemudpu_member *member;
struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
/*
* Find the member to remove and close its socket
*/
for (list = instance->member_list.next;
list != &instance->member_list;
list = list->next) {
member = list_entry (list,
struct totemudpu_member,
list);
if (totemip_compare (token_target, &member->member)==0) {
log_printf(LOGSYS_LEVEL_NOTICE,
"removing UDPU member {%s}",
totemip_print(&member->member));
if (member->fd > 0) {
log_printf(LOGSYS_LEVEL_DEBUG,
"Closing socket to: {%s}",
totemip_print(&member->member));
qb_loop_poll_del (instance->totemudpu_poll_handle,
member->fd);
close (member->fd);
}
found = 1;
break;
}
}
/*
* Delete the member from the list
*/
if (found) {
/* "list" still points at the matching entry because the loop broke out */
list_del (list);
}
/* Dead store: instance is a local that is not read again */
instance = NULL;
return (0);
}
/*
 * Re-create the sending socket of every known member.
 *
 * For each entry of instance->member_list an open descriptor (fd > 0) is
 * closed and replaced by a fresh one from
 * totemudpu_create_sending_socket(); that function's result is stored
 * as-is, including -1 on failure.
 *
 * Always returns 0.
 */
int totemudpu_member_list_rebind_ip (
	void *udpu_context)
{
	struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
	struct list_head *iter = instance->member_list.next;

	while (iter != &instance->member_list) {
		struct totemudpu_member *entry =
			list_entry (iter, struct totemudpu_member, list);

		if (entry->fd > 0) {
			close (entry->fd);
		}
		entry->fd = totemudpu_create_sending_socket(udpu_context, &entry->member);

		iter = iter->next;
	}

	return (0);
}
-int totemudpu_member_set_active (
- void *udpu_context,
- const struct totem_ip_address *member_ip,
- int active)
-{
- struct list_head *list;
- struct totemudpu_member *member;
- int addr_found = 0;
-
- struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
-
- /*
- * Find the member to set active flag
- */
- for (list = instance->member_list.next; list != &instance->member_list; list = list->next) {
- member = list_entry (list, struct totemudpu_member, list);
-
- if (totemip_compare (member_ip, &member->member) == 0) {
- log_printf(LOGSYS_LEVEL_DEBUG,
- "Marking UDPU member %s %s",
- totemip_print(&member->member),
- (active ? "active" : "inactive"));
-
- member->active = active;
- addr_found = 1;
-
- break;
- }
- }
-
- if (!addr_found) {
- log_printf(LOGSYS_LEVEL_DEBUG,
- "Can't find UDPU member %s (should be marked as %s)",
- totemip_print(member_ip),
- (active ? "active" : "inactive"));
- }
-
- return (0);
-}
/*
 * Timer callback for merge detection.
 *
 * When no merge-detect message was counted since the previous expiry,
 * send_merge_detect_message is raised. The counter is then reset and the
 * timer is armed again.
 */
static void timer_function_merge_detect_timeout (
	void *data)
{
	struct totemudpu_instance *instance = data;

	if (!instance->merge_detect_messages_sent_before_timeout) {
		instance->send_merge_detect_message = 1;
	}

	instance->merge_detect_messages_sent_before_timeout = 0;

	totemudpu_start_merge_detect_timeout(instance);
}
/*
 * Arm the merge-detect timer on the instance's poll loop.
 *
 * The duration is twice totem_config->merge_timeout, scaled by
 * QB_TIME_NS_IN_MSEC; on expiry timer_function_merge_detect_timeout() is
 * called with the instance. The timer handle is kept in
 * instance->timer_merge_detect_timeout. The result of qb_loop_timer_add()
 * is not checked.
 */
static void totemudpu_start_merge_detect_timeout(
	void *udpu_context)
{
	struct totemudpu_instance *instance = udpu_context;

	qb_loop_timer_add(
		instance->totemudpu_poll_handle,
		QB_LOOP_MED,
		instance->totem_config->merge_timeout * 2 * QB_TIME_NS_IN_MSEC,
		(void *)instance,
		timer_function_merge_detect_timeout,
		&instance->timer_merge_detect_timeout);
}
/*
 * Cancel the merge-detect timer whose handle is stored in
 * instance->timer_merge_detect_timeout.
 */
static void totemudpu_stop_merge_detect_timeout(
	void *udpu_context)
{
	struct totemudpu_instance *instance = udpu_context;

	qb_loop_timer_del(
		instance->totemudpu_poll_handle,
		instance->timer_merge_detect_timeout);
}
diff --git a/exec/totemudpu.h b/exec/totemudpu.h
index 51cd3233..afce4c2d 100644
--- a/exec/totemudpu.h
+++ b/exec/totemudpu.h
@@ -1,131 +1,131 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2011 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TOTEMUDPU_H_DEFINED
#define TOTEMUDPU_H_DEFINED
#include <sys/types.h>
#include <sys/socket.h>
#include <qb/qbloop.h>
#include <corosync/totem/totem.h>
/**
* Create an instance
*
* This hunk drops the interface_no parameter, adds a ring_no argument to
* the iface_change_fn callback and adds a new mtu_changed callback that
* receives the context and a net_mtu value.
*
* NOTE(review): presumably *udpu_context receives the handle that the
* other totemudpu_* functions take as their first argument, and context is
* the value later passed back to the callbacks - confirm against the
* implementation.
*/
extern int totemudpu_initialize (
qb_loop_t *poll_handle,
void **udpu_context,
struct totem_config *totem_config,
totemsrp_stats_t *stats,
- int interface_no,
void *context,
void (*deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len),
void (*iface_change_fn) (
void *context,
- const struct totem_ip_address *iface_address),
+ const struct totem_ip_address *iface_address,
+ unsigned int ring_no),
+
+ void (*mtu_changed) (
+ void *context,
+ int net_mtu),
void (*target_set_completed) (
void *context));
extern void *totemudpu_buffer_alloc (void);
extern void totemudpu_buffer_release (void *ptr);
extern int totemudpu_processor_count_set (
void *udpu_context,
int processor_count);
/* Sends msg to the instance's current token target; returns 0. */
extern int totemudpu_token_send (
void *udpu_context,
const void *msg,
unsigned int msg_len);
/*
* The two mcast send variants differ only in the last argument they pass
* to the internal mcast_sendmsg(): 0 for flush, 1 for noflush. Both
* return 0.
*/
extern int totemudpu_mcast_flush_send (
void *udpu_context,
const void *msg,
unsigned int msg_len);
extern int totemudpu_mcast_noflush_send (
void *udpu_context,
const void *msg,
unsigned int msg_len);
/* Declaration added by this hunk. */
+extern int totemudpu_ifaces_get (void *net_context,
+ char ***status,
+ unsigned int *iface_count);
+
extern int totemudpu_recv_flush (void *udpu_context);
/* Currently a no-op that returns 0. */
extern int totemudpu_send_flush (void *udpu_context);
/* Runs the interface check immediately; returns 0. */
extern int totemudpu_iface_check (void *udpu_context);
extern int totemudpu_finalize (void *udpu_context);
/* Reduces totem_config->net_mtu by the UDP/IP header size of interface 0. */
extern void totemudpu_net_mtu_adjust (void *udpu_context, struct totem_config *totem_config);
-extern const char *totemudpu_iface_print (void *udpu_context);
-
-extern int totemudpu_iface_get (
- void *udpu_context,
- struct totem_ip_address *addr);
-
/*
* Copies *token_target into the instance and then invokes the
* target_set_completed callback; returns 0.
*/
extern int totemudpu_token_target_set (
void *udpu_context,
const struct totem_ip_address *token_target);
extern int totemudpu_crypto_set (
void *udpu_context,
const char *cipher_type,
const char *hash_type);
/*
* Drains the token socket without processing the data. Returns 0 when
* nothing was readable, otherwise 1 or -1 depending on whether the last
* receive succeeded.
*/
extern int totemudpu_recv_mcast_empty (
void *udpu_context);
/*
* Adds a member and creates its sending socket. Returns 0, or -1 when the
* member cannot be allocated. This hunk adds the local and ring_no
* parameters.
*/
extern int totemudpu_member_add (
void *udpu_context,
- const struct totem_ip_address *member);
+ const struct totem_ip_address *local,
+ const struct totem_ip_address *member,
+ int ring_no);
/*
* Removes the member with the given address and closes its socket; always
* returns 0. The definition names the address parameter token_target
* rather than member. The totemudpu_member_set_active() declaration that
* followed is deleted by this hunk.
*/
extern int totemudpu_member_remove (
void *udpu_context,
- const struct totem_ip_address *member);
-
-extern int totemudpu_member_set_active (
- void *udpu_context,
- const struct totem_ip_address *member_ip,
- int active);
+ const struct totem_ip_address *member,
+ int ring_no);
#endif /* TOTEMUDPU_H_DEFINED */
diff --git a/include/corosync/coroapi.h b/include/corosync/coroapi.h
index 7e1f27cb..ff02a54f 100644
--- a/include/corosync/coroapi.h
+++ b/include/corosync/coroapi.h
@@ -1,527 +1,527 @@
/*
* Copyright (c) 2008-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef COROAPI_H_DEFINED
#define COROAPI_H_DEFINED
#include <config.h>
#include <stdio.h>
#ifdef HAVE_SYS_UIO_H
#include <sys/uio.h>
#endif
#include <corosync/hdb.h>
#include <qb/qbloop.h>
#include <corosync/swab.h>
/**
* @brief The mar_message_source_t struct
*
* Pairs a node id with a connection pointer. Both members, and the struct
* as a whole, are forced to 8-byte alignment.
*/
typedef struct {
uint32_t nodeid __attribute__((aligned(8)));
void *conn __attribute__((aligned(8)));
} mar_message_source_t __attribute__((aligned(8)));
/**
 * @brief swab_mar_message_source_t
 *
 * Converts a message source that was produced on a machine with the
 * opposite byte order: nodeid is byte-swapped in place and conn is
 * cleared.
 *
 * @param to_swab message source to convert; must not be NULL
 */
static inline void swab_mar_message_source_t (mar_message_source_t *to_swab)
{
	/*
	 * The swapped value has to be stored back, otherwise nodeid stays
	 * in the sender's byte order.
	 * NOTE(review): relies on swab32() being an expression that yields
	 * the swapped value - confirm against <corosync/swab.h>.
	 */
	to_swab->nodeid = swab32 (to_swab->nodeid);

	/*
	 * if it is from a byteswapped machine, then we can safely
	 * ignore its conn info data structure since this is only
	 * local to the machine
	 */
	to_swab->conn = NULL;
}
#ifndef TIMER_HANDLE_T
/**
* @brief corosync_timer_handle_t
*/
typedef qb_loop_timer_handle corosync_timer_handle_t;
#define TIMER_HANDLE_T 1
#endif
/**
* @brief The corosync_tpg_group struct
*/
struct corosync_tpg_group {
const void *group;
size_t group_len;
};
#define TOTEMIP_ADDRLEN (sizeof(struct in6_addr))
-#define INTERFACE_MAX 2
+#define INTERFACE_MAX 8
/*
* Size limits. The whole group is skipped when MESSAGE_QUEUE_MAX is already
* defined; corosync/totem/totem.h defines the same three macros with the
* same replacement lists.
*
* NOTE(review): MESSAGE_SIZE_MAX expands to an unparenthesised product, so
* an expression such as "x / MESSAGE_SIZE_MAX" does not divide by the full
* value. If it is parenthesised, do it in both headers at once so that the
* two definitions stay identical.
*/
#ifndef MESSAGE_QUEUE_MAX
#ifdef HAVE_SMALL_MEMORY_FOOTPRINT
#define PROCESSOR_COUNT_MAX 16
#define MESSAGE_SIZE_MAX 1024*64
#define MESSAGE_QUEUE_MAX 512
#else
#define PROCESSOR_COUNT_MAX 384
#define MESSAGE_SIZE_MAX 1024*1024
/* Not a constant: needs a pointer named totem_config in scope where it is used */
#define MESSAGE_QUEUE_MAX ((4 * MESSAGE_SIZE_MAX) / totem_config->net_mtu)
#endif /* HAVE_SMALL_MEMORY_FOOTPRINT */
#endif /* MESSAGE_QUEUE_MAX */
#define TOTEM_AGREED 0
#define TOTEM_SAFE 1
#define MILLI_2_NANO_SECONDS 1000000ULL
/*
* The following four definitions are each compiled only when the matching
* guard macro is undefined. corosync/totem/totem.h defines MEMB_RING_ID,
* TOTEM_CONFIGURATION_TYPE and TOTEM_CALLBACK_TOKEN_TYPE next to its own
* copies of those types, so including it first suppresses the copies here.
* NOTE(review): TOTEM_IP_ADDRESS is presumably defined by totemip.h -
* confirm.
*/
#if !defined(TOTEM_IP_ADDRESS)
/**
* @brief The totem_ip_address struct
*
* Packed; addr holds TOTEMIP_ADDRLEN bytes, i.e. sizeof(struct in6_addr).
*/
struct totem_ip_address {
unsigned int nodeid;
unsigned short family;
unsigned char addr[TOTEMIP_ADDRLEN];
} __attribute__((packed));
#endif
#if !defined(MEMB_RING_ID)
/**
* @brief The memb_ring_id struct
*
* Packed pair of an address (rep) and a 64-bit sequence number (seq).
*/
struct memb_ring_id {
struct totem_ip_address rep;
unsigned long long seq;
} __attribute__((packed));
#endif
#if !defined(TOTEM_CONFIGURATION_TYPE)
/**
* @brief The totem_configuration_type enum
*/
enum totem_configuration_type {
TOTEM_CONFIGURATION_REGULAR,
TOTEM_CONFIGURATION_TRANSITIONAL
};
#endif
#if !defined(TOTEM_CALLBACK_TOKEN_TYPE)
/**
* @brief The totem_callback_token_type enum
*/
enum totem_callback_token_type {
TOTEM_CALLBACK_TOKEN_RECEIVED = 1,
TOTEM_CALLBACK_TOKEN_SENT = 2
};
#endif
/**
* @brief The cs_lib_flow_control enum
*/
enum cs_lib_flow_control {
CS_LIB_FLOW_CONTROL_REQUIRED = 1,
CS_LIB_FLOW_CONTROL_NOT_REQUIRED = 2
};
#define corosync_lib_flow_control cs_lib_flow_control
#define COROSYNC_LIB_FLOW_CONTROL_REQUIRED CS_LIB_FLOW_CONTROL_REQUIRED
#define COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED CS_LIB_FLOW_CONTROL_NOT_REQUIRED
/**
* @brief The cs_lib_allow_inquorate enum
*/
enum cs_lib_allow_inquorate {
CS_LIB_DISALLOW_INQUORATE = 0, /* default */
CS_LIB_ALLOW_INQUORATE = 1
};
#if !defined (COROSYNC_FLOW_CONTROL_STATE)
/**
* @brief The cs_flow_control_state enum
*/
enum cs_flow_control_state {
CS_FLOW_CONTROL_STATE_DISABLED,
CS_FLOW_CONTROL_STATE_ENABLED
};
/* Older name for the enum tag */
#define corosync_flow_control_state cs_flow_control_state
/*
* These two macros expand to their own names, so they do not change any
* code that uses the enumerators; their only effect is that the names
* become visible to #ifdef / defined() tests.
*/
#define CS_FLOW_CONTROL_STATE_DISABLED CS_FLOW_CONTROL_STATE_DISABLED
#define CS_FLOW_CONTROL_STATE_ENABLED CS_FLOW_CONTROL_STATE_ENABLED
#endif /* COROSYNC_FLOW_CONTROL_STATE */
/**
 * @brief The cs_fatal_error_t enum.
 *
 * Error codes accepted by the fatal_error entry of struct corosync_api_v1.
 * All values are negative and the numbering has gaps.
 */
typedef enum {
	COROSYNC_FATAL_ERROR_EXIT = -1,
	COROSYNC_LIBAIS_SOCKET = -6,
	COROSYNC_LIBAIS_BIND = -7,
	COROSYNC_READKEY = -8,
	COROSYNC_INVALID_CONFIG = -9,
	COROSYNC_DYNAMICLOAD = -12,
	COROSYNC_OUT_OF_MEMORY = -15,
	COROSYNC_FATAL_ERR = -16
} cs_fatal_error_t;

/*
 * Alternative name for cs_fatal_error_t. The replacement text must not
 * end in a semicolon, otherwise "corosync_fatal_error_t err;" does not
 * declare a variable.
 */
#define corosync_fatal_error_t cs_fatal_error_t
#ifndef QUORUM_H_DEFINED
/**
 * @brief The quorum_callback_fn_t callback
 *
 * Receives a quorate flag and the context pointer that was supplied when
 * the callback was registered.
 */
typedef void (*quorum_callback_fn_t) (int quorate, void *context);

/**
 * @brief The quorum_callin_functions struct
 *
 * Function table handed to the quorum_initialize entry of
 * struct corosync_api_v1.
 */
struct quorum_callin_functions {
	int (*quorate) (void);
	int (*register_callback) (quorum_callback_fn_t callback_fn, void *context);
	int (*unregister_callback) (quorum_callback_fn_t callback_fn, void *context);
};

/**
 * @brief The sync_callback_fn_t callback
 *
 * view_list holds view_list_entries elements.
 */
typedef void (*sync_callback_fn_t) (
	const unsigned int *view_list,
	size_t view_list_entries,
	int primary_designated,
	struct memb_ring_id *ring_id);
#endif /* QUORUM_H_DEFINED */
/**
* @brief The corosync_api_v1 struct
*
* Table of function pointers through which service engines call into the
* executive. It is passed to the exec_init_fn and config_init_fn hooks of
* struct corosync_service_engine. The member order is part of the binary
* layout, so entries must not be reordered.
*/
struct corosync_api_v1 {
/*
* Time and timer APIs
*/
/* NOTE(review): the callback parameter is spelled timer_nf here but timer_fn in timer_add_absolute - looks like a typo */
int (*timer_add_duration) (
unsigned long long nanoseconds_in_future,
void *data,
void (*timer_nf) (void *data),
corosync_timer_handle_t *handle);
int (*timer_add_absolute) (
unsigned long long nanoseconds_from_epoch,
void *data,
void (*timer_fn) (void *data),
corosync_timer_handle_t *handle);
void (*timer_delete) (
corosync_timer_handle_t timer_handle);
unsigned long long (*timer_time_get) (void);
unsigned long long (*timer_expire_time_get) (
corosync_timer_handle_t timer_handle);
/*
* IPC APIs
*/
void (*ipc_source_set) (mar_message_source_t *source, void *conn);
int (*ipc_source_is_local) (const mar_message_source_t *source);
void *(*ipc_private_data_get) (void *conn);
int (*ipc_response_send) (void *conn, const void *msg, size_t mlen);
int (*ipc_response_iov_send) (void *conn,
const struct iovec *iov, unsigned int iov_len);
int (*ipc_dispatch_send) (void *conn, const void *msg, size_t mlen);
int (*ipc_dispatch_iov_send) (void *conn,
const struct iovec *iov, unsigned int iov_len);
void (*ipc_refcnt_inc) (void *conn);
void (*ipc_refcnt_dec) (void *conn);
/*
* Totem APIs
*/
unsigned int (*totem_nodeid_get) (void);
int (*totem_family_get) (void);
int (*totem_ring_reenable) (void);
int (*totem_mcast) (const struct iovec *iovec,
unsigned int iov_len, unsigned int guarantee);
int (*totem_ifaces_get) (
unsigned int nodeid,
struct totem_ip_address *interfaces,
unsigned int interfaces_size,
char ***status,
unsigned int *iface_count);
const char *(*totem_ifaces_print) (unsigned int nodeid);
const char *(*totem_ip_print) (const struct totem_ip_address *addr);
int (*totem_crypto_set) (const char *cipher_type, const char *hash_type);
int (*totem_callback_token_create) (
void **handle_out,
enum totem_callback_token_type type,
int delete,
int (*callback_fn) (enum totem_callback_token_type type,
const void *),
const void *data);
/*
* Totem open process groups API for those service engines
* wanting their own groups
*/
int (*tpg_init) (
void **instance,
void (*deliver_fn) (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required),
void (*confchg_fn) (
enum totem_configuration_type configuration_type,
const unsigned int *member_list,
size_t member_list_entries,
const unsigned int *left_list,
size_t left_list_entries,
const unsigned int *joined_list,
size_t joined_list_entries,
const struct memb_ring_id *ring_id));
int (*tpg_exit) (
void *instance);
int (*tpg_join) (
void *instance,
const struct corosync_tpg_group *groups,
size_t group_cnt);
int (*tpg_leave) (
void *instance,
const struct corosync_tpg_group *groups,
size_t group_cnt);
int (*tpg_joined_mcast) (
void *totempg_groups_instance,
const struct iovec *iovec,
unsigned int iov_len,
int guarantee);
int (*tpg_joined_reserve) (
void *totempg_groups_instance,
const struct iovec *iovec,
unsigned int iov_len);
int (*tpg_joined_release) (
int reserved_msgs);
int (*tpg_groups_mcast) (
void *instance,
int guarantee,
const struct corosync_tpg_group *groups,
size_t groups_cnt,
const struct iovec *iovec,
unsigned int iov_len);
int (*tpg_groups_reserve) (
void *instance,
const struct corosync_tpg_group *groups,
size_t groups_cnt,
const struct iovec *iovec,
unsigned int iov_len);
int (*tpg_groups_release) (
int reserved_msgs);
int (*schedwrk_create) (
hdb_handle_t *handle,
int (schedwrk_fn) (const void *),
const void *context);
void (*schedwrk_destroy) (hdb_handle_t handle);
int (*sync_request) (
const char *service_name);
/*
* User plugin-callable functions for quorum
*/
int (*quorum_is_quorate) (void);
int (*quorum_register_callback) (quorum_callback_fn_t callback_fn, void *context);
int (*quorum_unregister_callback) (quorum_callback_fn_t callback_fn, void *context);
/*
* This one is for the quorum management plugin's use
*/
int (*quorum_initialize)(struct quorum_callin_functions *fns);
/*
* Plugin loading and unloading
*/
int (*plugin_interface_reference) (
hdb_handle_t *handle,
const char *iface_name,
int version,
void **interface,
void *context);
int (*plugin_interface_release) (hdb_handle_t handle);
/*
* Service loading and unloading APIs
*/
unsigned int (*service_link_and_init) (
struct corosync_api_v1 *corosync_api_v1,
const char *service_name,
unsigned int service_ver);
unsigned int (*service_unlink_and_exit) (
struct corosync_api_v1 *corosync_api_v1,
const char *service_name,
unsigned int service_ver);
/*
* Error handling APIs
*/
void (*error_memory_failure) (void) __attribute__ ((noreturn));
/*
* Convenience wrapper around fatal_error that fills in the call site.
* It expands to a call through a variable named "api", so a pointer to
* this struct with exactly that name must be in scope where it is used.
*/
#define corosync_fatal_error(err) api->fatal_error ((err), __FILE__, __LINE__)
void (*fatal_error) (cs_fatal_error_t err,
const char *file,
unsigned int line) __attribute__ ((noreturn));
void (*shutdown_request) (void);
void (*state_dump) (void);
qb_loop_t *(*poll_handle_get) (void);
void *(*totem_get_stats)(void);
int (*schedwrk_create_nolock) (
hdb_handle_t *handle,
int (schedwrk_fn) (const void *),
const void *context);
int (*poll_dispatch_add) (qb_loop_t * handle,
int fd,
int events,
void *data,
int (*dispatch_fn) (int fd,
int revents,
void *data));
int (*poll_dispatch_delete) (
qb_loop_t * handle,
int fd);
};
/* Combines two values into one id: a is placed above the low 16 bits, b is OR-ed in below */
#define SERVICE_ID_MAKE(a,b) ( ((a)<<16) | (b) )
#define SERVICE_HANDLER_MAXIMUM_COUNT 64
#define SERVICES_COUNT_MAX 64
/**
* @brief The corosync_lib_handler struct
*/
struct corosync_lib_handler {
void (*lib_handler_fn) (void *conn, const void *msg);
enum cs_lib_flow_control flow_control;
};
/**
* @brief The corosync_exec_handler struct
*/
struct corosync_exec_handler {
void (*exec_handler_fn) (const void *msg, unsigned int nodeid);
void (*exec_endian_convert_fn) (void *msg);
};
/**
* @brief The corosync_service_engine_iface_ver0 struct
*/
struct corosync_service_engine_iface_ver0 {
struct corosync_service_engine *(*corosync_get_service_engine_ver0) (void);
};
/**
* @brief The corosync_service_engine struct
*
* Describes one service engine: identification, handler tables and the
* hooks that are called for it.
*/
struct corosync_service_engine {
const char *name;
unsigned short id;
unsigned short priority; /* Lower priority are loaded first, unloaded last.
* 0 is a special case which always loaded _and_ unloaded last
*/
unsigned int private_data_size;
enum cs_lib_flow_control flow_control;
enum cs_lib_allow_inquorate allow_inquorate;
char *(*exec_init_fn) (struct corosync_api_v1 *);
int (*exec_exit_fn) (void);
void (*exec_dump_fn) (void);
int (*lib_init_fn) (void *conn);
int (*lib_exit_fn) (void *conn);
/* Handler table with lib_engine_count entries */
struct corosync_lib_handler *lib_engine;
int lib_engine_count;
/* Handler table with exec_engine_count entries */
struct corosync_exec_handler *exec_engine;
int exec_engine_count;
int (*config_init_fn) (struct corosync_api_v1 *);
/* Same signature as the confchg_fn argument of corosync_api_v1.tpg_init */
void (*confchg_fn) (
enum totem_configuration_type configuration_type,
const unsigned int *member_list, size_t member_list_entries,
const unsigned int *left_list, size_t left_list_entries,
const unsigned int *joined_list, size_t joined_list_entries,
const struct memb_ring_id *ring_id);
void (*sync_init) (
const unsigned int *trans_list,
size_t trans_list_entries,
const unsigned int *member_list,
size_t member_list_entries,
const struct memb_ring_id *ring_id);
int (*sync_process) (void);
void (*sync_activate) (void);
void (*sync_abort) (void);
};
#endif /* COROAPI_H_DEFINED */
diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h
index 7f9fb0f9..f1b88d40 100644
--- a/include/corosync/totem/totem.h
+++ b/include/corosync/totem/totem.h
@@ -1,293 +1,281 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2012 Red Hat, Inc.
*
* Author: Steven Dake (sdake@redhat.com)
*
* All rights reserved.
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TOTEM_H_DEFINED
#define TOTEM_H_DEFINED
#include "totemip.h"
#include <corosync/hdb.h>
/*
* Size limits. corosync/coroapi.h carries the same PROCESSOR_COUNT_MAX,
* MESSAGE_SIZE_MAX, MESSAGE_QUEUE_MAX and INTERFACE_MAX definitions; the
* replacement lists have to stay identical in both headers.
*
* NOTE(review): MESSAGE_SIZE_MAX expands to an unparenthesised product, so
* an expression such as "x / MESSAGE_SIZE_MAX" does not divide by the full
* value.
*/
#ifdef HAVE_SMALL_MEMORY_FOOTPRINT
#define PROCESSOR_COUNT_MAX 16
#define MESSAGE_SIZE_MAX 1024*64
#define MESSAGE_QUEUE_MAX 512
#else
#define PROCESSOR_COUNT_MAX 384
#define MESSAGE_SIZE_MAX 1024*1024 /* (1MB) */
/* Not a constant: needs a pointer named totem_config in scope where it is used */
#define MESSAGE_QUEUE_MAX ((4 * MESSAGE_SIZE_MAX) / totem_config->net_mtu)
#endif /* HAVE_SMALL_MEMORY_FOOTPRINT */
#define FRAME_SIZE_MAX 10000
#define TRANSMITS_ALLOWED 16
#define SEND_THREADS_MAX 16
-#define INTERFACE_MAX 2
+/* This must be <= KNET_MAX_LINK */
+#define INTERFACE_MAX 8
/**
* Maximum number of continuous gather states
*/
#define MAX_NO_CONT_GATHER 3
/*
* Maximum number of continuous failures get from sendmsg call
*/
#define MAX_NO_CONT_SENDMSG_FAILURES 30
/*
* Configuration of one totem interface. totem_config.interfaces points at
* an array of these; totemudpu_net_mtu_adjust() reads bindnet.family of
* the first element to pick the header size.
*
* This hunk adds the four knet_* members.
* NOTE(review): member_count is presumably the number of valid entries in
* member_list (capacity PROCESSOR_COUNT_MAX) - confirm.
*/
struct totem_interface {
struct totem_ip_address bindnet;
struct totem_ip_address boundto;
struct totem_ip_address mcast_addr;
uint16_t ip_port;
uint16_t ttl;
int member_count;
+ int knet_link_priority;
+ int knet_ping_interval;
+ int knet_ping_timeout;
+ int knet_ping_precision;
struct totem_ip_address member_list[PROCESSOR_COUNT_MAX];
};
/*
* Logging hooks and levels handed to totem: a printf-style log function,
* one level value per severity and a subsystem id.
*/
struct totem_logging_configuration {
/* The format attribute makes the compiler check argument 6 (format) against the variadic arguments starting at 7 */
void (*log_printf) (
int level,
int subsys,
const char *function_name,
const char *file_name,
int file_line,
const char *format,
...) __attribute__((format(printf, 6, 7)));
int log_level_security;
int log_level_error;
int log_level_warning;
int log_level_notice;
int log_level_debug;
int log_level_trace;
int log_subsys_id;
};
/*
* This hunk introduces the packed struct totem_message_header, raises
* TOTEM_PRIVATE_KEY_LEN from 128 to 4096 (it sizes
* totem_config.private_key), renames TOTEM_RRP_MODE_BYTES to
* TOTEM_LINK_MODE_BYTES (it sizes totem_config.link_mode) and replaces
* TOTEM_TRANSPORT_RDMA with TOTEM_TRANSPORT_KNET under the same value 2.
*/
-enum { TOTEM_PRIVATE_KEY_LEN = 128 };
-enum { TOTEM_RRP_MODE_BYTES = 64 };
+struct totem_message_header {
+ char type;
+ char encapsulated;
+ unsigned short endian_detector;
+ unsigned int nodeid;
+ unsigned int target_nodeid;
+} __attribute__((packed));
+
+enum { TOTEM_PRIVATE_KEY_LEN = 4096 };
+enum { TOTEM_LINK_MODE_BYTES = 64 };
typedef enum {
TOTEM_TRANSPORT_UDP = 0,
TOTEM_TRANSPORT_UDPU = 1,
- TOTEM_TRANSPORT_RDMA = 2
+ TOTEM_TRANSPORT_KNET = 2
} totem_transport_t;
#define MEMB_RING_ID
struct memb_ring_id {
struct totem_ip_address rep;
unsigned long long seq;
} __attribute__((packed));
/*
* Complete totem configuration: interfaces, key material, protocol
* parameters, logging hooks, crypto and transport selection, and the two
* callbacks used to load and store the ring id.
*
* This hunk removes the rrp_* members and replaces rrp_mode by link_mode.
*/
struct totem_config {
int version;
/*
* network
*/
/* Array of interface_count entries; totemudpu_net_mtu_adjust() requires interface_count > 0 */
struct totem_interface *interfaces;
unsigned int interface_count;
unsigned int node_id;
unsigned int clear_node_high_bit;
/*
* key information
*/
/* NOTE(review): private_key_len is presumably the number of valid bytes in private_key - confirm */
unsigned char private_key[TOTEM_PRIVATE_KEY_LEN];
unsigned int private_key_len;
/*
* Totem configuration parameters
*/
unsigned int token_timeout;
unsigned int token_retransmit_timeout;
unsigned int token_hold_timeout;
unsigned int token_retransmits_before_loss_const;
unsigned int join_timeout;
unsigned int send_join_timeout;
unsigned int consensus_timeout;
/* The UDPU merge-detect timer is armed with twice this value, scaled by QB_TIME_NS_IN_MSEC */
unsigned int merge_timeout;
unsigned int downcheck_timeout;
unsigned int fail_to_recv_const;
unsigned int seqno_unchanged_const;
- unsigned int rrp_token_expired_timeout;
-
- unsigned int rrp_problem_count_timeout;
-
- unsigned int rrp_problem_count_threshold;
-
- unsigned int rrp_problem_count_mcast_threshold;
-
- unsigned int rrp_autorecovery_check_timeout;
-
- char rrp_mode[TOTEM_RRP_MODE_BYTES];
+ char link_mode[TOTEM_LINK_MODE_BYTES];
struct totem_logging_configuration totem_logging_configuration;
/* Reduced by the transport's net_mtu_adjust function; also the divisor in MESSAGE_QUEUE_MAX */
unsigned int net_mtu;
unsigned int threads;
unsigned int heartbeat_failures_allowed;
unsigned int max_network_delay;
unsigned int window_size;
unsigned int max_messages;
const char *vsf_type;
unsigned int broadcast_use;
char *crypto_cipher_type;
char *crypto_hash_type;
totem_transport_t transport_number;
unsigned int miss_count_const;
int ip_version;
void (*totem_memb_ring_id_create_or_load) (
struct memb_ring_id *memb_ring_id,
const struct totem_ip_address *addr);
void (*totem_memb_ring_id_store) (
const struct memb_ring_id *memb_ring_id,
const struct totem_ip_address *addr);
};
#define TOTEM_CONFIGURATION_TYPE
enum totem_configuration_type {
TOTEM_CONFIGURATION_REGULAR,
TOTEM_CONFIGURATION_TRANSITIONAL
};
#define TOTEM_CALLBACK_TOKEN_TYPE
enum totem_callback_token_type {
TOTEM_CALLBACK_TOKEN_RECEIVED = 1,
TOTEM_CALLBACK_TOKEN_SENT = 2
};
enum totem_event_type {
TOTEM_EVENT_DELIVERY_CONGESTED,
TOTEM_EVENT_NEW_MSG,
};
typedef struct {
int is_dirty;
time_t last_updated;
} totem_stats_header_t;
typedef struct {
totem_stats_header_t hdr;
uint32_t iface_changes;
} totemnet_stats_t;
-typedef struct {
- totem_stats_header_t hdr;
- totemnet_stats_t *net;
- char *algo_name;
- uint8_t *faulty;
- uint32_t interface_count;
-} totemrrp_stats_t;
-
-
typedef struct {
uint32_t rx;
uint32_t tx;
int backlog_calc;
} totemsrp_token_stats_t;
typedef struct {
totem_stats_header_t hdr;
- totemrrp_stats_t *rrp;
uint64_t orf_token_tx;
uint64_t orf_token_rx;
uint64_t memb_merge_detect_tx;
uint64_t memb_merge_detect_rx;
uint64_t memb_join_tx;
uint64_t memb_join_rx;
uint64_t mcast_tx;
uint64_t mcast_retx;
uint64_t mcast_rx;
uint64_t memb_commit_token_tx;
uint64_t memb_commit_token_rx;
uint64_t token_hold_cancel_tx;
uint64_t token_hold_cancel_rx;
uint64_t operational_entered;
uint64_t operational_token_lost;
uint64_t gather_entered;
uint64_t gather_token_lost;
uint64_t commit_entered;
uint64_t commit_token_lost;
uint64_t recovery_entered;
uint64_t recovery_token_lost;
uint64_t consensus_timeouts;
uint64_t rx_msg_dropped;
uint32_t continuous_gather;
uint32_t continuous_sendmsg_failures;
int earliest_token;
int latest_token;
#define TOTEM_TOKEN_STATS_MAX 100
totemsrp_token_stats_t token[TOTEM_TOKEN_STATS_MAX];
} totemsrp_stats_t;
#define TOTEM_CONFIGURATION_TYPE
typedef struct {
totem_stats_header_t hdr;
totemsrp_stats_t *srp;
-} totemmrp_stats_t;
-
-typedef struct {
- totem_stats_header_t hdr;
- totemmrp_stats_t *mrp;
uint32_t msg_reserved;
uint32_t msg_queue_avail;
} totempg_stats_t;
#endif /* TOTEM_H_DEFINED */
diff --git a/man/corosync.conf.5 b/man/corosync.conf.5
index 00a7a382..6df81fe7 100644
--- a/man/corosync.conf.5
+++ b/man/corosync.conf.5
@@ -1,693 +1,644 @@
.\"/*
.\" * Copyright (c) 2005 MontaVista Software, Inc.
.\" * Copyright (c) 2006-2012 Red Hat, Inc.
.\" *
.\" * All rights reserved.
.\" *
.\" * Author: Steven Dake (sdake@redhat.com)
.\" *
.\" * This software licensed under BSD license, the text of which follows:
.\" *
.\" * Redistribution and use in source and binary forms, with or without
.\" * modification, are permitted provided that the following conditions are met:
.\" *
.\" * - Redistributions of source code must retain the above copyright notice,
.\" * this list of conditions and the following disclaimer.
.\" * - Redistributions in binary form must reproduce the above copyright notice,
.\" * this list of conditions and the following disclaimer in the documentation
.\" * and/or other materials provided with the distribution.
.\" * - Neither the name of the MontaVista Software, Inc. nor the names of its
.\" * contributors may be used to endorse or promote products derived from this
.\" * software without specific prior written permission.
.\" *
.\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
.\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
.\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
.\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
.\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
.\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
.\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
.\" * THE POSSIBILITY OF SUCH DAMAGE.
.\" */
.TH COROSYNC_CONF 5 2012-10-10 "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
.SH NAME
corosync.conf - corosync executive configuration file
.SH SYNOPSIS
/etc/corosync/corosync.conf
.SH DESCRIPTION
The corosync.conf instructs the corosync executive about various parameters
needed to control the corosync executive. Empty lines and lines starting with
# character are ignored. The configuration file consists of bracketed top level
directives. The possible directive choices are:
.TP
totem { }
This top level directive contains configuration options for the totem protocol.
.TP
logging { }
This top level directive contains configuration options for logging.
.TP
quorum { }
This top level directive contains configuration options for quorum.
.TP
nodelist { }
This top level directive contains configuration options for nodes in cluster.
.TP
qb { }
This top level directive contains configuration options related to libqb.
.PP
-.PP
-Within the
-.B totem
-directive, an interface directive is required. There is also one configuration
-option which is required:
-.PP
-.PP
-Within the
-.B interface
-sub-directive of totem there are four parameters which are required. There is
-one parameter which is optional.
+The
+\fBinterface\fR sub-directive of totem is optional for UDP and knet transports.
+
+For knet, multiple interface subsections define parameters for each knet link on the
+system.
+
+For UDP, there should be just one interface section that defines the multicast or
+broadcast options for the link.
+
+For UDPU an interface section is not needed and it is recommended that the nodelist
+is used to define cluster nodes.
+
+.TP
+linknumber
+This specifies the link number for the interface. When using the knet
+protocol, each interface should specify separate link numbers to uniquely
+identify to the membership protocol which interface to use for which link.
+The linknumber must start at 0. For UDP the only supported linknumber is 0.
.TP
-ringnumber
-This specifies the ring number for the interface. When using the redundant
-ring protocol, each interface should specify separate ring numbers to uniquely
-identify to the membership protocol which interface to use for which redundant
-ring. The ringnumber must start at 0.
+knet_link_priority
+This specifies the priority for the link when knet is used in 'passive'
+mode. (see link_mode below)
.TP
-bindnetaddr
+knet_ping_interval
+This specifies the interval between knet link pings.
+(default 1000 ms)
+
+.TP
+knet_ping_timeout
+If no ping is received within this time, the knet link is declared dead.
+(default 2000 ms)
+
+.TP
+knet_ping_precision
+How many values of latency are used to calculate
+the average link latency. (default 2048 samples)
+
+.TP
+bindnetaddr (udp only)
This specifies the network address the corosync executive should bind
-to.
+to when using udp.
-bindnetaddr should be an IP address configured on the system, or a network
+bindnetaddr (udp only)
+should be an IP address configured on the system, or a network
address.
For example, if the local interface is 192.168.5.92 with netmask
255.255.255.0, you should set bindnetaddr to 192.168.5.92 or 192.168.5.0.
If the local interface is 192.168.5.92 with netmask 255.255.255.192,
set bindnetaddr to 192.168.5.92 or 192.168.5.64, and so forth.
This may also be an IPV6 address, in which case IPV6 networking will be used.
In this case, the exact address must be specified and there is no automatic
selection of the network interface within a specific subnet as with IPv4.
If IPv6 networking is used, the nodeid field in nodelist must be specified.
.TP
-broadcast
+broadcast (udp only)
This is optional and can be set to yes. If it is set to yes, the broadcast
address will be used for communication. If this option is set, mcastaddr
should not be set.
.TP
-mcastaddr
+mcastaddr (udp only)
This is the multicast address used by corosync executive. The default
should work for most networks, but the network administrator should be queried
about a multicast address to use. Avoid 224.x.x.x because this is a "config"
multicast address.
This may also be an IPV6 multicast address, in which case IPV6 networking
will be used. If IPv6 networking is used, the nodeid field in nodelist must
be specified.
-It's not needed to use this option if cluster_name option is used. If both options
+It's not necessary to use this option if cluster_name option is used. If both options
are used, mcastaddr has higher priority.
.TP
-mcastport
+mcastport (udp only)
This specifies the UDP port number. It is possible to use the same multicast
address on a network with the corosync services configured for different
UDP ports.
Please note corosync uses two UDP ports mcastport (for mcast receives) and
mcastport - 1 (for mcast sends).
If you have multiple clusters on the same network using the same mcastaddr
please configure the mcastports with a gap.
.TP
-ttl
+ttl (udp only)
This specifies the Time To Live (TTL). If you run your cluster on a routed
network then the default of "1" will be too small. This option provides
a way to increase this up to 255. The valid range is 0..255.
-Note that this is only valid on multicast transport types.
.PP
.PP
Within the
.B totem
directive, there are seven configuration options of which one is required,
five are optional, and one is required when IPV6 is configured in the interface
subdirective. The required directive controls the version of the totem
configuration. The optional option unless using IPV6 directive controls
identification of the processor. The optional options control secrecy and
-authentication, the redundant ring mode of operation and maximum network MTU
+authentication, the network mode of operation and maximum network MTU
field.
.TP
version
This specifies the version of the configuration file. Currently the only
valid version for this directive is 2.
.PP
clear_node_high_bit
This configuration option is optional and is only relevant when no nodeid is
specified. Some corosync clients require a signed 32 bit nodeid that is greater
than zero however by default corosync uses all 32 bits of the IPv4 address space
when generating a nodeid. Set this option to yes to force the high bit to be
zero and therefore ensure the nodeid is a positive signed 32 bit integer.
WARNING: The cluster's behavior is undefined if this option is enabled on only
a subset of the cluster (for example during a rolling upgrade).
.TP
crypto_hash
This specifies which HMAC authentication should be used to authenticate all
messages. Valid values are none (no authentication), md5, sha1, sha256,
-sha384 and sha512.
+sha384 and sha512. Encrypted transmission is only supported for
+the knet transport.
The default is sha1.
.TP
crypto_cipher
This specifies which cipher should be used to encrypt all messages.
Valid values are none (no encryption), aes256, aes192, aes128 and 3des.
-Enabling crypto_cipher, requires also enabling of crypto_hash.
+Enabling crypto_cipher also requires enabling crypto_hash. Encrypted
+transmission is only supported for the knet transport.
The default is aes256.
.TP
-secauth
-This specifies that HMAC/SHA1 authentication should be used to authenticate
-all messages. It further specifies that all data should be encrypted with the
-nss library and aes256 encryption algorithm to protect data from eavesdropping.
-
-Enabling this option adds a encryption header to every message sent by totem which
-reduces total throughput. Also encryption and authentication consume extra CPU
-cycles in corosync.
-
-The default is on.
+link_mode
+This specifies the Kronosnet mode, which may be passive, active, or
+rr (round-robin).
+.B passive:
+the active link with the lowest priority will be used. If one or more
+links share the same priority the one with the lowest link ID will
+be used.
+.B active:
+All active links will be used simultaneously to send traffic.
+link priority is ignored.
+.B rr:
+Round-Robin policy. Each packet will be sent to the next active link in
+order.
-WARNING: This parameter is deprecated. It's recomended to use combination of
-crypto_cipher and crypto_hash.
-
-.TP
-rrp_mode
-This specifies the mode of redundant ring, which may be none, active, or
-passive. Currently only 'passive' is supported or tested
-(using 'active' is not recommended). Active replication offers
-slightly lower latency from transmit to delivery in faulty network
-environments but with less performance.
-Passive replication may nearly double the speed of the totem protocol
-if the protocol doesn't become cpu bound. The final option is none, in
-which case only one network interface will be used to operate the totem
-protocol.
+If only one interface directive is specified, passive is automatically chosen.
-If only one interface directive is specified, none is automatically chosen.
-If multiple interface directives are specified, only active or passive may
-be chosen.
-
-The maximum number of interface directives that is allowed for either
-modes (active or passive) is 2.
+The maximum number of interface directives that is allowed with Kronosnet
+is 8. For other transports it is 1.
When using multiple interfaces, make sure to use different multicast
address/port (port for same address must differ by at least two) pair
-for each interface (this is checked by parser) to make rrp works.
+for each interface (this is checked by parser).
.TP
netmtu
This specifies the network maximum transmit unit. To set this value beyond
1500, the regular frame MTU, requires ethernet devices that support large, or
also called jumbo, frames. If any device in the network doesn't support large
frames, the protocol will not operate properly. The hosts must also have their
mtu size set from 1500 to whatever frame size is specified here.
Please note while some NICs or switches claim large frame support, they support
9000 MTU as the maximum frame size including the IP header. Setting the netmtu
and host MTUs to 9000 will cause totem to use the full 9000 bytes of the frame.
Then Linux will add a 18 byte header moving the full frame size to 9018. As a
result some hardware will not operate properly with this size of data. A netmtu
of 8982 seems to work for the few large frame devices that have been tested.
Some manufacturers claim large frame support when in fact they support frame
sizes of 4500 bytes.
When sending multicast traffic, if the network frequently reconfigures, chances are
that some device in the network doesn't support large frames.
Choose hardware carefully if intending to use large frame support.
The default is 1500.
.TP
transport
-This directive controls the transport mechanism used. If the interface to
-which corosync is binding is an RDMA interface such as RoCEE or Infiniband, the
-"iba" parameter may be specified. To avoid the use of multicast entirely, a
-unicast transport parameter "udpu" can be specified. This requires specifying
-the list of members in nodelist directive, that could potentially make up
-the membership before deployment.
-
-The default is udp. The transport type can also be set to udpu or iba.
+This directive controls the transport mechanism used.
+The default is knet. The transport type can also be set to udpu or udp.
+Only knet allows crypto or multiple interfaces per node.
.TP
cluster_name
This specifies the name of the cluster and is used for automatically generating
the multicast address.
.TP
config_version
This specifies version of config file. This is converted to unsigned 64-bit int.
By default it's 0. Option is used to prevent joining old nodes with not
up-to-date configuration. If value is not 0, and node is going for first time
(only for first time, join after split doesn't follow these rules)
from single-node membership to multiple nodes membership, other nodes
config_versions are collected. If current node config_version is not
equal to highest of collected versions, corosync is terminated.
.TP
ip_version
Specifies version of IP to use for communication. Value can be one of
ipv4 or ipv6. Default (if unspecified) is ipv4.
Within the
.B totem
directive, there are several configuration options which are used to control
the operation of the protocol. It is generally not recommended to change any
of these values without proper guidance and sufficient testing. Some networks
may require larger values if suffering from frequent reconfigurations. Some
applications may require faster failure detection times which can be achieved
by reducing the token timeout.
.TP
token
This timeout is used directly or as a base for real token timeout calculation (explained in
.B token_coefficient
section). Token timeout specifies in milliseconds until a token loss is declared after not
receiving a token. This is the time spent detecting a failure of a processor
in the current configuration. Reforming a new configuration takes about 50
milliseconds in addition to this timeout.
For real token timeout used by totem it's possible to read cmap value of
.B runtime.config.token
key.
The default is 1000 milliseconds.
.TP
token_coefficient
This value is used only when
.B nodelist
section is specified and contains at least 3 nodes. If so, real token timeout
is then computed as token + (number_of_nodes - 2) * token_coefficient.
This allows cluster to scale without manually changing token timeout
every time new node is added. This value can be set to 0 resulting
in effective removal of this feature.
The default is 650 milliseconds.
.TP
token_retransmit
This timeout specifies in milliseconds after how long before receiving a token
the token is retransmitted. This will be automatically calculated if token
is modified. It is not recommended to alter this value without guidance from
the corosync community.
The default is 238 milliseconds.
.TP
hold
This timeout specifies in milliseconds how long the token should be held by
the representative when the protocol is under low utilization. It is not
recommended to alter this value without guidance from the corosync community.
The default is 180 milliseconds.
.TP
token_retransmits_before_loss_const
This value identifies how many token retransmits should be attempted before
forming a new configuration. If this value is set, retransmit and hold will
be automatically calculated from retransmits_before_loss and token.
The default is 4 retransmissions.
.TP
join
This timeout specifies in milliseconds how long to wait for join messages in
the membership protocol.
The default is 50 milliseconds.
.TP
send_join
This timeout specifies in milliseconds an upper range between 0 and send_join
to wait before sending a join message. For configurations with fewer than
32 nodes, this parameter is not necessary. For larger rings, this parameter
is necessary to ensure the NIC is not overflowed with join messages on
formation of a new ring. A reasonable value for large rings (128 nodes) would
be 80msec. Other timer values must also change if this value is changed. Seek
advice from the corosync mailing list if trying to run larger configurations.
The default is 0 milliseconds.
.TP
consensus
This timeout specifies in milliseconds how long to wait for consensus to be
achieved before starting a new round of membership configuration. The minimum
value for consensus must be 1.2 * token. This value will be automatically
calculated at 1.2 * token if the user doesn't specify a consensus value.
For two node clusters, a consensus larger than the join timeout but less than
token is safe. For three node or larger clusters, consensus should be larger
than token. There is an increasing risk of odd membership changes, which still
guarantee virtual synchrony, as node count grows if consensus is less than
token.
The default is 1200 milliseconds.
.TP
merge
This timeout specifies in milliseconds how long to wait before checking for
a partition when no multicast traffic is being sent. If multicast traffic
is being sent, the merge detection happens automatically as a function of
the protocol.
The default is 200 milliseconds.
.TP
downcheck
This timeout specifies in milliseconds how long to wait before checking
that a network interface is back up after it has been downed.
The default is 1000 milliseconds.
.TP
fail_recv_const
This constant specifies how many rotations of the token without receiving any
of the messages when messages should be received may occur before a new
configuration is formed.
The default is 2500 failures to receive a message.
.TP
seqno_unchanged_const
This constant specifies how many rotations of the token without any multicast
traffic should occur before the hold timer is started.
The default is 30 rotations.
.TP
heartbeat_failures_allowed
[HeartBeating mechanism]
Configures the optional HeartBeating mechanism for faster failure detection. Keep in
mind that engaging this mechanism in lossy networks could cause faulty loss declaration
as the mechanism relies on the network for heartbeating.
So as a rule of thumb use this mechanism if you require improved failure in low to
medium utilized networks.
This constant specifies the number of heartbeat failures the system should tolerate
before declaring heartbeat failure, e.g. 3. Also if this value is not set or is 0 then the
heartbeat mechanism is not engaged in the system and token rotation is the method
of failure detection.
The default is 0 (disabled).
.TP
max_network_delay
[HeartBeating mechanism]
This constant specifies in milliseconds the approximate delay that your network takes
to transport one packet from one machine to another. This value is to be set by system
engineers and please don't change if not sure as this affects the failure detection
mechanism using heartbeat.
The default is 50 milliseconds.
.TP
window_size
This constant specifies the maximum number of messages that may be sent on one
token rotation. If all processors perform equally well, this value could be
large (300), which would introduce higher latency from origination to delivery
for very large rings. To reduce latency in large rings(16+), the defaults are
a safe compromise. If 1 or more slow processor(s) are present among fast
processors, window_size should be no larger than 256000 / netmtu to avoid
overflow of the kernel receive buffers. The user is notified of this by
the display of a retransmit list in the notification logs. There is no loss
of data, but performance is reduced when these errors occur.
The default is 50 messages.
.TP
max_messages
This constant specifies the maximum number of messages that may be sent by one
processor on receipt of the token. The max_messages parameter is limited to
256000 / netmtu to prevent overflow of the kernel transmit buffers.
The default is 17 messages.
.TP
miss_count_const
This constant defines the maximum number of times on receipt of a token
a message is checked for retransmission before a retransmission occurs. This
parameter is useful to modify for switches that delay multicast packets
compared to unicast packets. The default setting works well for nearly all
modern switches.
The default is 5 messages.
-.TP
-rrp_problem_count_timeout
-This specifies the time in milliseconds to wait before decrementing the
-problem count by 1 for a particular ring to ensure a link is not marked
-faulty for transient network failures.
-
-The default is 2000 milliseconds.
-
-.TP
-rrp_problem_count_threshold
-This specifies the number of times a problem is detected with a link before
-setting the link faulty. Once a link is set faulty, no more data is
-transmitted upon it. Also, the problem counter is no longer decremented when
-the problem count timeout expires.
-
-A problem is detected whenever all tokens from the proceeding processor have
-not been received within the rrp_token_expired_timeout. The
-rrp_problem_count_threshold * rrp_token_expired_timeout should be atleast 50
-milliseconds less then the token timeout, or a complete reconfiguration
-may occur.
-
-The default is 10 problem counts.
-
-.TP
-rrp_problem_count_mcast_threshold
-This specifies the number of times a problem is detected with multicast before
-setting the link faulty for passive rrp mode. This variable is unused in active
-rrp mode.
-
-The default is 10 times rrp_problem_count_threshold.
-
-.TP
-rrp_token_expired_timeout
-This specifies the time in milliseconds to increment the problem counter for
-the redundant ring protocol after not having received a token from all rings
-for a particular processor.
-
-This value will automatically be calculated from the token timeout and
-problem_count_threshold but may be overridden. It is not recommended to
-override this value without guidance from the corosync community.
-
-The default is 47 milliseconds.
-
-.TP
-rrp_autorecovery_check_timeout
-This specifies the time in milliseconds to check if the failed ring can be
-auto-recovered.
-
-The default is 1000 milliseconds.
-
.PP
Within the
.B logging
directive, there are several configuration options which are all optional.
.PP
The following 3 options are valid only for the top level logging directive:
.TP
timestamp
This specifies that a timestamp is placed on all log messages.
The default is off.
.TP
fileline
This specifies that file and line should be printed.
The default is off.
.TP
function_name
This specifies that the code function name should be printed.
The default is off.
.PP
The following options are valid both for top level logging directive
and they can be overridden in logger_subsys entries.
.TP
to_stderr
.TP
to_logfile
.TP
to_syslog
These specify the destination of logging output. Any combination of
these options may be specified. Valid options are
.B yes
and
.B no.
The default is syslog and stderr.
Please note, if you are using to_logfile and want to rotate the file, use logrotate(8)
with the option
.B
copytruncate.
eg.
.ne 18
.RS
.nf
.ft CW
/var/log/corosync.log {
missingok
compress
notifempty
daily
rotate 7
copytruncate
}
.ft
.fi
.RE
.TP
logfile
If the
.B to_logfile
directive is set to
.B yes
, this option specifies the pathname of the log file.
No default.
.TP
logfile_priority
This specifies the logfile priority for this particular subsystem. Ignored if debug is on.
Possible values are: alert, crit, debug (same as debug = on), emerg, err, info, notice, warning.
The default is: info.
.TP
syslog_facility
This specifies the syslog facility type that will be used for any messages
sent to syslog. options are daemon, local0, local1, local2, local3, local4,
local5, local6 & local7.
The default is daemon.
.TP
syslog_priority
This specifies the syslog level for this particular subsystem. Ignored if debug is on.
Possible values are: alert, crit, debug (same as debug = on), emerg, err, info, notice, warning.
The default is: info.
.TP
debug
This specifies whether debug output is logged for this particular logger. It can also be set to
the value trace, which is the highest level of debug information.
The default is off.
.PP
Within the
.B logging
directive, logger_subsys directives are optional.
.PP
Within the
.B logger_subsys
sub-directive, all of the above logging configuration options are valid and
can be used to override the default settings.
The subsys entry, described below, is mandatory to identify the subsystem.
.TP
subsys
This specifies the subsystem identity (name) for which logging is specified. This is the
name used by a service in the log_init () call. E.g. 'CPG'. This directive is
required.
.PP
Within the
.B quorum
directive it is possible to specify the quorum algorithm to use with the
.TP
provider
directive. At the time of writing only corosync_votequorum is supported.
See votequorum(5) for configuration options.
.PP
Within the
.B nodelist
directive it is possible to specify specific information about nodes in cluster. Directive
can contain only
.B node
sub-directive, which specifies every node that should be a member of the membership, and where
non-default options are needed. Every node must have at least ring0_addr field filled.
-For UDPU, every node that should be a member of the membership must be specified.
+Every node that should be a member of the membership must be specified.
Possible options are:
.TP
ringX_addr
-This specifies ip address of one of the nodes. X is ring number.
+This specifies ip address of one of the nodes. X is link number.
.TP
nodeid
-This configuration option is optional when using IPv4 and required when using
-IPv6. This is a 32 bit value specifying the node identifier delivered to the
-cluster membership service. If this is not specified with IPv4, the node id
-will be determined from the 32 bit IP address the system to which the system
-is bound with ring identifier of 0. The node identifier value of zero is
+This configuration option is required for each node. It is a 32 bit value
+specifying the node identifier delivered to the
+cluster membership service. The node identifier value of zero is
reserved and should not be used.
.PP
Within the
.B qb
directive it is possible to specify options for libqb.
Possible option is:
.TP
ipc_type
This specifies type of IPC to use. Can be one of native (default), shm and socket.
Native means one of shm or socket, depending on what is supported by OS. On systems
with support for both, SHM is selected. SHM is generally faster, but needs to allocate
a ring buffer file in /dev/shm.
.SH "FILES"
.TP
/etc/corosync/corosync.conf
The corosync executive configuration file.
.SH "SEE ALSO"
.BR corosync_overview (8),
.BR votequorum (5),
.BR corosync-qdevice (8),
.BR logrotate (8)
.PP

File Metadata

Mime Type
text/x-diff
Expires
Tue, Feb 25, 4:39 AM (1 d, 10 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1464676
Default Alt Text
(668 KB)

Event Timeline