diff --git a/LICENSE b/LICENSE index 250db9ad..8d32cd17 100644 --- a/LICENSE +++ b/LICENSE @@ -1,85 +1,59 @@ -*** -All cryptographic software in this package is subject to the following legal -notice: -This package includes publicly available encryption source code which, -together with object code resulting from the compiling of publicly -available source code, may be exported from the United States under License -Exception TSU prsuant to 15 C.F.R Section 740.13(e). -*** ----------------------------------------------------------------------------- The following license applies to every file in this source distribution except -for the files exec/crypto.c, exec/crypto.h, git-version-gen, and -gitlog-to-changelog. The exec/crypto[c|h] file license is described in its -full content later in this file and are part of the executive runtime and -components of binary images produced by the source code. +for the files git-version-gen, and gitlog-to-changelog. The git* files, which are available under GPLv3 or later, are only used by our release process to generate text file content and are not part of any generated binary. ----------------------------------------------------------------------------- Copyright (c) 2002-2004 MontaVista Software, Inc. Copyright (c) 2005-2010 Red Hat, Inc. All rights reserved. This software licensed under BSD license, the text of which follows: Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of the MontaVista Software, Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -The corosync project uses software from the LibTomCrypt project -www.libtomcrypt.org. This software is contained the files exec/crypto.c and -exec/crypto.h. The following license applies to the files exec/crypto.c and -exec/crytpo.h: ------------------------------------------------------------------------------ -LibTomCrypt is public domain. As should all quality software be. - -All of the software was either written by or donated to Tom St Denis for the -purposes of this project. The only exception is the SAFER.C source which has -no known license status (assumed copyrighted) which is why SAFER,C is shipped -as disabled. - -Tom St Denis - ----------------------------------------------------------------------------- The corosync project uses software for release processing which generates changelogs and version information for the software. These programs are not used by the generated binaries or libraries These files are git-version-gen and gitlog-to-changelog. ----------------------------------------------------------------------------- The license for these files is as follows: This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . diff --git a/SECURITY b/SECURITY index 09fb894f..a2a4fdf1 100644 --- a/SECURITY +++ b/SECURITY @@ -1,172 +1,106 @@ -*** -All cryptographic software in this package is subject to the following legal -notice: -This package includes publicly available encryption source code which, -together with object code resulting from the compiling of publicly -available source code, may be exported from the United States under License -Exception TSU prsuant to 15 C.F.R Section 740.13(e). -*** Security Design of corosync The corosync project intends to mitigate the following threats: 1. forged group messaging messages which are intended to fault the corosync executive 2. forged group messaging messages which are intended to fault applications using corosync apis 3. monitoring of network data to capture sensitive information The corosync project does not intend to mitigate the following threats: 1. physical access to the hardware which could expose the private key 2. privledged access to the operating system which could expose the private key or be used to inject errors into the corosync executive. 3. library user creates requests which are intended to fault the corosync executive The corosync project mitigates the threats using two mechanisms: 1. Authentication 2. Secrecy Library Interface ----------------- The corosync executive authenticates every library user. The library is only allowed to access services if it's GID is corosync or 0. Unauthorized library users are rejected. The corosync group is a trusted group. If the administrator doesn't trust the application, it should not be added to the group! Any member of the corosync group could potentially send a malformed request to the executive and cause it to fault. Group Messaging Interface ------------------------- Group messaging uses UDP/IP to communicate with other corosync executives using messages. It is possible without authentication of every packet that an attacker could forge messages. These forged messages could fault the corosync executive distributed state machines. It would also be possible to corrupt end applications by forging changes. Since messages are sent using UDP/IP it would be possible to snoop those messages and rebuild sensitive data. To solve these problems, the group messaging interface uses two new interfaces interal to it's implementation: 1. encrypt_and_sign - encrypts and signs a message securely 2. authenticate_and_decrypt - authenticates and decrypts a message securely When the executive wants to send a message over the network, it uses encrypt_and_sign to prepare the message to be sent. When the executive wants to receive a message from the network, it uses authenticate_and_decrypt to verify the message is valid and decrypt it. -There are currently two encryption methods available in corosync. -sha1/hmac/sober which are coded internally, and AES/SHA256 which -are in the Mozilla Network Security Services (libnss) library. +Corosync uses AES256/SHA-1 which from the Mozilla Network Security +Services (libnss) library. The internal functions utilize the following algorithms: sha1 - hash algorithm secure for using with hmac hmac - produces a 16 byte digest from any length input -sober - pseudo random number generator and stream cipher Every message starts with a struct security { unsigned char digest[20]; A one way hash digest unsigned char salt[16]; A securely generated random number } -INTERNAL SECURITY CODE: ------------------------ -The hmac algorithm requires a 16 byte key. -The sober algorithm requires a 16 byte private key. -The sober algorithm requires a 16 byte public initial vector. - -The private key is read from disk and stored in memory for use with the -sober algorithm to generate the three required keys. - -When a message is sent (encrypt_and_sign): ------------------------------------------- -1. sober is used to create a 16 byte random number (salt) using the md4 - algorithm -2. sober is keyed with the private key and the initial vector is set to the - salt. Then a 48 byte key is read from the sober algorithm. This 48 byte - key is split into 3 16 byte keys. The keys are the hmac key, the sober key - and the sober initial vector. -3. A sober instance is keyed with the sober key and sober initial vector - from step #2. -4. The data of the packet, except for the security header, is encrypted using - the sober cipher that was initialized in step #3. -5. The salt is stored in the security header of the outgoing message. -6. The hmac is initialized with the hmac key generated in step #2. -7. The message, except for the security header, is hmaced to produce a digest - using the sha1 algorithm. -8. The digest is stored in the outgoing message. -9. The message is transmitted. - - -When a message is received (decrypt_and_authenticate): ------------------------------------------------------- -1. sober is keyed with the private key and the initial vector is set to the - salt in the received message. Then a 48 byte key is read from the sober - algorithm. This 48 byte key is split into 3 16 byte keys. The keys are the - hmac key, the sober key and the sober initial vector. -2. The sober key and sober initial vector from step #1 are used to key a - new sober instance. -3. The hmac is setup using the hmac key generated in step #1 using sha1. -5. The message is authenticated, except for the security header. -6. If the message was not authenticated, the caller is told of the result. - The caller ignores the message. -7. The message is decrypted, except for the security header, using the sober - algorithm in step #2. -8. The message is processed. - -This does consume some resources. It ensures the private key is never shared -openly, that messages are authenticated, that messages are encrypted, and that -any key exposure of the sober encryption key, sober initial vector, or hmac -key can only be used to attack one of the algorithms. Finally every key used -is randomly unique (within the 2^128 search space of the input to sober) to -ensure that keys are never reused, nonce's are never reused, and hmac's are -never reused. - - USING LIBNSS ------------ - -The process is similar in concept to the above, but most of the details are -hidden inside the NSS library. When corosync is started up libnss is initialised, +When corosync is started up libnss is initialised, the private key is read into memory and stored for later use by the code. When a message is sent (encrypt_and_sign): ------------------------------------------ - The message is encrypted using AES. - A digest of that message is then created using SHA256 and based on the private key. - the message is then transmitted. When a message is received (decrypt_and_authenticate): - A Digest of the encrypted message is created using the private key - That digest is compared to the one in the message security_header - If they do not match the packet is rejected - If they do match then the message is decrypted using the private key. - The message is processed. Compatibility ------------- The encryption type can be changed at runtime with a single command: -# corosync-cfgtool -c1 +# corosync-cfgtool -c0 -(0 for sober, 1 for nss) +(0 for nss) This will tell all cluster nodes to start using libnss encryption. Note that it is possible to upgrade node individially by setting the encryption type in corosync.conf. The last byte of the packet indicates the decryption algorithm that the receiver should use. Once all nodes are using libnss encryption, the option should be set in in corosync.conf so that it takes effect at the next system reboot. Comments welcome mailto:discuss@corosync.org diff --git a/conf/lenses/corosync.aug b/conf/lenses/corosync.aug index 4a8317b8..30504784 100644 --- a/conf/lenses/corosync.aug +++ b/conf/lenses/corosync.aug @@ -1,165 +1,165 @@ (* Process /etc/corosync/corosync.conf *) (* The lens is based on the corosync.conf(5) man page *) module Corosync = autoload xfm let comment = Util.comment let empty = Util.empty let dels = Util.del_str let eol = Util.eol let ws = del /[ \t]+/ " " let wsc = del /:[ \t]+/ ": " let indent = del /[ \t]*/ "" (* We require that braces are always followed by a newline *) let obr = del /\{([ \t]*)\n/ "{\n" let cbr = del /[ \t]*}[ \t]*\n/ "}\n" let ikey (k:regexp) = indent . key k let section (n:regexp) (b:lens) = [ ikey n . ws . obr . (b|empty|comment)* . cbr ] let kv (k:regexp) (v:regexp) = [ ikey k . wsc . store v . eol ] (* FIXME: it would be much more concise to write *) (* [ key k . ws . (bare | quoted) ] *) (* but the typechecker trips over that *) let qstr (k:regexp) = let delq = del /['"]/ "\"" in let bare = del /["']?/ "" . store /[^"' \t\n]+/ . del /["']?/ "" in let quoted = delq . store /.*[ \t].*/ . delq in [ ikey k . wsc . bare . eol ] |[ ikey k . wsc . quoted . eol ] (* A integer subsection *) let interface = let setting = kv "ringnumber" Rx.integer |kv "mcastport" Rx.integer |kv "ttl" Rx.integer |qstr /bindnetaddr|mcastaddr/ in section "interface" setting (* The totem section *) let totem = let setting = kv "clear_node_high_bit" /yes|no/ |kv "rrp_mode" /none|active|passive/ |kv "vsftype" /none|ykd/ |kv "secauth" /on|off/ - |kv "crypto_type" /nss|sober/ + |kv "crypto_type" /nss|aes256/ |kv "transport" /udp|iba/ |kv "version" Rx.integer |kv "nodeid" Rx.integer |kv "threads" Rx.integer |kv "netmtu" Rx.integer |kv "token" Rx.integer |kv "token_retransmit" Rx.integer |kv "hold" Rx.integer |kv "token_retransmits_before_loss_const" Rx.integer |kv "join" Rx.integer |kv "send_join" Rx.integer |kv "consensus" Rx.integer |kv "merge" Rx.integer |kv "downcheck" Rx.integer |kv "fail_to_recv_const" Rx.integer |kv "seqno_unchanged_const" Rx.integer |kv "heartbeat_failures_allowed" Rx.integer |kv "max_network_delay" Rx.integer |kv "max_messages" Rx.integer |kv "window_size" Rx.integer |kv "rrp_problem_count_timeout" Rx.integer |kv "rrp_problem_count_threshold" Rx.integer |kv "rrp_token_expired_timeout" Rx.integer |interface in section "totem" setting let common_logging = kv "to_syslog" /yes|no|on|off/ |kv "to_stderr" /yes|no|on|off/ |kv "to_logfile" /yes|no|on|off/ |kv "debug" /yes|no|on|off/ |kv "logfile_priority" /alert|crit|debug|emerg|err|info|notice|warning/ |kv "syslog_priority" /alert|crit|debug|emerg|err|info|notice|warning/ |kv "syslog_facility" /daemon|local0|local1|local2|local3|local4|local5|local6|local7/ |qstr /logfile|tags/ (* A logger_subsys subsection *) let logger_subsys = let setting = qstr /subsys/ |common_logging in section "logger_subsys" setting (* The logging section *) let logging = let setting = kv "fileline" /yes|no|on|off/ |kv "function_name" /yes|no|on|off/ |kv "timestamp" /yes|no|on|off/ |common_logging |logger_subsys in section "logging" setting (* The resource section *) let common_resource = kv "max" Rx.decimal |kv "poll_period" Rx.integer |kv "recovery" /reboot|shutdown|watchdog|none/ let memory_used = let setting = common_resource in section "memory_used" setting let load_15min = let setting = common_resource in section "load_15min" setting let system = let setting = load_15min |memory_used in section "system" setting (* The resources section *) let resources = let setting = system in section "resources" setting (* The quorum section *) let quorum = let setting = qstr /provider/ |kv "expected_votes" Rx.integer |kv "votes" Rx.integer |kv "wait_for_all" Rx.integer |kv "last_man_standing" Rx.integer |kv "last_man_standing_window" Rx.integer |kv "auto_tie_breaker" Rx.integer |kv "two_node" Rx.integer in section "quorum" setting (* The service section *) let service = let setting = qstr /name|ver/ in section "service" setting (* The uidgid section *) let uidgid = let setting = qstr /uid|gid/ in section "uidgid" setting let lns = (comment|empty|totem|quorum|logging|resources|service|uidgid)* let xfm = transform lns (incl "/etc/corosync/corosync.conf") diff --git a/configure.ac b/configure.ac index fbaabacf..c6b04e7f 100644 --- a/configure.ac +++ b/configure.ac @@ -1,661 +1,651 @@ # -*- Autoconf -*- # Process this file with autoconf to produce a configure script. # bootstrap / init AC_PREREQ([2.61]) AC_INIT([corosync], m4_esyscmd([build-aux/git-version-gen .tarball-version]), [discuss@lists.corosync.org]) AM_INIT_AUTOMAKE([-Wno-portability]) AM_SILENT_RULES([yes]) AC_CONFIG_SRCDIR([lib/cpg.c]) AC_CONFIG_HEADER([include/corosync/config.h]) AC_CANONICAL_HOST AC_LANG([C]) dnl Fix default variables - "prefix" variable if not specified if test "$prefix" = "NONE"; then prefix="/usr" dnl Fix "localstatedir" variable if not specified if test "$localstatedir" = "\${prefix}/var"; then localstatedir="/var" fi dnl Fix "sysconfdir" variable if not specified if test "$sysconfdir" = "\${prefix}/etc"; then sysconfdir="/etc" fi dnl Fix "libdir" variable if not specified if test "$libdir" = "\${exec_prefix}/lib"; then if test -e /usr/lib64; then libdir="/usr/lib64" else libdir="/usr/lib" fi fi fi if test "$srcdir" = "."; then AC_MSG_NOTICE([building in place srcdir:$srcdir]) AC_DEFINE([BUILDING_IN_PLACE], 1, [building in place]) else AC_MSG_NOTICE([building out of tree srcdir:$srcdir]) fi # Checks for programs. # check stolen from gnulib/m4/gnu-make.m4 if ! ${MAKE-make} --version /cannot/make/this >/dev/null 2>&1; then AC_MSG_ERROR([you don't seem to have GNU make; it is required]) fi sinclude(coroysync-default.m4) AC_PROG_CC AC_PROG_INSTALL AC_PROG_LN_S AC_PROG_MAKE_SET AC_PROG_RANLIB AC_CHECK_PROGS([GROFF], [groff]) AC_CHECK_PROGS([PKGCONFIG], [pkg-config]) AC_CHECK_PROGS([AUGTOOL], [augtool]) AC_CHECK_PROGS([DOT], [dot]) AC_CHECK_PROGS([DOXYGEN], [doxygen]) AC_CHECK_PROGS([AWK], [awk]) # Checks for libraries. AC_CHECK_LIB([dl], [dlopen]) AC_CHECK_LIB([pthread], [pthread_create]) AC_CHECK_LIB([socket], [socket]) AC_CHECK_LIB([nsl], [t_open]) AC_CHECK_LIB([rt], [sched_getscheduler]) PKG_CHECK_MODULES([LIBQB], [libqb]) AC_CHECK_LIB([qb], [qb_log_thread_priority_set], \ have_qb_log_thread_priority_set="yes", \ have_qb_log_thread_priority_set="no") if test "x${have_qb_log_thread_priority_set}" = xyes; then AC_DEFINE_UNQUOTED([HAVE_QB_LOG_THREAD_PRIORITY_SET], 1, [have qb_log_thread_priority_set]) fi +PKG_CHECK_MODULES([nss],[nss]) # Checks for header files. AC_FUNC_ALLOCA AC_HEADER_DIRENT AC_HEADER_STDC AC_HEADER_SYS_WAIT AC_CHECK_HEADERS([arpa/inet.h fcntl.h limits.h netdb.h netinet/in.h stdint.h \ stdlib.h string.h sys/ioctl.h sys/param.h sys/socket.h \ sys/time.h syslog.h unistd.h sys/types.h getopt.h malloc.h \ sys/sockio.h utmpx.h ifaddrs.h]) # Checks for typedefs, structures, and compiler characteristics. AC_C_CONST AC_TYPE_UID_T AC_C_INLINE AC_TYPE_INT16_T AC_TYPE_INT32_T AC_TYPE_INT64_T AC_TYPE_INT8_T AC_TYPE_SIZE_T AC_TYPE_SSIZE_T AC_HEADER_TIME AC_TYPE_UINT16_T AC_TYPE_UINT32_T AC_TYPE_UINT64_T AC_TYPE_UINT8_T AC_C_VOLATILE # Checks for library functions. AC_FUNC_CLOSEDIR_VOID AC_FUNC_ERROR_AT_LINE AC_REPLACE_FNMATCH AC_FUNC_FORK AC_PROG_GCC_TRADITIONAL AC_FUNC_MALLOC AC_FUNC_MEMCMP AC_FUNC_REALLOC AC_FUNC_SELECT_ARGTYPES AC_TYPE_SIGNAL AC_FUNC_VPRINTF AC_CHECK_FUNCS([alarm alphasort atexit bzero dup2 endgrent endpwent fcntl \ getcwd getpeerucred getpeereid gettimeofday inet_ntoa memmove \ memset mkdir scandir select socket strcasecmp strchr strdup \ strerror strrchr strspn strstr pthread_setschedparam \ sched_get_priority_max sched_setscheduler getifaddrs]) AC_CONFIG_FILES([Makefile exec/Makefile include/Makefile init/Makefile lib/Makefile common_lib/Makefile man/Makefile pkgconfig/Makefile test/Makefile cts/Makefile cts/agents/Makefile cts/CTSvars.py tools/Makefile conf/Makefile Doxyfile]) ### Local business dnl =============================================== dnl Functions / global M4 variables dnl =============================================== dnl Global list of LIB names m4_define([local_soname_list], [])dnl dnl Upcase parameter m4_define([local_upcase], [translit([$*], [a-z], [A-Z])])dnl dnl M4 macro for include lib/lib$1.soname and subst that m4_define([LIB_SONAME_IMPORT],[dnl m4_define([local_libname], local_upcase($1)[_SONAME])dnl m4_define([local_soname], translit(m4_sinclude(lib/lib$1.verso), [ ], []))dnl local_libname="local_soname"dnl m4_define([local_soname_list], m4_defn([local_soname_list])[,]local_libname[,]local_upcase($1))dnl AC_SUBST(local_libname)dnl ])dnl dnl M4 macro for print padspaces (used in LIB_MSG_RESULT). It takes 2 arguments, length of string to pad and desired dnl (padded) length m4_define([m4_printpadspace],[ifelse(m4_eval([$2 - $1 < 1]),[1],,[ ][m4_printpadspace([$1],m4_eval([$2 - 1]))])])dnl dnl Show AC_MSG_RESULT for specific libraries m4_define([LIB_MSG_RESULT], [ifelse([$#], [1], ,[dnl AC_MSG_RESULT([ $2 Library SONAME m4_printpadspace(len($2),8) = ${$1}]) LIB_MSG_RESULT(m4_shift(m4_shift($@)))dnl ])])dnl # =============================================== # Helpers # =============================================== ## helper for CC stuff cc_supports_flag() { local CPPFLAGS="$CPPFLAGS $@" AC_MSG_CHECKING([whether $CC supports "$@"]) AC_PREPROC_IFELSE([AC_LANG_PROGRAM([])], [RC=0; AC_MSG_RESULT([yes])], [RC=1; AC_MSG_RESULT([no])]) return $RC } ## cleanup AC_MSG_NOTICE(Sanitizing prefix: ${prefix}) case $prefix in NONE) prefix=/usr/local;; esac AC_MSG_NOTICE(Sanitizing exec_prefix: ${exec_prefix}) case $exec_prefix in dnl For consistency with Corosync, map NONE->$prefix NONE) exec_prefix=$prefix;; prefix) exec_prefix=$prefix;; esac ## local defines PACKAGE_FEATURES="" LINT_FLAGS="-weak -unrecog +posixlib +ignoresigns -fcnuse \ -badflag -D__gnuc_va_list=va_list -D__attribute\(x\)=" # default libraries SONAME SOMAJOR="4" SOMINOR="0" SOMICRO="0" SONAME="${SOMAJOR}.${SOMINOR}.${SOMICRO}" # specific libraries SONAME LIB_SONAME_IMPORT([cfg]) LIB_SONAME_IMPORT([cpg]) LIB_SONAME_IMPORT([quorum]) LIB_SONAME_IMPORT([sam]) LIB_SONAME_IMPORT([votequorum]) LIB_SONAME_IMPORT([cmap]) # local options AC_ARG_ENABLE([ansi], [ --enable-ansi : force to build with ANSI standards. ], [ default="no" ]) AC_ARG_ENABLE([fatal-warnings], [ --enable-fatal-warnings : enable fatal warnings. ], [ default="no" ]) AC_ARG_ENABLE([debug], [ --enable-debug : enable debug build. ], [ default="no" ]) AC_ARG_ENABLE([user-flags], [ --enable-user-flags : rely on user environment. ], [ default="no" ]) AC_ARG_ENABLE([coverage], [ --enable-coverage : coverage analysis of the codebase. ], [ default="no" ]) AC_ARG_ENABLE([small-memory-footprint], [ --enable-small-memory-footprint : Use small message queues and small messages sizes. ], [ default="no" ]) -AC_ARG_ENABLE([nss], - [ --enable-nss : Network Security Services encryption. ],, - [ enable_nss="yes" ]) - AC_ARG_ENABLE([dbus], [ --enable-dbus : dbus events. ],, [ enable_dbus="no" ]) AC_ARG_ENABLE([testagents], [ --enable-testagents : Install Test Agents. ],, [ default="no" ]) AC_ARG_ENABLE([rdma], [ --enable-rdma : Infiniband RDMA transport support ],, [ enable_rdma="no" ]) AM_CONDITIONAL(BUILD_RDMA, test x$enable_rdma = xyes) AC_ARG_ENABLE([monitoring], [ --enable-monitoring : resource monitoring ],, [ default="no" ]) AM_CONDITIONAL(BUILD_MONITORING, test x$enable_monitoring = xyes) AC_ARG_ENABLE([watchdog], [ --enable-watchdog : Watchdog support ],, [ default="no" ]) AM_CONDITIONAL(BUILD_WATCHDOG, test x$enable_watchdog = xyes) AC_ARG_ENABLE([augeas], [ --enable-augeas : Install the augeas lens for corosync.conf ],, [ enable_augeas="no" ]) AM_CONDITIONAL(INSTALL_AUGEAS, test x$enable_augeas = xyes) AC_ARG_ENABLE([systemd], [ --enable-systemd : Install systemd service files],, [ enable_systemd="no" ]) AM_CONDITIONAL(INSTALL_SYSTEMD, test x$enable_systemd = xyes) AC_ARG_WITH([initddir], [ --with-initddir=DIR : path to init script directory. ], [ INITDDIR="$withval" ], [ INITDDIR="$sysconfdir/init.d" ]) AC_ARG_ENABLE([snmp], [ --enable-snmp : SNMP protocol support ], [ default="no" ]) AC_ARG_ENABLE([xmlconf], [ --enable-xmlconf : XML configuration support ],, [ enable_xmlconf="no" ]) AM_CONDITIONAL(INSTALL_XMLCONF, test x$enable_xmlconf = xyes) # OS detection # THIS SECTION MUST DIE! CP=cp OS_LDL="-ldl" have_linux="no" case "$host_os" in *linux*) AC_DEFINE_UNQUOTED([COROSYNC_LINUX], [1], [Compiling for Linux platform]) OS_CFLAGS="" OS_CPPFLAGS="-D_GNU_SOURCE" OS_LDFLAGS="" OS_DYFLAGS="-rdynamic" DARWIN_OPTS="" have_linux="yes" ;; darwin*) AC_DEFINE_UNQUOTED([COROSYNC_DARWIN], [1], [Compiling for Darwin platform]) CP=rsync OS_CFLAGS="" OS_CPPFLAGS="" OS_LDFLAGS="" OS_DYFLAGS="" DARWIN_OPTS="-dynamiclib -bind_at_load \ -current_version ${SONAME} \ -compatibility_version ${SONAME} -install_name \$(libdir)/\$(@)" AC_DEFINE_UNQUOTED([MAP_ANONYMOUS], [MAP_ANON], [Shared memory define for Darwin platform]) AC_DEFINE_UNQUOTED([PATH_MAX], [4096], [Number of chars in a path name including nul]) AC_DEFINE_UNQUOTED([NAME_MAX], [255], [Number of chars in a file name]) ;; *bsd*) AC_DEFINE_UNQUOTED([COROSYNC_BSD], [1], [Compiling for BSD platform]) AC_DEFINE_UNQUOTED([MAP_ANONYMOUS], [MAP_ANON], [Shared memory define for Darwin platform]) OS_CFLAGS="" OS_CPPFLAGS="-I/usr/local/include" OS_LDFLAGS="-L/usr/local/lib" OS_DYFLAGS="-export-dynamic" DARWIN_OPTS="" OS_LDL="" case "$host_os" in *freebsd[[234567]]*) ;; *freebsd*) AC_DEFINE_UNQUOTED([COROSYNC_FREEBSD_GE_8], [1], [Compiling for FreeBSD >= 8 platform]) ;; esac ;; *solaris*) AC_DEFINE_UNQUOTED([COROSYNC_SOLARIS], [1], [Compiling for Solaris platform]) AC_DEFINE_UNQUOTED([TS_CLASS], [1], [Prevent being scheduled RR]) AC_DEFINE_UNQUOTED([_SEM_SEMUN_UNDEFINED], [1], [The semun structure is undefined]) CP=rsync OS_CFLAGS="" OS_CPPFLAGS="-D_REENTRANT" OS_LDFLAGS="" OS_DYFLAGS="-Wl,-z,lazyload" DARWIN_OPTS="" SOLARIS_OPTS=" " ;; *) AC_MSG_ERROR([Unsupported OS? hmmmm]) ;; esac AC_SUBST(CP) # *FLAGS handling goes here ENV_CFLAGS="$CFLAGS" ENV_CPPFLAGS="$CPPFLAGS" ENV_LDFLAGS="$LDFLAGS" # debug build stuff if test "x${enable_debug}" = xyes; then AC_DEFINE_UNQUOTED([DEBUG], [1], [Compiling Debugging code]) OPT_CFLAGS="-O0" PACKAGE_FEATURES="$PACKAGE_FEATURES debug" else OPT_CFLAGS="-O3" fi # gdb flags if test "x${GCC}" = xyes; then GDB_FLAGS="-ggdb3" else GDB_FLAGS="-g" fi -# Look for libnss -if test "x${enable_nss}" = xyes; then - PKG_CHECK_MODULES([nss],[nss]) - AC_DEFINE_UNQUOTED([HAVE_LIBNSS], 1, [have libnss]) - PACKAGE_FEATURES="$PACKAGE_FEATURES nss" -fi - # Look for dbus-1 if test "x${enable_dbus}" = xyes; then PKG_CHECK_MODULES([DBUS],[dbus-1]) AC_DEFINE_UNQUOTED([HAVE_DBUS], 1, [have dbus]) PACKAGE_FEATURES="$PACKAGE_FEATURES dbus" fi if test "x${enable_testagents}" = xyes; then AC_DEFINE_UNQUOTED([HAVE_TESTAGENTS], 1, [have testagents]) PACKAGE_FEATURES="$PACKAGE_FEATURES testagents" fi if test "x${enable_rdma}" = xyes; then PKG_CHECK_MODULES([rdmacm],[rdmacm]) PKG_CHECK_MODULES([ibverbs],[ibverbs]) AC_DEFINE_UNQUOTED([HAVE_RDMA], 1, [have rdmacm]) PACKAGE_FEATURES="$PACKAGE_FEATURES rdma" fi if test "x${enable_monitoring}" = xyes; then AC_CHECK_LIB([statgrab], [sg_get_mem_stats], have_libstatgrab="yes", have_libstatgrab="no") if test "x${have_libstatgrab}" = xyes; then AC_DEFINE_UNQUOTED([HAVE_LIBSTATGRAB], 1, [have libstatgrab]) statgrab_LIBS="-lstatgrab" else if test "x${have_linux}" = xno; then AC_MSG_ERROR(monitoring requires libstatgrab on non-linux systems) fi fi AC_SUBST([statgrab_LIBS]) AC_DEFINE_UNQUOTED([HAVE_MONITORING], 1, [have resource monitoring]) PACKAGE_FEATURES="$PACKAGE_FEATURES monitoring" fi if test "x${enable_watchdog}" = xyes; then AC_CHECK_HEADER(linux/watchdog.h,,AC_MSG_ERROR(watchdog requires linux/watchdog.h)) AC_CHECK_HEADER(linux/reboot.h,,AC_MSG_ERROR(watchdog requires linux/reboot.h)) AC_DEFINE_UNQUOTED([HAVE_WATCHDOG], 1, [have watchdog]) PACKAGE_FEATURES="$PACKAGE_FEATURES watchdog" fi if test "x${enable_augeas}" = xyes; then PACKAGE_FEATURES="$PACKAGE_FEATURES augeas" fi if test "x${enable_systemd}" = xyes; then PACKAGE_FEATURES="$PACKAGE_FEATURES systemd" fi if test "x${enable_xmlconf}" = xyes; then PACKAGE_FEATURES="$PACKAGE_FEATURES xmlconf" fi if test "x${enable_snmp}" = xyes; then SNMPCONFIG="" AC_CHECK_HEADERS(net-snmp/net-snmp-config.h) if test "x${ac_cv_header_net_snmp_net_snmp_config_h}" != "xyes"; then enable_snmp=no fi if test $enable_snmp != no; then AC_PATH_PROGS(SNMPCONFIG, net-snmp-config) if test "X${SNMPCONFIG}" = "X"; then AC_MSG_RESULT(You need the net_snmp development package to continue.) enable_snmp=no fi fi if test $enable_snmp != no; then AC_MSG_CHECKING(for special snmp libraries) SNMPLIBS=`$SNMPCONFIG --libs` AC_MSG_RESULT($SNMPLIBS) fi if test $enable_snmp != no; then savedLibs=$LIBS LIBS="$LIBS $SNMPLIBS" AC_CHECK_FUNCS(netsnmp_transport_open_client) if test $ac_cv_func_netsnmp_transport_open_client != yes; then AC_CHECK_FUNCS(netsnmp_tdomain_transport) if test $ac_cv_func_netsnmp_tdomain_transport != yes; then enable_snmp=no fi else AC_DEFINE_UNQUOTED([NETSNMPV54], $NETSNMP_NEW_SUPPORT, [have net-snmp5.4 over]) fi LIBS=$savedLibs fi AC_MSG_CHECKING(for snmp) AC_MSG_RESULT($enable_snmp) if test $enable_snmp = no; then enable_snmp=0 AC_MSG_ERROR(Unable to support SNMP) else enable_snmp=1 PACKAGE_FEATURES="$PACKAGE_FEATURES snmp" AC_DEFINE_UNQUOTED([ENABLE_SNMP], $enable_snmp, [Build in support for sending SNMP traps]) fi else enable_snmp=0 fi AC_SUBST([SNMPLIBS]) AM_CONDITIONAL(BUILD_SNMP, test "${enable_snmp}" = "1") # extra warnings EXTRA_WARNINGS="" WARNLIST=" all shadow missing-prototypes missing-declarations strict-prototypes declaration-after-statement pointer-arith write-strings cast-align bad-function-cast missing-format-attribute format=2 format-security format-nonliteral no-long-long unsigned-char gnu89-inline no-strict-aliasing " for j in $WARNLIST; do if cc_supports_flag -W$j; then EXTRA_WARNINGS="$EXTRA_WARNINGS -W$j"; fi done if test "x${enable_coverage}" = xyes && \ cc_supports_flag -ftest-coverage && \ cc_supports_flag -fprofile-arcs ; then AC_MSG_NOTICE([Enabling Coverage (enable -O0 by default)]) OPT_CFLAGS="-O0" COVERAGE_CFLAGS="-ftest-coverage -fprofile-arcs" COVERAGE_LDFLAGS="-ftest-coverage -fprofile-arcs" PACKAGE_FEATURES="$PACKAGE_FEATURES coverage" else COVERAGE_CFLAGS="" COVERAGE_LDFLAGS="" fi if test "x${enable_small_memory_footprint}" = xyes ; then AC_DEFINE_UNQUOTED([HAVE_SMALL_MEMORY_FOOTPRINT], 1, [have small_memory_footprint]) PACKAGE_FEATURES="$PACKAGE_FEATURES small-memory-footprint" fi if test "x${enable_ansi}" = xyes && \ cc_supports_flag -std=iso9899:199409 ; then AC_MSG_NOTICE([Enabling ANSI Compatibility]) ANSI_CPPFLAGS="-ansi -D_GNU_SOURCE -DANSI_ONLY" PACKAGE_FEATURES="$PACKAGE_FEATURES ansi" else ANSI_CPPFLAGS="" fi if test "x${enable_fatal_warnings}" = xyes && \ cc_supports_flag -Werror ; then AC_MSG_NOTICE([Enabling Fatal Warnings (-Werror)]) WERROR_CFLAGS="-Werror" PACKAGE_FEATURES="$PACKAGE_FEATURES fatal-warnings" else WERROR_CFLAGS="" fi # don't add addtional cflags if test "x${enable_user_flags}" = xyes; then OPT_CFLAGS="" GDB_FLAGS="" EXTRA_WARNINGS="" fi # final build of *FLAGS CFLAGS="$ENV_CFLAGS $OPT_CFLAGS $GDB_FLAGS $OS_CFLAGS \ $COVERAGE_CFLAGS $EXTRA_WARNINGS $WERROR_CFLAGS $NSS_CFLAGS" CPPFLAGS="$ENV_CPPFLAGS $ANSI_CPPFLAGS $OS_CPPFLAGS" LDFLAGS="$ENV_LDFLAGS $COVERAGE_LDFLAGS $OS_LDFLAGS" # substitute what we need: AC_SUBST([INITDDIR]) AC_SUBST([SOMAJOR]) AC_SUBST([SOMINOR]) AC_SUBST([SOMICRO]) AC_SUBST([SONAME]) AC_SUBST([OS_DYFLAGS]) AC_SUBST([OS_LDL]) AM_CONDITIONAL(INSTALL_TESTAGENTS, test -n "${enable_testagents}") AM_CONDITIONAL(INSTALL_MIB, test "${enable_snmp}" = "1") AM_CONDITIONAL(INSTALL_DBUSCONF, test "${enable_dbus}" = "1") AM_CONDITIONAL(AUGTOOL, test -n "${AUGTOOL}") AC_SUBST([NSS_LDFLAGS]) AM_CONDITIONAL(BUILD_DARWIN, test -n "${DARWIN_OPTS}") AM_CONDITIONAL(BUILD_SOLARIS, test -n "${SOLARIS_OPTS}") AC_SUBST([DARWIN_OPTS]) AC_SUBST([SOLARIS_OPTS]) AM_CONDITIONAL(BUILD_HTML_DOCS, test -n "${GROFF}") AC_SUBST([LINT_FLAGS]) AC_DEFINE_UNQUOTED([LOCALSTATEDIR], "$(eval echo ${localstatedir})", [localstate directory]) COROSYSCONFDIR=${sysconfdir}/corosync AC_SUBST([COROSYSCONFDIR]) AC_DEFINE_UNQUOTED([COROSYSCONFDIR], "$(eval echo ${COROSYSCONFDIR})", [corosync config directory]) AC_DEFINE_UNQUOTED([PACKAGE_FEATURES], "${PACKAGE_FEATURES}", [corosync built-in features]) AC_OUTPUT AC_MSG_RESULT([]) AC_MSG_RESULT([$PACKAGE configuration:]) AC_MSG_RESULT([ Version = ${VERSION}]) AC_MSG_RESULT([ Prefix = ${prefix}]) AC_MSG_RESULT([ Executables = ${sbindir}]) AC_MSG_RESULT([ Man pages = ${mandir}]) AC_MSG_RESULT([ Doc dir = ${docdir}]) AC_MSG_RESULT([ Libraries = ${libdir}]) AC_MSG_RESULT([ Header files = ${includedir}]) AC_MSG_RESULT([ Arch-independent files = ${datadir}]) AC_MSG_RESULT([ State information = ${localstatedir}]) AC_MSG_RESULT([ System configuration = ${sysconfdir}]) AC_MSG_RESULT([ System init.d directory = ${INITDDIR}]) AC_MSG_RESULT([ corosync config dir = ${COROSYSCONFDIR}]) AC_MSG_RESULT([ Features =${PACKAGE_FEATURES}]) AC_MSG_RESULT([]) AC_MSG_RESULT([$PACKAGE build info:]) AC_MSG_RESULT([ Library SONAME = ${SONAME}]) LIB_MSG_RESULT(m4_shift(local_soname_list))dnl AC_MSG_RESULT([ Default optimization = ${OPT_CFLAGS}]) AC_MSG_RESULT([ Default debug options = ${GDB_CFLAGS}]) AC_MSG_RESULT([ Extra compiler warnings = ${EXTRA_WARNING}]) AC_MSG_RESULT([ Env. defined CFLAG = ${ENV_CFLAGS}]) AC_MSG_RESULT([ Env. defined CPPFLAGS = ${ENV_CPPFLAGS}]) AC_MSG_RESULT([ Env. defined LDFLAGS = ${ENV_LDFLAGS}]) AC_MSG_RESULT([ OS defined CFLAGS = ${OS_CFLAGS}]) AC_MSG_RESULT([ OS defined CPPFLAGS = ${OS_CPPFLAGS}]) AC_MSG_RESULT([ OS defined LDFLAGS = ${OS_LDFLAGS}]) AC_MSG_RESULT([ OS defined LDL = ${OS_LDL}]) AC_MSG_RESULT([ OS defined DYFLAGS = ${OS_DYFLAGS}]) AC_MSG_RESULT([ ANSI defined CPPFLAGS = ${ANSI_CPPFLAGS}]) AC_MSG_RESULT([ Coverage CFLAGS = ${COVERAGE_CFLAGS}]) AC_MSG_RESULT([ Coverage LDFLAGS = ${COVERAGE_LDFLAGS}]) AC_MSG_RESULT([ Fatal War. CFLAGS = ${WERROR_CFLAGS}]) AC_MSG_RESULT([ Final CFLAGS = ${CFLAGS}]) AC_MSG_RESULT([ Final CPPFLAGS = ${CPPFLAGS}]) AC_MSG_RESULT([ Final LDFLAGS = ${LDFLAGS}]) diff --git a/corosync.spec.in b/corosync.spec.in index f73e39ee..7539c32e 100644 --- a/corosync.spec.in +++ b/corosync.spec.in @@ -1,299 +1,291 @@ %global alphatag @alphatag@ %global numcomm @numcomm@ %global dirty @dirty@ # Conditionals # Invoke "rpmbuild --without " or "rpmbuild --with " # to disable or enable specific features %bcond_with testagents %bcond_with watchdog %bcond_with monitoring %bcond_with snmp %bcond_with dbus %bcond_with rdma %bcond_with systemd -%bcond_with nss %bcond_with xmlconf Name: corosync Summary: The Corosync Cluster Engine and Application Programming Interfaces Version: @version@ Release: 1%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} License: BSD Group: System Environment/Base URL: http://ftp.corosync.org Source0: ftp://ftp:user@ftp.corosync.org/downloads/%{name}-%{version}/%{name}-%{version}%{?numcomm:.%{numcomm}}%{?alphatag:-%{alphatag}}%{?dirty:-%{dirty}}.tar.gz # Runtime bits Requires: corosynclib = %{version}-%{release} Requires(pre): /usr/sbin/useradd Requires(post): /sbin/chkconfig Requires(preun): /sbin/chkconfig Conflicts: openais <= 0.89, openais-devel <= 0.89 # Build bits %define buildtrunk 0 %{?_with_buildtrunk: %define buildtrunk 1} BuildRequires: libqb-devel +BuildRequires: nss-devel %if %{buildtrunk} BuildRequires: autoconf automake %endif -%if %{with nss} -BuildRequires: nss-devel -%endif %if %{with rdma} BuildRequires: libibverbs-devel librdmacm-devel %endif %if %{with snmp} BuildRequires: net-snmp-devel %endif %if %{with dbus} BuildRequires: dbus-devel %endif %if %{with systemd} BuildRequires: systemd-units %endif %if %{with xmlconf} Requires: libxslt %endif BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX) %prep %setup -q -n %{name}-%{version}%{?numcomm:.%{numcomm}}%{?alphatag:-%{alphatag}}%{?dirty:-%{dirty}} %build %if %{buildtrunk} ./autogen.sh %endif %if %{with rdma} export ibverbs_CFLAGS=-I/usr/include/infiniband \ export ibverbs_LIBS=-libverbs \ export rdmacm_CFLAGS=-I/usr/include/rdma \ export rdmacm_LIBS=-lrdmacm \ %endif %{configure} \ -%if %{with nss} - --enable-nss \ -%else - --disable-nss \ -%endif %if %{with testagents} --enable-testagents \ %endif %if %{with watchdog} --enable-watchdog \ %endif %if %{with monitoring} --enable-monitoring \ %endif %if %{with snmp} --enable-snmp \ %endif %if %{with dbus} --enable-dbus \ %endif %if %{with rdma} --enable-rdma \ %endif %if %{with systemd} --enable-systemd \ %endif %if %{with xmlconf} --enable-xmlconf \ %endif --with-initddir=%{_initrddir} make %{_smp_mflags} %install rm -rf %{buildroot} make install DESTDIR=%{buildroot} %if %{with dbus} mkdir -p -m 0700 %{buildroot}/%{_sysconfdir}/dbus-1/system.d install -m 644 %{_builddir}/%{name}-%{version}/conf/corosync-signals.conf %{buildroot}/%{_sysconfdir}/dbus-1/system.d/corosync-signals.conf %endif ## tree fixup # drop static libs rm -f %{buildroot}%{_libdir}/*.a # drop docs and html docs for now rm -rf %{buildroot}%{_docdir}/* %clean rm -rf %{buildroot} %description This package contains the Corosync Cluster Engine Executive, several default APIs and libraries, default configuration files, and an init script. %post if [ $1 -eq 1 ]; then /sbin/chkconfig --add corosync || : fi %preun if [ $1 -eq 0 ]; then /sbin/service corosync stop &>/dev/null || : /sbin/chkconfig --del corosync || : fi %files %defattr(-,root,root,-) %doc LICENSE SECURITY %{_sbindir}/corosync %{_sbindir}/corosync-keygen %{_sbindir}/corosync-cmapctl %{_sbindir}/corosync-cfgtool %{_sbindir}/corosync-fplay %{_sbindir}/corosync-cpgtool %{_sbindir}/corosync-quorumtool %{_sbindir}/corosync-notifyd %{_bindir}/corosync-blackbox %if %{with xmlconf} %{_bindir}/corosync-xmlproc %config(noreplace) %{_sysconfdir}/corosync/corosync.xml.example %dir %{_datadir}/corosync %dir %{_datadir}/corosync/xml2conf.xsl %{_mandir}/man8/corosync-xmlproc.8* %{_mandir}/man5/corosync.xml.5* %endif %dir %{_sysconfdir}/corosync %dir %{_sysconfdir}/corosync/uidgid.d %config(noreplace) %{_sysconfdir}/corosync/corosync.conf.example %config(noreplace) %{_sysconfdir}/corosync/corosync.conf.example.udpu %if %{with dbus} %{_sysconfdir}/dbus-1/system.d/corosync-signals.conf %endif %if %{with snmp} %{_datadir}/snmp/mibs/COROSYNC-MIB.txt %endif %if %{with systemd} %{_unitdir}/corosync.service %{_unitdir}/corosync-notifyd.service %else %{_initrddir}/corosync %{_initrddir}/corosync-notifyd %endif %dir %{_localstatedir}/lib/corosync %dir %{_localstatedir}/log/cluster %{_mandir}/man8/corosync_overview.8* %{_mandir}/man8/corosync.8* %{_mandir}/man8/corosync-blackbox.8* %{_mandir}/man8/corosync-cmapctl.8* %{_mandir}/man8/corosync-keygen.8* %{_mandir}/man8/corosync-cfgtool.8* %{_mandir}/man8/corosync-cpgtool.8* %{_mandir}/man8/corosync-fplay.8* %{_mandir}/man8/corosync-notifyd.8* %{_mandir}/man8/corosync-quorumtool.8* %{_mandir}/man5/corosync.conf.5* %{_mandir}/man5/votequorum.5* # optional testagent rpm # %if %{with testagents} %package -n corosync-testagents Summary: The Corosync Cluster Engine Test Agents Group: Development/Libraries Requires: %{name} = %{version}-%{release} %description -n corosync-testagents This package contains corosync test agents. %files -n corosync-testagents %defattr(755,root,root,-) %{_datadir}/corosync/tests/mem_leak_test.sh %{_datadir}/corosync/tests/net_breaker.sh %{_datadir}/corosync/tests/cmap-dispatch-deadlock.sh %{_datadir}/corosync/tests/shm_leak_audit.sh %{_bindir}/cpg_test_agent %{_bindir}/sam_test_agent %{_bindir}/votequorum_test_agent %endif # library # %package -n corosynclib Summary: The Corosync Cluster Engine Libraries Group: System Environment/Libraries Requires: %{name} = %{version}-%{release} %description -n corosynclib This package contains corosync libraries. %files -n corosynclib %defattr(-,root,root,-) %doc LICENSE %{_libdir}/libcfg.so.* %{_libdir}/libcpg.so.* %{_libdir}/libcmap.so.* %{_libdir}/libtotem_pg.so.* %{_libdir}/libquorum.so.* %{_libdir}/libvotequorum.so.* %{_libdir}/libsam.so.* %{_libdir}/libcorosync_common.so.* %post -n corosynclib -p /sbin/ldconfig %postun -n corosynclib -p /sbin/ldconfig %package -n corosynclib-devel Summary: The Corosync Cluster Engine Development Kit Group: Development/Libraries Requires: corosynclib = %{version}-%{release} Requires: pkgconfig Provides: corosync-devel = %{version} Obsoletes: corosync-devel < 0.92-7 %description -n corosynclib-devel This package contains include files and man pages used to develop using The Corosync Cluster Engine APIs. %files -n corosynclib-devel %defattr(-,root,root,-) %doc LICENSE %dir %{_includedir}/corosync/ %{_includedir}/corosync/corodefs.h %{_includedir}/corosync/cfg.h %{_includedir}/corosync/cmap.h %{_includedir}/corosync/corotypes.h %{_includedir}/corosync/cpg.h %{_includedir}/corosync/hdb.h %{_includedir}/corosync/sam.h %{_includedir}/corosync/quorum.h %{_includedir}/corosync/votequorum.h %dir %{_includedir}/corosync/totem/ %{_includedir}/corosync/totem/totem.h %{_includedir}/corosync/totem/totemip.h %{_includedir}/corosync/totem/totempg.h %{_libdir}/libcfg.so %{_libdir}/libcpg.so %{_libdir}/libcmap.so %{_libdir}/libtotem_pg.so %{_libdir}/libquorum.so %{_libdir}/libvotequorum.so %{_libdir}/libsam.so %{_libdir}/libcorosync_common.so %{_libdir}/pkgconfig/*.pc %{_mandir}/man3/cpg_*3* %{_mandir}/man3/quorum_*3* %{_mandir}/man3/votequorum_*3* %{_mandir}/man3/sam_*3* %{_mandir}/man8/cpg_overview.8* %{_mandir}/man8/votequorum_overview.8* %{_mandir}/man8/sam_overview.8* %{_mandir}/man3/cmap_*3* %{_mandir}/man8/cmap_overview.8* %{_mandir}/man8/quorum_overview.8* %changelog * @date@ Autotools generated version - @version@-1-@numcomm@.@alphatag@.@dirty@ - Autotools generated version diff --git a/cts/agents/Makefile.am b/cts/agents/Makefile.am index d9aaa846..6e8bb6a6 100644 --- a/cts/agents/Makefile.am +++ b/cts/agents/Makefile.am @@ -1,68 +1,69 @@ # # Copyright (c) 2010 Red Hat, Inc. # # Authors: Angus Salkeld (asalkeld@redhat.com) # # This software licensed under BSD license, the text of which follows: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # - Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # - Neither the name of the MontaVista Software, Inc. nor the names of its # contributors may be used to endorse or promote products derived from this # software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF # THE POSSIBILITY OF SUCH DAMAGE. MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include \ -I$(top_builddir)/include/corosync SOURCES = TEST_AGENTS = cpg_test_agent sam_test_agent votequorum_test_agent if INSTALL_TESTAGENTS agentdir = $(datadir)/$(PACKAGE)/tests bin_PROGRAMS = $(TEST_AGENTS) dist_agent_SCRIPTS = mem_leak_test.sh net_breaker.sh cmap-dispatch-deadlock.sh shm_leak_audit.sh AM_CFLAGS = -fPIC else noinst_PROGRAMS = $(TEST_AGENTS) noinst_SCRIPTS = mem_leak_test.sh net_breaker.sh cmap-dispatch-deadlock.sh shm_leak_audit.sh endif noinst_HEADERS = common_test_agent.h cpg_test_agent_SOURCES = cpg_test_agent.c common_test_agent.c -cpg_test_agent_LDADD = -lcpg -lcfg ../../exec/crypto.o -lcorosync_common $(LIBQB_LIBS) +cpg_test_agent_CFLAGS = $(nss_CFLAGS) +cpg_test_agent_LDADD = -lcpg -lcfg -lcorosync_common $(LIBQB_LIBS) $(nss_LIBS) cpg_test_agent_LDFLAGS = -L../../lib -L. -L../../common_lib sam_test_agent_SOURCES = sam_test_agent.c common_test_agent.c sam_test_agent_LDADD = -lsam -lquorum -lcmap -lcorosync_common $(LIBQB_LIBS) sam_test_agent_LDFLAGS = -L../../lib -L../../common_lib votequorum_test_agent_SOURCES = votequorum_test_agent.c common_test_agent.c votequorum_test_agent_LDADD = -lvotequorum -lquorum -lcorosync_common $(LIBQB_LIBS) votequorum_test_agent_LDFLAGS = -L../../lib -L../../common_lib clean-local: rm -f *.o *.a *.so* *.da *.bb *.bbg lint: -splint $(LINT_FLAGS) $(CFLAGS) *.c diff --git a/cts/agents/cpg_test_agent.c b/cts/agents/cpg_test_agent.c index e99f5229..f9e0350d 100644 --- a/cts/agents/cpg_test_agent.c +++ b/cts/agents/cpg_test_agent.c @@ -1,769 +1,782 @@ /* * Copyright (c) 2010 Red Hat, Inc. * * All rights reserved. * * Author: Angus Salkeld (asalkeld@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include "../../exec/crypto.h" #include "common_test_agent.h" +#include +#include typedef enum { MSG_OK, MSG_NODEID_ERR, MSG_PID_ERR, MSG_SEQ_ERR, MSG_SIZE_ERR, MSG_SHA1_ERR, } msg_status_t; typedef struct { uint32_t nodeid; pid_t pid; unsigned char sha1[20]; uint32_t seq; size_t size; unsigned char payload[0]; } msg_t; #define LOG_STR_SIZE 80 typedef struct { char log[LOG_STR_SIZE]; struct list_head list; } log_entry_t; static char big_and_buf[HOW_BIG_AND_BUF]; static int32_t record_config_events_g = 0; static int32_t record_messages_g = 0; static cpg_handle_t cpg_handle = 0; static corosync_cfg_handle_t cfg_handle = 0; static int32_t cpg_fd = -1; static int32_t cfg_fd = -1; static struct list_head config_chg_log_head; static struct list_head msg_log_head; static pid_t my_pid; static uint32_t my_nodeid; static int32_t my_seq; static int32_t use_zcb = QB_FALSE; static int32_t my_msgs_to_send; static int32_t my_msgs_sent; static int32_t total_stored_msgs = 0; static int32_t total_msgs_revd = 0; static int32_t in_cnchg = 0; static int32_t pcmk_test = 0; +PK11Context* sha1_context; static void send_some_more_messages (void * unused); static char* err_status_string (char * buf, size_t buf_len, msg_status_t status) { switch (status) { case MSG_OK: strncpy (buf, "OK", buf_len); break; case MSG_NODEID_ERR: strncpy (buf, "NODEID_ERR", buf_len); break; case MSG_PID_ERR: strncpy (buf, "PID_ERR", buf_len); break; case MSG_SEQ_ERR: strncpy (buf, "SEQ_ERR", buf_len); break; case MSG_SIZE_ERR: strncpy (buf, "SIZE_ERR", buf_len); break; case MSG_SHA1_ERR: strncpy (buf, "SHA1_ERR", buf_len); break; default: strncpy (buf, "UNKNOWN_ERR", buf_len); break; } if (buf_len > 0) { buf[buf_len - 1] = '\0'; } return buf; } static void delivery_callback ( cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { log_entry_t *log_pt; msg_t *msg_pt = (msg_t*)msg; msg_status_t status = MSG_OK; char status_buf[20]; unsigned char sha1_compare[20]; - hash_state sha1_hash; + unsigned int sha1_len; if (record_messages_g == 0) { return; } if (nodeid != msg_pt->nodeid) { status = MSG_NODEID_ERR; } if (pid != msg_pt->pid) { status = MSG_PID_ERR; } if (msg_len != msg_pt->size) { status = MSG_SIZE_ERR; } - sha1_init (&sha1_hash); - sha1_process (&sha1_hash, msg_pt->payload, (msg_pt->size - sizeof (msg_t))); - sha1_done (&sha1_hash, sha1_compare); + PK11_DigestBegin(sha1_context); + PK11_DigestOp(sha1_context, msg_pt->payload, (msg_pt->size - sizeof (msg_t))); + PK11_DigestFinal(sha1_context, sha1_compare, &sha1_len, sizeof(sha1_compare)); if (memcmp (sha1_compare, msg_pt->sha1, 20) != 0) { qb_log (LOG_ERR, "msg seq:%d; incorrect hash", msg_pt->seq); status = MSG_SHA1_ERR; } log_pt = malloc (sizeof(log_entry_t)); list_init (&log_pt->list); snprintf (log_pt->log, LOG_STR_SIZE, "%d:%d:%d:%s;", msg_pt->nodeid, msg_pt->seq, my_seq, err_status_string (status_buf, 20, status)); list_add_tail (&log_pt->list, &msg_log_head); total_stored_msgs++; total_msgs_revd++; my_seq++; if ((total_msgs_revd % 1000) == 0) { qb_log (LOG_INFO, "rx %d", total_msgs_revd); } } static void config_change_callback ( cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { int i; log_entry_t *log_pt; /* group_name,ip,pid,join|leave */ if (record_config_events_g > 0) { qb_log (LOG_INFO, "got cpg event[recording] for group %s", groupName->value); } else { qb_log (LOG_INFO, "got cpg event[ignoring] for group %s", groupName->value); } for (i = 0; i < left_list_entries; i++) { if (record_config_events_g > 0) { log_pt = malloc (sizeof(log_entry_t)); list_init (&log_pt->list); snprintf (log_pt->log, LOG_STR_SIZE, "%s,%d,%d,left", groupName->value, left_list[i].nodeid,left_list[i].pid); list_add_tail(&log_pt->list, &config_chg_log_head); qb_log (LOG_INFO, "cpg event %s", log_pt->log); } } for (i = 0; i < joined_list_entries; i++) { if (record_config_events_g > 0) { log_pt = malloc (sizeof(log_entry_t)); list_init (&log_pt->list); snprintf (log_pt->log, LOG_STR_SIZE, "%s,%d,%d,join", groupName->value, joined_list[i].nodeid,joined_list[i].pid); list_add_tail (&log_pt->list, &config_chg_log_head); qb_log (LOG_INFO, "cpg event %s", log_pt->log); } } if (pcmk_test == 1) { in_cnchg = 1; send_some_more_messages (NULL); in_cnchg = 0; } } static void my_shutdown_callback (corosync_cfg_handle_t handle, corosync_cfg_shutdown_flags_t flags) { qb_log (LOG_CRIT, "flags:%d", flags); if (flags & COROSYNC_CFG_SHUTDOWN_FLAG_REQUEST) { corosync_cfg_replyto_shutdown (cfg_handle, COROSYNC_CFG_SHUTDOWN_FLAG_YES); } } static corosync_cfg_callbacks_t cfg_callbacks = { .corosync_cfg_shutdown_callback = my_shutdown_callback, }; static cpg_callbacks_t callbacks = { .cpg_deliver_fn = delivery_callback, .cpg_confchg_fn = config_change_callback, }; static void record_messages (void) { record_messages_g = 1; qb_log (LOG_INFO, "record:%d", record_messages_g); } static void record_config_events (int sock) { char response[100]; record_config_events_g = 1; qb_log (LOG_INFO, "record:%d", record_config_events_g); snprintf (response, 100, "%s", OK_STR); send (sock, response, strlen (response), 0); } static void read_config_event (int sock) { const char *empty = "None"; struct list_head * list = config_chg_log_head.next; log_entry_t *entry; if (list != &config_chg_log_head) { entry = list_entry (list, log_entry_t, list); send (sock, entry->log, strlen (entry->log), 0); list_del (&entry->list); free (entry); } else { qb_log (LOG_DEBUG, "no events in list"); send (sock, empty, strlen (empty), 0); } } static void read_messages (int sock, char* atmost_str) { struct list_head * list; log_entry_t *entry; int atmost = atoi (atmost_str); int packed = 0; ssize_t rc; if (atmost == 0) atmost = 1; if (atmost > (HOW_BIG_AND_BUF / LOG_STR_SIZE)) atmost = (HOW_BIG_AND_BUF / LOG_STR_SIZE); big_and_buf[0] = '\0'; for (list = msg_log_head.next; (!list_empty (&msg_log_head) && packed < atmost); ) { entry = list_entry (list, log_entry_t, list); strcat (big_and_buf, entry->log); packed++; list = list->next; list_del (&entry->list); free (entry); total_stored_msgs--; } if (packed == 0) { strcpy (big_and_buf, "None"); } else { if ((total_stored_msgs % 1000) == 0) { qb_log(LOG_INFO, "sending %d; total_stored_msgs:%d; len:%d", packed, total_stored_msgs, (int)strlen (big_and_buf)); } } rc = send (sock, big_and_buf, strlen (big_and_buf), 0); assert(rc = strlen (big_and_buf)); } static qb_loop_timer_handle more_messages_timer_handle; static void send_some_more_messages_later (void) { cpg_dispatch (cpg_handle, CS_DISPATCH_ALL); qb_loop_timer_add ( ta_poll_handle_get(), QB_LOOP_MED, 300*QB_TIME_NS_IN_MSEC, NULL, send_some_more_messages, &more_messages_timer_handle); } static void send_some_more_messages_zcb (void) { msg_t *my_msg; int i; int send_now; size_t payload_size; size_t total_size; - hash_state sha1_hash; + unsigned int sha1_len; cs_error_t res; cpg_flow_control_state_t fc_state; void *zcb_buffer; if (cpg_fd < 0) return; send_now = my_msgs_to_send; payload_size = (rand() % 100000); total_size = payload_size + sizeof (msg_t); cpg_zcb_alloc (cpg_handle, total_size, &zcb_buffer); my_msg = (msg_t*)zcb_buffer; qb_log(LOG_DEBUG, "send_now:%d", send_now); my_msg->pid = my_pid; my_msg->nodeid = my_nodeid; my_msg->size = sizeof (msg_t) + payload_size; my_msg->seq = my_msgs_sent; for (i = 0; i < payload_size; i++) { my_msg->payload[i] = i; } - sha1_init (&sha1_hash); - sha1_process (&sha1_hash, my_msg->payload, payload_size); - sha1_done (&sha1_hash, my_msg->sha1); + PK11_DigestBegin(sha1_context); + PK11_DigestOp(sha1_context, my_msg->payload, payload_size); + PK11_DigestFinal(sha1_context, my_msg->sha1, &sha1_len, sizeof(my_msg->sha1)); for (i = 0; i < send_now; i++) { res = cpg_flow_control_state_get (cpg_handle, &fc_state); if (res == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) { /* lets do this later */ send_some_more_messages_later (); qb_log (LOG_INFO, "flow control enabled."); goto free_buffer; } res = cpg_zcb_mcast_joined (cpg_handle, CPG_TYPE_AGREED, zcb_buffer, total_size); if (res == CS_ERR_TRY_AGAIN) { /* lets do this later */ send_some_more_messages_later (); goto free_buffer; } else if (res != CS_OK) { qb_log (LOG_ERR, "cpg_mcast_joined error:%d, exiting.", res); exit (-2); } my_msgs_sent++; my_msgs_to_send--; } free_buffer: cpg_zcb_free (cpg_handle, zcb_buffer); } #define cs_repeat(counter, max, code) do { \ code; \ if (res == CS_ERR_TRY_AGAIN) { \ counter++; \ sleep(counter); \ } \ } while (res == CS_ERR_TRY_AGAIN && counter < max) static unsigned char buffer[200000]; static void send_some_more_messages_normal (void) { msg_t my_msg; struct iovec iov[2]; int i; int send_now; size_t payload_size; - hash_state sha1_hash; cs_error_t res; cpg_flow_control_state_t fc_state; int retries = 0; time_t before; + unsigned int sha1_len; if (cpg_fd < 0) return; send_now = my_msgs_to_send; qb_log (LOG_TRACE, "send_now:%d", send_now); my_msg.pid = my_pid; my_msg.nodeid = my_nodeid; payload_size = (rand() % 10000); my_msg.size = sizeof (msg_t) + payload_size; my_msg.seq = my_msgs_sent; for (i = 0; i < payload_size; i++) { buffer[i] = i; } - sha1_init (&sha1_hash); - sha1_process (&sha1_hash, buffer, payload_size); - sha1_done (&sha1_hash, my_msg.sha1); + PK11_DigestBegin(sha1_context); + PK11_DigestOp(sha1_context, buffer, payload_size); + PK11_DigestFinal(sha1_context, my_msg.sha1, &sha1_len, sizeof(my_msg.sha1)); iov[0].iov_len = sizeof (msg_t); iov[0].iov_base = &my_msg; iov[1].iov_len = payload_size; iov[1].iov_base = buffer; for (i = 0; i < send_now; i++) { if (in_cnchg && pcmk_test) { retries = 0; before = time(NULL); cs_repeat(retries, 30, res = cpg_mcast_joined(cpg_handle, CPG_TYPE_AGREED, iov, 2)); if (retries > 20) { qb_log (LOG_ERR, "cs_repeat: blocked for :%lu secs.", (unsigned long)(time(NULL) - before)); } if (res != CS_OK) { qb_log (LOG_ERR, "cpg_mcast_joined error:%d.", res); return; } } else { res = cpg_flow_control_state_get (cpg_handle, &fc_state); if (res == CS_OK && fc_state == CPG_FLOW_CONTROL_ENABLED) { /* lets do this later */ send_some_more_messages_later (); qb_log (LOG_INFO, "flow control enabled."); return; } res = cpg_mcast_joined (cpg_handle, CPG_TYPE_AGREED, iov, 2); if (res == CS_ERR_TRY_AGAIN) { /* lets do this later */ send_some_more_messages_later (); if (i > 0) { qb_log (LOG_INFO, "TRY_AGAIN %d to send.", my_msgs_to_send); } return; } else if (res != CS_OK) { qb_log (LOG_ERR, "cpg_mcast_joined error:%d, exiting.", res); exit (-2); } } my_msgs_sent++; my_msg.seq = my_msgs_sent; my_msgs_to_send--; } qb_log (LOG_TRACE, "sent %d; to send %d.", my_msgs_sent, my_msgs_to_send); } static void send_some_more_messages (void * unused) { if (use_zcb) { send_some_more_messages_zcb (); } else { send_some_more_messages_normal (); } } static void msg_blaster (int sock, char* num_to_send_str) { my_msgs_to_send = atoi (num_to_send_str); my_msgs_sent = 0; my_seq = 1; my_pid = getpid(); use_zcb = QB_FALSE; total_stored_msgs = 0; cpg_local_get (cpg_handle, &my_nodeid); /* control the limits */ if (my_msgs_to_send <= 0) my_msgs_to_send = 1; if (my_msgs_to_send > 10000) my_msgs_to_send = 10000; send_some_more_messages_normal (); } static void context_test (int sock) { char response[100]; char *cmp; cpg_context_set (cpg_handle, response); cpg_context_get (cpg_handle, (void**)&cmp); if (response != cmp) { snprintf (response, 100, "%s", FAIL_STR); } else { snprintf (response, 100, "%s", OK_STR); } send (sock, response, strlen (response), 0); } static void msg_blaster_zcb (int sock, char* num_to_send_str) { my_msgs_to_send = atoi (num_to_send_str); my_seq = 1; my_pid = getpid(); use_zcb = QB_TRUE; total_stored_msgs = 0; cpg_local_get (cpg_handle, &my_nodeid); /* control the limits */ if (my_msgs_to_send <= 0) my_msgs_to_send = 1; if (my_msgs_to_send > 10000) my_msgs_to_send = 10000; send_some_more_messages_zcb (); } static int cfg_dispatch_wrapper_fn ( int fd, int revents, void *data) { cs_error_t error; if (revents & POLLHUP || revents & POLLERR) { qb_log (LOG_ERR, "got POLLHUP disconnecting from CFG"); corosync_cfg_finalize(cfg_handle); cfg_handle = 0; return -1; } error = corosync_cfg_dispatch (cfg_handle, CS_DISPATCH_ALL); if (error == CS_ERR_LIBRARY) { qb_log (LOG_ERR, "got LIB error disconnecting from CFG."); corosync_cfg_finalize(cfg_handle); cfg_handle = 0; return -1; } return 0; } static int cpg_dispatch_wrapper_fn ( int fd, int revents, void *data) { cs_error_t error; if (revents & POLLHUP || revents & POLLERR) { qb_log (LOG_ERR, "got POLLHUP disconnecting from CPG"); cpg_finalize(cpg_handle); cpg_handle = 0; return -1; } error = cpg_dispatch (cpg_handle, CS_DISPATCH_ALL); if (error == CS_ERR_LIBRARY) { qb_log (LOG_ERR, "got LIB error disconnecting from CPG"); cpg_finalize(cpg_handle); cpg_handle = 0; return -1; } return 0; } static void do_command (int sock, char* func, char*args[], int num_args) { int result; char response[100]; struct cpg_name group_name; qb_log (LOG_TRACE, "RPC:%s() called.", func); if (strcmp ("cpg_mcast_joined",func) == 0) { struct iovec iov[5]; int a; for (a = 0; a < num_args; a++) { iov[a].iov_base = args[a]; iov[a].iov_len = strlen(args[a])+1; } cpg_mcast_joined (cpg_handle, CPG_TYPE_AGREED, iov, num_args); } else if (strcmp ("cpg_join",func) == 0) { strcpy (group_name.value, args[0]); group_name.length = strlen(args[0]); - result = cpg_join (cpg_handle, &group_name); if (result != CS_OK) { qb_log (LOG_ERR, "Could not join process group, error %d", result); exit (1); } qb_log (LOG_INFO, "called cpg_join(%s)!", group_name.value); } else if (strcmp ("cpg_leave",func) == 0) { strcpy (group_name.value, args[0]); group_name.length = strlen(args[0]); result = cpg_leave (cpg_handle, &group_name); if (result != CS_OK) { qb_log (LOG_ERR, "Could not leave process group, error %d", result); exit (1); } qb_log (LOG_INFO, "called cpg_leave(%s)!", group_name.value); } else if (strcmp ("cpg_initialize",func) == 0) { int retry_count = 0; result = cpg_initialize (&cpg_handle, &callbacks); while (result != CS_OK) { qb_log (LOG_ERR, "cpg_initialize error %d (attempt %d)", result, retry_count); if (retry_count >= 3) { exit (1); } sleep(1); retry_count++; result = cpg_initialize (&cpg_handle, &callbacks); } cpg_fd_get (cpg_handle, &cpg_fd); qb_loop_poll_add (ta_poll_handle_get(), QB_LOOP_MED, cpg_fd, POLLIN|POLLNVAL, NULL, cpg_dispatch_wrapper_fn); } else if (strcmp ("cpg_local_get", func) == 0) { unsigned int local_nodeid; cpg_local_get (cpg_handle, &local_nodeid); snprintf (response, 100, "%u",local_nodeid); send (sock, response, strlen (response), 0); } else if (strcmp ("cpg_finalize", func) == 0) { if (cpg_handle > 0) { cpg_finalize (cpg_handle); cpg_handle = 0; } } else if (strcmp ("record_config_events", func) == 0) { record_config_events (sock); } else if (strcmp ("record_messages", func) == 0) { record_messages (); } else if (strcmp ("read_config_event", func) == 0) { read_config_event (sock); } else if (strcmp ("read_messages", func) == 0) { read_messages (sock, args[0]); } else if (strcmp ("msg_blaster_zcb", func) == 0) { msg_blaster_zcb (sock, args[0]); } else if (strcmp ("pcmk_test", func) == 0) { pcmk_test = 1; } else if (strcmp ("msg_blaster", func) == 0) { msg_blaster (sock, args[0]); } else if (strcmp ("context_test", func) == 0) { context_test (sock); } else if (strcmp ("are_you_ok_dude", func) == 0) { snprintf (response, 100, "%s", OK_STR); send (sock, response, strlen (response), 0); } else if (strcmp ("cfg_shutdown", func) == 0) { qb_log (LOG_INFO, "calling %s() called!", func); result = corosync_cfg_try_shutdown (cfg_handle, COROSYNC_CFG_SHUTDOWN_FLAG_REQUEST); qb_log (LOG_INFO,"%s() returned %d!", func, result); } else if (strcmp ("cfg_initialize",func) == 0) { int retry_count = 0; qb_log (LOG_INFO,"%s() called!", func); result = corosync_cfg_initialize (&cfg_handle, &cfg_callbacks); while (result != CS_OK) { qb_log (LOG_ERR, "cfg_initialize error %d (attempt %d)", result, retry_count); if (retry_count >= 3) { exit (1); } sleep(1); retry_count++; result = corosync_cfg_initialize (&cfg_handle, &cfg_callbacks); } qb_log (LOG_INFO,"corosync_cfg_initialize() == %d", result); result = corosync_cfg_fd_get (cfg_handle, &cfg_fd); qb_log (LOG_INFO,"corosync_cfg_fd_get() == %d", result); qb_loop_poll_add (ta_poll_handle_get(), QB_LOOP_MED, cfg_fd, POLLIN|POLLNVAL, NULL, cfg_dispatch_wrapper_fn); } else { qb_log(LOG_ERR, "RPC:%s not supported!", func); } } static void my_pre_exit(void) { qb_log (LOG_INFO, "%s PRE EXIT", __FILE__); if (cpg_handle > 0) { cpg_finalize (cpg_handle); cpg_handle = 0; } if (cfg_handle > 0) { corosync_cfg_finalize (cfg_handle); cfg_handle = 0; } + + PK11_DestroyContext(sha1_context, PR_TRUE); } int main(int argc, char *argv[]) { list_init (&msg_log_head); list_init (&config_chg_log_head); + if (NSS_NoDB_Init(".") != SECSuccess) { + qb_log(LOG_ERR, "Couldn't initialize nss"); + exit (0); + } + + if ((sha1_context = PK11_CreateDigestContext(SEC_OID_SHA1)) == NULL) { + qb_log(LOG_ERR, "Couldn't initialize nss"); + exit (0); + } + return test_agent_run ("cpg_test_agent", 9034, do_command, my_pre_exit); } diff --git a/cts/corotests.py b/cts/corotests.py index f52ca3d0..f7b29a52 100644 --- a/cts/corotests.py +++ b/cts/corotests.py @@ -1,1614 +1,1614 @@ __copyright__=''' Copyright (c) 2010 Red Hat, Inc. ''' # All rights reserved. # # Author: Angus Salkeld # # This software licensed under BSD license, the text of which follows: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # - Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # - Neither the name of the MontaVista Software, Inc. nor the names of its # contributors may be used to endorse or promote products derived from this # software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF # THE POSSIBILITY OF SUCH DAMAGE. import random import socket from UserDict import UserDict from cts.CTStests import * from corosync import CpgTestAgent ################################################################### class CoroTest(CTSTest): ''' basic class to make sure that new configuration is applied and old configuration is removed. ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.start = StartTest(cm) self.stop = StopTest(cm) self.config = {} self.config['logging/logger_subsys[1]/subsys'] = 'MAIN' self.config['logging/logger_subsys[1]/debug'] = 'on' self.need_all_up = True self.CM.start_cpg = True self.cpg_name = 'cts_group' def setup(self, node): ret = CTSTest.setup(self, node) # setup the authkey localauthkey = '/tmp/authkey' if not os.path.exists(localauthkey): self.CM.rsh(node, 'corosync-keygen -l') self.CM.rsh.cp("%s:%s" % (node, "/etc/corosync/authkey"), localauthkey) for n in self.CM.Env["nodes"]: if n is not node: #copy key onto other nodes self.CM.rsh.cp(localauthkey, "%s:%s" % (n, "/etc/corosync/authkey")) # copy over any new config for c in self.config: self.CM.new_config[c] = self.config[c] # apply the config self.CM.apply_new_config(self.need_all_up) # start/stop all corosyncs' for n in self.CM.Env["nodes"]: if self.need_all_up and not self.CM.StataCM(n): self.incr("started") self.start(n) if self.need_all_up and self.CM.start_cpg: self.CM.cpg_agent[n].clean_start() self.CM.cpg_agent[n].cpg_join(self.cpg_name) self.CM.cpg_agent[n].cfg_initialize() if not self.need_all_up and self.CM.StataCM(n): self.incr("stopped") self.stop(n) return ret def config_valid(self, config): return True def teardown(self, node): self.CM.apply_default_config() return CTSTest.teardown(self, node) ################################################################### class CpgContextTest(CoroTest): def __init__(self, cm): self.name="CpgContextTest" CoroTest.__init__(self, cm) self.CM.start_cpg = True def __call__(self, node): self.incr("calls") res = self.CM.cpg_agent[node].context_test() if 'OK' in res: return self.success() else: return self.failure('context_test failed') ################################################################### class CpgConfigChangeBase(CoroTest): ''' join a cpg group on each node, and test that the following causes a leave event: - a call to cpg_leave() - app exit - node leave - node leave (with large token timeout) ''' def setup(self, node): ret = CoroTest.setup(self, node) self.listener = None self.wobbly = None for n in self.CM.Env["nodes"]: if self.wobbly is None: self.wobbly = n elif self.listener is None: self.listener = n if self.CM.cpg_agent.has_key(self.wobbly): self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get() if self.CM.cpg_agent.has_key(self.listener): self.CM.cpg_agent[self.listener].record_config_events(truncate=True) return ret def wait_for_config_change(self): found = False max_timeout = 60 * 15 waited = 0 printit = 0 self.CM.log("Waiting for config change on " + self.listener) while not found: try: event = self.CM.cpg_agent[self.listener].read_config_event() except: return self.failure('connection to test cpg_agent failed.') if not event == None: self.CM.debug("RECEIVED: " + str(event)) if event == None: if waited >= max_timeout: return self.failure("timedout(" + str(waited) + " sec) == no event!") else: time.sleep(1) waited = waited + 1 printit = printit + 1 if printit is 60: print 'waited ' + str(waited) + ' seconds' printit = 0 elif str(event.node_id) in str(self.wobbly_id) and not event.is_member: self.CM.log("Got the config change in " + str(waited) + " seconds") found = True else: self.CM.debug("No match") self.CM.debug("wobbly nodeid:" + str(self.wobbly_id)) self.CM.debug("event nodeid:" + str(event.node_id)) self.CM.debug("event.is_member:" + str(event.is_member)) if found: return self.success() ################################################################### class CpgCfgChgOnGroupLeave(CpgConfigChangeBase): def __init__(self, cm): CpgConfigChangeBase.__init__(self,cm) self.name="CpgCfgChgOnGroupLeave" def failure_action(self): self.CM.log("calling cpg_leave() on " + self.wobbly) self.CM.cpg_agent[self.wobbly].cpg_leave(self.cpg_name) def __call__(self, node): self.incr("calls") self.failure_action() return self.wait_for_config_change() ################################################################### class CpgCfgChgOnNodeLeave(CpgConfigChangeBase): def __init__(self, cm): CpgConfigChangeBase.__init__(self,cm) self.name="CpgCfgChgOnNodeLeave" def failure_action(self): self.CM.log("stopping corosync on " + self.wobbly) self.stop(self.wobbly) def __call__(self, node): self.incr("calls") self.failure_action() return self.wait_for_config_change() ################################################################### class CpgCfgChgOnLowestNodeJoin(CTSTest): ''' 1) stop all nodes 2) start all but the node with the smallest ip address 3) start recording events 4) start the last node ''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name="CpgCfgChgOnLowestNodeJoin" self.start = StartTest(cm) self.stop = StopTest(cm) self.config = {} self.need_all_up = False def config_valid(self, config): return True def lowest_ip_set(self): self.lowest = None for n in self.CM.Env["nodes"]: if self.lowest is None: self.lowest = n self.CM.log("lowest node is " + self.lowest) def setup(self, node): # stop all nodes for n in self.CM.Env["nodes"]: self.CM.StopaCM(n) self.lowest_ip_set() # copy over any new config for c in self.config: self.CM.new_config[c] = self.config[c] # install the config self.CM.install_all_config() # start all but lowest self.listener = None for n in self.CM.Env["nodes"]: if n is not self.lowest: if self.listener is None: self.listener = n self.incr("started") self.CM.log("starting " + n) self.start(n) self.CM.cpg_agent[n].clean_start() self.CM.cpg_agent[n].cpg_join(self.cpg_name) # start recording events pats = [] pats.append("%s .*sync: node joined.*" % self.listener) pats.append("%s .*sync: activate correctly.*" % self.listener) self.sync_log = self.create_watch(pats, 60) self.sync_log.setwatch() self.CM.log("setup done") return CTSTest.setup(self, node) def __call__(self, node): self.incr("calls") self.start(self.lowest) self.CM.cpg_agent[self.lowest].clean_start() self.CM.cpg_agent[self.lowest].cpg_join(self.cpg_name) self.wobbly_id = self.CM.cpg_agent[self.lowest].cpg_local_get() self.CM.log("waiting for sync events") if not self.sync_log.lookforall(): return self.failure("Patterns not found: " + repr(self.sync_log.unmatched)) else: return self.success() ################################################################### class CpgCfgChgOnExecCrash(CpgConfigChangeBase): def __init__(self, cm): CpgConfigChangeBase.__init__(self,cm) self.name="CpgCfgChgOnExecCrash" def failure_action(self): self.CM.log("sending KILL to corosync on " + self.wobbly) self.CM.rsh(self.wobbly, "killall -9 corosync") self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid") self.CM.ShouldBeStatus[self.wobbly] = "down" def __call__(self, node): self.incr("calls") self.failure_action() return self.wait_for_config_change() ################################################################### class CpgCfgChgOnNodeIsolate(CpgConfigChangeBase): def __init__(self, cm): CpgConfigChangeBase.__init__(self,cm) self.name="CpgCfgChgOnNodeIsolate" def config_valid(self, config): if config.has_key('totem/rrp_mode'): return False else: return True def failure_action(self): self.CM.log("isolating node " + self.wobbly) self.CM.isolate_node(self.wobbly) def __call__(self, node): self.incr("calls") self.failure_action() return self.wait_for_config_change() def teardown(self, node): self.CM.unisolate_node (self.wobbly) return CpgConfigChangeBase.teardown(self, node) ################################################################### class CpgCfgChgOnNodeRestart(CpgConfigChangeBase): def __init__(self, cm): CpgConfigChangeBase.__init__(self,cm) self.name="CpgCfgChgOnNodeRestart" self.CM.start_cpg = False def config_valid(self, config): if config.has_key('totem/secauth'): if config['totem/secauth'] is 'on': return False else: return True if config.has_key('totem/rrp_mode'): return False else: return True def failure_action(self): self.CM.log("2: isolating node " + self.wobbly) self.CM.isolate_node(self.wobbly) self.CM.log("3: Killing corosync on " + self.wobbly) self.CM.rsh(self.wobbly, "killall -9 corosync") self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid") self.CM.ShouldBeStatus[self.wobbly] = "down" self.CM.log("4: unisolating node " + self.wobbly) self.CM.unisolate_node (self.wobbly) self.CM.log("5: starting corosync on " + self.wobbly) self.CM.StartaCM(self.wobbly) time.sleep(5) self.CM.log("6: starting cpg on all nodes") self.CM.start_cpg = True for node in self.CM.Env["nodes"]: self.CM.cpg_agent[node] = CpgTestAgent(node, self.CM.Env) self.CM.cpg_agent[node].start() self.CM.cpg_agent[node].cpg_join(self.cpg_name) self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get() self.CM.cpg_agent[self.listener].record_config_events(truncate=True) self.CM.log("7: isolating node " + self.wobbly) self.CM.isolate_node(self.wobbly) self.CM.log("8: Killing corosync on " + self.wobbly) self.CM.rsh(self.wobbly, "killall -9 corosync") self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid") self.CM.ShouldBeStatus[self.wobbly] = "down" self.CM.log("9: unisolating node " + self.wobbly) self.CM.unisolate_node (self.wobbly) self.CM.log("10: starting corosync on " + self.wobbly) self.CM.StartaCM(self.wobbly) def __call__(self, node): self.incr("calls") self.failure_action() return self.wait_for_config_change() def teardown(self, node): self.CM.unisolate_node (self.wobbly) return CpgConfigChangeBase.teardown(self, node) ################################################################### class CpgMsgOrderBase(CoroTest): def __init__(self, cm): CoroTest.__init__(self,cm) self.num_msgs_per_node = 0 self.total_num_msgs = 0 def setup(self, node): ret = CoroTest.setup(self, node) for n in self.CM.Env["nodes"]: self.CM.cpg_agent[n].clean_start() self.CM.cpg_agent[n].cpg_join(self.cpg_name) self.CM.cpg_agent[n].record_messages() time.sleep(1) return ret def cpg_msg_blaster(self): for n in self.CM.Env["nodes"]: self.CM.cpg_agent[n].msg_blaster(self.num_msgs_per_node) def wait_and_validate_order(self): msgs = {} self.total_num_msgs = 0 for n in self.CM.Env["nodes"]: self.total_num_msgs = self.total_num_msgs + self.num_msgs_per_node for n in self.CM.Env["nodes"]: msgs[n] = [] stopped = False waited = 0 while len(msgs[n]) < self.total_num_msgs and waited < 360: try: msg = self.CM.cpg_agent[n].read_messages(50) except: return self.failure('connection to test cpg_agent failed.') if not msg == None: msgl = msg.split(";") # remove empty entries not_done=True while not_done: try: msgl.remove('') except: not_done = False msgs[n].extend(msgl) elif msg == None: time.sleep(2) waited = waited + 2 if len(msgs[n]) < self.total_num_msgs: return self.failure("expected %d messages from %s got %d" % (self.total_num_msgs, n, len(msgs[n]))) fail = False error_message = '' for i in range(0, self.total_num_msgs): first = None for n in self.CM.Env["nodes"]: # first test for errors params = msgs[n][i].split(":") if not 'OK' in params[3]: fail = True error_message = 'error: ' + params[3] + ' in received message' self.CM.log(str(params)) # then look for out of order messages if first == None: first = n else: if not msgs[first][i] == msgs[n][i]: # message order not the same! fail = True error_message = 'message out of order' self.CM.log(msgs[first][i] + " != " + msgs[n][i]) if fail: return self.failure(error_message) else: return self.success() ################################################################### class CpgMsgOrderBasic(CpgMsgOrderBase): ''' each sends & logs lots of messages ''' def __init__(self, cm): CpgMsgOrderBase.__init__(self,cm) self.name="CpgMsgOrderBasic" self.num_msgs_per_node = 9000 def __call__(self, node): self.incr("calls") for n in self.CM.Env["nodes"]: self.CM.cpg_agent[n].msg_blaster(self.num_msgs_per_node) return self.wait_and_validate_order() ################################################################### class CpgMsgOrderZcb(CpgMsgOrderBase): ''' each sends & logs lots of messages ''' def __init__(self, cm): CpgMsgOrderBase.__init__(self,cm) self.name="CpgMsgOrderZcb" self.num_msgs_per_node = 9000 def __call__(self, node): self.incr("calls") for n in self.CM.Env["nodes"]: self.CM.cpg_agent[n].msg_blaster_zcb(self.num_msgs_per_node) return self.wait_and_validate_order() ################################################################### class MemLeakObject(CoroTest): ''' run mem_leak_test.sh -1 ''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="MemLeakObject" def __call__(self, node): self.incr("calls") mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -1") if mem_leaked is 0: return self.success() else: return self.failure(str(mem_leaked) + 'kB memory leaked.') ################################################################### class MemLeakSession(CoroTest): ''' run mem_leak_test.sh -2 ''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="MemLeakSession" def __call__(self, node): self.incr("calls") mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -2") if mem_leaked is 0: return self.success() else: return self.failure(str(mem_leaked) + 'kB memory leaked.') ################################################################### class CMapDispatchDeadlock(CoroTest): ''' run cmap-dispatch-deadlock.sh ''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="CMapDispatchDeadlock" def __call__(self, node): self.incr("calls") result = self.CM.rsh(node, "/usr/share/corosync/tests/cmap-dispatch-deadlock.sh") if result is 0: return self.success() else: return self.failure('Deadlock detected') ################################################################### class SamTest1(CoroTest): def __init__(self, cm): CoroTest.__init__(self, cm) self.name="SamTest1" def __call__(self, node): self.incr("calls") res = self.CM.sam_agent[node].test1() if 'OK' in res: return self.success() else: return self.failure(self.name + ' failed') ################################################################### class SamTest2(CoroTest): def __init__(self, cm): CoroTest.__init__(self, cm) self.name="SamTest2" def __call__(self, node): self.incr("calls") res = self.CM.sam_agent[node].test2() if 'OK' in res: return self.success() else: return self.failure(self.name + ' failed') ################################################################### class SamTest4(CoroTest): def __init__(self, cm): CoroTest.__init__(self, cm) self.name="SamTest4" def __call__(self, node): self.incr("calls") res = self.CM.sam_agent[node].test4() if 'OK' in res: return self.success() else: return self.failure(self.name + ' failed') ################################################################### class SamTest5(CoroTest): def __init__(self, cm): CoroTest.__init__(self, cm) self.name="SamTest5" def __call__(self, node): self.incr("calls") res = self.CM.sam_agent[node].test5() if 'OK' in res: return self.success() else: return self.failure(self.name + ' failed') ################################################################### class SamTest6(CoroTest): def __init__(self, cm): CoroTest.__init__(self, cm) self.name="SamTest6" def __call__(self, node): self.incr("calls") res = self.CM.sam_agent[node].test6() if 'OK' in res: return self.success() else: return self.failure(self.name + ' failed') ################################################################### class SamTest8(CoroTest): def __init__(self, cm): CoroTest.__init__(self, cm) self.name="SamTest8" def __call__(self, node): self.incr("calls") res = self.CM.sam_agent[node].test8() if 'OK' in res: return self.success() else: return self.failure(self.name + ' failed') ################################################################### class SamTest9(CoroTest): def __init__(self, cm): CoroTest.__init__(self, cm) self.name="SamTest9" def __call__(self, node): self.incr("calls") res = self.CM.sam_agent[node].test9() if 'OK' in res: return self.success() else: return self.failure(self.name + ' failed') class QuorumState(object): def __init__(self, cm, node): self.node = node self.CM = cm self.CM.votequorum_agent[self.node].init() def refresh(self): info = self.CM.votequorum_agent[self.node].votequorum_getinfo() assert(info != 'FAIL') assert(info != 'NOT_SUPPORTED') #self.CM.log('refresh: ' + info) params = info.split(':') self.node_votes = int(params[0]) self.expected_votes = int(params[1]) self.highest_expected = int(params[2]) self.total_votes = int(params[3]) self.quorum = int(params[4]) self.quorate = self.CM.votequorum_agent[self.node].quorum_getquorate() assert(self.quorate != 'FAIL') assert(self.quorate != 'NOT_SUPPORTED') #self.CM.log('quorate: ' + str(self.quorate)) ################################################################### class VoteQuorumBase(CoroTest): def setup(self, node): ret = CoroTest.setup(self, node) self.listener = None for n in self.CM.Env["nodes"]: if self.listener is None: self.listener = n return ret def config_valid(self, config): if config.has_key('totem/rrp_mode'): return False if config.has_key('quorum/provider'): return False return True ################################################################### class VoteQuorumGoDown(VoteQuorumBase): # all up # calc min expected votes to get Q # bring nodes down one-by-one # confirm cluster looses Q when V < EV # def __init__(self, cm): VoteQuorumBase.__init__(self, cm) self.name="VoteQuorumGoDown" self.victims = [] self.expected = len(self.CM.Env["nodes"]) self.config['quorum/provider'] = 'corosync_votequorum' self.config['quorum/expected_votes'] = self.expected #self.CM.log('set expected to %d' % (self.expected)) def __call__(self, node): self.incr("calls") self.victims = [] pats = [] pats.append("%s .*VQ notification quorate: 0" % self.listener) pats.append("%s .*NQ notification quorate: 0" % self.listener) quorum = self.create_watch(pats, 30) quorum.setwatch() state = QuorumState(self.CM, self.listener) state.refresh() for n in self.CM.Env["nodes"]: if n is self.listener: continue self.victims.append(n) self.CM.StopaCM(n) #if not self.wait_for_quorum_change(): # return self.failure(self.error_message) nodes_alive = len(self.CM.Env["nodes"]) - len(self.victims) state.refresh() #self.expected = self.expected - 1 if state.node_votes != 1: self.failure('unexpected number of node_votes') if state.expected_votes != self.expected: self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected)) self.failure('unexpected number of expected_votes') if state.total_votes != nodes_alive: self.failure('unexpected number of total votes:%d, nodes_alive:%d' % (state.total_votes, nodes_alive)) min = ((len(self.CM.Env["nodes"]) + 2) / 2) if min != state.quorum: self.failure('we should have %d (not %d) as quorum' % (min, state.quorum)) if nodes_alive < state.quorum: if state.quorate == 1: self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive)) else: if state.quorate == 0: self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive)) if not quorum.lookforall(): self.CM.log("Patterns not found: " + repr(quorum.unmatched)) return self.failure('quorm event not found') return self.success() ################################################################### class VoteQuorumGoUp(VoteQuorumBase): # all down # calc min expected votes to get Q # bring nodes up one-by-one # confirm cluster gains Q when V >= EV def __init__(self, cm): VoteQuorumBase.__init__(self, cm) self.name="VoteQuorumGoUp" self.need_all_up = False self.expected = len(self.CM.Env["nodes"]) self.config['quorum/provider'] = 'corosync_votequorum' self.config['quorum/expected_votes'] = self.expected #self.CM.log('set expected to %d' % (self.expected)) def __call__(self, node): self.incr("calls") pats = [] pats.append("%s .*VQ notification quorate: 1" % self.listener) pats.append("%s .*NQ notification quorate: 1" % self.listener) quorum = self.create_watch(pats, 30) quorum.setwatch() self.CM.StartaCM(self.listener) nodes_alive = 1 state = QuorumState(self.CM, self.listener) state.refresh() for n in self.CM.Env["nodes"]: if n is self.listener: continue #if not self.wait_for_quorum_change(): # return self.failure(self.error_message) if state.node_votes != 1: self.failure('unexpected number of node_votes') if state.expected_votes != self.expected: self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected)) self.failure('unexpected number of expected_votes') if state.total_votes != nodes_alive: self.failure('unexpected number of total votes') min = ((len(self.CM.Env["nodes"]) + 2) / 2) if min != state.quorum: self.failure('we should have %d (not %d) as quorum' % (min, state.quorum)) if nodes_alive < state.quorum: if state.quorate == 1: self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive)) else: if state.quorate == 0: self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive)) self.CM.StartaCM(n) nodes_alive = nodes_alive + 1 state.refresh() if not quorum.lookforall(): self.CM.log("Patterns not found: " + repr(quorum.unmatched)) return self.failure('quorm event not found') return self.success() ################################################################### class VoteQuorumWaitForAll(VoteQuorumBase): # all down # bring nodes up one-by-one # confirm cluster gains Q when V == num nodes def __init__(self, cm): VoteQuorumBase.__init__(self, cm) self.name="VoteQuorumWaitForAll" self.need_all_up = False self.expected = len(self.CM.Env["nodes"]) self.config['quorum/provider'] = 'corosync_votequorum' self.config['quorum/expected_votes'] = self.expected self.config['quorum/wait_for_all'] = '1' def __call__(self, node): self.incr("calls") pats = [] pats.append("%s .*VQ notification quorate: 1" % self.listener) pats.append("%s .*NQ notification quorate: 1" % self.listener) quorum = self.create_watch(pats, 30) quorum.setwatch() # make absolutly all are stopped for n in self.CM.Env["nodes"]: self.CM.StopaCM(n) # start the listener self.CM.StartaCM(self.listener) nodes_alive = 1 state = QuorumState(self.CM, self.listener) state.refresh() for n in self.CM.Env["nodes"]: if n is self.listener: continue self.CM.StartaCM(n) nodes_alive = nodes_alive + 1 state.refresh() if state.node_votes != 1: self.failure('unexpected number of node_votes') if state.expected_votes != self.expected: self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected)) self.failure('unexpected number of expected_votes') if state.total_votes != nodes_alive: self.failure('unexpected number of total votes') if nodes_alive < len(self.CM.Env["nodes"]): if state.quorate == 1: self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, len(self.CM.Env["nodes"]), nodes_alive)) else: if state.quorate == 0: self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, len(self.CM.Env["nodes"]), nodes_alive)) if not quorum.lookforall(): self.CM.log("Patterns not found: " + repr(quorum.unmatched)) return self.failure('quorm event not found') return self.success() ################################################################### class VoteQuorumContextTest(CoroTest): def __init__(self, cm): CoroTest.__init__(self, cm) self.name="VoteQuorumContextTest" self.expected = len(self.CM.Env["nodes"]) self.config['quorum/provider'] = 'corosync_votequorum' self.config['quorum/expected_votes'] = self.expected def __call__(self, node): self.incr("calls") res = self.CM.votequorum_agent[node].context_test() if 'OK' in res: return self.success() else: return self.failure('context_test failed') ################################################################### class GenSimulStart(CoroTest): '''Start all the nodes ~ simultaneously''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="GenSimulStart" self.need_all_up = False self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'SimulStart' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Setup failed") self.CM.clear_all_caches() if not self.startall(None): return self.failure("Startall failed") return self.success() ################################################################### class GenSimulStop(CoroTest): '''Stop all the nodes ~ simultaneously''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="GenSimulStop" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) self.need_all_up = True def __call__(self, dummy): '''Perform the 'GenSimulStop' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.stopall(None): return self.failure("Stopall failed") return self.success() class GenFlipTest(CoroTest): def __init__(self, cm): CoroTest.__init__(self,cm) self.name="GenFlipTest" self.test = FlipTest(cm) def __call__(self, dummy): '''Perform the test. ''' self.incr("calls") return self.test.__call__(dummy) class GenRestartTest(CoroTest): def __init__(self, cm): CoroTest.__init__(self,cm) self.name="GenRestartTest" self.test = RestartTest(cm) def __call__(self, dummy): '''Perform the test. ''' self.incr("calls") return self.test.__call__(dummy) class GenStartOnebyOne(CoroTest): def __init__(self, cm): CoroTest.__init__(self,cm) self.name="GenStartOnebyOne" self.test = RestartOnebyOne(cm) def __call__(self, dummy): '''Perform the test. ''' self.incr("calls") return self.test.__call__(dummy) class GenStopOnebyOne(CoroTest): def __init__(self, cm): CoroTest.__init__(self,cm) self.name="GenStopOnebyOne" self.test = StopOnebyOne(cm) def __call__(self, dummy): '''Perform the test. ''' self.incr("calls") return self.test.__call__(dummy) class GenRestartOnebyOne(CoroTest): def __init__(self, cm): CoroTest.__init__(self,cm) self.name="GenRestartOnebyOne" self.test = RestartOnebyOne(cm) def __call__(self, dummy): '''Perform the test. ''' self.incr("calls") return self.test.__call__(dummy) ################################################################### class GenStopAllBeekhof(CoroTest): '''Stop all the nodes ~ simultaneously''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="GenStopAllBeekhof" self.need_all_up = True self.config['logging/logger_subsys[2]/subsys'] = 'CFG' self.config['logging/logger_subsys[2]/debug'] = 'on' def __call__(self, node): '''Perform the 'GenStopAllBeekhof' test. ''' self.incr("calls") stopping = int(time.time()) for n in self.CM.Env["nodes"]: self.CM.cpg_agent[n].pcmk_test() for n in self.CM.Env["nodes"]: self.CM.cpg_agent[n].msg_blaster(1000) for n in self.CM.Env["nodes"]: self.CM.cpg_agent[n].cfg_shutdown() self.CM.ShouldBeStatus[n] = "down" waited = 0 max_wait = 60 * 15 still_up = list(self.CM.Env["nodes"]) while len(still_up) > 0: waited = int(time.time()) - stopping self.CM.log("%s still up %s; waited %d secs" % (self.name, str(still_up), waited)) if waited > max_wait: break time.sleep(3) for v in self.CM.Env["nodes"]: if v in still_up: self.CM.ShouldBeStatus[n] = "down" if not self.CM.StataCM(v): still_up.remove(v) waited = int(time.time()) - stopping if waited > max_wait: return self.failure("Waited %d secs for nodes: %s to stop" % (waited, str(still_up))) self.CM.log("%s ALL good (waited %d secs)" % (self.name, waited)) return self.success() ################################################################### class NoWDConfig(CoroTest): '''Assertion: no config == no watchdog Setup: no config, kmod inserted 1] make sure watchdog is not enabled ''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="NoWDConfig" self.need_all_up = False def config_valid(self, config): return not config.has_key('resources') def __call__(self, node): '''Perform the 'NoWDConfig' test. ''' self.incr("calls") self.CM.StopaCM(node) pats = [] pats.append("%s .*no resources configured." % node) w = self.create_watch(pats, 60) w.setwatch() self.CM.StartaCM(node) if not w.lookforall(): return self.failure("Patterns not found: " + repr(w.unmatched)) else: return self.success() ################################################################### class WDConfigNoWd(CoroTest): '''Assertion: watchdog config but no watchdog kmod will emit a log Setup: config watchdog, but no kmod 1] look in the log for warning that there is no kmod ''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="WDConfigNoWd" self.need_all_up = False def __call__(self, node): '''Perform the 'WDConfigNoWd' test. ''' self.incr("calls") self.CM.StopaCM(node) self.CM.rsh(node, 'rmmod softdog') pats = [] pats.append("%s .*No Watchdog, try modprobe.*" % node) w = self.create_watch(pats, 60) w.setwatch() self.CM.StartaCM(node) if not w.lookforall(): return self.failure("Patterns not found: " + repr(w.unmatched)) else: return self.success() ################################################################### class NoWDOnCorosyncStop(CoroTest): '''Configure WD then /etc/init.d/corosync stop must stay up for > 60 secs ''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="NoWDOnCorosyncStop" self.need_all_up = False def __call__(self, node): '''Perform the test. ''' self.incr("calls") self.CM.StopaCM(node) self.CM.rsh(node, 'modprobe softdog') self.CM.StartaCM(node) pats = [] pats.append("%s .*Unexpected close, not stopping watchdog.*" % node) w = self.create_watch(pats, 60) w.setwatch() self.CM.StopaCM(node) if w.lookforall(): return self.failure("Should have closed the WD better: " + repr(w.matched)) else: return self.success() ################################################################### class WDOnForkBomb(CoroTest): '''Configure memory resource run memory leaker / forkbomb confirm watchdog action ''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="WDOnForkBomb" self.need_all_up = False self.config['logging/logger_subsys[2]/subsys'] = 'WD' self.config['logging/logger_subsys[2]/debug'] = 'on' self.config['resources/system/memory_used/recovery'] = 'watchdog' self.config['resources/system/memory_used/max'] = '80' self.config['resources/system/memory_used/poll_period'] = '800' def __call__(self, node): '''Perform the test. ''' self.incr("calls") # get the uptime up_before = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip() self.CM.StopaCM(node) self.CM.rsh(node, 'modprobe softdog') self.CM.StartaCM(node) self.CM.rsh(node, ':(){ :|:& };:', synchronous=0) self.CM.log("wait for it to watchdog") time.sleep(60 * 5) ping_able = False while not ping_able: if self.CM.rsh("localhost", "ping -nq -c10 -w10 %s" % node) == 0: ping_able = True self.CM.log("can ping 10 in 10secs.") else: self.CM.log("not yet responding to pings.") self.CM.ShouldBeStatus[node] = "down" # wait for the node to come back up self.CM.log("waiting for node to come back up.") if self.CM.ns.WaitForNodeToComeUp(node): up_after = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip() if int(up_after) < int(up_before): return self.success() else: return self.failure("node didn't seem to watchdog uptime 1 %s; 2 %s" %(up_before, up_after)) else: return self.failure("node didn't seem to come back up") ################################################################### class SamWdIntegration1(CoroTest): '''start sam hc kill agent confirm action ''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="SamWdIntegration1" self.need_all_up = True self.config['logging/logger_subsys[2]/subsys'] = 'WD' self.config['logging/logger_subsys[2]/debug'] = 'on' def __call__(self, node): '''Perform the test. ''' self.incr("calls") self.CM.sam_agent[node].setup_hc() pids = self.CM.sam_agent[node].getpid().rstrip().split(" ") pats = [] for pid in pids: pats.append('%s .*resource "%s" failed!' % (node, pid)) w = self.create_watch(pats, 60) w.setwatch() self.CM.sam_agent[node].kill() look_result = w.look() if not look_result: return self.failure("Patterns not found: " + repr(w.regexes)) else: return self.success() ################################################################### class SamWdIntegration2(CoroTest): '''start sam hc call sam_stop() confirm resource "stopped" and no watchdog action. ''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="SamWdIntegration2" self.need_all_up = True self.config['logging/logger_subsys[2]/subsys'] = 'WD' self.config['logging/logger_subsys[2]/debug'] = 'on' def __call__(self, node): '''Perform the test. ''' self.incr("calls") self.CM.sam_agent[node].setup_hc() pids = self.CM.sam_agent[node].getpid().rstrip().split(" ") no_pats = [] yes_pats = [] for pid in pids: no_pats.append('%s .*resource "%s" failed!' % (node, pid)) yes_pats.append('%s .*Fsm:%s event "config_changed", state "running" --> "stopped"' % (node, pid)) yes_w = self.create_watch(yes_pats, 10) no_w = self.create_watch(no_pats, 10) yes_w.setwatch() no_w.setwatch() time.sleep(2) self.CM.sam_agent[node].sam_stop() yes_matched = yes_w.look() no_matched = no_w.look() if no_matched: return self.failure("Patterns found: " + repr(no_matched)) else: if not yes_matched: return self.failure("Patterns NOT found: " + repr(yes_w.regexes)) return self.success() ################################################################### class WdDeleteResource(CoroTest): '''config resource & start corosync check that it is getting checked delete the object resource object check that we do NOT get watchdog'ed ''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="WdDeleteResource" self.need_all_up = True self.config['logging/logger_subsys[2]/subsys'] = 'MON' self.config['logging/logger_subsys[2]/debug'] = 'on' self.config['logging/logger_subsys[3]/subsys'] = 'WD' self.config['logging/logger_subsys[3]/debug'] = 'on' self.config['resources/system/memory_used/recovery'] = 'watchdog' self.config['resources/system/memory_used/max'] = '80' self.config['resources/system/memory_used/poll_period'] = '800' def __call__(self, node): '''Perform the test. ''' self.incr("calls") no_pats = [] yes_pats = [] no_pats.append('%s .*resource "memory_used" failed!' % node) yes_pats.append('%s .*resource "memory_used" deleted from cmap!' % node) yes_w = self.create_watch(yes_pats, 10) no_w = self.create_watch(no_pats, 10) yes_w.setwatch() no_w.setwatch() time.sleep(2) self.CM.rsh(node, 'corosync-cmapctl -D resources.system.memory_used') yes_matched = yes_w.look() no_matched = no_w.look() if no_matched: return self.failure("Patterns found: " + repr(no_matched)) else: if not yes_matched: return self.failure("Patterns NOT found: " + repr(yes_w.regexes)) return self.success() ################################################################### class ResourcePollAdjust(CoroTest): '''config resource & start corosync change the poll_period check that we do NOT get watchdog'ed ''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="ResourcePollAdjust" self.need_all_up = True self.config['logging/logger_subsys[2]/subsys'] = 'MON' self.config['logging/logger_subsys[2]/debug'] = 'on' self.config['logging/logger_subsys[3]/subsys'] = 'WD' self.config['logging/logger_subsys[3]/debug'] = 'on' self.config['resources/system/memory_used/recovery'] = 'none' self.config['resources/system/memory_used/max'] = '80' self.config['resources/system/memory_used/poll_period'] = '800' def __call__(self, node): '''Perform the test. ''' self.incr("calls") no_pats = [] no_pats.append('%s .*resource "memory_used" failed!' % node) no_pats.append('%s .*Could NOT use poll_period.*' % node) no_w = self.create_watch(no_pats, 10) no_w.setwatch() changes = 0 while changes < 50: changes = changes + 1 poll_period = int(random.random() * 5000) if poll_period < 500: poll_period = 500 self.CM.log("setting poll_period to: %d" % poll_period) self.CM.rsh(node, 'corosync-cmapctl -s resources.system.memory_used.poll_period str %d' % poll_period) sleep_time = poll_period * 2 / 1000 if sleep_time < 1: sleep_time = 1 time.sleep(sleep_time) no_matched = no_w.look() if no_matched: return self.failure("Patterns found: " + repr(no_matched)) return self.success() ################################################################### class RebootOnHighMem(CoroTest): '''Configure memory resource run memory leaker / forkbomb confirm reboot action ''' def __init__(self, cm): CoroTest.__init__(self,cm) self.name="RebootOnHighMem" self.need_all_up = True self.config['logging/logger_subsys[2]/subsys'] = 'WD' self.config['logging/logger_subsys[2]/debug'] = 'on' self.config['resources/system/memory_used/recovery'] = 'reboot' self.config['resources/system/memory_used/max'] = '80' self.config['resources/system/memory_used/poll_period'] = '800' def __call__(self, node): '''Perform the test. ''' self.incr("calls") # get the uptime up_before = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip() cmd = 'corosync-cmapctl resources.system.memory_used. | grep current | cut -d= -f2' mem_current_str = self.CM.rsh(node, cmd, 1).rstrip() mem_new_max = int(mem_current_str) + 5 self.CM.log("current mem usage: %s, new max:%d" % (mem_current_str, mem_new_max)) cmd = 'corosync-cmapctl -s resources.system.memory_used.max str ' + str(mem_new_max) self.CM.rsh(node, cmd) self.CM.rsh(node, 'memhog -r10000 200m', synchronous=0) self.CM.log("wait for it to reboot") time.sleep(60 * 3) cmd = 'corosync-cmapctl resources.system.memory_used. | grep current | cut -d= -f2' mem_current_str = self.CM.rsh(node, cmd, 1).rstrip() self.CM.log("current mem usage: %s" % (mem_current_str)) ping_able = False while not ping_able: if self.CM.rsh("localhost", "ping -nq -c10 -w10 %s" % node) == 0: ping_able = True self.CM.log("can ping 10 in 10secs.") else: self.CM.log("not yet responding to pings.") self.CM.ShouldBeStatus[node] = "down" # wait for the node to come back up self.CM.log("waiting for node to come back up.") if self.CM.ns.WaitForNodeToComeUp(node): up_after = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip() if int(up_after) < int(up_before): return self.success() else: return self.failure("node didn't seem to watchdog uptime 1 %s; 2 %s" %(up_before, up_after)) else: return self.failure("node didn't seem to come back up") GenTestClasses = [] GenTestClasses.append(GenSimulStart) GenTestClasses.append(GenSimulStop) GenTestClasses.append(GenFlipTest) GenTestClasses.append(GenRestartTest) GenTestClasses.append(GenStartOnebyOne) GenTestClasses.append(GenStopOnebyOne) GenTestClasses.append(GenRestartOnebyOne) GenTestClasses.append(GenStopAllBeekhof) GenTestClasses.append(CpgMsgOrderBasic) GenTestClasses.append(CpgMsgOrderZcb) GenTestClasses.append(CpgCfgChgOnExecCrash) GenTestClasses.append(CpgCfgChgOnGroupLeave) GenTestClasses.append(CpgCfgChgOnNodeLeave) GenTestClasses.append(CpgCfgChgOnNodeIsolate) #GenTestClasses.append(CpgCfgChgOnNodeRestart) AllTestClasses = [] AllTestClasses.append(CpgContextTest) AllTestClasses.append(SamTest1) AllTestClasses.append(SamTest2) AllTestClasses.append(SamTest4) AllTestClasses.append(SamTest5) AllTestClasses.append(SamTest6) AllTestClasses.append(SamTest8) AllTestClasses.append(SamTest9) AllTestClasses.append(SamWdIntegration1) AllTestClasses.append(SamWdIntegration2) AllTestClasses.append(NoWDConfig) AllTestClasses.append(WDConfigNoWd) AllTestClasses.append(NoWDOnCorosyncStop) #AllTestClasses.append(WDOnForkBomb) AllTestClasses.append(WdDeleteResource) #AllTestClasses.append(RebootOnHighMem) AllTestClasses.append(ResourcePollAdjust) AllTestClasses.append(MemLeakObject) AllTestClasses.append(MemLeakSession) #AllTestClasses.append(CMapDispatchDeadlock) # quorum tests AllTestClasses.append(VoteQuorumContextTest) GenTestClasses.append(VoteQuorumGoDown) GenTestClasses.append(VoteQuorumGoUp) GenTestClasses.append(VoteQuorumWaitForAll) # FIXME need log messages in sync #GenTestClasses.append(CpgCfgChgOnLowestNodeJoin) class ConfigContainer(UserDict): def __init__ (self, name): self.name = name UserDict.__init__(self) def CoroTestList(cm, audits): result = [] configs = [] for testclass in AllTestClasses: bound_test = testclass(cm) if bound_test.is_applicable(): bound_test.Audits = audits result.append(bound_test) default = ConfigContainer('default') default['logging/fileline'] = 'on' default['logging/function_name'] = 'off' default['logging/logfile_priority'] = 'info' default['logging/syslog_priority'] = 'info' default['logging/syslog_facility'] = 'daemon' default['uidgid/uid'] = '0' default['uidgid/gid'] = '0' configs.append(default) a = ConfigContainer('none_5min') a['totem/token'] = (5 * 60 * 1000) a['totem/consensus'] = int(5 * 60 * 1000 * 1.2) + 1 configs.append(a) b = ConfigContainer('pcmk_basic') b['totem/token'] = 5000 b['totem/token_retransmits_before_loss_const'] = 10 b['totem/join'] = 1000 b['totem/consensus'] = 7500 configs.append(b) c = ConfigContainer('pcmk_sec_nss') c['totem/secauth'] = 'on' c['totem/crypto_type'] = 'nss' c['totem/token'] = 5000 c['totem/token_retransmits_before_loss_const'] = 10 c['totem/join'] = 1000 c['totem/consensus'] = 7500 configs.append(c) # # s = ConfigContainer('pcmk_vq') # s['quorum/provider'] = 'corosync_votequorum' # s['quorum/expected_votes'] = len(cm.Env["nodes"]) # s['totem/token'] = 5000 # s['totem/token_retransmits_before_loss_const'] = 10 # s['totem/join'] = 1000 # s['totem/vsftype'] = 'none' # s['totem/consensus'] = 7500 # s['totem/max_messages'] = 20 # configs.append(s) # - d = ConfigContainer('sec_sober') + d = ConfigContainer('sec_nss') d['totem/secauth'] = 'on' - d['totem/crypto_type'] = 'sober' + d['totem/crypto_type'] = 'nss' configs.append(d) if not cm.Env["RrpBindAddr"] is None: g = ConfigContainer('rrp_passive') g['totem/rrp_mode'] = 'passive' g['totem/interface[2]/ringnumber'] = '1' g['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"] g['totem/interface[2]/mcastaddr'] = '226.94.1.2' g['totem/interface[2]/mcastport'] = '5405' configs.append(g) h = ConfigContainer('rrp_active') h['totem/rrp_mode'] = 'active' h['totem/interface[2]/ringnumber'] = '1' h['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"] h['totem/interface[2]/mcastaddr'] = '226.94.1.2' h['totem/interface[2]/mcastport'] = '5405' configs.append(h) else: print 'Not including rrp tests. Use --rrp-binaddr to enable them.' num=1 for cfg in configs: for testclass in GenTestClasses: bound_test = testclass(cm) if bound_test.is_applicable() and bound_test.config_valid(cfg): bound_test.Audits = audits for c in cfg.keys(): bound_test.config[c] = cfg[c] bound_test.name = bound_test.name + '_' + cfg.name result.append(bound_test) num = num + 1 return result diff --git a/exec/Makefile.am b/exec/Makefile.am index 78f3a3ee..c481c0ae 100644 --- a/exec/Makefile.am +++ b/exec/Makefile.am @@ -1,113 +1,113 @@ # Copyright (c) 2009 Red Hat, Inc. # # Authors: Andrew Beekhof # Steven Dake (sdake@redhat.com) # # This software licensed under BSD license, the text of which follows: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # - Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # - Neither the name of the MontaVista Software, Inc. nor the names of its # contributors may be used to endorse or promote products derived from this # software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF # THE POSSIBILITY OF SUCH DAMAGE. MAINTAINERCLEANFILES = Makefile.in AM_CFLAGS = -fPIC INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include $(nss_CFLAGS) $(rdmacm_CFLAGS) $(ibverbs_CFLAGS) TOTEM_SRC = totemip.c totemnet.c totemudp.c \ totemudpu.c totemrrp.c totemsrp.c totemmrp.c \ - totempg.c crypto.c cs_queue.h + totempg.c cs_queue.h if BUILD_RDMA TOTEM_SRC += totemiba.c endif lib_LIBRARIES = libtotem_pg.a sbin_PROGRAMS = corosync libtotem_pg_a_SOURCES = $(TOTEM_SRC) corosync_SOURCES = vsf_ykd.c coroparse.c vsf_quorum.c sync.c \ logsys.c cfg.c cmap.c cpg.c mon.c pload.c \ votequorum.c wd.c util.c schedwrk.c main.c \ apidef.c quorum.c icmap.c timer.c \ ipc_glue.c service.c mainconfig.c totemconfig.c corosync_LDADD = -ltotem_pg -lcorosync_common $(LIBQB_LIBS) $(statgrab_LIBS) corosync_DEPENDENCIES = libtotem_pg.so.$(SONAME) ../common_lib/libcorosync_common.so corosync_LDFLAGS = $(OS_DYFLAGS) -L./ -L../common_lib TOTEM_OBJS = $(TOTEM_SRC:%.c=%.o) SHARED_LIBS = $(lib_LIBRARIES:%.a=%.so.$(SONAME)) SHARED_LIBS_SO = $(SHARED_LIBS:%.so.$(SONAME)=%.so) SHARED_LIBS_SO_TWO = $(SHARED_LIBS:%.so.$(SONAME)=%.so.$(SOMAJOR)) -noinst_HEADERS = apidef.h crypto.h mainconfig.h main.h \ +noinst_HEADERS = apidef.h mainconfig.h main.h \ quorum.h service.h timer.h totemconfig.h \ totemmrp.h totemnet.h totemudp.h totemiba.h totemrrp.h \ totemudpu.h totemsrp.h util.h vsf.h schedwrk.h \ sync.h fsm.h votequorum.h vsf_ykd.h if BUILD_DARWIN libtotem_pg.so.$(SONAME): $(TOTEM_OBJS) $(CC) $(LDFLAGS) $(DARWIN_OPTS) $(TOTEM_OBJS) -o $@ -lpthread ln -sf libtotem_pg.so.$(SONAME) libtotem_pg.so ln -sf libtotem_pg.so.$(SONAME) libtotem_pg.so.$(SOMAJOR) else if BUILD_SOLARIS libtotem_pg.so.$(SONAME): $(TOTEM_OBJS) $(LD) $(LDFLAGS) -G $(TOTEM_OBJS) -o $@ -lpthread ln -sf libtotem_pg.so.$(SONAME) libtotem_pg.so ln -sf libtotem_pg.so.$(SONAME) libtotem_pg.so.$(SOMAJOR) else libtotem_pg.so.$(SONAME): $(TOTEM_OBJS) $(CC) -shared -o $@ \ -Wl,-soname=libtotem_pg.so.$(SOMAJOR) \ $(LDFLAGS) $^ $(nss_LIBS) $(rdmacm_LIBS) $(ibverbs_LIBS) -lpthread ln -sf libtotem_pg.so.$(SONAME) libtotem_pg.so ln -sf libtotem_pg.so.$(SONAME) libtotem_pg.so.$(SOMAJOR) endif endif lint: -splint $(INCLUDES) $(LINT_FLAGS) $(CFLAGS) *.c all-local: $(SHARED_LIBS) @echo Built corosync Executive install-exec-local: $(INSTALL) -d $(DESTDIR)/$(libdir) $(INSTALL) -m 755 $(SHARED_LIBS) $(DESTDIR)/$(libdir) $(CP) -a $(SHARED_LIBS_SO) $(SHARED_LIBS_SO_TWO) $(DESTDIR)/$(libdir) uninstall-local: cd $(DESTDIR)/$(libdir) && \ rm -f $(SHARED_LIBS) $(SHARED_LIBS_SO) $(SHARED_LIBS_SO_TWO) clean-local: rm -f corosync *.o gmon.out *.da *.bb *.bbg *.so* diff --git a/exec/coroparse.c b/exec/coroparse.c index e9081ff3..5efffc0d 100644 --- a/exec/coroparse.c +++ b/exec/coroparse.c @@ -1,1070 +1,1078 @@ /* * Copyright (c) 2006-2012 Red Hat, Inc. * * All rights reserved. * * Author: Patrick Caulfield (pcaulfie@redhat.com) * Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define LOGSYS_UTILS_ONLY 1 #include #include #include "main.h" #include "util.h" enum parser_cb_type { PARSER_CB_START, PARSER_CB_END, PARSER_CB_SECTION_START, PARSER_CB_SECTION_END, PARSER_CB_ITEM, }; typedef int (*parser_cb_f)(const char *path, char *key, char *value, enum parser_cb_type type, const char **error_string, void *user_data); enum main_cp_cb_data_state { MAIN_CP_CB_DATA_STATE_NORMAL, MAIN_CP_CB_DATA_STATE_TOTEM, MAIN_CP_CB_DATA_STATE_INTERFACE, MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS, MAIN_CP_CB_DATA_STATE_UIDGID, MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON, MAIN_CP_CB_DATA_STATE_MEMBER, MAIN_CP_CB_DATA_STATE_QUORUM, MAIN_CP_CB_DATA_STATE_QDEVICE, MAIN_CP_CB_DATA_STATE_NODELIST, MAIN_CP_CB_DATA_STATE_NODELIST_NODE, MAIN_CP_CB_DATA_STATE_PLOAD }; struct key_value_list_item { char *key; char *value; struct list_head list; }; struct main_cp_cb_data { enum main_cp_cb_data_state state; int ringnumber; char *bindnetaddr; char *mcastaddr; char *broadcast; int mcastport; int ttl; struct list_head logger_subsys_items_head; char *subsys; char *logging_daemon_name; struct list_head member_items_head; int node_number; int ring0_addr_added; }; static int read_config_file_into_icmap( const char **error_string); static char error_string_response[512]; static int uid_determine (const char *req_user) { int pw_uid = 0; struct passwd passwd; struct passwd* pwdptr = &passwd; struct passwd* temp_pwd_pt; char *pwdbuffer; int pwdlinelen; pwdlinelen = sysconf (_SC_GETPW_R_SIZE_MAX); if (pwdlinelen == -1) { pwdlinelen = 256; } pwdbuffer = malloc (pwdlinelen); if ((getpwnam_r (req_user, pwdptr, pwdbuffer, pwdlinelen, &temp_pwd_pt)) != 0) { sprintf (error_string_response, "The '%s' user is not found in /etc/passwd, please read the documentation.", req_user); return (-1); } pw_uid = passwd.pw_uid; free (pwdbuffer); return pw_uid; } static int gid_determine (const char *req_group) { int corosync_gid = 0; struct group group; struct group * grpptr = &group; struct group * temp_grp_pt; char *grpbuffer; int grplinelen; grplinelen = sysconf (_SC_GETGR_R_SIZE_MAX); if (grplinelen == -1) { grplinelen = 256; } grpbuffer = malloc (grplinelen); if ((getgrnam_r (req_group, grpptr, grpbuffer, grplinelen, &temp_grp_pt)) != 0) { sprintf (error_string_response, "The '%s' group is not found in /etc/group, please read the documentation.", req_group); return (-1); } corosync_gid = group.gr_gid; free (grpbuffer); return corosync_gid; } static char *strchr_rs (const char *haystack, int byte) { const char *end_address = strchr (haystack, byte); if (end_address) { end_address += 1; /* skip past { or = */ while (*end_address == ' ' || *end_address == '\t') end_address++; } return ((char *) end_address); } int coroparse_configparse (const char **error_string) { if (read_config_file_into_icmap(error_string)) { return -1; } return 0; } static char *remove_whitespace(char *string) { char *start; char *end; start = string; while (*start == ' ' || *start == '\t') start++; end = start+(strlen(start))-1; while ((*end == ' ' || *end == '\t' || *end == ':' || *end == '{') && end > start) end--; if (end != start) *(end+1) = '\0'; return start; } static int parse_section(FILE *fp, char *path, const char **error_string, parser_cb_f parser_cb, void *user_data) { char line[512]; int i; char *loc; int ignore_line; char new_keyname[ICMAP_KEYNAME_MAXLEN]; if (strcmp(path, "") == 0) { parser_cb("", NULL, NULL, PARSER_CB_START, error_string, user_data); } while (fgets (line, sizeof (line), fp)) { if (strlen(line) > 0) { if (line[strlen(line) - 1] == '\n') line[strlen(line) - 1] = '\0'; if (strlen (line) > 0 && line[strlen(line) - 1] == '\r') line[strlen(line) - 1] = '\0'; } /* * Clear out white space and tabs */ for (i = strlen (line) - 1; i > -1; i--) { if (line[i] == '\t' || line[i] == ' ') { line[i] = '\0'; } else { break; } } ignore_line = 1; for (i = 0; i < strlen (line); i++) { if (line[i] != '\t' && line[i] != ' ') { if (line[i] != '#') ignore_line = 0; break; } } /* * Clear out comments and empty lines */ if (ignore_line) { continue; } /* New section ? */ if ((loc = strchr_rs (line, '{'))) { char *section = remove_whitespace(line); loc--; *loc = '\0'; strcpy(new_keyname, path); if (strcmp(path, "") != 0) { strcat(new_keyname, "."); } strcat(new_keyname, section); if (!parser_cb(new_keyname, NULL, NULL, PARSER_CB_SECTION_START, error_string, user_data)) { return -1; } if (parse_section(fp, new_keyname, error_string, parser_cb, user_data)) return -1; } /* New key/value */ if ((loc = strchr_rs (line, ':'))) { char *key; char *value; *(loc-1) = '\0'; key = remove_whitespace(line); value = remove_whitespace(loc); strcpy(new_keyname, path); if (strcmp(path, "") != 0) { strcat(new_keyname, "."); } strcat(new_keyname, key); if (!parser_cb(new_keyname, key, value, PARSER_CB_ITEM, error_string, user_data)) { return -1; } } if (strchr_rs (line, '}')) { if (!parser_cb(path, NULL, NULL, PARSER_CB_SECTION_END, error_string, user_data)) { return -1; } return 0; } } if (strcmp(path, "") != 0) { *error_string = "Missing closing brace"; return -1; } if (strcmp(path, "") == 0) { parser_cb("", NULL, NULL, PARSER_CB_END, error_string, user_data); } return 0; } static int safe_atoi(const char *str, int *res) { int val; char *endptr; errno = 0; val = strtol(str, &endptr, 10); if (errno == ERANGE) { return (-1); } if (endptr == str) { return (-1); } if (*endptr != '\0') { return (-1); } *res = val; return (0); } static int main_config_parser_cb(const char *path, char *key, char *value, enum parser_cb_type type, const char **error_string, void *user_data) { int i; int add_as_string; char key_name[ICMAP_KEYNAME_MAXLEN]; static char formated_err[256]; struct main_cp_cb_data *data = (struct main_cp_cb_data *)user_data; struct key_value_list_item *kv_item; struct list_head *iter, *iter_next; int uid, gid; switch (type) { case PARSER_CB_START: memset(data, 0, sizeof(struct main_cp_cb_data)); data->state = MAIN_CP_CB_DATA_STATE_NORMAL; break; case PARSER_CB_END: break; case PARSER_CB_ITEM: add_as_string = 1; switch (data->state) { case MAIN_CP_CB_DATA_STATE_NORMAL: break; case MAIN_CP_CB_DATA_STATE_PLOAD: if ((strcmp(path, "pload.count") == 0) || (strcmp(path, "pload.size") == 0)) { if (safe_atoi(value, &i) != 0) { goto atoi_error; } icmap_set_uint32(path, i); add_as_string = 0; } break; case MAIN_CP_CB_DATA_STATE_QUORUM: if ((strcmp(path, "quorum.expected_votes") == 0) || (strcmp(path, "quorum.votes") == 0) || (strcmp(path, "quorum.last_man_standing_window") == 0) || (strcmp(path, "quorum.leaving_timeout") == 0)) { if (safe_atoi(value, &i) != 0) { goto atoi_error; } icmap_set_uint32(path, i); add_as_string = 0; } if ((strcmp(path, "quorum.two_node") == 0) || (strcmp(path, "quorum.allow_downscale") == 0) || (strcmp(path, "quorum.wait_for_all") == 0) || (strcmp(path, "quorum.auto_tie_breaker") == 0) || (strcmp(path, "quorum.last_man_standing") == 0)) { if (safe_atoi(value, &i) != 0) { goto atoi_error; } icmap_set_uint8(path, i); add_as_string = 0; } break; case MAIN_CP_CB_DATA_STATE_QDEVICE: if ((strcmp(path, "quorum.device.timeout") == 0) || (strcmp(path, "quorum.device.votes") == 0)) { if (safe_atoi(value, &i) != 0) { goto atoi_error; } icmap_set_uint32(path, i); add_as_string = 0; } case MAIN_CP_CB_DATA_STATE_TOTEM: if ((strcmp(path, "totem.version") == 0) || (strcmp(path, "totem.nodeid") == 0) || (strcmp(path, "totem.threads") == 0) || (strcmp(path, "totem.token") == 0) || (strcmp(path, "totem.token_retransmit") == 0) || (strcmp(path, "totem.hold") == 0) || (strcmp(path, "totem.token_retransmits_before_loss_const") == 0) || (strcmp(path, "totem.join") == 0) || (strcmp(path, "totem.send_join") == 0) || (strcmp(path, "totem.consensus") == 0) || (strcmp(path, "totem.merge") == 0) || (strcmp(path, "totem.downcheck") == 0) || (strcmp(path, "totem.fail_recv_const") == 0) || (strcmp(path, "totem.seqno_unchanged_const") == 0) || (strcmp(path, "totem.rrp_token_expired_timeout") == 0) || (strcmp(path, "totem.rrp_problem_count_timeout") == 0) || (strcmp(path, "totem.rrp_problem_count_threshold") == 0) || (strcmp(path, "totem.rrp_problem_count_mcast_threshold") == 0) || (strcmp(path, "totem.rrp_autorecovery_check_timeout") == 0) || (strcmp(path, "totem.heartbeat_failures_allowed") == 0) || (strcmp(path, "totem.max_network_delay") == 0) || (strcmp(path, "totem.window_size") == 0) || (strcmp(path, "totem.max_messages") == 0) || (strcmp(path, "totem.miss_count_const") == 0) || (strcmp(path, "totem.netmtu") == 0)) { if (safe_atoi(value, &i) != 0) { goto atoi_error; } icmap_set_uint32(path, i); add_as_string = 0; } + if (strcmp(path, "totem.crypto_type") == 0) { + if ((strcmp(value, "nss") != 0) && + (strcmp(value, "aes256") != 0)) { + *error_string = "Invalid crypto type"; + + return (0); + } + } break; case MAIN_CP_CB_DATA_STATE_INTERFACE: if (strcmp(path, "totem.interface.ringnumber") == 0) { if (safe_atoi(value, &i) != 0) { goto atoi_error; } data->ringnumber = i; add_as_string = 0; } if (strcmp(path, "totem.interface.bindnetaddr") == 0) { data->bindnetaddr = strdup(value); add_as_string = 0; } if (strcmp(path, "totem.interface.mcastaddr") == 0) { data->mcastaddr = strdup(value); add_as_string = 0; } if (strcmp(path, "totem.interface.broadcast") == 0) { data->broadcast = strdup(value); add_as_string = 0; } if (strcmp(path, "totem.interface.mcastport") == 0) { if (safe_atoi(value, &i) != 0) { goto atoi_error; } data->mcastport = i; if (data->mcastport < 0 || data->mcastport > 65535) { *error_string = "Invalid multicast port (should be 0..65535)"; return (0); }; add_as_string = 0; } if (strcmp(path, "totem.interface.ttl") == 0) { if (safe_atoi(value, &i) != 0) { goto atoi_error; } data->ttl = i; if (data->ttl < 0 || data->ttl > 255) { *error_string = "Invalid TTL (should be 0..255)"; return (0); }; add_as_string = 0; } break; case MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS: if (strcmp(key, "subsys") == 0) { data->subsys = strdup(value); if (data->subsys == NULL) { *error_string = "Can't alloc memory"; return (0); } } else { kv_item = malloc(sizeof(*kv_item)); if (kv_item == NULL) { *error_string = "Can't alloc memory"; return (0); } memset(kv_item, 0, sizeof(*kv_item)); kv_item->key = strdup(key); kv_item->value = strdup(value); if (kv_item->key == NULL || kv_item->value == NULL) { free(kv_item); *error_string = "Can't alloc memory"; return (0); } list_init(&kv_item->list); list_add(&kv_item->list, &data->logger_subsys_items_head); } add_as_string = 0; break; case MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON: if (strcmp(key, "subsys") == 0) { data->subsys = strdup(value); if (data->subsys == NULL) { *error_string = "Can't alloc memory"; return (0); } } else if (strcmp(key, "name") == 0) { data->logging_daemon_name = strdup(value); if (data->logging_daemon_name == NULL) { *error_string = "Can't alloc memory"; return (0); } } else { kv_item = malloc(sizeof(*kv_item)); if (kv_item == NULL) { *error_string = "Can't alloc memory"; return (0); } memset(kv_item, 0, sizeof(*kv_item)); kv_item->key = strdup(key); kv_item->value = strdup(value); if (kv_item->key == NULL || kv_item->value == NULL) { free(kv_item); *error_string = "Can't alloc memory"; return (0); } list_init(&kv_item->list); list_add(&kv_item->list, &data->logger_subsys_items_head); } add_as_string = 0; break; case MAIN_CP_CB_DATA_STATE_UIDGID: if (strcmp(key, "uid") == 0) { uid = uid_determine(value); if (uid == -1) { *error_string = error_string_response; return (0); } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.uid.%u", uid); icmap_set_uint8(key_name, 1); add_as_string = 0; } else if (strcmp(key, "gid") == 0) { gid = gid_determine(value); if (gid == -1) { *error_string = error_string_response; return (0); } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.gid.%u", gid); icmap_set_uint8(key_name, 1); add_as_string = 0; } else { *error_string = "uidgid: Only uid and gid are allowed items"; return (0); } break; case MAIN_CP_CB_DATA_STATE_MEMBER: if (strcmp(key, "memberaddr") != 0) { *error_string = "Only memberaddr is allowed in member section"; return (0); } kv_item = malloc(sizeof(*kv_item)); if (kv_item == NULL) { *error_string = "Can't alloc memory"; return (0); } memset(kv_item, 0, sizeof(*kv_item)); kv_item->key = strdup(key); kv_item->value = strdup(value); if (kv_item->key == NULL || kv_item->value == NULL) { free(kv_item); *error_string = "Can't alloc memory"; return (0); } list_init(&kv_item->list); list_add(&kv_item->list, &data->member_items_head); add_as_string = 0; break; case MAIN_CP_CB_DATA_STATE_NODELIST: break; case MAIN_CP_CB_DATA_STATE_NODELIST_NODE: snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.%s", data->node_number, key); if ((strcmp(key, "nodeid") == 0) || (strcmp(key, "quorum_votes") == 0)) { if (safe_atoi(value, &i) != 0) { goto atoi_error; } icmap_set_uint32(key_name, i); add_as_string = 0; } if (strcmp(key, "ring0_addr") == 0) { data->ring0_addr_added = 1; } if (add_as_string) { icmap_set_string(key_name, value); add_as_string = 0; } break; } if (add_as_string) { icmap_set_string(path, value); } break; case PARSER_CB_SECTION_START: if (strcmp(path, "totem.interface") == 0) { data->state = MAIN_CP_CB_DATA_STATE_INTERFACE; data->ringnumber = 0; data->mcastport = -1; data->ttl = -1; list_init(&data->member_items_head); }; if (strcmp(path, "totem") == 0) { data->state = MAIN_CP_CB_DATA_STATE_TOTEM; }; if (strcmp(path, "logging.logger_subsys") == 0) { data->state = MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS; list_init(&data->logger_subsys_items_head); data->subsys = NULL; } if (strcmp(path, "logging.logging_daemon") == 0) { data->state = MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON; list_init(&data->logger_subsys_items_head); data->subsys = NULL; data->logging_daemon_name = NULL; } if (strcmp(path, "uidgid") == 0) { data->state = MAIN_CP_CB_DATA_STATE_UIDGID; } if (strcmp(path, "totem.interface.member") == 0) { data->state = MAIN_CP_CB_DATA_STATE_MEMBER; } if (strcmp(path, "quorum") == 0) { data->state = MAIN_CP_CB_DATA_STATE_QUORUM; } if (strcmp(path, "quorum.device") == 0) { data->state = MAIN_CP_CB_DATA_STATE_QDEVICE; } if (strcmp(path, "nodelist") == 0) { data->state = MAIN_CP_CB_DATA_STATE_NODELIST; data->node_number = 0; } if (strcmp(path, "nodelist.node") == 0) { data->state = MAIN_CP_CB_DATA_STATE_NODELIST_NODE; data->ring0_addr_added = 0; } break; case PARSER_CB_SECTION_END: switch (data->state) { case MAIN_CP_CB_DATA_STATE_NORMAL: break; case MAIN_CP_CB_DATA_STATE_PLOAD: data->state = MAIN_CP_CB_DATA_STATE_NORMAL; break; case MAIN_CP_CB_DATA_STATE_INTERFACE: /* * Create new interface section */ if (data->bindnetaddr != NULL) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.bindnetaddr", data->ringnumber); icmap_set_string(key_name, data->bindnetaddr); free(data->bindnetaddr); data->bindnetaddr = NULL; } if (data->mcastaddr != NULL) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr", data->ringnumber); icmap_set_string(key_name, data->mcastaddr); free(data->mcastaddr); data->mcastaddr = NULL; } if (data->broadcast != NULL) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.broadcast", data->ringnumber); icmap_set_string(key_name, data->broadcast); free(data->broadcast); data->broadcast = NULL; } if (data->mcastport > -1) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport", data->ringnumber); icmap_set_uint16(key_name, data->mcastport); } if (data->ttl > -1) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.ttl", data->ringnumber); icmap_set_uint8(key_name, data->ttl); } i = 0; for (iter = data->member_items_head.next; iter != &data->member_items_head; iter = iter_next) { kv_item = list_entry(iter, struct key_value_list_item, list); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.member.%u", data->ringnumber, i); icmap_set_string(key_name, kv_item->value); iter_next = iter->next; free(kv_item->value); free(kv_item->key); free(kv_item); i++; } data->state = MAIN_CP_CB_DATA_STATE_TOTEM; break; case MAIN_CP_CB_DATA_STATE_TOTEM: data->state = MAIN_CP_CB_DATA_STATE_NORMAL; break; case MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS: if (data->subsys == NULL) { *error_string = "No subsys key in logger_subsys directive"; return (0); } for (iter = data->logger_subsys_items_head.next; iter != &data->logger_subsys_items_head; iter = iter_next) { kv_item = list_entry(iter, struct key_value_list_item, list); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logger_subsys.%s.%s", data->subsys, kv_item->key); icmap_set_string(key_name, kv_item->value); iter_next = iter->next; free(kv_item->value); free(kv_item->key); free(kv_item); } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logger_subsys.%s.subsys", data->subsys); icmap_set_string(key_name, data->subsys); free(data->subsys); data->state = MAIN_CP_CB_DATA_STATE_NORMAL; break; case MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON: if (data->logging_daemon_name == NULL) { *error_string = "No name key in logging_daemon directive"; return (0); } for (iter = data->logger_subsys_items_head.next; iter != &data->logger_subsys_items_head; iter = iter_next) { kv_item = list_entry(iter, struct key_value_list_item, list); if (data->subsys == NULL) { if (strcmp(data->logging_daemon_name, "corosync") == 0) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.%s", kv_item->key); } else { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.%s", data->logging_daemon_name, kv_item->key); } } else { if (strcmp(data->logging_daemon_name, "corosync") == 0) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logger_subsys.%s.%s", data->subsys, kv_item->key); } else { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.%s.%s", data->logging_daemon_name, data->subsys, kv_item->key); } } icmap_set_string(key_name, kv_item->value); iter_next = iter->next; free(kv_item->value); free(kv_item->key); free(kv_item); } if (data->subsys == NULL) { if (strcmp(data->logging_daemon_name, "corosync") != 0) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.name", data->logging_daemon_name); icmap_set_string(key_name, data->logging_daemon_name); } } else { if (strcmp(data->logging_daemon_name, "corosync") == 0) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logger_subsys.%s.subsys", data->subsys); icmap_set_string(key_name, data->subsys); } else { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.%s.subsys", data->logging_daemon_name, data->subsys); icmap_set_string(key_name, data->subsys); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.%s.name", data->logging_daemon_name, data->subsys); icmap_set_string(key_name, data->logging_daemon_name); } } free(data->subsys); free(data->logging_daemon_name); data->state = MAIN_CP_CB_DATA_STATE_NORMAL; break; case MAIN_CP_CB_DATA_STATE_UIDGID: data->state = MAIN_CP_CB_DATA_STATE_UIDGID; break; case MAIN_CP_CB_DATA_STATE_MEMBER: data->state = MAIN_CP_CB_DATA_STATE_INTERFACE; break; case MAIN_CP_CB_DATA_STATE_QUORUM: data->state = MAIN_CP_CB_DATA_STATE_NORMAL; break; case MAIN_CP_CB_DATA_STATE_QDEVICE: data->state = MAIN_CP_CB_DATA_STATE_QUORUM; break; case MAIN_CP_CB_DATA_STATE_NODELIST: data->state = MAIN_CP_CB_DATA_STATE_NORMAL; break; case MAIN_CP_CB_DATA_STATE_NODELIST_NODE: if (!data->ring0_addr_added) { *error_string = "No ring0_addr specified for node"; return (0); } data->node_number++; data->state = MAIN_CP_CB_DATA_STATE_NODELIST; break; } break; } return (1); atoi_error: snprintf(formated_err, sizeof(formated_err), "Value of key \"%s\" must be integer, but \"%s\" was given", key, value); *error_string = formated_err; return (0); } static int uidgid_config_parser_cb(const char *path, char *key, char *value, enum parser_cb_type type, const char **error_string, void *user_data) { char key_name[ICMAP_KEYNAME_MAXLEN]; int uid, gid; switch (type) { case PARSER_CB_START: break; case PARSER_CB_END: break; case PARSER_CB_ITEM: if (strcmp(path, "uidgid.uid") == 0) { uid = uid_determine(value); if (uid == -1) { *error_string = error_string_response; return (0); } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.uid.%u", uid); icmap_set_uint8(key_name, 1); } else if (strcmp(key, "uidgid.gid") == 0) { gid = gid_determine(value); if (gid == -1) { *error_string = error_string_response; return (0); } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.gid.%u", gid); icmap_set_uint8(key_name, 1); } else { *error_string = "uidgid: Only uid and gid are allowed items"; return (0); } break; case PARSER_CB_SECTION_START: if (strcmp(path, "uidgid") != 0) { *error_string = "uidgid: Can't add subsection different then uidgid"; return (0); }; break; case PARSER_CB_SECTION_END: break; } return (1); } static int read_uidgid_files_into_icmap( const char **error_string) { FILE *fp; const char *dirname; DIR *dp; struct dirent *dirent; struct dirent *entry; char filename[PATH_MAX + FILENAME_MAX + 1]; int res = 0; size_t len; int return_code; struct stat stat_buf; char key_name[ICMAP_KEYNAME_MAXLEN]; dirname = COROSYSCONFDIR "/uidgid.d"; dp = opendir (dirname); if (dp == NULL) return 0; len = offsetof(struct dirent, d_name) + NAME_MAX + 1; entry = malloc(len); if (entry == NULL) { res = 0; goto error_exit; } for (return_code = readdir_r(dp, entry, &dirent); dirent != NULL && return_code == 0; return_code = readdir_r(dp, entry, &dirent)) { snprintf(filename, sizeof (filename), "%s/%s", dirname, dirent->d_name); stat (filename, &stat_buf); if (S_ISREG(stat_buf.st_mode)) { fp = fopen (filename, "r"); if (fp == NULL) continue; key_name[0] = 0; res = parse_section(fp, key_name, error_string, uidgid_config_parser_cb, NULL); fclose (fp); if (res != 0) { goto error_exit; } } } error_exit: free (entry); closedir(dp); return res; } /* Read config file and load into icmap */ static int read_config_file_into_icmap( const char **error_string) { FILE *fp; const char *filename; char *error_reason = error_string_response; int res; char key_name[ICMAP_KEYNAME_MAXLEN]; struct main_cp_cb_data data; filename = getenv ("COROSYNC_MAIN_CONFIG_FILE"); if (!filename) filename = COROSYSCONFDIR "/corosync.conf"; fp = fopen (filename, "r"); if (fp == NULL) { char error_str[100]; const char *error_ptr = qb_strerror_r(errno, error_str, sizeof(error_str)); snprintf (error_reason, sizeof(error_string_response), "Can't read file %s reason = (%s)", filename, error_ptr); *error_string = error_reason; return -1; } key_name[0] = 0; res = parse_section(fp, key_name, error_string, main_config_parser_cb, &data); fclose(fp); if (res == 0) { res = read_uidgid_files_into_icmap(error_string); } if (res == 0) { snprintf (error_reason, sizeof(error_string_response), "Successfully read main configuration file '%s'.", filename); *error_string = error_reason; } return res; } diff --git a/exec/crypto.c b/exec/crypto.c deleted file mode 100644 index 14fb8074..00000000 --- a/exec/crypto.c +++ /dev/null @@ -1,1340 +0,0 @@ -/* LibTomCrypt, modular cryptographic library -- Tom St Denis - * - * LibTomCrypt is a library that provides various cryptographic - * algorithms in a highly modular and flexible manner. - * - * The library is free for all purposes without any express - * guarantee it works. - * - * Tom St Denis, tomstdenis@iahu.ca, http://libtomcrypt.com - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#if defined(COROSYNC_BSD) -#include -#endif -#include -#include -#include - -#include "crypto.h" - -#define CONST64(n) n ## ULL - -typedef uint32_t ulong32; -typedef uint64_t ulong64; - -#if __BYTE_ORDER == __LITTLE_ENDIAN -#define ENDIAN_LITTLE -#elif __BYTE_ORDER == __BIG_ENDIAN -#define ENDIAN_BIG -#else -#error "cannot detect byte order" -#endif - -#if defined(COROSYNC_LINUX) -#if __WORDSIZE == 64 -#define ENDIAN_64BITWORD -#endif -#if __WORDSIZE == 32 -#define ENDIAN_32BITWORD -#endif -#else -/* XXX need to find a better default - */ -#define ENDIAN_32BITWORD -#endif - -/* ---- HELPER MACROS ---- */ -#ifdef ENDIAN_NEUTRAL - -#define STORE32L(x, y) \ - { (y)[3] = (unsigned char)(((x)>>24)&255); (y)[2] = (unsigned char)(((x)>>16)&255); \ - (y)[1] = (unsigned char)(((x)>>8)&255); (y)[0] = (unsigned char)((x)&255); } - -#define LOAD32L(x, y) \ - { x = ((unsigned long)((y)[3] & 255)<<24) | \ - ((unsigned long)((y)[2] & 255)<<16) | \ - ((unsigned long)((y)[1] & 255)<<8) | \ - ((unsigned long)((y)[0] & 255)); } - -#define STORE64L(x, y) \ - { (y)[7] = (unsigned char)(((x)>>56)&255); (y)[6] = (unsigned char)(((x)>>48)&255); \ - (y)[5] = (unsigned char)(((x)>>40)&255); (y)[4] = (unsigned char)(((x)>>32)&255); \ - (y)[3] = (unsigned char)(((x)>>24)&255); (y)[2] = (unsigned char)(((x)>>16)&255); \ - (y)[1] = (unsigned char)(((x)>>8)&255); (y)[0] = (unsigned char)((x)&255); } - -#define LOAD64L(x, y) \ - { x = (((ulong64)((y)[7] & 255))<<56)|(((ulong64)((y)[6] & 255))<<48)| \ - (((ulong64)((y)[5] & 255))<<40)|(((ulong64)((y)[4] & 255))<<32)| \ - (((ulong64)((y)[3] & 255))<<24)|(((ulong64)((y)[2] & 255))<<16)| \ - (((ulong64)((y)[1] & 255))<<8)|(((ulong64)((y)[0] & 255))); } - -#define STORE32H(x, y) \ - { (y)[0] = (unsigned char)(((x)>>24)&255); (y)[1] = (unsigned char)(((x)>>16)&255); \ - (y)[2] = (unsigned char)(((x)>>8)&255); (y)[3] = (unsigned char)((x)&255); } - -#define LOAD32H(x, y) \ - { x = ((unsigned long)((y)[0] & 255)<<24) | \ - ((unsigned long)((y)[1] & 255)<<16) | \ - ((unsigned long)((y)[2] & 255)<<8) | \ - ((unsigned long)((y)[3] & 255)); } - -#define STORE64H(x, y) \ - { (y)[0] = (unsigned char)(((x)>>56)&255); (y)[1] = (unsigned char)(((x)>>48)&255); \ - (y)[2] = (unsigned char)(((x)>>40)&255); (y)[3] = (unsigned char)(((x)>>32)&255); \ - (y)[4] = (unsigned char)(((x)>>24)&255); (y)[5] = (unsigned char)(((x)>>16)&255); \ - (y)[6] = (unsigned char)(((x)>>8)&255); (y)[7] = (unsigned char)((x)&255); } - -#define LOAD64H(x, y) \ - { x = (((ulong64)((y)[0] & 255))<<56)|(((ulong64)((y)[1] & 255))<<48) | \ - (((ulong64)((y)[2] & 255))<<40)|(((ulong64)((y)[3] & 255))<<32) | \ - (((ulong64)((y)[4] & 255))<<24)|(((ulong64)((y)[5] & 255))<<16) | \ - (((ulong64)((y)[6] & 255))<<8)|(((ulong64)((y)[7] & 255))); } - -#endif /* ENDIAN_NEUTRAL */ - -#ifdef ENDIAN_LITTLE - -#define STORE32H(x, y) \ - { (y)[0] = (unsigned char)(((x)>>24)&255); (y)[1] = (unsigned char)(((x)>>16)&255); \ - (y)[2] = (unsigned char)(((x)>>8)&255); (y)[3] = (unsigned char)((x)&255); } - -#define LOAD32H(x, y) \ - { x = ((unsigned long)((y)[0] & 255)<<24) | \ - ((unsigned long)((y)[1] & 255)<<16) | \ - ((unsigned long)((y)[2] & 255)<<8) | \ - ((unsigned long)((y)[3] & 255)); } - -#define STORE64H(x, y) \ - { (y)[0] = (unsigned char)(((x)>>56)&255); (y)[1] = (unsigned char)(((x)>>48)&255); \ - (y)[2] = (unsigned char)(((x)>>40)&255); (y)[3] = (unsigned char)(((x)>>32)&255); \ - (y)[4] = (unsigned char)(((x)>>24)&255); (y)[5] = (unsigned char)(((x)>>16)&255); \ - (y)[6] = (unsigned char)(((x)>>8)&255); (y)[7] = (unsigned char)((x)&255); } - -#define LOAD64H(x, y) \ - { x = (((ulong64)((y)[0] & 255))<<56)|(((ulong64)((y)[1] & 255))<<48) | \ - (((ulong64)((y)[2] & 255))<<40)|(((ulong64)((y)[3] & 255))<<32) | \ - (((ulong64)((y)[4] & 255))<<24)|(((ulong64)((y)[5] & 255))<<16) | \ - (((ulong64)((y)[6] & 255))<<8)|(((ulong64)((y)[7] & 255))); } - -#ifdef ENDIAN_32BITWORD - -#define STORE32L(x, y) \ - { unsigned long __t = (x); memcpy(y, &__t, 4); } - -#define LOAD32L(x, y) \ - memcpy(&(x), y, 4); - -#define STORE64L(x, y) \ - { (y)[7] = (unsigned char)(((x)>>56)&255); (y)[6] = (unsigned char)(((x)>>48)&255); \ - (y)[5] = (unsigned char)(((x)>>40)&255); (y)[4] = (unsigned char)(((x)>>32)&255); \ - (y)[3] = (unsigned char)(((x)>>24)&255); (y)[2] = (unsigned char)(((x)>>16)&255); \ - (y)[1] = (unsigned char)(((x)>>8)&255); (y)[0] = (unsigned char)((x)&255); } - -#define LOAD64L(x, y) \ - { x = (((ulong64)((y)[7] & 255))<<56)|(((ulong64)((y)[6] & 255))<<48)| \ - (((ulong64)((y)[5] & 255))<<40)|(((ulong64)((y)[4] & 255))<<32)| \ - (((ulong64)((y)[3] & 255))<<24)|(((ulong64)((y)[2] & 255))<<16)| \ - (((ulong64)((y)[1] & 255))<<8)|(((ulong64)((y)[0] & 255))); } - -#else /* 64-bit words then */ - -#define STORE32L(x, y) \ - { unsigned long __t = (x); memcpy(y, &__t, 4); } - -#define LOAD32L(x, y) \ - { memcpy(&(x), y, 4); x &= 0xFFFFFFFF; } - -#define STORE64L(x, y) \ - { ulong64 __t = (x); memcpy(y, &__t, 8); } - -#define LOAD64L(x, y) \ - { memcpy(&(x), y, 8); } - -#endif /* ENDIAN_64BITWORD */ - -#endif /* ENDIAN_LITTLE */ - -#ifdef ENDIAN_BIG -#define STORE32L(x, y) \ - { (y)[3] = (unsigned char)(((x)>>24)&255); (y)[2] = (unsigned char)(((x)>>16)&255); \ - (y)[1] = (unsigned char)(((x)>>8)&255); (y)[0] = (unsigned char)((x)&255); } - -#define LOAD32L(x, y) \ - { x = ((unsigned long)((y)[3] & 255)<<24) | \ - ((unsigned long)((y)[2] & 255)<<16) | \ - ((unsigned long)((y)[1] & 255)<<8) | \ - ((unsigned long)((y)[0] & 255)); } - -#define STORE64L(x, y) \ - { (y)[7] = (unsigned char)(((x)>>56)&255); (y)[6] = (unsigned char)(((x)>>48)&255); \ - (y)[5] = (unsigned char)(((x)>>40)&255); (y)[4] = (unsigned char)(((x)>>32)&255); \ - (y)[3] = (unsigned char)(((x)>>24)&255); (y)[2] = (unsigned char)(((x)>>16)&255); \ - (y)[1] = (unsigned char)(((x)>>8)&255); (y)[0] = (unsigned char)((x)&255); } - -#define LOAD64L(x, y) \ - { x = (((ulong64)((y)[7] & 255))<<56)|(((ulong64)((y)[6] & 255))<<48) | \ - (((ulong64)((y)[5] & 255))<<40)|(((ulong64)((y)[4] & 255))<<32) | \ - (((ulong64)((y)[3] & 255))<<24)|(((ulong64)((y)[2] & 255))<<16) | \ - (((ulong64)((y)[1] & 255))<<8)|(((ulong64)((y)[0] & 255))); } - -#ifdef ENDIAN_32BITWORD - -#define STORE32H(x, y) \ - { unsigned long __t = (x); memcpy(y, &__t, 4); } - -#define LOAD32H(x, y) \ - memcpy(&(x), y, 4); - -#define STORE64H(x, y) \ - { (y)[0] = (unsigned char)(((x)>>56)&255); (y)[1] = (unsigned char)(((x)>>48)&255); \ - (y)[2] = (unsigned char)(((x)>>40)&255); (y)[3] = (unsigned char)(((x)>>32)&255); \ - (y)[4] = (unsigned char)(((x)>>24)&255); (y)[5] = (unsigned char)(((x)>>16)&255); \ - (y)[6] = (unsigned char)(((x)>>8)&255); (y)[7] = (unsigned char)((x)&255); } - -#define LOAD64H(x, y) \ - { x = (((ulong64)((y)[0] & 255))<<56)|(((ulong64)((y)[1] & 255))<<48)| \ - (((ulong64)((y)[2] & 255))<<40)|(((ulong64)((y)[3] & 255))<<32)| \ - (((ulong64)((y)[4] & 255))<<24)|(((ulong64)((y)[5] & 255))<<16)| \ - (((ulong64)((y)[6] & 255))<<8)| (((ulong64)((y)[7] & 255))); } - -#else /* 64-bit words then */ - -#define STORE32H(x, y) \ - { unsigned long __t = (x); memcpy(y, &__t, 4); } - -#define LOAD32H(x, y) \ - { memcpy(&(x), y, 4); x &= 0xFFFFFFFF; } - -#define STORE64H(x, y) \ - { ulong64 __t = (x); memcpy(y, &__t, 8); } - -#define LOAD64H(x, y) \ - { memcpy(&(x), y, 8); } - -#endif /* ENDIAN_64BITWORD */ -#endif /* ENDIAN_BIG */ - -#define BSWAP(x) ( ((x>>24)&0x000000FFUL) | ((x<<24)&0xFF000000UL) | \ - ((x>>8)&0x0000FF00UL) | ((x<<8)&0x00FF0000UL) ) - -#if defined(__GNUC__) && defined(__i386__) && !defined(INTEL_CC) - -static inline unsigned long ROL(unsigned long word, int i) -{ - __asm__("roll %%cl,%0" - :"=r" (word) - :"0" (word),"c" (i)); - return word; -} - -static inline unsigned long ROR(unsigned long word, int i) -{ - __asm__("rorl %%cl,%0" - :"=r" (word) - :"0" (word),"c" (i)); - return word; -} - -#else - -/* rotates the hard way */ -#define ROL(x, y) ( (((unsigned long)(x)<<(unsigned long)((y)&31)) | (((unsigned long)(x)&0xFFFFFFFFUL)>>(unsigned long)(32-((y)&31)))) & 0xFFFFFFFFUL) -#define ROR(x, y) ( ((((unsigned long)(x)&0xFFFFFFFFUL)>>(unsigned long)((y)&31)) | ((unsigned long)(x)<<(unsigned long)(32-((y)&31)))) & 0xFFFFFFFFUL) - -#endif - -#define ROL64(x, y) \ - ( (((x)<<((ulong64)(y)&63)) | \ - (((x)&CONST64(0xFFFFFFFFFFFFFFFF))>>((ulong64)64-((y)&63)))) & CONST64(0xFFFFFFFFFFFFFFFF)) - -#define ROR64(x, y) \ - ( ((((x)&CONST64(0xFFFFFFFFFFFFFFFF))>>((ulong64)(y)&CONST64(63))) | \ - ((x)<<((ulong64)(64-((y)&CONST64(63)))))) & CONST64(0xFFFFFFFFFFFFFFFF)) - -#undef MAX -#undef MIN -#define MAX(x, y) ( ((x)>(y))?(x):(y) ) -#define MIN(x, y) ( ((x)<(y))?(x):(y) ) - -/* extract a byte portably */ -#define byte(x, n) (((x) >> (8 * (n))) & 255) - -#define CONST64(n) n ## ULL - -/* a simple macro for making hash "process" functions */ -#define HASH_PROCESS(func_name, compress_name, state_var, block_size) \ -int func_name (hash_state * md, const unsigned char *buf, unsigned long len) \ -{ \ - unsigned long n; \ - if (md-> state_var .curlen > sizeof(md-> state_var .buf)) { \ - return CRYPT_INVALID_ARG; \ - } \ - while (len > 0) { \ - if (md-> state_var .curlen == 0 && len >= block_size) { \ - compress_name (md, (unsigned char *)buf); \ - md-> state_var .length += block_size * 8; \ - buf += block_size; \ - len -= block_size; \ - } else { \ - n = MIN(len, (block_size - md-> state_var .curlen)); \ - memcpy(md-> state_var .buf + md-> state_var.curlen, buf, (size_t)n); \ - md-> state_var .curlen += n; \ - buf += n; \ - len -= n; \ - if (md-> state_var .curlen == block_size) { \ - compress_name (md, md-> state_var .buf); \ - md-> state_var .length += 8*block_size; \ - md-> state_var .curlen = 0; \ - } \ - } \ - } \ - return CRYPT_OK; \ -} - -#define MAXBLOCKSIZE 128 - -/* - * The mycrypt_macros.h file - */ - -/* ---- HELPER MACROS ---- */ -#ifdef ENDIAN_NEUTRAL - -#define STORE32L(x, y) \ - { (y)[3] = (unsigned char)(((x)>>24)&255); (y)[2] = (unsigned char)(((x)>>16)&255); \ - (y)[1] = (unsigned char)(((x)>>8)&255); (y)[0] = (unsigned char)((x)&255); } - -#define LOAD32L(x, y) \ - { x = ((unsigned long)((y)[3] & 255)<<24) | \ - ((unsigned long)((y)[2] & 255)<<16) | \ - ((unsigned long)((y)[1] & 255)<<8) | \ - ((unsigned long)((y)[0] & 255)); } - -#define STORE64L(x, y) \ - { (y)[7] = (unsigned char)(((x)>>56)&255); (y)[6] = (unsigned char)(((x)>>48)&255); \ - (y)[5] = (unsigned char)(((x)>>40)&255); (y)[4] = (unsigned char)(((x)>>32)&255); \ - (y)[3] = (unsigned char)(((x)>>24)&255); (y)[2] = (unsigned char)(((x)>>16)&255); \ - (y)[1] = (unsigned char)(((x)>>8)&255); (y)[0] = (unsigned char)((x)&255); } - -#define LOAD64L(x, y) \ - { x = (((ulong64)((y)[7] & 255))<<56)|(((ulong64)((y)[6] & 255))<<48)| \ - (((ulong64)((y)[5] & 255))<<40)|(((ulong64)((y)[4] & 255))<<32)| \ - (((ulong64)((y)[3] & 255))<<24)|(((ulong64)((y)[2] & 255))<<16)| \ - (((ulong64)((y)[1] & 255))<<8)|(((ulong64)((y)[0] & 255))); } - -#define STORE32H(x, y) \ - { (y)[0] = (unsigned char)(((x)>>24)&255); (y)[1] = (unsigned char)(((x)>>16)&255); \ - (y)[2] = (unsigned char)(((x)>>8)&255); (y)[3] = (unsigned char)((x)&255); } - -#define LOAD32H(x, y) \ - { x = ((unsigned long)((y)[0] & 255)<<24) | \ - ((unsigned long)((y)[1] & 255)<<16) | \ - ((unsigned long)((y)[2] & 255)<<8) | \ - ((unsigned long)((y)[3] & 255)); } - -#define STORE64H(x, y) \ - { (y)[0] = (unsigned char)(((x)>>56)&255); (y)[1] = (unsigned char)(((x)>>48)&255); \ - (y)[2] = (unsigned char)(((x)>>40)&255); (y)[3] = (unsigned char)(((x)>>32)&255); \ - (y)[4] = (unsigned char)(((x)>>24)&255); (y)[5] = (unsigned char)(((x)>>16)&255); \ - (y)[6] = (unsigned char)(((x)>>8)&255); (y)[7] = (unsigned char)((x)&255); } - -#define LOAD64H(x, y) \ - { x = (((ulong64)((y)[0] & 255))<<56)|(((ulong64)((y)[1] & 255))<<48) | \ - (((ulong64)((y)[2] & 255))<<40)|(((ulong64)((y)[3] & 255))<<32) | \ - (((ulong64)((y)[4] & 255))<<24)|(((ulong64)((y)[5] & 255))<<16) | \ - (((ulong64)((y)[6] & 255))<<8)|(((ulong64)((y)[7] & 255))); } - -#endif /* ENDIAN_NEUTRAL */ - -#ifdef ENDIAN_LITTLE - -#define STORE32H(x, y) \ - { (y)[0] = (unsigned char)(((x)>>24)&255); (y)[1] = (unsigned char)(((x)>>16)&255); \ - (y)[2] = (unsigned char)(((x)>>8)&255); (y)[3] = (unsigned char)((x)&255); } - -#define LOAD32H(x, y) \ - { x = ((unsigned long)((y)[0] & 255)<<24) | \ - ((unsigned long)((y)[1] & 255)<<16) | \ - ((unsigned long)((y)[2] & 255)<<8) | \ - ((unsigned long)((y)[3] & 255)); } - -#define STORE64H(x, y) \ - { (y)[0] = (unsigned char)(((x)>>56)&255); (y)[1] = (unsigned char)(((x)>>48)&255); \ - (y)[2] = (unsigned char)(((x)>>40)&255); (y)[3] = (unsigned char)(((x)>>32)&255); \ - (y)[4] = (unsigned char)(((x)>>24)&255); (y)[5] = (unsigned char)(((x)>>16)&255); \ - (y)[6] = (unsigned char)(((x)>>8)&255); (y)[7] = (unsigned char)((x)&255); } - -#define LOAD64H(x, y) \ - { x = (((ulong64)((y)[0] & 255))<<56)|(((ulong64)((y)[1] & 255))<<48) | \ - (((ulong64)((y)[2] & 255))<<40)|(((ulong64)((y)[3] & 255))<<32) | \ - (((ulong64)((y)[4] & 255))<<24)|(((ulong64)((y)[5] & 255))<<16) | \ - (((ulong64)((y)[6] & 255))<<8)|(((ulong64)((y)[7] & 255))); } - -#ifdef ENDIAN_32BITWORD - -#define STORE32L(x, y) \ - { unsigned long __t = (x); memcpy(y, &__t, 4); } - -#define LOAD32L(x, y) \ - memcpy(&(x), y, 4); - -#define STORE64L(x, y) \ - { (y)[7] = (unsigned char)(((x)>>56)&255); (y)[6] = (unsigned char)(((x)>>48)&255); \ - (y)[5] = (unsigned char)(((x)>>40)&255); (y)[4] = (unsigned char)(((x)>>32)&255); \ - (y)[3] = (unsigned char)(((x)>>24)&255); (y)[2] = (unsigned char)(((x)>>16)&255); \ - (y)[1] = (unsigned char)(((x)>>8)&255); (y)[0] = (unsigned char)((x)&255); } - -#define LOAD64L(x, y) \ - { x = (((ulong64)((y)[7] & 255))<<56)|(((ulong64)((y)[6] & 255))<<48)| \ - (((ulong64)((y)[5] & 255))<<40)|(((ulong64)((y)[4] & 255))<<32)| \ - (((ulong64)((y)[3] & 255))<<24)|(((ulong64)((y)[2] & 255))<<16)| \ - (((ulong64)((y)[1] & 255))<<8)|(((ulong64)((y)[0] & 255))); } - -#else /* 64-bit words then */ - -#define STORE32L(x, y) \ - { unsigned long __t = (x); memcpy(y, &__t, 4); } - -#define LOAD32L(x, y) \ - { memcpy(&(x), y, 4); x &= 0xFFFFFFFF; } - -#define STORE64L(x, y) \ - { ulong64 __t = (x); memcpy(y, &__t, 8); } - -#define LOAD64L(x, y) \ - { memcpy(&(x), y, 8); } - -#endif /* ENDIAN_64BITWORD */ - -#endif /* ENDIAN_LITTLE */ - -#ifdef ENDIAN_BIG -#define STORE32L(x, y) \ - { (y)[3] = (unsigned char)(((x)>>24)&255); (y)[2] = (unsigned char)(((x)>>16)&255); \ - (y)[1] = (unsigned char)(((x)>>8)&255); (y)[0] = (unsigned char)((x)&255); } - -#define LOAD32L(x, y) \ - { x = ((unsigned long)((y)[3] & 255)<<24) | \ - ((unsigned long)((y)[2] & 255)<<16) | \ - ((unsigned long)((y)[1] & 255)<<8) | \ - ((unsigned long)((y)[0] & 255)); } - -#define STORE64L(x, y) \ - { (y)[7] = (unsigned char)(((x)>>56)&255); (y)[6] = (unsigned char)(((x)>>48)&255); \ - (y)[5] = (unsigned char)(((x)>>40)&255); (y)[4] = (unsigned char)(((x)>>32)&255); \ - (y)[3] = (unsigned char)(((x)>>24)&255); (y)[2] = (unsigned char)(((x)>>16)&255); \ - (y)[1] = (unsigned char)(((x)>>8)&255); (y)[0] = (unsigned char)((x)&255); } - -#define LOAD64L(x, y) \ - { x = (((ulong64)((y)[7] & 255))<<56)|(((ulong64)((y)[6] & 255))<<48) | \ - (((ulong64)((y)[5] & 255))<<40)|(((ulong64)((y)[4] & 255))<<32) | \ - (((ulong64)((y)[3] & 255))<<24)|(((ulong64)((y)[2] & 255))<<16) | \ - (((ulong64)((y)[1] & 255))<<8)|(((ulong64)((y)[0] & 255))); } - -#ifdef ENDIAN_32BITWORD - -#define STORE32H(x, y) \ - { unsigned long __t = (x); memcpy(y, &__t, 4); } - -#define LOAD32H(x, y) \ - memcpy(&(x), y, 4); - -#define STORE64H(x, y) \ - { (y)[0] = (unsigned char)(((x)>>56)&255); (y)[1] = (unsigned char)(((x)>>48)&255); \ - (y)[2] = (unsigned char)(((x)>>40)&255); (y)[3] = (unsigned char)(((x)>>32)&255); \ - (y)[4] = (unsigned char)(((x)>>24)&255); (y)[5] = (unsigned char)(((x)>>16)&255); \ - (y)[6] = (unsigned char)(((x)>>8)&255); (y)[7] = (unsigned char)((x)&255); } - -#define LOAD64H(x, y) \ - { x = (((ulong64)((y)[0] & 255))<<56)|(((ulong64)((y)[1] & 255))<<48)| \ - (((ulong64)((y)[2] & 255))<<40)|(((ulong64)((y)[3] & 255))<<32)| \ - (((ulong64)((y)[4] & 255))<<24)|(((ulong64)((y)[5] & 255))<<16)| \ - (((ulong64)((y)[6] & 255))<<8)| (((ulong64)((y)[7] & 255))); } - -#else /* 64-bit words then */ - -#define STORE32H(x, y) \ - { unsigned long __t = (x); memcpy(y, &__t, 4); } - -#define LOAD32H(x, y) \ - { memcpy(&(x), y, 4); x &= 0xFFFFFFFF; } - -#define STORE64H(x, y) \ - { ulong64 __t = (x); memcpy(y, &__t, 8); } - -#define LOAD64H(x, y) \ - { memcpy(&(x), y, 8); } - -#endif /* ENDIAN_64BITWORD */ -#endif /* ENDIAN_BIG */ - -#define BSWAP(x) ( ((x>>24)&0x000000FFUL) | ((x<<24)&0xFF000000UL) | \ - ((x>>8)&0x0000FF00UL) | ((x<<8)&0x00FF0000UL) ) - - -#define ROL64(x, y) \ - ( (((x)<<((ulong64)(y)&63)) | \ - (((x)&CONST64(0xFFFFFFFFFFFFFFFF))>>((ulong64)64-((y)&63)))) & CONST64(0xFFFFFFFFFFFFFFFF)) - -#define ROR64(x, y) \ - ( ((((x)&CONST64(0xFFFFFFFFFFFFFFFF))>>((ulong64)(y)&CONST64(63))) | \ - ((x)<<((ulong64)(64-((y)&CONST64(63)))))) & CONST64(0xFFFFFFFFFFFFFFFF)) - -#undef MAX -#undef MIN -#define MAX(x, y) ( ((x)>(y))?(x):(y) ) -#define MIN(x, y) ( ((x)<(y))?(x):(y) ) - -/* extract a byte portably */ -#define byte(x, n) (((x) >> (8 * (n))) & 255) - -/* $Id: s128multab.h 213 2003-12-16 04:27:12Z ggr $ */ -/* @(#)TuringMultab.h 1.3 (QUALCOMM) 02/09/03 */ -/* Multiplication table for Turing using 0xD02B4367 */ - -static const ulong32 Multab[256] = { - 0x00000000, 0xD02B4367, 0xED5686CE, 0x3D7DC5A9, - 0x97AC41D1, 0x478702B6, 0x7AFAC71F, 0xAAD18478, - 0x631582EF, 0xB33EC188, 0x8E430421, 0x5E684746, - 0xF4B9C33E, 0x24928059, 0x19EF45F0, 0xC9C40697, - 0xC62A4993, 0x16010AF4, 0x2B7CCF5D, 0xFB578C3A, - 0x51860842, 0x81AD4B25, 0xBCD08E8C, 0x6CFBCDEB, - 0xA53FCB7C, 0x7514881B, 0x48694DB2, 0x98420ED5, - 0x32938AAD, 0xE2B8C9CA, 0xDFC50C63, 0x0FEE4F04, - 0xC154926B, 0x117FD10C, 0x2C0214A5, 0xFC2957C2, - 0x56F8D3BA, 0x86D390DD, 0xBBAE5574, 0x6B851613, - 0xA2411084, 0x726A53E3, 0x4F17964A, 0x9F3CD52D, - 0x35ED5155, 0xE5C61232, 0xD8BBD79B, 0x089094FC, - 0x077EDBF8, 0xD755989F, 0xEA285D36, 0x3A031E51, - 0x90D29A29, 0x40F9D94E, 0x7D841CE7, 0xADAF5F80, - 0x646B5917, 0xB4401A70, 0x893DDFD9, 0x59169CBE, - 0xF3C718C6, 0x23EC5BA1, 0x1E919E08, 0xCEBADD6F, - 0xCFA869D6, 0x1F832AB1, 0x22FEEF18, 0xF2D5AC7F, - 0x58042807, 0x882F6B60, 0xB552AEC9, 0x6579EDAE, - 0xACBDEB39, 0x7C96A85E, 0x41EB6DF7, 0x91C02E90, - 0x3B11AAE8, 0xEB3AE98F, 0xD6472C26, 0x066C6F41, - 0x09822045, 0xD9A96322, 0xE4D4A68B, 0x34FFE5EC, - 0x9E2E6194, 0x4E0522F3, 0x7378E75A, 0xA353A43D, - 0x6A97A2AA, 0xBABCE1CD, 0x87C12464, 0x57EA6703, - 0xFD3BE37B, 0x2D10A01C, 0x106D65B5, 0xC04626D2, - 0x0EFCFBBD, 0xDED7B8DA, 0xE3AA7D73, 0x33813E14, - 0x9950BA6C, 0x497BF90B, 0x74063CA2, 0xA42D7FC5, - 0x6DE97952, 0xBDC23A35, 0x80BFFF9C, 0x5094BCFB, - 0xFA453883, 0x2A6E7BE4, 0x1713BE4D, 0xC738FD2A, - 0xC8D6B22E, 0x18FDF149, 0x258034E0, 0xF5AB7787, - 0x5F7AF3FF, 0x8F51B098, 0xB22C7531, 0x62073656, - 0xABC330C1, 0x7BE873A6, 0x4695B60F, 0x96BEF568, - 0x3C6F7110, 0xEC443277, 0xD139F7DE, 0x0112B4B9, - 0xD31DD2E1, 0x03369186, 0x3E4B542F, 0xEE601748, - 0x44B19330, 0x949AD057, 0xA9E715FE, 0x79CC5699, - 0xB008500E, 0x60231369, 0x5D5ED6C0, 0x8D7595A7, - 0x27A411DF, 0xF78F52B8, 0xCAF29711, 0x1AD9D476, - 0x15379B72, 0xC51CD815, 0xF8611DBC, 0x284A5EDB, - 0x829BDAA3, 0x52B099C4, 0x6FCD5C6D, 0xBFE61F0A, - 0x7622199D, 0xA6095AFA, 0x9B749F53, 0x4B5FDC34, - 0xE18E584C, 0x31A51B2B, 0x0CD8DE82, 0xDCF39DE5, - 0x1249408A, 0xC26203ED, 0xFF1FC644, 0x2F348523, - 0x85E5015B, 0x55CE423C, 0x68B38795, 0xB898C4F2, - 0x715CC265, 0xA1778102, 0x9C0A44AB, 0x4C2107CC, - 0xE6F083B4, 0x36DBC0D3, 0x0BA6057A, 0xDB8D461D, - 0xD4630919, 0x04484A7E, 0x39358FD7, 0xE91ECCB0, - 0x43CF48C8, 0x93E40BAF, 0xAE99CE06, 0x7EB28D61, - 0xB7768BF6, 0x675DC891, 0x5A200D38, 0x8A0B4E5F, - 0x20DACA27, 0xF0F18940, 0xCD8C4CE9, 0x1DA70F8E, - 0x1CB5BB37, 0xCC9EF850, 0xF1E33DF9, 0x21C87E9E, - 0x8B19FAE6, 0x5B32B981, 0x664F7C28, 0xB6643F4F, - 0x7FA039D8, 0xAF8B7ABF, 0x92F6BF16, 0x42DDFC71, - 0xE80C7809, 0x38273B6E, 0x055AFEC7, 0xD571BDA0, - 0xDA9FF2A4, 0x0AB4B1C3, 0x37C9746A, 0xE7E2370D, - 0x4D33B375, 0x9D18F012, 0xA06535BB, 0x704E76DC, - 0xB98A704B, 0x69A1332C, 0x54DCF685, 0x84F7B5E2, - 0x2E26319A, 0xFE0D72FD, 0xC370B754, 0x135BF433, - 0xDDE1295C, 0x0DCA6A3B, 0x30B7AF92, 0xE09CECF5, - 0x4A4D688D, 0x9A662BEA, 0xA71BEE43, 0x7730AD24, - 0xBEF4ABB3, 0x6EDFE8D4, 0x53A22D7D, 0x83896E1A, - 0x2958EA62, 0xF973A905, 0xC40E6CAC, 0x14252FCB, - 0x1BCB60CF, 0xCBE023A8, 0xF69DE601, 0x26B6A566, - 0x8C67211E, 0x5C4C6279, 0x6131A7D0, 0xB11AE4B7, - 0x78DEE220, 0xA8F5A147, 0x958864EE, 0x45A32789, - 0xEF72A3F1, 0x3F59E096, 0x0224253F, 0xD20F6658, -}; - -/* $Id: s128sbox.h 213 2003-12-16 04:27:12Z ggr $ */ -/* Sbox for SOBER-128 */ -/* - * This is really the combination of two SBoxes; the least significant - * 24 bits comes from: - * 8->32 Sbox generated by Millan et. al. at Queensland University of - * Technology. See: E. Dawson, W. Millan, L. Burnett, G. Carter, - * "On the Design of 8*32 S-boxes". Unpublished report, by the - * Information Systems Research Centre, - * Queensland University of Technology, 1999. - * - * The most significant 8 bits are the Skipjack "F table", which can be - * found at http://csrc.nist.gov/CryptoToolkit/skipjack/skipjack.pdf . - * In this optimised table, though, the intent is to XOR the word from - * the table selected by the high byte with the input word. Thus, the - * high byte is actually the Skipjack F-table entry XORED with its - * table index. - */ -static const ulong32 Sbox[256] = { - 0xa3aa1887, 0xd65e435c, 0x0b65c042, 0x800e6ef4, - 0xfc57ee20, 0x4d84fed3, 0xf066c502, 0xf354e8ae, - 0xbb2ee9d9, 0x281f38d4, 0x1f829b5d, 0x735cdf3c, - 0x95864249, 0xbc2e3963, 0xa1f4429f, 0xf6432c35, - 0xf7f40325, 0x3cc0dd70, 0x5f973ded, 0x9902dc5e, - 0xda175b42, 0x590012bf, 0xdc94d78c, 0x39aab26b, - 0x4ac11b9a, 0x8c168146, 0xc3ea8ec5, 0x058ac28f, - 0x52ed5c0f, 0x25b4101c, 0x5a2db082, 0x370929e1, - 0x2a1843de, 0xfe8299fc, 0x202fbc4b, 0x833915dd, - 0x33a803fa, 0xd446b2de, 0x46233342, 0x4fcee7c3, - 0x3ad607ef, 0x9e97ebab, 0x507f859b, 0xe81f2e2f, - 0xc55b71da, 0xd7e2269a, 0x1339c3d1, 0x7ca56b36, - 0xa6c9def2, 0xb5c9fc5f, 0x5927b3a3, 0x89a56ddf, - 0xc625b510, 0x560f85a7, 0xace82e71, 0x2ecb8816, - 0x44951e2a, 0x97f5f6af, 0xdfcbc2b3, 0xce4ff55d, - 0xcb6b6214, 0x2b0b83e3, 0x549ea6f5, 0x9de041af, - 0x792f1f17, 0xf73b99ee, 0x39a65ec0, 0x4c7016c6, - 0x857709a4, 0xd6326e01, 0xc7b280d9, 0x5cfb1418, - 0xa6aff227, 0xfd548203, 0x506b9d96, 0xa117a8c0, - 0x9cd5bf6e, 0xdcee7888, 0x61fcfe64, 0xf7a193cd, - 0x050d0184, 0xe8ae4930, 0x88014f36, 0xd6a87088, - 0x6bad6c2a, 0x1422c678, 0xe9204de7, 0xb7c2e759, - 0x0200248e, 0x013b446b, 0xda0d9fc2, 0x0414a895, - 0x3a6cc3a1, 0x56fef170, 0x86c19155, 0xcf7b8a66, - 0x551b5e69, 0xb4a8623e, 0xa2bdfa35, 0xc4f068cc, - 0x573a6acd, 0x6355e936, 0x03602db9, 0x0edf13c1, - 0x2d0bb16d, 0x6980b83c, 0xfeb23763, 0x3dd8a911, - 0x01b6bc13, 0xf55579d7, 0xf55c2fa8, 0x19f4196e, - 0xe7db5476, 0x8d64a866, 0xc06e16ad, 0xb17fc515, - 0xc46feb3c, 0x8bc8a306, 0xad6799d9, 0x571a9133, - 0x992466dd, 0x92eb5dcd, 0xac118f50, 0x9fafb226, - 0xa1b9cef3, 0x3ab36189, 0x347a19b1, 0x62c73084, - 0xc27ded5c, 0x6c8bc58f, 0x1cdde421, 0xed1e47fb, - 0xcdcc715e, 0xb9c0ff99, 0x4b122f0f, 0xc4d25184, - 0xaf7a5e6c, 0x5bbf18bc, 0x8dd7c6e0, 0x5fb7e420, - 0x521f523f, 0x4ad9b8a2, 0xe9da1a6b, 0x97888c02, - 0x19d1e354, 0x5aba7d79, 0xa2cc7753, 0x8c2d9655, - 0x19829da1, 0x531590a7, 0x19c1c149, 0x3d537f1c, - 0x50779b69, 0xed71f2b7, 0x463c58fa, 0x52dc4418, - 0xc18c8c76, 0xc120d9f0, 0xafa80d4d, 0x3b74c473, - 0xd09410e9, 0x290e4211, 0xc3c8082b, 0x8f6b334a, - 0x3bf68ed2, 0xa843cc1b, 0x8d3c0ff3, 0x20e564a0, - 0xf8f55a4f, 0x2b40f8e7, 0xfea7f15f, 0xcf00fe21, - 0x8a6d37d6, 0xd0d506f1, 0xade00973, 0xefbbde36, - 0x84670fa8, 0xfa31ab9e, 0xaedab618, 0xc01f52f5, - 0x6558eb4f, 0x71b9e343, 0x4b8d77dd, 0x8cb93da6, - 0x740fd52d, 0x425412f8, 0xc5a63360, 0x10e53ad0, - 0x5a700f1c, 0x8324ed0b, 0xe53dc1ec, 0x1a366795, - 0x6d549d15, 0xc5ce46d7, 0xe17abe76, 0x5f48e0a0, - 0xd0f07c02, 0x941249b7, 0xe49ed6ba, 0x37a47f78, - 0xe1cfffbd, 0xb007ca84, 0xbb65f4da, 0xb59f35da, - 0x33d2aa44, 0x417452ac, 0xc0d674a7, 0x2d61a46a, - 0xdc63152a, 0x3e12b7aa, 0x6e615927, 0xa14fb118, - 0xa151758d, 0xba81687b, 0xe152f0b3, 0x764254ed, - 0x34c77271, 0x0a31acab, 0x54f94aec, 0xb9e994cd, - 0x574d9e81, 0x5b623730, 0xce8a21e8, 0x37917f0b, - 0xe8a9b5d6, 0x9697adf8, 0xf3d30431, 0x5dcac921, - 0x76b35d46, 0xaa430a36, 0xc2194022, 0x22bca65e, - 0xdaec70ba, 0xdfaea8cc, 0x777bae8b, 0x242924d5, - 0x1f098a5a, 0x4b396b81, 0x55de2522, 0x435c1cb8, - 0xaeb8fe1d, 0x9db3c697, 0x5b164f83, 0xe0c16376, - 0xa319224c, 0xd0203b35, 0x433ac0fe, 0x1466a19a, - 0x45f0b24f, 0x51fda998, 0xc0d52d71, 0xfa0896a8, - 0xf9e6053f, 0xa4b0d300, 0xd499cbcc, 0xb95e3d40, -}; - - -/* Implementation of SOBER-128 by Tom St Denis. - * Based on s128fast.c reference code supplied by Greg Rose of QUALCOMM. - */ - -const struct _prng_descriptor sober128_desc = -{ - "sober128", 64, - &sober128_start, - &sober128_add_entropy, - &sober128_ready, - &sober128_read, -}; - -const struct _prng_descriptor *prng_descriptor[] = { - &sober128_desc -}; - -/* don't change these... */ -#define N 17 -#define FOLD N /* how many iterations of folding to do */ -#define INITKONST 0x6996c53a /* value of KONST to use during key loading */ -#define KEYP 15 /* where to insert key words */ -#define FOLDP 4 /* where to insert non-linear feedback */ - -#define B(x,i) ((unsigned char)(((x) >> (8*i)) & 0xFF)) - -static ulong32 BYTE2WORD(const unsigned char *b) -{ - ulong32 t; - LOAD32L(t, b); - return t; -} - -#define WORD2BYTE(w, b) STORE32L(b, w) - -static void XORWORD(ulong32 w, unsigned char *b) -{ - ulong32 t; - LOAD32L(t, b); - t ^= w; - STORE32L(t, b); -} - -/* give correct offset for the current position of the register, - * where logically R[0] is at position "zero". - */ -#define OFF(zero, i) (((zero)+(i)) % N) - -/* step the LFSR */ -/* After stepping, "zero" moves right one place */ -#define STEP(R,z) \ - R[OFF(z,0)] = R[OFF(z,15)] ^ R[OFF(z,4)] ^ (R[OFF(z,0)] << 8) ^ Multab[(R[OFF(z,0)] >> 24) & 0xFF]; - -static void cycle(ulong32 *R) -{ - ulong32 t; - int i; - - STEP(R,0); - t = R[0]; - for (i = 1; i < N; ++i) { - R[i-1] = R[i]; - } - R[N-1] = t; -} - -/* Return a non-linear function of some parts of the register. - */ -#define NLFUNC(c,z) \ -{ \ - t = c->R[OFF(z,0)] + c->R[OFF(z,16)]; \ - t ^= Sbox[(t >> 24) & 0xFF]; \ - t = ROR(t, 8); \ - t = ((t + c->R[OFF(z,1)]) ^ c->konst) + c->R[OFF(z,6)]; \ - t ^= Sbox[(t >> 24) & 0xFF]; \ - t = t + c->R[OFF(z,13)]; \ -} - -static ulong32 nltap(struct sober128_prng *c) -{ - ulong32 t; - NLFUNC(c, 0); - return t; -} - -/* initialise to known state - */ -int sober128_start(prng_state *prng) -{ - int i; - struct sober128_prng *c; - - c = &(prng->sober128); - - /* Register initialised to Fibonacci numbers */ - c->R[0] = 1; - c->R[1] = 1; - for (i = 2; i < N; ++i) { - c->R[i] = c->R[i-1] + c->R[i-2]; - } - c->konst = INITKONST; - - /* next add_entropy will be the key */ - c->flag = 1; - c->set = 0; - - return CRYPT_OK; -} - -/* Save the current register state - */ -static void s128_savestate(struct sober128_prng *c) -{ - int i; - for (i = 0; i < N; ++i) { - c->initR[i] = c->R[i]; - } -} - -/* initialise to previously saved register state - */ -static void s128_reloadstate(struct sober128_prng *c) -{ - int i; - - for (i = 0; i < N; ++i) { - c->R[i] = c->initR[i]; - } -} - -/* Initialise "konst" - */ -static void s128_genkonst(struct sober128_prng *c) -{ - ulong32 newkonst; - - do { - cycle(c->R); - newkonst = nltap(c); - } while ((newkonst & 0xFF000000) == 0); - c->konst = newkonst; -} - -/* Load key material into the register - */ -#define ADDKEY(k) \ - c->R[KEYP] += (k); - -#define XORNL(nl) \ - c->R[FOLDP] ^= (nl); - -/* nonlinear diffusion of register for key */ -#define DROUND(z) STEP(c->R,z); NLFUNC(c,(z+1)); c->R[OFF((z+1),FOLDP)] ^= t; -static void s128_diffuse(struct sober128_prng *c) -{ - ulong32 t; - /* relies on FOLD == N == 17! */ - DROUND(0); - DROUND(1); - DROUND(2); - DROUND(3); - DROUND(4); - DROUND(5); - DROUND(6); - DROUND(7); - DROUND(8); - DROUND(9); - DROUND(10); - DROUND(11); - DROUND(12); - DROUND(13); - DROUND(14); - DROUND(15); - DROUND(16); -} - -int sober128_add_entropy(const unsigned char *buf, unsigned long len, prng_state *prng) -{ - struct sober128_prng *c; - ulong32 i, k; - - c = &(prng->sober128); - - if (c->flag == 1) { - /* this is the first call to the add_entropy so this input is the key */ - /* len must be multiple of 4 bytes */ - assert ((len & 3) == 0); - - for (i = 0; i < len; i += 4) { - k = BYTE2WORD(&buf[i]); - ADDKEY(k); - cycle(c->R); - XORNL(nltap(c)); - } - - /* also fold in the length of the key */ - ADDKEY(len); - - /* now diffuse */ - s128_diffuse(c); - - s128_genkonst(c); - s128_savestate(c); - c->nbuf = 0; - c->flag = 0; - c->set = 1; - } else { - /* ok we are adding an IV then... */ - s128_reloadstate(c); - - /* len must be multiple of 4 bytes */ - assert ((len & 3) == 0); - - for (i = 0; i < len; i += 4) { - k = BYTE2WORD(&buf[i]); - ADDKEY(k); - cycle(c->R); - XORNL(nltap(c)); - } - - /* also fold in the length of the key */ - ADDKEY(len); - - /* now diffuse */ - s128_diffuse(c); - c->nbuf = 0; - } - - return CRYPT_OK; -} - -int sober128_ready(prng_state *prng) -{ - return prng->sober128.set == 1 ? CRYPT_OK : CRYPT_ERROR; -} - -/* XOR pseudo-random bytes into buffer - */ -#define SROUND(z) STEP(c->R,z); NLFUNC(c,(z+1)); XORWORD(t, buf+(z*4)); - -unsigned long sober128_read(unsigned char *buf, unsigned long nbytes, prng_state *prng) -{ - struct sober128_prng *c; - ulong32 t, tlen; - - c = &(prng->sober128); - t = 0; - tlen = nbytes; - - /* handle any previously buffered bytes */ - while (c->nbuf != 0 && nbytes != 0) { - *buf++ ^= c->sbuf & 0xFF; - c->sbuf >>= 8; - c->nbuf -= 8; - --nbytes; - } - -#ifndef SMALL_CODE - /* do lots at a time, if there's enough to do */ - while (nbytes >= N*4) { - SROUND(0); - SROUND(1); - SROUND(2); - SROUND(3); - SROUND(4); - SROUND(5); - SROUND(6); - SROUND(7); - SROUND(8); - SROUND(9); - SROUND(10); - SROUND(11); - SROUND(12); - SROUND(13); - SROUND(14); - SROUND(15); - SROUND(16); - buf += 4*N; - nbytes -= 4*N; - } -#endif - - /* do small or odd size buffers the slow way */ - while (4 <= nbytes) { - cycle(c->R); - t = nltap(c); - XORWORD(t, buf); - buf += 4; - nbytes -= 4; - } - - /* handle any trailing bytes */ - if (nbytes != 0) { - cycle(c->R); - c->sbuf = nltap(c); - c->nbuf = 32; - while (c->nbuf != 0 && nbytes != 0) { - *buf++ ^= c->sbuf & 0xFF; - c->sbuf >>= 8; - c->nbuf -= 8; - --nbytes; - } - } - - return tlen; -} - -/* SHA1 code by Tom St Denis */ - -const struct _hash_descriptor sha1_desc = -{ - "sha1", - 2, - 20, - 64, - - /* DER identifier */ - { 0x30, 0x21, 0x30, 0x09, 0x06, 0x05, 0x2B, 0x0E, - 0x03, 0x02, 0x1A, 0x05, 0x00, 0x04, 0x14 }, - 15, - - &sha1_init, - &sha1_process, - &sha1_done, -}; - -#define F0(x,y,z) (z ^ (x & (y ^ z))) -#define F1(x,y,z) (x ^ y ^ z) -#define F2(x,y,z) ((x & y) | (z & (x | y))) -#define F3(x,y,z) (x ^ y ^ z) - -static void sha1_compress(hash_state *md, const unsigned char *buf) -{ - ulong32 a,b,c,d,e,W[80],i; - - /* copy the state into 512-bits into W[0..15] */ - for (i = 0; i < 16; i++) { - LOAD32H(W[i], buf + (4*i)); - } - - /* copy state */ - a = md->sha1.state[0]; - b = md->sha1.state[1]; - c = md->sha1.state[2]; - d = md->sha1.state[3]; - e = md->sha1.state[4]; - - /* expand it */ - for (i = 16; i < 80; i++) { - W[i] = ROL(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1); - } - - /* compress */ - /* round one */ - #define FF0(a,b,c,d,e,i) e = (ROL(a, 5) + F0(b,c,d) + e + W[i] + 0x5a827999UL); b = ROL(b, 30); - #define FF1(a,b,c,d,e,i) e = (ROL(a, 5) + F1(b,c,d) + e + W[i] + 0x6ed9eba1UL); b = ROL(b, 30); - #define FF2(a,b,c,d,e,i) e = (ROL(a, 5) + F2(b,c,d) + e + W[i] + 0x8f1bbcdcUL); b = ROL(b, 30); - #define FF3(a,b,c,d,e,i) e = (ROL(a, 5) + F3(b,c,d) + e + W[i] + 0xca62c1d6UL); b = ROL(b, 30); - - for (i = 0; i < 20; ) { - FF0(a,b,c,d,e,i++); - FF0(e,a,b,c,d,i++); - FF0(d,e,a,b,c,i++); - FF0(c,d,e,a,b,i++); - FF0(b,c,d,e,a,i++); - } - - /* round two */ - for (; i < 40; ) { - FF1(a,b,c,d,e,i++); - FF1(e,a,b,c,d,i++); - FF1(d,e,a,b,c,i++); - FF1(c,d,e,a,b,i++); - FF1(b,c,d,e,a,i++); - } - - /* round three */ - for (; i < 60; ) { - FF2(a,b,c,d,e,i++); - FF2(e,a,b,c,d,i++); - FF2(d,e,a,b,c,i++); - FF2(c,d,e,a,b,i++); - FF2(b,c,d,e,a,i++); - } - - /* round four */ - for (; i < 80; ) { - FF3(a,b,c,d,e,i++); - FF3(e,a,b,c,d,i++); - FF3(d,e,a,b,c,i++); - FF3(c,d,e,a,b,i++); - FF3(b,c,d,e,a,i++); - } - - #undef FF0 - #undef FF1 - #undef FF2 - #undef FF3 - - /* store */ - md->sha1.state[0] = md->sha1.state[0] + a; - md->sha1.state[1] = md->sha1.state[1] + b; - md->sha1.state[2] = md->sha1.state[2] + c; - md->sha1.state[3] = md->sha1.state[3] + d; - md->sha1.state[4] = md->sha1.state[4] + e; -} - -void sha1_init(hash_state * md) -{ - md->sha1.state[0] = 0x67452301UL; - md->sha1.state[1] = 0xefcdab89UL; - md->sha1.state[2] = 0x98badcfeUL; - md->sha1.state[3] = 0x10325476UL; - md->sha1.state[4] = 0xc3d2e1f0UL; - md->sha1.curlen = 0; - md->sha1.length = 0; -} - -HASH_PROCESS(sha1_process, sha1_compress, sha1, 64) - -int sha1_done(hash_state * md, unsigned char *hash) -{ - int i; - - /* - * Assert there isn't an invalid argument - */ - assert (md->sha1.curlen < sizeof (md->sha1.buf)); - - /* increase the length of the message */ - md->sha1.length += md->sha1.curlen * 8; - - /* append the '1' bit */ - md->sha1.buf[md->sha1.curlen++] = (unsigned char)0x80; - - /* if the length is currently above 56 bytes we append zeros - * then compress. Then we can fall back to padding zeros and length - * encoding like normal. - */ - if (md->sha1.curlen > 56) { - while (md->sha1.curlen < 64) { - md->sha1.buf[md->sha1.curlen++] = (unsigned char)0; - } - sha1_compress(md, md->sha1.buf); - md->sha1.curlen = 0; - } - - /* pad upto 56 bytes of zeroes */ - while (md->sha1.curlen < 56) { - md->sha1.buf[md->sha1.curlen++] = (unsigned char)0; - } - - /* store length */ - STORE64H(md->sha1.length, md->sha1.buf+56); - sha1_compress(md, md->sha1.buf); - - /* copy output */ - for (i = 0; i < 5; i++) { - STORE32H(md->sha1.state[i], hash+(4*i)); - } - return CRYPT_OK; -} - -/* Submited by Dobes Vandermeer (dobes@smartt.com) */ - -/* - (1) append zeros to the end of K to create a B byte string - (e.g., if K is of length 20 bytes and B=64, then K will be - appended with 44 zero bytes 0x00) - (2) XOR (bitwise exclusive-OR) the B byte string computed in step - (1) with ipad (ipad = the byte 0x36 repeated B times) - (3) append the stream of data 'text' to the B byte string resulting - from step (2) - (4) apply H to the stream generated in step (3) - (5) XOR (bitwise exclusive-OR) the B byte string computed in - step (1) with opad (opad = the byte 0x5C repeated B times.) - (6) append the H result from step (4) to the B byte string - resulting from step (5) - (7) apply H to the stream generated in step (6) and output - the result -*/ - -int hmac_init(hmac_state *hmac, int hash, const unsigned char *key, unsigned long keylen) -{ - unsigned char buf[128]; - unsigned long i; - int err; - - hmac->hash = hash; - - /* valid key length? */ - assert (keylen > 0); - assert (keylen <= hash_descriptor[hash]->blocksize); - - memcpy(hmac->key, key, (size_t)keylen); - if(keylen < hash_descriptor[hash]->blocksize) { - memset((hmac->key) + keylen, 0, (size_t)(hash_descriptor[hash]->blocksize - keylen)); - } - - // Create the initial vector for step (3) - for(i=0; i < hash_descriptor[hash]->blocksize; i++) { - buf[i] = hmac->key[i] ^ 0x36; - } - - // Pre-pend that to the hash data - hash_descriptor[hash]->init(&hmac->md); - err = hash_descriptor[hash]->process(&hmac->md, buf, hash_descriptor[hash]->blocksize); - - return err; -} - -int hmac_process(hmac_state *hmac, const unsigned char *buf, unsigned long len) -{ - return hash_descriptor[hmac->hash]->process(&hmac->md, buf, len); -} - -/* Submited by Dobes Vandermeer (dobes@smartt.com) */ - -/* - (1) append zeros to the end of K to create a B byte string - (e.g., if K is of length 20 bytes and B=64, then K will be - appended with 44 zero bytes 0x00) - (2) XOR (bitwise exclusive-OR) the B byte string computed in step - (1) with ipad (ipad = the byte 0x36 repeated B times) - (3) append the stream of data 'text' to the B byte string resulting - from step (2) - (4) apply H to the stream generated in step (3) - (5) XOR (bitwise exclusive-OR) the B byte string computed in - step (1) with opad (opad = the byte 0x5C repeated B times.) - (6) append the H result from step (4) to the B byte string - resulting from step (5) - (7) apply H to the stream generated in step (6) and output - the result -*/ - -int hmac_done(hmac_state *hmac, unsigned char *hashOut, unsigned long *outlen) -{ - unsigned char buf[128]; - unsigned char isha[256]; - unsigned long hashsize, i; - int hash, err; - - /* test hash */ - hash = hmac->hash; - - /* get the hash message digest size */ - hashsize = hash_descriptor[hash]->hashsize; - - // Get the hash of the first HMAC vector plus the data - if ((err = hash_descriptor[hash]->done(&hmac->md, isha)) != CRYPT_OK) { - goto __ERR; - } - - // Create the second HMAC vector vector for step (3) - for(i=0; i < hash_descriptor[hash]->blocksize; i++) { - buf[i] = hmac->key[i] ^ 0x5C; - } - - // Now calculate the "outer" hash for step (5), (6), and (7) - hash_descriptor[hash]->init(&hmac->md); - if ((err = hash_descriptor[hash]->process(&hmac->md, buf, hash_descriptor[hash]->blocksize)) != CRYPT_OK) { - goto __ERR; - } - if ((err = hash_descriptor[hash]->process(&hmac->md, isha, hashsize)) != CRYPT_OK) { - goto __ERR; - } - if ((err = hash_descriptor[hash]->done(&hmac->md, buf)) != CRYPT_OK) { - goto __ERR; - } - - // copy to output - for (i = 0; i < hashsize && i < *outlen; i++) { - hashOut[i] = buf[i]; - } - *outlen = i; - - err = CRYPT_OK; -__ERR: - - return err; -} - -const struct _hash_descriptor *hash_descriptor[] = -{ - &sha1_desc -}; - -/* portable way to get secure random bits to feed a PRNG */ -/* on *NIX read /dev/random */ -static unsigned long rng_nix(unsigned char *buf, unsigned long len, - void (*callback)(void)) -{ - int fd; - unsigned long rb = 0; - - fd = open ("/dev/urandom", O_RDONLY); - - if (fd >= 0) { - rb = (unsigned long)read (fd, buf, len); - close (fd); - } - - return (rb); -} - -/* on ANSI C platforms with 100 < CLOCKS_PER_SEC < 10000 */ -#if defined(XCLOCKS_PER_SEC) - -#define ANSI_RNG - -static unsigned long rng_ansic(unsigned char *buf, unsigned long len, - void (*callback)(void)) -{ - clock_t t1; - int l, acc, bits, a, b; - - if (XCLOCKS_PER_SEC < 100 || XCLOCKS_PER_SEC > 10000) { - return 0; - } - - l = len; - bits = 8; - acc = a = b = 0; - while (len--) { - if (callback != NULL) callback(); - while (bits--) { - do { - t1 = XCLOCK(); while (t1 == XCLOCK()) a ^= 1; - t1 = XCLOCK(); while (t1 == XCLOCK()) b ^= 1; - } while (a == b); - acc = (acc << 1) | a; - } - *buf++ = acc; - acc = 0; - bits = 8; - } - acc = bits = a = b = 0; - return l; -} - -#endif - -unsigned long rng_get_bytes(unsigned char *buf, unsigned long len, - void (*callback)(void)) -{ - unsigned long x; - - x = rng_nix(buf, len, callback); if (x != 0) { return x; } -#ifdef ANSI_RNG - x = rng_ansic(buf, len, callback); if (x != 0) { return x; } -#endif - return 0; -} - -int rng_make_prng(int bits, int wprng, prng_state *prng, - void (*callback)(void)) -{ - unsigned char buf[258]; - int err; - - if (bits < 64 || bits > 1024) { - return CRYPT_INVALID_PRNGSIZE; - } - - if ((err = prng_descriptor[wprng]->start(prng)) != CRYPT_OK) { - return err; - } - - bits = ((bits/8)+((bits&7)!=0?1:0)) * 2; - if (rng_get_bytes(buf, (unsigned long)bits, callback) != (unsigned long)bits) { - return CRYPT_ERROR_READPRNG; - } - - if ((err = prng_descriptor[wprng]->add_entropy(buf, (unsigned long)bits, prng)) != CRYPT_OK) { - return err; - } - - if ((err = prng_descriptor[wprng]->ready(prng)) != CRYPT_OK) { - return err; - } - - return CRYPT_OK; -} diff --git a/exec/crypto.h b/exec/crypto.h deleted file mode 100644 index a154ff76..00000000 --- a/exec/crypto.h +++ /dev/null @@ -1,134 +0,0 @@ -#ifndef CRYPTO_H_DEFINED -#define CRYPTO_H_DEFINED - -#include - -#define DIGEST_SHA1 0 -#define PRNG_SOBER 0 - - -enum { - CRYPT_OK=0, /* Result OK */ - CRYPT_ERROR, /* Generic Error */ - CRYPT_NOP, /* Not a failure but no operation was performed */ - - CRYPT_INVALID_KEYSIZE, /* Invalid key size given */ - CRYPT_INVALID_ROUNDS, /* Invalid number of rounds */ - CRYPT_FAIL_TESTVECTOR, /* Algorithm failed test vectors */ - - CRYPT_BUFFER_OVERFLOW, /* Not enough space for output */ - CRYPT_INVALID_PACKET, /* Invalid input packet given */ - - CRYPT_INVALID_PRNGSIZE, /* Invalid number of bits for a PRNG */ - CRYPT_ERROR_READPRNG, /* Could not read enough from PRNG */ - - CRYPT_INVALID_CIPHER, /* Invalid cipher specified */ - CRYPT_INVALID_HASH, /* Invalid hash specified */ - CRYPT_INVALID_PRNG, /* Invalid PRNG specified */ - - CRYPT_MEM, /* Out of memory */ - CRYPT_PK_TYPE_MISMATCH, /* Not equivalent types of PK keys */ - CRYPT_PK_NOT_PRIVATE, /* Requires a private PK key */ - - CRYPT_INVALID_ARG, /* Generic invalid argument */ - CRYPT_FILE_NOTFOUND, /* File Not Found */ - - CRYPT_PK_INVALID_TYPE, /* Invalid type of PK key */ - CRYPT_PK_INVALID_SYSTEM,/* Invalid PK system specified */ - CRYPT_PK_DUP, /* Duplicate key already in key ring */ - CRYPT_PK_NOT_FOUND, /* Key not found in keyring */ - CRYPT_PK_INVALID_SIZE, /* Invalid size input for PK parameters */ - - CRYPT_INVALID_PRIME_SIZE/* Invalid size of prime requested */ -}; - -struct sha1_state { - unsigned long long length; - unsigned long state[5], curlen; - unsigned char buf[64]; -}; -typedef union Hash_state { - struct sha1_state sha1; -} hash_state; - -struct _hash_descriptor { - const char *name; - unsigned char ID; - unsigned long hashsize; /* digest output size in bytes */ - unsigned long blocksize; /* the block size the hash uses */ - unsigned char DER[64]; /* DER encoded identifier */ - unsigned long DERlen; /* length of DER encoding */ - void (*init)(hash_state *); - int (*process)(hash_state *, const unsigned char *, unsigned long); - int (*done)(hash_state *, unsigned char *); - int (*test)(void); -}; - -extern const struct _hash_descriptor *hash_descriptor[]; - -void sha1_init(hash_state * md); -int sha1_process(hash_state * md, const unsigned char *buf, unsigned long len); -int sha1_done(hash_state * md, unsigned char *hash); -int sha1_test(void); - -int hash_memory(int hash, const unsigned char *data, unsigned long len, unsigned char *dst, unsigned long *outlen); - -#define MAXBLOCKSIZE 128 -typedef struct Hmac_state { - hash_state md; - int hash; - hash_state hashstate; - unsigned char key[MAXBLOCKSIZE]; -} hmac_state; - -int hmac_init(hmac_state *hmac, int hash, const unsigned char *key, unsigned long keylen); -int hmac_process(hmac_state *hmac, const unsigned char *buf, unsigned long len); -int hmac_done(hmac_state *hmac, unsigned char *hashOut, unsigned long *outlen); -int hmac_test(void); -int hmac_memory(int hash, const unsigned char *key, unsigned long keylen, - const unsigned char *data, unsigned long len, - unsigned char *dst, unsigned long *dstlen); - -struct sober128_prng { - uint32_t R[17], /* Working storage for the shift register */ - initR[17], /* saved register contents */ - konst, /* key dependent constant */ - sbuf; /* partial word encryption buffer */ - - int nbuf, /* number of part-word stream bits buffered */ - flag, /* first add_entropy call or not? */ - set; /* did we call add_entropy to set key? */ - -}; - -typedef union Prng_state { - struct sober128_prng sober128; -} prng_state; - -struct _prng_descriptor { - const char *name; - int export_size; /* size in bytes of exported state */ - int (*start)(prng_state *); - int (*add_entropy)(const unsigned char *, unsigned long, prng_state *); - int (*ready)(prng_state *); - unsigned long (*read)(unsigned char *, unsigned long, prng_state *); -}; - -extern const struct _prng_descriptor *prng_descriptor[]; - -int sober128_start(prng_state *prng); -int sober128_add_entropy(const unsigned char *buf, unsigned long len, prng_state *prng); -int sober128_ready(prng_state *prng); -unsigned long sober128_read(unsigned char *buf, unsigned long len, prng_state *prng); -int sober128_done(prng_state *prng); -int sober128_export(unsigned char *out, unsigned long *outlen, prng_state *prng); -int sober128_import(const unsigned char *in, unsigned long inlen, prng_state *prng); -int sober128_test(void); - -unsigned long rng_get_bytes(unsigned char *buf, - unsigned long len, - void (*callback)(void)); - -int rng_make_prng(int bits, int wprng, prng_state *prng, void (*callback)(void)); - -#endif /* CRYPTO_H_DEFINED */ diff --git a/exec/totemconfig.c b/exec/totemconfig.c index e53163f0..dcc9b66e 100644 --- a/exec/totemconfig.c +++ b/exec/totemconfig.c @@ -1,1085 +1,1074 @@ /* * Copyright (c) 2002-2005 MontaVista Software, Inc. * Copyright (c) 2006-2012 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#ifdef HAVE_LIBNSS #include #include #include #include -#endif #include "util.h" #include "totemconfig.h" #define TOKEN_RETRANSMITS_BEFORE_LOSS_CONST 4 #define TOKEN_TIMEOUT 1000 #define TOKEN_RETRANSMIT_TIMEOUT (int)(TOKEN_TIMEOUT / (TOKEN_RETRANSMITS_BEFORE_LOSS_CONST + 0.2)) #define TOKEN_HOLD_TIMEOUT (int)(TOKEN_RETRANSMIT_TIMEOUT * 0.8 - (1000/(int)HZ)) #define JOIN_TIMEOUT 50 #define MERGE_TIMEOUT 200 #define DOWNCHECK_TIMEOUT 1000 #define FAIL_TO_RECV_CONST 2500 #define SEQNO_UNCHANGED_CONST 30 #define MINIMUM_TIMEOUT (int)(1000/HZ)*3 #define MAX_NETWORK_DELAY 50 #define WINDOW_SIZE 50 #define MAX_MESSAGES 17 #define MISS_COUNT_CONST 5 #define RRP_PROBLEM_COUNT_TIMEOUT 2000 #define RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT 10 #define RRP_PROBLEM_COUNT_THRESHOLD_MIN 2 #define RRP_AUTORECOVERY_CHECK_TIMEOUT 1000 #define DEFAULT_PORT 5405 static char error_string_response[512]; static void add_totem_config_notification(struct totem_config *totem_config); static void totem_volatile_config_read (struct totem_config *totem_config) { char *str; icmap_get_uint32("totem.token", &totem_config->token_timeout); icmap_get_uint32("totem.token_retransmit", &totem_config->token_retransmit_timeout); icmap_get_uint32("totem.hold", &totem_config->token_hold_timeout); icmap_get_uint32("totem.token_retransmits_before_loss_const", &totem_config->token_retransmits_before_loss_const); icmap_get_uint32("totem.join", &totem_config->join_timeout); icmap_get_uint32("totem.send_join", &totem_config->send_join_timeout); icmap_get_uint32("totem.consensus", &totem_config->consensus_timeout); icmap_get_uint32("totem.merge", &totem_config->merge_timeout); icmap_get_uint32("totem.downcheck", &totem_config->downcheck_timeout); icmap_get_uint32("totem.fail_recv_const", &totem_config->fail_to_recv_const); icmap_get_uint32("totem.seqno_unchanged_const", &totem_config->seqno_unchanged_const); icmap_get_uint32("totem.rrp_token_expired_timeout", &totem_config->rrp_token_expired_timeout); icmap_get_uint32("totem.rrp_problem_count_timeout", &totem_config->rrp_problem_count_timeout); icmap_get_uint32("totem.rrp_problem_count_threshold", &totem_config->rrp_problem_count_threshold); icmap_get_uint32("totem.rrp_problem_count_mcast_threshold", &totem_config->rrp_problem_count_mcast_threshold); icmap_get_uint32("totem.rrp_autorecovery_check_timeout", &totem_config->rrp_autorecovery_check_timeout); icmap_get_uint32("totem.heartbeat_failures_allowed", &totem_config->heartbeat_failures_allowed); icmap_get_uint32("totem.max_network_delay", &totem_config->max_network_delay); icmap_get_uint32("totem.window_size", &totem_config->window_size); icmap_get_uint32("totem.max_messages", &totem_config->max_messages); icmap_get_uint32("totem.miss_count_const", &totem_config->miss_count_const); if (icmap_get_string("totem.vsftype", &str) == CS_OK) { totem_config->vsf_type = str; } } static void totem_get_crypto_type(struct totem_config *totem_config) { char *str; - totem_config->crypto_type = TOTEM_CRYPTO_SOBER; - -#ifdef HAVE_LIBNSS - /* - * We must set these even if the key does not exist. - * Encryption type can be set on-the-fly using CFG - */ + /* + * We must set these even if the key does not exist. + * Encryption type can be set on-the-fly using CFG + */ totem_config->crypto_crypt_type = CKM_AES_CBC_PAD; totem_config->crypto_sign_type = CKM_SHA256_RSA_PKCS; -#endif if (icmap_get_string("totem.crypto_type", &str) == CS_OK) { - if (strcmp(str, "sober") == 0) { - totem_config->crypto_type = TOTEM_CRYPTO_SOBER; - } -#ifdef HAVE_LIBNSS - if (strcmp(str, "nss") == 0) { - totem_config->crypto_type = TOTEM_CRYPTO_NSS; + if (strcmp(str, "nss") == 0 || strcmp(str, "aes256") == 0) { + totem_config->crypto_type = TOTEM_CRYPTO_AES256; } -#endif free(str); } } static uint16_t generate_cluster_id (const char *cluster_name) { int i; int value = 0; for (i = 0; i < strlen(cluster_name); i++) { value <<= 1; value += cluster_name[i]; } return (value & 0xFFFF); } static int get_cluster_mcast_addr ( const char *cluster_name, const struct totem_ip_address *bindnet, unsigned int ringnumber, struct totem_ip_address *res) { uint16_t clusterid; char addr[INET6_ADDRSTRLEN + 1]; int err; if (cluster_name == NULL) { return (-1); } clusterid = generate_cluster_id(cluster_name) + ringnumber; memset (res, 0, sizeof(res)); switch (bindnet->family) { case AF_INET: snprintf(addr, sizeof(addr), "239.192.%d.%d", clusterid >> 8, clusterid % 0xFF); break; case AF_INET6: snprintf(addr, sizeof(addr), "ff15::%x", clusterid); break; default: /* * Unknown family */ return (-1); } err = totemip_parse (res, addr, 0); return (err); } static int find_local_node_in_nodelist(struct totem_config *totem_config) { icmap_iter_t iter; const char *iter_key; int res = 0; int node_pos; int local_node_pos = -1; struct totem_ip_address bind_addr; int interface_up, interface_num; char tmp_key[ICMAP_KEYNAME_MAXLEN]; char *node_addr_str; struct totem_ip_address node_addr; res = totemip_iface_check(&totem_config->interfaces[0].bindnet, &bind_addr, &interface_up, &interface_num, totem_config->clear_node_high_bit); if (res == -1) { return (-1); } iter = icmap_iter_init("nodelist.node."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key); if (res != 2) { continue; } if (strcmp(tmp_key, "ring0_addr") != 0) { continue; } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", node_pos); if (icmap_get_string(tmp_key, &node_addr_str) != CS_OK) { continue; } res = totemip_parse (&node_addr, node_addr_str, 0); free(node_addr_str); if (res == -1) { continue ; } if (totemip_equal(&bind_addr, &node_addr)) { local_node_pos = node_pos; } } icmap_iter_finalize(iter); return (local_node_pos); } static void put_nodelist_members_to_config(struct totem_config *totem_config) { icmap_iter_t iter, iter2; const char *iter_key, *iter_key2; int res = 0; int node_pos; char tmp_key[ICMAP_KEYNAME_MAXLEN]; char tmp_key2[ICMAP_KEYNAME_MAXLEN]; char *node_addr_str; int member_count; unsigned int ringnumber = 0; iter = icmap_iter_init("nodelist.node."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key); if (res != 2) { continue; } if (strcmp(tmp_key, "ring0_addr") != 0) { continue; } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.", node_pos); iter2 = icmap_iter_init(tmp_key); while ((iter_key2 = icmap_iter_next(iter2, NULL, NULL)) != NULL) { res = sscanf(iter_key2, "nodelist.node.%u.ring%u%s", &node_pos, &ringnumber, tmp_key2); if (res != 3 || strcmp(tmp_key2, "_addr") != 0) { continue; } if (icmap_get_string(iter_key2, &node_addr_str) != CS_OK) { continue; } member_count = totem_config->interfaces[ringnumber].member_count; res = totemip_parse(&totem_config->interfaces[ringnumber].member_list[member_count], node_addr_str, 0); if (res != -1) { totem_config->interfaces[ringnumber].member_count++; } free(node_addr_str); } icmap_iter_finalize(iter2); } icmap_iter_finalize(iter); } static void config_convert_nodelist_to_interface(struct totem_config *totem_config) { icmap_iter_t iter; const char *iter_key; int res = 0; int node_pos; char tmp_key[ICMAP_KEYNAME_MAXLEN]; char tmp_key2[ICMAP_KEYNAME_MAXLEN]; char *node_addr_str; unsigned int ringnumber = 0; struct list_head addrs; struct list_head *list; struct totem_ip_if_address *if_addr; struct totem_ip_address node_addr; int node_found; if (totemip_getifaddrs(&addrs) == -1) { return ; } iter = icmap_iter_init("nodelist.node."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key); if (res != 2) { continue; } if (strcmp(tmp_key, "ring0_addr") != 0) { continue; } if (icmap_get_string(iter_key, &node_addr_str) != CS_OK) { continue ; } if (totemip_parse(&node_addr, node_addr_str, 0) == -1) { free(node_addr_str); continue ; } free(node_addr_str); /* * Try to find node in if_addrs */ node_found = 0; for (list = addrs.next; list != &addrs; list = list->next) { if_addr = list_entry(list, struct totem_ip_if_address, list); if (totemip_equal(&node_addr, &if_addr->ip_addr)) { node_found = 1; break; } } if (node_found) { break ; } } icmap_iter_finalize(iter); if (node_found) { /* * We found node, so create interface section */ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.", node_pos); iter = icmap_iter_init(tmp_key); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "nodelist.node.%u.ring%u%s", &node_pos, &ringnumber, tmp_key2); if (res != 3 || strcmp(tmp_key2, "_addr") != 0) { continue ; } if (icmap_get_string(iter_key, &node_addr_str) != CS_OK) { continue; } snprintf(tmp_key2, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.bindnetaddr", ringnumber); icmap_set_string(tmp_key2, node_addr_str); free(node_addr_str); } icmap_iter_finalize(iter); } } extern int totem_config_read ( struct totem_config *totem_config, const char **error_string, uint64_t *warnings) { int res = 0; char *str; unsigned int ringnumber = 0; int member_count = 0; icmap_iter_t iter, member_iter; const char *iter_key; const char *member_iter_key; char ringnumber_key[ICMAP_KEYNAME_MAXLEN]; char tmp_key[ICMAP_KEYNAME_MAXLEN]; uint8_t u8; uint16_t u16; char *cluster_name = NULL; int i; int local_node_pos; int nodeid_set; *warnings = 0; memset (totem_config, 0, sizeof (struct totem_config)); totem_config->interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX); if (totem_config->interfaces == 0) { *error_string = "Out of memory trying to allocate ethernet interface storage area"; return -1; } memset (totem_config->interfaces, 0, sizeof (struct totem_interface) * INTERFACE_MAX); totem_config->secauth = 1; strcpy (totem_config->rrp_mode, "none"); icmap_get_uint32("totem.version", (uint32_t *)&totem_config->version); if (icmap_get_string("totem.secauth", &str) == CS_OK) { if (strcmp (str, "on") == 0) { totem_config->secauth = 1; } if (strcmp (str, "off") == 0) { totem_config->secauth = 0; } free(str); } if (totem_config->secauth == 1) { totem_get_crypto_type(totem_config); } if (icmap_get_string("totem.rrp_mode", &str) == CS_OK) { strcpy (totem_config->rrp_mode, str); free(str); } icmap_get_uint32("totem.nodeid", &totem_config->node_id); totem_config->clear_node_high_bit = 0; if (icmap_get_string("totem.clear_node_high_bit", &str) == CS_OK) { if (strcmp (str, "yes") == 0) { totem_config->clear_node_high_bit = 1; } free(str); } icmap_get_uint32("totem.threads", &totem_config->threads); icmap_get_uint32("totem.netmtu", &totem_config->net_mtu); icmap_get_string("totem.cluster_name", &cluster_name); /* * Get things that might change in the future */ totem_volatile_config_read(totem_config); if (icmap_get_string("totem.interface.0.bindnetaddr", &str) != CS_OK) { /* * We were not able to find ring 0 bindnet addr. Try to use nodelist informations */ config_convert_nodelist_to_interface(totem_config); } else { free(str); } iter = icmap_iter_init("totem.interface."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "totem.interface.%[^.].%s", ringnumber_key, tmp_key); if (res != 2) { continue; } if (strcmp(tmp_key, "bindnetaddr") != 0) { continue; } member_count = 0; ringnumber = atoi(ringnumber_key); /* * Get the bind net address */ if (icmap_get_string(iter_key, &str) == CS_OK) { res = totemip_parse (&totem_config->interfaces[ringnumber].bindnet, str, totem_config->interfaces[ringnumber].mcast_addr.family); free(str); } /* * Get interface multicast address */ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr", ringnumber); if (icmap_get_string(tmp_key, &str) == CS_OK) { res = totemip_parse (&totem_config->interfaces[ringnumber].mcast_addr, str, 0); free(str); } else { /* * User not specified address -> autogenerate one from cluster_name key * (if available) */ res = get_cluster_mcast_addr (cluster_name, &totem_config->interfaces[ringnumber].bindnet, ringnumber, &totem_config->interfaces[ringnumber].mcast_addr); } totem_config->broadcast_use = 0; snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.broadcast", ringnumber); if (icmap_get_string(tmp_key, &str) == CS_OK) { if (strcmp (str, "yes") == 0) { totem_config->broadcast_use = 1; totemip_parse ( &totem_config->interfaces[ringnumber].mcast_addr, "255.255.255.255", 0); } free(str); } /* * Get mcast port */ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport", ringnumber); if (icmap_get_uint16(tmp_key, &totem_config->interfaces[ringnumber].ip_port) != CS_OK) { if (totem_config->broadcast_use) { totem_config->interfaces[ringnumber].ip_port = DEFAULT_PORT + (2 * ringnumber); } else { totem_config->interfaces[ringnumber].ip_port = DEFAULT_PORT; } } /* * Get the TTL */ totem_config->interfaces[ringnumber].ttl = 1; snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.ttl", ringnumber); if (icmap_get_uint8(tmp_key, &u8) == CS_OK) { totem_config->interfaces[ringnumber].ttl = u8; } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.member.", ringnumber); member_iter = icmap_iter_init(tmp_key); while ((member_iter_key = icmap_iter_next(member_iter, NULL, NULL)) != NULL) { if (member_count == 0) { if (icmap_get_string("nodelist.node.0.ring0_addr", &str) == CS_OK) { free(str); *warnings |= TOTEM_CONFIG_WARNING_MEMBERS_IGNORED; break; } else { *warnings |= TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED; } } if (icmap_get_string(member_iter_key, &str) == CS_OK) { res = totemip_parse (&totem_config->interfaces[ringnumber].member_list[member_count++], str, 0); } } icmap_iter_finalize(member_iter); totem_config->interfaces[ringnumber].member_count = member_count; totem_config->interface_count++; } icmap_iter_finalize(iter); /* * Store automatically generated items back to icmap */ for (i = 0; i < totem_config->interface_count; i++) { snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr", i); if (icmap_get_string(tmp_key, &str) == CS_OK) { free(str); } else { str = (char *)totemip_print(&totem_config->interfaces[i].mcast_addr); icmap_set_string(tmp_key, str); } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport", i); if (icmap_get_uint16(tmp_key, &u16) != CS_OK) { icmap_set_uint16(tmp_key, totem_config->interfaces[i].ip_port); } } totem_config->transport_number = TOTEM_TRANSPORT_UDP; if (icmap_get_string("totem.transport", &str) == CS_OK) { if (strcmp (str, "udpu") == 0) { totem_config->transport_number = TOTEM_TRANSPORT_UDPU; } if (strcmp (str, "iba") == 0) { totem_config->transport_number = TOTEM_TRANSPORT_RDMA; } free(str); } free(cluster_name); /* * Check existence of nodelist */ if (icmap_get_string("nodelist.node.0.ring0_addr", &str) == CS_OK) { free(str); /* * find local node */ local_node_pos = find_local_node_in_nodelist(totem_config); if (local_node_pos != -1) { icmap_set_uint32("nodelist.local_node_pos", local_node_pos); snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", local_node_pos); nodeid_set = (totem_config->node_id != 0); if (icmap_get_uint32(tmp_key, &totem_config->node_id) == CS_OK && nodeid_set) { *warnings |= TOTEM_CONFIG_WARNING_TOTEM_NODEID_IGNORED; } /* * Make localnode ring0_addr read only, so we can be sure that local * node never changes. If rebinding to other IP would be in future * supported, this must be changed and handled properly! */ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", local_node_pos); icmap_set_ro_access(tmp_key, 0, 1); icmap_set_ro_access("nodelist.local_node_pos", 0, 1); } put_nodelist_members_to_config(totem_config); } add_totem_config_notification(totem_config); return 0; } int totem_config_validate ( struct totem_config *totem_config, const char **error_string) { static char local_error_reason[512]; char parse_error[512]; const char *error_reason = local_error_reason; int i; unsigned int interface_max = INTERFACE_MAX; if (totem_config->interface_count == 0) { error_reason = "No interfaces defined"; goto parse_error; } for (i = 0; i < totem_config->interface_count; i++) { /* * Some error checking of parsed data to make sure its valid */ struct totem_ip_address null_addr; memset (&null_addr, 0, sizeof (struct totem_ip_address)); if ((totem_config->transport_number == 0) && memcmp (&totem_config->interfaces[i].mcast_addr, &null_addr, sizeof (struct totem_ip_address)) == 0) { error_reason = "No multicast address specified"; goto parse_error; } if (totem_config->interfaces[i].ip_port == 0) { error_reason = "No multicast port specified"; goto parse_error; } if (totem_config->interfaces[i].ttl > 255) { error_reason = "Invalid TTL (should be 0..255)"; goto parse_error; } if (totem_config->transport_number != TOTEM_TRANSPORT_UDP && totem_config->interfaces[i].ttl != 1) { error_reason = "Can only set ttl on multicast transport types"; goto parse_error; } if (totem_config->interfaces[i].mcast_addr.family == AF_INET6 && totem_config->node_id == 0) { error_reason = "An IPV6 network requires that a node ID be specified."; goto parse_error; } if (totem_config->broadcast_use == 0 && totem_config->transport_number == 0) { if (totem_config->interfaces[i].mcast_addr.family != totem_config->interfaces[i].bindnet.family) { error_reason = "Multicast address family does not match bind address family"; goto parse_error; } if (totem_config->interfaces[i].mcast_addr.family != totem_config->interfaces[i].bindnet.family) { error_reason = "Not all bind address belong to the same IP family"; goto parse_error; } if (totemip_is_mcast (&totem_config->interfaces[i].mcast_addr) != 0) { error_reason = "mcastaddr is not a correct multicast address."; goto parse_error; } } } if (totem_config->version != 2) { error_reason = "This totem parser can only parse version 2 configurations."; goto parse_error; } if (totem_config->token_retransmits_before_loss_const == 0) { totem_config->token_retransmits_before_loss_const = TOKEN_RETRANSMITS_BEFORE_LOSS_CONST; } /* * Setup timeout values that are not setup by user */ if (totem_config->token_timeout == 0) { totem_config->token_timeout = TOKEN_TIMEOUT; if (totem_config->token_retransmits_before_loss_const == 0) { totem_config->token_retransmits_before_loss_const = TOKEN_RETRANSMITS_BEFORE_LOSS_CONST; } if (totem_config->token_retransmit_timeout == 0) { totem_config->token_retransmit_timeout = (int)(totem_config->token_timeout / (totem_config->token_retransmits_before_loss_const + 0.2)); } if (totem_config->token_hold_timeout == 0) { totem_config->token_hold_timeout = (int)(totem_config->token_retransmit_timeout * 0.8 - (1000/HZ)); } } if (totem_config->max_network_delay == 0) { totem_config->max_network_delay = MAX_NETWORK_DELAY; } if (totem_config->max_network_delay < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The max_network_delay parameter (%d ms) may not be less then (%d ms).", totem_config->max_network_delay, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->window_size == 0) { totem_config->window_size = WINDOW_SIZE; } if (totem_config->max_messages == 0) { totem_config->max_messages = MAX_MESSAGES; } if (totem_config->miss_count_const == 0) { totem_config->miss_count_const = MISS_COUNT_CONST; } if (totem_config->token_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The token timeout parameter (%d ms) may not be less then (%d ms).", totem_config->token_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->token_retransmit_timeout == 0) { totem_config->token_retransmit_timeout = (int)(totem_config->token_timeout / (totem_config->token_retransmits_before_loss_const + 0.2)); } if (totem_config->token_hold_timeout == 0) { totem_config->token_hold_timeout = (int)(totem_config->token_retransmit_timeout * 0.8 - (1000/HZ)); } if (totem_config->token_retransmit_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The token retransmit timeout parameter (%d ms) may not be less then (%d ms).", totem_config->token_retransmit_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->token_hold_timeout == 0) { totem_config->token_hold_timeout = TOKEN_HOLD_TIMEOUT; } if (totem_config->token_hold_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The token hold timeout parameter (%d ms) may not be less then (%d ms).", totem_config->token_hold_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->join_timeout == 0) { totem_config->join_timeout = JOIN_TIMEOUT; } if (totem_config->join_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The join timeout parameter (%d ms) may not be less then (%d ms).", totem_config->join_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->consensus_timeout == 0) { totem_config->consensus_timeout = (int)(float)(1.2 * totem_config->token_timeout); } if (totem_config->consensus_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The consensus timeout parameter (%d ms) may not be less then (%d ms).", totem_config->consensus_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->merge_timeout == 0) { totem_config->merge_timeout = MERGE_TIMEOUT; } if (totem_config->merge_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The merge timeout parameter (%d ms) may not be less then (%d ms).", totem_config->merge_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->downcheck_timeout == 0) { totem_config->downcheck_timeout = DOWNCHECK_TIMEOUT; } if (totem_config->downcheck_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The downcheck timeout parameter (%d ms) may not be less then (%d ms).", totem_config->downcheck_timeout, MINIMUM_TIMEOUT); goto parse_error; } /* * RRP values validation */ if (strcmp (totem_config->rrp_mode, "none") && strcmp (totem_config->rrp_mode, "active") && strcmp (totem_config->rrp_mode, "passive")) { snprintf (local_error_reason, sizeof(local_error_reason), "The RRP mode \"%s\" specified is invalid. It must be none, active, or passive.\n", totem_config->rrp_mode); goto parse_error; } if (totem_config->rrp_problem_count_timeout == 0) { totem_config->rrp_problem_count_timeout = RRP_PROBLEM_COUNT_TIMEOUT; } if (totem_config->rrp_problem_count_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The RRP problem count timeout parameter (%d ms) may not be less then (%d ms).", totem_config->rrp_problem_count_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->rrp_problem_count_threshold == 0) { totem_config->rrp_problem_count_threshold = RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT; } if (totem_config->rrp_problem_count_mcast_threshold == 0) { totem_config->rrp_problem_count_mcast_threshold = totem_config->rrp_problem_count_threshold * 10; } if (totem_config->rrp_problem_count_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) { snprintf (local_error_reason, sizeof(local_error_reason), "The RRP problem count threshold (%d problem count) may not be less then (%d problem count).", totem_config->rrp_problem_count_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN); goto parse_error; } if (totem_config->rrp_problem_count_mcast_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) { snprintf (local_error_reason, sizeof(local_error_reason), "The RRP multicast problem count threshold (%d problem count) may not be less then (%d problem count).", totem_config->rrp_problem_count_mcast_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN); goto parse_error; } if (totem_config->rrp_token_expired_timeout == 0) { totem_config->rrp_token_expired_timeout = totem_config->token_retransmit_timeout; } if (totem_config->rrp_token_expired_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The RRP token expired timeout parameter (%d ms) may not be less then (%d ms).", totem_config->rrp_token_expired_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->rrp_autorecovery_check_timeout == 0) { totem_config->rrp_autorecovery_check_timeout = RRP_AUTORECOVERY_CHECK_TIMEOUT; } if (strcmp (totem_config->rrp_mode, "none") == 0) { interface_max = 1; } if (interface_max < totem_config->interface_count) { snprintf (parse_error, sizeof(parse_error), "%d is too many configured interfaces for the rrp_mode setting %s.", totem_config->interface_count, totem_config->rrp_mode); error_reason = parse_error; goto parse_error; } if (totem_config->fail_to_recv_const == 0) { totem_config->fail_to_recv_const = FAIL_TO_RECV_CONST; } if (totem_config->seqno_unchanged_const == 0) { totem_config->seqno_unchanged_const = SEQNO_UNCHANGED_CONST; } if (totem_config->net_mtu == 0) { totem_config->net_mtu = 1500; } if ((MESSAGE_QUEUE_MAX) < totem_config->max_messages) { snprintf (local_error_reason, sizeof(local_error_reason), "The max_messages parameter (%d messages) may not be greater then (%d messages).", totem_config->max_messages, MESSAGE_QUEUE_MAX); goto parse_error; } if (totem_config->threads > SEND_THREADS_MAX) { totem_config->threads = SEND_THREADS_MAX; } if (totem_config->secauth == 0) { totem_config->threads = 0; } if (totem_config->net_mtu > FRAME_SIZE_MAX) { error_reason = "This net_mtu parameter is greater then the maximum frame size"; goto parse_error; } if (totem_config->vsf_type == NULL) { totem_config->vsf_type = "none"; } return (0); parse_error: snprintf (error_string_response, sizeof(error_string_response), "parse error in config: %s\n", error_reason); *error_string = error_string_response; return (-1); } static int read_keyfile ( const char *key_location, struct totem_config *totem_config, const char **error_string) { int fd; int res; ssize_t expected_key_len = sizeof (totem_config->private_key); int saved_errno; char error_str[100]; const char *error_ptr; fd = open (key_location, O_RDONLY); if (fd == -1) { error_ptr = qb_strerror_r(errno, error_str, sizeof(error_str)); snprintf (error_string_response, sizeof(error_string_response), "Could not open %s: %s\n", key_location, error_ptr); goto parse_error; } res = read (fd, totem_config->private_key, expected_key_len); saved_errno = errno; close (fd); if (res == -1) { error_ptr = qb_strerror_r (saved_errno, error_str, sizeof(error_str)); snprintf (error_string_response, sizeof(error_string_response), "Could not read %s: %s\n", key_location, error_ptr); goto parse_error; } totem_config->private_key_len = expected_key_len; if (res != expected_key_len) { snprintf (error_string_response, sizeof(error_string_response), "Could only read %d bits of 1024 bits from %s.\n", res * 8, key_location); goto parse_error; } return 0; parse_error: *error_string = error_string_response; return (-1); } int totem_config_keyread ( struct totem_config *totem_config, const char **error_string) { int got_key = 0; char *key_location = NULL; int res; size_t key_len; memset (totem_config->private_key, 0, 128); totem_config->private_key_len = 128; if (totem_config->secauth == 0) { return (0); } /* cmap may store the location of the key file */ if (icmap_get_string("totem.keyfile", &key_location) == CS_OK) { res = read_keyfile(key_location, totem_config, error_string); free(key_location); if (res) { goto key_error; } got_key = 1; } else { /* Or the key itself may be in the cmap */ if (icmap_get("totem.key", NULL, &key_len, NULL) == CS_OK) { if (key_len > sizeof (totem_config->private_key)) { sprintf(error_string_response, "key is too long"); goto key_error; } if (icmap_get("totem.key", totem_config->private_key, &key_len, NULL) == CS_OK) { totem_config->private_key_len = key_len; got_key = 1; } else { sprintf(error_string_response, "can't store private key"); goto key_error; } } } /* In desperation we read the default filename */ if (!got_key) { const char *filename = getenv("COROSYNC_TOTEM_AUTHKEY_FILE"); if (!filename) filename = COROSYSCONFDIR "/authkey"; res = read_keyfile(filename, totem_config, error_string); if (res) goto key_error; } return (0); key_error: *error_string = error_string_response; return (-1); } static void totem_change_notify( int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data) { totem_volatile_config_read((struct totem_config *)user_data); } static void add_totem_config_notification(struct totem_config *totem_config) { icmap_track_t icmap_track; icmap_track_add("totem.", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX, totem_change_notify, totem_config, &icmap_track); } diff --git a/exec/totemsrp.c b/exec/totemsrp.c index c3195534..6a16db8e 100644 --- a/exec/totemsrp.c +++ b/exec/totemsrp.c @@ -1,4541 +1,4539 @@ /* * Copyright (c) 2003-2006 MontaVista Software, Inc. * Copyright (c) 2006-2009 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* * The first version of this code was based upon Yair Amir's PhD thesis: * http://www.cs.jhu.edu/~yairamir/phd.ps) (ch4,5). * * The current version of totemsrp implements the Totem protocol specified in: * http://citeseer.ist.psu.edu/amir95totem.html * * The deviations from the above published protocols are: - * - encryption of message contents with SOBER128 + * - encryption of message contents with nss * - authentication of meessage contents with SHA1/HMAC * - token hold mode where token doesn't rotate on unused ring - reduces cpu * usage on 1.6ghz xeon from 35% to less then .1 % as measured by top */ #include #include #ifdef HAVE_ALLOCA_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define LOGSYS_UTILS_ONLY 1 #include #include "totemsrp.h" #include "totemrrp.h" #include "totemnet.h" -#include "crypto.h" #include "cs_queue.h" #define LOCALHOST_IP inet_addr("127.0.0.1") #define QUEUE_RTR_ITEMS_SIZE_MAX 16384 /* allow 16384 retransmit items */ #define RETRANS_MESSAGE_QUEUE_SIZE_MAX 16384 /* allow 500 messages to be queued */ #define RECEIVED_MESSAGE_QUEUE_SIZE_MAX 500 /* allow 500 messages to be queued */ #define MAXIOVS 5 #define RETRANSMIT_ENTRIES_MAX 30 #define TOKEN_SIZE_MAX 64000 /* bytes */ #define LEAVE_DUMMY_NODEID 0 /* * Rollover handling: * SEQNO_START_MSG is the starting sequence number after a new configuration * This should remain zero, unless testing overflow in which case * 0x7ffff000 and 0xfffff000 are good starting values. * * SEQNO_START_TOKEN is the starting sequence number after a new configuration * for a token. This should remain zero, unless testing overflow in which * case 07fffff00 or 0xffffff00 are good starting values. */ #define SEQNO_START_MSG 0x0 #define SEQNO_START_TOKEN 0x0 /* * These can be used ot test different rollover points * #define SEQNO_START_MSG 0xfffffe00 * #define SEQNO_START_TOKEN 0xfffffe00 */ /* * These can be used to test the error recovery algorithms * #define TEST_DROP_ORF_TOKEN_PERCENTAGE 30 * #define TEST_DROP_COMMIT_TOKEN_PERCENTAGE 30 * #define TEST_DROP_MCAST_PERCENTAGE 50 * #define TEST_RECOVERY_MSG_COUNT 300 */ /* * we compare incoming messages to determine if their endian is * different - if so convert them * * do not change */ #define ENDIAN_LOCAL 0xff22 enum message_type { MESSAGE_TYPE_ORF_TOKEN = 0, /* Ordering, Reliability, Flow (ORF) control Token */ MESSAGE_TYPE_MCAST = 1, /* ring ordered multicast message */ MESSAGE_TYPE_MEMB_MERGE_DETECT = 2, /* merge rings if there are available rings */ MESSAGE_TYPE_MEMB_JOIN = 3, /* membership join message */ MESSAGE_TYPE_MEMB_COMMIT_TOKEN = 4, /* membership commit token */ MESSAGE_TYPE_TOKEN_HOLD_CANCEL = 5, /* cancel the holding of the token */ }; enum encapsulation_type { MESSAGE_ENCAPSULATED = 1, MESSAGE_NOT_ENCAPSULATED = 2 }; /* * New membership algorithm local variables */ struct srp_addr { struct totem_ip_address addr[INTERFACE_MAX]; }; struct consensus_list_item { struct srp_addr addr; int set; }; struct token_callback_instance { struct list_head list; int (*callback_fn) (enum totem_callback_token_type type, const void *); enum totem_callback_token_type callback_type; int delete; void *data; }; struct totemsrp_socket { int mcast; int token; }; struct message_header { char type; char encapsulated; unsigned short endian_detector; unsigned int nodeid; } __attribute__((packed)); struct mcast { struct message_header header; struct srp_addr system_from; unsigned int seq; int this_seqno; struct memb_ring_id ring_id; unsigned int node_id; int guarantee; } __attribute__((packed)); struct rtr_item { struct memb_ring_id ring_id; unsigned int seq; }__attribute__((packed)); struct orf_token { struct message_header header; unsigned int seq; unsigned int token_seq; unsigned int aru; unsigned int aru_addr; struct memb_ring_id ring_id; unsigned int backlog; unsigned int fcc; int retrans_flg; int rtr_list_entries; struct rtr_item rtr_list[0]; }__attribute__((packed)); struct memb_join { struct message_header header; struct srp_addr system_from; unsigned int proc_list_entries; unsigned int failed_list_entries; unsigned long long ring_seq; unsigned char end_of_memb_join[0]; /* * These parts of the data structure are dynamic: * struct srp_addr proc_list[]; * struct srp_addr failed_list[]; */ } __attribute__((packed)); struct memb_merge_detect { struct message_header header; struct srp_addr system_from; struct memb_ring_id ring_id; } __attribute__((packed)); struct token_hold_cancel { struct message_header header; struct memb_ring_id ring_id; } __attribute__((packed)); struct memb_commit_token_memb_entry { struct memb_ring_id ring_id; unsigned int aru; unsigned int high_delivered; unsigned int received_flg; }__attribute__((packed)); struct memb_commit_token { struct message_header header; unsigned int token_seq; struct memb_ring_id ring_id; unsigned int retrans_flg; int memb_index; int addr_entries; unsigned char end_of_commit_token[0]; /* * These parts of the data structure are dynamic: * * struct srp_addr addr[PROCESSOR_COUNT_MAX]; * struct memb_commit_token_memb_entry memb_list[PROCESSOR_COUNT_MAX]; */ }__attribute__((packed)); struct message_item { struct mcast *mcast; unsigned int msg_len; }; struct sort_queue_item { struct mcast *mcast; unsigned int msg_len; }; struct orf_token_mcast_thread_state { char iobuf[9000]; - prng_state prng_state; }; enum memb_state { MEMB_STATE_OPERATIONAL = 1, MEMB_STATE_GATHER = 2, MEMB_STATE_COMMIT = 3, MEMB_STATE_RECOVERY = 4 }; struct totemsrp_instance { int iface_changes; int failed_to_recv; /* * Flow control mcasts and remcasts on last and current orf_token */ int fcc_remcast_last; int fcc_mcast_last; int fcc_remcast_current; struct consensus_list_item consensus_list[PROCESSOR_COUNT_MAX]; int consensus_list_entries; struct srp_addr my_id; struct srp_addr my_proc_list[PROCESSOR_COUNT_MAX]; struct srp_addr my_failed_list[PROCESSOR_COUNT_MAX]; struct srp_addr my_new_memb_list[PROCESSOR_COUNT_MAX]; struct srp_addr my_trans_memb_list[PROCESSOR_COUNT_MAX]; struct srp_addr my_memb_list[PROCESSOR_COUNT_MAX]; struct srp_addr my_deliver_memb_list[PROCESSOR_COUNT_MAX]; struct srp_addr my_left_memb_list[PROCESSOR_COUNT_MAX]; int my_proc_list_entries; int my_failed_list_entries; int my_new_memb_entries; int my_trans_memb_entries; int my_memb_entries; int my_deliver_memb_entries; int my_left_memb_entries; struct memb_ring_id my_ring_id; struct memb_ring_id my_old_ring_id; int my_aru_count; int my_merge_detect_timeout_outstanding; unsigned int my_last_aru; int my_seq_unchanged; int my_received_flg; unsigned int my_high_seq_received; unsigned int my_install_seq; int my_rotation_counter; int my_set_retrans_flg; int my_retrans_flg_count; unsigned int my_high_ring_delivered; int heartbeat_timeout; /* * Queues used to order, deliver, and recover messages */ struct cs_queue new_message_queue; struct cs_queue retrans_message_queue; struct sq regular_sort_queue; struct sq recovery_sort_queue; /* * Received up to and including */ unsigned int my_aru; unsigned int my_high_delivered; struct list_head token_callback_received_listhead; struct list_head token_callback_sent_listhead; char orf_token_retransmit[TOKEN_SIZE_MAX]; int orf_token_retransmit_size; unsigned int my_token_seq; /* * Timers */ qb_loop_timer_handle timer_pause_timeout; qb_loop_timer_handle timer_orf_token_timeout; qb_loop_timer_handle timer_orf_token_retransmit_timeout; qb_loop_timer_handle timer_orf_token_hold_retransmit_timeout; qb_loop_timer_handle timer_merge_detect_timeout; qb_loop_timer_handle memb_timer_state_gather_join_timeout; qb_loop_timer_handle memb_timer_state_gather_consensus_timeout; qb_loop_timer_handle memb_timer_state_commit_timeout; qb_loop_timer_handle timer_heartbeat_timeout; /* * Function and data used to log messages */ int totemsrp_log_level_security; int totemsrp_log_level_error; int totemsrp_log_level_warning; int totemsrp_log_level_notice; int totemsrp_log_level_debug; int totemsrp_subsys_id; void (*totemsrp_log_printf) ( int level, int sybsys, const char *function, const char *file, int line, const char *format, ...)__attribute__((format(printf, 6, 7)));; enum memb_state memb_state; //TODO struct srp_addr next_memb; qb_loop_t *totemsrp_poll_handle; struct totem_ip_address mcast_address; void (*totemsrp_deliver_fn) ( unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required); void (*totemsrp_confchg_fn) ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id); void (*totemsrp_service_ready_fn) (void); int global_seqno; int my_token_held; unsigned long long token_ring_id_seq; unsigned int last_released; unsigned int set_aru; int old_ring_state_saved; int old_ring_state_aru; unsigned int old_ring_state_high_seq_received; unsigned int my_last_seq; struct timeval tv_old; void *totemrrp_context; struct totem_config *totem_config; unsigned int use_heartbeat; unsigned int my_trc; unsigned int my_pbl; unsigned int my_cbl; uint64_t pause_timestamp; struct memb_commit_token *commit_token; totemsrp_stats_t stats; uint32_t orf_token_discard; uint32_t threaded_mode_enabled; void * token_recv_event_handle; void * token_sent_event_handle; char commit_token_storage[40000]; }; struct message_handlers { int count; int (*handler_functions[6]) ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed); }; /* * forward decls */ static int message_handler_orf_token ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed); static int message_handler_mcast ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed); static int message_handler_memb_merge_detect ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed); static int message_handler_memb_join ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed); static int message_handler_memb_commit_token ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed); static int message_handler_token_hold_cancel ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed); static void totemsrp_instance_initialize (struct totemsrp_instance *instance); static unsigned int main_msgs_missing (void); static void main_token_seqid_get ( const void *msg, unsigned int *seqid, unsigned int *token_is); static void srp_addr_copy (struct srp_addr *dest, const struct srp_addr *src); static void srp_addr_to_nodeid ( unsigned int *nodeid_out, struct srp_addr *srp_addr_in, unsigned int entries); static int srp_addr_equal (const struct srp_addr *a, const struct srp_addr *b); static void memb_leave_message_send (struct totemsrp_instance *instance); static void memb_ring_id_create_or_load (struct totemsrp_instance *, struct memb_ring_id *); static void token_callbacks_execute (struct totemsrp_instance *instance, enum totem_callback_token_type type); static void memb_state_gather_enter (struct totemsrp_instance *instance, int gather_from); static void messages_deliver_to_app (struct totemsrp_instance *instance, int skip, unsigned int end_point); static int orf_token_mcast (struct totemsrp_instance *instance, struct orf_token *oken, int fcc_mcasts_allowed); static void messages_free (struct totemsrp_instance *instance, unsigned int token_aru); static void memb_ring_id_set_and_store (struct totemsrp_instance *instance, const struct memb_ring_id *ring_id); static void target_set_completed (void *context); static void memb_state_commit_token_update (struct totemsrp_instance *instance); static void memb_state_commit_token_target_set (struct totemsrp_instance *instance); static int memb_state_commit_token_send (struct totemsrp_instance *instance); static int memb_state_commit_token_send_recovery (struct totemsrp_instance *instance, struct memb_commit_token *memb_commit_token); static void memb_state_commit_token_create (struct totemsrp_instance *instance); static int token_hold_cancel_send (struct totemsrp_instance *instance); static void orf_token_endian_convert (const struct orf_token *in, struct orf_token *out); static void memb_commit_token_endian_convert (const struct memb_commit_token *in, struct memb_commit_token *out); static void memb_join_endian_convert (const struct memb_join *in, struct memb_join *out); static void mcast_endian_convert (const struct mcast *in, struct mcast *out); static void memb_merge_detect_endian_convert ( const struct memb_merge_detect *in, struct memb_merge_detect *out); static void srp_addr_copy_endian_convert (struct srp_addr *out, const struct srp_addr *in); static void timer_function_orf_token_timeout (void *data); static void timer_function_pause_timeout (void *data); static void timer_function_heartbeat_timeout (void *data); static void timer_function_token_retransmit_timeout (void *data); static void timer_function_token_hold_retransmit_timeout (void *data); static void timer_function_merge_detect_timeout (void *data); static void *totemsrp_buffer_alloc (struct totemsrp_instance *instance); static void totemsrp_buffer_release (struct totemsrp_instance *instance, void *ptr); void main_deliver_fn ( void *context, const void *msg, unsigned int msg_len); void main_iface_change_fn ( void *context, const struct totem_ip_address *iface_address, unsigned int iface_no); struct message_handlers totemsrp_message_handlers = { 6, { message_handler_orf_token, /* MESSAGE_TYPE_ORF_TOKEN */ message_handler_mcast, /* MESSAGE_TYPE_MCAST */ message_handler_memb_merge_detect, /* MESSAGE_TYPE_MEMB_MERGE_DETECT */ message_handler_memb_join, /* MESSAGE_TYPE_MEMB_JOIN */ message_handler_memb_commit_token, /* MESSAGE_TYPE_MEMB_COMMIT_TOKEN */ message_handler_token_hold_cancel /* MESSAGE_TYPE_TOKEN_HOLD_CANCEL */ } }; static const char *rundir = NULL; #define log_printf(level, format, args...) \ do { \ instance->totemsrp_log_printf ( \ level, instance->totemsrp_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ format, ##args); \ } while (0); #define LOGSYS_PERROR(err_num, level, fmt, args...) \ do { \ char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \ const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \ instance->totemsrp_log_printf ( \ level, instance->totemsrp_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \ } while(0) static void totemsrp_instance_initialize (struct totemsrp_instance *instance) { memset (instance, 0, sizeof (struct totemsrp_instance)); list_init (&instance->token_callback_received_listhead); list_init (&instance->token_callback_sent_listhead); instance->my_received_flg = 1; instance->my_token_seq = SEQNO_START_TOKEN - 1; instance->memb_state = MEMB_STATE_OPERATIONAL; instance->set_aru = -1; instance->my_aru = SEQNO_START_MSG; instance->my_high_seq_received = SEQNO_START_MSG; instance->my_high_delivered = SEQNO_START_MSG; instance->orf_token_discard = 0; instance->commit_token = (struct memb_commit_token *)instance->commit_token_storage; } static void main_token_seqid_get ( const void *msg, unsigned int *seqid, unsigned int *token_is) { const struct orf_token *token = msg; *seqid = 0; *token_is = 0; if (token->header.type == MESSAGE_TYPE_ORF_TOKEN) { *seqid = token->token_seq; *token_is = 1; } } static unsigned int main_msgs_missing (void) { // TODO return (0); } static int pause_flush (struct totemsrp_instance *instance) { uint64_t now_msec; uint64_t timestamp_msec; int res = 0; now_msec = (qb_util_nano_current_get () / QB_TIME_NS_IN_MSEC); timestamp_msec = instance->pause_timestamp / QB_TIME_NS_IN_MSEC; if ((now_msec - timestamp_msec) > (instance->totem_config->token_timeout / 2)) { log_printf (instance->totemsrp_log_level_notice, "Process pause detected for %d ms, flushing membership messages.", (unsigned int)(now_msec - timestamp_msec)); /* * -1 indicates an error from recvmsg */ do { res = totemrrp_mcast_recv_empty (instance->totemrrp_context); } while (res == -1); } return (res); } static int token_event_stats_collector (enum totem_callback_token_type type, const void *void_instance) { struct totemsrp_instance *instance = (struct totemsrp_instance *)void_instance; uint32_t time_now; unsigned long long nano_secs = qb_util_nano_current_get (); time_now = (nano_secs / QB_TIME_NS_IN_MSEC); if (type == TOTEM_CALLBACK_TOKEN_RECEIVED) { /* incr latest token the index */ if (instance->stats.latest_token == (TOTEM_TOKEN_STATS_MAX - 1)) instance->stats.latest_token = 0; else instance->stats.latest_token++; if (instance->stats.earliest_token == instance->stats.latest_token) { /* we have filled up the array, start overwriting */ if (instance->stats.earliest_token == (TOTEM_TOKEN_STATS_MAX - 1)) instance->stats.earliest_token = 0; else instance->stats.earliest_token++; instance->stats.token[instance->stats.earliest_token].rx = 0; instance->stats.token[instance->stats.earliest_token].tx = 0; instance->stats.token[instance->stats.earliest_token].backlog_calc = 0; } instance->stats.token[instance->stats.latest_token].rx = time_now; instance->stats.token[instance->stats.latest_token].tx = 0; /* in case we drop the token */ } else { instance->stats.token[instance->stats.latest_token].tx = time_now; } return 0; } /* * Exported interfaces */ int totemsrp_initialize ( qb_loop_t *poll_handle, void **srp_context, struct totem_config *totem_config, totemmrp_stats_t *stats, void (*deliver_fn) ( unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required), void (*confchg_fn) ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id)) { struct totemsrp_instance *instance; unsigned int res; instance = malloc (sizeof (struct totemsrp_instance)); if (instance == NULL) { goto error_exit; } rundir = getenv ("COROSYNC_RUN_DIR"); if (rundir == NULL) { rundir = LOCALSTATEDIR "/lib/corosync"; } res = mkdir (rundir, 0700); if (res == -1 && errno != EEXIST) { goto error_destroy; } res = chdir (rundir); if (res == -1) { goto error_destroy; } totemsrp_instance_initialize (instance); stats->srp = &instance->stats; instance->stats.latest_token = 0; instance->stats.earliest_token = 0; instance->totem_config = totem_config; /* * Configure logging */ instance->totemsrp_log_level_security = totem_config->totem_logging_configuration.log_level_security; instance->totemsrp_log_level_error = totem_config->totem_logging_configuration.log_level_error; instance->totemsrp_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; instance->totemsrp_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; instance->totemsrp_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; instance->totemsrp_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; instance->totemsrp_log_printf = totem_config->totem_logging_configuration.log_printf; /* * Initialize local variables for totemsrp */ totemip_copy (&instance->mcast_address, &totem_config->interfaces[0].mcast_addr); /* * Display totem configuration */ log_printf (instance->totemsrp_log_level_debug, "Token Timeout (%d ms) retransmit timeout (%d ms)", totem_config->token_timeout, totem_config->token_retransmit_timeout); log_printf (instance->totemsrp_log_level_debug, "token hold (%d ms) retransmits before loss (%d retrans)", totem_config->token_hold_timeout, totem_config->token_retransmits_before_loss_const); log_printf (instance->totemsrp_log_level_debug, "join (%d ms) send_join (%d ms) consensus (%d ms) merge (%d ms)", totem_config->join_timeout, totem_config->send_join_timeout, totem_config->consensus_timeout, totem_config->merge_timeout); log_printf (instance->totemsrp_log_level_debug, "downcheck (%d ms) fail to recv const (%d msgs)", totem_config->downcheck_timeout, totem_config->fail_to_recv_const); log_printf (instance->totemsrp_log_level_debug, "seqno unchanged const (%d rotations) Maximum network MTU %d", totem_config->seqno_unchanged_const, totem_config->net_mtu); log_printf (instance->totemsrp_log_level_debug, "window size per rotation (%d messages) maximum messages per rotation (%d messages)", totem_config->window_size, totem_config->max_messages); log_printf (instance->totemsrp_log_level_debug, "missed count const (%d messages)", totem_config->miss_count_const); log_printf (instance->totemsrp_log_level_debug, "send threads (%d threads)", totem_config->threads); log_printf (instance->totemsrp_log_level_debug, "RRP token expired timeout (%d ms)", totem_config->rrp_token_expired_timeout); log_printf (instance->totemsrp_log_level_debug, "RRP token problem counter (%d ms)", totem_config->rrp_problem_count_timeout); log_printf (instance->totemsrp_log_level_debug, "RRP threshold (%d problem count)", totem_config->rrp_problem_count_threshold); log_printf (instance->totemsrp_log_level_debug, "RRP multicast threshold (%d problem count)", totem_config->rrp_problem_count_mcast_threshold); log_printf (instance->totemsrp_log_level_debug, "RRP automatic recovery check timeout (%d ms)", totem_config->rrp_autorecovery_check_timeout); log_printf (instance->totemsrp_log_level_debug, "RRP mode set to %s.", instance->totem_config->rrp_mode); log_printf (instance->totemsrp_log_level_debug, "heartbeat_failures_allowed (%d)", totem_config->heartbeat_failures_allowed); log_printf (instance->totemsrp_log_level_debug, "max_network_delay (%d ms)", totem_config->max_network_delay); cs_queue_init (&instance->retrans_message_queue, RETRANS_MESSAGE_QUEUE_SIZE_MAX, sizeof (struct message_item), instance->threaded_mode_enabled); sq_init (&instance->regular_sort_queue, QUEUE_RTR_ITEMS_SIZE_MAX, sizeof (struct sort_queue_item), 0); sq_init (&instance->recovery_sort_queue, QUEUE_RTR_ITEMS_SIZE_MAX, sizeof (struct sort_queue_item), 0); instance->totemsrp_poll_handle = poll_handle; instance->totemsrp_deliver_fn = deliver_fn; instance->totemsrp_confchg_fn = confchg_fn; instance->use_heartbeat = 1; timer_function_pause_timeout (instance); if ( totem_config->heartbeat_failures_allowed == 0 ) { log_printf (instance->totemsrp_log_level_debug, "HeartBeat is Disabled. To enable set heartbeat_failures_allowed > 0"); instance->use_heartbeat = 0; } if (instance->use_heartbeat) { instance->heartbeat_timeout = (totem_config->heartbeat_failures_allowed) * totem_config->token_retransmit_timeout + totem_config->max_network_delay; if (instance->heartbeat_timeout >= totem_config->token_timeout) { log_printf (instance->totemsrp_log_level_debug, "total heartbeat_timeout (%d ms) is not less than token timeout (%d ms)", instance->heartbeat_timeout, totem_config->token_timeout); log_printf (instance->totemsrp_log_level_debug, "heartbeat_timeout = heartbeat_failures_allowed * token_retransmit_timeout + max_network_delay"); log_printf (instance->totemsrp_log_level_debug, "heartbeat timeout should be less than the token timeout. HeartBeat is Diabled !!"); instance->use_heartbeat = 0; } else { log_printf (instance->totemsrp_log_level_debug, "total heartbeat_timeout (%d ms)", instance->heartbeat_timeout); } } totemrrp_initialize ( poll_handle, &instance->totemrrp_context, totem_config, stats->srp, instance, main_deliver_fn, main_iface_change_fn, main_token_seqid_get, main_msgs_missing, target_set_completed); /* * Must have net_mtu adjusted by totemrrp_initialize first */ cs_queue_init (&instance->new_message_queue, MESSAGE_QUEUE_MAX, sizeof (struct message_item), instance->threaded_mode_enabled); totemsrp_callback_token_create (instance, &instance->token_recv_event_handle, TOTEM_CALLBACK_TOKEN_RECEIVED, 0, token_event_stats_collector, instance); totemsrp_callback_token_create (instance, &instance->token_sent_event_handle, TOTEM_CALLBACK_TOKEN_SENT, 0, token_event_stats_collector, instance); *srp_context = instance; return (0); error_destroy: free (instance); error_exit: return (-1); } void totemsrp_finalize ( void *srp_context) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; memb_leave_message_send (instance); totemrrp_finalize (instance->totemrrp_context); cs_queue_free (&instance->new_message_queue); cs_queue_free (&instance->retrans_message_queue); sq_free (&instance->regular_sort_queue); sq_free (&instance->recovery_sort_queue); free (instance); } int totemsrp_ifaces_get ( void *srp_context, unsigned int nodeid, struct totem_ip_address *interfaces, char ***status, unsigned int *iface_count) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; int res = 0; unsigned int found = 0; unsigned int i; for (i = 0; i < instance->my_memb_entries; i++) { if (instance->my_memb_list[i].addr[0].nodeid == nodeid) { found = 1; break; } } if (found) { memcpy (interfaces, &instance->my_memb_list[i], sizeof (struct srp_addr)); *iface_count = instance->totem_config->interface_count; goto finish; } for (i = 0; i < instance->my_left_memb_entries; i++) { if (instance->my_left_memb_list[i].addr[0].nodeid == nodeid) { found = 1; break; } } if (found) { memcpy (interfaces, &instance->my_left_memb_list[i], sizeof (struct srp_addr)); *iface_count = instance->totem_config->interface_count; } else { res = -1; } finish: totemrrp_ifaces_get (instance->totemrrp_context, status, NULL); return (res); } int totemsrp_crypto_set ( void *srp_context, unsigned int type) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; int res; res = totemrrp_crypto_set(instance->totemrrp_context, type); return (res); } unsigned int totemsrp_my_nodeid_get ( void *srp_context) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; unsigned int res; res = instance->totem_config->interfaces[0].boundto.nodeid; return (res); } int totemsrp_my_family_get ( void *srp_context) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; int res; res = instance->totem_config->interfaces[0].boundto.family; return (res); } int totemsrp_ring_reenable ( void *srp_context) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; totemrrp_ring_reenable (instance->totemrrp_context, instance->totem_config->interface_count); return (0); } /* * Set operations for use by the membership algorithm */ static int srp_addr_equal (const struct srp_addr *a, const struct srp_addr *b) { unsigned int i; unsigned int res; for (i = 0; i < 1; i++) { res = totemip_equal (&a->addr[i], &b->addr[i]); if (res == 0) { return (0); } } return (1); } static void srp_addr_copy (struct srp_addr *dest, const struct srp_addr *src) { unsigned int i; for (i = 0; i < INTERFACE_MAX; i++) { totemip_copy (&dest->addr[i], &src->addr[i]); } } static void srp_addr_to_nodeid ( unsigned int *nodeid_out, struct srp_addr *srp_addr_in, unsigned int entries) { unsigned int i; for (i = 0; i < entries; i++) { nodeid_out[i] = srp_addr_in[i].addr[0].nodeid; } } static void srp_addr_copy_endian_convert (struct srp_addr *out, const struct srp_addr *in) { int i; for (i = 0; i < INTERFACE_MAX; i++) { totemip_copy_endian_convert (&out->addr[i], &in->addr[i]); } } static void memb_consensus_reset (struct totemsrp_instance *instance) { instance->consensus_list_entries = 0; } static void memb_set_subtract ( struct srp_addr *out_list, int *out_list_entries, struct srp_addr *one_list, int one_list_entries, struct srp_addr *two_list, int two_list_entries) { int found = 0; int i; int j; *out_list_entries = 0; for (i = 0; i < one_list_entries; i++) { for (j = 0; j < two_list_entries; j++) { if (srp_addr_equal (&one_list[i], &two_list[j])) { found = 1; break; } } if (found == 0) { srp_addr_copy (&out_list[*out_list_entries], &one_list[i]); *out_list_entries = *out_list_entries + 1; } found = 0; } } /* * Set consensus for a specific processor */ static void memb_consensus_set ( struct totemsrp_instance *instance, const struct srp_addr *addr) { int found = 0; int i; if (addr->addr[0].nodeid == LEAVE_DUMMY_NODEID) return; for (i = 0; i < instance->consensus_list_entries; i++) { if (srp_addr_equal(addr, &instance->consensus_list[i].addr)) { found = 1; break; /* found entry */ } } srp_addr_copy (&instance->consensus_list[i].addr, addr); instance->consensus_list[i].set = 1; if (found == 0) { instance->consensus_list_entries++; } return; } /* * Is consensus set for a specific processor */ static int memb_consensus_isset ( struct totemsrp_instance *instance, const struct srp_addr *addr) { int i; for (i = 0; i < instance->consensus_list_entries; i++) { if (srp_addr_equal (addr, &instance->consensus_list[i].addr)) { return (instance->consensus_list[i].set); } } return (0); } /* * Is consensus agreed upon based upon consensus database */ static int memb_consensus_agreed ( struct totemsrp_instance *instance) { struct srp_addr token_memb[PROCESSOR_COUNT_MAX]; int token_memb_entries = 0; int agreed = 1; int i; memb_set_subtract (token_memb, &token_memb_entries, instance->my_proc_list, instance->my_proc_list_entries, instance->my_failed_list, instance->my_failed_list_entries); for (i = 0; i < token_memb_entries; i++) { if (memb_consensus_isset (instance, &token_memb[i]) == 0) { agreed = 0; break; } } assert (token_memb_entries >= 1); return (agreed); } static void memb_consensus_notset ( struct totemsrp_instance *instance, struct srp_addr *no_consensus_list, int *no_consensus_list_entries, struct srp_addr *comparison_list, int comparison_list_entries) { int i; *no_consensus_list_entries = 0; for (i = 0; i < instance->my_proc_list_entries; i++) { if (memb_consensus_isset (instance, &instance->my_proc_list[i]) == 0) { srp_addr_copy (&no_consensus_list[*no_consensus_list_entries], &instance->my_proc_list[i]); *no_consensus_list_entries = *no_consensus_list_entries + 1; } } } /* * Is set1 equal to set2 Entries can be in different orders */ static int memb_set_equal ( struct srp_addr *set1, int set1_entries, struct srp_addr *set2, int set2_entries) { int i; int j; int found = 0; if (set1_entries != set2_entries) { return (0); } for (i = 0; i < set2_entries; i++) { for (j = 0; j < set1_entries; j++) { if (srp_addr_equal (&set1[j], &set2[i])) { found = 1; break; } } if (found == 0) { return (0); } found = 0; } return (1); } /* * Is subset fully contained in fullset */ static int memb_set_subset ( const struct srp_addr *subset, int subset_entries, const struct srp_addr *fullset, int fullset_entries) { int i; int j; int found = 0; if (subset_entries > fullset_entries) { return (0); } for (i = 0; i < subset_entries; i++) { for (j = 0; j < fullset_entries; j++) { if (srp_addr_equal (&subset[i], &fullset[j])) { found = 1; } } if (found == 0) { return (0); } found = 0; } return (1); } /* * merge subset into fullset taking care not to add duplicates */ static void memb_set_merge ( const struct srp_addr *subset, int subset_entries, struct srp_addr *fullset, int *fullset_entries) { int found = 0; int i; int j; for (i = 0; i < subset_entries; i++) { for (j = 0; j < *fullset_entries; j++) { if (srp_addr_equal (&fullset[j], &subset[i])) { found = 1; break; } } if (found == 0) { srp_addr_copy (&fullset[*fullset_entries], &subset[i]); *fullset_entries = *fullset_entries + 1; } found = 0; } return; } static void memb_set_and_with_ring_id ( struct srp_addr *set1, struct memb_ring_id *set1_ring_ids, int set1_entries, struct srp_addr *set2, int set2_entries, struct memb_ring_id *old_ring_id, struct srp_addr *and, int *and_entries) { int i; int j; int found = 0; *and_entries = 0; for (i = 0; i < set2_entries; i++) { for (j = 0; j < set1_entries; j++) { if (srp_addr_equal (&set1[j], &set2[i])) { if (memcmp (&set1_ring_ids[j], old_ring_id, sizeof (struct memb_ring_id)) == 0) { found = 1; } break; } } if (found) { srp_addr_copy (&and[*and_entries], &set1[j]); *and_entries = *and_entries + 1; } found = 0; } return; } #ifdef CODE_COVERAGE static void memb_set_print ( char *string, struct srp_addr *list, int list_entries) { int i; int j; printf ("List '%s' contains %d entries:\n", string, list_entries); for (i = 0; i < list_entries; i++) { for (j = 0; j < INTERFACE_MAX; j++) { printf ("Address %d\n", i); printf ("\tiface %d %s\n", j, totemip_print (&list[i].addr[j])); printf ("family %d\n", list[i].addr[j].family); } } } #endif static void *totemsrp_buffer_alloc (struct totemsrp_instance *instance) { assert (instance != NULL); return totemrrp_buffer_alloc (instance->totemrrp_context); } static void totemsrp_buffer_release (struct totemsrp_instance *instance, void *ptr) { assert (instance != NULL); totemrrp_buffer_release (instance->totemrrp_context, ptr); } static void reset_token_retransmit_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_retransmit_timeout); qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->token_retransmit_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_token_retransmit_timeout, &instance->timer_orf_token_retransmit_timeout); } static void start_merge_detect_timeout (struct totemsrp_instance *instance) { if (instance->my_merge_detect_timeout_outstanding == 0) { qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->merge_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_merge_detect_timeout, &instance->timer_merge_detect_timeout); instance->my_merge_detect_timeout_outstanding = 1; } } static void cancel_merge_detect_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_merge_detect_timeout); instance->my_merge_detect_timeout_outstanding = 0; } /* * ring_state_* is used to save and restore the sort queue * state when a recovery operation fails (and enters gather) */ static void old_ring_state_save (struct totemsrp_instance *instance) { if (instance->old_ring_state_saved == 0) { instance->old_ring_state_saved = 1; memcpy (&instance->my_old_ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); instance->old_ring_state_aru = instance->my_aru; instance->old_ring_state_high_seq_received = instance->my_high_seq_received; log_printf (instance->totemsrp_log_level_debug, "Saving state aru %x high seq received %x", instance->my_aru, instance->my_high_seq_received); } } static void old_ring_state_restore (struct totemsrp_instance *instance) { instance->my_aru = instance->old_ring_state_aru; instance->my_high_seq_received = instance->old_ring_state_high_seq_received; log_printf (instance->totemsrp_log_level_debug, "Restoring instance->my_aru %x my high seq received %x", instance->my_aru, instance->my_high_seq_received); } static void old_ring_state_reset (struct totemsrp_instance *instance) { log_printf (instance->totemsrp_log_level_debug, "Resetting old ring state"); instance->old_ring_state_saved = 0; } static void reset_pause_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_pause_timeout); qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->token_timeout * QB_TIME_NS_IN_MSEC / 5, (void *)instance, timer_function_pause_timeout, &instance->timer_pause_timeout); } static void reset_token_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_timeout); qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->token_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_orf_token_timeout, &instance->timer_orf_token_timeout); } static void reset_heartbeat_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_heartbeat_timeout); qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->heartbeat_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_heartbeat_timeout, &instance->timer_heartbeat_timeout); } static void cancel_token_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_timeout); } static void cancel_heartbeat_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_heartbeat_timeout); } static void cancel_token_retransmit_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_retransmit_timeout); } static void start_token_hold_retransmit_timeout (struct totemsrp_instance *instance) { qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->token_hold_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_token_hold_retransmit_timeout, &instance->timer_orf_token_hold_retransmit_timeout); } static void cancel_token_hold_retransmit_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_hold_retransmit_timeout); } static void memb_state_consensus_timeout_expired ( struct totemsrp_instance *instance) { struct srp_addr no_consensus_list[PROCESSOR_COUNT_MAX]; int no_consensus_list_entries; instance->stats.consensus_timeouts++; if (memb_consensus_agreed (instance)) { memb_consensus_reset (instance); memb_consensus_set (instance, &instance->my_id); reset_token_timeout (instance); // REVIEWED } else { memb_consensus_notset ( instance, no_consensus_list, &no_consensus_list_entries, instance->my_proc_list, instance->my_proc_list_entries); memb_set_merge (no_consensus_list, no_consensus_list_entries, instance->my_failed_list, &instance->my_failed_list_entries); memb_state_gather_enter (instance, 0); } } static void memb_join_message_send (struct totemsrp_instance *instance); static void memb_merge_detect_transmit (struct totemsrp_instance *instance); /* * Timers used for various states of the membership algorithm */ static void timer_function_pause_timeout (void *data) { struct totemsrp_instance *instance = data; instance->pause_timestamp = qb_util_nano_current_get (); reset_pause_timeout (instance); } static void memb_recovery_state_token_loss (struct totemsrp_instance *instance) { old_ring_state_restore (instance); memb_state_gather_enter (instance, 5); instance->stats.recovery_token_lost++; } static void timer_function_orf_token_timeout (void *data) { struct totemsrp_instance *instance = data; switch (instance->memb_state) { case MEMB_STATE_OPERATIONAL: log_printf (instance->totemsrp_log_level_debug, "The token was lost in the OPERATIONAL state."); log_printf (instance->totemsrp_log_level_notice, "A processor failed, forming new configuration."); totemrrp_iface_check (instance->totemrrp_context); memb_state_gather_enter (instance, 2); instance->stats.operational_token_lost++; break; case MEMB_STATE_GATHER: log_printf (instance->totemsrp_log_level_debug, "The consensus timeout expired."); memb_state_consensus_timeout_expired (instance); memb_state_gather_enter (instance, 3); instance->stats.gather_token_lost++; break; case MEMB_STATE_COMMIT: log_printf (instance->totemsrp_log_level_debug, "The token was lost in the COMMIT state."); memb_state_gather_enter (instance, 4); instance->stats.commit_token_lost++; break; case MEMB_STATE_RECOVERY: log_printf (instance->totemsrp_log_level_debug, "The token was lost in the RECOVERY state."); memb_recovery_state_token_loss (instance); instance->orf_token_discard = 1; break; } } static void timer_function_heartbeat_timeout (void *data) { struct totemsrp_instance *instance = data; log_printf (instance->totemsrp_log_level_debug, "HeartBeat Timer expired Invoking token loss mechanism in state %d ", instance->memb_state); timer_function_orf_token_timeout(data); } static void memb_timer_function_state_gather (void *data) { struct totemsrp_instance *instance = data; switch (instance->memb_state) { case MEMB_STATE_OPERATIONAL: case MEMB_STATE_RECOVERY: assert (0); /* this should never happen */ break; case MEMB_STATE_GATHER: case MEMB_STATE_COMMIT: memb_join_message_send (instance); /* * Restart the join timeout `*/ qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout); qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->join_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, memb_timer_function_state_gather, &instance->memb_timer_state_gather_join_timeout); break; } } static void memb_timer_function_gather_consensus_timeout (void *data) { struct totemsrp_instance *instance = data; memb_state_consensus_timeout_expired (instance); } static void deliver_messages_from_recovery_to_regular (struct totemsrp_instance *instance) { unsigned int i; struct sort_queue_item *recovery_message_item; struct sort_queue_item regular_message_item; unsigned int range = 0; int res; void *ptr; struct mcast *mcast; log_printf (instance->totemsrp_log_level_debug, "recovery to regular %x-%x", SEQNO_START_MSG + 1, instance->my_aru); range = instance->my_aru - SEQNO_START_MSG; /* * Move messages from recovery to regular sort queue */ // todo should i be initialized to 0 or 1 ? for (i = 1; i <= range; i++) { res = sq_item_get (&instance->recovery_sort_queue, i + SEQNO_START_MSG, &ptr); if (res != 0) { continue; } recovery_message_item = ptr; /* * Convert recovery message into regular message */ mcast = recovery_message_item->mcast; if (mcast->header.encapsulated == MESSAGE_ENCAPSULATED) { /* * Message is a recovery message encapsulated * in a new ring message */ regular_message_item.mcast = (struct mcast *)(((char *)recovery_message_item->mcast) + sizeof (struct mcast)); regular_message_item.msg_len = recovery_message_item->msg_len - sizeof (struct mcast); mcast = regular_message_item.mcast; } else { /* * TODO this case shouldn't happen */ continue; } log_printf (instance->totemsrp_log_level_debug, "comparing if ring id is for this processors old ring seqno %d", mcast->seq); /* * Only add this message to the regular sort * queue if it was originated with the same ring * id as the previous ring */ if (memcmp (&instance->my_old_ring_id, &mcast->ring_id, sizeof (struct memb_ring_id)) == 0) { res = sq_item_inuse (&instance->regular_sort_queue, mcast->seq); if (res == 0) { sq_item_add (&instance->regular_sort_queue, ®ular_message_item, mcast->seq); if (sq_lt_compare (instance->old_ring_state_high_seq_received, mcast->seq)) { instance->old_ring_state_high_seq_received = mcast->seq; } } } else { log_printf (instance->totemsrp_log_level_debug, "-not adding msg with seq no %x", mcast->seq); } } } /* * Change states in the state machine of the membership algorithm */ static void memb_state_operational_enter (struct totemsrp_instance *instance) { struct srp_addr joined_list[PROCESSOR_COUNT_MAX]; int joined_list_entries = 0; unsigned int aru_save; unsigned int joined_list_totemip[PROCESSOR_COUNT_MAX]; unsigned int trans_memb_list_totemip[PROCESSOR_COUNT_MAX]; unsigned int new_memb_list_totemip[PROCESSOR_COUNT_MAX]; unsigned int left_list[PROCESSOR_COUNT_MAX]; unsigned int i; unsigned int res; memb_consensus_reset (instance); old_ring_state_reset (instance); deliver_messages_from_recovery_to_regular (instance); log_printf (instance->totemsrp_log_level_debug, "Delivering to app %x to %x", instance->my_high_delivered + 1, instance->old_ring_state_high_seq_received); aru_save = instance->my_aru; instance->my_aru = instance->old_ring_state_aru; messages_deliver_to_app (instance, 0, instance->old_ring_state_high_seq_received); /* * Calculate joined and left list */ memb_set_subtract (instance->my_left_memb_list, &instance->my_left_memb_entries, instance->my_memb_list, instance->my_memb_entries, instance->my_trans_memb_list, instance->my_trans_memb_entries); memb_set_subtract (joined_list, &joined_list_entries, instance->my_new_memb_list, instance->my_new_memb_entries, instance->my_trans_memb_list, instance->my_trans_memb_entries); /* * Install new membership */ instance->my_memb_entries = instance->my_new_memb_entries; memcpy (&instance->my_memb_list, instance->my_new_memb_list, sizeof (struct srp_addr) * instance->my_memb_entries); instance->last_released = 0; instance->my_set_retrans_flg = 0; /* * Deliver transitional configuration to application */ srp_addr_to_nodeid (left_list, instance->my_left_memb_list, instance->my_left_memb_entries); srp_addr_to_nodeid (trans_memb_list_totemip, instance->my_trans_memb_list, instance->my_trans_memb_entries); instance->totemsrp_confchg_fn (TOTEM_CONFIGURATION_TRANSITIONAL, trans_memb_list_totemip, instance->my_trans_memb_entries, left_list, instance->my_left_memb_entries, 0, 0, &instance->my_ring_id); // TODO we need to filter to ensure we only deliver those // messages which are part of instance->my_deliver_memb messages_deliver_to_app (instance, 1, instance->old_ring_state_high_seq_received); instance->my_aru = aru_save; /* * Deliver regular configuration to application */ srp_addr_to_nodeid (new_memb_list_totemip, instance->my_new_memb_list, instance->my_new_memb_entries); srp_addr_to_nodeid (joined_list_totemip, joined_list, joined_list_entries); instance->totemsrp_confchg_fn (TOTEM_CONFIGURATION_REGULAR, new_memb_list_totemip, instance->my_new_memb_entries, 0, 0, joined_list_totemip, joined_list_entries, &instance->my_ring_id); /* * The recovery sort queue now becomes the regular * sort queue. It is necessary to copy the state * into the regular sort queue. */ sq_copy (&instance->regular_sort_queue, &instance->recovery_sort_queue); instance->my_last_aru = SEQNO_START_MSG; /* When making my_proc_list smaller, ensure that the * now non-used entries are zero-ed out. There are some suspect * assert's that assume that there is always 2 entries in the list. * These fail when my_proc_list is reduced to 1 entry (and the * valid [0] entry is the same as the 'unused' [1] entry). */ memset(instance->my_proc_list, 0, sizeof (struct srp_addr) * instance->my_proc_list_entries); instance->my_proc_list_entries = instance->my_new_memb_entries; memcpy (instance->my_proc_list, instance->my_new_memb_list, sizeof (struct srp_addr) * instance->my_memb_entries); instance->my_failed_list_entries = 0; /* * TODO Not exactly to spec * * At the entry to this function all messages without a gap are * deliered. * * This code throw away messages from the last gap in the sort queue * to my_high_seq_received * * What should really happen is we should deliver all messages up to * a gap, then delier the transitional configuration, then deliver * the messages between the first gap and my_high_seq_received, then * deliver a regular configuration, then deliver the regular * configuration * * Unfortunately totempg doesn't appear to like this operating mode * which needs more inspection */ i = instance->my_high_seq_received + 1; do { void *ptr; i -= 1; res = sq_item_get (&instance->regular_sort_queue, i, &ptr); if (i == 0) { break; } } while (res); instance->my_high_delivered = i; for (i = 0; i <= instance->my_high_delivered; i++) { void *ptr; res = sq_item_get (&instance->regular_sort_queue, i, &ptr); if (res == 0) { struct sort_queue_item *regular_message; regular_message = ptr; free (regular_message->mcast); } } sq_items_release (&instance->regular_sort_queue, instance->my_high_delivered); instance->last_released = instance->my_high_delivered; log_printf (instance->totemsrp_log_level_debug, "entering OPERATIONAL state."); log_printf (instance->totemsrp_log_level_notice, "A processor joined or left the membership and a new membership was formed."); instance->memb_state = MEMB_STATE_OPERATIONAL; instance->stats.operational_entered++; instance->stats.continuous_gather = 0; instance->my_received_flg = 1; reset_pause_timeout (instance); /* * Save ring id information from this configuration to determine * which processors are transitioning from old regular configuration * in to new regular configuration on the next configuration change */ memcpy (&instance->my_old_ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); return; } static void memb_state_gather_enter ( struct totemsrp_instance *instance, int gather_from) { instance->orf_token_discard = 1; memb_set_merge ( &instance->my_id, 1, instance->my_proc_list, &instance->my_proc_list_entries); memb_join_message_send (instance); /* * Restart the join timeout */ qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout); qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->join_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, memb_timer_function_state_gather, &instance->memb_timer_state_gather_join_timeout); /* * Restart the consensus timeout */ qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_consensus_timeout); qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->consensus_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, memb_timer_function_gather_consensus_timeout, &instance->memb_timer_state_gather_consensus_timeout); /* * Cancel the token loss and token retransmission timeouts */ cancel_token_retransmit_timeout (instance); // REVIEWED cancel_token_timeout (instance); // REVIEWED cancel_merge_detect_timeout (instance); memb_consensus_reset (instance); memb_consensus_set (instance, &instance->my_id); log_printf (instance->totemsrp_log_level_debug, "entering GATHER state from %d.", gather_from); instance->memb_state = MEMB_STATE_GATHER; instance->stats.gather_entered++; if (gather_from == 3) { /* * State 3 means gather, so we are continuously gathering. */ instance->stats.continuous_gather++; } if (instance->stats.continuous_gather > MAX_NO_CONT_GATHER) { log_printf (instance->totemsrp_log_level_warning, "Totem is unable to form a cluster because of an " "operating system or network fault. The most common " "cause of this message is that the local firewall is " "configured improperly."); } return; } static void timer_function_token_retransmit_timeout (void *data); static void target_set_completed ( void *context) { struct totemsrp_instance *instance = (struct totemsrp_instance *)context; memb_state_commit_token_send (instance); } static void memb_state_commit_enter ( struct totemsrp_instance *instance) { old_ring_state_save (instance); memb_state_commit_token_update (instance); memb_state_commit_token_target_set (instance); qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout); instance->memb_timer_state_gather_join_timeout = 0; qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_consensus_timeout); instance->memb_timer_state_gather_consensus_timeout = 0; memb_ring_id_set_and_store (instance, &instance->commit_token->ring_id); instance->token_ring_id_seq = instance->my_ring_id.seq; log_printf (instance->totemsrp_log_level_debug, "entering COMMIT state."); instance->memb_state = MEMB_STATE_COMMIT; reset_token_retransmit_timeout (instance); // REVIEWED reset_token_timeout (instance); // REVIEWED instance->stats.commit_entered++; instance->stats.continuous_gather = 0; /* * reset all flow control variables since we are starting a new ring */ instance->my_trc = 0; instance->my_pbl = 0; instance->my_cbl = 0; /* * commit token sent after callback that token target has been set */ } static void memb_state_recovery_enter ( struct totemsrp_instance *instance, struct memb_commit_token *commit_token) { int i; int local_received_flg = 1; unsigned int low_ring_aru; unsigned int range = 0; unsigned int messages_originated = 0; const struct srp_addr *addr; struct memb_commit_token_memb_entry *memb_list; struct memb_ring_id my_new_memb_ring_id_list[PROCESSOR_COUNT_MAX]; addr = (const struct srp_addr *)commit_token->end_of_commit_token; memb_list = (struct memb_commit_token_memb_entry *)(addr + commit_token->addr_entries); log_printf (instance->totemsrp_log_level_debug, "entering RECOVERY state."); instance->orf_token_discard = 0; instance->my_high_ring_delivered = 0; sq_reinit (&instance->recovery_sort_queue, SEQNO_START_MSG); cs_queue_reinit (&instance->retrans_message_queue); low_ring_aru = instance->old_ring_state_high_seq_received; memb_state_commit_token_send_recovery (instance, commit_token); instance->my_token_seq = SEQNO_START_TOKEN - 1; /* * Build regular configuration */ totemrrp_processor_count_set ( instance->totemrrp_context, commit_token->addr_entries); /* * Build transitional configuration */ for (i = 0; i < instance->my_new_memb_entries; i++) { memcpy (&my_new_memb_ring_id_list[i], &memb_list[i].ring_id, sizeof (struct memb_ring_id)); } memb_set_and_with_ring_id ( instance->my_new_memb_list, my_new_memb_ring_id_list, instance->my_new_memb_entries, instance->my_memb_list, instance->my_memb_entries, &instance->my_old_ring_id, instance->my_trans_memb_list, &instance->my_trans_memb_entries); for (i = 0; i < instance->my_trans_memb_entries; i++) { log_printf (instance->totemsrp_log_level_debug, "TRANS [%d] member %s:", i, totemip_print (&instance->my_trans_memb_list[i].addr[0])); } for (i = 0; i < instance->my_new_memb_entries; i++) { log_printf (instance->totemsrp_log_level_debug, "position [%d] member %s:", i, totemip_print (&addr[i].addr[0])); log_printf (instance->totemsrp_log_level_debug, "previous ring seq %llx rep %s", memb_list[i].ring_id.seq, totemip_print (&memb_list[i].ring_id.rep)); log_printf (instance->totemsrp_log_level_debug, "aru %x high delivered %x received flag %d", memb_list[i].aru, memb_list[i].high_delivered, memb_list[i].received_flg); // assert (totemip_print (&memb_list[i].ring_id.rep) != 0); } /* * Determine if any received flag is false */ for (i = 0; i < commit_token->addr_entries; i++) { if (memb_set_subset (&instance->my_new_memb_list[i], 1, instance->my_trans_memb_list, instance->my_trans_memb_entries) && memb_list[i].received_flg == 0) { instance->my_deliver_memb_entries = instance->my_trans_memb_entries; memcpy (instance->my_deliver_memb_list, instance->my_trans_memb_list, sizeof (struct srp_addr) * instance->my_trans_memb_entries); local_received_flg = 0; break; } } if (local_received_flg == 1) { goto no_originate; } /* Else originate messages if we should */ /* * Calculate my_low_ring_aru, instance->my_high_ring_delivered for the transitional membership */ for (i = 0; i < commit_token->addr_entries; i++) { if (memb_set_subset (&instance->my_new_memb_list[i], 1, instance->my_deliver_memb_list, instance->my_deliver_memb_entries) && memcmp (&instance->my_old_ring_id, &memb_list[i].ring_id, sizeof (struct memb_ring_id)) == 0) { if (sq_lt_compare (memb_list[i].aru, low_ring_aru)) { low_ring_aru = memb_list[i].aru; } if (sq_lt_compare (instance->my_high_ring_delivered, memb_list[i].high_delivered)) { instance->my_high_ring_delivered = memb_list[i].high_delivered; } } } /* * Copy all old ring messages to instance->retrans_message_queue */ range = instance->old_ring_state_high_seq_received - low_ring_aru; if (range == 0) { /* * No messages to copy */ goto no_originate; } assert (range < QUEUE_RTR_ITEMS_SIZE_MAX); log_printf (instance->totemsrp_log_level_debug, "copying all old ring messages from %x-%x.", low_ring_aru + 1, instance->old_ring_state_high_seq_received); for (i = 1; i <= range; i++) { struct sort_queue_item *sort_queue_item; struct message_item message_item; void *ptr; int res; res = sq_item_get (&instance->regular_sort_queue, low_ring_aru + i, &ptr); if (res != 0) { continue; } sort_queue_item = ptr; messages_originated++; memset (&message_item, 0, sizeof (struct message_item)); // TODO LEAK message_item.mcast = totemsrp_buffer_alloc (instance); assert (message_item.mcast); message_item.mcast->header.type = MESSAGE_TYPE_MCAST; srp_addr_copy (&message_item.mcast->system_from, &instance->my_id); message_item.mcast->header.encapsulated = MESSAGE_ENCAPSULATED; message_item.mcast->header.nodeid = instance->my_id.addr[0].nodeid; assert (message_item.mcast->header.nodeid); message_item.mcast->header.endian_detector = ENDIAN_LOCAL; memcpy (&message_item.mcast->ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); message_item.msg_len = sort_queue_item->msg_len + sizeof (struct mcast); memcpy (((char *)message_item.mcast) + sizeof (struct mcast), sort_queue_item->mcast, sort_queue_item->msg_len); cs_queue_item_add (&instance->retrans_message_queue, &message_item); } log_printf (instance->totemsrp_log_level_debug, "Originated %d messages in RECOVERY.", messages_originated); goto originated; no_originate: log_printf (instance->totemsrp_log_level_debug, "Did not need to originate any messages in recovery."); originated: instance->my_aru = SEQNO_START_MSG; instance->my_aru_count = 0; instance->my_seq_unchanged = 0; instance->my_high_seq_received = SEQNO_START_MSG; instance->my_install_seq = SEQNO_START_MSG; instance->last_released = SEQNO_START_MSG; reset_token_timeout (instance); // REVIEWED reset_token_retransmit_timeout (instance); // REVIEWED instance->memb_state = MEMB_STATE_RECOVERY; instance->stats.recovery_entered++; instance->stats.continuous_gather = 0; return; } void totemsrp_event_signal (void *srp_context, enum totem_event_type type, int value) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; token_hold_cancel_send (instance); return; } int totemsrp_mcast ( void *srp_context, struct iovec *iovec, unsigned int iov_len, int guarantee) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; int i; struct message_item message_item; char *addr; unsigned int addr_idx; if (cs_queue_is_full (&instance->new_message_queue)) { log_printf (instance->totemsrp_log_level_debug, "queue full"); return (-1); } memset (&message_item, 0, sizeof (struct message_item)); /* * Allocate pending item */ message_item.mcast = totemsrp_buffer_alloc (instance); if (message_item.mcast == 0) { goto error_mcast; } /* * Set mcast header */ memset(message_item.mcast, 0, sizeof (struct mcast)); message_item.mcast->header.type = MESSAGE_TYPE_MCAST; message_item.mcast->header.endian_detector = ENDIAN_LOCAL; message_item.mcast->header.encapsulated = MESSAGE_NOT_ENCAPSULATED; message_item.mcast->header.nodeid = instance->my_id.addr[0].nodeid; assert (message_item.mcast->header.nodeid); message_item.mcast->guarantee = guarantee; srp_addr_copy (&message_item.mcast->system_from, &instance->my_id); addr = (char *)message_item.mcast; addr_idx = sizeof (struct mcast); for (i = 0; i < iov_len; i++) { memcpy (&addr[addr_idx], iovec[i].iov_base, iovec[i].iov_len); addr_idx += iovec[i].iov_len; } message_item.msg_len = addr_idx; log_printf (instance->totemsrp_log_level_debug, "mcasted message added to pending queue"); instance->stats.mcast_tx++; cs_queue_item_add (&instance->new_message_queue, &message_item); return (0); error_mcast: return (-1); } /* * Determine if there is room to queue a new message */ int totemsrp_avail (void *srp_context) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; int avail; cs_queue_avail (&instance->new_message_queue, &avail); return (avail); } /* * ORF Token Management */ /* * Recast message to mcast group if it is available */ static int orf_token_remcast ( struct totemsrp_instance *instance, int seq) { struct sort_queue_item *sort_queue_item; int res; void *ptr; struct sq *sort_queue; if (instance->memb_state == MEMB_STATE_RECOVERY) { sort_queue = &instance->recovery_sort_queue; } else { sort_queue = &instance->regular_sort_queue; } res = sq_in_range (sort_queue, seq); if (res == 0) { log_printf (instance->totemsrp_log_level_debug, "sq not in range"); return (-1); } /* * Get RTR item at seq, if not available, return */ res = sq_item_get (sort_queue, seq, &ptr); if (res != 0) { return -1; } sort_queue_item = ptr; totemrrp_mcast_noflush_send ( instance->totemrrp_context, sort_queue_item->mcast, sort_queue_item->msg_len); return (0); } /* * Free all freeable messages from ring */ static void messages_free ( struct totemsrp_instance *instance, unsigned int token_aru) { struct sort_queue_item *regular_message; unsigned int i; int res; int log_release = 0; unsigned int release_to; unsigned int range = 0; release_to = token_aru; if (sq_lt_compare (instance->my_last_aru, release_to)) { release_to = instance->my_last_aru; } if (sq_lt_compare (instance->my_high_delivered, release_to)) { release_to = instance->my_high_delivered; } /* * Ensure we dont try release before an already released point */ if (sq_lt_compare (release_to, instance->last_released)) { return; } range = release_to - instance->last_released; assert (range < QUEUE_RTR_ITEMS_SIZE_MAX); /* * Release retransmit list items if group aru indicates they are transmitted */ for (i = 1; i <= range; i++) { void *ptr; res = sq_item_get (&instance->regular_sort_queue, instance->last_released + i, &ptr); if (res == 0) { regular_message = ptr; totemsrp_buffer_release (instance, regular_message->mcast); } sq_items_release (&instance->regular_sort_queue, instance->last_released + i); log_release = 1; } instance->last_released += range; if (log_release) { log_printf (instance->totemsrp_log_level_debug, "releasing messages up to and including %x", release_to); } } static void update_aru ( struct totemsrp_instance *instance) { unsigned int i; int res; struct sq *sort_queue; unsigned int range; unsigned int my_aru_saved = 0; if (instance->memb_state == MEMB_STATE_RECOVERY) { sort_queue = &instance->recovery_sort_queue; } else { sort_queue = &instance->regular_sort_queue; } range = instance->my_high_seq_received - instance->my_aru; my_aru_saved = instance->my_aru; for (i = 1; i <= range; i++) { void *ptr; res = sq_item_get (sort_queue, my_aru_saved + i, &ptr); /* * If hole, stop updating aru */ if (res != 0) { break; } } instance->my_aru += i - 1; } /* * Multicasts pending messages onto the ring (requires orf_token possession) */ static int orf_token_mcast ( struct totemsrp_instance *instance, struct orf_token *token, int fcc_mcasts_allowed) { struct message_item *message_item = 0; struct cs_queue *mcast_queue; struct sq *sort_queue; struct sort_queue_item sort_queue_item; struct mcast *mcast; unsigned int fcc_mcast_current; if (instance->memb_state == MEMB_STATE_RECOVERY) { mcast_queue = &instance->retrans_message_queue; sort_queue = &instance->recovery_sort_queue; reset_token_retransmit_timeout (instance); // REVIEWED } else { mcast_queue = &instance->new_message_queue; sort_queue = &instance->regular_sort_queue; } for (fcc_mcast_current = 0; fcc_mcast_current < fcc_mcasts_allowed; fcc_mcast_current++) { if (cs_queue_is_empty (mcast_queue)) { break; } message_item = (struct message_item *)cs_queue_item_get (mcast_queue); message_item->mcast->seq = ++token->seq; message_item->mcast->this_seqno = instance->global_seqno++; /* * Build IO vector */ memset (&sort_queue_item, 0, sizeof (struct sort_queue_item)); sort_queue_item.mcast = message_item->mcast; sort_queue_item.msg_len = message_item->msg_len; mcast = sort_queue_item.mcast; memcpy (&mcast->ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); /* * Add message to retransmit queue */ sq_item_add (sort_queue, &sort_queue_item, message_item->mcast->seq); totemrrp_mcast_noflush_send ( instance->totemrrp_context, message_item->mcast, message_item->msg_len); /* * Delete item from pending queue */ cs_queue_item_remove (mcast_queue); /* * If messages mcasted, deliver any new messages to totempg */ instance->my_high_seq_received = token->seq; } update_aru (instance); /* * Return 1 if more messages are available for single node clusters */ return (fcc_mcast_current); } /* * Remulticasts messages in orf_token's retransmit list (requires orf_token) * Modify's orf_token's rtr to include retransmits required by this process */ static int orf_token_rtr ( struct totemsrp_instance *instance, struct orf_token *orf_token, unsigned int *fcc_allowed) { unsigned int res; unsigned int i, j; unsigned int found; struct sq *sort_queue; struct rtr_item *rtr_list; unsigned int range = 0; char retransmit_msg[1024]; char value[64]; if (instance->memb_state == MEMB_STATE_RECOVERY) { sort_queue = &instance->recovery_sort_queue; } else { sort_queue = &instance->regular_sort_queue; } rtr_list = &orf_token->rtr_list[0]; strcpy (retransmit_msg, "Retransmit List: "); if (orf_token->rtr_list_entries) { log_printf (instance->totemsrp_log_level_debug, "Retransmit List %d", orf_token->rtr_list_entries); for (i = 0; i < orf_token->rtr_list_entries; i++) { sprintf (value, "%x ", rtr_list[i].seq); strcat (retransmit_msg, value); } strcat (retransmit_msg, ""); log_printf (instance->totemsrp_log_level_notice, "%s", retransmit_msg); } /* * Retransmit messages on orf_token's RTR list from RTR queue */ for (instance->fcc_remcast_current = 0, i = 0; instance->fcc_remcast_current < *fcc_allowed && i < orf_token->rtr_list_entries;) { /* * If this retransmit request isn't from this configuration, * try next rtr entry */ if (memcmp (&rtr_list[i].ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)) != 0) { i += 1; continue; } res = orf_token_remcast (instance, rtr_list[i].seq); if (res == 0) { /* * Multicasted message, so no need to copy to new retransmit list */ orf_token->rtr_list_entries -= 1; assert (orf_token->rtr_list_entries >= 0); memmove (&rtr_list[i], &rtr_list[i + 1], sizeof (struct rtr_item) * (orf_token->rtr_list_entries - i)); instance->stats.mcast_retx++; instance->fcc_remcast_current++; } else { i += 1; } } *fcc_allowed = *fcc_allowed - instance->fcc_remcast_current; /* * Add messages to retransmit to RTR list * but only retry if there is room in the retransmit list */ range = orf_token->seq - instance->my_aru; assert (range < QUEUE_RTR_ITEMS_SIZE_MAX); for (i = 1; (orf_token->rtr_list_entries < RETRANSMIT_ENTRIES_MAX) && (i <= range); i++) { /* * Ensure message is within the sort queue range */ res = sq_in_range (sort_queue, instance->my_aru + i); if (res == 0) { break; } /* * Find if a message is missing from this processor */ res = sq_item_inuse (sort_queue, instance->my_aru + i); if (res == 0) { /* * Determine how many times we have missed receiving * this sequence number. sq_item_miss_count increments * a counter for the sequence number. The miss count * will be returned and compared. This allows time for * delayed multicast messages to be received before * declaring the message is missing and requesting a * retransmit. */ res = sq_item_miss_count (sort_queue, instance->my_aru + i); if (res < instance->totem_config->miss_count_const) { continue; } /* * Determine if missing message is already in retransmit list */ found = 0; for (j = 0; j < orf_token->rtr_list_entries; j++) { if (instance->my_aru + i == rtr_list[j].seq) { found = 1; } } if (found == 0) { /* * Missing message not found in current retransmit list so add it */ memcpy (&rtr_list[orf_token->rtr_list_entries].ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); rtr_list[orf_token->rtr_list_entries].seq = instance->my_aru + i; orf_token->rtr_list_entries++; } } } return (instance->fcc_remcast_current); } static void token_retransmit (struct totemsrp_instance *instance) { totemrrp_token_send (instance->totemrrp_context, instance->orf_token_retransmit, instance->orf_token_retransmit_size); } /* * Retransmit the regular token if no mcast or token has * been received in retransmit token period retransmit * the token to the next processor */ static void timer_function_token_retransmit_timeout (void *data) { struct totemsrp_instance *instance = data; switch (instance->memb_state) { case MEMB_STATE_GATHER: break; case MEMB_STATE_COMMIT: case MEMB_STATE_OPERATIONAL: case MEMB_STATE_RECOVERY: token_retransmit (instance); reset_token_retransmit_timeout (instance); // REVIEWED break; } } static void timer_function_token_hold_retransmit_timeout (void *data) { struct totemsrp_instance *instance = data; switch (instance->memb_state) { case MEMB_STATE_GATHER: break; case MEMB_STATE_COMMIT: break; case MEMB_STATE_OPERATIONAL: case MEMB_STATE_RECOVERY: token_retransmit (instance); break; } } static void timer_function_merge_detect_timeout(void *data) { struct totemsrp_instance *instance = data; instance->my_merge_detect_timeout_outstanding = 0; switch (instance->memb_state) { case MEMB_STATE_OPERATIONAL: if (totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0])) { memb_merge_detect_transmit (instance); } break; case MEMB_STATE_GATHER: case MEMB_STATE_COMMIT: case MEMB_STATE_RECOVERY: break; } } /* * Send orf_token to next member (requires orf_token) */ static int token_send ( struct totemsrp_instance *instance, struct orf_token *orf_token, int forward_token) { int res = 0; unsigned int orf_token_size; orf_token_size = sizeof (struct orf_token) + (orf_token->rtr_list_entries * sizeof (struct rtr_item)); orf_token->header.nodeid = instance->my_id.addr[0].nodeid; memcpy (instance->orf_token_retransmit, orf_token, orf_token_size); instance->orf_token_retransmit_size = orf_token_size; assert (orf_token->header.nodeid); if (forward_token == 0) { return (0); } totemrrp_token_send (instance->totemrrp_context, orf_token, orf_token_size); return (res); } static int token_hold_cancel_send (struct totemsrp_instance *instance) { struct token_hold_cancel token_hold_cancel; /* * Only cancel if the token is currently held */ if (instance->my_token_held == 0) { return (0); } instance->my_token_held = 0; /* * Build message */ token_hold_cancel.header.type = MESSAGE_TYPE_TOKEN_HOLD_CANCEL; token_hold_cancel.header.endian_detector = ENDIAN_LOCAL; token_hold_cancel.header.encapsulated = 0; token_hold_cancel.header.nodeid = instance->my_id.addr[0].nodeid; memcpy (&token_hold_cancel.ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); assert (token_hold_cancel.header.nodeid); instance->stats.token_hold_cancel_tx++; totemrrp_mcast_flush_send (instance->totemrrp_context, &token_hold_cancel, sizeof (struct token_hold_cancel)); return (0); } static int orf_token_send_initial (struct totemsrp_instance *instance) { struct orf_token orf_token; int res; orf_token.header.type = MESSAGE_TYPE_ORF_TOKEN; orf_token.header.endian_detector = ENDIAN_LOCAL; orf_token.header.encapsulated = 0; orf_token.header.nodeid = instance->my_id.addr[0].nodeid; assert (orf_token.header.nodeid); orf_token.seq = SEQNO_START_MSG; orf_token.token_seq = SEQNO_START_TOKEN; orf_token.retrans_flg = 1; instance->my_set_retrans_flg = 1; instance->stats.orf_token_tx++; if (cs_queue_is_empty (&instance->retrans_message_queue) == 1) { orf_token.retrans_flg = 0; instance->my_set_retrans_flg = 0; } else { orf_token.retrans_flg = 1; instance->my_set_retrans_flg = 1; } orf_token.aru = 0; orf_token.aru = SEQNO_START_MSG - 1; orf_token.aru_addr = instance->my_id.addr[0].nodeid; memcpy (&orf_token.ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); orf_token.fcc = 0; orf_token.backlog = 0; orf_token.rtr_list_entries = 0; res = token_send (instance, &orf_token, 1); return (res); } static void memb_state_commit_token_update ( struct totemsrp_instance *instance) { struct srp_addr *addr; struct memb_commit_token_memb_entry *memb_list; unsigned int high_aru; unsigned int i; addr = (struct srp_addr *)instance->commit_token->end_of_commit_token; memb_list = (struct memb_commit_token_memb_entry *)(addr + instance->commit_token->addr_entries); memcpy (instance->my_new_memb_list, addr, sizeof (struct srp_addr) * instance->commit_token->addr_entries); instance->my_new_memb_entries = instance->commit_token->addr_entries; memcpy (&memb_list[instance->commit_token->memb_index].ring_id, &instance->my_old_ring_id, sizeof (struct memb_ring_id)); memb_list[instance->commit_token->memb_index].aru = instance->old_ring_state_aru; /* * TODO high delivered is really instance->my_aru, but with safe this * could change? */ instance->my_received_flg = (instance->my_aru == instance->my_high_seq_received); memb_list[instance->commit_token->memb_index].received_flg = instance->my_received_flg; memb_list[instance->commit_token->memb_index].high_delivered = instance->my_high_delivered; /* * find high aru up to current memb_index for all matching ring ids * if any ring id matching memb_index has aru less then high aru set * received flag for that entry to false */ high_aru = memb_list[instance->commit_token->memb_index].aru; for (i = 0; i <= instance->commit_token->memb_index; i++) { if (memcmp (&memb_list[instance->commit_token->memb_index].ring_id, &memb_list[i].ring_id, sizeof (struct memb_ring_id)) == 0) { if (sq_lt_compare (high_aru, memb_list[i].aru)) { high_aru = memb_list[i].aru; } } } for (i = 0; i <= instance->commit_token->memb_index; i++) { if (memcmp (&memb_list[instance->commit_token->memb_index].ring_id, &memb_list[i].ring_id, sizeof (struct memb_ring_id)) == 0) { if (sq_lt_compare (memb_list[i].aru, high_aru)) { memb_list[i].received_flg = 0; if (i == instance->commit_token->memb_index) { instance->my_received_flg = 0; } } } } instance->commit_token->header.nodeid = instance->my_id.addr[0].nodeid; instance->commit_token->memb_index += 1; assert (instance->commit_token->memb_index <= instance->commit_token->addr_entries); assert (instance->commit_token->header.nodeid); } static void memb_state_commit_token_target_set ( struct totemsrp_instance *instance) { struct srp_addr *addr; unsigned int i; addr = (struct srp_addr *)instance->commit_token->end_of_commit_token; for (i = 0; i < instance->totem_config->interface_count; i++) { totemrrp_token_target_set ( instance->totemrrp_context, &addr[instance->commit_token->memb_index % instance->commit_token->addr_entries].addr[i], i); } } static int memb_state_commit_token_send_recovery ( struct totemsrp_instance *instance, struct memb_commit_token *commit_token) { unsigned int commit_token_size; commit_token->token_seq++; commit_token->header.nodeid = instance->my_id.addr[0].nodeid; commit_token_size = sizeof (struct memb_commit_token) + ((sizeof (struct srp_addr) + sizeof (struct memb_commit_token_memb_entry)) * commit_token->addr_entries); /* * Make a copy for retransmission if necessary */ memcpy (instance->orf_token_retransmit, commit_token, commit_token_size); instance->orf_token_retransmit_size = commit_token_size; instance->stats.memb_commit_token_tx++; totemrrp_token_send (instance->totemrrp_context, commit_token, commit_token_size); /* * Request retransmission of the commit token in case it is lost */ reset_token_retransmit_timeout (instance); return (0); } static int memb_state_commit_token_send ( struct totemsrp_instance *instance) { unsigned int commit_token_size; instance->commit_token->token_seq++; instance->commit_token->header.nodeid = instance->my_id.addr[0].nodeid; commit_token_size = sizeof (struct memb_commit_token) + ((sizeof (struct srp_addr) + sizeof (struct memb_commit_token_memb_entry)) * instance->commit_token->addr_entries); /* * Make a copy for retransmission if necessary */ memcpy (instance->orf_token_retransmit, instance->commit_token, commit_token_size); instance->orf_token_retransmit_size = commit_token_size; instance->stats.memb_commit_token_tx++; totemrrp_token_send (instance->totemrrp_context, instance->commit_token, commit_token_size); /* * Request retransmission of the commit token in case it is lost */ reset_token_retransmit_timeout (instance); return (0); } static int memb_lowest_in_config (struct totemsrp_instance *instance) { struct srp_addr token_memb[PROCESSOR_COUNT_MAX]; int token_memb_entries = 0; int i; struct totem_ip_address *lowest_addr; memb_set_subtract (token_memb, &token_memb_entries, instance->my_proc_list, instance->my_proc_list_entries, instance->my_failed_list, instance->my_failed_list_entries); /* * find representative by searching for smallest identifier */ lowest_addr = &token_memb[0].addr[0]; for (i = 1; i < token_memb_entries; i++) { if (totemip_compare(lowest_addr, &token_memb[i].addr[0]) > 0) { totemip_copy (lowest_addr, &token_memb[i].addr[0]); } } return (totemip_compare (lowest_addr, &instance->my_id.addr[0]) == 0); } static int srp_addr_compare (const void *a, const void *b) { const struct srp_addr *srp_a = (const struct srp_addr *)a; const struct srp_addr *srp_b = (const struct srp_addr *)b; return (totemip_compare (&srp_a->addr[0], &srp_b->addr[0])); } static void memb_state_commit_token_create ( struct totemsrp_instance *instance) { struct srp_addr token_memb[PROCESSOR_COUNT_MAX]; struct srp_addr *addr; struct memb_commit_token_memb_entry *memb_list; int token_memb_entries = 0; log_printf (instance->totemsrp_log_level_debug, "Creating commit token because I am the rep."); memb_set_subtract (token_memb, &token_memb_entries, instance->my_proc_list, instance->my_proc_list_entries, instance->my_failed_list, instance->my_failed_list_entries); memset (instance->commit_token, 0, sizeof (struct memb_commit_token)); instance->commit_token->header.type = MESSAGE_TYPE_MEMB_COMMIT_TOKEN; instance->commit_token->header.endian_detector = ENDIAN_LOCAL; instance->commit_token->header.encapsulated = 0; instance->commit_token->header.nodeid = instance->my_id.addr[0].nodeid; assert (instance->commit_token->header.nodeid); totemip_copy(&instance->commit_token->ring_id.rep, &instance->my_id.addr[0]); instance->commit_token->ring_id.seq = instance->token_ring_id_seq + 4; /* * This qsort is necessary to ensure the commit token traverses * the ring in the proper order */ qsort (token_memb, token_memb_entries, sizeof (struct srp_addr), srp_addr_compare); instance->commit_token->memb_index = 0; instance->commit_token->addr_entries = token_memb_entries; addr = (struct srp_addr *)instance->commit_token->end_of_commit_token; memb_list = (struct memb_commit_token_memb_entry *)(addr + instance->commit_token->addr_entries); memcpy (addr, token_memb, token_memb_entries * sizeof (struct srp_addr)); memset (memb_list, 0, sizeof (struct memb_commit_token_memb_entry) * token_memb_entries); } static void memb_join_message_send (struct totemsrp_instance *instance) { char memb_join_data[40000]; struct memb_join *memb_join = (struct memb_join *)memb_join_data; char *addr; unsigned int addr_idx; memb_join->header.type = MESSAGE_TYPE_MEMB_JOIN; memb_join->header.endian_detector = ENDIAN_LOCAL; memb_join->header.encapsulated = 0; memb_join->header.nodeid = instance->my_id.addr[0].nodeid; assert (memb_join->header.nodeid); memb_join->ring_seq = instance->my_ring_id.seq; memb_join->proc_list_entries = instance->my_proc_list_entries; memb_join->failed_list_entries = instance->my_failed_list_entries; srp_addr_copy (&memb_join->system_from, &instance->my_id); /* * This mess adds the joined and failed processor lists into the join * message */ addr = (char *)memb_join; addr_idx = sizeof (struct memb_join); memcpy (&addr[addr_idx], instance->my_proc_list, instance->my_proc_list_entries * sizeof (struct srp_addr)); addr_idx += instance->my_proc_list_entries * sizeof (struct srp_addr); memcpy (&addr[addr_idx], instance->my_failed_list, instance->my_failed_list_entries * sizeof (struct srp_addr)); addr_idx += instance->my_failed_list_entries * sizeof (struct srp_addr); if (instance->totem_config->send_join_timeout) { usleep (random() % (instance->totem_config->send_join_timeout * 1000)); } instance->stats.memb_join_tx++; totemrrp_mcast_flush_send ( instance->totemrrp_context, memb_join, addr_idx); } static void memb_leave_message_send (struct totemsrp_instance *instance) { char memb_join_data[40000]; struct memb_join *memb_join = (struct memb_join *)memb_join_data; char *addr; unsigned int addr_idx; int active_memb_entries; struct srp_addr active_memb[PROCESSOR_COUNT_MAX]; log_printf (instance->totemsrp_log_level_debug, "sending join/leave message"); /* * add us to the failed list, and remove us from * the members list */ memb_set_merge( &instance->my_id, 1, instance->my_failed_list, &instance->my_failed_list_entries); memb_set_subtract (active_memb, &active_memb_entries, instance->my_proc_list, instance->my_proc_list_entries, &instance->my_id, 1); memb_join->header.type = MESSAGE_TYPE_MEMB_JOIN; memb_join->header.endian_detector = ENDIAN_LOCAL; memb_join->header.encapsulated = 0; memb_join->header.nodeid = LEAVE_DUMMY_NODEID; memb_join->ring_seq = instance->my_ring_id.seq; memb_join->proc_list_entries = active_memb_entries; memb_join->failed_list_entries = instance->my_failed_list_entries; srp_addr_copy (&memb_join->system_from, &instance->my_id); memb_join->system_from.addr[0].nodeid = LEAVE_DUMMY_NODEID; // TODO: CC Maybe use the actual join send routine. /* * This mess adds the joined and failed processor lists into the join * message */ addr = (char *)memb_join; addr_idx = sizeof (struct memb_join); memcpy (&addr[addr_idx], active_memb, active_memb_entries * sizeof (struct srp_addr)); addr_idx += active_memb_entries * sizeof (struct srp_addr); memcpy (&addr[addr_idx], instance->my_failed_list, instance->my_failed_list_entries * sizeof (struct srp_addr)); addr_idx += instance->my_failed_list_entries * sizeof (struct srp_addr); if (instance->totem_config->send_join_timeout) { usleep (random() % (instance->totem_config->send_join_timeout * 1000)); } instance->stats.memb_join_tx++; totemrrp_mcast_flush_send ( instance->totemrrp_context, memb_join, addr_idx); } static void memb_merge_detect_transmit (struct totemsrp_instance *instance) { struct memb_merge_detect memb_merge_detect; memb_merge_detect.header.type = MESSAGE_TYPE_MEMB_MERGE_DETECT; memb_merge_detect.header.endian_detector = ENDIAN_LOCAL; memb_merge_detect.header.encapsulated = 0; memb_merge_detect.header.nodeid = instance->my_id.addr[0].nodeid; srp_addr_copy (&memb_merge_detect.system_from, &instance->my_id); memcpy (&memb_merge_detect.ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); assert (memb_merge_detect.header.nodeid); instance->stats.memb_merge_detect_tx++; totemrrp_mcast_flush_send (instance->totemrrp_context, &memb_merge_detect, sizeof (struct memb_merge_detect)); } static void memb_ring_id_create_or_load ( struct totemsrp_instance *instance, struct memb_ring_id *memb_ring_id) { int fd; int res = 0; char filename[PATH_MAX]; snprintf (filename, sizeof(filename), "%s/ringid_%s", rundir, totemip_print (&instance->my_id.addr[0])); fd = open (filename, O_RDONLY, 0700); /* * If file can be opened and read, read the ring id */ if (fd != -1) { res = read (fd, &memb_ring_id->seq, sizeof (uint64_t)); close (fd); } /* * If file could not be opened or read, create a new ring id */ if ((fd == -1) || (res != sizeof (uint64_t))) { memb_ring_id->seq = 0; umask(0); fd = open (filename, O_CREAT|O_RDWR, 0700); if (fd != -1) { res = write (fd, &memb_ring_id->seq, sizeof (uint64_t)); close (fd); if (res == -1) { LOGSYS_PERROR (errno, instance->totemsrp_log_level_warning, "Couldn't write ringid file '%s'", filename); } } else { LOGSYS_PERROR (errno, instance->totemsrp_log_level_warning, "Couldn't create ringid file '%s'", filename); } } totemip_copy(&memb_ring_id->rep, &instance->my_id.addr[0]); assert (!totemip_zero_check(&memb_ring_id->rep)); instance->token_ring_id_seq = memb_ring_id->seq; } static void memb_ring_id_set_and_store ( struct totemsrp_instance *instance, const struct memb_ring_id *ring_id) { char filename[256]; int fd; int res; memcpy (&instance->my_ring_id, ring_id, sizeof (struct memb_ring_id)); snprintf (filename, sizeof(filename), "%s/ringid_%s", rundir, totemip_print (&instance->my_id.addr[0])); fd = open (filename, O_WRONLY, 0777); if (fd == -1) { fd = open (filename, O_CREAT|O_RDWR, 0777); } if (fd == -1) { LOGSYS_PERROR(errno, instance->totemsrp_log_level_warning, "Couldn't store new ring id %llx to stable storage", instance->my_ring_id.seq); assert (0); return; } log_printf (instance->totemsrp_log_level_debug, "Storing new sequence id for ring %llx", instance->my_ring_id.seq); //assert (fd > 0); res = write (fd, &instance->my_ring_id.seq, sizeof (unsigned long long)); assert (res == sizeof (unsigned long long)); close (fd); } int totemsrp_callback_token_create ( void *srp_context, void **handle_out, enum totem_callback_token_type type, int delete, int (*callback_fn) (enum totem_callback_token_type type, const void *), const void *data) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; struct token_callback_instance *callback_handle; token_hold_cancel_send (instance); callback_handle = malloc (sizeof (struct token_callback_instance)); if (callback_handle == 0) { return (-1); } *handle_out = (void *)callback_handle; list_init (&callback_handle->list); callback_handle->callback_fn = callback_fn; callback_handle->data = (void *) data; callback_handle->callback_type = type; callback_handle->delete = delete; switch (type) { case TOTEM_CALLBACK_TOKEN_RECEIVED: list_add (&callback_handle->list, &instance->token_callback_received_listhead); break; case TOTEM_CALLBACK_TOKEN_SENT: list_add (&callback_handle->list, &instance->token_callback_sent_listhead); break; } return (0); } void totemsrp_callback_token_destroy (void *srp_context, void **handle_out) { struct token_callback_instance *h; if (*handle_out) { h = (struct token_callback_instance *)*handle_out; list_del (&h->list); free (h); h = NULL; *handle_out = 0; } } static void token_callbacks_execute ( struct totemsrp_instance *instance, enum totem_callback_token_type type) { struct list_head *list; struct list_head *list_next; struct list_head *callback_listhead = 0; struct token_callback_instance *token_callback_instance; int res; int del; switch (type) { case TOTEM_CALLBACK_TOKEN_RECEIVED: callback_listhead = &instance->token_callback_received_listhead; break; case TOTEM_CALLBACK_TOKEN_SENT: callback_listhead = &instance->token_callback_sent_listhead; break; default: assert (0); } for (list = callback_listhead->next; list != callback_listhead; list = list_next) { token_callback_instance = list_entry (list, struct token_callback_instance, list); list_next = list->next; del = token_callback_instance->delete; if (del == 1) { list_del (list); } res = token_callback_instance->callback_fn ( token_callback_instance->callback_type, token_callback_instance->data); /* * This callback failed to execute, try it again on the next token */ if (res == -1 && del == 1) { list_add (list, callback_listhead); } else if (del) { free (token_callback_instance); } } } /* * Flow control functions */ static unsigned int backlog_get (struct totemsrp_instance *instance) { unsigned int backlog = 0; if (instance->memb_state == MEMB_STATE_OPERATIONAL) { backlog = cs_queue_used (&instance->new_message_queue); } else if (instance->memb_state == MEMB_STATE_RECOVERY) { backlog = cs_queue_used (&instance->retrans_message_queue); } instance->stats.token[instance->stats.latest_token].backlog_calc = backlog; return (backlog); } static int fcc_calculate ( struct totemsrp_instance *instance, struct orf_token *token) { unsigned int transmits_allowed; unsigned int backlog_calc; transmits_allowed = instance->totem_config->max_messages; if (transmits_allowed > instance->totem_config->window_size - token->fcc) { transmits_allowed = instance->totem_config->window_size - token->fcc; } instance->my_cbl = backlog_get (instance); /* * Only do backlog calculation if there is a backlog otherwise * we would result in div by zero */ if (token->backlog + instance->my_cbl - instance->my_pbl) { backlog_calc = (instance->totem_config->window_size * instance->my_pbl) / (token->backlog + instance->my_cbl - instance->my_pbl); if (backlog_calc > 0 && transmits_allowed > backlog_calc) { transmits_allowed = backlog_calc; } } return (transmits_allowed); } /* * don't overflow the RTR sort queue */ static void fcc_rtr_limit ( struct totemsrp_instance *instance, struct orf_token *token, unsigned int *transmits_allowed) { int check = QUEUE_RTR_ITEMS_SIZE_MAX; check -= (*transmits_allowed + instance->totem_config->window_size); assert (check >= 0); if (sq_lt_compare (instance->last_released + QUEUE_RTR_ITEMS_SIZE_MAX - *transmits_allowed - instance->totem_config->window_size, token->seq)) { *transmits_allowed = 0; } } static void fcc_token_update ( struct totemsrp_instance *instance, struct orf_token *token, unsigned int msgs_transmitted) { token->fcc += msgs_transmitted - instance->my_trc; token->backlog += instance->my_cbl - instance->my_pbl; instance->my_trc = msgs_transmitted; instance->my_pbl = instance->my_cbl; } /* * Message Handlers */ unsigned long long int tv_old; /* * message handler called when TOKEN message type received */ static int message_handler_orf_token ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed) { char token_storage[1500]; char token_convert[1500]; struct orf_token *token = NULL; int forward_token; unsigned int transmits_allowed; unsigned int mcasted_retransmit; unsigned int mcasted_regular; unsigned int last_aru; #ifdef GIVEINFO unsigned long long tv_current; unsigned long long tv_diff; tv_current = qb_util_nano_current_get (); tv_diff = tv_current - tv_old; tv_old = tv_current; log_printf (instance->totemsrp_log_level_debug, "Time since last token %0.4f ms", ((float)tv_diff) / 1000000.0); #endif if (instance->orf_token_discard) { return (0); } #ifdef TEST_DROP_ORF_TOKEN_PERCENTAGE if (random()%100 < TEST_DROP_ORF_TOKEN_PERCENTAGE) { return (0); } #endif if (endian_conversion_needed) { orf_token_endian_convert ((struct orf_token *)msg, (struct orf_token *)token_convert); msg = (struct orf_token *)token_convert; } /* * Make copy of token and retransmit list in case we have * to flush incoming messages from the kernel queue */ token = (struct orf_token *)token_storage; memcpy (token, msg, sizeof (struct orf_token)); memcpy (&token->rtr_list[0], (char *)msg + sizeof (struct orf_token), sizeof (struct rtr_item) * RETRANSMIT_ENTRIES_MAX); /* * Handle merge detection timeout */ if (token->seq == instance->my_last_seq) { start_merge_detect_timeout (instance); instance->my_seq_unchanged += 1; } else { cancel_merge_detect_timeout (instance); cancel_token_hold_retransmit_timeout (instance); instance->my_seq_unchanged = 0; } instance->my_last_seq = token->seq; #ifdef TEST_RECOVERY_MSG_COUNT if (instance->memb_state == MEMB_STATE_OPERATIONAL && token->seq > TEST_RECOVERY_MSG_COUNT) { return (0); } #endif totemrrp_recv_flush (instance->totemrrp_context); /* * Determine if we should hold (in reality drop) the token */ instance->my_token_held = 0; if (totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0]) && instance->my_seq_unchanged > instance->totem_config->seqno_unchanged_const) { instance->my_token_held = 1; } else if (!totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0]) && instance->my_seq_unchanged >= instance->totem_config->seqno_unchanged_const) { instance->my_token_held = 1; } /* * Hold onto token when there is no activity on ring and * this processor is the ring rep */ forward_token = 1; if (totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0])) { if (instance->my_token_held) { forward_token = 0; } } token_callbacks_execute (instance, TOTEM_CALLBACK_TOKEN_RECEIVED); switch (instance->memb_state) { case MEMB_STATE_COMMIT: /* Discard token */ break; case MEMB_STATE_OPERATIONAL: messages_free (instance, token->aru); /* * Do NOT add break, this case should also execute code in gather case. */ case MEMB_STATE_GATHER: /* * DO NOT add break, we use different free mechanism in recovery state */ case MEMB_STATE_RECOVERY: /* * Discard tokens from another configuration */ if (memcmp (&token->ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)) != 0) { if ((forward_token) && instance->use_heartbeat) { reset_heartbeat_timeout(instance); } else { cancel_heartbeat_timeout(instance); } return (0); /* discard token */ } /* * Discard retransmitted tokens */ if (sq_lte_compare (token->token_seq, instance->my_token_seq)) { return (0); /* discard token */ } last_aru = instance->my_last_aru; instance->my_last_aru = token->aru; transmits_allowed = fcc_calculate (instance, token); mcasted_retransmit = orf_token_rtr (instance, token, &transmits_allowed); fcc_rtr_limit (instance, token, &transmits_allowed); mcasted_regular = orf_token_mcast (instance, token, transmits_allowed); /* if (mcasted_regular) { printf ("mcasted regular %d\n", mcasted_regular); printf ("token seq %d\n", token->seq); } */ fcc_token_update (instance, token, mcasted_retransmit + mcasted_regular); if (sq_lt_compare (instance->my_aru, token->aru) || instance->my_id.addr[0].nodeid == token->aru_addr || token->aru_addr == 0) { token->aru = instance->my_aru; if (token->aru == token->seq) { token->aru_addr = 0; } else { token->aru_addr = instance->my_id.addr[0].nodeid; } } if (token->aru == last_aru && token->aru_addr != 0) { instance->my_aru_count += 1; } else { instance->my_aru_count = 0; } if (instance->my_aru_count > instance->totem_config->fail_to_recv_const && token->aru_addr == instance->my_id.addr[0].nodeid) { log_printf (instance->totemsrp_log_level_error, "FAILED TO RECEIVE"); instance->failed_to_recv = 1; memb_set_merge (&instance->my_id, 1, instance->my_failed_list, &instance->my_failed_list_entries); memb_state_gather_enter (instance, 6); } else { instance->my_token_seq = token->token_seq; token->token_seq += 1; if (instance->memb_state == MEMB_STATE_RECOVERY) { /* * instance->my_aru == instance->my_high_seq_received means this processor * has recovered all messages it can recover * (ie: its retrans queue is empty) */ if (cs_queue_is_empty (&instance->retrans_message_queue) == 0) { if (token->retrans_flg == 0) { token->retrans_flg = 1; instance->my_set_retrans_flg = 1; } } else if (token->retrans_flg == 1 && instance->my_set_retrans_flg) { token->retrans_flg = 0; instance->my_set_retrans_flg = 0; } log_printf (instance->totemsrp_log_level_debug, "token retrans flag is %d my set retrans flag%d retrans queue empty %d count %d, aru %x", token->retrans_flg, instance->my_set_retrans_flg, cs_queue_is_empty (&instance->retrans_message_queue), instance->my_retrans_flg_count, token->aru); if (token->retrans_flg == 0) { instance->my_retrans_flg_count += 1; } else { instance->my_retrans_flg_count = 0; } if (instance->my_retrans_flg_count == 2) { instance->my_install_seq = token->seq; } log_printf (instance->totemsrp_log_level_debug, "install seq %x aru %x high seq received %x", instance->my_install_seq, instance->my_aru, instance->my_high_seq_received); if (instance->my_retrans_flg_count >= 2 && instance->my_received_flg == 0 && sq_lte_compare (instance->my_install_seq, instance->my_aru)) { instance->my_received_flg = 1; instance->my_deliver_memb_entries = instance->my_trans_memb_entries; memcpy (instance->my_deliver_memb_list, instance->my_trans_memb_list, sizeof (struct totem_ip_address) * instance->my_trans_memb_entries); } if (instance->my_retrans_flg_count >= 3 && sq_lte_compare (instance->my_install_seq, token->aru)) { instance->my_rotation_counter += 1; } else { instance->my_rotation_counter = 0; } if (instance->my_rotation_counter == 2) { log_printf (instance->totemsrp_log_level_debug, "retrans flag count %x token aru %x install seq %x aru %x %x", instance->my_retrans_flg_count, token->aru, instance->my_install_seq, instance->my_aru, token->seq); memb_state_operational_enter (instance); instance->my_rotation_counter = 0; instance->my_retrans_flg_count = 0; } } totemrrp_send_flush (instance->totemrrp_context); token_send (instance, token, forward_token); #ifdef GIVEINFO tv_current = qb_util_nano_current_get (); tv_diff = tv_current - tv_old; tv_old = tv_current; log_printf (instance->totemsrp_log_level_debug, "I held %0.4f ms", ((float)tv_diff) / 1000000.0); #endif if (instance->memb_state == MEMB_STATE_OPERATIONAL) { messages_deliver_to_app (instance, 0, instance->my_high_seq_received); } /* * Deliver messages after token has been transmitted * to improve performance */ reset_token_timeout (instance); // REVIEWED reset_token_retransmit_timeout (instance); // REVIEWED if (totemip_equal(&instance->my_id.addr[0], &instance->my_ring_id.rep) && instance->my_token_held == 1) { start_token_hold_retransmit_timeout (instance); } token_callbacks_execute (instance, TOTEM_CALLBACK_TOKEN_SENT); } break; } if ((forward_token) && instance->use_heartbeat) { reset_heartbeat_timeout(instance); } else { cancel_heartbeat_timeout(instance); } return (0); } static void messages_deliver_to_app ( struct totemsrp_instance *instance, int skip, unsigned int end_point) { struct sort_queue_item *sort_queue_item_p; unsigned int i; int res; struct mcast *mcast_in; struct mcast mcast_header; unsigned int range = 0; int endian_conversion_required; unsigned int my_high_delivered_stored = 0; range = end_point - instance->my_high_delivered; if (range) { log_printf (instance->totemsrp_log_level_debug, "Delivering %x to %x", instance->my_high_delivered, end_point); } assert (range < QUEUE_RTR_ITEMS_SIZE_MAX); my_high_delivered_stored = instance->my_high_delivered; /* * Deliver messages in order from rtr queue to pending delivery queue */ for (i = 1; i <= range; i++) { void *ptr = 0; /* * If out of range of sort queue, stop assembly */ res = sq_in_range (&instance->regular_sort_queue, my_high_delivered_stored + i); if (res == 0) { break; } res = sq_item_get (&instance->regular_sort_queue, my_high_delivered_stored + i, &ptr); /* * If hole, stop assembly */ if (res != 0 && skip == 0) { break; } instance->my_high_delivered = my_high_delivered_stored + i; if (res != 0) { continue; } sort_queue_item_p = ptr; mcast_in = sort_queue_item_p->mcast; assert (mcast_in != (struct mcast *)0xdeadbeef); endian_conversion_required = 0; if (mcast_in->header.endian_detector != ENDIAN_LOCAL) { endian_conversion_required = 1; mcast_endian_convert (mcast_in, &mcast_header); } else { memcpy (&mcast_header, mcast_in, sizeof (struct mcast)); } /* * Skip messages not originated in instance->my_deliver_memb */ if (skip && memb_set_subset (&mcast_header.system_from, 1, instance->my_deliver_memb_list, instance->my_deliver_memb_entries) == 0) { instance->my_high_delivered = my_high_delivered_stored + i; continue; } /* * Message found */ log_printf (instance->totemsrp_log_level_debug, "Delivering MCAST message with seq %x to pending delivery queue", mcast_header.seq); /* * Message is locally originated multicast */ instance->totemsrp_deliver_fn ( mcast_header.header.nodeid, ((char *)sort_queue_item_p->mcast) + sizeof (struct mcast), sort_queue_item_p->msg_len - sizeof (struct mcast), endian_conversion_required); } } /* * recv message handler called when MCAST message type received */ static int message_handler_mcast ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed) { struct sort_queue_item sort_queue_item; struct sq *sort_queue; struct mcast mcast_header; if (endian_conversion_needed) { mcast_endian_convert (msg, &mcast_header); } else { memcpy (&mcast_header, msg, sizeof (struct mcast)); } if (mcast_header.header.encapsulated == MESSAGE_ENCAPSULATED) { sort_queue = &instance->recovery_sort_queue; } else { sort_queue = &instance->regular_sort_queue; } assert (msg_len <= FRAME_SIZE_MAX); #ifdef TEST_DROP_MCAST_PERCENTAGE if (random()%100 < TEST_DROP_MCAST_PERCENTAGE) { return (0); } #endif /* * If the message is foreign execute the switch below */ if (memcmp (&instance->my_ring_id, &mcast_header.ring_id, sizeof (struct memb_ring_id)) != 0) { switch (instance->memb_state) { case MEMB_STATE_OPERATIONAL: memb_set_merge ( &mcast_header.system_from, 1, instance->my_proc_list, &instance->my_proc_list_entries); memb_state_gather_enter (instance, 7); break; case MEMB_STATE_GATHER: if (!memb_set_subset ( &mcast_header.system_from, 1, instance->my_proc_list, instance->my_proc_list_entries)) { memb_set_merge (&mcast_header.system_from, 1, instance->my_proc_list, &instance->my_proc_list_entries); memb_state_gather_enter (instance, 8); return (0); } break; case MEMB_STATE_COMMIT: /* discard message */ instance->stats.rx_msg_dropped++; break; case MEMB_STATE_RECOVERY: /* discard message */ instance->stats.rx_msg_dropped++; break; } return (0); } log_printf (instance->totemsrp_log_level_debug, "Received ringid(%s:%lld) seq %x", totemip_print (&mcast_header.ring_id.rep), mcast_header.ring_id.seq, mcast_header.seq); /* * Add mcast message to rtr queue if not already in rtr queue * otherwise free io vectors */ if (msg_len > 0 && msg_len <= FRAME_SIZE_MAX && sq_in_range (sort_queue, mcast_header.seq) && sq_item_inuse (sort_queue, mcast_header.seq) == 0) { /* * Allocate new multicast memory block */ // TODO LEAK sort_queue_item.mcast = totemsrp_buffer_alloc (instance); if (sort_queue_item.mcast == NULL) { return (-1); /* error here is corrected by the algorithm */ } memcpy (sort_queue_item.mcast, msg, msg_len); sort_queue_item.msg_len = msg_len; if (sq_lt_compare (instance->my_high_seq_received, mcast_header.seq)) { instance->my_high_seq_received = mcast_header.seq; } sq_item_add (sort_queue, &sort_queue_item, mcast_header.seq); } update_aru (instance); if (instance->memb_state == MEMB_STATE_OPERATIONAL) { messages_deliver_to_app (instance, 0, instance->my_high_seq_received); } /* TODO remove from retrans message queue for old ring in recovery state */ return (0); } static int message_handler_memb_merge_detect ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed) { struct memb_merge_detect memb_merge_detect; if (endian_conversion_needed) { memb_merge_detect_endian_convert (msg, &memb_merge_detect); } else { memcpy (&memb_merge_detect, msg, sizeof (struct memb_merge_detect)); } /* * do nothing if this is a merge detect from this configuration */ if (memcmp (&instance->my_ring_id, &memb_merge_detect.ring_id, sizeof (struct memb_ring_id)) == 0) { return (0); } /* * Execute merge operation */ switch (instance->memb_state) { case MEMB_STATE_OPERATIONAL: memb_set_merge (&memb_merge_detect.system_from, 1, instance->my_proc_list, &instance->my_proc_list_entries); memb_state_gather_enter (instance, 9); break; case MEMB_STATE_GATHER: if (!memb_set_subset ( &memb_merge_detect.system_from, 1, instance->my_proc_list, instance->my_proc_list_entries)) { memb_set_merge (&memb_merge_detect.system_from, 1, instance->my_proc_list, &instance->my_proc_list_entries); memb_state_gather_enter (instance, 10); return (0); } break; case MEMB_STATE_COMMIT: /* do nothing in commit */ break; case MEMB_STATE_RECOVERY: /* do nothing in recovery */ break; } return (0); } static void memb_join_process ( struct totemsrp_instance *instance, const struct memb_join *memb_join) { struct srp_addr *proc_list; struct srp_addr *failed_list; int gather_entered = 0; int fail_minus_memb_entries = 0; struct srp_addr fail_minus_memb[PROCESSOR_COUNT_MAX]; proc_list = (struct srp_addr *)memb_join->end_of_memb_join; failed_list = proc_list + memb_join->proc_list_entries; /* memb_set_print ("proclist", proc_list, memb_join->proc_list_entries); memb_set_print ("faillist", failed_list, memb_join->failed_list_entries); memb_set_print ("my_proclist", instance->my_proc_list, instance->my_proc_list_entries); memb_set_print ("my_faillist", instance->my_failed_list, instance->my_failed_list_entries); -*/ if (memb_set_equal (proc_list, memb_join->proc_list_entries, instance->my_proc_list, instance->my_proc_list_entries) && memb_set_equal (failed_list, memb_join->failed_list_entries, instance->my_failed_list, instance->my_failed_list_entries)) { memb_consensus_set (instance, &memb_join->system_from); if (memb_consensus_agreed (instance) && instance->failed_to_recv == 1) { instance->failed_to_recv = 0; srp_addr_copy (&instance->my_proc_list[0], &instance->my_id); instance->my_proc_list_entries = 1; instance->my_failed_list_entries = 0; memb_state_commit_token_create (instance); memb_state_commit_enter (instance); return; } if (memb_consensus_agreed (instance) && memb_lowest_in_config (instance)) { memb_state_commit_token_create (instance); memb_state_commit_enter (instance); } else { goto out; } } else if (memb_set_subset (proc_list, memb_join->proc_list_entries, instance->my_proc_list, instance->my_proc_list_entries) && memb_set_subset (failed_list, memb_join->failed_list_entries, instance->my_failed_list, instance->my_failed_list_entries)) { goto out; } else if (memb_set_subset (&memb_join->system_from, 1, instance->my_failed_list, instance->my_failed_list_entries)) { goto out; } else { memb_set_merge (proc_list, memb_join->proc_list_entries, instance->my_proc_list, &instance->my_proc_list_entries); if (memb_set_subset ( &instance->my_id, 1, failed_list, memb_join->failed_list_entries)) { memb_set_merge ( &memb_join->system_from, 1, instance->my_failed_list, &instance->my_failed_list_entries); } else { if (memb_set_subset ( &memb_join->system_from, 1, instance->my_memb_list, instance->my_memb_entries)) { if (memb_set_subset ( &memb_join->system_from, 1, instance->my_failed_list, instance->my_failed_list_entries) == 0) { memb_set_merge (failed_list, memb_join->failed_list_entries, instance->my_failed_list, &instance->my_failed_list_entries); } else { memb_set_subtract (fail_minus_memb, &fail_minus_memb_entries, failed_list, memb_join->failed_list_entries, instance->my_memb_list, instance->my_memb_entries); memb_set_merge (fail_minus_memb, fail_minus_memb_entries, instance->my_failed_list, &instance->my_failed_list_entries); } } } memb_state_gather_enter (instance, 11); gather_entered = 1; } out: if (gather_entered == 0 && instance->memb_state == MEMB_STATE_OPERATIONAL) { memb_state_gather_enter (instance, 12); } } static void memb_join_endian_convert (const struct memb_join *in, struct memb_join *out) { int i; struct srp_addr *in_proc_list; struct srp_addr *in_failed_list; struct srp_addr *out_proc_list; struct srp_addr *out_failed_list; out->header.type = in->header.type; out->header.endian_detector = ENDIAN_LOCAL; out->header.nodeid = swab32 (in->header.nodeid); srp_addr_copy_endian_convert (&out->system_from, &in->system_from); out->proc_list_entries = swab32 (in->proc_list_entries); out->failed_list_entries = swab32 (in->failed_list_entries); out->ring_seq = swab64 (in->ring_seq); in_proc_list = (struct srp_addr *)in->end_of_memb_join; in_failed_list = in_proc_list + out->proc_list_entries; out_proc_list = (struct srp_addr *)out->end_of_memb_join; out_failed_list = out_proc_list + out->proc_list_entries; for (i = 0; i < out->proc_list_entries; i++) { srp_addr_copy_endian_convert (&out_proc_list[i], &in_proc_list[i]); } for (i = 0; i < out->failed_list_entries; i++) { srp_addr_copy_endian_convert (&out_failed_list[i], &in_failed_list[i]); } } static void memb_commit_token_endian_convert (const struct memb_commit_token *in, struct memb_commit_token *out) { int i; struct srp_addr *in_addr = (struct srp_addr *)in->end_of_commit_token; struct srp_addr *out_addr = (struct srp_addr *)out->end_of_commit_token; struct memb_commit_token_memb_entry *in_memb_list; struct memb_commit_token_memb_entry *out_memb_list; out->header.type = in->header.type; out->header.endian_detector = ENDIAN_LOCAL; out->header.nodeid = swab32 (in->header.nodeid); out->token_seq = swab32 (in->token_seq); totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep); out->ring_id.seq = swab64 (in->ring_id.seq); out->retrans_flg = swab32 (in->retrans_flg); out->memb_index = swab32 (in->memb_index); out->addr_entries = swab32 (in->addr_entries); in_memb_list = (struct memb_commit_token_memb_entry *)(in_addr + out->addr_entries); out_memb_list = (struct memb_commit_token_memb_entry *)(out_addr + out->addr_entries); for (i = 0; i < out->addr_entries; i++) { srp_addr_copy_endian_convert (&out_addr[i], &in_addr[i]); /* * Only convert the memb entry if it has been set */ if (in_memb_list[i].ring_id.rep.family != 0) { totemip_copy_endian_convert (&out_memb_list[i].ring_id.rep, &in_memb_list[i].ring_id.rep); out_memb_list[i].ring_id.seq = swab64 (in_memb_list[i].ring_id.seq); out_memb_list[i].aru = swab32 (in_memb_list[i].aru); out_memb_list[i].high_delivered = swab32 (in_memb_list[i].high_delivered); out_memb_list[i].received_flg = swab32 (in_memb_list[i].received_flg); } } } static void orf_token_endian_convert (const struct orf_token *in, struct orf_token *out) { int i; out->header.type = in->header.type; out->header.endian_detector = ENDIAN_LOCAL; out->header.nodeid = swab32 (in->header.nodeid); out->seq = swab32 (in->seq); out->token_seq = swab32 (in->token_seq); out->aru = swab32 (in->aru); totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep); out->aru_addr = swab32(in->aru_addr); out->ring_id.seq = swab64 (in->ring_id.seq); out->fcc = swab32 (in->fcc); out->backlog = swab32 (in->backlog); out->retrans_flg = swab32 (in->retrans_flg); out->rtr_list_entries = swab32 (in->rtr_list_entries); for (i = 0; i < out->rtr_list_entries; i++) { totemip_copy_endian_convert(&out->rtr_list[i].ring_id.rep, &in->rtr_list[i].ring_id.rep); out->rtr_list[i].ring_id.seq = swab64 (in->rtr_list[i].ring_id.seq); out->rtr_list[i].seq = swab32 (in->rtr_list[i].seq); } } static void mcast_endian_convert (const struct mcast *in, struct mcast *out) { out->header.type = in->header.type; out->header.endian_detector = ENDIAN_LOCAL; out->header.nodeid = swab32 (in->header.nodeid); out->header.encapsulated = in->header.encapsulated; out->seq = swab32 (in->seq); out->this_seqno = swab32 (in->this_seqno); totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep); out->ring_id.seq = swab64 (in->ring_id.seq); out->node_id = swab32 (in->node_id); out->guarantee = swab32 (in->guarantee); srp_addr_copy_endian_convert (&out->system_from, &in->system_from); } static void memb_merge_detect_endian_convert ( const struct memb_merge_detect *in, struct memb_merge_detect *out) { out->header.type = in->header.type; out->header.endian_detector = ENDIAN_LOCAL; out->header.nodeid = swab32 (in->header.nodeid); totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep); out->ring_id.seq = swab64 (in->ring_id.seq); srp_addr_copy_endian_convert (&out->system_from, &in->system_from); } static int message_handler_memb_join ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed) { const struct memb_join *memb_join; struct memb_join *memb_join_convert = alloca (msg_len); if (endian_conversion_needed) { memb_join = memb_join_convert; memb_join_endian_convert (msg, memb_join_convert); } else { memb_join = msg; } /* * If the process paused because it wasn't scheduled in a timely * fashion, flush the join messages because they may be queued * entries */ if (pause_flush (instance)) { return (0); } if (instance->token_ring_id_seq < memb_join->ring_seq) { instance->token_ring_id_seq = memb_join->ring_seq; } switch (instance->memb_state) { case MEMB_STATE_OPERATIONAL: memb_join_process (instance, memb_join); break; case MEMB_STATE_GATHER: memb_join_process (instance, memb_join); break; case MEMB_STATE_COMMIT: if (memb_set_subset (&memb_join->system_from, 1, instance->my_new_memb_list, instance->my_new_memb_entries) && memb_join->ring_seq >= instance->my_ring_id.seq) { memb_join_process (instance, memb_join); memb_state_gather_enter (instance, 13); } break; case MEMB_STATE_RECOVERY: if (memb_set_subset (&memb_join->system_from, 1, instance->my_new_memb_list, instance->my_new_memb_entries) && memb_join->ring_seq >= instance->my_ring_id.seq) { memb_join_process (instance, memb_join); memb_recovery_state_token_loss (instance); memb_state_gather_enter (instance, 14); } break; } return (0); } static int message_handler_memb_commit_token ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed) { struct memb_commit_token *memb_commit_token_convert = alloca (msg_len); struct memb_commit_token *memb_commit_token; struct srp_addr sub[PROCESSOR_COUNT_MAX]; int sub_entries; struct srp_addr *addr; log_printf (instance->totemsrp_log_level_debug, "got commit token"); if (endian_conversion_needed) { memb_commit_token_endian_convert (msg, memb_commit_token_convert); } else { memcpy (memb_commit_token_convert, msg, msg_len); } memb_commit_token = memb_commit_token_convert; addr = (struct srp_addr *)memb_commit_token->end_of_commit_token; #ifdef TEST_DROP_COMMIT_TOKEN_PERCENTAGE if (random()%100 < TEST_DROP_COMMIT_TOKEN_PERCENTAGE) { return (0); } #endif switch (instance->memb_state) { case MEMB_STATE_OPERATIONAL: /* discard token */ break; case MEMB_STATE_GATHER: memb_set_subtract (sub, &sub_entries, instance->my_proc_list, instance->my_proc_list_entries, instance->my_failed_list, instance->my_failed_list_entries); if (memb_set_equal (addr, memb_commit_token->addr_entries, sub, sub_entries) && memb_commit_token->ring_id.seq > instance->my_ring_id.seq) { memcpy (instance->commit_token, memb_commit_token, msg_len); memb_state_commit_enter (instance); } break; case MEMB_STATE_COMMIT: /* * If retransmitted commit tokens are sent on this ring * filter them out and only enter recovery once the * commit token has traversed the array. This is * determined by : * memb_commit_token->memb_index == memb_commit_token->addr_entries) { */ if (memb_commit_token->ring_id.seq == instance->my_ring_id.seq && memb_commit_token->memb_index == memb_commit_token->addr_entries) { memb_state_recovery_enter (instance, memb_commit_token); } break; case MEMB_STATE_RECOVERY: if (totemip_equal (&instance->my_id.addr[0], &instance->my_ring_id.rep)) { log_printf (instance->totemsrp_log_level_debug, "Sending initial ORF token"); // TODO convert instead of initiate orf_token_send_initial (instance); reset_token_timeout (instance); // REVIEWED reset_token_retransmit_timeout (instance); // REVIEWED } break; } return (0); } static int message_handler_token_hold_cancel ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed) { const struct token_hold_cancel *token_hold_cancel = msg; if (memcmp (&token_hold_cancel->ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)) == 0) { instance->my_seq_unchanged = 0; if (totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0])) { timer_function_token_retransmit_timeout (instance); } } return (0); } void main_deliver_fn ( void *context, const void *msg, unsigned int msg_len) { struct totemsrp_instance *instance = context; const struct message_header *message_header = msg; if (msg_len < sizeof (struct message_header)) { log_printf (instance->totemsrp_log_level_security, "Received message is too short... ignoring %u.", (unsigned int)msg_len); return; } switch (message_header->type) { case MESSAGE_TYPE_ORF_TOKEN: instance->stats.orf_token_rx++; break; case MESSAGE_TYPE_MCAST: instance->stats.mcast_rx++; break; case MESSAGE_TYPE_MEMB_MERGE_DETECT: instance->stats.memb_merge_detect_rx++; break; case MESSAGE_TYPE_MEMB_JOIN: instance->stats.memb_join_rx++; break; case MESSAGE_TYPE_MEMB_COMMIT_TOKEN: instance->stats.memb_commit_token_rx++; break; case MESSAGE_TYPE_TOKEN_HOLD_CANCEL: instance->stats.token_hold_cancel_rx++; break; default: log_printf (instance->totemsrp_log_level_security, "Type of received message is wrong... ignoring %d.\n", (int)message_header->type); printf ("wrong message type\n"); instance->stats.rx_msg_dropped++; return; } /* * Handle incoming message */ totemsrp_message_handlers.handler_functions[(int)message_header->type] ( instance, msg, msg_len, message_header->endian_detector != ENDIAN_LOCAL); } void main_iface_change_fn ( void *context, const struct totem_ip_address *iface_addr, unsigned int iface_no) { struct totemsrp_instance *instance = context; int i; totemip_copy (&instance->my_id.addr[iface_no], iface_addr); assert (instance->my_id.addr[iface_no].nodeid); totemip_copy (&instance->my_memb_list[0].addr[iface_no], iface_addr); if (instance->iface_changes++ == 0) { memb_ring_id_create_or_load (instance, &instance->my_ring_id); log_printf ( instance->totemsrp_log_level_debug, "Created or loaded sequence id %llx.%s for this ring.", instance->my_ring_id.seq, totemip_print (&instance->my_ring_id.rep)); if (instance->totemsrp_service_ready_fn) { instance->totemsrp_service_ready_fn (); } } for (i = 0; i < instance->totem_config->interfaces[iface_no].member_count; i++) { totemsrp_member_add (instance, &instance->totem_config->interfaces[iface_no].member_list[i], iface_no); } if (instance->iface_changes >= instance->totem_config->interface_count) { memb_state_gather_enter (instance, 15); } } void totemsrp_net_mtu_adjust (struct totem_config *totem_config) { totem_config->net_mtu -= sizeof (struct mcast); } void totemsrp_service_ready_register ( void *context, void (*totem_service_ready) (void)) { struct totemsrp_instance *instance = (struct totemsrp_instance *)context; instance->totemsrp_service_ready_fn = totem_service_ready; } int totemsrp_member_add ( void *context, const struct totem_ip_address *member, int ring_no) { struct totemsrp_instance *instance = (struct totemsrp_instance *)context; int res; res = totemrrp_member_add (instance->totemrrp_context, member, ring_no); return (res); } int totemsrp_member_remove ( void *context, const struct totem_ip_address *member, int ring_no) { struct totemsrp_instance *instance = (struct totemsrp_instance *)context; int res; res = totemrrp_member_remove (instance->totemrrp_context, member, ring_no); return (res); } void totemsrp_threaded_mode_enable (void *context) { struct totemsrp_instance *instance = (struct totemsrp_instance *)context; instance->threaded_mode_enabled = 1; } diff --git a/exec/totemudp.c b/exec/totemudp.c index 0fe52aa1..407df30e 100644 --- a/exec/totemudp.c +++ b/exec/totemudp.c @@ -1,1939 +1,1745 @@ /* * Copyright (c) 2005 MontaVista Software, Inc. * Copyright (c) 2006-2012 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define LOGSYS_UTILS_ONLY 1 #include #include "totemudp.h" -#include "crypto.h" #include "util.h" -#ifdef HAVE_LIBNSS #include #include #include #include -#endif #ifndef MSG_NOSIGNAL #define MSG_NOSIGNAL 0 #endif #define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX) #define NETIF_STATE_REPORT_UP 1 #define NETIF_STATE_REPORT_DOWN 2 #define BIND_STATE_UNBOUND 0 #define BIND_STATE_REGULAR 1 #define BIND_STATE_LOOPBACK 2 #define MESSAGE_TYPE_MCAST 1 #define HMAC_HASH_SIZE 20 struct security_header { unsigned char hash_digest[HMAC_HASH_SIZE]; /* The hash *MUST* be first in the data structure */ unsigned char salt[16]; /* random number */ char msg[0]; } __attribute__((packed)); -struct totemudp_mcast_thread_state { - unsigned char iobuf[FRAME_SIZE_MAX]; - prng_state prng_state; -}; - struct totemudp_socket { int mcast_recv; int mcast_send; int token; }; struct totemudp_instance { - hmac_state totemudp_hmac_state; - - prng_state totemudp_prng_state; - -#ifdef HAVE_LIBNSS PK11SymKey *nss_sym_key; PK11SymKey *nss_sym_key_sign; -#endif unsigned char totemudp_private_key[1024]; unsigned int totemudp_private_key_len; qb_loop_t *totemudp_poll_handle; struct totem_interface *totem_interface; int netif_state_report; int netif_bind_state; void *context; void (*totemudp_deliver_fn) ( void *context, const void *msg, unsigned int msg_len); void (*totemudp_iface_change_fn) ( void *context, const struct totem_ip_address *iface_address); void (*totemudp_target_set_completed) (void *context); /* * Function and data used to log messages */ int totemudp_log_level_security; int totemudp_log_level_error; int totemudp_log_level_warning; int totemudp_log_level_notice; int totemudp_log_level_debug; int totemudp_subsys_id; void (*totemudp_log_printf) ( int level, int subsys, const char *function, const char *file, int line, const char *format, ...)__attribute__((format(printf, 6, 7))); void *udp_context; char iov_buffer[FRAME_SIZE_MAX]; char iov_buffer_flush[FRAME_SIZE_MAX]; struct iovec totemudp_iov_recv; struct iovec totemudp_iov_recv_flush; struct totemudp_socket totemudp_sockets; struct totem_ip_address mcast_address; int stats_sent; int stats_recv; int stats_delv; int stats_remcasts; int stats_orf_token; struct timeval stats_tv_start; struct totem_ip_address my_id; int firstrun; qb_loop_timer_handle timer_netif_check_timeout; unsigned int my_memb_entries; int flushing; struct totem_config *totem_config; struct totem_ip_address token_target; }; struct work_item { const void *msg; unsigned int msg_len; struct totemudp_instance *instance; }; static int totemudp_build_sockets ( struct totemudp_instance *instance, struct totem_ip_address *bindnet_address, struct totem_ip_address *mcastaddress, struct totemudp_socket *sockets, struct totem_ip_address *bound_to); static struct totem_ip_address localhost; static void totemudp_instance_initialize (struct totemudp_instance *instance) { memset (instance, 0, sizeof (struct totemudp_instance)); instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN; instance->totemudp_iov_recv.iov_base = instance->iov_buffer; instance->totemudp_iov_recv.iov_len = FRAME_SIZE_MAX; //sizeof (instance->iov_buffer); instance->totemudp_iov_recv_flush.iov_base = instance->iov_buffer_flush; instance->totemudp_iov_recv_flush.iov_len = FRAME_SIZE_MAX; //sizeof (instance->iov_buffer); /* * There is always atleast 1 processor */ instance->my_memb_entries = 1; } #define log_printf(level, format, args...) \ do { \ instance->totemudp_log_printf ( \ level, instance->totemudp_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ (const char *)format, ##args); \ } while (0); #define LOGSYS_PERROR(err_num, level, fmt, args...) \ do { \ char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \ const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \ instance->totemudp_log_printf ( \ level, instance->totemudp_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ fmt ": %s (%d)", ##args, _error_ptr, err_num); \ } while(0) -static int authenticate_and_decrypt_sober ( - struct totemudp_instance *instance, - struct iovec *iov, - unsigned int iov_len) -{ - unsigned char keys[48]; - struct security_header *header = (struct security_header *)iov[0].iov_base; - prng_state keygen_prng_state; - prng_state stream_prng_state; - unsigned char *hmac_key = &keys[32]; - unsigned char *cipher_key = &keys[16]; - unsigned char *initial_vector = &keys[0]; - unsigned char digest_comparison[HMAC_HASH_SIZE]; - unsigned long len; - - /* - * Generate MAC, CIPHER, IV keys from private key - */ - memset (keys, 0, sizeof (keys)); - sober128_start (&keygen_prng_state); - sober128_add_entropy (instance->totemudp_private_key, - instance->totemudp_private_key_len, &keygen_prng_state); - sober128_add_entropy (header->salt, sizeof (header->salt), &keygen_prng_state); - - sober128_read (keys, sizeof (keys), &keygen_prng_state); - - /* - * Setup stream cipher - */ - sober128_start (&stream_prng_state); - sober128_add_entropy (cipher_key, 16, &stream_prng_state); - sober128_add_entropy (initial_vector, 16, &stream_prng_state); - - /* - * Authenticate contents of message - */ - hmac_init (&instance->totemudp_hmac_state, DIGEST_SHA1, hmac_key, 16); - - hmac_process (&instance->totemudp_hmac_state, - (unsigned char *)iov->iov_base + HMAC_HASH_SIZE, - iov->iov_len - HMAC_HASH_SIZE); - - len = hash_descriptor[DIGEST_SHA1]->hashsize; - assert (HMAC_HASH_SIZE >= len); - hmac_done (&instance->totemudp_hmac_state, digest_comparison, &len); - - if (memcmp (digest_comparison, header->hash_digest, len) != 0) { - return (-1); - } - - /* - * Decrypt the contents of the message with the cipher key - */ - sober128_read ((unsigned char*)iov->iov_base + - sizeof (struct security_header), - iov->iov_len - sizeof (struct security_header), - &stream_prng_state); - - return (0); -} - -static void init_sober_crypto( - struct totemudp_instance *instance) -{ - log_printf(instance->totemudp_log_level_notice, - "Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0)."); - rng_make_prng (128, PRNG_SOBER, &instance->totemudp_prng_state, NULL); -} - -#ifdef HAVE_LIBNSS - static unsigned char *copy_from_iovec( const struct iovec *iov, unsigned int iov_len, size_t *buf_size) { int i; size_t bufptr; size_t buflen = 0; unsigned char *newbuf; for (i=0; i buf_size) { copylen = buf_size - bufptr; } memcpy(iov[i].iov_base, buf+bufptr, copylen); bufptr += copylen; if (iov[i].iov_len != copylen) { iov[i].iov_len = copylen; return; } } } static void init_nss_crypto( struct totemudp_instance *instance) { PK11SlotInfo* aes_slot = NULL; PK11SlotInfo* sha1_slot = NULL; SECItem key_item; SECStatus rv; log_printf(instance->totemudp_log_level_notice, - "Initializing transmit/receive security: NSS AES128CBC/SHA1HMAC (mode 1)."); + "Initializing transmit/receive security: NSS AES256CBC/SHA1HMAC (mode %u).", TOTEM_CRYPTO_AES256); rv = NSS_NoDB_Init("."); if (rv != SECSuccess) { log_printf(instance->totemudp_log_level_security, "NSS initialization failed (err %d)", PR_GetError()); goto out; } aes_slot = PK11_GetBestSlot(instance->totem_config->crypto_crypt_type, NULL); if (aes_slot == NULL) { log_printf(instance->totemudp_log_level_security, "Unable to find security slot (err %d)", PR_GetError()); goto out; } sha1_slot = PK11_GetBestSlot(CKM_SHA_1_HMAC, NULL); if (sha1_slot == NULL) { log_printf(instance->totemudp_log_level_security, "Unable to find security slot (err %d)", PR_GetError()); goto out; } /* * Make the private key into a SymKey that we can use */ key_item.type = siBuffer; key_item.data = instance->totem_config->private_key; - key_item.len = 32; /* Use 128 bits */ + key_item.len = 32; /* Use 256 bits */ instance->nss_sym_key = PK11_ImportSymKey(aes_slot, instance->totem_config->crypto_crypt_type, PK11_OriginUnwrap, CKA_ENCRYPT|CKA_DECRYPT, &key_item, NULL); if (instance->nss_sym_key == NULL) { log_printf(instance->totemudp_log_level_security, "Failure to import key into NSS (err %d)", PR_GetError()); goto out; } instance->nss_sym_key_sign = PK11_ImportSymKey(sha1_slot, CKM_SHA_1_HMAC, PK11_OriginUnwrap, CKA_SIGN, &key_item, NULL); if (instance->nss_sym_key_sign == NULL) { log_printf(instance->totemudp_log_level_security, "Failure to import key into NSS (err %d)", PR_GetError()); goto out; } out: return; } static int encrypt_and_sign_nss ( struct totemudp_instance *instance, unsigned char *buf, size_t *buf_len, const struct iovec *iovec, unsigned int iov_len) { PK11Context* enc_context = NULL; SECStatus rv1, rv2; int tmp1_outlen; unsigned int tmp2_outlen; unsigned char *inbuf; unsigned char *data; unsigned char *outdata; size_t datalen; SECItem no_params; SECItem iv_item; struct security_header *header; SECItem *nss_sec_param; unsigned char nss_iv_data[16]; SECStatus rv; no_params.type = siBuffer; no_params.data = 0; no_params.len = 0; tmp1_outlen = tmp2_outlen = 0; inbuf = copy_from_iovec(iovec, iov_len, &datalen); if (!inbuf) { log_printf(instance->totemudp_log_level_security, "malloc error copying buffer from iovec"); return -1; } data = inbuf + sizeof (struct security_header); datalen -= sizeof (struct security_header); outdata = buf + sizeof (struct security_header); header = (struct security_header *)buf; rv = PK11_GenerateRandom ( nss_iv_data, sizeof (nss_iv_data)); if (rv != SECSuccess) { log_printf(instance->totemudp_log_level_security, "Failure to generate a random number %d", PR_GetError()); } memcpy(header->salt, nss_iv_data, sizeof(nss_iv_data)); iv_item.type = siBuffer; iv_item.data = nss_iv_data; iv_item.len = sizeof (nss_iv_data); nss_sec_param = PK11_ParamFromIV ( instance->totem_config->crypto_crypt_type, &iv_item); if (nss_sec_param == NULL) { log_printf(instance->totemudp_log_level_security, "Failure to set up PKCS11 param (err %d)", PR_GetError()); free (inbuf); return (-1); } /* * Create cipher context for encryption */ enc_context = PK11_CreateContextBySymKey ( instance->totem_config->crypto_crypt_type, CKA_ENCRYPT, instance->nss_sym_key, nss_sec_param); if (!enc_context) { char err[1024]; PR_GetErrorText(err); err[PR_GetErrorTextLength()] = 0; log_printf(instance->totemudp_log_level_security, "PK11_CreateContext failed (encrypt) crypt_type=%d (err %d): %s", instance->totem_config->crypto_crypt_type, PR_GetError(), err); free(inbuf); return -1; } rv1 = PK11_CipherOp(enc_context, outdata, &tmp1_outlen, FRAME_SIZE_MAX - sizeof(struct security_header), data, datalen); rv2 = PK11_DigestFinal(enc_context, outdata + tmp1_outlen, &tmp2_outlen, FRAME_SIZE_MAX - tmp1_outlen); PK11_DestroyContext(enc_context, PR_TRUE); *buf_len = tmp1_outlen + tmp2_outlen; free(inbuf); // memcpy(&outdata[*buf_len], nss_iv_data, sizeof(nss_iv_data)); if (rv1 != SECSuccess || rv2 != SECSuccess) goto out; /* Now do the digest */ enc_context = PK11_CreateContextBySymKey(CKM_SHA_1_HMAC, CKA_SIGN, instance->nss_sym_key_sign, &no_params); if (!enc_context) { char err[1024]; PR_GetErrorText(err); err[PR_GetErrorTextLength()] = 0; log_printf(instance->totemudp_log_level_security, "encrypt: PK11_CreateContext failed (digest) err %d: %s", PR_GetError(), err); return -1; } PK11_DigestBegin(enc_context); rv1 = PK11_DigestOp(enc_context, outdata - 16, *buf_len + 16); rv2 = PK11_DigestFinal(enc_context, header->hash_digest, &tmp2_outlen, sizeof(header->hash_digest)); PK11_DestroyContext(enc_context, PR_TRUE); if (rv1 != SECSuccess || rv2 != SECSuccess) goto out; *buf_len = *buf_len + sizeof(struct security_header); SECITEM_FreeItem(nss_sec_param, PR_TRUE); return 0; out: return -1; } static int authenticate_and_decrypt_nss ( struct totemudp_instance *instance, struct iovec *iov, unsigned int iov_len) { PK11Context* enc_context = NULL; SECStatus rv1, rv2; int tmp1_outlen; unsigned int tmp2_outlen; unsigned char outbuf[FRAME_SIZE_MAX]; unsigned char digest[HMAC_HASH_SIZE]; unsigned char *outdata; int result_len; unsigned char *data; unsigned char *inbuf; size_t datalen; struct security_header *header = (struct security_header *)iov[0].iov_base; SECItem no_params; SECItem ivdata; no_params.type = siBuffer; no_params.data = 0; no_params.len = 0; tmp1_outlen = tmp2_outlen = 0; if (iov_len > 1) { inbuf = copy_from_iovec(iov, iov_len, &datalen); if (!inbuf) { log_printf(instance->totemudp_log_level_security, "malloc error copying buffer from iovec"); return -1; } } else { inbuf = (unsigned char *)iov[0].iov_base; datalen = iov[0].iov_len; } data = inbuf + sizeof (struct security_header) - 16; datalen = datalen - sizeof (struct security_header) + 16; outdata = outbuf + sizeof (struct security_header); /* Check the digest */ enc_context = PK11_CreateContextBySymKey ( CKM_SHA_1_HMAC, CKA_SIGN, instance->nss_sym_key_sign, &no_params); if (!enc_context) { char err[1024]; PR_GetErrorText(err); err[PR_GetErrorTextLength()] = 0; log_printf(instance->totemudp_log_level_security, "PK11_CreateContext failed (check digest) err %d: %s", PR_GetError(), err); free (inbuf); return -1; } PK11_DigestBegin(enc_context); rv1 = PK11_DigestOp(enc_context, data, datalen); rv2 = PK11_DigestFinal(enc_context, digest, &tmp2_outlen, sizeof(digest)); PK11_DestroyContext(enc_context, PR_TRUE); if (rv1 != SECSuccess || rv2 != SECSuccess) { log_printf(instance->totemudp_log_level_security, "Digest check failed"); return -1; } if (memcmp(digest, header->hash_digest, tmp2_outlen) != 0) { log_printf(instance->totemudp_log_level_error, "Digest does not match"); return -1; } /* * Get rid of salt */ data += 16; datalen -= 16; /* Create cipher context for decryption */ ivdata.type = siBuffer; ivdata.data = header->salt; ivdata.len = sizeof(header->salt); enc_context = PK11_CreateContextBySymKey( instance->totem_config->crypto_crypt_type, CKA_DECRYPT, instance->nss_sym_key, &ivdata); if (!enc_context) { log_printf(instance->totemudp_log_level_security, "PK11_CreateContext (decrypt) failed (err %d)", PR_GetError()); return -1; } rv1 = PK11_CipherOp(enc_context, outdata, &tmp1_outlen, sizeof(outbuf) - sizeof (struct security_header), data, datalen); if (rv1 != SECSuccess) { log_printf(instance->totemudp_log_level_security, "PK11_CipherOp (decrypt) failed (err %d)", PR_GetError()); } rv2 = PK11_DigestFinal(enc_context, outdata + tmp1_outlen, &tmp2_outlen, sizeof(outbuf) - tmp1_outlen); PK11_DestroyContext(enc_context, PR_TRUE); result_len = tmp1_outlen + tmp2_outlen + sizeof (struct security_header); /* Copy it back to the buffer */ copy_to_iovec(iov, iov_len, outbuf, result_len); if (iov_len > 1) free(inbuf); if (rv1 != SECSuccess || rv2 != SECSuccess) return -1; return 0; } -#endif - -static int encrypt_and_sign_sober ( - struct totemudp_instance *instance, - unsigned char *buf, - size_t *buf_len, - const struct iovec *iovec, - unsigned int iov_len) -{ - int i; - unsigned char *addr; - unsigned char keys[48]; - struct security_header *header; - unsigned char *hmac_key = &keys[32]; - unsigned char *cipher_key = &keys[16]; - unsigned char *initial_vector = &keys[0]; - unsigned long len; - size_t outlen = 0; - hmac_state hmac_st; - prng_state keygen_prng_state; - prng_state stream_prng_state; - prng_state *prng_state_in = &instance->totemudp_prng_state; - - header = (struct security_header *)buf; - addr = buf + sizeof (struct security_header); - - memset (keys, 0, sizeof (keys)); - memset (header->salt, 0, sizeof (header->salt)); - - /* - * Generate MAC, CIPHER, IV keys from private key - */ - sober128_read (header->salt, sizeof (header->salt), prng_state_in); - sober128_start (&keygen_prng_state); - sober128_add_entropy (instance->totemudp_private_key, - instance->totemudp_private_key_len, - &keygen_prng_state); - sober128_add_entropy (header->salt, sizeof (header->salt), - &keygen_prng_state); - - sober128_read (keys, sizeof (keys), &keygen_prng_state); - - /* - * Setup stream cipher - */ - sober128_start (&stream_prng_state); - sober128_add_entropy (cipher_key, 16, &stream_prng_state); - sober128_add_entropy (initial_vector, 16, &stream_prng_state); - - outlen = sizeof (struct security_header); - /* - * Copy remainder of message, then encrypt it - */ - for (i = 1; i < iov_len; i++) { - memcpy (addr, iovec[i].iov_base, iovec[i].iov_len); - addr += iovec[i].iov_len; - outlen += iovec[i].iov_len; - } - - /* - * Encrypt message by XORing stream cipher data - */ - sober128_read (buf + sizeof (struct security_header), - outlen - sizeof (struct security_header), - &stream_prng_state); - - memset (&hmac_st, 0, sizeof (hmac_st)); - - /* - * Sign the contents of the message with the hmac key and store signature in message - */ - hmac_init (&hmac_st, DIGEST_SHA1, hmac_key, 16); - - hmac_process (&hmac_st, - buf + HMAC_HASH_SIZE, - outlen - HMAC_HASH_SIZE); - - len = hash_descriptor[DIGEST_SHA1]->hashsize; - - hmac_done (&hmac_st, header->hash_digest, &len); - - *buf_len = outlen; - - return 0; -} static int encrypt_and_sign_worker ( struct totemudp_instance *instance, unsigned char *buf, size_t *buf_len, const struct iovec *iovec, unsigned int iov_len) { - if (instance->totem_config->crypto_type == TOTEM_CRYPTO_SOBER) { - return encrypt_and_sign_sober(instance, buf, buf_len, iovec, iov_len); - } -#ifdef HAVE_LIBNSS - if (instance->totem_config->crypto_type == TOTEM_CRYPTO_NSS) { + + if (instance->totem_config->crypto_type == TOTEM_CRYPTO_AES256) { return encrypt_and_sign_nss(instance, buf, buf_len, iovec, iov_len); } -#endif + return -1; } static int authenticate_and_decrypt ( struct totemudp_instance *instance, struct iovec *iov, unsigned int iov_len) { unsigned char type; unsigned char *endbuf = (unsigned char *)iov[iov_len-1].iov_base; int res = -1; /* * Get the encryption type and remove it from the buffer */ type = endbuf[iov[iov_len-1].iov_len-1]; iov[iov_len-1].iov_len -= 1; - if (type == TOTEM_CRYPTO_SOBER) { - res = authenticate_and_decrypt_sober(instance, iov, iov_len); - } - -#ifdef HAVE_LIBNSS - if (type == TOTEM_CRYPTO_NSS) { + if (type == TOTEM_CRYPTO_AES256) { res = authenticate_and_decrypt_nss(instance, iov, iov_len); } -#endif - - /* - * If it failed, then try decrypting the whole packet - */ - if (res == -1) { - iov[iov_len-1].iov_len += 1; - res = authenticate_and_decrypt_sober(instance, iov, iov_len); - } return res; } static void init_crypto( struct totemudp_instance *instance) { - init_sober_crypto(instance); -#ifdef HAVE_LIBNSS + init_nss_crypto(instance); -#endif } int totemudp_crypto_set ( void *udp_context, unsigned int type) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; /* * Validate crypto algorithm */ switch (type) { - case TOTEM_CRYPTO_SOBER: + case TOTEM_CRYPTO_AES256: log_printf(instance->totemudp_log_level_security, - "Transmit security set to: libtomcrypt SOBER128/SHA1HMAC (mode 0)"); - break; - case TOTEM_CRYPTO_NSS: - log_printf(instance->totemudp_log_level_security, - "Transmit security set to: NSS AES128CBC/SHA1HMAC (mode 1)"); + "Transmit security set to: NSS AES256CBC/SHA1HMAC (mode %u)", type); break; default: res = -1; break; } return (res); } static inline void ucast_sendmsg ( struct totemudp_instance *instance, struct totem_ip_address *system_to, const void *msg, unsigned int msg_len) { struct msghdr msg_ucast; int res = 0; size_t buf_len; unsigned char sheader[sizeof (struct security_header)]; unsigned char encrypt_data[FRAME_SIZE_MAX]; struct iovec iovec_encrypt[2]; const struct iovec *iovec_sendmsg; struct sockaddr_storage sockaddr; struct iovec iovec; unsigned int iov_len; int addrlen; if (instance->totem_config->secauth == 1) { iovec_encrypt[0].iov_base = (void *)sheader; iovec_encrypt[0].iov_len = sizeof (struct security_header); iovec_encrypt[1].iov_base = (void *)msg; iovec_encrypt[1].iov_len = msg_len; - /* * Encrypt and digest the message */ encrypt_and_sign_worker ( instance, encrypt_data, &buf_len, iovec_encrypt, 2); encrypt_data[buf_len++] = instance->totem_config->crypto_type; iovec_encrypt[0].iov_base = (void *)encrypt_data; iovec_encrypt[0].iov_len = buf_len; iovec_sendmsg = &iovec_encrypt[0]; iov_len = 1; } else { iovec.iov_base = (void *)msg; iovec.iov_len = msg_len; iovec_sendmsg = &iovec; iov_len = 1; } /* * Build unicast message */ totemip_totemip_to_sockaddr_convert(system_to, instance->totem_interface->ip_port, &sockaddr, &addrlen); msg_ucast.msg_name = &sockaddr; msg_ucast.msg_namelen = addrlen; msg_ucast.msg_iov = (void *) iovec_sendmsg; msg_ucast.msg_iovlen = iov_len; #if !defined(COROSYNC_SOLARIS) msg_ucast.msg_control = 0; msg_ucast.msg_controllen = 0; msg_ucast.msg_flags = 0; #else msg_ucast.msg_accrights = NULL; msg_ucast.msg_accrightslen = 0; #endif /* * Transmit unicast message * An error here is recovered by totemsrp */ res = sendmsg (instance->totemudp_sockets.mcast_send, &msg_ucast, MSG_NOSIGNAL); if (res < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, "sendmsg(ucast) failed (non-critical)"); } } static inline void mcast_sendmsg ( struct totemudp_instance *instance, const void *msg, unsigned int msg_len) { struct msghdr msg_mcast; int res = 0; size_t buf_len; unsigned char sheader[sizeof (struct security_header)]; unsigned char encrypt_data[FRAME_SIZE_MAX]; struct iovec iovec_encrypt[2]; struct iovec iovec; const struct iovec *iovec_sendmsg; struct sockaddr_storage sockaddr; unsigned int iov_len; int addrlen; if (instance->totem_config->secauth == 1) { iovec_encrypt[0].iov_base = (void *)sheader; iovec_encrypt[0].iov_len = sizeof (struct security_header); iovec_encrypt[1].iov_base = (void *)msg; iovec_encrypt[1].iov_len = msg_len; /* * Encrypt and digest the message */ encrypt_and_sign_worker ( instance, encrypt_data, &buf_len, iovec_encrypt, 2); encrypt_data[buf_len++] = instance->totem_config->crypto_type; iovec_encrypt[0].iov_base = (void *)encrypt_data; iovec_encrypt[0].iov_len = buf_len; iovec_sendmsg = &iovec_encrypt[0]; iov_len = 1; } else { iovec.iov_base = (void *)msg; iovec.iov_len = msg_len; iovec_sendmsg = &iovec; iov_len = 1; } /* * Build multicast message */ totemip_totemip_to_sockaddr_convert(&instance->mcast_address, instance->totem_interface->ip_port, &sockaddr, &addrlen); msg_mcast.msg_name = &sockaddr; msg_mcast.msg_namelen = addrlen; msg_mcast.msg_iov = (void *) iovec_sendmsg; msg_mcast.msg_iovlen = iov_len; #if !defined(COROSYNC_SOLARIS) msg_mcast.msg_control = 0; msg_mcast.msg_controllen = 0; msg_mcast.msg_flags = 0; #else msg_mcast.msg_accrights = NULL; msg_mcast.msg_accrightslen = 0; #endif /* * Transmit multicast message * An error here is recovered by totemsrp */ res = sendmsg (instance->totemudp_sockets.mcast_send, &msg_mcast, MSG_NOSIGNAL); if (res < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, "sendmsg(mcast) failed (non-critical)"); } } int totemudp_finalize ( void *udp_context) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; if (instance->totemudp_sockets.mcast_recv > 0) { close (instance->totemudp_sockets.mcast_recv); qb_loop_poll_del (instance->totemudp_poll_handle, instance->totemudp_sockets.mcast_recv); } if (instance->totemudp_sockets.mcast_send > 0) { close (instance->totemudp_sockets.mcast_send); } if (instance->totemudp_sockets.token > 0) { close (instance->totemudp_sockets.token); qb_loop_poll_del (instance->totemudp_poll_handle, instance->totemudp_sockets.token); } return (res); } /* * Only designed to work with a message with one iov */ static int net_deliver_fn ( int fd, int revents, void *data) { struct totemudp_instance *instance = (struct totemudp_instance *)data; struct msghdr msg_recv; struct iovec *iovec; struct sockaddr_storage system_from; int bytes_received; int res = 0; unsigned char *msg_offset; unsigned int size_delv; char *message_type; if (instance->flushing == 1) { iovec = &instance->totemudp_iov_recv_flush; } else { iovec = &instance->totemudp_iov_recv; } /* * Receive datagram */ msg_recv.msg_name = &system_from; msg_recv.msg_namelen = sizeof (struct sockaddr_storage); msg_recv.msg_iov = iovec; msg_recv.msg_iovlen = 1; #if !defined(COROSYNC_SOLARIS) msg_recv.msg_control = 0; msg_recv.msg_controllen = 0; msg_recv.msg_flags = 0; #else msg_recv.msg_accrights = NULL; msg_recv.msg_accrightslen = 0; #endif bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); if (bytes_received == -1) { return (0); } else { instance->stats_recv += bytes_received; } if ((instance->totem_config->secauth == 1) && (bytes_received < sizeof (struct security_header))) { log_printf (instance->totemudp_log_level_security, "Received message is too short... ignoring %d.", bytes_received); return (0); } iovec->iov_len = bytes_received; if (instance->totem_config->secauth == 1) { /* * Authenticate and if authenticated, decrypt datagram */ res = authenticate_and_decrypt (instance, iovec, 1); if (res == -1) { log_printf (instance->totemudp_log_level_security, "Received message has invalid digest... ignoring."); log_printf (instance->totemudp_log_level_security, "Invalid packet data"); iovec->iov_len = FRAME_SIZE_MAX; return 0; } msg_offset = (unsigned char *)iovec->iov_base + sizeof (struct security_header); size_delv = bytes_received - sizeof (struct security_header); } else { msg_offset = (void *)iovec->iov_base; size_delv = bytes_received; } /* * Drop all non-mcast messages (more specifically join * messages should be dropped) */ message_type = (char *)msg_offset; if (instance->flushing == 1 && *message_type != MESSAGE_TYPE_MCAST) { iovec->iov_len = FRAME_SIZE_MAX; return (0); } /* * Handle incoming message */ instance->totemudp_deliver_fn ( instance->context, msg_offset, size_delv); iovec->iov_len = FRAME_SIZE_MAX; return (0); } static int netif_determine ( struct totemudp_instance *instance, struct totem_ip_address *bindnet, struct totem_ip_address *bound_to, int *interface_up, int *interface_num) { int res; res = totemip_iface_check (bindnet, bound_to, interface_up, interface_num, instance->totem_config->clear_node_high_bit); return (res); } /* * If the interface is up, the sockets for totem are built. If the interface is down * this function is requeued in the timer list to retry building the sockets later. */ static void timer_function_netif_check_timeout ( void *data) { struct totemudp_instance *instance = (struct totemudp_instance *)data; int interface_up; int interface_num; struct totem_ip_address *bind_address; /* * Build sockets for every interface */ netif_determine (instance, &instance->totem_interface->bindnet, &instance->totem_interface->boundto, &interface_up, &interface_num); /* * If the network interface isn't back up and we are already * in loopback mode, add timer to check again and return */ if ((instance->netif_bind_state == BIND_STATE_LOOPBACK && interface_up == 0) || (instance->my_memb_entries == 1 && instance->netif_bind_state == BIND_STATE_REGULAR && interface_up == 1)) { qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); /* * Add a timer to check for a downed regular interface */ return; } if (instance->totemudp_sockets.mcast_recv > 0) { close (instance->totemudp_sockets.mcast_recv); qb_loop_poll_del (instance->totemudp_poll_handle, instance->totemudp_sockets.mcast_recv); } if (instance->totemudp_sockets.mcast_send > 0) { close (instance->totemudp_sockets.mcast_send); } if (instance->totemudp_sockets.token > 0) { close (instance->totemudp_sockets.token); qb_loop_poll_del (instance->totemudp_poll_handle, instance->totemudp_sockets.token); } if (interface_up == 0) { /* * Interface is not up */ instance->netif_bind_state = BIND_STATE_LOOPBACK; bind_address = &localhost; /* * Add a timer to retry building interfaces and request memb_gather_enter */ qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } else { /* * Interface is up */ instance->netif_bind_state = BIND_STATE_REGULAR; bind_address = &instance->totem_interface->bindnet; } /* * Create and bind the multicast and unicast sockets */ (void)totemudp_build_sockets (instance, &instance->mcast_address, bind_address, &instance->totemudp_sockets, &instance->totem_interface->boundto); qb_loop_poll_add ( instance->totemudp_poll_handle, QB_LOOP_MED, instance->totemudp_sockets.mcast_recv, POLLIN, instance, net_deliver_fn); qb_loop_poll_add ( instance->totemudp_poll_handle, QB_LOOP_MED, instance->totemudp_sockets.token, POLLIN, instance, net_deliver_fn); totemip_copy (&instance->my_id, &instance->totem_interface->boundto); /* * This reports changes in the interface to the user and totemsrp */ if (instance->netif_bind_state == BIND_STATE_REGULAR) { if (instance->netif_state_report & NETIF_STATE_REPORT_UP) { log_printf (instance->totemudp_log_level_notice, "The network interface [%s] is now up.", totemip_print (&instance->totem_interface->boundto)); instance->netif_state_report = NETIF_STATE_REPORT_DOWN; instance->totemudp_iface_change_fn (instance->context, &instance->my_id); } /* * Add a timer to check for interface going down in single membership */ if (instance->my_memb_entries == 1) { qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } } else { if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) { log_printf (instance->totemudp_log_level_notice, "The network interface is down."); instance->totemudp_iface_change_fn (instance->context, &instance->my_id); } instance->netif_state_report = NETIF_STATE_REPORT_UP; } } /* Set the socket priority to INTERACTIVE to ensure that our messages don't get queued behind anything else */ static void totemudp_traffic_control_set(struct totemudp_instance *instance, int sock) { #ifdef SO_PRIORITY int prio = 6; /* TC_PRIO_INTERACTIVE */ if (setsockopt(sock, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(int))) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set traffic priority"); } #endif } static int totemudp_build_sockets_ip ( struct totemudp_instance *instance, struct totem_ip_address *mcast_address, struct totem_ip_address *bindnet_address, struct totemudp_socket *sockets, struct totem_ip_address *bound_to, int interface_num) { struct sockaddr_storage sockaddr; struct ipv6_mreq mreq6; struct ip_mreq mreq; struct sockaddr_storage mcast_ss, boundto_ss; struct sockaddr_in6 *mcast_sin6 = (struct sockaddr_in6 *)&mcast_ss; struct sockaddr_in *mcast_sin = (struct sockaddr_in *)&mcast_ss; struct sockaddr_in *boundto_sin = (struct sockaddr_in *)&boundto_ss; unsigned int sendbuf_size; unsigned int recvbuf_size; unsigned int optlen = sizeof (sendbuf_size); int addrlen; int res; int flag; /* * Create multicast recv socket */ sockets->mcast_recv = socket (bindnet_address->family, SOCK_DGRAM, 0); if (sockets->mcast_recv == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "socket() failed"); return (-1); } totemip_nosigpipe (sockets->mcast_recv); res = fcntl (sockets->mcast_recv, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set non-blocking operation on multicast socket"); return (-1); } /* * Force reuse */ flag = 1; if ( setsockopt(sockets->mcast_recv, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "setsockopt(SO_REUSEADDR) failed"); return (-1); } /* * Bind to multicast socket used for multicast receives */ totemip_totemip_to_sockaddr_convert(mcast_address, instance->totem_interface->ip_port, &sockaddr, &addrlen); res = bind (sockets->mcast_recv, (struct sockaddr *)&sockaddr, addrlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Unable to bind the socket to receive multicast packets"); return (-1); } /* * Setup mcast send socket */ sockets->mcast_send = socket (bindnet_address->family, SOCK_DGRAM, 0); if (sockets->mcast_send == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "socket() failed"); return (-1); } totemip_nosigpipe (sockets->mcast_send); res = fcntl (sockets->mcast_send, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set non-blocking operation on multicast socket"); return (-1); } /* * Force reuse */ flag = 1; if ( setsockopt(sockets->mcast_send, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "setsockopt(SO_REUSEADDR) failed"); return (-1); } totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port - 1, &sockaddr, &addrlen); res = bind (sockets->mcast_send, (struct sockaddr *)&sockaddr, addrlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Unable to bind the socket to send multicast packets"); return (-1); } /* * Setup unicast socket */ sockets->token = socket (bindnet_address->family, SOCK_DGRAM, 0); if (sockets->token == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "socket() failed"); return (-1); } totemip_nosigpipe (sockets->token); res = fcntl (sockets->token, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set non-blocking operation on token socket"); return (-1); } /* * Force reuse */ flag = 1; if ( setsockopt(sockets->token, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "setsockopt(SO_REUSEADDR) failed"); return (-1); } /* * Bind to unicast socket used for token send/receives * This has the side effect of binding to the correct interface */ totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &sockaddr, &addrlen); res = bind (sockets->token, (struct sockaddr *)&sockaddr, addrlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Unable to bind UDP unicast socket"); return (-1); } recvbuf_size = MCAST_SOCKET_BUFFER_SIZE; sendbuf_size = MCAST_SOCKET_BUFFER_SIZE; /* * Set buffer sizes to avoid overruns */ res = setsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen); res = setsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen); res = getsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, &optlen); if (res == 0) { log_printf (instance->totemudp_log_level_debug, "Receive multicast socket recv buffer size (%d bytes).", recvbuf_size); } res = getsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, &optlen); if (res == 0) { log_printf (instance->totemudp_log_level_debug, "Transmit multicast socket send buffer size (%d bytes).", sendbuf_size); } /* * Join group membership on socket */ totemip_totemip_to_sockaddr_convert(mcast_address, instance->totem_interface->ip_port, &mcast_ss, &addrlen); totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &boundto_ss, &addrlen); if (instance->totem_config->broadcast_use == 1) { unsigned int broadcast = 1; if ((setsockopt(sockets->mcast_recv, SOL_SOCKET, SO_BROADCAST, &broadcast, sizeof (broadcast))) == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "setting broadcast option failed"); return (-1); } if ((setsockopt(sockets->mcast_send, SOL_SOCKET, SO_BROADCAST, &broadcast, sizeof (broadcast))) == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "setting broadcast option failed"); return (-1); } } else { switch (bindnet_address->family) { case AF_INET: memset(&mreq, 0, sizeof(mreq)); mreq.imr_multiaddr.s_addr = mcast_sin->sin_addr.s_addr; mreq.imr_interface.s_addr = boundto_sin->sin_addr.s_addr; res = setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof (mreq)); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "join ipv4 multicast group failed"); return (-1); } break; case AF_INET6: memset(&mreq6, 0, sizeof(mreq6)); memcpy(&mreq6.ipv6mr_multiaddr, &mcast_sin6->sin6_addr, sizeof(struct in6_addr)); mreq6.ipv6mr_interface = interface_num; res = setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_JOIN_GROUP, &mreq6, sizeof (mreq6)); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "join ipv6 multicast group failed"); return (-1); } break; } } /* * Turn on multicast loopback */ flag = 1; switch ( bindnet_address->family ) { case AF_INET: res = setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_LOOP, &flag, sizeof (flag)); break; case AF_INET6: res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, &flag, sizeof (flag)); } if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Unable to turn on multicast loopback"); return (-1); } /* * Set multicast packets TTL */ flag = instance->totem_interface->ttl; if (bindnet_address->family == AF_INET6) { res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, &flag, sizeof (flag)); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "set mcast v6 TTL failed"); return (-1); } } else { res = setsockopt(sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_TTL, &flag, sizeof(flag)); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "set mcast v4 TTL failed"); return (-1); } } /* * Bind to a specific interface for multicast send and receive */ switch ( bindnet_address->family ) { case AF_INET: if (setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_IF, &boundto_sin->sin_addr, sizeof (boundto_sin->sin_addr)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "cannot select interface for multicast packets (send)"); return (-1); } if (setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_MULTICAST_IF, &boundto_sin->sin_addr, sizeof (boundto_sin->sin_addr)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "cannot select interface for multicast packets (recv)"); return (-1); } break; case AF_INET6: if (setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_IF, &interface_num, sizeof (interface_num)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "cannot select interface for multicast packets (send v6)"); return (-1); } if (setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_MULTICAST_IF, &interface_num, sizeof (interface_num)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "cannot select interface for multicast packets (recv v6)"); return (-1); } break; } return 0; } static int totemudp_build_sockets ( struct totemudp_instance *instance, struct totem_ip_address *mcast_address, struct totem_ip_address *bindnet_address, struct totemudp_socket *sockets, struct totem_ip_address *bound_to) { int interface_num; int interface_up; int res; /* * Determine the ip address bound to and the interface name */ res = netif_determine (instance, bindnet_address, bound_to, &interface_up, &interface_num); if (res == -1) { return (-1); } totemip_copy(&instance->my_id, bound_to); res = totemudp_build_sockets_ip (instance, mcast_address, bindnet_address, sockets, bound_to, interface_num); /* We only send out of the token socket */ totemudp_traffic_control_set(instance, sockets->token); return res; } /* * Totem Network interface - also does encryption/decryption * depends on poll abstraction, POSIX, IPV4 */ /* * Create an instance */ int totemudp_initialize ( qb_loop_t *poll_handle, void **udp_context, struct totem_config *totem_config, int interface_no, void *context, void (*deliver_fn) ( void *context, const void *msg, unsigned int msg_len), void (*iface_change_fn) ( void *context, const struct totem_ip_address *iface_address), void (*target_set_completed) ( void *context)) { struct totemudp_instance *instance; instance = malloc (sizeof (struct totemudp_instance)); if (instance == NULL) { return (-1); } totemudp_instance_initialize (instance); instance->totem_config = totem_config; /* * Configure logging */ instance->totemudp_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security; instance->totemudp_log_level_error = totem_config->totem_logging_configuration.log_level_error; instance->totemudp_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; instance->totemudp_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; instance->totemudp_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; instance->totemudp_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; instance->totemudp_log_printf = totem_config->totem_logging_configuration.log_printf; /* * Initialize random number generator for later use to generate salt */ memcpy (instance->totemudp_private_key, totem_config->private_key, totem_config->private_key_len); instance->totemudp_private_key_len = totem_config->private_key_len; init_crypto(instance); /* * Initialize local variables for totemudp */ instance->totem_interface = &totem_config->interfaces[interface_no]; totemip_copy (&instance->mcast_address, &instance->totem_interface->mcast_addr); memset (instance->iov_buffer, 0, FRAME_SIZE_MAX); instance->totemudp_poll_handle = poll_handle; instance->totem_interface->bindnet.nodeid = instance->totem_config->node_id; instance->context = context; instance->totemudp_deliver_fn = deliver_fn; instance->totemudp_iface_change_fn = iface_change_fn; instance->totemudp_target_set_completed = target_set_completed; totemip_localhost (instance->mcast_address.family, &localhost); localhost.nodeid = instance->totem_config->node_id; /* * RRP layer isn't ready to receive message because it hasn't * initialized yet. Add short timer to check the interfaces. */ qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, 100*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); *udp_context = instance; return (0); } void *totemudp_buffer_alloc (void) { return malloc (FRAME_SIZE_MAX); } void totemudp_buffer_release (void *ptr) { return free (ptr); } int totemudp_processor_count_set ( void *udp_context, int processor_count) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; instance->my_memb_entries = processor_count; qb_loop_timer_del (instance->totemudp_poll_handle, instance->timer_netif_check_timeout); if (processor_count == 1) { qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } return (res); } int totemudp_recv_flush (void *udp_context) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; struct pollfd ufd; int nfds; int res = 0; instance->flushing = 1; do { ufd.fd = instance->totemudp_sockets.mcast_recv; ufd.events = POLLIN; nfds = poll (&ufd, 1, 0); if (nfds == 1 && ufd.revents & POLLIN) { net_deliver_fn (instance->totemudp_sockets.mcast_recv, ufd.revents, instance); } } while (nfds == 1); instance->flushing = 0; return (res); } int totemudp_send_flush (void *udp_context) { return 0; } int totemudp_token_send ( void *udp_context, const void *msg, unsigned int msg_len) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; ucast_sendmsg (instance, &instance->token_target, msg, msg_len); return (res); } int totemudp_mcast_flush_send ( void *udp_context, const void *msg, unsigned int msg_len) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; mcast_sendmsg (instance, msg, msg_len); return (res); } int totemudp_mcast_noflush_send ( void *udp_context, const void *msg, unsigned int msg_len) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; mcast_sendmsg (instance, msg, msg_len); return (res); } extern int totemudp_iface_check (void *udp_context) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; timer_function_netif_check_timeout (instance); return (res); } extern void totemudp_net_mtu_adjust (void *udp_context, struct totem_config *totem_config) { #define UDPIP_HEADER_SIZE (20 + 8) /* 20 bytes for ip 8 bytes for udp */ if (totem_config->secauth == 1) { totem_config->net_mtu -= sizeof (struct security_header) + UDPIP_HEADER_SIZE; } else { totem_config->net_mtu -= UDPIP_HEADER_SIZE; } } const char *totemudp_iface_print (void *udp_context) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; const char *ret_char; ret_char = totemip_print (&instance->my_id); return (ret_char); } int totemudp_iface_get ( void *udp_context, struct totem_ip_address *addr) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; memcpy (addr, &instance->my_id, sizeof (struct totem_ip_address)); return (res); } int totemudp_token_target_set ( void *udp_context, const struct totem_ip_address *token_target) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; memcpy (&instance->token_target, token_target, sizeof (struct totem_ip_address)); instance->totemudp_target_set_completed (instance->context); return (res); } extern int totemudp_recv_mcast_empty ( void *udp_context) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; unsigned int res; struct sockaddr_storage system_from; struct msghdr msg_recv; struct pollfd ufd; int nfds; int msg_processed = 0; /* * Receive datagram */ msg_recv.msg_name = &system_from; msg_recv.msg_namelen = sizeof (struct sockaddr_storage); msg_recv.msg_iov = &instance->totemudp_iov_recv_flush; msg_recv.msg_iovlen = 1; #if !defined(COROSYNC_SOLARIS) msg_recv.msg_control = 0; msg_recv.msg_controllen = 0; msg_recv.msg_flags = 0; #else msg_recv.msg_accrights = NULL; msg_recv.msg_accrightslen = 0; #endif do { ufd.fd = instance->totemudp_sockets.mcast_recv; ufd.events = POLLIN; nfds = poll (&ufd, 1, 0); if (nfds == 1 && ufd.revents & POLLIN) { res = recvmsg (instance->totemudp_sockets.mcast_recv, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); if (res != -1) { msg_processed = 1; } else { msg_processed = -1; } } } while (nfds == 1); return (msg_processed); } diff --git a/exec/totemudpu.c b/exec/totemudpu.c index 250921e4..36a50238 100644 --- a/exec/totemudpu.c +++ b/exec/totemudpu.c @@ -1,1732 +1,1544 @@ /* * Copyright (c) 2005 MontaVista Software, Inc. * Copyright (c) 2006-2012 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define LOGSYS_UTILS_ONLY 1 #include #include "totemudpu.h" -#include "crypto.h" #include "util.h" -#ifdef HAVE_LIBNSS #include #include #include #include -#endif #ifndef MSG_NOSIGNAL #define MSG_NOSIGNAL 0 #endif #define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX) #define NETIF_STATE_REPORT_UP 1 #define NETIF_STATE_REPORT_DOWN 2 #define BIND_STATE_UNBOUND 0 #define BIND_STATE_REGULAR 1 #define BIND_STATE_LOOPBACK 2 #define HMAC_HASH_SIZE 20 struct security_header { unsigned char hash_digest[HMAC_HASH_SIZE]; /* The hash *MUST* be first in the data structure */ unsigned char salt[16]; /* random number */ char msg[0]; } __attribute__((packed)); struct totemudpu_member { struct list_head list; struct totem_ip_address member; int fd; }; struct totemudpu_instance { - hmac_state totemudpu_hmac_state; - - prng_state totemudpu_prng_state; - -#ifdef HAVE_LIBNSS PK11SymKey *nss_sym_key; PK11SymKey *nss_sym_key_sign; -#endif unsigned char totemudpu_private_key[1024]; unsigned int totemudpu_private_key_len; qb_loop_t *totemudpu_poll_handle; struct totem_interface *totem_interface; int netif_state_report; int netif_bind_state; void *context; void (*totemudpu_deliver_fn) ( void *context, const void *msg, unsigned int msg_len); void (*totemudpu_iface_change_fn) ( void *context, const struct totem_ip_address *iface_address); void (*totemudpu_target_set_completed) (void *context); /* * Function and data used to log messages */ int totemudpu_log_level_security; int totemudpu_log_level_error; int totemudpu_log_level_warning; int totemudpu_log_level_notice; int totemudpu_log_level_debug; int totemudpu_subsys_id; void (*totemudpu_log_printf) ( int level, int subsys, const char *function, const char *file, int line, const char *format, ...)__attribute__((format(printf, 6, 7))); void *udpu_context; char iov_buffer[FRAME_SIZE_MAX]; struct iovec totemudpu_iov_recv; struct list_head member_list; int stats_sent; int stats_recv; int stats_delv; int stats_remcasts; int stats_orf_token; struct timeval stats_tv_start; struct totem_ip_address my_id; int firstrun; qb_loop_timer_handle timer_netif_check_timeout; unsigned int my_memb_entries; struct totem_config *totem_config; struct totem_ip_address token_target; int token_socket; }; struct work_item { const void *msg; unsigned int msg_len; struct totemudpu_instance *instance; }; static int totemudpu_build_sockets ( struct totemudpu_instance *instance, struct totem_ip_address *bindnet_address, struct totem_ip_address *bound_to); static struct totem_ip_address localhost; static void totemudpu_instance_initialize (struct totemudpu_instance *instance) { memset (instance, 0, sizeof (struct totemudpu_instance)); instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN; instance->totemudpu_iov_recv.iov_base = instance->iov_buffer; instance->totemudpu_iov_recv.iov_len = FRAME_SIZE_MAX; //sizeof (instance->iov_buffer); /* * There is always atleast 1 processor */ instance->my_memb_entries = 1; list_init (&instance->member_list); } #define log_printf(level, format, args...) \ do { \ instance->totemudpu_log_printf ( \ level, instance->totemudpu_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ (const char *)format, ##args); \ } while (0); #define LOGSYS_PERROR(err_num, level, fmt, args...) \ do { \ char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \ const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \ instance->totemudpu_log_printf ( \ level, instance->totemudpu_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ fmt ": %s (%d)", ##args, _error_ptr, err_num); \ } while(0) -static int authenticate_and_decrypt_sober ( - struct totemudpu_instance *instance, - struct iovec *iov, - unsigned int iov_len) -{ - unsigned char keys[48]; - struct security_header *header = (struct security_header *)iov[0].iov_base; - prng_state keygen_prng_state; - prng_state stream_prng_state; - unsigned char *hmac_key = &keys[32]; - unsigned char *cipher_key = &keys[16]; - unsigned char *initial_vector = &keys[0]; - unsigned char digest_comparison[HMAC_HASH_SIZE]; - unsigned long len; - - /* - * Generate MAC, CIPHER, IV keys from private key - */ - memset (keys, 0, sizeof (keys)); - sober128_start (&keygen_prng_state); - sober128_add_entropy (instance->totemudpu_private_key, - instance->totemudpu_private_key_len, &keygen_prng_state); - sober128_add_entropy (header->salt, sizeof (header->salt), &keygen_prng_state); - - sober128_read (keys, sizeof (keys), &keygen_prng_state); - - /* - * Setup stream cipher - */ - sober128_start (&stream_prng_state); - sober128_add_entropy (cipher_key, 16, &stream_prng_state); - sober128_add_entropy (initial_vector, 16, &stream_prng_state); - - /* - * Authenticate contents of message - */ - hmac_init (&instance->totemudpu_hmac_state, DIGEST_SHA1, hmac_key, 16); - - hmac_process (&instance->totemudpu_hmac_state, - (unsigned char *)iov->iov_base + HMAC_HASH_SIZE, - iov->iov_len - HMAC_HASH_SIZE); - - len = hash_descriptor[DIGEST_SHA1]->hashsize; - assert (HMAC_HASH_SIZE >= len); - hmac_done (&instance->totemudpu_hmac_state, digest_comparison, &len); - - if (memcmp (digest_comparison, header->hash_digest, len) != 0) { - return (-1); - } - - /* - * Decrypt the contents of the message with the cipher key - */ - sober128_read ((unsigned char*)iov->iov_base + - sizeof (struct security_header), - iov->iov_len - sizeof (struct security_header), - &stream_prng_state); - - return (0); -} - -static void init_sober_crypto( - struct totemudpu_instance *instance) -{ - log_printf(instance->totemudpu_log_level_notice, - "Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0)."); - rng_make_prng (128, PRNG_SOBER, &instance->totemudpu_prng_state, NULL); -} - -#ifdef HAVE_LIBNSS - static unsigned char *copy_from_iovec( const struct iovec *iov, unsigned int iov_len, size_t *buf_size) { int i; size_t bufptr; size_t buflen = 0; unsigned char *newbuf; for (i=0; i buf_size) { copylen = buf_size - bufptr; } memcpy(iov[i].iov_base, buf+bufptr, copylen); bufptr += copylen; if (iov[i].iov_len != copylen) { iov[i].iov_len = copylen; return; } } } static void init_nss_crypto( struct totemudpu_instance *instance) { PK11SlotInfo* aes_slot = NULL; PK11SlotInfo* sha1_slot = NULL; SECItem key_item; SECStatus rv; log_printf(instance->totemudpu_log_level_notice, - "Initializing transmit/receive security: NSS AES128CBC/SHA1HMAC (mode 1)."); + "Initializing transmit/receive security: NSS AES256CBC/SHA1HMAC (mode %u).", TOTEM_CRYPTO_AES256); rv = NSS_NoDB_Init("."); if (rv != SECSuccess) { log_printf(instance->totemudpu_log_level_security, "NSS initialization failed (err %d)", PR_GetError()); goto out; } aes_slot = PK11_GetBestSlot(instance->totem_config->crypto_crypt_type, NULL); if (aes_slot == NULL) { log_printf(instance->totemudpu_log_level_security, "Unable to find security slot (err %d)", PR_GetError()); goto out; } sha1_slot = PK11_GetBestSlot(CKM_SHA_1_HMAC, NULL); if (sha1_slot == NULL) { log_printf(instance->totemudpu_log_level_security, "Unable to find security slot (err %d)", PR_GetError()); goto out; } /* * Make the private key into a SymKey that we can use */ key_item.type = siBuffer; key_item.data = instance->totem_config->private_key; - key_item.len = 32; /* Use 128 bits */ + key_item.len = 32; /* Use 256 bits */ instance->nss_sym_key = PK11_ImportSymKey(aes_slot, instance->totem_config->crypto_crypt_type, PK11_OriginUnwrap, CKA_ENCRYPT|CKA_DECRYPT, &key_item, NULL); if (instance->nss_sym_key == NULL) { log_printf(instance->totemudpu_log_level_security, "Failure to import key into NSS (err %d)", PR_GetError()); goto out; } instance->nss_sym_key_sign = PK11_ImportSymKey(sha1_slot, CKM_SHA_1_HMAC, PK11_OriginUnwrap, CKA_SIGN, &key_item, NULL); if (instance->nss_sym_key_sign == NULL) { log_printf(instance->totemudpu_log_level_security, "Failure to import key into NSS (err %d)", PR_GetError()); goto out; } out: return; } static int encrypt_and_sign_nss ( struct totemudpu_instance *instance, unsigned char *buf, size_t *buf_len, const struct iovec *iovec, unsigned int iov_len) { PK11Context* enc_context = NULL; SECStatus rv1, rv2; int tmp1_outlen; unsigned int tmp2_outlen; unsigned char *inbuf; unsigned char *data; unsigned char *outdata; size_t datalen; SECItem no_params; SECItem iv_item; struct security_header *header; SECItem *nss_sec_param; unsigned char nss_iv_data[16]; SECStatus rv; no_params.type = siBuffer; no_params.data = 0; no_params.len = 0; tmp1_outlen = tmp2_outlen = 0; inbuf = copy_from_iovec(iovec, iov_len, &datalen); if (!inbuf) { log_printf(instance->totemudpu_log_level_security, "malloc error copying buffer from iovec"); return -1; } data = inbuf + sizeof (struct security_header); datalen -= sizeof (struct security_header); outdata = buf + sizeof (struct security_header); header = (struct security_header *)buf; rv = PK11_GenerateRandom ( nss_iv_data, sizeof (nss_iv_data)); if (rv != SECSuccess) { log_printf(instance->totemudpu_log_level_security, "Failure to generate a random number %d", PR_GetError()); } memcpy(header->salt, nss_iv_data, sizeof(nss_iv_data)); iv_item.type = siBuffer; iv_item.data = nss_iv_data; iv_item.len = sizeof (nss_iv_data); nss_sec_param = PK11_ParamFromIV ( instance->totem_config->crypto_crypt_type, &iv_item); if (nss_sec_param == NULL) { log_printf(instance->totemudpu_log_level_security, "Failure to set up PKCS11 param (err %d)", PR_GetError()); free (inbuf); return (-1); } /* * Create cipher context for encryption */ enc_context = PK11_CreateContextBySymKey ( instance->totem_config->crypto_crypt_type, CKA_ENCRYPT, instance->nss_sym_key, nss_sec_param); if (!enc_context) { char err[1024]; PR_GetErrorText(err); err[PR_GetErrorTextLength()] = 0; log_printf(instance->totemudpu_log_level_security, "PK11_CreateContext failed (encrypt) crypt_type=%d (err %d): %s", instance->totem_config->crypto_crypt_type, PR_GetError(), err); free(inbuf); return -1; } rv1 = PK11_CipherOp(enc_context, outdata, &tmp1_outlen, FRAME_SIZE_MAX - sizeof(struct security_header), data, datalen); rv2 = PK11_DigestFinal(enc_context, outdata + tmp1_outlen, &tmp2_outlen, FRAME_SIZE_MAX - tmp1_outlen); PK11_DestroyContext(enc_context, PR_TRUE); *buf_len = tmp1_outlen + tmp2_outlen; free(inbuf); // memcpy(&outdata[*buf_len], nss_iv_data, sizeof(nss_iv_data)); if (rv1 != SECSuccess || rv2 != SECSuccess) goto out; /* Now do the digest */ enc_context = PK11_CreateContextBySymKey(CKM_SHA_1_HMAC, CKA_SIGN, instance->nss_sym_key_sign, &no_params); if (!enc_context) { char err[1024]; PR_GetErrorText(err); err[PR_GetErrorTextLength()] = 0; log_printf(instance->totemudpu_log_level_security, "encrypt: PK11_CreateContext failed (digest) err %d: %s", PR_GetError(), err); return -1; } PK11_DigestBegin(enc_context); rv1 = PK11_DigestOp(enc_context, outdata - 16, *buf_len + 16); rv2 = PK11_DigestFinal(enc_context, header->hash_digest, &tmp2_outlen, sizeof(header->hash_digest)); PK11_DestroyContext(enc_context, PR_TRUE); if (rv1 != SECSuccess || rv2 != SECSuccess) goto out; *buf_len = *buf_len + sizeof(struct security_header); SECITEM_FreeItem(nss_sec_param, PR_TRUE); return 0; out: return -1; } static int authenticate_and_decrypt_nss ( struct totemudpu_instance *instance, struct iovec *iov, unsigned int iov_len) { PK11Context* enc_context = NULL; SECStatus rv1, rv2; int tmp1_outlen; unsigned int tmp2_outlen; unsigned char outbuf[FRAME_SIZE_MAX]; unsigned char digest[HMAC_HASH_SIZE]; unsigned char *outdata; int result_len; unsigned char *data; unsigned char *inbuf; size_t datalen; struct security_header *header = (struct security_header *)iov[0].iov_base; SECItem no_params; SECItem ivdata; no_params.type = siBuffer; no_params.data = 0; no_params.len = 0; tmp1_outlen = tmp2_outlen = 0; if (iov_len > 1) { inbuf = copy_from_iovec(iov, iov_len, &datalen); if (!inbuf) { log_printf(instance->totemudpu_log_level_security, "malloc error copying buffer from iovec"); return -1; } } else { inbuf = (unsigned char *)iov[0].iov_base; datalen = iov[0].iov_len; } data = inbuf + sizeof (struct security_header) - 16; datalen = datalen - sizeof (struct security_header) + 16; outdata = outbuf + sizeof (struct security_header); /* Check the digest */ enc_context = PK11_CreateContextBySymKey ( CKM_SHA_1_HMAC, CKA_SIGN, instance->nss_sym_key_sign, &no_params); if (!enc_context) { char err[1024]; PR_GetErrorText(err); err[PR_GetErrorTextLength()] = 0; log_printf(instance->totemudpu_log_level_security, "PK11_CreateContext failed (check digest) err %d: %s", PR_GetError(), err); free (inbuf); return -1; } PK11_DigestBegin(enc_context); rv1 = PK11_DigestOp(enc_context, data, datalen); rv2 = PK11_DigestFinal(enc_context, digest, &tmp2_outlen, sizeof(digest)); PK11_DestroyContext(enc_context, PR_TRUE); if (rv1 != SECSuccess || rv2 != SECSuccess) { log_printf(instance->totemudpu_log_level_security, "Digest check failed"); return -1; } if (memcmp(digest, header->hash_digest, tmp2_outlen) != 0) { log_printf(instance->totemudpu_log_level_error, "Digest does not match"); return -1; } /* * Get rid of salt */ data += 16; datalen -= 16; /* Create cipher context for decryption */ ivdata.type = siBuffer; ivdata.data = header->salt; ivdata.len = sizeof(header->salt); enc_context = PK11_CreateContextBySymKey( instance->totem_config->crypto_crypt_type, CKA_DECRYPT, instance->nss_sym_key, &ivdata); if (!enc_context) { log_printf(instance->totemudpu_log_level_security, "PK11_CreateContext (decrypt) failed (err %d)", PR_GetError()); return -1; } rv1 = PK11_CipherOp(enc_context, outdata, &tmp1_outlen, sizeof(outbuf) - sizeof (struct security_header), data, datalen); if (rv1 != SECSuccess) { log_printf(instance->totemudpu_log_level_security, "PK11_CipherOp (decrypt) failed (err %d)", PR_GetError()); } rv2 = PK11_DigestFinal(enc_context, outdata + tmp1_outlen, &tmp2_outlen, sizeof(outbuf) - tmp1_outlen); PK11_DestroyContext(enc_context, PR_TRUE); result_len = tmp1_outlen + tmp2_outlen + sizeof (struct security_header); /* Copy it back to the buffer */ copy_to_iovec(iov, iov_len, outbuf, result_len); if (iov_len > 1) free(inbuf); if (rv1 != SECSuccess || rv2 != SECSuccess) return -1; return 0; } -#endif - -static int encrypt_and_sign_sober ( - struct totemudpu_instance *instance, - unsigned char *buf, - size_t *buf_len, - const struct iovec *iovec, - unsigned int iov_len) -{ - int i; - unsigned char *addr; - unsigned char keys[48]; - struct security_header *header; - unsigned char *hmac_key = &keys[32]; - unsigned char *cipher_key = &keys[16]; - unsigned char *initial_vector = &keys[0]; - unsigned long len; - size_t outlen = 0; - hmac_state hmac_st; - prng_state keygen_prng_state; - prng_state stream_prng_state; - prng_state *prng_state_in = &instance->totemudpu_prng_state; - - header = (struct security_header *)buf; - addr = buf + sizeof (struct security_header); - - memset (keys, 0, sizeof (keys)); - memset (header->salt, 0, sizeof (header->salt)); - - /* - * Generate MAC, CIPHER, IV keys from private key - */ - sober128_read (header->salt, sizeof (header->salt), prng_state_in); - sober128_start (&keygen_prng_state); - sober128_add_entropy (instance->totemudpu_private_key, - instance->totemudpu_private_key_len, - &keygen_prng_state); - sober128_add_entropy (header->salt, sizeof (header->salt), - &keygen_prng_state); - - sober128_read (keys, sizeof (keys), &keygen_prng_state); - - /* - * Setup stream cipher - */ - sober128_start (&stream_prng_state); - sober128_add_entropy (cipher_key, 16, &stream_prng_state); - sober128_add_entropy (initial_vector, 16, &stream_prng_state); - - outlen = sizeof (struct security_header); - /* - * Copy remainder of message, then encrypt it - */ - for (i = 1; i < iov_len; i++) { - memcpy (addr, iovec[i].iov_base, iovec[i].iov_len); - addr += iovec[i].iov_len; - outlen += iovec[i].iov_len; - } - - /* - * Encrypt message by XORing stream cipher data - */ - sober128_read (buf + sizeof (struct security_header), - outlen - sizeof (struct security_header), - &stream_prng_state); - - memset (&hmac_st, 0, sizeof (hmac_st)); - - /* - * Sign the contents of the message with the hmac key and store signature in message - */ - hmac_init (&hmac_st, DIGEST_SHA1, hmac_key, 16); - - hmac_process (&hmac_st, - buf + HMAC_HASH_SIZE, - outlen - HMAC_HASH_SIZE); - - len = hash_descriptor[DIGEST_SHA1]->hashsize; - - hmac_done (&hmac_st, header->hash_digest, &len); - - *buf_len = outlen; - - return 0; -} static int encrypt_and_sign_worker ( struct totemudpu_instance *instance, unsigned char *buf, size_t *buf_len, const struct iovec *iovec, unsigned int iov_len) { - if (instance->totem_config->crypto_type == TOTEM_CRYPTO_SOBER) { - return encrypt_and_sign_sober(instance, buf, buf_len, iovec, iov_len); - } -#ifdef HAVE_LIBNSS - if (instance->totem_config->crypto_type == TOTEM_CRYPTO_NSS) { + + if (instance->totem_config->crypto_type == TOTEM_CRYPTO_AES256) { return encrypt_and_sign_nss(instance, buf, buf_len, iovec, iov_len); } -#endif + return -1; } static int authenticate_and_decrypt ( struct totemudpu_instance *instance, struct iovec *iov, unsigned int iov_len) { unsigned char type; unsigned char *endbuf = (unsigned char *)iov[iov_len-1].iov_base; int res = -1; /* * Get the encryption type and remove it from the buffer */ type = endbuf[iov[iov_len-1].iov_len-1]; iov[iov_len-1].iov_len -= 1; - if (type == TOTEM_CRYPTO_SOBER) { - res = authenticate_and_decrypt_sober(instance, iov, iov_len); - } - -#ifdef HAVE_LIBNSS - if (type == TOTEM_CRYPTO_NSS) { + if (type == TOTEM_CRYPTO_AES256) { res = authenticate_and_decrypt_nss(instance, iov, iov_len); } -#endif - - /* - * If it failed, then try decrypting the whole packet - */ - if (res == -1) { - iov[iov_len-1].iov_len += 1; - res = authenticate_and_decrypt_sober(instance, iov, iov_len); - } return res; } static void init_crypto( struct totemudpu_instance *instance) { - init_sober_crypto(instance); -#ifdef HAVE_LIBNSS + init_nss_crypto(instance); -#endif } int totemudpu_crypto_set ( void *udpu_context, unsigned int type) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; /* * Validate crypto algorithm */ switch (type) { - case TOTEM_CRYPTO_SOBER: - log_printf(instance->totemudpu_log_level_security, - "Transmit security set to: libtomcrypt SOBER128/SHA1HMAC (mode 0)"); - break; - case TOTEM_CRYPTO_NSS: + case TOTEM_CRYPTO_AES256: log_printf(instance->totemudpu_log_level_security, - "Transmit security set to: NSS AES128CBC/SHA1HMAC (mode 1)"); + "Transmit security set to: NSS AES256CBC/SHA1HMAC (mode %u)", type); break; default: res = -1; break; } return (res); } static inline void ucast_sendmsg ( struct totemudpu_instance *instance, struct totem_ip_address *system_to, const void *msg, unsigned int msg_len) { struct msghdr msg_ucast; int res = 0; size_t buf_len; unsigned char sheader[sizeof (struct security_header)]; unsigned char encrypt_data[FRAME_SIZE_MAX]; struct iovec iovec_encrypt[2]; const struct iovec *iovec_sendmsg; struct sockaddr_storage sockaddr; struct iovec iovec; unsigned int iov_len; int addrlen; if (instance->totem_config->secauth == 1) { iovec_encrypt[0].iov_base = (void *)sheader; iovec_encrypt[0].iov_len = sizeof (struct security_header); iovec_encrypt[1].iov_base = (void *)msg; iovec_encrypt[1].iov_len = msg_len; /* * Encrypt and digest the message */ encrypt_and_sign_worker ( instance, encrypt_data, &buf_len, iovec_encrypt, 2); encrypt_data[buf_len++] = instance->totem_config->crypto_type; iovec_encrypt[0].iov_base = (void *)encrypt_data; iovec_encrypt[0].iov_len = buf_len; iovec_sendmsg = &iovec_encrypt[0]; iov_len = 1; } else { iovec.iov_base = (void *)msg; iovec.iov_len = msg_len; iovec_sendmsg = &iovec; iov_len = 1; } /* * Build unicast message */ totemip_totemip_to_sockaddr_convert(system_to, instance->totem_interface->ip_port, &sockaddr, &addrlen); msg_ucast.msg_name = &sockaddr; msg_ucast.msg_namelen = addrlen; msg_ucast.msg_iov = (void *) iovec_sendmsg; msg_ucast.msg_iovlen = iov_len; #if !defined(COROSYNC_SOLARIS) msg_ucast.msg_control = 0; msg_ucast.msg_controllen = 0; msg_ucast.msg_flags = 0; #else msg_ucast.msg_accrights = NULL; msg_ucast.msg_accrightslen = 0; #endif /* * Transmit unicast message * An error here is recovered by totemsrp */ res = sendmsg (instance->token_socket, &msg_ucast, MSG_NOSIGNAL); if (res < 0) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug, "sendmsg(ucast) failed (non-critical)"); } } static inline void mcast_sendmsg ( struct totemudpu_instance *instance, const void *msg, unsigned int msg_len) { struct msghdr msg_mcast; int res = 0; size_t buf_len; unsigned char sheader[sizeof (struct security_header)]; unsigned char encrypt_data[FRAME_SIZE_MAX]; struct iovec iovec_encrypt[2]; struct iovec iovec; const struct iovec *iovec_sendmsg; struct sockaddr_storage sockaddr; unsigned int iov_len; int addrlen; struct list_head *list; struct totemudpu_member *member; if (instance->totem_config->secauth == 1) { iovec_encrypt[0].iov_base = (void *)sheader; iovec_encrypt[0].iov_len = sizeof (struct security_header); iovec_encrypt[1].iov_base = (void *)msg; iovec_encrypt[1].iov_len = msg_len; /* * Encrypt and digest the message */ encrypt_and_sign_worker ( instance, encrypt_data, &buf_len, iovec_encrypt, 2); encrypt_data[buf_len++] = instance->totem_config->crypto_type; iovec_encrypt[0].iov_base = (void *)encrypt_data; iovec_encrypt[0].iov_len = buf_len; iovec_sendmsg = &iovec_encrypt[0]; iov_len = 1; } else { iovec.iov_base = (void *)msg; iovec.iov_len = msg_len; iovec_sendmsg = &iovec; iov_len = 1; } /* * Build multicast message */ for (list = instance->member_list.next; list != &instance->member_list; list = list->next) { member = list_entry (list, struct totemudpu_member, list); totemip_totemip_to_sockaddr_convert(&member->member, instance->totem_interface->ip_port, &sockaddr, &addrlen); msg_mcast.msg_name = &sockaddr; msg_mcast.msg_namelen = addrlen; msg_mcast.msg_iov = (void *) iovec_sendmsg; msg_mcast.msg_iovlen = iov_len; #if !defined(COROSYNC_SOLARIS) msg_mcast.msg_control = 0; msg_mcast.msg_controllen = 0; msg_mcast.msg_flags = 0; #else msg_mcast.msg_accrights = NULL; msg_mcast.msg_accrightslen = 0; #endif /* * Transmit multicast message * An error here is recovered by totemsrp */ res = sendmsg (member->fd, &msg_mcast, MSG_NOSIGNAL); if (res < 0) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug, "sendmsg(mcast) failed (non-critical)"); } } } int totemudpu_finalize ( void *udpu_context) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; if (instance->token_socket > 0) { close (instance->token_socket); qb_loop_poll_del (instance->totemudpu_poll_handle, instance->token_socket); } return (res); } static int net_deliver_fn ( int fd, int revents, void *data) { struct totemudpu_instance *instance = (struct totemudpu_instance *)data; struct msghdr msg_recv; struct iovec *iovec; struct sockaddr_storage system_from; int bytes_received; int res = 0; unsigned char *msg_offset; unsigned int size_delv; iovec = &instance->totemudpu_iov_recv; /* * Receive datagram */ msg_recv.msg_name = &system_from; msg_recv.msg_namelen = sizeof (struct sockaddr_storage); msg_recv.msg_iov = iovec; msg_recv.msg_iovlen = 1; #if !defined(COROSYNC_SOLARIS) msg_recv.msg_control = 0; msg_recv.msg_controllen = 0; msg_recv.msg_flags = 0; #else msg_recv.msg_accrights = NULL; msg_recv.msg_accrightslen = 0; #endif bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); if (bytes_received == -1) { return (0); } else { instance->stats_recv += bytes_received; } if ((instance->totem_config->secauth == 1) && (bytes_received < sizeof (struct security_header))) { log_printf (instance->totemudpu_log_level_security, "Received message is too short... ignoring %d.", bytes_received); return (0); } iovec->iov_len = bytes_received; if (instance->totem_config->secauth == 1) { /* * Authenticate and if authenticated, decrypt datagram */ res = authenticate_and_decrypt (instance, iovec, 1); if (res == -1) { log_printf (instance->totemudpu_log_level_security, "Received message has invalid digest... ignoring."); log_printf (instance->totemudpu_log_level_security, "Invalid packet data"); iovec->iov_len = FRAME_SIZE_MAX; return 0; } msg_offset = (unsigned char *)iovec->iov_base + sizeof (struct security_header); size_delv = bytes_received - sizeof (struct security_header); } else { msg_offset = (void *)iovec->iov_base; size_delv = bytes_received; } /* * Handle incoming message */ instance->totemudpu_deliver_fn ( instance->context, msg_offset, size_delv); iovec->iov_len = FRAME_SIZE_MAX; return (0); } static int netif_determine ( struct totemudpu_instance *instance, struct totem_ip_address *bindnet, struct totem_ip_address *bound_to, int *interface_up, int *interface_num) { int res; res = totemip_iface_check (bindnet, bound_to, interface_up, interface_num, instance->totem_config->clear_node_high_bit); return (res); } /* * If the interface is up, the sockets for totem are built. If the interface is down * this function is requeued in the timer list to retry building the sockets later. */ static void timer_function_netif_check_timeout ( void *data) { struct totemudpu_instance *instance = (struct totemudpu_instance *)data; int interface_up; int interface_num; struct totem_ip_address *bind_address; /* * Build sockets for every interface */ netif_determine (instance, &instance->totem_interface->bindnet, &instance->totem_interface->boundto, &interface_up, &interface_num); /* * If the network interface isn't back up and we are already * in loopback mode, add timer to check again and return */ if ((instance->netif_bind_state == BIND_STATE_LOOPBACK && interface_up == 0) || (instance->my_memb_entries == 1 && instance->netif_bind_state == BIND_STATE_REGULAR && interface_up == 1)) { qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); /* * Add a timer to check for a downed regular interface */ return; } if (instance->token_socket > 0) { close (instance->token_socket); qb_loop_poll_del (instance->totemudpu_poll_handle, instance->token_socket); } if (interface_up == 0) { /* * Interface is not up */ instance->netif_bind_state = BIND_STATE_LOOPBACK; bind_address = &localhost; /* * Add a timer to retry building interfaces and request memb_gather_enter */ qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } else { /* * Interface is up */ instance->netif_bind_state = BIND_STATE_REGULAR; bind_address = &instance->totem_interface->bindnet; } /* * Create and bind the multicast and unicast sockets */ totemudpu_build_sockets (instance, bind_address, &instance->totem_interface->boundto); qb_loop_poll_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->token_socket, POLLIN, instance, net_deliver_fn); totemip_copy (&instance->my_id, &instance->totem_interface->boundto); /* * This reports changes in the interface to the user and totemsrp */ if (instance->netif_bind_state == BIND_STATE_REGULAR) { if (instance->netif_state_report & NETIF_STATE_REPORT_UP) { log_printf (instance->totemudpu_log_level_notice, "The network interface [%s] is now up.", totemip_print (&instance->totem_interface->boundto)); instance->netif_state_report = NETIF_STATE_REPORT_DOWN; instance->totemudpu_iface_change_fn (instance->context, &instance->my_id); } /* * Add a timer to check for interface going down in single membership */ if (instance->my_memb_entries == 1) { qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } } else { if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) { log_printf (instance->totemudpu_log_level_notice, "The network interface is down."); instance->totemudpu_iface_change_fn (instance->context, &instance->my_id); } instance->netif_state_report = NETIF_STATE_REPORT_UP; } } /* Set the socket priority to INTERACTIVE to ensure that our messages don't get queued behind anything else */ static void totemudpu_traffic_control_set(struct totemudpu_instance *instance, int sock) { #ifdef SO_PRIORITY int prio = 6; /* TC_PRIO_INTERACTIVE */ if (setsockopt(sock, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(int))) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "Could not set traffic priority"); } #endif } static int totemudpu_build_sockets_ip ( struct totemudpu_instance *instance, struct totem_ip_address *bindnet_address, struct totem_ip_address *bound_to, int interface_num) { struct sockaddr_storage sockaddr; int addrlen; int res; unsigned int recvbuf_size; unsigned int optlen = sizeof (recvbuf_size); /* * Setup unicast socket */ instance->token_socket = socket (bindnet_address->family, SOCK_DGRAM, 0); if (instance->token_socket == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "socket() failed"); return (-1); } totemip_nosigpipe (instance->token_socket); res = fcntl (instance->token_socket, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "Could not set non-blocking operation on token socket"); return (-1); } /* * Bind to unicast socket used for token send/receives * This has the side effect of binding to the correct interface */ totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &sockaddr, &addrlen); res = bind (instance->token_socket, (struct sockaddr *)&sockaddr, addrlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "bind token socket failed"); return (-1); } /* * the token_socket can receive many messages. Allow a large number * of receive messages on this socket */ recvbuf_size = MCAST_SOCKET_BUFFER_SIZE; res = setsockopt (instance->token_socket, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_notice, "Could not set recvbuf size"); } return 0; } static int totemudpu_build_sockets ( struct totemudpu_instance *instance, struct totem_ip_address *bindnet_address, struct totem_ip_address *bound_to) { int interface_num; int interface_up; int res; /* * Determine the ip address bound to and the interface name */ res = netif_determine (instance, bindnet_address, bound_to, &interface_up, &interface_num); if (res == -1) { return (-1); } totemip_copy(&instance->my_id, bound_to); res = totemudpu_build_sockets_ip (instance, bindnet_address, bound_to, interface_num); /* We only send out of the token socket */ totemudpu_traffic_control_set(instance, instance->token_socket); return res; } /* * Totem Network interface - also does encryption/decryption * depends on poll abstraction, POSIX, IPV4 */ /* * Create an instance */ int totemudpu_initialize ( qb_loop_t *poll_handle, void **udpu_context, struct totem_config *totem_config, int interface_no, void *context, void (*deliver_fn) ( void *context, const void *msg, unsigned int msg_len), void (*iface_change_fn) ( void *context, const struct totem_ip_address *iface_address), void (*target_set_completed) ( void *context)) { struct totemudpu_instance *instance; instance = malloc (sizeof (struct totemudpu_instance)); if (instance == NULL) { return (-1); } totemudpu_instance_initialize (instance); instance->totem_config = totem_config; /* * Configure logging */ instance->totemudpu_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security; instance->totemudpu_log_level_error = totem_config->totem_logging_configuration.log_level_error; instance->totemudpu_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; instance->totemudpu_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; instance->totemudpu_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; instance->totemudpu_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; instance->totemudpu_log_printf = totem_config->totem_logging_configuration.log_printf; /* * Initialize random number generator for later use to generate salt */ memcpy (instance->totemudpu_private_key, totem_config->private_key, totem_config->private_key_len); instance->totemudpu_private_key_len = totem_config->private_key_len; init_crypto(instance); /* * Initialize local variables for totemudpu */ instance->totem_interface = &totem_config->interfaces[interface_no]; memset (instance->iov_buffer, 0, FRAME_SIZE_MAX); instance->totemudpu_poll_handle = poll_handle; instance->totem_interface->bindnet.nodeid = instance->totem_config->node_id; instance->context = context; instance->totemudpu_deliver_fn = deliver_fn; instance->totemudpu_iface_change_fn = iface_change_fn; instance->totemudpu_target_set_completed = target_set_completed; totemip_localhost (AF_INET, &localhost); localhost.nodeid = instance->totem_config->node_id; /* * RRP layer isn't ready to receive message because it hasn't * initialized yet. Add short timer to check the interfaces. */ qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, 100*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); *udpu_context = instance; return (0); } void *totemudpu_buffer_alloc (void) { return malloc (FRAME_SIZE_MAX); } void totemudpu_buffer_release (void *ptr) { return free (ptr); } int totemudpu_processor_count_set ( void *udpu_context, int processor_count) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; instance->my_memb_entries = processor_count; qb_loop_timer_del (instance->totemudpu_poll_handle, instance->timer_netif_check_timeout); if (processor_count == 1) { qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } return (res); } int totemudpu_recv_flush (void *udpu_context) { int res = 0; return (res); } int totemudpu_send_flush (void *udpu_context) { int res = 0; return (res); } int totemudpu_token_send ( void *udpu_context, const void *msg, unsigned int msg_len) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; ucast_sendmsg (instance, &instance->token_target, msg, msg_len); return (res); } int totemudpu_mcast_flush_send ( void *udpu_context, const void *msg, unsigned int msg_len) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; mcast_sendmsg (instance, msg, msg_len); return (res); } int totemudpu_mcast_noflush_send ( void *udpu_context, const void *msg, unsigned int msg_len) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; mcast_sendmsg (instance, msg, msg_len); return (res); } extern int totemudpu_iface_check (void *udpu_context) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; timer_function_netif_check_timeout (instance); return (res); } extern void totemudpu_net_mtu_adjust (void *udpu_context, struct totem_config *totem_config) { #define UDPIP_HEADER_SIZE (20 + 8) /* 20 bytes for ip 8 bytes for udp */ if (totem_config->secauth == 1) { totem_config->net_mtu -= sizeof (struct security_header) + UDPIP_HEADER_SIZE; } else { totem_config->net_mtu -= UDPIP_HEADER_SIZE; } } const char *totemudpu_iface_print (void *udpu_context) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; const char *ret_char; ret_char = totemip_print (&instance->my_id); return (ret_char); } int totemudpu_iface_get ( void *udpu_context, struct totem_ip_address *addr) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; memcpy (addr, &instance->my_id, sizeof (struct totem_ip_address)); return (res); } int totemudpu_token_target_set ( void *udpu_context, const struct totem_ip_address *token_target) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; memcpy (&instance->token_target, token_target, sizeof (struct totem_ip_address)); instance->totemudpu_target_set_completed (instance->context); return (res); } extern int totemudpu_recv_mcast_empty ( void *udpu_context) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; unsigned int res; struct sockaddr_storage system_from; struct msghdr msg_recv; struct pollfd ufd; int nfds; int msg_processed = 0; /* * Receive datagram */ msg_recv.msg_name = &system_from; msg_recv.msg_namelen = sizeof (struct sockaddr_storage); msg_recv.msg_iov = &instance->totemudpu_iov_recv; msg_recv.msg_iovlen = 1; #if !defined(COROSYNC_SOLARIS) msg_recv.msg_control = 0; msg_recv.msg_controllen = 0; msg_recv.msg_flags = 0; #else msg_recv.msg_accrights = NULL; msg_recv.msg_accrightslen = 0; #endif do { ufd.fd = instance->token_socket; ufd.events = POLLIN; nfds = poll (&ufd, 1, 0); if (nfds == 1 && ufd.revents & POLLIN) { res = recvmsg (instance->token_socket, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); if (res != -1) { msg_processed = 1; } else { msg_processed = -1; } } } while (nfds == 1); return (msg_processed); } int totemudpu_member_add ( void *udpu_context, const struct totem_ip_address *member) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; struct totemudpu_member *new_member; int res; unsigned int sendbuf_size; unsigned int optlen = sizeof (sendbuf_size); new_member = malloc (sizeof (struct totemudpu_member)); if (new_member == NULL) { return (-1); } log_printf (LOGSYS_LEVEL_NOTICE, "adding new UDPU member {%s}", totemip_print(member)); list_init (&new_member->list); list_add_tail (&new_member->list, &instance->member_list); memcpy (&new_member->member, member, sizeof (struct totem_ip_address)); new_member->fd = socket (member->family, SOCK_DGRAM, 0); if (new_member->fd == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "Could not create socket for new member"); return (-1); } totemip_nosigpipe (new_member->fd); res = fcntl (new_member->fd, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "Could not set non-blocking operation on token socket"); return (-1); } /* * These sockets are used to send multicast messages, so their buffers * should be large */ sendbuf_size = MCAST_SOCKET_BUFFER_SIZE; res = setsockopt (new_member->fd, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_notice, "Could not set sendbuf size"); } return (0); } int totemudpu_member_remove ( void *udpu_context, const struct totem_ip_address *token_target) { int found = 0; struct list_head *list; struct totemudpu_member *member; struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; /* * Find the member to remove and close its socket */ for (list = instance->member_list.next; list != &instance->member_list; list = list->next) { member = list_entry (list, struct totemudpu_member, list); if (totemip_compare (token_target, &member->member)==0) { log_printf(LOGSYS_LEVEL_NOTICE, "removing UDPU member {%s}", totemip_print(&member->member)); if (member->fd > 0) { log_printf(LOGSYS_LEVEL_DEBUG, "Closing socket to: {%s}", totemip_print(&member->member)); qb_loop_poll_del (instance->totemudpu_poll_handle, member->fd); close (member->fd); } found = 1; break; } } /* * Delete the member from the list */ if (found) { list_del (list); } instance = NULL; return (0); } diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h index a9cb1f3d..31285378 100644 --- a/include/corosync/totem/totem.h +++ b/include/corosync/totem/totem.h @@ -1,280 +1,280 @@ /* * Copyright (c) 2005 MontaVista Software, Inc. * Copyright (c) 2006-2012 Red Hat, Inc. * * Author: Steven Dake (sdake@redhat.com) * * All rights reserved. * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef TOTEM_H_DEFINED #define TOTEM_H_DEFINED #include "totemip.h" #include #ifdef HAVE_SMALL_MEMORY_FOOTPRINT #define PROCESSOR_COUNT_MAX 16 #define MESSAGE_SIZE_MAX 1024*64 #define MESSAGE_QUEUE_MAX 512 #else #define PROCESSOR_COUNT_MAX 384 #define MESSAGE_SIZE_MAX 1024*1024 /* (1MB) */ #define MESSAGE_QUEUE_MAX ((4 * MESSAGE_SIZE_MAX) / totem_config->net_mtu) #endif /* HAVE_SMALL_MEMORY_FOOTPRINT */ #define FRAME_SIZE_MAX 10000 #define TRANSMITS_ALLOWED 16 #define SEND_THREADS_MAX 16 #define INTERFACE_MAX 2 /** * Maximum number of continuous gather states */ #define MAX_NO_CONT_GATHER 3 struct totem_interface { struct totem_ip_address bindnet; struct totem_ip_address boundto; struct totem_ip_address mcast_addr; uint16_t ip_port; uint16_t ttl; int member_count; struct totem_ip_address member_list[PROCESSOR_COUNT_MAX]; }; struct totem_logging_configuration { void (*log_printf) ( int level, int subsys, const char *function_name, const char *file_name, int file_line, const char *format, ...) __attribute__((format(printf, 6, 7))); int log_level_security; int log_level_error; int log_level_warning; int log_level_notice; int log_level_debug; int log_subsys_id; }; enum { TOTEM_PRIVATE_KEY_LEN = 128 }; enum { TOTEM_RRP_MODE_BYTES = 64 }; typedef enum { TOTEM_TRANSPORT_UDP = 0, TOTEM_TRANSPORT_UDPU = 1, TOTEM_TRANSPORT_RDMA = 2 } totem_transport_t; struct totem_config { int version; /* * network */ struct totem_interface *interfaces; unsigned int interface_count; unsigned int node_id; unsigned int clear_node_high_bit; /* * key information */ unsigned char private_key[TOTEM_PRIVATE_KEY_LEN]; unsigned int private_key_len; /* * Totem configuration parameters */ unsigned int token_timeout; unsigned int token_retransmit_timeout; unsigned int token_hold_timeout; unsigned int token_retransmits_before_loss_const; unsigned int join_timeout; unsigned int send_join_timeout; unsigned int consensus_timeout; unsigned int merge_timeout; unsigned int downcheck_timeout; unsigned int fail_to_recv_const; unsigned int seqno_unchanged_const; unsigned int rrp_token_expired_timeout; unsigned int rrp_problem_count_timeout; unsigned int rrp_problem_count_threshold; unsigned int rrp_problem_count_mcast_threshold; unsigned int rrp_autorecovery_check_timeout; char rrp_mode[TOTEM_RRP_MODE_BYTES]; struct totem_logging_configuration totem_logging_configuration; unsigned int secauth; unsigned int net_mtu; unsigned int threads; unsigned int heartbeat_failures_allowed; unsigned int max_network_delay; unsigned int window_size; unsigned int max_messages; const char *vsf_type; unsigned int broadcast_use; - enum { TOTEM_CRYPTO_SOBER=0, TOTEM_CRYPTO_NSS } crypto_type; + enum { TOTEM_CRYPTO_AES256 = 0} crypto_type; int crypto_crypt_type; int crypto_sign_type; totem_transport_t transport_number; unsigned int miss_count_const; }; #define TOTEM_CONFIGURATION_TYPE enum totem_configuration_type { TOTEM_CONFIGURATION_REGULAR, TOTEM_CONFIGURATION_TRANSITIONAL }; #define TOTEM_CALLBACK_TOKEN_TYPE enum totem_callback_token_type { TOTEM_CALLBACK_TOKEN_RECEIVED = 1, TOTEM_CALLBACK_TOKEN_SENT = 2 }; enum totem_event_type { TOTEM_EVENT_DELIVERY_CONGESTED, TOTEM_EVENT_NEW_MSG, }; #define MEMB_RING_ID struct memb_ring_id { struct totem_ip_address rep; unsigned long long seq; } __attribute__((packed)); typedef struct { int is_dirty; time_t last_updated; } totem_stats_header_t; typedef struct { totem_stats_header_t hdr; uint32_t iface_changes; } totemnet_stats_t; typedef struct { totem_stats_header_t hdr; totemnet_stats_t *net; char *algo_name; uint8_t *faulty; uint32_t interface_count; } totemrrp_stats_t; typedef struct { uint32_t rx; uint32_t tx; int backlog_calc; } totemsrp_token_stats_t; typedef struct { totem_stats_header_t hdr; totemrrp_stats_t *rrp; uint64_t orf_token_tx; uint64_t orf_token_rx; uint64_t memb_merge_detect_tx; uint64_t memb_merge_detect_rx; uint64_t memb_join_tx; uint64_t memb_join_rx; uint64_t mcast_tx; uint64_t mcast_retx; uint64_t mcast_rx; uint64_t memb_commit_token_tx; uint64_t memb_commit_token_rx; uint64_t token_hold_cancel_tx; uint64_t token_hold_cancel_rx; uint64_t operational_entered; uint64_t operational_token_lost; uint64_t gather_entered; uint64_t gather_token_lost; uint64_t commit_entered; uint64_t commit_token_lost; uint64_t recovery_entered; uint64_t recovery_token_lost; uint64_t consensus_timeouts; uint64_t rx_msg_dropped; uint32_t continuous_gather; int earliest_token; int latest_token; #define TOTEM_TOKEN_STATS_MAX 100 totemsrp_token_stats_t token[TOTEM_TOKEN_STATS_MAX]; } totemsrp_stats_t; #define TOTEM_CONFIGURATION_TYPE typedef struct { totem_stats_header_t hdr; totemsrp_stats_t *srp; } totemmrp_stats_t; typedef struct { totem_stats_header_t hdr; totemmrp_stats_t *mrp; uint32_t msg_reserved; uint32_t msg_queue_avail; } totempg_stats_t; #endif /* TOTEM_H_DEFINED */ diff --git a/man/corosync.conf.5 b/man/corosync.conf.5 index 7cd1824f..e7722da1 100644 --- a/man/corosync.conf.5 +++ b/man/corosync.conf.5 @@ -1,641 +1,629 @@ .\"/* .\" * Copyright (c) 2005 MontaVista Software, Inc. .\" * Copyright (c) 2006-2012 Red Hat, Inc. .\" * .\" * All rights reserved. .\" * .\" * Author: Steven Dake (sdake@redhat.com) .\" * .\" * This software licensed under BSD license, the text of which follows: .\" * .\" * Redistribution and use in source and binary forms, with or without .\" * modification, are permitted provided that the following conditions are met: .\" * .\" * - Redistributions of source code must retain the above copyright notice, .\" * this list of conditions and the following disclaimer. .\" * - Redistributions in binary form must reproduce the above copyright notice, .\" * this list of conditions and the following disclaimer in the documentation .\" * and/or other materials provided with the distribution. .\" * - Neither the name of the MontaVista Software, Inc. nor the names of its .\" * contributors may be used to endorse or promote products derived from this .\" * software without specific prior written permission. .\" * .\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" .\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE .\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF .\" * THE POSSIBILITY OF SUCH DAMAGE. .\" */ .TH COROSYNC_CONF 5 2012-01-12 "corosync Man Page" "Corosync Cluster Engine Programmer's Manual" .SH NAME corosync.conf - corosync executive configuration file .SH SYNOPSIS /etc/corosync/corosync.conf .SH DESCRIPTION The corosync.conf instructs the corosync executive about various parameters needed to control the corosync executive. Empty lines and lines starting with # character are ignored. The configuration file consists of bracketed top level directives. The possible directive choices are: .TP totem { } This top level directive contains configuration options for the totem protocol. .TP logging { } This top level directive contains configuration options for logging. .TP quorum { } This top level directive contains configuration options for quorum. .TP nodelist { } This top level directive contains configuration options for nodes in cluster. .PP .PP Within the .B totem directive, an interface directive is required. There is also one configuration option which is required: .PP .PP Within the .B interface sub-directive of totem there are four parameters which are required. There is one parameter which is optional. .TP ringnumber This specifies the ring number for the interface. When using the redundant ring protocol, each interface should specify separate ring numbers to uniquely identify to the membership protocol which interface to use for which redundant ring. The ringnumber must start at 0. .TP bindnetaddr This specifies the network address the corosync executive should bind to. For example, if the local interface is 192.168.5.92 with netmask 255.255.255.0, set bindnetaddr to 192.168.5.0. If the local interface is 192.168.5.92 with netmask 255.255.255.192, set bindnetaddr to 192.168.5.64, and so forth. This may also be an IPV6 address, in which case IPV6 networking will be used. In this case, the full address must be specified and there is no automatic selection of the network interface within a specific subnet as with IPv4. If IPv6 networking is used, the nodeid field in nodelist must be specified. .TP broadcast This is optional and can be set to yes. If it is set to yes, the broadcast address will be used for communication. If this option is set, mcastaddr should not be set. .TP mcastaddr This is the multicast address used by corosync executive. The default should work for most networks, but the network administrator should be queried about a multicast address to use. Avoid 224.x.x.x because this is a "config" multicast address. This may also be an IPV6 multicast address, in which case IPV6 networking will be used. If IPv6 networking is used, the nodeid field in nodelist must be specified. It's not needed to use this option if cluster_name option is used. If both options are used, mcastaddr has higher priority. .TP mcastport This specifies the UDP port number. It is possible to use the same multicast address on a network with the corosync services configured for different UDP ports. Please note corosync uses two UDP ports mcastport (for mcast receives) and mcastport - 1 (for mcast sends). If you have multiple clusters on the same network using the same mcastaddr please configure the mcastports with a gap. .TP ttl This specifies the Time To Live (TTL). If you run your cluster on a routed network then the default of "1" will be too small. This option provides a way to increase this up to 255. The valid range is 0..255. Note that this is only valid on multicast transport types. .PP .PP Within the .B totem directive, there are seven configuration options of which one is required, five are optional, and one is required when IPV6 is configured in the interface subdirective. The required directive controls the version of the totem configuration. The optional option unless using IPV6 directive controls identification of the processor. The optional options control secrecy and authentication, the redundant ring mode of operation and maximum network MTU field. .TP version This specifies the version of the configuration file. Currently the only valid version for this directive is 2. .PP .PP .TP clear_node_high_bit This configuration option is optional and is only relevant when no nodeid is specified. Some corosync clients require a signed 32 bit nodeid that is greater than zero however by default corosync uses all 32 bits of the IPv4 address space when generating a nodeid. Set this option to yes to force the high bit to be zero and therefor ensure the nodeid is a positive signed 32 bit integer. WARNING: The clusters behavior is undefined if this option is enabled on only a subset of the cluster (for example during a rolling upgrade). .TP secauth This specifies that HMAC/SHA1 authentication should be used to authenticate all messages. It further specifies that all data should be encrypted with the -sober128 encryption algorithm to protect data from eavesdropping. - -Enabling this option adds a 36 byte header to every message sent by totem which -reduces total throughput. Encryption and authentication consume 75% of CPU -cycles in aisexec as measured with gprof when enabled. - -For 100mbit networks with 1500 MTU frame transmissions: -A throughput of 9mb/sec is possible with 100% cpu utilization when this -option is enabled on 3ghz cpus. -A throughput of 10mb/sec is possible wth 20% cpu utilization when this -optin is disabled on 3ghz cpus. - -For gig-e networks with large frame transmissions: -A throughput of 20mb/sec is possible when this option is enabled on -3ghz cpus. -A throughput of 60mb/sec is possible when this option is disabled on -3ghz cpus. +nss library and aes256 encryption algorithm to protect data from eavesdropping. + +Enabling this option adds a encryption header to every message sent by totem which +reduces total throughput. Also encryption and authentication consume extra CPU +cycles in corosync. The default is on. .TP rrp_mode This specifies the mode of redundant ring, which may be none, active, or passive. Active replication offers slightly lower latency from transmit to delivery in faulty network environments but with less performance. Passive replication may nearly double the speed of the totem protocol if the protocol doesn't become cpu bound. The final option is none, in which case only one network interface will be used to operate the totem protocol. If only one interface directive is specified, none is automatically chosen. If multiple interface directives are specified, only active or passive may be chosen. .TP netmtu This specifies the network maximum transmit unit. To set this value beyond 1500, the regular frame MTU, requires ethernet devices that support large, or also called jumbo, frames. If any device in the network doesn't support large frames, the protocol will not operate properly. The hosts must also have their mtu size set from 1500 to whatever frame size is specified here. Please note while some NICs or switches claim large frame support, they support 9000 MTU as the maximum frame size including the IP header. Setting the netmtu and host MTUs to 9000 will cause totem to use the full 9000 bytes of the frame. Then Linux will add a 18 byte header moving the full frame size to 9018. As a result some hardware will not operate properly with this size of data. A netmtu of 8982 seems to work for the few large frame devices that have been tested. Some manufacturers claim large frame support when in fact they support frame sizes of 4500 bytes. When sending multicast traffic, if the network frequently reconfigures, chances are that some device in the network doesn't support large frames. Choose hardware carefully if intending to use large frame support. The default is 1500. .TP vsftype This directive controls the virtual synchrony filter type used to identify a primary component. The preferred choice is YKD dynamic linear voting, however, for clusters larger then 32 nodes YKD consumes alot of memory. For large scale clusters that are created by changing the MAX_PROCESSORS_COUNT #define in the C code totem.h file, the virtual synchrony filter "none" is recommended but then AMF and DLCK services (which are currently experimental) are not safe for use. The default is ykd. The vsftype can also be set to none. .TP transport This directive controls the transport mechanism used. If the interface to which corosync is binding is an RDMA interface such as RoCEE or Infiniband, the "iba" parameter may be specified. To avoid the use of multicast entirely, a unicast transport parameter "udpu" can be specified. This requires specifying the list of members in nodelist directive, that could potentially make up the membership before deployment. The default is udp. The transport type can also be set to udpu or iba. .TP cluster_name This specifies the name of cluster and it's used for automatic generating of multicast address. Within the .B totem directive, there are several configuration options which are used to control the operation of the protocol. It is generally not recommended to change any of these values without proper guidance and sufficient testing. Some networks may require larger values if suffering from frequent reconfigurations. Some applications may require faster failure detection times which can be achieved by reducing the token timeout. .TP token This timeout specifies in milliseconds until a token loss is declared after not receiving a token. This is the time spent detecting a failure of a processor in the current configuration. Reforming a new configuration takes about 50 milliseconds in addition to this timeout. The default is 1000 milliseconds. .TP token_retransmit This timeout specifies in milliseconds after how long before receiving a token the token is retransmitted. This will be automatically calculated if token is modified. It is not recommended to alter this value without guidance from the corosync community. The default is 238 milliseconds. .TP hold This timeout specifies in milliseconds how long the token should be held by the representative when the protocol is under low utilization. It is not recommended to alter this value without guidance from the corosync community. The default is 180 milliseconds. .TP token_retransmits_before_loss_const This value identifies how many token retransmits should be attempted before forming a new configuration. If this value is set, retransmit and hold will be automatically calculated from retransmits_before_loss and token. The default is 4 retransmissions. .TP join This timeout specifies in milliseconds how long to wait for join messages in the membership protocol. The default is 50 milliseconds. .TP send_join This timeout specifies in milliseconds an upper range between 0 and send_join to wait before sending a join message. For configurations with less then 32 nodes, this parameter is not necessary. For larger rings, this parameter is necessary to ensure the NIC is not overflowed with join messages on formation of a new ring. A reasonable value for large rings (128 nodes) would be 80msec. Other timer values must also change if this value is changed. Seek advice from the corosync mailing list if trying to run larger configurations. The default is 0 milliseconds. .TP consensus This timeout specifies in milliseconds how long to wait for consensus to be achieved before starting a new round of membership configuration. The minimum value for consensus must be 1.2 * token. This value will be automatically calculated at 1.2 * token if the user doesn't specify a consensus value. For two node clusters, a consensus larger then the join timeout but less then token is safe. For three node or larger clusters, consensus should be larger then token. There is an increasing risk of odd membership changes, which stil guarantee virtual synchrony, as node count grows if consensus is less than token. The default is 1200 milliseconds. .TP merge This timeout specifies in milliseconds how long to wait before checking for a partition when no multicast traffic is being sent. If multicast traffic is being sent, the merge detection happens automatically as a function of the protocol. The default is 200 milliseconds. .TP downcheck This timeout specifies in milliseconds how long to wait before checking that a network interface is back up after it has been downed. The default is 1000 millseconds. .TP fail_recv_const This constant specifies how many rotations of the token without receiving any of the messages when messages should be received may occur before a new configuration is formed. The default is 2500 failures to receive a message. .TP seqno_unchanged_const This constant specifies how many rotations of the token without any multicast traffic should occur before the merge detection timeout is started. The default is 30 rotations. .TP heartbeat_failures_allowed [HeartBeating mechanism] Configures the optional HeartBeating mechanism for faster failure detection. Keep in mind that engaging this mechanism in lossy networks could cause faulty loss declaration as the mechanism relies on the network for heartbeating. So as a rule of thumb use this mechanism if you require improved failure in low to medium utilized networks. This constant specifies the number of heartbeat failures the system should tolerate before declaring heartbeat failure e.g 3. Also if this value is not set or is 0 then the heartbeat mechanism is not engaged in the system and token rotation is the method of failure detection The default is 0 (disabled). .TP max_network_delay [HeartBeating mechanism] This constant specifies in milliseconds the approximate delay that your network takes to transport one packet from one machine to another. This value is to be set by system engineers and please dont change if not sure as this effects the failure detection mechanism using heartbeat. The default is 50 milliseconds. .TP window_size This constant specifies the maximum number of messages that may be sent on one token rotation. If all processors perform equally well, this value could be large (300), which would introduce higher latency from origination to delivery for very large rings. To reduce latency in large rings(16+), the defaults are a safe compromise. If 1 or more slow processor(s) are present among fast processors, window_size should be no larger then 256000 / netmtu to avoid overflow of the kernel receive buffers. The user is notified of this by the display of a retransmit list in the notification logs. There is no loss of data, but performance is reduced when these errors occur. The default is 50 messages. .TP max_messages This constant specifies the maximum number of messages that may be sent by one processor on receipt of the token. The max_messages parameter is limited to 256000 / netmtu to prevent overflow of the kernel transmit buffers. The default is 17 messages. .TP miss_count_const This constant defines the maximum number of times on receipt of a token a message is checked for retransmission before a retransmission occurs. This parameter is useful to modify for switches that delay multicast packets compared to unicast packets. The default setting works well for nearly all modern switches. The default is 5 messages. .TP rrp_problem_count_timeout This specifies the time in milliseconds to wait before decrementing the problem count by 1 for a particular ring to ensure a link is not marked faulty for transient network failures. The default is 2000 milliseconds. .TP rrp_problem_count_threshold This specifies the number of times a problem is detected with a link before setting the link faulty. Once a link is set faulty, no more data is transmitted upon it. Also, the problem counter is no longer decremented when the problem count timeout expires. A problem is detected whenever all tokens from the proceeding processor have not been received within the rrp_token_expired_timeout. The rrp_problem_count_threshold * rrp_token_expired_timeout should be atleast 50 milliseconds less then the token timeout, or a complete reconfiguration may occur. The default is 10 problem counts. .TP rrp_problem_count_mcast_threshold This specifies the number of times a problem is detected with multicast before setting the link faulty for passive rrp mode. This variable is unused in active rrp mode. The default is 10 times rrp_problem_count_threshold. .TP rrp_token_expired_timeout This specifies the time in milliseconds to increment the problem counter for the redundant ring protocol after not having received a token from all rings for a particular processor. This value will automatically be calculated from the token timeout and problem_count_threshold but may be overridden. It is not recommended to override this value without guidance from the corosync community. The default is 47 milliseconds. .TP rrp_autorecovery_check_timeout This specifies the time in milliseconds to check if the failed ring can be auto-recovered. The default is 1000 milliseconds. .PP Within the .B logging directive, there are several configuration options which are all optional. .PP The following 3 options are valid only for the top level logging directive: .TP timestamp This specifies that a timestamp is placed on all log messages. The default is off. .TP fileline This specifies that file and line should be printed. The default is off. .TP function_name This specifies that the code function name should be printed. The default is off. .PP The following options are valid both for top level logging directive and they can be overriden in logger_subsys entries. .TP to_stderr .TP to_logfile .TP to_syslog These specify the destination of logging output. Any combination of these options may be specified. Valid options are .B yes and .B no. The default is syslog and stderr. Please note, if you are using to_logfile and want to rotate the file, use logrotate(8) with the option .B copytruncate. eg. .IP .RS .ne 18 .nf .ta 4n 30n 33n /var/log/corosync.log { missingok compress notifempty daily rotate 7 copytruncate } .ta .fi .RE .IP .PP .TP logfile If the .B to_logfile directive is set to .B yes , this option specifies the pathname of the log file. No default. .TP logfile_priority This specifies the logfile priority for this particular subsystem. Ignored if debug is on. Possible values are: alert, crit, debug (same as debug = on), emerg, err, info, notice, warning. The default is: info. .TP syslog_facility This specifies the syslog facility type that will be used for any messages sent to syslog. options are daemon, local0, local1, local2, local3, local4, local5, local6 & local7. The default is daemon. .TP syslog_priority This specifies the syslog level for this particular subsystem. Ignored if debug is on. Possible values are: alert, crit, debug (same as debug = on), emerg, err, info, notice, warning. The default is: info. .TP debug This specifies whether debug output is logged for this particular logger. The default is off. .PP Within the .B logging directive, logger_subsys directives are optional. .PP Within the .B logger_subsys sub-directive, all of the above logging configuration options are valid and can be used to override the default settings. The subsys entry, described below, is mandatory to identify the subsystem. .TP subsys This specifies the subsystem identity (name) for which logging is specified. This is the name used by a service in the log_init () call. E.g. 'CPG'. This directive is required. .PP Within the .B quorum directive it is possible to specify the quorum algorithm to use with the .TP provider directive. At the time of writing only corosync_votequorum is supported. Please refer to quorum modules man pages (8) for specific config options. .PP Within the .B nodelist directive it is possible to specify specific informations about nodes in cluster. Directive can contain only .B node sub-directive, which specifies every node that should be a member of the membership, and where non-default options are needed. Every node must have at least ring0_addr field filled. For UDPU, every node that should be a member of the membership must be specified. Possible options are: .TP ringX_addr This specifies ip address of one of the nodes. X is ring number. .TP nodeid This configuration option is optional when using IPv4 and required when using IPv6. This is a 32 bit value specifying the node identifier delivered to the cluster membership service. If this is not specified with IPv4, the node id will be determined from the 32 bit IP address the system to which the system is bound with ring identifier of 0. The node identifier value of zero is reserved and should not be used. .TP quorum_votes .SH "FILES" .TP /etc/corosync/corosync.conf The corosync executive configuration file. .SH "SEE ALSO" .BR corosync_overview (8), .BR votequorum (5), .BR logrotate (8) .PP diff --git a/man/corosync_overview.8 b/man/corosync_overview.8 index 999ced9b..459ccfc9 100644 --- a/man/corosync_overview.8 +++ b/man/corosync_overview.8 @@ -1,195 +1,195 @@ .\"/* .\" * Copyright (c) 2005 MontaVista Software, Inc. .\" * Copyright (c) 2006-2009 Red Hat, Inc. .\" * .\" * All rights reserved. .\" * .\" * Author: Steven Dake (sdake@redhat.com) .\" * .\" * This software licensed under BSD license, the text of which follows: .\" * .\" * Redistribution and use in source and binary forms, with or without .\" * modification, are permitted provided that the following conditions are met: .\" * .\" * - Redistributions of source code must retain the above copyright notice, .\" * this list of conditions and the following disclaimer. .\" * - Redistributions in binary form must reproduce the above copyright notice, .\" * this list of conditions and the following disclaimer in the documentation .\" * and/or other materials provided with the distribution. .\" * - Neither the name of the MontaVista Software, Inc. nor the names of its .\" * contributors may be used to endorse or promote products derived from this .\" * software without specific prior written permission. .\" * .\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" .\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE .\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF .\" * THE POSSIBILITY OF SUCH DAMAGE. .\" */ .TH COROSYNC_OVERVIEW 8 2012-02-13 "corosync Man Page" "Corosync Cluster Engine Programmer's Manual" .SH NAME corosync_overview \- Corosync overview .SH OVERVIEW The corosync project's purpose is to implement and support a production quality Revised BSD licensed implementation of a high performance low overhead high availability development toolkit. Faults occur for various reasons: .PP * Application Faults .PP * Middleware Faults .PP * Operating System Faults .PP * Hardware Faults The major focus of high availability in the past has been to mask hardware faults. Faults in other components of the system have gone unsolved until Corosync. Corosync is designed for applications to replicate their state to up to 16 processors. The processors all contain a replica of the application state. The corosync project provides a group message API called CPG. The project developers recommend CPG be used for most applications. The CPG service implements a closed group messaging model presenting extended virtual synchrony guarantees. To manage conditions where the process executing the CPG application exchange fails, we provide the Simple Availability Manager (sam) to provide simple application restart. .SH QUICKSTART The corosync executive must be configured. In the directory conf in the source distribution are several files that must be copied to the /etc/corosync directory. If corosync is packaged by a distro, this may be complete. The directory contains the file corosync.conf. Please read the corosync.conf(5) man page for details on the configuration options. The corosync project will work out of the box with the default configuration options, although the administrator may desire different options. The corosync executive uses cryptographic techniques to ensure authenticity and privacy of the messages. In order for corosync to be secure and operate, a private key must be generated and shared to all processors. First generate the key on one of the nodes: unix# corosync-keygen .br Corosync Cluster Engine Authentication key generator. .br Gathering 1024 bits for key from /dev/random. .br Press keys on your keyboard to generate entropy. .br Writing corosync key to /etc/corosync/authkey. .PP After this operation, a private key will be in the file /etc/corosync/authkey. This private key must be copied to every processor in the cluster. If the private key isn't the same for every node, those nodes with nonmatching private keys will not be able to join the same configuration. Copy the key to some security transportable storage or use ssh to transmit the key from node to node. Then install the key with the command: unix#: install -D --group=0 --owner=0 --mode=0400 /path_to_authkey/authkey /etc/corosync/authkey If a message "Invalid digest" appears from the corosync executive, the keys are not consistent between processors. Finally run the corosync executive. If corosync is packaged from a distro, it may be set to start on system start. It may also be turned off by default in which case the init script for corosync must be enabled. .SH USING LIBRARIES The corosync libraries have header files which must be included in the developer's application. Once the header file is included, the developer can reference the corosync interfaces. The corosync project recommends to distros to place include files in /usr/include/corosync. .SH IPv6 The corosync project supports both IPv4 and IPv6 network addresses. The entire cluster must use either IPv4 or IPv6 for the cluster communication mechanism. In order to use IPv6, IPv6 addresses must be specified in the bindnetaddr and mcastaddr fields in the configuration file. The nodeid field must also be set. An example of this is: nodeid: 2 bindnetaddr: fec0::1:a800:4ff:fe00:20 mcastaddr: ff05::1 To configure a host for IPv6, use the ifconfig program to add interfaces: box20: ifconfig eth0 add fec0::1:a800:4ff:fe00:20/64 box30: ifconfig eth0 add fec0::1:a800:4ff:fe00:30/64 If the /64 is not specified, a route for the IPv6 network will not be configured which will cause significant problems. Make sure a route is available for IPv6 traffic. .SH ARCHITECTURE The corosync libraries are a thin IPC interface to the corosync executive. The corosync executive implements the functionality of the corosync APIs for distributed coming. The corosync executive uses the Totem extended virtual synchrony protocol. The advantage to the end user is excellent performance characteristics and a proven protocol with excellent reliability. This protocol connects the processors in a configuration together so they may communicate. .SH ENVIRONMENT VARIABLES The corosync executive process uses four environment variables during startup. If these environment variables are not set, defaults will be used. .TP COROSYNC_MAIN_CONFIG_FILE This specifies the fully qualified path to the corosync configuration file. The default is /etc/corosync/corosync.conf. .TP COROSYNC_TOTEM_AUTHKEY_FILE This specifies the fully qualified path to the shared key used to authenticate and encrypt data used within the Totem protocol. The default is /etc/corosync/authkey. .SH SECURITY The corosync executive optionally encrypts all messages sent over the network -using the SOBER-128 stream cipher. The corosync executive uses HMAC and SHA1 to -authenticate all messages. The corosync executive library uses SOBER-128 +using the AES-128 cipher. The corosync executive uses HMAC and SHA1 to +authenticate all messages. The corosync executive library uses NSS as a pseudo random number generator. If membership messages can be captured by intruders, it is possible to execute a denial of service attack on the cluster. In this scenario, the cluster is likely already compromised and a DOS attack is the least of the administration's worries. The security in corosync does not offer perfect forward secrecy because the keys are reused. It may be possible for an intruder by capturing packets in an automated fashion to determine the shared key. No such automated attack has been published as of yet. In this scenario, the cluster is likely already compromised to allow the long-term capture of transmitted data. For security reasons, the corosync executive binary should NEVER be setuid or setgid in the filesystem. .SH BUGS None that are known. .SH "SEE ALSO" .BR corosync.conf (5), .BR corosync-keygen (8), .BR cpg_overview (8), .BR sam_overview (8) .PP diff --git a/test/Makefile.am b/test/Makefile.am index a7895f32..7010c50a 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -1,89 +1,90 @@ # # Copyright (c) 2009 Red Hat, Inc. # # Authors: Andrew Beekhof # Steven Dake (sdake@redhat.com) # # This software licensed under BSD license, the text of which follows: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # - Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # - Neither the name of the MontaVista Software, Inc. nor the names of its # contributors may be used to endorse or promote products derived from this # software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF # THE POSSIBILITY OF SUCH DAMAGE. MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include/corosync -I$(top_srcdir)/include EXTRA_DIST = ploadstart.sh noinst_PROGRAMS = cpgverify testcpg testcpg2 cpgbench \ testquorum testvotequorum1 testvotequorum2 \ stress_cpgfdget stress_cpgcontext cpgbound testsam \ testcpgzc cpgbenchzc testzcgc stress_cpgzc noinst_SCRIPTS = ploadstart COMMON_OPTS = -L../lib -L../common_lib -Wl,-rpath,../common_lib testcpg_LDADD = -lcpg $(LIBQB_LIBS) testcpg_LDFLAGS = $(COMMON_OPTS) testcpg2_LDADD = -lcpg $(LIBQB_LIBS) testcpg2_LDFLAGS = $(COMMON_OPTS) testcpgzc_LDADD = -lcpg $(LIBQB_LIBS) testcpgzc_LDFLAGS = $(COMMON_OPTS) testzcgc_LDADD = -lcpg $(LIBQB_LIBS) testzcgc_LDFLAGS = $(COMMON_OPTS) stress_cpgzc_LDADD = -lcpg $(LIBQB_LIBS) stress_cpgzc_LDFLAGS = $(COMMON_OPTS) stress_cpgfdget_LDADD = -lcpg $(LIBQB_LIBS) stress_cpgfdget_LDFLAGS = $(COMMON_OPTS) stress_cpgcontext_LDADD = -lcpg $(LIBQB_LIBS) stress_cpgcontext_LDFLAGS = $(COMMON_OPTS) testquorum_LDADD = -lquorum $(LIBQB_LIBS) testquorum_LDFLAGS = $(COMMON_OPTS) testvotequorum1_LDADD = -lvotequorum $(LIBQB_LIBS) testvotequorum1_LDFLAGS = $(COMMON_OPTS) testvotequorum2_LDADD = -lvotequorum $(LIBQB_LIBS) testvotequorum2_LDFLAGS = $(COMMON_OPTS) -cpgverify_LDADD = -lcpg -ltotem_pg $(LIBQB_LIBS) +cpgverify_CFLAGS = $(nss_CFLAGS) +cpgverify_LDADD = -lcpg -ltotem_pg $(LIBQB_LIBS) $(nss_LIBS) cpgverify_LDFLAGS = $(COMMON_OPTS) -L../exec cpgbound_LDADD = -lcpg $(LIBQB_LIBS) cpgbound_LDFLAGS = $(COMMON_OPTS) cpgbench_LDADD = -lcpg $(LIBQB_LIBS) cpgbench_LDFLAGS = $(COMMON_OPTS) cpgbenchzc_LDADD = -lcpg -lcorosync_common $(LIBQB_LIBS) cpgbenchzc_LDFLAGS = $(COMMON_OPTS) testsam_LDADD = -lsam -lcmap -lquorum $(LIBQB_LIBS) testsam_LDFLAGS = $(COMMON_OPTS) ploadstart: ploadstart.sh cp $^ $@ chmod 755 $@ LINT_FILES1:=$(filter-out sa_error.c, $(wildcard *.c)) LINT_FILES:=$(filter-out testparse.c, $(LINT_FILES1)) lint: -for f in $(LINT_FILES) ; do echo Splint $$f ; splint $(INCLUDES) $(LINT_FLAGS) $(CFLAGS) $$f ; done clean-local: rm -f fdata ploadstart diff --git a/test/cpgverify.c b/test/cpgverify.c index 56a9b8b8..350c6b2d 100644 --- a/test/cpgverify.c +++ b/test/cpgverify.c @@ -1,173 +1,190 @@ /* * Copyright (c) 2009 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include -#include "../exec/crypto.h" + +#include +#include struct my_msg { unsigned int msg_size; unsigned char sha1[20]; unsigned char buffer[0]; }; static int deliveries = 0; +PK11Context* sha1_context; + static void cpg_deliver_fn ( cpg_handle_t handle, const struct cpg_name *group_name, uint32_t nodeid, uint32_t pid, void *m, size_t msg_len) { const struct my_msg *msg2 = m; unsigned char sha1_compare[20]; - hash_state sha1_hash; unsigned int i; + unsigned int sha1_len; printf ("msg '%s'\n", msg2->buffer); - sha1_init (&sha1_hash); - sha1_process (&sha1_hash, msg2->buffer, msg2->msg_size); - sha1_done (&sha1_hash, sha1_compare); + PK11_DigestBegin(sha1_context); + PK11_DigestOp(sha1_context, msg2->buffer, msg2->msg_size); + PK11_DigestFinal(sha1_context, sha1_compare, &sha1_len, sizeof(sha1_compare)); printf ("SIZE %d HASH: ", msg2->msg_size); for (i = 0; i < 20; i++) { printf ("%x", sha1_compare[i]); } printf ("\n"); if (memcmp (sha1_compare, msg2->sha1, 20) != 0) { printf ("incorrect hash\n"); exit (1); } deliveries++; } static void cpg_confchg_fn ( cpg_handle_t handle, const struct cpg_name *group_name, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { } static cpg_callbacks_t callbacks = { cpg_deliver_fn, cpg_confchg_fn }; static struct cpg_name group_name = { .value = "cpg_bm", .length = 6 }; static unsigned char buffer[200000]; int main (int argc, char *argv[]) { cpg_handle_t handle; cs_error_t result; int i = 0; int j; struct my_msg msg; - hash_state sha1_hash; struct iovec iov[2]; const char *options = "i:"; int iter = 1000; int opt; int run_forever = 1; + unsigned int sha1_len; while ((opt = getopt(argc, argv, options)) != -1) { switch (opt) { case 'i': run_forever = 0; iter = atoi(optarg); break; } } result = cpg_initialize (&handle, &callbacks); if (result != CS_OK) { printf ("Couldn't initialize CPG service %d\n", result); exit (0); } + if (NSS_NoDB_Init(".") != SECSuccess) { + printf ("Couldn't initialize nss\n"); + exit (0); + } + + if ((sha1_context = PK11_CreateDigestContext(SEC_OID_SHA1)) == NULL) { + printf ("Couldn't initialize nss\n"); + exit (0); + } + result = cpg_join (handle, &group_name); if (result != CS_OK) { printf ("cpg_join failed with result %d\n", result); exit (1); } iov[0].iov_base = (void *)&msg; iov[0].iov_len = sizeof (struct my_msg); iov[1].iov_base = (void *)buffer; /* * Demonstrate cpg_mcast_joined */ i = 0; do { msg.msg_size = 100 + rand() % 100000; iov[1].iov_len = msg.msg_size; for (j = 0; j < msg.msg_size; j++) { buffer[j] = j; } sprintf ((char *)buffer, "cpg_mcast_joined: This is message %12d", i); - sha1_init (&sha1_hash); - sha1_process (&sha1_hash, buffer, msg.msg_size); - sha1_done (&sha1_hash, msg.sha1); + + PK11_DigestBegin(sha1_context); + PK11_DigestOp(sha1_context, buffer, msg.msg_size); + PK11_DigestFinal(sha1_context, msg.sha1, &sha1_len, sizeof(msg.sha1)); try_again_one: result = cpg_mcast_joined (handle, CPG_TYPE_AGREED, iov, 2); if (result == CS_ERR_TRY_AGAIN) { goto try_again_one; } result = cpg_dispatch (handle, CS_DISPATCH_ALL); i++; } while (run_forever || i < iter); + PK11_DestroyContext(sha1_context, PR_TRUE); + cpg_finalize (handle); return (0); } diff --git a/test/stress_cpgcontext.c b/test/stress_cpgcontext.c index 0e03f40d..ba14f63d 100644 --- a/test/stress_cpgcontext.c +++ b/test/stress_cpgcontext.c @@ -1,135 +1,134 @@ /* * Copyright (c) 2009 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include -#include "../exec/crypto.h" struct my_msg { unsigned int msg_size; unsigned char sha1[20]; unsigned char buffer[0]; }; static void cpg_deliver_fn ( cpg_handle_t handle, const struct cpg_name *group_name, uint32_t nodeid, uint32_t pid, void *m, size_t msg_len) { } static void cpg_confchg_fn ( cpg_handle_t handle, const struct cpg_name *group_name, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { } static cpg_callbacks_t callbacks = { cpg_deliver_fn, cpg_confchg_fn }; static void sigintr_handler (int num) { exit (1); } #define ITERATIONS (1000000) #define INSTANCES 100 int main (void) { cpg_handle_t handle[INSTANCES]; cs_error_t res; void *context[INSTANCES]; int i, j; void *ctx; signal (SIGINT, sigintr_handler); for (i = 0; i < INSTANCES; i++) { res = cpg_initialize (&handle[i], &callbacks); if (res != CS_OK) { printf ("FAIL %d\n", res); exit (-1); } } for (j = 0; j < ITERATIONS; j++) { for (i = 0; i < INSTANCES; i++) { context[i] = malloc (20); res = cpg_context_set (handle[i], context[i]); if (res != CS_OK) { printf ("FAIL %d\n", res); exit (-1); } } for (i = 0; i < INSTANCES; i++) { res = cpg_context_get (handle[i], &ctx); if (res != CS_OK) { printf ("FAIL %d\n", res); exit (-1); } if (ctx != context[i]) { printf ("FAIL\n"); exit (-1); } free (ctx); } } for (i = 0; i < INSTANCES; i++) { cpg_finalize (handle[i]); } printf ("PASS\n"); return (0); } diff --git a/test/stress_cpgfdget.c b/test/stress_cpgfdget.c index e7b3fe63..af1357e2 100644 --- a/test/stress_cpgfdget.c +++ b/test/stress_cpgfdget.c @@ -1,118 +1,117 @@ /* * Copyright (c) 2009 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include -#include "../exec/crypto.h" struct my_msg { unsigned int msg_size; unsigned char sha1[20]; unsigned char buffer[0]; }; static void cpg_deliver_fn ( cpg_handle_t handle, const struct cpg_name *group_name, uint32_t nodeid, uint32_t pid, void *m, size_t msg_len) { } static void cpg_confchg_fn ( cpg_handle_t handle, const struct cpg_name *group_name, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { } static cpg_callbacks_t callbacks = { cpg_deliver_fn, cpg_confchg_fn }; static void sigintr_handler (int num) { exit (1); } #define ITERATIONS (1000*2000) int main (void) { cpg_handle_t handle; cs_error_t res; int original_fd; int i; int fd; signal (SIGINT, sigintr_handler); res = cpg_initialize (&handle, &callbacks); if (res != CS_OK) { printf ("FAIL %d\n", res); exit (-1); } res = cpg_fd_get (handle, &original_fd); if (res != CS_OK) { printf ("FAIL %d\n", res); } for (i = 0; i < ITERATIONS; i++) { res = cpg_fd_get (handle, &fd); if (original_fd != fd) { printf ("FAIL\n"); exit (-1); } } cpg_finalize (handle); printf ("PASS\n"); return (0); }