Page MenuHomeClusterLabs Projects

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/configure.ac b/configure.ac
index 0857932b..1993b032 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,611 +1,623 @@
# -*- Autoconf -*-
# Process this file with autoconf to produce a configure script.
# bootstrap / init
AC_PREREQ([2.61])
AC_INIT([corosync],
m4_esyscmd([build-aux/git-version-gen .tarball-version]),
[discuss@lists.corosync.org])
AC_USE_SYSTEM_EXTENSIONS
AM_INIT_AUTOMAKE([-Wno-portability])
LT_PREREQ([2.2.6])
LT_INIT
AM_SILENT_RULES([yes])
AC_CONFIG_SRCDIR([lib/cpg.c])
AC_CONFIG_HEADER([include/corosync/config.h])
AC_CONFIG_MACRO_DIR([m4])
AC_CANONICAL_HOST
AC_LANG([C])
dnl Fix default variables - "prefix" variable if not specified
if test "$prefix" = "NONE"; then
prefix="/usr"
dnl Fix "localstatedir" variable if not specified
if test "$localstatedir" = "\${prefix}/var"; then
localstatedir="/var"
fi
dnl Fix "sysconfdir" variable if not specified
if test "$sysconfdir" = "\${prefix}/etc"; then
sysconfdir="/etc"
fi
dnl Fix "libdir" variable if not specified
if test "$libdir" = "\${exec_prefix}/lib"; then
if test -e /usr/lib64; then
libdir="/usr/lib64"
else
libdir="/usr/lib"
fi
fi
fi
if test "$srcdir" = "."; then
AC_MSG_NOTICE([building in place srcdir:$srcdir])
AC_DEFINE([BUILDING_IN_PLACE], 1, [building in place])
else
AC_MSG_NOTICE([building out of tree srcdir:$srcdir])
fi
# Checks for programs.
# check stolen from gnulib/m4/gnu-make.m4
if ! ${MAKE-make} --version /cannot/make/this >/dev/null 2>&1; then
AC_MSG_ERROR([you don't seem to have GNU make; it is required])
fi
sinclude(coroysync-default.m4)
AC_PROG_CC
AC_PROG_CXX
AC_PROG_INSTALL
AC_PROG_LN_S
AC_PROG_MAKE_SET
AC_PROG_RANLIB
AC_PROG_SED
AC_PROG_LIBTOOL
AC_CHECK_PROGS([GROFF], [groff])
AC_CHECK_PROGS([PKGCONFIG], [pkg-config])
AC_CHECK_PROGS([AUGTOOL], [augtool])
AC_CHECK_PROGS([DOT], [dot])
AC_CHECK_PROGS([DOXYGEN], [doxygen])
AC_CHECK_PROGS([AWK], [awk])
AC_CHECK_PROGS([SED], [sed])
AC_PATH_PROG([BASHPATH], [bash])
# Checks for compiler characteristics.
AC_PROG_GCC_TRADITIONAL
AC_C_CONST
AC_C_INLINE
AC_C_VOLATILE
# Checks for header files.
AC_HEADER_DIRENT
AC_HEADER_STDC
AC_HEADER_SYS_WAIT
AC_CHECK_HEADERS([arpa/inet.h fcntl.h limits.h netdb.h netinet/in.h stdint.h \
stdlib.h string.h sys/ioctl.h sys/param.h sys/socket.h \
sys/time.h syslog.h unistd.h sys/types.h getopt.h malloc.h \
utmpx.h ifaddrs.h stddef.h sys/file.h sys/uio.h])
+# Check entries in specific structs
AC_CHECK_MEMBER([struct sockaddr_in.sin_len],
[AC_DEFINE_UNQUOTED([HAVE_SOCK_SIN_LEN], [1], [sockaddr_in needs sin_len])],
[], [[#include <netinet/in.h>]])
AC_CHECK_MEMBER([struct sockaddr_in6.sin6_len],
[AC_DEFINE_UNQUOTED([HAVE_SOCK_SIN6_LEN], [1], [sockaddr_in6 needs sin6_len])],
[], [[#include <netinet/in.h>]])
+AC_CHECK_MEMBER([struct msghdr.msg_control],
+ [AC_DEFINE_UNQUOTED([HAVE_MSGHDR_CONTROL], [1], [msghdr has msg_control])],
+ [], [[#include <sys/socket.h>]])
+AC_CHECK_MEMBER([struct msghdr.msg_controllen],
+ [AC_DEFINE_UNQUOTED([HAVE_MSGHDR_CONTROLLEN], [1], [msghdr has msg_controllen])],
+ [], [[#include <sys/socket.h>]])
+AC_CHECK_MEMBER([struct msghdr.msg_flags],
+ [AC_DEFINE_UNQUOTED([HAVE_MSGHDR_FLAGS], [1], [msghdr has msg_flags])],
+ [], [[#include <sys/socket.h>]])
+AC_CHECK_MEMBER([struct msghdr.msg_accrights],
+ [AC_DEFINE_UNQUOTED([HAVE_MSGHDR_ACCRIGHTS], [1], [msghdr has msg_accrights])],
+ [], [[#include <sys/socket.h>]])
+AC_CHECK_MEMBER([struct msghdr.msg_accrightslen],
+ [AC_DEFINE_UNQUOTED([HAVE_MSGHDR_ACCRIGHTSLEN], [1], [msghdr has msg_accrightslen])],
+ [], [[#include <sys/socket.h>]])
# Checks for typedefs.
AC_TYPE_UID_T
AC_TYPE_INT16_T
AC_TYPE_INT32_T
AC_TYPE_INT64_T
AC_TYPE_INT8_T
AC_TYPE_UINT16_T
AC_TYPE_UINT32_T
AC_TYPE_UINT64_T
AC_TYPE_UINT8_T
AC_TYPE_SIZE_T
AC_TYPE_SSIZE_T
AC_TYPE_SIGNAL
# Checks for libraries.
PKG_CHECK_MODULES([nss],[nss])
PKG_CHECK_MODULES([LIBQB], [libqb])
AC_CHECK_LIB([qb], [qb_log_thread_priority_set], \
have_qb_log_thread_priority_set="yes", \
have_qb_log_thread_priority_set="no")
if test "x${have_qb_log_thread_priority_set}" = xyes; then
AC_DEFINE_UNQUOTED([HAVE_QB_LOG_THREAD_PRIORITY_SET], 1, [have qb_log_thread_priority_set])
fi
AC_CHECK_LIB([pthread], [pthread_create])
AC_CHECK_LIB([socket], [socket])
AC_CHECK_LIB([nsl], [t_open])
AC_CHECK_LIB([rt], [sched_getscheduler])
# Checks for library functions.
AC_FUNC_ALLOCA
AC_FUNC_CLOSEDIR_VOID
AC_FUNC_ERROR_AT_LINE
AC_FUNC_FORK
AC_FUNC_MALLOC
AC_FUNC_MEMCMP
AC_FUNC_MMAP
AC_FUNC_REALLOC
AC_FUNC_SELECT_ARGTYPES
AC_FUNC_VPRINTF
AC_CHECK_FUNCS([alarm alphasort atexit bzero dup2 endgrent endpwent fcntl \
getcwd getpeerucred getpeereid gettimeofday inet_ntoa memmove \
memset mkdir scandir select socket strcasecmp strchr strdup \
strerror strrchr strspn strstr pthread_setschedparam \
sched_get_priority_max sched_setscheduler getifaddrs \
clock_gettime ftruncate gethostname localtime_r munmap strtol])
AC_CONFIG_FILES([Makefile
exec/Makefile
include/Makefile
init/Makefile
lib/Makefile
common_lib/Makefile
man/Makefile
pkgconfig/Makefile
test/Makefile
cts/Makefile
cts/agents/Makefile
cts/CTSvars.py
tools/Makefile
conf/Makefile
qdevices/Makefile
Doxyfile])
### Local business
dnl ===============================================
dnl Functions / global M4 variables
dnl ===============================================
dnl Global list of LIB names
m4_define([local_soname_list], [])dnl
dnl Upcase parameter
m4_define([local_upcase], [translit([$*], [a-z], [A-Z])])dnl
dnl M4 macro for include lib/lib$1.soname and subst that
m4_define([LIB_SONAME_IMPORT],[dnl
m4_define([local_libname], local_upcase($1)[_SONAME])dnl
m4_define([local_soname], translit(m4_sinclude(lib/lib$1.verso), [
], []))dnl
local_libname="local_soname"dnl
m4_define([local_soname_list], m4_defn([local_soname_list])[,]local_libname[,]local_upcase($1))dnl
AC_SUBST(local_libname)dnl
])dnl
dnl M4 macro for print padspaces (used in LIB_MSG_RESULT). It takes 2 arguments, length of string to pad and desired
dnl (padded) length
m4_define([m4_printpadspace],[ifelse(m4_eval([$2 - $1 < 1]),[1],,[ ][m4_printpadspace([$1],m4_eval([$2 - 1]))])])dnl
dnl Show AC_MSG_RESULT for specific libraries
m4_define([LIB_MSG_RESULT], [ifelse([$#], [1], ,[dnl
AC_MSG_RESULT([ $2 Library SONAME m4_printpadspace(len($2),8) = ${$1}])
LIB_MSG_RESULT(m4_shift(m4_shift($@)))dnl
])])dnl
# ===============================================
# Helpers
# ===============================================
## helper for CC stuff
cc_supports_flag() {
BACKUP="$CPPFLAGS"
CPPFLAGS="$CPPFLAGS $@"
AC_MSG_CHECKING([whether $CC supports "$@"])
AC_PREPROC_IFELSE([AC_LANG_PROGRAM([])],
[RC=0; AC_MSG_RESULT([yes])],
[RC=1; AC_MSG_RESULT([no])])
CPPFLAGS="$BACKUP"
return $RC
}
## cleanup
AC_MSG_NOTICE(Sanitizing prefix: ${prefix})
case $prefix in
NONE) prefix=/usr/local;;
esac
AC_MSG_NOTICE(Sanitizing exec_prefix: ${exec_prefix})
case $exec_prefix in
dnl For consistency with Corosync, map NONE->$prefix
NONE) exec_prefix=$prefix;;
prefix) exec_prefix=$prefix;;
esac
## local defines
PACKAGE_FEATURES=""
LINT_FLAGS="-weak -unrecog +posixlib +ignoresigns -fcnuse \
-badflag -D__gnuc_va_list=va_list -D__attribute\(x\)="
# default libraries SONAME
SOMAJOR="5"
SOMINOR="0"
SOMICRO="0"
SONAME="${SOMAJOR}.${SOMINOR}.${SOMICRO}"
# specific libraries SONAME
LIB_SONAME_IMPORT([cfg])
LIB_SONAME_IMPORT([cpg])
LIB_SONAME_IMPORT([quorum])
LIB_SONAME_IMPORT([sam])
LIB_SONAME_IMPORT([votequorum])
LIB_SONAME_IMPORT([cmap])
# local options
AC_ARG_ENABLE([ansi],
[ --enable-ansi : force to build with ANSI standards. ],
[ default="no" ])
AC_ARG_ENABLE([fatal-warnings],
[ --enable-fatal-warnings : enable fatal warnings. ],
[ default="no" ])
AC_ARG_ENABLE([debug],
[ --enable-debug : enable debug build. ],
[ default="no" ])
AC_ARG_ENABLE([user-flags],
[ --enable-user-flags : rely on user environment. ],
[ default="no" ])
AC_ARG_ENABLE([coverage],
[ --enable-coverage : coverage analysis of the codebase. ],
[ default="no" ])
AC_ARG_ENABLE([small-memory-footprint],
[ --enable-small-memory-footprint : Use small message queues and small messages sizes. ],
[ default="no" ])
AC_ARG_ENABLE([dbus],
[ --enable-dbus : dbus events. ],,
[ enable_dbus="no" ])
AC_ARG_ENABLE([testagents],
[ --enable-testagents : Install Test Agents. ],,
[ default="no" ])
AC_ARG_ENABLE([rdma],
[ --enable-rdma : Infiniband RDMA transport support ],,
[ enable_rdma="no" ])
AM_CONDITIONAL(BUILD_RDMA, test x$enable_rdma = xyes)
AC_ARG_ENABLE([monitoring],
[ --enable-monitoring : resource monitoring ],,
[ default="no" ])
AM_CONDITIONAL(BUILD_MONITORING, test x$enable_monitoring = xyes)
AC_ARG_ENABLE([watchdog],
[ --enable-watchdog : Watchdog support ],,
[ default="no" ])
AM_CONDITIONAL(BUILD_WATCHDOG, test x$enable_watchdog = xyes)
AC_ARG_ENABLE([augeas],
[ --enable-augeas : Install the augeas lens for corosync.conf ],,
[ enable_augeas="no" ])
AM_CONDITIONAL(INSTALL_AUGEAS, test x$enable_augeas = xyes)
AC_ARG_ENABLE([systemd],
[ --enable-systemd : Install systemd service files],,
[ enable_systemd="no" ])
AM_CONDITIONAL(INSTALL_SYSTEMD, test x$enable_systemd = xyes)
AC_ARG_WITH([initddir],
[ --with-initddir=DIR : path to init script directory. ],
[ INITDDIR="$withval" ],
[ INITDDIR="$sysconfdir/init.d" ])
AC_ARG_WITH([systemddir],
[ --with-systemddir=DIR : path to systemd unit files directory. ],
[ SYSTEMDDIR="$withval" ],
[ SYSTEMDDIR="/lib/systemd/system" ])
AC_ARG_WITH([initwrappersdir],
[ --with-initwrappersdir=DIR : path to init wrappers files directory. ],
[ INITWRAPPERSDIR="$withval" ],
[ INITWRAPPERSDIR="$datarootdir/corosync" ])
AC_ARG_ENABLE([snmp],
[ --enable-snmp : SNMP protocol support ],
[ default="no" ])
AC_ARG_ENABLE([xmlconf],
[ --enable-xmlconf : XML configuration support ],,
[ enable_xmlconf="no" ])
AM_CONDITIONAL(INSTALL_XMLCONF, test x$enable_xmlconf = xyes)
AC_ARG_ENABLE([qdevices],
[ --enable-qdevices : Quorum devices support ],,
[ enable_qdevices="no" ])
AM_CONDITIONAL(BUILD_QDEVICES, test x$enable_qdevices = xyes)
# OS detection
# THIS SECTION MUST DIE!
case "$host_os" in
darwin*)
# this is unused. needs verification
DARWIN_OPTS="-dynamiclib -bind_at_load \
-current_version ${SONAME} \
-compatibility_version ${SONAME} -install_name \$(libdir)/\$(@)"
;;
- *solaris*)
- AC_DEFINE_UNQUOTED([COROSYNC_SOLARIS], [1],
- [Compiling for Solaris platform])
- ;;
esac
# *FLAGS handling goes here
ENV_CFLAGS="$CFLAGS"
ENV_CPPFLAGS="$CPPFLAGS"
ENV_LDFLAGS="$LDFLAGS"
# debug build stuff
if test "x${enable_debug}" = xyes; then
AC_DEFINE_UNQUOTED([DEBUG], [1], [Compiling Debugging code])
OPT_CFLAGS="-O0"
PACKAGE_FEATURES="$PACKAGE_FEATURES debug"
else
OPT_CFLAGS="-O3"
fi
# gdb flags
if test "x${GCC}" = xyes; then
GDB_FLAGS="-ggdb3"
else
GDB_FLAGS="-g"
fi
# Look for dbus-1
if test "x${enable_dbus}" = xyes; then
PKG_CHECK_MODULES([DBUS],[dbus-1])
AC_DEFINE_UNQUOTED([HAVE_DBUS], 1, [have dbus])
PACKAGE_FEATURES="$PACKAGE_FEATURES dbus"
fi
if test "x${enable_testagents}" = xyes; then
AC_DEFINE_UNQUOTED([HAVE_TESTAGENTS], 1, [have testagents])
PACKAGE_FEATURES="$PACKAGE_FEATURES testagents"
fi
if test "x${enable_rdma}" = xyes; then
PKG_CHECK_MODULES([rdmacm],[rdmacm])
PKG_CHECK_MODULES([ibverbs],[ibverbs])
AC_DEFINE_UNQUOTED([HAVE_RDMA], 1, [have rdmacm])
PACKAGE_FEATURES="$PACKAGE_FEATURES rdma"
fi
if test "x${enable_monitoring}" = xyes; then
PKG_CHECK_MODULES([statgrab], [libstatgrab])
AC_DEFINE_UNQUOTED([HAVE_MONITORING], 1, [have resource monitoring])
PACKAGE_FEATURES="$PACKAGE_FEATURES monitoring"
fi
if test "x${enable_watchdog}" = xyes; then
AC_CHECK_HEADER([linux/watchdog.h], [], [AC_MSG_ERROR([watchdog requires linux/watchdog.h])])
AC_CHECK_HEADER([linux/reboot.h], [], [AC_MSG_ERROR([watchdog requires linux/reboot.h])])
AC_DEFINE_UNQUOTED([HAVE_WATCHDOG], 1, [have watchdog])
PACKAGE_FEATURES="$PACKAGE_FEATURES watchdog"
fi
if test "x${enable_augeas}" = xyes; then
PACKAGE_FEATURES="$PACKAGE_FEATURES augeas"
fi
if test "x${enable_systemd}" = xyes; then
PACKAGE_FEATURES="$PACKAGE_FEATURES systemd"
fi
if test "x${enable_xmlconf}" = xyes; then
PACKAGE_FEATURES="$PACKAGE_FEATURES xmlconf"
fi
if test "x${enable_qdevices}" = xyes; then
PACKAGE_FEATURES="$PACKAGE_FEATURES qdevices"
fi
do_snmp=0
if test "x${enable_snmp}" = xyes; then
AC_PATH_PROGS([SNMPCONFIG], [net-snmp-config])
if test "x${SNMPCONFIG}" != "x"; then
AC_MSG_CHECKING([for snmp includes])
SNMP_PREFIX=`$SNMPCONFIG --prefix`
SNMP_INCLUDES="-I$SNMP_PREFIX/include"
AC_MSG_RESULT([$SNMP_INCLUDES])
AC_MSG_CHECKING([for snmp libraries])
SNMP_LIBS=`$SNMPCONFIG --libs`
AC_MSG_RESULT([$SNMP_LIBS])
AC_SUBST([SNMP_LIBS])
saveCFLAGS="$CFLAGS"
CFLAGS="$CFLAGS $SNMP_INCLUDES"
AC_CHECK_HEADERS([net-snmp/net-snmp-config.h])
CFLAGS="$saveCFLAGS"
if test "x${ac_cv_header_net_snmp_net_snmp_config_h}" != "xyes"; then
AC_MSG_ERROR([Unable to use net-snmp/net-snmp-config.h])
fi
savedLibs=$LIBS
LIBS="$LIBS $SNMP_LIBS"
AC_CHECK_FUNCS([netsnmp_transport_open_client])
if test $ac_cv_func_netsnmp_transport_open_client != yes; then
AC_CHECK_FUNCS([netsnmp_tdomain_transport])
if test $ac_cv_func_netsnmp_tdomain_transport != yes; then
AC_MSG_ERROR([No usable SNMP client transport implementation found])
fi
else
AC_DEFINE_UNQUOTED([NETSNMPV54], $NETSNMP_NEW_SUPPORT, [have net-snmp5.4 over])
fi
LIBS=$savedLibs
do_snmp=1
PACKAGE_FEATURES="$PACKAGE_FEATURES snmp"
AC_DEFINE_UNQUOTED([ENABLE_SNMP], $do_snmp, [Build in support for sending SNMP traps])
else
AC_MSG_ERROR([You need the net_snmp development package to continue.])
fi
fi
AM_CONDITIONAL(BUILD_SNMP, test "${do_snmp}" = "1")
# extra warnings
EXTRA_WARNINGS=""
WARNLIST="
all
shadow
missing-prototypes
missing-declarations
strict-prototypes
declaration-after-statement
pointer-arith
write-strings
cast-align
bad-function-cast
missing-format-attribute
format=2
format-security
format-nonliteral
no-long-long
unsigned-char
gnu89-inline
no-strict-aliasing
"
for j in $WARNLIST; do
if cc_supports_flag -W$j; then
EXTRA_WARNINGS="$EXTRA_WARNINGS -W$j";
fi
done
if test "x${enable_coverage}" = xyes && \
cc_supports_flag -ftest-coverage && \
cc_supports_flag -fprofile-arcs ; then
AC_MSG_NOTICE([Enabling Coverage (enable -O0 by default)])
OPT_CFLAGS="-O0"
COVERAGE_CFLAGS="-ftest-coverage -fprofile-arcs"
COVERAGE_LDFLAGS="-ftest-coverage -fprofile-arcs"
PACKAGE_FEATURES="$PACKAGE_FEATURES coverage"
else
COVERAGE_CFLAGS=""
COVERAGE_LDFLAGS=""
fi
if test "x${enable_small_memory_footprint}" = xyes ; then
AC_DEFINE_UNQUOTED([HAVE_SMALL_MEMORY_FOOTPRINT], 1, [have small_memory_footprint])
PACKAGE_FEATURES="$PACKAGE_FEATURES small-memory-footprint"
fi
if test "x${enable_ansi}" = xyes && \
cc_supports_flag -std=iso9899:199409 ; then
AC_MSG_NOTICE([Enabling ANSI Compatibility])
ANSI_CPPFLAGS="-ansi -DANSI_ONLY"
PACKAGE_FEATURES="$PACKAGE_FEATURES ansi"
else
ANSI_CPPFLAGS=""
fi
if test "x${enable_fatal_warnings}" = xyes && \
cc_supports_flag -Werror ; then
AC_MSG_NOTICE([Enabling Fatal Warnings (-Werror)])
WERROR_CFLAGS="-Werror"
PACKAGE_FEATURES="$PACKAGE_FEATURES fatal-warnings"
else
WERROR_CFLAGS=""
fi
# don't add addtional cflags
if test "x${enable_user_flags}" = xyes; then
OPT_CFLAGS=""
GDB_FLAGS=""
EXTRA_WARNINGS=""
fi
# final build of *FLAGS
CFLAGS="$ENV_CFLAGS $OPT_CFLAGS $GDB_FLAGS \
$COVERAGE_CFLAGS $EXTRA_WARNINGS \
$WERROR_CFLAGS $NSS_CFLAGS $LIBQB_CFLAGS \
$SNMP_INCLUDES"
CPPFLAGS="$ENV_CPPFLAGS $ANSI_CPPFLAGS"
LDFLAGS="$ENV_LDFLAGS $COVERAGE_LDFLAGS"
# substitute what we need:
AC_SUBST([BASHPATH])
AC_SUBST([INITDDIR])
AC_SUBST([SYSTEMDDIR])
INITWRAPPERSDIR=$(eval echo ${INITWRAPPERSDIR})
AC_SUBST([INITWRAPPERSDIR])
AC_SUBST([SOMAJOR])
AC_SUBST([SOMINOR])
AC_SUBST([SOMICRO])
AC_SUBST([SONAME])
AM_CONDITIONAL(INSTALL_TESTAGENTS, test -n "${enable_testagents}")
AM_CONDITIONAL(INSTALL_MIB, test "${do_snmp}" = "1")
AM_CONDITIONAL(INSTALL_DBUSCONF, test "${enable_dbus}" = "1")
AM_CONDITIONAL(AUGTOOL, test -n "${AUGTOOL}")
AC_SUBST([NSS_LDFLAGS])
AM_CONDITIONAL(BUILD_HTML_DOCS, test -n "${GROFF}")
AC_SUBST([LINT_FLAGS])
AC_DEFINE_UNQUOTED([LOCALSTATEDIR], "$(eval echo ${localstatedir})", [localstate directory])
COROSYSCONFDIR=${sysconfdir}/corosync
AC_SUBST([COROSYSCONFDIR])
AC_DEFINE_UNQUOTED([COROSYSCONFDIR], "$(eval echo ${COROSYSCONFDIR})", [corosync config directory])
AC_DEFINE_UNQUOTED([PACKAGE_FEATURES], "${PACKAGE_FEATURES}", [corosync built-in features])
AC_OUTPUT
AC_MSG_RESULT([])
AC_MSG_RESULT([$PACKAGE configuration:])
AC_MSG_RESULT([ Version = ${VERSION}])
AC_MSG_RESULT([ Prefix = ${prefix}])
AC_MSG_RESULT([ Executables = ${sbindir}])
AC_MSG_RESULT([ Man pages = ${mandir}])
AC_MSG_RESULT([ Doc dir = ${docdir}])
AC_MSG_RESULT([ Libraries = ${libdir}])
AC_MSG_RESULT([ Header files = ${includedir}])
AC_MSG_RESULT([ Arch-independent files = ${datadir}])
AC_MSG_RESULT([ State information = ${localstatedir}])
AC_MSG_RESULT([ System configuration = ${sysconfdir}])
AC_MSG_RESULT([ System init.d directory = ${INITDDIR}])
AC_MSG_RESULT([ System systemd directory = ${SYSTEMDDIR}])
AC_MSG_RESULT([ System init wraps dir = ${INITWRAPPERSDIR}])
AC_MSG_RESULT([ corosync config dir = ${COROSYSCONFDIR}])
AC_MSG_RESULT([ Features =${PACKAGE_FEATURES}])
AC_MSG_RESULT([])
AC_MSG_RESULT([$PACKAGE build info:])
AC_MSG_RESULT([ Library SONAME = ${SONAME}])
LIB_MSG_RESULT(m4_shift(local_soname_list))dnl
AC_MSG_RESULT([ Default optimization = ${OPT_CFLAGS}])
AC_MSG_RESULT([ Default debug options = ${GDB_CFLAGS}])
AC_MSG_RESULT([ Extra compiler warnings = ${EXTRA_WARNING}])
AC_MSG_RESULT([ Env. defined CFLAG = ${ENV_CFLAGS}])
AC_MSG_RESULT([ Env. defined CPPFLAGS = ${ENV_CPPFLAGS}])
AC_MSG_RESULT([ Env. defined LDFLAGS = ${ENV_LDFLAGS}])
AC_MSG_RESULT([ ANSI defined CPPFLAGS = ${ANSI_CPPFLAGS}])
AC_MSG_RESULT([ Coverage CFLAGS = ${COVERAGE_CFLAGS}])
AC_MSG_RESULT([ Coverage LDFLAGS = ${COVERAGE_LDFLAGS}])
AC_MSG_RESULT([ Fatal War. CFLAGS = ${WERROR_CFLAGS}])
AC_MSG_RESULT([ Final CFLAGS = ${CFLAGS}])
AC_MSG_RESULT([ Final CPPFLAGS = ${CPPFLAGS}])
AC_MSG_RESULT([ Final LDFLAGS = ${LDFLAGS}])
diff --git a/exec/cfg.c b/exec/cfg.c
index 98878d5f..d730d024 100644
--- a/exec/cfg.c
+++ b/exec/cfg.c
@@ -1,835 +1,831 @@
/*
* Copyright (c) 2005-2006 MontaVista Software, Inc.
* Copyright (c) 2006-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
#include <corosync/corotypes.h>
#include <qb/qbipc_common.h>
#include <corosync/cfg.h>
#include <corosync/list.h>
#include <corosync/mar_gen.h>
#include <corosync/totem/totemip.h>
#include <corosync/totem/totem.h>
#include <corosync/ipc_cfg.h>
#include <corosync/logsys.h>
#include <corosync/coroapi.h>
#include <corosync/icmap.h>
#include <corosync/corodefs.h>
#include "service.h"
LOGSYS_DECLARE_SUBSYS ("CFG");
enum cfg_message_req_types {
MESSAGE_REQ_EXEC_CFG_RINGREENABLE = 0,
MESSAGE_REQ_EXEC_CFG_KILLNODE = 1,
MESSAGE_REQ_EXEC_CFG_SHUTDOWN = 2
};
#define DEFAULT_SHUTDOWN_TIMEOUT 5
static struct list_head trackers_list;
/*
* Variables controlling a requested shutdown
*/
static corosync_timer_handle_t shutdown_timer;
static struct cfg_info *shutdown_con;
static uint32_t shutdown_flags;
static int shutdown_yes;
static int shutdown_no;
static int shutdown_expected;
struct cfg_info
{
struct list_head list;
void *conn;
void *tracker_conn;
enum {SHUTDOWN_REPLY_UNKNOWN, SHUTDOWN_REPLY_YES, SHUTDOWN_REPLY_NO} shutdown_reply;
};
static void cfg_confchg_fn (
enum totem_configuration_type configuration_type,
const unsigned int *member_list, size_t member_list_entries,
const unsigned int *left_list, size_t left_list_entries,
const unsigned int *joined_list, size_t joined_list_entries,
const struct memb_ring_id *ring_id);
static char *cfg_exec_init_fn (struct corosync_api_v1 *corosync_api_v1);
static struct corosync_api_v1 *api;
static int cfg_lib_init_fn (void *conn);
static int cfg_lib_exit_fn (void *conn);
static void message_handler_req_exec_cfg_ringreenable (
const void *message,
unsigned int nodeid);
static void message_handler_req_exec_cfg_killnode (
const void *message,
unsigned int nodeid);
static void message_handler_req_exec_cfg_shutdown (
const void *message,
unsigned int nodeid);
static void exec_cfg_killnode_endian_convert (void *msg);
static void message_handler_req_lib_cfg_ringstatusget (
void *conn,
const void *msg);
static void message_handler_req_lib_cfg_ringreenable (
void *conn,
const void *msg);
static void message_handler_req_lib_cfg_killnode (
void *conn,
const void *msg);
static void message_handler_req_lib_cfg_tryshutdown (
void *conn,
const void *msg);
static void message_handler_req_lib_cfg_replytoshutdown (
void *conn,
const void *msg);
static void message_handler_req_lib_cfg_get_node_addrs (
void *conn,
const void *msg);
static void message_handler_req_lib_cfg_local_get (
void *conn,
const void *msg);
/*
* Service Handler Definition
*/
static struct corosync_lib_handler cfg_lib_engine[] =
{
{ /* 0 */
.lib_handler_fn = message_handler_req_lib_cfg_ringstatusget,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 1 */
.lib_handler_fn = message_handler_req_lib_cfg_ringreenable,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 2 */
.lib_handler_fn = message_handler_req_lib_cfg_killnode,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 3 */
.lib_handler_fn = message_handler_req_lib_cfg_tryshutdown,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 4 */
.lib_handler_fn = message_handler_req_lib_cfg_replytoshutdown,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 5 */
.lib_handler_fn = message_handler_req_lib_cfg_get_node_addrs,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 6 */
.lib_handler_fn = message_handler_req_lib_cfg_local_get,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
}
};
static struct corosync_exec_handler cfg_exec_engine[] =
{
{ /* 0 */
.exec_handler_fn = message_handler_req_exec_cfg_ringreenable,
},
{ /* 1 */
.exec_handler_fn = message_handler_req_exec_cfg_killnode,
.exec_endian_convert_fn = exec_cfg_killnode_endian_convert
},
{ /* 2 */
.exec_handler_fn = message_handler_req_exec_cfg_shutdown,
}
};
/*
* Exports the interface for the service
*/
struct corosync_service_engine cfg_service_engine = {
.name = "corosync configuration service",
.id = CFG_SERVICE,
.priority = 1,
.private_data_size = sizeof(struct cfg_info),
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED,
.allow_inquorate = CS_LIB_ALLOW_INQUORATE,
.lib_init_fn = cfg_lib_init_fn,
.lib_exit_fn = cfg_lib_exit_fn,
.lib_engine = cfg_lib_engine,
.lib_engine_count = sizeof (cfg_lib_engine) / sizeof (struct corosync_lib_handler),
.exec_init_fn = cfg_exec_init_fn,
.exec_engine = cfg_exec_engine,
.exec_engine_count = sizeof (cfg_exec_engine) / sizeof (struct corosync_exec_handler),
.confchg_fn = cfg_confchg_fn
};
struct corosync_service_engine *cfg_get_service_engine_ver0 (void)
{
return (&cfg_service_engine);
}
struct req_exec_cfg_ringreenable {
struct qb_ipc_request_header header __attribute__((aligned(8)));
mar_message_source_t source __attribute__((aligned(8)));
};
struct req_exec_cfg_killnode {
struct qb_ipc_request_header header __attribute__((aligned(8)));
mar_uint32_t nodeid __attribute__((aligned(8)));
mar_name_t reason __attribute__((aligned(8)));
};
struct req_exec_cfg_shutdown {
struct qb_ipc_request_header header __attribute__((aligned(8)));
};
/* IMPL */
static char *cfg_exec_init_fn (
struct corosync_api_v1 *corosync_api_v1)
{
-#ifdef COROSYNC_SOLARIS
- logsys_subsys_init();
-#endif
-
api = corosync_api_v1;
list_init(&trackers_list);
return (NULL);
}
static void cfg_confchg_fn (
enum totem_configuration_type configuration_type,
const unsigned int *member_list, size_t member_list_entries,
const unsigned int *left_list, size_t left_list_entries,
const unsigned int *joined_list, size_t joined_list_entries,
const struct memb_ring_id *ring_id)
{
}
/*
* Tell other nodes we are shutting down
*/
static int send_shutdown(void)
{
struct req_exec_cfg_shutdown req_exec_cfg_shutdown;
struct iovec iovec;
ENTER();
req_exec_cfg_shutdown.header.size =
sizeof (struct req_exec_cfg_shutdown);
req_exec_cfg_shutdown.header.id = SERVICE_ID_MAKE (CFG_SERVICE,
MESSAGE_REQ_EXEC_CFG_SHUTDOWN);
iovec.iov_base = (char *)&req_exec_cfg_shutdown;
iovec.iov_len = sizeof (struct req_exec_cfg_shutdown);
assert (api->totem_mcast (&iovec, 1, TOTEM_SAFE) == 0);
LEAVE();
return 0;
}
static void send_test_shutdown(void *only_conn, void *exclude_conn, int status)
{
struct res_lib_cfg_testshutdown res_lib_cfg_testshutdown;
struct list_head *iter;
ENTER();
res_lib_cfg_testshutdown.header.size = sizeof(struct res_lib_cfg_testshutdown);
res_lib_cfg_testshutdown.header.id = MESSAGE_RES_CFG_TESTSHUTDOWN;
res_lib_cfg_testshutdown.header.error = status;
res_lib_cfg_testshutdown.flags = shutdown_flags;
if (only_conn) {
TRACE1("sending testshutdown to only %p", only_conn);
api->ipc_dispatch_send(only_conn, &res_lib_cfg_testshutdown,
sizeof(res_lib_cfg_testshutdown));
} else {
for (iter = trackers_list.next; iter != &trackers_list; iter = iter->next) {
struct cfg_info *ci = list_entry(iter, struct cfg_info, list);
if (ci->conn != exclude_conn) {
TRACE1("sending testshutdown to %p", ci->tracker_conn);
api->ipc_dispatch_send(ci->tracker_conn, &res_lib_cfg_testshutdown,
sizeof(res_lib_cfg_testshutdown));
}
}
}
LEAVE();
}
static void check_shutdown_status(void)
{
ENTER();
/*
* Shutdown client might have gone away
*/
if (!shutdown_con) {
LEAVE();
return;
}
/*
* All replies safely gathered in ?
*/
if (shutdown_yes + shutdown_no >= shutdown_expected) {
struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown;
api->timer_delete(shutdown_timer);
if (shutdown_yes >= shutdown_expected ||
shutdown_flags == CFG_SHUTDOWN_FLAG_REGARDLESS) {
TRACE1("shutdown confirmed");
res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown);
res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN;
res_lib_cfg_tryshutdown.header.error = CS_OK;
/*
* Tell originator that shutdown was confirmed
*/
api->ipc_response_send(shutdown_con->conn, &res_lib_cfg_tryshutdown,
sizeof(res_lib_cfg_tryshutdown));
shutdown_con = NULL;
/*
* Tell other nodes we are going down
*/
send_shutdown();
}
else {
TRACE1("shutdown cancelled");
res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown);
res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN;
res_lib_cfg_tryshutdown.header.error = CS_ERR_BUSY;
/*
* Tell originator that shutdown was cancelled
*/
api->ipc_response_send(shutdown_con->conn, &res_lib_cfg_tryshutdown,
sizeof(res_lib_cfg_tryshutdown));
shutdown_con = NULL;
}
log_printf(LOGSYS_LEVEL_DEBUG, "shutdown decision is: (yes count: %d, no count: %d) flags=%x",
shutdown_yes, shutdown_no, shutdown_flags);
}
LEAVE();
}
/*
* Not all nodes responded to the shutdown (in time)
*/
static void shutdown_timer_fn(void *arg)
{
ENTER();
/*
* Mark undecideds as "NO"
*/
shutdown_no = shutdown_expected;
check_shutdown_status();
send_test_shutdown(NULL, NULL, CS_ERR_TIMEOUT);
LEAVE();
}
static void remove_ci_from_shutdown(struct cfg_info *ci)
{
ENTER();
/*
* If the controlling shutdown process has quit, then cancel the
* shutdown session
*/
if (ci == shutdown_con) {
shutdown_con = NULL;
api->timer_delete(shutdown_timer);
}
if (!list_empty(&ci->list)) {
list_del(&ci->list);
list_init(&ci->list);
/*
* Remove our option
*/
if (shutdown_con) {
if (ci->shutdown_reply == SHUTDOWN_REPLY_YES)
shutdown_yes--;
if (ci->shutdown_reply == SHUTDOWN_REPLY_NO)
shutdown_no--;
}
/*
* If we are leaving, then that's an implicit YES to shutdown
*/
ci->shutdown_reply = SHUTDOWN_REPLY_YES;
shutdown_yes++;
check_shutdown_status();
}
LEAVE();
}
int cfg_lib_exit_fn (void *conn)
{
struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn);
ENTER();
remove_ci_from_shutdown(ci);
LEAVE();
return (0);
}
static int cfg_lib_init_fn (void *conn)
{
struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn);
ENTER();
list_init(&ci->list);
LEAVE();
return (0);
}
/*
* Executive message handlers
*/
static void message_handler_req_exec_cfg_ringreenable (
const void *message,
unsigned int nodeid)
{
const struct req_exec_cfg_ringreenable *req_exec_cfg_ringreenable
= message;
struct res_lib_cfg_ringreenable res_lib_cfg_ringreenable;
ENTER();
api->totem_ring_reenable ();
if (api->ipc_source_is_local(&req_exec_cfg_ringreenable->source)) {
res_lib_cfg_ringreenable.header.id = MESSAGE_RES_CFG_RINGREENABLE;
res_lib_cfg_ringreenable.header.size = sizeof (struct res_lib_cfg_ringreenable);
res_lib_cfg_ringreenable.header.error = CS_OK;
api->ipc_response_send (
req_exec_cfg_ringreenable->source.conn,
&res_lib_cfg_ringreenable,
sizeof (struct res_lib_cfg_ringreenable));
api->ipc_refcnt_dec(req_exec_cfg_ringreenable->source.conn);
}
LEAVE();
}
static void exec_cfg_killnode_endian_convert (void *msg)
{
struct req_exec_cfg_killnode *req_exec_cfg_killnode =
(struct req_exec_cfg_killnode *)msg;
ENTER();
swab_mar_name_t(&req_exec_cfg_killnode->reason);
LEAVE();
}
static void message_handler_req_exec_cfg_killnode (
const void *message,
unsigned int nodeid)
{
const struct req_exec_cfg_killnode *req_exec_cfg_killnode = message;
cs_name_t reason;
ENTER();
log_printf(LOGSYS_LEVEL_DEBUG, "request to kill node %d(us=%d): %s",
req_exec_cfg_killnode->nodeid, api->totem_nodeid_get(), reason.value);
if (req_exec_cfg_killnode->nodeid == api->totem_nodeid_get()) {
marshall_from_mar_name_t(&reason, &req_exec_cfg_killnode->reason);
log_printf(LOGSYS_LEVEL_NOTICE, "Killed by node %d: %s",
nodeid, reason.value);
corosync_fatal_error(COROSYNC_FATAL_ERROR_EXIT);
}
LEAVE();
}
/*
* Self shutdown
*/
static void message_handler_req_exec_cfg_shutdown (
const void *message,
unsigned int nodeid)
{
ENTER();
log_printf(LOGSYS_LEVEL_NOTICE, "Node %d was shut down by sysadmin", nodeid);
if (nodeid == api->totem_nodeid_get()) {
api->shutdown_request();
}
LEAVE();
}
/*
* Library Interface Implementation
*/
static void message_handler_req_lib_cfg_ringstatusget (
void *conn,
const void *msg)
{
struct res_lib_cfg_ringstatusget res_lib_cfg_ringstatusget;
struct totem_ip_address interfaces[INTERFACE_MAX];
unsigned int iface_count;
char **status;
const char *totem_ip_string;
unsigned int i;
ENTER();
res_lib_cfg_ringstatusget.header.id = MESSAGE_RES_CFG_RINGSTATUSGET;
res_lib_cfg_ringstatusget.header.size = sizeof (struct res_lib_cfg_ringstatusget);
res_lib_cfg_ringstatusget.header.error = CS_OK;
api->totem_ifaces_get (
api->totem_nodeid_get(),
interfaces,
INTERFACE_MAX,
&status,
&iface_count);
res_lib_cfg_ringstatusget.interface_count = iface_count;
for (i = 0; i < iface_count; i++) {
totem_ip_string
= (const char *)api->totem_ip_print (&interfaces[i]);
strcpy ((char *)&res_lib_cfg_ringstatusget.interface_status[i],
status[i]);
strcpy ((char *)&res_lib_cfg_ringstatusget.interface_name[i],
totem_ip_string);
}
api->ipc_response_send (
conn,
&res_lib_cfg_ringstatusget,
sizeof (struct res_lib_cfg_ringstatusget));
LEAVE();
}
static void message_handler_req_lib_cfg_ringreenable (
void *conn,
const void *msg)
{
struct req_exec_cfg_ringreenable req_exec_cfg_ringreenable;
struct iovec iovec;
ENTER();
req_exec_cfg_ringreenable.header.size =
sizeof (struct req_exec_cfg_ringreenable);
req_exec_cfg_ringreenable.header.id = SERVICE_ID_MAKE (CFG_SERVICE,
MESSAGE_REQ_EXEC_CFG_RINGREENABLE);
api->ipc_source_set (&req_exec_cfg_ringreenable.source, conn);
api->ipc_refcnt_inc(conn);
iovec.iov_base = (char *)&req_exec_cfg_ringreenable;
iovec.iov_len = sizeof (struct req_exec_cfg_ringreenable);
assert (api->totem_mcast (&iovec, 1, TOTEM_SAFE) == 0);
LEAVE();
}
static void message_handler_req_lib_cfg_killnode (
void *conn,
const void *msg)
{
const struct req_lib_cfg_killnode *req_lib_cfg_killnode = msg;
struct res_lib_cfg_killnode res_lib_cfg_killnode;
struct req_exec_cfg_killnode req_exec_cfg_killnode;
struct iovec iovec;
ENTER();
req_exec_cfg_killnode.header.size =
sizeof (struct req_exec_cfg_killnode);
req_exec_cfg_killnode.header.id = SERVICE_ID_MAKE (CFG_SERVICE,
MESSAGE_REQ_EXEC_CFG_KILLNODE);
req_exec_cfg_killnode.nodeid = req_lib_cfg_killnode->nodeid;
marshall_to_mar_name_t(&req_exec_cfg_killnode.reason, &req_lib_cfg_killnode->reason);
iovec.iov_base = (char *)&req_exec_cfg_killnode;
iovec.iov_len = sizeof (struct req_exec_cfg_killnode);
(void)api->totem_mcast (&iovec, 1, TOTEM_SAFE);
res_lib_cfg_killnode.header.size = sizeof(struct res_lib_cfg_killnode);
res_lib_cfg_killnode.header.id = MESSAGE_RES_CFG_KILLNODE;
res_lib_cfg_killnode.header.error = CS_OK;
api->ipc_response_send(conn, &res_lib_cfg_killnode,
sizeof(res_lib_cfg_killnode));
LEAVE();
}
static void message_handler_req_lib_cfg_tryshutdown (
void *conn,
const void *msg)
{
struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn);
const struct req_lib_cfg_tryshutdown *req_lib_cfg_tryshutdown = msg;
struct list_head *iter;
ENTER();
if (req_lib_cfg_tryshutdown->flags == CFG_SHUTDOWN_FLAG_IMMEDIATE) {
struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown;
/*
* Tell other nodes
*/
send_shutdown();
res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown);
res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN;
res_lib_cfg_tryshutdown.header.error = CS_OK;
api->ipc_response_send(conn, &res_lib_cfg_tryshutdown,
sizeof(res_lib_cfg_tryshutdown));
LEAVE();
return;
}
/*
* Shutdown in progress, return an error
*/
if (shutdown_con) {
struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown;
res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown);
res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN;
res_lib_cfg_tryshutdown.header.error = CS_ERR_EXIST;
api->ipc_response_send(conn, &res_lib_cfg_tryshutdown,
sizeof(res_lib_cfg_tryshutdown));
LEAVE();
return;
}
ci->conn = conn;
shutdown_con = (struct cfg_info *)api->ipc_private_data_get (conn);
shutdown_flags = req_lib_cfg_tryshutdown->flags;
shutdown_yes = 0;
shutdown_no = 0;
/*
* Count the number of listeners
*/
shutdown_expected = 0;
for (iter = trackers_list.next; iter != &trackers_list; iter = iter->next) {
struct cfg_info *testci = list_entry(iter, struct cfg_info, list);
/*
* It is assumed that we will allow shutdown
*/
if (testci != ci) {
testci->shutdown_reply = SHUTDOWN_REPLY_UNKNOWN;
shutdown_expected++;
}
}
/*
* If no-one is listening for events then we can just go down now
*/
if (shutdown_expected == 0) {
struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown;
res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown);
res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN;
res_lib_cfg_tryshutdown.header.error = CS_OK;
/*
* Tell originator that shutdown was confirmed
*/
api->ipc_response_send(conn, &res_lib_cfg_tryshutdown,
sizeof(res_lib_cfg_tryshutdown));
send_shutdown();
LEAVE();
return;
}
else {
unsigned int shutdown_timeout = DEFAULT_SHUTDOWN_TIMEOUT;
/*
* Look for a shutdown timeout in configuration map
*/
icmap_get_uint32("cfg.shutdown_timeout", &shutdown_timeout);
/*
* Start the timer. If we don't get a full set of replies before this goes
* off we'll cancel the shutdown
*/
api->timer_add_duration((unsigned long long)shutdown_timeout*1000000000, NULL,
shutdown_timer_fn, &shutdown_timer);
/*
* Tell the users we would like to shut down
*/
send_test_shutdown(NULL, conn, CS_OK);
}
/*
* We don't sent a reply to the caller here.
* We send it when we know if we can shut down or not
*/
LEAVE();
}
static void message_handler_req_lib_cfg_replytoshutdown (
void *conn,
const void *msg)
{
struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn);
const struct req_lib_cfg_replytoshutdown *req_lib_cfg_replytoshutdown = msg;
struct res_lib_cfg_replytoshutdown res_lib_cfg_replytoshutdown;
int status = CS_OK;
ENTER();
if (!shutdown_con) {
status = CS_ERR_ACCESS;
goto exit_fn;
}
if (req_lib_cfg_replytoshutdown->response) {
shutdown_yes++;
ci->shutdown_reply = SHUTDOWN_REPLY_YES;
}
else {
shutdown_no++;
ci->shutdown_reply = SHUTDOWN_REPLY_NO;
}
check_shutdown_status();
exit_fn:
res_lib_cfg_replytoshutdown.header.error = status;
res_lib_cfg_replytoshutdown.header.id = MESSAGE_RES_CFG_REPLYTOSHUTDOWN;
res_lib_cfg_replytoshutdown.header.size = sizeof(res_lib_cfg_replytoshutdown);
api->ipc_response_send(conn, &res_lib_cfg_replytoshutdown,
sizeof(res_lib_cfg_replytoshutdown));
LEAVE();
}
static void message_handler_req_lib_cfg_get_node_addrs (void *conn,
const void *msg)
{
struct totem_ip_address node_ifs[INTERFACE_MAX];
char buf[PIPE_BUF];
char **status;
unsigned int num_interfaces = 0;
int ret = CS_OK;
int i;
const struct req_lib_cfg_get_node_addrs *req_lib_cfg_get_node_addrs = msg;
struct res_lib_cfg_get_node_addrs *res_lib_cfg_get_node_addrs = (struct res_lib_cfg_get_node_addrs *)buf;
unsigned int nodeid = req_lib_cfg_get_node_addrs->nodeid;
char *addr_buf;
if (nodeid == 0)
nodeid = api->totem_nodeid_get();
api->totem_ifaces_get(nodeid, node_ifs, INTERFACE_MAX, &status, &num_interfaces);
res_lib_cfg_get_node_addrs->header.size = sizeof(struct res_lib_cfg_get_node_addrs) + (num_interfaces * TOTEMIP_ADDRLEN);
res_lib_cfg_get_node_addrs->header.id = MESSAGE_RES_CFG_GET_NODE_ADDRS;
res_lib_cfg_get_node_addrs->header.error = ret;
res_lib_cfg_get_node_addrs->num_addrs = num_interfaces;
if (num_interfaces) {
res_lib_cfg_get_node_addrs->family = node_ifs[0].family;
for (i = 0, addr_buf = (char *)res_lib_cfg_get_node_addrs->addrs;
i < num_interfaces; i++, addr_buf += TOTEMIP_ADDRLEN) {
memcpy(addr_buf, node_ifs[i].addr, TOTEMIP_ADDRLEN);
}
}
else {
res_lib_cfg_get_node_addrs->header.error = CS_ERR_NOT_EXIST;
}
api->ipc_response_send(conn, res_lib_cfg_get_node_addrs, res_lib_cfg_get_node_addrs->header.size);
}
static void message_handler_req_lib_cfg_local_get (void *conn, const void *msg)
{
struct res_lib_cfg_local_get res_lib_cfg_local_get;
res_lib_cfg_local_get.header.size = sizeof(res_lib_cfg_local_get);
res_lib_cfg_local_get.header.id = MESSAGE_RES_CFG_LOCAL_GET;
res_lib_cfg_local_get.header.error = CS_OK;
res_lib_cfg_local_get.local_nodeid = api->totem_nodeid_get ();
api->ipc_response_send(conn, &res_lib_cfg_local_get,
sizeof(res_lib_cfg_local_get));
}
diff --git a/exec/cmap.c b/exec/cmap.c
index 9ae4a878..f4b688db 100644
--- a/exec/cmap.c
+++ b/exec/cmap.c
@@ -1,608 +1,604 @@
/*
* Copyright (c) 2011-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Jan Friesse (jfriesse@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the Red Hat, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <sys/types.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <poll.h>
#include <assert.h>
#include <qb/qbloop.h>
#include <qb/qbipc_common.h>
#include <corosync/corotypes.h>
#include <corosync/corodefs.h>
#include <corosync/list.h>
#include <corosync/mar_gen.h>
#include <corosync/ipc_cmap.h>
#include <corosync/logsys.h>
#include <corosync/coroapi.h>
#include <corosync/icmap.h>
#include "service.h"
LOGSYS_DECLARE_SUBSYS ("CMAP");
struct cmap_conn_info {
struct hdb_handle_database iter_db;
struct hdb_handle_database track_db;
};
typedef uint64_t cmap_iter_handle_t;
typedef uint64_t cmap_track_handle_t;
struct cmap_track_user_data {
void *conn;
cmap_track_handle_t track_handle;
uint64_t track_inst_handle;
};
static struct corosync_api_v1 *api;
static char *cmap_exec_init_fn (struct corosync_api_v1 *corosync_api);
static int cmap_exec_exit_fn(void);
static int cmap_lib_init_fn (void *conn);
static int cmap_lib_exit_fn (void *conn);
static void message_handler_req_lib_cmap_set(void *conn, const void *message);
static void message_handler_req_lib_cmap_delete(void *conn, const void *message);
static void message_handler_req_lib_cmap_get(void *conn, const void *message);
static void message_handler_req_lib_cmap_adjust_int(void *conn, const void *message);
static void message_handler_req_lib_cmap_iter_init(void *conn, const void *message);
static void message_handler_req_lib_cmap_iter_next(void *conn, const void *message);
static void message_handler_req_lib_cmap_iter_finalize(void *conn, const void *message);
static void message_handler_req_lib_cmap_track_add(void *conn, const void *message);
static void message_handler_req_lib_cmap_track_delete(void *conn, const void *message);
static void cmap_notify_fn(int32_t event,
const char *key_name,
struct icmap_notify_value new_val,
struct icmap_notify_value old_val,
void *user_data);
/*
* Library Handler Definition
*/
static struct corosync_lib_handler cmap_lib_engine[] =
{
{ /* 0 */
.lib_handler_fn = message_handler_req_lib_cmap_set,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 1 */
.lib_handler_fn = message_handler_req_lib_cmap_delete,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 2 */
.lib_handler_fn = message_handler_req_lib_cmap_get,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 3 */
.lib_handler_fn = message_handler_req_lib_cmap_adjust_int,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 4 */
.lib_handler_fn = message_handler_req_lib_cmap_iter_init,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 5 */
.lib_handler_fn = message_handler_req_lib_cmap_iter_next,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 6 */
.lib_handler_fn = message_handler_req_lib_cmap_iter_finalize,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 7 */
.lib_handler_fn = message_handler_req_lib_cmap_track_add,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 8 */
.lib_handler_fn = message_handler_req_lib_cmap_track_delete,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
};
struct corosync_service_engine cmap_service_engine = {
.name = "corosync configuration map access",
.id = CMAP_SERVICE,
.priority = 1,
.private_data_size = sizeof(struct cmap_conn_info),
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED,
.allow_inquorate = CS_LIB_ALLOW_INQUORATE,
.lib_init_fn = cmap_lib_init_fn,
.lib_exit_fn = cmap_lib_exit_fn,
.lib_engine = cmap_lib_engine,
.lib_engine_count = sizeof (cmap_lib_engine) / sizeof (struct corosync_lib_handler),
.exec_init_fn = cmap_exec_init_fn,
.exec_exit_fn = cmap_exec_exit_fn,
};
struct corosync_service_engine *cmap_get_service_engine_ver0 (void)
{
return (&cmap_service_engine);
}
static int cmap_exec_exit_fn(void)
{
return 0;
}
static char *cmap_exec_init_fn (
struct corosync_api_v1 *corosync_api)
{
-
-#ifdef COROSYNC_SOLARIS
- logsys_subsys_init();
-#endif
api = corosync_api;
return (NULL);
}
static int cmap_lib_init_fn (void *conn)
{
struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn);
log_printf(LOGSYS_LEVEL_DEBUG, "lib_init_fn: conn=%p", conn);
api->ipc_refcnt_inc(conn);
memset(conn_info, 0, sizeof(*conn_info));
hdb_create(&conn_info->iter_db);
hdb_create(&conn_info->track_db);
return (0);
}
static int cmap_lib_exit_fn (void *conn)
{
struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn);
hdb_handle_t iter_handle = 0;
icmap_iter_t *iter;
hdb_handle_t track_handle = 0;
icmap_track_t *track;
log_printf(LOGSYS_LEVEL_DEBUG, "exit_fn for conn=%p", conn);
hdb_iterator_reset(&conn_info->iter_db);
while (hdb_iterator_next(&conn_info->iter_db,
(void*)&iter, &iter_handle) == 0) {
icmap_iter_finalize(*iter);
(void)hdb_handle_put (&conn_info->iter_db, iter_handle);
}
hdb_destroy(&conn_info->iter_db);
hdb_iterator_reset(&conn_info->track_db);
while (hdb_iterator_next(&conn_info->track_db,
(void*)&track, &track_handle) == 0) {
free(icmap_track_get_user_data(*track));
icmap_track_delete(*track);
(void)hdb_handle_put (&conn_info->track_db, track_handle);
}
hdb_destroy(&conn_info->track_db);
api->ipc_refcnt_dec(conn);
return (0);
}
static void message_handler_req_lib_cmap_set(void *conn, const void *message)
{
const struct req_lib_cmap_set *req_lib_cmap_set = message;
struct res_lib_cmap_set res_lib_cmap_set;
cs_error_t ret;
if (icmap_is_key_ro((char *)req_lib_cmap_set->key_name.value)) {
ret = CS_ERR_ACCESS;
} else {
ret = icmap_set((char *)req_lib_cmap_set->key_name.value, &req_lib_cmap_set->value,
req_lib_cmap_set->value_len, req_lib_cmap_set->type);
}
memset(&res_lib_cmap_set, 0, sizeof(res_lib_cmap_set));
res_lib_cmap_set.header.size = sizeof(res_lib_cmap_set);
res_lib_cmap_set.header.id = MESSAGE_RES_CMAP_SET;
res_lib_cmap_set.header.error = ret;
api->ipc_response_send(conn, &res_lib_cmap_set, sizeof(res_lib_cmap_set));
}
static void message_handler_req_lib_cmap_delete(void *conn, const void *message)
{
const struct req_lib_cmap_set *req_lib_cmap_set = message;
struct res_lib_cmap_delete res_lib_cmap_delete;
cs_error_t ret;
if (icmap_is_key_ro((char *)req_lib_cmap_set->key_name.value)) {
ret = CS_ERR_ACCESS;
} else {
ret = icmap_delete((char *)req_lib_cmap_set->key_name.value);
}
memset(&res_lib_cmap_delete, 0, sizeof(res_lib_cmap_delete));
res_lib_cmap_delete.header.size = sizeof(res_lib_cmap_delete);
res_lib_cmap_delete.header.id = MESSAGE_RES_CMAP_DELETE;
res_lib_cmap_delete.header.error = ret;
api->ipc_response_send(conn, &res_lib_cmap_delete, sizeof(res_lib_cmap_delete));
}
static void message_handler_req_lib_cmap_get(void *conn, const void *message)
{
const struct req_lib_cmap_get *req_lib_cmap_get = message;
struct res_lib_cmap_get *res_lib_cmap_get;
struct res_lib_cmap_get error_res_lib_cmap_get;
cs_error_t ret;
size_t value_len;
size_t res_lib_cmap_get_size;
icmap_value_types_t type;
void *value;
value_len = req_lib_cmap_get->value_len;
res_lib_cmap_get_size = sizeof(*res_lib_cmap_get) + value_len;
res_lib_cmap_get = malloc(res_lib_cmap_get_size);
if (res_lib_cmap_get == NULL) {
ret = CS_ERR_NO_MEMORY;
goto error_exit;
}
memset(res_lib_cmap_get, 0, res_lib_cmap_get_size);
if (value_len > 0) {
value = res_lib_cmap_get->value;
} else {
value = NULL;
}
ret = icmap_get((char *)req_lib_cmap_get->key_name.value,
value,
&value_len,
&type);
if (ret != CS_OK) {
free(res_lib_cmap_get);
goto error_exit;
}
res_lib_cmap_get->header.size = res_lib_cmap_get_size;
res_lib_cmap_get->header.id = MESSAGE_RES_CMAP_GET;
res_lib_cmap_get->header.error = ret;
res_lib_cmap_get->type = type;
res_lib_cmap_get->value_len = value_len;
api->ipc_response_send(conn, res_lib_cmap_get, res_lib_cmap_get_size);
free(res_lib_cmap_get);
return ;
error_exit:
memset(&error_res_lib_cmap_get, 0, sizeof(error_res_lib_cmap_get));
error_res_lib_cmap_get.header.size = sizeof(error_res_lib_cmap_get);
error_res_lib_cmap_get.header.id = MESSAGE_RES_CMAP_GET;
error_res_lib_cmap_get.header.error = ret;
api->ipc_response_send(conn, &error_res_lib_cmap_get, sizeof(error_res_lib_cmap_get));
}
static void message_handler_req_lib_cmap_adjust_int(void *conn, const void *message)
{
const struct req_lib_cmap_adjust_int *req_lib_cmap_adjust_int = message;
struct res_lib_cmap_adjust_int res_lib_cmap_adjust_int;
cs_error_t ret;
if (icmap_is_key_ro((char *)req_lib_cmap_adjust_int->key_name.value)) {
ret = CS_ERR_ACCESS;
} else {
ret = icmap_adjust_int((char *)req_lib_cmap_adjust_int->key_name.value,
req_lib_cmap_adjust_int->step);
}
memset(&res_lib_cmap_adjust_int, 0, sizeof(res_lib_cmap_adjust_int));
res_lib_cmap_adjust_int.header.size = sizeof(res_lib_cmap_adjust_int);
res_lib_cmap_adjust_int.header.id = MESSAGE_RES_CMAP_ADJUST_INT;
res_lib_cmap_adjust_int.header.error = ret;
api->ipc_response_send(conn, &res_lib_cmap_adjust_int, sizeof(res_lib_cmap_adjust_int));
}
static void message_handler_req_lib_cmap_iter_init(void *conn, const void *message)
{
const struct req_lib_cmap_iter_init *req_lib_cmap_iter_init = message;
struct res_lib_cmap_iter_init res_lib_cmap_iter_init;
cs_error_t ret;
icmap_iter_t iter;
icmap_iter_t *hdb_iter;
cmap_iter_handle_t handle = 0ULL;
const char *prefix;
struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn);
if (req_lib_cmap_iter_init->prefix.length > 0) {
prefix = (char *)req_lib_cmap_iter_init->prefix.value;
} else {
prefix = NULL;
}
iter = icmap_iter_init(prefix);
if (iter == NULL) {
ret = CS_ERR_NO_SECTIONS;
goto reply_send;
}
ret = hdb_error_to_cs(hdb_handle_create(&conn_info->iter_db, sizeof(iter), &handle));
if (ret != CS_OK) {
goto reply_send;
}
ret = hdb_error_to_cs(hdb_handle_get(&conn_info->iter_db, handle, (void *)&hdb_iter));
if (ret != CS_OK) {
goto reply_send;
}
*hdb_iter = iter;
(void)hdb_handle_put (&conn_info->iter_db, handle);
reply_send:
memset(&res_lib_cmap_iter_init, 0, sizeof(res_lib_cmap_iter_init));
res_lib_cmap_iter_init.header.size = sizeof(res_lib_cmap_iter_init);
res_lib_cmap_iter_init.header.id = MESSAGE_RES_CMAP_ITER_INIT;
res_lib_cmap_iter_init.header.error = ret;
res_lib_cmap_iter_init.iter_handle = handle;
api->ipc_response_send(conn, &res_lib_cmap_iter_init, sizeof(res_lib_cmap_iter_init));
}
static void message_handler_req_lib_cmap_iter_next(void *conn, const void *message)
{
const struct req_lib_cmap_iter_next *req_lib_cmap_iter_next = message;
struct res_lib_cmap_iter_next res_lib_cmap_iter_next;
cs_error_t ret;
icmap_iter_t *iter;
size_t value_len;
icmap_value_types_t type;
const char *res = NULL;
struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn);
ret = hdb_error_to_cs(hdb_handle_get(&conn_info->iter_db,
req_lib_cmap_iter_next->iter_handle, (void *)&iter));
if (ret != CS_OK) {
goto reply_send;
}
res = icmap_iter_next(*iter, &value_len, &type);
if (res == NULL) {
ret = CS_ERR_NO_SECTIONS;
}
(void)hdb_handle_put (&conn_info->iter_db, req_lib_cmap_iter_next->iter_handle);
reply_send:
memset(&res_lib_cmap_iter_next, 0, sizeof(res_lib_cmap_iter_next));
res_lib_cmap_iter_next.header.size = sizeof(res_lib_cmap_iter_next);
res_lib_cmap_iter_next.header.id = MESSAGE_RES_CMAP_ITER_NEXT;
res_lib_cmap_iter_next.header.error = ret;
if (res != NULL) {
res_lib_cmap_iter_next.value_len = value_len;
res_lib_cmap_iter_next.type = type;
memcpy(res_lib_cmap_iter_next.key_name.value, res, strlen(res));
res_lib_cmap_iter_next.key_name.length = strlen(res);
}
api->ipc_response_send(conn, &res_lib_cmap_iter_next, sizeof(res_lib_cmap_iter_next));
}
static void message_handler_req_lib_cmap_iter_finalize(void *conn, const void *message)
{
const struct req_lib_cmap_iter_finalize *req_lib_cmap_iter_finalize = message;
struct res_lib_cmap_iter_finalize res_lib_cmap_iter_finalize;
cs_error_t ret;
icmap_iter_t *iter;
struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn);
ret = hdb_error_to_cs(hdb_handle_get(&conn_info->iter_db,
req_lib_cmap_iter_finalize->iter_handle, (void *)&iter));
if (ret != CS_OK) {
goto reply_send;
}
icmap_iter_finalize(*iter);
(void)hdb_handle_destroy(&conn_info->iter_db, req_lib_cmap_iter_finalize->iter_handle);
(void)hdb_handle_put (&conn_info->iter_db, req_lib_cmap_iter_finalize->iter_handle);
reply_send:
memset(&res_lib_cmap_iter_finalize, 0, sizeof(res_lib_cmap_iter_finalize));
res_lib_cmap_iter_finalize.header.size = sizeof(res_lib_cmap_iter_finalize);
res_lib_cmap_iter_finalize.header.id = MESSAGE_RES_CMAP_ITER_FINALIZE;
res_lib_cmap_iter_finalize.header.error = ret;
api->ipc_response_send(conn, &res_lib_cmap_iter_finalize, sizeof(res_lib_cmap_iter_finalize));
}
static void cmap_notify_fn(int32_t event,
const char *key_name,
struct icmap_notify_value new_val,
struct icmap_notify_value old_val,
void *user_data)
{
struct cmap_track_user_data *cmap_track_user_data = (struct cmap_track_user_data *)user_data;
struct res_lib_cmap_notify_callback res_lib_cmap_notify_callback;
struct iovec iov[3];
memset(&res_lib_cmap_notify_callback, 0, sizeof(res_lib_cmap_notify_callback));
res_lib_cmap_notify_callback.header.size = sizeof(res_lib_cmap_notify_callback) + new_val.len + old_val.len;
res_lib_cmap_notify_callback.header.id = MESSAGE_RES_CMAP_NOTIFY_CALLBACK;
res_lib_cmap_notify_callback.header.error = CS_OK;
res_lib_cmap_notify_callback.new_value_type = new_val.type;
res_lib_cmap_notify_callback.old_value_type = old_val.type;
res_lib_cmap_notify_callback.new_value_len = new_val.len;
res_lib_cmap_notify_callback.old_value_len = old_val.len;
res_lib_cmap_notify_callback.event = event;
res_lib_cmap_notify_callback.key_name.length = strlen(key_name);
res_lib_cmap_notify_callback.track_inst_handle = cmap_track_user_data->track_inst_handle;
memcpy(res_lib_cmap_notify_callback.key_name.value, key_name, strlen(key_name));
iov[0].iov_base = (char *)&res_lib_cmap_notify_callback;
iov[0].iov_len = sizeof(res_lib_cmap_notify_callback);
iov[1].iov_base = (char *)new_val.data;
iov[1].iov_len = new_val.len;
iov[2].iov_base = (char *)old_val.data;
iov[2].iov_len = old_val.len;
api->ipc_dispatch_iov_send(cmap_track_user_data->conn, iov, 3);
}
static void message_handler_req_lib_cmap_track_add(void *conn, const void *message)
{
const struct req_lib_cmap_track_add *req_lib_cmap_track_add = message;
struct res_lib_cmap_track_add res_lib_cmap_track_add;
cs_error_t ret;
cmap_track_handle_t handle = 0;
icmap_track_t track = NULL;
icmap_track_t *hdb_track;
struct cmap_track_user_data *cmap_track_user_data;
const char *key_name;
struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn);
cmap_track_user_data = malloc(sizeof(*cmap_track_user_data));
if (cmap_track_user_data == NULL) {
ret = CS_ERR_NO_MEMORY;
goto reply_send;
}
memset(cmap_track_user_data, 0, sizeof(*cmap_track_user_data));
if (req_lib_cmap_track_add->key_name.length > 0) {
key_name = (char *)req_lib_cmap_track_add->key_name.value;
} else {
key_name = NULL;
}
ret = icmap_track_add(key_name,
req_lib_cmap_track_add->track_type,
cmap_notify_fn,
cmap_track_user_data,
&track);
if (ret != CS_OK) {
free(cmap_track_user_data);
goto reply_send;
}
ret = hdb_error_to_cs(hdb_handle_create(&conn_info->track_db, sizeof(track), &handle));
if (ret != CS_OK) {
free(cmap_track_user_data);
goto reply_send;
}
ret = hdb_error_to_cs(hdb_handle_get(&conn_info->track_db, handle, (void *)&hdb_track));
if (ret != CS_OK) {
free(cmap_track_user_data);
goto reply_send;
}
*hdb_track = track;
cmap_track_user_data->conn = conn;
cmap_track_user_data->track_handle = handle;
cmap_track_user_data->track_inst_handle = req_lib_cmap_track_add->track_inst_handle;
(void)hdb_handle_put (&conn_info->track_db, handle);
reply_send:
memset(&res_lib_cmap_track_add, 0, sizeof(res_lib_cmap_track_add));
res_lib_cmap_track_add.header.size = sizeof(res_lib_cmap_track_add);
res_lib_cmap_track_add.header.id = MESSAGE_RES_CMAP_TRACK_ADD;
res_lib_cmap_track_add.header.error = ret;
res_lib_cmap_track_add.track_handle = handle;
api->ipc_response_send(conn, &res_lib_cmap_track_add, sizeof(res_lib_cmap_track_add));
}
static void message_handler_req_lib_cmap_track_delete(void *conn, const void *message)
{
const struct req_lib_cmap_track_delete *req_lib_cmap_track_delete = message;
struct res_lib_cmap_track_delete res_lib_cmap_track_delete;
cs_error_t ret;
icmap_track_t *track;
struct cmap_conn_info *conn_info = (struct cmap_conn_info *)api->ipc_private_data_get (conn);
uint64_t track_inst_handle = 0;
ret = hdb_error_to_cs(hdb_handle_get(&conn_info->track_db,
req_lib_cmap_track_delete->track_handle, (void *)&track));
if (ret != CS_OK) {
goto reply_send;
}
track_inst_handle = ((struct cmap_track_user_data *)icmap_track_get_user_data(*track))->track_inst_handle;
free(icmap_track_get_user_data(*track));
ret = icmap_track_delete(*track);
(void)hdb_handle_put (&conn_info->track_db, req_lib_cmap_track_delete->track_handle);
(void)hdb_handle_destroy(&conn_info->track_db, req_lib_cmap_track_delete->track_handle);
reply_send:
memset(&res_lib_cmap_track_delete, 0, sizeof(res_lib_cmap_track_delete));
res_lib_cmap_track_delete.header.size = sizeof(res_lib_cmap_track_delete);
res_lib_cmap_track_delete.header.id = MESSAGE_RES_CMAP_TRACK_DELETE;
res_lib_cmap_track_delete.header.error = ret;
res_lib_cmap_track_delete.track_inst_handle = track_inst_handle;
api->ipc_response_send(conn, &res_lib_cmap_track_delete, sizeof(res_lib_cmap_track_delete));
}
diff --git a/exec/cpg.c b/exec/cpg.c
index 209aaaa3..95eb8472 100644
--- a/exec/cpg.c
+++ b/exec/cpg.c
@@ -1,2200 +1,2197 @@
/*
* Copyright (c) 2006-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Christine Caulfield (ccaulfie@redhat.com)
* Author: Jan Friesse (jfriesse@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <netinet/in.h>
#include <sys/uio.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <time.h>
#include <assert.h>
#include <unistd.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/mman.h>
#include <qb/qbmap.h>
#include <corosync/corotypes.h>
#include <qb/qbipc_common.h>
#include <corosync/corodefs.h>
#include <corosync/list.h>
#include <corosync/logsys.h>
#include <corosync/coroapi.h>
#include <corosync/cpg.h>
#include <corosync/ipc_cpg.h>
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
#include "service.h"
LOGSYS_DECLARE_SUBSYS ("CPG");
#define GROUP_HASH_SIZE 32
enum cpg_message_req_types {
MESSAGE_REQ_EXEC_CPG_PROCJOIN = 0,
MESSAGE_REQ_EXEC_CPG_PROCLEAVE = 1,
MESSAGE_REQ_EXEC_CPG_JOINLIST = 2,
MESSAGE_REQ_EXEC_CPG_MCAST = 3,
MESSAGE_REQ_EXEC_CPG_DOWNLIST_OLD = 4,
MESSAGE_REQ_EXEC_CPG_DOWNLIST = 5
};
struct zcb_mapped {
struct list_head list;
void *addr;
size_t size;
};
/*
* state` exec deliver
* match group name, pid -> if matched deliver for YES:
* XXX indicates impossible state
*
* join leave mcast
* UNJOINED XXX XXX NO
* LEAVE_STARTED XXX YES(unjoined_enter) YES
* JOIN_STARTED YES(join_started_enter) XXX NO
* JOIN_COMPLETED XXX NO YES
*
* join_started_enter
* set JOIN_COMPLETED
* add entry to process_info list
* unjoined_enter
* set UNJOINED
* delete entry from process_info list
*
*
* library accept join error codes
* UNJOINED YES(CS_OK) set JOIN_STARTED
* LEAVE_STARTED NO(CS_ERR_BUSY)
* JOIN_STARTED NO(CS_ERR_EXIST)
* JOIN_COMPlETED NO(CS_ERR_EXIST)
*
* library accept leave error codes
* UNJOINED NO(CS_ERR_NOT_EXIST)
* LEAVE_STARTED NO(CS_ERR_NOT_EXIST)
* JOIN_STARTED NO(CS_ERR_BUSY)
* JOIN_COMPLETED YES(CS_OK) set LEAVE_STARTED
*
* library accept mcast
* UNJOINED NO(CS_ERR_NOT_EXIST)
* LEAVE_STARTED NO(CS_ERR_NOT_EXIST)
* JOIN_STARTED YES(CS_OK)
* JOIN_COMPLETED YES(CS_OK)
*/
enum cpd_state {
CPD_STATE_UNJOINED,
CPD_STATE_LEAVE_STARTED,
CPD_STATE_JOIN_STARTED,
CPD_STATE_JOIN_COMPLETED
};
enum cpg_sync_state {
CPGSYNC_DOWNLIST,
CPGSYNC_JOINLIST
};
enum cpg_downlist_state_e {
CPG_DOWNLIST_NONE,
CPG_DOWNLIST_WAITING_FOR_MESSAGES,
CPG_DOWNLIST_APPLYING,
};
static enum cpg_downlist_state_e downlist_state;
static struct list_head downlist_messages_head;
static struct list_head joinlist_messages_head;
struct cpg_pd {
void *conn;
mar_cpg_name_t group_name;
uint32_t pid;
enum cpd_state cpd_state;
unsigned int flags;
int initial_totem_conf_sent;
struct list_head list;
struct list_head iteration_instance_list_head;
struct list_head zcb_mapped_list_head;
};
struct cpg_iteration_instance {
hdb_handle_t handle;
struct list_head list;
struct list_head items_list_head; /* List of process_info */
struct list_head *current_pointer;
};
DECLARE_HDB_DATABASE(cpg_iteration_handle_t_db,NULL);
DECLARE_LIST_INIT(cpg_pd_list_head);
static unsigned int my_member_list[PROCESSOR_COUNT_MAX];
static unsigned int my_member_list_entries;
static unsigned int my_old_member_list[PROCESSOR_COUNT_MAX];
static unsigned int my_old_member_list_entries = 0;
static struct corosync_api_v1 *api = NULL;
static enum cpg_sync_state my_sync_state = CPGSYNC_DOWNLIST;
static mar_cpg_ring_id_t last_sync_ring_id;
struct process_info {
unsigned int nodeid;
uint32_t pid;
mar_cpg_name_t group;
struct list_head list; /* on the group_info members list */
};
DECLARE_LIST_INIT(process_info_list_head);
struct join_list_entry {
uint32_t pid;
mar_cpg_name_t group_name;
};
/*
* Service Interfaces required by service_message_handler struct
*/
static char *cpg_exec_init_fn (struct corosync_api_v1 *);
static int cpg_lib_init_fn (void *conn);
static int cpg_lib_exit_fn (void *conn);
static void message_handler_req_exec_cpg_procjoin (
const void *message,
unsigned int nodeid);
static void message_handler_req_exec_cpg_procleave (
const void *message,
unsigned int nodeid);
static void message_handler_req_exec_cpg_joinlist (
const void *message,
unsigned int nodeid);
static void message_handler_req_exec_cpg_mcast (
const void *message,
unsigned int nodeid);
static void message_handler_req_exec_cpg_downlist_old (
const void *message,
unsigned int nodeid);
static void message_handler_req_exec_cpg_downlist (
const void *message,
unsigned int nodeid);
static void exec_cpg_procjoin_endian_convert (void *msg);
static void exec_cpg_joinlist_endian_convert (void *msg);
static void exec_cpg_mcast_endian_convert (void *msg);
static void exec_cpg_downlist_endian_convert_old (void *msg);
static void exec_cpg_downlist_endian_convert (void *msg);
static void message_handler_req_lib_cpg_join (void *conn, const void *message);
static void message_handler_req_lib_cpg_leave (void *conn, const void *message);
static void message_handler_req_lib_cpg_finalize (void *conn, const void *message);
static void message_handler_req_lib_cpg_mcast (void *conn, const void *message);
static void message_handler_req_lib_cpg_membership (void *conn,
const void *message);
static void message_handler_req_lib_cpg_local_get (void *conn,
const void *message);
static void message_handler_req_lib_cpg_iteration_initialize (
void *conn,
const void *message);
static void message_handler_req_lib_cpg_iteration_next (
void *conn,
const void *message);
static void message_handler_req_lib_cpg_iteration_finalize (
void *conn,
const void *message);
static void message_handler_req_lib_cpg_zc_alloc (
void *conn,
const void *message);
static void message_handler_req_lib_cpg_zc_free (
void *conn,
const void *message);
static void message_handler_req_lib_cpg_zc_execute (
void *conn,
const void *message);
static int cpg_node_joinleave_send (unsigned int pid, const mar_cpg_name_t *group_name, int fn, int reason);
static int cpg_exec_send_downlist(void);
static int cpg_exec_send_joinlist(void);
static void downlist_messages_delete (void);
static void downlist_master_choose_and_send (void);
static void joinlist_inform_clients (void);
static void joinlist_messages_delete (void);
static void cpg_sync_init (
const unsigned int *trans_list,
size_t trans_list_entries,
const unsigned int *member_list,
size_t member_list_entries,
const struct memb_ring_id *ring_id);
static int cpg_sync_process (void);
static void cpg_sync_activate (void);
static void cpg_sync_abort (void);
static void do_proc_join(
const mar_cpg_name_t *name,
uint32_t pid,
unsigned int nodeid,
int reason);
static int notify_lib_totem_membership (
void *conn,
int member_list_entries,
const unsigned int *member_list);
static inline int zcb_all_free (
struct cpg_pd *cpd);
static char *cpg_print_group_name (
const mar_cpg_name_t *group);
/*
* Library Handler Definition
*/
static struct corosync_lib_handler cpg_lib_engine[] =
{
{ /* 0 - MESSAGE_REQ_CPG_JOIN */
.lib_handler_fn = message_handler_req_lib_cpg_join,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 1 - MESSAGE_REQ_CPG_LEAVE */
.lib_handler_fn = message_handler_req_lib_cpg_leave,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 2 - MESSAGE_REQ_CPG_MCAST */
.lib_handler_fn = message_handler_req_lib_cpg_mcast,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 3 - MESSAGE_REQ_CPG_MEMBERSHIP */
.lib_handler_fn = message_handler_req_lib_cpg_membership,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 4 - MESSAGE_REQ_CPG_LOCAL_GET */
.lib_handler_fn = message_handler_req_lib_cpg_local_get,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 5 - MESSAGE_REQ_CPG_ITERATIONINITIALIZE */
.lib_handler_fn = message_handler_req_lib_cpg_iteration_initialize,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 6 - MESSAGE_REQ_CPG_ITERATIONNEXT */
.lib_handler_fn = message_handler_req_lib_cpg_iteration_next,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 7 - MESSAGE_REQ_CPG_ITERATIONFINALIZE */
.lib_handler_fn = message_handler_req_lib_cpg_iteration_finalize,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 8 - MESSAGE_REQ_CPG_FINALIZE */
.lib_handler_fn = message_handler_req_lib_cpg_finalize,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 9 */
.lib_handler_fn = message_handler_req_lib_cpg_zc_alloc,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 10 */
.lib_handler_fn = message_handler_req_lib_cpg_zc_free,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 11 */
.lib_handler_fn = message_handler_req_lib_cpg_zc_execute,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
};
static struct corosync_exec_handler cpg_exec_engine[] =
{
{ /* 0 - MESSAGE_REQ_EXEC_CPG_PROCJOIN */
.exec_handler_fn = message_handler_req_exec_cpg_procjoin,
.exec_endian_convert_fn = exec_cpg_procjoin_endian_convert
},
{ /* 1 - MESSAGE_REQ_EXEC_CPG_PROCLEAVE */
.exec_handler_fn = message_handler_req_exec_cpg_procleave,
.exec_endian_convert_fn = exec_cpg_procjoin_endian_convert
},
{ /* 2 - MESSAGE_REQ_EXEC_CPG_JOINLIST */
.exec_handler_fn = message_handler_req_exec_cpg_joinlist,
.exec_endian_convert_fn = exec_cpg_joinlist_endian_convert
},
{ /* 3 - MESSAGE_REQ_EXEC_CPG_MCAST */
.exec_handler_fn = message_handler_req_exec_cpg_mcast,
.exec_endian_convert_fn = exec_cpg_mcast_endian_convert
},
{ /* 4 - MESSAGE_REQ_EXEC_CPG_DOWNLIST_OLD */
.exec_handler_fn = message_handler_req_exec_cpg_downlist_old,
.exec_endian_convert_fn = exec_cpg_downlist_endian_convert_old
},
{ /* 5 - MESSAGE_REQ_EXEC_CPG_DOWNLIST */
.exec_handler_fn = message_handler_req_exec_cpg_downlist,
.exec_endian_convert_fn = exec_cpg_downlist_endian_convert
},
};
struct corosync_service_engine cpg_service_engine = {
.name = "corosync cluster closed process group service v1.01",
.id = CPG_SERVICE,
.priority = 1,
.private_data_size = sizeof (struct cpg_pd),
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED,
.allow_inquorate = CS_LIB_ALLOW_INQUORATE,
.lib_init_fn = cpg_lib_init_fn,
.lib_exit_fn = cpg_lib_exit_fn,
.lib_engine = cpg_lib_engine,
.lib_engine_count = sizeof (cpg_lib_engine) / sizeof (struct corosync_lib_handler),
.exec_init_fn = cpg_exec_init_fn,
.exec_dump_fn = NULL,
.exec_engine = cpg_exec_engine,
.exec_engine_count = sizeof (cpg_exec_engine) / sizeof (struct corosync_exec_handler),
.sync_init = cpg_sync_init,
.sync_process = cpg_sync_process,
.sync_activate = cpg_sync_activate,
.sync_abort = cpg_sync_abort
};
struct corosync_service_engine *cpg_get_service_engine_ver0 (void)
{
return (&cpg_service_engine);
}
struct req_exec_cpg_procjoin {
struct qb_ipc_request_header header __attribute__((aligned(8)));
mar_cpg_name_t group_name __attribute__((aligned(8)));
mar_uint32_t pid __attribute__((aligned(8)));
mar_uint32_t reason __attribute__((aligned(8)));
};
struct req_exec_cpg_mcast {
struct qb_ipc_request_header header __attribute__((aligned(8)));
mar_cpg_name_t group_name __attribute__((aligned(8)));
mar_uint32_t msglen __attribute__((aligned(8)));
mar_uint32_t pid __attribute__((aligned(8)));
mar_message_source_t source __attribute__((aligned(8)));
mar_uint8_t message[] __attribute__((aligned(8)));
};
struct req_exec_cpg_downlist_old {
struct qb_ipc_request_header header __attribute__((aligned(8)));
mar_uint32_t left_nodes __attribute__((aligned(8)));
mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8)));
};
struct req_exec_cpg_downlist {
struct qb_ipc_request_header header __attribute__((aligned(8)));
/* merge decisions */
mar_uint32_t old_members __attribute__((aligned(8)));
/* downlist below */
mar_uint32_t left_nodes __attribute__((aligned(8)));
mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8)));
};
struct downlist_msg {
mar_uint32_t sender_nodeid;
mar_uint32_t old_members __attribute__((aligned(8)));
mar_uint32_t left_nodes __attribute__((aligned(8)));
mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8)));
struct list_head list;
};
struct joinlist_msg {
mar_uint32_t sender_nodeid;
uint32_t pid;
mar_cpg_name_t group_name;
struct list_head list;
};
static struct req_exec_cpg_downlist g_req_exec_cpg_downlist;
/*
* Function print group name. It's not reentrant
*/
static char *cpg_print_group_name(const mar_cpg_name_t *group)
{
static char res[CPG_MAX_NAME_LENGTH * 4 + 1];
int dest_pos = 0;
char c;
int i;
for (i = 0; i < group->length; i++) {
c = group->value[i];
if (c >= ' ' && c < 0x7f && c != '\\') {
res[dest_pos++] = c;
} else {
if (c == '\\') {
res[dest_pos++] = '\\';
res[dest_pos++] = '\\';
} else {
snprintf(res + dest_pos, sizeof(res) - dest_pos, "\\x%02X", c);
dest_pos += 4;
}
}
}
res[dest_pos] = 0;
return (res);
}
static void cpg_sync_init (
const unsigned int *trans_list,
size_t trans_list_entries,
const unsigned int *member_list,
size_t member_list_entries,
const struct memb_ring_id *ring_id)
{
int entries;
int i, j;
int found;
my_sync_state = CPGSYNC_DOWNLIST;
memcpy (my_member_list, member_list, member_list_entries *
sizeof (unsigned int));
my_member_list_entries = member_list_entries;
last_sync_ring_id.nodeid = ring_id->rep.nodeid;
last_sync_ring_id.seq = ring_id->seq;
downlist_state = CPG_DOWNLIST_WAITING_FOR_MESSAGES;
entries = 0;
/*
* Determine list of nodeids for downlist message
*/
for (i = 0; i < my_old_member_list_entries; i++) {
found = 0;
for (j = 0; j < trans_list_entries; j++) {
if (my_old_member_list[i] == trans_list[j]) {
found = 1;
break;
}
}
if (found == 0) {
g_req_exec_cpg_downlist.nodeids[entries++] =
my_old_member_list[i];
}
}
g_req_exec_cpg_downlist.left_nodes = entries;
}
static int cpg_sync_process (void)
{
int res = -1;
if (my_sync_state == CPGSYNC_DOWNLIST) {
res = cpg_exec_send_downlist();
if (res == -1) {
return (-1);
}
my_sync_state = CPGSYNC_JOINLIST;
}
if (my_sync_state == CPGSYNC_JOINLIST) {
res = cpg_exec_send_joinlist();
}
return (res);
}
static void cpg_sync_activate (void)
{
memcpy (my_old_member_list, my_member_list,
my_member_list_entries * sizeof (unsigned int));
my_old_member_list_entries = my_member_list_entries;
if (downlist_state == CPG_DOWNLIST_WAITING_FOR_MESSAGES) {
downlist_master_choose_and_send ();
}
joinlist_inform_clients ();
downlist_messages_delete ();
downlist_state = CPG_DOWNLIST_NONE;
joinlist_messages_delete ();
notify_lib_totem_membership (NULL, my_member_list_entries, my_member_list);
}
static void cpg_sync_abort (void)
{
downlist_state = CPG_DOWNLIST_NONE;
downlist_messages_delete ();
joinlist_messages_delete ();
}
static int notify_lib_totem_membership (
void *conn,
int member_list_entries,
const unsigned int *member_list)
{
struct list_head *iter;
char *buf;
int size;
struct res_lib_cpg_totem_confchg_callback *res;
size = sizeof(struct res_lib_cpg_totem_confchg_callback) +
sizeof(mar_uint32_t) * (member_list_entries);
buf = alloca(size);
if (!buf)
return CS_ERR_LIBRARY;
res = (struct res_lib_cpg_totem_confchg_callback *)buf;
res->member_list_entries = member_list_entries;
res->header.size = size;
res->header.id = MESSAGE_RES_CPG_TOTEM_CONFCHG_CALLBACK;
res->header.error = CS_OK;
memcpy (&res->ring_id, &last_sync_ring_id, sizeof (mar_cpg_ring_id_t));
memcpy (res->member_list, member_list, res->member_list_entries * sizeof (mar_uint32_t));
if (conn == NULL) {
for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) {
struct cpg_pd *cpg_pd = list_entry (iter, struct cpg_pd, list);
api->ipc_dispatch_send (cpg_pd->conn, buf, size);
}
} else {
api->ipc_dispatch_send (conn, buf, size);
}
return CS_OK;
}
static int notify_lib_joinlist(
const mar_cpg_name_t *group_name,
void *conn,
int joined_list_entries,
mar_cpg_address_t *joined_list,
int left_list_entries,
mar_cpg_address_t *left_list,
int id)
{
int size;
char *buf;
struct list_head *iter;
int count;
struct res_lib_cpg_confchg_callback *res;
mar_cpg_address_t *retgi;
count = 0;
for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) {
struct process_info *pi = list_entry (iter, struct process_info, list);
if (mar_name_compare (&pi->group, group_name) == 0) {
int i;
int founded = 0;
for (i = 0; i < left_list_entries; i++) {
if (left_list[i].nodeid == pi->nodeid && left_list[i].pid == pi->pid) {
founded++;
}
}
if (!founded)
count++;
}
}
size = sizeof(struct res_lib_cpg_confchg_callback) +
sizeof(mar_cpg_address_t) * (count + left_list_entries + joined_list_entries);
buf = alloca(size);
if (!buf)
return CS_ERR_LIBRARY;
res = (struct res_lib_cpg_confchg_callback *)buf;
res->joined_list_entries = joined_list_entries;
res->left_list_entries = left_list_entries;
res->member_list_entries = count;
retgi = res->member_list;
res->header.size = size;
res->header.id = id;
res->header.error = CS_OK;
memcpy(&res->group_name, group_name, sizeof(mar_cpg_name_t));
for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) {
struct process_info *pi=list_entry (iter, struct process_info, list);
if (mar_name_compare (&pi->group, group_name) == 0) {
int i;
int founded = 0;
for (i = 0;i < left_list_entries; i++) {
if (left_list[i].nodeid == pi->nodeid && left_list[i].pid == pi->pid) {
founded++;
}
}
if (!founded) {
retgi->nodeid = pi->nodeid;
retgi->pid = pi->pid;
retgi++;
}
}
}
if (left_list_entries) {
memcpy (retgi, left_list, left_list_entries * sizeof(mar_cpg_address_t));
retgi += left_list_entries;
}
if (joined_list_entries) {
memcpy (retgi, joined_list, joined_list_entries * sizeof(mar_cpg_address_t));
retgi += joined_list_entries;
}
if (conn) {
api->ipc_dispatch_send (conn, buf, size);
} else {
for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) {
struct cpg_pd *cpd = list_entry (iter, struct cpg_pd, list);
if (mar_name_compare (&cpd->group_name, group_name) == 0) {
assert (joined_list_entries <= 1);
if (joined_list_entries) {
if (joined_list[0].pid == cpd->pid &&
joined_list[0].nodeid == api->totem_nodeid_get()) {
cpd->cpd_state = CPD_STATE_JOIN_COMPLETED;
}
}
if (cpd->cpd_state == CPD_STATE_JOIN_COMPLETED ||
cpd->cpd_state == CPD_STATE_LEAVE_STARTED) {
api->ipc_dispatch_send (cpd->conn, buf, size);
}
if (left_list_entries) {
if (left_list[0].pid == cpd->pid &&
left_list[0].nodeid == api->totem_nodeid_get() &&
left_list[0].reason == CONFCHG_CPG_REASON_LEAVE) {
cpd->pid = 0;
memset (&cpd->group_name, 0, sizeof(cpd->group_name));
cpd->cpd_state = CPD_STATE_UNJOINED;
}
}
}
}
}
/*
* Traverse thru cpds and send totem membership for cpd, where it is not send yet
*/
for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) {
struct cpg_pd *cpd = list_entry (iter, struct cpg_pd, list);
if ((cpd->flags & CPG_MODEL_V1_DELIVER_INITIAL_TOTEM_CONF) && (cpd->initial_totem_conf_sent == 0)) {
cpd->initial_totem_conf_sent = 1;
notify_lib_totem_membership (cpd->conn, my_old_member_list_entries, my_old_member_list);
}
}
return CS_OK;
}
static void downlist_log(const char *msg, struct downlist_msg* dl)
{
log_printf (LOG_DEBUG,
"%s: sender %s; members(old:%d left:%d)",
msg,
api->totem_ifaces_print(dl->sender_nodeid),
dl->old_members,
dl->left_nodes);
}
static struct downlist_msg* downlist_master_choose (void)
{
struct downlist_msg *cmp;
struct downlist_msg *best = NULL;
struct list_head *iter;
uint32_t cmp_members;
uint32_t best_members;
uint32_t i;
int ignore_msg;
for (iter = downlist_messages_head.next;
iter != &downlist_messages_head;
iter = iter->next) {
cmp = list_entry(iter, struct downlist_msg, list);
downlist_log("comparing", cmp);
ignore_msg = 0;
for (i = 0; i < cmp->left_nodes; i++) {
if (cmp->nodeids[i] == api->totem_nodeid_get()) {
log_printf (LOG_DEBUG, "Ignoring this entry because I'm in the left list\n");
ignore_msg = 1;
break;
}
}
if (ignore_msg) {
continue ;
}
if (best == NULL) {
best = cmp;
continue;
}
best_members = best->old_members - best->left_nodes;
cmp_members = cmp->old_members - cmp->left_nodes;
if (cmp_members > best_members) {
best = cmp;
} else if (cmp_members == best_members) {
if (cmp->old_members > best->old_members) {
best = cmp;
} else if (cmp->old_members == best->old_members) {
if (cmp->sender_nodeid < best->sender_nodeid) {
best = cmp;
}
}
}
}
assert (best != NULL);
return best;
}
static void downlist_master_choose_and_send (void)
{
struct downlist_msg *stored_msg;
struct list_head *iter;
struct process_info *left_pi;
qb_map_t *group_map;
struct cpg_name cpg_group;
mar_cpg_name_t group;
struct confchg_data{
struct cpg_name cpg_group;
mar_cpg_address_t left_list[CPG_MEMBERS_MAX];
int left_list_entries;
struct list_head list;
} *pcd;
qb_map_iter_t *miter;
int i, size;
downlist_state = CPG_DOWNLIST_APPLYING;
stored_msg = downlist_master_choose ();
if (!stored_msg) {
log_printf (LOGSYS_LEVEL_DEBUG, "NO chosen downlist");
return;
}
downlist_log("chosen downlist", stored_msg);
group_map = qb_skiplist_create();
/*
* only the cpg groups included in left nodes should receive
* confchg event, so we will collect these cpg groups and
* relative left_lists here.
*/
for (iter = process_info_list_head.next; iter != &process_info_list_head; ) {
struct process_info *pi = list_entry(iter, struct process_info, list);
iter = iter->next;
left_pi = NULL;
for (i = 0; i < stored_msg->left_nodes; i++) {
if (pi->nodeid == stored_msg->nodeids[i]) {
left_pi = pi;
break;
}
}
if (left_pi) {
marshall_from_mar_cpg_name_t(&cpg_group, &left_pi->group);
cpg_group.value[cpg_group.length] = 0;
pcd = (struct confchg_data *)qb_map_get(group_map, cpg_group.value);
if (pcd == NULL) {
pcd = (struct confchg_data *)calloc(1, sizeof(struct confchg_data));
memcpy(&pcd->cpg_group, &cpg_group, sizeof(struct cpg_name));
qb_map_put(group_map, pcd->cpg_group.value, pcd);
}
size = pcd->left_list_entries;
pcd->left_list[size].nodeid = left_pi->nodeid;
pcd->left_list[size].pid = left_pi->pid;
pcd->left_list[size].reason = CONFCHG_CPG_REASON_NODEDOWN;
pcd->left_list_entries++;
list_del (&left_pi->list);
free (left_pi);
}
}
/* send only one confchg event per cpg group */
miter = qb_map_iter_create(group_map);
while (qb_map_iter_next(miter, (void **)&pcd)) {
marshall_to_mar_cpg_name_t(&group, &pcd->cpg_group);
log_printf (LOG_DEBUG, "left_list_entries:%d", pcd->left_list_entries);
for (i=0; i<pcd->left_list_entries; i++) {
log_printf (LOG_DEBUG, "left_list[%d] group:%s, ip:%s, pid:%d",
i, cpg_print_group_name(&group),
(char*)api->totem_ifaces_print(pcd->left_list[i].nodeid),
pcd->left_list[i].pid);
}
/* send confchg event */
notify_lib_joinlist(&group, NULL,
0, NULL,
pcd->left_list_entries,
pcd->left_list,
MESSAGE_RES_CPG_CONFCHG_CALLBACK);
free(pcd);
}
qb_map_iter_free(miter);
qb_map_destroy(group_map);
}
static void joinlist_inform_clients (void)
{
struct joinlist_msg *stored_msg;
struct list_head *iter;
unsigned int i;
i = 0;
for (iter = joinlist_messages_head.next;
iter != &joinlist_messages_head;
iter = iter->next) {
stored_msg = list_entry(iter, struct joinlist_msg, list);
log_printf (LOG_DEBUG, "joinlist_messages[%u] group:%s, ip:%s, pid:%d",
i++, cpg_print_group_name(&stored_msg->group_name),
(char*)api->totem_ifaces_print(stored_msg->sender_nodeid),
stored_msg->pid);
/* Ignore our own messages */
if (stored_msg->sender_nodeid == api->totem_nodeid_get()) {
continue ;
}
do_proc_join (&stored_msg->group_name, stored_msg->pid, stored_msg->sender_nodeid,
CONFCHG_CPG_REASON_NODEUP);
}
}
static void downlist_messages_delete (void)
{
struct downlist_msg *stored_msg;
struct list_head *iter, *iter_next;
for (iter = downlist_messages_head.next;
iter != &downlist_messages_head;
iter = iter_next) {
iter_next = iter->next;
stored_msg = list_entry(iter, struct downlist_msg, list);
list_del (&stored_msg->list);
free (stored_msg);
}
}
static void joinlist_messages_delete (void)
{
struct joinlist_msg *stored_msg;
struct list_head *iter, *iter_next;
for (iter = joinlist_messages_head.next;
iter != &joinlist_messages_head;
iter = iter_next) {
iter_next = iter->next;
stored_msg = list_entry(iter, struct joinlist_msg, list);
list_del (&stored_msg->list);
free (stored_msg);
}
list_init (&joinlist_messages_head);
}
static char *cpg_exec_init_fn (struct corosync_api_v1 *corosync_api)
{
-#ifdef COROSYNC_SOLARIS
- logsys_subsys_init();
-#endif
list_init (&downlist_messages_head);
list_init (&joinlist_messages_head);
api = corosync_api;
return (NULL);
}
static void cpg_iteration_instance_finalize (struct cpg_iteration_instance *cpg_iteration_instance)
{
struct list_head *iter, *iter_next;
struct process_info *pi;
for (iter = cpg_iteration_instance->items_list_head.next;
iter != &cpg_iteration_instance->items_list_head;
iter = iter_next) {
iter_next = iter->next;
pi = list_entry (iter, struct process_info, list);
list_del (&pi->list);
free (pi);
}
list_del (&cpg_iteration_instance->list);
hdb_handle_destroy (&cpg_iteration_handle_t_db, cpg_iteration_instance->handle);
}
static void cpg_pd_finalize (struct cpg_pd *cpd)
{
struct list_head *iter, *iter_next;
struct cpg_iteration_instance *cpii;
zcb_all_free(cpd);
for (iter = cpd->iteration_instance_list_head.next;
iter != &cpd->iteration_instance_list_head;
iter = iter_next) {
iter_next = iter->next;
cpii = list_entry (iter, struct cpg_iteration_instance, list);
cpg_iteration_instance_finalize (cpii);
}
list_del (&cpd->list);
}
static int cpg_lib_exit_fn (void *conn)
{
struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
log_printf(LOGSYS_LEVEL_DEBUG, "exit_fn for conn=%p", conn);
if (cpd->group_name.length > 0 && cpd->cpd_state != CPD_STATE_LEAVE_STARTED) {
cpg_node_joinleave_send (cpd->pid, &cpd->group_name,
MESSAGE_REQ_EXEC_CPG_PROCLEAVE, CONFCHG_CPG_REASON_PROCDOWN);
}
cpg_pd_finalize (cpd);
api->ipc_refcnt_dec (conn);
return (0);
}
static int cpg_node_joinleave_send (unsigned int pid, const mar_cpg_name_t *group_name, int fn, int reason)
{
struct req_exec_cpg_procjoin req_exec_cpg_procjoin;
struct iovec req_exec_cpg_iovec;
int result;
memcpy(&req_exec_cpg_procjoin.group_name, group_name, sizeof(mar_cpg_name_t));
req_exec_cpg_procjoin.pid = pid;
req_exec_cpg_procjoin.reason = reason;
req_exec_cpg_procjoin.header.size = sizeof(req_exec_cpg_procjoin);
req_exec_cpg_procjoin.header.id = SERVICE_ID_MAKE(CPG_SERVICE, fn);
req_exec_cpg_iovec.iov_base = (char *)&req_exec_cpg_procjoin;
req_exec_cpg_iovec.iov_len = sizeof(req_exec_cpg_procjoin);
result = api->totem_mcast (&req_exec_cpg_iovec, 1, TOTEM_AGREED);
return (result);
}
/* Can byteswap join & leave messages */
static void exec_cpg_procjoin_endian_convert (void *msg)
{
struct req_exec_cpg_procjoin *req_exec_cpg_procjoin = msg;
req_exec_cpg_procjoin->pid = swab32(req_exec_cpg_procjoin->pid);
swab_mar_cpg_name_t (&req_exec_cpg_procjoin->group_name);
req_exec_cpg_procjoin->reason = swab32(req_exec_cpg_procjoin->reason);
}
static void exec_cpg_joinlist_endian_convert (void *msg_v)
{
char *msg = msg_v;
struct qb_ipc_response_header *res = (struct qb_ipc_response_header *)msg;
struct join_list_entry *jle = (struct join_list_entry *)(msg + sizeof(struct qb_ipc_response_header));
swab_mar_int32_t (&res->size);
while ((const char*)jle < msg + res->size) {
jle->pid = swab32(jle->pid);
swab_mar_cpg_name_t (&jle->group_name);
jle++;
}
}
static void exec_cpg_downlist_endian_convert_old (void *msg)
{
}
static void exec_cpg_downlist_endian_convert (void *msg)
{
struct req_exec_cpg_downlist *req_exec_cpg_downlist = msg;
unsigned int i;
req_exec_cpg_downlist->left_nodes = swab32(req_exec_cpg_downlist->left_nodes);
req_exec_cpg_downlist->old_members = swab32(req_exec_cpg_downlist->old_members);
for (i = 0; i < req_exec_cpg_downlist->left_nodes; i++) {
req_exec_cpg_downlist->nodeids[i] = swab32(req_exec_cpg_downlist->nodeids[i]);
}
}
static void exec_cpg_mcast_endian_convert (void *msg)
{
struct req_exec_cpg_mcast *req_exec_cpg_mcast = msg;
swab_coroipc_request_header_t (&req_exec_cpg_mcast->header);
swab_mar_cpg_name_t (&req_exec_cpg_mcast->group_name);
req_exec_cpg_mcast->pid = swab32(req_exec_cpg_mcast->pid);
req_exec_cpg_mcast->msglen = swab32(req_exec_cpg_mcast->msglen);
swab_mar_message_source_t (&req_exec_cpg_mcast->source);
}
static struct process_info *process_info_find(const mar_cpg_name_t *group_name, uint32_t pid, unsigned int nodeid) {
struct list_head *iter;
for (iter = process_info_list_head.next; iter != &process_info_list_head; ) {
struct process_info *pi = list_entry (iter, struct process_info, list);
iter = iter->next;
if (pi->pid == pid && pi->nodeid == nodeid &&
mar_name_compare (&pi->group, group_name) == 0) {
return pi;
}
}
return NULL;
}
static void do_proc_join(
const mar_cpg_name_t *name,
uint32_t pid,
unsigned int nodeid,
int reason)
{
struct process_info *pi;
struct process_info *pi_entry;
mar_cpg_address_t notify_info;
struct list_head *list;
struct list_head *list_to_add = NULL;
if (process_info_find (name, pid, nodeid) != NULL) {
return ;
}
pi = malloc (sizeof (struct process_info));
if (!pi) {
log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate process_info struct");
return;
}
pi->nodeid = nodeid;
pi->pid = pid;
memcpy(&pi->group, name, sizeof(*name));
list_init(&pi->list);
/*
* Insert new process in sorted order so synchronization works properly
*/
list_to_add = &process_info_list_head;
for (list = process_info_list_head.next; list != &process_info_list_head; list = list->next) {
pi_entry = list_entry(list, struct process_info, list);
if (pi_entry->nodeid > pi->nodeid ||
(pi_entry->nodeid == pi->nodeid && pi_entry->pid > pi->pid)) {
break;
}
list_to_add = list;
}
list_add (&pi->list, list_to_add);
notify_info.pid = pi->pid;
notify_info.nodeid = nodeid;
notify_info.reason = reason;
notify_lib_joinlist(&pi->group, NULL,
1, &notify_info,
0, NULL,
MESSAGE_RES_CPG_CONFCHG_CALLBACK);
}
static void message_handler_req_exec_cpg_downlist_old (
const void *message,
unsigned int nodeid)
{
log_printf (LOGSYS_LEVEL_WARNING, "downlist OLD from node %d",
nodeid);
}
static void message_handler_req_exec_cpg_downlist(
const void *message,
unsigned int nodeid)
{
const struct req_exec_cpg_downlist *req_exec_cpg_downlist = message;
int i;
struct list_head *iter;
struct downlist_msg *stored_msg;
int found;
if (downlist_state != CPG_DOWNLIST_WAITING_FOR_MESSAGES) {
log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received in state %d",
req_exec_cpg_downlist->left_nodes, downlist_state);
return;
}
stored_msg = malloc (sizeof (struct downlist_msg));
stored_msg->sender_nodeid = nodeid;
stored_msg->old_members = req_exec_cpg_downlist->old_members;
stored_msg->left_nodes = req_exec_cpg_downlist->left_nodes;
memcpy (stored_msg->nodeids, req_exec_cpg_downlist->nodeids,
req_exec_cpg_downlist->left_nodes * sizeof (mar_uint32_t));
list_init (&stored_msg->list);
list_add (&stored_msg->list, &downlist_messages_head);
for (i = 0; i < my_member_list_entries; i++) {
found = 0;
for (iter = downlist_messages_head.next;
iter != &downlist_messages_head;
iter = iter->next) {
stored_msg = list_entry(iter, struct downlist_msg, list);
if (my_member_list[i] == stored_msg->sender_nodeid) {
found = 1;
}
}
if (!found) {
return;
}
}
downlist_master_choose_and_send ();
}
static void message_handler_req_exec_cpg_procjoin (
const void *message,
unsigned int nodeid)
{
const struct req_exec_cpg_procjoin *req_exec_cpg_procjoin = message;
log_printf(LOGSYS_LEVEL_DEBUG, "got procjoin message from cluster node %d (%s) for pid %u",
nodeid,
api->totem_ifaces_print(nodeid),
(unsigned int)req_exec_cpg_procjoin->pid);
do_proc_join (&req_exec_cpg_procjoin->group_name,
req_exec_cpg_procjoin->pid, nodeid,
CONFCHG_CPG_REASON_JOIN);
}
static void message_handler_req_exec_cpg_procleave (
const void *message,
unsigned int nodeid)
{
const struct req_exec_cpg_procjoin *req_exec_cpg_procjoin = message;
struct process_info *pi;
struct list_head *iter;
mar_cpg_address_t notify_info;
log_printf(LOGSYS_LEVEL_DEBUG, "got procleave message from cluster node %d", nodeid);
notify_info.pid = req_exec_cpg_procjoin->pid;
notify_info.nodeid = nodeid;
notify_info.reason = req_exec_cpg_procjoin->reason;
notify_lib_joinlist(&req_exec_cpg_procjoin->group_name, NULL,
0, NULL,
1, &notify_info,
MESSAGE_RES_CPG_CONFCHG_CALLBACK);
for (iter = process_info_list_head.next; iter != &process_info_list_head; ) {
pi = list_entry(iter, struct process_info, list);
iter = iter->next;
if (pi->pid == req_exec_cpg_procjoin->pid && pi->nodeid == nodeid &&
mar_name_compare (&pi->group, &req_exec_cpg_procjoin->group_name)==0) {
list_del (&pi->list);
free (pi);
}
}
}
/* Got a proclist from another node */
static void message_handler_req_exec_cpg_joinlist (
const void *message_v,
unsigned int nodeid)
{
const char *message = message_v;
const struct qb_ipc_response_header *res = (const struct qb_ipc_response_header *)message;
const struct join_list_entry *jle = (const struct join_list_entry *)(message + sizeof(struct qb_ipc_response_header));
struct joinlist_msg *stored_msg;
log_printf(LOGSYS_LEVEL_DEBUG, "got joinlist message from node %x",
nodeid);
while ((const char*)jle < message + res->size) {
stored_msg = malloc (sizeof (struct joinlist_msg));
memset(stored_msg, 0, sizeof (struct joinlist_msg));
stored_msg->sender_nodeid = nodeid;
stored_msg->pid = jle->pid;
memcpy(&stored_msg->group_name, &jle->group_name, sizeof(mar_cpg_name_t));
list_init (&stored_msg->list);
list_add (&stored_msg->list, &joinlist_messages_head);
jle++;
}
}
static void message_handler_req_exec_cpg_mcast (
const void *message,
unsigned int nodeid)
{
const struct req_exec_cpg_mcast *req_exec_cpg_mcast = message;
struct res_lib_cpg_deliver_callback res_lib_cpg_mcast;
int msglen = req_exec_cpg_mcast->msglen;
struct list_head *iter, *pi_iter;
struct cpg_pd *cpd;
struct iovec iovec[2];
int known_node = 0;
res_lib_cpg_mcast.header.id = MESSAGE_RES_CPG_DELIVER_CALLBACK;
res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast) + msglen;
res_lib_cpg_mcast.msglen = msglen;
res_lib_cpg_mcast.pid = req_exec_cpg_mcast->pid;
res_lib_cpg_mcast.nodeid = nodeid;
memcpy(&res_lib_cpg_mcast.group_name, &req_exec_cpg_mcast->group_name,
sizeof(mar_cpg_name_t));
iovec[0].iov_base = (void *)&res_lib_cpg_mcast;
iovec[0].iov_len = sizeof (res_lib_cpg_mcast);
iovec[1].iov_base = (char*)message+sizeof(*req_exec_cpg_mcast);
iovec[1].iov_len = msglen;
for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; ) {
cpd = list_entry(iter, struct cpg_pd, list);
iter = iter->next;
if ((cpd->cpd_state == CPD_STATE_LEAVE_STARTED || cpd->cpd_state == CPD_STATE_JOIN_COMPLETED)
&& (mar_name_compare (&cpd->group_name, &req_exec_cpg_mcast->group_name) == 0)) {
if (!known_node) {
/* Try to find, if we know the node */
for (pi_iter = process_info_list_head.next;
pi_iter != &process_info_list_head; pi_iter = pi_iter->next) {
struct process_info *pi = list_entry (pi_iter, struct process_info, list);
if (pi->nodeid == nodeid &&
mar_name_compare (&pi->group, &req_exec_cpg_mcast->group_name) == 0) {
known_node = 1;
break;
}
}
}
if (!known_node) {
log_printf(LOGSYS_LEVEL_WARNING, "Unknown node -> we will not deliver message");
return ;
}
api->ipc_dispatch_iov_send (cpd->conn, iovec, 2);
}
}
}
static int cpg_exec_send_downlist(void)
{
struct iovec iov;
g_req_exec_cpg_downlist.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_DOWNLIST);
g_req_exec_cpg_downlist.header.size = sizeof(struct req_exec_cpg_downlist);
g_req_exec_cpg_downlist.old_members = my_old_member_list_entries;
iov.iov_base = (void *)&g_req_exec_cpg_downlist;
iov.iov_len = g_req_exec_cpg_downlist.header.size;
return (api->totem_mcast (&iov, 1, TOTEM_AGREED));
}
static int cpg_exec_send_joinlist(void)
{
int count = 0;
struct list_head *iter;
struct qb_ipc_response_header *res;
char *buf;
struct join_list_entry *jle;
struct iovec req_exec_cpg_iovec;
for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) {
struct process_info *pi = list_entry (iter, struct process_info, list);
if (pi->nodeid == api->totem_nodeid_get ()) {
count++;
}
}
/* Nothing to send */
if (!count)
return 0;
buf = alloca(sizeof(struct qb_ipc_response_header) + sizeof(struct join_list_entry) * count);
if (!buf) {
log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate joinlist buffer");
return -1;
}
jle = (struct join_list_entry *)(buf + sizeof(struct qb_ipc_response_header));
res = (struct qb_ipc_response_header *)buf;
for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) {
struct process_info *pi = list_entry (iter, struct process_info, list);
if (pi->nodeid == api->totem_nodeid_get ()) {
memcpy (&jle->group_name, &pi->group, sizeof (mar_cpg_name_t));
jle->pid = pi->pid;
jle++;
}
}
res->id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_JOINLIST);
res->size = sizeof(struct qb_ipc_response_header)+sizeof(struct join_list_entry) * count;
req_exec_cpg_iovec.iov_base = buf;
req_exec_cpg_iovec.iov_len = res->size;
return (api->totem_mcast (&req_exec_cpg_iovec, 1, TOTEM_AGREED));
}
static int cpg_lib_init_fn (void *conn)
{
struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
memset (cpd, 0, sizeof(struct cpg_pd));
cpd->conn = conn;
list_add (&cpd->list, &cpg_pd_list_head);
list_init (&cpd->iteration_instance_list_head);
list_init (&cpd->zcb_mapped_list_head);
api->ipc_refcnt_inc (conn);
log_printf(LOGSYS_LEVEL_DEBUG, "lib_init_fn: conn=%p, cpd=%p", conn, cpd);
return (0);
}
/* Join message from the library */
static void message_handler_req_lib_cpg_join (void *conn, const void *message)
{
const struct req_lib_cpg_join *req_lib_cpg_join = message;
struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
struct res_lib_cpg_join res_lib_cpg_join;
cs_error_t error = CS_OK;
struct list_head *iter;
/* Test, if we don't have same pid and group name joined */
for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) {
struct cpg_pd *cpd_item = list_entry (iter, struct cpg_pd, list);
if (cpd_item->pid == req_lib_cpg_join->pid &&
mar_name_compare(&req_lib_cpg_join->group_name, &cpd_item->group_name) == 0) {
/* We have same pid and group name joined -> return error */
error = CS_ERR_EXIST;
goto response_send;
}
}
/*
* Same check must be done in process info list, because there may be not yet delivered
* leave of client.
*/
for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) {
struct process_info *pi = list_entry (iter, struct process_info, list);
if (pi->nodeid == api->totem_nodeid_get () && pi->pid == req_lib_cpg_join->pid &&
mar_name_compare(&req_lib_cpg_join->group_name, &pi->group) == 0) {
/* We have same pid and group name joined -> return error */
error = CS_ERR_TRY_AGAIN;
goto response_send;
}
}
if (req_lib_cpg_join->group_name.length > CPG_MAX_NAME_LENGTH) {
error = CS_ERR_NAME_TOO_LONG;
goto response_send;
}
switch (cpd->cpd_state) {
case CPD_STATE_UNJOINED:
error = CS_OK;
cpd->cpd_state = CPD_STATE_JOIN_STARTED;
cpd->pid = req_lib_cpg_join->pid;
cpd->flags = req_lib_cpg_join->flags;
memcpy (&cpd->group_name, &req_lib_cpg_join->group_name,
sizeof (cpd->group_name));
cpg_node_joinleave_send (req_lib_cpg_join->pid,
&req_lib_cpg_join->group_name,
MESSAGE_REQ_EXEC_CPG_PROCJOIN, CONFCHG_CPG_REASON_JOIN);
break;
case CPD_STATE_LEAVE_STARTED:
error = CS_ERR_BUSY;
break;
case CPD_STATE_JOIN_STARTED:
error = CS_ERR_EXIST;
break;
case CPD_STATE_JOIN_COMPLETED:
error = CS_ERR_EXIST;
break;
}
response_send:
res_lib_cpg_join.header.size = sizeof(res_lib_cpg_join);
res_lib_cpg_join.header.id = MESSAGE_RES_CPG_JOIN;
res_lib_cpg_join.header.error = error;
api->ipc_response_send (conn, &res_lib_cpg_join, sizeof(res_lib_cpg_join));
}
/* Leave message from the library */
static void message_handler_req_lib_cpg_leave (void *conn, const void *message)
{
struct res_lib_cpg_leave res_lib_cpg_leave;
cs_error_t error = CS_OK;
struct req_lib_cpg_leave *req_lib_cpg_leave = (struct req_lib_cpg_leave *)message;
struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
log_printf(LOGSYS_LEVEL_DEBUG, "got leave request on %p", conn);
switch (cpd->cpd_state) {
case CPD_STATE_UNJOINED:
error = CS_ERR_NOT_EXIST;
break;
case CPD_STATE_LEAVE_STARTED:
error = CS_ERR_NOT_EXIST;
break;
case CPD_STATE_JOIN_STARTED:
error = CS_ERR_BUSY;
break;
case CPD_STATE_JOIN_COMPLETED:
error = CS_OK;
cpd->cpd_state = CPD_STATE_LEAVE_STARTED;
cpg_node_joinleave_send (req_lib_cpg_leave->pid,
&req_lib_cpg_leave->group_name,
MESSAGE_REQ_EXEC_CPG_PROCLEAVE,
CONFCHG_CPG_REASON_LEAVE);
break;
}
/* send return */
res_lib_cpg_leave.header.size = sizeof(res_lib_cpg_leave);
res_lib_cpg_leave.header.id = MESSAGE_RES_CPG_LEAVE;
res_lib_cpg_leave.header.error = error;
api->ipc_response_send(conn, &res_lib_cpg_leave, sizeof(res_lib_cpg_leave));
}
/* Finalize message from library */
static void message_handler_req_lib_cpg_finalize (
void *conn,
const void *message)
{
struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
struct res_lib_cpg_finalize res_lib_cpg_finalize;
cs_error_t error = CS_OK;
log_printf (LOGSYS_LEVEL_DEBUG, "cpg finalize for conn=%p", conn);
/*
* We will just remove cpd from list. After this call, connection will be
* closed on lib side, and cpg_lib_exit_fn will be called
*/
list_del (&cpd->list);
list_init (&cpd->list);
res_lib_cpg_finalize.header.size = sizeof (res_lib_cpg_finalize);
res_lib_cpg_finalize.header.id = MESSAGE_RES_CPG_FINALIZE;
res_lib_cpg_finalize.header.error = error;
api->ipc_response_send (conn, &res_lib_cpg_finalize,
sizeof (res_lib_cpg_finalize));
}
static int
memory_map (
const char *path,
size_t bytes,
void **buf)
{
int32_t fd;
void *addr_orig;
void *addr;
int32_t res;
fd = open (path, O_RDWR, 0600);
unlink (path);
if (fd == -1) {
return (-1);
}
res = ftruncate (fd, bytes);
if (res == -1) {
goto error_close_unlink;
}
addr_orig = mmap (NULL, bytes, PROT_NONE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (addr_orig == MAP_FAILED) {
goto error_close_unlink;
}
addr = mmap (addr_orig, bytes, PROT_READ | PROT_WRITE,
MAP_FIXED | MAP_SHARED, fd, 0);
if (addr != addr_orig) {
munmap(addr_orig, bytes);
goto error_close_unlink;
}
#ifdef MADV_NOSYNC
madvise(addr, bytes, MADV_NOSYNC);
#endif
res = close (fd);
if (res) {
return (-1);
}
*buf = addr_orig;
return (0);
error_close_unlink:
close (fd);
unlink(path);
return -1;
}
static inline int zcb_alloc (
struct cpg_pd *cpd,
const char *path_to_file,
size_t size,
void **addr)
{
struct zcb_mapped *zcb_mapped;
unsigned int res;
zcb_mapped = malloc (sizeof (struct zcb_mapped));
if (zcb_mapped == NULL) {
return (-1);
}
res = memory_map (
path_to_file,
size,
addr);
if (res == -1) {
free (zcb_mapped);
return (-1);
}
list_init (&zcb_mapped->list);
zcb_mapped->addr = *addr;
zcb_mapped->size = size;
list_add_tail (&zcb_mapped->list, &cpd->zcb_mapped_list_head);
return (0);
}
static inline int zcb_free (struct zcb_mapped *zcb_mapped)
{
unsigned int res;
res = munmap (zcb_mapped->addr, zcb_mapped->size);
list_del (&zcb_mapped->list);
free (zcb_mapped);
return (res);
}
static inline int zcb_by_addr_free (struct cpg_pd *cpd, void *addr)
{
struct list_head *list;
struct zcb_mapped *zcb_mapped;
unsigned int res = 0;
for (list = cpd->zcb_mapped_list_head.next;
list != &cpd->zcb_mapped_list_head; list = list->next) {
zcb_mapped = list_entry (list, struct zcb_mapped, list);
if (zcb_mapped->addr == addr) {
res = zcb_free (zcb_mapped);
break;
}
}
return (res);
}
static inline int zcb_all_free (
struct cpg_pd *cpd)
{
struct list_head *list;
struct zcb_mapped *zcb_mapped;
for (list = cpd->zcb_mapped_list_head.next;
list != &cpd->zcb_mapped_list_head;) {
zcb_mapped = list_entry (list, struct zcb_mapped, list);
list = list->next;
zcb_free (zcb_mapped);
}
return (0);
}
union u {
uint64_t server_addr;
void *server_ptr;
};
static uint64_t void2serveraddr (void *server_ptr)
{
union u u;
u.server_ptr = server_ptr;
return (u.server_addr);
}
static void *serveraddr2void (uint64_t server_addr)
{
union u u;
u.server_addr = server_addr;
return (u.server_ptr);
};
static void message_handler_req_lib_cpg_zc_alloc (
void *conn,
const void *message)
{
mar_req_coroipcc_zc_alloc_t *hdr = (mar_req_coroipcc_zc_alloc_t *)message;
struct qb_ipc_response_header res_header;
void *addr = NULL;
struct coroipcs_zc_header *zc_header;
unsigned int res;
struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
log_printf(LOGSYS_LEVEL_DEBUG, "path: %s", hdr->path_to_file);
res = zcb_alloc (cpd, hdr->path_to_file, hdr->map_size,
&addr);
assert(res == 0);
zc_header = (struct coroipcs_zc_header *)addr;
zc_header->server_address = void2serveraddr(addr);
res_header.size = sizeof (struct qb_ipc_response_header);
res_header.id = 0;
api->ipc_response_send (conn,
&res_header,
res_header.size);
}
static void message_handler_req_lib_cpg_zc_free (
void *conn,
const void *message)
{
mar_req_coroipcc_zc_free_t *hdr = (mar_req_coroipcc_zc_free_t *)message;
struct qb_ipc_response_header res_header;
void *addr = NULL;
struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
log_printf(LOGSYS_LEVEL_DEBUG, " free'ing");
addr = serveraddr2void (hdr->server_address);
zcb_by_addr_free (cpd, addr);
res_header.size = sizeof (struct qb_ipc_response_header);
res_header.id = 0;
api->ipc_response_send (
conn, &res_header,
res_header.size);
}
/* Mcast message from the library */
static void message_handler_req_lib_cpg_mcast (void *conn, const void *message)
{
const struct req_lib_cpg_mcast *req_lib_cpg_mcast = message;
struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
mar_cpg_name_t group_name = cpd->group_name;
struct iovec req_exec_cpg_iovec[2];
struct req_exec_cpg_mcast req_exec_cpg_mcast;
int msglen = req_lib_cpg_mcast->msglen;
int result;
cs_error_t error = CS_ERR_NOT_EXIST;
log_printf(LOGSYS_LEVEL_DEBUG, "got mcast request on %p", conn);
switch (cpd->cpd_state) {
case CPD_STATE_UNJOINED:
error = CS_ERR_NOT_EXIST;
break;
case CPD_STATE_LEAVE_STARTED:
error = CS_ERR_NOT_EXIST;
break;
case CPD_STATE_JOIN_STARTED:
error = CS_OK;
break;
case CPD_STATE_JOIN_COMPLETED:
error = CS_OK;
break;
}
if (error == CS_OK) {
req_exec_cpg_mcast.header.size = sizeof(req_exec_cpg_mcast) + msglen;
req_exec_cpg_mcast.header.id = SERVICE_ID_MAKE(CPG_SERVICE,
MESSAGE_REQ_EXEC_CPG_MCAST);
req_exec_cpg_mcast.pid = cpd->pid;
req_exec_cpg_mcast.msglen = msglen;
api->ipc_source_set (&req_exec_cpg_mcast.source, conn);
memcpy(&req_exec_cpg_mcast.group_name, &group_name,
sizeof(mar_cpg_name_t));
req_exec_cpg_iovec[0].iov_base = (char *)&req_exec_cpg_mcast;
req_exec_cpg_iovec[0].iov_len = sizeof(req_exec_cpg_mcast);
req_exec_cpg_iovec[1].iov_base = (char *)&req_lib_cpg_mcast->message;
req_exec_cpg_iovec[1].iov_len = msglen;
result = api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED);
assert(result == 0);
} else {
log_printf(LOGSYS_LEVEL_ERROR, "*** %p can't mcast to group %s state:%d, error:%d",
conn, group_name.value, cpd->cpd_state, error);
}
}
static void message_handler_req_lib_cpg_zc_execute (
void *conn,
const void *message)
{
mar_req_coroipcc_zc_execute_t *hdr = (mar_req_coroipcc_zc_execute_t *)message;
struct qb_ipc_request_header *header;
struct res_lib_cpg_mcast res_lib_cpg_mcast;
struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
struct iovec req_exec_cpg_iovec[2];
struct req_exec_cpg_mcast req_exec_cpg_mcast;
struct req_lib_cpg_mcast *req_lib_cpg_mcast;
int result;
cs_error_t error = CS_ERR_NOT_EXIST;
log_printf(LOGSYS_LEVEL_DEBUG, "got ZC mcast request on %p", conn);
header = (struct qb_ipc_request_header *)(((char *)serveraddr2void(hdr->server_address) + sizeof (struct coroipcs_zc_header)));
req_lib_cpg_mcast = (struct req_lib_cpg_mcast *)header;
switch (cpd->cpd_state) {
case CPD_STATE_UNJOINED:
error = CS_ERR_NOT_EXIST;
break;
case CPD_STATE_LEAVE_STARTED:
error = CS_ERR_NOT_EXIST;
break;
case CPD_STATE_JOIN_STARTED:
error = CS_OK;
break;
case CPD_STATE_JOIN_COMPLETED:
error = CS_OK;
break;
}
res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast);
res_lib_cpg_mcast.header.id = MESSAGE_RES_CPG_MCAST;
if (error == CS_OK) {
req_exec_cpg_mcast.header.size = sizeof(req_exec_cpg_mcast) + req_lib_cpg_mcast->msglen;
req_exec_cpg_mcast.header.id = SERVICE_ID_MAKE(CPG_SERVICE,
MESSAGE_REQ_EXEC_CPG_MCAST);
req_exec_cpg_mcast.pid = cpd->pid;
req_exec_cpg_mcast.msglen = req_lib_cpg_mcast->msglen;
api->ipc_source_set (&req_exec_cpg_mcast.source, conn);
memcpy(&req_exec_cpg_mcast.group_name, &cpd->group_name,
sizeof(mar_cpg_name_t));
req_exec_cpg_iovec[0].iov_base = (char *)&req_exec_cpg_mcast;
req_exec_cpg_iovec[0].iov_len = sizeof(req_exec_cpg_mcast);
req_exec_cpg_iovec[1].iov_base = (char *)header + sizeof(struct req_lib_cpg_mcast);
req_exec_cpg_iovec[1].iov_len = req_exec_cpg_mcast.msglen;
result = api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED);
if (result == 0) {
res_lib_cpg_mcast.header.error = CS_OK;
} else {
res_lib_cpg_mcast.header.error = CS_ERR_TRY_AGAIN;
}
} else {
res_lib_cpg_mcast.header.error = error;
}
api->ipc_response_send (conn, &res_lib_cpg_mcast,
sizeof (res_lib_cpg_mcast));
}
static void message_handler_req_lib_cpg_membership (void *conn,
const void *message)
{
struct req_lib_cpg_membership_get *req_lib_cpg_membership_get =
(struct req_lib_cpg_membership_get *)message;
struct res_lib_cpg_membership_get res_lib_cpg_membership_get;
struct list_head *iter;
int member_count = 0;
res_lib_cpg_membership_get.header.id = MESSAGE_RES_CPG_MEMBERSHIP;
res_lib_cpg_membership_get.header.error = CS_OK;
res_lib_cpg_membership_get.header.size =
sizeof (struct req_lib_cpg_membership_get);
for (iter = process_info_list_head.next;
iter != &process_info_list_head; iter = iter->next) {
struct process_info *pi = list_entry (iter, struct process_info, list);
if (mar_name_compare (&pi->group, &req_lib_cpg_membership_get->group_name) == 0) {
res_lib_cpg_membership_get.member_list[member_count].nodeid = pi->nodeid;
res_lib_cpg_membership_get.member_list[member_count].pid = pi->pid;
member_count += 1;
}
}
res_lib_cpg_membership_get.member_count = member_count;
api->ipc_response_send (conn, &res_lib_cpg_membership_get,
sizeof (res_lib_cpg_membership_get));
}
static void message_handler_req_lib_cpg_local_get (void *conn,
const void *message)
{
struct res_lib_cpg_local_get res_lib_cpg_local_get;
res_lib_cpg_local_get.header.size = sizeof (res_lib_cpg_local_get);
res_lib_cpg_local_get.header.id = MESSAGE_RES_CPG_LOCAL_GET;
res_lib_cpg_local_get.header.error = CS_OK;
res_lib_cpg_local_get.local_nodeid = api->totem_nodeid_get ();
api->ipc_response_send (conn, &res_lib_cpg_local_get,
sizeof (res_lib_cpg_local_get));
}
static void message_handler_req_lib_cpg_iteration_initialize (
void *conn,
const void *message)
{
const struct req_lib_cpg_iterationinitialize *req_lib_cpg_iterationinitialize = message;
struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
hdb_handle_t cpg_iteration_handle = 0;
struct res_lib_cpg_iterationinitialize res_lib_cpg_iterationinitialize;
struct list_head *iter, *iter2;
struct cpg_iteration_instance *cpg_iteration_instance;
cs_error_t error = CS_OK;
int res;
log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration initialize");
/* Because between calling this function and *next can be some operations which will
* change list, we must do full copy.
*/
/*
* Create new iteration instance
*/
res = hdb_handle_create (&cpg_iteration_handle_t_db, sizeof (struct cpg_iteration_instance),
&cpg_iteration_handle);
if (res != 0) {
error = CS_ERR_NO_MEMORY;
goto response_send;
}
res = hdb_handle_get (&cpg_iteration_handle_t_db, cpg_iteration_handle, (void *)&cpg_iteration_instance);
if (res != 0) {
error = CS_ERR_BAD_HANDLE;
goto error_destroy;
}
list_init (&cpg_iteration_instance->items_list_head);
cpg_iteration_instance->handle = cpg_iteration_handle;
/*
* Create copy of process_info list "grouped by" group name
*/
for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) {
struct process_info *pi = list_entry (iter, struct process_info, list);
struct process_info *new_pi;
if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_NAME_ONLY) {
/*
* Try to find processed group name in our list new list
*/
int found = 0;
for (iter2 = cpg_iteration_instance->items_list_head.next;
iter2 != &cpg_iteration_instance->items_list_head;
iter2 = iter2->next) {
struct process_info *pi2 = list_entry (iter2, struct process_info, list);
if (mar_name_compare (&pi2->group, &pi->group) == 0) {
found = 1;
break;
}
}
if (found) {
/*
* We have this name in list -> don't add
*/
continue ;
}
} else if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_ONE_GROUP) {
/*
* Test pi group name with request
*/
if (mar_name_compare (&pi->group, &req_lib_cpg_iterationinitialize->group_name) != 0)
/*
* Not same -> don't add
*/
continue ;
}
new_pi = malloc (sizeof (struct process_info));
if (!new_pi) {
log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate process_info struct");
error = CS_ERR_NO_MEMORY;
goto error_put_destroy;
}
memcpy (new_pi, pi, sizeof (struct process_info));
list_init (&new_pi->list);
if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_NAME_ONLY) {
/*
* pid and nodeid -> undefined
*/
new_pi->pid = new_pi->nodeid = 0;
}
/*
* We will return list "grouped" by "group name", so try to find right place to add
*/
for (iter2 = cpg_iteration_instance->items_list_head.next;
iter2 != &cpg_iteration_instance->items_list_head;
iter2 = iter2->next) {
struct process_info *pi2 = list_entry (iter2, struct process_info, list);
if (mar_name_compare (&pi2->group, &pi->group) == 0) {
break;
}
}
list_add (&new_pi->list, iter2);
}
/*
* Now we have a full "grouped by" copy of process_info list
*/
/*
* Add instance to current cpd list
*/
list_init (&cpg_iteration_instance->list);
list_add (&cpg_iteration_instance->list, &cpd->iteration_instance_list_head);
cpg_iteration_instance->current_pointer = &cpg_iteration_instance->items_list_head;
error_put_destroy:
hdb_handle_put (&cpg_iteration_handle_t_db, cpg_iteration_handle);
error_destroy:
if (error != CS_OK) {
hdb_handle_destroy (&cpg_iteration_handle_t_db, cpg_iteration_handle);
}
response_send:
res_lib_cpg_iterationinitialize.header.size = sizeof (res_lib_cpg_iterationinitialize);
res_lib_cpg_iterationinitialize.header.id = MESSAGE_RES_CPG_ITERATIONINITIALIZE;
res_lib_cpg_iterationinitialize.header.error = error;
res_lib_cpg_iterationinitialize.iteration_handle = cpg_iteration_handle;
api->ipc_response_send (conn, &res_lib_cpg_iterationinitialize,
sizeof (res_lib_cpg_iterationinitialize));
}
static void message_handler_req_lib_cpg_iteration_next (
void *conn,
const void *message)
{
const struct req_lib_cpg_iterationnext *req_lib_cpg_iterationnext = message;
struct res_lib_cpg_iterationnext res_lib_cpg_iterationnext;
struct cpg_iteration_instance *cpg_iteration_instance;
cs_error_t error = CS_OK;
int res;
struct process_info *pi;
log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration next");
res = hdb_handle_get (&cpg_iteration_handle_t_db,
req_lib_cpg_iterationnext->iteration_handle,
(void *)&cpg_iteration_instance);
if (res != 0) {
error = CS_ERR_LIBRARY;
goto error_exit;
}
assert (cpg_iteration_instance);
cpg_iteration_instance->current_pointer = cpg_iteration_instance->current_pointer->next;
if (cpg_iteration_instance->current_pointer == &cpg_iteration_instance->items_list_head) {
error = CS_ERR_NO_SECTIONS;
goto error_put;
}
pi = list_entry (cpg_iteration_instance->current_pointer, struct process_info, list);
/*
* Copy iteration data
*/
res_lib_cpg_iterationnext.description.nodeid = pi->nodeid;
res_lib_cpg_iterationnext.description.pid = pi->pid;
memcpy (&res_lib_cpg_iterationnext.description.group,
&pi->group,
sizeof (mar_cpg_name_t));
error_put:
hdb_handle_put (&cpg_iteration_handle_t_db, req_lib_cpg_iterationnext->iteration_handle);
error_exit:
res_lib_cpg_iterationnext.header.size = sizeof (res_lib_cpg_iterationnext);
res_lib_cpg_iterationnext.header.id = MESSAGE_RES_CPG_ITERATIONNEXT;
res_lib_cpg_iterationnext.header.error = error;
api->ipc_response_send (conn, &res_lib_cpg_iterationnext,
sizeof (res_lib_cpg_iterationnext));
}
static void message_handler_req_lib_cpg_iteration_finalize (
void *conn,
const void *message)
{
const struct req_lib_cpg_iterationfinalize *req_lib_cpg_iterationfinalize = message;
struct res_lib_cpg_iterationfinalize res_lib_cpg_iterationfinalize;
struct cpg_iteration_instance *cpg_iteration_instance;
cs_error_t error = CS_OK;
int res;
log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration finalize");
res = hdb_handle_get (&cpg_iteration_handle_t_db,
req_lib_cpg_iterationfinalize->iteration_handle,
(void *)&cpg_iteration_instance);
if (res != 0) {
error = CS_ERR_LIBRARY;
goto error_exit;
}
assert (cpg_iteration_instance);
cpg_iteration_instance_finalize (cpg_iteration_instance);
hdb_handle_put (&cpg_iteration_handle_t_db, cpg_iteration_instance->handle);
error_exit:
res_lib_cpg_iterationfinalize.header.size = sizeof (res_lib_cpg_iterationfinalize);
res_lib_cpg_iterationfinalize.header.id = MESSAGE_RES_CPG_ITERATIONFINALIZE;
res_lib_cpg_iterationfinalize.header.error = error;
api->ipc_response_send (conn, &res_lib_cpg_iterationfinalize,
sizeof (res_lib_cpg_iterationfinalize));
}
diff --git a/exec/main.c b/exec/main.c
index feb83320..6cbfef62 100644
--- a/exec/main.c
+++ b/exec/main.c
@@ -1,1248 +1,1249 @@
/*
* Copyright (c) 2002-2006 MontaVista Software, Inc.
* Copyright (c) 2006-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* \mainpage Corosync
*
* This is the doxygen generated developer documentation for the Corosync
* project. For more information about Corosync, please see the project
* web site, <a href="http://www.corosync.org">corosync.org</a>.
*
* \section license License
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <pthread.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <signal.h>
#include <sched.h>
#include <time.h>
#include <semaphore.h>
#include <qb/qbdefs.h>
#include <qb/qblog.h>
#include <qb/qbloop.h>
#include <qb/qbutil.h>
#include <qb/qbipcs.h>
#include <corosync/swab.h>
#include <corosync/corotypes.h>
#include <corosync/corodefs.h>
#include <corosync/list.h>
#include <corosync/totem/totempg.h>
#include <corosync/logsys.h>
#include <corosync/icmap.h>
#include "quorum.h"
#include "totemsrp.h"
#include "logconfig.h"
#include "totemconfig.h"
#include "main.h"
#include "sync.h"
#include "timer.h"
#include "util.h"
#include "apidef.h"
#include "service.h"
#include "schedwrk.h"
#ifdef HAVE_SMALL_MEMORY_FOOTPRINT
#define IPC_LOGSYS_SIZE 1024*64
#else
#define IPC_LOGSYS_SIZE 8192*128
#endif
LOGSYS_DECLARE_SYSTEM ("corosync",
LOGSYS_MODE_OUTPUT_STDERR,
LOG_DAEMON,
LOG_INFO);
LOGSYS_DECLARE_SUBSYS ("MAIN");
#define SERVER_BACKLOG 5
static int sched_priority = 0;
static unsigned int service_count = 32;
static struct totem_logging_configuration totem_logging_configuration;
static struct corosync_api_v1 *api = NULL;
static int sync_in_process = 1;
static qb_loop_t *corosync_poll_handle;
struct sched_param global_sched_param;
static corosync_timer_handle_t corosync_stats_timer_handle;
static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid";
qb_loop_t *cs_poll_handle_get (void)
{
return (corosync_poll_handle);
}
int cs_poll_dispatch_add (qb_loop_t * handle,
int fd,
int events,
void *data,
int (*dispatch_fn) (int fd,
int revents,
void *data))
{
return qb_loop_poll_add(handle, QB_LOOP_MED, fd, events, data,
dispatch_fn);
}
int cs_poll_dispatch_delete(qb_loop_t * handle, int fd)
{
return qb_loop_poll_del(handle, fd);
}
void corosync_state_dump (void)
{
int i;
for (i = 0; i < SERVICES_COUNT_MAX; i++) {
if (corosync_service[i] && corosync_service[i]->exec_dump_fn) {
corosync_service[i]->exec_dump_fn ();
}
}
}
static void corosync_blackbox_write_to_file (void)
{
char fname[PATH_MAX];
char time_str[PATH_MAX];
struct tm cur_time_tm;
time_t cur_time_t;
cur_time_t = time(NULL);
localtime_r(&cur_time_t, &cur_time_tm);
strftime(time_str, PATH_MAX, "%Y-%m-%dT%H:%M:%S", &cur_time_tm);
snprintf(fname, PATH_MAX, "%s/fdata-%s-%lld",
LOCALSTATEDIR "/lib/corosync",
time_str,
(long long int)getpid());
qb_log_blackbox_write_to_file(fname);
unlink(LOCALSTATEDIR "/lib/corosync/fdata");
symlink(fname, LOCALSTATEDIR "/lib/corosync/fdata");
}
static void unlink_all_completed (void)
{
api->timer_delete (corosync_stats_timer_handle);
qb_loop_stop (corosync_poll_handle);
icmap_fini();
}
void corosync_shutdown_request (void)
{
corosync_service_unlink_all (api, unlink_all_completed);
}
static int32_t sig_diag_handler (int num, void *data)
{
corosync_state_dump ();
return 0;
}
static int32_t sig_exit_handler (int num, void *data)
{
corosync_service_unlink_all (api, unlink_all_completed);
return 0;
}
static void sigsegv_handler (int num)
{
(void)signal (SIGSEGV, SIG_DFL);
corosync_blackbox_write_to_file ();
qb_log_fini();
raise (SIGSEGV);
}
static void sigabrt_handler (int num)
{
(void)signal (SIGABRT, SIG_DFL);
corosync_blackbox_write_to_file ();
qb_log_fini();
raise (SIGABRT);
}
#define LOCALHOST_IP inet_addr("127.0.0.1")
static void *corosync_group_handle;
static struct totempg_group corosync_group = {
.group = "a",
.group_len = 1
};
static void serialize_lock (void)
{
}
static void serialize_unlock (void)
{
}
static void corosync_sync_completed (void)
{
log_printf (LOGSYS_LEVEL_NOTICE,
"Completed service synchronization, ready to provide service.");
sync_in_process = 0;
cs_ipcs_sync_state_changed(sync_in_process);
cs_ipc_allow_connections(1);
}
static int corosync_sync_callbacks_retrieve (
int service_id,
struct sync_callbacks *callbacks)
{
if (corosync_service[service_id] == NULL) {
return (-1);
}
if (callbacks == NULL) {
return (0);
}
callbacks->name = corosync_service[service_id]->name;
callbacks->sync_init = corosync_service[service_id]->sync_init;
callbacks->sync_process = corosync_service[service_id]->sync_process;
callbacks->sync_activate = corosync_service[service_id]->sync_activate;
callbacks->sync_abort = corosync_service[service_id]->sync_abort;
return (0);
}
static struct memb_ring_id corosync_ring_id;
static void member_object_joined (unsigned int nodeid)
{
char member_ip[ICMAP_KEYNAME_MAXLEN];
char member_join_count[ICMAP_KEYNAME_MAXLEN];
char member_status[ICMAP_KEYNAME_MAXLEN];
snprintf(member_ip, ICMAP_KEYNAME_MAXLEN,
"runtime.totem.pg.mrp.srp.members.%u.ip", nodeid);
snprintf(member_join_count, ICMAP_KEYNAME_MAXLEN,
"runtime.totem.pg.mrp.srp.members.%u.join_count", nodeid);
snprintf(member_status, ICMAP_KEYNAME_MAXLEN,
"runtime.totem.pg.mrp.srp.members.%u.status", nodeid);
if (icmap_get(member_ip, NULL, NULL, NULL) == CS_OK) {
icmap_inc(member_join_count);
icmap_set_string(member_status, "joined");
} else {
icmap_set_string(member_ip, (char*)api->totem_ifaces_print (nodeid));
icmap_set_uint32(member_join_count, 1);
icmap_set_string(member_status, "joined");
}
log_printf (LOGSYS_LEVEL_DEBUG,
"Member joined: %s", api->totem_ifaces_print (nodeid));
}
static void member_object_left (unsigned int nodeid)
{
char member_status[ICMAP_KEYNAME_MAXLEN];
snprintf(member_status, ICMAP_KEYNAME_MAXLEN,
"runtime.totem.pg.mrp.srp.members.%u.status", nodeid);
icmap_set_string(member_status, "left");
log_printf (LOGSYS_LEVEL_DEBUG,
"Member left: %s", api->totem_ifaces_print (nodeid));
}
static void confchg_fn (
enum totem_configuration_type configuration_type,
const unsigned int *member_list, size_t member_list_entries,
const unsigned int *left_list, size_t left_list_entries,
const unsigned int *joined_list, size_t joined_list_entries,
const struct memb_ring_id *ring_id)
{
int i;
int abort_activate = 0;
if (sync_in_process == 1) {
abort_activate = 1;
}
sync_in_process = 1;
cs_ipcs_sync_state_changed(sync_in_process);
memcpy (&corosync_ring_id, ring_id, sizeof (struct memb_ring_id));
for (i = 0; i < left_list_entries; i++) {
member_object_left (left_list[i]);
}
for (i = 0; i < joined_list_entries; i++) {
member_object_joined (joined_list[i]);
}
/*
* Call configuration change for all services
*/
for (i = 0; i < service_count; i++) {
if (corosync_service[i] && corosync_service[i]->confchg_fn) {
corosync_service[i]->confchg_fn (configuration_type,
member_list, member_list_entries,
left_list, left_list_entries,
joined_list, joined_list_entries, ring_id);
}
}
if (abort_activate) {
sync_abort ();
}
if (configuration_type == TOTEM_CONFIGURATION_TRANSITIONAL) {
sync_save_transitional (member_list, member_list_entries, ring_id);
}
if (configuration_type == TOTEM_CONFIGURATION_REGULAR) {
sync_start (member_list, member_list_entries, ring_id);
}
}
static void priv_drop (void)
{
return; /* TODO: we are still not dropping privs */
}
static void corosync_tty_detach (void)
{
FILE *r;
/*
* Disconnect from TTY if this is not a debug run
*/
switch (fork ()) {
case -1:
corosync_exit_error (COROSYNC_DONE_FORK);
break;
case 0:
/*
* child which is disconnected, run this process
*/
break;
default:
exit (0);
break;
}
/* Create new session */
(void)setsid();
/*
* Map stdin/out/err to /dev/null.
*/
r = freopen("/dev/null", "r", stdin);
if (r == NULL) {
corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR);
}
r = freopen("/dev/null", "a", stderr);
if (r == NULL) {
corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR);
}
r = freopen("/dev/null", "a", stdout);
if (r == NULL) {
corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR);
}
}
static void corosync_mlockall (void)
{
int res;
struct rlimit rlimit;
rlimit.rlim_cur = RLIM_INFINITY;
rlimit.rlim_max = RLIM_INFINITY;
-#ifndef COROSYNC_SOLARIS
- setrlimit (RLIMIT_MEMLOCK, &rlimit);
-#else
- setrlimit (RLIMIT_VMEM, &rlimit);
+
+#ifndef RLIMIT_MEMLOCK
+#define RLIMIT_MEMLOCK RLIMIT_VMEM
#endif
+ setrlimit (RLIMIT_MEMLOCK, &rlimit);
+
res = mlockall (MCL_CURRENT | MCL_FUTURE);
if (res == -1) {
LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
"Could not lock memory of service to avoid page faults");
};
}
static void corosync_totem_stats_updater (void *data)
{
totempg_stats_t * stats;
uint32_t total_mtt_rx_token;
uint32_t total_backlog_calc;
uint32_t total_token_holdtime;
int t, prev, i;
int32_t token_count;
char key_name[ICMAP_KEYNAME_MAXLEN];
stats = api->totem_get_stats();
icmap_set_uint32("runtime.totem.pg.msg_reserved", stats->msg_reserved);
icmap_set_uint32("runtime.totem.pg.msg_queue_avail", stats->msg_queue_avail);
icmap_set_uint64("runtime.totem.pg.mrp.srp.orf_token_tx", stats->mrp->srp->orf_token_tx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.orf_token_rx", stats->mrp->srp->orf_token_rx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_merge_detect_tx", stats->mrp->srp->memb_merge_detect_tx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_merge_detect_rx", stats->mrp->srp->memb_merge_detect_rx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_join_tx", stats->mrp->srp->memb_join_tx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_join_rx", stats->mrp->srp->memb_join_rx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_tx", stats->mrp->srp->mcast_tx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_retx", stats->mrp->srp->mcast_retx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_rx", stats->mrp->srp->mcast_rx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_commit_token_tx", stats->mrp->srp->memb_commit_token_tx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_commit_token_rx", stats->mrp->srp->memb_commit_token_rx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.token_hold_cancel_tx", stats->mrp->srp->token_hold_cancel_tx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.token_hold_cancel_rx", stats->mrp->srp->token_hold_cancel_rx);
icmap_set_uint64("runtime.totem.pg.mrp.srp.operational_entered", stats->mrp->srp->operational_entered);
icmap_set_uint64("runtime.totem.pg.mrp.srp.operational_token_lost", stats->mrp->srp->operational_token_lost);
icmap_set_uint64("runtime.totem.pg.mrp.srp.gather_entered", stats->mrp->srp->gather_entered);
icmap_set_uint64("runtime.totem.pg.mrp.srp.gather_token_lost", stats->mrp->srp->gather_token_lost);
icmap_set_uint64("runtime.totem.pg.mrp.srp.commit_entered", stats->mrp->srp->commit_entered);
icmap_set_uint64("runtime.totem.pg.mrp.srp.commit_token_lost", stats->mrp->srp->commit_token_lost);
icmap_set_uint64("runtime.totem.pg.mrp.srp.recovery_entered", stats->mrp->srp->recovery_entered);
icmap_set_uint64("runtime.totem.pg.mrp.srp.recovery_token_lost", stats->mrp->srp->recovery_token_lost);
icmap_set_uint64("runtime.totem.pg.mrp.srp.consensus_timeouts", stats->mrp->srp->consensus_timeouts);
icmap_set_uint64("runtime.totem.pg.mrp.srp.rx_msg_dropped", stats->mrp->srp->rx_msg_dropped);
icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_gather", stats->mrp->srp->continuous_gather);
icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure",
stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ? 1 : 0);
for (i = 0; i < stats->mrp->srp->rrp->interface_count; i++) {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.rrp.%u.faulty", i);
icmap_set_uint8(key_name, stats->mrp->srp->rrp->faulty[i]);
}
total_mtt_rx_token = 0;
total_token_holdtime = 0;
total_backlog_calc = 0;
token_count = 0;
t = stats->mrp->srp->latest_token;
while (1) {
if (t == 0)
prev = TOTEM_TOKEN_STATS_MAX - 1;
else
prev = t - 1;
if (prev == stats->mrp->srp->earliest_token)
break;
/* if tx == 0, then dropped token (not ours) */
if (stats->mrp->srp->token[t].tx != 0 ||
(stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx) > 0 ) {
total_mtt_rx_token += (stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx);
total_token_holdtime += (stats->mrp->srp->token[t].tx - stats->mrp->srp->token[t].rx);
total_backlog_calc += stats->mrp->srp->token[t].backlog_calc;
token_count++;
}
t = prev;
}
if (token_count) {
icmap_set_uint32("runtime.totem.pg.mrp.srp.mtt_rx_token", (total_mtt_rx_token / token_count));
icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_token_workload", (total_token_holdtime / token_count));
icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_backlog_calc", (total_backlog_calc / token_count));
}
cs_ipcs_stats_update();
api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL,
corosync_totem_stats_updater,
&corosync_stats_timer_handle);
}
static void totem_dynamic_notify(
int32_t event,
const char *key_name,
struct icmap_notify_value new_val,
struct icmap_notify_value old_val,
void *user_data)
{
int res;
int ring_no;
int member_no;
struct totem_ip_address member;
int add_new_member = 0;
int remove_old_member = 0;
char tmp_str[ICMAP_KEYNAME_MAXLEN];
res = sscanf(key_name, "nodelist.node.%u.ring%u%s", &member_no, &ring_no, tmp_str);
if (res != 3)
return ;
if (strcmp(tmp_str, "_addr") != 0) {
return;
}
if (event == ICMAP_TRACK_ADD && new_val.type == ICMAP_VALUETYPE_STRING) {
add_new_member = 1;
}
if (event == ICMAP_TRACK_DELETE && old_val.type == ICMAP_VALUETYPE_STRING) {
remove_old_member = 1;
}
if (event == ICMAP_TRACK_MODIFY && new_val.type == ICMAP_VALUETYPE_STRING &&
old_val.type == ICMAP_VALUETYPE_STRING) {
add_new_member = 1;
remove_old_member = 1;
}
if (remove_old_member) {
log_printf(LOGSYS_LEVEL_DEBUG,
"removing dynamic member %s for ring %u", (char *)old_val.data, ring_no);
if (totemip_parse(&member, (char *)old_val.data, 0) == 0) {
totempg_member_remove (&member, ring_no);
}
}
if (add_new_member) {
log_printf(LOGSYS_LEVEL_DEBUG,
"adding dynamic member %s for ring %u", (char *)new_val.data, ring_no);
if (totemip_parse(&member, (char *)new_val.data, 0) == 0) {
totempg_member_add (&member, ring_no);
}
}
}
static void corosync_totem_dynamic_init (void)
{
icmap_track_t icmap_track = NULL;
icmap_track_add("nodelist.node.",
ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX,
totem_dynamic_notify,
NULL,
&icmap_track);
}
static void corosync_totem_stats_init (void)
{
icmap_set_uint32("runtime.totem.pg.mrp.srp.mtt_rx_token", 0);
icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_token_workload", 0);
icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_backlog_calc", 0);
/* start stats timer */
api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL,
corosync_totem_stats_updater,
&corosync_stats_timer_handle);
}
static void deliver_fn (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required)
{
const struct qb_ipc_request_header *header;
int32_t service;
int32_t fn_id;
uint32_t id;
header = msg;
if (endian_conversion_required) {
id = swab32 (header->id);
} else {
id = header->id;
}
/*
* Call the proper executive handler
*/
service = id >> 16;
fn_id = id & 0xffff;
if (!corosync_service[service]) {
return;
}
if (fn_id >= corosync_service[service]->exec_engine_count) {
log_printf(LOGSYS_LEVEL_WARNING, "discarded unknown message %d for service %d (max id %d)",
fn_id, service, corosync_service[service]->exec_engine_count);
return;
}
icmap_fast_inc(service_stats_rx[service][fn_id]);
if (endian_conversion_required) {
assert(corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn != NULL);
corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn
((void *)msg);
}
corosync_service[service]->exec_engine[fn_id].exec_handler_fn
(msg, nodeid);
}
int main_mcast (
const struct iovec *iovec,
unsigned int iov_len,
unsigned int guarantee)
{
const struct qb_ipc_request_header *req = iovec->iov_base;
int32_t service;
int32_t fn_id;
service = req->id >> 16;
fn_id = req->id & 0xffff;
if (corosync_service[service]) {
icmap_fast_inc(service_stats_tx[service][fn_id]);
}
return (totempg_groups_mcast_joined (corosync_group_handle, iovec, iov_len, guarantee));
}
static qb_loop_timer_handle recheck_the_q_level_timer;
void corosync_recheck_the_q_level(void *data)
{
totempg_check_q_level(corosync_group_handle);
if (cs_ipcs_q_level_get() == TOTEM_Q_LEVEL_CRITICAL) {
qb_loop_timer_add(cs_poll_handle_get(), QB_LOOP_MED, 1*QB_TIME_NS_IN_MSEC,
NULL, corosync_recheck_the_q_level, &recheck_the_q_level_timer);
}
}
struct sending_allowed_private_data_struct {
int reserved_msgs;
};
int corosync_sending_allowed (
unsigned int service,
unsigned int id,
const void *msg,
void *sending_allowed_private_data)
{
struct sending_allowed_private_data_struct *pd =
(struct sending_allowed_private_data_struct *)sending_allowed_private_data;
struct iovec reserve_iovec;
struct qb_ipc_request_header *header = (struct qb_ipc_request_header *)msg;
int sending_allowed;
reserve_iovec.iov_base = (char *)header;
reserve_iovec.iov_len = header->size;
pd->reserved_msgs = totempg_groups_joined_reserve (
corosync_group_handle,
&reserve_iovec, 1);
if (pd->reserved_msgs == -1) {
return -EINVAL;
}
sending_allowed = QB_FALSE;
if (corosync_quorum_is_quorate() == 1 ||
corosync_service[service]->allow_inquorate == CS_LIB_ALLOW_INQUORATE) {
// we are quorate
// now check flow control
if (corosync_service[service]->lib_engine[id].flow_control == CS_LIB_FLOW_CONTROL_NOT_REQUIRED) {
sending_allowed = QB_TRUE;
} else if (pd->reserved_msgs && sync_in_process == 0) {
sending_allowed = QB_TRUE;
} else if (pd->reserved_msgs == 0) {
return -ENOBUFS;
} else /* (sync_in_process) */ {
return -EINPROGRESS;
}
} else {
return -EHOSTUNREACH;
}
return (sending_allowed);
}
void corosync_sending_allowed_release (void *sending_allowed_private_data)
{
struct sending_allowed_private_data_struct *pd =
(struct sending_allowed_private_data_struct *)sending_allowed_private_data;
if (pd->reserved_msgs == -1) {
return;
}
totempg_groups_joined_release (pd->reserved_msgs);
}
int message_source_is_local (const mar_message_source_t *source)
{
int ret = 0;
assert (source != NULL);
if (source->nodeid == totempg_my_nodeid_get ()) {
ret = 1;
}
return ret;
}
void message_source_set (
mar_message_source_t *source,
void *conn)
{
assert ((source != NULL) && (conn != NULL));
memset (source, 0, sizeof (mar_message_source_t));
source->nodeid = totempg_my_nodeid_get ();
source->conn = conn;
}
static void corosync_setscheduler (void)
{
#if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) && defined(HAVE_SCHED_SETSCHEDULER)
int res;
sched_priority = sched_get_priority_max (SCHED_RR);
if (sched_priority != -1) {
global_sched_param.sched_priority = sched_priority;
res = sched_setscheduler (0, SCHED_RR, &global_sched_param);
if (res == -1) {
LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING,
"Could not set SCHED_RR at priority %d",
global_sched_param.sched_priority);
global_sched_param.sched_priority = 0;
#ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
qb_log_thread_priority_set (SCHED_OTHER, 0);
#endif
} else {
/*
* Turn on SCHED_RR in logsys system
*/
#ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
res = qb_log_thread_priority_set (SCHED_RR, sched_priority);
#else
res = -1;
#endif
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR,
"Could not set logsys thread priority."
" Can't continue because of priority inversions.");
corosync_exit_error (COROSYNC_DONE_LOGSETUP);
}
}
} else {
LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
"Could not get maximum scheduler priority");
sched_priority = 0;
}
#else
log_printf(LOGSYS_LEVEL_WARNING,
"The Platform is missing process priority setting features. Leaving at default.");
#endif
}
static void
_logsys_log_printf(int level, int subsys,
const char *function_name,
const char *file_name,
int file_line,
const char *format,
...) __attribute__((format(printf, 6, 7)));
static void
_logsys_log_printf(int level, int subsys,
const char *function_name,
const char *file_name,
int file_line,
const char *format, ...)
{
va_list ap;
va_start(ap, format);
qb_log_from_external_source_va(function_name, file_name,
format, level, file_line,
subsys, ap);
va_end(ap);
}
static void fplay_key_change_notify_fn (
int32_t event,
const char *key_name,
struct icmap_notify_value new_val,
struct icmap_notify_value old_val,
void *user_data)
{
if (strcmp(key_name, "runtime.blackbox.dump_flight_data") == 0) {
fprintf(stderr,"Writetofile\n");
corosync_blackbox_write_to_file ();
}
if (strcmp(key_name, "runtime.blackbox.dump_state") == 0) {
fprintf(stderr,"statefump\n");
corosync_state_dump ();
}
}
static void corosync_fplay_control_init (void)
{
icmap_track_t track = NULL;
icmap_set_string("runtime.blackbox.dump_flight_data", "no");
icmap_set_string("runtime.blackbox.dump_state", "no");
icmap_track_add("runtime.blackbox.dump_flight_data",
ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY,
fplay_key_change_notify_fn,
NULL, &track);
icmap_track_add("runtime.blackbox.dump_state",
ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY,
fplay_key_change_notify_fn,
NULL, &track);
}
/*
* Set RO flag for keys, which ether doesn't make sense to change by user (statistic)
* or which when changed are not reflected by runtime (totem.crypto_cipher, ...).
*
* Also some RO keys cannot be determined in this stage, so they are set later in
* other functions (like nodelist.local_node_pos, ...)
*/
static void set_icmap_ro_keys_flag (void)
{
/*
* Set RO flag for all keys of internal configuration and runtime statistics
*/
icmap_set_ro_access("internal_configuration.", CS_TRUE, CS_TRUE);
icmap_set_ro_access("runtime.connections.", CS_TRUE, CS_TRUE);
icmap_set_ro_access("runtime.totem.", CS_TRUE, CS_TRUE);
icmap_set_ro_access("runtime.services.", CS_TRUE, CS_TRUE);
/*
* Set RO flag for constrete keys of configuration which can't be changed
* during runtime
*/
icmap_set_ro_access("totem.crypto_cipher", CS_FALSE, CS_TRUE);
icmap_set_ro_access("totem.crypto_hash", CS_FALSE, CS_TRUE);
icmap_set_ro_access("totem.secauth", CS_FALSE, CS_TRUE);
icmap_set_ro_access("totem.rrp_mode", CS_FALSE, CS_TRUE);
icmap_set_ro_access("totem.netmtu", CS_FALSE, CS_TRUE);
}
static void main_service_ready (void)
{
int res;
/*
* This must occur after totempg is initialized because "this_ip" must be set
*/
res = corosync_service_defaults_link_and_init (api);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Could not initialize default services");
corosync_exit_error (COROSYNC_DONE_INIT_SERVICES);
}
cs_ipcs_init();
corosync_totem_stats_init ();
corosync_fplay_control_init ();
corosync_totem_dynamic_init ();
sync_init (
corosync_sync_callbacks_retrieve,
corosync_sync_completed);
}
static enum e_corosync_done corosync_flock (const char *lockfile, pid_t pid)
{
struct flock lock;
enum e_corosync_done err;
char pid_s[17];
int fd_flag;
int lf;
err = COROSYNC_DONE_EXIT;
lf = open (lockfile, O_WRONLY | O_CREAT, 0640);
if (lf == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create lock file.");
return (COROSYNC_DONE_AQUIRE_LOCK);
}
retry_fcntl:
lock.l_type = F_WRLCK;
lock.l_start = 0;
lock.l_whence = SEEK_SET;
lock.l_len = 0;
if (fcntl (lf, F_SETLK, &lock) == -1) {
switch (errno) {
case EINTR:
goto retry_fcntl;
break;
case EAGAIN:
case EACCES:
log_printf (LOGSYS_LEVEL_ERROR, "Another Corosync instance is already running.");
err = COROSYNC_DONE_ALREADY_RUNNING;
goto error_close;
break;
default:
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't aquire lock. Error was %s",
strerror(errno));
err = COROSYNC_DONE_AQUIRE_LOCK;
goto error_close;
break;
}
}
if (ftruncate (lf, 0) == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't truncate lock file. Error was %s",
strerror (errno));
err = COROSYNC_DONE_AQUIRE_LOCK;
goto error_close_unlink;
}
memset (pid_s, 0, sizeof (pid_s));
snprintf (pid_s, sizeof (pid_s) - 1, "%u\n", pid);
retry_write:
if (write (lf, pid_s, strlen (pid_s)) != strlen (pid_s)) {
if (errno == EINTR) {
goto retry_write;
} else {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't write pid to lock file. "
"Error was %s", strerror (errno));
err = COROSYNC_DONE_AQUIRE_LOCK;
goto error_close_unlink;
}
}
if ((fd_flag = fcntl (lf, F_GETFD, 0)) == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't get close-on-exec flag from lock file. "
"Error was %s", strerror (errno));
err = COROSYNC_DONE_AQUIRE_LOCK;
goto error_close_unlink;
}
fd_flag |= FD_CLOEXEC;
if (fcntl (lf, F_SETFD, fd_flag) == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't set close-on-exec flag to lock file. "
"Error was %s", strerror (errno));
err = COROSYNC_DONE_AQUIRE_LOCK;
goto error_close_unlink;
}
return (err);
error_close_unlink:
unlink (lockfile);
error_close:
close (lf);
return (err);
}
int main (int argc, char **argv, char **envp)
{
const char *error_string;
struct totem_config totem_config;
int res, ch;
int background, setprio;
struct stat stat_out;
char corosync_lib_dir[PATH_MAX];
enum e_corosync_done flock_err;
uint64_t totem_config_warnings;
/* default configuration
*/
background = 1;
setprio = 0;
while ((ch = getopt (argc, argv, "fprv")) != EOF) {
switch (ch) {
case 'f':
background = 0;
logsys_config_mode_set (NULL, LOGSYS_MODE_OUTPUT_STDERR|LOGSYS_MODE_THREADED|LOGSYS_MODE_FORK);
break;
case 'p':
break;
case 'r':
setprio = 1;
break;
case 'v':
printf ("Corosync Cluster Engine, version '%s'\n", VERSION);
printf ("Copyright (c) 2006-2009 Red Hat, Inc.\n");
return EXIT_SUCCESS;
break;
default:
fprintf(stderr, \
"usage:\n"\
" -f : Start application in foreground.\n"\
" -p : Does nothing. \n"\
" -r : Set round robin realtime scheduling \n"\
" -v : Display version and SVN revision of Corosync and exit.\n");
return EXIT_FAILURE;
}
}
/*
* Set round robin realtime scheduling with priority 99
* Lock all memory to avoid page faults which may interrupt
* application healthchecking
*/
if (setprio) {
corosync_setscheduler ();
}
corosync_mlockall ();
log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine ('%s'): started and ready to provide service.", VERSION);
log_printf (LOGSYS_LEVEL_INFO, "Corosync built-in features:" PACKAGE_FEATURES "");
corosync_poll_handle = qb_loop_create ();
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW,
SIGUSR2, NULL, sig_diag_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGINT, NULL, sig_exit_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGQUIT, NULL, sig_exit_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGTERM, NULL, sig_exit_handler, NULL);
(void)signal (SIGSEGV, sigsegv_handler);
(void)signal (SIGABRT, sigabrt_handler);
#if MSG_NOSIGNAL != 0
(void)signal (SIGPIPE, SIG_IGN);
#endif
if (icmap_init() != CS_OK) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't initialize configuration component.");
corosync_exit_error (COROSYNC_DONE_ICMAP);
}
set_icmap_ro_keys_flag();
/*
* Initialize the corosync_api_v1 definition
*/
api = apidef_get ();
res = coroparse_configparse(&error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
res = corosync_log_config_read (&error_string);
if (res == -1) {
/*
* if we are here, we _must_ flush the logsys queue
* and try to inform that we couldn't read the config.
* this is a desperate attempt before certain death
* and there is no guarantee that we can print to stderr
* nor that logsys is sending the messages where we expect.
*/
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
fprintf(stderr, "%s", error_string);
syslog (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD);
}
/*
* Make sure required directory is present
*/
sprintf (corosync_lib_dir, "%s/lib/corosync", LOCALSTATEDIR);
res = stat (corosync_lib_dir, &stat_out);
if ((res == -1) || (res == 0 && !S_ISDIR(stat_out.st_mode))) {
log_printf (LOGSYS_LEVEL_ERROR, "Required directory not present %s. Please create it.", corosync_lib_dir);
corosync_exit_error (COROSYNC_DONE_DIR_NOT_PRESENT);
}
res = totem_config_read (&totem_config, &error_string, &totem_config_warnings);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_IGNORED) {
log_printf (LOGSYS_LEVEL_WARNING, "member section is used together with nodelist. Members ignored.");
}
if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED) {
log_printf (LOGSYS_LEVEL_WARNING, "member section is deprecated.");
}
if (totem_config_warnings & TOTEM_CONFIG_WARNING_TOTEM_NODEID_IGNORED) {
log_printf (LOGSYS_LEVEL_WARNING, "nodeid appears both in totem section and nodelist. Nodelist one is used.");
}
if (totem_config_warnings != 0) {
log_printf (LOGSYS_LEVEL_WARNING, "Please migrate config file to nodelist.");
}
res = totem_config_keyread (&totem_config, &error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
res = totem_config_validate (&totem_config, &error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD);
}
totem_config.totem_logging_configuration = totem_logging_configuration;
totem_config.totem_logging_configuration.log_subsys_id = _logsys_subsys_create("TOTEM", "totem");
totem_config.totem_logging_configuration.log_level_security = LOGSYS_LEVEL_WARNING;
totem_config.totem_logging_configuration.log_level_error = LOGSYS_LEVEL_ERROR;
totem_config.totem_logging_configuration.log_level_warning = LOGSYS_LEVEL_WARNING;
totem_config.totem_logging_configuration.log_level_notice = LOGSYS_LEVEL_NOTICE;
totem_config.totem_logging_configuration.log_level_debug = LOGSYS_LEVEL_DEBUG;
totem_config.totem_logging_configuration.log_printf = _logsys_log_printf;
logsys_config_apply();
/*
* Now we are fully initialized.
*/
if (background) {
corosync_tty_detach ();
}
qb_log_thread_start();
if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) != COROSYNC_DONE_EXIT) {
corosync_exit_error (flock_err);
}
/*
* if totempg_initialize doesn't have root priveleges, it cannot
* bind to a specific interface. This only matters if
* there is more then one interface in a system, so
* in this case, only a warning is printed
*/
/*
* Join multicast group and setup delivery
* and configuration change functions
*/
totempg_initialize (
corosync_poll_handle,
&totem_config);
totempg_service_ready_register (
main_service_ready);
totempg_groups_initialize (
&corosync_group_handle,
deliver_fn,
confchg_fn);
totempg_groups_join (
corosync_group_handle,
&corosync_group,
1);
/*
* Drop root privleges to user 'corosync'
* TODO: Don't really need full root capabilities;
* needed capabilities are:
* CAP_NET_RAW (bindtodevice)
* CAP_SYS_NICE (setscheduler)
* CAP_IPC_LOCK (mlockall)
*/
priv_drop ();
schedwrk_init (
serialize_lock,
serialize_unlock);
/*
* Start main processing loop
*/
qb_loop_run (corosync_poll_handle);
/*
* Exit was requested
*/
totempg_finalize ();
/*
* free the loop resources
*/
qb_loop_destroy (corosync_poll_handle);
/*
* free up the icmap
*/
/*
* Remove pid lock file
*/
unlink (corosync_lock_file);
corosync_exit_error (COROSYNC_DONE_EXIT);
return EXIT_SUCCESS;
}
diff --git a/exec/mon.c b/exec/mon.c
index d6451322..49c46b42 100644
--- a/exec/mon.c
+++ b/exec/mon.c
@@ -1,480 +1,476 @@
/*
* Copyright (c) 2010-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Angus Salkeld <asalkeld@redhat.com>
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <unistd.h>
#include <statgrab.h>
#include <corosync/corotypes.h>
#include <corosync/corodefs.h>
#include <corosync/coroapi.h>
#include <corosync/list.h>
#include <corosync/logsys.h>
#include <corosync/icmap.h>
#include "fsm.h"
#include "service.h"
LOGSYS_DECLARE_SUBSYS ("MON");
/*
* Service Interfaces required by service_message_handler struct
*/
static char *mon_exec_init_fn (struct corosync_api_v1 *corosync_api);
static struct corosync_api_v1 *api;
#define MON_DEFAULT_PERIOD 3000
#define MON_MIN_PERIOD 500
#define MON_MAX_PERIOD (120 * CS_TIME_MS_IN_SEC)
struct corosync_service_engine mon_service_engine = {
.name = "corosync resource monitoring service",
.id = MON_SERVICE,
.priority = 1,
.private_data_size = 0,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED,
.lib_init_fn = NULL,
.lib_exit_fn = NULL,
.lib_engine = NULL,
.lib_engine_count = 0,
.exec_engine = NULL,
.exec_engine_count = 0,
.confchg_fn = NULL,
.exec_init_fn = mon_exec_init_fn,
.exec_dump_fn = NULL
};
static DECLARE_LIST_INIT (confchg_notify);
struct resource_instance {
const char *icmap_path;
const char *name;
corosync_timer_handle_t timer_handle;
void (*update_stats_fn) (void *data);
struct cs_fsm fsm;
uint64_t period;
icmap_value_types_t max_type;
union {
int32_t int32;
double dbl;
} max;
};
static void mem_update_stats_fn (void *data);
static void load_update_stats_fn (void *data);
static struct resource_instance memory_used_inst = {
.name = "memory_used",
.icmap_path = "resources.system.memory_used.",
.update_stats_fn = mem_update_stats_fn,
.max_type = ICMAP_VALUETYPE_INT32,
.max.int32 = INT32_MAX,
.period = MON_DEFAULT_PERIOD,
};
static struct resource_instance load_15min_inst = {
.name = "load_15min",
.icmap_path = "resources.system.load_15min.",
.update_stats_fn = load_update_stats_fn,
.max_type = ICMAP_VALUETYPE_DOUBLE,
.max.dbl = INT32_MAX,
.period = MON_DEFAULT_PERIOD,
};
/*
* F S M
*/
static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data);
static void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data);
const char * mon_running_str = "running";
const char * mon_failed_str = "failed";
const char * mon_failure_str = "failure";
const char * mon_stopped_str = "stopped";
const char * mon_config_changed_str = "config_changed";
enum mon_resource_state {
MON_S_STOPPED,
MON_S_RUNNING,
MON_S_FAILED
};
enum mon_resource_event {
MON_E_CONFIG_CHANGED,
MON_E_FAILURE
};
struct cs_fsm_entry mon_fsm_table[] = {
{ MON_S_STOPPED, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_STOPPED, MON_S_RUNNING, -1} },
{ MON_S_STOPPED, MON_E_FAILURE, NULL, {-1} },
{ MON_S_RUNNING, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_RUNNING, MON_S_STOPPED, -1} },
{ MON_S_RUNNING, MON_E_FAILURE, mon_resource_failed, {MON_S_FAILED, -1} },
{ MON_S_FAILED, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_RUNNING, MON_S_STOPPED, -1} },
{ MON_S_FAILED, MON_E_FAILURE, NULL, {-1} },
};
struct corosync_service_engine *mon_get_service_engine_ver0 (void)
{
return (&mon_service_engine);
}
static const char * mon_res_state_to_str(struct cs_fsm* fsm,
int32_t state)
{
switch (state) {
case MON_S_STOPPED:
return mon_stopped_str;
break;
case MON_S_RUNNING:
return mon_running_str;
break;
case MON_S_FAILED:
return mon_failed_str;
break;
}
return NULL;
}
static const char * mon_res_event_to_str(struct cs_fsm* fsm,
int32_t event)
{
switch (event) {
case MON_E_CONFIG_CHANGED:
return mon_config_changed_str;
break;
case MON_E_FAILURE:
return mon_failure_str;
break;
}
return NULL;
}
static void mon_fsm_cb (struct cs_fsm *fsm, int cb_event, int32_t curr_state,
int32_t next_state, int32_t fsm_event, void *data)
{
switch (cb_event) {
case CS_FSM_CB_EVENT_PROCESS_NF:
log_printf (LOGSYS_LEVEL_ERROR, "Fsm:%s could not find event \"%s\" in state \"%s\"",
fsm->name, fsm->event_to_str(fsm, fsm_event), fsm->state_to_str(fsm, curr_state));
corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
break;
case CS_FSM_CB_EVENT_STATE_SET:
log_printf (LOGSYS_LEVEL_INFO, "Fsm:%s event \"%s\", state \"%s\" --> \"%s\"",
fsm->name,
fsm->event_to_str(fsm, fsm_event),
fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
fsm->state_to_str(fsm, next_state));
break;
case CS_FSM_CB_EVENT_STATE_SET_NF:
log_printf (LOGSYS_LEVEL_CRIT, "Fsm:%s Can't change state from \"%s\" to \"%s\" (event was \"%s\")",
fsm->name,
fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
fsm->state_to_str(fsm, next_state),
fsm->event_to_str(fsm, fsm_event));
corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
break;
default:
log_printf (LOGSYS_LEVEL_CRIT, "Fsm: Can't find callback event!");
corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
break;
}
}
static void mon_fsm_state_set (struct cs_fsm* fsm,
enum mon_resource_state next_state, struct resource_instance* inst)
{
enum mon_resource_state prev_state = fsm->curr_state;
const char *state_str;
char key_name[ICMAP_KEYNAME_MAXLEN];
ENTER();
cs_fsm_state_set(fsm, next_state, inst, mon_fsm_cb);
if (prev_state == fsm->curr_state) {
return;
}
state_str = mon_res_state_to_str(fsm, fsm->curr_state);
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "state");
icmap_set_string(key_name, state_str);
}
static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data)
{
struct resource_instance * inst = (struct resource_instance *)data;
uint64_t tmp_value;
char key_name[ICMAP_KEYNAME_MAXLEN];
int run_updater;
ENTER();
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "poll_period");
if (icmap_get_uint64(key_name, &tmp_value) == CS_OK) {
if (tmp_value >= MON_MIN_PERIOD && tmp_value <= MON_MAX_PERIOD) {
log_printf (LOGSYS_LEVEL_DEBUG,
"poll_period changing from:%"PRIu64" to %"PRIu64".",
inst->period, tmp_value);
inst->period = tmp_value;
} else {
log_printf (LOGSYS_LEVEL_WARNING,
"Could NOT use poll_period:%"PRIu64" ms for resource %s",
tmp_value, inst->name);
}
}
if (inst->timer_handle) {
api->timer_delete(inst->timer_handle);
inst->timer_handle = 0;
}
run_updater = 0;
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "max");
if (inst->max_type == ICMAP_VALUETYPE_INT32) {
if (icmap_get_int32(key_name, &inst->max.int32) != CS_OK) {
inst->max.int32 = INT32_MAX;
mon_fsm_state_set (fsm, MON_S_STOPPED, inst);
} else {
run_updater = 1;
}
}
if (inst->max_type == ICMAP_VALUETYPE_DOUBLE) {
if (icmap_get_double(key_name, &inst->max.dbl) != CS_OK) {
inst->max.dbl = INT32_MAX;
mon_fsm_state_set (fsm, MON_S_STOPPED, inst);
} else {
run_updater = 1;
}
}
if (run_updater) {
mon_fsm_state_set (fsm, MON_S_RUNNING, inst);
/*
* run the updater, incase the period has shortened
* and to start the timer.
*/
inst->update_stats_fn (inst);
}
}
void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data)
{
struct resource_instance * inst = (struct resource_instance *)data;
ENTER();
mon_fsm_state_set (fsm, MON_S_FAILED, inst);
}
static int32_t percent_mem_used_get(void)
{
sg_mem_stats *mem_stats;
sg_swap_stats *swap_stats;
long long total, freemem;
mem_stats = sg_get_mem_stats();
swap_stats = sg_get_swap_stats();
if (mem_stats == NULL || swap_stats != NULL) {
log_printf (LOGSYS_LEVEL_ERROR, "Unable to get memory stats: %s",
sg_str_error(sg_get_error()));
return -1;
}
total = mem_stats->total + swap_stats->total;
freemem = mem_stats->free + swap_stats->free;
return ((total - freemem) * 100) / total;
}
static void mem_update_stats_fn (void *data)
{
struct resource_instance * inst = (struct resource_instance *)data;
int32_t new_value;
uint64_t timestamp;
char key_name[ICMAP_KEYNAME_MAXLEN];
new_value = percent_mem_used_get();
if (new_value > 0) {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current");
icmap_set_uint32(key_name, new_value);
timestamp = cs_timestamp_get();
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated");
icmap_set_uint64(key_name, timestamp);
if (new_value > inst->max.int32 && inst->fsm.curr_state != MON_S_FAILED) {
cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst, mon_fsm_cb);
}
}
api->timer_add_duration(inst->period * MILLI_2_NANO_SECONDS,
inst, inst->update_stats_fn, &inst->timer_handle);
}
static double min15_loadavg_get(void)
{
sg_load_stats *load_stats;
load_stats = sg_get_load_stats ();
if (load_stats == NULL) {
log_printf (LOGSYS_LEVEL_ERROR, "Unable to get load stats: %s",
sg_str_error (sg_get_error()));
return -1;
}
return load_stats->min15;
}
static void load_update_stats_fn (void *data)
{
struct resource_instance * inst = (struct resource_instance *)data;
uint64_t timestamp;
char key_name[ICMAP_KEYNAME_MAXLEN];
double min15 = min15_loadavg_get();
if (min15 > 0) {
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current");
icmap_set_double(key_name, min15);
timestamp = cs_timestamp_get();
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated");
icmap_set_uint64(key_name, timestamp);
if (min15 > inst->max.dbl && inst->fsm.curr_state != MON_S_FAILED) {
cs_fsm_process (&inst->fsm, MON_E_FAILURE, &inst, mon_fsm_cb);
}
}
api->timer_add_duration(inst->period * MILLI_2_NANO_SECONDS,
inst, inst->update_stats_fn, &inst->timer_handle);
}
static void mon_key_changed_cb (
int32_t event,
const char *key_name,
struct icmap_notify_value new_value,
struct icmap_notify_value old_value,
void *user_data)
{
struct resource_instance* inst = (struct resource_instance*)user_data;
char *last_key_part;
if (event == ICMAP_TRACK_DELETE && inst) {
log_printf (LOGSYS_LEVEL_WARNING,
"resource \"%s\" deleted from cmap!",
inst->name);
cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb);
}
if (event == ICMAP_TRACK_MODIFY) {
last_key_part = strrchr(key_name, '.');
if (last_key_part == NULL)
return ;
last_key_part++;
if (strcmp(last_key_part, "max") == 0 ||
strcmp(last_key_part, "poll_period") == 0) {
ENTER();
cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb);
}
}
}
static void mon_instance_init (struct resource_instance* inst)
{
uint64_t tmp_value;
char key_name[ICMAP_KEYNAME_MAXLEN];
icmap_track_t icmap_track = NULL;
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "current");
if (inst->max_type == ICMAP_VALUETYPE_INT32) {
icmap_set_int32(key_name, 0);
} else {
icmap_set_double(key_name, 0);
}
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "last_updated");
icmap_set_uint64(key_name, 0);
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "state");
icmap_set_string(key_name, mon_stopped_str);
inst->fsm.name = inst->name;
inst->fsm.curr_entry = 0;
inst->fsm.curr_state = MON_S_STOPPED;
inst->fsm.table = mon_fsm_table;
inst->fsm.entries = sizeof(mon_fsm_table) / sizeof(struct cs_fsm_entry);
inst->fsm.state_to_str = mon_res_state_to_str;
inst->fsm.event_to_str = mon_res_event_to_str;
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", inst->icmap_path, "poll_period");
if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) {
icmap_set_uint64(key_name, inst->period);
}
else {
if (tmp_value >= MON_MIN_PERIOD && tmp_value <= MON_MAX_PERIOD) {
inst->period = tmp_value;
} else {
log_printf (LOGSYS_LEVEL_WARNING,
"Could NOT use poll_period:%"PRIu64" ms for resource %s",
tmp_value, inst->name);
}
}
cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst, mon_fsm_cb);
icmap_track_add(inst->icmap_path,
ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY | ICMAP_TRACK_DELETE | ICMAP_TRACK_PREFIX,
mon_key_changed_cb, inst, &icmap_track);
}
static char *mon_exec_init_fn (struct corosync_api_v1 *corosync_api)
{
-
sg_init();
-#ifdef COROSYNC_SOLARIS
- logsys_subsys_init();
-#endif
api = corosync_api;
mon_instance_init (&memory_used_inst);
mon_instance_init (&load_15min_inst);
return NULL;
}
diff --git a/exec/pload.c b/exec/pload.c
index 55332a7c..206338d6 100644
--- a/exec/pload.c
+++ b/exec/pload.c
@@ -1,361 +1,357 @@
/*
* Copyright (c) 2008-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Authors: Steven Dake (sdake@redhat.com)
* Fabio M. Di Nitto (fdinitto@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <qb/qblist.h>
#include <qb/qbutil.h>
#include <qb/qbipc_common.h>
#include <corosync/swab.h>
#include <corosync/corodefs.h>
#include <corosync/coroapi.h>
#include <corosync/icmap.h>
#include <corosync/logsys.h>
#include "service.h"
#include "util.h"
LOGSYS_DECLARE_SUBSYS ("PLOAD");
/*
* Service Interfaces required by service_message_handler struct
*/
static struct corosync_api_v1 *api;
static char *pload_exec_init_fn (struct corosync_api_v1 *corosync_api);
/*
* on wire / network bits
*/
enum pload_exec_message_req_types {
MESSAGE_REQ_EXEC_PLOAD_START = 0,
MESSAGE_REQ_EXEC_PLOAD_MCAST = 1
};
struct req_exec_pload_start {
struct qb_ipc_request_header header;
uint32_t msg_count;
uint32_t msg_size;
};
struct req_exec_pload_mcast {
struct qb_ipc_request_header header;
};
static void message_handler_req_exec_pload_start (const void *msg,
unsigned int nodeid);
static void req_exec_pload_start_endian_convert (void *msg);
static void message_handler_req_exec_pload_mcast (const void *msg,
unsigned int nodeid);
static void req_exec_pload_mcast_endian_convert (void *msg);
static struct corosync_exec_handler pload_exec_engine[] =
{
{
.exec_handler_fn = message_handler_req_exec_pload_start,
.exec_endian_convert_fn = req_exec_pload_start_endian_convert
},
{
.exec_handler_fn = message_handler_req_exec_pload_mcast,
.exec_endian_convert_fn = req_exec_pload_mcast_endian_convert
}
};
/*
* internal bits and pieces
*/
/*
* really unused buffer but we need to give something to iovec
*/
static char *buffer = NULL;
/*
* wanted/size come from config
* sent/delivered track the runtime status
*/
static uint32_t msgs_wanted = 0;
static uint32_t msg_size = 0;
static uint32_t msgs_sent = 0;
static uint32_t msgs_delivered = 0;
/*
* bit flip to track if we are running or not and avoid multiple instances
*/
static uint8_t pload_started = 0;
/*
* handle for scheduler
*/
static hdb_handle_t start_mcasting_handle;
/*
* timing/profiling
*/
static unsigned long long int tv1;
static unsigned long long int tv2;
static unsigned long long int tv_elapsed;
/*
* Service engine hooks
*/
struct corosync_service_engine pload_service_engine = {
.name = "corosync profile loading service",
.id = PLOAD_SERVICE,
.priority = 1,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED,
.exec_engine = pload_exec_engine,
.exec_engine_count = sizeof (pload_exec_engine) / sizeof (struct corosync_exec_handler),
.exec_init_fn = pload_exec_init_fn
};
struct corosync_service_engine *pload_get_service_engine_ver0 (void)
{
return (&pload_service_engine);
}
/*
* internal use only functions
*/
/*
* not all architectures / OSes define timersub in sys/time.h or time.h
*/
#ifndef timersub
#warning Using internal timersub definition. Check your include header files
#define timersub(a, b, result) \
do { \
(result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
(result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
if ((result)->tv_usec < 0) { \
--(result)->tv_sec; \
(result)->tv_usec += 1000000; \
} \
} while (0)
#endif /* timersub */
/*
* tell all cluster nodes to start mcasting
*/
static void pload_send_start (uint32_t count, uint32_t size)
{
struct req_exec_pload_start req_exec_pload_start;
struct iovec iov;
req_exec_pload_start.header.id = SERVICE_ID_MAKE (PLOAD_SERVICE, MESSAGE_REQ_EXEC_PLOAD_START);
req_exec_pload_start.msg_count = count;
req_exec_pload_start.msg_size = size;
iov.iov_base = (void *)&req_exec_pload_start;
iov.iov_len = sizeof (struct req_exec_pload_start);
api->totem_mcast (&iov, 1, TOTEM_AGREED);
}
/*
* send N empty data messages of size X
*/
static int pload_send_message (const void *arg)
{
struct req_exec_pload_mcast req_exec_pload_mcast;
struct iovec iov[2];
unsigned int res;
unsigned int iov_len = 1;
req_exec_pload_mcast.header.id = SERVICE_ID_MAKE (PLOAD_SERVICE, MESSAGE_REQ_EXEC_PLOAD_MCAST);
req_exec_pload_mcast.header.size = sizeof (struct req_exec_pload_mcast) + msg_size;
iov[0].iov_base = (void *)&req_exec_pload_mcast;
iov[0].iov_len = sizeof (struct req_exec_pload_mcast);
if (msg_size > sizeof (req_exec_pload_mcast)) {
iov[1].iov_base = &buffer;
iov[1].iov_len = msg_size - sizeof (req_exec_pload_mcast);
iov_len = 2;
}
do {
res = api->totem_mcast (iov, iov_len, TOTEM_AGREED);
if (res == -1) {
break;
} else {
msgs_sent++;
}
} while (msgs_sent < msgs_wanted);
if (msgs_sent == msgs_wanted) {
return (0);
} else {
return (-1);
}
}
/*
* hook into icmap to read config at runtime
* we do NOT start by default, ever!
*/
static void pload_read_config(
int32_t event,
const char *key_name,
struct icmap_notify_value new_val,
struct icmap_notify_value old_val,
void *user_data)
{
uint32_t pload_count = 1500000;
uint32_t pload_size = 300;
char *pload_start = NULL;
icmap_get_uint32("pload.count", &pload_count);
icmap_get_uint32("pload.size", &pload_size);
if (pload_size > MESSAGE_SIZE_MAX) {
pload_size = MESSAGE_SIZE_MAX;
log_printf(LOGSYS_LEVEL_WARNING, "pload size limited to %u", pload_size);
}
if ((!pload_started) &&
(icmap_get_string("pload.start", &pload_start) == CS_OK)) {
if (!strcmp(pload_start,
"i_totally_understand_pload_will_crash_my_cluster_and_kill_corosync_on_exit")) {
buffer = malloc(pload_size);
if (buffer) {
log_printf(LOGSYS_LEVEL_WARNING, "Starting pload!");
pload_send_start(pload_count, pload_size);
} else {
log_printf(LOGSYS_LEVEL_WARNING,
"Unable to allocate pload buffer!");
}
}
free(pload_start);
}
}
/*
* exec functions
*/
static char *pload_exec_init_fn (struct corosync_api_v1 *corosync_api)
{
icmap_track_t pload_track = NULL;
-#ifdef COROSYNC_SOLARIS
- logsys_subsys_init();
-#endif
-
api = corosync_api;
/*
* track changes to pload config and start only on demand
*/
if (icmap_track_add("pload.",
ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX,
pload_read_config,
NULL,
&pload_track) != CS_OK) {
return (char *)"Unable to setup pload config tracking!\n";
}
return NULL;
}
/*
* network messages/onwire handlers
*/
static void req_exec_pload_start_endian_convert (void *msg)
{
struct req_exec_pload_start *req_exec_pload_start = msg;
req_exec_pload_start->msg_count = swab32(req_exec_pload_start->msg_count);
req_exec_pload_start->msg_size = swab32(req_exec_pload_start->msg_size);
}
static void message_handler_req_exec_pload_start (
const void *msg,
unsigned int nodeid)
{
const struct req_exec_pload_start *req_exec_pload_start = msg;
/*
* don't start multiple instances
*/
if (pload_started) {
return;
}
pload_started = 1;
msgs_wanted = req_exec_pload_start->msg_count;
msg_size = req_exec_pload_start->msg_size;
api->schedwrk_create (
&start_mcasting_handle,
pload_send_message,
&start_mcasting_handle);
}
static void req_exec_pload_mcast_endian_convert (void *msg)
{
}
static void message_handler_req_exec_pload_mcast (
const void *msg,
unsigned int nodeid)
{
char log_buffer[1024];
if (msgs_delivered == 0) {
tv1 = qb_util_nano_current_get ();
}
msgs_delivered += 1;
if (msgs_delivered == msgs_wanted) {
tv2 = qb_util_nano_current_get ();
tv_elapsed = tv2 - tv1;
sprintf (log_buffer, "%5d Writes %d bytes per write %7.3f seconds runtime, %9.3f TP/S, %9.3f MB/S.",
msgs_delivered,
msg_size,
(tv_elapsed / 1000000000.0),
((float)msgs_delivered) / (tv_elapsed / 1000000000.0),
(((float)msgs_delivered) * ((float)msg_size) /
(tv_elapsed / 1000000000.0)) / (1024.0 * 1024.0));
log_printf (LOGSYS_LEVEL_NOTICE, "%s", log_buffer);
log_printf (LOGSYS_LEVEL_WARNING, "Stopping corosync the hard way");
if (buffer) {
free(buffer);
buffer = NULL;
}
exit(COROSYNC_DONE_PLOAD);
}
}
diff --git a/exec/totemudp.c b/exec/totemudp.c
index 0c4bb0ba..d7d7d2b4 100644
--- a/exec/totemudp.c
+++ b/exec/totemudp.c
@@ -1,1260 +1,1288 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <assert.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netdb.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <sched.h>
#include <time.h>
#include <sys/time.h>
#include <sys/poll.h>
#include <limits.h>
#include <corosync/sq.h>
#include <corosync/swab.h>
#include <corosync/list.h>
#include <qb/qbdefs.h>
#include <qb/qbloop.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/logsys.h>
#include "totemudp.h"
#include "util.h"
#include "totemcrypto.h"
#include <nss.h>
#include <pk11pub.h>
#include <pkcs11.h>
#include <prerror.h>
#ifndef MSG_NOSIGNAL
#define MSG_NOSIGNAL 0
#endif
#define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX)
#define NETIF_STATE_REPORT_UP 1
#define NETIF_STATE_REPORT_DOWN 2
#define BIND_STATE_UNBOUND 0
#define BIND_STATE_REGULAR 1
#define BIND_STATE_LOOPBACK 2
#define MESSAGE_TYPE_MEMB_JOIN 3
struct totemudp_socket {
int mcast_recv;
int mcast_send;
int token;
};
struct totemudp_instance {
struct crypto_instance *crypto_inst;
qb_loop_t *totemudp_poll_handle;
struct totem_interface *totem_interface;
int netif_state_report;
int netif_bind_state;
void *context;
void (*totemudp_deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len);
void (*totemudp_iface_change_fn) (
void *context,
const struct totem_ip_address *iface_address);
void (*totemudp_target_set_completed) (void *context);
/*
* Function and data used to log messages
*/
int totemudp_log_level_security;
int totemudp_log_level_error;
int totemudp_log_level_warning;
int totemudp_log_level_notice;
int totemudp_log_level_debug;
int totemudp_subsys_id;
void (*totemudp_log_printf) (
int level,
int subsys,
const char *function,
const char *file,
int line,
const char *format,
...)__attribute__((format(printf, 6, 7)));
void *udp_context;
char iov_buffer[FRAME_SIZE_MAX];
char iov_buffer_flush[FRAME_SIZE_MAX];
struct iovec totemudp_iov_recv;
struct iovec totemudp_iov_recv_flush;
struct totemudp_socket totemudp_sockets;
struct totem_ip_address mcast_address;
int stats_sent;
int stats_recv;
int stats_delv;
int stats_remcasts;
int stats_orf_token;
struct timeval stats_tv_start;
struct totem_ip_address my_id;
int firstrun;
qb_loop_timer_handle timer_netif_check_timeout;
unsigned int my_memb_entries;
int flushing;
struct totem_config *totem_config;
struct totem_ip_address token_target;
};
struct work_item {
const void *msg;
unsigned int msg_len;
struct totemudp_instance *instance;
};
static int totemudp_build_sockets (
struct totemudp_instance *instance,
struct totem_ip_address *bindnet_address,
struct totem_ip_address *mcastaddress,
struct totemudp_socket *sockets,
struct totem_ip_address *bound_to);
static struct totem_ip_address localhost;
static void totemudp_instance_initialize (struct totemudp_instance *instance)
{
memset (instance, 0, sizeof (struct totemudp_instance));
instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN;
instance->totemudp_iov_recv.iov_base = instance->iov_buffer;
instance->totemudp_iov_recv.iov_len = FRAME_SIZE_MAX; //sizeof (instance->iov_buffer);
instance->totemudp_iov_recv_flush.iov_base = instance->iov_buffer_flush;
instance->totemudp_iov_recv_flush.iov_len = FRAME_SIZE_MAX; //sizeof (instance->iov_buffer);
/*
* There is always atleast 1 processor
*/
instance->my_memb_entries = 1;
}
#define log_printf(level, format, args...) \
do { \
instance->totemudp_log_printf ( \
level, instance->totemudp_subsys_id, \
__FUNCTION__, __FILE__, __LINE__, \
(const char *)format, ##args); \
} while (0);
#define LOGSYS_PERROR(err_num, level, fmt, args...) \
do { \
char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \
const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \
instance->totemudp_log_printf ( \
level, instance->totemudp_subsys_id, \
__FUNCTION__, __FILE__, __LINE__, \
fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \
} while(0)
int totemudp_crypto_set (
void *udp_context,
const char *cipher_type,
const char *hash_type)
{
return (0);
}
static inline void ucast_sendmsg (
struct totemudp_instance *instance,
struct totem_ip_address *system_to,
const void *msg,
unsigned int msg_len)
{
struct msghdr msg_ucast;
int res = 0;
size_t buf_out_len;
unsigned char buf_out[FRAME_SIZE_MAX];
struct sockaddr_storage sockaddr;
struct iovec iovec;
int addrlen;
/*
* Encrypt and digest the message
*/
if (crypto_encrypt_and_sign (
instance->crypto_inst,
(const unsigned char *)msg,
msg_len,
buf_out,
&buf_out_len) != 0) {
log_printf(LOGSYS_LEVEL_CRIT, "Error encrypting/signing packet (non-critical)");
return;
}
iovec.iov_base = (void *)buf_out;
iovec.iov_len = buf_out_len;
/*
* Build unicast message
*/
totemip_totemip_to_sockaddr_convert(system_to,
instance->totem_interface->ip_port, &sockaddr, &addrlen);
msg_ucast.msg_name = &sockaddr;
msg_ucast.msg_namelen = addrlen;
msg_ucast.msg_iov = (void *)&iovec;
msg_ucast.msg_iovlen = 1;
-#if !defined(COROSYNC_SOLARIS)
+#ifdef HAVE_MSGHDR_CONTROL
msg_ucast.msg_control = 0;
+#endif
+#ifdef HAVE_MSGHDR_CONTROLLEN
msg_ucast.msg_controllen = 0;
+#endif
+#ifdef HAVE_MSGHDR_FLAGS
msg_ucast.msg_flags = 0;
-#else
+#endif
+#ifdef HAVE_MSGHDR_ACCRIGHTS
msg_ucast.msg_accrights = NULL;
+#endif
+#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
msg_ucast.msg_accrightslen = 0;
#endif
/*
* Transmit unicast message
* An error here is recovered by totemsrp
*/
res = sendmsg (instance->totemudp_sockets.mcast_send, &msg_ucast,
MSG_NOSIGNAL);
if (res < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
"sendmsg(ucast) failed (non-critical)");
}
}
static inline void mcast_sendmsg (
struct totemudp_instance *instance,
const void *msg,
unsigned int msg_len)
{
struct msghdr msg_mcast;
int res = 0;
size_t buf_out_len;
unsigned char buf_out[FRAME_SIZE_MAX];
struct iovec iovec;
struct sockaddr_storage sockaddr;
int addrlen;
/*
* Encrypt and digest the message
*/
if (crypto_encrypt_and_sign (
instance->crypto_inst,
(const unsigned char *)msg,
msg_len,
buf_out,
&buf_out_len) != 0) {
log_printf(LOGSYS_LEVEL_CRIT, "Error encrypting/signing packet (non-critical)");
return;
}
iovec.iov_base = (void *)&buf_out;
iovec.iov_len = buf_out_len;
/*
* Build multicast message
*/
totemip_totemip_to_sockaddr_convert(&instance->mcast_address,
instance->totem_interface->ip_port, &sockaddr, &addrlen);
msg_mcast.msg_name = &sockaddr;
msg_mcast.msg_namelen = addrlen;
msg_mcast.msg_iov = (void *)&iovec;
msg_mcast.msg_iovlen = 1;
-#if !defined(COROSYNC_SOLARIS)
+#ifdef HAVE_MSGHDR_CONTROL
msg_mcast.msg_control = 0;
+#endif
+#ifdef HAVE_MSGHDR_CONTROLLEN
msg_mcast.msg_controllen = 0;
+#endif
+#ifdef HAVE_MSGHDR_FLAGS
msg_mcast.msg_flags = 0;
-#else
+#endif
+#ifdef HAVE_MSGHDR_ACCRIGHTS
msg_mcast.msg_accrights = NULL;
+#endif
+#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
msg_mcast.msg_accrightslen = 0;
#endif
/*
* Transmit multicast message
* An error here is recovered by totemsrp
*/
res = sendmsg (instance->totemudp_sockets.mcast_send, &msg_mcast,
MSG_NOSIGNAL);
if (res < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
"sendmsg(mcast) failed (non-critical)");
}
}
int totemudp_finalize (
void *udp_context)
{
struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
int res = 0;
if (instance->totemudp_sockets.mcast_recv > 0) {
qb_loop_poll_del (instance->totemudp_poll_handle,
instance->totemudp_sockets.mcast_recv);
close (instance->totemudp_sockets.mcast_recv);
}
if (instance->totemudp_sockets.mcast_send > 0) {
close (instance->totemudp_sockets.mcast_send);
}
if (instance->totemudp_sockets.token > 0) {
qb_loop_poll_del (instance->totemudp_poll_handle,
instance->totemudp_sockets.token);
close (instance->totemudp_sockets.token);
}
return (res);
}
/*
* Only designed to work with a message with one iov
*/
static int net_deliver_fn (
int fd,
int revents,
void *data)
{
struct totemudp_instance *instance = (struct totemudp_instance *)data;
struct msghdr msg_recv;
struct iovec *iovec;
struct sockaddr_storage system_from;
int bytes_received;
int res = 0;
char *message_type;
if (instance->flushing == 1) {
iovec = &instance->totemudp_iov_recv_flush;
} else {
iovec = &instance->totemudp_iov_recv;
}
/*
* Receive datagram
*/
msg_recv.msg_name = &system_from;
msg_recv.msg_namelen = sizeof (struct sockaddr_storage);
msg_recv.msg_iov = iovec;
msg_recv.msg_iovlen = 1;
-#if !defined(COROSYNC_SOLARIS)
+#ifdef HAVE_MSGHDR_CONTROL
msg_recv.msg_control = 0;
+#endif
+#ifdef HAVE_MSGHDR_CONTROLLEN
msg_recv.msg_controllen = 0;
+#endif
+#ifdef HAVE_MSGHDR_FLAGS
msg_recv.msg_flags = 0;
-#else
+#endif
+#ifdef HAVE_MSGHDR_ACCRIGHTS
msg_recv.msg_accrights = NULL;
+#endif
+#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
msg_recv.msg_accrightslen = 0;
#endif
bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT);
if (bytes_received == -1) {
return (0);
} else {
instance->stats_recv += bytes_received;
}
/*
* Authenticate and if authenticated, decrypt datagram
*/
res = crypto_authenticate_and_decrypt (instance->crypto_inst, iovec->iov_base, &bytes_received);
if (res == -1) {
log_printf (instance->totemudp_log_level_security, "Received message has invalid digest... ignoring.");
log_printf (instance->totemudp_log_level_security,
"Invalid packet data");
iovec->iov_len = FRAME_SIZE_MAX;
return 0;
}
iovec->iov_len = bytes_received;
/*
* Drop all non-mcast messages (more specifically join
* messages should be dropped)
*/
message_type = (char *)iovec->iov_base;
if (instance->flushing == 1 && *message_type == MESSAGE_TYPE_MEMB_JOIN) {
iovec->iov_len = FRAME_SIZE_MAX;
return (0);
}
/*
* Handle incoming message
*/
instance->totemudp_deliver_fn (
instance->context,
iovec->iov_base,
iovec->iov_len);
iovec->iov_len = FRAME_SIZE_MAX;
return (0);
}
static int netif_determine (
struct totemudp_instance *instance,
struct totem_ip_address *bindnet,
struct totem_ip_address *bound_to,
int *interface_up,
int *interface_num)
{
int res;
res = totemip_iface_check (bindnet, bound_to,
interface_up, interface_num,
instance->totem_config->clear_node_high_bit);
return (res);
}
/*
* If the interface is up, the sockets for totem are built. If the interface is down
* this function is requeued in the timer list to retry building the sockets later.
*/
static void timer_function_netif_check_timeout (
void *data)
{
struct totemudp_instance *instance = (struct totemudp_instance *)data;
int interface_up;
int interface_num;
struct totem_ip_address *bind_address;
/*
* Build sockets for every interface
*/
netif_determine (instance,
&instance->totem_interface->bindnet,
&instance->totem_interface->boundto,
&interface_up, &interface_num);
/*
* If the network interface isn't back up and we are already
* in loopback mode, add timer to check again and return
*/
if ((instance->netif_bind_state == BIND_STATE_LOOPBACK &&
interface_up == 0) ||
(instance->my_memb_entries == 1 &&
instance->netif_bind_state == BIND_STATE_REGULAR &&
interface_up == 1)) {
qb_loop_timer_add (instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
/*
* Add a timer to check for a downed regular interface
*/
return;
}
if (instance->totemudp_sockets.mcast_recv > 0) {
qb_loop_poll_del (instance->totemudp_poll_handle,
instance->totemudp_sockets.mcast_recv);
close (instance->totemudp_sockets.mcast_recv);
}
if (instance->totemudp_sockets.mcast_send > 0) {
close (instance->totemudp_sockets.mcast_send);
}
if (instance->totemudp_sockets.token > 0) {
qb_loop_poll_del (instance->totemudp_poll_handle,
instance->totemudp_sockets.token);
close (instance->totemudp_sockets.token);
}
if (interface_up == 0) {
/*
* Interface is not up
*/
instance->netif_bind_state = BIND_STATE_LOOPBACK;
bind_address = &localhost;
/*
* Add a timer to retry building interfaces and request memb_gather_enter
*/
qb_loop_timer_add (instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
} else {
/*
* Interface is up
*/
instance->netif_bind_state = BIND_STATE_REGULAR;
bind_address = &instance->totem_interface->bindnet;
}
/*
* Create and bind the multicast and unicast sockets
*/
(void)totemudp_build_sockets (instance,
&instance->mcast_address,
bind_address,
&instance->totemudp_sockets,
&instance->totem_interface->boundto);
qb_loop_poll_add (
instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totemudp_sockets.mcast_recv,
POLLIN, instance, net_deliver_fn);
qb_loop_poll_add (
instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totemudp_sockets.token,
POLLIN, instance, net_deliver_fn);
totemip_copy (&instance->my_id, &instance->totem_interface->boundto);
/*
* This reports changes in the interface to the user and totemsrp
*/
if (instance->netif_bind_state == BIND_STATE_REGULAR) {
if (instance->netif_state_report & NETIF_STATE_REPORT_UP) {
log_printf (instance->totemudp_log_level_notice,
"The network interface [%s] is now up.",
totemip_print (&instance->totem_interface->boundto));
instance->netif_state_report = NETIF_STATE_REPORT_DOWN;
instance->totemudp_iface_change_fn (instance->context, &instance->my_id);
}
/*
* Add a timer to check for interface going down in single membership
*/
if (instance->my_memb_entries == 1) {
qb_loop_timer_add (instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
}
} else {
if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) {
log_printf (instance->totemudp_log_level_notice,
"The network interface is down.");
instance->totemudp_iface_change_fn (instance->context, &instance->my_id);
}
instance->netif_state_report = NETIF_STATE_REPORT_UP;
}
}
/* Set the socket priority to INTERACTIVE to ensure
that our messages don't get queued behind anything else */
static void totemudp_traffic_control_set(struct totemudp_instance *instance, int sock)
{
#ifdef SO_PRIORITY
int prio = 6; /* TC_PRIO_INTERACTIVE */
if (setsockopt(sock, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(int))) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set traffic priority");
}
#endif
}
static int totemudp_build_sockets_ip (
struct totemudp_instance *instance,
struct totem_ip_address *mcast_address,
struct totem_ip_address *bindnet_address,
struct totemudp_socket *sockets,
struct totem_ip_address *bound_to,
int interface_num)
{
struct sockaddr_storage sockaddr;
struct ipv6_mreq mreq6;
struct ip_mreq mreq;
struct sockaddr_storage mcast_ss, boundto_ss;
struct sockaddr_in6 *mcast_sin6 = (struct sockaddr_in6 *)&mcast_ss;
struct sockaddr_in *mcast_sin = (struct sockaddr_in *)&mcast_ss;
struct sockaddr_in *boundto_sin = (struct sockaddr_in *)&boundto_ss;
unsigned int sendbuf_size;
unsigned int recvbuf_size;
unsigned int optlen = sizeof (sendbuf_size);
int addrlen;
int res;
int flag;
/*
* Create multicast recv socket
*/
sockets->mcast_recv = socket (bindnet_address->family, SOCK_DGRAM, 0);
if (sockets->mcast_recv == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"socket() failed");
return (-1);
}
totemip_nosigpipe (sockets->mcast_recv);
res = fcntl (sockets->mcast_recv, F_SETFL, O_NONBLOCK);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Could not set non-blocking operation on multicast socket");
return (-1);
}
/*
* Force reuse
*/
flag = 1;
if ( setsockopt(sockets->mcast_recv, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"setsockopt(SO_REUSEADDR) failed");
return (-1);
}
/*
* Bind to multicast socket used for multicast receives
*/
totemip_totemip_to_sockaddr_convert(mcast_address,
instance->totem_interface->ip_port, &sockaddr, &addrlen);
res = bind (sockets->mcast_recv, (struct sockaddr *)&sockaddr, addrlen);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Unable to bind the socket to receive multicast packets");
return (-1);
}
/*
* Setup mcast send socket
*/
sockets->mcast_send = socket (bindnet_address->family, SOCK_DGRAM, 0);
if (sockets->mcast_send == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"socket() failed");
return (-1);
}
totemip_nosigpipe (sockets->mcast_send);
res = fcntl (sockets->mcast_send, F_SETFL, O_NONBLOCK);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Could not set non-blocking operation on multicast socket");
return (-1);
}
/*
* Force reuse
*/
flag = 1;
if ( setsockopt(sockets->mcast_send, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"setsockopt(SO_REUSEADDR) failed");
return (-1);
}
totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port - 1,
&sockaddr, &addrlen);
res = bind (sockets->mcast_send, (struct sockaddr *)&sockaddr, addrlen);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Unable to bind the socket to send multicast packets");
return (-1);
}
/*
* Setup unicast socket
*/
sockets->token = socket (bindnet_address->family, SOCK_DGRAM, 0);
if (sockets->token == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"socket() failed");
return (-1);
}
totemip_nosigpipe (sockets->token);
res = fcntl (sockets->token, F_SETFL, O_NONBLOCK);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Could not set non-blocking operation on token socket");
return (-1);
}
/*
* Force reuse
*/
flag = 1;
if ( setsockopt(sockets->token, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"setsockopt(SO_REUSEADDR) failed");
return (-1);
}
/*
* Bind to unicast socket used for token send/receives
* This has the side effect of binding to the correct interface
*/
totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &sockaddr, &addrlen);
res = bind (sockets->token, (struct sockaddr *)&sockaddr, addrlen);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Unable to bind UDP unicast socket");
return (-1);
}
recvbuf_size = MCAST_SOCKET_BUFFER_SIZE;
sendbuf_size = MCAST_SOCKET_BUFFER_SIZE;
/*
* Set buffer sizes to avoid overruns
*/
res = setsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen);
res = setsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen);
res = getsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, &optlen);
if (res == 0) {
log_printf (instance->totemudp_log_level_debug,
"Receive multicast socket recv buffer size (%d bytes).", recvbuf_size);
}
res = getsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, &optlen);
if (res == 0) {
log_printf (instance->totemudp_log_level_debug,
"Transmit multicast socket send buffer size (%d bytes).", sendbuf_size);
}
/*
* Join group membership on socket
*/
totemip_totemip_to_sockaddr_convert(mcast_address, instance->totem_interface->ip_port, &mcast_ss, &addrlen);
totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &boundto_ss, &addrlen);
if (instance->totem_config->broadcast_use == 1) {
unsigned int broadcast = 1;
if ((setsockopt(sockets->mcast_recv, SOL_SOCKET,
SO_BROADCAST, &broadcast, sizeof (broadcast))) == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"setting broadcast option failed");
return (-1);
}
if ((setsockopt(sockets->mcast_send, SOL_SOCKET,
SO_BROADCAST, &broadcast, sizeof (broadcast))) == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"setting broadcast option failed");
return (-1);
}
} else {
switch (bindnet_address->family) {
case AF_INET:
memset(&mreq, 0, sizeof(mreq));
mreq.imr_multiaddr.s_addr = mcast_sin->sin_addr.s_addr;
mreq.imr_interface.s_addr = boundto_sin->sin_addr.s_addr;
res = setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_ADD_MEMBERSHIP,
&mreq, sizeof (mreq));
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"join ipv4 multicast group failed");
return (-1);
}
break;
case AF_INET6:
memset(&mreq6, 0, sizeof(mreq6));
memcpy(&mreq6.ipv6mr_multiaddr, &mcast_sin6->sin6_addr, sizeof(struct in6_addr));
mreq6.ipv6mr_interface = interface_num;
res = setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_JOIN_GROUP,
&mreq6, sizeof (mreq6));
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"join ipv6 multicast group failed");
return (-1);
}
break;
}
}
/*
* Turn on multicast loopback
*/
flag = 1;
switch ( bindnet_address->family ) {
case AF_INET:
res = setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_LOOP,
&flag, sizeof (flag));
break;
case AF_INET6:
res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
&flag, sizeof (flag));
}
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Unable to turn on multicast loopback");
return (-1);
}
/*
* Set multicast packets TTL
*/
flag = instance->totem_interface->ttl;
if (bindnet_address->family == AF_INET6) {
res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
&flag, sizeof (flag));
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"set mcast v6 TTL failed");
return (-1);
}
} else {
res = setsockopt(sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_TTL,
&flag, sizeof(flag));
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"set mcast v4 TTL failed");
return (-1);
}
}
/*
* Bind to a specific interface for multicast send and receive
*/
switch ( bindnet_address->family ) {
case AF_INET:
if (setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_IF,
&boundto_sin->sin_addr, sizeof (boundto_sin->sin_addr)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"cannot select interface for multicast packets (send)");
return (-1);
}
if (setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_MULTICAST_IF,
&boundto_sin->sin_addr, sizeof (boundto_sin->sin_addr)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"cannot select interface for multicast packets (recv)");
return (-1);
}
break;
case AF_INET6:
if (setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_IF,
&interface_num, sizeof (interface_num)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"cannot select interface for multicast packets (send v6)");
return (-1);
}
if (setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_MULTICAST_IF,
&interface_num, sizeof (interface_num)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"cannot select interface for multicast packets (recv v6)");
return (-1);
}
break;
}
return 0;
}
static int totemudp_build_sockets (
struct totemudp_instance *instance,
struct totem_ip_address *mcast_address,
struct totem_ip_address *bindnet_address,
struct totemudp_socket *sockets,
struct totem_ip_address *bound_to)
{
int interface_num;
int interface_up;
int res;
/*
* Determine the ip address bound to and the interface name
*/
res = netif_determine (instance,
bindnet_address,
bound_to,
&interface_up,
&interface_num);
if (res == -1) {
return (-1);
}
totemip_copy(&instance->my_id, bound_to);
res = totemudp_build_sockets_ip (instance, mcast_address,
bindnet_address, sockets, bound_to, interface_num);
/* We only send out of the token socket */
totemudp_traffic_control_set(instance, sockets->token);
return res;
}
/*
* Totem Network interface - also does encryption/decryption
* depends on poll abstraction, POSIX, IPV4
*/
/*
* Create an instance
*/
int totemudp_initialize (
qb_loop_t *poll_handle,
void **udp_context,
struct totem_config *totem_config,
int interface_no,
void *context,
void (*deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len),
void (*iface_change_fn) (
void *context,
const struct totem_ip_address *iface_address),
void (*target_set_completed) (
void *context))
{
struct totemudp_instance *instance;
instance = malloc (sizeof (struct totemudp_instance));
if (instance == NULL) {
return (-1);
}
totemudp_instance_initialize (instance);
instance->totem_config = totem_config;
/*
* Configure logging
*/
instance->totemudp_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security;
instance->totemudp_log_level_error = totem_config->totem_logging_configuration.log_level_error;
instance->totemudp_log_level_warning = totem_config->totem_logging_configuration.log_level_warning;
instance->totemudp_log_level_notice = totem_config->totem_logging_configuration.log_level_notice;
instance->totemudp_log_level_debug = totem_config->totem_logging_configuration.log_level_debug;
instance->totemudp_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
instance->totemudp_log_printf = totem_config->totem_logging_configuration.log_printf;
/*
* Initialize random number generator for later use to generate salt
*/
instance->crypto_inst = crypto_init (totem_config->private_key,
totem_config->private_key_len,
totem_config->crypto_cipher_type,
totem_config->crypto_hash_type,
instance->totemudp_log_printf,
instance->totemudp_log_level_security,
instance->totemudp_log_level_notice,
instance->totemudp_log_level_error,
instance->totemudp_subsys_id);
if (instance->crypto_inst == NULL) {
return (-1);
}
/*
* Initialize local variables for totemudp
*/
instance->totem_interface = &totem_config->interfaces[interface_no];
totemip_copy (&instance->mcast_address, &instance->totem_interface->mcast_addr);
memset (instance->iov_buffer, 0, FRAME_SIZE_MAX);
instance->totemudp_poll_handle = poll_handle;
instance->totem_interface->bindnet.nodeid = instance->totem_config->node_id;
instance->context = context;
instance->totemudp_deliver_fn = deliver_fn;
instance->totemudp_iface_change_fn = iface_change_fn;
instance->totemudp_target_set_completed = target_set_completed;
totemip_localhost (instance->mcast_address.family, &localhost);
localhost.nodeid = instance->totem_config->node_id;
/*
* RRP layer isn't ready to receive message because it hasn't
* initialized yet. Add short timer to check the interfaces.
*/
qb_loop_timer_add (instance->totemudp_poll_handle,
QB_LOOP_MED,
100*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
*udp_context = instance;
return (0);
}
void *totemudp_buffer_alloc (void)
{
return malloc (FRAME_SIZE_MAX);
}
void totemudp_buffer_release (void *ptr)
{
return free (ptr);
}
int totemudp_processor_count_set (
void *udp_context,
int processor_count)
{
struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
int res = 0;
instance->my_memb_entries = processor_count;
qb_loop_timer_del (instance->totemudp_poll_handle,
instance->timer_netif_check_timeout);
if (processor_count == 1) {
qb_loop_timer_add (instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
}
return (res);
}
int totemudp_recv_flush (void *udp_context)
{
struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
struct pollfd ufd;
int nfds;
int res = 0;
instance->flushing = 1;
do {
ufd.fd = instance->totemudp_sockets.mcast_recv;
ufd.events = POLLIN;
nfds = poll (&ufd, 1, 0);
if (nfds == 1 && ufd.revents & POLLIN) {
net_deliver_fn (instance->totemudp_sockets.mcast_recv,
ufd.revents, instance);
}
} while (nfds == 1);
instance->flushing = 0;
return (res);
}
int totemudp_send_flush (void *udp_context)
{
return 0;
}
int totemudp_token_send (
void *udp_context,
const void *msg,
unsigned int msg_len)
{
struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
int res = 0;
ucast_sendmsg (instance, &instance->token_target, msg, msg_len);
return (res);
}
int totemudp_mcast_flush_send (
void *udp_context,
const void *msg,
unsigned int msg_len)
{
struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
int res = 0;
mcast_sendmsg (instance, msg, msg_len);
return (res);
}
int totemudp_mcast_noflush_send (
void *udp_context,
const void *msg,
unsigned int msg_len)
{
struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
int res = 0;
mcast_sendmsg (instance, msg, msg_len);
return (res);
}
extern int totemudp_iface_check (void *udp_context)
{
struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
int res = 0;
timer_function_netif_check_timeout (instance);
return (res);
}
extern void totemudp_net_mtu_adjust (void *udp_context, struct totem_config *totem_config)
{
#define UDPIP_HEADER_SIZE (20 + 8) /* 20 bytes for ip 8 bytes for udp */
totem_config->net_mtu -= crypto_sec_header_size(totem_config->crypto_cipher_type,
totem_config->crypto_hash_type) +
UDPIP_HEADER_SIZE;
}
const char *totemudp_iface_print (void *udp_context) {
struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
const char *ret_char;
ret_char = totemip_print (&instance->my_id);
return (ret_char);
}
int totemudp_iface_get (
void *udp_context,
struct totem_ip_address *addr)
{
struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
int res = 0;
memcpy (addr, &instance->my_id, sizeof (struct totem_ip_address));
return (res);
}
int totemudp_token_target_set (
void *udp_context,
const struct totem_ip_address *token_target)
{
struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
int res = 0;
memcpy (&instance->token_target, token_target,
sizeof (struct totem_ip_address));
instance->totemudp_target_set_completed (instance->context);
return (res);
}
extern int totemudp_recv_mcast_empty (
void *udp_context)
{
struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
unsigned int res;
struct sockaddr_storage system_from;
struct msghdr msg_recv;
struct pollfd ufd;
int nfds;
int msg_processed = 0;
/*
* Receive datagram
*/
msg_recv.msg_name = &system_from;
msg_recv.msg_namelen = sizeof (struct sockaddr_storage);
msg_recv.msg_iov = &instance->totemudp_iov_recv_flush;
msg_recv.msg_iovlen = 1;
-#if !defined(COROSYNC_SOLARIS)
+#ifdef HAVE_MSGHDR_CONTROL
msg_recv.msg_control = 0;
+#endif
+#ifdef HAVE_MSGHDR_CONTROLLEN
msg_recv.msg_controllen = 0;
+#endif
+#ifdef HAVE_MSGHDR_FLAGS
msg_recv.msg_flags = 0;
-#else
+#endif
+#ifdef HAVE_MSGHDR_ACCRIGHTS
msg_recv.msg_accrights = NULL;
+#endif
+#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
msg_recv.msg_accrightslen = 0;
#endif
do {
ufd.fd = instance->totemudp_sockets.mcast_recv;
ufd.events = POLLIN;
nfds = poll (&ufd, 1, 0);
if (nfds == 1 && ufd.revents & POLLIN) {
res = recvmsg (instance->totemudp_sockets.mcast_recv, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT);
if (res != -1) {
msg_processed = 1;
} else {
msg_processed = -1;
}
}
} while (nfds == 1);
return (msg_processed);
}
diff --git a/exec/totemudpu.c b/exec/totemudpu.c
index 314d1862..e9bf7598 100644
--- a/exec/totemudpu.c
+++ b/exec/totemudpu.c
@@ -1,1127 +1,1155 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <assert.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netdb.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <sched.h>
#include <time.h>
#include <sys/time.h>
#include <sys/poll.h>
#include <limits.h>
#include <qb/qbdefs.h>
#include <qb/qbloop.h>
#include <corosync/sq.h>
#include <corosync/list.h>
#include <corosync/swab.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/logsys.h>
#include "totemudpu.h"
#include "util.h"
#include "totemcrypto.h"
#include <nss.h>
#include <pk11pub.h>
#include <pkcs11.h>
#include <prerror.h>
#ifndef MSG_NOSIGNAL
#define MSG_NOSIGNAL 0
#endif
#define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX)
#define NETIF_STATE_REPORT_UP 1
#define NETIF_STATE_REPORT_DOWN 2
#define BIND_STATE_UNBOUND 0
#define BIND_STATE_REGULAR 1
#define BIND_STATE_LOOPBACK 2
struct totemudpu_member {
struct list_head list;
struct totem_ip_address member;
int fd;
};
struct totemudpu_instance {
struct crypto_instance *crypto_inst;
qb_loop_t *totemudpu_poll_handle;
struct totem_interface *totem_interface;
int netif_state_report;
int netif_bind_state;
void *context;
void (*totemudpu_deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len);
void (*totemudpu_iface_change_fn) (
void *context,
const struct totem_ip_address *iface_address);
void (*totemudpu_target_set_completed) (void *context);
/*
* Function and data used to log messages
*/
int totemudpu_log_level_security;
int totemudpu_log_level_error;
int totemudpu_log_level_warning;
int totemudpu_log_level_notice;
int totemudpu_log_level_debug;
int totemudpu_subsys_id;
void (*totemudpu_log_printf) (
int level,
int subsys,
const char *function,
const char *file,
int line,
const char *format,
...)__attribute__((format(printf, 6, 7)));
void *udpu_context;
char iov_buffer[FRAME_SIZE_MAX];
struct iovec totemudpu_iov_recv;
struct list_head member_list;
int stats_sent;
int stats_recv;
int stats_delv;
int stats_remcasts;
int stats_orf_token;
struct timeval stats_tv_start;
struct totem_ip_address my_id;
int firstrun;
qb_loop_timer_handle timer_netif_check_timeout;
unsigned int my_memb_entries;
struct totem_config *totem_config;
struct totem_ip_address token_target;
int token_socket;
};
struct work_item {
const void *msg;
unsigned int msg_len;
struct totemudpu_instance *instance;
};
static int totemudpu_build_sockets (
struct totemudpu_instance *instance,
struct totem_ip_address *bindnet_address,
struct totem_ip_address *bound_to);
static int totemudpu_create_sending_socket(
void *udpu_context,
const struct totem_ip_address *member);
int totemudpu_member_list_rebind_ip (
void *udpu_context);
static struct totem_ip_address localhost;
static void totemudpu_instance_initialize (struct totemudpu_instance *instance)
{
memset (instance, 0, sizeof (struct totemudpu_instance));
instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN;
instance->totemudpu_iov_recv.iov_base = instance->iov_buffer;
instance->totemudpu_iov_recv.iov_len = FRAME_SIZE_MAX; //sizeof (instance->iov_buffer);
/*
* There is always atleast 1 processor
*/
instance->my_memb_entries = 1;
list_init (&instance->member_list);
}
#define log_printf(level, format, args...) \
do { \
instance->totemudpu_log_printf ( \
level, instance->totemudpu_subsys_id, \
__FUNCTION__, __FILE__, __LINE__, \
(const char *)format, ##args); \
} while (0);
#define LOGSYS_PERROR(err_num, level, fmt, args...) \
do { \
char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \
const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \
instance->totemudpu_log_printf ( \
level, instance->totemudpu_subsys_id, \
__FUNCTION__, __FILE__, __LINE__, \
fmt ": %s (%d)", ##args, _error_ptr, err_num); \
} while(0)
int totemudpu_crypto_set (
void *udpu_context,
const char *cipher_type,
const char *hash_type)
{
return (0);
}
static inline void ucast_sendmsg (
struct totemudpu_instance *instance,
struct totem_ip_address *system_to,
const void *msg,
unsigned int msg_len)
{
struct msghdr msg_ucast;
int res = 0;
size_t buf_out_len;
unsigned char buf_out[FRAME_SIZE_MAX];
struct sockaddr_storage sockaddr;
struct iovec iovec;
int addrlen;
/*
* Encrypt and digest the message
*/
if (crypto_encrypt_and_sign (
instance->crypto_inst,
(const unsigned char *)msg,
msg_len,
buf_out,
&buf_out_len) != 0) {
log_printf(LOGSYS_LEVEL_CRIT, "Error encrypting/signing packet (non-critical)");
return;
}
iovec.iov_base = (void *)buf_out;
iovec.iov_len = buf_out_len;
/*
* Build unicast message
*/
totemip_totemip_to_sockaddr_convert(system_to,
instance->totem_interface->ip_port, &sockaddr, &addrlen);
msg_ucast.msg_name = &sockaddr;
msg_ucast.msg_namelen = addrlen;
msg_ucast.msg_iov = (void *)&iovec;
msg_ucast.msg_iovlen = 1;
-#if !defined(COROSYNC_SOLARIS)
+#ifdef HAVE_MSGHDR_CONTROL
msg_ucast.msg_control = 0;
+#endif
+#ifdef HAVE_MSGHDR_CONTROLLEN
msg_ucast.msg_controllen = 0;
+#endif
+#ifdef HAVE_MSGHDR_FLAGS
msg_ucast.msg_flags = 0;
-#else
+#endif
+#ifdef HAVE_MSGHDR_ACCRIGHTS
msg_ucast.msg_accrights = NULL;
+#endif
+#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
msg_ucast.msg_accrightslen = 0;
#endif
/*
* Transmit unicast message
* An error here is recovered by totemsrp
*/
res = sendmsg (instance->token_socket, &msg_ucast, MSG_NOSIGNAL);
if (res < 0) {
LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug,
"sendmsg(ucast) failed (non-critical)");
}
}
static inline void mcast_sendmsg (
struct totemudpu_instance *instance,
const void *msg,
unsigned int msg_len)
{
struct msghdr msg_mcast;
int res = 0;
size_t buf_out_len;
unsigned char buf_out[FRAME_SIZE_MAX];
struct iovec iovec;
struct sockaddr_storage sockaddr;
int addrlen;
struct list_head *list;
struct totemudpu_member *member;
/*
* Encrypt and digest the message
*/
if (crypto_encrypt_and_sign (
instance->crypto_inst,
(const unsigned char *)msg,
msg_len,
buf_out,
&buf_out_len) != 0) {
log_printf(LOGSYS_LEVEL_CRIT, "Error encrypting/signing packet (non-critical)");
return;
}
iovec.iov_base = (void *)buf_out;
iovec.iov_len = buf_out_len;
/*
* Build multicast message
*/
for (list = instance->member_list.next;
list != &instance->member_list;
list = list->next) {
member = list_entry (list,
struct totemudpu_member,
list);
totemip_totemip_to_sockaddr_convert(&member->member,
instance->totem_interface->ip_port, &sockaddr, &addrlen);
msg_mcast.msg_name = &sockaddr;
msg_mcast.msg_namelen = addrlen;
msg_mcast.msg_iov = (void *)&iovec;
msg_mcast.msg_iovlen = 1;
- #if !defined(COROSYNC_SOLARIS)
+ #ifdef HAVE_MSGHDR_CONTROL
msg_mcast.msg_control = 0;
+ #endif
+ #ifdef HAVE_MSGHDR_CONTROLLEN
msg_mcast.msg_controllen = 0;
+ #endif
+ #ifdef HAVE_MSGHDR_FLAGS
msg_mcast.msg_flags = 0;
- #else
+ #endif
+ #ifdef HAVE_MSGHDR_ACCRIGHTS
msg_mcast.msg_accrights = NULL;
+ #endif
+ #ifdef HAVE_MSGHDR_ACCRIGHTSLEN
msg_mcast.msg_accrightslen = 0;
#endif
/*
* Transmit multicast message
* An error here is recovered by totemsrp
*/
res = sendmsg (member->fd, &msg_mcast, MSG_NOSIGNAL);
if (res < 0) {
LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug,
"sendmsg(mcast) failed (non-critical)");
}
}
}
int totemudpu_finalize (
void *udpu_context)
{
struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
int res = 0;
if (instance->token_socket > 0) {
qb_loop_poll_del (instance->totemudpu_poll_handle,
instance->token_socket);
close (instance->token_socket);
}
return (res);
}
static int net_deliver_fn (
int fd,
int revents,
void *data)
{
struct totemudpu_instance *instance = (struct totemudpu_instance *)data;
struct msghdr msg_recv;
struct iovec *iovec;
struct sockaddr_storage system_from;
int bytes_received;
int res = 0;
iovec = &instance->totemudpu_iov_recv;
/*
* Receive datagram
*/
msg_recv.msg_name = &system_from;
msg_recv.msg_namelen = sizeof (struct sockaddr_storage);
msg_recv.msg_iov = iovec;
msg_recv.msg_iovlen = 1;
-#if !defined(COROSYNC_SOLARIS)
+#ifdef HAVE_MSGHDR_CONTROL
msg_recv.msg_control = 0;
+#endif
+#ifdef HAVE_MSGHDR_CONTROLLEN
msg_recv.msg_controllen = 0;
+#endif
+#ifdef HAVE_MSGHDR_FLAGS
msg_recv.msg_flags = 0;
-#else
+#endif
+#ifdef HAVE_MSGHDR_ACCRIGHTS
msg_recv.msg_accrights = NULL;
+#endif
+#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
msg_recv.msg_accrightslen = 0;
#endif
bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT);
if (bytes_received == -1) {
return (0);
} else {
instance->stats_recv += bytes_received;
}
/*
* Authenticate and if authenticated, decrypt datagram
*/
res = crypto_authenticate_and_decrypt (instance->crypto_inst, iovec->iov_base, &bytes_received);
if (res == -1) {
log_printf (instance->totemudpu_log_level_security, "Received message has invalid digest... ignoring.");
log_printf (instance->totemudpu_log_level_security,
"Invalid packet data");
iovec->iov_len = FRAME_SIZE_MAX;
return 0;
}
iovec->iov_len = bytes_received;
/*
* Handle incoming message
*/
instance->totemudpu_deliver_fn (
instance->context,
iovec->iov_base,
iovec->iov_len);
iovec->iov_len = FRAME_SIZE_MAX;
return (0);
}
static int netif_determine (
struct totemudpu_instance *instance,
struct totem_ip_address *bindnet,
struct totem_ip_address *bound_to,
int *interface_up,
int *interface_num)
{
int res;
res = totemip_iface_check (bindnet, bound_to,
interface_up, interface_num,
instance->totem_config->clear_node_high_bit);
return (res);
}
/*
* If the interface is up, the sockets for totem are built. If the interface is down
* this function is requeued in the timer list to retry building the sockets later.
*/
static void timer_function_netif_check_timeout (
void *data)
{
struct totemudpu_instance *instance = (struct totemudpu_instance *)data;
int interface_up;
int interface_num;
struct totem_ip_address *bind_address;
/*
* Build sockets for every interface
*/
netif_determine (instance,
&instance->totem_interface->bindnet,
&instance->totem_interface->boundto,
&interface_up, &interface_num);
/*
* If the network interface isn't back up and we are already
* in loopback mode, add timer to check again and return
*/
if ((instance->netif_bind_state == BIND_STATE_LOOPBACK &&
interface_up == 0) ||
(instance->my_memb_entries == 1 &&
instance->netif_bind_state == BIND_STATE_REGULAR &&
interface_up == 1)) {
qb_loop_timer_add (instance->totemudpu_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
/*
* Add a timer to check for a downed regular interface
*/
return;
}
if (instance->token_socket > 0) {
qb_loop_poll_del (instance->totemudpu_poll_handle,
instance->token_socket);
close (instance->token_socket);
}
if (interface_up == 0) {
/*
* Interface is not up
*/
instance->netif_bind_state = BIND_STATE_LOOPBACK;
bind_address = &localhost;
/*
* Add a timer to retry building interfaces and request memb_gather_enter
*/
qb_loop_timer_add (instance->totemudpu_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
} else {
/*
* Interface is up
*/
instance->netif_bind_state = BIND_STATE_REGULAR;
bind_address = &instance->totem_interface->bindnet;
}
/*
* Create and bind the multicast and unicast sockets
*/
totemudpu_build_sockets (instance,
bind_address,
&instance->totem_interface->boundto);
qb_loop_poll_add (instance->totemudpu_poll_handle,
QB_LOOP_MED,
instance->token_socket,
POLLIN, instance, net_deliver_fn);
totemip_copy (&instance->my_id, &instance->totem_interface->boundto);
/*
* This reports changes in the interface to the user and totemsrp
*/
if (instance->netif_bind_state == BIND_STATE_REGULAR) {
if (instance->netif_state_report & NETIF_STATE_REPORT_UP) {
log_printf (instance->totemudpu_log_level_notice,
"The network interface [%s] is now up.",
totemip_print (&instance->totem_interface->boundto));
instance->netif_state_report = NETIF_STATE_REPORT_DOWN;
instance->totemudpu_iface_change_fn (instance->context, &instance->my_id);
}
/*
* Add a timer to check for interface going down in single membership
*/
if (instance->my_memb_entries == 1) {
qb_loop_timer_add (instance->totemudpu_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
}
} else {
if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) {
log_printf (instance->totemudpu_log_level_notice,
"The network interface is down.");
instance->totemudpu_iface_change_fn (instance->context, &instance->my_id);
}
instance->netif_state_report = NETIF_STATE_REPORT_UP;
}
}
/* Set the socket priority to INTERACTIVE to ensure
that our messages don't get queued behind anything else */
static void totemudpu_traffic_control_set(struct totemudpu_instance *instance, int sock)
{
#ifdef SO_PRIORITY
int prio = 6; /* TC_PRIO_INTERACTIVE */
if (setsockopt(sock, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(int))) {
LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
"Could not set traffic priority");
}
#endif
}
static int totemudpu_build_sockets_ip (
struct totemudpu_instance *instance,
struct totem_ip_address *bindnet_address,
struct totem_ip_address *bound_to,
int interface_num)
{
struct sockaddr_storage sockaddr;
int addrlen;
int res;
unsigned int recvbuf_size;
unsigned int optlen = sizeof (recvbuf_size);
/*
* Setup unicast socket
*/
instance->token_socket = socket (bindnet_address->family, SOCK_DGRAM, 0);
if (instance->token_socket == -1) {
LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
"socket() failed");
return (-1);
}
totemip_nosigpipe (instance->token_socket);
res = fcntl (instance->token_socket, F_SETFL, O_NONBLOCK);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
"Could not set non-blocking operation on token socket");
return (-1);
}
/*
* Bind to unicast socket used for token send/receives
* This has the side effect of binding to the correct interface
*/
totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &sockaddr, &addrlen);
res = bind (instance->token_socket, (struct sockaddr *)&sockaddr, addrlen);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
"bind token socket failed");
return (-1);
}
/*
* the token_socket can receive many messages. Allow a large number
* of receive messages on this socket
*/
recvbuf_size = MCAST_SOCKET_BUFFER_SIZE;
res = setsockopt (instance->token_socket, SOL_SOCKET, SO_RCVBUF,
&recvbuf_size, optlen);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudpu_log_level_notice,
"Could not set recvbuf size");
}
return 0;
}
static int totemudpu_build_sockets (
struct totemudpu_instance *instance,
struct totem_ip_address *bindnet_address,
struct totem_ip_address *bound_to)
{
int interface_num;
int interface_up;
int res;
/*
* Determine the ip address bound to and the interface name
*/
res = netif_determine (instance,
bindnet_address,
bound_to,
&interface_up,
&interface_num);
if (res == -1) {
return (-1);
}
totemip_copy(&instance->my_id, bound_to);
res = totemudpu_build_sockets_ip (instance,
bindnet_address, bound_to, interface_num);
/* We only send out of the token socket */
totemudpu_traffic_control_set(instance, instance->token_socket);
/*
* Rebind all members to new ips
*/
totemudpu_member_list_rebind_ip(instance);
return res;
}
/*
* Totem Network interface - also does encryption/decryption
* depends on poll abstraction, POSIX, IPV4
*/
/*
* Create an instance
*/
int totemudpu_initialize (
qb_loop_t *poll_handle,
void **udpu_context,
struct totem_config *totem_config,
int interface_no,
void *context,
void (*deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len),
void (*iface_change_fn) (
void *context,
const struct totem_ip_address *iface_address),
void (*target_set_completed) (
void *context))
{
struct totemudpu_instance *instance;
instance = malloc (sizeof (struct totemudpu_instance));
if (instance == NULL) {
return (-1);
}
totemudpu_instance_initialize (instance);
instance->totem_config = totem_config;
/*
* Configure logging
*/
instance->totemudpu_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security;
instance->totemudpu_log_level_error = totem_config->totem_logging_configuration.log_level_error;
instance->totemudpu_log_level_warning = totem_config->totem_logging_configuration.log_level_warning;
instance->totemudpu_log_level_notice = totem_config->totem_logging_configuration.log_level_notice;
instance->totemudpu_log_level_debug = totem_config->totem_logging_configuration.log_level_debug;
instance->totemudpu_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
instance->totemudpu_log_printf = totem_config->totem_logging_configuration.log_printf;
/*
* Initialize random number generator for later use to generate salt
*/
instance->crypto_inst = crypto_init (totem_config->private_key,
totem_config->private_key_len,
totem_config->crypto_cipher_type,
totem_config->crypto_hash_type,
instance->totemudpu_log_printf,
instance->totemudpu_log_level_security,
instance->totemudpu_log_level_notice,
instance->totemudpu_log_level_error,
instance->totemudpu_subsys_id);
if (instance->crypto_inst == NULL) {
return (-1);
}
/*
* Initialize local variables for totemudpu
*/
instance->totem_interface = &totem_config->interfaces[interface_no];
memset (instance->iov_buffer, 0, FRAME_SIZE_MAX);
instance->totemudpu_poll_handle = poll_handle;
instance->totem_interface->bindnet.nodeid = instance->totem_config->node_id;
instance->context = context;
instance->totemudpu_deliver_fn = deliver_fn;
instance->totemudpu_iface_change_fn = iface_change_fn;
instance->totemudpu_target_set_completed = target_set_completed;
totemip_localhost (AF_INET, &localhost);
localhost.nodeid = instance->totem_config->node_id;
/*
* RRP layer isn't ready to receive message because it hasn't
* initialized yet. Add short timer to check the interfaces.
*/
qb_loop_timer_add (instance->totemudpu_poll_handle,
QB_LOOP_MED,
100*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
*udpu_context = instance;
return (0);
}
void *totemudpu_buffer_alloc (void)
{
return malloc (FRAME_SIZE_MAX);
}
void totemudpu_buffer_release (void *ptr)
{
return free (ptr);
}
int totemudpu_processor_count_set (
void *udpu_context,
int processor_count)
{
struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
int res = 0;
instance->my_memb_entries = processor_count;
qb_loop_timer_del (instance->totemudpu_poll_handle,
instance->timer_netif_check_timeout);
if (processor_count == 1) {
qb_loop_timer_add (instance->totemudpu_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
}
return (res);
}
int totemudpu_recv_flush (void *udpu_context)
{
int res = 0;
return (res);
}
int totemudpu_send_flush (void *udpu_context)
{
int res = 0;
return (res);
}
int totemudpu_token_send (
void *udpu_context,
const void *msg,
unsigned int msg_len)
{
struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
int res = 0;
ucast_sendmsg (instance, &instance->token_target, msg, msg_len);
return (res);
}
int totemudpu_mcast_flush_send (
void *udpu_context,
const void *msg,
unsigned int msg_len)
{
struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
int res = 0;
mcast_sendmsg (instance, msg, msg_len);
return (res);
}
int totemudpu_mcast_noflush_send (
void *udpu_context,
const void *msg,
unsigned int msg_len)
{
struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
int res = 0;
mcast_sendmsg (instance, msg, msg_len);
return (res);
}
extern int totemudpu_iface_check (void *udpu_context)
{
struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
int res = 0;
timer_function_netif_check_timeout (instance);
return (res);
}
extern void totemudpu_net_mtu_adjust (void *udpu_context, struct totem_config *totem_config)
{
#define UDPIP_HEADER_SIZE (20 + 8) /* 20 bytes for ip 8 bytes for udp */
totem_config->net_mtu -= crypto_sec_header_size(totem_config->crypto_cipher_type,
totem_config->crypto_hash_type) +
UDPIP_HEADER_SIZE;
}
const char *totemudpu_iface_print (void *udpu_context) {
struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
const char *ret_char;
ret_char = totemip_print (&instance->my_id);
return (ret_char);
}
int totemudpu_iface_get (
void *udpu_context,
struct totem_ip_address *addr)
{
struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
int res = 0;
memcpy (addr, &instance->my_id, sizeof (struct totem_ip_address));
return (res);
}
int totemudpu_token_target_set (
void *udpu_context,
const struct totem_ip_address *token_target)
{
struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
int res = 0;
memcpy (&instance->token_target, token_target,
sizeof (struct totem_ip_address));
instance->totemudpu_target_set_completed (instance->context);
return (res);
}
extern int totemudpu_recv_mcast_empty (
void *udpu_context)
{
struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
unsigned int res;
struct sockaddr_storage system_from;
struct msghdr msg_recv;
struct pollfd ufd;
int nfds;
int msg_processed = 0;
/*
* Receive datagram
*/
msg_recv.msg_name = &system_from;
msg_recv.msg_namelen = sizeof (struct sockaddr_storage);
msg_recv.msg_iov = &instance->totemudpu_iov_recv;
msg_recv.msg_iovlen = 1;
-#if !defined(COROSYNC_SOLARIS)
+#ifdef HAVE_MSGHDR_CONTROL
msg_recv.msg_control = 0;
+#endif
+#ifdef HAVE_MSGHDR_CONTROLLEN
msg_recv.msg_controllen = 0;
+#endif
+#ifdef HAVE_MSGHDR_FLAGS
msg_recv.msg_flags = 0;
-#else
+#endif
+#ifdef HAVE_MSGHDR_ACCRIGHTS
msg_recv.msg_accrights = NULL;
+#endif
+#ifdef HAVE_MSGHDR_ACCRIGHTSLEN
msg_recv.msg_accrightslen = 0;
#endif
do {
ufd.fd = instance->token_socket;
ufd.events = POLLIN;
nfds = poll (&ufd, 1, 0);
if (nfds == 1 && ufd.revents & POLLIN) {
res = recvmsg (instance->token_socket, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT);
if (res != -1) {
msg_processed = 1;
} else {
msg_processed = -1;
}
}
} while (nfds == 1);
return (msg_processed);
}
static int totemudpu_create_sending_socket(
void *udpu_context,
const struct totem_ip_address *member)
{
struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
int fd;
int res;
unsigned int sendbuf_size;
unsigned int optlen = sizeof (sendbuf_size);
struct sockaddr_storage sockaddr;
int addrlen;
fd = socket (member->family, SOCK_DGRAM, 0);
if (fd == -1) {
LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
"Could not create socket for new member");
return (-1);
}
totemip_nosigpipe (fd);
res = fcntl (fd, F_SETFL, O_NONBLOCK);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
"Could not set non-blocking operation on token socket");
return (-1);
}
/*
* These sockets are used to send multicast messages, so their buffers
* should be large
*/
sendbuf_size = MCAST_SOCKET_BUFFER_SIZE;
res = setsockopt (fd, SOL_SOCKET, SO_SNDBUF,
&sendbuf_size, optlen);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudpu_log_level_notice,
"Could not set sendbuf size");
}
/*
* Bind to sending interface
*/
totemip_totemip_to_sockaddr_convert(&instance->my_id, 0, &sockaddr, &addrlen);
res = bind (fd, (struct sockaddr *)&sockaddr, addrlen);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
"bind token socket failed");
return (-1);
}
return (fd);
}
int totemudpu_member_add (
void *udpu_context,
const struct totem_ip_address *member)
{
struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
struct totemudpu_member *new_member;
new_member = malloc (sizeof (struct totemudpu_member));
if (new_member == NULL) {
return (-1);
}
log_printf (LOGSYS_LEVEL_NOTICE, "adding new UDPU member {%s}",
totemip_print(member));
list_init (&new_member->list);
list_add_tail (&new_member->list, &instance->member_list);
memcpy (&new_member->member, member, sizeof (struct totem_ip_address));
new_member->fd = totemudpu_create_sending_socket(udpu_context, member);
return (0);
}
int totemudpu_member_remove (
void *udpu_context,
const struct totem_ip_address *token_target)
{
int found = 0;
struct list_head *list;
struct totemudpu_member *member;
struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
/*
* Find the member to remove and close its socket
*/
for (list = instance->member_list.next;
list != &instance->member_list;
list = list->next) {
member = list_entry (list,
struct totemudpu_member,
list);
if (totemip_compare (token_target, &member->member)==0) {
log_printf(LOGSYS_LEVEL_NOTICE,
"removing UDPU member {%s}",
totemip_print(&member->member));
if (member->fd > 0) {
log_printf(LOGSYS_LEVEL_DEBUG,
"Closing socket to: {%s}",
totemip_print(&member->member));
qb_loop_poll_del (instance->totemudpu_poll_handle,
member->fd);
close (member->fd);
}
found = 1;
break;
}
}
/*
* Delete the member from the list
*/
if (found) {
list_del (list);
}
instance = NULL;
return (0);
}
int totemudpu_member_list_rebind_ip (
void *udpu_context)
{
struct list_head *list;
struct totemudpu_member *member;
struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
for (list = instance->member_list.next;
list != &instance->member_list;
list = list->next) {
member = list_entry (list,
struct totemudpu_member,
list);
if (member->fd > 0) {
close (member->fd);
}
member->fd = totemudpu_create_sending_socket(udpu_context, &member->member);
}
return (0);
}
diff --git a/exec/votequorum.c b/exec/votequorum.c
index 30404a58..253f649a 100644
--- a/exec/votequorum.c
+++ b/exec/votequorum.c
@@ -1,2463 +1,2459 @@
/*
* Copyright (c) 2009-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Authors: Christine Caulfield (ccaulfie@redhat.com)
* Fabio M. Di Nitto (fdinitto@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <sys/types.h>
#include <stdint.h>
#include <qb/qbipc_common.h>
#include "quorum.h"
#include <corosync/corodefs.h>
#include <corosync/list.h>
#include <corosync/logsys.h>
#include <corosync/coroapi.h>
#include <corosync/icmap.h>
#include <corosync/ipc_votequorum.h>
#include "service.h"
LOGSYS_DECLARE_SUBSYS ("VOTEQ");
/*
* interface with corosync
*/
static struct corosync_api_v1 *corosync_api;
/*
* votequorum global config vars
*/
static char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN];
static struct cluster_node *qdevice = NULL;
static unsigned int qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT;
static uint8_t qdevice_can_operate = 1;
static void *qdevice_reg_conn = NULL;
static uint8_t qdevice_master_wins = 0;
static uint8_t two_node = 0;
static uint8_t wait_for_all = 0;
static uint8_t wait_for_all_status = 0;
static uint8_t auto_tie_breaker = 0;
static int lowest_node_id = -1;
#define DEFAULT_LMS_WIN 10000
static uint8_t last_man_standing = 0;
static uint32_t last_man_standing_window = DEFAULT_LMS_WIN;
static uint8_t allow_downscale = 0;
static uint32_t ev_barrier = 0;
/*
* votequorum_exec defines/structs/forward definitions
*/
struct req_exec_quorum_nodeinfo {
struct qb_ipc_request_header header __attribute__((aligned(8)));
uint32_t nodeid;
uint32_t votes;
uint32_t expected_votes;
uint32_t flags;
} __attribute__((packed));
struct req_exec_quorum_reconfigure {
struct qb_ipc_request_header header __attribute__((aligned(8)));
uint32_t nodeid;
uint32_t value;
uint8_t param;
uint8_t _pad0;
uint8_t _pad1;
uint8_t _pad2;
} __attribute__((packed));
struct req_exec_quorum_qdevice_reg {
struct qb_ipc_request_header header __attribute__((aligned(8)));
uint32_t operation;
char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN];
} __attribute__((packed));
struct req_exec_quorum_qdevice_reconfigure {
struct qb_ipc_request_header header __attribute__((aligned(8)));
char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN];
char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN];
} __attribute__((packed));
/*
* votequorum_exec onwire version (via totem)
*/
#include "votequorum.h"
/*
* votequorum_exec onwire messages (via totem)
*/
#define MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO 0
#define MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE 1
#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG 2
#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE 3
static void votequorum_exec_send_expectedvotes_notification(void);
static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context);
#define VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES 1
#define VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES 2
static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value);
/*
* used by req_exec_quorum_qdevice_reg
*/
#define VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER 0
#define VOTEQUORUM_QDEVICE_OPERATION_REGISTER 1
/*
* votequorum internal node status/view
*/
#define NODE_FLAGS_QUORATE 1
#define NODE_FLAGS_LEAVING 2
#define NODE_FLAGS_WFASTATUS 4
#define NODE_FLAGS_FIRST 8
#define NODE_FLAGS_QDEVICE_REGISTERED 16
#define NODE_FLAGS_QDEVICE_ALIVE 32
#define NODE_FLAGS_QDEVICE_CAST_VOTE 64
#define NODE_FLAGS_QDEVICE_MASTER_WINS 128
typedef enum {
NODESTATE_MEMBER=1,
NODESTATE_DEAD,
NODESTATE_LEAVING
} nodestate_t;
struct cluster_node {
int node_id;
nodestate_t state;
uint32_t votes;
uint32_t expected_votes;
uint32_t flags;
struct list_head list;
};
/*
* votequorum internal quorum status
*/
static uint8_t quorum;
static uint8_t cluster_is_quorate;
/*
* votequorum membership data
*/
static struct cluster_node *us;
static struct list_head cluster_members_list;
static unsigned int quorum_members[PROCESSOR_COUNT_MAX];
static int quorum_members_entries = 0;
static struct memb_ring_id quorum_ringid;
/*
* pre allocate all cluster_nodes + one for qdevice
*/
static struct cluster_node cluster_nodes[PROCESSOR_COUNT_MAX+2];
static int cluster_nodes_entries = 0;
/*
* votequorum tracking
*/
struct quorum_pd {
unsigned char track_flags;
int tracking_enabled;
uint64_t tracking_context;
struct list_head list;
void *conn;
};
static struct list_head trackers_list;
/*
* votequorum timers
*/
static corosync_timer_handle_t qdevice_timer;
static int qdevice_timer_set = 0;
static corosync_timer_handle_t last_man_standing_timer;
static int last_man_standing_timer_set = 0;
/*
* Service Interfaces required by service_message_handler struct
*/
static void votequorum_confchg_fn (
enum totem_configuration_type configuration_type,
const unsigned int *member_list, size_t member_list_entries,
const unsigned int *left_list, size_t left_list_entries,
const unsigned int *joined_list, size_t joined_list_entries,
const struct memb_ring_id *ring_id);
static quorum_set_quorate_fn_t quorum_callback;
/*
* votequorum_exec handler and definitions
*/
static char *votequorum_exec_init_fn (struct corosync_api_v1 *api);
static int votequorum_exec_exit_fn (void);
static int votequorum_exec_send_nodeinfo(uint32_t nodeid);
static void message_handler_req_exec_votequorum_nodeinfo (
const void *message,
unsigned int nodeid);
static void exec_votequorum_nodeinfo_endian_convert (void *message);
static void message_handler_req_exec_votequorum_reconfigure (
const void *message,
unsigned int nodeid);
static void exec_votequorum_reconfigure_endian_convert (void *message);
static void message_handler_req_exec_votequorum_qdevice_reg (
const void *message,
unsigned int nodeid);
static void exec_votequorum_qdevice_reg_endian_convert (void *message);
static void message_handler_req_exec_votequorum_qdevice_reconfigure (
const void *message,
unsigned int nodeid);
static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message);
static struct corosync_exec_handler votequorum_exec_engine[] =
{
{ /* 0 */
.exec_handler_fn = message_handler_req_exec_votequorum_nodeinfo,
.exec_endian_convert_fn = exec_votequorum_nodeinfo_endian_convert
},
{ /* 1 */
.exec_handler_fn = message_handler_req_exec_votequorum_reconfigure,
.exec_endian_convert_fn = exec_votequorum_reconfigure_endian_convert
},
{ /* 2 */
.exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reg,
.exec_endian_convert_fn = exec_votequorum_qdevice_reg_endian_convert
},
{ /* 3 */
.exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reconfigure,
.exec_endian_convert_fn = exec_votequorum_qdevice_reconfigure_endian_convert
},
};
/*
* Library Handler and Functions Definitions
*/
static int quorum_lib_init_fn (void *conn);
static int quorum_lib_exit_fn (void *conn);
static void message_handler_req_lib_votequorum_getinfo (void *conn,
const void *message);
static void message_handler_req_lib_votequorum_setexpected (void *conn,
const void *message);
static void message_handler_req_lib_votequorum_setvotes (void *conn,
const void *message);
static void message_handler_req_lib_votequorum_trackstart (void *conn,
const void *message);
static void message_handler_req_lib_votequorum_trackstop (void *conn,
const void *message);
static void message_handler_req_lib_votequorum_qdevice_register (void *conn,
const void *message);
static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn,
const void *message);
static void message_handler_req_lib_votequorum_qdevice_update (void *conn,
const void *message);
static void message_handler_req_lib_votequorum_qdevice_poll (void *conn,
const void *message);
static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn,
const void *message);
static struct corosync_lib_handler quorum_lib_service[] =
{
{ /* 0 */
.lib_handler_fn = message_handler_req_lib_votequorum_getinfo,
.flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 1 */
.lib_handler_fn = message_handler_req_lib_votequorum_setexpected,
.flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 2 */
.lib_handler_fn = message_handler_req_lib_votequorum_setvotes,
.flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 3 */
.lib_handler_fn = message_handler_req_lib_votequorum_trackstart,
.flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 4 */
.lib_handler_fn = message_handler_req_lib_votequorum_trackstop,
.flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 5 */
.lib_handler_fn = message_handler_req_lib_votequorum_qdevice_register,
.flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 6 */
.lib_handler_fn = message_handler_req_lib_votequorum_qdevice_unregister,
.flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 7 */
.lib_handler_fn = message_handler_req_lib_votequorum_qdevice_update,
.flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 8 */
.lib_handler_fn = message_handler_req_lib_votequorum_qdevice_poll,
.flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 9 */
.lib_handler_fn = message_handler_req_lib_votequorum_qdevice_master_wins,
.flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
}
};
static struct corosync_service_engine votequorum_service_engine = {
.name = "corosync vote quorum service v1.0",
.id = VOTEQUORUM_SERVICE,
.priority = 2,
.private_data_size = sizeof (struct quorum_pd),
.allow_inquorate = CS_LIB_ALLOW_INQUORATE,
.flow_control = COROSYNC_LIB_FLOW_CONTROL_REQUIRED,
.lib_init_fn = quorum_lib_init_fn,
.lib_exit_fn = quorum_lib_exit_fn,
.lib_engine = quorum_lib_service,
.lib_engine_count = sizeof (quorum_lib_service) / sizeof (struct corosync_lib_handler),
.exec_init_fn = votequorum_exec_init_fn,
.exec_exit_fn = votequorum_exec_exit_fn,
.exec_engine = votequorum_exec_engine,
.exec_engine_count = sizeof (votequorum_exec_engine) / sizeof (struct corosync_exec_handler),
.confchg_fn = votequorum_confchg_fn
};
struct corosync_service_engine *votequorum_get_service_engine_ver0 (void)
{
return (&votequorum_service_engine);
}
static struct default_service votequorum_service[] = {
{
.name = "corosync_votequorum",
.ver = 0,
.loader = votequorum_get_service_engine_ver0
},
};
/*
* common/utility macros/functions
*/
#define max(a,b) (((a) > (b)) ? (a) : (b))
#define list_iterate(v, head) \
for (v = (head)->next; v != head; v = v->next)
static void node_add_ordered(struct cluster_node *newnode)
{
struct cluster_node *node = NULL;
struct list_head *tmp;
struct list_head *newlist = &newnode->list;
ENTER();
list_iterate(tmp, &cluster_members_list) {
node = list_entry(tmp, struct cluster_node, list);
if (newnode->node_id < node->node_id) {
break;
}
}
if (!node) {
list_add(&newnode->list, &cluster_members_list);
} else {
newlist->prev = tmp->prev;
newlist->next = tmp;
tmp->prev->next = newlist;
tmp->prev = newlist;
}
LEAVE();
}
static struct cluster_node *allocate_node(unsigned int nodeid)
{
struct cluster_node *cl = NULL;
struct list_head *tmp;
ENTER();
if (cluster_nodes_entries <= PROCESSOR_COUNT_MAX + 1) {
cl = (struct cluster_node *)&cluster_nodes[cluster_nodes_entries];
cluster_nodes_entries++;
} else {
list_iterate(tmp, &cluster_members_list) {
cl = list_entry(tmp, struct cluster_node, list);
if (cl->state == NODESTATE_DEAD) {
break;
}
}
/*
* this should never happen
*/
if (!cl) {
log_printf(LOGSYS_LEVEL_CRIT, "Unable to find memory for node %u data!!", nodeid);
goto out;
}
list_del(tmp);
}
memset(cl, 0, sizeof(struct cluster_node));
cl->node_id = nodeid;
if (nodeid != VOTEQUORUM_QDEVICE_NODEID) {
node_add_ordered(cl);
}
out:
LEAVE();
return cl;
}
static struct cluster_node *find_node_by_nodeid(unsigned int nodeid)
{
struct cluster_node *node;
struct list_head *tmp;
ENTER();
if (nodeid == us->node_id) {
LEAVE();
return us;
}
if (nodeid == VOTEQUORUM_QDEVICE_NODEID) {
LEAVE();
return qdevice;
}
list_iterate(tmp, &cluster_members_list) {
node = list_entry(tmp, struct cluster_node, list);
if (node->node_id == nodeid) {
LEAVE();
return node;
}
}
LEAVE();
return NULL;
}
static void get_lowest_node_id(void)
{
struct cluster_node *node = NULL;
struct list_head *tmp;
ENTER();
lowest_node_id = us->node_id;
list_iterate(tmp, &cluster_members_list) {
node = list_entry(tmp, struct cluster_node, list);
if ((node->state == NODESTATE_MEMBER) &&
(node->node_id < lowest_node_id)) {
lowest_node_id = node->node_id;
}
}
log_printf(LOGSYS_LEVEL_DEBUG, "lowest node id: %d us: %d", lowest_node_id, us->node_id);
icmap_set_uint32("runtime.votequorum.lowest_node_id", lowest_node_id);
LEAVE();
}
static int check_low_node_id_partition(void)
{
struct cluster_node *node = NULL;
struct list_head *tmp;
int found = 0;
ENTER();
list_iterate(tmp, &cluster_members_list) {
node = list_entry(tmp, struct cluster_node, list);
if ((node->state == NODESTATE_MEMBER) &&
(node->node_id == lowest_node_id)) {
found = 1;
}
}
LEAVE();
return found;
}
static int check_qdevice_master(void)
{
struct cluster_node *node = NULL;
struct list_head *tmp;
int found = 0;
ENTER();
list_iterate(tmp, &cluster_members_list) {
node = list_entry(tmp, struct cluster_node, list);
if ((node->state == NODESTATE_MEMBER) &&
(node->flags & NODE_FLAGS_QDEVICE_MASTER_WINS) &&
(node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE)) {
found = 1;
}
}
LEAVE();
return found;
}
static void decode_flags(uint32_t flags)
{
ENTER();
log_printf(LOGSYS_LEVEL_DEBUG,
"flags: quorate: %s Leaving: %s WFA Status: %s First: %s Qdevice: %s QdeviceAlive: %s QdeviceCastVote: %s QdeviceMasterWins: %s",
(flags & NODE_FLAGS_QUORATE)?"Yes":"No",
(flags & NODE_FLAGS_LEAVING)?"Yes":"No",
(flags & NODE_FLAGS_WFASTATUS)?"Yes":"No",
(flags & NODE_FLAGS_FIRST)?"Yes":"No",
(flags & NODE_FLAGS_QDEVICE_REGISTERED)?"Yes":"No",
(flags & NODE_FLAGS_QDEVICE_ALIVE)?"Yes":"No",
(flags & NODE_FLAGS_QDEVICE_CAST_VOTE)?"Yes":"No",
(flags & NODE_FLAGS_QDEVICE_MASTER_WINS)?"Yes":"No");
LEAVE();
}
static void update_wait_for_all_status(uint8_t wfa_status)
{
ENTER();
wait_for_all_status = wfa_status;
if (wait_for_all_status) {
us->flags |= NODE_FLAGS_WFASTATUS;
} else {
us->flags &= ~NODE_FLAGS_WFASTATUS;
}
icmap_set_uint8("runtime.votequorum.wait_for_all_status",
wait_for_all_status);
LEAVE();
}
static void update_two_node(void)
{
ENTER();
icmap_set_uint8("runtime.votequorum.two_node", two_node);
LEAVE();
}
static void update_ev_barrier(uint32_t expected_votes)
{
ENTER();
ev_barrier = expected_votes;
icmap_set_uint32("runtime.votequorum.ev_barrier", ev_barrier);
LEAVE();
}
static void update_qdevice_can_operate(uint8_t status)
{
ENTER();
qdevice_can_operate = status;
icmap_set_uint8("runtime.votequorum.qdevice_can_operate", qdevice_can_operate);
LEAVE();
}
static void update_qdevice_master_wins(uint8_t allow)
{
ENTER();
qdevice_master_wins = allow;
icmap_set_uint8("runtime.votequorum.qdevice_master_wins", qdevice_master_wins);
LEAVE();
}
/*
* quorum calculation core bits
*/
static int calculate_quorum(int allow_decrease, unsigned int max_expected, unsigned int *ret_total_votes)
{
struct list_head *nodelist;
struct cluster_node *node;
unsigned int total_votes = 0;
unsigned int highest_expected = 0;
unsigned int newquorum, q1, q2;
unsigned int total_nodes = 0;
ENTER();
if ((allow_downscale) && (allow_decrease) && (max_expected)) {
max_expected = max(ev_barrier, max_expected);
}
list_iterate(nodelist, &cluster_members_list) {
node = list_entry(nodelist, struct cluster_node, list);
log_printf(LOGSYS_LEVEL_DEBUG, "node %u state=%d, votes=%u, expected=%u",
node->node_id, node->state, node->votes, node->expected_votes);
if (node->state == NODESTATE_MEMBER) {
if (max_expected) {
node->expected_votes = max_expected;
} else {
highest_expected = max(highest_expected, node->expected_votes);
}
total_votes += node->votes;
total_nodes++;
}
}
if (us->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) {
log_printf(LOGSYS_LEVEL_DEBUG, "node 0 state=1, votes=%u", qdevice->votes);
total_votes += qdevice->votes;
total_nodes++;
}
if (max_expected > 0) {
highest_expected = max_expected;
}
/*
* This quorum calculation is taken from the OpenVMS Cluster Systems
* manual, but, then, you guessed that didn't you
*/
q1 = (highest_expected + 2) / 2;
q2 = (total_votes + 2) / 2;
newquorum = max(q1, q2);
/*
* Normally quorum never decreases but the system administrator can
* force it down by setting expected votes to a maximum value
*/
if (!allow_decrease) {
newquorum = max(quorum, newquorum);
}
/*
* The special two_node mode allows each of the two nodes to retain
* quorum if the other fails. Only one of the two should live past
* fencing (as both nodes try to fence each other in split-brain.)
* Also: if there are more than two nodes, force us inquorate to avoid
* any damage or confusion.
*/
if (two_node && total_nodes <= 2) {
newquorum = 1;
}
if (ret_total_votes) {
*ret_total_votes = total_votes;
}
LEAVE();
return newquorum;
}
static void are_we_quorate(unsigned int total_votes)
{
int quorate;
int quorum_change = 0;
ENTER();
/*
* wait for all nodes to show up before granting quorum
*/
if ((wait_for_all) && (wait_for_all_status)) {
if (total_votes != us->expected_votes) {
log_printf(LOGSYS_LEVEL_NOTICE,
"Waiting for all cluster members. "
"Current votes: %d expected_votes: %d",
total_votes, us->expected_votes);
cluster_is_quorate = 0;
return;
}
update_wait_for_all_status(0);
}
if (quorum > total_votes) {
quorate = 0;
} else {
quorate = 1;
get_lowest_node_id();
}
if ((auto_tie_breaker) &&
(total_votes == (us->expected_votes / 2)) &&
(check_low_node_id_partition() == 1)) {
quorate = 1;
}
if ((qdevice_master_wins) &&
(!quorate) &&
(check_qdevice_master() == 1)) {
log_printf(LOGSYS_LEVEL_DEBUG, "node is quorate as part of master_wins partition");
quorate = 1;
}
if (cluster_is_quorate && !quorate) {
quorum_change = 1;
log_printf(LOGSYS_LEVEL_DEBUG, "quorum lost, blocking activity");
}
if (!cluster_is_quorate && quorate) {
quorum_change = 1;
log_printf(LOGSYS_LEVEL_DEBUG, "quorum regained, resuming activity");
}
cluster_is_quorate = quorate;
if (cluster_is_quorate) {
us->flags |= NODE_FLAGS_QUORATE;
} else {
us->flags &= ~NODE_FLAGS_QUORATE;
}
if (wait_for_all) {
if (quorate) {
update_wait_for_all_status(0);
} else {
update_wait_for_all_status(1);
}
}
if (quorum_change) {
quorum_callback(quorum_members, quorum_members_entries,
cluster_is_quorate, &quorum_ringid);
}
LEAVE();
}
static void get_total_votes(unsigned int *totalvotes, unsigned int *current_members)
{
unsigned int total_votes = 0;
unsigned int cluster_members = 0;
struct list_head *nodelist;
struct cluster_node *node;
ENTER();
list_iterate(nodelist, &cluster_members_list) {
node = list_entry(nodelist, struct cluster_node, list);
if (node->state == NODESTATE_MEMBER) {
cluster_members++;
total_votes += node->votes;
}
}
if (qdevice->votes) {
total_votes += qdevice->votes;
cluster_members++;
}
*totalvotes = total_votes;
*current_members = cluster_members;
LEAVE();
}
/*
* Recalculate cluster quorum, set quorate and notify changes
*/
static void recalculate_quorum(int allow_decrease, int by_current_nodes)
{
unsigned int total_votes = 0;
unsigned int cluster_members = 0;
ENTER();
get_total_votes(&total_votes, &cluster_members);
if (!by_current_nodes) {
cluster_members = 0;
}
/*
* Keep expected_votes at the highest number of votes in the cluster
*/
log_printf(LOGSYS_LEVEL_DEBUG, "total_votes=%d, expected_votes=%d", total_votes, us->expected_votes);
if (total_votes > us->expected_votes) {
us->expected_votes = total_votes;
votequorum_exec_send_expectedvotes_notification();
}
quorum = calculate_quorum(allow_decrease, cluster_members, &total_votes);
are_we_quorate(total_votes);
votequorum_exec_send_quorum_notification(NULL, 0L);
LEAVE();
}
/*
* configuration bits and pieces
*/
static int votequorum_read_nodelist_configuration(uint32_t *votes,
uint32_t *nodes,
uint32_t *expected_votes)
{
icmap_iter_t iter;
const char *iter_key;
char tmp_key[ICMAP_KEYNAME_MAXLEN];
uint32_t our_pos, node_pos;
uint32_t nodecount = 0;
uint32_t nodelist_expected_votes = 0;
uint32_t node_votes = 0;
int res = 0;
ENTER();
if (icmap_get_uint32("nodelist.local_node_pos", &our_pos) != CS_OK) {
log_printf(LOGSYS_LEVEL_DEBUG,
"No nodelist defined or our node is not in the nodelist");
return 0;
}
iter = icmap_iter_init("nodelist.node.");
while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key);
if (res != 2) {
continue;
}
if (strcmp(tmp_key, "ring0_addr") != 0) {
continue;
}
nodecount++;
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.quorum_votes", node_pos);
if (icmap_get_uint32(tmp_key, &node_votes) != CS_OK) {
node_votes = 1;
}
nodelist_expected_votes = nodelist_expected_votes + node_votes;
if (node_pos == our_pos) {
*votes = node_votes;
}
}
*expected_votes = nodelist_expected_votes;
*nodes = nodecount;
icmap_iter_finalize(iter);
LEAVE();
return 1;
}
static int votequorum_qdevice_is_configured(uint32_t *qdevice_votes)
{
char *qdevice_model = NULL;
ENTER();
if ((icmap_get_string("quorum.device.model", &qdevice_model) == CS_OK) &&
(strlen(qdevice_model))) {
free(qdevice_model);
if (icmap_get_uint32("quorum.device.votes", qdevice_votes) != CS_OK) {
*qdevice_votes = -1;
}
if (icmap_get_uint32("quorum.device.timeout", &qdevice_timeout) != CS_OK) {
qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT;
}
update_qdevice_can_operate(1);
return 1;
}
LEAVE();
return 0;
}
#define VOTEQUORUM_READCONFIG_STARTUP 0
#define VOTEQUORUM_READCONFIG_RUNTIME 1
static char *votequorum_readconfig(int runtime)
{
uint32_t node_votes = 0, qdevice_votes = 0;
uint32_t node_expected_votes = 0, expected_votes = 0;
uint32_t node_count = 0;
int have_nodelist, have_qdevice;
char *error = NULL;
ENTER();
log_printf(LOGSYS_LEVEL_DEBUG, "Reading configuration (runtime: %d)", runtime);
/*
* gather basic data here
*/
icmap_get_uint32("quorum.expected_votes", &expected_votes);
have_nodelist = votequorum_read_nodelist_configuration(&node_votes, &node_count, &node_expected_votes);
have_qdevice = votequorum_qdevice_is_configured(&qdevice_votes);
icmap_get_uint8("quorum.two_node", &two_node);
/*
* do config verification and enablement
*/
if ((!have_nodelist) && (!expected_votes)) {
if (!runtime) {
error = (char *)"configuration error: nodelist or quorum.expected_votes must be configured!";
} else {
log_printf(LOGSYS_LEVEL_CRIT, "configuration error: nodelist or quorum.expected_votes must be configured!");
log_printf(LOGSYS_LEVEL_CRIT, "will continue with current runtime data");
}
goto out;
}
/*
* two_node and qdevice are not compatible in the same config.
* try to make an educated guess of what to do
*/
if ((two_node) && (have_qdevice)) {
if (!runtime) {
error = (char *)"configuration error: two_node and quorum device cannot be configured at the same time!";
goto out;
} else {
log_printf(LOGSYS_LEVEL_CRIT, "configuration error: two_node and quorum device cannot be configured at the same time!");
if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
log_printf(LOGSYS_LEVEL_CRIT, "quorum device is registered, disabling two_node");
two_node = 0;
} else {
log_printf(LOGSYS_LEVEL_CRIT, "quorum device is not registered, allowing two_node");
update_qdevice_can_operate(0);
}
}
}
/*
* Enable special features
*/
if (!runtime) {
if (two_node) {
wait_for_all = 1;
}
icmap_get_uint8("quorum.allow_downscale", &allow_downscale);
icmap_get_uint8("quorum.wait_for_all", &wait_for_all);
icmap_get_uint8("quorum.auto_tie_breaker", &auto_tie_breaker);
icmap_get_uint8("quorum.last_man_standing", &last_man_standing);
icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window);
}
/*
* quorum device is not compatible with last_man_standing and auto_tie_breaker
* neither lms or atb can be set at runtime, so there is no need to check for
* runtime incompatibilities, but qdevice can be configured _after_ LMS and ATB have
* been enabled at startup.
*/
if ((have_qdevice) && (last_man_standing)) {
if (!runtime) {
error = (char *)"configuration error: quorum.device is not compatible with last_man_standing";
goto out;
} else {
log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with last_man_standing");
log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
update_qdevice_can_operate(0);
}
}
if ((have_qdevice) && (auto_tie_breaker)) {
if (!runtime) {
error = (char *)"configuration error: quorum.device is not compatible with auto_tie_breaker";
goto out;
} else {
log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with auto_tie_breaker");
log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
update_qdevice_can_operate(0);
}
}
if ((have_qdevice) && (wait_for_all)) {
if (!runtime) {
error = (char *)"configuration error: quorum.device is not compatible with wait_for_all";
goto out;
} else {
log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with wait_for_all");
log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
update_qdevice_can_operate(0);
}
}
if ((have_qdevice) && (allow_downscale)) {
if (!runtime) {
error = (char *)"configuration error: quorum.device is not compatible with allow_downscale";
goto out;
} else {
log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with allow_downscale");
log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
update_qdevice_can_operate(0);
}
}
/*
* if user specifies quorum.expected_votes + quorum.device but NOT the device.votes
* we don't know what the quorum device should vote.
*/
if ((expected_votes) && (have_qdevice) && (qdevice_votes == -1)) {
if (!runtime) {
error = (char *)"configuration error: quorum.device.votes must be specified when quorum.expected_votes is set";
goto out;
} else {
log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when quorum.expected_votes is set");
log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
update_qdevice_can_operate(0);
}
}
/*
* if user specifies a node list with uneven votes and no device.votes
* we cannot autocalculate the votes
*/
if ((have_qdevice) &&
(qdevice_votes == -1) &&
(have_nodelist) &&
(node_count != node_expected_votes)) {
if (!runtime) {
error = (char *)"configuration error: quorum.device.votes must be specified when not all nodes votes 1";
goto out;
} else {
log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when not all nodes votes 1");
log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
update_qdevice_can_operate(0);
}
}
/*
* validate quorum device votes vs expected_votes
*/
if ((qdevice_votes > 0) && (expected_votes)) {
int delta = expected_votes - qdevice_votes;
if (delta < 2) {
if (!runtime) {
error = (char *)"configuration error: quorum.device.votes is too high or expected_votes is too low";
goto out;
} else {
log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes is too high or expected_votes is too low");
log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
update_qdevice_can_operate(0);
}
}
}
/*
* automatically calculate device votes and adjust expected_votes from nodelist
*/
if ((have_qdevice) &&
(qdevice_votes == -1) &&
(!expected_votes) &&
(have_nodelist) &&
(node_count == node_expected_votes)) {
qdevice_votes = node_expected_votes - 1;
node_expected_votes = node_expected_votes + qdevice_votes;
}
/*
* set this node votes and expected_votes
*/
if (have_nodelist) {
us->votes = node_votes;
us->expected_votes = node_expected_votes;
} else {
us->votes = 1;
icmap_get_uint32("quorum.votes", &us->votes);
}
if (expected_votes) {
us->expected_votes = expected_votes;
}
/*
* set qdevice votes
*/
if (!have_qdevice) {
qdevice->votes = 0;
}
if (qdevice_votes != -1) {
qdevice->votes = qdevice_votes;
}
update_ev_barrier(us->expected_votes);
update_two_node();
if (wait_for_all) {
update_wait_for_all_status(1);
}
out:
LEAVE();
return error;
}
static void votequorum_refresh_config(
int32_t event,
const char *key_name,
struct icmap_notify_value new_val,
struct icmap_notify_value old_val,
void *user_data)
{
ENTER();
/*
* Reload the configuration
*/
votequorum_readconfig(VOTEQUORUM_READCONFIG_RUNTIME);
/*
* activate new config
*/
votequorum_exec_send_nodeinfo(us->node_id);
votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
LEAVE();
}
static void votequorum_exec_add_config_notification(void)
{
icmap_track_t icmap_track_nodelist = NULL;
icmap_track_t icmap_track_quorum = NULL;
ENTER();
icmap_track_add("nodelist.",
ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX,
votequorum_refresh_config,
NULL,
&icmap_track_nodelist);
icmap_track_add("quorum.",
ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX,
votequorum_refresh_config,
NULL,
&icmap_track_quorum);
LEAVE();
}
/*
* votequorum_exec core
*/
static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value)
{
struct req_exec_quorum_reconfigure req_exec_quorum_reconfigure;
struct iovec iov[1];
int ret;
ENTER();
req_exec_quorum_reconfigure.nodeid = nodeid;
req_exec_quorum_reconfigure.value = value;
req_exec_quorum_reconfigure.param = param;
req_exec_quorum_reconfigure._pad0 = 0;
req_exec_quorum_reconfigure._pad1 = 0;
req_exec_quorum_reconfigure._pad2 = 0;
req_exec_quorum_reconfigure.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE);
req_exec_quorum_reconfigure.header.size = sizeof(req_exec_quorum_reconfigure);
iov[0].iov_base = (void *)&req_exec_quorum_reconfigure;
iov[0].iov_len = sizeof(req_exec_quorum_reconfigure);
ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
LEAVE();
return ret;
}
static int votequorum_exec_send_nodeinfo(uint32_t nodeid)
{
struct req_exec_quorum_nodeinfo req_exec_quorum_nodeinfo;
struct iovec iov[1];
struct cluster_node *node;
int ret;
ENTER();
node = find_node_by_nodeid(nodeid);
if (!node) {
return -1;
}
req_exec_quorum_nodeinfo.nodeid = nodeid;
req_exec_quorum_nodeinfo.votes = node->votes;
req_exec_quorum_nodeinfo.expected_votes = node->expected_votes;
req_exec_quorum_nodeinfo.flags = node->flags;
if (nodeid != VOTEQUORUM_QDEVICE_NODEID) {
decode_flags(node->flags);
}
req_exec_quorum_nodeinfo.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO);
req_exec_quorum_nodeinfo.header.size = sizeof(req_exec_quorum_nodeinfo);
iov[0].iov_base = (void *)&req_exec_quorum_nodeinfo;
iov[0].iov_len = sizeof(req_exec_quorum_nodeinfo);
ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
LEAVE();
return ret;
}
static int votequorum_exec_send_qdevice_reconfigure(const char *oldname, const char *newname)
{
struct req_exec_quorum_qdevice_reconfigure req_exec_quorum_qdevice_reconfigure;
struct iovec iov[1];
int ret;
ENTER();
req_exec_quorum_qdevice_reconfigure.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE);
req_exec_quorum_qdevice_reconfigure.header.size = sizeof(req_exec_quorum_qdevice_reconfigure);
strcpy(req_exec_quorum_qdevice_reconfigure.oldname, oldname);
strcpy(req_exec_quorum_qdevice_reconfigure.newname, newname);
iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reconfigure;
iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reconfigure);
ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
LEAVE();
return ret;
}
static int votequorum_exec_send_qdevice_reg(uint32_t operation, const char *qdevice_name_req)
{
struct req_exec_quorum_qdevice_reg req_exec_quorum_qdevice_reg;
struct iovec iov[1];
int ret;
ENTER();
req_exec_quorum_qdevice_reg.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG);
req_exec_quorum_qdevice_reg.header.size = sizeof(req_exec_quorum_qdevice_reg);
req_exec_quorum_qdevice_reg.operation = operation;
strcpy(req_exec_quorum_qdevice_reg.qdevice_name, qdevice_name_req);
iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reg;
iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reg);
ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
LEAVE();
return ret;
}
static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context)
{
struct res_lib_votequorum_notification *res_lib_votequorum_notification;
struct list_head *tmp;
struct cluster_node *node;
int cluster_members = 0;
int i = 0;
int size;
char buf[sizeof(struct res_lib_votequorum_notification) + sizeof(struct votequorum_node) * (PROCESSOR_COUNT_MAX + 2)];
ENTER();
list_iterate(tmp, &cluster_members_list) {
node = list_entry(tmp, struct cluster_node, list);
cluster_members++;
}
if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
cluster_members++;
}
size = sizeof(struct res_lib_votequorum_notification) + sizeof(struct votequorum_node) * cluster_members;
res_lib_votequorum_notification = (struct res_lib_votequorum_notification *)&buf;
res_lib_votequorum_notification->quorate = cluster_is_quorate;
res_lib_votequorum_notification->node_list_entries = cluster_members;
res_lib_votequorum_notification->context = context;
list_iterate(tmp, &cluster_members_list) {
node = list_entry(tmp, struct cluster_node, list);
res_lib_votequorum_notification->node_list[i].nodeid = node->node_id;
res_lib_votequorum_notification->node_list[i++].state = node->state;
}
if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
res_lib_votequorum_notification->node_list[i].nodeid = VOTEQUORUM_QDEVICE_NODEID;
res_lib_votequorum_notification->node_list[i++].state = qdevice->state;
}
res_lib_votequorum_notification->header.id = MESSAGE_RES_VOTEQUORUM_NOTIFICATION;
res_lib_votequorum_notification->header.size = size;
res_lib_votequorum_notification->header.error = CS_OK;
/* Send it to all interested parties */
if (conn) {
int ret = corosync_api->ipc_dispatch_send(conn, &buf, size);
LEAVE();
return ret;
} else {
struct quorum_pd *qpd;
list_iterate(tmp, &trackers_list) {
qpd = list_entry(tmp, struct quorum_pd, list);
res_lib_votequorum_notification->context = qpd->tracking_context;
corosync_api->ipc_dispatch_send(qpd->conn, &buf, size);
}
}
LEAVE();
return 0;
}
static void votequorum_exec_send_expectedvotes_notification(void)
{
struct res_lib_votequorum_expectedvotes_notification res_lib_votequorum_expectedvotes_notification;
struct quorum_pd *qpd;
struct list_head *tmp;
ENTER();
log_printf(LOGSYS_LEVEL_DEBUG, "Sending expected votes callback");
res_lib_votequorum_expectedvotes_notification.header.id = MESSAGE_RES_VOTEQUORUM_EXPECTEDVOTES_NOTIFICATION;
res_lib_votequorum_expectedvotes_notification.header.size = sizeof(res_lib_votequorum_expectedvotes_notification);
res_lib_votequorum_expectedvotes_notification.header.error = CS_OK;
res_lib_votequorum_expectedvotes_notification.expected_votes = us->expected_votes;
list_iterate(tmp, &trackers_list) {
qpd = list_entry(tmp, struct quorum_pd, list);
res_lib_votequorum_expectedvotes_notification.context = qpd->tracking_context;
corosync_api->ipc_dispatch_send(qpd->conn, &res_lib_votequorum_expectedvotes_notification,
sizeof(struct res_lib_votequorum_expectedvotes_notification));
}
LEAVE();
}
static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message)
{
ENTER();
LEAVE();
}
static void message_handler_req_exec_votequorum_qdevice_reconfigure (
const void *message,
unsigned int nodeid)
{
const struct req_exec_quorum_qdevice_reconfigure *req_exec_quorum_qdevice_reconfigure = message;
ENTER();
log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice name change req from node %u [from: %s to: %s]",
nodeid,
req_exec_quorum_qdevice_reconfigure->oldname,
req_exec_quorum_qdevice_reconfigure->newname);
if (!strcmp(req_exec_quorum_qdevice_reconfigure->oldname, qdevice_name)) {
log_printf(LOGSYS_LEVEL_DEBUG, "Allowing qdevice rename");
memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
strcpy(qdevice_name, req_exec_quorum_qdevice_reconfigure->newname);
/*
* TODO: notify qdevices about name change?
* this is not relevant for now and can wait later on since
* qdevices are local only and libvotequorum is not final
*/
}
LEAVE();
}
static void exec_votequorum_qdevice_reg_endian_convert (void *message)
{
struct req_exec_quorum_qdevice_reg *req_exec_quorum_qdevice_reg = message;
ENTER();
req_exec_quorum_qdevice_reg->operation = swab32(req_exec_quorum_qdevice_reg->operation);
LEAVE();
}
static void message_handler_req_exec_votequorum_qdevice_reg (
const void *message,
unsigned int nodeid)
{
const struct req_exec_quorum_qdevice_reg *req_exec_quorum_qdevice_reg = message;
struct res_lib_votequorum_status res_lib_votequorum_status;
int wipe_qdevice_name = 1;
struct cluster_node *node = NULL;
struct list_head *tmp;
cs_error_t error = CS_OK;
ENTER();
log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice op %u req from node %u [%s]",
req_exec_quorum_qdevice_reg->operation,
nodeid, req_exec_quorum_qdevice_reg->qdevice_name);
switch(req_exec_quorum_qdevice_reg->operation)
{
case VOTEQUORUM_QDEVICE_OPERATION_REGISTER:
if (nodeid != us->node_id) {
if (!strlen(qdevice_name)) {
log_printf(LOGSYS_LEVEL_DEBUG, "Remote qdevice name recorded");
strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name);
}
LEAVE();
return;
}
/*
* protect against the case where we broadcast qdevice registration
* to new memebers, we receive the message back, but there is no registration
* connection in progress
*/
if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
LEAVE();
return;
}
/*
* this should NEVER happen
*/
if (!qdevice_reg_conn) {
log_printf(LOGSYS_LEVEL_WARNING, "Unable to determine origin of the qdevice register call!");
LEAVE();
return;
}
/*
* registering our own device in this case
*/
if (!strlen(qdevice_name)) {
strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name);
}
/*
* check if it is our device or something else
*/
if ((!strncmp(req_exec_quorum_qdevice_reg->qdevice_name,
qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) {
us->flags |= NODE_FLAGS_QDEVICE_REGISTERED;
votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
votequorum_exec_send_nodeinfo(us->node_id);
} else {
log_printf(LOGSYS_LEVEL_WARNING,
"A new qdevice with different name (new: %s old: %s) is trying to register!",
req_exec_quorum_qdevice_reg->qdevice_name, qdevice_name);
error = CS_ERR_EXIST;
}
res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
res_lib_votequorum_status.header.error = error;
corosync_api->ipc_response_send(qdevice_reg_conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
qdevice_reg_conn = NULL;
break;
case VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER:
list_iterate(tmp, &cluster_members_list) {
node = list_entry(tmp, struct cluster_node, list);
if ((node->state == NODESTATE_MEMBER) &&
(node->flags & NODE_FLAGS_QDEVICE_REGISTERED)) {
wipe_qdevice_name = 0;
}
}
if (wipe_qdevice_name) {
memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
}
break;
}
LEAVE();
}
static void exec_votequorum_nodeinfo_endian_convert (void *message)
{
struct req_exec_quorum_nodeinfo *nodeinfo = message;
ENTER();
nodeinfo->nodeid = swab32(nodeinfo->nodeid);
nodeinfo->votes = swab32(nodeinfo->votes);
nodeinfo->expected_votes = swab32(nodeinfo->expected_votes);
nodeinfo->flags = swab32(nodeinfo->flags);
LEAVE();
}
static void message_handler_req_exec_votequorum_nodeinfo (
const void *message,
unsigned int sender_nodeid)
{
const struct req_exec_quorum_nodeinfo *req_exec_quorum_nodeinfo = message;
struct cluster_node *node = NULL;
int old_votes;
int old_expected;
uint32_t old_flags;
nodestate_t old_state;
int new_node = 0;
int allow_downgrade = 0;
int by_node = 0;
unsigned int nodeid = req_exec_quorum_nodeinfo->nodeid;
ENTER();
log_printf(LOGSYS_LEVEL_DEBUG, "got nodeinfo message from cluster node %u", sender_nodeid);
log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message[%u]: votes: %d, expected: %d flags: %d",
nodeid,
req_exec_quorum_nodeinfo->votes,
req_exec_quorum_nodeinfo->expected_votes,
req_exec_quorum_nodeinfo->flags);
if (nodeid != VOTEQUORUM_QDEVICE_NODEID) {
decode_flags(req_exec_quorum_nodeinfo->flags);
}
node = find_node_by_nodeid(nodeid);
if (!node) {
node = allocate_node(nodeid);
new_node = 1;
}
if (!node) {
corosync_api->error_memory_failure();
LEAVE();
return;
}
if (new_node) {
old_votes = 0;
old_expected = 0;
old_state = NODESTATE_DEAD;
old_flags = 0;
} else {
old_votes = node->votes;
old_expected = node->expected_votes;
old_state = node->state;
old_flags = node->flags;
}
if (nodeid == VOTEQUORUM_QDEVICE_NODEID) {
struct cluster_node *sender_node = find_node_by_nodeid(sender_nodeid);
if ((!cluster_is_quorate) &&
(sender_node->flags & NODE_FLAGS_QUORATE)) {
node->votes = req_exec_quorum_nodeinfo->votes;
} else {
node->votes = max(node->votes, req_exec_quorum_nodeinfo->votes);
}
goto recalculate;
}
/* Update node state */
node->flags = req_exec_quorum_nodeinfo->flags;
node->votes = req_exec_quorum_nodeinfo->votes;
node->state = NODESTATE_MEMBER;
if (node->flags & NODE_FLAGS_LEAVING) {
node->state = NODESTATE_LEAVING;
allow_downgrade = 1;
by_node = 1;
}
if ((!cluster_is_quorate) &&
(node->flags & NODE_FLAGS_QUORATE)) {
allow_downgrade = 1;
us->expected_votes = req_exec_quorum_nodeinfo->expected_votes;
}
if (node->flags & NODE_FLAGS_QUORATE) {
node->expected_votes = req_exec_quorum_nodeinfo->expected_votes;
} else {
node->expected_votes = us->expected_votes;
}
if ((last_man_standing) && (node->votes > 1)) {
log_printf(LOGSYS_LEVEL_WARNING, "Last Man Standing feature is supported only when all"
"cluster nodes votes are set to 1. Disabling LMS.");
last_man_standing = 0;
if (last_man_standing_timer_set) {
corosync_api->timer_delete(last_man_standing_timer);
last_man_standing_timer_set = 0;
}
}
recalculate:
if ((new_node) ||
(nodeid == us->node_id) ||
(node->flags & NODE_FLAGS_FIRST) ||
(old_votes != node->votes) ||
(old_expected != node->expected_votes) ||
(old_flags != node->flags) ||
(old_state != node->state)) {
recalculate_quorum(allow_downgrade, by_node);
}
if ((wait_for_all) &&
(!(node->flags & NODE_FLAGS_WFASTATUS)) &&
(node->flags & NODE_FLAGS_QUORATE)) {
update_wait_for_all_status(0);
}
LEAVE();
}
static void exec_votequorum_reconfigure_endian_convert (void *message)
{
struct req_exec_quorum_reconfigure *reconfigure = message;
ENTER();
reconfigure->nodeid = swab32(reconfigure->nodeid);
reconfigure->value = swab32(reconfigure->value);
LEAVE();
}
static void message_handler_req_exec_votequorum_reconfigure (
const void *message,
unsigned int nodeid)
{
const struct req_exec_quorum_reconfigure *req_exec_quorum_reconfigure = message;
struct cluster_node *node;
struct list_head *nodelist;
ENTER();
log_printf(LOGSYS_LEVEL_DEBUG, "got reconfigure message from cluster node %u for %u",
nodeid, req_exec_quorum_reconfigure->nodeid);
switch(req_exec_quorum_reconfigure->param)
{
case VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES:
list_iterate(nodelist, &cluster_members_list) {
node = list_entry(nodelist, struct cluster_node, list);
if (node->state == NODESTATE_MEMBER) {
node->expected_votes = req_exec_quorum_reconfigure->value;
}
}
votequorum_exec_send_expectedvotes_notification();
update_ev_barrier(req_exec_quorum_reconfigure->value);
recalculate_quorum(1, 0); /* Allow decrease */
break;
case VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES:
node = find_node_by_nodeid(req_exec_quorum_reconfigure->nodeid);
if (!node) {
LEAVE();
return;
}
node->votes = req_exec_quorum_reconfigure->value;
recalculate_quorum(1, 0); /* Allow decrease */
break;
}
LEAVE();
}
static int votequorum_exec_exit_fn (void)
{
int ret = 0;
ENTER();
/*
* tell the other nodes we are leaving
*/
if (allow_downscale) {
us->flags |= NODE_FLAGS_LEAVING;
ret = votequorum_exec_send_nodeinfo(us->node_id);
}
LEAVE();
return ret;
}
static char *votequorum_exec_init_fn (struct corosync_api_v1 *api)
{
char *error = NULL;
-#ifdef COROSYNC_SOLARIS
- logsys_subsys_init();
-#endif
-
ENTER();
/*
* make sure we start clean
*/
list_init(&cluster_members_list);
list_init(&trackers_list);
qdevice = NULL;
us = NULL;
memset(cluster_nodes, 0, sizeof(cluster_nodes));
/*
* Allocate a cluster_node for qdevice
*/
qdevice = allocate_node(VOTEQUORUM_QDEVICE_NODEID);
if (!qdevice) {
LEAVE();
return ((char *)"Could not allocate node.");
}
qdevice->votes = 0;
memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
/*
* Allocate a cluster_node for us
*/
us = allocate_node(corosync_api->totem_nodeid_get());
if (!us) {
LEAVE();
return ((char *)"Could not allocate node.");
}
icmap_set_uint32("runtime.votequorum.this_node_id", us->node_id);
us->state = NODESTATE_MEMBER;
us->votes = 1;
us->flags |= NODE_FLAGS_FIRST;
error = votequorum_readconfig(VOTEQUORUM_READCONFIG_STARTUP);
if (error) {
return error;
}
recalculate_quorum(0, 0);
/*
* Listen for changes
*/
votequorum_exec_add_config_notification();
/*
* Start us off with one node
*/
votequorum_exec_send_nodeinfo(us->node_id);
LEAVE();
return (NULL);
}
/*
* votequorum service core
*/
static void votequorum_last_man_standing_timer_fn(void *arg)
{
ENTER();
last_man_standing_timer_set = 0;
if (cluster_is_quorate) {
recalculate_quorum(1,1);
}
LEAVE();
}
static void votequorum_confchg_fn (
enum totem_configuration_type configuration_type,
const unsigned int *member_list, size_t member_list_entries,
const unsigned int *left_list, size_t left_list_entries,
const unsigned int *joined_list, size_t joined_list_entries,
const struct memb_ring_id *ring_id)
{
int i;
struct cluster_node *node;
ENTER();
if (member_list_entries > 1) {
us->flags &= ~NODE_FLAGS_FIRST;
}
if (left_list_entries) {
for (i = 0; i< left_list_entries; i++) {
node = find_node_by_nodeid(left_list[i]);
if (node) {
node->state = NODESTATE_DEAD;
}
}
}
if (last_man_standing) {
if (((member_list_entries >= quorum) && (left_list_entries)) ||
((member_list_entries <= quorum) && (auto_tie_breaker) && (check_low_node_id_partition() == 1))) {
if (last_man_standing_timer_set) {
corosync_api->timer_delete(last_man_standing_timer);
last_man_standing_timer_set = 0;
}
corosync_api->timer_add_duration((unsigned long long)last_man_standing_window*1000000,
NULL, votequorum_last_man_standing_timer_fn,
&last_man_standing_timer);
last_man_standing_timer_set = 1;
}
}
if (member_list_entries) {
memcpy(quorum_members, member_list, sizeof(unsigned int) * member_list_entries);
quorum_members_entries = member_list_entries;
votequorum_exec_send_nodeinfo(us->node_id);
votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
if (strlen(qdevice_name)) {
votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER,
qdevice_name);
}
}
memcpy(&quorum_ringid, ring_id, sizeof(*ring_id));
if (left_list_entries) {
recalculate_quorum(0, 0);
}
if (configuration_type == TOTEM_CONFIGURATION_REGULAR) {
quorum_callback(quorum_members, quorum_members_entries,
cluster_is_quorate, &quorum_ringid);
}
LEAVE();
}
char *votequorum_init(struct corosync_api_v1 *api,
quorum_set_quorate_fn_t q_set_quorate_fn)
{
char *error;
ENTER();
if (q_set_quorate_fn == NULL) {
return ((char *)"Quorate function not set");
}
corosync_api = api;
quorum_callback = q_set_quorate_fn;
error = corosync_service_link_and_init(corosync_api,
&votequorum_service[0]);
if (error) {
return (error);
}
LEAVE();
return (NULL);
}
/*
* Library Handler init/fini
*/
static int quorum_lib_init_fn (void *conn)
{
struct quorum_pd *pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
ENTER();
list_init (&pd->list);
pd->conn = conn;
LEAVE();
return (0);
}
static int quorum_lib_exit_fn (void *conn)
{
struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
ENTER();
if (quorum_pd->tracking_enabled) {
list_del (&quorum_pd->list);
list_init (&quorum_pd->list);
}
LEAVE();
return (0);
}
/*
* library internal functions
*/
static void qdevice_timer_fn(void *arg)
{
ENTER();
if ((!(us->flags & NODE_FLAGS_QDEVICE_ALIVE)) ||
(!qdevice_timer_set)) {
LEAVE();
return;
}
us->flags &= ~NODE_FLAGS_QDEVICE_ALIVE;
us->flags &= ~NODE_FLAGS_QDEVICE_CAST_VOTE;
log_printf(LOGSYS_LEVEL_INFO, "lost contact with quorum device %s", qdevice_name);
votequorum_exec_send_nodeinfo(us->node_id);
qdevice_timer_set = 0;
LEAVE();
}
/*
* Library Handler Functions
*/
static void message_handler_req_lib_votequorum_getinfo (void *conn, const void *message)
{
const struct req_lib_votequorum_getinfo *req_lib_votequorum_getinfo = message;
struct res_lib_votequorum_getinfo res_lib_votequorum_getinfo;
struct cluster_node *node;
unsigned int highest_expected = 0;
unsigned int total_votes = 0;
cs_error_t error = CS_OK;
uint32_t nodeid = req_lib_votequorum_getinfo->nodeid;
ENTER();
log_printf(LOGSYS_LEVEL_DEBUG, "got getinfo request on %p for node %u", conn, req_lib_votequorum_getinfo->nodeid);
if (nodeid == VOTEQUORUM_QDEVICE_NODEID) {
nodeid = us->node_id;
}
node = find_node_by_nodeid(nodeid);
if (node) {
struct cluster_node *iternode;
struct list_head *nodelist;
list_iterate(nodelist, &cluster_members_list) {
iternode = list_entry(nodelist, struct cluster_node, list);
if (iternode->state == NODESTATE_MEMBER) {
highest_expected =
max(highest_expected, iternode->expected_votes);
total_votes += iternode->votes;
}
}
if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) {
total_votes += qdevice->votes;
}
switch(node->state) {
case NODESTATE_MEMBER:
res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_MEMBER;
break;
case NODESTATE_DEAD:
res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_DEAD;
break;
case NODESTATE_LEAVING:
res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_LEAVING;
break;
default:
res_lib_votequorum_getinfo.state = node->state;
break;
}
res_lib_votequorum_getinfo.state = node->state;
res_lib_votequorum_getinfo.votes = node->votes;
res_lib_votequorum_getinfo.expected_votes = node->expected_votes;
res_lib_votequorum_getinfo.highest_expected = highest_expected;
res_lib_votequorum_getinfo.quorum = quorum;
res_lib_votequorum_getinfo.total_votes = total_votes;
res_lib_votequorum_getinfo.flags = 0;
res_lib_votequorum_getinfo.nodeid = node->node_id;
if (two_node) {
res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_TWONODE;
}
if (cluster_is_quorate) {
res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QUORATE;
}
if (wait_for_all) {
res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_WAIT_FOR_ALL;
}
if (last_man_standing) {
res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_LAST_MAN_STANDING;
}
if (auto_tie_breaker) {
res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_AUTO_TIE_BREAKER;
}
if (allow_downscale) {
res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_ALLOW_DOWNSCALE;
}
memset(res_lib_votequorum_getinfo.qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
strcpy(res_lib_votequorum_getinfo.qdevice_name, qdevice_name);
res_lib_votequorum_getinfo.qdevice_votes = qdevice->votes;
if (node->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_REGISTERED;
}
if (node->flags & NODE_FLAGS_QDEVICE_ALIVE) {
res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_ALIVE;
}
if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) {
res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_CAST_VOTE;
}
if (node->flags & NODE_FLAGS_QDEVICE_MASTER_WINS) {
res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_MASTER_WINS;
}
} else {
error = CS_ERR_NOT_EXIST;
}
res_lib_votequorum_getinfo.header.size = sizeof(res_lib_votequorum_getinfo);
res_lib_votequorum_getinfo.header.id = MESSAGE_RES_VOTEQUORUM_GETINFO;
res_lib_votequorum_getinfo.header.error = error;
corosync_api->ipc_response_send(conn, &res_lib_votequorum_getinfo, sizeof(res_lib_votequorum_getinfo));
log_printf(LOGSYS_LEVEL_DEBUG, "getinfo response error: %d", error);
LEAVE();
}
static void message_handler_req_lib_votequorum_setexpected (void *conn, const void *message)
{
const struct req_lib_votequorum_setexpected *req_lib_votequorum_setexpected = message;
struct res_lib_votequorum_status res_lib_votequorum_status;
cs_error_t error = CS_OK;
unsigned int newquorum;
unsigned int total_votes;
uint8_t allow_downscale_status = 0;
ENTER();
allow_downscale_status = allow_downscale;
allow_downscale = 0;
/*
* Validate new expected votes
*/
newquorum = calculate_quorum(1, req_lib_votequorum_setexpected->expected_votes, &total_votes);
allow_downscale = allow_downscale_status;
if (newquorum < total_votes / 2 ||
newquorum > total_votes) {
error = CS_ERR_INVALID_PARAM;
goto error_exit;
}
votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES, us->node_id,
req_lib_votequorum_setexpected->expected_votes);
error_exit:
res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
res_lib_votequorum_status.header.error = error;
corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
LEAVE();
}
static void message_handler_req_lib_votequorum_setvotes (void *conn, const void *message)
{
const struct req_lib_votequorum_setvotes *req_lib_votequorum_setvotes = message;
struct res_lib_votequorum_status res_lib_votequorum_status;
struct cluster_node *node;
unsigned int newquorum;
unsigned int total_votes;
unsigned int saved_votes;
cs_error_t error = CS_OK;
unsigned int nodeid;
ENTER();
nodeid = req_lib_votequorum_setvotes->nodeid;
node = find_node_by_nodeid(nodeid);
if (!node) {
error = CS_ERR_NAME_NOT_FOUND;
goto error_exit;
}
/*
* Check votes is valid
*/
saved_votes = node->votes;
node->votes = req_lib_votequorum_setvotes->votes;
newquorum = calculate_quorum(1, 0, &total_votes);
if (newquorum < total_votes / 2 ||
newquorum > total_votes) {
node->votes = saved_votes;
error = CS_ERR_INVALID_PARAM;
goto error_exit;
}
votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES, nodeid,
req_lib_votequorum_setvotes->votes);
error_exit:
res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
res_lib_votequorum_status.header.error = error;
corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
LEAVE();
}
static void message_handler_req_lib_votequorum_trackstart (void *conn,
const void *message)
{
const struct req_lib_votequorum_trackstart *req_lib_votequorum_trackstart = message;
struct res_lib_votequorum_status res_lib_votequorum_status;
struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
ENTER();
/*
* If an immediate listing of the current cluster membership
* is requested, generate membership list
*/
if (req_lib_votequorum_trackstart->track_flags & CS_TRACK_CURRENT ||
req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES) {
log_printf(LOGSYS_LEVEL_DEBUG, "sending initial status to %p", conn);
votequorum_exec_send_quorum_notification(conn, req_lib_votequorum_trackstart->context);
}
/*
* Record requests for tracking
*/
if (req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES ||
req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES_ONLY) {
quorum_pd->track_flags = req_lib_votequorum_trackstart->track_flags;
quorum_pd->tracking_enabled = 1;
quorum_pd->tracking_context = req_lib_votequorum_trackstart->context;
list_add (&quorum_pd->list, &trackers_list);
}
res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
res_lib_votequorum_status.header.error = CS_OK;
corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
LEAVE();
}
static void message_handler_req_lib_votequorum_trackstop (void *conn,
const void *message)
{
struct res_lib_votequorum_status res_lib_votequorum_status;
struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
int error = CS_OK;
ENTER();
if (quorum_pd->tracking_enabled) {
error = CS_OK;
quorum_pd->tracking_enabled = 0;
list_del (&quorum_pd->list);
list_init (&quorum_pd->list);
} else {
error = CS_ERR_NOT_EXIST;
}
res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
res_lib_votequorum_status.header.error = error;
corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
LEAVE();
}
static void message_handler_req_lib_votequorum_qdevice_register (void *conn,
const void *message)
{
const struct req_lib_votequorum_qdevice_register *req_lib_votequorum_qdevice_register = message;
struct res_lib_votequorum_status res_lib_votequorum_status;
cs_error_t error = CS_OK;
ENTER();
if (!qdevice_can_operate) {
log_printf(LOGSYS_LEVEL_INFO, "Registration of quorum device is disabled by incorrect corosync.conf. See logs for more information");
error = CS_ERR_ACCESS;
goto out;
}
if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
if ((!strncmp(req_lib_votequorum_qdevice_register->name,
qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) {
goto out;
} else {
log_printf(LOGSYS_LEVEL_WARNING,
"A new qdevice with different name (new: %s old: %s) is trying to re-register!",
req_lib_votequorum_qdevice_register->name, qdevice_name);
error = CS_ERR_EXIST;
goto out;
}
} else {
if (qdevice_reg_conn != NULL) {
log_printf(LOGSYS_LEVEL_WARNING,
"Registration request already in progress");
error = CS_ERR_TRY_AGAIN;
goto out;
}
qdevice_reg_conn = conn;
if (votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER,
req_lib_votequorum_qdevice_register->name) != 0) {
log_printf(LOGSYS_LEVEL_WARNING,
"Unable to send qdevice registration request to cluster");
error = CS_ERR_TRY_AGAIN;
qdevice_reg_conn = NULL;
} else {
LEAVE();
return;
}
}
out:
res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
res_lib_votequorum_status.header.error = error;
corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
LEAVE();
}
static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn,
const void *message)
{
const struct req_lib_votequorum_qdevice_unregister *req_lib_votequorum_qdevice_unregister = message;
struct res_lib_votequorum_status res_lib_votequorum_status;
cs_error_t error = CS_OK;
ENTER();
if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
if (strncmp(req_lib_votequorum_qdevice_unregister->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
error = CS_ERR_INVALID_PARAM;
goto out;
}
if (qdevice_timer_set) {
corosync_api->timer_delete(qdevice_timer);
qdevice_timer_set = 0;
}
us->flags &= ~NODE_FLAGS_QDEVICE_REGISTERED;
us->flags &= ~NODE_FLAGS_QDEVICE_ALIVE;
us->flags &= ~NODE_FLAGS_QDEVICE_CAST_VOTE;
us->flags &= ~NODE_FLAGS_QDEVICE_MASTER_WINS;
votequorum_exec_send_nodeinfo(us->node_id);
votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER,
req_lib_votequorum_qdevice_unregister->name);
} else {
error = CS_ERR_NOT_EXIST;
}
out:
res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
res_lib_votequorum_status.header.error = error;
corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
LEAVE();
}
static void message_handler_req_lib_votequorum_qdevice_update (void *conn,
const void *message)
{
const struct req_lib_votequorum_qdevice_update *req_lib_votequorum_qdevice_update = message;
struct res_lib_votequorum_status res_lib_votequorum_status;
cs_error_t error = CS_OK;
ENTER();
if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
if (strncmp(req_lib_votequorum_qdevice_update->oldname, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
error = CS_ERR_INVALID_PARAM;
goto out;
}
votequorum_exec_send_qdevice_reconfigure(req_lib_votequorum_qdevice_update->oldname,
req_lib_votequorum_qdevice_update->newname);
} else {
error = CS_ERR_NOT_EXIST;
}
out:
res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
res_lib_votequorum_status.header.error = error;
corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
LEAVE();
}
static void message_handler_req_lib_votequorum_qdevice_poll (void *conn,
const void *message)
{
const struct req_lib_votequorum_qdevice_poll *req_lib_votequorum_qdevice_poll = message;
struct res_lib_votequorum_status res_lib_votequorum_status;
cs_error_t error = CS_OK;
uint32_t oldflags;
ENTER();
if (!qdevice_can_operate) {
error = CS_ERR_ACCESS;
goto out;
}
if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
if (strncmp(req_lib_votequorum_qdevice_poll->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
error = CS_ERR_INVALID_PARAM;
goto out;
}
if (qdevice_timer_set) {
corosync_api->timer_delete(qdevice_timer);
qdevice_timer_set = 0;
}
oldflags = us->flags;
us->flags |= NODE_FLAGS_QDEVICE_ALIVE;
if (req_lib_votequorum_qdevice_poll->cast_vote) {
us->flags |= NODE_FLAGS_QDEVICE_CAST_VOTE;
} else {
us->flags &= ~NODE_FLAGS_QDEVICE_CAST_VOTE;
}
if (us->flags != oldflags) {
votequorum_exec_send_nodeinfo(us->node_id);
}
corosync_api->timer_add_duration((unsigned long long)qdevice_timeout*1000000, qdevice,
qdevice_timer_fn, &qdevice_timer);
qdevice_timer_set = 1;
} else {
error = CS_ERR_NOT_EXIST;
}
out:
res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
res_lib_votequorum_status.header.error = error;
corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
LEAVE();
}
static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn,
const void *message)
{
const struct req_lib_votequorum_qdevice_master_wins *req_lib_votequorum_qdevice_master_wins = message;
struct res_lib_votequorum_status res_lib_votequorum_status;
cs_error_t error = CS_OK;
uint32_t oldflags = us->flags;
ENTER();
if (!qdevice_can_operate) {
error = CS_ERR_ACCESS;
goto out;
}
if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
if (strncmp(req_lib_votequorum_qdevice_master_wins->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
error = CS_ERR_INVALID_PARAM;
goto out;
}
if (req_lib_votequorum_qdevice_master_wins->allow) {
us->flags |= NODE_FLAGS_QDEVICE_MASTER_WINS;
} else {
us->flags &= ~NODE_FLAGS_QDEVICE_MASTER_WINS;
}
if (us->flags != oldflags) {
votequorum_exec_send_nodeinfo(us->node_id);
}
update_qdevice_master_wins(req_lib_votequorum_qdevice_master_wins->allow);
} else {
error = CS_ERR_NOT_EXIST;
}
out:
res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
res_lib_votequorum_status.header.error = error;
corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
LEAVE();
}
diff --git a/exec/vsf_quorum.c b/exec/vsf_quorum.c
index 9cab2d22..7a20a45d 100644
--- a/exec/vsf_quorum.c
+++ b/exec/vsf_quorum.c
@@ -1,483 +1,480 @@
/*
* Copyright (c) 2008-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Christine Caulfield (ccaulfie@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of Red Hat Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <pwd.h>
#include <grp.h>
#include <sys/types.h>
#include <sys/poll.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <sched.h>
#include <time.h>
#include "quorum.h"
#include <corosync/corotypes.h>
#include <qb/qbipc_common.h>
#include <corosync/corodefs.h>
#include <corosync/swab.h>
#include <corosync/list.h>
#include <corosync/mar_gen.h>
#include <corosync/ipc_quorum.h>
#include <corosync/mar_gen.h>
#include <corosync/coroapi.h>
#include <corosync/logsys.h>
#include <corosync/icmap.h>
#include "service.h"
#include "votequorum.h"
#include "vsf_ykd.h"
LOGSYS_DECLARE_SUBSYS ("QUORUM");
struct quorum_pd {
unsigned char track_flags;
int tracking_enabled;
struct list_head list;
void *conn;
};
struct internal_callback_pd {
struct list_head list;
quorum_callback_fn_t callback;
void *context;
};
static void message_handler_req_lib_quorum_getquorate (void *conn,
const void *msg);
static void message_handler_req_lib_quorum_trackstart (void *conn,
const void *msg);
static void message_handler_req_lib_quorum_trackstop (void *conn,
const void *msg);
static void message_handler_req_lib_quorum_gettype (void *conn,
const void *msg);
static void send_library_notification(void *conn);
static void send_internal_notification(void);
static char *quorum_exec_init_fn (struct corosync_api_v1 *api);
static int quorum_lib_init_fn (void *conn);
static int quorum_lib_exit_fn (void *conn);
static int primary_designated = 0;
static int quorum_type = 0;
static struct corosync_api_v1 *corosync_api;
static struct list_head lib_trackers_list;
static struct list_head internal_trackers_list;
static struct memb_ring_id quorum_ring_id;
static size_t quorum_view_list_entries = 0;
static int quorum_view_list[PROCESSOR_COUNT_MAX];
struct quorum_services_api_ver1 *quorum_iface = NULL;
static char view_buf[64];
static void log_view_list(const unsigned int *view_list, size_t view_list_entries)
{
int total = (int)view_list_entries;
int len, pos, ret;
int i = 0;
while (1) {
len = sizeof(view_buf);
pos = 0;
memset(view_buf, 0, len);
for (; i < total; i++) {
ret = snprintf(view_buf + pos, len - pos, " %d", view_list[i]);
if (ret >= len - pos)
break;
pos += ret;
}
log_printf (LOGSYS_LEVEL_NOTICE, "Members[%d]:%s%s",
total, view_buf, i < total ? "\\" : "");
if (i == total)
break;
}
}
/* Internal quorum API function */
static void quorum_api_set_quorum(const unsigned int *view_list,
size_t view_list_entries,
int quorum, struct memb_ring_id *ring_id)
{
int old_quorum = primary_designated;
primary_designated = quorum;
if (primary_designated && !old_quorum) {
log_printf (LOGSYS_LEVEL_NOTICE, "This node is within the primary component and will provide service.");
} else if (!primary_designated && old_quorum) {
log_printf (LOGSYS_LEVEL_NOTICE, "This node is within the non-primary component and will NOT provide any services.");
}
quorum_view_list_entries = view_list_entries;
memcpy(&quorum_ring_id, ring_id, sizeof (quorum_ring_id));
memcpy(quorum_view_list, view_list, sizeof(unsigned int)*view_list_entries);
log_view_list(view_list, view_list_entries);
/* Tell internal listeners */
send_internal_notification();
/* Tell IPC listeners */
send_library_notification(NULL);
}
static struct corosync_lib_handler quorum_lib_service[] =
{
{ /* 0 */
.lib_handler_fn = message_handler_req_lib_quorum_getquorate,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 1 */
.lib_handler_fn = message_handler_req_lib_quorum_trackstart,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 2 */
.lib_handler_fn = message_handler_req_lib_quorum_trackstop,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 3 */
.lib_handler_fn = message_handler_req_lib_quorum_gettype,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
}
};
static struct corosync_service_engine quorum_service_handler = {
.name = "corosync cluster quorum service v0.1",
.id = QUORUM_SERVICE,
.priority = 1,
.private_data_size = sizeof (struct quorum_pd),
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED,
.allow_inquorate = CS_LIB_ALLOW_INQUORATE,
.lib_init_fn = quorum_lib_init_fn,
.lib_exit_fn = quorum_lib_exit_fn,
.lib_engine = quorum_lib_service,
.exec_init_fn = quorum_exec_init_fn,
.lib_engine_count = sizeof (quorum_lib_service) / sizeof (struct corosync_lib_handler)
};
struct corosync_service_engine *vsf_quorum_get_service_engine_ver0 (void)
{
return (&quorum_service_handler);
}
/* -------------------------------------------------- */
/*
* Internal API functions for corosync
*/
static int quorum_quorate(void)
{
return primary_designated;
}
static int quorum_register_callback(quorum_callback_fn_t function, void *context)
{
struct internal_callback_pd *pd = malloc(sizeof(struct internal_callback_pd));
if (!pd)
return -1;
pd->context = context;
pd->callback = function;
list_add (&pd->list, &internal_trackers_list);
return 0;
}
static int quorum_unregister_callback(quorum_callback_fn_t function, void *context)
{
struct internal_callback_pd *pd;
struct list_head *tmp;
for (tmp = internal_trackers_list.next; tmp != &internal_trackers_list; tmp = tmp->next) {
pd = list_entry(tmp, struct internal_callback_pd, list);
if (pd->callback == function && pd->context == context) {
list_del(&pd->list);
return 0;
}
}
return -1;
}
static struct quorum_callin_functions callins = {
.quorate = quorum_quorate,
.register_callback = quorum_register_callback,
.unregister_callback = quorum_unregister_callback
};
/* --------------------------------------------------------------------- */
static char *quorum_exec_init_fn (struct corosync_api_v1 *api)
{
char *quorum_module = NULL;
char *error;
-#ifdef COROSYNC_SOLARIS
- logsys_subsys_init();
-#endif
corosync_api = api;
list_init (&lib_trackers_list);
list_init (&internal_trackers_list);
/*
* Tell corosync we have a quorum engine.
*/
api->quorum_initialize(&callins);
/*
* Look for a quorum provider
*/
if (icmap_get_string("quorum.provider", &quorum_module) == CS_OK) {
log_printf (LOGSYS_LEVEL_NOTICE,
"Using quorum provider %s", quorum_module);
error = (char *)"Invalid quorum provider";
if (strcmp (quorum_module, "corosync_votequorum") == 0) {
error = votequorum_init (api, quorum_api_set_quorum);
quorum_type = 1;
}
if (strcmp (quorum_module, "corosync_ykd") == 0) {
error = ykd_init (api, quorum_api_set_quorum);
quorum_type = 1;
}
if (error) {
log_printf (LOGSYS_LEVEL_CRIT,
"Quorum provider: %s failed to initialize.",
quorum_module);
free(quorum_module);
return (error);
}
}
if (quorum_module) {
free(quorum_module);
quorum_module = NULL;
}
/*
* setting quorum_type and primary_designated in the right order is important
* always try to lookup/init a quorum module, then revert back to be quorate
*/
if (quorum_type == 0) {
primary_designated = 1;
}
return (NULL);
}
static int quorum_lib_init_fn (void *conn)
{
struct quorum_pd *pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
log_printf(LOGSYS_LEVEL_DEBUG, "lib_init_fn: conn=%p", conn);
list_init (&pd->list);
pd->conn = conn;
return (0);
}
static int quorum_lib_exit_fn (void *conn)
{
struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
log_printf(LOGSYS_LEVEL_DEBUG, "lib_exit_fn: conn=%p", conn);
if (quorum_pd->tracking_enabled) {
list_del (&quorum_pd->list);
list_init (&quorum_pd->list);
}
return (0);
}
static void send_internal_notification(void)
{
struct list_head *tmp;
struct internal_callback_pd *pd;
for (tmp = internal_trackers_list.next; tmp != &internal_trackers_list; tmp = tmp->next) {
pd = list_entry(tmp, struct internal_callback_pd, list);
pd->callback(primary_designated, pd->context);
}
}
static void send_library_notification(void *conn)
{
int size = sizeof(struct res_lib_quorum_notification) + sizeof(unsigned int)*quorum_view_list_entries;
char buf[size];
struct res_lib_quorum_notification *res_lib_quorum_notification = (struct res_lib_quorum_notification *)buf;
struct list_head *tmp;
int i;
log_printf(LOGSYS_LEVEL_DEBUG, "sending quorum notification to %p, length = %d", conn, size);
res_lib_quorum_notification->quorate = primary_designated;
res_lib_quorum_notification->ring_seq = quorum_ring_id.seq;
res_lib_quorum_notification->view_list_entries = quorum_view_list_entries;
for (i=0; i<quorum_view_list_entries; i++) {
res_lib_quorum_notification->view_list[i] = quorum_view_list[i];
}
res_lib_quorum_notification->header.id = MESSAGE_RES_QUORUM_NOTIFICATION;
res_lib_quorum_notification->header.size = size;
res_lib_quorum_notification->header.error = CS_OK;
/* Send it to all interested parties */
if (conn) {
corosync_api->ipc_dispatch_send(conn, res_lib_quorum_notification, size);
}
else {
struct quorum_pd *qpd;
for (tmp = lib_trackers_list.next; tmp != &lib_trackers_list; tmp = tmp->next) {
qpd = list_entry(tmp, struct quorum_pd, list);
corosync_api->ipc_dispatch_send(qpd->conn,
res_lib_quorum_notification, size);
}
}
return;
}
static void message_handler_req_lib_quorum_getquorate (void *conn,
const void *msg)
{
struct res_lib_quorum_getquorate res_lib_quorum_getquorate;
log_printf(LOGSYS_LEVEL_DEBUG, "got quorate request on %p", conn);
/* send status */
res_lib_quorum_getquorate.quorate = primary_designated;
res_lib_quorum_getquorate.header.size = sizeof(res_lib_quorum_getquorate);
res_lib_quorum_getquorate.header.id = MESSAGE_RES_QUORUM_GETQUORATE;
res_lib_quorum_getquorate.header.error = CS_OK;
corosync_api->ipc_response_send(conn, &res_lib_quorum_getquorate, sizeof(res_lib_quorum_getquorate));
}
static void message_handler_req_lib_quorum_trackstart (void *conn,
const void *msg)
{
const struct req_lib_quorum_trackstart *req_lib_quorum_trackstart = msg;
struct qb_ipc_response_header res;
struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
log_printf(LOGSYS_LEVEL_DEBUG, "got trackstart request on %p", conn);
/*
* If an immediate listing of the current cluster membership
* is requested, generate membership list
*/
if (req_lib_quorum_trackstart->track_flags & CS_TRACK_CURRENT ||
req_lib_quorum_trackstart->track_flags & CS_TRACK_CHANGES) {
log_printf(LOGSYS_LEVEL_DEBUG, "sending initial status to %p", conn);
send_library_notification(conn);
}
/*
* Record requests for tracking
*/
if (req_lib_quorum_trackstart->track_flags & CS_TRACK_CHANGES ||
req_lib_quorum_trackstart->track_flags & CS_TRACK_CHANGES_ONLY) {
quorum_pd->track_flags = req_lib_quorum_trackstart->track_flags;
quorum_pd->tracking_enabled = 1;
list_add (&quorum_pd->list, &lib_trackers_list);
}
/* send status */
res.size = sizeof(res);
res.id = MESSAGE_RES_QUORUM_TRACKSTART;
res.error = CS_OK;
corosync_api->ipc_response_send(conn, &res, sizeof(struct qb_ipc_response_header));
}
static void message_handler_req_lib_quorum_trackstop (void *conn, const void *msg)
{
struct qb_ipc_response_header res;
struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
log_printf(LOGSYS_LEVEL_DEBUG, "got trackstop request on %p", conn);
if (quorum_pd->tracking_enabled) {
res.error = CS_OK;
quorum_pd->tracking_enabled = 0;
list_del (&quorum_pd->list);
list_init (&quorum_pd->list);
} else {
res.error = CS_ERR_NOT_EXIST;
}
/* send status */
res.size = sizeof(res);
res.id = MESSAGE_RES_QUORUM_TRACKSTOP;
res.error = CS_OK;
corosync_api->ipc_response_send(conn, &res, sizeof(struct qb_ipc_response_header));
}
static void message_handler_req_lib_quorum_gettype (void *conn,
const void *msg)
{
struct res_lib_quorum_gettype res_lib_quorum_gettype;
log_printf(LOGSYS_LEVEL_DEBUG, "got quorum_type request on %p", conn);
/* send status */
res_lib_quorum_gettype.quorum_type = quorum_type;
res_lib_quorum_gettype.header.size = sizeof(res_lib_quorum_gettype);
res_lib_quorum_gettype.header.id = MESSAGE_RES_QUORUM_GETTYPE;
res_lib_quorum_gettype.header.error = CS_OK;
corosync_api->ipc_response_send(conn, &res_lib_quorum_gettype, sizeof(res_lib_quorum_gettype));
}
diff --git a/exec/wd.c b/exec/wd.c
index cb8b34fc..093747f7 100644
--- a/exec/wd.c
+++ b/exec/wd.c
@@ -1,740 +1,738 @@
/*
* Copyright (c) 2010-2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Angus Salkeld <asalkeld@redhat.com>
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/watchdog.h>
#include <sys/reboot.h>
#include <corosync/corotypes.h>
#include <corosync/corodefs.h>
#include <corosync/coroapi.h>
#include <corosync/list.h>
#include <corosync/logsys.h>
#include <corosync/icmap.h>
#include "fsm.h"
#include "service.h"
typedef enum {
WD_RESOURCE_GOOD,
WD_RESOURCE_FAILED,
WD_RESOURCE_STATE_UNKNOWN,
WD_RESOURCE_NOT_MONITORED
} wd_resource_state_t;
struct resource {
char res_path[ICMAP_KEYNAME_MAXLEN];
char *recovery;
char name[CS_MAX_NAME_LENGTH];
time_t last_updated;
struct cs_fsm fsm;
corosync_timer_handle_t check_timer;
uint64_t check_timeout;
icmap_track_t icmap_track;
};
LOGSYS_DECLARE_SUBSYS("WD");
/*
* Service Interfaces required by service_message_handler struct
*/
static char *wd_exec_init_fn (struct corosync_api_v1 *corosync_api);
static int wd_exec_exit_fn (void);
static void wd_resource_check_fn (void* resource_ref);
static struct corosync_api_v1 *api;
#define WD_DEFAULT_TIMEOUT_SEC 6
#define WD_DEFAULT_TIMEOUT_MS (WD_DEFAULT_TIMEOUT_SEC * CS_TIME_MS_IN_SEC)
#define WD_MIN_TIMEOUT_MS 500
#define WD_MAX_TIMEOUT_MS (120 * CS_TIME_MS_IN_SEC)
static uint32_t watchdog_timeout = WD_DEFAULT_TIMEOUT_SEC;
static uint64_t tickle_timeout = (WD_DEFAULT_TIMEOUT_MS / 2);
static int dog = -1;
static corosync_timer_handle_t wd_timer;
static int watchdog_ok = 1;
struct corosync_service_engine wd_service_engine = {
.name = "corosync watchdog service",
.id = WD_SERVICE,
.priority = 1,
.private_data_size = 0,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED,
.lib_init_fn = NULL,
.lib_exit_fn = NULL,
.lib_engine = NULL,
.lib_engine_count = 0,
.exec_engine = NULL,
.exec_engine_count = 0,
.confchg_fn = NULL,
.exec_init_fn = wd_exec_init_fn,
.exec_exit_fn = wd_exec_exit_fn,
.exec_dump_fn = NULL
};
static DECLARE_LIST_INIT (confchg_notify);
/*
* F S M
*/
static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data);
static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data);
enum wd_resource_state {
WD_S_RUNNING,
WD_S_FAILED,
WD_S_STOPPED
};
enum wd_resource_event {
WD_E_FAILURE,
WD_E_CONFIG_CHANGED
};
const char * wd_running_str = "running";
const char * wd_failed_str = "failed";
const char * wd_failure_str = "failure";
const char * wd_stopped_str = "stopped";
const char * wd_config_changed_str = "config_changed";
struct cs_fsm_entry wd_fsm_table[] = {
{ WD_S_STOPPED, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_STOPPED, WD_S_RUNNING, -1} },
{ WD_S_STOPPED, WD_E_FAILURE, NULL, {-1} },
{ WD_S_RUNNING, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_RUNNING, WD_S_STOPPED, -1} },
{ WD_S_RUNNING, WD_E_FAILURE, wd_resource_failed, {WD_S_FAILED, -1} },
{ WD_S_FAILED, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_RUNNING, WD_S_STOPPED, -1} },
{ WD_S_FAILED, WD_E_FAILURE, NULL, {-1} },
};
struct corosync_service_engine *wd_get_service_engine_ver0 (void)
{
return (&wd_service_engine);
}
static const char * wd_res_state_to_str(struct cs_fsm* fsm,
int32_t state)
{
switch (state) {
case WD_S_STOPPED:
return wd_stopped_str;
break;
case WD_S_RUNNING:
return wd_running_str;
break;
case WD_S_FAILED:
return wd_failed_str;
break;
}
return NULL;
}
static const char * wd_res_event_to_str(struct cs_fsm* fsm,
int32_t event)
{
switch (event) {
case WD_E_CONFIG_CHANGED:
return wd_config_changed_str;
break;
case WD_E_FAILURE:
return wd_failure_str;
break;
}
return NULL;
}
static void wd_fsm_cb (struct cs_fsm *fsm, int cb_event, int32_t curr_state,
int32_t next_state, int32_t fsm_event, void *data)
{
switch (cb_event) {
case CS_FSM_CB_EVENT_PROCESS_NF:
log_printf (LOGSYS_LEVEL_ERROR, "Fsm:%s could not find event \"%s\" in state \"%s\"",
fsm->name, fsm->event_to_str(fsm, fsm_event), fsm->state_to_str(fsm, curr_state));
corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
break;
case CS_FSM_CB_EVENT_STATE_SET:
log_printf (LOGSYS_LEVEL_INFO, "Fsm:%s event \"%s\", state \"%s\" --> \"%s\"",
fsm->name,
fsm->event_to_str(fsm, fsm_event),
fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
fsm->state_to_str(fsm, next_state));
break;
case CS_FSM_CB_EVENT_STATE_SET_NF:
log_printf (LOGSYS_LEVEL_CRIT, "Fsm:%s Can't change state from \"%s\" to \"%s\" (event was \"%s\")",
fsm->name,
fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
fsm->state_to_str(fsm, next_state),
fsm->event_to_str(fsm, fsm_event));
corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
break;
default:
log_printf (LOGSYS_LEVEL_CRIT, "Fsm: Unknown callback event!");
corosync_exit_error(COROSYNC_DONE_FATAL_ERR);
break;
}
}
/*
* returns (CS_TRUE == OK, CS_FALSE == failed)
*/
static int32_t wd_resource_state_is_ok (struct resource *ref)
{
char* state = NULL;
uint64_t last_updated;
uint64_t my_time;
uint64_t allowed_period;
char key_name[ICMAP_KEYNAME_MAXLEN];
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "last_updated");
if (icmap_get_uint64(key_name, &last_updated) != CS_OK) {
/* key does not exist.
*/
return CS_FALSE;
}
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "state");
if (icmap_get_string(key_name, &state) != CS_OK || strcmp(state, "disabled") == 0) {
/* key does not exist.
*/
if (state != NULL)
free(state);
return CS_FALSE;
}
if (last_updated == 0) {
/* initial value */
free(state);
return CS_TRUE;
}
my_time = cs_timestamp_get();
/*
* Here we check that the monitor has written a timestamp within the poll_period
* plus a grace factor of (0.5 * poll_period).
*/
allowed_period = (ref->check_timeout * MILLI_2_NANO_SECONDS * 3) / 2;
if ((last_updated + allowed_period) < my_time) {
log_printf (LOGSYS_LEVEL_ERROR,
"last_updated %"PRIu64" ms too late, period:%"PRIu64".",
(uint64_t)(my_time/MILLI_2_NANO_SECONDS - ((last_updated + allowed_period) / MILLI_2_NANO_SECONDS)),
ref->check_timeout);
free(state);
return CS_FALSE;
}
if (strcmp (state, wd_failed_str) == 0) {
free(state);
return CS_FALSE;
}
free(state);
return CS_TRUE;
}
static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data)
{
char *state;
uint64_t tmp_value;
uint64_t next_timeout;
struct resource *ref = (struct resource*)data;
char key_name[ICMAP_KEYNAME_MAXLEN];
next_timeout = ref->check_timeout;
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "poll_period");
if (icmap_get_uint64(ref->res_path, &tmp_value) == CS_OK) {
if (tmp_value >= WD_MIN_TIMEOUT_MS && tmp_value <= WD_MAX_TIMEOUT_MS) {
log_printf (LOGSYS_LEVEL_DEBUG,
"poll_period changing from:%"PRIu64" to %"PRIu64".",
ref->check_timeout, tmp_value);
/*
* To easy in the transition between poll_period's we are going
* to make the first timeout the bigger of the new and old value.
* This is to give the monitoring system time to adjust.
*/
next_timeout = CS_MAX(tmp_value, ref->check_timeout);
ref->check_timeout = tmp_value;
} else {
log_printf (LOGSYS_LEVEL_WARNING,
"Could NOT use poll_period:%"PRIu64" ms for resource %s",
tmp_value, ref->name);
}
}
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "recovery");
if (icmap_get_string(key_name, &ref->recovery) != CS_OK) {
/* key does not exist.
*/
log_printf (LOGSYS_LEVEL_WARNING,
"resource %s missing a recovery key.", ref->name);
cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb);
return;
}
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "state");
if (icmap_get_string(key_name, &state) != CS_OK) {
/* key does not exist.
*/
log_printf (LOGSYS_LEVEL_WARNING,
"resource %s missing a state key.", ref->name);
cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb);
return;
}
if (ref->check_timer) {
api->timer_delete(ref->check_timer);
ref->check_timer = 0;
}
if (strcmp(wd_stopped_str, state) == 0) {
cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb);
} else {
api->timer_add_duration(next_timeout * MILLI_2_NANO_SECONDS,
ref, wd_resource_check_fn, &ref->check_timer);
cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref, wd_fsm_cb);
}
free(state);
}
static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data)
{
struct resource* ref = (struct resource*)data;
if (ref->check_timer) {
api->timer_delete(ref->check_timer);
ref->check_timer = 0;
}
log_printf (LOGSYS_LEVEL_CRIT, "%s resource \"%s\" failed!",
ref->recovery, (char*)ref->name);
if (strcmp (ref->recovery, "watchdog") == 0 ||
strcmp (ref->recovery, "quit") == 0) {
watchdog_ok = 0;
}
else if (strcmp (ref->recovery, "reboot") == 0) {
reboot(RB_AUTOBOOT);
}
else if (strcmp (ref->recovery, "shutdown") == 0) {
reboot(RB_POWER_OFF);
}
cs_fsm_state_set(fsm, WD_S_FAILED, data, wd_fsm_cb);
}
static void wd_key_changed(
int32_t event,
const char *key_name,
struct icmap_notify_value new_val,
struct icmap_notify_value old_val,
void *user_data)
{
struct resource* ref = (struct resource*)user_data;
char *last_key_part;
if (ref == NULL) {
return ;
}
last_key_part = strrchr(key_name, '.');
if (last_key_part == NULL) {
return ;
}
last_key_part++;
if (event == ICMAP_TRACK_ADD || event == ICMAP_TRACK_MODIFY) {
if (strcmp(last_key_part, "last_updated") == 0 ||
strcmp(last_key_part, "current") == 0) {
return;
}
cs_fsm_process(&ref->fsm, WD_E_CONFIG_CHANGED, ref, wd_fsm_cb);
}
if (event == ICMAP_TRACK_DELETE && ref != NULL) {
if (strcmp(last_key_part, "state") != 0) {
return ;
}
log_printf (LOGSYS_LEVEL_WARNING,
"resource \"%s\" deleted from cmap!",
ref->name);
api->timer_delete(ref->check_timer);
ref->check_timer = 0;
icmap_track_delete(ref->icmap_track);
free(ref);
}
}
static void wd_resource_check_fn (void* resource_ref)
{
struct resource* ref = (struct resource*)resource_ref;
if (wd_resource_state_is_ok (ref) == CS_FALSE) {
cs_fsm_process(&ref->fsm, WD_E_FAILURE, ref, wd_fsm_cb);
return;
}
api->timer_add_duration(ref->check_timeout*MILLI_2_NANO_SECONDS,
ref, wd_resource_check_fn, &ref->check_timer);
}
/*
* return 0 - fully configured
* return -1 - partially configured
*/
static int32_t wd_resource_create (char *res_path, char *res_name)
{
char *state;
uint64_t tmp_value;
struct resource *ref = calloc (1, sizeof (struct resource));
char key_name[ICMAP_KEYNAME_MAXLEN];
strcpy(ref->res_path, res_path);
ref->check_timeout = WD_DEFAULT_TIMEOUT_MS;
ref->check_timer = 0;
strcpy(ref->name, res_name);
ref->fsm.name = ref->name;
ref->fsm.table = wd_fsm_table;
ref->fsm.entries = sizeof(wd_fsm_table) / sizeof(struct cs_fsm_entry);
ref->fsm.curr_entry = 0;
ref->fsm.curr_state = WD_S_STOPPED;
ref->fsm.state_to_str = wd_res_state_to_str;
ref->fsm.event_to_str = wd_res_event_to_str;
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "poll_period");
if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) {
icmap_set_uint64(key_name, ref->check_timeout);
} else {
if (tmp_value >= WD_MIN_TIMEOUT_MS && tmp_value <= WD_MAX_TIMEOUT_MS) {
ref->check_timeout = tmp_value;
} else {
log_printf (LOGSYS_LEVEL_WARNING,
"Could NOT use poll_period:%"PRIu64" ms for resource %s",
tmp_value, ref->name);
}
}
icmap_track_add(res_path,
ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY | ICMAP_TRACK_DELETE | ICMAP_TRACK_PREFIX,
wd_key_changed,
ref, &ref->icmap_track);
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "recovery");
if (icmap_get_string(key_name, &ref->recovery) != CS_OK) {
/* key does not exist.
*/
log_printf (LOGSYS_LEVEL_WARNING,
"resource %s missing a recovery key.", ref->name);
return -1;
}
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "state");
if (icmap_get_string(key_name, &state) != CS_OK) {
/* key does not exist.
*/
log_printf (LOGSYS_LEVEL_WARNING,
"resource %s missing a state key.", ref->name);
return -1;
}
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "last_updated");
if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) {
/* key does not exist.
*/
ref->last_updated = 0;
} else {
ref->last_updated = tmp_value;
}
/*
* delay the first check to give the monitor time to start working.
*/
tmp_value = CS_MAX(ref->check_timeout * 2, WD_DEFAULT_TIMEOUT_MS);
api->timer_add_duration(tmp_value * MILLI_2_NANO_SECONDS,
ref,
wd_resource_check_fn, &ref->check_timer);
cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref, wd_fsm_cb);
return 0;
}
static void wd_tickle_fn (void* arg)
{
ENTER();
if (watchdog_ok) {
if (dog > 0) {
ioctl(dog, WDIOC_KEEPALIVE, &watchdog_ok);
}
api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL,
wd_tickle_fn, &wd_timer);
}
else {
log_printf (LOGSYS_LEVEL_ALERT, "NOT tickling the watchdog!");
}
}
static void wd_resource_created_cb(
int32_t event,
const char *key_name,
struct icmap_notify_value new_val,
struct icmap_notify_value old_val,
void *user_data)
{
char res_name[ICMAP_KEYNAME_MAXLEN];
char res_type[ICMAP_KEYNAME_MAXLEN];
char tmp_key[ICMAP_KEYNAME_MAXLEN];
int res;
if (event != ICMAP_TRACK_ADD) {
return ;
}
res = sscanf(key_name, "resources.%[^.].%[^.].%[^.]", res_type, res_name, tmp_key);
if (res != 3) {
return ;
}
if (strcmp(tmp_key, "state") != 0) {
return ;
}
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "resources.%s.%s.", res_type, res_name);
wd_resource_create (tmp_key, res_name);
}
static void wd_scan_resources (void)
{
int res_count = 0;
icmap_track_t icmap_track = NULL;
icmap_iter_t iter;
const char *key_name;
int res;
char res_name[ICMAP_KEYNAME_MAXLEN];
char res_type[ICMAP_KEYNAME_MAXLEN];
char tmp_key[ICMAP_KEYNAME_MAXLEN];
ENTER();
iter = icmap_iter_init("resources.");
while ((key_name = icmap_iter_next(iter, NULL, NULL)) != NULL) {
res = sscanf(key_name, "resources.%[^.].%[^.].%[^.]", res_type, res_name, tmp_key);
if (res != 3) {
continue ;
}
if (strcmp(tmp_key, "state") != 0) {
continue ;
}
snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "resources.%s.%s.", res_type, res_name);
if (wd_resource_create (tmp_key, res_name) == 0) {
res_count++;
}
}
icmap_iter_finalize(iter);
icmap_track_add("resources.process.", ICMAP_TRACK_ADD | ICMAP_TRACK_PREFIX,
wd_resource_created_cb, NULL, &icmap_track);
icmap_track_add("resources.system.", ICMAP_TRACK_ADD | ICMAP_TRACK_PREFIX,
wd_resource_created_cb, NULL, &icmap_track);
if (res_count == 0) {
log_printf (LOGSYS_LEVEL_INFO, "no resources configured.");
}
}
static void watchdog_timeout_apply (uint32_t new)
{
struct watchdog_info ident;
uint32_t original_timeout = watchdog_timeout;
if (new == original_timeout) {
return;
}
watchdog_timeout = new;
if (dog > 0) {
ioctl(dog, WDIOC_GETSUPPORT, &ident);
if (ident.options & WDIOF_SETTIMEOUT) {
/* yay! the dog is trained.
*/
ioctl(dog, WDIOC_SETTIMEOUT, &watchdog_timeout);
}
ioctl(dog, WDIOC_GETTIMEOUT, &watchdog_timeout);
}
if (watchdog_timeout == new) {
tickle_timeout = (watchdog_timeout * CS_TIME_MS_IN_SEC)/ 2;
/* reset the tickle timer in case it was reduced.
*/
api->timer_delete (wd_timer);
api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL,
wd_tickle_fn, &wd_timer);
log_printf (LOGSYS_LEVEL_DEBUG, "The Watchdog timeout is %d seconds", watchdog_timeout);
log_printf (LOGSYS_LEVEL_DEBUG, "The tickle timeout is %"PRIu64" ms", tickle_timeout);
} else {
log_printf (LOGSYS_LEVEL_WARNING,
"Could not change the Watchdog timeout from %d to %d seconds",
original_timeout, new);
}
}
static int setup_watchdog(void)
{
struct watchdog_info ident;
ENTER();
if (access ("/dev/watchdog", W_OK) != 0) {
log_printf (LOGSYS_LEVEL_WARNING, "No Watchdog, try modprobe <a watchdog>");
dog = -1;
return -1;
}
/* here goes, lets hope they have "Magic Close"
*/
dog = open("/dev/watchdog", O_WRONLY);
if (dog == -1) {
log_printf (LOGSYS_LEVEL_WARNING, "Watchdog exists but couldn't be opened.");
dog = -1;
return -1;
}
/* Right we have the dog.
* Lets see what breed it is.
*/
ioctl(dog, WDIOC_GETSUPPORT, &ident);
log_printf (LOGSYS_LEVEL_INFO, "Watchdog is now been tickled by corosync.");
log_printf (LOGSYS_LEVEL_DEBUG, "%s", ident.identity);
watchdog_timeout_apply (watchdog_timeout);
ioctl(dog, WDIOC_SETOPTIONS, WDIOS_ENABLECARD);
return 0;
}
static void wd_top_level_key_changed(
int32_t event,
const char *key_name,
struct icmap_notify_value new_val,
struct icmap_notify_value old_val,
void *user_data)
{
uint32_t tmp_value_32;
ENTER();
if (icmap_get_uint32("resources.watchdog_timeout", &tmp_value_32) != CS_OK) {
if (tmp_value_32 >= 2 && tmp_value_32 <= 120) {
watchdog_timeout_apply (tmp_value_32);
}
}
else {
watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
}
}
static void watchdog_timeout_get_initial (void)
{
uint32_t tmp_value_32;
icmap_track_t icmap_track = NULL;
ENTER();
if (icmap_get_uint32("resources.watchdog_timeout", &tmp_value_32) != CS_OK) {
watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
icmap_set_uint32("resources.watchdog_timeout", watchdog_timeout);
}
else {
if (tmp_value_32 >= 2 && tmp_value_32 <= 120) {
watchdog_timeout_apply (tmp_value_32);
} else {
watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
}
}
icmap_track_add("resources.watchdog_timeout", ICMAP_TRACK_MODIFY,
wd_top_level_key_changed, NULL, &icmap_track);
}
static char *wd_exec_init_fn (struct corosync_api_v1 *corosync_api)
{
ENTER();
-#ifdef COROSYNC_SOLARIS
- logsys_subsys_init();
-#endif
+
api = corosync_api;
watchdog_timeout_get_initial();
setup_watchdog();
wd_scan_resources();
api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL,
wd_tickle_fn, &wd_timer);
return NULL;
}
static int wd_exec_exit_fn (void)
{
char magic = 'V';
ENTER();
if (dog > 0) {
log_printf (LOGSYS_LEVEL_INFO, "magically closing the watchdog.");
write (dog, &magic, 1);
}
return 0;
}
diff --git a/include/corosync/corotypes.h b/include/corosync/corotypes.h
index d5a7d011..801a61a5 100644
--- a/include/corosync/corotypes.h
+++ b/include/corosync/corotypes.h
@@ -1,144 +1,140 @@
/*
* Copyright (c) 2008 Allied Telesis Labs.
* Copyright (c) 2012 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Angus Salkeld (ahsalkeld@gmail.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef COROTYPES_H_DEFINED
#define COROTYPES_H_DEFINED
-#ifndef COROSYNC_SOLARIS
#include <stdint.h>
-#else
-#include <sys/types.h>
-#endif
#include <errno.h>
#include <time.h>
#include <sys/time.h>
typedef int64_t cs_time_t;
#define CS_FALSE 0
#define CS_TRUE !CS_FALSE
#define CS_MAX_NAME_LENGTH 256
#define CS_TIME_END ((cs_time_t)0x7FFFFFFFFFFFFFFFULL)
#define CS_MAX(x, y) (((x) > (y)) ? (x) : (y))
typedef struct {
uint16_t length;
uint8_t value[CS_MAX_NAME_LENGTH];
} cs_name_t;
typedef struct {
char releaseCode;
unsigned char majorVersion;
unsigned char minorVersion;
} cs_version_t;
typedef enum {
CS_DISPATCH_ONE = 1,
CS_DISPATCH_ALL = 2,
CS_DISPATCH_BLOCKING = 3,
CS_DISPATCH_ONE_NONBLOCKING = 4
} cs_dispatch_flags_t;
#define CS_TRACK_CURRENT 0x01
#define CS_TRACK_CHANGES 0x02
#define CS_TRACK_CHANGES_ONLY 0x04
typedef enum {
CS_OK = 1,
CS_ERR_LIBRARY = 2,
CS_ERR_VERSION = 3,
CS_ERR_INIT = 4,
CS_ERR_TIMEOUT = 5,
CS_ERR_TRY_AGAIN = 6,
CS_ERR_INVALID_PARAM = 7,
CS_ERR_NO_MEMORY = 8,
CS_ERR_BAD_HANDLE = 9,
CS_ERR_BUSY = 10,
CS_ERR_ACCESS = 11,
CS_ERR_NOT_EXIST = 12,
CS_ERR_NAME_TOO_LONG = 13,
CS_ERR_EXIST = 14,
CS_ERR_NO_SPACE = 15,
CS_ERR_INTERRUPT = 16,
CS_ERR_NAME_NOT_FOUND = 17,
CS_ERR_NO_RESOURCES = 18,
CS_ERR_NOT_SUPPORTED = 19,
CS_ERR_BAD_OPERATION = 20,
CS_ERR_FAILED_OPERATION = 21,
CS_ERR_MESSAGE_ERROR = 22,
CS_ERR_QUEUE_FULL = 23,
CS_ERR_QUEUE_NOT_AVAILABLE = 24,
CS_ERR_BAD_FLAGS = 25,
CS_ERR_TOO_BIG = 26,
CS_ERR_NO_SECTIONS = 27,
CS_ERR_CONTEXT_NOT_FOUND = 28,
CS_ERR_TOO_MANY_GROUPS = 30,
CS_ERR_SECURITY = 100
} cs_error_t;
#define CS_IPC_TIMEOUT_MS -1
#define CS_TIME_MS_IN_SEC 1000ULL
#define CS_TIME_US_IN_SEC 1000000ULL
#define CS_TIME_NS_IN_SEC 1000000000ULL
#define CS_TIME_US_IN_MSEC 1000ULL
#define CS_TIME_NS_IN_MSEC 1000000ULL
#define CS_TIME_NS_IN_USEC 1000ULL
static inline uint64_t cs_timestamp_get(void)
{
uint64_t result;
#if defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
struct timespec ts;
clock_gettime (CLOCK_MONOTONIC, &ts);
result = (ts.tv_sec * CS_TIME_NS_IN_SEC) + (uint64_t)ts.tv_nsec;
#else
struct timeval time_from_epoch;
gettimeofday (&time_from_epoch, 0);
result = ((time_from_epoch.tv_sec * CS_TIME_NS_IN_SEC) +
(time_from_epoch.tv_usec * CS_TIME_NS_IN_USEC));
#endif
return result;
}
cs_error_t qb_to_cs_error (int result);
const char * cs_strerror(cs_error_t err);
cs_error_t hdb_error_to_cs (int res);
#endif /* COROTYPES_H_DEFINED */
diff --git a/include/corosync/mar_gen.h b/include/corosync/mar_gen.h
index 47edf09a..622bbec4 100644
--- a/include/corosync/mar_gen.h
+++ b/include/corosync/mar_gen.h
@@ -1,187 +1,183 @@
/*
* Copyright (c) 2006-2011 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MAR_GEN_H_DEFINED
#define MAR_GEN_H_DEFINED
-#ifndef COROSYNC_SOLARIS
#include <stdint.h>
-#else
-#include <sys/types.h>
-#endif
#include <string.h>
#include <corosync/corotypes.h>
#include <corosync/swab.h>
typedef int8_t mar_int8_t;
typedef int16_t mar_int16_t;
typedef int32_t mar_int32_t;
typedef int64_t mar_int64_t;
typedef uint8_t mar_uint8_t;
typedef uint16_t mar_uint16_t;
typedef uint32_t mar_uint32_t;
typedef uint64_t mar_uint64_t;
static inline void swab_mar_int8_t (mar_int8_t *to_swab)
{
return;
}
static inline void swab_mar_int16_t (mar_int16_t *to_swab)
{
*to_swab = swab16 (*to_swab);
}
static inline void swab_mar_int32_t (mar_int32_t *to_swab)
{
*to_swab = swab32 (*to_swab);
}
static inline void swab_mar_int64_t (mar_int64_t *to_swab)
{
*to_swab = swab64 (*to_swab);
}
static inline void swab_mar_uint8_t (mar_uint8_t *to_swab)
{
return;
}
static inline void swab_mar_uint16_t (mar_uint16_t *to_swab)
{
*to_swab = swab16 (*to_swab);
}
static inline void swab_mar_uint32_t (mar_uint32_t *to_swab)
{
*to_swab = swab32 (*to_swab);
}
static inline void swab_mar_uint64_t (mar_uint64_t *to_swab)
{
*to_swab = swab64 (*to_swab);
}
typedef struct {
mar_uint16_t length __attribute__((aligned(8)));
mar_uint8_t value[CS_MAX_NAME_LENGTH] __attribute__((aligned(8)));
} mar_name_t;
static inline const char *get_mar_name_t (const mar_name_t *name) {
return ((const char *)name->value);
}
static inline int mar_name_match(const mar_name_t *name1, const mar_name_t *name2)
{
if (name1->length == name2->length) {
return ((strncmp ((const char *)name1->value,
(const char *)name2->value,
name1->length)) == 0);
}
return 0;
}
static inline void swab_mar_name_t (mar_name_t *to_swab)
{
swab_mar_uint16_t (&to_swab->length);
}
static inline void marshall_from_mar_name_t (
cs_name_t *dest,
const mar_name_t *src)
{
dest->length = src->length;
memcpy (dest->value, src->value, CS_MAX_NAME_LENGTH);
}
static inline void marshall_to_mar_name_t (
mar_name_t *dest,
const cs_name_t *src)
{
dest->length = src->length;
memcpy (dest->value, src->value, CS_MAX_NAME_LENGTH);
}
typedef enum {
MAR_FALSE = 0,
MAR_TRUE = 1
} mar_bool_t;
typedef mar_uint64_t mar_time_t;
static inline void swab_mar_time_t (mar_time_t *to_swab)
{
swab_mar_uint64_t (to_swab);
}
#define MAR_TIME_END ((int64_t)0x7fffffffffffffffull)
#define MAR_TIME_BEGIN 0x0ULL
#define MAR_TIME_UNKNOWN 0x8000000000000000ULL
#define MAR_TIME_ONE_MICROSECOND 1000ULL
#define MAR_TIME_ONE_MILLISECOND 1000000ULL
#define MAR_TIME_ONE_SECOND 1000000000ULL
#define MAR_TIME_ONE_MINUTE 60000000000ULL
#define MAR_TIME_ONE_HOUR 3600000000000ULL
#define MAR_TIME_ONE_DAY 86400000000000ULL
#define MAR_TIME_MAX CS_TIME_END
#define MAR_TRACK_CURRENT 0x01
#define MAR_TRACK_CHANGES 0x02
#define MAR_TRACK_CHANGES_ONLY 0x04
typedef mar_uint64_t mar_invocation_t;
static inline void swab_mar_invocation_t (mar_invocation_t *to_swab)
{
swab_mar_uint64_t (to_swab);
}
typedef mar_uint64_t mar_size_t;
static inline void swab_mar_size_t (mar_size_t *to_swab)
{
swab_mar_uint64_t (to_swab);
}
static inline void swab_coroipc_request_header_t (struct qb_ipc_request_header *to_swab)
{
swab_mar_int32_t (&to_swab->size);
swab_mar_int32_t (&to_swab->id);
}
#endif /* MAR_GEN_H_DEFINED */

File Metadata

Mime Type
text/x-diff
Expires
Thu, Jul 10, 1:24 AM (15 h, 48 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2009476
Default Alt Text
(364 KB)

Event Timeline