Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F2825124
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
124 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/configure.ac b/configure.ac
index d7d4c33..add6b92 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,433 +1,443 @@
# -*- Autoconf -*-
# Process this file with autoconf to produce a configure script.
# bootstrap / init
AC_PREREQ([2.61])
AC_INIT([booth], [0.2.0], [pacemaker@oss.clusterlabs.org])
AM_INIT_AUTOMAKE([-Wno-portability])
AC_CONFIG_SRCDIR([src/main.c])
AC_CONFIG_HEADER([src/b_config.h src/booth_config.h])
AC_CANONICAL_HOST
AC_LANG([C])
AC_SUBST(WITH_LIST, [""])
dnl Fix default variables - "prefix" variable if not specified
if test "$prefix" = "NONE"; then
prefix="/usr"
dnl Fix "localstatedir" variable if not specified
if test "$localstatedir" = "\${prefix}/var"; then
localstatedir="/var"
fi
dnl Fix "sysconfdir" variable if not specified
if test "$sysconfdir" = "\${prefix}/etc"; then
sysconfdir="/etc"
fi
dnl Fix "libdir" variable if not specified
if test "$libdir" = "\${exec_prefix}/lib"; then
if test -e /usr/lib64; then
libdir="/usr/lib64"
else
libdir="/usr/lib"
fi
fi
fi
if test "$srcdir" = "."; then
AC_MSG_NOTICE([building in place srcdir:$srcdir])
AC_DEFINE([BUILDING_IN_PLACE], 1, [building in place])
else
AC_MSG_NOTICE([building out of tree srcdir:$srcdir])
fi
# Checks for programs.
# check stolen from gnulib/m4/gnu-make.m4
if ! ${MAKE-make} --version /cannot/make/this >/dev/null 2>&1; then
AC_MSG_ERROR([you don't seem to have GNU make; it is required])
fi
AC_PROG_CC
AC_PROG_INSTALL
AC_PROG_LN_S
AC_PROG_MAKE_SET
AC_PROG_RANLIB
AC_PATH_PROGS(PKGCONFIG, pkg-config)
AC_PATH_PROGS(ASCIIDOC, asciidoc)
AM_CONDITIONAL(BUILD_ASCIIDOC, test x"${ASCIIDOC}" != x"")
# Checks for libraries.
AC_CHECK_LIB([socket], [socket])
AC_CHECK_LIB([nsl], [t_open])
AC_CHECK_LIB([gpl], [cl_log])
PKG_CHECK_MODULES(GLIB, [glib-2.0])
# Checks for header files.
AC_FUNC_ALLOCA
AC_HEADER_DIRENT
AC_HEADER_STDC
AC_HEADER_SYS_WAIT
AC_CHECK_HEADERS([arpa/inet.h fcntl.h limits.h netdb.h netinet/in.h stdint.h \
stdlib.h string.h sys/ioctl.h sys/param.h sys/socket.h \
sys/time.h syslog.h unistd.h sys/types.h getopt.h malloc.h \
sys/sockio.h utmpx.h])
AC_CHECK_HEADERS(heartbeat/glue_config.h)
# Checks for typedefs, structures, and compiler characteristics.
AC_C_CONST
AC_TYPE_UID_T
AC_C_INLINE
AC_TYPE_INT16_T
AC_TYPE_INT32_T
AC_TYPE_INT64_T
AC_TYPE_INT8_T
AC_TYPE_SIZE_T
AC_TYPE_SSIZE_T
AC_HEADER_TIME
AC_TYPE_UINT16_T
AC_TYPE_UINT32_T
AC_TYPE_UINT64_T
AC_TYPE_UINT8_T
AC_C_VOLATILE
# Checks for library functions.
AC_FUNC_CLOSEDIR_VOID
AC_FUNC_ERROR_AT_LINE
AC_REPLACE_FNMATCH
AC_FUNC_FORK
AC_PROG_GCC_TRADITIONAL
AC_FUNC_MALLOC
AC_FUNC_MEMCMP
AC_FUNC_REALLOC
AC_FUNC_SELECT_ARGTYPES
AC_TYPE_SIGNAL
AC_FUNC_VPRINTF
AC_CHECK_FUNCS([alarm alphasort atexit bzero dup2 endgrent endpwent fcntl \
getcwd getpeerucred getpeereid gettimeofday inet_ntoa memmove \
memset mkdir scandir select socket strcasecmp strchr strdup \
strerror strrchr strspn strstr \
sched_get_priority_max sched_setscheduler])
AC_CONFIG_FILES([Makefile
src/Makefile
docs/Makefile])
# ===============================================
# Helpers
# ===============================================
## helper for CC stuff
cc_supports_flag() {
local CFLAGS="-Werror $@"
AC_MSG_CHECKING(whether $CC supports "$@")
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[int main(){return 0;}]])],
[RC=0; AC_MSG_RESULT(yes)],[RC=1; AC_MSG_RESULT(no)])
return $RC
}
## cleanup
AC_MSG_NOTICE(Sanitizing prefix: ${prefix})
case $prefix in
NONE) prefix=/usr/local;;
esac
AC_MSG_NOTICE(Sanitizing exec_prefix: ${exec_prefix})
case $exec_prefix in
dnl For consistency with Corosync, map NONE->$prefix
NONE) exec_prefix=$prefix;;
prefix) exec_prefix=$prefix;;
esac
## local defines
PACKAGE_FEATURES=""
LINT_FLAGS="-weak -unrecog +posixlib +ignoresigns -fcnuse \
-badflag -D__gnuc_va_list=va_list -D__attribute\(x\)="
# local options
AC_ARG_ENABLE([fatal-warnings],
[ --enable-fatal-warnings : enable fatal warnings. ],
[ default="no" ])
AC_ARG_ENABLE([debug],
[ --enable-debug : enable debug build. ],
[ default="no" ])
AC_ARG_ENABLE([user-flags],
[ --enable-user-flags : rely on user environment. ],
[ default="no" ])
AC_ARG_ENABLE([coverage],
[ --enable-coverage : coverage analysis of the codebase. ],
[ default="no" ])
AC_ARG_ENABLE([small-memory-footprint],
[ --enable-small-memory-footprint : Use small message queues and small messages sizes. ],
[ default="no" ])
AC_ARG_WITH([initddir],
[ --with-initddir=DIR : path to init script directory. ],
[ INITDDIR="$withval" ],
[ INITDDIR="$sysconfdir/init.d" ])
AC_ARG_ENABLE([resource-monitor],
[ --enable-resource-monitor : Enabling Resource Monitor ],
[ default="no" ])
# OS detection
# THIS SECTION MUST DIE!
CP=cp
OS_LDL="-ldl"
have_linux="no"
case "$host_os" in
*linux*)
AC_DEFINE_UNQUOTED([BOOTH_LINUX], [1],
[Compiling for Linux platform])
OS_CFLAGS=""
OS_CPPFLAGS="-D_GNU_SOURCE"
OS_LDFLAGS=""
OS_DYFLAGS="-rdynamic"
DARWIN_OPTS=""
have_linux="yes"
;;
darwin*)
AC_DEFINE_UNQUOTED([BOOTH_DARWIN], [1],
[Compiling for Darwin platform])
CP=rsync
OS_CFLAGS=""
OS_CPPFLAGS=""
OS_LDFLAGS=""
OS_DYFLAGS=""
DARWIN_OPTS="-dynamiclib -bind_at_load \
-current_version ${SONAME} \
-compatibility_version ${SONAME} -install_name \$(libdir)/\$(@)"
AC_DEFINE_UNQUOTED([MAP_ANONYMOUS], [MAP_ANON],
[Shared memory define for Darwin platform])
AC_DEFINE_UNQUOTED([PATH_MAX], [4096],
[Number of chars in a path name including nul])
AC_DEFINE_UNQUOTED([NAME_MAX], [255],
[Number of chars in a file name])
;;
*bsd*)
AC_DEFINE_UNQUOTED([BOOTH_BSD], [1],
[Compiling for BSD platform])
AC_DEFINE_UNQUOTED([MAP_ANONYMOUS], [MAP_ANON],
[Shared memory define for Darwin platform])
OS_CFLAGS=""
OS_CPPFLAGS="-I/usr/local/include"
OS_LDFLAGS="-L/usr/local/lib"
OS_DYFLAGS="-export-dynamic"
DARWIN_OPTS=""
OS_LDL=""
case "$host_os" in
*freebsd[[234567]]*)
;;
*freebsd*)
AC_DEFINE_UNQUOTED([BOOTH_FREEBSD_GE_8], [1],
[Compiling for FreeBSD >= 8 platform])
;;
esac
;;
*solaris*)
AC_DEFINE_UNQUOTED([BOOTH_SOLARIS], [1],
[Compiling for Solaris platform])
AC_DEFINE_UNQUOTED([TS_CLASS], [1],
[Prevent being scheduled RR])
AC_DEFINE_UNQUOTED([_SEM_SEMUN_UNDEFINED], [1],
[The semun structure is undefined])
CP=rsync
OS_CFLAGS=""
OS_CPPFLAGS="-D_REENTRANT"
OS_LDFLAGS=""
OS_DYFLAGS="-Wl,-z,lazyload"
DARWIN_OPTS=""
SOLARIS_OPTS=" "
;;
*)
AC_MSG_ERROR([Unsupported OS? hmmmm])
;;
esac
AC_SUBST(CP)
# *FLAGS handling goes here
ENV_CFLAGS="$CFLAGS"
ENV_CPPFLAGS="$CPPFLAGS"
ENV_LDFLAGS="$LDFLAGS"
# debug build stuff
if test "x${enable_debug}" = xyes; then
AC_DEFINE_UNQUOTED([DEBUG], [1], [Compiling Debugging code])
OPT_CFLAGS="-O0"
PACKAGE_FEATURES="$PACKAGE_FEATURES debug"
else
OPT_CFLAGS="-O3"
fi
# gdb flags
if test "x${GCC}" = xyes; then
GDB_FLAGS="-ggdb3"
else
GDB_FLAGS="-g"
fi
+dnl Check for POSIX clock_gettime
+dnl
+AC_CACHE_CHECK([have clock_gettime],ac_cv_HAVE_CLOCK_GETTIME,[
+AC_TRY_COMPILE([
+#include <time.h>
+],
+[ struct timespec tv; clock_gettime(CLOCK_REALTIME, &tv); return 0;],
+ac_cv_HAVE_CLOCK_GETTIME=yes,ac_cv_HAVE_CLOCK_GETTIME=no,ac_cv_HAVE_CLOCK_GETTIME=cross)])
+AM_CONDITIONAL(BUILD_TIMER_C, test x"$ac_cv_HAVE_CLOCK_GETTIME" = x"yes")
+
# extra warnings
EXTRA_WARNINGS=""
WARNLIST="
all
shadow
missing-prototypes
missing-declarations
strict-prototypes
declaration-after-statement
pointer-arith
write-strings
bad-function-cast
missing-format-attribute
format=2
format-security
format-nonliteral
no-long-long
unsigned-char
gnu89-inline
no-strict-aliasing
"
for j in $WARNLIST; do
if cc_supports_flag -W$j; then
EXTRA_WARNINGS="$EXTRA_WARNINGS -W$j";
fi
done
if test "x${enable_coverage}" = xyes && \
cc_supports_flag -ftest-coverage && \
cc_supports_flag -fprofile-arcs ; then
AC_MSG_NOTICE([Enabling Coverage (enable -O0 by default)])
OPT_CFLAGS="-O0"
COVERAGE_CFLAGS="-ftest-coverage -fprofile-arcs"
COVERAGE_LDFLAGS="-ftest-coverage -fprofile-arcs"
COVERAGE_LCRSO_EXTRA_LDFLAGS="-rdynamic"
PACKAGE_FEATURES="$PACKAGE_FEATURES coverage"
else
COVERAGE_CFLAGS=""
COVERAGE_LDFLAGS=""
COVERAGE_LCRSO_EXTRA_LDFLAGS=""
fi
if test "x${enable_small_memory_footprint}" = xyes ; then
AC_DEFINE_UNQUOTED([HAVE_SMALL_MEMORY_FOOTPRINT], 1, [have small_memory_footprint])
PACKAGE_FEATURES="$PACKAGE_FEATURES small-memory-footprint"
fi
if test "x${enable_ansi}" = xyes && \
cc_supports_flag -std=iso9899:199409 ; then
AC_MSG_NOTICE([Enabling ANSI Compatibility])
ANSI_CPPFLAGS="-ansi -D_GNU_SOURCE -DANSI_ONLY"
PACKAGE_FEATURES="$PACKAGE_FEATURES ansi"
else
ANSI_CPPFLAGS=""
fi
if test "x${enable_fatal_warnings}" = xyes && \
cc_supports_flag -Werror ; then
AC_MSG_NOTICE([Enabling Fatal Warnings (-Werror)])
WERROR_CFLAGS="-Werror"
PACKAGE_FEATURES="$PACKAGE_FEATURES fatal-warnings"
else
WERROR_CFLAGS=""
fi
# don't add addtional cflags
if test "x${enable_user_flags}" = xyes; then
OPT_CFLAGS=""
GDB_FLAGS=""
EXTRA_WARNINGS=""
fi
# final build of *FLAGS
CFLAGS="$ENV_CFLAGS $OPT_CFLAGS $GDB_FLAGS $OS_CFLAGS \
$COVERAGE_CFLAGS $EXTRA_WARNINGS $WERROR_CFLAGS $NSS_CFLAGS"
CPPFLAGS="$ENV_CPPFLAGS $ANSI_CPPFLAGS $OS_CPPFLAGS $GLIB_CFLAGS $RESMON_CFLAGS"
LDFLAGS="$ENV_LDFLAGS $COVERAGE_LDFLAGS $OS_LDFLAGS"
# substitute what we need:
AC_SUBST([INITDDIR])
AC_SUBST([COVERAGE_LCRSO_EXTRA_LDFLAGS])
AC_SUBST([OS_DYFLAGS])
AC_SUBST([OS_LDL])
AM_CONDITIONAL(BUILD_DARWIN, test -n "${DARWIN_OPTS}")
AM_CONDITIONAL(BUILD_SOLARIS, test -n "${SOLARIS_OPTS}")
AC_SUBST([DARWIN_OPTS])
AC_SUBST([SOLARIS_OPTS])
AM_CONDITIONAL(BUILD_HTML_DOCS, test -n "${GROFF}")
AC_SUBST([LINT_FLAGS])
AC_DEFINE_UNQUOTED([LCRSODIR], "$(eval echo ${LCRSODIR})", [LCRSO directory])
AC_DEFINE_UNQUOTED([SOCKETDIR], "$(eval echo ${SOCKETDIR})", [Socket directory])
AC_DEFINE_UNQUOTED([LOCALSTATEDIR], "$(eval echo ${localstatedir})", [localstate directory])
BOOTHSYSCONFDIR=${sysconfdir}/booth
AC_SUBST([HAVE_LOG_CIB_DIFF])
AC_SUBST([HAVE_XML_LOG_PATCHSET])
AC_SUBST([BOOTHSYSCONFDIR])
AC_DEFINE_UNQUOTED([BOOTHSYSCONFDIR], "$(eval echo ${BOOTHSYSCONFDIR})", [booth config directory])
AC_DEFINE_UNQUOTED([PACKAGE_FEATURES], "${PACKAGE_FEATURES}", [booth built-in features])
AC_OUTPUT
AC_MSG_RESULT([])
AC_MSG_RESULT([$PACKAGE configuration:])
AC_MSG_RESULT([ Version = ${VERSION}])
AC_MSG_RESULT([ Prefix = ${prefix}])
AC_MSG_RESULT([ Executables = ${sbindir}])
AC_MSG_RESULT([ Man pages = ${mandir}])
AC_MSG_RESULT([ Doc dir = ${docdir}])
AC_MSG_RESULT([ Libraries = ${libdir}])
AC_MSG_RESULT([ Header files = ${includedir}])
AC_MSG_RESULT([ Arch-independent files = ${datadir}])
AC_MSG_RESULT([ State information = ${localstatedir}])
AC_MSG_RESULT([ System configuration = ${sysconfdir}])
AC_MSG_RESULT([ System init.d directory = ${INITDDIR}])
AC_MSG_RESULT([ booth config dir = ${BOOTHSYSCONFDIR}])
AC_MSG_RESULT([ SOCKETDIR = ${SOCKETDIR}])
AC_MSG_RESULT([ Features =${PACKAGE_FEATURES}])
AC_MSG_RESULT([])
AC_MSG_RESULT([$PACKAGE build info:])
AC_MSG_RESULT([ Library SONAME = ${SONAME}])
LIB_MSG_RESULT(m4_shift(local_soname_list))dnl
AC_MSG_RESULT([ Default optimization = ${OPT_CFLAGS}])
AC_MSG_RESULT([ Default debug options = ${GDB_CFLAGS}])
AC_MSG_RESULT([ Extra compiler warnings = ${EXTRA_WARNING}])
AC_MSG_RESULT([ Env. defined CFLAG = ${ENV_CFLAGS}])
AC_MSG_RESULT([ Env. defined CPPFLAGS = ${ENV_CPPFLAGS}])
AC_MSG_RESULT([ Env. defined LDFLAGS = ${ENV_LDFLAGS}])
AC_MSG_RESULT([ OS defined CFLAGS = ${OS_CFLAGS}])
AC_MSG_RESULT([ OS defined CPPFLAGS = ${OS_CPPFLAGS}])
AC_MSG_RESULT([ OS defined LDFLAGS = ${OS_LDFLAGS}])
AC_MSG_RESULT([ OS defined LDL = ${OS_LDL}])
AC_MSG_RESULT([ OS defined DYFLAGS = ${OS_DYFLAGS}])
AC_MSG_RESULT([ ANSI defined CPPFLAGS = ${ANSI_CPPFLAGS}])
AC_MSG_RESULT([ Coverage CFLAGS = ${COVERAGE_CFLAGS}])
AC_MSG_RESULT([ Coverage LDFLAGS = ${COVERAGE_LDFLAGS}])
AC_MSG_RESULT([ Fatal War. CFLAGS = ${WERROR_CFLAGS}])
AC_MSG_RESULT([ Final CFLAGS = ${CFLAGS}])
AC_MSG_RESULT([ Final CPPFLAGS = ${CPPFLAGS}])
AC_MSG_RESULT([ Final LDFLAGS = ${LDFLAGS}])
diff --git a/src/Makefile.am b/src/Makefile.am
index b3b60dd..b43e511 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,21 +1,25 @@
MAINTAINERCLEANFILES = Makefile.in
AM_CFLAGS = -fPIC -Werror -funsigned-char -Wno-pointer-sign
AM_CPPFLAGS = -I$(top_builddir)/include
sbin_PROGRAMS = boothd
boothd_SOURCES = config.c main.c raft.c ticket.c transport.c \
pacemaker.c handler.c
+if BUILD_TIMER_C
+boothd_SOURCES += timer.c
+endif
+
boothd_LDFLAGS = $(OS_DYFLAGS) -L./
boothd_LDADD = -lplumb -lplumbgpl -lz -lm
boothd_CPPFLAGS = $(GLIB_CFLAGS)
noinst_HEADERS = booth.h pacemaker.h \
config.h log.h raft.h ticket.h transport.h handler.h
lint:
-splint $(INCLUDES) $(LINT_FLAGS) $(CFLAGS) *.c
diff --git a/src/config.h b/src/config.h
index 48159a8..e8ef069 100644
--- a/src/config.h
+++ b/src/config.h
@@ -1,242 +1,235 @@
/*
* Copyright (C) 2011 Jiaju Zhang <jjzhang@suse.de>
* Copyright (C) 2013-2014 Philipp Marek <philipp.marek@linbit.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include <stdint.h>
#include "booth.h"
+#include "timer.h"
#include "raft.h"
#include "transport.h"
/** @{ */
/** Definitions for in-RAM data. */
#define MAX_NODES 16
#define TICKET_ALLOC 16
struct ticket_config {
/** \name Configuration items.
* @{ */
/** Name of ticket. */
boothc_ticket name;
/** How many seconds a term lasts (if not refreshed). */
int term_duration;
/** Network related timeouts. */
int timeout;
/** Retries before giving up. */
int retries;
/** If >0, time to wait for a site to get fenced.
* The ticket may be acquired after that timespan by
* another site. */
int acquire_after; /* TODO: needed? */
/* Program to ask whether it makes sense to
* acquire the ticket */
char *ext_verifier;
/** Node weights. */
int weight[MAX_NODES];
/** @} */
/** \name Runtime values.
* @{ */
/** Current state. */
server_state_e state;
/** Next state. Used at startup. */
server_state_e next_state;
/** When something has to be done */
- struct timeval next_cron;
+ timetype next_cron;
/** Current leader. This is effectively the log[] in Raft. */
struct booth_site *leader;
/** Leader that got lost. */
struct booth_site *lost_leader;
/** Is the ticket granted? */
int is_granted;
/** Timestamp of leadership expiration */
time_t term_expires;
/** End of election period */
time_t election_end;
struct booth_site *voted_for;
/** Who the various sites vote for.
* NO_OWNER = no vote yet. */
struct booth_site *votes_for[MAX_NODES];
/* bitmap */
uint64_t votes_received;
/** Last voting round that was seen. */
uint32_t current_term;
/** Do ticket updates whenever we get enough heartbeats.
* But do that only once.
* This is reset to 0 whenever we broadcast heartbeat and set
* to 1 once enough acks are received.
* Increased to 2 when the ticket is commited to the CIB (see
* delay_commit).
*/
uint32_t ticket_updated;
/** @} */
/** */
uint32_t last_applied;
uint32_t next_index[MAX_NODES];
uint32_t match_index[MAX_NODES];
/* Why did we start the elections?
*/
cmd_reason_t election_reason;
/* if it is potentially dangerous to grant the ticket
* immediately, then this is set to some point in time,
* usually (now + term_duration + acquire_after)
*/
time_t delay_commit;
/* the last request RPC we sent
*/
uint32_t last_request;
/* if we expect some acks, then set this to the id of
* the RPC which others will send us; it is cleared once all
* replies were received
*/
uint32_t acks_expected;
/* bitmask of servers which sent acks
*/
uint64_t acks_received;
/* timestamp of the request, currently unused */
time_t req_sent_at;
/* we need to wait for MY_INDEX from other servers,
* hold the ticket processing for a while until they reply
*/
int start_postpone;
/* Do we need to update the copy in the CIB?
* Normally, the ticket is written only when it changes via
* the UPDATE RPC (for followers) and on expiration update
* (for leaders)
*/
int update_cib;
/* Is this ticket in election?
*/
int in_election;
/* don't log warnings unnecessarily
*/
int expect_more_rejects;
/** \name Needed while proposals are being done.
* @{ */
/* Need to keep the previous valid ticket in case we moved to
* start new elections and another server asks for the ticket
* status. It would be wrong to send our candidate ticket.
*/
struct ticket_config *last_valid_tk;
/** Whom to vote for the next time.
* Needed to push a ticket to someone else. */
#if 0
/** Bitmap of sites that acknowledge that state. */
uint64_t proposal_acknowledges;
/** When an incompletely acknowledged proposal gets done.
* If all peers agree, that happens sooner.
* See switch_state_to(). */
struct timeval proposal_switch;
/** Timestamp of proposal expiration. */
time_t proposal_expires;
#endif
/** Number of send retries left.
* Used on the new owner.
* Starts at 0, counts up. */
int retry_number;
/** @} */
};
struct booth_config {
char name[BOOTH_NAME_LEN];
transport_layer_t proto;
uint16_t port;
/** Stores the OR of sites bitmasks. */
uint64_t sites_bits;
/** Stores the OR of all members' bitmasks. */
uint64_t all_bits;
char site_user[BOOTH_NAME_LEN];
char site_group[BOOTH_NAME_LEN];
char arb_user[BOOTH_NAME_LEN];
char arb_group[BOOTH_NAME_LEN];
uid_t uid;
gid_t gid;
int site_count;
struct booth_site site[MAX_NODES];
int ticket_count;
int ticket_allocated;
struct ticket_config *ticket;
};
extern struct booth_config *booth_conf;
int read_config(const char *path, int type);
int check_config(int type);
int find_site_by_name(unsigned char *site, struct booth_site **node, int any_type);
int find_site_by_id(uint32_t site_id, struct booth_site **node);
const char *type_to_string(int type);
-
-#include <stdio.h>
-#define R(tk_) do { if (ANYDEBUG) printf("## %12s:%3d state %s, %d, " \
- "leader %s, exp %s", __FILE__, __LINE__, \
- state_to_string(tk_->state), tk_->current_term, \
- site_string(tk_->leader), ctime(&tk_->term_expires)); } while(0)
-
-
#endif /* _CONFIG_H */
diff --git a/src/handler.c b/src/handler.c
index 885997c..cdfae4d 100644
--- a/src/handler.c
+++ b/src/handler.c
@@ -1,70 +1,70 @@
/*
* Copyright (C) 2014 Philipp Marek <philipp.marek@linbit.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>
#include <inttypes.h>
#include <stdio.h>
#include <assert.h>
#include <time.h>
#include "ticket.h"
#include "config.h"
#include "inline-fn.h"
#include "log.h"
#include "pacemaker.h"
#include "booth.h"
#include "handler.h"
/** Runs an external handler.
* See eg. 'before-acquire-handler'.
* TODO: timeout, async operation?. */
int run_handler(struct ticket_config *tk,
const char *cmd, int synchronous)
{
int rv;
char expires[16];
if (!cmd)
return 0;
assert(synchronous);
- sprintf(expires, "%" PRId64, (int64_t)(tk->term_expires));
+ sprintf(expires, "%" PRId64, (int64_t)wall_ts(tk->term_expires));
rv = setenv("BOOTH_TICKET", tk->name, 1) ||
setenv("BOOTH_LOCAL", local->addr_string, 1) ||
setenv("BOOTH_CONF_NAME", booth_conf->name, 1) ||
setenv("BOOTH_CONF_PATH", cl.configfile, 1) ||
setenv("BOOTH_TICKET_EXPIRES", expires, 1);
if (rv) {
log_error("Cannot set environment: %s", strerror(errno));
} else {
rv = system(cmd);
if (rv)
tk_log_warn("handler \"%s\" exited with error %s",
cmd, interpret_rv(rv));
else
tk_log_debug("handler \"%s\" exited with success", cmd);
}
return rv;
}
diff --git a/src/inline-fn.h b/src/inline-fn.h
index 6d64461..2721782 100644
--- a/src/inline-fn.h
+++ b/src/inline-fn.h
@@ -1,325 +1,296 @@
/*
* Copyright (C) 2013-2014 Philipp Marek <philipp.marek@linbit.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _INLINE_FN_H
#define _INLINE_FN_H
#include <time.h>
#include <sys/time.h>
#include <assert.h>
#include <string.h>
+#include "timer.h"
#include "config.h"
#include "transport.h"
inline static uint32_t get_local_id(void)
{
return local ? local->site_id : -1;
}
inline static uint32_t get_node_id(struct booth_site *node)
{
return node ? node->site_id : NO_ONE;
}
inline static int term_time_left(const struct ticket_config *tk)
{
int left;
- left = tk->term_expires - time(NULL);
+ left = tk->term_expires - get_secs(NULL);
return (left < 0) ? 0 : left;
}
/** Returns number of seconds left, if any. */
inline static int leader_and_valid(const struct ticket_config *tk)
{
if (tk->leader != local)
return 0;
return term_time_left(tk);
}
/** Is this some leader? */
inline static int is_owned(const struct ticket_config *tk)
{
return (tk->leader && tk->leader != no_leader);
}
static inline void init_header_bare(struct boothc_header *h) {
h->magic = htonl(BOOTHC_MAGIC);
h->version = htonl(BOOTHC_VERSION);
h->from = htonl(local->site_id);
h->iv = htonl(0);
h->auth1 = htonl(0);
h->auth2 = htonl(0);
}
static inline void init_header(struct boothc_header *h,
int cmd, int request, int options,
int result, int reason, int data_len)
{
init_header_bare(h);
h->length = htonl(data_len);
h->cmd = htonl(cmd);
h->request = htonl(request);
h->options = htonl(options);
h->result = htonl(result);
h->reason = htonl(reason);
}
static inline void init_ticket_site_header(struct boothc_ticket_msg *msg, int cmd)
{
init_header(&msg->header, cmd, 0, 0, 0, 0, sizeof(*msg));
}
#define my_last_term(tk) \
(((tk)->state == ST_CANDIDATE && (tk)->last_valid_tk->current_term) ? \
(tk)->last_valid_tk->current_term : (tk)->current_term)
static inline void init_ticket_msg(struct boothc_ticket_msg *msg,
int cmd, int request, int rv, int reason,
struct ticket_config *tk)
{
assert(sizeof(msg->ticket.id) == sizeof(tk->name));
init_header(&msg->header, cmd, request, 0, rv, reason, sizeof(*msg));
if (!tk) {
memset(&msg->ticket, 0, sizeof(msg->ticket));
} else {
memcpy(msg->ticket.id, tk->name, sizeof(msg->ticket.id));
msg->ticket.leader = htonl(get_node_id(
(tk->leader && tk->leader != no_leader) ? tk->leader : tk->voted_for));
msg->ticket.term = htonl(tk->current_term);
msg->ticket.term_valid_for = htonl(term_time_left(tk));
}
}
static inline struct booth_transport const *transport(void)
{
return booth_transport + booth_conf->proto;
}
static inline const char *site_string(struct booth_site *site)
{
return site ? site->addr_string : "NONE";
}
static inline const char *ticket_leader_string(struct ticket_config *tk)
{
return site_string(tk->leader);
}
static inline void disown_ticket(struct ticket_config *tk)
{
tk->leader = NULL;
tk->is_granted = 0;
- time(&tk->term_expires);
+ get_secs(&tk->term_expires);
}
static inline int disown_if_expired(struct ticket_config *tk)
{
- if (time(NULL) >= tk->term_expires ||
+ if (get_secs(NULL) >= tk->term_expires ||
!tk->leader) {
disown_ticket(tk);
return 1;
}
return 0;
}
/* We allow half of the uint32_t to be used;
* half of that below, half of that above the current known "good" value.
* 0 UINT32_MAX
* |--------------------------+----------------+------------|
* | | |
* |--------+-------| allowed range
* |
* current commit index
*
* So, on overflow it looks like that:
* UINT32_MAX 0
* |--------------------------+-----------||---+------------|
* | | |
* |--------+-------| allowed range
* |
* current commit index
*
* This should be possible by using the same datatype and relying
* on the under/overflow semantics.
*
*
* Having 30 bits available, and assuming an expire time of
* one minute and a (high) commit index step of 64 == 2^6 (because
* of weights), we get 2^24 minutes of range - which is ~750
* years. "Should be enough for everybody."
*/
static inline int index_is_higher_than(uint32_t c_high, uint32_t c_low)
{
uint32_t diff;
if (c_high == c_low)
return 0;
diff = c_high - c_low;
if (diff < UINT32_MAX/4)
return 1;
diff = c_low - c_high;
if (diff < UINT32_MAX/4)
return 0;
assert(!"commit index out of range - invalid");
}
static inline uint32_t index_max2(uint32_t a, uint32_t b)
{
return index_is_higher_than(a, b) ? a : b;
}
static inline uint32_t index_max3(uint32_t a, uint32_t b, uint32_t c)
{
return index_max2( index_max2(a, b), c);
}
-static inline double timeval_to_float(struct timeval tv)
-{
- return tv.tv_sec + tv.tv_usec*(double)1.0e-6;
-}
-
-static inline int timeval_msec(struct timeval tv)
-{
- int m;
-
- m = tv.tv_usec / 1000;
- if (m >= 1000)
- m = 999;
- return m;
-}
-
-
-static inline int timeval_compare(struct timeval tv1, struct timeval tv2)
-{
- if (tv1.tv_sec < tv2.tv_sec)
- return -1;
- if (tv1.tv_sec > tv2.tv_sec)
- return +1;
- if (tv1.tv_usec < tv2.tv_usec)
- return -1;
- if (tv1.tv_usec > tv2.tv_usec)
- return +1;
- return 0;
-}
-
-
static inline time_t next_vote_starts_at(struct ticket_config *tk)
{
time_t half_exp, retries_needed, t;
/* If not owner, don't renew. */
if (tk->leader != local)
return 0;
/* Try to renew at half of expiry time. */
half_exp = tk->term_expires - tk->term_duration/2;
/* Also start renewal if we couldn't get
* a few message retransmission in the alloted
* expiry time. */
retries_needed = tk->term_expires - tk->timeout * tk->retries/2;
/* Return earlier timestamp. */
t = min(half_exp, retries_needed);
return t;
}
static inline int should_start_renewal(struct ticket_config *tk)
{
time_t now, when;
when = next_vote_starts_at(tk);
if (!when)
return 0;
- time(&now);
+ get_secs(&now);
return when <= now;
}
static inline void expect_replies(struct ticket_config *tk,
int reply_type)
{
tk->retry_number = 0;
tk->acks_expected = reply_type;
tk->acks_received = local->bitmask;
- tk->req_sent_at = time(NULL);
+ tk->req_sent_at = get_secs(NULL);
tk->ticket_updated = 0;
}
static inline void no_resends(struct ticket_config *tk)
{
tk->retry_number = 0;
tk->acks_expected = 0;
}
static inline struct booth_site *my_vote(struct ticket_config *tk)
{
return tk->votes_for[ local->index ];
}
static inline int count_bits(uint64_t val) {
return __builtin_popcount(val);
}
static inline int majority_of_bits(struct ticket_config *tk, uint64_t val)
{
/* Use ">" to get majority decision, even for an even number
* of participants. */
return count_bits(val) * 2 >
booth_conf->site_count;
}
static inline int all_replied(struct ticket_config *tk)
{
return !(tk->acks_received ^ booth_conf->all_bits);
}
static inline int all_sites_replied(struct ticket_config *tk)
{
return !((tk->acks_received & booth_conf->sites_bits) ^ booth_conf->sites_bits);
}
#endif
diff --git a/src/main.c b/src/main.c
index 376cf6a..cb60927 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,1367 +1,1367 @@
/*
* Copyright (C) 2011 Jiaju Zhang <jjzhang@suse.de>
* Copyright (C) 2013-2014 Philipp Marek <philipp.marek@linbit.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sched.h>
#include <errno.h>
#include <limits.h>
#include <sys/file.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/poll.h>
#include <pacemaker/crm/services.h>
#include <clplumbing/setproctitle.h>
#include <sys/prctl.h>
#include <clplumbing/coredumps.h>
#include <fcntl.h>
#include <string.h>
#include <assert.h>
#include <error.h>
#include <sys/ioctl.h>
#include <termios.h>
#include <signal.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <sys/types.h>
#include "log.h"
#include "booth.h"
#include "config.h"
#include "transport.h"
#include "inline-fn.h"
#include "pacemaker.h"
#include "ticket.h"
#define RELEASE_VERSION "0.2.0"
#define CLIENT_NALLOC 32
int daemonize = 0;
int enable_stderr = 0;
time_t start_time;
/** Structure for "clients".
* Filehandles with incoming data get registered here (and in pollfds),
* along with their callbacks.
* Because these can be reallocated with every new fd, addressing
* happens _only_ by their numeric index. */
struct client *clients = NULL;
struct pollfd *pollfds = NULL;
static int client_maxi;
static int client_size = 0;
static const struct booth_site _no_leader = {
.addr_string = "none",
.site_id = NO_ONE,
};
struct booth_site *no_leader = (struct booth_site*)& _no_leader;
typedef enum
{
BOOTHD_STARTED=0,
BOOTHD_STARTING
} BOOTH_DAEMON_STATE;
int poll_timeout = POLL_TIMEOUT;
struct booth_config *booth_conf;
struct command_line cl;
int do_read(int fd, void *buf, size_t count)
{
int rv, off = 0;
while (off < count) {
rv = read(fd, (char *)buf + off, count - off);
if (rv == 0)
return -1;
if (rv == -1 && errno == EINTR)
continue;
if (rv == -1)
return -1;
off += rv;
}
return 0;
}
int do_write(int fd, void *buf, size_t count)
{
int rv, off = 0;
retry:
rv = write(fd, (char *)buf + off, count);
if (rv == -1 && errno == EINTR)
goto retry;
/* If we cannot write _any_ data, we'd be in an (potential) loop. */
if (rv <= 0) {
log_error("write failed: %s (%d)", strerror(errno), errno);
return rv;
}
if (rv != count) {
count -= rv;
off += rv;
goto retry;
}
return 0;
}
static void client_alloc(void)
{
int i;
if (!clients) {
clients = malloc(CLIENT_NALLOC * sizeof(struct client));
pollfds = malloc(CLIENT_NALLOC * sizeof(struct pollfd));
} else {
clients = realloc(clients, (client_size + CLIENT_NALLOC) *
sizeof(struct client));
pollfds = realloc(pollfds, (client_size + CLIENT_NALLOC) *
sizeof(struct pollfd));
}
if (!clients || !pollfds) {
log_error("can't alloc for client array");
exit(1);
}
for (i = client_size; i < client_size + CLIENT_NALLOC; i++) {
clients[i].workfn = NULL;
clients[i].deadfn = NULL;
clients[i].fd = -1;
pollfds[i].fd = -1;
pollfds[i].revents = 0;
}
client_size += CLIENT_NALLOC;
}
static void client_dead(int ci)
{
if (clients[ci].fd != -1)
close(clients[ci].fd);
clients[ci].fd = -1;
clients[ci].workfn = NULL;
pollfds[ci].fd = -1;
}
int client_add(int fd, const struct booth_transport *tpt,
void (*workfn)(int ci),
void (*deadfn)(int ci))
{
int i;
struct client *c;
if (client_size + 2 >= client_maxi ) {
client_alloc();
}
for (i = 0; i < client_size; i++) {
c = clients + i;
if (c->fd != -1)
continue;
c->workfn = workfn;
if (deadfn)
c->deadfn = deadfn;
else
c->deadfn = client_dead;
c->transport = tpt;
c->fd = fd;
pollfds[i].fd = fd;
pollfds[i].events = POLLIN;
if (i > client_maxi)
client_maxi = i;
return i;
}
assert(!"no client");
}
/* Only used for client requests, TCP ???*/
void process_connection(int ci)
{
struct boothc_ticket_msg msg;
int rv, len, expr, fd;
void (*deadfn) (int ci);
fd = clients[ci].fd;
rv = do_read(fd, &msg.header, sizeof(msg.header));
if (rv < 0) {
if (errno == ECONNRESET)
log_debug("client %d connection reset for fd %d",
ci, clients[ci].fd);
goto kill;
}
if (check_boothc_header(&msg.header, -1) < 0)
goto kill;
/* Basic sanity checks already done. */
len = ntohl(msg.header.length);
if (len) {
if (len != sizeof(msg)) {
bad_len:
log_error("got wrong length %u", len);
return;
}
expr = len - sizeof(msg.header);
rv = do_read(clients[ci].fd, msg.header.data, expr);
if (rv < 0) {
log_error("connection %d read data error %d, wanted %d",
ci, rv, expr);
goto kill;
}
}
/* For CMD_GRANT and CMD_REVOKE:
* Don't close connection immediately, but send
* result a second later? */
switch (ntohl(msg.header.cmd)) {
case CMD_LIST:
ticket_answer_list(fd, &msg);
goto kill;
case CMD_GRANT:
/* Expect boothc_ticket_site_msg. */
if (len != sizeof(msg))
goto bad_len;
ticket_answer_grant(fd, &msg);
goto kill;
case CMD_REVOKE:
/* Expect boothc_ticket_site_msg. */
if (len != sizeof(msg))
goto bad_len;
ticket_answer_revoke(fd, &msg);
goto kill;
default:
log_error("connection %d cmd %x unknown",
ci, ntohl(msg.header.cmd));
init_header(&msg.header,CMR_GENERAL, 0, 0, RLT_INVALID_ARG, 0, sizeof(msg.header));
send_header_only(fd, &msg.header);
goto kill;
}
assert(0);
return;
kill:
deadfn = clients[ci].deadfn;
if(deadfn) {
deadfn(ci);
}
return;
}
/** Callback function for the listening TCP socket. */
static void process_listener(int ci)
{
int fd, i;
fd = accept(clients[ci].fd, NULL, NULL);
if (fd < 0) {
log_error("process_listener: accept error for fd %d: %s (%d)",
clients[ci].fd, strerror(errno), errno);
if (clients[ci].deadfn)
clients[ci].deadfn(ci);
return;
}
i = client_add(fd, clients[ci].transport, process_connection, NULL);
log_debug("add client connection %d fd %d", i, fd);
}
static int setup_config(int type)
{
int rv;
rv = read_config(cl.configfile, type);
if (rv < 0)
goto out;
/* Set "local" pointer, ignoring errors. */
if (cl.type == DAEMON && cl.site[0]) {
if (!find_site_by_name(cl.site, &local, 1)) {
log_error("Cannot find \"%s\" in the configuration.",
cl.site);
return -EINVAL;
}
local->local = 1;
} else
find_myself(NULL, type == CLIENT);
rv = check_config(type);
if (rv < 0)
goto out;
/* Per default the PID file name is derived from the
* configuration name. */
if (!cl.lockfile[0]) {
snprintf(cl.lockfile, sizeof(cl.lockfile)-1,
"%s/%s.pid", BOOTH_RUN_DIR, booth_conf->name);
}
out:
return rv;
}
static int setup_transport(void)
{
int rv;
rv = transport()->init(message_recv);
if (rv < 0) {
log_error("failed to init booth_transport %s", transport()->name);
goto out;
}
rv = booth_transport[TCP].init(NULL);
if (rv < 0) {
log_error("failed to init booth_transport[TCP]");
goto out;
}
out:
return rv;
}
static int write_daemon_state(int fd, int state)
{
char buffer[1024];
int rv, size;
size = sizeof(buffer) - 1;
rv = snprintf(buffer, size,
"booth_pid=%d "
"booth_state=%s "
"booth_type=%s "
"booth_cfg_name='%s' "
"booth_id=%d "
"booth_addr_string='%s' "
"booth_port=%d\n",
getpid(),
( state == BOOTHD_STARTED ? "started" :
state == BOOTHD_STARTING ? "starting" :
"invalid"),
type_to_string(local->type),
booth_conf->name,
local->site_id,
local->addr_string,
booth_conf->port);
if (rv < 0 || rv == size) {
log_error("Buffer filled up in write_daemon_state().");
return -1;
}
size = rv;
rv = ftruncate(fd, 0);
if (rv < 0) {
log_error("lockfile %s truncate error %d: %s",
cl.lockfile, errno, strerror(errno));
return rv;
}
rv = lseek(fd, 0, SEEK_SET);
if (rv < 0) {
log_error("lseek set fd(%d) offset to 0 error, return(%d), message(%s)",
fd, rv, strerror(errno));
rv = -1;
return rv;
}
rv = write(fd, buffer, size);
if (rv != size) {
log_error("write to fd(%d, %d) returned %d, errno %d, message(%s)",
fd, size,
rv, errno, strerror(errno));
return -1;
}
return 0;
}
static int loop(int fd)
{
void (*workfn) (int ci);
void (*deadfn) (int ci);
int rv, i;
rv = setup_transport();
if (rv < 0)
goto fail;
rv = setup_ticket();
if (rv < 0)
goto fail;
client_add(local->tcp_fd, booth_transport + TCP,
process_listener, NULL);
rv = write_daemon_state(fd, BOOTHD_STARTED);
if (rv != 0) {
log_error("write daemon state %d to lockfile error %s: %s",
BOOTHD_STARTED, cl.lockfile, strerror(errno));
goto fail;
}
if (cl.type == ARBITRATOR)
log_info("BOOTH arbitrator daemon started");
else if (cl.type == SITE)
log_info("BOOTH cluster site daemon started");
while (1) {
rv = poll(pollfds, client_maxi + 1, poll_timeout);
if (rv == -1 && errno == EINTR)
continue;
if (rv < 0) {
log_error("poll failed: %s (%d)", strerror(errno), errno);
goto fail;
}
for (i = 0; i <= client_maxi; i++) {
if (clients[i].fd < 0)
continue;
if (pollfds[i].revents & POLLIN) {
workfn = clients[i].workfn;
if (workfn)
workfn(i);
}
if (pollfds[i].revents &
(POLLERR | POLLHUP | POLLNVAL)) {
deadfn = clients[i].deadfn;
if (deadfn)
deadfn(i);
}
}
process_tickets();
}
return 0;
fail:
return -1;
}
static int query_get_string_answer(cmd_request_t cmd)
{
struct booth_site *site;
struct boothc_header reply;
char *data;
int data_len;
int rv;
struct booth_transport const *tpt;
data = NULL;
init_header(&cl.msg.header, cmd, 0, cl.options, 0, 0, sizeof(cl.msg));
if (!*cl.site)
site = local;
else if (!find_site_by_name(cl.site, &site, 1)) {
log_error("cannot find site \"%s\"", cl.site);
rv = ENOENT;
goto out;
}
tpt = booth_transport + TCP;
rv = tpt->open(site);
if (rv < 0)
goto out_free;
rv = tpt->send(site, &cl.msg, sizeof(cl.msg));
if (rv < 0)
goto out_free;
rv = tpt->recv(site, &reply, sizeof(reply));
if (rv < 0)
goto out_free;
data_len = ntohl(reply.length) - sizeof(reply);
data = malloc(data_len);
if (!data) {
rv = -ENOMEM;
goto out_free;
}
rv = tpt->recv(site, data, data_len);
if (rv < 0)
goto out_free;
do_write(STDOUT_FILENO, data, data_len);
rv = 0;
out_free:
free(data);
tpt->close(site);
out:
return rv;
}
static int test_reply(int reply_code, cmd_request_t cmd)
{
int rv = 0;
const char *op_str;
if (cmd == CMD_GRANT)
op_str = "grant";
else if (cmd == CMD_REVOKE)
op_str = "revoke";
else {
log_error("internal error reading reply result!");
return -1;
}
switch (reply_code) {
case RLT_OVERGRANT:
log_info("You're granting a granted ticket. "
"If you wanted to migrate a ticket, "
"use revoke first, then use grant.");
rv = -1;
break;
case RLT_TICKET_IDLE:
log_info("ticket is not owned");
rv = 0;
break;
case RLT_ASYNC:
log_info("%s command sent, result will be returned "
"asynchronously. Please use \"booth list\" to "
"see the outcome.", op_str);
rv = 0;
break;
case RLT_SYNC_SUCC:
case RLT_SUCCESS:
log_info("%s succeeded!", op_str);
rv = 0;
break;
case RLT_SYNC_FAIL:
log_info("%s failed!", op_str);
rv = -1;
break;
case RLT_INVALID_ARG:
log_error("ticket \"%s\" does not exist",
cl.msg.ticket.id);
break;
case RLT_EXT_FAILED:
log_error("before-acquire-handler for ticket \"%s\" failed, grant denied",
cl.msg.ticket.id);
break;
case RLT_REDIRECT:
/* talk to another site */
rv = 1;
break;
default:
log_error("got an error code: %x", rv);
rv = -1;
}
return rv;
}
static int do_command(cmd_request_t cmd)
{
struct booth_site *site;
struct boothc_ticket_msg reply;
struct booth_transport const *tpt;
uint32_t leader_id;
int rv;
rv = 0;
site = NULL;
if (!*cl.site)
site = local;
else {
if (!find_site_by_name(cl.site, &site, 1)) {
log_error("Site \"%s\" not configured.", cl.site);
goto out_close;
}
}
if (site->type == ARBITRATOR) {
log_error("Site \"%s\" is an arbitrator, cannot grant/revoke ticket there.", cl.site);
goto out_close;
}
assert(site->type == SITE);
/* We don't check for existence of ticket, so that asking can be
* done without local configuration, too.
* Although, that means that the UDP port has to be specified, too. */
if (!cl.msg.ticket.id[0]) {
/* If the loaded configuration has only a single ticket defined, use that. */
if (booth_conf->ticket_count == 1) {
strcpy(cl.msg.ticket.id, booth_conf->ticket[0].name);
} else {
log_error("No ticket given.");
goto out_close;
}
}
redirect:
init_header(&cl.msg.header, cmd, 0, cl.options, 0, 0, sizeof(cl.msg));
/* Always use TCP for client - at least for now. */
tpt = booth_transport + TCP;
rv = tpt->open(site);
if (rv < 0)
goto out_close;
rv = tpt->send(site, &cl.msg, sizeof(cl.msg));
if (rv < 0)
goto out_close;
rv = tpt->recv(site, &reply, sizeof(reply));
if (rv < 0)
goto out_close;
rv = test_reply(ntohl(reply.header.result), cmd);
if (rv == 1) {
local_transport->close(site);
leader_id = ntohl(reply.ticket.leader);
if (!find_site_by_id(leader_id, &site)) {
log_error("Message with unknown redirect site %x received", leader_id);
return rv;
}
goto redirect;
}
out_close:
if (site)
local_transport->close(site);
return rv;
}
static int do_grant(void)
{
return do_command(CMD_GRANT);
}
static int do_revoke(void)
{
return do_command(CMD_REVOKE);
}
static int _lockfile(int mode, int *fdp, pid_t *locked_by)
{
struct flock lock;
int fd, rv;
/* After reboot the directory may not yet exist.
* Try to create it, but ignore errors. */
if (strncmp(cl.lockfile, BOOTH_RUN_DIR,
strlen(BOOTH_RUN_DIR)) == 0)
mkdir(BOOTH_RUN_DIR, 0775);
if (locked_by)
*locked_by = 0;
*fdp = -1;
fd = open(cl.lockfile, mode, 0664);
if (fd < 0)
return errno;
*fdp = fd;
lock.l_type = F_WRLCK;
lock.l_start = 0;
lock.l_whence = SEEK_SET;
lock.l_len = 0;
lock.l_pid = 0;
if (fcntl(fd, F_SETLK, &lock) == 0)
return 0;
rv = errno;
if (locked_by)
if (fcntl(fd, F_GETLK, &lock) == 0)
*locked_by = lock.l_pid;
return rv;
}
static inline int is_root(void)
{
/* TODO: getuid()? Better way to check? */
return geteuid() == 0;
}
static int create_lockfile(void)
{
int rv, fd;
fd = -1;
rv = _lockfile(O_CREAT | O_WRONLY, &fd, NULL);
if (fd == -1) {
log_error("lockfile %s open error %d: %s",
cl.lockfile, rv, strerror(rv));
return -1;
}
if (rv < 0) {
log_error("lockfile %s setlk error %d: %s",
cl.lockfile, rv, strerror(rv));
goto fail;
}
rv = write_daemon_state(fd, BOOTHD_STARTING);
if (rv != 0) {
log_error("write daemon state %d to lockfile error %s: %s",
BOOTHD_STARTING, cl.lockfile, strerror(errno));
goto fail;
}
if (is_root()) {
if (fchown(fd, booth_conf->uid, booth_conf->gid) < 0)
log_error("fchown() on lockfile said %d: %s",
errno, strerror(errno));
}
return fd;
fail:
close(fd);
return -1;
}
static void unlink_lockfile(int fd)
{
unlink(cl.lockfile);
close(fd);
}
static void print_usage(void)
{
printf("Usages:\n");
printf(" booth daemon [-c config] [-D]\n");
printf(" booth [client] {list|grant|revoke} [options]\n");
printf(" booth status [-c config] [-D]\n");
printf("\n");
printf("Client operations:\n");
printf(" list: List all the tickets\n");
printf(" grant: Grant ticket to site\n");
printf(" revoke: Revoke ticket from site\n");
printf("\n");
printf("Options:\n");
printf(" -c FILE Specify config file [default " BOOTH_DEFAULT_CONF "]\n");
printf(" Can be a path or a name without \".conf\" suffix\n");
printf(" -D Enable debugging to stderr and don't fork\n");
printf(" -S Systemd mode (no forking)\n");
printf(" -t ticket name\n");
printf(" -s site name\n");
printf(" -l LOCKFILE Specify lock file path (daemon only)\n");
printf(" -F Try to grant the ticket immediately (client only)\n");
printf(" -h Print this help, then exit\n");
printf("\n");
printf("Please see the man page for details.\n");
}
#define OPTION_STRING "c:Dl:t:s:FhS"
void safe_copy(char *dest, char *value, size_t buflen, const char *description) {
int content_len = buflen - 1;
if (strlen(value) >= content_len) {
fprintf(stderr, "'%s' exceeds maximum %s length of %d\n",
value, description, content_len);
exit(EXIT_FAILURE);
}
strncpy(dest, value, content_len);
dest[content_len] = 0;
}
static int host_convert(char *hostname, char *ip_str, size_t ip_size)
{
struct addrinfo *result = NULL, hints = {0};
int re = -1;
memset(&hints, 0, sizeof(hints));
hints.ai_family = BOOTH_PROTO_FAMILY;
hints.ai_socktype = SOCK_DGRAM;
re = getaddrinfo(hostname, NULL, &hints, &result);
if (re == 0) {
struct in_addr addr = ((struct sockaddr_in *)result->ai_addr)->sin_addr;
const char *re_ntop = inet_ntop(BOOTH_PROTO_FAMILY, &addr, ip_str, ip_size);
if (re_ntop == NULL) {
re = -1;
}
}
freeaddrinfo(result);
return re;
}
static int read_arguments(int argc, char **argv)
{
int optchar;
char *arg1 = argv[1];
char *op = NULL;
char *cp;
char site_arg[INET_ADDRSTRLEN] = {0};
int left;
if (argc < 2 || !strcmp(arg1, "help") || !strcmp(arg1, "--help") ||
!strcmp(arg1, "-h")) {
print_usage();
exit(EXIT_SUCCESS);
}
if (!strcmp(arg1, "version") || !strcmp(arg1, "--version") ||
!strcmp(arg1, "-V")) {
printf("%s %s (built %s %s)\n",
argv[0], RELEASE_VERSION, __DATE__, __TIME__);
exit(EXIT_SUCCESS);
}
if (strcmp(arg1, "arbitrator") == 0 ||
strcmp(arg1, "site") == 0 ||
strcmp(arg1, "start") == 0 ||
strcmp(arg1, "daemon") == 0) {
cl.type = DAEMON;
optind = 2;
} else if (strcmp(arg1, "status") == 0) {
cl.type = STATUS;
optind = 2;
} else if (strcmp(arg1, "client") == 0) {
cl.type = CLIENT;
if (argc < 3) {
print_usage();
exit(EXIT_FAILURE);
}
op = argv[2];
optind = 3;
} else {
cl.type = CLIENT;
op = argv[1];
optind = 2;
}
if (cl.type == CLIENT) {
if (!strcmp(op, "list"))
cl.op = CMD_LIST;
else if (!strcmp(op, "grant"))
cl.op = CMD_GRANT;
else if (!strcmp(op, "revoke"))
cl.op = CMD_REVOKE;
else {
fprintf(stderr, "client operation \"%s\" is unknown\n",
op);
exit(EXIT_FAILURE);
}
}
while (optind < argc) {
optchar = getopt(argc, argv, OPTION_STRING);
switch (optchar) {
case 'c':
if (strchr(optarg, '/')) {
safe_copy(cl.configfile, optarg,
sizeof(cl.configfile), "config file");
} else {
/* If no "/" in there, use with default directory. */
strcpy(cl.configfile, BOOTH_DEFAULT_CONF_DIR);
cp = cl.configfile + strlen(BOOTH_DEFAULT_CONF_DIR);
assert(cp > cl.configfile);
assert(*(cp-1) == '/');
/* Write at the \0, ie. after the "/" */
safe_copy(cp, optarg,
(sizeof(cl.configfile) -
(cp - cl.configfile) -
strlen(BOOTH_DEFAULT_CONF_EXT)),
"config name");
/* If no extension, append ".conf".
* Space is available, see -strlen() above. */
if (!strchr(cp, '.'))
strcat(cp, BOOTH_DEFAULT_CONF_EXT);
}
break;
case 'D':
debug_level++;
enable_stderr = 1;
/* Fall through */
case 'S':
daemonize = 1;
break;
case 'l':
safe_copy(cl.lockfile, optarg, sizeof(cl.lockfile), "lock file");
break;
case 't':
if (cl.op == CMD_GRANT || cl.op == CMD_REVOKE) {
safe_copy(cl.msg.ticket.id, optarg,
sizeof(cl.msg.ticket.id), "ticket name");
} else {
print_usage();
exit(EXIT_FAILURE);
}
break;
case 's':
/* For testing and debugging: allow "-s site" also for
* daemon start, so that the address that should be used
* can be set manually.
* This makes it easier to start multiple processes
* on one machine. */
if (cl.type == CLIENT ||
(cl.type == DAEMON && debug_level)) {
int re = host_convert(optarg, site_arg, INET_ADDRSTRLEN);
if (re == 0) {
safe_copy(cl.site, site_arg, sizeof(cl.site), "site name");
} else {
safe_copy(cl.site, optarg, sizeof(cl.site), "site name");
}
} else {
log_error("\"-s\" not allowed in daemon mode.");
exit(EXIT_FAILURE);
}
break;
case 'F':
if (cl.type != CLIENT || cl.op != CMD_GRANT) {
log_error("use \"-F\" only for client grant");
exit(EXIT_FAILURE);
}
cl.options |= OPT_IMMEDIATE;
break;
case 'h':
print_usage();
exit(EXIT_SUCCESS);
break;
case ':':
case '?':
fprintf(stderr, "Please use '-h' for usage.\n");
exit(EXIT_FAILURE);
break;
case -1:
/* No more parameters on cmdline, only arguments. */
goto extra_args;
default:
goto unknown;
};
}
return 0;
extra_args:
if (cl.type == CLIENT && !cl.msg.ticket.id[0]) {
/* Use additional argument as ticket name. */
safe_copy(cl.msg.ticket.id,
argv[optind],
sizeof(cl.msg.ticket.id),
"ticket name");
optind++;
}
if (optind == argc)
return 0;
left = argc - optind;
fprintf(stderr, "Superfluous argument%s: %s%s\n",
left == 1 ? "" : "s",
argv[optind],
left == 1 ? "" : "...");
exit(EXIT_FAILURE);
unknown:
fprintf(stderr, "unknown option: %s\n", argv[optind]);
exit(EXIT_FAILURE);
}
static void set_scheduler(void)
{
struct sched_param sched_param;
struct rlimit rlimit;
int rv;
rlimit.rlim_cur = RLIM_INFINITY;
rlimit.rlim_max = RLIM_INFINITY;
setrlimit(RLIMIT_MEMLOCK, &rlimit);
rv = mlockall(MCL_CURRENT | MCL_FUTURE);
if (rv < 0) {
log_error("mlockall failed");
}
rv = sched_get_priority_max(SCHED_RR);
if (rv != -1) {
sched_param.sched_priority = rv;
rv = sched_setscheduler(0, SCHED_RR, &sched_param);
if (rv == -1)
log_error("could not set SCHED_RR priority %d: %s (%d)",
sched_param.sched_priority,
strerror(errno), errno);
} else {
log_error("could not get maximum scheduler priority err %d",
errno);
}
}
static void set_oom_adj(int val)
{
FILE *fp;
fp = fopen("/proc/self/oom_adj", "w");
if (!fp)
return;
fprintf(fp, "%i", val);
fclose(fp);
}
static int do_status(int type)
{
pid_t pid;
int rv, lock_fd, ret;
const char *reason = NULL;
char lockfile_data[1024], *cp;
ret = PCMK_OCF_NOT_RUNNING;
/* TODO: query all, and return quit only if it's _cleanly_ not
* running, ie. _neither_ of port/lockfile/process is available?
*
* Currently a single failure says "not running", even if "only" the
* lockfile has been removed. */
rv = setup_config(type);
if (rv) {
reason = "Error reading configuration.";
ret = PCMK_OCF_UNKNOWN_ERROR;
goto quit;
}
if (!local) {
reason = "No Service IP active here.";
goto quit;
}
rv = _lockfile(O_RDWR, &lock_fd, &pid);
if (rv == 0) {
reason = "PID file not locked.";
goto quit;
}
if (lock_fd == -1) {
reason = "No PID file.";
goto quit;
}
if (pid) {
fprintf(stdout, "booth_lockpid=%d ", pid);
fflush(stdout);
}
rv = read(lock_fd, lockfile_data, sizeof(lockfile_data) - 1);
if (rv < 4) {
reason = "Cannot read lockfile data.";
ret = PCMK_LSB_UNKNOWN_ERROR;
goto quit;
}
lockfile_data[rv] = 0;
if (lock_fd != -1)
close(lock_fd);
/* Make sure it's only a single line */
cp = strchr(lockfile_data, '\r');
if (cp)
*cp = 0;
cp = strchr(lockfile_data, '\n');
if (cp)
*cp = 0;
rv = setup_tcp_listener(1);
if (rv == 0) {
reason = "TCP port not in use.";
goto quit;
}
fprintf(stdout, "booth_lockfile='%s' %s\n",
cl.lockfile, lockfile_data);
if (daemonize)
fprintf(stderr, "Booth at %s port %d seems to be running.\n",
local->addr_string, booth_conf->port);
return 0;
quit:
log_debug("not running: %s", reason);
/* Ie. "DEBUG" */
if (daemonize)
fprintf(stderr, "not running: %s\n", reason);
return ret;
}
static int limit_this_process(void)
{
int rv;
if (!is_root())
return 0;
if (setregid(booth_conf->gid, booth_conf->gid) < 0) {
rv = errno;
log_error("setregid() didn't work: %s", strerror(rv));
return rv;
}
if (setreuid(booth_conf->uid, booth_conf->uid) < 0) {
rv = errno;
log_error("setreuid() didn't work: %s", strerror(rv));
return rv;
}
/* TODO: ulimits? But that would restrict crm_ticket and handler
* scripts, too! */
return 0;
}
static int lock_fd = -1;
static void server_exit(void)
{
int rv;
if (lock_fd >= 0) {
/* We might not be able to delete it, but at least
* make it empty. */
rv = ftruncate(lock_fd, 0);
(void)rv;
unlink_lockfile(lock_fd);
}
log_info("exiting");
}
static void sig_exit_handler(int sig)
{
log_info("caught signal %d", sig);
exit(0);
}
static int do_server(int type)
{
int rv = -1;
static char log_ent[128] = DAEMON_NAME "-";
rv = setup_config(type);
if (rv < 0)
return rv;
if (!local) {
log_error("Cannot find myself in the configuration.");
exit(EXIT_FAILURE);
}
if (!daemonize) {
if (daemon(0, 0) < 0) {
perror("daemon error");
exit(EXIT_FAILURE);
}
}
/* The lockfile must be written to _after_ the call to daemon(), so
* that the lockfile contains the pid of the daemon, not the parent. */
lock_fd = create_lockfile();
if (lock_fd < 0)
return lock_fd;
atexit(server_exit);
strcat(log_ent, type_to_string(local->type));
cl_log_set_entity(log_ent);
cl_log_enable_stderr(enable_stderr ? TRUE : FALSE);
cl_log_set_facility(HA_LOG_FACILITY);
cl_inherit_logging_environment(0);
log_info("BOOTH %s daemon is starting, node id is 0x%08X (%d).",
type_to_string(local->type),
local->site_id, local->site_id);
signal(SIGUSR1, (__sighandler_t)tickets_log_info);
signal(SIGTERM, (__sighandler_t)sig_exit_handler);
signal(SIGINT, (__sighandler_t)sig_exit_handler);
set_scheduler();
set_oom_adj(-16);
set_proc_title("%s %s for [%s]:%d",
DAEMON_NAME,
type_to_string(local->type),
local->addr_string,
booth_conf->port);
rv = limit_this_process();
if (rv)
return rv;
if (cl_enable_coredumps(TRUE) < 0){
cl_log(LOG_ERR, "enabling core dump failed");
}
cl_cdtocoredir();
prctl(PR_SET_DUMPABLE, (unsigned long)TRUE, 0UL, 0UL, 0UL);
rv = loop(lock_fd);
return rv;
}
static int do_client(void)
{
int rv = -1;
rv = setup_config(CLIENT);
if (rv < 0) {
log_error("cannot read config");
goto out;
}
switch (cl.op) {
case CMD_LIST:
rv = query_get_string_answer(CMD_LIST);
break;
case CMD_GRANT:
rv = do_grant();
break;
case CMD_REVOKE:
rv = do_revoke();
break;
}
out:
return rv;
}
int main(int argc, char *argv[], char *envp[])
{
int rv;
init_set_proc_title(argc, argv, envp);
- time(&start_time);
+ get_secs(&start_time);
memset(&cl, 0, sizeof(cl));
strncpy(cl.configfile,
BOOTH_DEFAULT_CONF, BOOTH_PATH_LEN - 1);
cl.lockfile[0] = 0;
debug_level = 0;
cl_log_set_entity("booth");
cl_log_enable_stderr(TRUE);
cl_log_set_facility(0);
rv = read_arguments(argc, argv);
if (rv < 0)
goto out;
switch (cl.type) {
case STATUS:
rv = do_status(cl.type);
break;
case ARBITRATOR:
case DAEMON:
case SITE:
rv = do_server(cl.type);
break;
case CLIENT:
rv = do_client();
break;
}
out:
/* Normalize values. 0x100 would be seen as "OK" by waitpid(). */
return (rv >= 0 && rv < 0x70) ? rv : 1;
}
diff --git a/src/pacemaker.c b/src/pacemaker.c
index 24a3700..25a7e56 100644
--- a/src/pacemaker.c
+++ b/src/pacemaker.c
@@ -1,340 +1,340 @@
/*
* Copyright (C) 2011 Jiaju Zhang <jjzhang@suse.de>
* Copyright (C) 2013-2014 Philipp Marek <philipp.marek@linbit.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/wait.h>
#include "log.h"
#include "pacemaker.h"
#include "inline-fn.h"
enum atomic_ticket_supported {
YES=0,
NO,
FILENOTFOUND, /* Ie. UNKNOWN */
UNKNOWN = FILENOTFOUND,
};
/* http://thedailywtf.com/Articles/What_Is_Truth_0x3f_.aspx */
enum atomic_ticket_supported atomicity = UNKNOWN;
#define COMMAND_MAX 1024
/** Determines whether the installed crm_ticket can do atomic ticket grants,
* _including_ multiple attribute changes.
*
* See
* https://bugzilla.novell.com/show_bug.cgi?id=855099
*
* Run "crm_ticket" without "--force";
* - the old version asks for "Y/N" via STDIN, and returns 0
* when reading "no";
* - the new version just reports an error without asking.
*/
static void test_atomicity(void)
{
int rv;
if (atomicity != UNKNOWN)
return;
rv = system("echo n | crm_ticket -g -t any-ticket-name > /dev/null 2> /dev/null");
if (rv == -1) {
log_error("Cannot run \"crm_ticket\"!");
/* BIG problem. Abort. */
exit(1);
}
if (WIFSIGNALED(rv)) {
log_error("\"crm_ticket\" terminated by a signal!");
/* Problem. Abort. */
exit(1);
}
switch (WEXITSTATUS(rv)) {
case 0:
atomicity = NO;
log_info("Old \"crm_ticket\" found, using non-atomic ticket updates.");
break;
case 1:
atomicity = YES;
log_info("New \"crm_ticket\" found, using atomic ticket updates.");
break;
default:
log_error("Unexpected return value from \"crm_ticket\" (%d), "
"falling back to non-atomic ticket updates.",
rv);
atomicity = NO;
}
assert(atomicity == YES || atomicity == NO);
}
const char * interpret_rv(int rv)
{
static char text[64];
int p;
if (rv == 0)
return "0";
p = sprintf(text, "rv %d", WEXITSTATUS(rv));
if (WIFSIGNALED(rv))
sprintf(text + p, " signal %d", WTERMSIG(rv));
return text;
}
static int pcmk_write_ticket_atomic(struct ticket_config *tk, int grant)
{
char cmd[COMMAND_MAX];
int rv;
/* The values are appended to "-v", so that NO_ONE
* (which is -1) isn't seen as another option. */
snprintf(cmd, COMMAND_MAX,
"crm_ticket -t '%s' "
"%s --force "
"-S owner -v%" PRIi32 " "
"-S expires -v%" PRIi64 " "
"-S term -v%" PRIi64,
tk->name,
(grant > 0 ? "-g" :
grant < 0 ? "-r" :
""),
(int32_t)get_node_id(tk->leader),
- (int64_t)tk->term_expires,
+ (int64_t)wall_ts(tk->term_expires),
(int64_t)tk->current_term);
rv = system(cmd);
log_debug("command: '%s' was executed", cmd);
if (rv != 0)
log_error("error: \"%s\" failed, %s", cmd, interpret_rv(rv));
return rv;
}
static int pcmk_store_ticket_nonatomic(struct ticket_config *tk);
static int pcmk_grant_ticket(struct ticket_config *tk)
{
char cmd[COMMAND_MAX];
int rv;
test_atomicity();
if (atomicity == YES)
return pcmk_write_ticket_atomic(tk, +1);
rv = pcmk_store_ticket_nonatomic(tk);
if (rv)
return rv;
snprintf(cmd, COMMAND_MAX, "crm_ticket -t %s -g --force",
tk->name);
log_debug("command: '%s' was executed", cmd);
rv = system(cmd);
if (rv != 0)
log_error("error: \"%s\" failed, %s", cmd, interpret_rv(rv));
return rv;
}
static int pcmk_revoke_ticket(struct ticket_config *tk)
{
char cmd[COMMAND_MAX];
int rv;
test_atomicity();
if (atomicity == YES)
return pcmk_write_ticket_atomic(tk, -1);
rv = pcmk_store_ticket_nonatomic(tk);
if (rv)
return rv;
snprintf(cmd, COMMAND_MAX, "crm_ticket -t %s -r --force",
tk->name);
log_debug("command: '%s' was executed", cmd);
rv = system(cmd);
if (rv != 0)
log_error("error: \"%s\" failed, %s", cmd, interpret_rv(rv));
return rv;
}
static int crm_ticket_set(const struct ticket_config *tk, const char *attr, int64_t val)
{
char cmd[COMMAND_MAX];
int i, rv;
snprintf(cmd, COMMAND_MAX,
"crm_ticket -t '%s' -S '%s' -v %" PRIi64,
tk->name, attr, val);
/* If there are errors, there's not much we can do but retry ... */
for (i=0; i<3 &&
(rv = system(cmd));
i++) ;
log_debug("'%s' gave result %s", cmd, interpret_rv(rv));
return rv;
}
static int pcmk_store_ticket_nonatomic(struct ticket_config *tk)
{
int rv;
/* Always try to store *each* attribute, even if there's an error
* for one of them. */
rv = crm_ticket_set(tk, "owner", (int32_t)get_node_id(tk->leader));
- rv = crm_ticket_set(tk, "expires", tk->term_expires) || rv;
+ rv = crm_ticket_set(tk, "expires", wall_ts(tk->term_expires)) || rv;
rv = crm_ticket_set(tk, "term", tk->current_term) || rv;
if (rv)
log_error("setting crm_ticket attributes failed; %s",
interpret_rv(rv));
else
log_info("setting crm_ticket attributes successful");
return rv;
}
static int crm_ticket_get(struct ticket_config *tk,
const char *attr, int64_t *data)
{
char cmd[COMMAND_MAX];
char line[256];
int rv;
int64_t v;
FILE *p;
*data = -1;
v = 0;
snprintf(cmd, COMMAND_MAX,
"crm_ticket -t '%s' -G '%s' --quiet",
tk->name, attr);
p = popen(cmd, "r");
if (p == NULL) {
rv = errno;
log_error("popen error %d (%s) for \"%s\"",
rv, strerror(rv), cmd);
return rv || -EINVAL;
}
if (fgets(line, sizeof(line) - 1, p) == NULL) {
rv = ENODATA;
goto out;
}
rv = EINVAL;
if (!strncmp(line, "false", 5)) {
v = 0;
rv = 0;
} else if (!strncmp(line, "true", 4)) {
v = 1;
rv = 0;
} else if (sscanf(line, "%" PRIi64, &v) == 1) {
rv = 0;
}
*data = v;
out:
rv = pclose(p);
log_debug("command \"%s\" returned %s, value %" PRIi64, cmd, interpret_rv(rv), v);
return rv;
}
static int pcmk_load_ticket(struct ticket_config *tk)
{
int rv;
int64_t v;
/* This here gets run during startup; testing that here means that
* normal operation won't be interrupted with that test. */
test_atomicity();
rv = crm_ticket_get(tk, "expires", &v);
if (!rv) {
- tk->term_expires = v;
+ tk->term_expires = unwall_ts(v);
}
rv = crm_ticket_get(tk, "term", &v);
if (!rv) {
tk->current_term = v;
}
rv = crm_ticket_get(tk, "granted", &v);
if (!rv) {
tk->is_granted = v;
}
rv = crm_ticket_get(tk, "owner", &v);
if (!rv) {
/* No check, node could have been deconfigured. */
if (!find_site_by_id(v, &tk->leader)) {
/* Hmm, no site found for the ticket we have in the
* CIB!?
* Assume that the ticket belonged to us if it was
* granted here!
*/
tk_log_warn("no site matches; site got reconfigured?");
if (tk->is_granted) {
tk_log_warn("granted here, assume it belonged to us");
tk->leader = local;
}
}
}
return rv;
}
struct ticket_handler pcmk_handler = {
.grant_ticket = pcmk_grant_ticket,
.revoke_ticket = pcmk_revoke_ticket,
.load_ticket = pcmk_load_ticket,
};
diff --git a/src/raft.c b/src/raft.c
index edbae04..b31be63 100644
--- a/src/raft.c
+++ b/src/raft.c
@@ -1,930 +1,931 @@
/*
* Copyright (C) 2014 Philipp Marek <philipp.marek@linbit.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdlib.h>
#include <inttypes.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>
#include "booth.h"
+#include "timer.h"
#include "transport.h"
#include "inline-fn.h"
#include "config.h"
#include "raft.h"
#include "ticket.h"
#include "log.h"
inline static void clear_election(struct ticket_config *tk)
{
int i;
struct booth_site *site;
tk_log_debug("clear election");
tk->votes_received = 0;
foreach_node(i, site)
tk->votes_for[site->index] = NULL;
}
inline static void record_vote(struct ticket_config *tk,
struct booth_site *who,
struct booth_site *vote)
{
tk_log_debug("site %s votes for %s",
site_string(who),
site_string(vote));
if (!tk->votes_for[who->index]) {
tk->votes_for[who->index] = vote;
tk->votes_received |= who->bitmask;
} else {
if (tk->votes_for[who->index] != vote)
tk_log_warn("%s voted previously "
"for %s and now wants to vote for %s (ignored)",
site_string(who),
site_string(tk->votes_for[who->index]),
site_string(vote));
}
}
static int cmp_msg_ticket(struct ticket_config *tk,
struct booth_site *sender,
struct booth_site *leader,
struct boothc_ticket_msg *msg)
{
if (my_last_term(tk) != ntohl(msg->ticket.term)) {
return my_last_term(tk) - ntohl(msg->ticket.term);
}
return 0;
}
static void update_term_from_msg(struct ticket_config *tk,
struct boothc_ticket_msg *msg)
{
uint32_t i;
i = ntohl(msg->ticket.term);
/* if we failed to start the election, then accept the term
* from the leader
* */
if (tk->state == ST_CANDIDATE) {
tk->current_term = i;
} else {
tk->current_term = max(i, tk->current_term);
}
}
static void update_ticket_from_msg(struct ticket_config *tk,
struct booth_site *sender,
struct boothc_ticket_msg *msg)
{
int duration;
tk_log_debug("updating from %s (%d/%d)",
site_string(sender),
ntohl(msg->ticket.term), ntohl(msg->ticket.term_valid_for));
duration = min(tk->term_duration, ntohl(msg->ticket.term_valid_for));
- tk->term_expires = time(NULL) + duration;
+ tk->term_expires = get_secs(NULL) + duration;
update_term_from_msg(tk, msg);
}
static void copy_ticket_from_msg(struct ticket_config *tk,
struct boothc_ticket_msg *msg)
{
- tk->term_expires = time(NULL) + ntohl(msg->ticket.term_valid_for);
+ tk->term_expires = get_secs(NULL) + ntohl(msg->ticket.term_valid_for);
tk->current_term = ntohl(msg->ticket.term);
}
static void become_follower(struct ticket_config *tk,
struct boothc_ticket_msg *msg)
{
copy_ticket_from_msg(tk, msg);
tk->state = ST_FOLLOWER;
tk->delay_commit = 0;
tk->in_election = 0;
/* if we're following and the ticket was granted here
* then commit to CIB right away (we're probably restarting)
*/
if (tk->is_granted) {
disown_ticket(tk);
ticket_write(tk);
}
}
static void won_elections(struct ticket_config *tk)
{
tk->leader = local;
tk->state = ST_LEADER;
- tk->term_expires = time(NULL) + tk->term_duration;
+ tk->term_expires = get_secs(NULL) + tk->term_duration;
tk->election_end = 0;
tk->voted_for = NULL;
ticket_broadcast(tk, OP_HEARTBEAT, OP_ACK, RLT_SUCCESS, 0);
ticket_activate_timeout(tk);
}
static int is_tie(struct ticket_config *tk)
{
int i;
struct booth_site *v;
int count[MAX_NODES] = { 0, };
int max_votes = 0, max_cnt = 0;
for(i=0; i<booth_conf->site_count; i++) {
v = tk->votes_for[i];
if (!v)
continue;
count[v->index]++;
max_votes = max(max_votes, count[v->index]);
}
for(i=0; i<booth_conf->site_count; i++) {
if (count[i] == max_votes)
max_cnt++;
}
return max_cnt > 1;
}
static struct booth_site *majority_votes(struct ticket_config *tk)
{
int i, n;
struct booth_site *v;
int count[MAX_NODES] = { 0, };
for(i=0; i<booth_conf->site_count; i++) {
v = tk->votes_for[i];
if (!v)
continue;
n = v->index;
count[n]++;
tk_log_debug("Majority: %d %s wants %d %s => %d",
i, site_string(&booth_conf->site[i]),
n, site_string(v),
count[n]);
if (count[n]*2 <= booth_conf->site_count)
continue;
tk_log_debug("Majority reached: %d of %d for %s",
count[n], booth_conf->site_count,
site_string(v));
return v;
}
return NULL;
}
void elections_end(struct ticket_config *tk)
{
time_t now;
struct booth_site *new_leader;
- now = time(NULL);
+ now = get_secs(NULL);
if (now > tk->election_end) {
/* This is previous election timed out */
tk_log_info("elections finished");
}
tk->in_election = 0;
new_leader = majority_votes(tk);
if (new_leader == local) {
tk_log_info("granted successfully here");
won_elections(tk);
} else if (new_leader) {
tk_log_info("ticket granted at %s",
site_string(new_leader));
} else {
tk_log_info("nobody won elections, new elections");
if (!new_election(tk, NULL, is_tie(tk), OR_AGAIN)) {
ticket_activate_timeout(tk);
}
}
}
static int newer_term(struct ticket_config *tk,
struct booth_site *sender,
struct booth_site *leader,
struct boothc_ticket_msg *msg,
int in_election)
{
uint32_t term;
/* it may happen that we hear about our newer term */
if (leader == local)
return 0;
term = ntohl(msg->ticket.term);
/* §5.1 */
if (term > tk->current_term) {
tk->state = ST_FOLLOWER;
if (!in_election) {
tk->leader = leader;
tk_log_info("from %s: higher term %d vs. %d, following %s",
site_string(sender),
term, tk->current_term,
ticket_leader_string(tk));
} else {
tk_log_debug("from %s: higher term %d vs. %d (election)",
site_string(sender),
term, tk->current_term);
}
tk->current_term = term;
return 1;
}
return 0;
}
static int term_too_low(struct ticket_config *tk,
struct booth_site *sender,
struct booth_site *leader,
struct boothc_ticket_msg *msg)
{
uint32_t term;
term = ntohl(msg->ticket.term);
/* §5.1 */
if (term < tk->current_term) {
tk_log_info("sending reject to %s, its term too low "
"(%d vs. %d)", site_string(sender),
term, tk->current_term
);
send_reject(sender, tk, RLT_TERM_OUTDATED, msg);
return 1;
}
return 0;
}
/* For follower. */
static int answer_HEARTBEAT (
struct ticket_config *tk,
struct booth_site *sender,
struct booth_site *leader,
struct boothc_ticket_msg *msg
)
{
uint32_t term;
term = ntohl(msg->ticket.term);
tk_log_debug("heartbeat from leader: %s, have %s; term %d vs %d",
site_string(leader), ticket_leader_string(tk),
term, tk->current_term);
if (term < tk->current_term) {
if (sender == tk->leader) {
tk_log_info("trusting leader %s with a lower term (%d vs %d)",
site_string(leader), term, tk->current_term);
} else if (is_owned(tk)) {
tk_log_warn("different leader %s with a lower term "
"(%d vs %d), sending reject",
site_string(leader), term, tk->current_term);
return send_reject(sender, tk, RLT_TERM_OUTDATED, msg);
}
}
/* got heartbeat, no rejects expected anymore */
tk->expect_more_rejects = 0;
/* Needed? */
newer_term(tk, sender, leader, msg, 0);
become_follower(tk, msg);
/* Racy??? */
assert(sender == leader || !leader);
tk->leader = leader;
/* Ack the heartbeat (we comply). */
return send_msg(OP_ACK, tk, sender, msg);
}
static int process_UPDATE (
struct ticket_config *tk,
struct booth_site *sender,
struct booth_site *leader,
struct boothc_ticket_msg *msg
)
{
if (is_owned(tk) && sender != tk->leader) {
tk_log_warn("different leader %s wants to update "
"our ticket, sending reject",
site_string(leader));
return send_reject(sender, tk, RLT_TERM_OUTDATED, msg);
}
tk_log_debug("leader %s wants to update our ticket",
site_string(leader));
tk->leader = leader;
copy_ticket_from_msg(tk, msg);
ticket_write(tk);
/* run ticket_cron if the ticket expires */
set_ticket_wakeup(tk);
return send_msg(OP_ACK, tk, sender, msg);
}
static int process_REVOKE (
struct ticket_config *tk,
struct booth_site *sender,
struct booth_site *leader,
struct boothc_ticket_msg *msg
)
{
int rv;
if (tk->state == ST_INIT && tk->leader == no_leader) {
/* assume that our ack got lost */
rv = send_msg(OP_ACK, tk, sender, msg);
} else if (tk->leader != sender) {
tk_log_error("%s wants to revoke ticket, "
"but it is not granted there (ignoring)",
site_string(sender));
return 1;
} else if (tk->state != ST_FOLLOWER) {
tk_log_error("unexpected ticket revoke from %s "
"(in state %s) (ignoring)",
site_string(sender),
state_to_string(tk->state));
return 1;
} else {
tk_log_info("%s revokes ticket",
site_string(tk->leader));
reset_ticket(tk);
tk->leader = no_leader;
ticket_write(tk);
rv = send_msg(OP_ACK, tk, sender, msg);
}
return rv;
}
/* For leader. */
static int process_ACK(
struct ticket_config *tk,
struct booth_site *sender,
struct booth_site *leader,
struct boothc_ticket_msg *msg
)
{
uint32_t term;
term = ntohl(msg->ticket.term);
if (newer_term(tk, sender, leader, msg, 0)) {
/* unexpected higher term */
tk_log_warn("got higher term from %s (%d vs. %d)",
site_string(sender),
term, tk->current_term);
return 0;
}
/* Don't send a reject. */
if (term < tk->current_term) {
/* Doesn't know what he's talking about - perhaps
* doesn't receive our packets? */
tk_log_warn("unexpected term "
"from %s (%d vs. %d) (ignoring)",
site_string(sender),
term, tk->current_term);
return 0;
}
/* if the ticket is to be revoked, further processing is not
* interesting (and dangerous) */
if (tk->next_state == ST_INIT || tk->state == ST_INIT)
return 0;
/* for heartbeats we make do with the majority */
if (tk->last_request == OP_HEARTBEAT &&
term == tk->current_term &&
leader == tk->leader) {
if (majority_of_bits(tk, tk->acks_received)) {
/* OK, at least half of the nodes are reachable;
* Update the ticket and send update messages out
*/
return leader_update_ticket(tk);
}
}
return 0;
}
static int process_VOTE_FOR(
struct ticket_config *tk,
struct booth_site *sender,
struct booth_site *leader,
struct boothc_ticket_msg *msg
)
{
/* leader wants to step down? */
if (leader == no_leader && sender == tk->leader &&
(tk->state == ST_FOLLOWER || tk->state == ST_CANDIDATE)) {
tk_log_info("%s wants to give the ticket away",
site_string(tk->leader));
reset_ticket(tk);
tk->state = ST_FOLLOWER;
if (local->type == SITE) {
ticket_write(tk);
schedule_election(tk, OR_STEPDOWN);
}
return 0;
}
if (tk->state != ST_CANDIDATE) {
/* lost candidate status, somebody rejected our proposal */
tk_log_debug("candidate status lost, ignoring vote_for from %s",
site_string(sender));
return 0;
}
if (term_too_low(tk, sender, leader, msg))
return 0;
if (newer_term(tk, sender, leader, msg, 0)) {
clear_election(tk);
}
record_vote(tk, sender, leader);
/* only if all voted can we take the ticket now, otherwise
* wait for timeout in ticket_cron */
if (!tk->acks_expected) {
/* §5.2 */
elections_end(tk);
}
return 0;
}
static int process_REJECTED(
struct ticket_config *tk,
struct booth_site *sender,
struct booth_site *leader,
struct boothc_ticket_msg *msg
)
{
uint32_t rv;
rv = ntohl(msg->header.result);
if (tk->state == ST_CANDIDATE &&
leader == local) {
/* the sender has us as the leader (!)
* the elections will time out, then we can try again
*/
tk_log_warn("ticket was granted to us "
"(and we didn't know)");
tk->expect_more_rejects = 1;
return 0;
}
if (tk->state == ST_CANDIDATE &&
rv == RLT_TERM_OUTDATED) {
tk_log_warn("ticket outdated (term %d), granted to %s",
ntohl(msg->ticket.term),
site_string(leader)
);
tk->leader = leader;
tk->expect_more_rejects = 1;
become_follower(tk, msg);
return 0;
}
if (tk->state == ST_CANDIDATE &&
rv == RLT_TERM_STILL_VALID) {
if (tk->lost_leader == leader) {
if (tk->election_reason == OR_TKT_LOST) {
tk_log_warn("%s still has the ticket valid, "
"we'll backup a bit",
site_string(sender));
} else {
tk_log_warn("%s unexpectedly rejects elections",
site_string(sender));
}
} else {
tk_log_warn("ticket was granted to %s "
"(and we didn't know)",
site_string(leader));
}
tk->leader = leader;
become_follower(tk, msg);
tk->expect_more_rejects = 1;
return 0;
}
if (tk->state == ST_CANDIDATE &&
rv == RLT_YOU_OUTDATED) {
tk->leader = leader;
tk->expect_more_rejects = 1;
if (leader && leader != no_leader) {
tk_log_warn("our ticket is outdated, granted to %s",
site_string(leader));
become_follower(tk, msg);
} else {
tk_log_warn("our ticket is outdated and revoked");
update_ticket_from_msg(tk, sender, msg);
tk->state = ST_INIT;
}
return 0;
}
if (!tk->expect_more_rejects) {
tk_log_warn("from %s: in state %s, got %s (unexpected reject)",
site_string(sender),
state_to_string(tk->state),
state_to_string(rv));
}
return 0;
}
static int ticket_seems_ok(struct ticket_config *tk)
{
int time_left;
time_left = term_time_left(tk);
if (!time_left)
return 0; /* quite sure */
if (tk->state == ST_CANDIDATE)
return 0; /* in state of flux */
if (tk->state == ST_LEADER)
return 1; /* quite sure */
if (tk->state == ST_FOLLOWER &&
time_left >= tk->term_duration/3)
return 1; /* almost quite sure */
return 0;
}
static int test_reason(
struct ticket_config *tk,
struct booth_site *sender,
struct booth_site *leader,
struct boothc_ticket_msg *msg
)
{
int reason;
reason = ntohl(msg->header.reason);
if (reason == OR_TKT_LOST) {
if (tk->state == ST_INIT &&
tk->leader == no_leader) {
tk_log_warn("%s claims that the ticket is lost, "
"but it's in %s state (reject sent)",
site_string(sender),
state_to_string(tk->state)
);
return RLT_YOU_OUTDATED;
}
if (ticket_seems_ok(tk)) {
tk_log_warn("%s claims that the ticket is lost, "
"but it is ok here (reject sent)",
site_string(sender));
return RLT_TERM_STILL_VALID;
}
}
return 0;
}
/* §5.2 */
static int answer_REQ_VOTE(
struct ticket_config *tk,
struct booth_site *sender,
struct booth_site *leader,
struct boothc_ticket_msg *msg
)
{
int valid;
struct boothc_ticket_msg omsg;
cmd_result_t inappr_reason;
inappr_reason = test_reason(tk, sender, leader, msg);
if (inappr_reason)
return send_reject(sender, tk, inappr_reason, msg);
valid = term_time_left(tk);
/* allow the leader to start new elections on valid tickets */
if (sender != tk->leader && valid) {
tk_log_warn("election from %s rejected "
"(we have %s as ticket owner), ticket still valid for %ds",
site_string(sender), site_string(tk->leader), valid);
return send_reject(sender, tk, RLT_TERM_STILL_VALID, msg);
}
if (term_too_low(tk, sender, leader, msg))
return 0;
/* set this, so that we know not to send status for the
* ticket */
tk->in_election = 1;
/* if it's a newer term or ... */
if (newer_term(tk, sender, leader, msg, 1)) {
clear_election(tk);
goto vote_for_sender;
}
/* ... we didn't vote yet, then vote for the sender */
/* §5.2, §5.4 */
if (!tk->voted_for) {
vote_for_sender:
tk->voted_for = sender;
record_vote(tk, sender, leader);
}
init_ticket_msg(&omsg, OP_VOTE_FOR, OP_REQ_VOTE, RLT_SUCCESS, 0, tk);
omsg.ticket.leader = htonl(get_node_id(tk->voted_for));
return booth_udp_send(sender, &omsg, sizeof(omsg));
}
int new_election(struct ticket_config *tk,
struct booth_site *preference, int update_term, cmd_reason_t reason)
{
struct booth_site *new_leader;
time_t now;
if (local->type != SITE)
return 0;
- time(&now);
+ get_secs(&now);
tk_log_debug("start new election?, now=%" PRIi64 ", end %" PRIi64,
- (int64_t)now, (int64_t)(tk->election_end));
+ (int64_t)wall_ts(now), (int64_t)(wall_ts(tk->election_end)));
if (now < tk->election_end)
return 1;
/* §5.2 */
/* If there was _no_ answer, don't keep incrementing the term number
* indefinitely. If there was no peer, there'll probably be no one
* listening now either. However, we don't know if we were
* invoked due to a timeout (caller does).
*/
if (update_term) {
/* save the previous term, we may need to send out the
* MY_INDEX message */
if (tk->state != ST_CANDIDATE) {
memcpy(tk->last_valid_tk, tk, sizeof(struct ticket_config));
}
tk->current_term++;
}
tk->term_expires = 0;
tk->election_end = now + tk->timeout;
tk->in_election = 1;
tk_log_info("starting new election (term=%d)",
tk->current_term);
clear_election(tk);
if(preference)
new_leader = preference;
else
new_leader = (local->type == SITE) ? local : NULL;
record_vote(tk, local, new_leader);
tk->voted_for = new_leader;
tk->leader = no_leader;
tk->state = ST_CANDIDATE;
/* some callers may want just to repeat on timeout */
if (reason == OR_AGAIN) {
reason = tk->election_reason;
} else {
tk->election_reason = reason;
}
ticket_broadcast(tk, OP_REQ_VOTE, OP_VOTE_FOR, RLT_SUCCESS, reason);
ticket_activate_timeout(tk);
add_random_delay(tk);
return 0;
}
/* we were a leader and somebody says that they have a more up
* to date ticket
* there was probably connectivity loss
* tricky
*/
static int leader_handle_newer_ticket(
struct ticket_config *tk,
struct booth_site *sender,
struct booth_site *leader,
struct boothc_ticket_msg *msg
)
{
update_term_from_msg(tk, msg);
if (leader != no_leader && leader && leader != local) {
/* eek, two leaders, split brain */
/* normally shouldn't happen; run election */
tk_log_error("from %s: ticket granted to %s! (revoking locally)",
site_string(sender),
site_string(leader)
);
} else if (term_time_left(tk)) {
/* eek, two leaders, split brain */
/* normally shouldn't happen; run election */
tk_log_error("from %s: ticket granted to %s! (revoking locally)",
site_string(sender),
site_string(leader)
);
}
tk->next_state = ST_LEADER;
return 0;
}
/* reply to STATUS */
static int process_MY_INDEX (
struct ticket_config *tk,
struct booth_site *sender,
struct booth_site *leader,
struct boothc_ticket_msg *msg
)
{
int i;
int expired;
expired = !msg->ticket.term_valid_for;
i = cmp_msg_ticket(tk, sender, leader, msg);
if (i > 0) {
/* let them know about our newer ticket */
/* but if we're voting in elections, our ticket is not
* valid yet, don't send it */
if (!tk->in_election)
send_msg(OP_MY_INDEX, tk, sender, msg);
if (tk->state == ST_LEADER) {
tk_log_info("sending ticket update to %s",
site_string(sender));
return send_msg(OP_UPDATE, tk, sender, msg);
}
}
/* we have a newer or equal ticket and theirs is expired,
* nothing more to do here */
if (i >= 0 && expired) {
return 0;
}
if (tk->state == ST_LEADER) {
/* we're the leader, thread carefully */
if (expired) {
/* if their ticket is expired,
* nothing more to do */
return 0;
}
if (i < 0) {
/* they have a newer ticket, trouble if we're already leader
* for it */
tk_log_warn("from %s: more up to date ticket at %s",
site_string(sender),
site_string(leader)
);
return leader_handle_newer_ticket(tk, sender, leader, msg);
} else {
/* we have the ticket and we don't care */
return 0;
}
}
/* their ticket is either newer or not expired, don't
* ignore it */
update_ticket_from_msg(tk, sender, msg);
tk->leader = leader;
update_ticket_state(tk, sender);
set_ticket_wakeup(tk);
return 0;
}
int raft_answer(
struct ticket_config *tk,
struct booth_site *sender,
struct booth_site *leader,
struct boothc_ticket_msg *msg
)
{
int cmd, req;
int rv;
rv = 0;
cmd = ntohl(msg->header.cmd);
req = ntohl(msg->header.request);
if (req)
tk_log_debug("got %s (req %s) from %s",
state_to_string(cmd),
state_to_string(req),
site_string(sender));
else
tk_log_debug("got %s from %s",
state_to_string(cmd),
site_string(sender));
switch (cmd) {
case OP_REQ_VOTE:
rv = answer_REQ_VOTE(tk, sender, leader, msg);
break;
case OP_VOTE_FOR:
rv = process_VOTE_FOR(tk, sender, leader, msg);
break;
case OP_ACK:
if (tk->leader == local &&
tk->state == ST_LEADER)
rv = process_ACK(tk, sender, leader, msg);
break;
case OP_HEARTBEAT:
if ((tk->leader != local || !term_time_left(tk)) &&
(tk->state == ST_INIT || tk->state == ST_FOLLOWER ||
tk->state == ST_CANDIDATE))
rv = answer_HEARTBEAT(tk, sender, leader, msg);
else {
tk_log_warn("unexpected message %s, from %s",
state_to_string(cmd),
site_string(sender));
if (ticket_seems_ok(tk))
send_reject(sender, tk, RLT_TERM_STILL_VALID, msg);
rv = -EINVAL;
}
break;
case OP_UPDATE:
if (((tk->leader != local && tk->leader == leader) || !is_owned(tk)) &&
(tk->state == ST_INIT || tk->state == ST_FOLLOWER ||
tk->state == ST_CANDIDATE)) {
rv = process_UPDATE(tk, sender, leader, msg);
} else {
tk_log_warn("unexpected message %s, from %s",
state_to_string(cmd),
site_string(sender));
if (ticket_seems_ok(tk))
send_reject(sender, tk, RLT_TERM_STILL_VALID, msg);
rv = -EINVAL;
}
break;
case OP_REJECTED:
rv = process_REJECTED(tk, sender, leader, msg);
break;
case OP_REVOKE:
rv = process_REVOKE(tk, sender, leader, msg);
break;
case OP_MY_INDEX:
rv = process_MY_INDEX(tk, sender, leader, msg);
break;
case OP_STATUS:
if (!tk->in_election)
rv = send_msg(OP_MY_INDEX, tk, sender, msg);
break;
default:
tk_log_error("unknown message %s, from %s",
state_to_string(cmd), site_string(sender));
rv = -EINVAL;
}
return rv;
}
diff --git a/src/ticket.c b/src/ticket.c
index 7702f46..72b5e09 100644
--- a/src/ticket.c
+++ b/src/ticket.c
@@ -1,1079 +1,1076 @@
/*
* Copyright (C) 2011 Jiaju Zhang <jjzhang@suse.de>
* Copyright (C) 2013-2014 Philipp Marek <philipp.marek@linbit.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>
#include <inttypes.h>
#include <stdio.h>
#include <assert.h>
#include <time.h>
#include <clplumbing/cl_random.h>
#include "ticket.h"
#include "config.h"
#include "pacemaker.h"
#include "inline-fn.h"
#include "log.h"
#include "booth.h"
#include "raft.h"
#include "handler.h"
#define TK_LINE 256
/* Untrusted input, must fit (incl. \0) in a buffer of max chars. */
int check_max_len_valid(const char *s, int max)
{
int i;
for(i=0; i<max; i++)
if (s[i] == 0)
return 1;
return 0;
}
int find_ticket_by_name(const char *ticket, struct ticket_config **found)
{
int i;
if (found)
*found = NULL;
for (i = 0; i < booth_conf->ticket_count; i++) {
if (!strcmp(booth_conf->ticket[i].name, ticket)) {
if (found)
*found = booth_conf->ticket + i;
return 1;
}
}
return 0;
}
int check_ticket(char *ticket, struct ticket_config **found)
{
if (found)
*found = NULL;
if (!booth_conf)
return 0;
if (!check_max_len_valid(ticket, sizeof(booth_conf->ticket[0].name)))
return 0;
return find_ticket_by_name(ticket, found);
}
int check_site(char *site, int *is_local)
{
struct booth_site *node;
if (!check_max_len_valid(site, sizeof(node->addr_string)))
return 0;
if (find_site_by_name(site, &node, 0)) {
*is_local = node->local;
return 1;
}
return 0;
}
int ticket_write(struct ticket_config *tk)
{
if (local->type != SITE)
return -EINVAL;
if (tk->leader == local) {
pcmk_handler.grant_ticket(tk);
} else {
pcmk_handler.revoke_ticket(tk);
}
tk->update_cib = 0;
return 0;
}
/* Ask an external program whether getting the ticket
* makes sense.
* Eg. if the services have a failcount of INFINITY,
* we can't serve here anyway. */
int test_external_prog(struct ticket_config *tk,
int start_election)
{
int rv;
rv = run_handler(tk, tk->ext_verifier, 1);
if (rv) {
tk_log_warn("we are not allowed to acquire ticket");
/* Give it to somebody else.
* Just send a VOTE_FOR message, so the
* others can start elections. */
if (leader_and_valid(tk)) {
reset_ticket(tk);
ticket_write(tk);
if (start_election) {
ticket_broadcast(tk, OP_VOTE_FOR, OP_REQ_VOTE, RLT_SUCCESS, OR_LOCAL_FAIL);
}
}
}
return rv;
}
/* Try to acquire a ticket
* Could be manual grant or after ticket loss
*/
int acquire_ticket(struct ticket_config *tk, cmd_reason_t reason)
{
if (test_external_prog(tk, 0))
return RLT_EXT_FAILED;
return new_election(tk, local, 1, reason);
}
/** Try to get the ticket for the local site.
* */
int do_grant_ticket(struct ticket_config *tk, int options)
{
int rv;
tk_log_info("granting ticket");
if (tk->leader == local)
return RLT_SUCCESS;
if (is_owned(tk))
return RLT_OVERGRANT;
- tk->delay_commit = time(NULL) +
+ tk->delay_commit = get_secs(NULL) +
tk->term_duration + tk->acquire_after;
if (options & OPT_IMMEDIATE) {
tk_log_warn("granting ticket immediately! If there are "
"unreachable sites, _hope_ you are sure that they don't "
"have the ticket!");
tk->delay_commit = 0;
}
rv = acquire_ticket(tk, OR_ADMIN);
if (rv)
tk->delay_commit = 0;
return rv;
}
static int start_revoke_ticket(struct ticket_config *tk)
{
tk_log_info("revoking ticket");
reset_ticket(tk);
tk->leader = no_leader;
ticket_write(tk);
ticket_activate_timeout(tk);
return ticket_broadcast(tk, OP_REVOKE, OP_ACK, RLT_SUCCESS, OR_ADMIN);
}
/** Ticket revoke.
* Only to be started from the leader. */
int do_revoke_ticket(struct ticket_config *tk)
{
if (tk->acks_expected) {
tk_log_info("delay ticket revoke until the current operation finishes");
tk->next_state = ST_INIT;
return 0;
} else {
return start_revoke_ticket(tk);
}
}
int list_ticket(char **pdata, unsigned int *len)
{
struct ticket_config *tk;
char timeout_str[64];
char pending_str[64];
char *data, *cp;
int i, alloc;
+ time_t ts;
*pdata = NULL;
*len = 0;
alloc = 256 +
booth_conf->ticket_count * (BOOTH_NAME_LEN * 2 + 128);
data = malloc(alloc);
if (!data)
return -ENOMEM;
cp = data;
foreach_ticket(i, tk) {
- if (tk->term_expires != 0)
+ if (tk->term_expires != 0) {
+ ts = wall_ts(tk->term_expires);
strftime(timeout_str, sizeof(timeout_str), "%F %T",
- localtime(&tk->term_expires));
- else
+ localtime(&ts));
+ } else
strcpy(timeout_str, "N/A");
- if (tk->leader == local && tk->delay_commit > time(NULL)) {
+ if (tk->leader == local && tk->delay_commit > get_secs(NULL)) {
+ ts = wall_ts(tk->delay_commit);
strcpy(pending_str, " (commit pending until ");
strftime(pending_str + strlen(" (commit pending until "),
sizeof(pending_str) - strlen(" (commit pending until ") - 1,
- "%F %T", localtime(&tk->delay_commit));
+ "%F %T", localtime(&ts));
strcat(pending_str, ")");
} else
*pending_str = '\0';
cp += snprintf(cp,
alloc - (cp - data),
"ticket: %s, leader: %s",
tk->name,
ticket_leader_string(tk));
if (is_owned(tk)) {
cp += snprintf(cp,
alloc - (cp - data),
", expires: %s%s\n",
timeout_str,
pending_str);
} else {
cp += snprintf(cp, alloc - (cp - data), "\n");
}
if (alloc - (cp - data) <= 0)
return -ENOMEM;
}
*pdata = data;
*len = cp - data;
return 0;
}
void reset_ticket(struct ticket_config *tk)
{
disown_ticket(tk);
tk->state = ST_INIT;
tk->voted_for = NULL;
}
static void reacquire_ticket(struct ticket_config *tk)
{
int valid;
const char *where_granted = "\0";
char buff[64];
- valid = (tk->term_expires >= time(NULL));
+ valid = (tk->term_expires >= get_secs(NULL));
if (tk->leader == local) {
where_granted = "granted here";
} else {
snprintf(buff, sizeof(buff), "granted to %s",
site_string(tk->leader));
where_granted = buff;
}
if (!valid) {
tk_log_warn("%s, but not valid "
"anymore (will try to reacquire)", where_granted);
}
if (tk->is_granted && tk->leader != local) {
if (tk->leader && tk->leader != no_leader) {
tk_log_error("granted here, but also %s, "
"that's really too bad (will try to reacquire)",
where_granted);
} else {
tk_log_warn("granted here, but we're "
"not recorded as the grantee (will try to reacquire)");
}
}
/* try to acquire the
* ticket through new elections
*/
acquire_ticket(tk, OR_REACQUIRE);
}
void update_ticket_state(struct ticket_config *tk, struct booth_site *sender)
{
if (tk->state == ST_CANDIDATE) {
tk_log_info("learned from %s about "
"newer ticket, stopping elections",
site_string(sender));
/* there could be rejects coming from others; don't log
* warnings unnecessarily */
tk->expect_more_rejects = 1;
}
if (tk->leader == local || tk->is_granted) {
/* message from a live leader with valid ticket? */
if (sender == tk->leader && term_time_left(tk)) {
if (tk->is_granted) {
tk_log_warn("ticket was granted here, "
"but it's live at %s (revoking here)",
site_string(sender));
} else {
tk_log_info("ticket live at %s",
site_string(sender));
}
disown_ticket(tk);
ticket_write(tk);
tk->state = ST_FOLLOWER;
tk->next_state = ST_FOLLOWER;
} else {
if (tk->state == ST_CANDIDATE) {
tk->state = ST_FOLLOWER;
}
tk->next_state = ST_LEADER;
}
} else {
if (!tk->leader || tk->leader == no_leader) {
if (sender)
tk_log_info("ticket is not granted");
else
tk_log_info("ticket is not granted (from CIB)");
tk->state = ST_INIT;
} else {
if (sender)
tk_log_info("ticket granted to %s (says %s)",
site_string(tk->leader),
site_string(sender));
else
tk_log_info("ticket granted to %s (from CIB)",
site_string(tk->leader));
tk->state = ST_FOLLOWER;
/* just make sure that we check the ticket soon */
tk->next_state = ST_FOLLOWER;
}
}
}
int setup_ticket(void)
{
struct ticket_config *tk;
int i;
foreach_ticket(i, tk) {
reset_ticket(tk);
if (local->type == SITE) {
if (!pcmk_handler.load_ticket(tk)) {
update_ticket_state(tk, NULL);
}
tk->update_cib = 1;
}
tk_log_info("broadcasting state query");
/* wait until all send their status (or the first
* timeout) */
tk->start_postpone = 1;
ticket_broadcast(tk, OP_STATUS, OP_MY_INDEX, RLT_SUCCESS, 0);
}
return 0;
}
int ticket_answer_list(int fd, struct boothc_ticket_msg *msg)
{
char *data;
int olen, rv;
struct boothc_header hdr;
rv = list_ticket(&data, &olen);
if (rv < 0)
return rv;
init_header(&hdr, CMR_LIST, 0, 0, RLT_SUCCESS, 0, sizeof(hdr) + olen);
return send_header_plus(fd, &hdr, data, olen);
}
int ticket_answer_grant(int fd, struct boothc_ticket_msg *msg)
{
int rv;
struct ticket_config *tk;
if (!check_ticket(msg->ticket.id, &tk)) {
log_warn("client asked to grant unknown ticket %s",
msg->ticket.id);
rv = RLT_INVALID_ARG;
goto reply;
}
if (is_owned(tk)) {
log_warn("client wants to grant an (already granted!) ticket %s",
msg->ticket.id);
rv = RLT_OVERGRANT;
goto reply;
}
rv = do_grant_ticket(tk, ntohl(msg->header.options));
reply:
init_header(&msg->header, CMR_GRANT, 0, 0, rv ?: RLT_ASYNC, 0, sizeof(*msg));
return send_ticket_msg(fd, msg);
}
int ticket_answer_revoke(int fd, struct boothc_ticket_msg *msg)
{
int rv;
struct ticket_config *tk;
if (!check_ticket(msg->ticket.id, &tk)) {
log_warn("client wants to revoke an unknown ticket %s",
msg->ticket.id);
rv = RLT_INVALID_ARG;
goto reply;
}
if (!is_owned(tk)) {
log_info("client wants to revoke a free ticket %s",
msg->ticket.id);
rv = RLT_TICKET_IDLE;
goto reply;
}
if (tk->leader != local) {
log_info("the ticket %s is not granted here, "
"redirect to %s",
msg->ticket.id, ticket_leader_string(tk));
rv = RLT_REDIRECT;
goto reply;
}
rv = do_revoke_ticket(tk);
if (rv == 0)
rv = RLT_ASYNC;
reply:
init_ticket_msg(msg, CMR_REVOKE, 0, rv, 0, tk);
return send_ticket_msg(fd, msg);
}
int ticket_broadcast(struct ticket_config *tk,
cmd_request_t cmd, cmd_request_t expected_reply,
cmd_result_t res, cmd_reason_t reason)
{
struct boothc_ticket_msg msg;
init_ticket_msg(&msg, cmd, 0, res, reason, tk);
tk_log_debug("broadcasting '%s' (term=%d, valid=%d)",
state_to_string(cmd),
ntohl(msg.ticket.term),
ntohl(msg.ticket.term_valid_for));
tk->last_request = cmd;
if (expected_reply) {
expect_replies(tk, expected_reply);
}
return transport()->broadcast(&msg, sizeof(msg));
}
/* is it safe to commit the grant?
* if we didn't hear from all sites on the initial grant, we may
* need to delay the commit
*
* TODO: investigate possibility to devise from history whether a
* missing site could be holding a ticket or not
*/
static int ticket_dangerous(struct ticket_config *tk)
{
if (!tk->delay_commit)
return 0;
- if (tk->delay_commit <= time(NULL) ||
+ if (tk->delay_commit <= get_secs(NULL) ||
all_sites_replied(tk)) {
tk_log_debug("ticket delay commit expired");
tk->delay_commit = 0;
return 0;
} else {
tk_log_debug("delay ticket commit for %ds",
- (int)(tk->delay_commit - time(NULL)));
+ (int)(tk->delay_commit - get_secs(NULL)));
}
return 1;
}
/* update the ticket on the leader, write it to the CIB, and
send out the update message to others with the new expiry
time
*/
int leader_update_ticket(struct ticket_config *tk)
{
int rv = 0;
if (tk->ticket_updated >= 2)
return 0;
if (tk->ticket_updated < 1) {
tk->ticket_updated = 1;
- tk->term_expires = time(NULL) + tk->term_duration;
+ tk->term_expires = get_secs(NULL) + tk->term_duration;
rv = ticket_broadcast(tk, OP_UPDATE, OP_ACK, RLT_SUCCESS, 0);
}
if (tk->ticket_updated < 2) {
if (!ticket_dangerous(tk)) {
tk->ticket_updated = 2;
ticket_write(tk);
} else {
/* log just once, on the first retry */
if (tk->retry_number == 1)
tk_log_info("delaying ticket commit to CIB for %ds "
"(or all sites are reached)",
- (int)(tk->delay_commit - time(NULL)));
+ (int)(tk->delay_commit - get_secs(NULL)));
}
}
return rv;
}
static void log_lost_servers(struct ticket_config *tk)
{
struct booth_site *n;
int i;
if (tk->retry_number > 1)
/* log those that we couldn't reach, but do
* that only on the first retry
*/
return;
for (i = 0; i < booth_conf->site_count; i++) {
n = booth_conf->site + i;
if (!(tk->acks_received & n->bitmask)) {
tk_log_warn("%s %s didn't acknowledge our request, "
"will retry %d times",
(n->type == ARBITRATOR ? "arbitrator" : "site"),
site_string(n),
tk->retries);
}
}
}
static void resend_msg(struct ticket_config *tk)
{
struct booth_site *n;
int i;
if (!(tk->acks_received ^ local->bitmask)) {
ticket_broadcast(tk, tk->last_request, 0, RLT_SUCCESS, 0);
} else {
for (i = 0; i < booth_conf->site_count; i++) {
n = booth_conf->site + i;
if (!(tk->acks_received & n->bitmask)) {
tk_log_debug("resending %s to %s",
state_to_string(tk->last_request),
site_string(n)
);
send_msg(tk->last_request, tk, n, NULL);
}
}
}
}
static void handle_resends(struct ticket_config *tk)
{
int ack_cnt;
if (++tk->retry_number > tk->retries) {
tk_log_debug("giving up on sending retries");
no_resends(tk);
set_ticket_wakeup(tk);
return;
}
/* try to reach some sites again if we just stepped down */
if (tk->last_request == OP_VOTE_FOR) {
tk_log_warn("no answers to our request (try #%d), "
"we are alone",
tk->retry_number);
goto just_resend;
}
if (!majority_of_bits(tk, tk->acks_received)) {
ack_cnt = count_bits(tk->acks_received) - 1;
if (!ack_cnt) {
tk_log_warn("no answers to our request (try #%d), "
"we are alone",
tk->retry_number);
} else {
tk_log_warn("not enough answers to our request (try #%d): "
"only got %d answers",
tk->retry_number,
ack_cnt);
}
} else {
log_lost_servers(tk);
if (tk->last_request == OP_HEARTBEAT &&
is_owned(tk)) {
/* we have the majority, update the ticket, at
* least the local copy if we're still not
* allowed to commit
*/
leader_update_ticket(tk);
}
}
just_resend:
resend_msg(tk);
ticket_activate_timeout(tk);
}
int postpone_ticket_processing(struct ticket_config *tk)
{
extern time_t start_time;
return tk->start_postpone &&
- ((time(NULL) - start_time) < tk->timeout);
+ ((get_secs(NULL) - start_time) < tk->timeout);
}
static void process_next_state(struct ticket_config *tk)
{
switch(tk->next_state) {
case ST_LEADER:
reacquire_ticket(tk);
break;
case ST_INIT:
no_resends(tk);
start_revoke_ticket(tk);
break;
/* wanting to be follower is not much of an ambition; no
* processing, just return; don't reset start_postpone until
* we got some replies to status */
case ST_FOLLOWER:
return;
default:
break;
}
tk->start_postpone = 0;
}
static void ticket_lost(struct ticket_config *tk)
{
if (tk->leader != local) {
tk_log_warn("lost at %s", site_string(tk->leader));
} else {
tk_log_warn("lost majority (revoking locally)");
}
tk->lost_leader = tk->leader;
reset_ticket(tk);
tk->state = ST_FOLLOWER;
if (local->type == SITE) {
ticket_write(tk);
schedule_election(tk, OR_TKT_LOST);
}
}
static void next_action(struct ticket_config *tk)
{
switch(tk->state) {
case ST_INIT:
/* init state, handle resends for ticket revoke */
/* and rebroadcast if stepping down */
if (tk->acks_expected) {
handle_resends(tk);
}
break;
case ST_FOLLOWER:
/* leader/ticket lost? and we didn't vote yet */
tk_log_debug("leader: %s, voted_for: %s",
site_string(tk->leader),
site_string(tk->voted_for));
if (!tk->leader && !tk->voted_for) {
disown_ticket(tk);
if (!new_election(tk, NULL, 1, OR_AGAIN)) {
ticket_activate_timeout(tk);
}
}
break;
case ST_CANDIDATE:
/* elections timed out? */
elections_end(tk);
break;
case ST_LEADER:
/* timeout or ticket renewal? */
if (tk->acks_expected) {
handle_resends(tk);
} else {
/* this is ticket renewal, run local test */
if (!test_external_prog(tk, 1)) {
ticket_broadcast(tk, OP_HEARTBEAT, OP_ACK, RLT_SUCCESS, 0);
ticket_activate_timeout(tk);
}
}
break;
default:
break;
}
}
static void ticket_cron(struct ticket_config *tk)
{
time_t now;
/* don't process the tickets too early after start */
if (postpone_ticket_processing(tk)) {
tk_log_debug("ticket processing postponed (start_postpone=%d)",
tk->start_postpone);
/* but run again soon */
ticket_activate_timeout(tk);
return;
}
/* no need for status resends, we hope we got at least one
* my_index back */
if (tk->acks_expected == OP_MY_INDEX) {
no_resends(tk);
}
/* after startup, we need to decide what to do based on the
* current ticket state; tk->next_state has a hint
* also used for revokes which had to be delayed
*/
if (tk->next_state) {
process_next_state(tk);
goto out;
}
/* Has an owner, has an expiry date, and expiry date in the past?
* Losing the ticket must happen in _every_ state. */
- now = time(NULL);
+ now = get_secs(NULL);
if (!tk->in_election &&
tk->term_expires &&
is_owned(tk) &&
now >= tk->term_expires) {
ticket_lost(tk);
goto out;
}
next_action(tk);
out:
tk->next_state = 0;
if (!tk->in_election && tk->update_cib)
ticket_write(tk);
}
void process_tickets(void)
{
struct ticket_config *tk;
int i;
- struct timeval now, last_cron;
- float sec_until;
+ timetype now, last_cron;
- gettimeofday(&now, NULL);
+ get_time(&now);
foreach_ticket(i, tk) {
- sec_until = timeval_to_float(tk->next_cron) - timeval_to_float(now);
- if (0)
- tk_log_debug("next cron %" PRIx64 ".%03d, "
- "now %" PRIx64 "%03d, in %f",
- (uint64_t)tk->next_cron.tv_sec, timeval_msec(tk->next_cron),
- (uint64_t)now.tv_sec, timeval_msec(now),
- sec_until);
- if (sec_until > 0.0)
+ if (time_cmp(&tk->next_cron, &now, >))
continue;
tk_log_debug("ticket cron");
last_cron = tk->next_cron;
ticket_cron(tk);
- if (!timercmp(&last_cron, &tk->next_cron, !=)) {
+ if (!time_cmp(&last_cron, &tk->next_cron, !=)) {
tk_log_debug("nobody set ticket wakeup");
set_ticket_wakeup(tk);
}
}
}
void tickets_log_info(void)
{
struct ticket_config *tk;
int i;
+ time_t ts;
foreach_ticket(i, tk) {
+ ts = wall_ts(tk->term_expires);
tk_log_info("state '%s' "
"term %d "
"leader %s "
"expires %-24.24s",
state_to_string(tk->state),
tk->current_term,
ticket_leader_string(tk),
- ctime(&tk->term_expires));
+ ctime(&ts));
}
}
static void update_acks(
struct ticket_config *tk,
struct booth_site *sender,
struct booth_site *leader,
struct boothc_ticket_msg *msg
)
{
uint32_t cmd;
uint32_t req;
cmd = ntohl(msg->header.cmd);
req = ntohl(msg->header.request);
if (req != tk->last_request ||
(tk->acks_expected != cmd &&
tk->acks_expected != OP_REJECTED))
return;
/* got an ack! */
tk->acks_received |= sender->bitmask;
if (cmd == OP_HEARTBEAT)
tk_log_debug("got ACK from %s, %d/%d agree.",
site_string(sender),
count_bits(tk->acks_received),
booth_conf->site_count);
if (tk->delay_commit && all_sites_replied(tk)) {
tk->delay_commit = 0;
}
if (all_replied(tk) ||
/* we just stepped down, need only one site to start
* elections */
(cmd == OP_REQ_VOTE && tk->last_request == OP_VOTE_FOR)) {
no_resends(tk);
tk->start_postpone = 0;
set_ticket_wakeup(tk);
}
}
/* UDP message receiver. */
int message_recv(struct boothc_ticket_msg *msg, int msglen)
{
uint32_t from;
struct booth_site *source;
struct ticket_config *tk;
struct booth_site *leader;
uint32_t leader_u;
if (check_boothc_header(&msg->header, sizeof(*msg)) < 0 ||
msglen != sizeof(*msg)) {
log_error("message receive error");
return -1;
}
from = ntohl(msg->header.from);
if (!find_site_by_id(from, &source) || !source) {
log_error("unknown sender: %08x", from);
return -1;
}
if (!check_ticket(msg->ticket.id, &tk)) {
log_warn("got invalid ticket name %s from %s",
msg->ticket.id, site_string(source));
return -EINVAL;
}
leader_u = ntohl(msg->ticket.leader);
if (!find_site_by_id(leader_u, &leader)) {
tk_log_error("message with unknown leader %u received", leader_u);
return -EINVAL;
}
update_acks(tk, source, leader, msg);
return raft_answer(tk, source, leader, msg);
}
static void log_next_wakeup(struct ticket_config *tk)
{
- struct timeval now, res;
+ timetype now, res;
- gettimeofday(&now, NULL);
- timersub(&tk->next_cron, &now, &res);
- tk_log_debug("set ticket wakeup in %d.%06d",
- (int)res.tv_sec, (int)res.tv_usec);
+ get_time(&now);
+ time_sub(&tk->next_cron, &now, &res);
+ tk_log_debug("set ticket wakeup in %d.%03d",
+ (int)res.tv_sec, (int)msecs(res));
}
/* New vote round; §5.2 */
/* delay the next election start for up to 1s */
void add_random_delay(struct ticket_config *tk)
{
- struct timeval delay, tv;
+ timetype delay, tv;
- delay.tv_sec = 0;
- delay.tv_usec = cl_rand_from_interval(0, 1000000);
- timeradd(&tk->next_cron, &delay, &tv);
+ rand_time_ms(delay, 1000);
+ time_add(&tk->next_cron, &delay, &tv);
ticket_next_cron_at(tk, tv);
if (ANYDEBUG) {
log_next_wakeup(tk);
}
}
void set_ticket_wakeup(struct ticket_config *tk)
{
- struct timeval tv, now;
+ timetype tv, now, res;
/* At least every hour, perhaps sooner. */
ticket_next_cron_in(tk, 3600);
- gettimeofday(&now, NULL);
+ get_time(&now);
switch (tk->state) {
case ST_LEADER:
assert(tk->leader == local);
tv = now;
tv.tv_sec = next_vote_starts_at(tk);
- /* If timestamp is in the past, wakeup at the expiry
- * time. */
- if (timeval_compare(tv, now) <= 0) {
- tk_log_debug("next ts in the past (%f)",
- timeval_to_float(tv) - timeval_to_float(now));
- tv.tv_sec = tk->term_expires;
+ /* If timestamp is in the past, wakeup in
+ * one second. */
+ if (time_cmp(&tv, &now, <)) {
+ time_sub(&tv, &now, &res);
+ tk_log_debug("next ts in the past (%d.%03d)",
+ (int)res.tv_sec, (int)msecs(res));
+ tv.tv_sec = now.tv_sec + 1;
}
ticket_next_cron_at(tk, tv);
break;
case ST_CANDIDATE:
assert(tk->election_end);
ticket_next_cron_at_coarse(tk, tk->election_end);
break;
case ST_INIT:
case ST_FOLLOWER:
/* If there is (or should be) some owner, check on it later on.
* If no one is interested - don't care. */
if (is_owned(tk) &&
(local->type == SITE))
ticket_next_cron_at_coarse(tk,
tk->term_expires + tk->acquire_after);
break;
default:
tk_log_error("unknown ticket state: %d", tk->state);
}
if (tk->next_state) {
/* we need to do something soon here */
if (!tk->acks_expected) {
ticket_next_cron_at(tk, now);
} else {
ticket_activate_timeout(tk);
}
}
if (ANYDEBUG) {
log_next_wakeup(tk);
}
}
void schedule_election(struct ticket_config *tk, cmd_reason_t reason)
{
if (local->type != SITE)
return;
tk->election_reason = reason;
- gettimeofday(&tk->next_cron, NULL);
+ get_time(&tk->next_cron);
/* introduce a short delay before starting election */
add_random_delay(tk);
}
/* Given a state (in host byte order), return a human-readable (char*).
* An array is used so that multiple states can be printed in a single printf(). */
char *state_to_string(uint32_t state_ho)
{
union mu { cmd_request_t s; char c[5]; };
static union mu cache[6] = { { 0 } }, *cur;
static int current = 0;
current ++;
if (current >= sizeof(cache)/sizeof(cache[0]))
current = 0;
cur = cache + current;
cur->s = htonl(state_ho);
/* Shouldn't be necessary, union array is initialized with zeroes, and
* these bytes never get written. */
cur->c[4] = 0;
return cur->c;
}
int send_reject(struct booth_site *dest, struct ticket_config *tk,
cmd_result_t code, struct boothc_ticket_msg *in_msg)
{
int req = ntohl(in_msg->header.cmd);
struct boothc_ticket_msg msg;
tk_log_debug("sending reject to %s",
site_string(dest));
init_ticket_msg(&msg, OP_REJECTED, req, code, 0, tk);
return booth_udp_send(dest, &msg, sizeof(msg));
}
int send_msg (
int cmd,
struct ticket_config *current_tk,
struct booth_site *dest,
struct boothc_ticket_msg *in_msg
)
{
int req = 0;
struct ticket_config *tk = current_tk;
struct boothc_ticket_msg msg;
if (cmd == OP_MY_INDEX) {
if (current_tk->state == ST_CANDIDATE &&
current_tk->last_valid_tk->current_term) {
tk = current_tk->last_valid_tk;
}
tk_log_info("sending status to %s",
site_string(dest));
}
if (in_msg)
req = ntohl(in_msg->header.cmd);
init_ticket_msg(&msg, cmd, req, RLT_SUCCESS, 0, tk);
return booth_udp_send(dest, &msg, sizeof(msg));
}
diff --git a/src/ticket.h b/src/ticket.h
index f4c2770..24e796e 100644
--- a/src/ticket.h
+++ b/src/ticket.h
@@ -1,114 +1,114 @@
/*
* Copyright (C) 2011 Jiaju Zhang <jjzhang@suse.de>
* Copyright (C) 2013-2014 Philipp Marek <philipp.marek@linbit.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _TICKET_H
#define _TICKET_H
#include <time.h>
#include <sys/time.h>
#include <math.h>
+#include "timer.h"
#include "config.h"
#include "log.h"
#define DEFAULT_TICKET_EXPIRY 600
#define DEFAULT_TICKET_TIMEOUT 5
#define DEFAULT_RETRIES 10
#define foreach_ticket(i_,t_) for(i=0; (t_=booth_conf->ticket+i, i<booth_conf->ticket_count); i++)
#define foreach_node(i_,n_) for(i=0; (n_=booth_conf->site+i, i<booth_conf->site_count); i++)
int check_ticket(char *ticket, struct ticket_config **tc);
int check_site(char *site, int *local);
int grant_ticket(struct ticket_config *ticket);
int revoke_ticket(struct ticket_config *ticket);
int list_ticket(char **pdata, unsigned int *len);
int message_recv(struct boothc_ticket_msg *msg, int msglen);
void reset_ticket(struct ticket_config *tk);
void update_ticket_state(struct ticket_config *tk, struct booth_site *sender);
int setup_ticket(void);
int check_max_len_valid(const char *s, int max);
int do_grant_ticket(struct ticket_config *ticket, int options);
int do_revoke_ticket(struct ticket_config *tk);
int find_ticket_by_name(const char *ticket, struct ticket_config **found);
void set_ticket_wakeup(struct ticket_config *tk);
int postpone_ticket_processing(struct ticket_config *tk);
int test_external_prog(struct ticket_config *tk, int start_election);
int acquire_ticket(struct ticket_config *tk, cmd_reason_t reason);
int ticket_answer_list(int fd, struct boothc_ticket_msg *msg);
int ticket_answer_grant(int fd, struct boothc_ticket_msg *msg);
int ticket_answer_revoke(int fd, struct boothc_ticket_msg *msg);
int ticket_broadcast_proposed_state(struct ticket_config *tk, cmd_request_t state);
int ticket_write(struct ticket_config *tk);
void process_tickets(void);
void tickets_log_info(void);
char *state_to_string(uint32_t state_ho);
int send_reject(struct booth_site *dest, struct ticket_config *tk,
cmd_result_t code, struct boothc_ticket_msg *in_msg);
int send_msg (int cmd, struct ticket_config *tk,
struct booth_site *dest, struct boothc_ticket_msg *in_msg);
int ticket_broadcast(struct ticket_config *tk, cmd_request_t cmd, cmd_request_t expected_reply, cmd_result_t res, cmd_reason_t reason);
int leader_update_ticket(struct ticket_config *tk);
void add_random_delay(struct ticket_config *tk);
void schedule_election(struct ticket_config *tk, cmd_reason_t reason);
-static inline void ticket_next_cron_at(struct ticket_config *tk, struct timeval when)
+static inline void ticket_next_cron_at(struct ticket_config *tk, timetype when)
{
tk->next_cron = when;
}
static inline void ticket_next_cron_at_coarse(struct ticket_config *tk, time_t when)
{
+ memset(&tk->next_cron, 0, sizeof(tk->next_cron));
tk->next_cron.tv_sec = when;
- tk->next_cron.tv_usec = 0;
}
-static inline void ticket_next_cron_in(struct ticket_config *tk, float seconds)
+static inline void ticket_next_cron_in(struct ticket_config *tk, time_t seconds)
{
- struct timeval tv;
+ timetype tv;
- gettimeofday(&tv, NULL);
- tv.tv_sec += trunc(seconds);
- tv.tv_usec += (seconds - trunc(seconds)) * 1e6;
+ get_time(&tv);
+ tv.tv_sec += seconds;
ticket_next_cron_at(tk, tv);
}
static inline void ticket_activate_timeout(struct ticket_config *tk)
{
/* TODO: increase timeout when no answers */
tk_log_debug("activate ticket timeout in %d", tk->timeout);
ticket_next_cron_in(tk, tk->timeout);
}
#endif /* _TICKET_H */
diff --git a/src/timer.c b/src/timer.c
new file mode 100644
index 0000000..b550fdd
--- /dev/null
+++ b/src/timer.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2014 Dejan Muhamedagic <dejan@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "timer.h"
+
+void time_sub(struct timespec *a, struct timespec *b, struct timespec *res)
+{
+ if (a->tv_nsec < b->tv_nsec) {
+ res->tv_sec = a->tv_sec - b->tv_sec - 1;
+ res->tv_nsec = a->tv_nsec + (1000000000 - b->tv_nsec);
+ } else {
+ res->tv_sec = a->tv_sec - b->tv_sec;
+ res->tv_nsec = a->tv_nsec - b->tv_nsec;
+ }
+}
+
+
+void time_add(struct timespec *a, struct timespec *b, struct timespec *res)
+{
+ res->tv_nsec = (a->tv_nsec + b->tv_nsec) % 1000000000;
+ res->tv_sec = a->tv_sec + b->tv_sec + ((a->tv_nsec + b->tv_nsec) / 1000000000);
+}
+
+time_t get_secs(time_t *p)
+{
+ struct timespec tv;
+ time_t secs;
+
+ get_time(&tv);
+ secs = tv.tv_sec;
+ if (p)
+ *p = secs;
+ return secs;
+}
+
+/* time booth_clk_t is a time since boot or similar, return
+ * something humans can understand */
+time_t wall_ts(time_t booth_clk_t)
+{
+ struct timespec booth_clk_now, now_tv, res;
+ struct timeval now;
+
+ get_time(&booth_clk_now);
+ gettimeofday(&now, NULL);
+ TIMEVAL_TO_TIMESPEC(&now, &now_tv);
+ time_sub(&now_tv, &booth_clk_now, &res);
+ return booth_clk_t + res.tv_sec;
+}
+
+/* time t is wall clock time, convert to time compatible
+ * with our clock_gettime clock */
+time_t unwall_ts(time_t t)
+{
+ struct timespec booth_clk_now, now_tv, res;
+ struct timeval now;
+
+ get_time(&booth_clk_now);
+ gettimeofday(&now, NULL);
+ TIMEVAL_TO_TIMESPEC(&now, &now_tv);
+ time_sub(&now_tv, &booth_clk_now, &res);
+ return t - res.tv_sec;
+}
diff --git a/src/timer.h b/src/timer.h
new file mode 100644
index 0000000..05c7da8
--- /dev/null
+++ b/src/timer.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2014 Dejan Muhamedagic <dejan@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _TIMER_H
+#define _TIMER_H
+
+#include <time.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#if _POSIX_TIMERS > 0
+
+#if defined(CLOCK_MONOTONIC)
+# define BOOTH_CLOCK CLOCK_MONOTONIC
+#else
+# define BOOTH_CLOCK CLOCK_REALTIME
+#endif
+
+typedef struct timespec timetype;
+
+#define get_time(p) clock_gettime(BOOTH_CLOCK, p)
+
+#define time_cmp(a, b, CMP) \
+ (((a)->tv_sec == (b)->tv_sec) ? \
+ ((a)->tv_nsec CMP (b)->tv_nsec) : \
+ ((a)->tv_sec CMP (b)->tv_sec))
+
+void time_sub(struct timespec *a, struct timespec *b, struct timespec *res);
+void time_add(struct timespec *a, struct timespec *b, struct timespec *res);
+time_t get_secs(time_t *p);
+time_t wall_ts(time_t t);
+time_t unwall_ts(time_t t);
+
+#define msecs(tv) ((tv).tv_nsec/1000000)
+
+/* random time from 0 to t milliseconds */
+#define rand_time_ms(tv, t) do { \
+ tv.tv_sec = 0; \
+ tv.tv_nsec = t * cl_rand_from_interval(0, 1000000); \
+ } while(0)
+
+#else
+
+typedef struct timeval timetype;
+#define get_time(p) gettimeofday(p, NULL)
+#define time_sub timersub
+#define time_add timeradd
+#define time_cmp timercmp
+#define get_secs time
+
+#define msecs(tv) ((tv).tv_usec/1000)
+
+/* random time from 0 to t milliseconds */
+#define rand_time_ms(tv, t) do { \
+ tv.tv_sec = 0; \
+ tv.tv_usec = t * cl_rand_from_interval(0, 1000); \
+ } while(0)
+
+#define wall_ts(t) (t)
+#define unwall_ts(t) (t)
+
+#endif
+
+#endif
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Jan 25, 11:29 AM (1 d, 12 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1320292
Default Alt Text
(124 KB)
Attached To
Mode
rB Booth
Attached
Detach File
Event Timeline
Log In to Comment