diff --git a/Makefile.am b/Makefile.am index acd4c1e..883d2f5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,251 +1,251 @@ # Copyright (c) 2009 Red Hat, Inc. # # Authors: Andrew Beekhof # Steven Dake (sdake@redhat.com) # # This software licensed under BSD license, the text of which follows: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # - Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # - Neither the name of the MontaVista Software, Inc. nor the names of its # contributors may be used to endorse or promote products derived from this # software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF # THE POSSIBILITY OF SUCH DAMAGE. 
SPEC = $(PACKAGE_NAME).spec TARFILE = $(PACKAGE_NAME)-$(VERSION).tar.gz EXTRA_DIST = autogen.sh conf/booth.conf.example \ - script/booth-keygen script/lsb script/ocf script/service-runnable.in \ + script/booth-keygen script/lsb script/ocf script/service-runnable.in script/crmv1 \ script/unit-test.py.in script/wireshark-dissector.lua \ test/arbtests.py test/assertions.py test/booth_path test/boothrunner.py \ test/boothtestenv.py.in test/clientenv.py test/clienttests.py test/live_test.sh \ test/runtests.py.in test/serverenv.py test/servertests.py test/sitetests.py \ test/utils.py \ unit-tests \ contrib \ icons \ $(SPEC).in booth-rpmlintrc \ .version build-aux/git-version-gen build-aux/PKG_CHECK_VAR.m4 \ build-aux/gitlog-to-changelog build-aux/release.mk AUTOMAKE_OPTIONS = foreign MAINTAINERCLEANFILES = Makefile.in aclocal.m4 configure depcomp \ config.guess config.sub missing install-sh \ autoheader automake autoconf test_lense.sh \ compile # Don't try to install files outside build directory for "make distcheck". 
AM_DISTCHECK_CONFIGURE_FLAGS = --with-ocfdir="$$dc_install_base/lib/ocf" dist_doc_DATA = AUTHORS README COPYING README.upgrade-from-v0.1 README-testing boothconfdir = ${BOOTHSYSCONFDIR} boothconf_DATA = conf/booth.conf.example boothsitedir = $(ocfdir)/resource.d/pacemaker boothsite_SCRIPTS = script/ocf/booth-site boothocfdir = $(ocfdir)/resource.d/booth boothocf_SCRIPTS = script/ocf/sharedrsc script/ocf/geostore boothocflibdir = $(ocfdir)/lib/booth boothocflib_DATA = script/ocf/geo_attr.sh bootharbitratordir = ${INITDDIR} bootharbitrator_SCRIPTS = script/lsb/booth-arbitrator boothnoarchdir = $(datadir)/$(PACKAGE_NAME) nodist_boothnoarch_SCRIPTS = script/service-runnable sbin_SCRIPTS = script/booth-keygen pkgconfigdir = $(datadir)/pkgconfig pkgconfig_DATA = booth.pc TESTS = test/runtests.py SUBDIRS = src docs conf coverity: cov-build --dir=cov make cov-analyze --dir cov --concurrency --wait-for-license cov-format-errors --dir cov install-exec-local: $(INSTALL) -d $(DESTDIR)/${boothconfdir} $(INSTALL) -d $(DESTDIR)/${bootharbitratordir} $(INSTALL) -d $(DESTDIR)/${boothsitedir} $(INSTALL) -d $(DESTDIR)/${boothocfdir} $(INSTALL) -d $(DESTDIR)/${BOOTH_LIB_DIR} -m 750 -chown $(CRM_DAEMON_USER):$(CRM_DAEMON_GROUP) $(DESTDIR)/${BOOTH_LIB_DIR} $(INSTALL) -d $(DESTDIR)/${BOOTH_CORE_DIR} -m 750 -chown $(CRM_DAEMON_USER):$(CRM_DAEMON_GROUP) $(DESTDIR)/${BOOTH_CORE_DIR} install-exec-hook: ln -sf ${sbindir}/boothd $(DESTDIR)/${sbindir}/booth ln -sf ${sbindir}/boothd $(DESTDIR)/${sbindir}/geostore uninstall-local: rmdir $(DESTDIR)/${boothconfdir} || :; rmdir $(DESTDIR)/${bootharbitratordir} || :; rmdir $(DESTDIR)/${boothsitedir} || :; rmdir $(DESTDIR)/${BOOTH_CORE_DIR} || :; rmdir $(DESTDIR)/${BOOTH_LIB_DIR} || :; test: check lint: for dir in src; do make -C $$dir lint; done clean-local: rm -rf test/*.pyc test/__pycache__ test/runtests.py test/boothtestenv.py cov* $(SPEC) dist-clean-local: rm -f autoconf automake autoheader # release/versioning BUILT_SOURCES = .version .version: 
echo $(VERSION) > $@-t && mv $@-t $@ dist-hook: gen-ChangeLog echo $(VERSION) > $(distdir)/.tarball-version test/runtests.py: test/runtests.py.in test/boothtestenv.py rm -f $@-t $@ mkdir -p ${abs_top_builddir}/test sed \ -e 's#PYTHON_SHEBANG#${PYTHON_SHEBANG}#g' \ -e 's#TEST_SRC_DIR#${abs_top_srcdir}/test#g' \ -e 's#TEST_BUILD_DIR#${abs_top_builddir}/test#g' \ $< > $@-t; chmod a-w $@-t chmod u+x $@-t mv $@-t $@ test/boothtestenv.py: test/boothtestenv.py.in rm -f $@-t $@ mkdir -p ${abs_top_builddir}/test sed \ -e 's#TEST_SRC_DIR#${abs_top_srcdir}/test#g' \ -e 's#TEST_BUILD_DIR#${abs_top_builddir}/test#g' \ $< > $@-t; chmod a-w $@-t mv $@-t $@ ## make rpm/srpm section. $(abs_builddir)/booth-rpmlintrc: cat $(abs_srcdir)/booth-rpmlintrc > booth-rpmlintrc $(SPEC): $(SPEC).in .version config.status $(abs_builddir)/booth-rpmlintrc rm -f $@-t $@ date="$(shell LC_ALL=C date "+%a %b %d %Y")" && \ gvgver="`cd $(abs_srcdir); build-aux/git-version-gen --fallback $(VERSION) .tarball-version .gitarchivever`" && \ if [ "$$gvgver" = "`echo $$gvgver | sed 's/-/./'`" ];then \ rpmver="$$gvgver" && \ alphatag="" && \ dirty="" && \ numcomm="0"; \ else \ gitver="`echo $$gvgver | sed 's/\(.*\)\./\1-/'`" && \ rpmver=`echo $$gitver | sed 's/-.*//g'` && \ alphatag=`echo $$gvgver | sed 's/[^-]*-\([^-]*\).*/\1/'` && \ numcomm=`echo $$gitver | sed 's/[^-]*-\([^-]*\).*/\1/'` && \ dirty="" && \ if [ "`echo $$gitver | sed 's/^.*-dirty$$//g'`" = "" ];then \ dirty="dirty"; \ fi \ fi && \ if [ "$$numcomm" = "0" ]; then numcomm=""; fi && \ if [ -n "$$numcomm" ]; then numcomm="%global numcomm $$numcomm"; fi && \ if [ "$$alphatag" = "$$gitver" ]; then alphatag=""; fi && \ if [ -n "$$alphatag" ]; then alphatag="%global alphatag $$alphatag"; fi && \ if [ -n "$$dirty" ]; then dirty="%global dirty dirty"; fi && \ sed \ -e "s#@version@#$$rpmver#g" \ -e "s#@ALPHATAG@#$$alphatag#g" \ -e "s#@NUMCOMM@#$$numcomm#g" \ -e "s#@DIRTY@#$$dirty#g" \ -e "s#@date@#$$date#g" \ $(abs_srcdir)/$@.in > $@-t; sed -i -e 
"s#@uname@#$(CRM_DAEMON_USER)#g" $@-t sed -i -e "s#@gname@#$(CRM_DAEMON_GROUP)#g" $@-t if BUILD_ASCIIDOC_HTML_MAN sed -i -e "s#@bcond_html_man@#bcond_without#g" $@-t else sed -i -e "s#@bcond_html_man@#bcond_with#g" $@-t endif if IS_ASCIIDOC sed -i -e "s#@asciidoc@#asciidoc#g" $@-t else sed -i -e "s#@asciidoc@#asciidoctor#g" $@-t endif if LOGGING_LIBQB sed -i -e "s#@bcond_glue@#bcond_with#g" $@-t else sed -i -e "s#@bcond_glue@#bcond_without#g" $@-t endif if PYTHON_IS_VERSION3 sed -i -e "s#@bcond_python3@#bcond_without#g" $@-t else sed -i -e "s#@bcond_python3@#bcond_with#g" $@-t endif if RUN_BUILD_TESTS sed -i -e "s#@bcond_run_build_tests@#bcond_without#g" $@-t else sed -i -e "s#@bcond_run_build_tests@#bcond_with#g" $@-t endif chmod a-w $@-t mv $@-t $@ rm -f $@-t* $(TARFILE): $(MAKE) dist RPMBUILDOPTS = --define "_sourcedir $(abs_builddir)" \ --define "_specdir $(abs_builddir)" \ --define "_builddir $(abs_builddir)" \ --define "_srcrpmdir $(abs_builddir)" \ --define "_rpmdir $(abs_builddir)" srpm: clean $(MAKE) $(SPEC) $(TARFILE) rpmbuild $(RPMBUILDOPTS) --nodeps -bs $(SPEC) rpm: clean $(MAKE) $(SPEC) $(TARFILE) rpmbuild $(RPMBUILDOPTS) -ba $(SPEC) gen_start_date = 2000-01-01 .PHONY: gen-ChangeLog gen-ChangeLog: if test -d $(abs_srcdir)/.git; then \ LC_ALL=C $(top_srcdir)/build-aux/gitlog-to-changelog \ --since=$(gen_start_date) > $(distdir)/cl-t; \ rm -f $(distdir)/ChangeLog; \ mv $(distdir)/cl-t $(distdir)/ChangeLog; \ fi diff --git a/README.crmv1 b/README.crmv1 new file mode 100644 index 0000000..b2708a2 --- /dev/null +++ b/README.crmv1 @@ -0,0 +1,49 @@ +CRMv1 cluster +============= + +Heartbeat is a predecessor to Pacemaker and here we make a +comeback to that kind of clustering. Why should we do that? +Firstly, Pacemaker became a behemoth, something that can brew +your coffee, but also something that is rather unwieldy and +difficult to manage. 
Secondly, booth is a very reliable +distributed engine and in our testing it was also used in a +typical LAN and passed all the tests with flying colours. So, +this is something for people who don't need all the bells and +whistles of Pacemaker, but still want to have HA. + +STONITH is missing, but the cluster must have at least three +members. Hence, the booth arbitrator serves as a fencing +replacement. This is as it should be: a two-node cluster is +indeed very difficult to run. The booth arbitrator can be a +smallish instance running anywhere in your network. As with +fencing, it doesn't even have to be particularly reliable, it +just has to be there when we need it. + +Setup +----- + +Just like with heartbeat, CRMv1 in booth is very simple to set up. +There is a helper program called `crmv1` which is going to handle +all the details. In the simplest setup, which is anyway the most +common, there is just one group. The resources are run in order, +there is no parallelism. + +Here is the usage with one realistic example: + + Usage: crmv1 {group <name> ...|group delete <name>} + + Examples: + + crmv1 group bigdb \ + IPaddr ip=192.168.1.1 \ + ocf:linbit:drbd drbd_resource=bigdisk \ + Filesystem device=/dev/bigdisk directory=/bigdisk fstype=xfs \ + oracle sid=bigdb + + crmv1 group delete bigdb + +There is no monitoring of resources, but it is easy to run an +external monitor of the topmost resource, i.e. the service which +is actually used by the users. If that monitor fails, then it +makes sense to move the group to the other node. + diff --git a/conf/booth.conf.example b/conf/booth.conf.example index 1d63547..440046a 100644 --- a/conf/booth.conf.example +++ b/conf/booth.conf.example @@ -1,27 +1,31 @@ # The booth configuration file is "/etc/booth/booth.conf". You need to # prepare the same booth configuration file on each arbitrator and # each node in the cluster sites where the booth daemon can be launched. 
# Here is an example of the configuration file: # "transport" means which transport layer booth daemon will use. # Currently only "UDP" is supported. transport="UDP" # The port that booth daemons will use to talk to each other. port="9929" # The arbitrator IP. If you want to configure several arbitrators, # you need to configure each arbitrator with a separate line. arbitrator="147.2.207.14" # The site IP. The cluster site uses this IP to talk to other sites. # Like arbitrator, you need to configure each site with a separate line. site="147.4.215.19" site="147.18.2.1" # The ticket name, which corresponds to a set of resources which can be # fail-overed among different sites. ticket="ticketA" ticket="ticketB" expire = 600 weights = 1,2,3 + +# Use the CRMv1 feature, i.e. make the booth a cluster in its own +# right (run resources, etc.) +crmv1 diff --git a/conf/crmv1.conf.example b/conf/crmv1.conf.example new file mode 100644 index 0000000..e108265 --- /dev/null +++ b/conf/crmv1.conf.example @@ -0,0 +1,15 @@ +# The crmv1 configuration file is "/etc/booth/crmv1/conf". You need to +# prepare the same configuration file on each arbitrator and +# each node in the cluster sites where the booth daemon can be launched. + +# The configuration consists of group definitions with parameters for resources. +# It is recommended to use the crmv1 program to prepare this +# configuration file. +# Here is one example: + +group bigdb \ + IPaddr ip=192.168.1.1 \ + ocf:linbit:drbd drbd_resource=bigdisk \ + Filesystem device=/dev/bigdisk directory=/bigdisk fstype=xfs \ + oracle sid=bigdb + 
diff --git a/script/crmv1 b/script/crmv1
new file mode 100755
index 0000000..b9b0416
--- /dev/null
+++ b/script/crmv1
@@ -0,0 +1,186 @@
+#!/bin/bash
+#
+# This is crmv1, a tool to configure booth as a crmv1 style
+# cluster.
+# It basically manages groups. There is no concept of a group in
+# booth, but we can get by by using the before-acquire-handler.
+# Essentially, the handler is used to run programs (resource
+# agents). Just how the resource agents are configured is another
+# matter.
+#
+# Files maintained by this tool:
+#   $CONF_DIR/crmv1/conf            one "group <name> ..." entry per group
+#   $CONF_DIR/crmv1/<name>/NN_<ra>  links to the resource agents of the
+#                                   group, NN is the position in the group
+#
+
+CONF_DIR=/etc/booth
+CRMV1_DIR=$CONF_DIR/crmv1
+CRMV1_CONF=$CRMV1_DIR/conf
+# resource agents live in $OCF_ROOT/resource.d/<provider>
+OCF_ROOT=${OCF_ROOT:-/usr/lib/ocf}
+
+# print the usage on stderr and exit with the given code
+usage() {
+	cat <<EOF >&2
+
+Usage: $0 {group <name> ...|group delete <name>}
+
+Examples:
+
+	crmv1 group bigdb \\
+		IPaddr ip=192.168.1.1 \\
+		ocf:linbit:drbd drbd_resource=bigdisk \\
+		Filesystem device=/dev/bigdisk directory=/bigdisk fstype=xfs \\
+		oracle sid=bigdb
+
+	crmv1 group delete bigdb
+
+EOF
+	exit "$1"
+}
+
+# print the message on stderr and exit with 1
+fatal() {
+	cat <<EOF >&2
+
+FATAL: $*
+
+EOF
+	exit 1
+}
+
+# the group name is used as a directory name in $CRMV1_DIR and as
+# a word in the conf file, so refuse anything which could escape
+# the directory or break the format of the conf file
+check_group_name() {
+	case "$1" in
+	""|.|..|*/*|*\\*|*[[:space:]]*)
+		fatal "invalid group name \"$1\""
+		;;
+	esac
+}
+
+# succeeds if the group is defined in the conf file; the name is
+# compared as a whole word, it is neither a prefix nor a regular
+# expression
+group_exists() {
+	[ -f "$CRMV1_CONF" ] || return 1
+	awk -v name="$1" '
+		$1 == "group" && $2 == name { found = 1 }
+		END { exit !found }
+	' "$CRMV1_CONF"
+}
+
+# add_group group <name> <ra> [param=value ...] ...
+add_group() {
+	mkdir -p "$CRMV1_DIR/$2" || fatal "cannot create $CRMV1_DIR/$2"
+	echo "$@" >> "$CRMV1_CONF" || fatal "cannot write $CRMV1_CONF"
+}
+
+# del_group <name>
+# remove the directory of the group and its entry in the conf
+# file, including the continuation lines of an entry which was
+# written by hand
+del_group() {
+	local tmp
+	check_group_name "$1"
+	rm -rf "$CRMV1_DIR/$1"
+	[ -f "$CRMV1_CONF" ] || return 0
+	tmp=$(mktemp) || fatal "cannot create a temporary file"
+	# the result is copied back with cat and not moved, so that the
+	# mode and the owner of the conf file are kept
+	if awk -v name="$1" '
+		skip { skip = ($0 ~ /\\$/); next }
+		$1 == "group" && $2 == name { skip = ($0 ~ /\\$/); next }
+		{ print }
+	' "$CRMV1_CONF" > "$tmp" && cat "$tmp" > "$CRMV1_CONF"; then
+		rm -f "$tmp"
+	else
+		rm -f "$tmp"
+		fatal "cannot update $CRMV1_CONF"
+	fi
+}
+
+# get_ra <ra>
+# print the path of the resource agent, which is given either as
+# <agent> (provider heartbeat) or as <class>:<provider>:<agent>,
+# e.g. ocf:linbit:drbd; fail and print nothing if there is no
+# such agent
+# NB: this is run in a command substitution, i.e. in a subshell,
+# so calling fatal here would not stop the script
+get_ra() {
+	local provider agent
+	case "$1" in
+	*:*:*)
+		provider=${1#*:}
+		agent=${provider#*:}
+		provider=${provider%%:*}
+		;;
+	*:*)
+		return 1
+		;;
+	*)
+		provider=heartbeat
+		agent=$1
+		;;
+	esac
+	[ -n "$provider" ] && [ -n "$agent" ] || return 1
+	[ -f "$OCF_ROOT/resource.d/$provider/$agent" ] || return 1
+	echo "$OCF_ROOT/resource.d/$provider/$agent"
+}
+
+# mk_link <group> <ra path> <position>
+mk_link() {
+	ln -fs "$2" "$CRMV1_DIR/$1/$(printf '%02d' "$3")_$(basename "$2")"
+}
+
+# new_group group <name> <ra> [param=value ...] ...
+# this is not really creating a group, we just parse the input to
+# make sure that the group is well defined; the group is then
+# created by boothd on starting; consider this a document on how
+# creating a group should be implemented
+new_group() {
+	local group=$2 p ra_f pos
+	local -a ras
+	shift 2
+	ras=()
+	# find all the agents before changing anything on disk
+	for p; do
+		case "$p" in
+		*=*)
+			# a parameter of the preceding resource agent
+			[ ${#ras[@]} -gt 0 ] ||
+				fatal "parameter $p does not follow a resource agent"
+			continue
+			;;
+		esac
+		ra_f=$(get_ra "$p") ||
+			fatal "no resource agent $p, did you install resource-agents?"
+		ras+=("$ra_f")
+	done
+	[ ${#ras[@]} -gt 0 ] || fatal "no resource agents in group $group"
+	mkdir -p "$CRMV1_DIR/$group" || fatal "cannot create $CRMV1_DIR/$group"
+	pos=0
+	for ra_f in "${ras[@]}"; do
+		mk_link "$group" "$ra_f" $pos || fatal "cannot link $ra_f"
+		pos=$((pos+1))
+	done
+	add_group group "$group" "$@"
+}
+
+if [ $# -lt 3 ]; then
+	usage 1
+fi
+if [ "$1" != group ]; then
+	usage 1
+fi
+if [ "$2" != delete ]; then
+	check_group_name "$2"
+	if group_exists "$2"; then
+		fatal "group $2 already exists"
+	fi
+	new_group "$@"
+else
+	del_group "$3"
+fi
diff --git a/src/booth.h b/src/booth.h index 0cd43c0..65824d3 100644 --- a/src/booth.h +++ b/src/booth.h @@ -1,386 +1,384 @@ /* * Copyright (C) 2011 Jiaju Zhang * Copyright (C) 2013-2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
*/ #ifndef _BOOTH_H #define _BOOTH_H #include #include #include #include #include #include #include #include "timer.h" #define BOOTH_RUN_DIR "/var/run/booth/" #define BOOTH_LOG_DIR "/var/log" #define BOOTH_LOGFILE_NAME "booth.log" #define BOOTH_DEFAULT_CONF_DIR "/etc/booth/" #define BOOTH_DEFAULT_CONF_NAME "booth" #define BOOTH_DEFAULT_CONF_EXT ".conf" #define BOOTH_DEFAULT_CONF \ BOOTH_DEFAULT_CONF_DIR BOOTH_DEFAULT_CONF_NAME BOOTH_DEFAULT_CONF_EXT +#define BOOTH_DEFAULT_CRMV1_CONF BOOTH_DEFAULT_CONF_DIR "crmv1/conf" #define DAEMON_NAME "boothd" #define BOOTH_PATH_LEN PATH_MAX #define BOOTH_MAX_KEY_LEN 64 #define BOOTH_MIN_KEY_LEN 8 /* hash size is 160 bits (sha1), but add a bit more space in case * stronger hashes are required */ #define BOOTH_MAC_SIZE 24 /* tolerate packets which are not older than 10 minutes */ #define BOOTH_DEFAULT_MAX_TIME_SKEW 600 #define BOOTH_DEFAULT_PORT 9929 #define BOOTHC_MAGIC 0x5F1BA08C #define BOOTHC_VERSION 0x00010003 /** Timeout value for poll(). * Determines frequency of periodic jobs, eg. when send-retries are done. * See process_tickets(). */ #define POLL_TIMEOUT 100 /** @{ */ /** The on-network data structures and constants. */ #define BOOTH_NAME_LEN 64 #define BOOTH_ATTRVAL_LEN 128 #define CHAR2CONST(a,b,c,d) ((a << 24) | (b << 16) | (c << 8) | d) /* Says that the ticket shouldn't be active anywhere. * NONE wouldn't be specific enough. */ #define NO_ONE ((uint32_t)-1) /* Says that another one should recover. 
*/ #define TICKET_LOST CHAR2CONST('L', 'O', 'S', 'T') typedef char boothc_site[BOOTH_NAME_LEN]; typedef char boothc_ticket[BOOTH_NAME_LEN]; typedef char boothc_attr[BOOTH_NAME_LEN]; typedef char boothc_attr_value[BOOTH_ATTRVAL_LEN]; /* message option bits */ enum { BOOTH_OPT_AUTH = 1, /* authentication */ BOOTH_OPT_ATTR = 4, /* attr message type, otherwise ticket */ }; struct boothc_header { /** Various options, message type, authentication */ uint32_t opts; /** Generation info (used for authentication) * This is something that would need to be monotone * incremental. CLOCK_MONOTONIC should fit the purpose. On * failover, however, it may happen that the new host has a * clock which is significantly behind the clock of old host. * We'll need to relax a bit for the nodes which are starting * (just accept all OP_STATUS). */ uint32_t secs; /* seconds */ uint32_t usecs; /* microseconds */ /** BOOTHC_MAGIC */ uint32_t magic; /** BOOTHC_VERSION */ uint32_t version; /** Packet source; site_id. See add_site(). */ uint32_t from; /** Length including header */ uint32_t length; /** The command respectively protocol state. See cmd_request_t. */ uint32_t cmd; /** The matching request (what do we reply to). See cmd_request_t. */ uint32_t request; /** Command options. */ uint32_t options; /** The reason for this RPC. */ uint32_t reason; /** Result of operation. 0 == OK */ uint32_t result; char data[0]; } __attribute__((packed)); struct ticket_msg { /** Ticket name. */ boothc_ticket id; /** Current leader. May be NO_ONE. See add_site(). * For a OP_REQ_VOTE this is */ uint32_t leader; /** Current term. */ uint32_t term; uint32_t term_valid_for; /* Perhaps we need to send a status along, too - like * starting, running, stopping, error, ...? */ } __attribute__((packed)); struct attr_msg { /** Ticket name. */ boothc_ticket tkt_id; /** Attribute name. */ boothc_attr name; /** The value. 
*/ boothc_attr_value val; } __attribute__((packed)); /* GEO attributes * attributes should be regularly updated. */ struct geo_attr { /** Update timestamp. */ timetype update_ts; /** The value. */ char *val; /** Who set it (currently unused) struct booth_site *origin; */ }; struct hmac { /** hash id, currently set to constant BOOTH_HASH */ uint32_t hid; /** the calculated hash, BOOTH_MAC_SIZE is big enough to * accommodate the hash of type hid */ unsigned char hash[BOOTH_MAC_SIZE]; } __attribute__((packed)); struct boothc_hdr_msg { struct boothc_header header; struct hmac hmac; } __attribute__((packed)); struct boothc_ticket_msg { struct boothc_header header; struct ticket_msg ticket; struct hmac hmac; } __attribute__((packed)); struct boothc_attr_msg { struct boothc_header header; struct attr_msg attr; struct hmac hmac; } __attribute__((packed)); typedef enum { /* 0x43 = "C"ommands */ CMD_LIST = CHAR2CONST('C', 'L', 's', 't'), CMD_GRANT = CHAR2CONST('C', 'G', 'n', 't'), CMD_REVOKE = CHAR2CONST('C', 'R', 'v', 'k'), CMD_PEERS = CHAR2CONST('P', 'e', 'e', 'r'), /* Replies */ CL_RESULT = CHAR2CONST('R', 's', 'l', 't'), CL_LIST = CHAR2CONST('R', 'L', 's', 't'), CL_GRANT = CHAR2CONST('R', 'G', 'n', 't'), CL_REVOKE = CHAR2CONST('R', 'R', 'v', 'k'), /* get status from another server */ OP_STATUS = CHAR2CONST('S', 't', 'a', 't'), OP_MY_INDEX = CHAR2CONST('M', 'I', 'd', 'x'), /* reply to status */ /* Raft */ OP_REQ_VOTE = CHAR2CONST('R', 'V', 'o', 't'), /* start election */ OP_VOTE_FOR = CHAR2CONST('V', 't', 'F', 'r'), /* reply to REQ_VOTE */ OP_HEARTBEAT= CHAR2CONST('H', 'r', 't', 'B'), /* Heartbeat */ OP_ACK = CHAR2CONST('A', 'c', 'k', '.'), /* Ack for heartbeats and revokes */ OP_UPDATE = CHAR2CONST('U', 'p', 'd', 'E'), /* Update ticket */ OP_REVOKE = CHAR2CONST('R', 'e', 'v', 'k'), /* Revoke ticket */ OP_REJECTED = CHAR2CONST('R', 'J', 'C', '!'), /* Attributes */ ATTR_SET = CHAR2CONST('A', 'S', 'e', 't'), ATTR_GET = CHAR2CONST('A', 'G', 'e', 't'), ATTR_DEL = 
CHAR2CONST('A', 'D', 'e', 'l'), ATTR_LIST = CHAR2CONST('A', 'L', 's', 't'), } cmd_request_t; typedef enum { /* for compatibility with other functions */ RLT_SUCCESS = 0, RLT_ASYNC = CHAR2CONST('A', 's', 'y', 'n'), RLT_MORE = CHAR2CONST('M', 'o', 'r', 'e'), RLT_SYNC_SUCC = CHAR2CONST('S', 'c', 'c', 's'), RLT_SYNC_FAIL = CHAR2CONST('F', 'a', 'i', 'l'), RLT_INVALID_ARG = CHAR2CONST('I', 'A', 'r', 'g'), RLT_NO_SUCH_ATTR = CHAR2CONST('N', 'A', 't', 'r'), RLT_CIB_PENDING = CHAR2CONST('P', 'e', 'n', 'd'), RLT_EXT_FAILED = CHAR2CONST('X', 'P', 'r', 'g'), RLT_ATTR_PREREQ = CHAR2CONST('A', 'P', 'r', 'q'), RLT_TICKET_IDLE = CHAR2CONST('T', 'i', 'd', 'l'), RLT_OVERGRANT = CHAR2CONST('O', 'v', 'e', 'r'), RLT_PROBABLY_SUCCESS = CHAR2CONST('S', 'u', 'c', '?'), RLT_BUSY = CHAR2CONST('B', 'u', 's', 'y'), RLT_AUTH = CHAR2CONST('A', 'u', 't', 'h'), RLT_TERM_OUTDATED = CHAR2CONST('T', 'O', 'd', 't'), RLT_TERM_STILL_VALID = CHAR2CONST('T', 'V', 'l', 'd'), RLT_YOU_OUTDATED = CHAR2CONST('O', 'u', 't', 'd'), RLT_REDIRECT = CHAR2CONST('R', 'e', 'd', 'r'), } cmd_result_t; typedef enum { /* for compatibility with other functions */ OR_JUST_SO = 0, OR_AGAIN = CHAR2CONST('A', 'a', 'a', 'a'), OR_TKT_LOST = CHAR2CONST('T', 'L', 's', 't'), OR_REACQUIRE = CHAR2CONST('R', 'a', 'c', 'q'), OR_ADMIN = CHAR2CONST('A', 'd', 'm', 'n'), OR_LOCAL_FAIL = CHAR2CONST('L', 'o', 'c', 'F'), OR_STEPDOWN = CHAR2CONST('S', 'p', 'd', 'n'), OR_SPLIT = CHAR2CONST('S', 'p', 'l', 't'), } cmd_reason_t; /* bitwise command options */ typedef enum { OPT_IMMEDIATE = 1, /* immediate grant */ OPT_WAIT = 2, /* wait for the elections' outcome */ OPT_WAIT_COMMIT = 4, /* wait for the ticket commit to CIB */ } cmd_options_t; /** @} */ /** @{ */ struct booth_site { /** Calculated ID. See add_site(). */ int site_id; int type; int local; /** Roles, like ACCEPTOR, PROPOSER, or LEARNER. Not really used ATM. */ int role; boothc_site addr_string; int tcp_fd; int udp_fd; /* 0-based, used for indexing into per-ticket weights. 
* -1 for no_leader. */ int index; uint64_t bitmask; unsigned short family; union { struct sockaddr_in sa4; struct sockaddr_in6 sa6; }; int saddrlen; int addrlen; /** statistics */ time_t last_recv; unsigned int sent_cnt; unsigned int sent_err_cnt; unsigned int resend_cnt; unsigned int recv_cnt; unsigned int recv_err_cnt; unsigned int sec_cnt; unsigned int invalid_cnt; /** last timestamp seen from this site */ uint32_t last_secs; uint32_t last_usecs; }; extern struct booth_site *local; extern struct booth_site *const no_leader; /** @} */ struct booth_transport; struct client { int fd; const struct booth_transport *transport; struct boothc_ticket_msg *msg; int offset; /* bytes read so far into msg */ void (*workfn)(int); void (*deadfn)(int); }; extern struct client *clients; extern struct pollfd *pollfds; int client_add(int fd, const struct booth_transport *tpt, void (*workfn)(int ci), void (*deadfn)(int ci)); int find_client_by_fd(int fd); void safe_copy(char *dest, char *value, size_t buflen, const char *description); int update_authkey(void); void list_peers(int fd); struct command_line { int type; /* ACT_ */ int op; /* OP_ */ int options; /* OPT_ */ char configfile[BOOTH_PATH_LEN]; char lockfile[BOOTH_PATH_LEN]; char site[BOOTH_NAME_LEN]; struct boothc_ticket_msg msg; struct boothc_attr_msg attr_msg; }; extern struct command_line cl; /* http://gcc.gnu.org/onlinedocs/gcc/Typeof.html */ #define min(a__,b__) \ ({ typeof (a__) _a = (a__); \ typeof (b__) _b = (b__); \ _a < _b ? _a : _b; }) #define max(a__,b__) \ ({ typeof (a__) _a = (a__); \ typeof (b__) _b = (b__); \ _a > _b ? 
_a : _b; }) - - - #endif /* _BOOTH_H */ diff --git a/src/config.c b/src/config.c index f0ca4aa..12d2a2f 100644 --- a/src/config.c +++ b/src/config.c @@ -1,1062 +1,1240 @@ /* * Copyright (C) 2011 Jiaju Zhang * Copyright (C) 2013-2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #include "b_config.h" #include #include #include #include #include #include #include #include #include #include #include #include "booth.h" #include "config.h" #include "raft.h" #include "ticket.h" #include "log.h" static int ticket_size = 0; static int ticket_realloc(void) { const int added = 5; int had, want; void *p; had = booth_conf->ticket_allocated; want = had + added; p = realloc(booth_conf->ticket, sizeof(struct ticket_config) * want); if (!p) { log_error("can't alloc more tickets"); return -ENOMEM; } booth_conf->ticket = p; memset(booth_conf->ticket + had, 0, sizeof(struct ticket_config) * added); booth_conf->ticket_allocated = want; return 0; } static void hostname_to_ip(char * hostname) { struct addrinfo hints; struct addrinfo *result, *rp; int res; int addr_found = 0; const char *ntop_res; memset(&hints, 0, sizeof(hints)); hints.ai_family = AF_UNSPEC; hints.ai_socktype = SOCK_STREAM; res = getaddrinfo(hostname, NULL, &hints, &result); if (res != 0) { log_error("can't find IP for the host \"%s\"", hostname); return; 
} /* Return the first found AF_INET or AF_INET6 address */ for (rp = result; rp && !addr_found; rp = rp->ai_next) { if (rp->ai_family != AF_INET && rp->ai_family != AF_INET6) { continue ; } switch (rp->ai_family) { case AF_INET: ntop_res = inet_ntop(rp->ai_family, &((struct sockaddr_in *)(rp->ai_addr))->sin_addr, hostname, BOOTH_NAME_LEN - 1); break; case AF_INET6: ntop_res = inet_ntop(rp->ai_family, &((struct sockaddr_in6 *)(rp->ai_addr))->sin6_addr, hostname, BOOTH_NAME_LEN - 1); break; } if (ntop_res) { /* buffer overflow will not happen (IPv6 notation < 63 chars), but suppress the warnings */ hostname[BOOTH_NAME_LEN - 1] = '\0'; addr_found = 1; } } if (!addr_found) { log_error("no IP addresses found for the host \"%s\"", hostname); } freeaddrinfo(result); } static int add_site(char *addr_string, int type) { int rv; struct booth_site *site; uLong nid; uint32_t mask; int i; rv = 1; if (booth_conf->site_count == MAX_NODES) { log_error("too many nodes"); goto out; } if (strnlen(addr_string, sizeof(booth_conf->site[0].addr_string)) >= sizeof(booth_conf->site[0].addr_string)) { log_error("site address \"%s\" too long", addr_string); goto out; } site = booth_conf->site + booth_conf->site_count; site->family = AF_INET; site->type = type; /* buffer overflow will not hapen (we've already checked that addr_string will fit incl. 
terminating '\0' above), but suppress the warnings with copying everything but the boundary byte, which is valid as-is, since this last byte will be safely pre-zeroed from the struct booth_config initialization */ strncpy(site->addr_string, addr_string, sizeof(site->addr_string) - 1); if (!(inet_pton(AF_INET, site->addr_string, &site->sa4.sin_addr) > 0) && !(inet_pton(AF_INET6, site->addr_string, &site->sa6.sin6_addr) > 0)) { /* Not a valid address, so let us try to convert it into an IP address */ hostname_to_ip(site->addr_string); } site->index = booth_conf->site_count; site->bitmask = 1 << booth_conf->site_count; /* Catch site overflow */ assert(site->bitmask); booth_conf->all_bits |= site->bitmask; if (type == SITE) booth_conf->sites_bits |= site->bitmask; site->tcp_fd = -1; booth_conf->site_count++; rv = 0; memset(&site->sa6, 0, sizeof(site->sa6)); nid = crc32(0L, NULL, 0); /* Using the ASCII representation in site->addr_string (both sizeof() * and strlen()) gives quite a lot of collisions; a brute-force run * from 0.0.0.0 to 24.0.0.0 gives ~4% collisions, and this tends to * increase even more. * Whether there'll be a collision in real-life, with 3 or 5 nodes, is * another question ... but for now get the ID from the binary * representation - that had *no* collisions up to 32.0.0.0. * Note that POSIX mandates inet_pton to arange the address pointed * to by "dst" in network byte order, assuring little/big-endianess * mutual compatibility. 
*/ if (inet_pton(AF_INET, site->addr_string, &site->sa4.sin_addr) > 0) { site->family = AF_INET; site->sa4.sin_family = site->family; site->sa4.sin_port = htons(booth_conf->port); site->saddrlen = sizeof(site->sa4); site->addrlen = sizeof(site->sa4.sin_addr); site->site_id = crc32(nid, (void*)&site->sa4.sin_addr, site->addrlen); } else if (inet_pton(AF_INET6, site->addr_string, &site->sa6.sin6_addr) > 0) { site->family = AF_INET6; site->sa6.sin6_family = site->family; site->sa6.sin6_flowinfo = 0; site->sa6.sin6_port = htons(booth_conf->port); site->saddrlen = sizeof(site->sa6); site->addrlen = sizeof(site->sa6.sin6_addr); site->site_id = crc32(nid, (void*)&site->sa6.sin6_addr, site->addrlen); } else { log_error("Address string \"%s\" is bad", site->addr_string); rv = EINVAL; } /* Make sure we will never collide with NO_ONE, * or be negative (to get "get_local_id() < 0" working). */ mask = 1 << (sizeof(site->site_id)*8 -1); assert(NO_ONE & mask); site->site_id &= ~mask; /* Test for collisions with other sites */ for(i=0; iindex; i++) if (booth_conf->site[i].site_id == site->site_id) { log_error("Got a site-ID collision. Please file a bug on https://github.com/ClusterLabs/booth/issues/new, attaching the configuration file."); exit(1); } out: return rv; } inline static char *skip_while_in(const char *cp, int (*fn)(int), const char *allowed) { /* strchr() returns a pointer to the terminator if *cp == 0. 
*/ while (*cp && (fn(*cp) || strchr(allowed, *cp))) cp++; /* discard "const" qualifier */ return (char*)cp; } inline static char *skip_while(char *cp, int (*fn)(int)) { while (fn(*cp)) cp++; return cp; } inline static char *skip_until(char *cp, char expected) { while (*cp && *cp != expected) cp++; return cp; } static inline int is_end_of_line(char *cp) { char c = *cp; return c == '\n' || c == 0 || c == '#'; } static int add_ticket(const char *name, struct ticket_config **tkp, const struct ticket_config *def) { int rv; struct ticket_config *tk; if (booth_conf->ticket_count == booth_conf->ticket_allocated) { rv = ticket_realloc(); if (rv < 0) return rv; } tk = booth_conf->ticket + booth_conf->ticket_count; booth_conf->ticket_count++; if (!check_max_len_valid(name, sizeof(tk->name))) { log_error("ticket name \"%s\" too long.", name); return -EINVAL; } if (find_ticket_by_name(name, NULL)) { log_error("ticket name \"%s\" used again.", name); return -EINVAL; } if (* skip_while_in(name, isalnum, "-/")) { log_error("ticket name \"%s\" invalid; only alphanumeric names.", name); return -EINVAL; } strcpy(tk->name, name); tk->timeout = def->timeout; tk->term_duration = def->term_duration; tk->retries = def->retries; memcpy(tk->weight, def->weight, sizeof(tk->weight)); tk->mode = def->mode; if (tkp) *tkp = tk; return 0; } static int postproc_ticket(struct ticket_config *tk) { if (!tk) return 1; if (!tk->renewal_freq) { tk->renewal_freq = tk->term_duration/2; } if (tk->timeout*(tk->retries+1) >= tk->renewal_freq) { log_error("%s: total amount of time to " "retry sending packets cannot exceed " "renewal frequency " "(%d*(%d+1) >= %d)", tk->name, tk->timeout, tk->retries, tk->renewal_freq); return 0; } return 1; } /* returns number of weights, or -1 on bad input. 
*/ static int parse_weights(const char *input, int weights[MAX_NODES]) { int i, v; char *cp; for(i=0; i= MAX_ARGS) { log_error("too many arguments for the acquire-handler"); free(tk_test.path); return -1; } tk_test.argv[i++] = p; } while (p); return 0; } struct toktab grant_type[] = { { "auto", GRANT_AUTO}, { "manual", GRANT_MANUAL}, { NULL, 0}, }; struct toktab attr_op[] = { {"eq", ATTR_OP_EQ}, {"ne", ATTR_OP_NE}, {NULL, 0}, }; static int lookup_tokval(char *key, struct toktab *tab) { struct toktab *tp; for (tp = tab; tp->str; tp++) { if (!strcmp(tp->str, key)) return tp->val; } return 0; } /* attribute prerequisite */ static int parse_attr_prereq(char *val, struct ticket_config *tk) { struct attr_prereq *ap = NULL; char *p; ap = (struct attr_prereq *)calloc(1, sizeof(struct attr_prereq)); if (!ap) { log_error("out of memory"); return -1; } p = strtok(val, " \t"); if (!p) { log_error("not enough arguments to attr-prereq"); goto err_out; } ap->grant_type = lookup_tokval(p, grant_type); if (!ap->grant_type) { log_error("%s is not a grant type", p); goto err_out; } p = strtok(NULL, " \t"); if (!p) { log_error("not enough arguments to attr-prereq"); goto err_out; } if (!(ap->attr_name = strdup(p))) { log_error("out of memory"); goto err_out; } p = strtok(NULL, " \t"); if (!p) { log_error("not enough arguments to attr-prereq"); goto err_out; } ap->op = lookup_tokval(p, attr_op); if (!ap->op) { log_error("%s is not an attribute operation", p); goto err_out; } p = strtok(NULL, " \t"); if (!p) { log_error("not enough arguments to attr-prereq"); goto err_out; } if (!(ap->attr_val = strdup(p))) { log_error("out of memory"); goto err_out; } tk->attr_prereqs = g_list_append(tk->attr_prereqs, ap); if (!tk->attr_prereqs) { log_error("out of memory"); goto err_out; } return 0; err_out: if (ap) { if (ap->attr_val) free(ap->attr_val); if (ap->attr_name) free(ap->attr_name); free(ap); } return -1; } extern int poll_timeout; +void +get_keyval(char *key, char *val, struct args *a) { 
+ char *p; + + strncpy(a->key, key, 16); + p = skip_while(val, isspace); + *(p-1) = '\0'; + strncpy(a->val, val, 16); +} + +struct crmv1_group { + char name[16]; + char ra[128]; + struct args { + char *key[16]; + char *val[16]; + } args[16]; +}; + +#define OCF_HB_PATH "/usr/lib/ocf/resource.d/heartbeat/" +#define OCF_PATH "/usr/lib/ocf/resource.d/" + +void ln_ra(char *ra, char *s, int cnt) +{ + char *p, *q, *r; + int fd; + char ra_target_s[128]; + + p = s; + q = strchr(":", s); + if (!q) { + strcpy(ra, OCF_HB_PATH); + strncpy(ra+strlen(OCF_HB_PATH), s, 128-strlen(OCF_HB_PATH)); + r = s; + } else { + /* s -> p ':' q ':' r + * copy to ra + */ + *q = '\0'; q++; + strcpy(ra, OCF_PATH); + strncpy(ra+strlen(OCF_PATH), p, 128-strlen(OCF_PATH)); + *(q-p+1) = '/'; + strncpy(ra+strlen(OCF_PATH)+1, q, 128-strlen(OCF_PATH)-strlen(q)); + r = strchr(":", q); + *r = '\0'; r++; + strncat(ra, r, 128-strlen(OCF_PATH)); + } + if (strlen(p) >= 128) { + log_error("RA name too long: %s", s); + exit(1); + } + /* now test if there is a file containing this RA + */ + if (!(fd = open(ra))) { + log_error("RA does not exist: %s", s); + exit(1); + } + close(fd); + /* finally, create a soft link + */ + if (snprintf(ra_target_s, 128, "%02d_%s", cnt, r) >= 128) { + log_error("RA name too long: %s", s); + exit(1); + } + if (symlink(BOOTH_DEFAULT_CRMV1_CONF, ra_target_s) != 0) { + log_error("failed to symlink %s: %s", ra_target_s, + strerror(errno)); + exit(1); + } +} + +/* mimic the shell parsing + */ + +int parse_crmv1_conf(struct ticket_config *current_tk) +{ + struct crmv1_group *groups[16], *curr_group; + char line[1024], *buf; + char error_str_buf[1024]; + FILE *fp; + char *s, *key, *val; + const char *error; + char *save_ra; + int i, grp_i = 0, key_i = 0, in_key, grp_wait; + int cnt = 0, args_cnt = 0; + + curr_group = groups[0]; + fp = fopen(BOOTH_DEFAULT_CRMV1_CONF, "r"); + if (!fp) { + log_error("failed to open %s: %s", BOOTH_DEFAULT_CRMV1_CONF, + strerror(errno)); + return -1; + } + + 
log_debug("reading config file %s", BOOTH_DEFAULT_CRMV1_CONF); + /* make one long line */ + while (fgets(line, sizeof(line), fp)) { + s = skip_while(line, isspace); + if (is_end_of_line(s) || *s == '#') + continue; + /* is line continued? */ + if (*(s+strlen(s)-2) == '\\' && *(s+strlen(s)-1) == '\n') { + *(s+strlen(s)-2) = ' '; + } + } + buf = line; + + /* now parse the line */ + for (s = buf; ; ) { + /* a '=' b or ra */ + s = skip_while(s, isspace); + save_ra = s; + if ( *s == '=' ) { + *s = '\0'; + s++; + get_keyval(save_ra, s, curr_group->args[args_cnt++]); + } else { + if ( save_ra ) { + ln_ra(curr_group->ra, save_ra, cnt); + curr_group->args[0] = NULL; + save_ra = NULL; + cnt++; + continue; + } + } + ln_ra(curr_group->ra, s, cnt); + cnt++; + + if (strcmp(key, "group") == 0) { + grp_wait = 1; + continue; + } + + (void)snprintf(error_str_buf, sizeof(error_str_buf), + "Unknown keyword \"%s\"", key); + error = error_str_buf; + goto err; + + curr_group++; + } + fclose(fp); + + /* Default: make config name match config filename. */ + if (!booth_conf->name[0]) { + cp = strrchr(path, '/'); + cp = cp ? 
cp+1 : (char *)path; + cp2 = strrchr(cp, '.'); + if (!cp2) + cp2 = cp + strlen(cp); + if (cp2-cp >= BOOTH_NAME_LEN) { + log_error("token too long"); + goto out; + } + strncpy(booth_conf->name, cp, cp2-cp); + *(booth_conf->name+(cp2-cp)) = '\0'; + } + + if (!postproc_ticket(current_tk)) { + goto out; + } + + return 0; + +err: + fclose(fp); +out: + log_error("%s in config file line %d", + error, lineno); + booth_conf->crmv1 = 0; + return -1; +} + int read_config(const char *path, int type) { char line[1024]; char error_str_buf[1024]; FILE *fp; char *s, *key, *val, *end_of_key; const char *error; char *cp, *cp2; int i; int lineno = 0; int got_transport = 0; int min_timeout = 0; struct ticket_config defaults = { { 0 } }; struct ticket_config *current_tk = NULL; fp = fopen(path, "r"); if (!fp) { log_error("failed to open %s: %s", path, strerror(errno)); return -1; } booth_conf = malloc(sizeof(struct booth_config) + TICKET_ALLOC * sizeof(struct ticket_config)); if (!booth_conf) { fclose(fp); log_error("failed to alloc memory for booth config"); return -ENOMEM; } memset(booth_conf, 0, sizeof(struct booth_config) + TICKET_ALLOC * sizeof(struct ticket_config)); ticket_size = TICKET_ALLOC; booth_conf->proto = UDP; booth_conf->port = BOOTH_DEFAULT_PORT; booth_conf->maxtimeskew = BOOTH_DEFAULT_MAX_TIME_SKEW; booth_conf->authkey[0] = '\0'; /* Provide safe defaults. -1 is reserved, though. 
*/ booth_conf->uid = -2; booth_conf->gid = -2; strcpy(booth_conf->site_user, "hacluster"); strcpy(booth_conf->site_group, "haclient"); strcpy(booth_conf->arb_user, "nobody"); strcpy(booth_conf->arb_group, "nobody"); parse_weights("", defaults.weight); defaults.clu_test.path = NULL; defaults.clu_test.pid = 0; defaults.clu_test.status = 0; defaults.clu_test.progstate = EXTPROG_IDLE; defaults.term_duration = DEFAULT_TICKET_EXPIRY; defaults.timeout = DEFAULT_TICKET_TIMEOUT; defaults.retries = DEFAULT_RETRIES; defaults.acquire_after = 0; defaults.mode = TICKET_MODE_AUTO; error = ""; log_debug("reading config file %s", path); while (fgets(line, sizeof(line), fp)) { lineno++; s = skip_while(line, isspace); if (is_end_of_line(s) || *s == '#') continue; key = s; /* Key */ end_of_key = skip_while_in(key, isalnum, "-_"); if (end_of_key == key) { error = "No key"; goto err; } if (!*end_of_key) goto exp_equal; /* whitespace, and something else but nothing more? */ s = skip_while(end_of_key, isspace); if (*s != '=') { exp_equal: error = "Expected '=' after key"; goto err; } s++; /* It's my buffer, and I terminate if I want to. */ /* But not earlier than that, because we had to check for = */ *end_of_key = 0; /* Value tokenizing */ s = skip_while(s, isspace); switch (*s) { case '"': case '\'': val = s+1; s = skip_until(val, *s); /* Terminate value */ if (!*s) { error = "Unterminated quoted string"; goto err; } /* Remove and skip quote */ *s = 0; s++; if (*(s = skip_while(s, isspace)) && *s != '#') { error = "Surplus data after value"; goto err; } *s = 0; break; case 0: no_value: error = "No value"; goto err; break; default: val = s; /* Rest of line. */ i = strlen(s); /* i > 0 because of "case 0" above. 
*/ while (i > 0 && isspace(s[i-1])) i--; s += i; *s = 0; } if (val == s) goto no_value; if (strlen(key) > BOOTH_NAME_LEN || strlen(val) > BOOTH_NAME_LEN) { error = "key/value too long"; goto err; } if (strcmp(key, "transport") == 0) { if (got_transport) { error = "config file has multiple transport lines"; goto err; } if (strcasecmp(val, "UDP") == 0) booth_conf->proto = UDP; else if (strcasecmp(val, "SCTP") == 0) booth_conf->proto = SCTP; else { (void)snprintf(error_str_buf, sizeof(error_str_buf), "invalid transport protocol \"%s\"", val); error = error_str_buf; goto err; } got_transport = 1; continue; } if (strcmp(key, "port") == 0) { booth_conf->port = atoi(val); continue; } if (strcmp(key, "name") == 0) { safe_copy(booth_conf->name, val, BOOTH_NAME_LEN, "name"); continue; } #if HAVE_LIBGCRYPT || HAVE_LIBMHASH if (strcmp(key, "authfile") == 0) { safe_copy(booth_conf->authfile, val, BOOTH_PATH_LEN, "authfile"); continue; } if (strcmp(key, "maxtimeskew") == 0) { booth_conf->maxtimeskew = atoi(val); continue; } #endif if (strcmp(key, "site") == 0) { if (add_site(val, SITE)) goto err; continue; } if (strcmp(key, "arbitrator") == 0) { if (add_site(val, ARBITRATOR)) goto err; continue; } if (strcmp(key, "site-user") == 0) { safe_copy(booth_conf->site_user, optarg, BOOTH_NAME_LEN, "site-user"); continue; } if (strcmp(key, "site-group") == 0) { safe_copy(booth_conf->site_group, optarg, BOOTH_NAME_LEN, "site-group"); continue; } if (strcmp(key, "arbitrator-user") == 0) { safe_copy(booth_conf->arb_user, optarg, BOOTH_NAME_LEN, "arbitrator-user"); continue; } if (strcmp(key, "arbitrator-group") == 0) { safe_copy(booth_conf->arb_group, optarg, BOOTH_NAME_LEN, "arbitrator-group"); continue; } if (strcmp(key, "debug") == 0) { if (type != CLIENT && type != GEOSTORE) debug_level = max(debug_level, atoi(val)); continue; } if (strcmp(key, "ticket") == 0) { if (current_tk && strcmp(current_tk->name, "__defaults__")) { if (!postproc_ticket(current_tk)) { goto err; } } if 
(!strcmp(val, "__defaults__")) { current_tk = &defaults; } else if (add_ticket(val, ¤t_tk, &defaults)) { goto err; } continue; } + if (strcmp(key, "crmv1") == 0) { + if ( !parse_crmv1_conf() ) + booth_conf->crmv1 = 1; + continue; + } + /* current_tk must be allocated at this point, otherwise * we don't know to which ticket the key refers */ if (!current_tk) { (void)snprintf(error_str_buf, sizeof(error_str_buf), "Unexpected keyword \"%s\"", key); error = error_str_buf; goto err; } if (strcmp(key, "expire") == 0) { current_tk->term_duration = read_time(val); if (current_tk->term_duration <= 0) { error = "Expected time >0 for expire"; goto err; } continue; } if (strcmp(key, "timeout") == 0) { current_tk->timeout = read_time(val); if (current_tk->timeout <= 0) { error = "Expected time >0 for timeout"; goto err; } if (!min_timeout) { min_timeout = current_tk->timeout; } else { min_timeout = min(min_timeout, current_tk->timeout); } continue; } if (strcmp(key, "retries") == 0) { current_tk->retries = strtol(val, &s, 0); if (*s || s == val || current_tk->retries<3 || current_tk->retries > 100) { error = "Expected plain integer value in the range [3, 100] for retries"; goto err; } continue; } if (strcmp(key, "renewal-freq") == 0) { current_tk->renewal_freq = read_time(val); if (current_tk->renewal_freq <= 0) { error = "Expected time >0 for renewal-freq"; goto err; } continue; } if (strcmp(key, "acquire-after") == 0) { current_tk->acquire_after = read_time(val); if (current_tk->acquire_after < 0) { error = "Expected time >=0 for acquire-after"; goto err; } continue; } if (strcmp(key, "before-acquire-handler") == 0) { if (parse_extprog(val, current_tk)) { goto err; } continue; } if (strcmp(key, "attr-prereq") == 0) { if (parse_attr_prereq(val, current_tk)) { goto err; } continue; } if (strcmp(key, "mode") == 0) { current_tk->mode = retrieve_ticket_mode(val); continue; } if (strcmp(key, "weights") == 0) { if (parse_weights(val, current_tk->weight) < 0) goto err; continue; } 
(void)snprintf(error_str_buf, sizeof(error_str_buf), "Unknown keyword \"%s\"", key); error = error_str_buf; goto err; } fclose(fp); if ((booth_conf->site_count % 2) == 0) { log_warn("Odd number of nodes is strongly recommended!"); } /* Default: make config name match config filename. */ if (!booth_conf->name[0]) { cp = strrchr(path, '/'); cp = cp ? cp+1 : (char *)path; cp2 = strrchr(cp, '.'); if (!cp2) cp2 = cp + strlen(cp); if (cp2-cp >= BOOTH_NAME_LEN) { log_error("booth config file name too long"); goto out; } strncpy(booth_conf->name, cp, cp2-cp); *(booth_conf->name+(cp2-cp)) = '\0'; } if (!postproc_ticket(current_tk)) { goto out; } poll_timeout = min(POLL_TIMEOUT, min_timeout/10); if (!poll_timeout) poll_timeout = POLL_TIMEOUT; return 0; err: fclose(fp); out: log_error("%s in config file line %d", error, lineno); free(booth_conf); booth_conf = NULL; return -1; } int check_config(int type) { struct passwd *pw; struct group *gr; char *cp, *input; if (!booth_conf) return -1; input = (type == ARBITRATOR) ? booth_conf->arb_user : booth_conf->site_user; if (!*input) goto u_inval; if (isdigit(input[0])) { booth_conf->uid = strtol(input, &cp, 0); if (*cp != 0) { u_inval: log_error("User \"%s\" cannot be resolved into a UID.", input); return ENOENT; } } else { pw = getpwnam(input); if (!pw) goto u_inval; booth_conf->uid = pw->pw_uid; } input = (type == ARBITRATOR) ? 
booth_conf->arb_group : booth_conf->site_group; if (!*input) goto g_inval; if (isdigit(input[0])) { booth_conf->gid = strtol(input, &cp, 0); if (*cp != 0) { g_inval: log_error("Group \"%s\" cannot be resolved into a UID.", input); return ENOENT; } } else { gr = getgrnam(input); if (!gr) goto g_inval; booth_conf->gid = gr->gr_gid; } return 0; } static int get_other_site(struct booth_site **node) { struct booth_site *n; int i; *node = NULL; if (!booth_conf) return 0; for (i = 0; i < booth_conf->site_count; i++) { n = booth_conf->site + i; if (n != local && n->type == SITE) { if (!*node) { *node = n; } else { return 0; } } } return !*node ? 0 : 1; } int find_site_by_name(char *site, struct booth_site **node, int any_type) { struct booth_site *n; int i; if (!booth_conf) return 0; if (!strcmp(site, OTHER_SITE)) return get_other_site(node); for (i = 0; i < booth_conf->site_count; i++) { n = booth_conf->site + i; if ((n->type == SITE || any_type) && strncmp(n->addr_string, site, sizeof(n->addr_string)) == 0) { *node = n; return 1; } } return 0; } int find_site_by_id(uint32_t site_id, struct booth_site **node) { struct booth_site *n; int i; if (site_id == NO_ONE) { *node = no_leader; return 1; } if (!booth_conf) return 0; for (i = 0; i < booth_conf->site_count; i++) { n = booth_conf->site + i; if (n->site_id == site_id) { *node = n; return 1; } } return 0; } const char *type_to_string(int type) { switch (type) { case ARBITRATOR: return "arbitrator"; case SITE: return "site"; case CLIENT: return "client"; case GEOSTORE: return "attr"; } return "??invalid-type??"; } diff --git a/src/config.h b/src/config.h index bca73bc..834aa4e 100644 --- a/src/config.h +++ b/src/config.h @@ -1,340 +1,342 @@ /* * Copyright (C) 2011 Jiaju Zhang * Copyright (C) 2013-2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, 
or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #ifndef _CONFIG_H #define _CONFIG_H #include #include +#include #include "booth.h" #include "timer.h" #include "raft.h" #include "transport.h" /** @{ */ /** Definitions for in-RAM data. */ #define MAX_NODES 16 #define MAX_ARGS 16 #define TICKET_ALLOC 16 #define OTHER_SITE "other" typedef enum { EXTPROG_IDLE, EXTPROG_RUNNING, EXTPROG_EXITED, EXTPROG_IGNORE, } extprog_state_e; #define tk_test tk->clu_test typedef enum { ATTR_OP_EQ = 1, ATTR_OP_NE, } attr_op_e; typedef enum { GRANT_AUTO = 1, GRANT_MANUAL, } grant_type_e; typedef enum { TICKET_MODE_AUTO = 1, TICKET_MODE_MANUAL, } ticket_mode_e; struct toktab { const char *str; int val; }; struct attr_prereq { grant_type_e grant_type; /* grant type */ attr_op_e op; /* attribute operation */ char *attr_name; char *attr_val; }; struct ticket_config { /** \name Configuration items. * @{ */ /** Name of ticket. */ boothc_ticket name; /** How long a term lasts if not refreshed (in ms) */ int term_duration; /** Network related timeouts (in ms) */ int timeout; /** Retries before giving up. */ int retries; /** If >0, time to wait for a site to get fenced. * The ticket may be acquired after that timespan by * another site. 
*/ int acquire_after; /* How often to renew the ticket (in ms) */ int renewal_freq; /* Program to ask whether it makes sense to * acquire the ticket */ struct clu_test { char *path; int is_dir; char *argv[MAX_ARGS]; pid_t pid; int status; /* child exit status */ extprog_state_e progstate; /* program running/idle/waited on */ } clu_test; /** Node weights. */ int weight[MAX_NODES]; /* Mode operation of the ticket. * Set to MANUAL to make sure that the ticket will be manipulated * only by manual commands of the administrator. In such a case * automatic elections will be disabled. * Manual tickets do not have to be renewed every some time. * The leader will continue to send heartbeat messages to other sites. */ ticket_mode_e mode; /** @} */ /** \name Runtime values. * @{ */ /** Current state. */ server_state_e state; /** Next state. Used at startup. */ server_state_e next_state; /** When something has to be done */ timetype next_cron; /** Current leader. This is effectively the log[] in Raft. */ struct booth_site *leader; /** Leader that got lost. */ struct booth_site *lost_leader; /** Is the ticket granted? */ int is_granted; /** Which site considered itself a leader. * For manual tickets it is possible, that * more than one site will act as a leader. * This array is used for tracking that situation * and notifying the user about the issue. * * Possible values for every site: * 0: the site does not claim to be the leader * 1: the site considers itself a leader and * is sending or used to send heartbeat messages * * The site will be marked as '1' until this site * receives revoke confirmation. * * If more than one site has '1', the geo cluster is * considered to have multiple leadership and proper * warning are generated. */ int sites_where_granted[MAX_NODES]; /** Timestamp of leadership expiration */ timetype term_expires; /** End of election period */ timetype election_end; struct booth_site *voted_for; /** Who the various sites vote for. * NO_OWNER = no vote yet. 
*/ struct booth_site *votes_for[MAX_NODES]; /* bitmap */ uint64_t votes_received; /** Last voting round that was seen. */ uint32_t current_term; /** Do ticket updates whenever we get enough heartbeats. * But do that only once. * This is reset to 0 whenever we broadcast heartbeat and set * to 1 once enough acks are received. * Increased to 2 when the ticket is commited to the CIB (see * delay_commit). */ uint32_t ticket_updated; /** Outcome of whatever ticket request was processed. * Can also be an intermediate stage. */ uint32_t outcome; /** @} */ /** */ uint32_t last_applied; uint32_t next_index[MAX_NODES]; uint32_t match_index[MAX_NODES]; /* Why did we start the elections? */ cmd_reason_t election_reason; /* if it is potentially dangerous to grant the ticket * immediately, then this is set to some point in time, * usually (now + term_duration + acquire_after) */ timetype delay_commit; /* the last request RPC we sent */ uint32_t last_request; /* if we expect some acks, then set this to the id of * the RPC which others will send us; it is cleared once all * replies were received */ uint32_t acks_expected; /* bitmask of servers which sent acks */ uint64_t acks_received; /* timestamp of the request */ timetype req_sent_at; /* we need to wait for MY_INDEX from other servers, * hold the ticket processing for a while until they reply */ int start_postpone; /** Last renewal time */ timetype last_renewal; /* Do we need to update the copy in the CIB? * Normally, the ticket is written only when it changes via * the UPDATE RPC (for followers) and on expiration update * (for leaders) */ int update_cib; /* Is this ticket in election? */ int in_election; /* don't log warnings unnecessarily */ int expect_more_rejects; /** \name Needed while proposals are being done. * @{ */ /* Need to keep the previous valid ticket in case we moved to * start new elections and another server asks for the ticket * status. It would be wrong to send our candidate ticket. 
*/ struct ticket_config *last_valid_tk; /** Attributes, user defined */ GHashTable *attr; /** Attribute prerequisites */ GList *attr_prereqs; /** Whom to vote for the next time. * Needed to push a ticket to someone else. */ #if 0 /** Bitmap of sites that acknowledge that state. */ uint64_t proposal_acknowledges; /** When an incompletely acknowledged proposal gets done. * If all peers agree, that happens sooner. * See switch_state_to(). */ struct timeval proposal_switch; /** Timestamp of proposal expiration. */ time_t proposal_expires; #endif /** Number of send retries left. * Used on the new owner. * Starts at 0, counts up. */ int retry_number; /** @} */ }; struct booth_config { char name[BOOTH_NAME_LEN]; /** File containing the authentication file. */ char authfile[BOOTH_PATH_LEN]; struct stat authstat; char authkey[BOOTH_MAX_KEY_LEN]; int authkey_len; /** Maximum time skew between peers allowed */ int maxtimeskew; transport_layer_t proto; uint16_t port; /** Stores the OR of sites bitmasks. */ uint64_t sites_bits; /** Stores the OR of all members' bitmasks. 
*/ uint64_t all_bits; char site_user[BOOTH_NAME_LEN]; char site_group[BOOTH_NAME_LEN]; char arb_user[BOOTH_NAME_LEN]; char arb_group[BOOTH_NAME_LEN]; uid_t uid; gid_t gid; int site_count; struct booth_site site[MAX_NODES]; int ticket_count; int ticket_allocated; struct ticket_config *ticket; + int crmv1; }; extern struct booth_config *booth_conf; #define is_auth_req() (booth_conf->authkey[0] != '\0') int read_config(const char *path, int type); int check_config(int type); int find_site_by_name(char *site, struct booth_site **node, int any_type); int find_site_by_id(uint32_t site_id, struct booth_site **node); const char *type_to_string(int type); #endif /* _CONFIG_H */ diff --git a/src/handler.c b/src/handler.c index a12857e..2f6afbc 100644 --- a/src/handler.c +++ b/src/handler.c @@ -1,283 +1,296 @@ /* * Copyright (C) 2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include "ticket.h" #include "config.h" #include "inline-fn.h" #include "log.h" #include "pacemaker.h" #include "booth.h" #include "handler.h" static int set_booth_env(struct ticket_config *tk) { int rv; char expires[16]; sprintf(expires, "%" PRId64, (int64_t)wall_ts(&tk->term_expires)); rv = setenv("BOOTH_TICKET", tk->name, 1) || setenv("BOOTH_LOCAL", local->addr_string, 1) || setenv("BOOTH_CONF_NAME", booth_conf->name, 1) || setenv("BOOTH_CONF_PATH", cl.configfile, 1) || setenv("BOOTH_TICKET_EXPIRES", expires, 1); if (rv) { log_error("Cannot set environment: %s", strerror(errno)); } return rv; } static void closefiles(void) { int fd; /* close all descriptors except stdin/out/err */ for (fd = getdtablesize() - 1; fd > STDERR_FILENO; fd--) { close(fd); } } +static void +wait4proc(struct ticket_config *tk, char *prog) { + int rv, status; + + while (waitpid(curr_pid, &status, 0) != curr_pid) + ; + curr_pid = 0; + if (!ignore_status) { + rv = test_exit_status(tk, prog, status, 1); + if (rv) + _exit(rv); + } else { + /* + * To make ignore_rest function signal safe log_info + * must be removed from signal function. Information + * about signal delivery is important so put it here. 
+ */ + log_info("external programs handler caught TERM, ignoring " + "status of external test programs"); + } + static void run_ext_prog(struct ticket_config *tk, char *prog) { - if (set_booth_env(tk)) { + int status, rv; + + switch(curr_pid=fork()) { + case -1: + log_error("fork: %s", strerror(errno)); _exit(1); + case 0: /* child */ + if (set_booth_env(tk)) { + _exit(1); + } + closefiles(); /* don't leak open files */ + tk_log_debug("running handler %s", prog); + execv(prog, tk_test.argv); + tk_log_error("%s: execv failed (%s)", prog, strerror(errno)); + _exit(1); + default: /* parent */ + wait4proc(struct ticket_config *tk, char *prog); } - closefiles(); /* don't leak open files */ - tk_log_debug("running handler %s", prog); - execv(prog, tk_test.argv); - tk_log_error("%s: execv failed (%s)", prog, strerror(errno)); - _exit(1); } static int prog_filter(const struct dirent *dp) { return (*dp->d_name != '.'); } static pid_t curr_pid; static int ignore_status; static int test_exit_status(struct ticket_config *tk, char *prog, int status, int log_msg) { int rv = -1; if (WIFEXITED(status)) { rv = WEXITSTATUS(status); } else if (WIFSIGNALED(status)) { rv = 128 + WTERMSIG(status); } if (rv) { if (log_msg) { tk_log_warn("handler \"%s\" failed: %s", prog, interpret_rv(status)); tk_log_warn("we are not allowed to acquire ticket"); } } else { tk_log_debug("handler \"%s\" exited with success", prog); } return rv; } static void reset_test_state(struct ticket_config *tk) { tk_test.pid = 0; set_progstate(tk, EXTPROG_IDLE); } int tk_test_exit_status(struct ticket_config *tk) { int rv; rv = test_exit_status(tk, tk_test.path, tk_test.status, !tk_test.is_dir); reset_test_state(tk); return rv; } void wait_child(int sig) { int i, status; struct ticket_config *tk; /* use waitpid(2) and not wait(2) in order not to interfear * with popen(2)/pclose(2) and system(2) used in pacemaker.c */ foreach_ticket(i, tk) { if (tk_test.path && tk_test.pid > 0 && (tk_test.progstate == EXTPROG_RUNNING 
|| tk_test.progstate == EXTPROG_IGNORE) && waitpid(tk_test.pid, &status, WNOHANG) == tk_test.pid) { if (tk_test.progstate == EXTPROG_IGNORE) { /* not interested in the outcome */ reset_test_state(tk); } else { tk_test.status = status; set_progstate(tk, EXTPROG_EXITED); } } } } /* the parent may want to have us stop processing scripts, say * when the ticket gets revoked */ static void ignore_rest(int sig) { signal(SIGTERM, SIG_IGN); ignore_status = 1; if (curr_pid > 0) { (void)kill(curr_pid, SIGTERM); } } void ext_prog_timeout(struct ticket_config *tk) { tk_log_warn("handler timed out"); } int is_ext_prog_running(struct ticket_config *tk) { if (!tk_test.path) return 0; return (tk_test.pid > 0 && tk_test.progstate == EXTPROG_RUNNING); } void ignore_ext_test(struct ticket_config *tk) { if (is_ext_prog_running(tk)) { (void)kill(tk_test.pid, SIGTERM); set_progstate(tk, EXTPROG_IGNORE); } else if (tk_test.progstate == EXTPROG_EXITED) { /* external prog exited, but the status not yet examined; * we're not interested in checking the status anymore */ reset_test_state(tk); } } static void process_ext_dir(struct ticket_config *tk) { char prog[FILENAME_MAX+1]; int rv, n_progs, i, status; struct dirent **proglist, *dp; signal(SIGTERM, (__sighandler_t)ignore_rest); signal(SIGCHLD, SIG_DFL); signal(SIGUSR1, SIG_DFL); signal(SIGINT, SIG_DFL); tk_log_debug("running programs in directory %s", tk_test.path); n_progs = scandir(tk_test.path, &proglist, prog_filter, alphasort); if (n_progs == -1) { tk_log_error("%s: scandir failed (%s)", tk_test.path, strerror(errno)); _exit(1); } for (i = 0; i < n_progs; i++) { if (ignore_status) break; dp = proglist[i]; if (strlen(dp->d_name) + strlen(tk_test.path) + 1 > FILENAME_MAX) { tk_log_error("%s: name exceeds max length (%s)", tk_test.path, dp->d_name); _exit(1); } strcpy(prog, tk_test.path); strcat(prog, "/"); strcat(prog, dp->d_name); - switch(curr_pid=fork()) { - case -1: - log_error("fork: %s", strerror(errno)); - _exit(1); - case 0: /* 
child */ - run_ext_prog(tk, prog); - break; /* run_ext_prog effectively noreturn */ - default: /* parent */ - while (waitpid(curr_pid, &status, 0) != curr_pid) - ; - curr_pid = 0; - if (!ignore_status) { - rv = test_exit_status(tk, prog, status, 1); - if (rv) - _exit(rv); - } else { - /* - * To make ignore_rest function signal safe log_info - * must be removed from signal function. Information - * about signal delivery is important so put it here. - */ - log_info("external programs handler caught TERM, ignoring " - "status of external test programs"); - } + run_ext_prog(tk, prog); + if (booth_conf->crmv1) { + wait4proc(struct ticket_config *tk, char *prog); } } _exit(0); } /* run some external program * return codes: * RUNCMD_ERR: executing program failed (or some other failure) * RUNCMD_MORE: program forked, results later */ int run_handler(struct ticket_config *tk) { int rv = 0; pid_t pid; struct stat stbuf; if (!tk_test.path) return 0; if (stat(tk_test.path, &stbuf)) { tk_log_error("%s: stat failed (%s)", tk_test.path, strerror(errno)); return RUNCMD_ERR; } tk_test.is_dir = (stbuf.st_mode & S_IFDIR); switch(pid=fork()) { case -1: log_error("fork: %s", strerror(errno)); return RUNCMD_ERR; case 0: /* child */ if (tk_test.is_dir) { process_ext_dir(tk); } else { run_ext_prog(tk, tk_test.path); } default: /* parent */ tk_test.pid = pid; set_progstate(tk, EXTPROG_RUNNING); rv = RUNCMD_MORE; /* program runs */ + if (booth_conf->crmv1) { + wait4proc(struct ticket_config *tk, char *prog); + } } return rv; } diff --git a/test/live_test.sh b/test/live_test.sh index bd60964..485a32d 100755 --- a/test/live_test.sh +++ b/test/live_test.sh @@ -1,1353 +1,1392 @@ #!/bin/sh # # see README-testing for more information # do some basic booth operation tests for the given config # PROG=`basename $0` usage() { cat<[:]] $PROG [ ...] 
EOF if [ $1 -eq 0 ]; then list_all examples fi exit } list_all() { echo "Tests:" grep "^test_.*{$" $0 | sed 's/test_//;s/(.*//;s/^/ /' echo echo "Netem functions:" grep "^NETEM_ENV_.*{$" $0 | sed 's/NETEM_ENV_//;s/(.*//;s/^/ /' } examples() { cat< /dev/null } stop_site() { manage_site $1 stop } stop_arbitrator() { manage_arbitrator $1 stop } restart_site() { manage_site $1 restart } cleanup_site() { manage_site $1 cleanup } reload_site() { runcmd $1 OCF_ROOT=/usr/lib/ocf /usr/lib/ocf/resource.d/pacemaker/booth-site reload } restart_arbitrator() { manage_arbitrator $1 restart } booth_status() { test "`runcmd $1 booth status | get_stat_fld booth_state`" = "started" } cleanup_booth() { local h procs for h in $sites; do cleanup_site $h & procs="$! $procs" done >/dev/null 2>&1 wait $procs wait_timeout } cleanup_dep_rsc() { local dep_rsc=`get_rsc` test -z "$dep_rsc" && return local h procs for h in $sites; do runcmd $h crm -w resource cleanup $dep_rsc & procs="$! $procs" done >/dev/null 2>&1 wait $procs } check_dep_rsc() { local dep_rsc=`get_rsc` test -z "$dep_rsc" && return 0 local h for h in $sites; do runcmd $h BOOTH_TICKET=$tkt /usr/share/booth/service-runnable $dep_rsc || return 1 done return 0 } stop_booth() { local h rc for h in $sites; do stop_site $h rc=$((rc|$?)) done >/dev/null 2>&1 for h in $arbitrators; do stop_arbitrator $h rc=$((rc|$?)) done >/dev/null 2>&1 wait_timeout return $rc } start_booth() { local h rc for h in $sites; do start_site $h rc=$((rc|$?)) done >/dev/null 2>&1 for h in $arbitrators; do start_arbitrator $h rc=$((rc|$?)) done >/dev/null 2>&1 wait_timeout return $rc } restart_booth() { local h procs for h in $sites; do restart_site $h & procs="$! 
$procs" done >/dev/null 2>&1 for h in $arbitrators; do restart_arbitrator $h done >/dev/null 2>&1 wait $procs wait_timeout } reboot_test() { cleanup_booth restart_booth cleanup_dep_rsc } is_we_server() { local h for h in $sites $arbitrators; do ip a l | fgrep -wq $h && return done return 1 } is_pacemaker_running() { local h for h in $sites; do runcmd $h crmadmin -D >/dev/null || return 1 done return 0 } sync_conf() { local h rc=0 local tmpf for h in $sites $arbitrators; do rsync -q -e "ssh $SSH_OPTS" $1 root@$h:$run_cnf rc=$((rc|$?)) if [ -n "$authfile" ]; then tmpf=`mktemp` scp -q $(get_site 1):$authfile $tmpf && rsync -q -e "ssh $SSH_OPTS" $tmpf root@$h:$authfile rc=$((rc|$?)) rm -f $tmpf fi done return $rc } dump_conf() { echo "test configuration file $cnf:" grep -v '^#' $cnf | grep -v '^[[:space:]]*$' | sed "s/^/$cnf: /" } forall() { local h rc=0 for h in $sites $arbitrators; do runcmd $h "$@" rc=$((rc|$?)) done return $rc } forall_withname() { local h rc=0 output for h in $sites $arbitrators; do output=`runcmd $h "$@"` rc=$((rc|$?)) echo $h: $output done return $rc } forall_sites() { local h rc=0 for h in $sites; do runcmd $h "$@" rc=$((rc|$?)) done return $rc } forall_fun() { local h rc=0 f=$1 for h in $sites $arbitrators; do $f $h rc=$((rc|$?)) [ $rc -ne 0 ] && break done return $rc } # run on all hosts whatever function produced on stdout forall_fun2() { local h rc=0 f f=$1 shift 1 for h in $sites $arbitrators; do $f "$@" | ssh $SSH_OPTS $h rc=$((rc|$?)) [ $rc -ne 0 ] && break done return $rc } run_site() { local n=$1 h shift 1 h=`echo $sites | awk '{print $'$n'}'` runcmd $h "$@" } run_arbitrator() { local n=$1 h shift 1 h=`echo $arbitrators | awk '{print $'$n'}'` runcmd $h "$@" } # need to get logs from _all_ clusters' nodes get_all_nodes() { for h in $sites; do runcmd $h crm_node -l | awk '{print $2}' done } extract_value() { sed 's/ *#.*//;s/.*=//;s/"//g;s/^ *//;s/ *$//' } get_extern_ip() { grep "^$1" | awk ' { if(/# *external[_-]ip=/) print $NF; else 
print; } ' | extract_value } get_value() { grep "^$1" | extract_value } # get internal IP for the external address internal_ip() { fgrep "$1" $cnf | extract_value } get_rsc() { awk ' n && /^[[:space:]]*before-acquire-handler/ {print $NF; exit} n && (/^$/ || /^ticket.*/) {exit} /^ticket.*'$tkt'/ {n=1} ' $cnf } get_attr() { awk ' n && /^[[:space:]]*attr-prereq = auto .* eq / {print $4,$6; exit} n && (/^$/ || /^ticket.*/) {exit} /^ticket.*'$tkt'/ {n=1} ' $cnf } get_mode() { awk ' n && /^[[:space:]]*mode/ {print $NF; exit} n && (/^$/ || /^ticket.*/) {exit} /^ticket.*'$tkt'/ {n=1} ' $cnf } set_site_attr() { local site site=$1 set -- `get_attr` run_site $site geostore set $1 $2 } del_site_attr() { local site site=$1 set -- `get_attr` run_site $site geostore delete $1 } break_external_prog() { run_site $1 crm configure "location $PREFNAME `get_rsc` rule -inf: defined \#uname" } show_pref() { run_site $1 crm configure show $PREFNAME > /dev/null } repair_external_prog() { run_site $1 crm configure delete __pref_booth_live_test } get_tkt() { grep "^ticket=" | head -1 | sed 's/ticket=//;s/"//g' } get_tkt_settings() { awk ' n && /^[[:space:]]*(expire|timeout|renewal-freq)/ { sub(" = ", "=", $0); gsub("-", "_", $0); sub("^[[:space:]]*", "T_", $0); if ($0 ~ /ms$/) { sub("ms$", "", $0); eq = match($0, "="); print substr($0, 1, eq)""substr($0, eq+1)/1000; } else { print; } next } n && (/^$/ || /^ticket.*/) {exit} /^ticket.*'$tkt'/ {n=1} ' $1 } wait_exp() { # shellcheck disable=SC2154 # (T_expire: defined with get_tkt_settings) sleep $T_expire } wait_renewal() { sleep $T_renewal_freq } wait_timeout() { sleep $MIN_TIMEOUT } set_netem_env() { local modfun args modfun=`echo $1 | sed 's/:.*//'` args=`echo $1 | sed 's/[^:]*//;s/:/ /g'` if ! 
is_function NETEM_ENV_$modfun; then echo "NETEM_ENV_$modfun: doesn't exist" exit 1 fi NETEM_ENV_$modfun $args } reset_netem_env() { [ -z "$NETEM_ENV" ] && return [ -n "$__NETEM_RESET" ] && return __NETEM_RESET=1 forall $ABSPATH $run_cnf __netem__ netem_reset } setup_netem() { [ -z "$NETEM_ENV" ] && return __NETEM_RESET= echo "-------------------------------------------------- (netem)" | logmsg for env in $NETEM_ENV; do set_netem_env $env done trap "reset_netem_env" EXIT } cib_status() { local h=$1 stat stat=`runcmd $h crm_ticket -L | grep "^$tkt" | awk '{print $2}'` test "$stat" != "-1" } is_cib_granted() { local stat h=$1 stat=`runcmd $h crm_ticket -L | grep "^$tkt" | awk '{print $2}'` [ "$stat" = "granted" ] } check_cib_consistency() { local h gh="" rc=0 for h in $sites; do if is_cib_granted $h; then [ -n "$gh" ] && rc=1 # granted twice gh="$gh `internal_ip $h`" fi done [ -z "$gh" ] && gh="none" if [ $rc -eq 0 ]; then echo $gh return $rc fi cat<= 0 ? x : -x; } } ' | sort -n | tail -1 } booth_leader_consistency() { test `booth_list_fld 2 | sort -u | wc -l` -eq 1 } # are there two leaders or is it just that some booths are outdated booth_leader_consistency_2() { test `booth_list_fld 2 | sort -u | grep -iv none | wc -l` -le 1 } # do all booths have the same info? # possible differences: # a) more than one leader # b) some booths not uptodate (have no leader for the ticket) # c) ticket expiry times differ check_booth_consistency() { local tlist rc rc_lead maxdiff tlist=`forall_withname booth list 2>/dev/null | grep $tkt` # Check time consistency ticket_times=$(echo "$tlist" | booth_list_fld 3) if [[ $ticket_times == *"INF"* ]]; then rc=0 else maxdiff=`echo "$tlist" | max_booth_time_diff` test "$maxdiff" -eq 0 rc=$? fi # Check leader consistency echo "$tlist" | booth_leader_consistency rc_lead=$? 
if [ $rc_lead -ne 0 ]; then echo "$tlist" | booth_leader_consistency_2 rc_lead=$(($rc_lead + $?)) # rc_lead=2 if the prev test failed fi rc=$(($rc | $rc_lead<<1)) test $rc -eq 0 && return cat</dev/null wait_timeout } run_report() { local start_ts=$1 end_ts=$2 name=$3 local hb_report_opts="" local quick_opt="" logmsg "running hb_report" hb_report -Q 2>&1 | grep -sq "illegal.option" || quick_opt="-Q" if [ `id -u` != 0 ]; then hb_report_opts="-u root" fi hb_report $hb_report_opts $quick_opt -f "`date -d @$((start_ts-5))`" \ -t "`date -d @$((end_ts+60))`" \ -n "$all_nodes $arbitrators" $name 2>&1 | logmsg } runtest() { local start_ts end_ts local rc booth_status dep_rsc_status local usrmsg rc=0 TEST=$1 start_ts=`date` # to have the expanded form in the logfile start_ts=`date +%s` echo -n "Testing: $1 (ticket: $tkt)... " can_run_test $1 || return 0 echo "==================================================" | logmsg echo "starting booth test $1 ..." | logmsg if is_function setup_$1; then echo "-------------------------------------------------- (setup)" | logmsg setup_$1 rc=$? [ "$rc" -ne 0 ] && rc=$ERR_SETUP_FAILED fi if [ "$rc" -eq 0 ]; then setup_netem echo "-------------------------------------------------- (test)" | logmsg test_$1 rc=$? fi case $rc in 0) # wait a bit more if we're losing packets [ -n "$PKT_LOSS" ] && wait_timeout echo "-------------------------------------------------- (check)" | logmsg check_$1 rc=$? if [ $rc -eq 0 ]; then usrmsg="SUCCESS" else usrmsg="check FAIL: $rc" fi ;; $ERR_SETUP_FAILED) usrmsg="setup FAIL" ;; *) usrmsg="test FAIL: $rc" ;; esac end_ts=`date` # to have the expanded form in the logfile end_ts=`date +%s` echo "finished booth test $1 ($tkt): $usrmsg" | logmsg echo "==================================================" | logmsg is_function recover_$1 && recover_$1 reset_netem_env #sleep 3 all_booth_status booth_status=$? check_dep_rsc dep_rsc_status=$? 
if [ $((rc|booth_status|dep_rsc_status)) -eq 0 ]; then echo OK [ "$GET_REPORT" ] && run_report $start_ts $end_ts $TEST else echo "$usrmsg (running hb_report ... $1.tar.bz2; see also $logf)" [ $booth_status -ne 0 ] && echo "unexpected: some booth daemons not running" [ $dep_rsc_status -ne 0 ] && echo "unexpected: dependent resource failure" run_report $start_ts $end_ts $TEST reboot_test master_rc=1 fi revoke_ticket } # # the tests # # most tests start by granting ticket grant_ticket() { run_site $1 booth grant -w $tkt >/dev/null } grant_ticket_cib() { run_site $1 booth grant -C $tkt >/dev/null } ## TEST: grant ## # just a grant test_grant() { grant_ticket 1 } check_grant() { check_consistency `get_internal_site 1` } ## TEST: longgrant ## # just a grant followed by three expire times setup_longgrant() { grant_ticket 1 } test_longgrant() { wait_exp wait_exp wait_exp } check_longgrant() { check_consistency `get_internal_site 1` } ## TEST: longgrant2 ## # just a grant followed by 10 expire times setup_longgrant2() { grant_ticket_cib 1 } test_longgrant2() { local i # shellcheck disable=SC2034 # (variable exists merely out of necessity) for i in `seq 10`; do wait_exp done } check_longgrant2() { check_consistency `get_internal_site 1` } ## TEST: grant_noarb ## # just a grant with no arbitrators setup_grant_noarb() { local h for h in $arbitrators; do stop_arbitrator $h || return 1 done >/dev/null 2>&1 #sleep 1 } test_grant_noarb() { grant_ticket 1 } check_grant_noarb() { check_consistency `get_internal_site 1` } recover_grant_noarb() { local h for h in $arbitrators; do start_arbitrator $h done >/dev/null 2>&1 } applicable_grant_noarb() { [ -n "$arbitrators" ] } ## TEST: revoke ## # just a revoke setup_revoke() { grant_ticket 1 } test_revoke() { revoke_ticket } check_revoke() { check_consistency } ## TEST: grant_elsewhere ## # just a grant to another site test_grant_elsewhere() { run_site 1 booth grant -w -s `get_internal_site 2` $tkt >/dev/null } check_grant_elsewhere() { 
check_consistency `get_internal_site 2` } ## TEST: grant_site_lost ## # grant with one site lost setup_grant_site_lost() { stop_site `get_site 2` booth_status `get_site 2` && return 1 return 0 } test_grant_site_lost() { grant_ticket 1 wait_exp } check_grant_site_lost() { check_consistency `get_internal_site 1` } recover_grant_site_lost() { start_site `get_site 2` } ## TEST: grant_site_reappear ## # grant with one site lost then reappearing setup_grant_site_reappear() { stop_site `get_site 2` booth_status `get_site 2` && return 1 return 0 #sleep 1 } test_grant_site_reappear() { grant_ticket 1 || return $ERR_SETUP_FAILED check_cib `get_internal_site 1` || return $ERR_SETUP_FAILED wait_timeout start_site `get_site 2` || return $ERR_SETUP_FAILED wait_timeout wait_timeout } check_grant_site_reappear() { check_consistency `get_internal_site 1` && is_cib_granted `get_site 1` } recover_grant_site_reappear() { start_site `get_site 2` } ## TEST: simultaneous_start_even ## # simultaneous start of even number of members setup_simultaneous_start_even() { grant_ticket_cib 2 || return 1 stop_booth || return 1 #wait_timeout } test_simultaneous_start_even() { local serv for serv in $(echo $sites | sed "s/`get_site 1` //"); do start_site $serv & done for serv in $arbitrators; do start_arbitrator $serv & done wait_renewal start_site `get_site 1` wait_timeout wait_timeout } check_simultaneous_start_even() { check_consistency `get_internal_site 2` } ## TEST: slow_start_granted ## # slow start setup_slow_start_granted() { grant_ticket_cib 1 || return 1 stop_booth || return 1 #wait_timeout } test_slow_start_granted() { for serv in $sites; do start_site $serv wait_timeout done for serv in $arbitrators; do start_arbitrator $serv wait_timeout done } check_slow_start_granted() { check_consistency `get_internal_site 1` } ## TEST: restart_granted ## # restart with ticket granted setup_restart_granted() { grant_ticket_cib 1 } test_restart_granted() { restart_site `get_site 1` || return 1 
wait_timeout } check_restart_granted() { check_consistency `get_internal_site 1` } ## TEST: reload_granted ## # reload with ticket granted setup_reload_granted() { grant_ticket_cib 1 } test_reload_granted() { reload_site `get_site 1` || return 1 wait_timeout } check_reload_granted() { check_consistency `get_internal_site 1` } ## TEST: restart_granted_nocib ## # restart with ticket granted (but cib empty) setup_restart_granted_nocib() { grant_ticket_cib 1 } test_restart_granted_nocib() { stop_site_clean `get_site 1` || return 1 #wait_timeout start_site `get_site 1` || return 1 wait_timeout wait_timeout wait_timeout } check_restart_granted_nocib() { check_consistency `get_internal_site 1` } ## TEST: restart_notgranted ## # restart with ticket not granted setup_restart_notgranted() { grant_ticket_cib 1 } test_restart_notgranted() { stop_site `get_site 2` || return 1 #sleep 1 start_site `get_site 2` || return 1 wait_timeout } check_restart_notgranted() { check_consistency `get_internal_site 1` } ## TEST: failover ## # ticket failover setup_failover() { grant_ticket 1 [ -n "`get_attr`" ] && set_site_attr 2 return 0 } test_failover() { stop_site_clean `get_site 1` || return 1 booth_status `get_site 1` && return 1 wait_exp wait_timeout wait_timeout wait_timeout } check_failover() { check_consistency any } recover_failover() { start_site `get_site 1` } ## TEST: split_leader ## # split brain (leader alone) setup_split_leader() { grant_ticket_cib 1 [ -n "`get_attr`" ] && set_site_attr 2 return 0 } test_split_leader() { run_site 1 $iprules stop $port >/dev/null wait_exp wait_timeout wait_timeout wait_timeout wait_timeout check_cib any || return 1 run_site 1 $iprules start $port >/dev/null wait_timeout wait_timeout wait_timeout } check_split_leader() { check_consistency any } recover_split_leader() { run_site 1 $iprules start $port >/dev/null } ## TEST: split_follower ## # split brain (follower alone) setup_split_follower() { grant_ticket_cib 1 } test_split_follower() { 
run_site 2 $iprules stop $port >/dev/null wait_exp wait_timeout run_site 2 $iprules start $port >/dev/null wait_timeout } check_split_follower() { check_consistency `get_internal_site 1` } ## TEST: split_edge ## # split brain (leader alone) setup_split_edge() { grant_ticket_cib 1 } test_split_edge() { run_site 1 $iprules stop $port >/dev/null wait_exp run_site 1 $iprules start $port >/dev/null wait_timeout wait_timeout } check_split_edge() { check_consistency any } ## TEST: external_prog_failed ## # external test prog failed setup_external_prog_failed() { grant_ticket 1 || return 1 [ -n "`get_attr`" ] && set_site_attr 2 break_external_prog 1 show_pref 1 || return 1 } test_external_prog_failed() { wait_renewal wait_timeout } check_external_prog_failed() { check_consistency any && [ `booth_where_granted` != `get_internal_site 1` ] } recover_external_prog_failed() { repair_external_prog 1 } applicable_external_prog_failed() { [ -n "`get_rsc`" ] } ## TEST: attr_prereq_ok ## # failover with attribute prerequisite setup_attr_prereq_ok() { grant_ticket 1 || return 1 set_site_attr 2 stop_site_clean `get_site 1` booth_status `get_site 1` && return 1 return 0 } test_attr_prereq_ok() { wait_exp wait_timeout } check_attr_prereq_ok() { check_consistency `get_internal_site 2` } recover_attr_prereq_ok() { start_site `get_site 1` del_site_attr 2 } applicable_attr_prereq_ok() { [ -n "`get_attr`" ] } ## TEST: attr_prereq_fail ## # failover with failed attribute prerequisite setup_attr_prereq_fail() { grant_ticket 1 || return 1 del_site_attr 2 >/dev/null 2>&1 stop_site_clean `get_site 1` booth_status `get_site 1` && return 1 return 0 } test_attr_prereq_fail() { wait_exp wait_exp wait_exp } check_attr_prereq_fail() { check_consistency && booth_where_granted | grep -qwi none } recover_attr_prereq_fail() { start_site `get_site 1` } applicable_attr_prereq_fail() { [ -n "`get_attr`" ] } +## TEST: crmv1_group_start ## + +add_crmv1_group() { + crmv1 group testgrp rsc1 Dummy rsc2 Dummy 
fake=test +} + +rm_crmv1_group() { + crmv1 group delete testgrp +} + +check_resources() { + export OCF_ROOT=/usr/lib/ocf + export OCF_RESOURCE_INSTANCE=rsc1 + . /usr/lib/ocf/lib/heartbeat/ocf-shellfuncs + /usr/lib/ocf/resource.d/heartbeat/Dummy monitor || return 1 + OCF_RESOURCE_INSTANCE=rsc2 + export OCF_RESKEY_fake=test + /usr/lib/ocf/resource.d/heartbeat/Dummy monitor || return 1 + return 0 +} + +# crmv1 start a group +setup_crmv1_group_start_ok() { + add_crmv1_group +} +test_crmv1_group_start_ok() { + wait_exp + wait_timeout +} +check_crmv1_group_start_ok() { + check_resources +} +recover_crmv1_group_start_ok() { + stop_site `get_site 1` + stop_site `get_site 2` + rm_crmv1_group +} + # # environment modifications # # packet loss at one site 30% NETEM_ENV_single_loss() { run_site 1 $ABSPATH $run_cnf __netem__ netem_loss ${1:-30} PKT_LOSS=${1:-30} } # packet loss everywhere 30% NETEM_ENV_loss() { forall $ABSPATH $run_cnf __netem__ netem_loss ${1:-30} PKT_LOSS=${1:-30} } # network delay 100ms NETEM_ENV_net_delay() { forall $ABSPATH $run_cnf __netem__ netem_delay ${1:-100} } # duplicate packets NETEM_ENV_duplicate() { forall $ABSPATH $run_cnf __netem__ netem_duplicate ${1:-10} } # reorder packets NETEM_ENV_reorder() { forall $ABSPATH $run_cnf __netem__ netem_reorder ${1:-25} ${2:-50} } # need this if we're run from a local directory or such get_prog_abspath() { local p p=`run_site 1 rpm -ql booth-test | fgrep -w $PROG` echo ${p:-/usr/share/booth/tests/test/live_test.sh} } [ -f "$cnf" ] || { echo "ERROR: configuration file $cnf doesn't exist" usage 1 } is_pacemaker_running || { echo "ERROR: sites must run pacemaker" exit 1 } sites=`get_extern_ip site < $cnf` arbitrators=`get_extern_ip arbitrator < $cnf` internal_sites=`get_value site < $cnf` internal_arbitrators=`get_value arbitrator < $cnf` all_nodes=`get_all_nodes` port=`get_value port < $cnf` : ${port:=9929} if [ "$1" = "__netem__" ]; then shift 1 _JUST_NETEM=1 local_netem_env "$@" exit fi [ -z "$internal_sites" 
] && { echo no sites in $cnf usage 1 } exec 2>$logf BASH_XTRACEFD=2 PS4='+ `date +"%T"`: ' set -x WE_SERVER="" is_we_server && WE_SERVER=1 PREFNAME=__pref_booth_live_test authfile=`get_value authfile < $cnf` run_site 1 'test -f '"$authfile"' || booth-keygen '"$authfile" TESTS="$@" MANUAL_TESTS="$@" : ${TESTS:="grant longgrant grant_noarb grant_elsewhere grant_site_lost grant_site_reappear revoke simultaneous_start_even slow_start_granted restart_granted reload_granted restart_granted_nocib restart_notgranted failover split_leader split_follower split_edge -external_prog_failed attr_prereq_ok attr_prereq_fail"} +external_prog_failed attr_prereq_ok attr_prereq_fail +crmv1_group_start"} : ${MANUAL_TESTS:="grant longgrant grant_noarb grant_elsewhere grant_site_lost restart_granted reload_granted split_leader split_follower split_edge "} #get total number od lines in the file conf_file_size=$(grep -c $ $cnf) #get line numbers for all tickets ticket_line_numbers=$(grep -n ticket $cnf | cut -d: -f1) read -d'\n' -a TICKET_LINES<<< $ticket_line_numbers #save the part of config located before ticket definitions sed -n "1,$((${TICKET_LINES[0]}-1))p" $cnf > ${cnf}_main.config #create a separate file for every ticket data number_of_tickets=0 for i in $(seq 0 1 $((${#TICKET_LINES[@]}-1))); do ticket_line_start=${TICKET_LINES[i]} ticket_line_end=$((${TICKET_LINES[i+1]}-1)) if [ ${ticket_line_end} -lt 0 ]; then # for the last ticket ticket_line_end=${conf_file_size} fi sed -n "${ticket_line_start},${ticket_line_end}p" $cnf > ${cnf}_${number_of_tickets}.ticket number_of_tickets=$((number_of_tickets+1)) done master_rc=0 # updated in runtest for i in `seq 0 $(($number_of_tickets-1))` do cat ${cnf}_main.config > booth_${i}.conf cat ${cnf}_${i}.ticket >> booth_${i}.conf tkt=`get_tkt < booth_${i}.conf` if [ -z "$tkt" ]; then echo "Skipping empty ticket.." 
continue fi sync_conf booth_${i}.conf || exit reboot_test all_booth_status || { start_booth all_booth_status || { echo "some booth servers couldn't be started" exit 1 } } ABSPATH=`get_prog_abspath` dump_conf | logmsg eval `get_tkt_settings booth_${i}.conf` # shellcheck disable=SC2154 # (T_timeout: defined with get_tkt_settings) MIN_TIMEOUT=`awk -v tm=$T_timeout 'BEGIN{ if (tm >= 2) print tm; else print 2*tm; }'` [ -z "$T_expire" ] && { echo set $tkt expire time in $cnf usage 1 } if [ -z "$T_renewal_freq" ]; then T_renewal_freq=$((T_expire/2)) fi revoke_ticket T_mode=`get_mode` T_mode_lowercase=$(echo "$T_mode" | tr '[:upper:]' '[:lower:]') if [[ $T_mode_lowercase == *"manual"* ]]; then echo "Running tests for manual tickets.." for t in $MANUAL_TESTS; do runtest $t done else echo "Running tests for automatic Raft tickets.." for t in $TESTS; do runtest $t done fi done exit $master_rc diff --git a/unit-tests/030_crmv1.txt b/unit-tests/030_crmv1.txt new file mode 100644 index 0000000..42e6ed1 --- /dev/null +++ b/unit-tests/030_crmv1.txt @@ -0,0 +1,48 @@ +# vim: ft=sh et : +# +# Testing crmv1 groups + + +ticket: + name "tick1" + state ST_LEADER + current_term 40 + leader local + # may keep ticket all the time + term_duration 3000 + # but shall start renewal now + term_expires time(0) + 1000 + req_sent_at time(0) - 10 + + +gdb0: + call parse_extprog("test `set|grep ^BOOTH|wc -l` -ge 5", booth_conf->ticket+0) + +outgoing0: + header.cmd OP_HEARTBEAT + + +testgrp: + call parse_extprog("bin/crmv1") + ext_verifier 'bin/crmv1' + # cause re-query of the verifier + req_sent_at time(0) - 10 + +# +#gdb1: +# break ticket_broadcast_proposed_state § commands § bt § c § end + + +outgoing1: + header.cmd OP_HEARTBEAT + + +# now say that we may not have it anymore. 
+ticket2: + ext_verifier 'test "$BOOTH_TICKET" == "tick2FOO"' + # cause re-query of the verifier + req_sent_at time(0) - 10 + +finally: + state ST_LEADER + leader local diff --git a/unit-tests/bin/checkcrmv1 b/unit-tests/bin/checkcrmv1 new file mode 100755 index 0000000..94396cf --- /dev/null +++ b/unit-tests/bin/checkcrmv1 @@ -0,0 +1,39 @@ +#!/bin/sh + +add_crmv1_group() { + crmv1 group testgrp rsc1 Dummy rsc2 Dummy fake=test +} + +rm_crmv1_group() { + crmv1 group delete testgrp +} + +check_resources() { + export OCF_ROOT=/usr/lib/ocf + export OCF_RESOURCE_INSTANCE=rsc1 + . /usr/lib/ocf/lib/heartbeat/ocf-shellfuncs + /usr/lib/ocf/resource.d/heartbeat/Dummy monitor || return 1 + OCF_RESOURCE_INSTANCE=rsc2 + export OCF_RESKEY_fake=test + /usr/lib/ocf/resource.d/heartbeat/Dummy monitor || return 1 + return 0 +} + +# crmv1 start a group +setup_crmv1_group_start_ok() { + add_crmv1_group +} +test_crmv1_group_start_ok() { + wait_exp + wait_timeout +} +check_crmv1_group_start_ok() { + check_resources +} +recover_crmv1_group_start_ok() { + stop_site `get_site 1` + stop_site `get_site 2` + rm_crmv1_group +} + +check_crmv1_group_start_ok