diff --git a/Makefile.am b/Makefile.am index acd4c1e..883d2f5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,251 +1,251 @@ # Copyright (c) 2009 Red Hat, Inc. # # Authors: Andrew Beekhof # Steven Dake (sdake@redhat.com) # # This software licensed under BSD license, the text of which follows: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # - Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # - Neither the name of the MontaVista Software, Inc. nor the names of its # contributors may be used to endorse or promote products derived from this # software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF # THE POSSIBILITY OF SUCH DAMAGE. 
SPEC = $(PACKAGE_NAME).spec TARFILE = $(PACKAGE_NAME)-$(VERSION).tar.gz EXTRA_DIST = autogen.sh conf/booth.conf.example \ - script/booth-keygen script/lsb script/ocf script/service-runnable.in \ + script/booth-keygen script/lsb script/ocf script/service-runnable.in script/crmv1 README.crmv1 conf/crmv1.conf.example \ script/unit-test.py.in script/wireshark-dissector.lua \ test/arbtests.py test/assertions.py test/booth_path test/boothrunner.py \ test/boothtestenv.py.in test/clientenv.py test/clienttests.py test/live_test.sh \ test/runtests.py.in test/serverenv.py test/servertests.py test/sitetests.py \ test/utils.py \ unit-tests \ contrib \ icons \ $(SPEC).in booth-rpmlintrc \ .version build-aux/git-version-gen build-aux/PKG_CHECK_VAR.m4 \ build-aux/gitlog-to-changelog build-aux/release.mk AUTOMAKE_OPTIONS = foreign MAINTAINERCLEANFILES = Makefile.in aclocal.m4 configure depcomp \ config.guess config.sub missing install-sh \ autoheader automake autoconf test_lense.sh \ compile # Don't try to install files outside build directory for "make distcheck". 
AM_DISTCHECK_CONFIGURE_FLAGS = --with-ocfdir="$$dc_install_base/lib/ocf" dist_doc_DATA = AUTHORS README COPYING README.upgrade-from-v0.1 README-testing boothconfdir = ${BOOTHSYSCONFDIR} boothconf_DATA = conf/booth.conf.example boothsitedir = $(ocfdir)/resource.d/pacemaker boothsite_SCRIPTS = script/ocf/booth-site boothocfdir = $(ocfdir)/resource.d/booth boothocf_SCRIPTS = script/ocf/sharedrsc script/ocf/geostore boothocflibdir = $(ocfdir)/lib/booth boothocflib_DATA = script/ocf/geo_attr.sh bootharbitratordir = ${INITDDIR} bootharbitrator_SCRIPTS = script/lsb/booth-arbitrator boothnoarchdir = $(datadir)/$(PACKAGE_NAME) nodist_boothnoarch_SCRIPTS = script/service-runnable sbin_SCRIPTS = script/booth-keygen pkgconfigdir = $(datadir)/pkgconfig pkgconfig_DATA = booth.pc TESTS = test/runtests.py SUBDIRS = src docs conf coverity: cov-build --dir=cov make cov-analyze --dir cov --concurrency --wait-for-license cov-format-errors --dir cov install-exec-local: $(INSTALL) -d $(DESTDIR)/${boothconfdir} $(INSTALL) -d $(DESTDIR)/${bootharbitratordir} $(INSTALL) -d $(DESTDIR)/${boothsitedir} $(INSTALL) -d $(DESTDIR)/${boothocfdir} $(INSTALL) -d $(DESTDIR)/${BOOTH_LIB_DIR} -m 750 -chown $(CRM_DAEMON_USER):$(CRM_DAEMON_GROUP) $(DESTDIR)/${BOOTH_LIB_DIR} $(INSTALL) -d $(DESTDIR)/${BOOTH_CORE_DIR} -m 750 -chown $(CRM_DAEMON_USER):$(CRM_DAEMON_GROUP) $(DESTDIR)/${BOOTH_CORE_DIR} install-exec-hook: ln -sf ${sbindir}/boothd $(DESTDIR)/${sbindir}/booth ln -sf ${sbindir}/boothd $(DESTDIR)/${sbindir}/geostore uninstall-local: rmdir $(DESTDIR)/${boothconfdir} || :; rmdir $(DESTDIR)/${bootharbitratordir} || :; rmdir $(DESTDIR)/${boothsitedir} || :; rmdir $(DESTDIR)/${BOOTH_CORE_DIR} || :; rmdir $(DESTDIR)/${BOOTH_LIB_DIR} || :; test: check lint: for dir in src; do make -C $$dir lint; done clean-local: rm -rf test/*.pyc test/__pycache__ test/runtests.py test/boothtestenv.py cov* $(SPEC) dist-clean-local: rm -f autoconf automake autoheader # release/versioning BUILT_SOURCES = .version .version: 
echo $(VERSION) > $@-t && mv $@-t $@ dist-hook: gen-ChangeLog echo $(VERSION) > $(distdir)/.tarball-version test/runtests.py: test/runtests.py.in test/boothtestenv.py rm -f $@-t $@ mkdir -p ${abs_top_builddir}/test sed \ -e 's#PYTHON_SHEBANG#${PYTHON_SHEBANG}#g' \ -e 's#TEST_SRC_DIR#${abs_top_srcdir}/test#g' \ -e 's#TEST_BUILD_DIR#${abs_top_builddir}/test#g' \ $< > $@-t; chmod a-w $@-t chmod u+x $@-t mv $@-t $@ test/boothtestenv.py: test/boothtestenv.py.in rm -f $@-t $@ mkdir -p ${abs_top_builddir}/test sed \ -e 's#TEST_SRC_DIR#${abs_top_srcdir}/test#g' \ -e 's#TEST_BUILD_DIR#${abs_top_builddir}/test#g' \ $< > $@-t; chmod a-w $@-t mv $@-t $@ ## make rpm/srpm section. $(abs_builddir)/booth-rpmlintrc: cat $(abs_srcdir)/booth-rpmlintrc > booth-rpmlintrc $(SPEC): $(SPEC).in .version config.status $(abs_builddir)/booth-rpmlintrc rm -f $@-t $@ date="$(shell LC_ALL=C date "+%a %b %d %Y")" && \ gvgver="`cd $(abs_srcdir); build-aux/git-version-gen --fallback $(VERSION) .tarball-version .gitarchivever`" && \ if [ "$$gvgver" = "`echo $$gvgver | sed 's/-/./'`" ];then \ rpmver="$$gvgver" && \ alphatag="" && \ dirty="" && \ numcomm="0"; \ else \ gitver="`echo $$gvgver | sed 's/\(.*\)\./\1-/'`" && \ rpmver=`echo $$gitver | sed 's/-.*//g'` && \ alphatag=`echo $$gvgver | sed 's/[^-]*-\([^-]*\).*/\1/'` && \ numcomm=`echo $$gitver | sed 's/[^-]*-\([^-]*\).*/\1/'` && \ dirty="" && \ if [ "`echo $$gitver | sed 's/^.*-dirty$$//g'`" = "" ];then \ dirty="dirty"; \ fi \ fi && \ if [ "$$numcomm" = "0" ]; then numcomm=""; fi && \ if [ -n "$$numcomm" ]; then numcomm="%global numcomm $$numcomm"; fi && \ if [ "$$alphatag" = "$$gitver" ]; then alphatag=""; fi && \ if [ -n "$$alphatag" ]; then alphatag="%global alphatag $$alphatag"; fi && \ if [ -n "$$dirty" ]; then dirty="%global dirty dirty"; fi && \ sed \ -e "s#@version@#$$rpmver#g" \ -e "s#@ALPHATAG@#$$alphatag#g" \ -e "s#@NUMCOMM@#$$numcomm#g" \ -e "s#@DIRTY@#$$dirty#g" \ -e "s#@date@#$$date#g" \ $(abs_srcdir)/$@.in > $@-t; sed -i -e 
"s#@uname@#$(CRM_DAEMON_USER)#g" $@-t sed -i -e "s#@gname@#$(CRM_DAEMON_GROUP)#g" $@-t if BUILD_ASCIIDOC_HTML_MAN sed -i -e "s#@bcond_html_man@#bcond_without#g" $@-t else sed -i -e "s#@bcond_html_man@#bcond_with#g" $@-t endif if IS_ASCIIDOC sed -i -e "s#@asciidoc@#asciidoc#g" $@-t else sed -i -e "s#@asciidoc@#asciidoctor#g" $@-t endif if LOGGING_LIBQB sed -i -e "s#@bcond_glue@#bcond_with#g" $@-t else sed -i -e "s#@bcond_glue@#bcond_without#g" $@-t endif if PYTHON_IS_VERSION3 sed -i -e "s#@bcond_python3@#bcond_without#g" $@-t else sed -i -e "s#@bcond_python3@#bcond_with#g" $@-t endif if RUN_BUILD_TESTS sed -i -e "s#@bcond_run_build_tests@#bcond_without#g" $@-t else sed -i -e "s#@bcond_run_build_tests@#bcond_with#g" $@-t endif chmod a-w $@-t mv $@-t $@ rm -f $@-t* $(TARFILE): $(MAKE) dist RPMBUILDOPTS = --define "_sourcedir $(abs_builddir)" \ --define "_specdir $(abs_builddir)" \ --define "_builddir $(abs_builddir)" \ --define "_srcrpmdir $(abs_builddir)" \ --define "_rpmdir $(abs_builddir)" srpm: clean $(MAKE) $(SPEC) $(TARFILE) rpmbuild $(RPMBUILDOPTS) --nodeps -bs $(SPEC) rpm: clean $(MAKE) $(SPEC) $(TARFILE) rpmbuild $(RPMBUILDOPTS) -ba $(SPEC) gen_start_date = 2000-01-01 .PHONY: gen-ChangeLog gen-ChangeLog: if test -d $(abs_srcdir)/.git; then \ LC_ALL=C $(top_srcdir)/build-aux/gitlog-to-changelog \ --since=$(gen_start_date) > $(distdir)/cl-t; \ rm -f $(distdir)/ChangeLog; \ mv $(distdir)/cl-t $(distdir)/ChangeLog; \ fi diff --git a/README.crmv1 b/README.crmv1 new file mode 100644 index 0000000..b2708a2 --- /dev/null +++ b/README.crmv1 @@ -0,0 +1,49 @@ +CRMv1 cluster +============= + +Heartbeat is a predecessor to Pacemaker and here we make a +comeback to that kind of clustering. Why should we do that? +Firstly, Pacemaker became a behemoth, something that can brew +your coffee, but also something that is rather unwieldy and +difficult to manage. 
Secondly, booth is a very reliable +distributed engine and in our testing it was also used in a +typical LAN and passed all the tests with flying colours. So, +this is something for people who don't need all the bells and +whistles of Pacemaker, but still want to have HA. + +STONITH is missing, but the cluster must have at least three +members. Hence, the booth arbitrator serves as a fencing +replacement. This is as it should be: a two-node cluster is +indeed very difficult to run. The booth arbitrator can be a +smallish instance running anywhere in your network. As with +fencing, it doesn't even have to be particularly reliable, it +just has to be there when we need it. + +Setup +----- + +Just like with heartbeat, CRMv1 in booth is very simple to set up. +There is a helper program called `crmv1` which is going to handle +all the details. In the simplest setup, which is anyway the most +common, there is just one group. The resources are run in order, +there is no parallelism. + +Here is the usage with one realistic example: + + Usage: crmv1 {group <name> <resource> [param=value ...] ...|group delete <name>} + + Examples: + + crmv1 group bigdb \ + IPaddr ip=192.168.1.1 \ + ocf:linbit:drbd drbd_resource=bigdisk \ + Filesystem device=/dev/bigdisk directory=/bigdisk fstype=xfs \ + oracle sid=bigdb + + crmv1 group delete bigdb + +There is no monitoring of resources, but it is easy to run an +external monitor of the topmost resource, i.e. the service which +is actually used by the users. If that monitor fails, then it +makes sense to move the group to the other node. + diff --git a/conf/booth.conf.example b/conf/booth.conf.example index 1d63547..440046a 100644 --- a/conf/booth.conf.example +++ b/conf/booth.conf.example @@ -1,27 +1,31 @@ # The booth configuration file is "/etc/booth/booth.conf". You need to # prepare the same booth configuration file on each arbitrator and # each node in the cluster sites where the booth daemon can be launched. 
# Here is an example of the configuration file: # "transport" means which transport layer booth daemon will use. # Currently only "UDP" is supported. transport="UDP" # The port that booth daemons will use to talk to each other. port="9929" # The arbitrator IP. If you want to configure several arbitrators, # you need to configure each arbitrator with a separate line. arbitrator="147.2.207.14" # The site IP. The cluster site uses this IP to talk to other sites. # Like arbitrator, you need to configure each site with a separate line. site="147.4.215.19" site="147.18.2.1" # The ticket name, which corresponds to a set of resources which can be # fail-overed among different sites. ticket="ticketA" ticket="ticketB" expire = 600 weights = 1,2,3 + +# Use the CRMv1 feature, i.e. make the booth a cluster in its own +# right (run resource, etc) +crmv1 diff --git a/conf/crmv1.conf.example b/conf/crmv1.conf.example new file mode 100644 index 0000000..e108265 --- /dev/null +++ b/conf/crmv1.conf.example @@ -0,0 +1,15 @@ +# The crmv1 configuration file is "/etc/booth/crmv1/conf". You need to +# prepare the same configuration file on each arbitrator and +# each node in the cluster sites where the booth daemon can be launched. + +# The configuration consists of groups definition with parameters for resources +# It is recommended to use the crmv1 program to prepare this +# configuration file. +# Here is one example: + +group bigdb \ + IPaddr ip=192.168.1.1 \ + ocf:linbit:drbd drbd_resource=bigdisk \ + Filesystem device=/dev/bigdisk directory=/bigdisk fstype=xfs \ + oracle sid=bigdb +
diff --git a/script/crmv1 b/script/crmv1
new file mode 100755
index 0000000..b9b0416
--- /dev/null
+++ b/script/crmv1
@@ -0,0 +1,190 @@
+#!/bin/bash
+#
+# This is crmv1, a tool to configure booth as a crmv1 style
+# cluster.
+# It basically manages groups. There is no concept of a group in
+# booth, but we can get by by using the before-acquire-handler.
+# Essentially, the handler is used to run programs (resource
+# agents). Just how the resource agents are configured is another
+# matter.
+#
+
+CONF_DIR=/etc/booth
+CRMV1_DIR=$CONF_DIR/crmv1
+CRMV1_CONF=$CRMV1_DIR/conf
+# resource agents are looked up in <OCF_ROOT>/resource.d/<provider>
+OCF_RA_DIR=${OCF_ROOT:-/usr/lib/ocf}/resource.d
+
+# position of the next resource agent within the group
+cnt=0
+
+# usage <exit code>: print the usage on stderr and exit
+usage() {
+	cat <<EOF >&2
+
+Usage: $0 {group <name> <resource> [param=value ...] ...|group delete <name>}
+
+Examples:
+
+	crmv1 group bigdb \\
+		IPaddr ip=192.168.1.1 \\
+		ocf:linbit:drbd drbd_resource=bigdisk \\
+		Filesystem device=/dev/bigdisk directory=/bigdisk fstype=xfs \\
+		oracle sid=bigdb
+
+	crmv1 group delete bigdb
+
+EOF
+	exit "$1"
+}
+
+# fatal <message>: print the message on stderr and exit; within a
+# command substitution this ends only the subshell, so the caller
+# has to check the exit code there
+fatal() {
+	cat <<EOF >&2
+
+FATAL: $*
+
+EOF
+	exit 1
+}
+
+# check_name <group>: the group name is used as a directory name
+# below $CRMV1_DIR (which del_group removes recursively), so it
+# must not be able to refer to anything else
+check_name() {
+	case "$1" in
+	""|.|..|*/*|*[[:space:]]*)
+		fatal "invalid group name \"$1\""
+		;;
+	esac
+}
+
+# group_exists <group>: succeeds if the group is already defined;
+# the name is compared as a string, not as a pattern, so that
+# "big" does not match "bigdb"
+group_exists() {
+	[ -f "$CRMV1_CONF" ] &&
+	awk -v g="$1" '
+		$1 == "group" && $2 == g { found = 1 }
+		END { exit !found }
+	' "$CRMV1_CONF"
+}
+
+# add_group group <group> <resource> [param=value ...] ...
+# record the group definition in the configuration file
+add_group() {
+	mkdir -p "$CRMV1_DIR/$2"
+	echo "$@" >> "$CRMV1_CONF"
+}
+
+# del_group <group>: remove the links and the definition of the
+# group, including the continuation lines of the definition
+del_group() {
+	local tmp="$CRMV1_CONF.$$"
+
+	rm -rf "$CRMV1_DIR/$1"
+	[ -f "$CRMV1_CONF" ] || return 0
+	awk -v g="$1" '
+		skip { if ($0 !~ /\\$/) skip = 0; next }
+		$1 == "group" && $2 == g { if ($0 ~ /\\$/) skip = 1; next }
+		{ print }
+	' "$CRMV1_CONF" > "$tmp" &&
+	cat "$tmp" > "$CRMV1_CONF"
+	rm -f "$tmp"
+}
+
+# get_ra <resource>: print the path of the resource agent; the
+# resource is either <name> (the heartbeat provider is assumed)
+# or <class>:<provider>:<name>
+get_ra() {
+	local provider ra
+
+	case "$1" in
+	*:*:*)
+		ra=${1##*:}
+		provider=${1%:*}
+		provider=${provider#*:}
+		;;
+	*:*)
+		fatal "bad resource agent $1, expected <name> or <class>:<provider>:<name>"
+		;;
+	*)
+		provider=heartbeat
+		ra=$1
+		;;
+	esac
+	if [ -f "$OCF_RA_DIR/$provider/$ra" ]; then
+		echo "$OCF_RA_DIR/$provider/$ra"
+	else
+		fatal "no resource agent $1, did you install resource-agents?"
+	fi
+}
+
+# mk_link <group> <path> <position>: link the resource agent into
+# the group directory, prefixed with its position in the group
+mk_link() {
+	ln -fs "$2" "$CRMV1_DIR/$1/$(printf '%02d' "$3")_$(basename "$2")"
+}
+
+# ln_ra <group> <resource>: add the next resource agent to the group
+ln_ra() {
+	local ra_f
+
+	# fatal in get_ra ends only the subshell, hence the exit here
+	ra_f=$(get_ra "$2") || exit 1
+	mk_link "$1" "$ra_f" "$cnt" || fatal "cannot link $ra_f"
+	cnt=$((cnt+1))
+}
+
+# this is not really creating a group, we just parse the input to
+# make sure that the group is well defined; the group is then
+# created by boothd on starting; consider this a document on how
+# creating a group should be implemented
+new_group() {
+	local group="$2" p n_ra=0
+
+	shift 2
+	# check everything first, so that a bad definition leaves
+	# nothing behind
+	for p; do
+		case "$p" in
+		*=*)
+			# a parameter of the preceding resource agent
+			[ $n_ra -gt 0 ] ||
+				fatal "parameter $p does not follow a resource agent"
+			;;
+		*)
+			get_ra "$p" > /dev/null
+			n_ra=$((n_ra+1))
+			;;
+		esac
+	done
+	mkdir -p "$CRMV1_DIR/$group" ||
+		fatal "cannot create $CRMV1_DIR/$group"
+	for p; do
+		case "$p" in
+		*=*) ;;
+		*) ln_ra "$group" "$p" ;;
+		esac
+	done
+	add_group group "$group" "$@"
+}
+
+if [ $# -lt 3 ]; then
+	usage 1
+fi
+if [ "$1" != group ]; then
+	usage 1
+fi
+if [ "$2" != delete ]; then
+	check_name "$2"
+	if group_exists "$2"; then
+		fatal "group $2 already exists"
+	fi
+	new_group "$@"
+else
+	check_name "$3"
+	del_group "$3"
+fi
diff --git a/src/booth.h b/src/booth.h index 0cd43c0..65824d3 100644 --- a/src/booth.h +++ b/src/booth.h @@ -1,386 +1,384 @@ /* * Copyright (C) 2011 Jiaju Zhang * Copyright (C) 2013-2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
*/ #ifndef _BOOTH_H #define _BOOTH_H #include #include #include #include #include #include #include #include "timer.h" #define BOOTH_RUN_DIR "/var/run/booth/" #define BOOTH_LOG_DIR "/var/log" #define BOOTH_LOGFILE_NAME "booth.log" #define BOOTH_DEFAULT_CONF_DIR "/etc/booth/" #define BOOTH_DEFAULT_CONF_NAME "booth" #define BOOTH_DEFAULT_CONF_EXT ".conf" #define BOOTH_DEFAULT_CONF \ BOOTH_DEFAULT_CONF_DIR BOOTH_DEFAULT_CONF_NAME BOOTH_DEFAULT_CONF_EXT +#define BOOTH_DEFAULT_CRMV1_CONF BOOTH_DEFAULT_CONF_DIR "crmv1/conf" #define DAEMON_NAME "boothd" #define BOOTH_PATH_LEN PATH_MAX #define BOOTH_MAX_KEY_LEN 64 #define BOOTH_MIN_KEY_LEN 8 /* hash size is 160 bits (sha1), but add a bit more space in case * stronger hashes are required */ #define BOOTH_MAC_SIZE 24 /* tolerate packets which are not older than 10 minutes */ #define BOOTH_DEFAULT_MAX_TIME_SKEW 600 #define BOOTH_DEFAULT_PORT 9929 #define BOOTHC_MAGIC 0x5F1BA08C #define BOOTHC_VERSION 0x00010003 /** Timeout value for poll(). * Determines frequency of periodic jobs, eg. when send-retries are done. * See process_tickets(). */ #define POLL_TIMEOUT 100 /** @{ */ /** The on-network data structures and constants. */ #define BOOTH_NAME_LEN 64 #define BOOTH_ATTRVAL_LEN 128 #define CHAR2CONST(a,b,c,d) ((a << 24) | (b << 16) | (c << 8) | d) /* Says that the ticket shouldn't be active anywhere. * NONE wouldn't be specific enough. */ #define NO_ONE ((uint32_t)-1) /* Says that another one should recover. 
*/ #define TICKET_LOST CHAR2CONST('L', 'O', 'S', 'T') typedef char boothc_site[BOOTH_NAME_LEN]; typedef char boothc_ticket[BOOTH_NAME_LEN]; typedef char boothc_attr[BOOTH_NAME_LEN]; typedef char boothc_attr_value[BOOTH_ATTRVAL_LEN]; /* message option bits */ enum { BOOTH_OPT_AUTH = 1, /* authentication */ BOOTH_OPT_ATTR = 4, /* attr message type, otherwise ticket */ }; struct boothc_header { /** Various options, message type, authentication */ uint32_t opts; /** Generation info (used for authentication) * This is something that would need to be monotone * incremental. CLOCK_MONOTONIC should fit the purpose. On * failover, however, it may happen that the new host has a * clock which is significantly behind the clock of old host. * We'll need to relax a bit for the nodes which are starting * (just accept all OP_STATUS). */ uint32_t secs; /* seconds */ uint32_t usecs; /* microseconds */ /** BOOTHC_MAGIC */ uint32_t magic; /** BOOTHC_VERSION */ uint32_t version; /** Packet source; site_id. See add_site(). */ uint32_t from; /** Length including header */ uint32_t length; /** The command respectively protocol state. See cmd_request_t. */ uint32_t cmd; /** The matching request (what do we reply to). See cmd_request_t. */ uint32_t request; /** Command options. */ uint32_t options; /** The reason for this RPC. */ uint32_t reason; /** Result of operation. 0 == OK */ uint32_t result; char data[0]; } __attribute__((packed)); struct ticket_msg { /** Ticket name. */ boothc_ticket id; /** Current leader. May be NO_ONE. See add_site(). * For a OP_REQ_VOTE this is */ uint32_t leader; /** Current term. */ uint32_t term; uint32_t term_valid_for; /* Perhaps we need to send a status along, too - like * starting, running, stopping, error, ...? */ } __attribute__((packed)); struct attr_msg { /** Ticket name. */ boothc_ticket tkt_id; /** Attribute name. */ boothc_attr name; /** The value. 
*/ boothc_attr_value val; } __attribute__((packed)); /* GEO attributes * attributes should be regularly updated. */ struct geo_attr { /** Update timestamp. */ timetype update_ts; /** The value. */ char *val; /** Who set it (currently unused) struct booth_site *origin; */ }; struct hmac { /** hash id, currently set to constant BOOTH_HASH */ uint32_t hid; /** the calculated hash, BOOTH_MAC_SIZE is big enough to * accommodate the hash of type hid */ unsigned char hash[BOOTH_MAC_SIZE]; } __attribute__((packed)); struct boothc_hdr_msg { struct boothc_header header; struct hmac hmac; } __attribute__((packed)); struct boothc_ticket_msg { struct boothc_header header; struct ticket_msg ticket; struct hmac hmac; } __attribute__((packed)); struct boothc_attr_msg { struct boothc_header header; struct attr_msg attr; struct hmac hmac; } __attribute__((packed)); typedef enum { /* 0x43 = "C"ommands */ CMD_LIST = CHAR2CONST('C', 'L', 's', 't'), CMD_GRANT = CHAR2CONST('C', 'G', 'n', 't'), CMD_REVOKE = CHAR2CONST('C', 'R', 'v', 'k'), CMD_PEERS = CHAR2CONST('P', 'e', 'e', 'r'), /* Replies */ CL_RESULT = CHAR2CONST('R', 's', 'l', 't'), CL_LIST = CHAR2CONST('R', 'L', 's', 't'), CL_GRANT = CHAR2CONST('R', 'G', 'n', 't'), CL_REVOKE = CHAR2CONST('R', 'R', 'v', 'k'), /* get status from another server */ OP_STATUS = CHAR2CONST('S', 't', 'a', 't'), OP_MY_INDEX = CHAR2CONST('M', 'I', 'd', 'x'), /* reply to status */ /* Raft */ OP_REQ_VOTE = CHAR2CONST('R', 'V', 'o', 't'), /* start election */ OP_VOTE_FOR = CHAR2CONST('V', 't', 'F', 'r'), /* reply to REQ_VOTE */ OP_HEARTBEAT= CHAR2CONST('H', 'r', 't', 'B'), /* Heartbeat */ OP_ACK = CHAR2CONST('A', 'c', 'k', '.'), /* Ack for heartbeats and revokes */ OP_UPDATE = CHAR2CONST('U', 'p', 'd', 'E'), /* Update ticket */ OP_REVOKE = CHAR2CONST('R', 'e', 'v', 'k'), /* Revoke ticket */ OP_REJECTED = CHAR2CONST('R', 'J', 'C', '!'), /* Attributes */ ATTR_SET = CHAR2CONST('A', 'S', 'e', 't'), ATTR_GET = CHAR2CONST('A', 'G', 'e', 't'), ATTR_DEL = 
CHAR2CONST('A', 'D', 'e', 'l'), ATTR_LIST = CHAR2CONST('A', 'L', 's', 't'), } cmd_request_t; typedef enum { /* for compatibility with other functions */ RLT_SUCCESS = 0, RLT_ASYNC = CHAR2CONST('A', 's', 'y', 'n'), RLT_MORE = CHAR2CONST('M', 'o', 'r', 'e'), RLT_SYNC_SUCC = CHAR2CONST('S', 'c', 'c', 's'), RLT_SYNC_FAIL = CHAR2CONST('F', 'a', 'i', 'l'), RLT_INVALID_ARG = CHAR2CONST('I', 'A', 'r', 'g'), RLT_NO_SUCH_ATTR = CHAR2CONST('N', 'A', 't', 'r'), RLT_CIB_PENDING = CHAR2CONST('P', 'e', 'n', 'd'), RLT_EXT_FAILED = CHAR2CONST('X', 'P', 'r', 'g'), RLT_ATTR_PREREQ = CHAR2CONST('A', 'P', 'r', 'q'), RLT_TICKET_IDLE = CHAR2CONST('T', 'i', 'd', 'l'), RLT_OVERGRANT = CHAR2CONST('O', 'v', 'e', 'r'), RLT_PROBABLY_SUCCESS = CHAR2CONST('S', 'u', 'c', '?'), RLT_BUSY = CHAR2CONST('B', 'u', 's', 'y'), RLT_AUTH = CHAR2CONST('A', 'u', 't', 'h'), RLT_TERM_OUTDATED = CHAR2CONST('T', 'O', 'd', 't'), RLT_TERM_STILL_VALID = CHAR2CONST('T', 'V', 'l', 'd'), RLT_YOU_OUTDATED = CHAR2CONST('O', 'u', 't', 'd'), RLT_REDIRECT = CHAR2CONST('R', 'e', 'd', 'r'), } cmd_result_t; typedef enum { /* for compatibility with other functions */ OR_JUST_SO = 0, OR_AGAIN = CHAR2CONST('A', 'a', 'a', 'a'), OR_TKT_LOST = CHAR2CONST('T', 'L', 's', 't'), OR_REACQUIRE = CHAR2CONST('R', 'a', 'c', 'q'), OR_ADMIN = CHAR2CONST('A', 'd', 'm', 'n'), OR_LOCAL_FAIL = CHAR2CONST('L', 'o', 'c', 'F'), OR_STEPDOWN = CHAR2CONST('S', 'p', 'd', 'n'), OR_SPLIT = CHAR2CONST('S', 'p', 'l', 't'), } cmd_reason_t; /* bitwise command options */ typedef enum { OPT_IMMEDIATE = 1, /* immediate grant */ OPT_WAIT = 2, /* wait for the elections' outcome */ OPT_WAIT_COMMIT = 4, /* wait for the ticket commit to CIB */ } cmd_options_t; /** @} */ /** @{ */ struct booth_site { /** Calculated ID. See add_site(). */ int site_id; int type; int local; /** Roles, like ACCEPTOR, PROPOSER, or LEARNER. Not really used ATM. */ int role; boothc_site addr_string; int tcp_fd; int udp_fd; /* 0-based, used for indexing into per-ticket weights. 
* -1 for no_leader. */ int index; uint64_t bitmask; unsigned short family; union { struct sockaddr_in sa4; struct sockaddr_in6 sa6; }; int saddrlen; int addrlen; /** statistics */ time_t last_recv; unsigned int sent_cnt; unsigned int sent_err_cnt; unsigned int resend_cnt; unsigned int recv_cnt; unsigned int recv_err_cnt; unsigned int sec_cnt; unsigned int invalid_cnt; /** last timestamp seen from this site */ uint32_t last_secs; uint32_t last_usecs; }; extern struct booth_site *local; extern struct booth_site *const no_leader; /** @} */ struct booth_transport; struct client { int fd; const struct booth_transport *transport; struct boothc_ticket_msg *msg; int offset; /* bytes read so far into msg */ void (*workfn)(int); void (*deadfn)(int); }; extern struct client *clients; extern struct pollfd *pollfds; int client_add(int fd, const struct booth_transport *tpt, void (*workfn)(int ci), void (*deadfn)(int ci)); int find_client_by_fd(int fd); void safe_copy(char *dest, char *value, size_t buflen, const char *description); int update_authkey(void); void list_peers(int fd); struct command_line { int type; /* ACT_ */ int op; /* OP_ */ int options; /* OPT_ */ char configfile[BOOTH_PATH_LEN]; char lockfile[BOOTH_PATH_LEN]; char site[BOOTH_NAME_LEN]; struct boothc_ticket_msg msg; struct boothc_attr_msg attr_msg; }; extern struct command_line cl; /* http://gcc.gnu.org/onlinedocs/gcc/Typeof.html */ #define min(a__,b__) \ ({ typeof (a__) _a = (a__); \ typeof (b__) _b = (b__); \ _a < _b ? _a : _b; }) #define max(a__,b__) \ ({ typeof (a__) _a = (a__); \ typeof (b__) _b = (b__); \ _a > _b ? 
_a : _b; }) - - - #endif /* _BOOTH_H */ diff --git a/src/config.c b/src/config.c index f0ca4aa..12d2a2f 100644 --- a/src/config.c +++ b/src/config.c @@ -1,1062 +1,1240 @@ /* * Copyright (C) 2011 Jiaju Zhang * Copyright (C) 2013-2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #include "b_config.h" #include #include #include #include #include #include #include #include #include #include #include #include "booth.h" #include "config.h" #include "raft.h" #include "ticket.h" #include "log.h" static int ticket_size = 0; static int ticket_realloc(void) { const int added = 5; int had, want; void *p; had = booth_conf->ticket_allocated; want = had + added; p = realloc(booth_conf->ticket, sizeof(struct ticket_config) * want); if (!p) { log_error("can't alloc more tickets"); return -ENOMEM; } booth_conf->ticket = p; memset(booth_conf->ticket + had, 0, sizeof(struct ticket_config) * added); booth_conf->ticket_allocated = want; return 0; } static void hostname_to_ip(char * hostname) { struct addrinfo hints; struct addrinfo *result, *rp; int res; int addr_found = 0; const char *ntop_res; memset(&hints, 0, sizeof(hints)); hints.ai_family = AF_UNSPEC; hints.ai_socktype = SOCK_STREAM; res = getaddrinfo(hostname, NULL, &hints, &result); if (res != 0) { log_error("can't find IP for the host \"%s\"", hostname); return; 
} /* Return the first found AF_INET or AF_INET6 address */ for (rp = result; rp && !addr_found; rp = rp->ai_next) { if (rp->ai_family != AF_INET && rp->ai_family != AF_INET6) { continue ; } switch (rp->ai_family) { case AF_INET: ntop_res = inet_ntop(rp->ai_family, &((struct sockaddr_in *)(rp->ai_addr))->sin_addr, hostname, BOOTH_NAME_LEN - 1); break; case AF_INET6: ntop_res = inet_ntop(rp->ai_family, &((struct sockaddr_in6 *)(rp->ai_addr))->sin6_addr, hostname, BOOTH_NAME_LEN - 1); break; } if (ntop_res) { /* buffer overflow will not happen (IPv6 notation < 63 chars), but suppress the warnings */ hostname[BOOTH_NAME_LEN - 1] = '\0'; addr_found = 1; } } if (!addr_found) { log_error("no IP addresses found for the host \"%s\"", hostname); } freeaddrinfo(result); } static int add_site(char *addr_string, int type) { int rv; struct booth_site *site; uLong nid; uint32_t mask; int i; rv = 1; if (booth_conf->site_count == MAX_NODES) { log_error("too many nodes"); goto out; } if (strnlen(addr_string, sizeof(booth_conf->site[0].addr_string)) >= sizeof(booth_conf->site[0].addr_string)) { log_error("site address \"%s\" too long", addr_string); goto out; } site = booth_conf->site + booth_conf->site_count; site->family = AF_INET; site->type = type; /* buffer overflow will not hapen (we've already checked that addr_string will fit incl. 
terminating '\0' above), but suppress the warnings with copying everything but the boundary byte, which is valid as-is, since this last byte will be safely pre-zeroed from the struct booth_config initialization */ strncpy(site->addr_string, addr_string, sizeof(site->addr_string) - 1); if (!(inet_pton(AF_INET, site->addr_string, &site->sa4.sin_addr) > 0) && !(inet_pton(AF_INET6, site->addr_string, &site->sa6.sin6_addr) > 0)) { /* Not a valid address, so let us try to convert it into an IP address */ hostname_to_ip(site->addr_string); } site->index = booth_conf->site_count; site->bitmask = 1 << booth_conf->site_count; /* Catch site overflow */ assert(site->bitmask); booth_conf->all_bits |= site->bitmask; if (type == SITE) booth_conf->sites_bits |= site->bitmask; site->tcp_fd = -1; booth_conf->site_count++; rv = 0; memset(&site->sa6, 0, sizeof(site->sa6)); nid = crc32(0L, NULL, 0); /* Using the ASCII representation in site->addr_string (both sizeof() * and strlen()) gives quite a lot of collisions; a brute-force run * from 0.0.0.0 to 24.0.0.0 gives ~4% collisions, and this tends to * increase even more. * Whether there'll be a collision in real-life, with 3 or 5 nodes, is * another question ... but for now get the ID from the binary * representation - that had *no* collisions up to 32.0.0.0. * Note that POSIX mandates inet_pton to arange the address pointed * to by "dst" in network byte order, assuring little/big-endianess * mutual compatibility. 
*/ if (inet_pton(AF_INET, site->addr_string, &site->sa4.sin_addr) > 0) { site->family = AF_INET; site->sa4.sin_family = site->family; site->sa4.sin_port = htons(booth_conf->port); site->saddrlen = sizeof(site->sa4); site->addrlen = sizeof(site->sa4.sin_addr); site->site_id = crc32(nid, (void*)&site->sa4.sin_addr, site->addrlen); } else if (inet_pton(AF_INET6, site->addr_string, &site->sa6.sin6_addr) > 0) { site->family = AF_INET6; site->sa6.sin6_family = site->family; site->sa6.sin6_flowinfo = 0; site->sa6.sin6_port = htons(booth_conf->port); site->saddrlen = sizeof(site->sa6); site->addrlen = sizeof(site->sa6.sin6_addr); site->site_id = crc32(nid, (void*)&site->sa6.sin6_addr, site->addrlen); } else { log_error("Address string \"%s\" is bad", site->addr_string); rv = EINVAL; } /* Make sure we will never collide with NO_ONE, * or be negative (to get "get_local_id() < 0" working). */ mask = 1 << (sizeof(site->site_id)*8 -1); assert(NO_ONE & mask); site->site_id &= ~mask; /* Test for collisions with other sites */ for(i=0; iindex; i++) if (booth_conf->site[i].site_id == site->site_id) { log_error("Got a site-ID collision. Please file a bug on https://github.com/ClusterLabs/booth/issues/new, attaching the configuration file."); exit(1); } out: return rv; } inline static char *skip_while_in(const char *cp, int (*fn)(int), const char *allowed) { /* strchr() returns a pointer to the terminator if *cp == 0. 
*/ while (*cp && (fn(*cp) || strchr(allowed, *cp))) cp++; /* discard "const" qualifier */ return (char*)cp; } inline static char *skip_while(char *cp, int (*fn)(int)) { while (fn(*cp)) cp++; return cp; } inline static char *skip_until(char *cp, char expected) { while (*cp && *cp != expected) cp++; return cp; } static inline int is_end_of_line(char *cp) { char c = *cp; return c == '\n' || c == 0 || c == '#'; } static int add_ticket(const char *name, struct ticket_config **tkp, const struct ticket_config *def) { int rv; struct ticket_config *tk; if (booth_conf->ticket_count == booth_conf->ticket_allocated) { rv = ticket_realloc(); if (rv < 0) return rv; } tk = booth_conf->ticket + booth_conf->ticket_count; booth_conf->ticket_count++; if (!check_max_len_valid(name, sizeof(tk->name))) { log_error("ticket name \"%s\" too long.", name); return -EINVAL; } if (find_ticket_by_name(name, NULL)) { log_error("ticket name \"%s\" used again.", name); return -EINVAL; } if (* skip_while_in(name, isalnum, "-/")) { log_error("ticket name \"%s\" invalid; only alphanumeric names.", name); return -EINVAL; } strcpy(tk->name, name); tk->timeout = def->timeout; tk->term_duration = def->term_duration; tk->retries = def->retries; memcpy(tk->weight, def->weight, sizeof(tk->weight)); tk->mode = def->mode; if (tkp) *tkp = tk; return 0; } static int postproc_ticket(struct ticket_config *tk) { if (!tk) return 1; if (!tk->renewal_freq) { tk->renewal_freq = tk->term_duration/2; } if (tk->timeout*(tk->retries+1) >= tk->renewal_freq) { log_error("%s: total amount of time to " "retry sending packets cannot exceed " "renewal frequency " "(%d*(%d+1) >= %d)", tk->name, tk->timeout, tk->retries, tk->renewal_freq); return 0; } return 1; } /* returns number of weights, or -1 on bad input. 
*/ static int parse_weights(const char *input, int weights[MAX_NODES]) { int i, v; char *cp; for(i=0; i= MAX_ARGS) { log_error("too many arguments for the acquire-handler"); free(tk_test.path); return -1; } tk_test.argv[i++] = p; } while (p); return 0; } struct toktab grant_type[] = { { "auto", GRANT_AUTO}, { "manual", GRANT_MANUAL}, { NULL, 0}, }; struct toktab attr_op[] = { {"eq", ATTR_OP_EQ}, {"ne", ATTR_OP_NE}, {NULL, 0}, }; static int lookup_tokval(char *key, struct toktab *tab) { struct toktab *tp; for (tp = tab; tp->str; tp++) { if (!strcmp(tp->str, key)) return tp->val; } return 0; } /* attribute prerequisite */ static int parse_attr_prereq(char *val, struct ticket_config *tk) { struct attr_prereq *ap = NULL; char *p; ap = (struct attr_prereq *)calloc(1, sizeof(struct attr_prereq)); if (!ap) { log_error("out of memory"); return -1; } p = strtok(val, " \t"); if (!p) { log_error("not enough arguments to attr-prereq"); goto err_out; } ap->grant_type = lookup_tokval(p, grant_type); if (!ap->grant_type) { log_error("%s is not a grant type", p); goto err_out; } p = strtok(NULL, " \t"); if (!p) { log_error("not enough arguments to attr-prereq"); goto err_out; } if (!(ap->attr_name = strdup(p))) { log_error("out of memory"); goto err_out; } p = strtok(NULL, " \t"); if (!p) { log_error("not enough arguments to attr-prereq"); goto err_out; } ap->op = lookup_tokval(p, attr_op); if (!ap->op) { log_error("%s is not an attribute operation", p); goto err_out; } p = strtok(NULL, " \t"); if (!p) { log_error("not enough arguments to attr-prereq"); goto err_out; } if (!(ap->attr_val = strdup(p))) { log_error("out of memory"); goto err_out; } tk->attr_prereqs = g_list_append(tk->attr_prereqs, ap); if (!tk->attr_prereqs) { log_error("out of memory"); goto err_out; } return 0; err_out: if (ap) { if (ap->attr_val) free(ap->attr_val); if (ap->attr_name) free(ap->attr_name); free(ap); } return -1; } extern int poll_timeout; +void +get_keyval(char *key, char *val, struct args *a) { 
+ char *p; + + strncpy(a->key, key, 16); + p = skip_while(val, isspace); + *(p-1) = '\0'; + strncpy(a->val, val, 16); +} + +struct crmv1_group { + char name[16]; + char ra[128]; + struct args { + char *key[16]; + char *val[16]; + } args[16]; +}; + +#define OCF_HB_PATH "/usr/lib/ocf/resource.d/heartbeat/" +#define OCF_PATH "/usr/lib/ocf/resource.d/" + +void ln_ra(char *ra, char *s, int cnt) +{ + char *p, *q, *r; + int fd; + char ra_target_s[128]; + + p = s; + q = strchr(":", s); + if (!q) { + strcpy(ra, OCF_HB_PATH); + strncpy(ra+strlen(OCF_HB_PATH), s, 128-strlen(OCF_HB_PATH)); + r = s; + } else { + /* s -> p ':' q ':' r + * copy to ra + */ + *q = '\0'; q++; + strcpy(ra, OCF_PATH); + strncpy(ra+strlen(OCF_PATH), p, 128-strlen(OCF_PATH)); + *(q-p+1) = '/'; + strncpy(ra+strlen(OCF_PATH)+1, q, 128-strlen(OCF_PATH)-strlen(q)); + r = strchr(":", q); + *r = '\0'; r++; + strncat(ra, r, 128-strlen(OCF_PATH)); + } + if (strlen(p) >= 128) { + log_error("RA name too long: %s", s); + exit(1); + } + /* now test if there is a file containing this RA + */ + if (!(fd = open(ra))) { + log_error("RA does not exist: %s", s); + exit(1); + } + close(fd); + /* finally, create a soft link + */ + if (snprintf(ra_target_s, 128, "%02d_%s", cnt, r) >= 128) { + log_error("RA name too long: %s", s); + exit(1); + } + if (symlink(BOOTH_DEFAULT_CRMV1_CONF, ra_target_s) != 0) { + log_error("failed to symlink %s: %s", ra_target_s, + strerror(errno)); + exit(1); + } +} + +/* mimic the shell parsing + */ + +int parse_crmv1_conf(struct ticket_config *current_tk) +{ + struct crmv1_group *groups[16], *curr_group; + char line[1024], *buf; + char error_str_buf[1024]; + FILE *fp; + char *s, *key, *val; + const char *error; + char *save_ra; + int i, grp_i = 0, key_i = 0, in_key, grp_wait; + int cnt = 0, args_cnt = 0; + + curr_group = groups[0]; + fp = fopen(BOOTH_DEFAULT_CRMV1_CONF, "r"); + if (!fp) { + log_error("failed to open %s: %s", BOOTH_DEFAULT_CRMV1_CONF, + strerror(errno)); + return -1; + } + + 
log_debug("reading config file %s", BOOTH_DEFAULT_CRMV1_CONF); + /* make one long line */ + while (fgets(line, sizeof(line), fp)) { + s = skip_while(line, isspace); + if (is_end_of_line(s) || *s == '#') + continue; + /* is line continued? */ + if (*(s+strlen(s)-2) == '\\' && *(s+strlen(s)-1) == '\n') { + *(s+strlen(s)-2) = ' '; + } + } + buf = line; + + /* now parse the line */ + for (s = buf; ; ) { + /* a '=' b or ra */ + s = skip_while(s, isspace); + save_ra = s; + if ( *s == '=' ) { + *s = '\0'; + s++; + get_keyval(save_ra, s, curr_group->args[args_cnt++]); + } else { + if ( save_ra ) { + ln_ra(curr_group->ra, save_ra, cnt); + curr_group->args[0] = NULL; + save_ra = NULL; + cnt++; + continue; + } + } + ln_ra(curr_group->ra, s, cnt); + cnt++; + + if (strcmp(key, "group") == 0) { + grp_wait = 1; + continue; + } + + (void)snprintf(error_str_buf, sizeof(error_str_buf), + "Unknown keyword \"%s\"", key); + error = error_str_buf; + goto err; + + curr_group++; + } + fclose(fp); + + /* Default: make config name match config filename. */ + if (!booth_conf->name[0]) { + cp = strrchr(path, '/'); + cp = cp ? 
cp+1 : (char *)path; + cp2 = strrchr(cp, '.'); + if (!cp2) + cp2 = cp + strlen(cp); + if (cp2-cp >= BOOTH_NAME_LEN) { + log_error("token too long"); + goto out; + } + strncpy(booth_conf->name, cp, cp2-cp); + *(booth_conf->name+(cp2-cp)) = '\0'; + } + + if (!postproc_ticket(current_tk)) { + goto out; + } + + return 0; + +err: + fclose(fp); +out: + log_error("%s in config file line %d", + error, lineno); + booth_conf->crmv1 = 0; + return -1; +} + int read_config(const char *path, int type) { char line[1024]; char error_str_buf[1024]; FILE *fp; char *s, *key, *val, *end_of_key; const char *error; char *cp, *cp2; int i; int lineno = 0; int got_transport = 0; int min_timeout = 0; struct ticket_config defaults = { { 0 } }; struct ticket_config *current_tk = NULL; fp = fopen(path, "r"); if (!fp) { log_error("failed to open %s: %s", path, strerror(errno)); return -1; } booth_conf = malloc(sizeof(struct booth_config) + TICKET_ALLOC * sizeof(struct ticket_config)); if (!booth_conf) { fclose(fp); log_error("failed to alloc memory for booth config"); return -ENOMEM; } memset(booth_conf, 0, sizeof(struct booth_config) + TICKET_ALLOC * sizeof(struct ticket_config)); ticket_size = TICKET_ALLOC; booth_conf->proto = UDP; booth_conf->port = BOOTH_DEFAULT_PORT; booth_conf->maxtimeskew = BOOTH_DEFAULT_MAX_TIME_SKEW; booth_conf->authkey[0] = '\0'; /* Provide safe defaults. -1 is reserved, though. 
*/ booth_conf->uid = -2; booth_conf->gid = -2; strcpy(booth_conf->site_user, "hacluster"); strcpy(booth_conf->site_group, "haclient"); strcpy(booth_conf->arb_user, "nobody"); strcpy(booth_conf->arb_group, "nobody"); parse_weights("", defaults.weight); defaults.clu_test.path = NULL; defaults.clu_test.pid = 0; defaults.clu_test.status = 0; defaults.clu_test.progstate = EXTPROG_IDLE; defaults.term_duration = DEFAULT_TICKET_EXPIRY; defaults.timeout = DEFAULT_TICKET_TIMEOUT; defaults.retries = DEFAULT_RETRIES; defaults.acquire_after = 0; defaults.mode = TICKET_MODE_AUTO; error = ""; log_debug("reading config file %s", path); while (fgets(line, sizeof(line), fp)) { lineno++; s = skip_while(line, isspace); if (is_end_of_line(s) || *s == '#') continue; key = s; /* Key */ end_of_key = skip_while_in(key, isalnum, "-_"); if (end_of_key == key) { error = "No key"; goto err; } if (!*end_of_key) goto exp_equal; /* whitespace, and something else but nothing more? */ s = skip_while(end_of_key, isspace); if (*s != '=') { exp_equal: error = "Expected '=' after key"; goto err; } s++; /* It's my buffer, and I terminate if I want to. */ /* But not earlier than that, because we had to check for = */ *end_of_key = 0; /* Value tokenizing */ s = skip_while(s, isspace); switch (*s) { case '"': case '\'': val = s+1; s = skip_until(val, *s); /* Terminate value */ if (!*s) { error = "Unterminated quoted string"; goto err; } /* Remove and skip quote */ *s = 0; s++; if (*(s = skip_while(s, isspace)) && *s != '#') { error = "Surplus data after value"; goto err; } *s = 0; break; case 0: no_value: error = "No value"; goto err; break; default: val = s; /* Rest of line. */ i = strlen(s); /* i > 0 because of "case 0" above. 
*/ while (i > 0 && isspace(s[i-1])) i--; s += i; *s = 0; } if (val == s) goto no_value; if (strlen(key) > BOOTH_NAME_LEN || strlen(val) > BOOTH_NAME_LEN) { error = "key/value too long"; goto err; } if (strcmp(key, "transport") == 0) { if (got_transport) { error = "config file has multiple transport lines"; goto err; } if (strcasecmp(val, "UDP") == 0) booth_conf->proto = UDP; else if (strcasecmp(val, "SCTP") == 0) booth_conf->proto = SCTP; else { (void)snprintf(error_str_buf, sizeof(error_str_buf), "invalid transport protocol \"%s\"", val); error = error_str_buf; goto err; } got_transport = 1; continue; } if (strcmp(key, "port") == 0) { booth_conf->port = atoi(val); continue; } if (strcmp(key, "name") == 0) { safe_copy(booth_conf->name, val, BOOTH_NAME_LEN, "name"); continue; } #if HAVE_LIBGCRYPT || HAVE_LIBMHASH if (strcmp(key, "authfile") == 0) { safe_copy(booth_conf->authfile, val, BOOTH_PATH_LEN, "authfile"); continue; } if (strcmp(key, "maxtimeskew") == 0) { booth_conf->maxtimeskew = atoi(val); continue; } #endif if (strcmp(key, "site") == 0) { if (add_site(val, SITE)) goto err; continue; } if (strcmp(key, "arbitrator") == 0) { if (add_site(val, ARBITRATOR)) goto err; continue; } if (strcmp(key, "site-user") == 0) { safe_copy(booth_conf->site_user, optarg, BOOTH_NAME_LEN, "site-user"); continue; } if (strcmp(key, "site-group") == 0) { safe_copy(booth_conf->site_group, optarg, BOOTH_NAME_LEN, "site-group"); continue; } if (strcmp(key, "arbitrator-user") == 0) { safe_copy(booth_conf->arb_user, optarg, BOOTH_NAME_LEN, "arbitrator-user"); continue; } if (strcmp(key, "arbitrator-group") == 0) { safe_copy(booth_conf->arb_group, optarg, BOOTH_NAME_LEN, "arbitrator-group"); continue; } if (strcmp(key, "debug") == 0) { if (type != CLIENT && type != GEOSTORE) debug_level = max(debug_level, atoi(val)); continue; } if (strcmp(key, "ticket") == 0) { if (current_tk && strcmp(current_tk->name, "__defaults__")) { if (!postproc_ticket(current_tk)) { goto err; } } if 
(!strcmp(val, "__defaults__")) { current_tk = &defaults; } else if (add_ticket(val, ¤t_tk, &defaults)) { goto err; } continue; } + if (strcmp(key, "crmv1") == 0) { + if ( !parse_crmv1_conf() ) + booth_conf->crmv1 = 1; + continue; + } + /* current_tk must be allocated at this point, otherwise * we don't know to which ticket the key refers */ if (!current_tk) { (void)snprintf(error_str_buf, sizeof(error_str_buf), "Unexpected keyword \"%s\"", key); error = error_str_buf; goto err; } if (strcmp(key, "expire") == 0) { current_tk->term_duration = read_time(val); if (current_tk->term_duration <= 0) { error = "Expected time >0 for expire"; goto err; } continue; } if (strcmp(key, "timeout") == 0) { current_tk->timeout = read_time(val); if (current_tk->timeout <= 0) { error = "Expected time >0 for timeout"; goto err; } if (!min_timeout) { min_timeout = current_tk->timeout; } else { min_timeout = min(min_timeout, current_tk->timeout); } continue; } if (strcmp(key, "retries") == 0) { current_tk->retries = strtol(val, &s, 0); if (*s || s == val || current_tk->retries<3 || current_tk->retries > 100) { error = "Expected plain integer value in the range [3, 100] for retries"; goto err; } continue; } if (strcmp(key, "renewal-freq") == 0) { current_tk->renewal_freq = read_time(val); if (current_tk->renewal_freq <= 0) { error = "Expected time >0 for renewal-freq"; goto err; } continue; } if (strcmp(key, "acquire-after") == 0) { current_tk->acquire_after = read_time(val); if (current_tk->acquire_after < 0) { error = "Expected time >=0 for acquire-after"; goto err; } continue; } if (strcmp(key, "before-acquire-handler") == 0) { if (parse_extprog(val, current_tk)) { goto err; } continue; } if (strcmp(key, "attr-prereq") == 0) { if (parse_attr_prereq(val, current_tk)) { goto err; } continue; } if (strcmp(key, "mode") == 0) { current_tk->mode = retrieve_ticket_mode(val); continue; } if (strcmp(key, "weights") == 0) { if (parse_weights(val, current_tk->weight) < 0) goto err; continue; } 
(void)snprintf(error_str_buf, sizeof(error_str_buf), "Unknown keyword \"%s\"", key); error = error_str_buf; goto err; } fclose(fp); if ((booth_conf->site_count % 2) == 0) { log_warn("Odd number of nodes is strongly recommended!"); } /* Default: make config name match config filename. */ if (!booth_conf->name[0]) { cp = strrchr(path, '/'); cp = cp ? cp+1 : (char *)path; cp2 = strrchr(cp, '.'); if (!cp2) cp2 = cp + strlen(cp); if (cp2-cp >= BOOTH_NAME_LEN) { log_error("booth config file name too long"); goto out; } strncpy(booth_conf->name, cp, cp2-cp); *(booth_conf->name+(cp2-cp)) = '\0'; } if (!postproc_ticket(current_tk)) { goto out; } poll_timeout = min(POLL_TIMEOUT, min_timeout/10); if (!poll_timeout) poll_timeout = POLL_TIMEOUT; return 0; err: fclose(fp); out: log_error("%s in config file line %d", error, lineno); free(booth_conf); booth_conf = NULL; return -1; } int check_config(int type) { struct passwd *pw; struct group *gr; char *cp, *input; if (!booth_conf) return -1; input = (type == ARBITRATOR) ? booth_conf->arb_user : booth_conf->site_user; if (!*input) goto u_inval; if (isdigit(input[0])) { booth_conf->uid = strtol(input, &cp, 0); if (*cp != 0) { u_inval: log_error("User \"%s\" cannot be resolved into a UID.", input); return ENOENT; } } else { pw = getpwnam(input); if (!pw) goto u_inval; booth_conf->uid = pw->pw_uid; } input = (type == ARBITRATOR) ? 
booth_conf->arb_group : booth_conf->site_group; if (!*input) goto g_inval; if (isdigit(input[0])) { booth_conf->gid = strtol(input, &cp, 0); if (*cp != 0) { g_inval: log_error("Group \"%s\" cannot be resolved into a UID.", input); return ENOENT; } } else { gr = getgrnam(input); if (!gr) goto g_inval; booth_conf->gid = gr->gr_gid; } return 0; } static int get_other_site(struct booth_site **node) { struct booth_site *n; int i; *node = NULL; if (!booth_conf) return 0; for (i = 0; i < booth_conf->site_count; i++) { n = booth_conf->site + i; if (n != local && n->type == SITE) { if (!*node) { *node = n; } else { return 0; } } } return !*node ? 0 : 1; } int find_site_by_name(char *site, struct booth_site **node, int any_type) { struct booth_site *n; int i; if (!booth_conf) return 0; if (!strcmp(site, OTHER_SITE)) return get_other_site(node); for (i = 0; i < booth_conf->site_count; i++) { n = booth_conf->site + i; if ((n->type == SITE || any_type) && strncmp(n->addr_string, site, sizeof(n->addr_string)) == 0) { *node = n; return 1; } } return 0; } int find_site_by_id(uint32_t site_id, struct booth_site **node) { struct booth_site *n; int i; if (site_id == NO_ONE) { *node = no_leader; return 1; } if (!booth_conf) return 0; for (i = 0; i < booth_conf->site_count; i++) { n = booth_conf->site + i; if (n->site_id == site_id) { *node = n; return 1; } } return 0; } const char *type_to_string(int type) { switch (type) { case ARBITRATOR: return "arbitrator"; case SITE: return "site"; case CLIENT: return "client"; case GEOSTORE: return "attr"; } return "??invalid-type??"; } diff --git a/src/config.h b/src/config.h index bca73bc..834aa4e 100644 --- a/src/config.h +++ b/src/config.h @@ -1,340 +1,342 @@ /* * Copyright (C) 2011 Jiaju Zhang * Copyright (C) 2013-2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, 
or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #ifndef _CONFIG_H #define _CONFIG_H #include #include +#include #include "booth.h" #include "timer.h" #include "raft.h" #include "transport.h" /** @{ */ /** Definitions for in-RAM data. */ #define MAX_NODES 16 #define MAX_ARGS 16 #define TICKET_ALLOC 16 #define OTHER_SITE "other" typedef enum { EXTPROG_IDLE, EXTPROG_RUNNING, EXTPROG_EXITED, EXTPROG_IGNORE, } extprog_state_e; #define tk_test tk->clu_test typedef enum { ATTR_OP_EQ = 1, ATTR_OP_NE, } attr_op_e; typedef enum { GRANT_AUTO = 1, GRANT_MANUAL, } grant_type_e; typedef enum { TICKET_MODE_AUTO = 1, TICKET_MODE_MANUAL, } ticket_mode_e; struct toktab { const char *str; int val; }; struct attr_prereq { grant_type_e grant_type; /* grant type */ attr_op_e op; /* attribute operation */ char *attr_name; char *attr_val; }; struct ticket_config { /** \name Configuration items. * @{ */ /** Name of ticket. */ boothc_ticket name; /** How long a term lasts if not refreshed (in ms) */ int term_duration; /** Network related timeouts (in ms) */ int timeout; /** Retries before giving up. */ int retries; /** If >0, time to wait for a site to get fenced. * The ticket may be acquired after that timespan by * another site. 
*/ int acquire_after; /* How often to renew the ticket (in ms) */ int renewal_freq; /* Program to ask whether it makes sense to * acquire the ticket */ struct clu_test { char *path; int is_dir; char *argv[MAX_ARGS]; pid_t pid; int status; /* child exit status */ extprog_state_e progstate; /* program running/idle/waited on */ } clu_test; /** Node weights. */ int weight[MAX_NODES]; /* Mode operation of the ticket. * Set to MANUAL to make sure that the ticket will be manipulated * only by manual commands of the administrator. In such a case * automatic elections will be disabled. * Manual tickets do not have to be renewed every some time. * The leader will continue to send heartbeat messages to other sites. */ ticket_mode_e mode; /** @} */ /** \name Runtime values. * @{ */ /** Current state. */ server_state_e state; /** Next state. Used at startup. */ server_state_e next_state; /** When something has to be done */ timetype next_cron; /** Current leader. This is effectively the log[] in Raft. */ struct booth_site *leader; /** Leader that got lost. */ struct booth_site *lost_leader; /** Is the ticket granted? */ int is_granted; /** Which site considered itself a leader. * For manual tickets it is possible, that * more than one site will act as a leader. * This array is used for tracking that situation * and notifying the user about the issue. * * Possible values for every site: * 0: the site does not claim to be the leader * 1: the site considers itself a leader and * is sending or used to send heartbeat messages * * The site will be marked as '1' until this site * receives revoke confirmation. * * If more than one site has '1', the geo cluster is * considered to have multiple leadership and proper * warning are generated. */ int sites_where_granted[MAX_NODES]; /** Timestamp of leadership expiration */ timetype term_expires; /** End of election period */ timetype election_end; struct booth_site *voted_for; /** Who the various sites vote for. * NO_OWNER = no vote yet. 
*/ struct booth_site *votes_for[MAX_NODES]; /* bitmap */ uint64_t votes_received; /** Last voting round that was seen. */ uint32_t current_term; /** Do ticket updates whenever we get enough heartbeats. * But do that only once. * This is reset to 0 whenever we broadcast heartbeat and set * to 1 once enough acks are received. * Increased to 2 when the ticket is commited to the CIB (see * delay_commit). */ uint32_t ticket_updated; /** Outcome of whatever ticket request was processed. * Can also be an intermediate stage. */ uint32_t outcome; /** @} */ /** */ uint32_t last_applied; uint32_t next_index[MAX_NODES]; uint32_t match_index[MAX_NODES]; /* Why did we start the elections? */ cmd_reason_t election_reason; /* if it is potentially dangerous to grant the ticket * immediately, then this is set to some point in time, * usually (now + term_duration + acquire_after) */ timetype delay_commit; /* the last request RPC we sent */ uint32_t last_request; /* if we expect some acks, then set this to the id of * the RPC which others will send us; it is cleared once all * replies were received */ uint32_t acks_expected; /* bitmask of servers which sent acks */ uint64_t acks_received; /* timestamp of the request */ timetype req_sent_at; /* we need to wait for MY_INDEX from other servers, * hold the ticket processing for a while until they reply */ int start_postpone; /** Last renewal time */ timetype last_renewal; /* Do we need to update the copy in the CIB? * Normally, the ticket is written only when it changes via * the UPDATE RPC (for followers) and on expiration update * (for leaders) */ int update_cib; /* Is this ticket in election? */ int in_election; /* don't log warnings unnecessarily */ int expect_more_rejects; /** \name Needed while proposals are being done. * @{ */ /* Need to keep the previous valid ticket in case we moved to * start new elections and another server asks for the ticket * status. It would be wrong to send our candidate ticket. 
*/ struct ticket_config *last_valid_tk; /** Attributes, user defined */ GHashTable *attr; /** Attribute prerequisites */ GList *attr_prereqs; /** Whom to vote for the next time. * Needed to push a ticket to someone else. */ #if 0 /** Bitmap of sites that acknowledge that state. */ uint64_t proposal_acknowledges; /** When an incompletely acknowledged proposal gets done. * If all peers agree, that happens sooner. * See switch_state_to(). */ struct timeval proposal_switch; /** Timestamp of proposal expiration. */ time_t proposal_expires; #endif /** Number of send retries left. * Used on the new owner. * Starts at 0, counts up. */ int retry_number; /** @} */ }; struct booth_config { char name[BOOTH_NAME_LEN]; /** File containing the authentication file. */ char authfile[BOOTH_PATH_LEN]; struct stat authstat; char authkey[BOOTH_MAX_KEY_LEN]; int authkey_len; /** Maximum time skew between peers allowed */ int maxtimeskew; transport_layer_t proto; uint16_t port; /** Stores the OR of sites bitmasks. */ uint64_t sites_bits; /** Stores the OR of all members' bitmasks. 
*/ uint64_t all_bits; char site_user[BOOTH_NAME_LEN]; char site_group[BOOTH_NAME_LEN]; char arb_user[BOOTH_NAME_LEN]; char arb_group[BOOTH_NAME_LEN]; uid_t uid; gid_t gid; int site_count; struct booth_site site[MAX_NODES]; int ticket_count; int ticket_allocated; struct ticket_config *ticket; + int crmv1; }; extern struct booth_config *booth_conf; #define is_auth_req() (booth_conf->authkey[0] != '\0') int read_config(const char *path, int type); int check_config(int type); int find_site_by_name(char *site, struct booth_site **node, int any_type); int find_site_by_id(uint32_t site_id, struct booth_site **node); const char *type_to_string(int type); #endif /* _CONFIG_H */ diff --git a/src/handler.c b/src/handler.c index a12857e..2f6afbc 100644 --- a/src/handler.c +++ b/src/handler.c @@ -1,283 +1,296 @@ /* * Copyright (C) 2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include "ticket.h" #include "config.h" #include "inline-fn.h" #include "log.h" #include "pacemaker.h" #include "booth.h" #include "handler.h" static int set_booth_env(struct ticket_config *tk) { int rv; char expires[16]; sprintf(expires, "%" PRId64, (int64_t)wall_ts(&tk->term_expires)); rv = setenv("BOOTH_TICKET", tk->name, 1) || setenv("BOOTH_LOCAL", local->addr_string, 1) || setenv("BOOTH_CONF_NAME", booth_conf->name, 1) || setenv("BOOTH_CONF_PATH", cl.configfile, 1) || setenv("BOOTH_TICKET_EXPIRES", expires, 1); if (rv) { log_error("Cannot set environment: %s", strerror(errno)); } return rv; } static void closefiles(void) { int fd; /* close all descriptors except stdin/out/err */ for (fd = getdtablesize() - 1; fd > STDERR_FILENO; fd--) { close(fd); } } +static void +wait4proc(struct ticket_config *tk, char *prog) { + int rv, status; + + while (waitpid(curr_pid, &status, 0) != curr_pid) + ; + curr_pid = 0; + if (!ignore_status) { + rv = test_exit_status(tk, prog, status, 1); + if (rv) + _exit(rv); + } else { + /* + * To make ignore_rest function signal safe log_info + * must be removed from signal function. Information + * about signal delivery is important so put it here. 
+ */ + log_info("external programs handler caught TERM, ignoring " + "status of external test programs"); + } + static void run_ext_prog(struct ticket_config *tk, char *prog) { - if (set_booth_env(tk)) { + int status, rv; + + switch(curr_pid=fork()) { + case -1: + log_error("fork: %s", strerror(errno)); _exit(1); + case 0: /* child */ + if (set_booth_env(tk)) { + _exit(1); + } + closefiles(); /* don't leak open files */ + tk_log_debug("running handler %s", prog); + execv(prog, tk_test.argv); + tk_log_error("%s: execv failed (%s)", prog, strerror(errno)); + _exit(1); + default: /* parent */ + wait4proc(struct ticket_config *tk, char *prog); } - closefiles(); /* don't leak open files */ - tk_log_debug("running handler %s", prog); - execv(prog, tk_test.argv); - tk_log_error("%s: execv failed (%s)", prog, strerror(errno)); - _exit(1); } static int prog_filter(const struct dirent *dp) { return (*dp->d_name != '.'); } static pid_t curr_pid; static int ignore_status; static int test_exit_status(struct ticket_config *tk, char *prog, int status, int log_msg) { int rv = -1; if (WIFEXITED(status)) { rv = WEXITSTATUS(status); } else if (WIFSIGNALED(status)) { rv = 128 + WTERMSIG(status); } if (rv) { if (log_msg) { tk_log_warn("handler \"%s\" failed: %s", prog, interpret_rv(status)); tk_log_warn("we are not allowed to acquire ticket"); } } else { tk_log_debug("handler \"%s\" exited with success", prog); } return rv; } static void reset_test_state(struct ticket_config *tk) { tk_test.pid = 0; set_progstate(tk, EXTPROG_IDLE); } int tk_test_exit_status(struct ticket_config *tk) { int rv; rv = test_exit_status(tk, tk_test.path, tk_test.status, !tk_test.is_dir); reset_test_state(tk); return rv; } void wait_child(int sig) { int i, status; struct ticket_config *tk; /* use waitpid(2) and not wait(2) in order not to interfear * with popen(2)/pclose(2) and system(2) used in pacemaker.c */ foreach_ticket(i, tk) { if (tk_test.path && tk_test.pid > 0 && (tk_test.progstate == EXTPROG_RUNNING 
|| tk_test.progstate == EXTPROG_IGNORE) && waitpid(tk_test.pid, &status, WNOHANG) == tk_test.pid) { if (tk_test.progstate == EXTPROG_IGNORE) { /* not interested in the outcome */ reset_test_state(tk); } else { tk_test.status = status; set_progstate(tk, EXTPROG_EXITED); } } } } /* the parent may want to have us stop processing scripts, say * when the ticket gets revoked */ static void ignore_rest(int sig) { signal(SIGTERM, SIG_IGN); ignore_status = 1; if (curr_pid > 0) { (void)kill(curr_pid, SIGTERM); } } void ext_prog_timeout(struct ticket_config *tk) { tk_log_warn("handler timed out"); } int is_ext_prog_running(struct ticket_config *tk) { if (!tk_test.path) return 0; return (tk_test.pid > 0 && tk_test.progstate == EXTPROG_RUNNING); } void ignore_ext_test(struct ticket_config *tk) { if (is_ext_prog_running(tk)) { (void)kill(tk_test.pid, SIGTERM); set_progstate(tk, EXTPROG_IGNORE); } else if (tk_test.progstate == EXTPROG_EXITED) { /* external prog exited, but the status not yet examined; * we're not interested in checking the status anymore */ reset_test_state(tk); } } static void process_ext_dir(struct ticket_config *tk) { char prog[FILENAME_MAX+1]; int rv, n_progs, i, status; struct dirent **proglist, *dp; signal(SIGTERM, (__sighandler_t)ignore_rest); signal(SIGCHLD, SIG_DFL); signal(SIGUSR1, SIG_DFL); signal(SIGINT, SIG_DFL); tk_log_debug("running programs in directory %s", tk_test.path); n_progs = scandir(tk_test.path, &proglist, prog_filter, alphasort); if (n_progs == -1) { tk_log_error("%s: scandir failed (%s)", tk_test.path, strerror(errno)); _exit(1); } for (i = 0; i < n_progs; i++) { if (ignore_status) break; dp = proglist[i]; if (strlen(dp->d_name) + strlen(tk_test.path) + 1 > FILENAME_MAX) { tk_log_error("%s: name exceeds max length (%s)", tk_test.path, dp->d_name); _exit(1); } strcpy(prog, tk_test.path); strcat(prog, "/"); strcat(prog, dp->d_name); - switch(curr_pid=fork()) { - case -1: - log_error("fork: %s", strerror(errno)); - _exit(1); - case 0: /* 
child */ - run_ext_prog(tk, prog); - break; /* run_ext_prog effectively noreturn */ - default: /* parent */ - while (waitpid(curr_pid, &status, 0) != curr_pid) - ; - curr_pid = 0; - if (!ignore_status) { - rv = test_exit_status(tk, prog, status, 1); - if (rv) - _exit(rv); - } else { - /* - * To make ignore_rest function signal safe log_info - * must be removed from signal function. Information - * about signal delivery is important so put it here. - */ - log_info("external programs handler caught TERM, ignoring " - "status of external test programs"); - } + run_ext_prog(tk, prog); + if (booth_conf->crmv1) { + wait4proc(struct ticket_config *tk, char *prog); } } _exit(0); } /* run some external program * return codes: * RUNCMD_ERR: executing program failed (or some other failure) * RUNCMD_MORE: program forked, results later */ int run_handler(struct ticket_config *tk) { int rv = 0; pid_t pid; struct stat stbuf; if (!tk_test.path) return 0; if (stat(tk_test.path, &stbuf)) { tk_log_error("%s: stat failed (%s)", tk_test.path, strerror(errno)); return RUNCMD_ERR; } tk_test.is_dir = (stbuf.st_mode & S_IFDIR); switch(pid=fork()) { case -1: log_error("fork: %s", strerror(errno)); return RUNCMD_ERR; case 0: /* child */ if (tk_test.is_dir) { process_ext_dir(tk); } else { run_ext_prog(tk, tk_test.path); } default: /* parent */ tk_test.pid = pid; set_progstate(tk, EXTPROG_RUNNING); rv = RUNCMD_MORE; /* program runs */ + if (booth_conf->crmv1) { + wait4proc(struct ticket_config *tk, char *prog); + } } return rv; }