diff --git a/GNUmakefile b/GNUmakefile index 5c80118..b486e56 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -1,47 +1,47 @@ # # Copyright (C) 2008 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -include Makefile PACKAGE ?= booth TARFILE = $(PACKAGE).tar.bz2 RPM_ROOT = $(shell pwd) RPM_OPTS = --define "_sourcedir $(RPM_ROOT)" \ --define "_specdir $(RPM_ROOT)" \ --define "_srcrpmdir $(RPM_ROOT)" TAG ?= HEAD gitarchive: rm -f $(TARFILE) git archive --format=tar --prefix $(PACKAGE)/ $(TAG) | tar xf - git describe --tags --always > $(PACKAGE)/.git_info tar -cjf $(TARFILE) $(PACKAGE) rm -r $(PACKAGE) echo `date`: Rebuilt $(TARFILE) srpm: gitarchive rm -f *.src.rpm @echo To create custom builds, edit the flags and options in $(PACKAGE).spec first rpmbuild -bs $(RPM_OPTS) $(PACKAGE).spec rpm: srpm rpmbuild $(RPM_OPTS) --rebuild $(RPM_ROOT)/*.src.rpm diff --git a/script/booth-keygen b/script/booth-keygen index 5215826..51a577d 100644 --- a/script/booth-keygen +++ b/script/booth-keygen @@ -1,64 +1,64 @@ #!/bin/sh # # Generate authentication key for booth # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # -# You should have received a copy of the GNU General Public License -# along with this program; if not, write the Free Software Foundation, -# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # DFLT_AUTHFILE=/etc/booth/authkey KEYSIZE=64 # /dev/urandom should be good enough RND_SRC=/dev/urandom usage() { cat<&2 } fatal() { error $* exit 1 } case "$1" in "-h"|"--help"|"-?") usage;; /*|"") : ;; *) fatal "please use absolute path for the key file" ;; esac keyf=${1:-$DFLT_AUTHFILE} if test -f $keyf; then fatal "file $keyf already exists" fi umask 077 errout=`dd if=$RND_SRC of=$keyf bs=$KEYSIZE count=1 2>&1` rc=$? if [ $rc -ne 0 ]; then echo "$errout" >&2 exit $rc fi chown root:root $keyf diff --git a/script/ocf/booth-site b/script/ocf/booth-site index ab1e619..809928c 100755 --- a/script/ocf/booth-site +++ b/script/ocf/booth-site @@ -1,203 +1,203 @@ #!/bin/bash # vim: set sw=4 : # # Resource Agent for BOOTH site daemon. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # -# You should have received a copy of the GNU General Public License -# along with this program; if not, write the Free Software Foundation, -# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # ####################################################################### # Initialization: DEFAULT_BIN="boothd" DEFAULT_CONF="/etc/booth/booth.conf" : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ####################################################################### booth_site_meta_data() { cat < 1.0 This Resource Agent can control the BOOTH site daemon. It assumes that the binary boothd is in your default PATH. In most cases, it should be run as a primitive resource. BOOTH site daemon The configuration name (or configuration filename) to use. BOOTH Options Any additional options to start the BOOTH daemon with BOOTH Options The daemon to start The daemon to start END } ####################################################################### booth_site_usage() { cat < /dev/null } booth_site_start() { local rc booth_site_status rc=$? case $rc in 0) ocf_log info "boothd already running" return $OCF_SUCCESS ;; $OCF_NOT_RUNNING) ;; esac $OCF_RESKEY_daemon daemon -c $OCF_RESKEY_config $OCF_RESKEY_args || return $OCF_ERR_GENERIC sleep 1 while ! booth_monitor_basic; do sleep 1 done return $OCF_SUCCESS } booth_site_stop() { local pid pid=`get_booth_pid` if [ -z "$pid" ]; then ocf_log info "boothd already stopped" return $OCF_SUCCESS fi ocf_stop_processes TERM 5 $pid while is_booth_running; do sleep 1 done return $OCF_SUCCESS } booth_site_restart() { booth_site_stop booth_site_start } booth_site_reload() { booth_site_restart } booth_site_monitor() { booth_site_status case $? in 0) return $OCF_SUCCESS ;; $OCF_NOT_RUNNING) return $OCF_NOT_RUNNING ;; esac } booth_site_validate_all() { if ! test -f $OCF_RESKEY_config; then ocf_log err "$OCF_RESKEY_config does not exist" return $OCF_ERR_INSTALLED fi if ocf_is_true $OCF_RESKEY_CRM_meta_globally_unique; then ocf_log err "$OCF_RESOURCE_INSTANCE must be configured with the globally_unique=false meta attribute" return $OCF_ERR_CONFIGURED fi return $OCF_SUCCESS } : ${OCF_RESKEY_daemon:=$DEFAULT_BIN} : ${OCF_RESKEY_config:=$DEFAULT_CONF} OCF_REQUIRED_BINARIES=${OCF_RESKEY_daemon} ocf_rarun $* diff --git a/script/ocf/geo_attr.sh b/script/ocf/geo_attr.sh index d116c9e..d6e8b81 100644 --- a/script/ocf/geo_attr.sh +++ b/script/ocf/geo_attr.sh @@ -1,203 +1,203 @@ # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # -# You should have received a copy of the GNU General Public License -# along with this program; if not, write the Free Software Foundation, -# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # This file is part of the booth project and contains /bin/sh # code to support GEO attributes # USAGE # # To use this for updating GEO attributes just follow the # instructions below. # Source this file in your OCF RA script: # ## . ${OCF_ROOT}/lib/booth/geo_attr.sh # 1) meta-data # # geo_attr_meta_data prints descriptions of three parameters. # Best to invoke it just before printing "". For # instance: # ## cat < ## ... ## ## ... ## `geo_attr_meta_data` ## ## ... ## EOF # 2) validation (validate-all) # # Invoke geo_attr_validate_all to test the environment: # ## if ! geo_attr_validate_all; then ## return $OCF_ERR_INSTALL ## fi # 3) Attribute updating # # Put something like the following code after the RA updated the # remote site state (e.g. data replication): # ## if [ -n "$OCF_RESKEY_booth_ticket" ]; then ## if geo_attr_geo_attr $outcome; then ## # success! ## else ## # failed to set the attribute ## # appropriate error was already logged ## # normally, more cannot be done at this point ## # because updating GEO attributes is ## # essentially a best effort operation ## fi ## fi # # The outcome variable is a boolean. # It should reflect the outcome of the operation to update # data at the site (set to "0" for failure, anything else for # success). # 4) Site name (optional) # # We use the special value 'other' to specify the site where the # attribute is to be updated. That should cover the majority of # GEO clusters. In case your setup has more than two sites, then # provide a function named get_site_name which should print the # appropriate site name (as specified in booth.conf too) to # stdout. # DEFAULT_BOOTH_CONF="/etc/booth/booth.conf" : ${OCF_RESKEY_booth_config:=$DEFAULT_BOOTH_CONF} geo_attr_meta_data() { cat < Booth ticket. Need to define this to activate GEO attribute updating. See also the booth_config and geo_attribute parameters. Booth ticket Booth configuration name (or configuration filename) to use. BOOTH configuration file Attribute name. If not specified, we'll get the name from the first "attr-prereq" definition for the given ticket. This normally needs to be used only in case there are multiple "attr-prereq" directives for the ticket. GEO attribute END } geo_attr_get_attr() { local tkt cnf attr tkt=$OCF_RESKEY_booth_ticket cnf=$OCF_RESKEY_booth_config attr=$OCF_RESKEY_geo_attribute awk -v attr="$attr" ' n && /^[[:space:]]*attr-prereq = auto .* eq / { if (attr == "" || attr == $4) { print $4,$6; exit } } n && (/^$/ || /^ticket.*/) {exit} /^ticket.*'$tkt'/ {n=1} ' $cnf } # arguments: # $1: 0 reset the attribute # != 0 set the attribute # geo_attr_geo_attr() { local val site val=$1 set -- `geo_attr_get_attr` if test z"`command -v get_site_name`" = z"get_site_name"; then site=`get_site_name` else site="other" fi if [ "$val" = "0" ]; then geostore delete -s $site $1 >/dev/null 2>&1 else geostore set -s $site $1 $2 fi } geo_attr_read_attr() { local site set -- `geo_attr_get_attr` if test z"`command -v get_site_name`" = z"get_site_name"; then site=`get_site_name` else site="other" fi geostore get -s $site $1 } # test the environment for geo_attr # geo_attr_validate_all() { if [ -z "$OCF_RESKEY_booth_ticket" ]; then return 0 fi if ! test -f "$OCF_RESKEY_booth_config"; then ocf_log err "booth configuration $OCF_RESKEY_booth_config doesn't exist" return 1 fi if ! grep -qs "^ticket[[:space:]]*=[[:space:]]*\"$OCF_RESKEY_booth_ticket\"" $OCF_RESKEY_booth_config; then ocf_log err "ticket $OCF_RESKEY_booth_ticket not found in $OCF_RESKEY_booth_config" return 1 fi set -- `geo_attr_get_attr` if [ $# -eq 0 ]; then ocf_log err "no attr-prereq defined in $OCF_RESKEY_booth_ticket" return 1 fi return 0 } diff --git a/script/ocf/geostore b/script/ocf/geostore index 4b31127..85842a8 100755 --- a/script/ocf/geostore +++ b/script/ocf/geostore @@ -1,112 +1,112 @@ #!/bin/sh # # # geostore OCF RA. Just an example on how to use # geo-attr.sh # # Copyright (c) 2015 Dejan Muhamedagic # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # -# You should have received a copy of the GNU General Public License -# along with this program; if not, write the Free Software Foundation, -# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs . ${OCF_ROOT}/lib/booth/geo_attr.sh ####################################################################### geostore_meta_data() { cat < 1.0 This is the geostore Resource Agent. It's a sample for how to use geo_attr.sh. Sample GEO attribute RA `geo_attr_meta_data` END } ####################################################################### geostore_usage() { cat < 1.0 This is sharedrsc Resource Agent. It just keeps some resource while running and releases it when stopped. The resource is a directory on a shared filesystem or on a filesystem which is remotely accessible over ssh. Used for booth testing, i.e. to make sure that no two sites keep the same ticket. shared resource (booth testing) Location of the shared directory. If it's of the form "[user@]host:path" then it is assumed that the directory is to be accessed over ssh on that host. Otherwise, it must be a directory on a shared filesystem (such as nfs or ocfs2). shared directory location END } ####################################################################### sharedrsc_usage() { cat < $1/owner" } removecmd() { echo "test -d $1 && test \"\`cat $1/owner\`\" = `uname -n` && rm $1/owner && rmdir $1" } testdir() { runcmd testcmd $1 } makedir() { runcmd makecmd $1 } removedir() { runcmd removecmd $1 } sharedrsc_monitor() { if testdir $DIR; then return $OCF_SUCCESS else return $OCF_NOT_RUNNING fi } sharedrsc_start() { if sharedrsc_monitor; then return $OCF_SUCCESS fi makedir $DIR && return $OCF_SUCCESS local owner if ! owner=`runcmd getowner $DIR`; then owner="... nobody, it's only half-claimed" fi ocf_log err "eek, $OCF_RESKEY_dir already owned by $owner" return $OCF_ERR_GENERIC } sharedrsc_stop() { sharedrsc_monitor if [ $? -eq $OCF_NOT_RUNNING ]; then return $OCF_SUCCESS fi removedir $DIR } sharedrsc_getconfig() { local colon_pos colon_pos=`expr index "$OCF_RESKEY_dir" ":"` if [ $colon_pos -gt 0 ]; then SSH_HOST=`echo $OCF_RESKEY_dir | cut -d: -f 1` DIR=`echo $OCF_RESKEY_dir | cut -d: -f 2` else SSH_HOST= DIR=$OCF_RESKEY_dir fi } sharedrsc_validate_all() { if [ `expr index $DIR /` -ne 1 ]; then ocf_log err "dir must be an absolute path" return $OCF_ERR_INSTALLED fi return $OCF_SUCCESS } OCF_REQUIRED_PARAMS="dir" ocf_rarun $* # # vim:tabstop=4:shiftwidth=4:textwidth=0:wrapmargin=0 diff --git a/src/attr.c b/src/attr.c index 15e1099..17fa0c6 100644 --- a/src/attr.c +++ b/src/attr.c @@ -1,461 +1,461 @@ /* * Copyright (C) 2015 Dejan Muhamedagic * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include "attr.h" #include "ticket.h" #include "pacemaker.h" void print_geostore_usage(void) { printf( "Usage:\n" " geostore {list|set|get|delete} [-t ticket] [options] attr [value]\n" "\n" " list: List all attributes\n" " set: Set attribute to a value\n" " get: Get attribute's value\n" " delete: Delete attribute\n" "\n" " -t Ticket where attribute resides\n" " (required, if more than one ticket is configured)\n" "\n" "Options:\n" " -c FILE Specify config file [default " BOOTH_DEFAULT_CONF "]\n" " Can be a path or just a name without \".conf\" suffix\n" " -s Connect to a different site\n" " -h Print this help\n" "\n" "Examples:\n" "\n" " # geostore list -t ticket-A -s 10.121.8.183\n" " # geostore set -s 10.121.8.183 sr_status ACTIVE\n" " # geostore get -t ticket-A -s 10.121.8.183 sr_status\n" " # geostore delete -s 10.121.8.183 sr_status\n" "\n" "See the geostore(8) man page for more details.\n" ); } /* * the client side */ /* cl has all the input parameters: * ticket, attr name, attr value */ int test_attr_reply(cmd_result_t reply_code, cmd_request_t cmd) { int rv = 0; const char *op_str = ""; if (cmd == ATTR_SET) op_str = "set"; else if (cmd == ATTR_GET) op_str = "get"; else if (cmd == ATTR_LIST) op_str = "list"; else if (cmd == ATTR_DEL) op_str = "delete"; else { log_error("internal error reading reply result!"); return -1; } switch (reply_code) { case RLT_ASYNC: log_info("%s command sent, result will be returned " "asynchronously.", op_str); rv = 0; break; case RLT_SYNC_SUCC: case RLT_SUCCESS: if (cmd == ATTR_SET) log_info("%s succeeded!", op_str); rv = 0; break; case RLT_SYNC_FAIL: log_info("%s failed!", op_str); rv = -1; break; case RLT_INVALID_ARG: log_error("ticket \"%s\" does not exist", cl.attr_msg.attr.tkt_id); rv = 1; break; case RLT_NO_SUCH_ATTR: log_error("attribute \"%s\" not set", cl.attr_msg.attr.name); rv = 1; break; case RLT_AUTH: log_error("authentication error"); rv = -1; break; default: log_error("got an error code: %x", rv); rv = -1; } return rv; } /* read the server's reply * need to first get the header which contains the length of the * reply * return codes: * -2: header not received * -1: header received, but message too short * >=0: success */ static int read_server_reply( struct booth_transport const *tpt, struct booth_site *site, char *msg) { struct boothc_header *header; int rv; int len; header = (struct boothc_header *)msg; rv = tpt->recv(site, header, sizeof(*header)); if (rv < 0) { return -2; } len = ntohl(header->length); rv = tpt->recv(site, msg+len, len-sizeof(*header)); if (rv < 0) { return -1; } return rv; } int do_attr_command(cmd_request_t cmd) { struct booth_site *site = NULL; struct boothc_header *header; struct booth_transport const *tpt; int len, rv = -1; char *msg = NULL; if (!*cl.site) site = local; else { if (!find_site_by_name(cl.site, &site, 1)) { log_error("Site \"%s\" not configured.", cl.site); goto out_close; } } if (site->type == ARBITRATOR) { if (site == local) { log_error("We're just an arbitrator, no attributes here."); } else { log_error("%s is just an arbitrator, no attributes there.", cl.site); } goto out_close; } tpt = booth_transport + TCP; init_header(&cl.attr_msg.header, cmd, 0, cl.options, 0, 0, sizeof(cl.attr_msg)); rv = tpt->open(site); if (rv < 0) goto out_close; rv = tpt->send(site, &cl.attr_msg, sendmsglen(&cl.attr_msg)); if (rv < 0) goto out_close; msg = malloc(MAX_MSG_LEN); if (!msg) { log_error("out of memory"); rv = -1; goto out_close; } rv = read_server_reply(tpt, site, msg); header = (struct boothc_header *)msg; if (rv < 0) { if (rv == -1) (void)test_attr_reply(ntohl(header->result), cmd); goto out_close; } len = ntohl(header->length); if (check_boothc_header(header, len) < 0) { log_error("message from %s receive error", site_string(site)); rv = -1; goto out_close; } if (check_auth(site, msg, len)) { log_error("%s failed to authenticate", site_string(site)); rv = -1; goto out_close; } rv = test_attr_reply(ntohl(header->result), cmd); out_close: if (site) tpt->close(site); if (msg) free(msg); return rv; } /* * the server side */ /* need to invert gboolean, our success is 0 */ #define gbool2rlt(i) (i ? RLT_SUCCESS : RLT_SYNC_FAIL) static void free_geo_attr(gpointer data) { struct geo_attr *a = (struct geo_attr *)data; if (!a) return; g_free(a->val); g_free(a); } int store_geo_attr(struct ticket_config *tk, const char *name, char *val, int notime) { struct geo_attr *a; GDestroyNotify free_geo_attr_notify = free_geo_attr; if (!tk) return -1; /* * allocate new, if attr doesn't already exist * copy the attribute value * send status */ if (!tk->attr) tk->attr = g_hash_table_new_full(g_str_hash, g_str_equal, free_geo_attr_notify, g_free); if (!tk->attr) { log_error("out of memory"); return -1; } a = (struct geo_attr *)calloc(1, sizeof(struct geo_attr)); if (!a) { log_error("out of memory"); return -1; } a->val = g_strdup(val); if (!notime) get_time(&a->update_ts); g_hash_table_insert(tk->attr, g_strndup(name, BOOTH_NAME_LEN), a); return 0; } static cmd_result_t attr_set(struct ticket_config *tk, struct boothc_attr_msg *msg) { int rc; rc = store_geo_attr(tk, msg->attr.name, msg->attr.val, 0); if (rc) { return RLT_SYNC_FAIL; } (void)pcmk_handler.set_attr(tk, msg->attr.name, msg->attr.val); return RLT_SUCCESS; } static cmd_result_t attr_del(struct ticket_config *tk, struct boothc_attr_msg *msg) { gboolean rv; gpointer orig_key, value; /* * lookup attr * deallocate, if found * send status */ if (!tk->attr) return RLT_NO_SUCH_ATTR; rv = g_hash_table_lookup_extended(tk->attr, msg->attr.name, &orig_key, &value); if (!rv) return RLT_NO_SUCH_ATTR; rv = g_hash_table_remove(tk->attr, msg->attr.name); (void)pcmk_handler.del_attr(tk, msg->attr.name); return gbool2rlt(rv); } static void append_attr(gpointer key, gpointer value, gpointer user_data) { char *attr_name = (char *)key; struct geo_attr *a = (struct geo_attr *)value; GString *data = (GString *)user_data; char time_str[64]; time_t ts; if (is_time_set(&a->update_ts)) { ts = wall_ts(&a->update_ts); strftime(time_str, sizeof(time_str), "%F %T", localtime(&ts)); } g_string_append_printf(data, "%s %s %s\n", attr_name, a->val, time_str); } static cmd_result_t attr_get(struct ticket_config *tk, int fd, struct boothc_attr_msg *msg) { cmd_result_t rv = RLT_SUCCESS; struct boothc_hdr_msg hdr; struct geo_attr *a; GString *attr_val; /* * lookup attr * send value */ a = (struct geo_attr *)g_hash_table_lookup(tk->attr, msg->attr.name); if (!a) return RLT_NO_SUCH_ATTR; attr_val = g_string_new(NULL); if (!attr_val) { log_error("out of memory"); return RLT_SYNC_FAIL; } g_string_printf(attr_val, "%s\n", a->val); init_header(&hdr.header, ATTR_GET, 0, 0, RLT_SUCCESS, 0, sizeof(hdr) + attr_val->len); if (send_header_plus(fd, &hdr, attr_val->str, attr_val->len)) rv = RLT_SYNC_FAIL; if (attr_val) g_string_free(attr_val, FALSE); return rv; } static cmd_result_t attr_list(struct ticket_config *tk, int fd, struct boothc_attr_msg *msg) { GString *data; cmd_result_t rv; struct boothc_hdr_msg hdr; /* * list all attributes for the ticket * send the list */ data = g_string_sized_new(512); if (!data) { log_error("out of memory"); return RLT_SYNC_FAIL; } g_hash_table_foreach(tk->attr, append_attr, data); init_header(&hdr.header, ATTR_LIST, 0, 0, RLT_SUCCESS, 0, sizeof(hdr) + data->len); rv = send_header_plus(fd, &hdr, data->str, data->len); if (data) g_string_free(data, FALSE); return rv; } int process_attr_request(struct client *req_client, void *buf) { cmd_result_t rv = RLT_SYNC_FAIL; struct ticket_config *tk; int cmd; struct boothc_attr_msg *msg; struct boothc_hdr_msg hdr; msg = (struct boothc_attr_msg *)buf; cmd = ntohl(msg->header.cmd); if (!check_ticket(msg->attr.tkt_id, &tk)) { log_warn("client referenced unknown ticket %s", msg->attr.tkt_id); rv = RLT_INVALID_ARG; goto reply_now; } switch (cmd) { case ATTR_LIST: rv = attr_list(tk, req_client->fd, msg); if (rv) goto reply_now; return 1; case ATTR_GET: rv = attr_get(tk, req_client->fd, msg); if (rv) goto reply_now; return 1; case ATTR_SET: rv = attr_set(tk, msg); break; case ATTR_DEL: rv = attr_del(tk, msg); break; } reply_now: init_header(&hdr.header, CL_RESULT, 0, 0, rv, 0, sizeof(hdr)); send_header_plus(req_client->fd, &hdr, NULL, 0); return 1; } /* read attr message from another site */ /* this is a NOOP and it should never be invoked * only clients retrieve/manage attributes and they connect * directly to the target site */ int attr_recv(void *buf, struct booth_site *source) { struct boothc_attr_msg *msg; struct ticket_config *tk; msg = (struct boothc_attr_msg *)buf; log_warn("unexpected attribute message from %s", site_string(source)); if (!check_ticket(msg->attr.tkt_id, &tk)) { log_warn("got invalid ticket name %s from %s", msg->attr.tkt_id, site_string(source)); source->invalid_cnt++; return 1; } return 0; } diff --git a/src/attr.h b/src/attr.h index 0576b2d..7e46595 100644 --- a/src/attr.h +++ b/src/attr.h @@ -1,39 +1,39 @@ /* * Copyright (C) 2015 Dejan Muhamedagic * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef _ATTR_H #define _ATTR_H #define ATTR_PROG "geostore" #include "b_config.h" #include "log.h" #include #include #include "booth.h" #include "timer.h" #include void print_geostore_usage(void); int test_attr_reply(cmd_result_t reply_code, cmd_request_t cmd); int do_attr_command(cmd_request_t cmd); int process_attr_request(struct client *req_client, void *buf); int attr_recv(void *buf, struct booth_site *source); int store_geo_attr(struct ticket_config *tk, const char *name, char *val, int notime); #endif /* _ATTR_H */ diff --git a/src/auth.c b/src/auth.c index 8c16161..8230b78 100644 --- a/src/auth.c +++ b/src/auth.c @@ -1,130 +1,130 @@ /* * Copyright (C) 2015 Dejan Muhamedagic * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "auth.h" #if HAVE_LIBGCRYPT /* calculate the HMAC of the message in data and store it in result * it is up to the caller to make sure that there's enough space * at result for the MAC */ int calc_hmac(const void *data, size_t datalen, int hid, unsigned char *result, char *key, int keylen) { static gcry_md_hd_t digest; gcry_error_t err; if (!digest) { err = gcry_md_open(&digest, hid, GCRY_MD_FLAG_HMAC); if (err) { log_error("gcry_md_open: %s", gcry_strerror(err)); return -1; } err = gcry_md_setkey(digest, key, keylen); if (err) { log_error("gcry_md_open: %s", gcry_strerror(err)); return -1; } } gcry_md_write(digest, data, datalen); memcpy(result, gcry_md_read(digest, 0), gcry_md_get_algo_dlen(hid)); gcry_md_reset(digest); return 0; } /* test HMAC */ int verify_hmac(const void *data, size_t datalen, int hid, unsigned char *hmac, char *key, int keylen) { unsigned char *our_hmac; int rc; our_hmac = malloc(gcry_md_get_algo_dlen(hid)); if (!our_hmac) return -1; rc = calc_hmac(data, datalen, hid, our_hmac, key, keylen); if (rc) goto out_free; rc = memcmp(our_hmac, hmac, gcry_md_get_algo_dlen(hid)); out_free: if (our_hmac) free(our_hmac); return rc; } #endif #if HAVE_LIBMHASH /* calculate the HMAC of the message in data and store it in result * it is up to the caller to make sure that there's enough space * at result for the MAC */ int calc_hmac(const void *data, size_t datalen, hashid hid, unsigned char *result, char *key, int keylen) { MHASH td; size_t block_size; block_size = mhash_get_hash_pblock(hid); if (!block_size) return -1; td = mhash_hmac_init(hid, key, keylen, block_size); if (!td) return -1; (void)mhash(td, data, datalen); if (mhash_hmac_deinit(td, result)) return -1; return 0; } /* test HMAC */ int verify_hmac(const void *data, size_t datalen, hashid hid, unsigned char *hmac, char *key, int keylen) { MHASH td; unsigned char *our_hmac = NULL; int rc = -1; td = mhash_hmac_init(hid, key, keylen, mhash_get_hash_pblock(hid)); if (!td) return -1; our_hmac = malloc(mhash_get_block_size(hid)); if (!our_hmac) return -1; (void)mhash(td, data, datalen); if (mhash_hmac_deinit(td, our_hmac)) goto out_free; rc = memcmp(our_hmac, hmac, mhash_get_block_size(hid)); out_free: if (our_hmac) free(our_hmac); return rc; } #endif diff --git a/src/auth.h b/src/auth.h index 8e45bb7..a3c1fa5 100644 --- a/src/auth.h +++ b/src/auth.h @@ -1,45 +1,45 @@ /* * Copyright (C) 2015 Dejan Muhamedagic * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "b_config.h" #include "log.h" #include #if HAVE_LIBGCRYPT #include #define BOOTH_HASH GCRY_MD_SHA1 int calc_hmac(const void *data, size_t datalen, int hid, unsigned char *result, char *key, int keylen); int verify_hmac(const void *data, size_t datalen, int hid, unsigned char *hmac, char *key, int keylen); #endif #if HAVE_LIBMHASH #include #define BOOTH_HASH MHASH_SHA1 int calc_hmac(const void *data, size_t datalen, hashid hid, unsigned char *result, char *key, int keylen); int verify_hmac(const void *data, size_t datalen, hashid hid, unsigned char *hmac, char *key, int keylen); #endif diff --git a/src/booth.h b/src/booth.h index 036dea5..8ff7fce 100644 --- a/src/booth.h +++ b/src/booth.h @@ -1,384 +1,384 @@ /* * Copyright (C) 2011 Jiaju Zhang * Copyright (C) 2013-2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef _BOOTH_H #define _BOOTH_H #include #include #include #include #include #include #include "timer.h" #define BOOTH_RUN_DIR "/var/run/booth/" #define BOOTH_LOG_DIR "/var/log" #define BOOTH_LOGFILE_NAME "booth.log" #define BOOTH_DEFAULT_CONF_DIR "/etc/booth/" #define BOOTH_DEFAULT_CONF_NAME "booth" #define BOOTH_DEFAULT_CONF_EXT ".conf" #define BOOTH_DEFAULT_CONF \ BOOTH_DEFAULT_CONF_DIR BOOTH_DEFAULT_CONF_NAME BOOTH_DEFAULT_CONF_EXT #define DAEMON_NAME "boothd" #define BOOTH_PATH_LEN 127 #define BOOTH_MAX_KEY_LEN 64 #define BOOTH_MIN_KEY_LEN 8 /* hash size is 160 bits (sha1), but add a bit more space in case * stronger hashes are required */ #define BOOTH_MAC_SIZE 24 /* tolerate packets which are not older than 10 minutes */ #define BOOTH_DEFAULT_MAX_TIME_SKEW 600 #define BOOTH_DEFAULT_PORT 9929 #define BOOTHC_MAGIC 0x5F1BA08C #define BOOTHC_VERSION 0x00010003 /** Timeout value for poll(). * Determines frequency of periodic jobs, eg. when send-retries are done. * See process_tickets(). */ #define POLL_TIMEOUT 100 /** @{ */ /** The on-network data structures and constants. */ #define BOOTH_NAME_LEN 64 #define BOOTH_ATTRVAL_LEN 128 #define CHAR2CONST(a,b,c,d) ((a << 24) | (b << 16) | (c << 8) | d) /* Says that the ticket shouldn't be active anywhere. * NONE wouldn't be specific enough. */ #define NO_ONE ((uint32_t)-1) /* Says that another one should recover. */ #define TICKET_LOST CHAR2CONST('L', 'O', 'S', 'T') typedef unsigned char boothc_site [BOOTH_NAME_LEN]; typedef unsigned char boothc_ticket[BOOTH_NAME_LEN]; typedef unsigned char boothc_attr[BOOTH_NAME_LEN]; typedef unsigned char boothc_attr_value[BOOTH_ATTRVAL_LEN]; /* message option bits */ enum { BOOTH_OPT_AUTH = 1, /* authentication */ BOOTH_OPT_ATTR = 4, /* attr message type, otherwise ticket */ }; struct boothc_header { /** Various options, message type, authentication */ uint32_t opts; /** Generation info (used for authentication) * This is something that would need to be monotone * incremental. CLOCK_MONOTONIC should fit the purpose. On * failover, however, it may happen that the new host has a * clock which is significantly behind the clock of old host. * We'll need to relax a bit for the nodes which are starting * (just accept all OP_STATUS). */ uint32_t secs; /* seconds */ uint32_t usecs; /* microseconds */ /** BOOTHC_MAGIC */ uint32_t magic; /** BOOTHC_VERSION */ uint32_t version; /** Packet source; site_id. See add_site(). */ uint32_t from; /** Length including header */ uint32_t length; /** The command respectively protocol state. See cmd_request_t. */ uint32_t cmd; /** The matching request (what do we reply to). See cmd_request_t. */ uint32_t request; /** Command options. */ uint32_t options; /** The reason for this RPC. */ uint32_t reason; /** Result of operation. 0 == OK */ uint32_t result; char data[0]; } __attribute__((packed)); struct ticket_msg { /** Ticket name. */ boothc_ticket id; /** Current leader. May be NO_ONE. See add_site(). * For a OP_REQ_VOTE this is */ uint32_t leader; /** Current term. */ uint32_t term; uint32_t term_valid_for; /* Perhaps we need to send a status along, too - like * starting, running, stopping, error, ...? */ } __attribute__((packed)); struct attr_msg { /** Ticket name. */ boothc_ticket tkt_id; /** Attribute name. */ boothc_attr name; /** The value. */ boothc_attr_value val; } __attribute__((packed)); /* GEO attributes * attributes should be regularly updated. */ struct geo_attr { /** Update timestamp. */ timetype update_ts; /** The value. */ char *val; /** Who set it (currently unused) struct booth_site *origin; */ } __attribute__((packed)); struct hmac { /** hash id, currently set to constant BOOTH_HASH */ uint32_t hid; /** the calculated hash, BOOTH_MAC_SIZE is big enough to * accommodate the hash of type hid */ unsigned char hash[BOOTH_MAC_SIZE]; } __attribute__((packed)); struct boothc_hdr_msg { struct boothc_header header; struct hmac hmac; } __attribute__((packed)); struct boothc_ticket_msg { struct boothc_header header; struct ticket_msg ticket; struct hmac hmac; } __attribute__((packed)); struct boothc_attr_msg { struct boothc_header header; struct attr_msg attr; struct hmac hmac; } __attribute__((packed)); typedef enum { /* 0x43 = "C"ommands */ CMD_LIST = CHAR2CONST('C', 'L', 's', 't'), CMD_GRANT = CHAR2CONST('C', 'G', 'n', 't'), CMD_REVOKE = CHAR2CONST('C', 'R', 'v', 'k'), CMD_PEERS = CHAR2CONST('P', 'e', 'e', 'r'), /* Replies */ CL_RESULT = CHAR2CONST('R', 's', 'l', 't'), CL_LIST = CHAR2CONST('R', 'L', 's', 't'), CL_GRANT = CHAR2CONST('R', 'G', 'n', 't'), CL_REVOKE = CHAR2CONST('R', 'R', 'v', 'k'), /* get status from another server */ OP_STATUS = CHAR2CONST('S', 't', 'a', 't'), OP_MY_INDEX = CHAR2CONST('M', 'I', 'd', 'x'), /* reply to status */ /* Raft */ OP_REQ_VOTE = CHAR2CONST('R', 'V', 'o', 't'), /* start election */ OP_VOTE_FOR = CHAR2CONST('V', 't', 'F', 'r'), /* reply to REQ_VOTE */ OP_HEARTBEAT= CHAR2CONST('H', 'r', 't', 'B'), /* Heartbeat */ OP_ACK = CHAR2CONST('A', 'c', 'k', '.'), /* Ack for heartbeats and revokes */ OP_UPDATE = CHAR2CONST('U', 'p', 'd', 'E'), /* Update ticket */ OP_REVOKE = CHAR2CONST('R', 'e', 'v', 'k'), /* Revoke ticket */ OP_REJECTED = CHAR2CONST('R', 'J', 'C', '!'), /* Attributes */ ATTR_SET = CHAR2CONST('A', 'S', 'e', 't'), ATTR_GET = CHAR2CONST('A', 'G', 'e', 't'), ATTR_DEL = CHAR2CONST('A', 'D', 'e', 'l'), ATTR_LIST = CHAR2CONST('A', 'L', 's', 't'), } cmd_request_t; typedef enum { /* for compatibility with other functions */ RLT_SUCCESS = 0, RLT_ASYNC = CHAR2CONST('A', 's', 'y', 'n'), RLT_MORE = CHAR2CONST('M', 'o', 'r', 'e'), RLT_SYNC_SUCC = CHAR2CONST('S', 'c', 'c', 's'), RLT_SYNC_FAIL = CHAR2CONST('F', 'a', 'i', 'l'), RLT_INVALID_ARG = CHAR2CONST('I', 'A', 'r', 'g'), RLT_NO_SUCH_ATTR = CHAR2CONST('N', 'A', 't', 'r'), RLT_CIB_PENDING = CHAR2CONST('P', 'e', 'n', 'd'), RLT_EXT_FAILED = CHAR2CONST('X', 'P', 'r', 'g'), RLT_ATTR_PREREQ = CHAR2CONST('A', 'P', 'r', 'q'), RLT_TICKET_IDLE = CHAR2CONST('T', 'i', 'd', 'l'), RLT_OVERGRANT = CHAR2CONST('O', 'v', 'e', 'r'), RLT_PROBABLY_SUCCESS = CHAR2CONST('S', 'u', 'c', '?'), RLT_BUSY = CHAR2CONST('B', 'u', 's', 'y'), RLT_AUTH = CHAR2CONST('A', 'u', 't', 'h'), RLT_TERM_OUTDATED = CHAR2CONST('T', 'O', 'd', 't'), RLT_TERM_STILL_VALID = CHAR2CONST('T', 'V', 'l', 'd'), RLT_YOU_OUTDATED = CHAR2CONST('O', 'u', 't', 'd'), RLT_REDIRECT = CHAR2CONST('R', 'e', 'd', 'r'), } cmd_result_t; typedef enum { /* for compatibility with other functions */ OR_JUST_SO = 0, OR_AGAIN = CHAR2CONST('A', 'a', 'a', 'a'), OR_TKT_LOST = CHAR2CONST('T', 'L', 's', 't'), OR_REACQUIRE = CHAR2CONST('R', 'a', 'c', 'q'), OR_ADMIN = CHAR2CONST('A', 'd', 'm', 'n'), OR_LOCAL_FAIL = CHAR2CONST('L', 'o', 'c', 'F'), OR_STEPDOWN = CHAR2CONST('S', 'p', 'd', 'n'), OR_SPLIT = CHAR2CONST('S', 'p', 'l', 't'), } cmd_reason_t; /* bitwise command options */ typedef enum { OPT_IMMEDIATE = 1, /* immediate grant */ OPT_WAIT = 2, /* wait for the elections' outcome */ OPT_WAIT_COMMIT = 4, /* wait for the ticket commit to CIB */ } cmd_options_t; /** @} */ /** @{ */ struct booth_site { /** Calculated ID. See add_site(). */ int site_id; int type; int local; /** Roles, like ACCEPTOR, PROPOSER, or LEARNER. Not really used ATM. */ int role; char addr_string[BOOTH_NAME_LEN]; int tcp_fd; int udp_fd; /* 0-based, used for indexing into per-ticket weights */ int index; uint64_t bitmask; unsigned short family; union { struct sockaddr_in sa4; struct sockaddr_in6 sa6; }; int saddrlen; int addrlen; /** statistics */ time_t last_recv; unsigned int sent_cnt; unsigned int sent_err_cnt; unsigned int resend_cnt; unsigned int recv_cnt; unsigned int recv_err_cnt; unsigned int sec_cnt; unsigned int invalid_cnt; /** last timestamp seen from this site */ uint32_t last_secs; uint32_t last_usecs; } __attribute__((packed)); extern struct booth_site *local; extern struct booth_site * no_leader; /** @} */ struct booth_transport; struct client { int fd; const struct booth_transport *transport; struct boothc_ticket_msg *msg; int offset; /* bytes read so far into msg */ void (*workfn)(int); void (*deadfn)(int); }; extern struct client *clients; extern struct pollfd *pollfds; int client_add(int fd, const struct booth_transport *tpt, void (*workfn)(int ci), void (*deadfn)(int ci)); int find_client_by_fd(int fd); void safe_copy(char *dest, char *value, size_t buflen, const char *description); int update_authkey(void); void list_peers(int fd); struct command_line { int type; /* ACT_ */ int op; /* OP_ */ int options; /* OPT_ */ char configfile[BOOTH_PATH_LEN]; char lockfile[BOOTH_PATH_LEN]; char site[BOOTH_NAME_LEN]; struct boothc_ticket_msg msg; struct boothc_attr_msg attr_msg; }; extern struct command_line cl; /* http://gcc.gnu.org/onlinedocs/gcc/Typeof.html */ #define min(a__,b__) \ ({ typeof (a__) _a = (a__); \ typeof (b__) _b = (b__); \ _a < _b ? _a : _b; }) #define max(a__,b__) \ ({ typeof (a__) _a = (a__); \ typeof (b__) _b = (b__); \ _a > _b ? _a : _b; }) #endif /* _BOOTH_H */ diff --git a/src/config.c b/src/config.c index d81719f..4f1be5b 100644 --- a/src/config.c +++ b/src/config.c @@ -1,970 +1,970 @@ /* * Copyright (C) 2011 Jiaju Zhang * Copyright (C) 2013-2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include #include #include #include #include #include #include #include #include "b_config.h" #include "booth.h" #include "config.h" #include "raft.h" #include "ticket.h" #include "log.h" static int ticket_size = 0; static int ticket_realloc(void) { const int added = 5; int had, want; void *p; had = booth_conf->ticket_allocated; want = had + added; p = realloc(booth_conf->ticket, sizeof(struct ticket_config) * want); if (!p) { log_error("can't alloc more tickets"); return -ENOMEM; } booth_conf->ticket = p; memset(booth_conf->ticket + had, 0, sizeof(struct ticket_config) * added); booth_conf->ticket_allocated = want; return 0; } int add_site(char *address, int type); int add_site(char *addr_string, int type) { int rv; struct booth_site *site; uLong nid; uint32_t mask; int i; rv = 1; if (booth_conf->site_count == MAX_NODES) { log_error("too many nodes"); goto out; } if (strlen(addr_string)+1 >= sizeof(booth_conf->site[0].addr_string)) { log_error("site address \"%s\" too long", addr_string); goto out; } site = booth_conf->site + booth_conf->site_count; site->family = AF_INET; site->type = type; /* Make site_id start at a non-zero point. * Perhaps use hash over string or address? */ strcpy(site->addr_string, addr_string); site->index = booth_conf->site_count; site->bitmask = 1 << booth_conf->site_count; /* Catch site overflow */ assert(site->bitmask); booth_conf->all_bits |= site->bitmask; if (type == SITE) booth_conf->sites_bits |= site->bitmask; site->tcp_fd = -1; booth_conf->site_count++; rv = 0; memset(&site->sa6, 0, sizeof(site->sa6)); nid = crc32(0L, NULL, 0); /* Using the ASCII representation in site->addr_string (both sizeof() * and strlen()) gives quite a lot of collisions; a brute-force run * from 0.0.0.0 to 24.0.0.0 gives ~4% collisions, and this tends to * increase even more. * Whether there'll be a collision in real-life, with 3 or 5 nodes, is * another question ... but for now get the ID from the binary * representation - that had *no* collisions up to 32.0.0.0. */ if (inet_pton(AF_INET, site->addr_string, &site->sa4.sin_addr) > 0) { site->family = AF_INET; site->sa4.sin_family = site->family; site->sa4.sin_port = htons(booth_conf->port); site->saddrlen = sizeof(site->sa4); site->addrlen = sizeof(site->sa4.sin_addr); site->site_id = crc32(nid, (void*)&site->sa4.sin_addr, site->addrlen); } else if (inet_pton(AF_INET6, site->addr_string, &site->sa6.sin6_addr) > 0) { site->family = AF_INET6; site->sa6.sin6_family = site->family; site->sa6.sin6_flowinfo = 0; site->sa6.sin6_port = htons(booth_conf->port); site->saddrlen = sizeof(site->sa6); site->addrlen = sizeof(site->sa6.sin6_addr); site->site_id = crc32(nid, (void*)&site->sa6.sin6_addr, site->addrlen); } else { log_error("Address string \"%s\" is bad", site->addr_string); rv = EINVAL; } /* Make sure we will never collide with NO_ONE, * or be negative (to get "get_local_id() < 0" working). */ mask = 1 << (sizeof(site->site_id)*8 -1); assert(NO_ONE & mask); site->site_id &= ~mask; /* Test for collisions with other sites */ for(i=0; iindex; i++) if (booth_conf->site[i].site_id == site->site_id) { log_error("Got a site-ID collision. Please file a bug on https://github.com/ClusterLabs/booth/issues/new, attaching the configuration file."); exit(1); } out: return rv; } inline static char *skip_while_in(const char *cp, int (*fn)(int), const char *allowed) { /* strchr() returns a pointer to the terminator if *cp == 0. */ while (*cp && (fn(*cp) || strchr(allowed, *cp))) cp++; /* discard "const" qualifier */ return (char*)cp; } inline static char *skip_while(char *cp, int (*fn)(int)) { while (fn(*cp)) cp++; return cp; } inline static char *skip_until(char *cp, char expected) { while (*cp && *cp != expected) cp++; return cp; } static inline int is_end_of_line(char *cp) { char c = *cp; return c == '\n' || c == 0 || c == '#'; } static int add_ticket(const char *name, struct ticket_config **tkp, const struct ticket_config *def) { int rv; struct ticket_config *tk; if (booth_conf->ticket_count == booth_conf->ticket_allocated) { rv = ticket_realloc(); if (rv < 0) return rv; } tk = booth_conf->ticket + booth_conf->ticket_count; booth_conf->ticket_count++; if (!check_max_len_valid(name, sizeof(tk->name))) { log_error("ticket name \"%s\" too long.", name); return -EINVAL; } if (find_ticket_by_name(name, NULL)) { log_error("ticket name \"%s\" used again.", name); return -EINVAL; } if (* skip_while_in(name, isalnum, "-/")) { log_error("ticket name \"%s\" invalid; only alphanumeric names.", name); return -EINVAL; } strcpy(tk->name, name); tk->timeout = def->timeout; tk->term_duration = def->term_duration; tk->retries = def->retries; memcpy(tk->weight, def->weight, sizeof(tk->weight)); if (tkp) *tkp = tk; return 0; } static int postproc_ticket(struct ticket_config *tk) { if (!tk) return 1; if (!tk->renewal_freq) { tk->renewal_freq = tk->term_duration/2; } if (tk->timeout*(tk->retries+1) >= tk->renewal_freq) { log_error("%s: total amount of time to " "retry sending packets cannot exceed " "renewal frequency " "(%d*(%d+1) >= %d)", tk->name, tk->timeout, tk->retries, tk->renewal_freq); return 0; } return 1; } /* returns number of weights, or -1 on bad input. */ static int parse_weights(const char *input, int weights[MAX_NODES]) { int i, v; char *cp; for(i=0; i= MAX_ARGS) { log_error("too many arguments for the acquire-handler"); free(tk_test.path); return -1; } tk_test.argv[i++] = p; } while (p); return 0; } struct toktab grant_type[] = { { "auto", GRANT_AUTO}, { "manual", GRANT_MANUAL}, { NULL, 0}, }; struct toktab attr_op[] = { {"eq", ATTR_OP_EQ}, {"ne", ATTR_OP_NE}, {NULL, 0}, }; static int lookup_tokval(char *key, struct toktab *tab) { struct toktab *tp; for (tp = tab; tp->str; tp++) { if (!strcmp(tp->str, key)) return tp->val; } return 0; } /* attribute prerequisite */ static int parse_attr_prereq(char *val, struct ticket_config *tk) { struct attr_prereq *ap = NULL; char *p; ap = (struct attr_prereq *)calloc(1, sizeof(struct attr_prereq)); if (!ap) { log_error("out of memory"); return -1; } p = strtok(val, " \t"); if (!p) { log_error("not enough arguments to attr-prereq"); goto err_out; } ap->grant_type = lookup_tokval(p, grant_type); if (!ap->grant_type) { log_error("%s is not a grant type", p); goto err_out; } p = strtok(NULL, " \t"); if (!p) { log_error("not enough arguments to attr-prereq"); goto err_out; } if (!(ap->attr_name = strdup(p))) { log_error("out of memory"); goto err_out; } p = strtok(NULL, " \t"); if (!p) { log_error("not enough arguments to attr-prereq"); goto err_out; } ap->op = lookup_tokval(p, attr_op); if (!ap->op) { log_error("%s is not an attribute operation", p); goto err_out; } p = strtok(NULL, " \t"); if (!p) { log_error("not enough arguments to attr-prereq"); goto err_out; } if (!(ap->attr_val = strdup(p))) { log_error("out of memory"); goto err_out; } tk->attr_prereqs = g_list_append(tk->attr_prereqs, ap); if (!tk->attr_prereqs) { log_error("out of memory"); goto err_out; } return 0; err_out: if (ap) { if (ap->attr_val) free(ap->attr_val); if (ap->attr_name) free(ap->attr_name); free(ap); } return -1; } extern int poll_timeout; int read_config(const char *path, int type) { char line[1024]; FILE *fp; char *s, *key, *val, *end_of_key; const char *error; char *cp, *cp2; int i; int lineno = 0; int got_transport = 0; int min_timeout = 0; struct ticket_config defaults = { { 0 } }; struct ticket_config *current_tk = NULL; fp = fopen(path, "r"); if (!fp) { log_error("failed to open %s: %s", path, strerror(errno)); return -1; } booth_conf = malloc(sizeof(struct booth_config) + TICKET_ALLOC * sizeof(struct ticket_config)); if (!booth_conf) { log_error("failed to alloc memory for booth config"); return -ENOMEM; } memset(booth_conf, 0, sizeof(struct booth_config) + TICKET_ALLOC * sizeof(struct ticket_config)); ticket_size = TICKET_ALLOC; booth_conf->proto = UDP; booth_conf->port = BOOTH_DEFAULT_PORT; booth_conf->maxtimeskew = BOOTH_DEFAULT_MAX_TIME_SKEW; booth_conf->authkey[0] = '\0'; /* Provide safe defaults. -1 is reserved, though. */ booth_conf->uid = -2; booth_conf->gid = -2; strcpy(booth_conf->site_user, "hacluster"); strcpy(booth_conf->site_group, "haclient"); strcpy(booth_conf->arb_user, "nobody"); strcpy(booth_conf->arb_group, "nobody"); parse_weights("", defaults.weight); defaults.clu_test.path = NULL; defaults.clu_test.pid = 0; defaults.clu_test.status = 0; defaults.clu_test.progstate = EXTPROG_IDLE; defaults.term_duration = DEFAULT_TICKET_EXPIRY; defaults.timeout = DEFAULT_TICKET_TIMEOUT; defaults.retries = DEFAULT_RETRIES; defaults.acquire_after = 0; error = ""; log_debug("reading config file %s", path); while (fgets(line, sizeof(line), fp)) { lineno++; s = skip_while(line, isspace); if (is_end_of_line(s) || *s == '#') continue; key = s; /* Key */ end_of_key = skip_while_in(key, isalnum, "-_"); if (end_of_key == key) { error = "No key"; goto err; } if (!*end_of_key) goto exp_equal; /* whitespace, and something else but nothing more? */ s = skip_while(end_of_key, isspace); if (*s != '=') { exp_equal: error = "Expected '=' after key"; goto err; } s++; /* It's my buffer, and I terminate if I want to. */ /* But not earlier than that, because we had to check for = */ *end_of_key = 0; /* Value tokenizing */ s = skip_while(s, isspace); switch (*s) { case '"': case '\'': val = s+1; s = skip_until(val, *s); /* Terminate value */ if (!*s) { error = "Unterminated quoted string"; goto err; } /* Remove and skip quote */ *s = 0; s++; if (*(s = skip_while(s, isspace)) && *s != '#') { error = "Surplus data after value"; goto err; } *s = 0; break; case 0: no_value: error = "No value"; goto err; break; default: val = s; /* Rest of line. */ i = strlen(s); /* i > 0 because of "case 0" above. */ while (i > 0 && isspace(s[i-1])) i--; s += i; *s = 0; } if (val == s) goto no_value; if (strlen(key) > BOOTH_NAME_LEN || strlen(val) > BOOTH_NAME_LEN) { error = "key/value too long"; goto err; } if (strcmp(key, "transport") == 0) { if (got_transport) { error = "config file has multiple transport lines"; goto err; } if (strcasecmp(val, "UDP") == 0) booth_conf->proto = UDP; else if (strcasecmp(val, "SCTP") == 0) booth_conf->proto = SCTP; else { error = "invalid transport protocol"; goto err; } got_transport = 1; continue; } if (strcmp(key, "port") == 0) { booth_conf->port = atoi(val); continue; } if (strcmp(key, "name") == 0) { safe_copy(booth_conf->name, val, BOOTH_NAME_LEN, "name"); continue; } #if HAVE_LIBGCRYPT || HAVE_LIBMHASH if (strcmp(key, "authfile") == 0) { safe_copy(booth_conf->authfile, val, BOOTH_PATH_LEN, "authfile"); continue; } if (strcmp(key, "maxtimeskew") == 0) { booth_conf->maxtimeskew = atoi(val); continue; } #endif if (strcmp(key, "site") == 0) { if (add_site(val, SITE)) goto out; continue; } if (strcmp(key, "arbitrator") == 0) { if (add_site(val, ARBITRATOR)) goto out; continue; } if (strcmp(key, "site-user") == 0) { safe_copy(booth_conf->site_user, optarg, BOOTH_NAME_LEN, "site-user"); continue; } if (strcmp(key, "site-group") == 0) { safe_copy(booth_conf->site_group, optarg, BOOTH_NAME_LEN, "site-group"); continue; } if (strcmp(key, "arbitrator-user") == 0) { safe_copy(booth_conf->arb_user, optarg, BOOTH_NAME_LEN, "arbitrator-user"); continue; } if (strcmp(key, "arbitrator-group") == 0) { safe_copy(booth_conf->arb_group, optarg, BOOTH_NAME_LEN, "arbitrator-group"); continue; } if (strcmp(key, "debug") == 0) { if (type != CLIENT && type != GEOSTORE) debug_level = max(debug_level, atoi(val)); continue; } if (strcmp(key, "ticket") == 0) { if (current_tk && strcmp(current_tk->name, "__defaults__")) { if (!postproc_ticket(current_tk)) { goto out; } } if (!strcmp(val, "__defaults__")) { current_tk = &defaults; } else if (add_ticket(val, ¤t_tk, &defaults)) { goto out; } continue; } /* current_tk must be allocated at this point, otherwise * we don't know to which ticket the key refers */ if (!current_tk) { error = "Unexpected keyword"; goto err; } if (strcmp(key, "expire") == 0) { current_tk->term_duration = read_time(val); if (current_tk->term_duration <= 0) { error = "Expected time >0 for expire"; goto err; } continue; } if (strcmp(key, "timeout") == 0) { current_tk->timeout = read_time(val); if (current_tk->timeout <= 0) { error = "Expected time >0 for timeout"; goto err; } if (!min_timeout) { min_timeout = current_tk->timeout; } else { min_timeout = min(min_timeout, current_tk->timeout); } continue; } if (strcmp(key, "retries") == 0) { current_tk->retries = strtol(val, &s, 0); if (*s || s == val || current_tk->retries<3 || current_tk->retries > 100) { error = "Expected plain integer value in the range [3, 100] for retries"; goto err; } continue; } if (strcmp(key, "renewal-freq") == 0) { current_tk->renewal_freq = read_time(val); if (current_tk->renewal_freq <= 0) { error = "Expected time >0 for renewal-freq"; goto err; } continue; } if (strcmp(key, "acquire-after") == 0) { current_tk->acquire_after = read_time(val); if (current_tk->acquire_after < 0) { error = "Expected time >=0 for acquire-after"; goto err; } continue; } if (strcmp(key, "before-acquire-handler") == 0) { if (parse_extprog(val, current_tk)) { goto err; } continue; } if (strcmp(key, "attr-prereq") == 0) { if (parse_attr_prereq(val, current_tk)) { goto err; } continue; } if (strcmp(key, "weights") == 0) { if (parse_weights(val, current_tk->weight) < 0) goto out; continue; } error = "Unknown keyword"; goto err; } if ((booth_conf->site_count % 2) == 0) { log_warn("Odd number of nodes is strongly recommended!"); } /* Default: make config name match config filename. */ if (!booth_conf->name[0]) { cp = strrchr(path, '/'); cp = cp ? cp+1 : (char *)path; cp2 = strrchr(cp, '.'); if (!cp2) cp2 = cp + strlen(cp); if (cp2-cp >= BOOTH_NAME_LEN) { log_error("booth config file name too long"); goto err; } strncpy(booth_conf->name, cp, cp2-cp); *(booth_conf->name+(cp2-cp)) = '\0'; } if (!postproc_ticket(current_tk)) { goto out; } poll_timeout = min(POLL_TIMEOUT, min_timeout/10); if (!poll_timeout) poll_timeout = POLL_TIMEOUT; return 0; err: out: log_error("%s in config file line %d", error, lineno); free(booth_conf); booth_conf = NULL; return -1; } int check_config(int type) { struct passwd *pw; struct group *gr; char *cp, *input; if (!booth_conf) return -1; input = (type == ARBITRATOR) ? booth_conf->arb_user : booth_conf->site_user; if (!*input) goto u_inval; if (isdigit(input[0])) { booth_conf->uid = strtol(input, &cp, 0); if (*cp != 0) { u_inval: log_error("User \"%s\" cannot be resolved into a UID.", input); return ENOENT; } } else { pw = getpwnam(input); if (!pw) goto u_inval; booth_conf->uid = pw->pw_uid; } input = (type == ARBITRATOR) ? booth_conf->arb_group : booth_conf->site_group; if (!*input) goto g_inval; if (isdigit(input[0])) { booth_conf->gid = strtol(input, &cp, 0); if (*cp != 0) { g_inval: log_error("Group \"%s\" cannot be resolved into a UID.", input); return ENOENT; } } else { gr = getgrnam(input); if (!gr) goto g_inval; booth_conf->gid = gr->gr_gid; } return 0; } static int get_other_site(struct booth_site **node) { struct booth_site *n; int i; *node = NULL; if (!booth_conf) return 0; for (i = 0; i < booth_conf->site_count; i++) { n = booth_conf->site + i; if (n != local && n->type == SITE) { if (!*node) { *node = n; } else { return 0; } } } return !*node ? 0 : 1; } int find_site_by_name(unsigned char *site, struct booth_site **node, int any_type) { struct booth_site *n; int i; if (!booth_conf) return 0; if (!strcmp(site, OTHER_SITE)) return get_other_site(node); for (i = 0; i < booth_conf->site_count; i++) { n = booth_conf->site + i; if ((n->type == SITE || any_type) && strcmp(n->addr_string, site) == 0) { *node = n; return 1; } } return 0; } int find_site_by_id(uint32_t site_id, struct booth_site **node) { struct booth_site *n; int i; if (site_id == NO_ONE) { *node = no_leader; return 1; } if (!booth_conf) return 0; for (i = 0; i < booth_conf->site_count; i++) { n = booth_conf->site + i; if (n->site_id == site_id) { *node = n; return 1; } } return 0; } const char *type_to_string(int type) { switch (type) { case ARBITRATOR: return "arbitrator"; case SITE: return "site"; case CLIENT: return "client"; case GEOSTORE: return "attr"; } return "??invalid-type??"; } diff --git a/src/config.h b/src/config.h index b6ae377..eea13a3 100644 --- a/src/config.h +++ b/src/config.h @@ -1,305 +1,305 @@ /* * Copyright (C) 2011 Jiaju Zhang * Copyright (C) 2013-2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef _CONFIG_H #define _CONFIG_H #include #include #include "booth.h" #include "timer.h" #include "raft.h" #include "transport.h" /** @{ */ /** Definitions for in-RAM data. */ #define MAX_NODES 16 #define MAX_ARGS 16 #define TICKET_ALLOC 16 #define OTHER_SITE "other" typedef enum { EXTPROG_IDLE, EXTPROG_RUNNING, EXTPROG_EXITED, EXTPROG_IGNORE, } extprog_state_e; #define tk_test tk->clu_test typedef enum { ATTR_OP_EQ = 1, ATTR_OP_NE, } attr_op_e; typedef enum { GRANT_AUTO = 1, GRANT_MANUAL, } grant_type_e; struct toktab { const char *str; int val; }; struct attr_prereq { grant_type_e grant_type; /* grant type */ attr_op_e op; /* attribute operation */ char *attr_name; char *attr_val; }; struct ticket_config { /** \name Configuration items. * @{ */ /** Name of ticket. */ boothc_ticket name; /** How long a term lasts if not refreshed (in ms) */ int term_duration; /** Network related timeouts (in ms) */ int timeout; /** Retries before giving up. */ int retries; /** If >0, time to wait for a site to get fenced. * The ticket may be acquired after that timespan by * another site. */ int acquire_after; /* How often to renew the ticket (in ms) */ int renewal_freq; /* Program to ask whether it makes sense to * acquire the ticket */ struct clu_test { char *path; int is_dir; char *argv[MAX_ARGS]; pid_t pid; int status; /* child exit status */ extprog_state_e progstate; /* program running/idle/waited on */ } clu_test; /** Node weights. */ int weight[MAX_NODES]; /** @} */ /** \name Runtime values. * @{ */ /** Current state. */ server_state_e state; /** Next state. Used at startup. */ server_state_e next_state; /** When something has to be done */ timetype next_cron; /** Current leader. This is effectively the log[] in Raft. */ struct booth_site *leader; /** Leader that got lost. */ struct booth_site *lost_leader; /** Is the ticket granted? */ int is_granted; /** Timestamp of leadership expiration */ timetype term_expires; /** End of election period */ timetype election_end; struct booth_site *voted_for; /** Who the various sites vote for. * NO_OWNER = no vote yet. */ struct booth_site *votes_for[MAX_NODES]; /* bitmap */ uint64_t votes_received; /** Last voting round that was seen. */ uint32_t current_term; /** Do ticket updates whenever we get enough heartbeats. * But do that only once. * This is reset to 0 whenever we broadcast heartbeat and set * to 1 once enough acks are received. * Increased to 2 when the ticket is commited to the CIB (see * delay_commit). */ uint32_t ticket_updated; /** Outcome of whatever ticket request was processed. * Can also be an intermediate stage. */ uint32_t outcome; /** @} */ /** */ uint32_t last_applied; uint32_t next_index[MAX_NODES]; uint32_t match_index[MAX_NODES]; /* Why did we start the elections? */ cmd_reason_t election_reason; /* if it is potentially dangerous to grant the ticket * immediately, then this is set to some point in time, * usually (now + term_duration + acquire_after) */ timetype delay_commit; /* the last request RPC we sent */ uint32_t last_request; /* if we expect some acks, then set this to the id of * the RPC which others will send us; it is cleared once all * replies were received */ uint32_t acks_expected; /* bitmask of servers which sent acks */ uint64_t acks_received; /* timestamp of the request */ timetype req_sent_at; /* we need to wait for MY_INDEX from other servers, * hold the ticket processing for a while until they reply */ int start_postpone; /** Last renewal time */ timetype last_renewal; /* Do we need to update the copy in the CIB? * Normally, the ticket is written only when it changes via * the UPDATE RPC (for followers) and on expiration update * (for leaders) */ int update_cib; /* Is this ticket in election? */ int in_election; /* don't log warnings unnecessarily */ int expect_more_rejects; /** \name Needed while proposals are being done. * @{ */ /* Need to keep the previous valid ticket in case we moved to * start new elections and another server asks for the ticket * status. It would be wrong to send our candidate ticket. */ struct ticket_config *last_valid_tk; /** Attributes, user defined */ GHashTable *attr; /** Attribute prerequisites */ GList *attr_prereqs; /** Whom to vote for the next time. * Needed to push a ticket to someone else. */ #if 0 /** Bitmap of sites that acknowledge that state. */ uint64_t proposal_acknowledges; /** When an incompletely acknowledged proposal gets done. * If all peers agree, that happens sooner. * See switch_state_to(). */ struct timeval proposal_switch; /** Timestamp of proposal expiration. */ time_t proposal_expires; #endif /** Number of send retries left. * Used on the new owner. * Starts at 0, counts up. */ int retry_number; /** @} */ }; struct booth_config { char name[BOOTH_NAME_LEN]; /** File containing the authentication file. */ char authfile[BOOTH_PATH_LEN]; struct stat authstat; unsigned char authkey[BOOTH_MAX_KEY_LEN]; int authkey_len; /** Maximum time skew between peers allowed */ int maxtimeskew; transport_layer_t proto; uint16_t port; /** Stores the OR of sites bitmasks. */ uint64_t sites_bits; /** Stores the OR of all members' bitmasks. */ uint64_t all_bits; char site_user[BOOTH_NAME_LEN]; char site_group[BOOTH_NAME_LEN]; char arb_user[BOOTH_NAME_LEN]; char arb_group[BOOTH_NAME_LEN]; uid_t uid; gid_t gid; int site_count; struct booth_site site[MAX_NODES]; int ticket_count; int ticket_allocated; struct ticket_config *ticket; }; extern struct booth_config *booth_conf; #define is_auth_req() (booth_conf->authkey[0] != '\0') int read_config(const char *path, int type); int check_config(int type); int find_site_by_name(unsigned char *site, struct booth_site **node, int any_type); int find_site_by_id(uint32_t site_id, struct booth_site **node); const char *type_to_string(int type); #endif /* _CONFIG_H */ diff --git a/src/handler.c b/src/handler.c index 282f6d7..381975c 100644 --- a/src/handler.c +++ b/src/handler.c @@ -1,270 +1,270 @@ /* * Copyright (C) 2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include #include #include #include #include #include #include #include #include "ticket.h" #include "config.h" #include "inline-fn.h" #include "log.h" #include "pacemaker.h" #include "booth.h" #include "handler.h" static int set_booth_env(struct ticket_config *tk) { int rv; char expires[16]; sprintf(expires, "%" PRId64, (int64_t)wall_ts(&tk->term_expires)); rv = setenv("BOOTH_TICKET", tk->name, 1) || setenv("BOOTH_LOCAL", local->addr_string, 1) || setenv("BOOTH_CONF_NAME", booth_conf->name, 1) || setenv("BOOTH_CONF_PATH", cl.configfile, 1) || setenv("BOOTH_TICKET_EXPIRES", expires, 1); if (rv) { log_error("Cannot set environment: %s", strerror(errno)); } return rv; } static void closefiles(void) { int fd; /* close all descriptors except stdin/out/err */ for (fd = getdtablesize() - 1; fd > STDERR_FILENO; fd--) { close(fd); } } static void run_ext_prog(struct ticket_config *tk, char *prog) { if (set_booth_env(tk)) { _exit(1); } closefiles(); /* don't leak open files */ tk_log_debug("running handler %s", prog); execv(prog, tk_test.argv); tk_log_error("%s: execv failed (%s)", prog, strerror(errno)); _exit(1); } static int prog_filter(const struct dirent *dp) { return (*dp->d_name != '.'); } static pid_t curr_pid; static int ignore_status; static int test_exit_status(struct ticket_config *tk, char *prog, int status, int log_msg) { int rv = -1; if (WIFEXITED(status)) { rv = WEXITSTATUS(status); } else if (WIFSIGNALED(status)) { rv = 128 + WTERMSIG(status); } if (rv) { if (log_msg) { tk_log_warn("handler \"%s\" failed: %s", prog, interpret_rv(status)); tk_log_warn("we are not allowed to acquire ticket"); } } else { tk_log_debug("handler \"%s\" exited with success", prog); } return rv; } static void reset_test_state(struct ticket_config *tk) { tk_test.pid = 0; tk_test.progstate = EXTPROG_IDLE; } int tk_test_exit_status(struct ticket_config *tk) { int rv; rv = test_exit_status(tk, tk_test.path, tk_test.status, !tk_test.is_dir); reset_test_state(tk); return rv; } void wait_child(int sig) { int i, status; struct ticket_config *tk; /* use waitpid(2) and not wait(2) in order not to interfear * with popen(2)/pclose(2) and system(2) used in pacemaker.c */ foreach_ticket(i, tk) { if (tk_test.path && tk_test.pid >= 0 && (tk_test.progstate == EXTPROG_RUNNING || tk_test.progstate == EXTPROG_IGNORE) && waitpid(tk_test.pid, &status, WNOHANG) == tk_test.pid) { if (tk_test.progstate == EXTPROG_IGNORE) { /* not interested in the outcome */ reset_test_state(tk); } else { tk_test.status = status; tk_test.progstate = EXTPROG_EXITED; } } } } /* the parent may want to have us stop processing scripts, say * when the ticket gets revoked */ static void ignore_rest(int sig) { signal(SIGTERM, SIG_IGN); log_info("external programs handler caught TERM, ignoring status of external test programs"); ignore_status = 1; if (curr_pid > 0) { (void)kill(curr_pid, SIGTERM); } } void ext_prog_timeout(struct ticket_config *tk) { tk_log_warn("handler timed out"); } int is_ext_prog_running(struct ticket_config *tk) { if (!tk_test.path) return 0; return (tk_test.pid > 0 && tk_test.progstate == EXTPROG_RUNNING); } void ignore_ext_test(struct ticket_config *tk) { if (is_ext_prog_running(tk)) { (void)kill(tk_test.pid, SIGTERM); tk_test.progstate = EXTPROG_IGNORE; } } static void process_ext_dir(struct ticket_config *tk) { char prog[FILENAME_MAX+1]; int rv, n_progs, i, status; struct dirent **proglist, *dp; signal(SIGTERM, (__sighandler_t)ignore_rest); signal(SIGCHLD, SIG_DFL); signal(SIGUSR1, SIG_DFL); signal(SIGINT, SIG_DFL); tk_log_debug("running programs in directory %s", tk_test.path); n_progs = scandir(tk_test.path, &proglist, prog_filter, alphasort); if (n_progs == -1) { tk_log_error("%s: scandir failed (%s)", tk_test.path, strerror(errno)); _exit(1); } for (i = 0; i < n_progs; i++) { if (ignore_status) break; dp = proglist[i]; if (strlen(dp->d_name) + strlen(tk_test.path) + 1 > FILENAME_MAX) { tk_log_error("%s: name exceeds max length (%s)", tk_test.path, dp->d_name); _exit(1); } strcpy(prog, tk_test.path); strcat(prog, "/"); strcat(prog, dp->d_name); switch(curr_pid=fork()) { case -1: log_error("fork: %s", strerror(errno)); _exit(1); case 0: /* child */ run_ext_prog(tk, prog); default: /* parent */ while (waitpid(curr_pid, &status, 0) != curr_pid) ; curr_pid = 0; if (!ignore_status) { rv = test_exit_status(tk, prog, status, 1); if (rv) _exit(rv); } } } _exit(0); } /* run some external program * return codes: * RUNCMD_ERR: executing program failed (or some other failure) * RUNCMD_MORE: program forked, results later */ int run_handler(struct ticket_config *tk) { int rv = 0; pid_t pid; struct stat stbuf; if (!tk_test.path) return 0; if (stat(tk_test.path, &stbuf)) { tk_log_error("%s: stat failed (%s)", tk_test.path, strerror(errno)); return RUNCMD_ERR; } tk_test.is_dir = (stbuf.st_mode & S_IFDIR); switch(pid=fork()) { case -1: log_error("fork: %s", strerror(errno)); return RUNCMD_ERR; case 0: /* child */ if (tk_test.is_dir) { process_ext_dir(tk); } else { run_ext_prog(tk, tk_test.path); } default: /* parent */ tk_test.pid = pid; tk_test.progstate = EXTPROG_RUNNING; rv = RUNCMD_MORE; /* program runs */ } return rv; } diff --git a/src/handler.h b/src/handler.h index 43ca93d..60101e2 100644 --- a/src/handler.h +++ b/src/handler.h @@ -1,35 +1,35 @@ /* * Copyright (C) 2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef _HANDLER_H #define _HANDLER_H enum { RUNCMD_ERR = -1, RUNCMD_MORE = -2, }; int run_handler(struct ticket_config *tk); int tk_test_exit_status(struct ticket_config *tk); void ignore_ext_test(struct ticket_config *tk); int is_ext_prog_running(struct ticket_config *tk); void ext_prog_timeout(struct ticket_config *tk); void wait_child(int sig); #endif diff --git a/src/inline-fn.h b/src/inline-fn.h index 77958a5..f756522 100644 --- a/src/inline-fn.h +++ b/src/inline-fn.h @@ -1,312 +1,312 @@ /* * Copyright (C) 2013-2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef _INLINE_FN_H #define _INLINE_FN_H #include #include #include #include #include "timer.h" #include "config.h" #include "transport.h" inline static uint32_t get_local_id(void) { return local ? local->site_id : -1; } inline static uint32_t get_node_id(struct booth_site *node) { return node ? node->site_id : 0; } /** Returns number of seconds left, if any. */ inline static int term_time_left(struct ticket_config *tk) { int left = 0; if (is_time_set(&tk->term_expires)) { left = time_left(&tk->term_expires); } return (left < 0) ? 0 : left; } inline static int leader_and_valid(struct ticket_config *tk) { if (tk->leader != local) return 0; return term_time_left(tk); } /** Is this some leader? */ inline static int is_owned(const struct ticket_config *tk) { return (tk->leader && tk->leader != no_leader); } inline static int is_resend(struct ticket_config *tk) { timetype now; get_time(&now); return time_sub_int(&now, &tk->req_sent_at) >= tk->timeout; } static inline void init_header_bare(struct boothc_header *h) { timetype now; assert(local && local->site_id); h->magic = htonl(BOOTHC_MAGIC); h->version = htonl(BOOTHC_VERSION); h->from = htonl(local->site_id); if (is_auth_req()) { get_time(&now); h->opts = htonl(BOOTH_OPT_AUTH); h->secs = htonl(secs_since_epoch(&now)); h->usecs = htonl(get_usecs(&now)); } else { h->opts = htonl(0); h->secs = htonl(0); h->usecs = htonl(0); } } /* get the _real_ message length out of the header */ #define sendmsglen(msg) ntohl((msg)->header.length) static inline void init_header(struct boothc_header *h, int cmd, int request, int options, int result, int reason, int data_len) { init_header_bare(h); h->length = htonl(data_len - (is_auth_req() ? 0 : sizeof(struct hmac))); h->cmd = htonl(cmd); h->request = htonl(request); h->options = htonl(options); h->result = htonl(result); h->reason = htonl(reason); } #define my_last_term(tk) \ (((tk)->state == ST_CANDIDATE && (tk)->last_valid_tk) ? \ (tk)->last_valid_tk->current_term : (tk)->current_term) extern int TIME_RES, TIME_MULT; #define msg_term_time(msg) \ ntohl((msg)->ticket.term_valid_for)*TIME_RES/TIME_MULT #define set_msg_term_time(msg, tk) \ (msg)->ticket.term_valid_for = htonl(term_time_left(tk)*TIME_MULT/TIME_RES) static inline void init_ticket_msg(struct boothc_ticket_msg *msg, int cmd, int request, int rv, int reason, struct ticket_config *tk) { assert(sizeof(msg->ticket.id) == sizeof(tk->name)); init_header(&msg->header, cmd, request, 0, rv, reason, sizeof(*msg)); if (!tk) { memset(&msg->ticket, 0, sizeof(msg->ticket)); } else { memcpy(msg->ticket.id, tk->name, sizeof(msg->ticket.id)); msg->ticket.leader = htonl(get_node_id( (tk->leader && tk->leader != no_leader) ? tk->leader : (tk->voted_for ? tk->voted_for : no_leader))); msg->ticket.term = htonl(tk->current_term); set_msg_term_time(msg, tk); } } static inline void init_attr_msg(struct boothc_attr_msg *msg, int cmd, int request, int rv, int reason, struct ticket_config *tk, char *attr_name, struct geo_attr *attr) { assert(tk); assert(attr); init_header(&msg->header, cmd, request, 0, rv, reason, sizeof(*msg)); if (!tk) { memset(&msg->attr.tkt_id, 0, sizeof(msg->attr.tkt_id)); } else { memcpy(msg->attr.tkt_id, tk->name, sizeof(msg->attr.tkt_id)); } memcpy(msg->attr.name, attr_name, sizeof(msg->attr.name)); memcpy(msg->attr.val, attr->val, sizeof(msg->attr.val)); } static inline struct booth_transport const *transport(void) { return booth_transport + booth_conf->proto; } static inline const char *site_string(struct booth_site *site) { return site ? site->addr_string : "NONE"; } static inline const char *ticket_leader_string(struct ticket_config *tk) { return site_string(tk->leader); } /* We allow half of the uint32_t to be used; * half of that below, half of that above the current known "good" value. * 0 UINT32_MAX * |--------------------------+----------------+------------| * | | | * |--------+-------| allowed range * | * current commit index * * So, on overflow it looks like that: * UINT32_MAX 0 * |--------------------------+-----------||---+------------| * | | | * |--------+-------| allowed range * | * current commit index * * This should be possible by using the same datatype and relying * on the under/overflow semantics. * * * Having 30 bits available, and assuming an expire time of * one minute and a (high) commit index step of 64 == 2^6 (because * of weights), we get 2^24 minutes of range - which is ~750 * years. "Should be enough for everybody." */ static inline int index_is_higher_than(uint32_t c_high, uint32_t c_low) { uint32_t diff; if (c_high == c_low) return 0; diff = c_high - c_low; if (diff < UINT32_MAX/4) return 1; diff = c_low - c_high; if (diff < UINT32_MAX/4) return 0; assert(!"commit index out of range - invalid"); } static inline uint32_t index_max2(uint32_t a, uint32_t b) { return index_is_higher_than(a, b) ? a : b; } static inline uint32_t index_max3(uint32_t a, uint32_t b, uint32_t c) { return index_max2( index_max2(a, b), c); } /* only invoked when ticket leader */ static inline void get_next_election_time(struct ticket_config *tk, timetype *next) { assert(tk->leader == local); /* if last_renewal is not set, which is unusual, it may mean * that the ticket never got updated, i.e. nobody acked * ticket updates (say, due to a temporary connection * problem) * we may try a bit later again */ if (!is_time_set(&tk->last_renewal)) { time_reset(next); } else { interval_add(&tk->last_renewal, tk->renewal_freq, next); } /* if delay_commit is earlier than next, then set next to * delay_commit */ if (is_time_set(&tk->delay_commit) && time_cmp(next, &tk->delay_commit, >)) { copy_time(&tk->delay_commit, next); } } static inline void expect_replies(struct ticket_config *tk, int reply_type) { tk->retry_number = 0; tk->acks_expected = reply_type; tk->acks_received = local->bitmask; get_time(&tk->req_sent_at); } static inline void no_resends(struct ticket_config *tk) { tk->retry_number = 0; tk->acks_expected = 0; } static inline struct booth_site *my_vote(struct ticket_config *tk) { return tk->votes_for[ local->index ]; } static inline int count_bits(uint64_t val) { return __builtin_popcount(val); } static inline int majority_of_bits(struct ticket_config *tk, uint64_t val) { /* Use ">" to get majority decision, even for an even number * of participants. */ return count_bits(val) * 2 > booth_conf->site_count; } static inline int all_replied(struct ticket_config *tk) { return !(tk->acks_received ^ booth_conf->all_bits); } static inline int all_sites_replied(struct ticket_config *tk) { return !((tk->acks_received & booth_conf->sites_bits) ^ booth_conf->sites_bits); } #endif diff --git a/src/log.h b/src/log.h index 5891c58..b72e85a 100644 --- a/src/log.h +++ b/src/log.h @@ -1,55 +1,55 @@ /* * Copyright (C) 2010-2011 Red Hat, Inc. All rights reserved. * (This code is borrowed from the sanlock project which is hosted on * fedorahosted.org.) * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef _LOG_H #define _LOG_H #include #include #include "inline-fn.h" #define log_debug(fmt, args...) do { \ if (ANYDEBUG) cl_log(LOG_DEBUG, fmt, ##args); } \ while (0) #define log_info(fmt, args...) cl_log(LOG_INFO, fmt, ##args) #define log_warn(fmt, args...) cl_log(LOG_WARNING, fmt, ##args) #define log_error(fmt, args...) cl_log(LOG_ERR, fmt, ##args) /* all tk_* macros prepend "%(tk->name): " (the caller needs to * have the ticket named tk!) */ #define tk_cl_log(sev, fmt, args...) \ cl_log(sev, "%s (%s/%d/%d): " fmt, \ tk->name, state_to_string(tk->state), tk->current_term, term_time_left(tk), \ ##args) #define tk_cl_log_src(sev, fmt, args...) \ cl_log(sev, "%s:%d: %s (%s/%d/%d): " fmt, \ __FUNCTION__, __LINE__, \ tk->name, state_to_string(tk->state), tk->current_term, term_time_left(tk), \ ##args) #define tk_log_debug(fmt, args...) do { \ if (ANYDEBUG) tk_cl_log_src(LOG_DEBUG, fmt, ##args); } \ while (0) #define tk_log_info(fmt, args...) tk_cl_log(LOG_INFO, fmt, ##args) #define tk_log_warn(fmt, args...) tk_cl_log(LOG_WARNING, fmt, ##args) #define tk_log_error(fmt, args...) tk_cl_log(LOG_ERR, fmt, ##args) #endif /* _LOG_H */ diff --git a/src/main.c b/src/main.c index b919c73..406677d 100644 --- a/src/main.c +++ b/src/main.c @@ -1,1588 +1,1588 @@ /* * Copyright (C) 2011 Jiaju Zhang * Copyright (C) 2013-2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "b_config.h" #include "log.h" #include "booth.h" #include "config.h" #include "transport.h" #include "inline-fn.h" #include "pacemaker.h" #include "ticket.h" #include "request.h" #include "attr.h" #include "handler.h" #define RELEASE_VERSION "1.0" #define RELEASE_STR RELEASE_VERSION " (build " BOOTH_BUILD_VERSION ")" #define CLIENT_NALLOC 32 int daemonize = 0; int enable_stderr = 0; timetype start_time; /** Structure for "clients". * Filehandles with incoming data get registered here (and in pollfds), * along with their callbacks. * Because these can be reallocated with every new fd, addressing * happens _only_ by their numeric index. */ struct client *clients = NULL; struct pollfd *pollfds = NULL; static int client_maxi; static int client_size = 0; static const struct booth_site _no_leader = { .addr_string = "none", .site_id = NO_ONE, }; struct booth_site *no_leader = (struct booth_site*)& _no_leader; typedef enum { BOOTHD_STARTED=0, BOOTHD_STARTING } BOOTH_DAEMON_STATE; int poll_timeout; struct booth_config *booth_conf; struct command_line cl; static void client_alloc(void) { int i; if (!clients) { clients = malloc(CLIENT_NALLOC * sizeof(struct client)); pollfds = malloc(CLIENT_NALLOC * sizeof(struct pollfd)); } else { clients = realloc(clients, (client_size + CLIENT_NALLOC) * sizeof(struct client)); pollfds = realloc(pollfds, (client_size + CLIENT_NALLOC) * sizeof(struct pollfd)); } if (!clients || !pollfds) { log_error("can't alloc for client array"); exit(1); } for (i = client_size; i < client_size + CLIENT_NALLOC; i++) { clients[i].workfn = NULL; clients[i].deadfn = NULL; clients[i].fd = -1; pollfds[i].fd = -1; pollfds[i].revents = 0; } client_size += CLIENT_NALLOC; } static void client_dead(int ci) { struct client *c = clients + ci; if (c->fd != -1) { log_debug("removing client %d", c->fd); close(c->fd); } c->fd = -1; c->workfn = NULL; if (c->msg) { free(c->msg); c->msg = NULL; c->offset = 0; } pollfds[ci].fd = -1; } int client_add(int fd, const struct booth_transport *tpt, void (*workfn)(int ci), void (*deadfn)(int ci)) { int i; struct client *c; if (client_size - 1 <= client_maxi ) { client_alloc(); } for (i = 0; i < client_size; i++) { c = clients + i; if (c->fd != -1) continue; c->workfn = workfn; if (deadfn) c->deadfn = deadfn; else c->deadfn = client_dead; c->transport = tpt; c->fd = fd; c->msg = NULL; c->offset = 0; pollfds[i].fd = fd; pollfds[i].events = POLLIN; if (i > client_maxi) client_maxi = i; return i; } assert(!"no client"); } int find_client_by_fd(int fd) { int i; if (fd < 0) return -1; for (i = 0; i <= client_maxi; i++) { if (clients[i].fd == fd) return i; } return -1; } static int format_peers(char **pdata, unsigned int *len) { struct booth_site *s; char *data, *cp; char time_str[64]; int i, alloc; *pdata = NULL; *len = 0; alloc = booth_conf->site_count * (BOOTH_NAME_LEN + 256); data = malloc(alloc); if (!data) return -ENOMEM; cp = data; foreach_node(i, s) { if (s == local) continue; strftime(time_str, sizeof(time_str), "%F %T", localtime(&s->last_recv)); cp += snprintf(cp, alloc - (cp - data), "%-12s %s, last recv: %s\n", type_to_string(s->type), s->addr_string, time_str); cp += snprintf(cp, alloc - (cp - data), "\tSent pkts:%u error:%u resends:%u\n", s->sent_cnt, s->sent_err_cnt, s->resend_cnt); cp += snprintf(cp, alloc - (cp - data), "\tRecv pkts:%u error:%u authfail:%u invalid:%u\n\n", s->recv_cnt, s->recv_err_cnt, s->sec_cnt, s->invalid_cnt); if (alloc - (cp - data) <= 0) return -ENOMEM; } *pdata = data; *len = cp - data; return 0; } void list_peers(int fd) { char *data; int olen; struct boothc_hdr_msg hdr; if (format_peers(&data, &olen) < 0) goto out; init_header(&hdr.header, CL_LIST, 0, 0, RLT_SUCCESS, 0, sizeof(hdr) + olen); (void)send_header_plus(fd, &hdr, data, olen); out: if (data) free(data); } /* trim trailing spaces if the key is ascii */ static void trim_key() { unsigned char *p; int i; for (i=0, p=booth_conf->authkey; i < booth_conf->authkey_len; i++, p++) if (!isascii(*p)) return; p = booth_conf->authkey; while (booth_conf->authkey_len > 0 && isspace(*p)) { p++; booth_conf->authkey_len--; } memmove(booth_conf->authkey, p, booth_conf->authkey_len); p = booth_conf->authkey + booth_conf->authkey_len - 1; while (booth_conf->authkey_len > 0 && isspace(*p)) { booth_conf->authkey_len--; p--; } } static int read_authkey() { int fd; booth_conf->authkey[0] = '\0'; if (stat(booth_conf->authfile, &booth_conf->authstat) < 0) { log_error("cannot stat authentication file %s: %s", booth_conf->authfile, strerror(errno)); return -1; } if (booth_conf->authstat.st_mode & (S_IRGRP | S_IROTH)) { log_error("%s: file can be readable only for the owner", booth_conf->authfile); return -1; } fd = open(booth_conf->authfile, O_RDONLY); if (fd < 0) { log_error("cannot open %s: %s", booth_conf->authfile, strerror(errno)); return -1; } booth_conf->authkey_len = read(fd, booth_conf->authkey, BOOTH_MAX_KEY_LEN); close(fd); trim_key(); log_debug("read key of size %d in authfile %s", booth_conf->authkey_len, booth_conf->authfile); /* make sure that the key is of minimum length */ return (booth_conf->authkey_len >= BOOTH_MIN_KEY_LEN) ? 0 : -1; } int update_authkey() { struct stat statbuf; if (stat(booth_conf->authfile, &statbuf) < 0) { log_error("cannot stat authentication file %s: %s", booth_conf->authfile, strerror(errno)); return -1; } if (statbuf.st_mtime > booth_conf->authstat.st_mtime) { return read_authkey(); } return 0; } static int setup_config(int type) { int rv; rv = read_config(cl.configfile, type); if (rv < 0) goto out; if (booth_conf->authfile[0] != '\0') { rv = read_authkey(); if (rv < 0) goto out; } /* Set "local" pointer, ignoring errors. */ if (cl.type == DAEMON && cl.site[0]) { if (!find_site_by_name(cl.site, &local, 1)) { log_error("Cannot find \"%s\" in the configuration.", cl.site); return -EINVAL; } local->local = 1; } else find_myself(NULL, type == CLIENT || type == GEOSTORE); rv = check_config(type); if (rv < 0) goto out; /* Per default the PID file name is derived from the * configuration name. */ if (!cl.lockfile[0]) { snprintf(cl.lockfile, sizeof(cl.lockfile)-1, "%s/%s.pid", BOOTH_RUN_DIR, booth_conf->name); } out: return rv; } static int setup_transport(void) { int rv; rv = transport()->init(message_recv); if (rv < 0) { log_error("failed to init booth_transport %s", transport()->name); goto out; } rv = booth_transport[TCP].init(NULL); if (rv < 0) { log_error("failed to init booth_transport[TCP]"); goto out; } out: return rv; } static int write_daemon_state(int fd, int state) { char buffer[1024]; int rv, size; size = sizeof(buffer) - 1; rv = snprintf(buffer, size, "booth_pid=%d " "booth_state=%s " "booth_type=%s " "booth_cfg_name='%s' " "booth_id=%d " "booth_addr_string='%s' " "booth_port=%d\n", getpid(), ( state == BOOTHD_STARTED ? "started" : state == BOOTHD_STARTING ? "starting" : "invalid"), type_to_string(local->type), booth_conf->name, local->site_id, local->addr_string, booth_conf->port); if (rv < 0 || rv == size) { log_error("Buffer filled up in write_daemon_state()."); return -1; } size = rv; rv = ftruncate(fd, 0); if (rv < 0) { log_error("lockfile %s truncate error %d: %s", cl.lockfile, errno, strerror(errno)); return rv; } rv = lseek(fd, 0, SEEK_SET); if (rv < 0) { log_error("lseek set fd(%d) offset to 0 error, return(%d), message(%s)", fd, rv, strerror(errno)); rv = -1; return rv; } rv = write(fd, buffer, size); if (rv != size) { log_error("write to fd(%d, %d) returned %d, errno %d, message(%s)", fd, size, rv, errno, strerror(errno)); return -1; } return 0; } static int loop(int fd) { void (*workfn) (int ci); void (*deadfn) (int ci); int rv, i; rv = setup_transport(); if (rv < 0) goto fail; rv = setup_ticket(); if (rv < 0) goto fail; rv = write_daemon_state(fd, BOOTHD_STARTED); if (rv != 0) { log_error("write daemon state %d to lockfile error %s: %s", BOOTHD_STARTED, cl.lockfile, strerror(errno)); goto fail; } log_info("BOOTH %s daemon started, node id is 0x%08X (%d).", type_to_string(local->type), local->site_id, local->site_id); while (1) { rv = poll(pollfds, client_maxi + 1, poll_timeout); if (rv == -1 && errno == EINTR) continue; if (rv < 0) { log_error("poll failed: %s (%d)", strerror(errno), errno); goto fail; } for (i = 0; i <= client_maxi; i++) { if (clients[i].fd < 0) continue; if (pollfds[i].revents & POLLIN) { workfn = clients[i].workfn; if (workfn) workfn(i); } if (pollfds[i].revents & (POLLERR | POLLHUP | POLLNVAL)) { deadfn = clients[i].deadfn; if (deadfn) deadfn(i); } } process_tickets(); } return 0; fail: return -1; } static int test_reply(cmd_result_t reply_code, cmd_request_t cmd) { int rv = 0; const char *op_str = ""; if (cmd == CMD_GRANT) op_str = "grant"; else if (cmd == CMD_REVOKE) op_str = "revoke"; else if (cmd == CMD_LIST) op_str = "list"; else if (cmd == CMD_PEERS) op_str = "peers"; else { log_error("internal error reading reply result!"); return -1; } switch (reply_code) { case RLT_OVERGRANT: log_info("You're granting a granted ticket. " "If you wanted to migrate a ticket, " "use revoke first, then use grant."); rv = -1; break; case RLT_TICKET_IDLE: log_info("ticket is not owned"); rv = 0; break; case RLT_ASYNC: log_info("%s command sent, result will be returned " "asynchronously. Please use \"booth list\" to " "see the outcome.", op_str); rv = 0; break; case RLT_CIB_PENDING: log_info("%s succeeded (CIB commit pending)", op_str); /* wait for the CIB commit? */ rv = (cl.options & OPT_WAIT_COMMIT) ? 3 : 0; break; case RLT_MORE: rv = 2; break; case RLT_SYNC_SUCC: case RLT_SUCCESS: if (cmd != CMD_LIST && cmd != CMD_PEERS) log_info("%s succeeded!", op_str); rv = 0; break; case RLT_SYNC_FAIL: log_info("%s failed!", op_str); rv = -1; break; case RLT_INVALID_ARG: log_error("ticket \"%s\" does not exist", cl.msg.ticket.id); rv = -1; break; case RLT_AUTH: log_error("authentication error"); rv = -1; break; case RLT_EXT_FAILED: log_error("before-acquire-handler for ticket \"%s\" failed, grant denied", cl.msg.ticket.id); rv = -1; break; case RLT_ATTR_PREREQ: log_error("attr-prereq for ticket \"%s\" failed, grant denied", cl.msg.ticket.id); rv = -1; break; case RLT_REDIRECT: /* talk to another site */ rv = 1; break; default: log_error("got an error code: %x", rv); rv = -1; } return rv; } static int query_get_string_answer(cmd_request_t cmd) { struct booth_site *site; struct boothc_hdr_msg reply; struct boothc_header *header; char *data; int data_len; int rv; struct booth_transport const *tpt; int (*test_reply_f) (cmd_result_t reply_code, cmd_request_t cmd); size_t msg_size; void *request; if (cl.type == GEOSTORE) { test_reply_f = test_attr_reply; msg_size = sizeof(cl.attr_msg); request = &cl.attr_msg; } else { test_reply_f = test_reply; msg_size = sizeof(cl.msg); request = &cl.msg; } header = (struct boothc_header *)request; data = NULL; init_header(header, cmd, 0, cl.options, 0, 0, msg_size); if (!*cl.site) site = local; else if (!find_site_by_name(cl.site, &site, 1)) { log_error("cannot find site \"%s\"", cl.site); rv = ENOENT; goto out; } tpt = booth_transport + TCP; rv = tpt->open(site); if (rv < 0) goto out_close; rv = tpt->send(site, request, msg_size); if (rv < 0) goto out_close; rv = tpt->recv_auth(site, &reply, sizeof(reply)); if (rv < 0) goto out_close; data_len = ntohl(reply.header.length) - rv; /* no attribute, or no ticket found */ if (!data_len) { goto out_test_reply; } data = malloc(data_len+1); if (!data) { rv = -ENOMEM; goto out_close; } rv = tpt->recv(site, data, data_len); if (rv < 0) goto out_close; *(data+data_len) = '\0'; *(data + data_len) = '\0'; (void)fputs(data, stdout); fflush(stdout); rv = 0; out_test_reply: rv = test_reply_f(ntohl(reply.header.result), cmd); out_close: tpt->close(site); out: if (data) free(data); return rv; } static int do_command(cmd_request_t cmd) { struct booth_site *site; struct boothc_ticket_msg reply; struct booth_transport const *tpt; uint32_t leader_id; int rv; int reply_cnt = 0, msg_logged = 0; const char *op_str = ""; if (cmd == CMD_GRANT) op_str = "grant"; else if (cmd == CMD_REVOKE) op_str = "revoke"; rv = 0; site = NULL; /* Always use TCP for client - at least for now. */ tpt = booth_transport + TCP; if (!*cl.site) site = local; else { if (!find_site_by_name(cl.site, &site, 1)) { log_error("Site \"%s\" not configured.", cl.site); goto out_close; } } if (site->type == ARBITRATOR) { if (site == local) { log_error("We're just an arbitrator, cannot grant/revoke tickets here."); } else { log_error("%s is just an arbitrator, cannot grant/revoke tickets there.", cl.site); } goto out_close; } assert(site->type == SITE); /* We don't check for existence of ticket, so that asking can be * done without local configuration, too. * Although, that means that the UDP port has to be specified, too. */ if (!cl.msg.ticket.id[0]) { /* If the loaded configuration has only a single ticket defined, use that. */ if (booth_conf->ticket_count == 1) { strcpy(cl.msg.ticket.id, booth_conf->ticket[0].name); } else { log_error("No ticket given."); goto out_close; } } redirect: init_header(&cl.msg.header, cmd, 0, cl.options, 0, 0, sizeof(cl.msg)); rv = tpt->open(site); if (rv < 0) goto out_close; rv = tpt->send(site, &cl.msg, sendmsglen(&cl.msg)); if (rv < 0) goto out_close; read_more: rv = tpt->recv_auth(site, &reply, sizeof(reply)); if (rv < 0) { /* print any errors depending on the code sent by the * server */ (void)test_reply(ntohl(reply.header.result), cmd); goto out_close; } rv = test_reply(ntohl(reply.header.result), cmd); if (rv == 1) { tpt->close(site); leader_id = ntohl(reply.ticket.leader); if (!find_site_by_id(leader_id, &site)) { log_error("Message with unknown redirect site %x received", leader_id); rv = -1; goto out_close; } goto redirect; } else if (rv == 2 || rv == 3) { /* the server has more to say */ /* don't wait too long */ if (reply_cnt > 1 && !(cl.options & OPT_WAIT)) { rv = 0; log_info("Giving up on waiting for the definite result. " "Please use \"booth list\" later to " "see the outcome."); goto out_close; } if (reply_cnt == 0) { log_info("%s request sent, " "waiting for the result ...", op_str); msg_logged++; } else if (rv == 3 && msg_logged < 2) { log_info("waiting for the CIB commit ..."); msg_logged++; } reply_cnt++; goto read_more; } out_close: if (site) tpt->close(site); return rv; } static int _lockfile(int mode, int *fdp, pid_t *locked_by) { struct flock lock; int fd, rv; /* After reboot the directory may not yet exist. * Try to create it, but ignore errors. */ if (strncmp(cl.lockfile, BOOTH_RUN_DIR, strlen(BOOTH_RUN_DIR)) == 0) mkdir(BOOTH_RUN_DIR, 0775); if (locked_by) *locked_by = 0; *fdp = -1; fd = open(cl.lockfile, mode, 0664); if (fd < 0) return errno; *fdp = fd; lock.l_type = F_WRLCK; lock.l_start = 0; lock.l_whence = SEEK_SET; lock.l_len = 0; lock.l_pid = 0; if (fcntl(fd, F_SETLK, &lock) == 0) return 0; rv = errno; if (locked_by) if (fcntl(fd, F_GETLK, &lock) == 0) *locked_by = lock.l_pid; return rv; } static inline int is_root(void) { return geteuid() == 0; } static int create_lockfile(void) { int rv, fd; fd = -1; rv = _lockfile(O_CREAT | O_WRONLY, &fd, NULL); if (fd == -1) { log_error("lockfile %s open error %d: %s", cl.lockfile, rv, strerror(rv)); return -1; } if (rv < 0) { log_error("lockfile %s setlk error %d: %s", cl.lockfile, rv, strerror(rv)); goto fail; } rv = write_daemon_state(fd, BOOTHD_STARTING); if (rv != 0) { log_error("write daemon state %d to lockfile error %s: %s", BOOTHD_STARTING, cl.lockfile, strerror(errno)); goto fail; } if (is_root()) { if (fchown(fd, booth_conf->uid, booth_conf->gid) < 0) log_error("fchown() on lockfile said %d: %s", errno, strerror(errno)); } return fd; fail: close(fd); return -1; } static void unlink_lockfile(int fd) { unlink(cl.lockfile); close(fd); } static void print_usage(void) { printf( "Usage:\n" " booth list [options]\n" " booth {grant|revoke} [options] \n" " booth status [options]\n" "\n" " list: List all tickets\n" " grant: Grant ticket to site\n" " revoke: Revoke ticket\n" "\n" "Options:\n" " -c FILE Specify config file [default " BOOTH_DEFAULT_CONF "]\n" " Can be a path or just a name without \".conf\" suffix\n" " -s Connect/grant to a different site\n" " -F Try to grant the ticket immediately\n" " even if not all sites are reachable\n" " -w Wait forever for the outcome of the request\n" " -C Wait until the ticket is committed to the CIB (grant only)\n" " -h Print this help\n" "\n" "Examples:\n" "\n" " # booth list (list tickets)\n" " # booth grant ticket-A (grant ticket here)\n" " # booth grant -s 10.121.8.183 ticket-A (grant ticket to site 10.121.8.183)\n" " # booth revoke ticket-A (revoke ticket)\n" "\n" "See the booth(8) man page for more details.\n" ); } #define OPTION_STRING "c:Dl:t:s:FhSwC" #define ATTR_OPTION_STRING "c:Dt:s:h" void safe_copy(char *dest, char *value, size_t buflen, const char *description) { int content_len = buflen - 1; if (strlen(value) >= content_len) { fprintf(stderr, "'%s' exceeds maximum %s length of %d\n", value, description, content_len); exit(EXIT_FAILURE); } strncpy(dest, value, content_len); dest[content_len] = 0; } static int host_convert(char *hostname, char *ip_str, size_t ip_size) { struct addrinfo *result = NULL, hints = {0}; int re = -1; memset(&hints, 0, sizeof(hints)); hints.ai_family = AF_INET; hints.ai_socktype = SOCK_DGRAM; re = getaddrinfo(hostname, NULL, &hints, &result); if (re == 0) { struct in_addr addr = ((struct sockaddr_in *)result->ai_addr)->sin_addr; const char *re_ntop = inet_ntop(AF_INET, &addr, ip_str, ip_size); if (re_ntop == NULL) { re = -1; } } freeaddrinfo(result); return re; } #define cparg(dest, descr) do { \ if (optind >= argc) \ goto missingarg; \ safe_copy(dest, argv[optind], sizeof(dest), descr); \ optind++; \ } while(0) static int read_arguments(int argc, char **argv) { int optchar; char *arg1 = argv[1]; char *op = NULL; char *cp; const char *opt_string = OPTION_STRING; char site_arg[INET_ADDRSTRLEN] = {0}; int left; cl.type = 0; if ((cp = strstr(argv[0], ATTR_PROG)) && !strcmp(cp, ATTR_PROG)) { cl.type = GEOSTORE; op = argv[1]; optind = 2; opt_string = ATTR_OPTION_STRING; } else if (argc > 1 && (strcmp(arg1, "arbitrator") == 0 || strcmp(arg1, "site") == 0 || strcmp(arg1, "start") == 0 || strcmp(arg1, "daemon") == 0)) { cl.type = DAEMON; optind = 2; } else if (argc > 1 && (strcmp(arg1, "status") == 0)) { cl.type = STATUS; optind = 2; } else if (argc > 1 && (strcmp(arg1, "client") == 0)) { cl.type = CLIENT; if (argc < 3) { print_usage(); exit(EXIT_FAILURE); } op = argv[2]; optind = 3; } if (!cl.type) { cl.type = CLIENT; op = argv[1]; optind = 2; } if (argc < 2 || !strcmp(arg1, "help") || !strcmp(arg1, "--help") || !strcmp(arg1, "-h")) { if (cl.type == GEOSTORE) print_geostore_usage(); else print_usage(); exit(EXIT_SUCCESS); } if (!strcmp(arg1, "version") || !strcmp(arg1, "--version") || !strcmp(arg1, "-V")) { printf("%s %s\n", argv[0], RELEASE_STR); exit(EXIT_SUCCESS); } if (cl.type == CLIENT) { if (!strcmp(op, "list")) cl.op = CMD_LIST; else if (!strcmp(op, "grant")) cl.op = CMD_GRANT; else if (!strcmp(op, "revoke")) cl.op = CMD_REVOKE; else if (!strcmp(op, "peers")) cl.op = CMD_PEERS; else { fprintf(stderr, "client operation \"%s\" is unknown\n", op); exit(EXIT_FAILURE); } } else if (cl.type == GEOSTORE) { if (!strcmp(op, "list")) cl.op = ATTR_LIST; else if (!strcmp(op, "set")) cl.op = ATTR_SET; else if (!strcmp(op, "get")) cl.op = ATTR_GET; else if (!strcmp(op, "delete")) cl.op = ATTR_DEL; else { fprintf(stderr, "attribute operation \"%s\" is unknown\n", op); exit(EXIT_FAILURE); } } while (optind < argc) { optchar = getopt(argc, argv, opt_string); switch (optchar) { case 'c': if (strchr(optarg, '/')) { safe_copy(cl.configfile, optarg, sizeof(cl.configfile), "config file"); } else { /* If no "/" in there, use with default directory. */ strcpy(cl.configfile, BOOTH_DEFAULT_CONF_DIR); cp = cl.configfile + strlen(BOOTH_DEFAULT_CONF_DIR); assert(cp > cl.configfile); assert(*(cp-1) == '/'); /* Write at the \0, ie. after the "/" */ safe_copy(cp, optarg, (sizeof(cl.configfile) - (cp - cl.configfile) - strlen(BOOTH_DEFAULT_CONF_EXT)), "config name"); /* If no extension, append ".conf". * Space is available, see -strlen() above. */ if (!strchr(cp, '.')) strcat(cp, BOOTH_DEFAULT_CONF_EXT); } break; case 'D': debug_level++; enable_stderr = 1; /* Fall through */ case 'S': daemonize = 1; break; case 'l': safe_copy(cl.lockfile, optarg, sizeof(cl.lockfile), "lock file"); break; case 't': if (cl.op == CMD_GRANT || cl.op == CMD_REVOKE) { safe_copy(cl.msg.ticket.id, optarg, sizeof(cl.msg.ticket.id), "ticket name"); } else if (cl.type == GEOSTORE) { safe_copy(cl.attr_msg.attr.tkt_id, optarg, sizeof(cl.attr_msg.attr.tkt_id), "ticket name"); } else { print_usage(); exit(EXIT_FAILURE); } break; case 's': /* For testing and debugging: allow "-s site" also for * daemon start, so that the address that should be used * can be set manually. * This makes it easier to start multiple processes * on one machine. */ if (cl.type == CLIENT || cl.type == GEOSTORE || (cl.type == DAEMON && debug_level)) { if (strcmp(optarg, OTHER_SITE) && host_convert(optarg, site_arg, INET_ADDRSTRLEN) == 0) { safe_copy(cl.site, site_arg, sizeof(cl.site), "site name"); } else { safe_copy(cl.site, optarg, sizeof(cl.site), "site name"); } } else { log_error("\"-s\" not allowed in daemon mode."); exit(EXIT_FAILURE); } break; case 'F': if (cl.type != CLIENT || cl.op != CMD_GRANT) { log_error("use \"-F\" only for client grant"); exit(EXIT_FAILURE); } cl.options |= OPT_IMMEDIATE; break; case 'w': if (cl.type != CLIENT || (cl.op != CMD_GRANT && cl.op != CMD_REVOKE)) { log_error("use \"-w\" only for grant and revoke"); exit(EXIT_FAILURE); } cl.options |= OPT_WAIT; break; case 'C': if (cl.type != CLIENT || cl.op != CMD_GRANT) { log_error("use \"-C\" only for grant"); exit(EXIT_FAILURE); } cl.options |= OPT_WAIT | OPT_WAIT_COMMIT; break; case 'h': if (cl.type == GEOSTORE) print_geostore_usage(); else print_usage(); exit(EXIT_SUCCESS); break; case ':': case '?': fprintf(stderr, "Please use '-h' for usage.\n"); exit(EXIT_FAILURE); break; case -1: /* No more parameters on cmdline, only arguments. */ goto extra_args; default: goto unknown; }; } return 0; extra_args: if (cl.type == CLIENT && !cl.msg.ticket.id[0]) { cparg(cl.msg.ticket.id, "ticket name"); } else if (cl.type == GEOSTORE) { if (cl.op != ATTR_LIST) { cparg(cl.attr_msg.attr.name, "attribute name"); } if (cl.op == ATTR_SET) { cparg(cl.attr_msg.attr.val, "attribute value"); } } if (optind == argc) return 0; left = argc - optind; fprintf(stderr, "Superfluous argument%s: %s%s\n", left == 1 ? "" : "s", argv[optind], left == 1 ? "" : "..."); exit(EXIT_FAILURE); unknown: fprintf(stderr, "unknown option: %s\n", argv[optind]); exit(EXIT_FAILURE); missingarg: fprintf(stderr, "not enough arguments\n"); exit(EXIT_FAILURE); } static void set_scheduler(void) { struct sched_param sched_param; struct rlimit rlimit; int rv; rlimit.rlim_cur = RLIM_INFINITY; rlimit.rlim_max = RLIM_INFINITY; setrlimit(RLIMIT_MEMLOCK, &rlimit); rv = mlockall(MCL_CURRENT | MCL_FUTURE); if (rv < 0) { log_error("mlockall failed"); } rv = sched_get_priority_max(SCHED_RR); if (rv != -1) { sched_param.sched_priority = rv; rv = sched_setscheduler(0, SCHED_RR, &sched_param); if (rv == -1) log_error("could not set SCHED_RR priority %d: %s (%d)", sched_param.sched_priority, strerror(errno), errno); } else { log_error("could not get maximum scheduler priority err %d", errno); } } static int set_procfs_val(const char *path, const char *val) { int rc = -1; FILE *fp = fopen(path, "w"); if (fp) { if (fprintf(fp, "%s", val) > 0) rc = 0; fclose(fp); } return rc; } static int do_status(int type) { pid_t pid; int rv, lock_fd, ret; const char *reason = NULL; char lockfile_data[1024], *cp; ret = PCMK_OCF_NOT_RUNNING; rv = setup_config(type); if (rv) { reason = "Error reading configuration."; ret = PCMK_OCF_UNKNOWN_ERROR; goto quit; } if (!local) { reason = "No Service IP active here."; goto quit; } rv = _lockfile(O_RDWR, &lock_fd, &pid); if (rv == 0) { reason = "PID file not locked."; goto quit; } if (lock_fd == -1) { reason = "No PID file."; goto quit; } if (pid) { fprintf(stdout, "booth_lockpid=%d ", pid); fflush(stdout); } rv = read(lock_fd, lockfile_data, sizeof(lockfile_data) - 1); if (rv < 4) { reason = "Cannot read lockfile data."; ret = PCMK_LSB_UNKNOWN_ERROR; goto quit; } lockfile_data[rv] = 0; if (lock_fd != -1) close(lock_fd); /* Make sure it's only a single line */ cp = strchr(lockfile_data, '\r'); if (cp) *cp = 0; cp = strchr(lockfile_data, '\n'); if (cp) *cp = 0; rv = setup_tcp_listener(1); if (rv == 0) { reason = "TCP port not in use."; goto quit; } fprintf(stdout, "booth_lockfile='%s' %s\n", cl.lockfile, lockfile_data); if (daemonize) fprintf(stderr, "Booth at %s port %d seems to be running.\n", local->addr_string, booth_conf->port); return 0; quit: log_debug("not running: %s", reason); /* Ie. "DEBUG" */ if (daemonize) fprintf(stderr, "not running: %s\n", reason); return ret; } static int limit_this_process(void) { int rv; if (!is_root()) return 0; if (setregid(booth_conf->gid, booth_conf->gid) < 0) { rv = errno; log_error("setregid() didn't work: %s", strerror(rv)); return rv; } if (setreuid(booth_conf->uid, booth_conf->uid) < 0) { rv = errno; log_error("setreuid() didn't work: %s", strerror(rv)); return rv; } return 0; } static int lock_fd = -1; static void server_exit(void) { int rv; if (lock_fd >= 0) { /* We might not be able to delete it, but at least * make it empty. */ rv = ftruncate(lock_fd, 0); (void)rv; unlink_lockfile(lock_fd); } log_info("exiting"); } static void sig_exit_handler(int sig) { log_info("caught signal %d", sig); exit(0); } static int do_server(int type) { int rv = -1; static char log_ent[128] = DAEMON_NAME "-"; rv = setup_config(type); if (rv < 0) return rv; if (!local) { log_error("Cannot find myself in the configuration."); exit(EXIT_FAILURE); } if (!daemonize) { if (daemon(0, 0) < 0) { perror("daemon error"); exit(EXIT_FAILURE); } } /* The lockfile must be written to _after_ the call to daemon(), so * that the lockfile contains the pid of the daemon, not the parent. */ lock_fd = create_lockfile(); if (lock_fd < 0) return lock_fd; atexit(server_exit); strcat(log_ent, type_to_string(local->type)); cl_log_set_entity(log_ent); cl_log_enable_stderr(enable_stderr ? TRUE : FALSE); cl_log_set_facility(HA_LOG_FACILITY); cl_inherit_logging_environment(0); log_info("BOOTH %s %s daemon is starting", type_to_string(local->type), RELEASE_STR); signal(SIGUSR1, (__sighandler_t)tickets_log_info); signal(SIGTERM, (__sighandler_t)sig_exit_handler); signal(SIGINT, (__sighandler_t)sig_exit_handler); /* we'll handle errors there and then */ signal(SIGPIPE, SIG_IGN); set_scheduler(); /* we don't want to be killed by the OOM-killer */ if (set_procfs_val("/proc/self/oom_score_adj", "-999")) (void)set_procfs_val("/proc/self/oom_adj", "-16"); set_proc_title("%s %s %s for [%s]:%d", DAEMON_NAME, cl.configfile, type_to_string(local->type), local->addr_string, booth_conf->port); rv = limit_this_process(); if (rv) return rv; if (cl_enable_coredumps(TRUE) < 0){ cl_log(LOG_ERR, "enabling core dump failed"); } cl_cdtocoredir(); prctl(PR_SET_DUMPABLE, (unsigned long)TRUE, 0UL, 0UL, 0UL); signal(SIGCHLD, (__sighandler_t)wait_child); rv = loop(lock_fd); return rv; } static int do_client(void) { int rv; rv = setup_config(CLIENT); if (rv < 0) { log_error("cannot read config"); goto out; } switch (cl.op) { case CMD_LIST: case CMD_PEERS: rv = query_get_string_answer(cl.op); break; case CMD_GRANT: case CMD_REVOKE: rv = do_command(cl.op); break; } out: return rv; } static int do_attr(void) { int rv = -1; rv = setup_config(GEOSTORE); if (rv < 0) { log_error("cannot read config"); goto out; } /* We don't check for existence of ticket, so that asking can be * done without local configuration, too. * Although, that means that the UDP port has to be specified, too. */ if (!cl.attr_msg.attr.tkt_id[0]) { /* If the loaded configuration has only a single ticket defined, use that. */ if (booth_conf->ticket_count == 1) { strcpy(cl.attr_msg.attr.tkt_id, booth_conf->ticket[0].name); } else { rv = 1; log_error("No ticket given."); goto out; } } switch (cl.op) { case ATTR_LIST: case ATTR_GET: rv = query_get_string_answer(cl.op); break; case ATTR_SET: case ATTR_DEL: rv = do_attr_command(cl.op); break; } out: return rv; } int main(int argc, char *argv[], char *envp[]) { int rv; char *cp; init_set_proc_title(argc, argv, envp); get_time(&start_time); memset(&cl, 0, sizeof(cl)); strncpy(cl.configfile, BOOTH_DEFAULT_CONF, BOOTH_PATH_LEN - 1); cl.lockfile[0] = 0; debug_level = 0; cl_log_set_entity( (cp = strstr(argv[0], ATTR_PROG)) && !strcmp(cp, ATTR_PROG) ? ATTR_PROG : "booth" ); cl_log_enable_stderr(TRUE); cl_log_set_facility(0); rv = read_arguments(argc, argv); if (rv < 0) goto out; switch (cl.type) { case STATUS: rv = do_status(cl.type); break; case ARBITRATOR: case DAEMON: case SITE: rv = do_server(cl.type); break; case CLIENT: rv = do_client(); break; case GEOSTORE: rv = do_attr(); break; } out: /* Normalize values. 0x100 would be seen as "OK" by waitpid(). */ return (rv >= 0 && rv < 0x70) ? rv : 1; } diff --git a/src/pacemaker.c b/src/pacemaker.c index 52d84fa..5db5469 100644 --- a/src/pacemaker.c +++ b/src/pacemaker.c @@ -1,530 +1,530 @@ /* * Copyright (C) 2011 Jiaju Zhang * Copyright (C) 2013-2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include #include #include #include #include #include #include #include "ticket.h" #include "log.h" #include "attr.h" #include "pacemaker.h" #include "inline-fn.h" enum atomic_ticket_supported { YES=0, NO, FILENOTFOUND, /* Ie. UNKNOWN */ UNKNOWN = FILENOTFOUND, }; /* http://thedailywtf.com/Articles/What_Is_Truth_0x3f_.aspx */ enum atomic_ticket_supported atomicity = UNKNOWN; #define COMMAND_MAX 1024 /** Determines whether the installed crm_ticket can do atomic ticket grants, * _including_ multiple attribute changes. * * See * https://bugzilla.novell.com/show_bug.cgi?id=855099 * * Run "crm_ticket" without "--force"; * - the old version asks for "Y/N" via STDIN, and returns 0 * when reading "no"; * - the new version just reports an error without asking. */ static void test_atomicity(void) { int rv; if (atomicity != UNKNOWN) return; rv = system("echo n | crm_ticket -g -t any-ticket-name > /dev/null 2> /dev/null"); if (rv == -1) { log_error("Cannot run \"crm_ticket\"!"); /* BIG problem. Abort. */ exit(1); } if (WIFSIGNALED(rv)) { log_error("\"crm_ticket\" terminated by a signal!"); /* Problem. Abort. */ exit(1); } switch (WEXITSTATUS(rv)) { case 0: atomicity = NO; log_info("Old \"crm_ticket\" found, using non-atomic ticket updates."); break; case 1: atomicity = YES; log_info("New \"crm_ticket\" found, using atomic ticket updates."); break; default: log_error("Unexpected return value from \"crm_ticket\" (%d), " "falling back to non-atomic ticket updates.", rv); atomicity = NO; } assert(atomicity == YES || atomicity == NO); } const char * interpret_rv(int rv) { static char text[64]; if (rv == 0) return "0"; if (WIFSIGNALED(rv)) sprintf(text, "got signal %d", WTERMSIG(rv)); else sprintf(text, "exit code %d", WEXITSTATUS(rv)); return text; } static int pcmk_write_ticket_atomic(struct ticket_config *tk, int grant) { char cmd[COMMAND_MAX]; int rv; /* The values are appended to "-v", so that NO_ONE * (which is -1) isn't seen as another option. */ snprintf(cmd, COMMAND_MAX, "crm_ticket -t '%s' " "%s --force " "-S owner -v%" PRIi32 " " "-S expires -v%" PRIi64 " " "-S term -v%" PRIi64, tk->name, (grant > 0 ? "-g" : grant < 0 ? "-r" : ""), (int32_t)get_node_id(tk->leader), (int64_t)wall_ts(&tk->term_expires), (int64_t)tk->current_term); rv = system(cmd); log_debug("command: '%s' was executed", cmd); if (rv != 0) log_error("error: \"%s\" failed, %s", cmd, interpret_rv(rv)); return rv; } static int pcmk_store_ticket_nonatomic(struct ticket_config *tk); static int pcmk_grant_ticket(struct ticket_config *tk) { char cmd[COMMAND_MAX]; int rv; test_atomicity(); if (atomicity == YES) return pcmk_write_ticket_atomic(tk, +1); rv = pcmk_store_ticket_nonatomic(tk); if (rv) return rv; snprintf(cmd, COMMAND_MAX, "crm_ticket -t %s -g --force", tk->name); log_debug("command: '%s' was executed", cmd); rv = system(cmd); if (rv != 0) log_error("error: \"%s\" failed, %s", cmd, interpret_rv(rv)); return rv; } static int pcmk_revoke_ticket(struct ticket_config *tk) { char cmd[COMMAND_MAX]; int rv; test_atomicity(); if (atomicity == YES) return pcmk_write_ticket_atomic(tk, -1); rv = pcmk_store_ticket_nonatomic(tk); if (rv) return rv; snprintf(cmd, COMMAND_MAX, "crm_ticket -t %s -r --force", tk->name); log_debug("command: '%s' was executed", cmd); rv = system(cmd); if (rv != 0) log_error("error: \"%s\" failed, %s", cmd, interpret_rv(rv)); return rv; } static int _run_crm_ticket(char *cmd) { int i, rv; /* If there are errors, there's not much we can do but retry ... */ for (i=0; i<3 && (rv = system(cmd)); i++) ; log_debug("'%s' gave result %s", cmd, interpret_rv(rv)); return rv; } static int crm_ticket_set_int(const struct ticket_config *tk, const char *attr, int64_t val) { char cmd[COMMAND_MAX]; snprintf(cmd, COMMAND_MAX, "crm_ticket -t '%s' -S '%s' -v %" PRIi64, tk->name, attr, val); return _run_crm_ticket(cmd); } static int pcmk_set_attr(struct ticket_config *tk, const char *attr, const char *val) { char cmd[COMMAND_MAX]; snprintf(cmd, COMMAND_MAX, "crm_ticket -t '%s' -S '%s' -v '%s'", tk->name, attr, val); return _run_crm_ticket(cmd); } static int pcmk_del_attr(struct ticket_config *tk, const char *attr) { char cmd[COMMAND_MAX]; snprintf(cmd, COMMAND_MAX, "crm_ticket -t '%s' -D '%s'", tk->name, attr); return _run_crm_ticket(cmd); } static int pcmk_store_ticket_nonatomic(struct ticket_config *tk) { int rv; /* Always try to store *each* attribute, even if there's an error * for one of them. */ rv = crm_ticket_set_int(tk, "owner", (int32_t)get_node_id(tk->leader)); rv = crm_ticket_set_int(tk, "expires", wall_ts(&tk->term_expires)) || rv; rv = crm_ticket_set_int(tk, "term", tk->current_term) || rv; if (rv) log_error("setting crm_ticket attributes failed; %s", interpret_rv(rv)); else log_info("setting crm_ticket attributes successful"); return rv; } typedef int (*attr_f)(struct ticket_config *tk, const char *name, char *val); struct attr_tab { const char *name; attr_f handling_f; }; static int save_expires(struct ticket_config *tk, const char *name, char *val) { secs2tv(unwall_ts(atol(val)), &tk->term_expires); return 0; } static int save_term(struct ticket_config *tk, const char *name, char *val) { tk->current_term = atol(val); return 0; } static int parse_boolean(char *val) { long v; if (!strncmp(val, "false", 5)) { v = 0; } else if (!strncmp(val, "true", 4)) { v = 1; } else { v = atol(val); } return v; } static int save_granted(struct ticket_config *tk, const char *name, char *val) { tk->is_granted = parse_boolean(val); return 0; } static int save_owner(struct ticket_config *tk, const char *name, char *val) { /* No check, node could have been deconfigured. */ tk->leader = NULL; return !find_site_by_id(atol(val), &tk->leader); } static int ignore_attr(struct ticket_config *tk, const char *name, char *val) { return 0; } static int save_attr(struct ticket_config *tk, const char *name, char *val) { /* tell store_geo_attr not to store time, we don't have that * information available */ return store_geo_attr(tk, name, val, 1); } struct attr_tab attr_handlers[] = { { "expires", save_expires}, { "term", save_term}, { "granted", save_granted}, { "owner", save_owner}, { "id", ignore_attr}, { "last-granted", ignore_attr}, { NULL, 0}, }; /* get_attr is currently not used and has not been tested */ static int pcmk_get_attr(struct ticket_config *tk, const char *attr, const char **vp) { char cmd[COMMAND_MAX]; char line[BOOTH_ATTRVAL_LEN+1]; int rv = 0; FILE *p; *vp = NULL; snprintf(cmd, COMMAND_MAX, "crm_ticket -t '%s' -G '%s' --quiet", tk->name, attr); p = popen(cmd, "r"); if (p == NULL) { rv = errno; log_error("popen error %d (%s) for \"%s\"", rv, strerror(rv), cmd); return rv || EINVAL; } if (fgets(line, BOOTH_ATTRVAL_LEN, p) == NULL) { rv = ENODATA; goto out; } *vp = g_strdup(line); out: rv = pclose(p); if (!rv) { log_debug("command \"%s\"", cmd); } else if (WEXITSTATUS(rv) == 6) { log_info("command \"%s\", ticket not found", cmd); } else { log_error("command \"%s\" %s", cmd, interpret_rv(rv)); } return rv; } static int save_attributes(struct ticket_config *tk, xmlDocPtr doc) { int rv = 0, rc; xmlNodePtr n; xmlAttrPtr attr; xmlChar *v; struct attr_tab *atp; n = xmlDocGetRootElement(doc); if (n == NULL) { tk_log_error("crm_ticket xml output empty"); return -EINVAL; } if (xmlStrcmp(n->name, (const xmlChar *)"ticket_state")) { tk_log_error("crm_ticket xml root element not ticket_state"); return -EINVAL; } for (attr = n->properties; attr; attr = attr->next) { v = xmlGetProp(n, attr->name); for (atp = attr_handlers; atp->name; atp++) { if (!strcmp(atp->name, attr->name)) { rc = atp->handling_f(tk, attr->name, v); break; } } if (!atp->name) { rc = save_attr(tk, attr->name, v); } if (rc) { tk_log_error("error storing attribute %s", attr->name); rv |= rc; } xmlFree(v); } return rv; } #define CHUNK_SIZE 256 static int parse_ticket_state(struct ticket_config *tk, FILE *p) { int rv = 0; GString *input = NULL; char line[CHUNK_SIZE]; xmlDocPtr doc = NULL; xmlErrorPtr errptr; int opts = XML_PARSE_COMPACT | XML_PARSE_NONET; /* skip first two lines of output */ if (fgets(line, CHUNK_SIZE-1, p) == NULL || fgets(line, CHUNK_SIZE-1, p) == NULL) { tk_log_error("crm_ticket xml output empty"); rv = ENODATA; goto out; } input = g_string_sized_new(CHUNK_SIZE); if (!input) { log_error("out of memory"); rv = -1; goto out; } while (fgets(line, CHUNK_SIZE-1, p) != NULL) { if (!g_string_append(input, line)) { log_error("out of memory"); rv = -1; goto out; } } doc = xmlReadDoc(input->str, NULL, NULL, opts); if (doc == NULL) { errptr = xmlGetLastError(); if (errptr) { tk_log_error("crm_ticket xml parse failed (domain=%d, level=%d, code=%d): %s", errptr->domain, errptr->level, errptr->code, errptr->message); } else { tk_log_error("crm_ticket xml parse failed"); } rv = -EINVAL; goto out; } rv = save_attributes(tk, doc); out: if (doc) xmlFreeDoc(doc); if (input) g_string_free(input, TRUE); return rv; } static int pcmk_load_ticket(struct ticket_config *tk) { char cmd[COMMAND_MAX]; int rv = 0, pipe_rv; FILE *p; /* This here gets run during startup; testing that here means that * normal operation won't be interrupted with that test. */ test_atomicity(); snprintf(cmd, COMMAND_MAX, "crm_ticket -t '%s' -q", tk->name); p = popen(cmd, "r"); if (p == NULL) { pipe_rv = errno; log_error("popen error %d (%s) for \"%s\"", pipe_rv, strerror(pipe_rv), cmd); return pipe_rv || -EINVAL; } rv = parse_ticket_state(tk, p); if (!tk->leader) { /* Hmm, no site found for the ticket we have in the * CIB!? * Assume that the ticket belonged to us if it was * granted here! */ log_warn("%s: no site matches; site got reconfigured?", tk->name); if (tk->is_granted) { log_warn("%s: granted here, assume it belonged to us", tk->name); tk->leader = local; } } pipe_rv = pclose(p); if (!pipe_rv) { log_debug("command \"%s\"", cmd); } else if (WEXITSTATUS(pipe_rv) == 6) { log_info("command \"%s\", ticket not found", cmd); } else { log_error("command \"%s\" %s", cmd, interpret_rv(pipe_rv)); } return rv | pipe_rv; } struct ticket_handler pcmk_handler = { .grant_ticket = pcmk_grant_ticket, .revoke_ticket = pcmk_revoke_ticket, .load_ticket = pcmk_load_ticket, .set_attr = pcmk_set_attr, .get_attr = pcmk_get_attr, .del_attr = pcmk_del_attr, }; diff --git a/src/pacemaker.h b/src/pacemaker.h index e066802..34a1cae 100644 --- a/src/pacemaker.h +++ b/src/pacemaker.h @@ -1,39 +1,39 @@ /* * Copyright (C) 2011 Jiaju Zhang * Copyright (C) 2013-2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef _PACEMAKER_H #define _PACEMAKER_H #include #include "config.h" struct ticket_handler { int (*grant_ticket) (struct ticket_config *tk); int (*revoke_ticket) (struct ticket_config *tk); int (*load_ticket) (struct ticket_config *tk); int (*set_attr) (struct ticket_config *tk, const char *a, const char *v); int (*get_attr) (struct ticket_config *tk, const char *a, const char **vp); int (*del_attr) (struct ticket_config *tk, const char *a); }; struct ticket_handler pcmk_handler; const char * interpret_rv(int rv); #endif /* _PACEMAKER_H */ diff --git a/src/raft.c b/src/raft.c index d7daffa..35094ed 100644 --- a/src/raft.c +++ b/src/raft.c @@ -1,1003 +1,1003 @@ /* * Copyright (C) 2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include #include #include #include "booth.h" #include "timer.h" #include "transport.h" #include "inline-fn.h" #include "config.h" #include "raft.h" #include "ticket.h" #include "request.h" #include "log.h" inline static void clear_election(struct ticket_config *tk) { int i; struct booth_site *site; tk_log_debug("clear election"); tk->votes_received = 0; foreach_node(i, site) tk->votes_for[site->index] = NULL; } inline static void record_vote(struct ticket_config *tk, struct booth_site *who, struct booth_site *vote) { tk_log_debug("site %s votes for %s", site_string(who), site_string(vote)); if (!tk->votes_for[who->index]) { tk->votes_for[who->index] = vote; tk->votes_received |= who->bitmask; } else { if (tk->votes_for[who->index] != vote) tk_log_warn("%s voted previously " "for %s and now wants to vote for %s (ignored)", site_string(who), site_string(tk->votes_for[who->index]), site_string(vote)); } } static void update_term_from_msg(struct ticket_config *tk, struct boothc_ticket_msg *msg) { uint32_t i; i = ntohl(msg->ticket.term); /* if we failed to start the election, then accept the term * from the leader * */ if (tk->state == ST_CANDIDATE) { tk->current_term = i; } else { tk->current_term = max(i, tk->current_term); } } static void set_ticket_expiry(struct ticket_config *tk, int duration) { set_future_time(&tk->term_expires, duration); } static void update_ticket_from_msg(struct ticket_config *tk, struct booth_site *sender, struct boothc_ticket_msg *msg) { int duration; tk_log_info("updating from %s (%d/%d)", site_string(sender), ntohl(msg->ticket.term), msg_term_time(msg)); duration = min(tk->term_duration, msg_term_time(msg)); set_ticket_expiry(tk, duration); update_term_from_msg(tk, msg); } static void copy_ticket_from_msg(struct ticket_config *tk, struct boothc_ticket_msg *msg) { set_ticket_expiry(tk, msg_term_time(msg)); tk->current_term = ntohl(msg->ticket.term); } static void become_follower(struct ticket_config *tk, struct boothc_ticket_msg *msg) { copy_ticket_from_msg(tk, msg); set_state(tk, ST_FOLLOWER); time_reset(&tk->delay_commit); tk->in_election = 0; /* if we're following and the ticket was granted here * then commit to CIB right away (we're probably restarting) */ if (tk->is_granted) { disown_ticket(tk); ticket_write(tk); } } static void won_elections(struct ticket_config *tk) { set_leader(tk, local); set_state(tk, ST_LEADER); set_ticket_expiry(tk, tk->term_duration); time_reset(&tk->election_end); tk->voted_for = NULL; if (is_time_set(&tk->delay_commit) && all_sites_replied(tk)) { time_reset(&tk->delay_commit); tk_log_debug("reset delay commit as all sites replied"); } save_committed_tkt(tk); ticket_broadcast(tk, OP_HEARTBEAT, OP_ACK, RLT_SUCCESS, 0); tk->ticket_updated = 0; } /* if more than one member got the same (and maximum within that * election) number of votes, then that is a tie */ static int is_tie(struct ticket_config *tk) { int i; struct booth_site *v; int count[MAX_NODES] = { 0, }; int max_votes = 0, max_cnt = 0; for(i=0; isite_count; i++) { v = tk->votes_for[i]; if (!v) continue; count[v->index]++; max_votes = max(max_votes, count[v->index]); } for(i=0; isite_count; i++) { if (count[i] == max_votes) max_cnt++; } return max_cnt > 1; } static struct booth_site *majority_votes(struct ticket_config *tk) { int i, n; struct booth_site *v; int count[MAX_NODES] = { 0, }; for(i=0; isite_count; i++) { v = tk->votes_for[i]; if (!v || v == no_leader) continue; n = v->index; count[n]++; tk_log_debug("Majority: %d %s wants %d %s => %d", i, site_string(&booth_conf->site[i]), n, site_string(v), count[n]); if (count[n]*2 <= booth_conf->site_count) continue; tk_log_debug("Majority reached: %d of %d for %s", count[n], booth_conf->site_count, site_string(v)); return v; } return NULL; } void elections_end(struct ticket_config *tk) { struct booth_site *new_leader; if (is_past(&tk->election_end)) { /* This is previous election timed out */ tk_log_info("elections finished"); } tk->in_election = 0; new_leader = majority_votes(tk); if (new_leader == local) { won_elections(tk); tk_log_info("granted successfully here"); } else if (new_leader) { tk_log_info("ticket granted at %s", site_string(new_leader)); } else { tk_log_info("nobody won elections, new elections"); tk->outcome = RLT_MORE; foreach_tkt_req(tk, notify_client); if (!new_election(tk, NULL, is_tie(tk) ? 2 : 0, OR_AGAIN)) { ticket_activate_timeout(tk); } } } static int newer_term(struct ticket_config *tk, struct booth_site *sender, struct booth_site *leader, struct boothc_ticket_msg *msg, int in_election) { uint32_t term; /* it may happen that we hear about our newer term */ if (leader == local) return 0; term = ntohl(msg->ticket.term); /* §5.1 */ if (term > tk->current_term) { set_state(tk, ST_FOLLOWER); if (!in_election) { set_leader(tk, leader); tk_log_info("from %s: higher term %d vs. %d, following %s", site_string(sender), term, tk->current_term, ticket_leader_string(tk)); } else { tk_log_debug("from %s: higher term %d vs. %d (election)", site_string(sender), term, tk->current_term); } tk->current_term = term; return 1; } return 0; } static int msg_term_invalid(struct ticket_config *tk, struct booth_site *sender, struct booth_site *leader, struct boothc_ticket_msg *msg) { uint32_t term; term = ntohl(msg->ticket.term); /* §5.1 */ if (is_term_invalid(tk, term)) { tk_log_info("got invalid term from %s " "(%d), ignoring", site_string(sender), term); return 1; } return 0; } static int term_too_low(struct ticket_config *tk, struct booth_site *sender, struct booth_site *leader, struct boothc_ticket_msg *msg) { uint32_t term; term = ntohl(msg->ticket.term); /* §5.1 */ if (term < tk->current_term) { tk_log_info("sending reject to %s, its term too low " "(%d vs. %d)", site_string(sender), term, tk->current_term ); send_reject(sender, tk, RLT_TERM_OUTDATED, msg); return 1; } return 0; } /* For follower. */ static int answer_HEARTBEAT ( struct ticket_config *tk, struct booth_site *sender, struct booth_site *leader, struct boothc_ticket_msg *msg ) { uint32_t term; term = ntohl(msg->ticket.term); tk_log_debug("heartbeat from leader: %s, have %s; term %d vs %d", site_string(leader), ticket_leader_string(tk), term, tk->current_term); if (term < tk->current_term) { if (sender == tk->leader) { tk_log_info("trusting leader %s with a lower term (%d vs %d)", site_string(leader), term, tk->current_term); } else if (is_owned(tk)) { tk_log_warn("different leader %s with a lower term " "(%d vs %d), sending reject", site_string(leader), term, tk->current_term); return send_reject(sender, tk, RLT_TERM_OUTDATED, msg); } } /* got heartbeat, no rejects expected anymore */ tk->expect_more_rejects = 0; /* Needed? */ newer_term(tk, sender, leader, msg, 0); become_follower(tk, msg); /* Racy??? */ assert(sender == leader || !leader); set_leader(tk, leader); /* Ack the heartbeat (we comply). */ return send_msg(OP_ACK, tk, sender, msg); } static int process_UPDATE ( struct ticket_config *tk, struct booth_site *sender, struct booth_site *leader, struct boothc_ticket_msg *msg ) { if (is_owned(tk) && sender != tk->leader) { tk_log_warn("different leader %s wants to update " "our ticket, sending reject", site_string(leader)); return send_reject(sender, tk, RLT_TERM_OUTDATED, msg); } tk_log_debug("leader %s wants to update our ticket", site_string(leader)); become_follower(tk, msg); set_leader(tk, leader); ticket_write(tk); /* run ticket_cron if the ticket expires */ set_ticket_wakeup(tk); return send_msg(OP_ACK, tk, sender, msg); } static int process_REVOKE ( struct ticket_config *tk, struct booth_site *sender, struct booth_site *leader, struct boothc_ticket_msg *msg ) { int rv; if (tk->state == ST_INIT && tk->leader == no_leader) { /* assume that our ack got lost */ rv = send_msg(OP_ACK, tk, sender, msg); } else if (tk->leader != sender) { tk_log_error("%s wants to revoke ticket, " "but it is not granted there (ignoring)", site_string(sender)); return 1; } else if (tk->state != ST_FOLLOWER) { tk_log_error("unexpected ticket revoke from %s " "(in state %s) (ignoring)", site_string(sender), state_to_string(tk->state)); return 1; } else { tk_log_info("%s revokes ticket", site_string(tk->leader)); save_committed_tkt(tk); reset_ticket(tk); set_leader(tk, no_leader); ticket_write(tk); rv = send_msg(OP_ACK, tk, sender, msg); } return rv; } /* For leader. */ static int process_ACK( struct ticket_config *tk, struct booth_site *sender, struct booth_site *leader, struct boothc_ticket_msg *msg ) { uint32_t term; int req; term = ntohl(msg->ticket.term); if (newer_term(tk, sender, leader, msg, 0)) { /* unexpected higher term */ tk_log_warn("got higher term from %s (%d vs. %d)", site_string(sender), term, tk->current_term); return 0; } /* Don't send a reject. */ if (term < tk->current_term) { /* Doesn't know what he's talking about - perhaps * doesn't receive our packets? */ tk_log_warn("unexpected term " "from %s (%d vs. %d) (ignoring)", site_string(sender), term, tk->current_term); return 0; } /* if the ticket is to be revoked, further processing is not * interesting (and dangerous) */ if (tk->next_state == ST_INIT || tk->state == ST_INIT) return 0; req = ntohl(msg->header.request); if ((req == OP_UPDATE || req == OP_HEARTBEAT) && term == tk->current_term && leader == tk->leader) { if (majority_of_bits(tk, tk->acks_received)) { /* OK, at least half of the nodes are reachable; * Update the ticket and send update messages out */ return leader_update_ticket(tk); } } return 0; } static int process_VOTE_FOR( struct ticket_config *tk, struct booth_site *sender, struct booth_site *leader, struct boothc_ticket_msg *msg ) { if (leader == no_leader) { /* leader wants to step down? */ if (sender == tk->leader && (tk->state == ST_FOLLOWER || tk->state == ST_CANDIDATE)) { tk_log_info("%s wants to give the ticket away (ticket release)", site_string(tk->leader)); save_committed_tkt(tk); reset_ticket(tk); set_state(tk, ST_FOLLOWER); if (local->type == SITE) { ticket_write(tk); schedule_election(tk, OR_STEPDOWN); } } else { tk_log_info("%s votes for none, ignoring (duplicate ticket release?)", site_string(sender)); } return 0; } if (tk->state != ST_CANDIDATE) { /* lost candidate status, somebody rejected our proposal */ tk_log_info("candidate status lost, ignoring VtFr from %s", site_string(sender)); return 0; } if (term_too_low(tk, sender, leader, msg)) return 0; if (newer_term(tk, sender, leader, msg, 0)) { clear_election(tk); } record_vote(tk, sender, leader); /* only if all voted can we take the ticket now, otherwise * wait for timeout in ticket_cron */ if (!tk->acks_expected) { /* §5.2 */ elections_end(tk); } return 0; } static int process_REJECTED( struct ticket_config *tk, struct booth_site *sender, struct booth_site *leader, struct boothc_ticket_msg *msg ) { uint32_t rv; rv = ntohl(msg->header.result); if (tk->state == ST_CANDIDATE && leader == local) { /* the sender has us as the leader (!) * the elections will time out, then we can try again */ tk_log_warn("ticket was granted to us " "(and we didn't know)"); tk->expect_more_rejects = 1; return 0; } if (tk->state == ST_CANDIDATE && rv == RLT_TERM_OUTDATED) { tk_log_warn("ticket outdated (term %d), granted to %s", ntohl(msg->ticket.term), site_string(leader) ); set_leader(tk, leader); tk->expect_more_rejects = 1; become_follower(tk, msg); return 0; } if (tk->state == ST_CANDIDATE && rv == RLT_TERM_STILL_VALID) { if (tk->lost_leader == leader) { if (tk->election_reason == OR_TKT_LOST) { tk_log_warn("%s still has the ticket valid, " "we'll backup a bit", site_string(sender)); } else { tk_log_warn("%s unexpectedly rejects elections", site_string(sender)); } } else { tk_log_warn("ticket was granted to %s " "(and we didn't know)", site_string(leader)); } set_leader(tk, leader); become_follower(tk, msg); tk->expect_more_rejects = 1; return 0; } if (tk->state == ST_CANDIDATE && rv == RLT_YOU_OUTDATED) { set_leader(tk, leader); tk->expect_more_rejects = 1; if (leader && leader != no_leader) { tk_log_warn("our ticket is outdated, granted to %s", site_string(leader)); become_follower(tk, msg); } else { tk_log_warn("our ticket is outdated and revoked"); update_ticket_from_msg(tk, sender, msg); set_state(tk, ST_INIT); } return 0; } if (!tk->expect_more_rejects) { tk_log_warn("from %s: in state %s, got %s (unexpected reject)", site_string(sender), state_to_string(tk->state), state_to_string(rv)); } return 0; } static int ticket_seems_ok(struct ticket_config *tk) { int left; left = term_time_left(tk); if (!left) return 0; /* quite sure */ if (tk->state == ST_CANDIDATE) return 0; /* in state of flux */ if (tk->state == ST_LEADER) return 1; /* quite sure */ if (tk->state == ST_FOLLOWER && left >= tk->term_duration/3) return 1; /* almost quite sure */ return 0; } static int test_reason( struct ticket_config *tk, struct booth_site *sender, struct booth_site *leader, struct boothc_ticket_msg *msg ) { int reason; reason = ntohl(msg->header.reason); if (reason == OR_TKT_LOST) { if (tk->state == ST_INIT && tk->leader == no_leader) { tk_log_warn("%s claims that the ticket is lost, " "but it's in %s state (reject sent)", site_string(sender), state_to_string(tk->state) ); return RLT_YOU_OUTDATED; } if (ticket_seems_ok(tk)) { tk_log_warn("%s claims that the ticket is lost, " "but it is ok here (reject sent)", site_string(sender)); return RLT_TERM_STILL_VALID; } } return 0; } /* §5.2 */ static int answer_REQ_VOTE( struct ticket_config *tk, struct booth_site *sender, struct booth_site *leader, struct boothc_ticket_msg *msg ) { int valid; struct boothc_ticket_msg omsg; cmd_result_t inappr_reason; int reason; inappr_reason = test_reason(tk, sender, leader, msg); if (inappr_reason) return send_reject(sender, tk, inappr_reason, msg); valid = term_time_left(tk); reason = ntohl(msg->header.reason); /* valid tickets are not allowed only if the sender thinks * the ticket got lost */ if (sender != tk->leader && valid && reason != OR_STEPDOWN) { tk_log_warn("election from %s with reason %s rejected " "(we have %s as ticket owner), ticket still valid for %ds", site_string(sender), state_to_string(reason), site_string(tk->leader), valid); return send_reject(sender, tk, RLT_TERM_STILL_VALID, msg); } if (term_too_low(tk, sender, leader, msg)) return 0; /* set this, so that we know not to send status for the * ticket */ tk->in_election = 1; /* reset ticket's leader on not valid tickets */ if (!valid) set_leader(tk, NULL); /* if it's a newer term or ... */ if (newer_term(tk, sender, leader, msg, 1)) { clear_election(tk); goto vote_for_sender; } /* ... we didn't vote yet, then vote for the sender */ /* §5.2, §5.4 */ if (!tk->voted_for) { vote_for_sender: tk->voted_for = sender; record_vote(tk, sender, leader); } init_ticket_msg(&omsg, OP_VOTE_FOR, OP_REQ_VOTE, RLT_SUCCESS, 0, tk); omsg.ticket.leader = htonl(get_node_id(tk->voted_for)); return booth_udp_send_auth(sender, &omsg, sendmsglen(&omsg)); } #define is_reason(r, tk) \ (reason == (r) || (reason == OR_AGAIN && (tk)->election_reason == (r))) int new_election(struct ticket_config *tk, struct booth_site *preference, int update_term, cmd_reason_t reason) { struct booth_site *new_leader; if (local->type != SITE) return 0; if ((is_reason(OR_TKT_LOST, tk) || is_reason(OR_STEPDOWN, tk)) && check_attr_prereq(tk, GRANT_AUTO)) { tk_log_info("attribute prerequisite not met, " "not starting elections"); return 0; } /* elections were already started, but not yet finished/timed out */ if (is_time_set(&tk->election_end) && !is_past(&tk->election_end)) return 1; if (ANYDEBUG) { int tdiff; if (is_time_set(&tk->election_end)) { tdiff = -time_left(&tk->election_end); tk_log_debug("starting elections, previous finished since " intfmt(tdiff)); } else { tk_log_debug("starting elections"); } tk_log_debug("elections caused by %s %s", state_to_string(reason), reason == OR_AGAIN ? state_to_string(tk->election_reason) : "" ); } /* §5.2 */ /* If there was _no_ answer, don't keep incrementing the term number * indefinitely. If there was no peer, there'll probably be no one * listening now either. However, we don't know if we were * invoked due to a timeout (caller does). */ /* increment the term only if either the current term was * valid or if there was a tie (in that case update_term > 1) */ if ((update_term > 1) || (update_term && tk->last_valid_tk && tk->last_valid_tk->current_term >= tk->current_term)) { /* save the previous term, we may need to send out the * MY_INDEX message */ if (tk->state != ST_CANDIDATE) { save_committed_tkt(tk); } tk->current_term++; } set_future_time(&tk->election_end, tk->timeout); tk->in_election = 1; tk_log_info("starting new election (term=%d)", tk->current_term); clear_election(tk); if(preference) new_leader = preference; else new_leader = (local->type == SITE) ? local : NULL; record_vote(tk, local, new_leader); tk->voted_for = new_leader; set_state(tk, ST_CANDIDATE); /* some callers may want just to repeat on timeout */ if (reason == OR_AGAIN) { reason = tk->election_reason; } else { tk->election_reason = reason; } ticket_broadcast(tk, OP_REQ_VOTE, OP_VOTE_FOR, RLT_SUCCESS, reason); add_random_delay(tk); return 0; } /* we were a leader and somebody says that they have a more up * to date ticket * there was probably connectivity loss * tricky */ static int leader_handle_newer_ticket( struct ticket_config *tk, struct booth_site *sender, struct booth_site *leader, struct boothc_ticket_msg *msg ) { update_term_from_msg(tk, msg); if (leader != no_leader && leader && leader != local) { /* eek, two leaders, split brain */ /* normally shouldn't happen; run election */ tk_log_error("from %s: ticket granted to %s! (revoking locally)", site_string(sender), site_string(leader) ); } else if (term_time_left(tk)) { /* eek, two leaders, split brain */ /* normally shouldn't happen; run election */ tk_log_error("from %s: ticket granted to %s! (revoking locally)", site_string(sender), site_string(leader) ); } set_next_state(tk, ST_LEADER); return 0; } /* reply to STATUS */ static int process_MY_INDEX ( struct ticket_config *tk, struct booth_site *sender, struct booth_site *leader, struct boothc_ticket_msg *msg ) { int i; int expired; expired = !msg_term_time(msg); /* test against the last valid(!) ticket we have */ i = my_last_term(tk) - ntohl(msg->ticket.term); if (i > 0) { /* let them know about our newer ticket */ send_msg(OP_MY_INDEX, tk, sender, msg); if (tk->state == ST_LEADER) { tk_log_info("sending ticket update to %s", site_string(sender)); return send_msg(OP_UPDATE, tk, sender, msg); } } /* we have a newer or equal ticket and theirs is expired, * nothing more to do here */ if (i >= 0 && expired) { return 0; } if (tk->state == ST_LEADER) { /* we're the leader, thread carefully */ if (expired) { /* if their ticket is expired, * nothing more to do */ return 0; } if (i < 0) { /* they have a newer ticket, trouble if we're already leader * for it */ tk_log_warn("from %s: more up to date ticket at %s", site_string(sender), site_string(leader) ); return leader_handle_newer_ticket(tk, sender, leader, msg); } else { /* we have the ticket and we don't care */ return 0; } } else if (tk->state == ST_CANDIDATE) { if (leader == local) { /* a belated MY_INDEX, we're already trying to get the * ticket */ return 0; } } /* their ticket is either newer or not expired, don't * ignore it */ update_ticket_from_msg(tk, sender, msg); set_leader(tk, leader); update_ticket_state(tk, sender); save_committed_tkt(tk); set_ticket_wakeup(tk); return 0; } int raft_answer( struct ticket_config *tk, struct booth_site *sender, struct booth_site *leader, struct boothc_ticket_msg *msg ) { int cmd, req; int rv; rv = 0; cmd = ntohl(msg->header.cmd); req = ntohl(msg->header.request); if (req) tk_log_debug("got %s (req %s) from %s", state_to_string(cmd), state_to_string(req), site_string(sender)); else tk_log_debug("got %s from %s", state_to_string(cmd), site_string(sender)); /* don't process tickets with invalid term */ if (cmd != OP_STATUS && msg_term_invalid(tk, sender, leader, msg)) return 0; switch (cmd) { case OP_REQ_VOTE: rv = answer_REQ_VOTE(tk, sender, leader, msg); break; case OP_VOTE_FOR: rv = process_VOTE_FOR(tk, sender, leader, msg); break; case OP_ACK: if (tk->leader == local && tk->state == ST_LEADER) rv = process_ACK(tk, sender, leader, msg); break; case OP_HEARTBEAT: if ((tk->leader != local || !term_time_left(tk)) && (tk->state == ST_INIT || tk->state == ST_FOLLOWER || tk->state == ST_CANDIDATE)) rv = answer_HEARTBEAT(tk, sender, leader, msg); else { tk_log_warn("unexpected message %s, from %s", state_to_string(cmd), site_string(sender)); if (ticket_seems_ok(tk)) send_reject(sender, tk, RLT_TERM_STILL_VALID, msg); rv = -EINVAL; } break; case OP_UPDATE: if (((tk->leader != local && tk->leader == leader) || !is_owned(tk)) && (tk->state == ST_INIT || tk->state == ST_FOLLOWER || tk->state == ST_CANDIDATE)) { rv = process_UPDATE(tk, sender, leader, msg); } else { tk_log_warn("unexpected message %s, from %s", state_to_string(cmd), site_string(sender)); if (ticket_seems_ok(tk)) send_reject(sender, tk, RLT_TERM_STILL_VALID, msg); rv = -EINVAL; } break; case OP_REJECTED: rv = process_REJECTED(tk, sender, leader, msg); break; case OP_REVOKE: rv = process_REVOKE(tk, sender, leader, msg); break; case OP_MY_INDEX: rv = process_MY_INDEX(tk, sender, leader, msg); break; case OP_STATUS: if (!tk->in_election) rv = send_msg(OP_MY_INDEX, tk, sender, msg); break; default: tk_log_error("unknown message %s, from %s", state_to_string(cmd), site_string(sender)); rv = -EINVAL; } return rv; } diff --git a/src/raft.h b/src/raft.h index bb0c93c..00a467d 100644 --- a/src/raft.h +++ b/src/raft.h @@ -1,43 +1,43 @@ /* * Copyright (C) 2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef _RAFT_H #define _RAFT_H #include "booth.h" typedef enum { ST_INIT = CHAR2CONST('I', 'n', 'i', 't'), ST_FOLLOWER = CHAR2CONST('F', 'l', 'l', 'w'), ST_CANDIDATE = CHAR2CONST('C', 'n', 'd', 'i'), ST_LEADER = CHAR2CONST('L', 'e', 'a', 'd'), } server_state_e; struct ticket_config; int raft_answer(struct ticket_config *tk, struct booth_site *from, struct booth_site *leader, struct boothc_ticket_msg *msg); int new_election(struct ticket_config *tk, struct booth_site *new_leader, int update_term, cmd_reason_t reason); void elections_end(struct ticket_config *tk); #endif /* _RAFT_H */ diff --git a/src/request.c b/src/request.c index 9adc0ca..0612bff 100644 --- a/src/request.c +++ b/src/request.c @@ -1,83 +1,83 @@ /* * Copyright (C) 2015 Dejan Muhamedagic * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include #include #include "booth.h" #include "ticket.h" #include "request.h" #include "log.h" static GList *req_l = NULL; static int req_id_cnt; /* add request to the queue; it is up to the caller to manage * memory for the three parameters */ void *add_req( struct ticket_config *tk, struct client *req_client, struct boothc_ticket_msg *msg) { struct request *rp; rp = g_new(struct request, 1); if (!rp) return NULL; rp->id = req_id_cnt++; rp->tk = tk; rp->client_fd = req_client->fd; rp->msg = msg; req_l = g_list_append(req_l, rp); return rp; } int get_req_id(const void *rp) { if (!rp) return -1; return ((struct request *)rp)->id; } static void del_req(GList *lp) { if (!lp) return; req_l = g_list_delete_link(req_l, lp); } void foreach_tkt_req(struct ticket_config *tk, req_fp f) { GList *lp, *next; struct request *rp; lp = g_list_first(req_l); while (lp) { next = g_list_next(lp); rp = (struct request *)lp->data; if (rp->tk == tk && (f)(rp->tk, rp->client_fd, rp->msg) == 0) { log_debug("remove request for client %d", rp->client_fd); del_req(lp); /* don't need this request anymore */ } lp = next; } } diff --git a/src/request.h b/src/request.h index 9ba49e7..d71a257 100644 --- a/src/request.h +++ b/src/request.h @@ -1,55 +1,55 @@ /* * Copyright (C) 2015 Dejan Muhamedagic * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef _REQUEST_H #define _REQUEST_H #include "booth.h" #include "config.h" /* Requests are coming from clients and get queued in a * round-robin queue (fixed size) * * This is one way to make the server more responsive and less * dependent on misbehaving clients. The requests are queued and * later served from the server loop. */ struct request { /** Request ID */ int id; /** The ticket. */ struct ticket_config *tk; /** The client which sent the request */ int client_fd; /** The message containing the request */ void *msg; }; typedef int (*req_fp)( struct ticket_config *, int, struct boothc_ticket_msg *); void *add_req(struct ticket_config *tk, struct client *req_client, struct boothc_ticket_msg *msg); void foreach_tkt_req(struct ticket_config *tk, req_fp f); int get_req_id(const void *rp); #endif /* _REQUEST_H */ diff --git a/src/ticket.c b/src/ticket.c index 8fc5670..afc992c 100644 --- a/src/ticket.c +++ b/src/ticket.c @@ -1,1286 +1,1286 @@ /* * Copyright (C) 2011 Jiaju Zhang * Copyright (C) 2013-2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include #include #include #include #include #include #include #include "b_config.h" #include "ticket.h" #include "config.h" #include "pacemaker.h" #include "inline-fn.h" #include "log.h" #include "booth.h" #include "raft.h" #include "handler.h" #include "request.h" #define TK_LINE 256 extern int TIME_RES; /* Untrusted input, must fit (incl. \0) in a buffer of max chars. */ int check_max_len_valid(const char *s, int max) { int i; for(i=0; iticket_count; i++) { if (!strcmp(booth_conf->ticket[i].name, ticket)) { if (found) *found = booth_conf->ticket + i; return 1; } } return 0; } int check_ticket(char *ticket, struct ticket_config **found) { if (found) *found = NULL; if (!booth_conf) return 0; if (!check_max_len_valid(ticket, sizeof(booth_conf->ticket[0].name))) return 0; return find_ticket_by_name(ticket, found); } int check_site(char *site, int *is_local) { struct booth_site *node; if (!check_max_len_valid(site, sizeof(node->addr_string))) return 0; if (find_site_by_name(site, &node, 0)) { *is_local = node->local; return 1; } return 0; } /* is it safe to commit the grant? * if we didn't hear from all sites on the initial grant, we may * need to delay the commit * * TODO: investigate possibility to devise from history whether a * missing site could be holding a ticket or not */ static int ticket_dangerous(struct ticket_config *tk) { int tdiff; /* we may be invoked often, don't spam the log unnecessarily */ static int no_log_delay_msg; if (!is_time_set(&tk->delay_commit)) return 0; if (is_past(&tk->delay_commit) || all_sites_replied(tk)) { if (tk->leader == local) { tk_log_info("%s, committing to CIB", is_past(&tk->delay_commit) ? "ticket delay expired" : "all sites replied"); } time_reset(&tk->delay_commit); no_log_delay_msg = 0; return 0; } tdiff = time_left(&tk->delay_commit); tk_log_debug("delay ticket commit for another " intfmt(tdiff)); if (!no_log_delay_msg) { tk_log_info("delaying ticket commit to CIB for " intfmt(tdiff)); tk_log_info("(or all sites are reached)"); no_log_delay_msg = 1; } return 1; } int ticket_write(struct ticket_config *tk) { if (local->type != SITE) return -EINVAL; if (ticket_dangerous(tk)) return 1; if (tk->leader == local) { if (tk->state != ST_LEADER) { tk_log_info("ticket state not yet consistent, " "delaying ticket grant to CIB"); return 1; } pcmk_handler.grant_ticket(tk); } else { pcmk_handler.revoke_ticket(tk); } tk->update_cib = 0; return 0; } void save_committed_tkt(struct ticket_config *tk) { if (!tk->last_valid_tk) { tk->last_valid_tk = malloc(sizeof(struct ticket_config)); if (!tk->last_valid_tk) { log_error("out of memory"); return; } } memcpy(tk->last_valid_tk, tk, sizeof(struct ticket_config)); } static void ext_prog_failed(struct ticket_config *tk, int start_election) { /* Give it to somebody else. * Just send a VOTE_FOR message, so the * others can start elections. */ if (leader_and_valid(tk)) { save_committed_tkt(tk); reset_ticket(tk); ticket_write(tk); if (start_election) { ticket_broadcast(tk, OP_VOTE_FOR, OP_REQ_VOTE, RLT_SUCCESS, OR_LOCAL_FAIL); } } } #define attr_found(geo_ap, ap) \ ((geo_ap) && !strcmp((geo_ap)->val, (ap)->attr_val)) int check_attr_prereq(struct ticket_config *tk, grant_type_e grant_type) { GList *el; struct attr_prereq *ap; struct geo_attr *geo_ap; for (el = g_list_first(tk->attr_prereqs); el; el = g_list_next(el)) { ap = (struct attr_prereq *)el->data; if (ap->grant_type != grant_type) continue; geo_ap = (struct geo_attr *)g_hash_table_lookup(tk->attr, ap->attr_name); switch(ap->op) { case ATTR_OP_EQ: if (!attr_found(geo_ap, ap)) goto fail; break; case ATTR_OP_NE: if (attr_found(geo_ap, ap)) goto fail; break; default: break; } } return 0; fail: tk_log_warn("'%s' attr-prereq failed", ap->attr_name); return 1; } /* do we need to run the external program? * or we already done that and waiting for the outcome * or program exited and we can collect the status * return codes * 0: no program defined * RUNCMD_MORE: program forked, results later * != 0: executing program failed (or some other failure) */ static int do_ext_prog(struct ticket_config *tk, int start_election) { int rv = 0; if (!tk_test.path) return 0; switch(tk_test.progstate) { case EXTPROG_IDLE: rv = run_handler(tk); if (rv == RUNCMD_ERR) { tk_log_warn("couldn't run external test, not allowed to acquire ticket"); ext_prog_failed(tk, start_election); } break; case EXTPROG_RUNNING: /* should never get here, but just in case */ rv = RUNCMD_MORE; break; case EXTPROG_EXITED: rv = tk_test_exit_status(tk); if (rv) { ext_prog_failed(tk, start_election); } break; case EXTPROG_IGNORE: /* nothing to do here */ break; } return rv; } /* Try to acquire a ticket * Could be manual grant or after start (if the ticket is granted * and still valid in the CIB) * If the external program needs to run, this is run twice, once * to start the program, and then to get the result and start * elections. */ int acquire_ticket(struct ticket_config *tk, cmd_reason_t reason) { int rv; if (reason == OR_ADMIN && check_attr_prereq(tk, GRANT_MANUAL)) return RLT_ATTR_PREREQ; switch(do_ext_prog(tk, 0)) { case 0: /* everything fine */ break; case RUNCMD_MORE: /* need to wait for the outcome before starting elections */ return 0; default: return RLT_EXT_FAILED; } rv = new_election(tk, local, 1, reason); return rv ? RLT_SYNC_FAIL : 0; } /** Try to get the ticket for the local site. * */ int do_grant_ticket(struct ticket_config *tk, int options) { int rv; tk_log_info("granting ticket"); if (tk->leader == local) return RLT_SUCCESS; if (is_owned(tk)) return RLT_OVERGRANT; set_future_time(&tk->delay_commit, tk->term_duration + tk->acquire_after); if (options & OPT_IMMEDIATE) { tk_log_warn("granting ticket immediately! If there are " "unreachable sites, _hope_ you are sure that they don't " "have the ticket!"); time_reset(&tk->delay_commit); } rv = acquire_ticket(tk, OR_ADMIN); if (rv) { time_reset(&tk->delay_commit); return rv; } else { return RLT_MORE; } } static void start_revoke_ticket(struct ticket_config *tk) { tk_log_info("revoking ticket"); save_committed_tkt(tk); reset_ticket(tk); set_leader(tk, no_leader); ticket_write(tk); ticket_broadcast(tk, OP_REVOKE, OP_ACK, RLT_SUCCESS, OR_ADMIN); } /** Ticket revoke. * Only to be started from the leader. */ int do_revoke_ticket(struct ticket_config *tk) { if (tk->acks_expected) { tk_log_info("delay ticket revoke until the current operation finishes"); set_next_state(tk, ST_INIT); return RLT_MORE; } else { start_revoke_ticket(tk); return RLT_SUCCESS; } } int list_ticket(char **pdata, unsigned int *len) { struct ticket_config *tk; char timeout_str[64]; char pending_str[64]; char *data, *cp; int i, alloc; time_t ts; *pdata = NULL; *len = 0; alloc = 256 + booth_conf->ticket_count * (BOOTH_NAME_LEN * 2 + 128); data = malloc(alloc); if (!data) return -ENOMEM; cp = data; foreach_ticket(i, tk) { if (is_time_set(&tk->term_expires)) { ts = wall_ts(&tk->term_expires); strftime(timeout_str, sizeof(timeout_str), "%F %T", localtime(&ts)); } else strcpy(timeout_str, "INF"); if (tk->leader == local && is_time_set(&tk->delay_commit) && !is_past(&tk->delay_commit)) { ts = wall_ts(&tk->delay_commit); strcpy(pending_str, " (commit pending until "); strftime(pending_str + strlen(" (commit pending until "), sizeof(pending_str) - strlen(" (commit pending until ") - 1, "%F %T", localtime(&ts)); strcat(pending_str, ")"); } else *pending_str = '\0'; cp += snprintf(cp, alloc - (cp - data), "ticket: %s, leader: %s", tk->name, ticket_leader_string(tk)); if (is_owned(tk)) { cp += snprintf(cp, alloc - (cp - data), ", expires: %s%s\n", timeout_str, pending_str); } else { cp += snprintf(cp, alloc - (cp - data), "\n"); } if (alloc - (cp - data) <= 0) return -ENOMEM; } *pdata = data; *len = cp - data; return 0; } void disown_ticket(struct ticket_config *tk) { set_leader(tk, NULL); tk->is_granted = 0; get_time(&tk->term_expires); } int disown_if_expired(struct ticket_config *tk) { if (is_past(&tk->term_expires) || !tk->leader) { disown_ticket(tk); return 1; } return 0; } void reset_ticket(struct ticket_config *tk) { ignore_ext_test(tk); disown_ticket(tk); no_resends(tk); set_state(tk, ST_INIT); tk->voted_for = NULL; } static void log_reacquire_reason(struct ticket_config *tk) { int valid; const char *where_granted = "\0"; char buff[64]; valid = is_time_set(&tk->term_expires) && !is_past(&tk->term_expires); if (tk->leader == local) { where_granted = "granted here"; } else { snprintf(buff, sizeof(buff), "granted to %s", site_string(tk->leader)); where_granted = buff; } if (!valid) { tk_log_warn("%s, but not valid " "anymore (will try to reacquire)", where_granted); } if (tk->is_granted && tk->leader != local) { if (tk->leader && tk->leader != no_leader) { tk_log_error("granted here, but also %s, " "that's really too bad (will try to reacquire)", where_granted); } else { tk_log_warn("granted here, but we're " "not recorded as the grantee (will try to reacquire)"); } } } void update_ticket_state(struct ticket_config *tk, struct booth_site *sender) { if (tk->state == ST_CANDIDATE) { tk_log_info("learned from %s about " "newer ticket, stopping elections", site_string(sender)); /* there could be rejects coming from others; don't log * warnings unnecessarily */ tk->expect_more_rejects = 1; } if (tk->leader == local || tk->is_granted) { /* message from a live leader with valid ticket? */ if (sender == tk->leader && term_time_left(tk)) { if (tk->is_granted) { tk_log_warn("ticket was granted here, " "but it's live at %s (revoking here)", site_string(sender)); } else { tk_log_info("ticket live at %s", site_string(sender)); } disown_ticket(tk); ticket_write(tk); set_state(tk, ST_FOLLOWER); set_next_state(tk, ST_FOLLOWER); } else { if (tk->state == ST_CANDIDATE) { set_state(tk, ST_FOLLOWER); } set_next_state(tk, ST_LEADER); } } else { if (!tk->leader || tk->leader == no_leader) { if (sender) tk_log_info("ticket is not granted"); else tk_log_info("ticket is not granted (from CIB)"); set_state(tk, ST_INIT); } else { if (sender) tk_log_info("ticket granted to %s (says %s)", site_string(tk->leader), tk->leader == sender ? "they" : site_string(sender)); else tk_log_info("ticket granted to %s (from CIB)", site_string(tk->leader)); set_state(tk, ST_FOLLOWER); /* just make sure that we check the ticket soon */ set_next_state(tk, ST_FOLLOWER); } } } int setup_ticket(void) { struct ticket_config *tk; int i; foreach_ticket(i, tk) { reset_ticket(tk); if (local->type == SITE) { if (!pcmk_handler.load_ticket(tk)) { update_ticket_state(tk, NULL); } tk->update_cib = 1; } tk_log_info("broadcasting state query"); /* wait until all send their status (or the first * timeout) */ tk->start_postpone = 1; ticket_broadcast(tk, OP_STATUS, OP_MY_INDEX, RLT_SUCCESS, 0); } return 0; } int ticket_answer_list(int fd) { char *data; int olen, rv; struct boothc_hdr_msg hdr; rv = list_ticket(&data, &olen); if (rv < 0) goto out; init_header(&hdr.header, CL_LIST, 0, 0, RLT_SUCCESS, 0, sizeof(hdr) + olen); rv = send_header_plus(fd, &hdr, data, olen); out: if (data) free(data); return rv; } int process_client_request(struct client *req_client, void *buf) { int rv, rc = 1; struct ticket_config *tk; int cmd; struct boothc_ticket_msg omsg; struct boothc_ticket_msg *msg; msg = (struct boothc_ticket_msg *)buf; cmd = ntohl(msg->header.cmd); if (!check_ticket(msg->ticket.id, &tk)) { log_warn("client referenced unknown ticket %s", msg->ticket.id); rv = RLT_INVALID_ARG; goto reply_now; } if ((cmd == CMD_GRANT) && is_owned(tk)) { log_warn("client wants to grant an (already granted!) ticket %s", msg->ticket.id); rv = RLT_OVERGRANT; goto reply_now; } if ((cmd == CMD_REVOKE) && !is_owned(tk)) { log_info("client wants to revoke a free ticket %s", msg->ticket.id); rv = RLT_TICKET_IDLE; goto reply_now; } if ((cmd == CMD_REVOKE) && tk->leader != local) { tk_log_info("not granted here, redirect to %s", ticket_leader_string(tk)); rv = RLT_REDIRECT; goto reply_now; } if (cmd == CMD_REVOKE) rv = do_revoke_ticket(tk); else rv = do_grant_ticket(tk, ntohl(msg->header.options)); if (rv == RLT_MORE) { /* client may receive further notifications, save the * request for further processing */ add_req(tk, req_client, msg); tk_log_debug("queue request %s for client %d", state_to_string(cmd), req_client->fd); rc = 0; /* we're not yet done with the message */ } reply_now: init_ticket_msg(&omsg, CL_RESULT, 0, rv, 0, tk); send_client_msg(req_client->fd, &omsg); return rc; } int notify_client(struct ticket_config *tk, int client_fd, struct boothc_ticket_msg *msg) { struct boothc_ticket_msg omsg; void (*deadfn) (int ci); int rv, rc, ci; int cmd, options; struct client *req_client; cmd = ntohl(msg->header.cmd); options = ntohl(msg->header.options); rv = tk->outcome; ci = find_client_by_fd(client_fd); if (ci < 0) { tk_log_info("client %d (request %s) left before being notified", client_fd, state_to_string(cmd)); return 0; } tk_log_debug("notifying client %d (request %s)", client_fd, state_to_string(cmd)); init_ticket_msg(&omsg, CL_RESULT, 0, rv, 0, tk); rc = send_client_msg(client_fd, &omsg); if (rc == 0 && ((rv == RLT_MORE) || (rv == RLT_CIB_PENDING && (options & OPT_WAIT_COMMIT)))) { /* more to do here, keep the request */ return 1; } else { /* we sent a definite answer or there was a write error, drop * the client */ if (rc) { tk_log_debug("failed to notify client %d (request %s)", client_fd, state_to_string(cmd)); } else { tk_log_debug("client %d (request %s) got final notification", client_fd, state_to_string(cmd)); } req_client = clients + ci; deadfn = req_client->deadfn; if(deadfn) { deadfn(ci); } return 0; /* we're done with this request */ } } int ticket_broadcast(struct ticket_config *tk, cmd_request_t cmd, cmd_request_t expected_reply, cmd_result_t res, cmd_reason_t reason) { struct boothc_ticket_msg msg; init_ticket_msg(&msg, cmd, 0, res, reason, tk); tk_log_debug("broadcasting '%s' (term=%d, valid=%d)", state_to_string(cmd), ntohl(msg.ticket.term), msg_term_time(&msg)); tk->last_request = cmd; if (expected_reply) { expect_replies(tk, expected_reply); } ticket_activate_timeout(tk); return transport()->broadcast_auth(&msg, sendmsglen(&msg)); } /* update the ticket on the leader, write it to the CIB, and send out the update message to others with the new expiry time */ int leader_update_ticket(struct ticket_config *tk) { int rv = 0, rv2; timetype now; if (tk->ticket_updated >= 2) return 0; if (tk->ticket_updated < 1) { tk->ticket_updated = 1; get_time(&now); copy_time(&now, &tk->last_renewal); set_future_time(&tk->term_expires, tk->term_duration); rv = ticket_broadcast(tk, OP_UPDATE, OP_ACK, RLT_SUCCESS, 0); } if (tk->ticket_updated < 2) { rv2 = ticket_write(tk); switch(rv2) { case 0: tk->ticket_updated = 2; tk->outcome = RLT_SUCCESS; foreach_tkt_req(tk, notify_client); break; case 1: if (tk->outcome != RLT_CIB_PENDING) { tk->outcome = RLT_CIB_PENDING; foreach_tkt_req(tk, notify_client); } break; default: break; } } return rv; } static void log_lost_servers(struct ticket_config *tk) { struct booth_site *n; int i; if (tk->retry_number > 1) /* log those that we couldn't reach, but do * that only on the first retry */ return; for (i = 0; i < booth_conf->site_count; i++) { n = booth_conf->site + i; if (!(tk->acks_received & n->bitmask)) { tk_log_warn("%s %s didn't acknowledge our %s, " "will retry %d times", (n->type == ARBITRATOR ? "arbitrator" : "site"), site_string(n), state_to_string(tk->last_request), tk->retries); } } } static void resend_msg(struct ticket_config *tk) { struct booth_site *n; int i; if (!(tk->acks_received ^ local->bitmask)) { ticket_broadcast(tk, tk->last_request, 0, RLT_SUCCESS, 0); } else { for (i = 0; i < booth_conf->site_count; i++) { n = booth_conf->site + i; if (!(tk->acks_received & n->bitmask)) { n->resend_cnt++; tk_log_debug("resending %s to %s", state_to_string(tk->last_request), site_string(n) ); send_msg(tk->last_request, tk, n, NULL); } } ticket_activate_timeout(tk); } } static void handle_resends(struct ticket_config *tk) { int ack_cnt; if (++tk->retry_number > tk->retries) { tk_log_info("giving up on sending retries"); no_resends(tk); set_ticket_wakeup(tk); return; } /* try to reach some sites again if we just stepped down */ if (tk->last_request == OP_VOTE_FOR) { tk_log_warn("no answers to our VtFr request to step down (try #%d), " "we are alone", tk->retry_number); goto just_resend; } if (!majority_of_bits(tk, tk->acks_received)) { ack_cnt = count_bits(tk->acks_received) - 1; if (!ack_cnt) { tk_log_warn("no answers to our request (try #%d), " "we are alone", tk->retry_number); } else { tk_log_warn("not enough answers to our request (try #%d): " "only got %d answers", tk->retry_number, ack_cnt); } } else { log_lost_servers(tk); } just_resend: resend_msg(tk); } int postpone_ticket_processing(struct ticket_config *tk) { extern timetype start_time; return tk->start_postpone && (-time_left(&start_time) < tk->timeout); } #define has_extprog_exited(tk) ((tk)->clu_test.progstate == EXTPROG_EXITED) static void process_next_state(struct ticket_config *tk) { int rv; switch(tk->next_state) { case ST_LEADER: if (has_extprog_exited(tk)) { if (tk->state != ST_LEADER) { rv = acquire_ticket(tk, OR_ADMIN); if (rv != 0) { /* external program failed */ tk->outcome = rv; foreach_tkt_req(tk, notify_client); } } } else { log_reacquire_reason(tk); acquire_ticket(tk, OR_REACQUIRE); } break; case ST_INIT: no_resends(tk); start_revoke_ticket(tk); tk->outcome = RLT_SUCCESS; foreach_tkt_req(tk, notify_client); break; /* wanting to be follower is not much of an ambition; no * processing, just return; don't reset start_postpone until * we got some replies to status */ case ST_FOLLOWER: return; default: break; } tk->start_postpone = 0; } static void ticket_lost(struct ticket_config *tk) { int reason = OR_TKT_LOST; if (tk->leader != local) { tk_log_warn("lost at %s", site_string(tk->leader)); } else { if (is_ext_prog_running(tk)) { ext_prog_timeout(tk); reason = OR_LOCAL_FAIL; } else { tk_log_warn("lost majority (revoking locally)"); reason = tk->election_reason ? tk->election_reason : OR_REACQUIRE; } } tk->lost_leader = tk->leader; save_committed_tkt(tk); reset_ticket(tk); set_state(tk, ST_FOLLOWER); if (local->type == SITE) { ticket_write(tk); schedule_election(tk, reason); } } static void next_action(struct ticket_config *tk) { int rv; switch(tk->state) { case ST_INIT: /* init state, handle resends for ticket revoke */ /* and rebroadcast if stepping down */ /* try to acquire ticket on grant */ if (has_extprog_exited(tk)) { rv = acquire_ticket(tk, OR_ADMIN); if (rv != 0) { /* external program failed */ tk->outcome = rv; foreach_tkt_req(tk, notify_client); } } else { if (tk->acks_expected) { handle_resends(tk); } } break; case ST_FOLLOWER: /* leader/ticket lost? and we didn't vote yet */ tk_log_debug("leader: %s, voted_for: %s", site_string(tk->leader), site_string(tk->voted_for)); if (!tk->leader) { if (!tk->voted_for || !tk->in_election) { disown_ticket(tk); if (!new_election(tk, NULL, 1, OR_AGAIN)) { ticket_activate_timeout(tk); } } else { /* we should restart elections in case nothing * happens in the meantime */ tk->in_election = 0; ticket_activate_timeout(tk); } } break; case ST_CANDIDATE: /* elections timed out? */ elections_end(tk); break; case ST_LEADER: /* timeout or ticket renewal? */ if (tk->acks_expected) { handle_resends(tk); if (majority_of_bits(tk, tk->acks_received)) { leader_update_ticket(tk); } } else { /* this is ticket renewal, run local test */ if (!do_ext_prog(tk, 1)) { ticket_broadcast(tk, OP_HEARTBEAT, OP_ACK, RLT_SUCCESS, 0); tk->ticket_updated = 0; } } break; default: break; } } static void ticket_cron(struct ticket_config *tk) { /* don't process the tickets too early after start */ if (postpone_ticket_processing(tk)) { tk_log_debug("ticket processing postponed (start_postpone=%d)", tk->start_postpone); /* but run again soon */ ticket_activate_timeout(tk); return; } /* no need for status resends, we hope we got at least one * my_index back */ if (tk->acks_expected == OP_MY_INDEX) { no_resends(tk); } /* after startup, we need to decide what to do based on the * current ticket state; tk->next_state has a hint * also used for revokes which had to be delayed */ if (tk->next_state) { process_next_state(tk); goto out; } /* Has an owner, has an expiry date, and expiry date in the past? * Losing the ticket must happen in _every_ state. */ if (is_owned(tk) && is_time_set(&tk->term_expires) && is_past(&tk->term_expires)) { ticket_lost(tk); goto out; } next_action(tk); out: tk->next_state = 0; if (!tk->in_election && tk->update_cib) ticket_write(tk); } void process_tickets(void) { struct ticket_config *tk; int i; timetype last_cron; foreach_ticket(i, tk) { if (!has_extprog_exited(tk) && is_time_set(&tk->next_cron) && !is_past(&tk->next_cron)) continue; tk_log_debug("ticket cron"); copy_time(&tk->next_cron, &last_cron); ticket_cron(tk); if (time_cmp(&last_cron, &tk->next_cron, ==)) { tk_log_debug("nobody set ticket wakeup"); set_ticket_wakeup(tk); } } } void tickets_log_info(void) { struct ticket_config *tk; int i; time_t ts; foreach_ticket(i, tk) { ts = wall_ts(&tk->term_expires); tk_log_info("state '%s' " "term %d " "leader %s " "expires %-24.24s", state_to_string(tk->state), tk->current_term, ticket_leader_string(tk), ctime(&ts)); } } static void update_acks( struct ticket_config *tk, struct booth_site *sender, struct booth_site *leader, struct boothc_ticket_msg *msg ) { uint32_t cmd; uint32_t req; cmd = ntohl(msg->header.cmd); req = ntohl(msg->header.request); if (req != tk->last_request || (tk->acks_expected != cmd && tk->acks_expected != OP_REJECTED)) return; /* got an ack! */ tk->acks_received |= sender->bitmask; if (all_replied(tk) || /* we just stepped down, need only one site to start * elections */ (cmd == OP_REQ_VOTE && tk->last_request == OP_VOTE_FOR)) { no_resends(tk); tk->start_postpone = 0; set_ticket_wakeup(tk); } } /* read ticket message */ int ticket_recv(void *buf, struct booth_site *source) { struct boothc_ticket_msg *msg; struct ticket_config *tk; struct booth_site *leader; uint32_t leader_u; msg = (struct boothc_ticket_msg *)buf; if (!check_ticket(msg->ticket.id, &tk)) { log_warn("got invalid ticket name %s from %s", msg->ticket.id, site_string(source)); source->invalid_cnt++; return -EINVAL; } leader_u = ntohl(msg->ticket.leader); if (!find_site_by_id(leader_u, &leader)) { tk_log_error("message with unknown leader %u received", leader_u); source->invalid_cnt++; return -EINVAL; } update_acks(tk, source, leader, msg); return raft_answer(tk, source, leader, msg); } static void log_next_wakeup(struct ticket_config *tk) { int left; left = time_left(&tk->next_cron); tk_log_debug("set ticket wakeup in " intfmt(left)); } /* New vote round; §5.2 */ /* delay the next election start for some random time * (up to 1 second) */ void add_random_delay(struct ticket_config *tk) { timetype tv; interval_add(&tk->next_cron, rand_time(min(1000, tk->timeout)), &tv); ticket_next_cron_at(tk, &tv); if (ANYDEBUG) { log_next_wakeup(tk); } } void set_ticket_wakeup(struct ticket_config *tk) { timetype near_future, tv, next_vote; /* At least every hour, perhaps sooner (default) */ ticket_next_cron_in(tk, 3600*TIME_RES); set_future_time(&near_future, 10); switch (tk->state) { case ST_LEADER: assert(tk->leader == local); get_next_election_time(tk, &next_vote); /* If timestamp is in the past, wakeup in * near future */ if (!is_time_set(&next_vote)) { tk_log_debug("next ts unset, wakeup soon"); ticket_next_cron_at(tk, &near_future); } else if (is_past(&next_vote)) { int tdiff = time_left(&next_vote); tk_log_debug("next ts in the past " intfmt(tdiff)); ticket_next_cron_at(tk, &near_future); } else { ticket_next_cron_at(tk, &next_vote); } break; case ST_CANDIDATE: assert(is_time_set(&tk->election_end)); ticket_next_cron_at(tk, &tk->election_end); break; case ST_INIT: case ST_FOLLOWER: /* If there is (or should be) some owner, check on it later on. * If no one is interested - don't care. */ if (is_owned(tk)) { interval_add(&tk->term_expires, tk->acquire_after, &tv); ticket_next_cron_at(tk, &tv); } break; default: tk_log_error("unknown ticket state: %d", tk->state); } if (tk->next_state) { /* we need to do something soon here */ if (!tk->acks_expected) { ticket_next_cron_at(tk, &near_future); } else { ticket_activate_timeout(tk); } } if (ANYDEBUG) { log_next_wakeup(tk); } } void schedule_election(struct ticket_config *tk, cmd_reason_t reason) { if (local->type != SITE) return; tk->election_reason = reason; get_time(&tk->next_cron); /* introduce a short delay before starting election */ add_random_delay(tk); } /* Given a state (in host byte order), return a human-readable (char*). * An array is used so that multiple states can be printed in a single printf(). */ char *state_to_string(uint32_t state_ho) { union mu { cmd_request_t s; char c[5]; }; static union mu cache[6] = { { 0 } }, *cur; static int current = 0; current ++; if (current >= sizeof(cache)/sizeof(cache[0])) current = 0; cur = cache + current; cur->s = htonl(state_ho); /* Shouldn't be necessary, union array is initialized with zeroes, and * these bytes never get written. */ cur->c[4] = 0; return cur->c; } int send_reject(struct booth_site *dest, struct ticket_config *tk, cmd_result_t code, struct boothc_ticket_msg *in_msg) { int req = ntohl(in_msg->header.cmd); struct boothc_ticket_msg msg; tk_log_debug("sending reject to %s", site_string(dest)); init_ticket_msg(&msg, OP_REJECTED, req, code, 0, tk); return booth_udp_send_auth(dest, &msg, sendmsglen(&msg)); } int send_msg ( int cmd, struct ticket_config *tk, struct booth_site *dest, struct boothc_ticket_msg *in_msg ) { int req = 0; struct ticket_config *valid_tk = tk; struct boothc_ticket_msg msg; /* if we want to send the last valid ticket, then if we're in * the ST_CANDIDATE state, the last valid ticket is in * tk->last_valid_tk */ if (cmd == OP_MY_INDEX) { if (tk->state == ST_CANDIDATE && tk->last_valid_tk) { valid_tk = tk->last_valid_tk; } tk_log_info("sending status to %s", site_string(dest)); } if (in_msg) req = ntohl(in_msg->header.cmd); init_ticket_msg(&msg, cmd, req, RLT_SUCCESS, 0, valid_tk); return booth_udp_send_auth(dest, &msg, sendmsglen(&msg)); } diff --git a/src/ticket.h b/src/ticket.h index 1e1ffa9..b3b758d 100644 --- a/src/ticket.h +++ b/src/ticket.h @@ -1,130 +1,130 @@ /* * Copyright (C) 2011 Jiaju Zhang * Copyright (C) 2013-2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef _TICKET_H #define _TICKET_H #include #include #include #include "timer.h" #include "config.h" #include "log.h" extern int TIME_RES; #define DEFAULT_TICKET_EXPIRY (600*TIME_RES) #define DEFAULT_TICKET_TIMEOUT (5*TIME_RES) #define DEFAULT_RETRIES 10 #define foreach_ticket(i_,t_) for(i=0; (t_=booth_conf->ticket+i, iticket_count); i++) #define foreach_node(i_,n_) for(i=0; (n_=booth_conf->site+i, isite_count); i++) #define set_leader(tk, who) do { \ tk->leader = who; \ tk_log_debug("ticket leader set to %s", ticket_leader_string(tk)); \ } while(0) #define set_state(tk, newst) do { \ tk_log_debug("state transition: %s -> %s", \ state_to_string(tk->state), state_to_string(newst)); \ tk->state = newst; \ } while(0) #define set_next_state(tk, newst) do { \ if (!(newst)) tk_log_debug("next state reset"); \ else tk_log_debug("next state set to %s", state_to_string(newst)); \ tk->next_state = newst; \ } while(0) #define is_term_invalid(tk, term) \ ((tk)->last_valid_tk && (tk)->last_valid_tk->current_term > (term)) void save_committed_tkt(struct ticket_config *tk); void disown_ticket(struct ticket_config *tk); int disown_if_expired(struct ticket_config *tk); int check_ticket(char *ticket, struct ticket_config **tc); int check_site(char *site, int *local); int grant_ticket(struct ticket_config *ticket); int revoke_ticket(struct ticket_config *ticket); int list_ticket(char **pdata, unsigned int *len); int ticket_recv(void *buf, struct booth_site *source); void reset_ticket(struct ticket_config *tk); void update_ticket_state(struct ticket_config *tk, struct booth_site *sender); int setup_ticket(void); int check_max_len_valid(const char *s, int max); int do_grant_ticket(struct ticket_config *ticket, int options); int do_revoke_ticket(struct ticket_config *tk); int find_ticket_by_name(const char *ticket, struct ticket_config **found); void set_ticket_wakeup(struct ticket_config *tk); int postpone_ticket_processing(struct ticket_config *tk); int acquire_ticket(struct ticket_config *tk, cmd_reason_t reason); int ticket_answer_list(int fd); int process_client_request(struct client *req_client, void *buf); int ticket_write(struct ticket_config *tk); void process_tickets(void); void tickets_log_info(void); char *state_to_string(uint32_t state_ho); int send_reject(struct booth_site *dest, struct ticket_config *tk, cmd_result_t code, struct boothc_ticket_msg *in_msg); int send_msg (int cmd, struct ticket_config *tk, struct booth_site *dest, struct boothc_ticket_msg *in_msg); int notify_client(struct ticket_config *tk, int client_fd, struct boothc_ticket_msg *msg); int ticket_broadcast(struct ticket_config *tk, cmd_request_t cmd, cmd_request_t expected_reply, cmd_result_t res, cmd_reason_t reason); int leader_update_ticket(struct ticket_config *tk); void add_random_delay(struct ticket_config *tk); void schedule_election(struct ticket_config *tk, cmd_reason_t reason); int check_attr_prereq(struct ticket_config *tk, grant_type_e grant_type); static inline void ticket_next_cron_at(struct ticket_config *tk, timetype *when) { copy_time(when, &tk->next_cron); } static inline void ticket_next_cron_in(struct ticket_config *tk, int interval) { timetype tv; set_future_time(&tv, interval); ticket_next_cron_at(tk, &tv); } static inline void ticket_activate_timeout(struct ticket_config *tk) { /* TODO: increase timeout when no answers */ tk_log_debug("activate ticket timeout in %d", tk->timeout); ticket_next_cron_in(tk, tk->timeout); } #endif /* _TICKET_H */ diff --git a/src/timer.c b/src/timer.c index d51e807..58ba24f 100644 --- a/src/timer.c +++ b/src/timer.c @@ -1,181 +1,181 @@ /* * Copyright (C) 2014 Dejan Muhamedagic * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "timer.h" /* which time resolution makes most sense? * the factors are clock resolution and network latency */ int TIME_RES = 1000; int TIME_MULT = 1; int time_sub_int(timetype *a, timetype *b) { timetype res; time_sub(a, b, &res); return res.tv_sec*TIME_RES + res.SUBSEC/TIME_FAC; } /* interval (b) is in ms (1/TIME_RES) */ void interval_add(timetype *a, int b, timetype *res) { /* need this to allow interval_add(a, b, a); */ long tmp_subsec = a->SUBSEC + (long)b*TIME_FAC; res->SUBSEC = tmp_subsec%NSECS; res->tv_sec = a->tv_sec + tmp_subsec/NSECS; } int is_time_set(timetype *p) { return (p->tv_sec != 0) || (p->SUBSEC != 0); } int is_past(timetype *p) { timetype now; /*if (!is_time_set(p)) return 1;*/ assert(p->tv_sec || p->SUBSEC); get_time(&now); return time_cmp(&now, p, >); } void secs2tv(time_t secs, timetype *p) { memset(p, 0, sizeof(timetype)); p->tv_sec = secs; } int time_left(timetype *p) { timetype now; assert(p->tv_sec || p->SUBSEC); get_time(&now); return time_sub_int(p, &now); } void set_future_time(timetype *a, int b) { timetype now; get_time(&now); interval_add(&now, b, a); } void time_reset(timetype *p) { memset(p, 0, sizeof(timetype)); } void copy_time(timetype *src, timetype *dst) { dst->SUBSEC = src->SUBSEC; dst->tv_sec = src->tv_sec; } #if _POSIX_TIMERS > 0 void time_sub(struct timespec *a, struct timespec *b, struct timespec *res) { if (a->tv_nsec < b->tv_nsec) { res->tv_sec = a->tv_sec - b->tv_sec - 1L; res->tv_nsec = a->tv_nsec + (NSECS - b->tv_nsec); } else { res->tv_sec = a->tv_sec - b->tv_sec; res->tv_nsec = a->tv_nsec - b->tv_nsec; } } void time_add(struct timespec *a, struct timespec *b, struct timespec *res) { res->tv_nsec = a->tv_nsec + b->tv_nsec; res->tv_sec = a->tv_sec + b->tv_sec; if (res->tv_nsec >= NSECS) { res->tv_sec++; res->tv_nsec %= NSECS; } } time_t get_secs(struct timespec *p) { if (p) { get_time(p); return p->tv_sec; } else { struct timespec tv; get_time(&tv); return tv.tv_sec; } } /* time booth_clk_t is a time since boot or similar, convert that * to time since epoch (Jan 1, 1970) */ static void clock2epochtime(struct timespec *booth_clk_t, struct timespec *res) { struct timespec booth_clk_now, now_tv; struct timeval now; get_time(&booth_clk_now); gettimeofday(&now, NULL); TIMEVAL_TO_TIMESPEC(&now, &now_tv); time_sub(&now_tv, &booth_clk_now, res); time_add(booth_clk_t, res, res); } /* time booth_clk_t is a time since boot or similar, return * something humans can understand (rounded seconds only) */ time_t wall_ts(struct timespec *booth_clk_t) { struct timespec res; clock2epochtime(booth_clk_t, &res); return round2secs(&res); } /* time booth_clk_t is a time since boot or similar, get here * seconds since epoch */ time_t secs_since_epoch(struct timespec *booth_clk_t) { struct timespec res; clock2epochtime(booth_clk_t, &res); return res.tv_sec; } /* time t is wall clock time, convert to time compatible * with our clock_gettime clock */ time_t unwall_ts(time_t t) { struct timespec booth_clk_now, now_tv, res; struct timeval now; get_time(&booth_clk_now); gettimeofday(&now, NULL); TIMEVAL_TO_TIMESPEC(&now, &now_tv); time_sub(&now_tv, &booth_clk_now, &res); return t - res.tv_sec; } #endif diff --git a/src/timer.h b/src/timer.h index 7d1120b..744120c 100644 --- a/src/timer.h +++ b/src/timer.h @@ -1,96 +1,96 @@ /* * Copyright (C) 2014 Dejan Muhamedagic * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef _TIMER_H #define _TIMER_H #include #include #include #include #include #if _POSIX_TIMERS > 0 #if defined(CLOCK_MONOTONIC) # define BOOTH_CLOCK CLOCK_MONOTONIC #else # define BOOTH_CLOCK CLOCK_REALTIME #endif #define NSECS 1000000000L /* nanoseconds */ #define TIME_FAC (NSECS/TIME_RES) #define SUBSEC tv_nsec #define SUBSEC_FAC NSECS typedef struct timespec timetype; #define get_time(p) clock_gettime(BOOTH_CLOCK, p) #define get_usecs(p) ((p)->tv_nsec/1000L) #define time_cmp(a, b, CMP) \ (((a)->tv_sec == (b)->tv_sec) ? \ ((a)->tv_nsec CMP (b)->tv_nsec) : \ ((a)->tv_sec CMP (b)->tv_sec)) void time_sub(struct timespec *a, struct timespec *b, struct timespec *res); void time_add(struct timespec *a, struct timespec *b, struct timespec *res); time_t get_secs(struct timespec *p); time_t wall_ts(struct timespec *p); time_t secs_since_epoch(struct timespec *booth_clk_t); time_t unwall_ts(time_t t); #else #define MUSECS 1000000L /* microseconds */ #define SUBSEC_FAC MUSECS #define TIME_FAC (MUSECS/TIME_RES) #define SUBSEC tv_usec typedef struct timeval timetype; #define get_time(p) gettimeofday(p, NULL) #define secs_since_epoch(p) ((p)->tv_sec) #define get_usecs(p) ((p)->tv_usec) #define time_sub timersub #define time_add timeradd #define time_cmp timercmp #define get_secs time #define wall_ts round2secs #define unwall_ts(t) (t) #endif int is_past(timetype *p); void secs2tv(time_t secs, timetype *p); void time_reset(timetype *p); int time_sub_int(timetype *a, timetype *b); void set_future_time(timetype *a, int b); int time_left(timetype *p); void copy_time(timetype *src, timetype *dst); void interval_add(timetype *p, int interval, timetype *res); int is_time_set(timetype *p); #define intfmt(t) "%d.%03d", (t)/TIME_RES, ((t)<0?-(t):(t))%TIME_RES /* random time from 0 to t ms (1/TIME_RES) */ #define rand_time(t) cl_rand_from_interval(0, t*(TIME_RES/1000)) #define round2secs(p) \ ((p)->tv_sec + ((p)->SUBSEC + SUBSEC_FAC/2)/SUBSEC_FAC) #endif diff --git a/src/transport.c b/src/transport.c index c5b4ad9..13d9d03 100644 --- a/src/transport.c +++ b/src/transport.c @@ -1,1094 +1,1094 @@ /* * Copyright (C) 2011 Jiaju Zhang * Copyright (C) 2013-2014 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "b_config.h" #include "booth.h" #include "inline-fn.h" #include "log.h" #include "config.h" #include "ticket.h" #include "transport.h" #include "auth.h" #include "attr.h" #define BOOTH_IPADDR_LEN (sizeof(struct in6_addr)) #define NETLINK_BUFSIZE 16384 #define SOCKET_BUFFER_SIZE 160000 #define FRAME_SIZE_MAX 10000 struct booth_site *local = NULL; static int (*deliver_fn) (void *msg, int msglen); static void parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len) { while (RTA_OK(rta, len)) { if (rta->rta_type <= max) tb[rta->rta_type] = rta; rta = RTA_NEXT(rta,len); } } enum match_type { NO_MATCH = 0, FUZZY_MATCH, EXACT_MATCH, }; static int find_address(unsigned char ipaddr[BOOTH_IPADDR_LEN], int family, int prefixlen, int fuzzy_allowed, struct booth_site **me, int *address_bits_matched) { int i; struct booth_site *node; int bytes, bits_left, mask; unsigned char node_bits, ip_bits; uint8_t *n_a; int matched; enum match_type did_match = NO_MATCH; bytes = prefixlen / 8; bits_left = prefixlen % 8; /* One bit left to check means ignore 7 lowest bits. */ mask = ~( (1 << (8 - bits_left)) -1); for (i = 0; i < booth_conf->site_count; i++) { node = booth_conf->site + i; if (family != node->family) continue; n_a = node_to_addr_pointer(node); for(matched = 0; matched < node->addrlen; matched++) if (ipaddr[matched] != n_a[matched]) break; if (matched == node->addrlen) { /* Exact match. */ exact_match: *address_bits_matched = matched * 8; *me = node; did_match = EXACT_MATCH; break; } if (!fuzzy_allowed) continue; /* Check prefix, whole bytes */ if (matched < bytes) continue; if (matched * 8 < *address_bits_matched) continue; if (!bits_left) goto exact_match; node_bits = n_a[bytes]; ip_bits = ipaddr[bytes]; if (((node_bits ^ ip_bits) & mask) == 0) { /* _At_least_ prefixlen bits matched. */ if (did_match < EXACT_MATCH) { *address_bits_matched = prefixlen; *me = node; did_match = FUZZY_MATCH; } } } return did_match; } int _find_myself(int family, struct booth_site **mep, int fuzzy_allowed); int _find_myself(int family, struct booth_site **mep, int fuzzy_allowed) { int fd; struct sockaddr_nl nladdr; struct booth_site *me; unsigned char ipaddr[BOOTH_IPADDR_LEN]; static char rcvbuf[NETLINK_BUFSIZE]; struct { struct nlmsghdr nlh; struct rtgenmsg g; } req; int address_bits_matched; if (local) goto found; me = NULL; address_bits_matched = 0; if (mep) *mep = NULL; fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); if (fd < 0) { log_error("failed to create netlink socket"); return 0; } setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)); memset(&nladdr, 0, sizeof(nladdr)); nladdr.nl_family = AF_NETLINK; memset(&req, 0, sizeof(req)); req.nlh.nlmsg_len = sizeof(req); req.nlh.nlmsg_type = RTM_GETADDR; req.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST; req.nlh.nlmsg_pid = 0; req.nlh.nlmsg_seq = 1; req.g.rtgen_family = family; if (sendto(fd, (void *)&req, sizeof(req), 0, (struct sockaddr*)&nladdr, sizeof(nladdr)) < 0) { close(fd); log_error("failed to send data to netlink socket"); return 0; } while (1) { int status; struct nlmsghdr *h; struct iovec iov = { rcvbuf, sizeof(rcvbuf) }; struct msghdr msg = { (void *)&nladdr, sizeof(nladdr), &iov, 1, NULL, 0, 0 }; status = recvmsg(fd, &msg, 0); if (!status) { close(fd); log_error("failed to recvmsg from netlink socket"); return 0; } h = (struct nlmsghdr *)rcvbuf; if (h->nlmsg_type == NLMSG_DONE) break; if (h->nlmsg_type == NLMSG_ERROR) { close(fd); log_error("netlink socket recvmsg error"); return 0; } while (NLMSG_OK(h, status)) { if (h->nlmsg_type == RTM_NEWADDR) { struct ifaddrmsg *ifa = NLMSG_DATA(h); struct rtattr *tb[IFA_MAX+1]; int len = h->nlmsg_len - NLMSG_LENGTH(sizeof(*ifa)); memset(tb, 0, sizeof(tb)); parse_rtattr(tb, IFA_MAX, IFA_RTA(ifa), len); memset(ipaddr, 0, BOOTH_IPADDR_LEN); /* prefer IFA_LOCAL if it exists, for p-t-p * interfaces, otherwise use IFA_ADDRESS */ if (tb[IFA_LOCAL]) { memcpy(ipaddr, RTA_DATA(tb[IFA_LOCAL]), BOOTH_IPADDR_LEN); } else { memcpy(ipaddr, RTA_DATA(tb[IFA_ADDRESS]), BOOTH_IPADDR_LEN); } /* First try with exact addresses, then optionally with subnet matching. */ if (ifa->ifa_prefixlen > address_bits_matched) { find_address(ipaddr, ifa->ifa_family, ifa->ifa_prefixlen, fuzzy_allowed, &me, &address_bits_matched); if (me) { log_debug("found myself at %s (%d bits matched)", site_string(me), address_bits_matched); } } } h = NLMSG_NEXT(h, status); } } close(fd); if (!me) return 0; me->local = 1; local = me; found: if (mep) *mep = local; return 1; } int find_myself(struct booth_site **mep, int fuzzy_allowed) { return _find_myself(AF_INET6, mep, fuzzy_allowed) || _find_myself(AF_INET, mep, fuzzy_allowed); } /** Checks the header fields for validity. * cf. init_header(). * For @len_incl_data < 0 the length is not checked. * Return <0 if error, else bytes read. */ int check_boothc_header(struct boothc_header *h, int len_incl_data) { int l; if (h->magic != htonl(BOOTHC_MAGIC)) { log_error("magic error %x", ntohl(h->magic)); return -EINVAL; } if (h->version != htonl(BOOTHC_VERSION)) { log_error("version error %x", ntohl(h->version)); return -EINVAL; } l = ntohl(h->length); if (l < sizeof(*h)) { log_error("length %d out of range", l); return -EINVAL; } if (len_incl_data < 0) return 0; if (l != len_incl_data) { log_error("length error - got %d, wanted %d", len_incl_data, l); return -EINVAL; } return len_incl_data; } static int do_read(int fd, void *buf, size_t count) { int rv, off = 0; while (off < count) { rv = read(fd, (char *)buf + off, count - off); if (rv == 0) return -1; if (rv == -1 && errno == EINTR) continue; if (rv == -1 && errno == EWOULDBLOCK) break; if (rv == -1) return -1; off += rv; } return off; } static int do_write(int fd, void *buf, size_t count) { int rv, off = 0; retry: rv = send(fd, (char *)buf + off, count, MSG_NOSIGNAL); if (rv == -1 && errno == EINTR) goto retry; /* If we cannot write _any_ data, we'd be in an (potential) loop. */ if (rv <= 0) { log_error("send failed: %s (%d)", strerror(errno), errno); return rv; } if (rv != count) { count -= rv; off += rv; goto retry; } return 0; } /* Only used for client requests (tcp) */ int read_client(struct client *req_cl) { char *msg; struct boothc_header *header; int rv, fd; int len = MAX_MSG_LEN; if (!req_cl->msg) { msg = malloc(MAX_MSG_LEN); if (!msg) { log_error("out of memory for client messages"); return -1; } req_cl->msg = (void *)msg; } else { msg = (char *)req_cl->msg; } header = (struct boothc_header *)msg; /* update len if we read enough */ if (req_cl->offset >= sizeof(header)) { len = min(ntohl(header->length), MAX_MSG_LEN); } fd = req_cl->fd; rv = do_read(fd, msg+req_cl->offset, len-req_cl->offset); if (rv < 0) { if (errno == ECONNRESET) log_debug("client connection reset for fd %d", fd); return -1; } req_cl->offset += rv; /* update len if we read enough */ if (req_cl->offset >= sizeof(header)) { len = min(ntohl(header->length), MAX_MSG_LEN); } if (req_cl->offset < len) { /* client promised to send more */ return 1; } if (check_boothc_header(header, len) < 0) { return -1; } return 0; } /* Only used for client requests (tcp) */ static void process_connection(int ci) { struct client *req_cl; void *msg = NULL; struct boothc_header *header; struct boothc_hdr_msg err_reply; cmd_result_t errc; void (*deadfn) (int ci); req_cl = clients + ci; switch (read_client(req_cl)) { case -1: /* error */ goto kill; case 1: /* more to read */ return; case 0: /* we can process the request now */ msg = req_cl->msg; } header = (struct boothc_header *)msg; if (check_auth(NULL, msg, ntohl(header->length))) { errc = RLT_AUTH; goto send_err; } /* For CMD_GRANT and CMD_REVOKE: * Don't close connection immediately, but send * result a second later? */ switch (ntohl(header->cmd)) { case CMD_LIST: ticket_answer_list(req_cl->fd); goto kill; case CMD_PEERS: list_peers(req_cl->fd); goto kill; case CMD_GRANT: case CMD_REVOKE: if (process_client_request(req_cl, msg) == 1) goto kill; /* request processed definitely, close connection */ else return; case ATTR_LIST: case ATTR_GET: case ATTR_SET: case ATTR_DEL: if (process_attr_request(req_cl, msg) == 1) goto kill; /* request processed definitely, close connection */ else return; default: log_error("connection %d cmd %x unknown", ci, ntohl(header->cmd)); errc = RLT_INVALID_ARG; goto send_err; } assert(0); return; send_err: init_header(&err_reply.header, CL_RESULT, 0, 0, errc, 0, sizeof(err_reply)); send_client_msg(req_cl->fd, &err_reply); kill: deadfn = req_cl->deadfn; if(deadfn) { deadfn(ci); } return; } static void process_tcp_listener(int ci) { int fd, i, flags, one = 1; socklen_t addrlen = sizeof(struct sockaddr); struct sockaddr addr; fd = accept(clients[ci].fd, &addr, &addrlen); if (fd < 0) { log_error("process_tcp_listener: accept error %d %d", fd, errno); return; } setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&one, sizeof(one)); flags = fcntl(fd, F_GETFL, 0); fcntl(fd, F_SETFL, flags | O_NONBLOCK); i = client_add(fd, clients[ci].transport, process_connection, NULL); log_debug("client connection %d fd %d", i, fd); } int setup_tcp_listener(int test_only) { int s, rv; int one = 1; s = socket(local->family, SOCK_STREAM, 0); if (s == -1) { log_error("failed to create tcp socket %s", strerror(errno)); return s; } rv = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(one)); if (rv == -1) { log_error("failed to set the SO_REUSEADDR option"); return rv; } rv = bind(s, &local->sa6, local->saddrlen); if (test_only) { rv = (rv == -1) ? errno : 0; close(s); return rv; } if (rv == -1) { log_error("failed to bind socket %s", strerror(errno)); return rv; } rv = listen(s, 5); if (rv == -1) { log_error("failed to listen on socket %s", strerror(errno)); return rv; } return s; } static int booth_tcp_init(void * unused __attribute__((unused))) { int rv; if (get_local_id() < 0) return -1; rv = setup_tcp_listener(0); if (rv < 0) return rv; client_add(rv, booth_transport + TCP, process_tcp_listener, NULL); return 0; } static int connect_nonb(int sockfd, const struct sockaddr *saptr, socklen_t salen, int sec) { int flags, n, error; socklen_t len; fd_set rset, wset; struct timeval tval; flags = fcntl(sockfd, F_GETFL, 0); fcntl(sockfd, F_SETFL, flags | O_NONBLOCK); error = 0; if ( (n = connect(sockfd, saptr, salen)) < 0) if (errno != EINPROGRESS) return -1; if (n == 0) goto done; /* connect completed immediately */ FD_ZERO(&rset); FD_SET(sockfd, &rset); wset = rset; tval.tv_sec = sec; tval.tv_usec = 0; if ((n = select(sockfd + 1, &rset, &wset, NULL, sec ? &tval : NULL)) == 0) { /* leave outside function to close */ /* timeout */ /* close(sockfd); */ errno = ETIMEDOUT; return -1; } if (FD_ISSET(sockfd, &rset) || FD_ISSET(sockfd, &wset)) { len = sizeof(error); if (getsockopt(sockfd, SOL_SOCKET, SO_ERROR, &error, &len) < 0) return -1; /* Solaris pending error */ } else { log_error("select error: sockfd not set"); return -1; } done: fcntl(sockfd, F_SETFL, flags); /* restore file status flags */ if (error) { /* leave outside function to close */ /* close(sockfd); */ errno = error; return -1; } return 0; } int booth_tcp_open(struct booth_site *to) { int s, rv; if (to->tcp_fd >= STDERR_FILENO) goto found; s = socket(to->family, SOCK_STREAM, 0); if (s == -1) { log_error("cannot create socket of family %d", to->family); return -1; } rv = connect_nonb(s, (struct sockaddr *)&to->sa6, to->saddrlen, 10); if (rv == -1) { if( errno == ETIMEDOUT) log_error("connect to %s got a timeout", site_string(to)); else log_error("connect to %s got an error: %s", site_string(to), strerror(errno)); goto error; } to->tcp_fd = s; found: return 1; error: if (s >= 0) close(s); return -1; } int booth_tcp_send(struct booth_site *to, void *buf, int len) { int rv; rv = add_hmac(buf, len); if (!rv) rv = do_write(to->tcp_fd, buf, len); return rv; } static int booth_tcp_recv(struct booth_site *from, void *buf, int len) { int got; /* Needs timeouts! */ got = do_read(from->tcp_fd, buf, len); if (got < 0) { log_error("read failed (%d): %s", errno, strerror(errno)); return got; } return got; } static int booth_tcp_recv_auth(struct booth_site *from, void *buf, int len) { int got, total; int payload_len; /* Needs timeouts! */ payload_len = len - sizeof(struct hmac); got = booth_tcp_recv(from, buf, payload_len); if (got < 0) { return got; } total = got; if (is_auth_req()) { got = booth_tcp_recv(from, (unsigned char *)buf+payload_len, sizeof(struct hmac)); if (got != sizeof(struct hmac) || check_auth(from, buf, len)) { return -1; } total += got; } return total; } static int booth_tcp_close(struct booth_site *to) { if (to) { if (to->tcp_fd > STDERR_FILENO) close(to->tcp_fd); to->tcp_fd = -1; } return 0; } static int booth_tcp_exit(void) { return 0; } static int setup_udp_server(void) { int rv, fd; int one = 1; unsigned int recvbuf_size; fd = socket(local->family, SOCK_DGRAM, 0); if (fd == -1) { log_error("failed to create UDP socket %s", strerror(errno)); goto ex; } rv = fcntl(fd, F_SETFL, O_NONBLOCK); if (rv == -1) { log_error("failed to set non-blocking operation " "on UDP socket: %s", strerror(errno)); goto ex; } rv = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(one)); if (rv == -1) { log_error("failed to set the SO_REUSEADDR option"); goto ex; } rv = bind(fd, (struct sockaddr *)&local->sa6, local->saddrlen); if (rv == -1) { log_error("failed to bind UDP socket to [%s]:%d: %s", site_string(local), booth_conf->port, strerror(errno)); goto ex; } recvbuf_size = SOCKET_BUFFER_SIZE; rv = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, sizeof(recvbuf_size)); if (rv == -1) { log_error("failed to set recvbuf size"); goto ex; } local->udp_fd = fd; return 0; ex: if (fd >= 0) close(fd); return -1; } /* Receive/process callback for UDP */ static void process_recv(int ci) { struct sockaddr_storage sa; int rv; socklen_t sa_len; /* beware, the buffer needs to be large enought to accept a * packet */ char buffer[MAX_MSG_LEN]; /* Used for unit tests */ struct boothc_ticket_msg *msg; sa_len = sizeof(sa); msg = (void*)buffer; rv = recvfrom(clients[ci].fd, buffer, sizeof(buffer), MSG_NOSIGNAL | MSG_DONTWAIT, (struct sockaddr *)&sa, &sa_len); if (rv == -1) return; deliver_fn((void*)msg, rv); } static int booth_udp_init(void *f) { int rv; rv = setup_udp_server(); if (rv < 0) return rv; deliver_fn = f; client_add(local->udp_fd, booth_transport + UDP, process_recv, NULL); return 0; } int booth_udp_send(struct booth_site *to, void *buf, int len) { int rv; to->sent_cnt++; rv = sendto(local->udp_fd, buf, len, MSG_NOSIGNAL, (struct sockaddr *)&to->sa6, to->saddrlen); if (rv == len) { rv = 0; } else if (rv < 0) { to->sent_err_cnt++; log_error("Cannot send to %s: %d %s", site_string(to), errno, strerror(errno)); } else { rv = -1; to->sent_err_cnt++; log_error("Packet sent to %s got truncated", site_string(to)); } return rv; } int booth_udp_send_auth(struct booth_site *to, void *buf, int len) { int rv; rv = add_hmac(buf, len); if (rv < 0) return rv; return booth_udp_send(to, buf, len); } static int booth_udp_broadcast_auth(void *buf, int len) { int i, rv, rvs; struct booth_site *site; if (!booth_conf || !booth_conf->site_count) return -1; rv = add_hmac(buf, len); if (rv < 0) return rv; rvs = 0; foreach_node(i, site) { if (site != local) { rv = booth_udp_send(site, buf, len); if (!rvs) rvs = rv; } } return rvs; } static int booth_udp_exit(void) { return 0; } /* SCTP transport layer has not been developed yet */ static int booth_sctp_init(void *f __attribute__((unused))) { return 0; } static int booth_sctp_send(struct booth_site * to __attribute__((unused)), void *buf __attribute__((unused)), int len __attribute__((unused))) { return 0; } static int booth_sctp_broadcast(void *buf __attribute__((unused)), int len __attribute__((unused))) { return 0; } static int return_0_booth_site(struct booth_site *v __attribute((unused))) { return 0; } static int return_0(void) { return 0; } const struct booth_transport booth_transport[TRANSPORT_ENTRIES] = { [TCP] = { .name = "TCP", .init = booth_tcp_init, .open = booth_tcp_open, .send = booth_tcp_send, .recv = booth_tcp_recv, .recv_auth = booth_tcp_recv_auth, .close = booth_tcp_close, .exit = booth_tcp_exit }, [UDP] = { .name = "UDP", .init = booth_udp_init, .open = return_0_booth_site, .send = booth_udp_send, .send_auth = booth_udp_send_auth, .close = return_0_booth_site, .broadcast_auth = booth_udp_broadcast_auth, .exit = booth_udp_exit }, [SCTP] = { .name = "SCTP", .init = booth_sctp_init, .open = return_0_booth_site, .send = booth_sctp_send, .broadcast = booth_sctp_broadcast, .exit = return_0, } }; /* data + (datalen-sizeof(struct hmac)) points to struct hmac * i.e. struct hmac is always tacked on the payload */ int add_hmac(void *data, int len) { int rv = 0; #if HAVE_LIBGCRYPT || HAVE_LIBMHASH int payload_len; struct hmac *hp; if (!is_auth_req()) return 0; payload_len = len - sizeof(struct hmac); hp = (struct hmac *)((unsigned char *)data + payload_len); hp->hid = htonl(BOOTH_HASH); memset(hp->hash, 0, BOOTH_MAC_SIZE); rv = calc_hmac(data, payload_len, BOOTH_HASH, hp->hash, booth_conf->authkey, booth_conf->authkey_len); if (rv < 0) { log_error("internal error: cannot calculate mac"); } #endif return rv; } #if HAVE_LIBGCRYPT || HAVE_LIBMHASH /* TODO: we need some client identification for logging */ #define peer_string(p) (p ? site_string(p) : "client") /* verify the validity of timestamp from the header * the timestamp needs to be either greater than the one already * recorded for the site or, and this is checked for clients, * not to be older than booth_conf->maxtimeskew * update the timestamp for the site, if this packet is from a * site */ static int verify_ts(struct booth_site *from, void *buf, int len) { struct boothc_header *h; struct timeval tv, curr_tv, now; if (len < sizeof(*h)) { log_error("%s: packet too short", peer_string(from)); return -1; } h = (struct boothc_header *)buf; tv.tv_sec = ntohl(h->secs); tv.tv_usec = ntohl(h->usecs); if (from) { curr_tv.tv_sec = from->last_secs; curr_tv.tv_usec = from->last_usecs; if (timercmp(&tv, &curr_tv, >)) goto accept; log_warn("%s: packet timestamp older than previous one", site_string(from)); } gettimeofday(&now, NULL); now.tv_sec -= booth_conf->maxtimeskew; if (timercmp(&tv, &now, >)) goto accept; log_error("%s: packet timestamp older than %d seconds", peer_string(from), booth_conf->maxtimeskew); return -1; accept: if (from) { from->last_secs = tv.tv_sec; from->last_usecs = tv.tv_usec; } return 0; } #endif int check_auth(struct booth_site *from, void *buf, int len) { int rv = 0; #if HAVE_LIBGCRYPT || HAVE_LIBMHASH int payload_len; struct hmac *hp; if (!is_auth_req()) return 0; payload_len = len - sizeof(struct hmac); if (payload_len < 0) { log_error("%s: failed to authenticate, packet too short (size:%d)", peer_string(from), len); return -1; } hp = (struct hmac *)((unsigned char *)buf + payload_len); rv = verify_hmac(buf, payload_len, ntohl(hp->hid), hp->hash, booth_conf->authkey, booth_conf->authkey_len); if (!rv) { rv = verify_ts(from, buf, len); } if (rv != 0) { log_error("%s: failed to authenticate", peer_string(from)); } #endif return rv; } int send_data(int fd, void *data, int datalen) { int rv = 0; rv = add_hmac(data, datalen); if (!rv) rv = do_write(fd, data, datalen); return rv; } int send_header_plus(int fd, struct boothc_hdr_msg *msg, void *data, int len) { int rv; rv = send_data(fd, msg, sendmsglen(msg)-len); if (rv >= 0 && len) rv = do_write(fd, data, len); return rv; } /* UDP message receiver. */ int message_recv(void *msg, int msglen) { uint32_t from; struct boothc_header *header; struct booth_site *source; header = (struct boothc_header *)msg; from = ntohl(header->from); if (!find_site_by_id(from, &source) || !source) { log_error("unknown sender: %08x", from); return -1; } time(&source->last_recv); source->recv_cnt++; if (check_boothc_header(header, msglen) < 0) { log_error("message from %s receive error", site_string(source)); source->recv_err_cnt++; return -1; } if (check_auth(source, msg, msglen)) { log_error("%s failed to authenticate", site_string(source)); source->sec_cnt++; return -1; } if (ntohl(header->opts) & BOOTH_OPT_ATTR) { /* not used, clients send/retrieve attributes directly * from sites */ return attr_recv(msg, source); } else { return ticket_recv(msg, source); } } diff --git a/src/transport.h b/src/transport.h index a088658..fe1b759 100644 --- a/src/transport.h +++ b/src/transport.h @@ -1,90 +1,90 @@ /* * Copyright (C) 2011 Jiaju Zhang * Copyright (C) 2013 Philipp Marek * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef _TRANSPORT_H #define _TRANSPORT_H #include "b_config.h" #include "booth.h" typedef enum { TCP = 1, UDP, SCTP, TRANSPORT_ENTRIES, } transport_layer_t; typedef enum { ARBITRATOR = 0x50, SITE, CLIENT, DAEMON, STATUS, GEOSTORE, } action_t; /* when allocating space for messages */ #define MAX_MSG_LEN 1024 struct booth_transport { const char *name; int (*init) (void *); int (*open) (struct booth_site *); int (*send) (struct booth_site *, void *, int); int (*send_auth) (struct booth_site *, void *, int); int (*recv) (struct booth_site *, void *, int); int (*recv_auth) (struct booth_site *, void *, int); int (*broadcast) (void *, int); int (*broadcast_auth) (void *, int); int (*close) (struct booth_site *); int (*exit) (void); }; extern const struct booth_transport booth_transport[TRANSPORT_ENTRIES]; int find_myself(struct booth_site **me, int fuzzy_allowed); int read_client(struct client *req_cl); int check_boothc_header(struct boothc_header *data, int len_incl_data); int setup_tcp_listener(int test_only); int booth_udp_send(struct booth_site *to, void *buf, int len); int booth_udp_send_auth(struct booth_site *to, void *buf, int len); int booth_tcp_open(struct booth_site *to); int booth_tcp_send(struct booth_site *to, void *buf, int len); int message_recv(void *msg, int msglen); inline static void * node_to_addr_pointer(struct booth_site *node) { switch (node->family) { case AF_INET: return &node->sa4.sin_addr; case AF_INET6: return &node->sa6.sin6_addr; } return NULL; } int send_data(int fd, void *data, int datalen); int send_header_plus(int fd, struct boothc_hdr_msg *hdr, void *data, int len); #define send_client_msg(fd, msg) send_data(fd, msg, sendmsglen(msg)) int add_hmac(void *data, int len); int check_auth(struct booth_site *from, void *buf, int len); #endif /* _TRANSPORT_H */