diff --git a/conf/corosync.conf.example b/conf/corosync.conf.example index 7121548b..6ffb4cf0 100644 --- a/conf/corosync.conf.example +++ b/conf/corosync.conf.example @@ -1,80 +1,87 @@ # Please read the corosync.conf.5 manual page totem { version: 2 # crypto_cipher and crypto_hash: Used for mutual node authentication. # If you choose to enable this, then do remember to create a shared # secret with "corosync-keygen". crypto_cipher: none crypto_hash: none + # crypto_compat: 2.0|2.2 (default higher) can be used to change + # on-wire crypto packet format. Unless performing some special + # rolling upgrades from corosync < 2.2 to 2.2, to keep the cluster + # running, do not touch this option. This option cannot be changed + # at runtime. + #crypto_compat: 2.2 + # interface: define at least one interface to communicate # over. If you define more than one interface stanza, you must # also set rrp_mode. interface { # Rings must be consecutively numbered, starting at 0. ringnumber: 0 # This is normally the *network* address of the # interface to bind to. This ensures that you can use # identical instances of this configuration file # across all your cluster nodes, without having to # modify this option. bindnetaddr: 192.168.1.0 # However, if you have multiple physical network # interfaces configured for the same subnet, then the # network address alone is not sufficient to identify # the interface Corosync should bind to. In that case, # configure the *host* address of the interface # instead: # bindnetaddr: 192.168.1.1 # When selecting a multicast address, consider RFC # 2365 (which, among other things, specifies that # 239.255.x.x addresses are left to the discretion of # the network administrator). Do not reuse multicast # addresses across multiple Corosync clusters sharing # the same network. mcastaddr: 239.255.1.1 # Corosync uses the port you specify here for UDP # messaging, and also the immediately preceding # port. 
Thus if you set this to 5405, Corosync sends # messages over UDP ports 5405 and 5404. mcastport: 5405 # Time-to-live for cluster communication packets. The # number of hops (routers) that this ring will allow # itself to pass. Note that multicast routing must be # specifically enabled on most network routers. ttl: 1 } } logging { # Log the source file and line where messages are being # generated. When in doubt, leave off. Potentially useful for # debugging. fileline: off # Log to standard error. When in doubt, set to no. Useful when # running in the foreground (when invoking "corosync -f") to_stderr: no # Log to a log file. When set to "no", the "logfile" option # must not be set. to_logfile: yes logfile: /var/log/cluster/corosync.log # Log to the system log daemon. When in doubt, set to yes. to_syslog: yes # Log debug messages (very verbose). When in doubt, leave off. debug: off # Log messages with time stamps. When in doubt, set to on # (unless you are only logging to syslog, where double # timestamps can be annoying). timestamp: on logger_subsys { subsys: QUORUM debug: off } } quorum { # Enable and configure quorum subsystem (default: off) # see also corosync.conf.5 and votequorum.5 #provider: corosync_votequorum } diff --git a/conf/lenses/corosync.aug b/conf/lenses/corosync.aug index 1418c30b..cc2311cb 100644 --- a/conf/lenses/corosync.aug +++ b/conf/lenses/corosync.aug @@ -1,181 +1,182 @@ (* Process /etc/corosync/corosync.conf *) (* The lens is based on the corosync.conf(5) man page *) module Corosync = autoload xfm let comment = Util.comment let empty = Util.empty let dels = Util.del_str let eol = Util.eol let ws = del /[ \t]+/ " " let wsc = del /:[ \t]+/ ": " let indent = del /[ \t]*/ "" (* We require that braces are always followed by a newline *) let obr = del /\{([ \t]*)\n/ "{\n" let cbr = del /[ \t]*}[ \t]*\n/ "}\n" let ikey (k:regexp) = indent . key k let section (n:regexp) (b:lens) = [ ikey n . ws . obr . (b|empty|comment)* . 
(* The totem section *)
(* Each alternative below accepts one "key: value" setting of the totem *)
(* section; nested interface subsections are accepted as well.          *)
let totem =
  let setting =
    kv "clear_node_high_bit" /yes|no/
    |kv "rrp_mode" /none|active|passive/
    |kv "vsftype" /none|ykd/
    |kv "secauth" /on|off/
    |kv "crypto_type" /nss|aes256|aes192|aes128|3des/
    |kv "crypto_cipher" /none|nss|aes256|aes192|aes128|3des/
    |kv "crypto_hash" /none|md5|sha1|sha256|sha384|sha512/
    (* The dots are escaped: an unescaped "." matches any character, *)
    (* which would let values such as "2x0" through the lens.        *)
    |kv "crypto_compat" /2\.0|2\.2/
    |kv "transport" /udp|iba/
    |kv "version" Rx.integer
    |kv "nodeid" Rx.integer
    |kv "threads" Rx.integer
    |kv "netmtu" Rx.integer
    |kv "token" Rx.integer
    |kv "token_retransmit" Rx.integer
    |kv "hold" Rx.integer
    |kv "token_retransmits_before_loss_const" Rx.integer
    |kv "join" Rx.integer
    |kv "send_join" Rx.integer
    |kv "consensus" Rx.integer
    |kv "merge" Rx.integer
    |kv "downcheck" Rx.integer
    |kv "fail_to_recv_const" Rx.integer
    |kv "seqno_unchanged_const" Rx.integer
    |kv "heartbeat_failures_allowed" Rx.integer
    |kv "max_network_delay" Rx.integer
    |kv "max_messages" Rx.integer
    |kv "window_size" Rx.integer
    |kv "rrp_problem_count_timeout" Rx.integer
    |kv "rrp_problem_count_threshold" Rx.integer
    |kv "rrp_token_expired_timeout" Rx.integer
    |interface in
  section "totem" setting
/alert|crit|debug|emerg|err|info|notice|warning/ |kv "syslog_facility" /daemon|local0|local1|local2|local3|local4|local5|local6|local7/ |qstr /logfile|tags/ (* A logger_subsys subsection *) let logger_subsys = let setting = qstr /subsys/ |common_logging in section "logger_subsys" setting (* The logging section *) let logging = let setting = kv "fileline" /yes|no|on|off/ |kv "function_name" /yes|no|on|off/ |kv "timestamp" /yes|no|on|off/ |common_logging |logger_subsys in section "logging" setting (* The resource section *) let common_resource = kv "max" Rx.decimal |kv "poll_period" Rx.integer |kv "recovery" /reboot|shutdown|watchdog|none/ let memory_used = let setting = common_resource in section "memory_used" setting let load_15min = let setting = common_resource in section "load_15min" setting let system = let setting = load_15min |memory_used in section "system" setting (* The resources section *) let resources = let setting = system in section "resources" setting (* The quorum section *) let quorum = let setting = qstr /provider/ |kv "expected_votes" Rx.integer |kv "votes" Rx.integer |kv "wait_for_all" Rx.integer |kv "last_man_standing" Rx.integer |kv "last_man_standing_window" Rx.integer |kv "auto_tie_breaker" Rx.integer |kv "two_node" Rx.integer in section "quorum" setting (* The service section *) let service = let setting = qstr /name|ver/ in section "service" setting (* The uidgid section *) let uidgid = let setting = qstr /uid|gid/ in section "uidgid" setting (* The node section *) let node = let setting = qstr /ring[0-9]_addr/ |kv "nodeid" Rx.integer |kv "quorum_votes" Rx.integer in section "node" setting (* The nodelist section *) let nodelist = let setting = node in section "nodelist" setting let lns = (comment|empty|totem|quorum|logging|resources|service|uidgid|nodelist)* let xfm = transform lns (incl "/etc/corosync/corosync.conf") diff --git a/conf/lenses/tests/test_corosync.aug b/conf/lenses/tests/test_corosync.aug index 486b5433..71e41bf4 100644 --- 
a/conf/lenses/tests/test_corosync.aug +++ b/conf/lenses/tests/test_corosync.aug @@ -1,163 +1,165 @@ module Test_corosync = let conf = "# Please read the corosync.conf.5 manual page totem { version: 2 secauth: off crypto_cipher: none crypto_hash: none + crypto_compat: 2.2 threads: 0 clear_node_high_bit: no rrp_mode: none transport: udp token: 1000 interface { ringnumber: 0 bindnetaddr: 192.168.122.1 mcastaddr: 226.94.1.1 ttl: 45 mcastport: 5405 } } logging { fileline: off function_name: on to_stderr: yes to_logfile: yes to_syslog: yes logfile: /tmp/corosync.log debug: off timestamp: on logger_subsys { to_syslog: no subsys: CPG debug: on } logger_subsys { to_stderr: no logfile: /tmp/corosync-msg.log subsys: MSG debug: on tags: enter|trace4 } } quorum { provider: corosync_votequorum expected_votes: 5 votes: 2 two_node: 1 wait_for_all: 1 last_man_standing: 1 last_man_standing_window: 10000 auto_tie_breaker: 1 } resources { system { memory_used { recovery: reboot max: 80 } load_15min { recovery: watchdog max: 8.56 } } } uidgid { uid: 0 gid: 0 } nodelist { node { ring0_addr: 192.168.122.1 nodeid: 1 quorum_votes: 2 } node { ring0_addr: 192.168.122.2 ring1_addr: 192.168.123.1 nodeid: 2 } }\n" test Corosync.lns get conf = { "#comment" = "Please read the corosync.conf.5 manual page" } { } { "totem" { "version" = "2" } { "secauth" = "off" } { "crypto_cipher" = "none" } { "crypto_hash" = "none" } + { "crypto_compat" = "2.2" } { "threads" = "0" } { "clear_node_high_bit" = "no" } { "rrp_mode" = "none" } { "transport" = "udp" } { "token" = "1000" } { "interface" { "ringnumber" = "0" } { "bindnetaddr" = "192.168.122.1" } { "mcastaddr" = "226.94.1.1" } { "ttl" = "45" } { "mcastport" = "5405" } } } { } { "logging" { "fileline" = "off" } { "function_name" = "on" } { "to_stderr" = "yes" } { "to_logfile" = "yes" } { "to_syslog" = "yes" } { "logfile" = "/tmp/corosync.log" } { "debug" = "off" } { "timestamp" = "on" } { "logger_subsys" { "to_syslog" = "no" } { "subsys" = "CPG" } { "debug" 
= "on" } } { "logger_subsys" { "to_stderr" = "no" } { "logfile" = "/tmp/corosync-msg.log" } { "subsys" = "MSG" } { "debug" = "on" } { "tags" = "enter|trace4" } } } { } { "quorum" { "provider" = "corosync_votequorum" } { "expected_votes" = "5" } { "votes" = "2" } { "two_node" = "1" } { "wait_for_all" = "1" } { "last_man_standing" = "1" } { "last_man_standing_window" = "10000" } { "auto_tie_breaker" = "1" } } { } { "resources" { "system" { "memory_used" { "recovery" = "reboot" } { "max" = "80" } } { "load_15min" { "recovery" = "watchdog" } { "max" = "8.56" } } } } { } { "uidgid" { "uid" = "0" } { "gid" = "0" } } { } { "nodelist" { "node" { "ring0_addr" = "192.168.122.1" } { "nodeid" = "1" } { "quorum_votes" = "2" } } { } { "node" { "ring0_addr" = "192.168.122.2" } { "ring1_addr" = "192.168.123.1" } { "nodeid" = "2" } } } diff --git a/exec/coroparse.c b/exec/coroparse.c index 32f14b2e..fceafa88 100644 --- a/exec/coroparse.c +++ b/exec/coroparse.c @@ -1,1160 +1,1168 @@ /* * Copyright (c) 2006-2012 Red Hat, Inc. * * All rights reserved. * * Author: Patrick Caulfield (pcaulfie@redhat.com) * Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define LOGSYS_UTILS_ONLY 1 #include #include #include "main.h" #include "util.h" enum parser_cb_type { PARSER_CB_START, PARSER_CB_END, PARSER_CB_SECTION_START, PARSER_CB_SECTION_END, PARSER_CB_ITEM, }; typedef int (*parser_cb_f)(const char *path, char *key, char *value, enum parser_cb_type type, const char **error_string, void *user_data); enum main_cp_cb_data_state { MAIN_CP_CB_DATA_STATE_NORMAL, MAIN_CP_CB_DATA_STATE_TOTEM, MAIN_CP_CB_DATA_STATE_INTERFACE, MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS, MAIN_CP_CB_DATA_STATE_UIDGID, MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON, MAIN_CP_CB_DATA_STATE_MEMBER, MAIN_CP_CB_DATA_STATE_QUORUM, MAIN_CP_CB_DATA_STATE_QDEVICE, MAIN_CP_CB_DATA_STATE_NODELIST, MAIN_CP_CB_DATA_STATE_NODELIST_NODE, MAIN_CP_CB_DATA_STATE_PLOAD, MAIN_CP_CB_DATA_STATE_QB }; struct key_value_list_item { char *key; char *value; struct list_head list; }; struct main_cp_cb_data { enum main_cp_cb_data_state state; int ringnumber; char *bindnetaddr; char *mcastaddr; char *broadcast; int mcastport; int ttl; struct 
/* Buffer used to hand a formatted error message back to the parser callbacks. */
static char error_string_response[512];

/*
 * Resolve a user name to its numeric uid.
 *
 * Returns the uid on success. Returns -1 when the user does not exist, the
 * lookup fails or memory cannot be allocated; in that case a message is left
 * in error_string_response.
 */
static int uid_determine (const char *req_user)
{
	struct passwd passwd;
	struct passwd *result = NULL;
	char *buffer = NULL;
	char *new_buffer;
	size_t buffer_len;
	long size_hint;
	int rc;
	int pw_uid;

	size_hint = sysconf (_SC_GETPW_R_SIZE_MAX);
	buffer_len = (size_hint > 0) ? (size_t)size_hint : 256;

	/*
	 * getpwnam_r reports ERANGE when the scratch buffer is too small, so
	 * grow it (up to 1 MiB) instead of failing the lookup.
	 */
	do {
		new_buffer = realloc (buffer, buffer_len);
		if (new_buffer == NULL) {
			free (buffer);
			snprintf (error_string_response, sizeof (error_string_response),
			    "Can't alloc memory");
			return (-1);
		}
		buffer = new_buffer;

		rc = getpwnam_r (req_user, &passwd, buffer, buffer_len, &result);
		buffer_len *= 2;
	} while (rc == ERANGE && buffer_len <= (1024 * 1024));

	/*
	 * A missing user is reported as rc == 0 with a NULL result pointer,
	 * so both conditions have to be checked.
	 */
	if (rc != 0 || result == NULL) {
		snprintf (error_string_response, sizeof (error_string_response),
		    "The '%s' user is not found in /etc/passwd, please read the documentation.",
		    req_user);
		free (buffer);
		return (-1);
	}

	pw_uid = passwd.pw_uid;
	free (buffer);

	return pw_uid;
}

/*
 * Resolve a group name to its numeric gid.
 *
 * Returns the gid on success. Returns -1 when the group does not exist, the
 * lookup fails or memory cannot be allocated; in that case a message is left
 * in error_string_response.
 */
static int gid_determine (const char *req_group)
{
	struct group group;
	struct group *result = NULL;
	char *buffer = NULL;
	char *new_buffer;
	size_t buffer_len;
	long size_hint;
	int rc;
	int corosync_gid;

	size_hint = sysconf (_SC_GETGR_R_SIZE_MAX);
	buffer_len = (size_hint > 0) ? (size_t)size_hint : 256;

	/* Same ERANGE handling as in uid_determine. */
	do {
		new_buffer = realloc (buffer, buffer_len);
		if (new_buffer == NULL) {
			free (buffer);
			snprintf (error_string_response, sizeof (error_string_response),
			    "Can't alloc memory");
			return (-1);
		}
		buffer = new_buffer;

		rc = getgrnam_r (req_group, &group, buffer, buffer_len, &result);
		buffer_len *= 2;
	} while (rc == ERANGE && buffer_len <= (1024 * 1024));

	/* rc == 0 with a NULL result means "no such group". */
	if (rc != 0 || result == NULL) {
		snprintf (error_string_response, sizeof (error_string_response),
		    "The '%s' group is not found in /etc/group, please read the documentation.",
		    req_group);
		free (buffer);
		return (-1);
	}

	corosync_gid = group.gr_gid;
	free (buffer);

	return corosync_gid;
}

/*
 * Find the first occurrence of byte in haystack and return a pointer to the
 * first character after it that is neither a space nor a tab.
 *
 * Returns NULL when byte does not occur in haystack.
 */
static char *strchr_rs (const char *haystack, int byte)
{
	const char *end_address = strchr (haystack, byte);

	if (end_address) {
		end_address += 1; /* skip past { or = */

		while (*end_address == ' ' || *end_address == '\t')
			end_address++;
	}

	return ((char *) end_address);
}

/*
 * Trim string in place and return a pointer to its first kept character.
 *
 * Leading spaces and tabs are skipped; trailing spaces, tabs, ':' and '{'
 * are cut off. At least one character is always kept, so a string made of
 * a single ':' or '{' is returned unchanged.
 */
static char *remove_whitespace(char *string)
{
	char *start;
	char *end;

	start = string;
	while (*start == ' ' || *start == '\t')
		start++;

	/* Nothing left to trim; also avoids looking at start[-1]. */
	if (*start == '\0')
		return start;

	end = start + strlen(start) - 1;
	while (end > start &&
	    (*end == ' ' || *end == '\t' || *end == ':' || *end == '{'))
		end--;

	/* Always terminate, even when only one character remains. */
	*(end + 1) = '\0';

	return start;
}
/*
 * Build "<path>.<name>" (or just "<name>" when path is empty) into dst.
 *
 * Returns 0 on success and -1 when the result does not fit into dst_len
 * bytes including the terminating NUL.
 */
static int parse_section_build_keyname(char *dst, size_t dst_len,
	const char *path, const char *name)
{
	int written;

	written = snprintf(dst, dst_len, "%s%s%s",
	    path, (path[0] != '\0') ? "." : "", name);

	if (written < 0 || (size_t)written >= dst_len) {
		return (-1);
	}

	return (0);
}

/*
 * Parse one section of the configuration stream fp.
 *
 * path is the dotted name of the section being parsed; the empty string
 * denotes the top level. For every section start, key/value item and section
 * end the parser_cb callback is invoked; a zero return from the callback
 * aborts parsing. At top level PARSER_CB_START and PARSER_CB_END are sent as
 * well (their return value is not checked).
 *
 * Returns 0 on success and -1 on error; for errors detected here (as opposed
 * to errors reported by the callback) *error_string is set.
 */
static int parse_section(FILE *fp,
			 char *path,
			 const char **error_string,
			 parser_cb_f parser_cb,
			 void *user_data)
{
	char line[512];
	int i;
	char *loc;
	int ignore_line;
	char new_keyname[ICMAP_KEYNAME_MAXLEN];

	if (strcmp(path, "") == 0) {
		parser_cb("", NULL, NULL, PARSER_CB_START, error_string, user_data);
	}

	while (fgets (line, sizeof (line), fp)) {
		/* Strip the line terminator (LF or CRLF). */
		if (strlen(line) > 0) {
			if (line[strlen(line) - 1] == '\n')
				line[strlen(line) - 1] = '\0';
			if (strlen (line) > 0 && line[strlen(line) - 1] == '\r')
				line[strlen(line) - 1] = '\0';
		}

		/*
		 * Clear out white space and tabs
		 */
		for (i = strlen (line) - 1; i > -1; i--) {
			if (line[i] == '\t' || line[i] == ' ') {
				line[i] = '\0';
			} else {
				break;
			}
		}

		/* A line is ignored when it is blank or its first visible char is '#'. */
		ignore_line = 1;
		for (i = 0; i < (int)strlen (line); i++) {
			if (line[i] != '\t' && line[i] != ' ') {
				if (line[i] != '#')
					ignore_line = 0;

				break;
			}
		}

		/*
		 * Clear out comments and empty lines
		 */
		if (ignore_line) {
			continue;
		}

		/* New section ? */
		if ((loc = strchr_rs (line, '{'))) {
			char *section = remove_whitespace(line);

			loc--;
			*loc = '\0';

			/*
			 * line can hold up to 511 characters while a key name is
			 * limited to ICMAP_KEYNAME_MAXLEN, so the join is bounded.
			 */
			if (parse_section_build_keyname(new_keyname, sizeof(new_keyname),
			    path, section) != 0) {
				*error_string = "Start of section makes total cmap path too long";
				return -1;
			}

			if (!parser_cb(new_keyname, NULL, NULL, PARSER_CB_SECTION_START, error_string, user_data)) {
				return -1;
			}

			if (parse_section(fp, new_keyname, error_string, parser_cb, user_data))
				return -1;
		}

		/* New key/value */
		if ((loc = strchr_rs (line, ':'))) {
			char *key;
			char *value;

			*(loc-1) = '\0';
			key = remove_whitespace(line);
			value = remove_whitespace(loc);

			if (parse_section_build_keyname(new_keyname, sizeof(new_keyname),
			    path, key) != 0) {
				*error_string = "New key makes total cmap path too long";
				return -1;
			}

			if (!parser_cb(new_keyname, key, value, PARSER_CB_ITEM, error_string, user_data)) {
				return -1;
			}
		}

		/* End of the current section. */
		if (strchr_rs (line, '}')) {
			if (!parser_cb(path, NULL, NULL, PARSER_CB_SECTION_END, error_string, user_data)) {
				return -1;
			}

			return 0;
		}
	}

	/* End of file inside a nested section: the closing brace is missing. */
	if (strcmp(path, "") != 0) {
		*error_string = "Missing closing brace";
		return -1;
	}

	if (strcmp(path, "") == 0) {
		parser_cb("", NULL, NULL, PARSER_CB_END, error_string, user_data);
	}

	return 0;
}
/*
 * Convert the decimal string str to an int.
 *
 * The whole string must be a number: empty input, trailing garbage and
 * values outside the range of int are rejected.
 *
 * Returns 0 and stores the value in *res on success, -1 otherwise (*res is
 * left untouched).
 */
static int safe_atoi(const char *str, int *res)
{
	long val;
	char *endptr;

	errno = 0;

	val = strtol(str, &endptr, 10);
	if (errno == ERANGE) {
		return (-1);
	}

	if (endptr == str) {
		return (-1);
	}

	if (*endptr != '\0') {
		return (-1);
	}

	/*
	 * strtol works on long; where long is wider than int the value has to
	 * be range-checked before narrowing, otherwise it silently wraps.
	 */
	if (val < INT_MIN || val > INT_MAX) {
		return (-1);
	}

	*res = (int)val;
	return (0);
}

/*
 * Convert the decimal string str to an unsigned long long.
 *
 * The whole string must be a non-negative number: empty input, trailing
 * garbage, a minus sign and out-of-range values are rejected.
 *
 * Returns 0 and stores the value in *res on success, -1 otherwise (*res is
 * left untouched).
 */
static int str_to_ull(const char *str, unsigned long long int *res)
{
	unsigned long long int val;
	char *endptr;

	/*
	 * strtoull accepts a leading '-' and returns the negated value as an
	 * unsigned number; a valid unsigned decimal never contains '-'.
	 */
	if (strchr(str, '-') != NULL) {
		return (-1);
	}

	errno = 0;

	val = strtoull(str, &endptr, 10);
	if (errno == ERANGE) {
		return (-1);
	}

	if (endptr == str) {
		return (-1);
	}

	if (*endptr != '\0') {
		return (-1);
	}

	*res = val;
	return (0);
}
struct key_value_list_item *kv_item; struct list_head *iter, *iter_next; int uid, gid; switch (type) { case PARSER_CB_START: memset(data, 0, sizeof(struct main_cp_cb_data)); data->state = MAIN_CP_CB_DATA_STATE_NORMAL; break; case PARSER_CB_END: break; case PARSER_CB_ITEM: add_as_string = 1; switch (data->state) { case MAIN_CP_CB_DATA_STATE_NORMAL: break; case MAIN_CP_CB_DATA_STATE_PLOAD: if ((strcmp(path, "pload.count") == 0) || (strcmp(path, "pload.size") == 0)) { if (safe_atoi(value, &i) != 0) { goto atoi_error; } icmap_set_uint32(path, i); add_as_string = 0; } break; case MAIN_CP_CB_DATA_STATE_QUORUM: if ((strcmp(path, "quorum.expected_votes") == 0) || (strcmp(path, "quorum.votes") == 0) || (strcmp(path, "quorum.last_man_standing_window") == 0) || (strcmp(path, "quorum.leaving_timeout") == 0)) { if (safe_atoi(value, &i) != 0) { goto atoi_error; } icmap_set_uint32(path, i); add_as_string = 0; } if ((strcmp(path, "quorum.two_node") == 0) || (strcmp(path, "quorum.allow_downscale") == 0) || (strcmp(path, "quorum.wait_for_all") == 0) || (strcmp(path, "quorum.auto_tie_breaker") == 0) || (strcmp(path, "quorum.last_man_standing") == 0)) { if (safe_atoi(value, &i) != 0) { goto atoi_error; } icmap_set_uint8(path, i); add_as_string = 0; } break; case MAIN_CP_CB_DATA_STATE_QDEVICE: if ((strcmp(path, "quorum.device.timeout") == 0) || (strcmp(path, "quorum.device.votes") == 0)) { if (safe_atoi(value, &i) != 0) { goto atoi_error; } icmap_set_uint32(path, i); add_as_string = 0; } if ((strcmp(path, "quorum.device.master_wins") == 0)) { if (safe_atoi(value, &i) != 0) { goto atoi_error; } icmap_set_uint8(path, i); add_as_string = 0; } case MAIN_CP_CB_DATA_STATE_TOTEM: if ((strcmp(path, "totem.version") == 0) || (strcmp(path, "totem.nodeid") == 0) || (strcmp(path, "totem.threads") == 0) || (strcmp(path, "totem.token") == 0) || (strcmp(path, "totem.token_retransmit") == 0) || (strcmp(path, "totem.hold") == 0) || (strcmp(path, "totem.token_retransmits_before_loss_const") == 0) || 
(strcmp(path, "totem.join") == 0) || (strcmp(path, "totem.send_join") == 0) || (strcmp(path, "totem.consensus") == 0) || (strcmp(path, "totem.merge") == 0) || (strcmp(path, "totem.downcheck") == 0) || (strcmp(path, "totem.fail_recv_const") == 0) || (strcmp(path, "totem.seqno_unchanged_const") == 0) || (strcmp(path, "totem.rrp_token_expired_timeout") == 0) || (strcmp(path, "totem.rrp_problem_count_timeout") == 0) || (strcmp(path, "totem.rrp_problem_count_threshold") == 0) || (strcmp(path, "totem.rrp_problem_count_mcast_threshold") == 0) || (strcmp(path, "totem.rrp_autorecovery_check_timeout") == 0) || (strcmp(path, "totem.heartbeat_failures_allowed") == 0) || (strcmp(path, "totem.max_network_delay") == 0) || (strcmp(path, "totem.window_size") == 0) || (strcmp(path, "totem.max_messages") == 0) || (strcmp(path, "totem.miss_count_const") == 0) || (strcmp(path, "totem.netmtu") == 0)) { if (safe_atoi(value, &i) != 0) { goto atoi_error; } icmap_set_uint32(path, i); add_as_string = 0; } if (strcmp(path, "totem.config_version") == 0) { if (str_to_ull(value, &ull) != 0) { goto atoi_error; } icmap_set_uint64(path, ull); add_as_string = 0; } if (strcmp(path, "totem.crypto_type") == 0) { if ((strcmp(value, "nss") != 0) && (strcmp(value, "aes256") != 0) && (strcmp(value, "aes192") != 0) && (strcmp(value, "aes128") != 0) && (strcmp(value, "3des") != 0)) { *error_string = "Invalid crypto type"; return (0); } } if (strcmp(path, "totem.crypto_cipher") == 0) { if ((strcmp(value, "none") != 0) && (strcmp(value, "aes256") != 0) && (strcmp(value, "aes192") != 0) && (strcmp(value, "aes128") != 0) && (strcmp(value, "3des") != 0)) { *error_string = "Invalid cipher type"; return (0); } } if (strcmp(path, "totem.crypto_hash") == 0) { if ((strcmp(value, "none") != 0) && (strcmp(value, "md5") != 0) && (strcmp(value, "sha1") != 0) && (strcmp(value, "sha256") != 0) && (strcmp(value, "sha384") != 0) && (strcmp(value, "sha512") != 0)) { *error_string = "Invalid hash type"; return (0); } } + if 
(strcmp(path, "totem.crypto_compat") == 0) { + if ((strcmp(value, "2.0") != 0) && + (strcmp(value, "2.2") != 0)) { + *error_string = "Invalid crypto compat type"; + + return (0); + } + } break; case MAIN_CP_CB_DATA_STATE_QB: if (strcmp(path, "qb.ipc_type") == 0) { if ((strcmp(value, "native") != 0) && (strcmp(value, "shm") != 0) && (strcmp(value, "socket") != 0)) { *error_string = "Invalid qb ipc_type"; return (0); } } break; case MAIN_CP_CB_DATA_STATE_INTERFACE: if (strcmp(path, "totem.interface.ringnumber") == 0) { if (safe_atoi(value, &i) != 0) { goto atoi_error; } data->ringnumber = i; add_as_string = 0; } if (strcmp(path, "totem.interface.bindnetaddr") == 0) { data->bindnetaddr = strdup(value); add_as_string = 0; } if (strcmp(path, "totem.interface.mcastaddr") == 0) { data->mcastaddr = strdup(value); add_as_string = 0; } if (strcmp(path, "totem.interface.broadcast") == 0) { data->broadcast = strdup(value); add_as_string = 0; } if (strcmp(path, "totem.interface.mcastport") == 0) { if (safe_atoi(value, &i) != 0) { goto atoi_error; } data->mcastport = i; if (data->mcastport < 0 || data->mcastport > 65535) { *error_string = "Invalid multicast port (should be 0..65535)"; return (0); }; add_as_string = 0; } if (strcmp(path, "totem.interface.ttl") == 0) { if (safe_atoi(value, &i) != 0) { goto atoi_error; } data->ttl = i; if (data->ttl < 0 || data->ttl > 255) { *error_string = "Invalid TTL (should be 0..255)"; return (0); }; add_as_string = 0; } break; case MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS: if (strcmp(key, "subsys") == 0) { data->subsys = strdup(value); if (data->subsys == NULL) { *error_string = "Can't alloc memory"; return (0); } } else { kv_item = malloc(sizeof(*kv_item)); if (kv_item == NULL) { *error_string = "Can't alloc memory"; return (0); } memset(kv_item, 0, sizeof(*kv_item)); kv_item->key = strdup(key); kv_item->value = strdup(value); if (kv_item->key == NULL || kv_item->value == NULL) { free(kv_item); *error_string = "Can't alloc memory"; return (0); } 
list_init(&kv_item->list); list_add(&kv_item->list, &data->logger_subsys_items_head); } add_as_string = 0; break; case MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON: if (strcmp(key, "subsys") == 0) { data->subsys = strdup(value); if (data->subsys == NULL) { *error_string = "Can't alloc memory"; return (0); } } else if (strcmp(key, "name") == 0) { data->logging_daemon_name = strdup(value); if (data->logging_daemon_name == NULL) { *error_string = "Can't alloc memory"; return (0); } } else { kv_item = malloc(sizeof(*kv_item)); if (kv_item == NULL) { *error_string = "Can't alloc memory"; return (0); } memset(kv_item, 0, sizeof(*kv_item)); kv_item->key = strdup(key); kv_item->value = strdup(value); if (kv_item->key == NULL || kv_item->value == NULL) { free(kv_item); *error_string = "Can't alloc memory"; return (0); } list_init(&kv_item->list); list_add(&kv_item->list, &data->logger_subsys_items_head); } add_as_string = 0; break; case MAIN_CP_CB_DATA_STATE_UIDGID: if (strcmp(key, "uid") == 0) { uid = uid_determine(value); if (uid == -1) { *error_string = error_string_response; return (0); } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.uid.%u", uid); icmap_set_uint8(key_name, 1); add_as_string = 0; } else if (strcmp(key, "gid") == 0) { gid = gid_determine(value); if (gid == -1) { *error_string = error_string_response; return (0); } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.gid.%u", gid); icmap_set_uint8(key_name, 1); add_as_string = 0; } else { *error_string = "uidgid: Only uid and gid are allowed items"; return (0); } break; case MAIN_CP_CB_DATA_STATE_MEMBER: if (strcmp(key, "memberaddr") != 0) { *error_string = "Only memberaddr is allowed in member section"; return (0); } kv_item = malloc(sizeof(*kv_item)); if (kv_item == NULL) { *error_string = "Can't alloc memory"; return (0); } memset(kv_item, 0, sizeof(*kv_item)); kv_item->key = strdup(key); kv_item->value = strdup(value); if (kv_item->key == NULL || kv_item->value == NULL) { free(kv_item); *error_string = "Can't 
alloc memory"; return (0); } list_init(&kv_item->list); list_add(&kv_item->list, &data->member_items_head); add_as_string = 0; break; case MAIN_CP_CB_DATA_STATE_NODELIST: break; case MAIN_CP_CB_DATA_STATE_NODELIST_NODE: snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.%s", data->node_number, key); if ((strcmp(key, "nodeid") == 0) || (strcmp(key, "quorum_votes") == 0)) { if (safe_atoi(value, &i) != 0) { goto atoi_error; } icmap_set_uint32(key_name, i); add_as_string = 0; } if (strcmp(key, "ring0_addr") == 0) { data->ring0_addr_added = 1; } if (add_as_string) { icmap_set_string(key_name, value); add_as_string = 0; } break; } if (add_as_string) { icmap_set_string(path, value); } break; case PARSER_CB_SECTION_START: if (strcmp(path, "totem.interface") == 0) { data->state = MAIN_CP_CB_DATA_STATE_INTERFACE; data->ringnumber = 0; data->mcastport = -1; data->ttl = -1; list_init(&data->member_items_head); }; if (strcmp(path, "totem") == 0) { data->state = MAIN_CP_CB_DATA_STATE_TOTEM; }; if (strcmp(path, "qb") == 0) { data->state = MAIN_CP_CB_DATA_STATE_QB; } if (strcmp(path, "logging.logger_subsys") == 0) { data->state = MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS; list_init(&data->logger_subsys_items_head); data->subsys = NULL; } if (strcmp(path, "logging.logging_daemon") == 0) { data->state = MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON; list_init(&data->logger_subsys_items_head); data->subsys = NULL; data->logging_daemon_name = NULL; } if (strcmp(path, "uidgid") == 0) { data->state = MAIN_CP_CB_DATA_STATE_UIDGID; } if (strcmp(path, "totem.interface.member") == 0) { data->state = MAIN_CP_CB_DATA_STATE_MEMBER; } if (strcmp(path, "quorum") == 0) { data->state = MAIN_CP_CB_DATA_STATE_QUORUM; } if (strcmp(path, "quorum.device") == 0) { data->state = MAIN_CP_CB_DATA_STATE_QDEVICE; } if (strcmp(path, "nodelist") == 0) { data->state = MAIN_CP_CB_DATA_STATE_NODELIST; data->node_number = 0; } if (strcmp(path, "nodelist.node") == 0) { data->state = MAIN_CP_CB_DATA_STATE_NODELIST_NODE; 
data->ring0_addr_added = 0; } break; case PARSER_CB_SECTION_END: switch (data->state) { case MAIN_CP_CB_DATA_STATE_NORMAL: break; case MAIN_CP_CB_DATA_STATE_PLOAD: data->state = MAIN_CP_CB_DATA_STATE_NORMAL; break; case MAIN_CP_CB_DATA_STATE_INTERFACE: /* * Create new interface section */ if (data->bindnetaddr != NULL) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.bindnetaddr", data->ringnumber); icmap_set_string(key_name, data->bindnetaddr); free(data->bindnetaddr); data->bindnetaddr = NULL; } if (data->mcastaddr != NULL) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr", data->ringnumber); icmap_set_string(key_name, data->mcastaddr); free(data->mcastaddr); data->mcastaddr = NULL; } if (data->broadcast != NULL) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.broadcast", data->ringnumber); icmap_set_string(key_name, data->broadcast); free(data->broadcast); data->broadcast = NULL; } if (data->mcastport > -1) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport", data->ringnumber); icmap_set_uint16(key_name, data->mcastport); } if (data->ttl > -1) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.ttl", data->ringnumber); icmap_set_uint8(key_name, data->ttl); } i = 0; for (iter = data->member_items_head.next; iter != &data->member_items_head; iter = iter_next) { kv_item = list_entry(iter, struct key_value_list_item, list); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.member.%u", data->ringnumber, i); icmap_set_string(key_name, kv_item->value); iter_next = iter->next; free(kv_item->value); free(kv_item->key); free(kv_item); i++; } data->state = MAIN_CP_CB_DATA_STATE_TOTEM; break; case MAIN_CP_CB_DATA_STATE_TOTEM: data->state = MAIN_CP_CB_DATA_STATE_NORMAL; break; case MAIN_CP_CB_DATA_STATE_QB: data->state = MAIN_CP_CB_DATA_STATE_NORMAL; break; case MAIN_CP_CB_DATA_STATE_LOGGER_SUBSYS: if (data->subsys == NULL) { *error_string = "No subsys key in logger_subsys 
directive"; return (0); } for (iter = data->logger_subsys_items_head.next; iter != &data->logger_subsys_items_head; iter = iter_next) { kv_item = list_entry(iter, struct key_value_list_item, list); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logger_subsys.%s.%s", data->subsys, kv_item->key); icmap_set_string(key_name, kv_item->value); iter_next = iter->next; free(kv_item->value); free(kv_item->key); free(kv_item); } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logger_subsys.%s.subsys", data->subsys); icmap_set_string(key_name, data->subsys); free(data->subsys); data->state = MAIN_CP_CB_DATA_STATE_NORMAL; break; case MAIN_CP_CB_DATA_STATE_LOGGING_DAEMON: if (data->logging_daemon_name == NULL) { *error_string = "No name key in logging_daemon directive"; return (0); } for (iter = data->logger_subsys_items_head.next; iter != &data->logger_subsys_items_head; iter = iter_next) { kv_item = list_entry(iter, struct key_value_list_item, list); if (data->subsys == NULL) { if (strcmp(data->logging_daemon_name, "corosync") == 0) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.%s", kv_item->key); } else { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.%s", data->logging_daemon_name, kv_item->key); } } else { if (strcmp(data->logging_daemon_name, "corosync") == 0) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logger_subsys.%s.%s", data->subsys, kv_item->key); } else { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.%s.%s", data->logging_daemon_name, data->subsys, kv_item->key); } } icmap_set_string(key_name, kv_item->value); iter_next = iter->next; free(kv_item->value); free(kv_item->key); free(kv_item); } if (data->subsys == NULL) { if (strcmp(data->logging_daemon_name, "corosync") != 0) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.name", data->logging_daemon_name); icmap_set_string(key_name, data->logging_daemon_name); } } else { if (strcmp(data->logging_daemon_name, "corosync") == 0) { 
snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logger_subsys.%s.subsys", data->subsys); icmap_set_string(key_name, data->subsys); } else { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.%s.subsys", data->logging_daemon_name, data->subsys); icmap_set_string(key_name, data->subsys); snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "logging.logging_daemon.%s.%s.name", data->logging_daemon_name, data->subsys); icmap_set_string(key_name, data->logging_daemon_name); } } free(data->subsys); free(data->logging_daemon_name); data->state = MAIN_CP_CB_DATA_STATE_NORMAL; break; case MAIN_CP_CB_DATA_STATE_UIDGID: data->state = MAIN_CP_CB_DATA_STATE_UIDGID; break; case MAIN_CP_CB_DATA_STATE_MEMBER: data->state = MAIN_CP_CB_DATA_STATE_INTERFACE; break; case MAIN_CP_CB_DATA_STATE_QUORUM: data->state = MAIN_CP_CB_DATA_STATE_NORMAL; break; case MAIN_CP_CB_DATA_STATE_QDEVICE: data->state = MAIN_CP_CB_DATA_STATE_QUORUM; break; case MAIN_CP_CB_DATA_STATE_NODELIST: data->state = MAIN_CP_CB_DATA_STATE_NORMAL; break; case MAIN_CP_CB_DATA_STATE_NODELIST_NODE: if (!data->ring0_addr_added) { *error_string = "No ring0_addr specified for node"; return (0); } data->node_number++; data->state = MAIN_CP_CB_DATA_STATE_NODELIST; break; } break; } return (1); atoi_error: snprintf(formated_err, sizeof(formated_err), "Value of key \"%s\" must be integer, but \"%s\" was given", key, value); *error_string = formated_err; return (0); } static int uidgid_config_parser_cb(const char *path, char *key, char *value, enum parser_cb_type type, const char **error_string, void *user_data) { char key_name[ICMAP_KEYNAME_MAXLEN]; int uid, gid; switch (type) { case PARSER_CB_START: break; case PARSER_CB_END: break; case PARSER_CB_ITEM: if (strcmp(path, "uidgid.uid") == 0) { uid = uid_determine(value); if (uid == -1) { *error_string = error_string_response; return (0); } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.uid.%u", uid); icmap_set_uint8(key_name, 1); } else if (strcmp(path, "uidgid.gid") 
== 0) { gid = gid_determine(value); if (gid == -1) { *error_string = error_string_response; return (0); } snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "uidgid.gid.%u", gid); icmap_set_uint8(key_name, 1); } else { *error_string = "uidgid: Only uid and gid are allowed items"; return (0); } break; case PARSER_CB_SECTION_START: if (strcmp(path, "uidgid") != 0) { *error_string = "uidgid: Can't add subsection different then uidgid"; return (0); }; break; case PARSER_CB_SECTION_END: break; } return (1); } static int read_uidgid_files_into_icmap( const char **error_string) { FILE *fp; const char *dirname; DIR *dp; struct dirent *dirent; struct dirent *entry; char filename[PATH_MAX + FILENAME_MAX + 1]; int res = 0; size_t len; int return_code; struct stat stat_buf; char key_name[ICMAP_KEYNAME_MAXLEN]; dirname = COROSYSCONFDIR "/uidgid.d"; dp = opendir (dirname); if (dp == NULL) return 0; len = offsetof(struct dirent, d_name) + FILENAME_MAX + 1; entry = malloc(len); if (entry == NULL) { res = 0; goto error_exit; } for (return_code = readdir_r(dp, entry, &dirent); dirent != NULL && return_code == 0; return_code = readdir_r(dp, entry, &dirent)) { snprintf(filename, sizeof (filename), "%s/%s", dirname, dirent->d_name); stat (filename, &stat_buf); if (S_ISREG(stat_buf.st_mode)) { fp = fopen (filename, "r"); if (fp == NULL) continue; key_name[0] = 0; res = parse_section(fp, key_name, error_string, uidgid_config_parser_cb, NULL); fclose (fp); if (res != 0) { goto error_exit; } } } error_exit: free (entry); closedir(dp); return res; } /* Read config file and load into icmap */ static int read_config_file_into_icmap( const char **error_string) { FILE *fp; const char *filename; char *error_reason = error_string_response; int res; char key_name[ICMAP_KEYNAME_MAXLEN]; struct main_cp_cb_data data; filename = getenv ("COROSYNC_MAIN_CONFIG_FILE"); if (!filename) filename = COROSYSCONFDIR "/corosync.conf"; fp = fopen (filename, "r"); if (fp == NULL) { char error_str[100]; const char *error_ptr 
= qb_strerror_r(errno, error_str, sizeof(error_str)); snprintf (error_reason, sizeof(error_string_response), "Can't read file %s reason = (%s)", filename, error_ptr); *error_string = error_reason; return -1; } key_name[0] = 0; res = parse_section(fp, key_name, error_string, main_config_parser_cb, &data); fclose(fp); if (res == 0) { res = read_uidgid_files_into_icmap(error_string); } if (res == 0) { snprintf (error_reason, sizeof(error_string_response), "Successfully read main configuration file '%s'.", filename); *error_string = error_reason; } return res; } diff --git a/exec/main.c b/exec/main.c index e263ee50..dc7d299a 100644 --- a/exec/main.c +++ b/exec/main.c @@ -1,1269 +1,1270 @@ /* * Copyright (c) 2002-2006 MontaVista Software, Inc. * Copyright (c) 2006-2012 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /** * \mainpage Corosync * * This is the doxygen generated developer documentation for the Corosync * project. For more information about Corosync, please see the project * web site, corosync.org. * * \section license License * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "quorum.h" #include "totemsrp.h" #include "logconfig.h" #include "totemconfig.h" #include "main.h" #include "sync.h" #include "timer.h" #include "util.h" #include "apidef.h" #include "service.h" #include "schedwrk.h" #ifdef HAVE_SMALL_MEMORY_FOOTPRINT #define IPC_LOGSYS_SIZE 1024*64 #else #define IPC_LOGSYS_SIZE 8192*128 #endif LOGSYS_DECLARE_SYSTEM ("corosync", LOGSYS_MODE_OUTPUT_STDERR, LOG_DAEMON, LOG_INFO); LOGSYS_DECLARE_SUBSYS ("MAIN"); #define SERVER_BACKLOG 5 static int sched_priority = 0; static unsigned int service_count = 32; static struct totem_logging_configuration totem_logging_configuration; static struct corosync_api_v1 *api = NULL; static int sync_in_process = 1; static qb_loop_t *corosync_poll_handle; struct sched_param global_sched_param; static corosync_timer_handle_t corosync_stats_timer_handle; static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid"; qb_loop_t *cs_poll_handle_get (void) { return (corosync_poll_handle); } int cs_poll_dispatch_add (qb_loop_t * handle, int fd, int events, void *data, int (*dispatch_fn) (int fd, int revents, void *data)) { return 
qb_loop_poll_add(handle, QB_LOOP_MED, fd, events, data, dispatch_fn); } int cs_poll_dispatch_delete(qb_loop_t * handle, int fd) { return qb_loop_poll_del(handle, fd); } void corosync_state_dump (void) { int i; for (i = 0; i < SERVICES_COUNT_MAX; i++) { if (corosync_service[i] && corosync_service[i]->exec_dump_fn) { corosync_service[i]->exec_dump_fn (); } } } static void corosync_blackbox_write_to_file (void) { char fname[PATH_MAX]; char time_str[PATH_MAX]; struct tm cur_time_tm; time_t cur_time_t; cur_time_t = time(NULL); localtime_r(&cur_time_t, &cur_time_tm); strftime(time_str, PATH_MAX, "%Y-%m-%dT%H:%M:%S", &cur_time_tm); snprintf(fname, PATH_MAX, "%s/fdata-%s-%lld", LOCALSTATEDIR "/lib/corosync", time_str, (long long int)getpid()); qb_log_blackbox_write_to_file(fname); unlink(LOCALSTATEDIR "/lib/corosync/fdata"); symlink(fname, LOCALSTATEDIR "/lib/corosync/fdata"); } static void unlink_all_completed (void) { api->timer_delete (corosync_stats_timer_handle); qb_loop_stop (corosync_poll_handle); icmap_fini(); } void corosync_shutdown_request (void) { corosync_service_unlink_all (api, unlink_all_completed); } static int32_t sig_diag_handler (int num, void *data) { corosync_state_dump (); return 0; } static int32_t sig_exit_handler (int num, void *data) { corosync_service_unlink_all (api, unlink_all_completed); return 0; } static void sigsegv_handler (int num) { (void)signal (SIGSEGV, SIG_DFL); corosync_blackbox_write_to_file (); qb_log_fini(); raise (SIGSEGV); } static void sigabrt_handler (int num) { (void)signal (SIGABRT, SIG_DFL); corosync_blackbox_write_to_file (); qb_log_fini(); raise (SIGABRT); } #define LOCALHOST_IP inet_addr("127.0.0.1") static void *corosync_group_handle; static struct totempg_group corosync_group = { .group = "a", .group_len = 1 }; static void serialize_lock (void) { } static void serialize_unlock (void) { } static void corosync_sync_completed (void) { log_printf (LOGSYS_LEVEL_NOTICE, "Completed service synchronization, ready to provide 
service."); sync_in_process = 0; cs_ipcs_sync_state_changed(sync_in_process); cs_ipc_allow_connections(1); } static int corosync_sync_callbacks_retrieve ( int service_id, struct sync_callbacks *callbacks) { if (corosync_service[service_id] == NULL) { return (-1); } if (callbacks == NULL) { return (0); } callbacks->name = corosync_service[service_id]->name; callbacks->sync_init = corosync_service[service_id]->sync_init; callbacks->sync_process = corosync_service[service_id]->sync_process; callbacks->sync_activate = corosync_service[service_id]->sync_activate; callbacks->sync_abort = corosync_service[service_id]->sync_abort; return (0); } static struct memb_ring_id corosync_ring_id; static void member_object_joined (unsigned int nodeid) { char member_ip[ICMAP_KEYNAME_MAXLEN]; char member_join_count[ICMAP_KEYNAME_MAXLEN]; char member_status[ICMAP_KEYNAME_MAXLEN]; snprintf(member_ip, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.srp.members.%u.ip", nodeid); snprintf(member_join_count, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.srp.members.%u.join_count", nodeid); snprintf(member_status, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.srp.members.%u.status", nodeid); if (icmap_get(member_ip, NULL, NULL, NULL) == CS_OK) { icmap_inc(member_join_count); icmap_set_string(member_status, "joined"); } else { icmap_set_string(member_ip, (char*)api->totem_ifaces_print (nodeid)); icmap_set_uint32(member_join_count, 1); icmap_set_string(member_status, "joined"); } log_printf (LOGSYS_LEVEL_DEBUG, "Member joined: %s", api->totem_ifaces_print (nodeid)); } static void member_object_left (unsigned int nodeid) { char member_status[ICMAP_KEYNAME_MAXLEN]; snprintf(member_status, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.srp.members.%u.status", nodeid); icmap_set_string(member_status, "left"); log_printf (LOGSYS_LEVEL_DEBUG, "Member left: %s", api->totem_ifaces_print (nodeid)); } static void confchg_fn ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t 
member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id) { int i; int abort_activate = 0; if (sync_in_process == 1) { abort_activate = 1; } sync_in_process = 1; cs_ipcs_sync_state_changed(sync_in_process); memcpy (&corosync_ring_id, ring_id, sizeof (struct memb_ring_id)); for (i = 0; i < left_list_entries; i++) { member_object_left (left_list[i]); } for (i = 0; i < joined_list_entries; i++) { member_object_joined (joined_list[i]); } /* * Call configuration change for all services */ for (i = 0; i < service_count; i++) { if (corosync_service[i] && corosync_service[i]->confchg_fn) { corosync_service[i]->confchg_fn (configuration_type, member_list, member_list_entries, left_list, left_list_entries, joined_list, joined_list_entries, ring_id); } } if (abort_activate) { sync_abort (); } if (configuration_type == TOTEM_CONFIGURATION_TRANSITIONAL) { sync_save_transitional (member_list, member_list_entries, ring_id); } if (configuration_type == TOTEM_CONFIGURATION_REGULAR) { sync_start (member_list, member_list_entries, ring_id); } } static void priv_drop (void) { return; /* TODO: we are still not dropping privs */ } static void corosync_tty_detach (void) { FILE *r; /* * Disconnect from TTY if this is not a debug run */ switch (fork ()) { case -1: corosync_exit_error (COROSYNC_DONE_FORK); break; case 0: /* * child which is disconnected, run this process */ break; default: exit (0); break; } /* Create new session */ (void)setsid(); /* * Map stdin/out/err to /dev/null. 
*/ r = freopen("/dev/null", "r", stdin); if (r == NULL) { corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR); } r = freopen("/dev/null", "a", stderr); if (r == NULL) { corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR); } r = freopen("/dev/null", "a", stdout); if (r == NULL) { corosync_exit_error (COROSYNC_DONE_STD_TO_NULL_REDIR); } } static void corosync_mlockall (void) { int res; struct rlimit rlimit; rlimit.rlim_cur = RLIM_INFINITY; rlimit.rlim_max = RLIM_INFINITY; #ifndef RLIMIT_MEMLOCK #define RLIMIT_MEMLOCK RLIMIT_VMEM #endif setrlimit (RLIMIT_MEMLOCK, &rlimit); res = mlockall (MCL_CURRENT | MCL_FUTURE); if (res == -1) { LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING, "Could not lock memory of service to avoid page faults"); }; } static void corosync_totem_stats_updater (void *data) { totempg_stats_t * stats; uint32_t total_mtt_rx_token; uint32_t total_backlog_calc; uint32_t total_token_holdtime; int t, prev, i; int32_t token_count; char key_name[ICMAP_KEYNAME_MAXLEN]; stats = api->totem_get_stats(); icmap_set_uint32("runtime.totem.pg.msg_reserved", stats->msg_reserved); icmap_set_uint32("runtime.totem.pg.msg_queue_avail", stats->msg_queue_avail); icmap_set_uint64("runtime.totem.pg.mrp.srp.orf_token_tx", stats->mrp->srp->orf_token_tx); icmap_set_uint64("runtime.totem.pg.mrp.srp.orf_token_rx", stats->mrp->srp->orf_token_rx); icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_merge_detect_tx", stats->mrp->srp->memb_merge_detect_tx); icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_merge_detect_rx", stats->mrp->srp->memb_merge_detect_rx); icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_join_tx", stats->mrp->srp->memb_join_tx); icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_join_rx", stats->mrp->srp->memb_join_rx); icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_tx", stats->mrp->srp->mcast_tx); icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_retx", stats->mrp->srp->mcast_retx); icmap_set_uint64("runtime.totem.pg.mrp.srp.mcast_rx", stats->mrp->srp->mcast_rx); 
icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_commit_token_tx", stats->mrp->srp->memb_commit_token_tx); icmap_set_uint64("runtime.totem.pg.mrp.srp.memb_commit_token_rx", stats->mrp->srp->memb_commit_token_rx); icmap_set_uint64("runtime.totem.pg.mrp.srp.token_hold_cancel_tx", stats->mrp->srp->token_hold_cancel_tx); icmap_set_uint64("runtime.totem.pg.mrp.srp.token_hold_cancel_rx", stats->mrp->srp->token_hold_cancel_rx); icmap_set_uint64("runtime.totem.pg.mrp.srp.operational_entered", stats->mrp->srp->operational_entered); icmap_set_uint64("runtime.totem.pg.mrp.srp.operational_token_lost", stats->mrp->srp->operational_token_lost); icmap_set_uint64("runtime.totem.pg.mrp.srp.gather_entered", stats->mrp->srp->gather_entered); icmap_set_uint64("runtime.totem.pg.mrp.srp.gather_token_lost", stats->mrp->srp->gather_token_lost); icmap_set_uint64("runtime.totem.pg.mrp.srp.commit_entered", stats->mrp->srp->commit_entered); icmap_set_uint64("runtime.totem.pg.mrp.srp.commit_token_lost", stats->mrp->srp->commit_token_lost); icmap_set_uint64("runtime.totem.pg.mrp.srp.recovery_entered", stats->mrp->srp->recovery_entered); icmap_set_uint64("runtime.totem.pg.mrp.srp.recovery_token_lost", stats->mrp->srp->recovery_token_lost); icmap_set_uint64("runtime.totem.pg.mrp.srp.consensus_timeouts", stats->mrp->srp->consensus_timeouts); icmap_set_uint64("runtime.totem.pg.mrp.srp.rx_msg_dropped", stats->mrp->srp->rx_msg_dropped); icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_gather", stats->mrp->srp->continuous_gather); icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_sendmsg_failures", stats->mrp->srp->continuous_sendmsg_failures); icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ? 
1 : 0); if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER || stats->mrp->srp->continuous_sendmsg_failures > MAX_NO_CONT_SENDMSG_FAILURES) { log_printf (LOGSYS_LEVEL_WARNING, "Totem is unable to form a cluster because of an " "operating system or network fault. The most common " "cause of this message is that the local firewall is " "configured improperly."); icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 1); } else { icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 0); } for (i = 0; i < stats->mrp->srp->rrp->interface_count; i++) { snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.rrp.%u.faulty", i); icmap_set_uint8(key_name, stats->mrp->srp->rrp->faulty[i]); } total_mtt_rx_token = 0; total_token_holdtime = 0; total_backlog_calc = 0; token_count = 0; t = stats->mrp->srp->latest_token; while (1) { if (t == 0) prev = TOTEM_TOKEN_STATS_MAX - 1; else prev = t - 1; if (prev == stats->mrp->srp->earliest_token) break; /* if tx == 0, then dropped token (not ours) */ if (stats->mrp->srp->token[t].tx != 0 || (stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx) > 0 ) { total_mtt_rx_token += (stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx); total_token_holdtime += (stats->mrp->srp->token[t].tx - stats->mrp->srp->token[t].rx); total_backlog_calc += stats->mrp->srp->token[t].backlog_calc; token_count++; } t = prev; } if (token_count) { icmap_set_uint32("runtime.totem.pg.mrp.srp.mtt_rx_token", (total_mtt_rx_token / token_count)); icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_token_workload", (total_token_holdtime / token_count)); icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_backlog_calc", (total_backlog_calc / token_count)); } cs_ipcs_stats_update(); api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL, corosync_totem_stats_updater, &corosync_stats_timer_handle); } static void totem_dynamic_notify( int32_t event, const char *key_name, struct icmap_notify_value 
new_val, struct icmap_notify_value old_val, void *user_data) { int res; int ring_no; int member_no; struct totem_ip_address member; int add_new_member = 0; int remove_old_member = 0; char tmp_str[ICMAP_KEYNAME_MAXLEN]; res = sscanf(key_name, "nodelist.node.%u.ring%u%s", &member_no, &ring_no, tmp_str); if (res != 3) return ; if (strcmp(tmp_str, "_addr") != 0) { return; } if (event == ICMAP_TRACK_ADD && new_val.type == ICMAP_VALUETYPE_STRING) { add_new_member = 1; } if (event == ICMAP_TRACK_DELETE && old_val.type == ICMAP_VALUETYPE_STRING) { remove_old_member = 1; } if (event == ICMAP_TRACK_MODIFY && new_val.type == ICMAP_VALUETYPE_STRING && old_val.type == ICMAP_VALUETYPE_STRING) { add_new_member = 1; remove_old_member = 1; } if (remove_old_member) { log_printf(LOGSYS_LEVEL_DEBUG, "removing dynamic member %s for ring %u", (char *)old_val.data, ring_no); if (totemip_parse(&member, (char *)old_val.data, 0) == 0) { totempg_member_remove (&member, ring_no); } } if (add_new_member) { log_printf(LOGSYS_LEVEL_DEBUG, "adding dynamic member %s for ring %u", (char *)new_val.data, ring_no); if (totemip_parse(&member, (char *)new_val.data, 0) == 0) { totempg_member_add (&member, ring_no); } } } static void corosync_totem_dynamic_init (void) { icmap_track_t icmap_track = NULL; icmap_track_add("nodelist.node.", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX, totem_dynamic_notify, NULL, &icmap_track); } static void corosync_totem_stats_init (void) { icmap_set_uint32("runtime.totem.pg.mrp.srp.mtt_rx_token", 0); icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_token_workload", 0); icmap_set_uint32("runtime.totem.pg.mrp.srp.avg_backlog_calc", 0); /* start stats timer */ api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL, corosync_totem_stats_updater, &corosync_stats_timer_handle); } static void deliver_fn ( unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required) { const struct qb_ipc_request_header *header; 
int32_t service; int32_t fn_id; uint32_t id; header = msg; if (endian_conversion_required) { id = swab32 (header->id); } else { id = header->id; } /* * Call the proper executive handler */ service = id >> 16; fn_id = id & 0xffff; if (!corosync_service[service]) { return; } if (fn_id >= corosync_service[service]->exec_engine_count) { log_printf(LOGSYS_LEVEL_WARNING, "discarded unknown message %d for service %d (max id %d)", fn_id, service, corosync_service[service]->exec_engine_count); return; } icmap_fast_inc(service_stats_rx[service][fn_id]); if (endian_conversion_required) { assert(corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn != NULL); corosync_service[service]->exec_engine[fn_id].exec_endian_convert_fn ((void *)msg); } corosync_service[service]->exec_engine[fn_id].exec_handler_fn (msg, nodeid); } int main_mcast ( const struct iovec *iovec, unsigned int iov_len, unsigned int guarantee) { const struct qb_ipc_request_header *req = iovec->iov_base; int32_t service; int32_t fn_id; service = req->id >> 16; fn_id = req->id & 0xffff; if (corosync_service[service]) { icmap_fast_inc(service_stats_tx[service][fn_id]); } return (totempg_groups_mcast_joined (corosync_group_handle, iovec, iov_len, guarantee)); } static qb_loop_timer_handle recheck_the_q_level_timer; void corosync_recheck_the_q_level(void *data) { totempg_check_q_level(corosync_group_handle); if (cs_ipcs_q_level_get() == TOTEM_Q_LEVEL_CRITICAL) { qb_loop_timer_add(cs_poll_handle_get(), QB_LOOP_MED, 1*QB_TIME_NS_IN_MSEC, NULL, corosync_recheck_the_q_level, &recheck_the_q_level_timer); } } struct sending_allowed_private_data_struct { int reserved_msgs; }; int corosync_sending_allowed ( unsigned int service, unsigned int id, const void *msg, void *sending_allowed_private_data) { struct sending_allowed_private_data_struct *pd = (struct sending_allowed_private_data_struct *)sending_allowed_private_data; struct iovec reserve_iovec; struct qb_ipc_request_header *header = (struct 
qb_ipc_request_header *)msg; int sending_allowed; reserve_iovec.iov_base = (char *)header; reserve_iovec.iov_len = header->size; pd->reserved_msgs = totempg_groups_joined_reserve ( corosync_group_handle, &reserve_iovec, 1); if (pd->reserved_msgs == -1) { return -EINVAL; } sending_allowed = QB_FALSE; if (corosync_quorum_is_quorate() == 1 || corosync_service[service]->allow_inquorate == CS_LIB_ALLOW_INQUORATE) { // we are quorate // now check flow control if (corosync_service[service]->lib_engine[id].flow_control == CS_LIB_FLOW_CONTROL_NOT_REQUIRED) { sending_allowed = QB_TRUE; } else if (pd->reserved_msgs && sync_in_process == 0) { sending_allowed = QB_TRUE; } else if (pd->reserved_msgs == 0) { return -ENOBUFS; } else /* (sync_in_process) */ { return -EINPROGRESS; } } else { return -EHOSTUNREACH; } return (sending_allowed); } void corosync_sending_allowed_release (void *sending_allowed_private_data) { struct sending_allowed_private_data_struct *pd = (struct sending_allowed_private_data_struct *)sending_allowed_private_data; if (pd->reserved_msgs == -1) { return; } totempg_groups_joined_release (pd->reserved_msgs); } int message_source_is_local (const mar_message_source_t *source) { int ret = 0; assert (source != NULL); if (source->nodeid == totempg_my_nodeid_get ()) { ret = 1; } return ret; } void message_source_set ( mar_message_source_t *source, void *conn) { assert ((source != NULL) && (conn != NULL)); memset (source, 0, sizeof (mar_message_source_t)); source->nodeid = totempg_my_nodeid_get (); source->conn = conn; } static void corosync_setscheduler (void) { #if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) && defined(HAVE_SCHED_SETSCHEDULER) int res; sched_priority = sched_get_priority_max (SCHED_RR); if (sched_priority != -1) { global_sched_param.sched_priority = sched_priority; res = sched_setscheduler (0, SCHED_RR, &global_sched_param); if (res == -1) { LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "Could not set SCHED_RR at 
priority %d", global_sched_param.sched_priority); global_sched_param.sched_priority = 0; #ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET qb_log_thread_priority_set (SCHED_OTHER, 0); #endif } else { /* * Turn on SCHED_RR in logsys system */ #ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET res = qb_log_thread_priority_set (SCHED_RR, sched_priority); #else res = -1; #endif if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Could not set logsys thread priority." " Can't continue because of priority inversions."); corosync_exit_error (COROSYNC_DONE_LOGSETUP); } } } else { LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING, "Could not get maximum scheduler priority"); sched_priority = 0; } #else log_printf(LOGSYS_LEVEL_WARNING, "The Platform is missing process priority setting features. Leaving at default."); #endif } static void _logsys_log_printf(int level, int subsys, const char *function_name, const char *file_name, int file_line, const char *format, ...) __attribute__((format(printf, 6, 7))); static void _logsys_log_printf(int level, int subsys, const char *function_name, const char *file_name, int file_line, const char *format, ...) 
{ va_list ap; va_start(ap, format); qb_log_from_external_source_va(function_name, file_name, format, level, file_line, subsys, ap); va_end(ap); } static void fplay_key_change_notify_fn ( int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data) { if (strcmp(key_name, "runtime.blackbox.dump_flight_data") == 0) { fprintf(stderr,"Writetofile\n"); corosync_blackbox_write_to_file (); } if (strcmp(key_name, "runtime.blackbox.dump_state") == 0) { fprintf(stderr,"statefump\n"); corosync_state_dump (); } } static void corosync_fplay_control_init (void) { icmap_track_t track = NULL; icmap_set_string("runtime.blackbox.dump_flight_data", "no"); icmap_set_string("runtime.blackbox.dump_state", "no"); icmap_track_add("runtime.blackbox.dump_flight_data", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY, fplay_key_change_notify_fn, NULL, &track); icmap_track_add("runtime.blackbox.dump_state", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY, fplay_key_change_notify_fn, NULL, &track); } /* * Set RO flag for keys, which ether doesn't make sense to change by user (statistic) * or which when changed are not reflected by runtime (totem.crypto_cipher, ...). * * Also some RO keys cannot be determined in this stage, so they are set later in * other functions (like nodelist.local_node_pos, ...) 
*/ static void set_icmap_ro_keys_flag (void) { /* * Set RO flag for all keys of internal configuration and runtime statistics */ icmap_set_ro_access("internal_configuration.", CS_TRUE, CS_TRUE); icmap_set_ro_access("runtime.connections.", CS_TRUE, CS_TRUE); icmap_set_ro_access("runtime.totem.", CS_TRUE, CS_TRUE); icmap_set_ro_access("runtime.services.", CS_TRUE, CS_TRUE); /* * Set RO flag for constrete keys of configuration which can't be changed * during runtime */ icmap_set_ro_access("totem.crypto_cipher", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.crypto_hash", CS_FALSE, CS_TRUE); + icmap_set_ro_access("totem.crypto_compat", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.secauth", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.rrp_mode", CS_FALSE, CS_TRUE); icmap_set_ro_access("totem.netmtu", CS_FALSE, CS_TRUE); icmap_set_ro_access("qb.ipc_type", CS_FALSE, CS_TRUE); } static void main_service_ready (void) { int res; /* * This must occur after totempg is initialized because "this_ip" must be set */ res = corosync_service_defaults_link_and_init (api); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Could not initialize default services"); corosync_exit_error (COROSYNC_DONE_INIT_SERVICES); } cs_ipcs_init(); corosync_totem_stats_init (); corosync_fplay_control_init (); corosync_totem_dynamic_init (); sync_init ( corosync_sync_callbacks_retrieve, corosync_sync_completed); } static enum e_corosync_done corosync_flock (const char *lockfile, pid_t pid) { struct flock lock; enum e_corosync_done err; char pid_s[17]; int fd_flag; int lf; err = COROSYNC_DONE_EXIT; lf = open (lockfile, O_WRONLY | O_CREAT, 0640); if (lf == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create lock file."); return (COROSYNC_DONE_AQUIRE_LOCK); } retry_fcntl: lock.l_type = F_WRLCK; lock.l_start = 0; lock.l_whence = SEEK_SET; lock.l_len = 0; if (fcntl (lf, F_SETLK, &lock) == -1) { switch (errno) { case EINTR: goto retry_fcntl; break; case EAGAIN: case EACCES: log_printf 
(LOGSYS_LEVEL_ERROR, "Another Corosync instance is already running."); err = COROSYNC_DONE_ALREADY_RUNNING; goto error_close; break; default: log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't aquire lock. Error was %s", strerror(errno)); err = COROSYNC_DONE_AQUIRE_LOCK; goto error_close; break; } } if (ftruncate (lf, 0) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't truncate lock file. Error was %s", strerror (errno)); err = COROSYNC_DONE_AQUIRE_LOCK; goto error_close_unlink; } memset (pid_s, 0, sizeof (pid_s)); snprintf (pid_s, sizeof (pid_s) - 1, "%u\n", pid); retry_write: if (write (lf, pid_s, strlen (pid_s)) != strlen (pid_s)) { if (errno == EINTR) { goto retry_write; } else { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't write pid to lock file. " "Error was %s", strerror (errno)); err = COROSYNC_DONE_AQUIRE_LOCK; goto error_close_unlink; } } if ((fd_flag = fcntl (lf, F_GETFD, 0)) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't get close-on-exec flag from lock file. " "Error was %s", strerror (errno)); err = COROSYNC_DONE_AQUIRE_LOCK; goto error_close_unlink; } fd_flag |= FD_CLOEXEC; if (fcntl (lf, F_SETFD, fd_flag) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't set close-on-exec flag to lock file. 
" "Error was %s", strerror (errno)); err = COROSYNC_DONE_AQUIRE_LOCK; goto error_close_unlink; } return (err); error_close_unlink: unlink (lockfile); error_close: close (lf); return (err); } int main (int argc, char **argv, char **envp) { const char *error_string; struct totem_config totem_config; int res, ch; int background, setprio; struct stat stat_out; char corosync_lib_dir[PATH_MAX]; enum e_corosync_done flock_err; uint64_t totem_config_warnings; /* default configuration */ background = 1; setprio = 0; while ((ch = getopt (argc, argv, "fprv")) != EOF) { switch (ch) { case 'f': background = 0; logsys_config_mode_set (NULL, LOGSYS_MODE_OUTPUT_STDERR|LOGSYS_MODE_THREADED|LOGSYS_MODE_FORK); break; case 'p': break; case 'r': setprio = 1; break; case 'v': printf ("Corosync Cluster Engine, version '%s'\n", VERSION); printf ("Copyright (c) 2006-2009 Red Hat, Inc.\n"); return EXIT_SUCCESS; break; default: fprintf(stderr, \ "usage:\n"\ " -f : Start application in foreground.\n"\ " -p : Does nothing. 
\n"\ " -r : Set round robin realtime scheduling \n"\ " -v : Display version and SVN revision of Corosync and exit.\n"); return EXIT_FAILURE; } } /* * Set round robin realtime scheduling with priority 99 * Lock all memory to avoid page faults which may interrupt * application healthchecking */ if (setprio) { corosync_setscheduler (); } corosync_mlockall (); log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine ('%s'): started and ready to provide service.", VERSION); log_printf (LOGSYS_LEVEL_INFO, "Corosync built-in features:" PACKAGE_FEATURES ""); corosync_poll_handle = qb_loop_create (); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW, SIGUSR2, NULL, sig_diag_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGINT, NULL, sig_exit_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGQUIT, NULL, sig_exit_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGTERM, NULL, sig_exit_handler, NULL); (void)signal (SIGSEGV, sigsegv_handler); (void)signal (SIGABRT, sigabrt_handler); #if MSG_NOSIGNAL != 0 (void)signal (SIGPIPE, SIG_IGN); #endif if (icmap_init() != CS_OK) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't initialize configuration component."); corosync_exit_error (COROSYNC_DONE_ICMAP); } set_icmap_ro_keys_flag(); /* * Initialize the corosync_api_v1 definition */ api = apidef_get (); res = coroparse_configparse(&error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); } res = corosync_log_config_read (&error_string); if (res == -1) { /* * if we are here, we _must_ flush the logsys queue * and try to inform that we couldn't read the config. * this is a desperate attempt before certain death * and there is no guarantee that we can print to stderr * nor that logsys is sending the messages where we expect. 
*/ log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); fprintf(stderr, "%s", error_string); syslog (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD); } /* * Make sure required directory is present */ sprintf (corosync_lib_dir, "%s/lib/corosync", LOCALSTATEDIR); res = stat (corosync_lib_dir, &stat_out); if ((res == -1) || (res == 0 && !S_ISDIR(stat_out.st_mode))) { log_printf (LOGSYS_LEVEL_ERROR, "Required directory not present %s. Please create it.", corosync_lib_dir); corosync_exit_error (COROSYNC_DONE_DIR_NOT_PRESENT); } res = totem_config_read (&totem_config, &error_string, &totem_config_warnings); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); } if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_IGNORED) { log_printf (LOGSYS_LEVEL_WARNING, "member section is used together with nodelist. Members ignored."); } if (totem_config_warnings & TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED) { log_printf (LOGSYS_LEVEL_WARNING, "member section is deprecated."); } if (totem_config_warnings & TOTEM_CONFIG_WARNING_TOTEM_NODEID_IGNORED) { log_printf (LOGSYS_LEVEL_WARNING, "nodeid appears both in totem section and nodelist. 
Nodelist one is used."); } if (totem_config_warnings != 0) { log_printf (LOGSYS_LEVEL_WARNING, "Please migrate config file to nodelist."); } res = totem_config_keyread (&totem_config, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); } res = totem_config_validate (&totem_config, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (COROSYNC_DONE_MAINCONFIGREAD); } totem_config.totem_logging_configuration = totem_logging_configuration; totem_config.totem_logging_configuration.log_subsys_id = _logsys_subsys_create("TOTEM", "totem"); totem_config.totem_logging_configuration.log_level_security = LOGSYS_LEVEL_WARNING; totem_config.totem_logging_configuration.log_level_error = LOGSYS_LEVEL_ERROR; totem_config.totem_logging_configuration.log_level_warning = LOGSYS_LEVEL_WARNING; totem_config.totem_logging_configuration.log_level_notice = LOGSYS_LEVEL_NOTICE; totem_config.totem_logging_configuration.log_level_debug = LOGSYS_LEVEL_DEBUG; totem_config.totem_logging_configuration.log_level_trace = LOGSYS_LEVEL_TRACE; totem_config.totem_logging_configuration.log_printf = _logsys_log_printf; logsys_config_apply(); /* * Now we are fully initialized. */ if (background) { corosync_tty_detach (); } if (logsys_thread_start() != 0) { log_printf (LOGSYS_LEVEL_ERROR, "Can't initialize log thread"); corosync_exit_error (COROSYNC_DONE_LOGCONFIGREAD); } if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) != COROSYNC_DONE_EXIT) { corosync_exit_error (flock_err); } /* * if totempg_initialize doesn't have root priveleges, it cannot * bind to a specific interface. 
This only matters if * there is more then one interface in a system, so * in this case, only a warning is printed */ /* * Join multicast group and setup delivery * and configuration change functions */ totempg_initialize ( corosync_poll_handle, &totem_config); totempg_service_ready_register ( main_service_ready); totempg_groups_initialize ( &corosync_group_handle, deliver_fn, confchg_fn); totempg_groups_join ( corosync_group_handle, &corosync_group, 1); /* * Drop root privleges to user 'corosync' * TODO: Don't really need full root capabilities; * needed capabilities are: * CAP_NET_RAW (bindtodevice) * CAP_SYS_NICE (setscheduler) * CAP_IPC_LOCK (mlockall) */ priv_drop (); schedwrk_init ( serialize_lock, serialize_unlock); /* * Start main processing loop */ qb_loop_run (corosync_poll_handle); /* * Exit was requested */ totempg_finalize (); /* * free the loop resources */ qb_loop_destroy (corosync_poll_handle); /* * free up the icmap */ /* * Remove pid lock file */ unlink (corosync_lock_file); corosync_exit_error (COROSYNC_DONE_EXIT); return EXIT_SUCCESS; } diff --git a/exec/totemconfig.c b/exec/totemconfig.c index 17d8e03b..e1badad5 100644 --- a/exec/totemconfig.c +++ b/exec/totemconfig.c @@ -1,1089 +1,1103 @@ /* * Copyright (c) 2002-2005 MontaVista Software, Inc. * Copyright (c) 2006-2012 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. 
* - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "util.h" #include "totemconfig.h" #define TOKEN_RETRANSMITS_BEFORE_LOSS_CONST 4 #define TOKEN_TIMEOUT 1000 #define JOIN_TIMEOUT 50 #define MERGE_TIMEOUT 200 #define DOWNCHECK_TIMEOUT 1000 #define FAIL_TO_RECV_CONST 2500 #define SEQNO_UNCHANGED_CONST 30 #define MINIMUM_TIMEOUT (int)(1000/HZ)*3 #define MAX_NETWORK_DELAY 50 #define WINDOW_SIZE 50 #define MAX_MESSAGES 17 #define MISS_COUNT_CONST 5 #define RRP_PROBLEM_COUNT_TIMEOUT 2000 #define RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT 10 #define RRP_PROBLEM_COUNT_THRESHOLD_MIN 2 #define RRP_AUTORECOVERY_CHECK_TIMEOUT 1000 #define DEFAULT_PORT 5405 static char error_string_response[512]; static void add_totem_config_notification(struct totem_config *totem_config); static void totem_volatile_config_read (struct totem_config *totem_config) { char *str; 
icmap_get_uint32("totem.token", &totem_config->token_timeout); icmap_get_uint32("totem.token_retransmit", &totem_config->token_retransmit_timeout); icmap_get_uint32("totem.hold", &totem_config->token_hold_timeout); icmap_get_uint32("totem.token_retransmits_before_loss_const", &totem_config->token_retransmits_before_loss_const); icmap_get_uint32("totem.join", &totem_config->join_timeout); icmap_get_uint32("totem.send_join", &totem_config->send_join_timeout); icmap_get_uint32("totem.consensus", &totem_config->consensus_timeout); icmap_get_uint32("totem.merge", &totem_config->merge_timeout); icmap_get_uint32("totem.downcheck", &totem_config->downcheck_timeout); icmap_get_uint32("totem.fail_recv_const", &totem_config->fail_to_recv_const); icmap_get_uint32("totem.seqno_unchanged_const", &totem_config->seqno_unchanged_const); icmap_get_uint32("totem.rrp_token_expired_timeout", &totem_config->rrp_token_expired_timeout); icmap_get_uint32("totem.rrp_problem_count_timeout", &totem_config->rrp_problem_count_timeout); icmap_get_uint32("totem.rrp_problem_count_threshold", &totem_config->rrp_problem_count_threshold); icmap_get_uint32("totem.rrp_problem_count_mcast_threshold", &totem_config->rrp_problem_count_mcast_threshold); icmap_get_uint32("totem.rrp_autorecovery_check_timeout", &totem_config->rrp_autorecovery_check_timeout); icmap_get_uint32("totem.heartbeat_failures_allowed", &totem_config->heartbeat_failures_allowed); icmap_get_uint32("totem.max_network_delay", &totem_config->max_network_delay); icmap_get_uint32("totem.window_size", &totem_config->window_size); icmap_get_uint32("totem.max_messages", &totem_config->max_messages); icmap_get_uint32("totem.miss_count_const", &totem_config->miss_count_const); if (icmap_get_string("totem.vsftype", &str) == CS_OK) { totem_config->vsf_type = str; } } static void totem_get_crypto(struct totem_config *totem_config) { char *str; const char *tmp_cipher; const char *tmp_hash; + const char *tmp_compat; tmp_hash = "sha1"; tmp_cipher = 
"aes256"; + tmp_compat = "2.2"; if (icmap_get_string("totem.secauth", &str) == CS_OK) { if (strcmp (str, "off") == 0) { tmp_hash = "none"; tmp_cipher = "none"; } free(str); } if (icmap_get_string("totem.crypto_cipher", &str) == CS_OK) { if (strcmp(str, "none") == 0) { tmp_cipher = "none"; } if (strcmp(str, "aes256") == 0) { tmp_cipher = "aes256"; } if (strcmp(str, "aes192") == 0) { tmp_cipher = "aes192"; } if (strcmp(str, "aes128") == 0) { tmp_cipher = "aes128"; } if (strcmp(str, "3des") == 0) { tmp_cipher = "3des"; } free(str); } if (icmap_get_string("totem.crypto_hash", &str) == CS_OK) { if (strcmp(str, "none") == 0) { tmp_hash = "none"; } if (strcmp(str, "md5") == 0) { tmp_hash = "md5"; } if (strcmp(str, "sha1") == 0) { tmp_hash = "sha1"; } if (strcmp(str, "sha256") == 0) { tmp_hash = "sha256"; } if (strcmp(str, "sha384") == 0) { tmp_hash = "sha384"; } if (strcmp(str, "sha512") == 0) { tmp_hash = "sha512"; } free(str); } + if (icmap_get_string("totem.crypto_compat", &str) == CS_OK) { + if (strcmp(str, "2.0") == 0) { + tmp_compat = "2.0"; + } + if (strcmp(str, "2.2") == 0) { + tmp_compat = "2.2"; + } + free(str); + } + free(totem_config->crypto_cipher_type); free(totem_config->crypto_hash_type); + free(totem_config->crypto_compat_type); totem_config->crypto_cipher_type = strdup(tmp_cipher); totem_config->crypto_hash_type = strdup(tmp_hash); + totem_config->crypto_compat_type = strdup(tmp_compat); } static uint16_t generate_cluster_id (const char *cluster_name) { int i; int value = 0; for (i = 0; i < strlen(cluster_name); i++) { value <<= 1; value += cluster_name[i]; } return (value & 0xFFFF); } static int get_cluster_mcast_addr ( const char *cluster_name, const struct totem_ip_address *bindnet, unsigned int ringnumber, struct totem_ip_address *res) { uint16_t clusterid; char addr[INET6_ADDRSTRLEN + 1]; int err; if (cluster_name == NULL) { return (-1); } clusterid = generate_cluster_id(cluster_name) + ringnumber; memset (res, 0, sizeof(res)); switch 
(bindnet->family) { case AF_INET: snprintf(addr, sizeof(addr), "239.192.%d.%d", clusterid >> 8, clusterid % 0xFF); break; case AF_INET6: snprintf(addr, sizeof(addr), "ff15::%x", clusterid); break; default: /* * Unknown family */ return (-1); } err = totemip_parse (res, addr, 0); return (err); } static int find_local_node_in_nodelist(struct totem_config *totem_config) { icmap_iter_t iter; const char *iter_key; int res = 0; int node_pos; int local_node_pos = -1; struct totem_ip_address bind_addr; int interface_up, interface_num; char tmp_key[ICMAP_KEYNAME_MAXLEN]; char *node_addr_str; struct totem_ip_address node_addr; res = totemip_iface_check(&totem_config->interfaces[0].bindnet, &bind_addr, &interface_up, &interface_num, totem_config->clear_node_high_bit); if (res == -1) { return (-1); } iter = icmap_iter_init("nodelist.node."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key); if (res != 2) { continue; } if (strcmp(tmp_key, "ring0_addr") != 0) { continue; } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", node_pos); if (icmap_get_string(tmp_key, &node_addr_str) != CS_OK) { continue; } res = totemip_parse (&node_addr, node_addr_str, 0); free(node_addr_str); if (res == -1) { continue ; } if (totemip_equal(&bind_addr, &node_addr)) { local_node_pos = node_pos; } } icmap_iter_finalize(iter); return (local_node_pos); } static void put_nodelist_members_to_config(struct totem_config *totem_config) { icmap_iter_t iter, iter2; const char *iter_key, *iter_key2; int res = 0; int node_pos; char tmp_key[ICMAP_KEYNAME_MAXLEN]; char tmp_key2[ICMAP_KEYNAME_MAXLEN]; char *node_addr_str; int member_count; unsigned int ringnumber = 0; iter = icmap_iter_init("nodelist.node."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key); if (res != 2) { continue; } if (strcmp(tmp_key, "ring0_addr") != 0) { 
continue; } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.", node_pos); iter2 = icmap_iter_init(tmp_key); while ((iter_key2 = icmap_iter_next(iter2, NULL, NULL)) != NULL) { res = sscanf(iter_key2, "nodelist.node.%u.ring%u%s", &node_pos, &ringnumber, tmp_key2); if (res != 3 || strcmp(tmp_key2, "_addr") != 0) { continue; } if (icmap_get_string(iter_key2, &node_addr_str) != CS_OK) { continue; } member_count = totem_config->interfaces[ringnumber].member_count; res = totemip_parse(&totem_config->interfaces[ringnumber].member_list[member_count], node_addr_str, 0); if (res != -1) { totem_config->interfaces[ringnumber].member_count++; } free(node_addr_str); } icmap_iter_finalize(iter2); } icmap_iter_finalize(iter); } static void config_convert_nodelist_to_interface(struct totem_config *totem_config) { icmap_iter_t iter; const char *iter_key; int res = 0; int node_pos; char tmp_key[ICMAP_KEYNAME_MAXLEN]; char tmp_key2[ICMAP_KEYNAME_MAXLEN]; char *node_addr_str; unsigned int ringnumber = 0; struct list_head addrs; struct list_head *list; struct totem_ip_if_address *if_addr; struct totem_ip_address node_addr; int node_found; if (totemip_getifaddrs(&addrs) == -1) { return ; } iter = icmap_iter_init("nodelist.node."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key); if (res != 2) { continue; } if (strcmp(tmp_key, "ring0_addr") != 0) { continue; } if (icmap_get_string(iter_key, &node_addr_str) != CS_OK) { continue ; } if (totemip_parse(&node_addr, node_addr_str, 0) == -1) { free(node_addr_str); continue ; } free(node_addr_str); /* * Try to find node in if_addrs */ node_found = 0; for (list = addrs.next; list != &addrs; list = list->next) { if_addr = list_entry(list, struct totem_ip_if_address, list); if (totemip_equal(&node_addr, &if_addr->ip_addr)) { node_found = 1; break; } } if (node_found) { break ; } } icmap_iter_finalize(iter); if (node_found) { /* * We found node, so create 
interface section */ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.", node_pos); iter = icmap_iter_init(tmp_key); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "nodelist.node.%u.ring%u%s", &node_pos, &ringnumber, tmp_key2); if (res != 3 || strcmp(tmp_key2, "_addr") != 0) { continue ; } if (icmap_get_string(iter_key, &node_addr_str) != CS_OK) { continue; } snprintf(tmp_key2, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.bindnetaddr", ringnumber); icmap_set_string(tmp_key2, node_addr_str); free(node_addr_str); } icmap_iter_finalize(iter); } } extern int totem_config_read ( struct totem_config *totem_config, const char **error_string, uint64_t *warnings) { int res = 0; char *str; unsigned int ringnumber = 0; int member_count = 0; icmap_iter_t iter, member_iter; const char *iter_key; const char *member_iter_key; char ringnumber_key[ICMAP_KEYNAME_MAXLEN]; char tmp_key[ICMAP_KEYNAME_MAXLEN]; uint8_t u8; uint16_t u16; char *cluster_name = NULL; int i; int local_node_pos; int nodeid_set; *warnings = 0; memset (totem_config, 0, sizeof (struct totem_config)); totem_config->interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX); if (totem_config->interfaces == 0) { *error_string = "Out of memory trying to allocate ethernet interface storage area"; return -1; } memset (totem_config->interfaces, 0, sizeof (struct totem_interface) * INTERFACE_MAX); strcpy (totem_config->rrp_mode, "none"); icmap_get_uint32("totem.version", (uint32_t *)&totem_config->version); totem_get_crypto(totem_config); if (icmap_get_string("totem.rrp_mode", &str) == CS_OK) { strcpy (totem_config->rrp_mode, str); free(str); } icmap_get_uint32("totem.nodeid", &totem_config->node_id); totem_config->clear_node_high_bit = 0; if (icmap_get_string("totem.clear_node_high_bit", &str) == CS_OK) { if (strcmp (str, "yes") == 0) { totem_config->clear_node_high_bit = 1; } free(str); } icmap_get_uint32("totem.threads", &totem_config->threads); 
icmap_get_uint32("totem.netmtu", &totem_config->net_mtu); icmap_get_string("totem.cluster_name", &cluster_name); /* * Get things that might change in the future */ totem_volatile_config_read(totem_config); if (icmap_get_string("totem.interface.0.bindnetaddr", &str) != CS_OK) { /* * We were not able to find ring 0 bindnet addr. Try to use nodelist informations */ config_convert_nodelist_to_interface(totem_config); } else { free(str); } iter = icmap_iter_init("totem.interface."); while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { res = sscanf(iter_key, "totem.interface.%[^.].%s", ringnumber_key, tmp_key); if (res != 2) { continue; } if (strcmp(tmp_key, "bindnetaddr") != 0) { continue; } member_count = 0; ringnumber = atoi(ringnumber_key); if (ringnumber >= INTERFACE_MAX) { snprintf (error_string_response, sizeof(error_string_response), "parse error in config: interface ring number %u is bigger then allowed maximum %u\n", ringnumber, INTERFACE_MAX - 1); *error_string = error_string_response; return -1; } /* * Get the bind net address */ if (icmap_get_string(iter_key, &str) == CS_OK) { res = totemip_parse (&totem_config->interfaces[ringnumber].bindnet, str, totem_config->interfaces[ringnumber].mcast_addr.family); free(str); } /* * Get interface multicast address */ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr", ringnumber); if (icmap_get_string(tmp_key, &str) == CS_OK) { res = totemip_parse (&totem_config->interfaces[ringnumber].mcast_addr, str, 0); free(str); } else { /* * User not specified address -> autogenerate one from cluster_name key * (if available) */ res = get_cluster_mcast_addr (cluster_name, &totem_config->interfaces[ringnumber].bindnet, ringnumber, &totem_config->interfaces[ringnumber].mcast_addr); } totem_config->broadcast_use = 0; snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.broadcast", ringnumber); if (icmap_get_string(tmp_key, &str) == CS_OK) { if (strcmp (str, "yes") == 0) { 
totem_config->broadcast_use = 1; totemip_parse ( &totem_config->interfaces[ringnumber].mcast_addr, "255.255.255.255", 0); } free(str); } /* * Get mcast port */ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport", ringnumber); if (icmap_get_uint16(tmp_key, &totem_config->interfaces[ringnumber].ip_port) != CS_OK) { if (totem_config->broadcast_use) { totem_config->interfaces[ringnumber].ip_port = DEFAULT_PORT + (2 * ringnumber); } else { totem_config->interfaces[ringnumber].ip_port = DEFAULT_PORT; } } /* * Get the TTL */ totem_config->interfaces[ringnumber].ttl = 1; snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.ttl", ringnumber); if (icmap_get_uint8(tmp_key, &u8) == CS_OK) { totem_config->interfaces[ringnumber].ttl = u8; } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.member.", ringnumber); member_iter = icmap_iter_init(tmp_key); while ((member_iter_key = icmap_iter_next(member_iter, NULL, NULL)) != NULL) { if (member_count == 0) { if (icmap_get_string("nodelist.node.0.ring0_addr", &str) == CS_OK) { free(str); *warnings |= TOTEM_CONFIG_WARNING_MEMBERS_IGNORED; break; } else { *warnings |= TOTEM_CONFIG_WARNING_MEMBERS_DEPRECATED; } } if (icmap_get_string(member_iter_key, &str) == CS_OK) { res = totemip_parse (&totem_config->interfaces[ringnumber].member_list[member_count++], str, 0); } } icmap_iter_finalize(member_iter); totem_config->interfaces[ringnumber].member_count = member_count; totem_config->interface_count++; } icmap_iter_finalize(iter); /* * Store automatically generated items back to icmap */ for (i = 0; i < totem_config->interface_count; i++) { snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastaddr", i); if (icmap_get_string(tmp_key, &str) == CS_OK) { free(str); } else { str = (char *)totemip_print(&totem_config->interfaces[i].mcast_addr); icmap_set_string(tmp_key, str); } snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.mcastport", i); if (icmap_get_uint16(tmp_key, &u16) != CS_OK) { 
icmap_set_uint16(tmp_key, totem_config->interfaces[i].ip_port); } } totem_config->transport_number = TOTEM_TRANSPORT_UDP; if (icmap_get_string("totem.transport", &str) == CS_OK) { if (strcmp (str, "udpu") == 0) { totem_config->transport_number = TOTEM_TRANSPORT_UDPU; } if (strcmp (str, "iba") == 0) { totem_config->transport_number = TOTEM_TRANSPORT_RDMA; } free(str); } free(cluster_name); /* * Check existence of nodelist */ if (icmap_get_string("nodelist.node.0.ring0_addr", &str) == CS_OK) { free(str); /* * find local node */ local_node_pos = find_local_node_in_nodelist(totem_config); if (local_node_pos != -1) { icmap_set_uint32("nodelist.local_node_pos", local_node_pos); snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", local_node_pos); nodeid_set = (totem_config->node_id != 0); if (icmap_get_uint32(tmp_key, &totem_config->node_id) == CS_OK && nodeid_set) { *warnings |= TOTEM_CONFIG_WARNING_TOTEM_NODEID_IGNORED; } /* * Make localnode ring0_addr read only, so we can be sure that local * node never changes. If rebinding to other IP would be in future * supported, this must be changed and handled properly! 
*/ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", local_node_pos); icmap_set_ro_access(tmp_key, 0, 1); icmap_set_ro_access("nodelist.local_node_pos", 0, 1); } put_nodelist_members_to_config(totem_config); } add_totem_config_notification(totem_config); return 0; } int totem_config_validate ( struct totem_config *totem_config, const char **error_string) { static char local_error_reason[512]; char parse_error[512]; const char *error_reason = local_error_reason; int i; unsigned int interface_max = INTERFACE_MAX; if (totem_config->interface_count == 0) { error_reason = "No interfaces defined"; goto parse_error; } for (i = 0; i < totem_config->interface_count; i++) { /* * Some error checking of parsed data to make sure its valid */ struct totem_ip_address null_addr; memset (&null_addr, 0, sizeof (struct totem_ip_address)); if ((totem_config->transport_number == 0) && memcmp (&totem_config->interfaces[i].mcast_addr, &null_addr, sizeof (struct totem_ip_address)) == 0) { error_reason = "No multicast address specified"; goto parse_error; } if (totem_config->interfaces[i].ip_port == 0) { error_reason = "No multicast port specified"; goto parse_error; } if (totem_config->interfaces[i].ttl > 255) { error_reason = "Invalid TTL (should be 0..255)"; goto parse_error; } if (totem_config->transport_number != TOTEM_TRANSPORT_UDP && totem_config->interfaces[i].ttl != 1) { error_reason = "Can only set ttl on multicast transport types"; goto parse_error; } if (totem_config->interfaces[i].mcast_addr.family == AF_INET6 && totem_config->node_id == 0) { error_reason = "An IPV6 network requires that a node ID be specified."; goto parse_error; } if (totem_config->broadcast_use == 0 && totem_config->transport_number == 0) { if (totem_config->interfaces[i].mcast_addr.family != totem_config->interfaces[i].bindnet.family) { error_reason = "Multicast address family does not match bind address family"; goto parse_error; } if (totem_config->interfaces[i].mcast_addr.family != 
totem_config->interfaces[i].bindnet.family) { error_reason = "Not all bind address belong to the same IP family"; goto parse_error; } if (totemip_is_mcast (&totem_config->interfaces[i].mcast_addr) != 0) { error_reason = "mcastaddr is not a correct multicast address."; goto parse_error; } } } if (totem_config->version != 2) { error_reason = "This totem parser can only parse version 2 configurations."; goto parse_error; } if (totem_config->token_retransmits_before_loss_const == 0) { totem_config->token_retransmits_before_loss_const = TOKEN_RETRANSMITS_BEFORE_LOSS_CONST; } /* * Setup timeout values that are not setup by user */ if (totem_config->token_timeout == 0) { totem_config->token_timeout = TOKEN_TIMEOUT; } if (totem_config->max_network_delay == 0) { totem_config->max_network_delay = MAX_NETWORK_DELAY; } if (totem_config->max_network_delay < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The max_network_delay parameter (%d ms) may not be less then (%d ms).", totem_config->max_network_delay, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->window_size == 0) { totem_config->window_size = WINDOW_SIZE; } if (totem_config->max_messages == 0) { totem_config->max_messages = MAX_MESSAGES; } if (totem_config->miss_count_const == 0) { totem_config->miss_count_const = MISS_COUNT_CONST; } if (totem_config->token_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The token timeout parameter (%d ms) may not be less then (%d ms).", totem_config->token_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->token_retransmit_timeout == 0) { totem_config->token_retransmit_timeout = (int)(totem_config->token_timeout / (totem_config->token_retransmits_before_loss_const + 0.2)); } if (totem_config->token_hold_timeout == 0) { totem_config->token_hold_timeout = (int)(totem_config->token_retransmit_timeout * 0.8 - (1000/HZ)); } if (totem_config->token_retransmit_timeout < MINIMUM_TIMEOUT) { snprintf 
(local_error_reason, sizeof(local_error_reason), "The token retransmit timeout parameter (%d ms) may not be less then (%d ms).", totem_config->token_retransmit_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->token_hold_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The token hold timeout parameter (%d ms) may not be less then (%d ms).", totem_config->token_hold_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->join_timeout == 0) { totem_config->join_timeout = JOIN_TIMEOUT; } if (totem_config->join_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The join timeout parameter (%d ms) may not be less then (%d ms).", totem_config->join_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->consensus_timeout == 0) { totem_config->consensus_timeout = (int)(float)(1.2 * totem_config->token_timeout); } if (totem_config->consensus_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The consensus timeout parameter (%d ms) may not be less then (%d ms).", totem_config->consensus_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->merge_timeout == 0) { totem_config->merge_timeout = MERGE_TIMEOUT; } if (totem_config->merge_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The merge timeout parameter (%d ms) may not be less then (%d ms).", totem_config->merge_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->downcheck_timeout == 0) { totem_config->downcheck_timeout = DOWNCHECK_TIMEOUT; } if (totem_config->downcheck_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The downcheck timeout parameter (%d ms) may not be less then (%d ms).", totem_config->downcheck_timeout, MINIMUM_TIMEOUT); goto parse_error; } /* * RRP values validation */ if (strcmp (totem_config->rrp_mode, "none") && strcmp (totem_config->rrp_mode, "active") && 
strcmp (totem_config->rrp_mode, "passive")) { snprintf (local_error_reason, sizeof(local_error_reason), "The RRP mode \"%s\" specified is invalid. It must be none, active, or passive.\n", totem_config->rrp_mode); goto parse_error; } if (totem_config->rrp_problem_count_timeout == 0) { totem_config->rrp_problem_count_timeout = RRP_PROBLEM_COUNT_TIMEOUT; } if (totem_config->rrp_problem_count_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The RRP problem count timeout parameter (%d ms) may not be less then (%d ms).", totem_config->rrp_problem_count_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->rrp_problem_count_threshold == 0) { totem_config->rrp_problem_count_threshold = RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT; } if (totem_config->rrp_problem_count_mcast_threshold == 0) { totem_config->rrp_problem_count_mcast_threshold = totem_config->rrp_problem_count_threshold * 10; } if (totem_config->rrp_problem_count_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) { snprintf (local_error_reason, sizeof(local_error_reason), "The RRP problem count threshold (%d problem count) may not be less then (%d problem count).", totem_config->rrp_problem_count_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN); goto parse_error; } if (totem_config->rrp_problem_count_mcast_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) { snprintf (local_error_reason, sizeof(local_error_reason), "The RRP multicast problem count threshold (%d problem count) may not be less then (%d problem count).", totem_config->rrp_problem_count_mcast_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN); goto parse_error; } if (totem_config->rrp_token_expired_timeout == 0) { totem_config->rrp_token_expired_timeout = totem_config->token_retransmit_timeout; } if (totem_config->rrp_token_expired_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The RRP token expired timeout parameter (%d ms) may not be less then (%d ms).", 
totem_config->rrp_token_expired_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->rrp_autorecovery_check_timeout == 0) { totem_config->rrp_autorecovery_check_timeout = RRP_AUTORECOVERY_CHECK_TIMEOUT; } if (strcmp (totem_config->rrp_mode, "none") == 0) { interface_max = 1; } if (interface_max < totem_config->interface_count) { snprintf (parse_error, sizeof(parse_error), "%d is too many configured interfaces for the rrp_mode setting %s.", totem_config->interface_count, totem_config->rrp_mode); error_reason = parse_error; goto parse_error; } if (totem_config->fail_to_recv_const == 0) { totem_config->fail_to_recv_const = FAIL_TO_RECV_CONST; } if (totem_config->seqno_unchanged_const == 0) { totem_config->seqno_unchanged_const = SEQNO_UNCHANGED_CONST; } if (totem_config->net_mtu == 0) { totem_config->net_mtu = 1500; } if ((MESSAGE_QUEUE_MAX) < totem_config->max_messages) { snprintf (local_error_reason, sizeof(local_error_reason), "The max_messages parameter (%d messages) may not be greater then (%d messages).", totem_config->max_messages, MESSAGE_QUEUE_MAX); goto parse_error; } if (totem_config->threads > SEND_THREADS_MAX) { totem_config->threads = SEND_THREADS_MAX; } if (totem_config->net_mtu > FRAME_SIZE_MAX) { error_reason = "This net_mtu parameter is greater then the maximum frame size"; goto parse_error; } if (totem_config->vsf_type == NULL) { totem_config->vsf_type = "none"; } return (0); parse_error: snprintf (error_string_response, sizeof(error_string_response), "parse error in config: %s\n", error_reason); *error_string = error_string_response; return (-1); } static int read_keyfile ( const char *key_location, struct totem_config *totem_config, const char **error_string) { int fd; int res; ssize_t expected_key_len = sizeof (totem_config->private_key); int saved_errno; char error_str[100]; const char *error_ptr; fd = open (key_location, O_RDONLY); if (fd == -1) { error_ptr = qb_strerror_r(errno, error_str, sizeof(error_str)); snprintf 
(error_string_response, sizeof(error_string_response), "Could not open %s: %s\n", key_location, error_ptr); goto parse_error; } res = read (fd, totem_config->private_key, expected_key_len); saved_errno = errno; close (fd); if (res == -1) { error_ptr = qb_strerror_r (saved_errno, error_str, sizeof(error_str)); snprintf (error_string_response, sizeof(error_string_response), "Could not read %s: %s\n", key_location, error_ptr); goto parse_error; } totem_config->private_key_len = expected_key_len; if (res != expected_key_len) { snprintf (error_string_response, sizeof(error_string_response), "Could only read %d bits of 1024 bits from %s.\n", res * 8, key_location); goto parse_error; } return 0; parse_error: *error_string = error_string_response; return (-1); } int totem_config_keyread ( struct totem_config *totem_config, const char **error_string) { int got_key = 0; char *key_location = NULL; int res; size_t key_len; memset (totem_config->private_key, 0, 128); totem_config->private_key_len = 128; if (strcmp(totem_config->crypto_cipher_type, "none") == 0 && strcmp(totem_config->crypto_hash_type, "none") == 0) { return (0); } /* cmap may store the location of the key file */ if (icmap_get_string("totem.keyfile", &key_location) == CS_OK) { res = read_keyfile(key_location, totem_config, error_string); free(key_location); if (res) { goto key_error; } got_key = 1; } else { /* Or the key itself may be in the cmap */ if (icmap_get("totem.key", NULL, &key_len, NULL) == CS_OK) { if (key_len > sizeof (totem_config->private_key)) { sprintf(error_string_response, "key is too long"); goto key_error; } if (icmap_get("totem.key", totem_config->private_key, &key_len, NULL) == CS_OK) { totem_config->private_key_len = key_len; got_key = 1; } else { sprintf(error_string_response, "can't store private key"); goto key_error; } } } /* In desperation we read the default filename */ if (!got_key) { const char *filename = getenv("COROSYNC_TOTEM_AUTHKEY_FILE"); if (!filename) filename = 
COROSYSCONFDIR "/authkey"; res = read_keyfile(filename, totem_config, error_string); if (res) goto key_error; } return (0); key_error: *error_string = error_string_response; return (-1); } static void totem_change_notify( int32_t event, const char *key_name, struct icmap_notify_value new_val, struct icmap_notify_value old_val, void *user_data) { totem_volatile_config_read((struct totem_config *)user_data); } static void add_totem_config_notification(struct totem_config *totem_config) { icmap_track_t icmap_track; icmap_track_add("totem.", ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX, totem_change_notify, totem_config, &icmap_track); } diff --git a/exec/totemcrypto.c b/exec/totemcrypto.c index e014c50c..44faaaf5 100644 --- a/exec/totemcrypto.c +++ b/exec/totemcrypto.c @@ -1,770 +1,961 @@ /* * Copyright (c) 2006-2012 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * Christine Caulfield (ccaulfie@redhat.com) * Jan Friesse (jfriesse@redhat.com) * Fabio M. Di Nitto (fdinitto@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include "config.h" #include #include #include #include #include #include #define LOGSYS_UTILS_ONLY 1 #include #include #include "totemcrypto.h" /* * define onwire crypto header */ struct crypto_config_header { uint8_t crypto_cipher_type; uint8_t crypto_hash_type; uint8_t __pad0; uint8_t __pad1; } __attribute__((packed)); /* * crypto definitions and conversion tables */ #define SALT_SIZE 16 /* * This are defined in new NSS. 
For older one, we will define our own */ #ifndef AES_256_KEY_LENGTH #define AES_256_KEY_LENGTH 32 #endif #ifndef AES_192_KEY_LENGTH #define AES_192_KEY_LENGTH 24 #endif #ifndef AES_128_KEY_LENGTH #define AES_128_KEY_LENGTH 16 #endif +/* + * while CRYPTO_CIPHER_TYPE_2_2 is not a real cipher at all, + * we still allocate a value for it because we use crypto_crypt_t + * internally and we don't want overlaps + */ + enum crypto_crypt_t { CRYPTO_CIPHER_TYPE_NONE = 0, CRYPTO_CIPHER_TYPE_AES256 = 1, CRYPTO_CIPHER_TYPE_AES192 = 2, CRYPTO_CIPHER_TYPE_AES128 = 3, - CRYPTO_CIPHER_TYPE_3DES = 4 + CRYPTO_CIPHER_TYPE_3DES = 4, + CRYPTO_CIPHER_TYPE_2_2 = UINT8_MAX }; CK_MECHANISM_TYPE cipher_to_nss[] = { 0, /* CRYPTO_CIPHER_TYPE_NONE */ CKM_AES_CBC_PAD, /* CRYPTO_CIPHER_TYPE_AES256 */ CKM_AES_CBC_PAD, /* CRYPTO_CIPHER_TYPE_AES192 */ CKM_AES_CBC_PAD, /* CRYPTO_CIPHER_TYPE_AES128 */ CKM_DES3_CBC_PAD /* CRYPTO_CIPHER_TYPE_3DES */ }; size_t cipher_key_len[] = { 0, /* CRYPTO_CIPHER_TYPE_NONE */ AES_256_KEY_LENGTH, /* CRYPTO_CIPHER_TYPE_AES256 */ AES_192_KEY_LENGTH, /* CRYPTO_CIPHER_TYPE_AES192 */ AES_128_KEY_LENGTH, /* CRYPTO_CIPHER_TYPE_AES128 */ 24 /* CRYPTO_CIPHER_TYPE_3DES - no magic in nss headers */ }; size_t cypher_block_len[] = { 0, /* CRYPTO_CIPHER_TYPE_NONE */ AES_BLOCK_SIZE, /* CRYPTO_CIPHER_TYPE_AES256 */ AES_BLOCK_SIZE, /* CRYPTO_CIPHER_TYPE_AES192 */ AES_BLOCK_SIZE, /* CRYPTO_CIPHER_TYPE_AES128 */ 0 /* CRYPTO_CIPHER_TYPE_3DES */ }; /* * hash definitions and conversion tables */ +/* + * while CRYPTO_HASH_TYPE_2_2 is not a real hash mechanism at all, + * we still allocate a value for it because we use crypto_hash_t + * internally and we don't want overlaps + */ + enum crypto_hash_t { CRYPTO_HASH_TYPE_NONE = 0, CRYPTO_HASH_TYPE_MD5 = 1, CRYPTO_HASH_TYPE_SHA1 = 2, CRYPTO_HASH_TYPE_SHA256 = 3, CRYPTO_HASH_TYPE_SHA384 = 4, - CRYPTO_HASH_TYPE_SHA512 = 5 + CRYPTO_HASH_TYPE_SHA512 = 5, + CRYPTO_HASH_TYPE_2_2 = UINT8_MAX }; CK_MECHANISM_TYPE hash_to_nss[] = { 0, /* 
CRYPTO_HASH_TYPE_NONE */ CKM_MD5_HMAC, /* CRYPTO_HASH_TYPE_MD5 */ CKM_SHA_1_HMAC, /* CRYPTO_HASH_TYPE_SHA1 */ CKM_SHA256_HMAC, /* CRYPTO_HASH_TYPE_SHA256 */ CKM_SHA384_HMAC, /* CRYPTO_HASH_TYPE_SHA384 */ CKM_SHA512_HMAC /* CRYPTO_HASH_TYPE_SHA512 */ }; size_t hash_len[] = { 0, /* CRYPTO_HASH_TYPE_NONE */ MD5_LENGTH, /* CRYPTO_HASH_TYPE_MD5 */ SHA1_LENGTH, /* CRYPTO_HASH_TYPE_SHA1 */ SHA256_LENGTH, /* CRYPTO_HASH_TYPE_SHA256 */ SHA384_LENGTH, /* CRYPTO_HASH_TYPE_SHA384 */ SHA512_LENGTH /* CRYPTO_HASH_TYPE_SHA512 */ }; size_t hash_block_len[] = { 0, /* CRYPTO_HASH_TYPE_NONE */ MD5_BLOCK_LENGTH, /* CRYPTO_HASH_TYPE_MD5 */ SHA1_BLOCK_LENGTH, /* CRYPTO_HASH_TYPE_SHA1 */ SHA256_BLOCK_LENGTH, /* CRYPTO_HASH_TYPE_SHA256 */ SHA384_BLOCK_LENGTH, /* CRYPTO_HASH_TYPE_SHA384 */ SHA512_BLOCK_LENGTH /* CRYPTO_HASH_TYPE_SHA512 */ }; +/* + * crypto on-wire compat + */ + +enum crypto_compat_t { + CRYPTO_COMPAT_2_0 = 0, + CRYPTO_COMPAT_2_2 = 1 +}; + struct crypto_instance { PK11SymKey *nss_sym_key; PK11SymKey *nss_sym_key_sign; unsigned char private_key[1024]; unsigned int private_key_len; enum crypto_crypt_t crypto_cipher_type; enum crypto_hash_t crypto_hash_type; + enum crypto_compat_t crypto_compat_type; + unsigned int crypto_header_size; void (*log_printf_func) ( int level, int subsys, const char *function, const char *file, int line, const char *format, ...)__attribute__((format(printf, 6, 7))); int log_level_security; int log_level_notice; int log_level_error; int log_subsys_id; }; #define log_printf(level, format, args...) 
\ do { \ instance->log_printf_func ( \ level, instance->log_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ (const char *)format, ##args); \ } while (0);
+/*
+ * compat functions
+ */
+
+/*
+ * Translate the crypto_compat configuration string into a
+ * crypto_compat_t value. "2.0" selects the legacy on-wire format; "2.2"
+ * and any unrecognised string select the 2.2 format.
+ */
+static int string_to_crypto_compat_type(const char* crypto_compat_type)
+{
+	if (strcmp(crypto_compat_type, "2.0") == 0) {
+		return CRYPTO_COMPAT_2_0;
+	} else if (strcmp(crypto_compat_type, "2.2") == 0) {
+		return CRYPTO_COMPAT_2_2;
+	}
+	/* anything else falls back to the newest format */
+	return CRYPTO_COMPAT_2_2;
+}
+
 /*
  * crypt/decrypt functions
  */

+/*
+ * Translate the crypto_cipher configuration string into a crypto_crypt_t
+ * value. Unrecognised strings select AES256.
+ */
 static int string_to_crypto_cipher_type(const char* crypto_cipher_type)
 {
 	if (strcmp(crypto_cipher_type, "none") == 0) {
 		return CRYPTO_CIPHER_TYPE_NONE;
 	} else if (strcmp(crypto_cipher_type, "aes256") == 0) {
 		return CRYPTO_CIPHER_TYPE_AES256;
 	} else if (strcmp(crypto_cipher_type, "aes192") == 0) {
 		return CRYPTO_CIPHER_TYPE_AES192;
 	} else if (strcmp(crypto_cipher_type, "aes128") == 0) {
 		return CRYPTO_CIPHER_TYPE_AES128;
 	} else if (strcmp(crypto_cipher_type, "3des") == 0) {
 		return CRYPTO_CIPHER_TYPE_3DES;
 	}
 	return CRYPTO_CIPHER_TYPE_AES256;
 }

+/*
+ * Import the leading cipher_key_len[] bytes of instance->private_key
+ * into NSS as the symmetric encryption key (instance->nss_sym_key).
+ *
+ * Returns 0 on success or when no cipher is configured, -1 on failure.
+ */
 static int init_nss_crypto(struct crypto_instance *instance)
 {
 	PK11SlotInfo*	crypt_slot = NULL;
 	SECItem		crypt_param;

 	if (!cipher_to_nss[instance->crypto_cipher_type]) {
 		return 0;
 	}

 	crypt_param.type = siBuffer;
 	crypt_param.data = instance->private_key;
 	crypt_param.len = cipher_key_len[instance->crypto_cipher_type];

 	crypt_slot = PK11_GetBestSlot(cipher_to_nss[instance->crypto_cipher_type], NULL);
 	if (crypt_slot == NULL) {
 		log_printf(instance->log_level_security, "Unable to find security slot (err %d)",
 			   PR_GetError());
 		return -1;
 	}

 	instance->nss_sym_key = PK11_ImportSymKey(crypt_slot,
 						  cipher_to_nss[instance->crypto_cipher_type],
 						  PK11_OriginUnwrap, CKA_ENCRYPT|CKA_DECRYPT,
 						  &crypt_param, NULL);
 	if (instance->nss_sym_key == NULL) {
 		log_printf(instance->log_level_security, "Failure to import key into NSS (err %d)",
 			   PR_GetError());
+		/* release the slot on the failure path as well, after the error is logged */
+		PK11_FreeSlot(crypt_slot);
 		return -1;
 	}

 	PK11_FreeSlot(crypt_slot);

 	return 0;
 }

 static int encrypt_nss( struct crypto_instance *instance, const unsigned
char *buf_in, const size_t buf_in_len, unsigned char *buf_out, size_t *buf_out_len) { PK11Context* crypt_context = NULL; SECItem crypt_param; SECItem *nss_sec_param = NULL; int tmp1_outlen = 0; unsigned int tmp2_outlen = 0; unsigned char *salt = buf_out; unsigned char *data = buf_out + SALT_SIZE; int err = -1; if (!cipher_to_nss[instance->crypto_cipher_type]) { memcpy(buf_out, buf_in, buf_in_len); *buf_out_len = buf_in_len; return 0; } if (PK11_GenerateRandom (salt, SALT_SIZE) != SECSuccess) { log_printf(instance->log_level_security, "Failure to generate a random number %d", PR_GetError()); goto out; } crypt_param.type = siBuffer; crypt_param.data = salt; crypt_param.len = SALT_SIZE; nss_sec_param = PK11_ParamFromIV (cipher_to_nss[instance->crypto_cipher_type], &crypt_param); if (nss_sec_param == NULL) { log_printf(instance->log_level_security, "Failure to set up PKCS11 param (err %d)", PR_GetError()); goto out; } /* * Create cipher context for encryption */ crypt_context = PK11_CreateContextBySymKey (cipher_to_nss[instance->crypto_cipher_type], CKA_ENCRYPT, instance->nss_sym_key, nss_sec_param); if (!crypt_context) { log_printf(instance->log_level_security, "PK11_CreateContext failed (encrypt) crypt_type=%d (err %d)", (int)cipher_to_nss[instance->crypto_cipher_type], PR_GetError()); goto out; } if (PK11_CipherOp(crypt_context, data, &tmp1_outlen, FRAME_SIZE_MAX - instance->crypto_header_size, (unsigned char *)buf_in, buf_in_len) != SECSuccess) { log_printf(instance->log_level_security, "PK11_CipherOp failed (encrypt) crypt_type=%d (err %d)", (int)cipher_to_nss[instance->crypto_cipher_type], PR_GetError()); goto out; } if (PK11_DigestFinal(crypt_context, data + tmp1_outlen, &tmp2_outlen, FRAME_SIZE_MAX - tmp1_outlen) != SECSuccess) { log_printf(instance->log_level_security, "PK11_DigestFinal failed (encrypt) crypt_type=%d (err %d)", (int)cipher_to_nss[instance->crypto_cipher_type], PR_GetError()); goto out; } *buf_out_len = tmp1_outlen + tmp2_outlen + SALT_SIZE; 
err = 0; out: if (crypt_context) { PK11_DestroyContext(crypt_context, PR_TRUE); } if (nss_sec_param) { SECITEM_FreeItem(nss_sec_param, PR_TRUE); } return err; }

+/*
+ * Decrypt buf in place.
+ *
+ * On entry buf holds SALT_SIZE bytes used as the IV, followed by the
+ * ciphertext, and *buf_len is their combined length. On success buf holds
+ * the plaintext and *buf_len its length.
+ *
+ * Returns 0 on success (or when no cipher is configured, in which case
+ * buf and *buf_len are left untouched), -1 on failure.
+ */
 static int decrypt_nss (
 	struct crypto_instance *instance,
 	unsigned char *buf,
 	int *buf_len)
 {
 	PK11Context*	decrypt_context = NULL;
 	SECItem		decrypt_param;
 	int		tmp1_outlen = 0;
 	unsigned int	tmp2_outlen = 0;
 	unsigned char	*salt = buf;
 	unsigned char	*data = salt + SALT_SIZE;
 	int		datalen = *buf_len - SALT_SIZE;
 	unsigned char	outbuf[FRAME_SIZE_MAX];
 	int		outbuf_len;
 	int		err = -1;

 	if (!cipher_to_nss[instance->crypto_cipher_type]) {
 		return 0;
 	}

+	/*
+	 * A packet too short to hold the salt plus at least one byte of
+	 * ciphertext would hand a zero or negative length to NSS; reject it
+	 * before touching the buffer.
+	 */
+	if (datalen <= 0) {
+		log_printf(instance->log_level_security,
+			   "Received message is too short...  ignoring");
+		goto out;
+	}
+
 	/* Create cipher context for decryption */
 	decrypt_param.type = siBuffer;
 	decrypt_param.data = salt;
 	decrypt_param.len = SALT_SIZE;

 	decrypt_context = PK11_CreateContextBySymKey(cipher_to_nss[instance->crypto_cipher_type],
 						     CKA_DECRYPT,
 						     instance->nss_sym_key, &decrypt_param);
 	if (!decrypt_context) {
 		log_printf(instance->log_level_security,
 			   "PK11_CreateContext (decrypt) failed (err %d)",
 			   PR_GetError());
 		goto out;
 	}

 	if (PK11_CipherOp(decrypt_context, outbuf, &tmp1_outlen,
 			  sizeof(outbuf), data, datalen) != SECSuccess) {
 		log_printf(instance->log_level_security,
 			   "PK11_CipherOp (decrypt) failed (err %d)",
 			   PR_GetError());
 		goto out;
 	}

 	if (PK11_DigestFinal(decrypt_context, outbuf + tmp1_outlen, &tmp2_outlen,
 			     sizeof(outbuf) - tmp1_outlen) != SECSuccess) {
 		log_printf(instance->log_level_security,
 			   "PK11_DigestFinal (decrypt) failed (err %d)",
 			   PR_GetError());
 		goto out;
 	}

 	outbuf_len = tmp1_outlen + tmp2_outlen;

 	/* wipe the ciphertext, then overwrite it with the plaintext */
 	memset(buf, 0, *buf_len);
 	memcpy(buf, outbuf, outbuf_len);

 	*buf_len = outbuf_len;

 	err = 0;

 out:
 	if (decrypt_context) {
 		PK11_DestroyContext(decrypt_context, PR_TRUE);
 	}

 	return err;
 }

 /* * hash/hmac/digest functions */ static int string_to_crypto_hash_type(const char* crypto_hash_type) { if (strcmp(crypto_hash_type, "none") == 0) { return CRYPTO_HASH_TYPE_NONE; } else if (strcmp(crypto_hash_type, "md5") == 0) { return CRYPTO_HASH_TYPE_MD5; } else if (strcmp(crypto_hash_type, "sha1") ==
0) { return CRYPTO_HASH_TYPE_SHA1; } else if (strcmp(crypto_hash_type, "sha256") == 0) { return CRYPTO_HASH_TYPE_SHA256; } else if (strcmp(crypto_hash_type, "sha384") == 0) { return CRYPTO_HASH_TYPE_SHA384; } else if (strcmp(crypto_hash_type, "sha512") == 0) { return CRYPTO_HASH_TYPE_SHA512; } return CRYPTO_HASH_TYPE_SHA1; }

+/*
+ * Import the HMAC signing key (instance->nss_sym_key_sign) into NSS.
+ *
+ * The key material is the configured private key. Importing an empty
+ * buffer here would make every packet digest independent of the cluster
+ * secret, so any sender could produce a digest that verifies.
+ *
+ * NOTE(review): digests computed with the private key will not match
+ * those from a peer that still signs with an empty key -- confirm the
+ * upgrade plan before rolling this out to a mixed cluster.
+ *
+ * Returns 0 on success or when no hash is configured, -1 on failure.
+ */
 static int init_nss_hash(struct crypto_instance *instance)
 {
 	PK11SlotInfo*	hash_slot = NULL;
 	SECItem		hash_param;

 	if (!hash_to_nss[instance->crypto_hash_type]) {
 		return 0;
 	}

 	hash_param.type = siBuffer;
-	hash_param.data = 0;
-	hash_param.len = 0;
+	hash_param.data = instance->private_key;
+	hash_param.len = instance->private_key_len;

 	hash_slot = PK11_GetBestSlot(hash_to_nss[instance->crypto_hash_type], NULL);
 	if (hash_slot == NULL) {
 		log_printf(instance->log_level_security, "Unable to find security slot (err %d)",
 			   PR_GetError());
 		return -1;
 	}

 	instance->nss_sym_key_sign = PK11_ImportSymKey(hash_slot,
 						       hash_to_nss[instance->crypto_hash_type],
 						       PK11_OriginUnwrap, CKA_SIGN,
 						       &hash_param, NULL);
 	if (instance->nss_sym_key_sign == NULL) {
 		log_printf(instance->log_level_security, "Failure to import key into NSS (err %d)",
 			   PR_GetError());
+		/* release the slot on the failure path as well, after the error is logged */
+		PK11_FreeSlot(hash_slot);
 		return -1;
 	}

 	PK11_FreeSlot(hash_slot);

 	return 0;
 }

 static int calculate_nss_hash( struct crypto_instance *instance, const unsigned char *buf, const size_t buf_len, unsigned char *hash) { PK11Context* hash_context = NULL; SECItem hash_param; unsigned int hash_tmp_outlen = 0; unsigned char hash_block[hash_block_len[instance->crypto_hash_type]]; int err = -1; /* Now do the digest */ hash_param.type = siBuffer; hash_param.data = 0; hash_param.len = 0; hash_context = PK11_CreateContextBySymKey(hash_to_nss[instance->crypto_hash_type], CKA_SIGN, instance->nss_sym_key_sign, &hash_param); if (!hash_context) { log_printf(instance->log_level_security, "PK11_CreateContext failed (hash) hash_type=%d (err %d)", (int)hash_to_nss[instance->crypto_hash_type], PR_GetError()); goto out; } if (PK11_DigestBegin(hash_context) != SECSuccess) { log_printf(instance->log_level_security, "PK11_DigestBegin failed (hash) hash_type=%d (err
%d)", (int)hash_to_nss[instance->crypto_hash_type], PR_GetError()); goto out; } if (PK11_DigestOp(hash_context, buf, buf_len) != SECSuccess) { log_printf(instance->log_level_security, "PK11_DigestOp failed (hash) hash_type=%d (err %d)", (int)hash_to_nss[instance->crypto_hash_type], PR_GetError()); goto out; } if (PK11_DigestFinal(hash_context, hash_block, &hash_tmp_outlen, hash_block_len[instance->crypto_hash_type]) != SECSuccess) { log_printf(instance->log_level_security, "PK11_DigestFinale failed (hash) hash_type=%d (err %d)", (int)hash_to_nss[instance->crypto_hash_type], PR_GetError()); goto out; } memcpy(hash, hash_block, hash_len[instance->crypto_hash_type]); err = 0; out: if (hash_context) { PK11_DestroyContext(hash_context, PR_TRUE); } return err; } /* * global/glue nss functions */ static int init_nss_db(struct crypto_instance *instance) { if ((!cipher_to_nss[instance->crypto_cipher_type]) && (!hash_to_nss[instance->crypto_hash_type])) { return 0; } if (NSS_NoDB_Init(".") != SECSuccess) { log_printf(instance->log_level_security, "NSS DB initialization failed (err %d)", PR_GetError()); return -1; } return 0; } static int init_nss(struct crypto_instance *instance, const char *crypto_cipher_type, - const char *crypto_hash_type) + const char *crypto_hash_type, + const char *crypto_compat_type) { log_printf(instance->log_level_notice, - "Initializing transmit/receive security (NSS) crypto: %s hash: %s", - crypto_cipher_type, crypto_hash_type); + "Initializing transmit/receive security (NSS) crypto: %s hash: %s compat: %s", + crypto_cipher_type, crypto_hash_type, crypto_compat_type); if (init_nss_db(instance) < 0) { return -1; } if (init_nss_crypto(instance) < 0) { return -1; } if (init_nss_hash(instance) < 0) { return -1; } return 0; } -static int encrypt_and_sign_nss ( +static int encrypt_and_sign_nss_2_0 ( struct crypto_instance *instance, const unsigned char *buf_in, const size_t buf_in_len, unsigned char *buf_out, size_t *buf_out_len) { unsigned char *hash = 
buf_out; unsigned char *data = hash + hash_len[instance->crypto_hash_type]; if (encrypt_nss(instance, buf_in, buf_in_len, data, buf_out_len) < 0) { return -1; } if (hash_to_nss[instance->crypto_hash_type]) { if (calculate_nss_hash(instance, data, *buf_out_len, hash) < 0) { return -1; } *buf_out_len = *buf_out_len + hash_len[instance->crypto_hash_type]; } return 0; }

-static int authenticate_and_decrypt_nss (
+/*
+ * Build a 2.2-format packet in buf_out: the caller-filled config header
+ * is left in place, salt + ciphertext are written after it, and the
+ * digest of everything written so far is appended at the end.
+ * *buf_out_len receives the total length. Returns 0 on success, -1 on
+ * failure.
+ */
+static int encrypt_and_sign_nss_2_2 (
+	struct crypto_instance *instance,
+	const unsigned char *buf_in,
+	const size_t buf_in_len,
+	unsigned char *buf_out,
+	size_t *buf_out_len)
+{
+	if (encrypt_nss(instance,
+			buf_in, buf_in_len,
+			buf_out + sizeof(struct crypto_config_header), buf_out_len) < 0) {
+		return -1;
+	}
+
+	*buf_out_len += sizeof(struct crypto_config_header);
+
+	if (hash_to_nss[instance->crypto_hash_type]) {
+		if (calculate_nss_hash(instance, buf_out, *buf_out_len, buf_out + *buf_out_len) < 0) {
+			return -1;
+		}
+		*buf_out_len += hash_len[instance->crypto_hash_type];
+	}
+
+	return 0;
+}
+
+/*
+ * Verify and decrypt a 2.0-format payload (digest | salt | data) in
+ * place; the config header has already been stripped by the caller.
+ * Returns 0 on success, -1 on a short packet, digest mismatch or
+ * decryption failure.
+ */
+static int authenticate_and_decrypt_nss_2_0 (
 	struct crypto_instance *instance,
 	unsigned char *buf,
 	int *buf_len)
 {
 	if (hash_to_nss[instance->crypto_hash_type]) {
 		unsigned char	tmp_hash[hash_len[instance->crypto_hash_type]];
 		unsigned char	*hash = buf;
 		unsigned char	*data = hash + hash_len[instance->crypto_hash_type];
 		int		datalen = *buf_len - hash_len[instance->crypto_hash_type];

+		/* the packet must be longer than the digest it claims to carry */
+		if (datalen <= 0) {
+			log_printf(instance->log_level_security,
+				   "Received message is too short...  ignoring");
+			return -1;
+		}
+
 		if (calculate_nss_hash(instance, data, datalen, tmp_hash) < 0) {
 			return -1;
 		}

 		if (memcmp(tmp_hash, hash, hash_len[instance->crypto_hash_type]) != 0) {
 			log_printf(instance->log_level_error, "Digest does not match");
 			return -1;
 		}

 		memmove(buf, data, datalen);
 		*buf_len = datalen;
 	}

 	if (decrypt_nss(instance, buf, buf_len) < 0) {
 		return -1;
 	}

 	return 0;
 }

+/*
+ * Verify the trailing digest of a 2.2-format packet. On success the
+ * digest is dropped from *buf_len; buf itself is not modified.
+ * Returns 0 on success (or when no hash is configured), -1 on a short
+ * packet or digest mismatch.
+ */
+static int authenticate_nss_2_2 (
+	struct crypto_instance *instance,
+	unsigned char *buf,
+	int *buf_len)
+{
+	if (hash_to_nss[instance->crypto_hash_type]) {
+		unsigned char	tmp_hash[hash_len[instance->crypto_hash_type]];
+		int		datalen = *buf_len - hash_len[instance->crypto_hash_type];
+
+		/* the packet must be longer than the digest it claims to carry */
+		if (datalen <= 0) {
+			log_printf(instance->log_level_security,
+				   "Received message is too short...  ignoring");
+			return -1;
+		}
+
+		if (calculate_nss_hash(instance, buf, datalen, tmp_hash) < 0) {
+			return -1;
+		}
+
+		if (memcmp(tmp_hash, buf + datalen, hash_len[instance->crypto_hash_type]) != 0) {
+			log_printf(instance->log_level_error, "Digest does not match");
+			return -1;
+		}
+		*buf_len = datalen;
+	}
+
+	return 0;
+}
+
+/*
+ * Decrypt the part of a 2.2-format packet that follows the config
+ * header. On success *buf_len is the plaintext length; the plaintext
+ * still sits after the header in buf. Returns 0 on success, -1 on
+ * failure.
+ */
+static int decrypt_nss_2_2 (
+	struct crypto_instance *instance,
+	unsigned char *buf,
+	int *buf_len)
+{
+	/* never let the length go negative when the header is subtracted */
+	if (*buf_len < (int)sizeof(struct crypto_config_header)) {
+		log_printf(instance->log_level_security,
+			   "Received message is too short...  ignoring");
+		return -1;
+	}
+
+	*buf_len -= sizeof(struct crypto_config_header);
+
+	if (decrypt_nss(instance, buf + sizeof(struct crypto_config_header), buf_len) < 0) {
+		return -1;
+	}
+
+	return 0;
+}
+
 /*
  * exported API
  */

 size_t crypto_sec_header_size(
 	const char *crypto_cipher_type,
 	const char *crypto_hash_type)
 {
 	int crypto_cipher = string_to_crypto_cipher_type(crypto_cipher_type);
 	int crypto_hash = string_to_crypto_hash_type(crypto_hash_type);
 	size_t hdr_size = 0;

 	hdr_size = sizeof(struct crypto_config_header);

 	if (crypto_hash) {
 		hdr_size += hash_len[crypto_hash];
 	}

 	if (crypto_cipher) {
 		hdr_size += SALT_SIZE;
 		hdr_size += cypher_block_len[crypto_cipher];
 	}

 	return hdr_size;
 }

+/*
+ * 2.0 packet format:
+ *   crypto_cipher_type | crypto_hash_type | __pad0 | __pad1 | hash | salt | data
+ *   only data is encrypted, hash only covers salt + data
+ *
+ * 2.2 packet format
+ *   fake_crypto_cipher_type | fake_crypto_hash_type | __pad0 | __pad1 | salt | data | hash
+ *   only data is encrypted, hash covers the whole packet
+ *
+ * we need to leave fake_* unencrypted for older versions of corosync to reject the packets,
+ * we need to leave __pad0|1 unencrypted for performance reasons (saves at least 2 memcpy and
+ * an extra buffer), but the values are hashed and verified.
+ */ + int crypto_encrypt_and_sign ( struct crypto_instance *instance, const unsigned char *buf_in, const size_t buf_in_len, unsigned char *buf_out, size_t *buf_out_len) { struct crypto_config_header *cch = (struct crypto_config_header *)buf_out; int err; - cch->crypto_cipher_type = instance->crypto_cipher_type; - cch->crypto_hash_type = instance->crypto_hash_type; - cch->__pad0 = 0; - cch->__pad1 = 0; + switch (instance->crypto_compat_type) { + case CRYPTO_COMPAT_2_0: + cch->crypto_cipher_type = instance->crypto_cipher_type; + cch->crypto_hash_type = instance->crypto_hash_type; + cch->__pad0 = 0; + cch->__pad1 = 0; + + buf_out += sizeof(struct crypto_config_header); - buf_out += sizeof(struct crypto_config_header); + err = encrypt_and_sign_nss_2_0(instance, + buf_in, buf_in_len, + buf_out, buf_out_len); - err = encrypt_and_sign_nss(instance, - buf_in, buf_in_len, - buf_out, buf_out_len); + *buf_out_len = *buf_out_len + sizeof(struct crypto_config_header); + break; + case CRYPTO_COMPAT_2_2: + cch->crypto_cipher_type = CRYPTO_CIPHER_TYPE_2_2; + cch->crypto_hash_type = CRYPTO_HASH_TYPE_2_2; + cch->__pad0 = 0; + cch->__pad1 = 0; - *buf_out_len = *buf_out_len + sizeof(struct crypto_config_header); + err = encrypt_and_sign_nss_2_2(instance, + buf_in, buf_in_len, + buf_out, buf_out_len); + break; + default: + err = -1; + break; + } return err; } int crypto_authenticate_and_decrypt (struct crypto_instance *instance, unsigned char *buf, int *buf_len) { struct crypto_config_header *cch = (struct crypto_config_header *)buf; - /* - * decode crypto config of incoming packets - */ - - if (cch->crypto_cipher_type != instance->crypto_cipher_type) { - log_printf(instance->log_level_security, - "Incoming packet has different crypto type. Rejecting"); - return -1; - } - - if (cch->crypto_hash_type != instance->crypto_hash_type) { - log_printf(instance->log_level_security, - "Incoming packet has different hash type. 
Rejecting"); - return -1; - } - - if ((cch->__pad0 != 0) || (cch->__pad1 != 0)) { - log_printf(instance->log_level_security, - "Incoming packet appears to have features not supported by this version of corosync. Rejecting"); - return -1; + switch (instance->crypto_compat_type) { + case CRYPTO_COMPAT_2_0: + + /* + * decode crypto config of incoming packets + */ + + if (cch->crypto_cipher_type != instance->crypto_cipher_type) { + log_printf(instance->log_level_security, + "Incoming packet has different crypto type. Rejecting"); + return -1; + } + + if (cch->crypto_hash_type != instance->crypto_hash_type) { + log_printf(instance->log_level_security, + "Incoming packet has different hash type. Rejecting"); + return -1; + } + + if ((cch->__pad0 != 0) || (cch->__pad1 != 0)) { + log_printf(instance->log_level_security, + "Incoming packet appears to have features not supported by this version of corosync. Rejecting"); + return -1; + } + + /* + * invalidate config header and kill it + */ + + cch = NULL; + *buf_len -= sizeof(struct crypto_config_header); + memmove(buf, buf + sizeof(struct crypto_config_header), *buf_len); + + return authenticate_and_decrypt_nss_2_0(instance, buf, buf_len); + break; + case CRYPTO_COMPAT_2_2: + if (cch->crypto_cipher_type != CRYPTO_CIPHER_TYPE_2_2) { + log_printf(instance->log_level_security, + "Incoming packet has different crypto type. Rejecting"); + return -1; + } + + if (cch->crypto_hash_type != CRYPTO_HASH_TYPE_2_2) { + log_printf(instance->log_level_security, + "Incoming packet has different hash type. Rejecting"); + return -1; + } + + /* + * authenticate packet first + */ + + if (authenticate_nss_2_2(instance, buf, buf_len) != 0) { + return -1; + } + + /* + * now we can "trust" the padding bytes/future features + */ + + if ((cch->__pad0 != 0) || (cch->__pad1 != 0)) { + log_printf(instance->log_level_security, + "Incoming packet appears to have features not supported by this version of corosync. 
Rejecting"); + return -1; + } + + /* + * decrypt + */ + + if (decrypt_nss_2_2(instance, buf, buf_len) != 0) { + return -1; + } + + /* + * invalidate config header and kill it + */ + cch = NULL; + memmove(buf, buf + sizeof(struct crypto_config_header), *buf_len); + + return 0; + break; + default: + return -1; + break; } - - /* - * invalidate config header and kill it - */ - cch = NULL; - *buf_len -= sizeof(struct crypto_config_header); - memmove(buf, buf + sizeof(struct crypto_config_header), *buf_len); - - return authenticate_and_decrypt_nss(instance, buf, buf_len); } struct crypto_instance *crypto_init( const unsigned char *private_key, unsigned int private_key_len, const char *crypto_cipher_type, const char *crypto_hash_type, + const char *crypto_compat_type, void (*log_printf_func) ( int level, int subsys, const char *function, const char *file, int line, const char *format, ...)__attribute__((format(printf, 6, 7))), int log_level_security, int log_level_notice, int log_level_error, int log_subsys_id) { struct crypto_instance *instance; instance = malloc(sizeof(*instance)); if (instance == NULL) { return (NULL); } memset(instance, 0, sizeof(struct crypto_instance)); memcpy(instance->private_key, private_key, private_key_len); instance->private_key_len = private_key_len; instance->crypto_cipher_type = string_to_crypto_cipher_type(crypto_cipher_type); instance->crypto_hash_type = string_to_crypto_hash_type(crypto_hash_type); + instance->crypto_compat_type = string_to_crypto_compat_type(crypto_compat_type); instance->crypto_header_size = crypto_sec_header_size(crypto_cipher_type, crypto_hash_type); instance->log_printf_func = log_printf_func; instance->log_level_security = log_level_security; instance->log_level_notice = log_level_notice; instance->log_level_error = log_level_error; instance->log_subsys_id = log_subsys_id; - if (init_nss(instance, crypto_cipher_type, crypto_hash_type) < 0) { + if (init_nss(instance, crypto_cipher_type, crypto_hash_type, 
crypto_compat_type) < 0) { free(instance); return(NULL); } return (instance); } diff --git a/exec/totemcrypto.h b/exec/totemcrypto.h index 7c06c391..45770506 100644 --- a/exec/totemcrypto.h +++ b/exec/totemcrypto.h @@ -1,77 +1,78 @@ /* * Copyright (c) 2006-2012 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * Christine Caulfield (ccaulfie@redhat.com) * Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef TOTEMCRYPTO_H_DEFINED #define TOTEMCRYPTO_H_DEFINED #include struct crypto_instance; extern size_t crypto_sec_header_size( const char *crypto_cipher_type, const char *crypto_hash_type); extern int crypto_authenticate_and_decrypt ( struct crypto_instance *instance, unsigned char *buf, int *buf_len); extern int crypto_encrypt_and_sign ( struct crypto_instance *instance, const unsigned char *buf_in, const size_t buf_in_len, unsigned char *buf_out, size_t *buf_out_len); extern struct crypto_instance *crypto_init( const unsigned char *private_key, unsigned int private_key_len, const char *crypto_cipher_type, const char *crypto_hash_type, + const char *crypto_compat_type, void (*log_printf_func) ( int level, int subsys, const char *function, const char *file, int line, const char *format, ...)__attribute__((format(printf, 6, 7))), int log_level_security, int log_level_notice, int log_level_error, int log_subsys_id); #endif /* TOTEMCRYPTO_H_DEFINED */ diff --git a/exec/totemudp.c b/exec/totemudp.c index a5169c2a..52089614 100644 --- a/exec/totemudp.c +++ b/exec/totemudp.c @@ -1,1423 +1,1424 @@ /* * Copyright (c) 2005 MontaVista Software, Inc. * Copyright (c) 2006-2012 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. 
nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define LOGSYS_UTILS_ONLY 1 #include #include "totemudp.h" #include "util.h" #include "totemcrypto.h" #include #include #include #include #ifndef MSG_NOSIGNAL #define MSG_NOSIGNAL 0 #endif #define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX) #define NETIF_STATE_REPORT_UP 1 #define NETIF_STATE_REPORT_DOWN 2 #define BIND_STATE_UNBOUND 0 #define BIND_STATE_REGULAR 1 #define BIND_STATE_LOOPBACK 2 #define MESSAGE_TYPE_MEMB_JOIN 3 struct totemudp_socket { int mcast_recv; int mcast_send; int token; /* * Socket used for local multicast delivery. We don't rely on multicast * loop and rather this UNIX DGRAM socket is used. 
Socket is created by * socketpair call and they are used in same way as pipe (so [0] is read * end and [1] is write end) */ int local_mcast_loop[2]; }; struct totemudp_instance { struct crypto_instance *crypto_inst; qb_loop_t *totemudp_poll_handle; struct totem_interface *totem_interface; int netif_state_report; int netif_bind_state; void *context; void (*totemudp_deliver_fn) ( void *context, const void *msg, unsigned int msg_len); void (*totemudp_iface_change_fn) ( void *context, const struct totem_ip_address *iface_address); void (*totemudp_target_set_completed) (void *context); /* * Function and data used to log messages */ int totemudp_log_level_security; int totemudp_log_level_error; int totemudp_log_level_warning; int totemudp_log_level_notice; int totemudp_log_level_debug; int totemudp_subsys_id; void (*totemudp_log_printf) ( int level, int subsys, const char *function, const char *file, int line, const char *format, ...)__attribute__((format(printf, 6, 7))); void *udp_context; char iov_buffer[FRAME_SIZE_MAX]; char iov_buffer_flush[FRAME_SIZE_MAX]; struct iovec totemudp_iov_recv; struct iovec totemudp_iov_recv_flush; struct totemudp_socket totemudp_sockets; struct totem_ip_address mcast_address; int stats_sent; int stats_recv; int stats_delv; int stats_remcasts; int stats_orf_token; struct timeval stats_tv_start; struct totem_ip_address my_id; int firstrun; qb_loop_timer_handle timer_netif_check_timeout; unsigned int my_memb_entries; int flushing; struct totem_config *totem_config; totemsrp_stats_t *stats; struct totem_ip_address token_target; }; struct work_item { const void *msg; unsigned int msg_len; struct totemudp_instance *instance; }; static int totemudp_build_sockets ( struct totemudp_instance *instance, struct totem_ip_address *bindnet_address, struct totem_ip_address *mcastaddress, struct totemudp_socket *sockets, struct totem_ip_address *bound_to); static struct totem_ip_address localhost; static void totemudp_instance_initialize (struct 
totemudp_instance *instance) { memset (instance, 0, sizeof (struct totemudp_instance)); instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN; instance->totemudp_iov_recv.iov_base = instance->iov_buffer; instance->totemudp_iov_recv.iov_len = FRAME_SIZE_MAX; //sizeof (instance->iov_buffer); instance->totemudp_iov_recv_flush.iov_base = instance->iov_buffer_flush; instance->totemudp_iov_recv_flush.iov_len = FRAME_SIZE_MAX; //sizeof (instance->iov_buffer); /* * There is always atleast 1 processor */ instance->my_memb_entries = 1; } #define log_printf(level, format, args...) \ do { \ instance->totemudp_log_printf ( \ level, instance->totemudp_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ (const char *)format, ##args); \ } while (0); #define LOGSYS_PERROR(err_num, level, fmt, args...) \ do { \ char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \ const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \ instance->totemudp_log_printf ( \ level, instance->totemudp_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \ } while(0) int totemudp_crypto_set ( void *udp_context, const char *cipher_type, const char *hash_type) { return (0); } static inline void ucast_sendmsg ( struct totemudp_instance *instance, struct totem_ip_address *system_to, const void *msg, unsigned int msg_len) { struct msghdr msg_ucast; int res = 0; size_t buf_out_len; unsigned char buf_out[FRAME_SIZE_MAX]; struct sockaddr_storage sockaddr; struct iovec iovec; int addrlen; /* * Encrypt and digest the message */ if (crypto_encrypt_and_sign ( instance->crypto_inst, (const unsigned char *)msg, msg_len, buf_out, &buf_out_len) != 0) { log_printf(LOGSYS_LEVEL_CRIT, "Error encrypting/signing packet (non-critical)"); return; } iovec.iov_base = (void *)buf_out; iovec.iov_len = buf_out_len; /* * Build unicast message */ memset(&msg_ucast, 0, sizeof(msg_ucast)); totemip_totemip_to_sockaddr_convert(system_to, 
instance->totem_interface->ip_port, &sockaddr, &addrlen); msg_ucast.msg_name = &sockaddr; msg_ucast.msg_namelen = addrlen; msg_ucast.msg_iov = (void *)&iovec; msg_ucast.msg_iovlen = 1; #ifdef HAVE_MSGHDR_CONTROL msg_ucast.msg_control = 0; #endif #ifdef HAVE_MSGHDR_CONTROLLEN msg_ucast.msg_controllen = 0; #endif #ifdef HAVE_MSGHDR_FLAGS msg_ucast.msg_flags = 0; #endif #ifdef HAVE_MSGHDR_ACCRIGHTS msg_ucast.msg_accrights = NULL; #endif #ifdef HAVE_MSGHDR_ACCRIGHTSLEN msg_ucast.msg_accrightslen = 0; #endif /* * Transmit unicast message * An error here is recovered by totemsrp */ res = sendmsg (instance->totemudp_sockets.mcast_send, &msg_ucast, MSG_NOSIGNAL); if (res < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, "sendmsg(ucast) failed (non-critical)"); } } static inline void mcast_sendmsg ( struct totemudp_instance *instance, const void *msg, unsigned int msg_len) { struct msghdr msg_mcast; int res = 0; size_t buf_out_len; unsigned char buf_out[FRAME_SIZE_MAX]; struct iovec iovec; struct sockaddr_storage sockaddr; int addrlen; /* * Encrypt and digest the message */ if (crypto_encrypt_and_sign ( instance->crypto_inst, (const unsigned char *)msg, msg_len, buf_out, &buf_out_len) != 0) { log_printf(LOGSYS_LEVEL_CRIT, "Error encrypting/signing packet (non-critical)"); return; } iovec.iov_base = (void *)&buf_out; iovec.iov_len = buf_out_len; /* * Build multicast message */ totemip_totemip_to_sockaddr_convert(&instance->mcast_address, instance->totem_interface->ip_port, &sockaddr, &addrlen); memset(&msg_mcast, 0, sizeof(msg_mcast)); msg_mcast.msg_name = &sockaddr; msg_mcast.msg_namelen = addrlen; msg_mcast.msg_iov = (void *)&iovec; msg_mcast.msg_iovlen = 1; #ifdef HAVE_MSGHDR_CONTROL msg_mcast.msg_control = 0; #endif #ifdef HAVE_MSGHDR_CONTROLLEN msg_mcast.msg_controllen = 0; #endif #ifdef HAVE_MSGHDR_FLAGS msg_mcast.msg_flags = 0; #endif #ifdef HAVE_MSGHDR_ACCRIGHTS msg_mcast.msg_accrights = NULL; #endif #ifdef HAVE_MSGHDR_ACCRIGHTSLEN 
msg_mcast.msg_accrightslen = 0; #endif /* * Transmit multicast message * An error here is recovered by totemsrp */ res = sendmsg (instance->totemudp_sockets.mcast_send, &msg_mcast, MSG_NOSIGNAL); if (res < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, "sendmsg(mcast) failed (non-critical)"); instance->stats->continuous_sendmsg_failures++; } else { instance->stats->continuous_sendmsg_failures = 0; } /* * Transmit multicast message to local unix mcast loop * An error here is recovered by totemsrp */ msg_mcast.msg_name = NULL; msg_mcast.msg_namelen = 0; res = sendmsg (instance->totemudp_sockets.local_mcast_loop[1], &msg_mcast, MSG_NOSIGNAL); if (res < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, "sendmsg(local mcast loop) failed (non-critical)"); } } int totemudp_finalize ( void *udp_context) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; if (instance->totemudp_sockets.mcast_recv > 0) { qb_loop_poll_del (instance->totemudp_poll_handle, instance->totemudp_sockets.mcast_recv); close (instance->totemudp_sockets.mcast_recv); } if (instance->totemudp_sockets.mcast_send > 0) { close (instance->totemudp_sockets.mcast_send); } if (instance->totemudp_sockets.local_mcast_loop[0] > 0) { qb_loop_poll_del (instance->totemudp_poll_handle, instance->totemudp_sockets.local_mcast_loop[0]); close (instance->totemudp_sockets.local_mcast_loop[0]); close (instance->totemudp_sockets.local_mcast_loop[1]); } if (instance->totemudp_sockets.token > 0) { qb_loop_poll_del (instance->totemudp_poll_handle, instance->totemudp_sockets.token); close (instance->totemudp_sockets.token); } return (res); } /* * Only designed to work with a message with one iov */ static int net_deliver_fn ( int fd, int revents, void *data) { struct totemudp_instance *instance = (struct totemudp_instance *)data; struct msghdr msg_recv; struct iovec *iovec; struct sockaddr_storage system_from; int bytes_received; int res = 0; char 
*message_type; if (instance->flushing == 1) { iovec = &instance->totemudp_iov_recv_flush; } else { iovec = &instance->totemudp_iov_recv; } /* * Receive datagram */ msg_recv.msg_name = &system_from; msg_recv.msg_namelen = sizeof (struct sockaddr_storage); msg_recv.msg_iov = iovec; msg_recv.msg_iovlen = 1; #ifdef HAVE_MSGHDR_CONTROL msg_recv.msg_control = 0; #endif #ifdef HAVE_MSGHDR_CONTROLLEN msg_recv.msg_controllen = 0; #endif #ifdef HAVE_MSGHDR_FLAGS msg_recv.msg_flags = 0; #endif #ifdef HAVE_MSGHDR_ACCRIGHTS msg_recv.msg_accrights = NULL; #endif #ifdef HAVE_MSGHDR_ACCRIGHTSLEN msg_recv.msg_accrightslen = 0; #endif bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); if (bytes_received == -1) { return (0); } else { instance->stats_recv += bytes_received; } /* * Authenticate and if authenticated, decrypt datagram */ res = crypto_authenticate_and_decrypt (instance->crypto_inst, iovec->iov_base, &bytes_received); if (res == -1) { log_printf (instance->totemudp_log_level_security, "Received message has invalid digest... 
ignoring."); log_printf (instance->totemudp_log_level_security, "Invalid packet data"); iovec->iov_len = FRAME_SIZE_MAX; return 0; } iovec->iov_len = bytes_received; /* * Drop all non-mcast messages (more specifically join * messages should be dropped) */ message_type = (char *)iovec->iov_base; if (instance->flushing == 1 && *message_type == MESSAGE_TYPE_MEMB_JOIN) { iovec->iov_len = FRAME_SIZE_MAX; return (0); } /* * Handle incoming message */ instance->totemudp_deliver_fn ( instance->context, iovec->iov_base, iovec->iov_len); iovec->iov_len = FRAME_SIZE_MAX; return (0); } static int netif_determine ( struct totemudp_instance *instance, struct totem_ip_address *bindnet, struct totem_ip_address *bound_to, int *interface_up, int *interface_num) { int res; res = totemip_iface_check (bindnet, bound_to, interface_up, interface_num, instance->totem_config->clear_node_high_bit); return (res); } /* * If the interface is up, the sockets for totem are built. If the interface is down * this function is requeued in the timer list to retry building the sockets later. 
*/ static void timer_function_netif_check_timeout ( void *data) { struct totemudp_instance *instance = (struct totemudp_instance *)data; int interface_up; int interface_num; struct totem_ip_address *bind_address; /* * Build sockets for every interface */ netif_determine (instance, &instance->totem_interface->bindnet, &instance->totem_interface->boundto, &interface_up, &interface_num); /* * If the network interface isn't back up and we are already * in loopback mode, add timer to check again and return */ if ((instance->netif_bind_state == BIND_STATE_LOOPBACK && interface_up == 0) || (instance->my_memb_entries == 1 && instance->netif_bind_state == BIND_STATE_REGULAR && interface_up == 1)) { qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); /* * Add a timer to check for a downed regular interface */ return; } if (instance->totemudp_sockets.mcast_recv > 0) { qb_loop_poll_del (instance->totemudp_poll_handle, instance->totemudp_sockets.mcast_recv); close (instance->totemudp_sockets.mcast_recv); } if (instance->totemudp_sockets.mcast_send > 0) { close (instance->totemudp_sockets.mcast_send); } if (instance->totemudp_sockets.local_mcast_loop[0] > 0) { qb_loop_poll_del (instance->totemudp_poll_handle, instance->totemudp_sockets.local_mcast_loop[0]); close (instance->totemudp_sockets.local_mcast_loop[0]); close (instance->totemudp_sockets.local_mcast_loop[1]); } if (instance->totemudp_sockets.token > 0) { qb_loop_poll_del (instance->totemudp_poll_handle, instance->totemudp_sockets.token); close (instance->totemudp_sockets.token); } if (interface_up == 0) { /* * Interface is not up */ instance->netif_bind_state = BIND_STATE_LOOPBACK; bind_address = &localhost; /* * Add a timer to retry building interfaces and request memb_gather_enter */ qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, 
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } else { /* * Interface is up */ instance->netif_bind_state = BIND_STATE_REGULAR; bind_address = &instance->totem_interface->bindnet; } /* * Create and bind the multicast and unicast sockets */ (void)totemudp_build_sockets (instance, &instance->mcast_address, bind_address, &instance->totemudp_sockets, &instance->totem_interface->boundto); qb_loop_poll_add ( instance->totemudp_poll_handle, QB_LOOP_MED, instance->totemudp_sockets.mcast_recv, POLLIN, instance, net_deliver_fn); qb_loop_poll_add ( instance->totemudp_poll_handle, QB_LOOP_MED, instance->totemudp_sockets.local_mcast_loop[0], POLLIN, instance, net_deliver_fn); qb_loop_poll_add ( instance->totemudp_poll_handle, QB_LOOP_MED, instance->totemudp_sockets.token, POLLIN, instance, net_deliver_fn); totemip_copy (&instance->my_id, &instance->totem_interface->boundto); /* * This reports changes in the interface to the user and totemsrp */ if (instance->netif_bind_state == BIND_STATE_REGULAR) { if (instance->netif_state_report & NETIF_STATE_REPORT_UP) { log_printf (instance->totemudp_log_level_notice, "The network interface [%s] is now up.", totemip_print (&instance->totem_interface->boundto)); instance->netif_state_report = NETIF_STATE_REPORT_DOWN; instance->totemudp_iface_change_fn (instance->context, &instance->my_id); } /* * Add a timer to check for interface going down in single membership */ if (instance->my_memb_entries == 1) { qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } } else { if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) { log_printf (instance->totemudp_log_level_notice, "The network interface is down."); instance->totemudp_iface_change_fn (instance->context, 
&instance->my_id); } instance->netif_state_report = NETIF_STATE_REPORT_UP; } } /* Set the socket priority to INTERACTIVE to ensure that our messages don't get queued behind anything else */ static void totemudp_traffic_control_set(struct totemudp_instance *instance, int sock) { #ifdef SO_PRIORITY int prio = 6; /* TC_PRIO_INTERACTIVE */ if (setsockopt(sock, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(int))) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set traffic priority"); } #endif } static int totemudp_build_sockets_ip ( struct totemudp_instance *instance, struct totem_ip_address *mcast_address, struct totem_ip_address *bindnet_address, struct totemudp_socket *sockets, struct totem_ip_address *bound_to, int interface_num) { struct sockaddr_storage sockaddr; struct ipv6_mreq mreq6; struct ip_mreq mreq; struct sockaddr_storage mcast_ss, boundto_ss; struct sockaddr_in6 *mcast_sin6 = (struct sockaddr_in6 *)&mcast_ss; struct sockaddr_in *mcast_sin = (struct sockaddr_in *)&mcast_ss; struct sockaddr_in *boundto_sin = (struct sockaddr_in *)&boundto_ss; unsigned int sendbuf_size; unsigned int recvbuf_size; unsigned int optlen = sizeof (sendbuf_size); int addrlen; int res; int flag; uint8_t sflag; int i; /* * Create multicast recv socket */ sockets->mcast_recv = socket (bindnet_address->family, SOCK_DGRAM, 0); if (sockets->mcast_recv == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "socket() failed"); return (-1); } totemip_nosigpipe (sockets->mcast_recv); res = fcntl (sockets->mcast_recv, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set non-blocking operation on multicast socket"); return (-1); } /* * Force reuse */ flag = 1; if ( setsockopt(sockets->mcast_recv, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "setsockopt(SO_REUSEADDR) failed"); return (-1); } /* * Bind to multicast socket 
used for multicast receives */ totemip_totemip_to_sockaddr_convert(mcast_address, instance->totem_interface->ip_port, &sockaddr, &addrlen); res = bind (sockets->mcast_recv, (struct sockaddr *)&sockaddr, addrlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Unable to bind the socket to receive multicast packets"); return (-1); } /* * Create local multicast loop socket */ if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sockets->local_mcast_loop) == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "socket() failed"); return (-1); } for (i = 0; i < 2; i++) { totemip_nosigpipe (sockets->local_mcast_loop[i]); res = fcntl (sockets->local_mcast_loop[i], F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set non-blocking operation on multicast socket"); return (-1); } } /* * Setup mcast send socket */ sockets->mcast_send = socket (bindnet_address->family, SOCK_DGRAM, 0); if (sockets->mcast_send == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "socket() failed"); return (-1); } totemip_nosigpipe (sockets->mcast_send); res = fcntl (sockets->mcast_send, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set non-blocking operation on multicast socket"); return (-1); } /* * Force reuse */ flag = 1; if ( setsockopt(sockets->mcast_send, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "setsockopt(SO_REUSEADDR) failed"); return (-1); } totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port - 1, &sockaddr, &addrlen); res = bind (sockets->mcast_send, (struct sockaddr *)&sockaddr, addrlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Unable to bind the socket to send multicast packets"); return (-1); } /* * Setup unicast socket */ sockets->token = socket 
(bindnet_address->family, SOCK_DGRAM, 0); if (sockets->token == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "socket() failed"); return (-1); } totemip_nosigpipe (sockets->token); res = fcntl (sockets->token, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set non-blocking operation on token socket"); return (-1); } /* * Force reuse */ flag = 1; if ( setsockopt(sockets->token, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "setsockopt(SO_REUSEADDR) failed"); return (-1); } /* * Bind to unicast socket used for token send/receives * This has the side effect of binding to the correct interface */ totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &sockaddr, &addrlen); res = bind (sockets->token, (struct sockaddr *)&sockaddr, addrlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Unable to bind UDP unicast socket"); return (-1); } recvbuf_size = MCAST_SOCKET_BUFFER_SIZE; sendbuf_size = MCAST_SOCKET_BUFFER_SIZE; /* * Set buffer sizes to avoid overruns */ res = setsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, "Unable to set SO_RCVBUF size on UDP mcast socket"); return (-1); } res = setsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, "Unable to set SO_SNDBUF size on UDP mcast socket"); return (-1); } res = setsockopt (sockets->local_mcast_loop[0], SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, "Unable to set SO_RCVBUF size on UDP local mcast loop socket"); return (-1); } res = setsockopt (sockets->local_mcast_loop[1], SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen); if (res == 
-1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, "Unable to set SO_SNDBUF size on UDP local mcast loop socket"); return (-1); } res = getsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, &optlen); if (res == 0) { log_printf (instance->totemudp_log_level_debug, "Receive multicast socket recv buffer size (%d bytes).", recvbuf_size); } res = getsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, &optlen); if (res == 0) { log_printf (instance->totemudp_log_level_debug, "Transmit multicast socket send buffer size (%d bytes).", sendbuf_size); } res = getsockopt (sockets->local_mcast_loop[0], SOL_SOCKET, SO_RCVBUF, &recvbuf_size, &optlen); if (res == 0) { log_printf (instance->totemudp_log_level_debug, "Local receive multicast loop socket recv buffer size (%d bytes).", recvbuf_size); } res = getsockopt (sockets->local_mcast_loop[1], SOL_SOCKET, SO_SNDBUF, &sendbuf_size, &optlen); if (res == 0) { log_printf (instance->totemudp_log_level_debug, "Local transmit multicast loop socket send buffer size (%d bytes).", sendbuf_size); } /* * Join group membership on socket */ totemip_totemip_to_sockaddr_convert(mcast_address, instance->totem_interface->ip_port, &mcast_ss, &addrlen); totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &boundto_ss, &addrlen); if (instance->totem_config->broadcast_use == 1) { unsigned int broadcast = 1; if ((setsockopt(sockets->mcast_recv, SOL_SOCKET, SO_BROADCAST, &broadcast, sizeof (broadcast))) == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "setting broadcast option failed"); return (-1); } if ((setsockopt(sockets->mcast_send, SOL_SOCKET, SO_BROADCAST, &broadcast, sizeof (broadcast))) == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "setting broadcast option failed"); return (-1); } } else { switch (bindnet_address->family) { case AF_INET: memset(&mreq, 0, sizeof(mreq)); mreq.imr_multiaddr.s_addr = mcast_sin->sin_addr.s_addr; 
mreq.imr_interface.s_addr = boundto_sin->sin_addr.s_addr; res = setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof (mreq)); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "join ipv4 multicast group failed"); return (-1); } break; case AF_INET6: memset(&mreq6, 0, sizeof(mreq6)); memcpy(&mreq6.ipv6mr_multiaddr, &mcast_sin6->sin6_addr, sizeof(struct in6_addr)); mreq6.ipv6mr_interface = interface_num; res = setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_JOIN_GROUP, &mreq6, sizeof (mreq6)); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "join ipv6 multicast group failed"); return (-1); } break; } } /* * Turn off multicast loopback */ flag = 0; switch ( bindnet_address->family ) { case AF_INET: sflag = 0; res = setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_LOOP, &sflag, sizeof (sflag)); break; case AF_INET6: res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, &flag, sizeof (flag)); } if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Unable to turn off multicast loopback"); return (-1); } /* * Set multicast packets TTL */ flag = instance->totem_interface->ttl; if (bindnet_address->family == AF_INET6) { res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, &flag, sizeof (flag)); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "set mcast v6 TTL failed"); return (-1); } } else { sflag = flag; res = setsockopt(sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_TTL, &sflag, sizeof(sflag)); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "set mcast v4 TTL failed"); return (-1); } } /* * Bind to a specific interface for multicast send and receive */ switch ( bindnet_address->family ) { case AF_INET: if (setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_IF, &boundto_sin->sin_addr, sizeof (boundto_sin->sin_addr)) < 0) { LOGSYS_PERROR (errno, 
instance->totemudp_log_level_warning, "cannot select interface for multicast packets (send)"); return (-1); } if (setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_MULTICAST_IF, &boundto_sin->sin_addr, sizeof (boundto_sin->sin_addr)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "cannot select interface for multicast packets (recv)"); return (-1); } break; case AF_INET6: if (setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_IF, &interface_num, sizeof (interface_num)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "cannot select interface for multicast packets (send v6)"); return (-1); } if (setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_MULTICAST_IF, &interface_num, sizeof (interface_num)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "cannot select interface for multicast packets (recv v6)"); return (-1); } break; } return 0; } static int totemudp_build_sockets ( struct totemudp_instance *instance, struct totem_ip_address *mcast_address, struct totem_ip_address *bindnet_address, struct totemudp_socket *sockets, struct totem_ip_address *bound_to) { int interface_num; int interface_up; int res; /* * Determine the ip address bound to and the interface name */ res = netif_determine (instance, bindnet_address, bound_to, &interface_up, &interface_num); if (res == -1) { return (-1); } totemip_copy(&instance->my_id, bound_to); res = totemudp_build_sockets_ip (instance, mcast_address, bindnet_address, sockets, bound_to, interface_num); /* We only send out of the token socket */ totemudp_traffic_control_set(instance, sockets->token); return res; } /* * Totem Network interface - also does encryption/decryption * depends on poll abstraction, POSIX, IPV4 */ /* * Create an instance */ int totemudp_initialize ( qb_loop_t *poll_handle, void **udp_context, struct totem_config *totem_config, totemsrp_stats_t *stats, int interface_no, void *context, void (*deliver_fn) ( void *context, const void *msg, 
unsigned int msg_len), void (*iface_change_fn) ( void *context, const struct totem_ip_address *iface_address), void (*target_set_completed) ( void *context)) { struct totemudp_instance *instance; instance = malloc (sizeof (struct totemudp_instance)); if (instance == NULL) { return (-1); } totemudp_instance_initialize (instance); instance->totem_config = totem_config; instance->stats = stats; /* * Configure logging */ instance->totemudp_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security; instance->totemudp_log_level_error = totem_config->totem_logging_configuration.log_level_error; instance->totemudp_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; instance->totemudp_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; instance->totemudp_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; instance->totemudp_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; instance->totemudp_log_printf = totem_config->totem_logging_configuration.log_printf; /* * Initialize random number generator for later use to generate salt */ instance->crypto_inst = crypto_init (totem_config->private_key, totem_config->private_key_len, totem_config->crypto_cipher_type, totem_config->crypto_hash_type, + totem_config->crypto_compat_type, instance->totemudp_log_printf, instance->totemudp_log_level_security, instance->totemudp_log_level_notice, instance->totemudp_log_level_error, instance->totemudp_subsys_id); if (instance->crypto_inst == NULL) { return (-1); } /* * Initialize local variables for totemudp */ instance->totem_interface = &totem_config->interfaces[interface_no]; totemip_copy (&instance->mcast_address, &instance->totem_interface->mcast_addr); memset (instance->iov_buffer, 0, FRAME_SIZE_MAX); instance->totemudp_poll_handle = poll_handle; instance->totem_interface->bindnet.nodeid = instance->totem_config->node_id; instance->context = context; 
instance->totemudp_deliver_fn = deliver_fn; instance->totemudp_iface_change_fn = iface_change_fn; instance->totemudp_target_set_completed = target_set_completed; totemip_localhost (instance->mcast_address.family, &localhost); localhost.nodeid = instance->totem_config->node_id; /* * RRP layer isn't ready to receive message because it hasn't * initialized yet. Add short timer to check the interfaces. */ qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, 100*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); *udp_context = instance; return (0); } void *totemudp_buffer_alloc (void) { return malloc (FRAME_SIZE_MAX); } void totemudp_buffer_release (void *ptr) { return free (ptr); } int totemudp_processor_count_set ( void *udp_context, int processor_count) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; instance->my_memb_entries = processor_count; qb_loop_timer_del (instance->totemudp_poll_handle, instance->timer_netif_check_timeout); if (processor_count == 1) { qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } return (res); } int totemudp_recv_flush (void *udp_context) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; struct pollfd ufd; int nfds; int res = 0; int i; int sock; instance->flushing = 1; for (i = 0; i < 2; i++) { sock = -1; if (i == 0) { sock = instance->totemudp_sockets.mcast_recv; } if (i == 1) { sock = instance->totemudp_sockets.local_mcast_loop[0]; } assert(sock != -1); do { ufd.fd = sock; ufd.events = POLLIN; nfds = poll (&ufd, 1, 0); if (nfds == 1 && ufd.revents & POLLIN) { net_deliver_fn (sock, ufd.revents, instance); } } while (nfds == 1); } instance->flushing = 0; return (res); } int totemudp_send_flush (void *udp_context) { return 0; 
} int totemudp_token_send ( void *udp_context, const void *msg, unsigned int msg_len) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; ucast_sendmsg (instance, &instance->token_target, msg, msg_len); return (res); } int totemudp_mcast_flush_send ( void *udp_context, const void *msg, unsigned int msg_len) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; mcast_sendmsg (instance, msg, msg_len); return (res); } int totemudp_mcast_noflush_send ( void *udp_context, const void *msg, unsigned int msg_len) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; mcast_sendmsg (instance, msg, msg_len); return (res); } extern int totemudp_iface_check (void *udp_context) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; timer_function_netif_check_timeout (instance); return (res); } extern void totemudp_net_mtu_adjust (void *udp_context, struct totem_config *totem_config) { #define UDPIP_HEADER_SIZE (20 + 8) /* 20 bytes for ip 8 bytes for udp */ totem_config->net_mtu -= crypto_sec_header_size(totem_config->crypto_cipher_type, totem_config->crypto_hash_type) + UDPIP_HEADER_SIZE; } const char *totemudp_iface_print (void *udp_context) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; const char *ret_char; ret_char = totemip_print (&instance->my_id); return (ret_char); } int totemudp_iface_get ( void *udp_context, struct totem_ip_address *addr) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; memcpy (addr, &instance->my_id, sizeof (struct totem_ip_address)); return (res); } int totemudp_token_target_set ( void *udp_context, const struct totem_ip_address *token_target) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; memcpy (&instance->token_target, token_target, sizeof (struct totem_ip_address)); 
instance->totemudp_target_set_completed (instance->context); return (res); } extern int totemudp_recv_mcast_empty ( void *udp_context) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; unsigned int res; struct sockaddr_storage system_from; struct msghdr msg_recv; struct pollfd ufd; int nfds; int msg_processed = 0; int i; int sock; /* * Receive datagram */ msg_recv.msg_name = &system_from; msg_recv.msg_namelen = sizeof (struct sockaddr_storage); msg_recv.msg_iov = &instance->totemudp_iov_recv_flush; msg_recv.msg_iovlen = 1; #ifdef HAVE_MSGHDR_CONTROL msg_recv.msg_control = 0; #endif #ifdef HAVE_MSGHDR_CONTROLLEN msg_recv.msg_controllen = 0; #endif #ifdef HAVE_MSGHDR_FLAGS msg_recv.msg_flags = 0; #endif #ifdef HAVE_MSGHDR_ACCRIGHTS msg_recv.msg_accrights = NULL; #endif #ifdef HAVE_MSGHDR_ACCRIGHTSLEN msg_recv.msg_accrightslen = 0; #endif for (i = 0; i < 2; i++) { sock = -1; if (i == 0) { sock = instance->totemudp_sockets.mcast_recv; } if (i == 1) { sock = instance->totemudp_sockets.local_mcast_loop[0]; } assert(sock != -1); do { ufd.fd = sock; ufd.events = POLLIN; nfds = poll (&ufd, 1, 0); if (nfds == 1 && ufd.revents & POLLIN) { res = recvmsg (sock, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); if (res != -1) { msg_processed = 1; } else { msg_processed = -1; } } } while (nfds == 1); } return (msg_processed); } diff --git a/exec/totemudpu.c b/exec/totemudpu.c index 12ec63c0..14163c50 100644 --- a/exec/totemudpu.c +++ b/exec/totemudpu.c @@ -1,1163 +1,1164 @@ /* * Copyright (c) 2005 MontaVista Software, Inc. * Copyright (c) 2006-2012 Red Hat, Inc. * * All rights reserved. 
* * Author: Steven Dake (sdake@redhat.com) * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define LOGSYS_UTILS_ONLY 1 #include #include "totemudpu.h" #include "util.h" #include "totemcrypto.h" #include #include #include #include #ifndef MSG_NOSIGNAL #define MSG_NOSIGNAL 0 #endif #define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX) #define NETIF_STATE_REPORT_UP 1 #define NETIF_STATE_REPORT_DOWN 2 #define BIND_STATE_UNBOUND 0 #define BIND_STATE_REGULAR 1 #define BIND_STATE_LOOPBACK 2 struct totemudpu_member { struct list_head list; struct totem_ip_address member; int fd; }; struct totemudpu_instance { struct crypto_instance *crypto_inst; qb_loop_t *totemudpu_poll_handle; struct totem_interface *totem_interface; int netif_state_report; int netif_bind_state; void *context; void (*totemudpu_deliver_fn) ( void *context, const void *msg, unsigned int msg_len); void (*totemudpu_iface_change_fn) ( void *context, const struct totem_ip_address *iface_address); void (*totemudpu_target_set_completed) (void *context); /* * Function and data used to log messages */ int totemudpu_log_level_security; int totemudpu_log_level_error; int totemudpu_log_level_warning; int totemudpu_log_level_notice; int totemudpu_log_level_debug; int totemudpu_subsys_id; void (*totemudpu_log_printf) ( int level, int subsys, const char *function, const char *file, int line, const char *format, ...)__attribute__((format(printf, 6, 7))); void *udpu_context; char iov_buffer[FRAME_SIZE_MAX]; struct iovec totemudpu_iov_recv; struct list_head member_list; int stats_sent; int stats_recv; int stats_delv; int stats_remcasts; int stats_orf_token; struct timeval stats_tv_start; struct totem_ip_address my_id; int firstrun; qb_loop_timer_handle timer_netif_check_timeout; unsigned int my_memb_entries; struct totem_config *totem_config; 
totemsrp_stats_t *stats; struct totem_ip_address token_target; int token_socket; }; struct work_item { const void *msg; unsigned int msg_len; struct totemudpu_instance *instance; }; static int totemudpu_build_sockets ( struct totemudpu_instance *instance, struct totem_ip_address *bindnet_address, struct totem_ip_address *bound_to); static int totemudpu_create_sending_socket( void *udpu_context, const struct totem_ip_address *member); int totemudpu_member_list_rebind_ip ( void *udpu_context); static struct totem_ip_address localhost; static void totemudpu_instance_initialize (struct totemudpu_instance *instance) { memset (instance, 0, sizeof (struct totemudpu_instance)); instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN; instance->totemudpu_iov_recv.iov_base = instance->iov_buffer; instance->totemudpu_iov_recv.iov_len = FRAME_SIZE_MAX; //sizeof (instance->iov_buffer); /* * There is always atleast 1 processor */ instance->my_memb_entries = 1; list_init (&instance->member_list); } #define log_printf(level, format, args...) \ do { \ instance->totemudpu_log_printf ( \ level, instance->totemudpu_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ (const char *)format, ##args); \ } while (0); #define LOGSYS_PERROR(err_num, level, fmt, args...) 
\ do { \ char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \ const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \ instance->totemudpu_log_printf ( \ level, instance->totemudpu_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ fmt ": %s (%d)", ##args, _error_ptr, err_num); \ } while(0) int totemudpu_crypto_set ( void *udpu_context, const char *cipher_type, const char *hash_type) { return (0); } static inline void ucast_sendmsg ( struct totemudpu_instance *instance, struct totem_ip_address *system_to, const void *msg, unsigned int msg_len) { struct msghdr msg_ucast; int res = 0; size_t buf_out_len; unsigned char buf_out[FRAME_SIZE_MAX]; struct sockaddr_storage sockaddr; struct iovec iovec; int addrlen; /* * Encrypt and digest the message */ if (crypto_encrypt_and_sign ( instance->crypto_inst, (const unsigned char *)msg, msg_len, buf_out, &buf_out_len) != 0) { log_printf(LOGSYS_LEVEL_CRIT, "Error encrypting/signing packet (non-critical)"); return; } iovec.iov_base = (void *)buf_out; iovec.iov_len = buf_out_len; /* * Build unicast message */ totemip_totemip_to_sockaddr_convert(system_to, instance->totem_interface->ip_port, &sockaddr, &addrlen); memset(&msg_ucast, 0, sizeof(msg_ucast)); msg_ucast.msg_name = &sockaddr; msg_ucast.msg_namelen = addrlen; msg_ucast.msg_iov = (void *)&iovec; msg_ucast.msg_iovlen = 1; #ifdef HAVE_MSGHDR_CONTROL msg_ucast.msg_control = 0; #endif #ifdef HAVE_MSGHDR_CONTROLLEN msg_ucast.msg_controllen = 0; #endif #ifdef HAVE_MSGHDR_FLAGS msg_ucast.msg_flags = 0; #endif #ifdef HAVE_MSGHDR_ACCRIGHTS msg_ucast.msg_accrights = NULL; #endif #ifdef HAVE_MSGHDR_ACCRIGHTSLEN msg_ucast.msg_accrightslen = 0; #endif /* * Transmit unicast message * An error here is recovered by totemsrp */ res = sendmsg (instance->token_socket, &msg_ucast, MSG_NOSIGNAL); if (res < 0) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug, "sendmsg(ucast) failed (non-critical)"); } } static inline void mcast_sendmsg ( struct totemudpu_instance 
*instance, const void *msg, unsigned int msg_len) { struct msghdr msg_mcast; int res = 0; size_t buf_out_len; unsigned char buf_out[FRAME_SIZE_MAX]; struct iovec iovec; struct sockaddr_storage sockaddr; int addrlen; struct list_head *list; struct totemudpu_member *member; /* * Encrypt and digest the message */ if (crypto_encrypt_and_sign ( instance->crypto_inst, (const unsigned char *)msg, msg_len, buf_out, &buf_out_len) != 0) { log_printf(LOGSYS_LEVEL_CRIT, "Error encrypting/signing packet (non-critical)"); return; } iovec.iov_base = (void *)buf_out; iovec.iov_len = buf_out_len; memset(&msg_mcast, 0, sizeof(msg_mcast)); /* * Build multicast message */ for (list = instance->member_list.next; list != &instance->member_list; list = list->next) { member = list_entry (list, struct totemudpu_member, list); totemip_totemip_to_sockaddr_convert(&member->member, instance->totem_interface->ip_port, &sockaddr, &addrlen); msg_mcast.msg_name = &sockaddr; msg_mcast.msg_namelen = addrlen; msg_mcast.msg_iov = (void *)&iovec; msg_mcast.msg_iovlen = 1; #ifdef HAVE_MSGHDR_CONTROL msg_mcast.msg_control = 0; #endif #ifdef HAVE_MSGHDR_CONTROLLEN msg_mcast.msg_controllen = 0; #endif #ifdef HAVE_MSGHDR_FLAGS msg_mcast.msg_flags = 0; #endif #ifdef HAVE_MSGHDR_ACCRIGHTS msg_mcast.msg_accrights = NULL; #endif #ifdef HAVE_MSGHDR_ACCRIGHTSLEN msg_mcast.msg_accrightslen = 0; #endif /* * Transmit multicast message * An error here is recovered by totemsrp */ res = sendmsg (member->fd, &msg_mcast, MSG_NOSIGNAL); if (res < 0) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug, "sendmsg(mcast) failed (non-critical)"); } } } int totemudpu_finalize ( void *udpu_context) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; if (instance->token_socket > 0) { qb_loop_poll_del (instance->totemudpu_poll_handle, instance->token_socket); close (instance->token_socket); } return (res); } static int net_deliver_fn ( int fd, int revents, void *data) { struct 
totemudpu_instance *instance = (struct totemudpu_instance *)data; struct msghdr msg_recv; struct iovec *iovec; struct sockaddr_storage system_from; int bytes_received; int res = 0; iovec = &instance->totemudpu_iov_recv; /* * Receive datagram */ msg_recv.msg_name = &system_from; msg_recv.msg_namelen = sizeof (struct sockaddr_storage); msg_recv.msg_iov = iovec; msg_recv.msg_iovlen = 1; #ifdef HAVE_MSGHDR_CONTROL msg_recv.msg_control = 0; #endif #ifdef HAVE_MSGHDR_CONTROLLEN msg_recv.msg_controllen = 0; #endif #ifdef HAVE_MSGHDR_FLAGS msg_recv.msg_flags = 0; #endif #ifdef HAVE_MSGHDR_ACCRIGHTS msg_recv.msg_accrights = NULL; #endif #ifdef HAVE_MSGHDR_ACCRIGHTSLEN msg_recv.msg_accrightslen = 0; #endif bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); if (bytes_received == -1) { return (0); } else { instance->stats_recv += bytes_received; } /* * Authenticate and if authenticated, decrypt datagram */ res = crypto_authenticate_and_decrypt (instance->crypto_inst, iovec->iov_base, &bytes_received); if (res == -1) { log_printf (instance->totemudpu_log_level_security, "Received message has invalid digest... ignoring."); log_printf (instance->totemudpu_log_level_security, "Invalid packet data"); iovec->iov_len = FRAME_SIZE_MAX; return 0; } iovec->iov_len = bytes_received; /* * Handle incoming message */ instance->totemudpu_deliver_fn ( instance->context, iovec->iov_base, iovec->iov_len); iovec->iov_len = FRAME_SIZE_MAX; return (0); } static int netif_determine ( struct totemudpu_instance *instance, struct totem_ip_address *bindnet, struct totem_ip_address *bound_to, int *interface_up, int *interface_num) { int res; res = totemip_iface_check (bindnet, bound_to, interface_up, interface_num, instance->totem_config->clear_node_high_bit); return (res); } /* * If the interface is up, the sockets for totem are built. If the interface is down * this function is requeued in the timer list to retry building the sockets later. 
*/ static void timer_function_netif_check_timeout ( void *data) { struct totemudpu_instance *instance = (struct totemudpu_instance *)data; int interface_up; int interface_num; struct totem_ip_address *bind_address; /* * Build sockets for every interface */ netif_determine (instance, &instance->totem_interface->bindnet, &instance->totem_interface->boundto, &interface_up, &interface_num); /* * If the network interface isn't back up and we are already * in loopback mode, add timer to check again and return */ if ((instance->netif_bind_state == BIND_STATE_LOOPBACK && interface_up == 0) || (instance->my_memb_entries == 1 && instance->netif_bind_state == BIND_STATE_REGULAR && interface_up == 1)) { qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); /* * Add a timer to check for a downed regular interface */ return; } if (instance->token_socket > 0) { qb_loop_poll_del (instance->totemudpu_poll_handle, instance->token_socket); close (instance->token_socket); } if (interface_up == 0) { /* * Interface is not up */ instance->netif_bind_state = BIND_STATE_LOOPBACK; bind_address = &localhost; /* * Add a timer to retry building interfaces and request memb_gather_enter */ qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } else { /* * Interface is up */ instance->netif_bind_state = BIND_STATE_REGULAR; bind_address = &instance->totem_interface->bindnet; } /* * Create and bind the multicast and unicast sockets */ totemudpu_build_sockets (instance, bind_address, &instance->totem_interface->boundto); qb_loop_poll_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->token_socket, POLLIN, instance, net_deliver_fn); totemip_copy (&instance->my_id, 
&instance->totem_interface->boundto); /* * This reports changes in the interface to the user and totemsrp */ if (instance->netif_bind_state == BIND_STATE_REGULAR) { if (instance->netif_state_report & NETIF_STATE_REPORT_UP) { log_printf (instance->totemudpu_log_level_notice, "The network interface [%s] is now up.", totemip_print (&instance->totem_interface->boundto)); instance->netif_state_report = NETIF_STATE_REPORT_DOWN; instance->totemudpu_iface_change_fn (instance->context, &instance->my_id); } /* * Add a timer to check for interface going down in single membership */ if (instance->my_memb_entries == 1) { qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } } else { if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) { log_printf (instance->totemudpu_log_level_notice, "The network interface is down."); instance->totemudpu_iface_change_fn (instance->context, &instance->my_id); } instance->netif_state_report = NETIF_STATE_REPORT_UP; } } /* Set the socket priority to INTERACTIVE to ensure that our messages don't get queued behind anything else */ static void totemudpu_traffic_control_set(struct totemudpu_instance *instance, int sock) { #ifdef SO_PRIORITY int prio = 6; /* TC_PRIO_INTERACTIVE */ if (setsockopt(sock, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(int))) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "Could not set traffic priority"); } #endif } static int totemudpu_build_sockets_ip ( struct totemudpu_instance *instance, struct totem_ip_address *bindnet_address, struct totem_ip_address *bound_to, int interface_num) { struct sockaddr_storage sockaddr; int addrlen; int res; unsigned int recvbuf_size; unsigned int optlen = sizeof (recvbuf_size); /* * Setup unicast socket */ instance->token_socket = socket (bindnet_address->family, SOCK_DGRAM, 0); if (instance->token_socket 
== -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "socket() failed"); return (-1); } totemip_nosigpipe (instance->token_socket); res = fcntl (instance->token_socket, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "Could not set non-blocking operation on token socket"); return (-1); } /* * Bind to unicast socket used for token send/receives * This has the side effect of binding to the correct interface */ totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &sockaddr, &addrlen); res = bind (instance->token_socket, (struct sockaddr *)&sockaddr, addrlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "bind token socket failed"); return (-1); } /* * the token_socket can receive many messages. Allow a large number * of receive messages on this socket */ recvbuf_size = MCAST_SOCKET_BUFFER_SIZE; res = setsockopt (instance->token_socket, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_notice, "Could not set recvbuf size"); } return 0; } static int totemudpu_build_sockets ( struct totemudpu_instance *instance, struct totem_ip_address *bindnet_address, struct totem_ip_address *bound_to) { int interface_num; int interface_up; int res; /* * Determine the ip address bound to and the interface name */ res = netif_determine (instance, bindnet_address, bound_to, &interface_up, &interface_num); if (res == -1) { return (-1); } totemip_copy(&instance->my_id, bound_to); res = totemudpu_build_sockets_ip (instance, bindnet_address, bound_to, interface_num); /* We only send out of the token socket */ totemudpu_traffic_control_set(instance, instance->token_socket); /* * Rebind all members to new ips */ totemudpu_member_list_rebind_ip(instance); return res; } /* * Totem Network interface - also does encryption/decryption * depends on poll abstraction, POSIX, IPV4 */ /* * Create an instance 
*/ int totemudpu_initialize ( qb_loop_t *poll_handle, void **udpu_context, struct totem_config *totem_config, totemsrp_stats_t *stats, int interface_no, void *context, void (*deliver_fn) ( void *context, const void *msg, unsigned int msg_len), void (*iface_change_fn) ( void *context, const struct totem_ip_address *iface_address), void (*target_set_completed) ( void *context)) { struct totemudpu_instance *instance; instance = malloc (sizeof (struct totemudpu_instance)); if (instance == NULL) { return (-1); } totemudpu_instance_initialize (instance); instance->totem_config = totem_config; instance->stats = stats; /* * Configure logging */ instance->totemudpu_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security; instance->totemudpu_log_level_error = totem_config->totem_logging_configuration.log_level_error; instance->totemudpu_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; instance->totemudpu_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; instance->totemudpu_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; instance->totemudpu_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; instance->totemudpu_log_printf = totem_config->totem_logging_configuration.log_printf; /* * Initialize random number generator for later use to generate salt */ instance->crypto_inst = crypto_init (totem_config->private_key, totem_config->private_key_len, totem_config->crypto_cipher_type, totem_config->crypto_hash_type, + totem_config->crypto_compat_type, instance->totemudpu_log_printf, instance->totemudpu_log_level_security, instance->totemudpu_log_level_notice, instance->totemudpu_log_level_error, instance->totemudpu_subsys_id); if (instance->crypto_inst == NULL) { return (-1); } /* * Initialize local variables for totemudpu */ instance->totem_interface = &totem_config->interfaces[interface_no]; memset (instance->iov_buffer, 0, FRAME_SIZE_MAX); 
instance->totemudpu_poll_handle = poll_handle; instance->totem_interface->bindnet.nodeid = instance->totem_config->node_id; instance->context = context; instance->totemudpu_deliver_fn = deliver_fn; instance->totemudpu_iface_change_fn = iface_change_fn; instance->totemudpu_target_set_completed = target_set_completed; totemip_localhost (AF_INET, &localhost); localhost.nodeid = instance->totem_config->node_id; /* * RRP layer isn't ready to receive message because it hasn't * initialized yet. Add short timer to check the interfaces. */ qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, 100*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); *udpu_context = instance; return (0); } void *totemudpu_buffer_alloc (void) { return malloc (FRAME_SIZE_MAX); } void totemudpu_buffer_release (void *ptr) { return free (ptr); } int totemudpu_processor_count_set ( void *udpu_context, int processor_count) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; instance->my_memb_entries = processor_count; qb_loop_timer_del (instance->totemudpu_poll_handle, instance->timer_netif_check_timeout); if (processor_count == 1) { qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } return (res); } int totemudpu_recv_flush (void *udpu_context) { int res = 0; return (res); } int totemudpu_send_flush (void *udpu_context) { int res = 0; return (res); } int totemudpu_token_send ( void *udpu_context, const void *msg, unsigned int msg_len) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; ucast_sendmsg (instance, &instance->token_target, msg, msg_len); return (res); } int totemudpu_mcast_flush_send ( void *udpu_context, const void *msg, unsigned int msg_len) { struct 
totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; mcast_sendmsg (instance, msg, msg_len); return (res); } int totemudpu_mcast_noflush_send ( void *udpu_context, const void *msg, unsigned int msg_len) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; mcast_sendmsg (instance, msg, msg_len); return (res); } extern int totemudpu_iface_check (void *udpu_context) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; timer_function_netif_check_timeout (instance); return (res); } extern void totemudpu_net_mtu_adjust (void *udpu_context, struct totem_config *totem_config) { #define UDPIP_HEADER_SIZE (20 + 8) /* 20 bytes for ip 8 bytes for udp */ totem_config->net_mtu -= crypto_sec_header_size(totem_config->crypto_cipher_type, totem_config->crypto_hash_type) + UDPIP_HEADER_SIZE; } const char *totemudpu_iface_print (void *udpu_context) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; const char *ret_char; ret_char = totemip_print (&instance->my_id); return (ret_char); } int totemudpu_iface_get ( void *udpu_context, struct totem_ip_address *addr) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; memcpy (addr, &instance->my_id, sizeof (struct totem_ip_address)); return (res); } int totemudpu_token_target_set ( void *udpu_context, const struct totem_ip_address *token_target) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; memcpy (&instance->token_target, token_target, sizeof (struct totem_ip_address)); instance->totemudpu_target_set_completed (instance->context); return (res); } extern int totemudpu_recv_mcast_empty ( void *udpu_context) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; unsigned int res; struct sockaddr_storage system_from; struct msghdr msg_recv; struct pollfd ufd; int nfds; int 
msg_processed = 0; /* * Receive datagram */ msg_recv.msg_name = &system_from; msg_recv.msg_namelen = sizeof (struct sockaddr_storage); msg_recv.msg_iov = &instance->totemudpu_iov_recv; msg_recv.msg_iovlen = 1; #ifdef HAVE_MSGHDR_CONTROL msg_recv.msg_control = 0; #endif #ifdef HAVE_MSGHDR_CONTROLLEN msg_recv.msg_controllen = 0; #endif #ifdef HAVE_MSGHDR_FLAGS msg_recv.msg_flags = 0; #endif #ifdef HAVE_MSGHDR_ACCRIGHTS msg_recv.msg_accrights = NULL; #endif #ifdef HAVE_MSGHDR_ACCRIGHTSLEN msg_recv.msg_accrightslen = 0; #endif do { ufd.fd = instance->token_socket; ufd.events = POLLIN; nfds = poll (&ufd, 1, 0); if (nfds == 1 && ufd.revents & POLLIN) { res = recvmsg (instance->token_socket, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); if (res != -1) { msg_processed = 1; } else { msg_processed = -1; } } } while (nfds == 1); return (msg_processed); } static int totemudpu_create_sending_socket( void *udpu_context, const struct totem_ip_address *member) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int fd; int res; unsigned int sendbuf_size; unsigned int optlen = sizeof (sendbuf_size); struct sockaddr_storage sockaddr; int addrlen; fd = socket (member->family, SOCK_DGRAM, 0); if (fd == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "Could not create socket for new member"); return (-1); } totemip_nosigpipe (fd); res = fcntl (fd, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "Could not set non-blocking operation on token socket"); return (-1); } /* * These sockets are used to send multicast messages, so their buffers * should be large */ sendbuf_size = MCAST_SOCKET_BUFFER_SIZE; res = setsockopt (fd, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_notice, "Could not set sendbuf size"); } /* * Bind to sending interface */ totemip_totemip_to_sockaddr_convert(&instance->my_id, 0, &sockaddr, &addrlen); res = 
bind (fd, (struct sockaddr *)&sockaddr, addrlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "bind token socket failed"); return (-1); } return (fd); } int totemudpu_member_add ( void *udpu_context, const struct totem_ip_address *member) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; struct totemudpu_member *new_member; new_member = malloc (sizeof (struct totemudpu_member)); if (new_member == NULL) { return (-1); } log_printf (LOGSYS_LEVEL_NOTICE, "adding new UDPU member {%s}", totemip_print(member)); list_init (&new_member->list); list_add_tail (&new_member->list, &instance->member_list); memcpy (&new_member->member, member, sizeof (struct totem_ip_address)); new_member->fd = totemudpu_create_sending_socket(udpu_context, member); return (0); } int totemudpu_member_remove ( void *udpu_context, const struct totem_ip_address *token_target) { int found = 0; struct list_head *list; struct totemudpu_member *member; struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; /* * Find the member to remove and close its socket */ for (list = instance->member_list.next; list != &instance->member_list; list = list->next) { member = list_entry (list, struct totemudpu_member, list); if (totemip_compare (token_target, &member->member)==0) { log_printf(LOGSYS_LEVEL_NOTICE, "removing UDPU member {%s}", totemip_print(&member->member)); if (member->fd > 0) { log_printf(LOGSYS_LEVEL_DEBUG, "Closing socket to: {%s}", totemip_print(&member->member)); qb_loop_poll_del (instance->totemudpu_poll_handle, member->fd); close (member->fd); } found = 1; break; } } /* * Delete the member from the list */ if (found) { list_del (list); } instance = NULL; return (0); } int totemudpu_member_list_rebind_ip ( void *udpu_context) { struct list_head *list; struct totemudpu_member *member; struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; for (list = instance->member_list.next; 
list != &instance->member_list; list = list->next) { member = list_entry (list, struct totemudpu_member, list); if (member->fd > 0) { close (member->fd); } member->fd = totemudpu_create_sending_socket(udpu_context, &member->member); } return (0); } diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h index 02a8a2cb..384d5b0d 100644 --- a/include/corosync/totem/totem.h +++ b/include/corosync/totem/totem.h @@ -1,283 +1,285 @@ /* * Copyright (c) 2005 MontaVista Software, Inc. * Copyright (c) 2006-2012 Red Hat, Inc. * * Author: Steven Dake (sdake@redhat.com) * * All rights reserved. * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef TOTEM_H_DEFINED #define TOTEM_H_DEFINED #include "totemip.h" #include #ifdef HAVE_SMALL_MEMORY_FOOTPRINT #define PROCESSOR_COUNT_MAX 16 #define MESSAGE_SIZE_MAX 1024*64 #define MESSAGE_QUEUE_MAX 512 #else #define PROCESSOR_COUNT_MAX 384 #define MESSAGE_SIZE_MAX 1024*1024 /* (1MB) */ #define MESSAGE_QUEUE_MAX ((4 * MESSAGE_SIZE_MAX) / totem_config->net_mtu) #endif /* HAVE_SMALL_MEMORY_FOOTPRINT */ #define FRAME_SIZE_MAX 10000 #define TRANSMITS_ALLOWED 16 #define SEND_THREADS_MAX 16 #define INTERFACE_MAX 2 /** * Maximum number of continuous gather states */ #define MAX_NO_CONT_GATHER 3 /* * Maximum number of continuous failures get from sendmsg call */ #define MAX_NO_CONT_SENDMSG_FAILURES 30 struct totem_interface { struct totem_ip_address bindnet; struct totem_ip_address boundto; struct totem_ip_address mcast_addr; uint16_t ip_port; uint16_t ttl; int member_count; struct totem_ip_address member_list[PROCESSOR_COUNT_MAX]; }; struct totem_logging_configuration { void (*log_printf) ( int level, int subsys, const char *function_name, const char *file_name, int file_line, const char *format, ...) 
__attribute__((format(printf, 6, 7))); int log_level_security; int log_level_error; int log_level_warning; int log_level_notice; int log_level_debug; int log_level_trace; int log_subsys_id; }; enum { TOTEM_PRIVATE_KEY_LEN = 128 }; enum { TOTEM_RRP_MODE_BYTES = 64 }; typedef enum { TOTEM_TRANSPORT_UDP = 0, TOTEM_TRANSPORT_UDPU = 1, TOTEM_TRANSPORT_RDMA = 2 } totem_transport_t; struct totem_config { int version; /* * network */ struct totem_interface *interfaces; unsigned int interface_count; unsigned int node_id; unsigned int clear_node_high_bit; /* * key information */ unsigned char private_key[TOTEM_PRIVATE_KEY_LEN]; unsigned int private_key_len; /* * Totem configuration parameters */ unsigned int token_timeout; unsigned int token_retransmit_timeout; unsigned int token_hold_timeout; unsigned int token_retransmits_before_loss_const; unsigned int join_timeout; unsigned int send_join_timeout; unsigned int consensus_timeout; unsigned int merge_timeout; unsigned int downcheck_timeout; unsigned int fail_to_recv_const; unsigned int seqno_unchanged_const; unsigned int rrp_token_expired_timeout; unsigned int rrp_problem_count_timeout; unsigned int rrp_problem_count_threshold; unsigned int rrp_problem_count_mcast_threshold; unsigned int rrp_autorecovery_check_timeout; char rrp_mode[TOTEM_RRP_MODE_BYTES]; struct totem_logging_configuration totem_logging_configuration; unsigned int net_mtu; unsigned int threads; unsigned int heartbeat_failures_allowed; unsigned int max_network_delay; unsigned int window_size; unsigned int max_messages; const char *vsf_type; unsigned int broadcast_use; char *crypto_cipher_type; char *crypto_hash_type; + char *crypto_compat_type; + totem_transport_t transport_number; unsigned int miss_count_const; }; #define TOTEM_CONFIGURATION_TYPE enum totem_configuration_type { TOTEM_CONFIGURATION_REGULAR, TOTEM_CONFIGURATION_TRANSITIONAL }; #define TOTEM_CALLBACK_TOKEN_TYPE enum totem_callback_token_type { TOTEM_CALLBACK_TOKEN_RECEIVED = 1, 
TOTEM_CALLBACK_TOKEN_SENT = 2 }; enum totem_event_type { TOTEM_EVENT_DELIVERY_CONGESTED, TOTEM_EVENT_NEW_MSG, }; #define MEMB_RING_ID struct memb_ring_id { struct totem_ip_address rep; unsigned long long seq; } __attribute__((packed)); typedef struct { int is_dirty; time_t last_updated; } totem_stats_header_t; typedef struct { totem_stats_header_t hdr; uint32_t iface_changes; } totemnet_stats_t; typedef struct { totem_stats_header_t hdr; totemnet_stats_t *net; char *algo_name; uint8_t *faulty; uint32_t interface_count; } totemrrp_stats_t; typedef struct { uint32_t rx; uint32_t tx; int backlog_calc; } totemsrp_token_stats_t; typedef struct { totem_stats_header_t hdr; totemrrp_stats_t *rrp; uint64_t orf_token_tx; uint64_t orf_token_rx; uint64_t memb_merge_detect_tx; uint64_t memb_merge_detect_rx; uint64_t memb_join_tx; uint64_t memb_join_rx; uint64_t mcast_tx; uint64_t mcast_retx; uint64_t mcast_rx; uint64_t memb_commit_token_tx; uint64_t memb_commit_token_rx; uint64_t token_hold_cancel_tx; uint64_t token_hold_cancel_rx; uint64_t operational_entered; uint64_t operational_token_lost; uint64_t gather_entered; uint64_t gather_token_lost; uint64_t commit_entered; uint64_t commit_token_lost; uint64_t recovery_entered; uint64_t recovery_token_lost; uint64_t consensus_timeouts; uint64_t rx_msg_dropped; uint32_t continuous_gather; uint32_t continuous_sendmsg_failures; int earliest_token; int latest_token; #define TOTEM_TOKEN_STATS_MAX 100 totemsrp_token_stats_t token[TOTEM_TOKEN_STATS_MAX]; } totemsrp_stats_t; #define TOTEM_CONFIGURATION_TYPE typedef struct { totem_stats_header_t hdr; totemsrp_stats_t *srp; } totemmrp_stats_t; typedef struct { totem_stats_header_t hdr; totemmrp_stats_t *mrp; uint32_t msg_reserved; uint32_t msg_queue_avail; } totempg_stats_t; #endif /* TOTEM_H_DEFINED */ diff --git a/man/corosync.conf.5 b/man/corosync.conf.5 index 1a8c4b0e..8c353ec0 100644 --- a/man/corosync.conf.5 +++ b/man/corosync.conf.5 @@ -1,677 +1,690 @@ .\"/* .\" * Copyright (c) 2005 
MontaVista Software, Inc. .\" * Copyright (c) 2006-2012 Red Hat, Inc. .\" * .\" * All rights reserved. .\" * .\" * Author: Steven Dake (sdake@redhat.com) .\" * .\" * This software licensed under BSD license, the text of which follows: .\" * .\" * Redistribution and use in source and binary forms, with or without .\" * modification, are permitted provided that the following conditions are met: .\" * .\" * - Redistributions of source code must retain the above copyright notice, .\" * this list of conditions and the following disclaimer. .\" * - Redistributions in binary form must reproduce the above copyright notice, .\" * this list of conditions and the following disclaimer in the documentation .\" * and/or other materials provided with the distribution. .\" * - Neither the name of the MontaVista Software, Inc. nor the names of its .\" * contributors may be used to endorse or promote products derived from this .\" * software without specific prior written permission. .\" * .\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" .\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE .\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF .\" * THE POSSIBILITY OF SUCH DAMAGE. 
.\" */ .TH COROSYNC_CONF 5 2012-10-10 "corosync Man Page" "Corosync Cluster Engine Programmer's Manual" .SH NAME corosync.conf - corosync executive configuration file .SH SYNOPSIS /etc/corosync/corosync.conf .SH DESCRIPTION The corosync.conf instructs the corosync executive about various parameters needed to control the corosync executive. Empty lines and lines starting with # character are ignored. The configuration file consists of bracketed top level directives. The possible directive choices are: .TP totem { } This top level directive contains configuration options for the totem protocol. .TP logging { } This top level directive contains configuration options for logging. .TP quorum { } This top level directive contains configuration options for quorum. .TP nodelist { } This top level directive contains configuration options for nodes in cluster. .TP qb { } This top level directive contains configuration options related to libqb. .PP .PP Within the .B totem directive, an interface directive is required. There is also one configuration option which is required: .PP .PP Within the .B interface sub-directive of totem there are four parameters which are required. There is one parameter which is optional. .TP ringnumber This specifies the ring number for the interface. When using the redundant ring protocol, each interface should specify separate ring numbers to uniquely identify to the membership protocol which interface to use for which redundant ring. The ringnumber must start at 0. .TP bindnetaddr This specifies the network address the corosync executive should bind to. bindnetaddr should be an IP address configured on the system, or a network address. For example, if the local interface is 192.168.5.92 with netmask 255.255.255.0, you should set bindnetaddr to 192.168.5.92 or 192.168.5.0. If the local interface is 192.168.5.92 with netmask 255.255.255.192, set bindnetaddr to 192.168.5.92 or 192.168.5.64, and so forth. 
This may also be an IPV6 address, in which case IPV6 networking will be used. In this case, the exact address must be specified and there is no automatic selection of the network interface within a specific subnet as with IPv4. If IPv6 networking is used, the nodeid field in nodelist must be specified. .TP broadcast This is optional and can be set to yes. If it is set to yes, the broadcast address will be used for communication. If this option is set, mcastaddr should not be set. .TP mcastaddr This is the multicast address used by corosync executive. The default should work for most networks, but the network administrator should be queried about a multicast address to use. Avoid 224.x.x.x because this is a "config" multicast address. This may also be an IPV6 multicast address, in which case IPV6 networking will be used. If IPv6 networking is used, the nodeid field in nodelist must be specified. It's not needed to use this option if cluster_name option is used. If both options are used, mcastaddr has higher priority. .TP mcastport This specifies the UDP port number. It is possible to use the same multicast address on a network with the corosync services configured for different UDP ports. Please note corosync uses two UDP ports mcastport (for mcast receives) and mcastport - 1 (for mcast sends). If you have multiple clusters on the same network using the same mcastaddr please configure the mcastports with a gap. .TP ttl This specifies the Time To Live (TTL). If you run your cluster on a routed network then the default of "1" will be too small. This option provides a way to increase this up to 255. The valid range is 0..255. Note that this is only valid on multicast transport types. .PP .PP Within the .B totem directive, there are seven configuration options of which one is required, five are optional, and one is required when IPV6 is configured in the interface subdirective. The required directive controls the version of the totem configuration. 
The optional option unless using IPV6 directive controls identification of the processor. The optional options control secrecy and authentication, the redundant ring mode of operation and maximum network MTU field. .TP version This specifies the version of the configuration file. Currently the only valid version for this directive is 2. .TP clear_node_high_bit This configuration option is optional and is only relevant when no nodeid is specified. Some corosync clients require a signed 32 bit nodeid that is greater than zero; however, by default corosync uses all 32 bits of the IPv4 address space when generating a nodeid. Set this option to yes to force the high bit to be zero and therefore ensure the nodeid is a positive signed 32 bit integer. WARNING: The cluster's behavior is undefined if this option is enabled on only a subset of the cluster (for example during a rolling upgrade). .TP crypto_hash This specifies which HMAC authentication should be used to authenticate all messages. Valid values are none (no authentication), md5, sha1, sha256, sha384 and sha512. The default is sha1. .TP crypto_cipher This specifies which cipher should be used to encrypt all messages. Valid values are none (no encryption), aes256, aes192, aes128 and 3des. The default is aes256. +.TP +crypto_compat +This specifies which crypto protocol version should be used to encrypt all messages. +Valid values are 2.0 and 2.2. + +The default is always the highest supported version. + +This value should only be used when performing rolling upgrades from older +versions of corosync to newer ones. It cannot be changed at runtime. + +Once the upgrade is completed, the cluster should be temporarily halted to +switch to the latest version of the protocol. + .TP secauth This specifies that HMAC/SHA1 authentication should be used to authenticate all messages. It further specifies that all data should be encrypted with the nss library and aes256 encryption algorithm to protect data from eavesdropping.
Enabling this option adds an encryption header to every message sent by totem which reduces total throughput. Also encryption and authentication consume extra CPU cycles in corosync. The default is on. WARNING: This parameter is deprecated. It's recommended to use a combination of crypto_cipher and crypto_hash. .TP rrp_mode This specifies the mode of redundant ring, which may be none, active, or passive. Active replication offers slightly lower latency from transmit to delivery in faulty network environments but with less performance. Passive replication may nearly double the speed of the totem protocol if the protocol doesn't become cpu bound. The final option is none, in which case only one network interface will be used to operate the totem protocol. If only one interface directive is specified, none is automatically chosen. If multiple interface directives are specified, only active or passive may be chosen. The maximum number of interface directives that is allowed for either mode (active or passive) is 2. .TP netmtu This specifies the network maximum transmit unit. To set this value beyond 1500, the regular frame MTU, requires ethernet devices that support large, or also called jumbo, frames. If any device in the network doesn't support large frames, the protocol will not operate properly. The hosts must also have their mtu size set from 1500 to whatever frame size is specified here. Please note while some NICs or switches claim large frame support, they support 9000 MTU as the maximum frame size including the IP header. Setting the netmtu and host MTUs to 9000 will cause totem to use the full 9000 bytes of the frame. Then Linux will add an 18 byte header moving the full frame size to 9018. As a result some hardware will not operate properly with this size of data. A netmtu of 8982 seems to work for the few large frame devices that have been tested. Some manufacturers claim large frame support when in fact they support frame sizes of 4500 bytes.
When sending multicast traffic, if the network frequently reconfigures, chances are that some device in the network doesn't support large frames. Choose hardware carefully if intending to use large frame support. The default is 1500. .TP vsftype This directive controls the virtual synchrony filter type used to identify a primary component. The preferred choice is YKD dynamic linear voting, however, for clusters larger than 32 nodes YKD consumes a lot of memory. For large scale clusters that are created by changing the PROCESSOR_COUNT_MAX #define in the C code totem.h file, the virtual synchrony filter "none" is recommended but then AMF and DLCK services (which are currently experimental) are not safe for use. The default is ykd. The vsftype can also be set to none. .TP transport This directive controls the transport mechanism used. If the interface to which corosync is binding is an RDMA interface such as RoCEE or Infiniband, the "iba" parameter may be specified. To avoid the use of multicast entirely, a unicast transport parameter "udpu" can be specified. This requires specifying the list of members in nodelist directive, that could potentially make up the membership before deployment. The default is udp. The transport type can also be set to udpu or iba. .TP cluster_name This specifies the name of cluster and it's used for automatic generating of multicast address. .TP config_version This specifies version of config file. This is converted to unsigned 64-bit int. By default it's 0. Option is used to prevent joining old nodes with not up-to-date configuration. If value is not 0, and node is going for first time (only for first time, join after split doesn't follow these rules) from single-node membership to multiple nodes membership, other nodes config_versions are collected. If current node config_version is not equal to highest of collected versions, corosync is terminated.
Within the .B totem directive, there are several configuration options which are used to control the operation of the protocol. It is generally not recommended to change any of these values without proper guidance and sufficient testing. Some networks may require larger values if suffering from frequent reconfigurations. Some applications may require faster failure detection times which can be achieved by reducing the token timeout. .TP token This timeout specifies in milliseconds until a token loss is declared after not receiving a token. This is the time spent detecting a failure of a processor in the current configuration. Reforming a new configuration takes about 50 milliseconds in addition to this timeout. The default is 1000 milliseconds. .TP token_retransmit This timeout specifies in milliseconds after how long before receiving a token the token is retransmitted. This will be automatically calculated if token is modified. It is not recommended to alter this value without guidance from the corosync community. The default is 238 milliseconds. .TP hold This timeout specifies in milliseconds how long the token should be held by the representative when the protocol is under low utilization. It is not recommended to alter this value without guidance from the corosync community. The default is 180 milliseconds. .TP token_retransmits_before_loss_const This value identifies how many token retransmits should be attempted before forming a new configuration. If this value is set, retransmit and hold will be automatically calculated from retransmits_before_loss and token. The default is 4 retransmissions. .TP join This timeout specifies in milliseconds how long to wait for join messages in the membership protocol. The default is 50 milliseconds. .TP send_join This timeout specifies in milliseconds an upper range between 0 and send_join to wait before sending a join message. For configurations with fewer than 32 nodes, this parameter is not necessary.
For larger rings, this parameter is necessary to ensure the NIC is not overflowed with join messages on formation of a new ring. A reasonable value for large rings (128 nodes) would be 80msec. Other timer values must also change if this value is changed. Seek advice from the corosync mailing list if trying to run larger configurations. The default is 0 milliseconds. .TP consensus This timeout specifies in milliseconds how long to wait for consensus to be achieved before starting a new round of membership configuration. The minimum value for consensus must be 1.2 * token. This value will be automatically calculated at 1.2 * token if the user doesn't specify a consensus value. For two node clusters, a consensus larger than the join timeout but less than token is safe. For three node or larger clusters, consensus should be larger than token. There is an increasing risk of odd membership changes, which still guarantee virtual synchrony, as node count grows if consensus is less than token. The default is 1200 milliseconds. .TP merge This timeout specifies in milliseconds how long to wait before checking for a partition when no multicast traffic is being sent. If multicast traffic is being sent, the merge detection happens automatically as a function of the protocol. The default is 200 milliseconds. .TP downcheck This timeout specifies in milliseconds how long to wait before checking that a network interface is back up after it has been downed. The default is 1000 milliseconds. .TP fail_recv_const This constant specifies how many rotations of the token without receiving any of the messages when messages should be received may occur before a new configuration is formed. The default is 2500 failures to receive a message. .TP seqno_unchanged_const This constant specifies how many rotations of the token without any multicast traffic should occur before the hold timer is started. The default is 30 rotations.
.TP heartbeat_failures_allowed [HeartBeating mechanism] Configures the optional HeartBeating mechanism for faster failure detection. Keep in mind that engaging this mechanism in lossy networks could cause faulty loss declaration as the mechanism relies on the network for heartbeating. So as a rule of thumb use this mechanism if you require improved failure in low to medium utilized networks. This constant specifies the number of heartbeat failures the system should tolerate before declaring heartbeat failure e.g. 3. Also if this value is not set or is 0 then the heartbeat mechanism is not engaged in the system and token rotation is the method of failure detection. The default is 0 (disabled). .TP max_network_delay [HeartBeating mechanism] This constant specifies in milliseconds the approximate delay that your network takes to transport one packet from one machine to another. This value is to be set by system engineers; please don't change it if not sure, as this affects the failure detection mechanism using heartbeat. The default is 50 milliseconds. .TP window_size This constant specifies the maximum number of messages that may be sent on one token rotation. If all processors perform equally well, this value could be large (300), which would introduce higher latency from origination to delivery for very large rings. To reduce latency in large rings(16+), the defaults are a safe compromise. If 1 or more slow processor(s) are present among fast processors, window_size should be no larger than 256000 / netmtu to avoid overflow of the kernel receive buffers. The user is notified of this by the display of a retransmit list in the notification logs. There is no loss of data, but performance is reduced when these errors occur. The default is 50 messages. .TP max_messages This constant specifies the maximum number of messages that may be sent by one processor on receipt of the token.
The max_messages parameter is limited to 256000 / netmtu to prevent overflow of the kernel transmit buffers. The default is 17 messages. .TP miss_count_const This constant defines the maximum number of times on receipt of a token a message is checked for retransmission before a retransmission occurs. This parameter is useful to modify for switches that delay multicast packets compared to unicast packets. The default setting works well for nearly all modern switches. The default is 5 messages. .TP rrp_problem_count_timeout This specifies the time in milliseconds to wait before decrementing the problem count by 1 for a particular ring to ensure a link is not marked faulty for transient network failures. The default is 2000 milliseconds. .TP rrp_problem_count_threshold This specifies the number of times a problem is detected with a link before setting the link faulty. Once a link is set faulty, no more data is transmitted upon it. Also, the problem counter is no longer decremented when the problem count timeout expires. A problem is detected whenever all tokens from the preceding processor have not been received within the rrp_token_expired_timeout. The rrp_problem_count_threshold * rrp_token_expired_timeout should be at least 50 milliseconds less than the token timeout, or a complete reconfiguration may occur. The default is 10 problem counts. .TP rrp_problem_count_mcast_threshold This specifies the number of times a problem is detected with multicast before setting the link faulty for passive rrp mode. This variable is unused in active rrp mode. The default is 10 times rrp_problem_count_threshold. .TP rrp_token_expired_timeout This specifies the time in milliseconds to increment the problem counter for the redundant ring protocol after not having received a token from all rings for a particular processor. This value will automatically be calculated from the token timeout and problem_count_threshold but may be overridden.
It is not recommended to override this value without guidance from the corosync community. The default is 47 milliseconds. .TP rrp_autorecovery_check_timeout This specifies the time in milliseconds to check if the failed ring can be auto-recovered. The default is 1000 milliseconds. .PP Within the .B logging directive, there are several configuration options which are all optional. .PP The following 3 options are valid only for the top level logging directive: .TP timestamp This specifies that a timestamp is placed on all log messages. The default is off. .TP fileline This specifies that file and line should be printed. The default is off. .TP function_name This specifies that the code function name should be printed. The default is off. .PP The following options are valid both for top level logging directive and they can be overridden in logger_subsys entries. .TP to_stderr .TP to_logfile .TP to_syslog These specify the destination of logging output. Any combination of these options may be specified. Valid options are .B yes and .B no. The default is syslog and stderr. Please note, if you are using to_logfile and want to rotate the file, use logrotate(8) with the option .B copytruncate. e.g. .IP .RS .ne 18 .nf .ta 4n 30n 33n /var/log/corosync.log { missingok compress notifempty daily rotate 7 copytruncate } .ta .fi .RE .IP .PP .TP logfile If the .B to_logfile directive is set to .B yes , this option specifies the pathname of the log file. No default. .TP logfile_priority This specifies the logfile priority for this particular subsystem. Ignored if debug is on. Possible values are: alert, crit, debug (same as debug = on), emerg, err, info, notice, warning. The default is: info. .TP syslog_facility This specifies the syslog facility type that will be used for any messages sent to syslog. options are daemon, local0, local1, local2, local3, local4, local5, local6 & local7. The default is daemon.
.TP syslog_priority This specifies the syslog level for this particular subsystem. Ignored if debug is on. Possible values are: alert, crit, debug (same as debug = on), emerg, err, info, notice, warning. The default is: info. .TP debug This specifies whether debug output is logged for this particular logger. It can also be set to trace, which is the highest level of debug information. The default is off. .PP Within the .B logging directive, logger_subsys directives are optional. .PP Within the .B logger_subsys sub-directive, all of the above logging configuration options are valid and can be used to override the default settings. The subsys entry, described below, is mandatory to identify the subsystem. .TP subsys This specifies the subsystem identity (name) for which logging is specified. This is the name used by a service in the log_init () call. E.g. 'CPG'. This directive is required. .PP Within the .B quorum directive it is possible to specify the quorum algorithm to use with the .TP provider directive. At the time of writing only corosync_votequorum is supported. See votequorum(5) for configuration options. .PP Within the .B nodelist directive it is possible to specify specific information about the nodes in the cluster. Directive can contain only .B node sub-directive, which specifies every node that should be a member of the membership, and where non-default options are needed. Every node must have at least ring0_addr field filled. For UDPU, every node that should be a member of the membership must be specified. Possible options are: .TP ringX_addr This specifies ip address of one of the nodes. X is ring number. .TP nodeid This configuration option is optional when using IPv4 and required when using IPv6. This is a 32 bit value specifying the node identifier delivered to the cluster membership service. If this is not specified with IPv4, the node id will be determined from the 32 bit IP address to which the system is bound with ring identifier of 0.
The node identifier value of zero is reserved and should not be used. .PP Within the .B qb directive it is possible to specify options for libqb. Possible option is: .TP ipc_type This specifies type of IPC to use. Can be one of native (default), shm and socket. Native means one of shm or socket, depending on what is supported by OS. On systems with support for both, SHM is selected. SHM is generally faster, but needs to allocate a ring buffer file in /dev/shm. .SH "FILES" .TP /etc/corosync/corosync.conf The corosync executive configuration file. .SH "SEE ALSO" .BR corosync_overview (8), .BR votequorum (5), .BR logrotate (8) .PP