Page MenuHomeClusterLabs Projects

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/exec/coroparse.c b/exec/coroparse.c
index 29875b90..302c3060 100644
--- a/exec/coroparse.c
+++ b/exec/coroparse.c
@@ -1,434 +1,434 @@
/*
* Copyright (c) 2006, 2009 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Patrick Caulfield (pcaulfie@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/un.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <dirent.h>
#include <limits.h>
#include <stddef.h>
#include <corosync/lcr/lcr_comp.h>
#include <corosync/engine/objdb.h>
#include <corosync/engine/config.h>
+#include <qb/qbutil.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/engine/logsys.h>
#include "util.h"
/*
 * Forward declaration: the loader is defined further down but is called
 * from aisparser_readconfig().
 */
static int read_config_file_into_objdb(
struct objdb_iface_ver0 *objdb,
const char **error_string);
/*
 * Static buffer that read_config_file_into_objdb() formats its status or
 * error message into; *error_string is pointed at it.
 */
static char error_string_response[512];
/*
 * Find the first occurrence of `byte` in `haystack` and return a pointer
 * to the first character behind it that is neither a space nor a tab.
 *
 * Returns NULL when `byte` does not occur in `haystack`.
 */
static char *strchr_rs (const char *haystack, int byte)
{
	const char *hit = strchr (haystack, byte);

	if (hit == NULL) {
		return (NULL);
	}

	/* step over the delimiter itself, then over any blanks behind it */
	hit++;
	while (*hit == ' ' || *hit == '\t') {
		hit++;
	}
	return ((char *) hit);
}
/*
 * config_readconfig entry point of the parser interface.
 *
 * Delegates to read_config_file_into_objdb() and collapses its result to
 * 0 (success) or -1 (any non-zero result).
 */
static int aisparser_readconfig (struct objdb_iface_ver0 *objdb,
	const char **error_string)
{
	int failed = read_config_file_into_objdb(objdb, error_string);

	return (failed ? -1 : 0);
}
/*
 * Trim a configuration token in place.
 *
 * Leading spaces/tabs are skipped (the returned pointer may therefore point
 * into the middle of `string`), and trailing spaces, tabs, ':' and '{' are
 * cut off by writing a terminating NUL.  The first remaining character is
 * never removed, so a token consisting of a single ':' or '{' is returned
 * unchanged.
 *
 * Returns a pointer to the trimmed token inside `string`.
 */
static char *remove_whitespace(char *string)
{
	char *start = string + strspn(string, " \t");
	size_t len = strlen(start);
	char *end;

	/* Nothing left after the leading blanks: do not step before the buffer */
	if (len == 0) {
		return start;
	}

	end = start + len - 1;
	while (end > start &&
	       (*end == ' ' || *end == '\t' || *end == ':' || *end == '{')) {
		end--;
	}

	/*
	 * Always terminate right behind the last kept character; this also
	 * trims tokens whose only kept character is the first one ("k:").
	 */
	end[1] = '\0';
	return start;
}
/*
 * Values passed as `type` to a parser_check_item_f callback:
 * parse_section() uses PCHECK_ADD_SUBSECTION before creating a new section
 * object and PCHECK_ADD_ITEM before creating a key/value pair.
 */
#define PCHECK_ADD_SUBSECTION 1
#define PCHECK_ADD_ITEM 2
/*
 * Optional validation hook for parse_section().  `name` is the section or
 * key name about to be added below `parent_handle`.  A return value of 0
 * makes parse_section() abort with -1; the callbacks in this file set
 * *error_string before returning 0.
 */
typedef int (*parser_check_item_f)(struct objdb_iface_ver0 *objdb,
hdb_handle_t parent_handle,
int type,
const char *name,
const char **error_string);
/*
 * Parse one section of a configuration file into objdb.
 *
 * Lines are read from `fp` until a line containing '}' is seen or the file
 * ends.  A line containing '{' creates a child object below
 * `parent_handle` and is parsed recursively; a line containing ':' is
 * stored as a string key/value pair on `parent_handle`.
 *
 * `parser_check_item_call` may be NULL; when set it is consulted before
 * every section or key is added and a 0 result aborts parsing.
 *
 * Returns 0 on success, -1 on a rejected item, a failed nested section, or
 * when the file ends inside a nested section ("Missing closing brace").
 *
 * NOTE(review): the result of objdb->object_create() is not checked here.
 */
static int parse_section(FILE *fp,
struct objdb_iface_ver0 *objdb,
hdb_handle_t parent_handle,
const char **error_string,
parser_check_item_f parser_check_item_call)
{
char line[512];
int i;
char *loc;
int ignore_line;
while (fgets (line, sizeof (line), fp)) {
/* Strip a trailing newline and, after that, a trailing carriage return */
if (strlen(line) > 0) {
if (line[strlen(line) - 1] == '\n')
line[strlen(line) - 1] = '\0';
if (strlen (line) > 0 && line[strlen(line) - 1] == '\r')
line[strlen(line) - 1] = '\0';
}
/*
* Clear out white space and tabs
*/
for (i = strlen (line) - 1; i > -1; i--) {
if (line[i] == '\t' || line[i] == ' ') {
line[i] = '\0';
} else {
break;
}
}
/*
 * A line is ignored unless its first non-blank character exists and is
 * not '#'.
 */
ignore_line = 1;
for (i = 0; i < strlen (line); i++) {
if (line[i] != '\t' && line[i] != ' ') {
if (line[i] != '#')
ignore_line = 0;
break;
}
}
/*
* Clear out comments and empty lines
*/
if (ignore_line) {
continue;
}
/* New section ? */
if ((loc = strchr_rs (line, '{'))) {
hdb_handle_t new_parent;
char *section = remove_whitespace(line);
/* cut the line one character before the position strchr_rs() returned */
loc--;
*loc = '\0';
if (parser_check_item_call) {
if (!parser_check_item_call(objdb, parent_handle, PCHECK_ADD_SUBSECTION,
section, error_string))
return -1;
}
objdb->object_create (parent_handle, &new_parent,
section, strlen (section));
/* the nested call consumes lines up to the matching '}' */
if (parse_section(fp, objdb, new_parent, error_string, parser_check_item_call))
return -1;
}
/* New key/value */
if ((loc = strchr_rs (line, ':'))) {
char *key;
char *value;
/* split the line: everything before loc-1 is the key, loc starts the value */
*(loc-1) = '\0';
key = remove_whitespace(line);
value = remove_whitespace(loc);
if (parser_check_item_call) {
if (!parser_check_item_call(objdb, parent_handle, PCHECK_ADD_ITEM,
key, error_string))
return -1;
}
/* the value is stored including its terminating NUL */
objdb->object_key_create_typed (parent_handle, key,
value, strlen (value) + 1, OBJDB_VALUETYPE_STRING);
}
/* A closing brace ends the current section */
if (strchr_rs (line, '}')) {
return 0;
}
}
/* End of file: only acceptable at the top level */
if (parent_handle != OBJECT_PARENT_HANDLE) {
*error_string = "Missing closing brace";
return -1;
}
return 0;
}
/*
 * parser_check_item_f callback used for the uidgid.d files.
 *
 * Accepts only a top-level section called "uidgid" and, as items, only the
 * keys "uid" and "gid".
 *
 * Returns 1 when the item may be added.  Otherwise stores an explanatory
 * message in *error_string and returns 0.
 */
static int parser_check_item_uidgid(struct objdb_iface_ver0 *objdb,
	hdb_handle_t parent_handle,
	int type,
	const char *name,
	const char **error_string)
{
	const char *problem = NULL;

	switch (type) {
	case PCHECK_ADD_SUBSECTION:
		if (parent_handle != OBJECT_PARENT_HANDLE) {
			problem = "uidgid: Can't add second level subsection";
		} else if (strcmp (name, "uidgid") != 0) {
			problem = "uidgid: Can't add subsection different then uidgid";
		}
		break;
	case PCHECK_ADD_ITEM:
		if (strcmp (name, "uid") != 0 && strcmp (name, "gid") != 0) {
			problem = "uidgid: Only uid and gid are allowed items";
		}
		break;
	default:
		break;
	}

	if (problem != NULL) {
		*error_string = problem;
		return 0;
	}
	return 1;
}
/*
 * Parse every regular file in COROSYSCONFDIR "/uidgid.d" into objdb.
 *
 * Each file is parsed as a top-level section with
 * parser_check_item_uidgid() as validation hook.  A missing directory, an
 * allocation failure, and entries that cannot be stat()ed or opened are
 * not treated as errors.
 *
 * Returns 0 on success or the non-zero result of the first parse_section()
 * call that failed (error_string is then set by parse_section()).
 */
static int read_uidgid_files_into_objdb(
	struct objdb_iface_ver0 *objdb,
	const char **error_string)
{
	FILE *fp;
	const char *dirname;
	DIR *dp;
	struct dirent *dirent = NULL;
	struct dirent *entry;
	char filename[PATH_MAX + FILENAME_MAX + 1];
	int res = 0;
	size_t len;
	int return_code;
	struct stat stat_buf;

	dirname = COROSYSCONFDIR "/uidgid.d";
	dp = opendir (dirname);
	if (dp == NULL)
		return 0;

	/* readdir_r() needs a caller-supplied entry large enough for any name */
	len = offsetof(struct dirent, d_name) + NAME_MAX + 1;
	entry = malloc(len);
	if (entry == NULL) {
		res = 0;
		goto error_exit;
	}

	/* Check the return code first: dirent is only meaningful on success */
	for (return_code = readdir_r(dp, entry, &dirent);
		return_code == 0 && dirent != NULL;
		return_code = readdir_r(dp, entry, &dirent)) {
		snprintf(filename, sizeof (filename), "%s/%s", dirname, dirent->d_name);

		/* Skip entries we cannot stat instead of reading an unset stat_buf */
		if (stat (filename, &stat_buf) != 0) {
			continue;
		}
		if (!S_ISREG(stat_buf.st_mode)) {
			continue;
		}

		fp = fopen (filename, "r");
		if (fp == NULL) continue;
		res = parse_section(fp, objdb, OBJECT_PARENT_HANDLE, error_string, parser_check_item_uidgid);
		fclose (fp);
		if (res != 0) {
			goto error_exit;
		}
	}

error_exit:
	free (entry);
	closedir(dp);
	return res;
}
/*
 * Parse every regular file in COROSYSCONFDIR "/service.d" into objdb.
 *
 * Each file is parsed as a top-level section without a validation hook.
 * A missing directory, an allocation failure, and entries that cannot be
 * stat()ed or opened are not treated as errors.
 *
 * Returns 0 on success or the non-zero result of the first parse_section()
 * call that failed (error_string is then set by parse_section()).
 */
static int read_service_files_into_objdb(
	struct objdb_iface_ver0 *objdb,
	const char **error_string)
{
	FILE *fp;
	const char *dirname;
	DIR *dp;
	struct dirent *dirent = NULL;
	struct dirent *entry;
	char filename[PATH_MAX + FILENAME_MAX + 1];
	int res = 0;
	struct stat stat_buf;
	size_t len;
	int return_code;

	dirname = COROSYSCONFDIR "/service.d";
	dp = opendir (dirname);
	if (dp == NULL)
		return 0;

	/* readdir_r() needs a caller-supplied entry large enough for any name */
	len = offsetof(struct dirent, d_name) + NAME_MAX + 1;
	entry = malloc(len);
	if (entry == NULL) {
		res = 0;
		goto error_exit;
	}

	/* Check the return code first: dirent is only meaningful on success */
	for (return_code = readdir_r(dp, entry, &dirent);
		return_code == 0 && dirent != NULL;
		return_code = readdir_r(dp, entry, &dirent)) {
		snprintf(filename, sizeof (filename), "%s/%s", dirname, dirent->d_name);

		/* Skip entries we cannot stat instead of reading an unset stat_buf */
		if (stat (filename, &stat_buf) != 0) {
			continue;
		}
		if (!S_ISREG(stat_buf.st_mode)) {
			continue;
		}

		fp = fopen (filename, "r");
		if (fp == NULL) continue;
		res = parse_section(fp, objdb, OBJECT_PARENT_HANDLE, error_string, NULL);
		fclose (fp);
		if (res != 0) {
			goto error_exit;
		}
	}

error_exit:
	free (entry);
	closedir(dp);
	return res;
}
/*
 * Read config file and load into objdb
 *
 * The file name is taken from the COROSYNC_MAIN_CONFIG_FILE environment
 * variable and defaults to COROSYSCONFDIR "/corosync.conf".  After the main
 * file has been parsed, the uidgid.d and service.d directories are read as
 * well, each step only if the previous one succeeded.
 *
 * On return *error_string points either at a message set by the parser or
 * at the static error_string_response buffer, which holds the open error
 * or, on success, a confirmation message.
 *
 * Returns 0 on success, -1 when the file cannot be opened, otherwise the
 * non-zero result of the failing parse step.
 */
static int read_config_file_into_objdb(
struct objdb_iface_ver0 *objdb,
const char **error_string)
{
FILE *fp;
const char *filename;
char *error_reason = error_string_response;
int res;
filename = getenv ("COROSYNC_MAIN_CONFIG_FILE");
if (!filename)
filename = COROSYSCONFDIR "/corosync.conf";
fp = fopen (filename, "r");
if (fp == NULL) {
/* turn errno into text and report it through the static buffer */
char error_str[100];
- const char *error_ptr;
- LOGSYS_STRERROR_R (error_ptr, errno, error_str, sizeof(error_str));
+ const char *error_ptr = qb_strerror_r(errno, error_str, sizeof(error_str));
snprintf (error_reason, sizeof(error_string_response),
"Can't read file %s reason = (%s)\n",
filename, error_ptr);
*error_string = error_reason;
return -1;
}
res = parse_section(fp, objdb, OBJECT_PARENT_HANDLE, error_string, NULL);
fclose(fp);
if (res == 0) {
res = read_uidgid_files_into_objdb(objdb, error_string);
}
if (res == 0) {
res = read_service_files_into_objdb(objdb, error_string);
}
/* on success error_string carries an informational message instead */
if (res == 0) {
snprintf (error_reason, sizeof(error_string_response),
"Successfully read main configuration file '%s'.\n", filename);
*error_string = error_reason;
}
return res;
}
/*
* Dynamic Loader definition
*/
/* Version 0 of the config interface: its only entry is the config reader */
struct config_iface_ver0 aisparser_iface_ver0 = {
.config_readconfig = aisparser_readconfig
};
/*
 * Single interface description, registered under the name "corosync_parser".
 * .interfaces starts out NULL; corosync_lcr_component_register() passes
 * this entry to lcr_interfaces_set() together with aisparser_iface_ver0.
 */
struct lcr_iface corosync_aisparser_ver0[1] = {
{
.name = "corosync_parser",
.version = 0,
.versions_replace = 0,
.versions_replace_count = 0,
.dependencies = 0,
.dependency_count = 0,
.constructor = NULL,
.destructor = NULL,
.interfaces = NULL,
}
};
/* NOTE(review): declared here; no definition is visible in this file section */
struct corosync_service_handler *aisparser_get_handler_ver0 (void);
/* Component record wrapping the one interface above */
struct lcr_comp aisparser_comp_ver0 = {
.iface_count = 1,
.ifaces = corosync_aisparser_ver0
};
/*
 * Register the parser component with the LCR loader: attach
 * aisparser_iface_ver0 to the interface entry, then register the component.
 *
 * With COROSYNC_SOLARIS defined this is an ordinary external function;
 * otherwise it is a static function carrying the compiler's `constructor`
 * attribute.
 */
#ifdef COROSYNC_SOLARIS
void corosync_lcr_component_register (void);
void corosync_lcr_component_register (void) {
#else
__attribute__ ((constructor)) static void corosync_lcr_component_register (void) {
#endif
lcr_interfaces_set (&corosync_aisparser_ver0[0], &aisparser_iface_ver0);
lcr_component_register (&aisparser_comp_ver0);
}
diff --git a/exec/ipc_glue.c b/exec/ipc_glue.c
index 60865805..d6ca20d2 100644
--- a/exec/ipc_glue.c
+++ b/exec/ipc_glue.c
@@ -1,928 +1,902 @@
/*
* Copyright (c) 2010 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Angus Salkeld <asalkeld@redhat.com>
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of Red Hat, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <assert.h>
#include <sys/uio.h>
#include <string.h>
#include <qb/qbdefs.h>
#include <qb/qblist.h>
#include <qb/qbutil.h>
#include <qb/qbloop.h>
#include <qb/qbipcs.h>
#include <corosync/swab.h>
#include <corosync/corotypes.h>
#include <corosync/corodefs.h>
#include <corosync/totem/totempg.h>
#include <corosync/engine/objdb.h>
#include <corosync/engine/config.h>
#include <corosync/engine/logsys.h>
#include "mainconfig.h"
#include "sync.h"
#include "syncv2.h"
#include "timer.h"
#include "main.h"
#include "util.h"
#include "apidef.h"
#include "service.h"
LOGSYS_DECLARE_SUBSYS ("MAIN");
/* corosync API table, fetched with apidef_get() in cs_ipcs_init() */
static struct corosync_api_v1 *api = NULL;
-static int ipc_subsys_id = -1;
/* set by cs_ipcs_low_fds_event(); non-zero makes connection_accept return -EMFILE */
static int32_t ipc_not_enough_fds_left = 0;
/* Inputs to cs_ipcs_check_for_flow_control(), updated by the callbacks below */
static int32_t ipc_fc_is_quorate; /* boolean */
static int32_t ipc_fc_totem_queue_level; /* percentage used */
static int32_t ipc_fc_sync_in_process; /* boolean */
/* objdb handle of the "connections" object created in cs_ipcs_init() */
static qb_handle_t object_connection_handle;
/*
 * Per-service IPC bookkeeping; one slot per service id in ipcs_mapper[].
 * Filled in by cs_ipcs_service_init().
 */
struct cs_ipcs_mapper {
int32_t id; /* service id */
qb_ipcs_service_t *inst; /* libqb IPC server, NULL when not created or destroyed */
char name[256]; /* short service name passed to qb_ipcs_create() */
};
/*
 * One queued outgoing event: msg_send_or_queue() appends these to a
 * connection's outq_head and outq_flush() sends and frees them.
 */
struct outq_item {
void *msg; /* malloc'ed flat copy of the message */
size_t mlen; /* length of msg in bytes */
struct list_head list; /* link in cs_ipcs_conn_context.outq_head */
};
/* IPC server state for every possible service id, indexed by service id */
static struct cs_ipcs_mapper ipcs_mapper[SERVICE_HANDLER_MAXIMUM_COUNT];
/* Main-loop glue: each of these forwards to the loop returned by cs_poll_handle_get() */
static int32_t cs_ipcs_job_add(enum qb_loop_priority p, void *data, qb_loop_job_dispatch_fn fn);
static int32_t cs_ipcs_dispatch_add(enum qb_loop_priority p, int32_t fd, int32_t events,
void *data, qb_ipcs_dispatch_fn_t fn);
static int32_t cs_ipcs_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t events,
void *data, qb_ipcs_dispatch_fn_t fn);
static int32_t cs_ipcs_dispatch_del(int32_t fd);
/* Poll handler table installed on every IPC server in cs_ipcs_service_init() */
static struct qb_ipcs_poll_handlers corosync_poll_funcs = {
.job_add = cs_ipcs_job_add,
.dispatch_add = cs_ipcs_dispatch_add,
.dispatch_mod = cs_ipcs_dispatch_mod,
.dispatch_del = cs_ipcs_dispatch_del,
};
/* Connection life-cycle and message callbacks */
static int32_t cs_ipcs_connection_accept (qb_ipcs_connection_t *c, uid_t euid, gid_t egid);
static void cs_ipcs_connection_created(qb_ipcs_connection_t *c);
static int32_t cs_ipcs_msg_process(qb_ipcs_connection_t *c,
void *data, size_t size);
static int32_t cs_ipcs_connection_closed (qb_ipcs_connection_t *c);
static void cs_ipcs_connection_destroyed (qb_ipcs_connection_t *c);
/* Service handler table passed to qb_ipcs_create() in cs_ipcs_service_init() */
static struct qb_ipcs_service_handlers corosync_service_funcs = {
.connection_accept = cs_ipcs_connection_accept,
.connection_created = cs_ipcs_connection_created,
.msg_process = cs_ipcs_msg_process,
.connection_closed = cs_ipcs_connection_closed,
.connection_destroyed = cs_ipcs_connection_destroyed,
};
/*
 * Map a service id to the short name used for its IPC server.
 *
 * Returns a string literal, or NULL for an id that has no name here.
 */
static const char* cs_ipcs_serv_short_name(int32_t service_id)
{
	switch (service_id) {
	case EVS_SERVICE:        return "evs";
	case CLM_SERVICE:        return "saClm";
	case AMF_SERVICE:        return "saAmf";
	case CKPT_SERVICE:       return "saCkpt";
	case EVT_SERVICE:        return "saEvt";
	case LCK_SERVICE:        return "saLck";
	case MSG_SERVICE:        return "saMsg";
	case CFG_SERVICE:        return "cfg";
	case CPG_SERVICE:        return "cpg";
	case CMAN_SERVICE:       return "cman";
	case PCMK_SERVICE:       return "pacemaker.engine";
	case CONFDB_SERVICE:     return "confdb";
	case QUORUM_SERVICE:     return "quorum";
	case PLOAD_SERVICE:      return "pload";
	case TMR_SERVICE:        return "saTmr";
	case VOTEQUORUM_SERVICE: return "votequorum";
	case NTF_SERVICE:        return "saNtf";
	case AMF_V2_SERVICE:     return "saAmfV2";
	case TST_SV1_SERVICE:    return "tst";
	case TST_SV2_SERVICE:    return "tst2";
	case MON_SERVICE:        return "mon";
	case WD_SERVICE:         return "wd";
	default:
		break;
	}
	/* unknown service id */
	return NULL;
}
int32_t cs_ipcs_service_destroy(int32_t service_id)
{
if (ipcs_mapper[service_id].inst) {
qb_ipcs_destroy(ipcs_mapper[service_id].inst);
ipcs_mapper[service_id].inst = NULL;
}
return 0;
}
/*
 * connection_accept callback: decide whether a client may connect.
 *
 * Returns -ENOSYS when the service is not loaded, is exiting or has no IPC
 * server, -EMFILE while file descriptors are short, 0 for uid 0 / gid 0 and
 * for any uid or gid found in uidgid_list_head, and -EACCES (after logging)
 * otherwise.
 */
static int32_t cs_ipcs_connection_accept (qb_ipcs_connection_t *c, uid_t euid, gid_t egid)
{
	struct list_head *pos;
	struct uidgid_item *allowed;
	int32_t service = qb_ipcs_service_id_get(c);
	int service_usable;

	service_usable = ais_service[service] != NULL &&
		!ais_service_exiting[service] &&
		ipcs_mapper[service].inst != NULL;
	if (!service_usable) {
		return -ENOSYS;
	}

	if (ipc_not_enough_fds_left) {
		return -EMFILE;
	}

	/* root user or root group is always let in */
	if (euid == 0 || egid == 0) {
		return 0;
	}

	pos = uidgid_list_head.next;
	while (pos != &uidgid_list_head) {
		allowed = qb_list_entry (pos, struct uidgid_item, list);
		if (euid == allowed->uid || egid == allowed->gid) {
			return 0;
		}
		pos = pos->next;
	}

	log_printf(LOGSYS_LEVEL_ERROR, "Denied connection attempt from %d:%d", euid, egid);
	return -EACCES;
}
/*
 * Look up the command name of process `pid` from /proc/<pid>/stat.
 *
 * The name is the text between the first '(' and the last ')' of the stat
 * line, so names that themselves contain parentheses are returned intact.
 * At most name_len - 1 characters are copied and the result is always
 * NUL-terminated.
 *
 * Returns out_name on success, NULL when the output buffer is unusable,
 * the stat file cannot be read, or the line does not have the expected
 * "(name) " shape.
 */
static char * pid_to_name (pid_t pid, char *out_name, size_t name_len)
{
	char *name;
	char *rest;
	FILE *fp;
	char fname[32];
	char buf[256];

	/* a zero-sized buffer cannot even hold the terminator */
	if (out_name == NULL || name_len == 0) {
		return NULL;
	}

	snprintf (fname, sizeof (fname), "/proc/%d/stat", (int) pid);
	fp = fopen (fname, "r");
	if (!fp) {
		return NULL;
	}
	if (fgets (buf, sizeof (buf), fp) == NULL) {
		fclose (fp);
		return NULL;
	}
	fclose (fp);

	/* the name starts behind the FIRST opening bracket ... */
	name = strchr (buf, '(');
	if (!name) {
		return NULL;
	}
	name++;

	/* ... and ends at the LAST closing bracket, which a space must follow */
	rest = strrchr (name, ')');
	if (rest == NULL || rest[1] != ' ') {
		return NULL;
	}
	*rest = '\0';

	/* bounded, always-terminated copy */
	snprintf (out_name, name_len, "%s", name);
	return out_name;
}
/*
 * Per-connection state attached to a libqb connection with
 * qb_ipcs_context_set().  cs_ipcs_connection_created() allocates it with
 * the service's private_data_size added on top of sizeof(struct ...).
 */
struct cs_ipcs_conn_context {
qb_handle_t stats_handle; /* objdb object holding this connection's statistics */
struct list_head outq_head; /* queued outgoing events (struct outq_item) */
int32_t queuing; /* QB_TRUE while events are being queued instead of sent directly */
uint32_t queued; /* events queued since queuing started */
uint64_t invalid_request; /* requests rejected with -EINVAL in cs_ipcs_msg_process() */
uint64_t overload; /* requests rejected with another negative code there */
uint32_t sent; /* events sent since the counters were last reset */
char data[1]; /* first byte of the private data; see cs_ipcs_private_data_get() */
};
/*
 * connection_created callback.
 *
 * Allocates the per-connection context (including the service's private
 * data area), runs the service's lib_init_fn, bumps the "active"
 * connection counter and creates an objdb statistics object for the
 * connection, named after the client process where that can be determined,
 * with all counters initialised to zero.
 */
static void cs_ipcs_connection_created(qb_ipcs_connection_t *c)
{
	/* statistics keys in creation order; `wide` selects a 64-bit zero */
	static const struct {
		const char *key;
		int wide;
		int type;
	} stat_keys[] = {
		{ "service_id",         0, OBJDB_VALUETYPE_UINT32 },
		{ "client_pid",         0, OBJDB_VALUETYPE_INT32 },
		{ "responses",          1, OBJDB_VALUETYPE_UINT64 },
		{ "dispatched",         1, OBJDB_VALUETYPE_UINT64 },
		{ "requests",           1, OBJDB_VALUETYPE_INT64 },
		{ "send_retries",       1, OBJDB_VALUETYPE_UINT64 },
		{ "recv_retries",       1, OBJDB_VALUETYPE_UINT64 },
		{ "flow_control",       0, OBJDB_VALUETYPE_UINT32 },
		{ "flow_control_count", 1, OBJDB_VALUETYPE_UINT64 },
		{ "queue_size",         0, OBJDB_VALUETYPE_UINT32 },
		{ "invalid_request",    1, OBJDB_VALUETYPE_UINT64 },
		{ "overload",           1, OBJDB_VALUETYPE_UINT64 },
	};
	uint32_t zero_32 = 0;
	uint64_t zero_64 = 0;
	unsigned int key_incr_dummy;
	qb_handle_t object_handle;
	struct cs_ipcs_conn_context *context;
	struct qb_ipcs_connection_stats stats;
	char conn_name[42];
	char proc_name[32];
	int32_t service;
	int32_t size;
	size_t k;

	log_printf(LOG_INFO, "%s() new connection", __func__);

	/* context header plus the service's private data */
	service = qb_ipcs_service_id_get(c);
	size = sizeof(struct cs_ipcs_conn_context);
	size += ais_service[service]->private_data_size;
	context = calloc(1, size);

	list_init(&context->outq_head);
	context->queuing = QB_FALSE;
	context->queued = 0;
	context->sent = 0;
	qb_ipcs_context_set(c, context);

	ais_service[service]->lib_init_fn(c);

	api->object_key_increment (object_connection_handle,
		"active", strlen("active"),
		&key_incr_dummy);

	/* object name: "<proc>:<pid>:<ptr>", "<pid>:<ptr>" or just "<ptr>" */
	qb_ipcs_connection_stats_get(c, &stats, QB_FALSE);
	if (stats.client_pid <= 0) {
		snprintf (conn_name,
			sizeof(conn_name),
			"%p", c);
	} else if (pid_to_name (stats.client_pid, proc_name, sizeof(proc_name))) {
		snprintf (conn_name,
			sizeof(conn_name),
			"%s:%d:%p", proc_name,
			stats.client_pid, c);
	} else {
		snprintf (conn_name,
			sizeof(conn_name),
			"%d:%p",
			stats.client_pid, c);
	}

	api->object_create (object_connection_handle,
		&object_handle,
		conn_name,
		strlen (conn_name));
	context->stats_handle = object_handle;

	for (k = 0; k < sizeof(stat_keys) / sizeof(stat_keys[0]); k++) {
		if (stat_keys[k].wide) {
			api->object_key_create_typed (object_handle,
				stat_keys[k].key,
				&zero_64, sizeof (zero_64),
				stat_keys[k].type);
		} else {
			api->object_key_create_typed (object_handle,
				stat_keys[k].key,
				&zero_32, sizeof (zero_32),
				stat_keys[k].type);
		}
	}
}
/* Take a reference on the libqb connection behind the opaque `conn` handle. */
void cs_ipc_refcnt_inc(void *conn)
{
qb_ipcs_connection_ref(conn);
}
/* Drop a reference on the libqb connection behind the opaque `conn` handle. */
void cs_ipc_refcnt_dec(void *conn)
{
qb_ipcs_connection_unref(conn);
}
/*
 * Return the start of the service-private data area that trails the
 * connection context of `conn`.
 */
void *cs_ipcs_private_data_get(void *conn)
{
	struct cs_ipcs_conn_context *context = qb_ipcs_context_get(conn);

	return context->data;
}
/*
 * connection_destroyed callback: release every event still queued for the
 * connection and then the connection context itself.
 */
static void cs_ipcs_connection_destroyed (qb_ipcs_connection_t *c)
{
	struct cs_ipcs_conn_context *context;
	struct list_head *cursor;
	struct list_head *following;
	struct outq_item *pending;

	log_printf(LOG_INFO, "%s() ", __func__);

	context = qb_ipcs_context_get(c);
	if (context == NULL) {
		return;
	}

	cursor = context->outq_head.next;
	while (cursor != &context->outq_head) {
		/* remember the successor before the node is unlinked and freed */
		following = cursor->next;
		pending = list_entry (cursor, struct outq_item, list);
		list_del (cursor);
		free (pending->msg);
		free (pending);
		cursor = following;
	}
	free(context);
}
/*
 * connection_closed callback.
 *
 * Runs the service's lib_exit_fn first; a non-zero result is returned
 * unchanged and nothing else happens.  Otherwise the connection's
 * statistics object is destroyed, the "closed" counter is incremented, the
 * "active" counter decremented, and 0 is returned.
 */
static int32_t cs_ipcs_connection_closed (qb_ipcs_connection_t *c)
{
	struct cs_ipcs_conn_context *context;
	unsigned int key_incr_dummy;
	int32_t service = qb_ipcs_service_id_get(c);
	int32_t exit_rc;

	log_printf(LOG_INFO, "%s() ", __func__);

	exit_rc = ais_service[service]->lib_exit_fn(c);
	if (exit_rc == 0) {
		context = qb_ipcs_context_get(c);
		api->object_destroy (context->stats_handle);

		api->object_key_increment (object_connection_handle,
			"closed", strlen("closed"),
			&key_incr_dummy);
		api->object_key_decrement (object_connection_handle,
			"active", strlen("active"),
			&key_incr_dummy);
	}
	return exit_rc;
}
/*
 * Send a scatter/gather response on `conn`.
 *
 * Returns 0 when qb_ipcs_response_sendv() reports a non-negative result,
 * otherwise its negative result.
 */
int cs_ipcs_response_iov_send (void *conn,
	const struct iovec *iov,
	unsigned int iov_len)
{
	int32_t sent = qb_ipcs_response_sendv(conn, iov, iov_len);

	return (sent < 0) ? sent : 0;
}
/*
 * Send a single-buffer response on `conn`.
 *
 * Returns 0 when qb_ipcs_response_send() reports a non-negative result,
 * otherwise its negative result.
 */
int cs_ipcs_response_send(void *conn, const void *msg, size_t mlen)
{
	int32_t sent = qb_ipcs_response_send(conn, msg, mlen);

	return (sent < 0) ? sent : 0;
}
/*
 * Main-loop job that drains a connection's outgoing event queue.
 *
 * Events are sent in order until one is not sent completely.  When the
 * queue runs empty, queuing mode is switched off and the counters are
 * reset; otherwise the job re-schedules itself and logs any error other
 * than -EAGAIN.
 */
static void outq_flush (void *data)
{
	qb_ipcs_connection_t *conn = data;
	struct cs_ipcs_conn_context *context = qb_ipcs_context_get(conn);
	struct list_head *cursor = context->outq_head.next;
	struct list_head *following;
	struct outq_item *pending;
	int32_t rc;

	while (cursor != &context->outq_head) {
		following = cursor->next;
		pending = list_entry (cursor, struct outq_item, list);

		rc = qb_ipcs_event_send(conn, pending->msg, pending->mlen);
		if (rc != pending->mlen) {
			/* not (fully) sent: leave it queued and stop */
			break;
		}

		context->sent++;
		context->queued--;
		list_del (cursor);
		free (pending->msg);
		free (pending);
		cursor = following;
	}

	if (!list_empty (&context->outq_head)) {
		/* something is left over, try again from the main loop */
		qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, conn, outq_flush);
		if (rc < 0 && rc != -EAGAIN) {
			log_printf(LOGSYS_LEVEL_ERROR, "event_send retuned %d!", rc);
		}
		return;
	}

	context->queuing = QB_FALSE;
	log_printf(LOGSYS_LEVEL_INFO, "Q empty, queued:%d sent:%d.",
		context->queued, context->sent);
	context->queued = 0;
	context->sent = 0;
}
/*
 * Deliver an event to `conn`, queuing it when it cannot be sent right away.
 *
 * While the connection is not in queuing mode the event is sent directly.
 * If that yields -EAGAIN the connection switches to queuing mode and an
 * outq_flush job is scheduled; any other short result is logged and the
 * event is dropped.  In queuing mode the iovec is flattened into a
 * malloc'ed copy and appended to the queue.  An allocation failure
 * disconnects the client.
 */
static void msg_send_or_queue(qb_ipcs_connection_t *conn, const struct iovec *iov, uint32_t iov_len)
{
	struct cs_ipcs_conn_context *context = qb_ipcs_context_get(conn);
	struct outq_item *entry;
	char *cursor = 0;
	int32_t total = 0;
	int32_t rc = 0;
	int32_t idx;

	for (idx = 0; idx < iov_len; idx++) {
		total += iov[idx].iov_len;
	}

	if (!context->queuing) {
		assert(list_empty (&context->outq_head));
		rc = qb_ipcs_event_sendv(conn, iov, iov_len);
		if (rc == total) {
			context->sent++;
			return;
		}
		if (rc != -EAGAIN) {
			log_printf(LOGSYS_LEVEL_ERROR, "event_send retuned %d, expected %d!", rc, total);
			return;
		}
		/* receiver is busy: start queuing and let the main loop flush */
		context->queued = 0;
		context->sent = 0;
		context->queuing = QB_TRUE;
		qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, conn, outq_flush);
	}

	entry = malloc (sizeof (struct outq_item));
	if (entry == NULL) {
		qb_ipcs_disconnect(conn);
		return;
	}
	entry->msg = malloc (total);
	if (entry->msg == NULL) {
		free (entry);
		qb_ipcs_disconnect(conn);
		return;
	}

	/* flatten the iovec into one contiguous buffer */
	cursor = entry->msg;
	for (idx = 0; idx < iov_len; idx++) {
		memcpy (cursor, iov[idx].iov_base, iov[idx].iov_len);
		cursor += iov[idx].iov_len;
	}
	entry->mlen = total;

	list_init (&entry->list);
	list_add_tail (&entry->list, &context->outq_head);
	context->queued++;
}
/*
 * Send (or queue) a single-buffer event on `conn` by wrapping the buffer in
 * a one-element iovec.  Always returns 0.
 */
int cs_ipcs_dispatch_send(void *conn, const void *msg, size_t mlen)
{
	struct iovec single = {
		.iov_base = (void *)msg,
		.iov_len = mlen,
	};

	msg_send_or_queue (conn, &single, 1);
	return 0;
}
/*
 * Send (or queue) a scatter/gather event on `conn` via msg_send_or_queue().
 * Always returns 0; delivery problems are handled inside that helper.
 */
int cs_ipcs_dispatch_iov_send (void *conn,
const struct iovec *iov,
unsigned int iov_len)
{
msg_send_or_queue(conn, iov, iov_len);
return 0;
}
/*
 * msg_process callback: dispatch one client request to its service handler.
 *
 * corosync_sending_allowed() decides whether the request may be processed:
 *  - -EINVAL: the request is malformed; the client gets
 *    CS_ERR_INVALID_PARAM (async calls are only logged) and -EINVAL is
 *    returned.
 *  - any other negative value: overload; the client gets CS_ERR_TRY_AGAIN
 *    (async calls are only logged) and -ENOBUFS is returned.
 *  - a positive value: the service's lib_handler_fn runs and 0 is returned.
 *  - 0: nothing is sent and -1 is returned.
 *
 * The handler is only invoked for a positive result, so a request that has
 * already been answered with an error is never processed as well.
 */
static int32_t cs_ipcs_msg_process(qb_ipcs_connection_t *c,
	void *data, size_t size)
{
	struct qb_ipc_response_header response;
	struct qb_ipc_request_header *request_pt = (struct qb_ipc_request_header *)data;
	int32_t service = qb_ipcs_service_id_get(c);
	int32_t send_ok = 0;
	int32_t is_async_call = QB_FALSE;
	ssize_t res = -1;
	int sending_allowed_private_data;
	struct cs_ipcs_conn_context *cnx;

	send_ok = corosync_sending_allowed (service,
		request_pt->id,
		request_pt,
		&sending_allowed_private_data);

	/* async calls get no response, so errors for them are only logged */
	is_async_call = (service == CPG_SERVICE && request_pt->id == 2);

	/*
	 * This happens when the message contains some kind of invalid
	 * parameter, such as an invalid size
	 */
	if (send_ok == -EINVAL) {
		response.size = sizeof (response);
		response.id = 0;
		response.error = CS_ERR_INVALID_PARAM;

		cnx = qb_ipcs_context_get(c);
		if (cnx) {
			cnx->invalid_request++;
		}

		if (is_async_call) {
			log_printf(LOGSYS_LEVEL_INFO, "*** %s() invalid message! size:%d error:%d",
				__func__, response.size, response.error);
		} else {
			qb_ipcs_response_send (c,
				&response,
				sizeof (response));
		}
		res = -EINVAL;
	} else if (send_ok < 0) {
		cnx = qb_ipcs_context_get(c);
		if (cnx) {
			cnx->overload++;
		}
		if (!is_async_call) {
			/*
			 * Overload, tell library to retry
			 */
			response.size = sizeof (response);
			response.id = 0;
			response.error = CS_ERR_TRY_AGAIN;
			qb_ipcs_response_send (c,
				&response,
				sizeof (response));
		} else {
			log_printf(LOGSYS_LEVEL_WARNING,
				"*** %s() (%d:%d - %d) %s!",
				__func__, service, request_pt->id,
				is_async_call, strerror(-send_ok));
		}
		res = -ENOBUFS;
	}

	/* negative codes were answered above and must not reach the handler */
	if (send_ok > 0) {
		ais_service[service]->lib_engine[request_pt->id].lib_handler_fn(c, request_pt);
		res = 0;
	}
	corosync_sending_allowed_release (&sending_allowed_private_data);
	return res;
}
/* job_add poll handler: queue `fn(data)` as a job on corosync's main loop. */
static int32_t cs_ipcs_job_add(enum qb_loop_priority p, void *data, qb_loop_job_dispatch_fn fn)
{
return qb_loop_job_add(cs_poll_handle_get(), p, data, fn);
}
/* dispatch_add poll handler: start polling `fd` for `events` on corosync's main loop. */
static int32_t cs_ipcs_dispatch_add(enum qb_loop_priority p, int32_t fd, int32_t events,
void *data, qb_ipcs_dispatch_fn_t fn)
{
return qb_loop_poll_add(cs_poll_handle_get(), p, fd, events, data, fn);
}
/* dispatch_mod poll handler: change the poll registration of `fd` on corosync's main loop. */
static int32_t cs_ipcs_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t events,
void *data, qb_ipcs_dispatch_fn_t fn)
{
return qb_loop_poll_mod(cs_poll_handle_get(), p, fd, events, data, fn);
}
/* dispatch_del poll handler: stop polling `fd` on corosync's main loop. */
static int32_t cs_ipcs_dispatch_del(int32_t fd)
{
return qb_loop_poll_del(cs_poll_handle_get(), fd);
}
/*
 * Low-file-descriptor notification from the main loop.
 *
 * Records `not_enough` so that cs_ipcs_connection_accept() can refuse new
 * connections, and logs the transition in either direction.
 */
static void cs_ipcs_low_fds_event(int32_t not_enough, int32_t fds_available)
{
	ipc_not_enough_fds_left = not_enough;

	if (!not_enough) {
		log_printf(LOGSYS_LEVEL_NOTICE, "allowing new connections (fds_available:%d)\n",
			fds_available);
		return;
	}

	log_printf(LOGSYS_LEVEL_WARNING, "refusing new connections (fds_available:%d)\n",
		fds_available);
}
/* Return the totem queue level last reported to cs_ipcs_totem_queue_level_changed(). */
int32_t cs_ipcs_q_level_get(void)
{
return ipc_fc_totem_queue_level;
}
/* Handle of the timer armed while flow control is switched on */
static qb_loop_timer_handle ipcs_check_for_flow_control_timer;

/*
 * Re-evaluate the request rate limit of every service with an IPC server.
 *
 * Requests flow only when the node is quorate (or the service allows
 * inquorate operation), the totem queue is not at the critical level and no
 * sync is in progress.  Otherwise requests are switched off and a 1 ms
 * timer is armed that calls corosync_recheck_the_q_level.  When requests
 * flow, the rate follows the totem queue level: LOW -> fast,
 * GOOD -> normal, HIGH -> slow.
 */
static void cs_ipcs_check_for_flow_control(void)
{
	int32_t idx;
	int32_t may_send;

	for (idx = 0; idx < SERVICE_HANDLER_MAXIMUM_COUNT; idx++) {
		if (ais_service[idx] == NULL || ipcs_mapper[idx].inst == NULL) {
			continue;
		}

		/* quorate (or exempt) AND queue not critical AND not syncing */
		may_send = (ipc_fc_is_quorate == 1 ||
			ais_service[idx]->allow_inquorate == CS_LIB_ALLOW_INQUORATE) &&
			ipc_fc_totem_queue_level != TOTEM_Q_LEVEL_CRITICAL &&
			ipc_fc_sync_in_process == 0;

		if (!may_send) {
			qb_ipcs_request_rate_limit(ipcs_mapper[idx].inst, QB_IPCS_RATE_OFF);
			qb_loop_timer_add(cs_poll_handle_get(), QB_LOOP_MED, 1*QB_TIME_NS_IN_MSEC,
				NULL, corosync_recheck_the_q_level, &ipcs_check_for_flow_control_timer);
			continue;
		}

		if (ipc_fc_totem_queue_level == TOTEM_Q_LEVEL_LOW) {
			qb_ipcs_request_rate_limit(ipcs_mapper[idx].inst, QB_IPCS_RATE_FAST);
		} else if (ipc_fc_totem_queue_level == TOTEM_Q_LEVEL_GOOD) {
			qb_ipcs_request_rate_limit(ipcs_mapper[idx].inst, QB_IPCS_RATE_NORMAL);
		} else if (ipc_fc_totem_queue_level == TOTEM_Q_LEVEL_HIGH) {
			qb_ipcs_request_rate_limit(ipcs_mapper[idx].inst, QB_IPCS_RATE_SLOW);
		}
	}
}
/*
 * Quorum callback (registered in cs_ipcs_init()): remember the new quorum
 * state and re-evaluate flow control.  `context` is unused.
 */
static void cs_ipcs_fc_quorum_changed(int quorate, void *context)
{
ipc_fc_is_quorate = quorate;
cs_ipcs_check_for_flow_control();
}
/*
 * Totem queue-level callback (registered in cs_ipcs_init()): remember the
 * new level and re-evaluate flow control.
 */
static void cs_ipcs_totem_queue_level_changed(enum totem_q_level level)
{
ipc_fc_totem_queue_level = level;
cs_ipcs_check_for_flow_control();
}
/*
 * Record whether a sync is in progress and re-evaluate flow control; a
 * non-zero value keeps client requests switched off.
 */
void cs_ipcs_sync_state_changed(int32_t sync_in_process)
{
ipc_fc_sync_in_process = sync_in_process;
cs_ipcs_check_for_flow_control();
}
-static void cs_ipcs_libqb_log_fn(const char *file_name,
- int32_t file_line,
- int32_t severity,
- const char *msg)
-{
- int32_t level = severity;
- if (severity > LOG_DEBUG) {
- level = LOGSYS_LEVEL_DEBUG;
- }
-
- _logsys_log_printf (LOGSYS_ENCODE_RECID(level,
- ipc_subsys_id,
- LOGSYS_RECID_LOG),
- __func__, file_name, file_line, "%s", msg);
-}
-
/*
 * Copy the current libqb statistics of every IPC connection into the
 * connection's objdb statistics object.
 *
 * Each connection returned by qb_ipcs_connection_first_get() /
 * qb_ipcs_connection_next_get() is unref'ed exactly once, including
 * connections without a context, and only after its successor has been
 * fetched, so a connection is never used after its reference was dropped.
 */
void cs_ipcs_stats_update(void)
{
	int32_t i;
	struct qb_ipcs_stats srv_stats;
	struct qb_ipcs_connection_stats stats;
	qb_ipcs_connection_t *c;
	qb_ipcs_connection_t *next;
	struct cs_ipcs_conn_context *cnx;

	for (i = 0; i < SERVICE_HANDLER_MAXIMUM_COUNT; i++) {
		if (ais_service[i] == NULL || ipcs_mapper[i].inst == NULL) {
			continue;
		}
		qb_ipcs_stats_get(ipcs_mapper[i].inst, &srv_stats, QB_FALSE);

		c = qb_ipcs_connection_first_get(ipcs_mapper[i].inst);
		while (c != NULL) {
			cnx = qb_ipcs_context_get(c);
			if (cnx != NULL) {
				qb_ipcs_connection_stats_get(c, &stats, QB_FALSE);

				api->object_key_replace(cnx->stats_handle,
					"client_pid", strlen("client_pid"),
					&stats.client_pid, sizeof(uint32_t));
				api->object_key_replace(cnx->stats_handle,
					"requests", strlen("requests"),
					&stats.requests, sizeof(uint64_t));
				api->object_key_replace(cnx->stats_handle,
					"responses", strlen("responses"),
					&stats.responses, sizeof(uint64_t));
				api->object_key_replace(cnx->stats_handle,
					"dispatched", strlen("dispatched"),
					&stats.events, sizeof(uint64_t));
				api->object_key_replace(cnx->stats_handle,
					"send_retries", strlen("send_retries"),
					&stats.send_retries, sizeof(uint64_t));
				api->object_key_replace(cnx->stats_handle,
					"recv_retries", strlen("recv_retries"),
					&stats.recv_retries, sizeof(uint64_t));
				api->object_key_replace(cnx->stats_handle,
					"flow_control", strlen("flow_control"),
					&stats.flow_control_state, sizeof(uint32_t));
				api->object_key_replace(cnx->stats_handle,
					"flow_control_count", strlen("flow_control_count"),
					&stats.flow_control_count, sizeof(uint64_t));
				api->object_key_replace(cnx->stats_handle,
					"queue_size", strlen("queue_size"),
					&cnx->queued, sizeof(uint32_t));
				api->object_key_replace(cnx->stats_handle,
					"invalid_request", strlen("invalid_request"),
					&cnx->invalid_request, sizeof(uint64_t));
				api->object_key_replace(cnx->stats_handle,
					"overload", strlen("overload"),
					&cnx->overload, sizeof(uint64_t));
			}

			/* fetch the successor while we still hold our reference */
			next = qb_ipcs_connection_next_get(ipcs_mapper[i].inst, c);
			qb_ipcs_connection_unref(c);
			c = next;
		}
	}
}
void cs_ipcs_service_init(struct corosync_service_engine *service)
{
if (service->lib_engine_count == 0) {
log_printf (LOGSYS_LEVEL_DEBUG,
"NOT Initializing IPC on %s [%d]",
cs_ipcs_serv_short_name(service->id),
service->id);
return;
}
ipcs_mapper[service->id].id = service->id;
strcpy(ipcs_mapper[service->id].name, cs_ipcs_serv_short_name(service->id));
log_printf (LOGSYS_LEVEL_DEBUG,
"Initializing IPC on %s [%d]",
ipcs_mapper[service->id].name,
ipcs_mapper[service->id].id);
ipcs_mapper[service->id].inst = qb_ipcs_create(ipcs_mapper[service->id].name,
ipcs_mapper[service->id].id,
QB_IPC_SHM,
&corosync_service_funcs);
assert(ipcs_mapper[service->id].inst);
qb_ipcs_poll_handlers_set(ipcs_mapper[service->id].inst,
&corosync_poll_funcs);
qb_ipcs_run(ipcs_mapper[service->id].inst);
}
/*
* Set up the IPC glue layer: fetch the core API table, register the IPC
* callbacks (low file-descriptor event, quorum change, totem queue level)
* and create the "connections" object, with its "active" and "closed"
* keys, below the "runtime" object.
*
* When no "runtime" object is found the error is logged and the function
* returns before the "connections" object is created.
*/
void cs_ipcs_init(void)
{
qb_handle_t object_find_handle;
qb_handle_t object_runtime_handle;
/* initial value stored in the "active" and "closed" keys below */
uint64_t zero_64 = 0;
api = apidef_get ();
qb_loop_poll_low_fds_event_set(cs_poll_handle_get(), cs_ipcs_low_fds_event);
- ipc_subsys_id = _logsys_subsys_create ("IPC");
- if (ipc_subsys_id < 0) {
- log_printf (LOGSYS_LEVEL_ERROR,
- "Could not initialize IPC logging subsystem\n");
- corosync_exit_error (AIS_DONE_INIT_SERVICES);
- }
-
- qb_util_set_log_function (cs_ipcs_libqb_log_fn);
-
api->quorum_register_callback (cs_ipcs_fc_quorum_changed, NULL);
totempg_queue_level_register_callback (cs_ipcs_totem_queue_level_changed);
/* locate the top-level "runtime" object */
api->object_find_create (OBJECT_PARENT_HANDLE,
"runtime", strlen ("runtime"),
&object_find_handle);
if (api->object_find_next (object_find_handle,
&object_runtime_handle) != 0) {
log_printf (LOGSYS_LEVEL_ERROR,"arrg no runtime");
return;
}
/* Connection objects */
api->object_create (object_runtime_handle,
&object_connection_handle,
"connections", strlen ("connections"));
/* both keys are created as UINT64 values starting at zero */
api->object_key_create_typed (object_connection_handle,
"active", &zero_64, sizeof (zero_64),
OBJDB_VALUETYPE_UINT64);
api->object_key_create_typed (object_connection_handle,
"closed", &zero_64, sizeof (zero_64),
OBJDB_VALUETYPE_UINT64);
}
diff --git a/exec/logsys.c b/exec/logsys.c
index cef745cd..ec30c83e 100644
--- a/exec/logsys.c
+++ b/exec/logsys.c
@@ -1,1674 +1,725 @@
/*
* Copyright (c) 2002-2004 MontaVista Software, Inc.
* Copyright (c) 2006-2010 Red Hat, Inc.
*
* Author: Steven Dake (sdake@redhat.com)
* Author: Lon Hohberger (lhh@redhat.com)
* Author: Fabio M. Di Nitto (fdinitto@redhat.com)
*
* All rights reserved.
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <stdint.h>
-#include <stdio.h>
#include <ctype.h>
+#include <assert.h>
+#include <stdio.h>
#include <string.h>
-#include <stdarg.h>
-#include <sys/time.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <time.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <unistd.h>
-#if defined(COROSYNC_LINUX)
-#include <linux/un.h>
-#endif
-#if defined(COROSYNC_BSD) || defined(COROSYNC_DARWIN)
-#include <sys/un.h>
-#endif
-#include <syslog.h>
-#include <stdlib.h>
-#include <pthread.h>
-#include <limits.h>
-#include <sys/mman.h>
-#include <semaphore.h>
+
+#include <qb/qbdefs.h>
+#include <qb/qbutil.h>
+#include <qb/qblog.h>
#include <corosync/list.h>
#include <corosync/engine/logsys.h>
-#include "util.h"
-
-#define YIELD_AFTER_LOG_OPS 10
-
-#define MIN(x,y) ((x) < (y) ? (x) : (y))
-
-#define ROUNDUP(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
-
/*
* syslog prioritynames, facility names to value mapping
* Some C libraries build this in to their headers, but it is non-portable
* so logsys supplies its own version.
*/
/* One name-to-value entry of the priority and facility tables below. */
struct syslog_names {
/* textual name; NULL in the entry that terminates a table */
const char *c_name;
/* matching syslog constant; -1 in the terminating entry */
int c_val;
};
/*
* Priority names mapped to their syslog LOG_* level.  "err" and "error"
* both map to LOG_ERR.  The table ends with a { NULL, -1 } entry.
*/
-struct syslog_names prioritynames[] =
+static struct syslog_names prioritynames[] =
{
{ "alert", LOG_ALERT },
{ "crit", LOG_CRIT },
{ "debug", LOG_DEBUG },
{ "emerg", LOG_EMERG },
{ "err", LOG_ERR },
{ "error", LOG_ERR },
{ "info", LOG_INFO },
{ "notice", LOG_NOTICE },
{ "warning", LOG_WARNING },
{ NULL, -1 }
};
/*
* Facility names mapped to their syslog LOG_* facility constant, including
* local0 through local7.  The table ends with a { NULL, -1 } entry.
*/
-struct syslog_names facilitynames[] =
+static struct syslog_names facilitynames[] =
{
{ "auth", LOG_AUTH },
{ "cron", LOG_CRON },
{ "daemon", LOG_DAEMON },
{ "kern", LOG_KERN },
{ "lpr", LOG_LPR },
{ "mail", LOG_MAIL },
{ "news", LOG_NEWS },
{ "syslog", LOG_SYSLOG },
{ "user", LOG_USER },
{ "uucp", LOG_UUCP },
{ "local0", LOG_LOCAL0 },
{ "local1", LOG_LOCAL1 },
{ "local2", LOG_LOCAL2 },
{ "local3", LOG_LOCAL3 },
{ "local4", LOG_LOCAL4 },
{ "local5", LOG_LOCAL5 },
{ "local6", LOG_LOCAL6 },
{ "local7", LOG_LOCAL7 },
{ NULL, -1 }
};
-struct record {
- unsigned int rec_ident;
- const char *file_name;
- const char *function_name;
- int file_line;
- char *buffer;
- struct list_head list;
-};
-
/* capacity of the files[] array in struct logsys_logger */
+#define MAX_FILES_PER_SUBSYS 16
+
/*
* Per-subsystem logging state.
*
* The field order is deliberately not the "logical" one: it is arranged
* to preserve 64bit alignment.
*/
struct logsys_logger {
char subsys[LOGSYS_MAX_SUBSYS_NAMELEN]; /* subsystem name */
char *logfile; /* log to file */
- FILE *logfile_fp; /* track file descriptor */
unsigned int mode; /* subsystem mode */
unsigned int debug; /* debug on|off */
- int syslog_facility; /* facility */
int syslog_priority; /* priority */
int logfile_priority; /* priority to file */
int init_status; /* internal field to handle init queues
for subsystems */
/*
* value returned by qb_log_file_open() for the logfile, or copied from
* another subsystem that already uses the same file
*/
+ int32_t target_id;
/* strdup'ed source file names registered for this subsystem */
+ char *files[MAX_FILES_PER_SUBSYS];
/* number of entries in use in files[] */
+ int32_t file_idx;
/* set to QB_TRUE by logsys_config_file_set_unlocked() */
+ int32_t dirty;
};
-
-/*
- * These are not static so they can be read from the core file
- */
-int *flt_data;
-
-uint32_t flt_head;
-
-uint32_t flt_tail;
-
-unsigned int flt_data_size;
-
-#define COMBINE_BUFFER_SIZE 2048
-
/* values for logsys_logger init_status */
#define LOGSYS_LOGGER_INIT_DONE 0
#define LOGSYS_LOGGER_NEEDS_INIT 1
static int logsys_system_needs_init = LOGSYS_LOGGER_NEEDS_INIT;
-static int logsys_memory_used = 0;
-
-static int logsys_sched_param_queued = 0;
-
-static int logsys_sched_policy;
-
-static struct sched_param logsys_sched_param;
-
-static int logsys_after_log_ops_yield = 10;
-
static struct logsys_logger logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT + 1];
-static int wthread_active = 0;
-
-static int wthread_should_exit = 0;
-
static pthread_mutex_t logsys_config_mutex = PTHREAD_MUTEX_INITIALIZER;
-static unsigned int records_written = 1;
-
-static pthread_t logsys_thread_id;
-
-static sem_t logsys_thread_start;
-
-static sem_t logsys_print_finished;
-
-static pthread_mutex_t logsys_flt_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-static pthread_mutex_t logsys_wthread_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-static int logsys_buffer_full = 0;
+static int32_t _logsys_config_mode_set_unlocked(int32_t subsysid, uint32_t new_mode);
+static void _logsys_config_apply_per_file(int32_t s, const char *filename);
+static void _logsys_config_apply_per_subsys(int32_t s);
static char *format_buffer=NULL;
-static int logsys_dropped_messages = 0;
-
-void *logsys_rec_end;
-
-static DECLARE_LIST_INIT(logsys_print_finished_records);
-
-#define FDMAX_ARGS 64
-
-#define CIRCULAR_BUFFER_WRITE_SIZE 64
-
-/* forward declarations */
-static void logsys_close_logfile(int subsysid);
-
-static uint32_t circular_memory_map (void **buf, size_t bytes)
-{
- void *addr_orig;
- void *addr;
- int fd;
- int res;
- const char *file = "fdata-XXXXXX";
- char path[PATH_MAX];
- char buffer[CIRCULAR_BUFFER_WRITE_SIZE];
- int i;
- int written;
- int error_return = 0;
-
- snprintf (path, PATH_MAX, "/dev/shm/%s", file);
-
- fd = mkstemp (path);
- if (fd == -1) {
- snprintf (path, PATH_MAX, LOCALSTATEDIR "/run/%s", file);
- fd = mkstemp (path);
- if (fd == -1) {
- error_return = -1;
- goto error_exit;
- }
- }
-
- /*
- * ftruncate doesn't return ENOSPC
- * have to use write to determine if shared memory is actually available
- */
- res = ftruncate (fd, 0);
- if (res == -1) {
- error_return = -1;
- goto unlink_exit;
- }
- memset (buffer, 0, sizeof (buffer));
- for (i = 0; i < (bytes / CIRCULAR_BUFFER_WRITE_SIZE); i++) {
-retry_write:
- written = write (fd, buffer, CIRCULAR_BUFFER_WRITE_SIZE);
- if (written == -1 && errno == EINTR) {
- goto retry_write;
- }
- if (written != 64) {
- error_return = -1;
- goto unlink_exit;
- }
- }
-
- addr_orig = mmap (NULL, bytes << 1, PROT_NONE,
- MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- if (addr_orig == MAP_FAILED) {
- error_return = -1;
- goto unlink_exit;
- }
-
- addr = mmap (addr_orig, bytes, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_FIXED, fd, 0);
- if (addr != addr_orig) {
- error_return = -1;
- goto mmap_exit;
- }
- #ifdef COROSYNC_BSD
- madvise(addr_orig, bytes, MADV_NOSYNC);
- #endif
-
- addr = mmap (((char *)addr_orig) + bytes,
- bytes, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_FIXED, fd, 0);
- if ((char *)addr != (char *)((char *)addr_orig + bytes)) {
- error_return = -1;
- goto mmap_exit;
- }
-#ifdef COROSYNC_BSD
- madvise(((char *)addr_orig) + bytes, bytes, MADV_NOSYNC);
-#endif
-
- *buf = addr_orig;
- error_return = 0;
- goto unlink_exit;
-
-mmap_exit:
- munmap (addr_orig, bytes << 1);
-unlink_exit:
- unlink (path);
- close (fd);
-error_exit:
- return (error_return);
-}
-
-static void logsys_flt_lock (void)
-{
- pthread_mutex_lock (&logsys_flt_mutex);
-}
-static void logsys_flt_unlock (void)
-{
- pthread_mutex_unlock (&logsys_flt_mutex);
-}
-
-static void logsys_wthread_lock (void)
-{
- pthread_mutex_lock (&logsys_wthread_mutex);
-}
-static void logsys_wthread_unlock (void)
-{
- pthread_mutex_unlock (&logsys_wthread_mutex);
-}
-
-/*
- * Before any write operation, a reclaim on the buffer area must be executed
- */
-static inline void records_reclaim (unsigned int idx, unsigned int words)
-{
- unsigned int should_reclaim;
-
- should_reclaim = 0;
-
- if ((idx + words) >= flt_data_size) {
- logsys_buffer_full = 1;
- }
- if (logsys_buffer_full == 0) {
- return;
- }
-
- if (flt_tail > flt_head) {
- if (idx + words >= flt_tail) {
- should_reclaim = 1;
- }
- } else {
- if ((idx + words) >= (flt_tail + flt_data_size)) {
- should_reclaim = 1;
- }
- }
-
- if (should_reclaim) {
- int words_needed = 0;
-
- words_needed = words + 1;
- do {
- words_needed -= flt_data[flt_tail];
- flt_tail =
- (flt_tail +
- flt_data[flt_tail]) % (flt_data_size);
- } while (words_needed > 0);
- }
-}
-
-#define idx_word_step(idx) \
-do { \
- if (idx > (flt_data_size - 1)) { \
- idx = 0; \
- } \
-} while (0);
-
-#define idx_buffer_step(idx) \
-do { \
- if (idx > (flt_data_size - 1)) { \
- idx = ((idx) % (flt_data_size)); \
- } \
-} while (0);
-
-/*
- * Internal threaded logging implementation
- */
-static inline int strcpy_cutoff (char *dest, const char *src, size_t cutoff,
- size_t buf_len)
-{
- size_t len = strlen (src);
- if (buf_len <= 1) {
- if (buf_len == 0)
- dest[0] = 0;
- return 0;
- }
-
- if (cutoff == 0) {
- cutoff = len;
- }
-
- cutoff = MIN (cutoff, buf_len - 1);
- len = MIN (len, cutoff);
- memcpy (dest, src, len);
- memset (dest + len, ' ', cutoff - len);
- dest[cutoff] = '\0';
-
- return (cutoff);
-}
-
-static const char log_month_name[][4] = {
- "Jan", "Feb", "Mar", "Apr", "May", "Jun",
- "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
-};
-
-/*
- * %s SUBSYSTEM
- * %n FUNCTION NAME
- * %f FILENAME
- * %l FILELINE
- * %p PRIORITY
- * %t TIMESTAMP
- * %b BUFFER
- *
- * any number between % and character specify field length to pad or chop
-*/
-static void log_printf_to_logs (
- unsigned int rec_ident,
- const char *file_name,
- const char *function_name,
- int file_line,
- const char *buffer)
-{
- char normal_output_buffer[COMBINE_BUFFER_SIZE];
- char syslog_output_buffer[COMBINE_BUFFER_SIZE];
- char char_time[128];
- char line_no[30];
- unsigned int format_buffer_idx = 0;
- unsigned int normal_output_buffer_idx = 0;
- unsigned int syslog_output_buffer_idx = 0;
- struct timeval tv;
- size_t cutoff;
- unsigned int normal_len, syslog_len;
- int subsysid;
- unsigned int level;
- int c;
- struct tm tm_res;
-
- if (LOGSYS_DECODE_RECID(rec_ident) != LOGSYS_RECID_LOG) {
- return;
- }
-
- subsysid = LOGSYS_DECODE_SUBSYSID(rec_ident);
- level = LOGSYS_DECODE_LEVEL(rec_ident);
-
- while ((c = format_buffer[format_buffer_idx])) {
- cutoff = 0;
- if (c != '%') {
- normal_output_buffer[normal_output_buffer_idx++] = c;
- syslog_output_buffer[syslog_output_buffer_idx++] = c;
- format_buffer_idx++;
- } else {
- const char *normal_p, *syslog_p;
-
- format_buffer_idx += 1;
- if (isdigit (format_buffer[format_buffer_idx])) {
- cutoff = atoi (&format_buffer[format_buffer_idx]);
- }
- while (isdigit (format_buffer[format_buffer_idx])) {
- format_buffer_idx += 1;
- }
-
- switch (format_buffer[format_buffer_idx]) {
- case 's':
- normal_p = logsys_loggers[subsysid].subsys;
- syslog_p = logsys_loggers[subsysid].subsys;
- break;
-
- case 'n':
- normal_p = function_name;
- syslog_p = function_name;
- break;
-
- case 'f':
- normal_p = file_name;
- syslog_p = file_name;
- break;
-
- case 'l':
- snprintf (line_no, sizeof (line_no), "%d", file_line);
- normal_p = line_no;
- syslog_p = line_no;
- break;
-
- case 't':
- gettimeofday (&tv, NULL);
- (void)localtime_r ((time_t *)&tv.tv_sec, &tm_res);
- snprintf (char_time, sizeof (char_time), "%s %02d %02d:%02d:%02d",
- log_month_name[tm_res.tm_mon], tm_res.tm_mday, tm_res.tm_hour,
- tm_res.tm_min, tm_res.tm_sec);
- normal_p = char_time;
-
- /*
- * syslog does timestamping on its own.
- * also strip extra space in case.
- */
- syslog_p = "";
- break;
-
- case 'b':
- normal_p = buffer;
- syslog_p = buffer;
- break;
-
- case 'p':
- normal_p = logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].subsys;
- syslog_p = "";
- break;
-
- default:
- normal_p = "";
- syslog_p = "";
- break;
- }
- normal_len = strcpy_cutoff (normal_output_buffer + normal_output_buffer_idx,
- normal_p, cutoff,
- (sizeof (normal_output_buffer)
- - normal_output_buffer_idx));
- normal_output_buffer_idx += normal_len;
- syslog_len = strcpy_cutoff (syslog_output_buffer + syslog_output_buffer_idx,
- syslog_p, cutoff,
- (sizeof (syslog_output_buffer)
- - syslog_output_buffer_idx));
- syslog_output_buffer_idx += syslog_len;
- format_buffer_idx += 1;
- }
- if ((normal_output_buffer_idx >= sizeof (normal_output_buffer) - 2) ||
- (syslog_output_buffer_idx >= sizeof (syslog_output_buffer) - 1)) {
- /* Note: we make allowance for '\0' at the end of
- * both of these arrays and normal_output_buffer also
- * needs a '\n'.
- */
- break;
- }
- }
-
- normal_output_buffer[normal_output_buffer_idx] = '\0';
- syslog_output_buffer[syslog_output_buffer_idx] = '\0';
-
- /*
- * Output to syslog
- */
- if ((logsys_loggers[subsysid].mode & LOGSYS_MODE_OUTPUT_SYSLOG) &&
- ((level <= logsys_loggers[subsysid].syslog_priority) ||
- (logsys_loggers[subsysid].debug != 0))) {
- syslog (level | logsys_loggers[subsysid].syslog_facility, "%s", syslog_output_buffer);
- }
-
- /*
- * Terminate string with \n \0
- */
- normal_output_buffer[normal_output_buffer_idx++] = '\n';
- normal_output_buffer[normal_output_buffer_idx] = '\0';
-
- /*
- * Output to configured file
- */
- if (((logsys_loggers[subsysid].mode & LOGSYS_MODE_OUTPUT_FILE) &&
- (logsys_loggers[subsysid].logfile_fp != NULL)) &&
- ((level <= logsys_loggers[subsysid].logfile_priority) ||
- (logsys_loggers[subsysid].debug != 0))) {
- /*
- * Output to a file
- */
- if ((fwrite (normal_output_buffer, strlen (normal_output_buffer), 1,
- logsys_loggers[subsysid].logfile_fp) < 1) ||
- (fflush (logsys_loggers[subsysid].logfile_fp) == EOF)) {
- char tmpbuffer[1024];
- /*
- * if we are here, it's bad.. it's really really bad.
- * Best thing would be to light a candle in a church
- * and pray.
- */
- snprintf(tmpbuffer, sizeof(tmpbuffer),
- "LOGSYS EMERGENCY: %s Unable to write to %s.",
- logsys_loggers[subsysid].subsys,
- logsys_loggers[subsysid].logfile);
- pthread_mutex_lock (&logsys_config_mutex);
- logsys_close_logfile(subsysid);
- logsys_loggers[subsysid].mode &= ~LOGSYS_MODE_OUTPUT_FILE;
- pthread_mutex_unlock (&logsys_config_mutex);
- log_printf_to_logs(
- LOGSYS_ENCODE_RECID(
- LOGSYS_LEVEL_EMERG,
- subsysid,
- LOGSYS_RECID_LOG),
- __FILE__, __FUNCTION__, __LINE__,
- tmpbuffer);
- }
- }
-
- /*
- * Output to stderr
- */
- if ((logsys_loggers[subsysid].mode & LOGSYS_MODE_OUTPUT_STDERR) &&
- ((level <= logsys_loggers[subsysid].logfile_priority) ||
- (logsys_loggers[subsysid].debug != 0))) {
- if (write (STDERR_FILENO, normal_output_buffer, strlen (normal_output_buffer)) < 0) {
- char tmpbuffer[1024];
- /*
- * if we are here, it's bad.. it's really really bad.
- * Best thing would be to light 20 candles for each saint
- * in the calendar and pray a lot...
- */
- pthread_mutex_lock (&logsys_config_mutex);
- logsys_loggers[subsysid].mode &= ~LOGSYS_MODE_OUTPUT_STDERR;
- pthread_mutex_unlock (&logsys_config_mutex);
- snprintf(tmpbuffer, sizeof(tmpbuffer),
- "LOGSYS EMERGENCY: %s Unable to write to STDERR.",
- logsys_loggers[subsysid].subsys);
- log_printf_to_logs(
- LOGSYS_ENCODE_RECID(
- LOGSYS_LEVEL_EMERG,
- subsysid,
- LOGSYS_RECID_LOG),
- __FILE__, __FUNCTION__, __LINE__,
- tmpbuffer);
- }
- }
-}
-
-static void log_printf_to_logs_wthread (
- unsigned int rec_ident,
- const char *file_name,
- const char *function_name,
- int file_line,
- const char *buffer)
-{
- struct record *rec;
- uint32_t length;
-
- rec = malloc (sizeof (struct record));
- if (rec == NULL) {
- return;
- }
-
- length = strlen (buffer);
-
- rec->rec_ident = rec_ident;
- rec->file_name = file_name;
- rec->function_name = function_name;
- rec->file_line = file_line;
- rec->buffer = malloc (length + 1);
- if (rec->buffer == NULL) {
- free (rec);
- return;
- }
- memcpy (rec->buffer, buffer, length + 1);
-
- list_init (&rec->list);
- logsys_wthread_lock();
- logsys_memory_used += length + 1 + sizeof (struct record);
- if (logsys_memory_used > 512000) {
- free (rec->buffer);
- free (rec);
- logsys_memory_used = logsys_memory_used - length - 1 - sizeof (struct record);
- logsys_dropped_messages += 1;
- logsys_wthread_unlock();
- return;
-
- } else {
- list_add_tail (&rec->list, &logsys_print_finished_records);
- }
- logsys_wthread_unlock();
-
- sem_post (&logsys_print_finished);
-}
-
-static void *logsys_worker_thread (void *data) __attribute__((noreturn));
-static void *logsys_worker_thread (void *data)
-{
- struct record *rec;
- int dropped = 0;
- int res;
-
- /*
- * Signal wthread_create that the initialization process may continue
- */
- sem_post (&logsys_thread_start);
- for (;;) {
- dropped = 0;
-retry_sem_wait:
- res = sem_wait (&logsys_print_finished);
- if (res == -1 && errno == EINTR) {
- goto retry_sem_wait;
- } else
- if (res == -1) {
- /*
- * This case shouldn't happen
- */
- pthread_exit (NULL);
- }
-
-
- logsys_wthread_lock();
- if (wthread_should_exit) {
- int value;
-
- res = sem_getvalue (&logsys_print_finished, &value);
- if (value == 0) {
- logsys_wthread_unlock();
- pthread_exit (NULL);
- }
- }
-
- rec = list_entry (logsys_print_finished_records.next, struct record, list);
- list_del (&rec->list);
- logsys_memory_used = logsys_memory_used - strlen (rec->buffer) -
- sizeof (struct record) - 1;
- dropped = logsys_dropped_messages;
- logsys_dropped_messages = 0;
- logsys_wthread_unlock();
- if (dropped) {
- printf ("%d messages lost\n", dropped);
- }
- log_printf_to_logs (
- rec->rec_ident,
- rec->file_name,
- rec->function_name,
- rec->file_line,
- rec->buffer);
- free (rec->buffer);
- free (rec);
- }
-}
-
-static void wthread_create (void)
-{
- int res;
-
- if (wthread_active) {
- return;
- }
-
- wthread_active = 1;
-
-
- /*
- * TODO: propagate pthread_create errors back to the caller
- */
- res = pthread_create (&logsys_thread_id, NULL,
- logsys_worker_thread, NULL);
- sem_wait (&logsys_thread_start);
-
- if (res == 0) {
- if (logsys_sched_param_queued == 1) {
- /*
- * TODO: propagate logsys_thread_priority_set errors back to
- * the caller
- */
- res = logsys_thread_priority_set (
- logsys_sched_policy,
- &logsys_sched_param,
- logsys_after_log_ops_yield);
- logsys_sched_param_queued = 0;
- }
- } else {
- wthread_active = 0;
- }
-}
-
/*
* Map a subsystem name to its index in logsys_loggers[].
*
* A NULL name maps to LOGSYS_MAX_SUBSYS_COUNT, the slot that
* _logsys_system_setup() fills with the main system's settings.  Otherwise
* slots 0..LOGSYS_MAX_SUBSYS_COUNT (inclusive) are compared by name and the
* index of the first match is returned; -1 means no slot carries that name.
*
* No locking is done here; _logsys_subsys_create() calls this with
* logsys_config_mutex held.
*/
static int _logsys_config_subsys_get_unlocked (const char *subsys)
{
unsigned int i;
if (!subsys) {
return LOGSYS_MAX_SUBSYS_COUNT;
}
- for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) {
+ for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) {
if (strcmp (logsys_loggers[i].subsys, subsys) == 0) {
return i;
}
}
return (-1);
}
-static void syslog_facility_reconf (void)
-{
- closelog();
- openlog(logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].subsys,
- LOG_CONS|LOG_PID,
- logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].syslog_facility);
-}
-
-/*
- * this is always invoked within the mutex, so it's safe to parse the
- * whole thing as we need.
- */
-static void logsys_close_logfile (
- int subsysid)
-{
- int i;
-
- if ((logsys_loggers[subsysid].logfile_fp == NULL) &&
- (logsys_loggers[subsysid].logfile == NULL)) {
- return;
- }
-
- /*
- * if there is another subsystem or system using the same fp,
- * then we clean our own structs, but we can't close the file
- * as it is in use by somebody else.
- * Only the last users will be allowed to perform the fclose.
- */
- for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) {
- if ((logsys_loggers[i].logfile_fp == logsys_loggers[subsysid].logfile_fp) &&
- (i != subsysid)) {
- logsys_loggers[subsysid].logfile = NULL;
- logsys_loggers[subsysid].logfile_fp = NULL;
- return;
- }
- }
-
- /*
- * if we are here, we are the last users of that fp, so we can safely
- * close it.
- */
- fclose (logsys_loggers[subsysid].logfile_fp);
- logsys_loggers[subsysid].logfile_fp = NULL;
- free (logsys_loggers[subsysid].logfile);
- logsys_loggers[subsysid].logfile = NULL;
-}
/*
* Set the logfile of one subsystem without taking the config mutex.
*
* we need a version that can work when somebody else is already
* holding a config mutex lock or we will never get out of here
*
* subsysid:     index into logsys_loggers[]
* error_string: on failure, set to point at a message held in a
*               function-static buffer (overwritten by the next failure)
* file:         path of the logfile; NULL leaves the subsystem without a
*               newly opened file
*
* Returns 0 on success (including the "nothing to do" cases) and -1 when
* the name is too long, memory cannot be allocated or the file cannot be
* opened.  The subsystem is marked dirty in every case.
*/
static int logsys_config_file_set_unlocked (
int subsysid,
const char **error_string,
const char *file)
{
static char error_string_response[512];
int i;
- logsys_close_logfile(subsysid);
+ if (logsys_loggers[subsysid].target_id > 0) {
+ /* TODO close file
+ logsys_filter_apply(subsysid,
+ QB_LOG_FILTER_REMOVE,
+ logsys_loggers[subsysid].target_id);
+ */
+ }
+ logsys_loggers[subsysid].dirty = QB_TRUE;
/* no file requested, or the slot has no subsystem name: nothing to open */
if ((file == NULL) ||
(strcmp(logsys_loggers[subsysid].subsys, "") == 0)) {
return (0);
}
if (strlen(file) >= PATH_MAX) {
snprintf (error_string_response,
sizeof(error_string_response),
"%s: logfile name exceed maximum system filename lenght\n",
logsys_loggers[subsysid].subsys);
*error_string = error_string_response;
return (-1);
}
/*
* If a different subsystem already logs to the same path, share its
* target instead of opening the file a second time.
* NOTE(review): on this path logsys_loggers[subsysid].logfile is left
* unchanged - confirm that is intended.
*/
for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) {
if ((logsys_loggers[i].logfile != NULL) &&
- (strcmp (logsys_loggers[i].logfile, file) == 0) &&
- (i != subsysid)) {
- logsys_loggers[subsysid].logfile =
- logsys_loggers[i].logfile;
- logsys_loggers[subsysid].logfile_fp =
- logsys_loggers[i].logfile_fp;
- return (0);
+ (strcmp (logsys_loggers[i].logfile, file) == 0) &&
+ (i != subsysid)) {
+ /* we have found another subsys with this config file
+ * so add a filter
+ */
+ logsys_loggers[subsysid].target_id = logsys_loggers[i].target_id;
+ return (0);
}
}
-
/*
* NOTE(review): a previously stored logfile string is overwritten here
* without being freed - verify whether that leaks.
*/
logsys_loggers[subsysid].logfile = strdup(file);
if (logsys_loggers[subsysid].logfile == NULL) {
snprintf (error_string_response,
sizeof(error_string_response),
"Unable to allocate memory for logfile '%s'\n",
file);
*error_string = error_string_response;
return (-1);
}
- logsys_loggers[subsysid].logfile_fp = fopen (file, "a+");
- if (logsys_loggers[subsysid].logfile_fp == NULL) {
- int err;
+ if (logsys_loggers[subsysid].target_id > 0) {
+ /* no one else is using this close it */
+ qb_log_file_close(logsys_loggers[subsysid].target_id);
+ }
+
/* a negative value from qb_log_file_open() is treated as the error code */
+ logsys_loggers[subsysid].target_id = qb_log_file_open(file);
+ if (logsys_loggers[subsysid].target_id < 0) {
+ int err = logsys_loggers[subsysid].target_id;
char error_str[LOGSYS_MAX_PERROR_MSG_LEN];
const char *error_ptr;
-
- err = errno;
-#ifdef COROSYNC_LINUX
- /* The GNU version of strerror_r returns a (char*) that *must* be used */
- error_ptr = strerror_r(err, error_str, sizeof(error_str));
-#else
- /* The XSI-compliant strerror_r() return 0 or -1 (in case the buffer is full) */
- if ( strerror_r(err, error_str, sizeof(error_str)) < 0 )
- error_ptr = "";
- else
- error_ptr = error_str;
-#endif
+ error_ptr = qb_strerror_r(err, error_str, sizeof(error_str));
/* undo the strdup above so the slot does not advertise an unopened file */
free(logsys_loggers[subsysid].logfile);
logsys_loggers[subsysid].logfile = NULL;
snprintf (error_string_response,
sizeof(error_string_response),
"Can't open logfile '%s' for reason: %s (%d).\n",
- file, error_ptr, err);
+ file, error_ptr, err);
*error_string = error_string_response;
return (-1);
}
-
return (0);
}
/*
* Initialise the logger slot `subsysid` for the subsystem named `subsys`.
*
* While the logging system itself is still waiting for setup
* (logsys_system_needs_init == LOGSYS_LOGGER_NEEDS_INIT) the slot is only
* flagged as needing init.  Afterwards the slot inherits mode, debug and
* both priorities from the main-system slot (index LOGSYS_MAX_SUBSYS_COUNT)
* and is flagged as done.
*
* In both cases the name is copied in (truncated to the field size and
* always NUL-terminated) and file_idx is reset to 0.
*/
static void logsys_subsys_init (
const char *subsys,
int subsysid)
{
if (logsys_system_needs_init == LOGSYS_LOGGER_NEEDS_INIT) {
logsys_loggers[subsysid].init_status =
LOGSYS_LOGGER_NEEDS_INIT;
} else {
- memcpy(&logsys_loggers[subsysid],
- &logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT],
- sizeof(logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT]));
- logsys_loggers[subsysid].init_status =
- LOGSYS_LOGGER_INIT_DONE;
+ logsys_loggers[subsysid].mode = logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].mode;
+ logsys_loggers[subsysid].debug = logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].debug;
+ logsys_loggers[subsysid].syslog_priority = logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].syslog_priority;
+ logsys_loggers[subsysid].logfile_priority = logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].logfile_priority;
+ logsys_loggers[subsysid].init_status = LOGSYS_LOGGER_INIT_DONE;
}
/* strncpy may leave the field unterminated, hence the explicit '\0' */
strncpy (logsys_loggers[subsysid].subsys, subsys,
sizeof (logsys_loggers[subsysid].subsys));
logsys_loggers[subsysid].subsys[
sizeof (logsys_loggers[subsysid].subsys) - 1] = '\0';
+ logsys_loggers[subsysid].file_idx = 0;
+}
+
/*
* Turn a log tag into the name printed for it: "QB" for
* QB_LOG_TAG_LIBQB_MSG, otherwise the subsystem name stored in
* logsys_loggers[tags].  _logsys_system_setup() installs this function via
* qb_log_tags_stringify_fn_set().
*
* NOTE(review): tags is used as an array index without a range check -
* confirm callers only pass valid subsystem ids.
*/
+static const char *_logsys_tags_stringify(uint32_t tags)
+{
+ if (tags == QB_LOG_TAG_LIBQB_MSG) {
+ return "QB";
+ } else {
+ return logsys_loggers[tags].subsys;
+ }
}
/*
* Internal API - exported
*/
/*
* Configure the main logging system and finish the initialisation of every
* subsystem that registered before this call.
*
* mainsystem:      name of the main system; must be non-NULL and shorter
*                  than LOGSYS_MAX_SUBSYS_NAMELEN
* mode:            LOGSYS_MODE_* bits; the STDERR and SYSLOG output bits
*                  decide which of those two targets get enabled
* syslog_facility: passed to qb_log_init()
* syslog_priority: passed to qb_log_init() and stored as both the syslog
*                  and the logfile priority of the main-system slot
*
* Returns -1 when mainsystem is NULL or too long, 0 otherwise.  The return
* values of the qb_log_* calls are not checked.
*/
int _logsys_system_setup(
const char *mainsystem,
unsigned int mode,
- unsigned int debug,
- const char *logfile,
- int logfile_priority,
int syslog_facility,
int syslog_priority)
{
int i;
- const char *errstr;
+ int32_t fidx;
char tempsubsys[LOGSYS_MAX_SUBSYS_NAMELEN];
if ((mainsystem == NULL) ||
(strlen(mainsystem) >= LOGSYS_MAX_SUBSYS_NAMELEN)) {
return -1;
}
/* the slot after the last subsystem holds the main system's settings */
i = LOGSYS_MAX_SUBSYS_COUNT;
pthread_mutex_lock (&logsys_config_mutex);
snprintf(logsys_loggers[i].subsys,
LOGSYS_MAX_SUBSYS_NAMELEN,
"%s", mainsystem);
logsys_loggers[i].mode = mode;
+ logsys_loggers[i].debug = 0;
+ logsys_loggers[i].file_idx = 0;
+ logsys_loggers[i].logfile_priority = syslog_priority;
+ logsys_loggers[i].syslog_priority = syslog_priority;
- logsys_loggers[i].debug = debug;
-
- if (logsys_config_file_set_unlocked (i, &errstr, logfile) < 0) {
- pthread_mutex_unlock (&logsys_config_mutex);
- return (-1);
+ qb_log_init(mainsystem, syslog_facility, syslog_priority);
+ if (logsys_loggers[i].mode & LOGSYS_MODE_OUTPUT_STDERR) {
+ qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_TRUE);
+ } else {
+ qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE);
+ }
+ if (logsys_loggers[i].mode & LOGSYS_MODE_OUTPUT_SYSLOG) {
+ qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE);
+ } else {
+ qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE);
}
- logsys_loggers[i].logfile_priority = logfile_priority;
- logsys_loggers[i].syslog_facility = syslog_facility;
- logsys_loggers[i].syslog_priority = syslog_priority;
- syslog_facility_reconf();
/*
* blackbox target: filter added for every file ("*") at LOG_TRACE,
* size 4096, not threaded, enabled
*/
+ qb_log_filter_ctl(QB_LOG_BLACKBOX, QB_LOG_FILTER_ADD,
+ QB_LOG_FILTER_FILE, "*", LOG_TRACE);
+ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_SIZE, 4096);
+ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_THREADED, QB_FALSE);
+ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE);
- logsys_loggers[i].init_status = LOGSYS_LOGGER_INIT_DONE;
+ logsys_format_set(NULL);
+ qb_log_tags_stringify_fn_set(_logsys_tags_stringify);
+ logsys_loggers[i].init_status = LOGSYS_LOGGER_INIT_DONE;
logsys_system_needs_init = LOGSYS_LOGGER_INIT_DONE;
/*
* Finish every named subsystem that is still flagged NEEDS_INIT.
* logsys_subsys_init() resets file_idx, so the value is saved and
* restored around the call.
*/
for (i = 0; i < LOGSYS_MAX_SUBSYS_COUNT; i++) {
if ((strcmp (logsys_loggers[i].subsys, "") != 0) &&
- (logsys_loggers[i].init_status ==
- LOGSYS_LOGGER_NEEDS_INIT)) {
- strncpy (tempsubsys, logsys_loggers[i].subsys,
- sizeof (tempsubsys));
- tempsubsys[sizeof (tempsubsys) - 1] = '\0';
- logsys_subsys_init(tempsubsys, i);
+ (logsys_loggers[i].init_status ==
+ LOGSYS_LOGGER_NEEDS_INIT)) {
+ fidx = logsys_loggers[i].file_idx;
+ strncpy (tempsubsys, logsys_loggers[i].subsys,
+ sizeof (tempsubsys));
+ tempsubsys[sizeof (tempsubsys) - 1] = '\0';
+ logsys_subsys_init(tempsubsys, i);
+ logsys_loggers[i].file_idx = fidx;
+ _logsys_config_mode_set_unlocked(i, logsys_loggers[i].mode);
+ _logsys_config_apply_per_subsys(i);
}
}
pthread_mutex_unlock (&logsys_config_mutex);
return (0);
}
-int _logsys_subsys_create (const char *subsys)
+
+/*
+ * Record that source file `filename` belongs to the subsystem in slot `s`.
+ *
+ * A NULL filename, or one already registered for the subsystem, is ignored.
+ * Otherwise a private copy of the name is appended to
+ * logsys_loggers[s].files[] and, once the logging system has been set up
+ * (logsys_system_needs_init == LOGSYS_LOGGER_INIT_DONE), the per-file
+ * configuration is applied straight away.
+ *
+ * No locking is done here; _logsys_subsys_create() calls this with
+ * logsys_config_mutex held.
+ */
+static void _logsys_subsys_filename_add (int32_t s, const char *filename)
+{
+	int32_t idx;
+	char *name_copy;
+
+	if (filename == NULL) {
+		return;
+	}
+	assert(logsys_loggers[s].file_idx >= 0);
+	assert(logsys_loggers[s].file_idx <= MAX_FILES_PER_SUBSYS);
+
+	/*
+	 * Look for a duplicate first: re-registering a known file must not
+	 * trip the capacity check below when the table is exactly full.
+	 */
+	for (idx = 0; idx < logsys_loggers[s].file_idx; idx++) {
+		if (strcmp(logsys_loggers[s].files[idx], filename) == 0) {
+			return;
+		}
+	}
+
+	/* a genuinely new file needs a free slot */
+	assert(logsys_loggers[s].file_idx < MAX_FILES_PER_SUBSYS);
+	if (logsys_loggers[s].file_idx >= MAX_FILES_PER_SUBSYS) {
+		/* assert compiled out (NDEBUG): never write past files[] */
+		return;
+	}
+
+	/*
+	 * Only store a successful copy.  A NULL entry would be handed to
+	 * strcmp() by the duplicate scan on the next call.
+	 */
+	name_copy = strdup(filename);
+	if (name_copy != NULL) {
+		logsys_loggers[s].files[logsys_loggers[s].file_idx++] = name_copy;
+	}
+
+	/* the configuration step uses the caller's string, not the copy */
+	if (logsys_system_needs_init == LOGSYS_LOGGER_INIT_DONE) {
+		_logsys_config_apply_per_file(s, filename);
+	}
+}
+
/*
* Register a subsystem, or look up one that already exists, and associate
* `filename` with it.
*
* subsys:   subsystem name; must be non-NULL and shorter than
*           LOGSYS_MAX_SUBSYS_NAMELEN
* filename: source file to add to the subsystem; NULL is ignored by
*           _logsys_subsys_filename_add()
*
* Returns the subsystem's index in logsys_loggers[], or -1 when the name is
* invalid or no free slot is left.  The whole lookup/creation runs with
* logsys_config_mutex held.
*/
+int _logsys_subsys_create (const char *subsys, const char *filename)
{
int i;
if ((subsys == NULL) ||
(strlen(subsys) >= LOGSYS_MAX_SUBSYS_NAMELEN)) {
return -1;
}
pthread_mutex_lock (&logsys_config_mutex);
/* already known: just add the file to the existing slot */
i = _logsys_config_subsys_get_unlocked (subsys);
if ((i > -1) && (i < LOGSYS_MAX_SUBSYS_COUNT)) {
+ _logsys_subsys_filename_add(i, filename);
pthread_mutex_unlock (&logsys_config_mutex);
return i;
}
/* otherwise take the first slot whose name is still empty */
for (i = 0; i < LOGSYS_MAX_SUBSYS_COUNT; i++) {
if (strcmp (logsys_loggers[i].subsys, "") == 0) {
logsys_subsys_init(subsys, i);
+ _logsys_subsys_filename_add(i, filename);
break;
}
}
if (i >= LOGSYS_MAX_SUBSYS_COUNT) {
i = -1;
}
pthread_mutex_unlock (&logsys_config_mutex);
return i;
}
-int _logsys_wthread_create (void)
-{
- if (((logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].mode & LOGSYS_MODE_FORK) == 0) &&
- ((logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].mode & LOGSYS_MODE_THREADED) != 0)) {
- wthread_create();
- }
- return (0);
-}
-
-int _logsys_rec_init (unsigned int fltsize)
-{
- size_t flt_real_size;
- int res;
-
- sem_init (&logsys_thread_start, 0, 0);
-
- sem_init (&logsys_print_finished, 0, 0);
-
- /*
- * XXX: kill me for 1.1 because I am a dirty hack
- * temporary workaround that will be replaced by supporting
- * 0 byte size flight recorder buffer.
- * 0 byte size buffer will enable direct printing to logs
- * without flight recoder.
- */
- if (fltsize < 64000) {
- fltsize = 64000;
- }
-
- flt_real_size = ROUNDUP(fltsize, sysconf(_SC_PAGESIZE)) * 4;
-
- res = circular_memory_map ((void **)&flt_data, flt_real_size);
- if (res == -1) {
- sem_destroy (&logsys_thread_start);
- sem_destroy (&logsys_print_finished);
- }
-
- memset (flt_data, 0, flt_real_size * 2);
- /*
- * flt_data_size tracks data by ints and not bytes/chars.
- */
-
- flt_data_size = flt_real_size / sizeof (uint32_t);
-
- /*
- * First record starts at zero
- * Last record ends at zero
- */
- flt_head = 0;
- flt_tail = 0;
-
- return (0);
-}
-
-
-/*
- * u32 RECORD SIZE
- * u32 record ident
- * u32 arg count
- * u32 file line
- * u32 subsys length
- * buffer null terminated subsys
- * u32 filename length
- * buffer null terminated filename
- * u32 filename length
- * buffer null terminated function
- * u32 arg1 length
- * buffer arg1
- * ... repeats length & arg
- */
-
-void _logsys_log_rec (
- unsigned int rec_ident,
- const char *function_name,
- const char *file_name,
- int file_line,
- ...)
-{
- va_list ap;
- const void *buf_args[FDMAX_ARGS];
- unsigned int buf_len[FDMAX_ARGS];
- unsigned int i;
- unsigned int idx;
- unsigned int arguments = 0;
- unsigned int record_reclaim_size = 0;
- unsigned int index_start;
- int words_written;
- int subsysid;
-
- subsysid = LOGSYS_DECODE_SUBSYSID(rec_ident);
-
- /*
- * Decode VA Args
- */
- va_start (ap, file_line);
- arguments = 3;
- for (;;) {
- buf_args[arguments] = va_arg (ap, void *);
- if (buf_args[arguments] == LOGSYS_REC_END) {
- break;
- }
- buf_len[arguments] = va_arg (ap, int);
- record_reclaim_size += ((buf_len[arguments] + 3) >> 2) + 1;
- arguments++;
- if (arguments >= FDMAX_ARGS) {
- break;
- }
- }
- va_end (ap);
-
- /*
- * Encode logsys subsystem identity, filename, and function
- */
- buf_args[0] = logsys_loggers[subsysid].subsys;
- buf_len[0] = strlen (logsys_loggers[subsysid].subsys) + 1;
- buf_args[1] = file_name;
- buf_len[1] = strlen (file_name) + 1;
- buf_args[2] = function_name;
- buf_len[2] = strlen (function_name) + 1;
- for (i = 0; i < 3; i++) {
- record_reclaim_size += ((buf_len[i] + 3) >> 2) + 1;
- }
-
- logsys_flt_lock();
- idx = flt_head;
- index_start = idx;
-
- /*
- * Reclaim data needed for record including 4 words for the header
- */
- records_reclaim (idx, record_reclaim_size + 4);
-
- /*
- * Write record size of zero and rest of header information
- */
- flt_data[idx++] = 0;
- idx_word_step(idx);
-
- flt_data[idx++] = rec_ident;
- idx_word_step(idx);
-
- flt_data[idx++] = file_line;
- idx_word_step(idx);
-
- flt_data[idx++] = records_written;
- idx_word_step(idx);
- /*
- * Encode all of the arguments into the log message
- */
- for (i = 0; i < arguments; i++) {
- unsigned int bytes;
- unsigned int total_words;
-
- bytes = buf_len[i];
- total_words = (bytes + 3) >> 2;
-
- flt_data[idx++] = total_words;
- idx_word_step(idx);
-
- memcpy (&flt_data[idx], buf_args[i], buf_len[i]);
-
- idx += total_words;
- idx_buffer_step (idx);
-
- }
- words_written = idx - index_start;
- if (words_written < 0) {
- words_written += flt_data_size;
- }
- /*
- * Commit the write of the record size now that the full record
- * is in the memory buffer
- */
- flt_data[index_start] = words_written;
-
- flt_head = idx;
- logsys_flt_unlock();
- records_written++;
-}
-
-void _logsys_log_vprintf (
- unsigned int rec_ident,
- const char *function_name,
- const char *file_name,
- int file_line,
- const char *format,
- va_list ap)
-{
- char logsys_print_buffer[COMBINE_BUFFER_SIZE];
- unsigned int len;
- unsigned int level;
- int subsysid;
- const char *short_file_name;
-
- subsysid = LOGSYS_DECODE_SUBSYSID(rec_ident);
- level = LOGSYS_DECODE_LEVEL(rec_ident);
-
- len = vsnprintf (logsys_print_buffer, sizeof (logsys_print_buffer), format, ap);
- if (logsys_print_buffer[len - 1] == '\n') {
- logsys_print_buffer[len - 1] = '\0';
- len -= 1;
- }
-#ifdef BUILDING_IN_PLACE
- short_file_name = file_name;
-#else
- short_file_name = strrchr (file_name, '/');
- if (short_file_name == NULL)
- short_file_name = file_name;
- else
- short_file_name++; /* move past the "/" */
-#endif /* BUILDING_IN_PLACE */
-
- /*
- * Create a log record
- */
- _logsys_log_rec (
- rec_ident,
- function_name,
- short_file_name,
- file_line,
- logsys_print_buffer, len + 1,
- LOGSYS_REC_END);
-
- /*
- * If logsys is not going to print a message to a log target don't
- * queue one
- */
- if ((level > logsys_loggers[subsysid].syslog_priority &&
- level > logsys_loggers[subsysid].logfile_priority &&
- logsys_loggers[subsysid].debug == 0) ||
-
- (level == LOGSYS_LEVEL_DEBUG &&
- logsys_loggers[subsysid].debug == 0)) {
-
- return;
- }
-
- if ((logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].mode & LOGSYS_MODE_THREADED) == 0) {
- /*
- * Output (and block) if the log mode is not threaded otherwise
- * expect the worker thread to output the log data once signaled
- */
- log_printf_to_logs (rec_ident,
- short_file_name,
- function_name,
- file_line,
- logsys_print_buffer);
- } else {
- /*
- * Signal worker thread to display logging output
- */
- log_printf_to_logs_wthread (rec_ident,
- short_file_name,
- function_name,
- file_line,
- logsys_print_buffer);
- }
-}
-
-void _logsys_log_printf (
- unsigned int rec_ident,
- const char *function_name,
- const char *file_name,
- int file_line,
- const char *format,
- ...)
-{
- va_list ap;
-
- va_start (ap, format);
- _logsys_log_vprintf (rec_ident, function_name, file_name, file_line,
- format, ap);
- va_end (ap);
-}
-
/*
 * Locked wrapper around _logsys_config_subsys_get_unlocked().
 *
 * Returns whatever the unlocked lookup returns for `subsys`; callers in this
 * file treat a negative value as "not found".  The result is kept in a signed
 * int so that a negative value is passed through unchanged instead of making
 * a round trip through an unsigned variable.
 */
int _logsys_config_subsys_get (const char *subsys)
{
	int i;

	pthread_mutex_lock (&logsys_config_mutex);
	i = _logsys_config_subsys_get_unlocked (subsys);
	pthread_mutex_unlock (&logsys_config_mutex);

	return i;
}
-/*
- * External Configuration and Initialization API
- */
-void logsys_fork_completed (void)
+/*
+ * Store `new_mode` for slot `subsysid` and enable/disable the matching log
+ * targets.  Does nothing when the slot already has that mode.  Always
+ * returns 0.  The caller is expected to hold logsys_config_mutex (the
+ * callers visible in this file do).
+ */
+static int32_t _logsys_config_mode_set_unlocked(int32_t subsysid, uint32_t new_mode)
{
- logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].mode &= ~LOGSYS_MODE_FORK;
- (void)_logsys_wthread_create ();
+ if ( logsys_loggers[subsysid].mode == new_mode) {
+ return 0;
+ }
+	/*
+	 * A positive target_id is toggled with the OUTPUT_FILE bit.  The masked
+	 * bit itself is passed, not a normalised QB_TRUE/QB_FALSE.
+	 */
+ if (logsys_loggers[subsysid].target_id > 0) {
+ qb_log_ctl(logsys_loggers[subsysid].target_id,
+ QB_LOG_CONF_ENABLED,
+ (new_mode & LOGSYS_MODE_OUTPUT_FILE));
+ }
+
+	/* stderr and syslog are only toggled for the slot at LOGSYS_MAX_SUBSYS_COUNT. */
+ if (subsysid == LOGSYS_MAX_SUBSYS_COUNT) {
+ qb_log_ctl(QB_LOG_STDERR,
+ QB_LOG_CONF_ENABLED,
+ (new_mode & LOGSYS_MODE_OUTPUT_STDERR));
+ qb_log_ctl(QB_LOG_SYSLOG,
+ QB_LOG_CONF_ENABLED,
+ (new_mode & LOGSYS_MODE_OUTPUT_SYSLOG));
+ }
+ logsys_loggers[subsysid].mode = new_mode;
+ return 0;
}
+/*
+ * Set the logging mode of one subsystem, or of every slot (0 through
+ * LOGSYS_MAX_SUBSYS_COUNT inclusive) when `subsys` is NULL.
+ *
+ * Returns 0 on success; when the named subsystem is not found the negative
+ * lookup result is returned unchanged.
+ */
int logsys_config_mode_set (const char *subsys, unsigned int mode)
{
int i;
pthread_mutex_lock (&logsys_config_mutex);
if (subsys != NULL) {
i = _logsys_config_subsys_get_unlocked (subsys);
if (i >= 0) {
- logsys_loggers[i].mode = mode;
- i = 0;
+ i = _logsys_config_mode_set_unlocked(i, mode);
}
} else {
for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) {
- logsys_loggers[i].mode = mode;
+ _logsys_config_mode_set_unlocked(i, mode);
}
i = 0;
}
+
pthread_mutex_unlock (&logsys_config_mutex);
return i;
}
/*
 * Return the mode stored for `subsys`.
 *
 * When the lookup fails, its negative result is returned converted to the
 * unsigned return type.  The mode field is read after the lookup has
 * released the configuration mutex.
 */
unsigned int logsys_config_mode_get (const char *subsys)
{
	const int slot = _logsys_config_subsys_get (subsys);

	if (slot >= 0) {
		return logsys_loggers[slot].mode;
	}
	return slot;
}
/*
 * Set the log file of one subsystem, or of every slot (0 through
 * LOGSYS_MAX_SUBSYS_COUNT inclusive) when `subsys` is NULL.
 *
 * Returns the result of logsys_config_file_set_unlocked(); in the all-slots
 * case processing stops at the first negative result, which is returned.
 * When the named subsystem is not found the negative lookup result is
 * returned.  `error_string` is passed through to the unlocked helper.
 */
int logsys_config_file_set (
	const char *subsys,
	const char **error_string,
	const char *file)
{
	int slot;
	int res;

	pthread_mutex_lock (&logsys_config_mutex);
	if (subsys == NULL) {
		/* The slot range is never empty, so the first call always happens. */
		slot = 0;
		do {
			res = logsys_config_file_set_unlocked(slot, error_string, file);
			slot++;
		} while ((res >= 0) && (slot <= LOGSYS_MAX_SUBSYS_COUNT));
	} else {
		slot = _logsys_config_subsys_get_unlocked (subsys);
		res = slot;
		if (slot >= 0) {
			res = logsys_config_file_set_unlocked(slot, error_string, file);
		}
	}
	pthread_mutex_unlock (&logsys_config_mutex);

	return res;
}
int logsys_format_set (const char *format)
{
int ret = 0;
-
- pthread_mutex_lock (&logsys_config_mutex);
+ int c;
+ int w;
+ int reminder;
+ char syslog_format[128];
if (format_buffer) {
free(format_buffer);
format_buffer = NULL;
}
- format_buffer = strdup(format ? format : "%p [%6s] %b");
+ format_buffer = strdup(format ? format : "%7p [%6g] %b");
if (format_buffer == NULL) {
ret = -1;
}
+ qb_log_format_set(QB_LOG_STDERR, format_buffer);
+
+ /*
+ * This just goes through and remove %t and %p from
+ * the format string for syslog.
+ */
+ w = 0;
+ memset(syslog_format, '\0', sizeof(syslog_format));
+ for (c = 0; c < strlen(format_buffer); c++) {
+ if (format_buffer[c] == '%') {
+ reminder = c;
+ for (c++; c < strlen(format_buffer); c++) {
+ if (isdigit(format_buffer[c])) {
+ continue;
+ }
+ if (format_buffer[c] == 't' ||
+ format_buffer[c] == 'p') {
+ c++;
+ } else {
+ c = reminder;
+ }
+ break;
+ }
+ }
+ syslog_format[w] = format_buffer[c];
+ w++;
+ }
+// printf("normal_format: %s\n", format_buffer);
+// printf("syslog_format: %s\n", syslog_format);
+ qb_log_format_set(QB_LOG_SYSLOG, syslog_format);
- pthread_mutex_unlock (&logsys_config_mutex);
return ret;
}
/*
 * Return the current format string as stored in format_buffer.
 * The pointer is the internal buffer itself, not a copy; logsys_format_set()
 * frees and replaces that buffer.
 */
char *logsys_format_get (void)
{
return format_buffer;
}
+/*
+ * Set the syslog facility of the QB_LOG_SYSLOG target.
+ *
+ * After this change `subsys` is not used: the facility is handed straight
+ * to qb_log_ctl(), whose return value is returned.  No per-subsystem field
+ * is written and logsys_config_mutex is not taken.
+ */
int logsys_config_syslog_facility_set (
const char *subsys,
unsigned int facility)
{
- int i;
-
- pthread_mutex_lock (&logsys_config_mutex);
- if (subsys != NULL) {
- i = _logsys_config_subsys_get_unlocked (subsys);
- if (i >= 0) {
- logsys_loggers[i].syslog_facility = facility;
- if (i == LOGSYS_MAX_SUBSYS_COUNT) {
- syslog_facility_reconf();
- }
- i = 0;
- }
- } else {
- for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) {
- logsys_loggers[i].syslog_facility = facility;
- }
- syslog_facility_reconf();
- i = 0;
- }
- pthread_mutex_unlock (&logsys_config_mutex);
-
- return i;
+ return qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_FACILITY, facility);
}
+/*
+ * Record the syslog priority for one subsystem, or for every slot (0 through
+ * LOGSYS_MAX_SUBSYS_COUNT inclusive) when `subsys` is NULL, and mark the
+ * affected slots dirty.  Only the stored fields are updated here.
+ *
+ * Returns 0 on success; when the named subsystem is not found the negative
+ * lookup result is returned unchanged.
+ */
int logsys_config_syslog_priority_set (
const char *subsys,
unsigned int priority)
{
int i;
pthread_mutex_lock (&logsys_config_mutex);
if (subsys != NULL) {
i = _logsys_config_subsys_get_unlocked (subsys);
if (i >= 0) {
logsys_loggers[i].syslog_priority = priority;
+ logsys_loggers[i].dirty = QB_TRUE;
+
i = 0;
}
} else {
for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) {
logsys_loggers[i].syslog_priority = priority;
+ logsys_loggers[i].dirty = QB_TRUE;
}
i = 0;
}
pthread_mutex_unlock (&logsys_config_mutex);
return i;
}
+/*
+ * Record the log file priority for one subsystem, or for every slot (0
+ * through LOGSYS_MAX_SUBSYS_COUNT inclusive) when `subsys` is NULL, and mark
+ * the affected slots dirty.  Only the stored fields are updated here.
+ *
+ * Returns 0 on success; when the named subsystem is not found the negative
+ * lookup result is returned unchanged.
+ */
int logsys_config_logfile_priority_set (
const char *subsys,
unsigned int priority)
{
int i;
pthread_mutex_lock (&logsys_config_mutex);
if (subsys != NULL) {
i = _logsys_config_subsys_get_unlocked (subsys);
if (i >= 0) {
logsys_loggers[i].logfile_priority = priority;
+ logsys_loggers[i].dirty = QB_TRUE;
i = 0;
}
} else {
for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) {
logsys_loggers[i].logfile_priority = priority;
+ logsys_loggers[i].dirty = QB_TRUE;
}
i = 0;
}
pthread_mutex_unlock (&logsys_config_mutex);
return i;
}
+
+/*
+ * Push the filters for one source file of subsystem slot `s` to libqb.
+ *
+ * Three steps, all keyed on QB_LOG_FILTER_FILE / `filename`:
+ *   1. a QB_LOG_TAG_SET call that passes the slot index `s` as first argument;
+ *   2. removal of the file's filters from the syslog and stderr targets;
+ *   3. re-adding them with the slot's current priorities.
+ *
+ * No return values of qb_log_filter_ctl() are checked.
+ */
+static void _logsys_config_apply_per_file(int32_t s, const char *filename)
+{
+ qb_log_filter_ctl(s, QB_LOG_TAG_SET, QB_LOG_FILTER_FILE,
+ filename, LOG_TRACE);
+
+	/* Remove before add, presumably so a changed priority replaces the old filter - TODO confirm against libqb. */
+ qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_REMOVE,
+ QB_LOG_FILTER_FILE, filename, LOG_TRACE);
+ qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_REMOVE,
+ QB_LOG_FILTER_FILE, filename, LOG_TRACE);
+
+ qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD,
+ QB_LOG_FILTER_FILE, filename,
+ logsys_loggers[s].syslog_priority);
+	/* NOTE(review): the stderr target is filtered with logfile_priority - confirm this is intended. */
+ qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD,
+ QB_LOG_FILTER_FILE, filename,
+ logsys_loggers[s].logfile_priority);
+}
+
+/*
+ * Re-apply the filters of every file registered for subsystem slot `s`,
+ * then clear the slot's dirty flag.
+ */
+static void _logsys_config_apply_per_subsys(int32_t s)
+{
+	int32_t idx = 0;
+
+	while (idx < logsys_loggers[s].file_idx) {
+		_logsys_config_apply_per_file(s, logsys_loggers[s].files[idx]);
+		idx++;
+	}
+
+	logsys_loggers[s].dirty = QB_FALSE;
+}
+
+/*
+ * Apply the stored configuration of every named subsystem slot
+ * (0 through LOGSYS_MAX_SUBSYS_COUNT inclusive).  Slots whose name is the
+ * empty string are skipped.  The dirty flag is not consulted: every named
+ * slot is re-applied.
+ */
+void logsys_config_apply(void)
+{
+	int32_t slot;
+
+	for (slot = 0; slot <= LOGSYS_MAX_SUBSYS_COUNT; slot++) {
+		if (strcmp(logsys_loggers[slot].subsys, "") != 0) {
+			_logsys_config_apply_per_subsys(slot);
+		}
+	}
+}
+
/*
 * Store the debug setting for one subsystem, or for every slot (0 through
 * LOGSYS_MAX_SUBSYS_COUNT inclusive) when `subsys` is NULL.
 *
 * Returns 0 on success; when the named subsystem is not found the negative
 * lookup result is returned unchanged.
 */
int logsys_config_debug_set (
	const char *subsys,
	unsigned int debug)
{
	int slot;

	pthread_mutex_lock (&logsys_config_mutex);
	if (subsys == NULL) {
		for (slot = 0; slot <= LOGSYS_MAX_SUBSYS_COUNT; slot++) {
			logsys_loggers[slot].debug = debug;
		}
		slot = 0;
	} else {
		slot = _logsys_config_subsys_get_unlocked (subsys);
		if (slot >= 0) {
			logsys_loggers[slot].debug = debug;
			slot = 0;
		}
	}
	pthread_mutex_unlock (&logsys_config_mutex);

	return slot;
}
/*
 * Translate a syslog facility name into its numeric value.
 *
 * The comparison against the facilitynames[] table is case-insensitive.
 * Returns the facility value, or -1 when `name` is NULL or not in the table.
 */
int logsys_facility_id_get (const char *name)
{
	unsigned int i;

	/* strcasecmp() must not be handed a NULL pointer. */
	if (name == NULL) {
		return (-1);
	}

	for (i = 0; facilitynames[i].c_name != NULL; i++) {
		if (strcasecmp(name, facilitynames[i].c_name) == 0) {
			return (facilitynames[i].c_val);
		}
	}
	return (-1);
}
/*
 * Translate a numeric syslog facility into the first matching name in the
 * facilitynames[] table.  Returns NULL when no entry has that value.
 */
const char *logsys_facility_name_get (unsigned int facility)
{
	unsigned int pos = 0;

	while (facilitynames[pos].c_name != NULL) {
		if (facility == facilitynames[pos].c_val) {
			return (facilitynames[pos].c_name);
		}
		pos++;
	}

	return (NULL);
}
/*
 * Translate a syslog priority name into its numeric value.
 *
 * The comparison against the prioritynames[] table is case-insensitive.
 * Returns the priority value, or -1 when `name` is NULL or not in the table.
 */
int logsys_priority_id_get (const char *name)
{
	unsigned int i;

	/* strcasecmp() must not be handed a NULL pointer. */
	if (name == NULL) {
		return (-1);
	}

	for (i = 0; prioritynames[i].c_name != NULL; i++) {
		if (strcasecmp(name, prioritynames[i].c_name) == 0) {
			return (prioritynames[i].c_val);
		}
	}
	return (-1);
}
/*
 * Translate a numeric syslog priority into the first matching name in the
 * prioritynames[] table.  Returns NULL when no entry has that value.
 */
const char *logsys_priority_name_get (unsigned int priority)
{
	unsigned int pos = 0;

	while (prioritynames[pos].c_name != NULL) {
		if (priority == prioritynames[pos].c_val) {
			return (prioritynames[pos].c_name);
		}
		pos++;
	}

	return (NULL);
}
-int logsys_thread_priority_set (
- int policy,
- const struct sched_param *param,
- unsigned int after_log_ops_yield)
-
-{
- int res = 0;
- if (param == NULL) {
- return (0);
- }
-
-#if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX)
- if (wthread_active == 0) {
- logsys_sched_policy = policy;
- memcpy(&logsys_sched_param, param, sizeof(struct sched_param));
- logsys_sched_param_queued = 1;
- } else {
- res = pthread_setschedparam (logsys_thread_id, policy, param);
- }
-#endif
-
- if (after_log_ops_yield > 0) {
- logsys_after_log_ops_yield = after_log_ops_yield;
- }
-
- return (res);
-}
-
-int logsys_log_rec_store (const char *filename)
-{
- int fd;
- ssize_t written_size = 0;
- size_t this_write_size;
-
- fd = open (filename, O_CREAT|O_RDWR, 0700);
- if (fd < 0) {
- return (-1);
- }
-
- logsys_flt_lock();
-
- this_write_size = write (fd, &flt_data_size, sizeof(uint32_t));
- if (this_write_size != sizeof(unsigned int)) {
- goto error_exit;
- }
- written_size += this_write_size;
-
- this_write_size = write (fd, flt_data, flt_data_size * sizeof (uint32_t));
- if (this_write_size != (flt_data_size * sizeof(uint32_t))) {
- goto error_exit;
- }
- written_size += this_write_size;
-
- this_write_size = write (fd, &flt_head, sizeof (uint32_t));
- if (this_write_size != (sizeof(uint32_t))) {
- goto error_exit;
- }
- written_size += this_write_size;
- this_write_size = write (fd, &flt_tail, sizeof (uint32_t));
- if (this_write_size != (sizeof(uint32_t))) {
- goto error_exit;
- }
- written_size += this_write_size;
- if (written_size != ((flt_data_size + 3) * sizeof (uint32_t))) {
- goto error_exit;
- }
-
- logsys_flt_unlock();
- close (fd);
- return (0);
-
-error_exit:
- logsys_flt_unlock();
- close (fd);
- return (-1);
-}
-
-void logsys_atexit (void)
-{
- int res;
- int value;
- struct record *rec;
-
- if (wthread_active == 0) {
- for (;;) {
- logsys_wthread_lock();
-
- res = sem_getvalue (&logsys_print_finished, &value);
- if (res != 0 || value == 0) {
- logsys_wthread_unlock();
- return;
- }
- sem_wait (&logsys_print_finished);
-
- rec = list_entry (logsys_print_finished_records.next, struct record, list);
- list_del (&rec->list);
- logsys_memory_used = logsys_memory_used - strlen (rec->buffer) -
- sizeof (struct record) - 1;
- logsys_wthread_unlock();
- log_printf_to_logs (
- rec->rec_ident,
- rec->file_name,
- rec->function_name,
- rec->file_line,
- rec->buffer);
- free (rec->buffer);
- free (rec);
- }
- } else {
- wthread_should_exit = 1;
- sem_post (&logsys_print_finished);
- pthread_join (logsys_thread_id, NULL);
- }
-}
-
-void logsys_flush (void)
-{
-}
diff --git a/exec/main.c b/exec/main.c
index bcc0da35..d766c6c7 100644
--- a/exec/main.c
+++ b/exec/main.c
@@ -1,1516 +1,1538 @@
/*
* Copyright (c) 2002-2006 MontaVista Software, Inc.
* Copyright (c) 2006-2009 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* \mainpage Corosync
*
* This is the doxygen generated developer documentation for the Corosync
* project. For more information about Corosync, please see the project
* web site, <a href="http://www.corosync.org">corosync.org</a>.
*
* \section license License
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <pthread.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <signal.h>
#include <sched.h>
#include <time.h>
#include <semaphore.h>
#include <qb/qbdefs.h>
+#include <qb/qblog.h>
#include <qb/qbloop.h>
#include <qb/qbutil.h>
#include <qb/qbipcs.h>
#include <corosync/swab.h>
#include <corosync/corotypes.h>
#include <corosync/corodefs.h>
#include <corosync/list.h>
#include <corosync/lcr/lcr_ifact.h>
#include <corosync/totem/totempg.h>
#include <corosync/engine/objdb.h>
#include <corosync/engine/config.h>
#include <corosync/engine/logsys.h>
#include "quorum.h"
#include "totemsrp.h"
#include "mainconfig.h"
#include "totemconfig.h"
#include "main.h"
#include "sync.h"
#include "syncv2.h"
#include "timer.h"
#include "util.h"
#include "apidef.h"
#include "service.h"
#include "schedwrk.h"
#include "evil.h"
#ifdef HAVE_SMALL_MEMORY_FOOTPRINT
#define IPC_LOGSYS_SIZE 1024*64
#else
#define IPC_LOGSYS_SIZE 8192*128
#endif
LOGSYS_DECLARE_SYSTEM ("corosync",
- LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_THREADED | LOGSYS_MODE_FORK,
- 0,
- NULL,
- LOG_INFO,
+ LOGSYS_MODE_OUTPUT_STDERR,
LOG_DAEMON,
- LOG_INFO,
- NULL,
- IPC_LOGSYS_SIZE);
+ LOG_INFO);
LOGSYS_DECLARE_SUBSYS ("MAIN");
#define SERVER_BACKLOG 5
static int sched_priority = 0;
static unsigned int service_count = 32;
static struct totem_logging_configuration totem_logging_configuration;
static int num_config_modules;
static struct config_iface_ver0 *config_modules[MAX_DYNAMIC_SERVICES];
static struct objdb_iface_ver0 *objdb = NULL;
static struct corosync_api_v1 *api = NULL;
static enum cs_sync_mode minimum_sync_mode;
static int sync_in_process = 1;
static qb_loop_t *corosync_poll_handle;
struct sched_param global_sched_param;
static hdb_handle_t object_memb_handle;
static corosync_timer_handle_t corosync_stats_timer_handle;
static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid";
/*
 * Return the file-scope corosync_poll_handle pointer.
 */
qb_loop_t *cs_poll_handle_get (void)
{
return (corosync_poll_handle);
}
/*
 * Register `dispatch_fn` for `events` on `fd` with the given loop.
 * The registration is made at QB_LOOP_MED priority; `data` is passed through
 * to qb_loop_poll_add(), whose result is returned.
 */
int cs_poll_dispatch_add (qb_loop_t * handle,
	int fd,
	int events,
	void *data,
	int (*dispatch_fn) (int fd,
		int revents,
		void *data))
{
	int rc;

	rc = qb_loop_poll_add(handle, QB_LOOP_MED, fd, events, data,
		dispatch_fn);

	return rc;
}
/*
 * Remove `fd` from the given loop; returns the result of qb_loop_poll_del().
 */
int cs_poll_dispatch_delete(qb_loop_t * handle, int fd)
{
	int rc;

	rc = qb_loop_poll_del(handle, fd);

	return rc;
}
/*
 * Call exec_dump_fn of every loaded service that provides one.
 */
void corosync_state_dump (void)
{
	int slot = 0;

	while (slot < SERVICE_HANDLER_MAXIMUM_COUNT) {
		if (ais_service[slot] && ais_service[slot]->exec_dump_fn) {
			ais_service[slot]->exec_dump_fn ();
		}
		slot++;
	}
}
/*
 * Completion callback handed to corosync_service_unlink_all():
 * deletes the statistics timer, then stops the main poll loop.
 */
static void unlink_all_completed (void)
{
api->timer_delete (corosync_stats_timer_handle);
qb_loop_stop (corosync_poll_handle);
}
/*
 * Start shutdown by unlinking all services; unlink_all_completed is passed
 * as the completion callback.
 */
void corosync_shutdown_request (void)
{
corosync_service_unlink_all (api, unlink_all_completed);
}
+/*
+ * Diagnostic signal callback: dumps the state of all services, then writes
+ * the libqb blackbox to LOCALSTATEDIR "/lib/corosync/fdata".
+ * `num` and `data` are unused; always returns 0.
+ */
static int32_t sig_diag_handler (int num, void *data)
{
corosync_state_dump ();
- logsys_log_rec_store (LOCALSTATEDIR "/lib/corosync/fdata");
+ qb_log_blackbox_write_to_file(LOCALSTATEDIR "/lib/corosync/fdata");
return 0;
}
/*
 * Exit signal callback: starts the same service unlink sequence as
 * corosync_shutdown_request().  `num` and `data` are unused; always
 * returns 0.
 */
static int32_t sig_exit_handler (int num, void *data)
{
corosync_service_unlink_all (api, unlink_all_completed);
return 0;
}
+/*
+ * SIGSEGV handler: restores the default disposition first, writes the libqb
+ * blackbox to LOCALSTATEDIR "/lib/corosync/fdata", shuts libqb logging
+ * down, then re-raises the signal so the default action runs.
+ *
+ * NOTE(review): the libqb calls run in signal context; whether they are
+ * async-signal-safe cannot be told from here - confirm.
+ */
static void sigsegv_handler (int num)
{
(void)signal (SIGSEGV, SIG_DFL);
- logsys_atexit();
- logsys_log_rec_store (LOCALSTATEDIR "/lib/corosync/fdata");
+ qb_log_blackbox_write_to_file(LOCALSTATEDIR "/lib/corosync/fdata");
+ qb_log_fini();
raise (SIGSEGV);
}
+/*
+ * SIGABRT handler: restores the default disposition first, writes the libqb
+ * blackbox to LOCALSTATEDIR "/lib/corosync/fdata", shuts libqb logging
+ * down, then re-raises the signal so the default action runs.
+ *
+ * NOTE(review): the libqb calls run in signal context; whether they are
+ * async-signal-safe cannot be told from here - confirm.
+ */
static void sigabrt_handler (int num)
{
(void)signal (SIGABRT, SIG_DFL);
- logsys_atexit();
- logsys_log_rec_store (LOCALSTATEDIR "/lib/corosync/fdata");
+ qb_log_blackbox_write_to_file(LOCALSTATEDIR "/lib/corosync/fdata");
+ qb_log_fini();
raise (SIGABRT);
}
#define LOCALHOST_IP inet_addr("127.0.0.1")
static hdb_handle_t corosync_group_handle;
static struct totempg_group corosync_group = {
.group = "a",
.group_len = 1
};
/*
 * Serialization "lock" hook with an empty body: calling it does nothing.
 */
static void serialize_lock (void)
{
}
/*
 * Serialization "unlock" hook with an empty body: calling it does nothing.
 */
static void serialize_unlock (void)
{
}
/*
 * Called when service synchronization has finished: logs a notice, clears
 * sync_in_process and passes the new state to cs_ipcs_sync_state_changed().
 */
static void corosync_sync_completed (void)
{
log_printf (LOGSYS_LEVEL_NOTICE,
"Completed service synchronization, ready to provide service.\n");
sync_in_process = 0;
cs_ipcs_sync_state_changed(sync_in_process);
}
static int corosync_sync_callbacks_retrieve (int sync_id,
struct sync_callbacks *callbacks)
{
unsigned int ais_service_index;
int res;
for (ais_service_index = 0;
ais_service_index < SERVICE_HANDLER_MAXIMUM_COUNT;
ais_service_index++) {
if (ais_service[ais_service_index] != NULL
&& (ais_service[ais_service_index]->sync_mode == CS_SYNC_V1
|| ais_service[ais_service_index]->sync_mode == CS_SYNC_V1_APIV2)) {
if (ais_service_index == sync_id) {
break;
}
}
}
/*
* Try to load backwards compat sync engines
*/
if (ais_service_index == SERVICE_HANDLER_MAXIMUM_COUNT) {
res = evil_callbacks_load (sync_id, callbacks);
return (res);
}
callbacks->name = ais_service[ais_service_index]->name;
callbacks->sync_init_api.sync_init_v1 = ais_service[ais_service_index]->sync_init;
callbacks->api_version = 1;
if (ais_service[ais_service_index]->sync_mode == CS_SYNC_V1_APIV2) {
callbacks->api_version = 2;
}
callbacks->sync_process = ais_service[ais_service_index]->sync_process;
callbacks->sync_activate = ais_service[ais_service_index]->sync_activate;
callbacks->sync_abort = ais_service[ais_service_index]->sync_abort;
return (0);
}
/*
 * Fill `callbacks` with the sync entry points of service `service_id` for
 * the v2 sync engine.
 *
 * With a minimum sync mode of CS_SYNC_V2, a CLM or EVT service that is not
 * loaded is delegated to evil_callbacks_load().  Returns -1 when the service
 * is not loaded, or when the minimum sync mode is CS_SYNC_V1 and the service
 * is not a CS_SYNC_V2 service; returns 0 once `callbacks` has been filled.
 */
static int corosync_sync_v2_callbacks_retrieve (
	int service_id,
	struct sync_callbacks *callbacks)
{
	const int compat_candidate =
		(service_id == CLM_SERVICE) || (service_id == EVT_SERVICE);

	if (minimum_sync_mode == CS_SYNC_V2 && compat_candidate &&
		ais_service[service_id] == NULL) {
		return (evil_callbacks_load (service_id, callbacks));
	}

	if (ais_service[service_id] == NULL) {
		return (-1);
	}
	if (minimum_sync_mode == CS_SYNC_V1 &&
		ais_service[service_id]->sync_mode != CS_SYNC_V2) {
		return (-1);
	}

	callbacks->name = ais_service[service_id]->name;
	callbacks->api_version =
		(ais_service[service_id]->sync_mode == CS_SYNC_V1_APIV2) ? 2 : 1;
	callbacks->sync_init_api.sync_init_v1 = ais_service[service_id]->sync_init;
	callbacks->sync_process = ais_service[service_id]->sync_process;
	callbacks->sync_activate = ais_service[service_id]->sync_activate;
	callbacks->sync_abort = ais_service[service_id]->sync_abort;

	return (0);
}
static struct memb_ring_id corosync_ring_id;
/*
 * Record in the object database that node `nodeid` has joined.
 *
 * The node's object is looked up under object_memb_handle by the decimal
 * text of its id.  If it already exists, "join_count" is incremented and
 * "status" is replaced with "joined"; otherwise the object is created with
 * the keys "ip", "join_count" (1) and "status" ("joined").
 *
 * The id is formatted as unsigned ("%u"), the same way member_object_left()
 * formats it, so both functions address the same object for every id,
 * including those of 2^31 and above.
 */
static void member_object_joined (unsigned int nodeid)
{
	hdb_handle_t object_find_handle;
	hdb_handle_t object_node_handle;
	char * nodeint_str;
	char nodeid_str[64];
	unsigned int key_incr_dummy;

	snprintf (nodeid_str, sizeof (nodeid_str), "%u", nodeid);

	objdb->object_find_create (
		object_memb_handle,
		nodeid_str,
		strlen (nodeid_str),
		&object_find_handle);

	if (objdb->object_find_next (object_find_handle,
			&object_node_handle) == 0) {

		objdb->object_key_increment (object_node_handle,
			"join_count", strlen("join_count"),
			&key_incr_dummy);

		objdb->object_key_replace (object_node_handle,
			"status", strlen("status"),
			"joined", strlen("joined"));
	} else {
		nodeint_str = (char*)api->totem_ifaces_print (nodeid);
		objdb->object_create (object_memb_handle,
			&object_node_handle,
			nodeid_str, strlen (nodeid_str));

		objdb->object_key_create_typed (object_node_handle,
			"ip",
			nodeint_str, strlen(nodeint_str),
			OBJDB_VALUETYPE_STRING);
		key_incr_dummy = 1;
		objdb->object_key_create_typed (object_node_handle,
			"join_count",
			&key_incr_dummy, sizeof (key_incr_dummy),
			OBJDB_VALUETYPE_UINT32);
		objdb->object_key_create_typed (object_node_handle,
			"status",
			"joined", strlen("joined"),
			OBJDB_VALUETYPE_STRING);
	}
}
static void member_object_left (unsigned int nodeid)
{
hdb_handle_t object_find_handle;
hdb_handle_t object_node_handle;
char nodeid_str[64];
snprintf (nodeid_str, 64, "%u", nodeid);
objdb->object_find_create (
object_memb_handle,
nodeid_str,
strlen (nodeid_str),
&object_find_handle);
if (objdb->object_find_next (object_find_handle,
&object_node_handle) == 0) {
objdb->object_key_replace (object_node_handle,
"status", strlen("status"),
"left", strlen("left"));
}
}
/*
 * Totem configuration change callback.
 *
 * Marks synchronization as in progress, saves the ring id, updates the
 * member objects for the nodes that left and joined, forwards the change to
 * every loaded service that has a confchg_fn, and then drives the v2 sync
 * engine: an already running sync is aborted, a transitional configuration
 * is saved and a regular configuration starts a new sync (the last two only
 * when the minimum sync mode is CS_SYNC_V2).
 */
static void confchg_fn (
	enum totem_configuration_type configuration_type,
	const unsigned int *member_list, size_t member_list_entries,
	const unsigned int *left_list, size_t left_list_entries,
	const unsigned int *joined_list, size_t joined_list_entries,
	const struct memb_ring_id *ring_id)
{
	int idx;
	/* A change that arrives while a sync is still running aborts that sync. */
	const int abort_activate = (sync_in_process == 1) ? 1 : 0;

	sync_in_process = 1;
	cs_ipcs_sync_state_changed(sync_in_process);
	memcpy (&corosync_ring_id, ring_id, sizeof (struct memb_ring_id));

	for (idx = 0; idx < left_list_entries; idx++) {
		member_object_left (left_list[idx]);
	}
	for (idx = 0; idx < joined_list_entries; idx++) {
		member_object_joined (joined_list[idx]);
	}

	/*
	 * Forward the configuration change to all services
	 */
	for (idx = 0; idx < service_count; idx++) {
		if (!ais_service[idx] || !ais_service[idx]->confchg_fn) {
			continue;
		}
		ais_service[idx]->confchg_fn (configuration_type,
			member_list, member_list_entries,
			left_list, left_list_entries,
			joined_list, joined_list_entries, ring_id);
	}

	if (abort_activate) {
		sync_v2_abort ();
	}
	if (minimum_sync_mode == CS_SYNC_V2) {
		if (configuration_type == TOTEM_CONFIGURATION_TRANSITIONAL) {
			sync_v2_save_transitional (member_list, member_list_entries, ring_id);
		}
		if (configuration_type == TOTEM_CONFIGURATION_REGULAR) {
			sync_v2_start (member_list, member_list_entries, ring_id);
		}
	}
}
/*
 * Placeholder for dropping privileges: currently returns immediately
 * without changing anything.
 */
static void priv_drop (void)
{
return; /* TODO: we are still not dropping privs */
}
/*
 * Detach from the controlling terminal: fork and let the parent exit, start
 * a new session in the child and point stdin, stderr and stdout at
 * /dev/null.  A failed fork or redirection is reported through
 * corosync_exit_error().
 */
static void corosync_tty_detach (void)
{
	const pid_t pid = fork ();

	if (pid == -1) {
		corosync_exit_error (AIS_DONE_FORK);
	} else if (pid != 0) {
		/* Parent: the detached child carries on. */
		exit (0);
	}

	/* Create new session */
	(void)setsid();

	/*
	 * Map stdin/out/err to /dev/null.
	 */
	if (freopen("/dev/null", "r", stdin) == NULL) {
		corosync_exit_error (AIS_DONE_STD_TO_NULL_REDIR);
	}
	if (freopen("/dev/null", "a", stderr) == NULL) {
		corosync_exit_error (AIS_DONE_STD_TO_NULL_REDIR);
	}
	if (freopen("/dev/null", "a", stdout) == NULL) {
		corosync_exit_error (AIS_DONE_STD_TO_NULL_REDIR);
	}
}
/*
 * Raise the locked-memory limit to unlimited and lock all current and
 * future pages of the process.
 *
 * On BSD builds older than FreeBSD 8 the locking step is compiled out and
 * only a warning is logged.  A failing mlockall() is logged as a warning and
 * otherwise ignored.
 */
static void corosync_mlockall (void)
{
#if !defined(COROSYNC_BSD) || defined(COROSYNC_FREEBSD_GE_8)
int res;
#endif
struct rlimit rlimit;
rlimit.rlim_cur = RLIM_INFINITY;
rlimit.rlim_max = RLIM_INFINITY;
/* The return value of setrlimit() is not checked on either branch. */
#ifndef COROSYNC_SOLARIS
setrlimit (RLIMIT_MEMLOCK, &rlimit);
#else
setrlimit (RLIMIT_VMEM, &rlimit);
#endif
#if defined(COROSYNC_BSD) && !defined(COROSYNC_FREEBSD_GE_8)
/* under FreeBSD < 8 a process with locked page cannot call dlopen
* code disabled until FreeBSD bug i386/93396 was solved
*/
log_printf (LOGSYS_LEVEL_WARNING, "Could not lock memory of service to avoid page faults\n");
#else
res = mlockall (MCL_CURRENT | MCL_FUTURE);
if (res == -1) {
LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
"Could not lock memory of service to avoid page faults");
};
#endif
}
/* Store a counter held directly in *stats under a key named after the field. */
#define PG_STAT_REPLACE(field) \
	objdb->object_key_replace (stats->hdr.handle, \
		#field, strlen (#field), \
		&stats->field, sizeof (stats->field))

/* Store a counter held in stats->mrp->srp under a key named after the field. */
#define SRP_STAT_REPLACE(field) \
	objdb->object_key_replace (stats->mrp->srp->hdr.handle, \
		#field, strlen (#field), \
		&stats->mrp->srp->field, sizeof (stats->mrp->srp->field))

/*
 * Timer callback that refreshes the totem statistics keys in objdb.
 *
 * It copies the counters returned by api->totem_get_stats() into their keys,
 * derives firewall_enabled_or_nic_failure from continuous_gather, averages
 * the recorded token entries, calls cs_ipcs_stats_update() and finally
 * schedules itself again through api->timer_add_duration().
 *
 * data is not used.
 */
static void corosync_totem_stats_updater (void *data)
{
	totempg_stats_t * stats;
	uint32_t total_mtt_rx_token = 0;
	uint32_t total_token_holdtime = 0;
	uint32_t total_backlog_calc = 0;
	int32_t token_count = 0;
	uint32_t firewall_enabled_or_nic_failure;
	int t, prev;

	stats = api->totem_get_stats();

	PG_STAT_REPLACE (msg_reserved);
	PG_STAT_REPLACE (msg_queue_avail);

	SRP_STAT_REPLACE (orf_token_tx);
	SRP_STAT_REPLACE (orf_token_rx);
	SRP_STAT_REPLACE (memb_merge_detect_tx);
	SRP_STAT_REPLACE (memb_merge_detect_rx);
	SRP_STAT_REPLACE (memb_join_tx);
	SRP_STAT_REPLACE (memb_join_rx);
	SRP_STAT_REPLACE (mcast_tx);
	SRP_STAT_REPLACE (mcast_retx);
	SRP_STAT_REPLACE (mcast_rx);
	SRP_STAT_REPLACE (memb_commit_token_tx);
	SRP_STAT_REPLACE (memb_commit_token_rx);
	SRP_STAT_REPLACE (token_hold_cancel_tx);
	SRP_STAT_REPLACE (token_hold_cancel_rx);
	SRP_STAT_REPLACE (operational_entered);
	SRP_STAT_REPLACE (operational_token_lost);
	SRP_STAT_REPLACE (gather_entered);
	SRP_STAT_REPLACE (gather_token_lost);
	SRP_STAT_REPLACE (commit_entered);
	SRP_STAT_REPLACE (commit_token_lost);
	SRP_STAT_REPLACE (recovery_entered);
	SRP_STAT_REPLACE (recovery_token_lost);
	SRP_STAT_REPLACE (consensus_timeouts);
	SRP_STAT_REPLACE (rx_msg_dropped);
	SRP_STAT_REPLACE (continuous_gather);

	/* flag is 1 once continuous_gather exceeds MAX_NO_CONT_GATHER */
	if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER) {
		firewall_enabled_or_nic_failure = 1;
	} else {
		firewall_enabled_or_nic_failure = 0;
	}
	objdb->object_key_replace (stats->mrp->srp->hdr.handle,
		"firewall_enabled_or_nic_failure", strlen("firewall_enabled_or_nic_failure"),
		&firewall_enabled_or_nic_failure, sizeof (firewall_enabled_or_nic_failure));

	/*
	 * Walk token[] backwards from latest_token, wrapping from index 0 to
	 * TOTEM_TOKEN_STATS_MAX - 1, until the entry before the current one
	 * is earliest_token.
	 */
	t = stats->mrp->srp->latest_token;
	for (;;) {
		prev = (t == 0) ? (TOTEM_TOKEN_STATS_MAX - 1) : (t - 1);
		if (prev == stats->mrp->srp->earliest_token) {
			break;
		}
		/* if tx == 0, then dropped token (not ours) */
		if (stats->mrp->srp->token[t].tx != 0 ||
			(stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx) > 0 ) {
			total_mtt_rx_token += (stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx);
			total_token_holdtime += (stats->mrp->srp->token[t].tx - stats->mrp->srp->token[t].rx);
			total_backlog_calc += stats->mrp->srp->token[t].backlog_calc;
			token_count++;
		}
		t = prev;
	}

	/* the averages are only published when at least one entry was counted */
	if (token_count) {
		uint32_t mtt_rx_token = (total_mtt_rx_token / token_count);
		uint32_t avg_backlog_calc = (total_backlog_calc / token_count);
		uint32_t avg_token_holdtime = (total_token_holdtime / token_count);

		objdb->object_key_replace (stats->mrp->srp->hdr.handle,
			"mtt_rx_token", strlen("mtt_rx_token"),
			&mtt_rx_token, sizeof (mtt_rx_token));
		objdb->object_key_replace (stats->mrp->srp->hdr.handle,
			"avg_token_workload", strlen("avg_token_workload"),
			&avg_token_holdtime, sizeof (avg_token_holdtime));
		objdb->object_key_replace (stats->mrp->srp->hdr.handle,
			"avg_backlog_calc", strlen("avg_backlog_calc"),
			&avg_backlog_calc, sizeof (avg_backlog_calc));
	}

	cs_ipcs_stats_update();

	/* schedule the next refresh */
	api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL,
		corosync_totem_stats_updater,
		&corosync_stats_timer_handle);
}

#undef SRP_STAT_REPLACE
#undef PG_STAT_REPLACE
static void corosync_totem_stats_init (void)
{
totempg_stats_t * stats;
hdb_handle_t object_find_handle;
hdb_handle_t object_runtime_handle;
hdb_handle_t object_totem_handle;
uint32_t zero_32 = 0;
uint64_t zero_64 = 0;
stats = api->totem_get_stats();
objdb->object_find_create (
OBJECT_PARENT_HANDLE,
"runtime",
strlen ("runtime"),
&object_find_handle);
if (objdb->object_find_next (object_find_handle,
&object_runtime_handle) == 0) {
objdb->object_create (object_runtime_handle,
&object_totem_handle,
"totem", strlen ("totem"));
objdb->object_create (object_totem_handle,
&stats->hdr.handle,
"pg", strlen ("pg"));
objdb->object_create (stats->hdr.handle,
&stats->mrp->hdr.handle,
"mrp", strlen ("mrp"));
objdb->object_create (stats->mrp->hdr.handle,
&stats->mrp->srp->hdr.handle,
"srp", strlen ("srp"));
objdb->object_key_create_typed (stats->hdr.handle,
"msg_reserved", &stats->msg_reserved,
sizeof (stats->msg_reserved), OBJDB_VALUETYPE_UINT32);
objdb->object_key_create_typed (stats->hdr.handle,
"msg_queue_avail", &stats->msg_queue_avail,
sizeof (stats->msg_queue_avail), OBJDB_VALUETYPE_UINT32);
/* Members object */
objdb->object_create (stats->mrp->srp->hdr.handle,
&object_memb_handle,
"members", strlen ("members"));
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"orf_token_tx", &stats->mrp->srp->orf_token_tx,
sizeof (stats->mrp->srp->orf_token_tx), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"orf_token_rx", &stats->mrp->srp->orf_token_rx,
sizeof (stats->mrp->srp->orf_token_rx), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"memb_merge_detect_tx", &stats->mrp->srp->memb_merge_detect_tx,
sizeof (stats->mrp->srp->memb_merge_detect_tx), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"memb_merge_detect_rx", &stats->mrp->srp->memb_merge_detect_rx,
sizeof (stats->mrp->srp->memb_merge_detect_rx), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"memb_join_tx", &stats->mrp->srp->memb_join_tx,
sizeof (stats->mrp->srp->memb_join_tx), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"memb_join_rx", &stats->mrp->srp->memb_join_rx,
sizeof (stats->mrp->srp->memb_join_rx), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"mcast_tx", &stats->mrp->srp->mcast_tx,
sizeof (stats->mrp->srp->mcast_tx), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"mcast_retx", &stats->mrp->srp->mcast_retx,
sizeof (stats->mrp->srp->mcast_retx), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"mcast_rx", &stats->mrp->srp->mcast_rx,
sizeof (stats->mrp->srp->mcast_rx), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"memb_commit_token_tx", &stats->mrp->srp->memb_commit_token_tx,
sizeof (stats->mrp->srp->memb_commit_token_tx), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"memb_commit_token_rx", &stats->mrp->srp->memb_commit_token_rx,
sizeof (stats->mrp->srp->memb_commit_token_rx), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"token_hold_cancel_tx", &stats->mrp->srp->token_hold_cancel_tx,
sizeof (stats->mrp->srp->token_hold_cancel_tx), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"token_hold_cancel_rx", &stats->mrp->srp->token_hold_cancel_rx,
sizeof (stats->mrp->srp->token_hold_cancel_rx), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"operational_entered", &stats->mrp->srp->operational_entered,
sizeof (stats->mrp->srp->operational_entered), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"operational_token_lost", &stats->mrp->srp->operational_token_lost,
sizeof (stats->mrp->srp->operational_token_lost), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"gather_entered", &stats->mrp->srp->gather_entered,
sizeof (stats->mrp->srp->gather_entered), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"gather_token_lost", &stats->mrp->srp->gather_token_lost,
sizeof (stats->mrp->srp->gather_token_lost), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"commit_entered", &stats->mrp->srp->commit_entered,
sizeof (stats->mrp->srp->commit_entered), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"commit_token_lost", &stats->mrp->srp->commit_token_lost,
sizeof (stats->mrp->srp->commit_token_lost), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"recovery_entered", &stats->mrp->srp->recovery_entered,
sizeof (stats->mrp->srp->recovery_entered), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"recovery_token_lost", &stats->mrp->srp->recovery_token_lost,
sizeof (stats->mrp->srp->recovery_token_lost), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"consensus_timeouts", &stats->mrp->srp->consensus_timeouts,
sizeof (stats->mrp->srp->consensus_timeouts), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"mtt_rx_token", &zero_32,
sizeof (zero_32), OBJDB_VALUETYPE_UINT32);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"avg_token_workload", &zero_32,
sizeof (zero_32), OBJDB_VALUETYPE_UINT32);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"avg_backlog_calc", &zero_32,
sizeof (zero_32), OBJDB_VALUETYPE_UINT32);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"rx_msg_dropped", &zero_64,
sizeof (zero_64), OBJDB_VALUETYPE_UINT64);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"continuous_gather", &zero_32,
sizeof (zero_32), OBJDB_VALUETYPE_UINT32);
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
"firewall_enabled_or_nic_failure", &zero_32,
sizeof (zero_32), OBJDB_VALUETYPE_UINT32);
}
/* start stats timer */
api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL,
corosync_totem_stats_updater,
&corosync_stats_timer_handle);
}
/*
 * Dispatch a received message to the executive handler of its service.
 *
 * The header id carries the service number in its upper 16 bits and the
 * function id in its lower 16 bits. Messages for a service that is not
 * loaded are dropped (after being offered to evil_deliver_fn() when the
 * service is EVT_SERVICE); messages with a function id at or beyond the
 * service's exec_engine_count are logged and dropped. Otherwise the "rx"
 * statistic is incremented, the message is endian-converted if required,
 * and the handler is called.
 *
 * msg_len is not used here.
 *
 * NOTE(review): service indexes ais_service[] without a range check in this
 * function - confirm that the array is large enough for every value that
 * can arrive in a message header.
 */
static void deliver_fn (
	unsigned int nodeid,
	const void *msg,
	unsigned int msg_len,
	int endian_conversion_required)
{
	const struct qb_ipc_request_header *req_header = msg;
	uint32_t key_incr_dummy;
	uint32_t id;
	int32_t service;
	int32_t fn_id;

	id = endian_conversion_required ? swab32 (req_header->id) : req_header->id;

	/*
	 * Call the proper executive handler
	 */
	service = id >> 16;
	fn_id = id & 0xffff;

	if (service == EVT_SERVICE && ais_service[service] == NULL) {
		evil_deliver_fn (nodeid, service, fn_id, msg,
			endian_conversion_required);
	}

	if (ais_service[service] == NULL) {
		return;
	}

	if (fn_id >= ais_service[service]->exec_engine_count) {
		log_printf(LOGSYS_LEVEL_WARNING, "discarded unknown message %d for service %d (max id %d)",
			fn_id, service, ais_service[service]->exec_engine_count);
		return;
	}

	objdb->object_key_increment (service_stats_handle[service][fn_id],
		"rx", strlen("rx"),
		&key_incr_dummy);

	if (endian_conversion_required) {
		assert(ais_service[service]->exec_engine[fn_id].exec_endian_convert_fn != NULL);
		ais_service[service]->exec_engine[fn_id].exec_endian_convert_fn
			((void *)msg);
	}

	ais_service[service]->exec_engine[fn_id].exec_handler_fn
		(msg, nodeid);
}
/*
 * Report the configuration modules recorded in config_modules[].
 *
 * modules receives the address of the array, num receives the number of
 * entries in it (num_config_modules).
 */
void main_get_config_modules(struct config_iface_ver0 ***modules, int *num)
{
	*num = num_config_modules;
	*modules = config_modules;
}
/*
 * Multicast a message to the joined corosync group.
 *
 * The first iovec element is read as a qb_ipc_request_header; its id yields
 * the service (upper 16 bits) and function id (lower 16 bits). When that
 * service is loaded its "tx" statistic is incremented. Returns whatever
 * totempg_groups_mcast_joined() returns.
 *
 * NOTE(review): service and fn_id index ais_service[] and
 * service_stats_handle[][] without range checks here - confirm with callers.
 */
int main_mcast (
	const struct iovec *iovec,
	unsigned int iov_len,
	unsigned int guarantee)
{
	const struct qb_ipc_request_header *req = iovec->iov_base;
	const int32_t service = req->id >> 16;
	const int32_t fn_id = req->id & 0xffff;
	uint32_t key_incr_dummy;

	if (ais_service[service] != NULL) {
		objdb->object_key_increment (service_stats_handle[service][fn_id],
			"tx", strlen("tx"), &key_incr_dummy);
	}

	return totempg_groups_mcast_joined (corosync_group_handle, iovec, iov_len, guarantee);
}
/* Handle of the timer armed by corosync_recheck_the_q_level(). */
static qb_loop_timer_handle recheck_the_q_level_timer;

/*
 * Ask totempg to check the queue level of the corosync group and, while
 * cs_ipcs_q_level_get() still reports TOTEM_Q_LEVEL_CRITICAL, arm a timer
 * that calls this function again.
 *
 * data is not used.
 */
void corosync_recheck_the_q_level(void *data)
{
	totempg_check_q_level(corosync_group_handle);

	if (cs_ipcs_q_level_get() != TOTEM_Q_LEVEL_CRITICAL) {
		return;
	}

	qb_loop_timer_add(cs_poll_handle_get(), QB_LOOP_MED, 1*QB_TIME_NS_IN_MSEC,
		NULL, corosync_recheck_the_q_level, &recheck_the_q_level_timer);
}
/*
 * Per-request state shared between corosync_sending_allowed() and
 * corosync_sending_allowed_release().
 */
struct sending_allowed_private_data_struct {
	/* result of totempg_groups_joined_reserve(); -1 means nothing to release */
	int reserved_msgs;
};
/*
 * Decide whether a library request may be sent now.
 *
 * Space for the message is first reserved with
 * totempg_groups_joined_reserve(); the result is stored in the private data
 * so that corosync_sending_allowed_release() can release it later.
 *
 * Returns QB_TRUE when sending is allowed, otherwise a negative errno value:
 *   -EINVAL       the reservation call returned -1
 *   -EHOSTUNREACH not quorate and the service does not allow inquorate use
 *   -ENOBUFS      flow control is required and nothing could be reserved
 *   -EINPROGRESS  flow control is required and a sync is in process
 */
int corosync_sending_allowed (
	unsigned int service,
	unsigned int id,
	const void *msg,
	void *sending_allowed_private_data)
{
	struct sending_allowed_private_data_struct *pd =
		(struct sending_allowed_private_data_struct *)sending_allowed_private_data;
	struct qb_ipc_request_header *header = (struct qb_ipc_request_header *)msg;
	struct iovec reserve_iovec;

	reserve_iovec.iov_base = (char *)header;
	reserve_iovec.iov_len = header->size;

	pd->reserved_msgs = totempg_groups_joined_reserve (
		corosync_group_handle,
		&reserve_iovec, 1);
	if (pd->reserved_msgs == -1) {
		return -EINVAL;
	}

	/* quorum gate */
	if (corosync_quorum_is_quorate() != 1 &&
		ais_service[service]->allow_inquorate != CS_LIB_ALLOW_INQUORATE) {
		return -EHOSTUNREACH;
	}

	/* we are quorate, now check flow control */
	if (ais_service[service]->lib_engine[id].flow_control == CS_LIB_FLOW_CONTROL_NOT_REQUIRED) {
		return QB_TRUE;
	}
	if (pd->reserved_msgs == 0) {
		return -ENOBUFS;
	}
	if (sync_in_process != 0) {
		return -EINPROGRESS;
	}

	return QB_TRUE;
}
/*
 * Release the reservation recorded by corosync_sending_allowed().
 * Nothing is released when the recorded value is -1.
 */
void corosync_sending_allowed_release (void *sending_allowed_private_data)
{
	struct sending_allowed_private_data_struct *pd = sending_allowed_private_data;

	if (pd->reserved_msgs != -1) {
		totempg_groups_joined_release (pd->reserved_msgs);
	}
}
/*
 * Return 1 when source->nodeid equals this node's id as reported by
 * totempg_my_nodeid_get(), otherwise 0. source must not be NULL.
 */
int message_source_is_local (const mar_message_source_t *source)
{
	assert (source != NULL);

	return (source->nodeid == totempg_my_nodeid_get ()) ? 1 : 0;
}
/*
 * Fill in *source for a message originating on this node: the structure is
 * zeroed, then nodeid is set from totempg_my_nodeid_get() and conn is
 * stored. Neither argument may be NULL.
 */
void message_source_set (
	mar_message_source_t *source,
	void *conn)
{
	assert ((source != NULL) && (conn != NULL));

	memset (source, 0, sizeof (*source));
	source->conn = conn;
	source->nodeid = totempg_my_nodeid_get ();
}
/*
 * Try to switch the process to SCHED_RR at the maximum priority and give
 * the logsys thread a matching setting.
 *
 * If the maximum priority cannot be determined, or SCHED_RR cannot be set,
 * a warning is logged and the recorded priority is reset to 0. If SCHED_RR
 * was set but the logsys thread priority cannot be, the process exits via
 * corosync_exit_error(). On platforms lacking the needed calls only a
 * warning is logged.
 */
static void corosync_setscheduler (void)
{
#if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) && defined(HAVE_SCHED_SETSCHEDULER)
	sched_priority = sched_get_priority_max (SCHED_RR);
	if (sched_priority == -1) {
		LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
			"Could not get maximum scheduler priority");
		sched_priority = 0;
		return;
	}

	global_sched_param.sched_priority = sched_priority;
	if (sched_setscheduler (0, SCHED_RR, &global_sched_param) == -1) {
		LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING,
			"Could not set SCHED_RR at priority %d",
			global_sched_param.sched_priority);

		global_sched_param.sched_priority = 0;
		logsys_thread_priority_set (SCHED_OTHER, NULL, 1);
		return;
	}

	/*
	 * Turn on SCHED_RR in logsys system
	 */
	if (logsys_thread_priority_set (SCHED_RR, &global_sched_param, 10) == -1) {
		log_printf (LOGSYS_LEVEL_ERROR,
			"Could not set logsys thread priority."
			" Can't continue because of priority inversions.");
		corosync_exit_error (AIS_DONE_LOGSETUP);
	}
#else
	log_printf(LOGSYS_LEVEL_WARNING,
		"The Platform is missing process priority setting features. Leaving at default.");
#endif
}
+/*
+ * printf-style logging callback (main() stores it in
+ * totem_logging_configuration.log_printf).
+ *
+ * The message is formatted into a local buffer of QB_LOG_MAX_LEN bytes, a
+ * single trailing newline is removed, and the result is passed to
+ * qb_log_from_external_source() together with the caller's location.
+ * Messages that do not fit are truncated to the buffer size.
+ */
+static void
+_logsys_log_printf(int level, int subsys,
+		const char *function_name,
+		const char *file_name,
+		int file_line,
+		const char *format,
+		...) __attribute__((format(printf, 6, 7)));
+
+static void
+_logsys_log_printf(int level, int subsys,
+		const char *function_name,
+		const char *file_name,
+		int file_line,
+		const char *format, ...)
+{
+	va_list ap;
+	char buf[QB_LOG_MAX_LEN];
+	int len;
+
+	va_start(ap, format);
+	len = vsnprintf(buf, sizeof(buf), format, ap);
+	va_end(ap);
+
+	if (len < 0) {
+		/* formatting failed: do not trust the buffer contents */
+		buf[0] = '\0';
+		len = 0;
+	} else if ((size_t)len >= sizeof(buf)) {
+		/*
+		 * vsnprintf() returns the length the full message would have
+		 * had; the buffer only holds sizeof(buf) - 1 characters.
+		 */
+		len = (int)sizeof(buf) - 1;
+	}
+
+	/* len > 0 keeps an empty message from indexing buf[-1] */
+	if (len > 0 && buf[len - 1] == '\n') {
+		buf[len - 1] = '\0';
+	}
+
+	qb_log_from_external_source(function_name, file_name,
+				    format, level, file_line,
+				    subsys, buf);
+}
+
/*
 * Object-tracking callback registered by corosync_fplay_control_init().
 * Only the key name is examined: a change to "dump_flight_data" writes the
 * flight data to LOCALSTATEDIR "/lib/corosync/fdata", a change to
 * "dump_state" calls corosync_state_dump(). The remaining arguments are
 * not used.
 */
static void fplay_key_change_notify_fn (
object_change_type_t change_type,
hdb_handle_t parent_object_handle,
hdb_handle_t object_handle,
const void *object_name_pt, size_t object_name_len,
const void *key_name_pt, size_t key_len,
const void *key_value_pt, size_t key_value_len,
void *priv_data_pt)
{
/* the key name is compared by length and bytes (memcmp), not as a C string */
if (key_len == strlen ("dump_flight_data") &&
memcmp ("dump_flight_data", key_name_pt, key_len) == 0) {
- logsys_log_rec_store (LOCALSTATEDIR "/lib/corosync/fdata");
+ qb_log_blackbox_write_to_file (LOCALSTATEDIR "/lib/corosync/fdata");
}
if (key_len == strlen ("dump_state") &&
memcmp ("dump_state", key_name_pt, key_len) == 0) {
corosync_state_dump ();
}
}
/*
 * Create the "blackbox" object below "runtime" with the string keys
 * "dump_flight_data" and "dump_state" (both initially "no"), and track it
 * so that fplay_key_change_notify_fn() is called on key changes.
 * Nothing is done when the "runtime" object cannot be found.
 */
static void corosync_fplay_control_init (void)
{
	hdb_handle_t find_handle;
	hdb_handle_t runtime_handle;
	hdb_handle_t blackbox_handle;

	objdb->object_find_create (OBJECT_PARENT_HANDLE,
		"runtime", strlen ("runtime"),
		&find_handle);

	if (objdb->object_find_next (find_handle, &runtime_handle) == 0) {
		objdb->object_create (runtime_handle,
			&blackbox_handle,
			"blackbox", strlen ("blackbox"));

		objdb->object_key_create_typed (blackbox_handle,
			"dump_flight_data", "no", strlen("no"),
			OBJDB_VALUETYPE_STRING);
		objdb->object_key_create_typed (blackbox_handle,
			"dump_state", "no", strlen("no"),
			OBJDB_VALUETYPE_STRING);

		objdb->object_track_start (blackbox_handle,
			OBJECT_TRACK_DEPTH_RECURSIVE,
			fplay_key_change_notify_fn,
			NULL, NULL, NULL, NULL);
	}
}
/*
 * Callback registered with totempg_service_ready_register() in main().
 * Links and initialises the default services, then IPC, the statistics and
 * blackbox objects, and finally the synchronization engine selected by
 * minimum_sync_mode.
 */
static void main_service_ready (void)
{
	/*
	 * This must occur after totempg is initialized because "this_ip" must be set
	 */
	if (corosync_service_defaults_link_and_init (api) == -1) {
		log_printf (LOGSYS_LEVEL_ERROR, "Could not initialize default services\n");
		corosync_exit_error (AIS_DONE_INIT_SERVICES);
	}

	evil_init (api);
	cs_ipcs_init();
	corosync_totem_stats_init ();
	corosync_fplay_control_init ();

	if (minimum_sync_mode == CS_SYNC_V2) {
		log_printf (LOGSYS_LEVEL_NOTICE, "Compatibility mode set to none. Using V2 of the synchronization engine.\n");
	} else if (minimum_sync_mode == CS_SYNC_V1) {
		log_printf (LOGSYS_LEVEL_NOTICE, "Compatibility mode set to whitetank. Using V1 and V2 of the synchronization engine.\n");
		/* V1 mode additionally registers the V1 engine before V2 is set up */
		sync_register (
			corosync_sync_callbacks_retrieve,
			sync_v2_memb_list_determine,
			sync_v2_memb_list_abort,
			sync_v2_start);
	}

	/* the V2 engine is initialised in both modes */
	if (minimum_sync_mode == CS_SYNC_V2 || minimum_sync_mode == CS_SYNC_V1) {
		sync_v2_init (
			corosync_sync_v2_callbacks_retrieve,
			corosync_sync_completed);
	}
}
/*
 * Take an exclusive write lock on lockfile and store pid in it.
 *
 * The file is created if needed, locked with fcntl(F_SETLK), truncated,
 * filled with the pid followed by a newline, and marked close-on-exec.
 * On success the descriptor is left open (closing it would drop the fcntl
 * lock) and AIS_DONE_EXIT is returned.
 *
 * Returns AIS_DONE_ALREADY_RUNNING when the lock is held by someone else
 * (EAGAIN/EACCES) and AIS_DONE_AQUIRE_LOCK for every other failure. The
 * descriptor is closed on failure, and the file is also unlinked when the
 * failure happens after the lock was obtained.
 */
static enum e_ais_done corosync_flock (const char *lockfile, pid_t pid)
{
	struct flock lock;
	enum e_ais_done err;
	char pid_s[17];
	size_t pid_len;
	ssize_t written;
	int fd_flag;
	int res;
	int lf;

	err = AIS_DONE_EXIT;

	lf = open (lockfile, O_WRONLY | O_CREAT, 0640);
	if (lf == -1) {
		log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create lock file.\n");
		return (AIS_DONE_AQUIRE_LOCK);
	}

	/* l_len == 0 locks the whole file */
	lock.l_type = F_WRLCK;
	lock.l_start = 0;
	lock.l_whence = SEEK_SET;
	lock.l_len = 0;

	do {
		res = fcntl (lf, F_SETLK, &lock);
	} while (res == -1 && errno == EINTR);

	if (res == -1) {
		switch (errno) {
		case EAGAIN:
		case EACCES:
			log_printf (LOGSYS_LEVEL_ERROR, "Another Corosync instance is already running.\n");
			err = AIS_DONE_ALREADY_RUNNING;
			break;
		default:
			log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't aquire lock. Error was %s\n",
				strerror(errno));
			err = AIS_DONE_AQUIRE_LOCK;
			break;
		}
		goto error_close;
	}

	if (ftruncate (lf, 0) == -1) {
		log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't truncate lock file. Error was %s\n",
			strerror (errno));
		err = AIS_DONE_AQUIRE_LOCK;
		goto error_close_unlink;
	}

	/* pid_t is a signed type: print it through long rather than %u */
	memset (pid_s, 0, sizeof (pid_s));
	snprintf (pid_s, sizeof (pid_s) - 1, "%ld\n", (long)pid);
	pid_len = strlen (pid_s);

	/*
	 * Only retry when write() itself failed with EINTR. After a short
	 * write errno is not set, so testing it there would read a stale value.
	 */
	do {
		written = write (lf, pid_s, pid_len);
	} while (written == -1 && errno == EINTR);

	if (written != (ssize_t)pid_len) {
		log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't write pid to lock file. "
			"Error was %s\n", (written == -1) ? strerror (errno) : "short write");
		err = AIS_DONE_AQUIRE_LOCK;
		goto error_close_unlink;
	}

	if ((fd_flag = fcntl (lf, F_GETFD, 0)) == -1) {
		log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't get close-on-exec flag from lock file. "
			"Error was %s\n", strerror (errno));
		err = AIS_DONE_AQUIRE_LOCK;
		goto error_close_unlink;
	}
	fd_flag |= FD_CLOEXEC;
	if (fcntl (lf, F_SETFD, fd_flag) == -1) {
		log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't set close-on-exec flag to lock file. "
			"Error was %s\n", strerror (errno));
		err = AIS_DONE_AQUIRE_LOCK;
		goto error_close_unlink;
	}

	return (err);

error_close_unlink:
	unlink (lockfile);
error_close:
	close (lf);

	return (err);
}
/*
 * Corosync executive entry point.
 *
 * Parses the command line (-f foreground, -p keep default priority,
 * -v print version), sets scheduling and memory locking, creates the main
 * loop and its signal handlers, loads the object database and the
 * configuration modules, reads and validates the totem configuration,
 * optionally detaches from the terminal, takes the lock file, initialises
 * totempg and then runs the main loop until exit is requested.
 */
int main (int argc, char **argv, char **envp)
{
const char *error_string;
struct totem_config totem_config;
hdb_handle_t objdb_handle;
hdb_handle_t config_handle;
unsigned int config_version = 0;
void *objdb_p;
struct config_iface_ver0 *config;
void *config_p;
const char *config_iface_init;
char *config_iface;
char *iface;
char *strtok_save_pt;
int res, ch;
int background, setprio;
struct stat stat_out;
char corosync_lib_dir[PATH_MAX];
hdb_handle_t object_runtime_handle;
enum e_ais_done flock_err;
/* default configuration
*/
background = 1;
setprio = 1;
while ((ch = getopt (argc, argv, "fpv")) != EOF) {
switch (ch) {
case 'f':
background = 0;
logsys_config_mode_set (NULL, LOGSYS_MODE_OUTPUT_STDERR|LOGSYS_MODE_THREADED|LOGSYS_MODE_FORK);
break;
case 'p':
setprio = 0;
break;
case 'v':
printf ("Corosync Cluster Engine, version '%s'\n", VERSION);
printf ("Copyright (c) 2006-2009 Red Hat, Inc.\n");
return EXIT_SUCCESS;
break;
default:
fprintf(stderr, \
"usage:\n"\
" -f : Start application in foreground.\n"\
" -p : Do not set process priority. \n"\
" -v : Display version and SVN revision of Corosync and exit.\n");
return EXIT_FAILURE;
}
}
/*
* Set round robin realtime scheduling with priority 99
* Lock all memory to avoid page faults which may interrupt
* application healthchecking
*/
if (setprio) {
corosync_setscheduler ();
}
corosync_mlockall ();
log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine ('%s'): started and ready to provide service.\n", VERSION);
log_printf (LOGSYS_LEVEL_INFO, "Corosync built-in features:" PACKAGE_FEATURES "\n");
/*
* Create the main loop; SIGUSR2/SIGINT/SIGQUIT/SIGTERM are handled through
* it, while SIGSEGV and SIGABRT get plain signal() handlers
*/
corosync_poll_handle = qb_loop_create ();
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW,
SIGUSR2, NULL, sig_diag_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGINT, NULL, sig_exit_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGQUIT, NULL, sig_exit_handler, NULL);
qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
SIGTERM, NULL, sig_exit_handler, NULL);
(void)signal (SIGSEGV, sigsegv_handler);
(void)signal (SIGABRT, sigabrt_handler);
#if MSG_NOSIGNAL != 0
(void)signal (SIGPIPE, SIG_IGN);
#endif
/*
* Load the object database interface
*/
res = lcr_ifact_reference (
&objdb_handle,
"objdb",
0,
&objdb_p,
0);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't open configuration object database component.\n");
corosync_exit_error (AIS_DONE_OBJDB);
}
objdb = (struct objdb_iface_ver0 *)objdb_p;
objdb->objdb_init ();
/*
* Initialize the corosync_api_v1 definition
*/
apidef_init (objdb);
api = apidef_get ();
num_config_modules = 0;
/*
* Bootstrap in the default configuration parser or use
* the corosync default built in parser if the configuration parser
* isn't overridden
*/
config_iface_init = getenv("COROSYNC_DEFAULT_CONFIG_IFACE");
if (!config_iface_init) {
config_iface_init = "corosync_parser";
}
/* Make a copy so we can deface it with strtok */
if ((config_iface = strdup(config_iface_init)) == NULL) {
log_printf (LOGSYS_LEVEL_ERROR, "exhausted virtual memory");
corosync_exit_error (AIS_DONE_OBJDB);
}
/* the interface list is colon-separated; each named module is loaded and run in order */
iface = strtok_r(config_iface, ":", &strtok_save_pt);
while (iface)
{
res = lcr_ifact_reference (
&config_handle,
iface,
config_version,
&config_p,
0);
config = (struct config_iface_ver0 *)config_p;
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't open configuration component '%s'\n", iface);
corosync_exit_error (AIS_DONE_MAINCONFIGREAD);
}
res = config->config_readconfig(objdb, &error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (AIS_DONE_MAINCONFIGREAD);
}
log_printf (LOGSYS_LEVEL_NOTICE, "%s", error_string);
/* NOTE(review): no bound check on num_config_modules here - confirm the size of config_modules[] */
config_modules[num_config_modules++] = config;
iface = strtok_r(NULL, ":", &strtok_save_pt);
}
free(config_iface);
res = corosync_main_config_read (objdb, &error_string);
if (res == -1) {
/*
* if we are here, we _must_ flush the logsys queue
* and try to inform that we couldn't read the config.
* this is a desperate attempt before certain death
* and there is no guarantee that we can print to stderr
* nor that logsys is sending the messages where we expect.
*/
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
fprintf(stderr, "%s", error_string);
syslog (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (AIS_DONE_MAINCONFIGREAD);
}
/*
* Make sure required directory is present
*/
sprintf (corosync_lib_dir, "%s/lib/corosync", LOCALSTATEDIR);
res = stat (corosync_lib_dir, &stat_out);
if ((res == -1) || (res == 0 && !S_ISDIR(stat_out.st_mode))) {
log_printf (LOGSYS_LEVEL_ERROR, "Required directory not present %s. Please create it.\n", corosync_lib_dir);
corosync_exit_error (AIS_DONE_DIR_NOT_PRESENT);
}
/* read, key-read and validate the totem configuration; any failure is fatal */
res = totem_config_read (objdb, &totem_config, &error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (AIS_DONE_MAINCONFIGREAD);
}
res = totem_config_keyread (objdb, &totem_config, &error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (AIS_DONE_MAINCONFIGREAD);
}
res = totem_config_validate (&totem_config, &error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (AIS_DONE_MAINCONFIGREAD);
}
/* hand totem its logging subsystem id, level mapping and printf callback */
totem_config.totem_logging_configuration = totem_logging_configuration;
- totem_config.totem_logging_configuration.log_subsys_id =
- _logsys_subsys_create ("TOTEM");
-
- if (totem_config.totem_logging_configuration.log_subsys_id < 0) {
- log_printf (LOGSYS_LEVEL_ERROR,
- "Unable to initialize TOTEM logging subsystem\n");
- corosync_exit_error (AIS_DONE_MAINCONFIGREAD);
- }
-
+ totem_config.totem_logging_configuration.log_subsys_id = _logsys_subsys_create("TOTEM", "totem");
totem_config.totem_logging_configuration.log_level_security = LOGSYS_LEVEL_WARNING;
totem_config.totem_logging_configuration.log_level_error = LOGSYS_LEVEL_ERROR;
totem_config.totem_logging_configuration.log_level_warning = LOGSYS_LEVEL_WARNING;
totem_config.totem_logging_configuration.log_level_notice = LOGSYS_LEVEL_NOTICE;
totem_config.totem_logging_configuration.log_level_debug = LOGSYS_LEVEL_DEBUG;
totem_config.totem_logging_configuration.log_printf = _logsys_log_printf;
+ logsys_config_apply();
/* NOTE(review): the compatibility read below is performed twice with identical arguments - the second call looks redundant; confirm */
res = corosync_main_config_compatibility_read (objdb,
&minimum_sync_mode,
&error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (AIS_DONE_MAINCONFIGREAD);
}
res = corosync_main_config_compatibility_read (objdb,
&minimum_sync_mode,
&error_string);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string);
corosync_exit_error (AIS_DONE_MAINCONFIGREAD);
}
/* create the main runtime object */
objdb->object_create (OBJECT_PARENT_HANDLE,
&object_runtime_handle,
"runtime", strlen ("runtime"));
/*
* Now we are fully initialized.
*/
if (background) {
corosync_tty_detach ();
}
- logsys_fork_completed();
+ qb_log_thread_start();
/* taken after the optional detach, so getpid() is the pid of the process that keeps running */
if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) != AIS_DONE_EXIT) {
corosync_exit_error (flock_err);
}
/*
* if totempg_initialize doesn't have root privileges, it cannot
* bind to a specific interface. This only matters if
* there is more than one interface in a system, so
* in this case, only a warning is printed
*/
/*
* Join multicast group and setup delivery
* and configuration change functions
*/
totempg_initialize (
corosync_poll_handle,
&totem_config);
totempg_service_ready_register (
main_service_ready);
totempg_groups_initialize (
&corosync_group_handle,
deliver_fn,
confchg_fn);
totempg_groups_join (
corosync_group_handle,
&corosync_group,
1);
/*
* Drop root privileges to user 'ais'
* TODO: Don't really need full root capabilities;
* needed capabilities are:
* CAP_NET_RAW (bindtodevice)
* CAP_SYS_NICE (setscheduler)
* CAP_IPC_LOCK (mlockall)
*/
priv_drop ();
schedwrk_init (
serialize_lock,
serialize_unlock);
/*
* Start main processing loop
*/
qb_loop_run (corosync_poll_handle);
/*
* Exit was requested
*/
totempg_finalize ();
/*
* Remove pid lock file
*/
unlink (corosync_lock_file);
corosync_exit_error (AIS_DONE_EXIT);
return EXIT_SUCCESS;
}
diff --git a/exec/mainconfig.c b/exec/mainconfig.c
index 7d4e520b..2db9a53d 100644
--- a/exec/mainconfig.c
+++ b/exec/mainconfig.c
@@ -1,765 +1,766 @@
/*
* Copyright (c) 2002-2005 MontaVista Software, Inc.
* Copyright (c) 2006-2009 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <pwd.h>
#include <grp.h>
#include <limits.h>
#include <corosync/corotypes.h>
#include <corosync/list.h>
#include <corosync/totem/totem.h>
#include <corosync/engine/logsys.h>
#include "util.h"
#include "mainconfig.h"
static char error_string_response[512];
static struct objdb_iface_ver0 *global_objdb;
DECLARE_LIST_INIT(uidgid_list_head);
/*
 * Convenience wrapper around objdb->object_key_get() for string values.
 *
 * *value is reset to NULL before the lookup.  Returns 0 only when the
 * lookup reports success (0) and left a non-NULL pointer in *value;
 * returns -1 in every other case.
 */
static inline int objdb_get_string (
	const struct objdb_iface_ver0 *objdb,
	hdb_handle_t object_service_handle,
	const char *key, char **value)
{
	*value = NULL;

	if (objdb->object_key_get (object_service_handle,
			key,
			strlen (key),
			(void *)value,
			NULL) != 0) {
		return -1;
	}

	return (*value != NULL) ? 0 : -1;
}
/*
 * Look up key on the given object and, when a value is present, store its
 * atoi() conversion in *intvalue.  *intvalue is left untouched when the
 * lookup fails or yields a NULL value.
 */
static inline void objdb_get_int (
	const struct objdb_iface_ver0 *objdb,
	hdb_handle_t object_service_handle,
	char *key, unsigned int *intvalue)
{
	char *text = NULL;

	if (objdb->object_key_get (object_service_handle,
			key,
			strlen (key),
			(void *)&text,
			NULL) != 0) {
		return;
	}

	if (text == NULL) {
		return;
	}

	*intvalue = atoi(text);
}
/**
 * insert_into_buffer
 * @target_buffer: a buffer where to write the result
 * @bufferlen: size of @target_buffer in bytes (must include room for the NUL)
 * @entry: entry that needs to be added to the format
 * @after: can either be NULL or set to a string.
 *         if NULL, @entry is prepended to the logsys_format_get() string.
 *         if set, @entry is inserted immediately after the first occurrence
 *         of @after in that string.
 *
 * The source string is always the current value of logsys_format_get().
 *
 * Every length is validated before the first byte is written, so a failed
 * call never writes to @target_buffer and can never write past
 * @bufferlen bytes.
 *
 * Searching for "after" assumes that there is only one occurrence of
 * "after" in the source. After all we control the string here, and for a
 * logging format it makes little to no sense to have duplicate entries.
 *
 * Returns: 0 on success, -1 on failure (no format available, @entry already
 * present, @after empty or not found, or result does not fit).
 **/
static int insert_into_buffer(
	char *target_buffer,
	size_t bufferlen,
	const char *entry,
	const char *after)
{
	const char *current_format;
	size_t formatlen;
	size_t entrylen;
	size_t prefixlen = 0;	/* bytes of the format copied before entry */

	current_format = logsys_format_get();
	if (current_format == NULL) {
		/* defensive: nothing to insert into */
		return -1;
	}

	/* if the entry is already in the format we don't add it again */
	if (strstr(current_format, entry) != NULL) {
		return -1;
	}

	if (after != NULL) {
		const char *afterpos;
		size_t afterlen;

		/* "after" must be non-empty and present in the format */
		afterlen = strlen(after);
		afterpos = strstr(current_format, after);
		if (afterpos == NULL || afterlen == 0) {
			return -1;
		}
		prefixlen = (size_t)(afterpos - current_format) + afterlen;
	}

	formatlen = strlen(current_format);
	entrylen = strlen(entry);

	/*
	 * Need formatlen + entrylen + 1 (NUL) <= bufferlen; written as two
	 * comparisons so the arithmetic cannot wrap around.
	 */
	if (bufferlen == 0 || formatlen >= bufferlen ||
	    entrylen >= bufferlen - formatlen) {
		return -1;
	}

	/* prefix | entry | remainder of the format including its NUL */
	memcpy(target_buffer, current_format, prefixlen);
	memcpy(target_buffer + prefixlen, entry, entrylen);
	memcpy(target_buffer + prefixlen + entrylen,
	       current_format + prefixlen,
	       formatlen - prefixlen + 1);

	return 0;
}
/*
* format set is the only global specific option that
* doesn't apply at system/subsystem level.
*
* Reads the "fileline", "function_name" and "timestamp" keys of
* object_handle.  For each key set to "on" the matching format token is
* spliced into the current logsys format via insert_into_buffer() and the
* result is installed with logsys_format_set(); "off" is a no-op and any
* other value is a parse error.
*
* Returns 0 on success.  Returns -1 and stores a static message in
* *error_string when a key holds an unknown value or when the last
* logsys_format_set() call performed returned non-zero.
*
* NOTE(review): err is overwritten by every logsys_format_set() call, so a
* failure for an earlier key is masked if a later key succeeds.
*/
static int corosync_main_config_format_set (
struct objdb_iface_ver0 *objdb,
hdb_handle_t object_handle,
const char **error_string)
{
const char *error_reason;
char new_format_buffer[PATH_MAX];
char *value;
int err = 0;
/* fileline: try to place " %f:%l" after the anchor, else prepend "%f:%l" */
if (!objdb_get_string (objdb,object_handle, "fileline", &value)) {
if (strcmp (value, "on") == 0) {
if (!insert_into_buffer(new_format_buffer,
sizeof(new_format_buffer),
- " %f:%l", "s]")) {
+ " %f:%l", "g]")) {
err = logsys_format_set(new_format_buffer);
} else
if (!insert_into_buffer(new_format_buffer,
sizeof(new_format_buffer),
"%f:%l", NULL)) {
err = logsys_format_set(new_format_buffer);
}
} else
if (strcmp (value, "off") == 0) {
/* nothing to do here */
} else {
error_reason = "unknown value for fileline";
goto parse_error;
}
}
/* function_name: prefer "%n:" right after "f:", else " %n" after the anchor */
if (!objdb_get_string (objdb,object_handle, "function_name", &value)) {
if (strcmp (value, "on") == 0) {
if (!insert_into_buffer(new_format_buffer,
sizeof(new_format_buffer),
"%n:", "f:")) {
err = logsys_format_set(new_format_buffer);
} else
if (!insert_into_buffer(new_format_buffer,
sizeof(new_format_buffer),
- " %n", "s]")) {
+ " %n", "g]")) {
err = logsys_format_set(new_format_buffer);
}
} else
if (strcmp (value, "off") == 0) {
/* nothing to do here */
} else {
error_reason = "unknown value for function_name";
goto parse_error;
}
}
/* timestamp: "%t " is always prepended to the format */
if (!objdb_get_string (objdb,object_handle, "timestamp", &value)) {
if (strcmp (value, "on") == 0) {
if(!insert_into_buffer(new_format_buffer,
sizeof(new_format_buffer),
"%t ", NULL)) {
err = logsys_format_set(new_format_buffer);
}
} else
if (strcmp (value, "off") == 0) {
/* nothing to do here */
} else {
error_reason = "unknown value for timestamp";
goto parse_error;
}
}
/* any non-zero result from logsys_format_set() is reported as out-of-memory */
if (err) {
error_reason = "exhausted virtual memory";
goto parse_error;
}
return (0);
parse_error:
*error_string = error_reason;
return (-1);
}
/*
 * Enable or disable one logging destination for a subsystem.
 *
 * objdb_key names the boolean config key to read from object_handle and
 * mode_mask the LOGSYS_MODE_* bit(s) it controls.  "yes"/"on" set the
 * bits, "no"/"off" clear them; anything else is an error.  When the key is
 * absent nothing is changed.  If deprecated is non-zero a warning naming
 * replacement is logged whenever the key is present.
 *
 * Returns 0 on success (or key absent).  Returns -1 and points
 * *error_string at a static, bounded message buffer on failure; the
 * buffer is reused by subsequent calls.
 */
static int corosync_main_config_log_destination_set (
	struct objdb_iface_ver0 *objdb,
	hdb_handle_t object_handle,
	const char *subsys,
	const char **error_string,
	const char *objdb_key,
	unsigned int mode_mask,
	char deprecated,
	const char *replacement)
{
	static char formatted_error_reason[128];
	char *value;
	unsigned int mode;

	if (objdb_get_string (objdb, object_handle, objdb_key, &value) != 0) {
		/* key not configured: leave the destination as it is */
		return 0;
	}

	if (deprecated) {
		log_printf(LOGSYS_LEVEL_WARNING,
			"Warning: the %s config paramater has been obsoleted."
			" See corosync.conf man page %s directive.",
			objdb_key, replacement);
	}

	mode = logsys_config_mode_get (subsys);

	if (strcmp (value, "yes") == 0 || strcmp (value, "on") == 0) {
		mode |= mode_mask;
		if (logsys_config_mode_set(subsys, mode) < 0) {
			/* snprintf keeps the message inside the static buffer */
			snprintf (formatted_error_reason, sizeof (formatted_error_reason),
				"unable to set mode %s", objdb_key);
			goto set_error;
		}
	} else
	if (strcmp (value, "no") == 0 || strcmp (value, "off") == 0) {
		mode &= ~mode_mask;
		if (logsys_config_mode_set(subsys, mode) < 0) {
			snprintf (formatted_error_reason, sizeof (formatted_error_reason),
				"unable to unset mode %s", objdb_key);
			goto set_error;
		}
	} else {
		snprintf (formatted_error_reason, sizeof (formatted_error_reason),
			"unknown value for %s", objdb_key);
		goto set_error;
	}

	return 0;

set_error:
	*error_string = formatted_error_reason;
	return -1;
}
/*
 * Apply the logging settings stored on object_handle to one logsys
 * subsystem (subsys == NULL is passed through to the logsys_* calls
 * unchanged and skips the subsystem-creation step).
 *
 * Handles, in order: to_logfile / to_stderr / to_syslog / to_file
 * destinations, syslog_facility, syslog_level (obsolete alias, logs a
 * warning), syslog_priority, logfile, logfile_priority and debug.
 *
 * Returns 0 on success.  On failure returns -1 and stores the reason in
 * *error_string.  Every failure path, including a logfile failure,
 * funnels its message through error_reason so it is not overwritten at
 * parse_error.
 */
static int corosync_main_config_set (
	struct objdb_iface_ver0 *objdb,
	hdb_handle_t object_handle,
	const char *subsys,
	const char **error_string)
{
	const char *error_reason = error_string_response;
	char *value;
	int mode;

	/*
	 * this bit abuses the internal logsys exported API
	 * to guarantee that all configured subsystems are
	 * initialized too.
	 *
	 * using this approach avoids some headaches caused
	 * by IPC and TOTEM that have a special logging
	 * handling requirements
	 */
	if (subsys != NULL) {
-		if (_logsys_subsys_create(subsys) < 0) {
+		if (_logsys_subsys_create(subsys, NULL) < 0) {
			error_reason = "unable to create new logging subsystem";
			goto parse_error;
		}
	}

	/* only used as a sanity check that the subsystem can be queried */
	mode = logsys_config_mode_get(subsys);
	if (mode < 0) {
		error_reason = "unable to get mode";
		goto parse_error;
	}

	if (corosync_main_config_log_destination_set (objdb, object_handle, subsys, &error_reason,
	    "to_logfile", LOGSYS_MODE_OUTPUT_FILE, 0, NULL) != 0)
		goto parse_error;

	if (corosync_main_config_log_destination_set (objdb, object_handle, subsys, &error_reason,
	    "to_stderr", LOGSYS_MODE_OUTPUT_STDERR, 0, NULL) != 0)
		goto parse_error;

	if (corosync_main_config_log_destination_set (objdb, object_handle, subsys, &error_reason,
	    "to_syslog", LOGSYS_MODE_OUTPUT_SYSLOG, 0, NULL) != 0)
		goto parse_error;

	/* "to_file" is the deprecated spelling of "to_logfile" */
	if (corosync_main_config_log_destination_set (objdb, object_handle, subsys, &error_reason,
	    "to_file", LOGSYS_MODE_OUTPUT_FILE, 1, "to_logfile") != 0)
		goto parse_error;

	if (!objdb_get_string (objdb,object_handle, "syslog_facility", &value)) {
		int syslog_facility;

		syslog_facility = logsys_facility_id_get(value);
		if (syslog_facility < 0) {
			error_reason = "unknown syslog facility specified";
			goto parse_error;
		}
		if (logsys_config_syslog_facility_set(subsys,
						syslog_facility) < 0) {
			error_reason = "unable to set syslog facility";
			goto parse_error;
		}
	}

	if (!objdb_get_string (objdb,object_handle, "syslog_level", &value)) {
		int syslog_priority;

		log_printf(LOGSYS_LEVEL_WARNING,
			"Warning: the syslog_level config paramater has been obsoleted."
			" See corosync.conf man page syslog_priority directive.");

		syslog_priority = logsys_priority_id_get(value);
		if (syslog_priority < 0) {
			error_reason = "unknown syslog level specified";
			goto parse_error;
		}
		if (logsys_config_syslog_priority_set(subsys,
						syslog_priority) < 0) {
			error_reason = "unable to set syslog level";
			goto parse_error;
		}
	}

	if (!objdb_get_string (objdb,object_handle, "syslog_priority", &value)) {
		int syslog_priority;

		syslog_priority = logsys_priority_id_get(value);
		if (syslog_priority < 0) {
			error_reason = "unknown syslog priority specified";
			goto parse_error;
		}
		if (logsys_config_syslog_priority_set(subsys,
						syslog_priority) < 0) {
			error_reason = "unable to set syslog priority";
			goto parse_error;
		}
	}

	if (!objdb_get_string (objdb,object_handle, "logfile", &value)) {
		/*
		 * Hand logsys our local error_reason rather than the caller's
		 * error_string: parse_error assigns *error_string from
		 * error_reason, which previously discarded whatever message
		 * this call stored.
		 */
		if (logsys_config_file_set (subsys, &error_reason, value) < 0) {
			goto parse_error;
		}
	}

	if (!objdb_get_string (objdb,object_handle, "logfile_priority", &value)) {
		int logfile_priority;

		logfile_priority = logsys_priority_id_get(value);
		if (logfile_priority < 0) {
			error_reason = "unknown logfile priority specified";
			goto parse_error;
		}
		if (logsys_config_logfile_priority_set(subsys,
						logfile_priority) < 0) {
			error_reason = "unable to set logfile priority";
			goto parse_error;
		}
	}

	if (!objdb_get_string (objdb, object_handle, "debug", &value)) {
		if (strcmp (value, "on") == 0) {
			if (logsys_config_debug_set (subsys, 1) < 0) {
				error_reason = "unable to set debug on";
				goto parse_error;
			}
		} else
		if (strcmp (value, "off") == 0) {
			if (logsys_config_debug_set (subsys, 0) < 0) {
				error_reason = "unable to set debug off";
				goto parse_error;
			}
		} else {
			error_reason = "unknown value for debug";
			goto parse_error;
		}
	}

	return (0);

parse_error:
	*error_string = error_reason;

	return (-1);
}
/*
 * Read the top-level "logging" object and apply it to logsys.
 *
 * Steps, for the first "logging" object found:
 *  - global format options (corosync_main_config_format_set)
 *  - global settings (corosync_main_config_set with subsys == NULL)
 *  - every "logger_subsys" child; each must carry a "subsys" key
 *  - every "logging_daemon" child; each must carry a "name" key, and only
 *    those named "corosync" are applied (per-subsys when a "subsys" key is
 *    present, globally otherwise)
 *
 * Returns 0 on success, or -1 with the reason stored in *error_string.
 * All objdb find handles opened here are destroyed on the error paths as
 * well as on success.
 */
static int corosync_main_config_read_logging (
	struct objdb_iface_ver0 *objdb,
	const char **error_string)
{
	hdb_handle_t object_service_handle;
	hdb_handle_t object_logger_subsys_handle;
	hdb_handle_t object_find_handle;
	hdb_handle_t object_find_logsys_handle;
	const char *error_reason;
	char *value;

	objdb->object_find_create (
		OBJECT_PARENT_HANDLE,
		"logging",
		strlen ("logging"),
		&object_find_handle);

	if (objdb->object_find_next (
		object_find_handle,
		&object_service_handle) == 0) {

		/* format set is supported only for toplevel */
		if (corosync_main_config_format_set (objdb,
						object_service_handle,
						&error_reason) < 0) {
			goto parse_error;
		}

		if (corosync_main_config_set (objdb,
						object_service_handle,
						NULL,
						&error_reason) < 0) {
			goto parse_error;
		}

		/* we will need 2 of these to compensate for new logging
		 * config format */
		objdb->object_find_create (
			object_service_handle,
			"logger_subsys",
			strlen ("logger_subsys"),
			&object_find_logsys_handle);

		while (objdb->object_find_next (
			object_find_logsys_handle,
			&object_logger_subsys_handle) == 0) {

			if (!objdb_get_string (objdb,
				object_logger_subsys_handle,
				"subsys", &value)) {

				if (corosync_main_config_set (objdb,
						object_logger_subsys_handle,
						value,
						&error_reason) < 0) {
					goto parse_error_logsys;
				}
			}
			else {
				error_reason = "subsys required for logger directive";
				goto parse_error_logsys;
			}
		}
		objdb->object_find_destroy (object_find_logsys_handle);

		objdb->object_find_create (
			object_service_handle,
			"logging_daemon",
			strlen ("logging_daemon"),
			&object_find_logsys_handle);

		while (objdb->object_find_next (
			object_find_logsys_handle,
			&object_logger_subsys_handle) == 0) {

			if (!objdb_get_string (objdb,
				object_logger_subsys_handle,
				"name", &value)) {

				/* sections for other daemons are ignored */
				if (strcmp(value, "corosync") == 0) {
					if (!objdb_get_string (objdb,
						object_logger_subsys_handle,
						"subsys", &value)) {

						if (corosync_main_config_set (objdb,
								object_logger_subsys_handle,
								value,
								&error_reason) < 0) {
							goto parse_error_logsys;
						}
					}
					else {
						if (corosync_main_config_set (objdb,
								object_logger_subsys_handle,
								NULL,
								&error_reason) < 0) {
							goto parse_error_logsys;
						}
					}
				}
			}
			else {
				error_reason = "name required for logging_daemon directive";
				goto parse_error_logsys;
			}
		}
		objdb->object_find_destroy (object_find_logsys_handle);
	}

	objdb->object_find_destroy (object_find_handle);
+	logsys_config_apply();

	return 0;

parse_error_logsys:
	/* failure while a child iterator was open: release it first */
	objdb->object_find_destroy (object_find_logsys_handle);
parse_error:
	objdb->object_find_destroy (object_find_handle);
	*error_string = error_reason;

	return (-1);
}
/*
 * Resolve a user name to its numeric uid using getpwnam_r().
 *
 * If the lookup reports an error, or completes without finding an entry
 * (getpwnam_r() returns 0 with a NULL result in that case), an error is
 * logged and corosync_exit_error(AIS_DONE_UID_DETERMINE) is called.
 *
 * Returns the uid of req_user.
 */
static int uid_determine (const char *req_user)
{
	int pw_uid = 0;
	struct passwd passwd;
	struct passwd *found = NULL;
	char *pwdbuffer;
	long pwdlinelen;
	int rc;

	pwdlinelen = sysconf (_SC_GETPW_R_SIZE_MAX);
	if (pwdlinelen == -1) {
		/* no system hint available: fall back to a fixed size */
		pwdlinelen = 256;
	}

	pwdbuffer = malloc ((size_t)pwdlinelen);
	if (pwdbuffer == NULL) {
		_corosync_out_of_memory_error();
	}

	rc = getpwnam_r (req_user, &passwd, pwdbuffer, (size_t)pwdlinelen, &found);
	if (rc != 0 || found == NULL) {
		/* found == NULL with rc == 0 means "no such user" */
		log_printf (LOGSYS_LEVEL_ERROR,
			"ERROR: The '%s' user is not found in /etc/passwd, please read the documentation.\n",
			req_user);
		corosync_exit_error (AIS_DONE_UID_DETERMINE);
	}
	pw_uid = passwd.pw_uid;
	free (pwdbuffer);

	return pw_uid;
}
/*
 * Resolve a group name to its numeric gid using getgrnam_r().
 *
 * If the lookup reports an error, or completes without finding an entry
 * (getgrnam_r() returns 0 with a NULL result in that case), an error is
 * logged and corosync_exit_error(AIS_DONE_GID_DETERMINE) is called.
 *
 * Returns the gid of req_group.
 */
static int gid_determine (const char *req_group)
{
	int ais_gid = 0;
	struct group group;
	struct group *found = NULL;
	char *grpbuffer;
	long grplinelen;
	int rc;

	grplinelen = sysconf (_SC_GETGR_R_SIZE_MAX);
	if (grplinelen == -1) {
		/* no system hint available: fall back to a fixed size */
		grplinelen = 256;
	}

	grpbuffer = malloc ((size_t)grplinelen);
	if (grpbuffer == NULL) {
		_corosync_out_of_memory_error();
	}

	rc = getgrnam_r (req_group, &group, grpbuffer, (size_t)grplinelen, &found);
	if (rc != 0 || found == NULL) {
		/* found == NULL with rc == 0 means "no such group" */
		log_printf (LOGSYS_LEVEL_ERROR,
			"ERROR: The '%s' group is not found in /etc/group, please read the documentation.\n",
			req_group);
		corosync_exit_error (AIS_DONE_GID_DETERMINE);
	}
	ais_gid = group.gr_gid;
	free (grpbuffer);

	return ais_gid;
}
/*
 * objdb reload callback.  Only the OBJDB_RELOAD_NOTIFY_END event is acted
 * on: the logsys format is reset with logsys_format_set(NULL) and the
 * logging configuration is read again from global_objdb.  The result of
 * that re-read is not inspected.
 */
static void main_objdb_reload_notify(objdb_reload_notify_type_t type, int flush,
	void *priv_data_pt)
{
	const char *error_string;

	if (type != OBJDB_RELOAD_NOTIFY_END) {
		return;
	}

	/*
	 * Reload the logsys configuration
	 */
	if (logsys_format_set(NULL) == -1) {
		fprintf (stderr, "Unable to setup logging format.\n");
	}

	corosync_main_config_read_logging(global_objdb, &error_string);
}
/*
 * Remember objdb in global_objdb and start tracking the root object so
 * that main_objdb_reload_notify() is invoked on reload notifications.
 * Only the reload callback is supplied; the remaining callback slots and
 * the private data pointer are NULL.
 */
static void add_logsys_config_notification(
	struct objdb_iface_ver0 *objdb)
{
	/* the reload callback reads the database through this pointer */
	global_objdb = objdb;

	objdb->object_track_start(OBJECT_PARENT_HANDLE, 1,
		NULL, NULL, NULL,
		main_objdb_reload_notify,
		NULL);
}
/*
 * Walk every top-level "uidgid" object and record its "uid" and/or "gid"
 * keys (resolved through uid_determine()/gid_determine()) as a new
 * struct uidgid_item on uidgid_list_head.  A field that is not configured
 * is stored as -1; objects with neither key are skipped.
 *
 * Always returns 0; error_string is not written by this function.
 */
static int corosync_main_config_read_uidgid (
	struct objdb_iface_ver0 *objdb,
	const char **error_string)
{
	hdb_handle_t object_find_handle;
	hdb_handle_t object_service_handle;
	char *value;
	int uid, gid;
	struct uidgid_item *ugi;

	objdb->object_find_create (OBJECT_PARENT_HANDLE,
		"uidgid", strlen ("uidgid"),
		&object_find_handle);

	while (objdb->object_find_next (object_find_handle,
					&object_service_handle) == 0) {
		uid = -1;
		gid = -1;

		if (objdb_get_string (objdb,object_service_handle, "uid", &value) == 0) {
			uid = uid_determine(value);
		}

		if (objdb_get_string (objdb,object_service_handle, "gid", &value) == 0) {
			gid = gid_determine(value);
		}

		/* nothing configured on this object */
		if (uid < 0 && gid < 0) {
			continue;
		}

		ugi = malloc (sizeof (*ugi));
		if (ugi == NULL) {
			_corosync_out_of_memory_error();
		}
		ugi->uid = uid;
		ugi->gid = gid;
		list_init (&ugi->list);
		list_add (&ugi->list, &uidgid_list_head);
	}

	objdb->object_find_destroy (object_find_handle);

	return 0;
}
/*
 * Entry point for reading the main configuration: logging first, then
 * the uidgid list, then registration of the logging reload notification.
 *
 * Returns 0 on success.  If reading the logging section fails, the reason
 * is wrapped as "parse error in config: <reason>.\n" in the static
 * error_string_response buffer, *error_string is pointed at it and -1 is
 * returned.
 */
int corosync_main_config_read (
	struct objdb_iface_ver0 *objdb,
	const char **error_string)
{
	const char *error_reason;

	if (corosync_main_config_read_logging(objdb, error_string) < 0) {
		error_reason = *error_string;

		snprintf (error_string_response, sizeof(error_string_response),
			"parse error in config: %s.\n",
			error_reason);
		*error_string = error_string_response;
		return (-1);
	}

	corosync_main_config_read_uidgid (objdb, error_string);
	add_logsys_config_notification(objdb);

	return 0;
}
/*
 * Translate the top-level "compatibility" key into a minimum sync mode.
 *
 *   key absent or "whitetank" -> CS_SYNC_V1
 *   "none"                    -> CS_SYNC_V2
 *
 * Any other value formats a message into error_string_response, points
 * *error_string at it and returns -1.  Returns 0 otherwise.
 */
int corosync_main_config_compatibility_read (
	struct objdb_iface_ver0 *objdb,
	enum cs_sync_mode *minimum_sync_mode,
	const char **error_string)
{
	char *value;

	/* default when the key is not configured */
	*minimum_sync_mode = CS_SYNC_V1;

	if (objdb_get_string (objdb, OBJECT_PARENT_HANDLE, "compatibility", &value) != 0) {
		return 0;
	}

	if (strcmp (value, "whitetank") == 0) {
		*minimum_sync_mode = CS_SYNC_V1;
		return 0;
	}

	if (strcmp (value, "none") == 0) {
		*minimum_sync_mode = CS_SYNC_V2;
		return 0;
	}

	snprintf (error_string_response, sizeof (error_string_response),
		"Invalid compatibility option '%s' specified, must be none or whitetank.\n", value);
	*error_string = error_string_response;

	return (-1);
}
diff --git a/exec/service.c b/exec/service.c
index 04353142..2b6fa358 100644
--- a/exec/service.c
+++ b/exec/service.c
@@ -1,728 +1,737 @@
/*
* Copyright (c) 2006 MontaVista Software, Inc.
* Copyright (c) 2006-2009 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <stdlib.h>
#include <string.h>
#include <corosync/lcr/lcr_ifact.h>
#include <corosync/swab.h>
#include <corosync/totem/totem.h>
#include <corosync/corotypes.h>
#include "mainconfig.h"
#include "util.h"
#include <corosync/engine/logsys.h>
#include "timer.h"
#include <corosync/totem/totempg.h>
#include <corosync/totem/totemip.h>
#include "main.h"
#include <corosync/engine/coroapi.h>
#include "service.h"
#include <qb/qbipcs.h>
#include <qb/qbloop.h>
LOGSYS_DECLARE_SUBSYS ("SERV");
/*
 * One service engine that is linked by default: its interface name and
 * the interface version requested for it.
 */
struct default_service {
	const char *name;
	int ver;
};

/*
 * Service engines linked by corosync_service_defaults_link_and_init(), in
 * this order.  The monitoring and watchdog engines are only listed when
 * the corresponding HAVE_* macro is defined.
 */
static struct default_service default_services[] = {
	{ .name = "corosync_evs",    .ver = 0 },
	{ .name = "corosync_cfg",    .ver = 0 },
	{ .name = "corosync_cpg",    .ver = 0 },
	{ .name = "corosync_confdb", .ver = 0 },
	{ .name = "corosync_pload",  .ver = 0 },
#ifdef HAVE_MONITORING
	{ .name = "corosync_mon",    .ver = 0 },
#endif
#ifdef HAVE_WATCHDOG
	{ .name = "corosync_wd",     .ver = 0 },
#endif
	{ .name = "corosync_quorum", .ver = 0 }
};
/*
* service exit and unlink schedwrk handler data structure
*
* A single instance is passed as the job data between
* service_exit_schedwrk_handler() and service_unlink_schedwrk_handler().
*/
struct seus_handler_data {
/* lcr handle of the engine being unloaded; passed to lcr_ifact_release() */
hdb_handle_t service_handle;
/* index into ais_service[] of the engine being unloaded */
int service_engine;
/* API table handed to corosync_service_unlink_priority() */
struct corosync_api_v1 *api;
};
/* loaded service engines, indexed by service id; NULL when not loaded */
struct corosync_service_engine *ais_service[SERVICE_HANDLER_MAXIMUM_COUNT];
/* objdb stats object handles, indexed by [service id][exec function index] */
hdb_handle_t service_stats_handle[SERVICE_HANDLER_MAXIMUM_COUNT][64];
/* set to 1 for a slot once its engine has been selected for unloading */
int ais_service_exiting[SERVICE_HANDLER_MAXIMUM_COUNT];
/* "internal_configuration" object that holds one "service" object per engine */
static hdb_handle_t object_internal_configuration_handle;
/* "services" object created under "runtime"; parent of per-service stats */
static hdb_handle_t object_stats_services_handle;
/* callback invoked once every service engine has been unlinked */
static void (*service_unlink_all_complete) (void) = NULL;
/*
 * Report whether the default service engines should be linked.
 *
 * Looks at the first top-level "aisexec" object: if its "defaultservices"
 * key is set to "no" the defaults are disabled.
 *
 * Returns 0 when the defaults are disabled, and (unsigned int)-1 otherwise
 * (including when the object or key is absent).  The objdb find handle is
 * released on every path.
 */
static unsigned int default_services_requested (struct corosync_api_v1 *corosync_api)
{
	hdb_handle_t object_service_handle;
	hdb_handle_t object_find_handle;
	char *value = NULL;
	unsigned int requested = -1;

	/*
	 * Don't link default services if they have been disabled
	 */
	corosync_api->object_find_create (
		OBJECT_PARENT_HANDLE,
		"aisexec",
		strlen ("aisexec"),
		&object_find_handle);

	if (corosync_api->object_find_next (
		object_find_handle,
		&object_service_handle) == 0) {

		if ( ! corosync_api->object_key_get (object_service_handle,
			"defaultservices",
			strlen ("defaultservices"),
			(void *)&value,
			NULL)) {

			if (value && strcmp (value, "no") == 0) {
				requested = 0;
			}
		}
	}

	/* single exit so the find handle is never leaked */
	corosync_api->object_find_destroy (object_find_handle);

	return (requested);
}
/*
* Load one service engine through lcr, initialize it and publish it in
* the object database.
*
* Sequence: reference the interface, fetch the engine descriptor, store
* it in ais_service[service->id], register the plugin's logging callsites,
* run config_init_fn and exec_init_fn when present, create a "service"
* object (name, ver, handle, service_id) under the internal configuration
* object, create the per-service stats objects, then initialize IPC for
* the service.
*
* Returns (unsigned int)-1 when the interface cannot be referenced.
* Otherwise returns the result of creating the "handle" key; the results
* of config_init_fn/exec_init_fn are overwritten before the return.
*
* NOTE(review): service->id and the exec function index are used as array
* indexes without a range check in this function - confirm the bounds are
* guaranteed elsewhere.
*/
unsigned int corosync_service_link_and_init (
struct corosync_api_v1 *corosync_api,
const char *service_name,
unsigned int service_ver)
{
struct corosync_service_engine_iface_ver0 *iface_ver0;
void *iface_ver0_p;
hdb_handle_t handle;
struct corosync_service_engine *service;
int res;
hdb_handle_t object_service_handle;
hdb_handle_t object_stats_handle;
int fn;
char object_name[32];
char *name_sufix;
uint64_t zero_64 = 0;
+ void* _start;
+ void* _stop;
/*
* reference the service interface
*/
iface_ver0_p = NULL;
res = lcr_ifact_reference (
&handle,
service_name,
service_ver,
&iface_ver0_p,
(void *)0);
iface_ver0 = (struct corosync_service_engine_iface_ver0 *)iface_ver0_p;
if (res == -1 || iface_ver0 == 0) {
log_printf(LOGSYS_LEVEL_ERROR, "Service failed to load '%s'.\n", service_name);
return (-1);
}
/*
* Initialize service
*/
service = iface_ver0->corosync_get_service_engine_ver0();
ais_service[service->id] = service;
+
+ /* begin */
+ /* look up the plugin's callsite section bounds and register them with libqb */
+ _start = lcr_ifact_addr_get(handle, "__start___verbose");
+ _stop = lcr_ifact_addr_get(handle, "__stop___verbose");
+ qb_log_callsites_register(_start, _stop);
+ /* end */
+
if (service->config_init_fn) {
res = service->config_init_fn (corosync_api);
}
if (service->exec_init_fn) {
res = service->exec_init_fn (corosync_api);
}
/*
* Store service in object database
*/
corosync_api->object_create (object_internal_configuration_handle,
&object_service_handle,
"service",
strlen ("service"));
corosync_api->object_key_create_typed (object_service_handle,
"name",
service_name,
strlen (service_name) + 1, OBJDB_VALUETYPE_STRING);
corosync_api->object_key_create_typed (object_service_handle,
"ver",
&service_ver,
sizeof (service_ver), OBJDB_VALUETYPE_UINT32);
/* this result is what the function finally returns */
res = corosync_api->object_key_create_typed (object_service_handle,
"handle",
&handle,
sizeof (handle), OBJDB_VALUETYPE_UINT64);
corosync_api->object_key_create_typed (object_service_handle,
"service_id",
&service->id,
sizeof (service->id), OBJDB_VALUETYPE_UINT16);
/* stats object is named after the text following the last '_' of the service name */
name_sufix = strrchr (service_name, '_');
if (name_sufix)
name_sufix++;
else
name_sufix = (char*)service_name;
corosync_api->object_create (object_stats_services_handle,
&object_stats_handle,
name_sufix, strlen (name_sufix));
/* NOTE(review): typed INT16 here but UINT16 on the "service" object above */
corosync_api->object_key_create_typed (object_stats_handle,
"service_id",
&service->id, sizeof (service->id),
OBJDB_VALUETYPE_INT16);
/* one child object per exec function, each with zeroed "tx"/"rx" counters */
for (fn = 0; fn < service->exec_engine_count; fn++) {
snprintf (object_name, 32, "%d", fn);
corosync_api->object_create (object_stats_handle,
&service_stats_handle[service->id][fn],
object_name, strlen (object_name));
corosync_api->object_key_create_typed (service_stats_handle[service->id][fn],
"tx",
&zero_64, sizeof (zero_64),
OBJDB_VALUETYPE_UINT64);
corosync_api->object_key_create_typed (service_stats_handle[service->id][fn],
"rx",
&zero_64, sizeof (zero_64),
OBJDB_VALUETYPE_UINT64);
}
log_printf (LOGSYS_LEVEL_NOTICE,
"Service engine loaded: %s [%d]\n", service->name, service->id);
cs_ipcs_service_init(service);
return (res);
}
/*
 * Return the highest priority among the currently loaded service engines,
 * or 0 when none is loaded or no loaded engine has a priority above 0.
 */
static int service_priority_max(void)
{
	int highest = 0;
	int slot;

	for (slot = 0; slot < SERVICE_HANDLER_MAXIMUM_COUNT; slot++) {
		if (ais_service[slot] == NULL) {
			continue;
		}
		if (ais_service[slot]->priority > highest) {
			highest = ais_service[slot]->priority;
		}
	}

	return highest;
}
/*
* use the force
*
* Resumable step of the "unload every service engine" sequence.
*
* Walks priorities downwards from *current_priority to lowest_priority
* and, within each priority, the ais_service[] slots from 0 upwards.  For
* the first loaded engine found at the current priority it locates the
* matching "service" object, runs the engine's exec_exit_fn (if any) and
* reports the engine to the caller.
*
* The position is kept in *current_priority / *current_service_engine so
* the caller can invoke the function repeatedly.  The slot scan restarts
* at 0 on every call; slots that are NULL are skipped.
*
* Returns:
*  (unsigned int)-1  exec_exit_fn reported busy (-1); call again later
*  1                 an engine was selected; *current_service_engine and
*                    *current_service_handle describe it, its
*                    ais_service_exiting[] flag is set; call again
*  0                 nothing left to unlink
*/
static unsigned int
corosync_service_unlink_priority (
struct corosync_api_v1 *corosync_api,
int lowest_priority,
int *current_priority,
int *current_service_engine,
hdb_handle_t *current_service_handle)
{
unsigned short *service_id;
hdb_handle_t object_service_handle;
hdb_handle_t object_find_handle;
hdb_handle_t *found_service_handle;
for(; *current_priority >= lowest_priority; *current_priority = *current_priority - 1) {
for(*current_service_engine = 0;
*current_service_engine < SERVICE_HANDLER_MAXIMUM_COUNT;
*current_service_engine = *current_service_engine + 1) {
/* skip empty slots and engines of a different priority */
if(ais_service[*current_service_engine] == NULL ||
ais_service[*current_service_engine]->priority != *current_priority) {
continue;
}
/*
* find service object in object database by service id
* and unload it if possible.
*
* If the service engine's exec_exit_fn returns -1 indicating
* it was busy, this function returns -1 and can be called again
* at a later time (usually via the schedwrk api).
*/
corosync_api->object_find_create (
object_internal_configuration_handle,
"service", strlen ("service"), &object_find_handle);
while (corosync_api->object_find_next (
object_find_handle, &object_service_handle) == 0) {
int res = corosync_api->object_key_get (
object_service_handle,
"service_id", strlen ("service_id"),
(void *)&service_id, NULL);
if (res == 0 && *service_id ==
ais_service[*current_service_engine]->id) {
if (ais_service[*service_id]->exec_exit_fn) {
res = ais_service[*service_id]->exec_exit_fn ();
if (res == -1) {
corosync_api->object_find_destroy (object_find_handle);
return (-1);
}
}
/* NOTE(review): the result of this lookup is not checked before the dereference below */
res = corosync_api->object_key_get (
object_service_handle,
"handle", strlen ("handle"),
(void *)&found_service_handle,
NULL);
*current_service_handle = *found_service_handle;
ais_service_exiting[*current_service_engine] = 1;
corosync_api->object_find_destroy (object_find_handle);
/*
* Call should call this function again
*/
return (1);
}
}
corosync_api->object_find_destroy (object_find_handle);
}
}
/*
* We finish unlink of all services -> no need to call this function again
*/
return (0);
}
/*
 * Unload a single service engine identified by name and version.
 *
 * First removes the engine's stats object (named after the text following
 * the last '_' of service_name).  Then searches the "service" objects of
 * the internal configuration for a matching name/version whose service id
 * refers to a loaded engine, runs its exec_exit_fn, tears down its IPC,
 * releases the lcr interface and destroys the "service" object.
 *
 * Returns (unsigned int)-1 when exec_exit_fn reports -1, 0 otherwise
 * (including when no matching service is found).
 *
 * The objdb iterator is released before exec_exit_fn runs, as before, but
 * the function now returns straight after unloading instead of touching
 * the released iterator again.
 */
static unsigned int service_unlink_and_exit (
	struct corosync_api_v1 *corosync_api,
	const char *service_name,
	unsigned int service_ver)
{
	hdb_handle_t object_service_handle;
	char *found_service_name;
	unsigned short *service_id;
	unsigned int *found_service_ver;
	hdb_handle_t object_find_handle;
	hdb_handle_t *found_service_handle;
	char *name_sufix;
	int res;

	name_sufix = strrchr (service_name, '_');
	if (name_sufix)
		name_sufix++;
	else
		name_sufix = (char*)service_name;

	/* drop the per-service stats object, if one exists */
	corosync_api->object_find_create (
		object_stats_services_handle,
		name_sufix, strlen (name_sufix),
		&object_find_handle);

	if (corosync_api->object_find_next (
		object_find_handle,
		&object_service_handle) == 0) {

		corosync_api->object_destroy (object_service_handle);
	}
	corosync_api->object_find_destroy (object_find_handle);

	corosync_api->object_find_create (
		object_internal_configuration_handle,
		"service",
		strlen ("service"),
		&object_find_handle);

	while (corosync_api->object_find_next (
		object_find_handle,
		&object_service_handle) == 0) {

		/* reset the out-pointers so a failed lookup is detectable */
		found_service_name = NULL;
		corosync_api->object_key_get (object_service_handle,
			"name",
			strlen ("name"),
			(void *)&found_service_name,
			NULL);

		if (found_service_name == NULL ||
		    strcmp (service_name, found_service_name) != 0) {
			continue;
		}

		found_service_ver = NULL;
		corosync_api->object_key_get (object_service_handle,
			"ver",
			strlen ("ver"),
			(void *)&found_service_ver,
			NULL);

		/*
		 * If service found and linked exit it
		 */
		if (found_service_ver == NULL ||
		    service_ver != *found_service_ver) {
			continue;
		}

		service_id = NULL;
		corosync_api->object_key_get (
			object_service_handle,
			"service_id", strlen ("service_id"),
			(void *)&service_id, NULL);

		if (service_id == NULL
			|| *service_id >= SERVICE_HANDLER_MAXIMUM_COUNT
			|| ais_service[*service_id] == NULL) {
			continue;
		}

		/* iteration ends here: release the iterator exactly once */
		corosync_api->object_find_destroy (object_find_handle);

		if (ais_service[*service_id]->exec_exit_fn) {
			res = ais_service[*service_id]->exec_exit_fn ();
			if (res == -1) {
				return (-1);
			}
		}

		log_printf(LOGSYS_LEVEL_NOTICE,
			"Service engine unloaded: %s\n",
			ais_service[*service_id]->name);

		ais_service[*service_id] = NULL;

		found_service_handle = NULL;
		res = corosync_api->object_key_get (
			object_service_handle,
			"handle", strlen ("handle"),
			(void *)&found_service_handle,
			NULL);

		cs_ipcs_service_destroy (*service_id);

		if (res == 0 && found_service_handle != NULL) {
			lcr_ifact_release (*found_service_handle);
		}

		corosync_api->object_destroy (object_service_handle);

		return (0);
	}

	corosync_api->object_find_destroy (object_find_handle);

	return (0);
}
/*
 * Links default services into the executive
 *
 * Creates the "services" stats object under the first "runtime" object
 * (when one exists) and the "internal_configuration" object, links every
 * top-level "service" object that carries a name (its "ver" key is parsed
 * with atoi, defaulting to 0), and finally links the built-in
 * default_services[] list unless default_services_requested() returns 0.
 *
 * Results of the individual link attempts are not inspected; the function
 * always returns 0.
 */
unsigned int corosync_service_defaults_link_and_init (struct corosync_api_v1 *corosync_api)
{
	unsigned int i;
	hdb_handle_t object_service_handle;
	char *found_service_name;
	char *found_service_ver;
	unsigned int found_service_ver_atoi;
	hdb_handle_t object_find_handle;
	hdb_handle_t object_find2_handle;
	hdb_handle_t object_runtime_handle;

	corosync_api->object_find_create (
		OBJECT_PARENT_HANDLE,
		"runtime",
		strlen ("runtime"),
		&object_find2_handle);

	if (corosync_api->object_find_next (
		object_find2_handle,
		&object_runtime_handle) == 0) {

		corosync_api->object_create (object_runtime_handle,
			&object_stats_services_handle,
			"services", strlen ("services"));
	}
	/* this iterator was previously never released */
	corosync_api->object_find_destroy (object_find2_handle);

	corosync_api->object_create (OBJECT_PARENT_HANDLE,
		&object_internal_configuration_handle,
		"internal_configuration",
		strlen ("internal_configuration"));

	corosync_api->object_find_create (
		OBJECT_PARENT_HANDLE,
		"service",
		strlen ("service"),
		&object_find_handle);

	while (corosync_api->object_find_next (
		object_find_handle,
		&object_service_handle) == 0) {

		found_service_name = NULL;
		corosync_api->object_key_get (object_service_handle,
			"name",
			strlen ("name"),
			(void *)&found_service_name,
			NULL);

		if (found_service_name == NULL) {
			/* no usable name: nothing to load for this object */
			continue;
		}

		found_service_ver = NULL;

		corosync_api->object_key_get (object_service_handle,
			"ver",
			strlen ("ver"),
			(void *)&found_service_ver,
			NULL);

		found_service_ver_atoi = (found_service_ver ? atoi (found_service_ver) : 0);

		corosync_service_link_and_init (
			corosync_api,
			found_service_name,
			found_service_ver_atoi);
	}

	corosync_api->object_find_destroy (object_find_handle);

	if (default_services_requested (corosync_api) == 0) {
		return (0);
	}

	for (i = 0;
		i < sizeof (default_services) / sizeof (default_services[0]); i++) {

		corosync_service_link_and_init (
			corosync_api,
			default_services[i].name,
			default_services[i].ver);
	}

	return (0);
}
/*
* Declaration of exit_schedwrk_handler, because of cycle
* (service_exit_schedwrk_handler calls service_unlink_schedwrk_handler, and vice-versa)
*/
static void service_exit_schedwrk_handler (void *data);
/*
 * Loop job that finishes unloading the engine described by data (a
 * struct seus_handler_data).  While cs_ipcs_service_destroy() returns -1
 * the job re-queues itself; once it does not, the engine slot is cleared,
 * the lcr interface is released and service_exit_schedwrk_handler() is
 * queued to continue with the next engine.
 */
static void service_unlink_schedwrk_handler (void *data) {
	struct seus_handler_data *cb_data = (struct seus_handler_data *)data;

	/*
	 * Exit all ipc connections dependent on this service
	 */
	if (cs_ipcs_service_destroy (cb_data->service_engine) == -1) {
		/* not finished yet: run this handler again */
		qb_loop_job_add(cs_poll_handle_get(),
			QB_LOOP_HIGH,
			data,
			service_unlink_schedwrk_handler);
		return;
	}

	log_printf(LOGSYS_LEVEL_NOTICE,
		"Service engine unloaded: %s\n",
		ais_service[cb_data->service_engine]->name);

	ais_service[cb_data->service_engine] = NULL;

	lcr_ifact_release (cb_data->service_handle);

	qb_loop_job_add(cs_poll_handle_get(),
		QB_LOOP_HIGH,
		data,
		service_exit_schedwrk_handler);
}
/*
 * Loop job that drives the unloading of all service engines.
 *
 * Progress (current priority and engine index) is kept in function-local
 * statics between invocations. Each run asks
 * corosync_service_unlink_priority() for the next step:
 *   0     - call the completion callback and stop;
 *   1     - record the engine/handle in the job data and queue
 *           service_unlink_schedwrk_handler;
 *   other - queue this handler again.
 */
static void service_exit_schedwrk_handler (void *data) {
	static int current_priority = 0;
	static int current_service_engine = 0;
	static int called = 0;
	struct seus_handler_data *handler_state = (struct seus_handler_data *)data;
	struct corosync_api_v1 *api = (struct corosync_api_v1 *)handler_state->api;
	hdb_handle_t service_handle;
	int step;

	/* First invocation only: announce and start from the top priority. */
	if (called == 0) {
		log_printf(LOGSYS_LEVEL_NOTICE,
			"Unloading all Corosync service engines.\n");
		current_priority = service_priority_max ();
		called = 1;
	}

	step = corosync_service_unlink_priority (
		api,
		0,
		&current_priority,
		&current_service_engine,
		&service_handle);

	switch (step) {
	case 0:
		service_unlink_all_complete();
		break;
	case 1:
		handler_state->service_engine = current_service_engine;
		handler_state->service_handle = service_handle;
		qb_loop_job_add(cs_poll_handle_get(),
			QB_LOOP_HIGH,
			data,
			service_unlink_schedwrk_handler);
		break;
	default:
		qb_loop_job_add(cs_poll_handle_get(),
			QB_LOOP_HIGH,
			data,
			service_exit_schedwrk_handler);
		break;
	}
}
/*
 * Start unloading all service engines.
 *
 * api                 - corosync API table (must not be NULL; asserted).
 * unlink_all_complete - stored as the completion callback on every call.
 *
 * Only the first call queues service_exit_schedwrk_handler; later calls
 * just replace the stored callback and return.
 */
void corosync_service_unlink_all (
	struct corosync_api_v1 *api,
	void (*unlink_all_complete) (void))
{
	static int already_started = 0;
	static struct seus_handler_data cb_data;

	assert (api);

	service_unlink_all_complete = unlink_all_complete;

	if (already_started != 0) {
		return;
	}
	already_started = 1;

	cb_data.api = api;
	qb_loop_job_add(cs_poll_handle_get(),
		QB_LOOP_HIGH,
		&cb_data,
		service_exit_schedwrk_handler);
}
/*
 * Job context passed to service_unlink_and_exit_schedwrk_handler.
 * Allocated by corosync_service_unlink_and_exit().
 */
struct service_unlink_and_exit_data {
/* NOTE(review): not written by corosync_service_unlink_and_exit() - confirm whether it is still used */
hdb_handle_t handle;
/* API table handed to service_unlink_and_exit() */
struct corosync_api_v1 *api;
/* strdup()'d copy of the service name */
const char *name;
/* service version handed to service_unlink_and_exit() */
unsigned int ver;
};
/*
 * Loop job that calls service_unlink_and_exit() for the service described
 * by data (a struct service_unlink_and_exit_data).
 *
 * When the call returns 0 the job context is released, including the name
 * string that corosync_service_unlink_and_exit() duplicated with strdup().
 * Any other result re-queues the job with the same context.
 */
static void service_unlink_and_exit_schedwrk_handler (void *data)
{
	struct service_unlink_and_exit_data *service_unlink_and_exit_data =
		data;
	int res;

	res = service_unlink_and_exit (
		service_unlink_and_exit_data->api,
		service_unlink_and_exit_data->name,
		service_unlink_and_exit_data->ver);

	if (res == 0) {
		/* The name is owned by the context; free it with the context. */
		free ((char *)service_unlink_and_exit_data->name);
		free (service_unlink_and_exit_data);
	} else {
		qb_loop_job_add(cs_poll_handle_get(),
			QB_LOOP_HIGH,
			data,
			service_unlink_and_exit_schedwrk_handler);
	}
}
/*
 * Pointer to a function taking a const void * context and returning int.
 * NOTE(review): no use of this typedef is visible nearby - confirm it is still needed.
 */
typedef int (*schedwrk_cast) (const void *);
/*
 * Queue a loop job that unlinks the named service engine.
 *
 * api          - corosync API table (must not be NULL; asserted).
 * service_name - name of the service; copied with strdup().
 * service_ver  - version of the service.
 *
 * Returns 0 once the job is queued, or (unsigned int)-1 when the job
 * context cannot be allocated.
 */
unsigned int corosync_service_unlink_and_exit (
	struct corosync_api_v1 *api,
	const char *service_name,
	unsigned int service_ver)
{
	struct service_unlink_and_exit_data *service_unlink_and_exit_data;
	char *name_copy;

	assert (api);

	/* calloc so that fields not assigned below start out as zero */
	service_unlink_and_exit_data = calloc (1, sizeof (*service_unlink_and_exit_data));
	if (service_unlink_and_exit_data == NULL) {
		return (-1);
	}

	name_copy = strdup (service_name);
	if (name_copy == NULL) {
		free (service_unlink_and_exit_data);
		return (-1);
	}

	service_unlink_and_exit_data->api = api;
	service_unlink_and_exit_data->name = name_copy;
	service_unlink_and_exit_data->ver = service_ver;

	qb_loop_job_add(cs_poll_handle_get(),
		QB_LOOP_HIGH,
		service_unlink_and_exit_data,
		service_unlink_and_exit_schedwrk_handler);

	return (0);
}
diff --git a/exec/syncv2.c b/exec/syncv2.c
index efa37785..f9eebacf 100644
--- a/exec/syncv2.c
+++ b/exec/syncv2.c
@@ -1,624 +1,624 @@
/*
* Copyright (c) 2009 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <netinet/in.h>
#include <sys/uio.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <time.h>
#include <unistd.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <corosync/corotypes.h>
#include <corosync/swab.h>
#include <corosync/totem/totempg.h>
#include <corosync/totem/totem.h>
#include <corosync/lcr/lcr_ifact.h>
#include <corosync/engine/logsys.h>
#include <qb/qbipc_common.h>
#include "schedwrk.h"
#include "quorum.h"
#include "sync.h"
#include "syncv2.h"
/* NOTE(review): presumably names the logging subsystem for this file - confirm in logsys.h */
LOGSYS_DECLARE_SUBSYS ("SYNCV2");
/*
 * Message ids stored in header.id of the messages multicast by this file;
 * sync_deliver_fn dispatches on them.
 */
#define MESSAGE_REQ_SYNC_BARRIER 0
#define MESSAGE_REQ_SYNC_SERVICE_BUILD 1
#define MESSAGE_REQ_SYNC_MEMB_DETERMINE 2
/*
 * Per-service progress, stored in struct service_entry.state.
 * schedwrk_processor moves an entry from INIT to PROCESS;
 * sync_barrier_handler sets ACTIVATE once the barrier is reached.
 */
enum sync_process_state {
INIT,
PROCESS,
ACTIVATE
};
/*
 * Overall state of this module, stored in my_state and set by
 * sync_servicelist_build_enter, sync_process_enter and sync_barrier_enter.
 */
enum sync_state {
SYNC_SERVICELIST_BUILD,
SYNC_PROCESS,
SYNC_BARRIER
};
/*
 * One service taking part in synchronization: its id, the callbacks
 * obtained from sync_callbacks_retrieve (or dummy callbacks for services
 * only known from other nodes) and its current progress.
 */
struct service_entry {
int service_id;
/* 1 selects sync_init_v1; any other value selects sync_init_v2 (see schedwrk_processor) */
int api_version;
union sync_init_api sync_init_api;
void (*sync_abort) (void);
/* a return of 0 makes schedwrk_processor enter the barrier; non-zero makes it return -1 */
int (*sync_process) (void);
void (*sync_activate) (void);
enum sync_process_state state;
/* name used in log messages */
char name[128];
};
/*
 * One member of the current processor list and whether a message for the
 * current phase has been received from it (1) or not (0).
 */
struct processor_entry {
int nodeid;
int received;
};
/*
 * Message sent by memb_determine_message_transmit; carries the ring id that
 * sync_memb_determine compares against my_memb_determine_ring_id.
 */
struct req_exec_memb_determine_message {
struct qb_ipc_request_header header __attribute__((aligned(8)));
struct memb_ring_id ring_id __attribute__((aligned(8)));
};
/*
 * Message sent by service_build_message_transmit; lists the service ids
 * known to the sender for the given ring.
 */
struct req_exec_service_build_message {
struct qb_ipc_request_header header __attribute__((aligned(8)));
struct memb_ring_id ring_id __attribute__((aligned(8)));
/* number of valid entries in service_list */
int service_list_entries __attribute__((aligned(8)));
int service_list[128] __attribute__((aligned(8)));
};
/*
 * Message sent by barrier_message_transmit; carries the ring id that
 * sync_barrier_handler compares against my_ring_id.
 */
struct req_exec_barrier_message {
struct qb_ipc_request_header header __attribute__((aligned(8)));
struct memb_ring_id ring_id __attribute__((aligned(8)));
};
/* Current module state. */
static enum sync_state my_state = SYNC_BARRIER;
/* Ring id of the synchronization in progress; zeroed by sync_v2_abort. */
static struct memb_ring_id my_ring_id;
/* Ring id used for membership determination messages. */
static struct memb_ring_id my_memb_determine_ring_id;
/* Set to 1 when a matching memb-determine message arrived; consumed by sync_v2_start. */
static int my_memb_determine = 0;
/* Node ids collected from memb-determine messages. */
static unsigned int my_memb_determine_list[PROCESSOR_COUNT_MAX];
static unsigned int my_memb_determine_list_entries = 0;
/* Index into my_service_list of the service currently being synchronized. */
static int my_processing_idx = 0;
static hdb_handle_t my_schedwrk_handle;
/* Members of the current synchronization and their per-phase "received" flags. */
static struct processor_entry my_processor_list[PROCESSOR_COUNT_MAX];
static unsigned int my_member_list[PROCESSOR_COUNT_MAX];
/* List saved by sync_v2_save_transitional. */
static unsigned int my_trans_list[PROCESSOR_COUNT_MAX];
static size_t my_member_list_entries = 0;
static size_t my_trans_list_entries = 0;
static int my_processor_list_entries = 0;
/* Working service list: local services plus services reported by other nodes. */
static struct service_entry my_service_list[128];
static int my_service_list_entries = 0;
static const struct memb_ring_id sync_ring_id;
/*
 * Services registered locally by sync_v2_init.
 * NOTE(review): sized by PROCESSOR_COUNT_MAX although it is indexed by
 * service count - confirm the intended bound.
 */
static struct service_entry my_initial_service_list[PROCESSOR_COUNT_MAX];
static int my_initial_service_list_entries;
/* Callback invoked when all services have been synchronized. */
static void (*sync_synchronization_completed) (void);
static void sync_deliver_fn (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required);
static int schedwrk_processor (const void *context);
static void sync_process_enter (void);
/* Group joined in sync_v2_init and used for all multicasts of this file. */
static struct totempg_group sync_group = {
.group = "syncv2",
.group_len = 6
};
static hdb_handle_t sync_group_handle;
/*
 * Initialise the synchronization module.
 *
 * sync_callbacks_retrieve    - queried for every service id from 0 to 63;
 *                              a return of -1 means the id is skipped.
 * synchronization_completed  - stored and invoked once all services have
 *                              been synchronized.
 *
 * Registers sync_deliver_fn with totempg, joins sync_group and records the
 * callbacks of every service that has a non-NULL sync_init_v1 in
 * my_initial_service_list.
 *
 * Returns 0 on success, -1 when the group interface cannot be initialised
 * or joined.
 */
int sync_v2_init (
int (*sync_callbacks_retrieve) (
int service_id,
struct sync_callbacks *callbacks),
void (*synchronization_completed) (void))
{
unsigned int res;
int i;
struct sync_callbacks sync_callbacks;
res = totempg_groups_initialize (
&sync_group_handle,
sync_deliver_fn,
NULL);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR,
- "Couldn't initialize groups interface.\n");
+ "Couldn't initialize groups interface.");
return (-1);
}
res = totempg_groups_join (
sync_group_handle,
&sync_group,
1);
if (res == -1) {
log_printf (LOGSYS_LEVEL_ERROR, "Couldn't join group.\n");
return (-1);
}
sync_synchronization_completed = synchronization_completed;
/* Collect the callbacks of every locally available service. */
for (i = 0; i < 64; i++) {
res = sync_callbacks_retrieve (i, &sync_callbacks);
if (res == -1) {
continue;
}
/* Services without an init callback do not take part. */
if (sync_callbacks.sync_init_api.sync_init_v1 == NULL) {
continue;
}
my_initial_service_list[my_initial_service_list_entries].state =
INIT;
my_initial_service_list[my_initial_service_list_entries].service_id = i;
/* NOTE(review): unbounded copy into name[128] - confirm sync_callbacks.name always fits */
strcpy (my_initial_service_list[my_initial_service_list_entries].name,
sync_callbacks.name);
my_initial_service_list[my_initial_service_list_entries].api_version = sync_callbacks.api_version;
my_initial_service_list[my_initial_service_list_entries].sync_init_api = sync_callbacks.sync_init_api;
my_initial_service_list[my_initial_service_list_entries].sync_process = sync_callbacks.sync_process;
my_initial_service_list[my_initial_service_list_entries].sync_abort = sync_callbacks.sync_abort;
my_initial_service_list[my_initial_service_list_entries].sync_activate = sync_callbacks.sync_activate;
my_initial_service_list_entries += 1;
}
return (0);
}
/*
 * Handle a barrier message from nodeid.
 *
 * Messages whose ring id differs from my_ring_id are logged and dropped.
 * Otherwise the sender is marked as received; once every processor has
 * been marked, the current service is activated and either the next
 * service is started or the completion callback is invoked.
 */
static void sync_barrier_handler (unsigned int nodeid, const void *msg)
{
	const struct req_exec_barrier_message *barrier_msg = msg;
	int all_received = 1;
	int idx;

	if (memcmp (&my_ring_id, &barrier_msg->ring_id,
		sizeof (struct memb_ring_id)) != 0) {
		log_printf (LOGSYS_LEVEL_DEBUG, "barrier for old ring - discarding\n");
		return;
	}

	/* Mark the sender and, in the same pass, look for missing nodes. */
	for (idx = 0; idx < my_processor_list_entries; idx++) {
		if (my_processor_list[idx].nodeid == nodeid) {
			my_processor_list[idx].received = 1;
		}
		if (my_processor_list[idx].received == 0) {
			all_received = 0;
		}
	}

	if (!all_received) {
		return;
	}

	log_printf (LOGSYS_LEVEL_DEBUG, "Committing synchronization for %s\n",
		my_service_list[my_processing_idx].name);
	my_service_list[my_processing_idx].state = ACTIVATE;
	my_service_list[my_processing_idx].sync_activate ();

	my_processing_idx += 1;
	if (my_service_list_entries != my_processing_idx) {
		sync_process_enter ();
		return;
	}
	my_memb_determine_list_entries = 0;
	sync_synchronization_completed ();
}
/*
 * No-op init callback, installed for services that are only known from
 * other nodes' service lists.
 */
static void dummy_sync_init (
	const unsigned int *member_list,
	size_t member_list_entries,
	const struct memb_ring_id *ring_id)
{
	(void)member_list;
	(void)member_list_entries;
	(void)ring_id;
}
/* No-op abort callback for services only known from other nodes. */
static void dummy_sync_abort (void)
{
	return;
}
/*
 * No-op process callback for services only known from other nodes.
 * Always returns 0, which schedwrk_processor treats as "enter the barrier".
 */
static int dummy_sync_process (void)
{
	const int finished = 0;

	return finished;
}
/* No-op activate callback for services only known from other nodes. */
static void dummy_sync_activate (void)
{
	return;
}
/*
 * qsort() comparator ordering struct service_entry by ascending service_id.
 *
 * Returns a negative value, zero or a positive value when a's service_id is
 * less than, equal to or greater than b's, as qsort() requires. A plain
 * "a > b" result never reports "less than" and makes the ordering
 * inconsistent.
 */
static int service_entry_compare (const void *a, const void *b)
{
	const struct service_entry *service_entry_a = a;
	const struct service_entry *service_entry_b = b;

	return ((service_entry_a->service_id > service_entry_b->service_id) -
		(service_entry_a->service_id < service_entry_b->service_id));
}
static void sync_memb_determine (unsigned int nodeid, const void *msg)
{
const struct req_exec_memb_determine_message *req_exec_memb_determine_message = msg;
int found = 0;
int i;
if (memcmp (&req_exec_memb_determine_message->ring_id,
&my_memb_determine_ring_id, sizeof (struct memb_ring_id)) != 0) {
log_printf (LOGSYS_LEVEL_DEBUG, "memb determine for old ring - discarding\n");
return;
}
my_memb_determine = 1;
for (i = 0; i < my_memb_determine_list_entries; i++) {
if (my_memb_determine_list[i] == nodeid) {
found = 1;
}
}
if (found == 0) {
my_memb_determine_list[my_memb_determine_list_entries] = nodeid;
my_memb_determine_list_entries += 1;
}
}
static void sync_service_build_handler (unsigned int nodeid, const void *msg)
{
const struct req_exec_service_build_message *req_exec_service_build_message = msg;
int i, j;
int barrier_reached = 1;
int found;
int qsort_trigger = 0;
if (memcmp (&my_ring_id, &req_exec_service_build_message->ring_id,
sizeof (struct memb_ring_id)) != 0) {
log_printf (LOGSYS_LEVEL_DEBUG, "service build for old ring - discarding\n");
return;
}
for (i = 0; i < req_exec_service_build_message->service_list_entries; i++) {
found = 0;
for (j = 0; j < my_service_list_entries; j++) {
if (req_exec_service_build_message->service_list[i] ==
my_service_list[j].service_id) {
found = 1;
break;
}
}
if (found == 0) {
my_service_list[my_service_list_entries].state =
INIT;
my_service_list[my_service_list_entries].service_id =
req_exec_service_build_message->service_list[i];
sprintf (my_service_list[my_service_list_entries].name,
"External Service (id = %d)\n",
req_exec_service_build_message->service_list[i]);
my_service_list[my_service_list_entries].api_version = 1;
my_service_list[my_service_list_entries].sync_init_api.sync_init_v1 =
dummy_sync_init;
my_service_list[my_service_list_entries].sync_abort =
dummy_sync_abort;
my_service_list[my_service_list_entries].sync_process =
dummy_sync_process;
my_service_list[my_service_list_entries].sync_activate =
dummy_sync_activate;
my_service_list_entries += 1;
qsort_trigger = 1;
}
}
if (qsort_trigger) {
qsort (my_service_list, my_service_list_entries,
sizeof (struct service_entry), service_entry_compare);
}
for (i = 0; i < my_processor_list_entries; i++) {
if (my_processor_list[i].nodeid == nodeid) {
my_processor_list[i].received = 1;
}
}
for (i = 0; i < my_processor_list_entries; i++) {
if (my_processor_list[i].received == 0) {
barrier_reached = 0;
}
}
if (barrier_reached) {
sync_process_enter ();
}
}
/*
 * Delivery callback registered with totempg in sync_v2_init.
 *
 * Reads the request header at the start of msg and passes the message to
 * the handler that matches header.id. Messages with any other id are
 * ignored. msg_len and endian_conversion_required are not used.
 */
static void sync_deliver_fn (
	unsigned int nodeid,
	const void *msg,
	unsigned int msg_len,
	int endian_conversion_required)
{
	struct qb_ipc_request_header *req_header = (struct qb_ipc_request_header *)msg;

	if (req_header->id == MESSAGE_REQ_SYNC_BARRIER) {
		sync_barrier_handler (nodeid, msg);
	} else if (req_header->id == MESSAGE_REQ_SYNC_SERVICE_BUILD) {
		sync_service_build_handler (nodeid, msg);
	} else if (req_header->id == MESSAGE_REQ_SYNC_MEMB_DETERMINE) {
		sync_memb_determine (nodeid, msg);
	}
}
static void memb_determine_message_transmit (void)
{
struct iovec iovec;
struct req_exec_memb_determine_message req_exec_memb_determine_message;
req_exec_memb_determine_message.header.size = sizeof (struct req_exec_memb_determine_message);
req_exec_memb_determine_message.header.id = MESSAGE_REQ_SYNC_MEMB_DETERMINE;
memcpy (&req_exec_memb_determine_message.ring_id,
&my_memb_determine_ring_id,
sizeof (struct memb_ring_id));
iovec.iov_base = (char *)&req_exec_memb_determine_message;
iovec.iov_len = sizeof (req_exec_memb_determine_message);
(void)totempg_groups_mcast_joined (sync_group_handle,
&iovec, 1, TOTEMPG_AGREED);
}
static void barrier_message_transmit (void)
{
struct iovec iovec;
struct req_exec_barrier_message req_exec_barrier_message;
req_exec_barrier_message.header.size = sizeof (struct req_exec_barrier_message);
req_exec_barrier_message.header.id = MESSAGE_REQ_SYNC_BARRIER;
memcpy (&req_exec_barrier_message.ring_id, &my_ring_id,
sizeof (struct memb_ring_id));
iovec.iov_base = (char *)&req_exec_barrier_message;
iovec.iov_len = sizeof (req_exec_barrier_message);
(void)totempg_groups_mcast_joined (sync_group_handle,
&iovec, 1, TOTEMPG_AGREED);
}
/*
 * Complete the header and ring id of a caller-prepared service-build
 * message and multicast it to the sync group. The service list itself must
 * already be filled in by the caller. The result of the multicast is
 * deliberately ignored.
 */
static void service_build_message_transmit (struct req_exec_service_build_message *service_build_message)
{
	struct iovec vec;

	service_build_message->header.id = MESSAGE_REQ_SYNC_SERVICE_BUILD;
	service_build_message->header.size = sizeof (*service_build_message);
	memcpy (&service_build_message->ring_id, &my_ring_id,
		sizeof (struct memb_ring_id));

	vec.iov_len = sizeof (*service_build_message);
	vec.iov_base = (void *)service_build_message;

	(void)totempg_groups_mcast_joined (sync_group_handle,
		&vec, 1, TOTEMPG_AGREED);
}
/*
 * Switch the module to the SYNC_BARRIER state and announce it to the other
 * nodes with a barrier message.
 */
static void sync_barrier_enter (void)
{
	my_state = SYNC_BARRIER;

	barrier_message_transmit ();
}
/*
 * Enter the SYNC_PROCESS state for the current service.
 *
 * With at least one service in my_service_list, every processor's
 * "received" flag is cleared and schedwrk_processor is scheduled. With no
 * services at all, the state goes back to SYNC_SERVICELIST_BUILD and the
 * completion callback is invoked immediately.
 */
static void sync_process_enter (void)
{
	int idx;

	my_state = SYNC_PROCESS;

	if (my_service_list_entries != 0) {
		for (idx = 0; idx < my_processor_list_entries; idx++) {
			my_processor_list[idx].received = 0;
		}
		schedwrk_create (&my_schedwrk_handle,
			schedwrk_processor,
			NULL);
		return;
	}

	/*
	 * No syncv2 services
	 */
	my_state = SYNC_SERVICELIST_BUILD;
	my_memb_determine_list_entries = 0;
	sync_synchronization_completed ();
}
/*
 * Enter the SYNC_SERVICELIST_BUILD state for a new membership.
 *
 * member_list / member_list_entries - the members to synchronize with;
 *     copied into my_processor_list (with cleared "received" flags) and
 *     my_member_list.
 * ring_id - not used here; my_ring_id is set by the caller.
 *
 * Resets my_service_list to the locally registered services and multicasts
 * the ids of all of them in a service-build message. The message is zeroed
 * first so that unused list slots and padding are not sent uninitialised.
 */
static void sync_servicelist_build_enter (
	const unsigned int *member_list,
	size_t member_list_entries,
	const struct memb_ring_id *ring_id)
{
	struct req_exec_service_build_message service_build;
	const int service_list_capacity = (int)(sizeof (service_build.service_list) /
		sizeof (service_build.service_list[0]));
	size_t m;
	int i;

	my_state = SYNC_SERVICELIST_BUILD;

	for (m = 0; m < member_list_entries; m++) {
		my_processor_list[m].nodeid = member_list[m];
		my_processor_list[m].received = 0;
	}
	my_processor_list_entries = member_list_entries;

	memcpy (my_member_list, member_list,
		member_list_entries * sizeof (unsigned int));
	my_member_list_entries = member_list_entries;

	my_processing_idx = 0;

	memcpy (my_service_list, my_initial_service_list,
		sizeof (struct service_entry) *
		my_initial_service_list_entries);
	my_service_list_entries = my_initial_service_list_entries;

	memset (&service_build, 0, sizeof (service_build));
	/*
	 * Advertise every locally registered service. The bound is the number
	 * of registered services, not the service id stored at index i.
	 */
	for (i = 0;
		i < my_initial_service_list_entries && i < service_list_capacity;
		i++) {
		service_build.service_list[i] =
			my_initial_service_list[i].service_id;
	}
	service_build.service_list_entries = i;

	service_build_message_transmit (&service_build);
}
static int schedwrk_processor (const void *context)
{
int res = 0;
if (my_service_list[my_processing_idx].state == INIT) {
my_service_list[my_processing_idx].state = PROCESS;
if (my_service_list[my_processing_idx].api_version == 1) {
my_service_list[my_processing_idx].sync_init_api.sync_init_v1 (my_member_list,
my_member_list_entries,
&my_ring_id);
} else {
unsigned int old_trans_list[PROCESSOR_COUNT_MAX];
size_t old_trans_list_entries = 0;
int o, m;
memcpy (old_trans_list, my_trans_list, my_trans_list_entries *
sizeof (unsigned int));
old_trans_list_entries = my_trans_list_entries;
my_trans_list_entries = 0;
for (o = 0; o < old_trans_list_entries; o++) {
for (m = 0; m < my_member_list_entries; m++) {
if (old_trans_list[o] == my_member_list[m]) {
my_trans_list[my_trans_list_entries] = my_member_list[m];
my_trans_list_entries++;
break;
}
}
}
my_service_list[my_processing_idx].sync_init_api.sync_init_v2 (my_trans_list,
my_trans_list_entries, my_member_list,
my_member_list_entries,
&my_ring_id);
}
}
if (my_service_list[my_processing_idx].state == PROCESS) {
my_service_list[my_processing_idx].state = PROCESS;
res = my_service_list[my_processing_idx].sync_process ();
if (res == 0) {
sync_barrier_enter();
} else {
return (-1);
}
}
return (0);
}
/*
 * Start synchronization for the given ring.
 *
 * ring_id is copied into my_ring_id. If a membership-determine message was
 * received beforehand (my_memb_determine set), the flag is cleared and the
 * list collected from those messages is used instead of member_list.
 */
void sync_v2_start (
	const unsigned int *member_list,
	size_t member_list_entries,
	const struct memb_ring_id *ring_id)
{
	const unsigned int *effective_list = member_list;
	size_t effective_entries = member_list_entries;

	ENTER();

	memcpy (&my_ring_id, ring_id, sizeof (struct memb_ring_id));

	if (my_memb_determine) {
		my_memb_determine = 0;
		effective_list = my_memb_determine_list;
		effective_entries = my_memb_determine_list_entries;
	}

	sync_servicelist_build_enter (effective_list, effective_entries,
		ring_id);
}
/*
 * Remember the given member list as the transitional list
 * (my_trans_list / my_trans_list_entries). ring_id is not used.
 */
void sync_v2_save_transitional (
	const unsigned int *member_list,
	size_t member_list_entries,
	const struct memb_ring_id *ring_id)
{
	size_t list_bytes;

	ENTER();

	list_bytes = member_list_entries * sizeof (unsigned int);
	memcpy (my_trans_list, member_list, list_bytes);
	my_trans_list_entries = member_list_entries;
}
/*
 * Abort the synchronization in progress.
 *
 * In the SYNC_PROCESS state the scheduled work is destroyed and the current
 * service's abort callback is called. In every state my_ring_id is zeroed.
 */
void sync_v2_abort (void)
{
	ENTER();

	if (my_state == SYNC_PROCESS) {
		schedwrk_destroy (my_schedwrk_handle);
		my_service_list[my_processing_idx].sync_abort ();
	}

	/*
	 * Clearing the ring id prevents any "old" barrier messages from
	 * causing problems: they no longer match and are discarded.
	 */
	memset (&my_ring_id, 0, sizeof (struct memb_ring_id));
}
/*
 * Record ring_id as the membership-determine ring id and multicast a
 * membership-determine message for it.
 */
void sync_v2_memb_list_determine (const struct memb_ring_id *ring_id)
{
	ENTER();

	memcpy (&my_memb_determine_ring_id, ring_id,
		sizeof (my_memb_determine_ring_id));

	memb_determine_message_transmit ();
}
/*
 * Discard the membership-determine state: empty the collected node list
 * and zero the membership-determine ring id.
 */
void sync_v2_memb_list_abort (void)
{
	ENTER();

	my_memb_determine_list_entries = 0;
	memset (&my_memb_determine_ring_id, 0,
		sizeof (my_memb_determine_ring_id));
}
diff --git a/exec/totemconfig.c b/exec/totemconfig.c
index 6bc3494a..224ff316 100644
--- a/exec/totemconfig.c
+++ b/exec/totemconfig.c
@@ -1,957 +1,957 @@
/*
* Copyright (c) 2002-2005 MontaVista Software, Inc.
* Copyright (c) 2006-2010 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/param.h>
#include <corosync/swab.h>
#include <corosync/list.h>
#include <qb/qbdefs.h>
#include <corosync/totem/totem.h>
#include <corosync/engine/objdb.h>
#include <corosync/engine/config.h>
#include <corosync/engine/logsys.h>
#ifdef HAVE_LIBNSS
#include <nss.h>
#include <pk11pub.h>
#include <pkcs11.h>
#include <prerror.h>
#endif
#include "util.h"
#include "totemconfig.h"
/*
 * Built-in values for the totem configuration.
 * NOTE(review): the timeouts look like milliseconds - confirm against the
 * totem documentation.
 */
#define TOKEN_RETRANSMITS_BEFORE_LOSS_CONST 4
#define TOKEN_TIMEOUT 1000
#define TOKEN_RETRANSMIT_TIMEOUT (int)(TOKEN_TIMEOUT / (TOKEN_RETRANSMITS_BEFORE_LOSS_CONST + 0.2))
#define TOKEN_HOLD_TIMEOUT (int)(TOKEN_RETRANSMIT_TIMEOUT * 0.8 - (1000/(int)HZ))
#define JOIN_TIMEOUT 50
#define MERGE_TIMEOUT 200
#define DOWNCHECK_TIMEOUT 1000
#define FAIL_TO_RECV_CONST 2500
#define SEQNO_UNCHANGED_CONST 30
/*
 * Three scheduler ticks. The whole expansion is parenthesised so that the
 * macro behaves as a single value inside larger expressions.
 */
#define MINIMUM_TIMEOUT ((int)(1000/HZ)*3)
#define MAX_NETWORK_DELAY 50
#define WINDOW_SIZE 50
#define MAX_MESSAGES 17
#define MISS_COUNT_CONST 5
#define RRP_PROBLEM_COUNT_TIMEOUT 2000
#define RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT 10
#define RRP_PROBLEM_COUNT_THRESHOLD_MIN 5
#define RRP_AUTORECOVERY_CHECK_TIMEOUT 1000
/* NOTE(review): presumably a buffer for formatted error messages handed back through error_string - confirm at its users */
static char error_string_response[512];
/* NOTE(review): presumably the objdb interface kept for later callbacks - confirm where it is assigned */
static struct objdb_iface_ver0 *global_objdb;
/* Defined later in this file; called at the end of totem_config_read. */
static void add_totem_config_notification(
struct objdb_iface_ver0 *objdb,
struct totem_config *totem_config,
hdb_handle_t totem_object_handle);
/*
 * Convenience wrapper that looks up a string key on an object.
 *
 * *value is set to NULL first, then object_key_get() is asked for key.
 * Returns 0 when the lookup succeeded and produced a non-NULL string,
 * otherwise -1.
 */
static inline int objdb_get_string (
	const struct objdb_iface_ver0 *objdb,
	hdb_handle_t object_service_handle,
	const char *key, const char **value)
{
	*value = NULL;

	if (objdb->object_key_get (object_service_handle,
		key,
		strlen (key),
		(void *)value,
		NULL) != 0) {
		return -1;
	}

	return (*value != NULL) ? 0 : -1;
}
/*
 * Convenience wrapper that looks up a key on an object and, when it is
 * present, stores its atoi() conversion in *intvalue. *intvalue is left
 * untouched when the lookup fails or yields no value.
 */
static inline void objdb_get_int (
	const struct objdb_iface_ver0 *objdb,
	hdb_handle_t object_service_handle,
	const char *key, unsigned int *intvalue)
{
	char *text = NULL;

	if (objdb->object_key_get (object_service_handle,
		key,
		strlen (key),
		(void *)&text,
		NULL) != 0) {
		return;
	}

	if (text != NULL) {
		*intvalue = atoi(text);
	}
}
/*
 * Locate the configuration section that holds the totem settings.
 *
 * A top-level "network" section is tried first; when there is none, a
 * top-level "totem" section is tried. On success the section handle is
 * stored in *totem_find_handle.
 *
 * Returns 0 when a section was found, otherwise -1.
 */
static unsigned int totem_handle_find (
	struct objdb_iface_ver0 *objdb,
	hdb_handle_t *totem_find_handle) {
	static const char *const section_names[] = { "network", "totem" };
	hdb_handle_t object_find_handle;
	unsigned int res = -1;
	size_t s;

	for (s = 0; s < sizeof (section_names) / sizeof (section_names[0]); s++) {
		objdb->object_find_create (
			OBJECT_PARENT_HANDLE,
			section_names[s],
			strlen (section_names[s]),
			&object_find_handle);

		res = objdb->object_find_next (
			object_find_handle,
			totem_find_handle);

		objdb->object_find_destroy (object_find_handle);

		/* Stop at the first section that exists. */
		if (res != -1) {
			break;
		}
	}

	return (res == -1) ? (-1) : (0);
}
/*
 * Read the totem settings that totem_config_read describes as "things that
 * might change in the future" from the totem section into totem_config.
 *
 * Each key that is present overwrites the matching field; fields whose key
 * is absent keep their current value.
 */
static void totem_volatile_config_read (
	struct objdb_iface_ver0 *objdb,
	struct totem_config *totem_config,
	hdb_handle_t object_totem_handle)
{
/* Read one unsigned integer key into the named totem_config field. */
#define TOTEM_READ_UINT(key, field) \
	objdb_get_int (objdb, object_totem_handle, (key), &totem_config->field)

	TOTEM_READ_UINT ("token", token_timeout);
	TOTEM_READ_UINT ("token_retransmit", token_retransmit_timeout);
	TOTEM_READ_UINT ("hold", token_hold_timeout);
	TOTEM_READ_UINT ("token_retransmits_before_loss_const", token_retransmits_before_loss_const);
	TOTEM_READ_UINT ("join", join_timeout);
	TOTEM_READ_UINT ("send_join", send_join_timeout);
	TOTEM_READ_UINT ("consensus", consensus_timeout);
	TOTEM_READ_UINT ("merge", merge_timeout);
	TOTEM_READ_UINT ("downcheck", downcheck_timeout);
	TOTEM_READ_UINT ("fail_recv_const", fail_to_recv_const);
	TOTEM_READ_UINT ("seqno_unchanged_const", seqno_unchanged_const);
	TOTEM_READ_UINT ("rrp_token_expired_timeout", rrp_token_expired_timeout);
	TOTEM_READ_UINT ("rrp_problem_count_timeout", rrp_problem_count_timeout);
	TOTEM_READ_UINT ("rrp_problem_count_threshold", rrp_problem_count_threshold);
	TOTEM_READ_UINT ("rrp_autorecovery_check_timeout", rrp_autorecovery_check_timeout);
	TOTEM_READ_UINT ("heartbeat_failures_allowed", heartbeat_failures_allowed);
	TOTEM_READ_UINT ("max_network_delay", max_network_delay);
	TOTEM_READ_UINT ("window_size", window_size);

	/* The only string-valued setting; a missing key leaves vsf_type NULL. */
	(void)objdb_get_string (objdb, object_totem_handle, "vsftype", &totem_config->vsf_type);

	TOTEM_READ_UINT ("max_messages", max_messages);
	TOTEM_READ_UINT ("miss_count_const", miss_count_const);

#undef TOTEM_READ_UINT
}
/*
 * Fill in the crypto settings of totem_config from the totem section.
 *
 * crypto_accept defaults to TOTEM_CRYPTO_ACCEPT_OLD and becomes
 * TOTEM_CRYPTO_ACCEPT_NEW when the "crypto_accept" key is "new".
 * crypto_type defaults to TOTEM_CRYPTO_SOBER; when built with HAVE_LIBNSS
 * a "crypto_type" of "nss" selects TOTEM_CRYPTO_NSS.
 */
static void totem_get_crypto_type(
	const struct objdb_iface_ver0 *objdb,
	hdb_handle_t object_totem_handle,
	struct totem_config *totem_config)
{
	const char *setting;

	totem_config->crypto_accept = TOTEM_CRYPTO_ACCEPT_OLD;
	if (objdb_get_string (objdb, object_totem_handle, "crypto_accept", &setting) == 0 &&
		strcmp(setting, "new") == 0) {
		totem_config->crypto_accept = TOTEM_CRYPTO_ACCEPT_NEW;
	}

	totem_config->crypto_type = TOTEM_CRYPTO_SOBER;

#ifdef HAVE_LIBNSS
	/*
	 * We must set these even if the key does not exist.
	 * Encryption type can be set on-the-fly using CFG
	 */
	totem_config->crypto_crypt_type = CKM_AES_CBC_PAD;
	totem_config->crypto_sign_type = CKM_SHA256_RSA_PKCS;
#endif

	if (objdb_get_string (objdb, object_totem_handle, "crypto_type", &setting) != 0) {
		return;
	}

#ifdef HAVE_LIBNSS
	/* "sober" is already the default, so only "nss" needs handling. */
	if (strcmp(setting, "sober") != 0 && strcmp(setting, "nss") == 0) {
		totem_config->crypto_type = TOTEM_CRYPTO_NSS;
	}
#endif
}
/*
 * Populate totem_config from the object database.
 *
 * objdb        - object database interface to read from.
 * totem_config - zeroed and then filled in; interfaces is allocated here
 *                with room for INTERFACE_MAX entries.
 * error_string - set to a static message on every failure path.
 *
 * Returns 0 on success. Returns -1 when no totem section exists, when the
 * interface array cannot be allocated, or when an interface section names a
 * ringnumber that does not fit into the interface array.
 */
extern int totem_config_read (
	struct objdb_iface_ver0 *objdb,
	struct totem_config *totem_config,
	const char **error_string)
{
	int res = 0;
	hdb_handle_t object_totem_handle;
	hdb_handle_t object_interface_handle;
	hdb_handle_t object_member_handle;
	const char *str;
	unsigned int ringnumber = 0;
	hdb_handle_t object_find_interface_handle;
	hdb_handle_t object_find_member_handle;
	const char *transport_type;
	int member_count = 0;

	res = totem_handle_find (objdb, &object_totem_handle);
	if (res == -1) {
		printf ("couldn't find totem handle\n");
		/* Callers read *error_string on failure, so it must be set here too. */
		*error_string = "couldn't find totem handle";
		return (-1);
	}

	memset (totem_config, 0, sizeof (struct totem_config));
	totem_config->interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX);
	if (totem_config->interfaces == 0) {
		*error_string = "Out of memory trying to allocate ethernet interface storage area";
		return -1;
	}

	memset (totem_config->interfaces, 0,
		sizeof (struct totem_interface) * INTERFACE_MAX);

	totem_config->secauth = 1;

	strcpy (totem_config->rrp_mode, "none");

	if (!objdb_get_string (objdb, object_totem_handle, "version", &str)) {
		if (strcmp (str, "2") == 0) {
			totem_config->version = 2;
		}
	}
	if (!objdb_get_string (objdb, object_totem_handle, "secauth", &str)) {
		if (strcmp (str, "on") == 0) {
			totem_config->secauth = 1;
		}
		if (strcmp (str, "off") == 0) {
			totem_config->secauth = 0;
		}
	}

	if (totem_config->secauth == 1) {
		totem_get_crypto_type(objdb, object_totem_handle, totem_config);
	}

	if (!objdb_get_string (objdb, object_totem_handle, "rrp_mode", &str)) {
		/*
		 * Bounded copy: the value comes from the configuration and must
		 * not overrun rrp_mode (assumed to be a char array, as the
		 * strcpy above implies).
		 */
		snprintf (totem_config->rrp_mode, sizeof (totem_config->rrp_mode),
			"%s", str);
	}

	/*
	 * Get interface node id
	 */
	objdb_get_int (objdb, object_totem_handle, "nodeid", &totem_config->node_id);

	totem_config->clear_node_high_bit = 0;
	if (!objdb_get_string (objdb,object_totem_handle, "clear_node_high_bit", &str)) {
		if (strcmp (str, "yes") == 0) {
			totem_config->clear_node_high_bit = 1;
		}
	}

	objdb_get_int (objdb,object_totem_handle, "threads", &totem_config->threads);

	objdb_get_int (objdb,object_totem_handle, "netmtu", &totem_config->net_mtu);

	/*
	 * Get things that might change in the future
	 */
	totem_volatile_config_read (objdb, totem_config, object_totem_handle);

	objdb->object_find_create (
		object_totem_handle,
		"interface",
		strlen ("interface"),
		&object_find_interface_handle);

	while (objdb->object_find_next (
		object_find_interface_handle,
		&object_interface_handle) == 0) {

		member_count = 0;

		objdb_get_int (objdb, object_interface_handle, "ringnumber", &ringnumber);

		/*
		 * ringnumber indexes the interface array allocated above, so a
		 * value from the configuration must be range-checked first.
		 */
		if (ringnumber >= INTERFACE_MAX) {
			objdb->object_find_destroy (object_find_interface_handle);
			free (totem_config->interfaces);
			totem_config->interfaces = NULL;
			*error_string = "Interface ringnumber is out of range (must be less than INTERFACE_MAX)";
			return -1;
		}

		/*
		 * Get interface multicast address
		 */
		if (!objdb_get_string (objdb, object_interface_handle, "mcastaddr", &str)) {
			res = totemip_parse (&totem_config->interfaces[ringnumber].mcast_addr, str, 0);
		}

		totem_config->broadcast_use = 0;
		if (!objdb_get_string (objdb, object_interface_handle, "broadcast", &str)) {
			if (strcmp (str, "yes") == 0) {
				totem_config->broadcast_use = 1;
				totemip_parse (
					&totem_config->interfaces[ringnumber].mcast_addr,
					"255.255.255.255", 0);
			}
		}

		/*
		 * Get mcast port
		 */
		if (!objdb_get_string (objdb, object_interface_handle, "mcastport", &str)) {
			totem_config->interfaces[ringnumber].ip_port = atoi (str);
		}

		/*
		 * Get the bind net address
		 */
		if (!objdb_get_string (objdb, object_interface_handle, "bindnetaddr", &str)) {
			res = totemip_parse (&totem_config->interfaces[ringnumber].bindnet, str,
				totem_config->interfaces[ringnumber].mcast_addr.family);
		}

		/*
		 * Get the TTL
		 */
		totem_config->interfaces[ringnumber].ttl = 1;
		if (!objdb_get_string (objdb, object_interface_handle, "ttl", &str)) {
			totem_config->interfaces[ringnumber].ttl = atoi (str);
		}

		objdb->object_find_create (
			object_interface_handle,
			"member",
			strlen ("member"),
			&object_find_member_handle);

		while (objdb->object_find_next (
			object_find_member_handle,
			&object_member_handle) == 0) {

			if (!objdb_get_string (objdb, object_member_handle, "memberaddr", &str)) {
				res = totemip_parse (&totem_config->interfaces[ringnumber].member_list[member_count++], str, 0);
			}
		}
		/* Release the member iterator created for this interface. */
		objdb->object_find_destroy (object_find_member_handle);

		totem_config->interfaces[ringnumber].member_count = member_count;
		totem_config->interface_count++;
	}

	objdb->object_find_destroy (object_find_interface_handle);

	add_totem_config_notification(objdb, totem_config, object_totem_handle);

	/* UDP is the default transport; "udpu" and "iba" override it. */
	totem_config->transport_number = TOTEM_TRANSPORT_UDP;
	(void)objdb_get_string (objdb, object_totem_handle, "transport", &transport_type);

	if (transport_type) {
		if (strcmp (transport_type, "udpu") == 0) {
			totem_config->transport_number = TOTEM_TRANSPORT_UDPU;
		}
		if (strcmp (transport_type, "iba") == 0) {
			totem_config->transport_number = TOTEM_TRANSPORT_RDMA;
		}
	}

	return 0;
}
int totem_config_validate (
struct totem_config *totem_config,
const char **error_string)
{
static char local_error_reason[512];
char parse_error[512];
const char *error_reason = local_error_reason;
int i;
unsigned int interface_max = INTERFACE_MAX;
if (totem_config->interface_count == 0) {
error_reason = "No interfaces defined";
goto parse_error;
}
for (i = 0; i < totem_config->interface_count; i++) {
/*
* Some error checking of parsed data to make sure its valid
*/
struct totem_ip_address null_addr;
memset (&null_addr, 0, sizeof (struct totem_ip_address));
if ((totem_config->transport_number == 0) &&
memcmp (&totem_config->interfaces[i].mcast_addr, &null_addr,
sizeof (struct totem_ip_address)) == 0) {
error_reason = "No multicast address specified";
goto parse_error;
}
if (totem_config->interfaces[i].ip_port == 0) {
error_reason = "No multicast port specified";
goto parse_error;
}
if (totem_config->interfaces[i].ttl > 255) {
error_reason = "Invalid TTL (should be 0..255)";
goto parse_error;
}
if (totem_config->transport_number != TOTEM_TRANSPORT_UDP &&
totem_config->interfaces[i].ttl != 1) {
error_reason = "Can only set ttl on multicast transport types";
goto parse_error;
}
if (totem_config->interfaces[i].mcast_addr.family == AF_INET6 &&
totem_config->node_id == 0) {
error_reason = "An IPV6 network requires that a node ID be specified.";
goto parse_error;
}
if (totem_config->broadcast_use == 0 && totem_config->transport_number == 0) {
if (totem_config->interfaces[i].mcast_addr.family != totem_config->interfaces[i].bindnet.family) {
error_reason = "Multicast address family does not match bind address family";
goto parse_error;
}
if (totem_config->interfaces[i].mcast_addr.family != totem_config->interfaces[i].bindnet.family) {
error_reason = "Not all bind address belong to the same IP family";
goto parse_error;
}
if (totemip_is_mcast (&totem_config->interfaces[i].mcast_addr) != 0) {
error_reason = "mcastaddr is not a correct multicast address.";
goto parse_error;
}
}
}
if (totem_config->version != 2) {
error_reason = "This totem parser can only parse version 2 configurations.";
goto parse_error;
}
if (totem_config->token_retransmits_before_loss_const == 0) {
totem_config->token_retransmits_before_loss_const =
TOKEN_RETRANSMITS_BEFORE_LOSS_CONST;
}
/*
* Setup timeout values that are not setup by user
*/
if (totem_config->token_timeout == 0) {
totem_config->token_timeout = TOKEN_TIMEOUT;
if (totem_config->token_retransmits_before_loss_const == 0) {
totem_config->token_retransmits_before_loss_const = TOKEN_RETRANSMITS_BEFORE_LOSS_CONST;
}
if (totem_config->token_retransmit_timeout == 0) {
totem_config->token_retransmit_timeout =
(int)(totem_config->token_timeout /
(totem_config->token_retransmits_before_loss_const + 0.2));
}
if (totem_config->token_hold_timeout == 0) {
totem_config->token_hold_timeout =
(int)(totem_config->token_retransmit_timeout * 0.8 -
(1000/HZ));
}
}
if (totem_config->max_network_delay == 0) {
totem_config->max_network_delay = MAX_NETWORK_DELAY;
}
if (totem_config->max_network_delay < MINIMUM_TIMEOUT) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The max_network_delay parameter (%d ms) may not be less then (%d ms).",
totem_config->max_network_delay, MINIMUM_TIMEOUT);
goto parse_error;
}
if (totem_config->window_size == 0) {
totem_config->window_size = WINDOW_SIZE;
}
if (totem_config->max_messages == 0) {
totem_config->max_messages = MAX_MESSAGES;
}
if (totem_config->miss_count_const == 0) {
totem_config->miss_count_const = MISS_COUNT_CONST;
}
if (totem_config->token_timeout < MINIMUM_TIMEOUT) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The token timeout parameter (%d ms) may not be less then (%d ms).",
totem_config->token_timeout, MINIMUM_TIMEOUT);
goto parse_error;
}
if (totem_config->token_retransmit_timeout == 0) {
totem_config->token_retransmit_timeout =
(int)(totem_config->token_timeout /
(totem_config->token_retransmits_before_loss_const + 0.2));
}
if (totem_config->token_hold_timeout == 0) {
totem_config->token_hold_timeout =
(int)(totem_config->token_retransmit_timeout * 0.8 -
(1000/HZ));
}
if (totem_config->token_retransmit_timeout < MINIMUM_TIMEOUT) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The token retransmit timeout parameter (%d ms) may not be less then (%d ms).",
totem_config->token_retransmit_timeout, MINIMUM_TIMEOUT);
goto parse_error;
}
if (totem_config->token_hold_timeout == 0) {
totem_config->token_hold_timeout = TOKEN_HOLD_TIMEOUT;
}
if (totem_config->token_hold_timeout < MINIMUM_TIMEOUT) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The token hold timeout parameter (%d ms) may not be less then (%d ms).",
totem_config->token_hold_timeout, MINIMUM_TIMEOUT);
goto parse_error;
}
if (totem_config->join_timeout == 0) {
totem_config->join_timeout = JOIN_TIMEOUT;
}
if (totem_config->join_timeout < MINIMUM_TIMEOUT) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The join timeout parameter (%d ms) may not be less then (%d ms).",
totem_config->join_timeout, MINIMUM_TIMEOUT);
goto parse_error;
}
if (totem_config->consensus_timeout == 0) {
totem_config->consensus_timeout = (int)(float)(1.2 * totem_config->token_timeout);
}
if (totem_config->consensus_timeout < MINIMUM_TIMEOUT) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The consensus timeout parameter (%d ms) may not be less then (%d ms).",
totem_config->consensus_timeout, MINIMUM_TIMEOUT);
goto parse_error;
}
if (totem_config->merge_timeout == 0) {
totem_config->merge_timeout = MERGE_TIMEOUT;
}
if (totem_config->merge_timeout < MINIMUM_TIMEOUT) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The merge timeout parameter (%d ms) may not be less then (%d ms).",
totem_config->merge_timeout, MINIMUM_TIMEOUT);
goto parse_error;
}
if (totem_config->downcheck_timeout == 0) {
totem_config->downcheck_timeout = DOWNCHECK_TIMEOUT;
}
if (totem_config->downcheck_timeout < MINIMUM_TIMEOUT) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The downcheck timeout parameter (%d ms) may not be less then (%d ms).",
totem_config->downcheck_timeout, MINIMUM_TIMEOUT);
goto parse_error;
}
/*
* RRP values validation
*/
if (strcmp (totem_config->rrp_mode, "none") &&
strcmp (totem_config->rrp_mode, "active") &&
strcmp (totem_config->rrp_mode, "passive")) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The RRP mode \"%s\" specified is invalid. It must be none, active, or passive.\n", totem_config->rrp_mode);
goto parse_error;
}
if (totem_config->rrp_problem_count_timeout == 0) {
totem_config->rrp_problem_count_timeout = RRP_PROBLEM_COUNT_TIMEOUT;
}
if (totem_config->rrp_problem_count_timeout < MINIMUM_TIMEOUT) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The RRP problem count timeout parameter (%d ms) may not be less then (%d ms).",
totem_config->rrp_problem_count_timeout, MINIMUM_TIMEOUT);
goto parse_error;
}
if (totem_config->rrp_problem_count_threshold == 0) {
totem_config->rrp_problem_count_threshold = RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT;
}
if (totem_config->rrp_problem_count_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The RRP problem count threshold (%d problem count) may not be less then (%d problem count).",
totem_config->rrp_problem_count_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN);
goto parse_error;
}
if (totem_config->rrp_token_expired_timeout == 0) {
totem_config->rrp_token_expired_timeout =
totem_config->token_retransmit_timeout;
}
if (totem_config->rrp_token_expired_timeout < MINIMUM_TIMEOUT) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The RRP token expired timeout parameter (%d ms) may not be less then (%d ms).",
totem_config->rrp_token_expired_timeout, MINIMUM_TIMEOUT);
goto parse_error;
}
if (totem_config->rrp_autorecovery_check_timeout == 0) {
totem_config->rrp_autorecovery_check_timeout = RRP_AUTORECOVERY_CHECK_TIMEOUT;
}
if (strcmp (totem_config->rrp_mode, "none") == 0) {
interface_max = 1;
}
if (interface_max < totem_config->interface_count) {
snprintf (parse_error, sizeof(parse_error),
"%d is too many configured interfaces for the rrp_mode setting %s.",
totem_config->interface_count,
totem_config->rrp_mode);
error_reason = parse_error;
goto parse_error;
}
if (totem_config->fail_to_recv_const == 0) {
totem_config->fail_to_recv_const = FAIL_TO_RECV_CONST;
}
if (totem_config->seqno_unchanged_const == 0) {
totem_config->seqno_unchanged_const = SEQNO_UNCHANGED_CONST;
}
if (totem_config->net_mtu == 0) {
totem_config->net_mtu = 1500;
}
if ((MESSAGE_QUEUE_MAX) < totem_config->max_messages) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The max_messages parameter (%d messages) may not be greater then (%d messages).",
totem_config->max_messages, MESSAGE_QUEUE_MAX);
goto parse_error;
}
if (totem_config->threads > SEND_THREADS_MAX) {
totem_config->threads = SEND_THREADS_MAX;
}
if (totem_config->secauth == 0) {
totem_config->threads = 0;
}
if (totem_config->net_mtu > FRAME_SIZE_MAX) {
error_reason = "This net_mtu parameter is greater then the maximum frame size";
goto parse_error;
}
if (totem_config->vsf_type == NULL) {
totem_config->vsf_type = "none";
}
return (0);
parse_error:
snprintf (error_string_response, sizeof(error_string_response),
"parse error in config: %s\n", error_reason);
*error_string = error_string_response;
return (-1);
}
/*
 * Read the totem private key from the file key_location into
 * totem_config->private_key.
 *
 * Exactly sizeof (totem_config->private_key) bytes are expected; a shorter
 * read is reported as an error.  Returns 0 on success.  On failure returns
 * -1 with *error_string pointing at error_string_response, which describes
 * the problem.
 */
static int read_keyfile (
const char *key_location,
struct totem_config *totem_config,
const char **error_string)
{
int fd;
int res;
ssize_t expected_key_len = sizeof (totem_config->private_key);
int saved_errno;
char error_str[100];
const char *error_ptr;
fd = open (key_location, O_RDONLY);
if (fd == -1) {
- LOGSYS_STRERROR_R (error_ptr, errno, error_str, sizeof(error_str));
+ error_ptr = qb_strerror_r(errno, error_str, sizeof(error_str));
snprintf (error_string_response, sizeof(error_string_response),
"Could not open %s: %s\n",
key_location, error_ptr);
goto parse_error;
}
res = read (fd, totem_config->private_key, expected_key_len);
/* close() may overwrite errno, so keep the value produced by read() */
saved_errno = errno;
close (fd);
if (res == -1) {
- LOGSYS_STRERROR_R (error_ptr, saved_errno, error_str, sizeof(error_str));
+ error_ptr = qb_strerror_r (saved_errno, error_str, sizeof(error_str));
snprintf (error_string_response, sizeof(error_string_response),
"Could not read %s: %s\n",
key_location, error_ptr);
goto parse_error;
}
/*
 * NOTE(review): the key length is recorded before the short-read check
 * below, so it is set even when the function goes on to fail.
 */
totem_config->private_key_len = expected_key_len;
if (res != expected_key_len) {
/*
 * NOTE(review): "1024 bits" is hard-coded; presumably it matches
 * sizeof (private_key) * 8 - confirm against the struct definition.
 */
snprintf (error_string_response, sizeof(error_string_response),
"Could only read %d bits of 1024 bits from %s.\n",
res * 8, key_location);
goto parse_error;
}
return 0;
parse_error:
*error_string = error_string_response;
return (-1);
}
/*
 * Load the totem authentication key into totem_config->private_key.
 *
 * Nothing is read when secauth is disabled; the key buffer is simply
 * zeroed.  Otherwise the key is taken from, in order of preference:
 *   1. the file named by the "keyfile" entry of the totem object,
 *   2. the "key" entry of the totem object itself,
 *   3. the file named by $COROSYNC_TOTEM_AUTHKEY_FILE, or
 *      COROSYSCONFDIR "/authkey" when that variable is unset.
 *
 * Returns 0 on success.  Returns -1 on failure; except when the totem
 * object cannot be found, *error_string then points at
 * error_string_response.
 */
int totem_config_keyread (
	struct objdb_iface_ver0 *objdb,
	struct totem_config *totem_config,
	const char **error_string)
{
	int got_key = 0;
	const char *key_location = NULL;
	hdb_handle_t object_totem_handle;
	int res;

	/*
	 * Size everything from the buffer itself, as read_keyfile() does,
	 * instead of repeating the literal 128.
	 */
	memset (totem_config->private_key, 0, sizeof (totem_config->private_key));
	totem_config->private_key_len = sizeof (totem_config->private_key);

	if (totem_config->secauth == 0) {
		return (0);
	}

	res = totem_handle_find (objdb, &object_totem_handle);
	if (res == -1) {
		return (-1);
	}

	/* objdb may store the location of the key file */
	if (!objdb_get_string (objdb,object_totem_handle, "keyfile", &key_location)
		&& key_location) {
		res = read_keyfile(key_location, totem_config, error_string);
		if (res) {
			goto key_error;
		}
		got_key = 1;
	} else { /* Or the key itself may be in the objdb */
		char *key = NULL;
		size_t key_len;

		res = objdb->object_key_get (object_totem_handle,
			"key",
			strlen ("key"),
			(void *)&key,
			&key_len);
		if (res == 0 && key) {
			if (key_len > sizeof (totem_config->private_key)) {
				/*
				 * Describe the failure; otherwise the caller
				 * would be handed whatever text was left in
				 * error_string_response by an earlier error.
				 */
				snprintf (error_string_response, sizeof(error_string_response),
					"The totem key is too long (%lu bytes, maximum is %lu bytes).\n",
					(unsigned long)key_len,
					(unsigned long)sizeof (totem_config->private_key));
				goto key_error;
			}
			memcpy(totem_config->private_key, key, key_len);
			totem_config->private_key_len = key_len;
			got_key = 1;
		}
	}

	/* In desperation we read the default filename */
	if (!got_key) {
		const char *filename = getenv("COROSYNC_TOTEM_AUTHKEY_FILE");
		if (!filename) {
			filename = COROSYSCONFDIR "/authkey";
		}
		res = read_keyfile(filename, totem_config, error_string);
		if (res) {
			goto key_error;
		}
	}

	return (0);

key_error:
	*error_string = error_string_response;
	return (-1);
}
/*
 * objdb key-change callback: re-read the volatile totem settings whenever
 * a key of the "totem" object changes.
 *
 * priv_data_pt is the struct totem_config registered with
 * object_track_start().  Changes on objects with any other name are
 * ignored.
 */
static void totem_key_change_notify(object_change_type_t change_type,
	hdb_handle_t parent_object_handle,
	hdb_handle_t object_handle,
	const void *object_name_pt, size_t object_name_len,
	const void *key_name_pt, size_t key_len,
	const void *key_value_pt, size_t key_value_len,
	void *priv_data_pt)
{
	static const char totem_name[] = "totem";
	const size_t totem_name_len = sizeof (totem_name) - 1;
	struct totem_config *totem_config = priv_data_pt;

	/*
	 * Bound the comparison by the literal's size: the name must be
	 * "totem", optionally including its terminating NUL.  An unbounded
	 * memcmp would accept an empty name or a mere prefix, and would read
	 * past the end of the literal for longer names.
	 */
	if (object_name_pt == NULL ||
		object_name_len < totem_name_len ||
		object_name_len > sizeof (totem_name)) {
		return;
	}
	if (memcmp (object_name_pt, totem_name, object_name_len) != 0) {
		return;
	}

	totem_volatile_config_read(global_objdb,
		totem_config,
		object_handle);
}
/*
 * objdb reload callback.
 *
 * A reload may replace the totem {} object, so key tracking is dropped when
 * the reload starts and re-established on the (possibly new) totem object
 * once the reload has ended or failed.  The volatile configuration is
 * re-read at that point as well.  priv_data_pt is the struct totem_config
 * passed when the tracker was registered; nothing happens when it is NULL.
 */
static void totem_objdb_reload_notify(objdb_reload_notify_type_t type, int flush,
	void *priv_data_pt)
{
	struct totem_config *config = priv_data_pt;
	hdb_handle_t new_totem_handle;

	if (config == NULL) {
		return;
	}

	/* Reload is beginning: stop watching the old totem object. */
	if (type == OBJDB_RELOAD_NOTIFY_START) {
		global_objdb->object_track_stop(
			totem_key_change_notify,
			NULL,
			NULL,
			NULL,
			config);
	}

	/* Only the end of a reload (successful or not) needs more work. */
	if (type != OBJDB_RELOAD_NOTIFY_END &&
		type != OBJDB_RELOAD_NOTIFY_FAILED) {
		return;
	}

	if (totem_handle_find(global_objdb, &new_totem_handle)) {
		log_printf(LOGSYS_LEVEL_ERROR, "totem objdb tracking stopped, cannot find totem{} handle on objdb\n");
		return;
	}

	/* Watch key changes on the current totem object again. */
	global_objdb->object_track_start(new_totem_handle,
		1,
		totem_key_change_notify,
		NULL, /* object_create_notify */
		NULL, /* object_destroy_notify */
		NULL, /* object_reload_notify */
		config); /* priv_data */

	/* Pick up whatever the reload changed. */
	totem_volatile_config_read(global_objdb,
		config,
		new_totem_handle);
}
/*
 * Register the objdb trackers that keep totem_config up to date:
 * a key-change tracker on the totem object and a reload tracker on the
 * parent object.  The objdb interface is also remembered in global_objdb
 * for use by the callbacks.
 */
static void add_totem_config_notification(
	struct objdb_iface_ver0 *objdb,
	struct totem_config *totem_config,
	hdb_handle_t totem_object_handle)
{
	global_objdb = objdb;

	/* Key changes inside totem {} */
	objdb->object_track_start(
		totem_object_handle,
		1,
		totem_key_change_notify,	/* key_change_notify */
		NULL,				/* object_create_notify */
		NULL,				/* object_destroy_notify */
		NULL,				/* object_reload_notify */
		totem_config);			/* priv_data */

	/* Reload notify must be on the parent object */
	objdb->object_track_start(
		OBJECT_PARENT_HANDLE,
		1,
		NULL,				/* key_change_notify */
		NULL,				/* object_create_notify */
		NULL,				/* object_destroy_notify */
		totem_objdb_reload_notify,	/* object_reload_notify */
		totem_config);			/* priv_data */
}
diff --git a/exec/totemiba.c b/exec/totemiba.c
index e3c7816b..2d8c6908 100644
--- a/exec/totemiba.c
+++ b/exec/totemiba.c
@@ -1,1556 +1,1555 @@
/*
* Copyright (c) 2009 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <assert.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netdb.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <sched.h>
#include <time.h>
#include <sys/time.h>
#include <sys/poll.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <rdma/rdma_cma.h>
#include <assert.h>
#include <errno.h>
#include <corosync/sq.h>
#include <corosync/list.h>
#include <corosync/hdb.h>
#include <corosync/swab.h>
#include <qb/qbloop.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/engine/logsys.h>
#include "totemiba.h"
#include "wthread.h"
#define COMPLETION_QUEUE_ENTRIES 100
#define TOTAL_READ_POSTS 100
#define MAX_MTU_SIZE 4096
/*
 * Per-transport state of one Infiniband (RDMA) totem interface.
 *
 * The fields fall into three groups of RDMA CM / verbs objects, one each
 * for the multicast channel (mcast_*), the incoming token connection
 * (recv_token_*, listen_recv_token_*) and the outgoing token connection
 * (send_token_*), plus the callbacks, logging hook and buffer lists
 * used by the code in this file.
 */
struct totemiba_instance {
/* Socket addresses used by the channels */
struct sockaddr bind_addr;
struct sockaddr send_token_bind_addr;
struct sockaddr mcast_addr;
struct sockaddr token_addr;
struct sockaddr local_mcast_bind_addr;
struct totem_interface *totem_interface;
struct totem_config *totem_config;
/* Called with rrp_context and my_id once the multicast group is joined */
void (*totemiba_iface_change_fn) (
void *context,
const struct totem_ip_address *iface_address);
/* Called with rrp_context for every received message */
void (*totemiba_deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len);
void (*totemiba_target_set_completed) (
void *context);
/* Opaque context handed back to the callbacks above */
void *rrp_context;
qb_loop_timer_handle timer_netif_check_timeout;
/* libqb main loop the completion channel fds are registered with */
qb_loop_t *totemiba_poll_handle;
struct totem_ip_address my_id;
/* Multicast channel */
struct rdma_event_channel *mcast_channel;
struct rdma_cm_id *mcast_cma_id;
struct ibv_pd *mcast_pd;
struct sockaddr mcast_dest_addr;
uint32_t mcast_qpn;
uint32_t mcast_qkey;
struct ibv_ah *mcast_ah;
struct ibv_comp_channel *mcast_send_completion_channel;
struct ibv_comp_channel *mcast_recv_completion_channel;
struct ibv_cq *mcast_send_cq;
struct ibv_cq *mcast_recv_cq;
/* Incoming token connection; recv_token_accepted is 1 once set up */
int recv_token_accepted;
struct rdma_event_channel *recv_token_channel;
struct rdma_event_channel *listen_recv_token_channel;
struct rdma_cm_id *listen_recv_token_cma_id;
struct rdma_cm_id *recv_token_cma_id;
struct ibv_pd *recv_token_pd;
struct sockaddr recv_token_dest_addr;
struct ibv_comp_channel *recv_token_send_completion_channel;
struct ibv_comp_channel *recv_token_recv_completion_channel;
struct ibv_cq *recv_token_send_cq;
struct ibv_cq *recv_token_recv_cq;
/* Outgoing token connection */
int send_token_bound;
struct rdma_event_channel *send_token_channel;
struct rdma_cm_id *send_token_cma_id;
struct ibv_pd *send_token_pd;
struct sockaddr send_token_dest_addr;
uint32_t send_token_qpn;
uint32_t send_token_qkey;
struct ibv_ah *send_token_ah;
struct ibv_comp_channel *send_token_send_completion_channel;
struct ibv_comp_channel *send_token_recv_completion_channel;
struct ibv_cq *send_token_send_cq;
struct ibv_cq *send_token_recv_cq;
/*
 * Logging hook used by the log_printf() macro.
 * NOTE(review): the macro's replacement definition passes the level and
 * subsystem id as two separate leading arguments, which does not match
 * this prototype (single rec_ident, format at position 5) - confirm.
 */
void (*totemiba_log_printf) (
unsigned int rec_ident,
const char *function,
const char *file,
int line,
const char *format,
...)__attribute__((format(printf, 5, 6)));
int totemiba_subsys_id;
/* *_free: released send buffers available for reuse */
struct list_head mcast_send_buf_free;
struct list_head token_send_buf_free;
/* *_head: every buffer ever allocated, linked through list_all */
struct list_head mcast_send_buf_head;
struct list_head token_send_buf_head;
struct list_head recv_token_recv_buf_head;
};
/*
 * Converts between a buffer pointer and the 64 bit work request id that
 * is attached to verbs work requests (see void2wrid() / wrid2void()).
 */
union u {
uint64_t wr_id;
void *v;
};
/*
 * Log through the instance's totemiba_log_printf hook.  Requires a
 * variable named "instance" (struct totemiba_instance *) in scope.
 *
 * NOTE(review): the expansion ends in "while (0);" - the trailing
 * semicolon makes "if (x) log_printf (...); else ..." a syntax error.
 * NOTE(review): the replacement definition passes level and subsystem id
 * as separate arguments; confirm the totemiba_log_printf prototype
 * matches.
 */
-#define log_printf(level, format, args...) \
-do { \
- instance->totemiba_log_printf ( \
- LOGSYS_ENCODE_RECID(level, \
- instance->totemiba_subsys_id, \
- LOGSYS_RECID_LOG), \
- __FUNCTION__, __FILE__, __LINE__, \
- (const char *)format, ##args); \
+#define log_printf(level, format, args...) \
+do { \
+ instance->totemiba_log_printf ( \
+ level, \
+ instance->totemiba_subsys_id, \
+ __FUNCTION__, __FILE__, __LINE__, \
+ (const char *)format, ##args); \
} while (0);
/*
 * One receive buffer together with the verbs receive work request that
 * posts it.
 */
struct recv_buf {
/* Links the buffer into a per-instance list of all receive buffers */
struct list_head list_all;
/* Work request handed to ibv_post_recv(); its sg_list points at sge */
struct ibv_recv_wr recv_wr;
struct ibv_sge sge;
/* Memory registration covering buffer */
struct ibv_mr *mr;
/* NOTE(review): the create helpers register only 2048 bytes of this */
char buffer[MAX_MTU_SIZE];
};
/*
 * One send buffer, recycled through a free list once its send completes.
 */
struct send_buf {
/* Links the buffer into the free list while it is not in use */
struct list_head list_free;
/* Links the buffer into the list of all allocated send buffers */
struct list_head list_all;
/* Memory registration covering buffer */
struct ibv_mr *mr;
/* NOTE(review): the get helpers register only 2048 bytes of this */
char buffer[MAX_MTU_SIZE];
};
/*
 * Convert a buffer pointer into a work request id.
 *
 * The whole 64 bit id is cleared first so that the result is fully
 * defined on platforms where a pointer is narrower than 64 bits.
 */
static hdb_handle_t
void2wrid (void *v)
{
	union u u;

	u.wr_id = 0;
	u.v = v;
	return u.wr_id;
}
static void *
wrid2void (uint64_t wr_id) { union u u; u.wr_id = wr_id; return u.v; }
/*
 * Reset an instance to all-zero and set up its (empty) buffer lists.
 */
static void totemiba_instance_initialize (struct totemiba_instance *instance)
{
	memset (instance, 0, sizeof (*instance));

	/* Send buffers: free lists and lists of all allocated buffers */
	list_init (&instance->mcast_send_buf_free);
	list_init (&instance->token_send_buf_free);
	list_init (&instance->mcast_send_buf_head);
	list_init (&instance->token_send_buf_head);

	/* Receive buffers of the incoming token connection */
	list_init (&instance->recv_token_recv_buf_head);
}
/*
 * Obtain a send buffer for the multicast channel.
 *
 * A buffer from the free list is reused when available; otherwise a new
 * one is allocated, registered with the multicast protection domain and
 * recorded in mcast_send_buf_head.  Returns NULL when allocation or
 * memory registration fails.
 */
static inline struct send_buf *mcast_send_buf_get (
	struct totemiba_instance *instance)
{
	struct send_buf *buf;

	if (!list_empty (&instance->mcast_send_buf_free)) {
		buf = list_entry (instance->mcast_send_buf_free.next, struct send_buf, list_free);
		list_del (&buf->list_free);
		return (buf);
	}

	buf = malloc (sizeof (struct send_buf));
	if (buf == NULL) {
		return (NULL);
	}

	buf->mr = ibv_reg_mr (instance->mcast_pd,
		buf->buffer,
		2048, IBV_ACCESS_LOCAL_WRITE);
	if (buf->mr != NULL) {
		list_init (&buf->list_all);
		list_add_tail (&buf->list_all, &instance->mcast_send_buf_head);
		return (buf);
	}

	log_printf (LOGSYS_LEVEL_ERROR, "couldn't register memory range\n");
	free (buf);
	return (NULL);
}
/*
 * Return a multicast send buffer to the free list for later reuse.
 */
static inline void mcast_send_buf_put (
	struct totemiba_instance *instance,
	struct send_buf *send_buf)
{
	struct list_head *node = &send_buf->list_free;

	list_init (node);
	list_add_tail (node, &instance->mcast_send_buf_free);
}
/*
 * Obtain a send buffer for the outgoing token connection.
 *
 * A buffer from the free list is reused when available; otherwise a new
 * one is allocated, registered with the send-token protection domain and
 * recorded in token_send_buf_head.  Returns NULL when allocation or
 * memory registration fails.
 */
static inline struct send_buf *token_send_buf_get (
	struct totemiba_instance *instance)
{
	struct send_buf *buf;

	if (!list_empty (&instance->token_send_buf_free)) {
		buf = list_entry (instance->token_send_buf_free.next, struct send_buf, list_free);
		list_del (&buf->list_free);
		return (buf);
	}

	buf = malloc (sizeof (struct send_buf));
	if (buf == NULL) {
		return (NULL);
	}

	buf->mr = ibv_reg_mr (instance->send_token_pd,
		buf->buffer,
		2048, IBV_ACCESS_LOCAL_WRITE);
	if (buf->mr != NULL) {
		list_init (&buf->list_all);
		list_add_tail (&buf->list_all, &instance->token_send_buf_head);
		return (buf);
	}

	log_printf (LOGSYS_LEVEL_ERROR, "couldn't register memory range\n");
	free (buf);
	return (NULL);
}
/*
 * Deregister and free every token send buffer, then reset both the
 * free list and the list of all buffers to empty.
 */
static inline void token_send_buf_destroy (struct totemiba_instance *instance)
{
	struct list_head *cursor = instance->token_send_buf_head.next;

	while (cursor != &instance->token_send_buf_head) {
		struct send_buf *victim = list_entry (cursor, struct send_buf, list_all);

		/* Step forward before the node is freed. */
		cursor = cursor->next;
		ibv_dereg_mr (victim->mr);
		free (victim);
	}

	list_init (&instance->token_send_buf_free);
	list_init (&instance->token_send_buf_head);
}
/*
 * Return a token send buffer to the free list for later reuse.
 */
static inline void token_send_buf_put (
	struct totemiba_instance *instance,
	struct send_buf *send_buf)
{
	struct list_head *node = &send_buf->list_free;

	list_init (node);
	list_add_tail (node, &instance->token_send_buf_free);
}
static inline struct recv_buf *recv_token_recv_buf_create (
struct totemiba_instance *instance)
{
struct recv_buf *recv_buf;
recv_buf = malloc (sizeof (struct recv_buf));
if (recv_buf == NULL) {
return (NULL);
}
recv_buf->mr = ibv_reg_mr (instance->recv_token_pd, &recv_buf->buffer,
2048,
IBV_ACCESS_LOCAL_WRITE);
recv_buf->recv_wr.next = NULL;
recv_buf->recv_wr.sg_list = &recv_buf->sge;
recv_buf->recv_wr.num_sge = 1;
recv_buf->recv_wr.wr_id = (uintptr_t)recv_buf;
recv_buf->sge.length = 2048;
recv_buf->sge.lkey = recv_buf->mr->lkey;
recv_buf->sge.addr = (uintptr_t)recv_buf->buffer;
list_init (&recv_buf->list_all);
list_add (&recv_buf->list_all, &instance->recv_token_recv_buf_head);
return (recv_buf);
}
/*
 * Post a receive buffer on the incoming token queue pair.
 * Returns the result of ibv_post_recv().
 */
static inline int recv_token_recv_buf_post (struct totemiba_instance *instance, struct recv_buf *recv_buf)
{
	struct ibv_recv_wr *bad_wr;

	return (ibv_post_recv (instance->recv_token_cma_id->qp, &recv_buf->recv_wr, &bad_wr));
}
static inline void recv_token_recv_buf_post_initial (struct totemiba_instance *instance)
{
struct recv_buf *recv_buf;
unsigned int i;
for (i = 0; i < TOTAL_READ_POSTS; i++) {
recv_buf = recv_token_recv_buf_create (instance);
recv_token_recv_buf_post (instance, recv_buf);
}
}
/*
 * Deregister and free every receive buffer of the incoming token
 * connection, then reset the buffer list to empty.
 */
static inline void recv_token_recv_buf_post_destroy (
	struct totemiba_instance *instance)
{
	struct list_head *cursor = instance->recv_token_recv_buf_head.next;

	while (cursor != &instance->recv_token_recv_buf_head) {
		struct recv_buf *victim = list_entry (cursor, struct recv_buf, list_all);

		/* Step forward before the node is freed. */
		cursor = cursor->next;
		ibv_dereg_mr (victim->mr);
		free (victim);
	}

	list_init (&instance->recv_token_recv_buf_head);
}
static inline struct recv_buf *mcast_recv_buf_create (struct totemiba_instance *instance)
{
struct recv_buf *recv_buf;
struct ibv_mr *mr;
recv_buf = malloc (sizeof (struct recv_buf));
if (recv_buf == NULL) {
return (NULL);
}
mr = ibv_reg_mr (instance->mcast_pd, &recv_buf->buffer,
2048,
IBV_ACCESS_LOCAL_WRITE);
recv_buf->recv_wr.next = NULL;
recv_buf->recv_wr.sg_list = &recv_buf->sge;
recv_buf->recv_wr.num_sge = 1;
recv_buf->recv_wr.wr_id = (uintptr_t)recv_buf;
recv_buf->sge.length = 2048;
recv_buf->sge.lkey = mr->lkey;
recv_buf->sge.addr = (uintptr_t)recv_buf->buffer;
return (recv_buf);
}
/*
 * Post a receive buffer on the multicast queue pair.
 * Returns the result of ibv_post_recv().
 */
static inline int mcast_recv_buf_post (struct totemiba_instance *instance, struct recv_buf *recv_buf)
{
	struct ibv_recv_wr *bad_wr;

	return (ibv_post_recv (instance->mcast_cma_id->qp, &recv_buf->recv_wr, &bad_wr));
}
static inline void mcast_recv_buf_post_initial (struct totemiba_instance *instance)
{
struct recv_buf *recv_buf;
unsigned int i;
for (i = 0; i < TOTAL_READ_POSTS; i++) {
recv_buf = mcast_recv_buf_create (instance);
mcast_recv_buf_post (instance, recv_buf);
}
}
/*
 * Hand a received message to the upper layer.
 *
 * wr_id identifies the struct recv_buf the message was received into and
 * bytes is the completion's byte count.  The payload starts after the
 * global routing header (GRH) at the front of the buffer; for UD queue
 * pairs the byte count includes that header, so its size is removed from
 * the delivered length.  Completions too short to hold a GRH are dropped.
 */
static inline void iba_deliver_fn (struct totemiba_instance *instance, uint64_t wr_id, uint32_t bytes)
{
	const char *addr;
	const struct recv_buf *recv_buf;

	if (bytes < sizeof (struct ibv_grh)) {
		return;
	}

	recv_buf = wrid2void(wr_id);
	addr = &recv_buf->buffer[sizeof (struct ibv_grh)];
	instance->totemiba_deliver_fn (instance->rrp_context, addr,
		(unsigned int)(bytes - sizeof (struct ibv_grh)));
}
static int mcast_cq_send_event_fn (int events, int suck, void *context)
{
struct totemiba_instance *instance = (struct totemiba_instance *)context;
struct ibv_wc wc[32];
struct ibv_cq *ev_cq;
void *ev_ctx;
int res;
int i;
ibv_get_cq_event (instance->mcast_send_completion_channel, &ev_cq, &ev_ctx);
ibv_ack_cq_events (ev_cq, 1);
res = ibv_req_notify_cq (ev_cq, 0);
res = ibv_poll_cq (instance->mcast_send_cq, 32, wc);
if (res > 0) {
for (i = 0; i < res; i++) {
mcast_send_buf_put (instance, wrid2void(wc[i].wr_id));
}
}
return (0);
}
static int mcast_cq_recv_event_fn (int events, int suck, void *context)
{
struct totemiba_instance *instance = (struct totemiba_instance *)context;
struct ibv_wc wc[64];
struct ibv_cq *ev_cq;
void *ev_ctx;
int res;
int i;
ibv_get_cq_event (instance->mcast_recv_completion_channel, &ev_cq, &ev_ctx);
ibv_ack_cq_events (ev_cq, 1);
res = ibv_req_notify_cq (ev_cq, 0);
res = ibv_poll_cq (instance->mcast_recv_cq, 64, wc);
if (res > 0) {
for (i = 0; i < res; i++) {
iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
mcast_recv_buf_post (instance, wrid2void(wc[i].wr_id));
}
}
return (0);
}
/*
 * Poll-loop callback for RDMA CM events on the multicast channel.
 *
 * Joins the multicast group once the address is resolved and, once the
 * join completes, records the queue pair number/key, creates the address
 * handle and reports the interface as up through
 * totemiba_iface_change_fn.  context is the struct totemiba_instance.
 * Always returns 0.
 */
static int mcast_rdma_event_fn (int events, int suck, void *context)
{
	struct totemiba_instance *instance = (struct totemiba_instance *)context;
	struct rdma_cm_event *event;
	int res;

	res = rdma_get_cm_event (instance->mcast_channel, &event);
	if (res != 0) {
		return (0);
	}

	switch (event->event) {
	/*
	 * occurs when we resolve the multicast address
	 */
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		res = rdma_join_multicast (instance->mcast_cma_id, &instance->mcast_addr, instance);
		if (res != 0) {
			log_printf (LOGSYS_LEVEL_ERROR, "couldn't join multicast group\n");
		}
		break;
	/*
	 * occurs when the CM joins the multicast group
	 */
	case RDMA_CM_EVENT_MULTICAST_JOIN:
		instance->mcast_qpn = event->param.ud.qp_num;
		instance->mcast_qkey = event->param.ud.qkey;
		instance->mcast_ah = ibv_create_ah (instance->mcast_pd, &event->param.ud.ah_attr);
		if (instance->mcast_ah == NULL) {
			/* Don't announce an interface that has no address handle */
			log_printf (LOGSYS_LEVEL_ERROR, "couldn't create multicast address handle\n");
			break;
		}

		instance->totemiba_iface_change_fn (instance->rrp_context, &instance->my_id);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
	case RDMA_CM_EVENT_ROUTE_ERROR:
	case RDMA_CM_EVENT_MULTICAST_ERROR:
		log_printf (LOGSYS_LEVEL_ERROR, "multicast error\n");
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		break;
	default:
		log_printf (LOGSYS_LEVEL_ERROR, "default %d\n", event->event);
		break;
	}

	rdma_ack_cm_event (event);
	return (0);
}
static int recv_token_cq_send_event_fn (hdb_handle_t poll_handle, int events, int suck, void *context)
{
struct totemiba_instance *instance = (struct totemiba_instance *)context;
struct ibv_wc wc[32];
struct ibv_cq *ev_cq;
void *ev_ctx;
int res;
int i;
ibv_get_cq_event (instance->recv_token_send_completion_channel, &ev_cq, &ev_ctx);
ibv_ack_cq_events (ev_cq, 1);
res = ibv_req_notify_cq (ev_cq, 0);
res = ibv_poll_cq (instance->recv_token_send_cq, 32, wc);
if (res > 0) {
for (i = 0; i < res; i++) {
iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
ibv_dereg_mr (wrid2void(wc[i].wr_id));
}
}
return (0);
}
static int recv_token_cq_recv_event_fn (int events, int suck, void *context)
{
struct totemiba_instance *instance = (struct totemiba_instance *)context;
struct ibv_wc wc[32];
struct ibv_cq *ev_cq;
void *ev_ctx;
int res;
int i;
ibv_get_cq_event (instance->recv_token_recv_completion_channel, &ev_cq, &ev_ctx);
ibv_ack_cq_events (ev_cq, 1);
res = ibv_req_notify_cq (ev_cq, 0);
res = ibv_poll_cq (instance->recv_token_recv_cq, 32, wc);
if (res > 0) {
for (i = 0; i < res; i++) {
iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
recv_token_recv_buf_post (instance, wrid2void(wc[i].wr_id));
}
}
return (0);
}
/*
 * Tear down the incoming token connection created by
 * recv_token_accept_setup().  Does nothing when no connection has been
 * accepted.  Always returns 0.
 */
static int recv_token_accept_destroy (struct totemiba_instance *instance)
{
	if (instance->recv_token_accepted == 0) {
		return (0);
	}

	/*
	 * Remove the completion channel descriptors from the poll loop
	 * first: the fd lives inside the channel object, which must not be
	 * read once ibv_destroy_comp_channel() has released it.
	 */
	qb_loop_poll_del (
		instance->totemiba_poll_handle,
		instance->recv_token_recv_completion_channel->fd);
	qb_loop_poll_del (
		instance->totemiba_poll_handle,
		instance->recv_token_send_completion_channel->fd);

	rdma_destroy_qp (instance->recv_token_cma_id);
	recv_token_recv_buf_post_destroy (instance);
	ibv_destroy_cq (instance->recv_token_send_cq);
	ibv_destroy_cq (instance->recv_token_recv_cq);
	ibv_destroy_comp_channel (instance->recv_token_send_completion_channel);
	ibv_destroy_comp_channel (instance->recv_token_recv_completion_channel);
	ibv_dealloc_pd (instance->recv_token_pd);
	rdma_destroy_id (instance->recv_token_cma_id);

	/* Nothing is left to destroy until the next successful setup */
	instance->recv_token_accepted = 0;
	return (0);
}
static int recv_token_accept_setup (struct totemiba_instance *instance)
{
struct ibv_qp_init_attr init_qp_attr;
int res = 0;
/*
* Allocate the protection domain
*/
instance->recv_token_pd = ibv_alloc_pd (instance->recv_token_cma_id->verbs);
/*
* Create a completion channel
*/
instance->recv_token_recv_completion_channel = ibv_create_comp_channel (instance->recv_token_cma_id->verbs);
if (instance->recv_token_recv_completion_channel == NULL) {
log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel\n");
return (-1);
}
/*
* Create the completion queue
*/
instance->recv_token_recv_cq = ibv_create_cq (instance->recv_token_cma_id->verbs,
COMPLETION_QUEUE_ENTRIES, instance,
instance->recv_token_recv_completion_channel, 0);
if (instance->recv_token_recv_cq == NULL) {
log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue\n");
return (-1);
}
res = ibv_req_notify_cq (instance->recv_token_recv_cq, 0);
if (res != 0) {
log_printf (LOGSYS_LEVEL_ERROR, "couldn't request notifications of the completion queue\n");
return (-1);
}
/*
* Create a completion channel
*/
instance->recv_token_send_completion_channel = ibv_create_comp_channel (instance->recv_token_cma_id->verbs);
if (instance->recv_token_send_completion_channel == NULL) {
log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel\n");
return (-1);
}
/*
* Create the completion queue
*/
instance->recv_token_send_cq = ibv_create_cq (instance->recv_token_cma_id->verbs,
COMPLETION_QUEUE_ENTRIES, instance,
instance->recv_token_send_completion_channel, 0);
if (instance->recv_token_send_cq == NULL) {
log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue\n");
return (-1);
}
res = ibv_req_notify_cq (instance->recv_token_send_cq, 0);
if (res != 0) {
log_printf (LOGSYS_LEVEL_ERROR, "couldn't request notifications of the completion queue\n");
return (-1);
}
memset (&init_qp_attr, 0, sizeof (struct ibv_qp_init_attr));
init_qp_attr.cap.max_send_wr = 50;
init_qp_attr.cap.max_recv_wr = TOTAL_READ_POSTS;
init_qp_attr.cap.max_send_sge = 1;
init_qp_attr.cap.max_recv_sge = 1;
init_qp_attr.qp_context = instance;
init_qp_attr.sq_sig_all = 0;
init_qp_attr.qp_type = IBV_QPT_UD;
init_qp_attr.send_cq = instance->recv_token_send_cq;
init_qp_attr.recv_cq = instance->recv_token_recv_cq;
res = rdma_create_qp (instance->recv_token_cma_id, instance->recv_token_pd,
&init_qp_attr);
if (res != 0) {
log_printf (LOGSYS_LEVEL_ERROR, "couldn't create queue pair\n");
return (-1);
}
recv_token_recv_buf_post_initial (instance);
qb_loop_poll_add (
instance->totemiba_poll_handle,
QB_LOOP_MED,
instance->recv_token_recv_completion_channel->fd,
POLLIN, instance, recv_token_cq_recv_event_fn);
qb_loop_poll_add (
instance->totemiba_poll_handle,
QB_LOOP_MED,
instance->recv_token_send_completion_channel->fd,
POLLIN, instance, recv_token_cq_send_event_fn);
instance->recv_token_accepted = 1;
return (res);
};
/*
 * Poll callback for the listening receive-token rdma event channel.
 *
 * On a connect request the previously accepted connection is destroyed,
 * the resources for the new one are set up and the request is accepted.
 * Every other event is only logged.  The event is acknowledged in all
 * cases and the callback always returns 0.
 */
static int recv_token_rdma_event_fn (int events, int suck, void *context)
{
	struct totemiba_instance *instance = (struct totemiba_instance *)context;
	struct rdma_cm_event *event;
	struct rdma_conn_param conn_param;
	int res;

	res = rdma_get_cm_event (instance->listen_recv_token_channel, &event);
	if (res != 0) {
		return (0);
	}

	switch (event->event) {
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		recv_token_accept_destroy (instance);
		instance->recv_token_cma_id = event->id;
		res = recv_token_accept_setup (instance);
		if (res != 0) {
			/*
			 * The queue pair may not exist when setup failed, so
			 * it must not be dereferenced to fill conn_param.
			 */
			log_printf (LOGSYS_LEVEL_ERROR,
				"couldn't set up resources to accept the token connection\n");
			break;
		}
		memset (&conn_param, 0, sizeof (struct rdma_conn_param));
		conn_param.qp_num = instance->recv_token_cma_id->qp->qp_num;
		res = rdma_accept (instance->recv_token_cma_id, &conn_param);
		if (res != 0) {
			log_printf (LOGSYS_LEVEL_ERROR,
				"error accepting token connection %d %d\n", res, errno);
		}
		break;
	default:
		log_printf (LOGSYS_LEVEL_ERROR, "default %d\n", event->event);
		break;
	}

	rdma_ack_cm_event (event);
	return (0);
}
static int send_token_cq_send_event_fn (int events, int suck, void *context)
{
struct totemiba_instance *instance = (struct totemiba_instance *)context;
struct ibv_wc wc[32];
struct ibv_cq *ev_cq;
void *ev_ctx;
int res;
int i;
ibv_get_cq_event (instance->send_token_send_completion_channel, &ev_cq, &ev_ctx);
ibv_ack_cq_events (ev_cq, 1);
res = ibv_req_notify_cq (ev_cq, 0);
res = ibv_poll_cq (instance->send_token_send_cq, 32, wc);
if (res > 0) {
for (i = 0; i < res; i++) {
token_send_buf_put (instance, wrid2void(wc[i].wr_id));
}
}
return (0);
}
static int send_token_cq_recv_event_fn (int events, int suck, void *context)
{
struct totemiba_instance *instance = (struct totemiba_instance *)context;
struct ibv_wc wc[32];
struct ibv_cq *ev_cq;
void *ev_ctx;
int res;
int i;
ibv_get_cq_event (instance->send_token_recv_completion_channel, &ev_cq, &ev_ctx);
ibv_ack_cq_events (ev_cq, 1);
res = ibv_req_notify_cq (ev_cq, 0);
res = ibv_poll_cq (instance->send_token_recv_cq, 32, wc);
if (res > 0) {
for (i = 0; i < res; i++) {
iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
}
}
return (0);
}
/*
 * Poll callback for the send-token rdma event channel.
 *
 * Drives the connection state machine: address resolved -> resolve the
 * route, route resolved -> connect, established -> record the remote
 * queue pair number/key, create the address handle and notify the upper
 * layer through totemiba_target_set_completed.  Failures of the
 * individual steps are logged.  The event is always acknowledged and the
 * callback always returns 0.
 */
static int send_token_rdma_event_fn (int events, int suck, void *context)
{
	struct totemiba_instance *instance = (struct totemiba_instance *)context;
	struct rdma_cm_event *event;
	struct rdma_conn_param conn_param;
	int res;

	res = rdma_get_cm_event (instance->send_token_channel, &event);
	if (res != 0) {
		return (0);
	}

	switch (event->event) {
	/*
	 * occurs when the address of the token target has been resolved
	 */
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		res = rdma_resolve_route (instance->send_token_cma_id, 2000);
		if (res != 0) {
			log_printf (LOGSYS_LEVEL_ERROR,
				"error resolving send token route %d %d\n", res, errno);
		}
		break;
	/*
	 * occurs when the route to the token target has been resolved
	 */
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		memset (&conn_param, 0, sizeof (struct rdma_conn_param));
		conn_param.private_data = NULL;
		conn_param.private_data_len = 0;
		res = rdma_connect (instance->send_token_cma_id, &conn_param);
		if (res != 0) {
			log_printf (LOGSYS_LEVEL_ERROR,
				"error connecting send token %d %d\n", res, errno);
		}
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		instance->send_token_qpn = event->param.ud.qp_num;
		instance->send_token_qkey = event->param.ud.qkey;
		instance->send_token_ah = ibv_create_ah (instance->send_token_pd, &event->param.ud.ah_attr);
		if (instance->send_token_ah == NULL) {
			log_printf (LOGSYS_LEVEL_ERROR,
				"couldn't create address handle for send token\n");
		}
		/*
		 * The completion callback is still invoked when the address
		 * handle could not be created, as before this check existed.
		 */
		instance->totemiba_target_set_completed (instance->rrp_context);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
	case RDMA_CM_EVENT_ROUTE_ERROR:
	case RDMA_CM_EVENT_MULTICAST_ERROR:
		log_printf (LOGSYS_LEVEL_ERROR,
			"send_token_rdma_event_fn multicast error\n");
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		break;
	case RDMA_CM_EVENT_UNREACHABLE:
		log_printf (LOGSYS_LEVEL_ERROR,
			"send_token_rdma_event_fn unreachable\n");
		break;
	default:
		log_printf (LOGSYS_LEVEL_ERROR,
			"send_token_rdma_event_fn unknown event %d\n",
			event->event);
		break;
	}

	rdma_ack_cm_event (event);
	return (0);
}
static int send_token_bind (struct totemiba_instance *instance)
{
int res;
struct ibv_qp_init_attr init_qp_attr;
instance->send_token_channel = rdma_create_event_channel();
if (instance->send_token_channel == NULL) {
log_printf (LOGSYS_LEVEL_ERROR, "couldn't create rdma channel\n");
return (-1);
}
res = rdma_create_id (instance->send_token_channel,
&instance->send_token_cma_id, NULL, RDMA_PS_UDP);
if (res) {
log_printf (LOGSYS_LEVEL_ERROR, "error creating send_token_cma_id\n");
return (-1);
}
res = rdma_bind_addr (instance->send_token_cma_id,
&instance->send_token_bind_addr);
if (res) {
log_printf (LOGSYS_LEVEL_ERROR, "error doing rdma_bind_addr for send token\n");
return (-1);
}
/*
* Resolve the send_token address into a GUID
*/
res = rdma_resolve_addr (instance->send_token_cma_id,
&instance->bind_addr, &instance->token_addr, 2000);
if (res) {
log_printf (LOGSYS_LEVEL_ERROR, "error resolving send token address %d %d\n", res, errno);
return (-1);
}
/*
* Allocate the protection domain
*/
instance->send_token_pd = ibv_alloc_pd (instance->send_token_cma_id->verbs);
/*
* Create a completion channel
*/
instance->send_token_recv_completion_channel = ibv_create_comp_channel (instance->send_token_cma_id->verbs);
if (instance->send_token_recv_completion_channel == NULL) {
log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel\n");
return (-1);
}
/*
* Create the completion queue
*/
instance->send_token_recv_cq = ibv_create_cq (instance->send_token_cma_id->verbs,
COMPLETION_QUEUE_ENTRIES, instance,
instance->send_token_recv_completion_channel, 0);
if (instance->send_token_recv_cq == NULL) {
log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue\n");
return (-1);
}
res = ibv_req_notify_cq (instance->send_token_recv_cq, 0);
if (res != 0) {
log_printf (LOGSYS_LEVEL_ERROR,
"couldn't request notifications of the completion queue\n");
return (-1);
}
/*
* Create a completion channel
*/
instance->send_token_send_completion_channel =
ibv_create_comp_channel (instance->send_token_cma_id->verbs);
if (instance->send_token_send_completion_channel == NULL) {
log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel\n");
return (-1);
}
/*
* Create the completion queue
*/
instance->send_token_send_cq = ibv_create_cq (
instance->send_token_cma_id->verbs,
COMPLETION_QUEUE_ENTRIES, instance,
instance->send_token_send_completion_channel, 0);
if (instance->send_token_send_cq == NULL) {
log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue\n");
return (-1);
}
res = ibv_req_notify_cq (instance->send_token_send_cq, 0);
if (res != 0) {
log_printf (LOGSYS_LEVEL_ERROR,
"couldn't request notifications of the completion queue\n");
return (-1);
}
memset (&init_qp_attr, 0, sizeof (struct ibv_qp_init_attr));
init_qp_attr.cap.max_send_wr = 50;
init_qp_attr.cap.max_recv_wr = TOTAL_READ_POSTS;
init_qp_attr.cap.max_send_sge = 1;
init_qp_attr.cap.max_recv_sge = 1;
init_qp_attr.qp_context = instance;
init_qp_attr.sq_sig_all = 0;
init_qp_attr.qp_type = IBV_QPT_UD;
init_qp_attr.send_cq = instance->send_token_send_cq;
init_qp_attr.recv_cq = instance->send_token_recv_cq;
res = rdma_create_qp (instance->send_token_cma_id,
instance->send_token_pd, &init_qp_attr);
if (res != 0) {
log_printf (LOGSYS_LEVEL_ERROR, "couldn't create queue pair\n");
return (-1);
}
qb_loop_poll_add (
instance->totemiba_poll_handle,
QB_LOOP_MED,
instance->send_token_recv_completion_channel->fd,
POLLIN, instance, send_token_cq_recv_event_fn);
qb_loop_poll_add (
instance->totemiba_poll_handle,
QB_LOOP_MED,
instance->send_token_send_completion_channel->fd,
POLLIN, instance, send_token_cq_send_event_fn);
qb_loop_poll_add (
instance->totemiba_poll_handle,
QB_LOOP_MED,
instance->send_token_channel->fd,
POLLIN, instance, send_token_rdma_event_fn);
instance->send_token_bound = 1;
return (0);
}
static int send_token_unbind (struct totemiba_instance *instance)
{
if (instance->send_token_bound == 0) {
return (0);
}
qb_loop_poll_del (
instance->totemiba_poll_handle,
instance->send_token_recv_completion_channel->fd);
qb_loop_poll_del (
instance->totemiba_poll_handle,
instance->send_token_send_completion_channel->fd);
qb_loop_poll_del (
instance->totemiba_poll_handle,
instance->send_token_channel->fd);
rdma_destroy_qp (instance->send_token_cma_id);
ibv_destroy_cq (instance->send_token_send_cq);
ibv_destroy_cq (instance->send_token_recv_cq);
ibv_destroy_comp_channel (instance->send_token_send_completion_channel);
ibv_destroy_comp_channel (instance->send_token_recv_completion_channel);
token_send_buf_destroy (instance);
ibv_dealloc_pd (instance->send_token_pd);
rdma_destroy_id (instance->send_token_cma_id);
rdma_destroy_event_channel (instance->send_token_channel);
return (0);
}
/*
 * Create the listening side for incoming token connections: an rdma
 * event channel and cm id bound to instance->bind_addr and put into the
 * listen state, with the channel registered in the poll loop.
 *
 * Returns 0 on success.  On failure -1 is returned and the channel and
 * cm id created so far are destroyed again.
 */
static int recv_token_bind (struct totemiba_instance *instance)
{
	int res;

	instance->listen_recv_token_channel = rdma_create_event_channel();
	if (instance->listen_recv_token_channel == NULL) {
		log_printf (LOGSYS_LEVEL_ERROR, "couldn't create rdma channel\n");
		return (-1);
	}

	res = rdma_create_id (instance->listen_recv_token_channel,
		&instance->listen_recv_token_cma_id, NULL, RDMA_PS_UDP);
	if (res) {
		log_printf (LOGSYS_LEVEL_ERROR, "error creating recv_token_cma_id\n");
		goto error_destroy_channel;
	}

	res = rdma_bind_addr (instance->listen_recv_token_cma_id,
		&instance->bind_addr);
	if (res) {
		log_printf (LOGSYS_LEVEL_ERROR, "error doing rdma_bind_addr for recv token\n");
		goto error_destroy_id;
	}

	/*
	 * Listen for incoming token connection requests
	 */
	res = rdma_listen (instance->listen_recv_token_cma_id, 10);
	if (res) {
		log_printf (LOGSYS_LEVEL_ERROR, "error listening %d %d\n", res, errno);
		goto error_destroy_id;
	}

	qb_loop_poll_add (
		instance->totemiba_poll_handle,
		QB_LOOP_MED,
		instance->listen_recv_token_channel->fd,
		POLLIN, instance, recv_token_rdma_event_fn);

	return (0);

error_destroy_id:
	rdma_destroy_id (instance->listen_recv_token_cma_id);
	instance->listen_recv_token_cma_id = NULL;
error_destroy_channel:
	rdma_destroy_event_channel (instance->listen_recv_token_channel);
	instance->listen_recv_token_channel = NULL;
	return (-1);
}
/*
 * Create every RDMA resource used for multicast traffic: event channel,
 * cm id bound to local_mcast_bind_addr, protection domain, the two
 * completion channels with their completion queues and the UD queue
 * pair.  On success the initial receive buffers are posted and the
 * three file descriptors are added to the poll loop.
 *
 * Returns 0 on success.  On failure -1 is returned, everything created
 * so far is released again and the matching instance fields are reset
 * to NULL.
 */
static int mcast_bind (struct totemiba_instance *instance)
{
	int res;
	struct ibv_qp_init_attr init_qp_attr;

	instance->mcast_channel = rdma_create_event_channel();
	if (instance->mcast_channel == NULL) {
		log_printf (LOGSYS_LEVEL_ERROR, "couldn't create rdma channel\n");
		return (-1);
	}

	res = rdma_create_id (instance->mcast_channel, &instance->mcast_cma_id, NULL, RDMA_PS_UDP);
	if (res) {
		log_printf (LOGSYS_LEVEL_ERROR, "error creating mcast_cma_id\n");
		goto error_destroy_channel;
	}

	res = rdma_bind_addr (instance->mcast_cma_id, &instance->local_mcast_bind_addr);
	if (res) {
		log_printf (LOGSYS_LEVEL_ERROR, "error doing rdma_bind_addr for mcast\n");
		goto error_destroy_id;
	}

	/*
	 * Resolve the multicast address into a GUID
	 */
	res = rdma_resolve_addr (instance->mcast_cma_id, &instance->local_mcast_bind_addr,
		&instance->mcast_addr, 5000);
	if (res) {
		log_printf (LOGSYS_LEVEL_ERROR, "error resolving multicast address %d %d\n", res, errno);
		goto error_destroy_id;
	}

	/*
	 * Allocate the protection domain
	 */
	instance->mcast_pd = ibv_alloc_pd (instance->mcast_cma_id->verbs);
	if (instance->mcast_pd == NULL) {
		log_printf (LOGSYS_LEVEL_ERROR, "couldn't allocate protection domain\n");
		goto error_destroy_id;
	}

	/*
	 * Create the completion channel and queue for receives
	 */
	instance->mcast_recv_completion_channel = ibv_create_comp_channel (instance->mcast_cma_id->verbs);
	if (instance->mcast_recv_completion_channel == NULL) {
		log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel\n");
		goto error_dealloc_pd;
	}

	instance->mcast_recv_cq = ibv_create_cq (instance->mcast_cma_id->verbs,
		COMPLETION_QUEUE_ENTRIES, instance,
		instance->mcast_recv_completion_channel, 0);
	if (instance->mcast_recv_cq == NULL) {
		log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue\n");
		goto error_destroy_recv_channel;
	}

	res = ibv_req_notify_cq (instance->mcast_recv_cq, 0);
	if (res != 0) {
		log_printf (LOGSYS_LEVEL_ERROR, "couldn't request notifications of the completion queue\n");
		goto error_destroy_recv_cq;
	}

	/*
	 * Create the completion channel and queue for sends
	 */
	instance->mcast_send_completion_channel = ibv_create_comp_channel (instance->mcast_cma_id->verbs);
	if (instance->mcast_send_completion_channel == NULL) {
		log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel\n");
		goto error_destroy_recv_cq;
	}

	instance->mcast_send_cq = ibv_create_cq (instance->mcast_cma_id->verbs,
		COMPLETION_QUEUE_ENTRIES, instance,
		instance->mcast_send_completion_channel, 0);
	if (instance->mcast_send_cq == NULL) {
		log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue\n");
		goto error_destroy_send_channel;
	}

	res = ibv_req_notify_cq (instance->mcast_send_cq, 0);
	if (res != 0) {
		log_printf (LOGSYS_LEVEL_ERROR, "couldn't request notifications of the completion queue\n");
		goto error_destroy_send_cq;
	}

	memset (&init_qp_attr, 0, sizeof (struct ibv_qp_init_attr));
	init_qp_attr.cap.max_send_wr = 50;
	init_qp_attr.cap.max_recv_wr = TOTAL_READ_POSTS;
	init_qp_attr.cap.max_send_sge = 1;
	init_qp_attr.cap.max_recv_sge = 1;
	init_qp_attr.qp_context = instance;
	init_qp_attr.sq_sig_all = 0;
	init_qp_attr.qp_type = IBV_QPT_UD;
	init_qp_attr.send_cq = instance->mcast_send_cq;
	init_qp_attr.recv_cq = instance->mcast_recv_cq;
	res = rdma_create_qp (instance->mcast_cma_id, instance->mcast_pd,
		&init_qp_attr);
	if (res != 0) {
		log_printf (LOGSYS_LEVEL_ERROR, "couldn't create queue pair\n");
		goto error_destroy_send_cq;
	}

	mcast_recv_buf_post_initial (instance);

	qb_loop_poll_add (
		instance->totemiba_poll_handle,
		QB_LOOP_MED,
		instance->mcast_recv_completion_channel->fd,
		POLLIN, instance, mcast_cq_recv_event_fn);

	qb_loop_poll_add (
		instance->totemiba_poll_handle,
		QB_LOOP_MED,
		instance->mcast_send_completion_channel->fd,
		POLLIN, instance, mcast_cq_send_event_fn);

	qb_loop_poll_add (
		instance->totemiba_poll_handle,
		QB_LOOP_MED,
		instance->mcast_channel->fd,
		POLLIN, instance, mcast_rdma_event_fn);

	return (0);

	/*
	 * Unwind in the reverse order of creation.
	 */
error_destroy_send_cq:
	ibv_destroy_cq (instance->mcast_send_cq);
	instance->mcast_send_cq = NULL;
error_destroy_send_channel:
	ibv_destroy_comp_channel (instance->mcast_send_completion_channel);
	instance->mcast_send_completion_channel = NULL;
error_destroy_recv_cq:
	ibv_destroy_cq (instance->mcast_recv_cq);
	instance->mcast_recv_cq = NULL;
error_destroy_recv_channel:
	ibv_destroy_comp_channel (instance->mcast_recv_completion_channel);
	instance->mcast_recv_completion_channel = NULL;
error_dealloc_pd:
	ibv_dealloc_pd (instance->mcast_pd);
	instance->mcast_pd = NULL;
error_destroy_id:
	rdma_destroy_id (instance->mcast_cma_id);
	instance->mcast_cma_id = NULL;
error_destroy_channel:
	rdma_destroy_event_channel (instance->mcast_channel);
	instance->mcast_channel = NULL;
	return (-1);
}
static void timer_function_netif_check_timeout (
void *data)
{
struct totemiba_instance *instance = (struct totemiba_instance *)data;
int res;
int interface_up;
int interface_num;
int addr_len;
totemip_iface_check (&instance->totem_interface->bindnet,
&instance->totem_interface->boundto, &interface_up, &interface_num, instance->totem_config->clear_node_high_bit);
totemip_totemip_to_sockaddr_convert(&instance->totem_interface->boundto,
instance->totem_interface->ip_port, (struct sockaddr_storage *)&instance->bind_addr,
&addr_len);
totemip_totemip_to_sockaddr_convert(&instance->totem_interface->boundto,
0, (struct sockaddr_storage *)&instance->send_token_bind_addr,
&addr_len);
totemip_totemip_to_sockaddr_convert(&instance->totem_interface->boundto,
0, (struct sockaddr_storage *)&instance->local_mcast_bind_addr,
&addr_len);
totemip_totemip_to_sockaddr_convert(&instance->totem_interface->boundto,
instance->totem_interface->ip_port, (struct sockaddr_storage *)&instance->my_id,
&addr_len);
totemip_sockaddr_to_totemip_convert(
(const struct sockaddr_storage *)&instance->bind_addr,
&instance->my_id);
memcpy (&instance->my_id, &instance->totem_interface->boundto,
sizeof (struct totem_ip_address));
totemip_totemip_to_sockaddr_convert(&instance->totem_interface->mcast_addr,
instance->totem_interface->ip_port,
(struct sockaddr_storage *)&instance->mcast_addr, &addr_len);
res = recv_token_bind (instance);
res = mcast_bind (instance);
}
/*
 * Crypto selection is not implemented for this transport: both
 * arguments are ignored and 0 is returned.
 */
int totemiba_crypto_set (
	void *iba_context,
	unsigned int type)
{
	(void)iba_context;
	(void)type;

	return (0);
}
/*
 * Placeholder: the context is ignored, nothing is released and 0 is
 * returned.
 */
int totemiba_finalize (
	void *iba_context)
{
	(void)iba_context;

	return (0);
}
/*
 * Create an instance
 *
 * Allocates and initialises a transport instance, stores the callbacks
 * and configuration in it and arms a timer that runs
 * timer_function_netif_check_timeout(), which performs the actual
 * binding.  The new instance is returned through iba_context.
 *
 * Returns 0 on success, -1 when the instance cannot be allocated or the
 * timer cannot be added; in the latter case the instance is freed and
 * *iba_context is left untouched.
 */
int totemiba_initialize (
	qb_loop_t *qb_poll_handle,
	void **iba_context,
	struct totem_config *totem_config,
	int interface_no,
	void *context,

	void (*deliver_fn) (
		void *context,
		const void *msg,
		unsigned int msg_len),

	void (*iface_change_fn) (
		void *context,
		const struct totem_ip_address *iface_address),

	void (*target_set_completed) (
		void *context))
{
	struct totemiba_instance *instance;
	int res;

	instance = malloc (sizeof (struct totemiba_instance));
	if (instance == NULL) {
		return (-1);
	}

	totemiba_instance_initialize (instance);

	instance->totem_interface = &totem_config->interfaces[interface_no];
	instance->totemiba_poll_handle = qb_poll_handle;
	instance->totemiba_deliver_fn = deliver_fn;
	instance->totemiba_target_set_completed = target_set_completed;
	instance->totemiba_iface_change_fn = iface_change_fn;
	instance->totem_config = totem_config;
	instance->rrp_context = context;

	/*
	 * Logging is configured before the timer is armed so that the timer
	 * callback can never run with an unset log function.
	 */
	instance->totemiba_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
	instance->totemiba_log_printf = totem_config->totem_logging_configuration.log_printf;

	/*
	 * NOTE(review): the delay is 100*QB_TIME_NS_IN_NSEC; confirm that
	 * this unit is the intended one.
	 */
	res = qb_loop_timer_add (instance->totemiba_poll_handle,
		QB_LOOP_MED,
		100*QB_TIME_NS_IN_NSEC,
		(void *)instance,
		timer_function_netif_check_timeout,
		&instance->timer_netif_check_timeout);
	if (res != 0) {
		/* without the timer the transport would never be bound */
		log_printf (LOGSYS_LEVEL_ERROR, "couldn't add interface check timer\n");
		free (instance);
		return (-1);
	}

	*iba_context = instance;
	return (0);
}
/*
 * Allocate one MAX_MTU_SIZE byte buffer with malloc.  Returns NULL when
 * the allocation fails.
 */
void *totemiba_buffer_alloc (void)
{
	void *buffer = malloc (MAX_MTU_SIZE);

	return (buffer);
}
/*
 * Release a buffer obtained from totemiba_buffer_alloc().  Passing NULL
 * is allowed, as with free().
 */
void totemiba_buffer_release (void *ptr)
{
	/* a void function must not return an expression */
	free (ptr);
}
/*
 * The processor count is not used by this transport: both arguments are
 * ignored and 0 is returned.
 */
int totemiba_processor_count_set (
	void *iba_context,
	int processor_count)
{
	(void)iba_context;
	(void)processor_count;

	return (0);
}
/*
 * Placeholder: the context is ignored and 0 is returned.
 */
int totemiba_recv_flush (void *iba_context)
{
	(void)iba_context;

	return (0);
}
/*
 * Placeholder: the context is ignored and 0 is returned.
 */
int totemiba_send_flush (void *iba_context)
{
	(void)iba_context;

	return (0);
}
/*
 * Send the token.
 *
 * Copies msg_len bytes from ms into a buffer obtained from
 * token_send_buf_get() and posts it as a single signaled UD send on the
 * send-token queue pair.
 *
 * Returns -1 when no send buffer is available, otherwise the result of
 * ibv_post_send().  When posting fails the buffer is handed back with
 * token_send_buf_put(), because no completion will arrive to do so.
 *
 * NOTE(review): msg_len is not checked against the size of the send
 * buffer here - confirm callers never exceed it.
 */
int totemiba_token_send (
	void *iba_context,
	const void *ms,
	unsigned int msg_len)
{
	struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
	int res = 0;
	struct ibv_send_wr send_wr, *failed_send_wr;
	struct ibv_sge sge;
	void *msg;
	struct send_buf *send_buf;

	send_buf = token_send_buf_get (instance);
	if (send_buf == NULL) {
		return (-1);
	}
	msg = send_buf->buffer;
	memcpy (msg, ms, msg_len);

	/* start from zero so no field reaches the provider uninitialised */
	memset (&send_wr, 0, sizeof (send_wr));
	memset (&sge, 0, sizeof (sge));

	send_wr.next = NULL;
	send_wr.sg_list = &sge;
	send_wr.num_sge = 1;
	send_wr.opcode = IBV_WR_SEND;
	send_wr.send_flags = IBV_SEND_SIGNALED;
	send_wr.wr_id = void2wrid(send_buf);
	send_wr.imm_data = 0;
	send_wr.wr.ud.ah = instance->send_token_ah;
	send_wr.wr.ud.remote_qpn = instance->send_token_qpn;
	send_wr.wr.ud.remote_qkey = instance->send_token_qkey;

	sge.length = msg_len;
	sge.lkey = send_buf->mr->lkey;
	sge.addr = (uintptr_t)msg;

	res = ibv_post_send (instance->send_token_cma_id->qp, &send_wr, &failed_send_wr);
	if (res != 0) {
		token_send_buf_put (instance, send_buf);
	}

	return (res);
}
/*
 * Send a multicast message.
 *
 * Copies msg_len bytes from ms into a buffer obtained from
 * mcast_send_buf_get() and posts it as a single signaled UD send on the
 * multicast queue pair.
 *
 * Returns -1 when no send buffer is available, otherwise the result of
 * ibv_post_send().
 *
 * NOTE(review): when ibv_post_send() fails the buffer is not handed
 * back to the pool here, and msg_len is not checked against the size of
 * the send buffer - both should be confirmed.
 */
int totemiba_mcast_flush_send (
	void *iba_context,
	const void *ms,
	unsigned int msg_len)
{
	struct totemiba_instance *instance = (struct totemiba_instance *)iba_context;
	int res = 0;
	struct ibv_send_wr send_wr, *failed_send_wr;
	struct ibv_sge sge;
	void *msg;
	struct send_buf *send_buf;

	send_buf = mcast_send_buf_get (instance);
	if (send_buf == NULL) {
		return (-1);
	}
	msg = send_buf->buffer;
	memcpy (msg, ms, msg_len);

	/* start from zero so no field reaches the provider uninitialised */
	memset (&send_wr, 0, sizeof (send_wr));
	memset (&sge, 0, sizeof (sge));

	send_wr.next = NULL;
	send_wr.sg_list = &sge;
	send_wr.num_sge = 1;
	send_wr.opcode = IBV_WR_SEND;
	send_wr.send_flags = IBV_SEND_SIGNALED;
	send_wr.wr_id = void2wrid(send_buf);
	send_wr.imm_data = 0;
	send_wr.wr.ud.ah = instance->mcast_ah;
	send_wr.wr.ud.remote_qpn = instance->mcast_qpn;
	send_wr.wr.ud.remote_qkey = instance->mcast_qkey;

	sge.length = msg_len;
	sge.lkey = send_buf->mr->lkey;
	sge.addr = (uintptr_t)msg;

	res = ibv_post_send (instance->mcast_cma_id->qp, &send_wr, &failed_send_wr);

	return (res);
}
/*
 * Send a multicast message without flushing.
 *
 * This transport performs exactly the same steps for the flush and the
 * noflush variant, so the work is delegated to
 * totemiba_mcast_flush_send() and its result is returned unchanged.
 */
int totemiba_mcast_noflush_send (
	void *iba_context,
	const void *ms,
	unsigned int msg_len)
{
	return (totemiba_mcast_flush_send (iba_context, ms, msg_len));
}
/*
 * Placeholder: the context is ignored and 0 is returned.
 */
extern int totemiba_iface_check (void *iba_context)
{
	(void)iba_context;

	return (0);
}
/*
 * Placeholder: both arguments are ignored and the configuration is left
 * unchanged.
 */
extern void totemiba_net_mtu_adjust (void *iba_context, struct totem_config *totem_config)
{
	(void)iba_context;
	(void)totem_config;
}
/*
 * Return the result of totemip_print() for the instance's my_id address.
 */
const char *totemiba_iface_print (void *iba_context) {
	struct totemiba_instance *iba = iba_context;

	return (totemip_print (&iba->my_id));
}
/*
 * Copy the instance's my_id address into *addr.  Always returns 0.
 */
int totemiba_iface_get (
	void *iba_context,
	struct totem_ip_address *addr)
{
	struct totemiba_instance *iba = iba_context;

	memcpy (addr, &iba->my_id, sizeof (struct totem_ip_address));

	return (0);
}
/*
 * Point the token sender at a new target.
 *
 * Converts token_target (with the interface's ip_port) into
 * instance->token_addr, tears down the current send-token binding and
 * creates a new one.  Returns the result of send_token_bind(); the
 * result of send_token_unbind() is not reported.
 */
int totemiba_token_target_set (
	void *iba_context,
	const struct totem_ip_address *token_target)
{
	struct totemiba_instance *iba = (struct totemiba_instance *)iba_context;
	int sockaddr_len = 16;

	totemip_totemip_to_sockaddr_convert((struct totem_ip_address *)token_target,
		iba->totem_interface->ip_port,
		(struct sockaddr_storage *)&iba->token_addr,
		&sockaddr_len);

	(void)send_token_unbind (iba);

	return (send_token_bind (iba));
}
/*
 * Placeholder: the context is ignored and 0 is returned.
 */
extern int totemiba_recv_mcast_empty (
	void *iba_context)
{
	(void)iba_context;

	return (0);
}
diff --git a/exec/totemnet.c b/exec/totemnet.c
index 3c87e9a5..9b211f7c 100644
--- a/exec/totemnet.c
+++ b/exec/totemnet.c
@@ -1,489 +1,489 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2009 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <assert.h>
#ifdef HAVE_RDMA
#include <totemiba.h>
#endif
#include <totemudp.h>
#include <totemudpu.h>
#include <totemnet.h>
#include <qb/qbloop.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/engine/logsys.h>
/*
 * Operations table for one network transport.  The totemnet_* functions
 * in this file forward to these members, passing the transport's own
 * context pointer as transport_context.
 */
struct transport {
/* name printed when the transport is selected */
const char *name;
/* creates the transport instance and returns it through transport_instance */
int (*initialize) (
qb_loop_t *loop_pt,
void **transport_instance,
struct totem_config *totem_config,
int interface_no,
void *context,
void (*deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len),
void (*iface_change_fn) (
void *context,
const struct totem_ip_address *iface_address),
void (*target_set_completed) (
void *context));
/* buffer management; called without a transport context */
void *(*buffer_alloc) (void);
void (*buffer_release) (void *ptr);
int (*processor_count_set) (
void *transport_context,
int processor_count);
/* send operations */
int (*token_send) (
void *transport_context,
const void *msg,
unsigned int msg_len);
int (*mcast_flush_send) (
void *transport_context,
const void *msg,
unsigned int msg_len);
int (*mcast_noflush_send) (
void *transport_context,
const void *msg,
unsigned int msg_len);
int (*recv_flush) (void *transport_context);
int (*send_flush) (void *transport_context);
int (*iface_check) (void *transport_context);
int (*finalize) (void *transport_context);
void (*net_mtu_adjust) (void *transport_context, struct totem_config *totem_config);
const char *(*iface_print) (void *transport_context);
int (*iface_get) (
void *transport_context,
struct totem_ip_address *addr);
int (*token_target_set) (
void *transport_context,
const struct totem_ip_address *token_target);
int (*crypto_set) (
void *transport_context,
unsigned int type);
int (*recv_mcast_empty) (
void *transport_context);
/*
 * Optional: totemnet_member_add()/totemnet_member_remove() only call
 * these two when they are not NULL.
 */
int (*member_add) (
void *transport_context,
const struct totem_ip_address *member);
int (*member_remove) (
void *transport_context,
const struct totem_ip_address *member);
};
/*
 * Table of available transports.  totemnet_instance_initialize() indexes
 * it with the configured transport_number, so the order of the entries
 * matters.  Members that an entry does not name (member_add and
 * member_remove for the first and last entry) are left NULL.
 */
struct transport transport_entries[] = {
/* index 0 */
{
.name = "UDP/IP Multicast",
.initialize = totemudp_initialize,
.buffer_alloc = totemudp_buffer_alloc,
.buffer_release = totemudp_buffer_release,
.processor_count_set = totemudp_processor_count_set,
.token_send = totemudp_token_send,
.mcast_flush_send = totemudp_mcast_flush_send,
.mcast_noflush_send = totemudp_mcast_noflush_send,
.recv_flush = totemudp_recv_flush,
.send_flush = totemudp_send_flush,
.iface_check = totemudp_iface_check,
.finalize = totemudp_finalize,
.net_mtu_adjust = totemudp_net_mtu_adjust,
.iface_print = totemudp_iface_print,
.iface_get = totemudp_iface_get,
.token_target_set = totemudp_token_target_set,
.crypto_set = totemudp_crypto_set,
.recv_mcast_empty = totemudp_recv_mcast_empty
},
/* index 1: the only entry that provides member_add/member_remove */
{
.name = "UDP/IP Unicast",
.initialize = totemudpu_initialize,
.buffer_alloc = totemudpu_buffer_alloc,
.buffer_release = totemudpu_buffer_release,
.processor_count_set = totemudpu_processor_count_set,
.token_send = totemudpu_token_send,
.mcast_flush_send = totemudpu_mcast_flush_send,
.mcast_noflush_send = totemudpu_mcast_noflush_send,
.recv_flush = totemudpu_recv_flush,
.send_flush = totemudpu_send_flush,
.iface_check = totemudpu_iface_check,
.finalize = totemudpu_finalize,
.net_mtu_adjust = totemudpu_net_mtu_adjust,
.iface_print = totemudpu_iface_print,
.iface_get = totemudpu_iface_get,
.token_target_set = totemudpu_token_target_set,
.crypto_set = totemudpu_crypto_set,
.recv_mcast_empty = totemudpu_recv_mcast_empty,
.member_add = totemudpu_member_add,
.member_remove = totemudpu_member_remove
},
/* index 2: only present when built with HAVE_RDMA */
#ifdef HAVE_RDMA
{
.name = "Infiniband/IP",
.initialize = totemiba_initialize,
.buffer_alloc = totemiba_buffer_alloc,
.buffer_release = totemiba_buffer_release,
.processor_count_set = totemiba_processor_count_set,
.token_send = totemiba_token_send,
.mcast_flush_send = totemiba_mcast_flush_send,
.mcast_noflush_send = totemiba_mcast_noflush_send,
.recv_flush = totemiba_recv_flush,
.send_flush = totemiba_send_flush,
.iface_check = totemiba_iface_check,
.finalize = totemiba_finalize,
.net_mtu_adjust = totemiba_net_mtu_adjust,
.iface_print = totemiba_iface_print,
.iface_get = totemiba_iface_get,
.token_target_set = totemiba_token_target_set,
.crypto_set = totemiba_crypto_set,
.recv_mcast_empty = totemiba_recv_mcast_empty
}
#endif
};
/*
 * Per-interface state of the totemnet layer: the selected transport,
 * that transport's private context, and the logging callback with its
 * subsystem id.
 *
 * The lines starting with '-' and '+' are change markers of the patch
 * this text comes from: the logging callback stops taking a single
 * rec_ident argument and takes separate level and subsys arguments
 * instead, which moves the printf format argument from position 5 to 6.
 */
struct totemnet_instance {
void *transport_context;
struct transport *transport;
void (*totemnet_log_printf) (
- unsigned int rec_ident,
+ int level,
+ int subsys,
const char *function,
const char *file,
int line,
const char *format,
- ...)__attribute__((format(printf, 5, 6)));
+ ...)__attribute__((format(printf, 6, 7)));
int totemnet_subsys_id;
};
/*
 * Log through the callback stored in the totemnet instance.  A variable
 * named "instance" must be in scope where the macro is used.
 *
 * The lines starting with '-' and '+' are change markers of the patch
 * this text comes from: the level and subsystem id are passed as two
 * separate arguments instead of being packed with LOGSYS_ENCODE_RECID.
 *
 * NOTE(review): the expansion ends in "while (0);" with a trailing
 * semicolon, so using the macro as the body of an unbraced if/else
 * produces an extra empty statement.
 */
#define log_printf(level, format, args...) \
do { \
instance->totemnet_log_printf ( \
- LOGSYS_ENCODE_RECID(level, \
+ level, \
instance->totemnet_subsys_id, \
- LOGSYS_RECID_LOG), \
__FUNCTION__, __FILE__, __LINE__, \
(const char *)format, ##args); \
} while (0);
/*
 * Fill in a freshly allocated totemnet instance: copy the logging
 * callback and subsystem id from the configuration and select the
 * transport named by config->transport_number.
 *
 * The transport number is asserted to be a valid index into
 * transport_entries (which is shorter when built without HAVE_RDMA)
 * before the table is read.
 */
static void totemnet_instance_initialize (
	struct totemnet_instance *instance,
	struct totem_config *config)
{
	int transport;

	instance->totemnet_log_printf = config->totem_logging_configuration.log_printf;
	instance->totemnet_subsys_id = config->totem_logging_configuration.log_subsys_id;

	transport = config->transport_number;
	assert (transport >= 0);
	assert ((unsigned int)transport <
		sizeof (transport_entries) / sizeof (transport_entries[0]));

	log_printf (LOGSYS_LEVEL_NOTICE,
		"Initializing transport (%s).\n", transport_entries[transport].name);

	instance->transport = &transport_entries[transport];
}
/*
 * Forward to the selected transport's crypto_set operation and return
 * its result.
 */
int totemnet_crypto_set (
	void *net_context,
	unsigned int type)
{
	struct totemnet_instance *net = net_context;

	return net->transport->crypto_set (net->transport_context, type);
}
/*
 * Forward to the selected transport's finalize operation and return its
 * result.  The totemnet instance itself is not freed here.
 */
int totemnet_finalize (
	void *net_context)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->finalize (net->transport_context));
}
/*
 * Create a totemnet instance: allocate it, select the configured
 * transport and run that transport's initialize operation.  The new
 * instance is returned through net_context.
 *
 * Returns 0 on success.  Returns -1 when the allocation fails or the
 * transport reports a negative result; the instance is freed in the
 * latter case and *net_context is left untouched.
 */
int totemnet_initialize (
	qb_loop_t *loop_pt,
	void **net_context,
	struct totem_config *totem_config,
	int interface_no,
	void *context,

	void (*deliver_fn) (
		void *context,
		const void *msg,
		unsigned int msg_len),

	void (*iface_change_fn) (
		void *context,
		const struct totem_ip_address *iface_address),

	void (*target_set_completed) (
		void *context))
{
	struct totemnet_instance *instance;
	int res;

	instance = malloc (sizeof (struct totemnet_instance));
	if (instance == NULL) {
		return (-1);
	}
	totemnet_instance_initialize (instance, totem_config);

	/*
	 * The transport returns a signed status, so it is kept in a signed
	 * variable instead of comparing an unsigned value with -1.
	 */
	res = instance->transport->initialize (loop_pt,
		&instance->transport_context, totem_config,
		interface_no, context, deliver_fn, iface_change_fn, target_set_completed);
	if (res < 0) {
		goto error_destroy;
	}

	*net_context = instance;
	return (0);

error_destroy:
	free (instance);
	return (-1);
}
/*
 * Return a buffer from the selected transport's buffer_alloc operation.
 * Asserts that the instance and its transport are set.
 */
void *totemnet_buffer_alloc (void *net_context)
{
	struct totemnet_instance *net = net_context;
	void *buffer;

	assert (net != NULL);
	assert (net->transport != NULL);

	buffer = net->transport->buffer_alloc();
	return buffer;
}
/*
 * Hand ptr to the selected transport's buffer_release operation.
 * Asserts that the instance and its transport are set.
 */
void totemnet_buffer_release (void *net_context, void *ptr)
{
	struct totemnet_instance *net = net_context;

	assert (net != NULL);
	assert (net->transport != NULL);

	net->transport->buffer_release (ptr);
}
/*
 * Forward to the selected transport's processor_count_set operation and
 * return its result.
 */
int totemnet_processor_count_set (
	void *net_context,
	int processor_count)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->processor_count_set (net->transport_context, processor_count));
}
/*
 * Forward to the selected transport's recv_flush operation and return
 * its result.
 */
int totemnet_recv_flush (void *net_context)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->recv_flush (net->transport_context));
}
/*
 * Forward to the selected transport's send_flush operation and return
 * its result.
 */
int totemnet_send_flush (void *net_context)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->send_flush (net->transport_context));
}
/*
 * Forward to the selected transport's token_send operation and return
 * its result.
 */
int totemnet_token_send (
	void *net_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->token_send (net->transport_context, msg, msg_len));
}
/*
 * Forward to the selected transport's mcast_flush_send operation and
 * return its result.
 */
int totemnet_mcast_flush_send (
	void *net_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->mcast_flush_send (net->transport_context, msg, msg_len));
}
/*
 * Forward to the selected transport's mcast_noflush_send operation and
 * return its result.
 */
int totemnet_mcast_noflush_send (
	void *net_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->mcast_noflush_send (net->transport_context, msg, msg_len));
}
/*
 * Forward to the selected transport's iface_check operation and return
 * its result.
 */
extern int totemnet_iface_check (void *net_context)
{
	struct totemnet_instance *net = net_context;

	return (net->transport->iface_check (net->transport_context));
}
/*
 * Forward to the selected transport's net_mtu_adjust operation.  That
 * operation returns nothing, so this wrapper always returns 0.
 */
extern int totemnet_net_mtu_adjust (void *net_context, struct totem_config *totem_config)
{
	struct totemnet_instance *net = net_context;

	net->transport->net_mtu_adjust (net->transport_context, totem_config);

	return (0);
}
/*
 * Forward to the selected transport's iface_print operation and return
 * the string it produces.
 */
const char *totemnet_iface_print (void *net_context) {
	struct totemnet_instance *net = net_context;

	return (net->transport->iface_print (net->transport_context));
}
/*
 * Invoke the iface_get hook of the transport attached to this totemnet
 * instance, passing addr through, and return the hook's result.
 */
int totemnet_iface_get (
	void *net_context,
	struct totem_ip_address *addr)
{
	struct totemnet_instance *net = net_context;
	unsigned int status;

	status = net->transport->iface_get (net->transport_context, addr);

	return (status);
}
/*
 * Invoke the token_target_set hook of the transport attached to this
 * totemnet instance, passing token_target through, and return the
 * hook's result.
 */
int totemnet_token_target_set (
	void *net_context,
	const struct totem_ip_address *token_target)
{
	struct totemnet_instance *net = net_context;
	unsigned int status;

	status = net->transport->token_target_set (net->transport_context,
		token_target);

	return (status);
}
/*
 * Invoke the recv_mcast_empty hook of the transport attached to this
 * totemnet instance and return the hook's result.
 */
extern int totemnet_recv_mcast_empty (
	void *net_context)
{
	struct totemnet_instance *net = net_context;
	unsigned int status;

	status = net->transport->recv_mcast_empty (net->transport_context);

	return (status);
}
/*
 * Invoke the transport's member_add hook for member.  The hook is
 * optional: when the transport leaves it unset nothing is called and
 * 0 is returned; otherwise the hook's result is returned.
 */
extern int totemnet_member_add (
	void *net_context,
	const struct totem_ip_address *member)
{
	struct totemnet_instance *net = net_context;
	unsigned int status;

	if (!net->transport->member_add) {
		return (0);
	}

	status = net->transport->member_add (net->transport_context, member);

	return (status);
}
/*
 * Invoke the transport's member_remove hook for member.  The hook is
 * optional: when the transport leaves it unset nothing is called and
 * 0 is returned; otherwise the hook's result is returned.
 */
extern int totemnet_member_remove (
	void *net_context,
	const struct totem_ip_address *member)
{
	struct totemnet_instance *net = net_context;
	unsigned int status;

	if (!net->transport->member_remove) {
		return (0);
	}

	status = net->transport->member_remove (net->transport_context, member);

	return (status);
}
diff --git a/exec/totempg.c b/exec/totempg.c
index 36d5a448..33e3e2fd 100644
--- a/exec/totempg.c
+++ b/exec/totempg.c
@@ -1,1460 +1,1459 @@
/*
* Copyright (c) 2003-2005 MontaVista Software, Inc.
* Copyright (c) 2005 OSDL.
* Copyright (c) 2006-2009 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
* Author: Mark Haverkamp (markh@osdl.org)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* FRAGMENTATION AND PACKING ALGORITHM:
*
* Assemble the entire message into one buffer
* if full fragment
* store fragment into lengths list
* for each full fragment
* multicast fragment
* set length and fragment fields of pg message
* store remaining multicast into head of fragmentation data and set lens field
*
* If a message exceeds the maximum packet size allowed by the totem
* single ring protocol, the protocol could lose forward progress.
* Statically calculating the allowed data amount doesn't work because
* the amount of data allowed depends on the number of fragments in
* each message. In this implementation, the maximum fragment size
* is dynamically calculated for each fragment added to the message.
* It is possible for a message to be two bytes short of the maximum
* packet size. This occurs when a message or collection of
* messages + the mcast header + the lens are two bytes short of the
* end of the packet. Since another len field consumes two bytes, the
* len field would consume the rest of the packet without room for data.
*
* One optimization would be to forgo the final len field and determine
* it from the size of the udp datagram. Then this condition would no
* longer occur.
*/
/*
* ASSEMBLY AND UNPACKING ALGORITHM:
*
* copy incoming packet into assembly data buffer indexed by current
* location of end of fragment
*
* if not fragmented
* deliver all messages in assembly data buffer
* else
* if msg_count > 1 and fragmented
* deliver all messages except last message in assembly data buffer
* copy last fragmented section to start of assembly data buffer
* else
* if msg_count = 1 and fragmented
* do nothing
*
*/
#include <config.h>
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
#include <netinet/in.h>
#include <sys/uio.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <pthread.h>
#include <errno.h>
#include <limits.h>
#include <corosync/swab.h>
#include <corosync/hdb.h>
#include <corosync/list.h>
#include <qb/qbloop.h>
#include <qb/qbipcs.h>
#include <corosync/totem/totempg.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/engine/logsys.h>
#include "totemmrp.h"
#include "totemsrp.h"
/*
 * Smaller of two values.  Both operands and the whole expansion are
 * parenthesized so the macro can be used inside a larger expression
 * (e.g. "min(a, b) * 2").  An argument may be evaluated twice, so do
 * not pass expressions with side effects.
 */
#define min(a,b) (((a) < (b)) ? (a) : (b))
/*
 * Identifies a totempg multicast; embedded as the first member of
 * struct totempg_mcast.
 */
struct totempg_mcast_header {
short version;
short type;
};
#if !(defined(__i386__) || defined(__x86_64__))
/*
 * Alignment handling is needed on architectures other than i386 and
 * x86_64.  When this is defined, the delivery helpers in this file copy
 * message data into freshly allocated (alloca) buffers before reading
 * it through unsigned short pointers.
 */
#define TOTEMPG_NEED_ALIGN 1
#endif
/*
 * totempg_mcast structure
 *
 * header:       Identify the mcast.
 * fragmented:   Set if this message continues into next message
 * continuation: Set if this message is a continuation from last message
 * msg_count:    Indicates how many packed messages are contained
 *               in the mcast.
 *
 * Also, the size of each packed message and the messages themselves are
 * appended to the end of this structure when sent.
 */
struct totempg_mcast {
struct totempg_mcast_header header;
/* 0, or the sender's fragment number when the last message continues */
unsigned char fragmented;
/* 0, or the fragment number this packet's first message continues */
unsigned char continuation;
unsigned short msg_count;
/*
 * short msg_len[msg_count];
 */
/*
 * data for messages
 */
};
/*
 * Maximum packet size for totem pg messages: the configured net_mtu
 * minus the fixed struct totempg_mcast header.  The macro reads
 * totempg_totem_config->net_mtu each time it is expanded, so its value
 * follows later changes to net_mtu.
 */
#define TOTEMPG_PACKET_SIZE (totempg_totem_config->net_mtu - \
sizeof (struct totempg_mcast))
/*
 * Local variables used for packing small messages
 */
/* Length of each message currently packed in the staging buffer */
static unsigned short mcast_packed_msg_lens[FRAME_SIZE_MAX];
/* Number of valid entries in mcast_packed_msg_lens */
static int mcast_packed_msg_count = 0;
/* Reservation counter: raised by send_reserve(), lowered by send_release() */
static int totempg_reserved = 1;
/* Computed once in totempg_initialize() */
static unsigned int totempg_size_limit;
/* Optional callback invoked by check_q_level() when the queue level changes */
static totem_queue_level_changed_fn totem_queue_level_changed = NULL;
/*
 * Function and data used to log messages
 *
 * The log levels, the subsystem id and the print function are all copied
 * from totem_config->totem_logging_configuration by totempg_initialize().
 */
static int totempg_log_level_security;
static int totempg_log_level_error;
static int totempg_log_level_warning;
static int totempg_log_level_notice;
static int totempg_log_level_debug;
static int totempg_subsys_id;
/*
 * printf-style logging callback; invoked through the log_printf() macro
 * defined further down in this file.
 */
static void (*totempg_log_printf) (
- unsigned int rec_ident,
+ int level,
+ int subsys,
const char *function,
const char *file,
int line,
- const char *format, ...) __attribute__((format(printf, 5, 6)));
+ const char *format, ...) __attribute__((format(printf, 6, 7)));
/* Configuration handed to totempg_initialize(); source of net_mtu */
struct totem_config *totempg_totem_config;
/* Statistics block passed to totemmrp_initialize() and updated by the
 * queue-space helpers in this file */
static totempg_stats_t totempg_stats;
/*
 * Per-node reassembly state used by totempg_deliver_fn().  The mode is
 * switched to ACTIVE when a packet's continuation number does not match
 * the node's last fragment number; while ACTIVE the first message block
 * of a packet is thrown away instead of delivered.
 */
enum throw_away_mode {
THROW_AWAY_INACTIVE,
THROW_AWAY_ACTIVE
};
/*
 * Reassembly buffer for the messages received from one node.  Instances
 * live on either assembly_list_inuse or assembly_list_free.
 */
struct assembly {
/* node whose packets are being assembled in this buffer */
unsigned int nodeid;
/* payload bytes copied out of received packets */
unsigned char data[MESSAGE_SIZE_MAX];
/* offset into data where the next payload is copied */
int index;
/* fragment number of the last packet; compared with the next packet's
 * continuation field */
unsigned char last_frag_num;
enum throw_away_mode throw_away_mode;
/* linkage on the in-use or free list */
struct list_head list;
};
/* Forward declarations for helpers defined later in this file */
static void assembly_deref (struct assembly *assembly);
static int callback_token_received_fn (enum totem_callback_token_type type,
const void *data);
/* Assembly buffers currently bound to a node id */
DECLARE_LIST_INIT(assembly_list_inuse);
/* Assembly buffers available for reuse by assembly_ref() */
DECLARE_LIST_INIT(assembly_list_free);
/*
* Staging buffer for packed messages. Messages are staged in this buffer
* before sending. Multiple messages may fit which cuts down on the
* number of mcasts sent. If a message doesn't completely fit, then
* the mcast header has a fragment bit set that says that there are more
* data to follow. fragment_size is an index into the buffer. It indicates
* the size of message data and where to place new message data.
* fragment_continuation indicates whether the first packed message in
* the buffer is a continuation of a previously packed fragment.
*/
/* Staging buffer; allocated in totempg_initialize() */
static unsigned char *fragmentation_data;
/* Number of bytes currently staged in fragmentation_data */
static int fragment_size = 0;
/* Value placed in the continuation field of the next packet sent */
static int fragment_continuation = 0;
/* Cursor over the assembly buffer used by totempg_deliver_fn() */
static struct iovec iov_delv;
/* Highest handle returned so far by totempg_groups_initialize(); upper
 * bound for the handle scans in the delivery and confchg helpers */
static unsigned int totempg_max_handle = 0;
/*
 * One registered user of the process group API; created by
 * totempg_groups_initialize() and stored in
 * totempg_groups_instance_database.
 */
struct totempg_group_instance {
/* called for each delivered message that matches one of groups */
void (*deliver_fn) (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required);
/* called on configuration changes; may be NULL (checked before use) */
void (*confchg_fn) (
enum totem_configuration_type configuration_type,
const unsigned int *member_list, size_t member_list_entries,
const unsigned int *left_list, size_t left_list_entries,
const unsigned int *joined_list, size_t joined_list_entries,
const struct memb_ring_id *ring_id);
/* groups joined via totempg_groups_join(); array of groups_cnt entries */
struct totempg_group *groups;
int groups_cnt;
/* last queue level computed by check_q_level() */
int32_t q_level;
};
/* Handle database holding every struct totempg_group_instance */
DECLARE_HDB_DATABASE (totempg_groups_instance_database,NULL);
/* Next fragment number used by mcast_msg(); zero is skipped there */
static unsigned char next_fragment = 1;
/* Taken by the totempg_groups_* entry points and totempg_finalize() */
static pthread_mutex_t totempg_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Taken around callback token create/destroy */
static pthread_mutex_t callback_token_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Taken while the packing state (staging buffer, lengths, counts) is used */
static pthread_mutex_t mcast_msg_mutex = PTHREAD_MUTEX_INITIALIZER;
/*
 * log_printf(level, format, ...): forwards to totempg_log_printf together
 * with totempg_subsys_id and the call site's function, file and line.
 *
 * NOTE(review): the expansion ends in "while (0);".  With that trailing
 * semicolon, "if (x) log_printf (...); else ..." does not compile;
 * consider dropping it.
 */
-#define log_printf(level, format, args...) \
-do { \
- totempg_log_printf ( \
- LOGSYS_ENCODE_RECID(level, \
- totempg_subsys_id, \
- LOGSYS_RECID_LOG), \
- __FUNCTION__, __FILE__, __LINE__, \
- format, ##args); \
+#define log_printf(level, format, args...) \
+do { \
+ totempg_log_printf(level, \
+ totempg_subsys_id, \
+ __FUNCTION__, __FILE__, __LINE__, \
+ format, ##args); \
} while (0);
/* Queue-space checks; defined after mcast_msg() */
static int msg_count_send_ok (int msg_count);
static int byte_count_send_ok (int byte_count);
/*
 * Return the assembly buffer bound to nodeid.
 *
 * Lookup order:
 *   1. a buffer already on the in-use list for this node id;
 *   2. otherwise the first buffer on the free list, rebound to nodeid;
 *   3. otherwise a newly malloc'ed buffer (allocation failure trips
 *      the assert).
 *
 * A buffer obtained through 2 or 3 is reset (index 0, last fragment
 * number 0, throw-away mode inactive) and placed on the in-use list.
 */
static struct assembly *assembly_ref (unsigned int nodeid)
{
	struct assembly *entry;
	struct list_head *cursor;

	/*
	 * Already tracking this node?
	 */
	cursor = assembly_list_inuse.next;
	while (cursor != &assembly_list_inuse) {
		entry = list_entry (cursor, struct assembly, list);
		if (entry->nodeid == nodeid) {
			return (entry);
		}
		cursor = cursor->next;
	}

	if (!list_empty (&assembly_list_free)) {
		/*
		 * Recycle the first free buffer
		 */
		entry = list_entry (assembly_list_free.next, struct assembly, list);
		list_del (&entry->list);
	} else {
		/*
		 * Neither list can help, so allocate a fresh buffer
		 *
		 * TODO handle memory allocation failure here
		 */
		entry = malloc (sizeof (struct assembly));
		assert (entry);
		entry->data[0] = 0;
		list_init (&entry->list);
	}

	entry->nodeid = nodeid;
	entry->index = 0;
	entry->last_frag_num = 0;
	entry->throw_away_mode = THROW_AWAY_INACTIVE;
	list_add (&entry->list, &assembly_list_inuse);

	return (entry);
}
/*
 * Unlink an assembly buffer from whichever list holds it and put it on
 * the free list.
 */
static void assembly_deref (struct assembly *assembly)
{
	struct list_head *node = &assembly->list;

	list_del (node);
	list_add (node, &assembly_list_free);
}
/*
 * Handle a configuration change: move the assembly buffer of every
 * node in left_list to the free list, then call the confchg_fn of
 * every registered group instance that has one, passing all arguments
 * through unchanged.
 */
static inline void app_confchg_fn (
	enum totem_configuration_type configuration_type,
	const unsigned int *member_list, size_t member_list_entries,
	const unsigned int *left_list, size_t left_list_entries,
	const unsigned int *joined_list, size_t joined_list_entries,
	const struct memb_ring_id *ring_id)
{
	struct totempg_group_instance *group;
	int idx;

	/*
	 * assembly_ref() finds (or creates) the buffer for the departed
	 * node; assembly_deref() then moves it to the free list.
	 */
	for (idx = 0; idx < left_list_entries; idx++) {
		assembly_deref (assembly_ref (left_list[idx]));
	}

	/*
	 * Scan every handle value up to the highest one handed out and
	 * skip the ones that do not resolve.
	 */
	for (idx = 0; idx <= totempg_max_handle; idx++) {
		if (hdb_handle_get (&totempg_groups_instance_database,
			hdb_nocheck_convert (idx), (void *)&group) != 0) {
			continue;
		}

		if (group->confchg_fn) {
			group->confchg_fn (
				configuration_type,
				member_list, member_list_entries,
				left_list, left_list_entries,
				joined_list, joined_list_entries,
				ring_id);
		}

		hdb_handle_put (&totempg_groups_instance_database,
			hdb_nocheck_convert (idx));
	}
}
/*
 * Byte-swap, in place, the group header at the start of msg: the
 * leading unsigned short group count followed by that many unsigned
 * short group lengths.
 *
 * When TOTEMPG_NEED_ALIGN is defined and msg is not 4-byte aligned,
 * the swap is done on an alloca'ed copy of the whole message which is
 * then copied back over msg.
 */
static inline void group_endian_convert (
	void *msg,
	int msg_len)
{
	char *work = msg;
	unsigned short *lens;
	int group_cnt;
	int idx;

#ifdef TOTEMPG_NEED_ALIGN
	if ((size_t)msg % 4 != 0) {
		work = alloca(msg_len);
		memcpy(work, msg, msg_len);
	}
#endif

	lens = (unsigned short *)work;

	/*
	 * The count must be swapped first: it bounds the loop below
	 */
	lens[0] = swab16(lens[0]);
	group_cnt = lens[0];

	for (idx = 1; idx <= group_cnt; idx++) {
		lens[idx] = swab16(lens[idx]);
	}

	if (work != msg) {
		memcpy(msg, work, msg_len);
	}
}
/*
 * Decide whether the message in iovec (exactly one element) is
 * addressed to any of the group_b_cnt groups in groups_b.
 *
 * The message starts with an unsigned short group count, that many
 * unsigned short name lengths, and then the group names back to back.
 * *adjust_iovec is set to the total size of that header (count,
 * lengths and names), i.e. the offset of the payload.
 *
 * Returns 1 when a name in the message equals (same length, same
 * bytes) one of the groups in groups_b, 0 otherwise.
 */
static inline int group_matches (
	struct iovec *iovec,
	unsigned int iov_len,
	struct totempg_group *groups_b,
	unsigned int group_b_cnt,
	unsigned int *adjust_iovec)
{
	unsigned short *lens;
	char *name_cursor;
	size_t header_bytes;
	int name_cnt;
	int name_idx;
	int own_idx;
#ifdef TOTEMPG_NEED_ALIGN
	struct iovec aligned_copy = { NULL, 0 };
#endif

	assert (iov_len == 1);

#ifdef TOTEMPG_NEED_ALIGN
	/*
	 * Work on an aligned copy when the data is not 4-byte aligned
	 */
	if ((size_t)iovec->iov_base % 4 != 0) {
		aligned_copy.iov_base = alloca(iovec->iov_len);
		memcpy(aligned_copy.iov_base, iovec->iov_base, iovec->iov_len);
		aligned_copy.iov_len = iovec->iov_len;
		iovec = &aligned_copy;
	}
#endif

	lens = (unsigned short *)iovec->iov_base;
	name_cnt = lens[0];
	header_bytes = sizeof (unsigned short) * (name_cnt + 1);
	name_cursor = ((char *)iovec->iov_base) + header_bytes;

	/*
	 * Offset of the payload: count + lengths, then every name
	 */
	*adjust_iovec = header_bytes;
	for (name_idx = 1; name_idx <= name_cnt; name_idx++) {
		*adjust_iovec += lens[name_idx];
	}

	/*
	 * Compare every name in the message with every joined group
	 */
	for (name_idx = 1; name_idx <= name_cnt; name_idx++) {
		for (own_idx = 0; own_idx < group_b_cnt; own_idx++) {
			if ((lens[name_idx] == groups_b[own_idx].group_len) &&
				(memcmp (groups_b[own_idx].group, name_cursor,
					lens[name_idx]) == 0)) {
				return (1);
			}
		}
		name_cursor += lens[name_idx];
	}

	return (0);
}
/*
 * Deliver one complete message to every registered group instance
 * whose joined groups match the group header at the front of msg.
 *
 * nodeid:  sender of the message, passed through to deliver_fn.
 * msg:     message starting with the group header (see group_matches).
 * msg_len: length of msg in bytes.
 * endian_conversion_required: when nonzero the group header is
 *          byte-swapped in place first; the flag is also passed through
 *          to deliver_fn.
 *
 * Each matching instance receives the message with the group header
 * stripped off.
 */
static inline void app_deliver_fn (
	unsigned int nodeid,
	void *msg,
	unsigned int msg_len,
	int endian_conversion_required)
{
	int i;
	struct totempg_group_instance *instance;
	struct iovec stripped_iovec;
	unsigned int adjust_iovec;
	unsigned int res;
	struct iovec *iovec;
	struct iovec aligned_iovec = { NULL, 0 };

	if (endian_conversion_required) {
		group_endian_convert (msg, msg_len);
	}

	/*
	 * TODO: segmentation/assembly need to be redesigned to provide aligned access
	 * in all cases to avoid memory copies on non386 archs. Probably broke backwards
	 * compatibility
	 */

#ifdef TOTEMPG_NEED_ALIGN
	/*
	 * Align data structure for architectures other than i386 or x86_64
	 */
	aligned_iovec.iov_base = alloca(msg_len);
	aligned_iovec.iov_len = msg_len;
	memcpy(aligned_iovec.iov_base, msg, msg_len);
#else
	aligned_iovec.iov_base = msg;
	aligned_iovec.iov_len = msg_len;
#endif

	iovec = &aligned_iovec;

	/*
	 * Scan every handle value up to the highest one handed out;
	 * values that do not resolve are skipped.
	 */
	for (i = 0; i <= totempg_max_handle; i++) {
		res = hdb_handle_get (&totempg_groups_instance_database,
			hdb_nocheck_convert (i), (void *)&instance);

		if (res == 0) {
			if (group_matches (iovec, 1, instance->groups, instance->groups_cnt, &adjust_iovec)) {
				stripped_iovec.iov_len = iovec->iov_len - adjust_iovec;
				stripped_iovec.iov_base = (char *)iovec->iov_base + adjust_iovec;

#ifdef TOTEMPG_NEED_ALIGN
				/*
				 * Copy the payload only when its address really is
				 * misaligned.  The address has to be formed first
				 * and then reduced modulo 4; "ptr + n % 4" would
				 * apply % to n alone and compare a pointer with 0.
				 */
				if ((size_t)((char *)iovec->iov_base + adjust_iovec) % 4 != 0) {
					/*
					 * Deal with misalignment
					 */
					stripped_iovec.iov_base =
						alloca (stripped_iovec.iov_len);
					memcpy (stripped_iovec.iov_base,
						(char *)iovec->iov_base + adjust_iovec,
						stripped_iovec.iov_len);
				}
#endif
				instance->deliver_fn (
					nodeid,
					stripped_iovec.iov_base,
					stripped_iovec.iov_len,
					endian_conversion_required);
			}

			hdb_handle_put (&totempg_groups_instance_database, hdb_nocheck_convert(i));
		}
	}
}
/*
 * Configuration-change callback handed to totemmrp_initialize(); it
 * simply forwards every argument to app_confchg_fn().
 */
static void totempg_confchg_fn (
	enum totem_configuration_type configuration_type,
	const unsigned int *member_list, size_t member_list_entries,
	const unsigned int *left_list, size_t left_list_entries,
	const unsigned int *joined_list, size_t joined_list_entries,
	const struct memb_ring_id *ring_id)
{
	// TODO optimize this
	app_confchg_fn (
		configuration_type,
		member_list,
		member_list_entries,
		left_list,
		left_list_entries,
		joined_list,
		joined_list_entries,
		ring_id);
}
/*
 * Delivery callback handed to totemmrp_initialize().  Unpacks one
 * received totempg multicast: the payload is appended to the sender's
 * assembly buffer, complete packed messages are passed to
 * app_deliver_fn(), and a trailing fragment is kept at the front of the
 * buffer until the rest of it arrives.
 *
 * nodeid:  sender; selects the assembly buffer via assembly_ref().
 * msg:     packet laid out as struct totempg_mcast, then msg_count
 *          unsigned short lengths, then the packed message data.
 * msg_len: length of msg in bytes.
 * endian_conversion_required: when nonzero, msg_count and the lengths
 *          are byte-swapped with swab16; the flag is also passed on to
 *          app_deliver_fn().
 */
static void totempg_deliver_fn (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required)
{
struct totempg_mcast *mcast;
unsigned short *msg_lens;
int i;
struct assembly *assembly;
char header[FRAME_SIZE_MAX];
int msg_count;
int continuation;
int start;
const char *data;
int datasize;
assembly = assembly_ref (nodeid);
assert (assembly);
/*
* Assemble the header into one block of data and
* assemble the packet contents into one block of data to simplify delivery
*/
/*
* NOTE(review): msg is declared const, yet msg_count is byte-swapped in
* place through this cast pointer when endian conversion is required.
*/
mcast = (struct totempg_mcast *)msg;
if (endian_conversion_required) {
mcast->msg_count = swab16 (mcast->msg_count);
}
msg_count = mcast->msg_count;
/*
* datasize = fixed header + one length per packed message.
* NOTE(review): msg_count comes from the received packet and no check
* against sizeof (header) or msg_len is visible here before the copies
* below - confirm the lower layers bound it.
*/
datasize = sizeof (struct totempg_mcast) +
msg_count * sizeof (unsigned short);
memcpy (header, msg, datasize);
data = msg;
/* The lengths are read (and swapped) in the local header copy */
msg_lens = (unsigned short *) (header + sizeof (struct totempg_mcast));
if (endian_conversion_required) {
for (i = 0; i < mcast->msg_count; i++) {
msg_lens[i] = swab16 (msg_lens[i]);
}
}
/* Append the packet payload at the current end of the assembly buffer */
memcpy (&assembly->data[assembly->index], &data[datasize],
msg_len - datasize);
/*
* If the last message in the buffer is a fragment, then we
* can't deliver it. We'll first deliver the full messages
* then adjust the assembly buffer so we can add the rest of the
* fragment when it arrives.
*/
msg_count = mcast->fragmented ? mcast->msg_count - 1 : mcast->msg_count;
continuation = mcast->continuation;
/*
* The first deliverable message spans whatever was already buffered
* (assembly->index bytes) plus the first block of this packet.
*/
iov_delv.iov_base = (void *)&assembly->data[0];
iov_delv.iov_len = assembly->index + msg_lens[0];
/*
* Make sure that if this message is a continuation, that it
* matches the sequence number of the previous fragment.
* Also, if the first packed message is a continuation
* of a previous message, but the assembly buffer
* is empty, then we need to discard it since we can't
* assemble a complete message. Likewise, if this message isn't a
* continuation and the assembly buffer is empty, we have to discard
* the continued message.
*/
start = 0;
if (assembly->throw_away_mode == THROW_AWAY_ACTIVE) {
/* Throw away the first msg block */
/*
* NOTE(review): this branch only updates the cursor and sets start;
* the delivery loop lives in the else branch below, so nothing from
* this packet is delivered while leaving throw-away mode - confirm
* that this is intended.
*/
if (mcast->fragmented == 0 || mcast->fragmented == 1) {
assembly->throw_away_mode = THROW_AWAY_INACTIVE;
assembly->index += msg_lens[0];
iov_delv.iov_base = (void *)&assembly->data[assembly->index];
iov_delv.iov_len = msg_lens[1];
start = 1;
}
} else
if (assembly->throw_away_mode == THROW_AWAY_INACTIVE) {
if (continuation == assembly->last_frag_num) {
assembly->last_frag_num = mcast->fragmented;
/* Deliver each complete message and advance the cursor past it */
for (i = start; i < msg_count; i++) {
app_deliver_fn(nodeid, iov_delv.iov_base, iov_delv.iov_len,
endian_conversion_required);
assembly->index += msg_lens[i];
iov_delv.iov_base = (void *)&assembly->data[assembly->index];
if (i < (msg_count - 1)) {
iov_delv.iov_len = msg_lens[i + 1];
}
}
} else {
/* Continuation number mismatch: start discarding */
assembly->throw_away_mode = THROW_AWAY_ACTIVE;
}
}
if (mcast->fragmented == 0) {
/*
* End of messages, dereference assembly struct
*/
assembly->last_frag_num = 0;
assembly->index = 0;
assembly_deref (assembly);
} else {
/*
* Message is fragmented, keep around assembly list
*/
/*
* msg_lens[msg_count] is the length of the trailing fragment (msg_count
* was reduced by one above).  With more than one message in the packet
* the fragment is moved to the front of the buffer; in either case
* index ends up just past the buffered fragment data.
*/
if (mcast->msg_count > 1) {
memmove (&assembly->data[0],
&assembly->data[assembly->index],
msg_lens[msg_count]);
assembly->index = 0;
}
assembly->index += msg_lens[msg_count];
}
}
/*
* Totem Process Group Abstraction
* depends on poll abstraction, POSIX, IPV4
*/
void *callback_token_received_handle;
int callback_token_received_fn (enum totem_callback_token_type type,
const void *data)
{
struct totempg_mcast mcast;
struct iovec iovecs[3];
pthread_mutex_lock (&mcast_msg_mutex);
if (mcast_packed_msg_count == 0) {
pthread_mutex_unlock (&mcast_msg_mutex);
return (0);
}
if (totemmrp_avail() == 0) {
pthread_mutex_unlock (&mcast_msg_mutex);
return (0);
}
mcast.header.version = 0;
mcast.header.type = 0;
mcast.fragmented = 0;
/*
* Was the first message in this buffer a continuation of a
* fragmented message?
*/
mcast.continuation = fragment_continuation;
fragment_continuation = 0;
mcast.msg_count = mcast_packed_msg_count;
iovecs[0].iov_base = (void *)&mcast;
iovecs[0].iov_len = sizeof (struct totempg_mcast);
iovecs[1].iov_base = (void *)mcast_packed_msg_lens;
iovecs[1].iov_len = mcast_packed_msg_count * sizeof (unsigned short);
iovecs[2].iov_base = (void *)&fragmentation_data[0];
iovecs[2].iov_len = fragment_size;
(void)totemmrp_mcast (iovecs, 3, 0);
mcast_packed_msg_count = 0;
fragment_size = 0;
pthread_mutex_unlock (&mcast_msg_mutex);
return (0);
}
/*
 * Initialize the totem process group abstraction
 *
 * Records totem_config, copies its logging settings into the file-level
 * logging variables, allocates the staging buffer, initializes the
 * totemmrp layer with this file's deliver/confchg callbacks, registers
 * the token-received callback and computes totempg_size_limit.
 *
 * Note that the staging buffer is sized from net_mtu as it is before
 * totemsrp_net_mtu_adjust() is called.
 *
 * Returns -1 if the staging buffer cannot be allocated, otherwise the
 * result of totemmrp_initialize().
 */
int totempg_initialize (
	qb_loop_t *poll_handle,
	struct totem_config *totem_config)
{
	int res;

	totempg_totem_config = totem_config;

	/*
	 * Logging configuration
	 */
	totempg_log_printf = totem_config->totem_logging_configuration.log_printf;
	totempg_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
	totempg_log_level_security = totem_config->totem_logging_configuration.log_level_security;
	totempg_log_level_error = totem_config->totem_logging_configuration.log_level_error;
	totempg_log_level_warning = totem_config->totem_logging_configuration.log_level_warning;
	totempg_log_level_notice = totem_config->totem_logging_configuration.log_level_notice;
	totempg_log_level_debug = totem_config->totem_logging_configuration.log_level_debug;

	/*
	 * Staging buffer for packed messages
	 */
	fragmentation_data = malloc (TOTEMPG_PACKET_SIZE);
	if (!fragmentation_data) {
		return (-1);
	}

	totemsrp_net_mtu_adjust (totem_config);

	res = totemmrp_initialize (
		poll_handle, totem_config, &totempg_stats,
		totempg_deliver_fn, totempg_confchg_fn);

	totemmrp_callback_token_create (
		&callback_token_received_handle,
		TOTEM_CALLBACK_TOKEN_RECEIVED, 0,
		callback_token_received_fn, 0);

	totempg_size_limit = (totemmrp_avail() - 1) *
		(totempg_totem_config->net_mtu -
		sizeof (struct totempg_mcast) - 16);

	return (res);
}
/*
 * Shut down the totemmrp layer while holding totempg_mutex.
 */
void totempg_finalize (void)
{
	pthread_mutex_t *const guard = &totempg_mutex;

	pthread_mutex_lock (guard);
	totemmrp_finalize ();
	pthread_mutex_unlock (guard);
}
/*
 * Multicast a message
 *
 * Packs the non-empty elements of iovec_in into the staging buffer and
 * multicasts a packet through totemmrp_mcast() each time the buffer
 * fills up.  Data that does not fill a packet stays staged (it is sent
 * by a later call or by callback_token_received_fn()).
 *
 * iovec_in:  message data; zero length elements are ignored.
 * iov_len:   number of elements in iovec_in, must be below 64.
 * guarantee: passed through to totemmrp_mcast().
 *
 * Returns -1 when byte_count_send_ok() reports too little queue space
 * or when totemmrp_mcast() returns -1; otherwise 0 or the result of the
 * last totemmrp_mcast() call.
 */
static int mcast_msg (
	struct iovec *iovec_in,
	unsigned int iov_len,
	int guarantee)
{
	int res = 0;
	struct totempg_mcast mcast;
	struct iovec iovecs[3];
	struct iovec iovec[64];
	int i;
	int dest, src;
	int max_packet_size = 0;
	int copy_len = 0;
	int copy_base = 0;
	int total_size = 0;

	pthread_mutex_lock (&mcast_msg_mutex);
	totemmrp_event_signal (TOTEM_EVENT_NEW_MSG, 1);

	/*
	 * Remove zero length iovectors from the list
	 */
	assert (iov_len < 64);
	for (dest = 0, src = 0; src < iov_len; src++) {
		if (iovec_in[src].iov_len) {
			memcpy (&iovec[dest++], &iovec_in[src],
				sizeof (struct iovec));
		}
	}
	iov_len = dest;

	max_packet_size = TOTEMPG_PACKET_SIZE -
		(sizeof (unsigned short) * (mcast_packed_msg_count + 1));

	mcast_packed_msg_lens[mcast_packed_msg_count] = 0;

	/*
	 * Check if we would overwrite new message queue
	 */
	for (i = 0; i < iov_len; i++) {
		total_size += iovec[i].iov_len;
	}

	if (byte_count_send_ok (total_size + sizeof(unsigned short) *
		(mcast_packed_msg_count)) == 0) {

		pthread_mutex_unlock (&mcast_msg_mutex);
		return(-1);
	}

	/*
	 * Initialize both header fields, as callback_token_received_fn()
	 * does; the header is transmitted, so leaving type unset would
	 * send indeterminate stack bytes.
	 */
	mcast.header.version = 0;
	mcast.header.type = 0;
	for (i = 0; i < iov_len; ) {
		mcast.fragmented = 0;
		mcast.continuation = fragment_continuation;
		copy_len = iovec[i].iov_len - copy_base;

		/*
		 * If it all fits with room left over, copy it in.
		 * We need to leave at least sizeof(short) + 1 bytes in the
		 * fragment_buffer on exit so that max_packet_size + fragment_size
		 * doesn't exceed the size of the fragment_buffer on the next call.
		 */
		if ((copy_len + fragment_size) <
			(max_packet_size - sizeof (unsigned short))) {

			memcpy (&fragmentation_data[fragment_size],
				(char *)iovec[i].iov_base + copy_base, copy_len);
			fragment_size += copy_len;
			mcast_packed_msg_lens[mcast_packed_msg_count] += copy_len;
			next_fragment = 1;
			copy_len = 0;
			copy_base = 0;
			i++;
			continue;

		/*
		 * If it just fits or is too big, then send out what fits.
		 */
		} else {
			unsigned char *data_ptr;

			copy_len = min(copy_len, max_packet_size - fragment_size);
			if (copy_len == max_packet_size) {
				/*
				 * A whole packet comes from this iovec (the
				 * staging buffer is empty): send straight from
				 * the caller's memory, no staging copy needed.
				 */
				data_ptr = (unsigned char *)iovec[i].iov_base + copy_base;
			} else {
				data_ptr = fragmentation_data;
				memcpy (&fragmentation_data[fragment_size],
					(unsigned char *)iovec[i].iov_base + copy_base, copy_len);
			}

			mcast_packed_msg_lens[mcast_packed_msg_count] += copy_len;

			/*
			 * if we're not on the last iovec or the iovec is too large to
			 * fit, then indicate a fragment. This also means that the next
			 * message will have the continuation of this one.
			 */
			if ((i < (iov_len - 1)) ||
				((copy_base + copy_len) < iovec[i].iov_len)) {
				/* fragment number 0 means "not fragmented": skip it */
				if (!next_fragment) {
					next_fragment++;
				}
				fragment_continuation = next_fragment;
				mcast.fragmented = next_fragment++;
				assert(fragment_continuation != 0);
				assert(mcast.fragmented != 0);
			} else {
				fragment_continuation = 0;
			}

			/*
			 * assemble the message and send it
			 */
			mcast.msg_count = ++mcast_packed_msg_count;
			iovecs[0].iov_base = (void *)&mcast;
			iovecs[0].iov_len = sizeof(struct totempg_mcast);
			iovecs[1].iov_base = (void *)mcast_packed_msg_lens;
			iovecs[1].iov_len = mcast_packed_msg_count *
				sizeof(unsigned short);
			iovecs[2].iov_base = (void *)data_ptr;
			iovecs[2].iov_len = max_packet_size;
			assert (totemmrp_avail() > 0);
			res = totemmrp_mcast (iovecs, 3, guarantee);
			if (res == -1) {
				goto error_exit;
			}

			/*
			 * Recalculate counts and indexes for the next.
			 */
			mcast_packed_msg_lens[0] = 0;
			mcast_packed_msg_count = 0;
			fragment_size = 0;
			max_packet_size = TOTEMPG_PACKET_SIZE - (sizeof(unsigned short));

			/*
			 * If the iovec all fit, go to the next iovec
			 */
			if ((copy_base + copy_len) == iovec[i].iov_len) {
				copy_len = 0;
				copy_base = 0;
				i++;

			/*
			 * Continue with the rest of the current iovec.
			 */
			} else {
				copy_base += copy_len;
			}
		}
	}

	/*
	 * Bump only if we added message data. This may be zero if
	 * the last buffer just fit into the fragmentation_data buffer
	 * and we were at the last iovec.
	 */
	if (mcast_packed_msg_lens[mcast_packed_msg_count]) {
		mcast_packed_msg_count++;
	}

error_exit:
	pthread_mutex_unlock (&mcast_msg_mutex);
	return (res);
}
/*
 * Determine if msg_count messages could be queued.
 *
 * Records the value of totemmrp_avail() in totempg_stats and returns
 * nonzero when that value, minus the current reservation count,
 * exceeds msg_count.
 */
static int msg_count_send_ok (
	int msg_count)
{
	const int queue_avail = totemmrp_avail ();

	totempg_stats.msg_queue_avail = queue_avail;

	return ((queue_avail - totempg_reserved) > msg_count);
}
/*
 * Determine if byte_count bytes could be queued.
 *
 * The byte count is converted to a packet count (bytes divided by
 * net_mtu less the totempg header and 16 more bytes, plus one) and
 * compared with totemmrp_avail().  Returns nonzero when enough packets
 * are available.
 */
static int byte_count_send_ok (
	int byte_count)
{
	const int queue_avail = totemmrp_avail ();
	const unsigned int packets_needed =
		(byte_count / (totempg_totem_config->net_mtu - sizeof (struct totempg_mcast) - 16)) + 1;

	return (queue_avail >= packets_needed);
}
/*
 * Reserve queue space for a message of msg_size bytes.
 *
 * The size is converted to a packet count (same formula as
 * byte_count_send_ok), added to totempg_reserved and mirrored into
 * totempg_stats.  Returns the number of packets reserved, which the
 * caller later hands to send_release().
 */
static int send_reserve (
	int msg_size)
{
	const unsigned int packets =
		(msg_size / (totempg_totem_config->net_mtu - sizeof (struct totempg_mcast) - 16)) + 1;

	totempg_reserved += packets;
	totempg_stats.msg_reserved = totempg_reserved;

	return (packets);
}
/*
 * Give back msg_count previously reserved packets and mirror the new
 * reservation count into totempg_stats.
 */
static void send_release (
	int msg_count)
{
	totempg_reserved = totempg_reserved - msg_count;
	totempg_stats.msg_reserved = totempg_reserved;
}
/*
 * Register a token callback with the totemmrp layer while holding
 * callback_token_mutex.  All arguments are passed through unchanged and
 * the result of totemmrp_callback_token_create() is returned.
 */
int totempg_callback_token_create (
	void **handle_out,
	enum totem_callback_token_type type,
	int delete,
	int (*callback_fn) (enum totem_callback_token_type type, const void *),
	const void *data)
{
	unsigned int status;

	pthread_mutex_lock (&callback_token_mutex);
	status = totemmrp_callback_token_create (
		handle_out,
		type,
		delete,
		callback_fn,
		data);
	pthread_mutex_unlock (&callback_token_mutex);

	return (status);
}
/*
 * Remove a token callback from the totemmrp layer while holding
 * callback_token_mutex.
 */
void totempg_callback_token_destroy (
	void *handle_out)
{
	pthread_mutex_t *const guard = &callback_token_mutex;

	pthread_mutex_lock (guard);
	totemmrp_callback_token_destroy (handle_out);
	pthread_mutex_unlock (guard);
}
/*
* vi: set autoindent tabstop=4 shiftwidth=4 :
*/
/*
 * Create a new group instance and return its handle in *handle.
 *
 * The instance starts with the given callbacks, no joined groups and a
 * queue level of QB_LOOP_MED.  totempg_max_handle is raised when the
 * new handle exceeds it.
 *
 * Returns 0 on success.  Returns -1 when the handle cannot be created,
 * or when it cannot be looked up afterwards (in which case it is
 * destroyed again).
 */
int totempg_groups_initialize (
	hdb_handle_t *handle,

	void (*deliver_fn) (
		unsigned int nodeid,
		const void *msg,
		unsigned int msg_len,
		int endian_conversion_required),

	void (*confchg_fn) (
		enum totem_configuration_type configuration_type,
		const unsigned int *member_list, size_t member_list_entries,
		const unsigned int *left_list, size_t left_list_entries,
		const unsigned int *joined_list, size_t joined_list_entries,
		const struct memb_ring_id *ring_id))
{
	struct totempg_group_instance *group;
	int rc = -1;

	pthread_mutex_lock (&totempg_mutex);

	if (hdb_handle_create (&totempg_groups_instance_database,
		sizeof (struct totempg_group_instance), handle) != 0) {
		goto out_unlock;
	}

	if (*handle > totempg_max_handle) {
		totempg_max_handle = *handle;
	}

	if (hdb_handle_get (&totempg_groups_instance_database, *handle,
		(void *)&group) != 0) {
		hdb_handle_destroy (&totempg_groups_instance_database, *handle);
		goto out_unlock;
	}

	group->deliver_fn = deliver_fn;
	group->confchg_fn = confchg_fn;
	group->groups = 0;
	group->groups_cnt = 0;
	group->q_level = QB_LOOP_MED;

	hdb_handle_put (&totempg_groups_instance_database, *handle);
	rc = 0;

out_unlock:
	pthread_mutex_unlock (&totempg_mutex);
	return (rc);
}
/*
 * Add group_cnt groups to the set joined by the instance behind handle.
 *
 * The instance's group array is grown with realloc() and the entries of
 * groups are appended to it (the entries are copied as structures).
 *
 * Returns 0 on success, the hdb_handle_get() error when the handle does
 * not resolve, or ENOMEM when the array cannot be grown; in the ENOMEM
 * case the previously joined groups are left untouched.
 */
int totempg_groups_join (
	hdb_handle_t handle,
	const struct totempg_group *groups,
	size_t group_cnt)
{
	struct totempg_group_instance *instance;
	struct totempg_group *new_groups;
	unsigned int res;

	pthread_mutex_lock (&totempg_mutex);
	res = hdb_handle_get (&totempg_groups_instance_database, handle,
		(void *)&instance);
	if (res != 0) {
		goto error_exit;
	}

	new_groups = realloc (instance->groups,
		sizeof (struct totempg_group) *
		(instance->groups_cnt + group_cnt));
	if (new_groups == 0) {
		res = ENOMEM;
		/*
		 * The handle was acquired above, so it must be released on
		 * this path too.
		 */
		goto error_put;
	}
	memcpy (&new_groups[instance->groups_cnt],
		groups, group_cnt * sizeof (struct totempg_group));
	instance->groups = new_groups;
	instance->groups_cnt += group_cnt;

error_put:
	hdb_handle_put (&totempg_groups_instance_database, handle);

error_exit:
	pthread_mutex_unlock (&totempg_mutex);
	return (res);
}
/*
 * Leave groups.
 *
 * As written this only verifies, under totempg_mutex, that handle
 * resolves to an instance: groups and group_cnt are not used and the
 * instance's group list is not modified.
 *
 * Returns the result of hdb_handle_get() (0 when the handle is valid).
 */
int totempg_groups_leave (
	hdb_handle_t handle,
	const struct totempg_group *groups,
	size_t group_cnt)
{
	struct totempg_group_instance *group;
	unsigned int status;

	pthread_mutex_lock (&totempg_mutex);

	status = hdb_handle_get (&totempg_groups_instance_database, handle,
		(void *)&group);
	if (status == 0) {
		hdb_handle_put (&totempg_groups_instance_database, handle);
	}

	pthread_mutex_unlock (&totempg_mutex);

	return (status);
}
#define MAX_IOVECS_FROM_APP 32
#define MAX_GROUPS_PER_MSG 32

/*
 * Multicast a message to every group joined by the instance behind
 * handle.
 *
 * The outgoing message is the caller's iovec prefixed with a group
 * header: an unsigned short group count, one unsigned short length per
 * joined group, and then the group names.
 *
 * handle:    group instance created by totempg_groups_initialize().
 * iovec:     message data, iov_len elements.
 * guarantee: passed through to mcast_msg().
 *
 * Returns the hdb_handle_get() error when the handle does not resolve,
 * -1 when the instance has joined more than MAX_GROUPS_PER_MSG groups
 * or iov_len exceeds MAX_IOVECS_FROM_APP (the local arrays could not
 * hold the message), otherwise the result of mcast_msg().
 */
int totempg_groups_mcast_joined (
	hdb_handle_t handle,
	const struct iovec *iovec,
	unsigned int iov_len,
	int guarantee)
{
	struct totempg_group_instance *instance;
	unsigned short group_len[MAX_GROUPS_PER_MSG + 1];
	struct iovec iovec_mcast[MAX_GROUPS_PER_MSG + 1 + MAX_IOVECS_FROM_APP];
	int i;
	int res;

	pthread_mutex_lock (&totempg_mutex);
	res = hdb_handle_get (&totempg_groups_instance_database, handle,
		(void *)&instance);
	if (res != 0) {
		goto error_exit;
	}

	/*
	 * group_len[] and iovec_mcast[] are fixed-size stack arrays;
	 * refuse anything that would be written past their end.
	 */
	if (instance->groups_cnt > MAX_GROUPS_PER_MSG ||
		iov_len > MAX_IOVECS_FROM_APP) {
		res = -1;
		goto error_put;
	}

	/*
	 * Build group_len structure and the iovec_mcast structure
	 */
	group_len[0] = instance->groups_cnt;
	for (i = 0; i < instance->groups_cnt; i++) {
		group_len[i + 1] = instance->groups[i].group_len;
		iovec_mcast[i + 1].iov_len = instance->groups[i].group_len;
		iovec_mcast[i + 1].iov_base = (void *) instance->groups[i].group;
	}
	iovec_mcast[0].iov_len = (instance->groups_cnt + 1) * sizeof (unsigned short);
	iovec_mcast[0].iov_base = group_len;

	/*
	 * The caller's data follows the group header
	 */
	for (i = 0; i < iov_len; i++) {
		iovec_mcast[i + instance->groups_cnt + 1].iov_len = iovec[i].iov_len;
		iovec_mcast[i + instance->groups_cnt + 1].iov_base = iovec[i].iov_base;
	}

	res = mcast_msg (iovec_mcast, iov_len + instance->groups_cnt + 1, guarantee);

error_put:
	hdb_handle_put (&totempg_groups_instance_database, handle);

error_exit:
	pthread_mutex_unlock (&totempg_mutex);
	return (res);
}
/*
 * Re-evaluate the queue level of a groups instance from the value reported
 * by totemmrp_avail() and, if a callback is registered and the level
 * changed, report the new level through totem_queue_level_changed.
 *
 * The percentage bands are disjoint; values that fall between bands
 * (30..40, 60..70, 80..90) leave the current level untouched.
 */
static void check_q_level(struct totempg_group_instance *instance)
{
	int32_t previous;
	int32_t level;
	int32_t percent_used = 0;

	previous = instance->q_level;
	level = previous;
	percent_used = 100 - (totemmrp_avail () * 100 / 800); /*(1024*1024/1500)*/

	if (percent_used > 90) {
		level = TOTEM_Q_LEVEL_CRITICAL;
	} else if (percent_used < 30) {
		level = TOTEM_Q_LEVEL_LOW;
	} else if (percent_used > 40 && percent_used < 60) {
		level = TOTEM_Q_LEVEL_GOOD;
	} else if (percent_used > 70 && percent_used < 80) {
		level = TOTEM_Q_LEVEL_HIGH;
	}

	/* Only touch the instance when the level really moves. */
	if (level != previous) {
		instance->q_level = level;
	}

	if (totem_queue_level_changed && previous != instance->q_level) {
		totem_queue_level_changed(instance->q_level);
	}
}
/*
 * Look up the groups instance behind handle and run check_q_level() on it.
 * Does nothing when the handle cannot be resolved.
 */
void totempg_check_q_level(qb_handle_t handle)
{
	struct totempg_group_instance *grp_instance;

	if (hdb_handle_get (&totempg_groups_instance_database, handle,
		(void *)&grp_instance) == 0) {
		check_q_level(grp_instance);
		hdb_handle_put (&totempg_groups_instance_database, handle);
	}
}
/*
 * Reserve send slots for a message addressed to the joined groups.
 *
 * The message size is the sum of all joined group name lengths plus all
 * iovec lengths.  Both totempg_mutex and mcast_msg_mutex are held (in that
 * order) for the whole operation.
 *
 * Returns:
 *   -1  when the size reaches totempg_size_limit
 *    0  when the handle is invalid, or when msg_count_send_ok() rejects the
 *       reservation (which is then released again)
 *   otherwise the value returned by send_reserve()
 */
int totempg_groups_joined_reserve (
	hdb_handle_t handle,
	const struct iovec *iovec,
	unsigned int iov_len)
{
	struct totempg_group_instance *instance;
	unsigned int total_len = 0;
	unsigned int idx;
	unsigned int res;
	unsigned int msg_count = 0;

	pthread_mutex_lock (&totempg_mutex);
	pthread_mutex_lock (&mcast_msg_mutex);

	res = hdb_handle_get (&totempg_groups_instance_database, handle,
		(void *)&instance);
	if (res == 0) {
		for (idx = 0; idx < instance->groups_cnt; idx++) {
			total_len += instance->groups[idx].group_len;
		}
		for (idx = 0; idx < iov_len; idx++) {
			total_len += iovec[idx].iov_len;
		}

		check_q_level(instance);

		if (total_len >= totempg_size_limit) {
			msg_count = -1;
		} else {
			msg_count = send_reserve (total_len);
			if (msg_count_send_ok (msg_count) == 0) {
				send_release (msg_count);
				msg_count = 0;
			}
		}

		hdb_handle_put (&totempg_groups_instance_database, handle);
	}

	pthread_mutex_unlock (&mcast_msg_mutex);
	pthread_mutex_unlock (&totempg_mutex);
	return (msg_count);
}
/*
 * Give back msg_count reserved send slots via send_release().
 * Takes totempg_mutex then mcast_msg_mutex around the call.
 * Always returns 0.
 */
int totempg_groups_joined_release (int msg_count)
{
	pthread_mutex_lock (&totempg_mutex);
	pthread_mutex_lock (&mcast_msg_mutex);

	send_release (msg_count);

	pthread_mutex_unlock (&mcast_msg_mutex);
	pthread_mutex_unlock (&totempg_mutex);

	return 0;
}
/*
 * Multicast a message to an explicit list of groups.
 *
 * The vector handed to mcast_msg() is laid out as:
 *   [0]                 unsigned short array: group count, then each group length
 *   [1 .. groups_cnt]   the group names
 *   [groups_cnt+1 ..]   the caller's iovec entries
 *
 * handle     - groups instance handle (only validated here)
 * guarantee  - passed through unchanged to mcast_msg()
 * groups     - destination groups (groups_cnt entries)
 * iovec      - caller payload (iov_len entries)
 *
 * Returns the hdb_handle_get() error when the handle is invalid, -1 when
 * groups_cnt exceeds MAX_GROUPS_PER_MSG or iov_len exceeds
 * MAX_IOVECS_FROM_APP, otherwise the result of mcast_msg().
 */
int totempg_groups_mcast_groups (
	hdb_handle_t handle,
	int guarantee,
	const struct totempg_group *groups,
	size_t groups_cnt,
	const struct iovec *iovec,
	unsigned int iov_len)
{
	struct totempg_group_instance *instance;
	unsigned short group_len[MAX_GROUPS_PER_MSG + 1];
	struct iovec iovec_mcast[MAX_GROUPS_PER_MSG + 1 + MAX_IOVECS_FROM_APP];
	size_t i;
	unsigned int res;

	pthread_mutex_lock (&totempg_mutex);
	res = hdb_handle_get (&totempg_groups_instance_database, handle,
		(void *)&instance);
	if (res != 0) {
		goto error_exit;
	}

	/*
	 * group_len[] and iovec_mcast[] are fixed-size stack arrays; refuse
	 * caller-supplied counts that would write past their end.
	 */
	if (groups_cnt > MAX_GROUPS_PER_MSG || iov_len > MAX_IOVECS_FROM_APP) {
		res = -1;
		goto error_put;
	}

	/*
	 * Build group_len structure and the iovec_mcast structure
	 */
	group_len[0] = groups_cnt;
	for (i = 0; i < groups_cnt; i++) {
		group_len[i + 1] = groups[i].group_len;
		iovec_mcast[i + 1].iov_len = groups[i].group_len;
		iovec_mcast[i + 1].iov_base = (void *) groups[i].group;
	}
	iovec_mcast[0].iov_len = (groups_cnt + 1) * sizeof (unsigned short);
	iovec_mcast[0].iov_base = group_len;

	/* Application payload follows the group header entries. */
	for (i = 0; i < iov_len; i++) {
		iovec_mcast[i + groups_cnt + 1].iov_len = iovec[i].iov_len;
		iovec_mcast[i + groups_cnt + 1].iov_base = iovec[i].iov_base;
	}

	res = mcast_msg (iovec_mcast, iov_len + groups_cnt + 1, guarantee);

error_put:
	hdb_handle_put (&totempg_groups_instance_database, handle);
error_exit:
	pthread_mutex_unlock (&totempg_mutex);
	return (res);
}
/*
 * Ask whether a message to the given groups could be sent right now.
 *
 * The size passed to msg_count_send_ok() is the sum of all group name
 * lengths plus all iovec lengths.
 *
 * Returns -1 if error, 0 if can't send, 1 if can send the message
 * (an invalid handle yields whatever hdb_handle_get() returned).
 */
int totempg_groups_send_ok_groups (
	hdb_handle_t handle,
	const struct totempg_group *groups,
	size_t groups_cnt,
	const struct iovec *iovec,
	unsigned int iov_len)
{
	struct totempg_group_instance *instance;
	unsigned int total_len = 0;
	unsigned int idx;
	unsigned int res;

	pthread_mutex_lock (&totempg_mutex);

	res = hdb_handle_get (&totempg_groups_instance_database, handle,
		(void *)&instance);
	if (res == 0) {
		for (idx = 0; idx < groups_cnt; idx++) {
			total_len += groups[idx].group_len;
		}
		for (idx = 0; idx < iov_len; idx++) {
			total_len += iovec[idx].iov_len;
		}

		res = msg_count_send_ok (total_len);

		hdb_handle_put (&totempg_groups_instance_database, handle);
	}

	pthread_mutex_unlock (&totempg_mutex);
	return (res);
}
/*
 * Fetch the interface addresses and status strings of a node.
 * Thin pass-through to totemmrp_ifaces_get(); its result is returned as is.
 */
int totempg_ifaces_get (
	unsigned int nodeid,
	struct totem_ip_address *interfaces,
	char ***status,
	unsigned int *iface_count)
{
	return (totemmrp_ifaces_get (nodeid, interfaces, status, iface_count));
}
/*
 * Forward an event of the given type and value to totemmrp_event_signal().
 */
void totempg_event_signal (
	enum totem_event_type type,
	int value)
{
	totemmrp_event_signal (type, value);
}
/*
 * Return an untyped pointer to the file-level totempg_stats object.
 */
void* totempg_get_stats (void)
{
	void *stats = &totempg_stats;

	return stats;
}
/*
 * Select the crypto type; pass-through to totemmrp_crypto_set().
 */
int totempg_crypto_set (
	unsigned int type)
{
	return (totemmrp_crypto_set (type));
}
/*
 * Re-enable rings; pass-through to totemmrp_ring_reenable().
 */
int totempg_ring_reenable (void)
{
	return (totemmrp_ring_reenable ());
}
/*
 * Format the interfaces of a node as "r(<ring>) ip(<address>) " entries.
 *
 * The result lives in a static buffer: it is overwritten by the next call
 * and must not be freed.  At most INTERFACE_MAX interfaces are printed and
 * the output is truncated rather than written past the end of the buffer.
 *
 * Returns the formatted string, or a fixed message when
 * totempg_ifaces_get() reports -1.
 */
const char *totempg_ifaces_print (unsigned int nodeid)
{
	static char iface_string[256 * INTERFACE_MAX];
	struct totem_ip_address interfaces[INTERFACE_MAX];
	char **status;
	unsigned int iface_count;
	unsigned int i;
	size_t used = 0;
	int res;

	iface_string[0] = '\0';

	res = totempg_ifaces_get (nodeid, interfaces, &status, &iface_count);
	if (res == -1) {
		return ("no interface found for nodeid");
	}

	/* interfaces[] only has INTERFACE_MAX slots, whatever iface_count says. */
	for (i = 0; i < iface_count && i < INTERFACE_MAX; i++) {
		int written;

		written = snprintf (iface_string + used,
			sizeof (iface_string) - used,
			"r(%u) ip(%s) ",
			i, totemip_print (&interfaces[i]));
		if (written < 0 ||
			(size_t)written >= sizeof (iface_string) - used) {
			/* Error or truncation: keep what fits, already NUL-terminated. */
			break;
		}
		used += (size_t)written;
	}

	return (iface_string);
}
/*
 * Return the local node id as reported by totemmrp_my_nodeid_get().
 */
unsigned int totempg_my_nodeid_get (void)
{
	unsigned int nodeid = totemmrp_my_nodeid_get();

	return (nodeid);
}
/*
 * Return the local address family as reported by totemmrp_my_family_get().
 */
int totempg_my_family_get (void)
{
	int family = totemmrp_my_family_get();

	return (family);
}
/*
 * Register the "service ready" callback by handing it on to
 * totemmrp_service_ready_register().
 */
extern void totempg_service_ready_register (
	void (*totem_service_ready) (void))
{
	totemmrp_service_ready_register (
		totem_service_ready);
}
/*
 * Install fn as the queue-level-changed callback.  check_q_level() calls it
 * whenever an instance's queue level changes; the stored pointer is tested
 * for NULL before each call, so NULL disables notification.
 */
void totempg_queue_level_register_callback (
	totem_queue_level_changed_fn fn)
{
	totem_queue_level_changed = fn;
}
extern int totempg_member_add (
const struct totem_ip_address *member,
int ring_no);
extern int totempg_member_remove (
const struct totem_ip_address *member,
int ring_no);
diff --git a/exec/totemrrp.c b/exec/totemrrp.c
index c67fdaba..8fe3ef7b 100644
--- a/exec/totemrrp.c
+++ b/exec/totemrrp.c
@@ -1,2097 +1,2096 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2009 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <assert.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netdb.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <sched.h>
#include <time.h>
#include <sys/time.h>
#include <sys/poll.h>
#include <limits.h>
#include <corosync/sq.h>
#include <corosync/list.h>
#include <corosync/hdb.h>
#include <corosync/swab.h>
#include <qb/qbdefs.h>
#include <qb/qbloop.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/engine/logsys.h>
#include "totemnet.h"
#include "totemrrp.h"
void rrp_deliver_fn (
void *context,
const void *msg,
unsigned int msg_len);
void rrp_iface_change_fn (
void *context,
const struct totem_ip_address *iface_addr);
struct totemrrp_instance;
/*
 * State of the passive replication algorithm.
 * Allocated and zeroed by passive_instance_initialize().
 */
struct passive_instance {
/* Owning rrp instance; dereferenced by the token-expired timer helpers. */
struct totemrrp_instance *rrp_instance;
/* One entry per interface: 1 once the interface has been marked faulty. */
unsigned int *faulty;
/* Per-interface receive counters compared by passive_monitor(). */
unsigned int *token_recv_count;
unsigned int *mcast_recv_count;
/* Copy of a token held back while messages are still missing. */
unsigned char token[15000];
unsigned int token_len;
qb_loop_timer_handle timer_expired_token;
/* Only referenced from commented-out code in the visible part of this file. */
qb_loop_timer_handle timer_problem_decrementer;
/* Delivery context saved by passive_token_recv() for deferred delivery. */
void *totemrrp_context;
/* Interface used for the last token / message send; advanced before each send. */
unsigned int token_xmit_iface;
unsigned int msg_xmit_iface;
};
/*
 * State of the active replication algorithm.
 * NOTE(review): the functions that maintain most of these fields are outside
 * the visible part of the file; field notes below are limited to what is
 * shown here.
 */
struct active_instance {
struct totemrrp_instance *rrp_instance;
/* Indexed by interface number; read by timer_function_test_ring_timeout(). */
unsigned int *faulty;
unsigned int *last_token_recv;
unsigned int *counter_problems;
/* Same size as the token buffer in struct passive_instance. */
unsigned char token[15000];
unsigned int token_len;
unsigned int last_token_seq;
qb_loop_timer_handle timer_expired_token;
qb_loop_timer_handle timer_problem_decrementer;
void *totemrrp_context;
};
struct rrp_algo {
const char *name;
void * (*initialize) (
struct totemrrp_instance *rrp_instance,
int interface_count);
void (*mcast_recv) (
struct totemrrp_instance *instance,
unsigned int iface_no,
void *context,
const void *msg,
unsigned int msg_len);
void (*mcast_noflush_send) (
struct totemrrp_instance *instance,
const void *msg,
unsigned int msg_len);
void (*mcast_flush_send) (
struct totemrrp_instance *instance,
const void *msg,
unsigned int msg_len);
void (*token_recv) (
struct totemrrp_instance *instance,
unsigned int iface_no,
void *context,
const void *msg,
unsigned int msg_len,
unsigned int token_seqid);
void (*token_send) (
struct totemrrp_instance *instance,
const void *msg,
unsigned int msg_len);
void (*recv_flush) (
struct totemrrp_instance *instance);
void (*send_flush) (
struct totemrrp_instance *instance);
void (*iface_check) (
struct totemrrp_instance *instance);
void (*processor_count_set) (
struct totemrrp_instance *instance,
unsigned int processor_count);
void (*token_target_set) (
struct totemrrp_instance *instance,
struct totem_ip_address *token_target,
unsigned int iface_no);
void (*ring_reenable) (
struct totemrrp_instance *instance,
unsigned int iface_no);
int (*mcast_recv_empty) (
struct totemrrp_instance *instance);
int (*member_add) (
struct totemrrp_instance *instance,
const struct totem_ip_address *member,
unsigned int iface_no);
int (*member_remove) (
struct totemrrp_instance *instance,
const struct totem_ip_address *member,
unsigned int iface_no);
};
/*
 * One redundant ring protocol instance: the selected algorithm, its private
 * state, the per-interface network handles and the callbacks used to hand
 * data to the layer above.
 */
struct totemrrp_instance {
qb_loop_t *poll_handle;
struct totem_interface *interfaces;
/* Operation table of the algorithm in use. */
struct rrp_algo *rrp_algo;
void *context;
/* Per-interface status text; passive_monitor() formats into it. */
char *status[INTERFACE_MAX];
/* Upward delivery of a received message or token. */
void (*totemrrp_deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len);
void (*totemrrp_iface_change_fn) (
void *context,
const struct totem_ip_address *iface_addr,
unsigned int iface_no);
void (*totemrrp_token_seqid_get) (
const void *msg,
unsigned int *seqid,
unsigned int *token_is);
void (*totemrrp_target_set_completed) (
void *context);
/* The passive algorithm treats a return of 0 as "nothing missing". */
unsigned int (*totemrrp_msgs_missing) (void);
/*
* Function and data used to log messages
*/
int totemrrp_log_level_security;
int totemrrp_log_level_error;
int totemrrp_log_level_warning;
int totemrrp_log_level_notice;
int totemrrp_log_level_debug;
int totemrrp_subsys_id;
void (*totemrrp_log_printf) (
- unsigned int rec_ident,
+ int level,
+ int subsys,
const char *function,
const char *file,
int line,
- const char *format, ...)__attribute__((format(printf, 5, 6)));
+ const char *format, ...)__attribute__((format(printf, 6, 7)));
/* Network handles, indexed by interface number. */
void **net_handles;
/* Algorithm-private state (struct passive_instance / struct active_instance). */
void *rrp_algo_instance;
int interface_count;
int processor_count;
int my_nodeid;
struct totem_config *totem_config;
/* Per-interface timer context, used as struct deliver_fn_context by the ring test timer. */
void *deliver_fn_context[INTERFACE_MAX];
qb_loop_timer_handle timer_active_test_ring_timeout[INTERFACE_MAX];
};
/*
* None Replication Forward Declarations
*/
static void none_mcast_recv (
struct totemrrp_instance *instance,
unsigned int iface_no,
void *context,
const void *msg,
unsigned int msg_len);
static void none_mcast_noflush_send (
struct totemrrp_instance *instance,
const void *msg,
unsigned int msg_len);
static void none_mcast_flush_send (
struct totemrrp_instance *instance,
const void *msg,
unsigned int msg_len);
static void none_token_recv (
struct totemrrp_instance *instance,
unsigned int iface_no,
void *context,
const void *msg,
unsigned int msg_len,
unsigned int token_seqid);
static void none_token_send (
struct totemrrp_instance *instance,
const void *msg,
unsigned int msg_len);
static void none_recv_flush (
struct totemrrp_instance *instance);
static void none_send_flush (
struct totemrrp_instance *instance);
static void none_iface_check (
struct totemrrp_instance *instance);
static void none_processor_count_set (
struct totemrrp_instance *instance,
unsigned int processor_count_set);
static void none_token_target_set (
struct totemrrp_instance *instance,
struct totem_ip_address *token_target,
unsigned int iface_no);
static void none_ring_reenable (
struct totemrrp_instance *instance,
unsigned int iface_no);
static int none_mcast_recv_empty (
struct totemrrp_instance *instance);
static int none_member_add (
struct totemrrp_instance *instance,
const struct totem_ip_address *member,
unsigned int iface_no);
static int none_member_remove (
struct totemrrp_instance *instance,
const struct totem_ip_address *member,
unsigned int iface_no);
/*
* Passive Replication Forward Declarations
*/
static void *passive_instance_initialize (
struct totemrrp_instance *rrp_instance,
int interface_count);
static void passive_mcast_recv (
struct totemrrp_instance *instance,
unsigned int iface_no,
void *context,
const void *msg,
unsigned int msg_len);
static void passive_mcast_noflush_send (
struct totemrrp_instance *instance,
const void *msg,
unsigned int msg_len);
static void passive_mcast_flush_send (
struct totemrrp_instance *instance,
const void *msg,
unsigned int msg_len);
static void passive_monitor (
struct totemrrp_instance *rrp_instance,
unsigned int iface_no,
int is_token_recv_count);
static void passive_token_recv (
struct totemrrp_instance *instance,
unsigned int iface_no,
void *context,
const void *msg,
unsigned int msg_len,
unsigned int token_seqid);
static void passive_token_send (
struct totemrrp_instance *instance,
const void *msg,
unsigned int msg_len);
static void passive_recv_flush (
struct totemrrp_instance *instance);
static void passive_send_flush (
struct totemrrp_instance *instance);
static void passive_iface_check (
struct totemrrp_instance *instance);
static void passive_processor_count_set (
struct totemrrp_instance *instance,
unsigned int processor_count_set);
static void passive_token_target_set (
struct totemrrp_instance *instance,
struct totem_ip_address *token_target,
unsigned int iface_no);
static void passive_ring_reenable (
struct totemrrp_instance *instance,
unsigned int iface_no);
static int passive_mcast_recv_empty (
struct totemrrp_instance *instance);
static int passive_member_add (
struct totemrrp_instance *instance,
const struct totem_ip_address *member,
unsigned int iface_no);
static int passive_member_remove (
struct totemrrp_instance *instance,
const struct totem_ip_address *member,
unsigned int iface_no);
/*
* Active Replication Forward Declarations
*/
static void *active_instance_initialize (
struct totemrrp_instance *rrp_instance,
int interface_count);
static void active_mcast_recv (
struct totemrrp_instance *instance,
unsigned int iface_no,
void *context,
const void *msg,
unsigned int msg_len);
static void active_mcast_noflush_send (
struct totemrrp_instance *instance,
const void *msg,
unsigned int msg_len);
static void active_mcast_flush_send (
struct totemrrp_instance *instance,
const void *msg,
unsigned int msg_len);
static void active_token_recv (
struct totemrrp_instance *instance,
unsigned int iface_no,
void *context,
const void *msg,
unsigned int msg_len,
unsigned int token_seqid);
static void active_token_send (
struct totemrrp_instance *instance,
const void *msg,
unsigned int msg_len);
static void active_recv_flush (
struct totemrrp_instance *instance);
static void active_send_flush (
struct totemrrp_instance *instance);
static void active_iface_check (
struct totemrrp_instance *instance);
static void active_processor_count_set (
struct totemrrp_instance *instance,
unsigned int processor_count_set);
static void active_token_target_set (
struct totemrrp_instance *instance,
struct totem_ip_address *token_target,
unsigned int iface_no);
static void active_ring_reenable (
struct totemrrp_instance *instance,
unsigned int iface_no);
static int active_mcast_recv_empty (
struct totemrrp_instance *instance);
static int active_member_add (
struct totemrrp_instance *instance,
const struct totem_ip_address *member,
unsigned int iface_no);
static int active_member_remove (
struct totemrrp_instance *instance,
const struct totem_ip_address *member,
unsigned int iface_no);
static void active_timer_expired_token_start (
struct active_instance *active_instance);
static void active_timer_expired_token_cancel (
struct active_instance *active_instance);
static void active_timer_problem_decrementer_start (
struct active_instance *active_instance);
static void active_timer_problem_decrementer_cancel (
struct active_instance *active_instance);
/*
* 0-5 reserved for totemsrp.c
*/
#define MESSAGE_TYPE_RING_TEST_ACTIVE 6
#define MESSAGE_TYPE_RING_TEST_ACTIVATE 7
#define ENDIAN_LOCAL 0xff22
/*
* Rollover handling:
*
* ARR_SEQNO_START_TOKEN is the starting sequence number of the last seen sequence
* for a token for the active redundant ring. This should remain zero, unless testing
* overflow, in which case 0x7fffff00 or 0xffffff00 are good starting values.
* It should be the same as the one defined in totemsrp.c
*/
#define ARR_SEQNO_START_TOKEN 0x0
/*
* These can be used to test different rollover points
* #define ARR_SEQNO_START_MSG 0xfffffe00
*/
/*
* Threshold value at which recv_count for passive rrp should be adjusted.
* Set this to a smaller value to test that the adjustment works
* properly. Also keep in mind that this value must be smaller
* than rrp_problem_count_threshold
*/
#define PASSIVE_RECV_COUNT_THRESHOLD (INT_MAX / 2)
/*
 * Header of the ring test messages sent by this file
 * (MESSAGE_TYPE_RING_TEST_ACTIVE / MESSAGE_TYPE_RING_TEST_ACTIVATE).
 * Packed, so the layout has no padding.
 */
struct message_header {
char type;
char encapsulated;
/* Sender writes ENDIAN_LOCAL here. */
unsigned short endian_detector;
/* Interface (ring) number the test refers to. */
int ring_number;
/* Node id of the sender of the test message. */
int nodeid_activator;
} __attribute__((packed));
/*
 * Per-interface context: ties an rrp instance to one interface number.
 * Used as the timer context of timer_function_test_ring_timeout().
 */
struct deliver_fn_context {
struct totemrrp_instance *instance;
void *context;
int iface_no;
};
/*
 * "none" algorithm: no replication and no private state (initialize is
 * NULL); the none_* operations shown in this file all use interface 0.
 */
struct rrp_algo none_algo = {
.name = "none",
.initialize = NULL,
.mcast_recv = none_mcast_recv,
.mcast_noflush_send = none_mcast_noflush_send,
.mcast_flush_send = none_mcast_flush_send,
.token_recv = none_token_recv,
.token_send = none_token_send,
.recv_flush = none_recv_flush,
.send_flush = none_send_flush,
.iface_check = none_iface_check,
.processor_count_set = none_processor_count_set,
.token_target_set = none_token_target_set,
.ring_reenable = none_ring_reenable,
.mcast_recv_empty = none_mcast_recv_empty,
.member_add = none_member_add,
.member_remove = none_member_remove
};
/*
 * "passive" algorithm: each send goes out on one interface, chosen by
 * stepping past faulty interfaces; private state is struct passive_instance.
 */
struct rrp_algo passive_algo = {
.name = "passive",
.initialize = passive_instance_initialize,
.mcast_recv = passive_mcast_recv,
.mcast_noflush_send = passive_mcast_noflush_send,
.mcast_flush_send = passive_mcast_flush_send,
.token_recv = passive_token_recv,
.token_send = passive_token_send,
.recv_flush = passive_recv_flush,
.send_flush = passive_send_flush,
.iface_check = passive_iface_check,
.processor_count_set = passive_processor_count_set,
.token_target_set = passive_token_target_set,
.ring_reenable = passive_ring_reenable,
.mcast_recv_empty = passive_mcast_recv_empty,
.member_add = passive_member_add,
.member_remove = passive_member_remove
};
/*
 * "active" algorithm operation table; private state is
 * struct active_instance.
 */
struct rrp_algo active_algo = {
.name = "active",
.initialize = active_instance_initialize,
.mcast_recv = active_mcast_recv,
.mcast_noflush_send = active_mcast_noflush_send,
.mcast_flush_send = active_mcast_flush_send,
.token_recv = active_token_recv,
.token_send = active_token_send,
.recv_flush = active_recv_flush,
.send_flush = active_send_flush,
.iface_check = active_iface_check,
.processor_count_set = active_processor_count_set,
.token_target_set = active_token_target_set,
.ring_reenable = active_ring_reenable,
.mcast_recv_empty = active_mcast_recv_empty,
.member_add = active_member_add,
.member_remove = active_member_remove
};
/*
 * All available algorithms.  RRP_ALGOS_COUNT must be kept equal to the
 * number of entries in this array.
 */
struct rrp_algo *rrp_algos[] = {
&none_algo,
&passive_algo,
&active_algo
};
#define RRP_ALGOS_COUNT 3
/*
 * Log through the instance's totemrrp_log_printf callback.
 * Expects a variable named rrp_instance to be in scope at the call site.
 * NOTE(review): the expansion ends in "while (0);" - the extra semicolon
 * makes the macro unsafe directly before an "else"; confirm before relying
 * on it in an unbraced if/else.
 */
-#define log_printf(level, format, args...) \
-do { \
- rrp_instance->totemrrp_log_printf ( \
- LOGSYS_ENCODE_RECID(level, \
- rrp_instance->totemrrp_subsys_id, \
- LOGSYS_RECID_LOG), \
- __FUNCTION__, __FILE__, __LINE__, \
- format, ##args); \
+#define log_printf(level, format, args...) \
+do { \
+ rrp_instance->totemrrp_log_printf ( \
+ level, rrp_instance->totemrrp_subsys_id, \
+ __FUNCTION__, __FILE__, __LINE__, \
+ format, ##args); \
} while (0);
/*
 * Convert a ring test message header received in foreign byte order.
 * The two int fields are byte-swapped with swab32(), the single-byte fields
 * are copied, and endian_detector is set to ENDIAN_LOCAL.
 */
static void test_active_msg_endian_convert(const struct message_header *in, struct message_header *out)
{
	/* Single-byte fields need no conversion. */
	out->type = in->type;
	out->encapsulated = in->encapsulated;

	/* Multi-byte payload fields. */
	out->ring_number = swab32 (in->ring_number);
	out->nodeid_activator = swab32(in->nodeid_activator);

	out->endian_detector = ENDIAN_LOCAL;
}
/*
 * Timer callback for a faulty interface.
 *
 * context is the struct deliver_fn_context of the interface.  While the
 * interface is still marked faulty, a MESSAGE_TYPE_RING_TEST_ACTIVE message
 * carrying the ring number and the local node id is sent on it and the
 * timer is re-armed with rrp_autorecovery_check_timeout.  Once the faulty
 * flag is no longer 1 the function returns without re-arming.
 *
 * Asserts that rrp_mode is "active" or "passive".
 */
static void timer_function_test_ring_timeout (void *context)
{
	struct deliver_fn_context *ring_ctx = (struct deliver_fn_context *)context;
	struct totemrrp_instance *rrp_instance = ring_ctx->instance;
	int ring = ring_ctx->iface_no;
	unsigned int *faulty = NULL;
	struct message_header msg = {
		.type = MESSAGE_TYPE_RING_TEST_ACTIVE,
		.endian_detector = ENDIAN_LOCAL,
	};

	/* The faulty array lives in the algorithm-private state. */
	if (strcmp(rrp_instance->totem_config->rrp_mode, "active") == 0) {
		faulty = ((struct active_instance *)(rrp_instance->rrp_algo_instance))->faulty;
	}
	if (strcmp(rrp_instance->totem_config->rrp_mode, "passive") == 0) {
		faulty = ((struct passive_instance *)(rrp_instance->rrp_algo_instance))->faulty;
	}
	assert (faulty != NULL);

	if (faulty[ring] != 1) {
		return;
	}

	msg.ring_number = ring;
	msg.nodeid_activator = rrp_instance->my_nodeid;

	totemnet_token_send (
		rrp_instance->net_handles[ring],
		&msg, sizeof (struct message_header));

	qb_loop_timer_add (rrp_instance->poll_handle,
		QB_LOOP_MED,
		rrp_instance->totem_config->rrp_autorecovery_check_timeout*QB_TIME_NS_IN_MSEC,
		(void *)ring_ctx,
		timer_function_test_ring_timeout,
		&rrp_instance->timer_active_test_ring_timeout[ring]);
}
/*
* None Replication Implementation
*/
/*
 * "none" algorithm: hand a received multicast message straight to the
 * deliver callback.  iface_no is not used.
 */
static void none_mcast_recv (
	struct totemrrp_instance *rrp_instance,
	unsigned int iface_no,
	void *context,
	const void *msg,
	unsigned int msg_len)
{
	rrp_instance->totemrrp_deliver_fn (context, msg, msg_len);
}
/*
 * "none" algorithm: flush-send a multicast message on interface 0.
 */
static void none_mcast_flush_send (
	struct totemrrp_instance *instance,
	const void *msg,
	unsigned int msg_len)
{
	void *net_handle = instance->net_handles[0];

	totemnet_mcast_flush_send (net_handle, msg, msg_len);
}
/*
 * "none" algorithm: noflush-send a multicast message on interface 0.
 */
static void none_mcast_noflush_send (
	struct totemrrp_instance *instance,
	const void *msg,
	unsigned int msg_len)
{
	void *net_handle = instance->net_handles[0];

	totemnet_mcast_noflush_send (net_handle, msg, msg_len);
}
/*
 * "none" algorithm: hand a received token straight to the deliver callback.
 * iface_no and token_seq are not used.
 */
static void none_token_recv (
	struct totemrrp_instance *rrp_instance,
	unsigned int iface_no,
	void *context,
	const void *msg,
	unsigned int msg_len,
	unsigned int token_seq)
{
	rrp_instance->totemrrp_deliver_fn (context, msg, msg_len);
}
/*
 * "none" algorithm: send the token on interface 0.
 */
static void none_token_send (
	struct totemrrp_instance *instance,
	const void *msg,
	unsigned int msg_len)
{
	void *net_handle = instance->net_handles[0];

	totemnet_token_send (net_handle, msg, msg_len);
}
/*
 * "none" algorithm: flush the receive side of interface 0.
 */
static void none_recv_flush (struct totemrrp_instance *instance)
{
	void *net_handle = instance->net_handles[0];

	totemnet_recv_flush (net_handle);
}
/*
 * "none" algorithm: flush the send side of interface 0.
 */
static void none_send_flush (struct totemrrp_instance *instance)
{
	void *net_handle = instance->net_handles[0];

	totemnet_send_flush (net_handle);
}
/*
 * "none" algorithm: run the interface check on interface 0.
 */
static void none_iface_check (struct totemrrp_instance *instance)
{
	void *net_handle = instance->net_handles[0];

	totemnet_iface_check (net_handle);
}
/*
 * "none" algorithm: pass the processor count to interface 0.
 */
static void none_processor_count_set (
	struct totemrrp_instance *instance,
	unsigned int processor_count)
{
	void *net_handle = instance->net_handles[0];

	totemnet_processor_count_set (net_handle, processor_count);
}
/*
 * "none" algorithm: set the token target on interface 0.
 * iface_no is not used.
 */
static void none_token_target_set (
	struct totemrrp_instance *instance,
	struct totem_ip_address *token_target,
	unsigned int iface_no)
{
	void *net_handle = instance->net_handles[0];

	totemnet_token_target_set (net_handle, token_target);
}
/*
 * "none" algorithm: re-enabling a ring is a no-op.
 */
static void none_ring_reenable (
	struct totemrrp_instance *instance,
	unsigned int iface_no)
{
	/* Intentionally empty: both arguments are unused. */
	(void)instance;
	(void)iface_no;
}
/*
 * "none" algorithm: return totemnet_recv_mcast_empty() for interface 0.
 */
static int none_mcast_recv_empty (
	struct totemrrp_instance *instance)
{
	return (totemnet_recv_mcast_empty (instance->net_handles[0]));
}
/*
 * "none" algorithm: add a member on interface 0 and return the
 * totemnet_member_add() result.  iface_no is not used.
 */
static int none_member_add (
	struct totemrrp_instance *instance,
	const struct totem_ip_address *member,
	unsigned int iface_no)
{
	return (totemnet_member_add (instance->net_handles[0], member));
}
/*
 * "none" algorithm: remove a member on interface 0 and return the
 * totemnet_member_remove() result.  iface_no is not used.
 */
static int none_member_remove (
	struct totemrrp_instance *instance,
	const struct totem_ip_address *member,
	unsigned int iface_no)
{
	return (totemnet_member_remove (instance->net_handles[0], member));
}
/*
* Passive Replication Implementation
*/
/*
 * Allocate and zero the private state of the passive algorithm.
 *
 * rrp_instance    - owning rrp instance; stored in the new state so the
 *                   token-expired timer helpers can reach the poll handle,
 *                   the configuration and the deliver callback
 * interface_count - number of entries in each per-interface array
 *
 * Returns the new struct passive_instance as void *, or NULL when any
 * allocation fails (nothing is leaked in that case).
 */
void *passive_instance_initialize (
	struct totemrrp_instance *rrp_instance,
	int interface_count)
{
	struct passive_instance *instance;

	instance = calloc (1, sizeof (*instance));
	if (instance == NULL) {
		return (NULL);
	}

	/* calloc zero-fills, so every interface starts non-faulty with count 0. */
	instance->faulty = calloc (interface_count, sizeof (*instance->faulty));
	instance->token_recv_count = calloc (interface_count,
		sizeof (*instance->token_recv_count));
	instance->mcast_recv_count = calloc (interface_count,
		sizeof (*instance->mcast_recv_count));

	if (instance->faulty == NULL ||
		instance->token_recv_count == NULL ||
		instance->mcast_recv_count == NULL) {
		/* free(NULL) is a no-op, so one cleanup path covers every case. */
		free (instance->mcast_recv_count);
		free (instance->token_recv_count);
		free (instance->faulty);
		free (instance);
		return (NULL);
	}

	instance->rrp_instance = rrp_instance;

	return ((void *)instance);
}
/*
 * Timer callback: deliver the token that passive_token_recv() held back.
 * context is the struct passive_instance.
 */
static void timer_function_passive_token_expired (void *context)
{
	struct passive_instance *held = (struct passive_instance *)context;
	struct totemrrp_instance *owner = held->rrp_instance;

	owner->totemrrp_deliver_fn (
		held->totemrrp_context,
		held->token,
		held->token_len);
}
/* TODO
static void timer_function_passive_problem_decrementer (void *context)
{
// struct passive_instance *passive_instance = (struct passive_instance *)context;
// struct totemrrp_instance *rrp_instance = passive_instance->rrp_instance;
}
*/
static void passive_timer_expired_token_start (
struct passive_instance *passive_instance)
{
qb_loop_timer_add (
passive_instance->rrp_instance->poll_handle,
QB_LOOP_MED,
passive_instance->rrp_instance->totem_config->rrp_token_expired_timeout*QB_TIME_NS_IN_MSEC,
(void *)passive_instance,
timer_function_passive_token_expired,
&passive_instance->timer_expired_token);
}
static void passive_timer_expired_token_cancel (
struct passive_instance *passive_instance)
{
qb_loop_timer_del (
passive_instance->rrp_instance->poll_handle,
passive_instance->timer_expired_token);
}
/*
static void passive_timer_problem_decrementer_start (
struct passive_instance *passive_instance)
{
qb_loop_timer_add (
QB_LOOP_MED,
passive_instance->rrp_instance->poll_handle,
passive_instance->rrp_instance->totem_config->rrp_problem_count_timeout*QB_TIME_NS_IN_MSEC,
(void *)passive_instance,
timer_function_passive_problem_decrementer,
&passive_instance->timer_problem_decrementer);
}
static void passive_timer_problem_decrementer_cancel (
struct passive_instance *passive_instance)
{
qb_loop_timer_del (
passive_instance->rrp_instance->poll_handle,
passive_instance->timer_problem_decrementer);
}
*/
/*
 * Monitor function implementation from rrp paper.
 *
 * rrp_instance is the passive rrp instance, iface_no is the interface on
 * which a message/token was received, and is_token_recv_count selects the
 * counter array to update: non-zero for the token counters, 0 for the
 * regular (mcast) message counters.
 *
 * After counting the reception, every non-faulty interface whose counter
 * lags the maximum by more than rrp_problem_count_threshold is marked
 * faulty, its autorecovery test timer is armed, and the event is written to
 * the interface's status string and logged.
 */
static void passive_monitor (
struct totemrrp_instance *rrp_instance,
unsigned int iface_no,
int is_token_recv_count)
{
struct passive_instance *passive_instance = (struct passive_instance *)rrp_instance->rrp_algo_instance;
unsigned int *recv_count;
unsigned int max;
unsigned int i;
unsigned int min_all, min_active;
/*
* Monitor for failures
*/
if (is_token_recv_count) {
recv_count = passive_instance->token_recv_count;
} else {
recv_count = passive_instance->mcast_recv_count;
}
recv_count[iface_no] += 1;
/* Largest counter over all interfaces. */
max = 0;
for (i = 0; i < rrp_instance->interface_count; i++) {
if (max < recv_count[i]) {
max = recv_count[i];
}
}
/*
* Max is larger than threshold -> start adjusting process
* (rebase the counters so they do not keep growing)
*/
if (max > PASSIVE_RECV_COUNT_THRESHOLD) {
/* min_all: minimum over every interface; min_active: minimum over non-faulty ones. */
min_all = min_active = recv_count[iface_no];
for (i = 0; i < rrp_instance->interface_count; i++) {
if (recv_count[i] < min_all) {
min_all = recv_count[i];
}
if (passive_instance->faulty[i] == 0 &&
recv_count[i] < min_active) {
min_active = recv_count[i];
}
}
if (min_all > 0) {
/*
* There is one or more faulty device with recv_count > 0
*/
for (i = 0; i < rrp_instance->interface_count; i++) {
recv_count[i] -= min_all;
}
} else {
/*
* No faulty device with recv_count > 0, adjust only active
* devices
*/
for (i = 0; i < rrp_instance->interface_count; i++) {
if (passive_instance->faulty[i] == 0) {
recv_count[i] -= min_active;
}
}
}
/*
* Find again max
*/
max = 0;
for (i = 0; i < rrp_instance->interface_count; i++) {
if (max < recv_count[i]) {
max = recv_count[i];
}
}
}
/* Mark interfaces that fell too far behind the maximum as faulty. */
for (i = 0; i < rrp_instance->interface_count; i++) {
if ((passive_instance->faulty[i] == 0) &&
(max - recv_count[i] >
rrp_instance->totem_config->rrp_problem_count_threshold)) {
passive_instance->faulty[i] = 1;
/* Start the periodic ring test for the faulty interface. */
qb_loop_timer_add (rrp_instance->poll_handle,
QB_LOOP_MED,
rrp_instance->totem_config->rrp_autorecovery_check_timeout*QB_TIME_NS_IN_MSEC,
rrp_instance->deliver_fn_context[i],
timer_function_test_ring_timeout,
&rrp_instance->timer_active_test_ring_timeout[i]);
/* NOTE(review): unbounded sprintf; the size of status[i] is not visible here - confirm it is large enough. */
sprintf (rrp_instance->status[i],
"Marking ringid %u interface %s FAULTY",
i,
totemnet_iface_print (rrp_instance->net_handles[i]));
log_printf (
rrp_instance->totemrrp_log_level_error,
"%s",
rrp_instance->status[i]);
}
}
}
/*
 * Passive algorithm: handle a multicast message received on iface_no.
 *
 * The message is delivered first.  If no messages are missing afterwards
 * and a token-expired timer handle is set, the held-back token is delivered
 * as well and the timer is cancelled.  Finally the reception is counted by
 * passive_monitor() as a regular message.
 */
static void passive_mcast_recv (
	struct totemrrp_instance *rrp_instance,
	unsigned int iface_no,
	void *context,
	const void *msg,
	unsigned int msg_len)
{
	struct passive_instance *passive = (struct passive_instance *)rrp_instance->rrp_algo_instance;

	rrp_instance->totemrrp_deliver_fn (context, msg, msg_len);

	if (rrp_instance->totemrrp_msgs_missing() == 0) {
		if (passive->timer_expired_token) {
			/*
			 * Delivers the last token
			 */
			rrp_instance->totemrrp_deliver_fn (
				passive->totemrrp_context,
				passive->token,
				passive->token_len);
			passive_timer_expired_token_cancel (passive);
		}
	}

	passive_monitor (rrp_instance, iface_no, 0);
}
/*
 * Passive algorithm: flush-send a multicast message on the next interface
 * after msg_xmit_iface that is not marked faulty (wrapping around).
 * The search does not terminate while every interface is marked faulty.
 */
static void passive_mcast_flush_send (
	struct totemrrp_instance *instance,
	const void *msg,
	unsigned int msg_len)
{
	struct passive_instance *passive = (struct passive_instance *)instance->rrp_algo_instance;

	for (;;) {
		passive->msg_xmit_iface = (passive->msg_xmit_iface + 1) % instance->interface_count;
		if (passive->faulty[passive->msg_xmit_iface] != 1) {
			break;
		}
	}

	totemnet_mcast_flush_send (instance->net_handles[passive->msg_xmit_iface], msg, msg_len);
}
/*
 * Passive algorithm: noflush-send a multicast message on the next interface
 * after msg_xmit_iface that is not marked faulty (wrapping around).
 * The search does not terminate while every interface is marked faulty.
 */
static void passive_mcast_noflush_send (
	struct totemrrp_instance *instance,
	const void *msg,
	unsigned int msg_len)
{
	struct passive_instance *passive = (struct passive_instance *)instance->rrp_algo_instance;

	for (;;) {
		passive->msg_xmit_iface = (passive->msg_xmit_iface + 1) % instance->interface_count;
		if (passive->faulty[passive->msg_xmit_iface] != 1) {
			break;
		}
	}

	totemnet_mcast_noflush_send (instance->net_handles[passive->msg_xmit_iface], msg, msg_len);
}
/*
 * Passive replication: handle a token received on iface_no.
 *
 * The caller's context is remembered for later deferred delivery.  When
 * totemrrp_msgs_missing() reports zero the token is delivered immediately;
 * otherwise a copy of the token is stored and the token-expired timer is
 * started.  The receive is then reported to passive_monitor() with its
 * third argument set to 1.
 *
 * token_seq is accepted for signature compatibility and is not used here.
 */
static void passive_token_recv (
	struct totemrrp_instance *rrp_instance,
	unsigned int iface_no,
	void *context,
	const void *msg,
	unsigned int msg_len,
	unsigned int token_seq)
{
	struct passive_instance *passive_instance = (struct passive_instance *)rrp_instance->rrp_algo_instance;

	passive_instance->totemrrp_context = context; // this should be in totemrrp_instance ? TODO

	if (rrp_instance->totemrrp_msgs_missing() == 0) {
		rrp_instance->totemrrp_deliver_fn (
			context,
			msg,
			msg_len);
	} else {
		/*
		 * NOTE(review): msg_len is not checked against the capacity of
		 * passive_instance->token, whose size is not visible here.
		 */
		memcpy (passive_instance->token, msg, msg_len);
		/*
		 * Record the length of the stored token: the deferred delivery
		 * paths pass token_len to the deliver function.
		 */
		passive_instance->token_len = msg_len;
		passive_timer_expired_token_start (passive_instance);
	}
	passive_monitor (rrp_instance, iface_no, 1);
}
/*
 * Passive replication: send a token on the next non-faulty interface,
 * rotating token_xmit_iface round-robin.
 *
 * At most interface_count candidates are examined.  If every interface is
 * marked faulty the token is dropped instead of spinning forever, which is
 * what the unbounded search did previously.
 */
static void passive_token_send (
	struct totemrrp_instance *instance,
	const void *msg,
	unsigned int msg_len)
{
	struct passive_instance *passive_instance = (struct passive_instance *)instance->rrp_algo_instance;
	unsigned int attempts = 0;

	if (instance->interface_count == 0) {
		return; /* nothing to send on, and avoids modulo by zero */
	}

	do {
		passive_instance->token_xmit_iface = (passive_instance->token_xmit_iface + 1) % instance->interface_count;
		attempts++;
	} while (attempts < instance->interface_count &&
		passive_instance->faulty[passive_instance->token_xmit_iface] == 1);

	if (passive_instance->faulty[passive_instance->token_xmit_iface] == 1) {
		return; /* every interface is faulty */
	}

	totemnet_token_send (
		instance->net_handles[passive_instance->token_xmit_iface],
		msg, msg_len);
}
/*
 * Passive replication: call totemnet_recv_flush() on every interface that
 * is not marked faulty.
 */
static void passive_recv_flush (struct totemrrp_instance *instance)
{
	struct passive_instance *algo = (struct passive_instance *)instance->rrp_algo_instance;
	unsigned int ring;

	for (ring = 0; ring < instance->interface_count; ring++) {
		if (algo->faulty[ring] != 0) {
			continue;
		}
		totemnet_recv_flush (instance->net_handles[ring]);
	}
}
/*
 * Passive replication: call totemnet_send_flush() on every interface that
 * is not marked faulty.
 */
static void passive_send_flush (struct totemrrp_instance *instance)
{
	struct passive_instance *algo = (struct passive_instance *)instance->rrp_algo_instance;
	unsigned int ring;

	for (ring = 0; ring < instance->interface_count; ring++) {
		if (algo->faulty[ring] != 0) {
			continue;
		}
		totemnet_send_flush (instance->net_handles[ring]);
	}
}
/*
 * Passive replication: call totemnet_iface_check() on every interface that
 * is not marked faulty.
 */
static void passive_iface_check (struct totemrrp_instance *instance)
{
	struct passive_instance *algo = (struct passive_instance *)instance->rrp_algo_instance;
	unsigned int ring;

	for (ring = 0; ring < instance->interface_count; ring++) {
		if (algo->faulty[ring] != 0) {
			continue;
		}
		totemnet_iface_check (instance->net_handles[ring]);
	}
}
/*
 * Passive replication: pass processor_count to
 * totemnet_processor_count_set() for every interface that is not marked
 * faulty.
 */
static void passive_processor_count_set (
	struct totemrrp_instance *instance,
	unsigned int processor_count)
{
	struct passive_instance *algo = (struct passive_instance *)instance->rrp_algo_instance;
	unsigned int ring;

	for (ring = 0; ring < instance->interface_count; ring++) {
		if (algo->faulty[ring] != 0) {
			continue;
		}
		totemnet_processor_count_set (instance->net_handles[ring],
			processor_count);
	}
}
/*
 * Passive replication: set the token target address on interface iface_no.
 * The faulty flags are not consulted.
 */
static void passive_token_target_set (
	struct totemrrp_instance *instance,
	struct totem_ip_address *token_target,
	unsigned int iface_no)
{
	totemnet_token_target_set (
		instance->net_handles[iface_no],
		token_target);
}
/*
 * Passive replication: call totemnet_recv_mcast_empty() on each interface
 * in order.
 *
 * Returns -1 as soon as any interface returns -1, 1 if at least one
 * interface returned 1, and 0 otherwise.
 */
static int passive_mcast_recv_empty (
	struct totemrrp_instance *instance)
{
	int emptied_any = 0;
	int ring;

	for (ring = 0; ring < instance->interface_count; ring++) {
		int rc = totemnet_recv_mcast_empty (instance->net_handles[ring]);

		switch (rc) {
		case -1:
			return (-1);
		case 1:
			emptied_any = 1;
			break;
		default:
			break;
		}
	}

	return (emptied_any);
}
/*
 * Passive replication: add a member on interface iface_no.
 * Returns the result of totemnet_member_add() unchanged.
 */
static int passive_member_add (
	struct totemrrp_instance *instance,
	const struct totem_ip_address *member,
	unsigned int iface_no)
{
	return (totemnet_member_add (instance->net_handles[iface_no], member));
}
/*
 * Passive replication: remove a member on interface iface_no.
 * Returns the result of totemnet_member_remove() unchanged.
 */
static int passive_member_remove (
	struct totemrrp_instance *instance,
	const struct totem_ip_address *member,
	unsigned int iface_no)
{
	return (totemnet_member_remove (instance->net_handles[iface_no], member));
}
/*
 * Passive replication: clear fault state.
 *
 * The multicast and token receive counters of ALL interfaces are zeroed in
 * either case.  When iface_no equals interface_count every faulty flag is
 * cleared; otherwise only the flag of iface_no is cleared.
 */
static void passive_ring_reenable (
	struct totemrrp_instance *instance,
	unsigned int iface_no)
{
	struct passive_instance *algo = (struct passive_instance *)instance->rrp_algo_instance;

	memset (algo->mcast_recv_count, 0,
		sizeof (unsigned int) * instance->interface_count);
	memset (algo->token_recv_count, 0,
		sizeof (unsigned int) * instance->interface_count);

	if (iface_no != instance->interface_count) {
		algo->faulty[iface_no] = 0;
		return;
	}

	memset (algo->faulty, 0,
		sizeof (unsigned int) * instance->interface_count);
}
/*
* Active Replication Implementation
*/
/*
 * Allocate and initialise the per-instance state of the active replication
 * algorithm.
 *
 * rrp_instance    - owning redundant ring instance, stored in the new object
 * interface_count - number of elements in each per-interface array
 *
 * Returns the new struct active_instance as a void pointer, or NULL when
 * any allocation fails (everything allocated so far is released).
 *
 * calloc() is used so that the instance and its per-interface arrays start
 * zeroed and the count * size multiplication is overflow-checked.
 */
void *active_instance_initialize (
	struct totemrrp_instance *rrp_instance,
	int interface_count)
{
	struct active_instance *instance;

	instance = calloc (1, sizeof (struct active_instance));
	if (instance == NULL) {
		return (NULL);
	}

	instance->faulty = calloc (interface_count, sizeof (unsigned int));
	instance->last_token_recv = calloc (interface_count, sizeof (unsigned int));
	instance->counter_problems = calloc (interface_count, sizeof (unsigned int));
	if (instance->faulty == NULL ||
		instance->last_token_recv == NULL ||
		instance->counter_problems == NULL) {
		goto error_free;
	}

	instance->timer_expired_token = 0;
	instance->timer_problem_decrementer = 0;
	instance->rrp_instance = rrp_instance;
	instance->last_token_seq = ARR_SEQNO_START_TOKEN - 1;

	return ((void *)instance);

error_free:
	/* free(NULL) is a no-op, so partially allocated state is fine here */
	free (instance->counter_problems);
	free (instance->last_token_recv);
	free (instance->faulty);
	free (instance);
	return (NULL);
}
/*
 * Timer callback (context is the struct active_instance): decrement every
 * non-zero per-interface problem counter by one, update that interface's
 * status string and log it at warning level.
 *
 * If at least one counter was non-zero on entry the timer is re-armed,
 * otherwise the stored timer handle is reset to 0.
 */
static void timer_function_active_problem_decrementer (void *context)
{
	struct active_instance *active_instance = (struct active_instance *)context;
	struct totemrrp_instance *rrp_instance = active_instance->rrp_instance;
	unsigned int any_problem = 0;
	unsigned int ring;

	for (ring = 0; ring < rrp_instance->interface_count; ring++) {
		if (!(active_instance->counter_problems[ring] > 0)) {
			continue;
		}

		any_problem = 1;
		active_instance->counter_problems[ring] -= 1;

		if (active_instance->counter_problems[ring] != 0) {
			sprintf (rrp_instance->status[ring],
				"Decrementing problem counter for iface %s to [%d of %d]",
				totemnet_iface_print (rrp_instance->net_handles[ring]),
				active_instance->counter_problems[ring],
				rrp_instance->totem_config->rrp_problem_count_threshold);
		} else {
			sprintf (rrp_instance->status[ring],
				"ring %d active with no faults", ring);
		}

		log_printf (
			rrp_instance->totemrrp_log_level_warning,
			"%s",
			rrp_instance->status[ring]);
	}

	if (!any_problem) {
		active_instance->timer_problem_decrementer = 0;
		return;
	}
	active_timer_problem_decrementer_start (active_instance);
}
static void timer_function_active_token_expired (void *context)
{
struct active_instance *active_instance = (struct active_instance *)context;
struct totemrrp_instance *rrp_instance = active_instance->rrp_instance;
unsigned int i;
for (i = 0; i < rrp_instance->interface_count; i++) {
if (active_instance->last_token_recv[i] == 0) {
active_instance->counter_problems[i] += 1;
if (active_instance->timer_problem_decrementer == 0) {
active_timer_problem_decrementer_start (active_instance);
}
sprintf (rrp_instance->status[i],
"Incrementing problem counter for seqid %d iface %s to [%d of %d]",
active_instance->last_token_seq,
totemnet_iface_print (rrp_instance->net_handles[i]),
active_instance->counter_problems[i],
rrp_instance->totem_config->rrp_problem_count_threshold);
log_printf (
rrp_instance->totemrrp_log_level_warning,
"%s",
rrp_instance->status[i]);
}
}
for (i = 0; i < rrp_instance->interface_count; i++) {
if (active_instance->counter_problems[i] >= rrp_instance->totem_config->rrp_problem_count_threshold)
{
active_instance->faulty[i] = 1;
qb_loop_timer_add (rrp_instance->poll_handle,
QB_LOOP_MED,
rrp_instance->totem_config->rrp_autorecovery_check_timeout*QB_TIME_NS_IN_MSEC,
rrp_instance->deliver_fn_context[i],
timer_function_test_ring_timeout,
&rrp_instance->timer_active_test_ring_timeout[i]);
sprintf (rrp_instance->status[i],
"Marking seqid %d ringid %u interface %s FAULTY",
active_instance->last_token_seq,
i,
totemnet_iface_print (rrp_instance->net_handles[i]));
log_printf (
rrp_instance->totemrrp_log_level_error,
"%s",
rrp_instance->status[i]);
active_timer_problem_decrementer_cancel (active_instance);
}
}
rrp_instance->totemrrp_deliver_fn (
active_instance->totemrrp_context,
active_instance->token,
active_instance->token_len);
}
static void active_timer_expired_token_start (
struct active_instance *active_instance)
{
qb_loop_timer_add (
active_instance->rrp_instance->poll_handle,
QB_LOOP_MED,
active_instance->rrp_instance->totem_config->rrp_token_expired_timeout*QB_TIME_NS_IN_MSEC,
(void *)active_instance,
timer_function_active_token_expired,
&active_instance->timer_expired_token);
}
static void active_timer_expired_token_cancel (
struct active_instance *active_instance)
{
qb_loop_timer_del (
active_instance->rrp_instance->poll_handle,
active_instance->timer_expired_token);
}
static void active_timer_problem_decrementer_start (
struct active_instance *active_instance)
{
qb_loop_timer_add (
active_instance->rrp_instance->poll_handle,
QB_LOOP_MED,
active_instance->rrp_instance->totem_config->rrp_problem_count_timeout*QB_TIME_NS_IN_MSEC,
(void *)active_instance,
timer_function_active_problem_decrementer,
&active_instance->timer_problem_decrementer);
}
static void active_timer_problem_decrementer_cancel (
struct active_instance *active_instance)
{
qb_loop_timer_del (
active_instance->rrp_instance->poll_handle,
active_instance->timer_problem_decrementer);
}
/*
* active replication
*/
/*
 * Active replication: a multicast message is passed straight to the
 * registered deliver function.  iface_no is not used.
 */
static void active_mcast_recv (
	struct totemrrp_instance *instance,
	unsigned int iface_no,
	void *context,
	const void *msg,
	unsigned int msg_len)
{
	(void)iface_no;

	instance->totemrrp_deliver_fn (context, msg, msg_len);
}
/*
 * Active replication: send the multicast message (flush variant) on every
 * interface that is not marked faulty.
 */
static void active_mcast_flush_send (
	struct totemrrp_instance *instance,
	const void *msg,
	unsigned int msg_len)
{
	struct active_instance *algo = (struct active_instance *)instance->rrp_algo_instance;
	int ring;

	for (ring = 0; ring < instance->interface_count; ring++) {
		if (algo->faulty[ring] != 0) {
			continue;
		}
		totemnet_mcast_flush_send (instance->net_handles[ring], msg, msg_len);
	}
}
/*
 * Active replication: send the multicast message (no-flush variant) on
 * every interface that is not marked faulty.
 */
static void active_mcast_noflush_send (
	struct totemrrp_instance *instance,
	const void *msg,
	unsigned int msg_len)
{
	struct active_instance *algo = (struct active_instance *)instance->rrp_algo_instance;
	int ring;

	for (ring = 0; ring < instance->interface_count; ring++) {
		if (algo->faulty[ring] != 0) {
			continue;
		}
		totemnet_mcast_noflush_send (instance->net_handles[ring], msg, msg_len);
	}
}
/*
 * Active replication: handle a token received on interface iface_no.
 *
 * A token whose sequence number is newer than last_token_seq (as decided by
 * sq_lt_compare) is copied into the instance, the per-interface "received"
 * flags are reset, and the token-expired timer is started.  The token is
 * delivered upward only once every non-faulty interface has its "received"
 * flag set; at that point the token-expired timer is cancelled.
 */
static void active_token_recv (
struct totemrrp_instance *rrp_instance,
unsigned int iface_no,
void *context,
const void *msg,
unsigned int msg_len,
unsigned int token_seq)
{
int i;
struct active_instance *active_instance = (struct active_instance *)rrp_instance->rrp_algo_instance;
/* remembered so the token-expired timer callback can deliver later */
active_instance->totemrrp_context = context;
if (sq_lt_compare (active_instance->last_token_seq, token_seq)) {
/*
 * NOTE(review): msg_len is not checked against the capacity of
 * active_instance->token, whose size is not visible here.
 */
memcpy (active_instance->token, msg, msg_len);
active_instance->token_len = msg_len;
for (i = 0; i < rrp_instance->interface_count; i++) {
active_instance->last_token_recv[i] = 0;
}
active_instance->last_token_recv[iface_no] = 1;
active_timer_expired_token_start (active_instance);
}
/*
* This doesn't follow spec because the spec assumes we will know
* when token resets occur.
*/
active_instance->last_token_seq = token_seq;
/*
 * last_token_seq was assigned token_seq on the line above, so this
 * condition is always true.
 */
if (token_seq == active_instance->last_token_seq) {
active_instance->last_token_recv[iface_no] = 1;
for (i = 0; i < rrp_instance->interface_count; i++) {
if ((active_instance->last_token_recv[i] == 0) &&
active_instance->faulty[i] == 0) {
return; /* don't deliver token */
}
}
/* every non-faulty interface has seen this token */
active_timer_expired_token_cancel (active_instance);
rrp_instance->totemrrp_deliver_fn (
context,
msg,
msg_len);
}
}
/*
 * Active replication: send the token on every interface that is not marked
 * faulty.
 */
static void active_token_send (
	struct totemrrp_instance *instance,
	const void *msg,
	unsigned int msg_len)
{
	struct active_instance *algo = (struct active_instance *)instance->rrp_algo_instance;
	int ring;

	for (ring = 0; ring < instance->interface_count; ring++) {
		if (algo->faulty[ring] != 0) {
			continue;
		}
		totemnet_token_send (instance->net_handles[ring], msg, msg_len);
	}
}
/*
 * Active replication: call totemnet_recv_flush() on every interface that is
 * not marked faulty.
 */
static void active_recv_flush (struct totemrrp_instance *instance)
{
	struct active_instance *algo = (struct active_instance *)instance->rrp_algo_instance;
	unsigned int ring;

	for (ring = 0; ring < instance->interface_count; ring++) {
		if (algo->faulty[ring] != 0) {
			continue;
		}
		totemnet_recv_flush (instance->net_handles[ring]);
	}
}
/*
 * Active replication: call totemnet_send_flush() on every interface that is
 * not marked faulty.
 */
static void active_send_flush (struct totemrrp_instance *instance)
{
	struct active_instance *algo = (struct active_instance *)instance->rrp_algo_instance;
	unsigned int ring;

	for (ring = 0; ring < instance->interface_count; ring++) {
		if (algo->faulty[ring] != 0) {
			continue;
		}
		totemnet_send_flush (instance->net_handles[ring]);
	}
}
/*
 * Active replication: add a member on interface iface_no.
 * Returns the result of totemnet_member_add() unchanged.
 */
static int active_member_add (
	struct totemrrp_instance *instance,
	const struct totem_ip_address *member,
	unsigned int iface_no)
{
	return (totemnet_member_add (instance->net_handles[iface_no], member));
}
/*
 * Active replication: remove a member on interface iface_no.
 * Returns the result of totemnet_member_remove() unchanged.
 */
static int active_member_remove (
	struct totemrrp_instance *instance,
	const struct totem_ip_address *member,
	unsigned int iface_no)
{
	return (totemnet_member_remove (instance->net_handles[iface_no], member));
}
/*
 * Active replication: call totemnet_iface_check() on every interface that
 * is not marked faulty.
 */
static void active_iface_check (struct totemrrp_instance *instance)
{
	struct active_instance *algo = (struct active_instance *)instance->rrp_algo_instance;
	unsigned int ring;

	for (ring = 0; ring < instance->interface_count; ring++) {
		if (algo->faulty[ring] != 0) {
			continue;
		}
		totemnet_iface_check (instance->net_handles[ring]);
	}
}
/*
 * Active replication: pass processor_count to
 * totemnet_processor_count_set() for every interface that is not marked
 * faulty.
 */
static void active_processor_count_set (
	struct totemrrp_instance *instance,
	unsigned int processor_count)
{
	struct active_instance *algo = (struct active_instance *)instance->rrp_algo_instance;
	unsigned int ring;

	for (ring = 0; ring < instance->interface_count; ring++) {
		if (algo->faulty[ring] != 0) {
			continue;
		}
		totemnet_processor_count_set (instance->net_handles[ring],
			processor_count);
	}
}
/*
 * Active replication: set the token target address on interface iface_no.
 * The faulty flags are not consulted.
 */
static void active_token_target_set (
	struct totemrrp_instance *instance,
	struct totem_ip_address *token_target,
	unsigned int iface_no)
{
	totemnet_token_target_set (
		instance->net_handles[iface_no],
		token_target);
}
/*
 * Active replication: call totemnet_recv_mcast_empty() on each interface in
 * order.
 *
 * Returns -1 as soon as any interface returns -1, 1 if at least one
 * interface returned 1, and 0 otherwise.
 */
static int active_mcast_recv_empty (
	struct totemrrp_instance *instance)
{
	int emptied_any = 0;
	int ring;

	for (ring = 0; ring < instance->interface_count; ring++) {
		int rc = totemnet_recv_mcast_empty (instance->net_handles[ring]);

		switch (rc) {
		case -1:
			return (-1);
		case 1:
			emptied_any = 1;
			break;
		default:
			break;
		}
	}

	return (emptied_any);
}
/*
 * Active replication: clear fault state.
 *
 * When iface_no equals interface_count the last_token_recv, faulty and
 * counter_problems arrays are zeroed for all interfaces; otherwise only the
 * entries of iface_no are reset.
 */
static void active_ring_reenable (
	struct totemrrp_instance *instance,
	unsigned int iface_no)
{
	struct active_instance *algo = (struct active_instance *)instance->rrp_algo_instance;

	if (iface_no != instance->interface_count) {
		algo->last_token_recv[iface_no] = 0;
		algo->faulty[iface_no] = 0;
		algo->counter_problems[iface_no] = 0;
		return;
	}

	memset (algo->last_token_recv, 0,
		sizeof (unsigned int) * instance->interface_count);
	memset (algo->faulty, 0,
		sizeof (unsigned int) * instance->interface_count);
	memset (algo->counter_problems, 0,
		sizeof (unsigned int) * instance->interface_count);
}
/*
 * Zero-fill a freshly allocated redundant ring instance.
 */
static void totemrrp_instance_initialize (struct totemrrp_instance *instance)
{
	memset (instance, 0, sizeof (*instance));
}
/*
 * Select the replication algorithm named by totem_config->rrp_mode and
 * allocate the per-interface status strings.
 *
 * The rrp_algos table is searched for an entry whose name matches; its
 * optional initialize hook is called to create the algorithm's private
 * state.  Each interface then gets a 1024 byte status buffer preset to
 * "ring N active with no faults".
 *
 * Returns 0 on success and -1 when the mode name is unknown, the
 * algorithm's initialize hook returns NULL, or a status buffer cannot be
 * allocated.  On status allocation failure the buffers allocated so far are
 * freed and their slots reset to NULL.
 *
 * NOTE(review): a NULL result from an initialize hook is treated as failure;
 * active_instance_initialize returns NULL only when allocation fails -
 * confirm the same holds for the other algorithms.  The algorithm's private
 * state is not released on the later failure path because no matching
 * teardown hook is visible here.
 */
static int totemrrp_algorithm_set (
	struct totem_config *totem_config,
	struct totemrrp_instance *instance)
{
	int res = -1;
	unsigned int i;

	for (i = 0; i < RRP_ALGOS_COUNT; i++) {
		if (strcmp (totem_config->rrp_mode, rrp_algos[i]->name) == 0) {
			instance->rrp_algo = rrp_algos[i];
			if (rrp_algos[i]->initialize) {
				instance->rrp_algo_instance = rrp_algos[i]->initialize (
					instance,
					totem_config->interface_count);
				if (instance->rrp_algo_instance == NULL) {
					return (-1);
				}
			}
			res = 0;
			break;
		}
	}
	if (res != 0) {
		/* unknown rrp_mode: nothing else to set up */
		return (res);
	}

	for (i = 0; i < totem_config->interface_count; i++) {
		instance->status[i] = malloc (1024);
		if (instance->status[i] == NULL) {
			while (i > 0) {
				i--;
				free (instance->status[i]);
				instance->status[i] = NULL;
			}
			return (-1);
		}
		snprintf (instance->status[i], 1024,
			"ring %u active with no faults", i);
	}
	return (res);
}
/*
 * Receive callback registered with every totemnet instance.
 *
 * context is the struct deliver_fn_context of the interface the message
 * arrived on.  The message is dispatched on its header type:
 *
 * - MESSAGE_TYPE_RING_TEST_ACTIVE: if this node is the activator named in
 *   the header, reply with a MESSAGE_TYPE_RING_TEST_ACTIVATE message on the
 *   same interface; otherwise send the test message onward unchanged.
 * - MESSAGE_TYPE_RING_TEST_ACTIVATE: re-enable the receiving interface and,
 *   unless this node is the activator, send the message onward unchanged.
 * - otherwise: hand the message to the algorithm's token_recv hook when
 *   totemrrp_token_seqid_get() flags it as a token, else to mcast_recv.
 *
 * Headers whose endian_detector differs from ENDIAN_LOCAL are converted
 * into a local copy before any multi-byte header field is read; this
 * includes the ring number written to the log.
 */
void rrp_deliver_fn (
	void *context,
	const void *msg,
	unsigned int msg_len)
{
	unsigned int token_seqid;
	unsigned int token_is;

	struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context;
	struct totemrrp_instance *rrp_instance = deliver_fn_context->instance;
	const struct message_header *hdr = msg;
	struct message_header tmp_msg, activate_msg;

	memset(&tmp_msg, 0, sizeof(struct message_header));
	memset(&activate_msg, 0, sizeof(struct message_header));

	rrp_instance->totemrrp_token_seqid_get (
		msg,
		&token_seqid,
		&token_is);

	if (hdr->type == MESSAGE_TYPE_RING_TEST_ACTIVE) {
		log_printf (
			rrp_instance->totemrrp_log_level_debug,
			"received message requesting test of ring now active\n");

		if (hdr->endian_detector != ENDIAN_LOCAL) {
			test_active_msg_endian_convert(hdr, &tmp_msg);
			hdr = &tmp_msg;
		}

		if (hdr->nodeid_activator == rrp_instance->my_nodeid) {
			/*
			 * Send an activate message
			 */
			activate_msg.type = MESSAGE_TYPE_RING_TEST_ACTIVATE;
			activate_msg.endian_detector = ENDIAN_LOCAL;
			activate_msg.ring_number = hdr->ring_number;
			activate_msg.nodeid_activator = rrp_instance->my_nodeid;
			totemnet_token_send (
				rrp_instance->net_handles[deliver_fn_context->iface_no],
				&activate_msg, sizeof (struct message_header));
		} else {
			/*
			 * Send a ring test message
			 */
			totemnet_token_send (
				rrp_instance->net_handles[deliver_fn_context->iface_no],
				msg, msg_len);
		}
	} else
	if (hdr->type == MESSAGE_TYPE_RING_TEST_ACTIVATE) {
		/*
		 * Convert first so the ring number below is logged in host
		 * byte order.
		 */
		if (hdr->endian_detector != ENDIAN_LOCAL) {
			test_active_msg_endian_convert(hdr, &tmp_msg);
			hdr = &tmp_msg;
		}

		log_printf (
			rrp_instance->totemrrp_log_level_notice,
			"Automatically recovered ring %d\n", hdr->ring_number);

		totemrrp_ring_reenable (rrp_instance, deliver_fn_context->iface_no);
		if (hdr->nodeid_activator != rrp_instance->my_nodeid) {
			totemnet_token_send (
				rrp_instance->net_handles[deliver_fn_context->iface_no],
				msg, msg_len);
		}
	} else
	if (token_is) {
		/*
		 * Deliver to the token receiver for this rrp algorithm
		 */
		rrp_instance->rrp_algo->token_recv (
			rrp_instance,
			deliver_fn_context->iface_no,
			deliver_fn_context->context,
			msg,
			msg_len,
			token_seqid);
	} else {
		/*
		 * Deliver to the mcast receiver for this rrp algorithm
		 */
		rrp_instance->rrp_algo->mcast_recv (
			rrp_instance,
			deliver_fn_context->iface_no,
			deliver_fn_context->context,
			msg,
			msg_len);
	}
}
void rrp_iface_change_fn (
void *context,
const struct totem_ip_address *iface_addr)
{
struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context;
deliver_fn_context->instance->my_nodeid = iface_addr->nodeid;
deliver_fn_context->instance->totemrrp_iface_change_fn (
deliver_fn_context->context,
iface_addr,
deliver_fn_context->iface_no);
}
/*
 * Call totemnet_finalize() on every interface of the instance.
 * Always returns 0.  No memory owned by the instance is released here.
 */
int totemrrp_finalize (
	void *rrp_context)
{
	struct totemrrp_instance *rrp = (struct totemrrp_instance *)rrp_context;
	int ring;

	for (ring = 0; ring < rrp->interface_count; ring++) {
		totemnet_finalize (rrp->net_handles[ring]);
	}

	return (0);
}
static void rrp_target_set_completed (void *context)
{
struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context;
deliver_fn_context->instance->totemrrp_target_set_completed (deliver_fn_context->context);
}
/*
* Totem Redundant Ring interface
* depends on poll abstraction, POSIX, IPV4
*/
/*
* Create an instance
*/
/*
 * Create a redundant ring instance.
 *
 * poll_handle          - libqb main loop used for timers and passed to totemnet
 * rrp_context          - receives the new instance on success
 * totem_config         - configuration (rrp mode, interfaces, logging, ...)
 * context              - opaque upper-layer context passed back in callbacks
 * deliver_fn           - upper-layer message delivery callback
 * iface_change_fn      - upper-layer interface change callback
 * token_seqid_get      - extracts the token sequence id / token flag of a message
 * msgs_missing         - upper-layer query used by the passive algorithm
 * target_set_completed - upper-layer notification callback
 *
 * Returns 0 on success, -1 when the instance or the net_handles array cannot
 * be allocated or the replication algorithm cannot be set up.
 *
 * On failure the instance and any status buffers are freed.
 * NOTE(review): the algorithm's private state (rrp_algo_instance) is not
 * released on failure; no teardown hook is visible here.
 */
int totemrrp_initialize (
	qb_loop_t *poll_handle,
	void **rrp_context,
	struct totem_config *totem_config,
	void *context,

	void (*deliver_fn) (
		void *context,
		const void *msg,
		unsigned int msg_len),

	void (*iface_change_fn) (
		void *context,
		const struct totem_ip_address *iface_addr,
		unsigned int iface_no),

	void (*token_seqid_get) (
		const void *msg,
		unsigned int *seqid,
		unsigned int *token_is),

	unsigned int (*msgs_missing) (void),

	void (*target_set_completed) (void *context))
{
	struct totemrrp_instance *instance;
	int res;
	int i;

	instance = malloc (sizeof (struct totemrrp_instance));
	if (instance == NULL) {
		return (-1);
	}

	totemrrp_instance_initialize (instance);

	instance->totem_config = totem_config;

	res = totemrrp_algorithm_set (
		instance->totem_config,
		instance);
	if (res == -1) {
		goto error_destroy;
	}

	/*
	 * Configure logging
	 */
	instance->totemrrp_log_level_security = totem_config->totem_logging_configuration.log_level_security;
	instance->totemrrp_log_level_error = totem_config->totem_logging_configuration.log_level_error;
	instance->totemrrp_log_level_warning = totem_config->totem_logging_configuration.log_level_warning;
	instance->totemrrp_log_level_notice = totem_config->totem_logging_configuration.log_level_notice;
	instance->totemrrp_log_level_debug = totem_config->totem_logging_configuration.log_level_debug;
	instance->totemrrp_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
	instance->totemrrp_log_printf = totem_config->totem_logging_configuration.log_printf;

	instance->interfaces = totem_config->interfaces;

	instance->poll_handle = poll_handle;

	instance->totemrrp_deliver_fn = deliver_fn;

	instance->totemrrp_iface_change_fn = iface_change_fn;

	instance->totemrrp_token_seqid_get = token_seqid_get;

	instance->totemrrp_target_set_completed = target_set_completed;

	instance->totemrrp_msgs_missing = msgs_missing;

	instance->interface_count = totem_config->interface_count;

	instance->context = context;

	instance->net_handles = malloc (sizeof (void *) * totem_config->interface_count);
	if (instance->net_handles == NULL) {
		res = -1;
		goto error_destroy;
	}

	for (i = 0; i < totem_config->interface_count; i++) {
		struct deliver_fn_context *deliver_fn_context;

		deliver_fn_context = malloc (sizeof (struct deliver_fn_context));
		/*
		 * Kept as an assertion: interfaces initialised in earlier
		 * iterations already hold their context pointers, so this
		 * function cannot unwind safely from here.
		 */
		assert (deliver_fn_context);
		deliver_fn_context->instance = instance;
		deliver_fn_context->context = context;
		deliver_fn_context->iface_no = i;
		instance->deliver_fn_context[i] = (void *)deliver_fn_context;

		totemnet_initialize (
			poll_handle,
			&instance->net_handles[i],
			totem_config,
			i,
			(void *)deliver_fn_context,
			rrp_deliver_fn,
			rrp_iface_change_fn,
			rrp_target_set_completed);

		totemnet_net_mtu_adjust (instance->net_handles[i], totem_config);
	}

	*rrp_context = instance;
	return (0);

error_destroy:
	/*
	 * Status slots are NULL unless totemrrp_algorithm_set() filled them;
	 * free(NULL) is a no-op.
	 */
	for (i = 0; i < totem_config->interface_count; i++) {
		free (instance->status[i]);
	}
	free (instance);
	return (res);
}
/*
 * Allocate a buffer through the totemnet instance of interface 0.
 * rrp_context must not be NULL (asserted).
 */
void *totemrrp_buffer_alloc (void *rrp_context)
{
	struct totemrrp_instance *rrp;

	assert (rrp_context != NULL);
	rrp = rrp_context;

	return totemnet_buffer_alloc (rrp->net_handles[0]);
}
/*
 * Release a buffer through the totemnet instance of interface 0.
 * rrp_context must not be NULL (asserted).
 */
void totemrrp_buffer_release (void *rrp_context, void *ptr)
{
	struct totemrrp_instance *rrp;

	assert (rrp_context != NULL);
	rrp = rrp_context;

	totemnet_buffer_release (rrp->net_handles[0], ptr);
}
/*
 * Forward the processor count to the active algorithm, then remember it in
 * the instance.  Always returns 0.
 */
int totemrrp_processor_count_set (
	void *rrp_context,
	unsigned int processor_count)
{
	struct totemrrp_instance *rrp = (struct totemrrp_instance *)rrp_context;

	rrp->rrp_algo->processor_count_set (rrp, processor_count);
	rrp->processor_count = processor_count;

	return (0);
}
/*
 * Forward the token target address for interface iface_no to the active
 * algorithm.  Always returns 0.
 */
int totemrrp_token_target_set (
	void *rrp_context,
	struct totem_ip_address *addr,
	unsigned int iface_no)
{
	struct totemrrp_instance *rrp = (struct totemrrp_instance *)rrp_context;

	rrp->rrp_algo->token_target_set (rrp, addr, iface_no);

	return (0);
}
/*
 * Invoke the active algorithm's recv_flush hook.  Always returns 0.
 */
int totemrrp_recv_flush (void *rrp_context)
{
	struct totemrrp_instance *rrp = (struct totemrrp_instance *)rrp_context;

	rrp->rrp_algo->recv_flush (rrp);

	return (0);
}
/*
 * Invoke the active algorithm's send_flush hook.  Always returns 0.
 */
int totemrrp_send_flush (void *rrp_context)
{
	struct totemrrp_instance *rrp = (struct totemrrp_instance *)rrp_context;

	rrp->rrp_algo->send_flush (rrp);

	return (0);
}
/*
 * Send a token through the active algorithm's token_send hook.
 * Always returns 0.
 */
int totemrrp_token_send (
	void *rrp_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemrrp_instance *rrp = (struct totemrrp_instance *)rrp_context;

	rrp->rrp_algo->token_send (rrp, msg, msg_len);

	return (0);
}
/*
 * Send a multicast message through the active algorithm's mcast_flush_send
 * hook.  Always returns 0; the hook itself returns nothing.
 */
int totemrrp_mcast_flush_send (
	void *rrp_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemrrp_instance *rrp = (struct totemrrp_instance *)rrp_context;

	// TODO this needs to return the result
	rrp->rrp_algo->mcast_flush_send (rrp, msg, msg_len);

	return (0);
}
/*
 * Send a multicast message through the active algorithm's
 * mcast_noflush_send hook, but only when more than one processor is known.
 * Always returns 0.
 */
int totemrrp_mcast_noflush_send (
	void *rrp_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemrrp_instance *rrp = (struct totemrrp_instance *)rrp_context;

	/*
	 * merge detects go out through mcast_flush_send so it is safe to
	 * flush these messages if we are only one processor. This avoids
	 * an encryption/hmac and decryption/hmac
	 */
	if (!(rrp->processor_count > 1)) {
		return (0);
	}

	// TODO this needs to return the result
	rrp->rrp_algo->mcast_noflush_send (rrp, msg, msg_len);

	return (0);
}
/*
 * Invoke the active algorithm's iface_check hook.  Always returns 0.
 */
int totemrrp_iface_check (void *rrp_context)
{
	struct totemrrp_instance *rrp = (struct totemrrp_instance *)rrp_context;

	rrp->rrp_algo->iface_check (rrp);

	return (0);
}
/*
 * Expose the instance's per-interface status strings.
 *
 * *status is pointed at the instance's own status array (no copy is made).
 * If iface_count is non-NULL it receives the number of interfaces.
 * Always returns 0.
 */
int totemrrp_ifaces_get (
	void *rrp_context,
	char ***status,
	unsigned int *iface_count)
{
	struct totemrrp_instance *rrp = (struct totemrrp_instance *)rrp_context;

	*status = rrp->status;

	if (iface_count != NULL) {
		*iface_count = rrp->interface_count;
	}

	return (0);
}
/*
 * Set the crypto type on the totemnet instance of interface 0 and return
 * the result of totemnet_crypto_set() unchanged.
 */
int totemrrp_crypto_set (
	void *rrp_context,
	unsigned int type)
{
	struct totemrrp_instance *rrp = (struct totemrrp_instance *)rrp_context;

	return (totemnet_crypto_set(rrp->net_handles[0], type));
}
/*
 * iface_no indicates the interface number [0, ..., interface_count-1] of the
 * specific ring which will be reenabled. We specify iface_no == interface_count
 * means reenabling all the rings.
 *
 * The algorithm's ring_reenable hook is invoked and the status string of
 * each re-enabled ring is reset to "ring N active with no faults".
 *
 * Returns 0 on success, or -1 without touching any state when iface_no is
 * greater than interface_count (it would index past the per-interface
 * arrays).
 */
int totemrrp_ring_reenable (
	void *rrp_context,
	unsigned int iface_no)
{
	struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context;
	int res = 0;
	unsigned int i;

	if (iface_no > instance->interface_count) {
		return (-1);
	}

	instance->rrp_algo->ring_reenable (instance, iface_no);

	if (iface_no == instance->interface_count) {
		for (i = 0; i < instance->interface_count; i++) {
			sprintf (instance->status[i], "ring %u active with no faults", i);
		}
	} else {
		sprintf (instance->status[iface_no], "ring %u active with no faults", iface_no);
	}

	return (res);
}
/*
 * Invoke the active algorithm's mcast_recv_empty hook and return its result
 * unchanged.
 */
extern int totemrrp_mcast_recv_empty (
	void *rrp_context)
{
	struct totemrrp_instance *rrp = (struct totemrrp_instance *)rrp_context;

	return (rrp->rrp_algo->mcast_recv_empty (rrp));
}
/*
 * Add a member on interface iface_no through the active algorithm's
 * member_add hook and return its result unchanged.
 */
int totemrrp_member_add (
	void *rrp_context,
	const struct totem_ip_address *member,
	int iface_no)
{
	struct totemrrp_instance *rrp = (struct totemrrp_instance *)rrp_context;

	return (rrp->rrp_algo->member_add (rrp, member, iface_no));
}
/*
 * Remove a member on interface iface_no through the active algorithm's
 * member_remove hook and return its result unchanged.
 */
int totemrrp_member_remove (
	void *rrp_context,
	const struct totem_ip_address *member,
	int iface_no)
{
	struct totemrrp_instance *rrp = (struct totemrrp_instance *)rrp_context;

	return (rrp->rrp_algo->member_remove (rrp, member, iface_no));
}
diff --git a/exec/totemsrp.c b/exec/totemsrp.c
index cb2d9152..e05d65bc 100644
--- a/exec/totemsrp.c
+++ b/exec/totemsrp.c
@@ -1,4489 +1,4497 @@
/*
* Copyright (c) 2003-2006 MontaVista Software, Inc.
* Copyright (c) 2006-2009 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* The first version of this code was based upon Yair Amir's PhD thesis:
* http://www.cs.jhu.edu/~yairamir/phd.ps (ch4,5).
*
* The current version of totemsrp implements the Totem protocol specified in:
* http://citeseer.ist.psu.edu/amir95totem.html
*
* The deviations from the above published protocols are:
* - encryption of message contents with SOBER128
* - authentication of message contents with SHA1/HMAC
* - token hold mode where token doesn't rotate on unused ring - reduces cpu
* usage on 1.6ghz xeon from 35% to less than .1 % as measured by top
*/
#include <config.h>
#include <assert.h>
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netdb.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <sched.h>
#include <time.h>
#include <sys/time.h>
#include <sys/poll.h>
#include <limits.h>
#include <qb/qbdefs.h>
#include <qb/qbutil.h>
#include <qb/qbloop.h>
#include <corosync/swab.h>
#include <corosync/cs_queue.h>
#include <corosync/sq.h>
#include <corosync/list.h>
#include <corosync/hdb.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/engine/logsys.h>
#include "totemsrp.h"
#include "totemrrp.h"
#include "totemnet.h"
#include "crypto.h"
/*
 * Size limits and fixed values used throughout totemsrp.
 */
#define LOCALHOST_IP inet_addr("127.0.0.1")
#define QUEUE_RTR_ITEMS_SIZE_MAX 16384 /* allow 16384 retransmit items */
#define RETRANS_MESSAGE_QUEUE_SIZE_MAX 16384 /* allow 16384 messages to be queued */
#define RECEIVED_MESSAGE_QUEUE_SIZE_MAX 500 /* allow 500 messages to be queued */
#define MAXIOVS 5
#define RETRANSMIT_ENTRIES_MAX 30
#define TOKEN_SIZE_MAX 64000 /* bytes */
#define LEAVE_DUMMY_NODEID 0
/*
* Rollover handling:
* SEQNO_START_MSG is the starting sequence number after a new configuration
* This should remain zero, unless testing overflow in which case
* 0x7ffff000 and 0xfffff000 are good starting values.
*
* SEQNO_START_TOKEN is the starting sequence number after a new configuration
* for a token. This should remain zero, unless testing overflow in which
* case 0x7fffff00 or 0xffffff00 are good starting values.
*/
#define SEQNO_START_MSG 0x0
#define SEQNO_START_TOKEN 0x0
/*
* These can be used to test different rollover points
* #define SEQNO_START_MSG 0xfffffe00
* #define SEQNO_START_TOKEN 0xfffffe00
*/
/*
* These can be used to test the error recovery algorithms
* #define TEST_DROP_ORF_TOKEN_PERCENTAGE 30
* #define TEST_DROP_COMMIT_TOKEN_PERCENTAGE 30
* #define TEST_DROP_MCAST_PERCENTAGE 50
* #define TEST_RECOVERY_MSG_COUNT 300
*/
/*
* Endian marker placed in message headers.  The endian_detector field of an
* incoming message is compared against this value to determine whether the
* sender's endianness differs from ours - if so the message is converted.
*
* do not change
*/
#define ENDIAN_LOCAL 0xff22
/*
 * Message type codes.  The numeric values are explicit and must stay
 * stable; presumably they are the values carried in message_header.type
 * (TODO confirm against the senders).
 */
enum message_type {
MESSAGE_TYPE_ORF_TOKEN = 0, /* Ordering, Reliability, Flow (ORF) control Token */
MESSAGE_TYPE_MCAST = 1, /* ring ordered multicast message */
MESSAGE_TYPE_MEMB_MERGE_DETECT = 2, /* merge rings if there are available rings */
MESSAGE_TYPE_MEMB_JOIN = 3, /* membership join message */
MESSAGE_TYPE_MEMB_COMMIT_TOKEN = 4, /* membership commit token */
MESSAGE_TYPE_TOKEN_HOLD_CANCEL = 5, /* cancel the holding of the token */
};
/*
* Encapsulation markers.
* NOTE(review): presumably the values stored in message_header.encapsulated;
* no use is visible in this part of the file - confirm.
*/
enum encapsulation_type {
MESSAGE_ENCAPSULATED = 1,
MESSAGE_NOT_ENCAPSULATED = 2
};
/*
* New membership algorithm local variables
*/
/*
* Fixed array of INTERFACE_MAX totem_ip_address entries.
* Helpers in this file read addr[0].nodeid as the node id, and
* srp_addr_equal() compares addr[0] only.
*/
struct srp_addr {
struct totem_ip_address addr[INTERFACE_MAX];
};
/*
* Element of totemsrp_instance.consensus_list: an address plus a flag that
* memb_consensus_set() writes as 1 and memb_consensus_isset() returns.
*/
struct consensus_list_item {
struct srp_addr addr;
int set;
};
/*
* Record describing one token callback: list linkage, the function to call,
* the callback type it is registered for, a delete flag and an opaque data
* pointer.
* NOTE(review): the exact meaning of 'delete' is not visible here - confirm
* against the code that walks the callback lists.
*/
struct token_callback_instance {
struct list_head list;
int (*callback_fn) (enum totem_callback_token_type type, const void *);
enum totem_callback_token_type callback_type;
int delete;
void *data;
};
/*
* Pair of integers named mcast and token.
* NOTE(review): presumably descriptors for the multicast and token sockets;
* the struct is not referenced in the visible part of the file - confirm.
*/
struct totemsrp_socket {
int mcast;
int token;
};
/*
* Header embedded as the first member of every message structure below
* (mcast, orf_token, memb_join, memb_merge_detect, token_hold_cancel,
* memb_commit_token). Declared packed.
* NOTE(review): endian_detector is presumably compared with ENDIAN_LOCAL to
* detect senders of different endianness - confirm in the receive path.
*/
struct message_header {
char type;
char encapsulated;
unsigned short endian_detector;
unsigned int nodeid;
} __attribute__((packed));
/*
* Packed leading part of a multicast message: common header, sender address,
* sequence numbers, ring id, node id and a guarantee value.
* message_item and sort_queue_item hold pointers to this type.
*/
struct mcast {
struct message_header header;
struct srp_addr system_from;
unsigned int seq;
int this_seqno;
struct memb_ring_id ring_id;
unsigned int node_id;
int guarantee;
} __attribute__((packed));
/*
* Element type of orf_token.rtr_list: a ring id and a sequence number.
* Declared packed.
*/
struct rtr_item {
struct memb_ring_id ring_id;
unsigned int seq;
}__attribute__((packed));
/*
* Packed layout of the ORF token message (header.type ==
* MESSAGE_TYPE_ORF_TOKEN, see main_token_seqid_get which reads token_seq).
* rtr_list is a zero-length trailing array of rtr_item.
* NOTE(review): rtr_list_entries presumably counts the rtr_list elements
* that follow the fixed part - confirm.
*/
struct orf_token {
struct message_header header;
unsigned int seq;
unsigned int token_seq;
unsigned int aru;
unsigned int aru_addr;
struct memb_ring_id ring_id;
unsigned int backlog;
unsigned int fcc;
int retrans_flg;
int rtr_list_entries;
struct rtr_item rtr_list[0];
}__attribute__((packed));
/*
* Packed fixed part of a membership join message. end_of_memb_join is a
* zero-length marker for the position where the variable-length lists
* described in the comment below begin.
*/
struct memb_join {
struct message_header header;
struct srp_addr system_from;
unsigned int proc_list_entries;
unsigned int failed_list_entries;
unsigned long long ring_seq;
unsigned char end_of_memb_join[0];
/*
* These parts of the data structure are dynamic:
* struct srp_addr proc_list[];
* struct srp_addr failed_list[];
*/
} __attribute__((packed));
/*
* Packed merge-detect message: common header, sender address and ring id.
*/
struct memb_merge_detect {
struct message_header header;
struct srp_addr system_from;
struct memb_ring_id ring_id;
} __attribute__((packed));
/*
* Packed token-hold-cancel message: common header and ring id.
*/
struct token_hold_cancel {
struct message_header header;
struct memb_ring_id ring_id;
} __attribute__((packed));
/*
* Element type of the dynamic memb_list that follows a memb_commit_token
* (see the comment inside struct memb_commit_token). Declared packed.
*/
struct memb_commit_token_memb_entry {
struct memb_ring_id ring_id;
unsigned int aru;
unsigned int high_delivered;
unsigned int received_flg;
}__attribute__((packed));
/*
* Packed fixed part of the membership commit token. end_of_commit_token is
* a zero-length marker for the position where the dynamic arrays described
* in the comment below begin.
*/
struct memb_commit_token {
struct message_header header;
unsigned int token_seq;
struct memb_ring_id ring_id;
unsigned int retrans_flg;
int memb_index;
int addr_entries;
unsigned char end_of_commit_token[0];
/*
* These parts of the data structure are dynamic:
*
* struct srp_addr addr[PROCESSOR_COUNT_MAX];
* struct memb_commit_token_memb_entry memb_list[PROCESSOR_COUNT_MAX];
*/
}__attribute__((packed));
/*
* Element of new_message_queue and retrans_message_queue (both queues are
* created with sizeof (struct message_item) in totemsrp_initialize):
* a pointer to the message and its length.
*/
struct message_item {
struct mcast *mcast;
unsigned int msg_len;
};
/*
* Element of regular_sort_queue and recovery_sort_queue (both are created
* with sizeof (struct sort_queue_item) in totemsrp_initialize):
* a pointer to the message and its length.
*/
struct sort_queue_item {
struct mcast *mcast;
unsigned int msg_len;
};
/*
* A 9000 byte I/O buffer together with a PRNG state.
* NOTE(review): presumably per-thread scratch state for multicast sending;
* no use is visible in this part of the file - confirm.
*/
struct orf_token_mcast_thread_state {
char iobuf[9000];
prng_state prng_state;
};
/*
* Values stored in totemsrp_instance.memb_state.
* totemsrp_instance_initialize() starts an instance in
* MEMB_STATE_OPERATIONAL.
*/
enum memb_state {
MEMB_STATE_OPERATIONAL = 1,
MEMB_STATE_GATHER = 2,
MEMB_STATE_COMMIT = 3,
MEMB_STATE_RECOVERY = 4
};
/*
* Complete state of one totemsrp instance. It is allocated in
* totemsrp_initialize(), zeroed and given its non-zero defaults by
* totemsrp_instance_initialize(), and released in totemsrp_finalize().
*/
struct totemsrp_instance {
int iface_changes;
int failed_to_recv;
/*
* Flow control mcasts and remcasts on last and current orf_token
*/
int fcc_remcast_last;
int fcc_mcast_last;
int fcc_remcast_current;
/*
* Consensus database used by the memb_consensus_* helpers
*/
struct consensus_list_item consensus_list[PROCESSOR_COUNT_MAX];
int consensus_list_entries;
struct srp_addr my_id;
/*
* Membership lists; each has a matching *_entries counter below
*/
struct srp_addr my_proc_list[PROCESSOR_COUNT_MAX];
struct srp_addr my_failed_list[PROCESSOR_COUNT_MAX];
struct srp_addr my_new_memb_list[PROCESSOR_COUNT_MAX];
struct srp_addr my_trans_memb_list[PROCESSOR_COUNT_MAX];
struct srp_addr my_memb_list[PROCESSOR_COUNT_MAX];
struct srp_addr my_deliver_memb_list[PROCESSOR_COUNT_MAX];
struct srp_addr my_left_memb_list[PROCESSOR_COUNT_MAX];
int my_proc_list_entries;
int my_failed_list_entries;
int my_new_memb_entries;
int my_trans_memb_entries;
int my_memb_entries;
int my_deliver_memb_entries;
int my_left_memb_entries;
struct memb_ring_id my_ring_id;
struct memb_ring_id my_old_ring_id;
int my_aru_count;
int my_merge_detect_timeout_outstanding;
unsigned int my_last_aru;
int my_seq_unchanged;
int my_received_flg;
unsigned int my_high_seq_received;
unsigned int my_install_seq;
int my_rotation_counter;
int my_set_retrans_flg;
int my_retrans_flg_count;
unsigned int my_high_ring_delivered;
int heartbeat_timeout;
/*
* Queues used to order, deliver, and recover messages
*/
struct cs_queue new_message_queue;
struct cs_queue retrans_message_queue;
struct sq regular_sort_queue;
struct sq recovery_sort_queue;
/*
* Received up to and including
*/
unsigned int my_aru;
unsigned int my_high_delivered;
struct list_head token_callback_received_listhead;
struct list_head token_callback_sent_listhead;
char orf_token_retransmit[TOKEN_SIZE_MAX];
int orf_token_retransmit_size;
unsigned int my_token_seq;
/*
* Timers
*/
qb_loop_timer_handle timer_pause_timeout;
qb_loop_timer_handle timer_orf_token_timeout;
qb_loop_timer_handle timer_orf_token_retransmit_timeout;
qb_loop_timer_handle timer_orf_token_hold_retransmit_timeout;
qb_loop_timer_handle timer_merge_detect_timeout;
qb_loop_timer_handle memb_timer_state_gather_join_timeout;
qb_loop_timer_handle memb_timer_state_gather_consensus_timeout;
qb_loop_timer_handle memb_timer_state_commit_timeout;
qb_loop_timer_handle timer_heartbeat_timeout;
/*
* Function and data used to log messages
*
* The log levels, subsystem id and log function are copied from
* totem_config->totem_logging_configuration in totemsrp_initialize().
* NOTE(review): the second parameter name "sybsys" looks like a typo for
* "subsys", and the declaration ends in a doubled ';' - confirm and tidy.
*/
int totemsrp_log_level_security;
int totemsrp_log_level_error;
int totemsrp_log_level_warning;
int totemsrp_log_level_notice;
int totemsrp_log_level_debug;
int totemsrp_subsys_id;
void (*totemsrp_log_printf) (
- unsigned int rec_ident,
+ int level,
+ int sybsys,
const char *function,
const char *file,
int line,
- const char *format, ...)__attribute__((format(printf, 5, 6)));;
+ const char *format, ...)__attribute__((format(printf, 6, 7)));;
enum memb_state memb_state;
//TODO struct srp_addr next_memb;
qb_loop_t *totemsrp_poll_handle;
struct totem_ip_address mcast_address;
void (*totemsrp_deliver_fn) (
unsigned int nodeid,
const void *msg,
unsigned int msg_len,
int endian_conversion_required);
void (*totemsrp_confchg_fn) (
enum totem_configuration_type configuration_type,
const unsigned int *member_list, size_t member_list_entries,
const unsigned int *left_list, size_t left_list_entries,
const unsigned int *joined_list, size_t joined_list_entries,
const struct memb_ring_id *ring_id);
void (*totemsrp_service_ready_fn) (void);
int global_seqno;
int my_token_held;
unsigned long long token_ring_id_seq;
unsigned int last_released;
unsigned int set_aru;
/*
* Written by old_ring_state_save(), read by old_ring_state_restore()
*/
int old_ring_state_saved;
int old_ring_state_aru;
unsigned int old_ring_state_high_seq_received;
unsigned int my_last_seq;
struct timeval tv_old;
void *totemrrp_context;
struct totem_config *totem_config;
unsigned int use_heartbeat;
unsigned int my_trc;
unsigned int my_pbl;
unsigned int my_cbl;
uint64_t pause_timestamp;
/* commit_token is pointed at commit_token_storage by totemsrp_instance_initialize() */
struct memb_commit_token *commit_token;
totemsrp_stats_t stats;
uint32_t orf_token_discard;
void * token_recv_event_handle;
void * token_sent_event_handle;
char commit_token_storage[40000];
};
/*
* Table of message handler functions: a count followed by up to six
* handlers, each taking the instance, the raw message, its length and an
* endian-conversion flag.
*/
struct message_handlers {
int count;
int (*handler_functions[6]) (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
};
/*
* forward decls
*/
static int message_handler_orf_token (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
static int message_handler_mcast (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
static int message_handler_memb_merge_detect (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
static int message_handler_memb_join (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
static int message_handler_memb_commit_token (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
static int message_handler_token_hold_cancel (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed);
static void totemsrp_instance_initialize (struct totemsrp_instance *instance);
static unsigned int main_msgs_missing (void);
static void main_token_seqid_get (
const void *msg,
unsigned int *seqid,
unsigned int *token_is);
static void srp_addr_copy (struct srp_addr *dest, const struct srp_addr *src);
static void srp_addr_to_nodeid (
unsigned int *nodeid_out,
struct srp_addr *srp_addr_in,
unsigned int entries);
static int srp_addr_equal (const struct srp_addr *a, const struct srp_addr *b);
static void memb_leave_message_send (struct totemsrp_instance *instance);
static void memb_ring_id_create_or_load (struct totemsrp_instance *, struct memb_ring_id *);
static void token_callbacks_execute (struct totemsrp_instance *instance, enum totem_callback_token_type type);
static void memb_state_gather_enter (struct totemsrp_instance *instance, int gather_from);
static void messages_deliver_to_app (struct totemsrp_instance *instance, int skip, unsigned int end_point);
static int orf_token_mcast (struct totemsrp_instance *instance, struct orf_token *oken,
int fcc_mcasts_allowed);
static void messages_free (struct totemsrp_instance *instance, unsigned int token_aru);
static void memb_ring_id_set_and_store (struct totemsrp_instance *instance,
const struct memb_ring_id *ring_id);
static void target_set_completed (void *context);
static void memb_state_commit_token_update (struct totemsrp_instance *instance);
static void memb_state_commit_token_target_set (struct totemsrp_instance *instance);
static int memb_state_commit_token_send (struct totemsrp_instance *instance);
static int memb_state_commit_token_send_recovery (struct totemsrp_instance *instance, struct memb_commit_token *memb_commit_token);
static void memb_state_commit_token_create (struct totemsrp_instance *instance);
static int token_hold_cancel_send (struct totemsrp_instance *instance);
static void orf_token_endian_convert (const struct orf_token *in, struct orf_token *out);
static void memb_commit_token_endian_convert (const struct memb_commit_token *in, struct memb_commit_token *out);
static void memb_join_endian_convert (const struct memb_join *in, struct memb_join *out);
static void mcast_endian_convert (const struct mcast *in, struct mcast *out);
static void memb_merge_detect_endian_convert (
const struct memb_merge_detect *in,
struct memb_merge_detect *out);
static void srp_addr_copy_endian_convert (struct srp_addr *out, const struct srp_addr *in);
static void timer_function_orf_token_timeout (void *data);
static void timer_function_pause_timeout (void *data);
static void timer_function_heartbeat_timeout (void *data);
static void timer_function_token_retransmit_timeout (void *data);
static void timer_function_token_hold_retransmit_timeout (void *data);
static void timer_function_merge_detect_timeout (void *data);
static void *totemsrp_buffer_alloc (struct totemsrp_instance *instance);
static void totemsrp_buffer_release (struct totemsrp_instance *instance, void *ptr);
void main_deliver_fn (
void *context,
const void *msg,
unsigned int msg_len);
void main_iface_change_fn (
void *context,
const struct totem_ip_address *iface_address,
unsigned int iface_no);
/*
* Handler table for this file: six entries listed in the same order as the
* values of enum message_type (ORF token first, token hold cancel last).
*/
struct message_handlers totemsrp_message_handlers = {
6,
{
message_handler_orf_token,
message_handler_mcast,
message_handler_memb_merge_detect,
message_handler_memb_join,
message_handler_memb_commit_token,
message_handler_token_hold_cancel
}
};
/*
* Run directory; totemsrp_initialize() sets it from the COROSYNC_RUN_DIR
* environment variable or, when unset, to LOCALSTATEDIR "/lib/corosync".
*/
static const char *rundir = NULL;
/*
* Log through the instance's totemsrp_log_printf callback, supplying the
* subsystem id and the call site (__FUNCTION__, __FILE__, __LINE__).
* A variable named 'instance' must be in scope where the macro is used.
* NOTE(review): the expansion ends in "while (0);" - with the caller's own
* ';' this yields two statements, so the macro cannot be used as the
* unbraced body of an 'if' that is followed by 'else'.
*/
-#define log_printf(level, format, args...) \
-do { \
- instance->totemsrp_log_printf ( \
- LOGSYS_ENCODE_RECID(level, \
- instance->totemsrp_subsys_id, \
- LOGSYS_RECID_LOG), \
- __FUNCTION__, __FILE__, __LINE__, \
- format, ##args); \
+#define log_printf(level, format, args...) \
+do { \
+ instance->totemsrp_log_printf ( \
+ level, instance->totemsrp_subsys_id, \
+ __FUNCTION__, __FILE__, __LINE__, \
+ format, ##args); \
} while (0);
/*
* Log 'fmt' followed by ": <error text> (<err_num>)\n", where the error text
* is obtained from qb_strerror_r(err_num, ...) into a local buffer of
* LOGSYS_MAX_PERROR_MSG_LEN bytes. Like log_printf, it needs a variable
* named 'instance' in scope.
*/
+#define LOGSYS_PERROR(err_num, level, fmt, args...) \
+do { \
+ char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \
+ const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \
+ instance->totemsrp_log_printf ( \
+ level, instance->totemsrp_subsys_id, \
+ __FUNCTION__, __FILE__, __LINE__, \
+ fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \
+ } while(0)
/*
 * Put an instance into its start-up state: clear the whole structure, then
 * assign every field whose initial value is not zero.
 */
static void totemsrp_instance_initialize (struct totemsrp_instance *instance)
{
	memset (instance, 0, sizeof (*instance));

	/* Both token callback lists start out empty */
	list_init (&instance->token_callback_received_listhead);
	list_init (&instance->token_callback_sent_listhead);

	/* Sequence counters begin at the configured rollover start points */
	instance->my_aru = SEQNO_START_MSG;
	instance->my_high_seq_received = SEQNO_START_MSG;
	instance->my_high_delivered = SEQNO_START_MSG;
	instance->my_token_seq = SEQNO_START_TOKEN - 1;
	instance->set_aru = -1;

	instance->memb_state = MEMB_STATE_OPERATIONAL;
	instance->my_received_flg = 1;
	instance->orf_token_discard = 0;

	/* The commit token lives in the storage embedded in the instance */
	instance->commit_token = (struct memb_commit_token *)instance->commit_token_storage;
}
/*
 * Inspect a raw message: when its header type is MESSAGE_TYPE_ORF_TOKEN,
 * report its token_seq through *seqid and set *token_is to 1; for any other
 * type both outputs are set to 0.
 */
static void main_token_seqid_get (
	const void *msg,
	unsigned int *seqid,
	unsigned int *token_is)
{
	const struct orf_token *token = msg;

	if (token->header.type != MESSAGE_TYPE_ORF_TOKEN) {
		*seqid = 0;
		*token_is = 0;
		return;
	}

	*seqid = token->token_seq;
	*token_is = 1;
}
/*
 * Placeholder passed to totemrrp_initialize(); it is not implemented yet and
 * always reports 0.
 */
static unsigned int main_msgs_missing (void)
{
	/* TODO: compute a real value */
	return 0U;
}
/*
 * Detect a process pause and, if one happened, drain pending multicast input.
 *
 * The time elapsed since instance->pause_timestamp is compared (in
 * milliseconds) with half of the configured token timeout. When it is
 * larger, a notice is logged and totemrrp_mcast_recv_empty() is called
 * repeatedly for as long as it returns -1.
 *
 * Returns 0 when no pause was detected, otherwise the last value returned
 * by totemrrp_mcast_recv_empty().
 */
static int pause_flush (struct totemsrp_instance *instance)
{
	uint64_t now_msec;
	uint64_t timestamp_msec;
	int res = 0;

	now_msec = (qb_util_nano_current_get () / QB_TIME_NS_IN_MSEC);
	timestamp_msec = instance->pause_timestamp / QB_TIME_NS_IN_MSEC;

	if ((now_msec - timestamp_msec) > (instance->totem_config->token_timeout / 2)) {
		/* The argument is an unsigned int, so it is printed with %u */
		log_printf (instance->totemsrp_log_level_notice,
			"Process pause detected for %u ms, flushing membership messages.\n", (unsigned int)(now_msec - timestamp_msec));
		/*
		 * -1 indicates an error from recvmsg
		 */
		do {
			res = totemrrp_mcast_recv_empty (instance->totemrrp_context);
		} while (res == -1);
	}
	return (res);
}
/*
 * Token callback that records timestamps (in milliseconds) in the
 * instance->stats.token[] ring.
 *
 * For TOTEM_CALLBACK_TOKEN_RECEIVED the latest_token index is advanced with
 * wrap-around at TOTEM_TOKEN_STATS_MAX; if it catches up with
 * earliest_token, earliest_token is advanced too and that slot is cleared.
 * The receive time is then stored in the latest slot. For any other type
 * the transmit time of the latest slot is stored.
 *
 * Always returns 0.
 */
static int token_event_stats_collector (enum totem_callback_token_type type, const void *void_instance)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)void_instance;
	unsigned long long nano_secs = qb_util_nano_current_get ();
	uint32_t time_now;

	time_now = (nano_secs / QB_TIME_NS_IN_MSEC);

	if (type != TOTEM_CALLBACK_TOKEN_RECEIVED) {
		instance->stats.token[instance->stats.latest_token].tx = time_now;
		return 0;
	}

	/* step the index of the latest token, wrapping at the end of the array */
	if (instance->stats.latest_token != (TOTEM_TOKEN_STATS_MAX - 1)) {
		instance->stats.latest_token++;
	} else {
		instance->stats.latest_token = 0;
	}

	if (instance->stats.earliest_token == instance->stats.latest_token) {
		/* the array is full: move the earliest index on and clear its slot */
		if (instance->stats.earliest_token != (TOTEM_TOKEN_STATS_MAX - 1)) {
			instance->stats.earliest_token++;
		} else {
			instance->stats.earliest_token = 0;
		}
		instance->stats.token[instance->stats.earliest_token].rx = 0;
		instance->stats.token[instance->stats.earliest_token].tx = 0;
		instance->stats.token[instance->stats.earliest_token].backlog_calc = 0;
	}

	instance->stats.token[instance->stats.latest_token].rx = time_now;
	/* tx stays 0 in case we drop the token */
	instance->stats.token[instance->stats.latest_token].tx = 0;

	return 0;
}
/*
* Exported interfaces
*/
/*
 * Create and initialize a totemsrp instance.
 *
 * poll_handle   main loop handle stored in the instance and passed on to
 *               totemrrp_initialize
 * srp_context   receives the new instance on success; it is not written on
 *               failure
 * totem_config  configuration; its logging settings and timeouts are read
 * stats         stats->srp is pointed at the statistics of the new instance
 * deliver_fn    stored as the instance's message delivery callback
 * confchg_fn    stored as the instance's configuration change callback
 *
 * The run directory (COROSYNC_RUN_DIR or LOCALSTATEDIR "/lib/corosync") is
 * created if needed and made the current working directory.
 *
 * Returns 0 on success, -1 when the instance cannot be allocated or the run
 * directory cannot be created or entered.
 */
int totemsrp_initialize (
	qb_loop_t *poll_handle,
	void **srp_context,
	struct totem_config *totem_config,
	totemmrp_stats_t *stats,
	void (*deliver_fn) (
		unsigned int nodeid,
		const void *msg,
		unsigned int msg_len,
		int endian_conversion_required),
	void (*confchg_fn) (
		enum totem_configuration_type configuration_type,
		const unsigned int *member_list, size_t member_list_entries,
		const unsigned int *left_list, size_t left_list_entries,
		const unsigned int *joined_list, size_t joined_list_entries,
		const struct memb_ring_id *ring_id))
{
	struct totemsrp_instance *instance;
	/* mkdir() and chdir() return int; keep the result signed for the -1 checks */
	int res;

	instance = malloc (sizeof (struct totemsrp_instance));
	if (instance == NULL) {
		goto error_exit;
	}

	rundir = getenv ("COROSYNC_RUN_DIR");
	if (rundir == NULL) {
		rundir = LOCALSTATEDIR "/lib/corosync";
	}

	/* An already existing run directory is not an error */
	res = mkdir (rundir, 0700);
	if (res == -1 && errno != EEXIST) {
		goto error_destroy;
	}

	res = chdir (rundir);
	if (res == -1) {
		goto error_destroy;
	}

	totemsrp_instance_initialize (instance);

	stats->srp = &instance->stats;
	instance->stats.latest_token = 0;
	instance->stats.earliest_token = 0;

	instance->totem_config = totem_config;

	/*
	 * Configure logging
	 */
	instance->totemsrp_log_level_security = totem_config->totem_logging_configuration.log_level_security;
	instance->totemsrp_log_level_error = totem_config->totem_logging_configuration.log_level_error;
	instance->totemsrp_log_level_warning = totem_config->totem_logging_configuration.log_level_warning;
	instance->totemsrp_log_level_notice = totem_config->totem_logging_configuration.log_level_notice;
	instance->totemsrp_log_level_debug = totem_config->totem_logging_configuration.log_level_debug;
	instance->totemsrp_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
	instance->totemsrp_log_printf = totem_config->totem_logging_configuration.log_printf;

	/*
	 * Initialize local variables for totemsrp
	 */
	totemip_copy (&instance->mcast_address, &totem_config->interfaces[0].mcast_addr);

	/*
	 * Display totem configuration
	 */
	log_printf (instance->totemsrp_log_level_debug,
		"Token Timeout (%d ms) retransmit timeout (%d ms)\n",
		totem_config->token_timeout, totem_config->token_retransmit_timeout);
	log_printf (instance->totemsrp_log_level_debug,
		"token hold (%d ms) retransmits before loss (%d retrans)\n",
		totem_config->token_hold_timeout, totem_config->token_retransmits_before_loss_const);
	log_printf (instance->totemsrp_log_level_debug,
		"join (%d ms) send_join (%d ms) consensus (%d ms) merge (%d ms)\n",
		totem_config->join_timeout,
		totem_config->send_join_timeout,
		totem_config->consensus_timeout,
		totem_config->merge_timeout);
	log_printf (instance->totemsrp_log_level_debug,
		"downcheck (%d ms) fail to recv const (%d msgs)\n",
		totem_config->downcheck_timeout, totem_config->fail_to_recv_const);
	log_printf (instance->totemsrp_log_level_debug,
		"seqno unchanged const (%d rotations) Maximum network MTU %d\n", totem_config->seqno_unchanged_const, totem_config->net_mtu);
	log_printf (instance->totemsrp_log_level_debug,
		"window size per rotation (%d messages) maximum messages per rotation (%d messages)\n",
		totem_config->window_size, totem_config->max_messages);
	log_printf (instance->totemsrp_log_level_debug,
		"missed count const (%d messages)\n",
		totem_config->miss_count_const);
	log_printf (instance->totemsrp_log_level_debug,
		"send threads (%d threads)\n", totem_config->threads);
	log_printf (instance->totemsrp_log_level_debug,
		"RRP token expired timeout (%d ms)\n",
		totem_config->rrp_token_expired_timeout);
	log_printf (instance->totemsrp_log_level_debug,
		"RRP token problem counter (%d ms)\n",
		totem_config->rrp_problem_count_timeout);
	log_printf (instance->totemsrp_log_level_debug,
		"RRP threshold (%d problem count)\n",
		totem_config->rrp_problem_count_threshold);
	log_printf (instance->totemsrp_log_level_debug,
		"RRP automatic recovery check timeout (%d ms)\n",
		totem_config->rrp_autorecovery_check_timeout);
	log_printf (instance->totemsrp_log_level_debug,
		"RRP mode set to %s.\n", instance->totem_config->rrp_mode);
	log_printf (instance->totemsrp_log_level_debug,
		"heartbeat_failures_allowed (%d)\n", totem_config->heartbeat_failures_allowed);
	log_printf (instance->totemsrp_log_level_debug,
		"max_network_delay (%d ms)\n", totem_config->max_network_delay);

	cs_queue_init (&instance->retrans_message_queue, RETRANS_MESSAGE_QUEUE_SIZE_MAX,
		sizeof (struct message_item));

	sq_init (&instance->regular_sort_queue,
		QUEUE_RTR_ITEMS_SIZE_MAX, sizeof (struct sort_queue_item), 0);

	sq_init (&instance->recovery_sort_queue,
		QUEUE_RTR_ITEMS_SIZE_MAX, sizeof (struct sort_queue_item), 0);

	instance->totemsrp_poll_handle = poll_handle;
	instance->totemsrp_deliver_fn = deliver_fn;
	instance->totemsrp_confchg_fn = confchg_fn;

	/* Heartbeat is assumed on and switched off below when misconfigured */
	instance->use_heartbeat = 1;

	timer_function_pause_timeout (instance);

	if ( totem_config->heartbeat_failures_allowed == 0 ) {
		log_printf (instance->totemsrp_log_level_debug,
			"HeartBeat is Disabled. To enable set heartbeat_failures_allowed > 0\n");
		instance->use_heartbeat = 0;
	}

	if (instance->use_heartbeat) {
		instance->heartbeat_timeout
			= (totem_config->heartbeat_failures_allowed) * totem_config->token_retransmit_timeout
				+ totem_config->max_network_delay;

		if (instance->heartbeat_timeout >= totem_config->token_timeout) {
			log_printf (instance->totemsrp_log_level_debug,
				"total heartbeat_timeout (%d ms) is not less than token timeout (%d ms)\n",
				instance->heartbeat_timeout,
				totem_config->token_timeout);
			log_printf (instance->totemsrp_log_level_debug,
				"heartbeat_timeout = heartbeat_failures_allowed * token_retransmit_timeout + max_network_delay\n");
			log_printf (instance->totemsrp_log_level_debug,
				"heartbeat timeout should be less than the token timeout. HeartBeat is Diabled !!\n");
			instance->use_heartbeat = 0;
		}
		else {
			log_printf (instance->totemsrp_log_level_debug,
				"total heartbeat_timeout (%d ms)\n", instance->heartbeat_timeout);
		}
	}

	totemrrp_initialize (
		poll_handle,
		&instance->totemrrp_context,
		totem_config,
		instance,
		main_deliver_fn,
		main_iface_change_fn,
		main_token_seqid_get,
		main_msgs_missing,
		target_set_completed);

	/*
	 * Must have net_mtu adjusted by totemrrp_initialize first
	 */
	cs_queue_init (&instance->new_message_queue,
		MESSAGE_QUEUE_MAX,
		sizeof (struct message_item));

	/* Collect token timing statistics on every received and sent token */
	totemsrp_callback_token_create (instance,
		&instance->token_recv_event_handle,
		TOTEM_CALLBACK_TOKEN_RECEIVED,
		0,
		token_event_stats_collector,
		instance);
	totemsrp_callback_token_create (instance,
		&instance->token_sent_event_handle,
		TOTEM_CALLBACK_TOKEN_SENT,
		0,
		token_event_stats_collector,
		instance);

	*srp_context = instance;
	return (0);

error_destroy:
	free (instance);

error_exit:
	return (-1);
}
/*
 * Shut an instance down: send the leave message, then free the instance
 * memory. srp_context must not be used afterwards.
 */
void totemsrp_finalize (
	void *srp_context)
{
	struct totemsrp_instance *instance = srp_context;

	memb_leave_message_send (instance);
	free (instance);
}
/*
 * Look up the interface addresses of a node.
 *
 * The current member list is searched first, then the list of members that
 * left, matching on addr[0].nodeid. On a match one struct srp_addr worth of
 * data is copied to 'interfaces' and *iface_count is set to the configured
 * interface count. totemrrp_ifaces_get() is called for 'status' whether or
 * not the node was found.
 *
 * Returns 0 when the node was found, -1 otherwise.
 */
int totemsrp_ifaces_get (
	void *srp_context,
	unsigned int nodeid,
	struct totem_ip_address *interfaces,
	char ***status,
	unsigned int *iface_count)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
	const struct srp_addr *match = NULL;
	unsigned int idx;
	int res = 0;

	for (idx = 0; idx < instance->my_memb_entries; idx++) {
		if (instance->my_memb_list[idx].addr[0].nodeid == nodeid) {
			match = &instance->my_memb_list[idx];
			break;
		}
	}

	if (match == NULL) {
		for (idx = 0; idx < instance->my_left_memb_entries; idx++) {
			if (instance->my_left_memb_list[idx].addr[0].nodeid == nodeid) {
				match = &instance->my_left_memb_list[idx];
				break;
			}
		}
	}

	if (match != NULL) {
		memcpy (interfaces, match, sizeof (struct srp_addr));
		*iface_count = instance->totem_config->interface_count;
	} else {
		res = -1;
	}

	totemrrp_ifaces_get (instance->totemrrp_context, status, NULL);
	return (res);
}
/*
 * Forward a crypto type change to the rrp layer and return its result.
 */
int totemsrp_crypto_set (
	void *srp_context,
	unsigned int type)
{
	struct totemsrp_instance *instance = srp_context;

	return (totemrrp_crypto_set(instance->totemrrp_context, type));
}
/*
 * Return the node id of the address interface 0 is bound to.
 */
unsigned int totemsrp_my_nodeid_get (
	void *srp_context)
{
	struct totemsrp_instance *instance = srp_context;

	return (instance->totem_config->interfaces[0].boundto.nodeid);
}
/*
 * Return the address family of the address interface 0 is bound to.
 */
int totemsrp_my_family_get (
	void *srp_context)
{
	struct totemsrp_instance *instance = srp_context;

	return (instance->totem_config->interfaces[0].boundto.family);
}
/*
 * Ask the rrp layer to re-enable rings, passing the configured interface
 * count. Always returns 0.
 */
int totemsrp_ring_reenable (
	void *srp_context)
{
	struct totemsrp_instance *instance = srp_context;
	void *rrp_context = instance->totemrrp_context;

	totemrrp_ring_reenable (rrp_context,
		instance->totem_config->interface_count);

	return (0);
}
/*
* Set operations for use by the membership algorithm
*/
/*
 * Compare two addresses. Only interface 0 takes part in the comparison.
 * Returns 1 when totemip_equal() reports them equal, 0 otherwise.
 */
static int srp_addr_equal (const struct srp_addr *a, const struct srp_addr *b)
{
	if (totemip_equal (&a->addr[0], &b->addr[0]) == 0) {
		return (0);
	}
	return (1);
}
/*
 * Copy all INTERFACE_MAX interface addresses from src to dest.
 */
static void srp_addr_copy (struct srp_addr *dest, const struct srp_addr *src)
{
	unsigned int iface = 0;

	while (iface < INTERFACE_MAX) {
		totemip_copy (&dest->addr[iface], &src->addr[iface]);
		iface++;
	}
}
static void srp_addr_to_nodeid (
unsigned int *nodeid_out,
struct srp_addr *srp_addr_in,
unsigned int entries)
{
unsigned int i;
for (i = 0; i < entries; i++) {
nodeid_out[i] = srp_addr_in[i].addr[0].nodeid;
}
}
/*
 * Copy all INTERFACE_MAX interface addresses from in to out, passing each
 * one through totemip_copy_endian_convert().
 */
static void srp_addr_copy_endian_convert (struct srp_addr *out, const struct srp_addr *in)
{
	int iface = 0;

	while (iface < INTERFACE_MAX) {
		totemip_copy_endian_convert (&out->addr[iface], &in->addr[iface]);
		iface++;
	}
}
/*
 * Empty the consensus database by resetting its entry count; the array
 * contents are left untouched.
 */
static void memb_consensus_reset (struct totemsrp_instance *instance)
{
	instance->consensus_list_entries = 0;
}
/*
 * Set difference: write to out_list every element of one_list that has no
 * equal element (per srp_addr_equal) in two_list, keeping one_list order.
 * *out_list_entries receives the number of elements written.
 */
static void memb_set_subtract (
	struct srp_addr *out_list, int *out_list_entries,
	struct srp_addr *one_list, int one_list_entries,
	struct srp_addr *two_list, int two_list_entries)
{
	int one_idx;
	int two_idx;
	int in_two;

	*out_list_entries = 0;

	for (one_idx = 0; one_idx < one_list_entries; one_idx++) {
		in_two = 0;
		for (two_idx = 0; two_idx < two_list_entries; two_idx++) {
			if (srp_addr_equal (&one_list[one_idx], &two_list[two_idx])) {
				in_two = 1;
				break;
			}
		}
		if (in_two) {
			continue;
		}
		srp_addr_copy (&out_list[*out_list_entries], &one_list[one_idx]);
		*out_list_entries += 1;
	}
}
/*
 * Set consensus for a specific processor
 *
 * Addresses whose interface 0 node id is LEAVE_DUMMY_NODEID are ignored.
 * If the address is already in the consensus list its entry is refreshed,
 * otherwise a new entry is appended. When the list is already full and the
 * address is new, nothing is recorded: writing the entry would land past
 * the end of consensus_list.
 */
static void memb_consensus_set (
	struct totemsrp_instance *instance,
	const struct srp_addr *addr)
{
	/* Number of elements the consensus_list array can hold */
	const int capacity =
		(int)(sizeof (instance->consensus_list) / sizeof (instance->consensus_list[0]));
	int found = 0;
	int i;

	if (addr->addr[0].nodeid == LEAVE_DUMMY_NODEID)
		return;

	for (i = 0; i < instance->consensus_list_entries; i++) {
		if (srp_addr_equal(addr, &instance->consensus_list[i].addr)) {
			found = 1;
			break; /* found entry */
		}
	}

	/* No free slot left for a new entry: refuse instead of overflowing */
	if (found == 0 && i >= capacity) {
		return;
	}

	srp_addr_copy (&instance->consensus_list[i].addr, addr);
	instance->consensus_list[i].set = 1;
	if (found == 0) {
		instance->consensus_list_entries++;
	}
	return;
}
/*
* Is consensus set for a specific processor
*/
static int memb_consensus_isset (
struct totemsrp_instance *instance,
const struct srp_addr *addr)
{
int i;
for (i = 0; i < instance->consensus_list_entries; i++) {
if (srp_addr_equal (addr, &instance->consensus_list[i].addr)) {
return (instance->consensus_list[i].set);
}
}
return (0);
}
/*
 * Decide whether consensus has been reached.
 * The candidate membership is my_proc_list minus my_failed_list; consensus
 * is agreed when memb_consensus_isset() is non-zero for every candidate.
 * Asserts that the candidate membership is not empty.
 * Returns 1 when agreed, 0 otherwise.
 */
static int memb_consensus_agreed (
	struct totemsrp_instance *instance)
{
	struct srp_addr token_memb[PROCESSOR_COUNT_MAX];
	int token_memb_entries = 0;
	int idx;
	int agreed = 1;

	memb_set_subtract (token_memb, &token_memb_entries,
		instance->my_proc_list, instance->my_proc_list_entries,
		instance->my_failed_list, instance->my_failed_list_entries);

	/* Stop at the first candidate without consensus */
	for (idx = 0; agreed && idx < token_memb_entries; idx++) {
		if (memb_consensus_isset (instance, &token_memb[idx]) == 0) {
			agreed = 0;
		}
	}

	assert (token_memb_entries >= 1);

	return (agreed);
}
/*
 * Collect into no_consensus_list every entry of my_proc_list for which
 * memb_consensus_isset() returns 0; *no_consensus_list_entries receives
 * the count. comparison_list and comparison_list_entries are not used.
 */
static void memb_consensus_notset (
	struct totemsrp_instance *instance,
	struct srp_addr *no_consensus_list,
	int *no_consensus_list_entries,
	struct srp_addr *comparison_list,
	int comparison_list_entries)
{
	int idx;

	*no_consensus_list_entries = 0;

	for (idx = 0; idx < instance->my_proc_list_entries; idx++) {
		if (memb_consensus_isset (instance, &instance->my_proc_list[idx]) != 0) {
			continue;
		}
		srp_addr_copy (&no_consensus_list[*no_consensus_list_entries], &instance->my_proc_list[idx]);
		*no_consensus_list_entries += 1;
	}
}
/*
 * Compare two sets regardless of element order.
 * Returns 1 when both have the same number of entries and every element of
 * set2 has an equal element in set1, 0 otherwise.
 */
static int memb_set_equal (
	struct srp_addr *set1, int set1_entries,
	struct srp_addr *set2, int set2_entries)
{
	int idx2;
	int idx1;
	int present;

	if (set1_entries != set2_entries) {
		return (0);
	}

	for (idx2 = 0; idx2 < set2_entries; idx2++) {
		present = 0;
		for (idx1 = 0; idx1 < set1_entries; idx1++) {
			if (srp_addr_equal (&set1[idx1], &set2[idx2])) {
				present = 1;
				break;
			}
		}
		if (!present) {
			return (0);
		}
	}
	return (1);
}
/*
 * Is subset fully contained in fullset
 *
 * Returns 1 when every element of subset has an equal element (per
 * srp_addr_equal) in fullset, 0 otherwise. A subset with more entries than
 * fullset is rejected immediately.
 */
static int memb_set_subset (
	const struct srp_addr *subset, int subset_entries,
	const struct srp_addr *fullset, int fullset_entries)
{
	int i;
	int j;
	int found = 0;

	if (subset_entries > fullset_entries) {
		return (0);
	}
	for (i = 0; i < subset_entries; i++) {
		for (j = 0; j < fullset_entries; j++) {
			if (srp_addr_equal (&subset[i], &fullset[j])) {
				found = 1;
				/* one match is enough, no need to scan the rest */
				break;
			}
		}
		if (found == 0) {
			return (0);
		}
		found = 0;
	}
	return (1);
}
/*
 * Append to fullset every element of subset that fullset does not already
 * contain, updating *fullset_entries as elements are added.
 */
static void memb_set_merge (
	const struct srp_addr *subset, int subset_entries,
	struct srp_addr *fullset, int *fullset_entries)
{
	int sub_idx;
	int full_idx;
	int duplicate;

	for (sub_idx = 0; sub_idx < subset_entries; sub_idx++) {
		duplicate = 0;
		for (full_idx = 0; full_idx < *fullset_entries; full_idx++) {
			if (srp_addr_equal (&fullset[full_idx], &subset[sub_idx])) {
				duplicate = 1;
				break;
			}
		}
		if (duplicate) {
			continue;
		}
		srp_addr_copy (&fullset[*fullset_entries], &subset[sub_idx]);
		*fullset_entries += 1;
	}
}
/*
 * Intersection of set1 and set2 restricted by ring id.
 * For each element of set2, the first equal element of set1 is located; it
 * is copied to 'and' only if its entry in set1_ring_ids is byte-identical
 * to old_ring_id. *and_entries receives the number of elements copied.
 */
static void memb_set_and_with_ring_id (
	struct srp_addr *set1,
	struct memb_ring_id *set1_ring_ids,
	int set1_entries,
	struct srp_addr *set2,
	int set2_entries,
	struct memb_ring_id *old_ring_id,
	struct srp_addr *and,
	int *and_entries)
{
	int idx2;
	int idx1;

	*and_entries = 0;

	for (idx2 = 0; idx2 < set2_entries; idx2++) {
		for (idx1 = 0; idx1 < set1_entries; idx1++) {
			if (!srp_addr_equal (&set1[idx1], &set2[idx2])) {
				continue;
			}
			/* Only the first address match is considered */
			if (memcmp (&set1_ring_ids[idx1], old_ring_id, sizeof (struct memb_ring_id)) == 0) {
				srp_addr_copy (&and[*and_entries], &set1[idx1]);
				*and_entries += 1;
			}
			break;
		}
	}
}
#ifdef CODE_COVERAGE
/*
 * Debug helper, built only with CODE_COVERAGE: print a named address list
 * to stdout, one group of lines per entry and interface.
 */
static void memb_set_print (
	char *string,
	struct srp_addr *list,
	int list_entries)
{
	int entry;
	int iface;

	printf ("List '%s' contains %d entries:\n", string, list_entries);

	for (entry = 0; entry < list_entries; entry++) {
		const struct srp_addr *current = &list[entry];

		for (iface = 0; iface < INTERFACE_MAX; iface++) {
			printf ("Address %d\n", entry);
			printf ("\tiface %d %s\n", iface, totemip_print (&current->addr[iface]));
			printf ("family %d\n", current->addr[iface].family);
		}
	}
}
#endif
/*
 * Obtain a buffer from the rrp layer. The instance must not be NULL.
 */
static void *totemsrp_buffer_alloc (struct totemsrp_instance *instance)
{
	void *buffer;

	assert (instance != NULL);
	buffer = totemrrp_buffer_alloc (instance->totemrrp_context);
	return buffer;
}
/*
 * Hand a buffer back to the rrp layer. The instance must not be NULL.
 */
static void totemsrp_buffer_release (struct totemsrp_instance *instance, void *ptr)
{
	void *rrp_context;

	assert (instance != NULL);
	rrp_context = instance->totemrrp_context;
	totemrrp_buffer_release (rrp_context, ptr);
}
static void reset_token_retransmit_timeout (struct totemsrp_instance *instance)
{
qb_loop_timer_del (instance->totemsrp_poll_handle,
instance->timer_orf_token_retransmit_timeout);
qb_loop_timer_add (instance->totemsrp_poll_handle,
QB_LOOP_MED,
instance->totem_config->token_retransmit_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_token_retransmit_timeout,
&instance->timer_orf_token_retransmit_timeout);
}
/*
 * Start the merge detect timer unless one is already outstanding, and mark
 * it as outstanding.
 */
static void start_merge_detect_timeout (struct totemsrp_instance *instance)
{
	if (instance->my_merge_detect_timeout_outstanding != 0) {
		return;
	}

	qb_loop_timer_add (instance->totemsrp_poll_handle,
		QB_LOOP_MED,
		instance->totem_config->merge_timeout*QB_TIME_NS_IN_MSEC,
		(void *)instance,
		timer_function_merge_detect_timeout,
		&instance->timer_merge_detect_timeout);

	instance->my_merge_detect_timeout_outstanding = 1;
}
static void cancel_merge_detect_timeout (struct totemsrp_instance *instance)
{
qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_merge_detect_timeout);
instance->my_merge_detect_timeout_outstanding = 0;
}
/*
* ring_state_* is used to save and restore the sort queue
* state when a recovery operation fails (and enters gather)
*/
/*
 * Remember the current ring id, aru and high sequence received, unless a
 * state has already been saved and not yet reset.
 */
static void old_ring_state_save (struct totemsrp_instance *instance)
{
	if (instance->old_ring_state_saved != 0) {
		/* keep the state saved earlier */
		return;
	}

	instance->old_ring_state_saved = 1;
	memcpy (&instance->my_old_ring_id, &instance->my_ring_id,
		sizeof (struct memb_ring_id));
	instance->old_ring_state_aru = instance->my_aru;
	instance->old_ring_state_high_seq_received = instance->my_high_seq_received;

	log_printf (instance->totemsrp_log_level_debug,
		"Saving state aru %x high seq received %x\n",
		instance->my_aru, instance->my_high_seq_received);
}
/*
 * Copy the saved aru and high sequence received back into the live fields
 * and log the restored values.
 */
static void old_ring_state_restore (struct totemsrp_instance *instance)
{
	instance->my_high_seq_received = instance->old_ring_state_high_seq_received;
	instance->my_aru = instance->old_ring_state_aru;

	log_printf (instance->totemsrp_log_level_debug,
		"Restoring instance->my_aru %x my high seq received %x\n",
		instance->my_aru, instance->my_high_seq_received);
}
/*
 * Clear the old_ring_state_saved flag so the next old_ring_state_save call
 * records fresh values.
 */
static void old_ring_state_reset (struct totemsrp_instance *instance)
{
	log_printf (instance->totemsrp_log_level_debug,
		"Resetting old ring state\n");

	instance->old_ring_state_saved = 0;
}
/*
 * Restart the pause timer.
 *
 * The timer fires timer_function_pause_timeout after one fifth of the
 * configured token timeout (token_timeout * QB_TIME_NS_IN_MSEC / 5).
 */
static void reset_pause_timeout (struct totemsrp_instance *instance)
{
	qb_loop_timer_del (
		instance->totemsrp_poll_handle,
		instance->timer_pause_timeout);

	qb_loop_timer_add (
		instance->totemsrp_poll_handle,
		QB_LOOP_MED,
		instance->totem_config->token_timeout * QB_TIME_NS_IN_MSEC / 5,
		instance,
		timer_function_pause_timeout,
		&instance->timer_pause_timeout);
}
/*
 * Restart the token loss timer: remove the pending timer and schedule
 * timer_function_orf_token_timeout using the configured token_timeout.
 */
static void reset_token_timeout (struct totemsrp_instance *instance)
{
	qb_loop_timer_del (
		instance->totemsrp_poll_handle,
		instance->timer_orf_token_timeout);

	qb_loop_timer_add (
		instance->totemsrp_poll_handle,
		QB_LOOP_MED,
		instance->totem_config->token_timeout * QB_TIME_NS_IN_MSEC,
		instance,
		timer_function_orf_token_timeout,
		&instance->timer_orf_token_timeout);
}
/*
 * Restart the heartbeat timer: remove the pending timer and schedule
 * timer_function_heartbeat_timeout using instance->heartbeat_timeout.
 */
static void reset_heartbeat_timeout (struct totemsrp_instance *instance)
{
	qb_loop_timer_del (
		instance->totemsrp_poll_handle,
		instance->timer_heartbeat_timeout);

	qb_loop_timer_add (
		instance->totemsrp_poll_handle,
		QB_LOOP_MED,
		instance->heartbeat_timeout * QB_TIME_NS_IN_MSEC,
		instance,
		timer_function_heartbeat_timeout,
		&instance->timer_heartbeat_timeout);
}
/*
 * Remove the token loss timer held in timer_orf_token_timeout.
 */
static void cancel_token_timeout (struct totemsrp_instance *instance)
{
	qb_loop_timer_del (
		instance->totemsrp_poll_handle,
		instance->timer_orf_token_timeout);
}
/*
 * Remove the heartbeat timer held in timer_heartbeat_timeout.
 */
static void cancel_heartbeat_timeout (struct totemsrp_instance *instance)
{
	qb_loop_timer_del (
		instance->totemsrp_poll_handle,
		instance->timer_heartbeat_timeout);
}
/*
 * Remove the token retransmit timer held in
 * timer_orf_token_retransmit_timeout.
 */
static void cancel_token_retransmit_timeout (struct totemsrp_instance *instance)
{
	qb_loop_timer_del (
		instance->totemsrp_poll_handle,
		instance->timer_orf_token_retransmit_timeout);
}
/*
 * Schedule the token hold retransmit timer.
 *
 * Unlike the reset_* helpers no previously scheduled timer is removed
 * first; the timer calls timer_function_token_hold_retransmit_timeout after
 * the configured token_hold_timeout.
 */
static void start_token_hold_retransmit_timeout (struct totemsrp_instance *instance)
{
	qb_loop_timer_add (
		instance->totemsrp_poll_handle,
		QB_LOOP_MED,
		instance->totem_config->token_hold_timeout * QB_TIME_NS_IN_MSEC,
		instance,
		timer_function_token_hold_retransmit_timeout,
		&instance->timer_orf_token_hold_retransmit_timeout);
}
/*
 * Remove the token hold retransmit timer held in
 * timer_orf_token_hold_retransmit_timeout.
 */
static void cancel_token_hold_retransmit_timeout (struct totemsrp_instance *instance)
{
	qb_loop_timer_del (
		instance->totemsrp_poll_handle,
		instance->timer_orf_token_hold_retransmit_timeout);
}
/*
 * Handle expiry of the consensus timeout.
 *
 * The consensus_timeouts statistic is always incremented.  When
 * memb_consensus_agreed reports agreement, consensus is reset, this
 * processor is marked again and the token timeout is restarted.  Otherwise
 * the entries returned by memb_consensus_notset are merged into
 * my_failed_list and the GATHER state is (re)entered with origin 0.
 */
static void memb_state_consensus_timeout_expired (
	struct totemsrp_instance *instance)
{
	struct srp_addr missing_list[PROCESSOR_COUNT_MAX];
	int missing_entries;

	instance->stats.consensus_timeouts++;

	if (memb_consensus_agreed (instance)) {
		memb_consensus_reset (instance);
		memb_consensus_set (instance, &instance->my_id);
		reset_token_timeout (instance); // REVIEWED
		return;
	}

	/* No agreement: collect the entries reported as not set ... */
	memb_consensus_notset (
		instance,
		missing_list,
		&missing_entries,
		instance->my_proc_list,
		instance->my_proc_list_entries);

	/* ... add them to the failed list and gather again */
	memb_set_merge (
		missing_list, missing_entries,
		instance->my_failed_list, &instance->my_failed_list_entries);

	memb_state_gather_enter (instance, 0);
}
/*
 * Forward declarations of helpers that the timer and state functions
 * below call before their definitions appear in the file.
 */
static void memb_join_message_send (struct totemsrp_instance *instance);
static void memb_merge_detect_transmit (struct totemsrp_instance *instance);
/*
* Timers used for various states of the membership algorithm
*/
/*
 * Pause timer callback: store the current value of
 * qb_util_nano_current_get in pause_timestamp and re-arm the pause timer.
 *
 * data is the struct totemsrp_instance registered with the timer.
 */
static void timer_function_pause_timeout (void *data)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)data;

	instance->pause_timestamp = qb_util_nano_current_get ();

	reset_pause_timeout (instance);
}
/*
 * Token loss handling for the RECOVERY state: restore the saved old ring
 * aru / high sequence number, enter GATHER with origin 5 and count the
 * event in stats.recovery_token_lost.
 */
static void memb_recovery_state_token_loss (struct totemsrp_instance *instance)
{
	old_ring_state_restore (instance);

	memb_state_gather_enter (instance, 5);

	instance->stats.recovery_token_lost++;
}
/*
 * Token loss timer callback.
 *
 * data is the struct totemsrp_instance registered with the timer.  What
 * happens depends on the current membership state; every branch ends up
 * entering the GATHER state (directly or via a helper) and bumps the
 * matching "token lost" statistic where one is kept.
 */
static void timer_function_orf_token_timeout (void *data)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)data;

	switch (instance->memb_state) {
	case MEMB_STATE_OPERATIONAL:
		log_printf (instance->totemsrp_log_level_debug,
			"The token was lost in the OPERATIONAL state.\n");
		log_printf (instance->totemsrp_log_level_notice,
			"A processor failed, forming new configuration.\n");

		totemrrp_iface_check (instance->totemrrp_context);
		memb_state_gather_enter (instance, 2);
		instance->stats.operational_token_lost++;
		break;

	case MEMB_STATE_GATHER:
		log_printf (instance->totemsrp_log_level_debug,
			"The consensus timeout expired.\n");

		/* Run the consensus timeout handling, then gather again (origin 3) */
		memb_state_consensus_timeout_expired (instance);
		memb_state_gather_enter (instance, 3);
		instance->stats.gather_token_lost++;
		break;

	case MEMB_STATE_COMMIT:
		log_printf (instance->totemsrp_log_level_debug,
			"The token was lost in the COMMIT state.\n");

		memb_state_gather_enter (instance, 4);
		instance->stats.commit_token_lost++;
		break;

	case MEMB_STATE_RECOVERY:
		log_printf (instance->totemsrp_log_level_debug,
			"The token was lost in the RECOVERY state.\n");

		memb_recovery_state_token_loss (instance);
		instance->orf_token_discard = 1;
		break;
	}
}
/*
 * Heartbeat timer callback: log the expiry together with the current
 * membership state and then run the token loss handling.
 *
 * data is the struct totemsrp_instance registered with the timer.
 */
static void timer_function_heartbeat_timeout (void *data)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)data;

	log_printf (instance->totemsrp_log_level_debug,
		"HeartBeat Timer expired Invoking token loss mechanism in state %d \n",
		instance->memb_state);

	timer_function_orf_token_timeout (instance);
}
/*
 * Join timer callback.
 *
 * In the GATHER and COMMIT states a join message is sent and the join
 * timer re-armed with this function as callback.  Being called in the
 * OPERATIONAL or RECOVERY state trips an assertion.
 *
 * data is the struct totemsrp_instance registered with the timer.
 */
static void memb_timer_function_state_gather (void *data)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)data;

	switch (instance->memb_state) {
	case MEMB_STATE_GATHER:
	case MEMB_STATE_COMMIT:
		memb_join_message_send (instance);

		/*
		 * Restart the join timeout
		 */
		qb_loop_timer_del (
			instance->totemsrp_poll_handle,
			instance->memb_timer_state_gather_join_timeout);
		qb_loop_timer_add (
			instance->totemsrp_poll_handle,
			QB_LOOP_MED,
			instance->totem_config->join_timeout * QB_TIME_NS_IN_MSEC,
			instance,
			memb_timer_function_state_gather,
			&instance->memb_timer_state_gather_join_timeout);
		break;

	case MEMB_STATE_OPERATIONAL:
	case MEMB_STATE_RECOVERY:
		assert (0); /* this should never happen */
		break;
	}
}
/*
 * Consensus timer callback: forwards to
 * memb_state_consensus_timeout_expired with the instance passed as data.
 */
static void memb_timer_function_gather_consensus_timeout (void *data)
{
	memb_state_consensus_timeout_expired ((struct totemsrp_instance *)data);
}
/*
 * Move messages from the recovery sort queue to the regular sort queue.
 *
 * Sequence numbers SEQNO_START_MSG + 1 .. my_aru of the recovery sort queue
 * are examined.  Only entries flagged MESSAGE_ENCAPSULATED are considered;
 * the payload following the outer struct mcast header is treated as the
 * original message.  That message is added to the regular sort queue when
 * its ring id equals my_old_ring_id and its sequence number is not already
 * in use there; old_ring_state_high_seq_received is raised when needed.
 */
static void deliver_messages_from_recovery_to_regular (struct totemsrp_instance *instance)
{
	struct sort_queue_item regular_message_item;
	struct sort_queue_item *recovery_message_item;
	struct mcast *mcast;
	void *ptr;
	unsigned int range;
	unsigned int i;

	log_printf (instance->totemsrp_log_level_debug,
		"recovery to regular %x-%x\n", SEQNO_START_MSG + 1, instance->my_aru);

	range = instance->my_aru - SEQNO_START_MSG;

	/*
	 * Move messages from recovery to regular sort queue
	 */
	// todo should i be initialized to 0 or 1 ?
	for (i = 1; i <= range; i++) {
		if (sq_item_get (&instance->recovery_sort_queue,
			i + SEQNO_START_MSG, &ptr) != 0) {
			/* Nothing stored for this sequence number */
			continue;
		}
		recovery_message_item = ptr;

		if (recovery_message_item->mcast->header.encapsulated != MESSAGE_ENCAPSULATED) {
			/*
			 * TODO this case shouldn't happen
			 */
			continue;
		}

		/*
		 * Message is a recovery message encapsulated in a new ring
		 * message: skip the outer header to reach the regular message
		 */
		mcast = (struct mcast *)(((char *)recovery_message_item->mcast) + sizeof (struct mcast));
		regular_message_item.mcast = mcast;
		regular_message_item.msg_len =
			recovery_message_item->msg_len - sizeof (struct mcast);

		log_printf (instance->totemsrp_log_level_debug,
			"comparing if ring id is for this processors old ring seqno %d\n",
			mcast->seq);

		/*
		 * Only add this message to the regular sort queue if it was
		 * originated with the same ring id as the previous ring
		 */
		if (memcmp (&instance->my_old_ring_id, &mcast->ring_id,
			sizeof (struct memb_ring_id)) != 0) {
			log_printf (instance->totemsrp_log_level_debug,
				"-not adding msg with seq no %x\n", mcast->seq);
			continue;
		}

		if (sq_item_inuse (&instance->regular_sort_queue, mcast->seq) != 0) {
			/* Already present in the regular sort queue */
			continue;
		}

		sq_item_add (&instance->regular_sort_queue,
			&regular_message_item, mcast->seq);

		if (sq_lt_compare (instance->old_ring_state_high_seq_received, mcast->seq)) {
			instance->old_ring_state_high_seq_received = mcast->seq;
		}
	}
}
/*
* Change states in the state machine of the membership algorithm
*/
/*
 * Enter the OPERATIONAL state.
 *
 * Steps performed, in order:
 *  - reset consensus and the saved old ring state flag
 *  - move old ring messages from the recovery to the regular sort queue
 *  - deliver old ring messages to the application
 *  - compute the left and joined member lists and install the new membership
 *  - report the transitional and then the regular configuration through
 *    totemsrp_confchg_fn
 *  - copy the recovery sort queue over the regular sort queue
 *  - rebuild my_proc_list from the new membership and clear the failed list
 *  - free and release all sort queue items up to my_high_seq_received
 *  - switch memb_state, update statistics and restart the pause timer
 *  - remember my_ring_id as my_old_ring_id
 */
static void memb_state_operational_enter (struct totemsrp_instance *instance)
{
struct srp_addr joined_list[PROCESSOR_COUNT_MAX];
int joined_list_entries = 0;
unsigned int aru_save;
unsigned int joined_list_totemip[PROCESSOR_COUNT_MAX];
unsigned int trans_memb_list_totemip[PROCESSOR_COUNT_MAX];
unsigned int new_memb_list_totemip[PROCESSOR_COUNT_MAX];
unsigned int left_list[PROCESSOR_COUNT_MAX];
unsigned int i;
unsigned int res;
memb_consensus_reset (instance);
old_ring_state_reset (instance);
deliver_messages_from_recovery_to_regular (instance);
log_printf (instance->totemsrp_log_level_debug,
"Delivering to app %x to %x\n",
instance->my_high_delivered + 1, instance->old_ring_state_high_seq_received);
/*
 * my_aru is temporarily replaced by the old ring's aru while old ring
 * messages are delivered; the saved value is put back after the
 * transitional delivery below.
 */
aru_save = instance->my_aru;
instance->my_aru = instance->old_ring_state_aru;
messages_deliver_to_app (instance, 0, instance->old_ring_state_high_seq_received);
/*
* Calculate joined and left list
*/
memb_set_subtract (instance->my_left_memb_list,
&instance->my_left_memb_entries,
instance->my_memb_list, instance->my_memb_entries,
instance->my_trans_memb_list, instance->my_trans_memb_entries);
memb_set_subtract (joined_list, &joined_list_entries,
instance->my_new_memb_list, instance->my_new_memb_entries,
instance->my_trans_memb_list, instance->my_trans_memb_entries);
/*
* Install new membership
*/
instance->my_memb_entries = instance->my_new_memb_entries;
memcpy (&instance->my_memb_list, instance->my_new_memb_list,
sizeof (struct srp_addr) * instance->my_memb_entries);
instance->last_released = 0;
instance->my_set_retrans_flg = 0;
/*
* Deliver transitional configuration to application
*/
srp_addr_to_nodeid (left_list, instance->my_left_memb_list,
instance->my_left_memb_entries);
srp_addr_to_nodeid (trans_memb_list_totemip,
instance->my_trans_memb_list, instance->my_trans_memb_entries);
instance->totemsrp_confchg_fn (TOTEM_CONFIGURATION_TRANSITIONAL,
trans_memb_list_totemip, instance->my_trans_memb_entries,
left_list, instance->my_left_memb_entries,
0, 0, &instance->my_ring_id);
// TODO we need to filter to ensure we only deliver those
// messages which are part of instance->my_deliver_memb
messages_deliver_to_app (instance, 1, instance->old_ring_state_high_seq_received);
instance->my_aru = aru_save;
/*
* Deliver regular configuration to application
*/
srp_addr_to_nodeid (new_memb_list_totemip,
instance->my_new_memb_list, instance->my_new_memb_entries);
srp_addr_to_nodeid (joined_list_totemip, joined_list,
joined_list_entries);
instance->totemsrp_confchg_fn (TOTEM_CONFIGURATION_REGULAR,
new_memb_list_totemip, instance->my_new_memb_entries,
0, 0,
joined_list_totemip, joined_list_entries, &instance->my_ring_id);
/*
* The recovery sort queue now becomes the regular
* sort queue. It is necessary to copy the state
* into the regular sort queue.
*/
sq_copy (&instance->regular_sort_queue, &instance->recovery_sort_queue);
instance->my_last_aru = SEQNO_START_MSG;
/* When making my_proc_list smaller, ensure that the
* now non-used entries are zero-ed out. There are some suspect
* assert's that assume that there is always 2 entries in the list.
* These fail when my_proc_list is reduced to 1 entry (and the
* valid [0] entry is the same as the 'unused' [1] entry).
*/
memset(instance->my_proc_list, 0,
sizeof (struct srp_addr) * instance->my_proc_list_entries);
instance->my_proc_list_entries = instance->my_new_memb_entries;
/* my_memb_entries was set equal to my_new_memb_entries above */
memcpy (instance->my_proc_list, instance->my_new_memb_list,
sizeof (struct srp_addr) * instance->my_memb_entries);
instance->my_failed_list_entries = 0;
instance->my_high_delivered = instance->my_high_seq_received;
/*
 * Free the message buffer of every item up to my_high_delivered before
 * the items themselves are released from the sort queue.
 * NOTE(review): free() is used here while messages_free() releases
 * buffers with totemsrp_buffer_release() - confirm both are equivalent
 * for these buffers.
 */
for (i = 0; i <= instance->my_high_delivered; i++) {
void *ptr;
res = sq_item_get (&instance->regular_sort_queue, i, &ptr);
if (res == 0) {
struct sort_queue_item *regular_message;
regular_message = ptr;
free (regular_message->mcast);
}
}
sq_items_release (&instance->regular_sort_queue, instance->my_high_delivered);
instance->last_released = instance->my_high_delivered;
log_printf (instance->totemsrp_log_level_debug,
"entering OPERATIONAL state.\n");
log_printf (instance->totemsrp_log_level_notice,
"A processor joined or left the membership and a new membership was formed.\n");
instance->memb_state = MEMB_STATE_OPERATIONAL;
instance->stats.operational_entered++;
instance->stats.continuous_gather = 0;
instance->my_received_flg = 1;
reset_pause_timeout (instance);
/*
* Save ring id information from this configuration to determine
* which processors are transitioning from old regular configuration
* in to new regular configuration on the next configuration change
*/
memcpy (&instance->my_old_ring_id, &instance->my_ring_id,
sizeof (struct memb_ring_id));
return;
}
/*
 * Enter the GATHER state.
 *
 * instance     protocol instance
 * gather_from  numeric tag identifying the caller; it is logged, and the
 *              value 3 additionally increments stats.continuous_gather
 *
 * The token is marked to be discarded, this processor is merged into
 * my_proc_list, a join message is sent, the join and consensus timers are
 * restarted, the token / retransmit / merge detect timers are cancelled and
 * consensus is reset with only this processor set.
 */
static void memb_state_gather_enter (
	struct totemsrp_instance *instance,
	int gather_from)
{
	instance->orf_token_discard = 1;

	memb_set_merge (
		&instance->my_id, 1,
		instance->my_proc_list, &instance->my_proc_list_entries);

	memb_join_message_send (instance);

	/*
	 * Restart the join timeout
	 */
	qb_loop_timer_del (
		instance->totemsrp_poll_handle,
		instance->memb_timer_state_gather_join_timeout);
	qb_loop_timer_add (
		instance->totemsrp_poll_handle,
		QB_LOOP_MED,
		instance->totem_config->join_timeout * QB_TIME_NS_IN_MSEC,
		instance,
		memb_timer_function_state_gather,
		&instance->memb_timer_state_gather_join_timeout);

	/*
	 * Restart the consensus timeout
	 */
	qb_loop_timer_del (
		instance->totemsrp_poll_handle,
		instance->memb_timer_state_gather_consensus_timeout);
	qb_loop_timer_add (
		instance->totemsrp_poll_handle,
		QB_LOOP_MED,
		instance->totem_config->consensus_timeout * QB_TIME_NS_IN_MSEC,
		instance,
		memb_timer_function_gather_consensus_timeout,
		&instance->memb_timer_state_gather_consensus_timeout);

	/*
	 * Cancel the token loss and token retransmission timeouts
	 */
	cancel_token_retransmit_timeout (instance); // REVIEWED
	cancel_token_timeout (instance); // REVIEWED
	cancel_merge_detect_timeout (instance);

	memb_consensus_reset (instance);
	memb_consensus_set (instance, &instance->my_id);

	log_printf (instance->totemsrp_log_level_debug,
		"entering GATHER state from %d.\n", gather_from);

	instance->memb_state = MEMB_STATE_GATHER;
	instance->stats.gather_entered++;

	/*
	 * State 3 means gather, so we are continuously gathering.
	 */
	if (gather_from == 3) {
		instance->stats.continuous_gather++;
	}

	if (instance->stats.continuous_gather > MAX_NO_CONT_GATHER) {
		log_printf (instance->totemsrp_log_level_warning,
			"Totem is unable to form a cluster because of an "
			"operating system or network fault. The most common "
			"cause of this message is that the local firewall is "
			"configured improperly.\n");
	}
}
/*
 * Prototype of the token retransmit timer callback; the definition appears
 * further down in this file.
 * NOTE(review): the function is already referenced earlier in the file
 * (reset_token_retransmit_timeout), so this declaration looks redundant -
 * confirm before removing.
 */
static void timer_function_token_retransmit_timeout (void *data);
/*
 * Callback taking the instance as an opaque context pointer; it sends the
 * commit token via memb_state_commit_token_send.
 */
static void target_set_completed (void *context)
{
	memb_state_commit_token_send ((struct totemsrp_instance *)context);
}
/*
 * Enter the COMMIT state.
 *
 * The old ring state is saved, the commit token is updated and its target
 * set, the join and consensus timers are removed, the ring id from the
 * commit token is stored, the token timers are (re)started and the flow
 * control counters are cleared.  The commit token itself is not sent here
 * (see the closing comment).
 */
static void memb_state_commit_enter (
	struct totemsrp_instance *instance)
{
	old_ring_state_save (instance);

	memb_state_commit_token_update (instance);
	memb_state_commit_token_target_set (instance);

	/* Drop the gather timers and clear their handles */
	qb_loop_timer_del (
		instance->totemsrp_poll_handle,
		instance->memb_timer_state_gather_join_timeout);
	instance->memb_timer_state_gather_join_timeout = 0;

	qb_loop_timer_del (
		instance->totemsrp_poll_handle,
		instance->memb_timer_state_gather_consensus_timeout);
	instance->memb_timer_state_gather_consensus_timeout = 0;

	memb_ring_id_set_and_store (instance, &instance->commit_token->ring_id);
	instance->token_ring_id_seq = instance->my_ring_id.seq;

	log_printf (instance->totemsrp_log_level_debug,
		"entering COMMIT state.\n");

	instance->memb_state = MEMB_STATE_COMMIT;

	reset_token_retransmit_timeout (instance); // REVIEWED
	reset_token_timeout (instance); // REVIEWED

	/*
	 * reset all flow control variables since we are starting a new ring
	 */
	instance->my_trc = 0;
	instance->my_pbl = 0;
	instance->my_cbl = 0;

	instance->stats.commit_entered++;
	instance->stats.continuous_gather = 0;

	/*
	 * commit token sent after callback that token target has been set
	 */
}
/*
 * Enter the RECOVERY state.
 *
 * instance      protocol instance
 * commit_token  commit token; the address list starts at
 *               end_of_commit_token and is followed, after addr_entries
 *               addresses, by the per-member entry list
 *
 * Steps performed:
 *  - reinitialise the recovery sort queue and the retransmit message queue
 *  - forward the commit token (memb_state_commit_token_send_recovery)
 *  - build the transitional membership with memb_set_and_with_ring_id
 *  - if a member of the transitional membership has received_flg == 0,
 *    take the transitional list as deliver list, compute low_ring_aru and
 *    my_high_ring_delivered over the members that were on the old ring,
 *    and re-originate the old ring messages
 *    (low_ring_aru, old_ring_state_high_seq_received] encapsulated in new
 *    mcast headers onto retrans_message_queue
 *  - reset the sequence state to SEQNO_START_MSG, restart the token timers
 *    and switch memb_state to MEMB_STATE_RECOVERY
 */
static void memb_state_recovery_enter (
struct totemsrp_instance *instance,
struct memb_commit_token *commit_token)
{
int i;
int local_received_flg = 1;
unsigned int low_ring_aru;
unsigned int range = 0;
unsigned int messages_originated = 0;
const struct srp_addr *addr;
struct memb_commit_token_memb_entry *memb_list;
struct memb_ring_id my_new_memb_ring_id_list[PROCESSOR_COUNT_MAX];
/* Locate the address list and the member entry list inside the token */
addr = (const struct srp_addr *)commit_token->end_of_commit_token;
memb_list = (struct memb_commit_token_memb_entry *)(addr + commit_token->addr_entries);
log_printf (instance->totemsrp_log_level_debug,
"entering RECOVERY state.\n");
instance->orf_token_discard = 0;
instance->my_high_ring_delivered = 0;
sq_reinit (&instance->recovery_sort_queue, SEQNO_START_MSG);
cs_queue_reinit (&instance->retrans_message_queue);
/* Starting point; lowered below to the smallest aru seen on the old ring */
low_ring_aru = instance->old_ring_state_high_seq_received;
memb_state_commit_token_send_recovery (instance, commit_token);
instance->my_token_seq = SEQNO_START_TOKEN - 1;
/*
* Build regular configuration
*/
totemrrp_processor_count_set (
instance->totemrrp_context,
commit_token->addr_entries);
/*
* Build transitional configuration
*/
for (i = 0; i < instance->my_new_memb_entries; i++) {
memcpy (&my_new_memb_ring_id_list[i],
&memb_list[i].ring_id,
sizeof (struct memb_ring_id));
}
memb_set_and_with_ring_id (
instance->my_new_memb_list,
my_new_memb_ring_id_list,
instance->my_new_memb_entries,
instance->my_memb_list,
instance->my_memb_entries,
&instance->my_old_ring_id,
instance->my_trans_memb_list,
&instance->my_trans_memb_entries);
for (i = 0; i < instance->my_trans_memb_entries; i++) {
log_printf (instance->totemsrp_log_level_debug,
"TRANS [%d] member %s:\n", i, totemip_print (&instance->my_trans_memb_list[i].addr[0]));
}
for (i = 0; i < instance->my_new_memb_entries; i++) {
log_printf (instance->totemsrp_log_level_debug,
"position [%d] member %s:\n", i, totemip_print (&addr[i].addr[0]));
log_printf (instance->totemsrp_log_level_debug,
"previous ring seq %lld rep %s\n",
memb_list[i].ring_id.seq,
totemip_print (&memb_list[i].ring_id.rep));
log_printf (instance->totemsrp_log_level_debug,
"aru %x high delivered %x received flag %d\n",
memb_list[i].aru,
memb_list[i].high_delivered,
memb_list[i].received_flg);
// assert (totemip_print (&memb_list[i].ring_id.rep) != 0);
}
/*
* Determine if any received flag is false
*/
for (i = 0; i < commit_token->addr_entries; i++) {
if (memb_set_subset (&instance->my_new_memb_list[i], 1,
instance->my_trans_memb_list, instance->my_trans_memb_entries) &&
memb_list[i].received_flg == 0) {
/* The transitional membership becomes the deliver membership */
instance->my_deliver_memb_entries = instance->my_trans_memb_entries;
memcpy (instance->my_deliver_memb_list, instance->my_trans_memb_list,
sizeof (struct srp_addr) * instance->my_trans_memb_entries);
local_received_flg = 0;
break;
}
}
if (local_received_flg == 1) {
goto no_originate;
} /* Else originate messages if we should */
/*
* Calculate my_low_ring_aru, instance->my_high_ring_delivered for the transitional membership
*/
for (i = 0; i < commit_token->addr_entries; i++) {
if (memb_set_subset (&instance->my_new_memb_list[i], 1,
instance->my_deliver_memb_list,
instance->my_deliver_memb_entries) &&
memcmp (&instance->my_old_ring_id,
&memb_list[i].ring_id,
sizeof (struct memb_ring_id)) == 0) {
if (sq_lt_compare (memb_list[i].aru, low_ring_aru)) {
low_ring_aru = memb_list[i].aru;
}
if (sq_lt_compare (instance->my_high_ring_delivered, memb_list[i].high_delivered)) {
instance->my_high_ring_delivered = memb_list[i].high_delivered;
}
}
}
/*
* Copy all old ring messages to instance->retrans_message_queue
*/
range = instance->old_ring_state_high_seq_received - low_ring_aru;
if (range == 0) {
/*
* No messages to copy
*/
goto no_originate;
}
assert (range < QUEUE_RTR_ITEMS_SIZE_MAX);
log_printf (instance->totemsrp_log_level_debug,
"copying all old ring messages from %x-%x.\n",
low_ring_aru + 1, instance->old_ring_state_high_seq_received);
/* NOTE(review): i is a signed int compared against the unsigned range */
for (i = 1; i <= range; i++) {
struct sort_queue_item *sort_queue_item;
struct message_item message_item;
void *ptr;
int res;
res = sq_item_get (&instance->regular_sort_queue,
low_ring_aru + i, &ptr);
if (res != 0) {
continue;
}
sort_queue_item = ptr;
messages_originated++;
memset (&message_item, 0, sizeof (struct message_item));
// TODO LEAK
message_item.mcast = totemsrp_buffer_alloc (instance);
assert (message_item.mcast);
/* Build the outer (encapsulating) header for the new ring */
message_item.mcast->header.type = MESSAGE_TYPE_MCAST;
srp_addr_copy (&message_item.mcast->system_from, &instance->my_id);
message_item.mcast->header.encapsulated = MESSAGE_ENCAPSULATED;
message_item.mcast->header.nodeid = instance->my_id.addr[0].nodeid;
assert (message_item.mcast->header.nodeid);
message_item.mcast->header.endian_detector = ENDIAN_LOCAL;
memcpy (&message_item.mcast->ring_id, &instance->my_ring_id,
sizeof (struct memb_ring_id));
/* The complete old ring message is appended after the outer header */
message_item.msg_len = sort_queue_item->msg_len + sizeof (struct mcast);
memcpy (((char *)message_item.mcast) + sizeof (struct mcast),
sort_queue_item->mcast,
sort_queue_item->msg_len);
cs_queue_item_add (&instance->retrans_message_queue, &message_item);
}
/* NOTE(review): messages_originated is unsigned but printed with %d */
log_printf (instance->totemsrp_log_level_debug,
"Originated %d messages in RECOVERY.\n", messages_originated);
goto originated;
no_originate:
log_printf (instance->totemsrp_log_level_debug,
"Did not need to originate any messages in recovery.\n");
originated:
/* Both paths continue here: restart sequence numbering for the new ring */
instance->my_aru = SEQNO_START_MSG;
instance->my_aru_count = 0;
instance->my_seq_unchanged = 0;
instance->my_high_seq_received = SEQNO_START_MSG;
instance->my_install_seq = SEQNO_START_MSG;
instance->last_released = SEQNO_START_MSG;
reset_token_timeout (instance); // REVIEWED
reset_token_retransmit_timeout (instance); // REVIEWED
instance->memb_state = MEMB_STATE_RECOVERY;
instance->stats.recovery_entered++;
instance->stats.continuous_gather = 0;
return;
}
/*
 * Event notification entry point.
 *
 * srp_context  the struct totemsrp_instance as an opaque pointer
 * type, value  not examined by this function
 *
 * Every call results in token_hold_cancel_send being invoked; its return
 * value is ignored.
 */
void totemsrp_event_signal (void *srp_context, enum totem_event_type type, int value)
{
	struct totemsrp_instance *instance = srp_context;

	token_hold_cancel_send (instance);
}
/*
 * Queue a message for multicast.
 *
 * srp_context  the struct totemsrp_instance as an opaque pointer
 * iovec        array of iov_len buffers whose contents are concatenated
 *              behind a freshly built struct mcast header
 * iov_len      number of entries in iovec
 * guarantee    stored unchanged in the mcast header
 *
 * Returns 0 when the message was added to new_message_queue, -1 when the
 * queue is full or no buffer could be allocated.
 *
 * NOTE(review): the combined length of the iovec entries is not checked
 * against the size of the buffer returned by totemsrp_buffer_alloc -
 * presumably callers guarantee it fits; confirm.
 */
int totemsrp_mcast (
	void *srp_context,
	struct iovec *iovec,
	unsigned int iov_len,
	int guarantee)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
	unsigned int i;
	struct message_item message_item;
	char *addr;
	unsigned int addr_idx;

	if (cs_queue_is_full (&instance->new_message_queue)) {
		log_printf (instance->totemsrp_log_level_debug, "queue full\n");
		return (-1);
	}

	memset (&message_item, 0, sizeof (struct message_item));

	/*
	 * Allocate pending item
	 */
	message_item.mcast = totemsrp_buffer_alloc (instance);
	if (message_item.mcast == NULL) {
		return (-1);
	}

	/*
	 * Set mcast header
	 */
	memset (message_item.mcast, 0, sizeof (struct mcast));
	message_item.mcast->header.type = MESSAGE_TYPE_MCAST;
	message_item.mcast->header.endian_detector = ENDIAN_LOCAL;
	message_item.mcast->header.encapsulated = MESSAGE_NOT_ENCAPSULATED;
	message_item.mcast->header.nodeid = instance->my_id.addr[0].nodeid;
	assert (message_item.mcast->header.nodeid);

	message_item.mcast->guarantee = guarantee;
	srp_addr_copy (&message_item.mcast->system_from, &instance->my_id);

	/*
	 * Append the payload fragments directly behind the header.  The index
	 * is unsigned so it matches the type of iov_len.
	 */
	addr = (char *)message_item.mcast;
	addr_idx = sizeof (struct mcast);
	for (i = 0; i < iov_len; i++) {
		memcpy (&addr[addr_idx], iovec[i].iov_base, iovec[i].iov_len);
		addr_idx += iovec[i].iov_len;
	}

	message_item.msg_len = addr_idx;

	log_printf (instance->totemsrp_log_level_debug, "mcasted message added to pending queue\n");
	instance->stats.mcast_tx++;
	cs_queue_item_add (&instance->new_message_queue, &message_item);

	return (0);
}
/*
 * Determine if there is room to queue a new message.
 *
 * srp_context is the struct totemsrp_instance as an opaque pointer.
 * Returns the value cs_queue_avail reports for new_message_queue.
 */
int totemsrp_avail (void *srp_context)
{
	struct totemsrp_instance *instance = srp_context;
	int queue_avail;

	cs_queue_avail (&instance->new_message_queue, &queue_avail);

	return (queue_avail);
}
/*
* ORF Token Management
*/
/*
 * Recast message to mcast group if it is available.
 *
 * instance  protocol instance; in the RECOVERY state the recovery sort
 *           queue is searched, otherwise the regular sort queue
 * seq       sequence number of the message to send again
 *
 * Returns 0 when the message was handed to totemrrp_mcast_noflush_send,
 * -1 when seq is outside the sort queue range or no item is stored for it.
 */
static int orf_token_remcast (
	struct totemsrp_instance *instance,
	int seq)
{
	struct sq *sort_queue;
	struct sort_queue_item *sort_queue_item;
	void *ptr;

	sort_queue = (instance->memb_state == MEMB_STATE_RECOVERY) ?
		&instance->recovery_sort_queue :
		&instance->regular_sort_queue;

	if (sq_in_range (sort_queue, seq) == 0) {
		log_printf (instance->totemsrp_log_level_debug, "sq not in range\n");
		return (-1);
	}

	/*
	 * Get RTR item at seq, if not available, return
	 */
	if (sq_item_get (sort_queue, seq, &ptr) != 0) {
		return (-1);
	}
	sort_queue_item = ptr;

	totemrrp_mcast_noflush_send (
		instance->totemrrp_context,
		sort_queue_item->mcast,
		sort_queue_item->msg_len);

	return (0);
}
/*
 * Free all freeable messages from ring.
 *
 * instance   protocol instance
 * token_aru  aru carried by the token
 *
 * Messages are released up to the lowest (per sq_lt_compare) of token_aru,
 * my_last_aru and my_high_delivered.  Nothing happens when that point lies
 * before last_released.
 */
static void messages_free (
	struct totemsrp_instance *instance,
	unsigned int token_aru)
{
	struct sort_queue_item *regular_message;
	unsigned int release_to = token_aru;
	unsigned int range;
	unsigned int i;
	void *ptr;

	if (sq_lt_compare (instance->my_last_aru, release_to)) {
		release_to = instance->my_last_aru;
	}
	if (sq_lt_compare (instance->my_high_delivered, release_to)) {
		release_to = instance->my_high_delivered;
	}

	/*
	 * Ensure we dont try release before an already released point
	 */
	if (sq_lt_compare (release_to, instance->last_released)) {
		return;
	}

	range = release_to - instance->last_released;
	assert (range < QUEUE_RTR_ITEMS_SIZE_MAX);

	/*
	 * Release retransmit list items if group aru indicates they are transmitted
	 */
	for (i = 1; i <= range; i++) {
		if (sq_item_get (&instance->regular_sort_queue,
			instance->last_released + i, &ptr) == 0) {
			regular_message = ptr;
			totemsrp_buffer_release (instance, regular_message->mcast);
		}
		sq_items_release (&instance->regular_sort_queue,
			instance->last_released + i);
	}
	instance->last_released += range;

	/* Only log when at least one sequence number was released */
	if (range != 0) {
		log_printf (instance->totemsrp_log_level_debug,
			"releasing messages up to and including %x\n", release_to);
	}
}
/*
 * Advance my_aru over the contiguous run of messages present in the active
 * sort queue (recovery sort queue in the RECOVERY state, regular sort queue
 * otherwise), stopping at the first hole or at my_high_seq_received.
 *
 * Nothing is done when more than 1024 sequence numbers lie between my_aru
 * and my_high_seq_received.
 */
static void update_aru (
	struct totemsrp_instance *instance)
{
	struct sq *sort_queue;
	unsigned int range;
	unsigned int my_aru_saved;
	unsigned int contiguous;
	void *ptr;

	sort_queue = (instance->memb_state == MEMB_STATE_RECOVERY) ?
		&instance->recovery_sort_queue :
		&instance->regular_sort_queue;

	range = instance->my_high_seq_received - instance->my_aru;
	if (range > 1024) {
		return;
	}

	my_aru_saved = instance->my_aru;

	/*
	 * Count messages directly following my_aru; if hole, stop updating aru
	 */
	contiguous = 0;
	while (contiguous < range &&
		sq_item_get (sort_queue, my_aru_saved + contiguous + 1, &ptr) == 0) {
		contiguous++;
	}

	instance->my_aru += contiguous;
}
/*
 * Multicasts pending messages onto the ring (requires orf_token possession).
 *
 * instance            protocol instance
 * token               token whose seq is incremented for every message sent
 * fcc_mcasts_allowed  upper bound on the number of messages sent
 *
 * In the RECOVERY state messages are taken from retrans_message_queue and
 * recorded in the recovery sort queue (and the token retransmit timer is
 * restarted); otherwise new_message_queue and the regular sort queue are
 * used.  Returns the number of messages multicast.
 */
static int orf_token_mcast (
	struct totemsrp_instance *instance,
	struct orf_token *token,
	int fcc_mcasts_allowed)
{
	struct message_item *message_item;
	struct cs_queue *mcast_queue;
	struct sq *sort_queue;
	struct sort_queue_item sort_queue_item;
	struct mcast *mcast;
	unsigned int fcc_mcast_current;

	if (instance->memb_state == MEMB_STATE_RECOVERY) {
		mcast_queue = &instance->retrans_message_queue;
		sort_queue = &instance->recovery_sort_queue;
		reset_token_retransmit_timeout (instance); // REVIEWED
	} else {
		mcast_queue = &instance->new_message_queue;
		sort_queue = &instance->regular_sort_queue;
	}

	fcc_mcast_current = 0;
	while (fcc_mcast_current < fcc_mcasts_allowed &&
		!cs_queue_is_empty (mcast_queue)) {

		message_item = (struct message_item *)cs_queue_item_get (mcast_queue);
		mcast = message_item->mcast;

		/* Stamp the message with the next ring and global sequence numbers */
		mcast->seq = ++token->seq;
		mcast->this_seqno = instance->global_seqno++;
		memcpy (&mcast->ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id));

		/*
		 * Add message to retransmit queue
		 */
		memset (&sort_queue_item, 0, sizeof (struct sort_queue_item));
		sort_queue_item.mcast = mcast;
		sort_queue_item.msg_len = message_item->msg_len;
		sq_item_add (sort_queue, &sort_queue_item, mcast->seq);

		totemrrp_mcast_noflush_send (
			instance->totemrrp_context,
			message_item->mcast,
			message_item->msg_len);

		/*
		 * Delete item from pending queue
		 */
		cs_queue_item_remove (mcast_queue);

		/*
		 * If messages mcasted, deliver any new messages to totempg
		 */
		instance->my_high_seq_received = token->seq;

		fcc_mcast_current++;
	}

	update_aru (instance);

	/*
	 * Return 1 if more messages are available for single node clusters
	 */
	return (fcc_mcast_current);
}
/*
 * Remulticasts messages in orf_token's retransmit list (requires orf_token)
 * and modifies orf_token's rtr list to include the retransmits required by
 * this processor.
 *
 * instance     protocol instance; in the RECOVERY state the recovery sort
 *              queue is used, otherwise the regular sort queue
 * orf_token    token whose rtr_list / rtr_list_entries are serviced and
 *              updated in place
 * fcc_allowed  in: maximum number of remulticasts permitted;
 *              out: reduced by the number actually performed
 *
 * Returns the number of messages remulticast (fcc_remcast_current).
 */
static int orf_token_rtr (
	struct totemsrp_instance *instance,
	struct orf_token *orf_token,
	unsigned int *fcc_allowed)
{
	unsigned int res;
	unsigned int i, j;
	unsigned int found;
	struct sq *sort_queue;
	struct rtr_item *rtr_list;
	unsigned int range = 0;
	char retransmit_msg[1024];
	size_t msg_used;
	size_t msg_room;
	int written;

	if (instance->memb_state == MEMB_STATE_RECOVERY) {
		sort_queue = &instance->recovery_sort_queue;
	} else {
		sort_queue = &instance->regular_sort_queue;
	}

	rtr_list = &orf_token->rtr_list[0];

	if (orf_token->rtr_list_entries) {
		log_printf (instance->totemsrp_log_level_debug,
			"Retransmit List %d\n", orf_token->rtr_list_entries);

		/*
		 * Build the log line with bounded writes: the entry count comes
		 * from the token, so the fixed size buffer must never be overrun.
		 * One byte is always kept back for the trailing newline; the list
		 * is truncated when it does not fit.
		 */
		written = snprintf (retransmit_msg, sizeof (retransmit_msg),
			"Retransmit List: ");
		msg_used = (written > 0) ? (size_t)written : 0;

		for (i = 0; i < orf_token->rtr_list_entries; i++) {
			msg_room = sizeof (retransmit_msg) - msg_used - 1;
			written = snprintf (&retransmit_msg[msg_used], msg_room,
				"%x ", rtr_list[i].seq);
			if (written < 0 || (size_t)written >= msg_room) {
				/* Entry did not fit: drop the partial output and stop */
				retransmit_msg[msg_used] = '\0';
				break;
			}
			msg_used += (size_t)written;
		}
		retransmit_msg[msg_used] = '\n';
		retransmit_msg[msg_used + 1] = '\0';

		log_printf (instance->totemsrp_log_level_notice,
			"%s", retransmit_msg);
	}

	/*
	 * Retransmit messages on orf_token's RTR list from RTR queue
	 */
	for (instance->fcc_remcast_current = 0, i = 0;
		instance->fcc_remcast_current < *fcc_allowed && i < orf_token->rtr_list_entries;) {

		/*
		 * If this retransmit request isn't from this configuration,
		 * try next rtr entry
		 */
		if (memcmp (&rtr_list[i].ring_id, &instance->my_ring_id,
			sizeof (struct memb_ring_id)) != 0) {

			i += 1;
			continue;
		}

		res = orf_token_remcast (instance, rtr_list[i].seq);
		if (res == 0) {
			/*
			 * Multicasted message, so no need to copy to new retransmit list
			 */
			orf_token->rtr_list_entries -= 1;
			assert (orf_token->rtr_list_entries >= 0);
			/* Close the gap; i is not advanced so the shifted entry is examined next */
			memmove (&rtr_list[i], &rtr_list[i + 1],
				sizeof (struct rtr_item) * (orf_token->rtr_list_entries - i));

			instance->stats.mcast_retx++;
			instance->fcc_remcast_current++;
		} else {
			i += 1;
		}
	}
	*fcc_allowed = *fcc_allowed - instance->fcc_remcast_current;

	/*
	 * Add messages to retransmit to RTR list
	 * but only retry if there is room in the retransmit list
	 */
	range = orf_token->seq - instance->my_aru;
	assert (range < QUEUE_RTR_ITEMS_SIZE_MAX);

	for (i = 1; (orf_token->rtr_list_entries < RETRANSMIT_ENTRIES_MAX) &&
		(i <= range); i++) {

		/*
		 * Ensure message is within the sort queue range
		 */
		res = sq_in_range (sort_queue, instance->my_aru + i);
		if (res == 0) {
			break;
		}

		/*
		 * Find if a message is missing from this processor
		 */
		res = sq_item_inuse (sort_queue, instance->my_aru + i);
		if (res == 0) {
			/*
			 * Determine how many times we have missed receiving
			 * this sequence number. sq_item_miss_count increments
			 * a counter for the sequence number. The miss count
			 * will be returned and compared. This allows time for
			 * delayed multicast messages to be received before
			 * declaring the message is missing and requesting a
			 * retransmit.
			 */
			res = sq_item_miss_count (sort_queue, instance->my_aru + i);
			if (res < instance->totem_config->miss_count_const) {
				continue;
			}

			/*
			 * Determine if missing message is already in retransmit list
			 */
			found = 0;
			for (j = 0; j < orf_token->rtr_list_entries; j++) {
				if (instance->my_aru + i == rtr_list[j].seq) {
					found = 1;
					break;
				}
			}
			if (found == 0) {
				/*
				 * Missing message not found in current retransmit list so add it
				 */
				memcpy (&rtr_list[orf_token->rtr_list_entries].ring_id,
					&instance->my_ring_id, sizeof (struct memb_ring_id));
				rtr_list[orf_token->rtr_list_entries].seq = instance->my_aru + i;
				orf_token->rtr_list_entries++;
			}
		}
	}
	return (instance->fcc_remcast_current);
}
/*
 * Send the token copy held in orf_token_retransmit
 * (orf_token_retransmit_size bytes) again via totemrrp_token_send.
 */
static void token_retransmit (struct totemsrp_instance *instance)
{
	totemrrp_token_send (
		instance->totemrrp_context,
		instance->orf_token_retransmit,
		instance->orf_token_retransmit_size);
}
/*
* Retransmit the regular token if no mcast or token has
* been received in retransmit token period retransmit
* the token to the next processor
*/
static void timer_function_token_retransmit_timeout (void *data)
{
	struct totemsrp_instance *instance = data;

	/*
	 * Nothing is retransmitted while gathering
	 */
	if (instance->memb_state == MEMB_STATE_GATHER) {
		return;
	}

	/*
	 * In every other membership state resend the saved token and
	 * re-arm this timer
	 */
	if (instance->memb_state == MEMB_STATE_COMMIT ||
	    instance->memb_state == MEMB_STATE_OPERATIONAL ||
	    instance->memb_state == MEMB_STATE_RECOVERY) {
		token_retransmit (instance);
		reset_token_retransmit_timeout (instance);
	}
}
/*
 * Timer callback for the token hold retransmit timer.
 *
 * The saved token is resent only in the OPERATIONAL and RECOVERY states;
 * in GATHER and COMMIT the callback does nothing.  Unlike the regular
 * token retransmit callback, this one does not re-arm any timer itself.
 */
static void timer_function_token_hold_retransmit_timeout (void *data)
{
	struct totemsrp_instance *instance = data;

	if (instance->memb_state == MEMB_STATE_OPERATIONAL ||
	    instance->memb_state == MEMB_STATE_RECOVERY) {
		token_retransmit (instance);
	}
}
/*
 * Timer callback for the merge detect timer.
 *
 * Always clears my_merge_detect_timeout_outstanding.  A merge detect
 * message is transmitted only when the instance is OPERATIONAL and the
 * ring representative address equals this processor's first address.
 */
static void timer_function_merge_detect_timeout(void *data)
{
	struct totemsrp_instance *instance = data;

	instance->my_merge_detect_timeout_outstanding = 0;

	if (instance->memb_state != MEMB_STATE_OPERATIONAL) {
		return;
	}
	if (totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0])) {
		memb_merge_detect_transmit (instance);
	}
}
/*
 * Send orf_token to the next member (requires orf_token).
 *
 * The token is stamped with this processor's nodeid, a copy of it
 * (header plus rtr_list_entries retransmit items) is saved in
 * instance->orf_token_retransmit for later retransmission, and, when
 * forward_token is non-zero, it is handed to totemrrp_token_send.
 *
 * The nodeid is written BEFORE the copy is taken so that a retransmitted
 * token is byte-for-byte the token that was (or would have been) sent,
 * rather than carrying the nodeid of whoever sent it to us.
 *
 * Always returns 0.
 */
static int token_send (
	struct totemsrp_instance *instance,
	struct orf_token *orf_token,
	int forward_token)
{
	unsigned int orf_token_size;

	orf_token_size = sizeof (struct orf_token) +
		(orf_token->rtr_list_entries * sizeof (struct rtr_item));

	orf_token->header.nodeid = instance->my_id.addr[0].nodeid;
	assert (orf_token->header.nodeid);

	/*
	 * Keep a copy for token_retransmit, even when the token is held
	 */
	memcpy (instance->orf_token_retransmit, orf_token, orf_token_size);
	instance->orf_token_retransmit_size = orf_token_size;

	if (forward_token == 0) {
		return (0);
	}

	totemrrp_token_send (instance->totemrrp_context,
		orf_token,
		orf_token_size);

	return (0);
}
static int token_hold_cancel_send (struct totemsrp_instance *instance)
{
struct token_hold_cancel token_hold_cancel;
/*
* Only cancel if the token is currently held
*/
if (instance->my_token_held == 0) {
return (0);
}
instance->my_token_held = 0;
/*
* Build message
*/
token_hold_cancel.header.type = MESSAGE_TYPE_TOKEN_HOLD_CANCEL;
token_hold_cancel.header.endian_detector = ENDIAN_LOCAL;
token_hold_cancel.header.encapsulated = 0;
token_hold_cancel.header.nodeid = instance->my_id.addr[0].nodeid;
memcpy (&token_hold_cancel.ring_id, &instance->my_ring_id,
sizeof (struct memb_ring_id));
assert (token_hold_cancel.header.nodeid);
instance->stats.token_hold_cancel_tx++;
totemrrp_mcast_flush_send (instance->totemrrp_context, &token_hold_cancel,
sizeof (struct token_hold_cancel));
return (0);
}
/*
 * Build the first ORF token for the current ring and send it.
 *
 * The token starts at SEQNO_START_MSG / SEQNO_START_TOKEN with an aru of
 * SEQNO_START_MSG - 1, zero flow control counters and an empty
 * retransmit list.  The retransmit flag (mirrored in
 * instance->my_set_retrans_flg) is 1 when the retrans message queue is
 * not empty and 0 when it is empty.
 *
 * Returns the result of token_send.
 */
static int orf_token_send_initial (struct totemsrp_instance *instance)
{
	struct orf_token orf_token;
	int retrans_pending;

	orf_token.header.type = MESSAGE_TYPE_ORF_TOKEN;
	orf_token.header.endian_detector = ENDIAN_LOCAL;
	orf_token.header.encapsulated = 0;
	orf_token.header.nodeid = instance->my_id.addr[0].nodeid;
	assert (orf_token.header.nodeid);

	orf_token.seq = SEQNO_START_MSG;
	orf_token.token_seq = SEQNO_START_TOKEN;
	instance->stats.orf_token_tx++;

	/*
	 * The flag is decided once here; the former unconditional
	 * "set to 1" stores that preceded this test were always overwritten
	 */
	retrans_pending =
		(cs_queue_is_empty (&instance->retrans_message_queue) != 1);
	orf_token.retrans_flg = retrans_pending;
	instance->my_set_retrans_flg = retrans_pending;

	orf_token.aru = SEQNO_START_MSG - 1;
	orf_token.aru_addr = instance->my_id.addr[0].nodeid;

	memcpy (&orf_token.ring_id, &instance->my_ring_id,
		sizeof (struct memb_ring_id));
	orf_token.fcc = 0;
	orf_token.backlog = 0;
	orf_token.rtr_list_entries = 0;

	return (token_send (instance, &orf_token, 1));
}
static void memb_state_commit_token_update (
struct totemsrp_instance *instance)
{
struct srp_addr *addr;
struct memb_commit_token_memb_entry *memb_list;
unsigned int high_aru;
unsigned int i;
addr = (struct srp_addr *)instance->commit_token->end_of_commit_token;
memb_list = (struct memb_commit_token_memb_entry *)(addr + instance->commit_token->addr_entries);
memcpy (instance->my_new_memb_list, addr,
sizeof (struct srp_addr) * instance->commit_token->addr_entries);
instance->my_new_memb_entries = instance->commit_token->addr_entries;
memcpy (&memb_list[instance->commit_token->memb_index].ring_id,
&instance->my_old_ring_id, sizeof (struct memb_ring_id));
memb_list[instance->commit_token->memb_index].aru = instance->old_ring_state_aru;
/*
* TODO high delivered is really instance->my_aru, but with safe this
* could change?
*/
instance->my_received_flg =
(instance->my_aru == instance->my_high_seq_received);
memb_list[instance->commit_token->memb_index].received_flg = instance->my_received_flg;
memb_list[instance->commit_token->memb_index].high_delivered = instance->my_high_delivered;
/*
* find high aru up to current memb_index for all matching ring ids
* if any ring id matching memb_index has aru less then high aru set
* received flag for that entry to false
*/
high_aru = memb_list[instance->commit_token->memb_index].aru;
for (i = 0; i <= instance->commit_token->memb_index; i++) {
if (memcmp (&memb_list[instance->commit_token->memb_index].ring_id,
&memb_list[i].ring_id,
sizeof (struct memb_ring_id)) == 0) {
if (sq_lt_compare (high_aru, memb_list[i].aru)) {
high_aru = memb_list[i].aru;
}
}
}
for (i = 0; i <= instance->commit_token->memb_index; i++) {
if (memcmp (&memb_list[instance->commit_token->memb_index].ring_id,
&memb_list[i].ring_id,
sizeof (struct memb_ring_id)) == 0) {
if (sq_lt_compare (memb_list[i].aru, high_aru)) {
memb_list[i].received_flg = 0;
if (i == instance->commit_token->memb_index) {
instance->my_received_flg = 0;
}
}
}
}
instance->commit_token->header.nodeid = instance->my_id.addr[0].nodeid;
instance->commit_token->memb_index += 1;
assert (instance->commit_token->memb_index <= instance->commit_token->addr_entries);
assert (instance->commit_token->header.nodeid);
}
/*
 * Point the token target of every configured interface at the commit
 * token member selected by memb_index (taken modulo addr_entries).
 */
static void memb_state_commit_token_target_set (
	struct totemsrp_instance *instance)
{
	struct srp_addr *members;
	unsigned int iface = 0;

	members = (struct srp_addr *)instance->commit_token->end_of_commit_token;

	while (iface < instance->totem_config->interface_count) {
		totemrrp_token_target_set (
			instance->totemrrp_context,
			&members[instance->commit_token->memb_index %
				instance->commit_token->addr_entries].addr[iface],
			iface);
		iface++;
	}
}
/*
 * Send the caller supplied commit token.
 *
 * The token sequence number is incremented, a copy of the whole token
 * (header plus one srp_addr and one member entry per address) is saved
 * for retransmission, the memb_commit_token_tx statistic is bumped, the
 * token is sent and the token retransmit timeout is re-armed.
 * Always returns 0.
 */
static int memb_state_commit_token_send_recovery (
	struct totemsrp_instance *instance,
	struct memb_commit_token *commit_token)
{
	const size_t per_member_size =
		sizeof (struct srp_addr) +
		sizeof (struct memb_commit_token_memb_entry);
	unsigned int commit_token_size;

	commit_token->token_seq++;

	commit_token_size = sizeof (struct memb_commit_token) +
		(per_member_size * commit_token->addr_entries);

	/*
	 * Make a copy for retransmission if necessary
	 */
	memcpy (instance->orf_token_retransmit, commit_token, commit_token_size);
	instance->orf_token_retransmit_size = commit_token_size;

	instance->stats.memb_commit_token_tx++;

	totemrrp_token_send (instance->totemrrp_context,
		commit_token,
		commit_token_size);

	/*
	 * Request retransmission of the commit token in case it is lost
	 */
	reset_token_retransmit_timeout (instance);

	return (0);
}
static int memb_state_commit_token_send (
struct totemsrp_instance *instance)
{
unsigned int commit_token_size;
instance->commit_token->token_seq++;
commit_token_size = sizeof (struct memb_commit_token) +
((sizeof (struct srp_addr) +
sizeof (struct memb_commit_token_memb_entry)) * instance->commit_token->addr_entries);
/*
* Make a copy for retransmission if necessary
*/
memcpy (instance->orf_token_retransmit, instance->commit_token, commit_token_size);
instance->orf_token_retransmit_size = commit_token_size;
instance->stats.memb_commit_token_tx++;
totemrrp_token_send (instance->totemrrp_context,
instance->commit_token,
commit_token_size);
/*
* Request retransmission of the commit token in case it is lost
*/
reset_token_retransmit_timeout (instance);
return (0);
}
/*
 * Decide whether this processor has the lowest address among the
 * processors in my_proc_list that are not in my_failed_list.
 *
 * Returns 1 when the smallest first-interface address in that set
 * compares equal to this processor's first address, otherwise 0.
 * An empty set has no representative, so 0 is returned instead of
 * inspecting uninitialised stack memory.
 */
static int memb_lowest_in_config (struct totemsrp_instance *instance)
{
	struct srp_addr token_memb[PROCESSOR_COUNT_MAX];
	int token_memb_entries = 0;
	int i;
	const struct totem_ip_address *lowest_addr;

	memb_set_subtract (token_memb, &token_memb_entries,
		instance->my_proc_list, instance->my_proc_list_entries,
		instance->my_failed_list, instance->my_failed_list_entries);

	if (token_memb_entries <= 0) {
		return (0);
	}

	/*
	 * find representative by searching for smallest identifier;
	 * track it by pointer rather than copying over token_memb[0]
	 */
	lowest_addr = &token_memb[0].addr[0];
	for (i = 1; i < token_memb_entries; i++) {
		if (totemip_compare (lowest_addr, &token_memb[i].addr[0]) > 0) {
			lowest_addr = &token_memb[i].addr[0];
		}
	}

	return (totemip_compare (lowest_addr, &instance->my_id.addr[0]) == 0);
}
/*
 * qsort comparison callback for struct srp_addr elements: orders two
 * entries by totemip_compare of their first interface address.
 */
static int srp_addr_compare (const void *a, const void *b)
{
	const struct srp_addr *lhs = (const struct srp_addr *)a;
	const struct srp_addr *rhs = (const struct srp_addr *)b;
	int order;

	order = totemip_compare (&lhs->addr[0], &rhs->addr[0]);

	return (order);
}
static void memb_state_commit_token_create (
struct totemsrp_instance *instance)
{
struct srp_addr token_memb[PROCESSOR_COUNT_MAX];
struct srp_addr *addr;
struct memb_commit_token_memb_entry *memb_list;
int token_memb_entries = 0;
log_printf (instance->totemsrp_log_level_debug,
"Creating commit token because I am the rep.\n");
memb_set_subtract (token_memb, &token_memb_entries,
instance->my_proc_list, instance->my_proc_list_entries,
instance->my_failed_list, instance->my_failed_list_entries);
memset (instance->commit_token, 0, sizeof (struct memb_commit_token));
instance->commit_token->header.type = MESSAGE_TYPE_MEMB_COMMIT_TOKEN;
instance->commit_token->header.endian_detector = ENDIAN_LOCAL;
instance->commit_token->header.encapsulated = 0;
instance->commit_token->header.nodeid = instance->my_id.addr[0].nodeid;
assert (instance->commit_token->header.nodeid);
totemip_copy(&instance->commit_token->ring_id.rep, &instance->my_id.addr[0]);
instance->commit_token->ring_id.seq = instance->token_ring_id_seq + 4;
/*
* This qsort is necessary to ensure the commit token traverses
* the ring in the proper order
*/
qsort (token_memb, token_memb_entries, sizeof (struct srp_addr),
srp_addr_compare);
instance->commit_token->memb_index = 0;
instance->commit_token->addr_entries = token_memb_entries;
addr = (struct srp_addr *)instance->commit_token->end_of_commit_token;
memb_list = (struct memb_commit_token_memb_entry *)(addr + instance->commit_token->addr_entries);
memcpy (addr, token_memb,
token_memb_entries * sizeof (struct srp_addr));
memset (memb_list, 0,
sizeof (struct memb_commit_token_memb_entry) * token_memb_entries);
}
/*
 * Build a MEMB_JOIN message from this processor's current proc list and
 * failed list and multicast it.
 *
 * Message layout: struct memb_join, then my_proc_list_entries srp_addr
 * records, then my_failed_list_entries srp_addr records.  When
 * send_join_timeout is non-zero the send is preceded by a sleep of
 * random() % (send_join_timeout * 1000) microseconds.
 */
static void memb_join_message_send (struct totemsrp_instance *instance)
{
	char memb_join_data[40000];
	struct memb_join *memb_join = (struct memb_join *)memb_join_data;
	char *payload = (char *)memb_join;
	unsigned int msg_len = sizeof (struct memb_join);
	size_t list_bytes;

	memb_join->header.type = MESSAGE_TYPE_MEMB_JOIN;
	memb_join->header.endian_detector = ENDIAN_LOCAL;
	memb_join->header.encapsulated = 0;
	memb_join->header.nodeid = instance->my_id.addr[0].nodeid;
	assert (memb_join->header.nodeid);

	memb_join->ring_seq = instance->my_ring_id.seq;
	memb_join->proc_list_entries = instance->my_proc_list_entries;
	memb_join->failed_list_entries = instance->my_failed_list_entries;
	srp_addr_copy (&memb_join->system_from, &instance->my_id);

	/*
	 * Append the joined processor list directly after the header ...
	 */
	list_bytes = instance->my_proc_list_entries * sizeof (struct srp_addr);
	memcpy (&payload[msg_len], instance->my_proc_list, list_bytes);
	msg_len += list_bytes;

	/*
	 * ... followed by the failed processor list
	 */
	list_bytes = instance->my_failed_list_entries * sizeof (struct srp_addr);
	memcpy (&payload[msg_len], instance->my_failed_list, list_bytes);
	msg_len += list_bytes;

	if (instance->totem_config->send_join_timeout) {
		usleep (random() % (instance->totem_config->send_join_timeout * 1000));
	}

	instance->stats.memb_join_tx++;

	totemrrp_mcast_flush_send (
		instance->totemrrp_context,
		memb_join,
		msg_len);
}
/*
 * Multicast a MEMB_JOIN message announcing that this processor is
 * leaving.
 *
 * This processor is first merged into my_failed_list, and the proc list
 * carried in the message is my_proc_list with this processor removed.
 * The header nodeid and system_from.addr[0].nodeid are both set to
 * LEAVE_DUMMY_NODEID.  When send_join_timeout is non-zero the send is
 * preceded by a sleep of random() % (send_join_timeout * 1000)
 * microseconds.
 */
static void memb_leave_message_send (struct totemsrp_instance *instance)
{
	char memb_join_data[40000];
	struct memb_join *memb_join = (struct memb_join *)memb_join_data;
	char *addr;
	unsigned int addr_idx;
	/*
	 * Start from zero like the other callers of memb_set_subtract, so
	 * the count is well defined whatever that helper does with it
	 */
	int active_memb_entries = 0;
	struct srp_addr active_memb[PROCESSOR_COUNT_MAX];

	log_printf (instance->totemsrp_log_level_debug,
		"sending join/leave message\n");

	/*
	 * add us to the failed list, and remove us from
	 * the members list
	 */
	memb_set_merge(
		&instance->my_id, 1,
		instance->my_failed_list, &instance->my_failed_list_entries);

	memb_set_subtract (active_memb, &active_memb_entries,
		instance->my_proc_list, instance->my_proc_list_entries,
		&instance->my_id, 1);

	memb_join->header.type = MESSAGE_TYPE_MEMB_JOIN;
	memb_join->header.endian_detector = ENDIAN_LOCAL;
	memb_join->header.encapsulated = 0;
	memb_join->header.nodeid = LEAVE_DUMMY_NODEID;

	memb_join->ring_seq = instance->my_ring_id.seq;
	memb_join->proc_list_entries = active_memb_entries;
	memb_join->failed_list_entries = instance->my_failed_list_entries;
	srp_addr_copy (&memb_join->system_from, &instance->my_id);
	memb_join->system_from.addr[0].nodeid = LEAVE_DUMMY_NODEID;

	// TODO: CC Maybe use the actual join send routine.
	/*
	 * Append the remaining active processors and then the failed
	 * processors directly after the fixed-size header
	 */
	addr = (char *)memb_join;
	addr_idx = sizeof (struct memb_join);
	memcpy (&addr[addr_idx],
		active_memb,
		active_memb_entries *
			sizeof (struct srp_addr));
	addr_idx +=
		active_memb_entries *
			sizeof (struct srp_addr);
	memcpy (&addr[addr_idx],
		instance->my_failed_list,
		instance->my_failed_list_entries *
			sizeof (struct srp_addr));
	addr_idx +=
		instance->my_failed_list_entries *
			sizeof (struct srp_addr);

	if (instance->totem_config->send_join_timeout) {
		usleep (random() % (instance->totem_config->send_join_timeout * 1000));
	}

	instance->stats.memb_join_tx++;

	totemrrp_mcast_flush_send (
		instance->totemrrp_context,
		memb_join,
		addr_idx);
}
static void memb_merge_detect_transmit (struct totemsrp_instance *instance)
{
struct memb_merge_detect memb_merge_detect;
memb_merge_detect.header.type = MESSAGE_TYPE_MEMB_MERGE_DETECT;
memb_merge_detect.header.endian_detector = ENDIAN_LOCAL;
memb_merge_detect.header.encapsulated = 0;
memb_merge_detect.header.nodeid = instance->my_id.addr[0].nodeid;
srp_addr_copy (&memb_merge_detect.system_from, &instance->my_id);
memcpy (&memb_merge_detect.ring_id, &instance->my_ring_id,
sizeof (struct memb_ring_id));
assert (memb_merge_detect.header.nodeid);
instance->stats.memb_merge_detect_tx++;
totemrrp_mcast_flush_send (instance->totemrrp_context,
&memb_merge_detect,
sizeof (struct memb_merge_detect));
}
/*
 * Load the ring sequence number from "<rundir>/ringid_<address>", or
 * create that file with a sequence of 0 when it cannot be opened or does
 * not contain a full uint64_t.
 *
 * On return memb_ring_id->rep is this processor's first address and
 * instance->token_ring_id_seq equals memb_ring_id->seq.  Failures to
 * create or write the file are logged as warnings and otherwise ignored.
 *
 * NOTE: the create path calls umask(0) and does not restore the previous
 * mask, so the process umask stays changed afterwards.
 */
static void memb_ring_id_create_or_load (
	struct totemsrp_instance *instance,
	struct memb_ring_id *memb_ring_id)
{
	int fd;
	int res = 0;
	int saved_errno;
	char filename[PATH_MAX];

	snprintf (filename, sizeof(filename), "%s/ringid_%s",
		rundir, totemip_print (&instance->my_id.addr[0]));
	fd = open (filename, O_RDONLY, 0700);
	/*
	 * If file can be opened and read, read the ring id
	 */
	if (fd != -1) {
		res = read (fd, &memb_ring_id->seq, sizeof (uint64_t));
		close (fd);
	}
	/*
	 * If file could not be opened or read, create a new ring id
	 */
	if ((fd == -1) || (res != sizeof (uint64_t))) {
		memb_ring_id->seq = 0;
		umask(0);
		fd = open (filename, O_CREAT|O_RDWR, 0700);
		if (fd != -1) {
			res = write (fd, &memb_ring_id->seq, sizeof (uint64_t));
			/*
			 * Capture errno before close() gets a chance to
			 * overwrite the reason the write failed
			 */
			saved_errno = errno;
			close (fd);
			if (res == -1) {
				LOGSYS_PERROR (saved_errno, instance->totemsrp_log_level_warning,
					"Couldn't write ringid file '%s'", filename);
			}
		} else {
			LOGSYS_PERROR (errno, instance->totemsrp_log_level_warning,
				"Couldn't create ringid file '%s'", filename);
		}
	}

	totemip_copy(&memb_ring_id->rep, &instance->my_id.addr[0]);
	assert (!totemip_zero_check(&memb_ring_id->rep));
	instance->token_ring_id_seq = memb_ring_id->seq;
}
static void memb_ring_id_set_and_store (
struct totemsrp_instance *instance,
const struct memb_ring_id *ring_id)
{
char filename[256];
int fd;
int res;
memcpy (&instance->my_ring_id, ring_id, sizeof (struct memb_ring_id));
snprintf (filename, sizeof(filename), "%s/ringid_%s",
rundir, totemip_print (&instance->my_id.addr[0]));
fd = open (filename, O_WRONLY, 0777);
if (fd == -1) {
fd = open (filename, O_CREAT|O_RDWR, 0777);
}
if (fd == -1) {
LOGSYS_PERROR(errno, instance->totemsrp_log_level_warning,
"Couldn't store new ring id %llx to stable storage",
instance->my_ring_id.seq);
assert (0);
return;
}
log_printf (instance->totemsrp_log_level_debug,
"Storing new sequence id for ring %llx\n", instance->my_ring_id.seq);
//assert (fd > 0);
res = write (fd, &instance->my_ring_id.seq, sizeof (unsigned long long));
assert (res == sizeof (unsigned long long));
close (fd);
}
/*
 * Register a callback to be run when a token is received or sent.
 *
 * A token hold cancel is sent first (see token_hold_cancel_send).  A new
 * callback record is then allocated, returned through handle_out and
 * linked onto the received or sent callback list according to type; for
 * any other type the record is allocated but not linked anywhere.
 *
 * Returns 0 on success, -1 when the allocation fails (handle_out is then
 * left untouched).
 */
int totemsrp_callback_token_create (
	void *srp_context,
	void **handle_out,
	enum totem_callback_token_type type,
	int delete,
	int (*callback_fn) (enum totem_callback_token_type type, const void *),
	const void *data)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
	struct token_callback_instance *entry;

	token_hold_cancel_send (instance);

	entry = malloc (sizeof (struct token_callback_instance));
	if (entry == NULL) {
		return (-1);
	}

	*handle_out = (void *)entry;

	list_init (&entry->list);
	entry->callback_fn = callback_fn;
	entry->data = (void *) data;
	entry->callback_type = type;
	entry->delete = delete;

	if (type == TOTEM_CALLBACK_TOKEN_RECEIVED) {
		list_add (&entry->list, &instance->token_callback_received_listhead);
	} else if (type == TOTEM_CALLBACK_TOKEN_SENT) {
		list_add (&entry->list, &instance->token_callback_sent_listhead);
	}

	return (0);
}
/*
 * Unlink and free a callback record created by
 * totemsrp_callback_token_create, then clear the caller's handle.
 * Does nothing when *handle_out is already null.  srp_context is unused.
 */
void totemsrp_callback_token_destroy (void *srp_context, void **handle_out)
{
	struct token_callback_instance *entry;

	entry = (struct token_callback_instance *)*handle_out;
	if (entry == NULL) {
		return;
	}

	list_del (&entry->list);
	free (entry);
	*handle_out = 0;
}
/*
 * Run every callback registered for the given token event.
 *
 * For entries registered with a non-zero delete flag, the entry is
 * unlinked before its callback runs.  If the callback returns -1 the
 * entry is put back at the head of the list so it is tried again on the
 * next token; otherwise the entry is freed.  Entries with a zero delete
 * flag stay on the list regardless of the callback result.
 *
 * An unknown type trips assert (0); when asserts are compiled out the
 * function returns without touching any list.
 */
static void token_callbacks_execute (
	struct totemsrp_instance *instance,
	enum totem_callback_token_type type)
{
	struct list_head *list;
	struct list_head *list_next;
	struct list_head *callback_listhead = 0;
	struct token_callback_instance *token_callback_instance;
	int res;
	int del;

	switch (type) {
	case TOTEM_CALLBACK_TOKEN_RECEIVED:
		callback_listhead = &instance->token_callback_received_listhead;
		break;
	case TOTEM_CALLBACK_TOKEN_SENT:
		callback_listhead = &instance->token_callback_sent_listhead;
		break;
	default:
		assert (0);
		/*
		 * Never walk a null list head when NDEBUG removes the assert
		 */
		return;
	}

	for (list = callback_listhead->next; list != callback_listhead;
		list = list_next) {

		token_callback_instance = list_entry (list, struct token_callback_instance, list);

		/*
		 * Fetch the successor first: the current entry may be
		 * unlinked, re-added at the head, or freed below
		 */
		list_next = list->next;

		/*
		 * Normalise the flag so the unlink and the free below always
		 * agree; an entry must never be freed while still linked
		 */
		del = (token_callback_instance->delete != 0);
		if (del) {
			list_del (list);
		}

		res = token_callback_instance->callback_fn (
			token_callback_instance->callback_type,
			token_callback_instance->data);
		/*
		 * This callback failed to execute, try it again on the next token
		 */
		if (res == -1 && del) {
			list_add (list, callback_listhead);
		} else if (del) {
			free (token_callback_instance);
		}
	}
}
/*
* Flow control functions
*/
/*
 * Report how many messages are waiting to be sent in the current state:
 * the used count of new_message_queue when OPERATIONAL, of
 * retrans_message_queue when in RECOVERY, and 0 otherwise.  The value is
 * also recorded in the backlog_calc field of the latest token statistics
 * slot.
 */
static unsigned int backlog_get (struct totemsrp_instance *instance)
{
	unsigned int pending = 0;

	switch (instance->memb_state) {
	case MEMB_STATE_OPERATIONAL:
		pending = cs_queue_used (&instance->new_message_queue);
		break;
	case MEMB_STATE_RECOVERY:
		pending = cs_queue_used (&instance->retrans_message_queue);
		break;
	default:
		break;
	}

	instance->stats.token[instance->stats.latest_token].backlog_calc = pending;

	return (pending);
}
/*
 * Work out how many messages this processor may transmit on this token.
 *
 * Starts from max_messages, caps it at window_size - token->fcc, and,
 * when the combined backlog term is non-zero, caps it again at
 * (window_size * my_pbl) / (token->backlog + my_cbl - my_pbl) provided
 * that quotient is greater than zero.  instance->my_cbl is refreshed
 * from backlog_get as a side effect.
 */
static int fcc_calculate (
	struct totemsrp_instance *instance,
	struct orf_token *token)
{
	unsigned int allowed;
	unsigned int backlog_share;

	allowed = instance->totem_config->max_messages;
	if (instance->totem_config->window_size - token->fcc < allowed) {
		allowed = instance->totem_config->window_size - token->fcc;
	}

	instance->my_cbl = backlog_get (instance);

	/*
	 * The backlog term is the divisor below, so it must be checked
	 * for zero before the share is computed
	 */
	if ((token->backlog + instance->my_cbl - instance->my_pbl) != 0) {
		backlog_share = (instance->totem_config->window_size * instance->my_pbl) /
			(token->backlog + instance->my_cbl - instance->my_pbl);
		if (backlog_share > 0 && backlog_share < allowed) {
			allowed = backlog_share;
		}
	}

	return (allowed);
}
/*
 * don't overflow the RTR sort queue
 *
 * Sets *transmits_allowed to 0 when last_released +
 * QUEUE_RTR_ITEMS_SIZE_MAX - *transmits_allowed - window_size compares
 * lower than token->seq.  Asserts that *transmits_allowed + window_size
 * does not exceed QUEUE_RTR_ITEMS_SIZE_MAX.
 */
static void fcc_rtr_limit (
	struct totemsrp_instance *instance,
	struct orf_token *token,
	unsigned int *transmits_allowed)
{
	int headroom = QUEUE_RTR_ITEMS_SIZE_MAX;

	headroom -= (*transmits_allowed + instance->totem_config->window_size);
	assert (headroom >= 0);

	if (!sq_lt_compare (instance->last_released +
		QUEUE_RTR_ITEMS_SIZE_MAX - *transmits_allowed -
		instance->totem_config->window_size,
		token->seq)) {
		return;
	}

	*transmits_allowed = 0;
}
/*
 * Fold this processor's contribution into the token's flow control
 * counters: the previous contribution (my_trc, my_pbl) is replaced by
 * the current one (msgs_transmitted, my_cbl), and the current values are
 * then remembered as the previous ones for the next rotation.
 */
static void fcc_token_update (
	struct totemsrp_instance *instance,
	struct orf_token *token,
	unsigned int msgs_transmitted)
{
	/* transmit count */
	token->fcc = token->fcc + (msgs_transmitted - instance->my_trc);
	instance->my_trc = msgs_transmitted;

	/* backlog */
	token->backlog = token->backlog + (instance->my_cbl - instance->my_pbl);
	instance->my_pbl = instance->my_cbl;
}
/*
* Message Handlers
*/
/*
 * Timestamp of the previous timing measurement.  In this part of the
 * file it is read and written only by the GIVEINFO blocks of
 * message_handler_orf_token.
 */
unsigned long long int tv_old;
/*
* message handler called when TOKEN message type received
*
* instance: protocol instance the token arrived on
* msg: the received token; it is copied into a local buffer before use
* msg_len: length of msg (not consulted anywhere in this function)
* endian_conversion_needed: when non-zero the token is first converted
* with orf_token_endian_convert into a local buffer
*
* Every return path returns 0.
*/
static int message_handler_orf_token (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed)
{
char token_storage[1500];
char token_convert[1500];
struct orf_token *token = NULL;
int forward_token;
unsigned int transmits_allowed;
unsigned int mcasted_retransmit;
unsigned int mcasted_regular;
unsigned int last_aru;
#ifdef GIVEINFO
unsigned long long tv_current;
unsigned long long tv_diff;
tv_current = qb_util_nano_current_get ();
tv_diff = tv_current - tv_old;
tv_old = tv_current;
log_printf (instance->totemsrp_log_level_debug,
"Time since last token %0.4f ms\n", ((float)tv_diff) / 1000000.0);
#endif
if (instance->orf_token_discard) {
return (0);
}
#ifdef TEST_DROP_ORF_TOKEN_PERCENTAGE
if (random()%100 < TEST_DROP_ORF_TOKEN_PERCENTAGE) {
return (0);
}
#endif
if (endian_conversion_needed) {
orf_token_endian_convert ((struct orf_token *)msg,
(struct orf_token *)token_convert);
msg = (struct orf_token *)token_convert;
}
/*
* Make copy of token and retransmit list in case we have
* to flush incoming messages from the kernel queue
*
* NOTE(review): the second copy always reads RETRANSMIT_ENTRIES_MAX
* retransmit items past the token header, independent of msg_len and
* of token->rtr_list_entries. Confirm that the buffer behind msg is
* always at least that large and that token_storage can hold it.
*/
token = (struct orf_token *)token_storage;
memcpy (token, msg, sizeof (struct orf_token));
memcpy (&token->rtr_list[0], (char *)msg + sizeof (struct orf_token),
sizeof (struct rtr_item) * RETRANSMIT_ENTRIES_MAX);
/*
* Handle merge detection timeout
*
* An unchanged token sequence number counts as an idle rotation
*/
if (token->seq == instance->my_last_seq) {
start_merge_detect_timeout (instance);
instance->my_seq_unchanged += 1;
} else {
cancel_merge_detect_timeout (instance);
cancel_token_hold_retransmit_timeout (instance);
instance->my_seq_unchanged = 0;
}
instance->my_last_seq = token->seq;
#ifdef TEST_RECOVERY_MSG_COUNT
if (instance->memb_state == MEMB_STATE_OPERATIONAL && token->seq > TEST_RECOVERY_MSG_COUNT) {
return (0);
}
#endif
totemrrp_recv_flush (instance->totemrrp_context);
/*
* Determine if we should hold (in reality drop) the token
*
* The ring rep needs strictly more than seqno_unchanged_const idle
* rotations, every other processor needs at least that many
*/
instance->my_token_held = 0;
if (totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0]) &&
instance->my_seq_unchanged > instance->totem_config->seqno_unchanged_const) {
instance->my_token_held = 1;
} else
if (!totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0]) &&
instance->my_seq_unchanged >= instance->totem_config->seqno_unchanged_const) {
instance->my_token_held = 1;
}
/*
* Hold onto token when there is no activity on ring and
* this processor is the ring rep
*/
forward_token = 1;
if (totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0])) {
if (instance->my_token_held) {
forward_token = 0;
}
}
token_callbacks_execute (instance, TOTEM_CALLBACK_TOKEN_RECEIVED);
switch (instance->memb_state) {
case MEMB_STATE_COMMIT:
/* Discard token */
break;
case MEMB_STATE_OPERATIONAL:
messages_free (instance, token->aru);
/*
* Do NOT add break, this case should also execute code in gather case.
*/
case MEMB_STATE_GATHER:
/*
* DO NOT add break, we use different free mechanism in recovery state
*/
case MEMB_STATE_RECOVERY:
/*
* Discard tokens from another configuration
*/
if (memcmp (&token->ring_id, &instance->my_ring_id,
sizeof (struct memb_ring_id)) != 0) {
if ((forward_token)
&& instance->use_heartbeat) {
reset_heartbeat_timeout(instance);
}
else {
cancel_heartbeat_timeout(instance);
}
return (0); /* discard token */
}
/*
* Discard retransmitted tokens
*/
if (sq_lte_compare (token->token_seq, instance->my_token_seq)) {
return (0); /* discard token */
}
last_aru = instance->my_last_aru;
instance->my_last_aru = token->aru;
/*
* Flow control: retransmissions are sent first and reduce the
* allowance that is left for regular multicasts
*/
transmits_allowed = fcc_calculate (instance, token);
mcasted_retransmit = orf_token_rtr (instance, token, &transmits_allowed);
fcc_rtr_limit (instance, token, &transmits_allowed);
mcasted_regular = orf_token_mcast (instance, token, transmits_allowed);
/*
if (mcasted_regular) {
printf ("mcasted regular %d\n", mcasted_regular);
printf ("token seq %d\n", token->seq);
}
*/
fcc_token_update (instance, token, mcasted_retransmit +
mcasted_regular);
/*
* Write our aru into the token when ours is lower, when we are the
* processor that set it last, or when nobody currently owns it
*/
if (sq_lt_compare (instance->my_aru, token->aru) ||
instance->my_id.addr[0].nodeid == token->aru_addr ||
token->aru_addr == 0) {
token->aru = instance->my_aru;
if (token->aru == token->seq) {
token->aru_addr = 0;
} else {
token->aru_addr = instance->my_id.addr[0].nodeid;
}
}
/*
* Count consecutive rotations in which the token aru did not move
* while some processor still owns it
*/
if (token->aru == last_aru && token->aru_addr != 0) {
instance->my_aru_count += 1;
} else {
instance->my_aru_count = 0;
}
/*
* If we are the owner of a stuck aru for more than fail_to_recv_const
* rotations, add ourselves to the failed list and enter gather
*/
if (instance->my_aru_count > instance->totem_config->fail_to_recv_const &&
token->aru_addr == instance->my_id.addr[0].nodeid) {
log_printf (instance->totemsrp_log_level_error,
"FAILED TO RECEIVE\n");
instance->failed_to_recv = 1;
memb_set_merge (&instance->my_id, 1,
instance->my_failed_list,
&instance->my_failed_list_entries);
memb_state_gather_enter (instance, 6);
} else {
instance->my_token_seq = token->token_seq;
token->token_seq += 1;
if (instance->memb_state == MEMB_STATE_RECOVERY) {
/*
* instance->my_aru == instance->my_high_seq_received means this processor
* has recovered all messages it can recover
* (ie: its retrans queue is empty)
*/
if (cs_queue_is_empty (&instance->retrans_message_queue) == 0) {
if (token->retrans_flg == 0) {
token->retrans_flg = 1;
instance->my_set_retrans_flg = 1;
}
} else
if (token->retrans_flg == 1 && instance->my_set_retrans_flg) {
token->retrans_flg = 0;
instance->my_set_retrans_flg = 0;
}
log_printf (instance->totemsrp_log_level_debug,
"token retrans flag is %d my set retrans flag%d retrans queue empty %d count %d, aru %x\n",
token->retrans_flg, instance->my_set_retrans_flg,
cs_queue_is_empty (&instance->retrans_message_queue),
instance->my_retrans_flg_count, token->aru);
if (token->retrans_flg == 0) {
instance->my_retrans_flg_count += 1;
} else {
instance->my_retrans_flg_count = 0;
}
if (instance->my_retrans_flg_count == 2) {
instance->my_install_seq = token->seq;
}
log_printf (instance->totemsrp_log_level_debug,
"install seq %x aru %x high seq received %x\n",
instance->my_install_seq, instance->my_aru, instance->my_high_seq_received);
if (instance->my_retrans_flg_count >= 2 &&
instance->my_received_flg == 0 &&
sq_lte_compare (instance->my_install_seq, instance->my_aru)) {
instance->my_received_flg = 1;
instance->my_deliver_memb_entries = instance->my_trans_memb_entries;
memcpy (instance->my_deliver_memb_list, instance->my_trans_memb_list,
sizeof (struct totem_ip_address) * instance->my_trans_memb_entries);
}
if (instance->my_retrans_flg_count >= 3 &&
sq_lte_compare (instance->my_install_seq, token->aru)) {
instance->my_rotation_counter += 1;
} else {
instance->my_rotation_counter = 0;
}
/*
* Two qualifying rotations in a row end recovery
*/
if (instance->my_rotation_counter == 2) {
log_printf (instance->totemsrp_log_level_debug,
"retrans flag count %x token aru %x install seq %x aru %x %x\n",
instance->my_retrans_flg_count, token->aru, instance->my_install_seq,
instance->my_aru, token->seq);
memb_state_operational_enter (instance);
instance->my_rotation_counter = 0;
instance->my_retrans_flg_count = 0;
}
}
totemrrp_send_flush (instance->totemrrp_context);
token_send (instance, token, forward_token);
#ifdef GIVEINFO
tv_current = qb_util_nano_current_get ();
tv_diff = tv_current - tv_old;
tv_old = tv_current;
log_printf (instance->totemsrp_log_level_debug,
"I held %0.4f ms\n",
((float)tv_diff) / 1000000.0);
#endif
if (instance->memb_state == MEMB_STATE_OPERATIONAL) {
messages_deliver_to_app (instance, 0,
instance->my_high_seq_received);
}
/*
* Deliver messages after token has been transmitted
* to improve performance
*/
reset_token_timeout (instance); // REVIEWED
reset_token_retransmit_timeout (instance); // REVIEWED
if (totemip_equal(&instance->my_id.addr[0], &instance->my_ring_id.rep) &&
instance->my_token_held == 1) {
start_token_hold_retransmit_timeout (instance);
}
token_callbacks_execute (instance, TOTEM_CALLBACK_TOKEN_SENT);
}
break;
}
/*
* The heartbeat timeout is re-armed only when the token was to be
* forwarded and heartbeat is in use; otherwise it is cancelled
*/
if ((forward_token)
&& instance->use_heartbeat) {
reset_heartbeat_timeout(instance);
}
else {
cancel_heartbeat_timeout(instance);
}
return (0);
}
/*
 * Deliver messages from the regular sort queue to the application, in
 * sequence order, starting after my_high_delivered and going no further
 * than end_point.
 *
 * skip == 0: delivery stops at the first missing sequence number.
 * skip != 0: missing sequence numbers are stepped over, and messages
 *            whose sender is not in my_deliver_memb_list are stepped
 *            over without being delivered.
 *
 * my_high_delivered is advanced past every sequence number that was
 * delivered or stepped over.  Delivery also stops as soon as a sequence
 * number falls outside the sort queue range.
 */
static void messages_deliver_to_app (
	struct totemsrp_instance *instance,
	int skip,
	unsigned int end_point)
{
	struct sort_queue_item *queued;
	struct mcast *mcast_in;
	struct mcast mcast_header;
	unsigned int base_seq;
	unsigned int span;
	unsigned int offset;
	unsigned int seq;
	int missing;
	int endian_conversion_required;

	span = end_point - instance->my_high_delivered;
	if (span != 0) {
		log_printf (instance->totemsrp_log_level_debug,
			"Delivering %x to %x\n", instance->my_high_delivered,
			end_point);
	}
	assert (span < QUEUE_RTR_ITEMS_SIZE_MAX);

	base_seq = instance->my_high_delivered;

	for (offset = 1; offset <= span; offset++) {
		void *item = NULL;

		seq = base_seq + offset;

		/*
		 * If out of range of sort queue, stop assembly
		 */
		if (sq_in_range (&instance->regular_sort_queue, seq) == 0) {
			break;
		}

		missing = sq_item_get (&instance->regular_sort_queue, seq, &item);

		/*
		 * If hole, stop assembly unless holes may be skipped
		 */
		if (missing != 0 && skip == 0) {
			break;
		}

		instance->my_high_delivered = seq;

		if (missing != 0) {
			continue;
		}

		queued = item;
		mcast_in = queued->mcast;
		assert (mcast_in != (struct mcast *)0xdeadbeef);

		/*
		 * Obtain a header in local byte order
		 */
		endian_conversion_required =
			(mcast_in->header.endian_detector != ENDIAN_LOCAL);
		if (endian_conversion_required) {
			mcast_endian_convert (mcast_in, &mcast_header);
		} else {
			memcpy (&mcast_header, mcast_in, sizeof (struct mcast));
		}

		/*
		 * Skip messages not originated in instance->my_deliver_memb
		 */
		if (skip &&
			memb_set_subset (&mcast_header.system_from,
				1,
				instance->my_deliver_memb_list,
				instance->my_deliver_memb_entries) == 0) {
			continue;
		}

		/*
		 * Message found
		 */
		log_printf (instance->totemsrp_log_level_debug,
			"Delivering MCAST message with seq %x to pending delivery queue\n",
			mcast_header.seq);

		/*
		 * Hand the payload that follows the mcast header to the
		 * registered deliver function
		 */
		instance->totemsrp_deliver_fn (
			mcast_header.header.nodeid,
			((char *)queued->mcast) + sizeof (struct mcast),
			queued->msg_len - sizeof (struct mcast),
			endian_conversion_required);
	}
}
/*
 * Receive handler called when a message of type MCAST is received.
 *
 * msg/msg_len describe the raw frame; endian_conversion_needed is non-zero
 * when the sender's byte order differs from the local one.
 *
 * Returns 0 in every case except when no buffer could be allocated for the
 * message, in which case -1 is returned.
 *
 * NOTE(review): the header is copied out with sizeof (struct mcast) before
 * anything verifies that msg_len is at least that large - confirm that the
 * caller guarantees a minimum length.
 */
static int message_handler_mcast (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed)
{
struct sort_queue_item sort_queue_item;
struct sq *sort_queue;
struct mcast mcast_header;
/*
 * Work on a local, host byte order copy of the header
 */
if (endian_conversion_needed) {
mcast_endian_convert (msg, &mcast_header);
} else {
memcpy (&mcast_header, msg, sizeof (struct mcast));
}
/*
 * Encapsulated messages are queued on the recovery sort queue, all others
 * on the regular sort queue
 */
if (mcast_header.header.encapsulated == MESSAGE_ENCAPSULATED) {
sort_queue = &instance->recovery_sort_queue;
} else {
sort_queue = &instance->regular_sort_queue;
}
assert (msg_len <= FRAME_SIZE_MAX);
#ifdef TEST_DROP_MCAST_PERCENTAGE
/*
 * Test hook: randomly drop the configured percentage of messages
 */
if (random()%100 < TEST_DROP_MCAST_PERCENTAGE) {
return (0);
}
#endif
/*
 * If the message is foreign execute the switch below
 * (foreign means its ring id differs from my_ring_id). A foreign message
 * is never queued; every path through this block returns 0.
 */
if (memcmp (&instance->my_ring_id, &mcast_header.ring_id,
sizeof (struct memb_ring_id)) != 0) {
switch (instance->memb_state) {
case MEMB_STATE_OPERATIONAL:
/* remember the sender and start gathering a new membership */
memb_set_merge (
&mcast_header.system_from, 1,
instance->my_proc_list, &instance->my_proc_list_entries);
memb_state_gather_enter (instance, 7);
break;
case MEMB_STATE_GATHER:
/* only re-enter gather when the sender is not yet in my_proc_list */
if (!memb_set_subset (
&mcast_header.system_from,
1,
instance->my_proc_list,
instance->my_proc_list_entries)) {
memb_set_merge (&mcast_header.system_from, 1,
instance->my_proc_list, &instance->my_proc_list_entries);
memb_state_gather_enter (instance, 8);
return (0);
}
break;
case MEMB_STATE_COMMIT:
/* discard message */
instance->stats.rx_msg_dropped++;
break;
case MEMB_STATE_RECOVERY:
/* discard message */
instance->stats.rx_msg_dropped++;
break;
}
return (0);
}
log_printf (instance->totemsrp_log_level_debug,
"Received ringid(%s:%lld) seq %x\n",
totemip_print (&mcast_header.ring_id.rep),
mcast_header.ring_id.seq,
mcast_header.seq);
/*
 * Add the mcast message to the selected sort queue if its sequence number
 * is in range and that slot is not already in use; otherwise the message
 * is not stored.
 */
if (msg_len > 0 && msg_len <= FRAME_SIZE_MAX &&
sq_in_range (sort_queue, mcast_header.seq) &&
sq_item_inuse (sort_queue, mcast_header.seq) == 0) {
/*
 * Allocate new multicast memory block
 */
// TODO LEAK
sort_queue_item.mcast = totemsrp_buffer_alloc (instance);
if (sort_queue_item.mcast == NULL) {
return (-1); /* error here is corrected by the algorithm */
}
/* the queue entry holds its own copy of the complete frame */
memcpy (sort_queue_item.mcast, msg, msg_len);
sort_queue_item.msg_len = msg_len;
/* advance my_high_seq_received when this sequence number is newer */
if (sq_lt_compare (instance->my_high_seq_received,
mcast_header.seq)) {
instance->my_high_seq_received = mcast_header.seq;
}
sq_item_add (sort_queue, &sort_queue_item, mcast_header.seq);
}
update_aru (instance);
/*
 * Delivery to the application only happens from here while operational
 */
if (instance->memb_state == MEMB_STATE_OPERATIONAL) {
messages_deliver_to_app (instance, 0, instance->my_high_seq_received);
}
/* TODO remove from retrans message queue for old ring in recovery state */
return (0);
}
static int message_handler_memb_merge_detect (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed)
{
struct memb_merge_detect memb_merge_detect;
if (endian_conversion_needed) {
memb_merge_detect_endian_convert (msg, &memb_merge_detect);
} else {
memcpy (&memb_merge_detect, msg,
sizeof (struct memb_merge_detect));
}
/*
* do nothing if this is a merge detect from this configuration
*/
if (memcmp (&instance->my_ring_id, &memb_merge_detect.ring_id,
sizeof (struct memb_ring_id)) == 0) {
return (0);
}
/*
* Execute merge operation
*/
switch (instance->memb_state) {
case MEMB_STATE_OPERATIONAL:
memb_set_merge (&memb_merge_detect.system_from, 1,
instance->my_proc_list, &instance->my_proc_list_entries);
memb_state_gather_enter (instance, 9);
break;
case MEMB_STATE_GATHER:
if (!memb_set_subset (
&memb_merge_detect.system_from,
1,
instance->my_proc_list,
instance->my_proc_list_entries)) {
memb_set_merge (&memb_merge_detect.system_from, 1,
instance->my_proc_list, &instance->my_proc_list_entries);
memb_state_gather_enter (instance, 10);
return (0);
}
break;
case MEMB_STATE_COMMIT:
/* do nothing in commit */
break;
case MEMB_STATE_RECOVERY:
/* do nothing in recovery */
break;
}
return (0);
}
/*
 * Process a join message that is already in host byte order.
 *
 * The sender's proc list and failed list (stored back to back after the
 * fixed part of the message) are compared with my_proc_list and
 * my_failed_list:
 *
 * - both equal: record the sender's consensus and, when consensus is
 *   agreed, create the commit token and enter the commit state (either
 *   because failed_to_recv was set, or because this node is the lowest in
 *   the configuration).
 * - both are subsets of ours: nothing to do.
 * - the sender is in my_failed_list: nothing to do.
 * - otherwise: merge the new information and enter the gather state.
 *
 * NOTE(review): the list pointers are derived from the entry counts in the
 * message; this function does not see the message length, so the caller
 * has to make sure the counts fit the received data.
 */
static void memb_join_process (
struct totemsrp_instance *instance,
const struct memb_join *memb_join)
{
struct srp_addr *proc_list;
struct srp_addr *failed_list;
int gather_entered = 0;
int fail_minus_memb_entries = 0;
struct srp_addr fail_minus_memb[PROCESSOR_COUNT_MAX];
/* proc list first, failed list directly behind it */
proc_list = (struct srp_addr *)memb_join->end_of_memb_join;
failed_list = proc_list + memb_join->proc_list_entries;
/*
memb_set_print ("proclist", proc_list, memb_join->proc_list_entries);
memb_set_print ("faillist", failed_list, memb_join->failed_list_entries);
memb_set_print ("my_proclist", instance->my_proc_list, instance->my_proc_list_entries);
memb_set_print ("my_faillist", instance->my_failed_list, instance->my_failed_list_entries);
-*/
/*
 * Case 1: the sender has exactly our view of the membership
 */
if (memb_set_equal (proc_list,
memb_join->proc_list_entries,
instance->my_proc_list,
instance->my_proc_list_entries) &&
memb_set_equal (failed_list,
memb_join->failed_list_entries,
instance->my_failed_list,
instance->my_failed_list_entries)) {
memb_consensus_set (instance, &memb_join->system_from);
/*
 * With failed_to_recv set, the lists are reset to contain only this
 * node before the commit token is created
 */
if (memb_consensus_agreed (instance) && instance->failed_to_recv == 1) {
instance->failed_to_recv = 0;
srp_addr_copy (&instance->my_proc_list[0],
&instance->my_id);
instance->my_proc_list_entries = 1;
instance->my_failed_list_entries = 0;
memb_state_commit_token_create (instance);
memb_state_commit_enter (instance);
return;
}
/*
 * Only the lowest node of the configuration creates the commit token
 */
if (memb_consensus_agreed (instance) &&
memb_lowest_in_config (instance)) {
memb_state_commit_token_create (instance);
memb_state_commit_enter (instance);
} else {
return;
}
} else
/*
 * Case 2: the sender knows nothing that we do not already know
 */
if (memb_set_subset (proc_list,
memb_join->proc_list_entries,
instance->my_proc_list,
instance->my_proc_list_entries) &&
memb_set_subset (failed_list,
memb_join->failed_list_entries,
instance->my_failed_list,
instance->my_failed_list_entries)) {
return;
} else
/*
 * Case 3: the sender is one of the nodes we consider failed
 */
if (memb_set_subset (&memb_join->system_from, 1,
instance->my_failed_list, instance->my_failed_list_entries)) {
return;
} else {
/*
 * Case 4: new information - merge it and gather again
 */
memb_set_merge (proc_list,
memb_join->proc_list_entries,
instance->my_proc_list, &instance->my_proc_list_entries);
/*
 * The sender lists us as failed, so we list the sender as failed
 */
if (memb_set_subset (
&instance->my_id, 1,
failed_list, memb_join->failed_list_entries)) {
memb_set_merge (
&memb_join->system_from, 1,
instance->my_failed_list, &instance->my_failed_list_entries);
} else {
/*
 * The sender's failed list is only taken over when the sender is a
 * member of my_memb_list
 */
if (memb_set_subset (
&memb_join->system_from, 1,
instance->my_memb_list,
instance->my_memb_entries)) {
/*
 * NOTE(review): case 3 above already returned when the sender is in
 * my_failed_list and nothing visible here changes that list in between,
 * so the else branch below looks unreachable - confirm.
 */
if (memb_set_subset (
&memb_join->system_from, 1,
instance->my_failed_list,
instance->my_failed_list_entries) == 0) {
memb_set_merge (failed_list,
memb_join->failed_list_entries,
instance->my_failed_list, &instance->my_failed_list_entries);
} else {
memb_set_subtract (fail_minus_memb,
&fail_minus_memb_entries,
failed_list,
memb_join->failed_list_entries,
instance->my_memb_list,
instance->my_memb_entries);
memb_set_merge (fail_minus_memb,
fail_minus_memb_entries,
instance->my_failed_list,
&instance->my_failed_list_entries);
}
}
}
memb_state_gather_enter (instance, 11);
gather_entered = 1;
}
/*
 * Reached without having entered gather above while still operational:
 * enter gather now
 */
if (gather_entered == 0 &&
instance->memb_state == MEMB_STATE_OPERATIONAL) {
memb_state_gather_enter (instance, 12);
}
}
/*
 * Produce a host byte order copy of a join message.
 *
 * in  - message as received, in the sender's byte order
 * out - destination; it must have room for the fixed part plus the proc
 *       list and the failed list, which are stored back to back behind it
 *
 * The lengths of both lists are taken from the (converted) entry counts of
 * the message itself.
 */
static void memb_join_endian_convert (const struct memb_join *in, struct memb_join *out)
{
	const struct srp_addr *src_proc;
	const struct srp_addr *src_failed;
	struct srp_addr *dst_proc;
	struct srp_addr *dst_failed;
	int idx;

	/*
	 * Fixed part of the message
	 */
	out->header.endian_detector = ENDIAN_LOCAL;
	out->header.type = in->header.type;
	out->header.nodeid = swab32 (in->header.nodeid);
	srp_addr_copy_endian_convert (&out->system_from, &in->system_from);
	out->ring_seq = swab64 (in->ring_seq);
	out->proc_list_entries = swab32 (in->proc_list_entries);
	out->failed_list_entries = swab32 (in->failed_list_entries);

	/*
	 * Locate both lists; the failed list starts right after the proc list
	 */
	src_proc = (const struct srp_addr *)in->end_of_memb_join;
	dst_proc = (struct srp_addr *)out->end_of_memb_join;
	src_failed = src_proc + out->proc_list_entries;
	dst_failed = dst_proc + out->proc_list_entries;

	idx = 0;
	while (idx < out->proc_list_entries) {
		srp_addr_copy_endian_convert (&dst_proc[idx], &src_proc[idx]);
		idx++;
	}

	idx = 0;
	while (idx < out->failed_list_entries) {
		srp_addr_copy_endian_convert (&dst_failed[idx], &src_failed[idx]);
		idx++;
	}
}
static void memb_commit_token_endian_convert (const struct memb_commit_token *in, struct memb_commit_token *out)
{
int i;
struct srp_addr *in_addr = (struct srp_addr *)in->end_of_commit_token;
struct srp_addr *out_addr = (struct srp_addr *)out->end_of_commit_token;
struct memb_commit_token_memb_entry *in_memb_list;
struct memb_commit_token_memb_entry *out_memb_list;
out->header.type = in->header.type;
out->header.endian_detector = ENDIAN_LOCAL;
out->header.nodeid = swab32 (in->header.nodeid);
out->token_seq = swab32 (in->token_seq);
totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep);
out->ring_id.seq = swab64 (in->ring_id.seq);
out->retrans_flg = swab32 (in->retrans_flg);
out->memb_index = swab32 (in->memb_index);
out->addr_entries = swab32 (in->addr_entries);
in_memb_list = (struct memb_commit_token_memb_entry *)(in_addr + out->addr_entries);
out_memb_list = (struct memb_commit_token_memb_entry *)(out_addr + out->addr_entries);
for (i = 0; i < out->addr_entries; i++) {
srp_addr_copy_endian_convert (&out_addr[i], &in_addr[i]);
/*
* Only convert the memb entry if it has been set
*/
if (in_memb_list[i].ring_id.rep.family != 0) {
totemip_copy_endian_convert (&out_memb_list[i].ring_id.rep,
&in_memb_list[i].ring_id.rep);
out_memb_list[i].ring_id.seq =
swab64 (in_memb_list[i].ring_id.seq);
out_memb_list[i].aru = swab32 (in_memb_list[i].aru);
out_memb_list[i].high_delivered = swab32 (in_memb_list[i].high_delivered);
out_memb_list[i].received_flg = swab32 (in_memb_list[i].received_flg);
}
}
}
/*
 * Produce a host byte order copy of an ORF token, including the
 * rtr_list_entries retransmit requests it carries.
 */
static void orf_token_endian_convert (const struct orf_token *in, struct orf_token *out)
{
	int entry;

	/*
	 * Header
	 */
	out->header.endian_detector = ENDIAN_LOCAL;
	out->header.type = in->header.type;
	out->header.nodeid = swab32 (in->header.nodeid);

	/*
	 * Token fields
	 */
	out->seq = swab32 (in->seq);
	out->token_seq = swab32 (in->token_seq);
	out->aru = swab32 (in->aru);
	out->aru_addr = swab32(in->aru_addr);
	totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep);
	out->ring_id.seq = swab64 (in->ring_id.seq);
	out->fcc = swab32 (in->fcc);
	out->backlog = swab32 (in->backlog);
	out->retrans_flg = swab32 (in->retrans_flg);
	out->rtr_list_entries = swab32 (in->rtr_list_entries);

	/*
	 * Retransmit list
	 */
	entry = 0;
	while (entry < out->rtr_list_entries) {
		totemip_copy_endian_convert(&out->rtr_list[entry].ring_id.rep,
			&in->rtr_list[entry].ring_id.rep);
		out->rtr_list[entry].ring_id.seq = swab64 (in->rtr_list[entry].ring_id.seq);
		out->rtr_list[entry].seq = swab32 (in->rtr_list[entry].seq);
		entry++;
	}
}
/*
 * Produce a host byte order copy of the header of a multicast message.
 * Only the struct mcast fields are converted; any payload following the
 * header is not touched here.
 */
static void mcast_endian_convert (const struct mcast *in, struct mcast *out)
{
	/*
	 * Single byte fields are copied as they are
	 */
	out->header.type = in->header.type;
	out->header.encapsulated = in->header.encapsulated;

	out->header.endian_detector = ENDIAN_LOCAL;
	out->header.nodeid = swab32 (in->header.nodeid);

	out->seq = swab32 (in->seq);
	out->this_seqno = swab32 (in->this_seqno);
	out->node_id = swab32 (in->node_id);
	out->guarantee = swab32 (in->guarantee);

	out->ring_id.seq = swab64 (in->ring_id.seq);
	totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep);
	srp_addr_copy_endian_convert (&out->system_from, &in->system_from);
}
/*
 * Produce a host byte order copy of a merge detect message.
 */
static void memb_merge_detect_endian_convert (
	const struct memb_merge_detect *in,
	struct memb_merge_detect *out)
{
	/*
	 * Header
	 */
	out->header.endian_detector = ENDIAN_LOCAL;
	out->header.type = in->header.type;
	out->header.nodeid = swab32 (in->header.nodeid);

	/*
	 * Ring the sender belongs to and the sender itself
	 */
	out->ring_id.seq = swab64 (in->ring_id.seq);
	totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep);
	srp_addr_copy_endian_convert (&out->system_from, &in->system_from);
}
static int message_handler_memb_join (
struct totemsrp_instance *instance,
const void *msg,
size_t msg_len,
int endian_conversion_needed)
{
const struct memb_join *memb_join;
struct memb_join *memb_join_convert = alloca (msg_len);
if (endian_conversion_needed) {
memb_join = memb_join_convert;
memb_join_endian_convert (msg, memb_join_convert);
} else {
memb_join = msg;
}
/*
* If the process paused because it wasn't scheduled in a timely
* fashion, flush the join messages because they may be queued
* entries
*/
if (pause_flush (instance)) {
return (0);
}
if (instance->token_ring_id_seq < memb_join->ring_seq) {
instance->token_ring_id_seq = memb_join->ring_seq;
}
switch (instance->memb_state) {
case MEMB_STATE_OPERATIONAL:
memb_join_process (instance, memb_join);
break;
case MEMB_STATE_GATHER:
memb_join_process (instance, memb_join);
break;
case MEMB_STATE_COMMIT:
if (memb_set_subset (&memb_join->system_from,
1,
instance->my_new_memb_list,
instance->my_new_memb_entries) &&
memb_join->ring_seq >= instance->my_ring_id.seq) {
memb_join_process (instance, memb_join);
memb_state_gather_enter (instance, 13);
}
break;
case MEMB_STATE_RECOVERY:
if (memb_set_subset (&memb_join->system_from,
1,
instance->my_new_memb_list,
instance->my_new_memb_entries) &&
memb_join->ring_seq >= instance->my_ring_id.seq) {
memb_join_process (instance, memb_join);
memb_recovery_state_token_loss (instance);
memb_state_gather_enter (instance, 14);
}
break;
}
return (0);
}
/*
 * Receive handler for commit tokens.
 *
 * A private host byte order copy of the token is made first. What happens
 * next depends on the membership state:
 *
 * - operational: the token is discarded
 * - gather: the token is accepted, and the commit state entered, when its
 *   address list equals my_proc_list minus my_failed_list and its ring
 *   sequence number is newer than ours
 * - commit: the recovery state is entered once the token for our ring has
 *   visited every member (memb_index == addr_entries); this filters out
 *   retransmitted commit tokens
 * - recovery: the node whose first address equals the ring representative
 *   sends the initial ORF token and restarts the token timers
 *
 * Always returns 0.
 */
static int message_handler_memb_commit_token (
	struct totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed)
{
	struct memb_commit_token *token = alloca (msg_len);
	struct srp_addr expected_memb[PROCESSOR_COUNT_MAX];
	int expected_memb_entries;
	struct srp_addr *token_addr;

	log_printf (instance->totemsrp_log_level_debug,
		"got commit token\n");

	if (endian_conversion_needed) {
		memb_commit_token_endian_convert (msg, token);
	} else {
		memcpy (token, msg, msg_len);
	}
	token_addr = (struct srp_addr *)token->end_of_commit_token;

#ifdef TEST_DROP_COMMIT_TOKEN_PERCENTAGE
	if (random()%100 < TEST_DROP_COMMIT_TOKEN_PERCENTAGE) {
		return (0);
	}
#endif

	if (instance->memb_state == MEMB_STATE_GATHER) {
		/*
		 * The membership we expect is everything we know about minus
		 * everything we consider failed
		 */
		memb_set_subtract (expected_memb, &expected_memb_entries,
			instance->my_proc_list, instance->my_proc_list_entries,
			instance->my_failed_list, instance->my_failed_list_entries);

		if (memb_set_equal (token_addr,
			token->addr_entries,
			expected_memb,
			expected_memb_entries) &&
			token->ring_id.seq > instance->my_ring_id.seq) {

			memcpy (instance->commit_token, token, msg_len);
			memb_state_commit_enter (instance);
		}
	} else if (instance->memb_state == MEMB_STATE_COMMIT) {
		/*
		 * Only act on the token of our ring after it has traversed
		 * the complete address array
		 */
		if (token->ring_id.seq == instance->my_ring_id.seq &&
			token->memb_index == token->addr_entries) {

			memb_state_recovery_enter (instance, token);
		}
	} else if (instance->memb_state == MEMB_STATE_RECOVERY) {
		if (totemip_equal (&instance->my_id.addr[0], &instance->my_ring_id.rep)) {
			log_printf (instance->totemsrp_log_level_debug,
				"Sending initial ORF token\n");

			// TODO convert instead of initiate
			orf_token_send_initial (instance);
			reset_token_timeout (instance); // REVIEWED
			reset_token_retransmit_timeout (instance); // REVIEWED
		}
	}
	/*
	 * MEMB_STATE_OPERATIONAL: discard token
	 */
	return (0);
}
/*
 * Receive handler for token hold cancel messages.
 *
 * Messages for a ring other than ours are ignored. Otherwise
 * my_seq_unchanged is reset, and the node whose first address equals the
 * ring representative runs the token retransmit timeout handler right
 * away.
 *
 * Always returns 0.
 */
static int message_handler_token_hold_cancel (
	struct totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed)
{
	const struct token_hold_cancel *cancel = msg;

	if (memcmp (&cancel->ring_id, &instance->my_ring_id,
		sizeof (struct memb_ring_id)) != 0) {

		return (0);
	}

	instance->my_seq_unchanged = 0;
	if (totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0])) {
		timer_function_token_retransmit_timeout (instance);
	}
	return (0);
}
/*
 * Entry point for every message handed up to totemsrp.
 *
 * context - the totemsrp instance
 * msg     - received message, starting with a struct message_header
 * msg_len - number of bytes in msg
 *
 * Messages shorter than the common header and messages of an unknown type
 * are reported through the instance's logging callback at security level
 * and dropped. For all other messages the per type receive counter is
 * incremented and the handler registered for the type is called; its
 * return value is not evaluated.
 */
void main_deliver_fn (
	void *context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemsrp_instance *instance = context;
	const struct message_header *message_header = msg;

	if (msg_len < sizeof (struct message_header)) {
		log_printf (instance->totemsrp_log_level_security,
			"Received message is too short... ignoring %u.\n",
			(unsigned int)msg_len);
		return;
	}

	/*
	 * Account for the message. This switch also guarantees that the type
	 * is one of the known values before it is used as an index into the
	 * handler table below.
	 */
	switch (message_header->type) {
	case MESSAGE_TYPE_ORF_TOKEN:
		instance->stats.orf_token_rx++;
		break;
	case MESSAGE_TYPE_MCAST:
		instance->stats.mcast_rx++;
		break;
	case MESSAGE_TYPE_MEMB_MERGE_DETECT:
		instance->stats.memb_merge_detect_rx++;
		break;
	case MESSAGE_TYPE_MEMB_JOIN:
		instance->stats.memb_join_rx++;
		break;
	case MESSAGE_TYPE_MEMB_COMMIT_TOKEN:
		instance->stats.memb_commit_token_rx++;
		break;
	case MESSAGE_TYPE_TOKEN_HOLD_CANCEL:
		instance->stats.token_hold_cancel_rx++;
		break;
	default:
		/*
		 * Reported through the logging callback only; nothing is
		 * written to stdout.
		 */
		log_printf (instance->totemsrp_log_level_security, "Type of received message is wrong... ignoring %d.\n", (int)message_header->type);
		instance->stats.rx_msg_dropped++;
		return;
	}

	/*
	 * Handle incoming message
	 */
	totemsrp_message_handlers.handler_functions[(int)message_header->type] (
		instance,
		msg,
		msg_len,
		message_header->endian_detector != ENDIAN_LOCAL);
}
/*
 * Called when the address of interface iface_no changes.
 *
 * The new address is stored in my_id and in the first entry of
 * my_memb_list. On the very first change the ring id is created or loaded
 * and the optional service ready callback is invoked. The configured
 * members of the interface are then added, and once at least as many
 * changes as configured interfaces have been seen the gather state is
 * entered.
 */
void main_iface_change_fn (
	void *context,
	const struct totem_ip_address *iface_addr,
	unsigned int iface_no)
{
	struct totemsrp_instance *instance = context;
	int first_change;
	int member;

	totemip_copy (&instance->my_id.addr[iface_no], iface_addr);
	assert (instance->my_id.addr[iface_no].nodeid);
	totemip_copy (&instance->my_memb_list[0].addr[iface_no], iface_addr);

	/*
	 * The counter is advanced before any of the first-change work runs
	 */
	first_change = (instance->iface_changes == 0);
	instance->iface_changes++;

	if (first_change) {
		memb_ring_id_create_or_load (instance, &instance->my_ring_id);
		log_printf (
			instance->totemsrp_log_level_debug,
			"Created or loaded sequence id %lld.%s for this ring.\n",
			instance->my_ring_id.seq,
			totemip_print (&instance->my_ring_id.rep));

		if (instance->totemsrp_service_ready_fn) {
			instance->totemsrp_service_ready_fn ();
		}
	}

	member = 0;
	while (member < instance->totem_config->interfaces[iface_no].member_count) {
		totemsrp_member_add (instance,
			&instance->totem_config->interfaces[iface_no].member_list[member],
			iface_no);
		member++;
	}

	if (instance->iface_changes >= instance->totem_config->interface_count) {
		memb_state_gather_enter (instance, 15);
	}
}
/*
 * Reduce the configured network MTU by the size of the multicast header.
 */
void totemsrp_net_mtu_adjust (struct totem_config *totem_config)
{
	totem_config->net_mtu = totem_config->net_mtu - sizeof (struct mcast);
}
/*
 * Register the callback stored in totemsrp_service_ready_fn.
 *
 * context             - the totemsrp instance
 * totem_service_ready - function to call; stored as given
 */
void totemsrp_service_ready_register (
	void *context,
	void (*totem_service_ready) (void))
{
	struct totemsrp_instance *srp = context;

	srp->totemsrp_service_ready_fn = totem_service_ready;
}
/*
 * Add a member to ring ring_no by forwarding the request to totemrrp.
 *
 * Returns whatever totemrrp_member_add returns.
 */
int totemsrp_member_add (
	void *context,
	const struct totem_ip_address *member,
	int ring_no)
{
	struct totemsrp_instance *srp = context;

	return (totemrrp_member_add (srp->totemrrp_context, member, ring_no));
}
/*
 * Remove a member from ring ring_no by forwarding the request to totemrrp.
 *
 * Returns whatever totemrrp_member_remove returns.
 */
int totemsrp_member_remove (
	void *context,
	const struct totem_ip_address *member,
	int ring_no)
{
	struct totemsrp_instance *srp = context;

	return (totemrrp_member_remove (srp->totemrrp_context, member, ring_no));
}
diff --git a/exec/totemudp.c b/exec/totemudp.c
index fb801f3d..ed2f03c0 100644
--- a/exec/totemudp.c
+++ b/exec/totemudp.c
@@ -1,1947 +1,1956 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2009 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <assert.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netdb.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <sched.h>
#include <time.h>
#include <sys/time.h>
#include <sys/poll.h>
#include <limits.h>
#include <corosync/sq.h>
#include <corosync/swab.h>
#include <corosync/list.h>
#include <corosync/hdb.h>
#include <qb/qbdefs.h>
#include <qb/qbloop.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/engine/logsys.h>
#include "totemudp.h"
#include "crypto.h"
#include "util.h"
#ifdef HAVE_LIBNSS
#include <nss.h>
#include <pk11pub.h>
#include <pkcs11.h>
#include <prerror.h>
#endif
#ifndef MSG_NOSIGNAL
#define MSG_NOSIGNAL 0
#endif
/*
 * Room for TRANSMITS_ALLOWED frames of maximum size
 * (presumably used to size the multicast socket buffers - TODO confirm)
 */
#define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX)
/*
 * Bit flags that are OR-ed together in totemudp_instance.netif_state_report
 */
#define NETIF_STATE_REPORT_UP 1
#define NETIF_STATE_REPORT_DOWN 2
/*
 * Bind states (presumably the values of totemudp_instance.netif_bind_state
 * - TODO confirm)
 */
#define BIND_STATE_UNBOUND 0
#define BIND_STATE_REGULAR 1
#define BIND_STATE_LOOPBACK 2
/*
 * Size in bytes of the digest stored in struct security_header; the SHA1
 * digest produced for the HMAC has to fit into it
 */
#define HMAC_HASH_SIZE 20
/*
 * Header placed in front of every authenticated/encrypted message.
 * The digest covers everything that follows it, i.e. the salt and the
 * message. The struct is packed, so its size is exactly
 * HMAC_HASH_SIZE + 16 bytes.
 */
struct security_header {
unsigned char hash_digest[HMAC_HASH_SIZE]; /* The hash *MUST* be first in the data structure */
unsigned char salt[16]; /* random number */
char msg[0]; /* start of the message; zero length trailing array */
} __attribute__((packed));
/*
 * Scratch state consisting of one frame sized buffer and a PRNG state
 * (presumably one per multicast worker thread, going by the name -
 * TODO confirm)
 */
struct totemudp_mcast_thread_state {
unsigned char iobuf[FRAME_SIZE_MAX]; /* room for one frame of maximum size */
prng_state prng_state;
};
/*
 * The three sockets used by one totemudp instance
 * (presumably file descriptors - TODO confirm)
 */
struct totemudp_socket {
int mcast_recv; /* presumably: receives multicast messages */
int mcast_send; /* presumably: sends multicast messages */
int token; /* presumably: token traffic */
};
/*
 * Complete state of one totemudp transport instance.
 * totemudp_instance_initialize() zeroes the structure and sets up the
 * receive iovecs, netif_state_report and my_memb_entries.
 */
struct totemudp_instance {
/*
 * Cryptographic state
 */
hmac_state totemudp_hmac_state;
prng_state totemudp_prng_state;
#ifdef HAVE_LIBNSS
PK11SymKey *nss_sym_key; /* key used for encryption/decryption */
PK11SymKey *nss_sym_key_sign; /* key used for the SHA1 HMAC */
#endif
unsigned char totemudp_private_key[1024];
unsigned int totemudp_private_key_len;
qb_loop_t *totemudp_poll_handle;
struct totem_interface *totem_interface;
int netif_state_report; /* combination of NETIF_STATE_REPORT_* flags */
int netif_bind_state;
/*
 * Callbacks into the layer above; context is passed back to them
 */
void *context;
void (*totemudp_deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len);
void (*totemudp_iface_change_fn) (
void *context,
const struct totem_ip_address *iface_address);
void (*totemudp_target_set_completed) (void *context);
/*
 * Function and data used to log messages
 */
int totemudp_log_level_security;
int totemudp_log_level_error;
int totemudp_log_level_warning;
int totemudp_log_level_notice;
int totemudp_log_level_debug;
int totemudp_subsys_id;
void (*totemudp_log_printf) (
- unsigned int rec_ident,
+ int level,
+ int subsys,
const char *function,
const char *file,
int line,
const char *format,
- ...)__attribute__((format(printf, 5, 6)));
+ ...)__attribute__((format(printf, 6, 7)));
void *udp_context;
/*
 * Receive buffers; totemudp_iov_recv and totemudp_iov_recv_flush are
 * pointed at them during initialization
 */
char iov_buffer[FRAME_SIZE_MAX];
char iov_buffer_flush[FRAME_SIZE_MAX];
struct iovec totemudp_iov_recv;
struct iovec totemudp_iov_recv_flush;
struct totemudp_socket totemudp_sockets;
struct totem_ip_address mcast_address;
/*
 * Statistics
 */
int stats_sent;
int stats_recv;
int stats_delv;
int stats_remcasts;
int stats_orf_token;
struct timeval stats_tv_start;
struct totem_ip_address my_id;
int firstrun;
qb_loop_timer_handle timer_netif_check_timeout;
unsigned int my_memb_entries; /* starts at 1, see totemudp_instance_initialize */
int flushing;
struct totem_config *totem_config;
struct totem_ip_address token_target;
};
/*
 * One message together with the instance it belongs to
 * (presumably a unit of work handed to a worker - TODO confirm)
 */
struct work_item {
const void *msg; /* message data */
unsigned int msg_len; /* length of msg in bytes */
struct totemudp_instance *instance;
};
/*
 * Forward declaration; the definition is further down in this file.
 * NOTE(review): going by the parameter names this creates the sockets for
 * bindnet_address/mcastaddress and reports the bound address in bound_to -
 * confirm against the definition.
 */
static int totemudp_build_sockets (
struct totemudp_instance *instance,
struct totem_ip_address *bindnet_address,
struct totem_ip_address *mcastaddress,
struct totemudp_socket *sockets,
struct totem_ip_address *bound_to);
/*
 * File scope address object (presumably holds the loopback address -
 * TODO confirm where it is filled in)
 */
static struct totem_ip_address localhost;
/*
 * Bring a totemudp instance into its initial state: everything zeroed,
 * both interface state reports enabled, the two receive iovecs pointing at
 * their frame sized buffers and a membership of one processor.
 */
static void totemudp_instance_initialize (struct totemudp_instance *instance)
{
	memset (instance, 0, sizeof (*instance));

	/*
	 * A membership always contains at least this processor
	 */
	instance->my_memb_entries = 1;

	instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN;

	/*
	 * Regular receive buffer
	 */
	instance->totemudp_iov_recv.iov_len = FRAME_SIZE_MAX;
	instance->totemudp_iov_recv.iov_base = instance->iov_buffer;

	/*
	 * Receive buffer used while flushing
	 */
	instance->totemudp_iov_recv_flush.iov_len = FRAME_SIZE_MAX;
	instance->totemudp_iov_recv_flush.iov_base = instance->iov_buffer_flush;
}
/*
 * Logging helpers. Both macros expect a variable named "instance" (a
 * struct totemudp_instance pointer) to be in scope and forward to its
 * totemudp_log_printf callback together with the subsystem id and the
 * current function, file and line.
 *
 * log_printf    - log a printf style message at the given level
 * LOGSYS_PERROR - like log_printf, but appends ": <error text> (<err_num>)"
 *                 and a newline; the error text is obtained with
 *                 qb_strerror_r
 *
 * NOTE(review): log_printf ends in "while (0);" - the trailing semicolon
 * makes "if (x) log_printf (...); else ..." a syntax error. LOGSYS_PERROR
 * does not have this problem.
 */
#define log_printf(level, format, args...) \
do { \
instance->totemudp_log_printf ( \
- LOGSYS_ENCODE_RECID(level, \
- instance->totemudp_subsys_id, \
- LOGSYS_RECID_LOG), \
+ level, instance->totemudp_subsys_id, \
__FUNCTION__, __FILE__, __LINE__, \
(const char *)format, ##args); \
} while (0);
+#define LOGSYS_PERROR(err_num, level, fmt, args...) \
+do { \
+ char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \
+ const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \
+ instance->totemudp_log_printf ( \
+ level, instance->totemudp_subsys_id, \
+ __FUNCTION__, __FILE__, __LINE__, \
+ fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \
+ } while(0)
+
/*
 * Authenticate and decrypt a received message in place (SOBER128 stream
 * cipher, SHA1 HMAC).
 *
 * instance - provides the private key and the HMAC state
 * iov      - iov[0] holds the message, starting with a security_header
 * iov_len  - not used; only iov[0] is processed
 *
 * Returns 0 when the digest matches and the message has been decrypted,
 * -1 when the message is shorter than a security header or the digest
 * does not match (the message is left untouched in both cases).
 */
static int authenticate_and_decrypt_sober (
	struct totemudp_instance *instance,
	struct iovec *iov,
	unsigned int iov_len)
{
	unsigned char keys[48];
	struct security_header *header = (struct security_header *)iov[0].iov_base;
	prng_state keygen_prng_state;
	prng_state stream_prng_state;
	unsigned char *hmac_key = &keys[32];
	unsigned char *cipher_key = &keys[16];
	unsigned char *initial_vector = &keys[0];
	unsigned char digest_comparison[HMAC_HASH_SIZE];
	unsigned long len;

	/*
	 * The lengths computed below are unsigned; without this check a
	 * message shorter than the header would make them wrap around to
	 * huge values.
	 */
	if (iov->iov_len < sizeof (struct security_header)) {
		return (-1);
	}

	/*
	 * Generate MAC, CIPHER, IV keys from private key
	 */
	memset (keys, 0, sizeof (keys));
	sober128_start (&keygen_prng_state);
	sober128_add_entropy (instance->totemudp_private_key,
		instance->totemudp_private_key_len, &keygen_prng_state);
	sober128_add_entropy (header->salt, sizeof (header->salt), &keygen_prng_state);

	sober128_read (keys, sizeof (keys), &keygen_prng_state);

	/*
	 * Setup stream cipher
	 */
	sober128_start (&stream_prng_state);
	sober128_add_entropy (cipher_key, 16, &stream_prng_state);
	sober128_add_entropy (initial_vector, 16, &stream_prng_state);

	/*
	 * Authenticate contents of message: the digest covers everything
	 * behind the digest field itself
	 */
	hmac_init (&instance->totemudp_hmac_state, DIGEST_SHA1, hmac_key, 16);

	hmac_process (&instance->totemudp_hmac_state,
		(unsigned char *)iov->iov_base + HMAC_HASH_SIZE,
		iov->iov_len - HMAC_HASH_SIZE);

	len = hash_descriptor[DIGEST_SHA1]->hashsize;
	assert (HMAC_HASH_SIZE >= len);
	hmac_done (&instance->totemudp_hmac_state, digest_comparison, &len);

	if (memcmp (digest_comparison, header->hash_digest, len) != 0) {
		return (-1);
	}

	/*
	 * Decrypt the contents of the message with the cipher key
	 */
	sober128_read ((unsigned char*)iov->iov_base +
			sizeof (struct security_header),
		iov->iov_len - sizeof (struct security_header),
		&stream_prng_state);

	return (0);
}
static void init_sober_crypto(
struct totemudp_instance *instance)
{
log_printf(instance->totemudp_log_level_notice,
"Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0).\n");
rng_make_prng (128, PRNG_SOBER, &instance->totemudp_prng_state, NULL);
}
#ifdef HAVE_LIBNSS
/*
 * Concatenate the contents of an iovec array into one newly allocated
 * buffer.
 *
 * iov      - array of segments to copy
 * iov_len  - number of segments in iov
 * buf_size - receives the total number of bytes copied; not written when
 *            the allocation fails
 *
 * Returns the buffer, which the caller has to free(), or NULL when malloc
 * fails.
 */
static unsigned char *copy_from_iovec(
	const struct iovec *iov,
	unsigned int iov_len,
	size_t *buf_size)
{
	unsigned char *flat;
	size_t total = 0;
	size_t offset = 0;
	unsigned int seg;

	/*
	 * First pass: how much room is needed
	 */
	seg = 0;
	while (seg < iov_len) {
		total += iov[seg].iov_len;
		seg++;
	}

	flat = malloc(total);
	if (flat == NULL) {
		return NULL;
	}

	/*
	 * Second pass: append the segments one after the other
	 */
	seg = 0;
	while (seg < iov_len) {
		memcpy(flat + offset, iov[seg].iov_base, iov[seg].iov_len);
		offset += iov[seg].iov_len;
		seg++;
	}

	*buf_size = total;
	return flat;
}
/*
 * Scatter a flat buffer over an iovec array.
 *
 * Segments are filled in order with as many bytes as they can take. The
 * first segment that cannot be filled completely receives the remaining
 * bytes, has its iov_len shortened to that amount, and ends the copy;
 * segments behind it are left untouched.
 *
 * iov      - segments to fill; iov_len of one segment may be modified
 * iov_len  - number of segments in iov
 * buf      - data to distribute
 * buf_size - number of bytes in buf
 */
static void copy_to_iovec(
	struct iovec *iov,
	unsigned int iov_len,
	const unsigned char *buf,
	size_t buf_size)
{
	size_t consumed = 0;
	unsigned int seg = 0;

	while (seg < iov_len) {
		struct iovec *const target = &iov[seg];
		const size_t capacity = target->iov_len;
		size_t chunk = capacity;

		/*
		 * Do not read past the end of the source buffer
		 */
		if (buf_size < consumed + capacity) {
			chunk = buf_size - consumed;
		}

		memcpy(target->iov_base, buf + consumed, chunk);
		consumed += chunk;

		if (chunk != capacity) {
			/*
			 * Source exhausted inside this segment
			 */
			target->iov_len = chunk;
			return;
		}
		seg++;
	}
}
/*
 * Set up the NSS based security (mode 1).
 *
 * NSS is initialised without a database and the configured private key is
 * imported twice: once for the configured cipher (nss_sym_key) and once
 * for the SHA1 HMAC (nss_sym_key_sign).
 *
 * Failures are logged at security level; the function returns nothing, so
 * after a failure one or both keys of the instance remain NULL.
 *
 * The slot references obtained from PK11_GetBestSlot are released before
 * returning on every path.
 */
static void init_nss_crypto(
	struct totemudp_instance *instance)
{
	PK11SlotInfo* aes_slot = NULL;
	PK11SlotInfo* sha1_slot = NULL;
	SECItem key_item;
	SECStatus rv;

	log_printf(instance->totemudp_log_level_notice,
		"Initializing transmit/receive security: NSS AES128CBC/SHA1HMAC (mode 1).\n");
	rv = NSS_NoDB_Init(".");
	if (rv != SECSuccess)
	{
		log_printf(instance->totemudp_log_level_security, "NSS initialization failed (err %d)\n",
			PR_GetError());
		goto out;
	}

	aes_slot = PK11_GetBestSlot(instance->totem_config->crypto_crypt_type, NULL);
	if (aes_slot == NULL)
	{
		log_printf(instance->totemudp_log_level_security, "Unable to find security slot (err %d)\n",
			PR_GetError());
		goto out;
	}

	sha1_slot = PK11_GetBestSlot(CKM_SHA_1_HMAC, NULL);
	if (sha1_slot == NULL)
	{
		log_printf(instance->totemudp_log_level_security, "Unable to find security slot (err %d)\n",
			PR_GetError());
		goto out;
	}

	/*
	 * Make the private key into a SymKey that we can use
	 */
	key_item.type = siBuffer;
	key_item.data = instance->totem_config->private_key;
	key_item.len = 32; /* first 32 bytes (256 bits) of the private key */

	instance->nss_sym_key = PK11_ImportSymKey(aes_slot,
		instance->totem_config->crypto_crypt_type,
		PK11_OriginUnwrap, CKA_ENCRYPT|CKA_DECRYPT,
		&key_item, NULL);
	if (instance->nss_sym_key == NULL)
	{
		log_printf(instance->totemudp_log_level_security, "Failure to import key into NSS (err %d)\n",
			PR_GetError());
		goto out;
	}

	instance->nss_sym_key_sign = PK11_ImportSymKey(sha1_slot,
		CKM_SHA_1_HMAC,
		PK11_OriginUnwrap, CKA_SIGN,
		&key_item, NULL);
	if (instance->nss_sym_key_sign == NULL) {
		log_printf(instance->totemudp_log_level_security, "Failure to import key into NSS (err %d)\n",
			PR_GetError());
		goto out;
	}

out:
	/*
	 * The slots are only needed for the import; drop the references
	 * taken by PK11_GetBestSlot so they are not leaked.
	 */
	if (sha1_slot != NULL) {
		PK11_FreeSlot(sha1_slot);
	}
	if (aes_slot != NULL) {
		PK11_FreeSlot(aes_slot);
	}
	return;
}
/*
 * Encrypt and sign a message with NSS.
 *
 * instance - provides the keys and the configured cipher type
 * buf      - output buffer; receives the security header followed by the
 *            encrypted message.
 *            NOTE(review): the bounds used below assume buf has room for
 *            FRAME_SIZE_MAX bytes - confirm against the callers.
 * buf_len  - receives the number of bytes written to buf on success
 * iovec    - message to send; it starts with room for a security header,
 *            which is skipped when encrypting
 * iov_len  - number of entries in iovec
 *
 * A random IV is generated, stored in the salt field of the header and
 * used to encrypt the message. The SHA1 HMAC over salt and ciphertext is
 * then stored in the digest field of the header.
 *
 * Returns 0 on success and -1 on any failure. All temporary resources are
 * released on every path.
 */
static int encrypt_and_sign_nss (
	struct totemudp_instance *instance,
	unsigned char *buf,
	size_t *buf_len,
	const struct iovec *iovec,
	unsigned int iov_len)
{
	PK11Context* enc_context = NULL;
	SECStatus rv1, rv2;
	int tmp1_outlen;
	unsigned int tmp2_outlen;
	unsigned char *inbuf;
	unsigned char *data;
	unsigned char *outdata;
	size_t datalen;
	SECItem no_params;
	SECItem iv_item;
	struct security_header *header;
	SECItem *nss_sec_param = NULL;
	unsigned char nss_iv_data[16];
	SECStatus rv;
	int res = -1;

	no_params.type = siBuffer;
	no_params.data = 0;
	no_params.len = 0;

	tmp1_outlen = tmp2_outlen = 0;
	inbuf = copy_from_iovec(iovec, iov_len, &datalen);
	if (!inbuf) {
		log_printf(instance->totemudp_log_level_security, "malloc error copying buffer from iovec\n");
		return -1;
	}

	/*
	 * datalen is unsigned: make sure the header can be skipped
	 */
	if (datalen < sizeof (struct security_header)) {
		log_printf(instance->totemudp_log_level_security,
			"Message to encrypt is shorter than the security header\n");
		goto out;
	}

	data = inbuf + sizeof (struct security_header);
	datalen -= sizeof (struct security_header);

	outdata = buf + sizeof (struct security_header);
	header = (struct security_header *)buf;

	/*
	 * Never continue with an IV that was not filled in by the random
	 * number generator
	 */
	rv = PK11_GenerateRandom (
		nss_iv_data,
		sizeof (nss_iv_data));
	if (rv != SECSuccess) {
		log_printf(instance->totemudp_log_level_security,
			"Failure to generate a random number %d\n",
			PR_GetError());
		goto out;
	}

	memcpy(header->salt, nss_iv_data, sizeof(nss_iv_data));
	iv_item.type = siBuffer;
	iv_item.data = nss_iv_data;
	iv_item.len = sizeof (nss_iv_data);

	nss_sec_param = PK11_ParamFromIV (
		instance->totem_config->crypto_crypt_type,
		&iv_item);
	if (nss_sec_param == NULL) {
		log_printf(instance->totemudp_log_level_security,
			"Failure to set up PKCS11 param (err %d)\n",
			PR_GetError());
		goto out;
	}

	/*
	 * Create cipher context for encryption
	 */
	enc_context = PK11_CreateContextBySymKey (
		instance->totem_config->crypto_crypt_type,
		CKA_ENCRYPT,
		instance->nss_sym_key,
		nss_sec_param);
	if (!enc_context) {
		char err[1024];
		PR_GetErrorText(err);
		err[PR_GetErrorTextLength()] = 0;
		log_printf(instance->totemudp_log_level_security,
			"PK11_CreateContext failed (encrypt) crypt_type=%d (err %d): %s\n",
			instance->totem_config->crypto_crypt_type,
			PR_GetError(), err);
		goto out;
	}

	/*
	 * Both bounds describe the room left behind the security header;
	 * the one for the final block also accounts for what the cipher
	 * operation already wrote.
	 */
	rv1 = PK11_CipherOp(enc_context, outdata,
		&tmp1_outlen, FRAME_SIZE_MAX - sizeof(struct security_header),
		data, datalen);
	rv2 = PK11_DigestFinal(enc_context, outdata + tmp1_outlen, &tmp2_outlen,
		FRAME_SIZE_MAX - sizeof(struct security_header) - tmp1_outlen);
	PK11_DestroyContext(enc_context, PR_TRUE);

	*buf_len = tmp1_outlen + tmp2_outlen;

	if (rv1 != SECSuccess || rv2 != SECSuccess)
		goto out;

	/* Now do the digest */
	enc_context = PK11_CreateContextBySymKey(CKM_SHA_1_HMAC,
		CKA_SIGN, instance->nss_sym_key_sign, &no_params);
	if (!enc_context) {
		char err[1024];
		PR_GetErrorText(err);
		err[PR_GetErrorTextLength()] = 0;
		log_printf(instance->totemudp_log_level_security, "encrypt: PK11_CreateContext failed (digest) err %d: %s\n",
			PR_GetError(), err);
		goto out;
	}

	/*
	 * The digest covers the 16 byte salt, which sits directly in front
	 * of the ciphertext, and the ciphertext itself
	 */
	PK11_DigestBegin(enc_context);

	rv1 = PK11_DigestOp(enc_context, outdata - 16, *buf_len + 16);
	rv2 = PK11_DigestFinal(enc_context, header->hash_digest, &tmp2_outlen, sizeof(header->hash_digest));

	PK11_DestroyContext(enc_context, PR_TRUE);

	if (rv1 != SECSuccess || rv2 != SECSuccess)
		goto out;

	*buf_len = *buf_len + sizeof(struct security_header);
	res = 0;

out:
	/*
	 * Single exit: the IV parameter and the flattened input are released
	 * on success and on every failure
	 */
	if (nss_sec_param != NULL) {
		SECITEM_FreeItem(nss_sec_param, PR_TRUE);
	}
	free(inbuf);
	return res;
}
/*
 * Authenticate and decrypt an NSS protected datagram in place.
 *
 * The SHA1-HMAC stored in the security header is recomputed over the salt
 * and the ciphertext that follow it.  Only when both digests match is the
 * payload decrypted with the configured cipher (the header salt is used as
 * the IV) and copied back into iov behind the security header.
 *
 * instance - transport instance providing the NSS keys and log levels
 * iov      - received datagram, beginning with a struct security_header
 * iov_len  - number of entries in iov
 *
 * Returns 0 on success and -1 on any failure.  On failure the contents of
 * iov are left untouched.
 */
static int authenticate_and_decrypt_nss (
	struct totemudp_instance *instance,
	struct iovec *iov,
	unsigned int iov_len)
{
	PK11Context *enc_context = NULL;
	SECStatus rv1, rv2;
	int tmp1_outlen = 0;
	unsigned int tmp2_outlen = 0;
	unsigned char outbuf[FRAME_SIZE_MAX];
	unsigned char digest[HMAC_HASH_SIZE];
	unsigned char *outdata;
	int result_len;
	unsigned char *data;
	unsigned char *inbuf;
	/* Non-NULL only when this function allocated a flattened copy */
	unsigned char *owned_buf = NULL;
	size_t datalen;
	struct security_header *header = (struct security_header *)iov[0].iov_base;
	SECItem no_params;
	SECItem ivdata;
	int res = -1;

	no_params.type = siBuffer;
	no_params.data = 0;
	no_params.len = 0;

	if (iov_len > 1) {
		owned_buf = copy_from_iovec(iov, iov_len, &datalen);
		if (!owned_buf) {
			log_printf(instance->totemudp_log_level_security, "malloc error copying buffer from iovec\n");
			return -1;
		}
		inbuf = owned_buf;
	}
	else {
		/* Single iov: work directly on the caller's buffer, never free it */
		inbuf = (unsigned char *)iov[0].iov_base;
		datalen = iov[0].iov_len;
	}

	/*
	 * The length arithmetic below subtracts the header size, so anything
	 * shorter than a security header cannot be processed.
	 */
	if (datalen < sizeof (struct security_header)) {
		log_printf(instance->totemudp_log_level_security,
			"Received message is too short to authenticate\n");
		goto out;
	}

	/* The digest covers the 16 byte salt plus everything after the header */
	data = inbuf + sizeof (struct security_header) - 16;
	datalen = datalen - sizeof (struct security_header) + 16;

	outdata = outbuf + sizeof (struct security_header);

	/* Check the digest */
	enc_context = PK11_CreateContextBySymKey (
		CKM_SHA_1_HMAC, CKA_SIGN,
		instance->nss_sym_key_sign,
		&no_params);
	if (!enc_context) {
		char err[1024];
		PR_GetErrorText(err);
		err[PR_GetErrorTextLength()] = 0;
		log_printf(instance->totemudp_log_level_security, "PK11_CreateContext failed (check digest) err %d: %s\n",
			PR_GetError(), err);
		goto out;
	}

	PK11_DigestBegin(enc_context);

	rv1 = PK11_DigestOp(enc_context, data, datalen);
	rv2 = PK11_DigestFinal(enc_context, digest, &tmp2_outlen, sizeof(digest));

	PK11_DestroyContext(enc_context, PR_TRUE);

	if (rv1 != SECSuccess || rv2 != SECSuccess) {
		log_printf(instance->totemudp_log_level_security, "Digest check failed\n");
		goto out;
	}

	if (memcmp(digest, header->hash_digest, tmp2_outlen) != 0) {
		log_printf(instance->totemudp_log_level_error, "Digest does not match\n");
		goto out;
	}

	/*
	 * Get rid of salt
	 */
	data += 16;
	datalen -= 16;

	/* Create cipher context for decryption */
	ivdata.type = siBuffer;
	ivdata.data = header->salt;
	ivdata.len = sizeof(header->salt);

	enc_context = PK11_CreateContextBySymKey(
		instance->totem_config->crypto_crypt_type,
		CKA_DECRYPT,
		instance->nss_sym_key, &ivdata);
	if (!enc_context) {
		log_printf(instance->totemudp_log_level_security,
			"PK11_CreateContext (decrypt) failed (err %d)\n",
			PR_GetError());
		goto out;
	}

	rv1 = PK11_CipherOp(enc_context, outdata, &tmp1_outlen,
		sizeof(outbuf) - sizeof (struct security_header),
		data, datalen);
	if (rv1 != SECSuccess) {
		log_printf(instance->totemudp_log_level_security,
			"PK11_CipherOp (decrypt) failed (err %d)\n",
			PR_GetError());
		PK11_DestroyContext(enc_context, PR_TRUE);
		goto out;
	}

	/*
	 * outdata starts sizeof (struct security_header) bytes into outbuf,
	 * so the room left for the final block must account for that offset.
	 */
	rv2 = PK11_DigestFinal(enc_context, outdata + tmp1_outlen, &tmp2_outlen,
		sizeof(outbuf) - sizeof (struct security_header) - tmp1_outlen);
	PK11_DestroyContext(enc_context, PR_TRUE);
	if (rv2 != SECSuccess) {
		goto out;
	}

	result_len = tmp1_outlen + tmp2_outlen + sizeof (struct security_header);

	/* Copy it back to the buffer */
	copy_to_iovec(iov, iov_len, outbuf, result_len);
	res = 0;

out:
	/* free (NULL) is a no-op, so this only releases a copy made above */
	free (owned_buf);
	return res;
}
#endif
/*
 * Encrypt and sign a message with the SOBER128 stream cipher and SHA1-HMAC.
 *
 * buf receives a struct security_header followed by the encrypted
 * concatenation of iovec[1] .. iovec[iov_len - 1] (iovec[0] is skipped).
 * *buf_len is set to the total number of bytes written.  Always returns 0.
 */
static int encrypt_and_sign_sober (
	struct totemudp_instance *instance,
	unsigned char *buf,
	size_t *buf_len,
	const struct iovec *iovec,
	unsigned int iov_len)
{
	/* keys[] layout: 16 bytes IV, 16 bytes cipher key, 16 bytes HMAC key */
	unsigned char keys[48];
	unsigned char *initial_vector = &keys[0];
	unsigned char *cipher_key = &keys[16];
	unsigned char *hmac_key = &keys[32];
	struct security_header *header = (struct security_header *)buf;
	unsigned char *payload = buf + sizeof (struct security_header);
	unsigned char *cursor = payload;
	size_t total = 0;
	unsigned long digest_len;
	unsigned int idx;
	hmac_state hmac_st;
	prng_state keygen_prng_state;
	prng_state stream_prng_state;
	prng_state *salt_prng = &instance->totemudp_prng_state;

	memset (keys, 0, sizeof (keys));
	memset (header->salt, 0, sizeof (header->salt));

	/*
	 * Draw a fresh salt, then derive the IV, cipher and MAC keys from
	 * the private key combined with that salt
	 */
	sober128_read (header->salt, sizeof (header->salt), salt_prng);

	sober128_start (&keygen_prng_state);
	sober128_add_entropy (instance->totemudp_private_key,
		instance->totemudp_private_key_len,
		&keygen_prng_state);
	sober128_add_entropy (header->salt, sizeof (header->salt),
		&keygen_prng_state);
	sober128_read (keys, sizeof (keys), &keygen_prng_state);

	/*
	 * Key the stream cipher
	 */
	sober128_start (&stream_prng_state);
	sober128_add_entropy (cipher_key, 16, &stream_prng_state);
	sober128_add_entropy (initial_vector, 16, &stream_prng_state);

	total = sizeof (struct security_header);

	/*
	 * Gather the plaintext behind the header; entry 0 is not copied
	 */
	for (idx = 1; idx < iov_len; idx++) {
		const size_t chunk = iovec[idx].iov_len;

		memcpy (cursor, iovec[idx].iov_base, chunk);
		cursor += chunk;
		total += chunk;
	}

	/*
	 * Run the stream cipher over the gathered plaintext
	 */
	sober128_read (payload,
		total - sizeof (struct security_header),
		&stream_prng_state);

	/*
	 * Sign everything after the digest field and store the signature
	 * in the header
	 */
	memset (&hmac_st, 0, sizeof (hmac_st));
	hmac_init (&hmac_st, DIGEST_SHA1, hmac_key, 16);
	hmac_process (&hmac_st,
		buf + HMAC_HASH_SIZE,
		total - HMAC_HASH_SIZE);

	digest_len = hash_descriptor[DIGEST_SHA1]->hashsize;
	hmac_done (&hmac_st, header->hash_digest, &digest_len);

	*buf_len = total;

	return 0;
}
/*
 * Dispatch to the encryption backend selected by the configuration.
 *
 * SOBER is used when it is the configured crypto type or when only the OLD
 * crypto format is accepted; NSS is used when configured and compiled in.
 * Returns the backend's result, or -1 when no backend matches.
 */
static int encrypt_and_sign_worker (
	struct totemudp_instance *instance,
	unsigned char *buf,
	size_t *buf_len,
	const struct iovec *iovec,
	unsigned int iov_len)
{
	const int use_sober =
		(instance->totem_config->crypto_type == TOTEM_CRYPTO_SOBER) ||
		(instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_OLD);

	if (!use_sober) {
#ifdef HAVE_LIBNSS
		if (instance->totem_config->crypto_type == TOTEM_CRYPTO_NSS) {
			return encrypt_and_sign_nss(instance, buf, buf_len, iovec, iov_len);
		}
#endif
		return -1;
	}

	return encrypt_and_sign_sober(instance, buf, buf_len, iovec, iov_len);
}
/*
 * Authenticate and decrypt a received datagram.
 *
 * The final byte of the last iov entry names the crypto type and is
 * stripped before the matching backend runs.  When no backend accepts the
 * packet, the byte is put back and the whole packet is tried as SOBER.
 * Returns the result of the last backend that ran.
 */
static int authenticate_and_decrypt (
	struct totemudp_instance *instance,
	struct iovec *iov,
	unsigned int iov_len)
{
	struct iovec *last = &iov[iov_len - 1];
	unsigned char *tail = (unsigned char *)last->iov_base;
	unsigned char crypto_type;
	int rc = -1;

	/*
	 * Drop the trailing type byte from the buffer and remember its value
	 */
	last->iov_len -= 1;
	crypto_type = tail[last->iov_len];

	if (crypto_type == TOTEM_CRYPTO_SOBER) {
		rc = authenticate_and_decrypt_sober(instance, iov, iov_len);
	}

	/*
	 * Higher crypto options are only attempted when NEW has been requested
	 */
#ifdef HAVE_LIBNSS
	if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_NEW &&
	    crypto_type == TOTEM_CRYPTO_NSS) {
		rc = authenticate_and_decrypt_nss(instance, iov, iov_len);
	}
#endif

	/*
	 * Nothing worked: the packet might be from aisexec, which has no
	 * type byte, so decrypt the whole packet
	 */
	if (rc == -1) {
		last->iov_len += 1;
		rc = authenticate_and_decrypt_sober(instance, iov, iov_len);
	}

	return rc;
}
/*
 * Initialise the crypto backends.  SOBER128 is always set up; NSS is set up
 * as well unless only the OLD crypto type is accepted (and NSS is compiled
 * in).
 */
static void init_crypto(
	struct totemudp_instance *instance)
{
	init_sober_crypto(instance);

#ifdef HAVE_LIBNSS
	if (instance->totem_config->crypto_accept != TOTEM_CRYPTO_ACCEPT_OLD) {
		init_nss_crypto(instance);
	}
#endif
}
/*
 * Validate a requested transmit crypto type and log the selection.
 *
 * Returns 0 for a known type, -1 for an unknown type or when only the OLD
 * crypto format is accepted.
 */
int totemudp_crypto_set (
	void *udp_context,
	unsigned int type)
{
	struct totemudp_instance *instance = udp_context;

	/*
	 * The crypto type cannot be chosen when OLD is selected
	 */
	if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_OLD) {
		return (-1);
	}

	/*
	 * Validate crypto algorithm
	 */
	if (type == TOTEM_CRYPTO_SOBER) {
		log_printf(instance->totemudp_log_level_security,
			"Transmit security set to: libtomcrypt SOBER128/SHA1HMAC (mode 0)");
		return (0);
	}
	if (type == TOTEM_CRYPTO_NSS) {
		log_printf(instance->totemudp_log_level_security,
			"Transmit security set to: NSS AES128CBC/SHA1HMAC (mode 1)");
		return (0);
	}

	return (-1);
}
/*
 * Send msg as a unicast datagram to system_to.
 *
 * When secauth is enabled the message is encrypted and signed into a local
 * frame and a trailing crypto type byte is appended (the configured type
 * when NEW crypto is accepted, 0 otherwise).  A message that cannot be
 * encrypted, or that would not fit into the frame together with the
 * security header and the type byte, is logged and not sent.
 *
 * A failing sendmsg() is only logged.
 */
static inline void ucast_sendmsg (
	struct totemudp_instance *instance,
	struct totem_ip_address *system_to,
	const void *msg,
	unsigned int msg_len)
{
	struct msghdr msg_ucast;
	int res = 0;
	size_t buf_len = 0;
	unsigned char sheader[sizeof (struct security_header)];
	unsigned char encrypt_data[FRAME_SIZE_MAX];
	struct iovec iovec_encrypt[2];
	const struct iovec *iovec_sendmsg;
	struct sockaddr_storage sockaddr;
	struct iovec iovec;
	unsigned int iov_len;
	int addrlen;

	if (instance->totem_config->secauth == 1) {
		/*
		 * The frame has to hold the security header, the payload and
		 * the trailing crypto type byte
		 */
		if (msg_len > sizeof (encrypt_data) - sizeof (struct security_header) - 1) {
			log_printf (instance->totemudp_log_level_security,
				"Unicast message of %u bytes is too large to encrypt... not sending.\n",
				msg_len);
			return;
		}

		iovec_encrypt[0].iov_base = (void *)sheader;
		iovec_encrypt[0].iov_len = sizeof (struct security_header);
		iovec_encrypt[1].iov_base = (void *)msg;
		iovec_encrypt[1].iov_len = msg_len;

		/*
		 * Encrypt and digest the message
		 */
		res = encrypt_and_sign_worker (
			instance,
			encrypt_data,
			&buf_len,
			iovec_encrypt,
			2);

		/*
		 * buf_len is only meaningful on success, and one more byte
		 * is written at encrypt_data[buf_len] below
		 */
		if (res != 0 || buf_len >= sizeof (encrypt_data)) {
			log_printf (instance->totemudp_log_level_security,
				"Unable to encrypt unicast message... not sending.\n");
			return;
		}

		if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_NEW) {
			encrypt_data[buf_len++] = instance->totem_config->crypto_type;
		}
		else {
			encrypt_data[buf_len++] = 0;
		}

		iovec_encrypt[0].iov_base = (void *)encrypt_data;
		iovec_encrypt[0].iov_len = buf_len;
		iovec_sendmsg = &iovec_encrypt[0];
		iov_len = 1;
	} else {
		iovec.iov_base = (void *)msg;
		iovec.iov_len = msg_len;
		iovec_sendmsg = &iovec;
		iov_len = 1;
	}

	/*
	 * Build unicast message
	 */
	totemip_totemip_to_sockaddr_convert(system_to,
		instance->totem_interface->ip_port, &sockaddr, &addrlen);
	msg_ucast.msg_name = &sockaddr;
	msg_ucast.msg_namelen = addrlen;
	msg_ucast.msg_iov = (void *) iovec_sendmsg;
	msg_ucast.msg_iovlen = iov_len;
#if !defined(COROSYNC_SOLARIS)
	msg_ucast.msg_control = 0;
	msg_ucast.msg_controllen = 0;
	msg_ucast.msg_flags = 0;
#else
	msg_ucast.msg_accrights = NULL;
	msg_ucast.msg_accrightslen = 0;
#endif

	/*
	 * Transmit unicast message
	 * An error here is recovered by totemsrp
	 */
	res = sendmsg (instance->totemudp_sockets.mcast_send, &msg_ucast,
		MSG_NOSIGNAL);
	if (res < 0) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
			"sendmsg(ucast) failed (non-critical)");
	}
}
/*
 * Send msg as a datagram to the instance's multicast address.
 *
 * When secauth is enabled the message is encrypted and signed into a local
 * frame and a trailing crypto type byte is appended (the configured type
 * when NEW crypto is accepted, 0 otherwise).  A message that cannot be
 * encrypted, or that would not fit into the frame together with the
 * security header and the type byte, is logged and not sent.
 *
 * A failing sendmsg() is only logged.
 */
static inline void mcast_sendmsg (
	struct totemudp_instance *instance,
	const void *msg,
	unsigned int msg_len)
{
	struct msghdr msg_mcast;
	int res = 0;
	size_t buf_len = 0;
	unsigned char sheader[sizeof (struct security_header)];
	unsigned char encrypt_data[FRAME_SIZE_MAX];
	struct iovec iovec_encrypt[2];
	struct iovec iovec;
	const struct iovec *iovec_sendmsg;
	struct sockaddr_storage sockaddr;
	unsigned int iov_len;
	int addrlen;

	if (instance->totem_config->secauth == 1) {
		/*
		 * The frame has to hold the security header, the payload and
		 * the trailing crypto type byte
		 */
		if (msg_len > sizeof (encrypt_data) - sizeof (struct security_header) - 1) {
			log_printf (instance->totemudp_log_level_security,
				"Multicast message of %u bytes is too large to encrypt... not sending.\n",
				msg_len);
			return;
		}

		iovec_encrypt[0].iov_base = (void *)sheader;
		iovec_encrypt[0].iov_len = sizeof (struct security_header);
		iovec_encrypt[1].iov_base = (void *)msg;
		iovec_encrypt[1].iov_len = msg_len;

		/*
		 * Encrypt and digest the message
		 */
		res = encrypt_and_sign_worker (
			instance,
			encrypt_data,
			&buf_len,
			iovec_encrypt,
			2);

		/*
		 * buf_len is only meaningful on success, and one more byte
		 * is written at encrypt_data[buf_len] below
		 */
		if (res != 0 || buf_len >= sizeof (encrypt_data)) {
			log_printf (instance->totemudp_log_level_security,
				"Unable to encrypt multicast message... not sending.\n");
			return;
		}

		if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_NEW) {
			encrypt_data[buf_len++] = instance->totem_config->crypto_type;
		}
		else {
			encrypt_data[buf_len++] = 0;
		}

		iovec_encrypt[0].iov_base = (void *)encrypt_data;
		iovec_encrypt[0].iov_len = buf_len;
		iovec_sendmsg = &iovec_encrypt[0];
		iov_len = 1;
	} else {
		iovec.iov_base = (void *)msg;
		iovec.iov_len = msg_len;
		iovec_sendmsg = &iovec;
		iov_len = 1;
	}

	/*
	 * Build multicast message
	 */
	totemip_totemip_to_sockaddr_convert(&instance->mcast_address,
		instance->totem_interface->ip_port, &sockaddr, &addrlen);
	msg_mcast.msg_name = &sockaddr;
	msg_mcast.msg_namelen = addrlen;
	msg_mcast.msg_iov = (void *) iovec_sendmsg;
	msg_mcast.msg_iovlen = iov_len;
#if !defined(COROSYNC_SOLARIS)
	msg_mcast.msg_control = 0;
	msg_mcast.msg_controllen = 0;
	msg_mcast.msg_flags = 0;
#else
	msg_mcast.msg_accrights = NULL;
	msg_mcast.msg_accrightslen = 0;
#endif

	/*
	 * Transmit multicast message
	 * An error here is recovered by totemsrp
	 */
	res = sendmsg (instance->totemudp_sockets.mcast_send, &msg_mcast,
		MSG_NOSIGNAL);
	if (res < 0) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
			"sendmsg(mcast) failed (non-critical)");
	}
}
/*
 * Shut down the sockets of a totemudp instance.
 *
 * The receive and token sockets are first removed from the poll loop and
 * then closed; the send socket is only closed.  Descriptors are
 * deregistered before close() so the poll loop is never asked to operate
 * on a descriptor that is no longer open.
 *
 * Always returns 0.
 */
int totemudp_finalize (
	void *udp_context)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
	int res = 0;

	if (instance->totemudp_sockets.mcast_recv > 0) {
		qb_loop_poll_del (instance->totemudp_poll_handle,
			instance->totemudp_sockets.mcast_recv);
		close (instance->totemudp_sockets.mcast_recv);
	}
	if (instance->totemudp_sockets.mcast_send > 0) {
		close (instance->totemudp_sockets.mcast_send);
	}
	if (instance->totemudp_sockets.token > 0) {
		qb_loop_poll_del (instance->totemudp_poll_handle,
			instance->totemudp_sockets.token);
		close (instance->totemudp_sockets.token);
	}

	return (res);
}
/*
 * Poll callback: receive one datagram from fd, authenticate and decrypt it
 * when secauth is enabled, and hand the payload to the deliver function.
 *
 * Only designed to work with a message with one iov.  Always returns 0.
 */
static int net_deliver_fn (
	int fd,
	int revents,
	void *data)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)data;
	struct msghdr msg_recv;
	struct sockaddr_storage system_from;
	struct iovec *iovec;
	unsigned char *payload;
	unsigned int payload_len;
	int bytes_received;

	/*
	 * A separate receive buffer is used while flushing
	 */
	iovec = (instance->flushing == 1) ?
		&instance->totemudp_iov_recv_flush :
		&instance->totemudp_iov_recv;

	/*
	 * Receive datagram
	 */
	msg_recv.msg_name = &system_from;
	msg_recv.msg_namelen = sizeof (struct sockaddr_storage);
	msg_recv.msg_iov = iovec;
	msg_recv.msg_iovlen = 1;
#if !defined(COROSYNC_SOLARIS)
	msg_recv.msg_control = 0;
	msg_recv.msg_controllen = 0;
	msg_recv.msg_flags = 0;
#else
	msg_recv.msg_accrights = NULL;
	msg_recv.msg_accrightslen = 0;
#endif

	bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT);
	if (bytes_received == -1) {
		return (0);
	}
	instance->stats_recv += bytes_received;

	if ((instance->totem_config->secauth == 1) &&
		(bytes_received < sizeof (struct security_header))) {

		log_printf (instance->totemudp_log_level_security, "Received message is too short...  ignoring %d.\n", bytes_received);
		return (0);
	}

	iovec->iov_len = bytes_received;

	if (instance->totem_config->secauth != 1) {
		payload = (void *)iovec->iov_base;
		payload_len = bytes_received;
	} else {
		/*
		 * Authenticate and if authenticated, decrypt datagram
		 */
		if (authenticate_and_decrypt (instance, iovec, 1) == -1) {
			log_printf (instance->totemudp_log_level_security, "Received message has invalid digest... ignoring.\n");
			log_printf (instance->totemudp_log_level_security,
				"Invalid packet data\n");
			/* Restore the full buffer size for the next receive */
			iovec->iov_len = FRAME_SIZE_MAX;
			return 0;
		}
		payload = (unsigned char *)iovec->iov_base +
			sizeof (struct security_header);
		payload_len = bytes_received - sizeof (struct security_header);
	}

	/*
	 * Handle incoming message
	 */
	instance->totemudp_deliver_fn (
		instance->context,
		payload,
		payload_len);

	/* Restore the full buffer size for the next receive */
	iovec->iov_len = FRAME_SIZE_MAX;
	return (0);
}
/*
 * Thin wrapper around totemip_iface_check() that supplies the configured
 * clear_node_high_bit setting.  Returns whatever totemip_iface_check()
 * returns.
 */
static int netif_determine (
	struct totemudp_instance *instance,
	struct totem_ip_address *bindnet,
	struct totem_ip_address *bound_to,
	int *interface_up,
	int *interface_num)
{
	return (totemip_iface_check (bindnet, bound_to,
		interface_up, interface_num,
		instance->totem_config->clear_node_high_bit));
}
/*
* If the interface is up, the sockets for totem are built. If the interface is down
* this function is requeued in the timer list to retry building the sockets later.
*/
static void timer_function_netif_check_timeout (
void *data)
{
struct totemudp_instance *instance = (struct totemudp_instance *)data;
int interface_up;
int interface_num;
struct totem_ip_address *bind_address;
/*
* Build sockets for every interface
*/
netif_determine (instance,
&instance->totem_interface->bindnet,
&instance->totem_interface->boundto,
&interface_up, &interface_num);
/*
* If the network interface isn't back up and we are already
* in loopback mode, add timer to check again and return
*/
if ((instance->netif_bind_state == BIND_STATE_LOOPBACK &&
interface_up == 0) ||
(instance->my_memb_entries == 1 &&
instance->netif_bind_state == BIND_STATE_REGULAR &&
interface_up == 1)) {
qb_loop_timer_add (instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
/*
* Add a timer to check for a downed regular interface
*/
return;
}
if (instance->totemudp_sockets.mcast_recv > 0) {
close (instance->totemudp_sockets.mcast_recv);
qb_loop_poll_del (instance->totemudp_poll_handle,
instance->totemudp_sockets.mcast_recv);
}
if (instance->totemudp_sockets.mcast_send > 0) {
close (instance->totemudp_sockets.mcast_send);
}
if (instance->totemudp_sockets.token > 0) {
close (instance->totemudp_sockets.token);
qb_loop_poll_del (instance->totemudp_poll_handle,
instance->totemudp_sockets.token);
}
if (interface_up == 0) {
/*
* Interface is not up
*/
instance->netif_bind_state = BIND_STATE_LOOPBACK;
bind_address = &localhost;
/*
* Add a timer to retry building interfaces and request memb_gather_enter
*/
qb_loop_timer_add (instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
} else {
/*
* Interface is up
*/
instance->netif_bind_state = BIND_STATE_REGULAR;
bind_address = &instance->totem_interface->bindnet;
}
/*
* Create and bind the multicast and unicast sockets
*/
(void)totemudp_build_sockets (instance,
&instance->mcast_address,
bind_address,
&instance->totemudp_sockets,
&instance->totem_interface->boundto);
qb_loop_poll_add (
instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totemudp_sockets.mcast_recv,
POLLIN, instance, net_deliver_fn);
qb_loop_poll_add (
instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totemudp_sockets.token,
POLLIN, instance, net_deliver_fn);
totemip_copy (&instance->my_id, &instance->totem_interface->boundto);
/*
* This reports changes in the interface to the user and totemsrp
*/
if (instance->netif_bind_state == BIND_STATE_REGULAR) {
if (instance->netif_state_report & NETIF_STATE_REPORT_UP) {
log_printf (instance->totemudp_log_level_notice,
"The network interface [%s] is now up.\n",
totemip_print (&instance->totem_interface->boundto));
instance->netif_state_report = NETIF_STATE_REPORT_DOWN;
instance->totemudp_iface_change_fn (instance->context, &instance->my_id);
}
/*
* Add a timer to check for interface going down in single membership
*/
if (instance->my_memb_entries == 1) {
qb_loop_timer_add (instance->totemudp_poll_handle,
QB_LOOP_MED,
instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
}
} else {
if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) {
log_printf (instance->totemudp_log_level_notice,
"The network interface is down.\n");
instance->totemudp_iface_change_fn (instance->context, &instance->my_id);
}
instance->netif_state_report = NETIF_STATE_REPORT_UP;
}
}
/*
 * Raise the priority of sock to INTERACTIVE so that our messages are not
 * queued behind anything else.  Does nothing on platforms without
 * SO_PRIORITY; a failure is only logged.
 */
static void totemudp_traffic_control_set(struct totemudp_instance *instance, int sock)
{
#ifdef SO_PRIORITY
	int interactive_prio = 6; /* TC_PRIO_INTERACTIVE */
	int rc;

	rc = setsockopt(sock, SOL_SOCKET, SO_PRIORITY,
		&interactive_prio, sizeof (interactive_prio));
	if (rc != 0) {
		LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set traffic priority");
	}
#endif
}
/*
 * Create and configure the three UDP sockets stored in *sockets:
 * mcast_recv (bound to the multicast address), mcast_send (bound to
 * bound_to on ip_port - 1) and token (bound to bound_to on ip_port).
 *
 * Each socket is made non-blocking and gets SO_REUSEADDR.  Afterwards the
 * buffer sizes are raised, broadcast is enabled or the multicast group is
 * joined, and multicast loopback, TTL and outgoing interface are set.
 *
 * Returns 0 on success and -1 on the first failing step.  Sockets that were
 * already created when a later step fails are not closed here.
 */
static int totemudp_build_sockets_ip (
struct totemudp_instance *instance,
struct totem_ip_address *mcast_address,
struct totem_ip_address *bindnet_address,
struct totemudp_socket *sockets,
struct totem_ip_address *bound_to,
int interface_num)
{
struct sockaddr_storage sockaddr;
struct ipv6_mreq mreq6;
struct ip_mreq mreq;
struct sockaddr_storage mcast_ss, boundto_ss;
/* Typed views onto mcast_ss / boundto_ss, filled in further below */
struct sockaddr_in6 *mcast_sin6 = (struct sockaddr_in6 *)&mcast_ss;
struct sockaddr_in *mcast_sin = (struct sockaddr_in *)&mcast_ss;
struct sockaddr_in *boundto_sin = (struct sockaddr_in *)&boundto_ss;
unsigned int sendbuf_size;
unsigned int recvbuf_size;
unsigned int optlen = sizeof (sendbuf_size);
int addrlen;
int res;
int flag;
/*
* Create multicast recv socket
*/
sockets->mcast_recv = socket (bindnet_address->family, SOCK_DGRAM, 0);
if (sockets->mcast_recv == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"socket() failed");
return (-1);
}
totemip_nosigpipe (sockets->mcast_recv);
res = fcntl (sockets->mcast_recv, F_SETFL, O_NONBLOCK);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Could not set non-blocking operation on multicast socket");
return (-1);
}
/*
* Force reuse
*/
flag = 1;
if ( setsockopt(sockets->mcast_recv, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"setsockopt(SO_REUSEADDR) failed");
return (-1);
}
/*
* Bind to multicast socket used for multicast receives
*/
totemip_totemip_to_sockaddr_convert(mcast_address,
instance->totem_interface->ip_port, &sockaddr, &addrlen);
res = bind (sockets->mcast_recv, (struct sockaddr *)&sockaddr, addrlen);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Unable to bind the socket to receive multicast packets");
return (-1);
}
/*
* Setup mcast send socket
*/
sockets->mcast_send = socket (bindnet_address->family, SOCK_DGRAM, 0);
if (sockets->mcast_send == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"socket() failed");
return (-1);
}
totemip_nosigpipe (sockets->mcast_send);
res = fcntl (sockets->mcast_send, F_SETFL, O_NONBLOCK);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Could not set non-blocking operation on multicast socket");
return (-1);
}
/*
* Force reuse
*/
flag = 1;
if ( setsockopt(sockets->mcast_send, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"setsockopt(SO_REUSEADDR) failed");
return (-1);
}
/* The send socket is bound to the port directly below the configured ip_port */
totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port - 1,
&sockaddr, &addrlen);
res = bind (sockets->mcast_send, (struct sockaddr *)&sockaddr, addrlen);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Unable to bind the socket to send multicast packets");
return (-1);
}
/*
* Setup unicast socket
*/
sockets->token = socket (bindnet_address->family, SOCK_DGRAM, 0);
if (sockets->token == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"socket() failed");
return (-1);
}
totemip_nosigpipe (sockets->token);
res = fcntl (sockets->token, F_SETFL, O_NONBLOCK);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Could not set non-blocking operation on token socket");
return (-1);
}
/*
* Force reuse
*/
flag = 1;
if ( setsockopt(sockets->token, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"setsockopt(SO_REUSEADDR) failed");
return (-1);
}
/*
* Bind to unicast socket used for token send/receives
* This has the side effect of binding to the correct interface
*/
totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &sockaddr, &addrlen);
res = bind (sockets->token, (struct sockaddr *)&sockaddr, addrlen);
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Unable to bind UDP unicast socket");
return (-1);
}
recvbuf_size = MCAST_SOCKET_BUFFER_SIZE;
sendbuf_size = MCAST_SOCKET_BUFFER_SIZE;
/*
* Set buffer sizes to avoid overruns
*
* The setsockopt() results are overwritten by the getsockopt() calls
* that follow, so a failure to resize is not reported; only the sizes
* read back are logged.
*/
res = setsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen);
res = setsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen);
res = getsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, &optlen);
if (res == 0) {
/* NOTE(review): recvbuf_size is unsigned int but is printed with %d */
log_printf (instance->totemudp_log_level_debug,
"Receive multicast socket recv buffer size (%d bytes).\n", recvbuf_size);
}
res = getsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, &optlen);
if (res == 0) {
/* NOTE(review): sendbuf_size is unsigned int but is printed with %d */
log_printf (instance->totemudp_log_level_debug,
"Transmit multicast socket send buffer size (%d bytes).\n", sendbuf_size);
}
/*
* Join group membership on socket
*/
totemip_totemip_to_sockaddr_convert(mcast_address, instance->totem_interface->ip_port, &mcast_ss, &addrlen);
totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &boundto_ss, &addrlen);
if (instance->totem_config->broadcast_use == 1) {
/* Broadcast mode: no group is joined, both sockets get SO_BROADCAST */
unsigned int broadcast = 1;
if ((setsockopt(sockets->mcast_recv, SOL_SOCKET,
SO_BROADCAST, &broadcast, sizeof (broadcast))) == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"setting broadcast option failed");
return (-1);
}
if ((setsockopt(sockets->mcast_send, SOL_SOCKET,
SO_BROADCAST, &broadcast, sizeof (broadcast))) == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"setting broadcast option failed");
return (-1);
}
} else {
/* Any address family other than AF_INET / AF_INET6 joins no group */
switch (bindnet_address->family) {
case AF_INET:
memset(&mreq, 0, sizeof(mreq));
mreq.imr_multiaddr.s_addr = mcast_sin->sin_addr.s_addr;
mreq.imr_interface.s_addr = boundto_sin->sin_addr.s_addr;
res = setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_ADD_MEMBERSHIP,
&mreq, sizeof (mreq));
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"join ipv4 multicast group failed");
return (-1);
}
break;
case AF_INET6:
memset(&mreq6, 0, sizeof(mreq6));
memcpy(&mreq6.ipv6mr_multiaddr, &mcast_sin6->sin6_addr, sizeof(struct in6_addr));
mreq6.ipv6mr_interface = interface_num;
res = setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_JOIN_GROUP,
&mreq6, sizeof (mreq6));
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"join ipv6 multicast group failed");
return (-1);
}
break;
}
}
/*
* Turn on multicast loopback
*
* For a family other than AF_INET / AF_INET6 neither case runs and res
* still holds the value assigned by an earlier call when it is tested
* below.
*/
flag = 1;
switch ( bindnet_address->family ) {
case AF_INET:
res = setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_LOOP,
&flag, sizeof (flag));
break;
case AF_INET6:
res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
&flag, sizeof (flag));
}
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"Unable to turn on multicast loopback");
return (-1);
}
/*
* Set multicast packets TTL
*
* Every family other than AF_INET6 takes the IPv4 branch.
*/
flag = instance->totem_interface->ttl;
if (bindnet_address->family == AF_INET6) {
res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
&flag, sizeof (flag));
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"set mcast v6 TTL failed");
return (-1);
}
} else {
res = setsockopt(sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_TTL,
&flag, sizeof(flag));
if (res == -1) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"set mcast v4 TTL failed");
return (-1);
}
}
/*
* Bind to a specific interface for multicast send and receive
*
* IPv4 selects the interface by the bound address, IPv6 by
* interface_num.
*/
switch ( bindnet_address->family ) {
case AF_INET:
if (setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_IF,
&boundto_sin->sin_addr, sizeof (boundto_sin->sin_addr)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"cannot select interface for multicast packets (send)");
return (-1);
}
if (setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_MULTICAST_IF,
&boundto_sin->sin_addr, sizeof (boundto_sin->sin_addr)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"cannot select interface for multicast packets (recv)");
return (-1);
}
break;
case AF_INET6:
if (setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_IF,
&interface_num, sizeof (interface_num)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"cannot select interface for multicast packets (send v6)");
return (-1);
}
if (setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_MULTICAST_IF,
&interface_num, sizeof (interface_num)) < 0) {
LOGSYS_PERROR (errno, instance->totemudp_log_level_warning,
"cannot select interface for multicast packets (recv v6)");
return (-1);
}
break;
}
return 0;
}
/*
 * Resolve the address to bind to and build the totem sockets on it.
 *
 * Returns -1 when the interface lookup fails, otherwise the result of
 * totemudp_build_sockets_ip().  instance->my_id is updated to bound_to and
 * traffic control is applied to the token socket before returning.
 */
static int totemudp_build_sockets (
	struct totemudp_instance *instance,
	struct totem_ip_address *mcast_address,
	struct totem_ip_address *bindnet_address,
	struct totemudp_socket *sockets,
	struct totem_ip_address *bound_to)
{
	int interface_num;
	int interface_up;
	int build_res;

	/*
	 * Determine the ip address bound to and the interface name
	 */
	if (netif_determine (instance, bindnet_address, bound_to,
		&interface_up, &interface_num) == -1) {
		return (-1);
	}

	totemip_copy(&instance->my_id, bound_to);

	build_res = totemudp_build_sockets_ip (instance, mcast_address,
		bindnet_address, sockets, bound_to, interface_num);

	/* We only send out of the token socket */
	totemudp_traffic_control_set(instance, sockets->token);

	return build_res;
}
/*
* Totem Network interface - also does encryption/decryption
* depends on poll abstraction, POSIX, IPV4
*/
/*
 * Create an instance
 *
 * Allocates a totemudp instance, copies the logging setup and private key
 * from totem_config, initialises crypto, records the callbacks and schedules
 * a short timer that will build the sockets.  On success the instance is
 * stored in *udp_context and 0 is returned; -1 is returned when the
 * allocation fails.
 */
int totemudp_initialize (
	qb_loop_t *poll_handle,
	void **udp_context,
	struct totem_config *totem_config,
	int interface_no,
	void *context,

	void (*deliver_fn) (
		void *context,
		const void *msg,
		unsigned int msg_len),

	void (*iface_change_fn) (
		void *context,
		const struct totem_ip_address *iface_address),

	void (*target_set_completed) (
		void *context))
{
	struct totemudp_instance *instance = malloc (sizeof (*instance));

	if (instance == NULL) {
		return (-1);
	}

	totemudp_instance_initialize (instance);

	instance->totem_config = totem_config;

	/*
	 * Configure logging
	 *
	 * The security level is fixed at 1 instead of being taken from
	 * totem_config->totem_logging_configuration.log_level_security
	 */
	instance->totemudp_log_level_security = 1;
	instance->totemudp_log_level_error = totem_config->totem_logging_configuration.log_level_error;
	instance->totemudp_log_level_warning = totem_config->totem_logging_configuration.log_level_warning;
	instance->totemudp_log_level_notice = totem_config->totem_logging_configuration.log_level_notice;
	instance->totemudp_log_level_debug = totem_config->totem_logging_configuration.log_level_debug;
	instance->totemudp_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
	instance->totemudp_log_printf = totem_config->totem_logging_configuration.log_printf;

	/*
	 * Take a private copy of the key, then set up the crypto backends
	 * (this also prepares the random number generator used for salts)
	 */
	instance->totemudp_private_key_len = totem_config->private_key_len;
	memcpy (instance->totemudp_private_key, totem_config->private_key,
		totem_config->private_key_len);

	init_crypto(instance);

	/*
	 * Initialize local variables for totemudp
	 */
	instance->totem_interface = &totem_config->interfaces[interface_no];
	totemip_copy (&instance->mcast_address, &instance->totem_interface->mcast_addr);
	memset (instance->iov_buffer, 0, FRAME_SIZE_MAX);

	instance->totemudp_poll_handle = poll_handle;

	instance->totem_interface->bindnet.nodeid = instance->totem_config->node_id;

	/*
	 * Callbacks and the opaque context handed back to them
	 */
	instance->context = context;
	instance->totemudp_deliver_fn = deliver_fn;
	instance->totemudp_iface_change_fn = iface_change_fn;
	instance->totemudp_target_set_completed = target_set_completed;

	totemip_localhost (instance->mcast_address.family, &localhost);
	localhost.nodeid = instance->totem_config->node_id;

	/*
	 * RRP layer isn't ready to receive message because it hasn't
	 * initialized yet.  Add short timer to check the interfaces.
	 */
	qb_loop_timer_add (instance->totemudp_poll_handle,
		QB_LOOP_MED,
		100*QB_TIME_NS_IN_MSEC,
		instance,
		timer_function_netif_check_timeout,
		&instance->timer_netif_check_timeout);

	*udp_context = instance;
	return (0);
}
/*
 * Allocate one frame buffer of FRAME_SIZE_MAX bytes with malloc().
 * Returns NULL when the allocation fails.
 */
void *totemudp_buffer_alloc (void)
{
	void *frame = malloc (FRAME_SIZE_MAX);

	return (frame);
}
/*
 * Release a buffer with free().  Passing NULL is allowed and does nothing.
 */
void totemudp_buffer_release (void *ptr)
{
	/* A void function must not return an expression, so just call free() */
	free (ptr);
}
/*
 * Record the number of ring members.  The interface check timer is always
 * cancelled and is re-armed with the downcheck timeout only when this node
 * is the sole member.  Always returns 0.
 */
int totemudp_processor_count_set (
	void *udp_context,
	int processor_count)
{
	struct totemudp_instance *instance = udp_context;

	instance->my_memb_entries = processor_count;

	qb_loop_timer_del (instance->totemudp_poll_handle,
		instance->timer_netif_check_timeout);

	if (processor_count != 1) {
		return (0);
	}

	qb_loop_timer_add (instance->totemudp_poll_handle,
		QB_LOOP_MED,
		instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
		instance,
		timer_function_netif_check_timeout,
		&instance->timer_netif_check_timeout);

	return (0);
}
/*
 * Drain the multicast receive socket.
 *
 * While a zero-timeout poll reports exactly one ready descriptor, any
 * POLLIN event is handed to net_deliver_fn. instance->flushing is set
 * to 1 for the duration of the loop and cleared afterwards.
 *
 * Always returns 0.
 */
int totemudp_recv_flush (void *udp_context)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
	struct pollfd pending;

	instance->flushing = 1;

	for (;;) {
		pending.fd = instance->totemudp_sockets.mcast_recv;
		pending.events = POLLIN;

		if (poll (&pending, 1, 0) != 1) {
			break;
		}
		if (pending.revents & POLLIN) {
			net_deliver_fn (instance->totemudp_sockets.mcast_recv,
				pending.revents, instance);
		}
	}

	instance->flushing = 0;

	return (0);
}
/*
 * Send-side flush hook. Nothing is done with the context;
 * the function simply reports success (0).
 */
int totemudp_send_flush (void *udp_context)
{
	(void)udp_context;

	return (0);
}
/*
 * Send msg (msg_len bytes) to the currently configured token target
 * via ucast_sendmsg. Always returns 0.
 */
int totemudp_token_send (
	void *udp_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;

	ucast_sendmsg (instance, &instance->token_target, msg, msg_len);

	return (0);
}
/*
 * Hand msg (msg_len bytes) to mcast_sendmsg for transmission.
 * Always returns 0.
 */
int totemudp_mcast_flush_send (
	void *udp_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;

	mcast_sendmsg (instance, msg, msg_len);

	return (0);
}
/*
 * Hand msg (msg_len bytes) to mcast_sendmsg for transmission.
 * The body is the same as totemudp_mcast_flush_send. Always returns 0.
 */
int totemudp_mcast_noflush_send (
	void *udp_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;

	mcast_sendmsg (instance, msg, msg_len);

	return (0);
}
/*
 * Run the network interface check immediately by calling the timer
 * callback directly. Always returns 0.
 */
extern int totemudp_iface_check (void *udp_context)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;

	timer_function_netif_check_timeout (instance);

	return (0);
}
/*
 * Reduce totem_config->net_mtu by the per-packet overhead: the UDP/IP
 * headers, plus the security header when secauth is enabled.
 * udp_context is not used.
 */
extern void totemudp_net_mtu_adjust (void *udp_context, struct totem_config *totem_config)
{
#define UDPIP_HEADER_SIZE (20 + 8) /* 20 bytes for ip 8 bytes for udp */
	if (totem_config->secauth != 1) {
		totem_config->net_mtu -= UDPIP_HEADER_SIZE;
		return;
	}

	totem_config->net_mtu -= sizeof (struct security_header) +
		UDPIP_HEADER_SIZE;
}
/*
 * Return the string produced by totemip_print for this instance's
 * own address (my_id).
 */
const char *totemudp_iface_print (void *udp_context) {
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;

	return (totemip_print (&instance->my_id));
}
/*
 * Copy this instance's own address (my_id) into *addr.
 * Always returns 0.
 */
int totemudp_iface_get (
	void *udp_context,
	struct totem_ip_address *addr)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;

	memcpy (addr, &instance->my_id, sizeof (*addr));

	return (0);
}
/*
 * Store *token_target as the address tokens are sent to, then invoke
 * the target_set_completed callback with the instance's context.
 * Always returns 0.
 */
int totemudp_token_target_set (
	void *udp_context,
	const struct totem_ip_address *token_target)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;

	memcpy (&instance->token_target, token_target,
		sizeof (instance->token_target));

	instance->totemudp_target_set_completed (instance->context);

	return (0);
}
/*
 * Read and discard every datagram currently queued on the multicast
 * receive socket. Data is received into totemudp_iov_recv_flush and
 * not delivered anywhere.
 *
 * Returns 0 when nothing was read, 1 when the last recvmsg succeeded
 * and -1 when the last recvmsg failed.
 */
extern int totemudp_recv_mcast_empty (
	void *udp_context)
{
	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
	/* recvmsg returns ssize_t; keep it signed so the -1 test is exact */
	ssize_t res;
	struct sockaddr_storage system_from;
	struct msghdr msg_recv;
	struct pollfd ufd;
	int nfds;
	int msg_processed = 0;

	/*
	 * Receive datagram
	 */
	msg_recv.msg_name = &system_from;
	msg_recv.msg_namelen = sizeof (struct sockaddr_storage);
	msg_recv.msg_iov = &instance->totemudp_iov_recv_flush;
	msg_recv.msg_iovlen = 1;
#if !defined(COROSYNC_SOLARIS)
	msg_recv.msg_control = 0;
	msg_recv.msg_controllen = 0;
	msg_recv.msg_flags = 0;
#else
	msg_recv.msg_accrights = NULL;
	msg_recv.msg_accrightslen = 0;
#endif

	do {
		ufd.fd = instance->totemudp_sockets.mcast_recv;
		ufd.events = POLLIN;
		/* zero timeout: only look at what is already queued */
		nfds = poll (&ufd, 1, 0);
		if (nfds == 1 && ufd.revents & POLLIN) {
			res = recvmsg (instance->totemudp_sockets.mcast_recv, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT);
			if (res != -1) {
				msg_processed = 1;
			} else {
				msg_processed = -1;
			}
		}
	} while (nfds == 1);

	return (msg_processed);
}
diff --git a/exec/totemudpu.c b/exec/totemudpu.c
index 8ef90bbc..529c3627 100644
--- a/exec/totemudpu.c
+++ b/exec/totemudpu.c
@@ -1,1712 +1,1720 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2009 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <assert.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netdb.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <sched.h>
#include <time.h>
#include <sys/time.h>
#include <sys/poll.h>
#include <limits.h>
#include <qb/qbdefs.h>
#include <qb/qbloop.h>
#include <corosync/sq.h>
#include <corosync/list.h>
#include <corosync/hdb.h>
#include <corosync/swab.h>
#define LOGSYS_UTILS_ONLY 1
#include <corosync/engine/logsys.h>
#include "totemudpu.h"
#include "crypto.h"
#include "util.h"
#ifdef HAVE_LIBNSS
#include <nss.h>
#include <pk11pub.h>
#include <pkcs11.h>
#include <prerror.h>
#endif
#ifndef MSG_NOSIGNAL
#define MSG_NOSIGNAL 0
#endif
#define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX)
#define NETIF_STATE_REPORT_UP 1
#define NETIF_STATE_REPORT_DOWN 2
#define BIND_STATE_UNBOUND 0
#define BIND_STATE_REGULAR 1
#define BIND_STATE_LOOPBACK 2
#define HMAC_HASH_SIZE 20
/*
 * Header placed in front of every authenticated/encrypted packet.
 * The struct is packed; the code in this file signs everything after
 * hash_digest and encrypts everything after the whole header.
 */
struct security_header {
unsigned char hash_digest[HMAC_HASH_SIZE]; /* The hash *MUST* be first in the data structure */
unsigned char salt[16]; /* random number */
/* zero-length trailing member; presumably marks where the message body starts - TODO confirm */
char msg[0];
} __attribute__((packed));
/*
 * One entry of an instance's member_list. mcast_sendmsg walks that list
 * and sends a copy of each message to every entry.
 */
struct totemudpu_member {
/* linkage into totemudpu_instance.member_list */
struct list_head list;
/* destination address used when sending to this member */
struct totem_ip_address member;
/* socket passed to sendmsg for this member */
int fd;
};
/*
 * Per-transport state for one UDP-unicast (udpu) totem instance.
 */
struct totemudpu_instance {
/* libtomcrypt state: HMAC used when authenticating, PRNG used to draw salts */
hmac_state totemudpu_hmac_state;
prng_state totemudpu_prng_state;
#ifdef HAVE_LIBNSS
/* NSS keys imported by init_nss_crypto: cipher key and HMAC signing key */
PK11SymKey *nss_sym_key;
PK11SymKey *nss_sym_key_sign;
#endif
/* private key material fed to the SOBER128 key derivation, and its length */
unsigned char totemudpu_private_key[1024];
unsigned int totemudpu_private_key_len;
/* libqb main loop used for timers and socket polling */
qb_loop_t *totemudpu_poll_handle;
struct totem_interface *totem_interface;
/* bitmask of NETIF_STATE_REPORT_* values */
int netif_state_report;
/* one of the BIND_STATE_* values */
int netif_bind_state;
/* opaque pointer handed back to the callbacks below */
void *context;
/* called by net_deliver_fn with each received (and, if enabled, decrypted) message */
void (*totemudpu_deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len);
void (*totemudpu_iface_change_fn) (
void *context,
const struct totem_ip_address *iface_address);
void (*totemudpu_target_set_completed) (void *context);
/*
* Function and data used to log messages
*/
int totemudpu_log_level_security;
int totemudpu_log_level_error;
int totemudpu_log_level_warning;
int totemudpu_log_level_notice;
int totemudpu_log_level_debug;
int totemudpu_subsys_id;
void (*totemudpu_log_printf) (
- unsigned int rec_ident,
+ int level,
+ int subsys,
const char *function,
const char *file,
int line,
const char *format,
- ...)__attribute__((format(printf, 5, 6)));
+ ...)__attribute__((format(printf, 6, 7)));
void *udpu_context;
/* receive buffer; totemudpu_iov_recv is pointed at it by totemudpu_instance_initialize */
char iov_buffer[FRAME_SIZE_MAX];
struct iovec totemudpu_iov_recv;
/* list of struct totemudpu_member that mcast_sendmsg transmits to */
struct list_head member_list;
int stats_sent;
/* net_deliver_fn adds the byte count of every received datagram */
int stats_recv;
int stats_delv;
int stats_remcasts;
int stats_orf_token;
struct timeval stats_tv_start;
struct totem_ip_address my_id;
int firstrun;
/* handle of the pending interface check timer */
qb_loop_timer_handle timer_netif_check_timeout;
/* initialised to 1 by totemudpu_instance_initialize */
unsigned int my_memb_entries;
struct totem_config *totem_config;
struct totem_ip_address token_target;
/* socket used by ucast_sendmsg; values <= 0 are treated as "not open" */
int token_socket;
};
/*
 * Bundles a message (pointer and length) with the instance it belongs to.
 * NOTE(review): no user of this struct is visible in this part of the
 * file - confirm whether it is still needed.
 */
struct work_item {
const void *msg;
unsigned int msg_len;
struct totemudpu_instance *instance;
};
/*
 * Forward declaration; the definition appears later in the file.
 */
static int totemudpu_build_sockets (
struct totemudpu_instance *instance,
struct totem_ip_address *bindnet_address,
struct totem_ip_address *bound_to);
/*
 * File-wide loopback address; the interface check timer selects it as
 * the bind address when the interface is down.
 */
static struct totem_ip_address localhost;
/*
 * Put a freshly allocated instance into its default state: everything
 * zeroed, an empty member list, both netif state reports enabled, the
 * receive iovec pointing at the instance's own frame buffer, and a
 * membership count of one.
 */
static void totemudpu_instance_initialize (struct totemudpu_instance *instance)
{
	memset (instance, 0, sizeof (*instance));

	list_init (&instance->member_list);

	/*
	 * There is always at least 1 processor
	 */
	instance->my_memb_entries = 1;

	instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN;

	instance->totemudpu_iov_recv.iov_len = FRAME_SIZE_MAX;
	instance->totemudpu_iov_recv.iov_base = instance->iov_buffer;
}
/*
 * log_printf: forward a message to instance->totemudpu_log_printf together
 * with the current function, file and line. A variable named "instance"
 * must be in scope at the point of use. Note that the expansion already
 * ends in a semicolon after "while (0)".
 */
-#define log_printf(level, format, args...) \
-do { \
- instance->totemudpu_log_printf ( \
- LOGSYS_ENCODE_RECID(level, \
- instance->totemudpu_subsys_id, \
- LOGSYS_RECID_LOG), \
- __FUNCTION__, __FILE__, __LINE__, \
- (const char *)format, ##args); \
+#define log_printf(level, format, args...) \
+do { \
+ instance->totemudpu_log_printf ( \
+ level, instance->totemudpu_subsys_id, \
+ __FUNCTION__, __FILE__, __LINE__, \
+ (const char *)format, ##args); \
} while (0);
/*
 * LOGSYS_PERROR: log fmt followed by ": <error text> (<err_num>)\n", where
 * the error text comes from qb_strerror_r(err_num, ...). fmt must be a
 * string literal because it is concatenated with the suffix. A variable
 * named "instance" must be in scope at the point of use.
 */
+#define LOGSYS_PERROR(err_num, level, fmt, args...) \
+do { \
+ char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \
+ const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \
+ instance->totemudpu_log_printf ( \
+ level, instance->totemudpu_subsys_id, \
+ __FUNCTION__, __FILE__, __LINE__, \
+ fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \
+ } while(0)
/*
 * Verify and decrypt, in place, a packet protected with the
 * SOBER128/SHA1-HMAC scheme.
 *
 * Only iov[0] is examined; iov_len is not used. The packet must start
 * with a struct security_header. Keys are derived from the instance's
 * private key and the salt carried in the header.
 *
 * Returns 0 when the digest matches (the payload after the header has
 * then been decrypted in place), -1 when the packet is too short to hold
 * a header or the digest does not match.
 */
static int authenticate_and_decrypt_sober (
	struct totemudpu_instance *instance,
	struct iovec *iov,
	unsigned int iov_len)
{
	unsigned char keys[48];
	struct security_header *header = (struct security_header *)iov[0].iov_base;
	prng_state keygen_prng_state;
	prng_state stream_prng_state;
	unsigned char *hmac_key = &keys[32];
	unsigned char *cipher_key = &keys[16];
	unsigned char *initial_vector = &keys[0];
	unsigned char digest_comparison[HMAC_HASH_SIZE];
	unsigned long len;

	/*
	 * A packet shorter than the header cannot be valid. Without this
	 * check the size_t subtractions below would wrap around.
	 */
	if (iov->iov_len < sizeof (struct security_header)) {
		return (-1);
	}

	/*
	 * Generate MAC, CIPHER, IV keys from private key
	 */
	memset (keys, 0, sizeof (keys));
	sober128_start (&keygen_prng_state);
	sober128_add_entropy (instance->totemudpu_private_key,
		instance->totemudpu_private_key_len, &keygen_prng_state);
	sober128_add_entropy (header->salt, sizeof (header->salt), &keygen_prng_state);

	sober128_read (keys, sizeof (keys), &keygen_prng_state);

	/*
	 * Setup stream cipher
	 */
	sober128_start (&stream_prng_state);
	sober128_add_entropy (cipher_key, 16, &stream_prng_state);
	sober128_add_entropy (initial_vector, 16, &stream_prng_state);

	/*
	 * Authenticate contents of message (everything after the digest)
	 */
	hmac_init (&instance->totemudpu_hmac_state, DIGEST_SHA1, hmac_key, 16);

	hmac_process (&instance->totemudpu_hmac_state,
		(unsigned char *)iov->iov_base + HMAC_HASH_SIZE,
		iov->iov_len - HMAC_HASH_SIZE);

	len = hash_descriptor[DIGEST_SHA1]->hashsize;
	assert (HMAC_HASH_SIZE >= len);
	hmac_done (&instance->totemudpu_hmac_state, digest_comparison, &len);

	if (memcmp (digest_comparison, header->hash_digest, len) != 0) {
		return (-1);
	}

	/*
	 * Decrypt the contents of the message with the cipher key
	 */
	sober128_read ((unsigned char*)iov->iov_base +
			sizeof (struct security_header),
		iov->iov_len - sizeof (struct security_header),
		&stream_prng_state);

	return (0);
}
static void init_sober_crypto(
struct totemudpu_instance *instance)
{
log_printf(instance->totemudpu_log_level_notice,
"Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0).\n");
rng_make_prng (128, PRNG_SOBER, &instance->totemudpu_prng_state, NULL);
}
#ifdef HAVE_LIBNSS
/*
 * Flatten an iovec array into one newly allocated contiguous buffer.
 *
 * iov:      array of iov_len entries to concatenate, in order.
 * buf_size: receives the total number of bytes copied (written only on
 *           success).
 *
 * Returns the malloc'ed buffer, which the caller must free, or NULL when
 * malloc returns NULL.
 */
static unsigned char *copy_from_iovec(
	const struct iovec *iov,
	unsigned int iov_len,
	size_t *buf_size)
{
	/* index has the same type as iov_len, avoiding a signed/unsigned compare */
	unsigned int i;
	size_t bufptr;
	size_t buflen = 0;
	unsigned char *newbuf;

	for (i = 0; i < iov_len; i++) {
		buflen += iov[i].iov_len;
	}

	newbuf = malloc(buflen);
	if (!newbuf) {
		return NULL;
	}

	bufptr = 0;
	for (i = 0; i < iov_len; i++) {
		memcpy(newbuf + bufptr, iov[i].iov_base, iov[i].iov_len);
		bufptr += iov[i].iov_len;
	}

	*buf_size = buflen;
	return newbuf;
}
/*
 * Scatter buf (buf_size bytes) across an iovec array, filling the entries
 * in order.
 *
 * The first entry that cannot be filled completely has its iov_len
 * shortened to the number of bytes actually stored, and copying stops
 * there; later entries are left untouched.
 */
static void copy_to_iovec(
	struct iovec *iov,
	unsigned int iov_len,
	const unsigned char *buf,
	size_t buf_size)
{
	/* index has the same type as iov_len, avoiding a signed/unsigned compare */
	unsigned int i;
	size_t copylen;
	size_t bufptr = 0;

	for (i = 0; i < iov_len; i++) {
		copylen = iov[i].iov_len;
		if (bufptr + copylen > buf_size) {
			/* not enough data left: take only the remainder */
			copylen = buf_size - bufptr;
		}
		memcpy(iov[i].iov_base, buf + bufptr, copylen);
		bufptr += copylen;
		if (iov[i].iov_len != copylen) {
			iov[i].iov_len = copylen;
			return;
		}
	}
}
/*
 * Initialise NSS and import the configured private key twice: once as
 * the cipher key (nss_sym_key, mechanism taken from crypto_crypt_type)
 * and once as the SHA1-HMAC signing key (nss_sym_key_sign).
 *
 * Failures are logged at the security level; the function returns
 * without the keys being set in that case. The slot references obtained
 * from PK11_GetBestSlot are released before returning on every path.
 */
static void init_nss_crypto(
	struct totemudpu_instance *instance)
{
	PK11SlotInfo* aes_slot = NULL;
	PK11SlotInfo* sha1_slot = NULL;
	SECItem key_item;
	SECStatus rv;

	log_printf(instance->totemudpu_log_level_notice,
		"Initializing transmit/receive security: NSS AES128CBC/SHA1HMAC (mode 1).\n");
	rv = NSS_NoDB_Init(".");
	if (rv != SECSuccess)
	{
		log_printf(instance->totemudpu_log_level_security, "NSS initialization failed (err %d)\n",
			PR_GetError());
		goto out;
	}

	aes_slot = PK11_GetBestSlot(instance->totem_config->crypto_crypt_type, NULL);
	if (aes_slot == NULL)
	{
		log_printf(instance->totemudpu_log_level_security, "Unable to find security slot (err %d)\n",
			PR_GetError());
		goto out;
	}

	sha1_slot = PK11_GetBestSlot(CKM_SHA_1_HMAC, NULL);
	if (sha1_slot == NULL)
	{
		log_printf(instance->totemudpu_log_level_security, "Unable to find security slot (err %d)\n",
			PR_GetError());
		goto out;
	}

	/*
	 * Make the private key into a SymKey that we can use.
	 * The first 32 bytes of the configured key are imported.
	 */
	key_item.type = siBuffer;
	key_item.data = instance->totem_config->private_key;
	key_item.len = 32;

	instance->nss_sym_key = PK11_ImportSymKey(aes_slot,
		instance->totem_config->crypto_crypt_type,
		PK11_OriginUnwrap, CKA_ENCRYPT|CKA_DECRYPT,
		&key_item, NULL);
	if (instance->nss_sym_key == NULL)
	{
		log_printf(instance->totemudpu_log_level_security, "Failure to import key into NSS (err %d)\n",
			PR_GetError());
		goto out;
	}

	instance->nss_sym_key_sign = PK11_ImportSymKey(sha1_slot,
		CKM_SHA_1_HMAC,
		PK11_OriginUnwrap, CKA_SIGN,
		&key_item, NULL);
	if (instance->nss_sym_key_sign == NULL) {
		log_printf(instance->totemudpu_log_level_security, "Failure to import key into NSS (err %d)\n",
			PR_GetError());
		goto out;
	}

out:
	/* drop our slot references; PK11_GetBestSlot hands out a reference per call */
	if (sha1_slot != NULL) {
		PK11_FreeSlot(sha1_slot);
	}
	if (aes_slot != NULL) {
		PK11_FreeSlot(aes_slot);
	}
	return;
}
static int encrypt_and_sign_nss (
struct totemudpu_instance *instance,
unsigned char *buf,
size_t *buf_len,
const struct iovec *iovec,
unsigned int iov_len)
{
PK11Context* enc_context = NULL;
SECStatus rv1, rv2;
int tmp1_outlen;
unsigned int tmp2_outlen;
unsigned char *inbuf;
unsigned char *data;
unsigned char *outdata;
size_t datalen;
SECItem no_params;
SECItem iv_item;
struct security_header *header;
SECItem *nss_sec_param;
unsigned char nss_iv_data[16];
SECStatus rv;
no_params.type = siBuffer;
no_params.data = 0;
no_params.len = 0;
tmp1_outlen = tmp2_outlen = 0;
inbuf = copy_from_iovec(iovec, iov_len, &datalen);
if (!inbuf) {
log_printf(instance->totemudpu_log_level_security, "malloc error copying buffer from iovec\n");
return -1;
}
data = inbuf + sizeof (struct security_header);
datalen -= sizeof (struct security_header);
outdata = buf + sizeof (struct security_header);
header = (struct security_header *)buf;
rv = PK11_GenerateRandom (
nss_iv_data,
sizeof (nss_iv_data));
if (rv != SECSuccess) {
log_printf(instance->totemudpu_log_level_security,
"Failure to generate a random number %d\n",
PR_GetError());
}
memcpy(header->salt, nss_iv_data, sizeof(nss_iv_data));
iv_item.type = siBuffer;
iv_item.data = nss_iv_data;
iv_item.len = sizeof (nss_iv_data);
nss_sec_param = PK11_ParamFromIV (
instance->totem_config->crypto_crypt_type,
&iv_item);
if (nss_sec_param == NULL) {
log_printf(instance->totemudpu_log_level_security,
"Failure to set up PKCS11 param (err %d)\n",
PR_GetError());
free (inbuf);
return (-1);
}
/*
* Create cipher context for encryption
*/
enc_context = PK11_CreateContextBySymKey (
instance->totem_config->crypto_crypt_type,
CKA_ENCRYPT,
instance->nss_sym_key,
nss_sec_param);
if (!enc_context) {
char err[1024];
PR_GetErrorText(err);
err[PR_GetErrorTextLength()] = 0;
log_printf(instance->totemudpu_log_level_security,
"PK11_CreateContext failed (encrypt) crypt_type=%d (err %d): %s\n",
instance->totem_config->crypto_crypt_type,
PR_GetError(), err);
free(inbuf);
return -1;
}
rv1 = PK11_CipherOp(enc_context, outdata,
&tmp1_outlen, FRAME_SIZE_MAX - sizeof(struct security_header),
data, datalen);
rv2 = PK11_DigestFinal(enc_context, outdata + tmp1_outlen, &tmp2_outlen,
FRAME_SIZE_MAX - tmp1_outlen);
PK11_DestroyContext(enc_context, PR_TRUE);
*buf_len = tmp1_outlen + tmp2_outlen;
free(inbuf);
// memcpy(&outdata[*buf_len], nss_iv_data, sizeof(nss_iv_data));
if (rv1 != SECSuccess || rv2 != SECSuccess)
goto out;
/* Now do the digest */
enc_context = PK11_CreateContextBySymKey(CKM_SHA_1_HMAC,
CKA_SIGN, instance->nss_sym_key_sign, &no_params);
if (!enc_context) {
char err[1024];
PR_GetErrorText(err);
err[PR_GetErrorTextLength()] = 0;
log_printf(instance->totemudpu_log_level_security, "encrypt: PK11_CreateContext failed (digest) err %d: %s\n",
PR_GetError(), err);
return -1;
}
PK11_DigestBegin(enc_context);
rv1 = PK11_DigestOp(enc_context, outdata - 16, *buf_len + 16);
rv2 = PK11_DigestFinal(enc_context, header->hash_digest, &tmp2_outlen, sizeof(header->hash_digest));
PK11_DestroyContext(enc_context, PR_TRUE);
if (rv1 != SECSuccess || rv2 != SECSuccess)
goto out;
*buf_len = *buf_len + sizeof(struct security_header);
SECITEM_FreeItem(nss_sec_param, PR_TRUE);
return 0;
out:
return -1;
}
/*
 * Verify and decrypt a packet protected with NSS.
 *
 * The HMAC over salt + ciphertext is checked first. If it matches, the
 * ciphertext is decrypted using the salt as IV and the result is copied
 * back into the caller's iovec (whose lengths may be shortened by
 * copy_to_iovec).
 *
 * When iov_len > 1 the input is flattened into a temporary heap copy;
 * with a single iovec the caller's buffer is read directly, so only the
 * heap copy is ever freed here.
 *
 * Returns 0 on success, -1 on a short packet, digest mismatch or any
 * NSS failure.
 */
static int authenticate_and_decrypt_nss (
	struct totemudpu_instance *instance,
	struct iovec *iov,
	unsigned int iov_len)
{
	PK11Context* enc_context = NULL;
	SECStatus rv1, rv2;
	int tmp1_outlen;
	unsigned int tmp2_outlen;
	unsigned char outbuf[FRAME_SIZE_MAX];
	unsigned char digest[HMAC_HASH_SIZE];
	unsigned char *outdata;
	int result_len;
	unsigned char *data;
	unsigned char *inbuf;
	unsigned char *heap_copy = NULL;	/* non-NULL only when we own the input copy */
	size_t datalen;
	struct security_header *header = (struct security_header *)iov[0].iov_base;
	SECItem no_params;
	SECItem ivdata;
	int res = -1;

	no_params.type = siBuffer;
	no_params.data = 0;
	no_params.len = 0;

	tmp1_outlen = tmp2_outlen = 0;
	if (iov_len > 1) {
		heap_copy = copy_from_iovec(iov, iov_len, &datalen);
		if (!heap_copy) {
			log_printf(instance->totemudpu_log_level_security, "malloc error copying buffer from iovec\n");
			return -1;
		}
		inbuf = heap_copy;
	}
	else {
		inbuf = (unsigned char *)iov[0].iov_base;
		datalen = iov[0].iov_len;
	}

	/* a packet shorter than the header would make the arithmetic below wrap */
	if (datalen < sizeof (struct security_header)) {
		goto out;
	}

	data = inbuf + sizeof (struct security_header) - 16;
	datalen = datalen - sizeof (struct security_header) + 16;

	outdata = outbuf + sizeof (struct security_header);

	/* Check the digest */
	enc_context = PK11_CreateContextBySymKey (
		CKM_SHA_1_HMAC, CKA_SIGN,
		instance->nss_sym_key_sign,
		&no_params);
	if (!enc_context) {
		char err[1024];
		PR_GetErrorText(err);
		err[PR_GetErrorTextLength()] = 0;
		log_printf(instance->totemudpu_log_level_security, "PK11_CreateContext failed (check digest) err %d: %s\n",
			PR_GetError(), err);
		goto out;
	}

	PK11_DigestBegin(enc_context);

	rv1 = PK11_DigestOp(enc_context, data, datalen);
	rv2 = PK11_DigestFinal(enc_context, digest, &tmp2_outlen, sizeof(digest));

	PK11_DestroyContext(enc_context, PR_TRUE);

	if (rv1 != SECSuccess || rv2 != SECSuccess) {
		log_printf(instance->totemudpu_log_level_security, "Digest check failed\n");
		goto out;
	}

	if (memcmp(digest, header->hash_digest, tmp2_outlen) != 0) {
		log_printf(instance->totemudpu_log_level_error, "Digest does not match\n");
		goto out;
	}

	/*
	 * Get rid of salt
	 */
	data += 16;
	datalen -= 16;

	/* Create cipher context for decryption */
	ivdata.type = siBuffer;
	ivdata.data = header->salt;
	ivdata.len = sizeof(header->salt);

	enc_context = PK11_CreateContextBySymKey(
		instance->totem_config->crypto_crypt_type,
		CKA_DECRYPT,
		instance->nss_sym_key, &ivdata);
	if (!enc_context) {
		log_printf(instance->totemudpu_log_level_security,
			"PK11_CreateContext (decrypt) failed (err %d)\n",
			PR_GetError());
		goto out;
	}

	rv1 = PK11_CipherOp(enc_context, outdata, &tmp1_outlen,
		sizeof(outbuf) - sizeof (struct security_header),
		data, datalen);
	if (rv1 != SECSuccess) {
		log_printf(instance->totemudpu_log_level_security,
			"PK11_CipherOp (decrypt) failed (err %d)\n",
			PR_GetError());
	}
	rv2 = PK11_DigestFinal(enc_context, outdata + tmp1_outlen, &tmp2_outlen,
		sizeof(outbuf) - tmp1_outlen);
	PK11_DestroyContext(enc_context, PR_TRUE);
	result_len = tmp1_outlen + tmp2_outlen + sizeof (struct security_header);

	/* Copy it back to the buffer */
	copy_to_iovec(iov, iov_len, outbuf, result_len);

	if (rv1 == SECSuccess && rv2 == SECSuccess) {
		res = 0;
	}

out:
	/* free(NULL) is a no-op, so this is safe for the single-iovec case */
	free(heap_copy);
	return res;
}
#endif
/*
 * Encrypt and sign a message with the SOBER128/SHA1-HMAC scheme.
 *
 * A fresh salt is drawn from the instance PRNG and written to the header
 * at the start of buf. The MAC key, cipher key and IV are derived from
 * the private key and that salt. iovec[0] is skipped (it stands in for
 * the header); iovec[1..] are copied after the header, encrypted in
 * place, and the HMAC over everything following the digest field is
 * stored in the header.
 *
 * *buf_len receives header size + payload size. Always returns 0.
 */
static int encrypt_and_sign_sober (
	struct totemudpu_instance *instance,
	unsigned char *buf,
	size_t *buf_len,
	const struct iovec *iovec,
	unsigned int iov_len)
{
	unsigned char key_material[48];
	unsigned char *iv_key = &key_material[0];
	unsigned char *stream_key = &key_material[16];
	unsigned char *mac_key = &key_material[32];
	struct security_header *header = (struct security_header *)buf;
	unsigned char *payload = buf + sizeof (struct security_header);
	unsigned char *cursor = payload;
	size_t total = sizeof (struct security_header);
	unsigned long digest_len;
	hmac_state mac_state;
	prng_state derive_state;
	prng_state cipher_state;
	unsigned int idx;

	memset (key_material, 0, sizeof (key_material));
	memset (header->salt, 0, sizeof (header->salt));

	/*
	 * Draw the salt, then derive MAC, CIPHER and IV keys from
	 * private key + salt
	 */
	sober128_read (header->salt, sizeof (header->salt),
		&instance->totemudpu_prng_state);

	sober128_start (&derive_state);
	sober128_add_entropy (instance->totemudpu_private_key,
		instance->totemudpu_private_key_len,
		&derive_state);
	sober128_add_entropy (header->salt, sizeof (header->salt),
		&derive_state);
	sober128_read (key_material, sizeof (key_material), &derive_state);

	/*
	 * Key the stream cipher
	 */
	sober128_start (&cipher_state);
	sober128_add_entropy (stream_key, 16, &cipher_state);
	sober128_add_entropy (iv_key, 16, &cipher_state);

	/*
	 * Gather the message parts behind the header
	 */
	for (idx = 1; idx < iov_len; idx++) {
		memcpy (cursor, iovec[idx].iov_base, iovec[idx].iov_len);
		cursor += iovec[idx].iov_len;
		total += iovec[idx].iov_len;
	}

	/*
	 * XOR the gathered payload with the cipher stream
	 */
	sober128_read (payload,
		total - sizeof (struct security_header),
		&cipher_state);

	/*
	 * Sign everything after the digest field and store the signature
	 */
	memset (&mac_state, 0, sizeof (mac_state));
	hmac_init (&mac_state, DIGEST_SHA1, mac_key, 16);
	hmac_process (&mac_state,
		buf + HMAC_HASH_SIZE,
		total - HMAC_HASH_SIZE);
	digest_len = hash_descriptor[DIGEST_SHA1]->hashsize;
	hmac_done (&mac_state, header->hash_digest, &digest_len);

	*buf_len = total;

	return 0;
}
static int encrypt_and_sign_worker (
struct totemudpu_instance *instance,
unsigned char *buf,
size_t *buf_len,
const struct iovec *iovec,
unsigned int iov_len)
{
if (instance->totem_config->crypto_type == TOTEM_CRYPTO_SOBER ||
instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_OLD)
return encrypt_and_sign_sober(instance, buf, buf_len, iovec, iov_len);
#ifdef HAVE_LIBNSS
if (instance->totem_config->crypto_type == TOTEM_CRYPTO_NSS)
return encrypt_and_sign_nss(instance, buf, buf_len, iovec, iov_len);
#endif
return -1;
}
/*
 * Authenticate and decrypt a received packet.
 *
 * The last byte of the packet names the crypto type. It is stripped and
 * the matching backend is tried (NSS only when the NEW crypto mode is
 * accepted and NSS is compiled in). If that fails, the stripped byte is
 * put back and the whole packet is tried as SOBER, since it may come
 * from a sender that does not append a type byte.
 *
 * Returns 0 on success, -1 on failure or when there is no data at all.
 */
static int authenticate_and_decrypt (
	struct totemudpu_instance *instance,
	struct iovec *iov,
	unsigned int iov_len)
{
	unsigned char type;
	struct iovec *last;
	int res = -1;

	/* with no iovec, or an empty final iovec, there is no type byte to read */
	if (iov_len == 0) {
		return (-1);
	}
	last = &iov[iov_len-1];
	if (last->iov_len == 0) {
		return (-1);
	}

	/*
	 * Get the encryption type and remove it from the buffer
	 */
	type = ((unsigned char *)last->iov_base)[last->iov_len-1];
	last->iov_len -= 1;

	if (type == TOTEM_CRYPTO_SOBER)
		res = authenticate_and_decrypt_sober(instance, iov, iov_len);

	/*
	 * Only try higher crypto options if NEW has been requested
	 */
	if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_NEW) {
#ifdef HAVE_LIBNSS
		if (type == TOTEM_CRYPTO_NSS)
			res = authenticate_and_decrypt_nss(instance, iov, iov_len);
#endif
	}

	/*
	 * If it failed, then try decrypting the whole packet as it might be
	 * from aisexec
	 */
	if (res == -1) {
		iov[iov_len-1].iov_len += 1;
		res = authenticate_and_decrypt_sober(instance, iov, iov_len);
	}

	return res;
}
/*
 * Initialise the crypto backends this instance may need.
 *
 * SOBER128 is always initialised. NSS is initialised as well unless only
 * the OLD crypto type is accepted (and only when NSS is compiled in).
 */
static void init_crypto(
	struct totemudpu_instance *instance)
{
	init_sober_crypto(instance);

#ifdef HAVE_LIBNSS
	if (instance->totem_config->crypto_accept != TOTEM_CRYPTO_ACCEPT_OLD) {
		init_nss_crypto(instance);
	}
#endif
}
/*
 * Validate a requested transmit crypto type and log the selection.
 *
 * Returns -1 when only the OLD crypto mode is accepted or when type is
 * not a known algorithm, 0 otherwise. This function itself only
 * validates and logs; it does not store the type anywhere.
 */
int totemudpu_crypto_set (
	void *udpu_context,
	unsigned int type)
{
	struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;

	/*
	 * The crypto type is fixed when OLD is selected
	 */
	if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_OLD) {
		return (-1);
	}

	/*
	 * Accept only the algorithms we know about
	 */
	switch (type) {
	case TOTEM_CRYPTO_SOBER:
		log_printf(instance->totemudpu_log_level_security,
			"Transmit security set to: libtomcrypt SOBER128/SHA1HMAC (mode 0)");
		return (0);
	case TOTEM_CRYPTO_NSS:
		log_printf(instance->totemudpu_log_level_security,
			"Transmit security set to: NSS AES128CBC/SHA1HMAC (mode 1)");
		return (0);
	default:
		return (-1);
	}
}
/*
 * Send one message to a single destination over the token socket.
 *
 * With secauth enabled the message is encrypted and signed into a local
 * frame buffer and a one-byte crypto type is appended (the configured
 * type in NEW mode, otherwise 0). If encryption fails, or the result
 * leaves no room for the type byte, the message is dropped and a
 * security-level log entry is written.
 *
 * A sendmsg failure is only logged at debug level.
 */
static inline void ucast_sendmsg (
	struct totemudpu_instance *instance,
	struct totem_ip_address *system_to,
	const void *msg,
	unsigned int msg_len)
{
	struct msghdr msg_ucast;
	int res = 0;
	size_t buf_len;
	unsigned char sheader[sizeof (struct security_header)];
	unsigned char encrypt_data[FRAME_SIZE_MAX];
	struct iovec iovec_encrypt[2];
	const struct iovec *iovec_sendmsg;
	struct sockaddr_storage sockaddr;
	struct iovec iovec;
	unsigned int iov_len;
	int addrlen;

	if (instance->totem_config->secauth == 1) {
		iovec_encrypt[0].iov_base = (void *)sheader;
		iovec_encrypt[0].iov_len = sizeof (struct security_header);
		iovec_encrypt[1].iov_base = (void *)msg;
		iovec_encrypt[1].iov_len = msg_len;

		/*
		 * Encrypt and digest the message
		 */
		res = encrypt_and_sign_worker (
			instance,
			encrypt_data,
			&buf_len,
			iovec_encrypt,
			2);
		/*
		 * buf_len is only defined when encryption succeeded, and one
		 * more byte (the crypto type) still has to fit in the buffer.
		 */
		if (res != 0 || buf_len >= sizeof (encrypt_data)) {
			log_printf (instance->totemudpu_log_level_security,
				"Unable to encrypt and sign unicast message; not sending.\n");
			return;
		}

		if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_NEW) {
			encrypt_data[buf_len++] = instance->totem_config->crypto_type;
		}
		else {
			encrypt_data[buf_len++] = 0;
		}

		iovec_encrypt[0].iov_base = (void *)encrypt_data;
		iovec_encrypt[0].iov_len = buf_len;
		iovec_sendmsg = &iovec_encrypt[0];
		iov_len = 1;
	} else {
		iovec.iov_base = (void *)msg;
		iovec.iov_len = msg_len;
		iovec_sendmsg = &iovec;
		iov_len = 1;
	}

	/*
	 * Build unicast message
	 */
	totemip_totemip_to_sockaddr_convert(system_to,
		instance->totem_interface->ip_port, &sockaddr, &addrlen);
	msg_ucast.msg_name = &sockaddr;
	msg_ucast.msg_namelen = addrlen;
	msg_ucast.msg_iov = (void *) iovec_sendmsg;
	msg_ucast.msg_iovlen = iov_len;
#if !defined(COROSYNC_SOLARIS)
	msg_ucast.msg_control = 0;
	msg_ucast.msg_controllen = 0;
	msg_ucast.msg_flags = 0;
#else
	msg_ucast.msg_accrights = NULL;
	msg_ucast.msg_accrightslen = 0;
#endif

	/*
	 * Transmit unicast message
	 * An error here is recovered by totemsrp
	 */
	res = sendmsg (instance->token_socket, &msg_ucast, MSG_NOSIGNAL);
	if (res < 0) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug,
			"sendmsg(ucast) failed (non-critical)");
	}
}
/*
 * Send one message to every entry of the instance's member list, using
 * each member's own socket.
 *
 * With secauth enabled the message is encrypted and signed once into a
 * local frame buffer and a one-byte crypto type is appended (the
 * configured type in NEW mode, otherwise 0). If encryption fails, or the
 * result leaves no room for the type byte, nothing is sent and a
 * security-level log entry is written.
 *
 * A sendmsg failure for one member is logged at debug level and the loop
 * carries on with the next member.
 */
static inline void mcast_sendmsg (
	struct totemudpu_instance *instance,
	const void *msg,
	unsigned int msg_len)
{
	struct msghdr msg_mcast;
	int res = 0;
	size_t buf_len;
	unsigned char sheader[sizeof (struct security_header)];
	unsigned char encrypt_data[FRAME_SIZE_MAX];
	struct iovec iovec_encrypt[2];
	struct iovec iovec;
	const struct iovec *iovec_sendmsg;
	struct sockaddr_storage sockaddr;
	unsigned int iov_len;
	int addrlen;
	struct list_head *list;
	struct totemudpu_member *member;

	if (instance->totem_config->secauth == 1) {
		iovec_encrypt[0].iov_base = (void *)sheader;
		iovec_encrypt[0].iov_len = sizeof (struct security_header);
		iovec_encrypt[1].iov_base = (void *)msg;
		iovec_encrypt[1].iov_len = msg_len;

		/*
		 * Encrypt and digest the message
		 */
		res = encrypt_and_sign_worker (
			instance,
			encrypt_data,
			&buf_len,
			iovec_encrypt,
			2);
		/*
		 * buf_len is only defined when encryption succeeded, and one
		 * more byte (the crypto type) still has to fit in the buffer.
		 */
		if (res != 0 || buf_len >= sizeof (encrypt_data)) {
			log_printf (instance->totemudpu_log_level_security,
				"Unable to encrypt and sign multicast message; not sending.\n");
			return;
		}

		if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_NEW) {
			encrypt_data[buf_len++] = instance->totem_config->crypto_type;
		}
		else {
			encrypt_data[buf_len++] = 0;
		}

		iovec_encrypt[0].iov_base = (void *)encrypt_data;
		iovec_encrypt[0].iov_len = buf_len;
		iovec_sendmsg = &iovec_encrypt[0];
		iov_len = 1;
	} else {
		iovec.iov_base = (void *)msg;
		iovec.iov_len = msg_len;
		iovec_sendmsg = &iovec;
		iov_len = 1;
	}

	/*
	 * Build multicast message
	 */
	for (list = instance->member_list.next;
		list != &instance->member_list;
		list = list->next) {

		member = list_entry (list,
			struct totemudpu_member,
			list);

		totemip_totemip_to_sockaddr_convert(&member->member,
			instance->totem_interface->ip_port, &sockaddr, &addrlen);
		msg_mcast.msg_name = &sockaddr;
		msg_mcast.msg_namelen = addrlen;
		msg_mcast.msg_iov = (void *) iovec_sendmsg;
		msg_mcast.msg_iovlen = iov_len;
#if !defined(COROSYNC_SOLARIS)
		msg_mcast.msg_control = 0;
		msg_mcast.msg_controllen = 0;
		msg_mcast.msg_flags = 0;
#else
		msg_mcast.msg_accrights = NULL;
		msg_mcast.msg_accrightslen = 0;
#endif

		/*
		 * Transmit multicast message
		 * An error here is recovered by totemsrp
		 */
		res = sendmsg (member->fd, &msg_mcast, MSG_NOSIGNAL);
		if (res < 0) {
			LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug,
				"sendmsg(mcast) failed (non-critical)");
		}
	}
}
/*
 * Shut down the transport: if a token socket is open (descriptor > 0),
 * remove it from the poll loop and close it.
 *
 * Always returns 0.
 */
int totemudpu_finalize (
	void *udpu_context)
{
	struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
	int res = 0;

	if (instance->token_socket > 0) {
		/*
		 * Unregister before closing so the loop is never asked to
		 * remove a descriptor that is already closed (or reused).
		 */
		qb_loop_poll_del (instance->totemudpu_poll_handle,
			instance->token_socket);
		close (instance->token_socket);
	}

	return (res);
}
/*
 * Poll callback: receive one datagram from fd into the instance's receive
 * buffer and pass it to the registered deliver function.
 *
 * With secauth enabled, packets shorter than the security header are
 * dropped, the packet is authenticated and decrypted, and only the part
 * after the security header is delivered. The receive iovec length is
 * reset to FRAME_SIZE_MAX after a failed authentication and after
 * delivery. revents is not used.
 *
 * Always returns 0.
 */
static int net_deliver_fn (
	int fd,
	int revents,
	void *data)
{
	struct totemudpu_instance *instance = (struct totemudpu_instance *)data;
	struct iovec *rx_iov = &instance->totemudpu_iov_recv;
	struct sockaddr_storage sender;
	struct msghdr rx_msg;
	unsigned char *payload;
	unsigned int payload_len;
	int bytes_received;

	/*
	 * Receive datagram
	 */
	rx_msg.msg_name = &sender;
	rx_msg.msg_namelen = sizeof (struct sockaddr_storage);
	rx_msg.msg_iov = rx_iov;
	rx_msg.msg_iovlen = 1;
#if !defined(COROSYNC_SOLARIS)
	rx_msg.msg_control = 0;
	rx_msg.msg_controllen = 0;
	rx_msg.msg_flags = 0;
#else
	rx_msg.msg_accrights = NULL;
	rx_msg.msg_accrightslen = 0;
#endif

	bytes_received = recvmsg (fd, &rx_msg, MSG_NOSIGNAL | MSG_DONTWAIT);
	if (bytes_received == -1) {
		return (0);
	}
	instance->stats_recv += bytes_received;

	payload = (unsigned char *)rx_iov->iov_base;
	payload_len = bytes_received;

	if (instance->totem_config->secauth == 1) {
		if (bytes_received < sizeof (struct security_header)) {
			log_printf (instance->totemudpu_log_level_security, "Received message is too short...  ignoring %d.\n", bytes_received);
			return (0);
		}

		rx_iov->iov_len = bytes_received;

		/*
		 * Authenticate and if authenticated, decrypt datagram
		 */
		if (authenticate_and_decrypt (instance, rx_iov, 1) == -1) {
			log_printf (instance->totemudpu_log_level_security, "Received message has invalid digest... ignoring.\n");
			log_printf (instance->totemudpu_log_level_security,
				"Invalid packet data\n");
			rx_iov->iov_len = FRAME_SIZE_MAX;
			return 0;
		}

		/* deliver only what follows the security header */
		payload += sizeof (struct security_header);
		payload_len -= sizeof (struct security_header);
	} else {
		rx_iov->iov_len = bytes_received;
	}

	/*
	 * Handle incoming message
	 */
	instance->totemudpu_deliver_fn (
		instance->context,
		payload,
		payload_len);

	rx_iov->iov_len = FRAME_SIZE_MAX;
	return (0);
}
/*
 * Thin wrapper around totemip_iface_check: passes the arguments through
 * together with the configured clear_node_high_bit setting and returns
 * its result unchanged.
 */
static int netif_determine (
	struct totemudpu_instance *instance,
	struct totem_ip_address *bindnet,
	struct totem_ip_address *bound_to,
	int *interface_up,
	int *interface_num)
{
	return (totemip_iface_check (bindnet, bound_to,
		interface_up, interface_num,
		instance->totem_config->clear_node_high_bit));
}
/*
 * Timer callback that (re)builds the totem socket for this instance.
 *
 * If the interface is up, the sockets for totem are built. If the interface
 * is down this function is requeued in the timer list to retry building the
 * sockets later.
 *
 * data is the struct totemudpu_instance that was registered with the timer.
 */
static void timer_function_netif_check_timeout (
	void *data)
{
	struct totemudpu_instance *instance = (struct totemudpu_instance *)data;
	/*
	 * The result of netif_determine() is not checked below, so start from
	 * defined values ("interface down") instead of reading indeterminate ones
	 * if the lookup fails without writing its outputs.
	 */
	int interface_up = 0;
	int interface_num = 0;
	struct totem_ip_address *bind_address;

	/*
	 * Build sockets for every interface
	 */
	netif_determine (instance,
		&instance->totem_interface->bindnet,
		&instance->totem_interface->boundto,
		&interface_up, &interface_num);

	/*
	 * If the network interface isn't back up and we are already
	 * in loopback mode, add timer to check again and return.
	 * The same is done for an up, regularly bound interface in single
	 * membership: add a timer to check for a downed regular interface.
	 */
	if ((instance->netif_bind_state == BIND_STATE_LOOPBACK &&
		interface_up == 0) ||
		(instance->my_memb_entries == 1 &&
		instance->netif_bind_state == BIND_STATE_REGULAR &&
		interface_up == 1)) {

		qb_loop_timer_add (instance->totemudpu_poll_handle,
			QB_LOOP_MED,
			instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
			(void *)instance,
			timer_function_netif_check_timeout,
			&instance->timer_netif_check_timeout);
		return;
	}

	if (instance->token_socket > 0) {
		/*
		 * Unregister the descriptor from the poll loop while it is still
		 * open; closing first would hand the loop a stale (and possibly
		 * already reused) descriptor number.
		 */
		qb_loop_poll_del (instance->totemudpu_poll_handle,
			instance->token_socket);
		close (instance->token_socket);
	}

	if (interface_up == 0) {
		/*
		 * Interface is not up
		 */
		instance->netif_bind_state = BIND_STATE_LOOPBACK;
		bind_address = &localhost;

		/*
		 * Add a timer to retry building interfaces and request memb_gather_enter
		 */
		qb_loop_timer_add (instance->totemudpu_poll_handle,
			QB_LOOP_MED,
			instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
			(void *)instance,
			timer_function_netif_check_timeout,
			&instance->timer_netif_check_timeout);
	} else {
		/*
		 * Interface is up
		 */
		instance->netif_bind_state = BIND_STATE_REGULAR;
		bind_address = &instance->totem_interface->bindnet;
	}

	/*
	 * Create and bind the multicast and unicast sockets
	 */
	totemudpu_build_sockets (instance,
		bind_address,
		&instance->totem_interface->boundto);

	qb_loop_poll_add (instance->totemudpu_poll_handle,
		QB_LOOP_MED,
		instance->token_socket,
		POLLIN, instance, net_deliver_fn);

	totemip_copy (&instance->my_id, &instance->totem_interface->boundto);

	/*
	 * This reports changes in the interface to the user and totemsrp
	 */
	if (instance->netif_bind_state == BIND_STATE_REGULAR) {
		if (instance->netif_state_report & NETIF_STATE_REPORT_UP) {
			log_printf (instance->totemudpu_log_level_notice,
				"The network interface [%s] is now up.\n",
				totemip_print (&instance->totem_interface->boundto));
			instance->netif_state_report = NETIF_STATE_REPORT_DOWN;
			instance->totemudpu_iface_change_fn (instance->context, &instance->my_id);
		}
		/*
		 * Add a timer to check for interface going down in single membership
		 */
		if (instance->my_memb_entries == 1) {
			qb_loop_timer_add (instance->totemudpu_poll_handle,
				QB_LOOP_MED,
				instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
				(void *)instance,
				timer_function_netif_check_timeout,
				&instance->timer_netif_check_timeout);
		}
	} else {
		if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) {
			log_printf (instance->totemudpu_log_level_notice,
				"The network interface is down.\n");
			instance->totemudpu_iface_change_fn (instance->context, &instance->my_id);
		}
		instance->netif_state_report = NETIF_STATE_REPORT_UP;
	}
}
/*
 * Set the socket priority to INTERACTIVE to ensure that our messages
 * don't get queued behind anything else.
 *
 * Only compiled in where SO_PRIORITY is defined; elsewhere this is a no-op.
 * A setsockopt() failure is logged at warning level and otherwise ignored.
 */
static void totemudpu_traffic_control_set(struct totemudpu_instance *instance, int sock)
{
#ifdef SO_PRIORITY
	int interactive_prio = 6; /* TC_PRIO_INTERACTIVE */
	int rc;

	rc = setsockopt(sock, SOL_SOCKET, SO_PRIORITY,
		&interactive_prio, sizeof(interactive_prio));
	if (rc != 0) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"Could not set traffic priority");
	}
#endif
}
/*
 * Create and configure instance->token_socket.
 *
 * Steps, in order: create a datagram socket in bindnet_address's family,
 * disable SIGPIPE via totemip_nosigpipe(), switch it to non-blocking mode,
 * bind it to bound_to on the interface's configured ip_port, and request a
 * receive buffer of MCAST_SOCKET_BUFFER_SIZE bytes.
 *
 * Returns 0 on success and -1 if socket(), fcntl() or bind() fails (each
 * failure is logged at warning level). Failing to enlarge the receive buffer
 * is only logged at notice level and does not fail the call.
 * interface_num is accepted but not used here.
 */
static int totemudpu_build_sockets_ip (
	struct totemudpu_instance *instance,
	struct totem_ip_address *bindnet_address,
	struct totem_ip_address *bound_to,
	int interface_num)
{
	struct sockaddr_storage bind_sockaddr;
	int bind_addrlen;
	unsigned int rcvbuf_bytes = MCAST_SOCKET_BUFFER_SIZE;

	/*
	 * Setup unicast socket
	 */
	instance->token_socket = socket (bindnet_address->family, SOCK_DGRAM, 0);
	if (instance->token_socket == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"socket() failed");
		return (-1);
	}

	totemip_nosigpipe (instance->token_socket);

	if (fcntl (instance->token_socket, F_SETFL, O_NONBLOCK) == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"Could not set non-blocking operation on token socket");
		return (-1);
	}

	/*
	 * Bind to unicast socket used for token send/receives
	 * This has the side effect of binding to the correct interface
	 */
	totemip_totemip_to_sockaddr_convert(bound_to,
		instance->totem_interface->ip_port, &bind_sockaddr, &bind_addrlen);
	if (bind (instance->token_socket,
		(struct sockaddr *)&bind_sockaddr, bind_addrlen) == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"bind token socket failed");
		return (-1);
	}

	/*
	 * the token_socket can receive many messages. Allow a large number
	 * of receive messages on this socket
	 */
	if (setsockopt (instance->token_socket, SOL_SOCKET, SO_RCVBUF,
		&rcvbuf_bytes, sizeof (rcvbuf_bytes)) == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_notice,
			"Could not set recvbuf size");
	}

	return 0;
}
/*
 * Determine the address to bind to and build the token socket for it.
 *
 * Looks up the interface with netif_determine(), records bound_to as this
 * instance's id, creates the socket with totemudpu_build_sockets_ip() and,
 * when that succeeded, raises the socket's traffic priority.
 *
 * Returns -1 if the interface lookup fails, otherwise the result of
 * totemudpu_build_sockets_ip().
 */
static int totemudpu_build_sockets (
	struct totemudpu_instance *instance,
	struct totem_ip_address *bindnet_address,
	struct totem_ip_address *bound_to)
{
	int interface_num;
	int interface_up;
	int res;

	/*
	 * Determine the ip address bound to and the interface name
	 */
	res = netif_determine (instance,
		bindnet_address,
		bound_to,
		&interface_up,
		&interface_num);

	if (res == -1) {
		return (-1);
	}

	totemip_copy(&instance->my_id, bound_to);

	res = totemudpu_build_sockets_ip (instance,
		bindnet_address, bound_to, interface_num);

	/*
	 * We only send out of the token socket. Skip the priority tweak when the
	 * socket could not be set up: it would only call setsockopt() on an
	 * unusable descriptor and log a misleading second warning.
	 */
	if (res == 0) {
		totemudpu_traffic_control_set(instance, instance->token_socket);
	}

	return res;
}
/*
* Totem Network interface - also does encryption/decryption
* depends on poll abstraction, POSIX, IPV4
*/
/*
 * Create an instance
 *
 * Allocates a totemudpu instance, fills it from totem_config and the
 * supplied callbacks, and schedules the first interface check 100 ms later.
 * On success the new instance is stored in *udpu_context and 0 is returned;
 * -1 is returned if the instance cannot be allocated.
 *
 * poll_handle          - loop the interface-check timer is registered on
 * udpu_context         - out: receives the new instance
 * totem_config         - configuration (logging levels, private key, interfaces, node id)
 * interface_no         - index into totem_config->interfaces (not range-checked here)
 * context              - opaque pointer stored in the instance for the callbacks
 * deliver_fn, iface_change_fn, target_set_completed - callbacks stored in the instance
 */
int totemudpu_initialize (
qb_loop_t *poll_handle,
void **udpu_context,
struct totem_config *totem_config,
int interface_no,
void *context,
void (*deliver_fn) (
void *context,
const void *msg,
unsigned int msg_len),
void (*iface_change_fn) (
void *context,
const struct totem_ip_address *iface_address),
void (*target_set_completed) (
void *context))
{
struct totemudpu_instance *instance;
instance = malloc (sizeof (struct totemudpu_instance));
if (instance == NULL) {
return (-1);
}
totemudpu_instance_initialize (instance);
instance->totem_config = totem_config;
/*
* Configure logging
*
* NOTE(review): the security log level is hard-coded to 1; the configured
* value is left commented out at the end of the line.
*/
instance->totemudpu_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security;
instance->totemudpu_log_level_error = totem_config->totem_logging_configuration.log_level_error;
instance->totemudpu_log_level_warning = totem_config->totem_logging_configuration.log_level_warning;
instance->totemudpu_log_level_notice = totem_config->totem_logging_configuration.log_level_notice;
instance->totemudpu_log_level_debug = totem_config->totem_logging_configuration.log_level_debug;
instance->totemudpu_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
instance->totemudpu_log_printf = totem_config->totem_logging_configuration.log_printf;
/*
* Copy the private key from the configuration into the instance, then
* call init_crypto() (presumably this also prepares the random number
* generator used later to generate salt - verify in init_crypto).
* NOTE(review): private_key_len is not checked against the size of
* totemudpu_private_key here - confirm it is validated by the caller.
*/
memcpy (instance->totemudpu_private_key, totem_config->private_key,
totem_config->private_key_len);
instance->totemudpu_private_key_len = totem_config->private_key_len;
init_crypto(instance);
/*
* Initialize local variables for totemudpu
*/
instance->totem_interface = &totem_config->interfaces[interface_no];
memset (instance->iov_buffer, 0, FRAME_SIZE_MAX);
instance->totemudpu_poll_handle = poll_handle;
instance->totem_interface->bindnet.nodeid = instance->totem_config->node_id;
instance->context = context;
instance->totemudpu_deliver_fn = deliver_fn;
instance->totemudpu_iface_change_fn = iface_change_fn;
instance->totemudpu_target_set_completed = target_set_completed;
/* The shared localhost address is always set up as AF_INET and carries this node's id. */
totemip_localhost (AF_INET, &localhost);
localhost.nodeid = instance->totem_config->node_id;
/*
* RRP layer isn't ready to receive message because it hasn't
* initialized yet. Add short timer to check the interfaces.
*/
qb_loop_timer_add (instance->totemudpu_poll_handle,
QB_LOOP_MED,
100*QB_TIME_NS_IN_MSEC,
(void *)instance,
timer_function_netif_check_timeout,
&instance->timer_netif_check_timeout);
*udpu_context = instance;
return (0);
}
/*
 * Allocate one buffer of FRAME_SIZE_MAX bytes with malloc().
 * Returns the buffer, or NULL if the allocation fails.
 */
void *totemudpu_buffer_alloc (void)
{
	void *frame_buf = malloc (FRAME_SIZE_MAX);

	return (frame_buf);
}
/*
 * Release a buffer with free(). Passing NULL is allowed and does nothing.
 */
void totemudpu_buffer_release (void *ptr)
{
	/*
	 * A void function must not return an expression in ISO C, so call
	 * free() as a plain statement.
	 */
	free (ptr);
}
/*
 * Record the current membership size and re-arm the interface check.
 *
 * Any pending interface-check timer is removed. When the membership has
 * exactly one entry the timer is added again with the configured
 * downcheck_timeout (milliseconds). Always returns 0.
 */
int totemudpu_processor_count_set (
	void *udpu_context,
	int processor_count)
{
	struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;

	instance->my_memb_entries = processor_count;

	qb_loop_timer_del (instance->totemudpu_poll_handle,
		instance->timer_netif_check_timeout);

	if (processor_count != 1) {
		return (0);
	}

	qb_loop_timer_add (instance->totemudpu_poll_handle,
		QB_LOOP_MED,
		instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC,
		(void *)instance,
		timer_function_netif_check_timeout,
		&instance->timer_netif_check_timeout);

	return (0);
}
/*
 * Receive flush: nothing to do in this transport. Always returns 0.
 */
int totemudpu_recv_flush (void *udpu_context)
{
	(void)udpu_context;

	return (0);
}
/*
 * Send flush: nothing to do in this transport. Always returns 0.
 */
int totemudpu_send_flush (void *udpu_context)
{
	(void)udpu_context;

	return (0);
}
/*
 * Send msg (msg_len bytes) to the instance's current token target through
 * ucast_sendmsg(). The outcome of the send is not reported; always returns 0.
 */
int totemudpu_token_send (
	void *udpu_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemudpu_instance *udpu = (struct totemudpu_instance *)udpu_context;

	ucast_sendmsg (udpu, &udpu->token_target, msg, msg_len);

	return (0);
}
/*
 * Hand msg (msg_len bytes) to mcast_sendmsg() for this instance.
 * The outcome of the send is not reported; always returns 0.
 */
int totemudpu_mcast_flush_send (
	void *udpu_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemudpu_instance *udpu = (struct totemudpu_instance *)udpu_context;

	mcast_sendmsg (udpu, msg, msg_len);

	return (0);
}
/*
 * Hand msg (msg_len bytes) to mcast_sendmsg() for this instance; the
 * implementation is the same as totemudpu_mcast_flush_send().
 * The outcome of the send is not reported; always returns 0.
 */
int totemudpu_mcast_noflush_send (
	void *udpu_context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemudpu_instance *udpu = (struct totemudpu_instance *)udpu_context;

	mcast_sendmsg (udpu, msg, msg_len);

	return (0);
}
/*
 * Run the interface check immediately by calling the timer callback
 * directly with this instance. Always returns 0.
 */
extern int totemudpu_iface_check (void *udpu_context)
{
	timer_function_netif_check_timeout (
		(struct totemudpu_instance *)udpu_context);

	return (0);
}
/*
 * Reduce totem_config->net_mtu by the per-packet overhead: the IP and UDP
 * headers always, plus the security header when secauth is enabled (== 1).
 * udpu_context is not used.
 */
extern void totemudpu_net_mtu_adjust (void *udpu_context, struct totem_config *totem_config)
{
#define UDPIP_HEADER_SIZE (20 + 8) /* 20 bytes for ip 8 bytes for udp */
	totem_config->net_mtu -= UDPIP_HEADER_SIZE;

	if (totem_config->secauth == 1) {
		totem_config->net_mtu -= sizeof (struct security_header);
	}
}
/*
 * Return the printable form of this instance's own address (my_id) as
 * produced by totemip_print().
 */
const char *totemudpu_iface_print (void *udpu_context) {
	struct totemudpu_instance *udpu = (struct totemudpu_instance *)udpu_context;

	return (totemip_print (&udpu->my_id));
}
/*
 * Copy this instance's own address (my_id) into *addr. Always returns 0.
 */
int totemudpu_iface_get (
	void *udpu_context,
	struct totem_ip_address *addr)
{
	struct totemudpu_instance *udpu = (struct totemudpu_instance *)udpu_context;

	memcpy (addr, &udpu->my_id, sizeof (struct totem_ip_address));

	return (0);
}
/*
 * Store token_target as the destination for subsequent token sends, then
 * invoke the target_set_completed callback with the instance's context.
 * Always returns 0.
 */
int totemudpu_token_target_set (
	void *udpu_context,
	const struct totem_ip_address *token_target)
{
	struct totemudpu_instance *udpu = (struct totemudpu_instance *)udpu_context;

	memcpy (&udpu->token_target, token_target,
		sizeof (struct totem_ip_address));

	udpu->totemudpu_target_set_completed (udpu->context);

	return (0);
}
/*
 * Drain every datagram currently queued on the token socket.
 *
 * Each datagram is read into the instance's receive iovec and discarded.
 * The socket is polled with a zero timeout, so this never blocks.
 *
 * Returns 0 if nothing was readable, 1 if the last recvmsg() succeeded and
 * -1 if the last recvmsg() failed.
 */
extern int totemudpu_recv_mcast_empty (
	void *udpu_context)
{
	struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
	ssize_t res;	/* recvmsg() returns ssize_t; keep the -1 check signed */
	struct sockaddr_storage system_from;
	struct msghdr msg_recv;
	struct pollfd ufd;
	int nfds;
	int msg_processed = 0;

	/*
	 * Receive datagram
	 */
	msg_recv.msg_name = &system_from;
	msg_recv.msg_namelen = sizeof (struct sockaddr_storage);
	msg_recv.msg_iov = &instance->totemudpu_iov_recv;
	msg_recv.msg_iovlen = 1;
#if !defined(COROSYNC_SOLARIS)
	msg_recv.msg_control = 0;
	msg_recv.msg_controllen = 0;
	msg_recv.msg_flags = 0;
#else
	msg_recv.msg_accrights = NULL;
	msg_recv.msg_accrightslen = 0;
#endif

	for (;;) {
		ufd.fd = instance->token_socket;
		ufd.events = POLLIN;
		ufd.revents = 0;
		nfds = poll (&ufd, 1, 0);

		/*
		 * Stop when nothing is readable. This also covers poll() reporting
		 * the descriptor with only POLLERR/POLLHUP/POLLNVAL set: nothing
		 * would be consumed in that case, so looping on "nfds == 1" alone
		 * would spin forever.
		 */
		if (nfds != 1 || (ufd.revents & POLLIN) == 0) {
			break;
		}

		res = recvmsg (instance->token_socket, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT);
		if (res != -1) {
			msg_processed = 1;
		} else {
			msg_processed = -1;
		}
	}

	return (msg_processed);
}
/*
 * Add a member to this instance's member list.
 *
 * Allocates a member entry holding a copy of *member and a dedicated
 * non-blocking datagram socket in the member's address family, with a send
 * buffer of MCAST_SOCKET_BUFFER_SIZE bytes requested.
 *
 * Returns 0 on success. Returns -1 if the entry cannot be allocated or the
 * socket cannot be created or made non-blocking; in that case nothing is
 * added to the list and no resources remain allocated. Failing to enlarge
 * the send buffer is only logged.
 */
int totemudpu_member_add (
	void *udpu_context,
	const struct totem_ip_address *member)
{
	struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
	struct totemudpu_member *new_member;
	int res;
	unsigned int sendbuf_size;
	unsigned int optlen = sizeof (sendbuf_size);

	new_member = malloc (sizeof (struct totemudpu_member));
	if (new_member == NULL) {
		return (-1);
	}
	list_init (&new_member->list);
	memcpy (&new_member->member, member, sizeof (struct totem_ip_address));

	new_member->fd = socket (member->family, SOCK_DGRAM, 0);
	if (new_member->fd == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"Could not create socket for new member");
		free (new_member);
		return (-1);
	}
	totemip_nosigpipe (new_member->fd);
	res = fcntl (new_member->fd, F_SETFL, O_NONBLOCK);
	if (res == -1) {
		/* Log first so errno still describes the fcntl() failure. */
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning,
			"Could not set non-blocking operation on token socket");
		close (new_member->fd);
		free (new_member);
		return (-1);
	}

	/*
	 * These sockets are used to send multicast messages, so their buffers
	 * should be large
	 */
	sendbuf_size = MCAST_SOCKET_BUFFER_SIZE;
	res = setsockopt (new_member->fd, SOL_SOCKET, SO_SNDBUF,
		&sendbuf_size, optlen);
	if (res == -1) {
		LOGSYS_PERROR (errno, instance->totemudpu_log_level_notice,
			"Could not set sendbuf size");
	}

	/*
	 * Link the member in only once its socket is usable, so a failed add
	 * never leaves a half-initialized entry on the list.
	 */
	list_add_tail (&new_member->list, &instance->member_list);

	return (0);
}
/*
 * Placeholder: member removal is not implemented. Both arguments are
 * ignored, the member list is left untouched, and 0 is returned.
 */
int totemudpu_member_remove (
	void *udpu_context,
	const struct totem_ip_address *token_target)
{
	(void)udpu_context;
	(void)token_target;

	return (0);
}
diff --git a/exec/util.c b/exec/util.c
index 6c7b60bc..16ba3c84 100644
--- a/exec/util.c
+++ b/exec/util.c
@@ -1,182 +1,182 @@
/*
* Copyright (c) 2002-2004 MontaVista Software, Inc.
* Copyright (c) 2004 Open Source Development Lab
* Copyright (c) 2006-2007, 2009 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com), Mark Haverkamp (markh@osdl.org)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/time.h>
#include <assert.h>
#include <corosync/corotypes.h>
#include <corosync/corodefs.h>
#include <corosync/list.h>
#include <corosync/engine/logsys.h>
#include "util.h"
LOGSYS_DECLARE_SUBSYS ("MAIN");
/*
 * Maps a service id to the short name used when printing it.
 */
struct service_names {
	const char *c_name;	/* short name; NULL marks the end of the table */
	int32_t c_val;		/* service id */
};

/*
 * Short-name table, terminated by a { NULL, -1 } entry. Some names ("AMF",
 * "TST") appear for more than one service id.
 */
static struct service_names servicenames[] =
{
	{ .c_name = "EVS",        .c_val = EVS_SERVICE },
	{ .c_name = "CLM",        .c_val = CLM_SERVICE },
	{ .c_name = "AMF",        .c_val = AMF_SERVICE },
	{ .c_name = "CKPT",       .c_val = CKPT_SERVICE },
	{ .c_name = "EVT",        .c_val = EVT_SERVICE },
	{ .c_name = "LCK",        .c_val = LCK_SERVICE },
	{ .c_name = "MSG",        .c_val = MSG_SERVICE },
	{ .c_name = "CFG",        .c_val = CFG_SERVICE },
	{ .c_name = "CPG",        .c_val = CPG_SERVICE },
	{ .c_name = "CMAN",       .c_val = CMAN_SERVICE },
	{ .c_name = "PCMK",       .c_val = PCMK_SERVICE },
	{ .c_name = "CONFDB",     .c_val = CONFDB_SERVICE },
	{ .c_name = "QUORUM",     .c_val = QUORUM_SERVICE },
	{ .c_name = "PLOAD",      .c_val = PLOAD_SERVICE },
	{ .c_name = "TMR",        .c_val = TMR_SERVICE },
	{ .c_name = "VOTEQUORUM", .c_val = VOTEQUORUM_SERVICE },
	{ .c_name = "NTF",        .c_val = NTF_SERVICE },
	{ .c_name = "AMF",        .c_val = AMF_V2_SERVICE },
	{ .c_name = "TST",        .c_val = TST_SV1_SERVICE },
	{ .c_name = "TST",        .c_val = TST_SV2_SERVICE },
	{ .c_name = "MON",        .c_val = MON_SERVICE },
	{ .c_name = "WD",         .c_val = WD_SERVICE },
	{ .c_name = NULL,         .c_val = -1 }
};
/*
 * Return the short name for service_id.
 *
 * Known ids yield a pointer to the constant name in servicenames[]. For an
 * unknown id the decimal value is formatted into buf (at most buf_size
 * bytes, always NUL-terminated when buf_size > 0) and buf is returned.
 */
const char * short_service_name_get(uint32_t service_id,
	char *buf, size_t buf_size)
{
	uint32_t i;

	for (i = 0; servicenames[i].c_name != NULL; i++) {
		if (service_id == (uint32_t)servicenames[i].c_val) {
			return (servicenames[i].c_name);
		}
	}
	/*
	 * service_id is unsigned: print it with an unsigned conversion so ids
	 * above INT32_MAX are not shown as negative numbers.
	 */
	snprintf(buf, buf_size, "%u", (unsigned int)service_id);
	return buf;
}
/*
 * Compare two names. Returns non-zero on match, 0 otherwise.
 *
 * Names of different length never match; names of equal length are
 * compared with strncmp() over that length.
 */
int name_match(cs_name_t *name1, cs_name_t *name2)
{
	if (name1->length != name2->length) {
		return 0;
	}

	return (strncmp ((char *)name1->value, (char *)name2->value,
		name1->length) == 0);
}
/*
 * Get the time of day and convert to nanoseconds.
 * Returns 0 if gettimeofday() fails.
 */
cs_time_t clust_time_now(void)
{
	struct timeval now;
	cs_time_t nanoseconds;

	if (gettimeofday(&now, 0) != 0) {
		return 0ULL;
	}

	/* whole seconds first, then the microsecond remainder */
	nanoseconds = (cs_time_t)(now.tv_sec) * 1000000000ULL;
	nanoseconds += (cs_time_t)(now.tv_usec) * 1000ULL;

	return nanoseconds;
}
/*
 * Out-of-memory handler; never returns.
 *
 * Trips an assertion that is always false. If assertions are compiled out
 * (NDEBUG), the process exits with EXIT_FAILURE instead.
 */
void _corosync_out_of_memory_error (void) __attribute__((noreturn));
void _corosync_out_of_memory_error (void)
{
assert (0==1);
exit (EXIT_FAILURE);
}
/*
 * Log a fatal exit and terminate the process; never returns.
 *
 * Writes an error-level message carrying the status and the file:line that
 * requested the exit, shuts logging down (logsys_atexit() before this
 * change, qb_log_fini() after it), then calls exit() with err as status.
 */
void _corosync_exit_error (
enum e_ais_done err, const char *file, unsigned int line) __attribute__((noreturn));
void _corosync_exit_error (
enum e_ais_done err, const char *file, unsigned int line)
{
log_printf (LOGSYS_LEVEL_ERROR, "Corosync Cluster Engine exiting "
"with status %d at %s:%u.\n", err, file, line);
- logsys_atexit ();
+ qb_log_fini();
exit (err);
}
#define min(a,b) ((a) < (b) ? (a) : (b))
/*
 * Return a NUL-terminated C string for name.
 *
 * When the name is properly terminated (length is in range and the byte at
 * value[length] is NUL) the name's own buffer is returned. Otherwise the
 * name is treated as corrupt: up to CS_MAX_NAME_LENGTH - 1 bytes are copied
 * into a zeroed static buffer, which is returned. That buffer is shared
 * between calls, so its contents are overwritten by the next such call.
 */
char *getcs_name_t (cs_name_t *name)
{
	static char ret_name[CS_MAX_NAME_LENGTH];
	int terminated;

	/* the length test comes first so value[length] is only read in range */
	terminated = (name->length < CS_MAX_NAME_LENGTH) &&
		(name->value[name->length] == '\0');
	if (terminated) {
		return ((char *)name->value);
	}

	/* if string is corrupt (non-terminated), ensure it's displayed safely */
	memset (ret_name, 0, sizeof (ret_name));
	memcpy (ret_name, name->value, min(name->length, CS_MAX_NAME_LENGTH -1));
	return (ret_name);
}
/*
 * Store the C string str in name.
 *
 * str is copied into name->value, truncated if necessary so that the stored
 * value is always NUL-terminated. name->length is set to the length of what
 * was actually stored (never more than CS_MAX_NAME_LENGTH).
 */
void setcs_name_t (cs_name_t *name, char *str) {
	size_t stored_len;

	strncpy ((char *)name->value, str, sizeof (name->value));
	((char *)name->value)[sizeof (name->value) - 1] = '\0';

	/*
	 * Measure the stored copy rather than str: when str was truncated,
	 * strlen (str) would record a length larger than the data held in
	 * name->value.
	 */
	stored_len = strlen ((char *)name->value);
	if (stored_len > CS_MAX_NAME_LENGTH) {
		stored_len = CS_MAX_NAME_LENGTH;
	}
	name->length = stored_len;
}
/*
 * Compare a cs_name_t with a C string. Returns non-zero when str1's length
 * equals strlen (str2) and the first str1->length bytes compare equal with
 * strncmp(); returns 0 otherwise.
 */
int cs_name_tisEqual (cs_name_t *str1, char *str2) {
	if (str1->length != strlen (str2)) {
		return 0;
	}

	return (strncmp ((char *)str1->value, (char *)str2,
		str1->length) == 0);
}
diff --git a/include/corosync/engine/logsys.h b/include/corosync/engine/logsys.h
index df1db1d4..662b2a11 100644
--- a/include/corosync/engine/logsys.h
+++ b/include/corosync/engine/logsys.h
@@ -1,457 +1,222 @@
/*
* Copyright (c) 2002-2004 MontaVista Software, Inc.
* Copyright (c) 2006-2009 Red Hat, Inc.
*
* Author: Steven Dake (sdake@redhat.com)
* Author: Lon Hohberger (lhh@redhat.com)
* Author: Fabio M. Di Nitto (fdinitto@redhat.com)
*
* All rights reserved.
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef LOGSYS_H_DEFINED
#define LOGSYS_H_DEFINED
#include <stdarg.h>
#include <stdlib.h>
#include <syslog.h>
#include <pthread.h>
#include <limits.h>
+#include <qb/qblog.h>
+
#ifdef __cplusplus
extern "C" {
#endif
/*
* All of the LOGSYS_MODE's can be ORed together for combined behavior
*
* FORK and THREADED are ignored for SUBSYSTEMS
*/
#define LOGSYS_MODE_OUTPUT_FILE (1<<0)
#define LOGSYS_MODE_OUTPUT_STDERR (1<<1)
#define LOGSYS_MODE_OUTPUT_SYSLOG (1<<2)
#define LOGSYS_MODE_FORK (1<<3)
#define LOGSYS_MODE_THREADED (1<<4)
/*
* Log priorities, compliant with syslog and SA Forum Log spec.
*/
#define LOGSYS_LEVEL_EMERG LOG_EMERG
#define LOGSYS_LEVEL_ALERT LOG_ALERT
#define LOGSYS_LEVEL_CRIT LOG_CRIT
#define LOGSYS_LEVEL_ERROR LOG_ERR
#define LOGSYS_LEVEL_WARNING LOG_WARNING
#define LOGSYS_LEVEL_NOTICE LOG_NOTICE
#define LOGSYS_LEVEL_INFO LOG_INFO
#define LOGSYS_LEVEL_DEBUG LOG_DEBUG
-/*
- * All of the LOGSYS_RECID's are mutually exclusive. Only one RECID at any time
- * can be specified.
- *
- * RECID_LOG indicates a message that should be sent to log. Anything else
- * is stored only in the flight recorder.
- */
-
-#define LOGSYS_RECID_MAX ((UINT_MAX) >> LOGSYS_SUBSYSID_END)
-
-#define LOGSYS_RECID_LOG (LOGSYS_RECID_MAX - 1)
-#define LOGSYS_RECID_ENTER (LOGSYS_RECID_MAX - 2)
-#define LOGSYS_RECID_LEAVE (LOGSYS_RECID_MAX - 3)
-#define LOGSYS_RECID_TRACE1 (LOGSYS_RECID_MAX - 4)
-#define LOGSYS_RECID_TRACE2 (LOGSYS_RECID_MAX - 5)
-#define LOGSYS_RECID_TRACE3 (LOGSYS_RECID_MAX - 6)
-#define LOGSYS_RECID_TRACE4 (LOGSYS_RECID_MAX - 7)
-#define LOGSYS_RECID_TRACE5 (LOGSYS_RECID_MAX - 8)
-#define LOGSYS_RECID_TRACE6 (LOGSYS_RECID_MAX - 9)
-#define LOGSYS_RECID_TRACE7 (LOGSYS_RECID_MAX - 10)
-#define LOGSYS_RECID_TRACE8 (LOGSYS_RECID_MAX - 11)
-
-
-/*
- * Internal APIs that must be globally exported
- * (External API below)
- */
-
/*
* logsys_logger bits
*
* SUBSYS_COUNT defines the maximum number of subsystems
* SUBSYS_NAMELEN defines the maximum len of a subsystem name
*/
#define LOGSYS_MAX_SUBSYS_COUNT 64
#define LOGSYS_MAX_SUBSYS_NAMELEN 64
-
-/*
- * rec_ident explained:
- *
- * rec_ident is an unsigned int and carries bitfields information
- * on subsys_id, log priority (level) and type of message (RECID).
- *
- * level values are imported from syslog.h.
- * At the time of writing it's a 3 bits value (0 to 7).
- *
- * subsys_id is any value between 0 and 64 (LOGSYS_MAX_SUBSYS_COUNT)
- *
- * RECID identifies the type of message. A set of predefined values
- * are available via logsys, but other custom values can be defined
- * by users.
- *
- * ----
- * bitfields:
- *
- * 0 - 2 level
- * 3 - 9 subsysid
- * 10 - n RECID
- */
-
-#define LOGSYS_LEVEL_END (3)
-#define LOGSYS_SUBSYSID_END (LOGSYS_LEVEL_END + 7)
-
-#define LOGSYS_RECID_LEVEL_MASK (LOG_PRIMASK)
-#define LOGSYS_RECID_SUBSYSID_MASK ((2 << (LOGSYS_SUBSYSID_END - 1)) - \
- (LOG_PRIMASK + 1))
-#define LOGSYS_RECID_RECID_MASK (UINT_MAX - \
- (LOGSYS_RECID_SUBSYSID_MASK + LOG_PRIMASK))
-
-#define LOGSYS_ENCODE_RECID(level,subsysid,recid) \
- (((recid) << LOGSYS_SUBSYSID_END) | \
- ((subsysid) << LOGSYS_LEVEL_END) | \
- (level))
-
-#define LOGSYS_DECODE_LEVEL(rec_ident) \
- ((rec_ident) & LOGSYS_RECID_LEVEL_MASK)
-
-#define LOGSYS_DECODE_SUBSYSID(rec_ident) \
- (((rec_ident) & LOGSYS_RECID_SUBSYSID_MASK) >> LOGSYS_LEVEL_END)
-
-#define LOGSYS_DECODE_RECID(rec_ident) \
- (((rec_ident) & LOGSYS_RECID_RECID_MASK) >> LOGSYS_SUBSYSID_END)
-
#define LOGSYS_MAX_PERROR_MSG_LEN 128
-#ifdef COROSYNC_LINUX
-/* The GNU version of strerror_r returns a (char*) that *must* be used */
-#define LOGSYS_STRERROR_R(out_ptr, err_num, buffer, sizeof_buffer) \
- out_ptr = strerror_r(err_num, buffer, sizeof_buffer);
-#else
-/* The XSI-compliant strerror_r() return 0 or -1 (in case the buffer is full) */
-#define LOGSYS_STRERROR_R(out_ptr, err_num, buffer, sizeof_buffer) do { \
- if ( strerror_r(err_num, buffer, sizeof_buffer) == 0 ) { \
- out_ptr = buffer; \
- } else { \
- out_ptr = ""; \
- } \
- } while(0)
-#endif
-
-#define LOGSYS_PERROR(err_num, level, fmt, args...) do { \
- char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \
- const char *_error_ptr; \
- LOGSYS_STRERROR_R(_error_ptr, err_num, _error_str, sizeof(_error_str)); \
- log_printf(level, fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \
- } while(0)
-
-
-
#ifndef LOGSYS_UTILS_ONLY
-extern int _logsys_system_setup(
- const char *mainsystem,
- unsigned int mode,
- unsigned int debug,
- const char *logfile,
- int logfile_priority,
- int syslog_facility,
- int syslog_priority);
-
-extern int _logsys_config_subsys_get (
- const char *subsys);
-
-extern int _logsys_subsys_create (const char *subsys);
-
-extern int _logsys_rec_init (unsigned int size);
-
-extern void _logsys_log_vprintf (
- unsigned int rec_ident,
- const char *function_name,
- const char *file_name,
- int file_line,
- const char *format,
- va_list ap) __attribute__((format(printf, 5, 0)));
-
-extern void _logsys_log_printf (
- unsigned int rec_ident,
- const char *function_name,
- const char *file_name,
- int file_line,
- const char *format,
- ...) __attribute__((format(printf, 5, 6)));
-
-extern void _logsys_log_rec (
- unsigned int rec_ident,
- const char *function_name,
- const char *file_name,
- int file_line,
- ...);
-
-extern int _logsys_wthread_create (void);
-
-static int logsys_subsys_id __attribute__((unused)) = LOGSYS_MAX_SUBSYS_COUNT;
-
-/*
- * External API - init
- * See below:
- *
- * LOGSYS_DECLARE_SYSTEM
- * LOGSYS_DECLARE_SUBSYS
- *
- */
-extern void logsys_fork_completed (void);
-
-extern void logsys_atexit (void);
-
-/*
- * External API - misc
- */
-extern void logsys_flush (void);
-
-extern int logsys_log_rec_store (const char *filename);
-
-/*
- * External API - configuration
- */
-
/*
* configuration bits that can only be done for the whole system
*/
extern int logsys_format_set (
const char *format);
extern char *logsys_format_get (void);
/*
* per system/subsystem settings.
*
* NOTE: once a subsystem is created and configured, changing
* the default does NOT affect the subsystems.
*
* Pass a NULL subsystem to change them all
*/
extern int logsys_config_syslog_facility_set (
const char *subsys,
unsigned int facility);
extern int logsys_config_syslog_priority_set (
const char *subsys,
unsigned int priority);
extern int logsys_config_mode_set (
const char *subsys,
unsigned int mode);
extern unsigned int logsys_config_mode_get (
const char *subsys);
+void logsys_config_apply(void);
+
/*
* to close a logfile, just invoke this function with a NULL
* file or if you want to change logfile, the old one will
* be closed for you.
*/
extern int logsys_config_file_set (
const char *subsys,
const char **error_string,
const char *file);
extern int logsys_config_logfile_priority_set (
const char *subsys,
unsigned int priority);
/*
* Enabling debug disables message priority filtering:
* everything is sent everywhere. Priority values
* for file and syslog are not overwritten.
*/
extern int logsys_config_debug_set (
const char *subsys,
unsigned int value);
/*
* External API - helpers
*
* convert facility/priority to/from name/values
*/
extern int logsys_facility_id_get (
const char *name);
extern const char *logsys_facility_name_get (
unsigned int facility);
extern int logsys_priority_id_get (
const char *name);
extern const char *logsys_priority_name_get (
unsigned int priority);
-extern int logsys_thread_priority_set (
- int policy,
- const struct sched_param *param,
- unsigned int after_log_ops_yield);
+extern int _logsys_system_setup(
+ const char *mainsystem,
+ unsigned int mode,
+ int syslog_facility,
+ int syslog_priority);
-/*
- * External definitions
- */
-extern void *logsys_rec_end;
+extern int _logsys_config_subsys_get (
+ const char *subsys);
-#define LOGSYS_REC_END (&logsys_rec_end)
+extern int _logsys_subsys_create (const char *subsys, const char *filename);
-#define LOGSYS_DECLARE_SYSTEM(name,mode,debug,file,file_priority, \
- syslog_facility,syslog_priority,format,fltsize) \
+static int logsys_subsys_id __attribute__((unused)) = LOGSYS_MAX_SUBSYS_COUNT;
+
+#define LOGSYS_DECLARE_SYSTEM(name,mode,syslog_facility,syslog_priority)\
__attribute__ ((constructor)) \
static void logsys_system_init (void) \
{ \
- if (_logsys_system_setup (name,mode,debug,file,file_priority, \
- syslog_facility,syslog_priority) < 0) { \
+ if (_logsys_system_setup (name,mode,syslog_facility,syslog_priority) < 0) { \
fprintf (stderr, \
"Unable to setup logging system: %s.\n", name); \
exit (-1); \
- } \
- \
- if (logsys_format_set (format) == -1) { \
- fprintf (stderr, \
- "Unable to setup logging format.\n"); \
- exit (-1); \
- } \
- \
- if (_logsys_rec_init (fltsize) < 0) { \
- fprintf (stderr, \
- "Unable to initialize log flight recorder.\n"); \
- exit (-1); \
- } \
- \
- if (_logsys_wthread_create() < 0) { \
- fprintf (stderr, \
- "Unable to initialize logging thread.\n"); \
- exit (-1); \
} \
}
#define LOGSYS_DECLARE_SUBSYS(subsys) \
__attribute__ ((constructor)) \
static void logsys_subsys_init (void) \
{ \
+ assert(__start___verbose != __stop___verbose); \
logsys_subsys_id = \
- _logsys_subsys_create ((subsys)); \
+ _logsys_subsys_create ((subsys), __FILE__); \
if (logsys_subsys_id == -1) { \
fprintf (stderr, \
"Unable to create logging subsystem: %s.\n", subsys); \
exit (-1); \
} \
}
-#define log_rec(rec_ident, args...) \
-do { \
- _logsys_log_rec (rec_ident, __FUNCTION__, \
- __FILE__, __LINE__, ##args, \
- LOGSYS_REC_END); \
-} while(0)
-
-#define log_printf(level, format, args...) \
- do { \
- _logsys_log_printf ( \
- LOGSYS_ENCODE_RECID(level, \
- logsys_subsys_id, \
- LOGSYS_RECID_LOG), \
- __FUNCTION__, __FILE__, __LINE__, \
- format, ##args); \
-} while(0)
-
-#define ENTER() do { \
- _logsys_log_rec ( \
- LOGSYS_ENCODE_RECID(LOGSYS_LEVEL_DEBUG, \
- logsys_subsys_id, \
- LOGSYS_RECID_ENTER), \
- __FUNCTION__, __FILE__, __LINE__, LOGSYS_REC_END); \
-} while(0)
-
-#define LEAVE() do { \
- _logsys_log_rec ( \
- LOGSYS_ENCODE_RECID(LOGSYS_LEVEL_DEBUG, \
- logsys_subsys_id, \
- LOGSYS_RECID_LEAVE), \
- __FUNCTION__, __FILE__, __LINE__, LOGSYS_REC_END); \
-} while(0)
-
-#define TRACE(recid, format, args...) do { \
- _logsys_log_printf ( \
- LOGSYS_ENCODE_RECID(LOGSYS_LEVEL_DEBUG, \
- logsys_subsys_id, \
- recid), \
- __FUNCTION__, __FILE__, __LINE__, \
- format, ##args); \
-} while(0)
-
-#define TRACE1(format, args...) do { \
- TRACE(LOGSYS_RECID_TRACE1, format, ##args); \
-} while(0)
-
-#define TRACE2(format, args...) do { \
- TRACE(LOGSYS_RECID_TRACE2, format, ##args); \
-} while(0)
-
-#define TRACE3(format, args...) do { \
- TRACE(LOGSYS_RECID_TRACE3, format, ##args); \
-} while(0)
-
-#define TRACE4(format, args...) do { \
- TRACE(LOGSYS_RECID_TRACE4, format, ##args); \
-} while(0)
-
-#define TRACE5(format, args...) do { \
- TRACE(LOGSYS_RECID_TRACE5, format, ##args); \
-} while(0)
-
-#define TRACE6(format, args...) do { \
- TRACE(LOGSYS_RECID_TRACE6, format, ##args); \
-} while(0)
-
-#define TRACE7(format, args...) do { \
- TRACE(LOGSYS_RECID_TRACE7, format, ##args); \
-} while(0)
-
-#define TRACE8(format, args...) do { \
- TRACE(LOGSYS_RECID_TRACE8, format, ##args); \
-} while(0)
+#define LOGSYS_PERROR(err_num, level, fmt, args...) do { \
+ char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \
+ const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \
+ qb_log(level, fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \
+ } while(0)
+
+#define log_printf(level, format, args...) qb_log(level, format, ##args)
+#define ENTER() qb_log(LOG_DEBUG, "ENTER")
+#define LEAVE() qb_log(LOG_DEBUG, "LEAVE")
+#define TRACE1(format, args...) qb_log(LOG_DEBUG, "TRACE1:" #format, ##args)
+#define TRACE2
+#define TRACE3
+#define TRACE4
+#define TRACE5
+#define TRACE6
+#define TRACE7
+#define TRACE8
#endif /* LOGSYS_UTILS_ONLY */
#ifdef __cplusplus
}
#endif
#endif /* LOGSYS_H_DEFINED */
diff --git a/include/corosync/lcr/lcr_ifact.h b/include/corosync/lcr/lcr_ifact.h
index 0be3e370..446e1af6 100644
--- a/include/corosync/lcr/lcr_ifact.h
+++ b/include/corosync/lcr/lcr_ifact.h
@@ -1,55 +1,58 @@
/*
* Copyright (C) 2006 Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef LCR_IFACT_H_DEFINED
#define LCR_IFACT_H_DEFINED
#include <corosync/hdb.h>
#ifdef __cplusplus
extern "C" {
#endif
int lcr_ifact_reference (
hdb_handle_t *handle,
const char *iface_name,
int version,
void **interface,
void *context);
+void *lcr_ifact_addr_get(hdb_handle_t iface_handle,
+ const char* symbol_name);
+
int lcr_ifact_release (
hdb_handle_t handle);
#ifdef __cplusplus
}
#endif
#endif /* LCR_IFACT_H_DEFINED */
diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h
index 239b0356..a025eab1 100644
--- a/include/corosync/totem/totem.h
+++ b/include/corosync/totem/totem.h
@@ -1,285 +1,278 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2009 Red Hat, Inc.
*
* Author: Steven Dake (sdake@redhat.com)
*
* All rights reserved.
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TOTEM_H_DEFINED
#define TOTEM_H_DEFINED
#include "totemip.h"
#include <corosync/hdb.h>
/*
 * Compile-time totem limits.
 *
 * Expression-valued macros are fully parenthesized so they keep their value
 * when used next to other operators (e.g. "x / MESSAGE_SIZE_MAX").
 */
#ifdef HAVE_SMALL_MEMORY_FOOTPRINT
#define PROCESSOR_COUNT_MAX 16
#define MESSAGE_SIZE_MAX (1024*64)
#define MESSAGE_QUEUE_MAX 512
#else
#define PROCESSOR_COUNT_MAX 384
#define MESSAGE_SIZE_MAX (1024*1024) /* (1MB) */
/* Needs a "totem_config" pointer with a net_mtu member in scope at the use site. */
#define MESSAGE_QUEUE_MAX (MESSAGE_SIZE_MAX / totem_config->net_mtu)
#endif /* HAVE_SMALL_MEMORY_FOOTPRINT */
#define FRAME_SIZE_MAX 10000
#define TRANSMITS_ALLOWED 16
#define SEND_THREADS_MAX 16
#define INTERFACE_MAX 2
/**
 * Maximum number of continuous gather states
 */
#define MAX_NO_CONT_GATHER 3
/*
 * Per-interface totem network settings.
 * NOTE(review): the field descriptions below are inferred from the names
 * only; confirm against the totem configuration code.
 */
struct totem_interface {
/* presumably the configured network to bind to */
struct totem_ip_address bindnet;
/* presumably the address actually bound */
struct totem_ip_address boundto;
struct totem_ip_address mcast_addr;
uint16_t ip_port;
uint16_t ttl;
/* presumably the number of valid entries in member_list -- TODO confirm */
int member_count;
/* fixed-size array, capacity PROCESSOR_COUNT_MAX */
struct totem_ip_address member_list[PROCESSOR_COUNT_MAX];
};
struct totem_logging_configuration {
void (*log_printf) (
- unsigned int rec_ident,
+ int level,
+ int subsys,
const char *function_name,
const char *file_name,
int file_line,
const char *format,
- ...) __attribute__((format(printf, 5, 6)));
+ ...) __attribute__((format(printf, 6, 7)));
int log_level_security;
int log_level_error;
int log_level_warning;
int log_level_notice;
int log_level_debug;
int log_subsys_id;
};
/* Size in bytes of the private_key buffer in struct totem_config. */
enum { TOTEM_PRIVATE_KEY_LEN = 128 };
/* Size in bytes of the rrp_mode string buffer in struct totem_config. */
enum { TOTEM_RRP_MODE_BYTES = 64 };
/* Transport selector; stored in totem_config.transport_number. */
typedef enum {
TOTEM_TRANSPORT_UDP = 0,
TOTEM_TRANSPORT_UDPU = 1,
TOTEM_TRANSPORT_RDMA = 2
} totem_transport_t;
struct totem_config {
int version;
/*
* network
*/
struct totem_interface *interfaces;
unsigned int interface_count;
unsigned int node_id;
unsigned int clear_node_high_bit;
/*
* key information
*/
unsigned char private_key[TOTEM_PRIVATE_KEY_LEN];
unsigned int private_key_len;
/*
* Totem configuration parameters
*/
unsigned int token_timeout;
unsigned int token_retransmit_timeout;
unsigned int token_hold_timeout;
unsigned int token_retransmits_before_loss_const;
unsigned int join_timeout;
unsigned int send_join_timeout;
unsigned int consensus_timeout;
unsigned int merge_timeout;
unsigned int downcheck_timeout;
unsigned int fail_to_recv_const;
unsigned int seqno_unchanged_const;
unsigned int rrp_token_expired_timeout;
unsigned int rrp_problem_count_timeout;
unsigned int rrp_problem_count_threshold;
unsigned int rrp_autorecovery_check_timeout;
char rrp_mode[TOTEM_RRP_MODE_BYTES];
struct totem_logging_configuration totem_logging_configuration;
- void (*log_rec) (
- int subsysid,
- const char *function_name,
- const char *file_name,
- int file_line,
- unsigned int rec_ident,
- ...);
-
unsigned int secauth;
unsigned int net_mtu;
unsigned int threads;
unsigned int heartbeat_failures_allowed;
unsigned int max_network_delay;
unsigned int window_size;
unsigned int max_messages;
const char *vsf_type;
unsigned int broadcast_use;
enum { TOTEM_CRYPTO_SOBER=0, TOTEM_CRYPTO_NSS } crypto_type;
enum { TOTEM_CRYPTO_ACCEPT_OLD=0, TOTEM_CRYPTO_ACCEPT_NEW } crypto_accept;
int crypto_crypt_type;
int crypto_sign_type;
totem_transport_t transport_number;
unsigned int miss_count_const;
};
/*
 * The empty TOTEM_*_TYPE macros are defined right before the matching enum.
 * NOTE(review): presumably they let other headers detect that the enum is
 * already declared -- confirm where they are tested.
 */
#define TOTEM_CONFIGURATION_TYPE
enum totem_configuration_type {
TOTEM_CONFIGURATION_REGULAR,
TOTEM_CONFIGURATION_TRANSITIONAL
};
#define TOTEM_CALLBACK_TOKEN_TYPE
/* Values start at 1, so 0 is not a valid callback type. */
enum totem_callback_token_type {
TOTEM_CALLBACK_TOKEN_RECEIVED = 1,
TOTEM_CALLBACK_TOKEN_SENT = 2
};
enum totem_event_type {
TOTEM_EVENT_DELIVERY_CONGESTED,
TOTEM_EVENT_NEW_MSG,
};
#define MEMB_RING_ID
/* Ring identifier; declared packed, so there is no padding between members. */
struct memb_ring_id {
struct totem_ip_address rep;
unsigned long long seq;
} __attribute__((packed));
/*
 * Common header embedded as the first member ("hdr") of the totemnet, totemrrp,
 * totemsrp, totemmrp and totempg statistics structures below.
 */
typedef struct {
hdb_handle_t handle;
int is_dirty;
time_t last_updated;
} totem_stats_header_t;
/*
 * Statistics structures, one per totem layer. They are chained by pointer:
 * totempg_stats_t -> mrp -> srp -> rrp -> net.
 */
typedef struct {
totem_stats_header_t hdr;
uint32_t iface_changes;
} totemnet_stats_t;
typedef struct {
totem_stats_header_t hdr;
totemnet_stats_t *net;
char *algo_name;
} totemrrp_stats_t;
/* One entry of the totemsrp_stats_t.token[] array. */
typedef struct {
uint32_t rx;
uint32_t tx;
int backlog_calc;
} totemsrp_token_stats_t;
typedef struct {
totem_stats_header_t hdr;
totemrrp_stats_t *rrp;
/* per-message-type transmit (tx) / receive (rx) counters */
uint64_t orf_token_tx;
uint64_t orf_token_rx;
uint64_t memb_merge_detect_tx;
uint64_t memb_merge_detect_rx;
uint64_t memb_join_tx;
uint64_t memb_join_rx;
uint64_t mcast_tx;
uint64_t mcast_retx;
uint64_t mcast_rx;
uint64_t memb_commit_token_tx;
uint64_t memb_commit_token_rx;
uint64_t token_hold_cancel_tx;
uint64_t token_hold_cancel_rx;
/* per-state "entered" / "token lost" counters */
uint64_t operational_entered;
uint64_t operational_token_lost;
uint64_t gather_entered;
uint64_t gather_token_lost;
uint64_t commit_entered;
uint64_t commit_token_lost;
uint64_t recovery_entered;
uint64_t recovery_token_lost;
uint64_t consensus_timeouts;
uint64_t rx_msg_dropped;
uint32_t continuous_gather;
/* presumably indexes into token[] (oldest / newest entry) -- TODO confirm */
int earliest_token;
int latest_token;
#define TOTEM_TOKEN_STATS_MAX 100
totemsrp_token_stats_t token[TOTEM_TOKEN_STATS_MAX];
} totemsrp_stats_t;
/*
 * NOTE(review): TOTEM_CONFIGURATION_TYPE is already defined (identically,
 * empty) before enum totem_configuration_type earlier in this header.
 */
#define TOTEM_CONFIGURATION_TYPE
typedef struct {
totem_stats_header_t hdr;
totemsrp_stats_t *srp;
} totemmrp_stats_t;
typedef struct {
totem_stats_header_t hdr;
totemmrp_stats_t *mrp;
uint32_t msg_reserved;
uint32_t msg_queue_avail;
} totempg_stats_t;
#endif /* TOTEM_H_DEFINED */
diff --git a/lcr/lcr_ifact.c b/lcr/lcr_ifact.c
index bf2b7214..fc14f8c3 100644
--- a/lcr/lcr_ifact.c
+++ b/lcr/lcr_ifact.c
@@ -1,566 +1,590 @@
/*
* Copyright (C) 2006 Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>
#include <dirent.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <fnmatch.h>
#ifdef COROSYNC_SOLARIS
#include <iso/limits_iso.h>
#endif
#include <corosync/hdb.h>
#include <corosync/lcr/lcr_comp.h>
#include <corosync/lcr/lcr_ifact.h>
/*
 * One registered component (a set of interfaces), created by
 * lcr_component_register().
 */
struct lcr_component_instance {
/* interface table and its length, copied from the registering lcr_comp */
struct lcr_iface *ifaces;
int iface_count;
/* this instance's own handle in lcr_component_instance_database */
hdb_handle_t comp_handle;
/* NULL at registration; set to the dlopen() handle by interface_find_and_load() */
void *dl_handle;
/* NOTE(review): not referenced by the code visible in this file */
int refcount;
/* path handed to dlopen(); compared by lcr_lib_loaded() to avoid reloading */
char library_name[256];
};
/*
 * One outstanding interface reference handed out by lcr_ifact_reference()
 * and torn down by lcr_ifact_release().
 */
struct lcr_iface_instance {
/* handle of the component that provides the interface */
hdb_handle_t component_handle;
/* caller-supplied context, passed to the interface constructor/destructor */
void *context;
/* interface destructor (may be NULL); invoked with context on release */
void (*destructor) (void *context);
};
/* Handle databases backing the two instance types above. */
DECLARE_HDB_DATABASE (lcr_component_instance_database, NULL);
DECLARE_HDB_DATABASE (lcr_iface_instance_database, NULL);
/*
static struct hdb_handle_database lcr_component_instance_database = {
.handle_count = 0,
.handles = 0,
.iterator = 0
};
static struct hdb_handle_database lcr_iface_instance_database = {
.handle_count = 0,
.handles = 0,
.iterator = 0
};
*/
static hdb_handle_t g_component_handle = 0xFFFFFFFF;
/*
 * scandir() filter: accept a directory entry only when its name ends in
 * ".lcrso" and is longer than that suffix alone.
 */
#if defined(COROSYNC_LINUX) || defined(COROSYNC_SOLARIS)
static int lcr_select_so (const struct dirent *dirent)
#else
static int lcr_select_so (struct dirent *dirent)
#endif
{
	static const char suffix[] = ".lcrso";
	const unsigned int suffix_len = sizeof (suffix) - 1;
	const unsigned int name_len = strlen (dirent->d_name);

	/* A bare ".lcrso" (or anything shorter) is not a loadable object. */
	if (name_len <= suffix_len) {
		return (0);
	}

	return (strcmp (suffix, dirent->d_name + name_len - suffix_len) == 0) ? 1 : 0;
}
/*
 * scandir() filter: accept a directory entry only when its name matches
 * the shell pattern "*.conf".
 */
#if defined(COROSYNC_LINUX) || defined(COROSYNC_SOLARIS)
static int pathlist_select (const struct dirent *dirent)
#else
static int pathlist_select (struct dirent *dirent)
#endif
{
	const int mismatch = fnmatch ("*.conf", dirent->d_name, 0);

	return (mismatch == 0);
}
/*
 * Search every registered component for an interface with the given name
 * and version.
 *
 * On a match the interface's index is stored in *iface_number and the
 * component is returned WITHOUT releasing the handle obtained from the
 * iterator (lcr_ifact_release() puts the component handle later).
 * Returns NULL when nothing matches.
 */
static inline struct lcr_component_instance *lcr_comp_find (
	const char *iface_name,
	unsigned int version,
	unsigned int *iface_number)
{
	void *cursor = NULL;
	hdb_handle_t cursor_handle = 0;

	hdb_iterator_reset (&lcr_component_instance_database);
	while (hdb_iterator_next (&lcr_component_instance_database,
		&cursor, &cursor_handle) == 0) {

		struct lcr_component_instance *candidate =
			(struct lcr_component_instance *)cursor;
		int idx;

		for (idx = 0; idx < candidate->iface_count; idx++) {
			if (strcmp (candidate->ifaces[idx].name, iface_name) != 0) {
				continue;
			}
			if (candidate->ifaces[idx].version == version) {
				*iface_number = idx;
				return (candidate);
			}
		}

		/* Not in this component: drop the iterator's reference. */
		hdb_handle_put (&lcr_component_instance_database, cursor_handle);
	}

	return (NULL);
}
/*
 * Report whether a shared object with this exact path has already been
 * recorded as the library of a registered component.
 *
 * Returns 1 when library_name matches a component's library_name, else 0.
 *
 * Every instance handed out by the iterator is released with
 * hdb_handle_put(), including the matching one; the previous early return
 * skipped that put and left the component handle referenced.
 */
static inline int lcr_lib_loaded (
	char *library_name)
{
	struct lcr_component_instance *instance;
	void *instance_p = NULL;
	hdb_handle_t component_handle = 0;
	int loaded = 0;

	hdb_iterator_reset (&lcr_component_instance_database);
	while (loaded == 0 &&
		hdb_iterator_next (&lcr_component_instance_database,
			&instance_p, &component_handle) == 0) {

		instance = (struct lcr_component_instance *)instance_p;

		if (strcmp (instance->library_name, library_name) == 0) {
			loaded = 1;
		}

		hdb_handle_put (&lcr_component_instance_database, component_handle);
	}

	return (loaded);
}
enum { PATH_LIST_SIZE = 128 };
const char *path_list[PATH_LIST_SIZE];
unsigned int path_list_entries = 0;
/*
 * Seed the search path list: first a heap copy of the current working
 * directory (skipped when getcwd() or strdup() fails), then LCRSODIR.
 */
static void defaults_path_build (void)
{
	char cwd[1024];

	if (getcwd (cwd, sizeof (cwd)) != NULL) {
		path_list[0] = strdup (cwd);
		if (path_list[0] != NULL) {
			path_list_entries++;
		}
	}

	path_list[path_list_entries] = LCRSODIR;
	path_list_entries++;
}
/*
 * Append every ':'-separated element of $LD_LIBRARY_PATH to path_list.
 *
 * Each element is stored as its own heap copy. Tokenizing stops as soon as
 * path_list is full, so no copy is made (and leaked) for elements that
 * cannot be stored. An element whose strdup() fails is skipped.
 */
static void ld_library_path_build (void)
{
	const char *ld_library_path;
	char *my_ld_library_path;
	char *p_s, *ptrptr;

	ld_library_path = getenv ("LD_LIBRARY_PATH");
	if (ld_library_path == NULL) {
		return;
	}

	/* strtok_r() modifies its input, so work on a private copy. */
	my_ld_library_path = strdup (ld_library_path);
	if (my_ld_library_path == NULL) {
		return;
	}

	for (p_s = strtok_r (my_ld_library_path, ":", &ptrptr);
		p_s != NULL && path_list_entries < PATH_LIST_SIZE;
		p_s = strtok_r (NULL, ":", &ptrptr)) {

		char *p = strdup (p_s);

		if (p != NULL) {
			path_list[path_list_entries++] = p;
		}
	}

	free (my_ld_library_path);
}
/*
 * Read an ld.so.conf style file "<path>/<filename>" and append each line to
 * path_list. Lines starting with "include" are followed recursively.
 *
 * When filename starts with '*', every "*.conf" entry of <path> is processed
 * first; afterwards, as before, the literal "<path>/<filename>" is still
 * tried with fopen() and its result decides the return value.
 *
 * Returns 0 on success (or when a wildcard directory has no entries), -1 when
 * the directory scan or the fopen() fails.
 *
 * Fixes over the previous version:
 *  - the scandir() result is kept in an int and its list is freed;
 *  - only a trailing newline is stripped (a final line without one used to
 *    lose its last character);
 *  - the include path is built with a bounded snprintf(); over-long paths
 *    are skipped instead of overflowing newpath;
 *  - "include /abs/dir/file" is resolved as an absolute path rather than
 *    being prefixed with <path>;
 *  - "include file" (no directory part) is looked up in <path> itself;
 *  - no strdup() is made for lines that no longer fit in path_list.
 *
 * NOTE(review): include cycles are still not detected.
 */
static int ldso_path_build (const char *path, const char *filename)
{
	static const char include_kw[] = "include";
	const size_t include_len = sizeof (include_kw) - 1;
	FILE *fp;
	char string[1024];
	char filename_cat[1024];
	char newpath[1024];
	char *new_filename;
	size_t len;
	int j;
	int n;
	struct dirent **scandir_list = NULL;
	int scandir_entries;

	snprintf (filename_cat, sizeof(filename_cat), "%s/%s", path, filename);

	if (filename[0] == '*') {
		scandir_entries = scandir (
			path,
			&scandir_list,
			pathlist_select, alphasort);
		if (scandir_entries < 0) {
			return -1;
		}
		for (j = 0; j < scandir_entries; j++) {
			ldso_path_build (path, scandir_list[j]->d_name);
			free (scandir_list[j]);
		}
		free (scandir_list);
		if (scandir_entries == 0) {
			return 0;
		}
	}

	fp = fopen (filename_cat, "r");
	if (fp == NULL) {
		return (-1);
	}

	while (fgets (string, sizeof (string), fp)) {
		char *p;

		len = strlen (string);
		if (len > 0 && string[len - 1] == '\n') {
			string[--len] = '\0';
		}

		if (strncmp (string, include_kw, include_len) == 0) {
			const char *include_dir;

			/*
			 * Walk back to the last ' ' or '/': what follows is
			 * the file name (or pattern) to include.
			 */
			for (j = (int)len;
				j > 0 && string[j] != ' ' && string[j] != '/';
				j--) {
			}
			if ((size_t)j < include_len) {
				/* "include" keyword without a usable argument */
				continue;
			}
			new_filename = &string[j] + 1;
			if (new_filename[0] == '\0') {
				continue;
			}

			if ((size_t)j == include_len) {
				/* no directory part: look in the current path */
				n = snprintf (newpath, sizeof (newpath), "%s", path);
			} else {
				include_dir = string + include_len + 1;
				if (include_dir[0] == '/') {
					/* absolute include: do not prefix with path */
					string[j] = '\0';
					n = snprintf (newpath, sizeof (newpath), "%s",
						(include_dir[0] != '\0') ? include_dir : "/");
				} else {
					string[j] = '\0';
					n = snprintf (newpath, sizeof (newpath), "%s/%s",
						path, include_dir);
				}
			}
			if (n < 0 || (size_t)n >= sizeof (newpath)) {
				/* path would be truncated: skip this include */
				continue;
			}

			ldso_path_build (newpath, new_filename);
			continue;
		}

		if (path_list_entries < PATH_LIST_SIZE) {
			p = strdup (string);
			if (p != NULL) {
				path_list[path_list_entries++] = p;
			}
		}
	}
	fclose(fp);
	return (0);
}
#if defined (COROSYNC_SOLARIS) && !defined(HAVE_SCANDIR)
/*
 * Fallback scandir() for Solaris builds that lack one.
 *
 * Reads every entry of dir, keeps those accepted by filter (all of them when
 * filter is NULL), optionally sorts them with compar and stores the malloc'ed
 * array of malloc'ed entries in *namelist. The caller frees each entry and
 * the array.
 *
 * Returns the number of entries, or -1 on failure with errno preserved and
 * *namelist set to NULL (except when opendir() itself fails, in which case
 * *namelist is left untouched, as before).
 *
 * Fixes over the previous version: the loop tested the type name "dirent"
 * instead of "result"; the readdir_r() scratch entry was never freed on
 * success and its allocation was unchecked; the failure path freed entries
 * through the not-yet-assigned *namelist instead of the local array.
 */
static int scandir (
	const char *dir, struct dirent ***namelist,
	int (*filter)(const struct dirent *),
	int (*compar)(const struct dirent **, const struct dirent **))
{
	DIR *d;
	struct dirent *entry;
	struct dirent *result = NULL;
	struct dirent **names = NULL;
	int namelist_items = 0, namelist_size = 0;
	long name_max;
	size_t len;
	int return_code;
	int err;

	d = opendir(dir);
	if (d == NULL)
		return -1;

	name_max = pathconf(dir, _PC_NAME_MAX);
	if (name_max < 0) {
		/* no limit reported: fall back to a conventional NAME_MAX */
		name_max = 255;
	}
	len = offsetof(struct dirent, d_name) + name_max + 1;
	entry = malloc(len);
	if (entry == NULL) {
		goto fail;
	}

	for (return_code = readdir_r (d, entry, &result);
		return_code == 0 && result != NULL;
		return_code = readdir_r(d, entry, &result)) {

		struct dirent *tmpentry;

		if ((filter != NULL) && ((*filter)(result) == 0)) {
			continue;
		}
		if (namelist_items >= namelist_size) {
			struct dirent **tmp;

			namelist_size += 512;
			if ((unsigned long)namelist_size > INT_MAX) {
				errno = EOVERFLOW;
				goto fail;
			}
			tmp = realloc (names,
				namelist_size * sizeof(struct dirent *));
			if (tmp == NULL) {
				goto fail;
			}
			names = tmp;
		}
		tmpentry = malloc (result->d_reclen);
		if (tmpentry == NULL) {
			goto fail;
		}
		(void) memcpy (tmpentry, result, result->d_reclen);
		names[namelist_items++] = tmpentry;
	}
	(void) closedir (d);
	free (entry);

	if ((namelist_items > 1) && (compar != NULL)) {
		qsort (names, namelist_items, sizeof (struct dirent *),
			(int (*)(const void *, const void *))compar);
	}
	*namelist = names;
	return namelist_items;

fail:
	/* closedir()/free() may clobber errno; report the original cause */
	err = errno;
	(void) closedir (d);
	while (namelist_items != 0) {
		namelist_items--;
		free (names[namelist_items]);
	}
	free (entry);
	free (names);
	*namelist = NULL;
	errno = err;
	return -1;
}
#endif
#if defined (COROSYNC_SOLARIS) && !defined(HAVE_ALPHASORT)
/*
 * Fallback alphasort() for Solaris builds that lack one: orders two
 * directory entries by strcmp() of their names.
 */
static int alphasort (const struct dirent **a, const struct dirent **b)
{
	const struct dirent *lhs = *a;
	const struct dirent *rhs = *b;

	return strcmp (lhs->d_name, rhs->d_name);
}
#endif
/*
 * Scan directory "path" for "*.lcrso" objects, dlopen() each one that is not
 * loaded yet and check whether it registered the wanted interface.
 *
 * On success returns 0, stores the providing component in *instance_ret and
 * the interface index in *iface_number; the object stays loaded and its
 * dlopen() handle and path are recorded in the component. Objects that do
 * not provide the interface are unregistered and dlclose()d again.
 * Returns -1 when no object in this directory provides the interface.
 *
 * Fixes over the previous version:
 *  - library paths that do not fit in library_name are skipped instead of
 *    overflowing it with strcpy();
 *  - on Solaris a missing registration symbol no longer calls through a
 *    NULL pointer;
 *  - the scandir() list is released on one shared exit path.
 */
static int interface_find_and_load (
	const char *path,
	const char *iface_name,
	int version,
	struct lcr_component_instance **instance_ret,
	unsigned int *iface_number)
{
	struct lcr_component_instance *instance = NULL;
	void *dl_handle;
	struct dirent **scandir_list = NULL;
	int scandir_entries;
	int libs_to_scan;
	int name_len;
	int res = -1;
	int i;
	char dl_name[1024];
#ifdef COROSYNC_SOLARIS
	void (*comp_reg)(void);
#endif

	scandir_entries = scandir (path, &scandir_list, lcr_select_so, alphasort);

	/* scandir_entries is -1 on error, so the loop is simply skipped then */
	for (libs_to_scan = 0; libs_to_scan < scandir_entries; libs_to_scan++) {
		/*
		 * Load objects, scan them, unload them if they are not a match
		 */
		name_len = snprintf (dl_name, sizeof(dl_name), "%s/%s",
			path, scandir_list[libs_to_scan]->d_name);
		if (name_len < 0 ||
			(size_t)name_len >= sizeof (instance->library_name)) {
			fprintf(stderr, "%s/%s: library path too long, skipped\n",
				path, scandir_list[libs_to_scan]->d_name);
			continue;
		}

		/*
		 * Don't reload already loaded libraries
		 */
		if (lcr_lib_loaded (dl_name)) {
			continue;
		}
		dl_handle = dlopen (dl_name, RTLD_NOW);
		if (dl_handle == NULL) {
			fprintf(stderr, "%s: open failed: %s\n",
				dl_name, dlerror());
			continue;
		}

		/*
		 * constructors don't work in Solaris dlopen, so we have to
		 * specifically call a function to register the component
		 */
#ifdef COROSYNC_SOLARIS
		comp_reg = dlsym (dl_handle, "corosync_lcr_component_register");
		if (comp_reg != NULL) {
			comp_reg ();
		}
#endif
		instance = lcr_comp_find (iface_name, version, iface_number);
		if (instance) {
			instance->dl_handle = dl_handle;
			/* length was checked against library_name above */
			snprintf (instance->library_name,
				sizeof (instance->library_name), "%s", dl_name);
			*instance_ret = instance;
			res = 0;
			break;
		}

		/*
		 * No matching interfaces found, try next shared object
		 */
		if (g_component_handle != 0xFFFFFFFF) {
			hdb_handle_destroy (&lcr_component_instance_database,
				g_component_handle);
			g_component_handle = 0xFFFFFFFF;
		}
		dlclose (dl_handle);
	} /* scanning for lcrso loop */

	if (scandir_entries >= 0) {
		for (i = 0; i < scandir_entries; i++) {
			free (scandir_list[i]);
		}
		free (scandir_list);
	}
	g_component_handle = 0xFFFFFFFF;
	return res;
}
static unsigned int lcr_initialized = 0;
+/*
+ * Resolve symbol_name with dlsym() in the shared object of the component
+ * that provides the interface referenced by iface_handle.
+ *
+ * Returns the symbol address, or NULL when either handle cannot be
+ * resolved or dlsym() does not find the symbol. Both handle lookups are
+ * checked now; previously a stale handle led to dereferencing an
+ * uninitialized instance pointer.
+ */
+void *lcr_ifact_addr_get(hdb_handle_t iface_handle,
+	const char* symbol_name)
+{
+	struct lcr_iface_instance *iface_instance;
+	struct lcr_component_instance *instance;
+	void *ptr = NULL;
+
+	if (hdb_handle_get (&lcr_iface_instance_database,
+		iface_handle, (void *)&iface_instance) != 0) {
+		return (NULL);
+	}
+
+	if (hdb_handle_get (&lcr_component_instance_database,
+		iface_instance->component_handle, (void *)&instance) == 0) {
+
+		ptr = dlsym(instance->dl_handle, symbol_name);
+
+		hdb_handle_put(&lcr_component_instance_database,
+			iface_instance->component_handle);
+	}
+
+	hdb_handle_put (&lcr_iface_instance_database,
+		iface_handle);
+	return ptr;
+}
+
+
/*
 * Obtain a reference to interface iface_name/version.
 *
 * An already registered component is used when one provides the interface;
 * otherwise every directory in path_list is searched for a ".lcrso" object
 * that does (the list is built on the first such search from the current
 * directory, LCRSODIR, $LD_LIBRARY_PATH and /etc/ld.so.conf).
 *
 * On success returns 0, stores the interface table in *iface, a new
 * reference handle in *iface_handle and runs the interface constructor (if
 * any) with context. Release the reference with lcr_ifact_release().
 *
 * Returns -1 when no component provides the interface or when the reference
 * handle cannot be allocated. The handle is now allocated before *iface is
 * written and before the constructor runs, so a failure leaves no side
 * effects and drops the component reference taken by the lookup.
 */
int lcr_ifact_reference (
	hdb_handle_t *iface_handle,
	const char *iface_name,
	int version,
	void **iface,
	void *context)
{
	struct lcr_iface_instance *iface_instance;
	struct lcr_component_instance *instance;
	unsigned int iface_number;
	unsigned int i;

	/*
	 * Determine if the component is already loaded
	 */
	instance = lcr_comp_find (iface_name, version, &iface_number);

	if (instance == NULL) {
		if (lcr_initialized == 0) {
			lcr_initialized = 1;
			defaults_path_build ();
			ld_library_path_build ();
			ldso_path_build ("/etc", "ld.so.conf");
		}

		/*
		 * Search through all lcrso files for desired interface
		 */
		for (i = 0; i < path_list_entries; i++) {
			if (interface_find_and_load (
				path_list[i],
				iface_name,
				version,
				&instance,
				&iface_number) == 0) {
				break;
			}
		}

		/*
		 * No matching interfaces found in all shared objects
		 */
		if (i == path_list_entries) {
			return (-1);
		}
	}

	if (hdb_handle_create (&lcr_iface_instance_database,
		sizeof (struct lcr_iface_instance),
		iface_handle) != 0) {
		goto error_put;
	}
	if (hdb_handle_get (&lcr_iface_instance_database,
		*iface_handle, (void *)&iface_instance) != 0) {
		hdb_handle_destroy (&lcr_iface_instance_database, *iface_handle);
		goto error_put;
	}

	*iface = instance->ifaces[iface_number].interfaces;
	if (instance->ifaces[iface_number].constructor) {
		instance->ifaces[iface_number].constructor (context);
	}

	iface_instance->component_handle = instance->comp_handle;
	iface_instance->context = context;
	iface_instance->destructor = instance->ifaces[iface_number].destructor;
	hdb_handle_put (&lcr_iface_instance_database, *iface_handle);
	return (0);

error_put:
	/* balance the reference lcr_comp_find() left on the component */
	hdb_handle_put (&lcr_component_instance_database, instance->comp_handle);
	return (-1);
}
/*
 * Release an interface reference obtained from lcr_ifact_reference().
 *
 * Runs the interface destructor (if any) with the stored context, drops the
 * reference held on the providing component and destroys the reference
 * handle.
 *
 * Returns 0 on success, or the hdb_handle_get() error when handle is not
 * valid. In that case nothing else is done; previously the unset instance
 * pointer was dereferenced.
 */
int lcr_ifact_release (hdb_handle_t handle)
{
	struct lcr_iface_instance *iface_instance;
	int res;

	res = hdb_handle_get (&lcr_iface_instance_database,
		handle, (void *)&iface_instance);
	if (res != 0) {
		return (res);
	}

	if (iface_instance->destructor) {
		iface_instance->destructor (iface_instance->context);
	}

	hdb_handle_put (&lcr_component_instance_database,
		iface_instance->component_handle);
	hdb_handle_put (&lcr_iface_instance_database, handle);
	hdb_handle_destroy (&lcr_iface_instance_database, handle);

	return (0);
}
/*
 * Register a component: create a component instance describing comp's
 * interface table and remember its handle in g_component_handle so the
 * loader can unregister it again if it turns out not to be needed.
 *
 * dl_handle starts out NULL; interface_find_and_load() fills it in for
 * components that came from a dlopen()ed object.
 *
 * When the instance cannot be created the function returns without touching
 * g_component_handle (previously the failure was ignored and an unset
 * instance pointer was written through).
 */
void lcr_component_register (struct lcr_comp *comp)
{
	struct lcr_component_instance *instance;
	hdb_handle_t comp_handle;

	if (hdb_handle_create (&lcr_component_instance_database,
		sizeof (struct lcr_component_instance),
		&comp_handle) != 0) {
		return;
	}
	if (hdb_handle_get (&lcr_component_instance_database,
		comp_handle, (void *)&instance) != 0) {
		hdb_handle_destroy (&lcr_component_instance_database, comp_handle);
		return;
	}

	instance->ifaces = comp->ifaces;
	instance->iface_count = comp->iface_count;
	instance->comp_handle = comp_handle;
	instance->dl_handle = NULL;

	hdb_handle_put (&lcr_component_instance_database,
		comp_handle);

	g_component_handle = comp_handle;
}
diff --git a/services/cpg.c b/services/cpg.c
index 633d4b37..1b8d5b45 100644
--- a/services/cpg.c
+++ b/services/cpg.c
@@ -1,2052 +1,2052 @@
/*
* Copyright (c) 2006-2009 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Christine Caulfield (ccaulfie@redhat.com)
* Author: Jan Friesse (jfriesse@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <netinet/in.h>
#include <sys/uio.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <time.h>
#include <assert.h>
#include <unistd.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/mman.h>
#include <corosync/corotypes.h>
#include <qb/qbipc_common.h>
#include <corosync/corodefs.h>
#include <corosync/list.h>
#include <corosync/jhash.h>
#include <corosync/lcr/lcr_comp.h>
#include <corosync/engine/logsys.h>
#include <corosync/engine/coroapi.h>
#include <corosync/cpg.h>
#include <corosync/ipc_cpg.h>
LOGSYS_DECLARE_SUBSYS ("CPG");
#define GROUP_HASH_SIZE 32
/*
 * Message ids for CPG messages exchanged between executives. The values
 * line up with the entry indices of cpg_exec_engine[] (0 = procjoin,
 * 1 = procleave, 2 = joinlist, 3 = mcast, ...).
 */
enum cpg_message_req_types {
MESSAGE_REQ_EXEC_CPG_PROCJOIN = 0,
MESSAGE_REQ_EXEC_CPG_PROCLEAVE = 1,
MESSAGE_REQ_EXEC_CPG_JOINLIST = 2,
MESSAGE_REQ_EXEC_CPG_MCAST = 3,
MESSAGE_REQ_EXEC_CPG_DOWNLIST_OLD = 4,
MESSAGE_REQ_EXEC_CPG_DOWNLIST = 5
};
/*
 * List node recording one mapped region (address and size).
 * NOTE(review): presumably one zero-copy buffer, linked on
 * cpg_pd.zcb_mapped_list_head -- confirm against the zc_alloc/zc_free handlers.
 */
struct zcb_mapped {
struct list_head list;
void *addr;
size_t size;
};
/*
* state exec deliver
* match group name, pid -> if matched deliver for YES:
* XXX indicates impossible state
*
* join leave mcast
* UNJOINED XXX XXX NO
* LEAVE_STARTED XXX YES(unjoined_enter) YES
* JOIN_STARTED YES(join_started_enter) XXX NO
* JOIN_COMPLETED XXX NO YES
*
* join_started_enter
* set JOIN_COMPLETED
* add entry to process_info list
* unjoined_enter
* set UNJOINED
* delete entry from process_info list
*
*
* library accept join error codes
* UNJOINED YES(CS_OK) set JOIN_STARTED
* LEAVE_STARTED NO(CS_ERR_BUSY)
* JOIN_STARTED NO(CS_ERR_EXIST)
* JOIN_COMPLETED NO(CS_ERR_EXIST)
*
* library accept leave error codes
* UNJOINED NO(CS_ERR_NOT_EXIST)
* LEAVE_STARTED NO(CS_ERR_NOT_EXIST)
* JOIN_STARTED NO(CS_ERR_BUSY)
* JOIN_COMPLETED YES(CS_OK) set LEAVE_STARTED
*
* library accept mcast
* UNJOINED NO(CS_ERR_NOT_EXIST)
* LEAVE_STARTED NO(CS_ERR_NOT_EXIST)
* JOIN_STARTED YES(CS_OK)
* JOIN_COMPLETED YES(CS_OK)
*/
/*
 * Join state of one library connection; the allowed transitions and the
 * resulting library error codes are listed in the table comment above.
 */
enum cpd_state {
CPD_STATE_UNJOINED,
CPD_STATE_LEAVE_STARTED,
CPD_STATE_JOIN_STARTED,
CPD_STATE_JOIN_COMPLETED
};
/* Phase of CPG synchronization; my_sync_state starts at CPGSYNC_DOWNLIST. */
enum cpg_sync_state {
CPGSYNC_DOWNLIST,
CPGSYNC_JOINLIST
};
enum cpg_downlist_state_e {
CPG_DOWNLIST_NONE,
CPG_DOWNLIST_WAITING_FOR_MESSAGES,
CPG_DOWNLIST_APPLYING,
};
static enum cpg_downlist_state_e downlist_state;
static struct list_head downlist_messages_head;
/*
 * Per-connection private data.
 * NOTE(review): field roles are inferred from names and from the state table
 * comment above -- confirm against the lib handlers.
 */
struct cpg_pd {
void *conn;
mar_cpg_name_t group_name;
uint32_t pid;
enum cpd_state cpd_state;
unsigned int flags;
int initial_totem_conf_sent;
/* presumably links this entry on cpg_pd_list_head -- TODO confirm */
struct list_head list;
struct list_head iteration_instance_list_head;
struct list_head zcb_mapped_list_head;
};
struct cpg_iteration_instance {
hdb_handle_t handle;
struct list_head list;
struct list_head items_list_head; /* List of process_info */
struct list_head *current_pointer;
};
DECLARE_HDB_DATABASE(cpg_iteration_handle_t_db,NULL);
DECLARE_LIST_INIT(cpg_pd_list_head);
/* Current and previous membership arrays, each with capacity PROCESSOR_COUNT_MAX. */
static unsigned int my_member_list[PROCESSOR_COUNT_MAX];
static unsigned int my_member_list_entries;
static unsigned int my_old_member_list[PROCESSOR_COUNT_MAX];
static unsigned int my_old_member_list_entries = 0;
static struct corosync_api_v1 *api = NULL;
static enum cpg_sync_state my_sync_state = CPGSYNC_DOWNLIST;
static mar_cpg_ring_id_t last_sync_ring_id;
/* One process (node id + pid) that is a member of a group. */
struct process_info {
unsigned int nodeid;
uint32_t pid;
mar_cpg_name_t group;
struct list_head list; /* on the group_info members list */
};
DECLARE_LIST_INIT(process_info_list_head);
/* A (pid, group name) pair. NOTE(review): presumably one element of a joinlist message -- confirm. */
struct join_list_entry {
uint32_t pid;
mar_cpg_name_t group_name;
};
/*
* Service Interfaces required by service_message_handler struct
*/
static int cpg_exec_init_fn (struct corosync_api_v1 *);
static int cpg_lib_init_fn (void *conn);
static int cpg_lib_exit_fn (void *conn);
static void message_handler_req_exec_cpg_procjoin (
const void *message,
unsigned int nodeid);
static void message_handler_req_exec_cpg_procleave (
const void *message,
unsigned int nodeid);
static void message_handler_req_exec_cpg_joinlist (
const void *message,
unsigned int nodeid);
static void message_handler_req_exec_cpg_mcast (
const void *message,
unsigned int nodeid);
static void message_handler_req_exec_cpg_downlist_old (
const void *message,
unsigned int nodeid);
static void message_handler_req_exec_cpg_downlist (
const void *message,
unsigned int nodeid);
static void exec_cpg_procjoin_endian_convert (void *msg);
static void exec_cpg_joinlist_endian_convert (void *msg);
static void exec_cpg_mcast_endian_convert (void *msg);
static void exec_cpg_downlist_endian_convert_old (void *msg);
static void exec_cpg_downlist_endian_convert (void *msg);
static void message_handler_req_lib_cpg_join (void *conn, const void *message);
static void message_handler_req_lib_cpg_leave (void *conn, const void *message);
static void message_handler_req_lib_cpg_finalize (void *conn, const void *message);
static void message_handler_req_lib_cpg_mcast (void *conn, const void *message);
static void message_handler_req_lib_cpg_membership (void *conn,
const void *message);
static void message_handler_req_lib_cpg_local_get (void *conn,
const void *message);
static void message_handler_req_lib_cpg_iteration_initialize (
void *conn,
const void *message);
static void message_handler_req_lib_cpg_iteration_next (
void *conn,
const void *message);
static void message_handler_req_lib_cpg_iteration_finalize (
void *conn,
const void *message);
static void message_handler_req_lib_cpg_zc_alloc (
void *conn,
const void *message);
static void message_handler_req_lib_cpg_zc_free (
void *conn,
const void *message);
static void message_handler_req_lib_cpg_zc_execute (
void *conn,
const void *message);
static int cpg_node_joinleave_send (unsigned int pid, const mar_cpg_name_t *group_name, int fn, int reason);
static int cpg_exec_send_downlist(void);
static int cpg_exec_send_joinlist(void);
static void downlist_messages_delete (void);
static void downlist_master_choose_and_send (void);
static void cpg_sync_init_v2 (
const unsigned int *trans_list,
size_t trans_list_entries,
const unsigned int *member_list,
size_t member_list_entries,
const struct memb_ring_id *ring_id);
static int cpg_sync_process (void);
static void cpg_sync_activate (void);
static void cpg_sync_abort (void);
static int notify_lib_totem_membership (
void *conn,
int member_list_entries,
const unsigned int *member_list);
static inline int zcb_all_free (
struct cpg_pd *cpd);
/*
 * Library Handler Definition
 *
 * One entry per library request.  The numeric comments give the position of
 * each entry in the array (presumably the library request id -- confirm
 * against the IPC header that defines the request enum).  Every entry pairs
 * the handler function with the flow-control setting used for that request.
 */
static struct corosync_lib_handler cpg_lib_engine[] =
{
{ /* 0 */
.lib_handler_fn = message_handler_req_lib_cpg_join,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 1 */
.lib_handler_fn = message_handler_req_lib_cpg_leave,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 2 */
.lib_handler_fn = message_handler_req_lib_cpg_mcast,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 3 */
.lib_handler_fn = message_handler_req_lib_cpg_membership,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 4 */
.lib_handler_fn = message_handler_req_lib_cpg_local_get,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 5 */
.lib_handler_fn = message_handler_req_lib_cpg_iteration_initialize,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 6 */
.lib_handler_fn = message_handler_req_lib_cpg_iteration_next,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 7 */
.lib_handler_fn = message_handler_req_lib_cpg_iteration_finalize,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED
},
{ /* 8 */
.lib_handler_fn = message_handler_req_lib_cpg_finalize,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 9 */
.lib_handler_fn = message_handler_req_lib_cpg_zc_alloc,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 10 */
.lib_handler_fn = message_handler_req_lib_cpg_zc_free,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
{ /* 11 */
.lib_handler_fn = message_handler_req_lib_cpg_zc_execute,
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED
},
};
/*
 * Executive (node-to-node) Handler Definition
 *
 * One entry per cluster message.  Each entry pairs the handler with the
 * function that byte-swaps the message.  Entries 0 and 1 (procjoin and
 * procleave) share one converter because both use
 * struct req_exec_cpg_procjoin.
 */
static struct corosync_exec_handler cpg_exec_engine[] =
{
{ /* 0 */
.exec_handler_fn = message_handler_req_exec_cpg_procjoin,
.exec_endian_convert_fn = exec_cpg_procjoin_endian_convert
},
{ /* 1 */
.exec_handler_fn = message_handler_req_exec_cpg_procleave,
.exec_endian_convert_fn = exec_cpg_procjoin_endian_convert
},
{ /* 2 */
.exec_handler_fn = message_handler_req_exec_cpg_joinlist,
.exec_endian_convert_fn = exec_cpg_joinlist_endian_convert
},
{ /* 3 */
.exec_handler_fn = message_handler_req_exec_cpg_mcast,
.exec_endian_convert_fn = exec_cpg_mcast_endian_convert
},
{ /* 4 */
.exec_handler_fn = message_handler_req_exec_cpg_downlist_old,
.exec_endian_convert_fn = exec_cpg_downlist_endian_convert_old
},
{ /* 5 */
.exec_handler_fn = message_handler_req_exec_cpg_downlist,
.exec_endian_convert_fn = exec_cpg_downlist_endian_convert
},
};
/*
 * Service engine descriptor for CPG.  It ties together the per-connection
 * private data size, the library and exec handler tables (with their entry
 * counts computed from the array sizes) and the synchronization callbacks.
 */
struct corosync_service_engine cpg_service_engine = {
.name = "corosync cluster closed process group service v1.01",
.id = CPG_SERVICE,
.priority = 1,
.private_data_size = sizeof (struct cpg_pd),
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED,
.allow_inquorate = CS_LIB_ALLOW_INQUORATE,
.lib_init_fn = cpg_lib_init_fn,
.lib_exit_fn = cpg_lib_exit_fn,
.lib_engine = cpg_lib_engine,
.lib_engine_count = sizeof (cpg_lib_engine) / sizeof (struct corosync_lib_handler),
.exec_init_fn = cpg_exec_init_fn,
.exec_dump_fn = NULL,
.exec_engine = cpg_exec_engine,
.exec_engine_count = sizeof (cpg_exec_engine) / sizeof (struct corosync_exec_handler),
.sync_mode = CS_SYNC_V1_APIV2,
/* cpg_sync_init_v2 has the five-argument v2 signature, hence the cast to the v1 pointer type */
.sync_init = (sync_init_v1_fn_t)cpg_sync_init_v2,
.sync_process = cpg_sync_process,
.sync_activate = cpg_sync_activate,
.sync_abort = cpg_sync_abort
};
/*
 * Dynamic loader definition
 *
 * Describes the CPG component: a single interface named "corosync_cpg",
 * version 0, with no constructor, destructor or dependencies.  Its
 * interface pointer starts out NULL and is filled in at registration time.
 */
static struct corosync_service_engine *cpg_get_service_engine_ver0 (void);
/* Interface object exposing the accessor for the service engine */
static struct corosync_service_engine_iface_ver0 cpg_service_engine_iface = {
.corosync_get_service_engine_ver0 = cpg_get_service_engine_ver0
};
static struct lcr_iface corosync_cpg_ver0[1] = {
{
.name = "corosync_cpg",
.version = 0,
.versions_replace = 0,
.versions_replace_count = 0,
.dependencies = 0,
.dependency_count = 0,
.constructor = NULL,
.destructor = NULL,
.interfaces = NULL
}
};
/* Component wrapper holding the single interface above */
static struct lcr_comp cpg_comp_ver0 = {
.iface_count = 1,
.ifaces = corosync_cpg_ver0
};
/*
 * Accessor published through cpg_service_engine_iface: returns the address
 * of the statically allocated CPG service engine descriptor.
 */
static struct corosync_service_engine *cpg_get_service_engine_ver0 (void)
{
	struct corosync_service_engine *engine = &cpg_service_engine;

	return (engine);
}
/*
 * Registers the CPG component with the loader: attaches the service engine
 * interface to the interface descriptor, then registers the component.
 * On COROSYNC_SOLARIS the function has external linkage; on every other
 * platform it is a static function marked as a constructor.
 */
#ifdef COROSYNC_SOLARIS
void corosync_lcr_component_register (void);
void corosync_lcr_component_register (void) {
#else
__attribute__ ((constructor)) static void corosync_lcr_component_register (void) {
#endif
lcr_interfaces_set (&corosync_cpg_ver0[0], &cpg_service_engine_iface);
lcr_component_register (&cpg_comp_ver0);
}
/*
 * Cluster message announcing that a process joined or left a group.  The
 * same layout serves both PROCJOIN and PROCLEAVE.  Every member is aligned
 * to 8 bytes.
 */
struct req_exec_cpg_procjoin {
struct qb_ipc_request_header header __attribute__((aligned(8)));
mar_cpg_name_t group_name __attribute__((aligned(8)));
mar_uint32_t pid __attribute__((aligned(8)));
/* one of the CONFCHG_CPG_REASON_* values passed by the sender */
mar_uint32_t reason __attribute__((aligned(8)));
};
/*
 * Cluster message carrying a group multicast.  The payload follows the
 * fixed part as a flexible array member; msglen holds its length in bytes.
 */
struct req_exec_cpg_mcast {
struct qb_ipc_request_header header __attribute__((aligned(8)));
mar_cpg_name_t group_name __attribute__((aligned(8)));
mar_uint32_t msglen __attribute__((aligned(8)));
mar_uint32_t pid __attribute__((aligned(8)));
mar_message_source_t source __attribute__((aligned(8)));
mar_uint8_t message[] __attribute__((aligned(8)));
};
/*
 * Older downlist message layout, without the old_members field.  Its
 * handler only logs a warning when such a message arrives.
 */
struct req_exec_cpg_downlist_old {
struct qb_ipc_request_header header __attribute__((aligned(8)));
mar_uint32_t left_nodes __attribute__((aligned(8)));
mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8)));
};
/*
 * Downlist message sent during synchronization.  It carries the sender's
 * previous member count and the ids of the nodes that left; only the first
 * left_nodes entries of nodeids are meaningful.
 */
struct req_exec_cpg_downlist {
struct qb_ipc_request_header header __attribute__((aligned(8)));
/* merge decisions */
mar_uint32_t old_members __attribute__((aligned(8)));
/* downlist below */
mar_uint32_t left_nodes __attribute__((aligned(8)));
mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8)));
};
/*
 * Locally stored copy of a received downlist message, linked into
 * downlist_messages_head.  sender_nodeid records which node sent it.
 */
struct downlist_msg {
mar_uint32_t sender_nodeid;
mar_uint32_t old_members __attribute__((aligned(8)));
mar_uint32_t left_nodes __attribute__((aligned(8)));
mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8)));
struct list_head list;
};
/*
 * Downlist message for the current synchronization round.  cpg_sync_init_v2
 * fills in nodeids/left_nodes and cpg_exec_send_downlist completes the
 * header and multicasts it.
 */
static struct req_exec_cpg_downlist g_req_exec_cpg_downlist;
/*
 * Synchronization start callback.
 *
 * Records the new membership and ring id, arms the downlist state machine
 * and precomputes the downlist: every node of the previous membership that
 * does not appear in the transitional list is recorded as having left.
 */
static void cpg_sync_init_v2 (
	const unsigned int *trans_list,
	size_t trans_list_entries,
	const unsigned int *member_list,
	size_t member_list_entries,
	const struct memb_ring_id *ring_id)
{
	int left_count = 0;
	int old_idx;

	my_sync_state = CPGSYNC_DOWNLIST;
	downlist_state = CPG_DOWNLIST_WAITING_FOR_MESSAGES;

	/* Remember the membership being synchronized to */
	memcpy (my_member_list, member_list,
		member_list_entries * sizeof (unsigned int));
	my_member_list_entries = member_list_entries;

	last_sync_ring_id.nodeid = ring_id->rep.nodeid;
	last_sync_ring_id.seq = ring_id->seq;

	/*
	 * Determine list of nodeids for downlist message
	 */
	for (old_idx = 0; old_idx < my_old_member_list_entries; old_idx++) {
		size_t trans_idx = 0;

		/* Linear search of the transitional list for this old member */
		while (trans_idx < trans_list_entries &&
			my_old_member_list[old_idx] != trans_list[trans_idx]) {
			trans_idx++;
		}

		if (trans_idx == trans_list_entries) {
			/* Not in the transitional membership: the node left */
			g_req_exec_cpg_downlist.nodeids[left_count++] =
				my_old_member_list[old_idx];
		}
	}

	g_req_exec_cpg_downlist.left_nodes = left_count;
}
/*
 * Synchronization step callback.
 *
 * Sends the downlist first; once that succeeds the state advances to
 * CPGSYNC_JOINLIST and the joinlist is sent.  Returns -1 when the downlist
 * send fails or when the state is neither of the two, otherwise the result
 * of the joinlist send.
 */
static int cpg_sync_process (void)
{
	int rc = -1;

	if (my_sync_state == CPGSYNC_DOWNLIST) {
		rc = cpg_exec_send_downlist();
		if (rc == -1) {
			/* State is left unchanged so the downlist is retried */
			return (rc);
		}
		my_sync_state = CPGSYNC_JOINLIST;
	}

	if (my_sync_state != CPGSYNC_JOINLIST) {
		return (rc);
	}

	return (cpg_exec_send_joinlist());
}
/*
 * Synchronization completion callback.
 *
 * Promotes the current membership to "old" membership, applies a downlist
 * if none was applied while messages were still being collected, discards
 * the stored downlist messages and tells every library connection about
 * the new totem membership.
 */
static void cpg_sync_activate (void)
{
	const size_t member_bytes = my_member_list_entries * sizeof (unsigned int);

	memcpy (my_old_member_list, my_member_list, member_bytes);
	my_old_member_list_entries = my_member_list_entries;

	/* Not every node's downlist arrived: decide with what we have */
	if (downlist_state == CPG_DOWNLIST_WAITING_FOR_MESSAGES) {
		downlist_master_choose_and_send ();
	}

	downlist_messages_delete ();
	downlist_state = CPG_DOWNLIST_NONE;

	/* NULL connection means "notify all connections" */
	notify_lib_totem_membership (NULL, my_member_list_entries, my_member_list);
}
/*
 * Synchronization abort callback: resets the downlist state machine and
 * discards every downlist message collected so far.
 */
static void cpg_sync_abort (void)
{
	downlist_state = CPG_DOWNLIST_NONE;

	downlist_messages_delete ();
}
static int notify_lib_totem_membership (
void *conn,
int member_list_entries,
const unsigned int *member_list)
{
struct list_head *iter;
char *buf;
int size;
struct res_lib_cpg_totem_confchg_callback *res;
size = sizeof(struct res_lib_cpg_totem_confchg_callback) +
sizeof(mar_uint32_t) * (member_list_entries);
buf = alloca(size);
if (!buf)
return CS_ERR_LIBRARY;
res = (struct res_lib_cpg_totem_confchg_callback *)buf;
res->member_list_entries = member_list_entries;
res->header.size = size;
res->header.id = MESSAGE_RES_CPG_TOTEM_CONFCHG_CALLBACK;
res->header.error = CS_OK;
memcpy (&res->ring_id, &last_sync_ring_id, sizeof (mar_cpg_ring_id_t));
memcpy (res->member_list, member_list, res->member_list_entries * sizeof (mar_uint32_t));
if (conn == NULL) {
for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) {
struct cpg_pd *cpg_pd = list_entry (iter, struct cpg_pd, list);
api->ipc_dispatch_send (cpg_pd->conn, buf, size);
}
} else {
api->ipc_dispatch_send (conn, buf, size);
}
return CS_OK;
}
/*
 * Build and deliver a group configuration-change callback.
 *
 * group_name           group the change applies to
 * conn                 single connection to notify, or NULL to notify the
 *                      local connections that belong to the group
 * joined_list_entries  number of entries in joined_list
 * joined_list          processes that joined
 * left_list_entries    number of entries in left_list
 * left_list            processes that left
 * id                   message id placed in the response header
 *
 * The message carries three consecutive address arrays: the current
 * members (processes of the group that are not in left_list), then the
 * left list, then the joined list.  Returns CS_OK, or CS_ERR_LIBRARY if the
 * buffer pointer is NULL.
 */
static int notify_lib_joinlist(
const mar_cpg_name_t *group_name,
void *conn,
int joined_list_entries,
mar_cpg_address_t *joined_list,
int left_list_entries,
mar_cpg_address_t *left_list,
int id)
{
int size;
char *buf;
struct list_head *iter;
int count;
struct res_lib_cpg_confchg_callback *res;
mar_cpg_address_t *retgi;
/* First pass: count the group's members, skipping those listed as having left */
count = 0;
for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) {
struct process_info *pi = list_entry (iter, struct process_info, list);
if (mar_name_compare (&pi->group, group_name) == 0) {
int i;
int founded = 0;
for (i = 0; i < left_list_entries; i++) {
if (left_list[i].nodeid == pi->nodeid && left_list[i].pid == pi->pid) {
founded++;
}
}
if (!founded)
count++;
}
}
/* Fixed header plus room for the member, left and joined arrays */
size = sizeof(struct res_lib_cpg_confchg_callback) +
sizeof(mar_cpg_address_t) * (count + left_list_entries + joined_list_entries);
buf = alloca(size);
if (!buf)
return CS_ERR_LIBRARY;
res = (struct res_lib_cpg_confchg_callback *)buf;
res->joined_list_entries = joined_list_entries;
res->left_list_entries = left_list_entries;
res->member_list_entries = count;
retgi = res->member_list;
res->header.size = size;
res->header.id = id;
res->header.error = CS_OK;
memcpy(&res->group_name, group_name, sizeof(mar_cpg_name_t));
/* Second pass: same filter as the counting pass, now writing the members */
for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) {
struct process_info *pi=list_entry (iter, struct process_info, list);
if (mar_name_compare (&pi->group, group_name) == 0) {
int i;
int founded = 0;
for (i = 0;i < left_list_entries; i++) {
if (left_list[i].nodeid == pi->nodeid && left_list[i].pid == pi->pid) {
founded++;
}
}
if (!founded) {
retgi->nodeid = pi->nodeid;
retgi->pid = pi->pid;
retgi++;
}
}
}
/* The left and joined arrays follow the member array directly */
if (left_list_entries) {
memcpy (retgi, left_list, left_list_entries * sizeof(mar_cpg_address_t));
retgi += left_list_entries;
}
if (joined_list_entries) {
memcpy (retgi, joined_list, joined_list_entries * sizeof(mar_cpg_address_t));
retgi += joined_list_entries;
}
if (conn) {
api->ipc_dispatch_send (conn, buf, size);
} else {
for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) {
struct cpg_pd *cpd = list_entry (iter, struct cpg_pd, list);
if (mar_name_compare (&cpd->group_name, group_name) == 0) {
/* The state handling below only inspects entry [0] of each list */
assert (left_list_entries <= 1);
assert (joined_list_entries <= 1);
if (joined_list_entries) {
/* The joined process is this connection's own: its join is now complete */
if (joined_list[0].pid == cpd->pid &&
joined_list[0].nodeid == api->totem_nodeid_get()) {
cpd->cpd_state = CPD_STATE_JOIN_COMPLETED;
}
}
/* Deliver only to connections whose join completed or whose leave is in progress */
if (cpd->cpd_state == CPD_STATE_JOIN_COMPLETED ||
cpd->cpd_state == CPD_STATE_LEAVE_STARTED) {
api->ipc_dispatch_send (cpd->conn, buf, size);
}
if (left_list_entries) {
/* Reset happens after the send above, so the leaver still receives its own leave event */
if (left_list[0].pid == cpd->pid &&
left_list[0].nodeid == api->totem_nodeid_get()) {
cpd->pid = 0;
memset (&cpd->group_name, 0, sizeof(cpd->group_name));
cpd->cpd_state = CPD_STATE_UNJOINED;
}
}
}
}
}
/*
 * Walk all connections and send the initial totem membership to each one
 * that requested it and has not been sent it yet.
 */
for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) {
struct cpg_pd *cpd = list_entry (iter, struct cpg_pd, list);
if ((cpd->flags & CPG_MODEL_V1_DELIVER_INITIAL_TOTEM_CONF) && (cpd->initial_totem_conf_sent == 0)) {
cpd->initial_totem_conf_sent = 1;
notify_lib_totem_membership (cpd->conn, my_old_member_list_entries, my_old_member_list);
}
}
return CS_OK;
}
/*
 * Log one downlist message: the caller's label, the sender and the old and
 * left member counts.
 *
 * NOTE: this span is part of a unified diff.  Lines starting with '-' are
 * the removed version (log level passed in by the caller) and lines
 * starting with '+' are the added version (level fixed to LOG_DEBUG).
 */
-static void downlist_log(int loglevel, const char *msg, struct downlist_msg* dl)
+static void downlist_log(const char *msg, struct downlist_msg* dl)
{
- log_printf (loglevel,
+ log_printf (LOG_DEBUG,
"%s: sender %s; members(old:%d left:%d)",
msg,
api->totem_ifaces_print(dl->sender_nodeid),
dl->old_members,
dl->left_nodes);
}
/*
 * Pick the downlist message to apply from those stored in
 * downlist_messages_head.
 *
 * The winner is the message with the largest (old_members - left_nodes);
 * ties go to the lower sender node id.  Returns NULL when no message is
 * stored.
 *
 * NOTE: this span is part of a unified diff; the '-'/'+' pair below is the
 * old and new form of the same downlist_log call.
 */
static struct downlist_msg* downlist_master_choose (void)
{
struct downlist_msg *cmp;
struct downlist_msg *best = NULL;
struct list_head *iter;
uint32_t cmp_members;
uint32_t best_members;
for (iter = downlist_messages_head.next;
iter != &downlist_messages_head;
iter = iter->next) {
cmp = list_entry(iter, struct downlist_msg, list);
- downlist_log(LOGSYS_LEVEL_DEBUG, "comparing", cmp);
+ downlist_log("comparing", cmp);
/* The first message seen becomes the initial candidate */
if (best == NULL) {
best = cmp;
continue;
}
/* Number of members remaining according to each sender */
best_members = best->old_members - best->left_nodes;
cmp_members = cmp->old_members - cmp->left_nodes;
if (cmp_members < best_members) {
continue;
}
else if (cmp_members > best_members) {
best = cmp;
}
/* Equal counts: prefer the lower node id */
else if (cmp->sender_nodeid < best->sender_nodeid) {
best = cmp;
}
}
return best;
}
/*
 * Apply the chosen downlist.
 *
 * Sets the state to CPG_DOWNLIST_APPLYING and picks the winning downlist
 * message.  For every known process on a node in that message it sends a
 * one-entry "left" configuration change with reason
 * CONFCHG_CPG_REASON_NODEDOWN, then removes and frees the process entry.
 *
 * NOTE: this span is part of a unified diff; the '-'/'+' pair below is the
 * old and new form of the same downlist_log call.
 */
static void downlist_master_choose_and_send (void)
{
struct downlist_msg *stored_msg;
struct list_head *iter;
mar_cpg_address_t left_list;
int i;
downlist_state = CPG_DOWNLIST_APPLYING;
stored_msg = downlist_master_choose ();
if (!stored_msg) {
log_printf (LOGSYS_LEVEL_DEBUG, "NO chosen downlist");
return;
}
- downlist_log(LOGSYS_LEVEL_DEBUG, "chosen downlist", stored_msg);
+ downlist_log("chosen downlist", stored_msg);
/* send events */
for (iter = process_info_list_head.next; iter != &process_info_list_head; ) {
struct process_info *pi = list_entry(iter, struct process_info, list);
/* Advance before the body because pi may be unlinked and freed below */
iter = iter->next;
for (i = 0; i < stored_msg->left_nodes; i++) {
if (pi->nodeid == stored_msg->nodeids[i]) {
left_list.nodeid = pi->nodeid;
left_list.pid = pi->pid;
left_list.reason = CONFCHG_CPG_REASON_NODEDOWN;
notify_lib_joinlist(&pi->group, NULL,
0, NULL,
1, &left_list,
MESSAGE_RES_CPG_CONFCHG_CALLBACK);
list_del (&pi->list);
free (pi);
break;
}
}
}
}
/*
 * Discard every stored downlist message: each entry is unlinked from
 * downlist_messages_head and freed, leaving the list empty.
 */
static void downlist_messages_delete (void)
{
	struct list_head *pos = downlist_messages_head.next;

	while (pos != &downlist_messages_head) {
		struct downlist_msg *doomed = list_entry(pos, struct downlist_msg, list);

		/* Step forward first; the current node is about to be freed */
		pos = pos->next;

		list_del (&doomed->list);
		free (doomed);
	}
}
/*
 * Executive initialization hook: stores the corosync API table in the
 * file-level `api` pointer and initializes the list that collects downlist
 * messages.  Always returns 0.
 */
static int cpg_exec_init_fn (struct corosync_api_v1 *corosync_api)
{
#ifdef COROSYNC_SOLARIS
	logsys_subsys_init();
#endif

	api = corosync_api;
	list_init (&downlist_messages_head);

	return (0);
}
/*
 * Tear down one iteration instance: free every item queued on it, unlink
 * the instance from its owner's list and destroy its handle.
 */
static void cpg_iteration_instance_finalize (struct cpg_iteration_instance *cpg_iteration_instance)
{
	struct list_head *const head = &cpg_iteration_instance->items_list_head;
	struct list_head *pos = head->next;

	while (pos != head) {
		struct process_info *item = list_entry (pos, struct process_info, list);

		/* Step forward first; the current node is about to be freed */
		pos = pos->next;

		list_del (&item->list);
		free (item);
	}

	list_del (&cpg_iteration_instance->list);
	hdb_handle_destroy (&cpg_iteration_handle_t_db, cpg_iteration_instance->handle);
}
/*
 * Release everything attached to a connection's private data: its
 * zero-copy mappings and its iteration instances.  Finally unlink the
 * private data from the list of connections.
 */
static void cpg_pd_finalize (struct cpg_pd *cpd)
{
	struct list_head *pos;
	struct list_head *following;

	zcb_all_free(cpd);

	for (pos = cpd->iteration_instance_list_head.next;
		pos != &cpd->iteration_instance_list_head;
		pos = following) {
		struct cpg_iteration_instance *instance;

		/* Saved up front: finalizing unlinks the instance from this list */
		following = pos->next;

		instance = list_entry (pos, struct cpg_iteration_instance, list);
		cpg_iteration_instance_finalize (instance);
	}

	list_del (&cpd->list);
}
/*
 * Library connection exit hook.
 *
 * If the connection still has a group name recorded, a PROCLEAVE with
 * reason PROCDOWN is multicast for it.  The private data is then finalized
 * and the connection reference taken in cpg_lib_init_fn is dropped.
 * Always returns 0.
 */
static int cpg_lib_exit_fn (void *conn)
{
	struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
	const int still_in_group = (cpd->group_name.length > 0);

	log_printf(LOGSYS_LEVEL_DEBUG, "exit_fn for conn=%p\n", conn);

	if (still_in_group) {
		cpg_node_joinleave_send (cpd->pid, &cpd->group_name,
			MESSAGE_REQ_EXEC_CPG_PROCLEAVE, CONFCHG_CPG_REASON_PROCDOWN);
	}

	cpg_pd_finalize (cpd);
	api->ipc_refcnt_dec (conn);

	return (0);
}
static int cpg_node_joinleave_send (unsigned int pid, const mar_cpg_name_t *group_name, int fn, int reason)
{
struct req_exec_cpg_procjoin req_exec_cpg_procjoin;
struct iovec req_exec_cpg_iovec;
int result;
memcpy(&req_exec_cpg_procjoin.group_name, group_name, sizeof(mar_cpg_name_t));
req_exec_cpg_procjoin.pid = pid;
req_exec_cpg_procjoin.reason = reason;
req_exec_cpg_procjoin.header.size = sizeof(req_exec_cpg_procjoin);
req_exec_cpg_procjoin.header.id = SERVICE_ID_MAKE(CPG_SERVICE, fn);
req_exec_cpg_iovec.iov_base = (char *)&req_exec_cpg_procjoin;
req_exec_cpg_iovec.iov_len = sizeof(req_exec_cpg_procjoin);
result = api->totem_mcast (&req_exec_cpg_iovec, 1, TOTEM_AGREED);
return (result);
}
/*
 * Byte-swap a join or leave message in place (both use
 * struct req_exec_cpg_procjoin): the group name, pid and reason fields.
 */
static void exec_cpg_procjoin_endian_convert (void *msg)
{
	struct req_exec_cpg_procjoin *procjoin = msg;

	swab_mar_cpg_name_t (&procjoin->group_name);
	procjoin->pid = swab32(procjoin->pid);
	procjoin->reason = swab32(procjoin->reason);
}
/*
 * Byte-swap a joinlist message in place.  The header size is converted
 * first because it bounds the array of join_list_entry records that
 * follows the header.
 */
static void exec_cpg_joinlist_endian_convert (void *msg_v)
{
	char *base = msg_v;
	struct qb_ipc_response_header *hdr = (struct qb_ipc_response_header *)base;
	struct join_list_entry *entry;
	const char *limit;

	swab_mar_int32_t (&hdr->size);
	limit = base + hdr->size;

	for (entry = (struct join_list_entry *)(base + sizeof(struct qb_ipc_response_header));
		(const char *)entry < limit;
		entry++) {
		entry->pid = swab32(entry->pid);
		swab_mar_cpg_name_t (&entry->group_name);
	}
}
/*
 * Converter slot for the old downlist message format.  Intentionally does
 * nothing: the matching handler only logs that such a message arrived.
 */
static void exec_cpg_downlist_endian_convert_old (void *msg)
{
	/* no conversion performed */
}
static void exec_cpg_downlist_endian_convert (void *msg)
{
struct req_exec_cpg_downlist *req_exec_cpg_downlist = msg;
unsigned int i;
req_exec_cpg_downlist->left_nodes = swab32(req_exec_cpg_downlist->left_nodes);
req_exec_cpg_downlist->old_members = swab32(req_exec_cpg_downlist->old_members);
for (i = 0; i < req_exec_cpg_downlist->left_nodes; i++) {
req_exec_cpg_downlist->nodeids[i] = swab32(req_exec_cpg_downlist->nodeids[i]);
}
}
/*
 * Byte-swap the fixed part of a multicast message in place: header, group
 * name, message length, pid and source.  The payload bytes are untouched.
 */
static void exec_cpg_mcast_endian_convert (void *msg)
{
	struct req_exec_cpg_mcast *mcast = msg;

	swab_coroipc_request_header_t (&mcast->header);
	swab_mar_cpg_name_t (&mcast->group_name);
	swab_mar_message_source_t (&mcast->source);

	mcast->msglen = swab32(mcast->msglen);
	mcast->pid = swab32(mcast->pid);
}
/*
 * Look up a process in process_info_list_head by group name, pid and node
 * id.  Returns the matching entry, or NULL when there is none.
 */
static struct process_info *process_info_find(const mar_cpg_name_t *group_name, uint32_t pid, unsigned int nodeid) {
	struct list_head *pos;

	for (pos = process_info_list_head.next; pos != &process_info_list_head; pos = pos->next) {
		struct process_info *candidate = list_entry (pos, struct process_info, list);

		/* Cheap integer comparisons first, name comparison last */
		if (candidate->pid != pid || candidate->nodeid != nodeid) {
			continue;
		}
		if (mar_name_compare (&candidate->group, group_name) == 0) {
			return candidate;
		}
	}

	return NULL;
}
/*
 * Record that a process joined a group and notify local clients.
 *
 * name    group being joined
 * pid     process id of the joiner
 * nodeid  node the joiner runs on
 * reason  CONFCHG_CPG_REASON_* value reported to clients
 *
 * Does nothing if the process is already recorded.  If the entry cannot be
 * allocated, a warning is logged and the join is dropped.
 */
static void do_proc_join(
	const mar_cpg_name_t *name,
	uint32_t pid,
	unsigned int nodeid,
	int reason)
{
	struct process_info *new_pi;
	struct list_head *pos;
	struct list_head *insert_after;
	mar_cpg_address_t joined;

	if (process_info_find (name, pid, nodeid) != NULL) {
		return ;
	}

	new_pi = malloc (sizeof (struct process_info));
	if (new_pi == NULL) {
		log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate process_info struct");
		return;
	}

	new_pi->nodeid = nodeid;
	new_pi->pid = pid;
	memcpy(&new_pi->group, name, sizeof(*name));
	list_init(&new_pi->list);

	/*
	 * Insert new process in sorted order so synchronization works properly:
	 * entries are ordered by node id, then by pid.  The new entry goes after
	 * the last existing entry that does not sort above it.
	 */
	insert_after = &process_info_list_head;
	for (pos = process_info_list_head.next; pos != &process_info_list_head; pos = pos->next) {
		struct process_info *existing = list_entry(pos, struct process_info, list);

		if (existing->nodeid > new_pi->nodeid) {
			break;
		}
		if (existing->nodeid == new_pi->nodeid && existing->pid > new_pi->pid) {
			break;
		}
		insert_after = pos;
	}
	list_add (&new_pi->list, insert_after);

	joined.pid = new_pi->pid;
	joined.nodeid = nodeid;
	joined.reason = reason;

	notify_lib_joinlist(&new_pi->group, NULL,
		1, &joined,
		0, NULL,
		MESSAGE_RES_CPG_CONFCHG_CALLBACK);
}
/*
 * Handler for the old downlist message format.  The message content is
 * ignored; only a warning naming the sending node is logged.
 */
static void message_handler_req_exec_cpg_downlist_old (
	const void *message,
	unsigned int nodeid)
{
	log_printf (LOGSYS_LEVEL_WARNING, "downlist OLD from node %d", nodeid);
}
static void message_handler_req_exec_cpg_downlist(
const void *message,
unsigned int nodeid)
{
const struct req_exec_cpg_downlist *req_exec_cpg_downlist = message;
int i;
struct list_head *iter;
struct downlist_msg *stored_msg;
int found;
if (downlist_state != CPG_DOWNLIST_WAITING_FOR_MESSAGES) {
log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received in state %d",
req_exec_cpg_downlist->left_nodes, downlist_state);
return;
}
stored_msg = malloc (sizeof (struct downlist_msg));
stored_msg->sender_nodeid = nodeid;
stored_msg->old_members = req_exec_cpg_downlist->old_members;
stored_msg->left_nodes = req_exec_cpg_downlist->left_nodes;
memcpy (stored_msg->nodeids, req_exec_cpg_downlist->nodeids,
req_exec_cpg_downlist->left_nodes * sizeof (mar_uint32_t));
list_init (&stored_msg->list);
list_add (&stored_msg->list, &downlist_messages_head);
for (i = 0; i < my_member_list_entries; i++) {
found = 0;
for (iter = downlist_messages_head.next;
iter != &downlist_messages_head;
iter = iter->next) {
stored_msg = list_entry(iter, struct downlist_msg, list);
if (my_member_list[i] == stored_msg->sender_nodeid) {
found = 1;
}
}
if (!found) {
return;
}
}
downlist_master_choose_and_send ();
}
/*
 * Handler for a process-join message from a cluster node: records the
 * joining process with reason CONFCHG_CPG_REASON_JOIN.
 */
static void message_handler_req_exec_cpg_procjoin (
	const void *message,
	unsigned int nodeid)
{
	const struct req_exec_cpg_procjoin *join_req = message;

	log_printf(LOGSYS_LEVEL_DEBUG, "got procjoin message from cluster node %d\n", nodeid);

	do_proc_join (&join_req->group_name, join_req->pid, nodeid,
		CONFCHG_CPG_REASON_JOIN);
}
/*
 * Handler for a process-leave message from a cluster node.
 *
 * Local clients of the group are notified first, with the reason taken
 * from the message.  Afterwards every matching process entry is removed
 * from process_info_list_head and freed.
 */
static void message_handler_req_exec_cpg_procleave (
	const void *message,
	unsigned int nodeid)
{
	const struct req_exec_cpg_procjoin *leave_req = message;
	struct list_head *pos;
	struct list_head *following;
	mar_cpg_address_t leaver;

	log_printf(LOGSYS_LEVEL_DEBUG, "got procleave message from cluster node %d\n", nodeid);

	leaver.pid = leave_req->pid;
	leaver.nodeid = nodeid;
	leaver.reason = leave_req->reason;

	/* Notify while the entry is still in the process list */
	notify_lib_joinlist(&leave_req->group_name, NULL,
		0, NULL,
		1, &leaver,
		MESSAGE_RES_CPG_CONFCHG_CALLBACK);

	for (pos = process_info_list_head.next; pos != &process_info_list_head; pos = following) {
		struct process_info *entry = list_entry(pos, struct process_info, list);

		/* Saved up front: the entry may be unlinked and freed below */
		following = pos->next;

		if (entry->pid == leave_req->pid && entry->nodeid == nodeid &&
			mar_name_compare (&entry->group, &leave_req->group_name)==0) {
			list_del (&entry->list);
			free (entry);
		}
	}
}
/*
 * Got a proclist from another node.
 *
 * The message is a response header followed by join_list_entry records up
 * to the size given in the header.  Each record is recorded as a join with
 * reason CONFCHG_CPG_REASON_NODEUP.  Joinlists sent by this node are
 * ignored.
 */
static void message_handler_req_exec_cpg_joinlist (
	const void *message_v,
	unsigned int nodeid)
{
	const char *base = message_v;
	const struct qb_ipc_response_header *hdr = (const struct qb_ipc_response_header *)base;
	const struct join_list_entry *entry;

	log_printf(LOGSYS_LEVEL_DEBUG, "got joinlist message from node %x\n",
		nodeid);

	/* Ignore our own messages */
	if (nodeid == api->totem_nodeid_get()) {
		return;
	}

	for (entry = (const struct join_list_entry *)(base + sizeof(struct qb_ipc_response_header));
		(const char *)entry < base + hdr->size;
		entry++) {
		do_proc_join (&entry->group_name, entry->pid, nodeid,
			CONFCHG_CPG_REASON_NODEUP);
	}
}
static void message_handler_req_exec_cpg_mcast (
const void *message,
unsigned int nodeid)
{
const struct req_exec_cpg_mcast *req_exec_cpg_mcast = message;
struct res_lib_cpg_deliver_callback res_lib_cpg_mcast;
int msglen = req_exec_cpg_mcast->msglen;
struct list_head *iter, *pi_iter;
struct cpg_pd *cpd;
struct iovec iovec[2];
int known_node = 0;
res_lib_cpg_mcast.header.id = MESSAGE_RES_CPG_DELIVER_CALLBACK;
res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast) + msglen;
res_lib_cpg_mcast.msglen = msglen;
res_lib_cpg_mcast.pid = req_exec_cpg_mcast->pid;
res_lib_cpg_mcast.nodeid = nodeid;
memcpy(&res_lib_cpg_mcast.group_name, &req_exec_cpg_mcast->group_name,
sizeof(mar_cpg_name_t));
iovec[0].iov_base = (void *)&res_lib_cpg_mcast;
iovec[0].iov_len = sizeof (res_lib_cpg_mcast);
iovec[1].iov_base = (char*)message+sizeof(*req_exec_cpg_mcast);
iovec[1].iov_len = msglen;
for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; ) {
cpd = list_entry(iter, struct cpg_pd, list);
iter = iter->next;
if ((cpd->cpd_state == CPD_STATE_LEAVE_STARTED || cpd->cpd_state == CPD_STATE_JOIN_COMPLETED)
&& (mar_name_compare (&cpd->group_name, &req_exec_cpg_mcast->group_name) == 0)) {
if (!known_node) {
/* Try to find, if we know the node */
for (pi_iter = process_info_list_head.next;
pi_iter != &process_info_list_head; pi_iter = pi_iter->next) {
struct process_info *pi = list_entry (pi_iter, struct process_info, list);
if (pi->nodeid == nodeid &&
mar_name_compare (&pi->group, &req_exec_cpg_mcast->group_name) == 0) {
known_node = 1;
break;
}
}
}
if (!known_node) {
log_printf(LOGSYS_LEVEL_WARNING, "Unknown node -> we will not deliver message");
return ;
}
api->ipc_dispatch_iov_send (cpd->conn, iovec, 2);
}
}
}
/*
 * Complete the header and old member count of the prepared downlist
 * message and multicast it.  Returns the result of the totem multicast.
 */
static int cpg_exec_send_downlist(void)
{
	struct iovec downlist_iov;

	g_req_exec_cpg_downlist.old_members = my_old_member_list_entries;
	g_req_exec_cpg_downlist.header.size = sizeof(struct req_exec_cpg_downlist);
	g_req_exec_cpg_downlist.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_DOWNLIST);

	/* The whole fixed-size structure is sent, whatever left_nodes is */
	downlist_iov.iov_base = (void *)&g_req_exec_cpg_downlist;
	downlist_iov.iov_len = g_req_exec_cpg_downlist.header.size;

	return (api->totem_mcast (&downlist_iov, 1, TOTEM_AGREED));
}
/*
 * Multicast the list of processes that run on this node.
 *
 * The message is a response header followed by one join_list_entry per
 * local process, built in a stack buffer.  Returns 0 without sending when
 * there are no local processes, -1 if the buffer pointer is NULL, otherwise
 * the result of the totem multicast.
 */
static int cpg_exec_send_joinlist(void)
{
	struct list_head *pos;
	struct qb_ipc_response_header *hdr;
	struct join_list_entry *entry;
	struct iovec joinlist_iov;
	char *msg_buf;
	int local_count = 0;

	/* First pass: how many recorded processes belong to this node */
	for (pos = process_info_list_head.next; pos != &process_info_list_head; pos = pos->next) {
		struct process_info *pi = list_entry (pos, struct process_info, list);

		if (pi->nodeid == api->totem_nodeid_get ()) {
			local_count++;
		}
	}

	/* Nothing to send */
	if (local_count == 0) {
		return 0;
	}

	msg_buf = alloca(sizeof(struct qb_ipc_response_header) + sizeof(struct join_list_entry) * local_count);
	if (!msg_buf) {
		log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate joinlist buffer");
		return -1;
	}

	hdr = (struct qb_ipc_response_header *)msg_buf;
	entry = (struct join_list_entry *)(msg_buf + sizeof(struct qb_ipc_response_header));

	/* Second pass: fill in one entry per local process */
	for (pos = process_info_list_head.next; pos != &process_info_list_head; pos = pos->next) {
		struct process_info *pi = list_entry (pos, struct process_info, list);

		if (pi->nodeid != api->totem_nodeid_get ()) {
			continue;
		}
		memcpy (&entry->group_name, &pi->group, sizeof (mar_cpg_name_t));
		entry->pid = pi->pid;
		entry++;
	}

	hdr->id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_JOINLIST);
	hdr->size = sizeof(struct qb_ipc_response_header)+sizeof(struct join_list_entry) * local_count;

	joinlist_iov.iov_base = msg_buf;
	joinlist_iov.iov_len = hdr->size;

	return (api->totem_mcast (&joinlist_iov, 1, TOTEM_AGREED));
}
/*
 * Library connection init hook: zeroes the connection's private data,
 * initializes its lists, links it into cpg_pd_list_head and takes a
 * reference on the connection.  Always returns 0.
 */
static int cpg_lib_init_fn (void *conn)
{
	struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);

	memset (cpd, 0, sizeof(struct cpg_pd));
	cpd->conn = conn;

	list_init (&cpd->iteration_instance_list_head);
	list_init (&cpd->zcb_mapped_list_head);
	list_add (&cpd->list, &cpg_pd_list_head);

	/* Matching decrement is in cpg_lib_exit_fn */
	api->ipc_refcnt_inc (conn);

	log_printf(LOGSYS_LEVEL_DEBUG, "lib_init_fn: conn=%p, cpd=%p\n", conn, cpd);

	return (0);
}
/* Join message from the library */
static void message_handler_req_lib_cpg_join (void *conn, const void *message)
{
const struct req_lib_cpg_join *req_lib_cpg_join = message;
struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
struct res_lib_cpg_join res_lib_cpg_join;
cs_error_t error = CS_OK;
struct list_head *iter;
/* Test, if we don't have same pid and group name joined */
for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) {
struct cpg_pd *cpd_item = list_entry (iter, struct cpg_pd, list);
if (cpd_item->pid == req_lib_cpg_join->pid &&
mar_name_compare(&req_lib_cpg_join->group_name, &cpd_item->group_name) == 0) {
/* We have same pid and group name joined -> return error */
error = CS_ERR_EXIST;
goto response_send;
}
}
/*
* Same check must be done in process info list, because there may be not yet delivered
* leave of client.
*/
for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) {
struct process_info *pi = list_entry (iter, struct process_info, list);
if (pi->nodeid == api->totem_nodeid_get () && pi->pid == req_lib_cpg_join->pid &&
mar_name_compare(&req_lib_cpg_join->group_name, &pi->group) == 0) {
/* We have same pid and group name joined -> return error */
error = CS_ERR_TRY_AGAIN;
goto response_send;
}
}
switch (cpd->cpd_state) {
case CPD_STATE_UNJOINED:
error = CS_OK;
cpd->cpd_state = CPD_STATE_JOIN_STARTED;
cpd->pid = req_lib_cpg_join->pid;
cpd->flags = req_lib_cpg_join->flags;
memcpy (&cpd->group_name, &req_lib_cpg_join->group_name,
sizeof (cpd->group_name));
cpg_node_joinleave_send (req_lib_cpg_join->pid,
&req_lib_cpg_join->group_name,
MESSAGE_REQ_EXEC_CPG_PROCJOIN, CONFCHG_CPG_REASON_JOIN);
break;
case CPD_STATE_LEAVE_STARTED:
error = CS_ERR_BUSY;
break;
case CPD_STATE_JOIN_STARTED:
error = CS_ERR_EXIST;
break;
case CPD_STATE_JOIN_COMPLETED:
error = CS_ERR_EXIST;
break;
}
response_send:
res_lib_cpg_join.header.size = sizeof(res_lib_cpg_join);
res_lib_cpg_join.header.id = MESSAGE_RES_CPG_JOIN;
res_lib_cpg_join.header.error = error;
api->ipc_response_send (conn, &res_lib_cpg_join, sizeof(res_lib_cpg_join));
}
/* Leave message from the library */
static void message_handler_req_lib_cpg_leave (void *conn, const void *message)
{
struct res_lib_cpg_leave res_lib_cpg_leave;
cs_error_t error = CS_OK;
struct req_lib_cpg_leave *req_lib_cpg_leave = (struct req_lib_cpg_leave *)message;
struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
log_printf(LOGSYS_LEVEL_DEBUG, "got leave request on %p\n", conn);
switch (cpd->cpd_state) {
case CPD_STATE_UNJOINED:
error = CS_ERR_NOT_EXIST;
break;
case CPD_STATE_LEAVE_STARTED:
error = CS_ERR_NOT_EXIST;
break;
case CPD_STATE_JOIN_STARTED:
error = CS_ERR_BUSY;
break;
case CPD_STATE_JOIN_COMPLETED:
error = CS_OK;
cpd->cpd_state = CPD_STATE_LEAVE_STARTED;
cpg_node_joinleave_send (req_lib_cpg_leave->pid,
&req_lib_cpg_leave->group_name,
MESSAGE_REQ_EXEC_CPG_PROCLEAVE,
CONFCHG_CPG_REASON_LEAVE);
break;
}
/* send return */
res_lib_cpg_leave.header.size = sizeof(res_lib_cpg_leave);
res_lib_cpg_leave.header.id = MESSAGE_RES_CPG_LEAVE;
res_lib_cpg_leave.header.error = error;
api->ipc_response_send(conn, &res_lib_cpg_leave, sizeof(res_lib_cpg_leave));
}
/* Finalize message from library */
static void message_handler_req_lib_cpg_finalize (
void *conn,
const void *message)
{
struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
struct res_lib_cpg_finalize res_lib_cpg_finalize;
cs_error_t error = CS_OK;
log_printf (LOGSYS_LEVEL_DEBUG, "cpg finalize for conn=%p\n", conn);
/*
* We will just remove cpd from list. After this call, connection will be
* closed on lib side, and cpg_lib_exit_fn will be called
*/
list_del (&cpd->list);
list_init (&cpd->list);
res_lib_cpg_finalize.header.size = sizeof (res_lib_cpg_finalize);
res_lib_cpg_finalize.header.id = MESSAGE_RES_CPG_FINALIZE;
res_lib_cpg_finalize.header.error = error;
api->ipc_response_send (conn, &res_lib_cpg_finalize,
sizeof (res_lib_cpg_finalize));
}
/*
 * Map an existing file into memory as a shared read/write region.
 *
 * path   file to map; it is unlinked as soon as the open has been
 *        attempted, whether or not the open succeeded
 * bytes  size the file is truncated or extended to, and the mapping length
 * buf    receives the mapping address on success
 *
 * An anonymous PROT_NONE region is reserved first and the file is then
 * mapped over it with MAP_FIXED.
 *
 * Returns 0 on success, -1 on failure.  On failure no descriptor and no
 * mapping is left behind and *buf is not written.
 */
static int
memory_map (
	const char *path,
	size_t bytes,
	void **buf)
{
	int32_t fd;
	void *addr_orig;
	void *addr;
	int32_t res;

	fd = open (path, O_RDWR, 0600);

	/* The name is not needed once the descriptor is (or is not) open */
	unlink (path);

	if (fd == -1) {
		return (-1);
	}

	res = ftruncate (fd, bytes);
	if (res == -1) {
		goto error_close;
	}

	addr_orig = mmap (NULL, bytes, PROT_NONE,
		MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (addr_orig == MAP_FAILED) {
		goto error_close;
	}

	addr = mmap (addr_orig, bytes, PROT_READ | PROT_WRITE,
		MAP_FIXED | MAP_SHARED, fd, 0);
	if (addr != addr_orig) {
		munmap(addr_orig, bytes);
		goto error_close;
	}
#ifdef COROSYNC_BSD
	madvise(addr, bytes, MADV_NOSYNC);
#endif

	res = close (fd);
	if (res) {
		/* The caller never learns the address, so release the mapping here */
		munmap (addr, bytes);
		return (-1);
	}

	*buf = addr_orig;
	return (0);

error_close:
	/*
	 * The path was already unlinked above.  Unlinking it again here could
	 * remove an unrelated file created under the same name in the meantime.
	 */
	close (fd);
	return -1;
}
/*
 * Create a zero-copy mapping for a connection.
 *
 * cpd           connection private data that will own the mapping
 * path_to_file  file to map
 * size          mapping length in bytes
 * addr          receives the mapping address
 *
 * The mapping is recorded at the tail of the connection's
 * zcb_mapped_list_head.  Returns 0 on success and -1 if the bookkeeping
 * record cannot be allocated or the mapping fails.
 */
static inline int zcb_alloc (
	struct cpg_pd *cpd,
	const char *path_to_file,
	size_t size,
	void **addr)
{
	struct zcb_mapped *record = malloc (sizeof (struct zcb_mapped));

	if (record == NULL) {
		return (-1);
	}

	if (memory_map (path_to_file, size, addr) == -1) {
		free (record);
		return (-1);
	}

	record->addr = *addr;
	record->size = size;
	list_init (&record->list);
	list_add_tail (&record->list, &cpd->zcb_mapped_list_head);

	return (0);
}
/*
 * Release one zero-copy buffer: unmap it, unlink its bookkeeping entry and
 * free the entry.
 *
 * Returns the result of munmap().
 */
static inline int zcb_free (struct zcb_mapped *zcb_mapped)
{
	unsigned int unmap_result = munmap (zcb_mapped->addr, zcb_mapped->size);

	list_del (&zcb_mapped->list);
	free (zcb_mapped);

	return (unmap_result);
}
/*
 * Release the zero-copy buffer of `cpd` whose mapping starts at `addr`.
 *
 * Returns the result of zcb_free() for the first matching entry, or 0 when
 * no entry has that address.
 */
static inline int zcb_by_addr_free (struct cpg_pd *cpd, void *addr)
{
	struct list_head *cursor = cpd->zcb_mapped_list_head.next;
	unsigned int outcome = 0;

	while (cursor != &cpd->zcb_mapped_list_head) {
		struct zcb_mapped *candidate =
			list_entry (cursor, struct zcb_mapped, list);

		if (candidate->addr == addr) {
			/* zcb_free() releases the entry, so stop walking here. */
			outcome = zcb_free (candidate);
			break;
		}
		cursor = cursor->next;
	}

	return (outcome);
}
/*
 * Release every zero-copy buffer recorded for `cpd`.
 *
 * Always returns 0; individual zcb_free() results are not reported.
 */
static inline int zcb_all_free (
	struct cpg_pd *cpd)
{
	struct list_head *cursor = cpd->zcb_mapped_list_head.next;

	while (cursor != &cpd->zcb_mapped_list_head) {
		struct zcb_mapped *doomed =
			list_entry (cursor, struct zcb_mapped, list);

		/* Step forward before the entry (and its list node) is freed. */
		cursor = cursor->next;
		zcb_free (doomed);
	}

	return (0);
}
/*
 * Overlay used to carry a server-side pointer in a fixed 64-bit field.
 */
union u {
	uint64_t server_addr;
	void *server_ptr;
};

/*
 * Convert a server-side pointer to its 64-bit representation.
 */
static uint64_t void2serveraddr (void *server_ptr)
{
	union u u;

	/*
	 * Clear the whole 64-bit member first: where a pointer is narrower
	 * than 64 bits, storing only server_ptr would leave the remaining
	 * bytes of server_addr indeterminate.
	 */
	u.server_addr = 0;
	u.server_ptr = server_ptr;
	return (u.server_addr);
}

/*
 * Convert a 64-bit representation produced by void2serveraddr() back to a
 * pointer.
 */
static void *serveraddr2void (uint64_t server_addr)
{
	union u u;

	u.server_addr = server_addr;
	return (u.server_ptr);
}
static void message_handler_req_lib_cpg_zc_alloc (
void *conn,
const void *message)
{
mar_req_coroipcc_zc_alloc_t *hdr = (mar_req_coroipcc_zc_alloc_t *)message;
struct qb_ipc_response_header res_header;
void *addr = NULL;
struct coroipcs_zc_header *zc_header;
unsigned int res;
struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
log_printf(LOGSYS_LEVEL_DEBUG, "path: %s", hdr->path_to_file);
res = zcb_alloc (cpd, hdr->path_to_file, hdr->map_size,
&addr);
assert(res == 0);
zc_header = (struct coroipcs_zc_header *)addr;
zc_header->server_address = void2serveraddr(addr);
res_header.size = sizeof (struct qb_ipc_response_header);
res_header.id = 0;
api->ipc_response_send (conn,
&res_header,
res_header.size);
}
static void message_handler_req_lib_cpg_zc_free (
void *conn,
const void *message)
{
mar_req_coroipcc_zc_free_t *hdr = (mar_req_coroipcc_zc_free_t *)message;
struct qb_ipc_response_header res_header;
void *addr = NULL;
struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
log_printf(LOGSYS_LEVEL_DEBUG, " free'ing");
addr = serveraddr2void (hdr->server_address);
zcb_by_addr_free (cpd, addr);
res_header.size = sizeof (struct qb_ipc_response_header);
res_header.id = 0;
api->ipc_response_send (
conn, &res_header,
res_header.size);
}
/* Mcast message from the library */
static void message_handler_req_lib_cpg_mcast (void *conn, const void *message)
{
const struct req_lib_cpg_mcast *req_lib_cpg_mcast = message;
struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
mar_cpg_name_t group_name = cpd->group_name;
struct iovec req_exec_cpg_iovec[2];
struct req_exec_cpg_mcast req_exec_cpg_mcast;
int msglen = req_lib_cpg_mcast->msglen;
int result;
cs_error_t error = CS_ERR_NOT_EXIST;
log_printf(LOGSYS_LEVEL_DEBUG, "got mcast request on %p\n", conn);
switch (cpd->cpd_state) {
case CPD_STATE_UNJOINED:
error = CS_ERR_NOT_EXIST;
break;
case CPD_STATE_LEAVE_STARTED:
error = CS_ERR_NOT_EXIST;
break;
case CPD_STATE_JOIN_STARTED:
error = CS_OK;
break;
case CPD_STATE_JOIN_COMPLETED:
error = CS_OK;
break;
}
if (error == CS_OK) {
req_exec_cpg_mcast.header.size = sizeof(req_exec_cpg_mcast) + msglen;
req_exec_cpg_mcast.header.id = SERVICE_ID_MAKE(CPG_SERVICE,
MESSAGE_REQ_EXEC_CPG_MCAST);
req_exec_cpg_mcast.pid = cpd->pid;
req_exec_cpg_mcast.msglen = msglen;
api->ipc_source_set (&req_exec_cpg_mcast.source, conn);
memcpy(&req_exec_cpg_mcast.group_name, &group_name,
sizeof(mar_cpg_name_t));
req_exec_cpg_iovec[0].iov_base = (char *)&req_exec_cpg_mcast;
req_exec_cpg_iovec[0].iov_len = sizeof(req_exec_cpg_mcast);
req_exec_cpg_iovec[1].iov_base = (char *)&req_lib_cpg_mcast->message;
req_exec_cpg_iovec[1].iov_len = msglen;
result = api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED);
assert(result == 0);
} else {
log_printf(LOGSYS_LEVEL_ERROR, "*** %p can't mcast to group %s state:%d, error:%d\n",
conn, group_name.value, cpd->cpd_state, error);
}
}
static void message_handler_req_lib_cpg_zc_execute (
void *conn,
const void *message)
{
mar_req_coroipcc_zc_execute_t *hdr = (mar_req_coroipcc_zc_execute_t *)message;
struct qb_ipc_request_header *header;
struct res_lib_cpg_mcast res_lib_cpg_mcast;
struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
struct iovec req_exec_cpg_iovec[2];
struct req_exec_cpg_mcast req_exec_cpg_mcast;
struct req_lib_cpg_mcast *req_lib_cpg_mcast;
int result;
cs_error_t error = CS_ERR_NOT_EXIST;
log_printf(LOGSYS_LEVEL_DEBUG, "got ZC mcast request on %p\n", conn);
header = (struct qb_ipc_request_header *)(((char *)serveraddr2void(hdr->server_address) + sizeof (struct coroipcs_zc_header)));
req_lib_cpg_mcast = (struct req_lib_cpg_mcast *)header;
switch (cpd->cpd_state) {
case CPD_STATE_UNJOINED:
error = CS_ERR_NOT_EXIST;
break;
case CPD_STATE_LEAVE_STARTED:
error = CS_ERR_NOT_EXIST;
break;
case CPD_STATE_JOIN_STARTED:
error = CS_OK;
break;
case CPD_STATE_JOIN_COMPLETED:
error = CS_OK;
break;
}
res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast);
res_lib_cpg_mcast.header.id = MESSAGE_RES_CPG_MCAST;
if (error == CS_OK) {
req_exec_cpg_mcast.header.size = sizeof(req_exec_cpg_mcast) + req_lib_cpg_mcast->msglen;
req_exec_cpg_mcast.header.id = SERVICE_ID_MAKE(CPG_SERVICE,
MESSAGE_REQ_EXEC_CPG_MCAST);
req_exec_cpg_mcast.pid = cpd->pid;
req_exec_cpg_mcast.msglen = req_lib_cpg_mcast->msglen;
api->ipc_source_set (&req_exec_cpg_mcast.source, conn);
memcpy(&req_exec_cpg_mcast.group_name, &cpd->group_name,
sizeof(mar_cpg_name_t));
req_exec_cpg_iovec[0].iov_base = (char *)&req_exec_cpg_mcast;
req_exec_cpg_iovec[0].iov_len = sizeof(req_exec_cpg_mcast);
req_exec_cpg_iovec[1].iov_base = (char *)header + sizeof(struct req_lib_cpg_mcast);
req_exec_cpg_iovec[1].iov_len = req_exec_cpg_mcast.msglen;
result = api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED);
if (result == 0) {
res_lib_cpg_mcast.header.error = CS_OK;
} else {
res_lib_cpg_mcast.header.error = CS_ERR_TRY_AGAIN;
}
} else {
res_lib_cpg_mcast.header.error = error;
}
api->ipc_response_send (conn, &res_lib_cpg_mcast,
sizeof (res_lib_cpg_mcast));
}
/*
 * Library request handler: return the current members of a group.
 *
 * Walks process_info_list_head and copies nodeid/pid of every process whose
 * group name equals the one in the request into the response's member_list.
 *
 * conn    - IPC connection the request arrived on
 * message - struct req_lib_cpg_membership_get (group_name)
 *
 * NOTE(review): member_count is not checked against the capacity of
 * member_list here; confirm the list can never hold more matching entries
 * than the array has room for.
 */
static void message_handler_req_lib_cpg_membership (void *conn,
	const void *message)
{
	struct req_lib_cpg_membership_get *req_lib_cpg_membership_get =
		(struct req_lib_cpg_membership_get *)message;
	struct res_lib_cpg_membership_get res_lib_cpg_membership_get;
	struct list_head *iter;
	int member_count = 0;

	res_lib_cpg_membership_get.header.id = MESSAGE_RES_CPG_MEMBERSHIP;
	res_lib_cpg_membership_get.header.error = CS_OK;
	/*
	 * Advertise the size of the response that is actually sent below,
	 * not the size of the request structure.
	 */
	res_lib_cpg_membership_get.header.size =
		sizeof (res_lib_cpg_membership_get);

	for (iter = process_info_list_head.next;
		iter != &process_info_list_head; iter = iter->next) {

		struct process_info *pi = list_entry (iter, struct process_info, list);

		if (mar_name_compare (&pi->group, &req_lib_cpg_membership_get->group_name) == 0) {
			res_lib_cpg_membership_get.member_list[member_count].nodeid = pi->nodeid;
			res_lib_cpg_membership_get.member_list[member_count].pid = pi->pid;
			member_count += 1;
		}
	}
	res_lib_cpg_membership_get.member_count = member_count;

	api->ipc_response_send (conn, &res_lib_cpg_membership_get,
		sizeof (res_lib_cpg_membership_get));
}
static void message_handler_req_lib_cpg_local_get (void *conn,
const void *message)
{
struct res_lib_cpg_local_get res_lib_cpg_local_get;
res_lib_cpg_local_get.header.size = sizeof (res_lib_cpg_local_get);
res_lib_cpg_local_get.header.id = MESSAGE_RES_CPG_LOCAL_GET;
res_lib_cpg_local_get.header.error = CS_OK;
res_lib_cpg_local_get.local_nodeid = api->totem_nodeid_get ();
api->ipc_response_send (conn, &res_lib_cpg_local_get,
sizeof (res_lib_cpg_local_get));
}
/*
 * Library request handler: start an iteration over CPG processes/groups.
 *
 * Creates an iteration instance in cpg_iteration_handle_t_db, fills its
 * items_list_head with copies of the entries of process_info_list_head
 * selected by the request's iteration_type, links the instance onto the
 * connection's iteration_instance_list_head and returns the new handle.
 *
 * conn    - IPC connection the request arrived on
 * message - struct req_lib_cpg_iterationinitialize (iteration_type,
 *           group_name)
 *
 * The response carries CS_OK, CS_ERR_NO_MEMORY (handle creation or entry
 * allocation failed) or CS_ERR_BAD_HANDLE (the new handle could not be
 * looked up), together with the iteration handle value.
 */
static void message_handler_req_lib_cpg_iteration_initialize (
void *conn,
const void *message)
{
const struct req_lib_cpg_iterationinitialize *req_lib_cpg_iterationinitialize = message;
struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
hdb_handle_t cpg_iteration_handle = 0;
struct res_lib_cpg_iterationinitialize res_lib_cpg_iterationinitialize;
struct list_head *iter, *iter2;
struct cpg_iteration_instance *cpg_iteration_instance;
cs_error_t error = CS_OK;
int res;
log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration initialize\n");
/*
 * The process list may change between this call and later "next" calls,
 * so the iteration works on a full private copy.
 */
/*
 * Create new iteration instance
 */
res = hdb_handle_create (&cpg_iteration_handle_t_db, sizeof (struct cpg_iteration_instance),
&cpg_iteration_handle);
if (res != 0) {
error = CS_ERR_NO_MEMORY;
goto response_send;
}
res = hdb_handle_get (&cpg_iteration_handle_t_db, cpg_iteration_handle, (void *)&cpg_iteration_instance);
if (res != 0) {
error = CS_ERR_BAD_HANDLE;
goto error_destroy;
}
list_init (&cpg_iteration_instance->items_list_head);
cpg_iteration_instance->handle = cpg_iteration_handle;
/*
 * Create copy of process_info list "grouped by" group name
 */
for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) {
struct process_info *pi = list_entry (iter, struct process_info, list);
struct process_info *new_pi;
if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_NAME_ONLY) {
/*
 * Check whether this group name is already present in the new list
 */
int found = 0;
for (iter2 = cpg_iteration_instance->items_list_head.next;
iter2 != &cpg_iteration_instance->items_list_head;
iter2 = iter2->next) {
struct process_info *pi2 = list_entry (iter2, struct process_info, list);
if (mar_name_compare (&pi2->group, &pi->group) == 0) {
found = 1;
break;
}
}
if (found) {
/*
 * We have this name in list -> don't add
 */
continue ;
}
} else if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_ONE_GROUP) {
/*
 * Test pi group name with request
 */
if (mar_name_compare (&pi->group, &req_lib_cpg_iterationinitialize->group_name) != 0)
/*
 * Not same -> don't add
 */
continue ;
}
/*
 * Any other iteration_type falls through to here, so every entry is copied.
 */
new_pi = malloc (sizeof (struct process_info));
if (!new_pi) {
log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate process_info struct");
error = CS_ERR_NO_MEMORY;
/*
 * NOTE(review): entries already linked onto items_list_head are not
 * freed in this function on this path; confirm that destroying the
 * handle releases them.
 */
goto error_put_destroy;
}
memcpy (new_pi, pi, sizeof (struct process_info));
list_init (&new_pi->list);
if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_NAME_ONLY) {
/*
 * pid and nodeid -> undefined
 */
new_pi->pid = new_pi->nodeid = 0;
}
/*
 * We will return list "grouped" by "group name", so try to find right place to add
 */
for (iter2 = cpg_iteration_instance->items_list_head.next;
iter2 != &cpg_iteration_instance->items_list_head;
iter2 = iter2->next) {
struct process_info *pi2 = list_entry (iter2, struct process_info, list);
if (mar_name_compare (&pi2->group, &pi->group) == 0) {
break;
}
}
/*
 * iter2 is the first copied entry with the same group name, or the list
 * head itself when no such entry exists.
 */
list_add (&new_pi->list, iter2);
}
/*
 * Now we have a full "grouped by" copy of process_info list
 */
/*
 * Add instance to current cpd list
 */
list_init (&cpg_iteration_instance->list);
list_add (&cpg_iteration_instance->list, &cpd->iteration_instance_list_head);
/*
 * Start at the list head; the "next" handler advances before reading.
 */
cpg_iteration_instance->current_pointer = &cpg_iteration_instance->items_list_head;
/*
 * The success path falls through the labels below: the reference taken by
 * hdb_handle_get is dropped, and the handle is destroyed only on error.
 */
error_put_destroy:
hdb_handle_put (&cpg_iteration_handle_t_db, cpg_iteration_handle);
error_destroy:
if (error != CS_OK) {
hdb_handle_destroy (&cpg_iteration_handle_t_db, cpg_iteration_handle);
}
response_send:
res_lib_cpg_iterationinitialize.header.size = sizeof (res_lib_cpg_iterationinitialize);
res_lib_cpg_iterationinitialize.header.id = MESSAGE_RES_CPG_ITERATIONINITIALIZE;
res_lib_cpg_iterationinitialize.header.error = error;
res_lib_cpg_iterationinitialize.iteration_handle = cpg_iteration_handle;
api->ipc_response_send (conn, &res_lib_cpg_iterationinitialize,
sizeof (res_lib_cpg_iterationinitialize));
}
static void message_handler_req_lib_cpg_iteration_next (
void *conn,
const void *message)
{
const struct req_lib_cpg_iterationnext *req_lib_cpg_iterationnext = message;
struct res_lib_cpg_iterationnext res_lib_cpg_iterationnext;
struct cpg_iteration_instance *cpg_iteration_instance;
cs_error_t error = CS_OK;
int res;
struct process_info *pi;
log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration next\n");
res = hdb_handle_get (&cpg_iteration_handle_t_db,
req_lib_cpg_iterationnext->iteration_handle,
(void *)&cpg_iteration_instance);
if (res != 0) {
error = CS_ERR_LIBRARY;
goto error_exit;
}
assert (cpg_iteration_instance);
cpg_iteration_instance->current_pointer = cpg_iteration_instance->current_pointer->next;
if (cpg_iteration_instance->current_pointer == &cpg_iteration_instance->items_list_head) {
error = CS_ERR_NO_SECTIONS;
goto error_put;
}
pi = list_entry (cpg_iteration_instance->current_pointer, struct process_info, list);
/*
* Copy iteration data
*/
res_lib_cpg_iterationnext.description.nodeid = pi->nodeid;
res_lib_cpg_iterationnext.description.pid = pi->pid;
memcpy (&res_lib_cpg_iterationnext.description.group,
&pi->group,
sizeof (mar_cpg_name_t));
error_put:
hdb_handle_put (&cpg_iteration_handle_t_db, req_lib_cpg_iterationnext->iteration_handle);
error_exit:
res_lib_cpg_iterationnext.header.size = sizeof (res_lib_cpg_iterationnext);
res_lib_cpg_iterationnext.header.id = MESSAGE_RES_CPG_ITERATIONNEXT;
res_lib_cpg_iterationnext.header.error = error;
api->ipc_response_send (conn, &res_lib_cpg_iterationnext,
sizeof (res_lib_cpg_iterationnext));
}
static void message_handler_req_lib_cpg_iteration_finalize (
void *conn,
const void *message)
{
const struct req_lib_cpg_iterationfinalize *req_lib_cpg_iterationfinalize = message;
struct res_lib_cpg_iterationfinalize res_lib_cpg_iterationfinalize;
struct cpg_iteration_instance *cpg_iteration_instance;
cs_error_t error = CS_OK;
int res;
log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration finalize\n");
res = hdb_handle_get (&cpg_iteration_handle_t_db,
req_lib_cpg_iterationfinalize->iteration_handle,
(void *)&cpg_iteration_instance);
if (res != 0) {
error = CS_ERR_LIBRARY;
goto error_exit;
}
assert (cpg_iteration_instance);
cpg_iteration_instance_finalize (cpg_iteration_instance);
hdb_handle_put (&cpg_iteration_handle_t_db, cpg_iteration_instance->handle);
error_exit:
res_lib_cpg_iterationfinalize.header.size = sizeof (res_lib_cpg_iterationfinalize);
res_lib_cpg_iterationfinalize.header.id = MESSAGE_RES_CPG_ITERATIONFINALIZE;
res_lib_cpg_iterationfinalize.header.error = error;
api->ipc_response_send (conn, &res_lib_cpg_iterationfinalize,
sizeof (res_lib_cpg_iterationfinalize));
}
diff --git a/test/Makefile.am b/test/Makefile.am
index c79071f4..1b28643c 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -1,101 +1,97 @@
#
# Copyright (c) 2009 Red Hat, Inc.
#
# Authors: Andrew Beekhof
# Steven Dake (sdake@redhat.com)
#
# This software licensed under BSD license, the text of which follows:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# - Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# - Neither the name of the MontaVista Software, Inc. nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGE.
MAINTAINERCLEANFILES = Makefile.in
INCLUDES = -I$(top_builddir)/include/corosync -I$(top_srcdir)/include
noinst_PROGRAMS = testevs evsbench evsverify cpgverify testcpg testcpg2 cpgbench testconfdb \
- logsysbench logsysrec testquorum testvotequorum1 testvotequorum2 \
- logsys_s logsys_t1 logsys_t2 \
+ testquorum testvotequorum1 testvotequorum2 \
stress_cpgfdget stress_cpgcontext cpgbound testsam \
- testcpgzc cpgbenchzc testzcgc stress_cpgzc
+ testcpgzc cpgbenchzc testzcgc stress_cpgzc \
+ logsys_s logsys_t1 logsys_t2
testevs_LDADD = -levs $(LIBQB_LIBS)
testevs_LDFLAGS = -L../lib
testcpg_LDADD = -lcpg $(LIBQB_LIBS)
testcpg_LDFLAGS = -L../lib
testcpg2_LDADD = -lcpg $(LIBQB_LIBS)
testcpg2_LDFLAGS = -L../lib
testcpgzc_LDADD = -lcpg $(LIBQB_LIBS)
testcpgzc_LDFLAGS = -L../lib
testzcgc_LDADD = -lcpg $(LIBQB_LIBS)
testzcgc_LDFLAGS = -L../lib
stress_cpgzc_LDADD = -lcpg $(LIBQB_LIBS)
stress_cpgzc_LDFLAGS = -L../lib
stress_cpgfdget_LDADD = -lcpg $(LIBQB_LIBS)
stress_cpgfdget_LDFLAGS = -L../lib
stress_cpgcontext_LDADD = -lcpg $(LIBQB_LIBS)
stress_cpgcontext_LDFLAGS = -L../lib
testconfdb_LDADD = -lconfdb ../lcr/liblcr.a $(LIBQB_LIBS)
testconfdb_LDFLAGS = -L../lib
testquorum_LDADD = -lquorum $(LIBQB_LIBS)
testquorum_LDFLAGS = -L../lib
testvotequorum1_LDADD = -lvotequorum $(LIBQB_LIBS)
testvotequorum1_LDFLAGS = -L../lib
testvotequorum2_LDADD = -lvotequorum $(LIBQB_LIBS)
testvotequorum2_LDFLAGS = -L../lib
evsverify_LDADD = -levs -ltotem_pg $(LIBQB_LIBS)
evsverify_LDFLAGS = -L../lib -L../exec
cpgverify_LDADD = -lcpg -ltotem_pg $(LIBQB_LIBS)
cpgverify_LDFLAGS = -L../lib -L../exec
cpgbound_LDADD = -lcpg $(LIBQB_LIBS)
cpgbound_LDFLAGS = -L../lib
evsbench_LDADD = -levs $(LIBQB_LIBS)
evsbench_LDFLAGS = -L../lib
cpgbench_LDADD = -lcpg $(LIBQB_LIBS)
cpgbench_LDFLAGS = -L../lib
cpgbenchzc_LDADD = -lcpg $(LIBQB_LIBS)
cpgbenchzc_LDFLAGS = -L../lib
-logsysbench_LDADD = -llogsys
-logsysbench_LDFLAGS = -L../exec
-logsysrec_LDADD = -llogsys
-logsysrec_LDFLAGS = -L../exec
logsys_s_SOURCES = logsys_s.c logsys_s1.c logsys_s2.c
-logsys_s_LDADD = -llogsys
+logsys_s_LDADD = -llogsys $(LIBQB_LIBS)
logsys_s_LDFLAGS = -L../exec
-logsys_t1_LDADD = -llogsys
+logsys_t1_LDADD = -llogsys $(LIBQB_LIBS)
logsys_t1_LDFLAGS = -L../exec
-logsys_t2_LDADD = -llogsys
+logsys_t2_LDADD = -llogsys $(LIBQB_LIBS)
logsys_t2_LDFLAGS = -L../exec
testsam_LDADD = -lsam -lconfdb -lquorum $(LIBQB_LIBS)
testsam_LDFLAGS = -L../lib
LINT_FILES1:=$(filter-out sa_error.c, $(wildcard *.c))
LINT_FILES2:=$(filter-out testevsth.c, $(LINT_FILES1))
LINT_FILES:=$(filter-out testparse.c, $(LINT_FILES2))
lint:
-for f in $(LINT_FILES) ; do echo Splint $$f ; splint $(INCLUDES) $(LINT_FLAGS) $(CFLAGS) $$f ; done
clean-local:
rm -f fdata
diff --git a/test/logsys_s.c b/test/logsys_s.c
index f4e42202..7eabacd0 100644
--- a/test/logsys_s.c
+++ b/test/logsys_s.c
@@ -1,60 +1,57 @@
/*
* Copyright (c) 2007 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <stdio.h>
#include <syslog.h>
+#include <assert.h>
#include <corosync/engine/logsys.h>
LOGSYS_DECLARE_SYSTEM ("logsystestsubsystems",
LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_OUTPUT_SYSLOG,
- 0,
- NULL,
- LOGSYS_LEVEL_INFO,
LOG_DAEMON,
- LOGSYS_LEVEL_INFO,
- NULL,
- 1000000);
+ LOGSYS_LEVEL_INFO);
+LOGSYS_DECLARE_SUBSYS ("MAIN");
extern void logsys_s1_print (void);
extern void logsys_s2_print (void);
int main (void) {
- logsys_fork_completed();
+ qb_log_thread_start();
logsys_s1_print();
logsys_s2_print();
return (0);
}
diff --git a/test/logsys_s1.c b/test/logsys_s1.c
index 296b189e..fb689dd7 100644
--- a/test/logsys_s1.c
+++ b/test/logsys_s1.c
@@ -1,49 +1,50 @@
/*
* Copyright (c) 2007 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <stdio.h>
#include <syslog.h>
+#include <assert.h>
#include <corosync/engine/logsys.h>
void logsys_s1_print (void);
LOGSYS_DECLARE_SUBSYS ("SYS1");
/*
 * Emit one alert-level and one warning-level message through log_printf
 * from this translation unit, which declares its subsystem as "SYS1".
 */
void logsys_s1_print (void) {
log_printf (LOGSYS_LEVEL_ALERT, "This is an alert log message\n");
log_printf (LOGSYS_LEVEL_WARNING, "This is a warning log message\n");
}
diff --git a/test/logsys_s2.c b/test/logsys_s2.c
index a680ad96..80828865 100644
--- a/test/logsys_s2.c
+++ b/test/logsys_s2.c
@@ -1,51 +1,52 @@
/*
* Copyright (c) 2007 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <stdio.h>
#include <syslog.h>
+#include <assert.h>
#include <corosync/engine/logsys.h>
void logsys_s2_print (void);
LOGSYS_DECLARE_SUBSYS ("SYS2");
/*
 * Set the logfile priority of subsystem "SYS2" to LOGSYS_LEVEL_DEBUG, then
 * emit one message each at alert, warning and debug level.
 */
void logsys_s2_print (void) {
logsys_config_logfile_priority_set("SYS2", LOGSYS_LEVEL_DEBUG);
log_printf (LOGSYS_LEVEL_ALERT, "This is an alert log message\n");
log_printf (LOGSYS_LEVEL_WARNING, "This is a warning log message\n");
log_printf (LOGSYS_LEVEL_DEBUG, "This is a debug log message\n");
}
diff --git a/test/logsys_t1.c b/test/logsys_t1.c
index 268086b9..06b2c59b 100644
--- a/test/logsys_t1.c
+++ b/test/logsys_t1.c
@@ -1,57 +1,54 @@
/*
* Copyright (c) 2007 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <stdio.h>
#include <syslog.h>
+#include <assert.h>
#include <corosync/engine/logsys.h>
LOGSYS_DECLARE_SYSTEM ("logsystestNOsubsystems",
LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_OUTPUT_SYSLOG,
- 0,
- NULL,
- LOGSYS_LEVEL_DEBUG,
LOG_DAEMON,
- LOGSYS_LEVEL_DEBUG,
- NULL,
- 1000000);
+ LOGSYS_LEVEL_DEBUG);
+LOGSYS_DECLARE_SUBSYS("MAIN");
/*
 * Test entry point: emit one message each at alert, warning and debug level
 * through log_printf, then exit with status 0.
 */
int main (void) {
log_printf (LOGSYS_LEVEL_ALERT, "This is an alert log message\n");
log_printf (LOGSYS_LEVEL_WARNING, "This is a warning log message\n");
log_printf (LOGSYS_LEVEL_DEBUG, "This is a debug log message\n");
return (0);
}
diff --git a/test/logsys_t2.c b/test/logsys_t2.c
index 8cb6095d..fcee399a 100644
--- a/test/logsys_t2.c
+++ b/test/logsys_t2.c
@@ -1,83 +1,85 @@
/*
* Copyright (c) 2007 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Lon Hohberger (lhh@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <stdio.h>
#include <stdint.h>
+#include <assert.h>
#include <corosync/engine/logsys.h>
LOGSYS_DECLARE_SYSTEM ("logtest_t2",
LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_THREADED,
- 0,
- NULL,
- LOGSYS_LEVEL_INFO,
LOG_DAEMON,
- LOGSYS_LEVEL_INFO,
- NULL,
- 1000000);
+ LOGSYS_LEVEL_INFO);
+
+LOGSYS_DECLARE_SUBSYS("MAIN");
int
main(int argc, char **argv)
{
/*
* fork could occur here and the file to output to could be set
*/
logsys_config_mode_set (NULL, LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_THREADED);
+
log_printf(LOGSYS_LEVEL_NOTICE, "Hello, world!\n");
log_printf(LOGSYS_LEVEL_DEBUG, "If you see this, the logger's busted\n");
logsys_config_logfile_priority_set (NULL, LOGSYS_LEVEL_ALERT);
+ logsys_config_apply();
log_printf(LOGSYS_LEVEL_DEBUG, "If you see this, the logger's busted\n");
log_printf(LOGSYS_LEVEL_CRIT, "If you see this, the logger's busted\n");
log_printf(LOGSYS_LEVEL_ALERT, "Alert 1\n");
logsys_config_logfile_priority_set (NULL, LOGSYS_LEVEL_NOTICE);
+ logsys_config_apply();
log_printf(LOGSYS_LEVEL_CRIT, "Crit 1\n");
log_printf(LOGSYS_LEVEL_INFO, "If you see this, the logger's busted\n");
logsys_config_logfile_priority_set (NULL, LOGSYS_LEVEL_DEBUG);
+ logsys_config_apply();
log_printf(LOGSYS_LEVEL_DEBUG, "Debug 1\n");
logsys_config_mode_set (NULL, LOGSYS_MODE_OUTPUT_STDERR);
log_printf(LOGSYS_LEVEL_DEBUG, "Debug 2\n");
return 0;
}
diff --git a/test/logsysbench.c b/test/logsysbench.c
deleted file mode 100644
index 1d6dac8d..00000000
--- a/test/logsysbench.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Copyright (c) 2008, 2009 Red Hat, Inc.
- *
- * All rights reserved.
- *
- * Author: Steven Dake (sdake@redhat.com)
- *
- * This software licensed under BSD license, the text of which follows:
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- * - Neither the name of the MontaVista Software, Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <config.h>
-
-#include <stdio.h>
-#include <stdint.h>
-#include <string.h>
-#include <sys/time.h>
-#include <time.h>
-#include <corosync/engine/logsys.h>
-
-LOGSYS_DECLARE_SYSTEM ("logtest_rec",
- LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_THREADED,
- 0, /* debug */
- NULL,
- LOGSYS_LEVEL_INFO, /* logfile_priority */
- LOG_DAEMON, /* syslog facility */
- LOGSYS_LEVEL_INFO, /* syslog level */
- NULL, /* use default format */
- 1000000); /* flight recorder size */
-
-#define LOGREC_ID_CHECKPOINT_CREATE 2
-#define LOGREC_ARGS_CHECKPOINT_CREATE 2
-#define ITERATIONS 1000000
-
-static struct timeval tv1, tv2, tv_elapsed;
-
-#ifndef timersub
-#define timersub(a, b, result) \
-do { \
- (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
- (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
- if ((result)->tv_usec < 0) { \
- --(result)->tv_sec; \
- (result)->tv_usec += 1000000; \
- } \
-} while (0)
-#endif
-
-static void bm_start (void)
-{
- gettimeofday (&tv1, NULL);
-}
-static void bm_finish (const char *operation)
-{
- gettimeofday (&tv2, NULL);
- timersub (&tv2, &tv1, &tv_elapsed);
-
- if (strlen (operation) > 22) {
- printf ("%s\t\t", operation);
- } else {
- printf ("%s\t\t\t", operation);
- }
- printf ("%9.3f operations/sec\n",
- ((float)ITERATIONS) / (tv_elapsed.tv_sec + (tv_elapsed.tv_usec / 1000000.0)));
-}
-
-static char buffer[256];
-int main (void)
-{
- int i;
- char buf[1024];
-
-
- printf ("heating up cache with logrec functionality\n");
- for (i = 0; i < ITERATIONS; i++) {
- log_rec (LOGREC_ID_CHECKPOINT_CREATE,
- "recordA", 8, "recordB", 8, LOGSYS_REC_END);
- }
- bm_start();
- for (i = 0; i < ITERATIONS; i++) {
- log_rec (LOGREC_ID_CHECKPOINT_CREATE,
- buffer, 7, LOGSYS_REC_END);
- }
- bm_finish ("log_rec 1 arguments:");
- bm_start();
- for (i = 0; i < ITERATIONS; i++) {
- log_rec (LOGREC_ID_CHECKPOINT_CREATE,
- "recordA", 8, LOGSYS_REC_END);
- }
- bm_finish ("log_rec 2 arguments:");
- bm_start();
- for (i = 0; i < 10; i++) {
- log_rec (LOGREC_ID_CHECKPOINT_CREATE,
- "recordA", 8, "recordB", 8, LOGSYS_REC_END);
- }
- bm_start();
- for (i = 0; i < ITERATIONS; i++) {
- log_rec (LOGREC_ID_CHECKPOINT_CREATE,
- "recordA", 8, "recordB", 8, "recordC", 8, LOGSYS_REC_END);
- }
- bm_finish ("log_rec 3 arguments:");
- bm_start();
- for (i = 0; i < ITERATIONS; i++) {
- log_rec (LOGREC_ID_CHECKPOINT_CREATE,
- "recordA", 8, "recordB", 8, "recordC", 8, "recordD", 8, LOGSYS_REC_END);
- }
- bm_finish ("log_rec 4 arguments:");
-
- /*
- * sprintf testing
- */
- printf ("heating up cache with sprintf functionality\n");
- for (i = 0; i < ITERATIONS; i++) {
- snprintf (buf, sizeof(buf), "Some logging information %s", "recordA");
- }
- bm_start();
- for (i = 0; i < ITERATIONS; i++) {
- snprintf (buf, sizeof(buf), "Some logging information %s", "recordA");
- }
- bm_finish ("sprintf 1 argument:");
- bm_start();
- for (i = 0; i < ITERATIONS; i++) {
- sprintf (buf, "Some logging information %s %s", "recordA", "recordB");
- }
- bm_finish ("sprintf 2 arguments:");
- bm_start();
- for (i = 0; i < ITERATIONS; i++) {
- sprintf (buf, "Some logging information %s %s %s", "recordA", "recordB", "recordC");
- }
- bm_finish ("sprintf 3 arguments:");
- bm_start();
- for (i = 0; i < ITERATIONS; i++) {
- sprintf (buf, "Some logging information %s %s %s %s", "recordA", "recordB", "recordC", "recordD");
- }
- bm_finish ("sprintf 4 arguments:");
- bm_start();
- for (i = 0; i < ITERATIONS; i++) {
- sprintf (buf, "Some logging information %s %s %s %d", "recordA", "recordB", "recordC", i);
- }
- bm_finish ("sprintf 4 arguments (1 int):");
-
- logsys_log_rec_store ("fdata");
-/* TODO
- currently fails under some circumstances
-
- bm_start();
- for (i = 0; i < ITERATIONS; i++) {
- log_printf (LOGSYS_LEVEL_NOTICE, "test %d", i);
- }
- bm_finish("log_printf");
-*/
-
- return (0);
-}
diff --git a/test/logsysrec.c b/test/logsysrec.c
index 4dd68994..2d05435d 100644
--- a/test/logsysrec.c
+++ b/test/logsysrec.c
@@ -1,71 +1,66 @@
/*
* Copyright (c) 2008 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <stdio.h>
#include <stdint.h>
#include <corosync/engine/logsys.h>
LOGSYS_DECLARE_SYSTEM ("logtest_rec",
LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_THREADED,
- 0,
- NULL,
- LOG_INFO,
LOG_DAEMON,
- LOG_INFO,
- NULL,
- 100000);
+ LOG_INFO);
#define LOGREC_ID_CHECKPOINT_CREATE 2
#define LOGREC_ARGS_CHECKPOINT_CREATE 2
int main(int argc, char **argv)
{
int i;
for (i = 0; i < 10000; i++) {
log_printf (LOGSYS_LEVEL_NOTICE,
"This is a test of %s(%d)\n", "stringparse", i);
log_rec (LOGSYS_ENCODE_RECID(LOGSYS_LEVEL_NOTICE,
logsys_subsys_id,
LOGREC_ID_CHECKPOINT_CREATE),
"record1", 8, "record22", 9, "record333", 10, "record444", 11, LOGSYS_REC_END);
}
logsys_atexit ();
logsys_log_rec_store ("fdata");
return 0;
}
diff --git a/tools/corosync-fplay.c b/tools/corosync-fplay.c
index c74c2017..ab0e2933 100644
--- a/tools/corosync-fplay.c
+++ b/tools/corosync-fplay.c
@@ -1,522 +1,51 @@
-#include <config.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <string.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <errno.h>
-
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <arpa/inet.h>
-
-#include <corosync/engine/logsys.h>
-
-uint32_t flt_data_size;
-
-uint32_t *flt_data;
-#define FDHEAD_INDEX (flt_data_size)
-#define FDTAIL_INDEX (flt_data_size + 1)
-
-#define TOTEMIP_ADDRLEN (sizeof(struct in6_addr))
-
-struct totem_ip_address {
- unsigned int nodeid;
- unsigned short family;
- unsigned char addr[TOTEMIP_ADDRLEN];
-} __attribute__((packed));
-
-struct memb_ring_id {
- struct totem_ip_address rep;
- unsigned long long seq;
-} __attribute__((packed));
-
-static const char *totemip_print(const struct totem_ip_address *addr)
-{
- static char buf[INET6_ADDRSTRLEN];
-
- return inet_ntop(addr->family, addr->addr, buf, sizeof(buf));
-}
-
-static char *print_string_len (const unsigned char *str, unsigned int len)
-{
- unsigned int i;
- static char buf[1024];
- memset (buf, 0, sizeof (buf));
- for (i = 0; i < len; i++) {
- buf[i] = str[i];
- }
- return (buf);
-}
-
-static void sync_printer_confchg_set_sync (const void **record)
-{
- const unsigned int *my_should_sync = record[0];
- printf ("Setting my_should_sync to %d\n", *my_should_sync);
-}
-
-static void sync_printer_set_sync_state (const void **record)
-{
- const unsigned int *my_sync_state = record[0];
- printf ("Setting my_sync_state to %d\n", *my_sync_state);
-}
-
-static void sync_printer_process_currentstate (const void **record)
-{
- const unsigned int *my_sync_state = record[0];
- printf ("Retrieving my_sync_state %d\n", *my_sync_state);
-}
-
-static void sync_printer_process_get_shouldsync (const void **record)
-{
- const unsigned int *my_should_sync = record[0];
- printf ("Getting my_should_sync %d\n", *my_should_sync);
-}
-
-static void sync_printer_checkpoint_release (const void **record)
-{
- const unsigned char *name = record[0];
- const uint16_t *name_len = record[1];
- const unsigned int *ckpt_id = record[2];
- const unsigned int *from = record[3];
-
- printf ("Checkpoint release name=[%s] id=[%d] from=[%d] len=[%d]\n",
- print_string_len (name, *name_len),
- *ckpt_id,
- *from,
- *name_len);
-}
-
-static void sync_printer_checkpoint_transmit (const void **record)
-{
- const unsigned char *name = record[0];
- const uint16_t *name_len = record[1];
- const unsigned int *ckpt_id = record[2];
- const unsigned int *xmit_id = record[3];
-
- printf ("xmit_id=[%d] Checkpoint transmit name=[%s] id=[%d]\n",
- *xmit_id, print_string_len (name, *name_len),
- *ckpt_id);
-}
-
-static void sync_printer_section_transmit (const void **record)
-{
- const unsigned char *ckpt_name = record[0];
- const uint16_t *name_len = record[1];
- const unsigned int *ckpt_id = record[2];
- const unsigned int *xmit_id = record[3];
- const unsigned char *section_name = record[4];
- const uint16_t *section_name_len = record[5];
-
- printf ("xmit_id=[%d] Section transmit checkpoint name=[%s] id=[%d] ",
- *xmit_id, print_string_len (ckpt_name, *name_len),
- *ckpt_id);
- printf ("section=[%s]\n",
- print_string_len (section_name, *section_name_len));
-}
-static void sync_printer_checkpoint_receive (const void **record)
-{
- const unsigned char *ckpt_name = record[0];
- const uint16_t *name_len = record[1];
- const unsigned int *ckpt_id = record[2];
- const unsigned int *xmit_id = record[3];
-
- printf ("xmit_id=[%d] Checkpoint receive checkpoint name=[%s] id=[%d]\n",
- *xmit_id, print_string_len (ckpt_name, *name_len), *ckpt_id);
-}
-
-static void sync_printer_section_receive (const void **record)
-{
- const unsigned char *ckpt_name = record[0];
- const uint16_t *name_len = record[1];
- const unsigned int *ckpt_id = record[2];
- const unsigned int *xmit_id = record[3];
- const unsigned char *section_name = record[4];
- const unsigned int *section_name_len = record[5];
-
- printf ("xmit_id=[%d] Section receive checkpoint name=[%s] id=[%d] ",
- *xmit_id, print_string_len (ckpt_name, *name_len),
- *ckpt_id);
-
- printf ("section=[%s]\n",
- print_string_len (section_name, *section_name_len));
-}
-
-static void sync_printer_confchg_fn (const void **record)
-{
- unsigned int i;
-
- const unsigned int *members = record[0];
- const unsigned int *member_count = record[1];
- const struct memb_ring_id *ring_id = record[2];
- struct in_addr addr;
-
- printf ("sync confchg fn ringid [ip=%s seq=%lld]\n",
- totemip_print (&ring_id->rep),
- ring_id->seq);
- printf ("members [%d]:\n", *member_count);
- for (i = 0; i < *member_count; i++) {
- addr.s_addr = members[i];
- printf ("\tmember [%s]\n", inet_ntoa (addr));
- }
-}
-
-static void printer_totemsrp_mcast (const void **record)
-{
- const unsigned int *msgid = record[0];
-
- printf ("totemsrp_mcast %d\n", *msgid);
-}
-
-static void printer_totemsrp_delv (const void **record)
-{
- const unsigned int *msgid = record[0];
-
- printf ("totemsrp_delv %d\n", *msgid);
-}
-
-static void printer_totempg_mcast_fits (const void **record)
-{
- const unsigned int *idx = record[0];
- const unsigned int *iov_len = record[1];
- const unsigned int *copy_len = record[2];
- const unsigned int *fragment_size = record[3];
- const unsigned int *max_packet_size = record[4];
- const unsigned int *copy_base = record[5];
- const unsigned char *next_fragment = record[6];
-
- printf ("totempg_mcast index=[%d] iov_len=[%d] copy_len=[%d] fragment_size=[%d] max_packet_size=[%d] copy_base=[%d] next_fragment[%d]\n",
- *idx, *iov_len, *copy_len, *fragment_size, *max_packet_size, *copy_base, *next_fragment);
-}
-
-static void sync_printer_service_process (const void **record)
-{
- const struct memb_ring_id *ring_id = record[0];
- const struct memb_ring_id *sync_ring_id = record[1];
-
- printf ("sync service process callback ringid [ip=%s seq=%lld] ",
- totemip_print (&ring_id->rep),
- ring_id->seq);
- printf ("sync ringid [ip=%s seq=%lld]\n",
- totemip_print (&sync_ring_id->rep),
- sync_ring_id->seq);
-}
-
-struct printer_subsys_record_print {
- int ident;
- void (*print_fn)(const void **record);
- int record_length;
-};
-
-struct printer_subsys {
- const char *subsys;
- struct printer_subsys_record_print *record_printers;
- int record_printers_count;
-};
-
-#define LOGREC_ID_SYNC_CONFCHG_FN 0
-#define LOGREC_ID_SYNC_SERVICE_PROCESS 1
-
/*
- * CKPT subsystem
+ * Copyright (c) 2011 Red Hat
+ *
+ * All rights reserved.
+ *
+ * Author: Angus Salkeld <asalkeld@redhat.com>
+ *
+ * This software licensed under BSD license, the text of which follows:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * - Neither the name of the MontaVista Software, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
*/
-#define LOGREC_ID_CONFCHG_SETSYNC 0
-#define LOGREC_ID_SETSYNCSTATE 1
-#define LOGREC_ID_SYNC_PROCESS_CURRENTSTATE 2
-#define LOGREC_ID_SYNC_PROCESS_GETSHOULDSYNC 3
-#define LOGREC_ID_SYNC_CHECKPOINT_TRANSMIT 4
-#define LOGREC_ID_SYNC_SECTION_TRANSMIT 5
-#define LOGREC_ID_SYNC_CHECKPOINT_RECEIVE 6
-#define LOGREC_ID_SYNC_SECTION_RECEIVE 7
-#define LOGREC_ID_SYNC_CHECKPOINT_RELEASE 8
-
-#define LOGREC_ID_TOTEMSRP_MCAST 0
-#define LOGREC_ID_TOTEMSRP_DELV 1
-#define LOGREC_ID_TOTEMPG_MCAST_FITS 2
-
-
-static struct printer_subsys_record_print record_print_sync[] = {
- {
- .ident = LOGREC_ID_SYNC_CONFCHG_FN,
- .print_fn = sync_printer_confchg_fn,
- .record_length = 28
- },
- {
- .ident = LOGREC_ID_SYNC_SERVICE_PROCESS,
- .print_fn = sync_printer_service_process,
- .record_length = 28
- }
-};
-
-static struct printer_subsys_record_print record_print_ckpt[] = {
- {
- .ident = LOGREC_ID_CONFCHG_SETSYNC,
- .print_fn = sync_printer_confchg_set_sync,
- .record_length = 28
- },
- {
- .ident = LOGREC_ID_SETSYNCSTATE,
- .print_fn = sync_printer_set_sync_state,
- .record_length = 28
- },
- {
- .ident = LOGREC_ID_SYNC_PROCESS_CURRENTSTATE,
- .print_fn = sync_printer_process_currentstate,
- .record_length = 28
- },
- {
- .ident = LOGREC_ID_SYNC_PROCESS_GETSHOULDSYNC,
- .print_fn = sync_printer_process_get_shouldsync,
- .record_length = 28
- },
- {
- .ident = LOGREC_ID_SYNC_CHECKPOINT_TRANSMIT,
- .print_fn = sync_printer_checkpoint_transmit,
- .record_length = 28
- },
- {
- .ident = LOGREC_ID_SYNC_SECTION_TRANSMIT,
- .print_fn = sync_printer_section_transmit,
- .record_length = 28
- },
- {
- .ident = LOGREC_ID_SYNC_CHECKPOINT_RECEIVE,
- .print_fn = sync_printer_checkpoint_receive,
- .record_length = 28
- },
- {
- .ident = LOGREC_ID_SYNC_SECTION_RECEIVE,
- .print_fn = sync_printer_section_receive,
- .record_length = 28
- },
- {
- .ident = LOGREC_ID_SYNC_CHECKPOINT_RELEASE,
- .print_fn = sync_printer_checkpoint_release,
- .record_length = 28
- }
-
-};
-static struct printer_subsys_record_print record_print_totem[] = {
- {
- .ident = LOGREC_ID_TOTEMSRP_MCAST,
- .print_fn = printer_totemsrp_mcast,
- .record_length = 28
- },
- {
- .ident = LOGREC_ID_TOTEMSRP_DELV,
- .print_fn = printer_totemsrp_delv,
- .record_length = 28
- },
- {
- .ident = LOGREC_ID_TOTEMPG_MCAST_FITS,
- .print_fn = printer_totempg_mcast_fits,
- .record_length = 28
- }
-};
-
-static struct printer_subsys printer_subsystems[] = {
- {
- .subsys = "SYNC",
- .record_printers = record_print_sync,
- .record_printers_count = sizeof (record_print_sync) / sizeof (struct printer_subsys_record_print)
- },
- {
- .subsys = "CKPT",
- .record_printers = record_print_ckpt,
- .record_printers_count = sizeof (record_print_ckpt) / sizeof (struct printer_subsys_record_print)
- },
- {
- .subsys = "TOTEM",
- .record_printers = record_print_totem,
- .record_printers_count = sizeof (record_print_totem) / sizeof (struct printer_subsys_record_print)
- }
-};
-static unsigned int printer_subsys_count =
- sizeof (printer_subsystems) / sizeof (struct printer_subsys);
-
-#define G_RECORD_SIZE 10000
-
-static uint32_t g_record[G_RECORD_SIZE];
-
-/*
- * Copy record, dealing with wrapping
- */
-static int logsys_rec_get (int rec_idx) {
- uint32_t rec_size;
- int firstcopy, secondcopy;
-
- rec_size = flt_data[rec_idx];
-
- firstcopy = rec_size;
- secondcopy = 0;
-
- if (rec_size > G_RECORD_SIZE || rec_size > flt_data_size) {
- fprintf (stderr, "rec_size too large. Input file is probably corrupted.\n");
- exit (EXIT_FAILURE);
- }
-
- if (firstcopy + rec_idx > flt_data_size) {
- firstcopy = flt_data_size - rec_idx;
- secondcopy -= firstcopy - rec_size;
- }
- memcpy (&g_record[0], &flt_data[rec_idx], firstcopy * sizeof(uint32_t));
- if (secondcopy) {
- memcpy (&g_record[firstcopy], &flt_data[0], secondcopy * sizeof(uint32_t));
- }
- return ((rec_idx + rec_size) % flt_data_size);
-}
-
-static void logsys_rec_print (const void *record)
-{
- const uint32_t *buf_uint32t = record;
- uint32_t rec_size;
- uint32_t rec_ident;
- uint32_t line;
- uint32_t arg_size_idx;
- unsigned int i;
- unsigned int j;
- unsigned int rec_idx = 0;
- uint32_t record_number;
- unsigned int words_processed;
- const char *arguments[64];
- int arg_count = 0;
-
- rec_size = buf_uint32t[rec_idx];
- rec_ident = buf_uint32t[rec_idx+1];
- line = buf_uint32t[rec_idx+2];
- record_number = buf_uint32t[rec_idx+3];
-
- printf ("rec=[%d] ", record_number);
- arg_size_idx = rec_idx + 4;
- words_processed = 4;
- for (i = 0; words_processed < rec_size; i++) {
- arguments[arg_count++] =
- (const char *)&buf_uint32t[arg_size_idx + 1];
- words_processed += buf_uint32t[arg_size_idx] + 1;
- arg_size_idx += buf_uint32t[arg_size_idx] + 1;
-
- }
-
- for (i = 0; i < printer_subsys_count; i++) {
- if (strcmp (arguments[0], printer_subsystems[i].subsys) == 0) {
- for (j = 0; j < printer_subsystems[i].record_printers_count; j++) {
- if (rec_ident == printer_subsystems[i].record_printers[j].ident) {
- printer_subsystems[i].record_printers[j].print_fn ((const void **)&arguments[3]);
- return;
- }
- }
- }
- }
-
- switch(LOGSYS_DECODE_RECID(rec_ident)) {
- case LOGSYS_RECID_LOG:
- printf ("Log Message=%s\n", arguments[3]);
- break;
- case LOGSYS_RECID_ENTER:
- printf ("ENTERING function [%s] line [%d]\n", arguments[2], line);
- break;
- case LOGSYS_RECID_LEAVE:
- printf ("LEAVING function [%s] line [%d]\n", arguments[2], line);
- break;
- case LOGSYS_RECID_TRACE1:
- printf ("Tracing(1) Messsage=%s\n", arguments[3]);
- break;
- case LOGSYS_RECID_TRACE2:
- printf ("Tracing(2) Messsage=%s\n", arguments[3]);
- break;
- case LOGSYS_RECID_TRACE3:
- printf ("Tracing(3) Messsage=%s\n", arguments[3]);
- break;
- case LOGSYS_RECID_TRACE4:
- printf ("Tracing(4) Messsage=%s\n", arguments[3]);
- break;
- case LOGSYS_RECID_TRACE5:
- printf ("Tracing(5) Messsage=%s\n", arguments[3]);
- break;
- case LOGSYS_RECID_TRACE6:
- printf ("Tracing(6) Messsage=%s\n", arguments[3]);
- break;
- case LOGSYS_RECID_TRACE7:
- printf ("Tracing(7) Messsage=%s\n", arguments[3]);
- break;
- case LOGSYS_RECID_TRACE8:
- printf ("Tracing(8) Messsage=%s\n", arguments[3]);
- break;
- default:
- printf ("Unknown record type found subsys=[%s] ident=[%d]\n",
- arguments[0], LOGSYS_DECODE_RECID(rec_ident));
- break;
- }
-#ifdef COMPILE_OUT
-printf ("\n");
-#endif
-}
+#include "config.h"
+#include <qb/qbdefs.h>
+#include <qb/qblog.h>
int main (void)
{
- int fd;
- int rec_idx;
- int end_rec;
- int record_count = 1;
- ssize_t n_read;
- const char *data_file = LOCALSTATEDIR "/lib/corosync/fdata";
- size_t n_required;
-
- if ((fd = open (data_file, O_RDONLY)) < 0) {
- fprintf (stderr, "failed to open %s: %s\n",
- data_file, strerror (errno));
- return EXIT_FAILURE;
- }
-
- n_required = sizeof (uint32_t);
- n_read = read (fd, &flt_data_size, n_required);
- if (n_read != n_required) {
- fprintf (stderr, "Unable to read fdata header\n");
- return EXIT_FAILURE;
- }
-
- n_required = ((flt_data_size + 2) * sizeof(uint32_t));
-
- if ((flt_data = malloc (n_required)) == NULL) {
- fprintf (stderr, "exhausted virtual memory\n");
- return EXIT_FAILURE;
- }
- n_read = read (fd, flt_data, n_required);
- close (fd);
- if (n_read < 0) {
- fprintf (stderr, "reading %s failed: %s\n",
- data_file, strerror (errno));
- return EXIT_FAILURE;
- }
-
- if (n_read != n_required) {
- printf ("Warning: read %zd bytes, but expected %zu\n",
- n_read, n_required);
- }
-
- rec_idx = flt_data[FDTAIL_INDEX];
- end_rec = flt_data[FDHEAD_INDEX];
-
- printf ("Starting replay: head [%d] tail [%d]\n",
- flt_data[FDHEAD_INDEX],
- flt_data[FDTAIL_INDEX]);
+ qb_log_init("fplay", LOG_USER, LOG_INFO);
- for (;;) {
- rec_idx = logsys_rec_get (rec_idx);
- logsys_rec_print (g_record);
- if (rec_idx == end_rec) {
- break;
- }
- record_count += 1;
- }
+ qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD,
+ QB_LOG_FILTER_FILE, __FILE__, LOG_INFO);
+ qb_log_format_set(QB_LOG_STDERR, "%f:%l [%p] %b");
+ qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE);
+ qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_TRUE);
- printf ("Finishing replay: records found [%d]\n", record_count);
- return (0);
+ qb_log_blackbox_print_from_file(LOCALSTATEDIR "/lib/corosync/fdata");
+ return 0;
}
diff --git a/tools/corosync-notifyd.c b/tools/corosync-notifyd.c
index dd8ee4b5..72c58273 100644
--- a/tools/corosync-notifyd.c
+++ b/tools/corosync-notifyd.c
@@ -1,1109 +1,1111 @@
/*
* Copyright (c) 2011 Red Hat
*
* All rights reserved.
*
* Author: Angus Salkeld <asalkeld@redhat.com>
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the MontaVista Software, Inc. nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <config.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/types.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <poll.h>
#include <signal.h>
-#include <syslog.h>
+
+#include <qb/qbdefs.h>
+#include <qb/qbloop.h>
+#include <qb/qblog.h>
#include <qb/qbdefs.h>
#include <qb/qbloop.h>
#include <corosync/corotypes.h>
#include <corosync/confdb.h>
#include <corosync/cfg.h>
#include <corosync/quorum.h>
/*
* generic declarations
*/
enum {
CS_NTF_LOG,
CS_NTF_STDOUT,
CS_NTF_SNMP,
CS_NTF_DBUS,
CS_NTF_FG,
CS_NTF_MAX,
};
static int conf[CS_NTF_MAX];
static int32_t _cs_is_quorate = 0;
typedef void (*node_membership_fn_t)(char *nodename, uint32_t nodeid, char *state, char* ip);
typedef void (*node_quorum_fn_t)(char *nodename, uint32_t nodeid, const char *state);
typedef void (*application_connection_fn_t)(char *nodename, uint32_t nodeid, char *app_name, const char *state);
struct notify_callbacks {
node_membership_fn_t node_membership_fn;
node_quorum_fn_t node_quorum_fn;
application_connection_fn_t application_connection_fn;
};
#define MAX_NOTIFIERS 5
static int num_notifiers = 0;
static struct notify_callbacks notifiers[MAX_NOTIFIERS];
static uint32_t local_nodeid = 0;
static char local_nodename[CS_MAX_NAME_LENGTH];
static qb_loop_t *main_loop;
static quorum_handle_t quorum_handle;
static void _cs_node_membership_event(char *nodename, uint32_t nodeid, char *state, char* ip);
static void _cs_node_quorum_event(const char *state);
static void _cs_application_connection_event(char *app_name, const char *state);
#ifdef HAVE_DBUS
#include <dbus/dbus.h>
/*
* dbus
*/
#define DBUS_CS_NAME "org.corosync"
#define DBUS_CS_IFACE "org.corosync"
#define DBUS_CS_PATH "/org/corosync"
static DBusConnection *db = NULL;
static char _err[512];
static int err_set = 0;
static void _cs_dbus_init(void);
#endif /* HAVE_DBUS */
#ifdef ENABLE_SNMP
#include <net-snmp/net-snmp-config.h>
#include <net-snmp/snmpv3_api.h>
#include <net-snmp/agent/agent_trap.h>
#include <net-snmp/library/mib.h>
#include <net-snmp/library/snmp_api.h>
#include <net-snmp/library/snmp_client.h>
#include <net-snmp/library/snmp_debug.h>
enum snmp_node_status {
SNMP_NODE_STATUS_UNKNOWN = 0,
SNMP_NODE_STATUS_JOINED = 1,
SNMP_NODE_STATUS_LEFT = 2
};
#define SNMP_OID_COROSYNC "1.3.6.1.4.1.35488"
#define SNMP_OID_OBJECT_ROOT SNMP_OID_COROSYNC ".1"
#define SNMP_OID_OBJECT_NODE_NAME SNMP_OID_OBJECT_ROOT ".1"
#define SNMP_OID_OBJECT_NODE_ID SNMP_OID_OBJECT_ROOT ".2"
#define SNMP_OID_OBJECT_NODE_STATUS SNMP_OID_OBJECT_ROOT ".3"
#define SNMP_OID_OBJECT_NODE_ADDR SNMP_OID_OBJECT_ROOT ".4"
#define SNMP_OID_OBJECT_RINGSEQ SNMP_OID_OBJECT_ROOT ".20"
#define SNMP_OID_OBJECT_QUORUM SNMP_OID_OBJECT_ROOT ".21"
#define SNMP_OID_OBJECT_APP_NAME SNMP_OID_OBJECT_ROOT ".40"
#define SNMP_OID_OBJECT_APP_STATUS SNMP_OID_OBJECT_ROOT ".41"
#define SNMP_OID_TRAPS_ROOT SNMP_OID_COROSYNC ".0"
#define SNMP_OID_TRAPS_NODE SNMP_OID_TRAPS_ROOT ".1"
#define SNMP_OID_TRAPS_QUORUM SNMP_OID_TRAPS_ROOT ".2"
#define SNMP_OID_TRAPS_APP SNMP_OID_TRAPS_ROOT ".3"
#define CS_TIMESTAMP_STR_LEN 20
static const char *local_host = "localhost";
#endif /* ENABLE_SNMP */
static char snmp_manager_buf[CS_MAX_NAME_LENGTH];
static char *snmp_manager = NULL;
/*
* confdb
*/
#define SEPERATOR_STR "."
static confdb_handle_t confdb_handle;
static void _cs_confdb_key_changed(confdb_handle_t handle,
confdb_change_type_t change_type,
hdb_handle_t parent_object_handle,
hdb_handle_t object_handle,
const void *object_name, size_t object_name_len,
const void *key_name, size_t key_name_len,
const void *key_value, size_t key_value_len);
static void _cs_confdb_object_created(confdb_handle_t handle,
hdb_handle_t parent_object_handle,
hdb_handle_t object_handle,
const void *name_pt, size_t name_len);
static void _cs_confdb_object_deleted(confdb_handle_t handle,
hdb_handle_t parent_object_handle,
const void *name_pt, size_t name_len);
static confdb_callbacks_t callbacks = {
.confdb_key_change_notify_fn = _cs_confdb_key_changed,
.confdb_object_create_change_notify_fn = _cs_confdb_object_created,
.confdb_object_delete_change_notify_fn = _cs_confdb_object_deleted,
};
static int32_t _cs_ip_to_hostname(char* ip, char* name_out)
{
struct sockaddr_in sa;
int rc;
if (strchr(ip, ':') == NULL) {
sa.sin_family = AF_INET;
} else {
sa.sin_family = AF_INET6;
}
rc = inet_pton(sa.sin_family, ip, &sa.sin_addr);
if (rc == 0) {
return -EINVAL;
}
rc = getnameinfo((struct sockaddr*)&sa, sizeof(sa),
name_out, CS_MAX_NAME_LENGTH, NULL, 0, 0);
if (rc != 0) {
- syslog (LOG_ERR, "error looking up %s : %s\n", ip, gai_strerror(rc));
+ qb_log(LOG_ERR, 0, "error looking up %s : %s", ip, gai_strerror(rc));
return -EINVAL;
}
return 0;
}
/*
 * confdb key-change handler: turns a change of a "status" key below the
 * "members" object into a node membership event.
 *
 * The name of the parent of parent_object_handle is looked up; only when
 * it is "members" and the key name starts with "status" is anything done.
 * The object name is parsed as the node id, the key value is the status
 * text, and the node's "ip" key (of the form "r(0) ip(192.168.100.92)")
 * supplies the address. The address is resolved to a host name; when the
 * lookup fails the address itself is used as the node name.
 *
 * Notifications whose name/value do not fit the local buffers, or whose
 * "ip" value does not contain a "(...)" part, are dropped instead of
 * overrunning the stack or dereferencing NULL.
 */
static void
_cs_confdb_key_changed(confdb_handle_t handle,
	confdb_change_type_t change_type,
	hdb_handle_t parent_object_handle,
	hdb_handle_t object_handle,
	const void *object_name_pt, size_t object_name_len,
	const void *key_name_pt, size_t key_name_len,
	const void *key_value_pt, size_t key_value_len)
{
	static const char status_key[] = "status";
	const size_t status_key_len = sizeof (status_key) - 1;
	char parent_name[CS_MAX_NAME_LENGTH];
	size_t len = 0;
	hdb_handle_t real_parent_object_handle;
	cs_error_t rc = CS_OK;
	char nodename[CS_MAX_NAME_LENGTH];
	char nodeid_str[CS_MAX_NAME_LENGTH];
	uint32_t nodeid;
	char status[CS_MAX_NAME_LENGTH];
	char ip[CS_MAX_NAME_LENGTH];
	size_t ip_len = 0;
	confdb_value_types_t type;
	char *open_bracket = NULL;
	char *close_bracket = NULL;

	rc = confdb_object_parent_get (handle,
		parent_object_handle, &real_parent_object_handle);
	assert(rc == CS_OK);

	rc = confdb_object_name_get (handle,
		real_parent_object_handle,
		parent_name,
		&len);
	assert(rc == CS_OK);
	/* Validate the reported length before using it as an index. */
	if (len >= sizeof (parent_name)) {
		return;
	}
	parent_name[len] = '\0';

	if (strcmp(parent_name, "members") != 0) {
		return;
	}

	/*
	 * The key name is length-delimited, so check the length before
	 * comparing; this keeps the original "starts with status" match.
	 */
	if (key_name_len < status_key_len ||
	    memcmp(key_name_pt, status_key, status_key_len) != 0) {
		return;
	}

	/* Both copies need room for the terminating NUL. */
	if (object_name_len >= sizeof (nodeid_str) ||
	    key_value_len >= sizeof (status)) {
		return;
	}

	memcpy(nodeid_str, object_name_pt, object_name_len);
	nodeid_str[object_name_len] = '\0';
	/* Node ids are unsigned 32-bit; atoi() cannot represent the full range. */
	nodeid = (uint32_t) strtoul(nodeid_str, NULL, 10);

	memcpy(status, key_value_pt, key_value_len);
	status[key_value_len] = '\0';

	rc = confdb_key_get_typed(handle, parent_object_handle,
		"ip", ip, &ip_len, &type);
	assert(rc == CS_OK);
	if (ip_len == 0 || ip_len > sizeof (ip)) {
		return;
	}
	ip[ip_len-1] = '\0';

	/*
	 * We want the ip out of: "r(0) ip(192.168.100.92)"
	 */
	open_bracket = strrchr(ip, '(');
	if (open_bracket == NULL) {
		return;
	}
	open_bracket++;
	close_bracket = strrchr(open_bracket, ')');
	if (close_bracket == NULL) {
		return;
	}
	*close_bracket = '\0';

	if (_cs_ip_to_hostname(open_bracket, nodename) != 0) {
		/* No name available: report the address rather than garbage. */
		snprintf(nodename, sizeof (nodename), "%s", open_bracket);
	}
	_cs_node_membership_event(nodename, nodeid, status, open_bracket);
}
/*
 * confdb object-creation handler: reports an application as "connected"
 * when the name looked up for object_handle is "connections".
 *
 * name_pt/name_len give the (length-delimited) name of the created object,
 * which is passed on as the application name. Notifications whose names do
 * not fit the local buffers, or whose name lookup fails, are ignored.
 *
 * NOTE(review): the lookup uses object_handle while the result is stored
 * in parent_name - confirm which handle confdb passes in which argument.
 */
static void
_cs_confdb_object_created(confdb_handle_t handle,
	hdb_handle_t parent_object_handle,
	hdb_handle_t object_handle,
	const void *name_pt,
	size_t name_len)
{
	char parent_name[CS_MAX_NAME_LENGTH];
	size_t len = 0;
	char obj_name[CS_MAX_NAME_LENGTH];
	cs_error_t rc = CS_OK;

	/* Leave room for the terminating NUL. */
	if (name_len >= sizeof (obj_name)) {
		return;
	}
	memcpy(obj_name, name_pt, name_len);
	obj_name[name_len] = '\0';

	rc = confdb_object_name_get (handle,
		object_handle, parent_name, &len);
	/* Only trust len once the call succeeded and it fits the buffer. */
	if (rc != CS_OK || len >= sizeof (parent_name)) {
		return;
	}
	parent_name[len] = '\0';

	if (strcmp(parent_name, "connections") == 0) {
		_cs_application_connection_event(obj_name, "connected");
	}
}
/*
 * confdb object-deletion handler: reports an application as
 * "disconnected" when the deleted object's parent is named "connections".
 *
 * name_pt/name_len give the (length-delimited) name of the deleted object,
 * which is passed on as the application name. Notifications whose names do
 * not fit the local buffers are ignored.
 */
static void
_cs_confdb_object_deleted(confdb_handle_t handle,
	hdb_handle_t parent_object_handle,
	const void *name_pt,
	size_t name_len)
{
	char obj_name[CS_MAX_NAME_LENGTH];
	char parent_name[CS_MAX_NAME_LENGTH];
	size_t len = 0;
	cs_error_t rc;

	/* Leave room for the terminating NUL. */
	if (name_len >= sizeof (obj_name)) {
		return;
	}
	memcpy(obj_name, name_pt, name_len);
	obj_name[name_len] = '\0';

	rc = confdb_object_name_get (handle,
		parent_object_handle, parent_name, &len);
	assert(rc == CS_OK);
	/* Validate the reported length before using it as an index. */
	if (len >= sizeof (parent_name)) {
		return;
	}
	parent_name[len] = '\0';

	if (strcmp(parent_name, "connections") == 0) {
		_cs_application_connection_event(obj_name, "disconnected");
	}
}
/*
 * Look up an object by its dotted path (components separated by
 * SEPERATOR_STR), starting at OBJECT_PARENT_HANDLE and descending one
 * component at a time.
 *
 * handle     - confdb connection to query.
 * name_pt    - NUL-terminated path; it is copied (and truncated to
 *              CS_MAX_NAME_LENGTH - 1 characters) before being tokenised.
 * out_handle - receives the handle of the last component found.
 *
 * Returns CS_OK on success, or the confdb_object_find() error for the
 * first component that cannot be found (out_handle is then not written).
 * The process exits if confdb_object_find_start() fails.
 */
static cs_error_t
_cs_confdb_find_object (confdb_handle_t handle,
const char * name_pt,
hdb_handle_t * out_handle)
{
char * obj_name_pt;
char * save_pt;
hdb_handle_t obj_handle;
/* NOTE(review): declared as confdb_handle_t but holds object handles - confirm the intended type. */
confdb_handle_t parent_object_handle = OBJECT_PARENT_HANDLE;
char tmp_name[CS_MAX_NAME_LENGTH];
cs_error_t res = CS_OK;
/* strtok_r() modifies its input, so work on a private, terminated copy. */
strncpy (tmp_name, name_pt, sizeof (tmp_name));
tmp_name[sizeof (tmp_name) - 1] = '\0';
obj_name_pt = strtok_r(tmp_name, SEPERATOR_STR, &save_pt);
while (obj_name_pt != NULL) {
res = confdb_object_find_start(handle, parent_object_handle);
if (res != CS_OK) {
- syslog (LOG_ERR, "Could not start object_find %d\n", res);
+ qb_log(LOG_ERR, 0, "Could not start object_find %d", res);
exit (EXIT_FAILURE);
}
res = confdb_object_find(handle, parent_object_handle,
obj_name_pt, strlen (obj_name_pt), &obj_handle);
if (res != CS_OK) {
return res;
}
/* Descend: the object just found is the parent for the next component. */
parent_object_handle = obj_handle;
obj_name_pt = strtok_r (NULL, SEPERATOR_STR, &save_pt);
}
*out_handle = parent_object_handle;
return res;
}
/*
 * Poll callback registered for the confdb file descriptor: dispatches one
 * pending confdb event on the shared confdb_handle. None of the poll
 * arguments are used and the dispatch result is ignored; always returns 0.
 */
static int
_cs_confdb_dispatch(int fd, int revents, void *data)
{
	(void) confdb_dispatch(confdb_handle, CS_DISPATCH_ONE);

	return 0;
}
/*
 * Quorum notification callback. Emits a node quorum event ("quorate" or
 * "not quorate") only when the reported state differs from the state
 * remembered in _cs_is_quorate, which is then updated. The ring sequence
 * and view list are not used.
 */
static void _cs_quorum_notification(quorum_handle_t handle,
	uint32_t quorate, uint64_t ring_seq,
	uint32_t view_list_entries, uint32_t *view_list)
{
	const char *state_text;

	if (_cs_is_quorate != quorate) {
		_cs_is_quorate = quorate;
		state_text = quorate ? "quorate" : "not quorate";
		_cs_node_quorum_event(state_text);
	}
}
/*
 * Poll callback registered for the quorum file descriptor: dispatches one
 * pending quorum event on quorum_handle. None of the poll arguments are
 * used and the dispatch result is ignored; always returns 0.
 */
static int
_cs_quorum_dispatch(int fd, int revents, void *data)
{
	(void) quorum_dispatch(quorum_handle, CS_DISPATCH_ONE);

	return 0;
}
/*
 * Connect to the quorum service, add its file descriptor to main_loop
 * with _cs_quorum_dispatch as the callback, and start tracking changes.
 * If quorum_initialize() fails the error is logged and the function
 * returns without registering anything.
 */
static void
_cs_quorum_init(void)
{
cs_error_t rc;
int fd;
quorum_callbacks_t quorum_callbacks = {
.quorum_notify_fn = _cs_quorum_notification,
};
rc = quorum_initialize (&quorum_handle, &quorum_callbacks);
if (rc != CS_OK) {
- syslog(LOG_ERR, "Could not connect to corosync(quorum)");
+ qb_log(LOG_ERR, "Could not connect to corosync(quorum)");
return;
}
/* NOTE(review): the results of the three calls below are not checked. */
quorum_fd_get(quorum_handle, &fd);
qb_loop_poll_add(main_loop, QB_LOOP_MED, fd, POLLIN|POLLNVAL, NULL,
_cs_quorum_dispatch);
quorum_trackstart(quorum_handle, CS_TRACK_CHANGES);
}
/*
 * Close the quorum connection held in quorum_handle; the result of the
 * finalize call is ignored.
 */
static void
_cs_quorum_finalize(void)
{
	(void) quorum_finalize(quorum_handle);
}
#ifdef HAVE_DBUS
/*
* dbus notifications
*/
/*
 * Run one dbus_connection_read_write() pass on the shared connection db
 * with a timeout argument of 500, holding an extra reference on the
 * connection for the duration of the call.
 */
static void
_cs_dbus_auto_flush(void)
{
	const int rw_timeout = 500;

	dbus_connection_ref(db);
	dbus_connection_read_write(db, rw_timeout);
	dbus_connection_unref(db);
}
/*
 * Give up the DBUS_CS_NAME bus name, drop the reference on the shared
 * connection and clear db. Does nothing when db is not set. Any error
 * reported by the release call is discarded.
 */
static void
_cs_dbus_release(void)
{
	DBusError release_err;

	if (db == NULL) {
		return;
	}

	dbus_error_init(&release_err);
	dbus_bus_release_name(db, DBUS_CS_NAME, &release_err);
	dbus_error_free(&release_err);

	dbus_connection_unref(db);
	db = NULL;
}
/*
 * Send a "QuorumStateChange" DBus signal carrying the node name, node id
 * and quorum state string.
 *
 * An error text saved in _err by an earlier call is logged first. Nothing
 * is sent when there is no connection (db unset); when the connection is
 * found to be disconnected, the error is recorded in _err for the next
 * call and the connection is released.
 */
static void
_cs_dbus_node_quorum_event(char *nodename, uint32_t nodeid, const char *state)
{
DBusMessage *msg = NULL;
/* NOTE(review): with the added lines below, ret is assigned but no longer read. */
int ret = -1;
if (err_set) {
- syslog (LOG_ERR, "%s\n", _err);
+ qb_log(LOG_ERR, "%s", _err);
err_set = 0;
}
if (!db) {
goto out_free;
}
if (dbus_connection_get_is_connected(db) != TRUE) {
/* Remember the failure; it is logged at the start of the next event. */
err_set = 1;
snprintf(_err, sizeof(_err), "DBus connection lost");
_cs_dbus_release();
goto out_unlock;
}
_cs_dbus_auto_flush();
if (!(msg = dbus_message_new_signal(DBUS_CS_PATH,
DBUS_CS_IFACE,
"QuorumStateChange"))) {
- syslog (LOG_ERR, "%s(%d) error\n", __func__, __LINE__);
+ qb_log(LOG_ERR, "error creating dbus signal");
goto out_unlock;
}
/* Signal arguments, in order: node name, node id, state. */
if (!dbus_message_append_args(msg,
DBUS_TYPE_STRING, &nodename,
DBUS_TYPE_UINT32, &nodeid,
DBUS_TYPE_STRING, &state,
DBUS_TYPE_INVALID)) {
- syslog (LOG_ERR, "%s(%d) error\n", __func__, __LINE__);
+ qb_log(LOG_ERR, "error adding args to quorum signal");
goto out_unlock;
}
dbus_connection_send(db, msg, NULL);
ret = 0;
out_unlock:
- if (ret == -1) {
- syslog (LOG_ERR, "%s() error\n", __func__);
- }
- if (msg)
+ if (msg) {
dbus_message_unref(msg);
+ }
out_free:
return;
}
/*
 * Send a "NodeStateChange" DBus signal carrying the node name, node id,
 * address and membership state string.
 *
 * An error text saved in _err by an earlier call is logged first. Nothing
 * is sent when there is no connection (db unset); when the connection is
 * found to be disconnected, the error is recorded in _err for the next
 * call and the connection is released.
 */
static void
_cs_dbus_node_membership_event(char *nodename, uint32_t nodeid, char *state, char* ip)
{
DBusMessage *msg = NULL;
/* NOTE(review): with the added lines below, ret is assigned but no longer read. */
int ret = -1;
if (err_set) {
- syslog (LOG_ERR, "%s\n", _err);
+ qb_log(LOG_ERR, "%s", _err);
err_set = 0;
}
if (!db) {
goto out_free;
}
if (dbus_connection_get_is_connected(db) != TRUE) {
/* Remember the failure; it is logged at the start of the next event. */
err_set = 1;
snprintf(_err, sizeof(_err), "DBus connection lost");
_cs_dbus_release();
goto out_unlock;
}
_cs_dbus_auto_flush();
if (!(msg = dbus_message_new_signal(DBUS_CS_PATH,
DBUS_CS_IFACE,
"NodeStateChange"))) {
- syslog (LOG_ERR, "%s(%d) error\n", __func__, __LINE__);
+ qb_log(LOG_ERR, "error creating NodeStateChange signal");
goto out_unlock;
}
/* Signal arguments, in order: node name, node id, address, state. */
if (!dbus_message_append_args(msg,
DBUS_TYPE_STRING, &nodename,
DBUS_TYPE_UINT32, &nodeid,
DBUS_TYPE_STRING, &ip,
DBUS_TYPE_STRING, &state,
DBUS_TYPE_INVALID)) {
- syslog (LOG_ERR, "%s(%d) error\n", __func__, __LINE__);
+ qb_log(LOG_ERR, "error adding args to NodeStateChange signal");
goto out_unlock;
}
dbus_connection_send(db, msg, NULL);
ret = 0;
out_unlock:
- if (ret == -1) {
- syslog (LOG_ERR, "%s() error\n", __func__);
- }
- if (msg)
+ if (msg) {
dbus_message_unref(msg);
+ }
out_free:
return;
}
/*
 * Send a "ConnectionStateChange" DBus signal carrying the node name, node
 * id, application name and connection state string.
 *
 * An error text saved in _err by an earlier call is logged first. Nothing
 * is sent when there is no connection (db unset); when the connection is
 * found to be disconnected, the error is recorded in _err for the next
 * call and the connection is released.
 */
static void
_cs_dbus_application_connection_event(char *nodename, uint32_t nodeid, char *app_name, const char *state)
{
DBusMessage *msg = NULL;
/* NOTE(review): ret is assigned but never read in this function. */
int ret = -1;
if (err_set) {
- syslog (LOG_ERR, "%s\n", _err);
+ qb_log(LOG_ERR, "%s", _err);
err_set = 0;
}
if (!db) {
goto out_free;
}
if (dbus_connection_get_is_connected(db) != TRUE) {
/* Remember the failure; it is logged at the start of the next event. */
err_set = 1;
snprintf(_err, sizeof(_err), "DBus connection lost");
_cs_dbus_release();
goto out_unlock;
}
_cs_dbus_auto_flush();
if (!(msg = dbus_message_new_signal(DBUS_CS_PATH,
DBUS_CS_IFACE,
"ConnectionStateChange"))) {
- syslog (LOG_ERR, "%s(%d) error\n", __func__, __LINE__);
+ qb_log(LOG_ERR, "error creating ConnectionStateChange signal");
goto out_unlock;
}
/* Signal arguments, in order: node name, node id, application name, state. */
if (!dbus_message_append_args(msg,
DBUS_TYPE_STRING, &nodename,
DBUS_TYPE_UINT32, &nodeid,
DBUS_TYPE_STRING, &app_name,
DBUS_TYPE_STRING, &state,
DBUS_TYPE_INVALID)) {
- syslog (LOG_ERR, "%s(%d) error\n", __func__, __LINE__);
+ qb_log(LOG_ERR, "error adding args to ConnectionStateChange signal");
goto out_unlock;
}
dbus_connection_send(db, msg, NULL);
ret = 0;
out_unlock:
- if (msg)
+ if (msg) {
dbus_message_unref(msg);
+ }
out_free:
return;
}
static void
_cs_dbus_init(void)
{
DBusConnection *dbc = NULL;
DBusError err;
dbus_error_init(&err);
dbc = dbus_bus_get(DBUS_BUS_SYSTEM, &err);
if (!dbc) {
snprintf(_err, sizeof(_err),
"dbus_bus_get: %s", err.message);
err_set = 1;
dbus_error_free(&err);
return;
}
dbus_connection_set_exit_on_disconnect(dbc, FALSE);
db = dbc;
notifiers[num_notifiers].node_membership_fn =
_cs_dbus_node_membership_event;
notifiers[num_notifiers].node_quorum_fn =
_cs_dbus_node_quorum_event;
notifiers[num_notifiers].application_connection_fn =
_cs_dbus_application_connection_event;
num_notifiers++;
}
#endif /* HAVE_DBUS */
#ifdef ENABLE_SNMP
/*
 * Return the SNMP session used for sending traps, creating it on the
 * first successful call and caching it in a function-local static.
 *
 * target - trap destination. Without NETSNMPV54 it is combined with port
 *          162 into the transport address.
 *
 * Returns the cached or newly created session, or NULL when target is
 * NULL or the session could not be added (which is logged).
 */
static netsnmp_session *snmp_init (const char *target)
{
static netsnmp_session *session = NULL;
#ifndef NETSNMPV54
char default_port[128];
/* NOTE(review): target is formatted here before the NULL check below - confirm NULL cannot reach this point. */
snprintf (default_port, sizeof (default_port), "%s:162", target);
#endif
if (session) {
return (session);
}
if (target == NULL) {
return NULL;
}
/* NOTE(review): the malloc() result is used without a NULL check. */
session = malloc (sizeof (netsnmp_session));
snmp_sess_init (session);
session->version = SNMP_VERSION_2c;
session->callback = NULL;
session->callback_magic = NULL;
/*
 * NOTE(review): the pointer to the malloc()ed template is overwritten by
 * the snmp_add() result - confirm whether the template needs to be freed.
 */
session = snmp_add(session,
#ifdef NETSNMPV54
netsnmp_transport_open_client ("snmptrap", target),
#else
netsnmp_tdomain_transport (default_port, 0, "udp"),
#endif
NULL, NULL);
if (session == NULL) {
- syslog(LOG_ERR, "Could not create snmp transport");
+ qb_log(LOG_ERR, 0, "Could not create snmp transport");
}
return (session);
}
/*
 * Append one variable to trap_pdu.
 *
 * prefix is parsed as a textual OID; when parsing succeeds the value
 * (value_size bytes, tagged with asn_type) is added under that OID.
 * When parsing fails nothing is added.
 */
static inline void add_field (
	netsnmp_pdu *trap_pdu,
	u_char asn_type,
	const char *prefix,
	void *value,
	size_t value_size)
{
	oid parsed_oid[MAX_OID_LEN];
	size_t parsed_len = MAX_OID_LEN;

	if (!snmp_parse_oid(prefix, parsed_oid, &parsed_len)) {
		return;
	}

	snmp_pdu_add_variable (trap_pdu, parsed_oid, parsed_len, asn_type, (u_char *) value, value_size);
}
/*
 * Send an SNMP v2 trap describing a node membership change.
 *
 * The trap carries the current time as the uptime variable, the trap OID
 * SNMP_OID_TRAPS_NODE, and the node name, node id, address and status
 * fields. Failures to obtain a session or create the PDU are logged and
 * the event is dropped.
 */
static void
_cs_snmp_node_membership_event(char *nodename, uint32_t nodeid, char *state, char* ip)
{
int ret;
char csysuptime[CS_TIMESTAMP_STR_LEN];
static oid snmptrap_oid[] = { 1,3,6,1,6,3,1,1,4,1,0 };
static oid sysuptime_oid[] = { 1,3,6,1,2,1,1,3,0 };
time_t now = time (NULL);
netsnmp_pdu *trap_pdu;
netsnmp_session *session = snmp_init (snmp_manager);
if (session == NULL) {
- syslog (LOG_NOTICE, "Failed to init SNMP session.\n");
+ qb_log(LOG_NOTICE, "Failed to init SNMP session.");
return ;
}
trap_pdu = snmp_pdu_create (SNMP_MSG_TRAP2);
if (!trap_pdu) {
- syslog (LOG_NOTICE, "Failed to create SNMP notification.\n");
+ qb_log(LOG_NOTICE, "Failed to create SNMP notification.");
return ;
}
/* send uptime */
/* NOTE(review): "%ld" is used to format a time_t - confirm the types match on all targets. */
snprintf (csysuptime, CS_TIMESTAMP_STR_LEN, "%ld", now);
snmp_add_var (trap_pdu, sysuptime_oid, sizeof (sysuptime_oid) / sizeof (oid), 't', csysuptime);
snmp_add_var (trap_pdu, snmptrap_oid, sizeof (snmptrap_oid) / sizeof (oid), 'o', SNMP_OID_TRAPS_NODE);
/* Add entries to the trap */
add_field (trap_pdu, ASN_OCTET_STR, SNMP_OID_OBJECT_NODE_NAME, (void*)nodename, strlen (nodename));
add_field (trap_pdu, ASN_INTEGER, SNMP_OID_OBJECT_NODE_ID, (void*)&nodeid, sizeof (nodeid));
add_field (trap_pdu, ASN_OCTET_STR, SNMP_OID_OBJECT_NODE_ADDR, (void*)ip, strlen (ip));
add_field (trap_pdu, ASN_OCTET_STR, SNMP_OID_OBJECT_NODE_STATUS, (void*)state, strlen (state));
/* Send and cleanup */
/* The PDU is freed here only when snmp_send() reports failure (0). */
ret = snmp_send (session, trap_pdu);
if (ret == 0) {
/* error */
- syslog (LOG_ERR, "Could not send SNMP trap");
+ qb_log(LOG_ERR, "Could not send SNMP trap");
snmp_free_pdu (trap_pdu);
}
}
/*
 * Send an SNMP v2 trap describing a quorum state change.
 *
 * The trap carries the current time as the uptime variable, the trap OID
 * SNMP_OID_TRAPS_NODE, and the node name, node id and quorum state
 * fields. Failures to obtain a session or create the PDU are logged and
 * the event is dropped.
 */
static void
_cs_snmp_node_quorum_event(char *nodename, uint32_t nodeid,
const char *state)
{
int ret;
/* NOTE(review): fixed size 20 here, while the membership variant uses CS_TIMESTAMP_STR_LEN. */
char csysuptime[20];
static oid snmptrap_oid[] = { 1,3,6,1,6,3,1,1,4,1,0 };
static oid sysuptime_oid[] = { 1,3,6,1,2,1,1,3,0 };
time_t now = time (NULL);
netsnmp_pdu *trap_pdu;
netsnmp_session *session = snmp_init (snmp_manager);
if (session == NULL) {
- syslog (LOG_NOTICE, "Failed to init SNMP session.\n");
+ qb_log(LOG_NOTICE, "Failed to init SNMP session.");
return ;
}
trap_pdu = snmp_pdu_create (SNMP_MSG_TRAP2);
if (!trap_pdu) {
- syslog (LOG_NOTICE, "Failed to create SNMP notification.\n");
+ qb_log(LOG_NOTICE, "Failed to create SNMP notification.");
return ;
}
/* send uptime */
/* NOTE(review): unbounded sprintf() with "%ld" for a time_t; the membership variant uses snprintf(). */
sprintf (csysuptime, "%ld", now);
snmp_add_var (trap_pdu, sysuptime_oid, sizeof (sysuptime_oid) / sizeof (oid), 't', csysuptime);
snmp_add_var (trap_pdu, snmptrap_oid, sizeof (snmptrap_oid) / sizeof (oid), 'o', SNMP_OID_TRAPS_NODE);
/* Add entries to the trap */
add_field (trap_pdu, ASN_OCTET_STR, SNMP_OID_OBJECT_NODE_NAME, (void*)nodename, strlen (nodename));
add_field (trap_pdu, ASN_INTEGER, SNMP_OID_OBJECT_NODE_ID, (void*)&nodeid, sizeof (nodeid));
add_field (trap_pdu, ASN_OCTET_STR, SNMP_OID_OBJECT_QUORUM, (void*)state, strlen (state));
/* Send and cleanup */
/* The PDU is freed here only when snmp_send() reports failure (0). */
ret = snmp_send (session, trap_pdu);
if (ret == 0) {
/* error */
- syslog (LOG_ERR, "Could not send SNMP trap");
+ qb_log(LOG_ERR, "Could not send SNMP trap");
snmp_free_pdu (trap_pdu);
}
}
/*
 * Register the SNMP notifier in the next free notifiers slot.
 *
 * When no SNMP manager has been configured, local_host is used as the
 * trap destination. The SNMP notifier has no application-connection
 * handler, so that slot entry is set to NULL.
 */
static void
_cs_snmp_init(void)
{
	if (!snmp_manager) {
		snmp_manager = (char*)local_host;
	}

	notifiers[num_notifiers].application_connection_fn = NULL;
	notifiers[num_notifiers].node_quorum_fn = _cs_snmp_node_quorum_event;
	notifiers[num_notifiers].node_membership_fn = _cs_snmp_node_membership_event;
	num_notifiers++;
}
#endif /* ENABLE_SNMP */
/*
 * Log a node membership change at LOG_NOTICE as
 * "<nodename>[<nodeid>] ip:<ip> <state>".
 */
static void
_cs_syslog_node_membership_event(char *nodename, uint32_t nodeid, char *state, char* ip)
{
/* NOTE(review): nodeid is a uint32_t printed with "%d". */
- syslog (LOG_NOTICE, "%s[%d] ip:%s %s\n", nodename, nodeid, ip, state);
+ qb_log(LOG_NOTICE, "%s[%d] ip:%s %s", nodename, nodeid, ip, state);
}
/*
 * Log a quorum change at LOG_NOTICE: "is now quorate" when state is
 * exactly "quorate", otherwise "has lost quorum".
 */
static void
_cs_syslog_node_quorum_event(char *nodename, uint32_t nodeid, const char *state)
{
if (strcmp(state, "quorate") == 0) {
- syslog (LOG_NOTICE, "%s[%d] is now %s\n", nodename, nodeid, state);
+ qb_log(LOG_NOTICE, "%s[%d] is now %s", nodename, nodeid, state);
} else {
- syslog (LOG_NOTICE, "%s[%d] has lost quorum\n", nodename, nodeid);
+ qb_log(LOG_NOTICE, "%s[%d] has lost quorum", nodename, nodeid);
}
}
/*
 * Log an application connection change. The wording is "... to corosync"
 * when state is exactly "connected" and "... from corosync" otherwise.
 * The removed lines logged at LOG_ERR; the added lines log at LOG_NOTICE.
 */
static void
_cs_syslog_application_connection_event(char *nodename, uint32_t nodeid, char* app_name, const char *state)
{
if (strcmp(state, "connected") == 0) {
- syslog (LOG_ERR, "%s[%d] %s is now %s to corosync\n", nodename, nodeid, app_name, state);
+ qb_log(LOG_NOTICE, "%s[%d] %s is now %s to corosync", nodename, nodeid, app_name, state);
} else {
- syslog (LOG_ERR, "%s[%d] %s is now %s from corosync\n", nodename, nodeid, app_name, state);
+ qb_log(LOG_NOTICE, "%s[%d] %s is now %s from corosync", nodename, nodeid, app_name, state);
}
}
/*
 * Deliver a node membership event to every registered notifier that
 * provides a membership handler, in registration order.
 */
static void
_cs_node_membership_event(char *nodename, uint32_t nodeid, char *state, char* ip)
{
	int slot;

	for (slot = 0; slot < num_notifiers; slot++) {
		if (!notifiers[slot].node_membership_fn) {
			continue;
		}
		notifiers[slot].node_membership_fn(nodename, nodeid, state, ip);
	}
}
/*
 * Return the local node's name and id, looking them up through the cfg
 * API while the cached local_nodeid is still 0.
 *
 * nodename - receives a pointer to the cached local_nodename buffer.
 * nodeid   - receives the cached local node id (0 when it is unknown).
 *
 * The process exits if the cfg API cannot be initialized. When the node
 * id cannot be obtained, the id stays 0 (so the lookup is retried on the
 * next call) and the name is "localhost". When the id is known but
 * gethostname() fails, the name also falls back to "localhost".
 */
static void
_cs_local_node_info_get(char **nodename, uint32_t *nodeid)
{
	cs_error_t rc;
	corosync_cfg_handle_t cfg_handle;
	int have_name = 0;

	if (local_nodeid == 0) {
		rc = corosync_cfg_initialize(&cfg_handle, NULL);
		if (rc != CS_OK) {
			syslog (LOG_ERR, "Failed to initialize the cfg API. Error %d\n", rc);
			exit (EXIT_FAILURE);
		}
		rc = corosync_cfg_local_get (cfg_handle, &local_nodeid);
		corosync_cfg_finalize(cfg_handle);

		if (rc != CS_OK) {
			local_nodeid = 0;
		} else if (gethostname(local_nodename, sizeof (local_nodename)) == 0) {
			/*
			 * gethostname() need not terminate the buffer when the
			 * name is truncated, so terminate it explicitly.
			 */
			local_nodename[sizeof (local_nodename) - 1] = '\0';
			have_name = 1;
		}

		if (!have_name) {
			snprintf(local_nodename, sizeof (local_nodename), "%s", "localhost");
		}
	}
	*nodeid = local_nodeid;
	*nodename = local_nodename;
}
/*
 * Deliver a quorum event for the local node to every registered notifier
 * that provides a quorum handler, in registration order. The local node
 * name and id are fetched once before the notifiers are called.
 */
static void
_cs_node_quorum_event(const char *state)
{
	uint32_t local_id;
	char *local_name;
	int slot;

	_cs_local_node_info_get(&local_name, &local_id);

	for (slot = 0; slot < num_notifiers; slot++) {
		if (!notifiers[slot].node_quorum_fn) {
			continue;
		}
		notifiers[slot].node_quorum_fn(local_name, local_id, state);
	}
}
/*
 * Deliver an application connection event for the local node to every
 * registered notifier that provides a connection handler, in registration
 * order. The local node name and id are fetched once before the notifiers
 * are called.
 */
static void
_cs_application_connection_event(char *app_name, const char *state)
{
	uint32_t local_id;
	char *local_name;
	int slot;

	_cs_local_node_info_get(&local_name, &local_id);

	for (slot = 0; slot < num_notifiers; slot++) {
		if (!notifiers[slot].application_connection_fn) {
			continue;
		}
		notifiers[slot].application_connection_fn(local_name, local_id, app_name, state);
	}
}
/*
 * Signal callback registered on main_loop for the termination signals:
 * asks the main loop to stop. The signal number and user data are not
 * used; always returns 0.
 */
static int32_t
sig_exit_handler(int32_t num, void *data)
{
	qb_loop_stop(main_loop);

	return 0;
}
/*
 * Connect to confdb, add its file descriptor to main_loop with
 * _cs_confdb_dispatch as the callback, and start tracking changes on:
 *   - "runtime.connections."              (depth one)
 *   - "runtime.totem.pg.mrp.srp.members." (recursive)
 * Any failure is logged and terminates the process.
 */
static void
_cs_confdb_init(void)
{
hdb_handle_t obj_handle;
cs_error_t rc;
int conf_fd = 0;
rc = confdb_initialize (&confdb_handle, &callbacks);
if (rc != CS_OK) {
- syslog (LOG_ERR, "Failed to initialize the objdb API. Error %d\n", rc);
+ qb_log(LOG_ERR, "Failed to initialize the objdb API. Error %d", rc);
exit (EXIT_FAILURE);
}
/* NOTE(review): the results of the two calls below are not checked. */
confdb_fd_get(confdb_handle, &conf_fd);
qb_loop_poll_add(main_loop, QB_LOOP_MED, conf_fd, POLLIN|POLLNVAL, NULL,
_cs_confdb_dispatch);
/* Track the connections object. */
rc = _cs_confdb_find_object (confdb_handle, "runtime.connections.",
&obj_handle);
if (rc != CS_OK) {
- syslog (LOG_ERR,
- "Failed to find the connections object. Error %d\n", rc);
+ qb_log(LOG_ERR,
+ "Failed to find the connections object. Error %d", rc);
exit (EXIT_FAILURE);
}
rc = confdb_track_changes (confdb_handle, obj_handle,
CONFDB_TRACK_DEPTH_ONE);
if (rc != CS_OK) {
- syslog (LOG_ERR,
- "Failed to track the connections object. Error %d\n", rc);
+ qb_log(LOG_ERR,
+ "Failed to track the connections object. Error %d", rc);
exit (EXIT_FAILURE);
}
/* Track the members object and everything below it. */
rc = _cs_confdb_find_object(confdb_handle,
"runtime.totem.pg.mrp.srp.members.", &obj_handle);
if (rc != CS_OK) {
- syslog (LOG_ERR, "Failed to find the object. Error %d\n", rc);
+ qb_log(LOG_ERR, "Failed to find the object. Error %d", rc);
exit (EXIT_FAILURE);
}
rc = confdb_track_changes(confdb_handle,
obj_handle, CONFDB_TRACK_DEPTH_RECURSIVE);
if (rc != CS_OK) {
- syslog (LOG_ERR,
- "Failed to track the object. Error %d\n", rc);
+ qb_log(LOG_ERR,
+ "Failed to track the object. Error %d", rc);
exit (EXIT_FAILURE);
}
}
/*
 * Stop change tracking on the confdb connection and then close it; the
 * results of both calls are ignored.
 */
static void
_cs_confdb_finalize(void)
{
	(void) confdb_stop_track_changes(confdb_handle);
	(void) confdb_finalize(confdb_handle);
}
/*
 * Validate the option flags collected in conf[] and exit with
 * EXIT_FAILURE on an unusable combination:
 *   - no event type (log, stdout, SNMP, DBus) enabled;
 *   - SNMP or DBus requested but not compiled in;
 *   - stdout output requested without foreground mode.
 * Enabling both SNMP and DBus is only logged, not rejected.
 */
static void
_cs_check_config(void)
{
- if (conf[CS_NTF_LOG] == 0 &&
- conf[CS_NTF_STDOUT] == 0 &&
- conf[CS_NTF_SNMP] == 0 &&
- conf[CS_NTF_DBUS] == 0) {
- syslog(LOG_ERR, "no event type enabled, see corosync-notifyd -h, exiting.");
+ if (conf[CS_NTF_LOG] == QB_FALSE &&
+ conf[CS_NTF_STDOUT] == QB_FALSE &&
+ conf[CS_NTF_SNMP] == QB_FALSE &&
+ conf[CS_NTF_DBUS] == QB_FALSE) {
+ qb_log(LOG_ERR, "no event type enabled, see corosync-notifyd -h, exiting.");
exit(EXIT_FAILURE);
}
#ifndef ENABLE_SNMP
if (conf[CS_NTF_SNMP]) {
- syslog(LOG_ERR, "Not compiled with SNMP support enabled, exiting.");
+ qb_log(LOG_ERR, "Not compiled with SNMP support enabled, exiting.");
exit(EXIT_FAILURE);
}
#endif
#ifndef HAVE_DBUS
if (conf[CS_NTF_DBUS]) {
- syslog(LOG_ERR, "Not compiled with DBus support enabled, exiting.");
+ qb_log(LOG_ERR, "Not compiled with DBus support enabled, exiting.");
exit(EXIT_FAILURE);
}
#endif
if (conf[CS_NTF_STDOUT] && !conf[CS_NTF_FG]) {
- syslog(LOG_ERR, "configured to print to stdout and run in the background, exiting");
+ qb_log(LOG_ERR, "configured to print to stdout and run in the background, exiting");
exit(EXIT_FAILURE);
}
/* Warning only: execution continues with both notifiers enabled. */
if (conf[CS_NTF_SNMP] && conf[CS_NTF_DBUS]) {
- syslog(LOG_ERR, "configured to send snmp traps and dbus signals - are you sure?.");
+ qb_log(LOG_ERR, "configured to send snmp traps and dbus signals - are you sure?.");
}
}
/*
 * Write the command-line option summary to stderr.
 */
static void
_cs_usage(void)
{
	static const char usage_text[] =
		"usage:\n"
		" -f : Start application in foreground.\n"
		" -l : Log all events.\n"
		" -o : Print events to stdout (turns on -l).\n"
		" -s : Send SNMP traps on all events.\n"
		" -m : SNMP Manager IP address (defaults to localhost).\n"
		" -d : Send DBUS signals on all events.\n"
		" -h : Print this help\n\n";

	fputs(usage_text, stderr);
}
/*
 * Entry point of the notification daemon.
 *
 * Parses the command line into conf[], sets up logging, validates the
 * configuration, optionally daemonizes, registers the enabled notifiers,
 * connects to confdb and quorum, and runs the main loop until SIGINT,
 * SIGQUIT or SIGTERM stops it. Returns EXIT_FAILURE for -h, an unknown
 * option or a daemon() failure, and 0 after a normal shutdown.
 */
int
main(int argc, char *argv[])
{
int ch;
- conf[CS_NTF_FG] = 0;
- conf[CS_NTF_LOG] = 0;
- conf[CS_NTF_STDOUT] = 0;
- conf[CS_NTF_SNMP] = 0;
- conf[CS_NTF_DBUS] = 0;
+ conf[CS_NTF_FG] = QB_FALSE;
+ conf[CS_NTF_LOG] = QB_FALSE;
+ conf[CS_NTF_STDOUT] = QB_FALSE;
+ conf[CS_NTF_SNMP] = QB_FALSE;
+ conf[CS_NTF_DBUS] = QB_FALSE;
while ((ch = getopt (argc, argv, "floshdm:")) != EOF) {
switch (ch) {
case 'f':
- conf[CS_NTF_FG] = 1;
+ conf[CS_NTF_FG] = QB_TRUE;
break;
case 'l':
- conf[CS_NTF_LOG] = 1;
+ conf[CS_NTF_LOG] = QB_TRUE;
break;
case 'm':
/* Naming a manager also enables SNMP; the address is copied (truncated if needed). */
- conf[CS_NTF_SNMP] = 1;
+ conf[CS_NTF_SNMP] = QB_TRUE;
strncpy(snmp_manager_buf, optarg, sizeof (snmp_manager_buf));
snmp_manager_buf[sizeof (snmp_manager_buf) - 1] = '\0';
snmp_manager = snmp_manager_buf;
break;
case 'o':
/* Printing to stdout implies logging. */
- conf[CS_NTF_LOG] = 1;
- conf[CS_NTF_STDOUT] = 1;
+ conf[CS_NTF_LOG] = QB_TRUE;
+ conf[CS_NTF_STDOUT] = QB_TRUE;
break;
case 's':
- conf[CS_NTF_SNMP] = 1;
+ conf[CS_NTF_SNMP] = QB_TRUE;
break;
case 'd':
- conf[CS_NTF_DBUS] = 1;
+ conf[CS_NTF_DBUS] = QB_TRUE;
break;
case 'h':
default:
_cs_usage();
return EXIT_FAILURE;
}
}
+ qb_log_init("notifyd", LOG_DAEMON, LOG_INFO);
+
/* With -o the removed code added LOG_PERROR; the added code enables the QB_LOG_STDERR target instead. */
if (conf[CS_NTF_STDOUT]) {
- openlog(NULL, LOG_PID|LOG_PERROR, LOG_DAEMON);
- } else {
- openlog(NULL, LOG_PID, LOG_DAEMON);
+ qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD,
+ QB_LOG_FILTER_FILE, "*", LOG_DEBUG);
+ qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, conf[CS_NTF_STDOUT]);
}
_cs_check_config();
/* Detach from the terminal unless foreground mode was requested. */
if (!conf[CS_NTF_FG]) {
if (daemon(0, 0) < 0)
{
perror("daemon() failed");
return EXIT_FAILURE;
}
}
num_notifiers = 0;
/* The log notifier, when enabled, takes the first notifiers slot. */
if (conf[CS_NTF_LOG]) {
notifiers[num_notifiers].node_membership_fn =
_cs_syslog_node_membership_event;
notifiers[num_notifiers].node_quorum_fn =
_cs_syslog_node_quorum_event;
notifiers[num_notifiers].application_connection_fn =
_cs_syslog_application_connection_event;
num_notifiers++;
}
/* The loop must exist before the init functions add their descriptors to it. */
main_loop = qb_loop_create();
_cs_confdb_init();
_cs_quorum_init();
#ifdef HAVE_DBUS
if (conf[CS_NTF_DBUS]) {
_cs_dbus_init();
}
#endif /* HAVE_DBUS */
#ifdef ENABLE_SNMP
if (conf[CS_NTF_SNMP]) {
_cs_snmp_init();
}
#endif /* ENABLE_SNMP */
/* SIGINT, SIGQUIT and SIGTERM all stop the main loop through sig_exit_handler. */
qb_loop_signal_add(main_loop,
QB_LOOP_HIGH,
SIGINT,
NULL,
sig_exit_handler,
NULL);
qb_loop_signal_add(main_loop,
QB_LOOP_HIGH,
SIGQUIT,
NULL,
sig_exit_handler,
NULL);
qb_loop_signal_add(main_loop,
QB_LOOP_HIGH,
SIGTERM,
NULL,
sig_exit_handler,
NULL);
qb_loop_run(main_loop);
/* Shutdown after the loop returns: DBus, then quorum, then confdb. */
#ifdef HAVE_DBUS
if (conf[CS_NTF_DBUS]) {
_cs_dbus_release();
}
#endif /* HAVE_DBUS */
_cs_quorum_finalize();
_cs_confdb_finalize();
return 0;
}

File Metadata

Mime Type
text/x-diff
Expires
Thu, Jul 10, 2:00 AM (1 d, 14 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2009637
Default Alt Text
(769 KB)

Event Timeline