diff --git a/exec/coroparse.c b/exec/coroparse.c index 29875b90..302c3060 100644 --- a/exec/coroparse.c +++ b/exec/coroparse.c @@ -1,434 +1,434 @@ /* * Copyright (c) 2006, 2009 Red Hat, Inc. * * All rights reserved. * * Author: Patrick Caulfield (pcaulfie@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #define LOGSYS_UTILS_ONLY 1 #include #include "util.h" static int read_config_file_into_objdb( struct objdb_iface_ver0 *objdb, const char **error_string); static char error_string_response[512]; static char *strchr_rs (const char *haystack, int byte) { const char *end_address = strchr (haystack, byte); if (end_address) { end_address += 1; /* skip past { or = */ end_address += strspn (end_address, " \t"); } return ((char *) end_address); } static int aisparser_readconfig (struct objdb_iface_ver0 *objdb, const char **error_string) { if (read_config_file_into_objdb(objdb, error_string)) { return -1; } return 0; } static char *remove_whitespace(char *string) { char *start = string+strspn(string, " \t"); char *end = start+(strlen(start))-1; while ((*end == ' ' || *end == '\t' || *end == ':' || *end == '{') && end > start) end--; if (end != start) *(end+1) = '\0'; return start; } #define PCHECK_ADD_SUBSECTION 1 #define PCHECK_ADD_ITEM 2 typedef int (*parser_check_item_f)(struct objdb_iface_ver0 *objdb, hdb_handle_t parent_handle, int type, const char *name, const char **error_string); static int parse_section(FILE *fp, struct objdb_iface_ver0 *objdb, hdb_handle_t parent_handle, const char **error_string, parser_check_item_f parser_check_item_call) { char line[512]; int i; char *loc; int ignore_line; while (fgets (line, sizeof (line), fp)) { if (strlen(line) > 0) { if (line[strlen(line) - 1] == '\n') line[strlen(line) - 1] = '\0'; if (strlen (line) > 0 && line[strlen(line) - 1] == '\r') line[strlen(line) - 1] = '\0'; } /* * Clear out white space and tabs */ for (i = strlen (line) - 1; i > -1; i--) { if (line[i] == '\t' || line[i] == ' ') { line[i] = '\0'; } else { break; } } ignore_line = 1; for (i = 0; i < strlen (line); i++) { if (line[i] != '\t' && line[i] != ' ') { if (line[i] 
!= '#')
					ignore_line = 0;
				/* only the first non-blank character is inspected */
				break;
			}
		}
		/*
		 * Clear out comments and empty lines
		 */
		if (ignore_line) {
			continue;
		}

		/* New section ? */
		if ((loc = strchr_rs (line, '{'))) {
			hdb_handle_t new_parent;
			char *section = remove_whitespace(line);

			/* step back one character from loc and end the line there */
			loc--;
			*loc = '\0';
			if (parser_check_item_call) {
				/* a zero return from the callback rejects the section */
				if (!parser_check_item_call(objdb, parent_handle, PCHECK_ADD_SUBSECTION,
				    section, error_string))
					    return -1;
			}

			objdb->object_create (parent_handle, &new_parent,
					      section, strlen (section));
			/* the recursive call returns 0 once it has seen a '}' */
			if (parse_section(fp, objdb, new_parent, error_string, parser_check_item_call))
				return -1;
		}

		/* New key/value */
		/*
		 * NOTE(review): there is no "else" here - a line already handled
		 * as a section above is still tested for ':' and '}'.
		 */
		if ((loc = strchr_rs (line, ':'))) {
			char *key;
			char *value;

			/* end the key part one character in front of the value */
			*(loc-1) = '\0';
			key = remove_whitespace(line);
			value = remove_whitespace(loc);
			if (parser_check_item_call) {
				/* a zero return from the callback rejects the key */
				if (!parser_check_item_call(objdb, parent_handle, PCHECK_ADD_ITEM,
				    key, error_string))
					    return -1;
			}
			/* the stored length includes the terminating NUL */
			objdb->object_key_create_typed (parent_handle, key,
				value, strlen (value) + 1, OBJDB_VALUETYPE_STRING);
		}

		/* end of the current section */
		if (strchr_rs (line, '}')) {
			return 0;
		}
	}

	/* EOF: only acceptable when parsing directly below OBJECT_PARENT_HANDLE */
	if (parent_handle != OBJECT_PARENT_HANDLE) {
		*error_string = "Missing closing brace";
		return -1;
	}

	return 0;
}

/*
 * parse_section() filter for the uidgid files.
 *
 * Accepts only a sub-section named "uidgid" directly below
 * OBJECT_PARENT_HANDLE and only the keys "uid" and "gid".
 * Returns 1 to accept; returns 0 with *error_string set to reject.
 */
static int parser_check_item_uidgid(struct objdb_iface_ver0 *objdb,
			hdb_handle_t parent_handle,
			int type,
			const char *name,
			const char **error_string)
{
	if (type == PCHECK_ADD_SUBSECTION) {
		if (parent_handle != OBJECT_PARENT_HANDLE) {
			*error_string = "uidgid: Can't add second level subsection";
			return 0;
		}

		if (strcmp (name, "uidgid") != 0) {
			*error_string = "uidgid: Can't add subsection different then uidgid";
			return 0;
		}
	}

	if (type == PCHECK_ADD_ITEM) {
		if (!(strcmp (name, "uid") == 0 || strcmp (name, "gid") == 0)) {
			*error_string = "uidgid: Only uid and gid are allowed items";
			return 0;
		}
	}

	return 1;
}

/*
 * Feed the regular files found in COROSYSCONFDIR "/uidgid.d" through
 * parse_section() with parser_check_item_uidgid as filter.
 */
static int read_uidgid_files_into_objdb(
	struct objdb_iface_ver0 *objdb,
	const char **error_string)
{
	FILE *fp;
	const char *dirname;
	DIR *dp;
	struct dirent *dirent;
	struct dirent *entry;
	char filename[PATH_MAX + FILENAME_MAX + 1];
	int res = 0;
	size_t len;
	int return_code;
struct stat stat_buf; dirname = COROSYSCONFDIR "/uidgid.d"; dp = opendir (dirname); if (dp == NULL) return 0; len = offsetof(struct dirent, d_name) + NAME_MAX + 1; entry = malloc(len); if (entry == NULL) { res = 0; goto error_exit; } for (return_code = readdir_r(dp, entry, &dirent); dirent != NULL && return_code == 0; return_code = readdir_r(dp, entry, &dirent)) { snprintf(filename, sizeof (filename), "%s/%s", dirname, dirent->d_name); stat (filename, &stat_buf); if (S_ISREG(stat_buf.st_mode)) { fp = fopen (filename, "r"); if (fp == NULL) continue; res = parse_section(fp, objdb, OBJECT_PARENT_HANDLE, error_string, parser_check_item_uidgid); fclose (fp); if (res != 0) { goto error_exit; } } } error_exit: free (entry); closedir(dp); return res; } static int read_service_files_into_objdb( struct objdb_iface_ver0 *objdb, const char **error_string) { FILE *fp; const char *dirname; DIR *dp; struct dirent *dirent; struct dirent *entry; char filename[PATH_MAX + FILENAME_MAX + 1]; int res = 0; struct stat stat_buf; size_t len; int return_code; dirname = COROSYSCONFDIR "/service.d"; dp = opendir (dirname); if (dp == NULL) return 0; len = offsetof(struct dirent, d_name) + NAME_MAX + 1; entry = malloc(len); if (entry == NULL) { res = 0; goto error_exit; } for (return_code = readdir_r(dp, entry, &dirent); dirent != NULL && return_code == 0; return_code = readdir_r(dp, entry, &dirent)) { snprintf(filename, sizeof (filename), "%s/%s", dirname, dirent->d_name); stat (filename, &stat_buf); if (S_ISREG(stat_buf.st_mode)) { fp = fopen (filename, "r"); if (fp == NULL) continue; res = parse_section(fp, objdb, OBJECT_PARENT_HANDLE, error_string, NULL); fclose (fp); if (res != 0) { goto error_exit; } } } error_exit: free (entry); closedir(dp); return res; } /* Read config file and load into objdb */ static int read_config_file_into_objdb( struct objdb_iface_ver0 *objdb, const char **error_string) { FILE *fp; const char *filename; char *error_reason = error_string_response; int res; 
filename = getenv ("COROSYNC_MAIN_CONFIG_FILE"); if (!filename) filename = COROSYSCONFDIR "/corosync.conf"; fp = fopen (filename, "r"); if (fp == NULL) { char error_str[100]; - const char *error_ptr; - LOGSYS_STRERROR_R (error_ptr, errno, error_str, sizeof(error_str)); + const char *error_ptr = qb_strerror_r(errno, error_str, sizeof(error_str)); snprintf (error_reason, sizeof(error_string_response), "Can't read file %s reason = (%s)\n", filename, error_ptr); *error_string = error_reason; return -1; } res = parse_section(fp, objdb, OBJECT_PARENT_HANDLE, error_string, NULL); fclose(fp); if (res == 0) { res = read_uidgid_files_into_objdb(objdb, error_string); } if (res == 0) { res = read_service_files_into_objdb(objdb, error_string); } if (res == 0) { snprintf (error_reason, sizeof(error_string_response), "Successfully read main configuration file '%s'.\n", filename); *error_string = error_reason; } return res; } /* * Dynamic Loader definition */ struct config_iface_ver0 aisparser_iface_ver0 = { .config_readconfig = aisparser_readconfig }; struct lcr_iface corosync_aisparser_ver0[1] = { { .name = "corosync_parser", .version = 0, .versions_replace = 0, .versions_replace_count = 0, .dependencies = 0, .dependency_count = 0, .constructor = NULL, .destructor = NULL, .interfaces = NULL, } }; struct corosync_service_handler *aisparser_get_handler_ver0 (void); struct lcr_comp aisparser_comp_ver0 = { .iface_count = 1, .ifaces = corosync_aisparser_ver0 }; #ifdef COROSYNC_SOLARIS void corosync_lcr_component_register (void); void corosync_lcr_component_register (void) { #else __attribute__ ((constructor)) static void corosync_lcr_component_register (void) { #endif lcr_interfaces_set (&corosync_aisparser_ver0[0], &aisparser_iface_ver0); lcr_component_register (&aisparser_comp_ver0); } diff --git a/exec/ipc_glue.c b/exec/ipc_glue.c index 60865805..d6ca20d2 100644 --- a/exec/ipc_glue.c +++ b/exec/ipc_glue.c @@ -1,928 +1,902 @@ /* * Copyright (c) 2010 Red Hat, Inc. 
* * All rights reserved. * * Author: Angus Salkeld * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Red Hat, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mainconfig.h" #include "sync.h" #include "syncv2.h" #include "timer.h" #include "main.h" #include "util.h" #include "apidef.h" #include "service.h" LOGSYS_DECLARE_SUBSYS ("MAIN"); static struct corosync_api_v1 *api = NULL; -static int ipc_subsys_id = -1; static int32_t ipc_not_enough_fds_left = 0; static int32_t ipc_fc_is_quorate; /* boolean */ static int32_t ipc_fc_totem_queue_level; /* percentage used */ static int32_t ipc_fc_sync_in_process; /* boolean */ static qb_handle_t object_connection_handle; struct cs_ipcs_mapper { int32_t id; qb_ipcs_service_t *inst; char name[256]; }; struct outq_item { void *msg; size_t mlen; struct list_head list; }; static struct cs_ipcs_mapper ipcs_mapper[SERVICE_HANDLER_MAXIMUM_COUNT]; static int32_t cs_ipcs_job_add(enum qb_loop_priority p, void *data, qb_loop_job_dispatch_fn fn); static int32_t cs_ipcs_dispatch_add(enum qb_loop_priority p, int32_t fd, int32_t events, void *data, qb_ipcs_dispatch_fn_t fn); static int32_t cs_ipcs_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t events, void *data, qb_ipcs_dispatch_fn_t fn); static int32_t cs_ipcs_dispatch_del(int32_t fd); static struct qb_ipcs_poll_handlers corosync_poll_funcs = { .job_add = cs_ipcs_job_add, .dispatch_add = cs_ipcs_dispatch_add, .dispatch_mod = cs_ipcs_dispatch_mod, .dispatch_del = cs_ipcs_dispatch_del, }; static int32_t cs_ipcs_connection_accept (qb_ipcs_connection_t *c, uid_t euid, gid_t egid); static void cs_ipcs_connection_created(qb_ipcs_connection_t *c); static int32_t cs_ipcs_msg_process(qb_ipcs_connection_t *c, void *data, size_t size); static int32_t cs_ipcs_connection_closed (qb_ipcs_connection_t *c); static void cs_ipcs_connection_destroyed (qb_ipcs_connection_t *c); static struct qb_ipcs_service_handlers corosync_service_funcs = { .connection_accept = 
cs_ipcs_connection_accept, .connection_created = cs_ipcs_connection_created, .msg_process = cs_ipcs_msg_process, .connection_closed = cs_ipcs_connection_closed, .connection_destroyed = cs_ipcs_connection_destroyed, }; static const char* cs_ipcs_serv_short_name(int32_t service_id) { const char *name; switch (service_id) { case EVS_SERVICE: name = "evs"; break; case CLM_SERVICE: name = "saClm"; break; case AMF_SERVICE: name = "saAmf"; break; case CKPT_SERVICE: name = "saCkpt"; break; case EVT_SERVICE: name = "saEvt"; break; case LCK_SERVICE: name = "saLck"; break; case MSG_SERVICE: name = "saMsg"; break; case CFG_SERVICE: name = "cfg"; break; case CPG_SERVICE: name = "cpg"; break; case CMAN_SERVICE: name = "cman"; break; case PCMK_SERVICE: name = "pacemaker.engine"; break; case CONFDB_SERVICE: name = "confdb"; break; case QUORUM_SERVICE: name = "quorum"; break; case PLOAD_SERVICE: name = "pload"; break; case TMR_SERVICE: name = "saTmr"; break; case VOTEQUORUM_SERVICE: name = "votequorum"; break; case NTF_SERVICE: name = "saNtf"; break; case AMF_V2_SERVICE: name = "saAmfV2"; break; case TST_SV1_SERVICE: name = "tst"; break; case TST_SV2_SERVICE: name = "tst2"; break; case MON_SERVICE: name = "mon"; break; case WD_SERVICE: name = "wd"; break; default: name = NULL; break; } return name; } int32_t cs_ipcs_service_destroy(int32_t service_id) { if (ipcs_mapper[service_id].inst) { qb_ipcs_destroy(ipcs_mapper[service_id].inst); ipcs_mapper[service_id].inst = NULL; } return 0; } static int32_t cs_ipcs_connection_accept (qb_ipcs_connection_t *c, uid_t euid, gid_t egid) { struct list_head *iter; int32_t service = qb_ipcs_service_id_get(c); if (ais_service[service] == NULL || ais_service_exiting[service] || ipcs_mapper[service].inst == NULL) { return -ENOSYS; } if (ipc_not_enough_fds_left) { return -EMFILE; } if (euid == 0 || egid == 0) { return 0; } for (iter = uidgid_list_head.next; iter != &uidgid_list_head; iter = iter->next) { struct uidgid_item *ugi = qb_list_entry (iter, 
struct uidgid_item, list); if (euid == ugi->uid || egid == ugi->gid) return 0; } log_printf(LOGSYS_LEVEL_ERROR, "Denied connection attempt from %d:%d", euid, egid); return -EACCES; } static char * pid_to_name (pid_t pid, char *out_name, size_t name_len) { char *name; char *rest; FILE *fp; char fname[32]; char buf[256]; snprintf (fname, 32, "/proc/%d/stat", pid); fp = fopen (fname, "r"); if (!fp) { return NULL; } if (fgets (buf, sizeof (buf), fp) == NULL) { fclose (fp); return NULL; } fclose (fp); name = strrchr (buf, '('); if (!name) { return NULL; } /* move past the bracket */ name++; rest = strrchr (buf, ')'); if (rest == NULL || rest[1] != ' ') { return NULL; } *rest = '\0'; /* move past the NULL and space */ rest += 2; /* copy the name */ strncpy (out_name, name, name_len); out_name[name_len - 1] = '\0'; return out_name; } struct cs_ipcs_conn_context { qb_handle_t stats_handle; struct list_head outq_head; int32_t queuing; uint32_t queued; uint64_t invalid_request; uint64_t overload; uint32_t sent; char data[1]; }; static void cs_ipcs_connection_created(qb_ipcs_connection_t *c) { int32_t service = 0; uint32_t zero_32 = 0; uint64_t zero_64 = 0; unsigned int key_incr_dummy; qb_handle_t object_handle; struct cs_ipcs_conn_context *context; char conn_name[42]; char proc_name[32]; struct qb_ipcs_connection_stats stats; int32_t size = sizeof(struct cs_ipcs_conn_context); log_printf(LOG_INFO, "%s() new connection", __func__); service = qb_ipcs_service_id_get(c); size += ais_service[service]->private_data_size; context = calloc(1, size); list_init(&context->outq_head); context->queuing = QB_FALSE; context->queued = 0; context->sent = 0; qb_ipcs_context_set(c, context); ais_service[service]->lib_init_fn(c); api->object_key_increment (object_connection_handle, "active", strlen("active"), &key_incr_dummy); qb_ipcs_connection_stats_get(c, &stats, QB_FALSE); if (stats.client_pid > 0) { if (pid_to_name (stats.client_pid, proc_name, sizeof(proc_name))) { snprintf (conn_name, 
sizeof(conn_name),
				"%s:%d:%p",
				proc_name, stats.client_pid, c);
		} else {
			/* process name not available: pid and connection pointer only */
			snprintf (conn_name,
				sizeof(conn_name),
				"%d:%p",
				stats.client_pid, c);
		}
	} else {
		/* no usable pid: connection pointer only */
		snprintf (conn_name,
			sizeof(conn_name),
			"%p", c);
	}

	/* one statistics object per connection, named after conn_name */
	api->object_create (object_connection_handle,
		&object_handle,
		conn_name,
		strlen (conn_name));

	context->stats_handle = object_handle;

	/*
	 * All keys start at zero.
	 * NOTE(review): "client_pid" is typed INT32 and "requests" INT64 while
	 * the other keys are unsigned - confirm that this is intended.
	 */
	api->object_key_create_typed (object_handle,
		"service_id",
		&zero_32, sizeof (zero_32),
		OBJDB_VALUETYPE_UINT32);

	api->object_key_create_typed (object_handle,
		"client_pid",
		&zero_32, sizeof (zero_32),
		OBJDB_VALUETYPE_INT32);

	api->object_key_create_typed (object_handle,
		"responses",
		&zero_64, sizeof (zero_64),
		OBJDB_VALUETYPE_UINT64);

	api->object_key_create_typed (object_handle,
		"dispatched",
		&zero_64, sizeof (zero_64),
		OBJDB_VALUETYPE_UINT64);

	api->object_key_create_typed (object_handle,
		"requests",
		&zero_64, sizeof (zero_64),
		OBJDB_VALUETYPE_INT64);

	api->object_key_create_typed (object_handle,
		"send_retries",
		&zero_64, sizeof (zero_64),
		OBJDB_VALUETYPE_UINT64);

	api->object_key_create_typed (object_handle,
		"recv_retries",
		&zero_64, sizeof (zero_64),
		OBJDB_VALUETYPE_UINT64);

	api->object_key_create_typed (object_handle,
		"flow_control",
		&zero_32, sizeof (zero_32),
		OBJDB_VALUETYPE_UINT32);

	api->object_key_create_typed (object_handle,
		"flow_control_count",
		&zero_64, sizeof (zero_64),
		OBJDB_VALUETYPE_UINT64);

	api->object_key_create_typed (object_handle,
		"queue_size",
		&zero_32, sizeof (zero_32),
		OBJDB_VALUETYPE_UINT32);

	api->object_key_create_typed (object_handle,
		"invalid_request",
		&zero_64, sizeof (zero_64),
		OBJDB_VALUETYPE_UINT64);

	api->object_key_create_typed (object_handle,
		"overload",
		&zero_64, sizeof (zero_64),
		OBJDB_VALUETYPE_UINT64);
}

/* Take a reference on the libqb connection conn. */
void cs_ipc_refcnt_inc(void *conn)
{
	qb_ipcs_connection_ref(conn);
}

/* Drop a reference on the libqb connection conn. */
void cs_ipc_refcnt_dec(void *conn)
{
	qb_ipcs_connection_unref(conn);
}

/*
 * Return the address of the data member of the context attached to conn.
 * The context pointer is not checked for NULL.
 */
void *cs_ipcs_private_data_get(void *conn)
{
	struct cs_ipcs_conn_context *cnx;
	cnx = qb_ipcs_context_get(conn);
	return &cnx->data[0];
}

static void
cs_ipcs_connection_destroyed (qb_ipcs_connection_t *c) { struct cs_ipcs_conn_context *context; struct list_head *list, *list_next; struct outq_item *outq_item; log_printf(LOG_INFO, "%s() ", __func__); context = qb_ipcs_context_get(c); if (context) { for (list = context->outq_head.next; list != &context->outq_head; list = list_next) { list_next = list->next; outq_item = list_entry (list, struct outq_item, list); list_del (list); free (outq_item->msg); free (outq_item); } free(context); } } static int32_t cs_ipcs_connection_closed (qb_ipcs_connection_t *c) { struct cs_ipcs_conn_context *cnx; unsigned int key_incr_dummy; int32_t res = 0; int32_t service = qb_ipcs_service_id_get(c); log_printf(LOG_INFO, "%s() ", __func__); res = ais_service[service]->lib_exit_fn(c); if (res != 0) { return res; } cnx = qb_ipcs_context_get(c); api->object_destroy (cnx->stats_handle); api->object_key_increment (object_connection_handle, "closed", strlen("closed"), &key_incr_dummy); api->object_key_decrement (object_connection_handle, "active", strlen("active"), &key_incr_dummy); return 0; } int cs_ipcs_response_iov_send (void *conn, const struct iovec *iov, unsigned int iov_len) { int32_t rc = qb_ipcs_response_sendv(conn, iov, iov_len); if (rc >= 0) { return 0; } return rc; } int cs_ipcs_response_send(void *conn, const void *msg, size_t mlen) { int32_t rc = qb_ipcs_response_send(conn, msg, mlen); if (rc >= 0) { return 0; } return rc; } static void outq_flush (void *data) { qb_ipcs_connection_t *conn = data; struct list_head *list, *list_next; struct outq_item *outq_item; int32_t rc; struct cs_ipcs_conn_context *context = qb_ipcs_context_get(conn); for (list = context->outq_head.next; list != &context->outq_head; list = list_next) { list_next = list->next; outq_item = list_entry (list, struct outq_item, list); rc = qb_ipcs_event_send(conn, outq_item->msg, outq_item->mlen); if (rc != outq_item->mlen) { break; } context->sent++; context->queued--; list_del (list); free (outq_item->msg); free 
(outq_item);
	}
	/* queue drained: leave queuing mode and reset the counters */
	if (list_empty (&context->outq_head)) {
		context->queuing = QB_FALSE;
		log_printf(LOGSYS_LEVEL_INFO, "Q empty, queued:%d sent:%d.",
			context->queued, context->sent);
		context->queued = 0;
		context->sent = 0;
		return;
	}
	/* items are left: schedule this function again as a high priority job */
	qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, conn, outq_flush);
	/* -EAGAIN is the expected reason for stopping; anything else is logged */
	if (rc < 0 && rc != -EAGAIN) {
		log_printf(LOGSYS_LEVEL_ERROR, "event_send retuned %d!", rc);
	}
}

/*
 * Send the event described by iov/iov_len to conn, or append a copy of it
 * to the connection's out queue.
 *
 * While the connection is not queuing the event is sent directly.  A result
 * of -EAGAIN switches the connection to queuing mode and schedules
 * outq_flush(); any other short or failed send is logged and the event is
 * dropped.  In queuing mode the vectors are copied into one buffer and
 * queued.  If that copy cannot be allocated the connection is disconnected.
 */
static void msg_send_or_queue(qb_ipcs_connection_t *conn, const struct iovec *iov, uint32_t iov_len)
{
	int32_t rc = 0;
	int32_t i;
	int32_t bytes_msg = 0;
	struct outq_item *outq_item;
	char *write_buf = 0;
	struct cs_ipcs_conn_context *context = qb_ipcs_context_get(conn);

	/* total payload size over all vectors */
	for (i = 0; i < iov_len; i++) {
		bytes_msg += iov[i].iov_len;
	}

	if (!context->queuing) {
		assert(list_empty (&context->outq_head));
		rc = qb_ipcs_event_sendv(conn, iov, iov_len);
		/* complete send: nothing to queue */
		if (rc == bytes_msg) {
			context->sent++;
			return;
		}
		if (rc == -EAGAIN) {
			/* start queuing; this event is queued by the code below */
			context->queued = 0;
			context->sent = 0;
			context->queuing = QB_TRUE;
			qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, conn, outq_flush);
		} else {
			log_printf(LOGSYS_LEVEL_ERROR, "event_send retuned %d, expected %d!", rc, bytes_msg);
			return;
		}
	}
	outq_item = malloc (sizeof (struct outq_item));
	if (outq_item == NULL) {
		qb_ipcs_disconnect(conn);
		return;
	}
	outq_item->msg = malloc (bytes_msg);
	if (outq_item->msg == NULL) {
		free (outq_item);
		qb_ipcs_disconnect(conn);
		return;
	}

	/* flatten the vectors into the queued buffer */
	write_buf = outq_item->msg;
	for (i = 0; i < iov_len; i++) {
		memcpy (write_buf, iov[i].iov_base, iov[i].iov_len);
		write_buf += iov[i].iov_len;
	}
	outq_item->mlen = bytes_msg;
	list_init (&outq_item->list);
	list_add_tail (&outq_item->list, &context->outq_head);
	context->queued++;
}

/*
 * Send or queue an event of mlen bytes.  Always returns 0; failures are
 * handled inside msg_send_or_queue().
 */
int cs_ipcs_dispatch_send(void *conn, const void *msg, size_t mlen)
{
	struct iovec iov;
	iov.iov_base = (void *)msg;
	iov.iov_len = mlen;
	msg_send_or_queue (conn, &iov, 1);
	return 0;
}

/*
 * Send or queue an event given as iov_len vectors.  Always returns 0;
 * failures are handled inside msg_send_or_queue().
 */
int cs_ipcs_dispatch_iov_send (void *conn,
	const struct iovec *iov,
	unsigned int iov_len)
{
	msg_send_or_queue(conn, iov, iov_len);
	return 0;
}

static
int32_t cs_ipcs_msg_process(qb_ipcs_connection_t *c,
		void *data, size_t size)
{
	/*
	 * Handle one request received on connection c.
	 *
	 * corosync_sending_allowed() decides whether the request may run:
	 *   -EINVAL      the request is invalid; the client gets
	 *                CS_ERR_INVALID_PARAM and -EINVAL is returned
	 *   other < 0    overload; the client gets CS_ERR_TRY_AGAIN and
	 *                -ENOBUFS is returned
	 *   > 0          the service's lib_handler_fn runs, 0 is returned
	 *   0            nothing is done and -1 is returned
	 * For the asynchronous call (CPG_SERVICE, request id 2) no error
	 * response is sent; the condition is only logged.
	 *
	 * NOTE(review): request_pt->id comes from the client and is used as
	 * an index into lib_engine[] without a range check visible here, and
	 * size is not compared with the header size - confirm this is
	 * validated elsewhere.
	 */
	struct qb_ipc_response_header response;
	struct qb_ipc_request_header *request_pt = (struct qb_ipc_request_header *)data;
	int32_t service = qb_ipcs_service_id_get(c);
	int32_t send_ok = 0;
	int32_t is_async_call = QB_FALSE;
	ssize_t res = -1;
	int sending_allowed_private_data;
	struct cs_ipcs_conn_context *cnx;

	send_ok = corosync_sending_allowed (service,
			request_pt->id,
			request_pt,
			&sending_allowed_private_data);

	is_async_call = (service == CPG_SERVICE && request_pt->id == 2);

	/*
	 * This happens when the message contains some kind of invalid
	 * parameter, such as an invalid size
	 */
	if (send_ok == -EINVAL) {
		response.size = sizeof (response);
		response.id = 0;
		response.error = CS_ERR_INVALID_PARAM;

		cnx = qb_ipcs_context_get(c);
		if (cnx) {
			cnx->invalid_request++;
		}

		if (is_async_call) {
			log_printf(LOGSYS_LEVEL_INFO, "*** %s() invalid message! size:%d error:%d",
				__func__, response.size, response.error);
		} else {
			qb_ipcs_response_send (c,
				&response,
				sizeof (response));
		}
		res = -EINVAL;
	} else if (send_ok < 0) {
		cnx = qb_ipcs_context_get(c);
		if (cnx) {
			cnx->overload++;
		}
		if (!is_async_call) {
			/*
			 * Overload, tell library to retry
			 */
			response.size = sizeof (response);
			response.id = 0;
			response.error = CS_ERR_TRY_AGAIN;
			qb_ipcs_response_send (c,
				&response,
				sizeof (response));
		} else {
			log_printf(LOGSYS_LEVEL_WARNING,
				"*** %s() (%d:%d - %d) %s!",
				__func__, service, request_pt->id,
				is_async_call, strerror(-send_ok));
		}
		res = -ENOBUFS;
	}

	/*
	 * Only a positive answer lets the request through.  The former
	 * "if (send_ok)" was also true for the negative error codes handled
	 * above, so a rejected request was still handed to the service and
	 * the error result was overwritten with 0.
	 */
	if (send_ok > 0) {
		ais_service[service]->lib_engine[request_pt->id].lib_handler_fn(c, request_pt);
		res = 0;
	}
	corosync_sending_allowed_release (&sending_allowed_private_data);
	return res;
}

/* libqb poll handler: queue fn(data) as a job with priority p on the corosync main loop. */
static int32_t cs_ipcs_job_add(enum qb_loop_priority p, void *data, qb_loop_job_dispatch_fn fn)
{
	return qb_loop_job_add(cs_poll_handle_get(), p, data, fn);
}

/* libqb poll handler: start polling fd for events on the corosync main loop. */
static int32_t cs_ipcs_dispatch_add(enum qb_loop_priority p, int32_t fd, int32_t events,
	void *data, qb_ipcs_dispatch_fn_t fn)
{
return qb_loop_poll_add(cs_poll_handle_get(), p, fd, events, data, fn); } static int32_t cs_ipcs_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t events, void *data, qb_ipcs_dispatch_fn_t fn) { return qb_loop_poll_mod(cs_poll_handle_get(), p, fd, events, data, fn); } static int32_t cs_ipcs_dispatch_del(int32_t fd) { return qb_loop_poll_del(cs_poll_handle_get(), fd); } static void cs_ipcs_low_fds_event(int32_t not_enough, int32_t fds_available) { ipc_not_enough_fds_left = not_enough; if (not_enough) { log_printf(LOGSYS_LEVEL_WARNING, "refusing new connections (fds_available:%d)\n", fds_available); } else { log_printf(LOGSYS_LEVEL_NOTICE, "allowing new connections (fds_available:%d)\n", fds_available); } } int32_t cs_ipcs_q_level_get(void) { return ipc_fc_totem_queue_level; } static qb_loop_timer_handle ipcs_check_for_flow_control_timer; static void cs_ipcs_check_for_flow_control(void) { int32_t i; int32_t fc_enabled; for (i = 0; i < SERVICE_HANDLER_MAXIMUM_COUNT; i++) { if (ais_service[i] == NULL || ipcs_mapper[i].inst == NULL) { continue; } fc_enabled = QB_TRUE; if (ipc_fc_is_quorate == 1 || ais_service[i]->allow_inquorate == CS_LIB_ALLOW_INQUORATE) { /* * we are quorate * now check flow control */ if (ipc_fc_totem_queue_level != TOTEM_Q_LEVEL_CRITICAL && ipc_fc_sync_in_process == 0) { fc_enabled = QB_FALSE; } } if (fc_enabled) { qb_ipcs_request_rate_limit(ipcs_mapper[i].inst, QB_IPCS_RATE_OFF); qb_loop_timer_add(cs_poll_handle_get(), QB_LOOP_MED, 1*QB_TIME_NS_IN_MSEC, NULL, corosync_recheck_the_q_level, &ipcs_check_for_flow_control_timer); } else if (ipc_fc_totem_queue_level == TOTEM_Q_LEVEL_LOW) { qb_ipcs_request_rate_limit(ipcs_mapper[i].inst, QB_IPCS_RATE_FAST); } else if (ipc_fc_totem_queue_level == TOTEM_Q_LEVEL_GOOD) { qb_ipcs_request_rate_limit(ipcs_mapper[i].inst, QB_IPCS_RATE_NORMAL); } else if (ipc_fc_totem_queue_level == TOTEM_Q_LEVEL_HIGH) { qb_ipcs_request_rate_limit(ipcs_mapper[i].inst, QB_IPCS_RATE_SLOW); } } } static void 
cs_ipcs_fc_quorum_changed(int quorate, void *context)
{
	/* remember the new quorum state, then re-evaluate the rate limits */
	ipc_fc_is_quorate = quorate;
	cs_ipcs_check_for_flow_control();
}

/* Remember the new totem queue level, then re-evaluate the rate limits. */
static void cs_ipcs_totem_queue_level_changed(enum totem_q_level level)
{
	ipc_fc_totem_queue_level = level;
	cs_ipcs_check_for_flow_control();
}

/* Remember whether a sync is in process, then re-evaluate the rate limits. */
void cs_ipcs_sync_state_changed(int32_t sync_in_process)
{
	ipc_fc_sync_in_process = sync_in_process;
	cs_ipcs_check_for_flow_control();
}

-static void cs_ipcs_libqb_log_fn(const char *file_name,
-	int32_t file_line,
-	int32_t severity,
-	const char *msg)
-{
-	int32_t level = severity;
-	if (severity > LOG_DEBUG) {
-		level = LOGSYS_LEVEL_DEBUG;
-	}
-
-	_logsys_log_printf (LOGSYS_ENCODE_RECID(level,
-		ipc_subsys_id,
-		LOGSYS_RECID_LOG),
-		__func__, file_name, file_line, "%s", msg);
-}
-
/*
 * Copy the current libqb statistics and the queue counters of every
 * connection of every service with an IPC instance into the connection's
 * objdb statistics object.
 */
void cs_ipcs_stats_update(void)
{
	int32_t i;
	struct qb_ipcs_stats srv_stats;
	struct qb_ipcs_connection_stats stats;
	qb_ipcs_connection_t *c;
	struct cs_ipcs_conn_context *cnx;

	for (i = 0; i < SERVICE_HANDLER_MAXIMUM_COUNT; i++) {
		if (ais_service[i] == NULL || ipcs_mapper[i].inst == NULL) {
			continue;
		}
		/* srv_stats is fetched but none of its fields are used in this function */
		qb_ipcs_stats_get(ipcs_mapper[i].inst, &srv_stats, QB_FALSE);

		for (c = qb_ipcs_connection_first_get(ipcs_mapper[i].inst); c;
			c = qb_ipcs_connection_next_get(ipcs_mapper[i].inst, c)) {

			cnx = qb_ipcs_context_get(c);
			/*
			 * NOTE(review): this continue also skips the
			 * qb_ipcs_connection_unref() at the end of the loop
			 * body - confirm that no reference is leaked.
			 */
			if (cnx == NULL) continue;

			qb_ipcs_connection_stats_get(c, &stats, QB_FALSE);

			api->object_key_replace(cnx->stats_handle,
				"client_pid", strlen("client_pid"),
				&stats.client_pid, sizeof(uint32_t));

			api->object_key_replace(cnx->stats_handle,
				"requests", strlen("requests"),
				&stats.requests, sizeof(uint64_t));
			api->object_key_replace(cnx->stats_handle,
				"responses", strlen("responses"),
				&stats.responses, sizeof(uint64_t));
			/* the "dispatched" key carries the libqb events counter */
			api->object_key_replace(cnx->stats_handle,
				"dispatched", strlen("dispatched"),
				&stats.events, sizeof(uint64_t));
			api->object_key_replace(cnx->stats_handle,
				"send_retries", strlen("send_retries"),
				&stats.send_retries, sizeof(uint64_t));
			api->object_key_replace(cnx->stats_handle,
				"recv_retries",
strlen("recv_retries"), &stats.recv_retries, sizeof(uint64_t)); api->object_key_replace(cnx->stats_handle, "flow_control", strlen("flow_control"), &stats.flow_control_state, sizeof(uint32_t)); api->object_key_replace(cnx->stats_handle, "flow_control_count", strlen("flow_control_count"), &stats.flow_control_count, sizeof(uint64_t)); api->object_key_replace(cnx->stats_handle, "queue_size", strlen("queue_size"), &cnx->queued, sizeof(uint32_t)); api->object_key_replace(cnx->stats_handle, "invalid_request", strlen("invalid_request"), &cnx->invalid_request, sizeof(uint64_t)); api->object_key_replace(cnx->stats_handle, "overload", strlen("overload"), &cnx->overload, sizeof(uint64_t)); qb_ipcs_connection_unref(c); } } } void cs_ipcs_service_init(struct corosync_service_engine *service) { if (service->lib_engine_count == 0) { log_printf (LOGSYS_LEVEL_DEBUG, "NOT Initializing IPC on %s [%d]", cs_ipcs_serv_short_name(service->id), service->id); return; } ipcs_mapper[service->id].id = service->id; strcpy(ipcs_mapper[service->id].name, cs_ipcs_serv_short_name(service->id)); log_printf (LOGSYS_LEVEL_DEBUG, "Initializing IPC on %s [%d]", ipcs_mapper[service->id].name, ipcs_mapper[service->id].id); ipcs_mapper[service->id].inst = qb_ipcs_create(ipcs_mapper[service->id].name, ipcs_mapper[service->id].id, QB_IPC_SHM, &corosync_service_funcs); assert(ipcs_mapper[service->id].inst); qb_ipcs_poll_handlers_set(ipcs_mapper[service->id].inst, &corosync_poll_funcs); qb_ipcs_run(ipcs_mapper[service->id].inst); } void cs_ipcs_init(void) { qb_handle_t object_find_handle; qb_handle_t object_runtime_handle; uint64_t zero_64 = 0; api = apidef_get (); qb_loop_poll_low_fds_event_set(cs_poll_handle_get(), cs_ipcs_low_fds_event); - ipc_subsys_id = _logsys_subsys_create ("IPC"); - if (ipc_subsys_id < 0) { - log_printf (LOGSYS_LEVEL_ERROR, - "Could not initialize IPC logging subsystem\n"); - corosync_exit_error (AIS_DONE_INIT_SERVICES); - } - - qb_util_set_log_function (cs_ipcs_libqb_log_fn); - 
api->quorum_register_callback (cs_ipcs_fc_quorum_changed, NULL); totempg_queue_level_register_callback (cs_ipcs_totem_queue_level_changed); api->object_find_create (OBJECT_PARENT_HANDLE, "runtime", strlen ("runtime"), &object_find_handle); if (api->object_find_next (object_find_handle, &object_runtime_handle) != 0) { log_printf (LOGSYS_LEVEL_ERROR,"arrg no runtime"); return; } /* Connection objects */ api->object_create (object_runtime_handle, &object_connection_handle, "connections", strlen ("connections")); api->object_key_create_typed (object_connection_handle, "active", &zero_64, sizeof (zero_64), OBJDB_VALUETYPE_UINT64); api->object_key_create_typed (object_connection_handle, "closed", &zero_64, sizeof (zero_64), OBJDB_VALUETYPE_UINT64); } diff --git a/exec/logsys.c b/exec/logsys.c index cef745cd..ec30c83e 100644 --- a/exec/logsys.c +++ b/exec/logsys.c @@ -1,1674 +1,725 @@ /* * Copyright (c) 2002-2004 MontaVista Software, Inc. * Copyright (c) 2006-2010 Red Hat, Inc. * * Author: Steven Dake (sdake@redhat.com) * Author: Lon Hohberger (lhh@redhat.com) * Author: Fabio M. Di Nitto (fdinitto@redhat.com) * * All rights reserved. * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include -#include #include +#include +#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if defined(COROSYNC_LINUX) -#include -#endif -#if defined(COROSYNC_BSD) || defined(COROSYNC_DARWIN) -#include -#endif -#include -#include -#include -#include -#include -#include + +#include +#include +#include #include #include -#include "util.h" - -#define YIELD_AFTER_LOG_OPS 10 - -#define MIN(x,y) ((x) < (y) ? (x) : (y)) - -#define ROUNDUP(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) - /* * syslog prioritynames, facility names to value mapping * Some C libraries build this in to their headers, but it is non-portable * so logsys supplies its own version. 
*/ struct syslog_names { const char *c_name; int c_val; }; -struct syslog_names prioritynames[] = +static struct syslog_names prioritynames[] = { { "alert", LOG_ALERT }, { "crit", LOG_CRIT }, { "debug", LOG_DEBUG }, { "emerg", LOG_EMERG }, { "err", LOG_ERR }, { "error", LOG_ERR }, { "info", LOG_INFO }, { "notice", LOG_NOTICE }, { "warning", LOG_WARNING }, { NULL, -1 } }; -struct syslog_names facilitynames[] = +static struct syslog_names facilitynames[] = { { "auth", LOG_AUTH }, { "cron", LOG_CRON }, { "daemon", LOG_DAEMON }, { "kern", LOG_KERN }, { "lpr", LOG_LPR }, { "mail", LOG_MAIL }, { "news", LOG_NEWS }, { "syslog", LOG_SYSLOG }, { "user", LOG_USER }, { "uucp", LOG_UUCP }, { "local0", LOG_LOCAL0 }, { "local1", LOG_LOCAL1 }, { "local2", LOG_LOCAL2 }, { "local3", LOG_LOCAL3 }, { "local4", LOG_LOCAL4 }, { "local5", LOG_LOCAL5 }, { "local6", LOG_LOCAL6 }, { "local7", LOG_LOCAL7 }, { NULL, -1 } }; -struct record { - unsigned int rec_ident; - const char *file_name; - const char *function_name; - int file_line; - char *buffer; - struct list_head list; -}; - +#define MAX_FILES_PER_SUBSYS 16 + /* * need unlogical order to preserve 64bit alignment */ struct logsys_logger { char subsys[LOGSYS_MAX_SUBSYS_NAMELEN]; /* subsystem name */ char *logfile; /* log to file */ - FILE *logfile_fp; /* track file descriptor */ unsigned int mode; /* subsystem mode */ unsigned int debug; /* debug on|off */ - int syslog_facility; /* facility */ int syslog_priority; /* priority */ int logfile_priority; /* priority to file */ int init_status; /* internal field to handle init queues for subsystems */ + int32_t target_id; + char *files[MAX_FILES_PER_SUBSYS]; + int32_t file_idx; + int32_t dirty; }; - -/* - * These are not static so they can be read from the core file - */ -int *flt_data; - -uint32_t flt_head; - -uint32_t flt_tail; - -unsigned int flt_data_size; - -#define COMBINE_BUFFER_SIZE 2048 - /* values for logsys_logger init_status */ #define LOGSYS_LOGGER_INIT_DONE 0 #define 
LOGSYS_LOGGER_NEEDS_INIT 1 static int logsys_system_needs_init = LOGSYS_LOGGER_NEEDS_INIT; -static int logsys_memory_used = 0; - -static int logsys_sched_param_queued = 0; - -static int logsys_sched_policy; - -static struct sched_param logsys_sched_param; - -static int logsys_after_log_ops_yield = 10; - static struct logsys_logger logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT + 1]; -static int wthread_active = 0; - -static int wthread_should_exit = 0; - static pthread_mutex_t logsys_config_mutex = PTHREAD_MUTEX_INITIALIZER; -static unsigned int records_written = 1; - -static pthread_t logsys_thread_id; - -static sem_t logsys_thread_start; - -static sem_t logsys_print_finished; - -static pthread_mutex_t logsys_flt_mutex = PTHREAD_MUTEX_INITIALIZER; - -static pthread_mutex_t logsys_wthread_mutex = PTHREAD_MUTEX_INITIALIZER; - -static int logsys_buffer_full = 0; +static int32_t _logsys_config_mode_set_unlocked(int32_t subsysid, uint32_t new_mode); +static void _logsys_config_apply_per_file(int32_t s, const char *filename); +static void _logsys_config_apply_per_subsys(int32_t s); static char *format_buffer=NULL; -static int logsys_dropped_messages = 0; - -void *logsys_rec_end; - -static DECLARE_LIST_INIT(logsys_print_finished_records); - -#define FDMAX_ARGS 64 - -#define CIRCULAR_BUFFER_WRITE_SIZE 64 - -/* forward declarations */ -static void logsys_close_logfile(int subsysid); - -static uint32_t circular_memory_map (void **buf, size_t bytes) -{ - void *addr_orig; - void *addr; - int fd; - int res; - const char *file = "fdata-XXXXXX"; - char path[PATH_MAX]; - char buffer[CIRCULAR_BUFFER_WRITE_SIZE]; - int i; - int written; - int error_return = 0; - - snprintf (path, PATH_MAX, "/dev/shm/%s", file); - - fd = mkstemp (path); - if (fd == -1) { - snprintf (path, PATH_MAX, LOCALSTATEDIR "/run/%s", file); - fd = mkstemp (path); - if (fd == -1) { - error_return = -1; - goto error_exit; - } - } - - /* - * ftruncate doesn't return ENOSPC - * have to use write to determine if shared 
memory is actually available - */ - res = ftruncate (fd, 0); - if (res == -1) { - error_return = -1; - goto unlink_exit; - } - memset (buffer, 0, sizeof (buffer)); - for (i = 0; i < (bytes / CIRCULAR_BUFFER_WRITE_SIZE); i++) { -retry_write: - written = write (fd, buffer, CIRCULAR_BUFFER_WRITE_SIZE); - if (written == -1 && errno == EINTR) { - goto retry_write; - } - if (written != 64) { - error_return = -1; - goto unlink_exit; - } - } - - addr_orig = mmap (NULL, bytes << 1, PROT_NONE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (addr_orig == MAP_FAILED) { - error_return = -1; - goto unlink_exit; - } - - addr = mmap (addr_orig, bytes, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, fd, 0); - if (addr != addr_orig) { - error_return = -1; - goto mmap_exit; - } - #ifdef COROSYNC_BSD - madvise(addr_orig, bytes, MADV_NOSYNC); - #endif - - addr = mmap (((char *)addr_orig) + bytes, - bytes, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, fd, 0); - if ((char *)addr != (char *)((char *)addr_orig + bytes)) { - error_return = -1; - goto mmap_exit; - } -#ifdef COROSYNC_BSD - madvise(((char *)addr_orig) + bytes, bytes, MADV_NOSYNC); -#endif - - *buf = addr_orig; - error_return = 0; - goto unlink_exit; - -mmap_exit: - munmap (addr_orig, bytes << 1); -unlink_exit: - unlink (path); - close (fd); -error_exit: - return (error_return); -} - -static void logsys_flt_lock (void) -{ - pthread_mutex_lock (&logsys_flt_mutex); -} -static void logsys_flt_unlock (void) -{ - pthread_mutex_unlock (&logsys_flt_mutex); -} - -static void logsys_wthread_lock (void) -{ - pthread_mutex_lock (&logsys_wthread_mutex); -} -static void logsys_wthread_unlock (void) -{ - pthread_mutex_unlock (&logsys_wthread_mutex); -} - -/* - * Before any write operation, a reclaim on the buffer area must be executed - */ -static inline void records_reclaim (unsigned int idx, unsigned int words) -{ - unsigned int should_reclaim; - - should_reclaim = 0; - - if ((idx + words) >= flt_data_size) { - logsys_buffer_full = 1; - 
} - if (logsys_buffer_full == 0) { - return; - } - - if (flt_tail > flt_head) { - if (idx + words >= flt_tail) { - should_reclaim = 1; - } - } else { - if ((idx + words) >= (flt_tail + flt_data_size)) { - should_reclaim = 1; - } - } - - if (should_reclaim) { - int words_needed = 0; - - words_needed = words + 1; - do { - words_needed -= flt_data[flt_tail]; - flt_tail = - (flt_tail + - flt_data[flt_tail]) % (flt_data_size); - } while (words_needed > 0); - } -} - -#define idx_word_step(idx) \ -do { \ - if (idx > (flt_data_size - 1)) { \ - idx = 0; \ - } \ -} while (0); - -#define idx_buffer_step(idx) \ -do { \ - if (idx > (flt_data_size - 1)) { \ - idx = ((idx) % (flt_data_size)); \ - } \ -} while (0); - -/* - * Internal threaded logging implementation - */ -static inline int strcpy_cutoff (char *dest, const char *src, size_t cutoff, - size_t buf_len) -{ - size_t len = strlen (src); - if (buf_len <= 1) { - if (buf_len == 0) - dest[0] = 0; - return 0; - } - - if (cutoff == 0) { - cutoff = len; - } - - cutoff = MIN (cutoff, buf_len - 1); - len = MIN (len, cutoff); - memcpy (dest, src, len); - memset (dest + len, ' ', cutoff - len); - dest[cutoff] = '\0'; - - return (cutoff); -} - -static const char log_month_name[][4] = { - "Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" -}; - -/* - * %s SUBSYSTEM - * %n FUNCTION NAME - * %f FILENAME - * %l FILELINE - * %p PRIORITY - * %t TIMESTAMP - * %b BUFFER - * - * any number between % and character specify field length to pad or chop -*/ -static void log_printf_to_logs ( - unsigned int rec_ident, - const char *file_name, - const char *function_name, - int file_line, - const char *buffer) -{ - char normal_output_buffer[COMBINE_BUFFER_SIZE]; - char syslog_output_buffer[COMBINE_BUFFER_SIZE]; - char char_time[128]; - char line_no[30]; - unsigned int format_buffer_idx = 0; - unsigned int normal_output_buffer_idx = 0; - unsigned int syslog_output_buffer_idx = 0; - struct timeval tv; - size_t cutoff; 
- unsigned int normal_len, syslog_len; - int subsysid; - unsigned int level; - int c; - struct tm tm_res; - - if (LOGSYS_DECODE_RECID(rec_ident) != LOGSYS_RECID_LOG) { - return; - } - - subsysid = LOGSYS_DECODE_SUBSYSID(rec_ident); - level = LOGSYS_DECODE_LEVEL(rec_ident); - - while ((c = format_buffer[format_buffer_idx])) { - cutoff = 0; - if (c != '%') { - normal_output_buffer[normal_output_buffer_idx++] = c; - syslog_output_buffer[syslog_output_buffer_idx++] = c; - format_buffer_idx++; - } else { - const char *normal_p, *syslog_p; - - format_buffer_idx += 1; - if (isdigit (format_buffer[format_buffer_idx])) { - cutoff = atoi (&format_buffer[format_buffer_idx]); - } - while (isdigit (format_buffer[format_buffer_idx])) { - format_buffer_idx += 1; - } - - switch (format_buffer[format_buffer_idx]) { - case 's': - normal_p = logsys_loggers[subsysid].subsys; - syslog_p = logsys_loggers[subsysid].subsys; - break; - - case 'n': - normal_p = function_name; - syslog_p = function_name; - break; - - case 'f': - normal_p = file_name; - syslog_p = file_name; - break; - - case 'l': - snprintf (line_no, sizeof (line_no), "%d", file_line); - normal_p = line_no; - syslog_p = line_no; - break; - - case 't': - gettimeofday (&tv, NULL); - (void)localtime_r ((time_t *)&tv.tv_sec, &tm_res); - snprintf (char_time, sizeof (char_time), "%s %02d %02d:%02d:%02d", - log_month_name[tm_res.tm_mon], tm_res.tm_mday, tm_res.tm_hour, - tm_res.tm_min, tm_res.tm_sec); - normal_p = char_time; - - /* - * syslog does timestamping on its own. - * also strip extra space in case. 
- */ - syslog_p = ""; - break; - - case 'b': - normal_p = buffer; - syslog_p = buffer; - break; - - case 'p': - normal_p = logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].subsys; - syslog_p = ""; - break; - - default: - normal_p = ""; - syslog_p = ""; - break; - } - normal_len = strcpy_cutoff (normal_output_buffer + normal_output_buffer_idx, - normal_p, cutoff, - (sizeof (normal_output_buffer) - - normal_output_buffer_idx)); - normal_output_buffer_idx += normal_len; - syslog_len = strcpy_cutoff (syslog_output_buffer + syslog_output_buffer_idx, - syslog_p, cutoff, - (sizeof (syslog_output_buffer) - - syslog_output_buffer_idx)); - syslog_output_buffer_idx += syslog_len; - format_buffer_idx += 1; - } - if ((normal_output_buffer_idx >= sizeof (normal_output_buffer) - 2) || - (syslog_output_buffer_idx >= sizeof (syslog_output_buffer) - 1)) { - /* Note: we make allowance for '\0' at the end of - * both of these arrays and normal_output_buffer also - * needs a '\n'. - */ - break; - } - } - - normal_output_buffer[normal_output_buffer_idx] = '\0'; - syslog_output_buffer[syslog_output_buffer_idx] = '\0'; - - /* - * Output to syslog - */ - if ((logsys_loggers[subsysid].mode & LOGSYS_MODE_OUTPUT_SYSLOG) && - ((level <= logsys_loggers[subsysid].syslog_priority) || - (logsys_loggers[subsysid].debug != 0))) { - syslog (level | logsys_loggers[subsysid].syslog_facility, "%s", syslog_output_buffer); - } - - /* - * Terminate string with \n \0 - */ - normal_output_buffer[normal_output_buffer_idx++] = '\n'; - normal_output_buffer[normal_output_buffer_idx] = '\0'; - - /* - * Output to configured file - */ - if (((logsys_loggers[subsysid].mode & LOGSYS_MODE_OUTPUT_FILE) && - (logsys_loggers[subsysid].logfile_fp != NULL)) && - ((level <= logsys_loggers[subsysid].logfile_priority) || - (logsys_loggers[subsysid].debug != 0))) { - /* - * Output to a file - */ - if ((fwrite (normal_output_buffer, strlen (normal_output_buffer), 1, - logsys_loggers[subsysid].logfile_fp) < 1) || - (fflush 
(logsys_loggers[subsysid].logfile_fp) == EOF)) { - char tmpbuffer[1024]; - /* - * if we are here, it's bad.. it's really really bad. - * Best thing would be to light a candle in a church - * and pray. - */ - snprintf(tmpbuffer, sizeof(tmpbuffer), - "LOGSYS EMERGENCY: %s Unable to write to %s.", - logsys_loggers[subsysid].subsys, - logsys_loggers[subsysid].logfile); - pthread_mutex_lock (&logsys_config_mutex); - logsys_close_logfile(subsysid); - logsys_loggers[subsysid].mode &= ~LOGSYS_MODE_OUTPUT_FILE; - pthread_mutex_unlock (&logsys_config_mutex); - log_printf_to_logs( - LOGSYS_ENCODE_RECID( - LOGSYS_LEVEL_EMERG, - subsysid, - LOGSYS_RECID_LOG), - __FILE__, __FUNCTION__, __LINE__, - tmpbuffer); - } - } - - /* - * Output to stderr - */ - if ((logsys_loggers[subsysid].mode & LOGSYS_MODE_OUTPUT_STDERR) && - ((level <= logsys_loggers[subsysid].logfile_priority) || - (logsys_loggers[subsysid].debug != 0))) { - if (write (STDERR_FILENO, normal_output_buffer, strlen (normal_output_buffer)) < 0) { - char tmpbuffer[1024]; - /* - * if we are here, it's bad.. it's really really bad. - * Best thing would be to light 20 candles for each saint - * in the calendar and pray a lot... 
- */ - pthread_mutex_lock (&logsys_config_mutex); - logsys_loggers[subsysid].mode &= ~LOGSYS_MODE_OUTPUT_STDERR; - pthread_mutex_unlock (&logsys_config_mutex); - snprintf(tmpbuffer, sizeof(tmpbuffer), - "LOGSYS EMERGENCY: %s Unable to write to STDERR.", - logsys_loggers[subsysid].subsys); - log_printf_to_logs( - LOGSYS_ENCODE_RECID( - LOGSYS_LEVEL_EMERG, - subsysid, - LOGSYS_RECID_LOG), - __FILE__, __FUNCTION__, __LINE__, - tmpbuffer); - } - } -} - -static void log_printf_to_logs_wthread ( - unsigned int rec_ident, - const char *file_name, - const char *function_name, - int file_line, - const char *buffer) -{ - struct record *rec; - uint32_t length; - - rec = malloc (sizeof (struct record)); - if (rec == NULL) { - return; - } - - length = strlen (buffer); - - rec->rec_ident = rec_ident; - rec->file_name = file_name; - rec->function_name = function_name; - rec->file_line = file_line; - rec->buffer = malloc (length + 1); - if (rec->buffer == NULL) { - free (rec); - return; - } - memcpy (rec->buffer, buffer, length + 1); - - list_init (&rec->list); - logsys_wthread_lock(); - logsys_memory_used += length + 1 + sizeof (struct record); - if (logsys_memory_used > 512000) { - free (rec->buffer); - free (rec); - logsys_memory_used = logsys_memory_used - length - 1 - sizeof (struct record); - logsys_dropped_messages += 1; - logsys_wthread_unlock(); - return; - - } else { - list_add_tail (&rec->list, &logsys_print_finished_records); - } - logsys_wthread_unlock(); - - sem_post (&logsys_print_finished); -} - -static void *logsys_worker_thread (void *data) __attribute__((noreturn)); -static void *logsys_worker_thread (void *data) -{ - struct record *rec; - int dropped = 0; - int res; - - /* - * Signal wthread_create that the initialization process may continue - */ - sem_post (&logsys_thread_start); - for (;;) { - dropped = 0; -retry_sem_wait: - res = sem_wait (&logsys_print_finished); - if (res == -1 && errno == EINTR) { - goto retry_sem_wait; - } else - if (res == -1) { - /* - 
* This case shouldn't happen - */ - pthread_exit (NULL); - } - - - logsys_wthread_lock(); - if (wthread_should_exit) { - int value; - - res = sem_getvalue (&logsys_print_finished, &value); - if (value == 0) { - logsys_wthread_unlock(); - pthread_exit (NULL); - } - } - - rec = list_entry (logsys_print_finished_records.next, struct record, list); - list_del (&rec->list); - logsys_memory_used = logsys_memory_used - strlen (rec->buffer) - - sizeof (struct record) - 1; - dropped = logsys_dropped_messages; - logsys_dropped_messages = 0; - logsys_wthread_unlock(); - if (dropped) { - printf ("%d messages lost\n", dropped); - } - log_printf_to_logs ( - rec->rec_ident, - rec->file_name, - rec->function_name, - rec->file_line, - rec->buffer); - free (rec->buffer); - free (rec); - } -} - -static void wthread_create (void) -{ - int res; - - if (wthread_active) { - return; - } - - wthread_active = 1; - - - /* - * TODO: propagate pthread_create errors back to the caller - */ - res = pthread_create (&logsys_thread_id, NULL, - logsys_worker_thread, NULL); - sem_wait (&logsys_thread_start); - - if (res == 0) { - if (logsys_sched_param_queued == 1) { - /* - * TODO: propagate logsys_thread_priority_set errors back to - * the caller - */ - res = logsys_thread_priority_set ( - logsys_sched_policy, - &logsys_sched_param, - logsys_after_log_ops_yield); - logsys_sched_param_queued = 0; - } - } else { - wthread_active = 0; - } -} - static int _logsys_config_subsys_get_unlocked (const char *subsys) { unsigned int i; if (!subsys) { return LOGSYS_MAX_SUBSYS_COUNT; } - for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { + for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { if (strcmp (logsys_loggers[i].subsys, subsys) == 0) { return i; } } return (-1); } -static void syslog_facility_reconf (void) -{ - closelog(); - openlog(logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].subsys, - LOG_CONS|LOG_PID, - logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].syslog_facility); -} - -/* - * this is always invoked within the 
mutex, so it's safe to parse the - * whole thing as we need. - */ -static void logsys_close_logfile ( - int subsysid) -{ - int i; - - if ((logsys_loggers[subsysid].logfile_fp == NULL) && - (logsys_loggers[subsysid].logfile == NULL)) { - return; - } - - /* - * if there is another subsystem or system using the same fp, - * then we clean our own structs, but we can't close the file - * as it is in use by somebody else. - * Only the last users will be allowed to perform the fclose. - */ - for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { - if ((logsys_loggers[i].logfile_fp == logsys_loggers[subsysid].logfile_fp) && - (i != subsysid)) { - logsys_loggers[subsysid].logfile = NULL; - logsys_loggers[subsysid].logfile_fp = NULL; - return; - } - } - - /* - * if we are here, we are the last users of that fp, so we can safely - * close it. - */ - fclose (logsys_loggers[subsysid].logfile_fp); - logsys_loggers[subsysid].logfile_fp = NULL; - free (logsys_loggers[subsysid].logfile); - logsys_loggers[subsysid].logfile = NULL; -} /* * we need a version that can work when somebody else is already * holding a config mutex lock or we will never get out of here */ static int logsys_config_file_set_unlocked ( int subsysid, const char **error_string, const char *file) { static char error_string_response[512]; int i; - logsys_close_logfile(subsysid); + if (logsys_loggers[subsysid].target_id > 0) { + /* TODO close file + logsys_filter_apply(subsysid, + QB_LOG_FILTER_REMOVE, + logsys_loggers[subsysid].target_id); + */ + } + logsys_loggers[subsysid].dirty = QB_TRUE; if ((file == NULL) || (strcmp(logsys_loggers[subsysid].subsys, "") == 0)) { return (0); } if (strlen(file) >= PATH_MAX) { snprintf (error_string_response, sizeof(error_string_response), "%s: logfile name exceed maximum system filename lenght\n", logsys_loggers[subsysid].subsys); *error_string = error_string_response; return (-1); } for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { if ((logsys_loggers[i].logfile != NULL) && - (strcmp 
(logsys_loggers[i].logfile, file) == 0) && - (i != subsysid)) { - logsys_loggers[subsysid].logfile = - logsys_loggers[i].logfile; - logsys_loggers[subsysid].logfile_fp = - logsys_loggers[i].logfile_fp; - return (0); + (strcmp (logsys_loggers[i].logfile, file) == 0) && + (i != subsysid)) { + /* we have found another subsys with this config file + * so add a filter + */ + logsys_loggers[subsysid].target_id = logsys_loggers[i].target_id; + return (0); } } - logsys_loggers[subsysid].logfile = strdup(file); if (logsys_loggers[subsysid].logfile == NULL) { snprintf (error_string_response, sizeof(error_string_response), "Unable to allocate memory for logfile '%s'\n", file); *error_string = error_string_response; return (-1); } - logsys_loggers[subsysid].logfile_fp = fopen (file, "a+"); - if (logsys_loggers[subsysid].logfile_fp == NULL) { - int err; + if (logsys_loggers[subsysid].target_id > 0) { + /* no one else is using this close it */ + qb_log_file_close(logsys_loggers[subsysid].target_id); + } + + logsys_loggers[subsysid].target_id = qb_log_file_open(file); + if (logsys_loggers[subsysid].target_id < 0) { + int err = logsys_loggers[subsysid].target_id; char error_str[LOGSYS_MAX_PERROR_MSG_LEN]; const char *error_ptr; - - err = errno; -#ifdef COROSYNC_LINUX - /* The GNU version of strerror_r returns a (char*) that *must* be used */ - error_ptr = strerror_r(err, error_str, sizeof(error_str)); -#else - /* The XSI-compliant strerror_r() return 0 or -1 (in case the buffer is full) */ - if ( strerror_r(err, error_str, sizeof(error_str)) < 0 ) - error_ptr = ""; - else - error_ptr = error_str; -#endif + error_ptr = qb_strerror_r(err, error_str, sizeof(error_str)); free(logsys_loggers[subsysid].logfile); logsys_loggers[subsysid].logfile = NULL; snprintf (error_string_response, sizeof(error_string_response), "Can't open logfile '%s' for reason: %s (%d).\n", - file, error_ptr, err); + file, error_ptr, err); *error_string = error_string_response; return (-1); } - return (0); } 
static void logsys_subsys_init ( const char *subsys, int subsysid) { if (logsys_system_needs_init == LOGSYS_LOGGER_NEEDS_INIT) { logsys_loggers[subsysid].init_status = LOGSYS_LOGGER_NEEDS_INIT; } else { - memcpy(&logsys_loggers[subsysid], - &logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT], - sizeof(logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT])); - logsys_loggers[subsysid].init_status = - LOGSYS_LOGGER_INIT_DONE; + logsys_loggers[subsysid].mode = logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].mode; + logsys_loggers[subsysid].debug = logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].debug; + logsys_loggers[subsysid].syslog_priority = logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].syslog_priority; + logsys_loggers[subsysid].logfile_priority = logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].logfile_priority; + logsys_loggers[subsysid].init_status = LOGSYS_LOGGER_INIT_DONE; } strncpy (logsys_loggers[subsysid].subsys, subsys, sizeof (logsys_loggers[subsysid].subsys)); logsys_loggers[subsysid].subsys[ sizeof (logsys_loggers[subsysid].subsys) - 1] = '\0'; + logsys_loggers[subsysid].file_idx = 0; +} + +static const char *_logsys_tags_stringify(uint32_t tags) +{ + if (tags == QB_LOG_TAG_LIBQB_MSG) { + return "QB"; + } else { + return logsys_loggers[tags].subsys; + } } /* * Internal API - exported */ int _logsys_system_setup( const char *mainsystem, unsigned int mode, - unsigned int debug, - const char *logfile, - int logfile_priority, int syslog_facility, int syslog_priority) { int i; - const char *errstr; + int32_t fidx; char tempsubsys[LOGSYS_MAX_SUBSYS_NAMELEN]; if ((mainsystem == NULL) || (strlen(mainsystem) >= LOGSYS_MAX_SUBSYS_NAMELEN)) { return -1; } i = LOGSYS_MAX_SUBSYS_COUNT; pthread_mutex_lock (&logsys_config_mutex); snprintf(logsys_loggers[i].subsys, LOGSYS_MAX_SUBSYS_NAMELEN, "%s", mainsystem); logsys_loggers[i].mode = mode; + logsys_loggers[i].debug = 0; + logsys_loggers[i].file_idx = 0; + logsys_loggers[i].logfile_priority = syslog_priority; + logsys_loggers[i].syslog_priority = syslog_priority; - 
logsys_loggers[i].debug = debug; - - if (logsys_config_file_set_unlocked (i, &errstr, logfile) < 0) { - pthread_mutex_unlock (&logsys_config_mutex); - return (-1); + qb_log_init(mainsystem, syslog_facility, syslog_priority); + if (logsys_loggers[i].mode & LOGSYS_MODE_OUTPUT_STDERR) { + qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_TRUE); + } else { + qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE); + } + if (logsys_loggers[i].mode & LOGSYS_MODE_OUTPUT_SYSLOG) { + qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE); + } else { + qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE); } - logsys_loggers[i].logfile_priority = logfile_priority; - logsys_loggers[i].syslog_facility = syslog_facility; - logsys_loggers[i].syslog_priority = syslog_priority; - syslog_facility_reconf(); + qb_log_filter_ctl(QB_LOG_BLACKBOX, QB_LOG_FILTER_ADD, + QB_LOG_FILTER_FILE, "*", LOG_TRACE); + qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_SIZE, 4096); + qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_THREADED, QB_FALSE); + qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); - logsys_loggers[i].init_status = LOGSYS_LOGGER_INIT_DONE; + logsys_format_set(NULL); + qb_log_tags_stringify_fn_set(_logsys_tags_stringify); + logsys_loggers[i].init_status = LOGSYS_LOGGER_INIT_DONE; logsys_system_needs_init = LOGSYS_LOGGER_INIT_DONE; for (i = 0; i < LOGSYS_MAX_SUBSYS_COUNT; i++) { if ((strcmp (logsys_loggers[i].subsys, "") != 0) && - (logsys_loggers[i].init_status == - LOGSYS_LOGGER_NEEDS_INIT)) { - strncpy (tempsubsys, logsys_loggers[i].subsys, - sizeof (tempsubsys)); - tempsubsys[sizeof (tempsubsys) - 1] = '\0'; - logsys_subsys_init(tempsubsys, i); + (logsys_loggers[i].init_status == + LOGSYS_LOGGER_NEEDS_INIT)) { + fidx = logsys_loggers[i].file_idx; + strncpy (tempsubsys, logsys_loggers[i].subsys, + sizeof (tempsubsys)); + tempsubsys[sizeof (tempsubsys) - 1] = '\0'; + logsys_subsys_init(tempsubsys, i); + logsys_loggers[i].file_idx = fidx; + _logsys_config_mode_set_unlocked(i, 
logsys_loggers[i].mode); + _logsys_config_apply_per_subsys(i); } } pthread_mutex_unlock (&logsys_config_mutex); return (0); } -int _logsys_subsys_create (const char *subsys) + +static void _logsys_subsys_filename_add (int32_t s, const char *filename) +{ + int i; + + if (filename == NULL) { + return; + } + assert(logsys_loggers[s].file_idx < MAX_FILES_PER_SUBSYS); + assert(logsys_loggers[s].file_idx >= 0); + + for (i = 0; i < logsys_loggers[s].file_idx; i++) { + if (strcmp(logsys_loggers[s].files[i], filename) == 0) { + return; + } + } + logsys_loggers[s].files[logsys_loggers[s].file_idx++] = strdup(filename); + + if (logsys_system_needs_init == LOGSYS_LOGGER_INIT_DONE) { + _logsys_config_apply_per_file(s, filename); + } +} + +int _logsys_subsys_create (const char *subsys, const char *filename) { int i; if ((subsys == NULL) || (strlen(subsys) >= LOGSYS_MAX_SUBSYS_NAMELEN)) { return -1; } pthread_mutex_lock (&logsys_config_mutex); i = _logsys_config_subsys_get_unlocked (subsys); if ((i > -1) && (i < LOGSYS_MAX_SUBSYS_COUNT)) { + _logsys_subsys_filename_add(i, filename); pthread_mutex_unlock (&logsys_config_mutex); return i; } for (i = 0; i < LOGSYS_MAX_SUBSYS_COUNT; i++) { if (strcmp (logsys_loggers[i].subsys, "") == 0) { logsys_subsys_init(subsys, i); + _logsys_subsys_filename_add(i, filename); break; } } if (i >= LOGSYS_MAX_SUBSYS_COUNT) { i = -1; } pthread_mutex_unlock (&logsys_config_mutex); return i; } -int _logsys_wthread_create (void) -{ - if (((logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].mode & LOGSYS_MODE_FORK) == 0) && - ((logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].mode & LOGSYS_MODE_THREADED) != 0)) { - wthread_create(); - } - return (0); -} - -int _logsys_rec_init (unsigned int fltsize) -{ - size_t flt_real_size; - int res; - - sem_init (&logsys_thread_start, 0, 0); - - sem_init (&logsys_print_finished, 0, 0); - - /* - * XXX: kill me for 1.1 because I am a dirty hack - * temporary workaround that will be replaced by supporting - * 0 byte size flight recorder 
buffer. - * 0 byte size buffer will enable direct printing to logs - * without flight recoder. - */ - if (fltsize < 64000) { - fltsize = 64000; - } - - flt_real_size = ROUNDUP(fltsize, sysconf(_SC_PAGESIZE)) * 4; - - res = circular_memory_map ((void **)&flt_data, flt_real_size); - if (res == -1) { - sem_destroy (&logsys_thread_start); - sem_destroy (&logsys_print_finished); - } - - memset (flt_data, 0, flt_real_size * 2); - /* - * flt_data_size tracks data by ints and not bytes/chars. - */ - - flt_data_size = flt_real_size / sizeof (uint32_t); - - /* - * First record starts at zero - * Last record ends at zero - */ - flt_head = 0; - flt_tail = 0; - - return (0); -} - - -/* - * u32 RECORD SIZE - * u32 record ident - * u32 arg count - * u32 file line - * u32 subsys length - * buffer null terminated subsys - * u32 filename length - * buffer null terminated filename - * u32 filename length - * buffer null terminated function - * u32 arg1 length - * buffer arg1 - * ... repeats length & arg - */ - -void _logsys_log_rec ( - unsigned int rec_ident, - const char *function_name, - const char *file_name, - int file_line, - ...) 
-{ - va_list ap; - const void *buf_args[FDMAX_ARGS]; - unsigned int buf_len[FDMAX_ARGS]; - unsigned int i; - unsigned int idx; - unsigned int arguments = 0; - unsigned int record_reclaim_size = 0; - unsigned int index_start; - int words_written; - int subsysid; - - subsysid = LOGSYS_DECODE_SUBSYSID(rec_ident); - - /* - * Decode VA Args - */ - va_start (ap, file_line); - arguments = 3; - for (;;) { - buf_args[arguments] = va_arg (ap, void *); - if (buf_args[arguments] == LOGSYS_REC_END) { - break; - } - buf_len[arguments] = va_arg (ap, int); - record_reclaim_size += ((buf_len[arguments] + 3) >> 2) + 1; - arguments++; - if (arguments >= FDMAX_ARGS) { - break; - } - } - va_end (ap); - - /* - * Encode logsys subsystem identity, filename, and function - */ - buf_args[0] = logsys_loggers[subsysid].subsys; - buf_len[0] = strlen (logsys_loggers[subsysid].subsys) + 1; - buf_args[1] = file_name; - buf_len[1] = strlen (file_name) + 1; - buf_args[2] = function_name; - buf_len[2] = strlen (function_name) + 1; - for (i = 0; i < 3; i++) { - record_reclaim_size += ((buf_len[i] + 3) >> 2) + 1; - } - - logsys_flt_lock(); - idx = flt_head; - index_start = idx; - - /* - * Reclaim data needed for record including 4 words for the header - */ - records_reclaim (idx, record_reclaim_size + 4); - - /* - * Write record size of zero and rest of header information - */ - flt_data[idx++] = 0; - idx_word_step(idx); - - flt_data[idx++] = rec_ident; - idx_word_step(idx); - - flt_data[idx++] = file_line; - idx_word_step(idx); - - flt_data[idx++] = records_written; - idx_word_step(idx); - /* - * Encode all of the arguments into the log message - */ - for (i = 0; i < arguments; i++) { - unsigned int bytes; - unsigned int total_words; - - bytes = buf_len[i]; - total_words = (bytes + 3) >> 2; - - flt_data[idx++] = total_words; - idx_word_step(idx); - - memcpy (&flt_data[idx], buf_args[i], buf_len[i]); - - idx += total_words; - idx_buffer_step (idx); - - } - words_written = idx - index_start; - if 
(words_written < 0) { - words_written += flt_data_size; - } - /* - * Commit the write of the record size now that the full record - * is in the memory buffer - */ - flt_data[index_start] = words_written; - - flt_head = idx; - logsys_flt_unlock(); - records_written++; -} - -void _logsys_log_vprintf ( - unsigned int rec_ident, - const char *function_name, - const char *file_name, - int file_line, - const char *format, - va_list ap) -{ - char logsys_print_buffer[COMBINE_BUFFER_SIZE]; - unsigned int len; - unsigned int level; - int subsysid; - const char *short_file_name; - - subsysid = LOGSYS_DECODE_SUBSYSID(rec_ident); - level = LOGSYS_DECODE_LEVEL(rec_ident); - - len = vsnprintf (logsys_print_buffer, sizeof (logsys_print_buffer), format, ap); - if (logsys_print_buffer[len - 1] == '\n') { - logsys_print_buffer[len - 1] = '\0'; - len -= 1; - } -#ifdef BUILDING_IN_PLACE - short_file_name = file_name; -#else - short_file_name = strrchr (file_name, '/'); - if (short_file_name == NULL) - short_file_name = file_name; - else - short_file_name++; /* move past the "/" */ -#endif /* BUILDING_IN_PLACE */ - - /* - * Create a log record - */ - _logsys_log_rec ( - rec_ident, - function_name, - short_file_name, - file_line, - logsys_print_buffer, len + 1, - LOGSYS_REC_END); - - /* - * If logsys is not going to print a message to a log target don't - * queue one - */ - if ((level > logsys_loggers[subsysid].syslog_priority && - level > logsys_loggers[subsysid].logfile_priority && - logsys_loggers[subsysid].debug == 0) || - - (level == LOGSYS_LEVEL_DEBUG && - logsys_loggers[subsysid].debug == 0)) { - - return; - } - - if ((logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].mode & LOGSYS_MODE_THREADED) == 0) { - /* - * Output (and block) if the log mode is not threaded otherwise - * expect the worker thread to output the log data once signaled - */ - log_printf_to_logs (rec_ident, - short_file_name, - function_name, - file_line, - logsys_print_buffer); - } else { - /* - * Signal worker thread to 
display logging output - */ - log_printf_to_logs_wthread (rec_ident, - short_file_name, - function_name, - file_line, - logsys_print_buffer); - } -} - -void _logsys_log_printf ( - unsigned int rec_ident, - const char *function_name, - const char *file_name, - int file_line, - const char *format, - ...) -{ - va_list ap; - - va_start (ap, format); - _logsys_log_vprintf (rec_ident, function_name, file_name, file_line, - format, ap); - va_end (ap); -} - int _logsys_config_subsys_get (const char *subsys) { unsigned int i; pthread_mutex_lock (&logsys_config_mutex); i = _logsys_config_subsys_get_unlocked (subsys); pthread_mutex_unlock (&logsys_config_mutex); return i; } -/* - * External Configuration and Initialization API - */ -void logsys_fork_completed (void) +static int32_t _logsys_config_mode_set_unlocked(int32_t subsysid, uint32_t new_mode) { - logsys_loggers[LOGSYS_MAX_SUBSYS_COUNT].mode &= ~LOGSYS_MODE_FORK; - (void)_logsys_wthread_create (); + if ( logsys_loggers[subsysid].mode == new_mode) { + return 0; + } + if (logsys_loggers[subsysid].target_id > 0) { + qb_log_ctl(logsys_loggers[subsysid].target_id, + QB_LOG_CONF_ENABLED, + (new_mode & LOGSYS_MODE_OUTPUT_FILE)); + } + + if (subsysid == LOGSYS_MAX_SUBSYS_COUNT) { + qb_log_ctl(QB_LOG_STDERR, + QB_LOG_CONF_ENABLED, + (new_mode & LOGSYS_MODE_OUTPUT_STDERR)); + qb_log_ctl(QB_LOG_SYSLOG, + QB_LOG_CONF_ENABLED, + (new_mode & LOGSYS_MODE_OUTPUT_SYSLOG)); + } + logsys_loggers[subsysid].mode = new_mode; + return 0; } int logsys_config_mode_set (const char *subsys, unsigned int mode) { int i; pthread_mutex_lock (&logsys_config_mutex); if (subsys != NULL) { i = _logsys_config_subsys_get_unlocked (subsys); if (i >= 0) { - logsys_loggers[i].mode = mode; - i = 0; + i = _logsys_config_mode_set_unlocked(i, mode); } } else { for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { - logsys_loggers[i].mode = mode; + _logsys_config_mode_set_unlocked(i, mode); } i = 0; } + pthread_mutex_unlock (&logsys_config_mutex); return i; } unsigned 
int logsys_config_mode_get (const char *subsys) { int i; i = _logsys_config_subsys_get (subsys); if (i < 0) { return i; } return logsys_loggers[i].mode; } int logsys_config_file_set ( const char *subsys, const char **error_string, const char *file) { int i; int res; pthread_mutex_lock (&logsys_config_mutex); if (subsys != NULL) { i = _logsys_config_subsys_get_unlocked (subsys); if (i < 0) { res = i; } else { res = logsys_config_file_set_unlocked(i, error_string, file); } } else { for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { res = logsys_config_file_set_unlocked(i, error_string, file); if (res < 0) { break; } } } pthread_mutex_unlock (&logsys_config_mutex); return res; } +/* + * Set the format string used for log output. + * + * format: format string to install, or NULL to use "%7p [%6g] %b". + * + * The string is duplicated into format_buffer and installed on + * QB_LOG_STDERR. A copy with the %t and %p specifiers (including any + * width digits) removed is installed on QB_LOG_SYSLOG; that copy is + * truncated to fit syslog_format. + * + * Returns 0 on success, -1 if the format string cannot be duplicated + * (in which case no format is installed). + * + * NOTE(review): a '%' that directly follows a stripped specifier is + * copied without being examined, so "%t%p" leaves "%p" in the syslog + * format - confirm whether that matters for supported formats. + */ int logsys_format_set (const char *format) { int ret = 0; - - pthread_mutex_lock (&logsys_config_mutex); + int c; + int w; + int reminder; + char syslog_format[128]; if (format_buffer) { free(format_buffer); format_buffer = NULL; } - format_buffer = strdup(format ? format : "%p [%6s] %b"); + format_buffer = strdup(format ? format : "%7p [%6g] %b"); if (format_buffer == NULL) { - ret = -1; + /* + * strdup() failed: there is no string to hand to + * qb_log_format_set() or to scan below, so stop here + * instead of dereferencing NULL. + */ + return -1; } + qb_log_format_set(QB_LOG_STDERR, format_buffer); + + /* + * This just goes through and removes %t and %p from + * the format string for syslog. 
+ * At most sizeof(syslog_format) - 1 characters are copied, so the + * memset() below always leaves a terminating NUL in the buffer. + */ + w = 0; + memset(syslog_format, '\0', sizeof(syslog_format)); + for (c = 0; c < strlen(format_buffer) && + w < (int)sizeof(syslog_format) - 1; c++) { + if (format_buffer[c] == '%') { + reminder = c; + for (c++; c < strlen(format_buffer); c++) { + /* cast: isdigit() is only defined for unsigned char values and EOF */ + if (isdigit((unsigned char)format_buffer[c])) { + continue; + } + if (format_buffer[c] == 't' || + format_buffer[c] == 'p') { + c++; + } else { + c = reminder; + } + break; + } + } + syslog_format[w] = format_buffer[c]; + w++; + } +// printf("normal_format: %s\n", format_buffer); +// printf("syslog_format: %s\n", syslog_format); + qb_log_format_set(QB_LOG_SYSLOG, syslog_format); - pthread_mutex_unlock (&logsys_config_mutex); return ret; } char *logsys_format_get (void) { return format_buffer; } int logsys_config_syslog_facility_set ( const char *subsys, unsigned int facility) { - int i; - - pthread_mutex_lock (&logsys_config_mutex); - if (subsys != NULL) { - i = _logsys_config_subsys_get_unlocked (subsys); - if (i >= 0) { - logsys_loggers[i].syslog_facility = facility; - if (i == LOGSYS_MAX_SUBSYS_COUNT) { - syslog_facility_reconf(); - } - i = 0; - } - } else { - for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { - logsys_loggers[i].syslog_facility = facility; - } - syslog_facility_reconf(); - i = 0; - } - pthread_mutex_unlock (&logsys_config_mutex); - - return i; + return qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_FACILITY, facility); } int logsys_config_syslog_priority_set ( const char *subsys, unsigned int priority) { int i; pthread_mutex_lock (&logsys_config_mutex); if (subsys != NULL) { i = _logsys_config_subsys_get_unlocked (subsys); if (i >= 0) { logsys_loggers[i].syslog_priority = priority; + logsys_loggers[i].dirty = QB_TRUE; + i = 0; } } else { for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { logsys_loggers[i].syslog_priority = priority; + logsys_loggers[i].dirty = QB_TRUE; } i = 0; } pthread_mutex_unlock (&logsys_config_mutex); return i; } int logsys_config_logfile_priority_set ( const char *subsys, unsigned int priority) { int i; pthread_mutex_lock 
(&logsys_config_mutex); if (subsys != NULL) { i = _logsys_config_subsys_get_unlocked (subsys); if (i >= 0) { logsys_loggers[i].logfile_priority = priority; + logsys_loggers[i].dirty = QB_TRUE; i = 0; } } else { for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { logsys_loggers[i].logfile_priority = priority; + logsys_loggers[i].dirty = QB_TRUE; } i = 0; } pthread_mutex_unlock (&logsys_config_mutex); return i; } + +static void _logsys_config_apply_per_file(int32_t s, const char *filename) +{ + qb_log_filter_ctl(s, QB_LOG_TAG_SET, QB_LOG_FILTER_FILE, + filename, LOG_TRACE); + + qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_REMOVE, + QB_LOG_FILTER_FILE, filename, LOG_TRACE); + qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_REMOVE, + QB_LOG_FILTER_FILE, filename, LOG_TRACE); + + qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, + QB_LOG_FILTER_FILE, filename, + logsys_loggers[s].syslog_priority); + qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD, + QB_LOG_FILTER_FILE, filename, + logsys_loggers[s].logfile_priority); +} + +static void _logsys_config_apply_per_subsys(int32_t s) +{ + int32_t f; + for (f = 0; f < logsys_loggers[s].file_idx; f++) { + _logsys_config_apply_per_file(s, logsys_loggers[s].files[f]); + } + logsys_loggers[s].dirty = QB_FALSE; +} + +void logsys_config_apply(void) +{ + int32_t s; + + for (s = 0; s <= LOGSYS_MAX_SUBSYS_COUNT; s++) { + if (strcmp(logsys_loggers[s].subsys, "") == 0) { + continue; + } + _logsys_config_apply_per_subsys(s); + } +} + int logsys_config_debug_set ( const char *subsys, unsigned int debug) { int i; pthread_mutex_lock (&logsys_config_mutex); if (subsys != NULL) { i = _logsys_config_subsys_get_unlocked (subsys); if (i >= 0) { logsys_loggers[i].debug = debug; i = 0; } } else { for (i = 0; i <= LOGSYS_MAX_SUBSYS_COUNT; i++) { logsys_loggers[i].debug = debug; } i = 0; } pthread_mutex_unlock (&logsys_config_mutex); return i; } int logsys_facility_id_get (const char *name) { unsigned int i; for (i = 0; facilitynames[i].c_name != 
NULL; i++) { if (strcasecmp(name, facilitynames[i].c_name) == 0) { return (facilitynames[i].c_val); } } return (-1); } const char *logsys_facility_name_get (unsigned int facility) { unsigned int i; for (i = 0; facilitynames[i].c_name != NULL; i++) { if (facility == facilitynames[i].c_val) { return (facilitynames[i].c_name); } } return (NULL); } int logsys_priority_id_get (const char *name) { unsigned int i; for (i = 0; prioritynames[i].c_name != NULL; i++) { if (strcasecmp(name, prioritynames[i].c_name) == 0) { return (prioritynames[i].c_val); } } return (-1); } const char *logsys_priority_name_get (unsigned int priority) { unsigned int i; for (i = 0; prioritynames[i].c_name != NULL; i++) { if (priority == prioritynames[i].c_val) { return (prioritynames[i].c_name); } } return (NULL); } -int logsys_thread_priority_set ( - int policy, - const struct sched_param *param, - unsigned int after_log_ops_yield) - -{ - int res = 0; - if (param == NULL) { - return (0); - } - -#if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) - if (wthread_active == 0) { - logsys_sched_policy = policy; - memcpy(&logsys_sched_param, param, sizeof(struct sched_param)); - logsys_sched_param_queued = 1; - } else { - res = pthread_setschedparam (logsys_thread_id, policy, param); - } -#endif - - if (after_log_ops_yield > 0) { - logsys_after_log_ops_yield = after_log_ops_yield; - } - - return (res); -} - -int logsys_log_rec_store (const char *filename) -{ - int fd; - ssize_t written_size = 0; - size_t this_write_size; - - fd = open (filename, O_CREAT|O_RDWR, 0700); - if (fd < 0) { - return (-1); - } - - logsys_flt_lock(); - - this_write_size = write (fd, &flt_data_size, sizeof(uint32_t)); - if (this_write_size != sizeof(unsigned int)) { - goto error_exit; - } - written_size += this_write_size; - - this_write_size = write (fd, flt_data, flt_data_size * sizeof (uint32_t)); - if (this_write_size != (flt_data_size * sizeof(uint32_t))) { - goto error_exit; - } - written_size 
+= this_write_size; - - this_write_size = write (fd, &flt_head, sizeof (uint32_t)); - if (this_write_size != (sizeof(uint32_t))) { - goto error_exit; - } - written_size += this_write_size; - this_write_size = write (fd, &flt_tail, sizeof (uint32_t)); - if (this_write_size != (sizeof(uint32_t))) { - goto error_exit; - } - written_size += this_write_size; - if (written_size != ((flt_data_size + 3) * sizeof (uint32_t))) { - goto error_exit; - } - - logsys_flt_unlock(); - close (fd); - return (0); - -error_exit: - logsys_flt_unlock(); - close (fd); - return (-1); -} - -void logsys_atexit (void) -{ - int res; - int value; - struct record *rec; - - if (wthread_active == 0) { - for (;;) { - logsys_wthread_lock(); - - res = sem_getvalue (&logsys_print_finished, &value); - if (res != 0 || value == 0) { - logsys_wthread_unlock(); - return; - } - sem_wait (&logsys_print_finished); - - rec = list_entry (logsys_print_finished_records.next, struct record, list); - list_del (&rec->list); - logsys_memory_used = logsys_memory_used - strlen (rec->buffer) - - sizeof (struct record) - 1; - logsys_wthread_unlock(); - log_printf_to_logs ( - rec->rec_ident, - rec->file_name, - rec->function_name, - rec->file_line, - rec->buffer); - free (rec->buffer); - free (rec); - } - } else { - wthread_should_exit = 1; - sem_post (&logsys_print_finished); - pthread_join (logsys_thread_id, NULL); - } -} - -void logsys_flush (void) -{ -} diff --git a/exec/main.c b/exec/main.c index bcc0da35..d766c6c7 100644 --- a/exec/main.c +++ b/exec/main.c @@ -1,1516 +1,1538 @@ /* * Copyright (c) 2002-2006 MontaVista Software, Inc. * Copyright (c) 2006-2009 Red Hat, Inc. * * All rights reserved. 
* * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /** * \mainpage Corosync * * This is the doxygen generated developer documentation for the Corosync * project. For more information about Corosync, please see the project * web site, corosync.org. 
* * \section license License * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include "quorum.h" #include "totemsrp.h" #include "mainconfig.h" #include "totemconfig.h" #include "main.h" #include "sync.h" #include "syncv2.h" #include "timer.h" #include "util.h" #include "apidef.h" #include "service.h" #include "schedwrk.h" #include "evil.h" #ifdef HAVE_SMALL_MEMORY_FOOTPRINT #define IPC_LOGSYS_SIZE 1024*64 #else #define IPC_LOGSYS_SIZE 8192*128 #endif LOGSYS_DECLARE_SYSTEM ("corosync", - LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_THREADED | LOGSYS_MODE_FORK, - 0, - NULL, - LOG_INFO, + LOGSYS_MODE_OUTPUT_STDERR, LOG_DAEMON, - LOG_INFO, - NULL, - IPC_LOGSYS_SIZE); + LOG_INFO); LOGSYS_DECLARE_SUBSYS ("MAIN"); #define SERVER_BACKLOG 5 static int sched_priority = 0; static unsigned int service_count = 32; static struct totem_logging_configuration totem_logging_configuration; static int num_config_modules; static struct config_iface_ver0 *config_modules[MAX_DYNAMIC_SERVICES]; static struct objdb_iface_ver0 *objdb = NULL; static struct corosync_api_v1 *api = NULL; static enum cs_sync_mode minimum_sync_mode; static int sync_in_process = 1; static qb_loop_t *corosync_poll_handle; struct sched_param global_sched_param; static hdb_handle_t object_memb_handle; static corosync_timer_handle_t corosync_stats_timer_handle; static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid"; qb_loop_t *cs_poll_handle_get (void) { return (corosync_poll_handle); } int cs_poll_dispatch_add (qb_loop_t * handle, int fd, int events, void *data, int (*dispatch_fn) (int fd, int revents, void *data)) { return qb_loop_poll_add(handle, QB_LOOP_MED, fd, events, data, dispatch_fn); } int cs_poll_dispatch_delete(qb_loop_t * 
handle, int fd) { return qb_loop_poll_del(handle, fd); } void corosync_state_dump (void) { int i; for (i = 0; i < SERVICE_HANDLER_MAXIMUM_COUNT; i++) { if (ais_service[i] && ais_service[i]->exec_dump_fn) { ais_service[i]->exec_dump_fn (); } } } static void unlink_all_completed (void) { api->timer_delete (corosync_stats_timer_handle); qb_loop_stop (corosync_poll_handle); } void corosync_shutdown_request (void) { corosync_service_unlink_all (api, unlink_all_completed); } static int32_t sig_diag_handler (int num, void *data) { corosync_state_dump (); - logsys_log_rec_store (LOCALSTATEDIR "/lib/corosync/fdata"); + qb_log_blackbox_write_to_file(LOCALSTATEDIR "/lib/corosync/fdata"); return 0; } static int32_t sig_exit_handler (int num, void *data) { corosync_service_unlink_all (api, unlink_all_completed); return 0; } static void sigsegv_handler (int num) { (void)signal (SIGSEGV, SIG_DFL); - logsys_atexit(); - logsys_log_rec_store (LOCALSTATEDIR "/lib/corosync/fdata"); + qb_log_blackbox_write_to_file(LOCALSTATEDIR "/lib/corosync/fdata"); + qb_log_fini(); raise (SIGSEGV); } static void sigabrt_handler (int num) { (void)signal (SIGABRT, SIG_DFL); - logsys_atexit(); - logsys_log_rec_store (LOCALSTATEDIR "/lib/corosync/fdata"); + qb_log_blackbox_write_to_file(LOCALSTATEDIR "/lib/corosync/fdata"); + qb_log_fini(); raise (SIGABRT); } #define LOCALHOST_IP inet_addr("127.0.0.1") static hdb_handle_t corosync_group_handle; static struct totempg_group corosync_group = { .group = "a", .group_len = 1 }; static void serialize_lock (void) { } static void serialize_unlock (void) { } static void corosync_sync_completed (void) { log_printf (LOGSYS_LEVEL_NOTICE, "Completed service synchronization, ready to provide service.\n"); sync_in_process = 0; cs_ipcs_sync_state_changed(sync_in_process); } static int corosync_sync_callbacks_retrieve (int sync_id, struct sync_callbacks *callbacks) { unsigned int ais_service_index; int res; for (ais_service_index = 0; ais_service_index < 
SERVICE_HANDLER_MAXIMUM_COUNT; ais_service_index++) { if (ais_service[ais_service_index] != NULL && (ais_service[ais_service_index]->sync_mode == CS_SYNC_V1 || ais_service[ais_service_index]->sync_mode == CS_SYNC_V1_APIV2)) { if (ais_service_index == sync_id) { break; } } } /* * Try to load backwards compat sync engines */ if (ais_service_index == SERVICE_HANDLER_MAXIMUM_COUNT) { res = evil_callbacks_load (sync_id, callbacks); return (res); } callbacks->name = ais_service[ais_service_index]->name; callbacks->sync_init_api.sync_init_v1 = ais_service[ais_service_index]->sync_init; callbacks->api_version = 1; if (ais_service[ais_service_index]->sync_mode == CS_SYNC_V1_APIV2) { callbacks->api_version = 2; } callbacks->sync_process = ais_service[ais_service_index]->sync_process; callbacks->sync_activate = ais_service[ais_service_index]->sync_activate; callbacks->sync_abort = ais_service[ais_service_index]->sync_abort; return (0); } static int corosync_sync_v2_callbacks_retrieve ( int service_id, struct sync_callbacks *callbacks) { int res; if (minimum_sync_mode == CS_SYNC_V2 && service_id == CLM_SERVICE && ais_service[CLM_SERVICE] == NULL) { res = evil_callbacks_load (service_id, callbacks); return (res); } if (minimum_sync_mode == CS_SYNC_V2 && service_id == EVT_SERVICE && ais_service[EVT_SERVICE] == NULL) { res = evil_callbacks_load (service_id, callbacks); return (res); } if (ais_service[service_id] == NULL) { return (-1); } if (minimum_sync_mode == CS_SYNC_V1 && ais_service[service_id]->sync_mode != CS_SYNC_V2) { return (-1); } callbacks->name = ais_service[service_id]->name; callbacks->api_version = 1; if (ais_service[service_id]->sync_mode == CS_SYNC_V1_APIV2) { callbacks->api_version = 2; } callbacks->sync_init_api.sync_init_v1 = ais_service[service_id]->sync_init; callbacks->sync_process = ais_service[service_id]->sync_process; callbacks->sync_activate = ais_service[service_id]->sync_activate; callbacks->sync_abort = ais_service[service_id]->sync_abort; return 
(0); } static struct memb_ring_id corosync_ring_id; static void member_object_joined (unsigned int nodeid) { hdb_handle_t object_find_handle; hdb_handle_t object_node_handle; char * nodeint_str; char nodeid_str[64]; unsigned int key_incr_dummy; snprintf (nodeid_str, 64, "%d", nodeid); objdb->object_find_create ( object_memb_handle, nodeid_str, strlen (nodeid_str), &object_find_handle); if (objdb->object_find_next (object_find_handle, &object_node_handle) == 0) { objdb->object_key_increment (object_node_handle, "join_count", strlen("join_count"), &key_incr_dummy); objdb->object_key_replace (object_node_handle, "status", strlen("status"), "joined", strlen("joined")); } else { nodeint_str = (char*)api->totem_ifaces_print (nodeid); objdb->object_create (object_memb_handle, &object_node_handle, nodeid_str, strlen (nodeid_str)); objdb->object_key_create_typed (object_node_handle, "ip", nodeint_str, strlen(nodeint_str), OBJDB_VALUETYPE_STRING); key_incr_dummy = 1; objdb->object_key_create_typed (object_node_handle, "join_count", &key_incr_dummy, sizeof (key_incr_dummy), OBJDB_VALUETYPE_UINT32); objdb->object_key_create_typed (object_node_handle, "status", "joined", strlen("joined"), OBJDB_VALUETYPE_STRING); } } static void member_object_left (unsigned int nodeid) { hdb_handle_t object_find_handle; hdb_handle_t object_node_handle; char nodeid_str[64]; snprintf (nodeid_str, 64, "%u", nodeid); objdb->object_find_create ( object_memb_handle, nodeid_str, strlen (nodeid_str), &object_find_handle); if (objdb->object_find_next (object_find_handle, &object_node_handle) == 0) { objdb->object_key_replace (object_node_handle, "status", strlen("status"), "left", strlen("left")); } } static void confchg_fn ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id) { int i; int 
abort_activate = 0; if (sync_in_process == 1) { abort_activate = 1; } sync_in_process = 1; cs_ipcs_sync_state_changed(sync_in_process); memcpy (&corosync_ring_id, ring_id, sizeof (struct memb_ring_id)); for (i = 0; i < left_list_entries; i++) { member_object_left (left_list[i]); } for (i = 0; i < joined_list_entries; i++) { member_object_joined (joined_list[i]); } /* * Call configuration change for all services */ for (i = 0; i < service_count; i++) { if (ais_service[i] && ais_service[i]->confchg_fn) { ais_service[i]->confchg_fn (configuration_type, member_list, member_list_entries, left_list, left_list_entries, joined_list, joined_list_entries, ring_id); } } if (abort_activate) { sync_v2_abort (); } if (minimum_sync_mode == CS_SYNC_V2 && configuration_type == TOTEM_CONFIGURATION_TRANSITIONAL) { sync_v2_save_transitional (member_list, member_list_entries, ring_id); } if (minimum_sync_mode == CS_SYNC_V2 && configuration_type == TOTEM_CONFIGURATION_REGULAR) { sync_v2_start (member_list, member_list_entries, ring_id); } } static void priv_drop (void) { return; /* TODO: we are still not dropping privs */ } static void corosync_tty_detach (void) { FILE *r; /* * Disconnect from TTY if this is not a debug run */ switch (fork ()) { case -1: corosync_exit_error (AIS_DONE_FORK); break; case 0: /* * child which is disconnected, run this process */ break; default: exit (0); break; } /* Create new session */ (void)setsid(); /* * Map stdin/out/err to /dev/null. 
*/ r = freopen("/dev/null", "r", stdin); if (r == NULL) { corosync_exit_error (AIS_DONE_STD_TO_NULL_REDIR); } r = freopen("/dev/null", "a", stderr); if (r == NULL) { corosync_exit_error (AIS_DONE_STD_TO_NULL_REDIR); } r = freopen("/dev/null", "a", stdout); if (r == NULL) { corosync_exit_error (AIS_DONE_STD_TO_NULL_REDIR); } } static void corosync_mlockall (void) { #if !defined(COROSYNC_BSD) || defined(COROSYNC_FREEBSD_GE_8) int res; #endif struct rlimit rlimit; rlimit.rlim_cur = RLIM_INFINITY; rlimit.rlim_max = RLIM_INFINITY; #ifndef COROSYNC_SOLARIS setrlimit (RLIMIT_MEMLOCK, &rlimit); #else setrlimit (RLIMIT_VMEM, &rlimit); #endif #if defined(COROSYNC_BSD) && !defined(COROSYNC_FREEBSD_GE_8) /* under FreeBSD < 8 a process with locked page cannot call dlopen * code disabled until FreeBSD bug i386/93396 was solved */ log_printf (LOGSYS_LEVEL_WARNING, "Could not lock memory of service to avoid page faults\n"); #else res = mlockall (MCL_CURRENT | MCL_FUTURE); if (res == -1) { LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING, "Could not lock memory of service to avoid page faults"); }; #endif } static void corosync_totem_stats_updater (void *data) { totempg_stats_t * stats; uint32_t mtt_rx_token; uint32_t total_mtt_rx_token; uint32_t avg_backlog_calc; uint32_t total_backlog_calc; uint32_t avg_token_holdtime; uint32_t total_token_holdtime; int t, prev; int32_t token_count; uint32_t firewall_enabled_or_nic_failure; stats = api->totem_get_stats(); objdb->object_key_replace (stats->hdr.handle, "msg_reserved", strlen("msg_reserved"), &stats->msg_reserved, sizeof (stats->msg_reserved)); objdb->object_key_replace (stats->hdr.handle, "msg_queue_avail", strlen("msg_queue_avail"), &stats->msg_queue_avail, sizeof (stats->msg_queue_avail)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "orf_token_tx", strlen("orf_token_tx"), &stats->mrp->srp->orf_token_tx, sizeof (stats->mrp->srp->orf_token_tx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "orf_token_rx", 
strlen("orf_token_rx"), &stats->mrp->srp->orf_token_rx, sizeof (stats->mrp->srp->orf_token_rx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "memb_merge_detect_tx", strlen("memb_merge_detect_tx"), &stats->mrp->srp->memb_merge_detect_tx, sizeof (stats->mrp->srp->memb_merge_detect_tx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "memb_merge_detect_rx", strlen("memb_merge_detect_rx"), &stats->mrp->srp->memb_merge_detect_rx, sizeof (stats->mrp->srp->memb_merge_detect_rx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "memb_join_tx", strlen("memb_join_tx"), &stats->mrp->srp->memb_join_tx, sizeof (stats->mrp->srp->memb_join_tx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "memb_join_rx", strlen("memb_join_rx"), &stats->mrp->srp->memb_join_rx, sizeof (stats->mrp->srp->memb_join_rx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "mcast_tx", strlen("mcast_tx"), &stats->mrp->srp->mcast_tx, sizeof (stats->mrp->srp->mcast_tx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "mcast_retx", strlen("mcast_retx"), &stats->mrp->srp->mcast_retx, sizeof (stats->mrp->srp->mcast_retx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "mcast_rx", strlen("mcast_rx"), &stats->mrp->srp->mcast_rx, sizeof (stats->mrp->srp->mcast_rx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "memb_commit_token_tx", strlen("memb_commit_token_tx"), &stats->mrp->srp->memb_commit_token_tx, sizeof (stats->mrp->srp->memb_commit_token_tx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "memb_commit_token_rx", strlen("memb_commit_token_rx"), &stats->mrp->srp->memb_commit_token_rx, sizeof (stats->mrp->srp->memb_commit_token_rx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "token_hold_cancel_tx", strlen("token_hold_cancel_tx"), &stats->mrp->srp->token_hold_cancel_tx, sizeof (stats->mrp->srp->token_hold_cancel_tx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "token_hold_cancel_rx", 
strlen("token_hold_cancel_rx"), &stats->mrp->srp->token_hold_cancel_rx, sizeof (stats->mrp->srp->token_hold_cancel_rx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "operational_entered", strlen("operational_entered"), &stats->mrp->srp->operational_entered, sizeof (stats->mrp->srp->operational_entered)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "operational_token_lost", strlen("operational_token_lost"), &stats->mrp->srp->operational_token_lost, sizeof (stats->mrp->srp->operational_token_lost)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "gather_entered", strlen("gather_entered"), &stats->mrp->srp->gather_entered, sizeof (stats->mrp->srp->gather_entered)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "gather_token_lost", strlen("gather_token_lost"), &stats->mrp->srp->gather_token_lost, sizeof (stats->mrp->srp->gather_token_lost)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "commit_entered", strlen("commit_entered"), &stats->mrp->srp->commit_entered, sizeof (stats->mrp->srp->commit_entered)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "commit_token_lost", strlen("commit_token_lost"), &stats->mrp->srp->commit_token_lost, sizeof (stats->mrp->srp->commit_token_lost)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "recovery_entered", strlen("recovery_entered"), &stats->mrp->srp->recovery_entered, sizeof (stats->mrp->srp->recovery_entered)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "recovery_token_lost", strlen("recovery_token_lost"), &stats->mrp->srp->recovery_token_lost, sizeof (stats->mrp->srp->recovery_token_lost)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "consensus_timeouts", strlen("consensus_timeouts"), &stats->mrp->srp->consensus_timeouts, sizeof (stats->mrp->srp->consensus_timeouts)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "rx_msg_dropped", strlen("rx_msg_dropped"), &stats->mrp->srp->rx_msg_dropped, sizeof 
(stats->mrp->srp->rx_msg_dropped)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "continuous_gather", strlen("continuous_gather"), &stats->mrp->srp->continuous_gather, sizeof (stats->mrp->srp->continuous_gather)); firewall_enabled_or_nic_failure = (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ? 1 : 0); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "firewall_enabled_or_nic_failure", strlen("firewall_enabled_or_nic_failure"), &firewall_enabled_or_nic_failure, sizeof (firewall_enabled_or_nic_failure)); total_mtt_rx_token = 0; total_token_holdtime = 0; total_backlog_calc = 0; token_count = 0; t = stats->mrp->srp->latest_token; while (1) { if (t == 0) prev = TOTEM_TOKEN_STATS_MAX - 1; else prev = t - 1; if (prev == stats->mrp->srp->earliest_token) break; /* if tx == 0, then dropped token (not ours) */ if (stats->mrp->srp->token[t].tx != 0 || (stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx) > 0 ) { total_mtt_rx_token += (stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx); total_token_holdtime += (stats->mrp->srp->token[t].tx - stats->mrp->srp->token[t].rx); total_backlog_calc += stats->mrp->srp->token[t].backlog_calc; token_count++; } t = prev; } if (token_count) { mtt_rx_token = (total_mtt_rx_token / token_count); avg_backlog_calc = (total_backlog_calc / token_count); avg_token_holdtime = (total_token_holdtime / token_count); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "mtt_rx_token", strlen("mtt_rx_token"), &mtt_rx_token, sizeof (mtt_rx_token)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "avg_token_workload", strlen("avg_token_workload"), &avg_token_holdtime, sizeof (avg_token_holdtime)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "avg_backlog_calc", strlen("avg_backlog_calc"), &avg_backlog_calc, sizeof (avg_backlog_calc)); } cs_ipcs_stats_update(); api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL, corosync_totem_stats_updater, &corosync_stats_timer_handle); } 
static void corosync_totem_stats_init (void) { totempg_stats_t * stats; hdb_handle_t object_find_handle; hdb_handle_t object_runtime_handle; hdb_handle_t object_totem_handle; uint32_t zero_32 = 0; uint64_t zero_64 = 0; stats = api->totem_get_stats(); objdb->object_find_create ( OBJECT_PARENT_HANDLE, "runtime", strlen ("runtime"), &object_find_handle); if (objdb->object_find_next (object_find_handle, &object_runtime_handle) == 0) { objdb->object_create (object_runtime_handle, &object_totem_handle, "totem", strlen ("totem")); objdb->object_create (object_totem_handle, &stats->hdr.handle, "pg", strlen ("pg")); objdb->object_create (stats->hdr.handle, &stats->mrp->hdr.handle, "mrp", strlen ("mrp")); objdb->object_create (stats->mrp->hdr.handle, &stats->mrp->srp->hdr.handle, "srp", strlen ("srp")); objdb->object_key_create_typed (stats->hdr.handle, "msg_reserved", &stats->msg_reserved, sizeof (stats->msg_reserved), OBJDB_VALUETYPE_UINT32); objdb->object_key_create_typed (stats->hdr.handle, "msg_queue_avail", &stats->msg_queue_avail, sizeof (stats->msg_queue_avail), OBJDB_VALUETYPE_UINT32); /* Members object */ objdb->object_create (stats->mrp->srp->hdr.handle, &object_memb_handle, "members", strlen ("members")); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "orf_token_tx", &stats->mrp->srp->orf_token_tx, sizeof (stats->mrp->srp->orf_token_tx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "orf_token_rx", &stats->mrp->srp->orf_token_rx, sizeof (stats->mrp->srp->orf_token_rx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "memb_merge_detect_tx", &stats->mrp->srp->memb_merge_detect_tx, sizeof (stats->mrp->srp->memb_merge_detect_tx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "memb_merge_detect_rx", &stats->mrp->srp->memb_merge_detect_rx, sizeof (stats->mrp->srp->memb_merge_detect_rx), OBJDB_VALUETYPE_UINT64); 
objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "memb_join_tx", &stats->mrp->srp->memb_join_tx, sizeof (stats->mrp->srp->memb_join_tx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "memb_join_rx", &stats->mrp->srp->memb_join_rx, sizeof (stats->mrp->srp->memb_join_rx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "mcast_tx", &stats->mrp->srp->mcast_tx, sizeof (stats->mrp->srp->mcast_tx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "mcast_retx", &stats->mrp->srp->mcast_retx, sizeof (stats->mrp->srp->mcast_retx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "mcast_rx", &stats->mrp->srp->mcast_rx, sizeof (stats->mrp->srp->mcast_rx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "memb_commit_token_tx", &stats->mrp->srp->memb_commit_token_tx, sizeof (stats->mrp->srp->memb_commit_token_tx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "memb_commit_token_rx", &stats->mrp->srp->memb_commit_token_rx, sizeof (stats->mrp->srp->memb_commit_token_rx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "token_hold_cancel_tx", &stats->mrp->srp->token_hold_cancel_tx, sizeof (stats->mrp->srp->token_hold_cancel_tx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "token_hold_cancel_rx", &stats->mrp->srp->token_hold_cancel_rx, sizeof (stats->mrp->srp->token_hold_cancel_rx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "operational_entered", &stats->mrp->srp->operational_entered, sizeof (stats->mrp->srp->operational_entered), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "operational_token_lost", &stats->mrp->srp->operational_token_lost, sizeof 
(stats->mrp->srp->operational_token_lost), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "gather_entered", &stats->mrp->srp->gather_entered, sizeof (stats->mrp->srp->gather_entered), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "gather_token_lost", &stats->mrp->srp->gather_token_lost, sizeof (stats->mrp->srp->gather_token_lost), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "commit_entered", &stats->mrp->srp->commit_entered, sizeof (stats->mrp->srp->commit_entered), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "commit_token_lost", &stats->mrp->srp->commit_token_lost, sizeof (stats->mrp->srp->commit_token_lost), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "recovery_entered", &stats->mrp->srp->recovery_entered, sizeof (stats->mrp->srp->recovery_entered), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "recovery_token_lost", &stats->mrp->srp->recovery_token_lost, sizeof (stats->mrp->srp->recovery_token_lost), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "consensus_timeouts", &stats->mrp->srp->consensus_timeouts, sizeof (stats->mrp->srp->consensus_timeouts), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "mtt_rx_token", &zero_32, sizeof (zero_32), OBJDB_VALUETYPE_UINT32); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "avg_token_workload", &zero_32, sizeof (zero_32), OBJDB_VALUETYPE_UINT32); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "avg_backlog_calc", &zero_32, sizeof (zero_32), OBJDB_VALUETYPE_UINT32); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "rx_msg_dropped", &zero_64, sizeof (zero_64), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "continuous_gather", 
&zero_32, sizeof (zero_32), OBJDB_VALUETYPE_UINT32); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "firewall_enabled_or_nic_failure", &zero_32, sizeof (zero_32), OBJDB_VALUETYPE_UINT32); } /* start stats timer */ api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL, corosync_totem_stats_updater, &corosync_stats_timer_handle); } static void deliver_fn ( unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required) { const struct qb_ipc_request_header *header; int32_t service; int32_t fn_id; uint32_t id; uint32_t key_incr_dummy; header = msg; if (endian_conversion_required) { id = swab32 (header->id); } else { id = header->id; } /* * Call the proper executive handler */ service = id >> 16; fn_id = id & 0xffff; if (ais_service[service] == NULL && service == EVT_SERVICE) { evil_deliver_fn (nodeid, service, fn_id, msg, endian_conversion_required); } if (!ais_service[service]) { return; } if (fn_id >= ais_service[service]->exec_engine_count) { log_printf(LOGSYS_LEVEL_WARNING, "discarded unknown message %d for service %d (max id %d)", fn_id, service, ais_service[service]->exec_engine_count); return; } objdb->object_key_increment (service_stats_handle[service][fn_id], "rx", strlen("rx"), &key_incr_dummy); if (endian_conversion_required) { assert(ais_service[service]->exec_engine[fn_id].exec_endian_convert_fn != NULL); ais_service[service]->exec_engine[fn_id].exec_endian_convert_fn ((void *)msg); } ais_service[service]->exec_engine[fn_id].exec_handler_fn (msg, nodeid); } void main_get_config_modules(struct config_iface_ver0 ***modules, int *num) { *modules = config_modules; *num = num_config_modules; } int main_mcast ( const struct iovec *iovec, unsigned int iov_len, unsigned int guarantee) { const struct qb_ipc_request_header *req = iovec->iov_base; int32_t service; int32_t fn_id; uint32_t key_incr_dummy; service = req->id >> 16; fn_id = req->id & 0xffff; if (ais_service[service]) { objdb->object_key_increment 
(service_stats_handle[service][fn_id], "tx", strlen("tx"), &key_incr_dummy); } return (totempg_groups_mcast_joined (corosync_group_handle, iovec, iov_len, guarantee)); } static qb_loop_timer_handle recheck_the_q_level_timer; void corosync_recheck_the_q_level(void *data) { totempg_check_q_level(corosync_group_handle); if (cs_ipcs_q_level_get() == TOTEM_Q_LEVEL_CRITICAL) { qb_loop_timer_add(cs_poll_handle_get(), QB_LOOP_MED, 1*QB_TIME_NS_IN_MSEC, NULL, corosync_recheck_the_q_level, &recheck_the_q_level_timer); } } struct sending_allowed_private_data_struct { int reserved_msgs; }; int corosync_sending_allowed ( unsigned int service, unsigned int id, const void *msg, void *sending_allowed_private_data) { struct sending_allowed_private_data_struct *pd = (struct sending_allowed_private_data_struct *)sending_allowed_private_data; struct iovec reserve_iovec; struct qb_ipc_request_header *header = (struct qb_ipc_request_header *)msg; int sending_allowed; reserve_iovec.iov_base = (char *)header; reserve_iovec.iov_len = header->size; pd->reserved_msgs = totempg_groups_joined_reserve ( corosync_group_handle, &reserve_iovec, 1); if (pd->reserved_msgs == -1) { return -EINVAL; } sending_allowed = QB_FALSE; if (corosync_quorum_is_quorate() == 1 || ais_service[service]->allow_inquorate == CS_LIB_ALLOW_INQUORATE) { // we are quorate // now check flow control if (ais_service[service]->lib_engine[id].flow_control == CS_LIB_FLOW_CONTROL_NOT_REQUIRED) { sending_allowed = QB_TRUE; } else if (pd->reserved_msgs && sync_in_process == 0) { sending_allowed = QB_TRUE; } else if (pd->reserved_msgs == 0) { return -ENOBUFS; } else /* (sync_in_process) */ { return -EINPROGRESS; } } else { return -EHOSTUNREACH; } return (sending_allowed); } void corosync_sending_allowed_release (void *sending_allowed_private_data) { struct sending_allowed_private_data_struct *pd = (struct sending_allowed_private_data_struct *)sending_allowed_private_data; if (pd->reserved_msgs == -1) { return; } 
totempg_groups_joined_release (pd->reserved_msgs); } int message_source_is_local (const mar_message_source_t *source) { int ret = 0; assert (source != NULL); if (source->nodeid == totempg_my_nodeid_get ()) { ret = 1; } return ret; } void message_source_set ( mar_message_source_t *source, void *conn) { assert ((source != NULL) && (conn != NULL)); memset (source, 0, sizeof (mar_message_source_t)); source->nodeid = totempg_my_nodeid_get (); source->conn = conn; } static void corosync_setscheduler (void) { #if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) && defined(HAVE_SCHED_SETSCHEDULER) int res; sched_priority = sched_get_priority_max (SCHED_RR); if (sched_priority != -1) { global_sched_param.sched_priority = sched_priority; res = sched_setscheduler (0, SCHED_RR, &global_sched_param); if (res == -1) { LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "Could not set SCHED_RR at priority %d", global_sched_param.sched_priority); global_sched_param.sched_priority = 0; logsys_thread_priority_set (SCHED_OTHER, NULL, 1); } else { /* * Turn on SCHED_RR in logsys system */ res = logsys_thread_priority_set (SCHED_RR, &global_sched_param, 10); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Could not set logsys thread priority." " Can't continue because of priority inversions."); corosync_exit_error (AIS_DONE_LOGSETUP); } } } else { LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING, "Could not get maximum scheduler priority"); sched_priority = 0; } #else log_printf(LOGSYS_LEVEL_WARNING, "The Platform is missing process priority setting features. Leaving at default."); #endif } +static void +_logsys_log_printf(int level, int subsys, + const char *function_name, + const char *file_name, + int file_line, + const char *format, + ...) __attribute__((format(printf, 6, 7))); + +static void +_logsys_log_printf(int level, int subsys, + const char *function_name, + const char *file_name, + int file_line, + const char *format, ...) 
+{ + va_list ap; + char buf[QB_LOG_MAX_LEN]; + size_t len; + + va_start(ap, format); + len = vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + + if (buf[len - 1] == '\n') { + buf[len - 1] = '\0'; + len -= 1; + } + + qb_log_from_external_source(function_name, file_name, + format, level, file_line, + subsys, buf); +} + static void fplay_key_change_notify_fn ( object_change_type_t change_type, hdb_handle_t parent_object_handle, hdb_handle_t object_handle, const void *object_name_pt, size_t object_name_len, const void *key_name_pt, size_t key_len, const void *key_value_pt, size_t key_value_len, void *priv_data_pt) { if (key_len == strlen ("dump_flight_data") && memcmp ("dump_flight_data", key_name_pt, key_len) == 0) { - logsys_log_rec_store (LOCALSTATEDIR "/lib/corosync/fdata"); + qb_log_blackbox_write_to_file (LOCALSTATEDIR "/lib/corosync/fdata"); } if (key_len == strlen ("dump_state") && memcmp ("dump_state", key_name_pt, key_len) == 0) { corosync_state_dump (); } } static void corosync_fplay_control_init (void) { hdb_handle_t object_find_handle; hdb_handle_t object_runtime_handle; hdb_handle_t object_blackbox_handle; objdb->object_find_create (OBJECT_PARENT_HANDLE, "runtime", strlen ("runtime"), &object_find_handle); if (objdb->object_find_next (object_find_handle, &object_runtime_handle) != 0) { return; } objdb->object_create (object_runtime_handle, &object_blackbox_handle, "blackbox", strlen ("blackbox")); objdb->object_key_create_typed (object_blackbox_handle, "dump_flight_data", "no", strlen("no"), OBJDB_VALUETYPE_STRING); objdb->object_key_create_typed (object_blackbox_handle, "dump_state", "no", strlen("no"), OBJDB_VALUETYPE_STRING); objdb->object_track_start (object_blackbox_handle, OBJECT_TRACK_DEPTH_RECURSIVE, fplay_key_change_notify_fn, NULL, NULL, NULL, NULL); } static void main_service_ready (void) { int res; /* * This must occur after totempg is initialized because "this_ip" must be set */ res = corosync_service_defaults_link_and_init (api); if 
(res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Could not initialize default services\n"); corosync_exit_error (AIS_DONE_INIT_SERVICES); } evil_init (api); cs_ipcs_init(); corosync_totem_stats_init (); corosync_fplay_control_init (); if (minimum_sync_mode == CS_SYNC_V2) { log_printf (LOGSYS_LEVEL_NOTICE, "Compatibility mode set to none. Using V2 of the synchronization engine.\n"); sync_v2_init ( corosync_sync_v2_callbacks_retrieve, corosync_sync_completed); } else if (minimum_sync_mode == CS_SYNC_V1) { log_printf (LOGSYS_LEVEL_NOTICE, "Compatibility mode set to whitetank. Using V1 and V2 of the synchronization engine.\n"); sync_register ( corosync_sync_callbacks_retrieve, sync_v2_memb_list_determine, sync_v2_memb_list_abort, sync_v2_start); sync_v2_init ( corosync_sync_v2_callbacks_retrieve, corosync_sync_completed); } } static enum e_ais_done corosync_flock (const char *lockfile, pid_t pid) { struct flock lock; enum e_ais_done err; char pid_s[17]; int fd_flag; int lf; err = AIS_DONE_EXIT; lf = open (lockfile, O_WRONLY | O_CREAT, 0640); if (lf == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create lock file.\n"); return (AIS_DONE_AQUIRE_LOCK); } retry_fcntl: lock.l_type = F_WRLCK; lock.l_start = 0; lock.l_whence = SEEK_SET; lock.l_len = 0; if (fcntl (lf, F_SETLK, &lock) == -1) { switch (errno) { case EINTR: goto retry_fcntl; break; case EAGAIN: case EACCES: log_printf (LOGSYS_LEVEL_ERROR, "Another Corosync instance is already running.\n"); err = AIS_DONE_ALREADY_RUNNING; goto error_close; break; default: log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't aquire lock. Error was %s\n", strerror(errno)); err = AIS_DONE_AQUIRE_LOCK; goto error_close; break; } } if (ftruncate (lf, 0) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't truncate lock file. 
Error was %s\n", strerror (errno)); err = AIS_DONE_AQUIRE_LOCK; goto error_close_unlink; } memset (pid_s, 0, sizeof (pid_s)); snprintf (pid_s, sizeof (pid_s) - 1, "%u\n", pid); retry_write: if (write (lf, pid_s, strlen (pid_s)) != strlen (pid_s)) { if (errno == EINTR) { goto retry_write; } else { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't write pid to lock file. " "Error was %s\n", strerror (errno)); err = AIS_DONE_AQUIRE_LOCK; goto error_close_unlink; } } if ((fd_flag = fcntl (lf, F_GETFD, 0)) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't get close-on-exec flag from lock file. " "Error was %s\n", strerror (errno)); err = AIS_DONE_AQUIRE_LOCK; goto error_close_unlink; } fd_flag |= FD_CLOEXEC; if (fcntl (lf, F_SETFD, fd_flag) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't set close-on-exec flag to lock file. " "Error was %s\n", strerror (errno)); err = AIS_DONE_AQUIRE_LOCK; goto error_close_unlink; } return (err); error_close_unlink: unlink (lockfile); error_close: close (lf); return (err); } int main (int argc, char **argv, char **envp) { const char *error_string; struct totem_config totem_config; hdb_handle_t objdb_handle; hdb_handle_t config_handle; unsigned int config_version = 0; void *objdb_p; struct config_iface_ver0 *config; void *config_p; const char *config_iface_init; char *config_iface; char *iface; char *strtok_save_pt; int res, ch; int background, setprio; struct stat stat_out; char corosync_lib_dir[PATH_MAX]; hdb_handle_t object_runtime_handle; enum e_ais_done flock_err; /* default configuration */ background = 1; setprio = 1; while ((ch = getopt (argc, argv, "fpv")) != EOF) { switch (ch) { case 'f': background = 0; logsys_config_mode_set (NULL, LOGSYS_MODE_OUTPUT_STDERR|LOGSYS_MODE_THREADED|LOGSYS_MODE_FORK); break; case 'p': setprio = 0; break; case 'v': printf ("Corosync Cluster Engine, version '%s'\n", VERSION); printf ("Copyright (c) 2006-2009 Red Hat, Inc.\n"); return EXIT_SUCCESS; 
break; default: fprintf(stderr, \ "usage:\n"\ " -f : Start application in foreground.\n"\ " -p : Do not set process priority. \n"\ " -v : Display version and SVN revision of Corosync and exit.\n"); return EXIT_FAILURE; } } /* * Set round robin realtime scheduling with priority 99 * Lock all memory to avoid page faults which may interrupt * application healthchecking */ if (setprio) { corosync_setscheduler (); } corosync_mlockall (); log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine ('%s'): started and ready to provide service.\n", VERSION); log_printf (LOGSYS_LEVEL_INFO, "Corosync built-in features:" PACKAGE_FEATURES "\n"); corosync_poll_handle = qb_loop_create (); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW, SIGUSR2, NULL, sig_diag_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGINT, NULL, sig_exit_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGQUIT, NULL, sig_exit_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGTERM, NULL, sig_exit_handler, NULL); (void)signal (SIGSEGV, sigsegv_handler); (void)signal (SIGABRT, sigabrt_handler); #if MSG_NOSIGNAL != 0 (void)signal (SIGPIPE, SIG_IGN); #endif /* * Load the object database interface */ res = lcr_ifact_reference ( &objdb_handle, "objdb", 0, &objdb_p, 0); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't open configuration object database component.\n"); corosync_exit_error (AIS_DONE_OBJDB); } objdb = (struct objdb_iface_ver0 *)objdb_p; objdb->objdb_init (); /* * Initialize the corosync_api_v1 definition */ apidef_init (objdb); api = apidef_get (); num_config_modules = 0; /* * Bootstrap in the default configuration parser or use * the corosync default built in parser if the configuration parser * isn't overridden */ config_iface_init = getenv("COROSYNC_DEFAULT_CONFIG_IFACE"); if (!config_iface_init) { config_iface_init = "corosync_parser"; } /* Make a copy so we can deface it with strtok */ if 
((config_iface = strdup(config_iface_init)) == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "exhausted virtual memory"); corosync_exit_error (AIS_DONE_OBJDB); } iface = strtok_r(config_iface, ":", &strtok_save_pt); while (iface) { res = lcr_ifact_reference ( &config_handle, iface, config_version, &config_p, 0); config = (struct config_iface_ver0 *)config_p; if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't open configuration component '%s'\n", iface); corosync_exit_error (AIS_DONE_MAINCONFIGREAD); } res = config->config_readconfig(objdb, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (AIS_DONE_MAINCONFIGREAD); } log_printf (LOGSYS_LEVEL_NOTICE, "%s", error_string); config_modules[num_config_modules++] = config; iface = strtok_r(NULL, ":", &strtok_save_pt); } free(config_iface); res = corosync_main_config_read (objdb, &error_string); if (res == -1) { /* * if we are here, we _must_ flush the logsys queue * and try to inform that we couldn't read the config. * this is a desperate attempt before certain death * and there is no guarantee that we can print to stderr * nor that logsys is sending the messages where we expect. */ log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); fprintf(stderr, "%s", error_string); syslog (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (AIS_DONE_MAINCONFIGREAD); } /* * Make sure required directory is present */ sprintf (corosync_lib_dir, "%s/lib/corosync", LOCALSTATEDIR); res = stat (corosync_lib_dir, &stat_out); if ((res == -1) || (res == 0 && !S_ISDIR(stat_out.st_mode))) { log_printf (LOGSYS_LEVEL_ERROR, "Required directory not present %s. 
Please create it.\n", corosync_lib_dir); corosync_exit_error (AIS_DONE_DIR_NOT_PRESENT); } res = totem_config_read (objdb, &totem_config, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (AIS_DONE_MAINCONFIGREAD); } res = totem_config_keyread (objdb, &totem_config, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (AIS_DONE_MAINCONFIGREAD); } res = totem_config_validate (&totem_config, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (AIS_DONE_MAINCONFIGREAD); } totem_config.totem_logging_configuration = totem_logging_configuration; - totem_config.totem_logging_configuration.log_subsys_id = - _logsys_subsys_create ("TOTEM"); - - if (totem_config.totem_logging_configuration.log_subsys_id < 0) { - log_printf (LOGSYS_LEVEL_ERROR, - "Unable to initialize TOTEM logging subsystem\n"); - corosync_exit_error (AIS_DONE_MAINCONFIGREAD); - } - + totem_config.totem_logging_configuration.log_subsys_id = _logsys_subsys_create("TOTEM", "totem"); totem_config.totem_logging_configuration.log_level_security = LOGSYS_LEVEL_WARNING; totem_config.totem_logging_configuration.log_level_error = LOGSYS_LEVEL_ERROR; totem_config.totem_logging_configuration.log_level_warning = LOGSYS_LEVEL_WARNING; totem_config.totem_logging_configuration.log_level_notice = LOGSYS_LEVEL_NOTICE; totem_config.totem_logging_configuration.log_level_debug = LOGSYS_LEVEL_DEBUG; totem_config.totem_logging_configuration.log_printf = _logsys_log_printf; + logsys_config_apply(); res = corosync_main_config_compatibility_read (objdb, &minimum_sync_mode, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (AIS_DONE_MAINCONFIGREAD); } res = corosync_main_config_compatibility_read (objdb, &minimum_sync_mode, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); 
corosync_exit_error (AIS_DONE_MAINCONFIGREAD); } /* create the main runtime object */ objdb->object_create (OBJECT_PARENT_HANDLE, &object_runtime_handle, "runtime", strlen ("runtime")); /* * Now we are fully initialized. */ if (background) { corosync_tty_detach (); } - logsys_fork_completed(); + qb_log_thread_start(); if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) != AIS_DONE_EXIT) { corosync_exit_error (flock_err); } /* * if totempg_initialize doesn't have root priveleges, it cannot * bind to a specific interface. This only matters if * there is more then one interface in a system, so * in this case, only a warning is printed */ /* * Join multicast group and setup delivery * and configuration change functions */ totempg_initialize ( corosync_poll_handle, &totem_config); totempg_service_ready_register ( main_service_ready); totempg_groups_initialize ( &corosync_group_handle, deliver_fn, confchg_fn); totempg_groups_join ( corosync_group_handle, &corosync_group, 1); /* * Drop root privleges to user 'ais' * TODO: Don't really need full root capabilities; * needed capabilities are: * CAP_NET_RAW (bindtodevice) * CAP_SYS_NICE (setscheduler) * CAP_IPC_LOCK (mlockall) */ priv_drop (); schedwrk_init ( serialize_lock, serialize_unlock); /* * Start main processing loop */ qb_loop_run (corosync_poll_handle); /* * Exit was requested */ totempg_finalize (); /* * Remove pid lock file */ unlink (corosync_lock_file); corosync_exit_error (AIS_DONE_EXIT); return EXIT_SUCCESS; } diff --git a/exec/mainconfig.c b/exec/mainconfig.c index 7d4e520b..2db9a53d 100644 --- a/exec/mainconfig.c +++ b/exec/mainconfig.c @@ -1,765 +1,766 @@ /* * Copyright (c) 2002-2005 MontaVista Software, Inc. * Copyright (c) 2006-2009 Red Hat, Inc. * * All rights reserved. 
* * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "util.h" #include "mainconfig.h" static char error_string_response[512]; static struct objdb_iface_ver0 *global_objdb; DECLARE_LIST_INIT(uidgid_list_head); /* This just makes the code below a little neater */ static inline int objdb_get_string ( const struct objdb_iface_ver0 *objdb, hdb_handle_t object_service_handle, const char *key, char **value) { int res; *value = NULL; if ( !(res = objdb->object_key_get (object_service_handle, key, strlen (key), (void *)value, NULL))) { if (*value) { return 0; } } return -1; } static inline void objdb_get_int ( const struct objdb_iface_ver0 *objdb, hdb_handle_t object_service_handle, char *key, unsigned int *intvalue) { char *value = NULL; if (!objdb->object_key_get (object_service_handle, key, strlen (key), (void *)&value, NULL)) { if (value) { *intvalue = atoi(value); } } } /** * insert_into_buffer * @target_buffer: a buffer where to write results * @bufferlen: tell us the size of the buffer to avoid overflows * @entry: entry that needs to be added to the buffer * @after: can either be NULL or set to a string. * if NULL, @entry is prependend to logsys_format_get buffer. * if set, @entry is added immediately after @after. * * Since the function is specific to logsys_format_get handling, it is implicit * that source is logsys_format_get(); * * In case of failure, target_buffer could be left dirty. So don't trust * any data leftover in it. * * Searching for "after" assumes that there is only entry of "after" * in the source. Afterall we control the string here and for logging format * it makes little to no sense to have duplicate format entries. 
* * Returns: 0 on success, -1 on failure **/ static int insert_into_buffer( char *target_buffer, size_t bufferlen, const char *entry, const char *after) { const char *current_format = NULL; current_format = logsys_format_get(); /* if the entry is already in the format we don't add it again */ if (strstr(current_format, entry) != NULL) { return -1; } /* if there is no "after", simply prepend the requested entry * otherwise go for beautiful string manipulation.... */ if (!after) { if (snprintf(target_buffer, bufferlen - 1, "%s%s", entry, current_format) >= bufferlen - 1) { return -1; } } else { const char *afterpos; size_t afterlen; size_t templen; /* check if after is contained in the format * and afterlen has a meaning or return an error */ afterpos = strstr(current_format, after); afterlen = strlen(after); if ((!afterpos) || (!afterlen)) { return -1; } templen = afterpos - current_format + afterlen; if (snprintf(target_buffer, templen + 1, "%s", current_format) >= bufferlen - 1) { return -1; } if (snprintf(target_buffer + templen, bufferlen - ( templen + 1 ), "%s%s", entry, current_format + templen) >= bufferlen - ( templen + 1 )) { return -1; } } return 0; } /* * format set is the only global specific option that * doesn't apply at system/subsystem level. 
*/ /* Applies the "fileline", "function_name" and "timestamp" keys of object_handle: for each key set to "on" a format token is inserted into new_format_buffer via insert_into_buffer() and the buffer is handed to logsys_format_set(); "off" is accepted and ignored; any other value is an error. Returns 0 on success, or -1 with *error_string pointing at a static reason string. NOTE(review): new_format_buffer is not initialised in this function; presumably insert_into_buffer() fills it - confirm. */ static int corosync_main_config_format_set ( struct objdb_iface_ver0 *objdb, hdb_handle_t object_handle, const char **error_string) { const char *error_reason; char new_format_buffer[PATH_MAX]; char *value; int err = 0; if (!objdb_get_string (objdb,object_handle, "fileline", &value)) { if (strcmp (value, "on") == 0) { if (!insert_into_buffer(new_format_buffer, sizeof(new_format_buffer), - " %f:%l", "s]")) { + " %f:%l", "g]")) { err = logsys_format_set(new_format_buffer); } else if (!insert_into_buffer(new_format_buffer, sizeof(new_format_buffer), "%f:%l", NULL)) { err = logsys_format_set(new_format_buffer); } } else if (strcmp (value, "off") == 0) { /* nothing to do here */ } else { error_reason = "unknown value for fileline"; goto parse_error; } } if (!objdb_get_string (objdb,object_handle, "function_name", &value)) { if (strcmp (value, "on") == 0) { if (!insert_into_buffer(new_format_buffer, sizeof(new_format_buffer), "%n:", "f:")) { err = logsys_format_set(new_format_buffer); } else if (!insert_into_buffer(new_format_buffer, sizeof(new_format_buffer), - " %n", "s]")) { + " %n", "g]")) { err = logsys_format_set(new_format_buffer); } } else if (strcmp (value, "off") == 0) { /* nothing to do here */ } else { error_reason = "unknown value for function_name"; goto parse_error; } } if (!objdb_get_string (objdb,object_handle, "timestamp", &value)) { if (strcmp (value, "on") == 0) { if(!insert_into_buffer(new_format_buffer, sizeof(new_format_buffer), "%t ", NULL)) { err = logsys_format_set(new_format_buffer); } } else if (strcmp (value, "off") == 0) { /* nothing to do here */ } else { error_reason = "unknown value for timestamp"; goto parse_error; } } /* err only holds the result of the most recent logsys_format_set() call */ if (err) { error_reason = "exhausted virtual memory"; goto parse_error; } return (0); parse_error: *error_string = error_reason; return (-1); } /* Reads objdb_key under object_handle and sets ("yes"/"on") or clears ("no"/"off") mode_mask in the logsys mode of subsys. A non-zero deprecated logs a warning that names replacement. Returns 0 (also when the key is absent), or -1 with *error_string pointing at the static formatted_error_reason buffer. */ static int corosync_main_config_log_destination_set ( struct objdb_iface_ver0 *objdb, hdb_handle_t object_handle, const char *subsys, const char **error_string, const char
*objdb_key, unsigned int mode_mask, char deprecated, const char *replacement) { static char formatted_error_reason[128]; char *value; unsigned int mode; if (!objdb_get_string (objdb, object_handle, objdb_key, &value)) { if (deprecated) { log_printf(LOGSYS_LEVEL_WARNING, "Warning: the %s config paramater has been obsoleted." " See corosync.conf man page %s directive.", objdb_key, replacement); } mode = logsys_config_mode_get (subsys); if (strcmp (value, "yes") == 0 || strcmp (value, "on") == 0) { mode |= mode_mask; if (logsys_config_mode_set(subsys, mode) < 0) { /* NOTE(review): sprintf into a 128-byte buffer is unbounded; the callers visible here only pass short literal keys */ sprintf (formatted_error_reason, "unable to set mode %s", objdb_key); *error_string = formatted_error_reason; return -1; } } else if (strcmp (value, "no") == 0 || strcmp (value, "off") == 0) { mode &= ~mode_mask; if (logsys_config_mode_set(subsys, mode) < 0) { sprintf (formatted_error_reason, "unable to unset mode %s", objdb_key); *error_string = formatted_error_reason; return -1; } } else { sprintf (formatted_error_reason, "unknown value for %s", objdb_key); *error_string = formatted_error_reason; return -1; } } return 0; } /* Applies one logging object's settings to subsys: output destinations, syslog facility and priority, logfile, logfile priority and debug. When subsys is non-NULL the subsystem is created first; a NULL subsys is passed through to the logsys calls unchanged. Returns 0 on success, or -1 with *error_string set to the failure reason. */ static int corosync_main_config_set ( struct objdb_iface_ver0 *objdb, hdb_handle_t object_handle, const char *subsys, const char **error_string) { const char *error_reason = error_string_response; char *value; int mode; /* * this bit abuses the internal logsys exported API * to guarantee that all configured subsystems are * initialized too.
* * using this approach avoids some headaches caused * by IPC and TOTEM that have a special logging * handling requirements */ if (subsys != NULL) { - if (_logsys_subsys_create(subsys) < 0) { + if (_logsys_subsys_create(subsys, NULL) < 0) { error_reason = "unable to create new logging subsystem"; goto parse_error; } } /* mode is only range-checked here; the destination helpers below re-read it themselves */ mode = logsys_config_mode_get(subsys); if (mode < 0) { error_reason = "unable to get mode"; goto parse_error; } if (corosync_main_config_log_destination_set (objdb, object_handle, subsys, &error_reason, "to_logfile", LOGSYS_MODE_OUTPUT_FILE, 0, NULL) != 0) goto parse_error; if (corosync_main_config_log_destination_set (objdb, object_handle, subsys, &error_reason, "to_stderr", LOGSYS_MODE_OUTPUT_STDERR, 0, NULL) != 0) goto parse_error; if (corosync_main_config_log_destination_set (objdb, object_handle, subsys, &error_reason, "to_syslog", LOGSYS_MODE_OUTPUT_SYSLOG, 0, NULL) != 0) goto parse_error; /* "to_file" is the deprecated spelling of "to_logfile" */ if (corosync_main_config_log_destination_set (objdb, object_handle, subsys, &error_reason, "to_file", LOGSYS_MODE_OUTPUT_FILE, 1, "to_logfile") != 0) goto parse_error; if (!objdb_get_string (objdb,object_handle, "syslog_facility", &value)) { int syslog_facility; syslog_facility = logsys_facility_id_get(value); if (syslog_facility < 0) { error_reason = "unknown syslog facility specified"; goto parse_error; } if (logsys_config_syslog_facility_set(subsys, syslog_facility) < 0) { error_reason = "unable to set syslog facility"; goto parse_error; } } if (!objdb_get_string (objdb,object_handle, "syslog_level", &value)) { int syslog_priority; log_printf(LOGSYS_LEVEL_WARNING, "Warning: the syslog_level config paramater has been obsoleted."
" See corosync.conf man page syslog_priority directive."); syslog_priority = logsys_priority_id_get(value); if (syslog_priority < 0) { error_reason = "unknown syslog level specified"; goto parse_error; } if (logsys_config_syslog_priority_set(subsys, syslog_priority) < 0) { error_reason = "unable to set syslog level"; goto parse_error; } } if (!objdb_get_string (objdb,object_handle, "syslog_priority", &value)) { int syslog_priority; syslog_priority = logsys_priority_id_get(value); if (syslog_priority < 0) { error_reason = "unknown syslog priority specified"; goto parse_error; } if (logsys_config_syslog_priority_set(subsys, syslog_priority) < 0) { error_reason = "unable to set syslog priority"; goto parse_error; } } if (!objdb_get_string (objdb,object_handle, "logfile", &value)) { /* NOTE(review): this call is given error_string directly, but parse_error then stores error_reason (still error_string_response at this point) into *error_string, so any message written by logsys_config_file_set() appears to be overwritten - confirm */ if (logsys_config_file_set (subsys, error_string, value) < 0) { goto parse_error; } } if (!objdb_get_string (objdb,object_handle, "logfile_priority", &value)) { int logfile_priority; logfile_priority = logsys_priority_id_get(value); if (logfile_priority < 0) { error_reason = "unknown logfile priority specified"; goto parse_error; } if (logsys_config_logfile_priority_set(subsys, logfile_priority) < 0) { error_reason = "unable to set logfile priority"; goto parse_error; } } if (!objdb_get_string (objdb, object_handle, "debug", &value)) { if (strcmp (value, "on") == 0) { if (logsys_config_debug_set (subsys, 1) < 0) { error_reason = "unable to set debug on"; goto parse_error; } } else if (strcmp (value, "off") == 0) { if (logsys_config_debug_set (subsys, 0) < 0) { error_reason = "unable to set debug off"; goto parse_error; } } else { error_reason = "unknown value for debug"; goto parse_error; } } return (0); parse_error: *error_string = error_reason; return (-1); } /* Walks the first top-level "logging" object: applies the format and the global settings, then every "logger_subsys" child (which must carry a "subsys" key) and every "logging_daemon" child (which must carry a "name" key; only name "corosync" is applied). Returns 0 on success, or -1 with *error_string set. NOTE(review): the parse_error paths return without calling object_find_destroy() on the find handles that are open at that point - possible handle leak, confirm against the objdb implementation. */ static int corosync_main_config_read_logging ( struct objdb_iface_ver0 *objdb, const char **error_string) { hdb_handle_t object_service_handle; hdb_handle_t object_logger_subsys_handle; hdb_handle_t object_find_handle; hdb_handle_t
object_find_logsys_handle; const char *error_reason; char *value; objdb->object_find_create ( OBJECT_PARENT_HANDLE, "logging", strlen ("logging"), &object_find_handle); if (objdb->object_find_next ( object_find_handle, &object_service_handle) == 0) { /* format set is supported only for toplevel */ if (corosync_main_config_format_set (objdb, object_service_handle, &error_reason) < 0) { goto parse_error; } if (corosync_main_config_set (objdb, object_service_handle, NULL, &error_reason) < 0) { goto parse_error; } /* we will need 2 of these to compensate for new logging * config format */ objdb->object_find_create ( object_service_handle, "logger_subsys", strlen ("logger_subsys"), &object_find_logsys_handle); while (objdb->object_find_next ( object_find_logsys_handle, &object_logger_subsys_handle) == 0) { if (!objdb_get_string (objdb, object_logger_subsys_handle, "subsys", &value)) { if (corosync_main_config_set (objdb, object_logger_subsys_handle, value, &error_reason) < 0) { goto parse_error; } } else { error_reason = "subsys required for logger directive"; goto parse_error; } } objdb->object_find_destroy (object_find_logsys_handle); objdb->object_find_create ( object_service_handle, "logging_daemon", strlen ("logging_daemon"), &object_find_logsys_handle); while (objdb->object_find_next ( object_find_logsys_handle, &object_logger_subsys_handle) == 0) { if (!objdb_get_string (objdb, object_logger_subsys_handle, "name", &value)) { if (strcmp(value, "corosync") == 0) { if (!objdb_get_string (objdb, object_logger_subsys_handle, "subsys", &value)) { if (corosync_main_config_set (objdb, object_logger_subsys_handle, value, &error_reason) < 0) { goto parse_error; } } else { if (corosync_main_config_set (objdb, object_logger_subsys_handle, NULL, &error_reason) < 0) { goto parse_error; } } } } else { error_reason = "name required for logging_daemon directive"; goto parse_error; } } objdb->object_find_destroy (object_find_logsys_handle); } objdb->object_find_destroy
(object_find_handle); + logsys_config_apply(); return 0; parse_error: *error_string = error_reason; return (-1); } /* Looks up req_user with getpwnam_r() and returns passwd.pw_uid; if the call returns non-zero it logs an error and calls corosync_exit_error(). NOTE(review): the malloc() result is not checked, and only the getpwnam_r() return code is tested, never temp_pwd_pt; POSIX reports "no matching entry" as return 0 with a NULL result pointer, so passwd.pw_uid may be read uninitialised for an unknown user - confirm. */ static int uid_determine (const char *req_user) { int pw_uid = 0; struct passwd passwd; struct passwd* pwdptr = &passwd; struct passwd* temp_pwd_pt; char *pwdbuffer; int pwdlinelen; pwdlinelen = sysconf (_SC_GETPW_R_SIZE_MAX); /* fall back to a fixed size when sysconf() reports -1 */ if (pwdlinelen == -1) { pwdlinelen = 256; } pwdbuffer = malloc (pwdlinelen); if ((getpwnam_r (req_user, pwdptr, pwdbuffer, pwdlinelen, &temp_pwd_pt)) != 0) { log_printf (LOGSYS_LEVEL_ERROR, "ERROR: The '%s' user is not found in /etc/passwd, please read the documentation.\n", req_user); corosync_exit_error (AIS_DONE_UID_DETERMINE); } pw_uid = passwd.pw_uid; free (pwdbuffer); return pw_uid; } /* Group counterpart of uid_determine: looks up req_group with getgrnam_r() and returns group.gr_gid. NOTE(review): same concerns - unchecked malloc() and temp_grp_pt is never tested for NULL - confirm. */ static int gid_determine (const char *req_group) { int ais_gid = 0; struct group group; struct group * grpptr = &group; struct group * temp_grp_pt; char *grpbuffer; int grplinelen; grplinelen = sysconf (_SC_GETGR_R_SIZE_MAX); if (grplinelen == -1) { grplinelen = 256; } grpbuffer = malloc (grplinelen); if ((getgrnam_r (req_group, grpptr, grpbuffer, grplinelen, &temp_grp_pt)) != 0) { log_printf (LOGSYS_LEVEL_ERROR, "ERROR: The '%s' group is not found in /etc/group, please read the documentation.\n", req_group); corosync_exit_error (AIS_DONE_GID_DETERMINE); } ais_gid = group.gr_gid; free (grpbuffer); return ais_gid; } /* objdb reload callback: on OBJDB_RELOAD_NOTIFY_END it resets the log format with logsys_format_set(NULL) and re-reads the logging configuration from global_objdb. The return value of corosync_main_config_read_logging() and the resulting error_string are not inspected here. */ static void main_objdb_reload_notify(objdb_reload_notify_type_t type, int flush, void *priv_data_pt) { const char *error_string; if (type == OBJDB_RELOAD_NOTIFY_END) { /* * Reload the logsys configuration */ if (logsys_format_set(NULL) == -1) { fprintf (stderr, "Unable to setup logging format.\n"); } corosync_main_config_read_logging(global_objdb, &error_string); } } /* Stores objdb in global_objdb and registers main_objdb_reload_notify through object_track_start() on OBJECT_PARENT_HANDLE. */ static void add_logsys_config_notification( struct objdb_iface_ver0 *objdb) { global_objdb = objdb; objdb->object_track_start(OBJECT_PARENT_HANDLE, 1, NULL, NULL, NULL, main_objdb_reload_notify, NULL); } /* For every top-level "uidgid" object, resolves its "uid" and "gid" values and appends an entry to uidgid_list_head when at least one of them was given. Always returns 0; error_string is not written. */ static int corosync_main_config_read_uidgid ( struct
objdb_iface_ver0 *objdb, const char **error_string) { hdb_handle_t object_find_handle; hdb_handle_t object_service_handle; char *value; int uid, gid; struct uidgid_item *ugi; objdb->object_find_create ( OBJECT_PARENT_HANDLE, "uidgid", strlen ("uidgid"), &object_find_handle); while (objdb->object_find_next ( object_find_handle, &object_service_handle) == 0) { /* -1 marks "not specified" for this entry */ uid = -1; gid = -1; if (!objdb_get_string (objdb,object_service_handle, "uid", &value)) { uid = uid_determine(value); } if (!objdb_get_string (objdb,object_service_handle, "gid", &value)) { gid = gid_determine(value); } if (uid > -1 || gid > -1) { ugi = malloc (sizeof (*ugi)); if (ugi == NULL) { _corosync_out_of_memory_error(); } ugi->uid = uid; ugi->gid = gid; list_init (&ugi->list); list_add (&ugi->list, &uidgid_list_head); } } objdb->object_find_destroy (object_find_handle); return 0; } /* Reads the logging configuration, then the uidgid configuration, then installs the reload notification. If the logging step fails, formats "parse error in config: <reason>." into error_string_response, points *error_string at it and returns -1; otherwise returns 0. */ int corosync_main_config_read ( struct objdb_iface_ver0 *objdb, const char **error_string) { const char *error_reason = error_string_response; if (corosync_main_config_read_logging(objdb, error_string) < 0) { error_reason = *error_string; goto parse_error; } corosync_main_config_read_uidgid (objdb, error_string); add_logsys_config_notification(objdb); return 0; parse_error: snprintf (error_string_response, sizeof(error_string_response), "parse error in config: %s.\n", error_reason); *error_string = error_string_response; return (-1); } /* Maps the top-level "compatibility" key to *minimum_sync_mode: "whitetank" gives CS_SYNC_V1 (also the value used when the key is absent), "none" gives CS_SYNC_V2; any other value writes a message into error_string_response and returns -1 with *error_string pointing at it. */ int corosync_main_config_compatibility_read ( struct objdb_iface_ver0 *objdb, enum cs_sync_mode *minimum_sync_mode, const char **error_string) { const char *error_reason = error_string_response; char *value; *minimum_sync_mode = CS_SYNC_V1; if (!objdb_get_string (objdb, OBJECT_PARENT_HANDLE, "compatibility", &value)) { if (strcmp (value, "whitetank") == 0) { *minimum_sync_mode = CS_SYNC_V1; } else if (strcmp (value, "none") == 0) { *minimum_sync_mode = CS_SYNC_V2; } else { snprintf (error_string_response, sizeof (error_string_response), "Invalid compatibility option '%s' specified, must be none
or whitetank.\n", value); goto parse_error; } } return 0; parse_error: *error_string = error_reason; return (-1); } diff --git a/exec/service.c b/exec/service.c index 04353142..2b6fa358 100644 --- a/exec/service.c +++ b/exec/service.c @@ -1,728 +1,737 @@ /* * Copyright (c) 2006 MontaVista Software, Inc. * Copyright (c) 2006-2009 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE.
*/ #include #include #include #include #include #include #include #include "mainconfig.h" #include "util.h" #include #include "timer.h" #include #include #include "main.h" #include #include "service.h" #include #include LOGSYS_DECLARE_SUBSYS ("SERV"); /* Name and interface version of a service engine that is linked by default. */ struct default_service { const char *name; int ver; }; /* Services linked by corosync_service_defaults_link_and_init() unless default services are disabled. */ static struct default_service default_services[] = { { .name = "corosync_evs", .ver = 0, }, { .name = "corosync_cfg", .ver = 0, }, { .name = "corosync_cpg", .ver = 0, }, { .name = "corosync_confdb", .ver = 0, }, { .name = "corosync_pload", .ver = 0, }, #ifdef HAVE_MONITORING { .name = "corosync_mon", .ver = 0, }, #endif #ifdef HAVE_WATCHDOG { .name = "corosync_wd", .ver = 0, }, #endif { .name = "corosync_quorum", .ver = 0, } }; /* * service exit and unlink schedwrk handler data structure */ struct seus_handler_data { hdb_handle_t service_handle; int service_engine; struct corosync_api_v1 *api; }; struct corosync_service_engine *ais_service[SERVICE_HANDLER_MAXIMUM_COUNT]; hdb_handle_t service_stats_handle[SERVICE_HANDLER_MAXIMUM_COUNT][64]; int ais_service_exiting[SERVICE_HANDLER_MAXIMUM_COUNT]; static hdb_handle_t object_internal_configuration_handle; static hdb_handle_t object_stats_services_handle; static void (*service_unlink_all_complete) (void) = NULL; /* Returns 0 when the first "aisexec" object has defaultservices set to "no"; otherwise returns (-1) converted to unsigned int. NOTE(review): the early "return 0" path skips object_find_destroy(object_find_handle) - looks like a find-handle leak, confirm. */ static unsigned int default_services_requested (struct corosync_api_v1 *corosync_api) { hdb_handle_t object_service_handle; hdb_handle_t object_find_handle; char *value; /* * Don't link default services if they have been disabled */ corosync_api->object_find_create ( OBJECT_PARENT_HANDLE, "aisexec", strlen ("aisexec"), &object_find_handle); if (corosync_api->object_find_next ( object_find_handle, &object_service_handle) == 0) { if ( !
corosync_api->object_key_get (object_service_handle, "defaultservices", strlen ("defaultservices"), (void *)&value, NULL)) { if (value && strcmp (value, "no") == 0) { return 0; } } } corosync_api->object_find_destroy (object_find_handle); return (-1); } /* Loads service_name/service_ver through lcr_ifact_reference(), stores the engine in ais_service[], registers its log callsites, runs its config_init_fn and exec_init_fn when present, records the service under internal_configuration and the stats tree, and finally calls cs_ipcs_service_init(). Returns (-1) when the service cannot be loaded; otherwise returns res, whose last assignment is the result of creating the "handle" key (the init function results stored in res earlier are overwritten). */ unsigned int corosync_service_link_and_init ( struct corosync_api_v1 *corosync_api, const char *service_name, unsigned int service_ver) { struct corosync_service_engine_iface_ver0 *iface_ver0; void *iface_ver0_p; hdb_handle_t handle; struct corosync_service_engine *service; int res; hdb_handle_t object_service_handle; hdb_handle_t object_stats_handle; int fn; char object_name[32]; char *name_sufix; uint64_t zero_64 = 0; + void* _start; + void* _stop; /* * reference the service interface */ iface_ver0_p = NULL; res = lcr_ifact_reference ( &handle, service_name, service_ver, &iface_ver0_p, (void *)0); iface_ver0 = (struct corosync_service_engine_iface_ver0 *)iface_ver0_p; if (res == -1 || iface_ver0 == 0) { log_printf(LOGSYS_LEVEL_ERROR, "Service failed to load '%s'.\n", service_name); return (-1); } /* * Initialize service */ service = iface_ver0->corosync_get_service_engine_ver0(); ais_service[service->id] = service; + + /* begin */ + _start = lcr_ifact_addr_get(handle, "__start___verbose"); + _stop = lcr_ifact_addr_get(handle, "__stop___verbose"); + qb_log_callsites_register(_start, _stop); + /* end */ + if (service->config_init_fn) { res = service->config_init_fn (corosync_api); } if (service->exec_init_fn) { res = service->exec_init_fn (corosync_api); } /* * Store service in object database */ corosync_api->object_create (object_internal_configuration_handle, &object_service_handle, "service", strlen ("service")); corosync_api->object_key_create_typed (object_service_handle, "name", service_name, strlen (service_name) + 1, OBJDB_VALUETYPE_STRING); corosync_api->object_key_create_typed (object_service_handle, "ver", &service_ver, sizeof (service_ver), OBJDB_VALUETYPE_UINT32); res =
corosync_api->object_key_create_typed (object_service_handle, "handle", &handle, sizeof (handle), OBJDB_VALUETYPE_UINT64); corosync_api->object_key_create_typed (object_service_handle, "service_id", &service->id, sizeof (service->id), OBJDB_VALUETYPE_UINT16); /* the stats object is named after the text following the last '_' in service_name, or the whole name when there is no '_' */ name_sufix = strrchr (service_name, '_'); if (name_sufix) name_sufix++; else name_sufix = (char*)service_name; corosync_api->object_create (object_stats_services_handle, &object_stats_handle, name_sufix, strlen (name_sufix)); /* NOTE(review): service_id is stored as UINT16 above but as INT16 here - confirm which is intended */ corosync_api->object_key_create_typed (object_stats_handle, "service_id", &service->id, sizeof (service->id), OBJDB_VALUETYPE_INT16); /* NOTE(review): fn is bounded only by exec_engine_count while service_stats_handle has 64 slots per service - no bound check is visible here, confirm */ for (fn = 0; fn < service->exec_engine_count; fn++) { snprintf (object_name, 32, "%d", fn); corosync_api->object_create (object_stats_handle, &service_stats_handle[service->id][fn], object_name, strlen (object_name)); corosync_api->object_key_create_typed (service_stats_handle[service->id][fn], "tx", &zero_64, sizeof (zero_64), OBJDB_VALUETYPE_UINT64); corosync_api->object_key_create_typed (service_stats_handle[service->id][fn], "rx", &zero_64, sizeof (zero_64), OBJDB_VALUETYPE_UINT64); } log_printf (LOGSYS_LEVEL_NOTICE, "Service engine loaded: %s [%d]\n", service->name, service->id); cs_ipcs_service_init(service); return (res); } /* Returns the largest priority among the non-NULL entries of ais_service[], or 0 when none is greater than 0. */ static int service_priority_max(void) { int lpc = 0, max = 0; for(; lpc < SERVICE_HANDLER_MAXIMUM_COUNT; lpc++) { if(ais_service[lpc] != NULL && ais_service[lpc]->priority > max) { max = ais_service[lpc]->priority; } } return max; } /* * use the force */ /* Scans priorities from *current_priority down to lowest_priority and, within each, the service engine slots, resuming from the caller-held cursor values. For the first matching service it runs exec_exit_fn (when set), stores the service's linker handle in *current_service_handle and marks it in ais_service_exiting[]. Returns (-1) when exec_exit_fn returned -1, 1 after one service was handled (the caller is expected to call again), and 0 when no service is left. */ static unsigned int corosync_service_unlink_priority ( struct corosync_api_v1 *corosync_api, int lowest_priority, int *current_priority, int *current_service_engine, hdb_handle_t *current_service_handle) { unsigned short *service_id; hdb_handle_t object_service_handle; hdb_handle_t object_find_handle; hdb_handle_t *found_service_handle; for(; *current_priority >= lowest_priority; *current_priority = *current_priority - 1) { for(*current_service_engine = 0; *current_service_engine
< SERVICE_HANDLER_MAXIMUM_COUNT; *current_service_engine = *current_service_engine + 1) { if(ais_service[*current_service_engine] == NULL || ais_service[*current_service_engine]->priority != *current_priority) { continue; } /* * find service object in object database by service id * and unload it if possible. * * If the service engine's exec_exit_fn returns -1 indicating * it was busy, this function returns -1 and can be called again * at a later time (usually via the schedwrk api). */ corosync_api->object_find_create ( object_internal_configuration_handle, "service", strlen ("service"), &object_find_handle); while (corosync_api->object_find_next ( object_find_handle, &object_service_handle) == 0) { int res = corosync_api->object_key_get ( object_service_handle, "service_id", strlen ("service_id"), (void *)&service_id, NULL); if (res == 0 && *service_id == ais_service[*current_service_engine]->id) { if (ais_service[*service_id]->exec_exit_fn) { res = ais_service[*service_id]->exec_exit_fn (); if (res == -1) { corosync_api->object_find_destroy (object_find_handle); return (-1); } } /* NOTE(review): the result of this lookup is stored in res but not checked before found_service_handle is dereferenced */ res = corosync_api->object_key_get ( object_service_handle, "handle", strlen ("handle"), (void *)&found_service_handle, NULL); *current_service_handle = *found_service_handle; ais_service_exiting[*current_service_engine] = 1; corosync_api->object_find_destroy (object_find_handle); /* * Call should call this function again */ return (1); } } corosync_api->object_find_destroy (object_find_handle); } } /* * We finish unlink of all services -> no need to call this function again */ return (0); } /* Removes the stats object of service_name, then searches internal_configuration for the entry with matching name and version, runs its exec_exit_fn (when set), clears its ais_service[] slot, destroys its IPC service, releases the linker handle and destroys the object. Returns (-1) when exec_exit_fn returned -1, otherwise 0. NOTE(review): object_find_destroy() is called inside the loop body while the loop keeps iterating on the same handle, and once more after the loop - confirm this is safe with the objdb implementation. */ static unsigned int service_unlink_and_exit ( struct corosync_api_v1 *corosync_api, const char *service_name, unsigned int service_ver) { hdb_handle_t object_service_handle; char *found_service_name; unsigned short *service_id; unsigned int *found_service_ver; hdb_handle_t object_find_handle; hdb_handle_t *found_service_handle; char *name_sufix; int res; name_sufix = strrchr (service_name, '_'); if
(name_sufix) name_sufix++; else name_sufix = (char*)service_name; corosync_api->object_find_create ( object_stats_services_handle, name_sufix, strlen (name_sufix), &object_find_handle); if (corosync_api->object_find_next ( object_find_handle, &object_service_handle) == 0) { corosync_api->object_destroy (object_service_handle); } corosync_api->object_find_destroy (object_find_handle); corosync_api->object_find_create ( object_internal_configuration_handle, "service", strlen ("service"), &object_find_handle); while (corosync_api->object_find_next ( object_find_handle, &object_service_handle) == 0) { corosync_api->object_key_get (object_service_handle, "name", strlen ("name"), (void *)&found_service_name, NULL); if (strcmp (service_name, found_service_name) != 0) { continue; } corosync_api->object_key_get (object_service_handle, "ver", strlen ("ver"), (void *)&found_service_ver, NULL); /* * If service found and linked exit it */ if (service_ver != *found_service_ver) { continue; } corosync_api->object_key_get ( object_service_handle, "service_id", strlen ("service_id"), (void *)&service_id, NULL); if(service_id != NULL && *service_id < SERVICE_HANDLER_MAXIMUM_COUNT && ais_service[*service_id] != NULL) { corosync_api->object_find_destroy (object_find_handle); if (ais_service[*service_id]->exec_exit_fn) { res = ais_service[*service_id]->exec_exit_fn (); if (res == -1) { return (-1); } } log_printf(LOGSYS_LEVEL_NOTICE, "Service engine unloaded: %s\n", ais_service[*service_id]->name); ais_service[*service_id] = NULL; res = corosync_api->object_key_get ( object_service_handle, "handle", strlen ("handle"), (void *)&found_service_handle, NULL); cs_ipcs_service_destroy (*service_id); lcr_ifact_release (*found_service_handle); corosync_api->object_destroy (object_service_handle); } } corosync_api->object_find_destroy (object_find_handle); return (0); } /* * Links default services into the executive */ /* Creates the runtime "services" stats object (when a "runtime" object exists) and the "internal_configuration" object, links every top-level "service" object found in the object database, and then links the built-in default_services[] unless default_services_requested() returns 0. Always returns 0; the results of the individual link calls are not checked. */ unsigned int corosync_service_defaults_link_and_init (struct corosync_api_v1
*corosync_api) { unsigned int i; hdb_handle_t object_service_handle; char *found_service_name; char *found_service_ver; unsigned int found_service_ver_atoi; hdb_handle_t object_find_handle; hdb_handle_t object_find2_handle; hdb_handle_t object_runtime_handle; corosync_api->object_find_create ( OBJECT_PARENT_HANDLE, "runtime", strlen ("runtime"), &object_find2_handle); if (corosync_api->object_find_next ( object_find2_handle, &object_runtime_handle) == 0) { corosync_api->object_create (object_runtime_handle, &object_stats_services_handle, "services", strlen ("services")); } /* NOTE(review): object_find2_handle is never passed to object_find_destroy() in this function - confirm whether that leaks */ corosync_api->object_create (OBJECT_PARENT_HANDLE, &object_internal_configuration_handle, "internal_configuration", strlen ("internal_configuration")); corosync_api->object_find_create ( OBJECT_PARENT_HANDLE, "service", strlen ("service"), &object_find_handle); while (corosync_api->object_find_next ( object_find_handle, &object_service_handle) == 0) { corosync_api->object_key_get (object_service_handle, "name", strlen ("name"), (void *)&found_service_name, NULL); found_service_ver = NULL; corosync_api->object_key_get (object_service_handle, "ver", strlen ("ver"), (void *)&found_service_ver, NULL); /* a missing "ver" key means version 0 */ found_service_ver_atoi = (found_service_ver ?
atoi (found_service_ver) : 0); corosync_service_link_and_init ( corosync_api, found_service_name, found_service_ver_atoi); } corosync_api->object_find_destroy (object_find_handle); if (default_services_requested (corosync_api) == 0) { return (0); } for (i = 0; i < sizeof (default_services) / sizeof (struct default_service); i++) { corosync_service_link_and_init ( corosync_api, default_services[i].name, default_services[i].ver); } return (0); } /* * Declaration of exit_schedwrk_handler, because of cycle * (service_exit_schedwrk_handler calls service_unlink_schedwrk_handler, and vice-versa) */ static void service_exit_schedwrk_handler (void *data); /* Loop job that finishes unloading one service: destroys its IPC service (re-queueing itself while that returns -1), logs the unload, clears the ais_service[] slot, releases the linker handle and queues service_exit_schedwrk_handler for the next service. data is a struct seus_handler_data. */ static void service_unlink_schedwrk_handler (void *data) { struct seus_handler_data *cb_data = (struct seus_handler_data *)data; /* * Exit all ipc connections dependent on this service */ if (cs_ipcs_service_destroy (cb_data->service_engine) == -1) { goto redo_this_function; } log_printf(LOGSYS_LEVEL_NOTICE, "Service engine unloaded: %s\n", ais_service[cb_data->service_engine]->name); ais_service[cb_data->service_engine] = NULL; lcr_ifact_release (cb_data->service_handle); qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, data, service_exit_schedwrk_handler); return; redo_this_function: qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, data, service_unlink_schedwrk_handler); } /* Loop job that drives unloading of all services: the function-static cursors keep the position between invocations. A result of 0 invokes service_unlink_all_complete(), 1 hands the selected service to service_unlink_schedwrk_handler, and any other result re-queues this handler. */ static void service_exit_schedwrk_handler (void *data) { int res; static int current_priority = 0; static int current_service_engine = 0; static int called = 0; struct seus_handler_data *cb_data = (struct seus_handler_data *)data; struct corosync_api_v1 *api = (struct corosync_api_v1 *)cb_data->api; hdb_handle_t service_handle; if (called == 0) { log_printf(LOGSYS_LEVEL_NOTICE, "Unloading all Corosync service engines.\n"); current_priority = service_priority_max (); called = 1; } /* NOTE(review): the two arguments after 0 in the call below look like extraction-mangled forms of address-of current_priority and address-of current_service_engine - confirm against the real source */ res = corosync_service_unlink_priority ( api, 0, ¤t_priority, ¤t_service_engine, &service_handle); if (res == 0) { service_unlink_all_complete(); return; } if (res == 1)
{ cb_data->service_engine = current_service_engine; cb_data->service_handle = service_handle; qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, data, service_unlink_schedwrk_handler); return; } qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, data, service_exit_schedwrk_handler); } /* Starts unloading of all services: records unlink_all_complete as the completion callback and, on the first call only, queues service_exit_schedwrk_handler with a static seus_handler_data. Later calls only replace the callback. */ void corosync_service_unlink_all ( struct corosync_api_v1 *api, void (*unlink_all_complete) (void)) { static int called = 0; static struct seus_handler_data cb_data; assert (api); service_unlink_all_complete = unlink_all_complete; if (called) { return; } if (called == 0) { called = 1; } cb_data.api = api; qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, &cb_data, service_exit_schedwrk_handler); } /* Arguments carried to service_unlink_and_exit_schedwrk_handler; name is a strdup() copy made by corosync_service_unlink_and_exit(). */ struct service_unlink_and_exit_data { hdb_handle_t handle; struct corosync_api_v1 *api; const char *name; unsigned int ver; }; /* Loop job that calls service_unlink_and_exit(); frees the data struct when it returns 0 and re-queues itself otherwise. NOTE(review): the strdup()ed name inside the struct is not freed here - looks like a leak, confirm. */ static void service_unlink_and_exit_schedwrk_handler (void *data) { struct service_unlink_and_exit_data *service_unlink_and_exit_data = data; int res; res = service_unlink_and_exit ( service_unlink_and_exit_data->api, service_unlink_and_exit_data->name, service_unlink_and_exit_data->ver); if (res == 0) { free (service_unlink_and_exit_data); } else { qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, data, service_unlink_and_exit_schedwrk_handler); } } typedef int (*schedwrk_cast) (const void *); /* Queues an asynchronous unload of service_name/service_ver and returns 0 immediately. NOTE(review): the malloc() and strdup() results are used without a NULL check, and the handle member is left unset - confirm. */ unsigned int corosync_service_unlink_and_exit ( struct corosync_api_v1 *api, const char *service_name, unsigned int service_ver) { struct service_unlink_and_exit_data *service_unlink_and_exit_data; assert (api); service_unlink_and_exit_data = malloc (sizeof (struct service_unlink_and_exit_data)); service_unlink_and_exit_data->api = api; service_unlink_and_exit_data->name = strdup (service_name); service_unlink_and_exit_data->ver = service_ver; qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, service_unlink_and_exit_data, service_unlink_and_exit_schedwrk_handler); return (0); } diff --git a/exec/syncv2.c b/exec/syncv2.c index efa37785..f9eebacf 100644 ---
a/exec/syncv2.c +++ b/exec/syncv2.c @@ -1,624 +1,624 @@ /* * Copyright (c) 2009 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE.
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "schedwrk.h" #include "quorum.h" #include "sync.h" #include "syncv2.h" LOGSYS_DECLARE_SUBSYS ("SYNCV2"); /* Message ids used for the three request types declared below. */ #define MESSAGE_REQ_SYNC_BARRIER 0 #define MESSAGE_REQ_SYNC_SERVICE_BUILD 1 #define MESSAGE_REQ_SYNC_MEMB_DETERMINE 2 /* Per-service progress marker stored in service_entry.state. */ enum sync_process_state { INIT, PROCESS, ACTIVATE }; /* Overall state of this module, held in my_state. */ enum sync_state { SYNC_SERVICELIST_BUILD, SYNC_PROCESS, SYNC_BARRIER }; /* One service taking part in synchronization: its id, API version, callbacks, progress state and name (at most 127 characters plus terminator). */ struct service_entry { int service_id; int api_version; union sync_init_api sync_init_api; void (*sync_abort) (void); int (*sync_process) (void); void (*sync_activate) (void); enum sync_process_state state; char name[128]; }; /* One processor in my_processor_list; received is set to 1 by sync_barrier_handler when that node's barrier message arrives. */ struct processor_entry { int nodeid; int received; }; /* Message layouts; every member is declared with 8-byte alignment. */ struct req_exec_memb_determine_message { struct qb_ipc_request_header header __attribute__((aligned(8))); struct memb_ring_id ring_id __attribute__((aligned(8))); }; struct req_exec_service_build_message { struct qb_ipc_request_header header __attribute__((aligned(8))); struct memb_ring_id ring_id __attribute__((aligned(8))); int service_list_entries __attribute__((aligned(8))); int service_list[128] __attribute__((aligned(8))); }; struct req_exec_barrier_message { struct qb_ipc_request_header header __attribute__((aligned(8))); struct memb_ring_id ring_id __attribute__((aligned(8))); }; /* File-scope synchronization state. */ static enum sync_state my_state = SYNC_BARRIER; static struct memb_ring_id my_ring_id; static struct memb_ring_id my_memb_determine_ring_id; static int my_memb_determine = 0; static unsigned int my_memb_determine_list[PROCESSOR_COUNT_MAX]; static unsigned int my_memb_determine_list_entries = 0; static int my_processing_idx = 0; static hdb_handle_t my_schedwrk_handle; static struct processor_entry my_processor_list[PROCESSOR_COUNT_MAX]; static unsigned int my_member_list[PROCESSOR_COUNT_MAX]; static unsigned int my_trans_list[PROCESSOR_COUNT_MAX];
static size_t my_member_list_entries = 0; static size_t my_trans_list_entries = 0; static int my_processor_list_entries = 0; static struct service_entry my_service_list[128]; static int my_service_list_entries = 0; static const struct memb_ring_id sync_ring_id; static struct service_entry my_initial_service_list[PROCESSOR_COUNT_MAX]; static int my_initial_service_list_entries; static void (*sync_synchronization_completed) (void); static void sync_deliver_fn ( unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required); static int schedwrk_processor (const void *context); static void sync_process_enter (void); static struct totempg_group sync_group = { .group = "syncv2", .group_len = 6 }; static hdb_handle_t sync_group_handle; int sync_v2_init ( int (*sync_callbacks_retrieve) ( int service_id, struct sync_callbacks *callbacks), void (*synchronization_completed) (void)) { unsigned int res; int i; struct sync_callbacks sync_callbacks; res = totempg_groups_initialize ( &sync_group_handle, sync_deliver_fn, NULL); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, - "Couldn't initialize groups interface.\n"); + "Couldn't initialize groups interface."); return (-1); } res = totempg_groups_join ( sync_group_handle, &sync_group, 1); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Couldn't join group.\n"); return (-1); } sync_synchronization_completed = synchronization_completed; for (i = 0; i < 64; i++) { res = sync_callbacks_retrieve (i, &sync_callbacks); if (res == -1) { continue; } if (sync_callbacks.sync_init_api.sync_init_v1 == NULL) { continue; } my_initial_service_list[my_initial_service_list_entries].state = INIT; my_initial_service_list[my_initial_service_list_entries].service_id = i; strcpy (my_initial_service_list[my_initial_service_list_entries].name, sync_callbacks.name); my_initial_service_list[my_initial_service_list_entries].api_version = sync_callbacks.api_version; 
my_initial_service_list[my_initial_service_list_entries].sync_init_api = sync_callbacks.sync_init_api; my_initial_service_list[my_initial_service_list_entries].sync_process = sync_callbacks.sync_process; my_initial_service_list[my_initial_service_list_entries].sync_abort = sync_callbacks.sync_abort; my_initial_service_list[my_initial_service_list_entries].sync_activate = sync_callbacks.sync_activate; my_initial_service_list_entries += 1; } return (0); } static void sync_barrier_handler (unsigned int nodeid, const void *msg) { const struct req_exec_barrier_message *req_exec_barrier_message = msg; int i; int barrier_reached = 1; if (memcmp (&my_ring_id, &req_exec_barrier_message->ring_id, sizeof (struct memb_ring_id)) != 0) { log_printf (LOGSYS_LEVEL_DEBUG, "barrier for old ring - discarding\n"); return; } for (i = 0; i < my_processor_list_entries; i++) { if (my_processor_list[i].nodeid == nodeid) { my_processor_list[i].received = 1; } } for (i = 0; i < my_processor_list_entries; i++) { if (my_processor_list[i].received == 0) { barrier_reached = 0; } } if (barrier_reached) { log_printf (LOGSYS_LEVEL_DEBUG, "Committing synchronization for %s\n", my_service_list[my_processing_idx].name); my_service_list[my_processing_idx].state = ACTIVATE; my_service_list[my_processing_idx].sync_activate (); my_processing_idx += 1; if (my_service_list_entries == my_processing_idx) { my_memb_determine_list_entries = 0; sync_synchronization_completed (); } else { sync_process_enter (); } } } static void dummy_sync_init ( const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id) { } static void dummy_sync_abort (void) { } static int dummy_sync_process (void) { return (0); } static void dummy_sync_activate (void) { } static int service_entry_compare (const void *a, const void *b) { const struct service_entry *service_entry_a = a; const struct service_entry *service_entry_b = b; return (service_entry_a->service_id > service_entry_b->service_id); } 
static void sync_memb_determine (unsigned int nodeid, const void *msg) { const struct req_exec_memb_determine_message *req_exec_memb_determine_message = msg; int found = 0; int i; if (memcmp (&req_exec_memb_determine_message->ring_id, &my_memb_determine_ring_id, sizeof (struct memb_ring_id)) != 0) { log_printf (LOGSYS_LEVEL_DEBUG, "memb determine for old ring - discarding\n"); return; } my_memb_determine = 1; for (i = 0; i < my_memb_determine_list_entries; i++) { if (my_memb_determine_list[i] == nodeid) { found = 1; } } if (found == 0) { my_memb_determine_list[my_memb_determine_list_entries] = nodeid; my_memb_determine_list_entries += 1; } } static void sync_service_build_handler (unsigned int nodeid, const void *msg) { const struct req_exec_service_build_message *req_exec_service_build_message = msg; int i, j; int barrier_reached = 1; int found; int qsort_trigger = 0; if (memcmp (&my_ring_id, &req_exec_service_build_message->ring_id, sizeof (struct memb_ring_id)) != 0) { log_printf (LOGSYS_LEVEL_DEBUG, "service build for old ring - discarding\n"); return; } for (i = 0; i < req_exec_service_build_message->service_list_entries; i++) { found = 0; for (j = 0; j < my_service_list_entries; j++) { if (req_exec_service_build_message->service_list[i] == my_service_list[j].service_id) { found = 1; break; } } if (found == 0) { my_service_list[my_service_list_entries].state = INIT; my_service_list[my_service_list_entries].service_id = req_exec_service_build_message->service_list[i]; sprintf (my_service_list[my_service_list_entries].name, "External Service (id = %d)\n", req_exec_service_build_message->service_list[i]); my_service_list[my_service_list_entries].api_version = 1; my_service_list[my_service_list_entries].sync_init_api.sync_init_v1 = dummy_sync_init; my_service_list[my_service_list_entries].sync_abort = dummy_sync_abort; my_service_list[my_service_list_entries].sync_process = dummy_sync_process; my_service_list[my_service_list_entries].sync_activate = 
dummy_sync_activate; my_service_list_entries += 1; qsort_trigger = 1; } } if (qsort_trigger) { qsort (my_service_list, my_service_list_entries, sizeof (struct service_entry), service_entry_compare); } for (i = 0; i < my_processor_list_entries; i++) { if (my_processor_list[i].nodeid == nodeid) { my_processor_list[i].received = 1; } } for (i = 0; i < my_processor_list_entries; i++) { if (my_processor_list[i].received == 0) { barrier_reached = 0; } } if (barrier_reached) { sync_process_enter (); } } static void sync_deliver_fn ( unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required) { struct qb_ipc_request_header *header = (struct qb_ipc_request_header *)msg; switch (header->id) { case MESSAGE_REQ_SYNC_BARRIER: sync_barrier_handler (nodeid, msg); break; case MESSAGE_REQ_SYNC_SERVICE_BUILD: sync_service_build_handler (nodeid, msg); break; case MESSAGE_REQ_SYNC_MEMB_DETERMINE: sync_memb_determine (nodeid, msg); break; } } static void memb_determine_message_transmit (void) { struct iovec iovec; struct req_exec_memb_determine_message req_exec_memb_determine_message; req_exec_memb_determine_message.header.size = sizeof (struct req_exec_memb_determine_message); req_exec_memb_determine_message.header.id = MESSAGE_REQ_SYNC_MEMB_DETERMINE; memcpy (&req_exec_memb_determine_message.ring_id, &my_memb_determine_ring_id, sizeof (struct memb_ring_id)); iovec.iov_base = (char *)&req_exec_memb_determine_message; iovec.iov_len = sizeof (req_exec_memb_determine_message); (void)totempg_groups_mcast_joined (sync_group_handle, &iovec, 1, TOTEMPG_AGREED); } static void barrier_message_transmit (void) { struct iovec iovec; struct req_exec_barrier_message req_exec_barrier_message; req_exec_barrier_message.header.size = sizeof (struct req_exec_barrier_message); req_exec_barrier_message.header.id = MESSAGE_REQ_SYNC_BARRIER; memcpy (&req_exec_barrier_message.ring_id, &my_ring_id, sizeof (struct memb_ring_id)); iovec.iov_base = (char 
*)&req_exec_barrier_message; iovec.iov_len = sizeof (req_exec_barrier_message); (void)totempg_groups_mcast_joined (sync_group_handle, &iovec, 1, TOTEMPG_AGREED); } static void service_build_message_transmit (struct req_exec_service_build_message *service_build_message) { struct iovec iovec; service_build_message->header.size = sizeof (struct req_exec_service_build_message); service_build_message->header.id = MESSAGE_REQ_SYNC_SERVICE_BUILD; memcpy (&service_build_message->ring_id, &my_ring_id, sizeof (struct memb_ring_id)); iovec.iov_base = (void *)service_build_message; iovec.iov_len = sizeof (struct req_exec_service_build_message); (void)totempg_groups_mcast_joined (sync_group_handle, &iovec, 1, TOTEMPG_AGREED); } static void sync_barrier_enter (void) { my_state = SYNC_BARRIER; barrier_message_transmit (); } static void sync_process_enter (void) { int i; my_state = SYNC_PROCESS; /* * No syncv2 services */ if (my_service_list_entries == 0) { my_state = SYNC_SERVICELIST_BUILD; my_memb_determine_list_entries = 0; sync_synchronization_completed (); return; } for (i = 0; i < my_processor_list_entries; i++) { my_processor_list[i].received = 0; } schedwrk_create (&my_schedwrk_handle, schedwrk_processor, NULL); } static void sync_servicelist_build_enter ( const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id) { struct req_exec_service_build_message service_build; int i; my_state = SYNC_SERVICELIST_BUILD; for (i = 0; i < member_list_entries; i++) { my_processor_list[i].nodeid = member_list[i]; my_processor_list[i].received = 0; } my_processor_list_entries = member_list_entries; memcpy (my_member_list, member_list, member_list_entries * sizeof (unsigned int)); my_member_list_entries = member_list_entries; my_processing_idx = 0; memcpy (my_service_list, my_initial_service_list, sizeof (struct service_entry) * my_initial_service_list_entries); my_service_list_entries = my_initial_service_list_entries; for (i = 0; i < 
my_initial_service_list[i].service_id; i++) { service_build.service_list[i] = my_initial_service_list[i].service_id; } service_build.service_list_entries = i; service_build_message_transmit (&service_build); } static int schedwrk_processor (const void *context) { int res = 0; if (my_service_list[my_processing_idx].state == INIT) { my_service_list[my_processing_idx].state = PROCESS; if (my_service_list[my_processing_idx].api_version == 1) { my_service_list[my_processing_idx].sync_init_api.sync_init_v1 (my_member_list, my_member_list_entries, &my_ring_id); } else { unsigned int old_trans_list[PROCESSOR_COUNT_MAX]; size_t old_trans_list_entries = 0; int o, m; memcpy (old_trans_list, my_trans_list, my_trans_list_entries * sizeof (unsigned int)); old_trans_list_entries = my_trans_list_entries; my_trans_list_entries = 0; for (o = 0; o < old_trans_list_entries; o++) { for (m = 0; m < my_member_list_entries; m++) { if (old_trans_list[o] == my_member_list[m]) { my_trans_list[my_trans_list_entries] = my_member_list[m]; my_trans_list_entries++; break; } } } my_service_list[my_processing_idx].sync_init_api.sync_init_v2 (my_trans_list, my_trans_list_entries, my_member_list, my_member_list_entries, &my_ring_id); } } if (my_service_list[my_processing_idx].state == PROCESS) { my_service_list[my_processing_idx].state = PROCESS; res = my_service_list[my_processing_idx].sync_process (); if (res == 0) { sync_barrier_enter(); } else { return (-1); } } return (0); } void sync_v2_start ( const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id) { ENTER(); memcpy (&my_ring_id, ring_id, sizeof (struct memb_ring_id)); if (my_memb_determine) { my_memb_determine = 0; sync_servicelist_build_enter (my_memb_determine_list, my_memb_determine_list_entries, ring_id); } else { sync_servicelist_build_enter (member_list, member_list_entries, ring_id); } } void sync_v2_save_transitional ( const unsigned int *member_list, size_t member_list_entries, const struct 
memb_ring_id *ring_id) { ENTER(); memcpy (my_trans_list, member_list, member_list_entries * sizeof (unsigned int)); my_trans_list_entries = member_list_entries; } void sync_v2_abort (void) { ENTER(); if (my_state == SYNC_PROCESS) { schedwrk_destroy (my_schedwrk_handle); my_service_list[my_processing_idx].sync_abort (); } /* this will cause any "old" barrier messages from causing * problems. */ memset (&my_ring_id, 0, sizeof (struct memb_ring_id)); } void sync_v2_memb_list_determine (const struct memb_ring_id *ring_id) { ENTER(); memcpy (&my_memb_determine_ring_id, ring_id, sizeof (struct memb_ring_id)); memb_determine_message_transmit (); } void sync_v2_memb_list_abort (void) { ENTER(); my_memb_determine_list_entries = 0; memset (&my_memb_determine_ring_id, 0, sizeof (struct memb_ring_id)); } diff --git a/exec/totemconfig.c b/exec/totemconfig.c index 6bc3494a..224ff316 100644 --- a/exec/totemconfig.c +++ b/exec/totemconfig.c @@ -1,957 +1,957 @@ /* * Copyright (c) 2002-2005 MontaVista Software, Inc. * Copyright (c) 2006-2010 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_LIBNSS #include #include #include #include #endif #include "util.h" #include "totemconfig.h" #define TOKEN_RETRANSMITS_BEFORE_LOSS_CONST 4 #define TOKEN_TIMEOUT 1000 #define TOKEN_RETRANSMIT_TIMEOUT (int)(TOKEN_TIMEOUT / (TOKEN_RETRANSMITS_BEFORE_LOSS_CONST + 0.2)) #define TOKEN_HOLD_TIMEOUT (int)(TOKEN_RETRANSMIT_TIMEOUT * 0.8 - (1000/(int)HZ)) #define JOIN_TIMEOUT 50 #define MERGE_TIMEOUT 200 #define DOWNCHECK_TIMEOUT 1000 #define FAIL_TO_RECV_CONST 2500 #define SEQNO_UNCHANGED_CONST 30 #define MINIMUM_TIMEOUT (int)(1000/HZ)*3 #define MAX_NETWORK_DELAY 50 #define WINDOW_SIZE 50 #define MAX_MESSAGES 17 #define MISS_COUNT_CONST 5 #define RRP_PROBLEM_COUNT_TIMEOUT 2000 #define RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT 10 #define RRP_PROBLEM_COUNT_THRESHOLD_MIN 5 #define RRP_AUTORECOVERY_CHECK_TIMEOUT 1000 static char error_string_response[512]; static struct objdb_iface_ver0 *global_objdb; static void add_totem_config_notification( struct objdb_iface_ver0 *objdb, struct totem_config *totem_config, hdb_handle_t 
totem_object_handle); /* These just makes the code below a little neater */ static inline int objdb_get_string ( const struct objdb_iface_ver0 *objdb, hdb_handle_t object_service_handle, const char *key, const char **value) { int res; *value = NULL; if ( !(res = objdb->object_key_get (object_service_handle, key, strlen (key), (void *)value, NULL))) { if (*value) { return 0; } } return -1; } static inline void objdb_get_int ( const struct objdb_iface_ver0 *objdb, hdb_handle_t object_service_handle, const char *key, unsigned int *intvalue) { char *value = NULL; if (!objdb->object_key_get (object_service_handle, key, strlen (key), (void *)&value, NULL)) { if (value) { *intvalue = atoi(value); } } } static unsigned int totem_handle_find ( struct objdb_iface_ver0 *objdb, hdb_handle_t *totem_find_handle) { hdb_handle_t object_find_handle; unsigned int res; /* * Find a network section */ objdb->object_find_create ( OBJECT_PARENT_HANDLE, "network", strlen ("network"), &object_find_handle); res = objdb->object_find_next ( object_find_handle, totem_find_handle); objdb->object_find_destroy (object_find_handle); /* * Network section not found in configuration, checking for totem */ if (res == -1) { objdb->object_find_create ( OBJECT_PARENT_HANDLE, "totem", strlen ("totem"), &object_find_handle); res = objdb->object_find_next ( object_find_handle, totem_find_handle); objdb->object_find_destroy (object_find_handle); } if (res == -1) { return (-1); } return (0); } static void totem_volatile_config_read ( struct objdb_iface_ver0 *objdb, struct totem_config *totem_config, hdb_handle_t object_totem_handle) { objdb_get_int (objdb,object_totem_handle, "token", &totem_config->token_timeout); objdb_get_int (objdb,object_totem_handle, "token_retransmit", &totem_config->token_retransmit_timeout); objdb_get_int (objdb,object_totem_handle, "hold", &totem_config->token_hold_timeout); objdb_get_int (objdb,object_totem_handle, "token_retransmits_before_loss_const", 
&totem_config->token_retransmits_before_loss_const); objdb_get_int (objdb,object_totem_handle, "join", &totem_config->join_timeout); objdb_get_int (objdb,object_totem_handle, "send_join", &totem_config->send_join_timeout); objdb_get_int (objdb,object_totem_handle, "consensus", &totem_config->consensus_timeout); objdb_get_int (objdb,object_totem_handle, "merge", &totem_config->merge_timeout); objdb_get_int (objdb,object_totem_handle, "downcheck", &totem_config->downcheck_timeout); objdb_get_int (objdb,object_totem_handle, "fail_recv_const", &totem_config->fail_to_recv_const); objdb_get_int (objdb,object_totem_handle, "seqno_unchanged_const", &totem_config->seqno_unchanged_const); objdb_get_int (objdb,object_totem_handle, "rrp_token_expired_timeout", &totem_config->rrp_token_expired_timeout); objdb_get_int (objdb,object_totem_handle, "rrp_problem_count_timeout", &totem_config->rrp_problem_count_timeout); objdb_get_int (objdb,object_totem_handle, "rrp_problem_count_threshold", &totem_config->rrp_problem_count_threshold); objdb_get_int (objdb,object_totem_handle, "rrp_autorecovery_check_timeout", &totem_config->rrp_autorecovery_check_timeout); objdb_get_int (objdb,object_totem_handle, "heartbeat_failures_allowed", &totem_config->heartbeat_failures_allowed); objdb_get_int (objdb,object_totem_handle, "max_network_delay", &totem_config->max_network_delay); objdb_get_int (objdb,object_totem_handle, "window_size", &totem_config->window_size); (void)objdb_get_string (objdb, object_totem_handle, "vsftype", &totem_config->vsf_type); objdb_get_int (objdb,object_totem_handle, "max_messages", &totem_config->max_messages); objdb_get_int (objdb,object_totem_handle, "miss_count_const", &totem_config->miss_count_const); } static void totem_get_crypto_type( const struct objdb_iface_ver0 *objdb, hdb_handle_t object_totem_handle, struct totem_config *totem_config) { const char *str; totem_config->crypto_accept = TOTEM_CRYPTO_ACCEPT_OLD; if (!objdb_get_string (objdb, object_totem_handle, 
"crypto_accept", &str)) { if (strcmp(str, "new") == 0) { totem_config->crypto_accept = TOTEM_CRYPTO_ACCEPT_NEW; } } totem_config->crypto_type = TOTEM_CRYPTO_SOBER; #ifdef HAVE_LIBNSS /* * We must set these even if the key does not exist. * Encryption type can be set on-the-fly using CFG */ totem_config->crypto_crypt_type = CKM_AES_CBC_PAD; totem_config->crypto_sign_type = CKM_SHA256_RSA_PKCS; #endif if (!objdb_get_string (objdb, object_totem_handle, "crypto_type", &str)) { if (strcmp(str, "sober") == 0) { return; } #ifdef HAVE_LIBNSS if (strcmp(str, "nss") == 0) { totem_config->crypto_type = TOTEM_CRYPTO_NSS; } #endif } } extern int totem_config_read ( struct objdb_iface_ver0 *objdb, struct totem_config *totem_config, const char **error_string) { int res = 0; hdb_handle_t object_totem_handle; hdb_handle_t object_interface_handle; hdb_handle_t object_member_handle; const char *str; unsigned int ringnumber = 0; hdb_handle_t object_find_interface_handle; hdb_handle_t object_find_member_handle; const char *transport_type; int member_count = 0; res = totem_handle_find (objdb, &object_totem_handle); if (res == -1) { printf ("couldn't find totem handle\n"); return (-1); } memset (totem_config, 0, sizeof (struct totem_config)); totem_config->interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX); if (totem_config->interfaces == 0) { *error_string = "Out of memory trying to allocate ethernet interface storage area"; return -1; } memset (totem_config->interfaces, 0, sizeof (struct totem_interface) * INTERFACE_MAX); totem_config->secauth = 1; strcpy (totem_config->rrp_mode, "none"); if (!objdb_get_string (objdb, object_totem_handle, "version", &str)) { if (strcmp (str, "2") == 0) { totem_config->version = 2; } } if (!objdb_get_string (objdb, object_totem_handle, "secauth", &str)) { if (strcmp (str, "on") == 0) { totem_config->secauth = 1; } if (strcmp (str, "off") == 0) { totem_config->secauth = 0; } } if (totem_config->secauth == 1) { 
totem_get_crypto_type(objdb, object_totem_handle, totem_config); } if (!objdb_get_string (objdb, object_totem_handle, "rrp_mode", &str)) { strcpy (totem_config->rrp_mode, str); } /* * Get interface node id */ objdb_get_int (objdb, object_totem_handle, "nodeid", &totem_config->node_id); totem_config->clear_node_high_bit = 0; if (!objdb_get_string (objdb,object_totem_handle, "clear_node_high_bit", &str)) { if (strcmp (str, "yes") == 0) { totem_config->clear_node_high_bit = 1; } } objdb_get_int (objdb,object_totem_handle, "threads", &totem_config->threads); objdb_get_int (objdb,object_totem_handle, "netmtu", &totem_config->net_mtu); /* * Get things that might change in the future */ totem_volatile_config_read (objdb, totem_config, object_totem_handle); objdb->object_find_create ( object_totem_handle, "interface", strlen ("interface"), &object_find_interface_handle); while (objdb->object_find_next ( object_find_interface_handle, &object_interface_handle) == 0) { member_count = 0; objdb_get_int (objdb, object_interface_handle, "ringnumber", &ringnumber); /* * Get interface multicast address */ if (!objdb_get_string (objdb, object_interface_handle, "mcastaddr", &str)) { res = totemip_parse (&totem_config->interfaces[ringnumber].mcast_addr, str, 0); } totem_config->broadcast_use = 0; if (!objdb_get_string (objdb, object_interface_handle, "broadcast", &str)) { if (strcmp (str, "yes") == 0) { totem_config->broadcast_use = 1; totemip_parse ( &totem_config->interfaces[ringnumber].mcast_addr, "255.255.255.255", 0); } } /* * Get mcast port */ if (!objdb_get_string (objdb, object_interface_handle, "mcastport", &str)) { totem_config->interfaces[ringnumber].ip_port = atoi (str); } /* * Get the bind net address */ if (!objdb_get_string (objdb, object_interface_handle, "bindnetaddr", &str)) { res = totemip_parse (&totem_config->interfaces[ringnumber].bindnet, str, totem_config->interfaces[ringnumber].mcast_addr.family); } /* * Get the TTL */ totem_config->interfaces[ringnumber].ttl 
= 1; if (!objdb_get_string (objdb, object_interface_handle, "ttl", &str)) { totem_config->interfaces[ringnumber].ttl = atoi (str); } objdb->object_find_create ( object_interface_handle, "member", strlen ("member"), &object_find_member_handle); while (objdb->object_find_next ( object_find_member_handle, &object_member_handle) == 0) { if (!objdb_get_string (objdb, object_member_handle, "memberaddr", &str)) { res = totemip_parse (&totem_config->interfaces[ringnumber].member_list[member_count++], str, 0); } } totem_config->interfaces[ringnumber].member_count = member_count; totem_config->interface_count++; } objdb->object_find_destroy (object_find_interface_handle); add_totem_config_notification(objdb, totem_config, object_totem_handle); totem_config->transport_number = TOTEM_TRANSPORT_UDP; (void)objdb_get_string (objdb, object_totem_handle, "transport", &transport_type); if (transport_type) { if (strcmp (transport_type, "udpu") == 0) { totem_config->transport_number = TOTEM_TRANSPORT_UDPU; } } if (transport_type) { if (strcmp (transport_type, "iba") == 0) { totem_config->transport_number = TOTEM_TRANSPORT_RDMA; } } return 0; } int totem_config_validate ( struct totem_config *totem_config, const char **error_string) { static char local_error_reason[512]; char parse_error[512]; const char *error_reason = local_error_reason; int i; unsigned int interface_max = INTERFACE_MAX; if (totem_config->interface_count == 0) { error_reason = "No interfaces defined"; goto parse_error; } for (i = 0; i < totem_config->interface_count; i++) { /* * Some error checking of parsed data to make sure its valid */ struct totem_ip_address null_addr; memset (&null_addr, 0, sizeof (struct totem_ip_address)); if ((totem_config->transport_number == 0) && memcmp (&totem_config->interfaces[i].mcast_addr, &null_addr, sizeof (struct totem_ip_address)) == 0) { error_reason = "No multicast address specified"; goto parse_error; } if (totem_config->interfaces[i].ip_port == 0) { error_reason = "No 
multicast port specified"; goto parse_error; } if (totem_config->interfaces[i].ttl > 255) { error_reason = "Invalid TTL (should be 0..255)"; goto parse_error; } if (totem_config->transport_number != TOTEM_TRANSPORT_UDP && totem_config->interfaces[i].ttl != 1) { error_reason = "Can only set ttl on multicast transport types"; goto parse_error; } if (totem_config->interfaces[i].mcast_addr.family == AF_INET6 && totem_config->node_id == 0) { error_reason = "An IPV6 network requires that a node ID be specified."; goto parse_error; } if (totem_config->broadcast_use == 0 && totem_config->transport_number == 0) { if (totem_config->interfaces[i].mcast_addr.family != totem_config->interfaces[i].bindnet.family) { error_reason = "Multicast address family does not match bind address family"; goto parse_error; } if (totem_config->interfaces[i].mcast_addr.family != totem_config->interfaces[i].bindnet.family) { error_reason = "Not all bind address belong to the same IP family"; goto parse_error; } if (totemip_is_mcast (&totem_config->interfaces[i].mcast_addr) != 0) { error_reason = "mcastaddr is not a correct multicast address."; goto parse_error; } } } if (totem_config->version != 2) { error_reason = "This totem parser can only parse version 2 configurations."; goto parse_error; } if (totem_config->token_retransmits_before_loss_const == 0) { totem_config->token_retransmits_before_loss_const = TOKEN_RETRANSMITS_BEFORE_LOSS_CONST; } /* * Setup timeout values that are not setup by user */ if (totem_config->token_timeout == 0) { totem_config->token_timeout = TOKEN_TIMEOUT; if (totem_config->token_retransmits_before_loss_const == 0) { totem_config->token_retransmits_before_loss_const = TOKEN_RETRANSMITS_BEFORE_LOSS_CONST; } if (totem_config->token_retransmit_timeout == 0) { totem_config->token_retransmit_timeout = (int)(totem_config->token_timeout / (totem_config->token_retransmits_before_loss_const + 0.2)); } if (totem_config->token_hold_timeout == 0) { 
totem_config->token_hold_timeout = (int)(totem_config->token_retransmit_timeout * 0.8 - (1000/HZ)); } } if (totem_config->max_network_delay == 0) { totem_config->max_network_delay = MAX_NETWORK_DELAY; } if (totem_config->max_network_delay < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The max_network_delay parameter (%d ms) may not be less then (%d ms).", totem_config->max_network_delay, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->window_size == 0) { totem_config->window_size = WINDOW_SIZE; } if (totem_config->max_messages == 0) { totem_config->max_messages = MAX_MESSAGES; } if (totem_config->miss_count_const == 0) { totem_config->miss_count_const = MISS_COUNT_CONST; } if (totem_config->token_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The token timeout parameter (%d ms) may not be less then (%d ms).", totem_config->token_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->token_retransmit_timeout == 0) { totem_config->token_retransmit_timeout = (int)(totem_config->token_timeout / (totem_config->token_retransmits_before_loss_const + 0.2)); } if (totem_config->token_hold_timeout == 0) { totem_config->token_hold_timeout = (int)(totem_config->token_retransmit_timeout * 0.8 - (1000/HZ)); } if (totem_config->token_retransmit_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The token retransmit timeout parameter (%d ms) may not be less then (%d ms).", totem_config->token_retransmit_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->token_hold_timeout == 0) { totem_config->token_hold_timeout = TOKEN_HOLD_TIMEOUT; } if (totem_config->token_hold_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The token hold timeout parameter (%d ms) may not be less then (%d ms).", totem_config->token_hold_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->join_timeout == 0) { 
totem_config->join_timeout = JOIN_TIMEOUT; } if (totem_config->join_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The join timeout parameter (%d ms) may not be less then (%d ms).", totem_config->join_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->consensus_timeout == 0) { totem_config->consensus_timeout = (int)(float)(1.2 * totem_config->token_timeout); } if (totem_config->consensus_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The consensus timeout parameter (%d ms) may not be less then (%d ms).", totem_config->consensus_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->merge_timeout == 0) { totem_config->merge_timeout = MERGE_TIMEOUT; } if (totem_config->merge_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The merge timeout parameter (%d ms) may not be less then (%d ms).", totem_config->merge_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->downcheck_timeout == 0) { totem_config->downcheck_timeout = DOWNCHECK_TIMEOUT; } if (totem_config->downcheck_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The downcheck timeout parameter (%d ms) may not be less then (%d ms).", totem_config->downcheck_timeout, MINIMUM_TIMEOUT); goto parse_error; } /* * RRP values validation */ if (strcmp (totem_config->rrp_mode, "none") && strcmp (totem_config->rrp_mode, "active") && strcmp (totem_config->rrp_mode, "passive")) { snprintf (local_error_reason, sizeof(local_error_reason), "The RRP mode \"%s\" specified is invalid. 
It must be none, active, or passive.\n", totem_config->rrp_mode); goto parse_error; } if (totem_config->rrp_problem_count_timeout == 0) { totem_config->rrp_problem_count_timeout = RRP_PROBLEM_COUNT_TIMEOUT; } if (totem_config->rrp_problem_count_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The RRP problem count timeout parameter (%d ms) may not be less then (%d ms).", totem_config->rrp_problem_count_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->rrp_problem_count_threshold == 0) { totem_config->rrp_problem_count_threshold = RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT; } if (totem_config->rrp_problem_count_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) { snprintf (local_error_reason, sizeof(local_error_reason), "The RRP problem count threshold (%d problem count) may not be less then (%d problem count).", totem_config->rrp_problem_count_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN); goto parse_error; } if (totem_config->rrp_token_expired_timeout == 0) { totem_config->rrp_token_expired_timeout = totem_config->token_retransmit_timeout; } if (totem_config->rrp_token_expired_timeout < MINIMUM_TIMEOUT) { snprintf (local_error_reason, sizeof(local_error_reason), "The RRP token expired timeout parameter (%d ms) may not be less then (%d ms).", totem_config->rrp_token_expired_timeout, MINIMUM_TIMEOUT); goto parse_error; } if (totem_config->rrp_autorecovery_check_timeout == 0) { totem_config->rrp_autorecovery_check_timeout = RRP_AUTORECOVERY_CHECK_TIMEOUT; } if (strcmp (totem_config->rrp_mode, "none") == 0) { interface_max = 1; } if (interface_max < totem_config->interface_count) { snprintf (parse_error, sizeof(parse_error), "%d is too many configured interfaces for the rrp_mode setting %s.", totem_config->interface_count, totem_config->rrp_mode); error_reason = parse_error; goto parse_error; } if (totem_config->fail_to_recv_const == 0) { totem_config->fail_to_recv_const = FAIL_TO_RECV_CONST; } if 
(totem_config->seqno_unchanged_const == 0) { totem_config->seqno_unchanged_const = SEQNO_UNCHANGED_CONST; } if (totem_config->net_mtu == 0) { totem_config->net_mtu = 1500; } if ((MESSAGE_QUEUE_MAX) < totem_config->max_messages) { snprintf (local_error_reason, sizeof(local_error_reason), "The max_messages parameter (%d messages) may not be greater then (%d messages).", totem_config->max_messages, MESSAGE_QUEUE_MAX); goto parse_error; } if (totem_config->threads > SEND_THREADS_MAX) { totem_config->threads = SEND_THREADS_MAX; } if (totem_config->secauth == 0) { totem_config->threads = 0; } if (totem_config->net_mtu > FRAME_SIZE_MAX) { error_reason = "This net_mtu parameter is greater then the maximum frame size"; goto parse_error; } if (totem_config->vsf_type == NULL) { totem_config->vsf_type = "none"; } return (0); parse_error: snprintf (error_string_response, sizeof(error_string_response), "parse error in config: %s\n", error_reason); *error_string = error_string_response; return (-1); } static int read_keyfile ( const char *key_location, struct totem_config *totem_config, const char **error_string) { int fd; int res; ssize_t expected_key_len = sizeof (totem_config->private_key); int saved_errno; char error_str[100]; const char *error_ptr; fd = open (key_location, O_RDONLY); if (fd == -1) { - LOGSYS_STRERROR_R (error_ptr, errno, error_str, sizeof(error_str)); + error_ptr = qb_strerror_r(errno, error_str, sizeof(error_str)); snprintf (error_string_response, sizeof(error_string_response), "Could not open %s: %s\n", key_location, error_ptr); goto parse_error; } res = read (fd, totem_config->private_key, expected_key_len); saved_errno = errno; close (fd); if (res == -1) { - LOGSYS_STRERROR_R (error_ptr, saved_errno, error_str, sizeof(error_str)); + error_ptr = qb_strerror_r (saved_errno, error_str, sizeof(error_str)); snprintf (error_string_response, sizeof(error_string_response), "Could not read %s: %s\n", key_location, error_ptr); goto parse_error; } 
totem_config->private_key_len = expected_key_len; if (res != expected_key_len) { snprintf (error_string_response, sizeof(error_string_response), "Could only read %d bits of 1024 bits from %s.\n", res * 8, key_location); goto parse_error; } return 0; parse_error: *error_string = error_string_response; return (-1); } int totem_config_keyread ( struct objdb_iface_ver0 *objdb, struct totem_config *totem_config, const char **error_string) { int got_key = 0; const char *key_location = NULL; hdb_handle_t object_totem_handle; int res; memset (totem_config->private_key, 0, 128); totem_config->private_key_len = 128; if (totem_config->secauth == 0) { return (0); } res = totem_handle_find (objdb, &object_totem_handle); if (res == -1) { return (-1); } /* objdb may store the location of the key file */ if (!objdb_get_string (objdb,object_totem_handle, "keyfile", &key_location) && key_location) { res = read_keyfile(key_location, totem_config, error_string); if (res) { goto key_error; } got_key = 1; } else { /* Or the key itself may be in the objdb */ char *key = NULL; size_t key_len; res = objdb->object_key_get (object_totem_handle, "key", strlen ("key"), (void *)&key, &key_len); if (res == 0 && key) { if (key_len > sizeof (totem_config->private_key)) { goto key_error; } memcpy(totem_config->private_key, key, key_len); totem_config->private_key_len = key_len; got_key = 1; } } /* In desperation we read the default filename */ if (!got_key) { const char *filename = getenv("COROSYNC_TOTEM_AUTHKEY_FILE"); if (!filename) filename = COROSYSCONFDIR "/authkey"; res = read_keyfile(filename, totem_config, error_string); if (res) goto key_error; } return (0); key_error: *error_string = error_string_response; return (-1); } static void totem_key_change_notify(object_change_type_t change_type, hdb_handle_t parent_object_handle, hdb_handle_t object_handle, const void *object_name_pt, size_t object_name_len, const void *key_name_pt, size_t key_len, const void *key_value_pt, size_t 
key_value_len, void *priv_data_pt) { struct totem_config *totem_config = priv_data_pt; if (memcmp(object_name_pt, "totem", object_name_len) == 0) totem_volatile_config_read(global_objdb, totem_config, object_handle); // CHECK } static void totem_objdb_reload_notify(objdb_reload_notify_type_t type, int flush, void *priv_data_pt) { struct totem_config *totem_config = priv_data_pt; hdb_handle_t totem_object_handle; if (totem_config == NULL) return; /* * A new totem {} key might exist, cancel the * existing notification at the start of reload, * and start a new one on the new object when * it's all settled. */ if (type == OBJDB_RELOAD_NOTIFY_START) { global_objdb->object_track_stop( totem_key_change_notify, NULL, NULL, NULL, totem_config); } if (type == OBJDB_RELOAD_NOTIFY_END || type == OBJDB_RELOAD_NOTIFY_FAILED) { if (!totem_handle_find(global_objdb, &totem_object_handle)) { global_objdb->object_track_start(totem_object_handle, 1, totem_key_change_notify, NULL, // object_create_notify, NULL, // object_destroy_notify, NULL, // object_reload_notify totem_config); // priv_data /* * Reload the configuration */ totem_volatile_config_read(global_objdb, totem_config, totem_object_handle); } else { log_printf(LOGSYS_LEVEL_ERROR, "totem objdb tracking stopped, cannot find totem{} handle on objdb\n"); } } } static void add_totem_config_notification( struct objdb_iface_ver0 *objdb, struct totem_config *totem_config, hdb_handle_t totem_object_handle) { global_objdb = objdb; objdb->object_track_start(totem_object_handle, 1, totem_key_change_notify, NULL, // object_create_notify, NULL, // object_destroy_notify, NULL, // object_reload_notify totem_config); // priv_data /* * Reload notify must be on the parent object */ objdb->object_track_start(OBJECT_PARENT_HANDLE, 1, NULL, // key_change_notify, NULL, // object_create_notify, NULL, // object_destroy_notify, totem_objdb_reload_notify, // object_reload_notify totem_config); // priv_data } diff --git a/exec/totemiba.c 
b/exec/totemiba.c index e3c7816b..2d8c6908 100644 --- a/exec/totemiba.c +++ b/exec/totemiba.c @@ -1,1556 +1,1555 @@ /* * Copyright (c) 2009 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define LOGSYS_UTILS_ONLY 1 #include #include "totemiba.h" #include "wthread.h" #define COMPLETION_QUEUE_ENTRIES 100 #define TOTAL_READ_POSTS 100 #define MAX_MTU_SIZE 4096 struct totemiba_instance { struct sockaddr bind_addr; struct sockaddr send_token_bind_addr; struct sockaddr mcast_addr; struct sockaddr token_addr; struct sockaddr local_mcast_bind_addr; struct totem_interface *totem_interface; struct totem_config *totem_config; void (*totemiba_iface_change_fn) ( void *context, const struct totem_ip_address *iface_address); void (*totemiba_deliver_fn) ( void *context, const void *msg, unsigned int msg_len); void (*totemiba_target_set_completed) ( void *context); void *rrp_context; qb_loop_timer_handle timer_netif_check_timeout; qb_loop_t *totemiba_poll_handle; struct totem_ip_address my_id; struct rdma_event_channel *mcast_channel; struct rdma_cm_id *mcast_cma_id; struct ibv_pd *mcast_pd; struct sockaddr mcast_dest_addr; uint32_t mcast_qpn; uint32_t mcast_qkey; struct ibv_ah *mcast_ah; struct ibv_comp_channel *mcast_send_completion_channel; struct ibv_comp_channel *mcast_recv_completion_channel; struct ibv_cq *mcast_send_cq; struct ibv_cq *mcast_recv_cq; int recv_token_accepted; struct rdma_event_channel *recv_token_channel; struct rdma_event_channel *listen_recv_token_channel; struct rdma_cm_id *listen_recv_token_cma_id; struct rdma_cm_id *recv_token_cma_id; struct ibv_pd *recv_token_pd; struct sockaddr recv_token_dest_addr; struct ibv_comp_channel *recv_token_send_completion_channel; struct ibv_comp_channel *recv_token_recv_completion_channel; struct ibv_cq *recv_token_send_cq; struct ibv_cq *recv_token_recv_cq; int 
send_token_bound; struct rdma_event_channel *send_token_channel; struct rdma_cm_id *send_token_cma_id; struct ibv_pd *send_token_pd; struct sockaddr send_token_dest_addr; uint32_t send_token_qpn; uint32_t send_token_qkey; struct ibv_ah *send_token_ah; struct ibv_comp_channel *send_token_send_completion_channel; struct ibv_comp_channel *send_token_recv_completion_channel; struct ibv_cq *send_token_send_cq; struct ibv_cq *send_token_recv_cq; void (*totemiba_log_printf) ( unsigned int rec_ident, const char *function, const char *file, int line, const char *format, ...)__attribute__((format(printf, 5, 6))); int totemiba_subsys_id; struct list_head mcast_send_buf_free; struct list_head token_send_buf_free; struct list_head mcast_send_buf_head; struct list_head token_send_buf_head; struct list_head recv_token_recv_buf_head; }; union u { uint64_t wr_id; void *v; }; -#define log_printf(level, format, args...) \ -do { \ - instance->totemiba_log_printf ( \ - LOGSYS_ENCODE_RECID(level, \ - instance->totemiba_subsys_id, \ - LOGSYS_RECID_LOG), \ - __FUNCTION__, __FILE__, __LINE__, \ - (const char *)format, ##args); \ +#define log_printf(level, format, args...) 
\ +do { \ + instance->totemiba_log_printf ( \ + level, \ + instance->totemiba_subsys_id, \ + __FUNCTION__, __FILE__, __LINE__, \ + (const char *)format, ##args); \ } while (0); struct recv_buf { struct list_head list_all; struct ibv_recv_wr recv_wr; struct ibv_sge sge; struct ibv_mr *mr; char buffer[MAX_MTU_SIZE]; }; struct send_buf { struct list_head list_free; struct list_head list_all; struct ibv_mr *mr; char buffer[MAX_MTU_SIZE]; }; static hdb_handle_t void2wrid (void *v) { union u u; u.v = v; return u.wr_id; } static void * wrid2void (uint64_t wr_id) { union u u; u.wr_id = wr_id; return u.v; } static void totemiba_instance_initialize (struct totemiba_instance *instance) { memset (instance, 0, sizeof (struct totemiba_instance)); list_init (&instance->mcast_send_buf_free); list_init (&instance->token_send_buf_free); list_init (&instance->mcast_send_buf_head); list_init (&instance->token_send_buf_head); list_init (&instance->recv_token_recv_buf_head); } static inline struct send_buf *mcast_send_buf_get ( struct totemiba_instance *instance) { struct send_buf *send_buf; if (list_empty (&instance->mcast_send_buf_free) == 0) { send_buf = list_entry (instance->mcast_send_buf_free.next, struct send_buf, list_free); list_del (&send_buf->list_free); return (send_buf); } send_buf = malloc (sizeof (struct send_buf)); if (send_buf == NULL) { return (NULL); } send_buf->mr = ibv_reg_mr (instance->mcast_pd, send_buf->buffer, 2048, IBV_ACCESS_LOCAL_WRITE); if (send_buf->mr == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't register memory range\n"); free (send_buf); return (NULL); } list_init (&send_buf->list_all); list_add_tail (&send_buf->list_all, &instance->mcast_send_buf_head); return (send_buf); } static inline void mcast_send_buf_put ( struct totemiba_instance *instance, struct send_buf *send_buf) { list_init (&send_buf->list_free); list_add_tail (&send_buf->list_free, &instance->mcast_send_buf_free); } static inline struct send_buf *token_send_buf_get ( struct 
totemiba_instance *instance) { struct send_buf *send_buf; if (list_empty (&instance->token_send_buf_free) == 0) { send_buf = list_entry (instance->token_send_buf_free.next, struct send_buf, list_free); list_del (&send_buf->list_free); return (send_buf); } send_buf = malloc (sizeof (struct send_buf)); if (send_buf == NULL) { return (NULL); } send_buf->mr = ibv_reg_mr (instance->send_token_pd, send_buf->buffer, 2048, IBV_ACCESS_LOCAL_WRITE); if (send_buf->mr == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't register memory range\n"); free (send_buf); return (NULL); } list_init (&send_buf->list_all); list_add_tail (&send_buf->list_all, &instance->token_send_buf_head); return (send_buf); } static inline void token_send_buf_destroy (struct totemiba_instance *instance) { struct list_head *list; struct send_buf *send_buf; for (list = instance->token_send_buf_head.next; list != &instance->token_send_buf_head;) { send_buf = list_entry (list, struct send_buf, list_all); list = list->next; ibv_dereg_mr (send_buf->mr); free (send_buf); } list_init (&instance->token_send_buf_free); list_init (&instance->token_send_buf_head); } static inline void token_send_buf_put ( struct totemiba_instance *instance, struct send_buf *send_buf) { list_init (&send_buf->list_free); list_add_tail (&send_buf->list_free, &instance->token_send_buf_free); } static inline struct recv_buf *recv_token_recv_buf_create ( struct totemiba_instance *instance) { struct recv_buf *recv_buf; recv_buf = malloc (sizeof (struct recv_buf)); if (recv_buf == NULL) { return (NULL); } recv_buf->mr = ibv_reg_mr (instance->recv_token_pd, &recv_buf->buffer, 2048, IBV_ACCESS_LOCAL_WRITE); recv_buf->recv_wr.next = NULL; recv_buf->recv_wr.sg_list = &recv_buf->sge; recv_buf->recv_wr.num_sge = 1; recv_buf->recv_wr.wr_id = (uintptr_t)recv_buf; recv_buf->sge.length = 2048; recv_buf->sge.lkey = recv_buf->mr->lkey; recv_buf->sge.addr = (uintptr_t)recv_buf->buffer; list_init (&recv_buf->list_all); list_add (&recv_buf->list_all, 
&instance->recv_token_recv_buf_head); return (recv_buf); } static inline int recv_token_recv_buf_post (struct totemiba_instance *instance, struct recv_buf *recv_buf) { struct ibv_recv_wr *fail_recv; int res; res = ibv_post_recv (instance->recv_token_cma_id->qp, &recv_buf->recv_wr, &fail_recv); return (res); } static inline void recv_token_recv_buf_post_initial (struct totemiba_instance *instance) { struct recv_buf *recv_buf; unsigned int i; for (i = 0; i < TOTAL_READ_POSTS; i++) { recv_buf = recv_token_recv_buf_create (instance); recv_token_recv_buf_post (instance, recv_buf); } } static inline void recv_token_recv_buf_post_destroy ( struct totemiba_instance *instance) { struct recv_buf *recv_buf; struct list_head *list; for (list = instance->recv_token_recv_buf_head.next; list != &instance->recv_token_recv_buf_head;) { recv_buf = list_entry (list, struct recv_buf, list_all); list = list->next; ibv_dereg_mr (recv_buf->mr); free (recv_buf); } list_init (&instance->recv_token_recv_buf_head); } static inline struct recv_buf *mcast_recv_buf_create (struct totemiba_instance *instance) { struct recv_buf *recv_buf; struct ibv_mr *mr; recv_buf = malloc (sizeof (struct recv_buf)); if (recv_buf == NULL) { return (NULL); } mr = ibv_reg_mr (instance->mcast_pd, &recv_buf->buffer, 2048, IBV_ACCESS_LOCAL_WRITE); recv_buf->recv_wr.next = NULL; recv_buf->recv_wr.sg_list = &recv_buf->sge; recv_buf->recv_wr.num_sge = 1; recv_buf->recv_wr.wr_id = (uintptr_t)recv_buf; recv_buf->sge.length = 2048; recv_buf->sge.lkey = mr->lkey; recv_buf->sge.addr = (uintptr_t)recv_buf->buffer; return (recv_buf); } static inline int mcast_recv_buf_post (struct totemiba_instance *instance, struct recv_buf *recv_buf) { struct ibv_recv_wr *fail_recv; int res; res = ibv_post_recv (instance->mcast_cma_id->qp, &recv_buf->recv_wr, &fail_recv); return (res); } static inline void mcast_recv_buf_post_initial (struct totemiba_instance *instance) { struct recv_buf *recv_buf; unsigned int i; for (i = 0; i < 
TOTAL_READ_POSTS; i++) { recv_buf = mcast_recv_buf_create (instance); mcast_recv_buf_post (instance, recv_buf); } } static inline void iba_deliver_fn (struct totemiba_instance *instance, uint64_t wr_id, uint32_t bytes) { const char *addr; const struct recv_buf *recv_buf; recv_buf = wrid2void(wr_id); addr = &recv_buf->buffer[sizeof (struct ibv_grh)]; instance->totemiba_deliver_fn (instance->rrp_context, addr, bytes); } static int mcast_cq_send_event_fn (int events, int suck, void *context) { struct totemiba_instance *instance = (struct totemiba_instance *)context; struct ibv_wc wc[32]; struct ibv_cq *ev_cq; void *ev_ctx; int res; int i; ibv_get_cq_event (instance->mcast_send_completion_channel, &ev_cq, &ev_ctx); ibv_ack_cq_events (ev_cq, 1); res = ibv_req_notify_cq (ev_cq, 0); res = ibv_poll_cq (instance->mcast_send_cq, 32, wc); if (res > 0) { for (i = 0; i < res; i++) { mcast_send_buf_put (instance, wrid2void(wc[i].wr_id)); } } return (0); } static int mcast_cq_recv_event_fn (int events, int suck, void *context) { struct totemiba_instance *instance = (struct totemiba_instance *)context; struct ibv_wc wc[64]; struct ibv_cq *ev_cq; void *ev_ctx; int res; int i; ibv_get_cq_event (instance->mcast_recv_completion_channel, &ev_cq, &ev_ctx); ibv_ack_cq_events (ev_cq, 1); res = ibv_req_notify_cq (ev_cq, 0); res = ibv_poll_cq (instance->mcast_recv_cq, 64, wc); if (res > 0) { for (i = 0; i < res; i++) { iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len); mcast_recv_buf_post (instance, wrid2void(wc[i].wr_id)); } } return (0); } static int mcast_rdma_event_fn (int events, int suck, void *context) { struct totemiba_instance *instance = (struct totemiba_instance *)context; struct rdma_cm_event *event; int res; res = rdma_get_cm_event (instance->mcast_channel, &event); if (res != 0) { return (0); } switch (event->event) { /* * occurs when we resolve the multicast address */ case RDMA_CM_EVENT_ADDR_RESOLVED: rdma_join_multicast (instance->mcast_cma_id, &instance->mcast_addr, 
instance); break; /* * occurs when the CM joins the multicast group */ case RDMA_CM_EVENT_MULTICAST_JOIN: instance->mcast_qpn = event->param.ud.qp_num; instance->mcast_qkey = event->param.ud.qkey; instance->mcast_ah = ibv_create_ah (instance->mcast_pd, &event->param.ud.ah_attr); instance->totemiba_iface_change_fn (instance->rrp_context, &instance->my_id); break; case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ROUTE_ERROR: case RDMA_CM_EVENT_MULTICAST_ERROR: log_printf (LOGSYS_LEVEL_ERROR, "multicast error\n"); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: break; default: log_printf (LOGSYS_LEVEL_ERROR, "default %d\n", event->event); break; } rdma_ack_cm_event (event); return (0); } static int recv_token_cq_send_event_fn (hdb_handle_t poll_handle, int events, int suck, void *context) { struct totemiba_instance *instance = (struct totemiba_instance *)context; struct ibv_wc wc[32]; struct ibv_cq *ev_cq; void *ev_ctx; int res; int i; ibv_get_cq_event (instance->recv_token_send_completion_channel, &ev_cq, &ev_ctx); ibv_ack_cq_events (ev_cq, 1); res = ibv_req_notify_cq (ev_cq, 0); res = ibv_poll_cq (instance->recv_token_send_cq, 32, wc); if (res > 0) { for (i = 0; i < res; i++) { iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len); ibv_dereg_mr (wrid2void(wc[i].wr_id)); } } return (0); } static int recv_token_cq_recv_event_fn (int events, int suck, void *context) { struct totemiba_instance *instance = (struct totemiba_instance *)context; struct ibv_wc wc[32]; struct ibv_cq *ev_cq; void *ev_ctx; int res; int i; ibv_get_cq_event (instance->recv_token_recv_completion_channel, &ev_cq, &ev_ctx); ibv_ack_cq_events (ev_cq, 1); res = ibv_req_notify_cq (ev_cq, 0); res = ibv_poll_cq (instance->recv_token_recv_cq, 32, wc); if (res > 0) { for (i = 0; i < res; i++) { iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len); recv_token_recv_buf_post (instance, wrid2void(wc[i].wr_id)); } } return (0); } static int recv_token_accept_destroy (struct totemiba_instance *instance) { if 
(instance->recv_token_accepted == 0) { return (0); } rdma_destroy_qp (instance->recv_token_cma_id); recv_token_recv_buf_post_destroy (instance); ibv_destroy_cq (instance->recv_token_send_cq); ibv_destroy_cq (instance->recv_token_recv_cq); ibv_destroy_comp_channel (instance->recv_token_send_completion_channel); ibv_destroy_comp_channel (instance->recv_token_recv_completion_channel); ibv_dealloc_pd (instance->recv_token_pd); rdma_destroy_id (instance->recv_token_cma_id); qb_loop_poll_del ( instance->totemiba_poll_handle, instance->recv_token_recv_completion_channel->fd); qb_loop_poll_del ( instance->totemiba_poll_handle, instance->recv_token_send_completion_channel->fd); return (0); } static int recv_token_accept_setup (struct totemiba_instance *instance) { struct ibv_qp_init_attr init_qp_attr; int res = 0; /* * Allocate the protection domain */ instance->recv_token_pd = ibv_alloc_pd (instance->recv_token_cma_id->verbs); /* * Create a completion channel */ instance->recv_token_recv_completion_channel = ibv_create_comp_channel (instance->recv_token_cma_id->verbs); if (instance->recv_token_recv_completion_channel == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel\n"); return (-1); } /* * Create the completion queue */ instance->recv_token_recv_cq = ibv_create_cq (instance->recv_token_cma_id->verbs, COMPLETION_QUEUE_ENTRIES, instance, instance->recv_token_recv_completion_channel, 0); if (instance->recv_token_recv_cq == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue\n"); return (-1); } res = ibv_req_notify_cq (instance->recv_token_recv_cq, 0); if (res != 0) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't request notifications of the completion queue\n"); return (-1); } /* * Create a completion channel */ instance->recv_token_send_completion_channel = ibv_create_comp_channel (instance->recv_token_cma_id->verbs); if (instance->recv_token_send_completion_channel == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't create 
completion channel\n"); return (-1); } /* * Create the completion queue */ instance->recv_token_send_cq = ibv_create_cq (instance->recv_token_cma_id->verbs, COMPLETION_QUEUE_ENTRIES, instance, instance->recv_token_send_completion_channel, 0); if (instance->recv_token_send_cq == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue\n"); return (-1); } res = ibv_req_notify_cq (instance->recv_token_send_cq, 0); if (res != 0) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't request notifications of the completion queue\n"); return (-1); } memset (&init_qp_attr, 0, sizeof (struct ibv_qp_init_attr)); init_qp_attr.cap.max_send_wr = 50; init_qp_attr.cap.max_recv_wr = TOTAL_READ_POSTS; init_qp_attr.cap.max_send_sge = 1; init_qp_attr.cap.max_recv_sge = 1; init_qp_attr.qp_context = instance; init_qp_attr.sq_sig_all = 0; init_qp_attr.qp_type = IBV_QPT_UD; init_qp_attr.send_cq = instance->recv_token_send_cq; init_qp_attr.recv_cq = instance->recv_token_recv_cq; res = rdma_create_qp (instance->recv_token_cma_id, instance->recv_token_pd, &init_qp_attr); if (res != 0) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't create queue pair\n"); return (-1); } recv_token_recv_buf_post_initial (instance); qb_loop_poll_add ( instance->totemiba_poll_handle, QB_LOOP_MED, instance->recv_token_recv_completion_channel->fd, POLLIN, instance, recv_token_cq_recv_event_fn); qb_loop_poll_add ( instance->totemiba_poll_handle, QB_LOOP_MED, instance->recv_token_send_completion_channel->fd, POLLIN, instance, recv_token_cq_send_event_fn); instance->recv_token_accepted = 1; return (res); }; static int recv_token_rdma_event_fn (int events, int suck, void *context) { struct totemiba_instance *instance = (struct totemiba_instance *)context; struct rdma_cm_event *event; struct rdma_conn_param conn_param; int res; res = rdma_get_cm_event (instance->listen_recv_token_channel, &event); if (res != 0) { return (0); } switch (event->event) { case RDMA_CM_EVENT_CONNECT_REQUEST: recv_token_accept_destroy 
(instance); instance->recv_token_cma_id = event->id; recv_token_accept_setup (instance); memset (&conn_param, 0, sizeof (struct rdma_conn_param)); conn_param.qp_num = instance->recv_token_cma_id->qp->qp_num; res = rdma_accept (instance->recv_token_cma_id, &conn_param); break; default: log_printf (LOGSYS_LEVEL_ERROR, "default %d\n", event->event); break; } res = rdma_ack_cm_event (event); return (0); } static int send_token_cq_send_event_fn (int events, int suck, void *context) { struct totemiba_instance *instance = (struct totemiba_instance *)context; struct ibv_wc wc[32]; struct ibv_cq *ev_cq; void *ev_ctx; int res; int i; ibv_get_cq_event (instance->send_token_send_completion_channel, &ev_cq, &ev_ctx); ibv_ack_cq_events (ev_cq, 1); res = ibv_req_notify_cq (ev_cq, 0); res = ibv_poll_cq (instance->send_token_send_cq, 32, wc); if (res > 0) { for (i = 0; i < res; i++) { token_send_buf_put (instance, wrid2void(wc[i].wr_id)); } } return (0); } static int send_token_cq_recv_event_fn (int events, int suck, void *context) { struct totemiba_instance *instance = (struct totemiba_instance *)context; struct ibv_wc wc[32]; struct ibv_cq *ev_cq; void *ev_ctx; int res; int i; ibv_get_cq_event (instance->send_token_recv_completion_channel, &ev_cq, &ev_ctx); ibv_ack_cq_events (ev_cq, 1); res = ibv_req_notify_cq (ev_cq, 0); res = ibv_poll_cq (instance->send_token_recv_cq, 32, wc); if (res > 0) { for (i = 0; i < res; i++) { iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len); } } return (0); } static int send_token_rdma_event_fn (int events, int suck, void *context) { struct totemiba_instance *instance = (struct totemiba_instance *)context; struct rdma_cm_event *event; struct rdma_conn_param conn_param; int res; res = rdma_get_cm_event (instance->send_token_channel, &event); if (res != 0) { return (0); } switch (event->event) { /* * occurs when we resolve the multicast address */ case RDMA_CM_EVENT_ADDR_RESOLVED: res = rdma_resolve_route (instance->send_token_cma_id, 2000); 
break; /* * occurs when the CM joins the multicast group */ case RDMA_CM_EVENT_ROUTE_RESOLVED: memset (&conn_param, 0, sizeof (struct rdma_conn_param)); conn_param.private_data = NULL; conn_param.private_data_len = 0; res = rdma_connect (instance->send_token_cma_id, &conn_param); break; case RDMA_CM_EVENT_ESTABLISHED: instance->send_token_qpn = event->param.ud.qp_num; instance->send_token_qkey = event->param.ud.qkey; instance->send_token_ah = ibv_create_ah (instance->send_token_pd, &event->param.ud.ah_attr); instance->totemiba_target_set_completed (instance->rrp_context); break; case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ROUTE_ERROR: case RDMA_CM_EVENT_MULTICAST_ERROR: log_printf (LOGSYS_LEVEL_ERROR, "send_token_rdma_event_fn multicast error\n"); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: break; case RDMA_CM_EVENT_UNREACHABLE: log_printf (LOGSYS_LEVEL_ERROR, "send_token_rdma_event_fn unreachable\n"); break; default: log_printf (LOGSYS_LEVEL_ERROR, "send_token_rdma_event_fn unknown event %d\n", event->event); break; } rdma_ack_cm_event (event); return (0); } static int send_token_bind (struct totemiba_instance *instance) { int res; struct ibv_qp_init_attr init_qp_attr; instance->send_token_channel = rdma_create_event_channel(); if (instance->send_token_channel == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't create rdma channel\n"); return (-1); } res = rdma_create_id (instance->send_token_channel, &instance->send_token_cma_id, NULL, RDMA_PS_UDP); if (res) { log_printf (LOGSYS_LEVEL_ERROR, "error creating send_token_cma_id\n"); return (-1); } res = rdma_bind_addr (instance->send_token_cma_id, &instance->send_token_bind_addr); if (res) { log_printf (LOGSYS_LEVEL_ERROR, "error doing rdma_bind_addr for send token\n"); return (-1); } /* * Resolve the send_token address into a GUID */ res = rdma_resolve_addr (instance->send_token_cma_id, &instance->bind_addr, &instance->token_addr, 2000); if (res) { log_printf (LOGSYS_LEVEL_ERROR, "error resolving send token 
address %d %d\n", res, errno); return (-1); } /* * Allocate the protection domain */ instance->send_token_pd = ibv_alloc_pd (instance->send_token_cma_id->verbs); /* * Create a completion channel */ instance->send_token_recv_completion_channel = ibv_create_comp_channel (instance->send_token_cma_id->verbs); if (instance->send_token_recv_completion_channel == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel\n"); return (-1); } /* * Create the completion queue */ instance->send_token_recv_cq = ibv_create_cq (instance->send_token_cma_id->verbs, COMPLETION_QUEUE_ENTRIES, instance, instance->send_token_recv_completion_channel, 0); if (instance->send_token_recv_cq == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue\n"); return (-1); } res = ibv_req_notify_cq (instance->send_token_recv_cq, 0); if (res != 0) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't request notifications of the completion queue\n"); return (-1); } /* * Create a completion channel */ instance->send_token_send_completion_channel = ibv_create_comp_channel (instance->send_token_cma_id->verbs); if (instance->send_token_send_completion_channel == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel\n"); return (-1); } /* * Create the completion queue */ instance->send_token_send_cq = ibv_create_cq ( instance->send_token_cma_id->verbs, COMPLETION_QUEUE_ENTRIES, instance, instance->send_token_send_completion_channel, 0); if (instance->send_token_send_cq == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue\n"); return (-1); } res = ibv_req_notify_cq (instance->send_token_send_cq, 0); if (res != 0) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't request notifications of the completion queue\n"); return (-1); } memset (&init_qp_attr, 0, sizeof (struct ibv_qp_init_attr)); init_qp_attr.cap.max_send_wr = 50; init_qp_attr.cap.max_recv_wr = TOTAL_READ_POSTS; init_qp_attr.cap.max_send_sge = 1; init_qp_attr.cap.max_recv_sge = 1; 
init_qp_attr.qp_context = instance; init_qp_attr.sq_sig_all = 0; init_qp_attr.qp_type = IBV_QPT_UD; init_qp_attr.send_cq = instance->send_token_send_cq; init_qp_attr.recv_cq = instance->send_token_recv_cq; res = rdma_create_qp (instance->send_token_cma_id, instance->send_token_pd, &init_qp_attr); if (res != 0) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't create queue pair\n"); return (-1); } qb_loop_poll_add ( instance->totemiba_poll_handle, QB_LOOP_MED, instance->send_token_recv_completion_channel->fd, POLLIN, instance, send_token_cq_recv_event_fn); qb_loop_poll_add ( instance->totemiba_poll_handle, QB_LOOP_MED, instance->send_token_send_completion_channel->fd, POLLIN, instance, send_token_cq_send_event_fn); qb_loop_poll_add ( instance->totemiba_poll_handle, QB_LOOP_MED, instance->send_token_channel->fd, POLLIN, instance, send_token_rdma_event_fn); instance->send_token_bound = 1; return (0); } static int send_token_unbind (struct totemiba_instance *instance) { if (instance->send_token_bound == 0) { return (0); } qb_loop_poll_del ( instance->totemiba_poll_handle, instance->send_token_recv_completion_channel->fd); qb_loop_poll_del ( instance->totemiba_poll_handle, instance->send_token_send_completion_channel->fd); qb_loop_poll_del ( instance->totemiba_poll_handle, instance->send_token_channel->fd); rdma_destroy_qp (instance->send_token_cma_id); ibv_destroy_cq (instance->send_token_send_cq); ibv_destroy_cq (instance->send_token_recv_cq); ibv_destroy_comp_channel (instance->send_token_send_completion_channel); ibv_destroy_comp_channel (instance->send_token_recv_completion_channel); token_send_buf_destroy (instance); ibv_dealloc_pd (instance->send_token_pd); rdma_destroy_id (instance->send_token_cma_id); rdma_destroy_event_channel (instance->send_token_channel); return (0); } static int recv_token_bind (struct totemiba_instance *instance) { int res; instance->listen_recv_token_channel = rdma_create_event_channel(); if (instance->listen_recv_token_channel == NULL) { 
log_printf (LOGSYS_LEVEL_ERROR, "couldn't create rdma channel\n"); return (-1); } res = rdma_create_id (instance->listen_recv_token_channel, &instance->listen_recv_token_cma_id, NULL, RDMA_PS_UDP); if (res) { log_printf (LOGSYS_LEVEL_ERROR, "error creating recv_token_cma_id\n"); return (-1); } res = rdma_bind_addr (instance->listen_recv_token_cma_id, &instance->bind_addr); if (res) { log_printf (LOGSYS_LEVEL_ERROR, "error doing rdma_bind_addr for recv token\n"); return (-1); } /* * Resolve the recv_token address into a GUID */ res = rdma_listen (instance->listen_recv_token_cma_id, 10); if (res) { log_printf (LOGSYS_LEVEL_ERROR, "error listening %d %d\n", res, errno); return (-1); } qb_loop_poll_add ( instance->totemiba_poll_handle, QB_LOOP_MED, instance->listen_recv_token_channel->fd, POLLIN, instance, recv_token_rdma_event_fn); return (0); } static int mcast_bind (struct totemiba_instance *instance) { int res; struct ibv_qp_init_attr init_qp_attr; instance->mcast_channel = rdma_create_event_channel(); if (instance->mcast_channel == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't create rdma channel\n"); return (-1); } res = rdma_create_id (instance->mcast_channel, &instance->mcast_cma_id, NULL, RDMA_PS_UDP); if (res) { log_printf (LOGSYS_LEVEL_ERROR, "error creating mcast_cma_id\n"); return (-1); } res = rdma_bind_addr (instance->mcast_cma_id, &instance->local_mcast_bind_addr); if (res) { log_printf (LOGSYS_LEVEL_ERROR, "error doing rdma_bind_addr for mcast\n"); return (-1); } /* * Resolve the multicast address into a GUID */ res = rdma_resolve_addr (instance->mcast_cma_id, &instance->local_mcast_bind_addr, &instance->mcast_addr, 5000); if (res) { log_printf (LOGSYS_LEVEL_ERROR, "error resolving multicast address %d %d\n", res, errno); return (-1); } /* * Allocate the protection domain */ instance->mcast_pd = ibv_alloc_pd (instance->mcast_cma_id->verbs); /* * Create a completion channel */ instance->mcast_recv_completion_channel = ibv_create_comp_channel 
(instance->mcast_cma_id->verbs); if (instance->mcast_recv_completion_channel == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel\n"); return (-1); } /* * Create the completion queue */ instance->mcast_recv_cq = ibv_create_cq (instance->mcast_cma_id->verbs, COMPLETION_QUEUE_ENTRIES, instance, instance->mcast_recv_completion_channel, 0); if (instance->mcast_recv_cq == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue\n"); return (-1); } res = ibv_req_notify_cq (instance->mcast_recv_cq, 0); if (res != 0) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't request notifications of the completion queue\n"); return (-1); } /* * Create a completion channel */ instance->mcast_send_completion_channel = ibv_create_comp_channel (instance->mcast_cma_id->verbs); if (instance->mcast_send_completion_channel == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion channel\n"); return (-1); } /* * Create the completion queue */ instance->mcast_send_cq = ibv_create_cq (instance->mcast_cma_id->verbs, COMPLETION_QUEUE_ENTRIES, instance, instance->mcast_send_completion_channel, 0); if (instance->mcast_send_cq == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't create completion queue\n"); return (-1); } res = ibv_req_notify_cq (instance->mcast_send_cq, 0); if (res != 0) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't request notifications of the completion queue\n"); return (-1); } memset (&init_qp_attr, 0, sizeof (struct ibv_qp_init_attr)); init_qp_attr.cap.max_send_wr = 50; init_qp_attr.cap.max_recv_wr = TOTAL_READ_POSTS; init_qp_attr.cap.max_send_sge = 1; init_qp_attr.cap.max_recv_sge = 1; init_qp_attr.qp_context = instance; init_qp_attr.sq_sig_all = 0; init_qp_attr.qp_type = IBV_QPT_UD; init_qp_attr.send_cq = instance->mcast_send_cq; init_qp_attr.recv_cq = instance->mcast_recv_cq; res = rdma_create_qp (instance->mcast_cma_id, instance->mcast_pd, &init_qp_attr); if (res != 0) { log_printf (LOGSYS_LEVEL_ERROR, "couldn't create 
queue pair\n"); return (-1); } mcast_recv_buf_post_initial (instance); qb_loop_poll_add ( instance->totemiba_poll_handle, QB_LOOP_MED, instance->mcast_recv_completion_channel->fd, POLLIN, instance, mcast_cq_recv_event_fn); qb_loop_poll_add ( instance->totemiba_poll_handle, QB_LOOP_MED, instance->mcast_send_completion_channel->fd, POLLIN, instance, mcast_cq_send_event_fn); qb_loop_poll_add ( instance->totemiba_poll_handle, QB_LOOP_MED, instance->mcast_channel->fd, POLLIN, instance, mcast_rdma_event_fn); return (0); } static void timer_function_netif_check_timeout ( void *data) { struct totemiba_instance *instance = (struct totemiba_instance *)data; int res; int interface_up; int interface_num; int addr_len; totemip_iface_check (&instance->totem_interface->bindnet, &instance->totem_interface->boundto, &interface_up, &interface_num, instance->totem_config->clear_node_high_bit); totemip_totemip_to_sockaddr_convert(&instance->totem_interface->boundto, instance->totem_interface->ip_port, (struct sockaddr_storage *)&instance->bind_addr, &addr_len); totemip_totemip_to_sockaddr_convert(&instance->totem_interface->boundto, 0, (struct sockaddr_storage *)&instance->send_token_bind_addr, &addr_len); totemip_totemip_to_sockaddr_convert(&instance->totem_interface->boundto, 0, (struct sockaddr_storage *)&instance->local_mcast_bind_addr, &addr_len); totemip_totemip_to_sockaddr_convert(&instance->totem_interface->boundto, instance->totem_interface->ip_port, (struct sockaddr_storage *)&instance->my_id, &addr_len); totemip_sockaddr_to_totemip_convert( (const struct sockaddr_storage *)&instance->bind_addr, &instance->my_id); memcpy (&instance->my_id, &instance->totem_interface->boundto, sizeof (struct totem_ip_address)); totemip_totemip_to_sockaddr_convert(&instance->totem_interface->mcast_addr, instance->totem_interface->ip_port, (struct sockaddr_storage *)&instance->mcast_addr, &addr_len); res = recv_token_bind (instance); res = mcast_bind (instance); } int totemiba_crypto_set ( void 
*iba_context, unsigned int type) { struct totemiba_instance *instance = (struct totemiba_instance *)iba_context; int res = 0; instance = NULL; return (res); } int totemiba_finalize ( void *iba_context) { struct totemiba_instance *instance = (struct totemiba_instance *)iba_context; int res = 0; instance = NULL; return (res); } /* * Create an instance */ int totemiba_initialize ( qb_loop_t *qb_poll_handle, void **iba_context, struct totem_config *totem_config, int interface_no, void *context, void (*deliver_fn) ( void *context, const void *msg, unsigned int msg_len), void (*iface_change_fn) ( void *context, const struct totem_ip_address *iface_address), void (*target_set_completed) ( void *context)) { struct totemiba_instance *instance; int res = 0; instance = malloc (sizeof (struct totemiba_instance)); if (instance == NULL) { return (-1); } totemiba_instance_initialize (instance); instance->totem_interface = &totem_config->interfaces[interface_no]; instance->totemiba_poll_handle = qb_poll_handle; instance->totemiba_deliver_fn = deliver_fn; instance->totemiba_target_set_completed = target_set_completed; instance->totemiba_iface_change_fn = iface_change_fn; instance->totem_config = totem_config; instance->rrp_context = context; qb_loop_timer_add (instance->totemiba_poll_handle, QB_LOOP_MED, 100*QB_TIME_NS_IN_NSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); instance->totemiba_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; instance->totemiba_log_printf = totem_config->totem_logging_configuration.log_printf; *iba_context = instance; return (res); } void *totemiba_buffer_alloc (void) { return malloc (MAX_MTU_SIZE); } void totemiba_buffer_release (void *ptr) { return free (ptr); } int totemiba_processor_count_set ( void *iba_context, int processor_count) { struct totemiba_instance *instance = (struct totemiba_instance *)iba_context; int res = 0; instance = NULL; return (res); } int totemiba_recv_flush 
(void *iba_context) { struct totemiba_instance *instance = (struct totemiba_instance *)iba_context; int res = 0; instance = NULL; return (res); } int totemiba_send_flush (void *iba_context) { struct totemiba_instance *instance = (struct totemiba_instance *)iba_context; int res = 0; instance = NULL; return (res); } int totemiba_token_send ( void *iba_context, const void *ms, unsigned int msg_len) { struct totemiba_instance *instance = (struct totemiba_instance *)iba_context; int res = 0; struct ibv_send_wr send_wr, *failed_send_wr; struct ibv_sge sge; void *msg; struct send_buf *send_buf; send_buf = token_send_buf_get (instance); if (send_buf == NULL) { return (-1); } msg = send_buf->buffer; memcpy (msg, ms, msg_len); send_wr.next = NULL; send_wr.sg_list = &sge; send_wr.num_sge = 1; send_wr.opcode = IBV_WR_SEND; send_wr.send_flags = IBV_SEND_SIGNALED; send_wr.wr_id = void2wrid(send_buf); send_wr.imm_data = 0; send_wr.wr.ud.ah = instance->send_token_ah; send_wr.wr.ud.remote_qpn = instance->send_token_qpn; send_wr.wr.ud.remote_qkey = instance->send_token_qkey; sge.length = msg_len; sge.lkey = send_buf->mr->lkey; sge.addr = (uintptr_t)msg; res = ibv_post_send (instance->send_token_cma_id->qp, &send_wr, &failed_send_wr); return (res); } int totemiba_mcast_flush_send ( void *iba_context, const void *ms, unsigned int msg_len) { struct totemiba_instance *instance = (struct totemiba_instance *)iba_context; int res = 0; struct ibv_send_wr send_wr, *failed_send_wr; struct ibv_sge sge; void *msg; struct send_buf *send_buf; send_buf = mcast_send_buf_get (instance); if (send_buf == NULL) { return (-1); } msg = send_buf->buffer; memcpy (msg, ms, msg_len); send_wr.next = NULL; send_wr.sg_list = &sge; send_wr.num_sge = 1; send_wr.opcode = IBV_WR_SEND; send_wr.send_flags = IBV_SEND_SIGNALED; send_wr.wr_id = void2wrid(send_buf); send_wr.imm_data = 0; send_wr.wr.ud.ah = instance->mcast_ah; send_wr.wr.ud.remote_qpn = instance->mcast_qpn; send_wr.wr.ud.remote_qkey = 
instance->mcast_qkey; sge.length = msg_len; sge.lkey = send_buf->mr->lkey; sge.addr = (uintptr_t)msg; res = ibv_post_send (instance->mcast_cma_id->qp, &send_wr, &failed_send_wr); return (res); } int totemiba_mcast_noflush_send ( void *iba_context, const void *ms, unsigned int msg_len) { struct totemiba_instance *instance = (struct totemiba_instance *)iba_context; int res = 0; struct ibv_send_wr send_wr, *failed_send_wr; struct ibv_sge sge; void *msg; struct send_buf *send_buf; send_buf = mcast_send_buf_get (instance); if (send_buf == NULL) { return (-1); } msg = send_buf->buffer; memcpy (msg, ms, msg_len); send_wr.next = NULL; send_wr.sg_list = &sge; send_wr.num_sge = 1; send_wr.opcode = IBV_WR_SEND; send_wr.send_flags = IBV_SEND_SIGNALED; send_wr.wr_id = void2wrid(send_buf); send_wr.imm_data = 0; send_wr.wr.ud.ah = instance->mcast_ah; send_wr.wr.ud.remote_qpn = instance->mcast_qpn; send_wr.wr.ud.remote_qkey = instance->mcast_qkey; sge.length = msg_len; sge.lkey = send_buf->mr->lkey; sge.addr = (uintptr_t)msg; res = ibv_post_send (instance->mcast_cma_id->qp, &send_wr, &failed_send_wr); return (res); } extern int totemiba_iface_check (void *iba_context) { struct totemiba_instance *instance = (struct totemiba_instance *)iba_context; int res = 0; instance = NULL; return (res); } extern void totemiba_net_mtu_adjust (void *iba_context, struct totem_config *totem_config) { struct totemiba_instance *instance = (struct totemiba_instance *)iba_context; instance = NULL; } const char *totemiba_iface_print (void *iba_context) { struct totemiba_instance *instance = (struct totemiba_instance *)iba_context; const char *ret_char; ret_char = totemip_print (&instance->my_id); return (ret_char); } int totemiba_iface_get ( void *iba_context, struct totem_ip_address *addr) { struct totemiba_instance *instance = (struct totemiba_instance *)iba_context; int res = 0; memcpy (addr, &instance->my_id, sizeof (struct totem_ip_address)); return (res); } int totemiba_token_target_set ( void 
*iba_context, const struct totem_ip_address *token_target) { struct totemiba_instance *instance = (struct totemiba_instance *)iba_context; int res = 0; int addr_len = 16; totemip_totemip_to_sockaddr_convert((struct totem_ip_address *)token_target, instance->totem_interface->ip_port, (struct sockaddr_storage *)&instance->token_addr, &addr_len); res = send_token_unbind (instance); res = send_token_bind (instance); return (res); } extern int totemiba_recv_mcast_empty ( void *iba_context) { struct totemiba_instance *instance = (struct totemiba_instance *)iba_context; int res = 0; instance = NULL; return (res); } diff --git a/exec/totemnet.c b/exec/totemnet.c index 3c87e9a5..9b211f7c 100644 --- a/exec/totemnet.c +++ b/exec/totemnet.c @@ -1,489 +1,489 @@ /* * Copyright (c) 2005 MontaVista Software, Inc. * Copyright (c) 2006-2009 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #ifdef HAVE_RDMA #include #endif #include #include #include #include #define LOGSYS_UTILS_ONLY 1 #include struct transport { const char *name; int (*initialize) ( qb_loop_t *loop_pt, void **transport_instance, struct totem_config *totem_config, int interface_no, void *context, void (*deliver_fn) ( void *context, const void *msg, unsigned int msg_len), void (*iface_change_fn) ( void *context, const struct totem_ip_address *iface_address), void (*target_set_completed) ( void *context)); void *(*buffer_alloc) (void); void (*buffer_release) (void *ptr); int (*processor_count_set) ( void *transport_context, int processor_count); int (*token_send) ( void *transport_context, const void *msg, unsigned int msg_len); int (*mcast_flush_send) ( void *transport_context, const void *msg, unsigned int msg_len); int (*mcast_noflush_send) ( void *transport_context, const void *msg, unsigned int msg_len); int (*recv_flush) (void *transport_context); int (*send_flush) (void *transport_context); int (*iface_check) (void *transport_context); int (*finalize) (void *transport_context); void (*net_mtu_adjust) (void *transport_context, struct totem_config *totem_config); const char *(*iface_print) (void *transport_context); int (*iface_get) ( void *transport_context, struct totem_ip_address *addr); int (*token_target_set) ( void *transport_context, const struct totem_ip_address *token_target); int (*crypto_set) ( void *transport_context, unsigned int 
type); int (*recv_mcast_empty) ( void *transport_context); int (*member_add) ( void *transport_context, const struct totem_ip_address *member); int (*member_remove) ( void *transport_context, const struct totem_ip_address *member); }; struct transport transport_entries[] = { { .name = "UDP/IP Multicast", .initialize = totemudp_initialize, .buffer_alloc = totemudp_buffer_alloc, .buffer_release = totemudp_buffer_release, .processor_count_set = totemudp_processor_count_set, .token_send = totemudp_token_send, .mcast_flush_send = totemudp_mcast_flush_send, .mcast_noflush_send = totemudp_mcast_noflush_send, .recv_flush = totemudp_recv_flush, .send_flush = totemudp_send_flush, .iface_check = totemudp_iface_check, .finalize = totemudp_finalize, .net_mtu_adjust = totemudp_net_mtu_adjust, .iface_print = totemudp_iface_print, .iface_get = totemudp_iface_get, .token_target_set = totemudp_token_target_set, .crypto_set = totemudp_crypto_set, .recv_mcast_empty = totemudp_recv_mcast_empty }, { .name = "UDP/IP Unicast", .initialize = totemudpu_initialize, .buffer_alloc = totemudpu_buffer_alloc, .buffer_release = totemudpu_buffer_release, .processor_count_set = totemudpu_processor_count_set, .token_send = totemudpu_token_send, .mcast_flush_send = totemudpu_mcast_flush_send, .mcast_noflush_send = totemudpu_mcast_noflush_send, .recv_flush = totemudpu_recv_flush, .send_flush = totemudpu_send_flush, .iface_check = totemudpu_iface_check, .finalize = totemudpu_finalize, .net_mtu_adjust = totemudpu_net_mtu_adjust, .iface_print = totemudpu_iface_print, .iface_get = totemudpu_iface_get, .token_target_set = totemudpu_token_target_set, .crypto_set = totemudpu_crypto_set, .recv_mcast_empty = totemudpu_recv_mcast_empty, .member_add = totemudpu_member_add, .member_remove = totemudpu_member_remove }, #ifdef HAVE_RDMA { .name = "Infiniband/IP", .initialize = totemiba_initialize, .buffer_alloc = totemiba_buffer_alloc, .buffer_release = totemiba_buffer_release, .processor_count_set = 
totemiba_processor_count_set, .token_send = totemiba_token_send, .mcast_flush_send = totemiba_mcast_flush_send, .mcast_noflush_send = totemiba_mcast_noflush_send, .recv_flush = totemiba_recv_flush, .send_flush = totemiba_send_flush, .iface_check = totemiba_iface_check, .finalize = totemiba_finalize, .net_mtu_adjust = totemiba_net_mtu_adjust, .iface_print = totemiba_iface_print, .iface_get = totemiba_iface_get, .token_target_set = totemiba_token_target_set, .crypto_set = totemiba_crypto_set, .recv_mcast_empty = totemiba_recv_mcast_empty } #endif }; struct totemnet_instance { void *transport_context; struct transport *transport; void (*totemnet_log_printf) ( - unsigned int rec_ident, + int level, + int subsys, const char *function, const char *file, int line, const char *format, - ...)__attribute__((format(printf, 5, 6))); + ...)__attribute__((format(printf, 6, 7))); int totemnet_subsys_id; }; #define log_printf(level, format, args...) \ do { \ instance->totemnet_log_printf ( \ - LOGSYS_ENCODE_RECID(level, \ + level, \ instance->totemnet_subsys_id, \ - LOGSYS_RECID_LOG), \ __FUNCTION__, __FILE__, __LINE__, \ (const char *)format, ##args); \ } while (0); static void totemnet_instance_initialize ( struct totemnet_instance *instance, struct totem_config *config) { int transport; instance->totemnet_log_printf = config->totem_logging_configuration.log_printf; instance->totemnet_subsys_id = config->totem_logging_configuration.log_subsys_id; transport = config->transport_number; log_printf (LOGSYS_LEVEL_NOTICE, "Initializing transport (%s).\n", transport_entries[transport].name); instance->transport = &transport_entries[transport]; } int totemnet_crypto_set ( void *net_context, unsigned int type) { struct totemnet_instance *instance = (struct totemnet_instance *)net_context; int res = 0; res = instance->transport->crypto_set (instance->transport_context, type); return res; } int totemnet_finalize ( void *net_context) { struct totemnet_instance *instance = (struct 
totemnet_instance *)net_context; int res = 0; res = instance->transport->finalize (instance->transport_context); return (res); } int totemnet_initialize ( qb_loop_t *loop_pt, void **net_context, struct totem_config *totem_config, int interface_no, void *context, void (*deliver_fn) ( void *context, const void *msg, unsigned int msg_len), void (*iface_change_fn) ( void *context, const struct totem_ip_address *iface_address), void (*target_set_completed) ( void *context)) { struct totemnet_instance *instance; unsigned int res; instance = malloc (sizeof (struct totemnet_instance)); if (instance == NULL) { return (-1); } totemnet_instance_initialize (instance, totem_config); res = instance->transport->initialize (loop_pt, &instance->transport_context, totem_config, interface_no, context, deliver_fn, iface_change_fn, target_set_completed); if (res == -1) { goto error_destroy; } *net_context = instance; return (0); error_destroy: free (instance); return (-1); } void *totemnet_buffer_alloc (void *net_context) { struct totemnet_instance *instance = net_context; assert (instance != NULL); assert (instance->transport != NULL); return instance->transport->buffer_alloc(); } void totemnet_buffer_release (void *net_context, void *ptr) { struct totemnet_instance *instance = net_context; assert (instance != NULL); assert (instance->transport != NULL); instance->transport->buffer_release (ptr); } int totemnet_processor_count_set ( void *net_context, int processor_count) { struct totemnet_instance *instance = (struct totemnet_instance *)net_context; int res = 0; res = instance->transport->processor_count_set (instance->transport_context, processor_count); return (res); } int totemnet_recv_flush (void *net_context) { struct totemnet_instance *instance = (struct totemnet_instance *)net_context; int res = 0; res = instance->transport->recv_flush (instance->transport_context); return (res); } int totemnet_send_flush (void *net_context) { struct totemnet_instance *instance = (struct 
totemnet_instance *)net_context; int res = 0; res = instance->transport->send_flush (instance->transport_context); return (res); } int totemnet_token_send ( void *net_context, const void *msg, unsigned int msg_len) { struct totemnet_instance *instance = (struct totemnet_instance *)net_context; int res = 0; res = instance->transport->token_send (instance->transport_context, msg, msg_len); return (res); } int totemnet_mcast_flush_send ( void *net_context, const void *msg, unsigned int msg_len) { struct totemnet_instance *instance = (struct totemnet_instance *)net_context; int res = 0; res = instance->transport->mcast_flush_send (instance->transport_context, msg, msg_len); return (res); } int totemnet_mcast_noflush_send ( void *net_context, const void *msg, unsigned int msg_len) { struct totemnet_instance *instance = (struct totemnet_instance *)net_context; int res = 0; res = instance->transport->mcast_noflush_send (instance->transport_context, msg, msg_len); return (res); } extern int totemnet_iface_check (void *net_context) { struct totemnet_instance *instance = (struct totemnet_instance *)net_context; int res = 0; res = instance->transport->iface_check (instance->transport_context); return (res); } extern int totemnet_net_mtu_adjust (void *net_context, struct totem_config *totem_config) { struct totemnet_instance *instance = (struct totemnet_instance *)net_context; int res = 0; instance->transport->net_mtu_adjust (instance->transport_context, totem_config); return (res); } const char *totemnet_iface_print (void *net_context) { struct totemnet_instance *instance = (struct totemnet_instance *)net_context; const char *ret_char; ret_char = instance->transport->iface_print (instance->transport_context); return (ret_char); } int totemnet_iface_get ( void *net_context, struct totem_ip_address *addr) { struct totemnet_instance *instance = (struct totemnet_instance *)net_context; unsigned int res; res = instance->transport->iface_get (instance->transport_context, addr); 
return (res); } int totemnet_token_target_set ( void *net_context, const struct totem_ip_address *token_target) { struct totemnet_instance *instance = (struct totemnet_instance *)net_context; unsigned int res; res = instance->transport->token_target_set (instance->transport_context, token_target); return (res); } extern int totemnet_recv_mcast_empty ( void *net_context) { struct totemnet_instance *instance = (struct totemnet_instance *)net_context; unsigned int res; res = instance->transport->recv_mcast_empty (instance->transport_context); return (res); } extern int totemnet_member_add ( void *net_context, const struct totem_ip_address *member) { struct totemnet_instance *instance = (struct totemnet_instance *)net_context; unsigned int res = 0; if (instance->transport->member_add) { res = instance->transport->member_add ( instance->transport_context, member); } return (res); } extern int totemnet_member_remove ( void *net_context, const struct totem_ip_address *member) { struct totemnet_instance *instance = (struct totemnet_instance *)net_context; unsigned int res = 0; if (instance->transport->member_remove) { res = instance->transport->member_remove ( instance->transport_context, member); } return (res); } diff --git a/exec/totempg.c b/exec/totempg.c index 36d5a448..33e3e2fd 100644 --- a/exec/totempg.c +++ b/exec/totempg.c @@ -1,1460 +1,1459 @@ /* * Copyright (c) 2003-2005 MontaVista Software, Inc. * Copyright (c) 2005 OSDL. * Copyright (c) 2006-2009 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * Author: Mark Haverkamp (markh@osdl.org) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. 
* - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* * FRAGMENTATION AND PACKING ALGORITHM: * * Assemble the entire message into one buffer * if full fragment * store fragment into lengths list * for each full fragment * multicast fragment * set length and fragment fields of pg message * store remaining multicast into head of fragmentation data and set lens field * * If a message exceeds the maximum packet size allowed by the totem * single ring protocol, the protocol could lose forward progress. * Statically calculating the allowed data amount doesn't work because * the amount of data allowed depends on the number of fragments in * each message. In this implementation, the maximum fragment size * is dynamically calculated for each fragment added to the message.
* It is possible for a message to be two bytes short of the maximum * packet size. This occurs when a message or collection of * messages + the mcast header + the lens are two bytes short of the * end of the packet. Since another len field consumes two bytes, the * len field would consume the rest of the packet without room for data. * * One optimization would be to forgo the final len field and determine * it from the size of the udp datagram. Then this condition would no * longer occur. */ /* * ASSEMBLY AND UNPACKING ALGORITHM: * * copy incoming packet into assembly data buffer indexed by current * location of end of fragment * * if not fragmented * deliver all messages in assembly data buffer * else * if msg_count > 1 and fragmented * deliver all messages except last message in assembly data buffer * copy last fragmented section to start of assembly data buffer * else * if msg_count = 1 and fragmented * do nothing * */ #include #ifdef HAVE_ALLOCA_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define LOGSYS_UTILS_ONLY 1 #include #include "totemmrp.h" #include "totemsrp.h" #define min(a,b) ((a) < (b)) ? a : b struct totempg_mcast_header { short version; short type; }; #if !(defined(__i386__) || defined(__x86_64__)) /* * Need align on architectures different then i386 or x86_64 */ #define TOTEMPG_NEED_ALIGN 1 #endif /* * totempg_mcast structure * * header: Identify the mcast. * fragmented: Set if this message continues into next message * continuation: Set if this message is a continuation from last message * msg_count Indicates how many packed messages are contained * in the mcast. * Also, the size of each packed message and the messages themselves are * appended to the end of this structure when sent. 
*/ struct totempg_mcast { struct totempg_mcast_header header; unsigned char fragmented; unsigned char continuation; unsigned short msg_count; /* * short msg_len[msg_count]; */ /* * data for messages */ }; /* * Maximum packet size for totem pg messages */ #define TOTEMPG_PACKET_SIZE (totempg_totem_config->net_mtu - \ sizeof (struct totempg_mcast)) /* * Local variables used for packing small messages */ static unsigned short mcast_packed_msg_lens[FRAME_SIZE_MAX]; static int mcast_packed_msg_count = 0; static int totempg_reserved = 1; static unsigned int totempg_size_limit; static totem_queue_level_changed_fn totem_queue_level_changed = NULL; /* * Function and data used to log messages */ static int totempg_log_level_security; static int totempg_log_level_error; static int totempg_log_level_warning; static int totempg_log_level_notice; static int totempg_log_level_debug; static int totempg_subsys_id; static void (*totempg_log_printf) ( - unsigned int rec_ident, + int level, + int subsys, const char *function, const char *file, int line, - const char *format, ...) __attribute__((format(printf, 5, 6))); + const char *format, ...) __attribute__((format(printf, 6, 7))); struct totem_config *totempg_totem_config; static totempg_stats_t totempg_stats; enum throw_away_mode { THROW_AWAY_INACTIVE, THROW_AWAY_ACTIVE }; struct assembly { unsigned int nodeid; unsigned char data[MESSAGE_SIZE_MAX]; int index; unsigned char last_frag_num; enum throw_away_mode throw_away_mode; struct list_head list; }; static void assembly_deref (struct assembly *assembly); static int callback_token_received_fn (enum totem_callback_token_type type, const void *data); DECLARE_LIST_INIT(assembly_list_inuse); DECLARE_LIST_INIT(assembly_list_free); /* * Staging buffer for packed messages. Messages are staged in this buffer * before sending. Multiple messages may fit which cuts down on the * number of mcasts sent. 
If a message doesn't completely fit, then * the mcast header has a fragment bit set that says that there are more * data to follow. fragment_size is an index into the buffer. It indicates * the size of message data and where to place new message data. * fragment_contuation indicates whether the first packed message in * the buffer is a continuation of a previously packed fragment. */ static unsigned char *fragmentation_data; static int fragment_size = 0; static int fragment_continuation = 0; static struct iovec iov_delv; static unsigned int totempg_max_handle = 0; struct totempg_group_instance { void (*deliver_fn) ( unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required); void (*confchg_fn) ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id); struct totempg_group *groups; int groups_cnt; int32_t q_level; }; DECLARE_HDB_DATABASE (totempg_groups_instance_database,NULL); static unsigned char next_fragment = 1; static pthread_mutex_t totempg_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t callback_token_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t mcast_msg_mutex = PTHREAD_MUTEX_INITIALIZER; -#define log_printf(level, format, args...) \ -do { \ - totempg_log_printf ( \ - LOGSYS_ENCODE_RECID(level, \ - totempg_subsys_id, \ - LOGSYS_RECID_LOG), \ - __FUNCTION__, __FILE__, __LINE__, \ - format, ##args); \ +#define log_printf(level, format, args...) 
\ +do { \ + totempg_log_printf(level, \ + totempg_subsys_id, \ + __FUNCTION__, __FILE__, __LINE__, \ + format, ##args); \ } while (0); static int msg_count_send_ok (int msg_count); static int byte_count_send_ok (int byte_count); static struct assembly *assembly_ref (unsigned int nodeid) { struct assembly *assembly; struct list_head *list; /* * Search inuse list for node id and return assembly buffer if found */ for (list = assembly_list_inuse.next; list != &assembly_list_inuse; list = list->next) { assembly = list_entry (list, struct assembly, list); if (nodeid == assembly->nodeid) { return (assembly); } } /* * Nothing found in inuse list get one from free list if available */ if (list_empty (&assembly_list_free) == 0) { assembly = list_entry (assembly_list_free.next, struct assembly, list); list_del (&assembly->list); list_add (&assembly->list, &assembly_list_inuse); assembly->nodeid = nodeid; assembly->index = 0; assembly->last_frag_num = 0; assembly->throw_away_mode = THROW_AWAY_INACTIVE; return (assembly); } /* * Nothing available in inuse or free list, so allocate a new one */ assembly = malloc (sizeof (struct assembly)); /* * TODO handle memory allocation failure here */ assert (assembly); assembly->nodeid = nodeid; assembly->data[0] = 0; assembly->index = 0; assembly->last_frag_num = 0; assembly->throw_away_mode = THROW_AWAY_INACTIVE; list_init (&assembly->list); list_add (&assembly->list, &assembly_list_inuse); return (assembly); } static void assembly_deref (struct assembly *assembly) { list_del (&assembly->list); list_add (&assembly->list, &assembly_list_free); } static inline void app_confchg_fn ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id) { int i; struct totempg_group_instance *instance; struct assembly *assembly; unsigned int res; /* 
* For every leaving processor, add to free list * This also has the side effect of clearing out the dataset * In the leaving processor's assembly buffer. */ for (i = 0; i < left_list_entries; i++) { assembly = assembly_ref (left_list[i]); list_del (&assembly->list); list_add (&assembly->list, &assembly_list_free); } for (i = 0; i <= totempg_max_handle; i++) { res = hdb_handle_get (&totempg_groups_instance_database, hdb_nocheck_convert (i), (void *)&instance); if (res == 0) { if (instance->confchg_fn) { instance->confchg_fn ( configuration_type, member_list, member_list_entries, left_list, left_list_entries, joined_list, joined_list_entries, ring_id); } hdb_handle_put (&totempg_groups_instance_database, hdb_nocheck_convert (i)); } } } static inline void group_endian_convert ( void *msg, int msg_len) { unsigned short *group_len; int i; char *aligned_msg; #ifdef TOTEMPG_NEED_ALIGN /* * Align data structure for not i386 or x86_64 */ if ((size_t)msg % 4 != 0) { aligned_msg = alloca(msg_len); memcpy(aligned_msg, msg, msg_len); } else { aligned_msg = msg; } #else aligned_msg = msg; #endif group_len = (unsigned short *)aligned_msg; group_len[0] = swab16(group_len[0]); for (i = 1; i < group_len[0] + 1; i++) { group_len[i] = swab16(group_len[i]); } if (aligned_msg != msg) { memcpy(msg, aligned_msg, msg_len); } } static inline int group_matches ( struct iovec *iovec, unsigned int iov_len, struct totempg_group *groups_b, unsigned int group_b_cnt, unsigned int *adjust_iovec) { unsigned short *group_len; char *group_name; int i; int j; #ifdef TOTEMPG_NEED_ALIGN struct iovec iovec_aligned = { NULL, 0 }; #endif assert (iov_len == 1); #ifdef TOTEMPG_NEED_ALIGN /* * Align data structure for not i386 or x86_64 */ if ((size_t)iovec->iov_base % 4 != 0) { iovec_aligned.iov_base = alloca(iovec->iov_len); memcpy(iovec_aligned.iov_base, iovec->iov_base, iovec->iov_len); iovec_aligned.iov_len = iovec->iov_len; iovec = &iovec_aligned; } #endif group_len = (unsigned short *)iovec->iov_base; 
group_name = ((char *)iovec->iov_base) + sizeof (unsigned short) * (group_len[0] + 1); /* * Calculate amount to adjust the iovec by before delivering to app */ *adjust_iovec = sizeof (unsigned short) * (group_len[0] + 1); for (i = 1; i < group_len[0] + 1; i++) { *adjust_iovec += group_len[i]; } /* * Determine if this message should be delivered to this instance */ for (i = 1; i < group_len[0] + 1; i++) { for (j = 0; j < group_b_cnt; j++) { if ((group_len[i] == groups_b[j].group_len) && (memcmp (groups_b[j].group, group_name, group_len[i]) == 0)) { return (1); } } group_name += group_len[i]; } return (0); } static inline void app_deliver_fn ( unsigned int nodeid, void *msg, unsigned int msg_len, int endian_conversion_required) { int i; struct totempg_group_instance *instance; struct iovec stripped_iovec; unsigned int adjust_iovec; unsigned int res; struct iovec *iovec; struct iovec aligned_iovec = { NULL, 0 }; if (endian_conversion_required) { group_endian_convert (msg, msg_len); } /* * TODO: segmentation/assembly need to be redesigned to provide aligned access * in all cases to avoid memory copies on non386 archs. 
Probably broke backwars * compatibility */ #ifdef TOTEMPG_NEED_ALIGN /* * Align data structure for not i386 or x86_64 */ aligned_iovec.iov_base = alloca(msg_len); aligned_iovec.iov_len = msg_len; memcpy(aligned_iovec.iov_base, msg, msg_len); #else aligned_iovec.iov_base = msg; aligned_iovec.iov_len = msg_len; #endif iovec = &aligned_iovec; for (i = 0; i <= totempg_max_handle; i++) { res = hdb_handle_get (&totempg_groups_instance_database, hdb_nocheck_convert (i), (void *)&instance); if (res == 0) { if (group_matches (iovec, 1, instance->groups, instance->groups_cnt, &adjust_iovec)) { stripped_iovec.iov_len = iovec->iov_len - adjust_iovec; stripped_iovec.iov_base = (char *)iovec->iov_base + adjust_iovec; #ifdef TOTEMPG_NEED_ALIGN /* * Align data structure for not i386 or x86_64 */ if ((char *)iovec->iov_base + adjust_iovec % 4 != 0) { /* * Deal with misalignment */ stripped_iovec.iov_base = alloca (stripped_iovec.iov_len); memcpy (stripped_iovec.iov_base, (char *)iovec->iov_base + adjust_iovec, stripped_iovec.iov_len); } #endif instance->deliver_fn ( nodeid, stripped_iovec.iov_base, stripped_iovec.iov_len, endian_conversion_required); } hdb_handle_put (&totempg_groups_instance_database, hdb_nocheck_convert(i)); } } } static void totempg_confchg_fn ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id) { // TODO optimize this app_confchg_fn (configuration_type, member_list, member_list_entries, left_list, left_list_entries, joined_list, joined_list_entries, ring_id); } static void totempg_deliver_fn ( unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required) { struct totempg_mcast *mcast; unsigned short *msg_lens; int i; struct assembly *assembly; char header[FRAME_SIZE_MAX]; int msg_count; int continuation; int start; const 
char *data; int datasize; assembly = assembly_ref (nodeid); assert (assembly); /* * Assemble the header into one block of data and * assemble the packet contents into one block of data to simplify delivery */ mcast = (struct totempg_mcast *)msg; if (endian_conversion_required) { mcast->msg_count = swab16 (mcast->msg_count); } msg_count = mcast->msg_count; datasize = sizeof (struct totempg_mcast) + msg_count * sizeof (unsigned short); memcpy (header, msg, datasize); data = msg; msg_lens = (unsigned short *) (header + sizeof (struct totempg_mcast)); if (endian_conversion_required) { for (i = 0; i < mcast->msg_count; i++) { msg_lens[i] = swab16 (msg_lens[i]); } } memcpy (&assembly->data[assembly->index], &data[datasize], msg_len - datasize); /* * If the last message in the buffer is a fragment, then we * can't deliver it. We'll first deliver the full messages * then adjust the assembly buffer so we can add the rest of the * fragment when it arrives. */ msg_count = mcast->fragmented ? mcast->msg_count - 1 : mcast->msg_count; continuation = mcast->continuation; iov_delv.iov_base = (void *)&assembly->data[0]; iov_delv.iov_len = assembly->index + msg_lens[0]; /* * Make sure that if this message is a continuation, that it * matches the sequence number of the previous fragment. * Also, if the first packed message is a continuation * of a previous message, but the assembly buffer * is empty, then we need to discard it since we can't * assemble a complete message. Likewise, if this message isn't a * continuation and the assembly buffer is empty, we have to discard * the continued message. 
*/ start = 0; if (assembly->throw_away_mode == THROW_AWAY_ACTIVE) { /* Throw away the first msg block */ if (mcast->fragmented == 0 || mcast->fragmented == 1) { assembly->throw_away_mode = THROW_AWAY_INACTIVE; assembly->index += msg_lens[0]; iov_delv.iov_base = (void *)&assembly->data[assembly->index]; iov_delv.iov_len = msg_lens[1]; start = 1; } } else if (assembly->throw_away_mode == THROW_AWAY_INACTIVE) { if (continuation == assembly->last_frag_num) { assembly->last_frag_num = mcast->fragmented; for (i = start; i < msg_count; i++) { app_deliver_fn(nodeid, iov_delv.iov_base, iov_delv.iov_len, endian_conversion_required); assembly->index += msg_lens[i]; iov_delv.iov_base = (void *)&assembly->data[assembly->index]; if (i < (msg_count - 1)) { iov_delv.iov_len = msg_lens[i + 1]; } } } else { assembly->throw_away_mode = THROW_AWAY_ACTIVE; } } if (mcast->fragmented == 0) { /* * End of messages, dereference assembly struct */ assembly->last_frag_num = 0; assembly->index = 0; assembly_deref (assembly); } else { /* * Message is fragmented, keep around assembly list */ if (mcast->msg_count > 1) { memmove (&assembly->data[0], &assembly->data[assembly->index], msg_lens[msg_count]); assembly->index = 0; } assembly->index += msg_lens[msg_count]; } } /* * Totem Process Group Abstraction * depends on poll abstraction, POSIX, IPV4 */ void *callback_token_received_handle; int callback_token_received_fn (enum totem_callback_token_type type, const void *data) { struct totempg_mcast mcast; struct iovec iovecs[3]; pthread_mutex_lock (&mcast_msg_mutex); if (mcast_packed_msg_count == 0) { pthread_mutex_unlock (&mcast_msg_mutex); return (0); } if (totemmrp_avail() == 0) { pthread_mutex_unlock (&mcast_msg_mutex); return (0); } mcast.header.version = 0; mcast.header.type = 0; mcast.fragmented = 0; /* * Was the first message in this buffer a continuation of a * fragmented message? 
*/ mcast.continuation = fragment_continuation; fragment_continuation = 0; mcast.msg_count = mcast_packed_msg_count; iovecs[0].iov_base = (void *)&mcast; iovecs[0].iov_len = sizeof (struct totempg_mcast); iovecs[1].iov_base = (void *)mcast_packed_msg_lens; iovecs[1].iov_len = mcast_packed_msg_count * sizeof (unsigned short); iovecs[2].iov_base = (void *)&fragmentation_data[0]; iovecs[2].iov_len = fragment_size; (void)totemmrp_mcast (iovecs, 3, 0); mcast_packed_msg_count = 0; fragment_size = 0; pthread_mutex_unlock (&mcast_msg_mutex); return (0); } /* * Initialize the totem process group abstraction */ int totempg_initialize ( qb_loop_t *poll_handle, struct totem_config *totem_config) { int res; totempg_totem_config = totem_config; totempg_log_level_security = totem_config->totem_logging_configuration.log_level_security; totempg_log_level_error = totem_config->totem_logging_configuration.log_level_error; totempg_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; totempg_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; totempg_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; totempg_log_printf = totem_config->totem_logging_configuration.log_printf; totempg_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; fragmentation_data = malloc (TOTEMPG_PACKET_SIZE); if (fragmentation_data == 0) { return (-1); } totemsrp_net_mtu_adjust (totem_config); res = totemmrp_initialize ( poll_handle, totem_config, &totempg_stats, totempg_deliver_fn, totempg_confchg_fn); totemmrp_callback_token_create ( &callback_token_received_handle, TOTEM_CALLBACK_TOKEN_RECEIVED, 0, callback_token_received_fn, 0); totempg_size_limit = (totemmrp_avail() - 1) * (totempg_totem_config->net_mtu - sizeof (struct totempg_mcast) - 16); return (res); } void totempg_finalize (void) { pthread_mutex_lock (&totempg_mutex); totemmrp_finalize (); pthread_mutex_unlock (&totempg_mutex); } /* * Multicast a message 
*/ static int mcast_msg ( struct iovec *iovec_in, unsigned int iov_len, int guarantee) { int res = 0; struct totempg_mcast mcast; struct iovec iovecs[3]; struct iovec iovec[64]; int i; int dest, src; int max_packet_size = 0; int copy_len = 0; int copy_base = 0; int total_size = 0; pthread_mutex_lock (&mcast_msg_mutex); totemmrp_event_signal (TOTEM_EVENT_NEW_MSG, 1); /* * Remove zero length iovectors from the list */ assert (iov_len < 64); for (dest = 0, src = 0; src < iov_len; src++) { if (iovec_in[src].iov_len) { memcpy (&iovec[dest++], &iovec_in[src], sizeof (struct iovec)); } } iov_len = dest; max_packet_size = TOTEMPG_PACKET_SIZE - (sizeof (unsigned short) * (mcast_packed_msg_count + 1)); mcast_packed_msg_lens[mcast_packed_msg_count] = 0; /* * Check if we would overwrite new message queue */ for (i = 0; i < iov_len; i++) { total_size += iovec[i].iov_len; } if (byte_count_send_ok (total_size + sizeof(unsigned short) * (mcast_packed_msg_count)) == 0) { pthread_mutex_unlock (&mcast_msg_mutex); return(-1); } mcast.header.version = 0; for (i = 0; i < iov_len; ) { mcast.fragmented = 0; mcast.continuation = fragment_continuation; copy_len = iovec[i].iov_len - copy_base; /* * If it all fits with room left over, copy it in. * We need to leave at least sizeof(short) + 1 bytes in the * fragment_buffer on exit so that max_packet_size + fragment_size * doesn't exceed the size of the fragment_buffer on the next call. */ if ((copy_len + fragment_size) < (max_packet_size - sizeof (unsigned short))) { memcpy (&fragmentation_data[fragment_size], (char *)iovec[i].iov_base + copy_base, copy_len); fragment_size += copy_len; mcast_packed_msg_lens[mcast_packed_msg_count] += copy_len; next_fragment = 1; copy_len = 0; copy_base = 0; i++; continue; /* * If it just fits or is too big, then send out what fits. 
*/ } else { unsigned char *data_ptr; copy_len = min(copy_len, max_packet_size - fragment_size); if( copy_len == max_packet_size ) data_ptr = (unsigned char *)iovec[i].iov_base + copy_base; else { data_ptr = fragmentation_data; memcpy (&fragmentation_data[fragment_size], (unsigned char *)iovec[i].iov_base + copy_base, copy_len); } memcpy (&fragmentation_data[fragment_size], (unsigned char *)iovec[i].iov_base + copy_base, copy_len); mcast_packed_msg_lens[mcast_packed_msg_count] += copy_len; /* * if we're not on the last iovec or the iovec is too large to * fit, then indicate a fragment. This also means that the next * message will have the continuation of this one. */ if ((i < (iov_len - 1)) || ((copy_base + copy_len) < iovec[i].iov_len)) { if (!next_fragment) { next_fragment++; } fragment_continuation = next_fragment; mcast.fragmented = next_fragment++; assert(fragment_continuation != 0); assert(mcast.fragmented != 0); } else { fragment_continuation = 0; } /* * assemble the message and send it */ mcast.msg_count = ++mcast_packed_msg_count; iovecs[0].iov_base = (void *)&mcast; iovecs[0].iov_len = sizeof(struct totempg_mcast); iovecs[1].iov_base = (void *)mcast_packed_msg_lens; iovecs[1].iov_len = mcast_packed_msg_count * sizeof(unsigned short); iovecs[2].iov_base = (void *)data_ptr; iovecs[2].iov_len = max_packet_size; assert (totemmrp_avail() > 0); res = totemmrp_mcast (iovecs, 3, guarantee); if (res == -1) { goto error_exit; } /* * Recalculate counts and indexes for the next. */ mcast_packed_msg_lens[0] = 0; mcast_packed_msg_count = 0; fragment_size = 0; max_packet_size = TOTEMPG_PACKET_SIZE - (sizeof(unsigned short)); /* * If the iovec all fit, go to the next iovec */ if ((copy_base + copy_len) == iovec[i].iov_len) { copy_len = 0; copy_base = 0; i++; /* * Continue with the rest of the current iovec. */ } else { copy_base += copy_len; } } } /* * Bump only if we added message data. 
This may be zero if * the last buffer just fit into the fragmentation_data buffer * and we were at the last iovec. */ if (mcast_packed_msg_lens[mcast_packed_msg_count]) { mcast_packed_msg_count++; } error_exit: pthread_mutex_unlock (&mcast_msg_mutex); return (res); } /* * Determine if a message of msg_size could be queued */ static int msg_count_send_ok ( int msg_count) { int avail = 0; avail = totemmrp_avail (); totempg_stats.msg_queue_avail = avail; return ((avail - totempg_reserved) > msg_count); } static int byte_count_send_ok ( int byte_count) { unsigned int msg_count = 0; int avail = 0; avail = totemmrp_avail (); msg_count = (byte_count / (totempg_totem_config->net_mtu - sizeof (struct totempg_mcast) - 16)) + 1; return (avail >= msg_count); } static int send_reserve ( int msg_size) { unsigned int msg_count = 0; msg_count = (msg_size / (totempg_totem_config->net_mtu - sizeof (struct totempg_mcast) - 16)) + 1; totempg_reserved += msg_count; totempg_stats.msg_reserved = totempg_reserved; return (msg_count); } static void send_release ( int msg_count) { totempg_reserved -= msg_count; totempg_stats.msg_reserved = totempg_reserved; } int totempg_callback_token_create ( void **handle_out, enum totem_callback_token_type type, int delete, int (*callback_fn) (enum totem_callback_token_type type, const void *), const void *data) { unsigned int res; pthread_mutex_lock (&callback_token_mutex); res = totemmrp_callback_token_create (handle_out, type, delete, callback_fn, data); pthread_mutex_unlock (&callback_token_mutex); return (res); } void totempg_callback_token_destroy ( void *handle_out) { pthread_mutex_lock (&callback_token_mutex); totemmrp_callback_token_destroy (handle_out); pthread_mutex_unlock (&callback_token_mutex); } /* * vi: set autoindent tabstop=4 shiftwidth=4 : */ int totempg_groups_initialize ( hdb_handle_t *handle, void (*deliver_fn) ( unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required), void (*confchg_fn) ( enum 
totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id)) { struct totempg_group_instance *instance; unsigned int res; pthread_mutex_lock (&totempg_mutex); res = hdb_handle_create (&totempg_groups_instance_database, sizeof (struct totempg_group_instance), handle); if (res != 0) { goto error_exit; } if (*handle > totempg_max_handle) { totempg_max_handle = *handle; } res = hdb_handle_get (&totempg_groups_instance_database, *handle, (void *)&instance); if (res != 0) { goto error_destroy; } instance->deliver_fn = deliver_fn; instance->confchg_fn = confchg_fn; instance->groups = 0; instance->groups_cnt = 0; instance->q_level = QB_LOOP_MED; hdb_handle_put (&totempg_groups_instance_database, *handle); pthread_mutex_unlock (&totempg_mutex); return (0); error_destroy: hdb_handle_destroy (&totempg_groups_instance_database, *handle); error_exit: pthread_mutex_unlock (&totempg_mutex); return (-1); } int totempg_groups_join ( hdb_handle_t handle, const struct totempg_group *groups, size_t group_cnt) { struct totempg_group_instance *instance; struct totempg_group *new_groups; unsigned int res; pthread_mutex_lock (&totempg_mutex); res = hdb_handle_get (&totempg_groups_instance_database, handle, (void *)&instance); if (res != 0) { goto error_exit; } new_groups = realloc (instance->groups, sizeof (struct totempg_group) * (instance->groups_cnt + group_cnt)); if (new_groups == 0) { res = ENOMEM; goto error_exit; } memcpy (&new_groups[instance->groups_cnt], groups, group_cnt * sizeof (struct totempg_group)); instance->groups = new_groups; instance->groups_cnt += group_cnt; hdb_handle_put (&totempg_groups_instance_database, handle); error_exit: pthread_mutex_unlock (&totempg_mutex); return (res); } int totempg_groups_leave ( hdb_handle_t handle, const struct totempg_group *groups, 
size_t group_cnt) { struct totempg_group_instance *instance; unsigned int res; pthread_mutex_lock (&totempg_mutex); res = hdb_handle_get (&totempg_groups_instance_database, handle, (void *)&instance); if (res != 0) { goto error_exit; } hdb_handle_put (&totempg_groups_instance_database, handle); error_exit: pthread_mutex_unlock (&totempg_mutex); return (res); } #define MAX_IOVECS_FROM_APP 32 #define MAX_GROUPS_PER_MSG 32 int totempg_groups_mcast_joined ( hdb_handle_t handle, const struct iovec *iovec, unsigned int iov_len, int guarantee) { struct totempg_group_instance *instance; unsigned short group_len[MAX_GROUPS_PER_MSG + 1]; struct iovec iovec_mcast[MAX_GROUPS_PER_MSG + 1 + MAX_IOVECS_FROM_APP]; int i; unsigned int res; pthread_mutex_lock (&totempg_mutex); res = hdb_handle_get (&totempg_groups_instance_database, handle, (void *)&instance); if (res != 0) { goto error_exit; } /* * Build group_len structure and the iovec_mcast structure */ group_len[0] = instance->groups_cnt; for (i = 0; i < instance->groups_cnt; i++) { group_len[i + 1] = instance->groups[i].group_len; iovec_mcast[i + 1].iov_len = instance->groups[i].group_len; iovec_mcast[i + 1].iov_base = (void *) instance->groups[i].group; } iovec_mcast[0].iov_len = (instance->groups_cnt + 1) * sizeof (unsigned short); iovec_mcast[0].iov_base = group_len; for (i = 0; i < iov_len; i++) { iovec_mcast[i + instance->groups_cnt + 1].iov_len = iovec[i].iov_len; iovec_mcast[i + instance->groups_cnt + 1].iov_base = iovec[i].iov_base; } res = mcast_msg (iovec_mcast, iov_len + instance->groups_cnt + 1, guarantee); hdb_handle_put (&totempg_groups_instance_database, handle); error_exit: pthread_mutex_unlock (&totempg_mutex); return (res); } static void check_q_level(struct totempg_group_instance *instance) { int32_t old_level; int32_t percent_used = 0; old_level = instance->q_level; percent_used = 100 - (totemmrp_avail () * 100 / 800); /*(1024*1024/1500)*/ if (percent_used > 90 && instance->q_level != 
TOTEM_Q_LEVEL_CRITICAL) { instance->q_level = TOTEM_Q_LEVEL_CRITICAL; } else if (percent_used < 30 && instance->q_level != TOTEM_Q_LEVEL_LOW) { instance->q_level = TOTEM_Q_LEVEL_LOW; } else if (percent_used > 40 && percent_used < 60 && instance->q_level != TOTEM_Q_LEVEL_GOOD) { instance->q_level = TOTEM_Q_LEVEL_GOOD; } else if (percent_used > 70 && percent_used < 80 && instance->q_level != TOTEM_Q_LEVEL_HIGH) { instance->q_level = TOTEM_Q_LEVEL_HIGH; } if (totem_queue_level_changed && old_level != instance->q_level) { totem_queue_level_changed(instance->q_level); } } void totempg_check_q_level(qb_handle_t handle) { struct totempg_group_instance *instance; if (hdb_handle_get (&totempg_groups_instance_database, handle, (void *)&instance) != 0) { return; } check_q_level(instance); hdb_handle_put (&totempg_groups_instance_database, handle); } int totempg_groups_joined_reserve ( hdb_handle_t handle, const struct iovec *iovec, unsigned int iov_len) { struct totempg_group_instance *instance; unsigned int size = 0; unsigned int i; unsigned int res; unsigned int reserved = 0; pthread_mutex_lock (&totempg_mutex); pthread_mutex_lock (&mcast_msg_mutex); res = hdb_handle_get (&totempg_groups_instance_database, handle, (void *)&instance); if (res != 0) { goto error_exit; } for (i = 0; i < instance->groups_cnt; i++) { size += instance->groups[i].group_len; } for (i = 0; i < iov_len; i++) { size += iovec[i].iov_len; } check_q_level(instance); if (size >= totempg_size_limit) { reserved = -1; goto error_put; } reserved = send_reserve (size); if (msg_count_send_ok (reserved) == 0) { send_release (reserved); reserved = 0; } error_put: hdb_handle_put (&totempg_groups_instance_database, handle); error_exit: pthread_mutex_unlock (&mcast_msg_mutex); pthread_mutex_unlock (&totempg_mutex); return (reserved); } int totempg_groups_joined_release (int msg_count) { pthread_mutex_lock (&totempg_mutex); pthread_mutex_lock (&mcast_msg_mutex); send_release (msg_count); pthread_mutex_unlock 
(&mcast_msg_mutex); pthread_mutex_unlock (&totempg_mutex); return 0; } int totempg_groups_mcast_groups ( hdb_handle_t handle, int guarantee, const struct totempg_group *groups, size_t groups_cnt, const struct iovec *iovec, unsigned int iov_len) { struct totempg_group_instance *instance; unsigned short group_len[MAX_GROUPS_PER_MSG + 1]; struct iovec iovec_mcast[MAX_GROUPS_PER_MSG + 1 + MAX_IOVECS_FROM_APP]; int i; unsigned int res; pthread_mutex_lock (&totempg_mutex); res = hdb_handle_get (&totempg_groups_instance_database, handle, (void *)&instance); if (res != 0) { goto error_exit; } /* * Build group_len structure and the iovec_mcast structure */ group_len[0] = groups_cnt; for (i = 0; i < groups_cnt; i++) { group_len[i + 1] = groups[i].group_len; iovec_mcast[i + 1].iov_len = groups[i].group_len; iovec_mcast[i + 1].iov_base = (void *) groups[i].group; } iovec_mcast[0].iov_len = (groups_cnt + 1) * sizeof (unsigned short); iovec_mcast[0].iov_base = group_len; for (i = 0; i < iov_len; i++) { iovec_mcast[i + groups_cnt + 1].iov_len = iovec[i].iov_len; iovec_mcast[i + groups_cnt + 1].iov_base = iovec[i].iov_base; } res = mcast_msg (iovec_mcast, iov_len + groups_cnt + 1, guarantee); hdb_handle_put (&totempg_groups_instance_database, handle); error_exit: pthread_mutex_unlock (&totempg_mutex); return (res); } /* * Returns -1 if error, 0 if can't send, 1 if can send the message */ int totempg_groups_send_ok_groups ( hdb_handle_t handle, const struct totempg_group *groups, size_t groups_cnt, const struct iovec *iovec, unsigned int iov_len) { struct totempg_group_instance *instance; unsigned int size = 0; unsigned int i; unsigned int res; pthread_mutex_lock (&totempg_mutex); res = hdb_handle_get (&totempg_groups_instance_database, handle, (void *)&instance); if (res != 0) { goto error_exit; } for (i = 0; i < groups_cnt; i++) { size += groups[i].group_len; } for (i = 0; i < iov_len; i++) { size += iovec[i].iov_len; } res = msg_count_send_ok (size); hdb_handle_put 
(&totempg_groups_instance_database, handle); error_exit: pthread_mutex_unlock (&totempg_mutex); return (res); } int totempg_ifaces_get ( unsigned int nodeid, struct totem_ip_address *interfaces, char ***status, unsigned int *iface_count) { int res; res = totemmrp_ifaces_get ( nodeid, interfaces, status, iface_count); return (res); } void totempg_event_signal (enum totem_event_type type, int value) { totemmrp_event_signal (type, value); } void* totempg_get_stats (void) { return &totempg_stats; } int totempg_crypto_set ( unsigned int type) { int res; res = totemmrp_crypto_set ( type); return (res); } int totempg_ring_reenable (void) { int res; res = totemmrp_ring_reenable (); return (res); } const char *totempg_ifaces_print (unsigned int nodeid) { static char iface_string[256 * INTERFACE_MAX]; char one_iface[64]; struct totem_ip_address interfaces[INTERFACE_MAX]; char **status; unsigned int iface_count; unsigned int i; int res; iface_string[0] = '\0'; res = totempg_ifaces_get (nodeid, interfaces, &status, &iface_count); if (res == -1) { return ("no interface found for nodeid"); } for (i = 0; i < iface_count; i++) { sprintf (one_iface, "r(%d) ip(%s) ", i, totemip_print (&interfaces[i])); strcat (iface_string, one_iface); } return (iface_string); } unsigned int totempg_my_nodeid_get (void) { return (totemmrp_my_nodeid_get()); } int totempg_my_family_get (void) { return (totemmrp_my_family_get()); } extern void totempg_service_ready_register ( void (*totem_service_ready) (void)) { totemmrp_service_ready_register (totem_service_ready); } void totempg_queue_level_register_callback (totem_queue_level_changed_fn fn) { totem_queue_level_changed = fn; } extern int totempg_member_add ( const struct totem_ip_address *member, int ring_no); extern int totempg_member_remove ( const struct totem_ip_address *member, int ring_no); diff --git a/exec/totemrrp.c b/exec/totemrrp.c index c67fdaba..8fe3ef7b 100644 --- a/exec/totemrrp.c +++ b/exec/totemrrp.c @@ -1,2097 +1,2096 @@ /* * 
Copyright (c) 2005 MontaVista Software, Inc. * Copyright (c) 2006-2009 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#define LOGSYS_UTILS_ONLY 1
#include
#include "totemnet.h"
#include "totemrrp.h"

/*
 * Delivery and interface-change callbacks (prototypes only here).
 */
void rrp_deliver_fn (
	void *context,
	const void *msg,
	unsigned int msg_len);

void rrp_iface_change_fn (
	void *context,
	const struct totem_ip_address *iface_addr);

struct totemrrp_instance;

/*
 * Per-instance state of the passive replication algorithm.
 */
struct passive_instance {
	/* Owning rrp instance; dereferenced by the passive timer helpers */
	struct totemrrp_instance *rrp_instance;
	/* One entry per interface; compared against 0 (usable) and 1 (faulty) */
	unsigned int *faulty;
	/* Per-interface receive counters used by passive_monitor */
	unsigned int *token_recv_count;
	unsigned int *mcast_recv_count;
	/* Copy of a token whose delivery was deferred, and its length */
	unsigned char token[15000];
	unsigned int token_len;
	qb_loop_timer_handle timer_expired_token;
	qb_loop_timer_handle timer_problem_decrementer;
	/* Context pointer passed to the deliver callback for the stored token */
	void *totemrrp_context;
	/* Index of the interface used for the most recent token / message send */
	unsigned int token_xmit_iface;
	unsigned int msg_xmit_iface;
};

/*
 * Per-instance state of the active replication algorithm.
 */
struct active_instance {
	/* Owning rrp instance */
	struct totemrrp_instance *rrp_instance;
	/* One entry per interface; compared against 0 (usable) and 1 (faulty) */
	unsigned int *faulty;
	/* Per-interface flag: set to 1 once the current token arrived there */
	unsigned int *last_token_recv;
	/* Per-interface problem counter, compared with rrp_problem_count_threshold */
	unsigned int *counter_problems;
	/* Copy of the most recent token and its length */
	unsigned char token[15000];
	unsigned int token_len;
	/* Sequence id of the last token seen */
	unsigned int last_token_seq;
	qb_loop_timer_handle timer_expired_token;
	qb_loop_timer_handle timer_problem_decrementer;
	/* Context pointer passed to the deliver callback for the stored token */
	void *totemrrp_context;
};

/*
 * Operations table implemented by each replication algorithm
 * (none, passive, active).
 */
struct rrp_algo {
	const char *name;

	void * (*initialize) (
		struct totemrrp_instance *rrp_instance,
		int interface_count);

	void (*mcast_recv) (
		struct totemrrp_instance *instance,
		unsigned int iface_no,
		void *context,
		const void *msg,
		unsigned int msg_len);

	void (*mcast_noflush_send) (
		struct totemrrp_instance *instance,
		const void *msg,
		unsigned int msg_len);

	void (*mcast_flush_send) (
		struct totemrrp_instance *instance,
		const void *msg,
		unsigned int msg_len);

	void (*token_recv) (
		struct totemrrp_instance *instance,
		unsigned int iface_no,
		void *context,
		const void *msg,
		unsigned int msg_len,
		unsigned int token_seqid);

	void (*token_send) (
		struct totemrrp_instance *instance,
		const void *msg,
		unsigned int msg_len);

	void (*recv_flush) (
		struct totemrrp_instance *instance);

	void (*send_flush) (
		struct totemrrp_instance *instance);

	void (*iface_check) (
		struct totemrrp_instance *instance);

	void (*processor_count_set) (
		struct totemrrp_instance *instance,
		unsigned int processor_count);

	void (*token_target_set) (
		struct totemrrp_instance *instance,
		struct totem_ip_address *token_target,
		unsigned int iface_no);

	void (*ring_reenable) (
		struct totemrrp_instance *instance,
		unsigned int iface_no);

	int (*mcast_recv_empty) (
		struct totemrrp_instance *instance);

	int (*member_add) (
		struct totemrrp_instance *instance,
		const struct totem_ip_address *member,
		unsigned int iface_no);

	int (*member_remove) (
		struct totemrrp_instance *instance,
		const struct totem_ip_address *member,
		unsigned int iface_no);
};

/*
 * Top-level redundant ring instance: callbacks, logging configuration,
 * per-interface network handles and the selected algorithm's state.
 */
struct totemrrp_instance {
	qb_loop_t *poll_handle;

	struct totem_interface *interfaces;

	struct rrp_algo *rrp_algo;

	void *context;

	/* Per-interface status text; written with sprintf by the algorithms */
	char *status[INTERFACE_MAX];

	void (*totemrrp_deliver_fn) (
		void *context,
		const void *msg,
		unsigned int msg_len);

	void (*totemrrp_iface_change_fn) (
		void *context,
		const struct totem_ip_address *iface_addr,
		unsigned int iface_no);

	void (*totemrrp_token_seqid_get) (
		const void *msg,
		unsigned int *seqid,
		unsigned int *token_is);

	void (*totemrrp_target_set_completed) (
		void *context);

	unsigned int (*totemrrp_msgs_missing) (void);

	/*
	 * Function and data used to log messages
	 */
	int totemrrp_log_level_security;

	int totemrrp_log_level_error;

	int totemrrp_log_level_warning;

	int totemrrp_log_level_notice;

	int totemrrp_log_level_debug;

	int totemrrp_subsys_id;

	/*
	 * NOTE(review): the lines below that begin with '-' or '+' are
	 * unified-diff markers carried in this text; the '+' lines are the
	 * new signature (level and subsys passed separately).
	 */
	void (*totemrrp_log_printf) (
-		unsigned int rec_ident,
+		int level,
+		int subsys,
		const char *function,
		const char *file,
		int line,
-		const char *format, ...)__attribute__((format(printf, 5, 6)));
+		const char *format, ...)__attribute__((format(printf, 6, 7)));

	/* One network handle per interface, indexed by interface number */
	void **net_handles;

	/* Algorithm-specific state (struct passive_instance / active_instance) */
	void *rrp_algo_instance;

	int interface_count;

	int processor_count;

	int my_nodeid;

	struct totem_config *totem_config;

	/* Per-interface context handed to timer_function_test_ring_timeout */
	void *deliver_fn_context[INTERFACE_MAX];

	qb_loop_timer_handle timer_active_test_ring_timeout[INTERFACE_MAX];
};

/*
 * None Replication Forward Declarations
 */
static void none_mcast_recv (
	struct totemrrp_instance *instance,
	unsigned int iface_no,
	void *context,
	const void *msg,
	unsigned int msg_len);

static void none_mcast_noflush_send (
	struct totemrrp_instance *instance,
	const void *msg,
	unsigned int msg_len);

static void none_mcast_flush_send (
	struct totemrrp_instance *instance,
	const void *msg,
	unsigned int msg_len);

static void none_token_recv (
	struct totemrrp_instance *instance,
	unsigned int iface_no,
	void *context,
	const void *msg,
	unsigned int msg_len,
	unsigned int token_seqid);

static void none_token_send (
	struct totemrrp_instance *instance,
	const void *msg,
	unsigned int msg_len);

static void none_recv_flush (
	struct totemrrp_instance *instance);

static void none_send_flush (
	struct totemrrp_instance *instance);

static void none_iface_check (
	struct totemrrp_instance *instance);

static void none_processor_count_set (
	struct totemrrp_instance *instance,
	unsigned int processor_count_set);

static void none_token_target_set (
	struct totemrrp_instance *instance,
	struct totem_ip_address *token_target,
	unsigned int iface_no);

static void none_ring_reenable (
	struct totemrrp_instance *instance,
	unsigned int iface_no);

static int none_mcast_recv_empty (
	struct totemrrp_instance *instance);

static int none_member_add (
	struct totemrrp_instance *instance,
	const struct totem_ip_address *member,
	unsigned int iface_no);

static int none_member_remove (
	struct totemrrp_instance *instance,
	const struct totem_ip_address *member,
	unsigned int iface_no);

/*
 * Passive Replication Forward Declarations
 */
static void *passive_instance_initialize (
	struct totemrrp_instance *rrp_instance,
	int interface_count);

static void passive_mcast_recv (
	struct totemrrp_instance *instance,
	unsigned int iface_no,
	void *context,
	const void *msg,
	unsigned int msg_len);

static void passive_mcast_noflush_send (
	struct totemrrp_instance *instance,
	const void *msg,
	unsigned int msg_len);

static void passive_mcast_flush_send (
	struct totemrrp_instance *instance,
	const void *msg,
	unsigned int msg_len);

static void passive_monitor (
	struct totemrrp_instance *rrp_instance,
	unsigned int iface_no,
	int is_token_recv_count);

static void passive_token_recv (
	struct totemrrp_instance *instance,
	unsigned int iface_no,
	void *context,
	const void *msg,
	unsigned int msg_len,
	unsigned int token_seqid);

static void passive_token_send (
	struct totemrrp_instance *instance,
	const void *msg,
	unsigned int msg_len);

static void passive_recv_flush (
	struct totemrrp_instance *instance);

static void passive_send_flush (
	struct totemrrp_instance *instance);

static void passive_iface_check (
	struct totemrrp_instance *instance);

static void passive_processor_count_set (
	struct totemrrp_instance *instance,
	unsigned int processor_count_set);

static void passive_token_target_set (
	struct totemrrp_instance *instance,
	struct totem_ip_address *token_target,
	unsigned int iface_no);

static void passive_ring_reenable (
	struct totemrrp_instance *instance,
	unsigned int iface_no);

static int passive_mcast_recv_empty (
	struct totemrrp_instance *instance);

static int passive_member_add (
	struct totemrrp_instance *instance,
	const struct totem_ip_address *member,
	unsigned int iface_no);

static int passive_member_remove (
	struct totemrrp_instance *instance,
	const struct totem_ip_address *member,
	unsigned int iface_no);

/*
 * Active Replication Forward Declarations
 */
static void *active_instance_initialize (
	struct totemrrp_instance *rrp_instance,
	int interface_count);

static void active_mcast_recv (
	struct totemrrp_instance *instance,
	unsigned int iface_no,
	void *context,
	const void *msg,
	unsigned int msg_len);

static void active_mcast_noflush_send (
	struct totemrrp_instance *instance,
	const void *msg,
	unsigned int msg_len);

static void active_mcast_flush_send (
	struct totemrrp_instance *instance,
	const void *msg,
	unsigned int msg_len);

static void active_token_recv (
	struct totemrrp_instance *instance,
	unsigned int iface_no,
	void *context,
	const void *msg,
	unsigned int msg_len,
	unsigned int token_seqid);

static void active_token_send (
	struct totemrrp_instance *instance,
	const void *msg,
	unsigned int msg_len);

static void active_recv_flush (
	struct totemrrp_instance *instance);

static void active_send_flush (
	struct totemrrp_instance *instance);

static void active_iface_check (
	struct totemrrp_instance *instance);

static void active_processor_count_set (
	struct totemrrp_instance *instance,
	unsigned int processor_count_set);

static void active_token_target_set (
	struct totemrrp_instance *instance,
	struct totem_ip_address *token_target,
	unsigned int iface_no);

static void active_ring_reenable (
	struct totemrrp_instance *instance,
	unsigned int iface_no);

static int active_mcast_recv_empty (
	struct totemrrp_instance *instance);

static int active_member_add (
	struct totemrrp_instance *instance,
	const struct totem_ip_address *member,
	unsigned int iface_no);

static int active_member_remove (
	struct totemrrp_instance *instance,
	const struct totem_ip_address *member,
	unsigned int iface_no);

static void active_timer_expired_token_start (
	struct active_instance *active_instance);

static void active_timer_expired_token_cancel (
	struct active_instance *active_instance);

static void active_timer_problem_decrementer_start (
	struct active_instance *active_instance);

static void active_timer_problem_decrementer_cancel (
	struct active_instance *active_instance);

/*
 * 0-5 reserved for totemsrp.c
 */
#define MESSAGE_TYPE_RING_TEST_ACTIVE 6
#define MESSAGE_TYPE_RING_TEST_ACTIVATE 7

/* Value written to message_header.endian_detector by the local node */
#define ENDIAN_LOCAL 0xff22

/*
 * Rollover handling:
 *
 * ARR_SEQNO_START_TOKEN is the starting sequence number of last seen sequence
 * for a token for active redundant ring. This should remain zero, unless testing
 * overflow in which case 0x7fffff00 or 0xffffff00 are good starting values.
* It should be same as on defined in totemsrp.c */ #define ARR_SEQNO_START_TOKEN 0x0 /* * These can be used ot test different rollover points * #define ARR_SEQNO_START_MSG 0xfffffe00 */ /* * Threshold value when recv_count for passive rrp should be adjusted. * Set this value to some smaller for testing of adjusting proper * functionality. Also keep in mind that this value must be smaller * then rrp_problem_count_threshold */ #define PASSIVE_RECV_COUNT_THRESHOLD (INT_MAX / 2) struct message_header { char type; char encapsulated; unsigned short endian_detector; int ring_number; int nodeid_activator; } __attribute__((packed)); struct deliver_fn_context { struct totemrrp_instance *instance; void *context; int iface_no; }; struct rrp_algo none_algo = { .name = "none", .initialize = NULL, .mcast_recv = none_mcast_recv, .mcast_noflush_send = none_mcast_noflush_send, .mcast_flush_send = none_mcast_flush_send, .token_recv = none_token_recv, .token_send = none_token_send, .recv_flush = none_recv_flush, .send_flush = none_send_flush, .iface_check = none_iface_check, .processor_count_set = none_processor_count_set, .token_target_set = none_token_target_set, .ring_reenable = none_ring_reenable, .mcast_recv_empty = none_mcast_recv_empty, .member_add = none_member_add, .member_remove = none_member_remove }; struct rrp_algo passive_algo = { .name = "passive", .initialize = passive_instance_initialize, .mcast_recv = passive_mcast_recv, .mcast_noflush_send = passive_mcast_noflush_send, .mcast_flush_send = passive_mcast_flush_send, .token_recv = passive_token_recv, .token_send = passive_token_send, .recv_flush = passive_recv_flush, .send_flush = passive_send_flush, .iface_check = passive_iface_check, .processor_count_set = passive_processor_count_set, .token_target_set = passive_token_target_set, .ring_reenable = passive_ring_reenable, .mcast_recv_empty = passive_mcast_recv_empty, .member_add = passive_member_add, .member_remove = passive_member_remove }; struct rrp_algo active_algo 
= { .name = "active", .initialize = active_instance_initialize, .mcast_recv = active_mcast_recv, .mcast_noflush_send = active_mcast_noflush_send, .mcast_flush_send = active_mcast_flush_send, .token_recv = active_token_recv, .token_send = active_token_send, .recv_flush = active_recv_flush, .send_flush = active_send_flush, .iface_check = active_iface_check, .processor_count_set = active_processor_count_set, .token_target_set = active_token_target_set, .ring_reenable = active_ring_reenable, .mcast_recv_empty = active_mcast_recv_empty, .member_add = active_member_add, .member_remove = active_member_remove }; struct rrp_algo *rrp_algos[] = { &none_algo, &passive_algo, &active_algo }; #define RRP_ALGOS_COUNT 3 -#define log_printf(level, format, args...) \ -do { \ - rrp_instance->totemrrp_log_printf ( \ - LOGSYS_ENCODE_RECID(level, \ - rrp_instance->totemrrp_subsys_id, \ - LOGSYS_RECID_LOG), \ - __FUNCTION__, __FILE__, __LINE__, \ - format, ##args); \ +#define log_printf(level, format, args...) 
\ +do { \ + rrp_instance->totemrrp_log_printf ( \ + level, rrp_instance->totemrrp_subsys_id, \ + __FUNCTION__, __FILE__, __LINE__, \ + format, ##args); \ } while (0); static void test_active_msg_endian_convert(const struct message_header *in, struct message_header *out) { out->type = in->type; out->encapsulated = in->encapsulated; out->endian_detector = ENDIAN_LOCAL; out->ring_number = swab32 (in->ring_number); out->nodeid_activator = swab32(in->nodeid_activator); } static void timer_function_test_ring_timeout (void *context) { struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context; struct totemrrp_instance *rrp_instance = deliver_fn_context->instance; unsigned int *faulty = NULL; int iface_no = deliver_fn_context->iface_no; struct message_header msg = { .type = MESSAGE_TYPE_RING_TEST_ACTIVE, .endian_detector = ENDIAN_LOCAL, }; if (strcmp(rrp_instance->totem_config->rrp_mode, "active") == 0) faulty = ((struct active_instance *)(rrp_instance->rrp_algo_instance))->faulty; if (strcmp(rrp_instance->totem_config->rrp_mode, "passive") == 0) faulty = ((struct passive_instance *)(rrp_instance->rrp_algo_instance))->faulty; assert (faulty != NULL); if (faulty[iface_no] == 1) { msg.ring_number = iface_no; msg.nodeid_activator = rrp_instance->my_nodeid; totemnet_token_send ( rrp_instance->net_handles[iface_no], &msg, sizeof (struct message_header)); qb_loop_timer_add (rrp_instance->poll_handle, QB_LOOP_MED, rrp_instance->totem_config->rrp_autorecovery_check_timeout*QB_TIME_NS_IN_MSEC, (void *)deliver_fn_context, timer_function_test_ring_timeout, &rrp_instance->timer_active_test_ring_timeout[iface_no]); } } /* * None Replication Implementation */ static void none_mcast_recv ( struct totemrrp_instance *rrp_instance, unsigned int iface_no, void *context, const void *msg, unsigned int msg_len) { rrp_instance->totemrrp_deliver_fn ( context, msg, msg_len); } static void none_mcast_flush_send ( struct totemrrp_instance *instance, const void *msg, 
unsigned int msg_len) { totemnet_mcast_flush_send (instance->net_handles[0], msg, msg_len); } static void none_mcast_noflush_send ( struct totemrrp_instance *instance, const void *msg, unsigned int msg_len) { totemnet_mcast_noflush_send (instance->net_handles[0], msg, msg_len); } static void none_token_recv ( struct totemrrp_instance *rrp_instance, unsigned int iface_no, void *context, const void *msg, unsigned int msg_len, unsigned int token_seq) { rrp_instance->totemrrp_deliver_fn ( context, msg, msg_len); } static void none_token_send ( struct totemrrp_instance *instance, const void *msg, unsigned int msg_len) { totemnet_token_send ( instance->net_handles[0], msg, msg_len); } static void none_recv_flush (struct totemrrp_instance *instance) { totemnet_recv_flush (instance->net_handles[0]); } static void none_send_flush (struct totemrrp_instance *instance) { totemnet_send_flush (instance->net_handles[0]); } static void none_iface_check (struct totemrrp_instance *instance) { totemnet_iface_check (instance->net_handles[0]); } static void none_processor_count_set ( struct totemrrp_instance *instance, unsigned int processor_count) { totemnet_processor_count_set (instance->net_handles[0], processor_count); } static void none_token_target_set ( struct totemrrp_instance *instance, struct totem_ip_address *token_target, unsigned int iface_no) { totemnet_token_target_set (instance->net_handles[0], token_target); } static void none_ring_reenable ( struct totemrrp_instance *instance, unsigned int iface_no) { /* * No operation */ } static int none_mcast_recv_empty ( struct totemrrp_instance *instance) { int res; res = totemnet_recv_mcast_empty (instance->net_handles[0]); return (res); } static int none_member_add ( struct totemrrp_instance *instance, const struct totem_ip_address *member, unsigned int iface_no) { int res; res = totemnet_member_add (instance->net_handles[0], member); return (res); } static int none_member_remove ( struct totemrrp_instance *instance, const 
struct totem_ip_address *member, unsigned int iface_no) { int res; res = totemnet_member_remove (instance->net_handles[0], member); return (res); } /* * Passive Replication Implementation */ void *passive_instance_initialize ( struct totemrrp_instance *rrp_instance, int interface_count) { struct passive_instance *instance; instance = malloc (sizeof (struct passive_instance)); if (instance == 0) { goto error_exit; } memset (instance, 0, sizeof (struct passive_instance)); instance->faulty = malloc (sizeof (int) * interface_count); if (instance->faulty == 0) { free (instance); instance = 0; goto error_exit; } memset (instance->faulty, 0, sizeof (int) * interface_count); instance->token_recv_count = malloc (sizeof (int) * interface_count); if (instance->token_recv_count == 0) { free (instance->faulty); free (instance); instance = 0; goto error_exit; } memset (instance->token_recv_count, 0, sizeof (int) * interface_count); instance->mcast_recv_count = malloc (sizeof (int) * interface_count); if (instance->mcast_recv_count == 0) { free (instance->token_recv_count); free (instance->faulty); free (instance); instance = 0; goto error_exit; } memset (instance->mcast_recv_count, 0, sizeof (int) * interface_count); error_exit: return ((void *)instance); } static void timer_function_passive_token_expired (void *context) { struct passive_instance *passive_instance = (struct passive_instance *)context; struct totemrrp_instance *rrp_instance = passive_instance->rrp_instance; rrp_instance->totemrrp_deliver_fn ( passive_instance->totemrrp_context, passive_instance->token, passive_instance->token_len); } /* TODO static void timer_function_passive_problem_decrementer (void *context) { // struct passive_instance *passive_instance = (struct passive_instance *)context; // struct totemrrp_instance *rrp_instance = passive_instance->rrp_instance; } */ static void passive_timer_expired_token_start ( struct passive_instance *passive_instance) { qb_loop_timer_add ( 
passive_instance->rrp_instance->poll_handle, QB_LOOP_MED, passive_instance->rrp_instance->totem_config->rrp_token_expired_timeout*QB_TIME_NS_IN_MSEC, (void *)passive_instance, timer_function_passive_token_expired, &passive_instance->timer_expired_token); } static void passive_timer_expired_token_cancel ( struct passive_instance *passive_instance) { qb_loop_timer_del ( passive_instance->rrp_instance->poll_handle, passive_instance->timer_expired_token); } /* static void passive_timer_problem_decrementer_start ( struct passive_instance *passive_instance) { qb_loop_timer_add ( QB_LOOP_MED, passive_instance->rrp_instance->poll_handle, passive_instance->rrp_instance->totem_config->rrp_problem_count_timeout*QB_TIME_NS_IN_MSEC, (void *)passive_instance, timer_function_passive_problem_decrementer, &passive_instance->timer_problem_decrementer); } static void passive_timer_problem_decrementer_cancel ( struct passive_instance *passive_instance) { qb_loop_timer_del ( passive_instance->rrp_instance->poll_handle, passive_instance->timer_problem_decrementer); } */ /* * Monitor function implementation from rrp paper. 
* rrp_instance is passive rrp instance, iface_no is interface with received messgae/token and * is_token_recv_count is boolean variable which donates if message is token (>1) or regular * message (= 0) */ static void passive_monitor ( struct totemrrp_instance *rrp_instance, unsigned int iface_no, int is_token_recv_count) { struct passive_instance *passive_instance = (struct passive_instance *)rrp_instance->rrp_algo_instance; unsigned int *recv_count; unsigned int max; unsigned int i; unsigned int min_all, min_active; /* * Monitor for failures */ if (is_token_recv_count) { recv_count = passive_instance->token_recv_count; } else { recv_count = passive_instance->mcast_recv_count; } recv_count[iface_no] += 1; max = 0; for (i = 0; i < rrp_instance->interface_count; i++) { if (max < recv_count[i]) { max = recv_count[i]; } } /* * Max is larger then threshold -> start adjusting process */ if (max > PASSIVE_RECV_COUNT_THRESHOLD) { min_all = min_active = recv_count[iface_no]; for (i = 0; i < rrp_instance->interface_count; i++) { if (recv_count[i] < min_all) { min_all = recv_count[i]; } if (passive_instance->faulty[i] == 0 && recv_count[i] < min_active) { min_active = recv_count[i]; } } if (min_all > 0) { /* * There is one or more faulty device with recv_count > 0 */ for (i = 0; i < rrp_instance->interface_count; i++) { recv_count[i] -= min_all; } } else { /* * No faulty device with recv_count > 0, adjust only active * devices */ for (i = 0; i < rrp_instance->interface_count; i++) { if (passive_instance->faulty[i] == 0) { recv_count[i] -= min_active; } } } /* * Find again max */ max = 0; for (i = 0; i < rrp_instance->interface_count; i++) { if (max < recv_count[i]) { max = recv_count[i]; } } } for (i = 0; i < rrp_instance->interface_count; i++) { if ((passive_instance->faulty[i] == 0) && (max - recv_count[i] > rrp_instance->totem_config->rrp_problem_count_threshold)) { passive_instance->faulty[i] = 1; qb_loop_timer_add (rrp_instance->poll_handle, QB_LOOP_MED, 
rrp_instance->totem_config->rrp_autorecovery_check_timeout*QB_TIME_NS_IN_MSEC, rrp_instance->deliver_fn_context[i], timer_function_test_ring_timeout, &rrp_instance->timer_active_test_ring_timeout[i]); sprintf (rrp_instance->status[i], "Marking ringid %u interface %s FAULTY", i, totemnet_iface_print (rrp_instance->net_handles[i])); log_printf ( rrp_instance->totemrrp_log_level_error, "%s", rrp_instance->status[i]); } } } static void passive_mcast_recv ( struct totemrrp_instance *rrp_instance, unsigned int iface_no, void *context, const void *msg, unsigned int msg_len) { struct passive_instance *passive_instance = (struct passive_instance *)rrp_instance->rrp_algo_instance; rrp_instance->totemrrp_deliver_fn ( context, msg, msg_len); if (rrp_instance->totemrrp_msgs_missing() == 0 && passive_instance->timer_expired_token) { /* * Delivers the last token */ rrp_instance->totemrrp_deliver_fn ( passive_instance->totemrrp_context, passive_instance->token, passive_instance->token_len); passive_timer_expired_token_cancel (passive_instance); } passive_monitor (rrp_instance, iface_no, 0); } static void passive_mcast_flush_send ( struct totemrrp_instance *instance, const void *msg, unsigned int msg_len) { struct passive_instance *passive_instance = (struct passive_instance *)instance->rrp_algo_instance; do { passive_instance->msg_xmit_iface = (passive_instance->msg_xmit_iface + 1) % instance->interface_count; } while (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1); totemnet_mcast_flush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len); } static void passive_mcast_noflush_send ( struct totemrrp_instance *instance, const void *msg, unsigned int msg_len) { struct passive_instance *passive_instance = (struct passive_instance *)instance->rrp_algo_instance; do { passive_instance->msg_xmit_iface = (passive_instance->msg_xmit_iface + 1) % instance->interface_count; } while (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1); 
totemnet_mcast_noflush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len); } static void passive_token_recv ( struct totemrrp_instance *rrp_instance, unsigned int iface_no, void *context, const void *msg, unsigned int msg_len, unsigned int token_seq) { struct passive_instance *passive_instance = (struct passive_instance *)rrp_instance->rrp_algo_instance; passive_instance->totemrrp_context = context; // this should be in totemrrp_instance ? TODO if (rrp_instance->totemrrp_msgs_missing() == 0) { rrp_instance->totemrrp_deliver_fn ( context, msg, msg_len); } else { memcpy (passive_instance->token, msg, msg_len); passive_timer_expired_token_start (passive_instance); } passive_monitor (rrp_instance, iface_no, 1); } static void passive_token_send ( struct totemrrp_instance *instance, const void *msg, unsigned int msg_len) { struct passive_instance *passive_instance = (struct passive_instance *)instance->rrp_algo_instance; do { passive_instance->token_xmit_iface = (passive_instance->token_xmit_iface + 1) % instance->interface_count; } while (passive_instance->faulty[passive_instance->token_xmit_iface] == 1); totemnet_token_send ( instance->net_handles[passive_instance->token_xmit_iface], msg, msg_len); } static void passive_recv_flush (struct totemrrp_instance *instance) { struct passive_instance *rrp_algo_instance = (struct passive_instance *)instance->rrp_algo_instance; unsigned int i; for (i = 0; i < instance->interface_count; i++) { if (rrp_algo_instance->faulty[i] == 0) { totemnet_recv_flush (instance->net_handles[i]); } } } static void passive_send_flush (struct totemrrp_instance *instance) { struct passive_instance *rrp_algo_instance = (struct passive_instance *)instance->rrp_algo_instance; unsigned int i; for (i = 0; i < instance->interface_count; i++) { if (rrp_algo_instance->faulty[i] == 0) { totemnet_send_flush (instance->net_handles[i]); } } } static void passive_iface_check (struct totemrrp_instance *instance) { struct 
passive_instance *rrp_algo_instance = (struct passive_instance *)instance->rrp_algo_instance; unsigned int i; for (i = 0; i < instance->interface_count; i++) { if (rrp_algo_instance->faulty[i] == 0) { totemnet_iface_check (instance->net_handles[i]); } } } static void passive_processor_count_set ( struct totemrrp_instance *instance, unsigned int processor_count) { struct passive_instance *rrp_algo_instance = (struct passive_instance *)instance->rrp_algo_instance; unsigned int i; for (i = 0; i < instance->interface_count; i++) { if (rrp_algo_instance->faulty[i] == 0) { totemnet_processor_count_set (instance->net_handles[i], processor_count); } } } static void passive_token_target_set ( struct totemrrp_instance *instance, struct totem_ip_address *token_target, unsigned int iface_no) { totemnet_token_target_set (instance->net_handles[iface_no], token_target); } static int passive_mcast_recv_empty ( struct totemrrp_instance *instance) { int res; int msgs_emptied = 0; int i; for (i = 0; i < instance->interface_count; i++) { res = totemnet_recv_mcast_empty (instance->net_handles[i]); if (res == -1) { return (-1); } if (res == 1) { msgs_emptied = 1; } } return (msgs_emptied); } static int passive_member_add ( struct totemrrp_instance *instance, const struct totem_ip_address *member, unsigned int iface_no) { int res; res = totemnet_member_add (instance->net_handles[iface_no], member); return (res); } static int passive_member_remove ( struct totemrrp_instance *instance, const struct totem_ip_address *member, unsigned int iface_no) { int res; res = totemnet_member_remove (instance->net_handles[iface_no], member); return (res); } static void passive_ring_reenable ( struct totemrrp_instance *instance, unsigned int iface_no) { struct passive_instance *rrp_algo_instance = (struct passive_instance *)instance->rrp_algo_instance; memset (rrp_algo_instance->mcast_recv_count, 0, sizeof (unsigned int) * instance->interface_count); memset (rrp_algo_instance->token_recv_count, 0, sizeof 
(unsigned int) * instance->interface_count); if (iface_no == instance->interface_count) { memset (rrp_algo_instance->faulty, 0, sizeof (unsigned int) * instance->interface_count); } else { rrp_algo_instance->faulty[iface_no] = 0; } } /* * Active Replication Implementation */ void *active_instance_initialize ( struct totemrrp_instance *rrp_instance, int interface_count) { struct active_instance *instance; instance = malloc (sizeof (struct active_instance)); if (instance == 0) { goto error_exit; } memset (instance, 0, sizeof (struct active_instance)); instance->faulty = malloc (sizeof (int) * interface_count); if (instance->faulty == 0) { free (instance); instance = 0; goto error_exit; } memset (instance->faulty, 0, sizeof (unsigned int) * interface_count); instance->last_token_recv = malloc (sizeof (int) * interface_count); if (instance->last_token_recv == 0) { free (instance->faulty); free (instance); instance = 0; goto error_exit; } memset (instance->last_token_recv, 0, sizeof (unsigned int) * interface_count); instance->counter_problems = malloc (sizeof (int) * interface_count); if (instance->counter_problems == 0) { free (instance->last_token_recv); free (instance->faulty); free (instance); instance = 0; goto error_exit; } memset (instance->counter_problems, 0, sizeof (unsigned int) * interface_count); instance->timer_expired_token = 0; instance->timer_problem_decrementer = 0; instance->rrp_instance = rrp_instance; instance->last_token_seq = ARR_SEQNO_START_TOKEN - 1; error_exit: return ((void *)instance); } static void timer_function_active_problem_decrementer (void *context) { struct active_instance *active_instance = (struct active_instance *)context; struct totemrrp_instance *rrp_instance = active_instance->rrp_instance; unsigned int problem_found = 0; unsigned int i; for (i = 0; i < rrp_instance->interface_count; i++) { if (active_instance->counter_problems[i] > 0) { problem_found = 1; active_instance->counter_problems[i] -= 1; if 
(active_instance->counter_problems[i] == 0) { sprintf (rrp_instance->status[i], "ring %d active with no faults", i); } else { sprintf (rrp_instance->status[i], "Decrementing problem counter for iface %s to [%d of %d]", totemnet_iface_print (rrp_instance->net_handles[i]), active_instance->counter_problems[i], rrp_instance->totem_config->rrp_problem_count_threshold); } log_printf ( rrp_instance->totemrrp_log_level_warning, "%s", rrp_instance->status[i]); } } if (problem_found) { active_timer_problem_decrementer_start (active_instance); } else { active_instance->timer_problem_decrementer = 0; } } static void timer_function_active_token_expired (void *context) { struct active_instance *active_instance = (struct active_instance *)context; struct totemrrp_instance *rrp_instance = active_instance->rrp_instance; unsigned int i; for (i = 0; i < rrp_instance->interface_count; i++) { if (active_instance->last_token_recv[i] == 0) { active_instance->counter_problems[i] += 1; if (active_instance->timer_problem_decrementer == 0) { active_timer_problem_decrementer_start (active_instance); } sprintf (rrp_instance->status[i], "Incrementing problem counter for seqid %d iface %s to [%d of %d]", active_instance->last_token_seq, totemnet_iface_print (rrp_instance->net_handles[i]), active_instance->counter_problems[i], rrp_instance->totem_config->rrp_problem_count_threshold); log_printf ( rrp_instance->totemrrp_log_level_warning, "%s", rrp_instance->status[i]); } } for (i = 0; i < rrp_instance->interface_count; i++) { if (active_instance->counter_problems[i] >= rrp_instance->totem_config->rrp_problem_count_threshold) { active_instance->faulty[i] = 1; qb_loop_timer_add (rrp_instance->poll_handle, QB_LOOP_MED, rrp_instance->totem_config->rrp_autorecovery_check_timeout*QB_TIME_NS_IN_MSEC, rrp_instance->deliver_fn_context[i], timer_function_test_ring_timeout, &rrp_instance->timer_active_test_ring_timeout[i]); sprintf (rrp_instance->status[i], "Marking seqid %d ringid %u interface %s FAULTY", 
active_instance->last_token_seq, i, totemnet_iface_print (rrp_instance->net_handles[i])); log_printf ( rrp_instance->totemrrp_log_level_error, "%s", rrp_instance->status[i]); active_timer_problem_decrementer_cancel (active_instance); } } rrp_instance->totemrrp_deliver_fn ( active_instance->totemrrp_context, active_instance->token, active_instance->token_len); } static void active_timer_expired_token_start ( struct active_instance *active_instance) { qb_loop_timer_add ( active_instance->rrp_instance->poll_handle, QB_LOOP_MED, active_instance->rrp_instance->totem_config->rrp_token_expired_timeout*QB_TIME_NS_IN_MSEC, (void *)active_instance, timer_function_active_token_expired, &active_instance->timer_expired_token); } static void active_timer_expired_token_cancel ( struct active_instance *active_instance) { qb_loop_timer_del ( active_instance->rrp_instance->poll_handle, active_instance->timer_expired_token); } static void active_timer_problem_decrementer_start ( struct active_instance *active_instance) { qb_loop_timer_add ( active_instance->rrp_instance->poll_handle, QB_LOOP_MED, active_instance->rrp_instance->totem_config->rrp_problem_count_timeout*QB_TIME_NS_IN_MSEC, (void *)active_instance, timer_function_active_problem_decrementer, &active_instance->timer_problem_decrementer); } static void active_timer_problem_decrementer_cancel ( struct active_instance *active_instance) { qb_loop_timer_del ( active_instance->rrp_instance->poll_handle, active_instance->timer_problem_decrementer); } /* * active replication */ static void active_mcast_recv ( struct totemrrp_instance *instance, unsigned int iface_no, void *context, const void *msg, unsigned int msg_len) { instance->totemrrp_deliver_fn ( context, msg, msg_len); } static void active_mcast_flush_send ( struct totemrrp_instance *instance, const void *msg, unsigned int msg_len) { int i; struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance; for (i = 0; i < 
instance->interface_count; i++) { if (rrp_algo_instance->faulty[i] == 0) { totemnet_mcast_flush_send (instance->net_handles[i], msg, msg_len); } } } static void active_mcast_noflush_send ( struct totemrrp_instance *instance, const void *msg, unsigned int msg_len) { int i; struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance; for (i = 0; i < instance->interface_count; i++) { if (rrp_algo_instance->faulty[i] == 0) { totemnet_mcast_noflush_send (instance->net_handles[i], msg, msg_len); } } } static void active_token_recv ( struct totemrrp_instance *rrp_instance, unsigned int iface_no, void *context, const void *msg, unsigned int msg_len, unsigned int token_seq) { int i; struct active_instance *active_instance = (struct active_instance *)rrp_instance->rrp_algo_instance; active_instance->totemrrp_context = context; if (sq_lt_compare (active_instance->last_token_seq, token_seq)) { memcpy (active_instance->token, msg, msg_len); active_instance->token_len = msg_len; for (i = 0; i < rrp_instance->interface_count; i++) { active_instance->last_token_recv[i] = 0; } active_instance->last_token_recv[iface_no] = 1; active_timer_expired_token_start (active_instance); } /* * This doesn't follow spec because the spec assumes we will know * when token resets occur. 
*/ active_instance->last_token_seq = token_seq; if (token_seq == active_instance->last_token_seq) { active_instance->last_token_recv[iface_no] = 1; for (i = 0; i < rrp_instance->interface_count; i++) { if ((active_instance->last_token_recv[i] == 0) && active_instance->faulty[i] == 0) { return; /* don't deliver token */ } } active_timer_expired_token_cancel (active_instance); rrp_instance->totemrrp_deliver_fn ( context, msg, msg_len); } } static void active_token_send ( struct totemrrp_instance *instance, const void *msg, unsigned int msg_len) { struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance; int i; for (i = 0; i < instance->interface_count; i++) { if (rrp_algo_instance->faulty[i] == 0) { totemnet_token_send ( instance->net_handles[i], msg, msg_len); } } } static void active_recv_flush (struct totemrrp_instance *instance) { struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance; unsigned int i; for (i = 0; i < instance->interface_count; i++) { if (rrp_algo_instance->faulty[i] == 0) { totemnet_recv_flush (instance->net_handles[i]); } } } static void active_send_flush (struct totemrrp_instance *instance) { struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance; unsigned int i; for (i = 0; i < instance->interface_count; i++) { if (rrp_algo_instance->faulty[i] == 0) { totemnet_send_flush (instance->net_handles[i]); } } } static int active_member_add ( struct totemrrp_instance *instance, const struct totem_ip_address *member, unsigned int iface_no) { int res; res = totemnet_member_add (instance->net_handles[iface_no], member); return (res); } static int active_member_remove ( struct totemrrp_instance *instance, const struct totem_ip_address *member, unsigned int iface_no) { int res; res = totemnet_member_remove (instance->net_handles[iface_no], member); return (res); } static void active_iface_check (struct totemrrp_instance *instance) 
{ struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance; unsigned int i; for (i = 0; i < instance->interface_count; i++) { if (rrp_algo_instance->faulty[i] == 0) { totemnet_iface_check (instance->net_handles[i]); } } } static void active_processor_count_set ( struct totemrrp_instance *instance, unsigned int processor_count) { struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance; unsigned int i; for (i = 0; i < instance->interface_count; i++) { if (rrp_algo_instance->faulty[i] == 0) { totemnet_processor_count_set (instance->net_handles[i], processor_count); } } } static void active_token_target_set ( struct totemrrp_instance *instance, struct totem_ip_address *token_target, unsigned int iface_no) { totemnet_token_target_set (instance->net_handles[iface_no], token_target); } static int active_mcast_recv_empty ( struct totemrrp_instance *instance) { int res; int msgs_emptied = 0; int i; for (i = 0; i < instance->interface_count; i++) { res = totemnet_recv_mcast_empty (instance->net_handles[i]); if (res == -1) { return (-1); } if (res == 1) { msgs_emptied = 1; } } return (msgs_emptied); } static void active_ring_reenable ( struct totemrrp_instance *instance, unsigned int iface_no) { struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance; if (iface_no == instance->interface_count) { memset (rrp_algo_instance->last_token_recv, 0, sizeof (unsigned int) * instance->interface_count); memset (rrp_algo_instance->faulty, 0, sizeof (unsigned int) * instance->interface_count); memset (rrp_algo_instance->counter_problems, 0, sizeof (unsigned int) * instance->interface_count); } else { rrp_algo_instance->last_token_recv[iface_no] = 0; rrp_algo_instance->faulty[iface_no] = 0; rrp_algo_instance->counter_problems[iface_no] = 0; } } static void totemrrp_instance_initialize (struct totemrrp_instance *instance) { memset (instance, 0, sizeof (struct 
totemrrp_instance)); } static int totemrrp_algorithm_set ( struct totem_config *totem_config, struct totemrrp_instance *instance) { unsigned int res = -1; unsigned int i; for (i = 0; i < RRP_ALGOS_COUNT; i++) { if (strcmp (totem_config->rrp_mode, rrp_algos[i]->name) == 0) { instance->rrp_algo = rrp_algos[i]; if (rrp_algos[i]->initialize) { instance->rrp_algo_instance = rrp_algos[i]->initialize ( instance, totem_config->interface_count); } res = 0; break; } } for (i = 0; i < totem_config->interface_count; i++) { instance->status[i] = malloc (1024); sprintf (instance->status[i], "ring %d active with no faults", i); } return (res); } void rrp_deliver_fn ( void *context, const void *msg, unsigned int msg_len) { unsigned int token_seqid; unsigned int token_is; struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context; struct totemrrp_instance *rrp_instance = deliver_fn_context->instance; const struct message_header *hdr = msg; struct message_header tmp_msg, activate_msg; memset(&tmp_msg, 0, sizeof(struct message_header)); memset(&activate_msg, 0, sizeof(struct message_header)); rrp_instance->totemrrp_token_seqid_get ( msg, &token_seqid, &token_is); if (hdr->type == MESSAGE_TYPE_RING_TEST_ACTIVE) { log_printf ( rrp_instance->totemrrp_log_level_debug, "received message requesting test of ring now active\n"); if (hdr->endian_detector != ENDIAN_LOCAL) { test_active_msg_endian_convert(hdr, &tmp_msg); hdr = &tmp_msg; } if (hdr->nodeid_activator == rrp_instance->my_nodeid) { /* * Send an activate message */ activate_msg.type = MESSAGE_TYPE_RING_TEST_ACTIVATE; activate_msg.endian_detector = ENDIAN_LOCAL; activate_msg.ring_number = hdr->ring_number; activate_msg.nodeid_activator = rrp_instance->my_nodeid; totemnet_token_send ( rrp_instance->net_handles[deliver_fn_context->iface_no], &activate_msg, sizeof (struct message_header)); } else { /* * Send a ring test message */ totemnet_token_send ( rrp_instance->net_handles[deliver_fn_context->iface_no], 
msg, msg_len); } } else if (hdr->type == MESSAGE_TYPE_RING_TEST_ACTIVATE) { log_printf ( rrp_instance->totemrrp_log_level_notice, "Automatically recovered ring %d\n", hdr->ring_number); if (hdr->endian_detector != ENDIAN_LOCAL) { test_active_msg_endian_convert(hdr, &tmp_msg); hdr = &tmp_msg; } totemrrp_ring_reenable (rrp_instance, deliver_fn_context->iface_no); if (hdr->nodeid_activator != rrp_instance->my_nodeid) { totemnet_token_send ( rrp_instance->net_handles[deliver_fn_context->iface_no], msg, msg_len); } } else if (token_is) { /* * Deliver to the token receiver for this rrp algorithm */ rrp_instance->rrp_algo->token_recv ( rrp_instance, deliver_fn_context->iface_no, deliver_fn_context->context, msg, msg_len, token_seqid); } else { /* * Deliver to the mcast receiver for this rrp algorithm */ rrp_instance->rrp_algo->mcast_recv ( rrp_instance, deliver_fn_context->iface_no, deliver_fn_context->context, msg, msg_len); } } void rrp_iface_change_fn ( void *context, const struct totem_ip_address *iface_addr) { struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context; deliver_fn_context->instance->my_nodeid = iface_addr->nodeid; deliver_fn_context->instance->totemrrp_iface_change_fn ( deliver_fn_context->context, iface_addr, deliver_fn_context->iface_no); } int totemrrp_finalize ( void *rrp_context) { struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; int i; for (i = 0; i < instance->interface_count; i++) { totemnet_finalize (instance->net_handles[i]); } return (0); } static void rrp_target_set_completed (void *context) { struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context; deliver_fn_context->instance->totemrrp_target_set_completed (deliver_fn_context->context); } /* * Totem Redundant Ring interface * depends on poll abstraction, POSIX, IPV4 */ /* * Create an instance */ int totemrrp_initialize ( qb_loop_t *poll_handle, void **rrp_context, struct totem_config *totem_config, void 
*context, void (*deliver_fn) ( void *context, const void *msg, unsigned int msg_len), void (*iface_change_fn) ( void *context, const struct totem_ip_address *iface_addr, unsigned int iface_no), void (*token_seqid_get) ( const void *msg, unsigned int *seqid, unsigned int *token_is), unsigned int (*msgs_missing) (void), void (*target_set_completed) (void *context)) { struct totemrrp_instance *instance; unsigned int res; int i; instance = malloc (sizeof (struct totemrrp_instance)); if (instance == 0) { return (-1); } totemrrp_instance_initialize (instance); instance->totem_config = totem_config; res = totemrrp_algorithm_set ( instance->totem_config, instance); if (res == -1) { goto error_destroy; } /* * Configure logging */ instance->totemrrp_log_level_security = totem_config->totem_logging_configuration.log_level_security; instance->totemrrp_log_level_error = totem_config->totem_logging_configuration.log_level_error; instance->totemrrp_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; instance->totemrrp_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; instance->totemrrp_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; instance->totemrrp_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; instance->totemrrp_log_printf = totem_config->totem_logging_configuration.log_printf; instance->interfaces = totem_config->interfaces; instance->poll_handle = poll_handle; instance->totemrrp_deliver_fn = deliver_fn; instance->totemrrp_iface_change_fn = iface_change_fn; instance->totemrrp_token_seqid_get = token_seqid_get; instance->totemrrp_target_set_completed = target_set_completed; instance->totemrrp_msgs_missing = msgs_missing; instance->interface_count = totem_config->interface_count; instance->net_handles = malloc (sizeof (void *) * totem_config->interface_count); instance->context = context; instance->poll_handle = poll_handle; for (i = 0; i < 
totem_config->interface_count; i++) { struct deliver_fn_context *deliver_fn_context; deliver_fn_context = malloc (sizeof (struct deliver_fn_context)); assert (deliver_fn_context); deliver_fn_context->instance = instance; deliver_fn_context->context = context; deliver_fn_context->iface_no = i; instance->deliver_fn_context[i] = (void *)deliver_fn_context; totemnet_initialize ( poll_handle, &instance->net_handles[i], totem_config, i, (void *)deliver_fn_context, rrp_deliver_fn, rrp_iface_change_fn, rrp_target_set_completed); totemnet_net_mtu_adjust (instance->net_handles[i], totem_config); } *rrp_context = instance; return (0); error_destroy: free (instance); return (res); } void *totemrrp_buffer_alloc (void *rrp_context) { struct totemrrp_instance *instance = rrp_context; assert (instance != NULL); return totemnet_buffer_alloc (instance->net_handles[0]); } void totemrrp_buffer_release (void *rrp_context, void *ptr) { struct totemrrp_instance *instance = rrp_context; assert (instance != NULL); totemnet_buffer_release (instance->net_handles[0], ptr); } int totemrrp_processor_count_set ( void *rrp_context, unsigned int processor_count) { struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; instance->rrp_algo->processor_count_set (instance, processor_count); instance->processor_count = processor_count; return (0); } int totemrrp_token_target_set ( void *rrp_context, struct totem_ip_address *addr, unsigned int iface_no) { struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; instance->rrp_algo->token_target_set (instance, addr, iface_no); return (0); } int totemrrp_recv_flush (void *rrp_context) { struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; instance->rrp_algo->recv_flush (instance); return (0); } int totemrrp_send_flush (void *rrp_context) { struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; instance->rrp_algo->send_flush (instance); return (0); } int 
totemrrp_token_send ( void *rrp_context, const void *msg, unsigned int msg_len) { struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; instance->rrp_algo->token_send (instance, msg, msg_len); return (0); } int totemrrp_mcast_flush_send ( void *rrp_context, const void *msg, unsigned int msg_len) { struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; int res = 0; // TODO this needs to return the result instance->rrp_algo->mcast_flush_send (instance, msg, msg_len); return (res); } int totemrrp_mcast_noflush_send ( void *rrp_context, const void *msg, unsigned int msg_len) { struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; /* * merge detects go out through mcast_flush_send so it is safe to * flush these messages if we are only one processor. This avoids * an encryption/hmac and decryption/hmac */ if (instance->processor_count > 1) { // TODO this needs to return the result instance->rrp_algo->mcast_noflush_send (instance, msg, msg_len); } return (0); } int totemrrp_iface_check (void *rrp_context) { struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; instance->rrp_algo->iface_check (instance); return (0); } int totemrrp_ifaces_get ( void *rrp_context, char ***status, unsigned int *iface_count) { struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; *status = instance->status; if (iface_count) { *iface_count = instance->interface_count; } return (0); } int totemrrp_crypto_set ( void *rrp_context, unsigned int type) { struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; int res; res = totemnet_crypto_set(instance->net_handles[0], type); return (res); } /* * iface_no indicates the interface number [0, ..., interface_count-1] of the * specific ring which will be reenabled. We specify iface_no == interface_count * means reenabling all the rings. 
*/ int totemrrp_ring_reenable ( void *rrp_context, unsigned int iface_no) { struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; int res = 0; unsigned int i; instance->rrp_algo->ring_reenable (instance, iface_no); if (iface_no == instance->interface_count) { for (i = 0; i < instance->interface_count; i++) { sprintf (instance->status[i], "ring %d active with no faults", i); } } else { sprintf (instance->status[iface_no], "ring %d active with no faults", iface_no); } return (res); } extern int totemrrp_mcast_recv_empty ( void *rrp_context) { struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; int res; res = instance->rrp_algo->mcast_recv_empty (instance); return (res); } int totemrrp_member_add ( void *rrp_context, const struct totem_ip_address *member, int iface_no) { struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; int res; res = instance->rrp_algo->member_add (instance, member, iface_no); return (res); } int totemrrp_member_remove ( void *rrp_context, const struct totem_ip_address *member, int iface_no) { struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; int res; res = instance->rrp_algo->member_remove (instance, member, iface_no); return (res); } diff --git a/exec/totemsrp.c b/exec/totemsrp.c index cb2d9152..e05d65bc 100644 --- a/exec/totemsrp.c +++ b/exec/totemsrp.c @@ -1,4489 +1,4497 @@ /* * Copyright (c) 2003-2006 MontaVista Software, Inc. * Copyright (c) 2006-2009 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. 
* - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* * The first version of this code was based upon Yair Amir's PhD thesis: * http://www.cs.jhu.edu/~yairamir/phd.ps) (ch4,5). 
* * The current version of totemsrp implements the Totem protocol specified in: * http://citeseer.ist.psu.edu/amir95totem.html * * The deviations from the above published protocols are: * - encryption of message contents with SOBER128 * - authentication of meessage contents with SHA1/HMAC * - token hold mode where token doesn't rotate on unused ring - reduces cpu * usage on 1.6ghz xeon from 35% to less then .1 % as measured by top */ #include #include #ifdef HAVE_ALLOCA_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define LOGSYS_UTILS_ONLY 1 #include #include "totemsrp.h" #include "totemrrp.h" #include "totemnet.h" #include "crypto.h" #define LOCALHOST_IP inet_addr("127.0.0.1") #define QUEUE_RTR_ITEMS_SIZE_MAX 16384 /* allow 16384 retransmit items */ #define RETRANS_MESSAGE_QUEUE_SIZE_MAX 16384 /* allow 500 messages to be queued */ #define RECEIVED_MESSAGE_QUEUE_SIZE_MAX 500 /* allow 500 messages to be queued */ #define MAXIOVS 5 #define RETRANSMIT_ENTRIES_MAX 30 #define TOKEN_SIZE_MAX 64000 /* bytes */ #define LEAVE_DUMMY_NODEID 0 /* * Rollover handling: * SEQNO_START_MSG is the starting sequence number after a new configuration * This should remain zero, unless testing overflow in which case * 0x7ffff000 and 0xfffff000 are good starting values. * * SEQNO_START_TOKEN is the starting sequence number after a new configuration * for a token. This should remain zero, unless testing overflow in which * case 07fffff00 or 0xffffff00 are good starting values. 
*
 * SEQNO_START_MSG is the starting sequence number after a new configuration
 * This should remain zero, unless testing overflow in which case
 * 0x7ffff000 and 0xfffff000 are good values to start with
 */
#define SEQNO_START_MSG 0x0
#define SEQNO_START_TOKEN 0x0

/*
 * These can be used to test different rollover points
 * #define SEQNO_START_MSG 0xfffffe00
 * #define SEQNO_START_TOKEN 0xfffffe00
 */

/*
 * These can be used to test the error recovery algorithms
 * #define TEST_DROP_ORF_TOKEN_PERCENTAGE 30
 * #define TEST_DROP_COMMIT_TOKEN_PERCENTAGE 30
 * #define TEST_DROP_MCAST_PERCENTAGE 50
 * #define TEST_RECOVERY_MSG_COUNT 300
 */

/*
 * we compare incoming messages to determine if their endian is
 * different - if so convert them
 *
 * do not change
 */
#define ENDIAN_LOCAL 0xff22

/*
 * Message kinds; the numeric values are fixed explicitly (0..5).
 */
enum message_type {
	MESSAGE_TYPE_ORF_TOKEN = 0, /* Ordering, Reliability, Flow (ORF) control Token */
	MESSAGE_TYPE_MCAST = 1, /* ring ordered multicast message */
	MESSAGE_TYPE_MEMB_MERGE_DETECT = 2, /* merge rings if there are available rings */
	MESSAGE_TYPE_MEMB_JOIN = 3, /* membership join message */
	MESSAGE_TYPE_MEMB_COMMIT_TOKEN = 4, /* membership commit token */
	MESSAGE_TYPE_TOKEN_HOLD_CANCEL = 5, /* cancel the holding of the token */
};

/*
 * NOTE(review): presumably the values carried in message_header.encapsulated
 * - confirm at the point of use.
 */
enum encapsulation_type {
	MESSAGE_ENCAPSULATED = 1,
	MESSAGE_NOT_ENCAPSULATED = 2
};

/*
 * New membership algorithm local variables
 */

/* One address per interface slot (INTERFACE_MAX entries). */
struct srp_addr {
	struct totem_ip_address addr[INTERFACE_MAX];
};

/* An address paired with an integer flag. */
struct consensus_list_item {
	struct srp_addr addr;
	int set;
};

/* List node holding a token callback, its type, a delete flag and user data. */
struct token_callback_instance {
	struct list_head list;
	int (*callback_fn) (enum totem_callback_token_type type, const void *);
	enum totem_callback_token_type callback_type;
	int delete;
	void *data;
};

struct totemsrp_socket {
	int mcast;
	int token;
};

/*
 * Header placed first in every message structure below.  Packed, so no
 * padding is inserted between members.
 */
struct message_header {
	char type;
	char encapsulated;
	unsigned short endian_detector;
	unsigned int nodeid;
} __attribute__((packed));

/* Multicast message header (packed). */
struct mcast {
	struct message_header header;
	struct srp_addr system_from;
	unsigned int seq;
	int this_seqno;
	struct memb_ring_id ring_id;
	unsigned int node_id;
	int guarantee;
} __attribute__((packed));

/* Element type of orf_token.rtr_list (packed). */
struct rtr_item {
	struct memb_ring_id ring_id;
	unsigned int seq;
}__attribute__((packed));

/*
 * ORF token (packed).  rtr_list is a zero-length trailing array; the
 * rtr_list_entries member precedes it.
 */
struct orf_token {
	struct message_header header;
	unsigned int seq;
	unsigned int token_seq;
	unsigned int aru;
	unsigned int aru_addr;
	struct memb_ring_id ring_id;
	unsigned int backlog;
	unsigned int fcc;
	int retrans_flg;
	int rtr_list_entries;
	struct rtr_item rtr_list[0];
}__attribute__((packed));

/* Membership join message (packed); variable-length lists follow the struct. */
struct memb_join {
	struct message_header header;
	struct srp_addr system_from;
	unsigned int proc_list_entries;
	unsigned int failed_list_entries;
	unsigned long long ring_seq;
	unsigned char end_of_memb_join[0];
	/*
	 * These parts of the data structure are dynamic:
	 * struct srp_addr proc_list[];
	 * struct srp_addr failed_list[];
	 */
} __attribute__((packed));

/* Merge-detect message (packed). */
struct memb_merge_detect {
	struct message_header header;
	struct srp_addr system_from;
	struct memb_ring_id ring_id;
} __attribute__((packed));

/* Token-hold-cancel message (packed). */
struct token_hold_cancel {
	struct message_header header;
	struct memb_ring_id ring_id;
} __attribute__((packed));

/* Per-member entry of the commit token's dynamic memb_list (packed). */
struct memb_commit_token_memb_entry {
	struct memb_ring_id ring_id;
	unsigned int aru;
	unsigned int high_delivered;
	unsigned int received_flg;
}__attribute__((packed));

/* Membership commit token (packed); variable-length lists follow the struct. */
struct memb_commit_token {
	struct message_header header;
	unsigned int token_seq;
	struct memb_ring_id ring_id;
	unsigned int retrans_flg;
	int memb_index;
	int addr_entries;
	unsigned char end_of_commit_token[0];
	/*
	 * These parts of the data structure are dynamic:
	 *
	 * struct srp_addr addr[PROCESSOR_COUNT_MAX];
	 * struct memb_commit_token_memb_entry memb_list[PROCESSOR_COUNT_MAX];
	 */
}__attribute__((packed));

/* A multicast message pointer with its length. */
struct message_item {
	struct mcast *mcast;
	unsigned int msg_len;
};

/* Same layout as message_item: a multicast message pointer with its length. */
struct sort_queue_item {
	struct mcast *mcast;
	unsigned int msg_len;
};

struct orf_token_mcast_thread_state {
	char iobuf[9000];
	prng_state prng_state;
};

/* Membership protocol states; values are fixed explicitly (1..4). */
enum memb_state {
	MEMB_STATE_OPERATIONAL = 1,
	MEMB_STATE_GATHER = 2,
	MEMB_STATE_COMMIT = 3,
	MEMB_STATE_RECOVERY = 4
};
/*
 * All state of one totemsrp instance: membership lists and their entry
 * counts, sequence bookkeeping, message queues, timers, logging hooks,
 * application callbacks and statistics.
 */
struct totemsrp_instance {
	int iface_changes;

	int failed_to_recv;

	/*
	 * Flow control mcasts and remcasts on last and current orf_token
	 */
	int fcc_remcast_last;

	int fcc_mcast_last;

	int fcc_remcast_current;

	struct consensus_list_item consensus_list[PROCESSOR_COUNT_MAX];

	int consensus_list_entries;

	struct srp_addr my_id;

	/*
	 * Address lists, each sized PROCESSOR_COUNT_MAX; the matching
	 * *_entries counters follow below.
	 */
	struct srp_addr my_proc_list[PROCESSOR_COUNT_MAX];

	struct srp_addr my_failed_list[PROCESSOR_COUNT_MAX];

	struct srp_addr my_new_memb_list[PROCESSOR_COUNT_MAX];

	struct srp_addr my_trans_memb_list[PROCESSOR_COUNT_MAX];

	struct srp_addr my_memb_list[PROCESSOR_COUNT_MAX];

	struct srp_addr my_deliver_memb_list[PROCESSOR_COUNT_MAX];

	struct srp_addr my_left_memb_list[PROCESSOR_COUNT_MAX];

	int my_proc_list_entries;

	int my_failed_list_entries;

	int my_new_memb_entries;

	int my_trans_memb_entries;

	int my_memb_entries;

	int my_deliver_memb_entries;

	int my_left_memb_entries;

	struct memb_ring_id my_ring_id;

	struct memb_ring_id my_old_ring_id;

	int my_aru_count;

	int my_merge_detect_timeout_outstanding;

	unsigned int my_last_aru;

	int my_seq_unchanged;

	int my_received_flg;

	unsigned int my_high_seq_received;

	unsigned int my_install_seq;

	int my_rotation_counter;

	int my_set_retrans_flg;

	int my_retrans_flg_count;

	unsigned int my_high_ring_delivered;

	int heartbeat_timeout;

	/*
	 * Queues used to order, deliver, and recover messages
	 */
	struct cs_queue new_message_queue;

	struct cs_queue retrans_message_queue;

	struct sq regular_sort_queue;

	struct sq recovery_sort_queue;

	/*
	 * Received up to and including
	 */
	unsigned int my_aru;

	unsigned int my_high_delivered;

	struct list_head token_callback_received_listhead;

	struct list_head token_callback_sent_listhead;

	/* Buffer of TOKEN_SIZE_MAX bytes plus the size currently in use */
	char orf_token_retransmit[TOKEN_SIZE_MAX];

	int orf_token_retransmit_size;

	unsigned int my_token_seq;

	/*
	 * Timers
	 */
	qb_loop_timer_handle timer_pause_timeout;

	qb_loop_timer_handle timer_orf_token_timeout;

	qb_loop_timer_handle timer_orf_token_retransmit_timeout;

	qb_loop_timer_handle timer_orf_token_hold_retransmit_timeout;

	qb_loop_timer_handle timer_merge_detect_timeout;

	qb_loop_timer_handle memb_timer_state_gather_join_timeout;

	qb_loop_timer_handle memb_timer_state_gather_consensus_timeout;

	qb_loop_timer_handle memb_timer_state_commit_timeout;

	qb_loop_timer_handle timer_heartbeat_timeout;

	/*
	 * Function and data used to log messages
	 */
	int totemsrp_log_level_security;

	int totemsrp_log_level_error;

	int totemsrp_log_level_warning;

	int totemsrp_log_level_notice;

	int totemsrp_log_level_debug;

	int totemsrp_subsys_id;

	/*
	 * printf-style log hook.  The patch replaces the single encoded
	 * record identifier with separate level and subsystem arguments,
	 * which moves the format string from argument 5 to argument 6.
	 */
	void (*totemsrp_log_printf) (
-		unsigned int rec_ident,
+		int level,
+		int sybsys,
		const char *function,
		const char *file,
		int line,
-		const char *format, ...)__attribute__((format(printf, 5, 6)));;
+		const char *format, ...)__attribute__((format(printf, 6, 7)));;

	enum memb_state memb_state;

//TODO	struct srp_addr next_memb;

	qb_loop_t *totemsrp_poll_handle;

	struct totem_ip_address mcast_address;

	/* Application callbacks */
	void (*totemsrp_deliver_fn) (
		unsigned int nodeid,
		const void *msg,
		unsigned int msg_len,
		int endian_conversion_required);

	void (*totemsrp_confchg_fn) (
		enum totem_configuration_type configuration_type,
		const unsigned int *member_list, size_t member_list_entries,
		const unsigned int *left_list, size_t left_list_entries,
		const unsigned int *joined_list, size_t joined_list_entries,
		const struct memb_ring_id *ring_id);

	void (*totemsrp_service_ready_fn) (void);

	int global_seqno;

	int my_token_held;

	unsigned long long token_ring_id_seq;

	unsigned int last_released;

	unsigned int set_aru;

	int old_ring_state_saved;

	int old_ring_state_aru;

	unsigned int old_ring_state_high_seq_received;

	unsigned int my_last_seq;

	struct timeval tv_old;

	/* Opaque context of the totemrrp layer */
	void *totemrrp_context;

	struct totem_config *totem_config;

	unsigned int use_heartbeat;

	unsigned int my_trc;

	unsigned int my_pbl;

	unsigned int my_cbl;

	uint64_t pause_timestamp;

	/* NOTE(review): presumably points into commit_token_storage - confirm */
	struct memb_commit_token *commit_token;

	totemsrp_stats_t stats;

	uint32_t orf_token_discard;

	void * token_recv_event_handle;
	void * token_sent_event_handle;
	char commit_token_storage[40000];
};
/*
 * Table of message handler callbacks. Every handler takes the instance,
 * a pointer to the received message, the message length and a flag that
 * says whether the message needs endian conversion.
 * NOTE(review): count is presumably the number of populated entries in
 * handler_functions - confirm against the dispatch code.
 */
struct message_handlers {
	int count;
	int (*handler_functions[6]) (
		struct totemsrp_instance *instance,
		const void *msg,
		size_t msg_len,
		int endian_conversion_needed);
};

/*
 * forward decls
 */
static int message_handler_orf_token (
	struct totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed);

static int message_handler_mcast (
	struct totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed);

static int message_handler_memb_merge_detect (
	struct totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed);

static int message_handler_memb_join (
	struct totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed);

static int message_handler_memb_commit_token (
	struct totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed);

static int message_handler_token_hold_cancel (
	struct totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed);

static void totemsrp_instance_initialize (struct totemsrp_instance *instance);

static unsigned int main_msgs_missing (void);

static void main_token_seqid_get (
	const void *msg,
	unsigned int *seqid,
	unsigned int *token_is);

static void srp_addr_copy (struct srp_addr *dest, const struct srp_addr *src);

static void srp_addr_to_nodeid (
	unsigned int *nodeid_out,
	struct srp_addr *srp_addr_in,
	unsigned int entries);

static int srp_addr_equal (const struct srp_addr *a, const struct srp_addr *b);

static void memb_leave_message_send (struct totemsrp_instance *instance);

static void memb_ring_id_create_or_load (struct totemsrp_instance *, struct memb_ring_id *);

static void token_callbacks_execute (struct totemsrp_instance *instance, enum totem_callback_token_type type);
static void memb_state_gather_enter (struct totemsrp_instance *instance, int gather_from);
static void messages_deliver_to_app (struct totemsrp_instance *instance, int skip,
	unsigned int end_point);
static int orf_token_mcast (struct totemsrp_instance *instance, struct orf_token *oken,
	int fcc_mcasts_allowed);
static void messages_free (struct totemsrp_instance *instance, unsigned int token_aru);

static void memb_ring_id_set_and_store (struct totemsrp_instance *instance,
	const struct memb_ring_id *ring_id);
static void target_set_completed (void *context);
static void memb_state_commit_token_update (struct totemsrp_instance *instance);
static void memb_state_commit_token_target_set (struct totemsrp_instance *instance);
static int memb_state_commit_token_send (struct totemsrp_instance *instance);
static int memb_state_commit_token_send_recovery (struct totemsrp_instance *instance, struct memb_commit_token *memb_commit_token);
static void memb_state_commit_token_create (struct totemsrp_instance *instance);
static int token_hold_cancel_send (struct totemsrp_instance *instance);

/*
 * Endian conversion helpers: each reads the message at `in` and writes
 * the converted form to `out`.
 */
static void orf_token_endian_convert (const struct orf_token *in, struct orf_token *out);
static void memb_commit_token_endian_convert (const struct memb_commit_token *in, struct memb_commit_token *out);
static void memb_join_endian_convert (const struct memb_join *in, struct memb_join *out);
static void mcast_endian_convert (const struct mcast *in, struct mcast *out);
static void memb_merge_detect_endian_convert (
	const struct memb_merge_detect *in,
	struct memb_merge_detect *out);
static void srp_addr_copy_endian_convert (struct srp_addr *out, const struct srp_addr *in);

/*
 * Timer callbacks; the instance is handed in through the void pointer.
 */
static void timer_function_orf_token_timeout (void *data);
static void timer_function_pause_timeout (void *data);
static void timer_function_heartbeat_timeout (void *data);
static void timer_function_token_retransmit_timeout (void *data);
static void timer_function_token_hold_retransmit_timeout (void *data);
static void timer_function_merge_detect_timeout (void *data);

static void *totemsrp_buffer_alloc (struct totemsrp_instance *instance);
static void totemsrp_buffer_release (struct
totemsrp_instance *instance, void *ptr);

/*
 * Delivery and interface-change callbacks. totemsrp_initialize() passes
 * both of them to totemrrp_initialize().
 */
void main_deliver_fn (
	void *context,
	const void *msg,
	unsigned int msg_len);

void main_iface_change_fn (
	void *context,
	const struct totem_ip_address *iface_address,
	unsigned int iface_no);

/*
 * The six message handlers of this module, in table order.
 * NOTE(review): the table is presumably indexed by the message header
 * type - confirm against main_deliver_fn.
 */
struct message_handlers totemsrp_message_handlers = {
	6,
	{
		message_handler_orf_token,
		message_handler_mcast,
		message_handler_memb_merge_detect,
		message_handler_memb_join,
		message_handler_memb_commit_token,
		message_handler_token_hold_cancel
	}
};

/*
 * Run directory; set by totemsrp_initialize() from COROSYNC_RUN_DIR or
 * from the LOCALSTATEDIR based default.
 */
static const char *rundir = NULL;

/*
 * Logging wrapper: forwards level, the instance subsystem id and the call
 * site to instance->totemsrp_log_printf. A variable named `instance` must
 * be in scope at every call site.
 * NOTE(review): the macro ends in "while (0);" - the trailing semicolon
 * makes "if (x) log_printf (...); else ..." fail to compile without braces.
 */
-#define log_printf(level, format, args...)		\
-do {							\
-	instance->totemsrp_log_printf (			\
-		LOGSYS_ENCODE_RECID(level,		\
-				    instance->totemsrp_subsys_id, \
-				    LOGSYS_RECID_LOG),	\
-		__FUNCTION__, __FILE__, __LINE__,	\
-		format, ##args);			\
+#define log_printf(level, format, args...)		\
+do {							\
+	instance->totemsrp_log_printf (			\
+		level, instance->totemsrp_subsys_id,	\
+		__FUNCTION__, __FILE__, __LINE__,	\
+		format, ##args);			\
 } while (0);
 
+#define LOGSYS_PERROR(err_num, level, fmt, args...)
\ +do { \ + char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \ + const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \ + instance->totemsrp_log_printf ( \ + level, instance->totemsrp_subsys_id, \ + __FUNCTION__, __FILE__, __LINE__, \ + fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \ + } while(0) static void totemsrp_instance_initialize (struct totemsrp_instance *instance) { memset (instance, 0, sizeof (struct totemsrp_instance)); list_init (&instance->token_callback_received_listhead); list_init (&instance->token_callback_sent_listhead); instance->my_received_flg = 1; instance->my_token_seq = SEQNO_START_TOKEN - 1; instance->memb_state = MEMB_STATE_OPERATIONAL; instance->set_aru = -1; instance->my_aru = SEQNO_START_MSG; instance->my_high_seq_received = SEQNO_START_MSG; instance->my_high_delivered = SEQNO_START_MSG; instance->orf_token_discard = 0; instance->commit_token = (struct memb_commit_token *)instance->commit_token_storage; } static void main_token_seqid_get ( const void *msg, unsigned int *seqid, unsigned int *token_is) { const struct orf_token *token = msg; *seqid = 0; *token_is = 0; if (token->header.type == MESSAGE_TYPE_ORF_TOKEN) { *seqid = token->token_seq; *token_is = 1; } } static unsigned int main_msgs_missing (void) { // TODO return (0); } static int pause_flush (struct totemsrp_instance *instance) { uint64_t now_msec; uint64_t timestamp_msec; int res = 0; now_msec = (qb_util_nano_current_get () / QB_TIME_NS_IN_MSEC); timestamp_msec = instance->pause_timestamp / QB_TIME_NS_IN_MSEC; if ((now_msec - timestamp_msec) > (instance->totem_config->token_timeout / 2)) { log_printf (instance->totemsrp_log_level_notice, "Process pause detected for %d ms, flushing membership messages.\n", (unsigned int)(now_msec - timestamp_msec)); /* * -1 indicates an error from recvmsg */ do { res = totemrrp_mcast_recv_empty (instance->totemrrp_context); } while (res == -1); } return (res); } static int token_event_stats_collector (enum 
totem_callback_token_type type, const void *void_instance)
{
	/*
	 * Token callback that records, in the instance statistics ring, the
	 * millisecond timestamps at which a token was received (rx) and
	 * sent (tx). Always returns 0.
	 */
	struct totemsrp_instance *instance = (struct totemsrp_instance *)void_instance;
	totemsrp_stats_t *stats = &instance->stats;
	uint32_t time_now;
	unsigned long long nano_secs = qb_util_nano_current_get ();

	time_now = (nano_secs / QB_TIME_NS_IN_MSEC);

	if (type != TOTEM_CALLBACK_TOKEN_RECEIVED) {
		/* token sent: stamp the slot opened by the last receive */
		stats->token[stats->latest_token].tx = time_now;
		return 0;
	}

	/* advance the latest-token index, wrapping at the end of the array */
	if (stats->latest_token == (TOTEM_TOKEN_STATS_MAX - 1)) {
		stats->latest_token = 0;
	} else {
		stats->latest_token++;
	}

	if (stats->earliest_token == stats->latest_token) {
		/* we have filled up the array, start overwriting */
		if (stats->earliest_token == (TOTEM_TOKEN_STATS_MAX - 1)) {
			stats->earliest_token = 0;
		} else {
			stats->earliest_token++;
		}

		stats->token[stats->earliest_token].rx = 0;
		stats->token[stats->earliest_token].tx = 0;
		stats->token[stats->earliest_token].backlog_calc = 0;
	}

	stats->token[stats->latest_token].rx = time_now;
	stats->token[stats->latest_token].tx = 0; /* in case we drop the token */

	return 0;
}

/*
 * Exported interfaces
 */

/*
 * Allocate and set up a totemsrp instance.
 *
 * Creates the run directory if needed and changes into it, copies the
 * logging configuration, logs the totem settings, initialises the
 * queues, decides whether the heartbeat is usable, initialises the
 * totemrrp layer and registers the token statistics callbacks.
 *
 * poll_handle   loop used for the timers and handed on to totemrrp
 * srp_context   receives the new instance on success
 * totem_config  configuration; the pointer is stored in the instance
 * stats         its srp member is pointed at the instance statistics
 * deliver_fn    stored as the message delivery callback
 * confchg_fn    stored as the configuration change callback
 *
 * Returns 0 on success; -1 when the allocation fails or the run directory
 * cannot be created or entered (the instance is freed in that case).
 */
int totemsrp_initialize (
	qb_loop_t *poll_handle,
	void **srp_context,
	struct totem_config *totem_config,
	totemmrp_stats_t *stats,

	void (*deliver_fn) (
		unsigned int nodeid,
		const void *msg,
		unsigned int msg_len,
		int endian_conversion_required),

	void (*confchg_fn) (
		enum totem_configuration_type configuration_type,
		const unsigned int *member_list, size_t member_list_entries,
		const unsigned int *left_list, size_t left_list_entries,
		const unsigned int *joined_list, size_t joined_list_entries,
		const struct memb_ring_id *ring_id))
{
	struct totemsrp_instance *instance;
	/* mkdir() and chdir() return int and signal failure with -1 */
	int res;

	instance = malloc (sizeof (struct totemsrp_instance));
	if (instance == NULL) {
		goto error_exit;
	}

	rundir = getenv ("COROSYNC_RUN_DIR");
	if (rundir == NULL) {
		rundir = LOCALSTATEDIR "/lib/corosync";
	}

	/* an already existing run directory is not an error */
	res = mkdir (rundir, 0700);
	if (res == -1 && errno != EEXIST) {
		goto error_destroy;
	}

	res = chdir (rundir);
	if (res == -1) {
		goto error_destroy;
	}

	totemsrp_instance_initialize (instance);

	stats->srp = &instance->stats;
	instance->stats.latest_token = 0;
	instance->stats.earliest_token = 0;

	instance->totem_config = totem_config;

	/*
	 * Configure logging
	 */
	instance->totemsrp_log_level_security = totem_config->totem_logging_configuration.log_level_security;
	instance->totemsrp_log_level_error = totem_config->totem_logging_configuration.log_level_error;
	instance->totemsrp_log_level_warning = totem_config->totem_logging_configuration.log_level_warning;
	instance->totemsrp_log_level_notice = totem_config->totem_logging_configuration.log_level_notice;
	instance->totemsrp_log_level_debug = totem_config->totem_logging_configuration.log_level_debug;
	instance->totemsrp_subsys_id = totem_config->totem_logging_configuration.log_subsys_id;
	instance->totemsrp_log_printf = totem_config->totem_logging_configuration.log_printf;

	/*
	 * Initialize local variables for totemsrp
	 */
	totemip_copy (&instance->mcast_address, &totem_config->interfaces[0].mcast_addr);

	/*
	 * Display totem configuration
	 */
	log_printf (instance->totemsrp_log_level_debug,
		"Token Timeout (%d ms) retransmit timeout (%d ms)\n",
		totem_config->token_timeout, totem_config->token_retransmit_timeout);
	log_printf (instance->totemsrp_log_level_debug,
		"token hold (%d ms) retransmits before loss (%d retrans)\n",
		totem_config->token_hold_timeout, totem_config->token_retransmits_before_loss_const);
	log_printf (instance->totemsrp_log_level_debug,
		"join (%d ms) send_join (%d ms) consensus (%d ms) merge (%d ms)\n",
		totem_config->join_timeout,
		totem_config->send_join_timeout,
		totem_config->consensus_timeout,
		totem_config->merge_timeout);
	log_printf (instance->totemsrp_log_level_debug,
		"downcheck (%d ms) fail to recv const (%d msgs)\n",
		totem_config->downcheck_timeout, totem_config->fail_to_recv_const);
	log_printf (instance->totemsrp_log_level_debug,
		"seqno unchanged const (%d rotations) Maximum network MTU %d\n", totem_config->seqno_unchanged_const, totem_config->net_mtu);

	log_printf (instance->totemsrp_log_level_debug,
		"window size per rotation (%d messages) maximum messages per rotation (%d messages)\n",
		totem_config->window_size, totem_config->max_messages);

	log_printf (instance->totemsrp_log_level_debug,
		"missed count const (%d messages)\n",
		totem_config->miss_count_const);

	log_printf (instance->totemsrp_log_level_debug,
		"send threads (%d threads)\n", totem_config->threads);
	log_printf (instance->totemsrp_log_level_debug,
		"RRP token expired timeout (%d ms)\n",
		totem_config->rrp_token_expired_timeout);
	log_printf (instance->totemsrp_log_level_debug,
		"RRP token problem counter (%d ms)\n",
		totem_config->rrp_problem_count_timeout);
	log_printf (instance->totemsrp_log_level_debug,
		"RRP threshold (%d problem count)\n",
		totem_config->rrp_problem_count_threshold);
	log_printf (instance->totemsrp_log_level_debug,
		"RRP automatic recovery check timeout (%d ms)\n",
		totem_config->rrp_autorecovery_check_timeout);
	log_printf (instance->totemsrp_log_level_debug,
		"RRP mode set to %s.\n", instance->totem_config->rrp_mode);

	log_printf (instance->totemsrp_log_level_debug,
		"heartbeat_failures_allowed (%d)\n", totem_config->heartbeat_failures_allowed);
	log_printf (instance->totemsrp_log_level_debug,
		"max_network_delay (%d ms)\n", totem_config->max_network_delay);

	cs_queue_init (&instance->retrans_message_queue, RETRANS_MESSAGE_QUEUE_SIZE_MAX,
		sizeof (struct message_item));

	sq_init (&instance->regular_sort_queue,
		QUEUE_RTR_ITEMS_SIZE_MAX, sizeof (struct sort_queue_item), 0);

	sq_init (&instance->recovery_sort_queue,
		QUEUE_RTR_ITEMS_SIZE_MAX, sizeof (struct sort_queue_item), 0);

	instance->totemsrp_poll_handle = poll_handle;

	instance->totemsrp_deliver_fn = deliver_fn;

	instance->totemsrp_confchg_fn = confchg_fn;
	instance->use_heartbeat = 1;

	/* records the first pause timestamp and arms the pause timer */
	timer_function_pause_timeout (instance);

	if ( totem_config->heartbeat_failures_allowed == 0 ) {
		log_printf (instance->totemsrp_log_level_debug,
			"HeartBeat is Disabled. To enable set heartbeat_failures_allowed > 0\n");
		instance->use_heartbeat = 0;
	}

	if (instance->use_heartbeat) {
		instance->heartbeat_timeout
			= (totem_config->heartbeat_failures_allowed) * totem_config->token_retransmit_timeout
				+ totem_config->max_network_delay;

		/* a heartbeat timeout that is not below the token timeout disables the heartbeat */
		if (instance->heartbeat_timeout >= totem_config->token_timeout) {
			log_printf (instance->totemsrp_log_level_debug,
				"total heartbeat_timeout (%d ms) is not less than token timeout (%d ms)\n",
				instance->heartbeat_timeout,
				totem_config->token_timeout);
			log_printf (instance->totemsrp_log_level_debug,
				"heartbeat_timeout = heartbeat_failures_allowed * token_retransmit_timeout + max_network_delay\n");
			log_printf (instance->totemsrp_log_level_debug,
				"heartbeat timeout should be less than the token timeout. HeartBeat is Diabled !!\n");
			instance->use_heartbeat = 0;
		}
		else {
			log_printf (instance->totemsrp_log_level_debug,
				"total heartbeat_timeout (%d ms)\n", instance->heartbeat_timeout);
		}
	}

	totemrrp_initialize (
		poll_handle,
		&instance->totemrrp_context,
		totem_config,
		instance,
		main_deliver_fn,
		main_iface_change_fn,
		main_token_seqid_get,
		main_msgs_missing,
		target_set_completed);

	/*
	 * Must have net_mtu adjusted by totemrrp_initialize first
	 */
	cs_queue_init (&instance->new_message_queue,
		MESSAGE_QUEUE_MAX,
		sizeof (struct message_item));

	totemsrp_callback_token_create (instance,
		&instance->token_recv_event_handle,
		TOTEM_CALLBACK_TOKEN_RECEIVED,
		0,
		token_event_stats_collector,
		instance);
	totemsrp_callback_token_create (instance,
		&instance->token_sent_event_handle,
		TOTEM_CALLBACK_TOKEN_SENT,
		0,
		token_event_stats_collector,
		instance);
	*srp_context = instance;
	return (0);

error_destroy:
	free (instance);

error_exit:
	return (-1);
}

/*
 * Send a leave message for this instance and free it. srp_context must
 * not be used afterwards.
 */
void totemsrp_finalize (
	void *srp_context)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;

	memb_leave_message_send (instance);
	free (srp_context);
}
int totemsrp_ifaces_get ( void *srp_context, unsigned int nodeid, struct totem_ip_address *interfaces, char ***status, unsigned int *iface_count) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; int res = 0; unsigned int found = 0; unsigned int i; for (i = 0; i < instance->my_memb_entries; i++) { if (instance->my_memb_list[i].addr[0].nodeid == nodeid) { found = 1; break; } } if (found) { memcpy (interfaces, &instance->my_memb_list[i], sizeof (struct srp_addr)); *iface_count = instance->totem_config->interface_count; goto finish; } for (i = 0; i < instance->my_left_memb_entries; i++) { if (instance->my_left_memb_list[i].addr[0].nodeid == nodeid) { found = 1; break; } } if (found) { memcpy (interfaces, &instance->my_left_memb_list[i], sizeof (struct srp_addr)); *iface_count = instance->totem_config->interface_count; } else { res = -1; } finish: totemrrp_ifaces_get (instance->totemrrp_context, status, NULL); return (res); } int totemsrp_crypto_set ( void *srp_context, unsigned int type) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; int res; res = totemrrp_crypto_set(instance->totemrrp_context, type); return (res); } unsigned int totemsrp_my_nodeid_get ( void *srp_context) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; unsigned int res; res = instance->totem_config->interfaces[0].boundto.nodeid; return (res); } int totemsrp_my_family_get ( void *srp_context) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; int res; res = instance->totem_config->interfaces[0].boundto.family; return (res); } int totemsrp_ring_reenable ( void *srp_context) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; totemrrp_ring_reenable (instance->totemrrp_context, instance->totem_config->interface_count); return (0); } /* * Set operations for use by the membership algorithm */ static int srp_addr_equal (const struct srp_addr *a, const 
struct srp_addr *b) { unsigned int i; unsigned int res; for (i = 0; i < 1; i++) { res = totemip_equal (&a->addr[i], &b->addr[i]); if (res == 0) { return (0); } } return (1); } static void srp_addr_copy (struct srp_addr *dest, const struct srp_addr *src) { unsigned int i; for (i = 0; i < INTERFACE_MAX; i++) { totemip_copy (&dest->addr[i], &src->addr[i]); } } static void srp_addr_to_nodeid ( unsigned int *nodeid_out, struct srp_addr *srp_addr_in, unsigned int entries) { unsigned int i; for (i = 0; i < entries; i++) { nodeid_out[i] = srp_addr_in[i].addr[0].nodeid; } } static void srp_addr_copy_endian_convert (struct srp_addr *out, const struct srp_addr *in) { int i; for (i = 0; i < INTERFACE_MAX; i++) { totemip_copy_endian_convert (&out->addr[i], &in->addr[i]); } } static void memb_consensus_reset (struct totemsrp_instance *instance) { instance->consensus_list_entries = 0; } static void memb_set_subtract ( struct srp_addr *out_list, int *out_list_entries, struct srp_addr *one_list, int one_list_entries, struct srp_addr *two_list, int two_list_entries) { int found = 0; int i; int j; *out_list_entries = 0; for (i = 0; i < one_list_entries; i++) { for (j = 0; j < two_list_entries; j++) { if (srp_addr_equal (&one_list[i], &two_list[j])) { found = 1; break; } } if (found == 0) { srp_addr_copy (&out_list[*out_list_entries], &one_list[i]); *out_list_entries = *out_list_entries + 1; } found = 0; } } /* * Set consensus for a specific processor */ static void memb_consensus_set ( struct totemsrp_instance *instance, const struct srp_addr *addr) { int found = 0; int i; if (addr->addr[0].nodeid == LEAVE_DUMMY_NODEID) return; for (i = 0; i < instance->consensus_list_entries; i++) { if (srp_addr_equal(addr, &instance->consensus_list[i].addr)) { found = 1; break; /* found entry */ } } srp_addr_copy (&instance->consensus_list[i].addr, addr); instance->consensus_list[i].set = 1; if (found == 0) { instance->consensus_list_entries++; } return; } /* * Is consensus set for a specific 
processor */ static int memb_consensus_isset ( struct totemsrp_instance *instance, const struct srp_addr *addr) { int i; for (i = 0; i < instance->consensus_list_entries; i++) { if (srp_addr_equal (addr, &instance->consensus_list[i].addr)) { return (instance->consensus_list[i].set); } } return (0); } /* * Is consensus agreed upon based upon consensus database */ static int memb_consensus_agreed ( struct totemsrp_instance *instance) { struct srp_addr token_memb[PROCESSOR_COUNT_MAX]; int token_memb_entries = 0; int agreed = 1; int i; memb_set_subtract (token_memb, &token_memb_entries, instance->my_proc_list, instance->my_proc_list_entries, instance->my_failed_list, instance->my_failed_list_entries); for (i = 0; i < token_memb_entries; i++) { if (memb_consensus_isset (instance, &token_memb[i]) == 0) { agreed = 0; break; } } assert (token_memb_entries >= 1); return (agreed); } static void memb_consensus_notset ( struct totemsrp_instance *instance, struct srp_addr *no_consensus_list, int *no_consensus_list_entries, struct srp_addr *comparison_list, int comparison_list_entries) { int i; *no_consensus_list_entries = 0; for (i = 0; i < instance->my_proc_list_entries; i++) { if (memb_consensus_isset (instance, &instance->my_proc_list[i]) == 0) { srp_addr_copy (&no_consensus_list[*no_consensus_list_entries], &instance->my_proc_list[i]); *no_consensus_list_entries = *no_consensus_list_entries + 1; } } } /* * Is set1 equal to set2 Entries can be in different orders */ static int memb_set_equal ( struct srp_addr *set1, int set1_entries, struct srp_addr *set2, int set2_entries) { int i; int j; int found = 0; if (set1_entries != set2_entries) { return (0); } for (i = 0; i < set2_entries; i++) { for (j = 0; j < set1_entries; j++) { if (srp_addr_equal (&set1[j], &set2[i])) { found = 1; break; } } if (found == 0) { return (0); } found = 0; } return (1); } /* * Is subset fully contained in fullset */ static int memb_set_subset ( const struct srp_addr *subset, int subset_entries, 
const struct srp_addr *fullset, int fullset_entries) { int i; int j; int found = 0; if (subset_entries > fullset_entries) { return (0); } for (i = 0; i < subset_entries; i++) { for (j = 0; j < fullset_entries; j++) { if (srp_addr_equal (&subset[i], &fullset[j])) { found = 1; } } if (found == 0) { return (0); } found = 0; } return (1); } /* * merge subset into fullset taking care not to add duplicates */ static void memb_set_merge ( const struct srp_addr *subset, int subset_entries, struct srp_addr *fullset, int *fullset_entries) { int found = 0; int i; int j; for (i = 0; i < subset_entries; i++) { for (j = 0; j < *fullset_entries; j++) { if (srp_addr_equal (&fullset[j], &subset[i])) { found = 1; break; } } if (found == 0) { srp_addr_copy (&fullset[*fullset_entries], &subset[i]); *fullset_entries = *fullset_entries + 1; } found = 0; } return; } static void memb_set_and_with_ring_id ( struct srp_addr *set1, struct memb_ring_id *set1_ring_ids, int set1_entries, struct srp_addr *set2, int set2_entries, struct memb_ring_id *old_ring_id, struct srp_addr *and, int *and_entries) { int i; int j; int found = 0; *and_entries = 0; for (i = 0; i < set2_entries; i++) { for (j = 0; j < set1_entries; j++) { if (srp_addr_equal (&set1[j], &set2[i])) { if (memcmp (&set1_ring_ids[j], old_ring_id, sizeof (struct memb_ring_id)) == 0) { found = 1; } break; } } if (found) { srp_addr_copy (&and[*and_entries], &set1[j]); *and_entries = *and_entries + 1; } found = 0; } return; } #ifdef CODE_COVERAGE static void memb_set_print ( char *string, struct srp_addr *list, int list_entries) { int i; int j; printf ("List '%s' contains %d entries:\n", string, list_entries); for (i = 0; i < list_entries; i++) { for (j = 0; j < INTERFACE_MAX; j++) { printf ("Address %d\n", i); printf ("\tiface %d %s\n", j, totemip_print (&list[i].addr[j])); printf ("family %d\n", list[i].addr[j].family); } } } #endif static void *totemsrp_buffer_alloc (struct totemsrp_instance *instance) { assert (instance != NULL); 
return totemrrp_buffer_alloc (instance->totemrrp_context); } static void totemsrp_buffer_release (struct totemsrp_instance *instance, void *ptr) { assert (instance != NULL); totemrrp_buffer_release (instance->totemrrp_context, ptr); } static void reset_token_retransmit_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_retransmit_timeout); qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->token_retransmit_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_token_retransmit_timeout, &instance->timer_orf_token_retransmit_timeout); } static void start_merge_detect_timeout (struct totemsrp_instance *instance) { if (instance->my_merge_detect_timeout_outstanding == 0) { qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->merge_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_merge_detect_timeout, &instance->timer_merge_detect_timeout); instance->my_merge_detect_timeout_outstanding = 1; } } static void cancel_merge_detect_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_merge_detect_timeout); instance->my_merge_detect_timeout_outstanding = 0; } /* * ring_state_* is used to save and restore the sort queue * state when a recovery operation fails (and enters gather) */ static void old_ring_state_save (struct totemsrp_instance *instance) { if (instance->old_ring_state_saved == 0) { instance->old_ring_state_saved = 1; memcpy (&instance->my_old_ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); instance->old_ring_state_aru = instance->my_aru; instance->old_ring_state_high_seq_received = instance->my_high_seq_received; log_printf (instance->totemsrp_log_level_debug, "Saving state aru %x high seq received %x\n", instance->my_aru, instance->my_high_seq_received); } } static void old_ring_state_restore (struct totemsrp_instance *instance) { 
instance->my_aru = instance->old_ring_state_aru; instance->my_high_seq_received = instance->old_ring_state_high_seq_received; log_printf (instance->totemsrp_log_level_debug, "Restoring instance->my_aru %x my high seq received %x\n", instance->my_aru, instance->my_high_seq_received); } static void old_ring_state_reset (struct totemsrp_instance *instance) { log_printf (instance->totemsrp_log_level_debug, "Resetting old ring state\n"); instance->old_ring_state_saved = 0; } static void reset_pause_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_pause_timeout); qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->token_timeout * QB_TIME_NS_IN_MSEC / 5, (void *)instance, timer_function_pause_timeout, &instance->timer_pause_timeout); } static void reset_token_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_timeout); qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->token_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_orf_token_timeout, &instance->timer_orf_token_timeout); } static void reset_heartbeat_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_heartbeat_timeout); qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->heartbeat_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_heartbeat_timeout, &instance->timer_heartbeat_timeout); } static void cancel_token_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_timeout); } static void cancel_heartbeat_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_heartbeat_timeout); } static void cancel_token_retransmit_timeout (struct totemsrp_instance *instance) { 
qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_retransmit_timeout); } static void start_token_hold_retransmit_timeout (struct totemsrp_instance *instance) { qb_loop_timer_add (instance->totemsrp_poll_handle, QB_LOOP_MED, instance->totem_config->token_hold_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_token_hold_retransmit_timeout, &instance->timer_orf_token_hold_retransmit_timeout); } static void cancel_token_hold_retransmit_timeout (struct totemsrp_instance *instance) { qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_hold_retransmit_timeout); } static void memb_state_consensus_timeout_expired ( struct totemsrp_instance *instance) { struct srp_addr no_consensus_list[PROCESSOR_COUNT_MAX]; int no_consensus_list_entries; instance->stats.consensus_timeouts++; if (memb_consensus_agreed (instance)) { memb_consensus_reset (instance); memb_consensus_set (instance, &instance->my_id); reset_token_timeout (instance); // REVIEWED } else { memb_consensus_notset ( instance, no_consensus_list, &no_consensus_list_entries, instance->my_proc_list, instance->my_proc_list_entries); memb_set_merge (no_consensus_list, no_consensus_list_entries, instance->my_failed_list, &instance->my_failed_list_entries); memb_state_gather_enter (instance, 0); } } static void memb_join_message_send (struct totemsrp_instance *instance); static void memb_merge_detect_transmit (struct totemsrp_instance *instance); /* * Timers used for various states of the membership algorithm */ static void timer_function_pause_timeout (void *data) { struct totemsrp_instance *instance = data; instance->pause_timestamp = qb_util_nano_current_get (); reset_pause_timeout (instance); } static void memb_recovery_state_token_loss (struct totemsrp_instance *instance) { old_ring_state_restore (instance); memb_state_gather_enter (instance, 5); instance->stats.recovery_token_lost++; } static void timer_function_orf_token_timeout (void *data) { struct 
totemsrp_instance *instance = data;

	/*
	 * Token loss handling depends on the membership state the instance
	 * is in. Every branch ends up in the gather state; the gather_from
	 * argument (2, 3, 4, and 5 via memb_recovery_state_token_loss)
	 * differs per state, and the matching statistics counter is bumped.
	 */
	switch (instance->memb_state) {
		case MEMB_STATE_OPERATIONAL:
			log_printf (instance->totemsrp_log_level_debug,
				"The token was lost in the OPERATIONAL state.\n");
			log_printf (instance->totemsrp_log_level_notice,
				"A processor failed, forming new configuration.\n");
			totemrrp_iface_check (instance->totemrrp_context);
			memb_state_gather_enter (instance, 2);
			instance->stats.operational_token_lost++;
			break;

		case MEMB_STATE_GATHER:
			log_printf (instance->totemsrp_log_level_debug,
				"The consensus timeout expired.\n");
			memb_state_consensus_timeout_expired (instance);
			memb_state_gather_enter (instance, 3);
			instance->stats.gather_token_lost++;
			break;

		case MEMB_STATE_COMMIT:
			log_printf (instance->totemsrp_log_level_debug,
				"The token was lost in the COMMIT state.\n");
			memb_state_gather_enter (instance, 4);
			instance->stats.commit_token_lost++;
			break;

		case MEMB_STATE_RECOVERY:
			log_printf (instance->totemsrp_log_level_debug,
				"The token was lost in the RECOVERY state.\n");
			memb_recovery_state_token_loss (instance);
			instance->orf_token_discard = 1;
			break;
	}
}

/*
 * Heartbeat timer callback: log the expiry and run the same handling as
 * for a lost token.
 */
static void timer_function_heartbeat_timeout (void *data)
{
	struct totemsrp_instance *instance = data;
	log_printf (instance->totemsrp_log_level_debug,
		"HeartBeat Timer expired Invoking token loss mechanism in state %d \n", instance->memb_state);
	timer_function_orf_token_timeout(data);
}

/*
 * Join timer callback: in the gather and commit states a join message
 * is sent again and the join timer is restarted. The timer must not fire
 * in the operational or recovery states.
 */
static void memb_timer_function_state_gather (void *data)
{
	struct totemsrp_instance *instance = data;

	switch (instance->memb_state) {
	case MEMB_STATE_OPERATIONAL:
	case MEMB_STATE_RECOVERY:
		assert (0); /* this should never happen */
		break;
	case MEMB_STATE_GATHER:
	case MEMB_STATE_COMMIT:
		memb_join_message_send (instance);

		/*
		 * Restart the join timeout
		 */
		qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout);

		qb_loop_timer_add (instance->totemsrp_poll_handle,
			QB_LOOP_MED,
			instance->totem_config->join_timeout*QB_TIME_NS_IN_MSEC,
			(void *)instance,
memb_timer_function_state_gather, &instance->memb_timer_state_gather_join_timeout); break; } } static void memb_timer_function_gather_consensus_timeout (void *data) { struct totemsrp_instance *instance = data; memb_state_consensus_timeout_expired (instance); } static void deliver_messages_from_recovery_to_regular (struct totemsrp_instance *instance) { unsigned int i; struct sort_queue_item *recovery_message_item; struct sort_queue_item regular_message_item; unsigned int range = 0; int res; void *ptr; struct mcast *mcast; log_printf (instance->totemsrp_log_level_debug, "recovery to regular %x-%x\n", SEQNO_START_MSG + 1, instance->my_aru); range = instance->my_aru - SEQNO_START_MSG; /* * Move messages from recovery to regular sort queue */ // todo should i be initialized to 0 or 1 ? for (i = 1; i <= range; i++) { res = sq_item_get (&instance->recovery_sort_queue, i + SEQNO_START_MSG, &ptr); if (res != 0) { continue; } recovery_message_item = ptr; /* * Convert recovery message into regular message */ mcast = recovery_message_item->mcast; if (mcast->header.encapsulated == MESSAGE_ENCAPSULATED) { /* * Message is a recovery message encapsulated * in a new ring message */ regular_message_item.mcast = (struct mcast *)(((char *)recovery_message_item->mcast) + sizeof (struct mcast)); regular_message_item.msg_len = recovery_message_item->msg_len - sizeof (struct mcast); mcast = regular_message_item.mcast; } else { /* * TODO this case shouldn't happen */ continue; } log_printf (instance->totemsrp_log_level_debug, "comparing if ring id is for this processors old ring seqno %d\n", mcast->seq); /* * Only add this message to the regular sort * queue if it was originated with the same ring * id as the previous ring */ if (memcmp (&instance->my_old_ring_id, &mcast->ring_id, sizeof (struct memb_ring_id)) == 0) { res = sq_item_inuse (&instance->regular_sort_queue, mcast->seq); if (res == 0) { sq_item_add (&instance->regular_sort_queue, ®ular_message_item, mcast->seq); if 
(sq_lt_compare (instance->old_ring_state_high_seq_received, mcast->seq)) {
					instance->old_ring_state_high_seq_received = mcast->seq;
				}
			}
		} else {
			log_printf (instance->totemsrp_log_level_debug,
				"-not adding msg with seq no %x\n", mcast->seq);
		}
	}
}

/*
 * Change states in the state machine of the membership algorithm
 */

/*
 * Enter the OPERATIONAL state with the new membership.
 *
 * In order: messages recovered for the old ring are moved to the regular
 * queue and delivered, the left and joined lists are computed, the new
 * membership is installed, the transitional and then the regular
 * configuration change are reported through totemsrp_confchg_fn, the
 * recovery sort queue is copied over the regular one, and the processor
 * list, counters and pause timer are reset for the new ring.
 */
static void memb_state_operational_enter (struct totemsrp_instance *instance)
{
	struct srp_addr joined_list[PROCESSOR_COUNT_MAX];
	int joined_list_entries = 0;
	unsigned int aru_save;
	unsigned int joined_list_totemip[PROCESSOR_COUNT_MAX];
	unsigned int trans_memb_list_totemip[PROCESSOR_COUNT_MAX];
	unsigned int new_memb_list_totemip[PROCESSOR_COUNT_MAX];
	unsigned int left_list[PROCESSOR_COUNT_MAX];
	unsigned int i;
	unsigned int res;

	memb_consensus_reset (instance);

	old_ring_state_reset (instance);

	deliver_messages_from_recovery_to_regular (instance);

	log_printf (instance->totemsrp_log_level_debug,
		"Delivering to app %x to %x\n",
		instance->my_high_delivered + 1, instance->old_ring_state_high_seq_received);

	/*
	 * my_aru is switched to the old ring's value while old ring
	 * messages are delivered; it is put back from aru_save after the
	 * second delivery pass below.
	 */
	aru_save = instance->my_aru;
	instance->my_aru = instance->old_ring_state_aru;

	messages_deliver_to_app (instance, 0, instance->old_ring_state_high_seq_received);

	/*
	 * Calculate joined and left list
	 */
	memb_set_subtract (instance->my_left_memb_list,
		&instance->my_left_memb_entries,
		instance->my_memb_list, instance->my_memb_entries,
		instance->my_trans_memb_list, instance->my_trans_memb_entries);

	memb_set_subtract (joined_list, &joined_list_entries,
		instance->my_new_memb_list, instance->my_new_memb_entries,
		instance->my_trans_memb_list, instance->my_trans_memb_entries);

	/*
	 * Install new membership
	 */
	instance->my_memb_entries = instance->my_new_memb_entries;
	memcpy (&instance->my_memb_list, instance->my_new_memb_list,
		sizeof (struct srp_addr) * instance->my_memb_entries);
	instance->last_released = 0;
	instance->my_set_retrans_flg = 0;

	/*
	 * Deliver transitional configuration to application
	 */
	srp_addr_to_nodeid (left_list, instance->my_left_memb_list,
		instance->my_left_memb_entries);
	srp_addr_to_nodeid (trans_memb_list_totemip,
		instance->my_trans_memb_list, instance->my_trans_memb_entries);
	instance->totemsrp_confchg_fn (TOTEM_CONFIGURATION_TRANSITIONAL,
		trans_memb_list_totemip, instance->my_trans_memb_entries,
		left_list, instance->my_left_memb_entries,
		0, 0, &instance->my_ring_id);

// TODO we need to filter to ensure we only deliver those
// messages which are part of instance->my_deliver_memb
	messages_deliver_to_app (instance, 1, instance->old_ring_state_high_seq_received);

	instance->my_aru = aru_save;

	/*
	 * Deliver regular configuration to application
	 */
	srp_addr_to_nodeid (new_memb_list_totemip,
		instance->my_new_memb_list, instance->my_new_memb_entries);
	srp_addr_to_nodeid (joined_list_totemip, joined_list,
		joined_list_entries);
	instance->totemsrp_confchg_fn (TOTEM_CONFIGURATION_REGULAR,
		new_memb_list_totemip, instance->my_new_memb_entries,
		0, 0,
		joined_list_totemip, joined_list_entries, &instance->my_ring_id);

	/*
	 * The recovery sort queue now becomes the regular
	 * sort queue.  It is necessary to copy the state
	 * into the regular sort queue.
	 */
	sq_copy (&instance->regular_sort_queue, &instance->recovery_sort_queue);
	instance->my_last_aru = SEQNO_START_MSG;

	/* When making my_proc_list smaller, ensure that the
	 * now non-used entries are zero-ed out. There are some suspect
	 * assert's that assume that there is always 2 entries in the list.
	 * These fail when my_proc_list is reduced to 1 entry (and the
	 * valid [0] entry is the same as the 'unused' [1] entry).
	 */
	memset(instance->my_proc_list, 0,
		   sizeof (struct srp_addr) * instance->my_proc_list_entries);

	/* my_memb_entries was set to my_new_memb_entries above, so both sizes agree */
	instance->my_proc_list_entries = instance->my_new_memb_entries;
	memcpy (instance->my_proc_list, instance->my_new_memb_list,
		sizeof (struct srp_addr) * instance->my_memb_entries);

	instance->my_failed_list_entries = 0;
	instance->my_high_delivered = instance->my_high_seq_received;

	/*
	 * Free the message buffer of every item still held by the regular
	 * sort queue up to my_high_delivered, then release those items.
	 */
	for (i = 0; i <= instance->my_high_delivered; i++) {
		void *ptr;

		res = sq_item_get (&instance->regular_sort_queue, i, &ptr);
		if (res == 0) {
			struct sort_queue_item *regular_message;

			regular_message = ptr;
			free (regular_message->mcast);
		}
	}
	sq_items_release (&instance->regular_sort_queue, instance->my_high_delivered);
	instance->last_released = instance->my_high_delivered;

	log_printf (instance->totemsrp_log_level_debug,
		"entering OPERATIONAL state.\n");
	log_printf (instance->totemsrp_log_level_notice,
		"A processor joined or left the membership and a new membership was formed.\n");
	instance->memb_state = MEMB_STATE_OPERATIONAL;

	instance->stats.operational_entered++;
	instance->stats.continuous_gather = 0;

	instance->my_received_flg = 1;

	reset_pause_timeout (instance);

	/*
	 * Save ring id information from this configuration to determine
	 * which processors are transitioning from old regular configuration
	 * in to new regular configuration on the next configuration change
	 */
	memcpy (&instance->my_old_ring_id, &instance->my_ring_id,
		sizeof (struct memb_ring_id));

	return;
}

static void memb_state_gather_enter (
	struct totemsrp_instance *instance,
	int gather_from)
{
	instance->orf_token_discard = 1;

	memb_set_merge (
		&instance->my_id, 1,
		instance->my_proc_list, &instance->my_proc_list_entries);

	memb_join_message_send (instance);

	/*
	 * Restart the join timeout
	 */
	qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout);

	qb_loop_timer_add (instance->totemsrp_poll_handle,
		QB_LOOP_MED,
		instance->totem_config->join_timeout*QB_TIME_NS_IN_MSEC,
		(void *)instance,
memb_timer_function_state_gather,
		&instance->memb_timer_state_gather_join_timeout);

	/*
	 * Restart the consensus timeout
	 */
	qb_loop_timer_del (instance->totemsrp_poll_handle,
		instance->memb_timer_state_gather_consensus_timeout);

	qb_loop_timer_add (instance->totemsrp_poll_handle,
		QB_LOOP_MED,
		instance->totem_config->consensus_timeout*QB_TIME_NS_IN_MSEC,
		(void *)instance,
		memb_timer_function_gather_consensus_timeout,
		&instance->memb_timer_state_gather_consensus_timeout);

	/*
	 * Cancel the token loss and token retransmission timeouts
	 */
	cancel_token_retransmit_timeout (instance); // REVIEWED
	cancel_token_timeout (instance); // REVIEWED
	cancel_merge_detect_timeout (instance);

	memb_consensus_reset (instance);

	/* this processor always agrees with itself */
	memb_consensus_set (instance, &instance->my_id);

	log_printf (instance->totemsrp_log_level_debug,
		"entering GATHER state from %d.\n", gather_from);

	instance->memb_state = MEMB_STATE_GATHER;
	instance->stats.gather_entered++;

	if (gather_from == 3) {
		/*
		 * State 3 means gather, so we are continuously gathering.
		 */
		instance->stats.continuous_gather++;
	}

	/* warn once the run of consecutive gathers exceeds the limit */
	if (instance->stats.continuous_gather > MAX_NO_CONT_GATHER) {
		log_printf (instance->totemsrp_log_level_warning,
			"Totem is unable to form a cluster because of an "
			"operating system or network fault. The most common "
			"cause of this message is that the local firewall is "
			"configured improperly.\n");
	}

	return;
}

static void timer_function_token_retransmit_timeout (void *data);

/*
 * Callback taking the totemsrp instance as context; sends the commit token.
 * NOTE(review): per the comment at the end of memb_state_commit_enter this
 * presumably runs once the token target has been set -- confirm at the
 * registration site.
 */
static void target_set_completed (
	void *context)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)context;

	memb_state_commit_token_send (instance);

}

/*
 * Enter the COMMIT state: save the old ring state, update the commit token
 * with this processor's entry, set the token target, stop both gather
 * timers, adopt and store the commit token's ring id, restart the token
 * timers and reset the flow control counters.
 */
static void memb_state_commit_enter (
	struct totemsrp_instance *instance)
{
	old_ring_state_save (instance);

	memb_state_commit_token_update (instance);

	memb_state_commit_token_target_set (instance);

	qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout);

	instance->memb_timer_state_gather_join_timeout = 0;

	qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_consensus_timeout);

	instance->memb_timer_state_gather_consensus_timeout = 0;

	memb_ring_id_set_and_store (instance, &instance->commit_token->ring_id);

	instance->token_ring_id_seq = instance->my_ring_id.seq;

	log_printf (instance->totemsrp_log_level_debug,
		"entering COMMIT state.\n");

	instance->memb_state = MEMB_STATE_COMMIT;
	reset_token_retransmit_timeout (instance); // REVIEWED
	reset_token_timeout (instance); // REVIEWED

	instance->stats.commit_entered++;
	instance->stats.continuous_gather = 0;

	/*
	 * reset all flow control variables since we are starting a new ring
	 */
	instance->my_trc = 0;
	instance->my_pbl = 0;
	instance->my_cbl = 0;
	/*
	 * commit token sent after callback that token target has been set
	 */
}

/*
 * Enter the RECOVERY state.
 *
 * commit_token is followed in memory by addr_entries srp_addr records and
 * then by the same number of memb_commit_token_memb_entry records; both
 * arrays are read from there.
 *
 * The function forwards the commit token, builds the transitional
 * membership, and - if some member of the transitional membership has its
 * received flag clear - re-originates the old ring messages above the
 * lowest aru as encapsulated messages on retrans_message_queue. It then
 * resets the sequence bookkeeping to SEQNO_START_MSG and restarts the
 * token timers.
 */
static void memb_state_recovery_enter (
	struct totemsrp_instance *instance,
	struct memb_commit_token *commit_token)
{
	int i;
	int local_received_flg = 1;
	unsigned int low_ring_aru;
	unsigned int range = 0;
	unsigned int messages_originated = 0;
	const struct srp_addr *addr;
	struct memb_commit_token_memb_entry *memb_list;
	struct memb_ring_id my_new_memb_ring_id_list[PROCESSOR_COUNT_MAX];

	addr = (const struct srp_addr *)commit_token->end_of_commit_token;
	memb_list = (struct memb_commit_token_memb_entry *)(addr + commit_token->addr_entries);

	log_printf (instance->totemsrp_log_level_debug,
		"entering RECOVERY state.\n");

	instance->orf_token_discard = 0;

	instance->my_high_ring_delivered = 0;

	sq_reinit (&instance->recovery_sort_queue, SEQNO_START_MSG);
	cs_queue_reinit (&instance->retrans_message_queue);

	low_ring_aru = instance->old_ring_state_high_seq_received;

	memb_state_commit_token_send_recovery (instance, commit_token);

	instance->my_token_seq = SEQNO_START_TOKEN - 1;

	/*
	 * Build regular configuration
	 */
	totemrrp_processor_count_set (
		instance->totemrrp_context,
		commit_token->addr_entries);

	/*
	 * Build transitional configuration
	 */
	for (i = 0; i < instance->my_new_memb_entries; i++) {
		memcpy (&my_new_memb_ring_id_list[i],
			&memb_list[i].ring_id,
			sizeof (struct memb_ring_id));
	}
	memb_set_and_with_ring_id (
		instance->my_new_memb_list,
		my_new_memb_ring_id_list,
		instance->my_new_memb_entries,
		instance->my_memb_list,
		instance->my_memb_entries,
		&instance->my_old_ring_id,
		instance->my_trans_memb_list,
		&instance->my_trans_memb_entries);

	for (i = 0; i < instance->my_trans_memb_entries; i++) {
		log_printf (instance->totemsrp_log_level_debug,
			"TRANS [%d] member %s:\n", i, totemip_print (&instance->my_trans_memb_list[i].addr[0]));
	}
	for (i = 0; i < instance->my_new_memb_entries; i++) {
		log_printf (instance->totemsrp_log_level_debug,
			"position [%d] member %s:\n", i, totemip_print (&addr[i].addr[0]));
		log_printf (instance->totemsrp_log_level_debug,
			"previous ring seq %lld rep %s\n",
			memb_list[i].ring_id.seq,
			totemip_print (&memb_list[i].ring_id.rep));

		log_printf (instance->totemsrp_log_level_debug,
			"aru %x high delivered %x received flag %d\n",
			memb_list[i].aru,
			memb_list[i].high_delivered,
			memb_list[i].received_flg);

	//	assert (totemip_print (&memb_list[i].ring_id.rep) != 0);
	}
	/*
	 * Determine if any received flag is false
	 */
	for (i = 0; i < commit_token->addr_entries; i++) {
		if (memb_set_subset (&instance->my_new_memb_list[i], 1,
			instance->my_trans_memb_list, instance->my_trans_memb_entries) &&

			memb_list[i].received_flg == 0) {
			/* the deliver membership becomes the transitional membership */
			instance->my_deliver_memb_entries = instance->my_trans_memb_entries;
			memcpy (instance->my_deliver_memb_list, instance->my_trans_memb_list,
				sizeof (struct srp_addr) * instance->my_trans_memb_entries);
			local_received_flg = 0;
			break;
		}
	}
	if (local_received_flg == 1) {
		goto no_originate;
	} /* Else originate messages if we should */

	/*
	 * Calculate my_low_ring_aru, instance->my_high_ring_delivered for the transitional membership
	 */
	for (i = 0; i < commit_token->addr_entries; i++) {
		if (memb_set_subset (&instance->my_new_memb_list[i], 1,
			instance->my_deliver_memb_list,
			instance->my_deliver_memb_entries) &&

			memcmp (&instance->my_old_ring_id,
				&memb_list[i].ring_id,
				sizeof (struct memb_ring_id)) == 0) {

			if (sq_lt_compare (memb_list[i].aru, low_ring_aru)) {

				low_ring_aru = memb_list[i].aru;
			}
			if (sq_lt_compare (instance->my_high_ring_delivered, memb_list[i].high_delivered)) {
				instance->my_high_ring_delivered = memb_list[i].high_delivered;
			}
		}
	}

	/*
	 * Copy all old ring messages to instance->retrans_message_queue
	 */
	range = instance->old_ring_state_high_seq_received - low_ring_aru;
	if (range == 0) {
		/*
		 * No messages to copy
		 */
		goto no_originate;
	}
	assert (range < QUEUE_RTR_ITEMS_SIZE_MAX);

	log_printf (instance->totemsrp_log_level_debug,
		"copying all old ring messages from %x-%x.\n",
		low_ring_aru + 1, instance->old_ring_state_high_seq_received);

	for (i = 1; i <= range; i++) {
		struct sort_queue_item *sort_queue_item;
		struct message_item message_item;
		void *ptr;
		int res;

		res = sq_item_get (&instance->regular_sort_queue,
			low_ring_aru + i, &ptr);
		if (res != 0) {
			continue;
		}
		sort_queue_item = ptr;
		messages_originated++;
		memset (&message_item, 0, sizeof (struct message_item));
	// TODO LEAK
		message_item.mcast = totemsrp_buffer_alloc (instance);
		assert (message_item.mcast);
		/* new outer header; the old message follows it as payload */
		message_item.mcast->header.type = MESSAGE_TYPE_MCAST;
		srp_addr_copy (&message_item.mcast->system_from, &instance->my_id);
		message_item.mcast->header.encapsulated = MESSAGE_ENCAPSULATED;
		message_item.mcast->header.nodeid = instance->my_id.addr[0].nodeid;
		assert (message_item.mcast->header.nodeid);
		message_item.mcast->header.endian_detector = ENDIAN_LOCAL;
		memcpy (&message_item.mcast->ring_id, &instance->my_ring_id,
			sizeof (struct memb_ring_id));
		message_item.msg_len = sort_queue_item->msg_len + sizeof (struct mcast);
		memcpy (((char *)message_item.mcast) + sizeof (struct mcast),
			sort_queue_item->mcast,
			sort_queue_item->msg_len);
		cs_queue_item_add (&instance->retrans_message_queue, &message_item);
	}
	log_printf (instance->totemsrp_log_level_debug,
		"Originated %d messages in RECOVERY.\n", messages_originated);
	goto originated;

no_originate:
	log_printf (instance->totemsrp_log_level_debug,
		"Did not need to originate any messages in recovery.\n");

originated:
	instance->my_aru = SEQNO_START_MSG;
	instance->my_aru_count = 0;
	instance->my_seq_unchanged = 0;
	instance->my_high_seq_received = SEQNO_START_MSG;
	instance->my_install_seq = SEQNO_START_MSG;
	instance->last_released = SEQNO_START_MSG;

	reset_token_timeout (instance); // REVIEWED
	reset_token_retransmit_timeout (instance); // REVIEWED

	instance->memb_state = MEMB_STATE_RECOVERY;
	instance->stats.recovery_entered++;
	instance->stats.continuous_gather = 0;

	return;
}

/*
 * Event notification entry point. type and value are not examined; every
 * event results in token_hold_cancel_send() being called.
 */
void totemsrp_event_signal (void *srp_context, enum totem_event_type type, int value)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;

	token_hold_cancel_send (instance);

	return;
}

/*
 * Queue a multicast message built from iov_len iovec entries on
 * new_message_queue.
 *
 * Returns 0 when the message was queued and -1 when the queue is full or
 * no buffer could be allocated.
 */
int totemsrp_mcast (
	void *srp_context,
	struct iovec *iovec,
	unsigned int iov_len,
	int guarantee)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
	int i;
	struct message_item message_item;
	char *addr;
	unsigned int addr_idx;

	if (cs_queue_is_full (&instance->new_message_queue)) {
		log_printf (instance->totemsrp_log_level_debug, "queue full\n");
		return (-1);
	}

	memset (&message_item, 0, sizeof (struct message_item));

	/*
	 * Allocate pending item
	 */
message_item.mcast = totemsrp_buffer_alloc (instance);
	if (message_item.mcast == 0) {
		goto error_mcast;
	}

	/*
	 * Set mcast header
	 */
	memset(message_item.mcast, 0, sizeof (struct mcast));
	message_item.mcast->header.type = MESSAGE_TYPE_MCAST;
	message_item.mcast->header.endian_detector = ENDIAN_LOCAL;
	message_item.mcast->header.encapsulated = MESSAGE_NOT_ENCAPSULATED;
	message_item.mcast->header.nodeid = instance->my_id.addr[0].nodeid;
	assert (message_item.mcast->header.nodeid);

	message_item.mcast->guarantee = guarantee;
	srp_addr_copy (&message_item.mcast->system_from, &instance->my_id);

	/*
	 * Payload is appended directly behind the header.
	 * NOTE(review): no size check against the allocated buffer is done
	 * here - presumably callers bound the total iovec length; confirm.
	 */
	addr = (char *)message_item.mcast;
	addr_idx = sizeof (struct mcast);
	for (i = 0; i < iov_len; i++) {
		memcpy (&addr[addr_idx], iovec[i].iov_base, iovec[i].iov_len);
		addr_idx += iovec[i].iov_len;
	}

	message_item.msg_len = addr_idx;

	log_printf (instance->totemsrp_log_level_debug, "mcasted message added to pending queue\n");
	instance->stats.mcast_tx++;
	cs_queue_item_add (&instance->new_message_queue, &message_item);

	return (0);

error_mcast:
	return (-1);
}

/*
 * Determine if there is room to queue a new message
 *
 * Returns the value cs_queue_avail() reports for new_message_queue.
 */
int totemsrp_avail (void *srp_context)
{
	struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
	int avail;

	cs_queue_avail (&instance->new_message_queue, &avail);

	return (avail);
}

/*
 * ORF Token Management
 */
/*
 * Recast message to mcast group if it is available
 *
 * The recovery sort queue is used in RECOVERY state, the regular one
 * otherwise. Returns 0 when the message at seq was sent, -1 when seq is
 * outside the queue's range or no item is stored there.
 */
static int orf_token_remcast (
	struct totemsrp_instance *instance,
	int seq)
{
	struct sort_queue_item *sort_queue_item;
	int res;
	void *ptr;

	struct sq *sort_queue;

	if (instance->memb_state == MEMB_STATE_RECOVERY) {
		sort_queue = &instance->recovery_sort_queue;
	} else {
		sort_queue = &instance->regular_sort_queue;
	}

	res = sq_in_range (sort_queue, seq);
	if (res == 0) {
		log_printf (instance->totemsrp_log_level_debug, "sq not in range\n");
		return (-1);
	}

	/*
	 * Get RTR item at seq, if not available, return
	 */
	res = sq_item_get (sort_queue, seq, &ptr);
	if (res != 0) {
		return -1;
	}

	sort_queue_item = ptr;

	totemrrp_mcast_noflush_send (
		instance->totemrrp_context,
		sort_queue_item->mcast,
		sort_queue_item->msg_len);

	return (0);
}


/*
 * Free all freeable messages from ring
 *
 * The release point is the lowest (per sq_lt_compare) of token_aru,
 * my_last_aru and my_high_delivered. Nothing is done when that point lies
 * before last_released.
 */
static void messages_free (
	struct totemsrp_instance *instance,
	unsigned int token_aru)
{
	struct sort_queue_item *regular_message;
	unsigned int i;
	int res;
	int log_release = 0;
	unsigned int release_to;
	unsigned int range = 0;

	release_to = token_aru;
	if (sq_lt_compare (instance->my_last_aru, release_to)) {
		release_to = instance->my_last_aru;
	}
	if (sq_lt_compare (instance->my_high_delivered, release_to)) {
		release_to = instance->my_high_delivered;
	}

	/*
	 * Ensure we dont try release before an already released point
	 */
	if (sq_lt_compare (release_to, instance->last_released)) {
		return;
	}

	range = release_to - instance->last_released;
	assert (range < QUEUE_RTR_ITEMS_SIZE_MAX);

	/*
	 * Release retransmit list items if group aru indicates they are transmitted
	 */
	for (i = 1; i <= range; i++) {
		void *ptr;

		res = sq_item_get (&instance->regular_sort_queue,
			instance->last_released + i, &ptr);
		if (res == 0) {
			regular_message = ptr;
			totemsrp_buffer_release (instance, regular_message->mcast);
		}
		sq_items_release (&instance->regular_sort_queue,
			instance->last_released + i);

		log_release = 1;
	}
	instance->last_released += range;

	if (log_release) {
		log_printf (instance->totemsrp_log_level_debug,
			"releasing messages up to and including %x\n", release_to);
	}
}

/*
 * Advance my_aru over the contiguous run of messages present in the active
 * sort queue (recovery queue in RECOVERY state, regular queue otherwise),
 * stopping at the first hole. Does nothing when the gap between
 * my_high_seq_received and my_aru exceeds 1024.
 */
static void update_aru (
	struct totemsrp_instance *instance)
{
	unsigned int i;
	int res;
	struct sq *sort_queue;
	unsigned int range;
	unsigned int my_aru_saved = 0;

	if (instance->memb_state == MEMB_STATE_RECOVERY) {
		sort_queue = &instance->recovery_sort_queue;
	} else {
		sort_queue = &instance->regular_sort_queue;
	}

	range = instance->my_high_seq_received - instance->my_aru;
	if (range > 1024) {
		return;
	}

	my_aru_saved = instance->my_aru;
	for (i = 1; i <= range; i++) {

		void *ptr;

		res = sq_item_get (sort_queue, my_aru_saved + i, &ptr);
		/*
		 * If hole, stop updating aru
		 */
		if (res !=
0) { break; } } instance->my_aru += i - 1; } /* * Multicasts pending messages onto the ring (requires orf_token possession) */ static int orf_token_mcast ( struct totemsrp_instance *instance, struct orf_token *token, int fcc_mcasts_allowed) { struct message_item *message_item = 0; struct cs_queue *mcast_queue; struct sq *sort_queue; struct sort_queue_item sort_queue_item; struct mcast *mcast; unsigned int fcc_mcast_current; if (instance->memb_state == MEMB_STATE_RECOVERY) { mcast_queue = &instance->retrans_message_queue; sort_queue = &instance->recovery_sort_queue; reset_token_retransmit_timeout (instance); // REVIEWED } else { mcast_queue = &instance->new_message_queue; sort_queue = &instance->regular_sort_queue; } for (fcc_mcast_current = 0; fcc_mcast_current < fcc_mcasts_allowed; fcc_mcast_current++) { if (cs_queue_is_empty (mcast_queue)) { break; } message_item = (struct message_item *)cs_queue_item_get (mcast_queue); message_item->mcast->seq = ++token->seq; message_item->mcast->this_seqno = instance->global_seqno++; /* * Build IO vector */ memset (&sort_queue_item, 0, sizeof (struct sort_queue_item)); sort_queue_item.mcast = message_item->mcast; sort_queue_item.msg_len = message_item->msg_len; mcast = sort_queue_item.mcast; memcpy (&mcast->ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); /* * Add message to retransmit queue */ sq_item_add (sort_queue, &sort_queue_item, message_item->mcast->seq); totemrrp_mcast_noflush_send ( instance->totemrrp_context, message_item->mcast, message_item->msg_len); /* * Delete item from pending queue */ cs_queue_item_remove (mcast_queue); /* * If messages mcasted, deliver any new messages to totempg */ instance->my_high_seq_received = token->seq; } update_aru (instance); /* * Return 1 if more messages are available for single node clusters */ return (fcc_mcast_current); } /* * Remulticasts messages in orf_token's retransmit list (requires orf_token) * Modify's orf_token's rtr to include retransmits required by this 
process */ static int orf_token_rtr ( struct totemsrp_instance *instance, struct orf_token *orf_token, unsigned int *fcc_allowed) { unsigned int res; unsigned int i, j; unsigned int found; struct sq *sort_queue; struct rtr_item *rtr_list; unsigned int range = 0; char retransmit_msg[1024]; char value[64]; if (instance->memb_state == MEMB_STATE_RECOVERY) { sort_queue = &instance->recovery_sort_queue; } else { sort_queue = &instance->regular_sort_queue; } rtr_list = &orf_token->rtr_list[0]; strcpy (retransmit_msg, "Retransmit List: "); if (orf_token->rtr_list_entries) { log_printf (instance->totemsrp_log_level_debug, "Retransmit List %d\n", orf_token->rtr_list_entries); for (i = 0; i < orf_token->rtr_list_entries; i++) { sprintf (value, "%x ", rtr_list[i].seq); strcat (retransmit_msg, value); } strcat (retransmit_msg, "\n"); log_printf (instance->totemsrp_log_level_notice, "%s", retransmit_msg); } /* * Retransmit messages on orf_token's RTR list from RTR queue */ for (instance->fcc_remcast_current = 0, i = 0; instance->fcc_remcast_current < *fcc_allowed && i < orf_token->rtr_list_entries;) { /* * If this retransmit request isn't from this configuration, * try next rtr entry */ if (memcmp (&rtr_list[i].ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)) != 0) { i += 1; continue; } res = orf_token_remcast (instance, rtr_list[i].seq); if (res == 0) { /* * Multicasted message, so no need to copy to new retransmit list */ orf_token->rtr_list_entries -= 1; assert (orf_token->rtr_list_entries >= 0); memmove (&rtr_list[i], &rtr_list[i + 1], sizeof (struct rtr_item) * (orf_token->rtr_list_entries - i)); instance->stats.mcast_retx++; instance->fcc_remcast_current++; } else { i += 1; } } *fcc_allowed = *fcc_allowed - instance->fcc_remcast_current; /* * Add messages to retransmit to RTR list * but only retry if there is room in the retransmit list */ range = orf_token->seq - instance->my_aru; assert (range < QUEUE_RTR_ITEMS_SIZE_MAX); for (i = 1; 
(orf_token->rtr_list_entries < RETRANSMIT_ENTRIES_MAX) && (i <= range); i++) { /* * Ensure message is within the sort queue range */ res = sq_in_range (sort_queue, instance->my_aru + i); if (res == 0) { break; } /* * Find if a message is missing from this processor */ res = sq_item_inuse (sort_queue, instance->my_aru + i); if (res == 0) { /* * Determine how many times we have missed receiving * this sequence number. sq_item_miss_count increments * a counter for the sequence number. The miss count * will be returned and compared. This allows time for * delayed multicast messages to be received before * declaring the message is missing and requesting a * retransmit. */ res = sq_item_miss_count (sort_queue, instance->my_aru + i); if (res < instance->totem_config->miss_count_const) { continue; } /* * Determine if missing message is already in retransmit list */ found = 0; for (j = 0; j < orf_token->rtr_list_entries; j++) { if (instance->my_aru + i == rtr_list[j].seq) { found = 1; } } if (found == 0) { /* * Missing message not found in current retransmit list so add it */ memcpy (&rtr_list[orf_token->rtr_list_entries].ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); rtr_list[orf_token->rtr_list_entries].seq = instance->my_aru + i; orf_token->rtr_list_entries++; } } } return (instance->fcc_remcast_current); } static void token_retransmit (struct totemsrp_instance *instance) { totemrrp_token_send (instance->totemrrp_context, instance->orf_token_retransmit, instance->orf_token_retransmit_size); } /* * Retransmit the regular token if no mcast or token has * been received in retransmit token period retransmit * the token to the next processor */ static void timer_function_token_retransmit_timeout (void *data) { struct totemsrp_instance *instance = data; switch (instance->memb_state) { case MEMB_STATE_GATHER: break; case MEMB_STATE_COMMIT: case MEMB_STATE_OPERATIONAL: case MEMB_STATE_RECOVERY: token_retransmit (instance); reset_token_retransmit_timeout 
(instance); // REVIEWED break; } } static void timer_function_token_hold_retransmit_timeout (void *data) { struct totemsrp_instance *instance = data; switch (instance->memb_state) { case MEMB_STATE_GATHER: break; case MEMB_STATE_COMMIT: break; case MEMB_STATE_OPERATIONAL: case MEMB_STATE_RECOVERY: token_retransmit (instance); break; } } static void timer_function_merge_detect_timeout(void *data) { struct totemsrp_instance *instance = data; instance->my_merge_detect_timeout_outstanding = 0; switch (instance->memb_state) { case MEMB_STATE_OPERATIONAL: if (totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0])) { memb_merge_detect_transmit (instance); } break; case MEMB_STATE_GATHER: case MEMB_STATE_COMMIT: case MEMB_STATE_RECOVERY: break; } } /* * Send orf_token to next member (requires orf_token) */ static int token_send ( struct totemsrp_instance *instance, struct orf_token *orf_token, int forward_token) { int res = 0; unsigned int orf_token_size; orf_token_size = sizeof (struct orf_token) + (orf_token->rtr_list_entries * sizeof (struct rtr_item)); memcpy (instance->orf_token_retransmit, orf_token, orf_token_size); instance->orf_token_retransmit_size = orf_token_size; orf_token->header.nodeid = instance->my_id.addr[0].nodeid; assert (orf_token->header.nodeid); if (forward_token == 0) { return (0); } totemrrp_token_send (instance->totemrrp_context, orf_token, orf_token_size); return (res); } static int token_hold_cancel_send (struct totemsrp_instance *instance) { struct token_hold_cancel token_hold_cancel; /* * Only cancel if the token is currently held */ if (instance->my_token_held == 0) { return (0); } instance->my_token_held = 0; /* * Build message */ token_hold_cancel.header.type = MESSAGE_TYPE_TOKEN_HOLD_CANCEL; token_hold_cancel.header.endian_detector = ENDIAN_LOCAL; token_hold_cancel.header.encapsulated = 0; token_hold_cancel.header.nodeid = instance->my_id.addr[0].nodeid; memcpy (&token_hold_cancel.ring_id, &instance->my_ring_id, sizeof (struct 
memb_ring_id)); assert (token_hold_cancel.header.nodeid); instance->stats.token_hold_cancel_tx++; totemrrp_mcast_flush_send (instance->totemrrp_context, &token_hold_cancel, sizeof (struct token_hold_cancel)); return (0); } static int orf_token_send_initial (struct totemsrp_instance *instance) { struct orf_token orf_token; int res; orf_token.header.type = MESSAGE_TYPE_ORF_TOKEN; orf_token.header.endian_detector = ENDIAN_LOCAL; orf_token.header.encapsulated = 0; orf_token.header.nodeid = instance->my_id.addr[0].nodeid; assert (orf_token.header.nodeid); orf_token.seq = SEQNO_START_MSG; orf_token.token_seq = SEQNO_START_TOKEN; orf_token.retrans_flg = 1; instance->my_set_retrans_flg = 1; instance->stats.orf_token_tx++; if (cs_queue_is_empty (&instance->retrans_message_queue) == 1) { orf_token.retrans_flg = 0; instance->my_set_retrans_flg = 0; } else { orf_token.retrans_flg = 1; instance->my_set_retrans_flg = 1; } orf_token.aru = 0; orf_token.aru = SEQNO_START_MSG - 1; orf_token.aru_addr = instance->my_id.addr[0].nodeid; memcpy (&orf_token.ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); orf_token.fcc = 0; orf_token.backlog = 0; orf_token.rtr_list_entries = 0; res = token_send (instance, &orf_token, 1); return (res); } static void memb_state_commit_token_update ( struct totemsrp_instance *instance) { struct srp_addr *addr; struct memb_commit_token_memb_entry *memb_list; unsigned int high_aru; unsigned int i; addr = (struct srp_addr *)instance->commit_token->end_of_commit_token; memb_list = (struct memb_commit_token_memb_entry *)(addr + instance->commit_token->addr_entries); memcpy (instance->my_new_memb_list, addr, sizeof (struct srp_addr) * instance->commit_token->addr_entries); instance->my_new_memb_entries = instance->commit_token->addr_entries; memcpy (&memb_list[instance->commit_token->memb_index].ring_id, &instance->my_old_ring_id, sizeof (struct memb_ring_id)); memb_list[instance->commit_token->memb_index].aru = instance->old_ring_state_aru; /* * 
TODO high delivered is really instance->my_aru, but with safe this * could change? */ instance->my_received_flg = (instance->my_aru == instance->my_high_seq_received); memb_list[instance->commit_token->memb_index].received_flg = instance->my_received_flg; memb_list[instance->commit_token->memb_index].high_delivered = instance->my_high_delivered; /* * find high aru up to current memb_index for all matching ring ids * if any ring id matching memb_index has aru less then high aru set * received flag for that entry to false */ high_aru = memb_list[instance->commit_token->memb_index].aru; for (i = 0; i <= instance->commit_token->memb_index; i++) { if (memcmp (&memb_list[instance->commit_token->memb_index].ring_id, &memb_list[i].ring_id, sizeof (struct memb_ring_id)) == 0) { if (sq_lt_compare (high_aru, memb_list[i].aru)) { high_aru = memb_list[i].aru; } } } for (i = 0; i <= instance->commit_token->memb_index; i++) { if (memcmp (&memb_list[instance->commit_token->memb_index].ring_id, &memb_list[i].ring_id, sizeof (struct memb_ring_id)) == 0) { if (sq_lt_compare (memb_list[i].aru, high_aru)) { memb_list[i].received_flg = 0; if (i == instance->commit_token->memb_index) { instance->my_received_flg = 0; } } } } instance->commit_token->header.nodeid = instance->my_id.addr[0].nodeid; instance->commit_token->memb_index += 1; assert (instance->commit_token->memb_index <= instance->commit_token->addr_entries); assert (instance->commit_token->header.nodeid); } static void memb_state_commit_token_target_set ( struct totemsrp_instance *instance) { struct srp_addr *addr; unsigned int i; addr = (struct srp_addr *)instance->commit_token->end_of_commit_token; for (i = 0; i < instance->totem_config->interface_count; i++) { totemrrp_token_target_set ( instance->totemrrp_context, &addr[instance->commit_token->memb_index % instance->commit_token->addr_entries].addr[i], i); } } static int memb_state_commit_token_send_recovery ( struct totemsrp_instance *instance, struct memb_commit_token 
*commit_token)
{
	unsigned int commit_token_size;

	commit_token->token_seq++;
	/* header plus one address and one member entry per processor */
	commit_token_size = sizeof (struct memb_commit_token) +
		((sizeof (struct srp_addr) +
			sizeof (struct memb_commit_token_memb_entry)) * commit_token->addr_entries);
	/*
	 * Make a copy for retransmission if necessary
	 */
	memcpy (instance->orf_token_retransmit, commit_token, commit_token_size);
	instance->orf_token_retransmit_size = commit_token_size;

	instance->stats.memb_commit_token_tx++;

	totemrrp_token_send (instance->totemrrp_context,
		commit_token,
		commit_token_size);

	/*
	 * Request retransmission of the commit token in case it is lost
	 */
	reset_token_retransmit_timeout (instance);
	return (0);
}

/*
 * Send the instance's own commit token: bump its token_seq, keep a copy in
 * orf_token_retransmit, send it and restart the token retransmit timeout.
 * Always returns 0.
 */
static int memb_state_commit_token_send (
	struct totemsrp_instance *instance)
{
	unsigned int commit_token_size;

	instance->commit_token->token_seq++;
	/* header plus one address and one member entry per processor */
	commit_token_size = sizeof (struct memb_commit_token) +
		((sizeof (struct srp_addr) +
			sizeof (struct memb_commit_token_memb_entry)) * instance->commit_token->addr_entries);
	/*
	 * Make a copy for retransmission if necessary
	 */
	memcpy (instance->orf_token_retransmit, instance->commit_token, commit_token_size);
	instance->orf_token_retransmit_size = commit_token_size;

	instance->stats.memb_commit_token_tx++;

	totemrrp_token_send (instance->totemrrp_context,
		instance->commit_token,
		commit_token_size);

	/*
	 * Request retransmission of the commit token in case it is lost
	 */
	reset_token_retransmit_timeout (instance);
	return (0);
}


/*
 * Returns non-zero when this processor's first address compares equal to
 * the smallest first address among the processors in my_proc_list that are
 * not in my_failed_list.
 */
static int memb_lowest_in_config (struct totemsrp_instance *instance)
{
	struct srp_addr token_memb[PROCESSOR_COUNT_MAX];
	int token_memb_entries = 0;
	int i;
	struct totem_ip_address *lowest_addr;

	memb_set_subtract (token_memb, &token_memb_entries,
		instance->my_proc_list, instance->my_proc_list_entries,
		instance->my_failed_list, instance->my_failed_list_entries);

	/*
	 * find representative by searching for smallest identifier
	 */

	/* lowest_addr aliases token_memb[0]; smaller values are copied into it */
	lowest_addr = &token_memb[0].addr[0];
	for (i = 1; i < token_memb_entries; i++) {
		if (totemip_compare(lowest_addr, &token_memb[i].addr[0]) > 0) {
			totemip_copy (lowest_addr, &token_memb[i].addr[0]);
		}
	}
	return (totemip_compare (lowest_addr, &instance->my_id.addr[0]) == 0);
}

/*
 * qsort() comparator for struct srp_addr: orders by totemip_compare() of
 * the first address of each element.
 */
static int srp_addr_compare (const void *a, const void *b)
{
	const struct srp_addr *srp_a = (const struct srp_addr *)a;
	const struct srp_addr *srp_b = (const struct srp_addr *)b;

	return (totemip_compare (&srp_a->addr[0], &srp_b->addr[0]));
}

/*
 * Build a fresh commit token in instance->commit_token.
 *
 * The member list is my_proc_list minus my_failed_list, sorted with
 * srp_addr_compare. The ring id uses this processor's first address as
 * representative and token_ring_id_seq + 4 as sequence. The per-member
 * entries following the address list are zeroed.
 */
static void memb_state_commit_token_create (
	struct totemsrp_instance *instance)
{
	struct srp_addr token_memb[PROCESSOR_COUNT_MAX];
	struct srp_addr *addr;
	struct memb_commit_token_memb_entry *memb_list;
	int token_memb_entries = 0;

	log_printf (instance->totemsrp_log_level_debug,
		"Creating commit token because I am the rep.\n");

	memb_set_subtract (token_memb, &token_memb_entries,
		instance->my_proc_list, instance->my_proc_list_entries,
		instance->my_failed_list, instance->my_failed_list_entries);

	memset (instance->commit_token, 0, sizeof (struct memb_commit_token));
	instance->commit_token->header.type = MESSAGE_TYPE_MEMB_COMMIT_TOKEN;
	instance->commit_token->header.endian_detector = ENDIAN_LOCAL;
	instance->commit_token->header.encapsulated = 0;
	instance->commit_token->header.nodeid = instance->my_id.addr[0].nodeid;
	assert (instance->commit_token->header.nodeid);

	totemip_copy(&instance->commit_token->ring_id.rep, &instance->my_id.addr[0]);

	instance->commit_token->ring_id.seq = instance->token_ring_id_seq + 4;

	/*
	 * This qsort is necessary to ensure the commit token traverses
	 * the ring in the proper order
	 */
	qsort (token_memb, token_memb_entries, sizeof (struct srp_addr),
		srp_addr_compare);

	instance->commit_token->memb_index = 0;
	instance->commit_token->addr_entries = token_memb_entries;

	addr = (struct srp_addr *)instance->commit_token->end_of_commit_token;
	memb_list = (struct memb_commit_token_memb_entry *)(addr + instance->commit_token->addr_entries);

	memcpy (addr, token_memb,
		token_memb_entries * sizeof (struct srp_addr));
	memset (memb_list, 0,
		sizeof (struct
memb_commit_token_memb_entry) * token_memb_entries); } static void memb_join_message_send (struct totemsrp_instance *instance) { char memb_join_data[40000]; struct memb_join *memb_join = (struct memb_join *)memb_join_data; char *addr; unsigned int addr_idx; memb_join->header.type = MESSAGE_TYPE_MEMB_JOIN; memb_join->header.endian_detector = ENDIAN_LOCAL; memb_join->header.encapsulated = 0; memb_join->header.nodeid = instance->my_id.addr[0].nodeid; assert (memb_join->header.nodeid); memb_join->ring_seq = instance->my_ring_id.seq; memb_join->proc_list_entries = instance->my_proc_list_entries; memb_join->failed_list_entries = instance->my_failed_list_entries; srp_addr_copy (&memb_join->system_from, &instance->my_id); /* * This mess adds the joined and failed processor lists into the join * message */ addr = (char *)memb_join; addr_idx = sizeof (struct memb_join); memcpy (&addr[addr_idx], instance->my_proc_list, instance->my_proc_list_entries * sizeof (struct srp_addr)); addr_idx += instance->my_proc_list_entries * sizeof (struct srp_addr); memcpy (&addr[addr_idx], instance->my_failed_list, instance->my_failed_list_entries * sizeof (struct srp_addr)); addr_idx += instance->my_failed_list_entries * sizeof (struct srp_addr); if (instance->totem_config->send_join_timeout) { usleep (random() % (instance->totem_config->send_join_timeout * 1000)); } instance->stats.memb_join_tx++; totemrrp_mcast_flush_send ( instance->totemrrp_context, memb_join, addr_idx); } static void memb_leave_message_send (struct totemsrp_instance *instance) { char memb_join_data[40000]; struct memb_join *memb_join = (struct memb_join *)memb_join_data; char *addr; unsigned int addr_idx; int active_memb_entries; struct srp_addr active_memb[PROCESSOR_COUNT_MAX]; log_printf (instance->totemsrp_log_level_debug, "sending join/leave message\n"); /* * add us to the failed list, and remove us from * the members list */ memb_set_merge( &instance->my_id, 1, instance->my_failed_list, 
&instance->my_failed_list_entries); memb_set_subtract (active_memb, &active_memb_entries, instance->my_proc_list, instance->my_proc_list_entries, &instance->my_id, 1); memb_join->header.type = MESSAGE_TYPE_MEMB_JOIN; memb_join->header.endian_detector = ENDIAN_LOCAL; memb_join->header.encapsulated = 0; memb_join->header.nodeid = LEAVE_DUMMY_NODEID; memb_join->ring_seq = instance->my_ring_id.seq; memb_join->proc_list_entries = active_memb_entries; memb_join->failed_list_entries = instance->my_failed_list_entries; srp_addr_copy (&memb_join->system_from, &instance->my_id); memb_join->system_from.addr[0].nodeid = LEAVE_DUMMY_NODEID; // TODO: CC Maybe use the actual join send routine. /* * This mess adds the joined and failed processor lists into the join * message */ addr = (char *)memb_join; addr_idx = sizeof (struct memb_join); memcpy (&addr[addr_idx], active_memb, active_memb_entries * sizeof (struct srp_addr)); addr_idx += active_memb_entries * sizeof (struct srp_addr); memcpy (&addr[addr_idx], instance->my_failed_list, instance->my_failed_list_entries * sizeof (struct srp_addr)); addr_idx += instance->my_failed_list_entries * sizeof (struct srp_addr); if (instance->totem_config->send_join_timeout) { usleep (random() % (instance->totem_config->send_join_timeout * 1000)); } instance->stats.memb_join_tx++; totemrrp_mcast_flush_send ( instance->totemrrp_context, memb_join, addr_idx); } static void memb_merge_detect_transmit (struct totemsrp_instance *instance) { struct memb_merge_detect memb_merge_detect; memb_merge_detect.header.type = MESSAGE_TYPE_MEMB_MERGE_DETECT; memb_merge_detect.header.endian_detector = ENDIAN_LOCAL; memb_merge_detect.header.encapsulated = 0; memb_merge_detect.header.nodeid = instance->my_id.addr[0].nodeid; srp_addr_copy (&memb_merge_detect.system_from, &instance->my_id); memcpy (&memb_merge_detect.ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); assert (memb_merge_detect.header.nodeid); instance->stats.memb_merge_detect_tx++; 
totemrrp_mcast_flush_send (instance->totemrrp_context, &memb_merge_detect, sizeof (struct memb_merge_detect)); } static void memb_ring_id_create_or_load ( struct totemsrp_instance *instance, struct memb_ring_id *memb_ring_id) { int fd; int res = 0; char filename[PATH_MAX]; snprintf (filename, sizeof(filename), "%s/ringid_%s", rundir, totemip_print (&instance->my_id.addr[0])); fd = open (filename, O_RDONLY, 0700); /* * If file can be opened and read, read the ring id */ if (fd != -1) { res = read (fd, &memb_ring_id->seq, sizeof (uint64_t)); close (fd); } /* * If file could not be opened or read, create a new ring id */ if ((fd == -1) || (res != sizeof (uint64_t))) { memb_ring_id->seq = 0; umask(0); fd = open (filename, O_CREAT|O_RDWR, 0700); if (fd != -1) { res = write (fd, &memb_ring_id->seq, sizeof (uint64_t)); close (fd); if (res == -1) { LOGSYS_PERROR (errno, instance->totemsrp_log_level_warning, "Couldn't write ringid file '%s'", filename); } } else { LOGSYS_PERROR (errno, instance->totemsrp_log_level_warning, "Couldn't create ringid file '%s'", filename); } } totemip_copy(&memb_ring_id->rep, &instance->my_id.addr[0]); assert (!totemip_zero_check(&memb_ring_id->rep)); instance->token_ring_id_seq = memb_ring_id->seq; } static void memb_ring_id_set_and_store ( struct totemsrp_instance *instance, const struct memb_ring_id *ring_id) { char filename[256]; int fd; int res; memcpy (&instance->my_ring_id, ring_id, sizeof (struct memb_ring_id)); snprintf (filename, sizeof(filename), "%s/ringid_%s", rundir, totemip_print (&instance->my_id.addr[0])); fd = open (filename, O_WRONLY, 0777); if (fd == -1) { fd = open (filename, O_CREAT|O_RDWR, 0777); } if (fd == -1) { LOGSYS_PERROR(errno, instance->totemsrp_log_level_warning, "Couldn't store new ring id %llx to stable storage", instance->my_ring_id.seq); assert (0); return; } log_printf (instance->totemsrp_log_level_debug, "Storing new sequence id for ring %llx\n", instance->my_ring_id.seq); //assert (fd > 0); res = write 
(fd, &instance->my_ring_id.seq, sizeof (unsigned long long)); assert (res == sizeof (unsigned long long)); close (fd); } int totemsrp_callback_token_create ( void *srp_context, void **handle_out, enum totem_callback_token_type type, int delete, int (*callback_fn) (enum totem_callback_token_type type, const void *), const void *data) { struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context; struct token_callback_instance *callback_handle; token_hold_cancel_send (instance); callback_handle = malloc (sizeof (struct token_callback_instance)); if (callback_handle == 0) { return (-1); } *handle_out = (void *)callback_handle; list_init (&callback_handle->list); callback_handle->callback_fn = callback_fn; callback_handle->data = (void *) data; callback_handle->callback_type = type; callback_handle->delete = delete; switch (type) { case TOTEM_CALLBACK_TOKEN_RECEIVED: list_add (&callback_handle->list, &instance->token_callback_received_listhead); break; case TOTEM_CALLBACK_TOKEN_SENT: list_add (&callback_handle->list, &instance->token_callback_sent_listhead); break; } return (0); } void totemsrp_callback_token_destroy (void *srp_context, void **handle_out) { struct token_callback_instance *h; if (*handle_out) { h = (struct token_callback_instance *)*handle_out; list_del (&h->list); free (h); h = NULL; *handle_out = 0; } } static void token_callbacks_execute ( struct totemsrp_instance *instance, enum totem_callback_token_type type) { struct list_head *list; struct list_head *list_next; struct list_head *callback_listhead = 0; struct token_callback_instance *token_callback_instance; int res; int del; switch (type) { case TOTEM_CALLBACK_TOKEN_RECEIVED: callback_listhead = &instance->token_callback_received_listhead; break; case TOTEM_CALLBACK_TOKEN_SENT: callback_listhead = &instance->token_callback_sent_listhead; break; default: assert (0); } for (list = callback_listhead->next; list != callback_listhead; list = list_next) { token_callback_instance = 
list_entry (list, struct token_callback_instance, list); list_next = list->next; del = token_callback_instance->delete; if (del == 1) { list_del (list); } res = token_callback_instance->callback_fn ( token_callback_instance->callback_type, token_callback_instance->data); /* * This callback failed to execute, try it again on the next token */ if (res == -1 && del == 1) { list_add (list, callback_listhead); } else if (del) { free (token_callback_instance); } } } /* * Flow control functions */ static unsigned int backlog_get (struct totemsrp_instance *instance) { unsigned int backlog = 0; if (instance->memb_state == MEMB_STATE_OPERATIONAL) { backlog = cs_queue_used (&instance->new_message_queue); } else if (instance->memb_state == MEMB_STATE_RECOVERY) { backlog = cs_queue_used (&instance->retrans_message_queue); } instance->stats.token[instance->stats.latest_token].backlog_calc = backlog; return (backlog); } static int fcc_calculate ( struct totemsrp_instance *instance, struct orf_token *token) { unsigned int transmits_allowed; unsigned int backlog_calc; transmits_allowed = instance->totem_config->max_messages; if (transmits_allowed > instance->totem_config->window_size - token->fcc) { transmits_allowed = instance->totem_config->window_size - token->fcc; } instance->my_cbl = backlog_get (instance); /* * Only do backlog calculation if there is a backlog otherwise * we would result in div by zero */ if (token->backlog + instance->my_cbl - instance->my_pbl) { backlog_calc = (instance->totem_config->window_size * instance->my_pbl) / (token->backlog + instance->my_cbl - instance->my_pbl); if (backlog_calc > 0 && transmits_allowed > backlog_calc) { transmits_allowed = backlog_calc; } } return (transmits_allowed); } /* * don't overflow the RTR sort queue */ static void fcc_rtr_limit ( struct totemsrp_instance *instance, struct orf_token *token, unsigned int *transmits_allowed) { int check = QUEUE_RTR_ITEMS_SIZE_MAX; check -= (*transmits_allowed + 
instance->totem_config->window_size); assert (check >= 0); if (sq_lt_compare (instance->last_released + QUEUE_RTR_ITEMS_SIZE_MAX - *transmits_allowed - instance->totem_config->window_size, token->seq)) { *transmits_allowed = 0; } } static void fcc_token_update ( struct totemsrp_instance *instance, struct orf_token *token, unsigned int msgs_transmitted) { token->fcc += msgs_transmitted - instance->my_trc; token->backlog += instance->my_cbl - instance->my_pbl; instance->my_trc = msgs_transmitted; instance->my_pbl = instance->my_cbl; } /* * Message Handlers */ unsigned long long int tv_old; /* * message handler called when TOKEN message type received */ static int message_handler_orf_token ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed) { char token_storage[1500]; char token_convert[1500]; struct orf_token *token = NULL; int forward_token; unsigned int transmits_allowed; unsigned int mcasted_retransmit; unsigned int mcasted_regular; unsigned int last_aru; #ifdef GIVEINFO unsigned long long tv_current; unsigned long long tv_diff; tv_current = qb_util_nano_current_get (); tv_diff = tv_current - tv_old; tv_old = tv_current; log_printf (instance->totemsrp_log_level_debug, "Time since last token %0.4f ms\n", ((float)tv_diff) / 1000000.0); #endif if (instance->orf_token_discard) { return (0); } #ifdef TEST_DROP_ORF_TOKEN_PERCENTAGE if (random()%100 < TEST_DROP_ORF_TOKEN_PERCENTAGE) { return (0); } #endif if (endian_conversion_needed) { orf_token_endian_convert ((struct orf_token *)msg, (struct orf_token *)token_convert); msg = (struct orf_token *)token_convert; } /* * Make copy of token and retransmit list in case we have * to flush incoming messages from the kernel queue */ token = (struct orf_token *)token_storage; memcpy (token, msg, sizeof (struct orf_token)); memcpy (&token->rtr_list[0], (char *)msg + sizeof (struct orf_token), sizeof (struct rtr_item) * RETRANSMIT_ENTRIES_MAX); /* * Handle merge detection timeout */ 
if (token->seq == instance->my_last_seq) { start_merge_detect_timeout (instance); instance->my_seq_unchanged += 1; } else { cancel_merge_detect_timeout (instance); cancel_token_hold_retransmit_timeout (instance); instance->my_seq_unchanged = 0; } instance->my_last_seq = token->seq; #ifdef TEST_RECOVERY_MSG_COUNT if (instance->memb_state == MEMB_STATE_OPERATIONAL && token->seq > TEST_RECOVERY_MSG_COUNT) { return (0); } #endif totemrrp_recv_flush (instance->totemrrp_context); /* * Determine if we should hold (in reality drop) the token */ instance->my_token_held = 0; if (totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0]) && instance->my_seq_unchanged > instance->totem_config->seqno_unchanged_const) { instance->my_token_held = 1; } else if (!totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0]) && instance->my_seq_unchanged >= instance->totem_config->seqno_unchanged_const) { instance->my_token_held = 1; } /* * Hold onto token when there is no activity on ring and * this processor is the ring rep */ forward_token = 1; if (totemip_equal(&instance->my_ring_id.rep, &instance->my_id.addr[0])) { if (instance->my_token_held) { forward_token = 0; } } token_callbacks_execute (instance, TOTEM_CALLBACK_TOKEN_RECEIVED); switch (instance->memb_state) { case MEMB_STATE_COMMIT: /* Discard token */ break; case MEMB_STATE_OPERATIONAL: messages_free (instance, token->aru); /* * Do NOT add break, this case should also execute code in gather case. 
*/ case MEMB_STATE_GATHER: /* * DO NOT add break, we use different free mechanism in recovery state */ case MEMB_STATE_RECOVERY: /* * Discard tokens from another configuration */ if (memcmp (&token->ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)) != 0) { if ((forward_token) && instance->use_heartbeat) { reset_heartbeat_timeout(instance); } else { cancel_heartbeat_timeout(instance); } return (0); /* discard token */ } /* * Discard retransmitted tokens */ if (sq_lte_compare (token->token_seq, instance->my_token_seq)) { return (0); /* discard token */ } last_aru = instance->my_last_aru; instance->my_last_aru = token->aru; transmits_allowed = fcc_calculate (instance, token); mcasted_retransmit = orf_token_rtr (instance, token, &transmits_allowed); fcc_rtr_limit (instance, token, &transmits_allowed); mcasted_regular = orf_token_mcast (instance, token, transmits_allowed); /* if (mcasted_regular) { printf ("mcasted regular %d\n", mcasted_regular); printf ("token seq %d\n", token->seq); } */ fcc_token_update (instance, token, mcasted_retransmit + mcasted_regular); if (sq_lt_compare (instance->my_aru, token->aru) || instance->my_id.addr[0].nodeid == token->aru_addr || token->aru_addr == 0) { token->aru = instance->my_aru; if (token->aru == token->seq) { token->aru_addr = 0; } else { token->aru_addr = instance->my_id.addr[0].nodeid; } } if (token->aru == last_aru && token->aru_addr != 0) { instance->my_aru_count += 1; } else { instance->my_aru_count = 0; } if (instance->my_aru_count > instance->totem_config->fail_to_recv_const && token->aru_addr == instance->my_id.addr[0].nodeid) { log_printf (instance->totemsrp_log_level_error, "FAILED TO RECEIVE\n"); instance->failed_to_recv = 1; memb_set_merge (&instance->my_id, 1, instance->my_failed_list, &instance->my_failed_list_entries); memb_state_gather_enter (instance, 6); } else { instance->my_token_seq = token->token_seq; token->token_seq += 1; if (instance->memb_state == MEMB_STATE_RECOVERY) { /* * 
instance->my_aru == instance->my_high_seq_received means this processor * has recovered all messages it can recover * (ie: its retrans queue is empty) */ if (cs_queue_is_empty (&instance->retrans_message_queue) == 0) { if (token->retrans_flg == 0) { token->retrans_flg = 1; instance->my_set_retrans_flg = 1; } } else if (token->retrans_flg == 1 && instance->my_set_retrans_flg) { token->retrans_flg = 0; instance->my_set_retrans_flg = 0; } log_printf (instance->totemsrp_log_level_debug, "token retrans flag is %d my set retrans flag%d retrans queue empty %d count %d, aru %x\n", token->retrans_flg, instance->my_set_retrans_flg, cs_queue_is_empty (&instance->retrans_message_queue), instance->my_retrans_flg_count, token->aru); if (token->retrans_flg == 0) { instance->my_retrans_flg_count += 1; } else { instance->my_retrans_flg_count = 0; } if (instance->my_retrans_flg_count == 2) { instance->my_install_seq = token->seq; } log_printf (instance->totemsrp_log_level_debug, "install seq %x aru %x high seq received %x\n", instance->my_install_seq, instance->my_aru, instance->my_high_seq_received); if (instance->my_retrans_flg_count >= 2 && instance->my_received_flg == 0 && sq_lte_compare (instance->my_install_seq, instance->my_aru)) { instance->my_received_flg = 1; instance->my_deliver_memb_entries = instance->my_trans_memb_entries; memcpy (instance->my_deliver_memb_list, instance->my_trans_memb_list, sizeof (struct totem_ip_address) * instance->my_trans_memb_entries); } if (instance->my_retrans_flg_count >= 3 && sq_lte_compare (instance->my_install_seq, token->aru)) { instance->my_rotation_counter += 1; } else { instance->my_rotation_counter = 0; } if (instance->my_rotation_counter == 2) { log_printf (instance->totemsrp_log_level_debug, "retrans flag count %x token aru %x install seq %x aru %x %x\n", instance->my_retrans_flg_count, token->aru, instance->my_install_seq, instance->my_aru, token->seq); memb_state_operational_enter (instance); instance->my_rotation_counter = 0; 
instance->my_retrans_flg_count = 0; } } totemrrp_send_flush (instance->totemrrp_context); token_send (instance, token, forward_token); #ifdef GIVEINFO tv_current = qb_util_nano_current_get (); tv_diff = tv_current - tv_old; tv_old = tv_current; log_printf (instance->totemsrp_log_level_debug, "I held %0.4f ms\n", ((float)tv_diff) / 1000000.0); #endif if (instance->memb_state == MEMB_STATE_OPERATIONAL) { messages_deliver_to_app (instance, 0, instance->my_high_seq_received); } /* * Deliver messages after token has been transmitted * to improve performance */ reset_token_timeout (instance); // REVIEWED reset_token_retransmit_timeout (instance); // REVIEWED if (totemip_equal(&instance->my_id.addr[0], &instance->my_ring_id.rep) && instance->my_token_held == 1) { start_token_hold_retransmit_timeout (instance); } token_callbacks_execute (instance, TOTEM_CALLBACK_TOKEN_SENT); } break; } if ((forward_token) && instance->use_heartbeat) { reset_heartbeat_timeout(instance); } else { cancel_heartbeat_timeout(instance); } return (0); } static void messages_deliver_to_app ( struct totemsrp_instance *instance, int skip, unsigned int end_point) { struct sort_queue_item *sort_queue_item_p; unsigned int i; int res; struct mcast *mcast_in; struct mcast mcast_header; unsigned int range = 0; int endian_conversion_required; unsigned int my_high_delivered_stored = 0; range = end_point - instance->my_high_delivered; if (range) { log_printf (instance->totemsrp_log_level_debug, "Delivering %x to %x\n", instance->my_high_delivered, end_point); } assert (range < QUEUE_RTR_ITEMS_SIZE_MAX); my_high_delivered_stored = instance->my_high_delivered; /* * Deliver messages in order from rtr queue to pending delivery queue */ for (i = 1; i <= range; i++) { void *ptr = 0; /* * If out of range of sort queue, stop assembly */ res = sq_in_range (&instance->regular_sort_queue, my_high_delivered_stored + i); if (res == 0) { break; } res = sq_item_get (&instance->regular_sort_queue, my_high_delivered_stored + 
i, &ptr); /* * If hole, stop assembly */ if (res != 0 && skip == 0) { break; } instance->my_high_delivered = my_high_delivered_stored + i; if (res != 0) { continue; } sort_queue_item_p = ptr; mcast_in = sort_queue_item_p->mcast; assert (mcast_in != (struct mcast *)0xdeadbeef); endian_conversion_required = 0; if (mcast_in->header.endian_detector != ENDIAN_LOCAL) { endian_conversion_required = 1; mcast_endian_convert (mcast_in, &mcast_header); } else { memcpy (&mcast_header, mcast_in, sizeof (struct mcast)); } /* * Skip messages not originated in instance->my_deliver_memb */ if (skip && memb_set_subset (&mcast_header.system_from, 1, instance->my_deliver_memb_list, instance->my_deliver_memb_entries) == 0) { instance->my_high_delivered = my_high_delivered_stored + i; continue; } /* * Message found */ log_printf (instance->totemsrp_log_level_debug, "Delivering MCAST message with seq %x to pending delivery queue\n", mcast_header.seq); /* * Message is locally originated multicast */ instance->totemsrp_deliver_fn ( mcast_header.header.nodeid, ((char *)sort_queue_item_p->mcast) + sizeof (struct mcast), sort_queue_item_p->msg_len - sizeof (struct mcast), endian_conversion_required); } } /* * recv message handler called when MCAST message type received */ static int message_handler_mcast ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed) { struct sort_queue_item sort_queue_item; struct sq *sort_queue; struct mcast mcast_header; if (endian_conversion_needed) { mcast_endian_convert (msg, &mcast_header); } else { memcpy (&mcast_header, msg, sizeof (struct mcast)); } if (mcast_header.header.encapsulated == MESSAGE_ENCAPSULATED) { sort_queue = &instance->recovery_sort_queue; } else { sort_queue = &instance->regular_sort_queue; } assert (msg_len <= FRAME_SIZE_MAX); #ifdef TEST_DROP_MCAST_PERCENTAGE if (random()%100 < TEST_DROP_MCAST_PERCENTAGE) { return (0); } #endif /* * If the message is foreign execute the switch below */ if 
(memcmp (&instance->my_ring_id, &mcast_header.ring_id, sizeof (struct memb_ring_id)) != 0) { switch (instance->memb_state) { case MEMB_STATE_OPERATIONAL: memb_set_merge ( &mcast_header.system_from, 1, instance->my_proc_list, &instance->my_proc_list_entries); memb_state_gather_enter (instance, 7); break; case MEMB_STATE_GATHER: if (!memb_set_subset ( &mcast_header.system_from, 1, instance->my_proc_list, instance->my_proc_list_entries)) { memb_set_merge (&mcast_header.system_from, 1, instance->my_proc_list, &instance->my_proc_list_entries); memb_state_gather_enter (instance, 8); return (0); } break; case MEMB_STATE_COMMIT: /* discard message */ instance->stats.rx_msg_dropped++; break; case MEMB_STATE_RECOVERY: /* discard message */ instance->stats.rx_msg_dropped++; break; } return (0); } log_printf (instance->totemsrp_log_level_debug, "Received ringid(%s:%lld) seq %x\n", totemip_print (&mcast_header.ring_id.rep), mcast_header.ring_id.seq, mcast_header.seq); /* * Add mcast message to rtr queue if not already in rtr queue * otherwise free io vectors */ if (msg_len > 0 && msg_len <= FRAME_SIZE_MAX && sq_in_range (sort_queue, mcast_header.seq) && sq_item_inuse (sort_queue, mcast_header.seq) == 0) { /* * Allocate new multicast memory block */ // TODO LEAK sort_queue_item.mcast = totemsrp_buffer_alloc (instance); if (sort_queue_item.mcast == NULL) { return (-1); /* error here is corrected by the algorithm */ } memcpy (sort_queue_item.mcast, msg, msg_len); sort_queue_item.msg_len = msg_len; if (sq_lt_compare (instance->my_high_seq_received, mcast_header.seq)) { instance->my_high_seq_received = mcast_header.seq; } sq_item_add (sort_queue, &sort_queue_item, mcast_header.seq); } update_aru (instance); if (instance->memb_state == MEMB_STATE_OPERATIONAL) { messages_deliver_to_app (instance, 0, instance->my_high_seq_received); } /* TODO remove from retrans message queue for old ring in recovery state */ return (0); } static int message_handler_memb_merge_detect ( struct 
totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed)
{
	struct memb_merge_detect memb_merge_detect;

	/* Work on a local, host-endian copy of the message */
	if (endian_conversion_needed) {
		memb_merge_detect_endian_convert (msg, &memb_merge_detect);
	} else {
		memcpy (&memb_merge_detect, msg,
			sizeof (struct memb_merge_detect));
	}

	/*
	 * do nothing if this is a merge detect from this configuration
	 */
	if (memcmp (&instance->my_ring_id, &memb_merge_detect.ring_id,
		sizeof (struct memb_ring_id)) == 0) {

		return (0);
	}

	/*
	 * Execute merge operation
	 */
	switch (instance->memb_state) {
	case MEMB_STATE_OPERATIONAL:
		memb_set_merge (&memb_merge_detect.system_from, 1,
			instance->my_proc_list, &instance->my_proc_list_entries);
		memb_state_gather_enter (instance, 9);
		break;

	case MEMB_STATE_GATHER:
		/* Only restart gather when the sender is not yet in my_proc_list */
		if (!memb_set_subset (
			&memb_merge_detect.system_from,
			1,
			instance->my_proc_list,
			instance->my_proc_list_entries)) {

			memb_set_merge (&memb_merge_detect.system_from, 1,
				instance->my_proc_list, &instance->my_proc_list_entries);
			memb_state_gather_enter (instance, 10);
			return (0);
		}
		break;

	case MEMB_STATE_COMMIT:
		/* do nothing in commit */
		break;

	case MEMB_STATE_RECOVERY:
		/* do nothing in recovery */
		break;
	}
	return (0);
}

/*
 * Process a join message that is already in host byte order.
 *
 * The processor and failed lists carried behind the memb_join header are
 * compared with this processor's own lists:
 *  - both equal: the sender is recorded with memb_consensus_set; once
 *    memb_consensus_agreed reports agreement, a commit token is created
 *    and the commit state entered, either because failed_to_recv was set
 *    (the lists are then first reset to contain only this processor) or
 *    because memb_lowest_in_config selects this processor.  Otherwise the
 *    function returns without further action;
 *  - both subsets of the local lists, or the sender is in the local
 *    failed list: the message is ignored;
 *  - otherwise the lists are merged into the local ones and the gather
 *    state is entered.
 * If gather was not entered above and the state is operational, the
 * gather state is entered at the end.
 */
static void memb_join_process (
	struct totemsrp_instance *instance,
	const struct memb_join *memb_join)
{
	struct srp_addr *proc_list;
	struct srp_addr *failed_list;
	int gather_entered = 0;
	int fail_minus_memb_entries = 0;
	struct srp_addr fail_minus_memb[PROCESSOR_COUNT_MAX];

	/* The failed list directly follows the processor list in the message */
	proc_list = (struct srp_addr *)memb_join->end_of_memb_join;
	failed_list = proc_list + memb_join->proc_list_entries;

/*
 * Debugging aid, disabled:
	memb_set_print ("proclist", proc_list, memb_join->proc_list_entries);
	memb_set_print ("faillist", failed_list, memb_join->failed_list_entries);
	memb_set_print ("my_proclist", instance->my_proc_list, instance->my_proc_list_entries);
	memb_set_print ("my_faillist", instance->my_failed_list, instance->my_failed_list_entries);
*/

	if (memb_set_equal (proc_list,
		memb_join->proc_list_entries,
		instance->my_proc_list,
		instance->my_proc_list_entries) &&

	memb_set_equal (failed_list,
		memb_join->failed_list_entries,
		instance->my_failed_list,
		instance->my_failed_list_entries)) {

		memb_consensus_set (instance, &memb_join->system_from);

		if (memb_consensus_agreed (instance) && instance->failed_to_recv == 1) {
			instance->failed_to_recv = 0;
			/* Restart with a membership that contains only this processor */
			srp_addr_copy (&instance->my_proc_list[0],
				&instance->my_id);
			instance->my_proc_list_entries = 1;
			instance->my_failed_list_entries = 0;

			memb_state_commit_token_create (instance);

			memb_state_commit_enter (instance);
			return;
		}
		if (memb_consensus_agreed (instance) &&
			memb_lowest_in_config (instance)) {

			memb_state_commit_token_create (instance);

			memb_state_commit_enter (instance);
		} else {
			return;
		}
	} else
	if (memb_set_subset (proc_list,
		memb_join->proc_list_entries,
		instance->my_proc_list,
		instance->my_proc_list_entries) &&

		memb_set_subset (failed_list,
		memb_join->failed_list_entries,
		instance->my_failed_list,
		instance->my_failed_list_entries)) {

		/* The message carries nothing the local lists do not already hold */
		return;
	} else
	if (memb_set_subset (&memb_join->system_from, 1,
		instance->my_failed_list, instance->my_failed_list_entries)) {

		/* The sender is already in the local failed list */
		return;
	} else {
		memb_set_merge (proc_list,
			memb_join->proc_list_entries,
			instance->my_proc_list, &instance->my_proc_list_entries);

		/* The sender lists this processor as failed: treat the sender as failed */
		if (memb_set_subset (
			&instance->my_id, 1,
			failed_list, memb_join->failed_list_entries)) {

			memb_set_merge (
				&memb_join->system_from, 1,
				instance->my_failed_list, &instance->my_failed_list_entries);
		} else {
			if (memb_set_subset (
				&memb_join->system_from, 1,
				instance->my_memb_list,
				instance->my_memb_entries)) {

				/*
				 * NOTE(review): this branch is only reached when the
				 * sender is not in my_failed_list (see the else-if
				 * above), so the else arm below looks unreachable -
				 * confirm before relying on it.
				 */
				if (memb_set_subset (
					&memb_join->system_from, 1,
					instance->my_failed_list,
					instance->my_failed_list_entries) == 0) {

					memb_set_merge (failed_list,
						memb_join->failed_list_entries,
						instance->my_failed_list, &instance->my_failed_list_entries);
				} else {
					memb_set_subtract (fail_minus_memb,
						&fail_minus_memb_entries,
						failed_list,
						memb_join->failed_list_entries,
						instance->my_memb_list,
						instance->my_memb_entries);

					memb_set_merge (fail_minus_memb,
						fail_minus_memb_entries,
						instance->my_failed_list,
						&instance->my_failed_list_entries);
				}
			}
		}
		memb_state_gather_enter (instance, 11);
		gather_entered = 1;
	}
	if (gather_entered == 0 &&
		instance->memb_state == MEMB_STATE_OPERATIONAL) {

		memb_state_gather_enter (instance, 12);
	}
}

/*
 * Convert a join message from the sender's byte order into host order.
 *
 * The header fields, system_from, the list counts and ring_seq are
 * converted into out, followed by every entry of the processor list and
 * of the failed list.  out must provide room for both lists behind the
 * fixed header; the counts are taken from the message and not bounded
 * here.
 */
static void memb_join_endian_convert (const struct memb_join *in, struct memb_join *out)
{
	int i;
	struct srp_addr *in_proc_list;
	struct srp_addr *in_failed_list;
	struct srp_addr *out_proc_list;
	struct srp_addr *out_failed_list;

	out->header.type = in->header.type;
	out->header.endian_detector = ENDIAN_LOCAL;
	out->header.nodeid = swab32 (in->header.nodeid);
	srp_addr_copy_endian_convert (&out->system_from, &in->system_from);
	out->proc_list_entries = swab32 (in->proc_list_entries);
	out->failed_list_entries = swab32 (in->failed_list_entries);
	out->ring_seq = swab64 (in->ring_seq);

	/* List positions are computed from the already converted counts in out */
	in_proc_list = (struct srp_addr *)in->end_of_memb_join;
	in_failed_list = in_proc_list + out->proc_list_entries;
	out_proc_list = (struct srp_addr *)out->end_of_memb_join;
	out_failed_list = out_proc_list + out->proc_list_entries;

	for (i = 0; i < out->proc_list_entries; i++) {
		srp_addr_copy_endian_convert (&out_proc_list[i], &in_proc_list[i]);
	}
	for (i = 0; i < out->failed_list_entries; i++) {
		srp_addr_copy_endian_convert (&out_failed_list[i], &in_failed_list[i]);
	}
}

/*
 * Convert a commit token from the sender's byte order into host order,
 * including the address list and the per-member entries behind the
 * fixed header.
 */
static void memb_commit_token_endian_convert (const struct memb_commit_token *in, struct memb_commit_token *out)
{
	int i;
	struct srp_addr *in_addr = (struct srp_addr *)in->end_of_commit_token;
	struct srp_addr *out_addr = (struct srp_addr *)out->end_of_commit_token;
	struct memb_commit_token_memb_entry *in_memb_list;
	struct memb_commit_token_memb_entry *out_memb_list;

	out->header.type = in->header.type;
	out->header.endian_detector = ENDIAN_LOCAL;
	out->header.nodeid = swab32 (in->header.nodeid);
	out->token_seq = swab32 (in->token_seq);
	totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep);
	out->ring_id.seq = swab64 (in->ring_id.seq);
	out->retrans_flg =
swab32 (in->retrans_flg); out->memb_index = swab32 (in->memb_index); out->addr_entries = swab32 (in->addr_entries); in_memb_list = (struct memb_commit_token_memb_entry *)(in_addr + out->addr_entries); out_memb_list = (struct memb_commit_token_memb_entry *)(out_addr + out->addr_entries); for (i = 0; i < out->addr_entries; i++) { srp_addr_copy_endian_convert (&out_addr[i], &in_addr[i]); /* * Only convert the memb entry if it has been set */ if (in_memb_list[i].ring_id.rep.family != 0) { totemip_copy_endian_convert (&out_memb_list[i].ring_id.rep, &in_memb_list[i].ring_id.rep); out_memb_list[i].ring_id.seq = swab64 (in_memb_list[i].ring_id.seq); out_memb_list[i].aru = swab32 (in_memb_list[i].aru); out_memb_list[i].high_delivered = swab32 (in_memb_list[i].high_delivered); out_memb_list[i].received_flg = swab32 (in_memb_list[i].received_flg); } } } static void orf_token_endian_convert (const struct orf_token *in, struct orf_token *out) { int i; out->header.type = in->header.type; out->header.endian_detector = ENDIAN_LOCAL; out->header.nodeid = swab32 (in->header.nodeid); out->seq = swab32 (in->seq); out->token_seq = swab32 (in->token_seq); out->aru = swab32 (in->aru); totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep); out->aru_addr = swab32(in->aru_addr); out->ring_id.seq = swab64 (in->ring_id.seq); out->fcc = swab32 (in->fcc); out->backlog = swab32 (in->backlog); out->retrans_flg = swab32 (in->retrans_flg); out->rtr_list_entries = swab32 (in->rtr_list_entries); for (i = 0; i < out->rtr_list_entries; i++) { totemip_copy_endian_convert(&out->rtr_list[i].ring_id.rep, &in->rtr_list[i].ring_id.rep); out->rtr_list[i].ring_id.seq = swab64 (in->rtr_list[i].ring_id.seq); out->rtr_list[i].seq = swab32 (in->rtr_list[i].seq); } } static void mcast_endian_convert (const struct mcast *in, struct mcast *out) { out->header.type = in->header.type; out->header.endian_detector = ENDIAN_LOCAL; out->header.nodeid = swab32 (in->header.nodeid); out->header.encapsulated = 
in->header.encapsulated; out->seq = swab32 (in->seq); out->this_seqno = swab32 (in->this_seqno); totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep); out->ring_id.seq = swab64 (in->ring_id.seq); out->node_id = swab32 (in->node_id); out->guarantee = swab32 (in->guarantee); srp_addr_copy_endian_convert (&out->system_from, &in->system_from); } static void memb_merge_detect_endian_convert ( const struct memb_merge_detect *in, struct memb_merge_detect *out) { out->header.type = in->header.type; out->header.endian_detector = ENDIAN_LOCAL; out->header.nodeid = swab32 (in->header.nodeid); totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep); out->ring_id.seq = swab64 (in->ring_id.seq); srp_addr_copy_endian_convert (&out->system_from, &in->system_from); } static int message_handler_memb_join ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed) { const struct memb_join *memb_join; struct memb_join *memb_join_convert = alloca (msg_len); if (endian_conversion_needed) { memb_join = memb_join_convert; memb_join_endian_convert (msg, memb_join_convert); } else { memb_join = msg; } /* * If the process paused because it wasn't scheduled in a timely * fashion, flush the join messages because they may be queued * entries */ if (pause_flush (instance)) { return (0); } if (instance->token_ring_id_seq < memb_join->ring_seq) { instance->token_ring_id_seq = memb_join->ring_seq; } switch (instance->memb_state) { case MEMB_STATE_OPERATIONAL: memb_join_process (instance, memb_join); break; case MEMB_STATE_GATHER: memb_join_process (instance, memb_join); break; case MEMB_STATE_COMMIT: if (memb_set_subset (&memb_join->system_from, 1, instance->my_new_memb_list, instance->my_new_memb_entries) && memb_join->ring_seq >= instance->my_ring_id.seq) { memb_join_process (instance, memb_join); memb_state_gather_enter (instance, 13); } break; case MEMB_STATE_RECOVERY: if (memb_set_subset (&memb_join->system_from, 1, 
instance->my_new_memb_list, instance->my_new_memb_entries) && memb_join->ring_seq >= instance->my_ring_id.seq) { memb_join_process (instance, memb_join); memb_recovery_state_token_loss (instance); memb_state_gather_enter (instance, 14); } break; } return (0); } static int message_handler_memb_commit_token ( struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed) { struct memb_commit_token *memb_commit_token_convert = alloca (msg_len); struct memb_commit_token *memb_commit_token; struct srp_addr sub[PROCESSOR_COUNT_MAX]; int sub_entries; struct srp_addr *addr; log_printf (instance->totemsrp_log_level_debug, "got commit token\n"); if (endian_conversion_needed) { memb_commit_token_endian_convert (msg, memb_commit_token_convert); } else { memcpy (memb_commit_token_convert, msg, msg_len); } memb_commit_token = memb_commit_token_convert; addr = (struct srp_addr *)memb_commit_token->end_of_commit_token; #ifdef TEST_DROP_COMMIT_TOKEN_PERCENTAGE if (random()%100 < TEST_DROP_COMMIT_TOKEN_PERCENTAGE) { return (0); } #endif switch (instance->memb_state) { case MEMB_STATE_OPERATIONAL: /* discard token */ break; case MEMB_STATE_GATHER: memb_set_subtract (sub, &sub_entries, instance->my_proc_list, instance->my_proc_list_entries, instance->my_failed_list, instance->my_failed_list_entries); if (memb_set_equal (addr, memb_commit_token->addr_entries, sub, sub_entries) && memb_commit_token->ring_id.seq > instance->my_ring_id.seq) { memcpy (instance->commit_token, memb_commit_token, msg_len); memb_state_commit_enter (instance); } break; case MEMB_STATE_COMMIT: /* * If retransmitted commit tokens are sent on this ring * filter them out and only enter recovery once the * commit token has traversed the array. 
This is * determined by : * memb_commit_token->memb_index == memb_commit_token->addr_entries) { */ if (memb_commit_token->ring_id.seq == instance->my_ring_id.seq && memb_commit_token->memb_index == memb_commit_token->addr_entries) { memb_state_recovery_enter (instance, memb_commit_token); } break; case MEMB_STATE_RECOVERY: if (totemip_equal (&instance->my_id.addr[0], &instance->my_ring_id.rep)) { log_printf (instance->totemsrp_log_level_debug, "Sending initial ORF token\n"); // TODO convert instead of initiate orf_token_send_initial (instance); reset_token_timeout (instance); // REVIEWED reset_token_retransmit_timeout (instance); // REVIEWED } break; } return (0); }

/*
 * Handle a token-hold-cancel message.
 *
 * If the ring id carried in the message is byte-identical to this
 * processor's ring id, my_seq_unchanged is reset to 0 and, when
 * my_ring_id.rep equals this processor's first interface address, the
 * token retransmit timer function is invoked directly.
 *
 * msg_len and endian_conversion_needed are not consulted here.
 * Always returns 0.
 */
static int message_handler_token_hold_cancel (
	struct totemsrp_instance *instance,
	const void *msg,
	size_t msg_len,
	int endian_conversion_needed)
{
	const struct token_hold_cancel *cancel_msg = msg;

	/* Messages for a different ring are ignored */
	if (memcmp (&cancel_msg->ring_id, &instance->my_ring_id,
		sizeof (struct memb_ring_id)) != 0) {
		return (0);
	}

	instance->my_seq_unchanged = 0;
	if (totemip_equal (&instance->my_ring_id.rep, &instance->my_id.addr[0])) {
		timer_function_token_retransmit_timeout (instance);
	}

	return (0);
}

/*
 * Entry point for every received totemsrp message.
 *
 * context  the struct totemsrp_instance the message belongs to
 * msg      start of the message; begins with a struct message_header
 * msg_len  number of bytes available at msg
 *
 * Messages shorter than a message_header are logged and ignored.  For a
 * known type the matching receive counter is incremented and the message is
 * handed to the handler registered for that type, together with a flag
 * telling it whether the sender's endian detector differs from ENDIAN_LOCAL.
 * Unknown types are logged, counted in rx_msg_dropped and ignored.
 */
void main_deliver_fn (
	void *context,
	const void *msg,
	unsigned int msg_len)
{
	struct totemsrp_instance *instance = context;
	const struct message_header *hdr = msg;
	int needs_endian_conversion;

	if (msg_len < sizeof (struct message_header)) {
		log_printf (instance->totemsrp_log_level_security,
			"Received message is too short... ignoring %u.\n",
			(unsigned int)msg_len);
		return;
	}

	/*
	 * Account for the message and reject unknown types before the type
	 * is used as an index into the handler table below.
	 */
	switch (hdr->type) {
	case MESSAGE_TYPE_ORF_TOKEN:
		instance->stats.orf_token_rx++;
		break;
	case MESSAGE_TYPE_MCAST:
		instance->stats.mcast_rx++;
		break;
	case MESSAGE_TYPE_MEMB_MERGE_DETECT:
		instance->stats.memb_merge_detect_rx++;
		break;
	case MESSAGE_TYPE_MEMB_JOIN:
		instance->stats.memb_join_rx++;
		break;
	case MESSAGE_TYPE_MEMB_COMMIT_TOKEN:
		instance->stats.memb_commit_token_rx++;
		break;
	case MESSAGE_TYPE_TOKEN_HOLD_CANCEL:
		instance->stats.token_hold_cancel_rx++;
		break;
	default:
		log_printf (instance->totemsrp_log_level_security,
			"Type of received message is wrong... ignoring %d.\n",
			(int)hdr->type);
		printf ("wrong message type\n");
		instance->stats.rx_msg_dropped++;
		return;
	}

	/*
	 * Dispatch to the handler registered for this message type
	 */
	needs_endian_conversion = (hdr->endian_detector != ENDIAN_LOCAL);
	totemsrp_message_handlers.handler_functions[(int)hdr->type] (
		instance,
		msg,
		msg_len,
		needs_endian_conversion);
}

/*
 * Record an interface address change for interface iface_no.
 *
 * The new address is copied into my_id and into the first membership list
 * entry.  On the first change ever seen the ring id is created or loaded,
 * logged, and the service-ready callback (if registered) is invoked.  The
 * members configured for the interface are then added, and once the number
 * of changes seen reaches the configured interface count the gather state
 * is entered.
 */
void main_iface_change_fn (
	void *context,
	const struct totem_ip_address *iface_addr,
	unsigned int iface_no)
{
	struct totemsrp_instance *instance = context;
	int member_idx;
	int is_first_change;

	totemip_copy (&instance->my_id.addr[iface_no], iface_addr);
	assert (instance->my_id.addr[iface_no].nodeid);

	totemip_copy (&instance->my_memb_list[0].addr[iface_no], iface_addr);

	is_first_change = (instance->iface_changes == 0);
	instance->iface_changes++;

	if (is_first_change) {
		memb_ring_id_create_or_load (instance, &instance->my_ring_id);
		log_printf (
			instance->totemsrp_log_level_debug,
			"Created or loaded sequence id %lld.%s for this ring.\n",
			instance->my_ring_id.seq,
			totemip_print (&instance->my_ring_id.rep));

		if (instance->totemsrp_service_ready_fn) {
			instance->totemsrp_service_ready_fn ();
		}
	}

	member_idx = 0;
	while (member_idx < instance->totem_config->interfaces[iface_no].member_count) {
		totemsrp_member_add (instance,
			&instance->totem_config->interfaces[iface_no].member_list[member_idx],
			iface_no);
		member_idx++;
	}

	if (instance->iface_changes >= instance->totem_config->interface_count) {
		memb_state_gather_enter (instance, 15);
	}
}

void totemsrp_net_mtu_adjust (struct totem_config
*totem_config) { totem_config->net_mtu -= sizeof (struct mcast); } void totemsrp_service_ready_register ( void *context, void (*totem_service_ready) (void)) { struct totemsrp_instance *instance = (struct totemsrp_instance *)context; instance->totemsrp_service_ready_fn = totem_service_ready; } int totemsrp_member_add ( void *context, const struct totem_ip_address *member, int ring_no) { struct totemsrp_instance *instance = (struct totemsrp_instance *)context; int res; res = totemrrp_member_add (instance->totemrrp_context, member, ring_no); return (res); } int totemsrp_member_remove ( void *context, const struct totem_ip_address *member, int ring_no) { struct totemsrp_instance *instance = (struct totemsrp_instance *)context; int res; res = totemrrp_member_remove (instance->totemrrp_context, member, ring_no); return (res); } diff --git a/exec/totemudp.c b/exec/totemudp.c index fb801f3d..ed2f03c0 100644 --- a/exec/totemudp.c +++ b/exec/totemudp.c @@ -1,1947 +1,1956 @@ /* * Copyright (c) 2005 MontaVista Software, Inc. * Copyright (c) 2006-2009 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define LOGSYS_UTILS_ONLY 1 #include #include "totemudp.h" #include "crypto.h" #include "util.h" #ifdef HAVE_LIBNSS #include #include #include #include #endif #ifndef MSG_NOSIGNAL #define MSG_NOSIGNAL 0 #endif #define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX) #define NETIF_STATE_REPORT_UP 1 #define NETIF_STATE_REPORT_DOWN 2 #define BIND_STATE_UNBOUND 0 #define BIND_STATE_REGULAR 1 #define BIND_STATE_LOOPBACK 2 #define HMAC_HASH_SIZE 20 struct security_header { unsigned char hash_digest[HMAC_HASH_SIZE]; /* The hash *MUST* be first in the data structure */ unsigned char salt[16]; /* random number */ char msg[0]; } __attribute__((packed)); struct totemudp_mcast_thread_state { unsigned char iobuf[FRAME_SIZE_MAX]; prng_state prng_state; }; struct totemudp_socket { int mcast_recv; int mcast_send; int token; }; struct totemudp_instance { hmac_state totemudp_hmac_state; prng_state totemudp_prng_state; #ifdef HAVE_LIBNSS PK11SymKey 
*nss_sym_key; PK11SymKey *nss_sym_key_sign; #endif unsigned char totemudp_private_key[1024]; unsigned int totemudp_private_key_len; qb_loop_t *totemudp_poll_handle; struct totem_interface *totem_interface; int netif_state_report; int netif_bind_state; void *context; void (*totemudp_deliver_fn) ( void *context, const void *msg, unsigned int msg_len); void (*totemudp_iface_change_fn) ( void *context, const struct totem_ip_address *iface_address); void (*totemudp_target_set_completed) (void *context); /* * Function and data used to log messages */ int totemudp_log_level_security; int totemudp_log_level_error; int totemudp_log_level_warning; int totemudp_log_level_notice; int totemudp_log_level_debug; int totemudp_subsys_id; void (*totemudp_log_printf) ( - unsigned int rec_ident, + int level, + int subsys, const char *function, const char *file, int line, const char *format, - ...)__attribute__((format(printf, 5, 6))); + ...)__attribute__((format(printf, 6, 7))); void *udp_context; char iov_buffer[FRAME_SIZE_MAX]; char iov_buffer_flush[FRAME_SIZE_MAX]; struct iovec totemudp_iov_recv; struct iovec totemudp_iov_recv_flush; struct totemudp_socket totemudp_sockets; struct totem_ip_address mcast_address; int stats_sent; int stats_recv; int stats_delv; int stats_remcasts; int stats_orf_token; struct timeval stats_tv_start; struct totem_ip_address my_id; int firstrun; qb_loop_timer_handle timer_netif_check_timeout; unsigned int my_memb_entries; int flushing; struct totem_config *totem_config; struct totem_ip_address token_target; }; struct work_item { const void *msg; unsigned int msg_len; struct totemudp_instance *instance; }; static int totemudp_build_sockets ( struct totemudp_instance *instance, struct totem_ip_address *bindnet_address, struct totem_ip_address *mcastaddress, struct totemudp_socket *sockets, struct totem_ip_address *bound_to); static struct totem_ip_address localhost; static void totemudp_instance_initialize (struct totemudp_instance *instance) { memset 
(instance, 0, sizeof (struct totemudp_instance)); instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN; instance->totemudp_iov_recv.iov_base = instance->iov_buffer; instance->totemudp_iov_recv.iov_len = FRAME_SIZE_MAX; //sizeof (instance->iov_buffer); instance->totemudp_iov_recv_flush.iov_base = instance->iov_buffer_flush; instance->totemudp_iov_recv_flush.iov_len = FRAME_SIZE_MAX; //sizeof (instance->iov_buffer); /* * There is always atleast 1 processor */ instance->my_memb_entries = 1; } #define log_printf(level, format, args...) \ do { \ instance->totemudp_log_printf ( \ - LOGSYS_ENCODE_RECID(level, \ - instance->totemudp_subsys_id, \ - LOGSYS_RECID_LOG), \ + level, instance->totemudp_subsys_id, \ __FUNCTION__, __FILE__, __LINE__, \ (const char *)format, ##args); \ } while (0); +#define LOGSYS_PERROR(err_num, level, fmt, args...) \ +do { \ + char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \ + const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \ + instance->totemudp_log_printf ( \ + level, instance->totemudp_subsys_id, \ + __FUNCTION__, __FILE__, __LINE__, \ + fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \ + } while(0) + static int authenticate_and_decrypt_sober ( struct totemudp_instance *instance, struct iovec *iov, unsigned int iov_len) { unsigned char keys[48]; struct security_header *header = (struct security_header *)iov[0].iov_base; prng_state keygen_prng_state; prng_state stream_prng_state; unsigned char *hmac_key = &keys[32]; unsigned char *cipher_key = &keys[16]; unsigned char *initial_vector = &keys[0]; unsigned char digest_comparison[HMAC_HASH_SIZE]; unsigned long len; /* * Generate MAC, CIPHER, IV keys from private key */ memset (keys, 0, sizeof (keys)); sober128_start (&keygen_prng_state); sober128_add_entropy (instance->totemudp_private_key, instance->totemudp_private_key_len, &keygen_prng_state); sober128_add_entropy (header->salt, sizeof (header->salt), &keygen_prng_state); sober128_read 
(keys, sizeof (keys), &keygen_prng_state); /* * Setup stream cipher */ sober128_start (&stream_prng_state); sober128_add_entropy (cipher_key, 16, &stream_prng_state); sober128_add_entropy (initial_vector, 16, &stream_prng_state); /* * Authenticate contents of message */ hmac_init (&instance->totemudp_hmac_state, DIGEST_SHA1, hmac_key, 16); hmac_process (&instance->totemudp_hmac_state, (unsigned char *)iov->iov_base + HMAC_HASH_SIZE, iov->iov_len - HMAC_HASH_SIZE); len = hash_descriptor[DIGEST_SHA1]->hashsize; assert (HMAC_HASH_SIZE >= len); hmac_done (&instance->totemudp_hmac_state, digest_comparison, &len); if (memcmp (digest_comparison, header->hash_digest, len) != 0) { return (-1); } /* * Decrypt the contents of the message with the cipher key */ sober128_read ((unsigned char*)iov->iov_base + sizeof (struct security_header), iov->iov_len - sizeof (struct security_header), &stream_prng_state); return (0); } static void init_sober_crypto( struct totemudp_instance *instance) { log_printf(instance->totemudp_log_level_notice, "Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0).\n"); rng_make_prng (128, PRNG_SOBER, &instance->totemudp_prng_state, NULL); } #ifdef HAVE_LIBNSS static unsigned char *copy_from_iovec( const struct iovec *iov, unsigned int iov_len, size_t *buf_size) { int i; size_t bufptr; size_t buflen = 0; unsigned char *newbuf; for (i=0; i buf_size) { copylen = buf_size - bufptr; } memcpy(iov[i].iov_base, buf+bufptr, copylen); bufptr += copylen; if (iov[i].iov_len != copylen) { iov[i].iov_len = copylen; return; } } } static void init_nss_crypto( struct totemudp_instance *instance) { PK11SlotInfo* aes_slot = NULL; PK11SlotInfo* sha1_slot = NULL; SECItem key_item; SECStatus rv; log_printf(instance->totemudp_log_level_notice, "Initializing transmit/receive security: NSS AES128CBC/SHA1HMAC (mode 1).\n"); rv = NSS_NoDB_Init("."); if (rv != SECSuccess) { log_printf(instance->totemudp_log_level_security, "NSS initialization failed 
(err %d)\n", PR_GetError()); goto out; } aes_slot = PK11_GetBestSlot(instance->totem_config->crypto_crypt_type, NULL); if (aes_slot == NULL) { log_printf(instance->totemudp_log_level_security, "Unable to find security slot (err %d)\n", PR_GetError()); goto out; } sha1_slot = PK11_GetBestSlot(CKM_SHA_1_HMAC, NULL); if (sha1_slot == NULL) { log_printf(instance->totemudp_log_level_security, "Unable to find security slot (err %d)\n", PR_GetError()); goto out; } /* * Make the private key into a SymKey that we can use */ key_item.type = siBuffer; key_item.data = instance->totem_config->private_key; key_item.len = 32; /* Use 128 bits */ instance->nss_sym_key = PK11_ImportSymKey(aes_slot, instance->totem_config->crypto_crypt_type, PK11_OriginUnwrap, CKA_ENCRYPT|CKA_DECRYPT, &key_item, NULL); if (instance->nss_sym_key == NULL) { log_printf(instance->totemudp_log_level_security, "Failure to import key into NSS (err %d)\n", PR_GetError()); goto out; } instance->nss_sym_key_sign = PK11_ImportSymKey(sha1_slot, CKM_SHA_1_HMAC, PK11_OriginUnwrap, CKA_SIGN, &key_item, NULL); if (instance->nss_sym_key_sign == NULL) { log_printf(instance->totemudp_log_level_security, "Failure to import key into NSS (err %d)\n", PR_GetError()); goto out; } out: return; }

/*
 * Encrypt and sign a message with the NSS back end.
 *
 * The iovec contents are flattened into a temporary heap copy.  The first
 * sizeof (struct security_header) bytes of that copy are skipped and the
 * remainder is encrypted into buf just past the header.  A freshly generated
 * random IV is stored in header->salt and an HMAC computed over the salt
 * plus the ciphertext is stored in header->hash_digest.
 *
 * buf      output frame; the output bounds used below assume it holds
 *          FRAME_SIZE_MAX bytes
 * buf_len  receives the frame length; it includes the security header only
 *          when 0 is returned
 * iovec    input vector with iov_len entries
 *
 * Returns 0 on success and -1 on failure.  All exit paths after the copy
 * has been made release both the copy and the IV parameter item.
 */
static int encrypt_and_sign_nss (
	struct totemudp_instance *instance,
	unsigned char *buf,
	size_t *buf_len,
	const struct iovec *iovec,
	unsigned int iov_len)
{
	PK11Context *enc_context = NULL;
	SECItem *nss_sec_param = NULL;
	SECStatus rv1, rv2;
	int tmp1_outlen = 0;
	unsigned int tmp2_outlen = 0;
	unsigned char *inbuf;
	unsigned char *data;
	unsigned char *outdata;
	size_t datalen;
	SECItem no_params;
	SECItem iv_item;
	struct security_header *header;
	unsigned char nss_iv_data[16];
	int res = -1;

	no_params.type = siBuffer;
	no_params.data = 0;
	no_params.len = 0;

	inbuf = copy_from_iovec(iovec, iov_len, &datalen);
	if (!inbuf) {
		log_printf(instance->totemudp_log_level_security,
			"malloc error copying buffer from iovec\n");
		return -1;
	}

	data = inbuf + sizeof (struct security_header);
	datalen -= sizeof (struct security_header);

	outdata = buf + sizeof (struct security_header);
	header = (struct security_header *)buf;

	/*
	 * Without a random IV the frame must not be produced at all:
	 * nss_iv_data would otherwise be used uninitialised.
	 */
	if (PK11_GenerateRandom (nss_iv_data, sizeof (nss_iv_data)) != SECSuccess) {
		log_printf(instance->totemudp_log_level_security,
			"Failure to generate a random number %d\n",
			PR_GetError());
		goto out;
	}

	memcpy(header->salt, nss_iv_data, sizeof(nss_iv_data));
	iv_item.type = siBuffer;
	iv_item.data = nss_iv_data;
	iv_item.len = sizeof (nss_iv_data);

	nss_sec_param = PK11_ParamFromIV (
		instance->totem_config->crypto_crypt_type,
		&iv_item);
	if (nss_sec_param == NULL) {
		log_printf(instance->totemudp_log_level_security,
			"Failure to set up PKCS11 param (err %d)\n",
			PR_GetError());
		goto out;
	}

	/*
	 * Create cipher context for encryption
	 */
	enc_context = PK11_CreateContextBySymKey (
		instance->totem_config->crypto_crypt_type,
		CKA_ENCRYPT,
		instance->nss_sym_key,
		nss_sec_param);
	if (!enc_context) {
		char err[1024];
		PR_GetErrorText(err);
		err[PR_GetErrorTextLength()] = 0;
		log_printf(instance->totemudp_log_level_security,
			"PK11_CreateContext failed (encrypt) crypt_type=%d (err %d): %s\n",
			instance->totem_config->crypto_crypt_type,
			PR_GetError(), err);
		goto out;
	}

	rv1 = PK11_CipherOp(enc_context, outdata,
		&tmp1_outlen, FRAME_SIZE_MAX - sizeof(struct security_header),
		data, datalen);
	/*
	 * outdata starts sizeof (struct security_header) bytes into buf, so
	 * that offset has to be subtracted from the space left for the final
	 * block as well.
	 */
	rv2 = PK11_DigestFinal(enc_context, outdata + tmp1_outlen, &tmp2_outlen,
		FRAME_SIZE_MAX - sizeof(struct security_header) - tmp1_outlen);
	PK11_DestroyContext(enc_context, PR_TRUE);

	*buf_len = tmp1_outlen + tmp2_outlen;

	if (rv1 != SECSuccess || rv2 != SECSuccess) {
		goto out;
	}

	/* Now do the digest */
	enc_context = PK11_CreateContextBySymKey(CKM_SHA_1_HMAC,
		CKA_SIGN, instance->nss_sym_key_sign, &no_params);
	if (!enc_context) {
		char err[1024];
		PR_GetErrorText(err);
		err[PR_GetErrorTextLength()] = 0;
		log_printf(instance->totemudp_log_level_security,
			"encrypt: PK11_CreateContext failed (digest) err %d: %s\n",
			PR_GetError(), err);
		goto out;
	}

	PK11_DigestBegin(enc_context);

	/* The signature covers the salt, which sits directly before outdata */
	rv1 = PK11_DigestOp(enc_context, outdata - sizeof (header->salt),
		*buf_len + sizeof (header->salt));
	rv2 = PK11_DigestFinal(enc_context, header->hash_digest, &tmp2_outlen,
		sizeof(header->hash_digest));

	PK11_DestroyContext(enc_context, PR_TRUE);

	if (rv1 != SECSuccess || rv2 != SECSuccess) {
		goto out;
	}

	*buf_len = *buf_len + sizeof(struct security_header);
	res = 0;

out:
	if (nss_sec_param != NULL) {
		SECITEM_FreeItem(nss_sec_param, PR_TRUE);
	}
	free(inbuf);
	return res;
}

/*
 * Authenticate and decrypt a received frame with the NSS back end.
 *
 * The HMAC over salt plus ciphertext is recomputed and compared with
 * header->hash_digest; only when it matches is the payload decrypted, using
 * header->salt as IV, and copied back into iov.
 *
 * When iov_len is 1 the frame is processed in place from iov[0]; only for
 * iov_len > 1 is a flattened heap copy made, and only that copy is ever
 * passed to free().
 *
 * Returns 0 on success, -1 when the frame is too short, the digest cannot
 * be computed or does not match, or decryption fails.
 */
static int authenticate_and_decrypt_nss (
	struct totemudp_instance *instance,
	struct iovec *iov,
	unsigned int iov_len)
{
	PK11Context *enc_context = NULL;
	SECStatus rv1, rv2;
	int tmp1_outlen = 0;
	unsigned int tmp2_outlen = 0;
	unsigned char outbuf[FRAME_SIZE_MAX];
	unsigned char digest[HMAC_HASH_SIZE];
	unsigned char *outdata;
	int result_len;
	unsigned char *data;
	unsigned char *inbuf;
	unsigned char *heap_copy = NULL;
	size_t datalen;
	struct security_header *header = (struct security_header *)iov[0].iov_base;
	SECItem no_params;
	SECItem ivdata;
	int res = -1;

	no_params.type = siBuffer;
	no_params.data = 0;
	no_params.len = 0;

	if (iov_len > 1) {
		heap_copy = copy_from_iovec(iov, iov_len, &datalen);
		if (!heap_copy) {
			log_printf(instance->totemudp_log_level_security,
				"malloc error copying buffer from iovec\n");
			return -1;
		}
		inbuf = heap_copy;
	} else {
		inbuf = (unsigned char *)iov[0].iov_base;
		datalen = iov[0].iov_len;
	}

	/*
	 * A frame shorter than the security header cannot be valid, and the
	 * length arithmetic below would wrap around.
	 */
	if (datalen < sizeof (struct security_header)) {
		goto out;
	}

	/* The digest covers the salt and everything after it */
	data = inbuf + sizeof (struct security_header) - sizeof (header->salt);
	datalen = datalen - sizeof (struct security_header) + sizeof (header->salt);

	outdata = outbuf + sizeof (struct security_header);

	/* Check the digest */
	enc_context = PK11_CreateContextBySymKey (
		CKM_SHA_1_HMAC, CKA_SIGN,
		instance->nss_sym_key_sign,
		&no_params);
	if (!enc_context) {
		char err[1024];
		PR_GetErrorText(err);
		err[PR_GetErrorTextLength()] = 0;
		log_printf(instance->totemudp_log_level_security,
			"PK11_CreateContext failed (check digest) err %d: %s\n",
			PR_GetError(), err);
		goto out;
	}

	PK11_DigestBegin(enc_context);

	rv1 = PK11_DigestOp(enc_context, data, datalen);
	rv2 = PK11_DigestFinal(enc_context, digest, &tmp2_outlen, sizeof(digest));

	PK11_DestroyContext(enc_context, PR_TRUE);

	if (rv1 != SECSuccess || rv2 != SECSuccess) {
		log_printf(instance->totemudp_log_level_security,
			"Digest check failed\n");
		goto out;
	}

	if (memcmp(digest, header->hash_digest, tmp2_outlen) != 0) {
		log_printf(instance->totemudp_log_level_error,
			"Digest does not match\n");
		goto out;
	}

	/*
	 * Get rid of salt
	 */
	data += sizeof (header->salt);
	datalen -= sizeof (header->salt);

	/* Create cipher context for decryption */
	ivdata.type = siBuffer;
	ivdata.data = header->salt;
	ivdata.len = sizeof(header->salt);

	enc_context = PK11_CreateContextBySymKey(
		instance->totem_config->crypto_crypt_type,
		CKA_DECRYPT,
		instance->nss_sym_key, &ivdata);
	if (!enc_context) {
		log_printf(instance->totemudp_log_level_security,
			"PK11_CreateContext (decrypt) failed (err %d)\n",
			PR_GetError());
		goto out;
	}

	rv1 = PK11_CipherOp(enc_context, outdata, &tmp1_outlen,
		sizeof(outbuf) - sizeof (struct security_header),
		data, datalen);
	if (rv1 != SECSuccess) {
		log_printf(instance->totemudp_log_level_security,
			"PK11_CipherOp (decrypt) failed (err %d)\n",
			PR_GetError());
	}
	/* outdata is offset by the header, so the remaining space is too */
	rv2 = PK11_DigestFinal(enc_context, outdata + tmp1_outlen, &tmp2_outlen,
		sizeof(outbuf) - sizeof (struct security_header) - tmp1_outlen);
	PK11_DestroyContext(enc_context, PR_TRUE);
	result_len = tmp1_outlen + tmp2_outlen + sizeof (struct security_header);

	/* Copy it back to the buffer */
	copy_to_iovec(iov, iov_len, outbuf, result_len);

	if (rv1 == SECSuccess && rv2 == SECSuccess) {
		res = 0;
	}

out:
	/* NULL unless a flattened copy was allocated above */
	free(heap_copy);
	return res;
}
#endif
static int encrypt_and_sign_sober ( struct totemudp_instance *instance, unsigned char *buf, size_t *buf_len, const struct iovec *iovec, unsigned int iov_len) { int i; unsigned char *addr; unsigned char keys[48]; struct security_header *header; unsigned char *hmac_key = &keys[32]; unsigned char *cipher_key = &keys[16]; unsigned char *initial_vector = &keys[0]; unsigned long len; size_t outlen = 0; hmac_state hmac_st; prng_state keygen_prng_state; prng_state
stream_prng_state; prng_state *prng_state_in = &instance->totemudp_prng_state; header = (struct security_header *)buf; addr = buf + sizeof (struct security_header); memset (keys, 0, sizeof (keys)); memset (header->salt, 0, sizeof (header->salt)); /* * Generate MAC, CIPHER, IV keys from private key */ sober128_read (header->salt, sizeof (header->salt), prng_state_in); sober128_start (&keygen_prng_state); sober128_add_entropy (instance->totemudp_private_key, instance->totemudp_private_key_len, &keygen_prng_state); sober128_add_entropy (header->salt, sizeof (header->salt), &keygen_prng_state); sober128_read (keys, sizeof (keys), &keygen_prng_state); /* * Setup stream cipher */ sober128_start (&stream_prng_state); sober128_add_entropy (cipher_key, 16, &stream_prng_state); sober128_add_entropy (initial_vector, 16, &stream_prng_state); outlen = sizeof (struct security_header); /* * Copy remainder of message, then encrypt it */ for (i = 1; i < iov_len; i++) { memcpy (addr, iovec[i].iov_base, iovec[i].iov_len); addr += iovec[i].iov_len; outlen += iovec[i].iov_len; } /* * Encrypt message by XORing stream cipher data */ sober128_read (buf + sizeof (struct security_header), outlen - sizeof (struct security_header), &stream_prng_state); memset (&hmac_st, 0, sizeof (hmac_st)); /* * Sign the contents of the message with the hmac key and store signature in message */ hmac_init (&hmac_st, DIGEST_SHA1, hmac_key, 16); hmac_process (&hmac_st, buf + HMAC_HASH_SIZE, outlen - HMAC_HASH_SIZE); len = hash_descriptor[DIGEST_SHA1]->hashsize; hmac_done (&hmac_st, header->hash_digest, &len); *buf_len = outlen; return 0; } static int encrypt_and_sign_worker ( struct totemudp_instance *instance, unsigned char *buf, size_t *buf_len, const struct iovec *iovec, unsigned int iov_len) { if (instance->totem_config->crypto_type == TOTEM_CRYPTO_SOBER || instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_OLD) return encrypt_and_sign_sober(instance, buf, buf_len, iovec, iov_len); #ifdef 
HAVE_LIBNSS if (instance->totem_config->crypto_type == TOTEM_CRYPTO_NSS) return encrypt_and_sign_nss(instance, buf, buf_len, iovec, iov_len); #endif return -1; } static int authenticate_and_decrypt ( struct totemudp_instance *instance, struct iovec *iov, unsigned int iov_len) { unsigned char type; unsigned char *endbuf = (unsigned char *)iov[iov_len-1].iov_base; int res = -1; /* * Get the encryption type and remove it from the buffer */ type = endbuf[iov[iov_len-1].iov_len-1]; iov[iov_len-1].iov_len -= 1; if (type == TOTEM_CRYPTO_SOBER) res = authenticate_and_decrypt_sober(instance, iov, iov_len); /* * Only try higher crypto options if NEW has been requested */ if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_NEW) { #ifdef HAVE_LIBNSS if (type == TOTEM_CRYPTO_NSS) res = authenticate_and_decrypt_nss(instance, iov, iov_len); #endif } /* * If it failed, then try decrypting the whole packet as it might be * from aisexec */ if (res == -1) { iov[iov_len-1].iov_len += 1; res = authenticate_and_decrypt_sober(instance, iov, iov_len); } return res; } static void init_crypto( struct totemudp_instance *instance) { /* * If we are expecting NEW crypto type then initialise all available * crypto options. For OLD then we only need SOBER128. 
*/ init_sober_crypto(instance); if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_OLD) return; #ifdef HAVE_LIBNSS init_nss_crypto(instance); #endif } int totemudp_crypto_set ( void *udp_context, unsigned int type) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; /* * Can't set crypto type if OLD is selected */ if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_OLD) { res = -1; } else { /* * Validate crypto algorithm */ switch (type) { case TOTEM_CRYPTO_SOBER: log_printf(instance->totemudp_log_level_security, "Transmit security set to: libtomcrypt SOBER128/SHA1HMAC (mode 0)"); break; case TOTEM_CRYPTO_NSS: log_printf(instance->totemudp_log_level_security, "Transmit security set to: NSS AES128CBC/SHA1HMAC (mode 1)"); break; default: res = -1; break; } } return (res); } static inline void ucast_sendmsg ( struct totemudp_instance *instance, struct totem_ip_address *system_to, const void *msg, unsigned int msg_len) { struct msghdr msg_ucast; int res = 0; size_t buf_len; unsigned char sheader[sizeof (struct security_header)]; unsigned char encrypt_data[FRAME_SIZE_MAX]; struct iovec iovec_encrypt[2]; const struct iovec *iovec_sendmsg; struct sockaddr_storage sockaddr; struct iovec iovec; unsigned int iov_len; int addrlen; if (instance->totem_config->secauth == 1) { iovec_encrypt[0].iov_base = (void *)sheader; iovec_encrypt[0].iov_len = sizeof (struct security_header); iovec_encrypt[1].iov_base = (void *)msg; iovec_encrypt[1].iov_len = msg_len; /* * Encrypt and digest the message */ encrypt_and_sign_worker ( instance, encrypt_data, &buf_len, iovec_encrypt, 2); if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_NEW) { encrypt_data[buf_len++] = instance->totem_config->crypto_type; } else { encrypt_data[buf_len++] = 0; } iovec_encrypt[0].iov_base = (void *)encrypt_data; iovec_encrypt[0].iov_len = buf_len; iovec_sendmsg = &iovec_encrypt[0]; iov_len = 1; } else { iovec.iov_base = (void *)msg; 
iovec.iov_len = msg_len; iovec_sendmsg = &iovec; iov_len = 1; } /* * Build unicast message */ totemip_totemip_to_sockaddr_convert(system_to, instance->totem_interface->ip_port, &sockaddr, &addrlen); msg_ucast.msg_name = &sockaddr; msg_ucast.msg_namelen = addrlen; msg_ucast.msg_iov = (void *) iovec_sendmsg; msg_ucast.msg_iovlen = iov_len; #if !defined(COROSYNC_SOLARIS) msg_ucast.msg_control = 0; msg_ucast.msg_controllen = 0; msg_ucast.msg_flags = 0; #else msg_ucast.msg_accrights = NULL; msg_ucast.msg_accrightslen = 0; #endif /* * Transmit unicast message * An error here is recovered by totemsrp */ res = sendmsg (instance->totemudp_sockets.mcast_send, &msg_ucast, MSG_NOSIGNAL); if (res < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, "sendmsg(ucast) failed (non-critical)"); } } static inline void mcast_sendmsg ( struct totemudp_instance *instance, const void *msg, unsigned int msg_len) { struct msghdr msg_mcast; int res = 0; size_t buf_len; unsigned char sheader[sizeof (struct security_header)]; unsigned char encrypt_data[FRAME_SIZE_MAX]; struct iovec iovec_encrypt[2]; struct iovec iovec; const struct iovec *iovec_sendmsg; struct sockaddr_storage sockaddr; unsigned int iov_len; int addrlen; if (instance->totem_config->secauth == 1) { iovec_encrypt[0].iov_base = (void *)sheader; iovec_encrypt[0].iov_len = sizeof (struct security_header); iovec_encrypt[1].iov_base = (void *)msg; iovec_encrypt[1].iov_len = msg_len; /* * Encrypt and digest the message */ encrypt_and_sign_worker ( instance, encrypt_data, &buf_len, iovec_encrypt, 2); if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_NEW) { encrypt_data[buf_len++] = instance->totem_config->crypto_type; } else { encrypt_data[buf_len++] = 0; } iovec_encrypt[0].iov_base = (void *)encrypt_data; iovec_encrypt[0].iov_len = buf_len; iovec_sendmsg = &iovec_encrypt[0]; iov_len = 1; } else { iovec.iov_base = (void *)msg; iovec.iov_len = msg_len; iovec_sendmsg = &iovec; iov_len = 1; } /* * Build 
multicast message */ totemip_totemip_to_sockaddr_convert(&instance->mcast_address, instance->totem_interface->ip_port, &sockaddr, &addrlen); msg_mcast.msg_name = &sockaddr; msg_mcast.msg_namelen = addrlen; msg_mcast.msg_iov = (void *) iovec_sendmsg; msg_mcast.msg_iovlen = iov_len; #if !defined(COROSYNC_SOLARIS) msg_mcast.msg_control = 0; msg_mcast.msg_controllen = 0; msg_mcast.msg_flags = 0; #else msg_mcast.msg_accrights = NULL; msg_mcast.msg_accrightslen = 0; #endif /* * Transmit multicast message * An error here is recovered by totemsrp */ res = sendmsg (instance->totemudp_sockets.mcast_send, &msg_mcast, MSG_NOSIGNAL); if (res < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_debug, "sendmsg(mcast) failed (non-critical)"); } } int totemudp_finalize ( void *udp_context) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; if (instance->totemudp_sockets.mcast_recv > 0) { close (instance->totemudp_sockets.mcast_recv); qb_loop_poll_del (instance->totemudp_poll_handle, instance->totemudp_sockets.mcast_recv); } if (instance->totemudp_sockets.mcast_send > 0) { close (instance->totemudp_sockets.mcast_send); } if (instance->totemudp_sockets.token > 0) { close (instance->totemudp_sockets.token); qb_loop_poll_del (instance->totemudp_poll_handle, instance->totemudp_sockets.token); } return (res); } /* * Only designed to work with a message with one iov */ static int net_deliver_fn ( int fd, int revents, void *data) { struct totemudp_instance *instance = (struct totemudp_instance *)data; struct msghdr msg_recv; struct iovec *iovec; struct sockaddr_storage system_from; int bytes_received; int res = 0; unsigned char *msg_offset; unsigned int size_delv; if (instance->flushing == 1) { iovec = &instance->totemudp_iov_recv_flush; } else { iovec = &instance->totemudp_iov_recv; } /* * Receive datagram */ msg_recv.msg_name = &system_from; msg_recv.msg_namelen = sizeof (struct sockaddr_storage); msg_recv.msg_iov = iovec; 
msg_recv.msg_iovlen = 1; #if !defined(COROSYNC_SOLARIS) msg_recv.msg_control = 0; msg_recv.msg_controllen = 0; msg_recv.msg_flags = 0; #else msg_recv.msg_accrights = NULL; msg_recv.msg_accrightslen = 0; #endif bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); if (bytes_received == -1) { return (0); } else { instance->stats_recv += bytes_received; } if ((instance->totem_config->secauth == 1) && (bytes_received < sizeof (struct security_header))) { log_printf (instance->totemudp_log_level_security, "Received message is too short... ignoring %d.\n", bytes_received); return (0); } iovec->iov_len = bytes_received; if (instance->totem_config->secauth == 1) { /* * Authenticate and if authenticated, decrypt datagram */ res = authenticate_and_decrypt (instance, iovec, 1); if (res == -1) { log_printf (instance->totemudp_log_level_security, "Received message has invalid digest... ignoring.\n"); log_printf (instance->totemudp_log_level_security, "Invalid packet data\n"); iovec->iov_len = FRAME_SIZE_MAX; return 0; } msg_offset = (unsigned char *)iovec->iov_base + sizeof (struct security_header); size_delv = bytes_received - sizeof (struct security_header); } else { msg_offset = (void *)iovec->iov_base; size_delv = bytes_received; } /* * Handle incoming message */ instance->totemudp_deliver_fn ( instance->context, msg_offset, size_delv); iovec->iov_len = FRAME_SIZE_MAX; return (0); } static int netif_determine ( struct totemudp_instance *instance, struct totem_ip_address *bindnet, struct totem_ip_address *bound_to, int *interface_up, int *interface_num) { int res; res = totemip_iface_check (bindnet, bound_to, interface_up, interface_num, instance->totem_config->clear_node_high_bit); return (res); } /* * If the interface is up, the sockets for totem are built. If the interface is down * this function is requeued in the timer list to retry building the sockets later. 
*/ static void timer_function_netif_check_timeout ( void *data) { struct totemudp_instance *instance = (struct totemudp_instance *)data; int interface_up; int interface_num; struct totem_ip_address *bind_address; /* * Build sockets for every interface */ netif_determine (instance, &instance->totem_interface->bindnet, &instance->totem_interface->boundto, &interface_up, &interface_num); /* * If the network interface isn't back up and we are already * in loopback mode, add timer to check again and return */ if ((instance->netif_bind_state == BIND_STATE_LOOPBACK && interface_up == 0) || (instance->my_memb_entries == 1 && instance->netif_bind_state == BIND_STATE_REGULAR && interface_up == 1)) { qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); /* * Add a timer to check for a downed regular interface */ return; } if (instance->totemudp_sockets.mcast_recv > 0) { close (instance->totemudp_sockets.mcast_recv); qb_loop_poll_del (instance->totemudp_poll_handle, instance->totemudp_sockets.mcast_recv); } if (instance->totemudp_sockets.mcast_send > 0) { close (instance->totemudp_sockets.mcast_send); } if (instance->totemudp_sockets.token > 0) { close (instance->totemudp_sockets.token); qb_loop_poll_del (instance->totemudp_poll_handle, instance->totemudp_sockets.token); } if (interface_up == 0) { /* * Interface is not up */ instance->netif_bind_state = BIND_STATE_LOOPBACK; bind_address = &localhost; /* * Add a timer to retry building interfaces and request memb_gather_enter */ qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } else { /* * Interface is up */ instance->netif_bind_state = BIND_STATE_REGULAR; bind_address = 
&instance->totem_interface->bindnet; } /* * Create and bind the multicast and unicast sockets */ (void)totemudp_build_sockets (instance, &instance->mcast_address, bind_address, &instance->totemudp_sockets, &instance->totem_interface->boundto); qb_loop_poll_add ( instance->totemudp_poll_handle, QB_LOOP_MED, instance->totemudp_sockets.mcast_recv, POLLIN, instance, net_deliver_fn); qb_loop_poll_add ( instance->totemudp_poll_handle, QB_LOOP_MED, instance->totemudp_sockets.token, POLLIN, instance, net_deliver_fn); totemip_copy (&instance->my_id, &instance->totem_interface->boundto); /* * This reports changes in the interface to the user and totemsrp */ if (instance->netif_bind_state == BIND_STATE_REGULAR) { if (instance->netif_state_report & NETIF_STATE_REPORT_UP) { log_printf (instance->totemudp_log_level_notice, "The network interface [%s] is now up.\n", totemip_print (&instance->totem_interface->boundto)); instance->netif_state_report = NETIF_STATE_REPORT_DOWN; instance->totemudp_iface_change_fn (instance->context, &instance->my_id); } /* * Add a timer to check for interface going down in single membership */ if (instance->my_memb_entries == 1) { qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } } else { if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) { log_printf (instance->totemudp_log_level_notice, "The network interface is down.\n"); instance->totemudp_iface_change_fn (instance->context, &instance->my_id); } instance->netif_state_report = NETIF_STATE_REPORT_UP; } } /* Set the socket priority to INTERACTIVE to ensure that our messages don't get queued behind anything else */ static void totemudp_traffic_control_set(struct totemudp_instance *instance, int sock) { #ifdef SO_PRIORITY int prio = 6; /* TC_PRIO_INTERACTIVE */ if (setsockopt(sock, SOL_SOCKET, SO_PRIORITY, &prio, 
sizeof(int))) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set traffic priority"); } #endif } static int totemudp_build_sockets_ip ( struct totemudp_instance *instance, struct totem_ip_address *mcast_address, struct totem_ip_address *bindnet_address, struct totemudp_socket *sockets, struct totem_ip_address *bound_to, int interface_num) { struct sockaddr_storage sockaddr; struct ipv6_mreq mreq6; struct ip_mreq mreq; struct sockaddr_storage mcast_ss, boundto_ss; struct sockaddr_in6 *mcast_sin6 = (struct sockaddr_in6 *)&mcast_ss; struct sockaddr_in *mcast_sin = (struct sockaddr_in *)&mcast_ss; struct sockaddr_in *boundto_sin = (struct sockaddr_in *)&boundto_ss; unsigned int sendbuf_size; unsigned int recvbuf_size; unsigned int optlen = sizeof (sendbuf_size); int addrlen; int res; int flag; /* * Create multicast recv socket */ sockets->mcast_recv = socket (bindnet_address->family, SOCK_DGRAM, 0); if (sockets->mcast_recv == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "socket() failed"); return (-1); } totemip_nosigpipe (sockets->mcast_recv); res = fcntl (sockets->mcast_recv, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set non-blocking operation on multicast socket"); return (-1); } /* * Force reuse */ flag = 1; if ( setsockopt(sockets->mcast_recv, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "setsockopt(SO_REUSEADDR) failed"); return (-1); } /* * Bind to multicast socket used for multicast receives */ totemip_totemip_to_sockaddr_convert(mcast_address, instance->totem_interface->ip_port, &sockaddr, &addrlen); res = bind (sockets->mcast_recv, (struct sockaddr *)&sockaddr, addrlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Unable to bind the socket to receive multicast packets"); return (-1); } /* * Setup mcast send socket */ sockets->mcast_send 
= socket (bindnet_address->family, SOCK_DGRAM, 0); if (sockets->mcast_send == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "socket() failed"); return (-1); } totemip_nosigpipe (sockets->mcast_send); res = fcntl (sockets->mcast_send, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set non-blocking operation on multicast socket"); return (-1); } /* * Force reuse */ flag = 1; if ( setsockopt(sockets->mcast_send, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "setsockopt(SO_REUSEADDR) failed"); return (-1); } totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port - 1, &sockaddr, &addrlen); res = bind (sockets->mcast_send, (struct sockaddr *)&sockaddr, addrlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Unable to bind the socket to send multicast packets"); return (-1); } /* * Setup unicast socket */ sockets->token = socket (bindnet_address->family, SOCK_DGRAM, 0); if (sockets->token == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "socket() failed"); return (-1); } totemip_nosigpipe (sockets->token); res = fcntl (sockets->token, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Could not set non-blocking operation on token socket"); return (-1); } /* * Force reuse */ flag = 1; if ( setsockopt(sockets->token, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "setsockopt(SO_REUSEADDR) failed"); return (-1); } /* * Bind to unicast socket used for token send/receives * This has the side effect of binding to the correct interface */ totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &sockaddr, &addrlen); res = bind (sockets->token, (struct sockaddr *)&sockaddr, addrlen); if (res == -1) 
{ LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Unable to bind UDP unicast socket"); return (-1); } recvbuf_size = MCAST_SOCKET_BUFFER_SIZE; sendbuf_size = MCAST_SOCKET_BUFFER_SIZE; /* * Set buffer sizes to avoid overruns */ res = setsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen); res = setsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen); res = getsockopt (sockets->mcast_recv, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, &optlen); if (res == 0) { log_printf (instance->totemudp_log_level_debug, "Receive multicast socket recv buffer size (%d bytes).\n", recvbuf_size); } res = getsockopt (sockets->mcast_send, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, &optlen); if (res == 0) { log_printf (instance->totemudp_log_level_debug, "Transmit multicast socket send buffer size (%d bytes).\n", sendbuf_size); } /* * Join group membership on socket */ totemip_totemip_to_sockaddr_convert(mcast_address, instance->totem_interface->ip_port, &mcast_ss, &addrlen); totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &boundto_ss, &addrlen); if (instance->totem_config->broadcast_use == 1) { unsigned int broadcast = 1; if ((setsockopt(sockets->mcast_recv, SOL_SOCKET, SO_BROADCAST, &broadcast, sizeof (broadcast))) == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "setting broadcast option failed"); return (-1); } if ((setsockopt(sockets->mcast_send, SOL_SOCKET, SO_BROADCAST, &broadcast, sizeof (broadcast))) == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "setting broadcast option failed"); return (-1); } } else { switch (bindnet_address->family) { case AF_INET: memset(&mreq, 0, sizeof(mreq)); mreq.imr_multiaddr.s_addr = mcast_sin->sin_addr.s_addr; mreq.imr_interface.s_addr = boundto_sin->sin_addr.s_addr; res = setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof (mreq)); if (res == -1) { LOGSYS_PERROR (errno, 
instance->totemudp_log_level_warning, "join ipv4 multicast group failed"); return (-1); } break; case AF_INET6: memset(&mreq6, 0, sizeof(mreq6)); memcpy(&mreq6.ipv6mr_multiaddr, &mcast_sin6->sin6_addr, sizeof(struct in6_addr)); mreq6.ipv6mr_interface = interface_num; res = setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_JOIN_GROUP, &mreq6, sizeof (mreq6)); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "join ipv6 multicast group failed"); return (-1); } break; } } /* * Turn on multicast loopback */ flag = 1; switch ( bindnet_address->family ) { case AF_INET: res = setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_LOOP, &flag, sizeof (flag)); break; case AF_INET6: res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, &flag, sizeof (flag)); } if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "Unable to turn on multicast loopback"); return (-1); } /* * Set multicast packets TTL */ flag = instance->totem_interface->ttl; if (bindnet_address->family == AF_INET6) { res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, &flag, sizeof (flag)); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "set mcast v6 TTL failed"); return (-1); } } else { res = setsockopt(sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_TTL, &flag, sizeof(flag)); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "set mcast v4 TTL failed"); return (-1); } } /* * Bind to a specific interface for multicast send and receive */ switch ( bindnet_address->family ) { case AF_INET: if (setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_IF, &boundto_sin->sin_addr, sizeof (boundto_sin->sin_addr)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "cannot select interface for multicast packets (send)"); return (-1); } if (setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_MULTICAST_IF, &boundto_sin->sin_addr, sizeof 
(boundto_sin->sin_addr)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "cannot select interface for multicast packets (recv)"); return (-1); } break; case AF_INET6: if (setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_IF, &interface_num, sizeof (interface_num)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "cannot select interface for multicast packets (send v6)"); return (-1); } if (setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_MULTICAST_IF, &interface_num, sizeof (interface_num)) < 0) { LOGSYS_PERROR (errno, instance->totemudp_log_level_warning, "cannot select interface for multicast packets (recv v6)"); return (-1); } break; } return 0; } static int totemudp_build_sockets ( struct totemudp_instance *instance, struct totem_ip_address *mcast_address, struct totem_ip_address *bindnet_address, struct totemudp_socket *sockets, struct totem_ip_address *bound_to) { int interface_num; int interface_up; int res; /* * Determine the ip address bound to and the interface name */ res = netif_determine (instance, bindnet_address, bound_to, &interface_up, &interface_num); if (res == -1) { return (-1); } totemip_copy(&instance->my_id, bound_to); res = totemudp_build_sockets_ip (instance, mcast_address, bindnet_address, sockets, bound_to, interface_num); /* We only send out of the token socket */ totemudp_traffic_control_set(instance, sockets->token); return res; } /* * Totem Network interface - also does encryption/decryption * depends on poll abstraction, POSIX, IPV4 */ /* * Create an instance */ int totemudp_initialize ( qb_loop_t *poll_handle, void **udp_context, struct totem_config *totem_config, int interface_no, void *context, void (*deliver_fn) ( void *context, const void *msg, unsigned int msg_len), void (*iface_change_fn) ( void *context, const struct totem_ip_address *iface_address), void (*target_set_completed) ( void *context)) { struct totemudp_instance *instance; instance = malloc (sizeof (struct 
totemudp_instance)); if (instance == NULL) { return (-1); } totemudp_instance_initialize (instance); instance->totem_config = totem_config; /* * Configure logging */ instance->totemudp_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security; instance->totemudp_log_level_error = totem_config->totem_logging_configuration.log_level_error; instance->totemudp_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; instance->totemudp_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; instance->totemudp_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; instance->totemudp_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; instance->totemudp_log_printf = totem_config->totem_logging_configuration.log_printf; /* * Initialize random number generator for later use to generate salt */ memcpy (instance->totemudp_private_key, totem_config->private_key, totem_config->private_key_len); instance->totemudp_private_key_len = totem_config->private_key_len; init_crypto(instance); /* * Initialize local variables for totemudp */ instance->totem_interface = &totem_config->interfaces[interface_no]; totemip_copy (&instance->mcast_address, &instance->totem_interface->mcast_addr); memset (instance->iov_buffer, 0, FRAME_SIZE_MAX); instance->totemudp_poll_handle = poll_handle; instance->totem_interface->bindnet.nodeid = instance->totem_config->node_id; instance->context = context; instance->totemudp_deliver_fn = deliver_fn; instance->totemudp_iface_change_fn = iface_change_fn; instance->totemudp_target_set_completed = target_set_completed; totemip_localhost (instance->mcast_address.family, &localhost); localhost.nodeid = instance->totem_config->node_id; /* * RRP layer isn't ready to receive message because it hasn't * initialized yet. Add short timer to check the interfaces. 
*/ qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, 100*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); *udp_context = instance; return (0); } void *totemudp_buffer_alloc (void) { return malloc (FRAME_SIZE_MAX); } void totemudp_buffer_release (void *ptr) { return free (ptr); } int totemudp_processor_count_set ( void *udp_context, int processor_count) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; instance->my_memb_entries = processor_count; qb_loop_timer_del (instance->totemudp_poll_handle, instance->timer_netif_check_timeout); if (processor_count == 1) { qb_loop_timer_add (instance->totemudp_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } return (res); } int totemudp_recv_flush (void *udp_context) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; struct pollfd ufd; int nfds; int res = 0; instance->flushing = 1; do { ufd.fd = instance->totemudp_sockets.mcast_recv; ufd.events = POLLIN; nfds = poll (&ufd, 1, 0); if (nfds == 1 && ufd.revents & POLLIN) { net_deliver_fn (instance->totemudp_sockets.mcast_recv, ufd.revents, instance); } } while (nfds == 1); instance->flushing = 0; return (res); } int totemudp_send_flush (void *udp_context) { return 0; } int totemudp_token_send ( void *udp_context, const void *msg, unsigned int msg_len) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; ucast_sendmsg (instance, &instance->token_target, msg, msg_len); return (res); } int totemudp_mcast_flush_send ( void *udp_context, const void *msg, unsigned int msg_len) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; mcast_sendmsg (instance, msg, msg_len); return (res); } int totemudp_mcast_noflush_send ( void 
*udp_context, const void *msg, unsigned int msg_len) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; mcast_sendmsg (instance, msg, msg_len); return (res); } extern int totemudp_iface_check (void *udp_context) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; timer_function_netif_check_timeout (instance); return (res); } extern void totemudp_net_mtu_adjust (void *udp_context, struct totem_config *totem_config) { #define UDPIP_HEADER_SIZE (20 + 8) /* 20 bytes for ip 8 bytes for udp */ if (totem_config->secauth == 1) { totem_config->net_mtu -= sizeof (struct security_header) + UDPIP_HEADER_SIZE; } else { totem_config->net_mtu -= UDPIP_HEADER_SIZE; } } const char *totemudp_iface_print (void *udp_context) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; const char *ret_char; ret_char = totemip_print (&instance->my_id); return (ret_char); } int totemudp_iface_get ( void *udp_context, struct totem_ip_address *addr) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; memcpy (addr, &instance->my_id, sizeof (struct totem_ip_address)); return (res); } int totemudp_token_target_set ( void *udp_context, const struct totem_ip_address *token_target) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; int res = 0; memcpy (&instance->token_target, token_target, sizeof (struct totem_ip_address)); instance->totemudp_target_set_completed (instance->context); return (res); } extern int totemudp_recv_mcast_empty ( void *udp_context) { struct totemudp_instance *instance = (struct totemudp_instance *)udp_context; unsigned int res; struct sockaddr_storage system_from; struct msghdr msg_recv; struct pollfd ufd; int nfds; int msg_processed = 0; /* * Receive datagram */ msg_recv.msg_name = &system_from; msg_recv.msg_namelen = sizeof (struct sockaddr_storage); msg_recv.msg_iov = 
&instance->totemudp_iov_recv_flush; msg_recv.msg_iovlen = 1; #if !defined(COROSYNC_SOLARIS) msg_recv.msg_control = 0; msg_recv.msg_controllen = 0; msg_recv.msg_flags = 0; #else msg_recv.msg_accrights = NULL; msg_recv.msg_accrightslen = 0; #endif do { ufd.fd = instance->totemudp_sockets.mcast_recv; ufd.events = POLLIN; nfds = poll (&ufd, 1, 0); if (nfds == 1 && ufd.revents & POLLIN) { res = recvmsg (instance->totemudp_sockets.mcast_recv, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); if (res != -1) { msg_processed = 1; } else { msg_processed = -1; } } } while (nfds == 1); return (msg_processed); } diff --git a/exec/totemudpu.c b/exec/totemudpu.c index 8ef90bbc..529c3627 100644 --- a/exec/totemudpu.c +++ b/exec/totemudpu.c @@ -1,1712 +1,1720 @@ /* * Copyright (c) 2005 MontaVista Software, Inc. * Copyright (c) 2006-2009 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define LOGSYS_UTILS_ONLY 1 #include #include "totemudpu.h" #include "crypto.h" #include "util.h" #ifdef HAVE_LIBNSS #include #include #include #include #endif #ifndef MSG_NOSIGNAL #define MSG_NOSIGNAL 0 #endif #define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX) #define NETIF_STATE_REPORT_UP 1 #define NETIF_STATE_REPORT_DOWN 2 #define BIND_STATE_UNBOUND 0 #define BIND_STATE_REGULAR 1 #define BIND_STATE_LOOPBACK 2 #define HMAC_HASH_SIZE 20 struct security_header { unsigned char hash_digest[HMAC_HASH_SIZE]; /* The hash *MUST* be first in the data structure */ unsigned char salt[16]; /* random number */ char msg[0]; } __attribute__((packed)); struct totemudpu_member { struct list_head list; struct totem_ip_address member; int fd; }; struct totemudpu_instance { hmac_state totemudpu_hmac_state; prng_state totemudpu_prng_state; #ifdef HAVE_LIBNSS PK11SymKey *nss_sym_key; PK11SymKey *nss_sym_key_sign; #endif unsigned char totemudpu_private_key[1024]; unsigned int totemudpu_private_key_len; qb_loop_t *totemudpu_poll_handle; struct totem_interface *totem_interface; int netif_state_report; int netif_bind_state; void *context; void (*totemudpu_deliver_fn) ( void *context, const void *msg, 
unsigned int msg_len); void (*totemudpu_iface_change_fn) ( void *context, const struct totem_ip_address *iface_address); void (*totemudpu_target_set_completed) (void *context); /* * Function and data used to log messages */ int totemudpu_log_level_security; int totemudpu_log_level_error; int totemudpu_log_level_warning; int totemudpu_log_level_notice; int totemudpu_log_level_debug; int totemudpu_subsys_id; void (*totemudpu_log_printf) ( - unsigned int rec_ident, + int level, + int subsys, const char *function, const char *file, int line, const char *format, - ...)__attribute__((format(printf, 5, 6))); + ...)__attribute__((format(printf, 6, 7))); void *udpu_context; char iov_buffer[FRAME_SIZE_MAX]; struct iovec totemudpu_iov_recv; struct list_head member_list; int stats_sent; int stats_recv; int stats_delv; int stats_remcasts; int stats_orf_token; struct timeval stats_tv_start; struct totem_ip_address my_id; int firstrun; qb_loop_timer_handle timer_netif_check_timeout; unsigned int my_memb_entries; struct totem_config *totem_config; struct totem_ip_address token_target; int token_socket; }; struct work_item { const void *msg; unsigned int msg_len; struct totemudpu_instance *instance; }; static int totemudpu_build_sockets ( struct totemudpu_instance *instance, struct totem_ip_address *bindnet_address, struct totem_ip_address *bound_to); static struct totem_ip_address localhost; static void totemudpu_instance_initialize (struct totemudpu_instance *instance) { memset (instance, 0, sizeof (struct totemudpu_instance)); instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN; instance->totemudpu_iov_recv.iov_base = instance->iov_buffer; instance->totemudpu_iov_recv.iov_len = FRAME_SIZE_MAX; //sizeof (instance->iov_buffer); /* * There is always atleast 1 processor */ instance->my_memb_entries = 1; list_init (&instance->member_list); } -#define log_printf(level, format, args...) 
\ -do { \ - instance->totemudpu_log_printf ( \ - LOGSYS_ENCODE_RECID(level, \ - instance->totemudpu_subsys_id, \ - LOGSYS_RECID_LOG), \ - __FUNCTION__, __FILE__, __LINE__, \ - (const char *)format, ##args); \ +#define log_printf(level, format, args...) \ +do { \ + instance->totemudpu_log_printf ( \ + level, instance->totemudpu_subsys_id, \ + __FUNCTION__, __FILE__, __LINE__, \ + (const char *)format, ##args); \ } while (0); +#define LOGSYS_PERROR(err_num, level, fmt, args...) \ +do { \ + char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \ + const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \ + instance->totemudpu_log_printf ( \ + level, instance->totemudpu_subsys_id, \ + __FUNCTION__, __FILE__, __LINE__, \ + fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \ + } while(0) static int authenticate_and_decrypt_sober ( struct totemudpu_instance *instance, struct iovec *iov, unsigned int iov_len) { unsigned char keys[48]; struct security_header *header = (struct security_header *)iov[0].iov_base; prng_state keygen_prng_state; prng_state stream_prng_state; unsigned char *hmac_key = &keys[32]; unsigned char *cipher_key = &keys[16]; unsigned char *initial_vector = &keys[0]; unsigned char digest_comparison[HMAC_HASH_SIZE]; unsigned long len; /* * Generate MAC, CIPHER, IV keys from private key */ memset (keys, 0, sizeof (keys)); sober128_start (&keygen_prng_state); sober128_add_entropy (instance->totemudpu_private_key, instance->totemudpu_private_key_len, &keygen_prng_state); sober128_add_entropy (header->salt, sizeof (header->salt), &keygen_prng_state); sober128_read (keys, sizeof (keys), &keygen_prng_state); /* * Setup stream cipher */ sober128_start (&stream_prng_state); sober128_add_entropy (cipher_key, 16, &stream_prng_state); sober128_add_entropy (initial_vector, 16, &stream_prng_state); /* * Authenticate contents of message */ hmac_init (&instance->totemudpu_hmac_state, DIGEST_SHA1, hmac_key, 16); hmac_process (&instance->totemudpu_hmac_state, 
(unsigned char *)iov->iov_base + HMAC_HASH_SIZE, iov->iov_len - HMAC_HASH_SIZE); len = hash_descriptor[DIGEST_SHA1]->hashsize; assert (HMAC_HASH_SIZE >= len); hmac_done (&instance->totemudpu_hmac_state, digest_comparison, &len); if (memcmp (digest_comparison, header->hash_digest, len) != 0) { return (-1); } /* * Decrypt the contents of the message with the cipher key */ sober128_read ((unsigned char*)iov->iov_base + sizeof (struct security_header), iov->iov_len - sizeof (struct security_header), &stream_prng_state); return (0); } static void init_sober_crypto( struct totemudpu_instance *instance) { log_printf(instance->totemudpu_log_level_notice, "Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0).\n"); rng_make_prng (128, PRNG_SOBER, &instance->totemudpu_prng_state, NULL); } #ifdef HAVE_LIBNSS static unsigned char *copy_from_iovec( const struct iovec *iov, unsigned int iov_len, size_t *buf_size) { int i; size_t bufptr; size_t buflen = 0; unsigned char *newbuf; for (i=0; i buf_size) { copylen = buf_size - bufptr; } memcpy(iov[i].iov_base, buf+bufptr, copylen); bufptr += copylen; if (iov[i].iov_len != copylen) { iov[i].iov_len = copylen; return; } } } static void init_nss_crypto( struct totemudpu_instance *instance) { PK11SlotInfo* aes_slot = NULL; PK11SlotInfo* sha1_slot = NULL; SECItem key_item; SECStatus rv; log_printf(instance->totemudpu_log_level_notice, "Initializing transmit/receive security: NSS AES128CBC/SHA1HMAC (mode 1).\n"); rv = NSS_NoDB_Init("."); if (rv != SECSuccess) { log_printf(instance->totemudpu_log_level_security, "NSS initialization failed (err %d)\n", PR_GetError()); goto out; } aes_slot = PK11_GetBestSlot(instance->totem_config->crypto_crypt_type, NULL); if (aes_slot == NULL) { log_printf(instance->totemudpu_log_level_security, "Unable to find security slot (err %d)\n", PR_GetError()); goto out; } sha1_slot = PK11_GetBestSlot(CKM_SHA_1_HMAC, NULL); if (sha1_slot == NULL) { 
log_printf(instance->totemudpu_log_level_security, "Unable to find security slot (err %d)\n", PR_GetError()); goto out; } /* * Make the private key into a SymKey that we can use */ key_item.type = siBuffer; key_item.data = instance->totem_config->private_key; key_item.len = 32; /* Use 128 bits */ instance->nss_sym_key = PK11_ImportSymKey(aes_slot, instance->totem_config->crypto_crypt_type, PK11_OriginUnwrap, CKA_ENCRYPT|CKA_DECRYPT, &key_item, NULL); if (instance->nss_sym_key == NULL) { log_printf(instance->totemudpu_log_level_security, "Failure to import key into NSS (err %d)\n", PR_GetError()); goto out; } instance->nss_sym_key_sign = PK11_ImportSymKey(sha1_slot, CKM_SHA_1_HMAC, PK11_OriginUnwrap, CKA_SIGN, &key_item, NULL); if (instance->nss_sym_key_sign == NULL) { log_printf(instance->totemudpu_log_level_security, "Failure to import key into NSS (err %d)\n", PR_GetError()); goto out; } out: return; } static int encrypt_and_sign_nss ( struct totemudpu_instance *instance, unsigned char *buf, size_t *buf_len, const struct iovec *iovec, unsigned int iov_len) { PK11Context* enc_context = NULL; SECStatus rv1, rv2; int tmp1_outlen; unsigned int tmp2_outlen; unsigned char *inbuf; unsigned char *data; unsigned char *outdata; size_t datalen; SECItem no_params; SECItem iv_item; struct security_header *header; SECItem *nss_sec_param; unsigned char nss_iv_data[16]; SECStatus rv; no_params.type = siBuffer; no_params.data = 0; no_params.len = 0; tmp1_outlen = tmp2_outlen = 0; inbuf = copy_from_iovec(iovec, iov_len, &datalen); if (!inbuf) { log_printf(instance->totemudpu_log_level_security, "malloc error copying buffer from iovec\n"); return -1; } data = inbuf + sizeof (struct security_header); datalen -= sizeof (struct security_header); outdata = buf + sizeof (struct security_header); header = (struct security_header *)buf; rv = PK11_GenerateRandom ( nss_iv_data, sizeof (nss_iv_data)); if (rv != SECSuccess) { log_printf(instance->totemudpu_log_level_security, "Failure to 
generate a random number %d\n", PR_GetError()); } memcpy(header->salt, nss_iv_data, sizeof(nss_iv_data)); iv_item.type = siBuffer; iv_item.data = nss_iv_data; iv_item.len = sizeof (nss_iv_data); nss_sec_param = PK11_ParamFromIV ( instance->totem_config->crypto_crypt_type, &iv_item); if (nss_sec_param == NULL) { log_printf(instance->totemudpu_log_level_security, "Failure to set up PKCS11 param (err %d)\n", PR_GetError()); free (inbuf); return (-1); } /* * Create cipher context for encryption */ enc_context = PK11_CreateContextBySymKey ( instance->totem_config->crypto_crypt_type, CKA_ENCRYPT, instance->nss_sym_key, nss_sec_param); if (!enc_context) { char err[1024]; PR_GetErrorText(err); err[PR_GetErrorTextLength()] = 0; log_printf(instance->totemudpu_log_level_security, "PK11_CreateContext failed (encrypt) crypt_type=%d (err %d): %s\n", instance->totem_config->crypto_crypt_type, PR_GetError(), err); free(inbuf); return -1; } rv1 = PK11_CipherOp(enc_context, outdata, &tmp1_outlen, FRAME_SIZE_MAX - sizeof(struct security_header), data, datalen); rv2 = PK11_DigestFinal(enc_context, outdata + tmp1_outlen, &tmp2_outlen, FRAME_SIZE_MAX - tmp1_outlen); PK11_DestroyContext(enc_context, PR_TRUE); *buf_len = tmp1_outlen + tmp2_outlen; free(inbuf); // memcpy(&outdata[*buf_len], nss_iv_data, sizeof(nss_iv_data)); if (rv1 != SECSuccess || rv2 != SECSuccess) goto out; /* Now do the digest */ enc_context = PK11_CreateContextBySymKey(CKM_SHA_1_HMAC, CKA_SIGN, instance->nss_sym_key_sign, &no_params); if (!enc_context) { char err[1024]; PR_GetErrorText(err); err[PR_GetErrorTextLength()] = 0; log_printf(instance->totemudpu_log_level_security, "encrypt: PK11_CreateContext failed (digest) err %d: %s\n", PR_GetError(), err); return -1; } PK11_DigestBegin(enc_context); rv1 = PK11_DigestOp(enc_context, outdata - 16, *buf_len + 16); rv2 = PK11_DigestFinal(enc_context, header->hash_digest, &tmp2_outlen, sizeof(header->hash_digest)); PK11_DestroyContext(enc_context, PR_TRUE); if (rv1 != 
SECSuccess || rv2 != SECSuccess) goto out; *buf_len = *buf_len + sizeof(struct security_header); SECITEM_FreeItem(nss_sec_param, PR_TRUE); return 0; out: return -1; } static int authenticate_and_decrypt_nss ( struct totemudpu_instance *instance, struct iovec *iov, unsigned int iov_len) { PK11Context* enc_context = NULL; SECStatus rv1, rv2; int tmp1_outlen; unsigned int tmp2_outlen; unsigned char outbuf[FRAME_SIZE_MAX]; unsigned char digest[HMAC_HASH_SIZE]; unsigned char *outdata; int result_len; unsigned char *data; unsigned char *inbuf; size_t datalen; struct security_header *header = (struct security_header *)iov[0].iov_base; SECItem no_params; SECItem ivdata; no_params.type = siBuffer; no_params.data = 0; no_params.len = 0; tmp1_outlen = tmp2_outlen = 0; if (iov_len > 1) { inbuf = copy_from_iovec(iov, iov_len, &datalen); if (!inbuf) { log_printf(instance->totemudpu_log_level_security, "malloc error copying buffer from iovec\n"); return -1; } } else { inbuf = (unsigned char *)iov[0].iov_base; datalen = iov[0].iov_len; } data = inbuf + sizeof (struct security_header) - 16; datalen = datalen - sizeof (struct security_header) + 16; outdata = outbuf + sizeof (struct security_header); /* Check the digest */ enc_context = PK11_CreateContextBySymKey ( CKM_SHA_1_HMAC, CKA_SIGN, instance->nss_sym_key_sign, &no_params); if (!enc_context) { char err[1024]; PR_GetErrorText(err); err[PR_GetErrorTextLength()] = 0; log_printf(instance->totemudpu_log_level_security, "PK11_CreateContext failed (check digest) err %d: %s\n", PR_GetError(), err); free (inbuf); return -1; } PK11_DigestBegin(enc_context); rv1 = PK11_DigestOp(enc_context, data, datalen); rv2 = PK11_DigestFinal(enc_context, digest, &tmp2_outlen, sizeof(digest)); PK11_DestroyContext(enc_context, PR_TRUE); if (rv1 != SECSuccess || rv2 != SECSuccess) { log_printf(instance->totemudpu_log_level_security, "Digest check failed\n"); return -1; } if (memcmp(digest, header->hash_digest, tmp2_outlen) != 0) { 
log_printf(instance->totemudpu_log_level_error, "Digest does not match\n"); return -1; } /* * Get rid of salt */ data += 16; datalen -= 16; /* Create cipher context for decryption */ ivdata.type = siBuffer; ivdata.data = header->salt; ivdata.len = sizeof(header->salt); enc_context = PK11_CreateContextBySymKey( instance->totem_config->crypto_crypt_type, CKA_DECRYPT, instance->nss_sym_key, &ivdata); if (!enc_context) { log_printf(instance->totemudpu_log_level_security, "PK11_CreateContext (decrypt) failed (err %d)\n", PR_GetError()); return -1; } rv1 = PK11_CipherOp(enc_context, outdata, &tmp1_outlen, sizeof(outbuf) - sizeof (struct security_header), data, datalen); if (rv1 != SECSuccess) { log_printf(instance->totemudpu_log_level_security, "PK11_CipherOp (decrypt) failed (err %d)\n", PR_GetError()); } rv2 = PK11_DigestFinal(enc_context, outdata + tmp1_outlen, &tmp2_outlen, sizeof(outbuf) - tmp1_outlen); PK11_DestroyContext(enc_context, PR_TRUE); result_len = tmp1_outlen + tmp2_outlen + sizeof (struct security_header); /* Copy it back to the buffer */ copy_to_iovec(iov, iov_len, outbuf, result_len); if (iov_len > 1) free(inbuf); if (rv1 != SECSuccess || rv2 != SECSuccess) return -1; return 0; } #endif static int encrypt_and_sign_sober ( struct totemudpu_instance *instance, unsigned char *buf, size_t *buf_len, const struct iovec *iovec, unsigned int iov_len) { int i; unsigned char *addr; unsigned char keys[48]; struct security_header *header; unsigned char *hmac_key = &keys[32]; unsigned char *cipher_key = &keys[16]; unsigned char *initial_vector = &keys[0]; unsigned long len; size_t outlen = 0; hmac_state hmac_st; prng_state keygen_prng_state; prng_state stream_prng_state; prng_state *prng_state_in = &instance->totemudpu_prng_state; header = (struct security_header *)buf; addr = buf + sizeof (struct security_header); memset (keys, 0, sizeof (keys)); memset (header->salt, 0, sizeof (header->salt)); /* * Generate MAC, CIPHER, IV keys from private key */ sober128_read 
(header->salt, sizeof (header->salt), prng_state_in); sober128_start (&keygen_prng_state); sober128_add_entropy (instance->totemudpu_private_key, instance->totemudpu_private_key_len, &keygen_prng_state); sober128_add_entropy (header->salt, sizeof (header->salt), &keygen_prng_state); sober128_read (keys, sizeof (keys), &keygen_prng_state); /* * Setup stream cipher */ sober128_start (&stream_prng_state); sober128_add_entropy (cipher_key, 16, &stream_prng_state); sober128_add_entropy (initial_vector, 16, &stream_prng_state); outlen = sizeof (struct security_header); /* * Copy remainder of message, then encrypt it */ for (i = 1; i < iov_len; i++) { memcpy (addr, iovec[i].iov_base, iovec[i].iov_len); addr += iovec[i].iov_len; outlen += iovec[i].iov_len; } /* * Encrypt message by XORing stream cipher data */ sober128_read (buf + sizeof (struct security_header), outlen - sizeof (struct security_header), &stream_prng_state); memset (&hmac_st, 0, sizeof (hmac_st)); /* * Sign the contents of the message with the hmac key and store signature in message */ hmac_init (&hmac_st, DIGEST_SHA1, hmac_key, 16); hmac_process (&hmac_st, buf + HMAC_HASH_SIZE, outlen - HMAC_HASH_SIZE); len = hash_descriptor[DIGEST_SHA1]->hashsize; hmac_done (&hmac_st, header->hash_digest, &len); *buf_len = outlen; return 0; } static int encrypt_and_sign_worker ( struct totemudpu_instance *instance, unsigned char *buf, size_t *buf_len, const struct iovec *iovec, unsigned int iov_len) { if (instance->totem_config->crypto_type == TOTEM_CRYPTO_SOBER || instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_OLD) return encrypt_and_sign_sober(instance, buf, buf_len, iovec, iov_len); #ifdef HAVE_LIBNSS if (instance->totem_config->crypto_type == TOTEM_CRYPTO_NSS) return encrypt_and_sign_nss(instance, buf, buf_len, iovec, iov_len); #endif return -1; } static int authenticate_and_decrypt ( struct totemudpu_instance *instance, struct iovec *iov, unsigned int iov_len) { unsigned char type; unsigned char 
*endbuf = (unsigned char *)iov[iov_len-1].iov_base; int res = -1; /* * Get the encryption type and remove it from the buffer */ type = endbuf[iov[iov_len-1].iov_len-1]; iov[iov_len-1].iov_len -= 1; if (type == TOTEM_CRYPTO_SOBER) res = authenticate_and_decrypt_sober(instance, iov, iov_len); /* * Only try higher crypto options if NEW has been requested */ if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_NEW) { #ifdef HAVE_LIBNSS if (type == TOTEM_CRYPTO_NSS) res = authenticate_and_decrypt_nss(instance, iov, iov_len); #endif } /* * If it failed, then try decrypting the whole packet as it might be * from aisexec */ if (res == -1) { iov[iov_len-1].iov_len += 1; res = authenticate_and_decrypt_sober(instance, iov, iov_len); } return res; } static void init_crypto( struct totemudpu_instance *instance) { /* * If we are expecting NEW crypto type then initialise all available * crypto options. For OLD then we only need SOBER128. */ init_sober_crypto(instance); if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_OLD) return; #ifdef HAVE_LIBNSS init_nss_crypto(instance); #endif } int totemudpu_crypto_set ( void *udpu_context, unsigned int type) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; /* * Can't set crypto type if OLD is selected */ if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_OLD) { res = -1; } else { /* * Validate crypto algorithm */ switch (type) { case TOTEM_CRYPTO_SOBER: log_printf(instance->totemudpu_log_level_security, "Transmit security set to: libtomcrypt SOBER128/SHA1HMAC (mode 0)"); break; case TOTEM_CRYPTO_NSS: log_printf(instance->totemudpu_log_level_security, "Transmit security set to: NSS AES128CBC/SHA1HMAC (mode 1)"); break; default: res = -1; break; } } return (res); } static inline void ucast_sendmsg ( struct totemudpu_instance *instance, struct totem_ip_address *system_to, const void *msg, unsigned int msg_len) { struct msghdr msg_ucast; int res = 0; 
size_t buf_len; unsigned char sheader[sizeof (struct security_header)]; unsigned char encrypt_data[FRAME_SIZE_MAX]; struct iovec iovec_encrypt[2]; const struct iovec *iovec_sendmsg; struct sockaddr_storage sockaddr; struct iovec iovec; unsigned int iov_len; int addrlen; if (instance->totem_config->secauth == 1) { iovec_encrypt[0].iov_base = (void *)sheader; iovec_encrypt[0].iov_len = sizeof (struct security_header); iovec_encrypt[1].iov_base = (void *)msg; iovec_encrypt[1].iov_len = msg_len; /* * Encrypt and digest the message */ encrypt_and_sign_worker ( instance, encrypt_data, &buf_len, iovec_encrypt, 2); if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_NEW) { encrypt_data[buf_len++] = instance->totem_config->crypto_type; } else { encrypt_data[buf_len++] = 0; } iovec_encrypt[0].iov_base = (void *)encrypt_data; iovec_encrypt[0].iov_len = buf_len; iovec_sendmsg = &iovec_encrypt[0]; iov_len = 1; } else { iovec.iov_base = (void *)msg; iovec.iov_len = msg_len; iovec_sendmsg = &iovec; iov_len = 1; } /* * Build unicast message */ totemip_totemip_to_sockaddr_convert(system_to, instance->totem_interface->ip_port, &sockaddr, &addrlen); msg_ucast.msg_name = &sockaddr; msg_ucast.msg_namelen = addrlen; msg_ucast.msg_iov = (void *) iovec_sendmsg; msg_ucast.msg_iovlen = iov_len; #if !defined(COROSYNC_SOLARIS) msg_ucast.msg_control = 0; msg_ucast.msg_controllen = 0; msg_ucast.msg_flags = 0; #else msg_ucast.msg_accrights = NULL; msg_ucast.msg_accrightslen = 0; #endif /* * Transmit unicast message * An error here is recovered by totemsrp */ res = sendmsg (instance->token_socket, &msg_ucast, MSG_NOSIGNAL); if (res < 0) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug, "sendmsg(ucast) failed (non-critical)"); } } static inline void mcast_sendmsg ( struct totemudpu_instance *instance, const void *msg, unsigned int msg_len) { struct msghdr msg_mcast; int res = 0; size_t buf_len; unsigned char sheader[sizeof (struct security_header)]; unsigned char 
encrypt_data[FRAME_SIZE_MAX]; struct iovec iovec_encrypt[2]; struct iovec iovec; const struct iovec *iovec_sendmsg; struct sockaddr_storage sockaddr; unsigned int iov_len; int addrlen; struct list_head *list; struct totemudpu_member *member; if (instance->totem_config->secauth == 1) { iovec_encrypt[0].iov_base = (void *)sheader; iovec_encrypt[0].iov_len = sizeof (struct security_header); iovec_encrypt[1].iov_base = (void *)msg; iovec_encrypt[1].iov_len = msg_len; /* * Encrypt and digest the message */ encrypt_and_sign_worker ( instance, encrypt_data, &buf_len, iovec_encrypt, 2); if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_NEW) { encrypt_data[buf_len++] = instance->totem_config->crypto_type; } else { encrypt_data[buf_len++] = 0; } iovec_encrypt[0].iov_base = (void *)encrypt_data; iovec_encrypt[0].iov_len = buf_len; iovec_sendmsg = &iovec_encrypt[0]; iov_len = 1; } else { iovec.iov_base = (void *)msg; iovec.iov_len = msg_len; iovec_sendmsg = &iovec; iov_len = 1; } /* * Build multicast message */ for (list = instance->member_list.next; list != &instance->member_list; list = list->next) { member = list_entry (list, struct totemudpu_member, list); totemip_totemip_to_sockaddr_convert(&member->member, instance->totem_interface->ip_port, &sockaddr, &addrlen); msg_mcast.msg_name = &sockaddr; msg_mcast.msg_namelen = addrlen; msg_mcast.msg_iov = (void *) iovec_sendmsg; msg_mcast.msg_iovlen = iov_len; #if !defined(COROSYNC_SOLARIS) msg_mcast.msg_control = 0; msg_mcast.msg_controllen = 0; msg_mcast.msg_flags = 0; #else msg_mcast.msg_accrights = NULL; msg_mcast.msg_accrightslen = 0; #endif /* * Transmit multicast message * An error here is recovered by totemsrp */ res = sendmsg (member->fd, &msg_mcast, MSG_NOSIGNAL); if (res < 0) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_debug, "sendmsg(mcast) failed (non-critical)"); } } } int totemudpu_finalize ( void *udpu_context) { struct totemudpu_instance *instance = (struct totemudpu_instance 
*)udpu_context; int res = 0; if (instance->token_socket > 0) { close (instance->token_socket); qb_loop_poll_del (instance->totemudpu_poll_handle, instance->token_socket); } return (res); } static int net_deliver_fn ( int fd, int revents, void *data) { struct totemudpu_instance *instance = (struct totemudpu_instance *)data; struct msghdr msg_recv; struct iovec *iovec; struct sockaddr_storage system_from; int bytes_received; int res = 0; unsigned char *msg_offset; unsigned int size_delv; iovec = &instance->totemudpu_iov_recv; /* * Receive datagram */ msg_recv.msg_name = &system_from; msg_recv.msg_namelen = sizeof (struct sockaddr_storage); msg_recv.msg_iov = iovec; msg_recv.msg_iovlen = 1; #if !defined(COROSYNC_SOLARIS) msg_recv.msg_control = 0; msg_recv.msg_controllen = 0; msg_recv.msg_flags = 0; #else msg_recv.msg_accrights = NULL; msg_recv.msg_accrightslen = 0; #endif bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); if (bytes_received == -1) { return (0); } else { instance->stats_recv += bytes_received; } if ((instance->totem_config->secauth == 1) && (bytes_received < sizeof (struct security_header))) { log_printf (instance->totemudpu_log_level_security, "Received message is too short... ignoring %d.\n", bytes_received); return (0); } iovec->iov_len = bytes_received; if (instance->totem_config->secauth == 1) { /* * Authenticate and if authenticated, decrypt datagram */ res = authenticate_and_decrypt (instance, iovec, 1); if (res == -1) { log_printf (instance->totemudpu_log_level_security, "Received message has invalid digest... 
ignoring.\n"); log_printf (instance->totemudpu_log_level_security, "Invalid packet data\n"); iovec->iov_len = FRAME_SIZE_MAX; return 0; } msg_offset = (unsigned char *)iovec->iov_base + sizeof (struct security_header); size_delv = bytes_received - sizeof (struct security_header); } else { msg_offset = (void *)iovec->iov_base; size_delv = bytes_received; } /* * Handle incoming message */ instance->totemudpu_deliver_fn ( instance->context, msg_offset, size_delv); iovec->iov_len = FRAME_SIZE_MAX; return (0); } static int netif_determine ( struct totemudpu_instance *instance, struct totem_ip_address *bindnet, struct totem_ip_address *bound_to, int *interface_up, int *interface_num) { int res; res = totemip_iface_check (bindnet, bound_to, interface_up, interface_num, instance->totem_config->clear_node_high_bit); return (res); } /* * If the interface is up, the sockets for totem are built. If the interface is down * this function is requeued in the timer list to retry building the sockets later. 
*/ static void timer_function_netif_check_timeout ( void *data) { struct totemudpu_instance *instance = (struct totemudpu_instance *)data; int interface_up; int interface_num; struct totem_ip_address *bind_address; /* * Build sockets for every interface */ netif_determine (instance, &instance->totem_interface->bindnet, &instance->totem_interface->boundto, &interface_up, &interface_num); /* * If the network interface isn't back up and we are already * in loopback mode, add timer to check again and return */ if ((instance->netif_bind_state == BIND_STATE_LOOPBACK && interface_up == 0) || (instance->my_memb_entries == 1 && instance->netif_bind_state == BIND_STATE_REGULAR && interface_up == 1)) { qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); /* * Add a timer to check for a downed regular interface */ return; } if (instance->token_socket > 0) { close (instance->token_socket); qb_loop_poll_del (instance->totemudpu_poll_handle, instance->token_socket); } if (interface_up == 0) { /* * Interface is not up */ instance->netif_bind_state = BIND_STATE_LOOPBACK; bind_address = &localhost; /* * Add a timer to retry building interfaces and request memb_gather_enter */ qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } else { /* * Interface is up */ instance->netif_bind_state = BIND_STATE_REGULAR; bind_address = &instance->totem_interface->bindnet; } /* * Create and bind the multicast and unicast sockets */ totemudpu_build_sockets (instance, bind_address, &instance->totem_interface->boundto); qb_loop_poll_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->token_socket, POLLIN, instance, net_deliver_fn); totemip_copy (&instance->my_id, 
&instance->totem_interface->boundto); /* * This reports changes in the interface to the user and totemsrp */ if (instance->netif_bind_state == BIND_STATE_REGULAR) { if (instance->netif_state_report & NETIF_STATE_REPORT_UP) { log_printf (instance->totemudpu_log_level_notice, "The network interface [%s] is now up.\n", totemip_print (&instance->totem_interface->boundto)); instance->netif_state_report = NETIF_STATE_REPORT_DOWN; instance->totemudpu_iface_change_fn (instance->context, &instance->my_id); } /* * Add a timer to check for interface going down in single membership */ if (instance->my_memb_entries == 1) { qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } } else { if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) { log_printf (instance->totemudpu_log_level_notice, "The network interface is down.\n"); instance->totemudpu_iface_change_fn (instance->context, &instance->my_id); } instance->netif_state_report = NETIF_STATE_REPORT_UP; } } /* Set the socket priority to INTERACTIVE to ensure that our messages don't get queued behind anything else */ static void totemudpu_traffic_control_set(struct totemudpu_instance *instance, int sock) { #ifdef SO_PRIORITY int prio = 6; /* TC_PRIO_INTERACTIVE */ if (setsockopt(sock, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(int))) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "Could not set traffic priority"); } #endif } static int totemudpu_build_sockets_ip ( struct totemudpu_instance *instance, struct totem_ip_address *bindnet_address, struct totem_ip_address *bound_to, int interface_num) { struct sockaddr_storage sockaddr; int addrlen; int res; unsigned int recvbuf_size; unsigned int optlen = sizeof (recvbuf_size); /* * Setup unicast socket */ instance->token_socket = socket (bindnet_address->family, SOCK_DGRAM, 0); if 
(instance->token_socket == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "socket() failed"); return (-1); } totemip_nosigpipe (instance->token_socket); res = fcntl (instance->token_socket, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "Could not set non-blocking operation on token socket"); return (-1); } /* * Bind to unicast socket used for token send/receives * This has the side effect of binding to the correct interface */ totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &sockaddr, &addrlen); res = bind (instance->token_socket, (struct sockaddr *)&sockaddr, addrlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "bind token socket failed"); return (-1); } /* * the token_socket can receive many messages. Allow a large number * of receive messages on this socket */ recvbuf_size = MCAST_SOCKET_BUFFER_SIZE; res = setsockopt (instance->token_socket, SOL_SOCKET, SO_RCVBUF, &recvbuf_size, optlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_notice, "Could not set recvbuf size"); } return 0; } static int totemudpu_build_sockets ( struct totemudpu_instance *instance, struct totem_ip_address *bindnet_address, struct totem_ip_address *bound_to) { int interface_num; int interface_up; int res; /* * Determine the ip address bound to and the interface name */ res = netif_determine (instance, bindnet_address, bound_to, &interface_up, &interface_num); if (res == -1) { return (-1); } totemip_copy(&instance->my_id, bound_to); res = totemudpu_build_sockets_ip (instance, bindnet_address, bound_to, interface_num); /* We only send out of the token socket */ totemudpu_traffic_control_set(instance, instance->token_socket); return res; } /* * Totem Network interface - also does encryption/decryption * depends on poll abstraction, POSIX, IPV4 */ /* * Create an instance */ int totemudpu_initialize ( qb_loop_t *poll_handle, void 
**udpu_context, struct totem_config *totem_config, int interface_no, void *context, void (*deliver_fn) ( void *context, const void *msg, unsigned int msg_len), void (*iface_change_fn) ( void *context, const struct totem_ip_address *iface_address), void (*target_set_completed) ( void *context)) { struct totemudpu_instance *instance; instance = malloc (sizeof (struct totemudpu_instance)); if (instance == NULL) { return (-1); } totemudpu_instance_initialize (instance); instance->totem_config = totem_config; /* * Configure logging */ instance->totemudpu_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security; instance->totemudpu_log_level_error = totem_config->totem_logging_configuration.log_level_error; instance->totemudpu_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; instance->totemudpu_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; instance->totemudpu_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; instance->totemudpu_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; instance->totemudpu_log_printf = totem_config->totem_logging_configuration.log_printf; /* * Initialize random number generator for later use to generate salt */ memcpy (instance->totemudpu_private_key, totem_config->private_key, totem_config->private_key_len); instance->totemudpu_private_key_len = totem_config->private_key_len; init_crypto(instance); /* * Initialize local variables for totemudpu */ instance->totem_interface = &totem_config->interfaces[interface_no]; memset (instance->iov_buffer, 0, FRAME_SIZE_MAX); instance->totemudpu_poll_handle = poll_handle; instance->totem_interface->bindnet.nodeid = instance->totem_config->node_id; instance->context = context; instance->totemudpu_deliver_fn = deliver_fn; instance->totemudpu_iface_change_fn = iface_change_fn; instance->totemudpu_target_set_completed = target_set_completed; totemip_localhost (AF_INET, 
&localhost); localhost.nodeid = instance->totem_config->node_id; /* * RRP layer isn't ready to receive message because it hasn't * initialized yet. Add short timer to check the interfaces. */ qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, 100*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); *udpu_context = instance; return (0); } void *totemudpu_buffer_alloc (void) { return malloc (FRAME_SIZE_MAX); } void totemudpu_buffer_release (void *ptr) { return free (ptr); } int totemudpu_processor_count_set ( void *udpu_context, int processor_count) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; instance->my_memb_entries = processor_count; qb_loop_timer_del (instance->totemudpu_poll_handle, instance->timer_netif_check_timeout); if (processor_count == 1) { qb_loop_timer_add (instance->totemudpu_poll_handle, QB_LOOP_MED, instance->totem_config->downcheck_timeout*QB_TIME_NS_IN_MSEC, (void *)instance, timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); } return (res); } int totemudpu_recv_flush (void *udpu_context) { int res = 0; return (res); } int totemudpu_send_flush (void *udpu_context) { int res = 0; return (res); } int totemudpu_token_send ( void *udpu_context, const void *msg, unsigned int msg_len) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; ucast_sendmsg (instance, &instance->token_target, msg, msg_len); return (res); } int totemudpu_mcast_flush_send ( void *udpu_context, const void *msg, unsigned int msg_len) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; mcast_sendmsg (instance, msg, msg_len); return (res); } int totemudpu_mcast_noflush_send ( void *udpu_context, const void *msg, unsigned int msg_len) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; mcast_sendmsg 
(instance, msg, msg_len); return (res); } extern int totemudpu_iface_check (void *udpu_context) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; timer_function_netif_check_timeout (instance); return (res); } extern void totemudpu_net_mtu_adjust (void *udpu_context, struct totem_config *totem_config) { #define UDPIP_HEADER_SIZE (20 + 8) /* 20 bytes for ip 8 bytes for udp */ if (totem_config->secauth == 1) { totem_config->net_mtu -= sizeof (struct security_header) + UDPIP_HEADER_SIZE; } else { totem_config->net_mtu -= UDPIP_HEADER_SIZE; } } const char *totemudpu_iface_print (void *udpu_context) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; const char *ret_char; ret_char = totemip_print (&instance->my_id); return (ret_char); } int totemudpu_iface_get ( void *udpu_context, struct totem_ip_address *addr) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; memcpy (addr, &instance->my_id, sizeof (struct totem_ip_address)); return (res); } int totemudpu_token_target_set ( void *udpu_context, const struct totem_ip_address *token_target) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; int res = 0; memcpy (&instance->token_target, token_target, sizeof (struct totem_ip_address)); instance->totemudpu_target_set_completed (instance->context); return (res); } extern int totemudpu_recv_mcast_empty ( void *udpu_context) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; unsigned int res; struct sockaddr_storage system_from; struct msghdr msg_recv; struct pollfd ufd; int nfds; int msg_processed = 0; /* * Receive datagram */ msg_recv.msg_name = &system_from; msg_recv.msg_namelen = sizeof (struct sockaddr_storage); msg_recv.msg_iov = &instance->totemudpu_iov_recv; msg_recv.msg_iovlen = 1; #if !defined(COROSYNC_SOLARIS) msg_recv.msg_control = 0; msg_recv.msg_controllen = 0; msg_recv.msg_flags = 
0; #else msg_recv.msg_accrights = NULL; msg_recv.msg_accrightslen = 0; #endif do { ufd.fd = instance->token_socket; ufd.events = POLLIN; nfds = poll (&ufd, 1, 0); if (nfds == 1 && ufd.revents & POLLIN) { res = recvmsg (instance->token_socket, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); if (res != -1) { msg_processed = 1; } else { msg_processed = -1; } } } while (nfds == 1); return (msg_processed); } int totemudpu_member_add ( void *udpu_context, const struct totem_ip_address *member) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; struct totemudpu_member *new_member; int res; unsigned int sendbuf_size; unsigned int optlen = sizeof (sendbuf_size); new_member = malloc (sizeof (struct totemudpu_member)); if (new_member == NULL) { return (-1); } list_init (&new_member->list); list_add_tail (&new_member->list, &instance->member_list); memcpy (&new_member->member, member, sizeof (struct totem_ip_address)); new_member->fd = socket (member->family, SOCK_DGRAM, 0); if (new_member->fd == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "Could not create socket for new member"); return (-1); } totemip_nosigpipe (new_member->fd); res = fcntl (new_member->fd, F_SETFL, O_NONBLOCK); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_warning, "Could not set non-blocking operation on token socket"); return (-1); } /* * These sockets are used to send multicast messages, so their buffers * should be large */ sendbuf_size = MCAST_SOCKET_BUFFER_SIZE; res = setsockopt (new_member->fd, SOL_SOCKET, SO_SNDBUF, &sendbuf_size, optlen); if (res == -1) { LOGSYS_PERROR (errno, instance->totemudpu_log_level_notice, "Could not set sendbuf size"); } return (0); } int totemudpu_member_remove ( void *udpu_context, const struct totem_ip_address *token_target) { struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; instance = NULL; return (0); } diff --git a/exec/util.c b/exec/util.c index 6c7b60bc..16ba3c84 
100644 --- a/exec/util.c +++ b/exec/util.c @@ -1,182 +1,182 @@ /* * Copyright (c) 2002-2004 MontaVista Software, Inc. * Copyright (c) 2004 Open Source Development Lab * Copyright (c) 2006-2007, 2009 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com), Mark Haverkamp (markh@osdl.org) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include "util.h" LOGSYS_DECLARE_SUBSYS ("MAIN"); struct service_names { const char *c_name; int32_t c_val; }; static struct service_names servicenames[] = { { "EVS", EVS_SERVICE }, { "CLM", CLM_SERVICE }, { "AMF", AMF_SERVICE }, { "CKPT", CKPT_SERVICE }, { "EVT", EVT_SERVICE }, { "LCK", LCK_SERVICE }, { "MSG", MSG_SERVICE }, { "CFG", CFG_SERVICE }, { "CPG", CPG_SERVICE }, { "CMAN", CMAN_SERVICE }, { "PCMK", PCMK_SERVICE }, { "CONFDB", CONFDB_SERVICE }, { "QUORUM", QUORUM_SERVICE }, { "PLOAD", PLOAD_SERVICE }, { "TMR", TMR_SERVICE }, { "VOTEQUORUM", VOTEQUORUM_SERVICE }, { "NTF", NTF_SERVICE }, { "AMF", AMF_V2_SERVICE }, { "TST", TST_SV1_SERVICE }, { "TST", TST_SV2_SERVICE }, { "MON", MON_SERVICE }, { "WD", WD_SERVICE }, { NULL, -1 } }; const char * short_service_name_get(uint32_t service_id, char *buf, size_t buf_size) { uint32_t i; for (i = 0; servicenames[i].c_name != NULL; i++) { if (service_id == servicenames[i].c_val) { return (servicenames[i].c_name); } } snprintf(buf, buf_size, "%d", service_id); return buf; } /* * Compare two names. returns non-zero on match. 
*/
int name_match(cs_name_t *name1, cs_name_t *name2)
{
	/* names of different length never match; equal-length names are
	 * compared over exactly `length` bytes */
	if (name1->length == name2->length) {
		return ((strncmp ((char *)name1->value, (char *)name2->value,
			name1->length)) == 0);
	}
	return 0;
}

/*
 * Get the time of day and convert to nanoseconds.
 * Returns 0 when gettimeofday() reports an error.
 */
cs_time_t clust_time_now(void)
{
	struct timeval tv;
	cs_time_t time_now;

	if (gettimeofday(&tv, 0)) {
		return 0ULL;
	}

	/* seconds -> nanoseconds, then add microseconds -> nanoseconds */
	time_now = (cs_time_t)(tv.tv_sec) * 1000000000ULL;
	time_now += (cs_time_t)(tv.tv_usec) * 1000ULL;

	return time_now;
}

/*
 * Terminate the process after an allocation failure; never returns.
 */
void _corosync_out_of_memory_error (void) __attribute__((noreturn));
void _corosync_out_of_memory_error (void)
{
	/* the assertion can never hold; exit() is reached only when
	 * assertions are compiled out */
	assert (0==1);
	exit (EXIT_FAILURE);
}

/*
 * Log the exit status together with the file and line that requested the
 * exit, shut logging down, and exit with err as the process status.
 * Never returns.
 */
void _corosync_exit_error (
	enum e_ais_done err, const char *file, unsigned int line)
		__attribute__((noreturn));

void _corosync_exit_error (
	enum e_ais_done err, const char *file, unsigned int line)
{
	log_printf (LOGSYS_LEVEL_ERROR, "Corosync Cluster Engine exiting "
		"with status %d at %s:%u.\n", err, file, line);
	/* diff hunk: the logging teardown call is swapped by this patch */
-	logsys_atexit ();
+	qb_log_fini();
	exit (err);
}

#define min(a,b) ((a) < (b) ?
(a) : (b)) char *getcs_name_t (cs_name_t *name) { static char ret_name[CS_MAX_NAME_LENGTH]; /* if string is corrupt (non-terminated), ensure it's displayed safely */ if (name->length >= CS_MAX_NAME_LENGTH || name->value[name->length] != '\0') { memset (ret_name, 0, sizeof (ret_name)); memcpy (ret_name, name->value, min(name->length, CS_MAX_NAME_LENGTH -1)); return (ret_name); } return ((char *)name->value); } void setcs_name_t (cs_name_t *name, char *str) { strncpy ((char *)name->value, str, sizeof (name->value)); ((char *)name->value)[sizeof (name->value) - 1] = '\0'; if (strlen ((char *)name->value) > CS_MAX_NAME_LENGTH) { name->length = CS_MAX_NAME_LENGTH; } else { name->length = strlen (str); } } int cs_name_tisEqual (cs_name_t *str1, char *str2) { if (str1->length == strlen (str2)) { return ((strncmp ((char *)str1->value, (char *)str2, str1->length)) == 0); } else { return 0; } } diff --git a/include/corosync/engine/logsys.h b/include/corosync/engine/logsys.h index df1db1d4..662b2a11 100644 --- a/include/corosync/engine/logsys.h +++ b/include/corosync/engine/logsys.h @@ -1,457 +1,222 @@ /* * Copyright (c) 2002-2004 MontaVista Software, Inc. * Copyright (c) 2006-2009 Red Hat, Inc. * * Author: Steven Dake (sdake@redhat.com) * Author: Lon Hohberger (lhh@redhat.com) * Author: Fabio M. Di Nitto (fdinitto@redhat.com) * * All rights reserved. * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. 
nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef LOGSYS_H_DEFINED #define LOGSYS_H_DEFINED #include #include #include #include #include +#include + #ifdef __cplusplus extern "C" { #endif /* * All of the LOGSYS_MODE's can be ORed together for combined behavior * * FORK and THREADED are ignored for SUBSYSTEMS */ #define LOGSYS_MODE_OUTPUT_FILE (1<<0) #define LOGSYS_MODE_OUTPUT_STDERR (1<<1) #define LOGSYS_MODE_OUTPUT_SYSLOG (1<<2) #define LOGSYS_MODE_FORK (1<<3) #define LOGSYS_MODE_THREADED (1<<4) /* * Log priorities, compliant with syslog and SA Forum Log spec. */ #define LOGSYS_LEVEL_EMERG LOG_EMERG #define LOGSYS_LEVEL_ALERT LOG_ALERT #define LOGSYS_LEVEL_CRIT LOG_CRIT #define LOGSYS_LEVEL_ERROR LOG_ERR #define LOGSYS_LEVEL_WARNING LOG_WARNING #define LOGSYS_LEVEL_NOTICE LOG_NOTICE #define LOGSYS_LEVEL_INFO LOG_INFO #define LOGSYS_LEVEL_DEBUG LOG_DEBUG -/* - * All of the LOGSYS_RECID's are mutually exclusive. Only one RECID at any time - * can be specified. - * - * RECID_LOG indicates a message that should be sent to log. Anything else - * is stored only in the flight recorder. 
- */ - -#define LOGSYS_RECID_MAX ((UINT_MAX) >> LOGSYS_SUBSYSID_END) - -#define LOGSYS_RECID_LOG (LOGSYS_RECID_MAX - 1) -#define LOGSYS_RECID_ENTER (LOGSYS_RECID_MAX - 2) -#define LOGSYS_RECID_LEAVE (LOGSYS_RECID_MAX - 3) -#define LOGSYS_RECID_TRACE1 (LOGSYS_RECID_MAX - 4) -#define LOGSYS_RECID_TRACE2 (LOGSYS_RECID_MAX - 5) -#define LOGSYS_RECID_TRACE3 (LOGSYS_RECID_MAX - 6) -#define LOGSYS_RECID_TRACE4 (LOGSYS_RECID_MAX - 7) -#define LOGSYS_RECID_TRACE5 (LOGSYS_RECID_MAX - 8) -#define LOGSYS_RECID_TRACE6 (LOGSYS_RECID_MAX - 9) -#define LOGSYS_RECID_TRACE7 (LOGSYS_RECID_MAX - 10) -#define LOGSYS_RECID_TRACE8 (LOGSYS_RECID_MAX - 11) - - -/* - * Internal APIs that must be globally exported - * (External API below) - */ - /* * logsys_logger bits * * SUBSYS_COUNT defines the maximum number of subsystems * SUBSYS_NAMELEN defines the maximum len of a subsystem name */ #define LOGSYS_MAX_SUBSYS_COUNT 64 #define LOGSYS_MAX_SUBSYS_NAMELEN 64 - -/* - * rec_ident explained: - * - * rec_ident is an unsigned int and carries bitfields information - * on subsys_id, log priority (level) and type of message (RECID). - * - * level values are imported from syslog.h. - * At the time of writing it's a 3 bits value (0 to 7). - * - * subsys_id is any value between 0 and 64 (LOGSYS_MAX_SUBSYS_COUNT) - * - * RECID identifies the type of message. A set of predefined values - * are available via logsys, but other custom values can be defined - * by users. 
- * - * ---- - * bitfields: - * - * 0 - 2 level - * 3 - 9 subsysid - * 10 - n RECID - */ - -#define LOGSYS_LEVEL_END (3) -#define LOGSYS_SUBSYSID_END (LOGSYS_LEVEL_END + 7) - -#define LOGSYS_RECID_LEVEL_MASK (LOG_PRIMASK) -#define LOGSYS_RECID_SUBSYSID_MASK ((2 << (LOGSYS_SUBSYSID_END - 1)) - \ - (LOG_PRIMASK + 1)) -#define LOGSYS_RECID_RECID_MASK (UINT_MAX - \ - (LOGSYS_RECID_SUBSYSID_MASK + LOG_PRIMASK)) - -#define LOGSYS_ENCODE_RECID(level,subsysid,recid) \ - (((recid) << LOGSYS_SUBSYSID_END) | \ - ((subsysid) << LOGSYS_LEVEL_END) | \ - (level)) - -#define LOGSYS_DECODE_LEVEL(rec_ident) \ - ((rec_ident) & LOGSYS_RECID_LEVEL_MASK) - -#define LOGSYS_DECODE_SUBSYSID(rec_ident) \ - (((rec_ident) & LOGSYS_RECID_SUBSYSID_MASK) >> LOGSYS_LEVEL_END) - -#define LOGSYS_DECODE_RECID(rec_ident) \ - (((rec_ident) & LOGSYS_RECID_RECID_MASK) >> LOGSYS_SUBSYSID_END) - #define LOGSYS_MAX_PERROR_MSG_LEN 128 -#ifdef COROSYNC_LINUX -/* The GNU version of strerror_r returns a (char*) that *must* be used */ -#define LOGSYS_STRERROR_R(out_ptr, err_num, buffer, sizeof_buffer) \ - out_ptr = strerror_r(err_num, buffer, sizeof_buffer); -#else -/* The XSI-compliant strerror_r() return 0 or -1 (in case the buffer is full) */ -#define LOGSYS_STRERROR_R(out_ptr, err_num, buffer, sizeof_buffer) do { \ - if ( strerror_r(err_num, buffer, sizeof_buffer) == 0 ) { \ - out_ptr = buffer; \ - } else { \ - out_ptr = ""; \ - } \ - } while(0) -#endif - -#define LOGSYS_PERROR(err_num, level, fmt, args...) 
do { \ - char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \ - const char *_error_ptr; \ - LOGSYS_STRERROR_R(_error_ptr, err_num, _error_str, sizeof(_error_str)); \ - log_printf(level, fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \ - } while(0) - - - #ifndef LOGSYS_UTILS_ONLY -extern int _logsys_system_setup( - const char *mainsystem, - unsigned int mode, - unsigned int debug, - const char *logfile, - int logfile_priority, - int syslog_facility, - int syslog_priority); - -extern int _logsys_config_subsys_get ( - const char *subsys); - -extern int _logsys_subsys_create (const char *subsys); - -extern int _logsys_rec_init (unsigned int size); - -extern void _logsys_log_vprintf ( - unsigned int rec_ident, - const char *function_name, - const char *file_name, - int file_line, - const char *format, - va_list ap) __attribute__((format(printf, 5, 0))); - -extern void _logsys_log_printf ( - unsigned int rec_ident, - const char *function_name, - const char *file_name, - int file_line, - const char *format, - ...) __attribute__((format(printf, 5, 6))); - -extern void _logsys_log_rec ( - unsigned int rec_ident, - const char *function_name, - const char *file_name, - int file_line, - ...); - -extern int _logsys_wthread_create (void); - -static int logsys_subsys_id __attribute__((unused)) = LOGSYS_MAX_SUBSYS_COUNT; - -/* - * External API - init - * See below: - * - * LOGSYS_DECLARE_SYSTEM - * LOGSYS_DECLARE_SUBSYS - * - */ -extern void logsys_fork_completed (void); - -extern void logsys_atexit (void); - -/* - * External API - misc - */ -extern void logsys_flush (void); - -extern int logsys_log_rec_store (const char *filename); - -/* - * External API - configuration - */ - /* * configuration bits that can only be done for the whole system */ extern int logsys_format_set ( const char *format); extern char *logsys_format_get (void); /* * per system/subsystem settings. * * NOTE: once a subsystem is created and configured, changing * the default does NOT affect the subsystems. 
* * Pass a NULL subsystem to change them all */ extern int logsys_config_syslog_facility_set ( const char *subsys, unsigned int facility); extern int logsys_config_syslog_priority_set ( const char *subsys, unsigned int priority); extern int logsys_config_mode_set ( const char *subsys, unsigned int mode); extern unsigned int logsys_config_mode_get ( const char *subsys); +void logsys_config_apply(void); + /* * to close a logfile, just invoke this function with a NULL * file or if you want to change logfile, the old one will * be closed for you. */ extern int logsys_config_file_set ( const char *subsys, const char **error_string, const char *file); extern int logsys_config_logfile_priority_set ( const char *subsys, unsigned int priority); /* * enabling debug, disable message priority filtering. * everything is sent everywhere. priority values * for file and syslog are not overwritten. */ extern int logsys_config_debug_set ( const char *subsys, unsigned int value); /* * External API - helpers * * convert facility/priority to/from name/values */ extern int logsys_facility_id_get ( const char *name); extern const char *logsys_facility_name_get ( unsigned int facility); extern int logsys_priority_id_get ( const char *name); extern const char *logsys_priority_name_get ( unsigned int priority); -extern int logsys_thread_priority_set ( - int policy, - const struct sched_param *param, - unsigned int after_log_ops_yield); +extern int _logsys_system_setup( + const char *mainsystem, + unsigned int mode, + int syslog_facility, + int syslog_priority); -/* - * External definitions - */ -extern void *logsys_rec_end; +extern int _logsys_config_subsys_get ( + const char *subsys); -#define LOGSYS_REC_END (&logsys_rec_end) +extern int _logsys_subsys_create (const char *subsys, const char *filename); -#define LOGSYS_DECLARE_SYSTEM(name,mode,debug,file,file_priority, \ - syslog_facility,syslog_priority,format,fltsize) \ +static int logsys_subsys_id __attribute__((unused)) = 
LOGSYS_MAX_SUBSYS_COUNT; + +#define LOGSYS_DECLARE_SYSTEM(name,mode,syslog_facility,syslog_priority)\ __attribute__ ((constructor)) \ static void logsys_system_init (void) \ { \ - if (_logsys_system_setup (name,mode,debug,file,file_priority, \ - syslog_facility,syslog_priority) < 0) { \ + if (_logsys_system_setup (name,mode,syslog_facility,syslog_priority) < 0) { \ fprintf (stderr, \ "Unable to setup logging system: %s.\n", name); \ exit (-1); \ - } \ - \ - if (logsys_format_set (format) == -1) { \ - fprintf (stderr, \ - "Unable to setup logging format.\n"); \ - exit (-1); \ - } \ - \ - if (_logsys_rec_init (fltsize) < 0) { \ - fprintf (stderr, \ - "Unable to initialize log flight recorder.\n"); \ - exit (-1); \ - } \ - \ - if (_logsys_wthread_create() < 0) { \ - fprintf (stderr, \ - "Unable to initialize logging thread.\n"); \ - exit (-1); \ } \ } #define LOGSYS_DECLARE_SUBSYS(subsys) \ __attribute__ ((constructor)) \ static void logsys_subsys_init (void) \ { \ + assert(__start___verbose != __stop___verbose); \ logsys_subsys_id = \ - _logsys_subsys_create ((subsys)); \ + _logsys_subsys_create ((subsys), __FILE__); \ if (logsys_subsys_id == -1) { \ fprintf (stderr, \ "Unable to create logging subsystem: %s.\n", subsys); \ exit (-1); \ } \ } -#define log_rec(rec_ident, args...) \ -do { \ - _logsys_log_rec (rec_ident, __FUNCTION__, \ - __FILE__, __LINE__, ##args, \ - LOGSYS_REC_END); \ -} while(0) - -#define log_printf(level, format, args...) 
\ - do { \ - _logsys_log_printf ( \ - LOGSYS_ENCODE_RECID(level, \ - logsys_subsys_id, \ - LOGSYS_RECID_LOG), \ - __FUNCTION__, __FILE__, __LINE__, \ - format, ##args); \ -} while(0) - -#define ENTER() do { \ - _logsys_log_rec ( \ - LOGSYS_ENCODE_RECID(LOGSYS_LEVEL_DEBUG, \ - logsys_subsys_id, \ - LOGSYS_RECID_ENTER), \ - __FUNCTION__, __FILE__, __LINE__, LOGSYS_REC_END); \ -} while(0) - -#define LEAVE() do { \ - _logsys_log_rec ( \ - LOGSYS_ENCODE_RECID(LOGSYS_LEVEL_DEBUG, \ - logsys_subsys_id, \ - LOGSYS_RECID_LEAVE), \ - __FUNCTION__, __FILE__, __LINE__, LOGSYS_REC_END); \ -} while(0) - -#define TRACE(recid, format, args...) do { \ - _logsys_log_printf ( \ - LOGSYS_ENCODE_RECID(LOGSYS_LEVEL_DEBUG, \ - logsys_subsys_id, \ - recid), \ - __FUNCTION__, __FILE__, __LINE__, \ - format, ##args); \ -} while(0) - -#define TRACE1(format, args...) do { \ - TRACE(LOGSYS_RECID_TRACE1, format, ##args); \ -} while(0) - -#define TRACE2(format, args...) do { \ - TRACE(LOGSYS_RECID_TRACE2, format, ##args); \ -} while(0) - -#define TRACE3(format, args...) do { \ - TRACE(LOGSYS_RECID_TRACE3, format, ##args); \ -} while(0) - -#define TRACE4(format, args...) do { \ - TRACE(LOGSYS_RECID_TRACE4, format, ##args); \ -} while(0) - -#define TRACE5(format, args...) do { \ - TRACE(LOGSYS_RECID_TRACE5, format, ##args); \ -} while(0) - -#define TRACE6(format, args...) do { \ - TRACE(LOGSYS_RECID_TRACE6, format, ##args); \ -} while(0) - -#define TRACE7(format, args...) do { \ - TRACE(LOGSYS_RECID_TRACE7, format, ##args); \ -} while(0) - -#define TRACE8(format, args...) do { \ - TRACE(LOGSYS_RECID_TRACE8, format, ##args); \ -} while(0) +#define LOGSYS_PERROR(err_num, level, fmt, args...) do { \ + char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \ + const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \ + qb_log(level, fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \ + } while(0) + +#define log_printf(level, format, args...) 
qb_log(level, format, ##args) +#define ENTER() qb_log(LOG_DEBUG, "ENTER") +#define LEAVE() qb_log(LOG_DEBUG, "LEAVE") +#define TRACE1(format, args...) qb_log(LOG_DEBUG, "TRACE1:" #format, ##args) +#define TRACE2 +#define TRACE3 +#define TRACE4 +#define TRACE5 +#define TRACE6 +#define TRACE7 +#define TRACE8 #endif /* LOGSYS_UTILS_ONLY */ #ifdef __cplusplus } #endif #endif /* LOGSYS_H_DEFINED */ diff --git a/include/corosync/lcr/lcr_ifact.h b/include/corosync/lcr/lcr_ifact.h index 0be3e370..446e1af6 100644 --- a/include/corosync/lcr/lcr_ifact.h +++ b/include/corosync/lcr/lcr_ifact.h @@ -1,55 +1,58 @@ /* * Copyright (C) 2006 Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef LCR_IFACT_H_DEFINED #define LCR_IFACT_H_DEFINED #include #ifdef __cplusplus extern "C" { #endif int lcr_ifact_reference ( hdb_handle_t *handle, const char *iface_name, int version, void **interface, void *context); +void *lcr_ifact_addr_get(hdb_handle_t iface_handle, + const char* symbol_name); + int lcr_ifact_release ( hdb_handle_t handle); #ifdef __cplusplus } #endif #endif /* LCR_IFACT_H_DEFINED */ diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h index 239b0356..a025eab1 100644 --- a/include/corosync/totem/totem.h +++ b/include/corosync/totem/totem.h @@ -1,285 +1,278 @@ /* * Copyright (c) 2005 MontaVista Software, Inc. * Copyright (c) 2006-2009 Red Hat, Inc. * * Author: Steven Dake (sdake@redhat.com) * * All rights reserved. * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. 
nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef TOTEM_H_DEFINED #define TOTEM_H_DEFINED #include "totemip.h" #include #ifdef HAVE_SMALL_MEMORY_FOOTPRINT #define PROCESSOR_COUNT_MAX 16 #define MESSAGE_SIZE_MAX 1024*64 #define MESSAGE_QUEUE_MAX 512 #else #define PROCESSOR_COUNT_MAX 384 #define MESSAGE_SIZE_MAX 1024*1024 /* (1MB) */ #define MESSAGE_QUEUE_MAX MESSAGE_SIZE_MAX / totem_config->net_mtu #endif /* HAVE_SMALL_MEMORY_FOOTPRINT */ #define FRAME_SIZE_MAX 10000 #define TRANSMITS_ALLOWED 16 #define SEND_THREADS_MAX 16 #define INTERFACE_MAX 2 /** * Maximum number of continuous gather states */ #define MAX_NO_CONT_GATHER 3 struct totem_interface { struct totem_ip_address bindnet; struct totem_ip_address boundto; struct totem_ip_address mcast_addr; uint16_t ip_port; uint16_t ttl; int member_count; struct totem_ip_address member_list[PROCESSOR_COUNT_MAX]; }; struct totem_logging_configuration { void (*log_printf) ( - unsigned int rec_ident, + int level, + int subsys, const char *function_name, const char *file_name, int file_line, const char *format, - ...) 
__attribute__((format(printf, 5, 6))); + ...) __attribute__((format(printf, 6, 7))); int log_level_security; int log_level_error; int log_level_warning; int log_level_notice; int log_level_debug; int log_subsys_id; }; enum { TOTEM_PRIVATE_KEY_LEN = 128 }; enum { TOTEM_RRP_MODE_BYTES = 64 }; typedef enum { TOTEM_TRANSPORT_UDP = 0, TOTEM_TRANSPORT_UDPU = 1, TOTEM_TRANSPORT_RDMA = 2 } totem_transport_t; struct totem_config { int version; /* * network */ struct totem_interface *interfaces; unsigned int interface_count; unsigned int node_id; unsigned int clear_node_high_bit; /* * key information */ unsigned char private_key[TOTEM_PRIVATE_KEY_LEN]; unsigned int private_key_len; /* * Totem configuration parameters */ unsigned int token_timeout; unsigned int token_retransmit_timeout; unsigned int token_hold_timeout; unsigned int token_retransmits_before_loss_const; unsigned int join_timeout; unsigned int send_join_timeout; unsigned int consensus_timeout; unsigned int merge_timeout; unsigned int downcheck_timeout; unsigned int fail_to_recv_const; unsigned int seqno_unchanged_const; unsigned int rrp_token_expired_timeout; unsigned int rrp_problem_count_timeout; unsigned int rrp_problem_count_threshold; unsigned int rrp_autorecovery_check_timeout; char rrp_mode[TOTEM_RRP_MODE_BYTES]; struct totem_logging_configuration totem_logging_configuration; - void (*log_rec) ( - int subsysid, - const char *function_name, - const char *file_name, - int file_line, - unsigned int rec_ident, - ...); - unsigned int secauth; unsigned int net_mtu; unsigned int threads; unsigned int heartbeat_failures_allowed; unsigned int max_network_delay; unsigned int window_size; unsigned int max_messages; const char *vsf_type; unsigned int broadcast_use; enum { TOTEM_CRYPTO_SOBER=0, TOTEM_CRYPTO_NSS } crypto_type; enum { TOTEM_CRYPTO_ACCEPT_OLD=0, TOTEM_CRYPTO_ACCEPT_NEW } crypto_accept; int crypto_crypt_type; int crypto_sign_type; totem_transport_t transport_number; unsigned int miss_count_const; }; 
#define TOTEM_CONFIGURATION_TYPE enum totem_configuration_type { TOTEM_CONFIGURATION_REGULAR, TOTEM_CONFIGURATION_TRANSITIONAL }; #define TOTEM_CALLBACK_TOKEN_TYPE enum totem_callback_token_type { TOTEM_CALLBACK_TOKEN_RECEIVED = 1, TOTEM_CALLBACK_TOKEN_SENT = 2 }; enum totem_event_type { TOTEM_EVENT_DELIVERY_CONGESTED, TOTEM_EVENT_NEW_MSG, }; #define MEMB_RING_ID struct memb_ring_id { struct totem_ip_address rep; unsigned long long seq; } __attribute__((packed)); typedef struct { hdb_handle_t handle; int is_dirty; time_t last_updated; } totem_stats_header_t; typedef struct { totem_stats_header_t hdr; uint32_t iface_changes; } totemnet_stats_t; typedef struct { totem_stats_header_t hdr; totemnet_stats_t *net; char *algo_name; } totemrrp_stats_t; typedef struct { uint32_t rx; uint32_t tx; int backlog_calc; } totemsrp_token_stats_t; typedef struct { totem_stats_header_t hdr; totemrrp_stats_t *rrp; uint64_t orf_token_tx; uint64_t orf_token_rx; uint64_t memb_merge_detect_tx; uint64_t memb_merge_detect_rx; uint64_t memb_join_tx; uint64_t memb_join_rx; uint64_t mcast_tx; uint64_t mcast_retx; uint64_t mcast_rx; uint64_t memb_commit_token_tx; uint64_t memb_commit_token_rx; uint64_t token_hold_cancel_tx; uint64_t token_hold_cancel_rx; uint64_t operational_entered; uint64_t operational_token_lost; uint64_t gather_entered; uint64_t gather_token_lost; uint64_t commit_entered; uint64_t commit_token_lost; uint64_t recovery_entered; uint64_t recovery_token_lost; uint64_t consensus_timeouts; uint64_t rx_msg_dropped; uint32_t continuous_gather; int earliest_token; int latest_token; #define TOTEM_TOKEN_STATS_MAX 100 totemsrp_token_stats_t token[TOTEM_TOKEN_STATS_MAX]; } totemsrp_stats_t; #define TOTEM_CONFIGURATION_TYPE typedef struct { totem_stats_header_t hdr; totemsrp_stats_t *srp; } totemmrp_stats_t; typedef struct { totem_stats_header_t hdr; totemmrp_stats_t *mrp; uint32_t msg_reserved; uint32_t msg_queue_avail; } totempg_stats_t; #endif /* TOTEM_H_DEFINED */ diff --git 
a/lcr/lcr_ifact.c b/lcr/lcr_ifact.c index bf2b7214..fc14f8c3 100644 --- a/lcr/lcr_ifact.c +++ b/lcr/lcr_ifact.c @@ -1,566 +1,590 @@ /* * Copyright (C) 2006 Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #ifdef COROSYNC_SOLARIS #include #endif #include #include #include struct lcr_component_instance { struct lcr_iface *ifaces; int iface_count; hdb_handle_t comp_handle; void *dl_handle; int refcount; char library_name[256]; }; struct lcr_iface_instance { hdb_handle_t component_handle; void *context; void (*destructor) (void *context); }; DECLARE_HDB_DATABASE (lcr_component_instance_database, NULL); DECLARE_HDB_DATABASE (lcr_iface_instance_database, NULL); /* static struct hdb_handle_database lcr_component_instance_database = { .handle_count = 0, .handles = 0, .iterator = 0 }; static struct hdb_handle_database lcr_iface_instance_database = { .handle_count = 0, .handles = 0, .iterator = 0 }; */ static hdb_handle_t g_component_handle = 0xFFFFFFFF; #if defined(COROSYNC_LINUX) || defined(COROSYNC_SOLARIS) static int lcr_select_so (const struct dirent *dirent) #else static int lcr_select_so (struct dirent *dirent) #endif { unsigned int len; len = strlen (dirent->d_name); if (len > 6) { if (strcmp (".lcrso", dirent->d_name + len - 6) == 0) { return (1); } } return (0); } #if defined(COROSYNC_LINUX) || defined(COROSYNC_SOLARIS) static int pathlist_select (const struct dirent *dirent) #else static int pathlist_select (struct dirent *dirent) #endif { if (fnmatch ("*.conf", dirent->d_name, 0) == 0) { return (1); } return (0); } static inline struct lcr_component_instance *lcr_comp_find ( const char *iface_name, unsigned int version, unsigned int *iface_number) { struct lcr_component_instance *instance; void *instance_p = NULL; hdb_handle_t component_handle = 0; int i; /* * Try to find interface in already loaded component */ hdb_iterator_reset (&lcr_component_instance_database); while (hdb_iterator_next (&lcr_component_instance_database, &instance_p, &component_handle) == 0) { instance = (struct lcr_component_instance *)instance_p; for (i = 0; i < instance->iface_count; i++) { if ((strcmp 
(instance->ifaces[i].name, iface_name) == 0) && instance->ifaces[i].version == version) { *iface_number = i; return (instance); } } hdb_handle_put (&lcr_component_instance_database, component_handle); } return (NULL); } static inline int lcr_lib_loaded ( char *library_name) { struct lcr_component_instance *instance; void *instance_p = NULL; hdb_handle_t component_handle = 0; /* * Try to find interface in already loaded component */ hdb_iterator_reset (&lcr_component_instance_database); while (hdb_iterator_next (&lcr_component_instance_database, (void *)&instance_p, &component_handle) == 0) { instance = (struct lcr_component_instance *)instance_p; if (strcmp (instance->library_name, library_name) == 0) { return (1); } hdb_handle_put (&lcr_component_instance_database, component_handle); } return (0); } enum { PATH_LIST_SIZE = 128 }; const char *path_list[PATH_LIST_SIZE]; unsigned int path_list_entries = 0; static void defaults_path_build (void) { char cwd[1024]; char *res; res = getcwd (cwd, sizeof (cwd)); if (res != NULL && (path_list[0] = strdup (cwd)) != NULL) { path_list_entries++; } path_list[path_list_entries++] = LCRSODIR; } static void ld_library_path_build (void) { char *ld_library_path; char *my_ld_library_path; char *p_s, *ptrptr; ld_library_path = getenv ("LD_LIBRARY_PATH"); if (ld_library_path == NULL) { return; } my_ld_library_path = strdup (ld_library_path); if (my_ld_library_path == NULL) { return; } p_s = strtok_r (my_ld_library_path, ":", &ptrptr); while (p_s != NULL) { char *p = strdup (p_s); if (p && path_list_entries < PATH_LIST_SIZE) { path_list[path_list_entries++] = p; } p_s = strtok_r (NULL, ":", &ptrptr); } free (my_ld_library_path); } static int ldso_path_build (const char *path, const char *filename) { FILE *fp; char string[1024]; char filename_cat[1024]; char newpath[1024]; char *newpath_tmp; char *new_filename; int j; struct dirent **scandir_list; unsigned int scandir_entries; snprintf (filename_cat, sizeof(filename_cat), "%s/%s", path, 
filename); if (filename[0] == '*') { scandir_entries = scandir ( path, &scandir_list, pathlist_select, alphasort); if (scandir_entries == 0) { return 0; } else if (scandir_entries == -1) { return -1; } else { for (j = 0; j < scandir_entries; j++) { ldso_path_build (path, scandir_list[j]->d_name); } } } fp = fopen (filename_cat, "r"); if (fp == NULL) { return (-1); } while (fgets (string, sizeof (string), fp)) { char *p; if (strlen(string) > 0) string[strlen(string) - 1] = '\0'; if (strncmp (string, "include", strlen ("include")) == 0) { newpath_tmp = string + strlen ("include") + 1; for (j = strlen (string); string[j] != ' ' && string[j] != '/' && j > 0; j--) { } string[j] = '\0'; new_filename = &string[j] + 1; strcpy (newpath, path); strcat (newpath, "/"); strcat (newpath, newpath_tmp); ldso_path_build (newpath, new_filename); continue; } p = strdup (string); if (p && path_list_entries < PATH_LIST_SIZE) { path_list[path_list_entries++] = p; } } fclose(fp); return (0); } #if defined (COROSYNC_SOLARIS) && !defined(HAVE_SCANDIR) static int scandir ( const char *dir, struct dirent ***namelist, int (*filter)(const struct dirent *), int (*compar)(const struct dirent **, const struct dirent **)) { DIR *d; struct dirent *entry; struct dirent *result; struct dirent **names = NULL; int namelist_items = 0, namelist_size = 0; size_t len; int return_code; d = opendir(dir); if (d == NULL) return -1; names = NULL; len = offsetof(struct dirent, d_name) + pathconf(dir, _PC_NAME_MAX) + 1; entry = malloc(len); for (return_code = readdir_r (d, entry, &result); dirent != NULL && return_code == 0; return_code = readdir_r(d, entry, &result)) { struct dirent *tmpentry; if ((filter != NULL) && ((*filter)(result) == 0)) { continue; } if (namelist_items >= namelist_size) { struct dirent **tmp; namelist_size += 512; if ((unsigned long)namelist_size > INT_MAX) { errno = EOVERFLOW; goto fail; } tmp = realloc (names, namelist_size * sizeof(struct dirent *)); if (tmp == NULL) { goto fail; } 
names = tmp; } tmpentry = malloc (result->d_reclen); if (tmpentry == NULL) { goto fail; } (void) memcpy (tmpentry, result, result->d_reclen); names[namelist_items++] = tmpentry; } (void) closedir (d); if ((namelist_items > 1) && (compar != NULL)) { qsort (names, namelist_items, sizeof (struct dirent *), (int (*)(const void *, const void *))compar); } *namelist = names; return namelist_items; fail: { int err = errno; (void) closedir (d); while (namelist_items != 0) { namelist_items--; free (*namelist[namelist_items]); } free (entry); free (names); *namelist = NULL; errno = err; return -1; } } #endif #if defined (COROSYNC_SOLARIS) && !defined(HAVE_ALPHASORT) static int alphasort (const struct dirent **a, const struct dirent **b) { return strcmp ((*a)->d_name, (*b)->d_name); } #endif static int interface_find_and_load ( const char *path, const char *iface_name, int version, struct lcr_component_instance **instance_ret, unsigned int *iface_number) { struct lcr_component_instance *instance; void *dl_handle; struct dirent **scandir_list; int scandir_entries; unsigned int libs_to_scan; char dl_name[1024]; #ifdef COROSYNC_SOLARIS void (*comp_reg)(void); #endif scandir_entries = scandir (path, &scandir_list, lcr_select_so, alphasort); if (scandir_entries > 0) /* * no error so load the object */ for (libs_to_scan = 0; libs_to_scan < scandir_entries; libs_to_scan++) { /* * Load objects, scan them, unload them if they are not a match */ snprintf (dl_name, sizeof(dl_name), "%s/%s", path, scandir_list[libs_to_scan]->d_name); /* * Don't reload already loaded libraries */ if (lcr_lib_loaded (dl_name)) { continue; } dl_handle = dlopen (dl_name, RTLD_NOW); if (dl_handle == NULL) { fprintf(stderr, "%s: open failed: %s\n", dl_name, dlerror()); continue; } /* * constructors don't work in Solaris dlopen, so we have to specifically call * a function to register the component */ #ifdef COROSYNC_SOLARIS comp_reg = dlsym (dl_handle, "corosync_lcr_component_register"); comp_reg (); #endif 
instance = lcr_comp_find (iface_name, version, iface_number); if (instance) { instance->dl_handle = dl_handle; strcpy (instance->library_name, dl_name); goto found; } /* * No matching interfaces found, try next shared object */ if (g_component_handle != 0xFFFFFFFF) { hdb_handle_destroy (&lcr_component_instance_database, g_component_handle); g_component_handle = 0xFFFFFFFF; } dlclose (dl_handle); } /* scanning for lcrso loop */ if (scandir_entries > 0) { int i; for (i = 0; i < scandir_entries; i++) { free (scandir_list[i]); } free (scandir_list); } g_component_handle = 0xFFFFFFFF; return -1; found: *instance_ret = instance; if (scandir_entries > 0) { int i; for (i = 0; i < scandir_entries; i++) { free (scandir_list[i]); } free (scandir_list); } g_component_handle = 0xFFFFFFFF; return 0; } static unsigned int lcr_initialized = 0; +void *lcr_ifact_addr_get(hdb_handle_t iface_handle, + const char* symbol_name) +{ + struct lcr_iface_instance *iface_instance; + struct lcr_component_instance *instance; + void *ptr; + + hdb_handle_get (&lcr_iface_instance_database, + iface_handle, (void *)&iface_instance); + + hdb_handle_get (&lcr_component_instance_database, + iface_instance->component_handle, (void *)&instance); + + ptr = dlsym(instance->dl_handle, symbol_name); + + hdb_handle_put(&lcr_component_instance_database, + iface_instance->component_handle); + + hdb_handle_put (&lcr_iface_instance_database, + iface_handle); + return ptr; +} + + int lcr_ifact_reference ( hdb_handle_t *iface_handle, const char *iface_name, int version, void **iface, void *context) { struct lcr_iface_instance *iface_instance; struct lcr_component_instance *instance; unsigned int iface_number; unsigned int res; unsigned int i; /* * Determine if the component is already loaded */ instance = lcr_comp_find (iface_name, version, &iface_number); if (instance) { goto found; } if (lcr_initialized == 0) { lcr_initialized = 1; defaults_path_build (); ld_library_path_build (); ldso_path_build ("/etc", 
"ld.so.conf"); } // TODO error checking in this code is weak /* * Search through all lcrso files for desired interface */ for (i = 0; i < path_list_entries; i++) { res = interface_find_and_load ( path_list[i], iface_name, version, &instance, &iface_number); if (res == 0) { goto found; } } /* * No matching interfaces found in all shared objects */ return (-1); found: *iface = instance->ifaces[iface_number].interfaces; if (instance->ifaces[iface_number].constructor) { instance->ifaces[iface_number].constructor (context); } hdb_handle_create (&lcr_iface_instance_database, sizeof (struct lcr_iface_instance), iface_handle); hdb_handle_get (&lcr_iface_instance_database, *iface_handle, (void *)&iface_instance); iface_instance->component_handle = instance->comp_handle; iface_instance->context = context; iface_instance->destructor = instance->ifaces[iface_number].destructor; hdb_handle_put (&lcr_iface_instance_database, *iface_handle); return (0); } int lcr_ifact_release (hdb_handle_t handle) { struct lcr_iface_instance *iface_instance; int res = 0; res = hdb_handle_get (&lcr_iface_instance_database, handle, (void *)&iface_instance); if (iface_instance->destructor) { iface_instance->destructor (iface_instance->context); } hdb_handle_put (&lcr_component_instance_database, iface_instance->component_handle); hdb_handle_put (&lcr_iface_instance_database, handle); hdb_handle_destroy (&lcr_iface_instance_database, handle); return (res); } void lcr_component_register (struct lcr_comp *comp) { struct lcr_component_instance *instance; static hdb_handle_t comp_handle; hdb_handle_create (&lcr_component_instance_database, sizeof (struct lcr_component_instance), &comp_handle); hdb_handle_get (&lcr_component_instance_database, comp_handle, (void *)&instance); instance->ifaces = comp->ifaces; instance->iface_count = comp->iface_count; instance->comp_handle = comp_handle; instance->dl_handle = NULL; hdb_handle_put (&lcr_component_instance_database, comp_handle); g_component_handle = 
comp_handle; } diff --git a/services/cpg.c b/services/cpg.c index 633d4b37..1b8d5b45 100644 --- a/services/cpg.c +++ b/services/cpg.c @@ -1,2052 +1,2052 @@ /* * Copyright (c) 2006-2009 Red Hat, Inc. * * All rights reserved. * * Author: Christine Caulfield (ccaulfie@redhat.com) * Author: Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #ifdef HAVE_ALLOCA_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include LOGSYS_DECLARE_SUBSYS ("CPG"); #define GROUP_HASH_SIZE 32 enum cpg_message_req_types { MESSAGE_REQ_EXEC_CPG_PROCJOIN = 0, MESSAGE_REQ_EXEC_CPG_PROCLEAVE = 1, MESSAGE_REQ_EXEC_CPG_JOINLIST = 2, MESSAGE_REQ_EXEC_CPG_MCAST = 3, MESSAGE_REQ_EXEC_CPG_DOWNLIST_OLD = 4, MESSAGE_REQ_EXEC_CPG_DOWNLIST = 5 }; struct zcb_mapped { struct list_head list; void *addr; size_t size; }; /* * state` exec deliver * match group name, pid -> if matched deliver for YES: * XXX indicates impossible state * * join leave mcast * UNJOINED XXX XXX NO * LEAVE_STARTED XXX YES(unjoined_enter) YES * JOIN_STARTED YES(join_started_enter) XXX NO * JOIN_COMPLETED XXX NO YES * * join_started_enter * set JOIN_COMPLETED * add entry to process_info list * unjoined_enter * set UNJOINED * delete entry from process_info list * * * library accept join error codes * UNJOINED YES(CS_OK) set JOIN_STARTED * LEAVE_STARTED NO(CS_ERR_BUSY) * JOIN_STARTED NO(CS_ERR_EXIST) * JOIN_COMPlETED NO(CS_ERR_EXIST) * * library accept leave error codes * UNJOINED NO(CS_ERR_NOT_EXIST) * LEAVE_STARTED NO(CS_ERR_NOT_EXIST) * JOIN_STARTED NO(CS_ERR_BUSY) * JOIN_COMPLETED YES(CS_OK) set LEAVE_STARTED * * library accept mcast * UNJOINED NO(CS_ERR_NOT_EXIST) * LEAVE_STARTED NO(CS_ERR_NOT_EXIST) * JOIN_STARTED YES(CS_OK) * JOIN_COMPLETED YES(CS_OK) */ enum cpd_state { CPD_STATE_UNJOINED, CPD_STATE_LEAVE_STARTED, CPD_STATE_JOIN_STARTED, CPD_STATE_JOIN_COMPLETED }; enum cpg_sync_state { CPGSYNC_DOWNLIST, CPGSYNC_JOINLIST }; enum cpg_downlist_state_e { CPG_DOWNLIST_NONE, CPG_DOWNLIST_WAITING_FOR_MESSAGES, CPG_DOWNLIST_APPLYING, }; static enum cpg_downlist_state_e downlist_state; static struct list_head downlist_messages_head; struct cpg_pd { void 
*conn; mar_cpg_name_t group_name; uint32_t pid; enum cpd_state cpd_state; unsigned int flags; int initial_totem_conf_sent; struct list_head list; struct list_head iteration_instance_list_head; struct list_head zcb_mapped_list_head; }; struct cpg_iteration_instance { hdb_handle_t handle; struct list_head list; struct list_head items_list_head; /* List of process_info */ struct list_head *current_pointer; }; DECLARE_HDB_DATABASE(cpg_iteration_handle_t_db,NULL); DECLARE_LIST_INIT(cpg_pd_list_head); static unsigned int my_member_list[PROCESSOR_COUNT_MAX]; static unsigned int my_member_list_entries; static unsigned int my_old_member_list[PROCESSOR_COUNT_MAX]; static unsigned int my_old_member_list_entries = 0; static struct corosync_api_v1 *api = NULL; static enum cpg_sync_state my_sync_state = CPGSYNC_DOWNLIST; static mar_cpg_ring_id_t last_sync_ring_id; struct process_info { unsigned int nodeid; uint32_t pid; mar_cpg_name_t group; struct list_head list; /* on the group_info members list */ }; DECLARE_LIST_INIT(process_info_list_head); struct join_list_entry { uint32_t pid; mar_cpg_name_t group_name; }; /* * Service Interfaces required by service_message_handler struct */ static int cpg_exec_init_fn (struct corosync_api_v1 *); static int cpg_lib_init_fn (void *conn); static int cpg_lib_exit_fn (void *conn); static void message_handler_req_exec_cpg_procjoin ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_procleave ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_joinlist ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_mcast ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_downlist_old ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_downlist ( const void *message, unsigned int nodeid); static void exec_cpg_procjoin_endian_convert (void *msg); static void exec_cpg_joinlist_endian_convert 
(void *msg); static void exec_cpg_mcast_endian_convert (void *msg); static void exec_cpg_downlist_endian_convert_old (void *msg); static void exec_cpg_downlist_endian_convert (void *msg); static void message_handler_req_lib_cpg_join (void *conn, const void *message); static void message_handler_req_lib_cpg_leave (void *conn, const void *message); static void message_handler_req_lib_cpg_finalize (void *conn, const void *message); static void message_handler_req_lib_cpg_mcast (void *conn, const void *message); static void message_handler_req_lib_cpg_membership (void *conn, const void *message); static void message_handler_req_lib_cpg_local_get (void *conn, const void *message); static void message_handler_req_lib_cpg_iteration_initialize ( void *conn, const void *message); static void message_handler_req_lib_cpg_iteration_next ( void *conn, const void *message); static void message_handler_req_lib_cpg_iteration_finalize ( void *conn, const void *message); static void message_handler_req_lib_cpg_zc_alloc ( void *conn, const void *message); static void message_handler_req_lib_cpg_zc_free ( void *conn, const void *message); static void message_handler_req_lib_cpg_zc_execute ( void *conn, const void *message); static int cpg_node_joinleave_send (unsigned int pid, const mar_cpg_name_t *group_name, int fn, int reason); static int cpg_exec_send_downlist(void); static int cpg_exec_send_joinlist(void); static void downlist_messages_delete (void); static void downlist_master_choose_and_send (void); static void cpg_sync_init_v2 ( const unsigned int *trans_list, size_t trans_list_entries, const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id); static int cpg_sync_process (void); static void cpg_sync_activate (void); static void cpg_sync_abort (void); static int notify_lib_totem_membership ( void *conn, int member_list_entries, const unsigned int *member_list); static inline int zcb_all_free ( struct cpg_pd *cpd); /* * Library Handler 
Definition */ static struct corosync_lib_handler cpg_lib_engine[] = { { /* 0 */ .lib_handler_fn = message_handler_req_lib_cpg_join, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 1 */ .lib_handler_fn = message_handler_req_lib_cpg_leave, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 2 */ .lib_handler_fn = message_handler_req_lib_cpg_mcast, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 3 */ .lib_handler_fn = message_handler_req_lib_cpg_membership, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 4 */ .lib_handler_fn = message_handler_req_lib_cpg_local_get, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 5 */ .lib_handler_fn = message_handler_req_lib_cpg_iteration_initialize, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 6 */ .lib_handler_fn = message_handler_req_lib_cpg_iteration_next, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 7 */ .lib_handler_fn = message_handler_req_lib_cpg_iteration_finalize, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 8 */ .lib_handler_fn = message_handler_req_lib_cpg_finalize, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 9 */ .lib_handler_fn = message_handler_req_lib_cpg_zc_alloc, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 10 */ .lib_handler_fn = message_handler_req_lib_cpg_zc_free, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 11 */ .lib_handler_fn = message_handler_req_lib_cpg_zc_execute, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, }; static struct corosync_exec_handler cpg_exec_engine[] = { { /* 0 */ .exec_handler_fn = message_handler_req_exec_cpg_procjoin, .exec_endian_convert_fn = exec_cpg_procjoin_endian_convert }, { /* 1 */ .exec_handler_fn = message_handler_req_exec_cpg_procleave, .exec_endian_convert_fn = exec_cpg_procjoin_endian_convert }, { /* 2 */ .exec_handler_fn = message_handler_req_exec_cpg_joinlist, .exec_endian_convert_fn = exec_cpg_joinlist_endian_convert }, { /* 3 */ .exec_handler_fn = message_handler_req_exec_cpg_mcast, 
.exec_endian_convert_fn = exec_cpg_mcast_endian_convert }, { /* 4 */ .exec_handler_fn = message_handler_req_exec_cpg_downlist_old, .exec_endian_convert_fn = exec_cpg_downlist_endian_convert_old }, { /* 5 */ .exec_handler_fn = message_handler_req_exec_cpg_downlist, .exec_endian_convert_fn = exec_cpg_downlist_endian_convert }, }; struct corosync_service_engine cpg_service_engine = { .name = "corosync cluster closed process group service v1.01", .id = CPG_SERVICE, .priority = 1, .private_data_size = sizeof (struct cpg_pd), .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED, .allow_inquorate = CS_LIB_ALLOW_INQUORATE, .lib_init_fn = cpg_lib_init_fn, .lib_exit_fn = cpg_lib_exit_fn, .lib_engine = cpg_lib_engine, .lib_engine_count = sizeof (cpg_lib_engine) / sizeof (struct corosync_lib_handler), .exec_init_fn = cpg_exec_init_fn, .exec_dump_fn = NULL, .exec_engine = cpg_exec_engine, .exec_engine_count = sizeof (cpg_exec_engine) / sizeof (struct corosync_exec_handler), .sync_mode = CS_SYNC_V1_APIV2, .sync_init = (sync_init_v1_fn_t)cpg_sync_init_v2, .sync_process = cpg_sync_process, .sync_activate = cpg_sync_activate, .sync_abort = cpg_sync_abort }; /* * Dynamic loader definition */ static struct corosync_service_engine *cpg_get_service_engine_ver0 (void); static struct corosync_service_engine_iface_ver0 cpg_service_engine_iface = { .corosync_get_service_engine_ver0 = cpg_get_service_engine_ver0 }; static struct lcr_iface corosync_cpg_ver0[1] = { { .name = "corosync_cpg", .version = 0, .versions_replace = 0, .versions_replace_count = 0, .dependencies = 0, .dependency_count = 0, .constructor = NULL, .destructor = NULL, .interfaces = NULL } }; static struct lcr_comp cpg_comp_ver0 = { .iface_count = 1, .ifaces = corosync_cpg_ver0 }; static struct corosync_service_engine *cpg_get_service_engine_ver0 (void) { return (&cpg_service_engine); } #ifdef COROSYNC_SOLARIS void corosync_lcr_component_register (void); void corosync_lcr_component_register (void) { #else __attribute__ 
((constructor)) static void corosync_lcr_component_register (void) { #endif lcr_interfaces_set (&corosync_cpg_ver0[0], &cpg_service_engine_iface); lcr_component_register (&cpg_comp_ver0); } struct req_exec_cpg_procjoin { struct qb_ipc_request_header header __attribute__((aligned(8))); mar_cpg_name_t group_name __attribute__((aligned(8))); mar_uint32_t pid __attribute__((aligned(8))); mar_uint32_t reason __attribute__((aligned(8))); }; struct req_exec_cpg_mcast { struct qb_ipc_request_header header __attribute__((aligned(8))); mar_cpg_name_t group_name __attribute__((aligned(8))); mar_uint32_t msglen __attribute__((aligned(8))); mar_uint32_t pid __attribute__((aligned(8))); mar_message_source_t source __attribute__((aligned(8))); mar_uint8_t message[] __attribute__((aligned(8))); }; struct req_exec_cpg_downlist_old { struct qb_ipc_request_header header __attribute__((aligned(8))); mar_uint32_t left_nodes __attribute__((aligned(8))); mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8))); }; struct req_exec_cpg_downlist { struct qb_ipc_request_header header __attribute__((aligned(8))); /* merge decisions */ mar_uint32_t old_members __attribute__((aligned(8))); /* downlist below */ mar_uint32_t left_nodes __attribute__((aligned(8))); mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8))); }; struct downlist_msg { mar_uint32_t sender_nodeid; mar_uint32_t old_members __attribute__((aligned(8))); mar_uint32_t left_nodes __attribute__((aligned(8))); mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8))); struct list_head list; }; static struct req_exec_cpg_downlist g_req_exec_cpg_downlist; static void cpg_sync_init_v2 ( const unsigned int *trans_list, size_t trans_list_entries, const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id) { int entries; int i, j; int found; my_sync_state = CPGSYNC_DOWNLIST; memcpy (my_member_list, member_list, member_list_entries * sizeof (unsigned int)); 
my_member_list_entries = member_list_entries; last_sync_ring_id.nodeid = ring_id->rep.nodeid; last_sync_ring_id.seq = ring_id->seq; downlist_state = CPG_DOWNLIST_WAITING_FOR_MESSAGES; entries = 0; /* * Determine list of nodeids for downlist message */ for (i = 0; i < my_old_member_list_entries; i++) { found = 0; for (j = 0; j < trans_list_entries; j++) { if (my_old_member_list[i] == trans_list[j]) { found = 1; break; } } if (found == 0) { g_req_exec_cpg_downlist.nodeids[entries++] = my_old_member_list[i]; } } g_req_exec_cpg_downlist.left_nodes = entries; } static int cpg_sync_process (void) { int res = -1; if (my_sync_state == CPGSYNC_DOWNLIST) { res = cpg_exec_send_downlist(); if (res == -1) { return (-1); } my_sync_state = CPGSYNC_JOINLIST; } if (my_sync_state == CPGSYNC_JOINLIST) { res = cpg_exec_send_joinlist(); } return (res); } static void cpg_sync_activate (void) { memcpy (my_old_member_list, my_member_list, my_member_list_entries * sizeof (unsigned int)); my_old_member_list_entries = my_member_list_entries; if (downlist_state == CPG_DOWNLIST_WAITING_FOR_MESSAGES) { downlist_master_choose_and_send (); } downlist_messages_delete (); downlist_state = CPG_DOWNLIST_NONE; notify_lib_totem_membership (NULL, my_member_list_entries, my_member_list); } static void cpg_sync_abort (void) { downlist_state = CPG_DOWNLIST_NONE; downlist_messages_delete (); } static int notify_lib_totem_membership ( void *conn, int member_list_entries, const unsigned int *member_list) { struct list_head *iter; char *buf; int size; struct res_lib_cpg_totem_confchg_callback *res; size = sizeof(struct res_lib_cpg_totem_confchg_callback) + sizeof(mar_uint32_t) * (member_list_entries); buf = alloca(size); if (!buf) return CS_ERR_LIBRARY; res = (struct res_lib_cpg_totem_confchg_callback *)buf; res->member_list_entries = member_list_entries; res->header.size = size; res->header.id = MESSAGE_RES_CPG_TOTEM_CONFCHG_CALLBACK; res->header.error = CS_OK; memcpy (&res->ring_id, &last_sync_ring_id, 
sizeof (mar_cpg_ring_id_t)); memcpy (res->member_list, member_list, res->member_list_entries * sizeof (mar_uint32_t)); if (conn == NULL) { for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) { struct cpg_pd *cpg_pd = list_entry (iter, struct cpg_pd, list); api->ipc_dispatch_send (cpg_pd->conn, buf, size); } } else { api->ipc_dispatch_send (conn, buf, size); } return CS_OK; } static int notify_lib_joinlist( const mar_cpg_name_t *group_name, void *conn, int joined_list_entries, mar_cpg_address_t *joined_list, int left_list_entries, mar_cpg_address_t *left_list, int id) { int size; char *buf; struct list_head *iter; int count; struct res_lib_cpg_confchg_callback *res; mar_cpg_address_t *retgi; count = 0; for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { struct process_info *pi = list_entry (iter, struct process_info, list); if (mar_name_compare (&pi->group, group_name) == 0) { int i; int founded = 0; for (i = 0; i < left_list_entries; i++) { if (left_list[i].nodeid == pi->nodeid && left_list[i].pid == pi->pid) { founded++; } } if (!founded) count++; } } size = sizeof(struct res_lib_cpg_confchg_callback) + sizeof(mar_cpg_address_t) * (count + left_list_entries + joined_list_entries); buf = alloca(size); if (!buf) return CS_ERR_LIBRARY; res = (struct res_lib_cpg_confchg_callback *)buf; res->joined_list_entries = joined_list_entries; res->left_list_entries = left_list_entries; res->member_list_entries = count; retgi = res->member_list; res->header.size = size; res->header.id = id; res->header.error = CS_OK; memcpy(&res->group_name, group_name, sizeof(mar_cpg_name_t)); for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { struct process_info *pi=list_entry (iter, struct process_info, list); if (mar_name_compare (&pi->group, group_name) == 0) { int i; int founded = 0; for (i = 0;i < left_list_entries; i++) { if (left_list[i].nodeid == pi->nodeid && 
left_list[i].pid == pi->pid) { founded++; } } if (!founded) { retgi->nodeid = pi->nodeid; retgi->pid = pi->pid; retgi++; } } } if (left_list_entries) { memcpy (retgi, left_list, left_list_entries * sizeof(mar_cpg_address_t)); retgi += left_list_entries; } if (joined_list_entries) { memcpy (retgi, joined_list, joined_list_entries * sizeof(mar_cpg_address_t)); retgi += joined_list_entries; } if (conn) { api->ipc_dispatch_send (conn, buf, size); } else { for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) { struct cpg_pd *cpd = list_entry (iter, struct cpg_pd, list); if (mar_name_compare (&cpd->group_name, group_name) == 0) { assert (left_list_entries <= 1); assert (joined_list_entries <= 1); if (joined_list_entries) { if (joined_list[0].pid == cpd->pid && joined_list[0].nodeid == api->totem_nodeid_get()) { cpd->cpd_state = CPD_STATE_JOIN_COMPLETED; } } if (cpd->cpd_state == CPD_STATE_JOIN_COMPLETED || cpd->cpd_state == CPD_STATE_LEAVE_STARTED) { api->ipc_dispatch_send (cpd->conn, buf, size); } if (left_list_entries) { if (left_list[0].pid == cpd->pid && left_list[0].nodeid == api->totem_nodeid_get()) { cpd->pid = 0; memset (&cpd->group_name, 0, sizeof(cpd->group_name)); cpd->cpd_state = CPD_STATE_UNJOINED; } } } } } /* * Traverse thru cpds and send totem membership for cpd, where it is not send yet */ for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) { struct cpg_pd *cpd = list_entry (iter, struct cpg_pd, list); if ((cpd->flags & CPG_MODEL_V1_DELIVER_INITIAL_TOTEM_CONF) && (cpd->initial_totem_conf_sent == 0)) { cpd->initial_totem_conf_sent = 1; notify_lib_totem_membership (cpd->conn, my_old_member_list_entries, my_old_member_list); } } return CS_OK; } -static void downlist_log(int loglevel, const char *msg, struct downlist_msg* dl) +static void downlist_log(const char *msg, struct downlist_msg* dl) { - log_printf (loglevel, + log_printf (LOG_DEBUG, "%s: sender %s; members(old:%d left:%d)", msg, 
api->totem_ifaces_print(dl->sender_nodeid), dl->old_members, dl->left_nodes); } static struct downlist_msg* downlist_master_choose (void) { struct downlist_msg *cmp; struct downlist_msg *best = NULL; struct list_head *iter; uint32_t cmp_members; uint32_t best_members; for (iter = downlist_messages_head.next; iter != &downlist_messages_head; iter = iter->next) { cmp = list_entry(iter, struct downlist_msg, list); - downlist_log(LOGSYS_LEVEL_DEBUG, "comparing", cmp); + downlist_log("comparing", cmp); if (best == NULL) { best = cmp; continue; } best_members = best->old_members - best->left_nodes; cmp_members = cmp->old_members - cmp->left_nodes; if (cmp_members < best_members) { continue; } else if (cmp_members > best_members) { best = cmp; } else if (cmp->sender_nodeid < best->sender_nodeid) { best = cmp; } } return best; } static void downlist_master_choose_and_send (void) { struct downlist_msg *stored_msg; struct list_head *iter; mar_cpg_address_t left_list; int i; downlist_state = CPG_DOWNLIST_APPLYING; stored_msg = downlist_master_choose (); if (!stored_msg) { log_printf (LOGSYS_LEVEL_DEBUG, "NO chosen downlist"); return; } - downlist_log(LOGSYS_LEVEL_DEBUG, "chosen downlist", stored_msg); + downlist_log("chosen downlist", stored_msg); /* send events */ for (iter = process_info_list_head.next; iter != &process_info_list_head; ) { struct process_info *pi = list_entry(iter, struct process_info, list); iter = iter->next; for (i = 0; i < stored_msg->left_nodes; i++) { if (pi->nodeid == stored_msg->nodeids[i]) { left_list.nodeid = pi->nodeid; left_list.pid = pi->pid; left_list.reason = CONFCHG_CPG_REASON_NODEDOWN; notify_lib_joinlist(&pi->group, NULL, 0, NULL, 1, &left_list, MESSAGE_RES_CPG_CONFCHG_CALLBACK); list_del (&pi->list); free (pi); break; } } } } static void downlist_messages_delete (void) { struct downlist_msg *stored_msg; struct list_head *iter, *iter_next; for (iter = downlist_messages_head.next; iter != &downlist_messages_head; iter = iter_next) { 
iter_next = iter->next; stored_msg = list_entry(iter, struct downlist_msg, list); list_del (&stored_msg->list); free (stored_msg); } } static int cpg_exec_init_fn (struct corosync_api_v1 *corosync_api) { #ifdef COROSYNC_SOLARIS logsys_subsys_init(); #endif list_init (&downlist_messages_head); api = corosync_api; return (0); } static void cpg_iteration_instance_finalize (struct cpg_iteration_instance *cpg_iteration_instance) { struct list_head *iter, *iter_next; struct process_info *pi; for (iter = cpg_iteration_instance->items_list_head.next; iter != &cpg_iteration_instance->items_list_head; iter = iter_next) { iter_next = iter->next; pi = list_entry (iter, struct process_info, list); list_del (&pi->list); free (pi); } list_del (&cpg_iteration_instance->list); hdb_handle_destroy (&cpg_iteration_handle_t_db, cpg_iteration_instance->handle); } static void cpg_pd_finalize (struct cpg_pd *cpd) { struct list_head *iter, *iter_next; struct cpg_iteration_instance *cpii; zcb_all_free(cpd); for (iter = cpd->iteration_instance_list_head.next; iter != &cpd->iteration_instance_list_head; iter = iter_next) { iter_next = iter->next; cpii = list_entry (iter, struct cpg_iteration_instance, list); cpg_iteration_instance_finalize (cpii); } list_del (&cpd->list); } static int cpg_lib_exit_fn (void *conn) { struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, "exit_fn for conn=%p\n", conn); if (cpd->group_name.length > 0) { cpg_node_joinleave_send (cpd->pid, &cpd->group_name, MESSAGE_REQ_EXEC_CPG_PROCLEAVE, CONFCHG_CPG_REASON_PROCDOWN); } cpg_pd_finalize (cpd); api->ipc_refcnt_dec (conn); return (0); } static int cpg_node_joinleave_send (unsigned int pid, const mar_cpg_name_t *group_name, int fn, int reason) { struct req_exec_cpg_procjoin req_exec_cpg_procjoin; struct iovec req_exec_cpg_iovec; int result; memcpy(&req_exec_cpg_procjoin.group_name, group_name, sizeof(mar_cpg_name_t)); req_exec_cpg_procjoin.pid = pid; 
req_exec_cpg_procjoin.reason = reason; req_exec_cpg_procjoin.header.size = sizeof(req_exec_cpg_procjoin); req_exec_cpg_procjoin.header.id = SERVICE_ID_MAKE(CPG_SERVICE, fn); req_exec_cpg_iovec.iov_base = (char *)&req_exec_cpg_procjoin; req_exec_cpg_iovec.iov_len = sizeof(req_exec_cpg_procjoin); result = api->totem_mcast (&req_exec_cpg_iovec, 1, TOTEM_AGREED); return (result); } /* Can byteswap join & leave messages */ static void exec_cpg_procjoin_endian_convert (void *msg) { struct req_exec_cpg_procjoin *req_exec_cpg_procjoin = msg; req_exec_cpg_procjoin->pid = swab32(req_exec_cpg_procjoin->pid); swab_mar_cpg_name_t (&req_exec_cpg_procjoin->group_name); req_exec_cpg_procjoin->reason = swab32(req_exec_cpg_procjoin->reason); } static void exec_cpg_joinlist_endian_convert (void *msg_v) { char *msg = msg_v; struct qb_ipc_response_header *res = (struct qb_ipc_response_header *)msg; struct join_list_entry *jle = (struct join_list_entry *)(msg + sizeof(struct qb_ipc_response_header)); swab_mar_int32_t (&res->size); while ((const char*)jle < msg + res->size) { jle->pid = swab32(jle->pid); swab_mar_cpg_name_t (&jle->group_name); jle++; } } static void exec_cpg_downlist_endian_convert_old (void *msg) { } static void exec_cpg_downlist_endian_convert (void *msg) { struct req_exec_cpg_downlist *req_exec_cpg_downlist = msg; unsigned int i; req_exec_cpg_downlist->left_nodes = swab32(req_exec_cpg_downlist->left_nodes); req_exec_cpg_downlist->old_members = swab32(req_exec_cpg_downlist->old_members); for (i = 0; i < req_exec_cpg_downlist->left_nodes; i++) { req_exec_cpg_downlist->nodeids[i] = swab32(req_exec_cpg_downlist->nodeids[i]); } } static void exec_cpg_mcast_endian_convert (void *msg) { struct req_exec_cpg_mcast *req_exec_cpg_mcast = msg; swab_coroipc_request_header_t (&req_exec_cpg_mcast->header); swab_mar_cpg_name_t (&req_exec_cpg_mcast->group_name); req_exec_cpg_mcast->pid = swab32(req_exec_cpg_mcast->pid); req_exec_cpg_mcast->msglen = swab32(req_exec_cpg_mcast->msglen); 
swab_mar_message_source_t (&req_exec_cpg_mcast->source); } static struct process_info *process_info_find(const mar_cpg_name_t *group_name, uint32_t pid, unsigned int nodeid) { struct list_head *iter; for (iter = process_info_list_head.next; iter != &process_info_list_head; ) { struct process_info *pi = list_entry (iter, struct process_info, list); iter = iter->next; if (pi->pid == pid && pi->nodeid == nodeid && mar_name_compare (&pi->group, group_name) == 0) { return pi; } } return NULL; } static void do_proc_join( const mar_cpg_name_t *name, uint32_t pid, unsigned int nodeid, int reason) { struct process_info *pi; struct process_info *pi_entry; mar_cpg_address_t notify_info; struct list_head *list; struct list_head *list_to_add = NULL; if (process_info_find (name, pid, nodeid) != NULL) { return ; } pi = malloc (sizeof (struct process_info)); if (!pi) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate process_info struct"); return; } pi->nodeid = nodeid; pi->pid = pid; memcpy(&pi->group, name, sizeof(*name)); list_init(&pi->list); /* * Insert new process in sorted order so synchronization works properly */ list_to_add = &process_info_list_head; for (list = process_info_list_head.next; list != &process_info_list_head; list = list->next) { pi_entry = list_entry(list, struct process_info, list); if (pi_entry->nodeid > pi->nodeid || (pi_entry->nodeid == pi->nodeid && pi_entry->pid > pi->pid)) { break; } list_to_add = list; } list_add (&pi->list, list_to_add); notify_info.pid = pi->pid; notify_info.nodeid = nodeid; notify_info.reason = reason; notify_lib_joinlist(&pi->group, NULL, 1, ¬ify_info, 0, NULL, MESSAGE_RES_CPG_CONFCHG_CALLBACK); } static void message_handler_req_exec_cpg_downlist_old ( const void *message, unsigned int nodeid) { log_printf (LOGSYS_LEVEL_WARNING, "downlist OLD from node %d", nodeid); } static void message_handler_req_exec_cpg_downlist( const void *message, unsigned int nodeid) { const struct req_exec_cpg_downlist *req_exec_cpg_downlist = 
message; int i; struct list_head *iter; struct downlist_msg *stored_msg; int found; if (downlist_state != CPG_DOWNLIST_WAITING_FOR_MESSAGES) { log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received in state %d", req_exec_cpg_downlist->left_nodes, downlist_state); return; } stored_msg = malloc (sizeof (struct downlist_msg)); stored_msg->sender_nodeid = nodeid; stored_msg->old_members = req_exec_cpg_downlist->old_members; stored_msg->left_nodes = req_exec_cpg_downlist->left_nodes; memcpy (stored_msg->nodeids, req_exec_cpg_downlist->nodeids, req_exec_cpg_downlist->left_nodes * sizeof (mar_uint32_t)); list_init (&stored_msg->list); list_add (&stored_msg->list, &downlist_messages_head); for (i = 0; i < my_member_list_entries; i++) { found = 0; for (iter = downlist_messages_head.next; iter != &downlist_messages_head; iter = iter->next) { stored_msg = list_entry(iter, struct downlist_msg, list); if (my_member_list[i] == stored_msg->sender_nodeid) { found = 1; } } if (!found) { return; } } downlist_master_choose_and_send (); } static void message_handler_req_exec_cpg_procjoin ( const void *message, unsigned int nodeid) { const struct req_exec_cpg_procjoin *req_exec_cpg_procjoin = message; log_printf(LOGSYS_LEVEL_DEBUG, "got procjoin message from cluster node %d\n", nodeid); do_proc_join (&req_exec_cpg_procjoin->group_name, req_exec_cpg_procjoin->pid, nodeid, CONFCHG_CPG_REASON_JOIN); } static void message_handler_req_exec_cpg_procleave ( const void *message, unsigned int nodeid) { const struct req_exec_cpg_procjoin *req_exec_cpg_procjoin = message; struct process_info *pi; struct list_head *iter; mar_cpg_address_t notify_info; log_printf(LOGSYS_LEVEL_DEBUG, "got procleave message from cluster node %d\n", nodeid); notify_info.pid = req_exec_cpg_procjoin->pid; notify_info.nodeid = nodeid; notify_info.reason = req_exec_cpg_procjoin->reason; notify_lib_joinlist(&req_exec_cpg_procjoin->group_name, NULL, 0, NULL, 1, ¬ify_info, MESSAGE_RES_CPG_CONFCHG_CALLBACK); for 
(iter = process_info_list_head.next; iter != &process_info_list_head; ) { pi = list_entry(iter, struct process_info, list); iter = iter->next; if (pi->pid == req_exec_cpg_procjoin->pid && pi->nodeid == nodeid && mar_name_compare (&pi->group, &req_exec_cpg_procjoin->group_name)==0) { list_del (&pi->list); free (pi); } } } /* Got a proclist from another node */ static void message_handler_req_exec_cpg_joinlist ( const void *message_v, unsigned int nodeid) { const char *message = message_v; const struct qb_ipc_response_header *res = (const struct qb_ipc_response_header *)message; const struct join_list_entry *jle = (const struct join_list_entry *)(message + sizeof(struct qb_ipc_response_header)); log_printf(LOGSYS_LEVEL_DEBUG, "got joinlist message from node %x\n", nodeid); /* Ignore our own messages */ if (nodeid == api->totem_nodeid_get()) { return; } while ((const char*)jle < message + res->size) { do_proc_join (&jle->group_name, jle->pid, nodeid, CONFCHG_CPG_REASON_NODEUP); jle++; } } static void message_handler_req_exec_cpg_mcast ( const void *message, unsigned int nodeid) { const struct req_exec_cpg_mcast *req_exec_cpg_mcast = message; struct res_lib_cpg_deliver_callback res_lib_cpg_mcast; int msglen = req_exec_cpg_mcast->msglen; struct list_head *iter, *pi_iter; struct cpg_pd *cpd; struct iovec iovec[2]; int known_node = 0; res_lib_cpg_mcast.header.id = MESSAGE_RES_CPG_DELIVER_CALLBACK; res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast) + msglen; res_lib_cpg_mcast.msglen = msglen; res_lib_cpg_mcast.pid = req_exec_cpg_mcast->pid; res_lib_cpg_mcast.nodeid = nodeid; memcpy(&res_lib_cpg_mcast.group_name, &req_exec_cpg_mcast->group_name, sizeof(mar_cpg_name_t)); iovec[0].iov_base = (void *)&res_lib_cpg_mcast; iovec[0].iov_len = sizeof (res_lib_cpg_mcast); iovec[1].iov_base = (char*)message+sizeof(*req_exec_cpg_mcast); iovec[1].iov_len = msglen; for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; ) { cpd = list_entry(iter, struct cpg_pd, list); iter 
= iter->next; if ((cpd->cpd_state == CPD_STATE_LEAVE_STARTED || cpd->cpd_state == CPD_STATE_JOIN_COMPLETED) && (mar_name_compare (&cpd->group_name, &req_exec_cpg_mcast->group_name) == 0)) { if (!known_node) { /* Try to find, if we know the node */ for (pi_iter = process_info_list_head.next; pi_iter != &process_info_list_head; pi_iter = pi_iter->next) { struct process_info *pi = list_entry (pi_iter, struct process_info, list); if (pi->nodeid == nodeid && mar_name_compare (&pi->group, &req_exec_cpg_mcast->group_name) == 0) { known_node = 1; break; } } } if (!known_node) { log_printf(LOGSYS_LEVEL_WARNING, "Unknown node -> we will not deliver message"); return ; } api->ipc_dispatch_iov_send (cpd->conn, iovec, 2); } } } static int cpg_exec_send_downlist(void) { struct iovec iov; g_req_exec_cpg_downlist.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_DOWNLIST); g_req_exec_cpg_downlist.header.size = sizeof(struct req_exec_cpg_downlist); g_req_exec_cpg_downlist.old_members = my_old_member_list_entries; iov.iov_base = (void *)&g_req_exec_cpg_downlist; iov.iov_len = g_req_exec_cpg_downlist.header.size; return (api->totem_mcast (&iov, 1, TOTEM_AGREED)); } static int cpg_exec_send_joinlist(void) { int count = 0; struct list_head *iter; struct qb_ipc_response_header *res; char *buf; struct join_list_entry *jle; struct iovec req_exec_cpg_iovec; for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { struct process_info *pi = list_entry (iter, struct process_info, list); if (pi->nodeid == api->totem_nodeid_get ()) { count++; } } /* Nothing to send */ if (!count) return 0; buf = alloca(sizeof(struct qb_ipc_response_header) + sizeof(struct join_list_entry) * count); if (!buf) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate joinlist buffer"); return -1; } jle = (struct join_list_entry *)(buf + sizeof(struct qb_ipc_response_header)); res = (struct qb_ipc_response_header *)buf; for (iter = process_info_list_head.next; iter != 
&process_info_list_head; iter = iter->next) { struct process_info *pi = list_entry (iter, struct process_info, list); if (pi->nodeid == api->totem_nodeid_get ()) { memcpy (&jle->group_name, &pi->group, sizeof (mar_cpg_name_t)); jle->pid = pi->pid; jle++; } } res->id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_JOINLIST); res->size = sizeof(struct qb_ipc_response_header)+sizeof(struct join_list_entry) * count; req_exec_cpg_iovec.iov_base = buf; req_exec_cpg_iovec.iov_len = res->size; return (api->totem_mcast (&req_exec_cpg_iovec, 1, TOTEM_AGREED)); } static int cpg_lib_init_fn (void *conn) { struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); memset (cpd, 0, sizeof(struct cpg_pd)); cpd->conn = conn; list_add (&cpd->list, &cpg_pd_list_head); list_init (&cpd->iteration_instance_list_head); list_init (&cpd->zcb_mapped_list_head); api->ipc_refcnt_inc (conn); log_printf(LOGSYS_LEVEL_DEBUG, "lib_init_fn: conn=%p, cpd=%p\n", conn, cpd); return (0); } /* Join message from the library */ static void message_handler_req_lib_cpg_join (void *conn, const void *message) { const struct req_lib_cpg_join *req_lib_cpg_join = message; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); struct res_lib_cpg_join res_lib_cpg_join; cs_error_t error = CS_OK; struct list_head *iter; /* Test, if we don't have same pid and group name joined */ for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) { struct cpg_pd *cpd_item = list_entry (iter, struct cpg_pd, list); if (cpd_item->pid == req_lib_cpg_join->pid && mar_name_compare(&req_lib_cpg_join->group_name, &cpd_item->group_name) == 0) { /* We have same pid and group name joined -> return error */ error = CS_ERR_EXIST; goto response_send; } } /* * Same check must be done in process info list, because there may be not yet delivered * leave of client. 
*/ for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { struct process_info *pi = list_entry (iter, struct process_info, list); if (pi->nodeid == api->totem_nodeid_get () && pi->pid == req_lib_cpg_join->pid && mar_name_compare(&req_lib_cpg_join->group_name, &pi->group) == 0) { /* We have same pid and group name joined -> return error */ error = CS_ERR_TRY_AGAIN; goto response_send; } } switch (cpd->cpd_state) { case CPD_STATE_UNJOINED: error = CS_OK; cpd->cpd_state = CPD_STATE_JOIN_STARTED; cpd->pid = req_lib_cpg_join->pid; cpd->flags = req_lib_cpg_join->flags; memcpy (&cpd->group_name, &req_lib_cpg_join->group_name, sizeof (cpd->group_name)); cpg_node_joinleave_send (req_lib_cpg_join->pid, &req_lib_cpg_join->group_name, MESSAGE_REQ_EXEC_CPG_PROCJOIN, CONFCHG_CPG_REASON_JOIN); break; case CPD_STATE_LEAVE_STARTED: error = CS_ERR_BUSY; break; case CPD_STATE_JOIN_STARTED: error = CS_ERR_EXIST; break; case CPD_STATE_JOIN_COMPLETED: error = CS_ERR_EXIST; break; } response_send: res_lib_cpg_join.header.size = sizeof(res_lib_cpg_join); res_lib_cpg_join.header.id = MESSAGE_RES_CPG_JOIN; res_lib_cpg_join.header.error = error; api->ipc_response_send (conn, &res_lib_cpg_join, sizeof(res_lib_cpg_join)); } /* Leave message from the library */ static void message_handler_req_lib_cpg_leave (void *conn, const void *message) { struct res_lib_cpg_leave res_lib_cpg_leave; cs_error_t error = CS_OK; struct req_lib_cpg_leave *req_lib_cpg_leave = (struct req_lib_cpg_leave *)message; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, "got leave request on %p\n", conn); switch (cpd->cpd_state) { case CPD_STATE_UNJOINED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_LEAVE_STARTED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_JOIN_STARTED: error = CS_ERR_BUSY; break; case CPD_STATE_JOIN_COMPLETED: error = CS_OK; cpd->cpd_state = CPD_STATE_LEAVE_STARTED; cpg_node_joinleave_send 
(req_lib_cpg_leave->pid, &req_lib_cpg_leave->group_name, MESSAGE_REQ_EXEC_CPG_PROCLEAVE, CONFCHG_CPG_REASON_LEAVE); break; } /* send return */ res_lib_cpg_leave.header.size = sizeof(res_lib_cpg_leave); res_lib_cpg_leave.header.id = MESSAGE_RES_CPG_LEAVE; res_lib_cpg_leave.header.error = error; api->ipc_response_send(conn, &res_lib_cpg_leave, sizeof(res_lib_cpg_leave)); } /* Finalize message from library */ static void message_handler_req_lib_cpg_finalize ( void *conn, const void *message) { struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); struct res_lib_cpg_finalize res_lib_cpg_finalize; cs_error_t error = CS_OK; log_printf (LOGSYS_LEVEL_DEBUG, "cpg finalize for conn=%p\n", conn); /* * We will just remove cpd from list. After this call, connection will be * closed on lib side, and cpg_lib_exit_fn will be called */ list_del (&cpd->list); list_init (&cpd->list); res_lib_cpg_finalize.header.size = sizeof (res_lib_cpg_finalize); res_lib_cpg_finalize.header.id = MESSAGE_RES_CPG_FINALIZE; res_lib_cpg_finalize.header.error = error; api->ipc_response_send (conn, &res_lib_cpg_finalize, sizeof (res_lib_cpg_finalize)); } static int memory_map ( const char *path, size_t bytes, void **buf) { int32_t fd; void *addr_orig; void *addr; int32_t res; fd = open (path, O_RDWR, 0600); unlink (path); if (fd == -1) { return (-1); } res = ftruncate (fd, bytes); if (res == -1) { goto error_close_unlink; } addr_orig = mmap (NULL, bytes, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (addr_orig == MAP_FAILED) { goto error_close_unlink; } addr = mmap (addr_orig, bytes, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, 0); if (addr != addr_orig) { munmap(addr_orig, bytes); goto error_close_unlink; } #ifdef COROSYNC_BSD madvise(addr, bytes, MADV_NOSYNC); #endif res = close (fd); if (res) { return (-1); } *buf = addr_orig; return (0); error_close_unlink: close (fd); unlink(path); return -1; } static inline int zcb_alloc ( struct cpg_pd *cpd, const char *path_to_file, 
/*
 * Overlay used to carry a server-side pointer in a fixed 64-bit field.
 */
union u {
	uint64_t server_addr;
	void *server_ptr;
};

/*
 * Encode a pointer as a 64-bit server address.
 *
 * The pointer's bytes are copied over a zeroed 64-bit value, so bytes not
 * covered by the pointer (when pointers are narrower than 64 bits) are
 * always zero instead of indeterminate. The byte layout is the same one
 * serveraddr2void() reads back through union u.
 */
static uint64_t void2serveraddr (void *server_ptr)
{
	uint64_t server_addr = 0;

	memcpy (&server_addr, &server_ptr, sizeof (server_ptr));
	return (server_addr);
}

/*
 * Decode a 64-bit server address produced by void2serveraddr() back into
 * the pointer it was made from.
 */
static void *serveraddr2void (uint64_t server_addr)
{
	union u u;

	u.server_addr = server_addr;
	return (u.server_ptr);
}
%s", hdr->path_to_file); res = zcb_alloc (cpd, hdr->path_to_file, hdr->map_size, &addr); assert(res == 0); zc_header = (struct coroipcs_zc_header *)addr; zc_header->server_address = void2serveraddr(addr); res_header.size = sizeof (struct qb_ipc_response_header); res_header.id = 0; api->ipc_response_send (conn, &res_header, res_header.size); } static void message_handler_req_lib_cpg_zc_free ( void *conn, const void *message) { mar_req_coroipcc_zc_free_t *hdr = (mar_req_coroipcc_zc_free_t *)message; struct qb_ipc_response_header res_header; void *addr = NULL; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, " free'ing"); addr = serveraddr2void (hdr->server_address); zcb_by_addr_free (cpd, addr); res_header.size = sizeof (struct qb_ipc_response_header); res_header.id = 0; api->ipc_response_send ( conn, &res_header, res_header.size); } /* Mcast message from the library */ static void message_handler_req_lib_cpg_mcast (void *conn, const void *message) { const struct req_lib_cpg_mcast *req_lib_cpg_mcast = message; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); mar_cpg_name_t group_name = cpd->group_name; struct iovec req_exec_cpg_iovec[2]; struct req_exec_cpg_mcast req_exec_cpg_mcast; int msglen = req_lib_cpg_mcast->msglen; int result; cs_error_t error = CS_ERR_NOT_EXIST; log_printf(LOGSYS_LEVEL_DEBUG, "got mcast request on %p\n", conn); switch (cpd->cpd_state) { case CPD_STATE_UNJOINED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_LEAVE_STARTED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_JOIN_STARTED: error = CS_OK; break; case CPD_STATE_JOIN_COMPLETED: error = CS_OK; break; } if (error == CS_OK) { req_exec_cpg_mcast.header.size = sizeof(req_exec_cpg_mcast) + msglen; req_exec_cpg_mcast.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_MCAST); req_exec_cpg_mcast.pid = cpd->pid; req_exec_cpg_mcast.msglen = msglen; api->ipc_source_set (&req_exec_cpg_mcast.source, conn); 
memcpy(&req_exec_cpg_mcast.group_name, &group_name, sizeof(mar_cpg_name_t)); req_exec_cpg_iovec[0].iov_base = (char *)&req_exec_cpg_mcast; req_exec_cpg_iovec[0].iov_len = sizeof(req_exec_cpg_mcast); req_exec_cpg_iovec[1].iov_base = (char *)&req_lib_cpg_mcast->message; req_exec_cpg_iovec[1].iov_len = msglen; result = api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED); assert(result == 0); } else { log_printf(LOGSYS_LEVEL_ERROR, "*** %p can't mcast to group %s state:%d, error:%d\n", conn, group_name.value, cpd->cpd_state, error); } } static void message_handler_req_lib_cpg_zc_execute ( void *conn, const void *message) { mar_req_coroipcc_zc_execute_t *hdr = (mar_req_coroipcc_zc_execute_t *)message; struct qb_ipc_request_header *header; struct res_lib_cpg_mcast res_lib_cpg_mcast; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); struct iovec req_exec_cpg_iovec[2]; struct req_exec_cpg_mcast req_exec_cpg_mcast; struct req_lib_cpg_mcast *req_lib_cpg_mcast; int result; cs_error_t error = CS_ERR_NOT_EXIST; log_printf(LOGSYS_LEVEL_DEBUG, "got ZC mcast request on %p\n", conn); header = (struct qb_ipc_request_header *)(((char *)serveraddr2void(hdr->server_address) + sizeof (struct coroipcs_zc_header))); req_lib_cpg_mcast = (struct req_lib_cpg_mcast *)header; switch (cpd->cpd_state) { case CPD_STATE_UNJOINED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_LEAVE_STARTED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_JOIN_STARTED: error = CS_OK; break; case CPD_STATE_JOIN_COMPLETED: error = CS_OK; break; } res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast); res_lib_cpg_mcast.header.id = MESSAGE_RES_CPG_MCAST; if (error == CS_OK) { req_exec_cpg_mcast.header.size = sizeof(req_exec_cpg_mcast) + req_lib_cpg_mcast->msglen; req_exec_cpg_mcast.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_MCAST); req_exec_cpg_mcast.pid = cpd->pid; req_exec_cpg_mcast.msglen = req_lib_cpg_mcast->msglen; api->ipc_source_set (&req_exec_cpg_mcast.source, 
conn); memcpy(&req_exec_cpg_mcast.group_name, &cpd->group_name, sizeof(mar_cpg_name_t)); req_exec_cpg_iovec[0].iov_base = (char *)&req_exec_cpg_mcast; req_exec_cpg_iovec[0].iov_len = sizeof(req_exec_cpg_mcast); req_exec_cpg_iovec[1].iov_base = (char *)header + sizeof(struct req_lib_cpg_mcast); req_exec_cpg_iovec[1].iov_len = req_exec_cpg_mcast.msglen; result = api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED); if (result == 0) { res_lib_cpg_mcast.header.error = CS_OK; } else { res_lib_cpg_mcast.header.error = CS_ERR_TRY_AGAIN; } } else { res_lib_cpg_mcast.header.error = error; } api->ipc_response_send (conn, &res_lib_cpg_mcast, sizeof (res_lib_cpg_mcast)); } static void message_handler_req_lib_cpg_membership (void *conn, const void *message) { struct req_lib_cpg_membership_get *req_lib_cpg_membership_get = (struct req_lib_cpg_membership_get *)message; struct res_lib_cpg_membership_get res_lib_cpg_membership_get; struct list_head *iter; int member_count = 0; res_lib_cpg_membership_get.header.id = MESSAGE_RES_CPG_MEMBERSHIP; res_lib_cpg_membership_get.header.error = CS_OK; res_lib_cpg_membership_get.header.size = sizeof (struct req_lib_cpg_membership_get); for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { struct process_info *pi = list_entry (iter, struct process_info, list); if (mar_name_compare (&pi->group, &req_lib_cpg_membership_get->group_name) == 0) { res_lib_cpg_membership_get.member_list[member_count].nodeid = pi->nodeid; res_lib_cpg_membership_get.member_list[member_count].pid = pi->pid; member_count += 1; } } res_lib_cpg_membership_get.member_count = member_count; api->ipc_response_send (conn, &res_lib_cpg_membership_get, sizeof (res_lib_cpg_membership_get)); } static void message_handler_req_lib_cpg_local_get (void *conn, const void *message) { struct res_lib_cpg_local_get res_lib_cpg_local_get; res_lib_cpg_local_get.header.size = sizeof (res_lib_cpg_local_get); res_lib_cpg_local_get.header.id = 
/*
 * Library request to start an iteration over the process info list.
 *
 * A new cpg_iteration_instance is created in cpg_iteration_handle_t_db and
 * filled with a private copy of the matching process_info entries, so that
 * later changes to process_info_list_head do not affect the iteration.
 * Which entries are copied depends on the request's iteration_type:
 *   - CPG_ITERATION_NAME_ONLY: one entry per distinct group name, with pid
 *     and nodeid cleared to 0;
 *   - CPG_ITERATION_ONE_GROUP: only entries whose group equals the
 *     requested group_name;
 *   - any other type: every entry.
 *
 * A res_lib_cpg_iterationinitialize response is always sent; it carries
 * the error code and the iteration handle (0 if no handle was created).
 *
 * Cleanup is label-ordered: error_put_destroy releases the reference
 * taken by hdb_handle_get(), error_destroy destroys the handle only when
 * error != CS_OK. The success path falls through both labels.
 */
static void message_handler_req_lib_cpg_iteration_initialize (
	void *conn,
	const void *message)
{
	const struct req_lib_cpg_iterationinitialize *req_lib_cpg_iterationinitialize = message;
	struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
	hdb_handle_t cpg_iteration_handle = 0;
	struct res_lib_cpg_iterationinitialize res_lib_cpg_iterationinitialize;
	struct list_head *iter, *iter2;
	struct cpg_iteration_instance *cpg_iteration_instance;
	cs_error_t error = CS_OK;
	int res;

	log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration initialize\n");

	/*
	 * The list may change between this call and the following *next
	 * calls, so a full copy has to be made.
	 */

	/*
	 * Create new iteration instance
	 */
	res = hdb_handle_create (&cpg_iteration_handle_t_db, sizeof (struct cpg_iteration_instance),
			&cpg_iteration_handle);

	if (res != 0) {
		error = CS_ERR_NO_MEMORY;
		/* No handle exists yet, so there is nothing to put or destroy */
		goto response_send;
	}

	res = hdb_handle_get (&cpg_iteration_handle_t_db, cpg_iteration_handle, (void *)&cpg_iteration_instance);

	if (res != 0) {
		error = CS_ERR_BAD_HANDLE;
		/* The get failed, so skip the put and go straight to destroy */
		goto error_destroy;
	}

	list_init (&cpg_iteration_instance->items_list_head);
	cpg_iteration_instance->handle = cpg_iteration_handle;

	/*
	 * Create copy of process_info list "grouped by" group name
	 */
	for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) {
		struct process_info *pi = list_entry (iter, struct process_info, list);
		struct process_info *new_pi;

		if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_NAME_ONLY) {
			/*
			 * Check whether this group name is already in the new list
			 */
			int found = 0;

			for (iter2 = cpg_iteration_instance->items_list_head.next;
			     iter2 != &cpg_iteration_instance->items_list_head;
			     iter2 = iter2->next) {
				struct process_info *pi2 = list_entry (iter2, struct process_info, list);

				if (mar_name_compare (&pi2->group, &pi->group) == 0) {
					found = 1;
					break;
				}
			}

			if (found) {
				/*
				 * We have this name in list -> don't add
				 */
				continue ;
			}
		} else if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_ONE_GROUP) {
			/*
			 * Compare the entry's group name with the requested one
			 */
			if (mar_name_compare (&pi->group, &req_lib_cpg_iterationinitialize->group_name) != 0)
				/*
				 * Not same -> don't add
				 */
				continue ;
		}

		new_pi = malloc (sizeof (struct process_info));
		if (!new_pi) {
			log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate process_info struct");

			error = CS_ERR_NO_MEMORY;

			/*
			 * NOTE(review): copies already linked onto items_list_head
			 * are not freed in this function on this path; confirm
			 * they are released elsewhere.
			 */
			goto error_put_destroy;
		}

		memcpy (new_pi, pi, sizeof (struct process_info));
		/* The copied list links belong to the source list; reset them */
		list_init (&new_pi->list);

		if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_NAME_ONLY) {
			/*
			 * pid and nodeid -> undefined
			 */
			new_pi->pid = new_pi->nodeid = 0;
		}

		/*
		 * The list is returned "grouped" by group name, so look for the
		 * first entry with the same group. If there is none, iter2 ends
		 * up at the list head.
		 */
		for (iter2 = cpg_iteration_instance->items_list_head.next;
		     iter2 != &cpg_iteration_instance->items_list_head;
		     iter2 = iter2->next) {
			struct process_info *pi2 = list_entry (iter2, struct process_info, list);

			if (mar_name_compare (&pi2->group, &pi->group) == 0) {
				break;
			}
		}

		/* Link the copy in relative to the position found above */
		list_add (&new_pi->list, iter2);
	}

	/*
	 * Now we have a full "grouped by" copy of process_info list
	 */

	/*
	 * Add instance to current cpd list
	 */
	list_init (&cpg_iteration_instance->list);
	list_add (&cpg_iteration_instance->list, &cpd->iteration_instance_list_head);

	/*
	 * Start at the list head; the iteration_next handler advances
	 * current_pointer before reading an entry.
	 */
	cpg_iteration_instance->current_pointer = &cpg_iteration_instance->items_list_head;

error_put_destroy:
	/* Reached on success too: drop the reference taken by hdb_handle_get() */
	hdb_handle_put (&cpg_iteration_handle_t_db, cpg_iteration_handle);
error_destroy:
	/* The handle only survives when everything above succeeded */
	if (error != CS_OK) {
		hdb_handle_destroy (&cpg_iteration_handle_t_db, cpg_iteration_handle);
	}

response_send:
	res_lib_cpg_iterationinitialize.header.size = sizeof (res_lib_cpg_iterationinitialize);
	res_lib_cpg_iterationinitialize.header.id = MESSAGE_RES_CPG_ITERATIONINITIALIZE;
	res_lib_cpg_iterationinitialize.header.error = error;
	res_lib_cpg_iterationinitialize.iteration_handle = cpg_iteration_handle;

	api->ipc_response_send (conn, &res_lib_cpg_iterationinitialize,
		sizeof (res_lib_cpg_iterationinitialize));
}
res_lib_cpg_iterationfinalize res_lib_cpg_iterationfinalize; struct cpg_iteration_instance *cpg_iteration_instance; cs_error_t error = CS_OK; int res; log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration finalize\n"); res = hdb_handle_get (&cpg_iteration_handle_t_db, req_lib_cpg_iterationfinalize->iteration_handle, (void *)&cpg_iteration_instance); if (res != 0) { error = CS_ERR_LIBRARY; goto error_exit; } assert (cpg_iteration_instance); cpg_iteration_instance_finalize (cpg_iteration_instance); hdb_handle_put (&cpg_iteration_handle_t_db, cpg_iteration_instance->handle); error_exit: res_lib_cpg_iterationfinalize.header.size = sizeof (res_lib_cpg_iterationfinalize); res_lib_cpg_iterationfinalize.header.id = MESSAGE_RES_CPG_ITERATIONFINALIZE; res_lib_cpg_iterationfinalize.header.error = error; api->ipc_response_send (conn, &res_lib_cpg_iterationfinalize, sizeof (res_lib_cpg_iterationfinalize)); } diff --git a/test/Makefile.am b/test/Makefile.am index c79071f4..1b28643c 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -1,101 +1,97 @@ # # Copyright (c) 2009 Red Hat, Inc. # # Authors: Andrew Beekhof # Steven Dake (sdake@redhat.com) # # This software licensed under BSD license, the text of which follows: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # - Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # - Neither the name of the MontaVista Software, Inc. nor the names of its # contributors may be used to endorse or promote products derived from this # software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF # THE POSSIBILITY OF SUCH DAMAGE. MAINTAINERCLEANFILES = Makefile.in INCLUDES = -I$(top_builddir)/include/corosync -I$(top_srcdir)/include noinst_PROGRAMS = testevs evsbench evsverify cpgverify testcpg testcpg2 cpgbench testconfdb \ - logsysbench logsysrec testquorum testvotequorum1 testvotequorum2 \ - logsys_s logsys_t1 logsys_t2 \ + testquorum testvotequorum1 testvotequorum2 \ stress_cpgfdget stress_cpgcontext cpgbound testsam \ - testcpgzc cpgbenchzc testzcgc stress_cpgzc + testcpgzc cpgbenchzc testzcgc stress_cpgzc \ + logsys_s logsys_t1 logsys_t2 testevs_LDADD = -levs $(LIBQB_LIBS) testevs_LDFLAGS = -L../lib testcpg_LDADD = -lcpg $(LIBQB_LIBS) testcpg_LDFLAGS = -L../lib testcpg2_LDADD = -lcpg $(LIBQB_LIBS) testcpg2_LDFLAGS = -L../lib testcpgzc_LDADD = -lcpg $(LIBQB_LIBS) testcpgzc_LDFLAGS = -L../lib testzcgc_LDADD = -lcpg $(LIBQB_LIBS) testzcgc_LDFLAGS = -L../lib stress_cpgzc_LDADD = -lcpg $(LIBQB_LIBS) stress_cpgzc_LDFLAGS = -L../lib stress_cpgfdget_LDADD = -lcpg $(LIBQB_LIBS) stress_cpgfdget_LDFLAGS = -L../lib stress_cpgcontext_LDADD = -lcpg $(LIBQB_LIBS) stress_cpgcontext_LDFLAGS = -L../lib testconfdb_LDADD = -lconfdb ../lcr/liblcr.a $(LIBQB_LIBS) testconfdb_LDFLAGS = -L../lib testquorum_LDADD = -lquorum $(LIBQB_LIBS) 
testquorum_LDFLAGS = -L../lib testvotequorum1_LDADD = -lvotequorum $(LIBQB_LIBS) testvotequorum1_LDFLAGS = -L../lib testvotequorum2_LDADD = -lvotequorum $(LIBQB_LIBS) testvotequorum2_LDFLAGS = -L../lib evsverify_LDADD = -levs -ltotem_pg $(LIBQB_LIBS) evsverify_LDFLAGS = -L../lib -L../exec cpgverify_LDADD = -lcpg -ltotem_pg $(LIBQB_LIBS) cpgverify_LDFLAGS = -L../lib -L../exec cpgbound_LDADD = -lcpg $(LIBQB_LIBS) cpgbound_LDFLAGS = -L../lib evsbench_LDADD = -levs $(LIBQB_LIBS) evsbench_LDFLAGS = -L../lib cpgbench_LDADD = -lcpg $(LIBQB_LIBS) cpgbench_LDFLAGS = -L../lib cpgbenchzc_LDADD = -lcpg $(LIBQB_LIBS) cpgbenchzc_LDFLAGS = -L../lib -logsysbench_LDADD = -llogsys -logsysbench_LDFLAGS = -L../exec -logsysrec_LDADD = -llogsys -logsysrec_LDFLAGS = -L../exec logsys_s_SOURCES = logsys_s.c logsys_s1.c logsys_s2.c -logsys_s_LDADD = -llogsys +logsys_s_LDADD = -llogsys $(LIBQB_LIBS) logsys_s_LDFLAGS = -L../exec -logsys_t1_LDADD = -llogsys +logsys_t1_LDADD = -llogsys $(LIBQB_LIBS) logsys_t1_LDFLAGS = -L../exec -logsys_t2_LDADD = -llogsys +logsys_t2_LDADD = -llogsys $(LIBQB_LIBS) logsys_t2_LDFLAGS = -L../exec testsam_LDADD = -lsam -lconfdb -lquorum $(LIBQB_LIBS) testsam_LDFLAGS = -L../lib LINT_FILES1:=$(filter-out sa_error.c, $(wildcard *.c)) LINT_FILES2:=$(filter-out testevsth.c, $(LINT_FILES1)) LINT_FILES:=$(filter-out testparse.c, $(LINT_FILES2)) lint: -for f in $(LINT_FILES) ; do echo Splint $$f ; splint $(INCLUDES) $(LINT_FLAGS) $(CFLAGS) $$f ; done clean-local: rm -f fdata diff --git a/test/logsys_s.c b/test/logsys_s.c index f4e42202..7eabacd0 100644 --- a/test/logsys_s.c +++ b/test/logsys_s.c @@ -1,60 +1,57 @@ /* * Copyright (c) 2007 Red Hat, Inc. * * All rights reserved. 
* * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include +#include #include LOGSYS_DECLARE_SYSTEM ("logsystestsubsystems", LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_OUTPUT_SYSLOG, - 0, - NULL, - LOGSYS_LEVEL_INFO, LOG_DAEMON, - LOGSYS_LEVEL_INFO, - NULL, - 1000000); + LOGSYS_LEVEL_INFO); +LOGSYS_DECLARE_SUBSYS ("MAIN"); extern void logsys_s1_print (void); extern void logsys_s2_print (void); int main (void) { - logsys_fork_completed(); + qb_log_thread_start(); logsys_s1_print(); logsys_s2_print(); return (0); } diff --git a/test/logsys_s1.c b/test/logsys_s1.c index 296b189e..fb689dd7 100644 --- a/test/logsys_s1.c +++ b/test/logsys_s1.c @@ -1,49 +1,50 @@ /* * Copyright (c) 2007 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include +#include #include void logsys_s1_print (void); LOGSYS_DECLARE_SUBSYS ("SYS1"); void logsys_s1_print (void) { log_printf (LOGSYS_LEVEL_ALERT, "This is an alert log message\n"); log_printf (LOGSYS_LEVEL_WARNING, "This is a warning log message\n"); } diff --git a/test/logsys_s2.c b/test/logsys_s2.c index a680ad96..80828865 100644 --- a/test/logsys_s2.c +++ b/test/logsys_s2.c @@ -1,51 +1,52 @@ /* * Copyright (c) 2007 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include +#include #include void logsys_s2_print (void); LOGSYS_DECLARE_SUBSYS ("SYS2"); void logsys_s2_print (void) { logsys_config_logfile_priority_set("SYS2", LOGSYS_LEVEL_DEBUG); log_printf (LOGSYS_LEVEL_ALERT, "This is an alert log message\n"); log_printf (LOGSYS_LEVEL_WARNING, "This is a warning log message\n"); log_printf (LOGSYS_LEVEL_DEBUG, "This is a debug log message\n"); } diff --git a/test/logsys_t1.c b/test/logsys_t1.c index 268086b9..06b2c59b 100644 --- a/test/logsys_t1.c +++ b/test/logsys_t1.c @@ -1,57 +1,54 @@ /* * Copyright (c) 2007 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. 
* - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include +#include #include LOGSYS_DECLARE_SYSTEM ("logsystestNOsubsystems", LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_OUTPUT_SYSLOG, - 0, - NULL, - LOGSYS_LEVEL_DEBUG, LOG_DAEMON, - LOGSYS_LEVEL_DEBUG, - NULL, - 1000000); + LOGSYS_LEVEL_DEBUG); +LOGSYS_DECLARE_SUBSYS("MAIN"); int main (void) { log_printf (LOGSYS_LEVEL_ALERT, "This is an alert log message\n"); log_printf (LOGSYS_LEVEL_WARNING, "This is a warning log message\n"); log_printf (LOGSYS_LEVEL_DEBUG, "This is a debug log message\n"); return (0); } diff --git a/test/logsys_t2.c b/test/logsys_t2.c index 8cb6095d..fcee399a 100644 --- a/test/logsys_t2.c +++ b/test/logsys_t2.c @@ -1,83 +1,85 @@ /* * Copyright (c) 2007 Red Hat, Inc. * * All rights reserved. 
* * Author: Lon Hohberger (lhh@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include +#include #include LOGSYS_DECLARE_SYSTEM ("logtest_t2", LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_THREADED, - 0, - NULL, - LOGSYS_LEVEL_INFO, LOG_DAEMON, - LOGSYS_LEVEL_INFO, - NULL, - 1000000); + LOGSYS_LEVEL_INFO); + +LOGSYS_DECLARE_SUBSYS("MAIN"); int main(int argc, char **argv) { /* * fork could occur here and the file to output to could be set */ logsys_config_mode_set (NULL, LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_THREADED); + log_printf(LOGSYS_LEVEL_NOTICE, "Hello, world!\n"); log_printf(LOGSYS_LEVEL_DEBUG, "If you see this, the logger's busted\n"); logsys_config_logfile_priority_set (NULL, LOGSYS_LEVEL_ALERT); + logsys_config_apply(); log_printf(LOGSYS_LEVEL_DEBUG, "If you see this, the logger's busted\n"); log_printf(LOGSYS_LEVEL_CRIT, "If you see this, the logger's busted\n"); log_printf(LOGSYS_LEVEL_ALERT, "Alert 1\n"); logsys_config_logfile_priority_set (NULL, LOGSYS_LEVEL_NOTICE); + logsys_config_apply(); log_printf(LOGSYS_LEVEL_CRIT, "Crit 1\n"); log_printf(LOGSYS_LEVEL_INFO, "If you see this, the logger's busted\n"); logsys_config_logfile_priority_set (NULL, LOGSYS_LEVEL_DEBUG); + logsys_config_apply(); log_printf(LOGSYS_LEVEL_DEBUG, "Debug 1\n"); logsys_config_mode_set (NULL, LOGSYS_MODE_OUTPUT_STDERR); log_printf(LOGSYS_LEVEL_DEBUG, "Debug 2\n"); return 0; } diff --git a/test/logsysbench.c b/test/logsysbench.c deleted file mode 100644 index 1d6dac8d..00000000 --- a/test/logsysbench.c +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (c) 2008, 2009 Red Hat, Inc. - * - * All rights reserved. - * - * Author: Steven Dake (sdake@redhat.com) - * - * This software licensed under BSD license, the text of which follows: - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * - Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * - Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - Neither the name of the MontaVista Software, Inc. nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include - -#include -#include -#include -#include -#include -#include - -LOGSYS_DECLARE_SYSTEM ("logtest_rec", - LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_THREADED, - 0, /* debug */ - NULL, - LOGSYS_LEVEL_INFO, /* logfile_priority */ - LOG_DAEMON, /* syslog facility */ - LOGSYS_LEVEL_INFO, /* syslog level */ - NULL, /* use default format */ - 1000000); /* flight recorder size */ - -#define LOGREC_ID_CHECKPOINT_CREATE 2 -#define LOGREC_ARGS_CHECKPOINT_CREATE 2 -#define ITERATIONS 1000000 - -static struct timeval tv1, tv2, tv_elapsed; - -#ifndef timersub -#define timersub(a, b, result) \ -do { \ - (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \ - (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \ - if ((result)->tv_usec < 0) { \ - --(result)->tv_sec; \ - (result)->tv_usec += 1000000; \ - } \ -} while (0) -#endif - -static void bm_start (void) -{ - gettimeofday (&tv1, NULL); -} -static void bm_finish (const char *operation) -{ - gettimeofday (&tv2, NULL); - timersub (&tv2, &tv1, &tv_elapsed); - - if (strlen (operation) > 22) { - printf ("%s\t\t", operation); - } else { - printf ("%s\t\t\t", operation); - } - printf ("%9.3f operations/sec\n", - ((float)ITERATIONS) / (tv_elapsed.tv_sec + (tv_elapsed.tv_usec / 1000000.0))); -} - -static char buffer[256]; -int main (void) -{ - int i; - char buf[1024]; - - - printf ("heating up cache with logrec functionality\n"); - for (i = 0; i < ITERATIONS; i++) { - log_rec (LOGREC_ID_CHECKPOINT_CREATE, - "recordA", 8, "recordB", 8, LOGSYS_REC_END); - } - bm_start(); - for (i = 0; i < ITERATIONS; i++) { - log_rec (LOGREC_ID_CHECKPOINT_CREATE, - buffer, 7, LOGSYS_REC_END); - } - bm_finish ("log_rec 1 arguments:"); - bm_start(); - for (i = 0; i < ITERATIONS; i++) { - log_rec (LOGREC_ID_CHECKPOINT_CREATE, - "recordA", 8, LOGSYS_REC_END); - } - bm_finish ("log_rec 2 arguments:"); - bm_start(); - for (i = 0; i < 10; i++) { - log_rec (LOGREC_ID_CHECKPOINT_CREATE, - "recordA", 8, "recordB", 8, LOGSYS_REC_END); - } - bm_start(); - for (i 
= 0; i < ITERATIONS; i++) { - log_rec (LOGREC_ID_CHECKPOINT_CREATE, - "recordA", 8, "recordB", 8, "recordC", 8, LOGSYS_REC_END); - } - bm_finish ("log_rec 3 arguments:"); - bm_start(); - for (i = 0; i < ITERATIONS; i++) { - log_rec (LOGREC_ID_CHECKPOINT_CREATE, - "recordA", 8, "recordB", 8, "recordC", 8, "recordD", 8, LOGSYS_REC_END); - } - bm_finish ("log_rec 4 arguments:"); - - /* - * sprintf testing - */ - printf ("heating up cache with sprintf functionality\n"); - for (i = 0; i < ITERATIONS; i++) { - snprintf (buf, sizeof(buf), "Some logging information %s", "recordA"); - } - bm_start(); - for (i = 0; i < ITERATIONS; i++) { - snprintf (buf, sizeof(buf), "Some logging information %s", "recordA"); - } - bm_finish ("sprintf 1 argument:"); - bm_start(); - for (i = 0; i < ITERATIONS; i++) { - sprintf (buf, "Some logging information %s %s", "recordA", "recordB"); - } - bm_finish ("sprintf 2 arguments:"); - bm_start(); - for (i = 0; i < ITERATIONS; i++) { - sprintf (buf, "Some logging information %s %s %s", "recordA", "recordB", "recordC"); - } - bm_finish ("sprintf 3 arguments:"); - bm_start(); - for (i = 0; i < ITERATIONS; i++) { - sprintf (buf, "Some logging information %s %s %s %s", "recordA", "recordB", "recordC", "recordD"); - } - bm_finish ("sprintf 4 arguments:"); - bm_start(); - for (i = 0; i < ITERATIONS; i++) { - sprintf (buf, "Some logging information %s %s %s %d", "recordA", "recordB", "recordC", i); - } - bm_finish ("sprintf 4 arguments (1 int):"); - - logsys_log_rec_store ("fdata"); -/* TODO - currently fails under some circumstances - - bm_start(); - for (i = 0; i < ITERATIONS; i++) { - log_printf (LOGSYS_LEVEL_NOTICE, "test %d", i); - } - bm_finish("log_printf"); -*/ - - return (0); -} diff --git a/test/logsysrec.c b/test/logsysrec.c index 4dd68994..2d05435d 100644 --- a/test/logsysrec.c +++ b/test/logsysrec.c @@ -1,71 +1,66 @@ /* * Copyright (c) 2008 Red Hat, Inc. * * All rights reserved. 
* * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include LOGSYS_DECLARE_SYSTEM ("logtest_rec", LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_THREADED, - 0, - NULL, - LOG_INFO, LOG_DAEMON, - LOG_INFO, - NULL, - 100000); + LOG_INFO); #define LOGREC_ID_CHECKPOINT_CREATE 2 #define LOGREC_ARGS_CHECKPOINT_CREATE 2 int main(int argc, char **argv) { int i; for (i = 0; i < 10000; i++) { log_printf (LOGSYS_LEVEL_NOTICE, "This is a test of %s(%d)\n", "stringparse", i); log_rec (LOGSYS_ENCODE_RECID(LOGSYS_LEVEL_NOTICE, logsys_subsys_id, LOGREC_ID_CHECKPOINT_CREATE), "record1", 8, "record22", 9, "record333", 10, "record444", 11, LOGSYS_REC_END); } logsys_atexit (); logsys_log_rec_store ("fdata"); return 0; } diff --git a/tools/corosync-fplay.c b/tools/corosync-fplay.c index c74c2017..ab0e2933 100644 --- a/tools/corosync-fplay.c +++ b/tools/corosync-fplay.c @@ -1,522 +1,51 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include - -uint32_t flt_data_size; - -uint32_t *flt_data; -#define FDHEAD_INDEX (flt_data_size) -#define FDTAIL_INDEX (flt_data_size + 1) - -#define TOTEMIP_ADDRLEN (sizeof(struct in6_addr)) - -struct totem_ip_address { - unsigned int nodeid; - unsigned short family; - unsigned char addr[TOTEMIP_ADDRLEN]; -} __attribute__((packed)); - -struct memb_ring_id { - struct totem_ip_address rep; - unsigned long long seq; -} __attribute__((packed)); - -static const char *totemip_print(const struct totem_ip_address *addr) -{ - static char buf[INET6_ADDRSTRLEN]; - - return inet_ntop(addr->family, addr->addr, buf, sizeof(buf)); -} - -static char *print_string_len (const unsigned char *str, unsigned int len) -{ - unsigned int i; - static char buf[1024]; - memset (buf, 0, sizeof (buf)); - for (i = 0; i < len; i++) { - buf[i] = str[i]; - } - return (buf); -} - -static void sync_printer_confchg_set_sync (const void **record) -{ - const unsigned int *my_should_sync = record[0]; - printf 
("Setting my_should_sync to %d\n", *my_should_sync); -} - -static void sync_printer_set_sync_state (const void **record) -{ - const unsigned int *my_sync_state = record[0]; - printf ("Setting my_sync_state to %d\n", *my_sync_state); -} - -static void sync_printer_process_currentstate (const void **record) -{ - const unsigned int *my_sync_state = record[0]; - printf ("Retrieving my_sync_state %d\n", *my_sync_state); -} - -static void sync_printer_process_get_shouldsync (const void **record) -{ - const unsigned int *my_should_sync = record[0]; - printf ("Getting my_should_sync %d\n", *my_should_sync); -} - -static void sync_printer_checkpoint_release (const void **record) -{ - const unsigned char *name = record[0]; - const uint16_t *name_len = record[1]; - const unsigned int *ckpt_id = record[2]; - const unsigned int *from = record[3]; - - printf ("Checkpoint release name=[%s] id=[%d] from=[%d] len=[%d]\n", - print_string_len (name, *name_len), - *ckpt_id, - *from, - *name_len); -} - -static void sync_printer_checkpoint_transmit (const void **record) -{ - const unsigned char *name = record[0]; - const uint16_t *name_len = record[1]; - const unsigned int *ckpt_id = record[2]; - const unsigned int *xmit_id = record[3]; - - printf ("xmit_id=[%d] Checkpoint transmit name=[%s] id=[%d]\n", - *xmit_id, print_string_len (name, *name_len), - *ckpt_id); -} - -static void sync_printer_section_transmit (const void **record) -{ - const unsigned char *ckpt_name = record[0]; - const uint16_t *name_len = record[1]; - const unsigned int *ckpt_id = record[2]; - const unsigned int *xmit_id = record[3]; - const unsigned char *section_name = record[4]; - const uint16_t *section_name_len = record[5]; - - printf ("xmit_id=[%d] Section transmit checkpoint name=[%s] id=[%d] ", - *xmit_id, print_string_len (ckpt_name, *name_len), - *ckpt_id); - printf ("section=[%s]\n", - print_string_len (section_name, *section_name_len)); -} -static void sync_printer_checkpoint_receive (const void **record) 
-{ - const unsigned char *ckpt_name = record[0]; - const uint16_t *name_len = record[1]; - const unsigned int *ckpt_id = record[2]; - const unsigned int *xmit_id = record[3]; - - printf ("xmit_id=[%d] Checkpoint receive checkpoint name=[%s] id=[%d]\n", - *xmit_id, print_string_len (ckpt_name, *name_len), *ckpt_id); -} - -static void sync_printer_section_receive (const void **record) -{ - const unsigned char *ckpt_name = record[0]; - const uint16_t *name_len = record[1]; - const unsigned int *ckpt_id = record[2]; - const unsigned int *xmit_id = record[3]; - const unsigned char *section_name = record[4]; - const unsigned int *section_name_len = record[5]; - - printf ("xmit_id=[%d] Section receive checkpoint name=[%s] id=[%d] ", - *xmit_id, print_string_len (ckpt_name, *name_len), - *ckpt_id); - - printf ("section=[%s]\n", - print_string_len (section_name, *section_name_len)); -} - -static void sync_printer_confchg_fn (const void **record) -{ - unsigned int i; - - const unsigned int *members = record[0]; - const unsigned int *member_count = record[1]; - const struct memb_ring_id *ring_id = record[2]; - struct in_addr addr; - - printf ("sync confchg fn ringid [ip=%s seq=%lld]\n", - totemip_print (&ring_id->rep), - ring_id->seq); - printf ("members [%d]:\n", *member_count); - for (i = 0; i < *member_count; i++) { - addr.s_addr = members[i]; - printf ("\tmember [%s]\n", inet_ntoa (addr)); - } -} - -static void printer_totemsrp_mcast (const void **record) -{ - const unsigned int *msgid = record[0]; - - printf ("totemsrp_mcast %d\n", *msgid); -} - -static void printer_totemsrp_delv (const void **record) -{ - const unsigned int *msgid = record[0]; - - printf ("totemsrp_delv %d\n", *msgid); -} - -static void printer_totempg_mcast_fits (const void **record) -{ - const unsigned int *idx = record[0]; - const unsigned int *iov_len = record[1]; - const unsigned int *copy_len = record[2]; - const unsigned int *fragment_size = record[3]; - const unsigned int *max_packet_size = 
record[4]; - const unsigned int *copy_base = record[5]; - const unsigned char *next_fragment = record[6]; - - printf ("totempg_mcast index=[%d] iov_len=[%d] copy_len=[%d] fragment_size=[%d] max_packet_size=[%d] copy_base=[%d] next_fragment[%d]\n", - *idx, *iov_len, *copy_len, *fragment_size, *max_packet_size, *copy_base, *next_fragment); -} - -static void sync_printer_service_process (const void **record) -{ - const struct memb_ring_id *ring_id = record[0]; - const struct memb_ring_id *sync_ring_id = record[1]; - - printf ("sync service process callback ringid [ip=%s seq=%lld] ", - totemip_print (&ring_id->rep), - ring_id->seq); - printf ("sync ringid [ip=%s seq=%lld]\n", - totemip_print (&sync_ring_id->rep), - sync_ring_id->seq); -} - -struct printer_subsys_record_print { - int ident; - void (*print_fn)(const void **record); - int record_length; -}; - -struct printer_subsys { - const char *subsys; - struct printer_subsys_record_print *record_printers; - int record_printers_count; -}; - -#define LOGREC_ID_SYNC_CONFCHG_FN 0 -#define LOGREC_ID_SYNC_SERVICE_PROCESS 1 - /* - * CKPT subsystem + * Copyright (c) 2011 Red Hat + * + * All rights reserved. + * + * Author: Angus Salkeld + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. */ -#define LOGREC_ID_CONFCHG_SETSYNC 0 -#define LOGREC_ID_SETSYNCSTATE 1 -#define LOGREC_ID_SYNC_PROCESS_CURRENTSTATE 2 -#define LOGREC_ID_SYNC_PROCESS_GETSHOULDSYNC 3 -#define LOGREC_ID_SYNC_CHECKPOINT_TRANSMIT 4 -#define LOGREC_ID_SYNC_SECTION_TRANSMIT 5 -#define LOGREC_ID_SYNC_CHECKPOINT_RECEIVE 6 -#define LOGREC_ID_SYNC_SECTION_RECEIVE 7 -#define LOGREC_ID_SYNC_CHECKPOINT_RELEASE 8 - -#define LOGREC_ID_TOTEMSRP_MCAST 0 -#define LOGREC_ID_TOTEMSRP_DELV 1 -#define LOGREC_ID_TOTEMPG_MCAST_FITS 2 - - -static struct printer_subsys_record_print record_print_sync[] = { - { - .ident = LOGREC_ID_SYNC_CONFCHG_FN, - .print_fn = sync_printer_confchg_fn, - .record_length = 28 - }, - { - .ident = LOGREC_ID_SYNC_SERVICE_PROCESS, - .print_fn = sync_printer_service_process, - .record_length = 28 - } -}; - -static struct printer_subsys_record_print record_print_ckpt[] = { - { - .ident = LOGREC_ID_CONFCHG_SETSYNC, - .print_fn = sync_printer_confchg_set_sync, - .record_length = 28 - }, - { - .ident = LOGREC_ID_SETSYNCSTATE, - .print_fn = sync_printer_set_sync_state, - .record_length = 28 - }, - { - .ident = LOGREC_ID_SYNC_PROCESS_CURRENTSTATE, - .print_fn = sync_printer_process_currentstate, 
- .record_length = 28 - }, - { - .ident = LOGREC_ID_SYNC_PROCESS_GETSHOULDSYNC, - .print_fn = sync_printer_process_get_shouldsync, - .record_length = 28 - }, - { - .ident = LOGREC_ID_SYNC_CHECKPOINT_TRANSMIT, - .print_fn = sync_printer_checkpoint_transmit, - .record_length = 28 - }, - { - .ident = LOGREC_ID_SYNC_SECTION_TRANSMIT, - .print_fn = sync_printer_section_transmit, - .record_length = 28 - }, - { - .ident = LOGREC_ID_SYNC_CHECKPOINT_RECEIVE, - .print_fn = sync_printer_checkpoint_receive, - .record_length = 28 - }, - { - .ident = LOGREC_ID_SYNC_SECTION_RECEIVE, - .print_fn = sync_printer_section_receive, - .record_length = 28 - }, - { - .ident = LOGREC_ID_SYNC_CHECKPOINT_RELEASE, - .print_fn = sync_printer_checkpoint_release, - .record_length = 28 - } - -}; -static struct printer_subsys_record_print record_print_totem[] = { - { - .ident = LOGREC_ID_TOTEMSRP_MCAST, - .print_fn = printer_totemsrp_mcast, - .record_length = 28 - }, - { - .ident = LOGREC_ID_TOTEMSRP_DELV, - .print_fn = printer_totemsrp_delv, - .record_length = 28 - }, - { - .ident = LOGREC_ID_TOTEMPG_MCAST_FITS, - .print_fn = printer_totempg_mcast_fits, - .record_length = 28 - } -}; - -static struct printer_subsys printer_subsystems[] = { - { - .subsys = "SYNC", - .record_printers = record_print_sync, - .record_printers_count = sizeof (record_print_sync) / sizeof (struct printer_subsys_record_print) - }, - { - .subsys = "CKPT", - .record_printers = record_print_ckpt, - .record_printers_count = sizeof (record_print_ckpt) / sizeof (struct printer_subsys_record_print) - }, - { - .subsys = "TOTEM", - .record_printers = record_print_totem, - .record_printers_count = sizeof (record_print_totem) / sizeof (struct printer_subsys_record_print) - } -}; -static unsigned int printer_subsys_count = - sizeof (printer_subsystems) / sizeof (struct printer_subsys); - -#define G_RECORD_SIZE 10000 - -static uint32_t g_record[G_RECORD_SIZE]; - -/* - * Copy record, dealing with wrapping - */ -static int logsys_rec_get 
(int rec_idx) { - uint32_t rec_size; - int firstcopy, secondcopy; - - rec_size = flt_data[rec_idx]; - - firstcopy = rec_size; - secondcopy = 0; - - if (rec_size > G_RECORD_SIZE || rec_size > flt_data_size) { - fprintf (stderr, "rec_size too large. Input file is probably corrupted.\n"); - exit (EXIT_FAILURE); - } - - if (firstcopy + rec_idx > flt_data_size) { - firstcopy = flt_data_size - rec_idx; - secondcopy -= firstcopy - rec_size; - } - memcpy (&g_record[0], &flt_data[rec_idx], firstcopy * sizeof(uint32_t)); - if (secondcopy) { - memcpy (&g_record[firstcopy], &flt_data[0], secondcopy * sizeof(uint32_t)); - } - return ((rec_idx + rec_size) % flt_data_size); -} - -static void logsys_rec_print (const void *record) -{ - const uint32_t *buf_uint32t = record; - uint32_t rec_size; - uint32_t rec_ident; - uint32_t line; - uint32_t arg_size_idx; - unsigned int i; - unsigned int j; - unsigned int rec_idx = 0; - uint32_t record_number; - unsigned int words_processed; - const char *arguments[64]; - int arg_count = 0; - - rec_size = buf_uint32t[rec_idx]; - rec_ident = buf_uint32t[rec_idx+1]; - line = buf_uint32t[rec_idx+2]; - record_number = buf_uint32t[rec_idx+3]; - - printf ("rec=[%d] ", record_number); - arg_size_idx = rec_idx + 4; - words_processed = 4; - for (i = 0; words_processed < rec_size; i++) { - arguments[arg_count++] = - (const char *)&buf_uint32t[arg_size_idx + 1]; - words_processed += buf_uint32t[arg_size_idx] + 1; - arg_size_idx += buf_uint32t[arg_size_idx] + 1; - - } - - for (i = 0; i < printer_subsys_count; i++) { - if (strcmp (arguments[0], printer_subsystems[i].subsys) == 0) { - for (j = 0; j < printer_subsystems[i].record_printers_count; j++) { - if (rec_ident == printer_subsystems[i].record_printers[j].ident) { - printer_subsystems[i].record_printers[j].print_fn ((const void **)&arguments[3]); - return; - } - } - } - } - - switch(LOGSYS_DECODE_RECID(rec_ident)) { - case LOGSYS_RECID_LOG: - printf ("Log Message=%s\n", arguments[3]); - break; - case 
LOGSYS_RECID_ENTER: - printf ("ENTERING function [%s] line [%d]\n", arguments[2], line); - break; - case LOGSYS_RECID_LEAVE: - printf ("LEAVING function [%s] line [%d]\n", arguments[2], line); - break; - case LOGSYS_RECID_TRACE1: - printf ("Tracing(1) Messsage=%s\n", arguments[3]); - break; - case LOGSYS_RECID_TRACE2: - printf ("Tracing(2) Messsage=%s\n", arguments[3]); - break; - case LOGSYS_RECID_TRACE3: - printf ("Tracing(3) Messsage=%s\n", arguments[3]); - break; - case LOGSYS_RECID_TRACE4: - printf ("Tracing(4) Messsage=%s\n", arguments[3]); - break; - case LOGSYS_RECID_TRACE5: - printf ("Tracing(5) Messsage=%s\n", arguments[3]); - break; - case LOGSYS_RECID_TRACE6: - printf ("Tracing(6) Messsage=%s\n", arguments[3]); - break; - case LOGSYS_RECID_TRACE7: - printf ("Tracing(7) Messsage=%s\n", arguments[3]); - break; - case LOGSYS_RECID_TRACE8: - printf ("Tracing(8) Messsage=%s\n", arguments[3]); - break; - default: - printf ("Unknown record type found subsys=[%s] ident=[%d]\n", - arguments[0], LOGSYS_DECODE_RECID(rec_ident)); - break; - } -#ifdef COMPILE_OUT -printf ("\n"); -#endif -} +#include "config.h" +#include +#include int main (void) { - int fd; - int rec_idx; - int end_rec; - int record_count = 1; - ssize_t n_read; - const char *data_file = LOCALSTATEDIR "/lib/corosync/fdata"; - size_t n_required; - - if ((fd = open (data_file, O_RDONLY)) < 0) { - fprintf (stderr, "failed to open %s: %s\n", - data_file, strerror (errno)); - return EXIT_FAILURE; - } - - n_required = sizeof (uint32_t); - n_read = read (fd, &flt_data_size, n_required); - if (n_read != n_required) { - fprintf (stderr, "Unable to read fdata header\n"); - return EXIT_FAILURE; - } - - n_required = ((flt_data_size + 2) * sizeof(uint32_t)); - - if ((flt_data = malloc (n_required)) == NULL) { - fprintf (stderr, "exhausted virtual memory\n"); - return EXIT_FAILURE; - } - n_read = read (fd, flt_data, n_required); - close (fd); - if (n_read < 0) { - fprintf (stderr, "reading %s failed: %s\n", - 
data_file, strerror (errno)); - return EXIT_FAILURE; - } - - if (n_read != n_required) { - printf ("Warning: read %zd bytes, but expected %zu\n", - n_read, n_required); - } - - rec_idx = flt_data[FDTAIL_INDEX]; - end_rec = flt_data[FDHEAD_INDEX]; - - printf ("Starting replay: head [%d] tail [%d]\n", - flt_data[FDHEAD_INDEX], - flt_data[FDTAIL_INDEX]); + qb_log_init("fplay", LOG_USER, LOG_INFO); - for (;;) { - rec_idx = logsys_rec_get (rec_idx); - logsys_rec_print (g_record); - if (rec_idx == end_rec) { - break; - } - record_count += 1; - } + qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD, + QB_LOG_FILTER_FILE, __FILE__, LOG_INFO); + qb_log_format_set(QB_LOG_STDERR, "%f:%l [%p] %b"); + qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE); + qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_TRUE); - printf ("Finishing replay: records found [%d]\n", record_count); - return (0); + qb_log_blackbox_print_from_file(LOCALSTATEDIR "/lib/corosync/fdata"); + return 0; } diff --git a/tools/corosync-notifyd.c b/tools/corosync-notifyd.c index dd8ee4b5..72c58273 100644 --- a/tools/corosync-notifyd.c +++ b/tools/corosync-notifyd.c @@ -1,1109 +1,1111 @@ /* * Copyright (c) 2011 Red Hat * * All rights reserved. * * Author: Angus Salkeld * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. 
nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include + +#include +#include +#include #include #include #include #include #include #include /* * generic declarations */ enum { CS_NTF_LOG, CS_NTF_STDOUT, CS_NTF_SNMP, CS_NTF_DBUS, CS_NTF_FG, CS_NTF_MAX, }; static int conf[CS_NTF_MAX]; static int32_t _cs_is_quorate = 0; typedef void (*node_membership_fn_t)(char *nodename, uint32_t nodeid, char *state, char* ip); typedef void (*node_quorum_fn_t)(char *nodename, uint32_t nodeid, const char *state); typedef void (*application_connection_fn_t)(char *nodename, uint32_t nodeid, char *app_name, const char *state); struct notify_callbacks { node_membership_fn_t node_membership_fn; node_quorum_fn_t node_quorum_fn; application_connection_fn_t application_connection_fn; }; #define MAX_NOTIFIERS 5 static int num_notifiers = 0; static struct notify_callbacks notifiers[MAX_NOTIFIERS]; static uint32_t local_nodeid = 0; static char local_nodename[CS_MAX_NAME_LENGTH]; static 
qb_loop_t *main_loop; static quorum_handle_t quorum_handle; static void _cs_node_membership_event(char *nodename, uint32_t nodeid, char *state, char* ip); static void _cs_node_quorum_event(const char *state); static void _cs_application_connection_event(char *app_name, const char *state); #ifdef HAVE_DBUS #include /* * dbus */ #define DBUS_CS_NAME "org.corosync" #define DBUS_CS_IFACE "org.corosync" #define DBUS_CS_PATH "/org/corosync" static DBusConnection *db = NULL; static char _err[512]; static int err_set = 0; static void _cs_dbus_init(void); #endif /* HAVE_DBUS */ #ifdef ENABLE_SNMP #include #include #include #include #include #include #include enum snmp_node_status { SNMP_NODE_STATUS_UNKNOWN = 0, SNMP_NODE_STATUS_JOINED = 1, SNMP_NODE_STATUS_LEFT = 2 }; #define SNMP_OID_COROSYNC "1.3.6.1.4.1.35488" #define SNMP_OID_OBJECT_ROOT SNMP_OID_COROSYNC ".1" #define SNMP_OID_OBJECT_NODE_NAME SNMP_OID_OBJECT_ROOT ".1" #define SNMP_OID_OBJECT_NODE_ID SNMP_OID_OBJECT_ROOT ".2" #define SNMP_OID_OBJECT_NODE_STATUS SNMP_OID_OBJECT_ROOT ".3" #define SNMP_OID_OBJECT_NODE_ADDR SNMP_OID_OBJECT_ROOT ".4" #define SNMP_OID_OBJECT_RINGSEQ SNMP_OID_OBJECT_ROOT ".20" #define SNMP_OID_OBJECT_QUORUM SNMP_OID_OBJECT_ROOT ".21" #define SNMP_OID_OBJECT_APP_NAME SNMP_OID_OBJECT_ROOT ".40" #define SNMP_OID_OBJECT_APP_STATUS SNMP_OID_OBJECT_ROOT ".41" #define SNMP_OID_TRAPS_ROOT SNMP_OID_COROSYNC ".0" #define SNMP_OID_TRAPS_NODE SNMP_OID_TRAPS_ROOT ".1" #define SNMP_OID_TRAPS_QUORUM SNMP_OID_TRAPS_ROOT ".2" #define SNMP_OID_TRAPS_APP SNMP_OID_TRAPS_ROOT ".3" #define CS_TIMESTAMP_STR_LEN 20 static const char *local_host = "localhost"; #endif /* ENABLE_SNMP */ static char snmp_manager_buf[CS_MAX_NAME_LENGTH]; static char *snmp_manager = NULL; /* * confdb */ #define SEPERATOR_STR "." 
static confdb_handle_t confdb_handle; static void _cs_confdb_key_changed(confdb_handle_t handle, confdb_change_type_t change_type, hdb_handle_t parent_object_handle, hdb_handle_t object_handle, const void *object_name, size_t object_name_len, const void *key_name, size_t key_name_len, const void *key_value, size_t key_value_len); static void _cs_confdb_object_created(confdb_handle_t handle, hdb_handle_t parent_object_handle, hdb_handle_t object_handle, const void *name_pt, size_t name_len); static void _cs_confdb_object_deleted(confdb_handle_t handle, hdb_handle_t parent_object_handle, const void *name_pt, size_t name_len); static confdb_callbacks_t callbacks = { .confdb_key_change_notify_fn = _cs_confdb_key_changed, .confdb_object_create_change_notify_fn = _cs_confdb_object_created, .confdb_object_delete_change_notify_fn = _cs_confdb_object_deleted, }; static int32_t _cs_ip_to_hostname(char* ip, char* name_out) { struct sockaddr_in sa; int rc; if (strchr(ip, ':') == NULL) { sa.sin_family = AF_INET; } else { sa.sin_family = AF_INET6; } rc = inet_pton(sa.sin_family, ip, &sa.sin_addr); if (rc == 0) { return -EINVAL; } rc = getnameinfo((struct sockaddr*)&sa, sizeof(sa), name_out, CS_MAX_NAME_LENGTH, NULL, 0, 0); if (rc != 0) { - syslog (LOG_ERR, "error looking up %s : %s\n", ip, gai_strerror(rc)); + qb_log(LOG_ERR, 0, "error looking up %s : %s", ip, gai_strerror(rc)); return -EINVAL; } return 0; } static void _cs_confdb_key_changed(confdb_handle_t handle, confdb_change_type_t change_type, hdb_handle_t parent_object_handle, hdb_handle_t object_handle, const void *object_name_pt, size_t object_name_len, const void *key_name_pt, size_t key_name_len, const void *key_value_pt, size_t key_value_len) { char parent_name[CS_MAX_NAME_LENGTH]; size_t len = 0; hdb_handle_t real_parent_object_handle; cs_error_t rc = CS_OK; char nodename[CS_MAX_NAME_LENGTH]; char nodeid_str[CS_MAX_NAME_LENGTH]; uint32_t nodeid; char status[CS_MAX_NAME_LENGTH]; char ip[CS_MAX_NAME_LENGTH]; size_t 
ip_len; confdb_value_types_t type; char* open_bracket = NULL; char* close_bracket = NULL; rc = confdb_object_parent_get (handle, parent_object_handle, &real_parent_object_handle); assert(rc == CS_OK); rc = confdb_object_name_get (handle, real_parent_object_handle, parent_name, &len); parent_name[len] = '\0'; assert(rc == CS_OK); if (strcmp(parent_name, "members") == 0) { if (strncmp(key_name_pt, "status", strlen("status")) == 0) { memcpy(nodeid_str, object_name_pt, object_name_len); nodeid_str[object_name_len] = '\0'; nodeid = atoi(nodeid_str); memcpy(status, key_value_pt, key_value_len); status[key_value_len] = '\0'; rc = confdb_key_get_typed(handle, parent_object_handle, "ip", ip, &ip_len, &type); assert(rc == CS_OK); ip[ip_len-1] = '\0'; /* * We want the ip out of: "r(0) ip(192.168.100.92)" */ open_bracket = strrchr(ip, '('); open_bracket++; close_bracket = strrchr(open_bracket, ')'); *close_bracket = '\0'; _cs_ip_to_hostname(open_bracket, nodename); _cs_node_membership_event(nodename, nodeid, status, open_bracket); } } } static void _cs_confdb_object_created(confdb_handle_t handle, hdb_handle_t parent_object_handle, hdb_handle_t object_handle, const void *name_pt, size_t name_len) { char parent_name[CS_MAX_NAME_LENGTH]; size_t len = 0; char obj_name[CS_MAX_NAME_LENGTH]; cs_error_t rc = CS_OK; memcpy(obj_name, name_pt, name_len); obj_name[name_len] = '\0'; rc = confdb_object_name_get (handle, object_handle, parent_name, &len); parent_name[len] = '\0'; if (rc != CS_OK) { return; } if (strcmp(parent_name, "connections") == 0) { _cs_application_connection_event(obj_name, "connected"); } } static void _cs_confdb_object_deleted(confdb_handle_t handle, hdb_handle_t parent_object_handle, const void *name_pt, size_t name_len) { char obj_name[CS_MAX_NAME_LENGTH]; char parent_name[CS_MAX_NAME_LENGTH]; size_t len = 0; cs_error_t rc; memcpy(obj_name, name_pt, name_len); obj_name[name_len] = '\0'; rc = confdb_object_name_get (handle, parent_object_handle, parent_name, &len); 
parent_name[len] = '\0'; assert(rc == CS_OK); if (strcmp(parent_name, "connections") == 0) { _cs_application_connection_event(obj_name, "disconnected"); } } static cs_error_t _cs_confdb_find_object (confdb_handle_t handle, const char * name_pt, hdb_handle_t * out_handle) { char * obj_name_pt; char * save_pt; hdb_handle_t obj_handle; confdb_handle_t parent_object_handle = OBJECT_PARENT_HANDLE; char tmp_name[CS_MAX_NAME_LENGTH]; cs_error_t res = CS_OK; strncpy (tmp_name, name_pt, sizeof (tmp_name)); tmp_name[sizeof (tmp_name) - 1] = '\0'; obj_name_pt = strtok_r(tmp_name, SEPERATOR_STR, &save_pt); while (obj_name_pt != NULL) { res = confdb_object_find_start(handle, parent_object_handle); if (res != CS_OK) { - syslog (LOG_ERR, "Could not start object_find %d\n", res); + qb_log(LOG_ERR, 0, "Could not start object_find %d", res); exit (EXIT_FAILURE); } res = confdb_object_find(handle, parent_object_handle, obj_name_pt, strlen (obj_name_pt), &obj_handle); if (res != CS_OK) { return res; } parent_object_handle = obj_handle; obj_name_pt = strtok_r (NULL, SEPERATOR_STR, &save_pt); } *out_handle = parent_object_handle; return res; } static int _cs_confdb_dispatch(int fd, int revents, void *data) { confdb_dispatch(confdb_handle, CS_DISPATCH_ONE); return 0; } static void _cs_quorum_notification(quorum_handle_t handle, uint32_t quorate, uint64_t ring_seq, uint32_t view_list_entries, uint32_t *view_list) { if (_cs_is_quorate == quorate) { return; } _cs_is_quorate = quorate; if (quorate) { _cs_node_quorum_event("quorate"); } else { _cs_node_quorum_event("not quorate"); } } static int _cs_quorum_dispatch(int fd, int revents, void *data) { quorum_dispatch(quorum_handle, CS_DISPATCH_ONE); return 0; } static void _cs_quorum_init(void) { cs_error_t rc; int fd; quorum_callbacks_t quorum_callbacks = { .quorum_notify_fn = _cs_quorum_notification, }; rc = quorum_initialize (&quorum_handle, &quorum_callbacks); if (rc != CS_OK) { - syslog(LOG_ERR, "Could not connect to corosync(quorum)"); + 
qb_log(LOG_ERR, "Could not connect to corosync(quorum)"); return; } quorum_fd_get(quorum_handle, &fd); qb_loop_poll_add(main_loop, QB_LOOP_MED, fd, POLLIN|POLLNVAL, NULL, _cs_quorum_dispatch); quorum_trackstart(quorum_handle, CS_TRACK_CHANGES); } static void _cs_quorum_finalize(void) { quorum_finalize (quorum_handle); } #ifdef HAVE_DBUS /* * dbus notifications */ static void _cs_dbus_auto_flush(void) { dbus_connection_ref(db); dbus_connection_read_write(db, 500); dbus_connection_unref(db); } static void _cs_dbus_release(void) { DBusError err; if (!db) return; dbus_error_init(&err); dbus_bus_release_name(db, DBUS_CS_NAME, &err); dbus_error_free(&err); dbus_connection_unref(db); db = NULL; } static void _cs_dbus_node_quorum_event(char *nodename, uint32_t nodeid, const char *state) { DBusMessage *msg = NULL; int ret = -1; if (err_set) { - syslog (LOG_ERR, "%s\n", _err); + qb_log(LOG_ERR, "%s", _err); err_set = 0; } if (!db) { goto out_free; } if (dbus_connection_get_is_connected(db) != TRUE) { err_set = 1; snprintf(_err, sizeof(_err), "DBus connection lost"); _cs_dbus_release(); goto out_unlock; } _cs_dbus_auto_flush(); if (!(msg = dbus_message_new_signal(DBUS_CS_PATH, DBUS_CS_IFACE, "QuorumStateChange"))) { - syslog (LOG_ERR, "%s(%d) error\n", __func__, __LINE__); + qb_log(LOG_ERR, "error creating dbus signal"); goto out_unlock; } if (!dbus_message_append_args(msg, DBUS_TYPE_STRING, &nodename, DBUS_TYPE_UINT32, &nodeid, DBUS_TYPE_STRING, &state, DBUS_TYPE_INVALID)) { - syslog (LOG_ERR, "%s(%d) error\n", __func__, __LINE__); + qb_log(LOG_ERR, "error adding args to quorum signal"); goto out_unlock; } dbus_connection_send(db, msg, NULL); ret = 0; out_unlock: - if (ret == -1) { - syslog (LOG_ERR, "%s() error\n", __func__); - } - if (msg) + if (msg) { dbus_message_unref(msg); + } out_free: return; } static void _cs_dbus_node_membership_event(char *nodename, uint32_t nodeid, char *state, char* ip) { DBusMessage *msg = NULL; int ret = -1; if (err_set) { - syslog (LOG_ERR, 
"%s\n", _err); + qb_log(LOG_ERR, "%s", _err); err_set = 0; } if (!db) { goto out_free; } if (dbus_connection_get_is_connected(db) != TRUE) { err_set = 1; snprintf(_err, sizeof(_err), "DBus connection lost"); _cs_dbus_release(); goto out_unlock; } _cs_dbus_auto_flush(); if (!(msg = dbus_message_new_signal(DBUS_CS_PATH, DBUS_CS_IFACE, "NodeStateChange"))) { - syslog (LOG_ERR, "%s(%d) error\n", __func__, __LINE__); + qb_log(LOG_ERR, "error creating NodeStateChange signal"); goto out_unlock; } if (!dbus_message_append_args(msg, DBUS_TYPE_STRING, &nodename, DBUS_TYPE_UINT32, &nodeid, DBUS_TYPE_STRING, &ip, DBUS_TYPE_STRING, &state, DBUS_TYPE_INVALID)) { - syslog (LOG_ERR, "%s(%d) error\n", __func__, __LINE__); + qb_log(LOG_ERR, "error adding args to NodeStateChange signal"); goto out_unlock; } dbus_connection_send(db, msg, NULL); ret = 0; out_unlock: - if (ret == -1) { - syslog (LOG_ERR, "%s() error\n", __func__); - } - if (msg) + if (msg) { dbus_message_unref(msg); + } out_free: return; } static void _cs_dbus_application_connection_event(char *nodename, uint32_t nodeid, char *app_name, const char *state) { DBusMessage *msg = NULL; int ret = -1; if (err_set) { - syslog (LOG_ERR, "%s\n", _err); + qb_log(LOG_ERR, "%s", _err); err_set = 0; } if (!db) { goto out_free; } if (dbus_connection_get_is_connected(db) != TRUE) { err_set = 1; snprintf(_err, sizeof(_err), "DBus connection lost"); _cs_dbus_release(); goto out_unlock; } _cs_dbus_auto_flush(); if (!(msg = dbus_message_new_signal(DBUS_CS_PATH, DBUS_CS_IFACE, "ConnectionStateChange"))) { - syslog (LOG_ERR, "%s(%d) error\n", __func__, __LINE__); + qb_log(LOG_ERR, "error creating ConnectionStateChange signal"); goto out_unlock; } if (!dbus_message_append_args(msg, DBUS_TYPE_STRING, &nodename, DBUS_TYPE_UINT32, &nodeid, DBUS_TYPE_STRING, &app_name, DBUS_TYPE_STRING, &state, DBUS_TYPE_INVALID)) { - syslog (LOG_ERR, "%s(%d) error\n", __func__, __LINE__); + qb_log(LOG_ERR, "error adding args to ConnectionStateChange signal"); 
goto out_unlock; } dbus_connection_send(db, msg, NULL); ret = 0; out_unlock: - if (msg) + if (msg) { dbus_message_unref(msg); + } out_free: return; } static void _cs_dbus_init(void) { DBusConnection *dbc = NULL; DBusError err; dbus_error_init(&err); dbc = dbus_bus_get(DBUS_BUS_SYSTEM, &err); if (!dbc) { snprintf(_err, sizeof(_err), "dbus_bus_get: %s", err.message); err_set = 1; dbus_error_free(&err); return; } dbus_connection_set_exit_on_disconnect(dbc, FALSE); db = dbc; notifiers[num_notifiers].node_membership_fn = _cs_dbus_node_membership_event; notifiers[num_notifiers].node_quorum_fn = _cs_dbus_node_quorum_event; notifiers[num_notifiers].application_connection_fn = _cs_dbus_application_connection_event; num_notifiers++; } #endif /* HAVE_DBUS */ #ifdef ENABLE_SNMP static netsnmp_session *snmp_init (const char *target) { static netsnmp_session *session = NULL; #ifndef NETSNMPV54 char default_port[128]; snprintf (default_port, sizeof (default_port), "%s:162", target); #endif if (session) { return (session); } if (target == NULL) { return NULL; } session = malloc (sizeof (netsnmp_session)); snmp_sess_init (session); session->version = SNMP_VERSION_2c; session->callback = NULL; session->callback_magic = NULL; session = snmp_add(session, #ifdef NETSNMPV54 netsnmp_transport_open_client ("snmptrap", target), #else netsnmp_tdomain_transport (default_port, 0, "udp"), #endif NULL, NULL); if (session == NULL) { - syslog(LOG_ERR, "Could not create snmp transport"); + qb_log(LOG_ERR, 0, "Could not create snmp transport"); } return (session); } static inline void add_field ( netsnmp_pdu *trap_pdu, u_char asn_type, const char *prefix, void *value, size_t value_size) { oid _oid[MAX_OID_LEN]; size_t _oid_len = MAX_OID_LEN; if (snmp_parse_oid(prefix, _oid, &_oid_len)) { snmp_pdu_add_variable (trap_pdu, _oid, _oid_len, asn_type, (u_char *) value, value_size); } } static void _cs_snmp_node_membership_event(char *nodename, uint32_t nodeid, char *state, char* ip) { int ret; char 
csysuptime[CS_TIMESTAMP_STR_LEN]; static oid snmptrap_oid[] = { 1,3,6,1,6,3,1,1,4,1,0 }; static oid sysuptime_oid[] = { 1,3,6,1,2,1,1,3,0 }; time_t now = time (NULL); netsnmp_pdu *trap_pdu; netsnmp_session *session = snmp_init (snmp_manager); if (session == NULL) { - syslog (LOG_NOTICE, "Failed to init SNMP session.\n"); + qb_log(LOG_NOTICE, "Failed to init SNMP session."); return ; } trap_pdu = snmp_pdu_create (SNMP_MSG_TRAP2); if (!trap_pdu) { - syslog (LOG_NOTICE, "Failed to create SNMP notification.\n"); + qb_log(LOG_NOTICE, "Failed to create SNMP notification."); return ; } /* send uptime */ snprintf (csysuptime, CS_TIMESTAMP_STR_LEN, "%ld", now); snmp_add_var (trap_pdu, sysuptime_oid, sizeof (sysuptime_oid) / sizeof (oid), 't', csysuptime); snmp_add_var (trap_pdu, snmptrap_oid, sizeof (snmptrap_oid) / sizeof (oid), 'o', SNMP_OID_TRAPS_NODE); /* Add extries to the trap */ add_field (trap_pdu, ASN_OCTET_STR, SNMP_OID_OBJECT_NODE_NAME, (void*)nodename, strlen (nodename)); add_field (trap_pdu, ASN_INTEGER, SNMP_OID_OBJECT_NODE_ID, (void*)&nodeid, sizeof (nodeid)); add_field (trap_pdu, ASN_OCTET_STR, SNMP_OID_OBJECT_NODE_ADDR, (void*)ip, strlen (ip)); add_field (trap_pdu, ASN_OCTET_STR, SNMP_OID_OBJECT_NODE_STATUS, (void*)state, strlen (state)); /* Send and cleanup */ ret = snmp_send (session, trap_pdu); if (ret == 0) { /* error */ - syslog (LOG_ERR, "Could not send SNMP trap"); + qb_log(LOG_ERR, "Could not send SNMP trap"); snmp_free_pdu (trap_pdu); } } static void _cs_snmp_node_quorum_event(char *nodename, uint32_t nodeid, const char *state) { int ret; char csysuptime[20]; static oid snmptrap_oid[] = { 1,3,6,1,6,3,1,1,4,1,0 }; static oid sysuptime_oid[] = { 1,3,6,1,2,1,1,3,0 }; time_t now = time (NULL); netsnmp_pdu *trap_pdu; netsnmp_session *session = snmp_init (snmp_manager); if (session == NULL) { - syslog (LOG_NOTICE, "Failed to init SNMP session.\n"); + qb_log(LOG_NOTICE, "Failed to init SNMP session."); return ; } trap_pdu = snmp_pdu_create 
(SNMP_MSG_TRAP2); if (!trap_pdu) { - syslog (LOG_NOTICE, "Failed to create SNMP notification.\n"); + qb_log(LOG_NOTICE, "Failed to create SNMP notification."); return ; } /* send uptime */ sprintf (csysuptime, "%ld", now); snmp_add_var (trap_pdu, sysuptime_oid, sizeof (sysuptime_oid) / sizeof (oid), 't', csysuptime); snmp_add_var (trap_pdu, snmptrap_oid, sizeof (snmptrap_oid) / sizeof (oid), 'o', SNMP_OID_TRAPS_NODE); /* Add extries to the trap */ add_field (trap_pdu, ASN_OCTET_STR, SNMP_OID_OBJECT_NODE_NAME, (void*)nodename, strlen (nodename)); add_field (trap_pdu, ASN_INTEGER, SNMP_OID_OBJECT_NODE_ID, (void*)&nodeid, sizeof (nodeid)); add_field (trap_pdu, ASN_OCTET_STR, SNMP_OID_OBJECT_QUORUM, (void*)state, strlen (state)); /* Send and cleanup */ ret = snmp_send (session, trap_pdu); if (ret == 0) { /* error */ - syslog (LOG_ERR, "Could not send SNMP trap"); + qb_log(LOG_ERR, "Could not send SNMP trap"); snmp_free_pdu (trap_pdu); } } static void _cs_snmp_init(void) { if (snmp_manager == NULL) { snmp_manager = (char*)local_host; } notifiers[num_notifiers].node_membership_fn = _cs_snmp_node_membership_event; notifiers[num_notifiers].node_quorum_fn = _cs_snmp_node_quorum_event; notifiers[num_notifiers].application_connection_fn = NULL; num_notifiers++; } #endif /* ENABLE_SNMP */ static void _cs_syslog_node_membership_event(char *nodename, uint32_t nodeid, char *state, char* ip) { - syslog (LOG_NOTICE, "%s[%d] ip:%s %s\n", nodename, nodeid, ip, state); + qb_log(LOG_NOTICE, "%s[%d] ip:%s %s", nodename, nodeid, ip, state); } static void _cs_syslog_node_quorum_event(char *nodename, uint32_t nodeid, const char *state) { if (strcmp(state, "quorate") == 0) { - syslog (LOG_NOTICE, "%s[%d] is now %s\n", nodename, nodeid, state); + qb_log(LOG_NOTICE, "%s[%d] is now %s", nodename, nodeid, state); } else { - syslog (LOG_NOTICE, "%s[%d] has lost quorum\n", nodename, nodeid); + qb_log(LOG_NOTICE, "%s[%d] has lost quorum", nodename, nodeid); } } static void 
_cs_syslog_application_connection_event(char *nodename, uint32_t nodeid, char* app_name, const char *state) { if (strcmp(state, "connected") == 0) { - syslog (LOG_ERR, "%s[%d] %s is now %s to corosync\n", nodename, nodeid, app_name, state); + qb_log(LOG_NOTICE, "%s[%d] %s is now %s to corosync", nodename, nodeid, app_name, state); } else { - syslog (LOG_ERR, "%s[%d] %s is now %s from corosync\n", nodename, nodeid, app_name, state); + qb_log(LOG_NOTICE, "%s[%d] %s is now %s from corosync", nodename, nodeid, app_name, state); } } static void _cs_node_membership_event(char *nodename, uint32_t nodeid, char *state, char* ip) { int i; for (i = 0; i < num_notifiers; i++) { if (notifiers[i].node_membership_fn) { notifiers[i].node_membership_fn(nodename, nodeid, state, ip); } } } static void _cs_local_node_info_get(char **nodename, uint32_t *nodeid) { cs_error_t rc; corosync_cfg_handle_t cfg_handle; if (local_nodeid == 0) { rc = corosync_cfg_initialize(&cfg_handle, NULL); if (rc != CS_OK) { syslog (LOG_ERR, "Failed to initialize the cfg API. 
Error %d\n", rc); exit (EXIT_FAILURE); } rc = corosync_cfg_local_get (cfg_handle, &local_nodeid); corosync_cfg_finalize(cfg_handle); if (rc != CS_OK) { local_nodeid = 0; strncpy(local_nodename, "localhost", sizeof (local_nodename)); local_nodename[sizeof (local_nodename) - 1] = '\0'; } else { gethostname(local_nodename, CS_MAX_NAME_LENGTH); } } *nodeid = local_nodeid; *nodename = local_nodename; } static void _cs_node_quorum_event(const char *state) { int i; char *nodename; uint32_t nodeid; _cs_local_node_info_get(&nodename, &nodeid); for (i = 0; i < num_notifiers; i++) { if (notifiers[i].node_quorum_fn) { notifiers[i].node_quorum_fn(nodename, nodeid, state); } } } static void _cs_application_connection_event(char *app_name, const char *state) { int i; char *nodename; uint32_t nodeid; _cs_local_node_info_get(&nodename, &nodeid); for (i = 0; i < num_notifiers; i++) { if (notifiers[i].application_connection_fn) { notifiers[i].application_connection_fn(nodename, nodeid, app_name, state); } } } static int32_t sig_exit_handler(int32_t num, void *data) { qb_loop_stop(main_loop); return 0; } static void _cs_confdb_init(void) { hdb_handle_t obj_handle; cs_error_t rc; int conf_fd = 0; rc = confdb_initialize (&confdb_handle, &callbacks); if (rc != CS_OK) { - syslog (LOG_ERR, "Failed to initialize the objdb API. Error %d\n", rc); + qb_log(LOG_ERR, "Failed to initialize the objdb API. Error %d", rc); exit (EXIT_FAILURE); } confdb_fd_get(confdb_handle, &conf_fd); qb_loop_poll_add(main_loop, QB_LOOP_MED, conf_fd, POLLIN|POLLNVAL, NULL, _cs_confdb_dispatch); rc = _cs_confdb_find_object (confdb_handle, "runtime.connections.", &obj_handle); if (rc != CS_OK) { - syslog (LOG_ERR, - "Failed to find the connections object. Error %d\n", rc); + qb_log(LOG_ERR, + "Failed to find the connections object. 
Error %d", rc); exit (EXIT_FAILURE); } rc = confdb_track_changes (confdb_handle, obj_handle, CONFDB_TRACK_DEPTH_ONE); if (rc != CS_OK) { - syslog (LOG_ERR, - "Failed to track the connections object. Error %d\n", rc); + qb_log(LOG_ERR, + "Failed to track the connections object. Error %d", rc); exit (EXIT_FAILURE); } rc = _cs_confdb_find_object(confdb_handle, "runtime.totem.pg.mrp.srp.members.", &obj_handle); if (rc != CS_OK) { - syslog (LOG_ERR, "Failed to find the object. Error %d\n", rc); + qb_log(LOG_ERR, "Failed to find the object. Error %d", rc); exit (EXIT_FAILURE); } rc = confdb_track_changes(confdb_handle, obj_handle, CONFDB_TRACK_DEPTH_RECURSIVE); if (rc != CS_OK) { - syslog (LOG_ERR, - "Failed to track the object. Error %d\n", rc); + qb_log(LOG_ERR, + "Failed to track the object. Error %d", rc); exit (EXIT_FAILURE); } } static void _cs_confdb_finalize(void) { confdb_stop_track_changes (confdb_handle); confdb_finalize (confdb_handle); } static void _cs_check_config(void) { - if (conf[CS_NTF_LOG] == 0 && - conf[CS_NTF_STDOUT] == 0 && - conf[CS_NTF_SNMP] == 0 && - conf[CS_NTF_DBUS] == 0) { - syslog(LOG_ERR, "no event type enabled, see corosync-notifyd -h, exiting."); + if (conf[CS_NTF_LOG] == QB_FALSE && + conf[CS_NTF_STDOUT] == QB_FALSE && + conf[CS_NTF_SNMP] == QB_FALSE && + conf[CS_NTF_DBUS] == QB_FALSE) { + qb_log(LOG_ERR, "no event type enabled, see corosync-notifyd -h, exiting."); exit(EXIT_FAILURE); } #ifndef ENABLE_SNMP if (conf[CS_NTF_SNMP]) { - syslog(LOG_ERR, "Not compiled with SNMP support enabled, exiting."); + qb_log(LOG_ERR, "Not compiled with SNMP support enabled, exiting."); exit(EXIT_FAILURE); } #endif #ifndef HAVE_DBUS if (conf[CS_NTF_DBUS]) { - syslog(LOG_ERR, "Not compiled with DBus support enabled, exiting."); + qb_log(LOG_ERR, "Not compiled with DBus support enabled, exiting."); exit(EXIT_FAILURE); } #endif if (conf[CS_NTF_STDOUT] && !conf[CS_NTF_FG]) { - syslog(LOG_ERR, "configured to print to stdout and run in the background, 
exiting"); + qb_log(LOG_ERR, "configured to print to stdout and run in the background, exiting"); exit(EXIT_FAILURE); } if (conf[CS_NTF_SNMP] && conf[CS_NTF_DBUS]) { - syslog(LOG_ERR, "configured to send snmp traps and dbus signals - are you sure?."); + qb_log(LOG_ERR, "configured to send snmp traps and dbus signals - are you sure?."); } } static void _cs_usage(void) { fprintf(stderr, "usage:\n"\ " -f : Start application in foreground.\n"\ " -l : Log all events.\n"\ " -o : Print events to stdout (turns on -l).\n"\ " -s : Send SNMP traps on all events.\n"\ " -m : SNMP Manager IP address (defaults to localhost).\n"\ " -d : Send DBUS signals on all events.\n"\ " -h : Print this help\n\n"); } int main(int argc, char *argv[]) { int ch; - conf[CS_NTF_FG] = 0; - conf[CS_NTF_LOG] = 0; - conf[CS_NTF_STDOUT] = 0; - conf[CS_NTF_SNMP] = 0; - conf[CS_NTF_DBUS] = 0; + conf[CS_NTF_FG] = QB_FALSE; + conf[CS_NTF_LOG] = QB_FALSE; + conf[CS_NTF_STDOUT] = QB_FALSE; + conf[CS_NTF_SNMP] = QB_FALSE; + conf[CS_NTF_DBUS] = QB_FALSE; while ((ch = getopt (argc, argv, "floshdm:")) != EOF) { switch (ch) { case 'f': - conf[CS_NTF_FG] = 1; + conf[CS_NTF_FG] = QB_TRUE; break; case 'l': - conf[CS_NTF_LOG] = 1; + conf[CS_NTF_LOG] = QB_TRUE; break; case 'm': - conf[CS_NTF_SNMP] = 1; + conf[CS_NTF_SNMP] = QB_TRUE; strncpy(snmp_manager_buf, optarg, sizeof (snmp_manager_buf)); snmp_manager_buf[sizeof (snmp_manager_buf) - 1] = '\0'; snmp_manager = snmp_manager_buf; break; case 'o': - conf[CS_NTF_LOG] = 1; - conf[CS_NTF_STDOUT] = 1; + conf[CS_NTF_LOG] = QB_TRUE; + conf[CS_NTF_STDOUT] = QB_TRUE; break; case 's': - conf[CS_NTF_SNMP] = 1; + conf[CS_NTF_SNMP] = QB_TRUE; break; case 'd': - conf[CS_NTF_DBUS] = 1; + conf[CS_NTF_DBUS] = QB_TRUE; break; case 'h': default: _cs_usage(); return EXIT_FAILURE; } } + qb_log_init("notifyd", LOG_DAEMON, LOG_INFO); + if (conf[CS_NTF_STDOUT]) { - openlog(NULL, LOG_PID|LOG_PERROR, LOG_DAEMON); - } else { - openlog(NULL, LOG_PID, LOG_DAEMON); + 
qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD, + QB_LOG_FILTER_FILE, "*", LOG_DEBUG); + qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, conf[CS_NTF_STDOUT]); } _cs_check_config(); if (!conf[CS_NTF_FG]) { if (daemon(0, 0) < 0) { perror("daemon() failed"); return EXIT_FAILURE; } } num_notifiers = 0; if (conf[CS_NTF_LOG]) { notifiers[num_notifiers].node_membership_fn = _cs_syslog_node_membership_event; notifiers[num_notifiers].node_quorum_fn = _cs_syslog_node_quorum_event; notifiers[num_notifiers].application_connection_fn = _cs_syslog_application_connection_event; num_notifiers++; } main_loop = qb_loop_create(); _cs_confdb_init(); _cs_quorum_init(); #ifdef HAVE_DBUS if (conf[CS_NTF_DBUS]) { _cs_dbus_init(); } #endif /* HAVE_DBUS */ #ifdef ENABLE_SNMP if (conf[CS_NTF_SNMP]) { _cs_snmp_init(); } #endif /* ENABLE_SNMP */ qb_loop_signal_add(main_loop, QB_LOOP_HIGH, SIGINT, NULL, sig_exit_handler, NULL); qb_loop_signal_add(main_loop, QB_LOOP_HIGH, SIGQUIT, NULL, sig_exit_handler, NULL); qb_loop_signal_add(main_loop, QB_LOOP_HIGH, SIGTERM, NULL, sig_exit_handler, NULL); qb_loop_run(main_loop); #ifdef HAVE_DBUS if (conf[CS_NTF_DBUS]) { _cs_dbus_release(); } #endif /* HAVE_DBUS */ _cs_quorum_finalize(); _cs_confdb_finalize(); return 0; }