diff --git a/exec/ipc_glue.c b/exec/ipc_glue.c index 0ea4b9f4..60865805 100644 --- a/exec/ipc_glue.c +++ b/exec/ipc_glue.c @@ -1,761 +1,928 @@ /* * Copyright (c) 2010 Red Hat, Inc. * * All rights reserved. * * Author: Angus Salkeld * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Red Hat, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mainconfig.h" #include "sync.h" #include "syncv2.h" #include "timer.h" #include "main.h" #include "util.h" #include "apidef.h" #include "service.h" LOGSYS_DECLARE_SUBSYS ("MAIN"); static struct corosync_api_v1 *api = NULL; static int ipc_subsys_id = -1; static int32_t ipc_not_enough_fds_left = 0; static int32_t ipc_fc_is_quorate; /* boolean */ static int32_t ipc_fc_totem_queue_level; /* percentage used */ static int32_t ipc_fc_sync_in_process; /* boolean */ static qb_handle_t object_connection_handle; struct cs_ipcs_mapper { int32_t id; qb_ipcs_service_t *inst; char name[256]; }; +struct outq_item { + void *msg; + size_t mlen; + struct list_head list; +}; + static struct cs_ipcs_mapper ipcs_mapper[SERVICE_HANDLER_MAXIMUM_COUNT]; static int32_t cs_ipcs_job_add(enum qb_loop_priority p, void *data, qb_loop_job_dispatch_fn fn); static int32_t cs_ipcs_dispatch_add(enum qb_loop_priority p, int32_t fd, int32_t events, void *data, qb_ipcs_dispatch_fn_t fn); static int32_t cs_ipcs_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t events, void *data, qb_ipcs_dispatch_fn_t fn); static int32_t cs_ipcs_dispatch_del(int32_t fd); static struct qb_ipcs_poll_handlers corosync_poll_funcs = { .job_add = cs_ipcs_job_add, .dispatch_add = cs_ipcs_dispatch_add, .dispatch_mod = cs_ipcs_dispatch_mod, .dispatch_del = cs_ipcs_dispatch_del, }; static int32_t cs_ipcs_connection_accept (qb_ipcs_connection_t *c, uid_t euid, gid_t egid); static void cs_ipcs_connection_created(qb_ipcs_connection_t *c); static int32_t cs_ipcs_msg_process(qb_ipcs_connection_t *c, void *data, size_t size); static int32_t cs_ipcs_connection_closed (qb_ipcs_connection_t *c); static void cs_ipcs_connection_destroyed (qb_ipcs_connection_t *c); static struct qb_ipcs_service_handlers corosync_service_funcs = { .connection_accept = cs_ipcs_connection_accept, .connection_created = cs_ipcs_connection_created, .msg_process = cs_ipcs_msg_process, .connection_closed = cs_ipcs_connection_closed, .connection_destroyed = cs_ipcs_connection_destroyed, }; static const char* cs_ipcs_serv_short_name(int32_t service_id) { const char *name; switch (service_id) { case EVS_SERVICE: name = "evs"; break; case CLM_SERVICE: name = "saClm"; break; case AMF_SERVICE: name = "saAmf"; break; case CKPT_SERVICE: name = "saCkpt"; break; case EVT_SERVICE: name = "saEvt"; break; case LCK_SERVICE: name = "saLck"; break; case MSG_SERVICE: name = "saMsg"; break; case CFG_SERVICE: name = "cfg"; break; case CPG_SERVICE: name = "cpg"; break; case CMAN_SERVICE: name = "cman"; break; case PCMK_SERVICE: name = "pacemaker.engine"; break; case CONFDB_SERVICE: name = "confdb"; break; case QUORUM_SERVICE: name = "quorum"; break; case PLOAD_SERVICE: name = "pload"; break; case TMR_SERVICE: name = "saTmr"; break; case VOTEQUORUM_SERVICE: name = "votequorum"; break; case NTF_SERVICE: name = "saNtf"; break; case AMF_V2_SERVICE: name = "saAmfV2"; break; case TST_SV1_SERVICE: name = "tst"; break; case TST_SV2_SERVICE: name = "tst2"; break; case MON_SERVICE: name = "mon"; break; case WD_SERVICE: name = "wd"; break; default: name = NULL; break; } return name; } int32_t cs_ipcs_service_destroy(int32_t service_id) { if (ipcs_mapper[service_id].inst) { qb_ipcs_destroy(ipcs_mapper[service_id].inst); ipcs_mapper[service_id].inst = NULL; } return 0; } static int32_t cs_ipcs_connection_accept (qb_ipcs_connection_t *c, uid_t euid, gid_t egid) { struct list_head *iter; int32_t service = qb_ipcs_service_id_get(c); if (ais_service[service] == NULL || ais_service_exiting[service] || ipcs_mapper[service].inst == NULL) { return -ENOSYS; } if (ipc_not_enough_fds_left) { return -EMFILE; } if (euid == 0 || egid == 0) { return 0; } for (iter = uidgid_list_head.next; iter != &uidgid_list_head; iter = iter->next) { struct uidgid_item *ugi = qb_list_entry (iter, struct uidgid_item, list); if (euid == ugi->uid || egid == ugi->gid) return 0; } log_printf(LOGSYS_LEVEL_ERROR, "Denied connection attempt from %d:%d", euid, egid); return -EACCES; } static char * pid_to_name (pid_t pid, char *out_name, size_t name_len) { char *name; char *rest; FILE *fp; char fname[32]; char buf[256]; snprintf (fname, 32, "/proc/%d/stat", pid); fp = fopen (fname, "r"); if (!fp) { return NULL; } if (fgets (buf, sizeof (buf), fp) == NULL) { fclose (fp); return NULL; } fclose (fp); name = strrchr (buf, '('); if (!name) { return NULL; } /* move past the bracket */ name++; rest = strrchr (buf, ')'); if (rest == NULL || rest[1] != ' ') { return NULL; } *rest = '\0'; /* move past the NULL and space */ rest += 2; /* copy the name */ strncpy (out_name, name, name_len); out_name[name_len - 1] = '\0'; return out_name; } struct cs_ipcs_conn_context { qb_handle_t stats_handle; + struct list_head outq_head; + int32_t queuing; + uint32_t queued; + uint64_t invalid_request; + uint64_t overload; + uint32_t sent; char data[1]; }; static void cs_ipcs_connection_created(qb_ipcs_connection_t *c) { int32_t service = 0; uint32_t zero_32 = 0; uint64_t zero_64 = 0; unsigned int key_incr_dummy; qb_handle_t object_handle; struct cs_ipcs_conn_context *context; char conn_name[42]; char proc_name[32]; struct qb_ipcs_connection_stats stats; int32_t size = sizeof(struct cs_ipcs_conn_context); log_printf(LOG_INFO, "%s() new connection", __func__); service = qb_ipcs_service_id_get(c); size += ais_service[service]->private_data_size; context = calloc(1, size); + + list_init(&context->outq_head); + context->queuing = QB_FALSE; + context->queued = 0; + context->sent = 0; + qb_ipcs_context_set(c, context); ais_service[service]->lib_init_fn(c); api->object_key_increment (object_connection_handle, "active", strlen("active"), &key_incr_dummy); qb_ipcs_connection_stats_get(c, &stats, QB_FALSE); if (stats.client_pid > 0) { if (pid_to_name (stats.client_pid, proc_name, sizeof(proc_name))) { snprintf (conn_name, sizeof(conn_name), "%s:%d:%p", proc_name, stats.client_pid, c); } else { snprintf (conn_name, sizeof(conn_name), "%d:%p", stats.client_pid, c); } } else { snprintf (conn_name, sizeof(conn_name), "%p", c); } api->object_create (object_connection_handle, &object_handle, conn_name, strlen (conn_name)); context->stats_handle = object_handle; api->object_key_create_typed (object_handle, "service_id", &zero_32, sizeof (zero_32), OBJDB_VALUETYPE_UINT32); api->object_key_create_typed (object_handle, "client_pid", &zero_32, sizeof (zero_32), OBJDB_VALUETYPE_INT32); api->object_key_create_typed (object_handle, "responses", &zero_64, sizeof (zero_64), OBJDB_VALUETYPE_UINT64); api->object_key_create_typed (object_handle, "dispatched", &zero_64, sizeof (zero_64), OBJDB_VALUETYPE_UINT64); api->object_key_create_typed (object_handle, "requests", &zero_64, sizeof (zero_64), OBJDB_VALUETYPE_INT64); api->object_key_create_typed (object_handle, "send_retries", &zero_64, sizeof (zero_64), OBJDB_VALUETYPE_UINT64); api->object_key_create_typed (object_handle, "recv_retries", &zero_64, sizeof (zero_64), OBJDB_VALUETYPE_UINT64); api->object_key_create_typed (object_handle, "flow_control", &zero_32, sizeof (zero_32), OBJDB_VALUETYPE_UINT32); api->object_key_create_typed (object_handle, "flow_control_count", &zero_64, sizeof (zero_64), OBJDB_VALUETYPE_UINT64); + + api->object_key_create_typed (object_handle, + "queue_size", + &zero_32, sizeof (zero_32), + OBJDB_VALUETYPE_UINT32); + + api->object_key_create_typed (object_handle, + "invalid_request", + &zero_64, sizeof (zero_64), + OBJDB_VALUETYPE_UINT64); + + api->object_key_create_typed (object_handle, + "overload", + &zero_64, sizeof (zero_64), + OBJDB_VALUETYPE_UINT64); } void cs_ipc_refcnt_inc(void *conn) { qb_ipcs_connection_ref(conn); } void cs_ipc_refcnt_dec(void *conn) { qb_ipcs_connection_unref(conn); } void *cs_ipcs_private_data_get(void *conn) { struct cs_ipcs_conn_context *cnx; cnx = qb_ipcs_context_get(conn); return &cnx->data[0]; } static void cs_ipcs_connection_destroyed (qb_ipcs_connection_t *c) { - struct cs_ipcs_conn_context *cnx; + struct cs_ipcs_conn_context *context; + struct list_head *list, *list_next; + struct outq_item *outq_item; + log_printf(LOG_INFO, "%s() ", __func__); - cnx = qb_ipcs_context_get(c); - if (cnx) { - free(cnx); + context = qb_ipcs_context_get(c); + if (context) { + for (list = context->outq_head.next; + list != &context->outq_head; list = list_next) { + + list_next = list->next; + outq_item = list_entry (list, struct outq_item, list); + + list_del (list); + free (outq_item->msg); + free (outq_item); + } + free(context); } } static int32_t cs_ipcs_connection_closed (qb_ipcs_connection_t *c) { struct cs_ipcs_conn_context *cnx; unsigned int key_incr_dummy; int32_t res = 0; int32_t service = qb_ipcs_service_id_get(c); log_printf(LOG_INFO, "%s() ", __func__); res = ais_service[service]->lib_exit_fn(c); if (res != 0) { return res; } cnx = qb_ipcs_context_get(c); api->object_destroy (cnx->stats_handle); api->object_key_increment (object_connection_handle, "closed", strlen("closed"), &key_incr_dummy); api->object_key_decrement (object_connection_handle, "active", strlen("active"), &key_incr_dummy); return 0; } int cs_ipcs_response_iov_send (void *conn, const struct iovec *iov, unsigned int iov_len) { int32_t rc = qb_ipcs_response_sendv(conn, iov, iov_len); if (rc >= 0) { return 0; } return rc; } int cs_ipcs_response_send(void *conn, const void *msg, size_t mlen) { int32_t rc = qb_ipcs_response_send(conn, msg, mlen); if (rc >= 0) { return 0; } return rc; } -int cs_ipcs_dispatch_send(void *conn, const void *msg, size_t mlen) +static void outq_flush (void *data) { - int32_t rc = qb_ipcs_event_send(conn, msg, mlen); - if (rc >= 0) { - return 0; + qb_ipcs_connection_t *conn = data; + struct list_head *list, *list_next; + struct outq_item *outq_item; + int32_t rc; + struct cs_ipcs_conn_context *context = qb_ipcs_context_get(conn); + + for (list = context->outq_head.next; + list != &context->outq_head; list = list_next) { + + list_next = list->next; + outq_item = list_entry (list, struct outq_item, list); + + rc = qb_ipcs_event_send(conn, outq_item->msg, outq_item->mlen); + if (rc != outq_item->mlen) { + break; + } + context->sent++; + context->queued--; + + list_del (list); + free (outq_item->msg); + free (outq_item); } - return rc; + if (list_empty (&context->outq_head)) { + context->queuing = QB_FALSE; + log_printf(LOGSYS_LEVEL_INFO, "Q empty, queued:%d sent:%d.", + context->queued, context->sent); + context->queued = 0; + context->sent = 0; + return; + } + qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, conn, outq_flush); + if (rc < 0 && rc != -EAGAIN) { + log_printf(LOGSYS_LEVEL_ERROR, "event_send retuned %d!", rc); + } +} + +static void msg_send_or_queue(qb_ipcs_connection_t *conn, const struct iovec *iov, uint32_t iov_len) +{ + int32_t rc = 0; + int32_t i; + int32_t bytes_msg = 0; + struct outq_item *outq_item; + char *write_buf = 0; + struct cs_ipcs_conn_context *context = qb_ipcs_context_get(conn); + + for (i = 0; i < iov_len; i++) { + bytes_msg += iov[i].iov_len; + } + + if (!context->queuing) { + assert(list_empty (&context->outq_head)); + rc = qb_ipcs_event_sendv(conn, iov, iov_len); + if (rc == bytes_msg) { + context->sent++; + return; + } + if (rc == -EAGAIN) { + context->queued = 0; + context->sent = 0; + context->queuing = QB_TRUE; + qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, conn, outq_flush); + } else { + log_printf(LOGSYS_LEVEL_ERROR, "event_send retuned %d, expected %d!", rc, bytes_msg); + return; + } + } + outq_item = malloc (sizeof (struct outq_item)); + if (outq_item == NULL) { + qb_ipcs_disconnect(conn); + return; + } + outq_item->msg = malloc (bytes_msg); + if (outq_item->msg == NULL) { + free (outq_item); + qb_ipcs_disconnect(conn); + return; + } + + write_buf = outq_item->msg; + for (i = 0; i < iov_len; i++) { + memcpy (write_buf, iov[i].iov_base, iov[i].iov_len); + write_buf += iov[i].iov_len; + } + outq_item->mlen = bytes_msg; + list_init (&outq_item->list); + list_add_tail (&outq_item->list, &context->outq_head); + context->queued++; +} + +int cs_ipcs_dispatch_send(void *conn, const void *msg, size_t mlen) +{ + struct iovec iov; + iov.iov_base = (void *)msg; + iov.iov_len = mlen; + msg_send_or_queue (conn, &iov, 1); + return 0; } int cs_ipcs_dispatch_iov_send (void *conn, const struct iovec *iov, unsigned int iov_len) { - int32_t rc = qb_ipcs_event_sendv(conn, iov, iov_len); - if (rc >= 0) { - return 0; - } - return rc; + msg_send_or_queue(conn, iov, iov_len); + return 0; } static int32_t cs_ipcs_msg_process(qb_ipcs_connection_t *c, void *data, size_t size) { struct qb_ipc_response_header response; struct qb_ipc_request_header *request_pt = (struct qb_ipc_request_header *)data; int32_t service = qb_ipcs_service_id_get(c); - int32_t send_ok; + int32_t send_ok = 0; + int32_t is_async_call = QB_FALSE; ssize_t res = -1; int sending_allowed_private_data; + struct cs_ipcs_conn_context *cnx; send_ok = corosync_sending_allowed (service, request_pt->id, request_pt, &sending_allowed_private_data); + is_async_call = (service == CPG_SERVICE && request_pt->id == 2); + /* * This happens when the message contains some kind of invalid * parameter, such as an invalid size */ - if (send_ok == -1) { + if (send_ok == -EINVAL) { response.size = sizeof (response); response.id = 0; response.error = CS_ERR_INVALID_PARAM; - log_printf(LOG_INFO, "%s() invalid message! size:%d error:%d", - __func__, response.size, response.error); - qb_ipcs_response_send (c, + + cnx = qb_ipcs_context_get(c); + if (cnx) { + cnx->invalid_request++; + } + + if (is_async_call) { + log_printf(LOGSYS_LEVEL_INFO, "*** %s() invalid message! size:%d error:%d", + __func__, response.size, response.error); + } else { + qb_ipcs_response_send (c, &response, sizeof (response)); + } res = -EINVAL; - } else { - if (send_ok) { - ais_service[service]->lib_engine[request_pt->id].lib_handler_fn(c, request_pt); - res = 0; - } else { + } else if (send_ok < 0) { + cnx = qb_ipcs_context_get(c); + if (cnx) { + cnx->overload++; + } + if (!is_async_call) { /* * Overload, tell library to retry */ response.size = sizeof (response); response.id = 0; response.error = CS_ERR_TRY_AGAIN; qb_ipcs_response_send (c, - &response, - sizeof (response)); - res = -ENOBUFS; + &response, + sizeof (response)); + } else { + log_printf(LOGSYS_LEVEL_WARNING, + "*** %s() (%d:%d - %d) %s!", + __func__, service, request_pt->id, + is_async_call, strerror(-send_ok)); } + res = -ENOBUFS; + } + + if (send_ok) { + ais_service[service]->lib_engine[request_pt->id].lib_handler_fn(c, request_pt); + res = 0; } corosync_sending_allowed_release (&sending_allowed_private_data); return res; } static int32_t cs_ipcs_job_add(enum qb_loop_priority p, void *data, qb_loop_job_dispatch_fn fn) { return qb_loop_job_add(cs_poll_handle_get(), p, data, fn); } static int32_t cs_ipcs_dispatch_add(enum qb_loop_priority p, int32_t fd, int32_t events, void *data, qb_ipcs_dispatch_fn_t fn) { return qb_loop_poll_add(cs_poll_handle_get(), p, fd, events, data, fn); } static int32_t cs_ipcs_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t events, void *data, qb_ipcs_dispatch_fn_t fn) { return qb_loop_poll_mod(cs_poll_handle_get(), p, fd, events, data, fn); } static int32_t cs_ipcs_dispatch_del(int32_t fd) { return qb_loop_poll_del(cs_poll_handle_get(), fd); } static void cs_ipcs_low_fds_event(int32_t not_enough, int32_t fds_available) { ipc_not_enough_fds_left = not_enough; if (not_enough) { log_printf(LOGSYS_LEVEL_WARNING, "refusing new connections (fds_available:%d)\n", fds_available); } else { log_printf(LOGSYS_LEVEL_NOTICE, "allowing new connections (fds_available:%d)\n", fds_available); } } int32_t cs_ipcs_q_level_get(void) { return ipc_fc_totem_queue_level; } static qb_loop_timer_handle ipcs_check_for_flow_control_timer; static void cs_ipcs_check_for_flow_control(void) { int32_t i; int32_t fc_enabled; for (i = 0; i < SERVICE_HANDLER_MAXIMUM_COUNT; i++) { if (ais_service[i] == NULL || ipcs_mapper[i].inst == NULL) { continue; } fc_enabled = QB_TRUE; if (ipc_fc_is_quorate == 1 || ais_service[i]->allow_inquorate == CS_LIB_ALLOW_INQUORATE) { /* * we are quorate * now check flow control */ if (ipc_fc_totem_queue_level != TOTEM_Q_LEVEL_CRITICAL && ipc_fc_sync_in_process == 0) { fc_enabled = QB_FALSE; } } if (fc_enabled) { qb_ipcs_request_rate_limit(ipcs_mapper[i].inst, QB_IPCS_RATE_OFF); qb_loop_timer_add(cs_poll_handle_get(), QB_LOOP_MED, 1*QB_TIME_NS_IN_MSEC, NULL, corosync_recheck_the_q_level, &ipcs_check_for_flow_control_timer); } else if (ipc_fc_totem_queue_level == TOTEM_Q_LEVEL_LOW) { qb_ipcs_request_rate_limit(ipcs_mapper[i].inst, QB_IPCS_RATE_FAST); } else if (ipc_fc_totem_queue_level == TOTEM_Q_LEVEL_GOOD) { qb_ipcs_request_rate_limit(ipcs_mapper[i].inst, QB_IPCS_RATE_NORMAL); } else if (ipc_fc_totem_queue_level == TOTEM_Q_LEVEL_HIGH) { qb_ipcs_request_rate_limit(ipcs_mapper[i].inst, QB_IPCS_RATE_SLOW); } } } static void cs_ipcs_fc_quorum_changed(int quorate, void *context) { ipc_fc_is_quorate = quorate; cs_ipcs_check_for_flow_control(); } static void cs_ipcs_totem_queue_level_changed(enum totem_q_level level) { ipc_fc_totem_queue_level = level; cs_ipcs_check_for_flow_control(); } void cs_ipcs_sync_state_changed(int32_t sync_in_process) { ipc_fc_sync_in_process = sync_in_process; cs_ipcs_check_for_flow_control(); } static void cs_ipcs_libqb_log_fn(const char *file_name, int32_t file_line, int32_t severity, const char *msg) { int32_t level = severity; if (severity > LOG_DEBUG) { level = LOGSYS_LEVEL_DEBUG; } _logsys_log_printf (LOGSYS_ENCODE_RECID(level, ipc_subsys_id, LOGSYS_RECID_LOG), __func__, file_name, file_line, "%s", msg); } void cs_ipcs_stats_update(void) { int32_t i; struct qb_ipcs_stats srv_stats; struct qb_ipcs_connection_stats stats; qb_ipcs_connection_t *c; struct cs_ipcs_conn_context *cnx; for (i = 0; i < SERVICE_HANDLER_MAXIMUM_COUNT; i++) { if (ais_service[i] == NULL || ipcs_mapper[i].inst == NULL) { continue; } qb_ipcs_stats_get(ipcs_mapper[i].inst, &srv_stats, QB_FALSE); for (c = qb_ipcs_connection_first_get(ipcs_mapper[i].inst); c; c = qb_ipcs_connection_next_get(ipcs_mapper[i].inst, c)) { cnx = qb_ipcs_context_get(c); if (cnx == NULL) continue; qb_ipcs_connection_stats_get(c, &stats, QB_FALSE); api->object_key_replace(cnx->stats_handle, "client_pid", strlen("client_pid"), &stats.client_pid, sizeof(uint32_t)); api->object_key_replace(cnx->stats_handle, "requests", strlen("requests"), &stats.requests, sizeof(uint64_t)); api->object_key_replace(cnx->stats_handle, "responses", strlen("responses"), &stats.responses, sizeof(uint64_t)); api->object_key_replace(cnx->stats_handle, "dispatched", strlen("dispatched"), &stats.events, sizeof(uint64_t)); api->object_key_replace(cnx->stats_handle, "send_retries", strlen("send_retries"), &stats.send_retries, sizeof(uint64_t)); api->object_key_replace(cnx->stats_handle, "recv_retries", strlen("recv_retries"), &stats.recv_retries, sizeof(uint64_t)); + api->object_key_replace(cnx->stats_handle, + "flow_control", strlen("flow_control"), + &stats.flow_control_state, sizeof(uint32_t)); api->object_key_replace(cnx->stats_handle, "flow_control_count", strlen("flow_control_count"), &stats.flow_control_count, sizeof(uint64_t)); api->object_key_replace(cnx->stats_handle, - "flow_control_state", strlen("flow_control_state"), - &stats.flow_control_state, sizeof(uint32_t)); - + "queue_size", strlen("queue_size"), + &cnx->queued, sizeof(uint32_t)); + api->object_key_replace(cnx->stats_handle, + "invalid_request", strlen("invalid_request"), + &cnx->invalid_request, sizeof(uint64_t)); + api->object_key_replace(cnx->stats_handle, + "overload", strlen("overload"), + &cnx->overload, sizeof(uint64_t)); qb_ipcs_connection_unref(c); } } } void cs_ipcs_service_init(struct corosync_service_engine *service) { if (service->lib_engine_count == 0) { log_printf (LOGSYS_LEVEL_DEBUG, "NOT Initializing IPC on %s [%d]", cs_ipcs_serv_short_name(service->id), service->id); return; } ipcs_mapper[service->id].id = service->id; strcpy(ipcs_mapper[service->id].name, cs_ipcs_serv_short_name(service->id)); log_printf (LOGSYS_LEVEL_DEBUG, "Initializing IPC on %s [%d]", ipcs_mapper[service->id].name, ipcs_mapper[service->id].id); ipcs_mapper[service->id].inst = qb_ipcs_create(ipcs_mapper[service->id].name, ipcs_mapper[service->id].id, QB_IPC_SHM, &corosync_service_funcs); assert(ipcs_mapper[service->id].inst); qb_ipcs_poll_handlers_set(ipcs_mapper[service->id].inst, &corosync_poll_funcs); qb_ipcs_run(ipcs_mapper[service->id].inst); } void cs_ipcs_init(void) { qb_handle_t object_find_handle; qb_handle_t object_runtime_handle; uint64_t zero_64 = 0; api = apidef_get (); qb_loop_poll_low_fds_event_set(cs_poll_handle_get(), cs_ipcs_low_fds_event); ipc_subsys_id = _logsys_subsys_create ("IPC"); if (ipc_subsys_id < 0) { log_printf (LOGSYS_LEVEL_ERROR, "Could not initialize IPC logging subsystem\n"); corosync_exit_error (AIS_DONE_INIT_SERVICES); } qb_util_set_log_function (cs_ipcs_libqb_log_fn); api->quorum_register_callback (cs_ipcs_fc_quorum_changed, NULL); totempg_queue_level_register_callback (cs_ipcs_totem_queue_level_changed); api->object_find_create (OBJECT_PARENT_HANDLE, "runtime", strlen ("runtime"), &object_find_handle); if (api->object_find_next (object_find_handle, &object_runtime_handle) != 0) { log_printf (LOGSYS_LEVEL_ERROR,"arrg no runtime"); return; } /* Connection objects */ api->object_create (object_runtime_handle, &object_connection_handle, "connections", strlen ("connections")); api->object_key_create_typed (object_connection_handle, "active", &zero_64, sizeof (zero_64), OBJDB_VALUETYPE_UINT64); api->object_key_create_typed (object_connection_handle, "closed", &zero_64, sizeof (zero_64), OBJDB_VALUETYPE_UINT64); } diff --git a/exec/main.c b/exec/main.c index 4612b0d5..62ca494f 100644 --- a/exec/main.c +++ b/exec/main.c @@ -1,1513 +1,1519 @@ /* * Copyright (c) 2002-2006 MontaVista Software, Inc. * Copyright (c) 2006-2009 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /** * \mainpage Corosync * * This is the doxygen generated developer documentation for the Corosync * project. For more information about Corosync, please see the project * web site, corosync.org. * * \section license License * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "quorum.h" #include "totemsrp.h" #include "mainconfig.h" #include "totemconfig.h" #include "main.h" #include "sync.h" #include "syncv2.h" #include "timer.h" #include "util.h" #include "apidef.h" #include "service.h" #include "schedwrk.h" #include "evil.h" #ifdef HAVE_SMALL_MEMORY_FOOTPRINT #define IPC_LOGSYS_SIZE 1024*64 #else #define IPC_LOGSYS_SIZE 8192*128 #endif LOGSYS_DECLARE_SYSTEM ("corosync", LOGSYS_MODE_OUTPUT_STDERR | LOGSYS_MODE_THREADED | LOGSYS_MODE_FORK, 0, NULL, LOG_INFO, LOG_DAEMON, LOG_INFO, NULL, IPC_LOGSYS_SIZE); LOGSYS_DECLARE_SUBSYS ("MAIN"); #define SERVER_BACKLOG 5 static int sched_priority = 0; static unsigned int service_count = 32; static struct totem_logging_configuration totem_logging_configuration; static int num_config_modules; static struct config_iface_ver0 *config_modules[MAX_DYNAMIC_SERVICES]; static struct objdb_iface_ver0 *objdb = NULL; static struct corosync_api_v1 *api = NULL; static enum cs_sync_mode minimum_sync_mode; static int sync_in_process = 1; static qb_loop_t *corosync_poll_handle; struct sched_param global_sched_param; static hdb_handle_t object_memb_handle; static corosync_timer_handle_t corosync_stats_timer_handle; static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid"; qb_loop_t *cs_poll_handle_get (void) { return (corosync_poll_handle); } int cs_poll_dispatch_add (qb_loop_t * handle, int fd, int events, void *data, int (*dispatch_fn) (int fd, int revents, void *data)) { return qb_loop_poll_add(handle, QB_LOOP_MED, fd, events, data, dispatch_fn); } int cs_poll_dispatch_delete(qb_loop_t * handle, int fd) { return qb_loop_poll_del(handle, fd); } void corosync_state_dump (void) { int i; for (i = 0; i < SERVICE_HANDLER_MAXIMUM_COUNT; i++) { if (ais_service[i] && ais_service[i]->exec_dump_fn) { ais_service[i]->exec_dump_fn (); } } } static void unlink_all_completed (void) { api->timer_delete (corosync_stats_timer_handle); qb_loop_stop (corosync_poll_handle); } void corosync_shutdown_request (void) { corosync_service_unlink_all (api, unlink_all_completed); } static int32_t sig_diag_handler (int num, void *data) { corosync_state_dump (); logsys_log_rec_store (LOCALSTATEDIR "/lib/corosync/fdata"); return 0; } static int32_t sig_exit_handler (int num, void *data) { corosync_service_unlink_all (api, unlink_all_completed); return 0; } static void sigsegv_handler (int num) { (void)signal (SIGSEGV, SIG_DFL); logsys_atexit(); logsys_log_rec_store (LOCALSTATEDIR "/lib/corosync/fdata"); raise (SIGSEGV); } static void sigabrt_handler (int num) { (void)signal (SIGABRT, SIG_DFL); logsys_atexit(); logsys_log_rec_store (LOCALSTATEDIR "/lib/corosync/fdata"); raise (SIGABRT); } #define LOCALHOST_IP inet_addr("127.0.0.1") static hdb_handle_t corosync_group_handle; static struct totempg_group corosync_group = { .group = "a", .group_len = 1 }; static void serialize_lock (void) { } static void serialize_unlock (void) { } static void corosync_sync_completed (void) { log_printf (LOGSYS_LEVEL_NOTICE, "Completed service synchronization, ready to provide service.\n"); sync_in_process = 0; cs_ipcs_sync_state_changed(sync_in_process); } static int corosync_sync_callbacks_retrieve (int sync_id, struct sync_callbacks *callbacks) { unsigned int ais_service_index; int res; for (ais_service_index = 0; ais_service_index < SERVICE_HANDLER_MAXIMUM_COUNT; ais_service_index++) { if (ais_service[ais_service_index] != NULL && (ais_service[ais_service_index]->sync_mode == CS_SYNC_V1 || ais_service[ais_service_index]->sync_mode == CS_SYNC_V1_APIV2)) { if (ais_service_index == sync_id) { break; } } } /* * Try to load backwards compat sync engines */ if (ais_service_index == SERVICE_HANDLER_MAXIMUM_COUNT) { res = evil_callbacks_load (sync_id, callbacks); return (res); } callbacks->name = ais_service[ais_service_index]->name; callbacks->sync_init_api.sync_init_v1 = ais_service[ais_service_index]->sync_init; callbacks->api_version = 1; if (ais_service[ais_service_index]->sync_mode == CS_SYNC_V1_APIV2) { callbacks->api_version = 2; } callbacks->sync_process = ais_service[ais_service_index]->sync_process; callbacks->sync_activate = ais_service[ais_service_index]->sync_activate; callbacks->sync_abort = ais_service[ais_service_index]->sync_abort; return (0); } static int corosync_sync_v2_callbacks_retrieve ( int service_id, struct sync_callbacks *callbacks) { int res; if (minimum_sync_mode == CS_SYNC_V2 && service_id == CLM_SERVICE && ais_service[CLM_SERVICE] == NULL) { res = evil_callbacks_load (service_id, callbacks); return (res); } if (minimum_sync_mode == CS_SYNC_V2 && service_id == EVT_SERVICE && ais_service[EVT_SERVICE] == NULL) { res = evil_callbacks_load (service_id, callbacks); return (res); } if (ais_service[service_id] == NULL) { return (-1); } if (minimum_sync_mode == CS_SYNC_V1 && ais_service[service_id]->sync_mode != CS_SYNC_V2) { return (-1); } callbacks->name = ais_service[service_id]->name; callbacks->api_version = 1; if (ais_service[service_id]->sync_mode == CS_SYNC_V1_APIV2) { callbacks->api_version = 2; } callbacks->sync_init_api.sync_init_v1 = ais_service[service_id]->sync_init; callbacks->sync_process = ais_service[service_id]->sync_process; callbacks->sync_activate = ais_service[service_id]->sync_activate; callbacks->sync_abort = ais_service[service_id]->sync_abort; return (0); } static struct memb_ring_id corosync_ring_id; static void member_object_joined (unsigned int nodeid) { hdb_handle_t object_find_handle; hdb_handle_t object_node_handle; char * nodeint_str; char nodeid_str[64]; unsigned int key_incr_dummy; snprintf (nodeid_str, 64, "%d", nodeid); objdb->object_find_create ( object_memb_handle, nodeid_str, strlen (nodeid_str), &object_find_handle); if (objdb->object_find_next (object_find_handle, &object_node_handle) == 0) { objdb->object_key_increment (object_node_handle, "join_count", strlen("join_count"), &key_incr_dummy); objdb->object_key_replace (object_node_handle, "status", strlen("status"), "joined", strlen("joined")); } else { nodeint_str = (char*)api->totem_ifaces_print (nodeid); objdb->object_create (object_memb_handle, &object_node_handle, nodeid_str, strlen (nodeid_str)); objdb->object_key_create_typed (object_node_handle, "ip", nodeint_str, strlen(nodeint_str), OBJDB_VALUETYPE_STRING); key_incr_dummy = 1; objdb->object_key_create_typed (object_node_handle, "join_count", &key_incr_dummy, sizeof (key_incr_dummy), OBJDB_VALUETYPE_UINT32); objdb->object_key_create_typed (object_node_handle, "status", "joined", strlen("joined"), OBJDB_VALUETYPE_STRING); } } static void member_object_left (unsigned int nodeid) { hdb_handle_t object_find_handle; hdb_handle_t object_node_handle; char nodeid_str[64]; snprintf (nodeid_str, 64, "%u", nodeid); objdb->object_find_create ( object_memb_handle, nodeid_str, strlen (nodeid_str), &object_find_handle); if (objdb->object_find_next (object_find_handle, &object_node_handle) == 0) { objdb->object_key_replace (object_node_handle, "status", strlen("status"), "left", strlen("left")); } } static void confchg_fn ( enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id) { int i; int abort_activate = 0; if (sync_in_process == 1) { abort_activate = 1; } sync_in_process = 1; cs_ipcs_sync_state_changed(sync_in_process); memcpy (&corosync_ring_id, ring_id, sizeof (struct memb_ring_id)); for (i = 0; i < left_list_entries; i++) { member_object_left (left_list[i]); } for (i = 0; i < joined_list_entries; i++) { member_object_joined (joined_list[i]); } /* * Call configuration change for all services */ for (i = 0; i < service_count; i++) { if (ais_service[i] && ais_service[i]->confchg_fn) { ais_service[i]->confchg_fn (configuration_type, member_list, member_list_entries, left_list, left_list_entries, joined_list, joined_list_entries, ring_id); } } if (abort_activate) { sync_v2_abort (); } if (minimum_sync_mode == CS_SYNC_V2 && configuration_type == TOTEM_CONFIGURATION_TRANSITIONAL) { sync_v2_save_transitional (member_list, member_list_entries, ring_id); } if (minimum_sync_mode == CS_SYNC_V2 && configuration_type == TOTEM_CONFIGURATION_REGULAR) { sync_v2_start (member_list, member_list_entries, ring_id); } } static void priv_drop (void) { return; /* TODO: we are still not dropping privs */ } static void corosync_tty_detach (void) { FILE *r; /* * Disconnect from TTY if this is not a debug run */ switch (fork ()) { case -1: corosync_exit_error (AIS_DONE_FORK); break; case 0: /* * child which is disconnected, run this process */ break; default: exit (0); break; } /* Create new session */ (void)setsid(); /* * Map stdin/out/err to /dev/null. */ r = freopen("/dev/null", "r", stdin); if (r == NULL) { corosync_exit_error (AIS_DONE_STD_TO_NULL_REDIR); } r = freopen("/dev/null", "a", stderr); if (r == NULL) { corosync_exit_error (AIS_DONE_STD_TO_NULL_REDIR); } r = freopen("/dev/null", "a", stdout); if (r == NULL) { corosync_exit_error (AIS_DONE_STD_TO_NULL_REDIR); } } static void corosync_mlockall (void) { #if !defined(COROSYNC_BSD) || defined(COROSYNC_FREEBSD_GE_8) int res; #endif struct rlimit rlimit; rlimit.rlim_cur = RLIM_INFINITY; rlimit.rlim_max = RLIM_INFINITY; #ifndef COROSYNC_SOLARIS setrlimit (RLIMIT_MEMLOCK, &rlimit); #else setrlimit (RLIMIT_VMEM, &rlimit); #endif #if defined(COROSYNC_BSD) && !defined(COROSYNC_FREEBSD_GE_8) /* under FreeBSD < 8 a process with locked page cannot call dlopen * code disabled until FreeBSD bug i386/93396 was solved */ log_printf (LOGSYS_LEVEL_WARNING, "Could not lock memory of service to avoid page faults\n"); #else res = mlockall (MCL_CURRENT | MCL_FUTURE); if (res == -1) { LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING, "Could not lock memory of service to avoid page faults"); }; #endif } static void corosync_totem_stats_updater (void *data) { totempg_stats_t * stats; uint32_t mtt_rx_token; uint32_t total_mtt_rx_token; uint32_t avg_backlog_calc; uint32_t total_backlog_calc; uint32_t avg_token_holdtime; uint32_t total_token_holdtime; int t, prev; int32_t token_count; uint32_t firewall_enabled_or_nic_failure; stats = api->totem_get_stats(); objdb->object_key_replace (stats->hdr.handle, "msg_reserved", strlen("msg_reserved"), &stats->msg_reserved, sizeof (stats->msg_reserved)); objdb->object_key_replace (stats->hdr.handle, "msg_queue_avail", strlen("msg_queue_avail"), &stats->msg_queue_avail, sizeof (stats->msg_queue_avail)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "orf_token_tx", strlen("orf_token_tx"), &stats->mrp->srp->orf_token_tx, sizeof (stats->mrp->srp->orf_token_tx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "orf_token_rx", strlen("orf_token_rx"), &stats->mrp->srp->orf_token_rx, sizeof (stats->mrp->srp->orf_token_rx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "memb_merge_detect_tx", strlen("memb_merge_detect_tx"), &stats->mrp->srp->memb_merge_detect_tx, sizeof (stats->mrp->srp->memb_merge_detect_tx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "memb_merge_detect_rx", strlen("memb_merge_detect_rx"), &stats->mrp->srp->memb_merge_detect_rx, sizeof (stats->mrp->srp->memb_merge_detect_rx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "memb_join_tx", strlen("memb_join_tx"), &stats->mrp->srp->memb_join_tx, sizeof (stats->mrp->srp->memb_join_tx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "memb_join_rx", strlen("memb_join_rx"), &stats->mrp->srp->memb_join_rx, sizeof (stats->mrp->srp->memb_join_rx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "mcast_tx", strlen("mcast_tx"), &stats->mrp->srp->mcast_tx, sizeof (stats->mrp->srp->mcast_tx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "mcast_retx", strlen("mcast_retx"), &stats->mrp->srp->mcast_retx, sizeof (stats->mrp->srp->mcast_retx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "mcast_rx", strlen("mcast_rx"), &stats->mrp->srp->mcast_rx, sizeof (stats->mrp->srp->mcast_rx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "memb_commit_token_tx", strlen("memb_commit_token_tx"), &stats->mrp->srp->memb_commit_token_tx, sizeof (stats->mrp->srp->memb_commit_token_tx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "memb_commit_token_rx", strlen("memb_commit_token_rx"), &stats->mrp->srp->memb_commit_token_rx, sizeof (stats->mrp->srp->memb_commit_token_rx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "token_hold_cancel_tx", strlen("token_hold_cancel_tx"), &stats->mrp->srp->token_hold_cancel_tx, sizeof (stats->mrp->srp->token_hold_cancel_tx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "token_hold_cancel_rx", strlen("token_hold_cancel_rx"), &stats->mrp->srp->token_hold_cancel_rx, sizeof (stats->mrp->srp->token_hold_cancel_rx)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "operational_entered", strlen("operational_entered"), &stats->mrp->srp->operational_entered, sizeof (stats->mrp->srp->operational_entered)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "operational_token_lost", strlen("operational_token_lost"), &stats->mrp->srp->operational_token_lost, sizeof (stats->mrp->srp->operational_token_lost)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "gather_entered", strlen("gather_entered"), &stats->mrp->srp->gather_entered, sizeof (stats->mrp->srp->gather_entered)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "gather_token_lost", strlen("gather_token_lost"), &stats->mrp->srp->gather_token_lost, sizeof (stats->mrp->srp->gather_token_lost)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "commit_entered", strlen("commit_entered"), &stats->mrp->srp->commit_entered, sizeof (stats->mrp->srp->commit_entered)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "commit_token_lost", strlen("commit_token_lost"), &stats->mrp->srp->commit_token_lost, sizeof (stats->mrp->srp->commit_token_lost)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "recovery_entered", strlen("recovery_entered"), &stats->mrp->srp->recovery_entered, sizeof (stats->mrp->srp->recovery_entered)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "recovery_token_lost", strlen("recovery_token_lost"), &stats->mrp->srp->recovery_token_lost, sizeof (stats->mrp->srp->recovery_token_lost)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "consensus_timeouts", strlen("consensus_timeouts"), &stats->mrp->srp->consensus_timeouts, sizeof (stats->mrp->srp->consensus_timeouts)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "rx_msg_dropped", strlen("rx_msg_dropped"), &stats->mrp->srp->rx_msg_dropped, sizeof (stats->mrp->srp->rx_msg_dropped)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "continuous_gather", strlen("continuous_gather"), &stats->mrp->srp->continuous_gather, sizeof (stats->mrp->srp->continuous_gather)); firewall_enabled_or_nic_failure = (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ? 1 : 0); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "firewall_enabled_or_nic_failure", strlen("firewall_enabled_or_nic_failure"), &firewall_enabled_or_nic_failure, sizeof (firewall_enabled_or_nic_failure)); total_mtt_rx_token = 0; total_token_holdtime = 0; total_backlog_calc = 0; token_count = 0; t = stats->mrp->srp->latest_token; while (1) { if (t == 0) prev = TOTEM_TOKEN_STATS_MAX - 1; else prev = t - 1; if (prev == stats->mrp->srp->earliest_token) break; /* if tx == 0, then dropped token (not ours) */ if (stats->mrp->srp->token[t].tx != 0 || (stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx) > 0 ) { total_mtt_rx_token += (stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx); total_token_holdtime += (stats->mrp->srp->token[t].tx - stats->mrp->srp->token[t].rx); total_backlog_calc += stats->mrp->srp->token[t].backlog_calc; token_count++; } t = prev; } if (token_count) { mtt_rx_token = (total_mtt_rx_token / token_count); avg_backlog_calc = (total_backlog_calc / token_count); avg_token_holdtime = (total_token_holdtime / token_count); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "mtt_rx_token", strlen("mtt_rx_token"), &mtt_rx_token, sizeof (mtt_rx_token)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "avg_token_workload", strlen("avg_token_workload"), &avg_token_holdtime, sizeof (avg_token_holdtime)); objdb->object_key_replace (stats->mrp->srp->hdr.handle, "avg_backlog_calc", strlen("avg_backlog_calc"), &avg_backlog_calc, sizeof (avg_backlog_calc)); } cs_ipcs_stats_update(); api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL, corosync_totem_stats_updater, &corosync_stats_timer_handle); } static void corosync_totem_stats_init (void) { totempg_stats_t * stats; hdb_handle_t object_find_handle; hdb_handle_t object_runtime_handle; hdb_handle_t object_totem_handle; uint32_t zero_32 = 0; uint64_t zero_64 = 0; stats = api->totem_get_stats(); objdb->object_find_create ( OBJECT_PARENT_HANDLE, "runtime", strlen ("runtime"), &object_find_handle); if (objdb->object_find_next (object_find_handle, &object_runtime_handle) == 0) { objdb->object_create (object_runtime_handle, &object_totem_handle, "totem", strlen ("totem")); objdb->object_create (object_totem_handle, &stats->hdr.handle, "pg", strlen ("pg")); objdb->object_create (stats->hdr.handle, &stats->mrp->hdr.handle, "mrp", strlen ("mrp")); objdb->object_create (stats->mrp->hdr.handle, &stats->mrp->srp->hdr.handle, "srp", strlen ("srp")); objdb->object_key_create_typed (stats->hdr.handle, "msg_reserved", &stats->msg_reserved, sizeof (stats->msg_reserved), OBJDB_VALUETYPE_UINT32); objdb->object_key_create_typed (stats->hdr.handle, "msg_queue_avail", &stats->msg_queue_avail, sizeof (stats->msg_queue_avail), OBJDB_VALUETYPE_UINT32); /* Members object */ objdb->object_create (stats->mrp->srp->hdr.handle, &object_memb_handle, "members", strlen ("members")); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "orf_token_tx", &stats->mrp->srp->orf_token_tx, sizeof (stats->mrp->srp->orf_token_tx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "orf_token_rx", &stats->mrp->srp->orf_token_rx, sizeof (stats->mrp->srp->orf_token_rx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "memb_merge_detect_tx", &stats->mrp->srp->memb_merge_detect_tx, sizeof (stats->mrp->srp->memb_merge_detect_tx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "memb_merge_detect_rx", &stats->mrp->srp->memb_merge_detect_rx, sizeof (stats->mrp->srp->memb_merge_detect_rx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "memb_join_tx", &stats->mrp->srp->memb_join_tx, sizeof (stats->mrp->srp->memb_join_tx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "memb_join_rx", &stats->mrp->srp->memb_join_rx, sizeof (stats->mrp->srp->memb_join_rx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "mcast_tx", &stats->mrp->srp->mcast_tx, sizeof (stats->mrp->srp->mcast_tx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "mcast_retx", &stats->mrp->srp->mcast_retx, sizeof (stats->mrp->srp->mcast_retx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "mcast_rx", &stats->mrp->srp->mcast_rx, sizeof (stats->mrp->srp->mcast_rx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "memb_commit_token_tx", &stats->mrp->srp->memb_commit_token_tx, sizeof (stats->mrp->srp->memb_commit_token_tx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "memb_commit_token_rx", &stats->mrp->srp->memb_commit_token_rx, sizeof (stats->mrp->srp->memb_commit_token_rx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "token_hold_cancel_tx", &stats->mrp->srp->token_hold_cancel_tx, sizeof (stats->mrp->srp->token_hold_cancel_tx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "token_hold_cancel_rx", &stats->mrp->srp->token_hold_cancel_rx, sizeof (stats->mrp->srp->token_hold_cancel_rx), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "operational_entered", &stats->mrp->srp->operational_entered, sizeof (stats->mrp->srp->operational_entered), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "operational_token_lost", &stats->mrp->srp->operational_token_lost, sizeof (stats->mrp->srp->operational_token_lost), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "gather_entered", &stats->mrp->srp->gather_entered, sizeof (stats->mrp->srp->gather_entered), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "gather_token_lost", &stats->mrp->srp->gather_token_lost, sizeof (stats->mrp->srp->gather_token_lost), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "commit_entered", &stats->mrp->srp->commit_entered, sizeof (stats->mrp->srp->commit_entered), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "commit_token_lost", &stats->mrp->srp->commit_token_lost, sizeof (stats->mrp->srp->commit_token_lost), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "recovery_entered", &stats->mrp->srp->recovery_entered, sizeof (stats->mrp->srp->recovery_entered), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "recovery_token_lost", &stats->mrp->srp->recovery_token_lost, sizeof (stats->mrp->srp->recovery_token_lost), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "consensus_timeouts", &stats->mrp->srp->consensus_timeouts, sizeof (stats->mrp->srp->consensus_timeouts), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "mtt_rx_token", &zero_32, sizeof (zero_32), OBJDB_VALUETYPE_UINT32); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "avg_token_workload", &zero_32, sizeof (zero_32), OBJDB_VALUETYPE_UINT32); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "avg_backlog_calc", &zero_32, sizeof (zero_32), OBJDB_VALUETYPE_UINT32); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "rx_msg_dropped", &zero_64, sizeof (zero_64), OBJDB_VALUETYPE_UINT64); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "continuous_gather", &zero_32, sizeof (zero_32), OBJDB_VALUETYPE_UINT32); objdb->object_key_create_typed (stats->mrp->srp->hdr.handle, "firewall_enabled_or_nic_failure", &zero_32, sizeof (zero_32), OBJDB_VALUETYPE_UINT32); } /* start stats timer */ api->timer_add_duration (1500 * MILLI_2_NANO_SECONDS, NULL, corosync_totem_stats_updater, &corosync_stats_timer_handle); } static void deliver_fn ( unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required) { const struct qb_ipc_request_header *header; int32_t service; int32_t fn_id; uint32_t id; uint32_t size; uint32_t key_incr_dummy; header = msg; if (endian_conversion_required) { id = swab32 (header->id); size = swab32 (header->size); } else { id = header->id; size = header->size; } /* * Call the proper executive handler */ service = id >> 16; fn_id = id & 0xffff; if (ais_service[service] == NULL && service == EVT_SERVICE) { evil_deliver_fn (nodeid, service, fn_id, msg, endian_conversion_required); } if (!ais_service[service]) { return; } if (fn_id >= ais_service[service]->exec_engine_count) { log_printf(LOGSYS_LEVEL_WARNING, "discarded unknown message %d for service %d (max id %d)", fn_id, service, ais_service[service]->exec_engine_count); return; } objdb->object_key_increment (service_stats_handle[service][fn_id], "rx", strlen("rx"), &key_incr_dummy); if (endian_conversion_required) { assert(ais_service[service]->exec_engine[fn_id].exec_endian_convert_fn != NULL); ais_service[service]->exec_engine[fn_id].exec_endian_convert_fn ((void *)msg); } ais_service[service]->exec_engine[fn_id].exec_handler_fn (msg, nodeid); } void main_get_config_modules(struct config_iface_ver0 ***modules, int *num) { *modules = config_modules; *num = num_config_modules; } int main_mcast ( const struct iovec *iovec, unsigned int iov_len, unsigned int guarantee) { const struct qb_ipc_request_header *req = iovec->iov_base; int32_t service; int32_t fn_id; uint32_t key_incr_dummy; service = req->id >> 16; fn_id = req->id & 0xffff; if (ais_service[service]) { objdb->object_key_increment (service_stats_handle[service][fn_id], "tx", strlen("tx"), &key_incr_dummy); } return (totempg_groups_mcast_joined (corosync_group_handle, iovec, iov_len, guarantee)); } static qb_loop_timer_handle recheck_the_q_level_timer; void corosync_recheck_the_q_level(void *data) { totempg_check_q_level(corosync_group_handle); if (cs_ipcs_q_level_get() == TOTEM_Q_LEVEL_CRITICAL) { qb_loop_timer_add(cs_poll_handle_get(), QB_LOOP_MED, 1*QB_TIME_NS_IN_MSEC, NULL, corosync_recheck_the_q_level, &recheck_the_q_level_timer); } } struct sending_allowed_private_data_struct { int reserved_msgs; }; int corosync_sending_allowed ( unsigned int service, unsigned int id, const void *msg, void *sending_allowed_private_data) { struct sending_allowed_private_data_struct *pd = (struct sending_allowed_private_data_struct *)sending_allowed_private_data; struct iovec reserve_iovec; struct qb_ipc_request_header *header = (struct qb_ipc_request_header *)msg; int sending_allowed; reserve_iovec.iov_base = (char *)header; reserve_iovec.iov_len = header->size; pd->reserved_msgs = totempg_groups_joined_reserve ( corosync_group_handle, &reserve_iovec, 1); if (pd->reserved_msgs == -1) { - return (-1); + return -EINVAL; } sending_allowed = QB_FALSE; if (corosync_quorum_is_quorate() == 1 || ais_service[service]->allow_inquorate == CS_LIB_ALLOW_INQUORATE) { // we are quorate // now check flow control if (ais_service[service]->lib_engine[id].flow_control == CS_LIB_FLOW_CONTROL_NOT_REQUIRED) { sending_allowed = QB_TRUE; } else if (pd->reserved_msgs && sync_in_process == 0) { sending_allowed = QB_TRUE; + } else if (pd->reserved_msgs == 0) { + return -ENOBUFS; + } else /* (sync_in_process) */ { + return -EINPROGRESS; } + } else { + return -EHOSTUNREACH; } return (sending_allowed); } void corosync_sending_allowed_release (void *sending_allowed_private_data) { struct sending_allowed_private_data_struct *pd = (struct sending_allowed_private_data_struct *)sending_allowed_private_data; if (pd->reserved_msgs == -1) { return; } totempg_groups_joined_release (pd->reserved_msgs); } int message_source_is_local (const mar_message_source_t *source) { int ret = 0; assert (source != NULL); if (source->nodeid == totempg_my_nodeid_get ()) { ret = 1; } return ret; } void message_source_set ( mar_message_source_t *source, void *conn) { assert ((source != NULL) && (conn != NULL)); memset (source, 0, sizeof (mar_message_source_t)); source->nodeid = totempg_my_nodeid_get (); source->conn = conn; } static void corosync_setscheduler (void) { #if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) && defined(HAVE_SCHED_SETSCHEDULER) int res; sched_priority = sched_get_priority_max (SCHED_RR); if (sched_priority != -1) { global_sched_param.sched_priority = sched_priority; res = sched_setscheduler (0, SCHED_RR, &global_sched_param); if (res == -1) { LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING, "Could not set SCHED_RR at priority %d", global_sched_param.sched_priority); global_sched_param.sched_priority = 0; logsys_thread_priority_set (SCHED_OTHER, NULL, 1); } else { /* * Turn on SCHED_RR in logsys system */ res = logsys_thread_priority_set (SCHED_RR, &global_sched_param, 10); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Could not set logsys thread priority." " Can't continue because of priority inversions."); corosync_exit_error (AIS_DONE_LOGSETUP); } } } else { LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING, "Could not get maximum scheduler priority"); sched_priority = 0; } #else log_printf(LOGSYS_LEVEL_WARNING, "The Platform is missing process priority setting features. Leaving at default."); #endif } static void fplay_key_change_notify_fn ( object_change_type_t change_type, hdb_handle_t parent_object_handle, hdb_handle_t object_handle, const void *object_name_pt, size_t object_name_len, const void *key_name_pt, size_t key_len, const void *key_value_pt, size_t key_value_len, void *priv_data_pt) { if (key_len == strlen ("dump_flight_data") && memcmp ("dump_flight_data", key_name_pt, key_len) == 0) { logsys_log_rec_store (LOCALSTATEDIR "/lib/corosync/fdata"); } if (key_len == strlen ("dump_state") && memcmp ("dump_state", key_name_pt, key_len) == 0) { corosync_state_dump (); } } static void corosync_fplay_control_init (void) { hdb_handle_t object_find_handle; hdb_handle_t object_runtime_handle; hdb_handle_t object_blackbox_handle; objdb->object_find_create (OBJECT_PARENT_HANDLE, "runtime", strlen ("runtime"), &object_find_handle); if (objdb->object_find_next (object_find_handle, &object_runtime_handle) != 0) { return; } objdb->object_create (object_runtime_handle, &object_blackbox_handle, "blackbox", strlen ("blackbox")); objdb->object_key_create_typed (object_blackbox_handle, "dump_flight_data", "no", strlen("no"), OBJDB_VALUETYPE_STRING); objdb->object_key_create_typed (object_blackbox_handle, "dump_state", "no", strlen("no"), OBJDB_VALUETYPE_STRING); objdb->object_track_start (object_blackbox_handle, OBJECT_TRACK_DEPTH_RECURSIVE, fplay_key_change_notify_fn, NULL, NULL, NULL, NULL); } static void main_service_ready (void) { int res; /* * This must occur after totempg is initialized because "this_ip" must be set */ res = corosync_service_defaults_link_and_init (api); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Could not initialize default services\n"); corosync_exit_error (AIS_DONE_INIT_SERVICES); } evil_init (api); cs_ipcs_init(); corosync_totem_stats_init (); corosync_fplay_control_init (); if (minimum_sync_mode == CS_SYNC_V2) { log_printf (LOGSYS_LEVEL_NOTICE, "Compatibility mode set to none. Using V2 of the synchronization engine.\n"); sync_v2_init ( corosync_sync_v2_callbacks_retrieve, corosync_sync_completed); } else if (minimum_sync_mode == CS_SYNC_V1) { log_printf (LOGSYS_LEVEL_NOTICE, "Compatibility mode set to whitetank. Using V1 and V2 of the synchronization engine.\n"); sync_register ( corosync_sync_callbacks_retrieve, sync_v2_memb_list_determine, sync_v2_memb_list_abort, sync_v2_start); sync_v2_init ( corosync_sync_v2_callbacks_retrieve, corosync_sync_completed); } } static enum e_ais_done corosync_flock (const char *lockfile, pid_t pid) { struct flock lock; enum e_ais_done err; char pid_s[17]; int fd_flag; int lf; err = AIS_DONE_EXIT; lf = open (lockfile, O_WRONLY | O_CREAT, 0640); if (lf == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create lock file.\n"); return (AIS_DONE_AQUIRE_LOCK); } retry_fcntl: lock.l_type = F_WRLCK; lock.l_start = 0; lock.l_whence = SEEK_SET; lock.l_len = 0; if (fcntl (lf, F_SETLK, &lock) == -1) { switch (errno) { case EINTR: goto retry_fcntl; break; case EAGAIN: case EACCES: log_printf (LOGSYS_LEVEL_ERROR, "Another Corosync instance is already running.\n"); err = AIS_DONE_ALREADY_RUNNING; goto error_close; break; default: log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't aquire lock. Error was %s\n", strerror(errno)); err = AIS_DONE_AQUIRE_LOCK; goto error_close; break; } } if (ftruncate (lf, 0) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't truncate lock file. Error was %s\n", strerror (errno)); err = AIS_DONE_AQUIRE_LOCK; goto error_close_unlink; } memset (pid_s, 0, sizeof (pid_s)); snprintf (pid_s, sizeof (pid_s) - 1, "%u\n", pid); retry_write: if (write (lf, pid_s, strlen (pid_s)) != strlen (pid_s)) { if (errno == EINTR) { goto retry_write; } else { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't write pid to lock file. " "Error was %s\n", strerror (errno)); err = AIS_DONE_AQUIRE_LOCK; goto error_close_unlink; } } if ((fd_flag = fcntl (lf, F_GETFD, 0)) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't get close-on-exec flag from lock file. " "Error was %s\n", strerror (errno)); err = AIS_DONE_AQUIRE_LOCK; goto error_close_unlink; } fd_flag |= FD_CLOEXEC; if (fcntl (lf, F_SETFD, fd_flag) == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't set close-on-exec flag to lock file. " "Error was %s\n", strerror (errno)); err = AIS_DONE_AQUIRE_LOCK; goto error_close_unlink; } return (err); error_close_unlink: unlink (lockfile); error_close: close (lf); return (err); } int main (int argc, char **argv, char **envp) { const char *error_string; struct totem_config totem_config; hdb_handle_t objdb_handle; hdb_handle_t config_handle; unsigned int config_version = 0; void *objdb_p; struct config_iface_ver0 *config; void *config_p; const char *config_iface_init; char *config_iface; char *iface; char *strtok_save_pt; int res, ch; int background, setprio; struct stat stat_out; char corosync_lib_dir[PATH_MAX]; hdb_handle_t object_runtime_handle; enum e_ais_done flock_err; /* default configuration */ background = 1; setprio = 1; while ((ch = getopt (argc, argv, "fpv")) != EOF) { switch (ch) { case 'f': background = 0; logsys_config_mode_set (NULL, LOGSYS_MODE_OUTPUT_STDERR|LOGSYS_MODE_THREADED|LOGSYS_MODE_FORK); break; case 'p': setprio = 0; break; case 'v': printf ("Corosync Cluster Engine, version '%s'\n", VERSION); printf ("Copyright (c) 2006-2009 Red Hat, Inc.\n"); return EXIT_SUCCESS; break; default: fprintf(stderr, \ "usage:\n"\ " -f : Start application in foreground.\n"\ " -p : Do not set process priority. \n"\ " -v : Display version and SVN revision of Corosync and exit.\n"); return EXIT_FAILURE; } } /* * Set round robin realtime scheduling with priority 99 * Lock all memory to avoid page faults which may interrupt * application healthchecking */ if (setprio) { corosync_setscheduler (); } corosync_mlockall (); log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine ('%s'): started and ready to provide service.\n", VERSION); log_printf (LOGSYS_LEVEL_INFO, "Corosync built-in features:" PACKAGE_FEATURES "\n"); corosync_poll_handle = qb_loop_create (); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW, SIGUSR2, NULL, sig_diag_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGINT, NULL, sig_exit_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGQUIT, NULL, sig_exit_handler, NULL); qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, SIGTERM, NULL, sig_exit_handler, NULL); (void)signal (SIGSEGV, sigsegv_handler); (void)signal (SIGABRT, sigabrt_handler); #if MSG_NOSIGNAL != 0 (void)signal (SIGPIPE, SIG_IGN); #endif /* * Load the object database interface */ res = lcr_ifact_reference ( &objdb_handle, "objdb", 0, &objdb_p, 0); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't open configuration object database component.\n"); corosync_exit_error (AIS_DONE_OBJDB); } objdb = (struct objdb_iface_ver0 *)objdb_p; objdb->objdb_init (); /* * Initialize the corosync_api_v1 definition */ apidef_init (objdb); api = apidef_get (); num_config_modules = 0; /* * Bootstrap in the default configuration parser or use * the corosync default built in parser if the configuration parser * isn't overridden */ config_iface_init = getenv("COROSYNC_DEFAULT_CONFIG_IFACE"); if (!config_iface_init) { config_iface_init = "corosync_parser"; } /* Make a copy so we can deface it with strtok */ if ((config_iface = strdup(config_iface_init)) == NULL) { log_printf (LOGSYS_LEVEL_ERROR, "exhausted virtual memory"); corosync_exit_error (AIS_DONE_OBJDB); } iface = strtok_r(config_iface, ":", &strtok_save_pt); while (iface) { res = lcr_ifact_reference ( &config_handle, iface, config_version, &config_p, 0); config = (struct config_iface_ver0 *)config_p; if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't open configuration component '%s'\n", iface); corosync_exit_error (AIS_DONE_MAINCONFIGREAD); } res = config->config_readconfig(objdb, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (AIS_DONE_MAINCONFIGREAD); } log_printf (LOGSYS_LEVEL_NOTICE, "%s", error_string); config_modules[num_config_modules++] = config; iface = strtok_r(NULL, ":", &strtok_save_pt); } free(config_iface); res = corosync_main_config_read (objdb, &error_string); if (res == -1) { /* * if we are here, we _must_ flush the logsys queue * and try to inform that we couldn't read the config. * this is a desperate attempt before certain death * and there is no guarantee that we can print to stderr * nor that logsys is sending the messages where we expect. */ log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); fprintf(stderr, "%s", error_string); syslog (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (AIS_DONE_MAINCONFIGREAD); } /* * Make sure required directory is present */ sprintf (corosync_lib_dir, "%s/lib/corosync", LOCALSTATEDIR); res = stat (corosync_lib_dir, &stat_out); if ((res == -1) || (res == 0 && !S_ISDIR(stat_out.st_mode))) { log_printf (LOGSYS_LEVEL_ERROR, "Required directory not present %s. Please create it.\n", corosync_lib_dir); corosync_exit_error (AIS_DONE_DIR_NOT_PRESENT); } res = totem_config_read (objdb, &totem_config, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (AIS_DONE_MAINCONFIGREAD); } res = totem_config_keyread (objdb, &totem_config, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (AIS_DONE_MAINCONFIGREAD); } res = totem_config_validate (&totem_config, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (AIS_DONE_MAINCONFIGREAD); } totem_config.totem_logging_configuration = totem_logging_configuration; totem_config.totem_logging_configuration.log_subsys_id = _logsys_subsys_create ("TOTEM"); if (totem_config.totem_logging_configuration.log_subsys_id < 0) { log_printf (LOGSYS_LEVEL_ERROR, "Unable to initialize TOTEM logging subsystem\n"); corosync_exit_error (AIS_DONE_MAINCONFIGREAD); } totem_config.totem_logging_configuration.log_level_security = LOGSYS_LEVEL_WARNING; totem_config.totem_logging_configuration.log_level_error = LOGSYS_LEVEL_ERROR; totem_config.totem_logging_configuration.log_level_warning = LOGSYS_LEVEL_WARNING; totem_config.totem_logging_configuration.log_level_notice = LOGSYS_LEVEL_NOTICE; totem_config.totem_logging_configuration.log_level_debug = LOGSYS_LEVEL_DEBUG; totem_config.totem_logging_configuration.log_printf = _logsys_log_printf; res = corosync_main_config_compatibility_read (objdb, &minimum_sync_mode, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (AIS_DONE_MAINCONFIGREAD); } res = corosync_main_config_compatibility_read (objdb, &minimum_sync_mode, &error_string); if (res == -1) { log_printf (LOGSYS_LEVEL_ERROR, "%s", error_string); corosync_exit_error (AIS_DONE_MAINCONFIGREAD); } /* create the main runtime object */ objdb->object_create (OBJECT_PARENT_HANDLE, &object_runtime_handle, "runtime", strlen ("runtime")); /* * Now we are fully initialized. */ if (background) { corosync_tty_detach (); } logsys_fork_completed(); if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) != AIS_DONE_EXIT) { corosync_exit_error (flock_err); } /* * if totempg_initialize doesn't have root priveleges, it cannot * bind to a specific interface. This only matters if * there is more then one interface in a system, so * in this case, only a warning is printed */ /* * Join multicast group and setup delivery * and configuration change functions */ totempg_initialize ( corosync_poll_handle, &totem_config); totempg_service_ready_register ( main_service_ready); totempg_groups_initialize ( &corosync_group_handle, deliver_fn, confchg_fn); totempg_groups_join ( corosync_group_handle, &corosync_group, 1); /* * Drop root privleges to user 'ais' * TODO: Don't really need full root capabilities; * needed capabilities are: * CAP_NET_RAW (bindtodevice) * CAP_SYS_NICE (setscheduler) * CAP_IPC_LOCK (mlockall) */ priv_drop (); schedwrk_init ( serialize_lock, serialize_unlock); /* * Start main processing loop */ qb_loop_run (corosync_poll_handle); /* * Exit was requested */ totempg_finalize (); /* * Remove pid lock file */ unlink (corosync_lock_file); corosync_exit_error (AIS_DONE_EXIT); return EXIT_SUCCESS; } diff --git a/exec/service.c b/exec/service.c index 64f3889d..04353142 100644 --- a/exec/service.c +++ b/exec/service.c @@ -1,728 +1,728 @@ /* * Copyright (c) 2006 MontaVista Software, Inc. * Copyright (c) 2006-2009 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include "mainconfig.h" #include "util.h" #include #include "timer.h" #include #include #include "main.h" #include #include "service.h" #include #include LOGSYS_DECLARE_SUBSYS ("SERV"); struct default_service { const char *name; int ver; }; static struct default_service default_services[] = { { .name = "corosync_evs", .ver = 0, }, { .name = "corosync_cfg", .ver = 0, }, { .name = "corosync_cpg", .ver = 0, }, { .name = "corosync_confdb", .ver = 0, }, { .name = "corosync_pload", .ver = 0, }, #ifdef HAVE_MONITORING { .name = "corosync_mon", .ver = 0, }, #endif #ifdef HAVE_WATCHDOG { .name = "corosync_wd", .ver = 0, }, #endif { .name = "corosync_quorum", .ver = 0, } }; /* * service exit and unlink schedwrk handler data structure */ struct seus_handler_data { hdb_handle_t service_handle; int service_engine; struct corosync_api_v1 *api; }; struct corosync_service_engine *ais_service[SERVICE_HANDLER_MAXIMUM_COUNT]; hdb_handle_t service_stats_handle[SERVICE_HANDLER_MAXIMUM_COUNT][64]; int ais_service_exiting[SERVICE_HANDLER_MAXIMUM_COUNT]; static hdb_handle_t object_internal_configuration_handle; static hdb_handle_t object_stats_services_handle; static void (*service_unlink_all_complete) (void) = NULL; static unsigned int default_services_requested (struct corosync_api_v1 *corosync_api) { hdb_handle_t object_service_handle; hdb_handle_t object_find_handle; char *value; /* * Don't link default services if they have been disabled */ corosync_api->object_find_create ( OBJECT_PARENT_HANDLE, "aisexec", strlen ("aisexec"), &object_find_handle); if (corosync_api->object_find_next ( object_find_handle, &object_service_handle) == 0) { if ( ! corosync_api->object_key_get (object_service_handle, "defaultservices", strlen ("defaultservices"), (void *)&value, NULL)) { if (value && strcmp (value, "no") == 0) { return 0; } } } corosync_api->object_find_destroy (object_find_handle); return (-1); } unsigned int corosync_service_link_and_init ( struct corosync_api_v1 *corosync_api, const char *service_name, unsigned int service_ver) { struct corosync_service_engine_iface_ver0 *iface_ver0; void *iface_ver0_p; hdb_handle_t handle; struct corosync_service_engine *service; int res; hdb_handle_t object_service_handle; hdb_handle_t object_stats_handle; int fn; char object_name[32]; char *name_sufix; uint64_t zero_64 = 0; /* * reference the service interface */ iface_ver0_p = NULL; res = lcr_ifact_reference ( &handle, service_name, service_ver, &iface_ver0_p, (void *)0); iface_ver0 = (struct corosync_service_engine_iface_ver0 *)iface_ver0_p; if (res == -1 || iface_ver0 == 0) { log_printf(LOGSYS_LEVEL_ERROR, "Service failed to load '%s'.\n", service_name); return (-1); } /* * Initialize service */ service = iface_ver0->corosync_get_service_engine_ver0(); ais_service[service->id] = service; if (service->config_init_fn) { res = service->config_init_fn (corosync_api); } if (service->exec_init_fn) { res = service->exec_init_fn (corosync_api); } /* * Store service in object database */ corosync_api->object_create (object_internal_configuration_handle, &object_service_handle, "service", strlen ("service")); corosync_api->object_key_create_typed (object_service_handle, "name", service_name, strlen (service_name) + 1, OBJDB_VALUETYPE_STRING); corosync_api->object_key_create_typed (object_service_handle, "ver", &service_ver, sizeof (service_ver), OBJDB_VALUETYPE_UINT32); res = corosync_api->object_key_create_typed (object_service_handle, "handle", &handle, sizeof (handle), OBJDB_VALUETYPE_UINT64); corosync_api->object_key_create_typed (object_service_handle, "service_id", &service->id, sizeof (service->id), OBJDB_VALUETYPE_UINT16); name_sufix = strrchr (service_name, '_'); if (name_sufix) name_sufix++; else name_sufix = (char*)service_name; corosync_api->object_create (object_stats_services_handle, &object_stats_handle, name_sufix, strlen (name_sufix)); corosync_api->object_key_create_typed (object_stats_handle, "service_id", &service->id, sizeof (service->id), OBJDB_VALUETYPE_INT16); for (fn = 0; fn < service->exec_engine_count; fn++) { snprintf (object_name, 32, "%d", fn); corosync_api->object_create (object_stats_handle, &service_stats_handle[service->id][fn], object_name, strlen (object_name)); corosync_api->object_key_create_typed (service_stats_handle[service->id][fn], "tx", &zero_64, sizeof (zero_64), OBJDB_VALUETYPE_UINT64); corosync_api->object_key_create_typed (service_stats_handle[service->id][fn], "rx", &zero_64, sizeof (zero_64), OBJDB_VALUETYPE_UINT64); } log_printf (LOGSYS_LEVEL_NOTICE, "Service engine loaded: %s [%d]\n", service->name, service->id); cs_ipcs_service_init(service); return (res); } static int service_priority_max(void) { int lpc = 0, max = 0; for(; lpc < SERVICE_HANDLER_MAXIMUM_COUNT; lpc++) { if(ais_service[lpc] != NULL && ais_service[lpc]->priority > max) { max = ais_service[lpc]->priority; } } return max; } /* * use the force */ static unsigned int corosync_service_unlink_priority ( struct corosync_api_v1 *corosync_api, int lowest_priority, int *current_priority, int *current_service_engine, hdb_handle_t *current_service_handle) { unsigned short *service_id; hdb_handle_t object_service_handle; hdb_handle_t object_find_handle; hdb_handle_t *found_service_handle; for(; *current_priority >= lowest_priority; *current_priority = *current_priority - 1) { for(*current_service_engine = 0; *current_service_engine < SERVICE_HANDLER_MAXIMUM_COUNT; *current_service_engine = *current_service_engine + 1) { if(ais_service[*current_service_engine] == NULL || ais_service[*current_service_engine]->priority != *current_priority) { continue; } /* * find service object in object database by service id * and unload it if possible. * * If the service engine's exec_exit_fn returns -1 indicating * it was busy, this function returns -1 and can be called again * at a later time (usually via the schedwrk api). */ corosync_api->object_find_create ( object_internal_configuration_handle, "service", strlen ("service"), &object_find_handle); while (corosync_api->object_find_next ( object_find_handle, &object_service_handle) == 0) { int res = corosync_api->object_key_get ( object_service_handle, "service_id", strlen ("service_id"), (void *)&service_id, NULL); if (res == 0 && *service_id == ais_service[*current_service_engine]->id) { if (ais_service[*service_id]->exec_exit_fn) { res = ais_service[*service_id]->exec_exit_fn (); if (res == -1) { corosync_api->object_find_destroy (object_find_handle); return (-1); } } res = corosync_api->object_key_get ( object_service_handle, "handle", strlen ("handle"), (void *)&found_service_handle, NULL); *current_service_handle = *found_service_handle; ais_service_exiting[*current_service_engine] = 1; corosync_api->object_find_destroy (object_find_handle); /* * Call should call this function again */ return (1); } } corosync_api->object_find_destroy (object_find_handle); } } /* * We finish unlink of all services -> no need to call this function again */ return (0); } static unsigned int service_unlink_and_exit ( struct corosync_api_v1 *corosync_api, const char *service_name, unsigned int service_ver) { hdb_handle_t object_service_handle; char *found_service_name; unsigned short *service_id; unsigned int *found_service_ver; hdb_handle_t object_find_handle; hdb_handle_t *found_service_handle; char *name_sufix; int res; name_sufix = strrchr (service_name, '_'); if (name_sufix) name_sufix++; else name_sufix = (char*)service_name; corosync_api->object_find_create ( object_stats_services_handle, name_sufix, strlen (name_sufix), &object_find_handle); if (corosync_api->object_find_next ( object_find_handle, &object_service_handle) == 0) { corosync_api->object_destroy (object_service_handle); } corosync_api->object_find_destroy (object_find_handle); corosync_api->object_find_create ( object_internal_configuration_handle, "service", strlen ("service"), &object_find_handle); while (corosync_api->object_find_next ( object_find_handle, &object_service_handle) == 0) { corosync_api->object_key_get (object_service_handle, "name", strlen ("name"), (void *)&found_service_name, NULL); if (strcmp (service_name, found_service_name) != 0) { continue; } corosync_api->object_key_get (object_service_handle, "ver", strlen ("ver"), (void *)&found_service_ver, NULL); /* * If service found and linked exit it */ if (service_ver != *found_service_ver) { continue; } corosync_api->object_key_get ( object_service_handle, "service_id", strlen ("service_id"), (void *)&service_id, NULL); if(service_id != NULL && *service_id < SERVICE_HANDLER_MAXIMUM_COUNT && ais_service[*service_id] != NULL) { corosync_api->object_find_destroy (object_find_handle); if (ais_service[*service_id]->exec_exit_fn) { res = ais_service[*service_id]->exec_exit_fn (); if (res == -1) { return (-1); } } log_printf(LOGSYS_LEVEL_NOTICE, "Service engine unloaded: %s\n", ais_service[*service_id]->name); ais_service[*service_id] = NULL; res = corosync_api->object_key_get ( object_service_handle, "handle", strlen ("handle"), (void *)&found_service_handle, NULL); cs_ipcs_service_destroy (*service_id); lcr_ifact_release (*found_service_handle); corosync_api->object_destroy (object_service_handle); } } corosync_api->object_find_destroy (object_find_handle); return (0); } /* * Links default services into the executive */ unsigned int corosync_service_defaults_link_and_init (struct corosync_api_v1 *corosync_api) { unsigned int i; hdb_handle_t object_service_handle; char *found_service_name; char *found_service_ver; unsigned int found_service_ver_atoi; hdb_handle_t object_find_handle; hdb_handle_t object_find2_handle; hdb_handle_t object_runtime_handle; corosync_api->object_find_create ( OBJECT_PARENT_HANDLE, "runtime", strlen ("runtime"), &object_find2_handle); if (corosync_api->object_find_next ( object_find2_handle, &object_runtime_handle) == 0) { corosync_api->object_create (object_runtime_handle, &object_stats_services_handle, "services", strlen ("services")); } corosync_api->object_create (OBJECT_PARENT_HANDLE, &object_internal_configuration_handle, "internal_configuration", strlen ("internal_configuration")); corosync_api->object_find_create ( OBJECT_PARENT_HANDLE, "service", strlen ("service"), &object_find_handle); while (corosync_api->object_find_next ( object_find_handle, &object_service_handle) == 0) { corosync_api->object_key_get (object_service_handle, "name", strlen ("name"), (void *)&found_service_name, NULL); found_service_ver = NULL; corosync_api->object_key_get (object_service_handle, "ver", strlen ("ver"), (void *)&found_service_ver, NULL); found_service_ver_atoi = (found_service_ver ? atoi (found_service_ver) : 0); corosync_service_link_and_init ( corosync_api, found_service_name, found_service_ver_atoi); } corosync_api->object_find_destroy (object_find_handle); if (default_services_requested (corosync_api) == 0) { return (0); } for (i = 0; i < sizeof (default_services) / sizeof (struct default_service); i++) { corosync_service_link_and_init ( corosync_api, default_services[i].name, default_services[i].ver); } return (0); } /* * Declaration of exit_schedwrk_handler, because of cycle * (service_exit_schedwrk_handler calls service_unlink_schedwrk_handler, and vice-versa) */ static void service_exit_schedwrk_handler (void *data); static void service_unlink_schedwrk_handler (void *data) { struct seus_handler_data *cb_data = (struct seus_handler_data *)data; /* * Exit all ipc connections dependent on this service */ if (cs_ipcs_service_destroy (cb_data->service_engine) == -1) { goto redo_this_function; } log_printf(LOGSYS_LEVEL_NOTICE, "Service engine unloaded: %s\n", ais_service[cb_data->service_engine]->name); ais_service[cb_data->service_engine] = NULL; lcr_ifact_release (cb_data->service_handle); - qb_loop_job_add(corosync_poll_handle_get(), + qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, data, service_exit_schedwrk_handler); return; redo_this_function: - qb_loop_job_add(corosync_poll_handle_get(), + qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, data, service_unlink_schedwrk_handler); } static void service_exit_schedwrk_handler (void *data) { int res; static int current_priority = 0; static int current_service_engine = 0; static int called = 0; struct seus_handler_data *cb_data = (struct seus_handler_data *)data; struct corosync_api_v1 *api = (struct corosync_api_v1 *)cb_data->api; hdb_handle_t service_handle; if (called == 0) { log_printf(LOGSYS_LEVEL_NOTICE, "Unloading all Corosync service engines.\n"); current_priority = service_priority_max (); called = 1; } res = corosync_service_unlink_priority ( api, 0, ¤t_priority, ¤t_service_engine, &service_handle); if (res == 0) { service_unlink_all_complete(); return; } if (res == 1) { cb_data->service_engine = current_service_engine; cb_data->service_handle = service_handle; - qb_loop_job_add(corosync_poll_handle_get(), + qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, data, service_unlink_schedwrk_handler); return; } - qb_loop_job_add(corosync_poll_handle_get(), + qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, data, service_exit_schedwrk_handler); } void corosync_service_unlink_all ( struct corosync_api_v1 *api, void (*unlink_all_complete) (void)) { static int called = 0; static struct seus_handler_data cb_data; assert (api); service_unlink_all_complete = unlink_all_complete; if (called) { return; } if (called == 0) { called = 1; } cb_data.api = api; - qb_loop_job_add(corosync_poll_handle_get(), + qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, &cb_data, service_exit_schedwrk_handler); } struct service_unlink_and_exit_data { hdb_handle_t handle; struct corosync_api_v1 *api; const char *name; unsigned int ver; }; static void service_unlink_and_exit_schedwrk_handler (void *data) { struct service_unlink_and_exit_data *service_unlink_and_exit_data = data; int res; res = service_unlink_and_exit ( service_unlink_and_exit_data->api, service_unlink_and_exit_data->name, service_unlink_and_exit_data->ver); if (res == 0) { free (service_unlink_and_exit_data); } else { - qb_loop_job_add(corosync_poll_handle_get(), + qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, data, service_unlink_and_exit_schedwrk_handler); } } typedef int (*schedwrk_cast) (const void *); unsigned int corosync_service_unlink_and_exit ( struct corosync_api_v1 *api, const char *service_name, unsigned int service_ver) { struct service_unlink_and_exit_data *service_unlink_and_exit_data; assert (api); service_unlink_and_exit_data = malloc (sizeof (struct service_unlink_and_exit_data)); service_unlink_and_exit_data->api = api; service_unlink_and_exit_data->name = strdup (service_name); service_unlink_and_exit_data->ver = service_ver; - qb_loop_job_add(corosync_poll_handle_get(), + qb_loop_job_add(cs_poll_handle_get(), QB_LOOP_HIGH, service_unlink_and_exit_data, service_unlink_and_exit_schedwrk_handler); return (0); } diff --git a/services/cpg.c b/services/cpg.c index 87cfaba9..e7573c5b 100644 --- a/services/cpg.c +++ b/services/cpg.c @@ -1,2058 +1,2054 @@ /* * Copyright (c) 2006-2009 Red Hat, Inc. * * All rights reserved. * * Author: Christine Caulfield (ccaulfie@redhat.com) * Author: Jan Friesse (jfriesse@redhat.com) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #ifdef HAVE_ALLOCA_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include LOGSYS_DECLARE_SUBSYS ("CPG"); #define GROUP_HASH_SIZE 32 enum cpg_message_req_types { MESSAGE_REQ_EXEC_CPG_PROCJOIN = 0, MESSAGE_REQ_EXEC_CPG_PROCLEAVE = 1, MESSAGE_REQ_EXEC_CPG_JOINLIST = 2, MESSAGE_REQ_EXEC_CPG_MCAST = 3, MESSAGE_REQ_EXEC_CPG_DOWNLIST_OLD = 4, MESSAGE_REQ_EXEC_CPG_DOWNLIST = 5 }; struct zcb_mapped { struct list_head list; void *addr; size_t size; }; /* * state` exec deliver * match group name, pid -> if matched deliver for YES: * XXX indicates impossible state * * join leave mcast * UNJOINED XXX XXX NO * LEAVE_STARTED XXX YES(unjoined_enter) YES * JOIN_STARTED YES(join_started_enter) XXX NO * JOIN_COMPLETED XXX NO YES * * join_started_enter * set JOIN_COMPLETED * add entry to process_info list * unjoined_enter * set UNJOINED * delete entry from process_info list * * * library accept join error codes * UNJOINED YES(CS_OK) set JOIN_STARTED * LEAVE_STARTED NO(CS_ERR_BUSY) * JOIN_STARTED NO(CS_ERR_EXIST) * JOIN_COMPlETED NO(CS_ERR_EXIST) * * library accept leave error codes * UNJOINED NO(CS_ERR_NOT_EXIST) * LEAVE_STARTED NO(CS_ERR_NOT_EXIST) * JOIN_STARTED NO(CS_ERR_BUSY) * JOIN_COMPLETED YES(CS_OK) set LEAVE_STARTED * * library accept mcast * UNJOINED NO(CS_ERR_NOT_EXIST) * LEAVE_STARTED NO(CS_ERR_NOT_EXIST) * JOIN_STARTED YES(CS_OK) * JOIN_COMPLETED YES(CS_OK) */ enum cpd_state { CPD_STATE_UNJOINED, CPD_STATE_LEAVE_STARTED, CPD_STATE_JOIN_STARTED, CPD_STATE_JOIN_COMPLETED }; enum cpg_sync_state { CPGSYNC_DOWNLIST, CPGSYNC_JOINLIST }; enum cpg_downlist_state_e { CPG_DOWNLIST_NONE, CPG_DOWNLIST_WAITING_FOR_MESSAGES, CPG_DOWNLIST_APPLYING, }; static enum cpg_downlist_state_e downlist_state; static struct list_head downlist_messages_head; struct cpg_pd { void *conn; mar_cpg_name_t group_name; uint32_t pid; enum cpd_state cpd_state; unsigned int flags; int initial_totem_conf_sent; struct list_head list; struct list_head iteration_instance_list_head; struct list_head zcb_mapped_list_head; }; struct cpg_iteration_instance { hdb_handle_t handle; struct list_head list; struct list_head items_list_head; /* List of process_info */ struct list_head *current_pointer; }; DECLARE_HDB_DATABASE(cpg_iteration_handle_t_db,NULL); DECLARE_LIST_INIT(cpg_pd_list_head); static unsigned int my_member_list[PROCESSOR_COUNT_MAX]; static unsigned int my_member_list_entries; static unsigned int my_old_member_list[PROCESSOR_COUNT_MAX]; static unsigned int my_old_member_list_entries = 0; static struct corosync_api_v1 *api = NULL; static enum cpg_sync_state my_sync_state = CPGSYNC_DOWNLIST; static mar_cpg_ring_id_t last_sync_ring_id; struct process_info { unsigned int nodeid; uint32_t pid; mar_cpg_name_t group; struct list_head list; /* on the group_info members list */ }; DECLARE_LIST_INIT(process_info_list_head); struct join_list_entry { uint32_t pid; mar_cpg_name_t group_name; }; /* * Service Interfaces required by service_message_handler struct */ static int cpg_exec_init_fn (struct corosync_api_v1 *); static int cpg_lib_init_fn (void *conn); static int cpg_lib_exit_fn (void *conn); static void message_handler_req_exec_cpg_procjoin ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_procleave ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_joinlist ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_mcast ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_downlist_old ( const void *message, unsigned int nodeid); static void message_handler_req_exec_cpg_downlist ( const void *message, unsigned int nodeid); static void exec_cpg_procjoin_endian_convert (void *msg); static void exec_cpg_joinlist_endian_convert (void *msg); static void exec_cpg_mcast_endian_convert (void *msg); static void exec_cpg_downlist_endian_convert_old (void *msg); static void exec_cpg_downlist_endian_convert (void *msg); static void message_handler_req_lib_cpg_join (void *conn, const void *message); static void message_handler_req_lib_cpg_leave (void *conn, const void *message); static void message_handler_req_lib_cpg_finalize (void *conn, const void *message); static void message_handler_req_lib_cpg_mcast (void *conn, const void *message); static void message_handler_req_lib_cpg_membership (void *conn, const void *message); static void message_handler_req_lib_cpg_local_get (void *conn, const void *message); static void message_handler_req_lib_cpg_iteration_initialize ( void *conn, const void *message); static void message_handler_req_lib_cpg_iteration_next ( void *conn, const void *message); static void message_handler_req_lib_cpg_iteration_finalize ( void *conn, const void *message); static void message_handler_req_lib_cpg_zc_alloc ( void *conn, const void *message); static void message_handler_req_lib_cpg_zc_free ( void *conn, const void *message); static void message_handler_req_lib_cpg_zc_execute ( void *conn, const void *message); static int cpg_node_joinleave_send (unsigned int pid, const mar_cpg_name_t *group_name, int fn, int reason); static int cpg_exec_send_downlist(void); static int cpg_exec_send_joinlist(void); static void downlist_messages_delete (void); static void downlist_master_choose_and_send (void); static void cpg_sync_init_v2 ( const unsigned int *trans_list, size_t trans_list_entries, const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id); static int cpg_sync_process (void); static void cpg_sync_activate (void); static void cpg_sync_abort (void); static int notify_lib_totem_membership ( void *conn, int member_list_entries, const unsigned int *member_list); static inline int zcb_all_free ( struct cpg_pd *cpd); /* * Library Handler Definition */ static struct corosync_lib_handler cpg_lib_engine[] = { { /* 0 */ .lib_handler_fn = message_handler_req_lib_cpg_join, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 1 */ .lib_handler_fn = message_handler_req_lib_cpg_leave, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 2 */ .lib_handler_fn = message_handler_req_lib_cpg_mcast, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 3 */ .lib_handler_fn = message_handler_req_lib_cpg_membership, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 4 */ .lib_handler_fn = message_handler_req_lib_cpg_local_get, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 5 */ .lib_handler_fn = message_handler_req_lib_cpg_iteration_initialize, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 6 */ .lib_handler_fn = message_handler_req_lib_cpg_iteration_next, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 7 */ .lib_handler_fn = message_handler_req_lib_cpg_iteration_finalize, .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED }, { /* 8 */ .lib_handler_fn = message_handler_req_lib_cpg_finalize, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 9 */ .lib_handler_fn = message_handler_req_lib_cpg_zc_alloc, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 10 */ .lib_handler_fn = message_handler_req_lib_cpg_zc_free, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, { /* 11 */ .lib_handler_fn = message_handler_req_lib_cpg_zc_execute, .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED }, }; static struct corosync_exec_handler cpg_exec_engine[] = { { /* 0 */ .exec_handler_fn = message_handler_req_exec_cpg_procjoin, .exec_endian_convert_fn = exec_cpg_procjoin_endian_convert }, { /* 1 */ .exec_handler_fn = message_handler_req_exec_cpg_procleave, .exec_endian_convert_fn = exec_cpg_procjoin_endian_convert }, { /* 2 */ .exec_handler_fn = message_handler_req_exec_cpg_joinlist, .exec_endian_convert_fn = exec_cpg_joinlist_endian_convert }, { /* 3 */ .exec_handler_fn = message_handler_req_exec_cpg_mcast, .exec_endian_convert_fn = exec_cpg_mcast_endian_convert }, { /* 4 */ .exec_handler_fn = message_handler_req_exec_cpg_downlist_old, .exec_endian_convert_fn = exec_cpg_downlist_endian_convert_old }, { /* 5 */ .exec_handler_fn = message_handler_req_exec_cpg_downlist, .exec_endian_convert_fn = exec_cpg_downlist_endian_convert }, }; struct corosync_service_engine cpg_service_engine = { .name = "corosync cluster closed process group service v1.01", .id = CPG_SERVICE, .priority = 1, .private_data_size = sizeof (struct cpg_pd), .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED, .allow_inquorate = CS_LIB_ALLOW_INQUORATE, .lib_init_fn = cpg_lib_init_fn, .lib_exit_fn = cpg_lib_exit_fn, .lib_engine = cpg_lib_engine, .lib_engine_count = sizeof (cpg_lib_engine) / sizeof (struct corosync_lib_handler), .exec_init_fn = cpg_exec_init_fn, .exec_dump_fn = NULL, .exec_engine = cpg_exec_engine, .exec_engine_count = sizeof (cpg_exec_engine) / sizeof (struct corosync_exec_handler), .sync_mode = CS_SYNC_V1_APIV2, .sync_init = (sync_init_v1_fn_t)cpg_sync_init_v2, .sync_process = cpg_sync_process, .sync_activate = cpg_sync_activate, .sync_abort = cpg_sync_abort }; /* * Dynamic loader definition */ static struct corosync_service_engine *cpg_get_service_engine_ver0 (void); static struct corosync_service_engine_iface_ver0 cpg_service_engine_iface = { .corosync_get_service_engine_ver0 = cpg_get_service_engine_ver0 }; static struct lcr_iface corosync_cpg_ver0[1] = { { .name = "corosync_cpg", .version = 0, .versions_replace = 0, .versions_replace_count = 0, .dependencies = 0, .dependency_count = 0, .constructor = NULL, .destructor = NULL, .interfaces = NULL } }; static struct lcr_comp cpg_comp_ver0 = { .iface_count = 1, .ifaces = corosync_cpg_ver0 }; static struct corosync_service_engine *cpg_get_service_engine_ver0 (void) { return (&cpg_service_engine); } #ifdef COROSYNC_SOLARIS void corosync_lcr_component_register (void); void corosync_lcr_component_register (void) { #else __attribute__ ((constructor)) static void corosync_lcr_component_register (void) { #endif lcr_interfaces_set (&corosync_cpg_ver0[0], &cpg_service_engine_iface); lcr_component_register (&cpg_comp_ver0); } struct req_exec_cpg_procjoin { struct qb_ipc_request_header header __attribute__((aligned(8))); mar_cpg_name_t group_name __attribute__((aligned(8))); mar_uint32_t pid __attribute__((aligned(8))); mar_uint32_t reason __attribute__((aligned(8))); }; struct req_exec_cpg_mcast { struct qb_ipc_request_header header __attribute__((aligned(8))); mar_cpg_name_t group_name __attribute__((aligned(8))); mar_uint32_t msglen __attribute__((aligned(8))); mar_uint32_t pid __attribute__((aligned(8))); mar_message_source_t source __attribute__((aligned(8))); mar_uint8_t message[] __attribute__((aligned(8))); }; struct req_exec_cpg_downlist_old { struct qb_ipc_request_header header __attribute__((aligned(8))); mar_uint32_t left_nodes __attribute__((aligned(8))); mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8))); }; struct req_exec_cpg_downlist { struct qb_ipc_request_header header __attribute__((aligned(8))); /* merge decisions */ mar_uint32_t old_members __attribute__((aligned(8))); /* downlist below */ mar_uint32_t left_nodes __attribute__((aligned(8))); mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8))); }; struct downlist_msg { mar_uint32_t sender_nodeid; mar_uint32_t old_members __attribute__((aligned(8))); mar_uint32_t left_nodes __attribute__((aligned(8))); mar_uint32_t nodeids[PROCESSOR_COUNT_MAX] __attribute__((aligned(8))); struct list_head list; }; static struct req_exec_cpg_downlist g_req_exec_cpg_downlist; static void cpg_sync_init_v2 ( const unsigned int *trans_list, size_t trans_list_entries, const unsigned int *member_list, size_t member_list_entries, const struct memb_ring_id *ring_id) { int entries; int i, j; int found; my_sync_state = CPGSYNC_DOWNLIST; memcpy (my_member_list, member_list, member_list_entries * sizeof (unsigned int)); my_member_list_entries = member_list_entries; last_sync_ring_id.nodeid = ring_id->rep.nodeid; last_sync_ring_id.seq = ring_id->seq; downlist_state = CPG_DOWNLIST_WAITING_FOR_MESSAGES; entries = 0; /* * Determine list of nodeids for downlist message */ for (i = 0; i < my_old_member_list_entries; i++) { found = 0; for (j = 0; j < trans_list_entries; j++) { if (my_old_member_list[i] == trans_list[j]) { found = 1; break; } } if (found == 0) { g_req_exec_cpg_downlist.nodeids[entries++] = my_old_member_list[i]; } } g_req_exec_cpg_downlist.left_nodes = entries; } static int cpg_sync_process (void) { int res = -1; if (my_sync_state == CPGSYNC_DOWNLIST) { res = cpg_exec_send_downlist(); if (res == -1) { return (-1); } my_sync_state = CPGSYNC_JOINLIST; } if (my_sync_state == CPGSYNC_JOINLIST) { res = cpg_exec_send_joinlist(); } return (res); } static void cpg_sync_activate (void) { memcpy (my_old_member_list, my_member_list, my_member_list_entries * sizeof (unsigned int)); my_old_member_list_entries = my_member_list_entries; if (downlist_state == CPG_DOWNLIST_WAITING_FOR_MESSAGES) { downlist_master_choose_and_send (); } downlist_messages_delete (); downlist_state = CPG_DOWNLIST_NONE; notify_lib_totem_membership (NULL, my_member_list_entries, my_member_list); } static void cpg_sync_abort (void) { downlist_state = CPG_DOWNLIST_NONE; downlist_messages_delete (); } static int notify_lib_totem_membership ( void *conn, int member_list_entries, const unsigned int *member_list) { struct list_head *iter; char *buf; int size; struct res_lib_cpg_totem_confchg_callback *res; size = sizeof(struct res_lib_cpg_totem_confchg_callback) + sizeof(mar_uint32_t) * (member_list_entries); buf = alloca(size); if (!buf) return CS_ERR_LIBRARY; res = (struct res_lib_cpg_totem_confchg_callback *)buf; res->member_list_entries = member_list_entries; res->header.size = size; res->header.id = MESSAGE_RES_CPG_TOTEM_CONFCHG_CALLBACK; res->header.error = CS_OK; memcpy (&res->ring_id, &last_sync_ring_id, sizeof (mar_cpg_ring_id_t)); memcpy (res->member_list, member_list, res->member_list_entries * sizeof (mar_uint32_t)); if (conn == NULL) { for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) { struct cpg_pd *cpg_pd = list_entry (iter, struct cpg_pd, list); api->ipc_dispatch_send (cpg_pd->conn, buf, size); } } else { api->ipc_dispatch_send (conn, buf, size); } return CS_OK; } static int notify_lib_joinlist( const mar_cpg_name_t *group_name, void *conn, int joined_list_entries, mar_cpg_address_t *joined_list, int left_list_entries, mar_cpg_address_t *left_list, int id) { int size; char *buf; struct list_head *iter; int count; struct res_lib_cpg_confchg_callback *res; mar_cpg_address_t *retgi; count = 0; for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { struct process_info *pi = list_entry (iter, struct process_info, list); if (mar_name_compare (&pi->group, group_name) == 0) { int i; int founded = 0; for (i = 0; i < left_list_entries; i++) { if (left_list[i].nodeid == pi->nodeid && left_list[i].pid == pi->pid) { founded++; } } if (!founded) count++; } } size = sizeof(struct res_lib_cpg_confchg_callback) + sizeof(mar_cpg_address_t) * (count + left_list_entries + joined_list_entries); buf = alloca(size); if (!buf) return CS_ERR_LIBRARY; res = (struct res_lib_cpg_confchg_callback *)buf; res->joined_list_entries = joined_list_entries; res->left_list_entries = left_list_entries; res->member_list_entries = count; retgi = res->member_list; res->header.size = size; res->header.id = id; res->header.error = CS_OK; memcpy(&res->group_name, group_name, sizeof(mar_cpg_name_t)); for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { struct process_info *pi=list_entry (iter, struct process_info, list); if (mar_name_compare (&pi->group, group_name) == 0) { int i; int founded = 0; for (i = 0;i < left_list_entries; i++) { if (left_list[i].nodeid == pi->nodeid && left_list[i].pid == pi->pid) { founded++; } } if (!founded) { retgi->nodeid = pi->nodeid; retgi->pid = pi->pid; retgi++; } } } if (left_list_entries) { memcpy (retgi, left_list, left_list_entries * sizeof(mar_cpg_address_t)); retgi += left_list_entries; } if (joined_list_entries) { memcpy (retgi, joined_list, joined_list_entries * sizeof(mar_cpg_address_t)); retgi += joined_list_entries; } if (conn) { api->ipc_dispatch_send (conn, buf, size); } else { for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) { struct cpg_pd *cpd = list_entry (iter, struct cpg_pd, list); if (mar_name_compare (&cpd->group_name, group_name) == 0) { assert (left_list_entries <= 1); assert (joined_list_entries <= 1); if (joined_list_entries) { if (joined_list[0].pid == cpd->pid && joined_list[0].nodeid == api->totem_nodeid_get()) { cpd->cpd_state = CPD_STATE_JOIN_COMPLETED; } } if (cpd->cpd_state == CPD_STATE_JOIN_COMPLETED || cpd->cpd_state == CPD_STATE_LEAVE_STARTED) { api->ipc_dispatch_send (cpd->conn, buf, size); } if (left_list_entries) { if (left_list[0].pid == cpd->pid && left_list[0].nodeid == api->totem_nodeid_get()) { cpd->pid = 0; memset (&cpd->group_name, 0, sizeof(cpd->group_name)); cpd->cpd_state = CPD_STATE_UNJOINED; } } } } } /* * Traverse thru cpds and send totem membership for cpd, where it is not send yet */ for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) { struct cpg_pd *cpd = list_entry (iter, struct cpg_pd, list); if ((cpd->flags & CPG_MODEL_V1_DELIVER_INITIAL_TOTEM_CONF) && (cpd->initial_totem_conf_sent == 0)) { cpd->initial_totem_conf_sent = 1; notify_lib_totem_membership (cpd->conn, my_old_member_list_entries, my_old_member_list); } } return CS_OK; } static void downlist_log(int loglevel, const char *msg, struct downlist_msg* dl) { log_printf (loglevel, "%s: sender %s; members(old:%d left:%d)", msg, api->totem_ifaces_print(dl->sender_nodeid), dl->old_members, dl->left_nodes); } static struct downlist_msg* downlist_master_choose (void) { struct downlist_msg *cmp; struct downlist_msg *best = NULL; struct list_head *iter; uint32_t cmp_members; uint32_t best_members; for (iter = downlist_messages_head.next; iter != &downlist_messages_head; iter = iter->next) { cmp = list_entry(iter, struct downlist_msg, list); downlist_log(LOGSYS_LEVEL_DEBUG, "comparing", cmp); if (best == NULL) { best = cmp; continue; } best_members = best->old_members - best->left_nodes; cmp_members = cmp->old_members - cmp->left_nodes; if (cmp_members < best_members) { continue; } else if (cmp_members > best_members) { best = cmp; } else if (cmp->sender_nodeid < best->sender_nodeid) { best = cmp; } } return best; } static void downlist_master_choose_and_send (void) { struct downlist_msg *stored_msg; struct list_head *iter; mar_cpg_address_t left_list; int i; downlist_state = CPG_DOWNLIST_APPLYING; stored_msg = downlist_master_choose (); if (!stored_msg) { log_printf (LOGSYS_LEVEL_DEBUG, "NO chosen downlist"); return; } downlist_log(LOGSYS_LEVEL_DEBUG, "chosen downlist", stored_msg); /* send events */ for (iter = process_info_list_head.next; iter != &process_info_list_head; ) { struct process_info *pi = list_entry(iter, struct process_info, list); iter = iter->next; for (i = 0; i < stored_msg->left_nodes; i++) { if (pi->nodeid == stored_msg->nodeids[i]) { left_list.nodeid = pi->nodeid; left_list.pid = pi->pid; left_list.reason = CONFCHG_CPG_REASON_NODEDOWN; notify_lib_joinlist(&pi->group, NULL, 0, NULL, 1, &left_list, MESSAGE_RES_CPG_CONFCHG_CALLBACK); list_del (&pi->list); free (pi); break; } } } } static void downlist_messages_delete (void) { struct downlist_msg *stored_msg; struct list_head *iter, *iter_next; for (iter = downlist_messages_head.next; iter != &downlist_messages_head; iter = iter_next) { iter_next = iter->next; stored_msg = list_entry(iter, struct downlist_msg, list); list_del (&stored_msg->list); free (stored_msg); } } static int cpg_exec_init_fn (struct corosync_api_v1 *corosync_api) { #ifdef COROSYNC_SOLARIS logsys_subsys_init(); #endif list_init (&downlist_messages_head); api = corosync_api; return (0); } static void cpg_iteration_instance_finalize (struct cpg_iteration_instance *cpg_iteration_instance) { struct list_head *iter, *iter_next; struct process_info *pi; for (iter = cpg_iteration_instance->items_list_head.next; iter != &cpg_iteration_instance->items_list_head; iter = iter_next) { iter_next = iter->next; pi = list_entry (iter, struct process_info, list); list_del (&pi->list); free (pi); } list_del (&cpg_iteration_instance->list); hdb_handle_destroy (&cpg_iteration_handle_t_db, cpg_iteration_instance->handle); } static void cpg_pd_finalize (struct cpg_pd *cpd) { struct list_head *iter, *iter_next; struct cpg_iteration_instance *cpii; zcb_all_free(cpd); for (iter = cpd->iteration_instance_list_head.next; iter != &cpd->iteration_instance_list_head; iter = iter_next) { iter_next = iter->next; cpii = list_entry (iter, struct cpg_iteration_instance, list); cpg_iteration_instance_finalize (cpii); } list_del (&cpd->list); } static int cpg_lib_exit_fn (void *conn) { struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, "exit_fn for conn=%p\n", conn); if (cpd->group_name.length > 0) { cpg_node_joinleave_send (cpd->pid, &cpd->group_name, MESSAGE_REQ_EXEC_CPG_PROCLEAVE, CONFCHG_CPG_REASON_PROCDOWN); } cpg_pd_finalize (cpd); api->ipc_refcnt_dec (conn); return (0); } static int cpg_node_joinleave_send (unsigned int pid, const mar_cpg_name_t *group_name, int fn, int reason) { struct req_exec_cpg_procjoin req_exec_cpg_procjoin; struct iovec req_exec_cpg_iovec; int result; memcpy(&req_exec_cpg_procjoin.group_name, group_name, sizeof(mar_cpg_name_t)); req_exec_cpg_procjoin.pid = pid; req_exec_cpg_procjoin.reason = reason; req_exec_cpg_procjoin.header.size = sizeof(req_exec_cpg_procjoin); req_exec_cpg_procjoin.header.id = SERVICE_ID_MAKE(CPG_SERVICE, fn); req_exec_cpg_iovec.iov_base = (char *)&req_exec_cpg_procjoin; req_exec_cpg_iovec.iov_len = sizeof(req_exec_cpg_procjoin); result = api->totem_mcast (&req_exec_cpg_iovec, 1, TOTEM_AGREED); return (result); } /* Can byteswap join & leave messages */ static void exec_cpg_procjoin_endian_convert (void *msg) { struct req_exec_cpg_procjoin *req_exec_cpg_procjoin = msg; req_exec_cpg_procjoin->pid = swab32(req_exec_cpg_procjoin->pid); swab_mar_cpg_name_t (&req_exec_cpg_procjoin->group_name); req_exec_cpg_procjoin->reason = swab32(req_exec_cpg_procjoin->reason); } static void exec_cpg_joinlist_endian_convert (void *msg_v) { char *msg = msg_v; struct qb_ipc_response_header *res = (struct qb_ipc_response_header *)msg; struct join_list_entry *jle = (struct join_list_entry *)(msg + sizeof(struct qb_ipc_response_header)); swab_mar_int32_t (&res->size); while ((const char*)jle < msg + res->size) { jle->pid = swab32(jle->pid); swab_mar_cpg_name_t (&jle->group_name); jle++; } } static void exec_cpg_downlist_endian_convert_old (void *msg) { } static void exec_cpg_downlist_endian_convert (void *msg) { struct req_exec_cpg_downlist *req_exec_cpg_downlist = msg; unsigned int i; req_exec_cpg_downlist->left_nodes = swab32(req_exec_cpg_downlist->left_nodes); req_exec_cpg_downlist->old_members = swab32(req_exec_cpg_downlist->old_members); for (i = 0; i < req_exec_cpg_downlist->left_nodes; i++) { req_exec_cpg_downlist->nodeids[i] = swab32(req_exec_cpg_downlist->nodeids[i]); } } static void exec_cpg_mcast_endian_convert (void *msg) { struct req_exec_cpg_mcast *req_exec_cpg_mcast = msg; swab_coroipc_request_header_t (&req_exec_cpg_mcast->header); swab_mar_cpg_name_t (&req_exec_cpg_mcast->group_name); req_exec_cpg_mcast->pid = swab32(req_exec_cpg_mcast->pid); req_exec_cpg_mcast->msglen = swab32(req_exec_cpg_mcast->msglen); swab_mar_message_source_t (&req_exec_cpg_mcast->source); } static struct process_info *process_info_find(const mar_cpg_name_t *group_name, uint32_t pid, unsigned int nodeid) { struct list_head *iter; for (iter = process_info_list_head.next; iter != &process_info_list_head; ) { struct process_info *pi = list_entry (iter, struct process_info, list); iter = iter->next; if (pi->pid == pid && pi->nodeid == nodeid && mar_name_compare (&pi->group, group_name) == 0) { return pi; } } return NULL; } static void do_proc_join( const mar_cpg_name_t *name, uint32_t pid, unsigned int nodeid, int reason) { struct process_info *pi; struct process_info *pi_entry; mar_cpg_address_t notify_info; struct list_head *list; struct list_head *list_to_add = NULL; if (process_info_find (name, pid, nodeid) != NULL) { return ; } pi = malloc (sizeof (struct process_info)); if (!pi) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate process_info struct"); return; } pi->nodeid = nodeid; pi->pid = pid; memcpy(&pi->group, name, sizeof(*name)); list_init(&pi->list); /* * Insert new process in sorted order so synchronization works properly */ list_to_add = &process_info_list_head; for (list = process_info_list_head.next; list != &process_info_list_head; list = list->next) { pi_entry = list_entry(list, struct process_info, list); if (pi_entry->nodeid > pi->nodeid || (pi_entry->nodeid == pi->nodeid && pi_entry->pid > pi->pid)) { break; } list_to_add = list; } list_add (&pi->list, list_to_add); notify_info.pid = pi->pid; notify_info.nodeid = nodeid; notify_info.reason = reason; notify_lib_joinlist(&pi->group, NULL, 1, ¬ify_info, 0, NULL, MESSAGE_RES_CPG_CONFCHG_CALLBACK); } static void message_handler_req_exec_cpg_downlist_old ( const void *message, unsigned int nodeid) { log_printf (LOGSYS_LEVEL_WARNING, "downlist OLD from node %d", nodeid); } static void message_handler_req_exec_cpg_downlist( const void *message, unsigned int nodeid) { const struct req_exec_cpg_downlist *req_exec_cpg_downlist = message; int i; struct list_head *iter; struct downlist_msg *stored_msg; int found; if (downlist_state != CPG_DOWNLIST_WAITING_FOR_MESSAGES) { log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received in state %d", req_exec_cpg_downlist->left_nodes, downlist_state); return; } stored_msg = malloc (sizeof (struct downlist_msg)); stored_msg->sender_nodeid = nodeid; stored_msg->old_members = req_exec_cpg_downlist->old_members; stored_msg->left_nodes = req_exec_cpg_downlist->left_nodes; memcpy (stored_msg->nodeids, req_exec_cpg_downlist->nodeids, req_exec_cpg_downlist->left_nodes * sizeof (mar_uint32_t)); list_init (&stored_msg->list); list_add (&stored_msg->list, &downlist_messages_head); for (i = 0; i < my_member_list_entries; i++) { found = 0; for (iter = downlist_messages_head.next; iter != &downlist_messages_head; iter = iter->next) { stored_msg = list_entry(iter, struct downlist_msg, list); if (my_member_list[i] == stored_msg->sender_nodeid) { found = 1; } } if (!found) { return; } } downlist_master_choose_and_send (); } static void message_handler_req_exec_cpg_procjoin ( const void *message, unsigned int nodeid) { const struct req_exec_cpg_procjoin *req_exec_cpg_procjoin = message; log_printf(LOGSYS_LEVEL_DEBUG, "got procjoin message from cluster node %d\n", nodeid); do_proc_join (&req_exec_cpg_procjoin->group_name, req_exec_cpg_procjoin->pid, nodeid, CONFCHG_CPG_REASON_JOIN); } static void message_handler_req_exec_cpg_procleave ( const void *message, unsigned int nodeid) { const struct req_exec_cpg_procjoin *req_exec_cpg_procjoin = message; struct process_info *pi; struct list_head *iter; mar_cpg_address_t notify_info; log_printf(LOGSYS_LEVEL_DEBUG, "got procleave message from cluster node %d\n", nodeid); notify_info.pid = req_exec_cpg_procjoin->pid; notify_info.nodeid = nodeid; notify_info.reason = req_exec_cpg_procjoin->reason; notify_lib_joinlist(&req_exec_cpg_procjoin->group_name, NULL, 0, NULL, 1, ¬ify_info, MESSAGE_RES_CPG_CONFCHG_CALLBACK); for (iter = process_info_list_head.next; iter != &process_info_list_head; ) { pi = list_entry(iter, struct process_info, list); iter = iter->next; if (pi->pid == req_exec_cpg_procjoin->pid && pi->nodeid == nodeid && mar_name_compare (&pi->group, &req_exec_cpg_procjoin->group_name)==0) { list_del (&pi->list); free (pi); } } } /* Got a proclist from another node */ static void message_handler_req_exec_cpg_joinlist ( const void *message_v, unsigned int nodeid) { const char *message = message_v; const struct qb_ipc_response_header *res = (const struct qb_ipc_response_header *)message; const struct join_list_entry *jle = (const struct join_list_entry *)(message + sizeof(struct qb_ipc_response_header)); log_printf(LOGSYS_LEVEL_DEBUG, "got joinlist message from node %x\n", nodeid); /* Ignore our own messages */ if (nodeid == api->totem_nodeid_get()) { return; } while ((const char*)jle < message + res->size) { do_proc_join (&jle->group_name, jle->pid, nodeid, CONFCHG_CPG_REASON_NODEUP); jle++; } } static void message_handler_req_exec_cpg_mcast ( const void *message, unsigned int nodeid) { const struct req_exec_cpg_mcast *req_exec_cpg_mcast = message; struct res_lib_cpg_deliver_callback res_lib_cpg_mcast; int msglen = req_exec_cpg_mcast->msglen; struct list_head *iter, *pi_iter; struct cpg_pd *cpd; struct iovec iovec[2]; int known_node = 0; res_lib_cpg_mcast.header.id = MESSAGE_RES_CPG_DELIVER_CALLBACK; res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast) + msglen; res_lib_cpg_mcast.msglen = msglen; res_lib_cpg_mcast.pid = req_exec_cpg_mcast->pid; res_lib_cpg_mcast.nodeid = nodeid; memcpy(&res_lib_cpg_mcast.group_name, &req_exec_cpg_mcast->group_name, sizeof(mar_cpg_name_t)); iovec[0].iov_base = (void *)&res_lib_cpg_mcast; iovec[0].iov_len = sizeof (res_lib_cpg_mcast); iovec[1].iov_base = (char*)message+sizeof(*req_exec_cpg_mcast); iovec[1].iov_len = msglen; for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; ) { cpd = list_entry(iter, struct cpg_pd, list); iter = iter->next; if ((cpd->cpd_state == CPD_STATE_LEAVE_STARTED || cpd->cpd_state == CPD_STATE_JOIN_COMPLETED) && (mar_name_compare (&cpd->group_name, &req_exec_cpg_mcast->group_name) == 0)) { if (!known_node) { /* Try to find, if we know the node */ for (pi_iter = process_info_list_head.next; pi_iter != &process_info_list_head; pi_iter = pi_iter->next) { struct process_info *pi = list_entry (pi_iter, struct process_info, list); if (pi->nodeid == nodeid && mar_name_compare (&pi->group, &req_exec_cpg_mcast->group_name) == 0) { known_node = 1; break; } } } if (!known_node) { log_printf(LOGSYS_LEVEL_WARNING, "Unknown node -> we will not deliver message"); return ; } api->ipc_dispatch_iov_send (cpd->conn, iovec, 2); } } } static int cpg_exec_send_downlist(void) { struct iovec iov; g_req_exec_cpg_downlist.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_DOWNLIST); g_req_exec_cpg_downlist.header.size = sizeof(struct req_exec_cpg_downlist); g_req_exec_cpg_downlist.old_members = my_old_member_list_entries; iov.iov_base = (void *)&g_req_exec_cpg_downlist; iov.iov_len = g_req_exec_cpg_downlist.header.size; return (api->totem_mcast (&iov, 1, TOTEM_AGREED)); } static int cpg_exec_send_joinlist(void) { int count = 0; struct list_head *iter; struct qb_ipc_response_header *res; char *buf; struct join_list_entry *jle; struct iovec req_exec_cpg_iovec; for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { struct process_info *pi = list_entry (iter, struct process_info, list); if (pi->nodeid == api->totem_nodeid_get ()) { count++; } } /* Nothing to send */ if (!count) return 0; buf = alloca(sizeof(struct qb_ipc_response_header) + sizeof(struct join_list_entry) * count); if (!buf) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate joinlist buffer"); return -1; } jle = (struct join_list_entry *)(buf + sizeof(struct qb_ipc_response_header)); res = (struct qb_ipc_response_header *)buf; for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { struct process_info *pi = list_entry (iter, struct process_info, list); if (pi->nodeid == api->totem_nodeid_get ()) { memcpy (&jle->group_name, &pi->group, sizeof (mar_cpg_name_t)); jle->pid = pi->pid; jle++; } } res->id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_JOINLIST); res->size = sizeof(struct qb_ipc_response_header)+sizeof(struct join_list_entry) * count; req_exec_cpg_iovec.iov_base = buf; req_exec_cpg_iovec.iov_len = res->size; return (api->totem_mcast (&req_exec_cpg_iovec, 1, TOTEM_AGREED)); } static int cpg_lib_init_fn (void *conn) { struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); memset (cpd, 0, sizeof(struct cpg_pd)); cpd->conn = conn; list_add (&cpd->list, &cpg_pd_list_head); list_init (&cpd->iteration_instance_list_head); list_init (&cpd->zcb_mapped_list_head); api->ipc_refcnt_inc (conn); log_printf(LOGSYS_LEVEL_DEBUG, "lib_init_fn: conn=%p, cpd=%p\n", conn, cpd); return (0); } /* Join message from the library */ static void message_handler_req_lib_cpg_join (void *conn, const void *message) { const struct req_lib_cpg_join *req_lib_cpg_join = message; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); struct res_lib_cpg_join res_lib_cpg_join; cs_error_t error = CS_OK; struct list_head *iter; /* Test, if we don't have same pid and group name joined */ for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) { struct cpg_pd *cpd_item = list_entry (iter, struct cpg_pd, list); if (cpd_item->pid == req_lib_cpg_join->pid && mar_name_compare(&req_lib_cpg_join->group_name, &cpd_item->group_name) == 0) { /* We have same pid and group name joined -> return error */ error = CS_ERR_EXIST; goto response_send; } } /* * Same check must be done in process info list, because there may be not yet delivered * leave of client. */ for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { struct process_info *pi = list_entry (iter, struct process_info, list); if (pi->nodeid == api->totem_nodeid_get () && pi->pid == req_lib_cpg_join->pid && mar_name_compare(&req_lib_cpg_join->group_name, &pi->group) == 0) { /* We have same pid and group name joined -> return error */ error = CS_ERR_TRY_AGAIN; goto response_send; } } switch (cpd->cpd_state) { case CPD_STATE_UNJOINED: error = CS_OK; cpd->cpd_state = CPD_STATE_JOIN_STARTED; cpd->pid = req_lib_cpg_join->pid; cpd->flags = req_lib_cpg_join->flags; memcpy (&cpd->group_name, &req_lib_cpg_join->group_name, sizeof (cpd->group_name)); cpg_node_joinleave_send (req_lib_cpg_join->pid, &req_lib_cpg_join->group_name, MESSAGE_REQ_EXEC_CPG_PROCJOIN, CONFCHG_CPG_REASON_JOIN); break; case CPD_STATE_LEAVE_STARTED: error = CS_ERR_BUSY; break; case CPD_STATE_JOIN_STARTED: error = CS_ERR_EXIST; break; case CPD_STATE_JOIN_COMPLETED: error = CS_ERR_EXIST; break; } response_send: res_lib_cpg_join.header.size = sizeof(res_lib_cpg_join); res_lib_cpg_join.header.id = MESSAGE_RES_CPG_JOIN; res_lib_cpg_join.header.error = error; api->ipc_response_send (conn, &res_lib_cpg_join, sizeof(res_lib_cpg_join)); } /* Leave message from the library */ static void message_handler_req_lib_cpg_leave (void *conn, const void *message) { struct res_lib_cpg_leave res_lib_cpg_leave; cs_error_t error = CS_OK; struct req_lib_cpg_leave *req_lib_cpg_leave = (struct req_lib_cpg_leave *)message; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, "got leave request on %p\n", conn); switch (cpd->cpd_state) { case CPD_STATE_UNJOINED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_LEAVE_STARTED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_JOIN_STARTED: error = CS_ERR_BUSY; break; case CPD_STATE_JOIN_COMPLETED: error = CS_OK; cpd->cpd_state = CPD_STATE_LEAVE_STARTED; cpg_node_joinleave_send (req_lib_cpg_leave->pid, &req_lib_cpg_leave->group_name, MESSAGE_REQ_EXEC_CPG_PROCLEAVE, CONFCHG_CPG_REASON_LEAVE); break; } /* send return */ res_lib_cpg_leave.header.size = sizeof(res_lib_cpg_leave); res_lib_cpg_leave.header.id = MESSAGE_RES_CPG_LEAVE; res_lib_cpg_leave.header.error = error; api->ipc_response_send(conn, &res_lib_cpg_leave, sizeof(res_lib_cpg_leave)); } /* Finalize message from library */ static void message_handler_req_lib_cpg_finalize ( void *conn, const void *message) { struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); struct res_lib_cpg_finalize res_lib_cpg_finalize; cs_error_t error = CS_OK; log_printf (LOGSYS_LEVEL_DEBUG, "cpg finalize for conn=%p\n", conn); /* * We will just remove cpd from list. After this call, connection will be * closed on lib side, and cpg_lib_exit_fn will be called */ list_del (&cpd->list); list_init (&cpd->list); res_lib_cpg_finalize.header.size = sizeof (res_lib_cpg_finalize); res_lib_cpg_finalize.header.id = MESSAGE_RES_CPG_FINALIZE; res_lib_cpg_finalize.header.error = error; api->ipc_response_send (conn, &res_lib_cpg_finalize, sizeof (res_lib_cpg_finalize)); } static int memory_map ( const char *path, size_t bytes, void **buf) { int32_t fd; void *addr_orig; void *addr; int32_t res; fd = open (path, O_RDWR, 0600); unlink (path); if (fd == -1) { return (-1); } res = ftruncate (fd, bytes); if (res == -1) { goto error_close_unlink; } addr_orig = mmap (NULL, bytes, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (addr_orig == MAP_FAILED) { goto error_close_unlink; } addr = mmap (addr_orig, bytes, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, 0); if (addr != addr_orig) { munmap(addr_orig, bytes); goto error_close_unlink; } #ifdef COROSYNC_BSD madvise(addr, bytes, MADV_NOSYNC); #endif res = close (fd); if (res) { return (-1); } *buf = addr_orig; return (0); error_close_unlink: close (fd); unlink(path); return -1; } static inline int zcb_alloc ( struct cpg_pd *cpd, const char *path_to_file, size_t size, void **addr) { struct zcb_mapped *zcb_mapped; unsigned int res; zcb_mapped = malloc (sizeof (struct zcb_mapped)); if (zcb_mapped == NULL) { return (-1); } res = memory_map ( path_to_file, size, addr); if (res == -1) { free (zcb_mapped); return (-1); } list_init (&zcb_mapped->list); zcb_mapped->addr = *addr; zcb_mapped->size = size; list_add_tail (&zcb_mapped->list, &cpd->zcb_mapped_list_head); return (0); } static inline int zcb_free (struct zcb_mapped *zcb_mapped) { unsigned int res; res = munmap (zcb_mapped->addr, zcb_mapped->size); list_del (&zcb_mapped->list); free (zcb_mapped); return (res); } static inline int zcb_by_addr_free (struct cpg_pd *cpd, void *addr) { struct list_head *list; struct zcb_mapped *zcb_mapped; unsigned int res = 0; for (list = cpd->zcb_mapped_list_head.next; list != &cpd->zcb_mapped_list_head; list = list->next) { zcb_mapped = list_entry (list, struct zcb_mapped, list); if (zcb_mapped->addr == addr) { res = zcb_free (zcb_mapped); break; } } return (res); } static inline int zcb_all_free ( struct cpg_pd *cpd) { struct list_head *list; struct zcb_mapped *zcb_mapped; for (list = cpd->zcb_mapped_list_head.next; list != &cpd->zcb_mapped_list_head;) { zcb_mapped = list_entry (list, struct zcb_mapped, list); list = list->next; zcb_free (zcb_mapped); } return (0); } union u { uint64_t server_addr; void *server_ptr; }; static uint64_t void2serveraddr (void *server_ptr) { union u u; u.server_ptr = server_ptr; return (u.server_addr); } static void *serveraddr2void (uint64_t server_addr) { union u u; u.server_addr = server_addr; return (u.server_ptr); }; static void message_handler_req_lib_cpg_zc_alloc ( void *conn, const void *message) { mar_req_coroipcc_zc_alloc_t *hdr = (mar_req_coroipcc_zc_alloc_t *)message; struct qb_ipc_response_header res_header; void *addr = NULL; struct coroipcs_zc_header *zc_header; unsigned int res; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, "path: %s", hdr->path_to_file); res = zcb_alloc (cpd, hdr->path_to_file, hdr->map_size, &addr); assert(res == 0); zc_header = (struct coroipcs_zc_header *)addr; zc_header->server_address = void2serveraddr(addr); res_header.size = sizeof (struct qb_ipc_response_header); res_header.id = 0; api->ipc_response_send (conn, &res_header, res_header.size); } static void message_handler_req_lib_cpg_zc_free ( void *conn, const void *message) { mar_req_coroipcc_zc_free_t *hdr = (mar_req_coroipcc_zc_free_t *)message; struct qb_ipc_response_header res_header; void *addr = NULL; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); log_printf(LOGSYS_LEVEL_DEBUG, " free'ing"); addr = serveraddr2void (hdr->server_address); zcb_by_addr_free (cpd, addr); res_header.size = sizeof (struct qb_ipc_response_header); res_header.id = 0; api->ipc_response_send ( conn, &res_header, res_header.size); } /* Mcast message from the library */ static void message_handler_req_lib_cpg_mcast (void *conn, const void *message) { const struct req_lib_cpg_mcast *req_lib_cpg_mcast = message; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); mar_cpg_name_t group_name = cpd->group_name; struct iovec req_exec_cpg_iovec[2]; struct req_exec_cpg_mcast req_exec_cpg_mcast; - struct res_lib_cpg_mcast res_lib_cpg_mcast; int msglen = req_lib_cpg_mcast->msglen; int result; cs_error_t error = CS_ERR_NOT_EXIST; log_printf(LOGSYS_LEVEL_DEBUG, "got mcast request on %p\n", conn); switch (cpd->cpd_state) { case CPD_STATE_UNJOINED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_LEAVE_STARTED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_JOIN_STARTED: error = CS_OK; break; case CPD_STATE_JOIN_COMPLETED: error = CS_OK; break; } if (error == CS_OK) { req_exec_cpg_mcast.header.size = sizeof(req_exec_cpg_mcast) + msglen; req_exec_cpg_mcast.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_MCAST); req_exec_cpg_mcast.pid = cpd->pid; req_exec_cpg_mcast.msglen = msglen; api->ipc_source_set (&req_exec_cpg_mcast.source, conn); memcpy(&req_exec_cpg_mcast.group_name, &group_name, sizeof(mar_cpg_name_t)); req_exec_cpg_iovec[0].iov_base = (char *)&req_exec_cpg_mcast; req_exec_cpg_iovec[0].iov_len = sizeof(req_exec_cpg_mcast); req_exec_cpg_iovec[1].iov_base = (char *)&req_lib_cpg_mcast->message; req_exec_cpg_iovec[1].iov_len = msglen; result = api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED); assert(result == 0); + } else { + log_printf(LOGSYS_LEVEL_ERROR, "*** %p can't mcast to group %s state:%d, error:%d\n", + conn, group_name.value, cpd->cpd_state, error); } - - res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast); - res_lib_cpg_mcast.header.id = MESSAGE_RES_CPG_MCAST; - res_lib_cpg_mcast.header.error = error; - api->ipc_response_send (conn, &res_lib_cpg_mcast, - sizeof (res_lib_cpg_mcast)); } static void message_handler_req_lib_cpg_zc_execute ( void *conn, const void *message) { mar_req_coroipcc_zc_execute_t *hdr = (mar_req_coroipcc_zc_execute_t *)message; struct qb_ipc_request_header *header; struct res_lib_cpg_mcast res_lib_cpg_mcast; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); struct iovec req_exec_cpg_iovec[2]; struct req_exec_cpg_mcast req_exec_cpg_mcast; struct req_lib_cpg_mcast *req_lib_cpg_mcast; int result; cs_error_t error = CS_ERR_NOT_EXIST; struct coroipcs_zc_header *zc_hdr; log_printf(LOGSYS_LEVEL_DEBUG, "got ZC mcast request on %p\n", conn); zc_hdr = (struct coroipcs_zc_header *)((char *)serveraddr2void(hdr->server_address)); header = (struct qb_ipc_request_header *)(((char *)serveraddr2void(hdr->server_address) + sizeof (struct coroipcs_zc_header))); req_lib_cpg_mcast = (struct req_lib_cpg_mcast *)header; switch (cpd->cpd_state) { case CPD_STATE_UNJOINED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_LEAVE_STARTED: error = CS_ERR_NOT_EXIST; break; case CPD_STATE_JOIN_STARTED: error = CS_OK; break; case CPD_STATE_JOIN_COMPLETED: error = CS_OK; break; } res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast); res_lib_cpg_mcast.header.id = MESSAGE_RES_CPG_MCAST; if (error == CS_OK) { req_exec_cpg_mcast.header.size = sizeof(req_exec_cpg_mcast) + req_lib_cpg_mcast->msglen; req_exec_cpg_mcast.header.id = SERVICE_ID_MAKE(CPG_SERVICE, MESSAGE_REQ_EXEC_CPG_MCAST); req_exec_cpg_mcast.pid = cpd->pid; req_exec_cpg_mcast.msglen = req_lib_cpg_mcast->msglen; api->ipc_source_set (&req_exec_cpg_mcast.source, conn); memcpy(&req_exec_cpg_mcast.group_name, &cpd->group_name, sizeof(mar_cpg_name_t)); req_exec_cpg_iovec[0].iov_base = (char *)&req_exec_cpg_mcast; req_exec_cpg_iovec[0].iov_len = sizeof(req_exec_cpg_mcast); req_exec_cpg_iovec[1].iov_base = (char *)header + sizeof(struct req_lib_cpg_mcast); req_exec_cpg_iovec[1].iov_len = req_exec_cpg_mcast.msglen; result = api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED); if (result == 0) { res_lib_cpg_mcast.header.error = CS_OK; } else { res_lib_cpg_mcast.header.error = CS_ERR_TRY_AGAIN; } } else { res_lib_cpg_mcast.header.error = error; } api->ipc_response_send (conn, &res_lib_cpg_mcast, sizeof (res_lib_cpg_mcast)); } static void message_handler_req_lib_cpg_membership (void *conn, const void *message) { struct req_lib_cpg_membership_get *req_lib_cpg_membership_get = (struct req_lib_cpg_membership_get *)message; struct res_lib_cpg_membership_get res_lib_cpg_membership_get; struct list_head *iter; int member_count = 0; res_lib_cpg_membership_get.header.id = MESSAGE_RES_CPG_MEMBERSHIP; res_lib_cpg_membership_get.header.error = CS_OK; res_lib_cpg_membership_get.header.size = sizeof (struct req_lib_cpg_membership_get); for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { struct process_info *pi = list_entry (iter, struct process_info, list); if (mar_name_compare (&pi->group, &req_lib_cpg_membership_get->group_name) == 0) { res_lib_cpg_membership_get.member_list[member_count].nodeid = pi->nodeid; res_lib_cpg_membership_get.member_list[member_count].pid = pi->pid; member_count += 1; } } res_lib_cpg_membership_get.member_count = member_count; api->ipc_response_send (conn, &res_lib_cpg_membership_get, sizeof (res_lib_cpg_membership_get)); } static void message_handler_req_lib_cpg_local_get (void *conn, const void *message) { struct res_lib_cpg_local_get res_lib_cpg_local_get; res_lib_cpg_local_get.header.size = sizeof (res_lib_cpg_local_get); res_lib_cpg_local_get.header.id = MESSAGE_RES_CPG_LOCAL_GET; res_lib_cpg_local_get.header.error = CS_OK; res_lib_cpg_local_get.local_nodeid = api->totem_nodeid_get (); api->ipc_response_send (conn, &res_lib_cpg_local_get, sizeof (res_lib_cpg_local_get)); } static void message_handler_req_lib_cpg_iteration_initialize ( void *conn, const void *message) { const struct req_lib_cpg_iterationinitialize *req_lib_cpg_iterationinitialize = message; struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); hdb_handle_t cpg_iteration_handle = 0; struct res_lib_cpg_iterationinitialize res_lib_cpg_iterationinitialize; struct list_head *iter, *iter2; struct cpg_iteration_instance *cpg_iteration_instance; cs_error_t error = CS_OK; int res; log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration initialize\n"); /* Because between calling this function and *next can be some operations which will * change list, we must do full copy. */ /* * Create new iteration instance */ res = hdb_handle_create (&cpg_iteration_handle_t_db, sizeof (struct cpg_iteration_instance), &cpg_iteration_handle); if (res != 0) { error = CS_ERR_NO_MEMORY; goto response_send; } res = hdb_handle_get (&cpg_iteration_handle_t_db, cpg_iteration_handle, (void *)&cpg_iteration_instance); if (res != 0) { error = CS_ERR_BAD_HANDLE; goto error_destroy; } list_init (&cpg_iteration_instance->items_list_head); cpg_iteration_instance->handle = cpg_iteration_handle; /* * Create copy of process_info list "grouped by" group name */ for (iter = process_info_list_head.next; iter != &process_info_list_head; iter = iter->next) { struct process_info *pi = list_entry (iter, struct process_info, list); struct process_info *new_pi; if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_NAME_ONLY) { /* * Try to find processed group name in our list new list */ int found = 0; for (iter2 = cpg_iteration_instance->items_list_head.next; iter2 != &cpg_iteration_instance->items_list_head; iter2 = iter2->next) { struct process_info *pi2 = list_entry (iter2, struct process_info, list); if (mar_name_compare (&pi2->group, &pi->group) == 0) { found = 1; break; } } if (found) { /* * We have this name in list -> don't add */ continue ; } } else if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_ONE_GROUP) { /* * Test pi group name with request */ if (mar_name_compare (&pi->group, &req_lib_cpg_iterationinitialize->group_name) != 0) /* * Not same -> don't add */ continue ; } new_pi = malloc (sizeof (struct process_info)); if (!new_pi) { log_printf(LOGSYS_LEVEL_WARNING, "Unable to allocate process_info struct"); error = CS_ERR_NO_MEMORY; goto error_put_destroy; } memcpy (new_pi, pi, sizeof (struct process_info)); list_init (&new_pi->list); if (req_lib_cpg_iterationinitialize->iteration_type == CPG_ITERATION_NAME_ONLY) { /* * pid and nodeid -> undefined */ new_pi->pid = new_pi->nodeid = 0; } /* * We will return list "grouped" by "group name", so try to find right place to add */ for (iter2 = cpg_iteration_instance->items_list_head.next; iter2 != &cpg_iteration_instance->items_list_head; iter2 = iter2->next) { struct process_info *pi2 = list_entry (iter2, struct process_info, list); if (mar_name_compare (&pi2->group, &pi->group) == 0) { break; } } list_add (&new_pi->list, iter2); } /* * Now we have a full "grouped by" copy of process_info list */ /* * Add instance to current cpd list */ list_init (&cpg_iteration_instance->list); list_add (&cpg_iteration_instance->list, &cpd->iteration_instance_list_head); cpg_iteration_instance->current_pointer = &cpg_iteration_instance->items_list_head; error_put_destroy: hdb_handle_put (&cpg_iteration_handle_t_db, cpg_iteration_handle); error_destroy: if (error != CS_OK) { hdb_handle_destroy (&cpg_iteration_handle_t_db, cpg_iteration_handle); } response_send: res_lib_cpg_iterationinitialize.header.size = sizeof (res_lib_cpg_iterationinitialize); res_lib_cpg_iterationinitialize.header.id = MESSAGE_RES_CPG_ITERATIONINITIALIZE; res_lib_cpg_iterationinitialize.header.error = error; res_lib_cpg_iterationinitialize.iteration_handle = cpg_iteration_handle; api->ipc_response_send (conn, &res_lib_cpg_iterationinitialize, sizeof (res_lib_cpg_iterationinitialize)); } static void message_handler_req_lib_cpg_iteration_next ( void *conn, const void *message) { const struct req_lib_cpg_iterationnext *req_lib_cpg_iterationnext = message; struct res_lib_cpg_iterationnext res_lib_cpg_iterationnext; struct cpg_iteration_instance *cpg_iteration_instance; cs_error_t error = CS_OK; int res; struct process_info *pi; log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration next\n"); res = hdb_handle_get (&cpg_iteration_handle_t_db, req_lib_cpg_iterationnext->iteration_handle, (void *)&cpg_iteration_instance); if (res != 0) { error = CS_ERR_LIBRARY; goto error_exit; } assert (cpg_iteration_instance); cpg_iteration_instance->current_pointer = cpg_iteration_instance->current_pointer->next; if (cpg_iteration_instance->current_pointer == &cpg_iteration_instance->items_list_head) { error = CS_ERR_NO_SECTIONS; goto error_put; } pi = list_entry (cpg_iteration_instance->current_pointer, struct process_info, list); /* * Copy iteration data */ res_lib_cpg_iterationnext.description.nodeid = pi->nodeid; res_lib_cpg_iterationnext.description.pid = pi->pid; memcpy (&res_lib_cpg_iterationnext.description.group, &pi->group, sizeof (mar_cpg_name_t)); error_put: hdb_handle_put (&cpg_iteration_handle_t_db, req_lib_cpg_iterationnext->iteration_handle); error_exit: res_lib_cpg_iterationnext.header.size = sizeof (res_lib_cpg_iterationnext); res_lib_cpg_iterationnext.header.id = MESSAGE_RES_CPG_ITERATIONNEXT; res_lib_cpg_iterationnext.header.error = error; api->ipc_response_send (conn, &res_lib_cpg_iterationnext, sizeof (res_lib_cpg_iterationnext)); } static void message_handler_req_lib_cpg_iteration_finalize ( void *conn, const void *message) { const struct req_lib_cpg_iterationfinalize *req_lib_cpg_iterationfinalize = message; struct res_lib_cpg_iterationfinalize res_lib_cpg_iterationfinalize; struct cpg_iteration_instance *cpg_iteration_instance; cs_error_t error = CS_OK; int res; log_printf (LOGSYS_LEVEL_DEBUG, "cpg iteration finalize\n"); res = hdb_handle_get (&cpg_iteration_handle_t_db, req_lib_cpg_iterationfinalize->iteration_handle, (void *)&cpg_iteration_instance); if (res != 0) { error = CS_ERR_LIBRARY; goto error_exit; } assert (cpg_iteration_instance); cpg_iteration_instance_finalize (cpg_iteration_instance); hdb_handle_put (&cpg_iteration_handle_t_db, cpg_iteration_instance->handle); error_exit: res_lib_cpg_iterationfinalize.header.size = sizeof (res_lib_cpg_iterationfinalize); res_lib_cpg_iterationfinalize.header.id = MESSAGE_RES_CPG_ITERATIONFINALIZE; res_lib_cpg_iterationfinalize.header.error = error; api->ipc_response_send (conn, &res_lib_cpg_iterationfinalize, sizeof (res_lib_cpg_iterationfinalize)); }