diff --git a/lib/ipc_setup.c b/lib/ipc_setup.c index 0e16964..36ae2cf 100644 --- a/lib/ipc_setup.c +++ b/lib/ipc_setup.c @@ -1,867 +1,877 @@ /* * Copyright (C) 2010,2013 Red Hat, Inc. * * Author: Angus Salkeld * * This file is part of libqb. * * libqb is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 2.1 of the License, or * (at your option) any later version. * * libqb is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with libqb. If not, see . */ #include "os_base.h" #include #if defined(HAVE_GETPEERUCRED) #include #endif #ifdef HAVE_SYS_UN_H #include #endif /* HAVE_SYS_UN_H */ #ifdef HAVE_SYS_STAT_H #include #endif #ifdef HAVE_SYS_MMAN_H #include #endif #include #include #include #include #include "util_int.h" #include "ipc_int.h" +/* Maximum number of times we generate a random socket name before giving up */ +#define MAX_NAME_RETRY_COUNT 20 + struct ipc_auth_ugp { uid_t uid; gid_t gid; pid_t pid; }; struct ipc_auth_data { int32_t sock; struct qb_ipcs_service *s; union { struct qb_ipc_connection_request req; struct qb_ipc_connection_response res; } msg; struct msghdr msg_recv; struct iovec iov_recv; struct ipc_auth_ugp ugp; size_t processed; size_t len; #ifdef SO_PASSCRED char *cmsg_cred; #endif }; static int32_t qb_ipcs_us_connection_acceptor(int fd, int revent, void *data); ssize_t qb_ipc_us_send(struct qb_ipc_one_way *one_way, const void *msg, size_t len) { int32_t result; int32_t processed = 0; char *rbuf = (char *)msg; qb_sigpipe_ctl(QB_SIGPIPE_IGNORE); retry_send: result = send(one_way->u.us.sock, &rbuf[processed], len - processed, MSG_NOSIGNAL); if (result == -1) 
{ if (errno == EAGAIN && processed > 0) { goto retry_send; } else { qb_sigpipe_ctl(QB_SIGPIPE_DEFAULT); return -errno; } } processed += result; if (processed != len) { goto retry_send; } qb_sigpipe_ctl(QB_SIGPIPE_DEFAULT); return processed; } static ssize_t qb_ipc_us_recv_msghdr(struct ipc_auth_data *data) { char *msg = (char *) &data->msg; int32_t result; qb_sigpipe_ctl(QB_SIGPIPE_IGNORE); retry_recv: data->msg_recv.msg_iov->iov_base = &msg[data->processed]; data->msg_recv.msg_iov->iov_len = data->len - data->processed; result = recvmsg(data->sock, &data->msg_recv, MSG_NOSIGNAL | MSG_WAITALL); if (result == -1 && errno == EAGAIN) { qb_sigpipe_ctl(QB_SIGPIPE_DEFAULT); return -EAGAIN; } if (result == -1) { qb_sigpipe_ctl(QB_SIGPIPE_DEFAULT); return -errno; } if (result == 0) { qb_sigpipe_ctl(QB_SIGPIPE_DEFAULT); qb_util_log(LOG_DEBUG, "recv(fd %d) got 0 bytes assuming ENOTCONN", data->sock); return -ENOTCONN; } data->processed += result; if (data->processed != data->len) { goto retry_recv; } qb_sigpipe_ctl(QB_SIGPIPE_DEFAULT); assert(data->processed == data->len); return data->processed; } int32_t qb_ipc_us_sock_error_is_disconnected(int err) { if (err >= 0) { return QB_FALSE; } else if (err == -EAGAIN || err == -ETIMEDOUT || err == -EINTR || #ifdef EWOULDBLOCK err == -EWOULDBLOCK || #endif err == -EMSGSIZE || err == -ENOMSG || err == -EINVAL) { return QB_FALSE; } return QB_TRUE; } int32_t qb_ipc_us_ready(struct qb_ipc_one_way * ow_data, struct qb_ipc_one_way * ow_conn, int32_t ms_timeout, int32_t events) { struct pollfd ufds[2]; int32_t poll_events; int numfds = 1; int i; ufds[0].fd = ow_data->u.us.sock; ufds[0].events = events; ufds[0].revents = 0; if (ow_conn && ow_data != ow_conn) { numfds++; ufds[1].fd = ow_conn->u.us.sock; ufds[1].events = POLLIN; ufds[1].revents = 0; } poll_events = poll(ufds, numfds, ms_timeout); if ((poll_events == -1 && errno == EINTR) || poll_events == 0) { return -EAGAIN; } else if (poll_events == -1) { return -errno; } for (i = 0; i < 
poll_events; i++) { if (ufds[i].revents & POLLERR) { qb_util_log(LOG_DEBUG, "poll(fd %d) got POLLERR", ufds[i].fd); return -ENOTCONN; } else if (ufds[i].revents & POLLHUP) { qb_util_log(LOG_DEBUG, "poll(fd %d) got POLLHUP", ufds[i].fd); return -ENOTCONN; } else if (ufds[i].revents & POLLNVAL) { qb_util_log(LOG_DEBUG, "poll(fd %d) got POLLNVAL", ufds[i].fd); return -ENOTCONN; } else if (ufds[i].revents == 0) { qb_util_log(LOG_DEBUG, "poll(fd %d) zero revents", ufds[i].fd); return -ENOTCONN; } } return 0; } /* * recv an entire message - and try hard to get all of it. */ ssize_t qb_ipc_us_recv(struct qb_ipc_one_way * one_way, void *msg, size_t len, int32_t timeout) { int32_t result; int32_t final_rc = 0; int32_t processed = 0; int32_t to_recv = len; char *data = msg; qb_sigpipe_ctl(QB_SIGPIPE_IGNORE); retry_recv: result = recv(one_way->u.us.sock, &data[processed], to_recv, MSG_NOSIGNAL | MSG_WAITALL); if (result == -1) { if (errno == EAGAIN && (processed > 0 || timeout == -1)) { result = qb_ipc_us_ready(one_way, NULL, timeout, POLLIN); if (result == 0 || result == -EAGAIN) { goto retry_recv; } final_rc = result; goto cleanup_sigpipe; } else if (errno == ECONNRESET || errno == EPIPE) { final_rc = -ENOTCONN; goto cleanup_sigpipe; } else { final_rc = -errno; goto cleanup_sigpipe; } } if (result == 0) { final_rc = -ENOTCONN; goto cleanup_sigpipe; } processed += result; to_recv -= result; if (processed != len) { goto retry_recv; } final_rc = processed; cleanup_sigpipe: qb_sigpipe_ctl(QB_SIGPIPE_DEFAULT); return final_rc; } static int32_t qb_ipcc_stream_sock_connect(const char *socket_name, int32_t * sock_pt) { int32_t request_fd; struct sockaddr_un address; int32_t res = 0; request_fd = socket(PF_UNIX, SOCK_STREAM, 0); if (request_fd == -1) { return -errno; } qb_socket_nosigpipe(request_fd); res = qb_sys_fd_nonblock_cloexec_set(request_fd); if (res < 0) { goto error_connect; } memset(&address, 0, sizeof(struct sockaddr_un)); address.sun_family = AF_UNIX; #ifdef 
HAVE_STRUCT_SOCKADDR_UN_SUN_LEN address.sun_len = QB_SUN_LEN(&address); #endif if (!use_filesystem_sockets()) { snprintf(address.sun_path + 1, UNIX_PATH_MAX - 1, "%s", socket_name); } else { snprintf(address.sun_path, sizeof(address.sun_path), "%s/%s", SOCKETDIR, socket_name); } if (connect(request_fd, (struct sockaddr *)&address, QB_SUN_LEN(&address)) == -1) { res = -errno; goto error_connect; } *sock_pt = request_fd; return 0; error_connect: close(request_fd); *sock_pt = -1; return res; } void qb_ipcc_us_sock_close(int32_t sock) { shutdown(sock, SHUT_RDWR); close(sock); } static int32_t qb_ipc_auth_creds(struct ipc_auth_data *data) { int32_t res = 0; /* * currently support getpeerucred, getpeereid, and SO_PASSCRED credential * retrieval mechanisms for various Platforms */ #ifdef HAVE_GETPEERUCRED /* * Solaris and some BSD systems */ { ucred_t *uc = NULL; if (getpeerucred(data->sock, &uc) == 0) { res = 0; data->ugp.uid = ucred_geteuid(uc); data->ugp.gid = ucred_getegid(uc); data->ugp.pid = ucred_getpid(uc); ucred_free(uc); } else { res = -errno; } } #elif defined(HAVE_GETPEEREID) /* * Usually MacOSX systems */ { /* * TODO get the peer's pid. 
* c->pid = ?; */ if (getpeereid(data->sock, &data->ugp.uid, &data->ugp.gid) == 0) { res = 0; } else { res = -errno; } } #elif defined(SO_PASSCRED) /* * Usually Linux systems */ { struct ucred cred; struct cmsghdr *cmsg; res = -EINVAL; for (cmsg = CMSG_FIRSTHDR(&data->msg_recv); cmsg != NULL; cmsg = CMSG_NXTHDR(&data->msg_recv, cmsg)) { if (cmsg->cmsg_type != SCM_CREDENTIALS) continue; memcpy(&cred, CMSG_DATA(cmsg), sizeof(struct ucred)); res = 0; data->ugp.pid = cred.pid; data->ugp.uid = cred.uid; data->ugp.gid = cred.gid; break; } } #else /* no credentials */ data->ugp.pid = 0; data->ugp.uid = 0; data->ugp.gid = 0; res = -ENOTSUP; #endif /* no credentials */ return res; } static void destroy_ipc_auth_data(struct ipc_auth_data *data) { if (data->s) { qb_ipcs_unref(data->s); } #ifdef SO_PASSCRED free(data->cmsg_cred); #endif free(data); } static struct ipc_auth_data * init_ipc_auth_data(int sock, size_t len) { struct ipc_auth_data *data = calloc(1, sizeof(struct ipc_auth_data)); if (data == NULL) { return NULL; } data->msg_recv.msg_iov = &data->iov_recv; data->msg_recv.msg_iovlen = 1; data->msg_recv.msg_name = 0; data->msg_recv.msg_namelen = 0; #ifdef SO_PASSCRED data->cmsg_cred = calloc(1, CMSG_SPACE(sizeof(struct ucred))); if (data->cmsg_cred == NULL) { destroy_ipc_auth_data(data); return NULL; } data->msg_recv.msg_control = (void *)data->cmsg_cred; data->msg_recv.msg_controllen = CMSG_SPACE(sizeof(struct ucred)); #endif #ifdef QB_SOLARIS data->msg_recv.msg_accrights = 0; data->msg_recv.msg_accrightslen = 0; #else data->msg_recv.msg_flags = 0; #endif /* QB_SOLARIS */ data->len = len; data->iov_recv.iov_base = &data->msg; data->iov_recv.iov_len = data->len; data->sock = sock; return data; } int32_t qb_ipcc_us_setup_connect(struct qb_ipcc_connection *c, struct qb_ipc_connection_response *r) { int32_t res; struct qb_ipc_connection_request request; struct ipc_auth_data *data; #ifdef QB_LINUX int off = 0; int on = 1; #endif res = qb_ipcc_stream_sock_connect(c->name, 
&c->setup.u.us.sock); if (res != 0) { return res; } #ifdef QB_LINUX setsockopt(c->setup.u.us.sock, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on)); #endif memset(&request, 0, sizeof(request)); request.hdr.id = QB_IPC_MSG_AUTHENTICATE; request.hdr.size = sizeof(request); request.max_msg_size = c->setup.max_msg_size; res = qb_ipc_us_send(&c->setup, &request, request.hdr.size); if (res < 0) { qb_ipcc_us_sock_close(c->setup.u.us.sock); return res; } data = init_ipc_auth_data(c->setup.u.us.sock, sizeof(struct qb_ipc_connection_response)); if (data == NULL) { qb_ipcc_us_sock_close(c->setup.u.us.sock); return -ENOMEM; } qb_ipc_us_ready(&c->setup, NULL, -1, POLLIN); res = qb_ipc_us_recv_msghdr(data); #ifdef QB_LINUX setsockopt(c->setup.u.us.sock, SOL_SOCKET, SO_PASSCRED, &off, sizeof(off)); #endif if (res != data->len) { destroy_ipc_auth_data(data); return res; } memcpy(r, &data->msg.res, sizeof(struct qb_ipc_connection_response)); qb_ipc_auth_creds(data); c->egid = data->ugp.gid; c->server_pid = data->ugp.pid; destroy_ipc_auth_data(data); return r->hdr.error; } /* ************************************************************************** * SERVER */ int32_t qb_ipcs_us_publish(struct qb_ipcs_service * s) { struct sockaddr_un un_addr; int32_t res; #ifdef SO_PASSCRED int on = 1; #endif /* * Create socket for IPC clients, name socket, listen for connections */ s->server_sock = socket(PF_UNIX, SOCK_STREAM, 0); if (s->server_sock == -1) { res = -errno; qb_util_perror(LOG_ERR, "Cannot create server socket"); return res; } res = qb_sys_fd_nonblock_cloexec_set(s->server_sock); if (res < 0) { goto error_close; } memset(&un_addr, 0, sizeof(struct sockaddr_un)); un_addr.sun_family = AF_UNIX; #if defined(QB_BSD) || defined(QB_DARWIN) un_addr.sun_len = SUN_LEN(&un_addr); #endif qb_util_log(LOG_INFO, "server name: %s", s->name); if (!use_filesystem_sockets()) { snprintf(un_addr.sun_path + 1, UNIX_PATH_MAX - 1, "%s", s->name); } else { struct stat stat_out; res = stat(SOCKETDIR, &stat_out); if 
(res == -1 || (res == 0 && !S_ISDIR(stat_out.st_mode))) { res = -errno; qb_util_log(LOG_CRIT, "Required directory not present %s", SOCKETDIR); goto error_close; } snprintf(un_addr.sun_path, sizeof(un_addr.sun_path), "%s/%s", SOCKETDIR, s->name); unlink(un_addr.sun_path); } res = bind(s->server_sock, (struct sockaddr *)&un_addr, QB_SUN_LEN(&un_addr)); if (res) { res = -errno; qb_util_perror(LOG_ERR, "Could not bind AF_UNIX (%s)", un_addr.sun_path); goto error_close; } /* * Allow everyone to write to the socket since the IPC layer handles * security automatically */ if (use_filesystem_sockets()) { res = chmod(un_addr.sun_path, S_IRWXU | S_IRWXG | S_IRWXO); } #ifdef SO_PASSCRED setsockopt(s->server_sock, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on)); #endif if (listen(s->server_sock, SERVER_BACKLOG) == -1) { qb_util_perror(LOG_ERR, "socket listen failed"); } res = s->poll_fns.dispatch_add(s->poll_priority, s->server_sock, POLLIN | POLLPRI | POLLNVAL, s, qb_ipcs_us_connection_acceptor); return res; error_close: close(s->server_sock); return res; } int32_t qb_ipcs_us_withdraw(struct qb_ipcs_service * s) { qb_util_log(LOG_INFO, "withdrawing server sockets"); (void)s->poll_fns.dispatch_del(s->server_sock); shutdown(s->server_sock, SHUT_RDWR); if (use_filesystem_sockets()) { struct sockaddr_un sockname; socklen_t socklen = sizeof(sockname); if ((getsockname(s->server_sock, (struct sockaddr *)&sockname, &socklen) == 0) && sockname.sun_family == AF_UNIX) { unlink(sockname.sun_path); } } close(s->server_sock); s->server_sock = -1; return 0; } static int32_t handle_new_connection(struct qb_ipcs_service *s, int32_t auth_result, int32_t sock, void *msg, size_t len, struct ipc_auth_ugp *ugp) { struct qb_ipcs_connection *c = NULL; struct qb_ipc_connection_request *req = msg; int32_t res = auth_result; int32_t res2 = 0; + uint32_t retry_count = 0; uint32_t max_buffer_size = QB_MAX(req->max_msg_size, s->max_buffer_size); struct qb_ipc_connection_response response; c = 
qb_ipcs_connection_alloc(s); if (c == NULL) { qb_ipcc_us_sock_close(sock); return -ENOMEM; } c->receive_buf = calloc(1, max_buffer_size); if (c->receive_buf == NULL) { free(c); qb_ipcc_us_sock_close(sock); return -ENOMEM; } c->setup.u.us.sock = sock; c->request.max_msg_size = max_buffer_size; c->response.max_msg_size = max_buffer_size; c->event.max_msg_size = max_buffer_size; c->pid = ugp->pid; c->auth.uid = c->euid = ugp->uid; c->auth.gid = c->egid = ugp->gid; c->auth.mode = 0600; c->stats.client_pid = ugp->pid; - snprintf(c->description, CONNECTION_DESCRIPTION, - "%d-%d-%d", s->pid, ugp->pid, c->setup.u.us.sock); if (auth_result == 0 && c->service->serv_fns.connection_accept) { res = c->service->serv_fns.connection_accept(c, c->euid, c->egid); } if (res != 0) { goto send_response; } qb_util_log(LOG_DEBUG, "IPC credentials authenticated (%s)", c->description); +retry_description: + snprintf(c->description, CONNECTION_DESCRIPTION, + "%d-%d-%lu", s->pid, ugp->pid, (unsigned long)(random()%65536)); + memset(&response, 0, sizeof(response)); if (s->funcs.connect) { res = s->funcs.connect(s, c, &response); + if (res == -EEXIST && ++retry_count < MAX_NAME_RETRY_COUNT) { + qb_util_log(LOG_DEBUG, "Retrying socket name %s (count=%ld)\n", c->description, retry_count); + goto retry_description; + } if (res != 0) { goto send_response; } } /* * The connection is good, add it to the active connection list */ c->state = QB_IPCS_CONNECTION_ACTIVE; qb_list_add(&c->list, &s->connections); send_response: response.hdr.id = QB_IPC_MSG_AUTHENTICATE; response.hdr.size = sizeof(response); response.hdr.error = res; if (res == 0) { response.connection = (intptr_t) c; response.connection_type = s->type; response.max_msg_size = c->request.max_msg_size; s->stats.active_connections++; } res2 = qb_ipc_us_send(&c->setup, &response, response.hdr.size); if (res == 0 && res2 != response.hdr.size) { res = res2; } if (res == 0) { qb_ipcs_connection_ref(c); if (s->serv_fns.connection_created) { 
s->serv_fns.connection_created(c); } if (c->state == QB_IPCS_CONNECTION_ACTIVE) { c->state = QB_IPCS_CONNECTION_ESTABLISHED; } qb_ipcs_connection_unref(c); } else { if (res == -EACCES) { qb_util_log(LOG_ERR, "Invalid IPC credentials (%s).", c->description); } else if (res == -EAGAIN) { qb_util_log(LOG_WARNING, "Denied connection, is not ready (%s)", c->description); } else { errno = -res; qb_util_perror(LOG_ERR, "Error in connection setup (%s)", c->description); } if (c->state == QB_IPCS_CONNECTION_INACTIVE) { /* This removes the initial alloc ref */ qb_ipcs_connection_unref(c); qb_ipcc_us_sock_close(sock); } else { qb_ipcs_disconnect(c); } } return res; } static int32_t process_auth(int32_t fd, int32_t revents, void *d) { struct ipc_auth_data *data = (struct ipc_auth_data *) d; int32_t res = 0; #ifdef SO_PASSCRED int off = 0; #endif if (data->s->server_sock == -1) { qb_util_log(LOG_DEBUG, "Closing fd (%d) for server shutdown", fd); res = -ESHUTDOWN; goto cleanup_and_return; } if (revents & POLLNVAL) { qb_util_log(LOG_DEBUG, "NVAL conn fd (%d)", fd); res = -EINVAL; goto cleanup_and_return; } if (revents & POLLHUP) { qb_util_log(LOG_DEBUG, "HUP conn fd (%d)", fd); res = -ESHUTDOWN; goto cleanup_and_return; } if ((revents & POLLIN) == 0) { return 0; } res = qb_ipc_us_recv_msghdr(data); if (res == -EAGAIN) { /* yield to mainloop, Let mainloop call us again */ return 0; } if (res != data->len) { res = -EIO; goto cleanup_and_return; } res = qb_ipc_auth_creds(data); cleanup_and_return: #ifdef SO_PASSCRED setsockopt(data->sock, SOL_SOCKET, SO_PASSCRED, &off, sizeof(off)); #endif (void)data->s->poll_fns.dispatch_del(data->sock); if (res < 0) { close(data->sock); } else if (data->msg.req.hdr.id == QB_IPC_MSG_AUTHENTICATE) { (void)handle_new_connection(data->s, res, data->sock, &data->msg, data->len, &data->ugp); } else { close(data->sock); } destroy_ipc_auth_data(data); return 1; } static void qb_ipcs_uc_recv_and_auth(int32_t sock, struct qb_ipcs_service *s) { int res = 0; 
struct ipc_auth_data *data = NULL; #ifdef SO_PASSCRED int on = 1; #endif data = init_ipc_auth_data(sock, sizeof(struct qb_ipc_connection_request)); if (data == NULL) { close(sock); /* -ENOMEM */ return; } data->s = s; qb_ipcs_ref(data->s); #ifdef SO_PASSCRED setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on)); #endif res = s->poll_fns.dispatch_add(QB_LOOP_MED, data->sock, POLLIN | POLLPRI | POLLNVAL, data, process_auth); if (res < 0) { qb_util_log(LOG_DEBUG, "Failed to process AUTH for fd (%d)", data->sock); close(sock); destroy_ipc_auth_data(data); } } static int32_t qb_ipcs_us_connection_acceptor(int fd, int revent, void *data) { struct sockaddr_un un_addr; int32_t new_fd; struct qb_ipcs_service *s = (struct qb_ipcs_service *)data; int32_t res; socklen_t addrlen = sizeof(struct sockaddr_un); if (revent & (POLLNVAL | POLLHUP | POLLERR)) { /* * handle shutdown more cleanly. */ return -1; } retry_accept: errno = 0; new_fd = accept(fd, (struct sockaddr *)&un_addr, &addrlen); if (new_fd == -1 && errno == EINTR) { goto retry_accept; } if (new_fd == -1 && errno == EBADF) { qb_util_perror(LOG_ERR, "Could not accept client connection from fd:%d", fd); return -1; } if (new_fd == -1) { qb_util_perror(LOG_ERR, "Could not accept client connection"); /* This is an error, but -1 would indicate disconnect * from the poll loop */ return 0; } res = qb_sys_fd_nonblock_cloexec_set(new_fd); if (res < 0) { close(new_fd); /* This is an error, but -1 would indicate disconnect * from the poll loop */ return 0; } qb_ipcs_uc_recv_and_auth(new_fd, s); return 0; } diff --git a/lib/ipc_socket.c b/lib/ipc_socket.c index fe2040e..1f7cde3 100644 --- a/lib/ipc_socket.c +++ b/lib/ipc_socket.c @@ -1,914 +1,914 @@ /* * Copyright (C) 2010,2013 Red Hat, Inc. * * Author: Angus Salkeld * * This file is part of libqb. 
* * libqb is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 2.1 of the License, or * (at your option) any later version. * * libqb is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with libqb. If not, see . */ #include "os_base.h" #include #ifdef HAVE_SYS_UN_H #include #endif /* HAVE_SYS_UN_H */ #ifdef HAVE_SYS_MMAN_H #include #endif #include #include #include #include #include "util_int.h" #include "ipc_int.h" struct ipc_us_control { int32_t sent; int32_t flow_control; }; #define SHM_CONTROL_SIZE (3 * sizeof(struct ipc_us_control)) int use_filesystem_sockets(void) { static int need_init = 1; static int filesystem_sockets = 0; if (need_init) { #if defined(QB_LINUX) || defined(QB_CYGWIN) struct stat buf; if (stat(FORCESOCKETSFILE, &buf) == 0) { filesystem_sockets = 1; } #else filesystem_sockets = 1; #endif need_init = 0; } return filesystem_sockets; } static void set_sock_addr(struct sockaddr_un *address, const char *socket_name) { memset(address, 0, sizeof(struct sockaddr_un)); address->sun_family = AF_UNIX; #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN address->sun_len = QB_SUN_LEN(address); #endif if (!use_filesystem_sockets()) { snprintf(address->sun_path + 1, UNIX_PATH_MAX - 1, "%s", socket_name); } else { snprintf(address->sun_path, sizeof(address->sun_path), "%s/%s", SOCKETDIR, socket_name); } } static int32_t qb_ipc_dgram_sock_setup(const char *base_name, const char *service_name, int32_t * sock_pt, gid_t gid) { int32_t request_fd; struct sockaddr_un local_address; int32_t res = 0; char sock_path[PATH_MAX]; request_fd = socket(PF_UNIX, SOCK_DGRAM, 0); if (request_fd == -1) 
{ return -errno; } qb_socket_nosigpipe(request_fd); res = qb_sys_fd_nonblock_cloexec_set(request_fd); if (res < 0) { goto error_connect; } snprintf(sock_path, PATH_MAX, "%s-%s", base_name, service_name); set_sock_addr(&local_address, sock_path); if (use_filesystem_sockets()) { res = unlink(local_address.sun_path); } res = bind(request_fd, (struct sockaddr *)&local_address, sizeof(local_address)); if (use_filesystem_sockets()) { chmod(local_address.sun_path, 0660); chown(local_address.sun_path, -1, gid); } if (res < 0) { goto error_connect; } *sock_pt = request_fd; return 0; error_connect: close(request_fd); *sock_pt = -1; return res; } static int32_t set_sock_size(int sockfd, size_t max_msg_size) { int32_t rc; unsigned int optval; socklen_t optlen = sizeof(optval); rc = getsockopt(sockfd, SOL_SOCKET, SO_SNDBUF, &optval, &optlen); qb_util_log(LOG_TRACE, "%d: getsockopt(%d, SO_SNDBUF, needed:%d) actual:%d", rc, sockfd, max_msg_size, optval); /* The optval <= max_msg_size check is weird... * during testing it was discovered in some instances if the * default optval is exactly equal to our max_msg_size, we couldn't * actually send a message that large unless we explicitly set * it using setsockopt... there is no good explaination for this. Most * likely this is hitting some sort of "off by one" error in the kernel. */ if (rc == 0 && optval <= max_msg_size) { optval = max_msg_size; optlen = sizeof(optval); rc = setsockopt(sockfd, SOL_SOCKET, SO_SNDBUF, &optval, optlen); } if (rc != 0) { return -errno; } rc = getsockopt(sockfd, SOL_SOCKET, SO_RCVBUF, &optval, &optlen); qb_util_log(LOG_TRACE, "%d: getsockopt(%d, SO_RCVBUF, needed:%d) actual:%d", rc, sockfd, max_msg_size, optval); /* Set the sockets receive buffer size to match the send buffer. On * FreeBSD without this calls to sendto() will result in an ENOBUFS error * if the message is larger than net.local.dgram.recvspace sysctl. 
*/ if (rc == 0 && optval <= max_msg_size) { optval = max_msg_size; optlen = sizeof(optval); rc = setsockopt(sockfd, SOL_SOCKET, SO_RCVBUF, &optval, optlen); } if (rc != 0) { return -errno; } return rc; } static int32_t dgram_verify_msg_size(size_t max_msg_size) { int32_t rc = -1; int32_t sockets[2]; int32_t tries = 0; int32_t write_passed = 0; int32_t read_passed = 0; char buf[max_msg_size]; if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sockets) < 0) { qb_util_perror(LOG_DEBUG, "error calling socketpair()"); goto cleanup_socks; } if (set_sock_size(sockets[0], max_msg_size) != 0) { qb_util_log(LOG_DEBUG, "error set_sock_size(sockets[0],%#x)", max_msg_size); goto cleanup_socks; } if (set_sock_size(sockets[1], max_msg_size) != 0) { qb_util_log(LOG_DEBUG, "error set_sock_size(sockets[1],%#x)", max_msg_size); goto cleanup_socks; } for (tries = 0; tries < 3; tries++) { if (write_passed == 0) { rc = write(sockets[1], buf, max_msg_size); if (rc < 0 && (errno == EAGAIN || errno == EINTR)) { continue; } else if (rc == max_msg_size) { write_passed = 1; } else { break; } } if (read_passed == 0) { rc = read(sockets[0], buf, max_msg_size); if (rc < 0 && (errno == EAGAIN || errno == EINTR)) { continue; } else if (rc == max_msg_size) { read_passed = 1; } else { break; } } if (read_passed && write_passed) { rc = 0; break; } } cleanup_socks: close(sockets[0]); close(sockets[1]); return rc; } int32_t qb_ipcc_verify_dgram_max_msg_size(size_t max_msg_size) { int32_t i; int32_t last = -1; int32_t inc = 2048; if (dgram_verify_msg_size(max_msg_size) == 0) { return max_msg_size; } for (i = inc; i < max_msg_size; i+=inc) { if (dgram_verify_msg_size(i) == 0) { last = i; } else if (inc >= 512) { i-=inc; inc = inc/2; } else { break; } } return last; } /* * bind to "base_name-local_name" * connect to "base_name-remote_name" * output sock_pt */ static int32_t qb_ipc_dgram_sock_connect(const char *base_name, const char *local_name, const char *remote_name, int32_t max_msg_size, int32_t * sock_pt, gid_t 
gid) { char sock_path[PATH_MAX]; struct sockaddr_un remote_address; int32_t res = qb_ipc_dgram_sock_setup(base_name, local_name, sock_pt, gid); if (res < 0) { return res; } snprintf(sock_path, PATH_MAX, "%s-%s", base_name, remote_name); set_sock_addr(&remote_address, sock_path); if (connect(*sock_pt, (struct sockaddr *)&remote_address, QB_SUN_LEN(&remote_address)) == -1) { res = -errno; goto error_connect; } return set_sock_size(*sock_pt, max_msg_size); error_connect: close(*sock_pt); *sock_pt = -1; return res; } static int32_t _finish_connecting(struct qb_ipc_one_way *one_way) { struct sockaddr_un remote_address; int res; int error; int retry = 0; set_sock_addr(&remote_address, one_way->u.us.sock_name); /* this retry loop is here to help connecting when trying to send * an event right after connection setup. */ do { errno = 0; res = connect(one_way->u.us.sock, (struct sockaddr *)&remote_address, QB_SUN_LEN(&remote_address)); if (res == -1) { error = -errno; qb_util_perror(LOG_DEBUG, "error calling connect()"); retry++; usleep(100000); } } while (res == -1 && retry < 10); if (res == -1) { return error; } /* Beside disposing no longer needed value, this also signals that we are done with connect-on-send arrangement at the server side (i.e. for response and event channels). 
*/ free(one_way->u.us.sock_name); one_way->u.us.sock_name = NULL; return set_sock_size(one_way->u.us.sock, one_way->max_msg_size); } /* * client functions * -------------------------------------------------------- */ static void qb_ipcc_us_disconnect(struct qb_ipcc_connection *c) { munmap(c->request.u.us.shared_data, SHM_CONTROL_SIZE); unlink(c->request.u.us.shared_file_name); if (use_filesystem_sockets()) { struct sockaddr_un un_addr; socklen_t un_addr_len = sizeof(struct sockaddr_un); char *base_name; char sock_name[PATH_MAX]; size_t length; if (getsockname(c->response.u.us.sock, (struct sockaddr *)&un_addr, &un_addr_len) == 0) { length = strlen(un_addr.sun_path); base_name = strndup(un_addr.sun_path, length - /* strlen("-response") */ 9); qb_util_log(LOG_DEBUG, "unlinking socket bound files with base_name=%s length=%d",base_name,length); snprintf(sock_name,PATH_MAX,"%s-%s",base_name,"request"); qb_util_log(LOG_DEBUG, "unlink sock_name=%s",sock_name); unlink(sock_name); snprintf(sock_name,PATH_MAX,"%s-%s",base_name,"event"); qb_util_log(LOG_DEBUG, "unlink sock_name=%s",sock_name); unlink(sock_name); snprintf(sock_name,PATH_MAX,"%s-%s",base_name,"event-tx"); qb_util_log(LOG_DEBUG, "unlink sock_name=%s",sock_name); unlink(sock_name); snprintf(sock_name,PATH_MAX,"%s-%s",base_name,"response"); qb_util_log(LOG_DEBUG, "unlink sock_name=%s",sock_name); unlink(sock_name); free(base_name); } } qb_ipcc_us_sock_close(c->event.u.us.sock); qb_ipcc_us_sock_close(c->request.u.us.sock); qb_ipcc_us_sock_close(c->setup.u.us.sock); } static ssize_t qb_ipc_socket_send(struct qb_ipc_one_way *one_way, const void *msg_ptr, size_t msg_len) { ssize_t rc = 0; struct ipc_us_control *ctl; ctl = (struct ipc_us_control *)one_way->u.us.shared_data; if (one_way->u.us.sock_name) { rc = _finish_connecting(one_way); if (rc < 0) { qb_util_log(LOG_ERR, "socket connect-on-send"); return rc; } } qb_sigpipe_ctl(QB_SIGPIPE_IGNORE); rc = send(one_way->u.us.sock, msg_ptr, msg_len, MSG_NOSIGNAL); if (rc == 
-1) { rc = -errno; if (errno != EAGAIN && errno != ENOBUFS) { qb_util_perror(LOG_DEBUG, "socket_send:send"); } } qb_sigpipe_ctl(QB_SIGPIPE_DEFAULT); if (ctl && rc == msg_len) { qb_atomic_int_inc(&ctl->sent); } return rc; } static ssize_t qb_ipc_socket_sendv(struct qb_ipc_one_way *one_way, const struct iovec *iov, size_t iov_len) { int32_t rc; struct ipc_us_control *ctl; ctl = (struct ipc_us_control *)one_way->u.us.shared_data; qb_sigpipe_ctl(QB_SIGPIPE_IGNORE); if (one_way->u.us.sock_name) { rc = _finish_connecting(one_way); if (rc < 0) { qb_util_perror(LOG_ERR, "socket connect-on-sendv"); qb_sigpipe_ctl(QB_SIGPIPE_DEFAULT); return rc; } } rc = writev(one_way->u.us.sock, iov, iov_len); if (rc == -1) { rc = -errno; if (errno != EAGAIN && errno != ENOBUFS) { qb_util_perror(LOG_DEBUG, "socket_sendv:writev %d", one_way->u.us.sock); } } qb_sigpipe_ctl(QB_SIGPIPE_DEFAULT); if (ctl && rc > 0) { qb_atomic_int_inc(&ctl->sent); } return rc; } /* * recv a message of unknown size. */ static ssize_t qb_ipc_us_recv_at_most(struct qb_ipc_one_way *one_way, void *msg, size_t len, int32_t timeout) { int32_t result; int32_t final_rc = 0; int32_t to_recv = 0; char *data = msg; struct ipc_us_control *ctl = NULL; int32_t time_waited = 0; int32_t time_to_wait = timeout; if (timeout == -1) { time_to_wait = 1000; } qb_sigpipe_ctl(QB_SIGPIPE_IGNORE); retry_peek: result = recv(one_way->u.us.sock, data, sizeof(struct qb_ipc_request_header), MSG_NOSIGNAL | MSG_PEEK); if (result == -1) { if (errno != EAGAIN) { final_rc = -errno; if (use_filesystem_sockets()) { if (errno == ECONNRESET || errno == EPIPE) { final_rc = -ENOTCONN; } } goto cleanup_sigpipe; } /* check to see if we have enough time left to try again */ if (time_waited < timeout || timeout == -1) { result = qb_ipc_us_ready(one_way, NULL, time_to_wait, POLLIN); if (qb_ipc_us_sock_error_is_disconnected(result)) { final_rc = result; goto cleanup_sigpipe; } time_waited += time_to_wait; goto retry_peek; } else if (time_waited >= timeout) { 
final_rc = -ETIMEDOUT; goto cleanup_sigpipe; } } if (result >= sizeof(struct qb_ipc_request_header)) { struct qb_ipc_request_header *hdr = NULL; hdr = (struct qb_ipc_request_header *)msg; to_recv = hdr->size; } result = recv(one_way->u.us.sock, data, to_recv, MSG_NOSIGNAL | MSG_WAITALL); if (result == -1) { final_rc = -errno; goto cleanup_sigpipe; } else if (result == 0) { qb_util_log(LOG_DEBUG, "recv == 0 -> ENOTCONN"); final_rc = -ENOTCONN; goto cleanup_sigpipe; } final_rc = result; ctl = (struct ipc_us_control *)one_way->u.us.shared_data; if (ctl) { (void)qb_atomic_int_dec_and_test(&ctl->sent); } cleanup_sigpipe: qb_sigpipe_ctl(QB_SIGPIPE_DEFAULT); return final_rc; } static void qb_ipc_us_fc_set(struct qb_ipc_one_way *one_way, int32_t fc_enable) { struct ipc_us_control *ctl = (struct ipc_us_control *)one_way->u.us.shared_data; qb_util_log(LOG_TRACE, "setting fc to %d", fc_enable); qb_atomic_int_set(&ctl->flow_control, fc_enable); } static int32_t qb_ipc_us_fc_get(struct qb_ipc_one_way *one_way) { struct ipc_us_control *ctl = (struct ipc_us_control *)one_way->u.us.shared_data; return qb_atomic_int_get(&ctl->flow_control); } static ssize_t qb_ipc_us_q_len_get(struct qb_ipc_one_way *one_way) { struct ipc_us_control *ctl = (struct ipc_us_control *)one_way->u.us.shared_data; return qb_atomic_int_get(&ctl->sent); } int32_t qb_ipcc_us_connect(struct qb_ipcc_connection * c, struct qb_ipc_connection_response * r) { int32_t res; char path[PATH_MAX]; int32_t fd_hdr; char *shm_ptr; qb_atomic_init(); c->needs_sock_for_poll = QB_FALSE; c->funcs.send = qb_ipc_socket_send; c->funcs.sendv = qb_ipc_socket_sendv; c->funcs.recv = qb_ipc_us_recv_at_most; c->funcs.fc_get = qb_ipc_us_fc_get; c->funcs.disconnect = qb_ipcc_us_disconnect; fd_hdr = qb_sys_mmap_file_open(path, r->request, SHM_CONTROL_SIZE, O_RDWR); if (fd_hdr < 0) { res = fd_hdr; errno = -fd_hdr; qb_util_perror(LOG_ERR, "couldn't open file for mmap"); return res; } (void)strlcpy(c->request.u.us.shared_file_name, 
r->request, NAME_MAX);

	/* Map the control file. */
	shm_ptr = mmap(0, SHM_CONTROL_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd_hdr, 0);

	if (shm_ptr == MAP_FAILED) {
		res = -errno;
		qb_util_perror(LOG_ERR, "couldn't create mmap for header");
		goto cleanup_hdr;
	}
	/* Three consecutive struct ipc_us_control slots: request, response, event. */
	c->request.u.us.shared_data = shm_ptr;
	c->response.u.us.shared_data = shm_ptr + sizeof(struct ipc_us_control);
	c->event.u.us.shared_data = shm_ptr + (2 * sizeof(struct ipc_us_control));

	/* The descriptor is no longer needed; -1 tells the cleanup path so. */
	close(fd_hdr);
	fd_hdr = -1;

	res = qb_ipc_dgram_sock_connect(r->response, "response", "request",
					r->max_msg_size, &c->request.u.us.sock, c->egid);
	if (res != 0) {
		goto cleanup_hdr;
	}
	/* Requests and responses share one socket. */
	c->response.u.us.sock = c->request.u.us.sock;

	res = qb_ipc_dgram_sock_connect(r->response, "event", "event-tx",
					r->max_msg_size, &c->event.u.us.sock, c->egid);
	if (res != 0) {
		goto cleanup_hdr;
	}

	return 0;

cleanup_hdr:
	/*
	 * NOTE(review): this one label is reached both before and after the
	 * sockets and the mapping exist, yet it always closes both sockets
	 * and unmaps shared_data -- confirm those fields hold safe values on
	 * the early failure paths.
	 */
	if (fd_hdr >= 0) {
		close(fd_hdr);
	}
	close(c->event.u.us.sock);
	close(c->request.u.us.sock);
	unlink(r->request);
	munmap(c->request.u.us.shared_data, SHM_CONTROL_SIZE);
	return res;
}

/*
 * service functions
 * --------------------------------------------------------
 */

/*
 * Poll callback for a connection's setup socket.
 *
 * data is the struct qb_ipcs_connection.  POLLNVAL disconnects and
 * returns -EINVAL; POLLHUP disconnects and returns -ESHUTDOWN.
 */
static int32_t
_sock_connection_liveliness(int32_t fd, int32_t revents, void *data)
{
	struct qb_ipcs_connection *c = (struct qb_ipcs_connection *)data;

	qb_util_log(LOG_DEBUG, "LIVENESS: fd %d event %d conn (%s)",
		    fd, revents, c->description);
	if (revents & POLLNVAL) {
		qb_util_log(LOG_DEBUG, "NVAL conn (%s)", c->description);
		qb_ipcs_disconnect(c);
		return -EINVAL;
	}
	if (revents & POLLHUP) {
		qb_util_log(LOG_DEBUG, "HUP conn (%s)", c->description);
		qb_ipcs_disconnect(c);
		return -ESHUTDOWN;
	}

	/* If we actually get POLLIN for some reason here, it most
	 * certainly means EOF.
Do a recv on the fd to detect eof and
	 * then disconnect */
	if (revents & POLLIN) {
		char buf[10];
		int res;

		res = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
		if (res < 0 && errno != EAGAIN && errno != EWOULDBLOCK) {
			res = -errno;
		} else if (res == 0) {
			qb_util_log(LOG_DEBUG, "EOF conn (%s)", c->description);
			res = -ESHUTDOWN;
		}

		/*
		 * NOTE(review): on EAGAIN/EWOULDBLOCK res is still -1 here, so
		 * that case also disconnects and returns -1 -- confirm intended.
		 */
		if (res < 0) {
			qb_ipcs_disconnect(c);
			return res;
		}
	}

	return 0;
}

/*
 * Register both sockets of connection c with the service's poll loop:
 * the request socket dispatches to qb_ipcs_dispatch_connection_request,
 * the setup socket to _sock_connection_liveliness.
 *
 * Returns the dispatch_add() result.  If the second registration fails
 * the first one is removed again.
 */
static int32_t
_sock_add_to_mainloop(struct qb_ipcs_connection *c)
{
	int res;

	res = c->service->poll_fns.dispatch_add(c->service->poll_priority,
						c->request.u.us.sock,
						POLLIN | POLLPRI | POLLNVAL,
						c,
						qb_ipcs_dispatch_connection_request);
	if (res < 0) {
		qb_util_log(LOG_ERR,
			    "Error adding socket to mainloop (%s).",
			    c->description);
		return res;
	}

	res = c->service->poll_fns.dispatch_add(c->service->poll_priority,
						c->setup.u.us.sock,
						POLLIN | POLLPRI | POLLNVAL,
						c, _sock_connection_liveliness);
	/* Logged before res is checked, i.e. also when the add failed. */
	qb_util_log(LOG_DEBUG, "added %d to poll loop (liveness)",
		    c->setup.u.us.sock);
	if (res < 0) {
		qb_util_perror(LOG_ERR, "Error adding setupfd to mainloop");
		(void)c->service->poll_fns.dispatch_del(c->request.u.us.sock);
		return res;
	}
	return res;
}

/* Remove the request and setup sockets of c from the poll loop; results are ignored. */
static void
_sock_rm_from_mainloop(struct qb_ipcs_connection *c)
{
	(void)c->service->poll_fns.dispatch_del(c->request.u.us.sock);
	(void)c->service->poll_fns.dispatch_del(c->setup.u.us.sock);
}

/*
 * Transport-level teardown of connection c; what is released depends
 * on c->state.
 */
static void
qb_ipcs_us_disconnect(struct qb_ipcs_connection *c)
{
	qb_enter();

	if (c->state == QB_IPCS_CONNECTION_ESTABLISHED ||
	    c->state == QB_IPCS_CONNECTION_ACTIVE) {
		_sock_rm_from_mainloop(c);

		/* Free the temporaries denoting which respective socket
		   name on the client's side to connect upon the first
		   send operation -- normally the variable is free'd once
		   the connection is established but there may have been
		   no chance for that.
*/ free(c->response.u.us.sock_name); c->response.u.us.sock_name = NULL; free(c->event.u.us.sock_name); c->event.u.us.sock_name = NULL; if (use_filesystem_sockets()) { struct sockaddr_un un_addr; socklen_t un_addr_len = sizeof(struct sockaddr_un); char *base_name; char sock_name[PATH_MAX]; size_t length; if (getsockname(c->request.u.us.sock, (struct sockaddr *)&un_addr, &un_addr_len) == 0) { length = strlen(un_addr.sun_path); base_name = strndup(un_addr.sun_path, length - /* strlen("-request") */ 8); qb_util_log(LOG_DEBUG, "unlinking socket bound files with base_name=%s length=%d",base_name,length); snprintf(sock_name,PATH_MAX,"%s-%s",base_name,"request"); qb_util_log(LOG_DEBUG, "unlink sock_name=%s",sock_name); unlink(sock_name); snprintf(sock_name,PATH_MAX,"%s-%s",base_name,"event"); qb_util_log(LOG_DEBUG, "unlink sock_name=%s",sock_name); unlink(sock_name); snprintf(sock_name,PATH_MAX,"%s-%s",base_name,"event-tx"); qb_util_log(LOG_DEBUG, "unlink sock_name=%s",sock_name); unlink(sock_name); snprintf(sock_name,PATH_MAX,"%s-%s",base_name,"response"); qb_util_log(LOG_DEBUG, "unlink sock_name=%s",sock_name); unlink(sock_name); free(base_name); } } qb_ipcc_us_sock_close(c->setup.u.us.sock); qb_ipcc_us_sock_close(c->request.u.us.sock); qb_ipcc_us_sock_close(c->event.u.us.sock); } if (c->state == QB_IPCS_CONNECTION_SHUTTING_DOWN || c->state == QB_IPCS_CONNECTION_ACTIVE) { munmap(c->request.u.us.shared_data, SHM_CONTROL_SIZE); unlink(c->request.u.us.shared_file_name); } } static int32_t qb_ipcs_us_connect(struct qb_ipcs_service *s, struct qb_ipcs_connection *c, struct qb_ipc_connection_response *r) { char path[PATH_MAX]; int32_t fd_hdr; int32_t res = 0; struct ipc_us_control *ctl; char *shm_ptr; qb_util_log(LOG_DEBUG, "connecting to client (%s)", c->description); c->request.u.us.sock = c->setup.u.us.sock; c->response.u.us.sock = c->setup.u.us.sock; snprintf(r->request, NAME_MAX, "qb-%s-control-%s", s->name, c->description); snprintf(r->response, NAME_MAX, "qb-%s-%s", 
s->name, c->description); fd_hdr = qb_sys_mmap_file_open(path, r->request, SHM_CONTROL_SIZE, - O_CREAT | O_TRUNC | O_RDWR); + O_CREAT | O_TRUNC | O_RDWR | O_EXCL); if (fd_hdr < 0) { res = fd_hdr; errno = -fd_hdr; qb_util_perror(LOG_ERR, "couldn't create file for mmap (%s)", c->description); return res; } (void)strlcpy(r->request, path, PATH_MAX); (void)strlcpy(c->request.u.us.shared_file_name, r->request, NAME_MAX); res = chown(r->request, c->auth.uid, c->auth.gid); if (res != 0) { /* ignore res, this is just for the compiler warnings. */ res = 0; } res = chmod(r->request, c->auth.mode); if (res != 0) { /* ignore res, this is just for the compiler warnings. */ res = 0; } shm_ptr = mmap(0, SHM_CONTROL_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd_hdr, 0); if (shm_ptr == MAP_FAILED) { res = -errno; qb_util_perror(LOG_ERR, "couldn't create mmap for header (%s)", c->description); goto cleanup_hdr; } c->request.u.us.shared_data = shm_ptr; c->response.u.us.shared_data = shm_ptr + sizeof(struct ipc_us_control); c->event.u.us.shared_data = shm_ptr + (2 * sizeof(struct ipc_us_control)); ctl = (struct ipc_us_control *)c->request.u.us.shared_data; ctl->sent = 0; ctl->flow_control = 0; ctl = (struct ipc_us_control *)c->response.u.us.shared_data; ctl->sent = 0; ctl->flow_control = 0; ctl = (struct ipc_us_control *)c->event.u.us.shared_data; ctl->sent = 0; ctl->flow_control = 0; close(fd_hdr); fd_hdr = -1; /* request channel */ res = qb_ipc_dgram_sock_setup(r->response, "request", &c->request.u.us.sock, c->egid); if (res < 0) { goto cleanup_hdr; } res = set_sock_size(c->request.u.us.sock, c->request.max_msg_size); if (res != 0) { goto cleanup_hdr; } c->setup.u.us.sock_name = NULL; c->request.u.us.sock_name = NULL; /* response channel */ c->response.u.us.sock = c->request.u.us.sock; snprintf(path, PATH_MAX, "%s-%s", r->response, "response"); c->response.u.us.sock_name = strdup(path); /* event channel */ res = qb_ipc_dgram_sock_setup(r->response, "event-tx", &c->event.u.us.sock, 
c->egid); if (res < 0) { goto cleanup_hdr; } res = set_sock_size(c->event.u.us.sock, c->event.max_msg_size); if (res != 0) { goto cleanup_hdr; } snprintf(path, PATH_MAX, "%s-%s", r->response, "event"); c->event.u.us.sock_name = strdup(path); res = _sock_add_to_mainloop(c); if (res < 0) { goto cleanup_hdr; } return res; cleanup_hdr: free(c->response.u.us.sock_name); free(c->event.u.us.sock_name); if (fd_hdr >= 0) { close(fd_hdr); } unlink(r->request); munmap(c->request.u.us.shared_data, SHM_CONTROL_SIZE); return res; } void qb_ipcs_us_init(struct qb_ipcs_service *s) { s->funcs.connect = qb_ipcs_us_connect; s->funcs.disconnect = qb_ipcs_us_disconnect; s->funcs.recv = qb_ipc_us_recv_at_most; s->funcs.peek = NULL; s->funcs.reclaim = NULL; s->funcs.send = qb_ipc_socket_send; s->funcs.sendv = qb_ipc_socket_sendv; s->funcs.fc_set = qb_ipc_us_fc_set; s->funcs.q_len_get = qb_ipc_us_q_len_get; s->needs_sock_for_poll = QB_FALSE; qb_atomic_init(); } diff --git a/lib/ipcs.c b/lib/ipcs.c index 4a375fc..573b427 100644 --- a/lib/ipcs.c +++ b/lib/ipcs.c @@ -1,966 +1,980 @@ /* * Copyright (C) 2010 Red Hat, Inc. * * Author: Angus Salkeld * * This file is part of libqb. * * libqb is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 2.1 of the License, or * (at your option) any later version. * * libqb is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with libqb. If not, see . 
*/ #include "os_base.h" #include #include "util_int.h" #include "ipc_int.h" #include #include #include static void qb_ipcs_flowcontrol_set(struct qb_ipcs_connection *c, int32_t fc_enable); static int32_t new_event_notification(struct qb_ipcs_connection * c); static QB_LIST_DECLARE(qb_ipc_services); qb_ipcs_service_t * qb_ipcs_create(const char *name, int32_t service_id, enum qb_ipc_type type, struct qb_ipcs_service_handlers *handlers) { struct qb_ipcs_service *s; + int fd; + unsigned int seed; s = calloc(1, sizeof(struct qb_ipcs_service)); if (s == NULL) { return NULL; } if (type == QB_IPC_NATIVE) { #ifdef DISABLE_IPC_SHM s->type = QB_IPC_SOCKET; #else s->type = QB_IPC_SHM; #endif /* DISABLE_IPC_SHM */ } else { s->type = type; } s->pid = getpid(); s->needs_sock_for_poll = QB_FALSE; s->poll_priority = QB_LOOP_MED; /* Initial alloc ref */ qb_ipcs_ref(s); s->service_id = service_id; (void)strlcpy(s->name, name, NAME_MAX); s->serv_fns.connection_accept = handlers->connection_accept; s->serv_fns.connection_created = handlers->connection_created; s->serv_fns.msg_process = handlers->msg_process; s->serv_fns.connection_closed = handlers->connection_closed; s->serv_fns.connection_destroyed = handlers->connection_destroyed; qb_list_init(&s->connections); qb_list_init(&s->list); qb_list_add(&s->list, &qb_ipc_services); + /* Randomise socket names */ + fd = open("/dev/urandom", O_RDONLY); + if (fd == -1) { + seed = (time_t)time(NULL); + } else { + if (read(fd, &seed, sizeof(seed)) != 4) { + seed = (time_t)time(NULL); + } + close(fd); + } + srand(seed); + return s; } void qb_ipcs_poll_handlers_set(struct qb_ipcs_service *s, struct qb_ipcs_poll_handlers *handlers) { s->poll_fns.job_add = handlers->job_add; s->poll_fns.dispatch_add = handlers->dispatch_add; s->poll_fns.dispatch_mod = handlers->dispatch_mod; s->poll_fns.dispatch_del = handlers->dispatch_del; } void qb_ipcs_service_context_set(qb_ipcs_service_t* s, void *context) { s->context = context; } void * 
qb_ipcs_service_context_get(qb_ipcs_service_t* s)
{
	/* Returns the stored user pointer; s is not checked for NULL. */
	return s->context;
}

/*
 * Initialise the transport selected by s->type and publish the service.
 *
 * Returns -EINVAL when a dispatch_add/mod/del poll handler is missing or
 * the type is unknown, -ENOTSUP for the message-queue types (and for SHM
 * when DISABLE_IPC_SHM is defined), otherwise the qb_ipcs_us_publish()
 * result.  Any negative result also drops the initial service reference.
 */
int32_t
qb_ipcs_run(struct qb_ipcs_service *s)
{
	int32_t res = 0;

	if (s->poll_fns.dispatch_add == NULL ||
	    s->poll_fns.dispatch_mod == NULL ||
	    s->poll_fns.dispatch_del == NULL) {

		res = -EINVAL;
		goto run_cleanup;
	}

	switch (s->type) {
	case QB_IPC_SOCKET:
		qb_ipcs_us_init((struct qb_ipcs_service *)s);
		break;
	case QB_IPC_SHM:
#ifdef DISABLE_IPC_SHM
		res = -ENOTSUP;
#else
		qb_ipcs_shm_init((struct qb_ipcs_service *)s);
#endif /* DISABLE_IPC_SHM */
		break;
	case QB_IPC_POSIX_MQ:
	case QB_IPC_SYSV_MQ:
		res = -ENOTSUP;
		break;
	default:
		res = -EINVAL;
		break;
	}

	if (res == 0) {
		res = qb_ipcs_us_publish(s);
		if (res < 0) {
			/* Undo a partial publish before giving up. */
			(void)qb_ipcs_us_withdraw(s);
			goto run_cleanup;
		}
	}

run_cleanup:
	if (res < 0) {
		/* Failed to run services, removing initial alloc reference. */
		qb_ipcs_unref(s);
	}

	return res;
}

/*
 * Re-register the connection's poll descriptor with the service's
 * current priority and c->poll_events: the event socket for the socket
 * transport, the setup socket otherwise.
 */
static int32_t
_modify_dispatch_descriptor_(struct qb_ipcs_connection *c)
{
	qb_ipcs_dispatch_mod_fn disp_mod = c->service->poll_fns.dispatch_mod;

	if (c->service->type == QB_IPC_SOCKET) {
		return disp_mod(c->service->poll_priority,
				c->event.u.us.sock,
				c->poll_events, c,
				qb_ipcs_dispatch_connection_request);
	} else {
		return disp_mod(c->service->poll_priority,
				c->setup.u.us.sock,
				c->poll_events, c,
				qb_ipcs_dispatch_connection_request);
	}
	/* Not reached: both branches above return. */
	return -EINVAL;
}

/*
 * Translate the rate limit rl into a poll priority for service s
 * (FAST -> high; SLOW, OFF and OFF_2 -> low; anything else -> medium),
 * then walk every connection to update its flow-control state.
 */
void
qb_ipcs_request_rate_limit(struct qb_ipcs_service *s,
			   enum qb_ipcs_rate_limit rl)
{
	struct qb_ipcs_connection *c;
	enum qb_loop_priority old_p = s->poll_priority;
	struct qb_list_head *pos;
	struct qb_list_head *n;

	switch (rl) {
	case QB_IPCS_RATE_FAST:
		s->poll_priority = QB_LOOP_HIGH;
		break;
	case QB_IPCS_RATE_SLOW:
	case QB_IPCS_RATE_OFF:
	case QB_IPCS_RATE_OFF_2:
		s->poll_priority = QB_LOOP_LOW;
		break;
	default:
	case QB_IPCS_RATE_NORMAL:
		s->poll_priority = QB_LOOP_MED;
		break;
	}

	qb_list_for_each_safe(pos, n, &s->connections) {

		c = qb_list_entry(pos, struct qb_ipcs_connection, list);
		/* Hold a reference while the connection is being touched. */
		qb_ipcs_connection_ref(c);

		if (rl == QB_IPCS_RATE_OFF) {
			qb_ipcs_flowcontrol_set(c, 1);
		} else
if (rl == QB_IPCS_RATE_OFF_2) {
			qb_ipcs_flowcontrol_set(c, 2);
		} else {
			qb_ipcs_flowcontrol_set(c, QB_FALSE);
		}
		/* Only re-register the descriptor when the priority really changed. */
		if (old_p != s->poll_priority) {
			(void)_modify_dispatch_descriptor_(c);
		}
		qb_ipcs_connection_unref(c);
	}
}

/* Take one reference on the service. */
void
qb_ipcs_ref(struct qb_ipcs_service *s)
{
	qb_atomic_int_inc(&s->ref_count);
}

/*
 * Drop one reference on the service and free it when
 * qb_atomic_int_dec_and_test() reports the last one is gone.
 * NOTE(review): s->list is not unlinked here before free() -- confirm
 * the service is removed from its list elsewhere.
 */
void
qb_ipcs_unref(struct qb_ipcs_service *s)
{
	int32_t free_it;

	assert(s->ref_count > 0);
	free_it = qb_atomic_int_dec_and_test(&s->ref_count);
	if (free_it) {
		qb_util_log(LOG_DEBUG, "%s() - destroying", __func__);
		free(s);
	}
}

/*
 * Disconnect every connection of s, withdraw the service and drop the
 * initial reference.  A NULL s is ignored.
 */
void
qb_ipcs_destroy(struct qb_ipcs_service *s)
{
	struct qb_ipcs_connection *c = NULL;
	struct qb_list_head *pos;
	struct qb_list_head *n;

	if (s == NULL) {
		return;
	}
	/* The _safe iterator is used because the body disconnects entries. */
	qb_list_for_each_safe(pos, n, &s->connections) {
		c = qb_list_entry(pos, struct qb_ipcs_connection, list);
		if (c == NULL) {
			continue;
		}
		qb_ipcs_disconnect(c);
	}
	(void)qb_ipcs_us_withdraw(s);

	/* service destroyed, remove initial alloc ref */
	qb_ipcs_unref(s);
}

/*
 * connection API
 */

/*
 * Pick the one-way channel whose socket carries event traffic: the
 * setup channel when the service needs a socket for poll, the event
 * channel when that is a socket, otherwise NULL.
 */
static struct qb_ipc_one_way *
_event_sock_one_way_get(struct qb_ipcs_connection * c)
{
	if (c->service->needs_sock_for_poll) {
		return &c->setup;
	}
	if (c->event.type == QB_IPC_SOCKET) {
		return &c->event;
	}
	return NULL;
}

/* Same selection as _event_sock_one_way_get(), for the response channel. */
static struct qb_ipc_one_way *
_response_sock_one_way_get(struct qb_ipcs_connection * c)
{
	if (c->service->needs_sock_for_poll) {
		return &c->setup;
	}
	if (c->response.type == QB_IPC_SOCKET) {
		return &c->response;
	}
	return NULL;
}

/*
 * Send one response of size bytes on connection c.
 *
 * Returns -EINVAL for a NULL connection, otherwise the transport's
 * send() result.  On -EAGAIN/-ETIMEDOUT the response socket (if any)
 * is probed for POLLOUT and a negative probe result replaces the
 * return value; the send_retries statistic is incremented.
 */
ssize_t
qb_ipcs_response_send(struct qb_ipcs_connection *c, const void *data,
		      size_t size)
{
	ssize_t res;

	if (c == NULL) {
		return -EINVAL;
	}
	qb_ipcs_connection_ref(c);
	res = c->service->funcs.send(&c->response, data, size);
	if (res == size) {
		c->stats.responses++;
	} else if (res == -EAGAIN || res == -ETIMEDOUT) {
		struct qb_ipc_one_way *ow = _response_sock_one_way_get(c);
		if (ow) {
			ssize_t res2 = qb_ipc_us_ready(ow, &c->setup, 0, POLLOUT);
			if (res2 < 0) {
				res = res2;
			}
		}
		c->stats.send_retries++;
	}
	qb_ipcs_connection_unref(c);

	return res;
}

ssize_t
qb_ipcs_response_sendv(struct qb_ipcs_connection * c, const struct iovec * iov,
		       size_t iov_len)
{
	/*
	 * iovec variant of qb_ipcs_response_send(): any positive sendv()
	 * result counts as a delivered response; -EAGAIN/-ETIMEDOUT are
	 * handled the same way as in the single-buffer variant.
	 */
	ssize_t res;

	if (c == NULL) {
		return -EINVAL;
	}
	qb_ipcs_connection_ref(c);
	res = c->service->funcs.sendv(&c->response, iov, iov_len);
	if (res > 0) {
		c->stats.responses++;
	} else if (res == -EAGAIN || res == -ETIMEDOUT) {
		struct qb_ipc_one_way *ow = _response_sock_one_way_get(c);
		if (ow) {
			ssize_t res2 = qb_ipc_us_ready(ow, &c->setup, 0, POLLOUT);
			if (res2 < 0) {
				res = res2;
			}
		}
		c->stats.send_retries++;
	}
	qb_ipcs_connection_unref(c);

	return res;
}

/*
 * Retry the notification bytes still owed on the setup socket.
 *
 * Does nothing unless the service needs a socket for poll.  The number
 * of bytes sent is subtracted from c->outstanding_notifiers; once the
 * counter reaches zero POLLOUT is dropped from the poll events again.
 * Returns the qb_ipc_us_send() result (0 when nothing was attempted).
 */
static int32_t
resend_event_notifications(struct qb_ipcs_connection *c)
{
	ssize_t res = 0;

	if (!c->service->needs_sock_for_poll) {
		return res;
	}

	if (c->outstanding_notifiers > 0) {
		/* One byte per pending notification; receive_buf only supplies the bytes. */
		res = qb_ipc_us_send(&c->setup, c->receive_buf,
				     c->outstanding_notifiers);
	}
	if (res > 0) {
		c->outstanding_notifiers -= res;
	}

	assert(c->outstanding_notifiers >= 0);
	if (c->outstanding_notifiers == 0) {
		c->poll_events = POLLIN | POLLPRI | POLLNVAL;
		(void)_modify_dispatch_descriptor_(c);
	}
	return res;
}

/*
 * Send a single notification byte on the setup socket for a newly sent
 * event.  Does nothing unless the service needs a socket for poll.
 * When earlier notifications are still pending, the new one is queued
 * behind them and the whole backlog is retried.
 */
static int32_t
new_event_notification(struct qb_ipcs_connection * c)
{
	ssize_t res = 0;

	if (!c->service->needs_sock_for_poll) {
		return res;
	}

	assert(c->outstanding_notifiers >= 0);
	if (c->outstanding_notifiers > 0) {
		c->outstanding_notifiers++;
		res = resend_event_notifications(c);
	} else {
		res = qb_ipc_us_send(&c->setup, &c->outstanding_notifiers, 1);
		if (res == -EAGAIN) {
			/*
			 * notify the client later, when we can.
*/
			c->outstanding_notifiers++;
			/* Ask for POLLOUT so the retry runs once the socket is writable. */
			c->poll_events = POLLOUT | POLLIN | POLLPRI | POLLNVAL;
			(void)_modify_dispatch_descriptor_(c);
		}
	}
	return res;
}

/*
 * Send one event of size bytes on connection c.
 *
 * Returns -EINVAL for a NULL connection and -EMSGSIZE when size exceeds
 * the event channel's max_msg_size; otherwise the transport's send()
 * result.  After a complete send a notification is raised; a
 * notification error other than -EAGAIN/-ENOBUFS is logged and
 * returned instead.  On -EAGAIN/-ETIMEDOUT pending notifications are
 * retried, the event socket (if any) is probed for POLLOUT, and
 * send_retries is incremented.
 */
ssize_t
qb_ipcs_event_send(struct qb_ipcs_connection * c, const void *data, size_t size)
{
	ssize_t res;
	ssize_t resn;

	if (c == NULL) {
		return -EINVAL;
	} else if (size > c->event.max_msg_size) {
		return -EMSGSIZE;
	}

	qb_ipcs_connection_ref(c);
	res = c->service->funcs.send(&c->event, data, size);
	if (res == size) {
		c->stats.events++;
		resn = new_event_notification(c);
		if (resn < 0 && resn != -EAGAIN && resn != -ENOBUFS) {
			errno = -resn;
			qb_util_perror(LOG_WARNING,
				       "new_event_notification (%s)",
				       c->description);
			res = resn;
		}
	} else if (res == -EAGAIN || res == -ETIMEDOUT) {
		struct qb_ipc_one_way *ow = _event_sock_one_way_get(c);

		if (c->outstanding_notifiers > 0) {
			resn = resend_event_notifications(c);
		}
		if (ow) {
			resn = qb_ipc_us_ready(ow, &c->setup, 0, POLLOUT);
			if (resn < 0) {
				res = resn;
			}
		}
		c->stats.send_retries++;
	}

	qb_ipcs_connection_unref(c);
	return res;
}

/*
 * iovec variant of qb_ipcs_event_send().  There is no size check here,
 * any positive sendv() result counts as a sent event, and only -EAGAIN
 * from the notification is tolerated (the single-buffer variant also
 * tolerates -ENOBUFS).
 */
ssize_t
qb_ipcs_event_sendv(struct qb_ipcs_connection * c,
		    const struct iovec * iov, size_t iov_len)
{
	ssize_t res;
	ssize_t resn;

	if (c == NULL) {
		return -EINVAL;
	}
	qb_ipcs_connection_ref(c);

	res = c->service->funcs.sendv(&c->event, iov, iov_len);
	if (res > 0) {
		c->stats.events++;
		resn = new_event_notification(c);
		if (resn < 0 && resn != -EAGAIN) {
			errno = -resn;
			qb_util_perror(LOG_WARNING,
				       "new_event_notification (%s)",
				       c->description);
			res = resn;
		}
	} else if (res == -EAGAIN || res == -ETIMEDOUT) {
		struct qb_ipc_one_way *ow = _event_sock_one_way_get(c);

		if (c->outstanding_notifiers > 0) {
			resn = resend_event_notifications(c);
		}
		if (ow) {
			resn = qb_ipc_us_ready(ow, &c->setup, 0, POLLOUT);
			if (resn < 0) {
				res = resn;
			}
		}
		c->stats.send_retries++;
	}

	qb_ipcs_connection_unref(c);
	return res;
}

/*
 * Return the first connection of service s with an extra reference
 * taken, or NULL when the service has no connections.
 */
qb_ipcs_connection_t *
qb_ipcs_connection_first_get(struct qb_ipcs_service * s)
{
	struct qb_ipcs_connection *c;

	if (qb_list_empty(&s->connections)) {
		return NULL;
	}

	c =
qb_list_first_entry(&s->connections, struct qb_ipcs_connection, list); qb_ipcs_connection_ref(c); return c; } qb_ipcs_connection_t * qb_ipcs_connection_next_get(struct qb_ipcs_service * s, struct qb_ipcs_connection * current) { struct qb_ipcs_connection *c; if (current == NULL || qb_list_is_last(¤t->list, &s->connections)) { return NULL; } c = qb_list_first_entry(¤t->list, struct qb_ipcs_connection, list); qb_ipcs_connection_ref(c); return c; } int32_t qb_ipcs_service_id_get(struct qb_ipcs_connection * c) { if (c == NULL) { return -EINVAL; } return c->service->service_id; } struct qb_ipcs_connection * qb_ipcs_connection_alloc(struct qb_ipcs_service *s) { struct qb_ipcs_connection *c = calloc(1, sizeof(struct qb_ipcs_connection)); if (c == NULL) { return NULL; } c->pid = 0; c->euid = -1; c->egid = -1; c->receive_buf = NULL; c->context = NULL; c->fc_enabled = QB_FALSE; c->state = QB_IPCS_CONNECTION_INACTIVE; c->poll_events = POLLIN | POLLPRI | POLLNVAL; c->setup.type = s->type; c->request.type = s->type; c->response.type = s->type; c->event.type = s->type; (void)strlcpy(c->description, "not set yet", CONNECTION_DESCRIPTION); /* initial alloc ref */ qb_ipcs_connection_ref(c); /* * The connection makes use of the service object. Give the connection * a reference to the service so we know the service can never be destroyed * until the connection is done with it. 
*/
	qb_ipcs_ref(s);
	c->service = s;
	qb_list_init(&c->list);

	return c;
}

/* Take one reference on the connection; a NULL c is ignored. */
void
qb_ipcs_connection_ref(struct qb_ipcs_connection *c)
{
	if (c) {
		qb_atomic_int_inc(&c->refcount);
	}
}

/*
 * Drop one reference on the connection.  When the last one goes the
 * connection is unlinked from its list, the connection_destroyed
 * callback (if set) and the transport disconnect run, the service
 * reference is released and the connection is freed.  A NULL c is
 * ignored; a refcount below 1 is logged and asserts.
 */
void
qb_ipcs_connection_unref(struct qb_ipcs_connection *c)
{
	int32_t free_it;

	if (c == NULL) {
		return;
	}
	if (c->refcount < 1) {
		qb_util_log(LOG_ERR, "ref:%d state:%d (%s)",
			    c->refcount, c->state, c->description);
		assert(0);
	}
	free_it = qb_atomic_int_dec_and_test(&c->refcount);
	if (free_it) {
		qb_list_del(&c->list);
		if (c->service->serv_fns.connection_destroyed) {
			c->service->serv_fns.connection_destroyed(c);
		}
		c->service->funcs.disconnect(c);
		/* Let go of the connection's reference to the service */
		qb_ipcs_unref(c->service);
		free(c->receive_buf);
		free(c);
	}
}

/*
 * Tear down connection c according to its state:
 *  - ACTIVE: transport disconnect, state -> INACTIVE, drop the initial
 *    reference and return;
 *  - ESTABLISHED: transport disconnect, state -> SHUTTING_DOWN;
 *  - SHUTTING_DOWN: run the connection_closed callback; a non-zero
 *    result re-queues this function as a low-priority job.
 * A NULL c is ignored.
 */
void
qb_ipcs_disconnect(struct qb_ipcs_connection *c)
{
	int32_t res = 0;
	qb_loop_job_dispatch_fn rerun_job;

	if (c == NULL) {
		return;
	}
	qb_util_log(LOG_DEBUG, "%s(%s) state:%d",
		    __func__, c->description, c->state);

	if (c->state == QB_IPCS_CONNECTION_ACTIVE) {
		c->service->funcs.disconnect(c);
		c->state = QB_IPCS_CONNECTION_INACTIVE;
		c->service->stats.closed_connections++;

		/* This removes the initial alloc ref */
		qb_ipcs_connection_unref(c);

		/* return early as it's an incomplete connection.
		 */
		return;
	}
	if (c->state == QB_IPCS_CONNECTION_ESTABLISHED) {
		c->service->funcs.disconnect(c);
		c->state = QB_IPCS_CONNECTION_SHUTTING_DOWN;
		c->service->stats.active_connections--;
		c->service->stats.closed_connections++;
	}
	if (c->state == QB_IPCS_CONNECTION_SHUTTING_DOWN) {
		int scheduled_retry = 0;
		res = 0;
		if (c->service->serv_fns.connection_closed) {
			res = c->service->serv_fns.connection_closed(c);
		}
		if (res != 0) {
			/* OK, so they want the connection_closed
			 * function re-run */
			rerun_job =
			    (qb_loop_job_dispatch_fn) qb_ipcs_disconnect;
			res = c->service->poll_fns.job_add(QB_LOOP_LOW,
							   c, rerun_job);
			if (res == 0) {
				/* this function is going to be called again.
* so hold off on the unref */
				scheduled_retry = 1;
			}
		}

		if (scheduled_retry == 0) {
			/* This removes the initial alloc ref */
			qb_ipcs_connection_unref(c);
		}
	}
}

/*
 * Change the flow-control state of connection c.  Only an actual
 * change calls the transport's fc_set() and updates the statistics.
 * A NULL c is ignored.
 */
static void
qb_ipcs_flowcontrol_set(struct qb_ipcs_connection *c, int32_t fc_enable)
{
	if (c == NULL) {
		return;
	}
	if (c->fc_enabled != fc_enable) {
		c->service->funcs.fc_set(&c->request, fc_enable);
		c->fc_enabled = fc_enable;
		c->stats.flow_control_state = fc_enable;
		c->stats.flow_control_count++;
	}
}

/*
 * Receive and process one request on connection c.
 *
 * Uses the transport's peek/reclaim pair when both exist, otherwise
 * recv() into c->receive_buf.
 *
 * Returns the request size on success, the negative receive error,
 * -ESHUTDOWN for an empty read or a QB_IPC_MSG_DISCONNECT request, and
 * -ENOBUFS when the msg_process callback returns a negative value.
 */
static int32_t
_process_request_(struct qb_ipcs_connection *c, int32_t ms_timeout)
{
	int32_t res = 0;
	ssize_t size;
	struct qb_ipc_request_header *hdr;

	if (c->service->funcs.peek && c->service->funcs.reclaim) {
		size = c->service->funcs.peek(&c->request, (void **)&hdr,
					      ms_timeout);
	} else {
		hdr = c->receive_buf;
		size = c->service->funcs.recv(&c->request,
					      hdr,
					      c->request.max_msg_size,
					      ms_timeout);
	}
	if (size < 0) {
		/* -EAGAIN/-ETIMEDOUT only count as a retry; other errors are logged. */
		if (size != -EAGAIN && size != -ETIMEDOUT) {
			qb_util_perror(LOG_DEBUG,
				       "recv from client connection failed (%s)",
				       c->description);
		} else {
			c->stats.recv_retries++;
		}
		res = size;
		goto cleanup;
	} else if (size == 0 || hdr->id == QB_IPC_MSG_DISCONNECT) {
		qb_util_log(LOG_DEBUG, "client requesting a disconnect (%s)",
			    c->description);
		res = -ESHUTDOWN;
		goto cleanup;
	} else {
		c->stats.requests++;
		res = c->service->serv_fns.msg_process(c, hdr, hdr->size);
		/* 0 == good, negative == backoff */
		if (res < 0) {
			res = -ENOBUFS;
		} else {
			res = size;
		}
	}

	/* Reached only after msg_process ran; the error paths above skip it. */
	if (c && c->service->funcs.peek && c->service->funcs.reclaim) {
		c->service->funcs.reclaim(&c->request);
	}

cleanup:
	return res;
}

/* Timeout handed to _process_request_ by the dispatcher. */
#define IPC_REQUEST_TIMEOUT 10
/* Upper bound on requests per dispatch; also sizes the dispatcher's byte buffer. */
#define MAX_RECV_MSGS 50

/*
 * Number of requests to process in one dispatch, derived from the
 * transport's queue length and capped by the service's poll priority.
 * A non-positive queue length is returned unchanged.
 */
static ssize_t
_request_q_len_get(struct qb_ipcs_connection *c)
{
	ssize_t q_len;
	if (c->service->funcs.q_len_get) {
		q_len = c->service->funcs.q_len_get(&c->request);
		if (q_len <= 0) {
			return q_len;
		}
		if (c->service->poll_priority == QB_LOOP_MED) {
			q_len = QB_MIN(q_len, 5);
		} else if (c->service->poll_priority == QB_LOOP_LOW) {
			q_len = 1;
		} else {
			q_len = QB_MIN(q_len, MAX_RECV_MSGS);
		}
	}
else {
		/* No queue-length callback: process one request per dispatch. */
		q_len = 1;
	}
	return q_len;
}

/*
 * Poll callback for a connection's request descriptor.
 *
 * data is the struct qb_ipcs_connection.  POLLNVAL yields -EINVAL and
 * POLLHUP yields -ESHUTDOWN; POLLOUT first retries pending event
 * notifications.  With flow control enabled nothing is read.
 * Otherwise up to _request_q_len_get() requests are processed.
 */
int32_t
qb_ipcs_dispatch_connection_request(int32_t fd, int32_t revents, void *data)
{
	struct qb_ipcs_connection *c = (struct qb_ipcs_connection *)data;
	char bytes[MAX_RECV_MSGS];
	int32_t res = 0;
	int32_t res2;
	int32_t recvd = 0;
	ssize_t avail;

	if (revents & POLLNVAL) {
		qb_util_log(LOG_DEBUG, "NVAL conn (%s)", c->description);
		res = -EINVAL;
		goto dispatch_cleanup;
	}
	if (revents & POLLHUP) {
		qb_util_log(LOG_DEBUG, "HUP conn (%s)", c->description);
		res = -ESHUTDOWN;
		goto dispatch_cleanup;
	}

	if (revents & POLLOUT) {
		/* try resend events now that fd can write */
		res = resend_event_notifications(c);
		if (res < 0 && res != -EAGAIN) {
			errno = -res;
			qb_util_perror(LOG_WARNING,
				       "resend_event_notifications (%s)",
				       c->description);
		}
		/* nothing to read */
		if ((revents & POLLIN) == 0) {
			res = 0;
			goto dispatch_cleanup;
		}
	}
	if (c->fc_enabled) {
		res = 0;
		goto dispatch_cleanup;
	}
	avail = _request_q_len_get(c);

	/* POLLIN with an empty queue: read one byte to tell a disconnect from noise. */
	if (c->service->needs_sock_for_poll && avail == 0) {
		res2 = qb_ipc_us_recv(&c->setup, bytes, 1, 0);
		if (qb_ipc_us_sock_error_is_disconnected(res2)) {
			errno = -res2;
			qb_util_perror(LOG_WARNING, "conn (%s) disconnected",
				       c->description);
			res = -ESHUTDOWN;
			goto dispatch_cleanup;
		} else {
			qb_util_log(LOG_WARNING,
				    "conn (%s) Nothing in q but got POLLIN on fd:%d (res2:%d)",
				    c->description, fd, res2);
			res = 0;
			goto dispatch_cleanup;
		}
	}

	do {
		res = _process_request_(c, IPC_REQUEST_TIMEOUT);

		if (res == -ESHUTDOWN) {
			goto dispatch_cleanup;
		}

		/* Count every request that was taken off the queue, even if it failed. */
		if (res > 0 || res == -ENOBUFS || res == -EINVAL) {
			recvd++;
		}
		if (res > 0) {
			avail--;
		}
	} while (avail > 0 && res > 0 && !c->fc_enabled);

	/* Read one notification byte per request counted above. */
	if (c->service->needs_sock_for_poll && recvd > 0) {
		res2 = qb_ipc_us_recv(&c->setup, bytes, recvd, -1);
		if (qb_ipc_us_sock_error_is_disconnected(res2)) {
			errno = -res2;
			qb_util_perror(LOG_ERR, "error receiving from setup sock (%s)", c->description);

			res = -ESHUTDOWN;
			goto dispatch_cleanup;
		}
	}

	/* Positive byte counts are not errors: clamp them to 0. */
	res = QB_MIN(0, res);
	if (res == -EAGAIN || res == -ETIMEDOUT || res == -ENOBUFS) {
res = 0;
	}
	if (res != 0) {
		if (res != -ENOTCONN) {
			/*
			 * Abnormal state (ENOTCONN is normal shutdown).
			 */
			errno = -res;
			qb_util_perror(LOG_ERR, "request returned error (%s)",
				       c->description);
		}
	}

dispatch_cleanup:
	/* Any non-zero result ends the connection. */
	if (res != 0) {
		qb_ipcs_disconnect(c);
	}
	return res;
}

/* Attach an opaque user pointer to the connection; a NULL c is ignored. */
void
qb_ipcs_context_set(struct qb_ipcs_connection *c, void *context)
{
	if (c == NULL) {
		return;
	}
	c->context = context;
}

/* Return the connection's user pointer, or NULL for a NULL c. */
void *
qb_ipcs_context_get(struct qb_ipcs_connection *c)
{
	if (c == NULL) {
		return NULL;
	}
	return c->context;
}

/* Return the user pointer of the connection's service, or NULL when either is NULL. */
void *
qb_ipcs_connection_service_context_get(qb_ipcs_connection_t *c)
{
	if (c == NULL || c->service == NULL) {
		return NULL;
	}
	return c->service->context;
}

/*
 * Copy the connection statistics into the caller's stats structure.
 *
 * Returns -EINVAL for a NULL connection, 0 otherwise.  With
 * clear_after_read the counters are zeroed and client_pid restored.
 * The copy uses sizeof(struct qb_ipcs_connection_stats) while the clear
 * uses sizeof(struct qb_ipcs_connection_stats_2)
 * (NOTE(review): presumably c->stats is the larger _2 layout -- confirm).
 */
int32_t
qb_ipcs_connection_stats_get(qb_ipcs_connection_t * c,
			     struct qb_ipcs_connection_stats * stats,
			     int32_t clear_after_read)
{
	if (c == NULL) {
		return -EINVAL;
	}
	memcpy(stats, &c->stats, sizeof(struct qb_ipcs_connection_stats));
	if (clear_after_read) {
		memset(&c->stats, 0, sizeof(struct qb_ipcs_connection_stats_2));
		c->stats.client_pid = c->pid;
	}
	return 0;
}

/*
 * Return a newly calloc()ed copy of the connection statistics with
 * event_q_length filled in from the transport's q_len_get() (0 when the
 * transport has none).  The caller owns the returned memory.
 *
 * Returns NULL with errno = EINVAL for a NULL connection, and NULL when
 * the allocation fails.
 */
struct qb_ipcs_connection_stats_2*
qb_ipcs_connection_stats_get_2(qb_ipcs_connection_t *c,
			       int32_t clear_after_read)
{
	struct qb_ipcs_connection_stats_2 * stats;

	if (c == NULL) {
		errno = EINVAL;
		return NULL;
	}
	stats = calloc(1, sizeof(struct qb_ipcs_connection_stats_2));
	if (stats == NULL) {
		return NULL;
	}

	memcpy(stats, &c->stats, sizeof(struct qb_ipcs_connection_stats_2));

	if (c->service->funcs.q_len_get) {
		stats->event_q_length = c->service->funcs.q_len_get(&c->event);
	} else {
		stats->event_q_length = 0;
	}
	if (clear_after_read) {
		memset(&c->stats, 0, sizeof(struct qb_ipcs_connection_stats_2));
		c->stats.client_pid = c->pid;
	}
	return stats;
}

/*
 * Copy the service statistics into stats, optionally zeroing them
 * afterwards.  Returns -EINVAL for a NULL service, 0 otherwise.
 */
int32_t
qb_ipcs_stats_get(struct qb_ipcs_service * s,
		  struct qb_ipcs_stats * stats, int32_t clear_after_read)
{
	if (s == NULL) {
		return -EINVAL;
	}
	memcpy(stats, &s->stats, sizeof(struct qb_ipcs_stats));
	if (clear_after_read) {
		memset(&s->stats, 0, sizeof(struct qb_ipcs_stats));
	}
	return 0;
}

void
qb_ipcs_connection_auth_set(qb_ipcs_connection_t *c, uid_t uid, gid_t gid, mode_t mode) { if (c) { c->auth.uid = uid; c->auth.gid = gid; c->auth.mode = mode; } } int32_t qb_ipcs_connection_get_buffer_size(qb_ipcs_connection_t *c) { if (c == NULL) { return -EINVAL; } /* request, response, and event shoud all have the same * buffer size allocated. It doesn't matter which we return * here. */ return c->response.max_msg_size; } void qb_ipcs_enforce_buffer_size(qb_ipcs_service_t *s, uint32_t buf_size) { if (s == NULL) { return; } s->max_buffer_size = buf_size; } diff --git a/lib/log_blackbox.c b/lib/log_blackbox.c index 1cba422..2947162 100644 --- a/lib/log_blackbox.c +++ b/lib/log_blackbox.c @@ -1,382 +1,382 @@ /* * Copyright (C) 2011 Red Hat, Inc. * * All rights reserved. * * Author: Angus Salkeld * * libqb is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 2.1 of the License, or * (at your option) any later version. * * libqb is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with libqb. If not, see . 
*/
/* NOTE(review): the bare #include line below has lost its header name in this copy. */
#include "os_base.h"
#include
#include "util_int.h"
#include "log_int.h"
#include "ringbuffer_int.h"

/* Smallest chunk the reader accepts as a blackbox entry. */
#define BB_MIN_ENTRY_SIZE (4 * sizeof(uint32_t) +\
			   sizeof(uint8_t) +\
			   2 * sizeof(char) + sizeof(time_t))

/*
 * Close and re-open the ring buffer of a blackbox target.  Does
 * nothing when the target has no ring buffer instance.
 */
static void
_blackbox_reload(int32_t target)
{
	struct qb_log_target *t = qb_log_target_get(target);

	if (t->instance == NULL) {
		return;
	}
	qb_rb_close(t->instance);
	t->instance = qb_rb_open(t->filename, t->size,
				 QB_RB_FLAG_CREATE | QB_RB_FLAG_OVERWRITE, 0);
}

/* Record layout written by _blackbox_vlogger():
 *   uint32_t         file lineno
 *   uint32_t         tags
 *   uint8_t          priority
 *   uint32_t         function name length (including the NUL)
 *   char[]           function name
 *   struct timespec  timestamp
 *   uint32_t         buffer length
 *   char[]           buffer
 */
static void
_blackbox_vlogger(int32_t target,
		  struct qb_log_callsite *cs, struct timespec *timestamp, va_list ap)
{
	size_t max_size;
	size_t actual_size;
	uint32_t fn_size;
	char *chunk;
	char *msg_len_pt;
	uint32_t msg_len;
	struct qb_log_target *t = qb_log_target_get(target);

	if (t->instance == NULL) {
		return;
	}

	fn_size = strlen(cs->function) + 1;

	/* Fixed part of the record; the message length is added once known. */
	actual_size = 4 * sizeof(uint32_t) + sizeof(uint8_t) + fn_size + sizeof(struct timespec);
	max_size = actual_size + t->max_line_length;

	chunk = qb_rb_chunk_alloc(t->instance, max_size);

	if (chunk == NULL) {
		/* something bad has happened.
abort blackbox logging */ qb_util_perror(LOG_ERR, "Blackbox allocation error, aborting blackbox log %s", t->filename); qb_rb_close(qb_rb_lastref_and_ret( (struct qb_ringbuffer_s **) &t->instance )); return; } /* line number */ memcpy(chunk, &cs->lineno, sizeof(uint32_t)); chunk += sizeof(uint32_t); /* tags */ memcpy(chunk, &cs->tags, sizeof(uint32_t)); chunk += sizeof(uint32_t); /* log level/priority */ memcpy(chunk, &cs->priority, sizeof(uint8_t)); chunk += sizeof(uint8_t); /* function name */ memcpy(chunk, &fn_size, sizeof(uint32_t)); chunk += sizeof(uint32_t); memcpy(chunk, cs->function, fn_size); chunk += fn_size; /* timestamp */ memcpy(chunk, timestamp, sizeof(struct timespec)); chunk += sizeof(struct timespec); /* log message length */ msg_len_pt = chunk; chunk += sizeof(uint32_t); /* log message */ msg_len = qb_vsnprintf_serialize(chunk, max_size, cs->format, ap); if (msg_len >= max_size) { chunk = msg_len_pt + sizeof(uint32_t); /* Reset */ /* Leave this at QB_LOG_MAX_LEN so as not to overflow the blackbox */ msg_len = qb_vsnprintf_serialize(chunk, QB_LOG_MAX_LEN, "Log message too long to be stored in the blackbox. 
"\ "Maximum is QB_LOG_MAX_LEN" , ap); actual_size += msg_len; } actual_size += msg_len; /* now that we know the length, write it */ memcpy(msg_len_pt, &msg_len, sizeof(uint32_t)); (void)qb_rb_chunk_commit(t->instance, actual_size); } static void _blackbox_close(int32_t target) { struct qb_log_target *t = qb_log_target_get(target); qb_rb_close(qb_rb_lastref_and_ret( (struct qb_ringbuffer_s **) &t->instance )); } int32_t qb_log_blackbox_open(struct qb_log_target *t) { if (t->size < 1024) { return -EINVAL; } snprintf(t->filename, PATH_MAX, "%s-%d-blackbox", t->name, getpid()); t->instance = qb_rb_open(t->filename, t->size, QB_RB_FLAG_CREATE | QB_RB_FLAG_OVERWRITE, 0); if (t->instance == NULL) { return -errno; } t->logger = NULL; t->vlogger = _blackbox_vlogger; t->reload = _blackbox_reload; t->close = _blackbox_close; return 0; } /* * This is designed to look as much like the ringbuffer header * as possible so that we can distinguish an old RB dump * from a new one with this header. */ struct _blackbox_file_header { uint32_t word_size; uint32_t read_pt; uint32_t write_pt; uint32_t version; uint32_t hash; } __attribute__((packed)); /* Values we expect for a 'new' header */ #define QB_BLACKBOX_HEADER_WORDSIZE 0 #define QB_BLACKBOX_HEADER_READPT 0xCCBBCCBB #define QB_BLACKBOX_HEADER_WRITEPT 0xBBCCBBCC #define QB_BLACKBOX_HEADER_VERSION 2 #define QB_BLACKBOX_HEADER_HASH 0 ssize_t qb_log_blackbox_write_to_file(const char *filename) { ssize_t written_size = 0; struct qb_log_target *t; struct _blackbox_file_header header; - int fd = open(filename, O_CREAT | O_RDWR, 0700); + int fd = open(filename, O_CREAT | O_RDWR | O_EXCL, 0700); if (fd < 0) { return -errno; } /* Write header, so we know this is a 'new' format blackbox */ header.word_size = QB_BLACKBOX_HEADER_WORDSIZE; header.read_pt = QB_BLACKBOX_HEADER_READPT; header.write_pt = QB_BLACKBOX_HEADER_WRITEPT; header.version = QB_BLACKBOX_HEADER_VERSION; header.hash = QB_BLACKBOX_HEADER_HASH; written_size = write(fd, &header, 
sizeof(header)); if (written_size < sizeof(header)) { close(fd); return written_size; } t = qb_log_target_get(QB_LOG_BLACKBOX); if (t->instance) { written_size += qb_rb_write_to_file(t->instance, fd); } else { written_size = -ENOENT; } close(fd); return written_size; } int qb_log_blackbox_print_from_file(const char *bb_filename) { qb_ringbuffer_t *instance; ssize_t bytes_read; int max_size = 2 * QB_LOG_MAX_LEN; char *chunk; int fd; int err = 0; int saved_errno; struct _blackbox_file_header header; int have_timespecs = 0; char time_buf[64]; fd = open(bb_filename, 0); if (fd < 0) { saved_errno = errno; qb_util_perror(LOG_ERR, "qb_log_blackbox_print_from_file"); return -saved_errno; } /* Read the header. If it looks like one of ours then we know we have hi-res timestamps */ err = read(fd, &header, sizeof(header)); if (err < sizeof(header)) { saved_errno = errno; close(fd); return -saved_errno; } if (header.word_size == QB_BLACKBOX_HEADER_WORDSIZE && header.read_pt == QB_BLACKBOX_HEADER_READPT && header.write_pt == QB_BLACKBOX_HEADER_WRITEPT && header.version == QB_BLACKBOX_HEADER_VERSION && header.hash == QB_BLACKBOX_HEADER_HASH) { have_timespecs = 1; } else { (void)lseek(fd, 0, SEEK_SET); } instance = qb_rb_create_from_file(fd, 0); close(fd); if (instance == NULL) { return -EIO; } chunk = malloc(max_size); do { char *ptr; uint32_t lineno; uint32_t tags; uint8_t priority; uint32_t fn_size; char *function; uint32_t len; struct timespec timestamp; time_t time_sec; uint32_t msg_len; struct tm *tm; char message[QB_LOG_MAX_LEN]; bytes_read = qb_rb_chunk_read(instance, chunk, max_size, 0); if (bytes_read >= 0 && bytes_read < BB_MIN_ENTRY_SIZE) { printf("ERROR Corrupt file: blackbox header too small.\n"); err = -1; goto cleanup; } else if (bytes_read < 0) { errno = -bytes_read; perror("ERROR: qb_rb_chunk_read failed"); err = -EIO; goto cleanup; } ptr = chunk; /* lineno */ memcpy(&lineno, ptr, sizeof(uint32_t)); ptr += sizeof(uint32_t); /* tags */ memcpy(&tags, ptr, 
sizeof(uint32_t)); ptr += sizeof(uint32_t); /* priority */ memcpy(&priority, ptr, sizeof(uint8_t)); ptr += sizeof(uint8_t); /* function size & name */ memcpy(&fn_size, ptr, sizeof(uint32_t)); if ((fn_size + BB_MIN_ENTRY_SIZE) > bytes_read) { #ifndef S_SPLINT_S printf("ERROR Corrupt file: fn_size way too big %" PRIu32 "\n", fn_size); err = -EIO; #endif /* S_SPLINT_S */ goto cleanup; } if (fn_size <= 0) { #ifndef S_SPLINT_S printf("ERROR Corrupt file: fn_size negative %" PRIu32 "\n", fn_size); err = -EIO; #endif /* S_SPLINT_S */ goto cleanup; } ptr += sizeof(uint32_t); function = ptr; ptr += fn_size; /* timestamp size & content */ if (have_timespecs) { memcpy(×tamp, ptr, sizeof(struct timespec)); ptr += sizeof(struct timespec); time_sec = timestamp.tv_sec; } else { memcpy(&time_sec, ptr, sizeof(time_t)); ptr += sizeof(time_t); timestamp.tv_nsec = 0LL; } tm = localtime(&time_sec); if (tm) { int slen = strftime(time_buf, sizeof(time_buf), "%b %d %T", tm); snprintf(time_buf+slen, sizeof(time_buf - slen), ".%03lld", timestamp.tv_nsec/QB_TIME_NS_IN_MSEC); } else { snprintf(time_buf, sizeof(time_buf), "%ld", (long int)time_sec); } /* message length */ memcpy(&msg_len, ptr, sizeof(uint32_t)); if (msg_len > QB_LOG_MAX_LEN || msg_len <= 0) { #ifndef S_SPLINT_S printf("ERROR Corrupt file: msg_len out of bounds %" PRIu32 "\n", msg_len); err = -EIO; #endif /* S_SPLINT_S */ goto cleanup; } ptr += sizeof(uint32_t); /* message content */ len = qb_vsnprintf_deserialize(message, QB_LOG_MAX_LEN, ptr); assert(len > 0); message[len] = '\0'; len--; while (len > 0 && (message[len] == '\n' || message[len] == '\0')) { message[len] = '\0'; len--; } printf("%-7s %s %s(%u):%u: %s\n", qb_log_priority2str(priority), time_buf, function, lineno, tags, message); } while (bytes_read > BB_MIN_ENTRY_SIZE); cleanup: qb_rb_close(instance); free(chunk); return err; } diff --git a/lib/ringbuffer.c b/lib/ringbuffer.c index 81411cb..8852ff5 100644 --- a/lib/ringbuffer.c +++ b/lib/ringbuffer.c @@ -1,935 
+1,935 @@ /* * Copyright (C) 2010-2011 Red Hat, Inc. * * Author: Angus Salkeld * * This file is part of libqb. * * libqb is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 2.1 of the License, or * (at your option) any later version. * * libqb is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with libqb. If not, see . */ #include "ringbuffer_int.h" #include #include "atomic_int.h" #define QB_RB_FILE_HEADER_VERSION 1 /* * #define CRAZY_DEBUG_PRINTFS 1 */ #ifdef CRAZY_DEBUG_PRINTFS #define DEBUG_PRINTF(format, args...) \ do { \ printf(format, ##args); \ } while(0) #else #define DEBUG_PRINTF(format, args...) 
#endif /* CRAZY_DEBUG_PRINTFS */ /* * move the write pointer to the next 128 byte boundary * write_pt goes in 4 bytes (sizeof(uint32_t)) * #define USE_CACHE_LINE_ALIGNMENT 1 */ #ifdef USE_CACHE_LINE_ALIGNMENT #define QB_CACHE_LINE_SIZE 128 #define QB_CACHE_LINE_WORDS (QB_CACHE_LINE_SIZE/sizeof(uint32_t)) #define idx_cache_line_step(idx) \ do { \ if (idx % QB_CACHE_LINE_WORDS) { \ idx += (QB_CACHE_LINE_WORDS - (idx % QB_CACHE_LINE_WORDS)); \ } \ if (idx > (rb->shared_hdr->word_size - 1)) { \ idx = ((idx) % (rb->shared_hdr->word_size)); \ } \ } while (0) #else #define QB_CACHE_LINE_SIZE 0 #define QB_CACHE_LINE_WORDS 0 #define idx_cache_line_step(idx) \ do { \ if (idx > (rb->shared_hdr->word_size - 1)) { \ idx = ((idx) % (rb->shared_hdr->word_size)); \ } \ } while (0) #endif /* the chunk header is two words * 1) the chunk data size * 2) the magic number */ #define QB_RB_CHUNK_HEADER_WORDS 2 #define QB_RB_CHUNK_HEADER_SIZE (sizeof(uint32_t) * QB_RB_CHUNK_HEADER_WORDS) /* * margin is the gap we leave when checking to see if we have enough * space for a new chunk. * So: * qb_rb_space_free() >= QB_RB_CHUNK_MARGIN + new data chunk * The extra word size is to allow for non word sized data chunks. * QB_CACHE_LINE_WORDS is to make sure we have space to align the * chunk. 
*/ #define QB_RB_WORD_ALIGN 1 #define QB_RB_CHUNK_MARGIN (sizeof(uint32_t) * (QB_RB_CHUNK_HEADER_WORDS +\ QB_RB_WORD_ALIGN +\ QB_CACHE_LINE_WORDS)) #define QB_RB_CHUNK_MAGIC 0xA1A1A1A1 #define QB_RB_CHUNK_MAGIC_DEAD 0xD0D0D0D0 #define QB_RB_CHUNK_MAGIC_ALLOC 0xA110CED0 #define QB_RB_CHUNK_SIZE_GET(rb, pointer) rb->shared_data[pointer] #define QB_RB_CHUNK_MAGIC_GET(rb, pointer) \ qb_atomic_int_get_ex((int32_t*)&rb->shared_data[(pointer + 1) % rb->shared_hdr->word_size], \ QB_ATOMIC_ACQUIRE) #define QB_RB_CHUNK_MAGIC_SET(rb, pointer, new_val) \ qb_atomic_int_set_ex((int32_t*)&rb->shared_data[(pointer + 1) % rb->shared_hdr->word_size], \ new_val, QB_ATOMIC_RELEASE) #define QB_RB_CHUNK_DATA_GET(rb, pointer) \ &rb->shared_data[(pointer + QB_RB_CHUNK_HEADER_WORDS) % rb->shared_hdr->word_size] #define QB_MAGIC_ASSERT(_ptr_) \ do { \ uint32_t chunk_magic = QB_RB_CHUNK_MAGIC_GET(rb, _ptr_); \ if (chunk_magic != QB_RB_CHUNK_MAGIC) print_header(rb); \ assert(chunk_magic == QB_RB_CHUNK_MAGIC); \ } while (0) #define idx_step(idx) \ do { \ if (idx > (rb->shared_hdr->word_size - 1)) { \ idx = ((idx) % (rb->shared_hdr->word_size)); \ } \ } while (0) static void print_header(struct qb_ringbuffer_s * rb); static int _rb_chunk_reclaim(struct qb_ringbuffer_s * rb); qb_ringbuffer_t * qb_rb_open(const char *name, size_t size, uint32_t flags, size_t shared_user_data_size) { return qb_rb_open_2(name, size, flags, shared_user_data_size, NULL); } qb_ringbuffer_t * qb_rb_open_2(const char *name, size_t size, uint32_t flags, size_t shared_user_data_size, struct qb_rb_notifier *notifiers) { struct qb_ringbuffer_s *rb; size_t real_size; size_t shared_size; char path[PATH_MAX]; int32_t fd_hdr; int32_t fd_data; uint32_t file_flags = O_RDWR; char filename[PATH_MAX]; int32_t error = 0; void *shm_addr; long page_size = sysconf(_SC_PAGESIZE); #ifdef QB_ARCH_HPPA page_size = QB_MAX(page_size, 0x00400000); /* align to page colour */ #elif defined(QB_FORCE_SHM_ALIGN) page_size = QB_MAX(page_size, 16 * 
1024); #endif /* QB_FORCE_SHM_ALIGN */ /* The user of this api expects the 'size' parameter passed into this function * to be reflective of the max size single write we can do to the * ringbuffer. This means we have to add both the 'margin' space used * to calculate if there is enough space for a new chunk as well as the '+1' that * prevents overlap of the read/write pointers */ size += QB_RB_CHUNK_MARGIN + 1; real_size = QB_ROUNDUP(size, page_size); shared_size = sizeof(struct qb_ringbuffer_shared_s) + shared_user_data_size; if (flags & QB_RB_FLAG_CREATE) { - file_flags |= O_CREAT | O_TRUNC; + file_flags |= O_CREAT | O_TRUNC | O_EXCL; } rb = calloc(1, sizeof(struct qb_ringbuffer_s)); if (rb == NULL) { return NULL; } /* * Create a shared_hdr memory segment for the header. */ snprintf(filename, PATH_MAX, "qb-%s-header", name); fd_hdr = qb_sys_mmap_file_open(path, filename, shared_size, file_flags); if (fd_hdr < 0) { error = fd_hdr; qb_util_log(LOG_ERR, "couldn't create file for mmap"); goto cleanup_hdr; } rb->shared_hdr = mmap(0, shared_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_hdr, 0); if (rb->shared_hdr == MAP_FAILED) { error = -errno; qb_util_log(LOG_ERR, "couldn't create mmap for header"); goto cleanup_hdr; } qb_atomic_init(); rb->flags = flags; /* * create the semaphore */ if (flags & QB_RB_FLAG_CREATE) { rb->shared_data = NULL; /* rb->shared_hdr->word_size tracks data by ints and not bytes/chars. */ rb->shared_hdr->word_size = real_size / sizeof(uint32_t); rb->shared_hdr->write_pt = 0; rb->shared_hdr->read_pt = 0; (void)strlcpy(rb->shared_hdr->hdr_path, path, PATH_MAX); } if (notifiers && notifiers->post_fn) { error = 0; memcpy(&rb->notifier, notifiers, sizeof(struct qb_rb_notifier)); } else { error = qb_rb_sem_create(rb, flags); } if (error < 0) { errno = -error; qb_util_perror(LOG_ERR, "couldn't create a semaphore"); goto cleanup_hdr; } /* Create the shared_data memory segment for the actual ringbuffer. * They have to be separate. 
*/ if (flags & QB_RB_FLAG_CREATE) { snprintf(filename, PATH_MAX, "qb-%s-data", name); fd_data = qb_sys_mmap_file_open(path, filename, real_size, file_flags); (void)strlcpy(rb->shared_hdr->data_path, path, PATH_MAX); } else { fd_data = qb_sys_mmap_file_open(path, rb->shared_hdr->data_path, real_size, file_flags); } if (fd_data < 0) { error = fd_data; qb_util_log(LOG_ERR, "couldn't create file for mmap"); goto cleanup_hdr; } qb_util_log(LOG_DEBUG, "shm size:%ld; real_size:%ld; rb->word_size:%d", size, real_size, rb->shared_hdr->word_size); /* this function closes fd_data */ error = qb_sys_circular_mmap(fd_data, &shm_addr, real_size); rb->shared_data = shm_addr; if (error != 0) { qb_util_log(LOG_ERR, "couldn't create circular mmap on %s", rb->shared_hdr->data_path); goto cleanup_data; } if (flags & QB_RB_FLAG_CREATE) { memset(rb->shared_data, 0, real_size); rb->shared_data[rb->shared_hdr->word_size] = 5; rb->shared_hdr->ref_count = 1; } else { qb_atomic_int_inc(&rb->shared_hdr->ref_count); } close(fd_hdr); return rb; cleanup_data: if (flags & QB_RB_FLAG_CREATE) { unlink(rb->shared_hdr->data_path); } cleanup_hdr: if (fd_hdr >= 0) { close(fd_hdr); } if (rb && (flags & QB_RB_FLAG_CREATE)) { unlink(rb->shared_hdr->hdr_path); if (rb->notifier.destroy_fn) { (void)rb->notifier.destroy_fn(rb->notifier.instance); } } if (rb && (rb->shared_hdr != MAP_FAILED && rb->shared_hdr != NULL)) { munmap(rb->shared_hdr, sizeof(struct qb_ringbuffer_shared_s)); } free(rb); errno = -error; return NULL; } void qb_rb_close(struct qb_ringbuffer_s * rb) { if (rb == NULL) { return; } qb_enter(); (void)qb_atomic_int_dec_and_test(&rb->shared_hdr->ref_count); (void)qb_rb_close_helper(rb, rb->flags & QB_RB_FLAG_CREATE, QB_FALSE); } void qb_rb_force_close(struct qb_ringbuffer_s * rb) { if (rb == NULL) { return; } qb_enter(); qb_atomic_int_set(&rb->shared_hdr->ref_count, -1); (void)qb_rb_close_helper(rb, QB_TRUE, QB_TRUE); } char * qb_rb_name_get(struct qb_ringbuffer_s * rb) { if (rb == NULL) { return 
NULL; } return rb->shared_hdr->hdr_path; } void * qb_rb_shared_user_data_get(struct qb_ringbuffer_s * rb) { if (rb == NULL) { return NULL; } return rb->shared_hdr->user_data; } int32_t qb_rb_refcount_get(struct qb_ringbuffer_s * rb) { if (rb == NULL) { return -EINVAL; } return qb_atomic_int_get(&rb->shared_hdr->ref_count); } ssize_t qb_rb_space_free(struct qb_ringbuffer_s * rb) { uint32_t write_size; uint32_t read_size; size_t space_free = 0; if (rb == NULL) { return -EINVAL; } if (rb->notifier.space_used_fn) { return (rb->shared_hdr->word_size * sizeof(uint32_t)) - rb->notifier.space_used_fn(rb->notifier.instance); } write_size = rb->shared_hdr->write_pt; read_size = rb->shared_hdr->read_pt; if (write_size > read_size) { space_free = (read_size - write_size + rb->shared_hdr->word_size) - 1; } else if (write_size < read_size) { space_free = (read_size - write_size) - 1; } else { if (rb->notifier.q_len_fn && rb->notifier.q_len_fn(rb->notifier.instance) > 0) { space_free = 0; } else { space_free = rb->shared_hdr->word_size; } } /* word -> bytes */ return (space_free * sizeof(uint32_t)); } ssize_t qb_rb_space_used(struct qb_ringbuffer_s * rb) { uint32_t write_size; uint32_t read_size; size_t space_used; if (rb == NULL) { return -EINVAL; } if (rb->notifier.space_used_fn) { return rb->notifier.space_used_fn(rb->notifier.instance); } write_size = rb->shared_hdr->write_pt; read_size = rb->shared_hdr->read_pt; if (write_size > read_size) { space_used = write_size - read_size; } else if (write_size < read_size) { space_used = (write_size - read_size + rb->shared_hdr->word_size) - 1; } else { space_used = 0; } /* word -> bytes */ return (space_used * sizeof(uint32_t)); } ssize_t qb_rb_chunks_used(struct qb_ringbuffer_s *rb) { if (rb == NULL) { return -EINVAL; } if (rb->notifier.q_len_fn) { return rb->notifier.q_len_fn(rb->notifier.instance); } return -ENOTSUP; } void * qb_rb_chunk_alloc(struct qb_ringbuffer_s * rb, size_t len) { uint32_t write_pt; if (rb == NULL) { errno = 
EINVAL; return NULL; } /* * Reclaim data if we are over writing and we need space */ if (rb->flags & QB_RB_FLAG_OVERWRITE) { while (qb_rb_space_free(rb) < (len + QB_RB_CHUNK_MARGIN)) { int rc = _rb_chunk_reclaim(rb); if (rc != 0) { errno = rc; return NULL; } } } else { if (qb_rb_space_free(rb) < (len + QB_RB_CHUNK_MARGIN)) { errno = EAGAIN; return NULL; } } write_pt = rb->shared_hdr->write_pt; /* * insert the chunk header */ rb->shared_data[write_pt] = 0; QB_RB_CHUNK_MAGIC_SET(rb, write_pt, QB_RB_CHUNK_MAGIC_ALLOC); /* * return a pointer to the beginning of the chunk data */ return (void *)QB_RB_CHUNK_DATA_GET(rb, write_pt); } static uint32_t qb_rb_chunk_step(struct qb_ringbuffer_s * rb, uint32_t pointer) { uint32_t chunk_size = QB_RB_CHUNK_SIZE_GET(rb, pointer); /* * skip over the chunk header */ pointer += QB_RB_CHUNK_HEADER_WORDS; /* * skip over the user's data. */ pointer += (chunk_size / sizeof(uint32_t)); /* make allowance for non-word sizes */ if ((chunk_size % (sizeof(uint32_t) * QB_RB_WORD_ALIGN)) != 0) { pointer++; } idx_cache_line_step(pointer); return pointer; } int32_t qb_rb_chunk_commit(struct qb_ringbuffer_s * rb, size_t len) { uint32_t old_write_pt; if (rb == NULL) { return -EINVAL; } /* * commit the magic & chunk_size */ old_write_pt = rb->shared_hdr->write_pt; rb->shared_data[old_write_pt] = len; /* * commit the new write pointer */ rb->shared_hdr->write_pt = qb_rb_chunk_step(rb, old_write_pt); QB_RB_CHUNK_MAGIC_SET(rb, old_write_pt, QB_RB_CHUNK_MAGIC); DEBUG_PRINTF("commit [%zd] read: %u, write: %u -> %u (%u)\n", (rb->notifier.q_len_fn ? 
rb->notifier.q_len_fn(rb->notifier.instance) : 0), rb->shared_hdr->read_pt, old_write_pt, rb->shared_hdr->write_pt, rb->shared_hdr->word_size); /* * post the notification to the reader */ if (rb->notifier.post_fn) { return rb->notifier.post_fn(rb->notifier.instance, len); } return 0; } ssize_t qb_rb_chunk_write(struct qb_ringbuffer_s * rb, const void *data, size_t len) { char *dest = qb_rb_chunk_alloc(rb, len); int32_t res = 0; if (rb == NULL) { return -EINVAL; } if (dest == NULL) { return -errno; } memcpy(dest, data, len); res = qb_rb_chunk_commit(rb, len); if (res < 0) { return res; } return len; } static int _rb_chunk_reclaim(struct qb_ringbuffer_s * rb) { uint32_t old_read_pt; uint32_t new_read_pt; uint32_t old_chunk_size; uint32_t chunk_magic; int rc = 0; old_read_pt = rb->shared_hdr->read_pt; chunk_magic = QB_RB_CHUNK_MAGIC_GET(rb, old_read_pt); if (chunk_magic != QB_RB_CHUNK_MAGIC) { return -EINVAL; } old_chunk_size = QB_RB_CHUNK_SIZE_GET(rb, old_read_pt); new_read_pt = qb_rb_chunk_step(rb, old_read_pt); /* * clear the header */ rb->shared_data[old_read_pt] = 0; QB_RB_CHUNK_MAGIC_SET(rb, old_read_pt, QB_RB_CHUNK_MAGIC_DEAD); /* * set the new read pointer after clearing the header * to prevent a situation where a fast writer will write their * new chunk between setting the new read pointer and clearing the * header. */ rb->shared_hdr->read_pt = new_read_pt; if (rb->notifier.reclaim_fn) { rc = rb->notifier.reclaim_fn(rb->notifier.instance, old_chunk_size); if (rc < 0) { errno = -rc; qb_util_perror(LOG_WARNING, "reclaim_fn"); } } DEBUG_PRINTF("reclaim [%zd]: read: %u -> %u, write: %u\n", (rb->notifier.q_len_fn ? 
rb->notifier.q_len_fn(rb->notifier.instance) : 0), old_read_pt, rb->shared_hdr->read_pt, rb->shared_hdr->write_pt); return rc; } void qb_rb_chunk_reclaim(struct qb_ringbuffer_s * rb) { if (rb == NULL) { return; } _rb_chunk_reclaim(rb); } ssize_t qb_rb_chunk_peek(struct qb_ringbuffer_s * rb, void **data_out, int32_t timeout) { uint32_t read_pt; uint32_t chunk_size; uint32_t chunk_magic; int32_t res = 0; if (rb == NULL) { return -EINVAL; } if (rb->notifier.timedwait_fn) { res = rb->notifier.timedwait_fn(rb->notifier.instance, timeout); } if (res < 0 && res != -EIDRM) { if (res == -ETIMEDOUT) { return 0; } else { errno = -res; qb_util_perror(LOG_ERR, "sem_timedwait"); } return res; } read_pt = rb->shared_hdr->read_pt; chunk_magic = QB_RB_CHUNK_MAGIC_GET(rb, read_pt); if (chunk_magic != QB_RB_CHUNK_MAGIC) { if (rb->notifier.post_fn) { (void)rb->notifier.post_fn(rb->notifier.instance, res); } #ifdef EBADMSG return -EBADMSG; #else return -EINVAL; #endif } chunk_size = QB_RB_CHUNK_SIZE_GET(rb, read_pt); *data_out = QB_RB_CHUNK_DATA_GET(rb, read_pt); return chunk_size; } ssize_t qb_rb_chunk_read(struct qb_ringbuffer_s * rb, void *data_out, size_t len, int32_t timeout) { uint32_t read_pt; uint32_t chunk_size; uint32_t chunk_magic; int32_t res = 0; if (rb == NULL) { return -EINVAL; } if (rb->notifier.timedwait_fn) { res = rb->notifier.timedwait_fn(rb->notifier.instance, timeout); } if (res < 0 && res != -EIDRM) { if (res != -ETIMEDOUT) { errno = -res; qb_util_perror(LOG_ERR, "sem_timedwait"); } return res; } read_pt = rb->shared_hdr->read_pt; chunk_magic = QB_RB_CHUNK_MAGIC_GET(rb, read_pt); if (chunk_magic != QB_RB_CHUNK_MAGIC) { if (rb->notifier.timedwait_fn == NULL) { return -ETIMEDOUT; } else { (void)rb->notifier.post_fn(rb->notifier.instance, res); #ifdef EBADMSG return -EBADMSG; #else return -EINVAL; #endif } } chunk_size = QB_RB_CHUNK_SIZE_GET(rb, read_pt); if (len < chunk_size) { qb_util_log(LOG_ERR, "trying to recv chunk of size %d but %d available", len, 
chunk_size); if (rb->notifier.post_fn) { (void)rb->notifier.post_fn(rb->notifier.instance, chunk_size); } return -ENOBUFS; } memcpy(data_out, QB_RB_CHUNK_DATA_GET(rb, read_pt), chunk_size); _rb_chunk_reclaim(rb); return chunk_size; } static void print_header(struct qb_ringbuffer_s * rb) { printf("Ringbuffer: \n"); if (rb->flags & QB_RB_FLAG_OVERWRITE) { printf(" ->OVERWRITE\n"); } else { printf(" ->NORMAL\n"); } #ifndef S_SPLINT_S printf(" ->write_pt [%" PRIu32 "]\n", rb->shared_hdr->write_pt); printf(" ->read_pt [%" PRIu32 "]\n", rb->shared_hdr->read_pt); printf(" ->size [%" PRIu32 " words]\n", rb->shared_hdr->word_size); printf(" =>free [%zd bytes]\n", qb_rb_space_free(rb)); printf(" =>used [%zd bytes]\n", qb_rb_space_used(rb)); #endif /* S_SPLINT_S */ } /* * FILE HEADER ORDER * 1. word_size * 2. write_pt * 3. read_pt * 4. version * 5. header_hash * * 6. data */ ssize_t qb_rb_write_to_file(struct qb_ringbuffer_s * rb, int32_t fd) { ssize_t result; ssize_t written_size = 0; uint32_t hash = 0; uint32_t version = QB_RB_FILE_HEADER_VERSION; if (rb == NULL) { return -EINVAL; } print_header(rb); /* * 1. word_size */ result = write(fd, &rb->shared_hdr->word_size, sizeof(uint32_t)); if (result != sizeof(uint32_t)) { return -errno; } written_size += result; /* * 2. 3. store the read & write pointers */ result = write(fd, (void *)&rb->shared_hdr->write_pt, sizeof(uint32_t)); if (result != sizeof(uint32_t)) { return -errno; } written_size += result; result = write(fd, (void *)&rb->shared_hdr->read_pt, sizeof(uint32_t)); if (result != sizeof(uint32_t)) { return -errno; } written_size += result; /* * 4. version used */ result = write(fd, &version, sizeof(uint32_t)); if (result != sizeof(uint32_t)) { return -errno; } written_size += result; /* * 5. 
hash helps us verify header is not corrupted on file read */ hash = rb->shared_hdr->word_size + rb->shared_hdr->write_pt + rb->shared_hdr->read_pt + QB_RB_FILE_HEADER_VERSION; result = write(fd, &hash, sizeof(uint32_t)); if (result != sizeof(uint32_t)) { return -errno; } written_size += result; result = write(fd, rb->shared_data, rb->shared_hdr->word_size * sizeof(uint32_t)); if (result != rb->shared_hdr->word_size * sizeof(uint32_t)) { return -errno; } written_size += result; qb_util_log(LOG_DEBUG, " writing total of: %zd\n", written_size); return written_size; } qb_ringbuffer_t * qb_rb_create_from_file(int32_t fd, uint32_t flags) { ssize_t n_read; size_t n_required; size_t total_read = 0; uint32_t read_pt; uint32_t write_pt; struct qb_ringbuffer_s *rb; uint32_t word_size = 0; uint32_t version = 0; uint32_t hash = 0; uint32_t calculated_hash = 0; if (fd < 0) { return NULL; } /* * 1. word size */ n_required = sizeof(uint32_t); n_read = read(fd, &word_size, n_required); if (n_read != n_required) { qb_util_perror(LOG_ERR, "Unable to read blackbox file header"); return NULL; } total_read += n_read; /* * 2. 3. read & write pointers */ n_read = read(fd, &write_pt, sizeof(uint32_t)); assert(n_read == sizeof(uint32_t)); total_read += n_read; n_read = read(fd, &read_pt, sizeof(uint32_t)); assert(n_read == sizeof(uint32_t)); total_read += n_read; /* * 4. version */ n_required = sizeof(uint32_t); n_read = read(fd, &version, n_required); if (n_read != n_required) { qb_util_perror(LOG_ERR, "Unable to read blackbox file header"); return NULL; } total_read += n_read; /* * 5. 
Hash */ n_required = sizeof(uint32_t); n_read = read(fd, &hash, n_required); if (n_read != n_required) { qb_util_perror(LOG_ERR, "Unable to read blackbox file header"); return NULL; } total_read += n_read; calculated_hash = word_size + write_pt + read_pt + version; if (hash != calculated_hash) { qb_util_log(LOG_ERR, "Corrupt blackbox: File header hash (%d) does not match calculated hash (%d)", hash, calculated_hash); return NULL; } else if (version != QB_RB_FILE_HEADER_VERSION) { qb_util_log(LOG_ERR, "Wrong file header version. Expected %d got %d", QB_RB_FILE_HEADER_VERSION, version); return NULL; } /* * 6. data */ n_required = (word_size * sizeof(uint32_t)); /* * qb_rb_open adds QB_RB_CHUNK_MARGIN + 1 to the requested size. */ rb = qb_rb_open("create_from_file", n_required - (QB_RB_CHUNK_MARGIN + 1), QB_RB_FLAG_CREATE | QB_RB_FLAG_NO_SEMAPHORE, 0); if (rb == NULL) { return NULL; } rb->shared_hdr->read_pt = read_pt; rb->shared_hdr->write_pt = write_pt; n_read = read(fd, rb->shared_data, n_required); if (n_read < 0) { qb_util_perror(LOG_ERR, "Unable to read blackbox file data"); goto cleanup_fail; } total_read += n_read; if (n_read != n_required) { qb_util_log(LOG_WARNING, "read %zd bytes, but expected %zu", n_read, n_required); goto cleanup_fail; } qb_util_log(LOG_DEBUG, "read total of: %zd", total_read); print_header(rb); return rb; cleanup_fail: qb_rb_close(rb); return NULL; } int32_t qb_rb_chown(struct qb_ringbuffer_s * rb, uid_t owner, gid_t group) { int32_t res; if (rb == NULL) { return -EINVAL; } res = chown(rb->shared_hdr->data_path, owner, group); if (res < 0 && errno != EPERM) { return -errno; } res = chown(rb->shared_hdr->hdr_path, owner, group); if (res < 0 && errno != EPERM) { return -errno; } return 0; } int32_t qb_rb_chmod(qb_ringbuffer_t * rb, mode_t mode) { int32_t res; if (rb == NULL) { return -EINVAL; } res = chmod(rb->shared_hdr->data_path, mode); if (res < 0) { return -errno; } res = chmod(rb->shared_hdr->hdr_path, mode); if (res < 0) { return 
-errno; } return 0; }