diff --git a/src/sbd-md.c b/src/sbd-md.c index c41e3e3..47cbe8c 100644 --- a/src/sbd-md.c +++ b/src/sbd-md.c @@ -1,1286 +1,1289 @@ /* * Copyright (C) 2013 Lars Marowsky-Bree * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #include "sbd.h" #define SBD_MSG_EMPTY 0x00 #define SBD_MSG_TEST 0x01 #define SBD_MSG_RESET 0x02 #define SBD_MSG_OFF 0x03 #define SBD_MSG_EXIT 0x04 #define SBD_MSG_CRASHDUMP 0x05 #define SLOT_TO_SECTOR(slot) (1+slot*2) #define MBOX_TO_SECTOR(mbox) (2+mbox*2) extern int disk_count; /* These have to match the values in the header of the partition */ static char sbd_magic[8] = "SBD_SBD_"; static char sbd_version = 0x02; struct slot_msg_arg_t { const char* name; const char* msg; }; static signed char cmd2char(const char *cmd) { if (strcmp("clear", cmd) == 0) { return SBD_MSG_EMPTY; } else if (strcmp("test", cmd) == 0) { return SBD_MSG_TEST; } else if (strcmp("reset", cmd) == 0) { return SBD_MSG_RESET; } else if (strcmp("off", cmd) == 0) { return SBD_MSG_OFF; } else if (strcmp("exit", cmd) == 0) { return SBD_MSG_EXIT; } else if (strcmp("crashdump", cmd) == 0) { return SBD_MSG_CRASHDUMP; } return -1; } static const char* char2cmd(const char cmd) { switch (cmd) { case SBD_MSG_EMPTY: return "clear"; break; case SBD_MSG_TEST: return "test"; break; case SBD_MSG_RESET: return "reset"; break; case SBD_MSG_OFF: return "off"; break; case SBD_MSG_EXIT: return "exit"; break; case SBD_MSG_CRASHDUMP: return "crashdump"; break; default: return "undefined"; break; } } static void close_device(struct sbd_context *st) { if (!st) { return; } + if (st->ioctx) { + io_destroy(st->ioctx); + } if (st->devfd >= 0) { close(st->devfd); } free(st->buffer); free(st); } static struct sbd_context * open_device(const char* devname, int loglevel) { struct sbd_context *st; if (!devname) return NULL; st = calloc(1, sizeof(struct sbd_context)); if (!st) { return NULL; } st->devfd = -1; if (io_setup(1, &st->ioctx) != 0) { cl_perror("io_setup failed"); goto out; } st->devfd = open(devname, O_SYNC|O_RDWR|O_DIRECT); if (st->devfd == -1) { if (loglevel == LOG_DEBUG) { DBGLOG(loglevel, "Opening device %s failed.", devname); } else { cl_log(loglevel, "Opening device %s failed.", devname); } goto out; } ioctl(st->devfd, BLKSSZGET, §or_size); if (sector_size == 0) { cl_perror("Get sector size failed.\n"); goto out; } if (posix_memalign(&st->buffer, sector_size, sector_size)) { cl_perror("Couldn't allocate sector-buffer."); goto out; } return st; out: close_device(st); return NULL; } static void * sector_alloc(void) { void *x; x = calloc(1, sector_size); if (!x) { exit(1); } return x; } static int sector_io(struct sbd_context *st, int sector, void *data, int rw) { struct timespec timeout; struct io_event event; struct iocb *ios[1] = { &st->io }; long r; timeout.tv_sec = timeout_io; timeout.tv_nsec = 0; memset(&st->io, 0, sizeof(struct iocb)); if (rw) { memcpy(st->buffer, data, sector_size); io_prep_pwrite(&st->io, st->devfd, st->buffer, sector_size, (long long) sector_size * sector); } else { memset(st->buffer, 0, sector_size); io_prep_pread(&st->io, st->devfd, st->buffer, sector_size, (long long) sector_size * sector); } if (io_submit(st->ioctx, 1, ios) != 1) { cl_log(LOG_ERR, "Failed to submit IO request! (rw=%d)", rw); return -1; } errno = 0; r = io_getevents(st->ioctx, 1L, 1L, &event, &timeout); if (r < 0 ) { cl_log(LOG_ERR, "Failed to retrieve IO events (rw=%d)", rw); return -1; } else if (r < 1L) { cl_log(LOG_INFO, "Cancelling IO request due to timeout (rw=%d, r=%ld)", rw, r); r = io_cancel(st->ioctx, ios[0], &event); if (r) { DBGLOG(LOG_INFO, "Could not cancel IO request (rw=%d)", rw); /* Doesn't really matter, debugging information. */ } return -1; } else if (r > 1L) { cl_log(LOG_ERR, "More than one IO was returned (r=%ld)", r); return -1; } /* IO is happy */ if (event.res == sector_size) { if (!rw) { memcpy(data, st->buffer, sector_size); } return 0; } else { cl_log(LOG_ERR, "Short IO (rw=%d, res=%lu, sector_size=%d)", rw, event.res, sector_size); return -1; } } static int sector_write(struct sbd_context *st, int sector, void *data) { return sector_io(st, sector, data, 1); } static int sector_read(struct sbd_context *st, int sector, void *data) { return sector_io(st, sector, data, 0); } static int slot_read(struct sbd_context *st, int slot, struct sector_node_s *s_node) { return sector_read(st, SLOT_TO_SECTOR(slot), s_node); } static int slot_write(struct sbd_context *st, int slot, struct sector_node_s *s_node) { return sector_write(st, SLOT_TO_SECTOR(slot), s_node); } static int mbox_write(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox) { return sector_write(st, MBOX_TO_SECTOR(mbox), s_mbox); } static int mbox_read(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox) { return sector_read(st, MBOX_TO_SECTOR(mbox), s_mbox); } static int mbox_write_verify(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox) { void *data; int rc = 0; if (sector_write(st, MBOX_TO_SECTOR(mbox), s_mbox) < 0) return -1; data = sector_alloc(); if (sector_read(st, MBOX_TO_SECTOR(mbox), data) < 0) { rc = -1; goto out; } if (memcmp(s_mbox, data, sector_size) != 0) { cl_log(LOG_ERR, "Write verification failed!"); rc = -1; goto out; } rc = 0; out: free(data); return rc; } static int header_write(struct sbd_context *st, struct sector_header_s *s_header) { s_header->sector_size = htonl(s_header->sector_size); s_header->timeout_watchdog = htonl(s_header->timeout_watchdog); s_header->timeout_allocate = htonl(s_header->timeout_allocate); s_header->timeout_loop = htonl(s_header->timeout_loop); s_header->timeout_msgwait = htonl(s_header->timeout_msgwait); return sector_write(st, 0, s_header); } static int header_read(struct sbd_context *st, struct sector_header_s *s_header) { if (sector_read(st, 0, s_header) < 0) return -1; s_header->sector_size = ntohl(s_header->sector_size); s_header->timeout_watchdog = ntohl(s_header->timeout_watchdog); s_header->timeout_allocate = ntohl(s_header->timeout_allocate); s_header->timeout_loop = ntohl(s_header->timeout_loop); s_header->timeout_msgwait = ntohl(s_header->timeout_msgwait); /* This sets the global defaults: */ timeout_watchdog = s_header->timeout_watchdog; timeout_allocate = s_header->timeout_allocate; timeout_loop = s_header->timeout_loop; timeout_msgwait = s_header->timeout_msgwait; return 0; } static int valid_header(const struct sector_header_s *s_header) { if (memcmp(s_header->magic, sbd_magic, sizeof(s_header->magic)) != 0) { cl_log(LOG_ERR, "Header magic does not match."); return -1; } if (s_header->version != sbd_version) { cl_log(LOG_ERR, "Header version does not match."); return -1; } if (s_header->sector_size != sector_size) { cl_log(LOG_ERR, "Header sector size does not match."); return -1; } return 0; } static struct sector_header_s * header_get(struct sbd_context *st) { struct sector_header_s *s_header; s_header = sector_alloc(); if (header_read(st, s_header) < 0) { cl_log(LOG_ERR, "Unable to read header from device %d", st->devfd); free(s_header); return NULL; } if (valid_header(s_header) < 0) { cl_log(LOG_ERR, "header on device %d is not valid.", st->devfd); free(s_header); return NULL; } /* cl_log(LOG_INFO, "Found version %d header with %d slots", s_header->version, s_header->slots); */ return s_header; } static int header_dump(struct sbd_context *st) { struct sector_header_s *s_header; char uuid[37]; s_header = header_get(st); if (s_header == NULL) return -1; printf("Header version : %u.%u\n", s_header->version, s_header->minor_version); if (s_header->minor_version > 0) { uuid_unparse_lower(s_header->uuid, uuid); printf("UUID : %s\n", uuid); } printf("Number of slots : %u\n", s_header->slots); printf("Sector size : %lu\n", (unsigned long)s_header->sector_size); printf("Timeout (watchdog) : %lu\n", (unsigned long)s_header->timeout_watchdog); printf("Timeout (allocate) : %lu\n", (unsigned long)s_header->timeout_allocate); printf("Timeout (loop) : %lu\n", (unsigned long)s_header->timeout_loop); printf("Timeout (msgwait) : %lu\n", (unsigned long)s_header->timeout_msgwait); free(s_header); return 0; } static int init_device(struct sbd_context *st) { struct sector_header_s *s_header; struct sector_node_s *s_node; struct sector_mbox_s *s_mbox; char uuid[37]; int i; int rc = 0; s_header = sector_alloc(); s_node = sector_alloc(); s_mbox = sector_alloc(); memcpy(s_header->magic, sbd_magic, sizeof(s_header->magic)); s_header->version = sbd_version; s_header->slots = 255; s_header->sector_size = sector_size; s_header->timeout_watchdog = timeout_watchdog; s_header->timeout_allocate = timeout_allocate; s_header->timeout_loop = timeout_loop; s_header->timeout_msgwait = timeout_msgwait; s_header->minor_version = 1; uuid_generate(s_header->uuid); uuid_unparse_lower(s_header->uuid, uuid); cl_log(LOG_INFO, "Creating version %d.%d header on device %d (uuid: %s)", s_header->version, s_header->minor_version, st->devfd, uuid); fprintf(stdout, "Creating version %d.%d header on device %d (uuid: %s)\n", s_header->version, s_header->minor_version, st->devfd, uuid); if (header_write(st, s_header) < 0) { rc = -1; goto out; } cl_log(LOG_INFO, "Initializing %d slots on device %d", s_header->slots, st->devfd); fprintf(stdout, "Initializing %d slots on device %d\n", s_header->slots, st->devfd); for (i=0;i < s_header->slots;i++) { if (slot_write(st, i, s_node) < 0) { rc = -1; goto out; } if (mbox_write(st, i, s_mbox) < 0) { rc = -1; goto out; } } out: free(s_node); free(s_header); free(s_mbox); return(rc); } /* Check if there already is a slot allocated to said name; returns the * slot number. If not found, returns -1. * This is necessary because slots might not be continuous. */ static int slot_lookup(struct sbd_context *st, const struct sector_header_s *s_header, const char *name) { struct sector_node_s *s_node = NULL; int i; int rc = -1; if (!name) { cl_log(LOG_ERR, "slot_lookup(): No name specified.\n"); goto out; } s_node = sector_alloc(); for (i=0; i < s_header->slots; i++) { if (slot_read(st, i, s_node) < 0) { rc = -2; goto out; } if (s_node->in_use != 0) { if (strncasecmp(s_node->name, name, SECTOR_NAME_MAX) == 0) { DBGLOG(LOG_INFO, "%s owns slot %d", name, i); rc = i; goto out; } } } out: free(s_node); return rc; } static int slot_unused(struct sbd_context *st, const struct sector_header_s *s_header) { struct sector_node_s *s_node; int i; int rc = -1; s_node = sector_alloc(); for (i=0; i < s_header->slots; i++) { if (slot_read(st, i, s_node) < 0) { rc = -1; goto out; } if (s_node->in_use == 0) { rc = i; goto out; } } out: free(s_node); return rc; } static int slot_allocate(struct sbd_context *st, const char *name) { struct sector_header_s *s_header = NULL; struct sector_node_s *s_node = NULL; struct sector_mbox_s *s_mbox = NULL; int i; int rc = 0; if (!name) { cl_log(LOG_ERR, "slot_allocate(): No name specified.\n"); fprintf(stderr, "slot_allocate(): No name specified.\n"); rc = -1; goto out; } s_header = header_get(st); if (!s_header) { rc = -1; goto out; } s_node = sector_alloc(); s_mbox = sector_alloc(); while (1) { i = slot_lookup(st, s_header, name); if ((i >= 0) || (i == -2)) { /* -1 is "no slot found", in which case we * proceed to allocate a new one. * -2 is "read error during lookup", in which * case we error out too * >= 0 is "slot already allocated" */ rc = i; goto out; } i = slot_unused(st, s_header); if (i >= 0) { cl_log(LOG_INFO, "slot %d is unused - trying to own", i); fprintf(stdout, "slot %d is unused - trying to own\n", i); memset(s_node, 0, sizeof(*s_node)); s_node->in_use = 1; strncpy(s_node->name, name, SECTOR_NAME_MAX); if (slot_write(st, i, s_node) < 0) { rc = -1; goto out; } sleep(timeout_allocate); } else { cl_log(LOG_ERR, "No more free slots."); fprintf(stderr, "No more free slots.\n"); rc = -1; goto out; } } out: free(s_node); free(s_header); free(s_mbox); return(rc); } static int slot_list(struct sbd_context *st) { struct sector_header_s *s_header = NULL; struct sector_node_s *s_node = NULL; struct sector_mbox_s *s_mbox = NULL; int i; int rc = 0; s_header = header_get(st); if (!s_header) { rc = -1; goto out; } s_node = sector_alloc(); s_mbox = sector_alloc(); for (i=0; i < s_header->slots; i++) { if (slot_read(st, i, s_node) < 0) { rc = -1; goto out; } if (s_node->in_use > 0) { if (mbox_read(st, i, s_mbox) < 0) { rc = -1; goto out; } printf("%d\t%s\t%s\t%s\n", i, s_node->name, char2cmd(s_mbox->cmd), s_mbox->from); } } out: free(s_node); free(s_header); free(s_mbox); return rc; } static int slot_msg(struct sbd_context *st, const char *name, const char *cmd) { struct sector_header_s *s_header = NULL; struct sector_mbox_s *s_mbox = NULL; int mbox; int rc = 0; char uuid[37]; if (!name || !cmd) { cl_log(LOG_ERR, "slot_msg(): No recipient / cmd specified.\n"); rc = -1; goto out; } s_header = header_get(st); if (!s_header) { rc = -1; goto out; } if (strcmp(name, "LOCAL") == 0) { name = local_uname; } if (s_header->minor_version > 0) { uuid_unparse_lower(s_header->uuid, uuid); cl_log(LOG_INFO, "Device UUID: %s", uuid); } mbox = slot_lookup(st, s_header, name); if (mbox < 0) { cl_log(LOG_ERR, "slot_msg(): No slot found for %s.", name); rc = -1; goto out; } s_mbox = sector_alloc(); s_mbox->cmd = cmd2char(cmd); if (s_mbox->cmd < 0) { cl_log(LOG_ERR, "slot_msg(): Invalid command %s.", cmd); rc = -1; goto out; } strncpy(s_mbox->from, local_uname, SECTOR_NAME_MAX); cl_log(LOG_INFO, "Writing %s to node slot %s", cmd, name); if (mbox_write_verify(st, mbox, s_mbox) < -1) { rc = -1; goto out; } if (strcasecmp(cmd, "exit") != 0) { cl_log(LOG_INFO, "Messaging delay: %d", (int)timeout_msgwait); sleep(timeout_msgwait); } cl_log(LOG_INFO, "%s successfully delivered to %s", cmd, name); out: free(s_mbox); free(s_header); return rc; } static int slot_ping(struct sbd_context *st, const char *name) { struct sector_header_s *s_header = NULL; struct sector_mbox_s *s_mbox = NULL; int mbox; int waited = 0; int rc = 0; if (!name) { cl_log(LOG_ERR, "slot_ping(): No recipient specified.\n"); rc = -1; goto out; } s_header = header_get(st); if (!s_header) { rc = -1; goto out; } if (strcmp(name, "LOCAL") == 0) { name = local_uname; } mbox = slot_lookup(st, s_header, name); if (mbox < 0) { cl_log(LOG_ERR, "slot_msg(): No slot found for %s.", name); rc = -1; goto out; } s_mbox = sector_alloc(); s_mbox->cmd = SBD_MSG_TEST; strncpy(s_mbox->from, local_uname, SECTOR_NAME_MAX); DBGLOG(LOG_DEBUG, "Pinging node %s", name); if (mbox_write(st, mbox, s_mbox) < -1) { rc = -1; goto out; } rc = -1; while (waited <= timeout_msgwait) { if (mbox_read(st, mbox, s_mbox) < 0) break; if (s_mbox->cmd != SBD_MSG_TEST) { rc = 0; break; } sleep(1); waited++; } if (rc == 0) { cl_log(LOG_DEBUG, "%s successfully pinged.", name); } else { cl_log(LOG_ERR, "%s failed to ping.", name); } out: free(s_mbox); free(s_header); return rc; } int init_devices(struct servants_list_item *servants) { int rc = 0; struct sbd_context *st; struct servants_list_item *s; for (s = servants; s; s = s->next) { fprintf(stdout, "Initializing device %s\n", s->devname); st = open_device(s->devname, LOG_ERR); if (!st) { return -1; } rc = init_device(st); close_device(st); if (rc == -1) { fprintf(stderr, "Failed to init device %s\n", s->devname); return rc; } fprintf(stdout, "Device %s is initialized.\n", s->devname); } fprintf(stdout, "Did you check sbd service down on all nodes before? If not do so now and restart afterwards.\n"); return 0; } static int slot_msg_wrapper(const char* devname, int mode, const void* argp) { int rc = 0; struct sbd_context *st; const struct slot_msg_arg_t* arg = (const struct slot_msg_arg_t*)argp; st = open_device(devname, LOG_WARNING); if (!st) return -1; cl_log(LOG_INFO, "Delivery process handling %s", devname); rc = slot_msg(st, arg->name, arg->msg); close_device(st); return rc; } static int slot_ping_wrapper(const char* devname, int mode, const void* argp) { int rc = 0; const char* name = (const char*)argp; struct sbd_context *st; st = open_device(devname, LOG_WARNING); if (!st) return -1; rc = slot_ping(st, name); close_device(st); return rc; } int allocate_slots(const char *name, struct servants_list_item *servants) { int rc = 0; struct sbd_context *st; struct servants_list_item *s; for (s = servants; s; s = s->next) { fprintf(stdout, "Trying to allocate slot for %s on device %s.\n", name, s->devname); st = open_device(s->devname, LOG_WARNING); if (!st) { return -1; } rc = slot_allocate(st, name); close_device(st); if (rc < 0) return rc; fprintf(stdout, "Slot for %s has been allocated on %s.\n", name, s->devname); } return 0; } int list_slots(struct servants_list_item *servants) { int rc = 0; struct servants_list_item *s; struct sbd_context *st; for (s = servants; s; s = s->next) { int rv = 0; st = open_device(s->devname, LOG_WARNING); if (!st) { rc = -1; fprintf(stderr, "== disk %s unreadable!\n", s->devname); continue; } rv = slot_list(st); close_device(st); if (rv == -1) { rc = -1; fprintf(stderr, "== Slots on disk %s NOT dumped\n", s->devname); } } return rc; } int ping_via_slots(const char *name, struct servants_list_item *servants) { int sig = 0; pid_t pid = 0; int status = 0; int servants_finished = 0; sigset_t procmask; siginfo_t sinfo; struct servants_list_item *s; sigemptyset(&procmask); sigaddset(&procmask, SIGCHLD); sigprocmask(SIG_BLOCK, &procmask, NULL); for (s = servants; s; s = s->next) { if(sbd_is_disk(s)) { s->pid = assign_servant(s->devname, &slot_ping_wrapper, 0, (const void*)name); } } while (servants_finished < disk_count) { sig = sigwaitinfo(&procmask, &sinfo); if (sig == SIGCHLD) { while ((pid = wait(&status))) { if (pid == -1 && errno == ECHILD) { break; } else { s = lookup_servant_by_pid(pid); if (sbd_is_disk(s)) { servants_finished++; } } } } } return 0; } int quorum_write(int good_servants) { return (good_servants > disk_count/2); } int messenger(const char *name, const char *msg, struct servants_list_item *servants) { int sig = 0; pid_t pid = 0; int status = 0; int servants_finished = 0; int successful_delivery = 0; sigset_t procmask; siginfo_t sinfo; struct servants_list_item *s; struct slot_msg_arg_t slot_msg_arg = {name, msg}; sigemptyset(&procmask); sigaddset(&procmask, SIGCHLD); sigprocmask(SIG_BLOCK, &procmask, NULL); for (s = servants; s; s = s->next) { s->pid = assign_servant(s->devname, &slot_msg_wrapper, 0, &slot_msg_arg); } while (!(quorum_write(successful_delivery) || (servants_finished == disk_count))) { sig = sigwaitinfo(&procmask, &sinfo); if (sig == SIGCHLD) { while ((pid = waitpid(-1, &status, WNOHANG))) { if (pid == -1 && errno == ECHILD) { break; } else { servants_finished++; if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { DBGLOG(LOG_INFO, "Process %d succeeded.", (int)pid); successful_delivery++; } else { cl_log(LOG_WARNING, "Process %d failed to deliver!", (int)pid); } } } } } if (quorum_write(successful_delivery)) { cl_log(LOG_INFO, "Message successfully delivered."); return 0; } else { cl_log(LOG_ERR, "Message is not delivered via more then a half of devices"); return -1; } } unsigned long get_first_msgwait(struct servants_list_item *servants) { unsigned long msgwait = 0; struct servants_list_item *s = servants; for (s = servants; s; s = s->next) { struct sbd_context *st; struct sector_header_s *s_header; st = open_device(s->devname, LOG_WARNING); if (!st) { continue; } s_header = header_get(st); if (s_header != NULL) { msgwait = (unsigned long)s_header->timeout_msgwait; close_device(st); free(s_header); return msgwait; } close_device(st); } return msgwait; } int dump_headers(struct servants_list_item *servants) { int rc = 0; struct servants_list_item *s = servants; struct sbd_context *st; for (s = servants; s; s = s->next) { int rv; fprintf(stdout, "==Dumping header on disk %s\n", s->devname); st = open_device(s->devname, LOG_WARNING); if (st) { rv = header_dump(st); close_device(st); } else { fprintf(stderr, "== disk %s unreadable!\n", s->devname); rv = -1; } if (rv == -1) { rc = -1; fprintf(stderr, "==Header on disk %s NOT dumped\n", s->devname); } else { fprintf(stdout, "==Header on disk %s is dumped\n", s->devname); } } return rc; } void open_any_device(struct servants_list_item *servants) { struct sector_header_s *hdr_cur = NULL; struct timespec t_0; int t_wait = 0; bool logged_once = false; clock_gettime(CLOCK_MONOTONIC, &t_0); while (!hdr_cur && t_wait < timeout_startup) { struct timespec t_now; struct servants_list_item* s; for (s = servants; s; s = s->next) { struct sbd_context *st = open_device(s->devname, LOG_DEBUG); if (!st) { if (logged_once == false) { cl_log(LOG_WARNING, "Failed to open %s. " "Trying any other configured devices, " "otherwise retrying every %ds within %ds", s->devname, timeout_loop, timeout_startup); logged_once = true; } continue; } hdr_cur = header_get(st); close_device(st); if (hdr_cur) { break; } else { if (logged_once == false) { cl_log(LOG_WARNING, "Failed to read header from %s. " "Trying any other configured devices, " "otherwise retrying every %ds within %ds", s->devname, timeout_loop, timeout_startup); logged_once = true; } } } clock_gettime(CLOCK_MONOTONIC, &t_now); t_wait = t_now.tv_sec - t_0.tv_sec; if (!hdr_cur) { sleep(timeout_loop); } } if (hdr_cur) { timeout_watchdog = hdr_cur->timeout_watchdog; timeout_allocate = hdr_cur->timeout_allocate; timeout_loop = hdr_cur->timeout_loop; timeout_msgwait = hdr_cur->timeout_msgwait; } else { cl_log(LOG_ERR, "No devices were available at start-up within %i seconds.", timeout_startup); exit(1); } free(hdr_cur); return; } /* ::-::-::-::-::-::-::-::-::-::-::-::-:: Begin disk based servant code ::-::-::-::-::-::-::-::-::-::-::-::-:: */ static int servant_check_timeout_inconsistent(struct sector_header_s *hdr) { if (timeout_watchdog != hdr->timeout_watchdog) { cl_log(LOG_WARNING, "watchdog timeout: %d versus %d on this device", (int)timeout_watchdog, (int)hdr->timeout_watchdog); return -1; } if (timeout_allocate != hdr->timeout_allocate) { cl_log(LOG_WARNING, "allocate timeout: %d versus %d on this device", (int)timeout_allocate, (int)hdr->timeout_allocate); return -1; } if (timeout_loop != hdr->timeout_loop) { cl_log(LOG_WARNING, "loop timeout: %d versus %d on this device", (int)timeout_loop, (int)hdr->timeout_loop); return -1; } if (timeout_msgwait != hdr->timeout_msgwait) { cl_log(LOG_WARNING, "msgwait timeout: %d versus %d on this device", (int)timeout_msgwait, (int)hdr->timeout_msgwait); return -1; } return 0; } int servant_md(const char *diskname, int mode, const void* argp) { struct sector_mbox_s *s_mbox = NULL; struct sector_node_s *s_node = NULL; struct sector_header_s *s_header = NULL; int mbox; int rc = 0; time_t t0, t1, latency; union sigval signal_value; sigset_t servant_masks; struct sbd_context *st; pid_t ppid; char uuid[37]; const struct servants_list_item *s = argp; cl_log(LOG_INFO, "Servant starting for device %s", diskname); /* Block most of the signals */ sigfillset(&servant_masks); sigdelset(&servant_masks, SIGKILL); sigdelset(&servant_masks, SIGFPE); sigdelset(&servant_masks, SIGILL); sigdelset(&servant_masks, SIGSEGV); sigdelset(&servant_masks, SIGBUS); sigdelset(&servant_masks, SIGALRM); /* FIXME: check error */ sigprocmask(SIG_SETMASK, &servant_masks, NULL); st = open_device(diskname, LOG_WARNING); if (!st) { exit(EXIT_MD_SERVANT_IO_FAIL); } s_header = header_get(st); if (!s_header) { cl_log(LOG_ERR, "Not a valid header on %s", diskname); rc = EXIT_MD_SERVANT_IO_FAIL; goto out; } if (servant_check_timeout_inconsistent(s_header) < 0) { cl_log(LOG_ERR, "Timeouts on %s do not match first device", diskname); rc = EXIT_MD_SERVANT_IO_FAIL; goto out; } if (s_header->minor_version > 0) { uuid_unparse_lower(s_header->uuid, uuid); cl_log(LOG_INFO, "Device %s uuid: %s", diskname, uuid); } mbox = slot_allocate(st, local_uname); if (mbox < 0) { cl_log(LOG_ERR, "No slot allocated, and automatic allocation failed for disk %s.", diskname); rc = EXIT_MD_SERVANT_IO_FAIL; goto out; } s_node = sector_alloc(); if (slot_read(st, mbox, s_node) < 0) { cl_log(LOG_ERR, "Unable to read node entry on %s", diskname); rc = EXIT_MD_SERVANT_IO_FAIL; goto out; } cl_log(LOG_NOTICE, "Monitoring slot %d on disk %s", mbox, diskname); if (s_header->minor_version == 0) { set_proc_title("sbd: watcher: %s - slot: %d", diskname, mbox); } else { set_proc_title("sbd: watcher: %s - slot: %d - uuid: %s", diskname, mbox, uuid); } s_mbox = sector_alloc(); if (s->first_start) { if (mode > 0) { if (mbox_read(st, mbox, s_mbox) < 0) { cl_log(LOG_ERR, "mbox read failed during start-up in servant."); rc = EXIT_MD_SERVANT_IO_FAIL; goto out; } if (s_mbox->cmd != SBD_MSG_EXIT && s_mbox->cmd != SBD_MSG_EMPTY) { /* Not a clean stop. Abort start-up */ cl_log(LOG_WARNING, "Found fencing message - aborting start-up. Manual intervention required!"); ppid = getppid(); sigqueue(ppid, SIG_EXITREQ, signal_value); rc = 0; goto out; } } DBGLOG(LOG_INFO, "First servant start - zeroing inbox"); memset(s_mbox, 0, sizeof(*s_mbox)); if (mbox_write(st, mbox, s_mbox) < 0) { rc = EXIT_MD_SERVANT_IO_FAIL; goto out; } } memset(&signal_value, 0, sizeof(signal_value)); while (1) { struct sector_header_s *s_header_retry = NULL; struct sector_node_s *s_node_retry = NULL; t0 = time(NULL); sleep(timeout_loop); ppid = getppid(); if (ppid == 1) { /* Our parent died unexpectedly. Triggering * self-fence. */ do_timeout_action(); } /* These attempts are, by definition, somewhat racy. If * the device is wiped out or corrupted between here and * us reading our mbox, there is nothing we can do about * that. But at least we tried. */ s_header_retry = header_get(st); if (!s_header_retry) { cl_log(LOG_ERR, "No longer found a valid header on %s", diskname); rc = EXIT_MD_SERVANT_IO_FAIL; goto out; } if (memcmp(s_header, s_header_retry, sizeof(*s_header)) != 0) { cl_log(LOG_ERR, "Header on %s changed since start-up!", diskname); free(s_header_retry); rc = EXIT_MD_SERVANT_IO_FAIL; goto out; } free(s_header_retry); s_node_retry = sector_alloc(); if (slot_read(st, mbox, s_node_retry) < 0) { cl_log(LOG_ERR, "slot read failed in servant."); free(s_node_retry); rc = EXIT_MD_SERVANT_IO_FAIL; goto out; } if (memcmp(s_node, s_node_retry, sizeof(*s_node)) != 0) { cl_log(LOG_ERR, "Node entry on %s changed since start-up!", diskname); free(s_node_retry); rc = EXIT_MD_SERVANT_IO_FAIL; goto out; } free(s_node_retry); if (mbox_read(st, mbox, s_mbox) < 0) { cl_log(LOG_ERR, "mbox read failed in servant."); rc = EXIT_MD_SERVANT_IO_FAIL; goto out; } if (s_mbox->cmd > 0) { cl_log(LOG_NOTICE, "Received command %s from %s on disk %s", char2cmd(s_mbox->cmd), s_mbox->from, diskname); switch (s_mbox->cmd) { case SBD_MSG_TEST: memset(s_mbox, 0, sizeof(*s_mbox)); mbox_write(st, mbox, s_mbox); sigqueue(ppid, SIG_TEST, signal_value); break; case SBD_MSG_RESET: rc = EXIT_MD_SERVANT_REQUEST_RESET; goto out; case SBD_MSG_OFF: rc = EXIT_MD_SERVANT_REQUEST_SHUTOFF; goto out; case SBD_MSG_EXIT: sigqueue(ppid, SIG_EXITREQ, signal_value); break; case SBD_MSG_CRASHDUMP: rc = EXIT_MD_SERVANT_REQUEST_CRASHDUMP; goto out; default: /* FIXME: An "unknown" message might result from a partial write. log it and clear the slot. */ cl_log(LOG_ERR, "Unknown message on disk %s", diskname); memset(s_mbox, 0, sizeof(*s_mbox)); mbox_write(st, mbox, s_mbox); break; } } sigqueue(ppid, SIG_LIVENESS, signal_value); t1 = time(NULL); latency = t1 - t0; if (timeout_watchdog_warn && (latency > timeout_watchdog_warn)) { cl_log(LOG_WARNING, "Latency: %ds exceeded watchdog warning timeout %ds on disk %s", (int)latency, (int)timeout_watchdog_warn, diskname); } else if (debug) { DBGLOG(LOG_DEBUG, "Latency: %ds on disk %s", (int)latency, diskname); } } out: free(s_header); free(s_node); free(s_mbox); close_device(st); exit(rc); } diff --git a/tests/sbd-testbed.c b/tests/sbd-testbed.c index 858b1be..91920f2 100644 --- a/tests/sbd-testbed.c +++ b/tests/sbd-testbed.c @@ -1,729 +1,745 @@ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if __GLIBC_PREREQ(2,36) #include #else #include typedef gboolean (*GUnixFDSourceFunc) (gint fd, GIOCondition condition, gpointer user_data); static gboolean GIOFunc2GUnixFDSourceFunc(GIOChannel *source, GIOCondition condition, gpointer data) { return ((GUnixFDSourceFunc) data) ( g_io_channel_unix_get_fd(source), condition, NULL); } static guint g_unix_fd_add(gint fd, GIOCondition condition, GUnixFDSourceFunc function, gpointer user_data) { GIOChannel *chan = g_io_channel_unix_new (fd); if (chan == NULL) { return 0; } else { return g_io_add_watch(chan, condition, GIOFunc2GUnixFDSourceFunc, (gpointer) function); } } #endif typedef int (*orig_open_f_type)(const char *pathname, int flags, ...); typedef int (*orig_ioctl_f_type)(int fd, unsigned long int request, ...); typedef ssize_t (*orig_write_f_type)(int fd, const void *buf, size_t count); typedef int (*orig_close_f_type)(int fd); typedef FILE *(*orig_fopen_f_type)(const char *pathname, const char *mode); typedef int (*orig_fclose_f_type)(FILE *fp); typedef int (*orig_io_setup_f_type)(int nr_events, io_context_t *ctx_idp); +typedef int (*orig_io_destroy_f_type)(io_context_t ctx_id); typedef int (*orig_io_submit_f_type)(io_context_t ctx_id, long nr, struct iocb *ios[]); typedef int (*orig_io_getevents_f_type)(io_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout); typedef int (*orig_io_cancel_f_type)(io_context_t ctx_id, struct iocb *iocb, struct io_event *result); static int is_init = 0; static FILE *log_fp = NULL; static char *sbd_device[3] = {NULL, NULL, NULL}; static int sbd_device_fd[3] = {-1, -1, -1}; static FILE *sysrq_fp = NULL; static FILE *sysrq_trigger_fp = NULL; static char *watchdog_device = NULL; static int watchdog_device_fd = -1; static int watchdog_timeout = -1; static pid_t watchdog_pid = -1; static int watchdog_pipe[2] = {-1, -1}; static guint watchdog_source_id = 0; static int watchdog_timer_id = 0; static orig_open_f_type orig_open = NULL; static orig_ioctl_f_type orig_ioctl = NULL; static orig_write_f_type orig_write = NULL; static orig_close_f_type orig_close = NULL; static orig_fopen_f_type orig_fopen = NULL; static orig_fclose_f_type orig_fclose = NULL; static orig_io_setup_f_type orig_io_setup = NULL; +static orig_io_destroy_f_type orig_io_destroy = NULL; static orig_io_submit_f_type orig_io_submit = NULL; static orig_io_getevents_f_type orig_io_getevents = NULL; static orig_io_cancel_f_type orig_io_cancel = NULL; /* fprintf is inlined as __fprintf_chk or * we have vfprintf. * For fscanf we have vfscanf. * For reboot we anyway don't want that to be * called in any case. */ static struct iocb *pending_iocb = NULL; struct io_context { int context_num; }; static struct io_context our_io_context = {.context_num = 1}; static int translate_aio = 0; static GMainLoop *mainloop = NULL; #if 0 static void watchdog_shutdown(int nsig) { if (watchdog_timer_id > 0) { fprintf(log_fp, "exiting with watchdog-timer armed\n"); } } #endif static void* dlsym_fatal(void *handle, const char *symbol) { void *rv = dlsym(handle, symbol); if (!rv) { fprintf(stderr, "Failed looking up symbol %s\n", symbol); exit(1); } return rv; } static void init (void) { void *handle; if (!is_init) { const char *value; int i; char *token, *str, *str_orig; is_init = 1; orig_open = (orig_open_f_type)dlsym_fatal(RTLD_NEXT,"open"); orig_ioctl = (orig_ioctl_f_type)dlsym_fatal(RTLD_NEXT,"ioctl"); orig_close = (orig_close_f_type)dlsym_fatal(RTLD_NEXT,"close"); orig_write = (orig_write_f_type)dlsym_fatal(RTLD_NEXT,"write"); orig_fopen = (orig_fopen_f_type)dlsym_fatal(RTLD_NEXT,"fopen"); orig_fclose = (orig_fclose_f_type)dlsym_fatal(RTLD_NEXT,"fclose"); handle = dlopen("libaio.so.1", RTLD_NOW); if (!handle) { fprintf(stderr, "Failed opening libaio.so.1\n"); exit(1); } orig_io_setup = (orig_io_setup_f_type)dlsym_fatal(handle,"io_setup"); + orig_io_destroy = (orig_io_destroy_f_type)dlsym_fatal(handle,"io_destroy"); orig_io_submit = (orig_io_submit_f_type)dlsym_fatal(handle,"io_submit"); orig_io_getevents = (orig_io_getevents_f_type)dlsym_fatal(handle,"io_getevents"); orig_io_cancel = (orig_io_cancel_f_type)dlsym_fatal(handle,"io_cancel"); dlclose(handle); value = getenv("SBD_PRELOAD_LOG"); if (value) { log_fp = fopen(value, "a"); } else { int fd = dup(fileno(stderr)); if (fd >= 0) { log_fp = fdopen(fd, "w"); } } if (log_fp == NULL) { fprintf(stderr, "couldn't open log-file\n"); } value = getenv("SBD_WATCHDOG_DEV"); if (value) { watchdog_device = strdup(value); } value = getenv("SBD_DEVICE"); if ((value) && (str = str_orig = strdup(value))) { for (i = 0; i < 3; i++, str = NULL) { token = strtok(str, ";"); if (token == NULL) { break; } sbd_device[i] = strdup(token); } free(str_orig); } value = getenv("SBD_TRANSLATE_AIO"); if ((value) && !strcmp(value, "yes")) { translate_aio = 1; } } } // ***** end - handling of watchdog & block-devices **** static gboolean watchdog_timeout_notify(gpointer data) { fprintf(log_fp, "watchdog fired after %ds - killing process group\n", watchdog_timeout); fclose(log_fp); log_fp = NULL; killpg(0, SIGKILL); exit(1); } static gboolean watchdog_dispatch_callback (gint fd, GIOCondition condition, gpointer user_data) { char buf[256]; int i = 0; if (condition & G_IO_HUP) { return FALSE; } if (watchdog_timer_id > 0) { g_source_remove(watchdog_timer_id); } watchdog_timer_id = 0; for (i = 0; i < sizeof(buf)-1; i++) { ssize_t len; do { len = read(watchdog_pipe[0], &buf[i], 1); } while ((len == -1) && (errno == EINTR)); if (len <= 0) { if (len == -1) { fprintf(log_fp, "Couldn't read from watchdog-pipe\n"); } buf[i] = '\0'; break; } if (buf[i] == '\n') { buf[i] = '\0'; break; } } buf[sizeof(buf)-1] = '\0'; if (sscanf(buf, "trigger %ds", &watchdog_timeout) == 1) { watchdog_timer_id = g_timeout_add(watchdog_timeout * 1000, watchdog_timeout_notify, NULL); } else if (strcmp(buf, "disarm") == 0) { // timer is stopped already } else { fprintf(log_fp, "unknown watchdog command\n"); } return TRUE; } static void watchdog_arm (void) { char buf[256]; if ((watchdog_timeout > 0) && (watchdog_pipe[1] >= 0)) { sprintf(buf, "trigger %ds\n", watchdog_timeout); if (write(watchdog_pipe[1], buf, strlen(buf)) != strlen(buf)) { fprintf(log_fp, "Failed tickling watchdog via pipe\n"); } } } static void watchdog_disarm (void) { char buf[256]; watchdog_timeout = -1; if (watchdog_pipe[1] >= 0) { sprintf(buf, "disarm\n"); if (write(watchdog_pipe[1], buf, strlen(buf)) != strlen(buf)) { fprintf(log_fp, "Failed disarming watchdog via pipe\n"); } } } int open(const char *pathname, int flags, ...) { int i, fd; int devnum = -1; int is_wd_dev = 0; va_list ap; init(); for (i=0; i < 3; i++) { if (sbd_device[i]) { if (strcmp(sbd_device[i], pathname) == 0) { devnum = i; flags &= ~O_DIRECT; break; } } } if (watchdog_device) { if (strcmp(watchdog_device, pathname) == 0) { is_wd_dev = 1; if (watchdog_pipe[1] == -1) { if (pipe(watchdog_pipe) == -1) { fprintf(log_fp, "Creating pipe for watchdog failed\n"); } else { int i; watchdog_pid = fork(); switch (watchdog_pid) { case -1: fprintf(log_fp, "Forking watchdog-child failed\n"); break; case 0: free(watchdog_device); watchdog_device = NULL; for (i = 0; i < 3; i++) { free(sbd_device[i]); sbd_device[i] = NULL; } close(watchdog_pipe[1]); if (fcntl(watchdog_pipe[0], F_SETFL, O_NONBLOCK) == -1) { // don't block on read for timer to be handled fprintf(log_fp, "Failed setting watchdog-pipe-read to non-blocking"); } mainloop = g_main_loop_new(NULL, FALSE); // mainloop_add_signal(SIGTERM, watchdog_shutdown); // mainloop_add_signal(SIGINT, watchdog_shutdown); watchdog_source_id = g_unix_fd_add(watchdog_pipe[0], G_IO_IN, watchdog_dispatch_callback, NULL); if (watchdog_source_id == 0) { fprintf(log_fp, "Failed creating source for watchdog-pipe\n"); exit(1); } g_main_loop_run(mainloop); g_main_loop_unref(mainloop); exit(0); default: close(watchdog_pipe[0]); if (fcntl(watchdog_pipe[1], F_SETFL, O_NONBLOCK) == -1) { fprintf(log_fp, "Failed setting watchdog-pipe-write to non-blocking"); } } } } pathname = "/dev/null"; } } va_start (ap, flags); fd = (flags & (O_CREAT #ifdef O_TMPFILE | O_TMPFILE #endif ))? orig_open(pathname, flags, va_arg(ap, mode_t)): orig_open(pathname, flags); va_end (ap); if (devnum >= 0) { sbd_device_fd[devnum] = fd; } else if (is_wd_dev) { watchdog_device_fd = fd; } return fd; } ssize_t write(int fd, const void *buf, size_t count) { init(); if ((fd == watchdog_device_fd) && (count >= 1)) { if (*(const char *)buf == 'V') { watchdog_disarm(); } else { watchdog_arm(); } } return orig_write(fd, buf, count); } int ioctl(int fd, unsigned long int request, ...) { int rv = -1; va_list ap; int i; init(); va_start(ap, request); switch (request) { case BLKSSZGET: for (i=0; i < 3; i++) { if (sbd_device_fd[i] == fd) { rv = 0; *(va_arg(ap, int *)) = 512; break; } if (i == 2) { rv = orig_ioctl(fd, request, va_arg(ap, int *)); } } break; case WDIOC_SETTIMEOUT: if (fd == watchdog_device_fd) { watchdog_timeout = *va_arg(ap, int *); watchdog_arm(); rv = 0; break; } rv = orig_ioctl(fd, request, va_arg(ap, int *)); break; case WDIOC_SETOPTIONS: if (fd == watchdog_device_fd) { int flags = *va_arg(ap, int *); if (flags & WDIOS_DISABLECARD) { watchdog_disarm(); } rv = 0; break; } rv = orig_ioctl(fd, request, va_arg(ap, int *)); break; case WDIOC_GETSUPPORT: rv = orig_ioctl(fd, request, va_arg(ap, struct watchdog_info *)); break; default: fprintf(log_fp, "ioctl using unknown request = 0x%08lx", request); rv = orig_ioctl(fd, request, va_arg(ap, void *)); } va_end(ap); return rv; } int close(int fd) { int i; init(); if (fd == watchdog_device_fd) { watchdog_device_fd = -1; } else { for (i = 0; i < 3; i++) { if (sbd_device_fd[i] == fd) { sbd_device_fd[i] = -1; break; } } } return orig_close(fd); } // ***** end - handling of watchdog & block-devices **** // ***** handling of sysrq, sysrq-trigger & reboot **** FILE * fopen(const char *pathname, const char *mode) { int is_sysrq = 0; int is_sysrq_trigger = 0; FILE *fp; init(); if ((strcmp("/proc/sys/kernel/sysrq", pathname) == 0) && strcmp("w", mode)) { pathname = "/dev/null"; is_sysrq = 1; } else if (strcmp("/proc/sysrq-trigger", pathname) == 0) { pathname = "/dev/null"; is_sysrq_trigger = 1; } fp = orig_fopen(pathname, mode); if (is_sysrq) { sysrq_fp = fp; } else if (is_sysrq_trigger) { sysrq_trigger_fp = fp; } return fp; } int fclose(FILE *fp) { init(); if (fp == sysrq_fp) { sysrq_fp = NULL; } else if (fp == sysrq_trigger_fp) { sysrq_trigger_fp = NULL; } return orig_fclose(fp); } #if defined(__USE_FORTIFY_LEVEL) && (__USE_FORTIFY_LEVEL > 1) int __fprintf_chk(FILE *stream, int flag, const char *format, ...) #else int fprintf(FILE *stream, const char *format, ...) #endif { va_list ap; int rv; init(); va_start (ap, format); if (stream == sysrq_trigger_fp) { char buf[256]; rv = vsnprintf(buf, sizeof(buf), format, ap); if (rv >= 1) { fprintf(log_fp, "sysrq-trigger ('%c') - %s\n", buf[0], (buf[0] == 'c')?"killing process group":"don't kill but wait for reboot-call"); if (buf[0] == 'c') { fclose(log_fp); log_fp = NULL; killpg(0, SIGKILL); exit(1); } } } else { rv = vfprintf(stream, format, ap); } va_end (ap); return rv; } int fscanf(FILE *stream, const char *format, ...) { va_list ap; int rv; init(); va_start (ap, format); rv = vfscanf(stream, format, ap); va_end (ap); return rv; } int reboot (int __howto) { fprintf(log_fp, "reboot (%s) - exiting inquisitor process\n", (__howto == RB_POWER_OFF)?"poweroff":"reboot"); fclose(log_fp); log_fp = NULL; killpg(0, SIGKILL); exit(1); } // ***** end - handling of sysrq, sysrq-trigger & reboot **** // ***** aio translate **** #if 0 struct iocb { void *data; unsigned key; short aio_lio_opcode; short aio_reqprio; int aio_fildes; }; static inline void io_prep_pread(struct iocb *iocb, int fd, void *buf, size_t count, long long offset) { memset(iocb, 0, sizeof(*iocb)); iocb->aio_fildes = fd; iocb->aio_lio_opcode = IO_CMD_PREAD; iocb->aio_reqprio = 0; iocb->u.c.buf = buf; iocb->u.c.nbytes = count; iocb->u.c.offset = offset; } static inline void io_prep_pwrite(struct iocb *iocb, int fd, void *buf, size_t count, long long offset) { memset(iocb, 0, sizeof(*iocb)); iocb->aio_fildes = fd; iocb->aio_lio_opcode = IO_CMD_PWRITE; iocb->aio_reqprio = 0; iocb->u.c.buf = buf; iocb->u.c.nbytes = count; iocb->u.c.offset = offset; } #endif int io_setup(int nr_events, io_context_t *ctx_idp) { init(); if (!translate_aio) { return orig_io_setup(nr_events, ctx_idp); } if (nr_events == 0) { return EINVAL; } if (nr_events > 1) { return EAGAIN; } if (ctx_idp == NULL) { return EFAULT; } *ctx_idp = &our_io_context; return 0; } +int io_destroy(io_context_t ctx_id) +{ + init(); + + if (!translate_aio) { + return orig_io_destroy(ctx_id); + } + + if (ctx_id != &our_io_context) { + return EINVAL; + } + return 0; +} int io_submit(io_context_t ctx_id, long nr, struct iocb *ios[]) { init(); if (!translate_aio) { return orig_io_submit(ctx_id, nr, ios); } if ((pending_iocb != NULL) || (nr > 1)) { return EAGAIN; } if ((nr == 1) && ((ios == NULL) || (ios[0] == NULL))) { return EFAULT; } if ((ctx_id != &our_io_context) || (nr < 0) || ((nr == 1) && (ios[0]->aio_lio_opcode != IO_CMD_PREAD) && (ios[0]->aio_lio_opcode != IO_CMD_PWRITE))) { return EINVAL; } if ((fcntl(ios[0]->aio_fildes, F_GETFD) == -1) && (errno == EBADF)) { return EBADF; } if (nr == 1) { pending_iocb = ios[0]; } return nr; } int io_getevents(io_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout) { init(); if (!translate_aio) { return orig_io_getevents(ctx_id, min_nr, nr, events, timeout); } if ((ctx_id != &our_io_context) || (min_nr != 1) || (nr != 1)) { return EINVAL; } if (pending_iocb == NULL) { return 0; } switch (pending_iocb->aio_lio_opcode) { case IO_CMD_PWRITE: events->res = pwrite(pending_iocb->aio_fildes, pending_iocb->u.c.buf, pending_iocb->u.c.nbytes, pending_iocb->u.c.offset); break; case IO_CMD_PREAD: events->res = pread(pending_iocb->aio_fildes, pending_iocb->u.c.buf, pending_iocb->u.c.nbytes, pending_iocb->u.c.offset); break; default: events->res = 0; } events->data = pending_iocb->data; events->obj = pending_iocb; events->res2 = 0; pending_iocb = NULL; return 1; } int io_cancel(io_context_t ctx_id, struct iocb *iocb, struct io_event *result) { init(); if (!translate_aio) { return orig_io_cancel(ctx_id, iocb, result); } if (ctx_id != &our_io_context) { return EINVAL; } if ((iocb == NULL) || (result == NULL)) { return EFAULT; } if (pending_iocb != iocb) { return EAGAIN; } result->data = iocb->data; result->obj = iocb; result->res = 0; result->res2 = 0; pending_iocb = NULL; return 0; } // ***** end - aio translate ****