Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/src/sbd-md.c b/src/sbd-md.c
index 7bdd0eb..47cbe8c 100644
--- a/src/sbd-md.c
+++ b/src/sbd-md.c
@@ -1,1248 +1,1289 @@
/*
* Copyright (C) 2013 Lars Marowsky-Bree <lmb@suse.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "sbd.h"
/* Message codes stored in the cmd byte of a mailbox sector. */
#define SBD_MSG_EMPTY 0x00
#define SBD_MSG_TEST 0x01
#define SBD_MSG_RESET 0x02
#define SBD_MSG_OFF 0x03
#define SBD_MSG_EXIT 0x04
#define SBD_MSG_CRASHDUMP 0x05
/* Sector 0 holds the header. Every slot then owns two consecutive sectors:
 * its node entry first, its mailbox right after.
 * The argument is parenthesized so that an expression such as
 * SLOT_TO_SECTOR(i + 1) expands with the intended precedence. */
#define SLOT_TO_SECTOR(slot) (1 + (slot) * 2)
#define MBOX_TO_SECTOR(mbox) (2 + (mbox) * 2)
/* Defined outside this file; the wait loops in ping_via_slots() and
 * messenger() compare their finished-servant counters against it. */
extern int disk_count;
/* These have to match the values in the header of the partition */
/* Exactly 8 bytes with no terminating NUL; only ever compared via memcmp(). */
static char sbd_magic[8] = "SBD_SBD_";
static char sbd_version = 0x02;
/* Argument bundle passed through assign_servant() to slot_msg_wrapper():
 * recipient node name and textual command. */
struct slot_msg_arg_t {
const char* name;
const char* msg;
};
/*
 * Map a textual command ("clear", "test", "reset", "off", "exit",
 * "crashdump") to its SBD_MSG_* code. The comparison is case-sensitive.
 * cmd must not be NULL (it is handed to strcmp()).
 * Returns -1 when the command is not recognized.
 */
static signed char
cmd2char(const char *cmd)
{
	static const struct {
		const char *text;
		signed char code;
	} known[] = {
		{ "clear",     SBD_MSG_EMPTY },
		{ "test",      SBD_MSG_TEST },
		{ "reset",     SBD_MSG_RESET },
		{ "off",       SBD_MSG_OFF },
		{ "exit",      SBD_MSG_EXIT },
		{ "crashdump", SBD_MSG_CRASHDUMP },
	};
	size_t idx;

	for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
		if (strcmp(known[idx].text, cmd) == 0) {
			return known[idx].code;
		}
	}
	return -1;
}
static const char*
char2cmd(const char cmd)
{
switch (cmd) {
case SBD_MSG_EMPTY:
return "clear";
break;
case SBD_MSG_TEST:
return "test";
break;
case SBD_MSG_RESET:
return "reset";
break;
case SBD_MSG_OFF:
return "off";
break;
case SBD_MSG_EXIT:
return "exit";
break;
case SBD_MSG_CRASHDUMP:
return "crashdump";
break;
default:
return "undefined";
break;
}
}
/*
 * Release everything an sbd_context holds: the AIO context, the device
 * file descriptor, the sector buffer and finally the context itself.
 * With the added lines a NULL st is accepted and ignored, and a context
 * whose devfd is still negative (device never opened) is handled, so the
 * function can serve as the common error-path cleanup of open_device().
 */
static void
close_device(struct sbd_context *st)
{
- close(st->devfd);
+ if (!st) {
+ return;
+ }
+ if (st->ioctx) {
+ io_destroy(st->ioctx);
+ }
+ if (st->devfd >= 0) {
+ close(st->devfd);
+ }
+ free(st->buffer);
free(st);
}
/*
 * Open devname for synchronous direct I/O and build an sbd_context for it:
 * sets up an AIO context for one request, opens the device, queries its
 * logical sector size into the global sector_size and (in the added lines)
 * allocates a sector-sized, sector-aligned buffer.
 *
 * loglevel selects how an open() failure is reported: LOG_DEBUG goes
 * through DBGLOG, any other level through cl_log.
 *
 * Returns the new context, or NULL if devname is NULL or any step fails.
 * The caller releases the context with close_device().
 */
static struct sbd_context *
open_device(const char* devname, int loglevel)
{
struct sbd_context *st;
if (!devname)
return NULL;
- st = malloc(sizeof(struct sbd_context));
- if (!st)
+ st = calloc(1, sizeof(struct sbd_context));
+ if (!st) {
return NULL;
- memset(st, 0, sizeof(struct sbd_context));
+ }
/* Mark the descriptor as "not open" so close_device() skips close(). */
+ st->devfd = -1;
if (io_setup(1, &st->ioctx) != 0) {
cl_perror("io_setup failed");
- free(st);
- return NULL;
+ goto out;
}
st->devfd = open(devname, O_SYNC|O_RDWR|O_DIRECT);
if (st->devfd == -1) {
if (loglevel == LOG_DEBUG) {
DBGLOG(loglevel, "Opening device %s failed.", devname);
} else {
cl_log(loglevel, "Opening device %s failed.", devname);
}
- free(st);
- return NULL;
+ goto out;
}
/* NOTE(review): the ioctl() return value is not checked; failure is only
 * noticed through sector_size == 0, which presumably requires the global
 * to have been zero beforehand - confirm. */
ioctl(st->devfd, BLKSSZGET, &sector_size);
if (sector_size == 0) {
cl_perror("Get sector size failed.\n");
- close_device(st);
- return NULL;
+ goto out;
+ }
+
+ if (posix_memalign(&st->buffer, sector_size, sector_size)) {
+ cl_perror("Couldn't allocate sector-buffer.");
+ goto out;
}
return st;
+
/* Common failure exit: close_device() copes with a partially built context. */
+out:
+ close_device(st);
+ return NULL;
}
/*
 * Allocate one zero-filled buffer of sector_size bytes.
 * Never returns NULL: the process exits with status 1 if allocation fails.
 * The caller releases the buffer with free().
 * The added line switches from valloc() to calloc(); sector_io() now stages
 * every transfer through st->buffer rather than using the caller's buffer
 * directly.
 */
static void *
sector_alloc(void)
{
void *x;
- x = valloc(sector_size);
+ x = calloc(1, sector_size);
if (!x) {
exit(1);
}
- memset(x, 0, sector_size);
return x;
}
/*
 * Transfer exactly one sector between data and the device using a single
 * libaio request, waiting at most timeout_io seconds for completion.
 *
 * st     - open device context
 * sector - sector index; the byte offset is sector_size * sector
 * data   - caller buffer of at least sector_size bytes
 * rw     - non-zero: write data to the device; zero: read into data
 *
 * In the added lines the transfer goes through st->buffer: data is copied
 * into it before a write and copied out of it after a successful read.
 *
 * Returns 0 when the full sector was transferred, -1 on submit failure,
 * event retrieval failure, timeout, or a short transfer.
 */
static int
sector_io(struct sbd_context *st, int sector, void *data, int rw)
{
struct timespec timeout;
struct io_event event;
struct iocb *ios[1] = { &st->io };
long r;
timeout.tv_sec = timeout_io;
timeout.tv_nsec = 0;
memset(&st->io, 0, sizeof(struct iocb));
if (rw) {
- io_prep_pwrite(&st->io, st->devfd, data, sector_size, (long long) sector_size * sector);
+ memcpy(st->buffer, data, sector_size);
+ io_prep_pwrite(&st->io, st->devfd, st->buffer, sector_size, (long long) sector_size * sector);
} else {
- io_prep_pread(&st->io, st->devfd, data, sector_size, (long long) sector_size * sector);
+ memset(st->buffer, 0, sector_size);
+ io_prep_pread(&st->io, st->devfd, st->buffer, sector_size, (long long) sector_size * sector);
}
if (io_submit(st->ioctx, 1, ios) != 1) {
cl_log(LOG_ERR, "Failed to submit IO request! (rw=%d)", rw);
return -1;
}
errno = 0;
r = io_getevents(st->ioctx, 1L, 1L, &event, &timeout);
if (r < 0 ) {
cl_log(LOG_ERR, "Failed to retrieve IO events (rw=%d)", rw);
return -1;
} else if (r < 1L) {
/* No event arrived within the timeout: try to cancel the request. */
- cl_log(LOG_INFO, "Cancelling IO request due to timeout (rw=%d)", rw);
+ cl_log(LOG_INFO, "Cancelling IO request due to timeout (rw=%d, r=%ld)", rw, r);
r = io_cancel(st->ioctx, ios[0], &event);
if (r) {
DBGLOG(LOG_INFO, "Could not cancel IO request (rw=%d)", rw);
/* Doesn't really matter, debugging information.
*/
}
return -1;
} else if (r > 1L) {
cl_log(LOG_ERR, "More than one IO was returned (r=%ld)", r);
return -1;
}
/* IO is happy */
if (event.res == sector_size) {
/* Hand the data back to the caller only after a complete read. */
+ if (!rw) {
+ memcpy(data, st->buffer, sector_size);
+ }
return 0;
} else {
cl_log(LOG_ERR, "Short IO (rw=%d, res=%lu, sector_size=%d)",
rw, event.res, sector_size);
return -1;
}
}
/* Write one sector from data to the device; returns sector_io()'s result. */
static int
sector_write(struct sbd_context *st, int sector, void *data)
{
	const int writing = 1;

	return sector_io(st, sector, data, writing);
}
/* Read one sector from the device into data; returns sector_io()'s result. */
static int
sector_read(struct sbd_context *st, int sector, void *data)
{
	const int writing = 0;

	return sector_io(st, sector, data, writing);
}
/* Read the node entry of the given slot into s_node. */
static int
slot_read(struct sbd_context *st, int slot, struct sector_node_s *s_node)
{
	const int sector = SLOT_TO_SECTOR(slot);

	return sector_read(st, sector, s_node);
}
/* Write s_node as the node entry of the given slot. */
static int
slot_write(struct sbd_context *st, int slot, struct sector_node_s *s_node)
{
	const int sector = SLOT_TO_SECTOR(slot);

	return sector_write(st, sector, s_node);
}
/* Write s_mbox into the mailbox sector belonging to slot number mbox. */
static int
mbox_write(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox)
{
	const int sector = MBOX_TO_SECTOR(mbox);

	return sector_write(st, sector, s_mbox);
}
/* Read the mailbox sector belonging to slot number mbox into s_mbox. */
static int
mbox_read(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox)
{
	const int sector = MBOX_TO_SECTOR(mbox);

	return sector_read(st, sector, s_mbox);
}
/*
 * Write s_mbox to the mailbox of slot mbox, read the sector back and
 * compare all sector_size bytes against what was written.
 * Returns 0 if the read-back matches, -1 if the write, the read or the
 * comparison fails.
 */
static int
mbox_write_verify(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox)
{
	const int sector = MBOX_TO_SECTOR(mbox);
	void *readback;
	int result = -1;

	if (sector_write(st, sector, s_mbox) < 0) {
		return -1;
	}

	readback = sector_alloc();
	if (sector_read(st, sector, readback) >= 0) {
		if (memcmp(s_mbox, readback, sector_size) == 0) {
			result = 0;
		} else {
			cl_log(LOG_ERR, "Write verification failed!");
		}
	}

	free(readback);
	return result;
}
/*
 * Convert the 32-bit header fields to network byte order and write the
 * header to sector 0.
 * Note: the conversion is done in place, so the caller's s_header holds
 * network-order values in those fields afterwards.
 * Returns the result of sector_write().
 */
static int header_write(struct sbd_context *st, struct sector_header_s *s_header)
{
	uint32_t *const wide_fields[] = {
		&s_header->sector_size,
		&s_header->timeout_watchdog,
		&s_header->timeout_allocate,
		&s_header->timeout_loop,
		&s_header->timeout_msgwait,
	};
	size_t idx;

	for (idx = 0; idx < sizeof(wide_fields) / sizeof(wide_fields[0]); idx++) {
		*wide_fields[idx] = htonl(*wide_fields[idx]);
	}

	return sector_write(st, 0, s_header);
}
/*
 * Read sector 0 into s_header and convert its 32-bit fields to host byte
 * order. As a side effect the global timeouts (watchdog, allocate, loop,
 * msgwait) are overwritten with the values found in the header.
 * Returns 0 on success, -1 if the sector could not be read (in which case
 * neither s_header's fields nor the globals are converted/updated).
 */
static int
header_read(struct sbd_context *st, struct sector_header_s *s_header)
{
	uint32_t *const wide_fields[] = {
		&s_header->sector_size,
		&s_header->timeout_watchdog,
		&s_header->timeout_allocate,
		&s_header->timeout_loop,
		&s_header->timeout_msgwait,
	};
	size_t idx;

	if (sector_read(st, 0, s_header) < 0) {
		return -1;
	}

	for (idx = 0; idx < sizeof(wide_fields) / sizeof(wide_fields[0]); idx++) {
		*wide_fields[idx] = ntohl(*wide_fields[idx]);
	}

	/* The header on disk dictates the process-wide defaults. */
	timeout_watchdog = s_header->timeout_watchdog;
	timeout_allocate = s_header->timeout_allocate;
	timeout_loop = s_header->timeout_loop;
	timeout_msgwait = s_header->timeout_msgwait;

	return 0;
}
/*
 * Sanity-check a header already converted to host byte order: the magic
 * must equal sbd_magic, the version must equal sbd_version and the stored
 * sector size must equal the global sector_size.
 * Returns 0 if all three match; otherwise logs the first mismatch and
 * returns -1.
 */
static int
valid_header(const struct sector_header_s *s_header)
{
	int verdict = -1;

	if (memcmp(s_header->magic, sbd_magic, sizeof(s_header->magic)) != 0) {
		cl_log(LOG_ERR, "Header magic does not match.");
	} else if (s_header->version != sbd_version) {
		cl_log(LOG_ERR, "Header version does not match.");
	} else if (s_header->sector_size != sector_size) {
		cl_log(LOG_ERR, "Header sector size does not match.");
	} else {
		verdict = 0;
	}

	return verdict;
}
/*
 * Read and validate the header of the device behind st.
 * Returns a freshly allocated header (host byte order) that the caller
 * must free(), or NULL if the header cannot be read or fails
 * valid_header(). The added lines free the buffer on both failure paths.
 * Reading the header also updates the global timeouts (see header_read()).
 */
static struct sector_header_s *
header_get(struct sbd_context *st)
{
struct sector_header_s *s_header;
s_header = sector_alloc();
if (header_read(st, s_header) < 0) {
cl_log(LOG_ERR, "Unable to read header from device %d", st->devfd);
+ free(s_header);
return NULL;
}
if (valid_header(s_header) < 0) {
cl_log(LOG_ERR, "header on device %d is not valid.", st->devfd);
+ free(s_header);
return NULL;
}
/* cl_log(LOG_INFO, "Found version %d header with %d slots",
s_header->version, s_header->slots); */
return s_header;
}
/*
 * Print the header of the device behind st to stdout in human-readable
 * form. The UUID line is printed only when minor_version > 0.
 * Returns 0 on success, -1 if no valid header could be obtained.
 * The added lines free the header before returning.
 */
static int
header_dump(struct sbd_context *st)
{
struct sector_header_s *s_header;
/* 36 characters of textual UUID plus the terminating NUL. */
char uuid[37];
s_header = header_get(st);
if (s_header == NULL)
return -1;
printf("Header version : %u.%u\n", s_header->version,
s_header->minor_version);
if (s_header->minor_version > 0) {
uuid_unparse_lower(s_header->uuid, uuid);
printf("UUID : %s\n", uuid);
}
printf("Number of slots : %u\n", s_header->slots);
printf("Sector size : %lu\n",
(unsigned long)s_header->sector_size);
printf("Timeout (watchdog) : %lu\n",
(unsigned long)s_header->timeout_watchdog);
printf("Timeout (allocate) : %lu\n",
(unsigned long)s_header->timeout_allocate);
printf("Timeout (loop) : %lu\n",
(unsigned long)s_header->timeout_loop);
printf("Timeout (msgwait) : %lu\n",
(unsigned long)s_header->timeout_msgwait);
+
+ free(s_header);
return 0;
}
/*
 * Initialize the device behind st: write a fresh header (version
 * sbd_version, minor version 1, 255 slots, current global timeouts, a newly
 * generated UUID) and then overwrite every slot's node entry and mailbox
 * with zero-filled sectors.
 * Progress is reported both via cl_log and on stdout.
 * Returns 0 on success, -1 as soon as any write fails.
 */
static int
init_device(struct sbd_context *st)
{
	struct sector_header_s *hdr = sector_alloc();
	struct sector_node_s *blank_node = sector_alloc();
	struct sector_mbox_s *blank_mbox = sector_alloc();
	char uuid_text[37];
	int slot;
	int result = -1;

	memcpy(hdr->magic, sbd_magic, sizeof(hdr->magic));
	hdr->version = sbd_version;
	hdr->minor_version = 1;
	hdr->slots = 255;
	hdr->sector_size = sector_size;
	hdr->timeout_watchdog = timeout_watchdog;
	hdr->timeout_allocate = timeout_allocate;
	hdr->timeout_loop = timeout_loop;
	hdr->timeout_msgwait = timeout_msgwait;
	uuid_generate(hdr->uuid);
	uuid_unparse_lower(hdr->uuid, uuid_text);

	cl_log(LOG_INFO, "Creating version %d.%d header on device %d (uuid: %s)",
	       hdr->version, hdr->minor_version,
	       st->devfd, uuid_text);
	fprintf(stdout, "Creating version %d.%d header on device %d (uuid: %s)\n",
		hdr->version, hdr->minor_version,
		st->devfd, uuid_text);
	if (header_write(st, hdr) < 0) {
		goto done;
	}

	cl_log(LOG_INFO, "Initializing %d slots on device %d",
	       hdr->slots,
	       st->devfd);
	fprintf(stdout, "Initializing %d slots on device %d\n",
		hdr->slots,
		st->devfd);
	for (slot = 0; slot < hdr->slots; slot++) {
		if (slot_write(st, slot, blank_node) < 0) {
			goto done;
		}
		if (mbox_write(st, slot, blank_mbox) < 0) {
			goto done;
		}
	}
	result = 0;

done:
	free(blank_node);
	free(hdr);
	free(blank_mbox);
	return result;
}
/*
 * Find the slot already allocated to name (compared case-insensitively,
 * over at most SECTOR_NAME_MAX characters). Every slot has to be scanned
 * because used slots need not be contiguous.
 *
 * Returns the slot number (>= 0) if found, -1 if name is NULL or no slot
 * carries that name, and -2 if reading a slot from the device fails.
 */
static int
slot_lookup(struct sbd_context *st, const struct sector_header_s *s_header, const char *name)
{
	struct sector_node_s *entry;
	int slot;
	int found = -1;

	if (name == NULL) {
		cl_log(LOG_ERR, "slot_lookup(): No name specified.\n");
		return found;
	}

	entry = sector_alloc();
	for (slot = 0; slot < s_header->slots; slot++) {
		if (slot_read(st, slot, entry) < 0) {
			found = -2;
			break;
		}
		if (entry->in_use == 0) {
			continue;
		}
		if (strncasecmp(entry->name, name, SECTOR_NAME_MAX) == 0) {
			DBGLOG(LOG_INFO, "%s owns slot %d", name, slot);
			found = slot;
			break;
		}
	}

	free(entry);
	return found;
}
/*
 * Return the number of the first slot whose in_use flag is zero.
 * Returns -1 if every slot is in use or a slot cannot be read.
 */
static int
slot_unused(struct sbd_context *st, const struct sector_header_s *s_header)
{
	struct sector_node_s *entry = sector_alloc();
	int candidate = -1;
	int slot;

	for (slot = 0; slot < s_header->slots; slot++) {
		if (slot_read(st, slot, entry) < 0) {
			break;
		}
		if (entry->in_use == 0) {
			candidate = slot;
			break;
		}
	}

	free(entry);
	return candidate;
}
/*
 * Make sure a slot exists for name on the device behind st.
 *
 * If a slot already carries the name it is returned. Otherwise the first
 * unused slot is claimed by writing the name into it, the function sleeps
 * for timeout_allocate seconds and then looks the name up again, repeating
 * until the lookup finds it.
 *
 * Returns the slot number (>= 0) on success, -1 if name is NULL, the
 * header is invalid, no free slot is left or the claim cannot be written,
 * and -2 if a slot could not be read during lookup.
 */
static int
slot_allocate(struct sbd_context *st, const char *name)
{
	struct sector_header_s *s_header = NULL;
	struct sector_node_s *s_node = NULL;
	int i;
	int rc = 0;

	if (!name) {
		cl_log(LOG_ERR, "slot_allocate(): No name specified.\n");
		fprintf(stderr, "slot_allocate(): No name specified.\n");
		rc = -1; goto out;
	}

	s_header = header_get(st);
	if (!s_header) {
		rc = -1; goto out;
	}

	/* Only a node entry is needed here; the mailbox sector is untouched. */
	s_node = sector_alloc();

	while (1) {
		i = slot_lookup(st, s_header, name);
		if ((i >= 0) || (i == -2)) {
			/* -1 is "no slot found", in which case we
			 * proceed to allocate a new one.
			 * -2 is "read error during lookup", in which
			 * case we error out too
			 * >= 0 is "slot already allocated" */
			rc = i; goto out;
		}

		i = slot_unused(st, s_header);
		if (i >= 0) {
			cl_log(LOG_INFO, "slot %d is unused - trying to own", i);
			fprintf(stdout, "slot %d is unused - trying to own\n", i);
			/* Zeroing the entry first keeps the name NUL-terminated:
			 * the name field is SECTOR_NAME_MAX+1 bytes long. */
			memset(s_node, 0, sizeof(*s_node));
			s_node->in_use = 1;
			strncpy(s_node->name, name, SECTOR_NAME_MAX);
			if (slot_write(st, i, s_node) < 0) {
				rc = -1; goto out;
			}
			/* Wait before re-checking ownership via the lookup above. */
			sleep(timeout_allocate);
		} else {
			cl_log(LOG_ERR, "No more free slots.");
			fprintf(stderr, "No more free slots.\n");
			rc = -1; goto out;
		}
	}

out:
	free(s_node);
	free(s_header);
	return(rc);
}
/*
 * Print one tab-separated line per in-use slot to stdout:
 * slot number, node name, pending command (as text) and sender.
 * Returns 0 on success, -1 if the header is invalid or a slot or mailbox
 * cannot be read.
 */
static int
slot_list(struct sbd_context *st)
{
	struct sector_header_s *hdr;
	struct sector_node_s *entry = NULL;
	struct sector_mbox_s *inbox = NULL;
	int slot;
	int result = -1;

	hdr = header_get(st);
	if (hdr == NULL) {
		return result;
	}

	entry = sector_alloc();
	inbox = sector_alloc();

	for (slot = 0; slot < hdr->slots; slot++) {
		if (slot_read(st, slot, entry) < 0) {
			goto done;
		}
		if (!(entry->in_use > 0)) {
			continue;
		}
		if (mbox_read(st, slot, inbox) < 0) {
			goto done;
		}
		printf("%d\t%s\t%s\t%s\n",
		       slot, entry->name, char2cmd(inbox->cmd),
		       inbox->from);
	}
	result = 0;

done:
	free(entry);
	free(hdr);
	free(inbox);
	return result;
}
/*
 * Deliver command cmd to the mailbox of node name on the device behind st.
 *
 * name - recipient; the literal "LOCAL" is replaced by local_uname
 * cmd  - textual command understood by cmd2char()
 *
 * The message is written and read back for verification. Unless the
 * command is "exit", the function then sleeps for timeout_msgwait seconds
 * before reporting delivery.
 *
 * Returns 0 on success, -1 if an argument is NULL, the header is invalid,
 * the recipient has no slot, the command is unknown, or the write or its
 * verification fails.
 */
static int
slot_msg(struct sbd_context *st, const char *name, const char *cmd)
{
	struct sector_header_s *s_header = NULL;
	struct sector_mbox_s *s_mbox = NULL;
	int mbox;
	int rc = 0;
	char uuid[37];

	if (!name || !cmd) {
		cl_log(LOG_ERR, "slot_msg(): No recipient / cmd specified.\n");
		rc = -1; goto out;
	}

	s_header = header_get(st);
	if (!s_header) {
		rc = -1; goto out;
	}

	if (strcmp(name, "LOCAL") == 0) {
		name = local_uname;
	}

	if (s_header->minor_version > 0) {
		uuid_unparse_lower(s_header->uuid, uuid);
		cl_log(LOG_INFO, "Device UUID: %s", uuid);
	}

	mbox = slot_lookup(st, s_header, name);
	if (mbox < 0) {
		cl_log(LOG_ERR, "slot_msg(): No slot found for %s.", name);
		rc = -1; goto out;
	}

	s_mbox = sector_alloc();

	s_mbox->cmd = cmd2char(cmd);
	if (s_mbox->cmd < 0) {
		cl_log(LOG_ERR, "slot_msg(): Invalid command %s.", cmd);
		rc = -1; goto out;
	}

	/* The sector is zero-filled and "from" is SECTOR_NAME_MAX+1 bytes,
	 * so the copied name always stays NUL-terminated. */
	strncpy(s_mbox->from, local_uname, SECTOR_NAME_MAX);

	cl_log(LOG_INFO, "Writing %s to node slot %s",
	       cmd, name);
	/* mbox_write_verify() returns 0 or -1; the former "< -1" test could
	 * never be true, so a failed write was reported as delivered. */
	if (mbox_write_verify(st, mbox, s_mbox) < 0) {
		rc = -1; goto out;
	}
	if (strcasecmp(cmd, "exit") != 0) {
		cl_log(LOG_INFO, "Messaging delay: %d",
		       (int)timeout_msgwait);
		sleep(timeout_msgwait);
	}
	cl_log(LOG_INFO, "%s successfully delivered to %s",
	       cmd, name);

out:
	free(s_mbox);
	free(s_header);
	return rc;
}
/*
 * Ping node name through the device behind st: write a TEST message into
 * its mailbox and poll once per second, for up to timeout_msgwait seconds,
 * until the mailbox no longer contains the TEST command.
 *
 * name - recipient; the literal "LOCAL" is replaced by local_uname
 *
 * Returns 0 if the TEST message disappeared in time, -1 if name is NULL,
 * the header is invalid, the recipient has no slot, the mailbox could not
 * be written or read, or the wait timed out.
 */
static int
slot_ping(struct sbd_context *st, const char *name)
{
	struct sector_header_s *s_header = NULL;
	struct sector_mbox_s *s_mbox = NULL;
	int mbox;
	int waited = 0;
	int rc = 0;

	if (!name) {
		cl_log(LOG_ERR, "slot_ping(): No recipient specified.\n");
		rc = -1; goto out;
	}

	s_header = header_get(st);
	if (!s_header) {
		rc = -1; goto out;
	}

	if (strcmp(name, "LOCAL") == 0) {
		name = local_uname;
	}

	mbox = slot_lookup(st, s_header, name);
	if (mbox < 0) {
		/* Message used to name slot_msg(), which made logs misleading. */
		cl_log(LOG_ERR, "slot_ping(): No slot found for %s.", name);
		rc = -1; goto out;
	}

	s_mbox = sector_alloc();
	s_mbox->cmd = SBD_MSG_TEST;

	/* The sector is zero-filled and "from" is SECTOR_NAME_MAX+1 bytes,
	 * so the copied name always stays NUL-terminated. */
	strncpy(s_mbox->from, local_uname, SECTOR_NAME_MAX);

	DBGLOG(LOG_DEBUG, "Pinging node %s", name);
	/* mbox_write() returns 0 or -1; the former "< -1" test could never
	 * be true, so a failed write went unnoticed. */
	if (mbox_write(st, mbox, s_mbox) < 0) {
		rc = -1; goto out;
	}

	/* Assume failure until the recipient is seen to consume the message. */
	rc = -1;
	while (waited <= timeout_msgwait) {
		if (mbox_read(st, mbox, s_mbox) < 0)
			break;
		if (s_mbox->cmd != SBD_MSG_TEST) {
			rc = 0;
			break;
		}
		sleep(1);
		waited++;
	}

	if (rc == 0) {
		cl_log(LOG_DEBUG, "%s successfully pinged.", name);
	} else {
		cl_log(LOG_ERR, "%s failed to ping.", name);
	}

out:
	free(s_mbox);
	free(s_header);
	return rc;
}
/*
 * Initialize every device in the servants list, in order, reporting
 * progress on stdout. Stops at the first device that cannot be opened or
 * initialized.
 * Returns 0 if all devices were initialized, -1 otherwise.
 */
int init_devices(struct servants_list_item *servants)
{
	struct servants_list_item *cur = servants;

	while (cur != NULL) {
		struct sbd_context *ctx;
		int status;

		fprintf(stdout, "Initializing device %s\n",
			cur->devname);
		ctx = open_device(cur->devname, LOG_ERR);
		if (ctx == NULL) {
			return -1;
		}

		status = init_device(ctx);
		close_device(ctx);
		if (status == -1) {
			fprintf(stderr, "Failed to init device %s\n", cur->devname);
			return status;
		}
		fprintf(stdout, "Device %s is initialized.\n", cur->devname);

		cur = cur->next;
	}

	fprintf(stdout, "Did you check sbd service down on all nodes before? If not do so now and restart afterwards.\n");
	return 0;
}
static int slot_msg_wrapper(const char* devname, int mode, const void* argp)
{
int rc = 0;
struct sbd_context *st;
const struct slot_msg_arg_t* arg = (const struct slot_msg_arg_t*)argp;
st = open_device(devname, LOG_WARNING);
if (!st)
return -1;
cl_log(LOG_INFO, "Delivery process handling %s",
devname);
rc = slot_msg(st, arg->name, arg->msg);
close_device(st);
return rc;
}
static int slot_ping_wrapper(const char* devname, int mode, const void* argp)
{
int rc = 0;
const char* name = (const char*)argp;
struct sbd_context *st;
st = open_device(devname, LOG_WARNING);
if (!st)
return -1;
rc = slot_ping(st, name);
close_device(st);
return rc;
}
/*
 * Allocate a slot for name on every device in the servants list, in
 * order, reporting progress on stdout. Stops at the first failure.
 * Returns 0 if a slot exists on all devices afterwards, -1 if a device
 * cannot be opened, or the negative result of slot_allocate() otherwise.
 */
int allocate_slots(const char *name, struct servants_list_item *servants)
{
	struct servants_list_item *cur = servants;

	while (cur != NULL) {
		struct sbd_context *ctx;
		int status;

		fprintf(stdout, "Trying to allocate slot for %s on device %s.\n",
			name,
			cur->devname);
		ctx = open_device(cur->devname, LOG_WARNING);
		if (ctx == NULL) {
			return -1;
		}

		status = slot_allocate(ctx, name);
		close_device(ctx);
		if (status < 0) {
			return status;
		}
		fprintf(stdout, "Slot for %s has been allocated on %s.\n",
			name,
			cur->devname);

		cur = cur->next;
	}

	return 0;
}
/*
 * List the in-use slots of every device in the servants list. A device
 * that cannot be opened or listed is reported on stderr and the remaining
 * devices are still processed.
 * Returns 0 if every device was listed, -1 if at least one failed.
 */
int list_slots(struct servants_list_item *servants)
{
	struct servants_list_item *cur;
	int overall = 0;

	for (cur = servants; cur != NULL; cur = cur->next) {
		struct sbd_context *ctx = open_device(cur->devname, LOG_WARNING);
		int status;

		if (ctx == NULL) {
			overall = -1;
			fprintf(stderr, "== disk %s unreadable!\n", cur->devname);
			continue;
		}

		status = slot_list(ctx);
		close_device(ctx);
		if (status == -1) {
			overall = -1;
			fprintf(stderr, "== Slots on disk %s NOT dumped\n", cur->devname);
		}
	}

	return overall;
}
/*
 * Ping node name through every disk servant: one child per disk entry is
 * started via assign_servant() running slot_ping_wrapper(), then the
 * function waits, with SIGCHLD blocked and collected synchronously, until
 * disk_count children belonging to disk servants have been reaped.
 * Always returns 0; the children's exit statuses are not evaluated here.
 */
int ping_via_slots(const char *name, struct servants_list_item *servants)
{
int sig = 0;
pid_t pid = 0;
int status = 0;
int servants_finished = 0;
sigset_t procmask;
siginfo_t sinfo;
struct servants_list_item *s;
/* Block SIGCHLD so it can be picked up with sigwaitinfo() below. */
sigemptyset(&procmask);
sigaddset(&procmask, SIGCHLD);
sigprocmask(SIG_BLOCK, &procmask, NULL);
for (s = servants; s; s = s->next) {
if(sbd_is_disk(s)) {
s->pid = assign_servant(s->devname, &slot_ping_wrapper, 0, (const void*)name);
}
}
while (servants_finished < disk_count) {
sig = sigwaitinfo(&procmask, &sinfo);
if (sig == SIGCHLD) {
/* NOTE(review): wait() blocks until a child exits, unlike the
 * waitpid(..., WNOHANG) used in messenger() - confirm intended. */
while ((pid = wait(&status))) {
if (pid == -1 && errno == ECHILD) {
break;
} else {
/* NOTE(review): the lookup result is passed on unchecked; whether
 * sbd_is_disk() tolerates NULL is not visible here - confirm. */
s = lookup_servant_by_pid(pid);
if (sbd_is_disk(s)) {
servants_finished++;
}
}
}
}
}
return 0;
}
/*
 * Tell whether good_servants constitutes a strict majority of disk_count
 * (integer division: more than disk_count/2).
 * Returns 1 if so, 0 otherwise.
 */
int quorum_write(int good_servants)
{
	const int half = disk_count / 2;

	return good_servants > half;
}
/*
 * Deliver message msg to node name through all configured devices.
 * One child per servant is started via assign_servant() running
 * slot_msg_wrapper(); the function then reaps children until either a
 * majority of devices (see quorum_write()) reported success or
 * disk_count children have finished.
 * A child counts as successful if it exited normally with status 0.
 * Returns 0 if a majority of deliveries succeeded, -1 otherwise.
 */
int messenger(const char *name, const char *msg, struct servants_list_item *servants)
{
int sig = 0;
pid_t pid = 0;
int status = 0;
int servants_finished = 0;
int successful_delivery = 0;
sigset_t procmask;
siginfo_t sinfo;
struct servants_list_item *s;
/* Lives on this stack frame; children receive a pointer to it. */
struct slot_msg_arg_t slot_msg_arg = {name, msg};
/* Block SIGCHLD so it can be picked up with sigwaitinfo() below. */
sigemptyset(&procmask);
sigaddset(&procmask, SIGCHLD);
sigprocmask(SIG_BLOCK, &procmask, NULL);
for (s = servants; s; s = s->next) {
s->pid = assign_servant(s->devname, &slot_msg_wrapper, 0, &slot_msg_arg);
}
while (!(quorum_write(successful_delivery) ||
(servants_finished == disk_count))) {
sig = sigwaitinfo(&procmask, &sinfo);
if (sig == SIGCHLD) {
/* Reap every child that has already changed state; waitpid()
 * returning 0 (nothing more to reap) ends the inner loop. */
while ((pid = waitpid(-1, &status, WNOHANG))) {
if (pid == -1 && errno == ECHILD) {
break;
} else {
servants_finished++;
if (WIFEXITED(status)
&& WEXITSTATUS(status) == 0) {
DBGLOG(LOG_INFO, "Process %d succeeded.",
(int)pid);
successful_delivery++;
} else {
cl_log(LOG_WARNING, "Process %d failed to deliver!",
(int)pid);
}
}
}
}
}
if (quorum_write(successful_delivery)) {
cl_log(LOG_INFO, "Message successfully delivered.");
return 0;
} else {
cl_log(LOG_ERR, "Message is not delivered via more then a half of devices");
return -1;
}
}
/*
 * Return the msgwait timeout stored in the header of the first device in
 * the list that can be opened and has a valid header.
 * Returns 0 if no device yields a valid header.
 * The added line frees the header before returning.
 */
unsigned long
get_first_msgwait(struct servants_list_item *servants)
{
unsigned long msgwait = 0;
struct servants_list_item *s = servants;
for (s = servants; s; s = s->next) {
struct sbd_context *st;
struct sector_header_s *s_header;
st = open_device(s->devname, LOG_WARNING);
if (!st) {
continue;
}
s_header = header_get(st);
if (s_header != NULL) {
/* The value is copied out before the header is released. */
msgwait = (unsigned long)s_header->timeout_msgwait;
close_device(st);
+ free(s_header);
return msgwait;
}
close_device(st);
}
return msgwait;
}
/*
 * Dump the header of every device in the servants list to stdout.
 * A device that cannot be opened or has no valid header is reported on
 * stderr and the remaining devices are still processed.
 * Returns 0 if every header was dumped, -1 if at least one failed.
 */
int dump_headers(struct servants_list_item *servants)
{
	struct servants_list_item *cur;
	int overall = 0;

	for (cur = servants; cur != NULL; cur = cur->next) {
		struct sbd_context *ctx;
		int status = -1;

		fprintf(stdout, "==Dumping header on disk %s\n", cur->devname);
		ctx = open_device(cur->devname, LOG_WARNING);
		if (ctx == NULL) {
			fprintf(stderr, "== disk %s unreadable!\n", cur->devname);
		} else {
			status = header_dump(ctx);
			close_device(ctx);
		}

		if (status == -1) {
			overall = -1;
			fprintf(stderr, "==Header on disk %s NOT dumped\n", cur->devname);
		} else {
			fprintf(stdout, "==Header on disk %s is dumped\n", cur->devname);
		}
	}

	return overall;
}
/*
 * Wait until at least one configured device delivers a valid header and
 * adopt its timeouts as the global defaults.
 *
 * All devices are tried in list order; if none works the function sleeps
 * timeout_loop seconds and retries for as long as less than
 * timeout_startup seconds have elapsed (monotonic clock). The first open
 * or header failure is logged once as a warning.
 * If no device becomes available in time the process exits with status 1.
 */
void open_any_device(struct servants_list_item *servants)
{
	struct sector_header_s *found = NULL;
	struct timespec started;
	int elapsed = 0;
	bool warned = false;

	clock_gettime(CLOCK_MONOTONIC, &started);

	while (found == NULL && elapsed < timeout_startup) {
		struct timespec now;
		struct servants_list_item *cur;

		for (cur = servants; cur != NULL; cur = cur->next) {
			struct sbd_context *ctx = open_device(cur->devname, LOG_DEBUG);

			if (ctx != NULL) {
				found = header_get(ctx);
				close_device(ctx);
				if (found != NULL) {
					break;
				}
				if (!warned) {
					cl_log(LOG_WARNING, "Failed to read header from %s. "
					       "Trying any other configured devices, "
					       "otherwise retrying every %ds within %ds",
					       cur->devname, timeout_loop, timeout_startup);
					warned = true;
				}
			} else if (!warned) {
				cl_log(LOG_WARNING, "Failed to open %s. "
				       "Trying any other configured devices, "
				       "otherwise retrying every %ds within %ds",
				       cur->devname, timeout_loop, timeout_startup);
				warned = true;
			}
		}

		clock_gettime(CLOCK_MONOTONIC, &now);
		elapsed = now.tv_sec - started.tv_sec;
		if (found == NULL) {
			sleep(timeout_loop);
		}
	}

	if (found == NULL) {
		cl_log(LOG_ERR, "No devices were available at start-up within %i seconds.",
		       timeout_startup);
		exit(1);
	}

	timeout_watchdog = found->timeout_watchdog;
	timeout_allocate = found->timeout_allocate;
	timeout_loop = found->timeout_loop;
	timeout_msgwait = found->timeout_msgwait;

	free(found);
	return;
}
/*
::-::-::-::-::-::-::-::-::-::-::-::-::
Begin disk based servant code
::-::-::-::-::-::-::-::-::-::-::-::-::
*/
/*
 * Compare the timeouts stored in hdr with the current global timeouts
 * (watchdog, allocate, loop, msgwait - checked in that order).
 * Returns 0 if all four agree; otherwise logs a warning for the first
 * mismatch found and returns -1.
 */
static int servant_check_timeout_inconsistent(struct sector_header_s *hdr)
{
	int verdict = -1;

	if (timeout_watchdog != hdr->timeout_watchdog) {
		cl_log(LOG_WARNING, "watchdog timeout: %d versus %d on this device",
		       (int)timeout_watchdog, (int)hdr->timeout_watchdog);
	} else if (timeout_allocate != hdr->timeout_allocate) {
		cl_log(LOG_WARNING, "allocate timeout: %d versus %d on this device",
		       (int)timeout_allocate, (int)hdr->timeout_allocate);
	} else if (timeout_loop != hdr->timeout_loop) {
		cl_log(LOG_WARNING, "loop timeout: %d versus %d on this device",
		       (int)timeout_loop, (int)hdr->timeout_loop);
	} else if (timeout_msgwait != hdr->timeout_msgwait) {
		cl_log(LOG_WARNING, "msgwait timeout: %d versus %d on this device",
		       (int)timeout_msgwait, (int)hdr->timeout_msgwait);
	} else {
		verdict = 0;
	}

	return verdict;
}
/*
 * Main routine of a disk-watching servant process.
 *
 * diskname - device to monitor
 * mode     - when > 0 on a first start, an already pending fencing message
 *            aborts start-up instead of being wiped
 * argp     - interpreted as the servant's struct servants_list_item; only
 *            first_start is read from it here
 *
 * After opening the device and validating its header, the servant makes
 * sure a slot exists for local_uname and then loops forever: sleep
 * timeout_loop, verify that header and node entry are unchanged, read the
 * mailbox, act on any command found and signal liveness to the parent.
 *
 * The function does not return; it terminates the process with exit(rc),
 * rc being 0 or one of the EXIT_MD_SERVANT_* codes. The changed lines
 * route the former direct exit() calls through the common "out" label so
 * that the buffers and the device context are released first.
 */
int servant_md(const char *diskname, int mode, const void* argp)
{
struct sector_mbox_s *s_mbox = NULL;
struct sector_node_s *s_node = NULL;
struct sector_header_s *s_header = NULL;
int mbox;
int rc = 0;
time_t t0, t1, latency;
union sigval signal_value;
sigset_t servant_masks;
struct sbd_context *st;
pid_t ppid;
char uuid[37];
const struct servants_list_item *s = argp;
cl_log(LOG_INFO, "Servant starting for device %s", diskname);
/* Block most of the signals */
sigfillset(&servant_masks);
sigdelset(&servant_masks, SIGKILL);
sigdelset(&servant_masks, SIGFPE);
sigdelset(&servant_masks, SIGILL);
sigdelset(&servant_masks, SIGSEGV);
sigdelset(&servant_masks, SIGBUS);
sigdelset(&servant_masks, SIGALRM);
/* FIXME: check error */
sigprocmask(SIG_SETMASK, &servant_masks, NULL);
st = open_device(diskname, LOG_WARNING);
if (!st) {
exit(EXIT_MD_SERVANT_IO_FAIL);
}
s_header = header_get(st);
if (!s_header) {
cl_log(LOG_ERR, "Not a valid header on %s", diskname);
- exit(EXIT_MD_SERVANT_IO_FAIL);
+ rc = EXIT_MD_SERVANT_IO_FAIL;
+ goto out;
}
if (servant_check_timeout_inconsistent(s_header) < 0) {
cl_log(LOG_ERR, "Timeouts on %s do not match first device",
diskname);
- exit(EXIT_MD_SERVANT_IO_FAIL);
+ rc = EXIT_MD_SERVANT_IO_FAIL;
+ goto out;
}
if (s_header->minor_version > 0) {
uuid_unparse_lower(s_header->uuid, uuid);
cl_log(LOG_INFO, "Device %s uuid: %s", diskname, uuid);
}
/* Returns the existing slot for this node or claims a free one. */
mbox = slot_allocate(st, local_uname);
if (mbox < 0) {
cl_log(LOG_ERR,
"No slot allocated, and automatic allocation failed for disk %s.",
diskname);
rc = EXIT_MD_SERVANT_IO_FAIL;
goto out;
}
/* Snapshot of our node entry; compared against the disk on every loop. */
s_node = sector_alloc();
if (slot_read(st, mbox, s_node) < 0) {
cl_log(LOG_ERR, "Unable to read node entry on %s",
diskname);
- exit(EXIT_MD_SERVANT_IO_FAIL);
+ rc = EXIT_MD_SERVANT_IO_FAIL;
+ goto out;
}
cl_log(LOG_NOTICE, "Monitoring slot %d on disk %s", mbox, diskname);
if (s_header->minor_version == 0) {
set_proc_title("sbd: watcher: %s - slot: %d", diskname, mbox);
} else {
set_proc_title("sbd: watcher: %s - slot: %d - uuid: %s",
diskname, mbox, uuid);
}
s_mbox = sector_alloc();
if (s->first_start) {
if (mode > 0) {
if (mbox_read(st, mbox, s_mbox) < 0) {
cl_log(LOG_ERR, "mbox read failed during start-up in servant.");
rc = EXIT_MD_SERVANT_IO_FAIL;
goto out;
}
if (s_mbox->cmd != SBD_MSG_EXIT &&
s_mbox->cmd != SBD_MSG_EMPTY) {
/* Not a clean stop. Abort start-up */
cl_log(LOG_WARNING, "Found fencing message - aborting start-up. Manual intervention required!");
ppid = getppid();
/* NOTE(review): signal_value is only zeroed by the memset() placed
 * before the main loop below, so it appears to be passed here
 * uninitialized - confirm. */
sigqueue(ppid, SIG_EXITREQ, signal_value);
rc = 0;
goto out;
}
}
DBGLOG(LOG_INFO, "First servant start - zeroing inbox");
memset(s_mbox, 0, sizeof(*s_mbox));
if (mbox_write(st, mbox, s_mbox) < 0) {
rc = EXIT_MD_SERVANT_IO_FAIL;
goto out;
}
}
memset(&signal_value, 0, sizeof(signal_value));
while (1) {
struct sector_header_s *s_header_retry = NULL;
struct sector_node_s *s_node_retry = NULL;
t0 = time(NULL);
sleep(timeout_loop);
ppid = getppid();
if (ppid == 1) {
/* Our parent died unexpectedly. Triggering
* self-fence. */
do_timeout_action();
}
/* These attempts are, by definition, somewhat racy. If
* the device is wiped out or corrupted between here and
* us reading our mbox, there is nothing we can do about
* that. But at least we tried. */
s_header_retry = header_get(st);
if (!s_header_retry) {
cl_log(LOG_ERR, "No longer found a valid header on %s", diskname);
- exit(EXIT_MD_SERVANT_IO_FAIL);
+ rc = EXIT_MD_SERVANT_IO_FAIL;
+ goto out;
}
if (memcmp(s_header, s_header_retry, sizeof(*s_header)) != 0) {
cl_log(LOG_ERR, "Header on %s changed since start-up!", diskname);
- exit(EXIT_MD_SERVANT_IO_FAIL);
+ free(s_header_retry);
+ rc = EXIT_MD_SERVANT_IO_FAIL;
+ goto out;
}
free(s_header_retry);
s_node_retry = sector_alloc();
if (slot_read(st, mbox, s_node_retry) < 0) {
cl_log(LOG_ERR, "slot read failed in servant.");
- exit(EXIT_MD_SERVANT_IO_FAIL);
+ free(s_node_retry);
+ rc = EXIT_MD_SERVANT_IO_FAIL;
+ goto out;
}
if (memcmp(s_node, s_node_retry, sizeof(*s_node)) != 0) {
cl_log(LOG_ERR, "Node entry on %s changed since start-up!", diskname);
- exit(EXIT_MD_SERVANT_IO_FAIL);
+ free(s_node_retry);
+ rc = EXIT_MD_SERVANT_IO_FAIL;
+ goto out;
}
free(s_node_retry);
if (mbox_read(st, mbox, s_mbox) < 0) {
cl_log(LOG_ERR, "mbox read failed in servant.");
- exit(EXIT_MD_SERVANT_IO_FAIL);
+ rc = EXIT_MD_SERVANT_IO_FAIL;
+ goto out;
}
/* Any non-empty command (codes > 0) is acted upon. */
if (s_mbox->cmd > 0) {
cl_log(LOG_NOTICE,
"Received command %s from %s on disk %s",
char2cmd(s_mbox->cmd), s_mbox->from, diskname);
switch (s_mbox->cmd) {
case SBD_MSG_TEST:
/* Clear the mailbox (the result of this write is not checked)
 * and notify the parent. */
memset(s_mbox, 0, sizeof(*s_mbox));
mbox_write(st, mbox, s_mbox);
sigqueue(ppid, SIG_TEST, signal_value);
break;
case SBD_MSG_RESET:
- exit(EXIT_MD_SERVANT_REQUEST_RESET);
+ rc = EXIT_MD_SERVANT_REQUEST_RESET;
+ goto out;
case SBD_MSG_OFF:
- exit(EXIT_MD_SERVANT_REQUEST_SHUTOFF);
+ rc = EXIT_MD_SERVANT_REQUEST_SHUTOFF;
+ goto out;
case SBD_MSG_EXIT:
sigqueue(ppid, SIG_EXITREQ, signal_value);
break;
case SBD_MSG_CRASHDUMP:
- exit(EXIT_MD_SERVANT_REQUEST_CRASHDUMP);
+ rc = EXIT_MD_SERVANT_REQUEST_CRASHDUMP;
+ goto out;
default:
/* FIXME:
An "unknown" message might result
from a partial write.
log it and clear the slot.
*/
cl_log(LOG_ERR, "Unknown message on disk %s",
diskname);
memset(s_mbox, 0, sizeof(*s_mbox));
mbox_write(st, mbox, s_mbox);
break;
}
}
/* Tell the parent this disk was successfully serviced in this round. */
sigqueue(ppid, SIG_LIVENESS, signal_value);
t1 = time(NULL);
latency = t1 - t0;
if (timeout_watchdog_warn && (latency > timeout_watchdog_warn)) {
cl_log(LOG_WARNING,
"Latency: %ds exceeded watchdog warning timeout %ds on disk %s",
(int)latency, (int)timeout_watchdog_warn,
diskname);
} else if (debug) {
DBGLOG(LOG_DEBUG, "Latency: %ds on disk %s", (int)latency,
diskname);
}
}
/* Common exit: release all buffers and the device, then leave the process. */
out:
+ free(s_header);
+ free(s_node);
free(s_mbox);
close_device(st);
exit(rc);
}
diff --git a/src/sbd.h b/src/sbd.h
index 7c3c1ec..bbdc6f1 100644
--- a/src/sbd.h
+++ b/src/sbd.h
@@ -1,218 +1,219 @@
/*
* Copyright (C) 2013 Lars Marowsky-Bree <lmb@suse.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <arpa/inet.h>
#include <asm/unistd.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <libaio.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/watchdog.h>
#include <malloc.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/ptrace.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <syslog.h>
#include <time.h>
#include <unistd.h>
#include <uuid/uuid.h>
#include <qb/qblog.h>
#include <crm_config.h>
#include <config.h>
/* signals reserved for multi-disk sbd */
#define SIG_LIVENESS (SIGRTMIN + 1) /* report liveness of the disk */
#define SIG_EXITREQ (SIGRTMIN + 2) /* exit request to inquisitor */
#define SIG_TEST (SIGRTMIN + 3) /* trigger self test */
#define SIG_RESTART (SIGRTMIN + 4) /* trigger restart of all failed disk */
#define SIG_PCMK_UNHEALTHY (SIGRTMIN + 5)
/* FIXME: should add dynamic check of SIG_XX >= SIGRTMAX */
/* exit status for disk-servant */
/* Used by servant_md() as the process exit code. */
#define EXIT_MD_SERVANT_IO_FAIL 20
#define EXIT_MD_SERVANT_REQUEST_RESET 21
#define EXIT_MD_SERVANT_REQUEST_SHUTOFF 22
#define EXIT_MD_SERVANT_REQUEST_CRASHDUMP 23
/* exit status for pcmk-servant */
#define EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN 30
#define HOG_CHAR 0xff
/* Maximum length of a node name; the name fields in the sector structs
 * below are SECTOR_NAME_MAX+1 bytes to leave room for the NUL. */
#define SECTOR_NAME_MAX 63
/* Sector data types */
/* Device header, stored in sector 0. */
struct sector_header_s {
/* Must equal "SBD_SBD_" (8 bytes, no NUL). */
char magic[8];
unsigned char version;
/* Number of slots on the device; init_device() writes 255. */
unsigned char slots;
/* Caveat: stored in network byte-order */
/* header_write()/header_read() convert these five fields with
 * htonl()/ntohl(). */
uint32_t sector_size;
uint32_t timeout_watchdog;
uint32_t timeout_allocate;
uint32_t timeout_loop;
uint32_t timeout_msgwait;
/* Minor version for extensions to the core data set:
* compatible and optional values. */
unsigned char minor_version;
/* Only reported when minor_version > 0. */
uuid_t uuid; /* 16 bytes */
};
/* Mailbox record: one command byte plus the name of the sender. */
struct sector_mbox_s {
signed char cmd; /* presumably one of the SBD_MSG_* codes from sbd-md.c - confirm */
char from[SECTOR_NAME_MAX+1];
};
/* Slot record: an in-use flag and the name stored in the slot. */
struct sector_node_s {
/* slots will be created with in_use == 0 */
char in_use;
char name[SECTOR_NAME_MAX+1];
};
/*
 * Node of a singly linked list of servants (chained through `next`).
 * Each node records the device name, the servant's pid and bookkeeping
 * counters/flags plus two timestamps.
 */
struct servants_list_item {
const char* devname;
pid_t pid;
int restarts;
int restart_blocked;
int outdated;
int first_start;
struct timespec t_last, t_started;
struct servants_list_item *next; /* next list entry */
};
/* I/O state: a file descriptor together with a libaio context and
 * control block. */
struct sbd_context {
int devfd; /* file descriptor */
io_context_t ioctx; /* libaio context */
struct iocb io; /* libaio control block */
+ void *buffer; /* NOTE(review): presumably the data buffer used with `io` - confirm in sbd-md.c */
};
/* Health states; this is the type of the `state` argument taken by
 * set_servant_health(), declared further down in this header. */
enum pcmk_health
{
pcmk_health_unknown,
pcmk_health_pending,
pcmk_health_transient,
pcmk_health_unclean,
pcmk_health_shutdown,
pcmk_health_online,
pcmk_health_noquorum,
};
/* Prototypes of shared helpers; the definitions are not part of this
 * header. */
void usage(void);
/* watchdog handling */
int watchdog_init_interval(void);
int watchdog_tickle(void);
int watchdog_init(void);
void sysrq_init(void);
void watchdog_close(bool disarm);
int watchdog_info(void);
int watchdog_test(void);
/* sysrq and self-fencing actions */
void sysrq_trigger(char t);
void do_crashdump(void);
void do_reset(void);
void do_off(void);
void do_timeout_action(void);
/* process setup */
pid_t make_daemon(void);
void maximize_priority(void);
void sbd_get_uname(void);
void sbd_set_format_string(int method, const char *daemon);
void notify_parent(void);
/* Tunable defaults (declarations only; defined in another translation unit): */
extern unsigned long timeout_watchdog;
extern unsigned long timeout_watchdog_warn;
extern bool do_calculate_timeout_watchdog_warn;
extern unsigned long timeout_watchdog_crashdump;
extern int timeout_allocate;
extern int timeout_loop;
extern int timeout_msgwait;
extern int timeout_io;
extern int timeout_startup;
extern int watchdog_use;
extern int watchdog_set_timeout;
extern int skip_rt;
extern int debug; /* DBGLOG() below only logs while this is > 0 */
extern int debug_mode;
extern char *watchdogdev;
extern bool watchdogdev_is_default;
extern char* local_uname;
extern bool do_flush;
extern char timeout_sysrq_char;
extern bool move_to_root_cgroup;
extern bool enforce_moving_to_root_cgroup;
extern bool sync_resource_startup;
/* Global, non-tunable variables: */
extern int sector_size;
extern int watchdogfd;
extern const char* cmdname;
/* Signature shared by the servant entry points declared below
 * (servant_md, servant_pcmk, servant_cluster). */
typedef int (*functionp_t)(const char* devname, int mode, const void* argp);
int assign_servant(const char* devname, functionp_t functionp, int mode, const void* argp);
/* The disk-related API is only declared when SUPPORT_SHARED_DISK is set. */
#if SUPPORT_SHARED_DISK
void open_any_device(struct servants_list_item *servants);
int init_devices(struct servants_list_item *servants);
int allocate_slots(const char *name, struct servants_list_item *servants);
int list_slots(struct servants_list_item *servants);
int ping_via_slots(const char *name, struct servants_list_item *servants);
int dump_headers(struct servants_list_item *servants);
unsigned long get_first_msgwait(struct servants_list_item *servants);
int messenger(const char *name, const char *msg, struct servants_list_item *servants);
int servant_md(const char *diskname, int mode, const void* argp);
#endif
int servant_pcmk(const char *diskname, int mode, const void* argp);
int servant_cluster(const char *diskname, int mode, const void* argp);
/* servant list lookups */
struct servants_list_item *lookup_servant_by_dev(const char *devname);
struct servants_list_item *lookup_servant_by_pid(pid_t pid);
/* process title handling */
int init_set_proc_title(int argc, char *argv[], char *envp[]);
void set_proc_title(const char *fmt,...);
/* cl_log(): forward a printf-style message to libqb's
 * qb_log_from_external_source(), tagging it with the calling function,
 * file and line. */
#define cl_log(level, fmt, args...) qb_log_from_external_source( __func__, __FILE__, fmt, level, __LINE__, 0, ##args)
/* cl_perror(): log at LOG_ERR with ": <strerror(errno)> (<errno>)" appended.
 * NOTE(review): the expansion declares a local named `err`; an argument
 * expression that uses a caller variable called `err` would bind to the
 * macro's local instead. */
# define cl_perror(fmt, args...) do { \
const char *err = strerror(errno); \
cl_log(LOG_ERR, fmt ": %s (%d)", ##args, err, errno); \
} while(0)
/* DBGLOG(): like cl_log(), but only emits while the global `debug` is > 0. */
#define DBGLOG(lvl, fmt, args...) do { \
if (debug > 0) cl_log(lvl, fmt, ##args); \
} while(0)
/* Servant health state (defined elsewhere). */
extern int servant_health;
/* Takes a health state, a level and a printf-style message; the format
 * attribute lets the compiler check `format` against the variadic
 * arguments (format is parameter 3, arguments start at 4). */
void set_servant_health(enum pcmk_health state, int level, char const *format, ...) __attribute__ ((__format__ (__printf__, 3, 4)));
/* Predicates classifying a servant list entry. */
bool sbd_is_disk(struct servants_list_item *servant);
bool sbd_is_pcmk(struct servants_list_item *servant);
bool sbd_is_cluster(struct servants_list_item *servant);
/*
 * Derive the watchdog warning threshold from a watchdog timeout:
 *  - timeouts below 5 yield 2
 *  - otherwise the result is 3/5 of the timeout; the multiplication is
 *    done first for precision unless it could overflow an unsigned long,
 *    in which case the division is done first.
 *
 * Every use of the argument is parenthesized so that expressions with
 * low-precedence operators (e.g. `a ? b : c`, `a | b`) are evaluated as
 * a whole. The argument is still evaluated more than once, so it must
 * not have side effects.
 */
#define calculate_timeout_watchdog_warn(timeout) \
	((timeout) < 5 ? 2 : \
	((timeout) < (ULONG_MAX / 3) ? \
	(((unsigned long) (timeout)) * 3 / 5) : (((unsigned long) (timeout)) / 5 * 3)))
diff --git a/tests/sbd-testbed.c b/tests/sbd-testbed.c
index 858b1be..91920f2 100644
--- a/tests/sbd-testbed.c
+++ b/tests/sbd-testbed.c
@@ -1,729 +1,745 @@
#define _GNU_SOURCE
#include <stdlib.h>
#include <dlfcn.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/reboot.h>
#include <stdarg.h>
#include <stddef.h>
#include <fcntl.h>
#include <libaio.h>
#include <linux/watchdog.h>
#include <linux/fs.h>
#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <glib.h>
#include <errno.h>
/* Use glib-unix.h when available, otherwise provide a local replacement
 * for g_unix_fd_add() built on GIOChannel.
 * NOTE(review): __GLIBC_PREREQ tests the C library (glibc) version while
 * g_unix_fd_add() is a GLib API; GLIB_CHECK_VERSION may have been
 * intended - confirm. */
#if __GLIBC_PREREQ(2,36)
#include <glib-unix.h>
#else
#include <glib/giochannel.h>
/* Callback type expected by the local g_unix_fd_add() replacement. */
typedef gboolean (*GUnixFDSourceFunc) (gint fd,
GIOCondition condition,
gpointer user_data);
/*
 * GIOFunc adapter: `data` carries the GUnixFDSourceFunc to run. It is
 * called with the channel's file descriptor and the reported condition;
 * its user_data argument is always NULL.
 */
static gboolean
GIOFunc2GUnixFDSourceFunc(GIOChannel *source,
                          GIOCondition condition,
                          gpointer data)
{
    GUnixFDSourceFunc callback = (GUnixFDSourceFunc) data;
    gint channel_fd = g_io_channel_unix_get_fd(source);

    return callback(channel_fd, condition, NULL);
}
/*
 * Local replacement for g_unix_fd_add(): watch `fd` for `condition` and
 * run `function` when it triggers.
 *
 * Returns the id of the created watch, or 0 if no channel could be
 * created. `user_data` is not forwarded: the data slot of the watch is
 * used to carry `function`, and the callback receives NULL.
 */
static guint
g_unix_fd_add(gint fd,
              GIOCondition condition,
              GUnixFDSourceFunc function,
              gpointer user_data)
{
    GIOChannel *chan = g_io_channel_unix_new (fd);
    guint source_id;

    if (chan == NULL) {
        return 0;
    }

    source_id = g_io_add_watch(chan,
                               condition,
                               GIOFunc2GUnixFDSourceFunc,
                               (gpointer) function);
    /* the watch keeps its own reference to the channel; drop ours so the
     * channel is released together with the watch instead of leaking */
    g_io_channel_unref(chan);

    return source_id;
}
#endif
/* Function-pointer types for the real implementations that the shims in
 * this file wrap; init() resolves one pointer of each type. */
typedef int (*orig_open_f_type)(const char *pathname, int flags, ...);
typedef int (*orig_ioctl_f_type)(int fd, unsigned long int request, ...);
typedef ssize_t (*orig_write_f_type)(int fd, const void *buf, size_t count);
typedef int (*orig_close_f_type)(int fd);
typedef FILE *(*orig_fopen_f_type)(const char *pathname, const char *mode);
typedef int (*orig_fclose_f_type)(FILE *fp);
/* libaio entry points */
typedef int (*orig_io_setup_f_type)(int nr_events, io_context_t *ctx_idp);
+typedef int (*orig_io_destroy_f_type)(io_context_t ctx_id);
typedef int (*orig_io_submit_f_type)(io_context_t ctx_id, long nr, struct iocb *ios[]);
typedef int (*orig_io_getevents_f_type)(io_context_t ctx_id, long min_nr, long nr,
struct io_event *events, struct timespec *timeout);
typedef int (*orig_io_cancel_f_type)(io_context_t ctx_id, struct iocb *iocb,
struct io_event *result);
/* Set by init() once the one-time setup has started. */
static int is_init = 0;
/* Destination of the shim's own log messages (see init()). */
static FILE *log_fp = NULL;
/* Up to three device paths taken from SBD_DEVICE and the descriptors
 * open() returned for them (-1 while not open). */
static char *sbd_device[3] = {NULL, NULL, NULL};
static int sbd_device_fd[3] = {-1, -1, -1};
/* Streams handed out by fopen() for the sysrq files. */
static FILE *sysrq_fp = NULL;
static FILE *sysrq_trigger_fp = NULL;
/* Watchdog emulation: path from SBD_WATCHDOG_DEV, descriptor returned by
 * open(), current timeout in seconds, pid of the forked timer child and
 * the pipe used to send it commands. */
static char *watchdog_device = NULL;
static int watchdog_device_fd = -1;
static int watchdog_timeout = -1;
static pid_t watchdog_pid = -1;
static int watchdog_pipe[2] = {-1, -1};
static guint watchdog_source_id = 0;
static int watchdog_timer_id = 0;
/* Real implementations resolved in init(). */
static orig_open_f_type orig_open = NULL;
static orig_ioctl_f_type orig_ioctl = NULL;
static orig_write_f_type orig_write = NULL;
static orig_close_f_type orig_close = NULL;
static orig_fopen_f_type orig_fopen = NULL;
static orig_fclose_f_type orig_fclose = NULL;
static orig_io_setup_f_type orig_io_setup = NULL;
+static orig_io_destroy_f_type orig_io_destroy = NULL;
static orig_io_submit_f_type orig_io_submit = NULL;
static orig_io_getevents_f_type orig_io_getevents = NULL;
static orig_io_cancel_f_type orig_io_cancel = NULL;
/* fprintf is inlined as __fprintf_chk or
* we have vfprintf.
* For fscanf we have vfscanf.
* For reboot we anyway don't want that to be
* called in any case.
*/
/* AIO translation state: the single request accepted by io_submit() and
 * not yet reaped, the dummy context handed out by io_setup(), and the
 * switch (SBD_TRANSLATE_AIO=yes) that enables the translation. */
static struct iocb *pending_iocb = NULL;
struct io_context { int context_num; };
static struct io_context our_io_context = {.context_num = 1};
static int translate_aio = 0;
/* Main loop run by the forked watchdog child. */
static GMainLoop *mainloop = NULL;
/* Compiled out: signal handler that would log when the process exits
 * while the emulated watchdog timer is still armed. */
#if 0
static void
watchdog_shutdown(int nsig)
{
if (watchdog_timer_id > 0) {
fprintf(log_fp, "exiting with watchdog-timer armed\n");
}
}
#endif
/*
 * dlsym() that does not return on failure: when `symbol` cannot be
 * resolved through `handle`, a message is printed to stderr and the
 * process exits with status 1. Otherwise the symbol address is returned.
 */
static void*
dlsym_fatal(void *handle, const char *symbol)
{
    void *addr = dlsym(handle, symbol);

    if (addr != NULL) {
        return addr;
    }

    fprintf(stderr, "Failed looking up symbol %s\n", symbol);
    exit(1);
}
/*
 * One-time setup of the preload shim; every later call is a no-op.
 *
 * Resolves the real open/ioctl/close/write/fopen/fclose via RTLD_NEXT and
 * the libaio entry points from libaio.so.1, opens the log stream and reads
 * the configuration from the environment:
 *   SBD_PRELOAD_LOG    log file (appended); default is a dup of stderr
 *   SBD_WATCHDOG_DEV   path treated as the watchdog device
 *   SBD_DEVICE         up to three ';'-separated block-device paths
 *   SBD_TRANSLATE_AIO  "yes" enables the AIO translation
 * Exits with status 1 if a symbol or libaio.so.1 cannot be found.
 */
static void
init (void)
{
void *handle;
if (!is_init) {
const char *value;
int i;
char *token, *str, *str_orig;
/* flag first: the fopen() below is this file's own wrapper and calls
 * init() again */
is_init = 1;
orig_open = (orig_open_f_type)dlsym_fatal(RTLD_NEXT,"open");
orig_ioctl = (orig_ioctl_f_type)dlsym_fatal(RTLD_NEXT,"ioctl");
orig_close = (orig_close_f_type)dlsym_fatal(RTLD_NEXT,"close");
orig_write = (orig_write_f_type)dlsym_fatal(RTLD_NEXT,"write");
orig_fopen = (orig_fopen_f_type)dlsym_fatal(RTLD_NEXT,"fopen");
orig_fclose = (orig_fclose_f_type)dlsym_fatal(RTLD_NEXT,"fclose");
handle = dlopen("libaio.so.1", RTLD_NOW);
if (!handle) {
fprintf(stderr, "Failed opening libaio.so.1\n");
exit(1);
}
orig_io_setup = (orig_io_setup_f_type)dlsym_fatal(handle,"io_setup");
+ orig_io_destroy = (orig_io_destroy_f_type)dlsym_fatal(handle,"io_destroy");
orig_io_submit = (orig_io_submit_f_type)dlsym_fatal(handle,"io_submit");
orig_io_getevents = (orig_io_getevents_f_type)dlsym_fatal(handle,"io_getevents");
orig_io_cancel = (orig_io_cancel_f_type)dlsym_fatal(handle,"io_cancel");
/* NOTE(review): the pointers stay usable after dlclose() only while
 * libaio remains loaded through another reference - confirm */
dlclose(handle);
value = getenv("SBD_PRELOAD_LOG");
if (value) {
log_fp = fopen(value, "a");
} else {
int fd = dup(fileno(stderr));
if (fd >= 0) {
log_fp = fdopen(fd, "w");
}
}
/* only reported; log_fp stays NULL and the later fprintf(log_fp, ...)
 * calls in this file do not check for that */
if (log_fp == NULL) {
fprintf(stderr, "couldn't open log-file\n");
}
value = getenv("SBD_WATCHDOG_DEV");
if (value) {
watchdog_device = strdup(value);
}
value = getenv("SBD_DEVICE");
/* tokenize a private copy, strtok() modifies its input */
if ((value) && (str = str_orig = strdup(value))) {
for (i = 0; i < 3; i++, str = NULL) {
token = strtok(str, ";");
if (token == NULL) {
break;
}
sbd_device[i] = strdup(token);
}
free(str_orig);
}
value = getenv("SBD_TRANSLATE_AIO");
if ((value) && !strcmp(value, "yes")) {
translate_aio = 1;
}
}
}
// ***** handling of watchdog & block-devices ****
/*
 * Timer callback installed by watchdog_dispatch_callback(): the emulated
 * watchdog expired. Logs the event, closes the log stream, sends SIGKILL
 * to the caller's process group and exits with status 1, so it never
 * returns a value to the main loop. `data` is unused.
 */
static gboolean
watchdog_timeout_notify(gpointer data)
{
fprintf(log_fp, "watchdog fired after %ds - killing process group\n",
watchdog_timeout);
fclose(log_fp);
log_fp = NULL;
killpg(0, SIGKILL);
exit(1);
}
/*
 * Source callback of the watchdog child for the read end of the
 * watchdog pipe.
 *
 * Any running timer is removed first. Then one line (up to 255 bytes,
 * terminated by '\n', end of data or a read error) is read from the pipe
 * and interpreted:
 *   "trigger <n>s"  start a timer of n seconds that ends in
 *                   watchdog_timeout_notify()
 *   "disarm"        leave the timer stopped
 * Anything else is logged as unknown.
 *
 * Returns FALSE on G_IO_HUP (removing the source), TRUE otherwise.
 * `fd` and `user_data` are unused.
 */
static gboolean
watchdog_dispatch_callback (gint fd,
                            GIOCondition condition,
                            gpointer user_data)
{
    char line[256];
    int pos = 0;

    if (condition & G_IO_HUP) {
        return FALSE;
    }

    if (watchdog_timer_id > 0) {
        g_source_remove(watchdog_timer_id);
    }
    watchdog_timer_id = 0;

    for (pos = 0; pos < sizeof(line)-1; pos++) {
        ssize_t got;

        /* read a single byte, retrying when interrupted by a signal */
        do {
            got = read(watchdog_pipe[0], &line[pos], 1);
        } while ((got == -1) && (errno == EINTR));

        if (got == -1) {
            fprintf(log_fp, "Couldn't read from watchdog-pipe\n");
        }
        /* stop at error, end of data or end of line */
        if ((got <= 0) || (line[pos] == '\n')) {
            line[pos] = '\0';
            break;
        }
    }
    line[sizeof(line)-1] = '\0';

    if (sscanf(line, "trigger %ds", &watchdog_timeout) == 1) {
        watchdog_timer_id = g_timeout_add(watchdog_timeout * 1000, watchdog_timeout_notify, NULL);
        return TRUE;
    }

    /* for "disarm" the timer is stopped already */
    if (strcmp(line, "disarm") != 0) {
        fprintf(log_fp, "unknown watchdog command\n");
    }
    return TRUE;
}
/*
 * (Re)arm the emulated watchdog: send "trigger <timeout>s" to the
 * watchdog child through the pipe. Does nothing unless a positive
 * timeout is set and the write end of the pipe exists. A short or failed
 * write is only logged.
 */
static void
watchdog_arm (void) {
    char cmd[256];
    size_t cmd_len;

    if ((watchdog_timeout <= 0) || (watchdog_pipe[1] < 0)) {
        return;
    }

    sprintf(cmd, "trigger %ds\n", watchdog_timeout);
    cmd_len = strlen(cmd);
    if (write(watchdog_pipe[1], cmd, cmd_len) != cmd_len) {
        fprintf(log_fp, "Failed tickling watchdog via pipe\n");
    }
}
/*
 * Disarm the emulated watchdog: forget the timeout and, if the write end
 * of the pipe exists, send "disarm" to the watchdog child. A short or
 * failed write is only logged.
 */
static void
watchdog_disarm (void) {
    static const char cmd[] = "disarm\n";
    const size_t cmd_len = sizeof(cmd) - 1;

    watchdog_timeout = -1;

    if (watchdog_pipe[1] < 0) {
        return;
    }
    if (write(watchdog_pipe[1], cmd, cmd_len) != cmd_len) {
        fprintf(log_fp, "Failed disarming watchdog via pipe\n");
    }
}
/*
 * Tell whether open(2) flags require the optional mode argument.
 * That is the case for O_CREAT and for O_TMPFILE. On Linux the O_TMPFILE
 * constant includes the O_DIRECTORY bit, so it has to be matched as a
 * whole pattern; a plain bit test would also fire for O_DIRECTORY opens
 * that carry no mode argument.
 */
static int
open_needs_mode(int flags)
{
    if (flags & O_CREAT) {
        return 1;
    }
#ifdef O_TMPFILE
    if ((flags & O_TMPFILE) == O_TMPFILE) {
        return 1;
    }
#endif
    return 0;
}

/*
 * Body of the forked watchdog child; never returns.
 * Drops the copies of the configuration it does not need, switches the
 * read end of the pipe to non-blocking and serves commands from the pipe
 * in a GLib main loop via watchdog_dispatch_callback().
 */
static void
watchdog_child_run(void)
{
    int i;

    free(watchdog_device);
    watchdog_device = NULL;
    for (i = 0; i < 3; i++) {
        free(sbd_device[i]);
        sbd_device[i] = NULL;
    }
    close(watchdog_pipe[1]);
    if (fcntl(watchdog_pipe[0], F_SETFL, O_NONBLOCK) == -1) {
        // don't block on read for timer to be handled
        fprintf(log_fp,
                "Failed setting watchdog-pipe-read to non-blocking");
    }
    mainloop = g_main_loop_new(NULL, FALSE);
    // mainloop_add_signal(SIGTERM, watchdog_shutdown);
    // mainloop_add_signal(SIGINT, watchdog_shutdown);
    watchdog_source_id = g_unix_fd_add(watchdog_pipe[0],
                                       G_IO_IN,
                                       watchdog_dispatch_callback,
                                       NULL);
    if (watchdog_source_id == 0) {
        fprintf(log_fp, "Failed creating source for watchdog-pipe\n");
        exit(1);
    }
    g_main_loop_run(mainloop);
    g_main_loop_unref(mainloop);
    exit(0);
}

/*
 * open() wrapper.
 *
 *  - A path listed in SBD_DEVICE is opened without O_DIRECT and the
 *    returned descriptor is remembered in sbd_device_fd[].
 *  - The path given in SBD_WATCHDOG_DEV is redirected to /dev/null and the
 *    returned descriptor is remembered in watchdog_device_fd. On the first
 *    such open a pipe is created and a child is forked that emulates the
 *    watchdog timer.
 *  - Everything else is passed through unchanged.
 *
 * The optional mode argument is fetched and forwarded only when the flags
 * require one (see open_needs_mode()).
 * Returns whatever the real open() returns.
 */
int
open(const char *pathname, int flags, ...)
{
    int i, fd;
    int devnum = -1;
    int is_wd_dev = 0;
    va_list ap;

    init();

    for (i = 0; i < 3; i++) {
        if (sbd_device[i] && (strcmp(sbd_device[i], pathname) == 0)) {
            devnum = i;
            flags &= ~O_DIRECT;
            break;
        }
    }

    if (watchdog_device && (strcmp(watchdog_device, pathname) == 0)) {
        is_wd_dev = 1;
        if (watchdog_pipe[1] == -1) {
            if (pipe(watchdog_pipe) == -1) {
                fprintf(log_fp, "Creating pipe for watchdog failed\n");
            } else {
                watchdog_pid = fork();
                switch (watchdog_pid) {
                    case -1:
                        fprintf(log_fp, "Forking watchdog-child failed\n");
                        break;
                    case 0:
                        watchdog_child_run();
                        /* not reached */
                        break;
                    default:
                        /* parent keeps only the write end */
                        close(watchdog_pipe[0]);
                        if (fcntl(watchdog_pipe[1], F_SETFL, O_NONBLOCK) == -1) {
                            fprintf(log_fp,
                                    "Failed setting watchdog-pipe-write to non-blocking");
                        }
                }
            }
        }
        pathname = "/dev/null";
    }

    if (open_needs_mode(flags)) {
        va_start (ap, flags);
        fd = orig_open(pathname, flags, va_arg(ap, mode_t));
        va_end (ap);
    } else {
        fd = orig_open(pathname, flags);
    }

    if (devnum >= 0) {
        sbd_device_fd[devnum] = fd;
    } else if (is_wd_dev) {
        watchdog_device_fd = fd;
    }
    return fd;
}
/*
 * write() wrapper. A non-empty write to the watchdog descriptor drives
 * the emulated watchdog: a first byte of 'V' disarms it, anything else
 * (re)arms it. The data is then written through the real write() in
 * every case and its result is returned.
 */
ssize_t
write(int fd, const void *buf, size_t count)
{
    init();

    if ((count >= 1) && (fd == watchdog_device_fd)) {
        const char first = *(const char *)buf;

        if (first == 'V') {
            watchdog_disarm();
        } else {
            watchdog_arm();
        }
    }

    return orig_write(fd, buf, count);
}
/*
 * ioctl() wrapper.
 *
 *   BLKSSZGET         for a tracked sbd device descriptor a sector size
 *                     of 512 is reported and 0 returned
 *   WDIOC_SETTIMEOUT  on the watchdog descriptor the timeout is stored,
 *                     the emulated watchdog armed and 0 returned
 *   WDIOC_SETOPTIONS  on the watchdog descriptor WDIOS_DISABLECARD
 *                     disarms the emulated watchdog; 0 is returned
 *   WDIOC_GETSUPPORT  always passed through
 *   anything else     logged and passed through with a pointer argument
 *
 * The first three requests are passed to the real ioctl() when the
 * descriptor is not one of the emulated ones.
 */
int
ioctl(int fd, unsigned long int request, ...)
{
    int result = -1;
    va_list args;

    init();

    va_start(args, request);
    switch (request) {
        case BLKSSZGET: {
            int *sector_sz = va_arg(args, int *);
            int slot = 0;

            while ((slot < 3) && (sbd_device_fd[slot] != fd)) {
                slot++;
            }
            if (slot < 3) {
                *sector_sz = 512;
                result = 0;
            } else {
                result = orig_ioctl(fd, request, sector_sz);
            }
            break;
        }
        case WDIOC_SETTIMEOUT: {
            int *seconds = va_arg(args, int *);

            if (fd != watchdog_device_fd) {
                result = orig_ioctl(fd, request, seconds);
            } else {
                watchdog_timeout = *seconds;
                watchdog_arm();
                result = 0;
            }
            break;
        }
        case WDIOC_SETOPTIONS: {
            int *options = va_arg(args, int *);

            if (fd != watchdog_device_fd) {
                result = orig_ioctl(fd, request, options);
            } else {
                if (*options & WDIOS_DISABLECARD) {
                    watchdog_disarm();
                }
                result = 0;
            }
            break;
        }
        case WDIOC_GETSUPPORT:
            result = orig_ioctl(fd, request, va_arg(args, struct watchdog_info *));
            break;
        default:
            fprintf(log_fp, "ioctl using unknown request = 0x%08lx", request);
            result = orig_ioctl(fd, request, va_arg(args, void *));
    }
    va_end(args);

    return result;
}
/*
 * close() wrapper: forget the descriptor if it is the tracked watchdog
 * descriptor or (otherwise) the first matching tracked sbd device
 * descriptor, then close it through the real close().
 */
int
close(int fd)
{
    init();

    if (fd != watchdog_device_fd) {
        int slot;

        for (slot = 0; slot < 3; slot++) {
            if (sbd_device_fd[slot] == fd) {
                sbd_device_fd[slot] = -1;
                break;
            }
        }
    } else {
        watchdog_device_fd = -1;
    }

    return orig_close(fd);
}
// ***** end - handling of watchdog & block-devices ****
// ***** handling of sysrq, sysrq-trigger & reboot ****
/*
 * fopen() wrapper. Two paths are redirected to /dev/null and the
 * resulting stream is remembered:
 *   /proc/sys/kernel/sysrq  (only when mode is not exactly "w") -> sysrq_fp
 *   /proc/sysrq-trigger                                  -> sysrq_trigger_fp
 * All other opens are passed through unchanged.
 */
FILE *
fopen(const char *pathname, const char *mode)
{
    FILE **tracked = NULL;
    FILE *stream;

    init();

    if ((strcmp("/proc/sys/kernel/sysrq", pathname) == 0) &&
        (strcmp("w", mode) != 0)) {
        tracked = &sysrq_fp;
    } else if (strcmp("/proc/sysrq-trigger", pathname) == 0) {
        tracked = &sysrq_trigger_fp;
    }

    if (tracked != NULL) {
        pathname = "/dev/null";
    }

    stream = orig_fopen(pathname, mode);

    if (tracked != NULL) {
        *tracked = stream;
    }
    return stream;
}
/*
 * fclose() wrapper: forget the stream if it is one of the tracked sysrq
 * streams (sysrq_fp is checked first), then close it through the real
 * fclose().
 */
int
fclose(FILE *fp)
{
    FILE **tracked[] = { &sysrq_fp, &sysrq_trigger_fp };
    int k;

    init();

    for (k = 0; k < 2; k++) {
        if (*tracked[k] == fp) {
            *tracked[k] = NULL;
            break;
        }
    }

    return orig_fclose(fp);
}
/*
 * fprintf() wrapper; defined as __fprintf_chk() instead when the fortify
 * level is above 1.
 *
 * Output to the tracked sysrq-trigger stream is not written anywhere:
 * it is formatted into a local buffer and the first character is logged.
 * If that character is 'c' the log is closed, SIGKILL is sent to the
 * caller's process group and the process exits with status 1. Output to
 * any other stream goes to vfprintf().
 *
 * Returns the vsnprintf()/vfprintf() result.
 *
 * NOTE(review): the stream check also matches when `stream` and
 * sysrq_trigger_fp are both NULL (e.g. log_fp could not be opened) -
 * confirm whether that case needs handling.
 */
#if defined(__USE_FORTIFY_LEVEL) && (__USE_FORTIFY_LEVEL > 1)
int
__fprintf_chk(FILE *stream, int flag, const char *format, ...)
#else
int
fprintf(FILE *stream, const char *format, ...)
#endif
{
va_list ap;
int rv;
init();
va_start (ap, format);
if (stream == sysrq_trigger_fp) {
char buf[256];
rv = vsnprintf(buf, sizeof(buf), format, ap);
if (rv >= 1) {
/* only the first character of the formatted text is evaluated */
fprintf(log_fp, "sysrq-trigger ('%c') - %s\n", buf[0],
(buf[0] == 'c')?"killing process group":"don't kill but wait for reboot-call");
if (buf[0] == 'c') {
fclose(log_fp);
log_fp = NULL;
killpg(0, SIGKILL);
exit(1);
}
}
} else {
rv = vfprintf(stream, format, ap);
}
va_end (ap);
return rv;
}
/*
 * fscanf() wrapper: runs init() and then forwards to vfscanf() with the
 * same stream, format and arguments, returning its result.
 */
int
fscanf(FILE *stream, const char *format, ...)
{
va_list ap;
int rv;
init();
va_start (ap, format);
rv = vfscanf(stream, format, ap);
va_end (ap);
return rv;
}
/*
 * reboot() replacement: never reboots. Logs whether a poweroff
 * (RB_POWER_OFF) or a reboot was requested, closes the log, sends SIGKILL
 * to the caller's process group and exits with status 1, so it does not
 * return.
 *
 * init() is run first like in every other wrapper of this file, so that
 * log_fp is set up before it is passed to fprintf() even if reboot() is
 * the first intercepted call.
 */
int
reboot (int __howto)
{
    init();

    fprintf(log_fp, "reboot (%s) - exiting inquisitor process\n",
            (__howto == RB_POWER_OFF)?"poweroff":"reboot");
    fclose(log_fp);
    log_fp = NULL;
    killpg(0, SIGKILL);
    exit(1);
}
// ***** end - handling of sysrq, sysrq-trigger & reboot ****
// ***** aio translate ****
/* Compiled out: partial excerpt of struct iocb and of the request
 * preparation helpers, kept as a reference for the iocb fields that the
 * translation code below reads (aio_lio_opcode, aio_fildes, u.c.*). */
#if 0
struct iocb {
void *data;
unsigned key;
short aio_lio_opcode;
short aio_reqprio;
int aio_fildes;
};
static inline void io_prep_pread(struct iocb *iocb, int fd, void *buf, size_t count, long long offset)
{
memset(iocb, 0, sizeof(*iocb));
iocb->aio_fildes = fd;
iocb->aio_lio_opcode = IO_CMD_PREAD;
iocb->aio_reqprio = 0;
iocb->u.c.buf = buf;
iocb->u.c.nbytes = count;
iocb->u.c.offset = offset;
}
static inline void io_prep_pwrite(struct iocb *iocb, int fd, void *buf, size_t count, long long offset)
{
memset(iocb, 0, sizeof(*iocb));
iocb->aio_fildes = fd;
iocb->aio_lio_opcode = IO_CMD_PWRITE;
iocb->aio_reqprio = 0;
iocb->u.c.buf = buf;
iocb->u.c.nbytes = count;
iocb->u.c.offset = offset;
}
#endif
/*
 * io_setup() wrapper.
 *
 * Without AIO translation the call goes to libaio. With translation a
 * single dummy context (our_io_context) supporting exactly one event is
 * handed out.
 *
 * Returns 0 on success. Errors follow the libaio convention of returning
 * the negated errno value:
 *   -EINVAL  nr_events is zero or negative
 *   -EAGAIN  more than one event requested
 *   -EFAULT  ctx_idp is NULL
 */
int io_setup(int nr_events, io_context_t *ctx_idp)
{
    init();

    if (!translate_aio) {
        return orig_io_setup(nr_events, ctx_idp);
    }

    if (nr_events <= 0) {
        return -EINVAL;
    }
    if (nr_events > 1) {
        return -EAGAIN;
    }
    if (ctx_idp == NULL) {
        return -EFAULT;
    }

    *ctx_idp = &our_io_context;
    return 0;
}
/*
 * io_destroy() wrapper. Without AIO translation the call goes to libaio.
 * With translation there is nothing to release: 0 is returned for the
 * dummy context handed out by io_setup() and EINVAL for any other one.
 */
+int io_destroy(io_context_t ctx_id)
+{
+ init();
+
+ if (!translate_aio) {
+ return orig_io_destroy(ctx_id);
+ }
+
+ if (ctx_id != &our_io_context) {
+ return EINVAL; /* NOTE(review): libaio documents negated errno values (-EINVAL) for its wrappers - confirm the intended sign */
+ }
+ return 0;
+}
/*
 * io_submit() wrapper.
 *
 * Without AIO translation the call goes to libaio. With translation at
 * most one read or write request is accepted and parked in pending_iocb;
 * it is carried out synchronously by io_getevents().
 *
 * Returns the number of accepted requests (0 or 1). Errors follow the
 * libaio convention of returning the negated errno value:
 *   -EAGAIN  a request is already pending or more than one was passed
 *   -EFAULT  nr is 1 but ios or ios[0] is NULL
 *   -EINVAL  unknown context, negative nr or an opcode other than
 *            IO_CMD_PREAD / IO_CMD_PWRITE
 *   -EBADF   the request refers to an invalid file descriptor
 */
int io_submit(io_context_t ctx_id, long nr, struct iocb *ios[])
{
    init();

    if (!translate_aio) {
        return orig_io_submit(ctx_id, nr, ios);
    }

    if ((pending_iocb != NULL) ||
        (nr > 1)) {
        return -EAGAIN;
    }
    if ((nr == 1) && ((ios == NULL) || (ios[0] == NULL))) {
        return -EFAULT;
    }
    if ((ctx_id != &our_io_context) ||
        (nr < 0) ||
        ((nr == 1) &&
         (ios[0]->aio_lio_opcode != IO_CMD_PREAD) &&
         (ios[0]->aio_lio_opcode != IO_CMD_PWRITE))) {
        return -EINVAL;
    }
    if (nr == 0) {
        /* nothing to queue; ios may be NULL here and must not be touched */
        return 0;
    }
    if ((fcntl(ios[0]->aio_fildes, F_GETFD) == -1) && (errno == EBADF)) {
        return -EBADF;
    }

    pending_iocb = ios[0];
    return 1;
}
/*
 * io_getevents() wrapper.
 *
 * Without AIO translation the call goes to libaio. With translation the
 * request parked by io_submit() is executed synchronously with
 * pread()/pwrite() and reported as the single completion event; its
 * result is stored in events->res. `timeout` is not used in that mode.
 *
 * Returns 1 if an event was produced and 0 if no request was pending.
 * Errors follow the libaio convention of returning the negated errno
 * value, so callers can tell them apart from an event count:
 *   -EINVAL  unknown context, or min_nr / nr different from 1
 *   -EFAULT  events is NULL
 */
int io_getevents(io_context_t ctx_id, long min_nr, long nr,
                 struct io_event *events, struct timespec *timeout)
{
    init();

    if (!translate_aio) {
        return orig_io_getevents(ctx_id, min_nr, nr, events, timeout);
    }

    if ((ctx_id != &our_io_context) ||
        (min_nr != 1) ||
        (nr != 1)) {
        return -EINVAL;
    }
    if (events == NULL) {
        return -EFAULT;
    }
    if (pending_iocb == NULL) {
        return 0;
    }

    switch (pending_iocb->aio_lio_opcode) {
        case IO_CMD_PWRITE:
            events->res = pwrite(pending_iocb->aio_fildes,
                                 pending_iocb->u.c.buf,
                                 pending_iocb->u.c.nbytes,
                                 pending_iocb->u.c.offset);
            break;
        case IO_CMD_PREAD:
            events->res = pread(pending_iocb->aio_fildes,
                                pending_iocb->u.c.buf,
                                pending_iocb->u.c.nbytes,
                                pending_iocb->u.c.offset);
            break;
        default:
            events->res = 0;
    }

    events->data = pending_iocb->data;
    events->obj = pending_iocb;
    events->res2 = 0;
    pending_iocb = NULL;
    return 1;
}
/*
 * io_cancel() wrapper.
 *
 * Without AIO translation the call goes to libaio. With translation the
 * parked request is dropped without being executed and `result` is
 * filled with a zero-result event for it.
 *
 * Returns 0 on success. Errors follow the libaio convention of returning
 * the negated errno value:
 *   -EINVAL  unknown context
 *   -EFAULT  iocb or result is NULL
 *   -EAGAIN  iocb is not the pending request
 */
int io_cancel(io_context_t ctx_id, struct iocb *iocb,
              struct io_event *result)
{
    init();

    if (!translate_aio) {
        return orig_io_cancel(ctx_id, iocb, result);
    }

    if (ctx_id != &our_io_context) {
        return -EINVAL;
    }
    if ((iocb == NULL) || (result == NULL)) {
        return -EFAULT;
    }
    if (pending_iocb != iocb) {
        return -EAGAIN;
    }

    result->data = iocb->data;
    result->obj = iocb;
    result->res = 0;
    result->res2 = 0;
    pending_iocb = NULL;
    return 0;
}
// ***** end - aio translate ****

File Metadata

Mime Type
text/x-diff
Expires
Mon, Apr 21, 6:03 PM (1 d, 3 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1665030
Default Alt Text
(59 KB)

Event Timeline