Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F3686800
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
59 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/src/sbd-md.c b/src/sbd-md.c
index 7bdd0eb..47cbe8c 100644
--- a/src/sbd-md.c
+++ b/src/sbd-md.c
@@ -1,1248 +1,1289 @@
/*
* Copyright (C) 2013 Lars Marowsky-Bree <lmb@suse.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "sbd.h"
/* Command codes stored in the cmd field of a mailbox sector. */
#define SBD_MSG_EMPTY 0x00
#define SBD_MSG_TEST 0x01
#define SBD_MSG_RESET 0x02
#define SBD_MSG_OFF 0x03
#define SBD_MSG_EXIT 0x04
#define SBD_MSG_CRASHDUMP 0x05
/*
 * Sector 0 holds the header; every slot then owns two consecutive sectors:
 * its node entry followed by its mailbox. The argument is parenthesized so
 * that expressions such as SLOT_TO_SECTOR(a + b) expand as intended.
 */
#define SLOT_TO_SECTOR(slot) (1 + (slot) * 2)
#define MBOX_TO_SECTOR(mbox) (2 + (mbox) * 2)
/* Number of configured disks; defined outside this file. */
extern int disk_count;
/*
 * These have to match the values in the header of the partition.
 * Both are only ever read, so they are const. sbd_magic is exactly eight
 * bytes and deliberately carries no terminating NUL.
 */
static const char sbd_magic[8] = "SBD_SBD_";
static const char sbd_version = 0x02;
/*
 * Argument bundle handed to slot_msg_wrapper() through its opaque argp
 * pointer: the recipient name and the command text for slot_msg().
 */
struct slot_msg_arg_t {
const char* name;
const char* msg;
};
/*
 * Translate a textual command into its SBD_MSG_* code.
 * Returns -1 when the text matches none of the known commands.
 */
static signed char
cmd2char(const char *cmd)
{
	static const struct {
		const char *label;
		signed char code;
	} known[] = {
		{ "clear", SBD_MSG_EMPTY },
		{ "test", SBD_MSG_TEST },
		{ "reset", SBD_MSG_RESET },
		{ "off", SBD_MSG_OFF },
		{ "exit", SBD_MSG_EXIT },
		{ "crashdump", SBD_MSG_CRASHDUMP },
	};
	size_t idx;

	for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
		if (strcmp(known[idx].label, cmd) == 0) {
			return known[idx].code;
		}
	}
	return -1;
}
/*
 * Translate an SBD_MSG_* code into its textual command.
 * Any code without a known name yields "undefined".
 */
static const char*
char2cmd(const char cmd)
{
	const char *label = "undefined";

	switch (cmd) {
	case SBD_MSG_EMPTY:
		label = "clear";
		break;
	case SBD_MSG_TEST:
		label = "test";
		break;
	case SBD_MSG_RESET:
		label = "reset";
		break;
	case SBD_MSG_OFF:
		label = "off";
		break;
	case SBD_MSG_EXIT:
		label = "exit";
		break;
	case SBD_MSG_CRASHDUMP:
		label = "crashdump";
		break;
	default:
		break;
	}
	return label;
}
/*
 * Release everything held by a device context and free the context itself.
 * After the change below a NULL context is accepted, and the AIO context,
 * the file descriptor and the sector buffer are only released when they
 * were actually set up, so the function can serve as the single cleanup
 * path of a partially initialized context.
 */
static void
close_device(struct sbd_context *st)
{
- close(st->devfd);
+ if (!st) {
+ return;
+ }
+ if (st->ioctx) {
+ io_destroy(st->ioctx);
+ }
+ if (st->devfd >= 0) {
+ close(st->devfd);
+ }
+ free(st->buffer);
free(st);
}
/*
 * Open devname for synchronous, direct read/write access and build the
 * context used for all sector I/O on it: an AIO context, the file
 * descriptor and (after the change below) one sector-sized, sector-aligned
 * bounce buffer.
 *
 * loglevel selects how a failing open() is reported; LOG_DEBUG routes the
 * message through DBGLOG instead of cl_log.
 *
 * Returns the new context, or NULL on any failure. As a side effect the
 * global sector_size is filled in from the device via BLKSSZGET.
 */
static struct sbd_context *
open_device(const char* devname, int loglevel)
{
struct sbd_context *st;
if (!devname)
return NULL;
- st = malloc(sizeof(struct sbd_context));
- if (!st)
+ st = calloc(1, sizeof(struct sbd_context));
+ if (!st) {
return NULL;
- memset(st, 0, sizeof(struct sbd_context));
+ }
/* -1 marks "not opened yet" so close_device() knows whether to close() */
+ st->devfd = -1;
if (io_setup(1, &st->ioctx) != 0) {
cl_perror("io_setup failed");
- free(st);
- return NULL;
+ goto out;
}
st->devfd = open(devname, O_SYNC|O_RDWR|O_DIRECT);
if (st->devfd == -1) {
if (loglevel == LOG_DEBUG) {
DBGLOG(loglevel, "Opening device %s failed.", devname);
} else {
cl_log(loglevel, "Opening device %s failed.", devname);
}
- free(st);
- return NULL;
+ goto out;
}
/* The ioctl result is not checked; a zero sector_size is treated as failure. */
ioctl(st->devfd, BLKSSZGET, &sector_size);
if (sector_size == 0) {
cl_perror("Get sector size failed.\n");
- close_device(st);
- return NULL;
+ goto out;
+ }
+
+ if (posix_memalign(&st->buffer, sector_size, sector_size)) {
+ cl_perror("Couldn't allocate sector-buffer.");
+ goto out;
}
return st;
+
+out:
+ close_device(st);
+ return NULL;
}
/*
 * Allocate one zero-filled buffer of sector_size bytes.
 * Never returns NULL: the process exits with status 1 when the allocation
 * fails. The caller releases the buffer with free().
 */
static void *
sector_alloc(void)
{
void *x;
- x = valloc(sector_size);
+ x = calloc(1, sector_size);
if (!x) {
exit(1);
}
- memset(x, 0, sector_size);
return x;
}
/*
 * Transfer exactly one sector between data and the device via libaio.
 *
 * sector is the sector index on the device; rw != 0 writes data to the
 * device, rw == 0 reads the sector into data. After the change below the
 * transfer goes through st->buffer and is copied from/to data.
 *
 * Waits at most timeout_io seconds for completion. Returns 0 when a full
 * sector was transferred, -1 on submit failure, event retrieval failure,
 * timeout or short I/O.
 */
static int
sector_io(struct sbd_context *st, int sector, void *data, int rw)
{
struct timespec timeout;
struct io_event event;
struct iocb *ios[1] = { &st->io };
long r;
timeout.tv_sec = timeout_io;
timeout.tv_nsec = 0;
memset(&st->io, 0, sizeof(struct iocb));
if (rw) {
- io_prep_pwrite(&st->io, st->devfd, data, sector_size, (long long) sector_size * sector);
+ memcpy(st->buffer, data, sector_size);
+ io_prep_pwrite(&st->io, st->devfd, st->buffer, sector_size, (long long) sector_size * sector);
} else {
- io_prep_pread(&st->io, st->devfd, data, sector_size, (long long) sector_size * sector);
+ memset(st->buffer, 0, sector_size);
+ io_prep_pread(&st->io, st->devfd, st->buffer, sector_size, (long long) sector_size * sector);
}
if (io_submit(st->ioctx, 1, ios) != 1) {
cl_log(LOG_ERR, "Failed to submit IO request! (rw=%d)", rw);
return -1;
}
errno = 0;
r = io_getevents(st->ioctx, 1L, 1L, &event, &timeout);
if (r < 0 ) {
cl_log(LOG_ERR, "Failed to retrieve IO events (rw=%d)", rw);
return -1;
} else if (r < 1L) {
/* No event within the timeout: try to cancel the outstanding request. */
- cl_log(LOG_INFO, "Cancelling IO request due to timeout (rw=%d)", rw);
+ cl_log(LOG_INFO, "Cancelling IO request due to timeout (rw=%d, r=%ld)", rw, r);
r = io_cancel(st->ioctx, ios[0], &event);
if (r) {
DBGLOG(LOG_INFO, "Could not cancel IO request (rw=%d)", rw);
/* Doesn't really matter, debugging information.
*/
}
return -1;
} else if (r > 1L) {
cl_log(LOG_ERR, "More than one IO was returned (r=%ld)", r);
return -1;
}
/* IO is happy */
if (event.res == sector_size) {
/* A completed read is handed back to the caller's buffer. */
+ if (!rw) {
+ memcpy(data, st->buffer, sector_size);
+ }
return 0;
} else {
cl_log(LOG_ERR, "Short IO (rw=%d, res=%lu, sector_size=%d)",
rw, event.res, sector_size);
return -1;
}
}
/* Write one sector from data to the device; 0 on success, -1 on failure. */
static int
sector_write(struct sbd_context *st, int sector, void *data)
{
	const int write_mode = 1;	/* non-zero selects the write path of sector_io() */

	return sector_io(st, sector, data, write_mode);
}
/* Read one sector from the device into data; 0 on success, -1 on failure. */
static int
sector_read(struct sbd_context *st, int sector, void *data)
{
	const int read_mode = 0;	/* zero selects the read path of sector_io() */

	return sector_io(st, sector, data, read_mode);
}
/* Read the node entry of the given slot into s_node. */
static int
slot_read(struct sbd_context *st, int slot, struct sector_node_s *s_node)
{
	const int sector = SLOT_TO_SECTOR(slot);

	return sector_read(st, sector, s_node);
}
/* Write s_node as the node entry of the given slot. */
static int
slot_write(struct sbd_context *st, int slot, struct sector_node_s *s_node)
{
	const int sector = SLOT_TO_SECTOR(slot);

	return sector_write(st, sector, s_node);
}
/* Write s_mbox into the mailbox sector of the given slot. */
static int
mbox_write(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox)
{
	const int sector = MBOX_TO_SECTOR(mbox);

	return sector_write(st, sector, s_mbox);
}
/* Read the mailbox sector of the given slot into s_mbox. */
static int
mbox_read(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox)
{
	const int sector = MBOX_TO_SECTOR(mbox);

	return sector_read(st, sector, s_mbox);
}
/*
 * Write s_mbox to the mailbox of the given slot, read the sector back and
 * compare the full sector against what was written.
 * Returns 0 when the read-back matches, -1 on I/O failure or mismatch.
 */
static int
mbox_write_verify(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox)
{
	const int sector = MBOX_TO_SECTOR(mbox);
	void *readback;
	int result = -1;

	if (sector_write(st, sector, s_mbox) < 0) {
		return -1;
	}

	readback = sector_alloc();
	if (sector_read(st, sector, readback) >= 0) {
		if (memcmp(s_mbox, readback, sector_size) == 0) {
			result = 0;
		} else {
			cl_log(LOG_ERR, "Write verification failed!");
		}
	}
	free(readback);
	return result;
}
/*
 * Store the header in sector 0.
 * The 32-bit fields are converted to network byte order in place before
 * writing, so the caller's struct holds the converted values afterwards.
 */
static int header_write(struct sbd_context *st, struct sector_header_s *s_header)
{
	uint32_t *wire_fields[] = {
		&s_header->sector_size,
		&s_header->timeout_watchdog,
		&s_header->timeout_allocate,
		&s_header->timeout_loop,
		&s_header->timeout_msgwait,
	};
	size_t idx;

	for (idx = 0; idx < sizeof(wire_fields) / sizeof(wire_fields[0]); idx++) {
		*wire_fields[idx] = htonl(*wire_fields[idx]);
	}
	return sector_write(st, 0, s_header);
}
/*
 * Load the header from sector 0 and convert its 32-bit fields to host byte
 * order. On success the four timeouts found on disk also replace the
 * global timeout defaults. Returns 0 on success, -1 when the read fails.
 */
static int
header_read(struct sbd_context *st, struct sector_header_s *s_header)
{
	uint32_t *wire_fields[] = {
		&s_header->sector_size,
		&s_header->timeout_watchdog,
		&s_header->timeout_allocate,
		&s_header->timeout_loop,
		&s_header->timeout_msgwait,
	};
	size_t idx;

	if (sector_read(st, 0, s_header) < 0) {
		return -1;
	}

	for (idx = 0; idx < sizeof(wire_fields) / sizeof(wire_fields[0]); idx++) {
		*wire_fields[idx] = ntohl(*wire_fields[idx]);
	}

	/* This sets the global defaults: */
	timeout_watchdog = s_header->timeout_watchdog;
	timeout_allocate = s_header->timeout_allocate;
	timeout_loop = s_header->timeout_loop;
	timeout_msgwait = s_header->timeout_msgwait;
	return 0;
}
/*
 * Check a header (already in host byte order) against the expected magic,
 * the supported version and the sector size of the device.
 * Returns 0 when everything matches; logs the first mismatch and returns -1
 * otherwise.
 */
static int
valid_header(const struct sector_header_s *s_header)
{
	const bool magic_ok =
		memcmp(s_header->magic, sbd_magic, sizeof(s_header->magic)) == 0;
	const bool version_ok = (s_header->version == sbd_version);
	const bool size_ok = (s_header->sector_size == sector_size);

	if (!magic_ok) {
		cl_log(LOG_ERR, "Header magic does not match.");
		return -1;
	}
	if (!version_ok) {
		cl_log(LOG_ERR, "Header version does not match.");
		return -1;
	}
	if (!size_ok) {
		cl_log(LOG_ERR, "Header sector size does not match.");
		return -1;
	}
	return 0;
}
/*
 * Read and validate the header of the device.
 * Returns a freshly allocated header that the caller must free(), or NULL
 * when the header cannot be read or fails validation. The added free()
 * calls release the buffer on both failure paths.
 */
static struct sector_header_s *
header_get(struct sbd_context *st)
{
struct sector_header_s *s_header;
s_header = sector_alloc();
if (header_read(st, s_header) < 0) {
cl_log(LOG_ERR, "Unable to read header from device %d", st->devfd);
+ free(s_header);
return NULL;
}
if (valid_header(s_header) < 0) {
cl_log(LOG_ERR, "header on device %d is not valid.", st->devfd);
+ free(s_header);
return NULL;
}
/* cl_log(LOG_INFO, "Found version %d header with %d slots",
s_header->version, s_header->slots); */
return s_header;
}
/*
 * Print the header of the device to stdout in human-readable form.
 * The UUID line is only printed for headers with minor_version > 0.
 * Returns 0 on success, -1 when no valid header could be obtained.
 */
static int
header_dump(struct sbd_context *st)
{
struct sector_header_s *s_header;
/* room for the 36-character textual UUID plus the terminating NUL */
char uuid[37];
s_header = header_get(st);
if (s_header == NULL)
return -1;
printf("Header version : %u.%u\n", s_header->version,
s_header->minor_version);
if (s_header->minor_version > 0) {
uuid_unparse_lower(s_header->uuid, uuid);
printf("UUID : %s\n", uuid);
}
printf("Number of slots : %u\n", s_header->slots);
printf("Sector size : %lu\n",
(unsigned long)s_header->sector_size);
printf("Timeout (watchdog) : %lu\n",
(unsigned long)s_header->timeout_watchdog);
printf("Timeout (allocate) : %lu\n",
(unsigned long)s_header->timeout_allocate);
printf("Timeout (loop) : %lu\n",
(unsigned long)s_header->timeout_loop);
printf("Timeout (msgwait) : %lu\n",
(unsigned long)s_header->timeout_msgwait);
+
+ free(s_header);
return 0;
}
/*
 * Initialize a device: write a fresh version-2.1 header carrying the
 * current global timeouts and a newly generated UUID, then overwrite all
 * 255 node entries and mailboxes with zeroed sectors.
 * Returns 0 on success, -1 as soon as any write fails.
 */
static int
init_device(struct sbd_context *st)
{
	struct sector_header_s *hdr = sector_alloc();
	struct sector_node_s *blank_node = sector_alloc();
	struct sector_mbox_s *blank_mbox = sector_alloc();
	char uuid_text[37];
	int slot;
	int result = -1;

	memcpy(hdr->magic, sbd_magic, sizeof(hdr->magic));
	hdr->version = sbd_version;
	hdr->slots = 255;
	hdr->sector_size = sector_size;
	hdr->timeout_watchdog = timeout_watchdog;
	hdr->timeout_allocate = timeout_allocate;
	hdr->timeout_loop = timeout_loop;
	hdr->timeout_msgwait = timeout_msgwait;
	hdr->minor_version = 1;
	uuid_generate(hdr->uuid);
	uuid_unparse_lower(hdr->uuid, uuid_text);

	cl_log(LOG_INFO, "Creating version %d.%d header on device %d (uuid: %s)",
	       hdr->version, hdr->minor_version,
	       st->devfd, uuid_text);
	fprintf(stdout, "Creating version %d.%d header on device %d (uuid: %s)\n",
		hdr->version, hdr->minor_version,
		st->devfd, uuid_text);
	if (header_write(st, hdr) < 0) {
		goto done;
	}

	cl_log(LOG_INFO, "Initializing %d slots on device %d",
	       hdr->slots,
	       st->devfd);
	fprintf(stdout, "Initializing %d slots on device %d\n",
		hdr->slots,
		st->devfd);
	for (slot = 0; slot < hdr->slots; slot++) {
		if (slot_write(st, slot, blank_node) < 0
		    || mbox_write(st, slot, blank_mbox) < 0) {
			goto done;
		}
	}
	result = 0;

done:
	free(blank_node);
	free(hdr);
	free(blank_mbox);
	return result;
}
/* Check if there already is a slot allocated to said name; returns the
* slot number. If not found, returns -1.
* This is necessary because slots might not be continuous. */
/*
 * Scan all slots for an in-use entry whose name matches (case-insensitive,
 * up to SECTOR_NAME_MAX characters).
 * Returns the slot index, -1 when no slot matches or name is NULL, and -2
 * when reading a slot fails.
 */
static int
slot_lookup(struct sbd_context *st, const struct sector_header_s *s_header, const char *name)
{
	struct sector_node_s *entry;
	int found = -1;
	int idx;

	if (name == NULL) {
		cl_log(LOG_ERR, "slot_lookup(): No name specified.\n");
		return -1;
	}

	entry = sector_alloc();
	for (idx = 0; idx < s_header->slots; idx++) {
		if (slot_read(st, idx, entry) < 0) {
			found = -2;
			break;
		}
		if (entry->in_use == 0) {
			continue;
		}
		if (strncasecmp(entry->name, name, SECTOR_NAME_MAX) == 0) {
			DBGLOG(LOG_INFO, "%s owns slot %d", name, idx);
			found = idx;
			break;
		}
	}
	free(entry);
	return found;
}
/*
 * Find the first slot whose node entry is not in use.
 * Returns its index, or -1 when every slot is taken or a slot read fails.
 */
static int
slot_unused(struct sbd_context *st, const struct sector_header_s *s_header)
{
	struct sector_node_s *entry = sector_alloc();
	int free_slot = -1;
	int idx;

	for (idx = 0; idx < s_header->slots; idx++) {
		if (slot_read(st, idx, entry) < 0) {
			break;
		}
		if (entry->in_use == 0) {
			free_slot = idx;
			break;
		}
	}
	free(entry);
	return free_slot;
}
/*
 * Return the slot owned by name, claiming an unused one if necessary.
 *
 * Each round first looks the name up. If it is absent, the first unused
 * slot is written with the name, the function sleeps timeout_allocate
 * seconds and then looks the name up again, so the claim is only reported
 * once it can be read back.
 *
 * Returns the slot index (>= 0) on success, -2 when the lookup hits a read
 * error, and -1 for a missing name, an invalid header, a failed write or a
 * full device.
 */
static int
slot_allocate(struct sbd_context *st, const char *name)
{
	struct sector_header_s *s_header = NULL;
	struct sector_node_s *s_node = NULL;
	int i;
	int rc = 0;

	if (!name) {
		cl_log(LOG_ERR, "slot_allocate(): No name specified.\n");
		fprintf(stderr, "slot_allocate(): No name specified.\n");
		rc = -1; goto out;
	}
	s_header = header_get(st);
	if (!s_header) {
		rc = -1; goto out;
	}
	s_node = sector_alloc();

	while (1) {
		i = slot_lookup(st, s_header, name);
		if ((i >= 0) || (i == -2)) {
			/* -1 is "no slot found", in which case we
			 * proceed to allocate a new one.
			 * -2 is "read error during lookup", in which
			 * case we error out too
			 * >= 0 is "slot already allocated" */
			rc = i; goto out;
		}
		i = slot_unused(st, s_header);
		if (i >= 0) {
			cl_log(LOG_INFO, "slot %d is unused - trying to own", i);
			fprintf(stdout, "slot %d is unused - trying to own\n", i);
			/* Zeroing first keeps the name NUL-terminated after strncpy. */
			memset(s_node, 0, sizeof(*s_node));
			s_node->in_use = 1;
			strncpy(s_node->name, name, SECTOR_NAME_MAX);
			if (slot_write(st, i, s_node) < 0) {
				rc = -1; goto out;
			}
			sleep(timeout_allocate);
		} else {
			cl_log(LOG_ERR, "No more free slots.");
			fprintf(stderr, "No more free slots.\n");
			rc = -1; goto out;
		}
	}

out:
	free(s_node);
	free(s_header);
	return(rc);
}
/*
 * Print one tab-separated line per in-use slot: index, owner name, pending
 * mailbox command and the sender recorded in the mailbox.
 * Returns 0 on success, -1 when the header is invalid or a read fails.
 */
static int
slot_list(struct sbd_context *st)
{
	struct sector_header_s *hdr = header_get(st);
	struct sector_node_s *node;
	struct sector_mbox_s *box;
	int result = 0;
	int idx;

	if (hdr == NULL) {
		return -1;
	}

	node = sector_alloc();
	box = sector_alloc();
	for (idx = 0; idx < hdr->slots; idx++) {
		if (slot_read(st, idx, node) < 0) {
			result = -1;
			break;
		}
		if (!(node->in_use > 0)) {
			continue;
		}
		if (mbox_read(st, idx, box) < 0) {
			result = -1;
			break;
		}
		printf("%d\t%s\t%s\t%s\n",
		       idx, node->name, char2cmd(box->cmd),
		       box->from);
	}

	free(node);
	free(hdr);
	free(box);
	return result;
}
/*
 * Deliver command cmd to the mailbox of node name on this device.
 *
 * The name "LOCAL" is replaced by local_uname. The message is written and
 * read back for verification; unless the command is "exit" the function
 * then sleeps timeout_msgwait seconds before reporting success.
 *
 * Returns 0 on success and -1 on missing arguments, an invalid header, an
 * unknown recipient or command, or a failed/unverified write.
 */
static int
slot_msg(struct sbd_context *st, const char *name, const char *cmd)
{
	struct sector_header_s *s_header = NULL;
	struct sector_mbox_s *s_mbox = NULL;
	int mbox;
	int rc = 0;
	char uuid[37];

	if (!name || !cmd) {
		cl_log(LOG_ERR, "slot_msg(): No recipient / cmd specified.\n");
		rc = -1; goto out;
	}
	s_header = header_get(st);
	if (!s_header) {
		rc = -1; goto out;
	}
	if (strcmp(name, "LOCAL") == 0) {
		name = local_uname;
	}
	if (s_header->minor_version > 0) {
		uuid_unparse_lower(s_header->uuid, uuid);
		cl_log(LOG_INFO, "Device UUID: %s", uuid);
	}
	mbox = slot_lookup(st, s_header, name);
	if (mbox < 0) {
		cl_log(LOG_ERR, "slot_msg(): No slot found for %s.", name);
		rc = -1; goto out;
	}
	s_mbox = sector_alloc();
	s_mbox->cmd = cmd2char(cmd);
	if (s_mbox->cmd < 0) {
		cl_log(LOG_ERR, "slot_msg(): Invalid command %s.", cmd);
		rc = -1; goto out;
	}
	/* The sector is zero-filled, so the copy stays NUL-terminated. */
	strncpy(s_mbox->from, local_uname, SECTOR_NAME_MAX);
	cl_log(LOG_INFO, "Writing %s to node slot %s",
	       cmd, name);
	/* mbox_write_verify() reports failure as -1; "< -1" never matched. */
	if (mbox_write_verify(st, mbox, s_mbox) < 0) {
		rc = -1; goto out;
	}
	if (strcasecmp(cmd, "exit") != 0) {
		cl_log(LOG_INFO, "Messaging delay: %d",
		       (int)timeout_msgwait);
		sleep(timeout_msgwait);
	}
	cl_log(LOG_INFO, "%s successfully delivered to %s",
	       cmd, name);

out:
	free(s_mbox);
	free(s_header);
	return rc;
}
/*
 * Send a test message to node name and wait for it to be consumed.
 *
 * The name "LOCAL" is replaced by local_uname. After writing SBD_MSG_TEST
 * the mailbox is polled once per second, for up to timeout_msgwait seconds,
 * until its command is no longer SBD_MSG_TEST.
 *
 * Returns 0 when the message was consumed in time, -1 otherwise (including
 * missing name, invalid header, unknown recipient and I/O errors).
 */
static int
slot_ping(struct sbd_context *st, const char *name)
{
	struct sector_header_s *s_header = NULL;
	struct sector_mbox_s *s_mbox = NULL;
	int mbox;
	int waited = 0;
	int rc = 0;

	if (!name) {
		cl_log(LOG_ERR, "slot_ping(): No recipient specified.\n");
		rc = -1; goto out;
	}
	s_header = header_get(st);
	if (!s_header) {
		rc = -1; goto out;
	}
	if (strcmp(name, "LOCAL") == 0) {
		name = local_uname;
	}
	mbox = slot_lookup(st, s_header, name);
	if (mbox < 0) {
		cl_log(LOG_ERR, "slot_ping(): No slot found for %s.", name);
		rc = -1; goto out;
	}
	s_mbox = sector_alloc();
	s_mbox->cmd = SBD_MSG_TEST;
	/* The sector is zero-filled, so the copy stays NUL-terminated. */
	strncpy(s_mbox->from, local_uname, SECTOR_NAME_MAX);
	DBGLOG(LOG_DEBUG, "Pinging node %s", name);
	/* mbox_write() reports failure as -1; "< -1" never matched. */
	if (mbox_write(st, mbox, s_mbox) < 0) {
		rc = -1; goto out;
	}

	/* Assume failure until the recipient has replaced the test message. */
	rc = -1;
	while (waited <= timeout_msgwait) {
		if (mbox_read(st, mbox, s_mbox) < 0)
			break;
		if (s_mbox->cmd != SBD_MSG_TEST) {
			rc = 0;
			break;
		}
		sleep(1);
		waited++;
	}
	if (rc == 0) {
		cl_log(LOG_DEBUG, "%s successfully pinged.", name);
	} else {
		cl_log(LOG_ERR, "%s failed to ping.", name);
	}

out:
	free(s_mbox);
	free(s_header);
	return rc;
}
/*
 * Initialize every device in the list, stopping at the first failure.
 * Returns 0 when all devices were initialized, -1 otherwise.
 */
int init_devices(struct servants_list_item *servants)
{
	struct servants_list_item *item = servants;

	while (item != NULL) {
		struct sbd_context *ctx;
		int result;

		fprintf(stdout, "Initializing device %s\n",
			item->devname);
		ctx = open_device(item->devname, LOG_ERR);
		if (ctx == NULL) {
			return -1;
		}
		result = init_device(ctx);
		close_device(ctx);
		if (result == -1) {
			fprintf(stderr, "Failed to init device %s\n", item->devname);
			return result;
		}
		fprintf(stdout, "Device %s is initialized.\n", item->devname);
		item = item->next;
	}
	fprintf(stdout, "Did you check sbd service down on all nodes before? If not do so now and restart afterwards.\n");
	return 0;
}
static int slot_msg_wrapper(const char* devname, int mode, const void* argp)
{
int rc = 0;
struct sbd_context *st;
const struct slot_msg_arg_t* arg = (const struct slot_msg_arg_t*)argp;
st = open_device(devname, LOG_WARNING);
if (!st)
return -1;
cl_log(LOG_INFO, "Delivery process handling %s",
devname);
rc = slot_msg(st, arg->name, arg->msg);
close_device(st);
return rc;
}
static int slot_ping_wrapper(const char* devname, int mode, const void* argp)
{
int rc = 0;
const char* name = (const char*)argp;
struct sbd_context *st;
st = open_device(devname, LOG_WARNING);
if (!st)
return -1;
rc = slot_ping(st, name);
close_device(st);
return rc;
}
/*
 * Make sure name owns a slot on every device in the list, stopping at the
 * first device that cannot be opened or on which allocation fails.
 * Returns 0 on success, -1 for an open failure, or the negative result of
 * slot_allocate().
 */
int allocate_slots(const char *name, struct servants_list_item *servants)
{
	struct servants_list_item *item = servants;

	while (item != NULL) {
		struct sbd_context *ctx;
		int result;

		fprintf(stdout, "Trying to allocate slot for %s on device %s.\n",
			name,
			item->devname);
		ctx = open_device(item->devname, LOG_WARNING);
		if (ctx == NULL) {
			return -1;
		}
		result = slot_allocate(ctx, name);
		close_device(ctx);
		if (result < 0) {
			return result;
		}
		fprintf(stdout, "Slot for %s has been allocated on %s.\n",
			name,
			item->devname);
		item = item->next;
	}
	return 0;
}
/*
 * List the slots of every device in the list. A failing device is reported
 * on stderr and skipped; the remaining devices are still processed.
 * Returns 0 when every device was listed, -1 when at least one failed.
 */
int list_slots(struct servants_list_item *servants)
{
	struct servants_list_item *item;
	int overall = 0;

	for (item = servants; item != NULL; item = item->next) {
		struct sbd_context *ctx = open_device(item->devname, LOG_WARNING);
		int listed;

		if (ctx == NULL) {
			overall = -1;
			fprintf(stderr, "== disk %s unreadable!\n", item->devname);
			continue;
		}
		listed = slot_list(ctx);
		close_device(ctx);
		if (listed == -1) {
			overall = -1;
			fprintf(stderr, "== Slots on disk %s NOT dumped\n", item->devname);
		}
	}
	return overall;
}
/*
 * Ping node name through every disk servant in the list: one servant
 * running slot_ping_wrapper() is assigned per disk, then SIGCHLD is awaited
 * until disk_count disk servants have been reaped. Always returns 0.
 */
int ping_via_slots(const char *name, struct servants_list_item *servants)
{
	struct servants_list_item *item;
	sigset_t chld_mask;
	siginfo_t info;
	pid_t child = 0;
	int status = 0;
	int finished = 0;

	/* Block SIGCHLD so it can be collected synchronously below. */
	sigemptyset(&chld_mask);
	sigaddset(&chld_mask, SIGCHLD);
	sigprocmask(SIG_BLOCK, &chld_mask, NULL);

	for (item = servants; item != NULL; item = item->next) {
		if (sbd_is_disk(item)) {
			item->pid = assign_servant(item->devname, &slot_ping_wrapper, 0, (const void*)name);
		}
	}

	while (finished < disk_count) {
		if (sigwaitinfo(&chld_mask, &info) != SIGCHLD) {
			continue;
		}
		for (;;) {
			child = wait(&status);
			if (child == 0) {
				break;
			}
			if (child == -1 && errno == ECHILD) {
				break;
			}
			item = lookup_servant_by_pid(child);
			if (sbd_is_disk(item)) {
				finished++;
			}
		}
	}
	return 0;
}
/* Non-zero when good_servants is more than half of disk_count (integer division). */
int quorum_write(int good_servants)
{
	const int half = disk_count / 2;

	return good_servants > half;
}
int messenger(const char *name, const char *msg, struct servants_list_item *servants)
{
int sig = 0;
pid_t pid = 0;
int status = 0;
int servants_finished = 0;
int successful_delivery = 0;
sigset_t procmask;
siginfo_t sinfo;
struct servants_list_item *s;
struct slot_msg_arg_t slot_msg_arg = {name, msg};
sigemptyset(&procmask);
sigaddset(&procmask, SIGCHLD);
sigprocmask(SIG_BLOCK, &procmask, NULL);
for (s = servants; s; s = s->next) {
s->pid = assign_servant(s->devname, &slot_msg_wrapper, 0, &slot_msg_arg);
}
while (!(quorum_write(successful_delivery) ||
(servants_finished == disk_count))) {
sig = sigwaitinfo(&procmask, &sinfo);
if (sig == SIGCHLD) {
while ((pid = waitpid(-1, &status, WNOHANG))) {
if (pid == -1 && errno == ECHILD) {
break;
} else {
servants_finished++;
if (WIFEXITED(status)
&& WEXITSTATUS(status) == 0) {
DBGLOG(LOG_INFO, "Process %d succeeded.",
(int)pid);
successful_delivery++;
} else {
cl_log(LOG_WARNING, "Process %d failed to deliver!",
(int)pid);
}
}
}
}
}
if (quorum_write(successful_delivery)) {
cl_log(LOG_INFO, "Message successfully delivered.");
return 0;
} else {
cl_log(LOG_ERR, "Message is not delivered via more then a half of devices");
return -1;
}
}
/*
 * Return the msgwait timeout stored on the first device in the list that
 * can be opened and carries a valid header; 0 when no device qualifies.
 * Reading a header goes through header_read(), which also overwrites the
 * global timeout defaults with the on-disk values.
 */
unsigned long
get_first_msgwait(struct servants_list_item *servants)
{
unsigned long msgwait = 0;
struct servants_list_item *s = servants;
for (s = servants; s; s = s->next) {
struct sbd_context *st;
struct sector_header_s *s_header;
st = open_device(s->devname, LOG_WARNING);
if (!st) {
continue;
}
s_header = header_get(st);
if (s_header != NULL) {
msgwait = (unsigned long)s_header->timeout_msgwait;
close_device(st);
/* header_get() hands over ownership of the header buffer */
+ free(s_header);
return msgwait;
}
close_device(st);
}
return msgwait;
}
/*
 * Dump the header of every device in the list. Failing devices are
 * reported on stderr and do not stop the iteration.
 * Returns 0 when every header was dumped, -1 when at least one failed.
 */
int dump_headers(struct servants_list_item *servants)
{
	struct servants_list_item *item;
	int overall = 0;

	for (item = servants; item != NULL; item = item->next) {
		struct sbd_context *ctx;
		int dumped = -1;

		fprintf(stdout, "==Dumping header on disk %s\n", item->devname);
		ctx = open_device(item->devname, LOG_WARNING);
		if (ctx == NULL) {
			fprintf(stderr, "== disk %s unreadable!\n", item->devname);
		} else {
			dumped = header_dump(ctx);
			close_device(ctx);
		}

		if (dumped != -1) {
			fprintf(stdout, "==Header on disk %s is dumped\n", item->devname);
		} else {
			overall = -1;
			fprintf(stderr, "==Header on disk %s NOT dumped\n", item->devname);
		}
	}
	return overall;
}
/*
 * Wait until a valid header can be read from any configured device and
 * adopt its timeouts as the global settings.
 *
 * The whole list is retried every timeout_loop seconds while the elapsed
 * time is below timeout_startup; only the first failure is logged. When no
 * header was obtained the process exits with status 1.
 */
void open_any_device(struct servants_list_item *servants)
{
	struct sector_header_s *found = NULL;
	struct timespec started;
	int elapsed = 0;
	bool warned = false;

	clock_gettime(CLOCK_MONOTONIC, &started);

	while (found == NULL && elapsed < timeout_startup) {
		struct servants_list_item *item;
		struct timespec now;

		for (item = servants; item != NULL; item = item->next) {
			struct sbd_context *ctx = open_device(item->devname, LOG_DEBUG);

			if (ctx != NULL) {
				found = header_get(ctx);
				close_device(ctx);
				if (found != NULL) {
					break;
				}
				if (!warned) {
					cl_log(LOG_WARNING, "Failed to read header from %s. "
					       "Trying any other configured devices, "
					       "otherwise retrying every %ds within %ds",
					       item->devname, timeout_loop, timeout_startup);
					warned = true;
				}
			} else if (!warned) {
				cl_log(LOG_WARNING, "Failed to open %s. "
				       "Trying any other configured devices, "
				       "otherwise retrying every %ds within %ds",
				       item->devname, timeout_loop, timeout_startup);
				warned = true;
			}
		}

		clock_gettime(CLOCK_MONOTONIC, &now);
		elapsed = now.tv_sec - started.tv_sec;
		if (found == NULL) {
			sleep(timeout_loop);
		}
	}

	if (found == NULL) {
		cl_log(LOG_ERR, "No devices were available at start-up within %i seconds.",
		       timeout_startup);
		exit(1);
	}

	timeout_watchdog = found->timeout_watchdog;
	timeout_allocate = found->timeout_allocate;
	timeout_loop = found->timeout_loop;
	timeout_msgwait = found->timeout_msgwait;
	free(found);
	return;
}
/*
::-::-::-::-::-::-::-::-::-::-::-::-::
Begin disk based servant code
::-::-::-::-::-::-::-::-::-::-::-::-::
*/
/*
 * Compare the four timeouts of a device header with the global settings.
 * Logs the first difference found and returns -1; returns 0 when all match.
 */
static int servant_check_timeout_inconsistent(struct sector_header_s *hdr)
{
	int verdict = 0;

	if (timeout_watchdog != hdr->timeout_watchdog) {
		cl_log(LOG_WARNING, "watchdog timeout: %d versus %d on this device",
		       (int)timeout_watchdog, (int)hdr->timeout_watchdog);
		verdict = -1;
	} else if (timeout_allocate != hdr->timeout_allocate) {
		cl_log(LOG_WARNING, "allocate timeout: %d versus %d on this device",
		       (int)timeout_allocate, (int)hdr->timeout_allocate);
		verdict = -1;
	} else if (timeout_loop != hdr->timeout_loop) {
		cl_log(LOG_WARNING, "loop timeout: %d versus %d on this device",
		       (int)timeout_loop, (int)hdr->timeout_loop);
		verdict = -1;
	} else if (timeout_msgwait != hdr->timeout_msgwait) {
		cl_log(LOG_WARNING, "msgwait timeout: %d versus %d on this device",
		       (int)timeout_msgwait, (int)hdr->timeout_msgwait);
		verdict = -1;
	}
	return verdict;
}
/*
 * Disk servant: watch the local node's slot on one device and relay what
 * arrives in its mailbox to the parent process.
 *
 * diskname is the device to watch, argp points to the servants_list_item
 * of this servant; mode > 0 makes the first start check the mailbox for a
 * left-over fencing message before clearing it.
 *
 * Every timeout_loop seconds the header and the node entry are re-read and
 * compared with the copies taken at start-up, the mailbox is read, and the
 * parent is signalled (SIG_TEST, SIG_EXITREQ, SIG_LIVENESS). Reset, off and
 * crashdump requests as well as I/O or consistency failures end the servant
 * with the matching EXIT_MD_SERVANT_* status.
 *
 * The function never returns; it always ends in exit(). The change below
 * replaces the scattered exit() calls with "goto out" so the buffers and
 * the device context are released first.
 */
int servant_md(const char *diskname, int mode, const void* argp)
{
struct sector_mbox_s *s_mbox = NULL;
struct sector_node_s *s_node = NULL;
struct sector_header_s *s_header = NULL;
int mbox;
int rc = 0;
time_t t0, t1, latency;
union sigval signal_value;
sigset_t servant_masks;
struct sbd_context *st;
pid_t ppid;
char uuid[37];
const struct servants_list_item *s = argp;
cl_log(LOG_INFO, "Servant starting for device %s", diskname);
/* Block most of the signals */
sigfillset(&servant_masks);
sigdelset(&servant_masks, SIGKILL);
sigdelset(&servant_masks, SIGFPE);
sigdelset(&servant_masks, SIGILL);
sigdelset(&servant_masks, SIGSEGV);
sigdelset(&servant_masks, SIGBUS);
sigdelset(&servant_masks, SIGALRM);
/* FIXME: check error */
sigprocmask(SIG_SETMASK, &servant_masks, NULL);
st = open_device(diskname, LOG_WARNING);
if (!st) {
exit(EXIT_MD_SERVANT_IO_FAIL);
}
s_header = header_get(st);
if (!s_header) {
cl_log(LOG_ERR, "Not a valid header on %s", diskname);
- exit(EXIT_MD_SERVANT_IO_FAIL);
+ rc = EXIT_MD_SERVANT_IO_FAIL;
+ goto out;
}
if (servant_check_timeout_inconsistent(s_header) < 0) {
cl_log(LOG_ERR, "Timeouts on %s do not match first device",
diskname);
- exit(EXIT_MD_SERVANT_IO_FAIL);
+ rc = EXIT_MD_SERVANT_IO_FAIL;
+ goto out;
}
if (s_header->minor_version > 0) {
uuid_unparse_lower(s_header->uuid, uuid);
cl_log(LOG_INFO, "Device %s uuid: %s", diskname, uuid);
}
/* Find our slot on this device, claiming a free one if we have none yet. */
mbox = slot_allocate(st, local_uname);
if (mbox < 0) {
cl_log(LOG_ERR,
"No slot allocated, and automatic allocation failed for disk %s.",
diskname);
rc = EXIT_MD_SERVANT_IO_FAIL;
goto out;
}
/* Keep the start-up copy of our node entry for the per-loop comparison. */
s_node = sector_alloc();
if (slot_read(st, mbox, s_node) < 0) {
cl_log(LOG_ERR, "Unable to read node entry on %s",
diskname);
- exit(EXIT_MD_SERVANT_IO_FAIL);
+ rc = EXIT_MD_SERVANT_IO_FAIL;
+ goto out;
}
cl_log(LOG_NOTICE, "Monitoring slot %d on disk %s", mbox, diskname);
if (s_header->minor_version == 0) {
set_proc_title("sbd: watcher: %s - slot: %d", diskname, mbox);
} else {
set_proc_title("sbd: watcher: %s - slot: %d - uuid: %s",
diskname, mbox, uuid);
}
s_mbox = sector_alloc();
if (s->first_start) {
if (mode > 0) {
if (mbox_read(st, mbox, s_mbox) < 0) {
cl_log(LOG_ERR, "mbox read failed during start-up in servant.");
rc = EXIT_MD_SERVANT_IO_FAIL;
goto out;
}
if (s_mbox->cmd != SBD_MSG_EXIT &&
s_mbox->cmd != SBD_MSG_EMPTY) {
/* Not a clean stop. Abort start-up */
cl_log(LOG_WARNING, "Found fencing message - aborting start-up. Manual intervention required!");
ppid = getppid();
/* NOTE(review): signal_value is passed here before the memset further
 * down has initialized it - confirm and consider zeroing it earlier. */
sigqueue(ppid, SIG_EXITREQ, signal_value);
rc = 0;
goto out;
}
}
DBGLOG(LOG_INFO, "First servant start - zeroing inbox");
memset(s_mbox, 0, sizeof(*s_mbox));
if (mbox_write(st, mbox, s_mbox) < 0) {
rc = EXIT_MD_SERVANT_IO_FAIL;
goto out;
}
}
memset(&signal_value, 0, sizeof(signal_value));
while (1) {
struct sector_header_s *s_header_retry = NULL;
struct sector_node_s *s_node_retry = NULL;
t0 = time(NULL);
sleep(timeout_loop);
ppid = getppid();
if (ppid == 1) {
/* Our parent died unexpectedly. Triggering
* self-fence. */
do_timeout_action();
}
/* These attempts are, by definition, somewhat racy. If
* the device is wiped out or corrupted between here and
* us reading our mbox, there is nothing we can do about
* that. But at least we tried. */
s_header_retry = header_get(st);
if (!s_header_retry) {
cl_log(LOG_ERR, "No longer found a valid header on %s", diskname);
- exit(EXIT_MD_SERVANT_IO_FAIL);
+ rc = EXIT_MD_SERVANT_IO_FAIL;
+ goto out;
}
if (memcmp(s_header, s_header_retry, sizeof(*s_header)) != 0) {
cl_log(LOG_ERR, "Header on %s changed since start-up!", diskname);
- exit(EXIT_MD_SERVANT_IO_FAIL);
+ free(s_header_retry);
+ rc = EXIT_MD_SERVANT_IO_FAIL;
+ goto out;
}
free(s_header_retry);
s_node_retry = sector_alloc();
if (slot_read(st, mbox, s_node_retry) < 0) {
cl_log(LOG_ERR, "slot read failed in servant.");
- exit(EXIT_MD_SERVANT_IO_FAIL);
+ free(s_node_retry);
+ rc = EXIT_MD_SERVANT_IO_FAIL;
+ goto out;
}
if (memcmp(s_node, s_node_retry, sizeof(*s_node)) != 0) {
cl_log(LOG_ERR, "Node entry on %s changed since start-up!", diskname);
- exit(EXIT_MD_SERVANT_IO_FAIL);
+ free(s_node_retry);
+ rc = EXIT_MD_SERVANT_IO_FAIL;
+ goto out;
}
free(s_node_retry);
if (mbox_read(st, mbox, s_mbox) < 0) {
cl_log(LOG_ERR, "mbox read failed in servant.");
- exit(EXIT_MD_SERVANT_IO_FAIL);
+ rc = EXIT_MD_SERVANT_IO_FAIL;
+ goto out;
}
/* Any command code above SBD_MSG_EMPTY is a pending message. */
if (s_mbox->cmd > 0) {
cl_log(LOG_NOTICE,
"Received command %s from %s on disk %s",
char2cmd(s_mbox->cmd), s_mbox->from, diskname);
switch (s_mbox->cmd) {
case SBD_MSG_TEST:
memset(s_mbox, 0, sizeof(*s_mbox));
mbox_write(st, mbox, s_mbox);
sigqueue(ppid, SIG_TEST, signal_value);
break;
case SBD_MSG_RESET:
- exit(EXIT_MD_SERVANT_REQUEST_RESET);
+ rc = EXIT_MD_SERVANT_REQUEST_RESET;
+ goto out;
case SBD_MSG_OFF:
- exit(EXIT_MD_SERVANT_REQUEST_SHUTOFF);
+ rc = EXIT_MD_SERVANT_REQUEST_SHUTOFF;
+ goto out;
case SBD_MSG_EXIT:
sigqueue(ppid, SIG_EXITREQ, signal_value);
break;
case SBD_MSG_CRASHDUMP:
- exit(EXIT_MD_SERVANT_REQUEST_CRASHDUMP);
+ rc = EXIT_MD_SERVANT_REQUEST_CRASHDUMP;
+ goto out;
default:
/* FIXME:
An "unknown" message might result
from a partial write.
log it and clear the slot.
*/
cl_log(LOG_ERR, "Unknown message on disk %s",
diskname);
memset(s_mbox, 0, sizeof(*s_mbox));
mbox_write(st, mbox, s_mbox);
break;
}
}
/* Report this device as alive for the current loop iteration. */
sigqueue(ppid, SIG_LIVENESS, signal_value);
t1 = time(NULL);
latency = t1 - t0;
if (timeout_watchdog_warn && (latency > timeout_watchdog_warn)) {
cl_log(LOG_WARNING,
"Latency: %ds exceeded watchdog warning timeout %ds on disk %s",
(int)latency, (int)timeout_watchdog_warn,
diskname);
} else if (debug) {
DBGLOG(LOG_DEBUG, "Latency: %ds on disk %s", (int)latency,
diskname);
}
}
/* Common exit path: release all buffers and the device, then exit with rc. */
out:
+ free(s_header);
+ free(s_node);
free(s_mbox);
close_device(st);
exit(rc);
}
diff --git a/src/sbd.h b/src/sbd.h
index 7c3c1ec..bbdc6f1 100644
--- a/src/sbd.h
+++ b/src/sbd.h
@@ -1,218 +1,219 @@
/*
* Copyright (C) 2013 Lars Marowsky-Bree <lmb@suse.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <arpa/inet.h>
#include <asm/unistd.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <libaio.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/watchdog.h>
#include <malloc.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/ptrace.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <syslog.h>
#include <time.h>
#include <unistd.h>
#include <uuid/uuid.h>
#include <qb/qblog.h>
#include <crm_config.h>
#include <config.h>
/* signals reserved for multi-disk sbd */
#define SIG_LIVENESS (SIGRTMIN + 1) /* report liveness of the disk */
#define SIG_EXITREQ (SIGRTMIN + 2) /* exit request to inquisitor */
#define SIG_TEST (SIGRTMIN + 3) /* trigger self test */
#define SIG_RESTART (SIGRTMIN + 4) /* trigger restart of all failed disk */
#define SIG_PCMK_UNHEALTHY (SIGRTMIN + 5)
/* FIXME: should add dynamic check of SIG_XX >= SIGRTMAX */
/* exit status for disk-servant */
#define EXIT_MD_SERVANT_IO_FAIL 20
#define EXIT_MD_SERVANT_REQUEST_RESET 21
#define EXIT_MD_SERVANT_REQUEST_SHUTOFF 22
#define EXIT_MD_SERVANT_REQUEST_CRASHDUMP 23
/* exit status for pcmk-servant */
#define EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN 30
#define HOG_CHAR 0xff
#define SECTOR_NAME_MAX 63
/* Sector data types */
/*
 * Layout of the device header. magic and version identify the format,
 * slots is the number of node slots on the device.
 */
struct sector_header_s {
char magic[8];
unsigned char version;
unsigned char slots;
/* Caveat: stored in network byte-order */
uint32_t sector_size;
uint32_t timeout_watchdog;
uint32_t timeout_allocate;
uint32_t timeout_loop;
uint32_t timeout_msgwait;
/* Minor version for extensions to the core data set:
* compatible and optional values. */
unsigned char minor_version;
uuid_t uuid; /* 16 bytes */
};
/*
 * Mailbox record: one command byte plus the name of its originator.
 * NOTE(review): cmd presumably carries one of the SBD_MSG_* codes that
 * cmd2char() in sbd-md.c returns as signed char - confirm.
 */
struct sector_mbox_s {
signed char cmd;
/* Sized SECTOR_NAME_MAX+1, i.e. room for SECTOR_NAME_MAX characters and a terminator. */
char from[SECTOR_NAME_MAX+1];
};
/* Slot record: an in-use flag plus the name stored in the slot. */
struct sector_node_s {
/* slots will be created with in_use == 0 */
char in_use;
/* Sized SECTOR_NAME_MAX+1, i.e. room for SECTOR_NAME_MAX characters and a terminator. */
char name[SECTOR_NAME_MAX+1];
};
/*
 * Node of a singly linked list of servants (linked through 'next').
 * The code maintaining these fields is not part of this header; the
 * per-field notes below are limited to what the declarations show.
 */
struct servants_list_item {
/* Device name; also the key used by lookup_servant_by_dev(). */
const char* devname;
/* Process id; also the key used by lookup_servant_by_pid(). */
pid_t pid;
/* NOTE(review): restart bookkeeping - exact semantics to be confirmed in the inquisitor code. */
int restarts;
int restart_blocked;
int outdated;
int first_start;
/* Two timestamps kept per servant. */
struct timespec t_last, t_started;
/* Next list element. */
struct servants_list_item *next;
};
/* Per-device I/O state: a file descriptor together with libaio bookkeeping. */
struct sbd_context {
/* File descriptor of the device. */
int devfd;
/* libaio context handle (type from <libaio.h>). */
io_context_t ioctx;
/* libaio control block embedded in the context. */
struct iocb io;
/* Member added by this change; NOTE(review): presumably the I/O buffer owned by the context - confirm in sbd-md.c. */
+ void *buffer;
};
/*
 * Health states passed as the 'state' argument of set_servant_health().
 * Enumerators take consecutive values starting at 0, so their order
 * must not be changed without checking every user.
 */
enum pcmk_health
{
pcmk_health_unknown,
pcmk_health_pending,
pcmk_health_transient,
pcmk_health_unclean,
pcmk_health_shutdown,
pcmk_health_online,
pcmk_health_noquorum,
};
/*
 * Prototypes of helpers shared between the sbd components.
 * Only the declarations live here; the definitions are in other files,
 * so behaviour beyond the signatures is not documented in this header.
 */
void usage(void);
/* Watchdog device handling. */
int watchdog_init_interval(void);
int watchdog_tickle(void);
int watchdog_init(void);
void sysrq_init(void);
/* 'disarm' selects whether the watchdog is disarmed on close (per the parameter name). */
void watchdog_close(bool disarm);
int watchdog_info(void);
int watchdog_test(void);
/* 't' is the sysrq character to trigger (per the parameter name). */
void sysrq_trigger(char t);
/* Self-fencing actions. */
void do_crashdump(void);
void do_reset(void);
void do_off(void);
void do_timeout_action(void);
/* Process setup helpers. */
pid_t make_daemon(void);
void maximize_priority(void);
void sbd_get_uname(void);
void sbd_set_format_string(int method, const char *daemon);
void notify_parent(void);
/* Tunable defaults: */
/* Declarations only - the storage for all of these is defined in another translation unit. */
extern unsigned long timeout_watchdog;
extern unsigned long timeout_watchdog_warn;
extern bool do_calculate_timeout_watchdog_warn;
extern unsigned long timeout_watchdog_crashdump;
extern int timeout_allocate;
extern int timeout_loop;
extern int timeout_msgwait;
extern int timeout_io;
extern int timeout_startup;
extern int watchdog_use;
extern int watchdog_set_timeout;
extern int skip_rt;
/* Values > 0 enable the DBGLOG() macro defined in this header. */
extern int debug;
extern int debug_mode;
extern char *watchdogdev;
extern bool watchdogdev_is_default;
extern char* local_uname;
extern bool do_flush;
extern char timeout_sysrq_char;
extern bool move_to_root_cgroup;
extern bool enforce_moving_to_root_cgroup;
extern bool sync_resource_startup;
/* Global, non-tunable variables: */
extern int sector_size;
extern int watchdogfd;
extern const char* cmdname;
/*
 * Signature shared by the servant entry points declared below
 * (servant_md, servant_pcmk, servant_cluster): device name, a mode
 * value and an opaque argument pointer.
 */
typedef int (*functionp_t)(const char* devname, int mode, const void* argp);
/* Takes a functionp_t together with the arguments to hand to it. */
int assign_servant(const char* devname, functionp_t functionp, int mode, const void* argp);
/* The shared-disk API is only declared when SUPPORT_SHARED_DISK is non-zero. */
#if SUPPORT_SHARED_DISK
void open_any_device(struct servants_list_item *servants);
int init_devices(struct servants_list_item *servants);
int allocate_slots(const char *name, struct servants_list_item *servants);
int list_slots(struct servants_list_item *servants);
int ping_via_slots(const char *name, struct servants_list_item *servants);
int dump_headers(struct servants_list_item *servants);
unsigned long get_first_msgwait(struct servants_list_item *servants);
int messenger(const char *name, const char *msg, struct servants_list_item *servants);
int servant_md(const char *diskname, int mode, const void* argp);
#endif
int servant_pcmk(const char *diskname, int mode, const void* argp);
int servant_cluster(const char *diskname, int mode, const void* argp);
/* Servant list lookups, keyed by device name or by pid. */
struct servants_list_item *lookup_servant_by_dev(const char *devname);
struct servants_list_item *lookup_servant_by_pid(pid_t pid);
/* Process title helpers; set_proc_title() takes a printf-like format plus arguments. */
int init_set_proc_title(int argc, char *argv[], char *envp[]);
void set_proc_title(const char *fmt,...);
/*
 * cl_log(level, fmt, ...): forward a message to libqb's
 * qb_log_from_external_source(), tagging it with the calling function,
 * file and line. Uses the GNU named-variadic form ("args..." / "##args").
 */
#define cl_log(level, fmt, args...) qb_log_from_external_source( __func__, __FILE__, fmt, level, __LINE__, 0, ##args)
/*
 * cl_perror(fmt, ...): log at LOG_ERR with ": <strerror(errno)> (<errno>)"
 * appended. fmt must be a string literal because it is concatenated with
 * the suffix. The macro declares a local named 'err', so an argument
 * expression that itself refers to a caller variable called 'err' would
 * be shadowed.
 */
# define cl_perror(fmt, args...) do { \
const char *err = strerror(errno); \
cl_log(LOG_ERR, fmt ": %s (%d)", ##args, err, errno); \
} while(0)
/* DBGLOG(lvl, fmt, ...): like cl_log(), but only emits when the global 'debug' is > 0. */
#define DBGLOG(lvl, fmt, args...) do { \
if (debug > 0) cl_log(lvl, fmt, ##args); \
} while(0)
/* Servant health state; defined in another translation unit. */
extern int servant_health;
/*
 * Takes a pcmk_health state, a level and a printf-style message; the
 * format attribute lets the compiler check 'format' (argument 3) against
 * the variadic arguments (from argument 4 on).
 */
void set_servant_health(enum pcmk_health state, int level, char const *format, ...) __attribute__ ((__format__ (__printf__, 3, 4)));
/* Predicates classifying a servant list entry. */
bool sbd_is_disk(struct servants_list_item *servant);
bool sbd_is_pcmk(struct servants_list_item *servant);
bool sbd_is_cluster(struct servants_list_item *servant);
/*
 * Derive the watchdog warning threshold from a watchdog timeout:
 *   - timeouts below 5 yield 2,
 *   - otherwise three fifths of the timeout. Below ULONG_MAX / 3 the
 *     multiplication is done first (more precise); above that the
 *     division is done first so the intermediate cannot overflow.
 * Every use of the argument is parenthesized so that any expression
 * (e.g. "a ? b : c") can be passed. The argument is still evaluated more
 * than once, so it must not have side effects.
 */
#define calculate_timeout_watchdog_warn(timeout) \
	((timeout) < 5 ? 2 : \
	((timeout) < (ULONG_MAX / 3) ? \
	(((unsigned long) (timeout)) * 3 / 5) : (((unsigned long) (timeout)) / 5 * 3)))
diff --git a/tests/sbd-testbed.c b/tests/sbd-testbed.c
index 858b1be..91920f2 100644
--- a/tests/sbd-testbed.c
+++ b/tests/sbd-testbed.c
@@ -1,729 +1,745 @@
#define _GNU_SOURCE
#include <stdlib.h>
#include <dlfcn.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/reboot.h>
#include <stdarg.h>
#include <stddef.h>
#include <fcntl.h>
#include <libaio.h>
#include <linux/watchdog.h>
#include <linux/fs.h>
#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <glib.h>
#include <errno.h>
/*
 * g_unix_fd_add() is a GLib function (available since GLib 2.36), so its
 * presence has to be derived from the GLib version via GLIB_CHECK_VERSION
 * (from <glib.h>, included above) and not from the C library version.
 * For older GLib a minimal replacement on top of GIOChannel is provided.
 */
#if GLIB_CHECK_VERSION(2,36,0)
#include <glib-unix.h>
#else
#include <glib/giochannel.h>

typedef gboolean (*GUnixFDSourceFunc) (gint fd,
                                       GIOCondition condition,
                                       gpointer user_data);

/*
 * Adapter from the GIOFunc signature to GUnixFDSourceFunc.
 * 'data' carries the GUnixFDSourceFunc to call; the callback itself
 * always receives NULL as its user_data.
 */
static gboolean
GIOFunc2GUnixFDSourceFunc(GIOChannel *source,
                          GIOCondition condition,
                          gpointer data)
{
    return ((GUnixFDSourceFunc) data) (
                g_io_channel_unix_get_fd(source),
                condition, NULL);
}

/*
 * Replacement for g_unix_fd_add(): watch 'fd' for 'condition' on the
 * default main context and call 'function' when it triggers.
 * Limitation: 'user_data' is not forwarded (the data slot of the watch is
 * used to transport 'function').
 * Returns the source id, or 0 if no channel could be created.
 */
static guint
g_unix_fd_add(gint fd,
              GIOCondition condition,
              GUnixFDSourceFunc function,
              gpointer user_data)
{
    GIOChannel *chan = g_io_channel_unix_new (fd);

    if (chan == NULL) {
        return 0;
    } else {
        return g_io_add_watch(chan,
                              condition,
                              GIOFunc2GUnixFDSourceFunc,
                              (gpointer) function);
    }
}
#endif
/*
 * Function-pointer types for the original implementations of the
 * functions this file interposes. The matching orig_* pointers are
 * filled in by init().
 */
typedef int (*orig_open_f_type)(const char *pathname, int flags, ...);
typedef int (*orig_ioctl_f_type)(int fd, unsigned long int request, ...);
typedef ssize_t (*orig_write_f_type)(int fd, const void *buf, size_t count);
typedef int (*orig_close_f_type)(int fd);
typedef FILE *(*orig_fopen_f_type)(const char *pathname, const char *mode);
typedef int (*orig_fclose_f_type)(FILE *fp);
/* libaio entry points. */
typedef int (*orig_io_setup_f_type)(int nr_events, io_context_t *ctx_idp);
+typedef int (*orig_io_destroy_f_type)(io_context_t ctx_id);
typedef int (*orig_io_submit_f_type)(io_context_t ctx_id, long nr, struct iocb *ios[]);
typedef int (*orig_io_getevents_f_type)(io_context_t ctx_id, long min_nr, long nr,
struct io_event *events, struct timespec *timeout);
typedef int (*orig_io_cancel_f_type)(io_context_t ctx_id, struct iocb *iocb,
struct io_event *result);
/* Set to 1 by init() once it has started its one-time setup. */
static int is_init = 0;
/* Log stream of the testbed: the SBD_PRELOAD_LOG file or a duplicate of stderr (see init()). */
static FILE *log_fp = NULL;
/* Up to three device paths from SBD_DEVICE and the descriptors open() handed out for them. */
static char *sbd_device[3] = {NULL, NULL, NULL};
static int sbd_device_fd[3] = {-1, -1, -1};
/* Streams returned by the interposed fopen() for the redirected sysrq files. */
static FILE *sysrq_fp = NULL;
static FILE *sysrq_trigger_fp = NULL;
/* Watchdog emulation: device path from SBD_WATCHDOG_DEV, its descriptor,
 * the current timeout in seconds, the helper child's pid and the pipe
 * used to send it commands. */
static char *watchdog_device = NULL;
static int watchdog_device_fd = -1;
static int watchdog_timeout = -1;
static pid_t watchdog_pid = -1;
static int watchdog_pipe[2] = {-1, -1};
/* GLib source ids used in the watchdog child: pipe watch and pending timeout. */
static guint watchdog_source_id = 0;
static int watchdog_timer_id = 0;
/* Original implementations, resolved in init(). */
static orig_open_f_type orig_open = NULL;
static orig_ioctl_f_type orig_ioctl = NULL;
static orig_write_f_type orig_write = NULL;
static orig_close_f_type orig_close = NULL;
static orig_fopen_f_type orig_fopen = NULL;
static orig_fclose_f_type orig_fclose = NULL;
static orig_io_setup_f_type orig_io_setup = NULL;
+static orig_io_destroy_f_type orig_io_destroy = NULL;
static orig_io_submit_f_type orig_io_submit = NULL;
static orig_io_getevents_f_type orig_io_getevents = NULL;
static orig_io_cancel_f_type orig_io_cancel = NULL;
/* No orig_* pointers are kept for fprintf, fscanf and reboot:
 * fprintf is inlined as __fprintf_chk or
 * we have vfprintf.
 * For fscanf we have vfscanf.
 * The real reboot must never be called from the testbed
 * anyway.
 */
/* State of the AIO translation (active when SBD_TRANSLATE_AIO is "yes"):
 * the single request accepted by io_submit() and not yet completed ... */
static struct iocb *pending_iocb = NULL;
/* ... and the one static context whose address io_setup() hands out. */
struct io_context { int context_num; };
static struct io_context our_io_context = {.context_num = 1};
static int translate_aio = 0;
/* Main loop run by the watchdog child process. */
static GMainLoop *mainloop = NULL;
/*
 * Disabled (#if 0): signal handler that would log when the process exits
 * while a watchdog timer is still armed. Its only references are the
 * commented-out mainloop_add_signal() calls in open().
 */
#if 0
static void
watchdog_shutdown(int nsig)
{
if (watchdog_timer_id > 0) {
fprintf(log_fp, "exiting with watchdog-timer armed\n");
}
}
#endif
/*
 * Look up 'symbol' in 'handle' with dlsym().
 * Never returns NULL: a failed lookup is reported on stderr and the
 * process exits with status 1.
 */
static void *
dlsym_fatal(void *handle, const char *symbol)
{
    void *addr = dlsym(handle, symbol);

    if (addr != NULL) {
        return addr;
    }

    fprintf(stderr, "Failed looking up symbol %s\n", symbol);
    exit(1);
}
/*
 * One-time lazy setup, called at the start of the interposed functions:
 * resolves the original implementations, opens the log stream and reads
 * the SBD_* environment variables. Calls after the first are no-ops.
 */
static void
init (void)
{
void *handle;
if (!is_init) {
const char *value;
int i;
char *token, *str, *str_orig;
/* Set the flag before doing any work: the fopen() used further down is
 * the interposed one from this file and calls init() again. */
is_init = 1;
/* libc functions: take the next definition after this object. */
orig_open = (orig_open_f_type)dlsym_fatal(RTLD_NEXT,"open");
orig_ioctl = (orig_ioctl_f_type)dlsym_fatal(RTLD_NEXT,"ioctl");
orig_close = (orig_close_f_type)dlsym_fatal(RTLD_NEXT,"close");
orig_write = (orig_write_f_type)dlsym_fatal(RTLD_NEXT,"write");
orig_fopen = (orig_fopen_f_type)dlsym_fatal(RTLD_NEXT,"fopen");
orig_fclose = (orig_fclose_f_type)dlsym_fatal(RTLD_NEXT,"fclose");
/* libaio functions are looked up in an explicitly opened libaio.so.1. */
handle = dlopen("libaio.so.1", RTLD_NOW);
if (!handle) {
fprintf(stderr, "Failed opening libaio.so.1\n");
exit(1);
}
orig_io_setup = (orig_io_setup_f_type)dlsym_fatal(handle,"io_setup");
+ orig_io_destroy = (orig_io_destroy_f_type)dlsym_fatal(handle,"io_destroy");
orig_io_submit = (orig_io_submit_f_type)dlsym_fatal(handle,"io_submit");
orig_io_getevents = (orig_io_getevents_f_type)dlsym_fatal(handle,"io_getevents");
orig_io_cancel = (orig_io_cancel_f_type)dlsym_fatal(handle,"io_cancel");
/* NOTE(review): the pointers above are used after this dlclose();
 * presumably libaio stays mapped because the host program links it - confirm. */
dlclose(handle);
/* Log target: append to the file named by SBD_PRELOAD_LOG, otherwise
 * write to a duplicate of stderr. */
value = getenv("SBD_PRELOAD_LOG");
if (value) {
log_fp = fopen(value, "a");
} else {
int fd = dup(fileno(stderr));
if (fd >= 0) {
log_fp = fdopen(fd, "w");
}
}
/* A missing log stream is only reported here; log_fp stays NULL. */
if (log_fp == NULL) {
fprintf(stderr, "couldn't open log-file\n");
}
/* Path of the watchdog device to emulate. */
value = getenv("SBD_WATCHDOG_DEV");
if (value) {
watchdog_device = strdup(value);
}
/* SBD_DEVICE: up to three device paths separated by ';'.
 * strtok() modifies its input, hence the private copy. */
value = getenv("SBD_DEVICE");
if ((value) && (str = str_orig = strdup(value))) {
for (i = 0; i < 3; i++, str = NULL) {
token = strtok(str, ";");
if (token == NULL) {
break;
}
sbd_device[i] = strdup(token);
}
free(str_orig);
}
/* AIO translation is enabled only by the exact value "yes". */
value = getenv("SBD_TRANSLATE_AIO");
if ((value) && !strcmp(value, "yes")) {
translate_aio = 1;
}
}
}
// ***** handling of watchdog & block-devices ****
/*
 * GLib timeout callback, installed by watchdog_dispatch_callback() via
 * g_timeout_add(): emulates the watchdog firing.
 * Logs the event, closes the log stream and then sends SIGKILL to the
 * whole process group (killpg(0, ...)); it does not return.
 * 'data' is unused.
 */
static gboolean
watchdog_timeout_notify(gpointer data)
{
fprintf(log_fp, "watchdog fired after %ds - killing process group\n",
watchdog_timeout);
/* Close the log before the kill so the message is written out. */
fclose(log_fp);
log_fp = NULL;
killpg(0, SIGKILL);
exit(1);
}
/*
 * Callback of the watchdog child for activity on the read end of the
 * watchdog pipe.
 *
 * Any pending timeout is cancelled first, then one command line is read
 * byte by byte (at most sizeof(line)-1 bytes, stopping at a newline, at
 * end-of-file or on a read error):
 *   "trigger <n>s"  (re-)arms the timeout with n seconds,
 *   "disarm"        leaves the timeout cancelled,
 *   anything else   is logged as unknown.
 *
 * Returns FALSE on G_IO_HUP (which removes the source), TRUE otherwise.
 * 'fd' and 'user_data' are unused; the pipe is taken from watchdog_pipe.
 */
static gboolean
watchdog_dispatch_callback (gint fd,
                            GIOCondition condition,
                            gpointer user_data)
{
    char line[256];
    int pos = 0;

    if (condition & G_IO_HUP) {
        return FALSE;
    }

    /* every command starts by stopping the running timer */
    if (watchdog_timer_id > 0) {
        g_source_remove(watchdog_timer_id);
    }
    watchdog_timer_id = 0;

    while ((size_t) pos < sizeof(line) - 1) {
        ssize_t got;

        do {
            got = read(watchdog_pipe[0], &line[pos], 1);
        } while ((got == -1) && (errno == EINTR));

        if (got == -1) {
            fprintf(log_fp, "Couldn't read from watchdog-pipe\n");
        }
        if ((got <= 0) || (line[pos] == '\n')) {
            /* end of input or end of line terminates the command */
            line[pos] = '\0';
            break;
        }
        pos++;
    }
    line[sizeof(line) - 1] = '\0';

    if (sscanf(line, "trigger %ds", &watchdog_timeout) == 1) {
        watchdog_timer_id = g_timeout_add(watchdog_timeout * 1000, watchdog_timeout_notify, NULL);
    } else if (strcmp(line, "disarm") != 0) {
        /* "disarm" needs no action: the timer is stopped already */
        fprintf(log_fp, "unknown watchdog command\n");
    }

    return TRUE;
}
/*
 * Ask the watchdog child to (re-)arm its timer by sending
 * "trigger <watchdog_timeout>s\n" through the watchdog pipe.
 * Does nothing unless a positive timeout is set and the pipe exists;
 * an incomplete write is logged.
 */
static void
watchdog_arm (void)
{
    char cmd[256];
    size_t cmd_len;

    if ((watchdog_timeout <= 0) || (watchdog_pipe[1] < 0)) {
        return;
    }

    sprintf(cmd, "trigger %ds\n", watchdog_timeout);
    cmd_len = strlen(cmd);
    if (write(watchdog_pipe[1], cmd, cmd_len) != cmd_len) {
        fprintf(log_fp, "Failed tickling watchdog via pipe\n");
    }
}
/*
 * Forget the current timeout (watchdog_timeout = -1) and, if the
 * watchdog pipe exists, tell the watchdog child to stop its timer by
 * sending "disarm\n". An incomplete write is logged.
 */
static void
watchdog_disarm (void)
{
    static const char cmd[] = "disarm\n";
    const size_t cmd_len = sizeof(cmd) - 1;   /* without the terminator */

    watchdog_timeout = -1;

    if (watchdog_pipe[1] < 0) {
        return;
    }
    if (write(watchdog_pipe[1], cmd, cmd_len) != cmd_len) {
        fprintf(log_fp, "Failed disarming watchdog via pipe\n");
    }
}
/*
 * Interposed open().
 *
 * - A path equal to one of the configured sbd devices is opened without
 *   O_DIRECT and the resulting descriptor is remembered in
 *   sbd_device_fd[].
 * - A path equal to the configured watchdog device is redirected to
 *   /dev/null and the descriptor is remembered in watchdog_device_fd.
 *   On the first such open a pipe is created and a child is forked that
 *   runs a GLib main loop emulating the watchdog timer; the parent keeps
 *   the (non-blocking) write end of the pipe.
 * - Everything else is passed through unchanged.
 *
 * The optional mode argument is only fetched when the flags require one:
 * O_CREAT set, or all bits of O_TMPFILE set. O_TMPFILE contains the
 * O_DIRECTORY bit, so a plain "flags & O_TMPFILE" test would also be true
 * for ordinary O_DIRECTORY opens, which pass no mode.
 *
 * Returns whatever the original open() returns.
 */
int
open(const char *pathname, int flags, ...)
{
    int i, fd;
    int devnum = -1;
    int is_wd_dev = 0;
    int needs_mode;
    va_list ap;

    init();

    for (i = 0; i < 3; i++) {
        if (sbd_device[i]) {
            if (strcmp(sbd_device[i], pathname) == 0) {
                devnum = i;
                flags &= ~O_DIRECT;
                break;
            }
        }
    }

    if (watchdog_device) {
        if (strcmp(watchdog_device, pathname) == 0) {
            is_wd_dev = 1;
            /* the watchdog child is started once, on the first open */
            if (watchdog_pipe[1] == -1) {
                if (pipe(watchdog_pipe) == -1) {
                    fprintf(log_fp, "Creating pipe for watchdog failed\n");
                } else {
                    watchdog_pid = fork();
                    switch (watchdog_pid) {
                        case -1:
                            fprintf(log_fp, "Forking watchdog-child failed\n");
                            break;
                        case 0:
                            /* child: drop the interposing configuration so
                             * that nothing is redirected in here */
                            free(watchdog_device);
                            watchdog_device = NULL;
                            for (i = 0; i < 3; i++) {
                                free(sbd_device[i]);
                                sbd_device[i] = NULL;
                            }
                            close(watchdog_pipe[1]);
                            if (fcntl(watchdog_pipe[0], F_SETFL, O_NONBLOCK) == -1) {
                                // don't block on read for timer to be handled
                                fprintf(log_fp,
                                        "Failed setting watchdog-pipe-read to non-blocking");
                            }
                            mainloop = g_main_loop_new(NULL, FALSE);
                            // mainloop_add_signal(SIGTERM, watchdog_shutdown);
                            // mainloop_add_signal(SIGINT, watchdog_shutdown);
                            watchdog_source_id = g_unix_fd_add(watchdog_pipe[0],
                                                               G_IO_IN,
                                                               watchdog_dispatch_callback,
                                                               NULL);
                            if (watchdog_source_id == 0) {
                                fprintf(log_fp, "Failed creating source for watchdog-pipe\n");
                                exit(1);
                            }
                            g_main_loop_run(mainloop);
                            g_main_loop_unref(mainloop);
                            exit(0);
                        default:
                            /* parent: keep only the write end */
                            close(watchdog_pipe[0]);
                            if (fcntl(watchdog_pipe[1], F_SETFL, O_NONBLOCK) == -1) {
                                fprintf(log_fp,
                                        "Failed setting watchdog-pipe-write to non-blocking");
                            }
                            break;
                    }
                }
            }
            pathname = "/dev/null";
        }
    }

    needs_mode = ((flags & O_CREAT) != 0);
#ifdef O_TMPFILE
    if ((flags & O_TMPFILE) == O_TMPFILE) {
        needs_mode = 1;
    }
#endif

    if (needs_mode) {
        va_start (ap, flags);
        fd = orig_open(pathname, flags, va_arg(ap, mode_t));
        va_end (ap);
    } else {
        fd = orig_open(pathname, flags);
    }

    if (devnum >= 0) {
        sbd_device_fd[devnum] = fd;
    } else if (is_wd_dev) {
        watchdog_device_fd = fd;
    }
    return fd;
}
/*
 * Interposed write().
 * A non-empty write to the emulated watchdog descriptor drives the
 * watchdog emulation: a first byte of 'V' disarms it, anything else
 * (re-)arms it. The data is then always handed to the original write().
 */
ssize_t
write(int fd, const void *buf, size_t count)
{
    init();

    if ((count >= 1) && (fd == watchdog_device_fd)) {
        const char *bytes = buf;

        if (bytes[0] == 'V') {
            watchdog_disarm();
        } else {
            watchdog_arm();
        }
    }

    return orig_write(fd, buf, count);
}
/*
 * Interposed ioctl().
 *
 *   BLKSSZGET         on a tracked sbd device: report a sector size of
 *                     512 without calling the original.
 *   WDIOC_SETTIMEOUT  on the emulated watchdog: store the timeout and
 *                     arm the emulation.
 *   WDIOC_SETOPTIONS  on the emulated watchdog: disarm the emulation if
 *                     WDIOS_DISABLECARD is set.
 *   WDIOC_GETSUPPORT  always passed through.
 *   anything else     logged and passed through.
 *
 * The same requests on other descriptors go to the original ioctl().
 * Returns 0 for emulated requests, otherwise the original's result.
 */
int
ioctl(int fd, unsigned long int request, ...)
{
    va_list ap;
    int rv = -1;
    int idx;
    int is_sbd_dev = 0;

    init();

    va_start(ap, request);

    switch (request) {
        case BLKSSZGET:
            for (idx = 0; idx < 3; idx++) {
                if (sbd_device_fd[idx] == fd) {
                    is_sbd_dev = 1;
                    break;
                }
            }
            if (is_sbd_dev) {
                *(va_arg(ap, int *)) = 512;
                rv = 0;
            } else {
                rv = orig_ioctl(fd, request, va_arg(ap, int *));
            }
            break;

        case WDIOC_SETTIMEOUT:
            if (fd != watchdog_device_fd) {
                rv = orig_ioctl(fd, request, va_arg(ap, int *));
            } else {
                watchdog_timeout = *va_arg(ap, int *);
                watchdog_arm();
                rv = 0;
            }
            break;

        case WDIOC_SETOPTIONS:
            if (fd != watchdog_device_fd) {
                rv = orig_ioctl(fd, request, va_arg(ap, int *));
            } else {
                int options = *va_arg(ap, int *);

                if (options & WDIOS_DISABLECARD) {
                    watchdog_disarm();
                }
                rv = 0;
            }
            break;

        case WDIOC_GETSUPPORT:
            rv = orig_ioctl(fd, request, va_arg(ap, struct watchdog_info *));
            break;

        default:
            fprintf(log_fp, "ioctl using unknown request = 0x%08lx", request);
            rv = orig_ioctl(fd, request, va_arg(ap, void *));
            break;
    }

    va_end(ap);
    return rv;
}
/*
 * Interposed close().
 * Forgets the descriptor if it is the emulated watchdog or, failing
 * that, the first matching tracked sbd device, then calls the original
 * close().
 */
int
close(int fd)
{
    init();

    if (fd != watchdog_device_fd) {
        int slot = 0;

        while ((slot < 3) && (sbd_device_fd[slot] != fd)) {
            slot++;
        }
        if (slot < 3) {
            sbd_device_fd[slot] = -1;
        }
    } else {
        watchdog_device_fd = -1;
    }

    return orig_close(fd);
}
// ***** end - handling of watchdog & block-devices ****
// ***** handling of sysrq, sysrq-trigger & reboot ****
/*
 * Interposed fopen().
 * Opens of /proc/sys/kernel/sysrq (only when mode is not exactly "w")
 * and of /proc/sysrq-trigger (any mode) are redirected to /dev/null and
 * the resulting stream is remembered in sysrq_fp / sysrq_trigger_fp.
 * All other opens are passed through unchanged.
 *
 * NOTE(review): as written, a "w" open of /proc/sys/kernel/sysrq is NOT
 * redirected - confirm that this is intended and not an inverted test.
 */
FILE *
fopen(const char *pathname, const char *mode)
{
    enum { TARGET_PLAIN, TARGET_SYSRQ, TARGET_SYSRQ_TRIGGER } target = TARGET_PLAIN;
    FILE *stream;

    init();

    if ((strcmp("/proc/sys/kernel/sysrq", pathname) == 0) &&
        (strcmp("w", mode) != 0)) {
        target = TARGET_SYSRQ;
    } else if (strcmp("/proc/sysrq-trigger", pathname) == 0) {
        target = TARGET_SYSRQ_TRIGGER;
    }

    if (target != TARGET_PLAIN) {
        pathname = "/dev/null";
    }

    stream = orig_fopen(pathname, mode);

    switch (target) {
        case TARGET_SYSRQ:
            sysrq_fp = stream;
            break;
        case TARGET_SYSRQ_TRIGGER:
            sysrq_trigger_fp = stream;
            break;
        default:
            break;
    }

    return stream;
}
/*
 * Interposed fclose().
 * Drops the bookkeeping pointer if 'fp' is one of the redirected sysrq
 * streams (sysrq_fp is checked first), then calls the original fclose().
 */
int
fclose(FILE *fp)
{
    FILE **tracked = NULL;

    init();

    if (fp == sysrq_fp) {
        tracked = &sysrq_fp;
    } else if (fp == sysrq_trigger_fp) {
        tracked = &sysrq_trigger_fp;
    }
    if (tracked != NULL) {
        *tracked = NULL;
    }

    return orig_fclose(fp);
}
/*
 * Interposed fprintf() (named __fprintf_chk when the build uses
 * _FORTIFY_SOURCE > 1, because calls are then redirected to that symbol).
 *
 * Output to the redirected /proc/sysrq-trigger stream is not written
 * anywhere; it is formatted into a local buffer and its first character
 * is interpreted instead: 'c' is logged and then kills the whole process
 * group, any other character is only logged.
 * Output to every other stream is passed to vfprintf().
 *
 * A NULL stream is rejected with -1. Without that check a NULL stream
 * compares equal to an unset (NULL) sysrq_trigger_fp, and logging to an
 * unopened log_fp from inside this function would recurse without end.
 * For the same reason log_fp is only used here when it is open.
 *
 * Returns the number of characters produced, or a negative value on
 * error.
 */
#if defined(__USE_FORTIFY_LEVEL) && (__USE_FORTIFY_LEVEL > 1)
int
__fprintf_chk(FILE *stream, int flag, const char *format, ...)
#else
int
fprintf(FILE *stream, const char *format, ...)
#endif
{
    va_list ap;
    int rv;

    init();

    if (stream == NULL) {
        /* nothing to write to (e.g. the log file could not be opened) */
        return -1;
    }

    va_start (ap, format);
    if (stream == sysrq_trigger_fp) {
        char buf[256];

        rv = vsnprintf(buf, sizeof(buf), format, ap);
        if (rv >= 1) {
            if (log_fp != NULL) {
                fprintf(log_fp, "sysrq-trigger ('%c') - %s\n", buf[0],
                        (buf[0] == 'c')?"killing process group":"don't kill but wait for reboot-call");
            }
            if (buf[0] == 'c') {
                /* close the log first so the message above is written out */
                if (log_fp != NULL) {
                    fclose(log_fp);
                    log_fp = NULL;
                }
                killpg(0, SIGKILL);
                exit(1);
            }
        }
    } else {
        rv = vfprintf(stream, format, ap);
    }
    va_end (ap);
    return rv;
}
/*
 * Interposed fscanf(): makes sure the testbed is initialised and then
 * forwards to vfscanf() unchanged.
 * Returns vfscanf()'s result.
 */
int
fscanf(FILE *stream, const char *format, ...)
{
    va_list args;
    int matched;

    init();

    va_start(args, format);
    matched = vfscanf(stream, format, args);
    va_end(args);

    return matched;
}
/*
 * Interposed reboot(): the real reboot must never happen in the testbed.
 * The request is logged ("poweroff" for RB_POWER_OFF, "reboot" for
 * everything else), the log is closed so the message is written out, and
 * the whole process group is killed. Does not return.
 *
 * Like the other interposed functions this calls init() first, so the
 * log stream is set up even if reboot() is the first interposed call;
 * the log is skipped if it could not be opened.
 */
int
reboot (int __howto)
{
    init();

    if (log_fp != NULL) {
        fprintf(log_fp, "reboot (%s) - exiting inquisitor process\n",
                (__howto == RB_POWER_OFF)?"poweroff":"reboot");
        fclose(log_fp);
        log_fp = NULL;
    }
    killpg(0, SIGKILL);
    exit(1);
}
// ***** end - handling of sysrq, sysrq-trigger & reboot ****
// ***** aio translate ****
/*
 * Disabled (#if 0) reference excerpt: the struct iocb fields and the two
 * prep helpers that the translation functions below rely on. The active
 * definitions come from <libaio.h>; this copy is never compiled and the
 * struct shown here is abbreviated (it lacks the 'u' member the helpers
 * use).
 */
#if 0
struct iocb {
void *data;
unsigned key;
short aio_lio_opcode;
short aio_reqprio;
int aio_fildes;
};
/* Prepare 'iocb' for reading 'count' bytes at 'offset' from 'fd' into 'buf'. */
static inline void io_prep_pread(struct iocb *iocb, int fd, void *buf, size_t count, long long offset)
{
memset(iocb, 0, sizeof(*iocb));
iocb->aio_fildes = fd;
iocb->aio_lio_opcode = IO_CMD_PREAD;
iocb->aio_reqprio = 0;
iocb->u.c.buf = buf;
iocb->u.c.nbytes = count;
iocb->u.c.offset = offset;
}
/* Prepare 'iocb' for writing 'count' bytes from 'buf' to 'fd' at 'offset'. */
static inline void io_prep_pwrite(struct iocb *iocb, int fd, void *buf, size_t count, long long offset)
{
memset(iocb, 0, sizeof(*iocb));
iocb->aio_fildes = fd;
iocb->aio_lio_opcode = IO_CMD_PWRITE;
iocb->aio_reqprio = 0;
iocb->u.c.buf = buf;
iocb->u.c.nbytes = count;
iocb->u.c.offset = offset;
}
#endif
/*
 * Interposed io_setup().
 * Without AIO translation the call goes to libaio. With translation the
 * address of the single static context (our_io_context) is handed out;
 * exactly one event slot is supported.
 *
 * Errors are reported the way the libaio wrappers do it, as negated
 * errno values:
 *   -EINVAL  nr_events is not positive
 *   -EAGAIN  more than one event requested
 *   -EFAULT  ctx_idp is NULL
 * Returns 0 on success.
 */
int io_setup(int nr_events, io_context_t *ctx_idp)
{
    init();

    if (!translate_aio) {
        return orig_io_setup(nr_events, ctx_idp);
    }

    if (nr_events <= 0) {
        return -EINVAL;
    }
    if (nr_events > 1) {
        return -EAGAIN;
    }
    if (ctx_idp == NULL) {
        return -EFAULT;
    }

    *ctx_idp = &our_io_context;
    return 0;
}
/*
 * Interposed io_destroy() (function added by this change).
 * Without AIO translation the call goes to libaio. With translation the
 * only valid context is the static our_io_context, for which there is
 * nothing to release, so 0 is returned.
 * NOTE(review): the error is returned as positive EINVAL, whereas the
 * libaio wrappers report errors as negated errno values - confirm which
 * convention the callers expect.
 */
+int io_destroy(io_context_t ctx_id)
+{
+ init();
+
+ if (!translate_aio) {
+ return orig_io_destroy(ctx_id);
+ }
+
+ if (ctx_id != &our_io_context) {
+ return EINVAL;
+ }
+ return 0;
+}
/*
 * Interposed io_submit().
 * Without AIO translation the call goes to libaio. With translation at
 * most one IO_CMD_PREAD / IO_CMD_PWRITE request is accepted and parked
 * in pending_iocb; it is executed synchronously by io_getevents().
 *
 * Returns the number of requests accepted (0 or 1). Errors are negated
 * errno values, as with the libaio wrappers, so that they cannot be
 * mistaken for a request count:
 *   -EAGAIN  a request is already pending, or nr > 1
 *   -EFAULT  nr == 1 but ios or ios[0] is NULL
 *   -EINVAL  foreign context, negative nr or unsupported opcode
 *   -EBADF   the request's file descriptor is not open
 */
int io_submit(io_context_t ctx_id, long nr, struct iocb *ios[])
{
    init();

    if (!translate_aio) {
        return orig_io_submit(ctx_id, nr, ios);
    }

    if ((pending_iocb != NULL) ||
        (nr > 1)) {
        return -EAGAIN;
    }
    if ((nr == 1) && ((ios == NULL) || (ios[0] == NULL))) {
        return -EFAULT;
    }
    if ((ctx_id != &our_io_context) ||
        (nr < 0) ||
        ((nr == 1) &&
         (ios[0]->aio_lio_opcode != IO_CMD_PREAD) &&
         (ios[0]->aio_lio_opcode != IO_CMD_PWRITE))) {
        return -EINVAL;
    }
    if (nr == 0) {
        /* nothing submitted; ios must not be touched (it may be NULL) */
        return 0;
    }
    if ((fcntl(ios[0]->aio_fildes, F_GETFD) == -1) && (errno == EBADF)) {
        return -EBADF;
    }

    pending_iocb = ios[0];
    return 1;
}
/*
 * Interposed io_getevents().
 * Without AIO translation the call goes to libaio. With translation the
 * request parked by io_submit() is executed right here with
 * pread()/pwrite() and reported as completed in events[0]; res holds the
 * return value of that call. 'timeout' is not used: if nothing is
 * pending the function returns 0 immediately.
 *
 * Only min_nr == 1 and nr == 1 are supported.
 * Returns the number of events stored (0 or 1). Errors are negated errno
 * values, as with the libaio wrappers, so that they cannot be mistaken
 * for an event count:
 *   -EINVAL  foreign context or unsupported min_nr / nr
 *   -EFAULT  events is NULL
 */
int io_getevents(io_context_t ctx_id, long min_nr, long nr,
                 struct io_event *events, struct timespec *timeout)
{
    init();

    if (!translate_aio) {
        return orig_io_getevents(ctx_id, min_nr, nr, events, timeout);
    }

    if ((ctx_id != &our_io_context) ||
        (min_nr != 1) ||
        (nr != 1)) {
        return -EINVAL;
    }
    if (events == NULL) {
        return -EFAULT;
    }
    if (pending_iocb == NULL) {
        return 0;
    }

    switch (pending_iocb->aio_lio_opcode) {
        case IO_CMD_PWRITE:
            events->res = pwrite(pending_iocb->aio_fildes,
                                 pending_iocb->u.c.buf,
                                 pending_iocb->u.c.nbytes,
                                 pending_iocb->u.c.offset);
            break;
        case IO_CMD_PREAD:
            events->res = pread(pending_iocb->aio_fildes,
                                pending_iocb->u.c.buf,
                                pending_iocb->u.c.nbytes,
                                pending_iocb->u.c.offset);
            break;
        default:
            events->res = 0;
            break;
    }

    events->data = pending_iocb->data;
    events->obj = pending_iocb;
    events->res2 = 0;
    pending_iocb = NULL;
    return 1;
}
/*
 * Interposed io_cancel().
 * Without AIO translation the call goes to libaio. With translation the
 * request parked by io_submit() is dropped without being executed and
 * reported in *result with res == 0.
 *
 * Returns 0 on success. Errors are negated errno values, as with the
 * libaio wrappers:
 *   -EINVAL  foreign context
 *   -EFAULT  iocb or result is NULL
 *   -EAGAIN  iocb is not the pending request
 */
int io_cancel(io_context_t ctx_id, struct iocb *iocb,
              struct io_event *result)
{
    init();

    if (!translate_aio) {
        return orig_io_cancel(ctx_id, iocb, result);
    }

    if (ctx_id != &our_io_context) {
        return -EINVAL;
    }
    if ((iocb == NULL) || (result == NULL)) {
        return -EFAULT;
    }
    if (pending_iocb != iocb) {
        return -EAGAIN;
    }

    result->data = iocb->data;
    result->obj = iocb;
    result->res = 0;
    result->res2 = 0;
    pending_iocb = NULL;
    return 0;
}
// ***** end - aio translate ****
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Mon, Apr 21, 6:03 PM (1 d, 3 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1665030
Default Alt Text
(59 KB)
Attached To
Mode
rS SBD
Attached
Detach File
Event Timeline
Log In to Comment