diff --git a/tools/Makefile.am b/tools/Makefile.am index 1309223b4..08323fee3 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -1,83 +1,84 @@ # # heartbeat: Linux-HA heartbeat code # # Copyright (C) 2001 Michael Moerz # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # MAINTAINERCLEANFILES = Makefile.in SUBDIRS = ocft AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include halibdir = $(libexecdir)/heartbeat EXTRA_DIST = ocf-tester.8 sfex_init.8 sbin_PROGRAMS = sbin_SCRIPTS = ocf-tester halib_PROGRAMS = findif \ storage_mon halib_SCRIPTS = man8_MANS = ocf-tester.8 if BUILD_SFEX halib_PROGRAMS += sfex_daemon sbin_PROGRAMS += sfex_init sfex_stat man8_MANS += sfex_init.8 endif if USE_LIBNET halib_PROGRAMS += send_arp send_arp_SOURCES = send_arp.libnet.c send_arp_CFLAGS = @LIBNETDEFINES@ send_arp_LDADD = $(GLIBLIB) -lplumb @LIBNETLIBS@ else if SENDARP_LINUX halib_PROGRAMS += send_arp send_arp_SOURCES = send_arp.linux.c endif if NFSCONVERT halib_SCRIPTS += nfsconvert endif endif sfex_daemon_SOURCES = sfex_daemon.c sfex.h sfex_lib.c sfex_lib.h sfex_daemon_CFLAGS = -D_GNU_SOURCE sfex_daemon_LDADD = $(GLIBLIB) -lplumb -lplumbgpl sfex_init_SOURCES = sfex_init.c sfex.h sfex_lib.c sfex_lib.h sfex_init_CFLAGS = -D_GNU_SOURCE sfex_init_LDADD = $(GLIBLIB) -lplumb -lplumbgpl sfex_stat_SOURCES = sfex_stat.c sfex.h sfex_lib.c sfex_lib.h sfex_stat_CFLAGS = -D_GNU_SOURCE sfex_stat_LDADD = $(GLIBLIB) -lplumb -lplumbgpl findif_SOURCES = findif.c storage_mon_SOURCES = storage_mon.c +storage_mon_CFLAGS = -D_GNU_SOURCE if BUILD_TICKLE halib_PROGRAMS += tickle_tcp tickle_tcp_SOURCES = tickle_tcp.c endif .PHONY: install-exec-hook diff --git a/tools/storage_mon.c b/tools/storage_mon.c index 930ead41c..ba87492fc 100644 --- a/tools/storage_mon.c +++ b/tools/storage_mon.c @@ -1,265 +1,303 @@ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __FreeBSD__ #include #endif #define MAX_DEVICES 25 #define DEFAULT_TIMEOUT 10 static void usage(char *name, FILE *f) { fprintf(f, "usage: %s [-hv] [-d ]... [-s ]... [-t ]\n", name); fprintf(f, " --device device to test, up to %d instances\n", MAX_DEVICES); fprintf(f, " --score score if device fails the test. Must match --device count\n"); fprintf(f, " --timeout max time to wait for a device test to come back. in seconds (default %d)\n", DEFAULT_TIMEOUT); fprintf(f, " --inject-errors-percent Generate EIO errors %% of the time (for testing only)\n"); fprintf(f, " --verbose emit extra output to stdout\n"); fprintf(f, " --help print this message\n"); } -/* Check one device */ -static void *test_device(const char *device, int verbose, int inject_error_percent) +static int open_device(const char *device, int verbose) { - uint64_t devsize; int device_fd; int res; + uint64_t devsize; off_t seek_spot; - char buffer[512]; - if (verbose) { - printf("Testing device %s\n", device); +#if defined(__linux__) || defined(__FreeBSD__) + device_fd = open(device, O_RDONLY|O_DIRECT); + if (device_fd >= 0) { + return device_fd; + } else if (errno != EINVAL) { + fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno)); + return -1; } +#endif device_fd = open(device, O_RDONLY); if (device_fd < 0) { fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno)); - exit(-1); + return -1; } #ifdef __FreeBSD__ res = ioctl(device_fd, DIOCGMEDIASIZE, &devsize); #else res = ioctl(device_fd, BLKGETSIZE64, &devsize); #endif if (res != 0) { fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno)); close(device_fd); - exit(-1); + return -1; } if (verbose) { fprintf(stderr, "%s: size=%zu\n", device, devsize); } + /* Don't fret about real randomness */ srand(time(NULL) + getpid()); /* Pick a random place on the device - sector aligned */ seek_spot = (rand() % (devsize-1024)) & 0xFFFFFFFFFFFFFE00; res = lseek(device_fd, seek_spot, SEEK_SET); if (res < 0) { fprintf(stderr, "Failed to seek %s: %s\n", device, strerror(errno)); close(device_fd); - exit(-1); + return -1; } - if (verbose) { printf("%s: reading from pos %ld\n", device, seek_spot); } + return device_fd; +} + +/* Check one device */ +static void *test_device(const char *device, int verbose, int inject_error_percent) +{ + int device_fd; + int sec_size = 0; + int res; + void *buffer; + + if (verbose) { + printf("Testing device %s\n", device); + } + + device_fd = open_device(device, verbose); + if (device_fd < 0) { + exit(-1); + } + + ioctl(device_fd, BLKSSZGET, &sec_size); + if (sec_size == 0) { + fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno)); + goto error; + } - res = read(device_fd, buffer, sizeof(buffer)); + if (posix_memalign(&buffer, sysconf(_SC_PAGESIZE), sec_size) != 0) { + fprintf(stderr, "Failed to allocate aligned memory: %s\n", strerror(errno)); + goto error; + } + + res = read(device_fd, buffer, sec_size); + free(buffer); if (res < 0) { fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno)); - close(device_fd); - exit(-1); + goto error; } - if (res < (int)sizeof(buffer)) { - fprintf(stderr, "Failed to read %ld bytes from %s, got %d\n", sizeof(buffer), device, res); - close(device_fd); - exit(-1); + if (res < sec_size) { + fprintf(stderr, "Failed to read %d bytes from %s, got %d\n", sec_size, device, res); + goto error; } /* Fake an error */ - if (inject_error_percent && ((rand() % 100) < inject_error_percent)) { - fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n"); - close(device_fd); - exit(-1); + if (inject_error_percent) { + srand(time(NULL) + getpid()); + if ((rand() % 100) < inject_error_percent) { + fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n"); + goto error; + } } res = close(device_fd); if (res != 0) { fprintf(stderr, "Failed to close %s: %s\n", device, strerror(errno)); - close(device_fd); exit(-1); } if (verbose) { printf("%s: done\n", device); } exit(0); + +error: + close(device_fd); + exit(-1); } int main(int argc, char *argv[]) { char *devices[MAX_DEVICES]; int scores[MAX_DEVICES]; pid_t test_forks[MAX_DEVICES]; size_t device_count = 0; size_t score_count = 0; size_t finished_count = 0; int timeout = DEFAULT_TIMEOUT; struct timespec ts; time_t start_time; size_t i; int final_score = 0; int opt, option_index; int verbose = 0; int inject_error_percent = 0; struct option long_options[] = { {"timeout", required_argument, 0, 't' }, {"device", required_argument, 0, 'd' }, {"score", required_argument, 0, 's' }, {"inject-errors-percent", required_argument, 0, 0 }, {"verbose", no_argument, 0, 'v' }, {"help", no_argument, 0, 'h' }, {0, 0, 0, 0 } }; while ( (opt = getopt_long(argc, argv, "hvt:d:s:", long_options, &option_index)) != -1 ) { switch (opt) { case 0: /* Long-only options */ if (strcmp(long_options[option_index].name, "inject-errors-percent") == 0) { inject_error_percent = atoi(optarg); if (inject_error_percent < 1 || inject_error_percent > 100) { fprintf(stderr, "inject_error_percent should be between 1 and 100\n"); return -1; } } break; case 'd': if (device_count < MAX_DEVICES) { devices[device_count++] = strdup(optarg); } else { fprintf(stderr, "too many devices, max is %d\n", MAX_DEVICES); return -1; } break; case 's': if (score_count < MAX_DEVICES) { int score = atoi(optarg); if (score < 1 || score > 10) { fprintf(stderr, "Score must be between 1 and 10 inclusive\n"); return -1; } scores[score_count++] = score; } else { fprintf(stderr, "too many scores, max is %d\n", MAX_DEVICES); return -1; } break; case 'v': verbose++; break; case 't': timeout = atoi(optarg); if (timeout < 1) { fprintf(stderr, "invalid timeout %d. Min 1, recommended %d (default)\n", timeout, DEFAULT_TIMEOUT); return -1; } break; case 'h': usage(argv[0], stdout); return 0; break; default: usage(argv[0], stderr); return -1; break; } } if (device_count == 0) { fprintf(stderr, "No devices to test, use the -d or --device argument\n"); return -1; } if (device_count != score_count) { fprintf(stderr, "There must be the same number of devices and scores\n"); return -1; } openlog("storage_mon", 0, LOG_DAEMON); memset(test_forks, 0, sizeof(test_forks)); for (i=0; i ts.tv_sec)) { for (i=0; i 0) { w = waitpid(test_forks[i], &wstatus, WUNTRACED | WNOHANG | WCONTINUED); if (w < 0) { fprintf(stderr, "waitpid on %s failed: %s\n", devices[i], strerror(errno)); return -1; } if (w == test_forks[i]) { if (WIFEXITED(wstatus)) { if (WEXITSTATUS(wstatus) != 0) { syslog(LOG_ERR, "Error reading from device %s", devices[i]); final_score += scores[i]; } finished_count++; test_forks[i] = 0; } } } } usleep(100000); clock_gettime(CLOCK_REALTIME, &ts); } /* See which threads have not finished */ for (i=0; i