diff --git a/tools/storage_mon.c b/tools/storage_mon.c index f829c5081..b0e277cbe 100644 --- a/tools/storage_mon.c +++ b/tools/storage_mon.c @@ -1,303 +1,312 @@ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __FreeBSD__ #include #endif #define MAX_DEVICES 25 #define DEFAULT_TIMEOUT 10 static void usage(char *name, FILE *f) { fprintf(f, "usage: %s [-hv] [-d ]... [-s ]... [-t ]\n", name); fprintf(f, " --device device to test, up to %d instances\n", MAX_DEVICES); fprintf(f, " --score score if device fails the test. Must match --device count\n"); fprintf(f, " --timeout max time to wait for a device test to come back. in seconds (default %d)\n", DEFAULT_TIMEOUT); fprintf(f, " --inject-errors-percent Generate EIO errors %% of the time (for testing only)\n"); fprintf(f, " --verbose emit extra output to stdout\n"); fprintf(f, " --help print this message\n"); } /* Check one device */ static void *test_device(const char *device, int verbose, int inject_error_percent) { uint64_t devsize; int flags = O_RDONLY | O_DIRECT; int device_fd; int res; off_t seek_spot; if (verbose) { printf("Testing device %s\n", device); } device_fd = open(device, flags); if (device_fd < 0) { if (errno != EINVAL) { fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno)); exit(-1); } flags &= ~O_DIRECT; device_fd = open(device, flags); if (device_fd < 0) { fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno)); exit(-1); } } #ifdef __FreeBSD__ res = ioctl(device_fd, DIOCGMEDIASIZE, &devsize); #else res = ioctl(device_fd, BLKGETSIZE64, &devsize); #endif if (res < 0) { fprintf(stderr, "Failed to get device size for %s: %s\n", device, strerror(errno)); goto error; } if (verbose) { printf("%s: opened %s O_DIRECT, size=%zu\n", device, (flags & O_DIRECT)?"with":"without", devsize); } /* Don't fret about real randomness */ srand(time(NULL) + getpid()); /* Pick a random place on the device - sector aligned */ seek_spot = (rand() % (devsize-1024)) & 0xFFFFFFFFFFFFFE00; res = lseek(device_fd, seek_spot, SEEK_SET); if (res < 0) { fprintf(stderr, "Failed to seek %s: %s\n", device, strerror(errno)); goto error; } if (verbose) { printf("%s: reading from pos %ld\n", device, seek_spot); } if (flags & O_DIRECT) { int sec_size = 0; void *buffer; #ifdef __FreeBSD__ res = ioctl(device_fd, DIOCGSECTORSIZE, &sec_size); #else res = ioctl(device_fd, BLKSSZGET, &sec_size); #endif if (res < 0) { fprintf(stderr, "Failed to get block device sector size for %s: %s\n", device, strerror(errno)); goto error; } if (posix_memalign(&buffer, sysconf(_SC_PAGESIZE), sec_size) != 0) { fprintf(stderr, "Failed to allocate aligned memory: %s\n", strerror(errno)); goto error; } res = read(device_fd, buffer, sec_size); free(buffer); if (res < 0) { fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno)); goto error; } if (res < sec_size) { fprintf(stderr, "Failed to read %d bytes from %s, got %d\n", sec_size, device, res); goto error; } } else { char buffer[512]; res = read(device_fd, buffer, sizeof(buffer)); if (res < 0) { fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno)); goto error; } if (res < (int)sizeof(buffer)) { fprintf(stderr, "Failed to read %ld bytes from %s, got %d\n", sizeof(buffer), device, res); goto error; } } /* Fake an error */ if (inject_error_percent && ((rand() % 100) < inject_error_percent)) { fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n"); goto error; } res = close(device_fd); if (res != 0) { fprintf(stderr, "Failed to close %s: %s\n", device, strerror(errno)); exit(-1); } if (verbose) { printf("%s: done\n", device); } exit(0); error: close(device_fd); exit(-1); } +static int test_device_main(size_t device_count, char *devices[MAX_DEVICES], int scores[MAX_DEVICES], int verbose, int inject_error_percent, int timeout) +{ + pid_t test_forks[MAX_DEVICES]; + size_t i; + struct timespec ts; + time_t start_time; + size_t finished_count = 0; + int final_score = 0; + + memset(test_forks, 0, sizeof(test_forks)); + for (i=0; i ts.tv_sec)) { + for (i=0; i 0) { + w = waitpid(test_forks[i], &wstatus, WUNTRACED | WNOHANG | WCONTINUED); + if (w < 0) { + fprintf(stderr, "waitpid on %s failed: %s\n", devices[i], strerror(errno)); + return -1; + } + + if (w == test_forks[i]) { + if (WIFEXITED(wstatus)) { + if (WEXITSTATUS(wstatus) != 0) { + syslog(LOG_ERR, "Error reading from device %s", devices[i]); + final_score += scores[i]; + } + + finished_count++; + test_forks[i] = 0; + } + } + } + } + + usleep(100000); + + clock_gettime(CLOCK_REALTIME, &ts); + } + + /* See which threads have not finished */ + for (i=0; i 100) { fprintf(stderr, "inject_error_percent should be between 1 and 100\n"); return -1; } } break; case 'd': if (device_count < MAX_DEVICES) { devices[device_count++] = strdup(optarg); } else { fprintf(stderr, "too many devices, max is %d\n", MAX_DEVICES); return -1; } break; case 's': if (score_count < MAX_DEVICES) { int score = atoi(optarg); if (score < 1 || score > 10) { fprintf(stderr, "Score must be between 1 and 10 inclusive\n"); return -1; } scores[score_count++] = score; } else { fprintf(stderr, "too many scores, max is %d\n", MAX_DEVICES); return -1; } break; case 'v': verbose++; break; case 't': timeout = atoi(optarg); if (timeout < 1) { fprintf(stderr, "invalid timeout %d. Min 1, recommended %d (default)\n", timeout, DEFAULT_TIMEOUT); return -1; } break; case 'h': usage(argv[0], stdout); return 0; break; default: usage(argv[0], stderr); return -1; break; } } if (device_count == 0) { fprintf(stderr, "No devices to test, use the -d or --device argument\n"); return -1; } if (device_count != score_count) { fprintf(stderr, "There must be the same number of devices and scores\n"); return -1; } openlog("storage_mon", 0, LOG_DAEMON); - memset(test_forks, 0, sizeof(test_forks)); - for (i=0; i ts.tv_sec)) { - for (i=0; i 0) { - w = waitpid(test_forks[i], &wstatus, WUNTRACED | WNOHANG | WCONTINUED); - if (w < 0) { - fprintf(stderr, "waitpid on %s failed: %s\n", devices[i], strerror(errno)); - return -1; - } - - if (w == test_forks[i]) { - if (WIFEXITED(wstatus)) { - if (WEXITSTATUS(wstatus) != 0) { - syslog(LOG_ERR, "Error reading from device %s", devices[i]); - final_score += scores[i]; - } - - finished_count++; - test_forks[i] = 0; - } - } - } - } - - usleep(100000); - - clock_gettime(CLOCK_REALTIME, &ts); - } - - /* See which threads have not finished */ - for (i=0; i