diff --git a/tools/findif.c b/tools/findif.c index a25395fec..ab108a3c4 100644 --- a/tools/findif.c +++ b/tools/findif.c @@ -1,845 +1,845 @@ /* * findif.c: Finds an interface which can route a given address * * It's really simple to write in C, but hard to write in the shell... * * This code is dependent on IPV4 addressing conventions... * Sorry. * * Copyright (C) 2000 Alan Robertson * Copyright (C) 2001 Matt Soffen * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * *********************************************************** * * All our arguments come through the environment as OCF * environment variables as below: * * OCF_RESKEY_ip * OCF_RESKEY_broadcast * OCF_RESKEY_nic * OCF_RESKEY_cidr_netmask * * If the CIDR netmask is omitted, we choose the netmask associated with * the route we selected. * * If the broadcast address was omitted, we assume the highest address * in the subnet. * * If the interface is omitted, we choose the interface associated with * the route we selected. * * * See http://www.doom.net/docs/netmask.html for a table explaining * CIDR address format and their relationship to life, the universe * and everything. 
* */ #include #include #include #include #include #include #include #include #ifdef HAVE_SYS_SOCKET_H #include #endif #ifdef HAVE_SYS_SOCKIO_H #include #endif #include #include #include #ifdef __linux__ #undef __OPTIMIZE__ /* * This gets rid of some silly -Wtraditional warnings on Linux * because the netinet header has some slightly funky constants * in it. */ #endif /* __linux__ */ #include #include #include #include #define DEBUG 0 #define EOS '\0' #define PROCROUTE "/proc/net/route" #define ROUTEPARM "-n get" #ifndef HAVE_STRNLEN /* Any system that doesn't provide strnlen() only has itself to blame */ #define strnlen(str, max) strlen(str) #endif /* * "route -n get iii.jjj.kkk.lll" can, on Solaris at least, * return the word "default" as the value from "mask" and "dest", * typically if the host is remote, reached over a default route. * We should probably treat such a mask as "0.0.0.0". * * Define "MASK_DEFAULT_TO_ZERO" to enable this interpretation. * * This is better for Solaris and is probably suitable (or irrelevant) * for other OSes also. But if it breaks another OS, then reduce the * "hash-if 1" below to exclude that OS. * (David Lee, Jan 2006) */ #if 1 # define MASK_DEFAULT_TO_ZERO #endif static int OutputInCIDR=0; /* * Different OSes offer different mechanisms to obtain this information. * Not all this can be determined at configure-time; need a run-time element. * * typedef ... SearchRoute ...: * For routines that interface on these mechanisms. 
* Return code: * <0: mechanism invalid, so try next mechanism * 0: mechanism worked: good answer * >0: mechanism worked: bad answer * On non-zero, errmsg may have been filled with an error message */ typedef int SearchRoute (char *address, struct in_addr *in , struct in_addr *addr_out, char *best_if, size_t best_iflen , unsigned long *best_netmask, char *errmsg , int errmsglen); static SearchRoute SearchUsingProcRoute; static SearchRoute SearchUsingRouteCmd; static SearchRoute *search_mechs[] = { &SearchUsingProcRoute, &SearchUsingRouteCmd, NULL }; void GetAddress (char **address, char **netmaskbits , char **bcast_arg, char **if_specified); int ConvertNetmaskBitsToInt(char *netmaskbits); void ValidateNetmaskBits(int bits, unsigned long *netmask); int ValidateIFName (const char *ifname, struct ifreq *ifr); int netmask_bits (unsigned long netmask); char * get_first_loopback_netdev(char * ifname); int is_loopback_interface(char * ifname); char * get_ifname(char * buf, char * ifname); int ConvertQuadToInt(char *dest); static const char *cmdname = "findif"; #define OCF_SUCCESS 0 #define OCF_ERR_GENERIC 1 #define OCF_ERR_ARGS 2 #define OCF_ERR_UNIMPLEMENTED 3 #define OCF_ERR_PERM 4 #define OCF_ERR_INSTALLED 5 #define OCF_ERR_CONFIGURED 6 #define OCF_NOT_RUNNING 7 void usage(int ec); #define PATH_PROC_NET_DEV "/proc/net/dev" #define DELIM '/' #define BAD_BROADCAST (0L) #define MAXSTR 128 static int SearchUsingProcRoute (char *address, struct in_addr *in , struct in_addr *addr_out, char *best_if, size_t best_iflen , unsigned long *best_netmask , char *errmsg, int errmsglen) { unsigned long flags, refcnt, use, gw, mask; unsigned long dest; long metric = LONG_MAX; long best_metric = LONG_MAX; int rc = OCF_SUCCESS; char buf[2048]; char interface[MAXSTR]; FILE *routefd = NULL; if ((routefd = fopen(PROCROUTE, "r")) == NULL) { snprintf(errmsg, errmsglen , "Cannot open %s for reading" , PROCROUTE); rc = OCF_ERR_GENERIC; goto out; } /* Skip first (header) line */ if (fgets(buf, 
sizeof(buf), routefd) == NULL) { snprintf(errmsg, errmsglen , "Cannot skip first line from %s" , PROCROUTE); rc = OCF_ERR_GENERIC; goto out; } *best_netmask = 0; while (fgets(buf, sizeof(buf), routefd) != NULL) { if (sscanf(buf, "%[^\t]\t%lx%lx%lx%lx%lx%lx%lx" , interface, &dest, &gw, &flags, &refcnt, &use , &metric, &mask) != 8) { snprintf(errmsg, errmsglen, "Bad line in %s: %s" , PROCROUTE, buf); rc = OCF_ERR_GENERIC; goto out; } if ( (in->s_addr&mask) == (in_addr_t)(dest&mask) && metric <= best_metric && mask >= *best_netmask) { best_metric = metric; *best_netmask = mask; strncpy(best_if, interface, best_iflen); } } if (best_metric == LONG_MAX) { snprintf(errmsg, errmsglen, "No route to %s\n", address); rc = OCF_ERR_GENERIC; } out: if (routefd) { fclose(routefd); } return(rc); } static int SearchUsingRouteCmd (char *address, struct in_addr *in , struct in_addr *addr_out, char *best_if, size_t best_iflen , unsigned long *best_netmask , char *errmsg, int errmsglen) { char mask[20]; char routecmd[MAXSTR]; int best_metric = INT_MAX; char buf[2048]; char interface[MAXSTR]; char *cp, *sp; int done = 0; FILE *routefd = NULL; uint32_t maskbits; /* Open route and get the information */ snprintf (routecmd, sizeof(routecmd), "%s %s %s" , ROUTE, ROUTEPARM, address); routefd = popen (routecmd, "r"); if (routefd == NULL) return (OCF_ERR_GENERIC); mask[0] = EOS; interface[0] = EOS; while ((done < 3) && fgets(buf, sizeof(buf), routefd)) { int buflen = strnlen(buf, sizeof(buf)); /*cp = buf;*/ sp = buf + buflen; while (sp!=buf && isspace((int)*(sp-1))) { --sp; } *sp = EOS; if (strstr (buf, "mask:")) { /*strsep(&cp, ":");cp++;*/ strtok(buf, ":"); cp = strtok(NULL, ":"); if (cp) { cp++; strncpy(mask, cp, sizeof(mask) - 1); *(mask + sizeof(mask) - 1) = '\0'; done++; } } if (strstr (buf, "interface:")) { /*strsep(&cp, ":");cp++;*/ strtok(buf, ":"); cp = strtok(NULL, ":"); if (cp) { cp++; strncpy(interface, cp, sizeof(interface) - 1); *(interface + sizeof(interface) - 1) = '\0'; 
done++; } } } pclose(routefd); /* * Check to see if mask isn't available. It may not be * returned if multiple IP's are defined. * use 255.255.255.255 for mask then */ /* I'm pretty sure this is the wrong behavior... * I think the right behavior is to declare an error and give up. * The admin didn't define his routes correctly. Fix them. * It's useless to take over an IP address with no way to * return packets to the originator. Without the right subnet * mask, you can't reply to any packets you receive. */ if (strnlen(mask, sizeof(mask)) == 0) { strncpy (mask, "255.255.255.255", sizeof(mask)); } /* * Solaris (at least) can return the word "default" for mask and dest. * For the moment, let's interpret this as: * mask: 0.0.0.0 * This was manifesting itself under "BasicSanityCheck", which tries * to use a remote IP number; these typically use the "default" route. * Better schemes are warmly invited... */ #ifdef MASK_DEFAULT_TO_ZERO if (strncmp(mask, "default", sizeof("default")) == 0) { strncpy (mask, "0.0.0.0", sizeof(mask)); } #endif if (inet_pton(AF_INET, mask, &maskbits) <= 0) { snprintf(errmsg, errmsglen, "mask [%s] not valid.", mask); return(OCF_ERR_CONFIGURED); } if (inet_pton(AF_INET, address, addr_out) <= 0) { snprintf(errmsg, errmsglen , "IP address [%s] not valid.", address); return(OCF_ERR_CONFIGURED); } if ((in->s_addr & maskbits) == (addr_out->s_addr & maskbits)) { if (interface[0] == EOS) { snprintf(errmsg, errmsglen, "No interface found."); return(OCF_ERR_GENERIC); } best_metric = 0; *best_netmask = maskbits; strncpy(best_if, interface, best_iflen); } if (best_metric == INT_MAX) { snprintf(errmsg, errmsglen, "No route to %s\n", address); return(OCF_ERR_GENERIC); } return (OCF_SUCCESS); } /* * Getaddress gets all its real parameters from the OCF environment * variables that its callers already use. 
*/ void GetAddress (char **address, char **netmaskbits , char **bcast_arg, char **if_specified) { /* * Here are out input environment variables: * * OCF_RESKEY_ip ip address * OCF_RESKEY_cidr_netmask netmask of interface * OCF_RESKEY_broadcast broadcast address for interface * OCF_RESKEY_nic interface to assign to * */ *address = getenv("OCF_RESKEY_ip"); *netmaskbits = getenv("OCF_RESKEY_cidr_netmask"); if (*netmaskbits == NULL || **netmaskbits == EOS) { *netmaskbits = getenv("OCF_RESKEY_netmask"); } *bcast_arg = getenv("OCF_RESKEY_broadcast"); *if_specified = getenv("OCF_RESKEY_nic"); } int ConvertNetmaskBitsToInt(char *netmaskbits) { size_t nmblen = strnlen(netmaskbits, 3); /* Maximum netmask is 32 */ if (nmblen > 2 || nmblen == 0 || (strspn(netmaskbits, "0123456789") != nmblen)) return -1; else return atoi(netmaskbits); } void ValidateNetmaskBits(int bits, unsigned long *netmask) { /* Maximum netmask is 32 */ if (bits < 1 || bits > 32) { fprintf(stderr , "Invalid netmask specification [%d]" , bits); usage(OCF_ERR_CONFIGURED); /*not reached */ } bits = 32 - bits; *netmask = (1L<<(bits))-1L; *netmask = ((~(*netmask))&0xffffffffUL); *netmask = htonl(*netmask); } int ValidateIFName(const char *ifname, struct ifreq *ifr) { int skfd = -1; char *colonptr; if ( (skfd = socket(PF_INET, SOCK_DGRAM, 0)) == -1 ) { fprintf(stderr, "%s\n", strerror(errno)); return -2; } strncpy(ifr->ifr_name, ifname, IFNAMSIZ - 1); *(ifr->ifr_name + sizeof(ifr->ifr_name) - 1) = '\0'; /* Contain a ":"? 
Probably an error, but treat as warning at present */ if ((colonptr = strchr(ifname, ':')) != NULL) { fprintf(stderr, "%s: warning: name may be invalid\n", ifr->ifr_name); } if (ioctl(skfd, SIOCGIFFLAGS, ifr) < 0) { fprintf(stderr, "%s: unknown interface: %s\n" , ifr->ifr_name, strerror(errno)); close(skfd); /* return -1 only if ifname is known to be invalid */ return -1; } close(skfd); return 0; } int netmask_bits(unsigned long netmask) { int j; netmask = netmask & 0xFFFFFFFFUL; for (j=0; j <= 32; ++j) { if ((netmask >> j)&0x1) { break; } } return 32 - j; } char * get_first_loopback_netdev(char * output) { char buf[512]; FILE * fd = NULL; char *rc = NULL; if (!output) { fprintf(stderr, "output buf is a null pointer.\n"); goto out; } fd = fopen(PATH_PROC_NET_DEV, "r"); if (!fd) { fprintf(stderr, "Warning: cannot open %s (%s).\n", PATH_PROC_NET_DEV, strerror(errno)); goto out; } /* Skip the first two lines */ if (!fgets(buf, sizeof(buf), fd) || !fgets(buf, sizeof(buf), fd)) { fprintf(stderr, "Warning: cannot read header from %s.\n", PATH_PROC_NET_DEV); goto out; } while (fgets(buf, sizeof(buf), fd)) { char name[IFNAMSIZ]; if (NULL == get_ifname(buf, name)) { /* Maybe somethin is wrong, anyway continue */ continue; } if (is_loopback_interface(name)) { strncpy(output, name, IFNAMSIZ); rc = output; goto out; } } out: if (fd) { fclose(fd); } return rc; } int is_loopback_interface(char * ifname) { struct ifreq ifr; memset(&ifr, 0, sizeof(ifr)); if (ValidateIFName(ifname, &ifr) < 0) return 0; if (ifr.ifr_flags & IFF_LOOPBACK) { /* this is a loopback device. 
*/ return 1; } else { return 0; } } char * get_ifname(char * buf, char * ifname) { char * start, * end, * buf_border; buf_border = buf + strnlen(buf, 512); start = buf; while (isspace((int) *start) && (start != buf_border)) { start++; } end = start; while ((*end != ':') && (end != buf_border)) { end++; } if ( start == buf_border || end == buf_border ) { /* Over the border of buf */ return NULL; } *end = '\0'; strncpy(ifname, start, IFNAMSIZ); return ifname; } int ConvertQuadToInt(char *dest) { struct in_addr ad; if (inet_pton(AF_INET, dest, &ad) <= 0) return -1; return netmask_bits(ntohl(ad.s_addr)); } int main(int argc, char ** argv) { char * address = NULL; char * bcast_arg = NULL; char * netmaskbits = NULL; struct in_addr in; struct in_addr addr_out; unsigned long netmask = 0; char best_if[MAXSTR]; char * if_specified = NULL; struct ifreq ifr; unsigned long best_netmask = UINT_MAX; int argerrs = 0; int nmbits; cmdname=argv[0]; memset(&addr_out, 0, sizeof(addr_out)); memset(&in, 0, sizeof(in)); memset(&ifr, 0, sizeof(ifr)); switch (argc) { case 1: /* No -C argument */ break; case 2: /* Hopefully a -C argument */ if (strncmp(argv[1], "-C", sizeof("-C")) != 0) { argerrs=1; } OutputInCIDR=1; break; default: argerrs=1; break; } if (argerrs) { usage(OCF_ERR_ARGS); /* not reached */ return(1); } GetAddress (&address, &netmaskbits, &bcast_arg , &if_specified); if (address == NULL || *address == EOS) { fprintf(stderr, "ERROR: IP address parameter is mandatory."); usage(OCF_ERR_CONFIGURED); /* not reached */ } /* Is the IP address we're supposed to find valid? 
*/ if (inet_pton(AF_INET, address, (void *)&in) <= 0) { fprintf(stderr, "IP address [%s] not valid.", address); usage(OCF_ERR_CONFIGURED); /* not reached */ } if (netmaskbits != NULL && *netmaskbits != EOS) { if (strchr(netmaskbits, '.') != NULL) { nmbits = ConvertQuadToInt(netmaskbits); fprintf(stderr, "Converted dotted-quad netmask to CIDR as: %d\n", nmbits); }else{ nmbits = ConvertNetmaskBitsToInt(netmaskbits); } if (nmbits < 0) { fprintf(stderr, "Invalid netmask specification" " [%s]", netmaskbits); usage(OCF_ERR_CONFIGURED); /*not reached */ } /* Validate the netmaskbits field */ ValidateNetmaskBits (nmbits, &netmask); } if (if_specified != NULL && *if_specified != EOS) { if(ValidateIFName(if_specified, &ifr) < 0) { usage(OCF_ERR_CONFIGURED); /* not reached */ } strncpy(best_if, if_specified, sizeof(best_if) - 1); *(best_if + sizeof(best_if) - 1) = '\0'; }else{ SearchRoute **sr = search_mechs; char errmsg[MAXSTR] = "No valid mechanisms"; int rc = OCF_ERR_GENERIC; strcpy(best_if, "UNKNOWN"); while (*sr) { errmsg[0] = '\0'; rc = (*sr) (address, &in, &addr_out, best_if , sizeof(best_if) , &best_netmask, errmsg, sizeof(errmsg)); if (!rc) { /* Mechanism worked */ break; } sr++; } if (rc != 0) { /* No route, or all mechanisms failed */ if (*errmsg) { fprintf(stderr, "%s", errmsg); } return(rc); } } - if (netmaskbits) { + if (netmaskbits != NULL && *netmaskbits != EOS) { best_netmask = netmask; }else if (best_netmask == 0L) { /* On some distributions, there is no loopback related route item, this leads to the error here. My fix may be not good enough, please FIXME */ if (0 == strncmp(address, "127", 3)) { if (NULL != get_first_loopback_netdev(best_if)) { best_netmask = 0x000000ff; } else { fprintf(stderr, "No loopback interface found.\n"); return(OCF_ERR_GENERIC); } } else { fprintf(stderr , "ERROR: Cannot use default route w/o netmask [%s]\n" , address); return(OCF_ERR_GENERIC); } } /* Did they tell us the broadcast address? 
*/ if (bcast_arg && *bcast_arg != EOS) { /* Yes, they gave us a broadcast address. * It at least should be a valid IP address */ struct in_addr bcast_addr; if (inet_pton(AF_INET, bcast_arg, (void *)&bcast_addr) <= 0) { fprintf(stderr, "Invalid broadcast address [%s].", bcast_arg); usage(OCF_ERR_CONFIGURED); /* not reached */ } best_netmask = htonl(best_netmask); if (!OutputInCIDR) { printf("%s\tnetmask %d.%d.%d.%d\tbroadcast %s\n" , best_if , (int)((best_netmask>>24) & 0xff) , (int)((best_netmask>>16) & 0xff) , (int)((best_netmask>>8) & 0xff) , (int)(best_netmask & 0xff) , bcast_arg); }else{ printf("%s\tnetmask %d\tbroadcast %s\n" , best_if , netmask_bits(best_netmask) , bcast_arg); } }else{ /* No, we use a common broadcast address convention */ unsigned long def_bcast; /* Common broadcast address */ def_bcast = (in.s_addr | (~best_netmask)); #if DEBUG fprintf(stderr, "best_netmask = %08lx, def_bcast = %08lx\n" , best_netmask, def_bcast); #endif /* Make things a bit more machine-independent */ best_netmask = htonl(best_netmask); def_bcast = htonl(def_bcast); if (!OutputInCIDR) { printf("%s\tnetmask %d.%d.%d.%d\tbroadcast %d.%d.%d.%d\n" , best_if , (int)((best_netmask>>24) & 0xff) , (int)((best_netmask>>16) & 0xff) , (int)((best_netmask>>8) & 0xff) , (int)(best_netmask & 0xff) , (int)((def_bcast>>24) & 0xff) , (int)((def_bcast>>16) & 0xff) , (int)((def_bcast>>8) & 0xff) , (int)(def_bcast & 0xff)); }else{ printf("%s\tnetmask %d\tbroadcast %d.%d.%d.%d\n" , best_if , netmask_bits(best_netmask) , (int)((def_bcast>>24) & 0xff) , (int)((def_bcast>>16) & 0xff) , (int)((def_bcast>>8) & 0xff) , (int)(def_bcast & 0xff)); } } return(0); } void usage(int ec) { fprintf(stderr, "\n" "%s version 2.99.1 Copyright Alan Robertson\n" "\n" "Usage: %s [-C]\n" "Options:\n" " -C: Output netmask as the number of bits rather " "than as 4 octets.\n" "Environment variables:\n" "OCF_RESKEY_ip ip address (mandatory!)\n" "OCF_RESKEY_cidr_netmask netmask of interface\n" "OCF_RESKEY_broadcast 
broadcast address for interface\n" "OCF_RESKEY_nic interface to assign to\n" , cmdname, cmdname); exit(ec); } /* Iface Destination Gateway Flags RefCnt Use Metric Mask MTU Window IRTT eth0 33D60987 00000000 0005 0 0 0 FFFFFFFF 0 0 0 eth0 00D60987 00000000 0001 0 0 0 00FFFFFF 0 0 0 lo 0000007F 00000000 0001 0 0 0 000000FF 0 0 0 eth0 00000000 FED60987 0003 0 0 0 00000000 0 0 0 netstat -rn outpug from RedHat Linux 6.0 Kernel IP routing table Destination Gateway Genmask Flags MSS Window irtt Iface 192.168.85.2 0.0.0.0 255.255.255.255 UH 0 0 0 eth1 10.0.0.2 0.0.0.0 255.255.255.255 UH 0 0 0 eth2 208.132.134.61 0.0.0.0 255.255.255.255 UH 0 0 0 eth0 208.132.134.32 0.0.0.0 255.255.255.224 U 0 0 0 eth0 192.168.85.0 0.0.0.0 255.255.255.0 U 0 0 0 eth1 10.0.0.0 0.0.0.0 255.255.255.0 U 0 0 0 eth2 127.0.0.0 0.0.0.0 255.0.0.0 U 0 0 0 lo 0.0.0.0 208.132.134.33 0.0.0.0 UG 0 0 0 eth0 |-------------------------------------------------------------------------------- netstat -rn output from FreeBSD 3.3 Routing tables Internet: Destination Gateway Flags Refs Use Netif Expire default 209.61.94.161 UGSc 3 8 pn0 192.168 link#1 UC 0 0 xl0 192.168.0.2 0:60:8:a4:91:fd UHLW 0 38 lo0 192.168.0.255 ff:ff:ff:ff:ff:ff UHLWb 1 7877 xl0 209.61.94.160/29 link#2 UC 0 0 pn0 209.61.94.161 0:a0:cc:26:c2:ea UHLW 6 17265 pn0 1105 209.61.94.162 0:a0:cc:27:1c:fb UHLW 1 568 pn0 1098 209.61.94.163 0:a0:cc:29:1f:86 UHLW 0 4749 pn0 1095 209.61.94.166 0:a0:cc:27:2d:e1 UHLW 0 12 lo0 209.61.94.167 ff:ff:ff:ff:ff:ff UHLWb 0 10578 pn0 |-------------------------------------------------------------------------------- netstat -rn output from FreeBSD 4.2 Routing tables Internet: Destination Gateway Flags Refs Use Netif Expire default 64.65.195.1 UGSc 1 11 dc0 64.65.195/24 link#1 UC 0 0 dc0 => 64.65.195.1 0:3:42:3b:0:dd UHLW 2 0 dc0 1131 64.65.195.184 0:a0:cc:29:1f:86 UHLW 2 18098 dc0 1119 64.65.195.194 0:a0:cc:27:2d:e1 UHLW 3 335161 dc0 943 64.65.195.200 52:54:0:db:33:b3 UHLW 0 13 dc0 406 64.65.195.255 ff:ff:ff:ff:ff:ff 
UHLWb 1 584 dc0 127.0.0.1 127.0.0.1 UH 0 0 lo0 192.168/16 link#2 UC 0 0 vx0 => 192.168.0.1 0:20:af:e2:f0:36 UHLW 0 2 lo0 192.168.255.255 ff:ff:ff:ff:ff:ff UHLWb 0 1 vx0 Internet6: Destination Gateway Flags Netif Expire ::1 ::1 UH lo0 fe80::%dc0/64 link#1 UC dc0 fe80::%vx0/64 link#2 UC vx0 fe80::%lo0/64 fe80::1%lo0 Uc lo0 ff01::/32 ::1 U lo0 ff02::%dc0/32 link#1 UC dc0 ff02::%vx0/32 link#2 UC vx0 ff02::%lo0/32 fe80::1%lo0 UC lo0 */ diff --git a/tools/storage_mon.c b/tools/storage_mon.c index 1aae29e58..cc415e97f 100644 --- a/tools/storage_mon.c +++ b/tools/storage_mon.c @@ -1,891 +1,898 @@ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __FreeBSD__ #include #endif #include #include #include #include #include #include #include #include #include #define MAX_DEVICES 25 #define DEFAULT_TIMEOUT 10 #define DEFAULT_INTERVAL 30 #define DEFAULT_PIDFILE HA_VARRUNDIR "storage_mon.pid" #define DEFAULT_ATTRNAME "#health-storage_mon" #define SMON_GET_RESULT_COMMAND "get_check_value" #define SMON_RESULT_OK "green" #define SMON_RESULT_NG "red" #define SMON_RESULT_COMMAND_ERROR "unknown command" #define SMON_BUFF_1MEG 1048576 #define SMON_MAX_IPCSNAME 256 #define SMON_MAX_MSGSIZE 128 #define SMON_MAX_RESP_SIZE 100 #define PRINT_STORAGE_MON_ERR(fmt, ...) if (!daemonize) { \ fprintf(stderr, fmt"\n", __VA_ARGS__); \ } else { \ syslog(LOG_ERR, fmt, __VA_ARGS__); \ } #define PRINT_STORAGE_MON_ERR_NOARGS(str) if (!daemonize) { \ fprintf(stderr, str"\n"); \ } else { \ syslog(LOG_ERR, str); \ } #define PRINT_STORAGE_MON_INFO(fmt, ...) 
if (!daemonize) { \ printf(fmt"\n", __VA_ARGS__); \ } else { \ syslog(LOG_INFO, fmt, __VA_ARGS__); \ } struct storage_mon_timer_data { int interval; }; struct storage_mon_check_value_req { struct qb_ipc_request_header hdr; char message[SMON_MAX_MSGSIZE]; }; struct storage_mon_check_value_res { struct qb_ipc_response_header hdr; char message[SMON_MAX_MSGSIZE]; }; char *devices[MAX_DEVICES]; int scores[MAX_DEVICES]; size_t device_count = 0; int timeout = DEFAULT_TIMEOUT; int verbose = 0; int inject_error_percent = 0; const char *attrname = DEFAULT_ATTRNAME; gboolean daemonize = FALSE; int shutting_down = FALSE; static qb_ipcs_service_t *ipcs; int final_score = 0; int response_final_score = 0; pid_t test_forks[MAX_DEVICES]; size_t finished_count = 0; gboolean daemon_check_first_all_devices = FALSE; static qb_loop_t *storage_mon_poll_handle; static qb_loop_timer_handle timer_handle; static qb_loop_timer_handle expire_handle; static struct storage_mon_timer_data timer_d; static int test_device_main(gpointer data); static void wrap_test_device_main(void *data); static void usage(char *name, FILE *f) { fprintf(f, "usage: %s [-hv] [-d ]... [-s ]... [-t ]\n", name); fprintf(f, " --device device to test, up to %d instances\n", MAX_DEVICES); fprintf(f, " --score score if device fails the test. Must match --device count\n"); fprintf(f, " --timeout max time to wait for a device test to come back. in seconds (default %d)\n", DEFAULT_TIMEOUT); fprintf(f, " --inject-errors-percent Generate EIO errors %% of the time (for testing only)\n"); fprintf(f, " --daemonize test run in daemons.\n"); fprintf(f, " --client client connection to daemon. requires the attrname option.\n"); fprintf(f, " --interval interval to test. 
in seconds (default %d)(for daemonize only)\n", DEFAULT_INTERVAL); fprintf(f, " --pidfile file path to record pid (default %s)(for daemonize only)\n", DEFAULT_PIDFILE); fprintf(f, " --attrname attribute name to update test result (default %s)(for daemonize/client only)\n", DEFAULT_ATTRNAME); fprintf(f, " --verbose emit extra output to stdout\n"); fprintf(f, " --help print this message\n"); } /* Check one device */ static void *test_device(const char *device, int verbose, int inject_error_percent) { uint64_t devsize; int flags = O_RDONLY | O_DIRECT; int device_fd; int res; off_t seek_spot; if (verbose) { printf("Testing device %s\n", device); } device_fd = open(device, flags); if (device_fd < 0) { if (errno != EINVAL) { PRINT_STORAGE_MON_ERR("Failed to open %s: %s", device, strerror(errno)); exit(-1); } flags &= ~O_DIRECT; device_fd = open(device, flags); if (device_fd < 0) { PRINT_STORAGE_MON_ERR("Failed to open %s: %s", device, strerror(errno)); exit(-1); } } #ifdef __FreeBSD__ res = ioctl(device_fd, DIOCGMEDIASIZE, &devsize); #else res = ioctl(device_fd, BLKGETSIZE64, &devsize); #endif if (res < 0) { PRINT_STORAGE_MON_ERR("Failed to get device size for %s: %s", device, strerror(errno)); goto error; } if (verbose) { PRINT_STORAGE_MON_INFO("%s: opened %s O_DIRECT, size=%zu", device, (flags & O_DIRECT)?"with":"without", devsize); } /* Don't fret about real randomness */ srand(time(NULL) + getpid()); /* Pick a random place on the device - sector aligned */ seek_spot = (rand() % (devsize-1024)) & 0xFFFFFFFFFFFFFE00; res = lseek(device_fd, seek_spot, SEEK_SET); if (res < 0) { PRINT_STORAGE_MON_ERR("Failed to seek %s: %s", device, strerror(errno)); goto error; } if (verbose) { PRINT_STORAGE_MON_INFO("%s: reading from pos %ld", device, seek_spot); } if (flags & O_DIRECT) { int sec_size = 0; void *buffer; #ifdef __FreeBSD__ res = ioctl(device_fd, DIOCGSECTORSIZE, &sec_size); #else res = ioctl(device_fd, BLKSSZGET, &sec_size); #endif if (res < 0) { 
PRINT_STORAGE_MON_ERR("Failed to get block device sector size for %s: %s", device, strerror(errno)); goto error; } if (posix_memalign(&buffer, sysconf(_SC_PAGESIZE), sec_size) != 0) { PRINT_STORAGE_MON_ERR("Failed to allocate aligned memory: %s", strerror(errno)); goto error; } res = read(device_fd, buffer, sec_size); free(buffer); if (res < 0) { PRINT_STORAGE_MON_ERR("Failed to read %s: %s", device, strerror(errno)); goto error; } if (res < sec_size) { PRINT_STORAGE_MON_ERR("Failed to read %d bytes from %s, got %d", sec_size, device, res); goto error; } } else { char buffer[512]; res = read(device_fd, buffer, sizeof(buffer)); if (res < 0) { PRINT_STORAGE_MON_ERR("Failed to read %s: %s", device, strerror(errno)); goto error; } if (res < (int)sizeof(buffer)) { PRINT_STORAGE_MON_ERR("Failed to read %ld bytes from %s, got %d", sizeof(buffer), device, res); goto error; } } /* Fake an error */ if (inject_error_percent && ((rand() % 100) < inject_error_percent)) { PRINT_STORAGE_MON_ERR_NOARGS("People, please fasten your seatbelts, injecting errors!"); goto error; } res = close(device_fd); if (res != 0) { PRINT_STORAGE_MON_ERR("Failed to close %s: %s", device, strerror(errno)); exit(-1); } if (verbose) { PRINT_STORAGE_MON_INFO("%s: done", device); } exit(0); error: close(device_fd); exit(-1); } static gboolean is_child_runnning(void) { size_t i; for (i=0; i 0 ) { stop_child(test_forks[i], SIGTERM); } } } /* Set a timer for termination. 
*/ qb_loop_timer_add(storage_mon_poll_handle, QB_LOOP_HIGH, 0, NULL, wrap_test_device_main, &timer_handle); return 0; } static size_t find_child_pid(int pid) { size_t i; for (i=0; i 0 ) { if (test_forks[i] == pid) { return i; } } } return -1; } static int32_t sigchld_handler(int32_t sig, void *data) { pid_t pid; size_t index; int status; if (is_child_runnning()) { while(1) { pid = waitpid(-1, &status, WNOHANG); if (pid > 0) { if (WIFEXITED(status)) { index = find_child_pid(pid); if (index >= 0) { /* If the expire timer is running, no timeout has occurred, */ /* so add the final_score from the exit code of the terminated child process. */ if (qb_loop_timer_is_running(storage_mon_poll_handle, expire_handle)) { if (WEXITSTATUS(status) !=0) { final_score += scores[index]; /* Update response values immediately in preparation for inquiries from clients. */ response_final_score = final_score; /* Even in the first demon mode check, if there is an error device, clear */ /* the flag to return the response to the client without waiting for all devices to finish. 
*/ daemon_check_first_all_devices = TRUE; } } finished_count++; test_forks[index] = 0; } } } else { break; } } } return 0; } static void child_shutdown(int nsig) { exit(1); } static int write_pid_file(const char *pidfile) { char *pid; char *dir, *str = NULL; int fd = -1; int rc = -1; int i, len; if (asprintf(&pid, "%jd", (intmax_t)getpid()) < 0) { syslog(LOG_ERR, "Failed to allocate memory to store PID"); pid = NULL; goto done; } str = strdup(pidfile); if (str == NULL) { syslog(LOG_ERR, "Failed to duplicate string ['%s']", pidfile); goto done; } dir = dirname(str); for (i = 1, len = strlen(dir); i < len; i++) { if (dir[i] == '/') { dir[i] = 0; if ((mkdir(dir, 0640) < 0) && (errno != EEXIST)) { syslog(LOG_ERR, "Failed to create directory %s: %s", dir, strerror(errno)); goto done; } dir[i] = '/'; } } if ((mkdir(dir, 0640) < 0) && (errno != EEXIST)) { syslog(LOG_ERR, "Failed to create directory %s: %s", dir, strerror(errno)); goto done; } fd = open(pidfile, O_CREAT | O_WRONLY, 0640); if (fd < 0) { syslog(LOG_ERR, "Failed to open %s: %s", pidfile, strerror(errno)); goto done; } if (write(fd, pid, strlen(pid)) != strlen(pid)) { syslog(LOG_ERR, "Failed to write '%s' to %s: %s", pid, pidfile, strerror(errno)); goto done; } - close(fd); rc = 0; done: + if (fd != -1) { + close(fd); + } if (pid != NULL) { free(pid); } if (str != NULL) { free(str); } return rc; } static void child_timeout_handler(void *data) { size_t i; if (is_child_runnning()) { for (i=0; i 0) { /* If timeout occurs before SIGCHLD, add child process failure score to final_score. */ final_score += scores[i]; /* Update response values immediately in preparation for inquiries from clients. */ response_final_score = final_score; /* Even in the first demon mode check, if there is an error device, clear */ /* the flag to return the response to the client without waiting for all devices to finish. 
*/ daemon_check_first_all_devices = TRUE; } } } } /* Adapter taking a void pointer: casts data to struct storage_mon_timer_data and calls test_device_main() with the address of its interval member, or with NULL when data is NULL. The return value of test_device_main() is discarded. */ static void wrap_test_device_main(void *data) { struct storage_mon_timer_data *timer_data = (struct storage_mon_timer_data*)data; test_device_main((timer_data != NULL) ? &timer_data->interval : NULL); } /* Device check routine. In daemon mode it jumps to done while shutting_down is TRUE, suppresses a new round while is_child_runnning() returns nonzero, and copies final_score into response_final_score once finished_count equals device_count. NOTE(review): parts of this function are truncated in this view, so the rest of its contract cannot be stated from here. */ static int test_device_main(gpointer data) { size_t i; struct timespec ts; time_t start_time; gboolean device_check = TRUE; if (daemonize) { if (shutting_down == TRUE) { goto done; } /* In the case of daemon mode, it is avoided that the timer is triggered and the number of */ /* child processes increases while the device monitoring child process is not completed. */ if (is_child_runnning()) { device_check = FALSE; } if (device_count == finished_count && device_check) { /* Update the result value for the client response once all checks have completed. */ response_final_score = final_score; if (!daemon_check_first_all_devices) { daemon_check_first_all_devices = TRUE; } } } if (device_check) { /* Reset final_score, finished_count, test_forks[] */ final_score = 0; finished_count = 0; memset(test_forks, 0, sizeof(test_forks)); /* NOTE(review): the text between the next loop header and the waitpid() polling loop appears to be missing in this view; verify against the full file. */ for (i=0; i ts.tv_sec)) { for (i=0; i 0) { /* WNOHANG makes this a non-blocking poll of the child recorded in test_forks[i]. */ w = waitpid(test_forks[i], &wstatus, WUNTRACED | WNOHANG | WCONTINUED); if (w < 0) { PRINT_STORAGE_MON_ERR("waitpid on %s failed: %s", devices[i], strerror(errno)); return -1; } if (w == test_forks[i]) { if (WIFEXITED(wstatus)) { /* A nonzero exit status adds scores[i] to final_score; any normal exit counts the child as finished and clears its slot. */ if (WEXITSTATUS(wstatus) != 0) { syslog(LOG_ERR, "Error reading from device %s", devices[i]); final_score += scores[i]; } finished_count++; test_forks[i] = 0; } } } } /* Sleep 100000 microseconds between polls, then refresh ts. */ usleep(100000); clock_gettime(CLOCK_REALTIME, &ts); } /* See which threads have not finished */ /* NOTE(review): the loop below and the beginning of the following function (presumably storage_mon_ipcs_msg_process_fn) appear truncated in this view. */ for (i=0; ihdr.id, request->hdr.size, request->message); /* send_score becomes -1 for an unrecognised command and -2 while daemon_check_first_all_devices is still unset. */ if (strcmp(request->message, SMON_GET_RESULT_COMMAND) != 0) { syslog(LOG_DEBUG, "request command is unknown."); send_score = -1; } else if (!daemon_check_first_all_devices) { send_score = -2; } /* Response header; size starts at the header size and is extended below by the length of the formatted score. */ resps.size = sizeof(struct qb_ipc_response_header); resps.id = 13; resps.error = 0; rc = snprintf(resp, SMON_MAX_RESP_SIZE, 
"%d", send_score) + 1; /* rc is the formatted length plus one, so the terminating NUL is included in the payload length */ iov[0].iov_len = sizeof(resps); iov[0].iov_base = &resps; iov[1].iov_len = rc; iov[1].iov_base = resp; resps.size += rc; /* Header and text go out as two iovec entries; a negative result is logged but 0 is still returned. */ res = qb_ipcs_response_sendv(c, iov, 2); if (res < 0) { errno = -res; syslog(LOG_ERR, "qb_ipcs_response_send : errno = %d", errno); } return 0; } /* Client mode: connects to the IPC service named storage_mon_ followed by attrname, sends SMON_GET_RESULT_COMMAND and returns the score parsed from the response message, or -1 when connecting, sending or receiving fails. */ static int32_t storage_mon_client(void) { struct storage_mon_check_value_req request; struct storage_mon_check_value_res response; qb_ipcc_connection_t *conn; char ipcs_name[SMON_MAX_IPCSNAME]; int32_t rc; snprintf(ipcs_name, SMON_MAX_IPCSNAME, "storage_mon_%s", attrname); conn = qb_ipcc_connect(ipcs_name, 0); if (conn == NULL) { syslog(LOG_ERR, "qb_ipcc_connect error\n"); return(-1); } snprintf(request.message, SMON_MAX_MSGSIZE, "%s", SMON_GET_RESULT_COMMAND); request.hdr.id = 0; request.hdr.size = sizeof(struct storage_mon_check_value_req); + response.hdr.id = 0; /* NOTE(review): the send and recv error returns below leave without calling qb_ipcc_disconnect(conn). */ rc = qb_ipcc_send(conn, &request, request.hdr.size); if (rc < 0) { syslog(LOG_ERR, "qb_ipcc_send error : %d\n", rc); return(-1); } /* NOTE(review): when qb_ipcc_send() returns 0 no receive is attempted, yet response.message is still read further down. */ if (rc > 0) { rc = qb_ipcc_recv(conn, &response, sizeof(response), -1); if (rc < 0) { syslog(LOG_ERR, "qb_ipcc_recv error : %d\n", rc); return(-1); } } qb_ipcc_disconnect(conn); /* Set score to result */ /* 0 : Normal. */ /* greater than 0 : monitoring error. */ /* -1 : communication system error. */ /* -2 : Not all checks completed for first device in daemon mode. 
*/ /* Diff hunk: the removed line parsed the message unconditionally; the added lines yield -1 when the first byte of the message is NUL. */ - rc = atoi(response.message); + if (strnlen(response.message, 1)) { + rc = atoi(response.message); + } else { + rc = -1; + } syslog(LOG_DEBUG, "daemon response[%d]: %s \n", response.hdr.id, response.message); return(rc); } /* Daemon mode: detaches with daemon(0, 0), writes the pid file, creates and runs the IPC service named storage_mon_ followed by attrname, registers SIGTERM and SIGCHLD handlers plus a timer that calls wrap_test_device_main, then runs the loop. Returns -1 when a setup step fails; after the loop ends it removes the pid file and returns 0. */ static int32_t storage_mon_daemon(int interval, const char *pidfile) { int32_t rc; char ipcs_name[SMON_MAX_IPCSNAME]; struct qb_ipcs_service_handlers service_handle = { .connection_accept = storage_mon_ipcs_connection_accept_fn, .connection_created = storage_mon_ipcs_connection_created_fn, .msg_process = storage_mon_ipcs_msg_process_fn, .connection_destroyed = storage_mon_ipcs_connection_destroyed_fn, .connection_closed = storage_mon_ipcs_connection_closed_fn, }; struct qb_ipcs_poll_handlers poll_handle = { .job_add = storage_mon_job_add, .dispatch_add = storage_mon_dispatch_add, .dispatch_mod = storage_mon_dispatch_mod, .dispatch_del = storage_mon_dispatch_del, }; if (daemon(0, 0) < 0) { syslog(LOG_ERR, "Failed to daemonize: %s", strerror(errno)); return -1; } umask(S_IWGRP | S_IWOTH | S_IROTH); if (write_pid_file(pidfile) < 0) { return -1; } snprintf(ipcs_name, SMON_MAX_IPCSNAME, "storage_mon_%s", attrname); ipcs = qb_ipcs_create(ipcs_name, 0, QB_IPC_NATIVE, &service_handle); if (ipcs == 0) { syslog(LOG_ERR, "qb_ipcs_create"); return -1; } qb_ipcs_enforce_buffer_size(ipcs, SMON_BUFF_1MEG); storage_mon_poll_handle = qb_loop_create(); qb_ipcs_poll_handlers_set(ipcs, &poll_handle); rc = qb_ipcs_run(ipcs); if (rc != 0) { errno = -rc; syslog(LOG_ERR, "qb_ipcs_run"); return -1; } qb_loop_signal_add(storage_mon_poll_handle, QB_LOOP_HIGH, SIGTERM, NULL, sigterm_handler, NULL); qb_loop_signal_add(storage_mon_poll_handle, QB_LOOP_MED, SIGCHLD, NULL, sigchld_handler, NULL); timer_d.interval = interval; /* The timer is added with a duration argument of 0 and timer_d as its callback data. */ qb_loop_timer_add(storage_mon_poll_handle, QB_LOOP_MED, 0, &timer_d, wrap_test_device_main, &timer_handle); qb_loop_run(storage_mon_poll_handle); qb_loop_destroy(storage_mon_poll_handle); unlink(pidfile); return 0; } /* Entry point. Parses the command line, then either queries a running daemon (client), becomes the daemon, or runs test_device_main(NULL) once and returns final_score. Returns -1 for an invalid option or argument and 0 after printing help. */ int main(int argc, char *argv[]) { 
size_t score_count = 0; int opt, option_index; int interval = DEFAULT_INTERVAL; const char *pidfile = DEFAULT_PIDFILE; gboolean client = FALSE; /* Entries whose last field is 0 have no short form; they arrive as opt 0 and are matched by name below. */ struct option long_options[] = { {"timeout", required_argument, 0, 't' }, {"device", required_argument, 0, 'd' }, {"score", required_argument, 0, 's' }, {"inject-errors-percent", required_argument, 0, 0 }, {"daemonize", no_argument, 0, 0 }, {"client", no_argument, 0, 0 }, {"interval", required_argument, 0, 'i' }, {"pidfile", required_argument, 0, 'p' }, {"attrname", required_argument, 0, 'a' }, {"verbose", no_argument, 0, 'v' }, {"help", no_argument, 0, 'h' }, {0, 0, 0, 0 } }; while ( (opt = getopt_long(argc, argv, "hvt:d:s:i:p:a:", long_options, &option_index)) != -1 ) { switch (opt) { case 0: /* Long-only options */ if (strcmp(long_options[option_index].name, "inject-errors-percent") == 0) { inject_error_percent = atoi(optarg); if (inject_error_percent < 1 || inject_error_percent > 100) { fprintf(stderr, "inject_error_percent should be between 1 and 100\n"); return -1; } } if (strcmp(long_options[option_index].name, "daemonize") == 0) { daemonize = TRUE; } if (strcmp(long_options[option_index].name, "client") == 0) { client = TRUE; } /* NOTE(review): the message below has no trailing newline, unlike the other diagnostics in this function. */ if (daemonize && client) { fprintf(stderr,"The daemonize option and client option cannot be specified at the same time."); return -1; } break; case 'd': /* NOTE(review): the strdup() result is stored without a NULL check, unlike the pidfile and attrname options. */ if (device_count < MAX_DEVICES) { devices[device_count++] = strdup(optarg); } else { fprintf(stderr, "too many devices, max is %d\n", MAX_DEVICES); return -1; } break; case 's': /* Scores are collected in the order given; the count must later equal device_count. */ if (score_count < MAX_DEVICES) { int score = atoi(optarg); if (score < 1 || score > 10) { fprintf(stderr, "Score must be between 1 and 10 inclusive\n"); return -1; } scores[score_count++] = score; } else { fprintf(stderr, "too many scores, max is %d\n", MAX_DEVICES); return -1; } break; case 'v': verbose++; break; case 't': timeout = atoi(optarg); if (timeout < 1) { fprintf(stderr, "invalid timeout %d. 
 Min 1, recommended %d (default)\n", timeout, DEFAULT_TIMEOUT); return -1; } break; case 'h': usage(argv[0], stdout); return 0; break; case 'i': interval = atoi(optarg); if (interval < 1) { fprintf(stderr, "invalid interval %d. Min 1, default is %d\n", interval, DEFAULT_INTERVAL); return -1; } break; case 'p': pidfile = strdup(optarg); if (pidfile == NULL) { fprintf(stderr, "Failed to duplicate string ['%s']\n", optarg); return -1; } break; case 'a': attrname = strdup(optarg); if (attrname == NULL) { fprintf(stderr, "Failed to duplicate string ['%s']\n", optarg); return -1; } break; default: usage(argv[0], stderr); return -1; break; } } if (client) { return(storage_mon_client()); } if (device_count == 0) { fprintf(stderr, "No devices to test, use the -d or --device argument\n"); return -1; } if (device_count != score_count) { fprintf(stderr, "There must be the same number of devices and scores\n"); return -1; } openlog("storage_mon", 0, LOG_DAEMON); if (!daemonize) { final_score = test_device_main(NULL); } else { return(storage_mon_daemon(interval, pidfile)); } return final_score; }