diff --git a/agents/kdump/fence_kdump.c b/agents/kdump/fence_kdump.c index 4ab2dd9b..1c4a69ca 100644 --- a/agents/kdump/fence_kdump.c +++ b/agents/kdump/fence_kdump.c @@ -1,570 +1,570 @@ /* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- * * Copyright (c) Ryan O'Hara (rohara@redhat.com) * Copyright (c) Red Hat, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "options.h" #include "message.h" #include "version.h" static int verbose = 0; #define log_debug(lvl, fmt, args...) \ do { \ if (lvl <= verbose) { \ fprintf (stdout, "[debug]: " fmt, ##args); \ syslog (LOG_INFO, fmt, ##args); \ } \ } while (0); #define log_error(lvl, fmt, args...) \ do { \ if (lvl <= verbose) { \ fprintf (stderr, "[error]: " fmt, ##args); \ syslog (LOG_ERR, fmt, ##args); \ } \ } while (0); static int trim (char *str) { char *p; int len; if (!str) return (0); len = strlen (str); while (len--) { if (isspace (str[len])) { str[len] = 0; } else { break; } } for (p = str; *p && isspace (*p); p++); memmove (str, p, strlen (p) + 1); return (strlen (str)); } static int read_message (const fence_kdump_node_t *node, void *msg, int len) { int error; char addr[NI_MAXHOST]; char port[NI_MAXSERV]; struct sockaddr_storage ss; socklen_t size = sizeof (ss); error = recvfrom (node->socket, msg, len, 0, (struct sockaddr *) &ss, &size); if (error < 0) { log_error (2, "recvfrom (%s)\n", strerror (errno)); goto out; } error = getnameinfo ((struct sockaddr *) &ss, size, addr, sizeof (addr), port, sizeof (port), NI_NUMERICHOST | NI_NUMERICSERV); if (error != 0) { log_error (2, "getnameinfo (%s)\n", gai_strerror (error)); goto out; } error = strcasecmp (node->addr, addr); if (error != 0) { log_debug (1, "discard message from '%s'\n", addr); } out: return (error); } static int do_action_monitor (void) { const char cmdline_path[] = "/proc/cmdline"; FILE *procFile; size_t sz = 0; char *lines = NULL; int result = 1; procFile = fopen(cmdline_path, "r"); if (procFile == NULL) { log_error (0, "Unable to open file %s (%s)\n", cmdline_path, strerror (errno)); return 1; } while (!feof (procFile)) { ssize_t rv = getline (&lines, &sz, procFile); if ((rv != -1) && (strstr(lines, "crashkernel=") != NULL)) { result = 0; } } free (lines); fclose (procFile); return result; } static int do_action_off (const fence_kdump_opts_t *opts) { int error; fd_set rfds; fence_kdump_msg_t msg; fence_kdump_node_t *node; struct timeval timeout; if (list_empty (&opts->nodes)) { return (1); } else { node = list_first_entry (&opts->nodes, fence_kdump_node_t, list); } timeout.tv_sec = opts->timeout; timeout.tv_usec = 0; FD_ZERO (&rfds); FD_SET (node->socket, &rfds); log_debug (0, "waiting for message from '%s'\n", node->addr); for (;;) { error = select (node->socket + 1, &rfds, NULL, NULL, &timeout); if (error < 0) { log_error (2, "select (%s)\n", strerror (errno)); break; } if (error == 0) { log_debug (0, "timeout after %d seconds\n", opts->timeout); break; } if (read_message (node, &msg, sizeof (msg)) != 0) { continue; } if (msg.magic != FENCE_KDUMP_MAGIC) { log_debug (1, "invalid magic number '0x%X'\n", msg.magic); continue; } switch (msg.version) { case FENCE_KDUMP_MSGV1: log_debug (0, "received valid message from '%s'\n", node->addr); return (0); default: log_debug (1, "invalid message version '0x%X'\n", msg.version); continue; } } return (1); } static int do_action_metadata (const char *self) { fprintf (stdout, "\n"); fprintf (stdout, "\n"); fprintf (stdout, ""); - fprintf (stdout, "\\fIfence_kdump\\fP is an I/O fencing agent to be used with the kdump\n" - "crash recovery service. When the \\fIfence_kdump\\fP agent is invoked,\n" + fprintf (stdout, "fence_kdump is an I/O fencing agent to be used with the kdump\n" + "crash recovery service. When the fence_kdump agent is invoked,\n" "it will listen for a message from the failed node that acknowledges\n" "that the failed node it executing the kdump crash kernel.\n" - "Note that \\fIfence_kdump\\fP is not a replacement for traditional\n" - "fencing methods. The \\fIfence_kdump\\fP agent can only detect that a\n" + "Note that fence_kdump is not a replacement for traditional\n" + "fencing methods. The fence_kdump agent can only detect that a\n" "node has entered the kdump crash recovery service. This allows the\n" "kdump crash recovery service complete without being preempted by\n" "traditional power fencing methods.\n\n" "Note: the \"off\" action listen for message from failed node that\n" "acknowledges node has entered kdump crash recovery service. If a valid\n" "message is received from the failed node, the node is considered to be\n" "fenced and the agent returns success. Failure to receive a valid\n" "message from the failed node in the given timeout period results in\n" "fencing failure."); fprintf (stdout, "\n"); fprintf (stdout, "http://www.kernel.org/pub/linux/utils/kernel/kexec/\n"); fprintf (stdout, "\n"); fprintf (stdout, "\t\n"); - fprintf (stdout, "\t\t\n"); + fprintf (stdout, "\t\t\n"); fprintf (stdout, "\t\t\n"); fprintf (stdout, "\t\t%s\n", "Name or IP address of node to be fenced. This option is required for\n" "the \"off\" action."); fprintf (stdout, "\t\n"); fprintf (stdout, "\t\n"); - fprintf (stdout, "\t\t\n"); + fprintf (stdout, "\t\t\n"); fprintf (stdout, "\t\t\n"); fprintf (stdout, "\t\t%s\n", - "IP port number that the \\fIfence_kdump\\fP agent will use to listen for\n" + "IP port number that the fence_kdump agent will use to listen for\n" "messages."); fprintf (stdout, "\t\n"); fprintf (stdout, "\t\n"); - fprintf (stdout, "\t\t\n"); + fprintf (stdout, "\t\t\n"); fprintf (stdout, "\t\t\n"); fprintf (stdout, "\t\t%s\n", - "IP network family. Force the \\fIfence_kdump\\fP agent to use a specific\n" - "family. The value for \\fIFAMILY\\fP can be \"auto\", \"ipv4\", or\n" + "IP network family. Force the fence_kdump agent to use a specific\n" + "family. The value for FAMILY can be \"auto\", \"ipv4\", or\n" "\"ipv6\"."); fprintf (stdout, "\t\n"); fprintf (stdout, "\t\n"); - fprintf (stdout, "\t\t\n"); + fprintf (stdout, "\t\t\n"); fprintf (stdout, "\t\t\n"); fprintf (stdout, "\t\t%s\n", - "Fencing action to perform. The value for \\fIACTION\\fP can be either\n" + "Fencing action to perform. The value for ACTION can be either\n" "\"off\" or \"metadata\"."); fprintf (stdout, "\t\n"); fprintf (stdout, "\t\n"); - fprintf (stdout, "\t\t\n"); + fprintf (stdout, "\t\t\n"); fprintf (stdout, "\t\t\n"); fprintf (stdout, "\t\t%s\n", "Number of seconds to wait for message from failed node. If no message\n" - "is received within \\fITIMEOUT\\fP seconds, the \\fIfence_kdump\\fP agent\n" + "is received within TIMEOUT seconds, the fence_kdump agent\n" "returns failure."); fprintf (stdout, "\t\n"); fprintf (stdout, "\t\n"); fprintf (stdout, "\t\t\n"); fprintf (stdout, "\t\t\n"); fprintf (stdout, "\t\t%s\n", "Print verbose output"); fprintf (stdout, "\t\n"); fprintf (stdout, "\t\n"); fprintf (stdout, "\t\t\n"); fprintf (stdout, "\t\t\n"); fprintf (stdout, "\t\t%s\n", "Print version"); fprintf (stdout, "\t\n"); fprintf (stdout, "\t\n"); fprintf (stdout, "\t\t\n"); fprintf (stdout, "\t\t\n"); fprintf (stdout, "\t\t%s\n", "Print usage"); fprintf (stdout, "\t\n"); fprintf (stdout, "\n"); fprintf (stdout, "\n"); fprintf (stdout, "\t\n"); fprintf (stdout, "\t\n"); fprintf (stdout, "\t\n"); fprintf (stdout, "\t\n"); fprintf (stdout, "\n"); fprintf (stdout, "\n"); return (0); } static void print_usage (const char *self) { fprintf (stdout, "Usage: %s [options]\n", basename (self)); fprintf (stdout, "\n"); fprintf (stdout, "Options:\n"); fprintf (stdout, "\n"); fprintf (stdout, "%s\n", " -n, --nodename=NODE Name or IP address of node to be fenced"); fprintf (stdout, "%s\n", " -p, --ipport=PORT IP port number (default: 7410)"); fprintf (stdout, "%s\n", " -f, --family=FAMILY Network family: ([auto], ipv4, ipv6)"); fprintf (stdout, "%s\n", " -o, --action=ACTION Fencing action: ([off], monitor, metadata, validate-all)"); fprintf (stdout, "%s\n", " -t, --timeout=TIMEOUT Timeout in seconds (default: 60)"); fprintf (stdout, "%s\n", " -v, --verbose Print verbose output"); fprintf (stdout, "%s\n", " -V, --version Print version"); fprintf (stdout, "%s\n", " -h, --help Print usage"); fprintf (stdout, "\n"); return; } static int get_options_node (fence_kdump_opts_t *opts) { int error; struct addrinfo hints; fence_kdump_node_t *node; node = malloc (sizeof (fence_kdump_node_t)); if (!node) { log_error (2, "malloc (%s)\n", strerror (errno)); return (1); } memset (node, 0, sizeof (fence_kdump_node_t)); memset (&hints, 0, sizeof (hints)); hints.ai_family = opts->family; hints.ai_socktype = SOCK_DGRAM; hints.ai_protocol = IPPROTO_UDP; hints.ai_flags = AI_NUMERICSERV; strncpy (node->name, opts->nodename, sizeof (node->name) - 1); snprintf (node->port, sizeof (node->port), "%d", opts->ipport); node->info = NULL; error = getaddrinfo (node->name, node->port, &hints, &node->info); if (error != 0) { log_error (2, "getaddrinfo (%s)\n", gai_strerror (error)); free_node (node); return (1); } error = getnameinfo (node->info->ai_addr, node->info->ai_addrlen, node->addr, sizeof (node->addr), node->port, sizeof (node->port), NI_NUMERICHOST | NI_NUMERICSERV); if (error != 0) { log_error (2, "getnameinfo (%s)\n", gai_strerror (error)); free_node (node); return (1); } hints.ai_family = node->info->ai_family; hints.ai_flags |= AI_PASSIVE; freeaddrinfo (node->info); node->info = NULL; error = getaddrinfo (NULL, node->port, &hints, &node->info); if (error != 0) { log_error (2, "getaddrinfo (%s)\n", gai_strerror (error)); free_node (node); return (1); } node->socket = socket (node->info->ai_family, node->info->ai_socktype, node->info->ai_protocol); if (node->socket < 0) { log_error (2, "socket (%s)\n", strerror (errno)); free_node (node); return (1); } error = bind (node->socket, node->info->ai_addr, node->info->ai_addrlen); if (error != 0) { log_error (2, "bind (%s)\n", strerror (errno)); free_node (node); return (1); } list_add_tail (&node->list, &opts->nodes); return (0); } static void get_options (int argc, char **argv, fence_kdump_opts_t *opts) { int opt; struct option options[] = { { "nodename", required_argument, NULL, 'n' }, { "ipport", required_argument, NULL, 'p' }, { "family", required_argument, NULL, 'f' }, { "action", required_argument, NULL, 'o' }, { "timeout", required_argument, NULL, 't' }, { "verbose", optional_argument, NULL, 'v' }, { "version", no_argument, NULL, 'V' }, { "help", no_argument, NULL, 'h' }, { 0, 0, 0, 0 } }; while ((opt = getopt_long (argc, argv, "n:p:f:o:t:v::Vh", options, NULL)) != EOF) { switch (opt) { case 'n': set_option_nodename (opts, optarg); break; case 'p': set_option_ipport (opts, optarg); break; case 'f': set_option_family (opts, optarg); break; case 'o': set_option_action (opts, optarg); break; case 't': set_option_timeout (opts, optarg); break; case 'v': set_option_verbose (opts, optarg); break; case 'V': print_version (argv[0]); exit (0); case 'h': print_usage (argv[0]); exit (0); default: print_usage (argv[0]); exit (1); } } verbose = opts->verbose; return; } static void get_options_stdin (fence_kdump_opts_t *opts) { char buf[1024]; char *opt; char *arg; while (fgets (buf, sizeof (buf), stdin) != 0) { if (trim (buf) == 0) { continue; } if (buf[0] == '#') { continue; } opt = buf; if ((arg = strchr (opt, '=')) != 0) { *arg = 0; arg += 1; } else { continue; } if (!strcasecmp (opt, "nodename")) { set_option_nodename (opts, arg); continue; } if (!strcasecmp (opt, "ipport")) { set_option_ipport (opts, arg); continue; } if (!strcasecmp (opt, "family")) { set_option_family (opts, arg); continue; } if (!strcasecmp (opt, "action")) { set_option_action (opts, arg); continue; } if (!strcasecmp (opt, "timeout")) { set_option_timeout (opts, arg); continue; } if (!strcasecmp (opt, "verbose")) { set_option_verbose (opts, arg); continue; } } verbose = opts->verbose; return; } int main (int argc, char **argv) { int error = 1; fence_kdump_opts_t opts; init_options (&opts); if (argc > 1) { get_options (argc, argv, &opts); } else { get_options_stdin (&opts); } openlog ("fence_kdump", LOG_CONS|LOG_PID, LOG_DAEMON); if (opts.action == FENCE_KDUMP_ACTION_OFF) { if (opts.nodename == NULL) { log_error (0, "action 'off' requires nodename\n"); exit (1); } if (get_options_node (&opts) != 0) { log_error (0, "failed to get node '%s'\n", opts.nodename); exit (1); } } if (verbose != 0) { print_options (&opts); } switch (opts.action) { case FENCE_KDUMP_ACTION_OFF: error = do_action_off (&opts); break; case FENCE_KDUMP_ACTION_METADATA: error = do_action_metadata (argv[0]); break; case FENCE_KDUMP_ACTION_MONITOR: error = do_action_monitor (); break; case FENCE_KDUMP_ACTION_VALIDATE: error = 0; break; default: break; } free_options (&opts); return (error); } diff --git a/tests/data/metadata/fence_kdump.xml b/tests/data/metadata/fence_kdump.xml index 89a414b8..719fd44c 100644 --- a/tests/data/metadata/fence_kdump.xml +++ b/tests/data/metadata/fence_kdump.xml @@ -1,57 +1,75 @@ - -The fence_kdump agent is intended to be used with with kdump service. + +fence_kdump is an I/O fencing agent to be used with the kdump +crash recovery service. When the fence_kdump agent is invoked, +it will listen for a message from the failed node that acknowledges +that the failed node it executing the kdump crash kernel. +Note that fence_kdump is not a replacement for traditional +fencing methods. The fence_kdump agent can only detect that a +node has entered the kdump crash recovery service. This allows the +kdump crash recovery service complete without being preempted by +traditional power fencing methods. + +Note: the "off" action listen for message from failed node that +acknowledges node has entered kdump crash recovery service. If a valid +message is received from the failed node, the node is considered to be +fenced and the agent returns success. Failure to receive a valid +message from the failed node in the given timeout period results in +fencing failure. http://www.kernel.org/pub/linux/utils/kernel/kexec/ - + - Name or IP address of node to be fenced + Name or IP address of node to be fenced. This option is required for +the "off" action. - + - Port number + IP port number that the fence_kdump agent will use to listen for +messages. - + - Network family + IP network family. Force the fence_kdump agent to use a specific +family. The value for FAMILY can be "auto", "ipv4", or +"ipv6". - + - Fencing action + Fencing action to perform. The value for ACTION can be either +"off" or "metadata". - + - Timeout in seconds - - - - - Disable logging to stderr. Does not affect --verbose or --debug-file or logging to syslog. + Number of seconds to wait for message from failed node. If no message +is received within TIMEOUT seconds, the fence_kdump agent +returns failure. Print verbose output Print version Print usage +