diff --git a/agents/sbd/fence_sbd.py b/agents/sbd/fence_sbd.py index bf95bb72..c3622029 100644 --- a/agents/sbd/fence_sbd.py +++ b/agents/sbd/fence_sbd.py @@ -1,435 +1,439 @@ #!@PYTHON@ -tt import sys, stat import logging import os import atexit sys.path.append("@FENCEAGENTSLIBDIR@") from fencing import fail_usage, run_commands, fence_action, all_opt from fencing import atexit_handler, check_input, process_input, show_docs from fencing import run_delay import itertools DEVICE_INIT = 1 DEVICE_NOT_INIT = -3 PATH_NOT_EXISTS = -1 PATH_NOT_BLOCK = -2 def is_block_device(filename): """Checks if a given path is a valid block device Key arguments: filename -- the file to check Return codes: True if it's a valid block device False, otherwise """ try: mode = os.lstat(filename).st_mode except OSError: return False else: return stat.S_ISBLK(mode) def is_link(filename): """Checks if a given path is a link. Key arguments: filename -- the file to check Return codes: True if it's a link False, otherwise """ try: mode = os.lstat(filename).st_mode except OSError: return False else: return stat.S_ISLNK(mode) def check_sbd_device(options, device_path): """checks that a given sbd device exists and is initialized Key arguments: options -- options dictionary device_path -- device path to check Return Codes: 1 / DEVICE_INIT if the device exists and is initialized -1 / PATH_NOT_EXISTS if the path does not exists -2 / PATH_NOT_BLOCK if the path exists but is not a valid block device -3 / DEVICE_NOT_INIT if the sbd device is not initialized """ # First of all we need to check if the device is valid if not os.path.exists(device_path): return PATH_NOT_EXISTS # We need to check if device path is a symbolic link. If so we resolve that # link. if is_link(device_path): link_target = os.readlink(device_path) device_path = os.path.join(os.path.dirname(device_path), link_target) # As second step we make sure it's a valid block device if not is_block_device(device_path): return PATH_NOT_BLOCK cmd = "%s -d %s dump" % (options["--sbd-path"], device_path) (return_code, out, err) = run_commands(options, [ cmd ]) for line in itertools.chain(out.split("\n"), err.split("\n")): if len(line) == 0: continue # If we read "NOT dumped" something went wrong, e.g. the device is not # initialized. if "NOT dumped" in line: return DEVICE_NOT_INIT return DEVICE_INIT def generate_sbd_command(options, command, arguments=None): """Generates a sbd command based on given arguments. Return Value: generated list of sbd commands (strings) depending on command multiple commands with a device each or a single command with multiple devices """ cmds = [] if not command in ["list", "dump"]: cmd = options["--sbd-path"] # add "-d" for each sbd device for device in parse_sbd_devices(options): cmd += " -d %s" % device cmd += " %s %s" % (command, arguments) cmds.append(cmd) else: for device in parse_sbd_devices(options): cmd = options["--sbd-path"] cmd += " -d %s" % device cmd += " %s %s" % (command, arguments) cmds.append(cmd) return cmds def send_sbd_message(conn, options, plug, message): """Sends a message to all sbd devices. Key arguments: conn -- connection structure options -- options dictionary plug -- plug to sent the message to message -- message to send Return Value: (return_code, out, err) Tuple containing the error code, """ del conn arguments = "%s %s" % (plug, message) cmd = generate_sbd_command(options, "message", arguments) (return_code, out, err) = run_commands(options, cmd) return (return_code, out, err) def get_msg_timeout(options): """Reads the configured sbd message timeout from each device. Key arguments: options -- options dictionary Return Value: msg_timeout (integer, seconds) """ # get the defined msg_timeout msg_timeout = -1 # default sbd msg timeout cmd = generate_sbd_command(options, "dump") (return_code, out, err) = run_commands(options, cmd) for line in itertools.chain(out.split("\n"), err.split("\n")): if len(line) == 0: continue if "msgwait" in line: tmp_msg_timeout = int(line.split(':')[1]) if -1 != msg_timeout and tmp_msg_timeout != msg_timeout: logging.warning(\ "sbd message timeouts differ in different devices") # we only save the highest timeout if tmp_msg_timeout > msg_timeout: msg_timeout = tmp_msg_timeout return msg_timeout def set_power_status(conn, options): """send status to sbd device (poison pill) Key arguments: conn -- connection structure options -- options dictionary Return Value: return_code -- action result (bool) """ target_status = options["--action"] plug = options["--plug"] return_code = 99 out = "" err = "" # Map fencing actions to sbd messages if "on" == target_status: (return_code, out, err) = send_sbd_message(conn, options, plug, "clear") elif "off" == target_status: (return_code, out, err) = send_sbd_message(conn, options, plug, "off") elif "reboot" == target_status: (return_code, out, err) = send_sbd_message(conn, options, plug, "reset") if 0 != return_code: logging.error("sending message to sbd device(s) \ failed with return code %d", return_code) logging.error("DETAIL: output on stdout was \"%s\"", out) logging.error("DETAIL: output on stderr was \"%s\"", err) return not bool(return_code) def reboot_cycle(conn, options): """" trigger reboot by sbd messages Key arguments: conn -- connection structure options -- options dictionary Return Value: return_code -- action result (bool) """ plug = options["--plug"] return_code = 99 out = "" err = "" (return_code, out, err) = send_sbd_message(conn, options, plug, "reset") return not bool(return_code) def get_power_status(conn, options): """Returns the status of a specific node. Key arguments: conn -- connection structure options -- option dictionary Return Value: status -- status code (string) """ status = "UNKWNOWN" plug = options["--plug"] nodelist = get_node_list(conn, options) # We need to check if the specified plug / node a already a allocated slot # on the device. if plug not in nodelist: logging.error("node \"%s\" not found in node list", plug) else: status = nodelist[plug][1] return status def translate_status(sbd_status): """Translates the sbd status to fencing status. Key arguments: sbd_status -- status to translate (string) Return Value: status -- fencing status (string) """ status = "UNKNOWN" # Currently we only accept "clear" to be marked as online. Eventually we # should also check against "test" online_status = ["clear"] offline_status = ["reset", "off"] if any(online_status_element in sbd_status \ for online_status_element in online_status): status = "on" if any(offline_status_element in sbd_status \ for offline_status_element in offline_status): status = "off" return status def get_node_list(conn, options): """Returns a list of hostnames, registerd on the sbd device. Key arguments: conn -- connection options options -- options Return Value: nodelist -- dictionary wich contains all node names and there status """ del conn nodelist = {} cmd = generate_sbd_command(options, "list") (return_code, out, err) = run_commands(options, cmd) for line in out.split("\n"): if len(line) == 0: continue # if we read "unreadable" something went wrong if "NOT dumped" in line: return nodelist words = line.split() port = words[1] sbd_status = words[2] nodelist[port] = (port, translate_status(sbd_status)) return nodelist def parse_sbd_devices(options): """Returns an array of all sbd devices. Key arguments: options -- options dictionary Return Value: devices -- array of device paths """ devices = [str.strip(dev) \ for dev in str.split(options["--devices"], ",")] return devices def define_new_opts(): """Defines the all opt list """ all_opt["devices"] = { "getopt" : ":", "longopt" : "devices", "help":"--devices=[device_a,device_b] \ Comma separated list of sbd devices", - "required" : "1", + "required" : "0", "shortdesc" : "SBD Device", "order": 1 } all_opt["sbd_path"] = { "getopt" : ":", "longopt" : "sbd-path", "help" : "--sbd-path=[path] Path to SBD binary", "required" : "0", "default" : "@SBD_PATH@", "order": 200 } def main(): """Main function """ # We need to define "no_password" otherwise we will be ask about it if # we don't provide any password. device_opt = ["no_password", "devices", "port", "method", "sbd_path"] # close stdout if we get interrupted atexit.register(atexit_handler) define_new_opts() all_opt["method"]["default"] = "cycle" all_opt["method"]["help"] = "-m, --method=[method] Method to fence (onoff|cycle) (Default: cycle)" all_opt["power_timeout"]["default"] = "30" options = check_input(device_opt, process_input(device_opt)) # fill the needed variables to generate metadata and help text output docs = {} docs["shortdesc"] = "Fence agent for sbd" docs["longdesc"] = "fence_sbd is an I/O Fencing agent \ which can be used in environments where sbd can be used (shared storage)." docs["vendorurl"] = "" show_docs(options, docs) - # We need to check if --devices is given and not empty. + # If not specified then read SBD_DEVICE from environment if "--devices" not in options: - fail_usage("No SBD devices specified. \ - At least one SBD device is required.") + dev_list = os.getenv("SBD_DEVICE") + if dev_list: + options["--devices"] = ",".join(dev_list.split(";")) + else: + fail_usage("No SBD devices specified. \ + At least one SBD device is required.") run_delay(options) # We need to check if the provided sbd_devices exists. We need to do # that for every given device. # Just for the case we are really rebooting / powering off a device # (pacemaker as well uses the list command to generate a dynamic list) # we leave it to sbd to try and decide if it was successful if not options["--action"] in ["reboot", "off", "list"]: for device_path in parse_sbd_devices(options): logging.debug("check device \"%s\"", device_path) return_code = check_sbd_device(options, device_path) if PATH_NOT_EXISTS == return_code: logging.error("\"%s\" does not exist", device_path) elif PATH_NOT_BLOCK == return_code: logging.error("\"%s\" is not a valid block device", device_path) elif DEVICE_NOT_INIT == return_code: logging.error("\"%s\" is not initialized", device_path) elif DEVICE_INIT != return_code: logging.error("UNKNOWN error while checking \"%s\"", device_path) # If we get any error while checking the device we need to exit at this # point. if DEVICE_INIT != return_code: exit(return_code) # we check against the defined timeouts. If the pacemaker timeout is smaller # then that defined within sbd we should report this. power_timeout = int(options["--power-timeout"]) sbd_msg_timeout = get_msg_timeout(options) if 0 < power_timeout <= sbd_msg_timeout: logging.warning("power timeout needs to be \ greater then sbd message timeout") result = fence_action(\ None, \ options, \ set_power_status, \ get_power_status, \ get_node_list, \ reboot_cycle) sys.exit(result) if __name__ == "__main__": main() diff --git a/tests/data/metadata/fence_sbd.xml b/tests/data/metadata/fence_sbd.xml index 82ded25b..c2daf0c5 100644 --- a/tests/data/metadata/fence_sbd.xml +++ b/tests/data/metadata/fence_sbd.xml @@ -1,135 +1,135 @@ fence_sbd is an I/O Fencing agent which can be used in environments where sbd can be used (shared storage). Fencing action - + SBD Device Method to fence Physical plug number on device, UUID or identification of machine Physical plug number on device, UUID or identification of machine Disable logging to stderr. Does not affect --verbose or --debug-file or logging to syslog. Verbose mode. Multiple -v flags can be stacked on the command line (e.g., -vvv) to increase verbosity. Level of debugging detail in output. Defaults to the number of --verbose flags specified on the command line, or to 1 if verbose=1 in a stonith device configuration (i.e., on stdin). Write debug information to given file Write debug information to given file Display version information and exit Display help and exit Separator for plug parameter when specifying more than 1 plug Separator for CSV created by 'list' operation Wait X seconds before fencing is started Disable timeout (true/false) (default: true when run from Pacemaker 2.0+) Wait X seconds for cmd prompt after login Test X seconds for status change after ON/OFF Wait X seconds after issuing ON/OFF Path to SBD binary Wait X seconds for cmd prompt after issuing command Sleep X seconds between status calls during a STONITH action Count of attempts to retry power on