Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/agents/scsi/fence_scsi.py b/agents/scsi/fence_scsi.py
index 2246bd4e..e3333961 100644
--- a/agents/scsi/fence_scsi.py
+++ b/agents/scsi/fence_scsi.py
@@ -1,609 +1,597 @@
#!@PYTHON@ -tt
import sys
import stat
import re
import os
import time
import logging
import atexit
import hashlib
import ctypes
sys.path.append("@FENCEAGENTSLIBDIR@")
-from fencing import fail_usage, run_command, atexit_handler, check_input, process_input, show_docs, fence_action, all_opt, EC_STATUS
+from fencing import fail_usage, run_command, atexit_handler, check_input, process_input, show_docs, fence_action, all_opt
from fencing import run_delay
STORE_PATH = "@STORE_PATH@"
def get_status(conn, options):
del conn
status = "off"
for dev in options["devices"]:
is_block_device(dev)
reset_dev(options, dev)
if options["--key"] in get_registration_keys(options, dev):
status = "on"
else:
logging.debug("No registration for key "\
+ options["--key"] + " on device " + dev + "\n")
if options["--action"] == "on":
status = "off"
break
return status
def set_status(conn, options):
del conn
count = 0
if options["--action"] == "on":
set_key(options)
for dev in options["devices"]:
is_block_device(dev)
register_dev(options, dev)
if options["--key"] not in get_registration_keys(options, dev):
count += 1
logging.debug("Failed to register key "\
+ options["--key"] + "on device " + dev + "\n")
continue
dev_write(dev, options)
if get_reservation_key(options, dev) is None \
and not reserve_dev(options, dev) \
and get_reservation_key(options, dev) is None:
count += 1
logging.debug("Failed to create reservation (key="\
+ options["--key"] + ", device=" + dev + ")\n")
else:
host_key = get_key()
if host_key == options["--key"].lower():
fail_usage("Failed: keys cannot be same. You can not fence yourself.")
for dev in options["devices"]:
is_block_device(dev)
if options["--key"] in get_registration_keys(options, dev):
preempt_abort(options, host_key, dev)
for dev in options["devices"]:
if options["--key"] in get_registration_keys(options, dev):
count += 1
logging.debug("Failed to remove key "\
+ options["--key"] + " on device " + dev + "\n")
continue
if not get_reservation_key(options, dev):
count += 1
logging.debug("No reservation exists on device " + dev + "\n")
if count:
logging.error("Failed to verify " + str(count) + " device(s)")
sys.exit(1)
# check if host is ready to execute actions
def do_action_monitor(options):
# Check if required binaries are installed
if bool(run_cmd(options, options["--sg_persist-path"] + " -V")["rc"]):
logging.error("Unable to run " + options["--sg_persist-path"])
return 1
elif bool(run_cmd(options, options["--sg_turs-path"] + " -V")["rc"]):
logging.error("Unable to run " + options["--sg_turs-path"])
return 1
elif ("--devices" not in options and
bool(run_cmd(options, options["--vgs-path"] + " --version")["rc"])):
logging.error("Unable to run " + options["--vgs-path"])
return 1
# Keys have to be present in order to fence/unfence
get_key()
- devs = dev_read()
-
- for dev in devs:
- state_file = "/sys/block/{}/device/state".format(re.sub(".*\/", "", os.path.realpath(dev)))
- try:
- with open(state_file, "r") as f:
- state = f.readline().strip()
- logging.debug("{} is in state {}".format(dev, state))
- if state not in [ "live", "running" ]:
- logging.error("Failed: Unable to access " + dev)
- return EC_STATUS
- except Exception as e:
- logging.debug(str(e))
+ dev_read()
return 0
# run command, returns dict, ret["rc"] = exit code; ret["out"] = output;
# ret["err"] = error
def run_cmd(options, cmd):
ret = {}
(ret["rc"], ret["out"], ret["err"]) = run_command(options, cmd)
ret["out"] = "".join([i for i in ret["out"] if i is not None])
ret["err"] = "".join([i for i in ret["err"] if i is not None])
return ret
# check if device exist and is block device
def is_block_device(dev):
if not os.path.exists(dev):
fail_usage("Failed: device \"" + dev + "\" does not exist")
if not stat.S_ISBLK(os.stat(dev).st_mode):
fail_usage("Failed: device \"" + dev + "\" is not a block device")
# cancel registration
def preempt_abort(options, host, dev):
reset_dev(options,dev)
cmd = options["--sg_persist-path"] + " -n -o -A -T 5 -K " + host + " -S " + options["--key"] + " -d " + dev
return not bool(run_cmd(options, cmd)["rc"])
def reset_dev(options, dev):
return run_cmd(options, options["--sg_turs-path"] + " " + dev)["rc"]
def register_dev(options, dev):
dev = os.path.realpath(dev)
if re.search(r"^dm", dev[5:]):
for slave in get_mpath_slaves(dev):
register_dev(options, slave)
return True
if get_reservation_key(options, dev, False) == options["--key"]:
return True
reset_dev(options, dev)
cmd = options["--sg_persist-path"] + " -n -o -I -S " + options["--key"] + " -d " + dev
cmd += " -Z" if "--aptpl" in options else ""
#cmd return code != 0 but registration can be successful
return not bool(run_cmd(options, cmd)["rc"])
def reserve_dev(options, dev):
reset_dev(options,dev)
cmd = options["--sg_persist-path"] + " -n -o -R -T 5 -K " + options["--key"] + " -d " + dev
return not bool(run_cmd(options, cmd)["rc"])
def get_reservation_key(options, dev, fail=True):
reset_dev(options,dev)
opts = ""
if "--readonly" in options:
opts = "-y "
cmd = options["--sg_persist-path"] + " -n -i " + opts + "-r -d " + dev
out = run_cmd(options, cmd)
if out["rc"] and fail:
fail_usage('Cannot get reservation key on device "' + dev
+ '": ' + out["err"])
match = re.search(r"\s+key=0x(\S+)\s+", out["out"], re.IGNORECASE)
return match.group(1) if match else None
def get_registration_keys(options, dev, fail=True):
reset_dev(options,dev)
keys = []
opts = ""
if "--readonly" in options:
opts = "-y "
cmd = options["--sg_persist-path"] + " -n -i " + opts + "-k -d " + dev
out = run_cmd(options, cmd)
if out["rc"]:
fail_usage('Cannot get registration keys on device "' + dev
+ '": ' + out["err"], fail)
if not fail:
return []
for line in out["out"].split("\n"):
match = re.search(r"\s+0x(\S+)\s*", line)
if match:
keys.append(match.group(1))
return keys
def get_cluster_id(options):
cmd = options["--corosync-cmap-path"] + " totem.cluster_name"
match = re.search(r"\(str\) = (\S+)\n", run_cmd(options, cmd)["out"])
if not match:
fail_usage("Failed: cannot get cluster name")
try:
return hashlib.md5(match.group(1).encode('ascii')).hexdigest()
except ValueError:
# FIPS requires usedforsecurity=False and might not be
# available on all distros: https://bugs.python.org/issue9216
return hashlib.md5(match.group(1).encode('ascii'), usedforsecurity=False).hexdigest()
def get_node_id(options):
cmd = options["--corosync-cmap-path"] + " nodelist"
out = run_cmd(options, cmd)["out"]
match = re.search(r".(\d+).name \(str\) = " + options["--plug"] + "\n", out)
# try old format before failing
if not match:
match = re.search(r".(\d+).ring._addr \(str\) = " + options["--plug"] + "\n", out)
return match.group(1) if match else fail_usage("Failed: unable to parse output of corosync-cmapctl or node does not exist")
def get_node_hash(options):
try:
return hashlib.md5(options["--plug"].encode('ascii')).hexdigest()
except ValueError:
# FIPS requires usedforsecurity=False and might not be
# available on all distros: https://bugs.python.org/issue9216
return hashlib.md5(options["--plug"].encode('ascii'), usedforsecurity=False).hexdigest()
def generate_key(options):
if options["--key-value"] == "hash":
return "%.4s%.4s" % (get_cluster_id(options), get_node_hash(options))
else:
return "%.4s%.4d" % (get_cluster_id(options), int(get_node_id(options)))
# save node key to file
def set_key(options):
file_path = options["store_path"] + ".key"
if not os.path.isdir(os.path.dirname(options["store_path"])):
os.makedirs(os.path.dirname(options["store_path"]))
try:
f = open(file_path, "w")
except IOError:
fail_usage("Failed: Cannot open file \""+ file_path + "\"")
f.write(options["--key"].lower() + "\n")
f.close()
# read node key from file
def get_key(fail=True):
file_path = STORE_PATH + ".key"
try:
f = open(file_path, "r")
except IOError:
fail_usage("Failed: Cannot open file \""+ file_path + "\"", fail)
if not fail:
return None
return f.readline().strip().lower()
def dev_write(dev, options):
file_path = options["store_path"] + ".dev"
if not os.path.isdir(os.path.dirname(options["store_path"])):
os.makedirs(os.path.dirname(options["store_path"]))
try:
f = open(file_path, "a+")
except IOError:
fail_usage("Failed: Cannot open file \""+ file_path + "\"")
f.seek(0)
out = f.read()
if not re.search(r"^" + dev + "\s+", out, flags=re.MULTILINE):
f.write(dev + "\n")
f.close()
def dev_read(fail=True, opt=None):
file_path = STORE_PATH + ".dev"
try:
f = open(file_path, "r")
except IOError:
if "--suppress-errors" not in opt:
fail_usage("Failed: Cannot open file \"" + file_path + "\"", fail)
if not fail:
return None
# get not empty lines from file
devs = [line.strip() for line in f if line.strip()]
f.close()
return devs
def get_clvm_devices(options):
devs = []
cmd = options["--vgs-path"] + " " +\
"--noheadings " +\
"--separator : " +\
"--sort pv_uuid " +\
"--options vg_attr,pv_name "+\
"--config 'global { locking_type = 0 } devices { preferred_names = [ \"^/dev/dm\" ] }'"
out = run_cmd(options, cmd)
if out["rc"]:
fail_usage("Failed: Cannot get clvm devices")
for line in out["out"].split("\n"):
if 'c' in line.split(":")[0]:
devs.append(line.split(":")[1])
return devs
def get_mpath_slaves(dev):
if dev[:5] == "/dev/":
dev = dev[5:]
slaves = [i for i in os.listdir("/sys/block/" + dev + "/slaves/") if i[:1] != "."]
if slaves[0][:2] == "dm":
slaves = get_mpath_slaves(slaves[0])
else:
slaves = ["/dev/" + x for x in slaves]
return slaves
def define_new_opts():
all_opt["devices"] = {
"getopt" : "d:",
"longopt" : "devices",
"help" : "-d, --devices=[devices] List of devices to use for current operation",
"required" : "0",
"shortdesc" : "List of devices to use for current operation. Devices can \
be comma-separated list of raw devices (eg. /dev/sdc). Each device must support SCSI-3 \
persistent reservations.",
"order": 1
}
all_opt["nodename"] = {
"getopt" : ":",
"longopt" : "nodename",
"help" : "",
"required" : "0",
"shortdesc" : "",
"order": 1
}
all_opt["key"] = {
"getopt" : "k:",
"longopt" : "key",
"help" : "-k, --key=[key] Key to use for the current operation",
"required" : "0",
"shortdesc" : "Key to use for the current operation. This key should be \
unique to a node. For the \"on\" action, the key specifies the key use to \
register the local node. For the \"off\" action, this key specifies the key to \
be removed from the device(s).",
"order": 1
}
all_opt["aptpl"] = {
"getopt" : "a",
"longopt" : "aptpl",
"help" : "-a, --aptpl Use the APTPL flag for registrations",
"required" : "0",
"shortdesc" : "Use the APTPL flag for registrations. This option is only used for the 'on' action.",
"order": 1
}
all_opt["readonly"] = {
"getopt" : "",
"longopt" : "readonly",
"help" : "--readonly Open DEVICE read-only. May be useful with PRIN commands if there are unwanted side effects with the default read-write open.",
"required" : "0",
"shortdesc" : "Open DEVICE read-only.",
"order": 4
}
all_opt["suppress-errors"] = {
"getopt" : "",
"longopt" : "suppress-errors",
"help" : "--suppress-errors Suppress error log. Suppresses error logging when run from the watchdog service before pacemaker starts.",
"required" : "0",
"shortdesc" : "Error log suppression.",
"order": 5
}
all_opt["logfile"] = {
"getopt" : ":",
"longopt" : "logfile",
"help" : "-f, --logfile Log output (stdout and stderr) to file",
"required" : "0",
"shortdesc" : "Log output (stdout and stderr) to file",
"order": 6
}
all_opt["corosync_cmap_path"] = {
"getopt" : ":",
"longopt" : "corosync-cmap-path",
"help" : "--corosync-cmap-path=[path] Path to corosync-cmapctl binary",
"required" : "0",
"shortdesc" : "Path to corosync-cmapctl binary",
"default" : "@COROSYNC_CMAPCTL_PATH@",
"order": 300
}
all_opt["sg_persist_path"] = {
"getopt" : ":",
"longopt" : "sg_persist-path",
"help" : "--sg_persist-path=[path] Path to sg_persist binary",
"required" : "0",
"shortdesc" : "Path to sg_persist binary",
"default" : "@SG_PERSIST_PATH@",
"order": 300
}
all_opt["sg_turs_path"] = {
"getopt" : ":",
"longopt" : "sg_turs-path",
"help" : "--sg_turs-path=[path] Path to sg_turs binary",
"required" : "0",
"shortdesc" : "Path to sg_turs binary",
"default" : "@SG_TURS_PATH@",
"order": 300
}
all_opt["vgs_path"] = {
"getopt" : ":",
"longopt" : "vgs-path",
"help" : "--vgs-path=[path] Path to vgs binary",
"required" : "0",
"shortdesc" : "Path to vgs binary",
"default" : "@VGS_PATH@",
"order": 300
}
all_opt["key_value"] = {
"getopt" : ":",
"longopt" : "key-value",
"help" : "--key-value=<id|hash> SCSI key node generation method",
"required" : "0",
"shortdesc" : "Method used to generate the SCSI key. \"id\" (default) \
uses the positional ID from \"corosync-cmactl nodelist\" output which can get inconsistent \
when nodes are removed from cluster without full cluster restart. \"hash\" uses part of hash \
made out of node names which is not affected over time but there is theoretical chance that \
hashes can collide as size of SCSI key is quite limited.",
"default" : "id",
"order": 300
}
def scsi_check_get_options(options):
try:
f = open("/etc/sysconfig/stonith", "r")
except IOError:
return options
match = re.findall(r"^\s*(\S*)\s*=\s*(\S*)\s*", "".join(f.readlines()), re.MULTILINE)
for m in match:
options[m[0].lower()] = m[1].lower()
f.close()
return options
def scsi_check(hardreboot=False):
if len(sys.argv) >= 3 and sys.argv[1] == "repair":
return int(sys.argv[2])
options = {}
options["--sg_turs-path"] = "@SG_TURS_PATH@"
options["--sg_persist-path"] = "@SG_PERSIST_PATH@"
options["--power-timeout"] = "5"
options["retry"] = "0"
options["retry-sleep"] = "1"
options = scsi_check_get_options(options)
if "verbose" in options and options["verbose"] == "yes":
logging.getLogger().setLevel(logging.DEBUG)
devs = dev_read(fail=False,opt=options)
if not devs:
if "--suppress-errors" not in options:
logging.error("No devices found")
return 0
key = get_key(fail=False)
if not key:
logging.error("Key not found")
return 0
for dev in devs:
for n in range(int(options["retry"]) + 1):
if n > 0:
logging.debug("retry: " + str(n) + " of " + options["retry"])
if key in get_registration_keys(options, dev, fail=False):
logging.debug("key " + key + " registered with device " + dev)
return 0
else:
logging.debug("key " + key + " not registered with device " + dev)
if n < int(options["retry"]):
time.sleep(float(options["retry-sleep"]))
logging.debug("key " + key + " registered with any devices")
if hardreboot == True:
libc = ctypes.cdll['libc.so.6']
libc.reboot(0x1234567)
return 2
def main():
atexit.register(atexit_handler)
device_opt = ["no_login", "no_password", "devices", "nodename", "port",\
"no_port", "key", "aptpl", "fabric_fencing", "on_target", "corosync_cmap_path",\
"sg_persist_path", "sg_turs_path", "readonly", "suppress-errors", "logfile", "vgs_path",\
"force_on", "key_value"]
define_new_opts()
all_opt["delay"]["getopt"] = "H:"
all_opt["port"]["help"] = "-n, --plug=[nodename] Name of the node to be fenced"
all_opt["port"]["shortdesc"] = "Name of the node to be fenced. The node name is used to \
generate the key value used for the current operation. This option will be \
ignored when used with the -k option."
#fence_scsi_check
if os.path.basename(sys.argv[0]) == "fence_scsi_check":
sys.exit(scsi_check())
elif os.path.basename(sys.argv[0]) == "fence_scsi_check_hardreboot":
sys.exit(scsi_check(True))
options = check_input(device_opt, process_input(device_opt), other_conditions=True)
# hack to remove list/list-status actions which are not supported
options["device_opt"] = [ o for o in options["device_opt"] if o != "separator" ]
docs = {}
docs["shortdesc"] = "Fence agent for SCSI persistent reservation"
docs["longdesc"] = "fence_scsi is an I/O fencing agent that uses SCSI-3 \
persistent reservations to control access to shared storage devices. These \
devices must support SCSI-3 persistent reservations (SPC-3 or greater) as \
well as the \"preempt-and-abort\" subcommand.\nThe fence_scsi agent works by \
having each node in the cluster register a unique key with the SCSI \
device(s). Reservation key is generated from \"node id\" (default) or from \
\"node name hash\" (RECOMMENDED) by adjusting \"key_value\" option. \
Using hash is recommended to prevent issues when removing nodes \
from cluster without full cluster restart. \
Once registered, a single node will become the reservation holder \
by creating a \"write exclusive, registrants only\" reservation on the \
device(s). The result is that only registered nodes may write to the \
device(s). When a node failure occurs, the fence_scsi agent will remove the \
key belonging to the failed node from the device(s). The failed node will no \
longer be able to write to the device(s). A manual reboot is required.\
\n.P\n\
When used as a watchdog device you can define e.g. retry=1, retry-sleep=2 and \
verbose=yes parameters in /etc/sysconfig/stonith if you have issues with it \
failing."
docs["vendorurl"] = ""
show_docs(options, docs)
run_delay(options)
# backward compatibility layer BEGIN
if "--logfile" in options:
try:
logfile = open(options["--logfile"], 'w')
sys.stderr = logfile
sys.stdout = logfile
except IOError:
fail_usage("Failed: Unable to create file " + options["--logfile"])
# backward compatibility layer END
options["store_path"] = STORE_PATH
# Input control BEGIN
stop_after_error = False if options["--action"] == "validate-all" else True
if options["--action"] == "monitor":
sys.exit(do_action_monitor(options))
# workaround to avoid regressions
if "--nodename" in options and options["--nodename"]:
options["--plug"] = options["--nodename"]
del options["--nodename"]
if not (("--plug" in options and options["--plug"])\
or ("--key" in options and options["--key"])):
fail_usage("Failed: nodename or key is required", stop_after_error)
if not ("--key" in options and options["--key"]):
options["--key"] = generate_key(options)
if options["--key"] == "0" or not options["--key"]:
fail_usage("Failed: key cannot be 0", stop_after_error)
if "--key-value" in options\
and (options["--key-value"] != "id" and options["--key-value"] != "hash"):
fail_usage("Failed: key-value has to be 'id' or 'hash'", stop_after_error)
if options["--action"] == "validate-all":
sys.exit(0)
options["--key"] = options["--key"].lstrip('0')
if not ("--devices" in options and options["--devices"].split(",")):
options["devices"] = get_clvm_devices(options)
else:
options["devices"] = options["--devices"].split(",")
if not options["devices"]:
fail_usage("Failed: No devices found")
# Input control END
result = fence_action(None, options, set_status, get_status)
sys.exit(result)
if __name__ == "__main__":
main()

File Metadata

Mime Type
text/x-diff
Expires
Tue, Feb 25, 7:38 AM (1 d, 12 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1464745
Default Alt Text
(19 KB)

Event Timeline