#!@PYTHON@ -tt
"""Fence agent for AWS EC2 instances, driven through the boto3 library."""

import sys
import re
import logging
import atexit
sys.path.append("@FENCEAGENTSLIBDIR@")
from fencing import *
from fencing import fail, fail_usage, run_delay, EC_STATUS, SyslogLibHandler

import requests
from requests import HTTPError

try:
    import boto3
    from botocore.exceptions import ConnectionError, ClientError, EndpointConnectionError, NoRegionError
except ImportError:
    # NOTE(review): a missing boto3 is tolerated at import time; any later use
    # of boto3 in main() will then raise NameError. Presumably this keeps
    # help/metadata output working without boto3 installed - confirm.
    pass

logger = logging.getLogger()
logger.propagate = False
logger.setLevel(logging.INFO)
logger.addHandler(SyslogLibHandler())
logging.getLogger('botocore.vendored').propagate = False

# Upper bound, in seconds, for each request to the EC2 metadata service, so
# that an unreachable endpoint cannot block the agent indefinitely.
_EC2_METADATA_TIMEOUT = 5


def get_instance_id(options):
    """Return the instance id of the machine running this agent, or None.

    A session token is requested from the EC2 metadata endpoint first and is
    then sent along with the instance-id query.

    Any failure is logged and None is returned. Non-HTTP failures are logged
    at debug level instead of error level when "--skip-race-check" is present
    in *options*.
    """
    try:
        token_response = requests.put(
            'http://169.254.169.254/latest/api/token',
            headers={"X-aws-ec2-metadata-token-ttl-seconds": "21600"},
            timeout=_EC2_METADATA_TIMEOUT,
        )
        # Without this call an error response body would be used as the
        # token and the HTTPError handler below could never fire.
        token_response.raise_for_status()
        token = token_response.content.decode("UTF-8")

        id_response = requests.get(
            'http://169.254.169.254/latest/meta-data/instance-id',
            headers={"X-aws-ec2-metadata-token": token},
            timeout=_EC2_METADATA_TIMEOUT,
        )
        id_response.raise_for_status()
        return id_response.content.decode("UTF-8")
    except HTTPError as http_err:
        logger.error('HTTP error occurred while trying to access EC2 metadata server: %s', http_err)
    except Exception as err:
        if "--skip-race-check" not in options:
            logger.error('A fatal error occurred while trying to access EC2 metadata server: %s', err)
        else:
            logger.debug('A fatal error occurred while trying to access EC2 metadata server: %s', err)
    return None


def get_nodes_list(conn, options):
    """Return a dict mapping each visible instance id to ("", None).

    When "--filter" is given as "key=value", only instances matching that
    filter are listed; otherwise all instances are listed. Credential, region
    and connection errors end the agent through fail_usage(); any other error
    is logged and whatever was collected so far is returned.
    """
    logger.debug("Starting monitor operation")
    result = {}
    try:
        if "--filter" in options:
            # Split on the first "=" only so values that themselves contain
            # "=" are kept whole. A filter without "=" raises ValueError here
            # and is reported by the generic handler below.
            filter_key, filter_value = options["--filter"].split("=", 1)
            instance_filter = [{
                "Name": filter_key.strip(),
                "Values": [filter_value.strip()]
            }]
            instances = conn.instances.filter(Filters=instance_filter)
        else:
            instances = conn.instances.all()
        for instance in instances:
            result[instance.id] = ("", None)
    except ClientError:
        fail_usage("Failed: Incorrect Access Key or Secret Key.")
    except EndpointConnectionError:
        fail_usage("Failed: Incorrect Region.")
    except ConnectionError as e:
        fail_usage("Failed: Unable to connect to AWS: " + str(e))
    except Exception as e:
        logger.error("Failed to get node list: %s", e)
    logger.debug("Monitor operation OK: %s", result)
    return result


def get_power_status(conn, options):
    """Return "on", "off" or "unknown" for the instance named by "--plug".

    "running" maps to "on", "stopped" maps to "off", every other state maps
    to "unknown". If the instance is not found, or an unexpected error
    occurs, the agent ends through fail(EC_STATUS).
    """
    logger.debug("Starting status operation")
    try:
        instance = conn.instances.filter(Filters=[{"Name": "instance-id", "Values": [options["--plug"]]}])
        # IndexError here means the filter matched no instance.
        state = list(instance)[0].state["Name"]
        logger.debug("Status operation for EC2 instance %s returned state: %s", options["--plug"], state.upper())
        if state == "running":
            return "on"
        elif state == "stopped":
            return "off"
        else:
            return "unknown"
    except ClientError:
        fail_usage("Failed: Incorrect Access Key or Secret Key.")
    except EndpointConnectionError:
        fail_usage("Failed: Incorrect Region.")
    except IndexError:
        fail(EC_STATUS)
    except Exception as e:
        logger.error("Failed to get power status: %s", e)
        fail(EC_STATUS)


def get_self_power_status(conn, instance_id):
    """Report whether the instance *instance_id* is in the "running" state.

    Returns "ok" when it is running, "alert" for any other state and "fail"
    when no instance with that id is found.
    """
    try:
        instance = conn.instances.filter(Filters=[{"Name": "instance-id", "Values": [instance_id]}])
        state = list(instance)[0].state["Name"]
        if state == "running":
            logger.debug("Captured my (%s) state and it %s - returning OK - Proceeding with fencing", instance_id, state.upper())
            return "ok"
        else:
            logger.debug("Captured my (%s) state it is %s - returning Alert - Unable to fence other nodes", instance_id, state.upper())
            return "alert"
    except ClientError:
        fail_usage("Failed: Incorrect Access Key or Secret Key.")
    except EndpointConnectionError:
        fail_usage("Failed: Incorrect Region.")
    except IndexError:
        return "fail"


def set_power_status(conn, options):
    """Stop or start the instance named by "--plug" according to "--action".

    For "off" the instance is force-stopped only if "--skip-race-check" is
    set or this machine's own instance reports as running. For "on" the
    instance is started. Errors raised by the AWS calls are logged at debug
    level and not re-raised.
    """
    # The lookup happens even with --skip-race-check; only its result is
    # ignored in that case.
    my_instance = get_instance_id(options)
    try:
        if (options["--action"] == "off"):
            if "--skip-race-check" in options or get_self_power_status(conn, my_instance) == "ok":
                conn.instances.filter(InstanceIds=[options["--plug"]]).stop(Force=True)
                logger.debug("Called StopInstance API call for %s", options["--plug"])
            else:
                logger.debug("Skipping fencing as instance is not in running status")
        elif (options["--action"] == "on"):
            conn.instances.filter(InstanceIds=[options["--plug"]]).start()
    except Exception as e:
        logger.debug("Failed to power %s %s: %s",
                     options["--action"], options["--plug"], e)


def define_new_opts():
    """Register the AWS-specific command line options in all_opt."""
    all_opt["region"] = {
        "getopt" : "r:",
        "longopt" : "region",
        "help" : "-r, --region=[region] Region, e.g. us-east-1",
        "shortdesc" : "Region.",
        "required" : "0",
        "order" : 2
    }
    all_opt["access_key"] = {
        "getopt" : "a:",
        "longopt" : "access-key",
        "help" : "-a, --access-key=[key] Access Key",
        "shortdesc" : "Access Key.",
        "required" : "0",
        "order" : 3
    }
    all_opt["secret_key"] = {
        "getopt" : "s:",
        "longopt" : "secret-key",
        "help" : "-s, --secret-key=[key] Secret Key",
        "shortdesc" : "Secret Key.",
        "required" : "0",
        "order" : 4
    }
    all_opt["filter"] = {
        "getopt" : ":",
        "longopt" : "filter",
        "help" : "--filter=[key=value] Filter (e.g. vpc-id=[vpc-XXYYZZAA]",
        "shortdesc": "Filter for list-action",
        "required": "0",
        "order": 5
    }
    all_opt["boto3_debug"] = {
        "getopt" : "b:",
        "longopt" : "boto3_debug",
        "help" : "-b, --boto3_debug=[option] Boto3 and Botocore library debug logging",
        "shortdesc": "Boto Lib debug",
        "required": "0",
        "default": "False",
        "order": 6
    }
    all_opt["skip_race_check"] = {
        "getopt" : "",
        "longopt" : "skip-race-check",
        "help" : "--skip-race-check Skip race condition check",
        "shortdesc": "Skip race condition check",
        "required": "0",
        "order": 7
    }


# Main agent method
def main():
    """Parse options, configure logging, connect to EC2 and run the action.

    The process exits with the result of fence_action().
    """
    conn = None

    device_opt = ["port", "no_password", "region", "access_key", "secret_key", "filter", "boto3_debug", "skip_race_check"]

    atexit.register(atexit_handler)

    define_new_opts()

    all_opt["power_timeout"]["default"] = "60"

    options = check_input(device_opt, process_input(device_opt))

    docs = {}
    docs["shortdesc"] = "Fence agent for AWS (Amazon Web Services)"
    # The pieces are joined without a space between "Web" and "Services" on
    # purpose: the string is kept identical to the original, whose rendered
    # form ("Amazon WebServices") is what the metadata fixture contains.
    docs["longdesc"] = (
        "fence_aws is an I/O Fencing agent for AWS (Amazon Web"
        "Services). It uses the boto3 library to connect to AWS."
        "\n.P\n"
        "boto3 can be configured with AWS CLI or by creating ~/.aws/credentials.\n"
        "For instructions see: https://boto3.readthedocs.io/en/latest/guide/quickstart.html#configuration"
    )
    docs["vendorurl"] = "http://www.amazon.com"
    show_docs(options, docs)

    run_delay(options)

    if "--debug-file" in options:
        # Iterate over a copy: removing entries from logger.handlers while
        # looping over that same list can skip handlers.
        for handler in list(logger.handlers):
            if isinstance(handler, logging.FileHandler):
                logger.removeHandler(handler)
        lh = logging.FileHandler(options["--debug-file"])
        logger.addHandler(lh)
        lhf = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        lh.setFormatter(lhf)
        # NOTE(review): only the handler level is set here. DEBUG records
        # reach the file only if the logger's own level admits them - confirm
        # that the fencing library raises it for verbose runs.
        lh.setLevel(logging.DEBUG)

    if options["--boto3_debug"].lower() not in ["1", "yes", "on", "true"]:
        boto3.set_stream_logger('boto3', logging.INFO)
        boto3.set_stream_logger('botocore', logging.CRITICAL)
        logging.getLogger('botocore').propagate = False
        logging.getLogger('boto3').propagate = False
    else:
        log_format = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
        logging.getLogger('botocore').propagate = False
        logging.getLogger('boto3').propagate = False
        fdh = logging.FileHandler('/var/log/fence_aws_boto3.log')
        fdh.setFormatter(log_format)
        logging.getLogger('boto3').addHandler(fdh)
        logging.getLogger('botocore').addHandler(fdh)
        logger.debug("Boto debug level is %s and sending debug info to /var/log/fence_aws_boto3.log", options["--boto3_debug"])

    region = options.get("--region")
    access_key = options.get("--access-key")
    secret_key = options.get("--secret-key")
    try:
        conn = boto3.resource('ec2', region_name=region,
                              aws_access_key_id=access_key,
                              aws_secret_access_key=secret_key)
    except Exception as e:
        fail_usage("Failed: Unable to connect to AWS: " + str(e))

    # Operate the fencing device
    result = fence_action(conn, options, set_power_status, get_power_status, get_nodes_list)
    sys.exit(result)


if __name__ == "__main__":
    main()