diff --git a/agents/aws_vpc_net/fence_aws_vpc_net.py b/agents/aws_vpc_net/fence_aws_vpc_net.py
index e1415c52..cf61f2d7 100644
--- a/agents/aws_vpc_net/fence_aws_vpc_net.py
+++ b/agents/aws_vpc_net/fence_aws_vpc_net.py
@@ -1,864 +1,980 @@
 #!@PYTHON@ -tt
 
 import sys, re
 import json
 import atexit
 import logging
 import time
 import requests
 
 sys.path.append("@FENCEAGENTSLIBDIR@")
 
 from fencing import *
 from fencing import (
     run_delay,
     fail,
     fail_usage,
     EC_STATUS,
     EC_GENERIC_ERROR,
     SyslogLibHandler
 )
 
 try:
-	import boto3
-	from botocore.exceptions import ConnectionError, ClientError, EndpointConnectionError, NoRegionError
+    import boto3
+    from botocore.exceptions import ConnectionError, ClientError, EndpointConnectionError, NoRegionError
 except ImportError:
-	pass
+    logging.error("Unable to import boto3 module. Please install boto3: pip install boto3")  # 'logger' is only assigned further down in this file, so use the logging module directly
+    sys.exit(EC_GENERIC_ERROR)
 
 
 # Logger configuration
 logger = logging.getLogger()
 logger.propagate = False
 logger.setLevel(logging.INFO)
 logger.addHandler(SyslogLibHandler())
 logging.getLogger('botocore.vendored').propagate = False
 
 status = {
-		"running": "on",
-		"stopped": "off",
-		"pending": "unknown",
-		"stopping": "unknown",
-		"shutting-down": "unknown",
-		"terminated": "unknown"
+    "running": "on",
+    "stopped": "off",
+    "pending": "unknown",
+    "stopping": "unknown",
+    "shutting-down": "unknown",
+    "terminated": "unknown"
 }
 
 def get_power_status(conn, options):
-	logger.debug("Starting status operation")
-	try:
-		instance_id = options["--plug"]
-		ec2_client = conn.meta.client
-
-		# Get the lastfence tag first
-		lastfence_response = ec2_client.describe_tags(
-			Filters=[
-				{"Name": "resource-id", "Values": [instance_id]},
-				{"Name": "key", "Values": ["lastfence"]}
-			]
-		)
-
-		if not lastfence_response["Tags"]:
-			logger.debug("No lastfence tag found for instance %s - instance is not fenced", instance_id)
-			return "on"
-
-		lastfence_timestamp = lastfence_response["Tags"][0]["Value"]
-
-		# Check for backup tags with pattern Original_SG_Backup_{instance_id}_*
-		response = ec2_client.describe_tags(
-			Filters=[
-				{"Name": "resource-id", "Values": [instance_id]},
-				{"Name": "key", "Values": [f"Original_SG_Backup_{instance_id}*"]}
-			]
-		)
-
-		if not response["Tags"]:
-			logger.debug("No backup tags found for instance %s - instance is not fenced", instance_id)
-			return "on"
-
-		# Loop through backup tags to find matching timestamp
-		for tag in response["Tags"]:
-			try:
-				backup_data = json.loads(tag["Value"])
-				backup_timestamp = backup_data.get("t")  # Using shortened timestamp field
-
-				if not backup_timestamp:
-					logger.debug("No timestamp found in backup data for tag %s", tag["Key"])
-					continue
-
-				# Validate timestamps match
-				if str(backup_timestamp) == str(lastfence_timestamp):
-					logger.debug("Found matching backup tag %s - instance is fenced", tag["Key"])
-					return "off"
-
-			except (json.JSONDecodeError, KeyError) as e:
-				logger.error(f"Failed to parse backup data for tag {tag['Key']}: {str(e)}")
-				continue
-
-		logger.debug("No backup tags with matching timestamp found - instance is not fenced")
-		return "on"
-
-	except ClientError:
-		fail_usage("Failed: Incorrect Access Key or Secret Key.")
-	except EndpointConnectionError:
-		fail_usage("Failed: Incorrect Region.")
-	except IndexError:
-		fail(EC_STATUS)
-	except Exception as e:
-		logger.error("Failed to get power status: %s", e)
-		fail(EC_STATUS)
+    logger.debug("Starting status operation")
+    try:
+        instance_id = options["--plug"]
+        ec2_client = conn.meta.client
+
+        # Get the lastfence tag first
+        lastfence_response = ec2_client.describe_tags(
+            Filters=[
+                {"Name": "resource-id", "Values": [instance_id]},
+                {"Name": "key", "Values": ["lastfence"]}
+            ]
+        )
+
+        # Helper: returns True only when the instance is running, --secg is set and every interface already shows the requested SG change; any error is logged and reported as False
+        def check_sg_modifications():
+            try:
+                state, _, interfaces = get_instance_details(ec2_client, instance_id)
+                if state == "running":  # Only check SGs if instance is running
+                    sg_to_remove = options.get("--secg", "").split(",") if options.get("--secg") else []
+                    if sg_to_remove:
+                        # Check if all interfaces have had their security groups modified
+                        all_interfaces_fenced = True
+                        for interface in interfaces:
+                            current_sgs = interface["SecurityGroups"]
+                            if "--invert-sg-removal" in options:
+                                # In keep_only mode, check if interface only has the specified groups
+                                if sorted(current_sgs) != sorted(sg_to_remove):
+                                    logger.debug(f"Interface {interface['NetworkInterfaceId']} still has different security groups")
+                                    all_interfaces_fenced = False
+                                    break
+                            else:
+                                # In remove mode, check if specified groups were removed
+                                if any(sg in current_sgs for sg in sg_to_remove):
+                                    logger.debug(f"Interface {interface['NetworkInterfaceId']} still has security groups that should be removed")
+                                    all_interfaces_fenced = False
+                                    break
+
+                        if all_interfaces_fenced:
+                            logger.debug("All interfaces have had their security groups successfully modified - considering instance fenced")
+                            return True
+            except Exception as e:
+                logger.debug("Failed to check security group modifications: %s", e)
+            return False
+
+        # If --ignore-tag-write-failure is set, prioritize checking SG modifications
+        if "--ignore-tag-write-failure" in options:
+            logger.debug("--ignore-tag-write-failure is set, checking security group modifications first")
+            if check_sg_modifications():
+                logger.info("All interfaces are properly fenced based on security group state, ignoring tag state")
+                return "off"
+            logger.debug("Not all interfaces are fenced, proceeding with tag checks")
+            # Only proceed with tag checks if we haven't determined state from SG modifications
+
+        try:
+            # If no lastfence tag exists, instance is not fenced
+            if not lastfence_response["Tags"]:
+                logger.debug("No lastfence tag found for instance %s - instance is not fenced", instance_id)
+                return "on"
+
+            lastfence_timestamp = lastfence_response["Tags"][0]["Value"]
+        except Exception as e:
+            if "--ignore-tag-write-failure" in options:
+                logger.warning(f"Failed to check lastfence tag but continuing due to ignore-tag-write-failure: {str(e)}")
+                # If we can't read tags but --ignore-tag-write-failure is set, rely on SG state
+                return "on"  # Default to "on" to allow fence operation to proceed
+            raise
+
+        # Check for backup tags with pattern Original_SG_Backup_{instance_id}_*
+        response = ec2_client.describe_tags(
+            Filters=[
+                {"Name": "resource-id", "Values": [instance_id]},
+                {"Name": "key", "Values": [f"Original_SG_Backup_{instance_id}*"]}
+            ]
+        )
+
+        # If no backup tags exist, instance is not fenced (unless --ignore-tag-write-failure handled above)
+        if not response["Tags"]:
+            logger.debug("No backup tags found for instance %s - instance is not fenced", instance_id)
+            return "on"
+
+        # Loop through backup tags to find matching timestamp
+        for tag in response["Tags"]:
+            try:
+                backup_data = json.loads(tag["Value"])
+                backup_timestamp = backup_data.get("t")  # Using shortened timestamp field
+
+                if not backup_timestamp:
+                    logger.debug("No timestamp found in backup data for tag %s", tag["Key"])
+                    continue
+
+                # Validate timestamps match
+                if str(backup_timestamp) == str(lastfence_timestamp):
+                    # Check if security groups were actually modified to confirm fencing; if the check is inconclusive (not running, no --secg, or an interface not yet changed) nothing is returned here and the loop moves on to the next tag
+                    try:
+                        state, _, interfaces = get_instance_details(ec2_client, instance_id)
+                        if state == "running":  # Only check SGs if instance is running
+                            sg_to_remove = options.get("--secg", "").split(",") if options.get("--secg") else []
+                            if sg_to_remove:
+                                # Check if all interfaces have had their security groups modified
+                                all_interfaces_fenced = True
+                                for interface in interfaces:
+                                    current_sgs = interface["SecurityGroups"]
+                                    if "--invert-sg-removal" in options:
+                                        # In keep_only mode, check if interface only has the specified groups
+                                        if sorted(current_sgs) != sorted(sg_to_remove):
+                                            logger.debug(f"Interface {interface['NetworkInterfaceId']} still has different security groups")
+                                            all_interfaces_fenced = False
+                                            break
+                                    else:
+                                        # In remove mode, check if specified groups were removed
+                                        if any(sg in current_sgs for sg in sg_to_remove):
+                                            logger.debug(f"Interface {interface['NetworkInterfaceId']} still has security groups that should be removed")
+                                            all_interfaces_fenced = False
+                                            break
+
+                                if all_interfaces_fenced:
+                                    logger.debug("Found matching backup tag %s and verified all interfaces have SG changes - instance is fenced", tag["Key"])
+                                    return "off"
+                    except Exception as e:
+                        logger.debug("Failed to check security group modifications: %s", e)
+                        # If we can't verify SG changes but have matching tags, assume fenced for backward compatibility
+                        logger.debug("Found matching backup tag %s but couldn't verify SG changes - assuming instance is fenced", tag["Key"])
+                        return "off"
+
+            except (json.JSONDecodeError, KeyError) as e:
+                logger.error(f"Failed to parse backup data for tag {tag['Key']}: {str(e)}")
+                continue
+
+        logger.debug("No backup tags with matching timestamp found - instance is not fenced")
+        return "on"
+
+    except ClientError:
+        fail_usage("Failed: Incorrect Access Key or Secret Key.")
+    except EndpointConnectionError:
+        fail_usage("Failed: Incorrect Region.")
+    except IndexError:
+        fail(EC_STATUS)
+    except Exception as e:
+        logger.error("Failed to get power status: %s", e)
+        fail(EC_STATUS)
 
 # Retrieve instance ID for self-check
 def get_instance_id():
     """Retrieve the instance ID of the current EC2 instance."""
     try:
         token = requests.put(
             "http://169.254.169.254/latest/api/token",
             headers={"X-aws-ec2-metadata-token-ttl-seconds": "21600"},
         ).content.decode("UTF-8")
         instance_id = requests.get(
             "http://169.254.169.254/latest/meta-data/instance-id",
             headers={"X-aws-ec2-metadata-token": token},
         ).content.decode("UTF-8")
         return instance_id
     except Exception as err:
         logger.error("Failed to retrieve instance ID for self-check: %s", err)
         return None
 
 
 # Retrieve instance details
 def get_instance_details(ec2_client, instance_id):
     """Retrieve instance details including state, VPC, interfaces, and attached SGs."""
     try:
         response = ec2_client.describe_instances(InstanceIds=[instance_id])
         instance = response["Reservations"][0]["Instances"][0]
 
         instance_state = instance["State"]["Name"]
         vpc_id = instance["VpcId"]
         network_interfaces = instance["NetworkInterfaces"]
 
         interfaces = []
         for interface in network_interfaces:
             try:
                 interfaces.append(
                     {
                         "NetworkInterfaceId": interface["NetworkInterfaceId"],
                         "SecurityGroups": [sg["GroupId"] for sg in interface["Groups"]],
                     }
                 )
             except KeyError as e:
                 logger.error(f"Malformed interface data: {str(e)}")
                 continue
 
         return instance_state, vpc_id, interfaces
 
     except ClientError as e:
         logger.error(f"AWS API error while retrieving instance details: {str(e)}")
         raise
     except IndexError as e:
         logger.error(f"Instance {instance_id} not found or no instances returned: {str(e)}")
         raise
     except KeyError as e:
         logger.error(f"Unexpected response format from AWS API: {str(e)}")
         raise
     except Exception as e:
         logger.error(f"Unexpected error while retrieving instance details: {str(e)}")
         raise
 
 # Check if we are the self-fencing node
 def get_self_power_status(conn, instance_id):
-	try:
-		instance = conn.instances.filter(Filters=[{"Name": "instance-id", "Values": [instance_id]}])
-		state = list(instance)[0].state["Name"]
-		if state == "running":
-			logger.debug(f"Captured my ({instance_id}) state and it {state.upper()} - returning OK - Proceeding with fencing")
-			return "ok"
-		else:
-			logger.debug(f"Captured my ({instance_id}) state it is {state.upper()} - returning Alert - Unable to fence other nodes")
-			return "alert"
-
-	except ClientError:
-		fail_usage("Failed: Incorrect Access Key or Secret Key.")
-	except EndpointConnectionError:
-		fail_usage("Failed: Incorrect Region.")
-	except IndexError:
-		return "fail"
+    try:
+        instance = conn.instances.filter(Filters=[{"Name": "instance-id", "Values": [instance_id]}])
+        state = list(instance)[0].state["Name"]  # indexing raises IndexError when the filter matched no instance
+        if state == "running":  # only a running node is reported as "ok"
+            logger.debug(f"Captured my ({instance_id}) state and it {state.upper()} - returning OK - Proceeding with fencing")
+            return "ok"
+        else:
+            logger.debug(f"Captured my ({instance_id}) state it is {state.upper()} - returning Alert - Unable to fence other nodes")
+            return "alert"
+
+    except ClientError:
+        fail_usage("Failed: Incorrect Access Key or Secret Key.")
+    except EndpointConnectionError:
+        fail_usage("Failed: Incorrect Region.")
+    except IndexError:
+        return "fail"  # no instance matched instance_id
 
 # Create backup tags for each network interface
 def create_backup_tag(ec2_client, instance_id, interfaces, timestamp):
     """Create tags on the instance to backup original security groups for each network interface.
     If the security groups list is too long, it will be split across multiple tags."""
     try:
         # Create tags for each network interface
         for idx, interface in enumerate(interfaces, 1):
             interface_id = interface["NetworkInterfaceId"]
             security_groups = interface["SecurityGroups"]
 
             # Initialize variables for chunking
             sg_chunks = []
             current_chunk = []
 
             # Strip 'sg-' prefix from all security groups first
             stripped_sgs = [sg[3:] if sg.startswith('sg-') else sg for sg in security_groups]
 
             for sg in stripped_sgs:
                 # Create a test chunk with the new security group
                 test_chunk = current_chunk + [sg]
 
                 # Create a test backup object with this chunk
                 test_backup = {
                     "n": {
                         "i": interface_id,
                         "s": test_chunk,
                         "c": {
                             "i": len(sg_chunks),
                             "t": 1  # Temporary value, will update later
                         }
                     },
                     "t": timestamp
                 }
 
                 # Check if adding this SG would exceed the character limit
                 if len(json.dumps(test_backup)) > 254:
                     # Current chunk is full, add it to chunks and start a new one
                     if current_chunk:  # Only add if not empty
                         sg_chunks.append(current_chunk)
                         current_chunk = [sg]
                     else:
                         # Edge case: single SG exceeds limit (shouldn't happen with normal SG IDs)
                         logger.warning(f"Security group ID {sg} is unusually long")
                         sg_chunks.append([sg])
                 else:
                     # Add SG to current chunk
                     current_chunk = test_chunk
 
             # Add the last chunk if it has any items
             if current_chunk:
                 sg_chunks.append(current_chunk)
 
             # Update total chunks count and create tags
             for chunk_idx, sg_chunk in enumerate(sg_chunks):
 
                 sg_backup = {
                     "n": {  # NetworkInterface shortened to n
                         "i": interface_id,  # ni shortened to i
                         "s": sg_chunk,  # sg shortened to s, with 'sg-' prefix stripped
                         "c": {              # ci shortened to c
                             "i": chunk_idx,
                             "t": len(sg_chunks)
                         }
                     },
                     "t": timestamp  # ts shortened to t
                 }
                 tag_value = json.dumps(sg_backup)
                 tag_key = f"Original_SG_Backup_{instance_id}_{timestamp}_{idx}_{chunk_idx}"
 
                 # Create the tag
                 ec2_client.create_tags(
                     Resources=[instance_id],
                     Tags=[{"Key": tag_key, "Value": tag_value}],
                 )
 
                 # Verify the tag was created
                 response = ec2_client.describe_tags(
                     Filters=[
                         {"Name": "resource-id", "Values": [instance_id]},
                         {"Name": "key", "Values": [tag_key]}
                     ]
                 )
 
                 if not response["Tags"]:
                     logger.error(f"Failed to verify creation of backup tag '{tag_key}' for instance {instance_id}")
                     raise Exception("Backup tag creation could not be verified")
 
                 created_tag_value = response["Tags"][0]["Value"]
                 if created_tag_value != tag_value:
                     logger.error(f"Created tag value does not match expected value for instance {instance_id}")
                     raise Exception("Backup tag value mismatch")
 
                 logger.info(f"Backup tag '{tag_key}' chunk {chunk_idx + 1}/{len(sg_chunks)} created and verified for interface {interface_id}.")
     except ClientError as e:
         logger.error(f"AWS API error while creating/verifying backup tag: {str(e)}")
         raise
     except Exception as e:
         logger.error(f"Unexpected error while creating/verifying backup tag: {str(e)}")
         raise
 
 
-def modify_security_groups(ec2_client, instance_id, sg_list, timestamp, mode="remove"):
+def modify_security_groups(ec2_client, instance_id, sg_list, timestamp, mode="remove", options=None):
     """
     Modifies security groups on network interfaces based on the specified mode.
     In 'remove' mode: Removes all SGs in sg_list from each interface
     In 'keep_only' mode: Keeps only the SGs in sg_list and removes all others
 
     Args:
         ec2_client: The boto3 EC2 client
         instance_id: The ID of the EC2 instance
         sg_list: List of security group IDs to remove or keep
         timestamp: Unix timestamp for backup tag
         mode: Either "remove" or "keep_only" to determine operation mode
 
     Raises:
         ClientError: If AWS API calls fail
         Exception: For other unexpected errors
     """
     try:
         # Get instance details
         state, _, interfaces = get_instance_details(ec2_client, instance_id)
 
+        # Create the backup tags and the lastfence tag before making any changes; options is the optional agent options dict and may be None
         try:
-            # Create a backup tag before making changes
             create_backup_tag(ec2_client, instance_id, interfaces, timestamp)
-        except ClientError as e:
-            logger.warning(f"Failed to create backup tag: {str(e)}")
-            # Continue execution even if backup tag creation fails
+            try:
+                set_lastfence_tag(ec2_client, instance_id, timestamp)
+            except Exception as e:
+                if "--ignore-tag-write-failure" in (options or {}):  # options defaults to None; treat that as "flag not set"
+                    logger.warning(f"Failed to set lastfence tag but continuing due to --ignore-tag-write-failure: {str(e)}")
+                    logger.info("Will rely on security group state for fencing status")
+                else:
+                    logger.error(f"Failed to set lastfence tag: {str(e)}")
+                    raise
+        except Exception as e:
+            if "--ignore-tag-write-failure" in (options or {}):  # options defaults to None; treat that as "flag not set"
+                logger.warning(f"Failed to create backup tag but continuing due to --ignore-tag-write-failure: {str(e)}")
+                logger.info("Will rely on security group state for fencing status")
+            else:
+                logger.error(f"Failed to create backup tag: {str(e)}")
+                raise
 
         changed_any = False
         for interface in interfaces:
             try:
                 original_sgs = interface["SecurityGroups"]
 
                 if mode == "remove":
                     # Exclude any SGs that are in sg_list
                     updated_sgs = [sg for sg in original_sgs if sg not in sg_list]
                     operation_desc = f"removing {sg_list}"
                 else:  # keep_only mode
                     # Set interface to only use the specified security groups
                     updated_sgs = sg_list
                     operation_desc = f"keeping only {sg_list}"
 
                 # Skip if we'd end up with zero SGs (only in remove mode)
                 if mode == "remove" and not updated_sgs:
                     logger.info(
                         f"Skipping interface {interface['NetworkInterfaceId']}: "
                         f"removal of {sg_list} would leave 0 SGs."
                     )
                     continue
 
                 # Skip if no changes needed
                 if updated_sgs == original_sgs:
                     continue
 
                 logger.info(
                     f"Updating interface {interface['NetworkInterfaceId']} from {original_sgs} "
                     f"to {updated_sgs} ({operation_desc})"
                 )
 
                 try:
                     ec2_client.modify_network_interface_attribute(
                         NetworkInterfaceId=interface["NetworkInterfaceId"],
                         Groups=updated_sgs
                     )
                     changed_any = True
                 except ClientError as e:
                     logger.error(
                         f"Failed to modify security groups for interface "
                         f"{interface['NetworkInterfaceId']}: {str(e)}"
                     )
                     continue
 
             except KeyError as e:
                 logger.error(f"Malformed interface data: {str(e)}")
                 continue
 
-        # If we didn't modify anything, log appropriate error
+        # If we didn't modify anything, raise an error
         if not changed_any:
             if mode == "remove":
                 error_msg = f"Security Groups {sg_list} not removed from any interface. Either not found, or removal left 0 SGs."
             else:
                 error_msg = f"Security Groups {sg_list} not found on any interface. No changes made."
             logger.error(error_msg)
-            sys.exit(EC_GENERIC_ERROR)
+            raise Exception("Failed to modify security groups: " + error_msg)
 
         # Wait a bit for changes to propagate
         time.sleep(5)
 
     except ClientError as e:
         logger.error(f"AWS API error: {str(e)}")
         raise
     except Exception as e:
         logger.error(f"Unexpected error: {str(e)}")
         raise
 
 def restore_security_groups(ec2_client, instance_id):
     """
     Restores the original security groups from backup tags to each network interface.
     Each network interface's original security groups are stored in a separate backup tag.
     All backup tags share the same timestamp as the lastfence tag for validation.
 
     The process:
     1. Get lastfence tag timestamp
     2. Find all backup tags with matching timestamp
     3. Create a map of interface IDs to their original security groups
     4. Restore each interface's security groups from the map
     5. Clean up matching backup tags and lastfence tag
 
     Args:
         ec2_client: The boto3 EC2 client
         instance_id: The ID of the EC2 instance
 
     Raises:
         ClientError: If AWS API calls fail
         Exception: For other unexpected errors
         SystemExit: If required tags are missing or no changes were made
     """
     try:
         # Get the lastfence tag first
         lastfence_response = ec2_client.describe_tags(
             Filters=[
                 {"Name": "resource-id", "Values": [instance_id]},
                 {"Name": "key", "Values": ["lastfence"]}
             ]
         )
 
         if not lastfence_response["Tags"]:
             logger.error(f"No lastfence tag found for instance {instance_id}")
             sys.exit(EC_GENERIC_ERROR)
 
         lastfence_timestamp = lastfence_response["Tags"][0]["Value"]
 
         # Get all backup tags for this instance
         backup_response = ec2_client.describe_tags(
             Filters=[
                 {"Name": "resource-id", "Values": [instance_id]},
                 {"Name": "key", "Values": [f"Original_SG_Backup_{instance_id}*"]}
             ]
         )
 
         if not backup_response["Tags"]:
             logger.error(f"No backup tags found for instance {instance_id}")
             sys.exit(EC_GENERIC_ERROR)
 
         # Find and combine backup tags with matching timestamp
         matching_backups = {}
         interface_chunks = {}
 
         for tag in backup_response["Tags"]:
             try:
                 backup_data = json.loads(tag["Value"])
                 backup_timestamp = backup_data.get("t")  # Using shortened timestamp field
 
                 if not backup_timestamp or str(backup_timestamp) != str(lastfence_timestamp):
                     continue
 
                 logger.info(f"Found matching backup tag {tag['Key']}")
                 interface_data = backup_data.get("n")  # Using shortened NetworkInterface field
 
                 if not interface_data or "i" not in interface_data:  # Using shortened interface id field
                     continue
 
                 interface_id = interface_data["i"]  # Using shortened interface id field
                 chunk_info = interface_data.get("c", {})  # Using shortened chunk info field
                 chunk_index = chunk_info.get("i", 0)
                 total_chunks = chunk_info.get("t", 1)
 
                 # Initialize tracking for this interface if needed
                 if interface_id not in interface_chunks:
                     interface_chunks[interface_id] = {
                         "total": total_chunks,
                         "chunks": {},
                         "security_groups": []
                     }
 
                 # Add this chunk's security groups
                 interface_chunks[interface_id]["chunks"][chunk_index] = interface_data["s"]  # Using shortened security groups field
 
                 # If we have all chunks for this interface, combine them
                 if len(interface_chunks[interface_id]["chunks"]) == total_chunks:
                     # Combine chunks and restore 'sg-' prefix
                     combined_sgs = []
                     for i in range(total_chunks):
                         chunk_sgs = interface_chunks[interface_id]["chunks"][i]
                         # Add back 'sg-' prefix if not already present
                         restored_sgs = ['sg-' + sg if not sg.startswith('sg-') else sg for sg in chunk_sgs]
                         combined_sgs.extend(restored_sgs)
                     matching_backups[interface_id] = combined_sgs
 
             except (json.JSONDecodeError, KeyError) as e:
                 logger.error(f"Failed to parse backup data for tag {tag['Key']}: {str(e)}")
                 continue
 
         if not matching_backups:
             logger.error("No complete backup data found with matching timestamp")
             sys.exit(EC_GENERIC_ERROR)
 
         # Get current interfaces
         _, _, current_interfaces = get_instance_details(ec2_client, instance_id)
 
         # Use the combined matching_backups as our backup_sg_map
         backup_sg_map = matching_backups
 
         changed_any = False
         for interface in current_interfaces:
             try:
                 interface_id = interface["NetworkInterfaceId"]
                 if interface_id not in backup_sg_map:
                     logger.warning(
                         f"No backup data found for interface {interface_id}. Skipping."
                     )
                     continue
 
                 original_sgs = backup_sg_map[interface_id]
                 current_sgs = interface["SecurityGroups"]
 
                 if original_sgs == current_sgs:
                     logger.info(
                         f"Interface {interface_id} already has original security groups. Skipping."
                     )
                     continue
 
                 logger.info(
                     f"Restoring interface {interface_id} from {current_sgs} "
                     f"to original security groups {original_sgs}"
                 )
 
                 try:
                     ec2_client.modify_network_interface_attribute(
                         NetworkInterfaceId=interface_id,
                         Groups=original_sgs
                     )
                     changed_any = True
                 except ClientError as e:
                     logger.error(
                         f"Failed to restore security groups for interface "
                         f"{interface_id}: {str(e)}"
                     )
                     continue
 
             except KeyError as e:
                 logger.error(f"Malformed interface data: {str(e)}")
                 continue
 
         if not changed_any:
             logger.error("No security groups were restored. All interfaces skipped.")
             sys.exit(EC_GENERIC_ERROR)
 
         # Wait for changes to propagate
         time.sleep(5)
 
         # Clean up only the matching backup tags and lastfence tag after successful restore
         try:
             # Delete all backup tags that match the lastfence timestamp
             tags_to_delete = [{"Key": "lastfence"}]
             deleted_tag_keys = []
             for tag in backup_response["Tags"]:
                 try:
                     backup_data = json.loads(tag["Value"])
                     if str(backup_data.get("t")) == str(lastfence_timestamp):  # Using shortened timestamp field
                         tags_to_delete.append({"Key": tag["Key"]})
                         deleted_tag_keys.append(tag["Key"])
                 except (json.JSONDecodeError, KeyError):
                     continue
 
             if len(tags_to_delete) > 1:  # More than just the lastfence tag
                 ec2_client.delete_tags(
                     Resources=[instance_id],
                     Tags=tags_to_delete
                 )
                 logger.info(f"Removed matching backup tags {deleted_tag_keys} and lastfence tag from instance {instance_id}")
         except ClientError as e:
             logger.warning(f"Failed to remove tags: {str(e)}")
             # Continue since the restore operation was successful
 
     except ClientError as e:
         logger.error(f"AWS API error: {str(e)}")
         raise
     except Exception as e:
         logger.error(f"Unexpected error: {str(e)}")
         raise
 
 # Shutdown instance
 def shutdown_instance(ec2_client, instance_id, poll_interval=1, timeout=60):
     """Force-stop the instance and wait until the stop is observed.
 
     Args:
         ec2_client: EC2 client used to issue ``stop_instances``.
         instance_id: ID of the instance to stop.
         poll_interval: Seconds to sleep between two state checks.
         timeout: Maximum number of seconds to wait for the instance to leave
             the running state before the attempt is reported as failed.
 
     Errors (API failures, unexpected exceptions, timeout) are logged and
     reported through fail_usage().
     """
     # Any of these states shows that the stop request took effect. Waiting
     # only for "stopping" would poll forever if that short-lived state is
     # missed or the instance was already stopped.
     stop_states = ("stopping", "stopped", "shutting-down", "terminated")
     try:
         logger.info(f"Initiating shutdown for instance {instance_id}...")
         ec2_client.stop_instances(InstanceIds=[instance_id], Force=True)
 
         deadline = time.monotonic() + timeout
         while True:
             try:
                 state, _, _ = get_instance_details(ec2_client, instance_id)
                 logger.info(f"Current instance state: {state}")
                 if state in stop_states:
                     logger.info(
                         f"Instance {instance_id} is in state '{state}'. Proceeding without waiting further."
                     )
                     break
             except ClientError as e:
                 logger.error(f"Failed to get instance state during shutdown: {str(e)}")
                 fail_usage(f"AWS API error while checking instance state: {str(e)}")
             except Exception as e:
                 logger.error(f"Unexpected error while checking instance state: {str(e)}")
                 fail_usage(f"Failed to check instance state: {str(e)}")
 
             # Bound the wait so a stuck instance cannot hang the agent.
             if time.monotonic() >= deadline:
                 logger.error(f"Timed out after {timeout}s waiting for instance {instance_id} to stop")
                 fail_usage(f"Timed out waiting for instance {instance_id} to stop")
                 return
             # Pause between polls instead of calling the API in a tight loop.
             time.sleep(poll_interval)
 
     except ClientError as e:
         logger.error(f"AWS API error during instance shutdown: {str(e)}")
         fail_usage(f"Failed to shutdown instance: {str(e)}")
     except Exception as e:
         logger.error(f"Unexpected error during instance shutdown: {str(e)}")
         fail_usage(f"Failed to shutdown instance due to unexpected error: {str(e)}")
 
 
 # List instances and their power status
 def get_nodes_list(conn, options):
     """Return the instances visible through ``conn`` with their power status.
 
     Args:
         conn: Connection object whose ``instances.filter`` collection is queried.
         options: Parsed agent options. ``--filter`` (``key=value``) narrows the
             listing; ``--original-action`` controls whether unknown instance
             states are logged as errors.
 
     Returns:
         dict mapping instance id to ``(name, power_status)``, where name is the
         value of the instance's "Name" tag ("" if absent) and power_status is
         looked up in the module-level ``status`` table ("unknown" when the
         state is not listed there). Any failure is logged and whatever was
         collected so far is returned.
     """
     logger.debug("Starting monitor operation")
     result = {}
     try:
         filters = []
         if "--filter" in options:
             # Split on the first "=" only, so values that themselves contain
             # "=" are passed through intact.
             filter_key, separator, filter_value = options["--filter"].partition("=")
             if not separator:
                 raise ValueError(
                     "--filter must have the form key=value, got {!r}".format(options["--filter"])
                 )
             filters = [{"Name": filter_key.strip(), "Values": [filter_value.strip()]}]
             logger.debug("Filter: {}".format(filters))
 
         for instance in conn.instances.filter(Filters=filters):
             instance_name = ""
             for tag in instance.tags or []:
                 if tag.get("Key") == "Name":
                     instance_name = tag["Value"]
             state_name = instance.state.get("Name")
             power = status.get(state_name)
             if power is None:
                 # State not present in the status table: report it as unknown.
                 if options.get("--original-action") == "list-status":
                     logger.error("Unknown status \"{}\" returned for {} ({})".format(state_name, instance.id, instance_name))
                 power = "unknown"
             result[instance.id] = (instance_name, power)
     except Exception as e:
         logger.error("Failed to get node list: %s", e)
     return result
 
 def set_lastfence_tag(ec2_client, instance_id, timestamp):
     """Write the ``lastfence`` tag carrying ``timestamp`` onto the instance.
 
     Args:
         ec2_client: EC2 client used to call ``create_tags``.
         instance_id: ID of the instance that receives the tag.
         timestamp: Value stored in the tag (converted with ``str``).
 
     Raises:
         Exception: any error from the tagging call is logged and re-raised.
     """
     try:
         lastfence_tag = {"Key": "lastfence", "Value": str(timestamp)}
         ec2_client.create_tags(Resources=[instance_id], Tags=[lastfence_tag])
         logger.info(f"Set lastfence tag with timestamp {timestamp} on instance {instance_id}")
     except Exception as exc:
         logger.error(f"Failed to set lastfence tag: {str(exc)}")
         raise
 
 def set_power_status(conn, options):
     """Apply the requested fence ("off") or unfence ("on") action to an instance.
 
     For "on", security groups are restored through restore_security_groups()
     unless --unfence-ignore-restore is present in options. For "off", the
     instance must be reported as "running"; the groups listed in --secg are
     then handed to modify_security_groups(), and the instance is additionally
     shut down when --onfence-poweroff is present.
 
     Args:
         conn: Connection object whose ``meta.client`` is used as the EC2 client.
         options: Parsed agent options; reads --plug, --action, --secg and the
             flag options checked below.
 
     Failures are reported through fail_usage() or fail(EC_STATUS).
     """
     timestamp = int(time.time())  # Unix timestamp
     ec2_client = conn.meta.client
     instance_id = options["--plug"]
     # --secg is a comma-separated list; a missing or empty value yields no groups
     sg_to_remove = options.get("--secg", "").split(",") if options.get("--secg") else []
 
     # Perform self-check if skip-race not set
     if "--skip-race-check" not in options:
         self_instance_id = get_instance_id()
         if self_instance_id == instance_id:
             fail_usage("Self-fencing detected. Exiting.")
 
     try:
         # Only verify instance is running for 'off' action
         if options["--action"] == "off":
             instance_state, _, _ = get_instance_details(ec2_client, instance_id)
             if instance_state != "running":
                 fail_usage(f"Instance {instance_id} is not running. Exiting.")
 
         if options["--action"] == "on":
             if not "--unfence-ignore-restore" in options:
                 restore_security_groups(ec2_client, instance_id)
             else:
                 logger.info("Ignored Restoring security groups as --unfence-ignore-restore is set")
         elif options["--action"] == "off":
             # NOTE(review): when --secg is empty this branch changes nothing and
             # reports no error — confirm that is intended.
             if sg_to_remove:
                 # --invert-sg-removal selects "keep_only" instead of "remove"
                 mode = "keep_only" if "--invert-sg-removal" in options else "remove"
-                modify_security_groups(ec2_client, instance_id, sg_to_remove, timestamp, mode)
-                set_lastfence_tag(ec2_client, instance_id, timestamp)
-                if "--onfence-poweroff" in options:
-                    shutdown_instance(ec2_client, instance_id)
+                try:
+                    modify_security_groups(ec2_client, instance_id, sg_to_remove, timestamp, mode, options)
+                    if "--onfence-poweroff" in options:
+                        shutdown_instance(ec2_client, instance_id)
+                except Exception as e:
+                    if isinstance(e, ClientError):
+                        logger.error("AWS API error: %s", e)
+                        fail_usage(str(e))
+                    elif "--ignore-tag-write-failure" in options:
+                        # If we're ignoring tag failures, only fail if the security group modifications failed
+                        if "Failed to modify security groups" in str(e):
+                            logger.error("Failed to modify security groups: %s", e)
+                            fail(EC_STATUS)
+                        else:
+                            logger.warning("Ignoring error due to ignore-tag-write-failure: %s", e)
+                    else:
+                        logger.error("Failed to set power status: %s", e)
+                        fail(EC_STATUS)
     # Last-resort handler for anything not dealt with above
     except Exception as e:
-        logger.error("Failed to set power status: %s", e)
+        logger.error("Unexpected error in set_power_status: %s", e)
         fail(EC_STATUS)
 
 
 # Define fencing agent options
 def define_new_opts():
     all_opt["port"]["help"] = "-n, --plug=[id]                AWS Instance ID to perform action on "
     all_opt["port"]["shortdesc"] = "AWS Instance ID to perform action on "
 
     all_opt["region"] = {
         "getopt": "r:",
         "longopt": "region",
         "help": "-r, --region=[region]          AWS region (e.g., us-east-1)",
         "shortdesc": "AWS Region.",
         "required": "0",
         "order": 1,
     }
     all_opt["access_key"] = {
         "getopt": "a:",
         "longopt": "access-key",
         "help": "-a, --access-key=[key]         AWS access key.",
         "shortdesc": "AWS Access Key.",
         "required": "0",
         "order": 2,
     }
     all_opt["secret_key"] = {
         "getopt": "s:",
         "longopt": "secret-key",
         "help": "-s, --secret-key=[key]         AWS secret key.",
         "shortdesc": "AWS Secret Key.",
         "required": "0",
         "order": 3,
     }
     all_opt["secg"] = {
         "getopt": ":",
         "longopt": "secg",
         "help": "--secg=[sg1,sg2,...]           Comma-separated list of Security Groups to remove.",
         "shortdesc": "Security Groups to remove.",
         "required": "0",
         "order": 4,
     }
     all_opt["skip_race_check"] = {
         "getopt": "",
         "longopt": "skip-race-check",
         "help": "--skip-race-check              Skip race condition check.",
         "shortdesc": "Skip race condition check.",
         "required": "0",
         "order": 6,
     }
     all_opt["invert-sg-removal"] = {
         "getopt": "",
         "longopt": "invert-sg-removal",
         "help": "--invert-sg-removal            Remove all security groups except the specified one(s).",
         "shortdesc": "Remove all security groups except specified..",
         "required": "0",
         "order": 7,
     }
     all_opt["unfence-ignore-restore"] = {
         "getopt": "",
         "longopt": "unfence-ignore-restore",
         "help": "--unfence-ignore-restore       Do not restore security groups from tag when unfencing (off).",
         "shortdesc": "Remove all security groups except specified..",
         "required": "0",
         "order": 8,
 
     }
     all_opt["filter"] = {
         "getopt": ":",
         "longopt": "filter",
         "help": "--filter=[key=value]           Filter (e.g. vpc-id=[vpc-XXYYZZAA])",
         "shortdesc": "Filter for list-action",
         "required": "0",
         "order": 9
     }
     all_opt["boto3_debug"] = {
         "getopt": "b:",
         "longopt": "boto3_debug",
         "help": "-b, --boto3_debug=[option]     Boto3 and Botocore library debug logging",
         "shortdesc": "Boto Lib debug",
         "required": "0",
         "default": "False",
         "order": 10
     }
     all_opt["onfence-poweroff"] = {
         "getopt": "",
         "longopt": "onfence-poweroff",
         "help": "--onfence-poweroff             Power off the machine async upon fence (this is a network fencing agent...)",
         "shortdesc": "Power off the machine async..",
         "required": "0",
         "order": 11
     }
+    all_opt["ignore-tag-write-failure"] = {
+        "getopt": "",
+        "longopt": "ignore-tag-write-failure",
+        "help": "--ignore-tag-write-failure     Continue to fence even if backup tag fails.  This ensures prioriization of fencing over AWS backplane access",
+        "shortdesc": "Continue to fence even if backup tag fails..",
+        "required": "0",
+        "order": 12
+    }
 
 
 def main():
     """Agent entry point.
 
     Parses and validates the options, shows documentation when requested,
     configures logging, opens the boto3 EC2 resource and hands control to
     fence_action() with this agent's status/power/list callbacks. The process
     exits with the value returned by fence_action().
     """
     conn = None
 
     # Names of the all_opt entries this agent accepts
     device_opt = [
         "no_password",
         "region",
         "access_key",
         "secret_key",
         "secg",
         "port",
         "skip_race_check",
         "invert-sg-removal",
         "unfence-ignore-restore",
         "filter",
         "boto3_debug",
-        "onfence-poweroff"
+        "onfence-poweroff",
+        "ignore-tag-write-failure"
 ]
 
     atexit.register(atexit_handler)
 
     define_new_opts()
 
     try:
         processed_input = process_input(device_opt)
         options = check_input(device_opt, processed_input)
     except Exception as e:
         logger.error(f"Failed to process input options: {str(e)}")
         sys.exit(EC_GENERIC_ERROR)
 
     run_delay(options)
 
     docs = {
         "shortdesc": "Fence agent for AWS (Amazon Web Services) Net",
         "longdesc": (
             "fence_aws_vpc is a Network and Power Fencing agent for AWS VPC that works by "
             "manipulating security groups. It uses the boto3 library to connect to AWS.\n\n"
             "boto3 can be configured with AWS CLI or by creating ~/.aws/credentials.\n"
             "For instructions see: https://boto3.readthedocs.io/en/latest/guide/quickstart.html#configuration"
             " "
             "NOTE: If onfence-poweroff is set, the agent won't be able to power on the node again, it will have to be powered on manually or with other automation."
         ),
         "vendorurl": "http://www.amazon.com"
     }
     show_docs(options, docs)
 
     # Without --onfence-poweroff a "reboot" request is handled as "off"
     if "--onfence-poweroff" not in options and options.get("--action", "") == "reboot":
         options["--action"] = "off"
 
     # Configure logging
     if "--debug-file" in options:
         # Drop file handlers already attached before adding the one for --debug-file
         for handler in logger.handlers:
             if isinstance(handler, logging.FileHandler):
                 logger.removeHandler(handler)
         lh = logging.FileHandler(options["--debug-file"])
         logger.addHandler(lh)
         lhf = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
         lh.setFormatter(lhf)
         lh.setLevel(logging.DEBUG)
 
     # Configure boto3 logging
     # Debug logging is enabled only when --boto3_debug is one of 1/yes/on/true
     if options.get("--boto3_debug", "").lower() not in ["1", "yes", "on", "true"]:
         boto3.set_stream_logger('boto3', logging.INFO)
         boto3.set_stream_logger('botocore', logging.CRITICAL)
         logging.getLogger('botocore').propagate = False
         logging.getLogger('boto3').propagate = False
     else:
         # Send boto3/botocore records to a dedicated log file
         log_format = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
         logging.getLogger('botocore').propagate = False
         logging.getLogger('boto3').propagate = False
         fdh = logging.FileHandler('/var/log/fence_aws_vpc_boto3.log')
         fdh.setFormatter(log_format)
         logging.getLogger('boto3').addHandler(fdh)
         logging.getLogger('botocore').addHandler(fdh)
         logging.debug("Boto debug level is %s and sending debug info to /var/log/fence_aws_vpc_boto3.log",
                      options.get("--boto3_debug"))
 
     # Establish AWS connection
     region = options.get("--region")
     access_key = options.get("--access-key")
     secret_key = options.get("--secret-key")
 
     try:
         conn = boto3.resource(
             "ec2",
             region_name=region,
             aws_access_key_id=access_key,
             aws_secret_access_key=secret_key,
         )
     except Exception as e:
         # A failed connection is tolerated for metadata/manpage/validate-all;
         # conn then stays None.
         if not options.get("--action", "") in ["metadata", "manpage", "validate-all"]:
             fail_usage("Failed: Unable to connect to AWS: " + str(e))
         else:
             pass
 
     # Operate the fencing device using the fence library's fence_action
     result = fence_action(conn, options, set_power_status, get_power_status, get_nodes_list)
     sys.exit(result)
 
 
 # Run the agent only when executed as a script
 if __name__ == "__main__":
     main()
+
diff --git a/agents/aws_vpc_net/readme.md b/agents/aws_vpc_net/readme.md
index 47f467ea..de1a12e5 100644
--- a/agents/aws_vpc_net/readme.md
+++ b/agents/aws_vpc_net/readme.md
@@ -1,328 +1,461 @@
 # Fence AWS VPC Network Agent Design Document
 
 ## Overview
 
 The fence_aws_vpc_net agent is a network and power fencing agent for AWS VPC that operates by manipulating security groups. This document outlines the design and architecture of the agent.
 
 ## Class Diagram
 
 ```mermaid
 classDiagram
     class FenceAWSVPCNet {
         -logger: Logger
         -conn: boto3.resource
         -options: dict
         +main()
         +define_new_opts()
         +process_input()
         +check_input()
+        +get_power_status()
+        +set_power_status()
     }
 
-    class AWSConnection {
-        -region: str
-        -access_key: str
-        -secret_key: str
-        +establish_connection()
-        +validate_credentials()
+    class InstanceOperations {
+        +get_instance_id()
+        +get_instance_details()
+        +shutdown_instance()
+        +get_nodes_list()
     }
 
-    class SecurityGroupManager {
+    class SecurityGroupOperations {
         +modify_security_groups()
         +create_backup_tag()
         +restore_security_groups()
         -validate_sg_changes()
     }
 
-    class InstanceManager {
-        +get_instance_details()
-        +shutdown_instance()
-        +get_power_status()
-        +set_power_status()
-        -validate_instance_state()
-    }
-
-    class TagManager {
+    class TagOperations {
         +set_lastfence_tag()
-        +get_backup_tags()
-        +cleanup_tags()
-        -validate_tag_operations()
+        +create_backup_tag()
+        +restore_from_backup()
+        -handle_chunked_tags()
     }
 
-    FenceAWSVPCNet --> AWSConnection
-    FenceAWSVPCNet --> SecurityGroupManager
-    FenceAWSVPCNet --> InstanceManager
-    SecurityGroupManager --> TagManager
-    InstanceManager --> TagManager
+    class LoggingManager {
+        +configure_logging()
+        +configure_boto3_logging()
+        +handle_debug_file()
+    }
 
+    FenceAWSVPCNet --> InstanceOperations
+    FenceAWSVPCNet --> SecurityGroupOperations
+    FenceAWSVPCNet --> TagOperations
+    FenceAWSVPCNet --> LoggingManager
+    SecurityGroupOperations --> TagOperations
 ```
 
 ## Sequence Diagrams
 
 ### Fence Operation (Power Off)
 
 ```mermaid
 sequenceDiagram
     participant Client
     participant FenceAgent
     participant AWS
     participant SecurityGroups
     participant Tags
 
     Client->>FenceAgent: Execute fence operation
     FenceAgent->>AWS: Validate AWS credentials
     AWS-->>FenceAgent: Credentials valid
 
+    opt skip-race-check not set
+        FenceAgent->>AWS: Get self instance ID
+        AWS-->>FenceAgent: Instance ID
+        FenceAgent->>FenceAgent: Check for self-fencing
+    end
+
     FenceAgent->>AWS: Get instance details
     AWS-->>FenceAgent: Instance details
 
     alt Instance is running
         FenceAgent->>SecurityGroups: Backup current security groups
         SecurityGroups-->>FenceAgent: Backup created
 
-        FenceAgent->>Tags: Create lastfence tag
-        Tags-->>FenceAgent: Tag created
+        alt ignore-tag-write-failure not set
+            FenceAgent->>Tags: Create lastfence tag
+            Tags-->>FenceAgent: Tag created
+        end
 
         FenceAgent->>SecurityGroups: Modify security groups
         SecurityGroups-->>FenceAgent: Groups modified
 
         opt onfence-poweroff enabled
             FenceAgent->>AWS: Initiate shutdown
             AWS-->>FenceAgent: Shutdown initiated
         end
 
         FenceAgent-->>Client: Success
     else Instance not running
         FenceAgent-->>Client: Fail - Instance not running
     end
 ```
 
 ### Unfence Operation (Power On)
 
 ```mermaid
 sequenceDiagram
     participant Client
     participant FenceAgent
     participant AWS
     participant SecurityGroups
     participant Tags
 
     Client->>FenceAgent: Execute unfence operation
     FenceAgent->>AWS: Validate AWS credentials
     AWS-->>FenceAgent: Credentials valid
 
-    FenceAgent->>Tags: Get lastfence tag
-    Tags-->>FenceAgent: Lastfence tag
+    alt unfence-ignore-restore not set
+        FenceAgent->>Tags: Get lastfence tag
+        Tags-->>FenceAgent: Lastfence tag
 
-    FenceAgent->>Tags: Get backup tags
-    Tags-->>FenceAgent: Backup tags
+        FenceAgent->>Tags: Get backup tags
+        Tags-->>FenceAgent: Backup tags
 
-    alt Valid backup found
-        FenceAgent->>SecurityGroups: Restore original security groups
-        SecurityGroups-->>FenceAgent: Groups restored
+        alt Valid backup found
+            FenceAgent->>SecurityGroups: Restore original security groups
+            SecurityGroups-->>FenceAgent: Groups restored
 
-        FenceAgent->>Tags: Cleanup backup tags
-        Tags-->>FenceAgent: Tags cleaned
+            FenceAgent->>Tags: Cleanup backup tags
+            Tags-->>FenceAgent: Tags cleaned
 
-        FenceAgent-->>Client: Success
-    else No valid backup
-        FenceAgent-->>Client: Fail - No valid backup found
+            FenceAgent-->>Client: Success
+        else No valid backup
+            FenceAgent-->>Client: Fail - No valid backup found
+        end
+    else
+        FenceAgent-->>Client: Success - Restore skipped
     end
 ```
 
 ## Component Details
 
 ### 1. Main Controller (FenceAWSVPCNet)
 - **Purpose**: Main entry point and orchestration
 - **Key Responsibilities**:
   - Process command line options
   - Initialize AWS connection
   - Execute fence operations
   - Handle logging and errors
+  - Manage self-fencing prevention
+  - Support tag write failure handling
 
-### 2. AWS Connection Manager
-- **Purpose**: Handle AWS connectivity
+### 2. Instance Operations
+- **Purpose**: Handle EC2 instance operations
 - **Key Responsibilities**:
-  - Establish and maintain AWS connection
-  - Handle credentials and regions
-  - Manage API retries and timeouts
+  - Get instance details and metadata
+  - Handle instance power operations
+  - Validate instance states
+  - List and filter instances
+  - Handle instance shutdown
 
-### 3. Security Group Manager
+### 3. Security Group Operations
 - **Purpose**: Manage security group operations
 - **Key Responsibilities**:
-  - Modify security groups
-  - Create backups of security group configurations
+  - Modify security groups (remove or keep-only modes)
+  - Handle chunked backup operations
   - Restore security groups from backups
   - Validate security group changes
+  - Support partial success scenarios
 
-### 4. Instance Manager
-- **Purpose**: Handle EC2 instance operations
-- **Key Responsibilities**:
-  - Get instance details and status
-  - Handle instance power operations
-  - Validate instance states
-  - Manage self-fencing prevention
-
-### 5. Tag Manager
+### 4. Tag Operations
 - **Purpose**: Manage AWS resource tagging
 - **Key Responsibilities**:
   - Create and manage backup tags
-  - Handle lastfence tags
+  - Handle chunked tag data
+  - Manage lastfence tags
   - Clean up tags after operations
-  - Validate tag operations
+  - Support tag write failure scenarios
+
+### 5. Logging Manager
+- **Purpose**: Handle logging configuration
+- **Key Responsibilities**:
+  - Configure application logging
+  - Manage boto3 debug logging
+  - Handle debug file output
+  - Control log propagation
 
 ## Success and Failure Paths
 
 ### Success Paths
 
-1. **Normal Fence Operation**
+1. **Normal Fence Operation (Without ignore-tag-write-failure)**
 ```
 Start
 ├── Validate AWS credentials
+├── Check for self-fencing (if enabled)
 ├── Check instance is running
-├── Backup security groups
+├── Backup security groups (with chunking)
+│   ├── Create backup tags for each interface
+│   └── Verify backup tag creation
 ├── Create lastfence tag
 ├── Modify security groups
+│   ├── Remove specified groups
+│   └── Verify modifications
 ├── [Optional] Shutdown instance
 └── Success
 ```
 
-2. **Normal Unfence Operation**
+2. **Fence Operation (With ignore-tag-write-failure)**
 ```
 Start
 ├── Validate AWS credentials
-├── Find lastfence tag
-├── Find backup tags
-├── Restore security groups
-├── Clean up tags
+├── Check for self-fencing (if enabled)
+├── Check instance is running
+├── Attempt backup tag creation
+│   ├── Success: Create backup tags
+│   └── Failure: Log warning and continue
+├── Attempt lastfence tag creation
+│   ├── Success: Create tag
+│   └── Failure: Log warning and continue
+├── Modify security groups
+│   ├── Remove specified groups
+│   ├── Verify modifications
+│   └── Check all interfaces modified
+│       ├── All modified: Success
+│       └── Partial: Fail with modification error
+├── [Optional] Shutdown instance
+└── Success (if security groups modified)
+```
+
+3. **Normal Unfence Operation**
+```
+Start
+├── Validate AWS credentials
+├── [Skip if unfence-ignore-restore]
+│   ├── Find lastfence tag
+│   ├── Find backup tags
+│   ├── Restore security groups
+│   └── Clean up tags
 └── Success
 ```
 
 ### Failure Paths
 
 1. **Authentication Failures**
 ```
 Start
 ├── Invalid AWS credentials
+│   ├── Missing credentials
+│   ├── Invalid access key
+│   ├── Invalid secret key
+│   └── Invalid region
 └── Fail with auth error
 ```
 
 2. **Instance State Failures**
 ```
 Start
+├── Instance not found
+│   └── Fail with instance error
 ├── Instance not in required state
-└── Fail with state error
+│   └── Fail with state error
+└── Self-fencing detected
+    └── Fail with self-fencing error
 ```
 
-3. **Security Group Operation Failures**
+3. **Security Group Operation Failures (Without ignore-tag-write-failure)**
 ```
 Start
 ├── Backup creation fails
+│   ├── Tag size too large
+│   ├── API error
 │   └── Fail with backup error
 ├── Security group modification fails
+│   ├── Permission denied
+│   ├── Invalid group ID
+│   ├── Rate limit exceeded
 │   └── Fail with modification error
 └── Restoration fails
+    ├── Missing backup data
+    ├── Invalid backup format
+    ├── Modification error
     └── Fail with restore error
 ```
 
-4. **Tag Operation Failures**
+4. **Security Group Operation Failures (With ignore-tag-write-failure)**
+```
+Start
+├── Backup creation fails
+│   ├── Log warning
+│   └── Continue to modifications
+├── Security group modification attempt
+│   ├── Success: All interfaces modified
+│   │   └── Continue to completion
+│   ├── Partial success
+│   │   ├── Verify fencing state
+│   │   │   ├── Sufficient interfaces modified
+│   │   │   │   └── Continue to completion
+│   │   │   └── Insufficient modifications
+│   │   │       └── Fail with partial error
+│   │   └── Log warning
+│   └── Complete failure
+│       └── Fail with modification error
+├── [Optional] Shutdown attempt
+│   ├── Success
+│   │   └── Continue to completion
+│   └── Failure
+│       └── Log warning (non-fatal)
+└── Final state determined by SG modifications
+```
+
+5. **Tag Operation Failures (Without ignore-tag-write-failure)**
 ```
 Start
 ├── Tag creation fails
+│   ├── Size limit exceeded
+│   ├── API error
 │   └── Fail with tag error
 ├── Tag retrieval fails
+│   ├── Missing tags
+│   ├── Invalid format
 │   └── Fail with retrieval error
 └── Tag cleanup fails
     └── Warning (non-fatal)
 ```
 
+6. **Tag Operation Failures (With ignore-tag-write-failure)**
+```
+Start
+├── Backup tag creation fails
+│   ├── Log warning
+│   └── Continue operation
+├── Lastfence tag creation fails
+│   ├── Log warning
+│   └── Continue operation
+├── Tag retrieval fails
+│   ├── Check security group state
+│   │   ├── Groups properly modified
+│   │   │   └── Continue operation
+│   │   └── Groups not modified
+│   │       └── Fail with SG error
+│   └── Log warning
+└── Tag cleanup fails
+    └── Warning (non-fatal)
+```
+
 ## Error Handling
 
 ### Error Categories
 1. **AWS API Errors**
    - ConnectionError
    - ClientError
    - EndpointConnectionError
    - NoRegionError
+   - Tag size limitations
+   - API rate limiting
 
 2. **Validation Errors**
    - Invalid parameters
    - Missing required options
    - Invalid security group configurations
+   - Malformed tag data
 
 3. **State Errors**
    - Instance state conflicts
    - Security group conflicts
    - Self-fencing detection
+   - Partial operation completion
 
 ### Error Recovery
 - Automatic retries for transient AWS API errors
+- Chunked tag handling for large security group lists
+- Support for continuing operation despite tag failures
 - Rollback of security group changes on partial failures
 - Preservation of backup tags for manual recovery
 - Detailed logging for troubleshooting
 
 ## Configuration Options
 
 ### Required Options
 - `--plug`: AWS Instance ID
 - AWS credentials (via options or environment)
 
 ### Optional Options
 - `--region`: AWS region
-- `--secg`: Security groups to remove
+- `--access-key`: AWS access key
+- `--secret-key`: AWS secret key
+- `--secg`: Security groups to remove/keep
 - `--skip-race-check`: Skip self-fencing check
-- `--invert-sg-removal`: Invert security group removal
+- `--invert-sg-removal`: Keep only specified security groups
 - `--unfence-ignore-restore`: Skip restore on unfence
 - `--onfence-poweroff`: Power off on fence
+- `--ignore-tag-write-failure`: Continue despite tag failures
+- `--filter`: Filter instances for list operation
+- `--boto3_debug`: Enable boto3 debug logging
 
 ## Logging and Monitoring
 
 ### Log Levels
-- ERROR: Operation failures
-- WARNING: Non-critical issues
-- INFO: Operation progress
-- DEBUG: Detailed operation data
+- ERROR: Operation failures and AWS API errors
+- WARNING: Non-critical issues and tag operation failures
+- INFO: Operation progress and success
+- DEBUG: Detailed operation data and API responses
 
 ### Key Metrics
 - Operation success/failure rates
-- Operation duration
+- Tag operation success rates
+- Security group modification status
 - AWS API call latency
 - Error frequency and types
+- Tag size and chunking metrics
 
 ## Security Considerations
 
 ### Authentication
 - AWS credential management
 - IAM role requirements
 - Access key security
+- Instance metadata security
 
 ### Operation Safety
 - Self-fencing prevention
 - Backup verification
 - Security group validation
 - State verification
+- Tag operation integrity
+- Partial success handling
 
 ## Best Practices
 
 1. **Operation Safety**
    - Always verify instance state
+   - Use self-fencing prevention
    - Validate security group changes
    - Maintain accurate backups
-   - Prevent self-fencing
+   - Handle tag operation failures gracefully
 
 2. **Error Handling**
    - Implement proper rollbacks
+   - Use chunked tag operations
    - Maintain detailed logs
    - Preserve recovery data
    - Handle edge cases
+   - Support partial success scenarios
 
 3. **Performance**
    - Minimize API calls
    - Implement retries
    - Handle rate limiting
-   - Optimize operations
+   - Optimize tag operations
+   - Use efficient security group modifications
 
 4. **Maintenance**
    - Regular backup cleanup
    - Log rotation
    - Configuration updates
    - Security updates
+   - Monitor tag usage
+   - Clean up orphaned tags
+
diff --git a/tests/data/metadata/fence_aws_vpc_net.xml b/tests/data/metadata/fence_aws_vpc_net.xml
index a252f28e..63607ce6 100644
--- a/tests/data/metadata/fence_aws_vpc_net.xml
+++ b/tests/data/metadata/fence_aws_vpc_net.xml
@@ -1,186 +1,196 @@
 <?xml version="1.0" ?>
 <resource-agent name="fence_aws_vpc_net" shortdesc="Fence agent for AWS (Amazon Web Services) Net" >
 <longdesc>fence_aws_vpc_net is a Network and Power Fencing agent for AWS VPC that works by manipulating security groups. It uses the boto3 library to connect to AWS.
 
 boto3 can be configured with AWS CLI or by creating ~/.aws/credentials.
 For instructions see: https://boto3.readthedocs.io/en/latest/guide/quickstart.html#configuration NOTE: If onfence-poweroff is set, the agent won't be able to power on the node again, it will have to be powered on manually or with other automation.</longdesc>
 <vendor-url>http://www.amazon.com</vendor-url>
 <parameters>
 	<parameter name="action" unique="0" required="1">
 		<getopt mixed="-o, --action=[action]" />
 		<content type="string" default="reboot"  />
 		<shortdesc lang="en">Fencing action</shortdesc>
 	</parameter>
 	<parameter name="plug" unique="0" required="1" obsoletes="port">
 		<getopt mixed="-n, --plug=[id]" />
 		<content type="string"  />
 		<shortdesc lang="en">AWS Instance ID to perform action on </shortdesc>
 	</parameter>
 	<parameter name="port" unique="0" required="1" deprecated="1">
 		<getopt mixed="-n, --plug=[id]" />
 		<content type="string"  />
 		<shortdesc lang="en">AWS Instance ID to perform action on </shortdesc>
 	</parameter>
 	<parameter name="region" unique="0" required="0">
 		<getopt mixed="-r, --region=[region]" />
 		<content type="string"  />
 		<shortdesc lang="en">AWS Region.</shortdesc>
 	</parameter>
 	<parameter name="access_key" unique="0" required="0">
 		<getopt mixed="-a, --access-key=[key]" />
 		<content type="string"  />
 		<shortdesc lang="en">AWS Access Key.</shortdesc>
 	</parameter>
 	<parameter name="secret_key" unique="0" required="0">
 		<getopt mixed="-s, --secret-key=[key]" />
 		<content type="string"  />
 		<shortdesc lang="en">AWS Secret Key.</shortdesc>
 	</parameter>
 	<parameter name="secg" unique="0" required="0">
 		<getopt mixed="--secg=[sg1,sg2,...]" />
 		<content type="string"  />
 		<shortdesc lang="en">Security Groups to remove.</shortdesc>
 	</parameter>
 	<parameter name="skip_race_check" unique="0" required="0">
 		<getopt mixed="--skip-race-check" />
 		<content type="boolean"  />
 		<shortdesc lang="en">Skip race condition check.</shortdesc>
 	</parameter>
 	<parameter name="invert-sg-removal" unique="0" required="0" deprecated="1">
 		<getopt mixed="--invert-sg-removal" />
 		<content type="boolean"  />
 		<shortdesc lang="en">Remove all security groups except those specified.</shortdesc>
 	</parameter>
 	<parameter name="invert_sg_removal" unique="0" required="0" obsoletes="invert-sg-removal">
 		<getopt mixed="--invert-sg-removal" />
 		<content type="boolean"  />
 		<shortdesc lang="en">Remove all security groups except those specified.</shortdesc>
 	</parameter>
 	<parameter name="unfence-ignore-restore" unique="0" required="0" deprecated="1">
 		<getopt mixed="--unfence-ignore-restore" />
 		<content type="boolean"  />
 		<shortdesc lang="en">Do not restore security groups when unfencing.</shortdesc>
 	</parameter>
 	<parameter name="unfence_ignore_restore" unique="0" required="0" obsoletes="unfence-ignore-restore">
 		<getopt mixed="--unfence-ignore-restore" />
 		<content type="boolean"  />
 		<shortdesc lang="en">Do not restore security groups when unfencing.</shortdesc>
 	</parameter>
 	<parameter name="filter" unique="0" required="0">
 		<getopt mixed="--filter=[key=value]" />
 		<content type="string"  />
 		<shortdesc lang="en">Filter for list-action</shortdesc>
 	</parameter>
 	<parameter name="boto3_debug" unique="0" required="0">
 		<getopt mixed="-b, --boto3_debug=[option]" />
 		<content type="string" default="False"  />
 		<shortdesc lang="en">Boto Lib debug</shortdesc>
 	</parameter>
 	<parameter name="onfence-poweroff" unique="0" required="0" deprecated="1">
 		<getopt mixed="--onfence-poweroff" />
 		<content type="boolean"  />
 		<shortdesc lang="en">Power off the machine asynchronously.</shortdesc>
 	</parameter>
 	<parameter name="onfence_poweroff" unique="0" required="0" obsoletes="onfence-poweroff">
 		<getopt mixed="--onfence-poweroff" />
 		<content type="boolean"  />
 		<shortdesc lang="en">Power off the machine asynchronously.</shortdesc>
 	</parameter>
+	<parameter name="ignore-tag-write-failure" unique="0" required="0" deprecated="1">
+		<getopt mixed="--ignore-tag-write-failure" />
+		<content type="boolean"  />
+		<shortdesc lang="en">Continue to fence even if writing the backup tag fails.</shortdesc>
+	</parameter>
+	<parameter name="ignore_tag_write_failure" unique="0" required="0" obsoletes="ignore-tag-write-failure">
+		<getopt mixed="--ignore-tag-write-failure" />
+		<content type="boolean"  />
+		<shortdesc lang="en">Continue to fence even if writing the backup tag fails.</shortdesc>
+	</parameter>
 	<parameter name="quiet" unique="0" required="0">
 		<getopt mixed="-q, --quiet" />
 		<content type="boolean"  />
 		<shortdesc lang="en">Disable logging to stderr. Does not affect --verbose or --debug-file or logging to syslog.</shortdesc>
 	</parameter>
 	<parameter name="verbose" unique="0" required="0">
 		<getopt mixed="-v, --verbose" />
 		<content type="boolean"  />
 		<shortdesc lang="en">Verbose mode. Multiple -v flags can be stacked on the command line (e.g., -vvv) to increase verbosity.</shortdesc>
 	</parameter>
 	<parameter name="verbose_level" unique="0" required="0">
 		<getopt mixed="--verbose-level" />
 		<content type="integer"  />
 		<shortdesc lang="en">Level of debugging detail in output. Defaults to the number of --verbose flags specified on the command line, or to 1 if verbose=1 in a stonith device configuration (i.e., on stdin).</shortdesc>
 	</parameter>
 	<parameter name="debug" unique="0" required="0" deprecated="1">
 		<getopt mixed="-D, --debug-file=[debugfile]" />
 		<content type="string"  />
 		<shortdesc lang="en">Write debug information to given file</shortdesc>
 	</parameter>
 	<parameter name="debug_file" unique="0" required="0" obsoletes="debug">
 		<getopt mixed="-D, --debug-file=[debugfile]" />
 		<shortdesc lang="en">Write debug information to given file</shortdesc>
 	</parameter>
 	<parameter name="version" unique="0" required="0">
 		<getopt mixed="-V, --version" />
 		<content type="boolean"  />
 		<shortdesc lang="en">Display version information and exit</shortdesc>
 	</parameter>
 	<parameter name="help" unique="0" required="0">
 		<getopt mixed="-h, --help" />
 		<content type="boolean"  />
 		<shortdesc lang="en">Display help and exit</shortdesc>
 	</parameter>
 	<parameter name="plug_separator" unique="0" required="0">
 		<getopt mixed="--plug-separator=[char]" />
 		<content type="string" default=","  />
 		<shortdesc lang="en">Separator for plug parameter when specifying more than 1 plug</shortdesc>
 	</parameter>
 	<parameter name="separator" unique="0" required="0">
 		<getopt mixed="-C, --separator=[char]" />
 		<content type="string" default=","  />
 		<shortdesc lang="en">Separator for CSV created by 'list' operation</shortdesc>
 	</parameter>
 	<parameter name="delay" unique="0" required="0">
 		<getopt mixed="--delay=[seconds]" />
 		<content type="second" default="0"  />
 		<shortdesc lang="en">Wait X seconds before fencing is started</shortdesc>
 	</parameter>
 	<parameter name="disable_timeout" unique="0" required="0">
 		<getopt mixed="--disable-timeout=[true/false]" />
 		<content type="string"  />
 		<shortdesc lang="en">Disable timeout (true/false) (default: true when run from Pacemaker 2.0+)</shortdesc>
 	</parameter>
 	<parameter name="login_timeout" unique="0" required="0">
 		<getopt mixed="--login-timeout=[seconds]" />
 		<content type="second" default="5"  />
 		<shortdesc lang="en">Wait X seconds for cmd prompt after login</shortdesc>
 	</parameter>
 	<parameter name="power_timeout" unique="0" required="0">
 		<getopt mixed="--power-timeout=[seconds]" />
 		<content type="second" default="20"  />
 		<shortdesc lang="en">Test X seconds for status change after ON/OFF</shortdesc>
 	</parameter>
 	<parameter name="power_wait" unique="0" required="0">
 		<getopt mixed="--power-wait=[seconds]" />
 		<content type="second" default="0"  />
 		<shortdesc lang="en">Wait X seconds after issuing ON/OFF</shortdesc>
 	</parameter>
 	<parameter name="shell_timeout" unique="0" required="0">
 		<getopt mixed="--shell-timeout=[seconds]" />
 		<content type="second" default="3"  />
 		<shortdesc lang="en">Wait X seconds for cmd prompt after issuing command</shortdesc>
 	</parameter>
 	<parameter name="stonith_status_sleep" unique="0" required="0">
 		<getopt mixed="--stonith-status-sleep=[seconds]" />
 		<content type="second" default="1"  />
 		<shortdesc lang="en">Sleep X seconds between status calls during a STONITH action</shortdesc>
 	</parameter>
 	<parameter name="retry_on" unique="0" required="0">
 		<getopt mixed="--retry-on=[attempts]" />
 		<content type="integer" default="1"  />
 		<shortdesc lang="en">Count of attempts to retry power on</shortdesc>
 	</parameter>
 </parameters>
 <actions>
 	<action name="on" automatic="0"/>
 	<action name="off" />
 	<action name="reboot" />
 	<action name="status" />
 	<action name="list" />
 	<action name="list-status" />
 	<action name="monitor" />
 	<action name="metadata" />
 	<action name="manpage" />
 	<action name="validate-all" />
 </actions>
 </resource-agent>