diff --git a/heartbeat/gcp-pd-move.in b/heartbeat/gcp-pd-move.in index e99cc71f8..cbe703c3c 100644 --- a/heartbeat/gcp-pd-move.in +++ b/heartbeat/gcp-pd-move.in @@ -1,382 +1,382 @@ #!@PYTHON@ -tt # - *- coding: utf- 8 - *- # # --------------------------------------------------------------------- # Copyright 2018 Google Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # --------------------------------------------------------------------- # Description: Google Cloud Platform - Disk attach # --------------------------------------------------------------------- import json import logging import os import re import sys import time OCF_FUNCTIONS_DIR = os.environ.get("OCF_FUNCTIONS_DIR", "%s/lib/heartbeat" % os.environ.get("OCF_ROOT")) sys.path.append(OCF_FUNCTIONS_DIR) import ocf from ocf import logger try: import googleapiclient.discovery except ImportError: pass if sys.version_info >= (3, 0): # Python 3 imports. import urllib.parse as urlparse import urllib.request as urlrequest else: # Python 2 imports. import urllib as urlparse import urllib2 as urlrequest CONN = None PROJECT = None ZONE = None REGION = None LIST_DISK_ATTACHED_INSTANCES = None INSTANCE_NAME = None PARAMETERS = { 'disk_name': '', 'disk_scope': 'detect', 'disk_csek_file': '', 'mode': "READ_WRITE", 'device_name': '', 'stackdriver_logging': 'no', } MANDATORY_PARAMETERS = ['disk_name', 'disk_scope'] METADATA_SERVER = 'http://metadata.google.internal/computeMetadata/v1/' METADATA_HEADERS = {'Metadata-Flavor': 'Google'} METADATA = ''' 1.0 Resource Agent that can attach or detach a regional/zonal disk on current GCP instance. Requirements : - Disk has to be properly created as regional/zonal in order to be used correctly. Attach/Detach a persistent disk on current GCP instance The name of the GCP disk. Disk name Disk scope Network name Path to a Customer-Supplied Encryption Key (CSEK) key file Customer-Supplied Encryption Key file Attachment mode (READ_WRITE, READ_ONLY) Attachment mode An optional name that indicates the disk name the guest operating system will see. Optional device name Use stackdriver_logging output to global resource (yes, true, enabled) Use stackdriver_logging '''.format(PARAMETERS['disk_name'], PARAMETERS['disk_scope'], PARAMETERS['disk_csek_file'], PARAMETERS['mode'], PARAMETERS['device_name'], PARAMETERS['stackdriver_logging']) def get_metadata(metadata_key, params=None, timeout=None): """Performs a GET request with the metadata headers. Args: metadata_key: string, the metadata to perform a GET request on. params: dictionary, the query parameters in the GET request. timeout: int, timeout in seconds for metadata requests. Returns: HTTP response from the GET request. Raises: urlerror.HTTPError: raises when the GET request fails. """ timeout = timeout or 60 metadata_url = os.path.join(METADATA_SERVER, metadata_key) params = urlparse.urlencode(params or {}) url = '%s?%s' % (metadata_url, params) request = urlrequest.Request(url, headers=METADATA_HEADERS) request_opener = urlrequest.build_opener(urlrequest.ProxyHandler({})) return request_opener.open(request, timeout=timeout * 1.1).read().decode("utf-8") def populate_vars(): global CONN global INSTANCE_NAME global PROJECT global ZONE global REGION global LIST_DISK_ATTACHED_INSTANCES # Populate global vars try: CONN = googleapiclient.discovery.build('compute', 'v1') except Exception as e: logger.error('Couldn\'t connect with google api: ' + str(e)) - sys.exit(ocf.OCF_ERR_CONFIGURED) + sys.exit(ocf.OCF_ERR_GENERIC) for param in PARAMETERS: value = os.environ.get('OCF_RESKEY_%s' % param, PARAMETERS[param]) if not value and param in MANDATORY_PARAMETERS: logger.error('Missing %s mandatory parameter' % param) sys.exit(ocf.OCF_ERR_CONFIGURED) elif value: PARAMETERS[param] = value try: INSTANCE_NAME = get_metadata('instance/name') except Exception as e: logger.error( 'Couldn\'t get instance name, is this running inside GCE?: ' + str(e)) - sys.exit(ocf.OCF_ERR_CONFIGURED) + sys.exit(ocf.OCF_ERR_GENERIC) PROJECT = get_metadata('project/project-id') if PARAMETERS['disk_scope'] in ['detect', 'regional']: ZONE = get_metadata('instance/zone').split('/')[-1] REGION = ZONE[:-2] else: ZONE = PARAMETERS['disk_scope'] LIST_DISK_ATTACHED_INSTANCES = get_disk_attached_instances( PARAMETERS['disk_name']) def configure_logs(): # Prepare logging global logger logging.getLogger('googleapiclient').setLevel(logging.WARN) logging_env = os.environ.get('OCF_RESKEY_stackdriver_logging') if logging_env: logging_env = logging_env.lower() if any(x in logging_env for x in ['yes', 'true', 'enabled']): try: import google.cloud.logging.handlers client = google.cloud.logging.Client() handler = google.cloud.logging.handlers.CloudLoggingHandler( client, name=INSTANCE_NAME) handler.setLevel(logging.INFO) formatter = logging.Formatter('gcp:alias "%(message)s"') handler.setFormatter(formatter) ocf.log.addHandler(handler) logger = logging.LoggerAdapter( ocf.log, {'OCF_RESOURCE_INSTANCE': ocf.OCF_RESOURCE_INSTANCE}) except ImportError: logger.error('Couldn\'t import google.cloud.logging, ' 'disabling Stackdriver-logging support') def wait_for_operation(operation): while True: result = CONN.zoneOperations().get( project=PROJECT, zone=ZONE, operation=operation['name']).execute() if result['status'] == 'DONE': if 'error' in result: raise Exception(result['error']) return time.sleep(1) def get_disk_attached_instances(disk): def get_users_list(): fl = 'name="%s"' % disk request = CONN.disks().aggregatedList(project=PROJECT, filter=fl) while request is not None: response = request.execute() locations = response.get('items', {}) for location in locations.values(): for d in location.get('disks', []): if d['name'] == disk: return d.get('users', []) request = CONN.instances().aggregatedList_next( previous_request=request, previous_response=response) raise Exception("Unable to find disk %s" % disk) def get_only_instance_name(user): return re.sub('.*/instances/', '', user) return map(get_only_instance_name, get_users_list()) def is_disk_attached(instance): return instance in LIST_DISK_ATTACHED_INSTANCES def detach_disk(instance, disk_name): # Python API misses disk-scope argument. # Detaching a disk is only possible by using deviceName, which is retrieved # as a disk parameter when listing the instance information request = CONN.instances().get( project=PROJECT, zone=ZONE, instance=instance) response = request.execute() device_name = None for disk in response['disks']: if disk_name == re.sub('.*disks/',"",disk['source']): device_name = disk['deviceName'] break if not device_name: logger.error("Didn't find %(d)s deviceName attached to %(i)s" % { 'd': disk_name, 'i': instance, }) return request = CONN.instances().detachDisk( project=PROJECT, zone=ZONE, instance=instance, deviceName=device_name) wait_for_operation(request.execute()) def attach_disk(instance, disk_name): location = 'zones/%s' % ZONE if PARAMETERS['disk_scope'] == 'regional': location = 'regions/%s' % REGION prefix = 'https://www.googleapis.com/compute/v1' body = { 'source': '%(prefix)s/projects/%(project)s/%(location)s/disks/%(disk)s' % { 'prefix': prefix, 'project': PROJECT, 'location': location, 'disk': disk_name, }, } # Customer-Supplied Encryption Key (CSEK) if PARAMETERS['disk_csek_file']: with open(PARAMETERS['disk_csek_file']) as csek_file: body['diskEncryptionKey'] = { 'rawKey': csek_file.read(), } if PARAMETERS['device_name']: body['deviceName'] = PARAMETERS['device_name'] if PARAMETERS['mode']: body['mode'] = PARAMETERS['mode'] force_attach = None if PARAMETERS['disk_scope'] == 'regional': # Python API misses disk-scope argument. force_attach = True else: # If this disk is attached to some instance, detach it first. for other_instance in LIST_DISK_ATTACHED_INSTANCES: logger.info("Detaching disk %(disk_name)s from other instance %(i)s" % { 'disk_name': PARAMETERS['disk_name'], 'i': other_instance, }) detach_disk(other_instance, PARAMETERS['disk_name']) request = CONN.instances().attachDisk( project=PROJECT, zone=ZONE, instance=instance, body=body, forceAttach=force_attach) wait_for_operation(request.execute()) def fetch_data(): configure_logs() populate_vars() def gcp_pd_move_start(): fetch_data() if not is_disk_attached(INSTANCE_NAME): logger.info("Attaching disk %(disk_name)s to %(instance)s" % { 'disk_name': PARAMETERS['disk_name'], 'instance': INSTANCE_NAME, }) attach_disk(INSTANCE_NAME, PARAMETERS['disk_name']) def gcp_pd_move_stop(): fetch_data() if is_disk_attached(INSTANCE_NAME): logger.info("Detaching disk %(disk_name)s to %(instance)s" % { 'disk_name': PARAMETERS['disk_name'], 'instance': INSTANCE_NAME, }) detach_disk(INSTANCE_NAME, PARAMETERS['disk_name']) def gcp_pd_move_status(): fetch_data() if is_disk_attached(INSTANCE_NAME): logger.debug("Disk %(disk_name)s is correctly attached to %(instance)s" % { 'disk_name': PARAMETERS['disk_name'], 'instance': INSTANCE_NAME, }) else: sys.exit(ocf.OCF_NOT_RUNNING) def main(): if len(sys.argv) < 2: logger.error('Missing argument') return command = sys.argv[1] if 'meta-data' in command: print(METADATA) return if command in 'start': gcp_pd_move_start() elif command in 'stop': gcp_pd_move_stop() elif command in ('monitor', 'status'): gcp_pd_move_status() else: configure_logs() logger.error('no such function %s' % str(command)) if __name__ == "__main__": main() diff --git a/heartbeat/gcp-vpc-move-route.in b/heartbeat/gcp-vpc-move-route.in index dac6e4ea8..6b240c04d 100644 --- a/heartbeat/gcp-vpc-move-route.in +++ b/heartbeat/gcp-vpc-move-route.in @@ -1,490 +1,490 @@ #!@PYTHON@ -tt # - *- coding: utf- 8 - *- # # # OCF resource agent to move an IP address within a VPC in GCP # # License: GNU General Public License (GPL) # Copyright (c) 2018 Hervé Werner (MFG Labs) # Copyright 2018 Google Inc. # Based on code from Markus Guertler (aws-vpc-move-ip) # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### import atexit import logging import os import sys import time OCF_FUNCTIONS_DIR = os.environ.get("OCF_FUNCTIONS_DIR", "%s/lib/heartbeat" % os.environ.get("OCF_ROOT")) sys.path.append(OCF_FUNCTIONS_DIR) from ocf import * try: import googleapiclient.discovery import pyroute2 try: from google.oauth2.service_account import Credentials as ServiceAccountCredentials except ImportError: from oauth2client.service_account import ServiceAccountCredentials except ImportError: pass if sys.version_info >= (3, 0): # Python 3 imports. import urllib.parse as urlparse import urllib.request as urlrequest else: # Python 2 imports. import urllib as urlparse import urllib2 as urlrequest GCP_API_URL_PREFIX = 'https://www.googleapis.com/compute/v1' METADATA_SERVER = 'http://metadata.google.internal/computeMetadata/v1/' METADATA_HEADERS = {'Metadata-Flavor': 'Google'} METADATA = \ ''' 1.0 Resource Agent that can move a floating IP addresse within a GCP VPC by changing an entry in the routing table. This agent also configures the floating IP locally on the instance OS. Requirements : - IP forwarding must be enabled on all instances in order to be able to terminate the route - The floating IP address must be chosen so that it is outside all existing subnets in the VPC network - IAM permissions (see https://cloud.google.com/compute/docs/access/iam-permissions) : 1) compute.routes.delete, compute.routes.get and compute.routes.update on the route 2) compute.networks.updatePolicy on the network (to add a new route) 3) compute.networks.get on the network (to check the VPC network existence) 4) compute.routes.list on the project (to check conflicting routes) Move IP within a GCP VPC Floating IP address. Note that this IP must be chosen outside of all existing subnet ranges Floating IP Name of the VPC network VPC network Project ID of the instance. It can be useful to set this attribute if the instance is in a shared service project. Otherwise, the agent should be able to determine the project ID automatically. Project ID Name of the network interface Network interface name Route name Route name Path to Service account JSON file Service account JSONfile If enabled (set to true), IP failover logs will be posted to stackdriver logging Stackdriver-logging support ''' % os.path.basename(sys.argv[0]) class Context(object): __slots__ = 'conn', 'iface_idx', 'instance', 'instance_url', 'interface', \ 'ip', 'iproute', 'project', 'route_name', 'vpc_network', \ 'vpc_network_url', 'zone' def wait_for_operation(ctx, response): """Blocks until operation completes. Code from GitHub's GoogleCloudPlatform/python-docs-samples Args: response: dict, a request's response """ def _OperationGetter(response): operation = response[u'name'] if response.get(u'zone'): return ctx.conn.zoneOperations().get( project=ctx.project, zone=ctx.zone, operation=operation) else: return ctx.conn.globalOperations().get( project=ctx.project, operation=operation) while True: result = _OperationGetter(response).execute() if result['status'] == 'DONE': if 'error' in result: raise Exception(result['error']) return result time.sleep(1) def get_metadata(metadata_key, params=None, timeout=None): """Performs a GET request with the metadata headers. Args: metadata_key: string, the metadata to perform a GET request on. params: dictionary, the query parameters in the GET request. timeout: int, timeout in seconds for metadata requests. Returns: HTTP response from the GET request. Raises: urlerror.HTTPError: raises when the GET request fails. """ timeout = timeout or 60 metadata_url = os.path.join(METADATA_SERVER, metadata_key) params = urlparse.urlencode(params or {}) url = '%s?%s' % (metadata_url, params) request = urlrequest.Request(url, headers=METADATA_HEADERS) request_opener = urlrequest.build_opener(urlrequest.ProxyHandler({})) return request_opener.open(request, timeout=timeout * 1.1).read().decode("utf-8") def validate(ctx): if os.geteuid() != 0: logger.error('You must run this agent as root') sys.exit(OCF_ERR_PERM) try: serviceaccount = os.environ.get("OCF_RESKEY_serviceaccount") if not serviceaccount: try: from googleapiclient import _auth credentials = _auth.default_credentials(); except: credentials = GoogleCredentials.get_application_default() logging.debug("using application default credentials") else: scope = ['https://www.googleapis.com/auth/cloud-platform'] logging.debug("using credentials from service account") try: credentials = ServiceAccountCredentials.from_service_account_file(filename=serviceaccount, scopes=scope) except AttributeError: credentials = ServiceAccountCredentials.from_json_keyfile_name(serviceaccount, scope) except Exception as e: logging.error(str(e)) sys.exit(OCF_ERR_GENERIC) ctx.conn = googleapiclient.discovery.build('compute', 'v1', credentials=credentials, cache_discovery=False) except Exception as e: logger.error('Couldn\'t connect with google api: ' + str(e)) - sys.exit(OCF_ERR_CONFIGURED) + sys.exit(OCF_ERR_GENERIC) ctx.ip = os.environ.get('OCF_RESKEY_ip') if not ctx.ip: logger.error('Missing ip parameter') sys.exit(OCF_ERR_CONFIGURED) try: ctx.instance = get_metadata('instance/name') ctx.zone = get_metadata('instance/zone').split('/')[-1] ctx.project = os.environ.get( 'OCF_RESKEY_project', get_metadata('project/project-id')) except Exception as e: logger.error( 'Instance information not found. Is this a GCE instance ?: %s', str(e)) - sys.exit(OCF_ERR_CONFIGURED) + sys.exit(OCF_ERR_GENERIC) ctx.instance_url = '%s/projects/%s/zones/%s/instances/%s' % ( GCP_API_URL_PREFIX, ctx.project, ctx.zone, ctx.instance) ctx.vpc_network = os.environ.get('OCF_RESKEY_vpc_network', 'default') ctx.vpc_network_url = '%s/projects/%s/global/networks/%s' % ( GCP_API_URL_PREFIX, ctx.project, ctx.vpc_network) ctx.interface = os.environ.get('OCF_RESKEY_interface', 'eth0') ctx.route_name = os.environ.get( 'OCF_RESKEY_route_name', 'ra-%s' % os.path.basename(sys.argv[0])) ctx.iproute = pyroute2.IPRoute() atexit.register(ctx.iproute.close) idxs = ctx.iproute.link_lookup(ifname=ctx.interface) if not idxs: logger.error('Network interface not found') - sys.exit(OCF_ERR_CONFIGURED) + sys.exit(OCF_ERR_GENERIC) ctx.iface_idx = idxs[0] def check_conflicting_routes(ctx): fl = '(destRange = "%s*") AND (network = "%s") AND (name != "%s")' % ( ctx.ip, ctx.vpc_network_url, ctx.route_name) try: request = ctx.conn.routes().list(project=ctx.project, filter=fl) response = request.execute() except googleapiclient.errors.HttpError as e: if e.resp.status == 404: logger.error('VPC network not found') if 'stop' in sys.argv[1]: sys.exit(OCF_SUCCESS) else: sys.exit(OCF_ERR_CONFIGURED) else: raise route_list = response.get('items', None) if route_list: logger.error( 'Conflicting unnmanaged routes for destination %s/32 in VPC %s found : %s', ctx.ip, ctx.vpc_network, str(route_list)) sys.exit(OCF_ERR_CONFIGURED) def route_release(ctx): request = ctx.conn.routes().delete(project=ctx.project, route=ctx.route_name) wait_for_operation(ctx, request.execute()) def ip_monitor(ctx): logger.info('IP monitor: checking local network configuration') def address_filter(addr): for attr in addr['attrs']: if attr[0] == 'IFA_LOCAL': if attr[1] == ctx.ip: return True else: return False route = ctx.iproute.get_addr( index=ctx.iface_idx, match=address_filter) if not route: logger.warning( 'The floating IP %s is not locally configured on this instance (%s)', ctx.ip, ctx.instance) return OCF_NOT_RUNNING logger.debug( 'The floating IP %s is correctly configured on this instance (%s)', ctx.ip, ctx.instance) return OCF_SUCCESS def ip_release(ctx): ctx.iproute.addr('del', index=ctx.iface_idx, address=ctx.ip, mask=32) def ip_and_route_start(ctx): logger.info('Bringing up the floating IP %s', ctx.ip) # Add a new entry in the routing table # If the route entry exists and is pointing to another instance, take it over # Ensure that there is no route that we are not aware of that is also handling our IP check_conflicting_routes(ctx) # There is no replace API, We need to first delete the existing route if any try: request = ctx.conn.routes().get(project=ctx.project, route=ctx.route_name) request.execute() # TODO: check specific exception for 404 except googleapiclient.errors.HttpError as e: if e.resp.status != 404: raise else: route_release(ctx) route_body = { 'name': ctx.route_name, 'network': ctx.vpc_network_url, 'destRange': '%s/32' % ctx.ip, 'nextHopInstance': ctx.instance_url, } try: request = ctx.conn.routes().insert(project=ctx.project, body=route_body) wait_for_operation(ctx, request.execute()) except googleapiclient.errors.HttpError: try: request = ctx.conn.networks().get( project=ctx.project, network=ctx.vpc_network) request.execute() except googleapiclient.errors.HttpError as e: if e.resp.status == 404: logger.error('VPC network not found') sys.exit(OCF_ERR_CONFIGURED) else: raise else: raise # Configure the IP address locally # We need to release the IP first if ip_monitor(ctx) == OCF_SUCCESS: ip_release(ctx) ctx.iproute.addr('add', index=ctx.iface_idx, address=ctx.ip, mask=32) ctx.iproute.link('set', index=ctx.iface_idx, state='up') logger.info('Successfully brought up the floating IP %s', ctx.ip) def route_monitor(ctx): logger.info('GCP route monitor: checking route table') # Ensure that there is no route that we are not aware of that is also handling our IP check_conflicting_routes(ctx) try: request = ctx.conn.routes().get(project=ctx.project, route=ctx.route_name) response = request.execute() except googleapiclient.errors.HttpError as e: if e.resp.status == 404: return OCF_NOT_RUNNING elif 'Insufficient Permission' in e.content: return OCF_ERR_PERM else: raise routed_to_instance = response.get('nextHopInstance', '') instance_url = '%s/projects/%s/zones/%s/instances/%s' % ( GCP_API_URL_PREFIX, ctx.project, ctx.zone, ctx.instance) if routed_to_instance != instance_url: logger.warning( 'The floating IP %s is not routed to this instance (%s) but to instance %s', ctx.ip, ctx.instance, routed_to_instance.split('/')[-1]) return OCF_NOT_RUNNING logger.debug( 'The floating IP %s is correctly routed to this instance (%s)', ctx.ip, ctx.instance) return OCF_SUCCESS def ip_and_route_stop(ctx): logger.info('Bringing down the floating IP %s', ctx.ip) # Delete the route entry # If the route entry exists and is pointing to another instance, don't touch it if route_monitor(ctx) == OCF_NOT_RUNNING: logger.info( 'The floating IP %s is already not routed to this instance (%s)', ctx.ip, ctx.instance) else: route_release(ctx) if ip_monitor(ctx) == OCF_NOT_RUNNING: logger.info('The floating IP %s is already down', ctx.ip) else: ip_release(ctx) def configure_logs(ctx): # Prepare logging global logger logging.getLogger('googleapiclient').setLevel(logging.WARN) logging_env = os.environ.get('OCF_RESKEY_stackdriver_logging') if logging_env: logging_env = logging_env.lower() if any(x in logging_env for x in ['yes', 'true', 'enabled']): try: import google.cloud.logging.handlers client = google.cloud.logging.Client() handler = google.cloud.logging.handlers.CloudLoggingHandler( client, name=ctx.instance) handler.setLevel(logging.INFO) formatter = logging.Formatter('gcp:route "%(message)s"') handler.setFormatter(formatter) log.addHandler(handler) logger = logging.LoggerAdapter(log, {'OCF_RESOURCE_INSTANCE': OCF_RESOURCE_INSTANCE}) except ImportError: logger.error('Couldn\'t import google.cloud.logging, ' 'disabling Stackdriver-logging support') def main(): if 'meta-data' in sys.argv[1]: print(METADATA) return ctx = Context() validate(ctx) if 'validate-all' in sys.argv[1]: return configure_logs(ctx) if 'start' in sys.argv[1]: ip_and_route_start(ctx) elif 'stop' in sys.argv[1]: ip_and_route_stop(ctx) elif 'status' in sys.argv[1] or 'monitor' in sys.argv[1]: sys.exit(ip_monitor(ctx)) else: usage = 'usage: %s {start|stop|monitor|status|meta-data|validate-all}' % \ os.path.basename(sys.argv[0]) logger.error(usage) sys.exit(OCF_ERR_UNIMPLEMENTED) if __name__ == "__main__": main()