diff --git a/heartbeat/gcp-vpc-move-ip.in b/heartbeat/gcp-vpc-move-ip.in index 3b8d998b3..c9393481e 100755 --- a/heartbeat/gcp-vpc-move-ip.in +++ b/heartbeat/gcp-vpc-move-ip.in @@ -1,374 +1,374 @@ #!@BASH_SHELL@ # # # OCF resource agent to move an IP address within a VPC in GCP # # License: GNU General Public License (GPL) # Copyright (c) 2018 Hervé Werner (MFG Labs) # Based on code from Markus Guertler (aws-vpc-move-ip) # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs # Defaults OCF_RESKEY_gcloud_default="/usr/bin/gcloud" OCF_RESKEY_configuration_default="default" OCF_RESKEY_vpc_network_default="default" OCF_RESKEY_interface_default="eth0" OCF_RESKEY_route_name_default="ra-${__SCRIPT_NAME}" : ${OCF_RESKEY_gcloud=${OCF_RESKEY_gcloud_default}} : ${OCF_RESKEY_configuration=${OCF_RESKEY_configuration_default}} : ${OCF_RESKEY_vpc_network=${OCF_RESKEY_vpc_network_default}} : ${OCF_RESKEY_interface=${OCF_RESKEY_interface_default}} : ${OCF_RESKEY_route_name=${OCF_RESKEY_route_name_default}} gcp_api_url_prefix="https://www.googleapis.com/compute/v1" gcloud="${OCF_RESKEY_gcloud} --quiet --configuration=${OCF_RESKEY_configuration}" ####################################################################### USAGE="usage: $0 {start|stop|monitor|status|meta-data|validate-all}"; ############################################################################### ############################################################################### # # Functions # ############################################################################### metadata() { cat < 1.0 Resource Agent that can move a floating IP addresse within a GCP VPC by changing an entry in the routing table. This agent also configures the floating IP locally on the instance OS. 
Requirements : - IP forwarding must be enabled on all instances in order to be able to terminate the route -- The floating IP address must be choosen so that it is outside all existing +- The floating IP address must be chosen so that it is outside all existing subnets in the VPC network - IAM permissions (see https://cloud.google.com/compute/docs/access/iam-permissions) : 1) compute.routes.delete, compute.routes.get and compute.routes.update on the route 2) compute.networks.updatePolicy on the network (to add a new route) 3) compute.networks.get on the network (to check the VPC network existence) 4) compute.routes.list on the project (to check conflicting routes) Move IP within a GCP VPC Path to command line tools for GCP Path to the gcloud tool Named configuration for gcloud Named gcloud configuration Floating IP address. Note that this IP must be chosen outside of all existing subnet ranges Floating IP Name of the VPC network VPC network Name of the network interface Network interface name Route name Route name END } validate() { if ! ocf_is_root; then ocf_exit_reason "You must run this agent as root" exit $OCF_ERR_PERM fi for cmd in ${OCF_RESKEY_gcloud} ip curl; do check_binary "$cmd" done if [ -z "${OCF_RESKEY_ip}" ]; then ocf_exit_reason "Missing mandatory parameter" exit $OCF_ERR_CONFIGURED fi GCE_INSTANCE_NAME=$(curl -s -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/name") GCE_INSTANCE_ZONE=$(curl -s -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/zone" | awk -F '/' '{ print $NF }') GCE_INSTANCE_PROJECT=$(curl -s -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/project/project-id") if [ -z "${GCE_INSTANCE_NAME}" -o -z "${GCE_INSTANCE_ZONE}" -o -z "${GCE_INSTANCE_PROJECT}" ]; then ocf_exit_reason "Instance information not found. Is this a GCE instance ?" exit $OCF_ERR_GENERIC fi if ! 
# Abort when a route other than the one managed by this agent is listed for
# the floating IP in the configured VPC network.
#
# Reads:   gcloud, gcp_api_url_prefix, GCE_INSTANCE_PROJECT, OCF_RESKEY_ip,
#          OCF_RESKEY_vpc_network, OCF_RESKEY_route_name
# Sets:    cmd, route_list (both are plain globals, not locals)
# Returns: $OCF_SUCCESS when the listing is empty.
# Exits:   $OCF_ERR_GENERIC when the gcloud command itself fails,
#          $OCF_ERR_CONFIGURED when at least one route name is returned.
check_conflicting_routes() {
  # The command is assembled as a string and later run through eval so that
  # the single-quoted --filter / --format values reach gcloud as one argument
  # each. The filter restricts on destRange and network and excludes the
  # route named ${OCF_RESKEY_route_name}.
  cmd="${gcloud} compute routes list \
    --filter='destRange:${OCF_RESKEY_ip} AND \
    network=(${gcp_api_url_prefix}/projects/${GCE_INSTANCE_PROJECT}/global/networks/${OCF_RESKEY_vpc_network}) AND \
    NOT name=${OCF_RESKEY_route_name}' \
    --format='value[terminator=\" \"](name)'"
  # The unquoted $cmd inside $(echo ...) is word-split, which squeezes the
  # embedded runs of whitespace for the log line.
  ocf_log debug "Executing command: $(echo $cmd)"
  route_list=$(eval ${cmd})
  # $? is the exit status of the eval'd command (command substitution
  # assignment propagates it).
  if [ $? -ne 0 ]; then
    exit $OCF_ERR_GENERIC
  fi
  # Any name printed by the command is treated as a conflicting route.
  if [ -n "${route_list}" ]; then
    ocf_exit_reason "Conflicting unnmanaged routes for destination ${OCF_RESKEY_ip}/32 in VPC ${OCF_RESKEY_vpc_network} found : ${route_list}"
    exit $OCF_ERR_CONFIGURED
  fi
  return $OCF_SUCCESS
}
# Check whether the floating IP is configured on the local interface.
#
# Runs "ip address show" restricted to ${OCF_RESKEY_interface} and
# ${OCF_RESKEY_ip}/32 and looks only at whether it printed anything.
# Returns: $OCF_SUCCESS when the command produced output,
#          $OCF_NOT_RUNNING when its output is empty.
ip_monitor() {
  local ip_output

  ocf_log info "IP monitor: checking local network configuration"

  # cmd stays a global, exactly as the other functions of this agent use it.
  cmd="ip address show dev ${OCF_RESKEY_interface} to ${OCF_RESKEY_ip}/32"
  ocf_log debug "Executing command: $cmd"
  ip_output="$($cmd)"

  if [ -n "${ip_output}" ]; then
    ocf_log debug "The floating IP ${OCF_RESKEY_ip} is correctly configured on this instance (${GCE_INSTANCE_NAME})"
    return $OCF_SUCCESS
  fi

  ocf_log warn "The floating IP ${OCF_RESKEY_ip} is not locally configured on this instance (${GCE_INSTANCE_NAME})"
  return $OCF_NOT_RUNNING
}
# Bring the floating IP down: remove the GCP route if it still points at this
# instance, then remove the address from the local interface.
#
# The results of route_release and ip_release are not inspected; the function
# returns $OCF_SUCCESS whenever it reaches its end. route_monitor may still
# terminate the script through its own exit calls.
ip_and_route_stop() {
  local route_status
  local ip_status

  ocf_log info "Bringing down the floating IP ${OCF_RESKEY_ip}"

  # Route entry: leave it alone when it is absent or owned by another instance.
  route_monitor &>/dev/null
  route_status=$?
  if [ "${route_status}" -ne $OCF_NOT_RUNNING ]; then
    route_release
  else
    ocf_log info "The floating IP ${OCF_RESKEY_ip} is already not routed to this instance (${GCE_INSTANCE_NAME})"
  fi

  # Local address: only delete it when it is actually configured.
  ip_monitor &>/dev/null
  ip_status=$?
  if [ "${ip_status}" -ne $OCF_NOT_RUNNING ]; then
    ip_release
  else
    ocf_log info "The floating IP ${OCF_RESKEY_ip} is already down"
  fi

  ocf_log info "Successfully brought down the floating IP ${OCF_RESKEY_ip}"
  return $OCF_SUCCESS
}
Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### import atexit import logging import os import sys import time OCF_FUNCTIONS_DIR = os.environ.get("OCF_FUNCTIONS_DIR", "%s/lib/heartbeat" % os.environ.get("OCF_ROOT")) sys.path.append(OCF_FUNCTIONS_DIR) from ocf import * try: import googleapiclient.discovery import pyroute2 except ImportError: pass if sys.version_info >= (3, 0): # Python 3 imports. import urllib.parse as urlparse import urllib.request as urlrequest else: # Python 2 imports. import urllib as urlparse import urllib2 as urlrequest GCP_API_URL_PREFIX = 'https://www.googleapis.com/compute/v1' METADATA_SERVER = 'http://metadata.google.internal/computeMetadata/v1/' METADATA_HEADERS = {'Metadata-Flavor': 'Google'} METADATA = \ ''' 1.0 Resource Agent that can move a floating IP addresse within a GCP VPC by changing an entry in the routing table. This agent also configures the floating IP locally on the instance OS. 
def get_metadata(metadata_key, params=None, timeout=None):
  """Performs a GET request with the metadata headers and returns text.

  Args:
    metadata_key: string, the metadata to perform a GET request on, relative
      to METADATA_SERVER.
    params: dictionary, the query parameters in the GET request.
    timeout: int, timeout in seconds for metadata requests (60 when omitted
      or falsy).

  Returns:
    The response body decoded as UTF-8 text. Decoding matters on Python 3,
    where read() yields bytes: callers split the value on '/' and interpolate
    it into URLs with %s, both of which need text.

  Raises:
    HTTPError / URLError from the urllib request module in use (urllib2 on
    Python 2, urllib.request on Python 3) when the GET request fails.
  """
  timeout = timeout or 60
  metadata_url = os.path.join(METADATA_SERVER, metadata_key)
  params = urlparse.urlencode(params or {})
  url = '%s?%s' % (metadata_url, params)
  request = urlrequest.Request(url, headers=METADATA_HEADERS)
  # An empty ProxyHandler mapping keeps the request from going through any
  # auto-detected proxy.
  request_opener = urlrequest.build_opener(urlrequest.ProxyHandler({}))
  response = request_opener.open(request, timeout=timeout * 1.1)
  try:
    return response.read().decode('utf-8')
  finally:
    # Release the connection explicitly; the Python 2 response object is not
    # a context manager, so try/finally is used instead of "with".
    response.close()
def ip_monitor(ctx):
  """Reports whether the floating IP is configured on the local interface.

  Args:
    ctx: Context, read for iproute, iface_idx, ip and instance.

  Returns:
    OCF_SUCCESS when get_addr() returns at least one address matching the
    filter, OCF_NOT_RUNNING otherwise.
  """
  logger.info('IP monitor: checking local network configuration')

  def address_filter(addr):
    # The first IFA_LOCAL attribute alone decides the outcome; when there is
    # none the function falls through and returns None.
    for attr in addr['attrs']:
      if attr[0] != 'IFA_LOCAL':
        continue
      return attr[1] == ctx.ip

  matching_addrs = ctx.iproute.get_addr(
      index=ctx.iface_idx, match=address_filter)

  if matching_addrs:
    logger.debug(
        'The floating IP %s is correctly configured on this instance (%s)',
        ctx.ip, ctx.instance)
    return OCF_SUCCESS

  logger.warning(
      'The floating IP %s is not locally configured on this instance (%s)',
      ctx.ip, ctx.instance)
  return OCF_NOT_RUNNING
def route_monitor(ctx):
  """Checks whether the managed GCP route points at this instance.

  Args:
    ctx: Context, read for conn, project, route_name, zone, instance and ip.

  Returns:
    OCF_SUCCESS when the route's nextHopInstance is this instance.
    OCF_NOT_RUNNING when the route does not exist (HTTP 404) or points at
    another instance.
    OCF_ERR_PERM when the API error body mentions 'Insufficient Permission'.

  Raises:
    googleapiclient.errors.HttpError: for any other API failure.
    SystemExit: from check_conflicting_routes() when a conflicting route
      is found.
  """
  logger.info('GCP route monitor: checking route table')

  # Ensure that there is no route that we are not aware of that is also
  # handling our IP. This has to be an actual call: a bare function name is
  # a no-op expression and would silently skip the check.
  check_conflicting_routes(ctx)

  try:
    request = ctx.conn.routes().get(project=ctx.project, route=ctx.route_name)
    response = request.execute()
  except googleapiclient.errors.HttpError as e:
    # The error body is bytes on Python 3; decode it before the substring
    # test, otherwise "str in bytes" raises TypeError and hides the real error.
    content = e.content
    if isinstance(content, bytes):
      content = content.decode('utf-8', 'replace')
    if content and 'Insufficient Permission' in content:
      return OCF_ERR_PERM
    elif e.resp.status == 404:
      return OCF_NOT_RUNNING
    else:
      raise

  routed_to_instance = response.get('nextHopInstance', '')
  instance_url = '%s/projects/%s/zones/%s/instances/%s' % (
      GCP_API_URL_PREFIX, ctx.project, ctx.zone, ctx.instance)
  if routed_to_instance != instance_url:
    logger.warning(
        'The floating IP %s is not routed to this instance (%s) but to instance %s',
        ctx.ip, ctx.instance, routed_to_instance.split('/')[-1])
    return OCF_NOT_RUNNING

  logger.debug(
      'The floating IP %s is correctly routed to this instance (%s)',
      ctx.ip, ctx.instance)
  return OCF_SUCCESS
ip_and_route_start(ctx) elif 'stop' in sys.argv[1]: ip_and_route_stop(ctx) elif 'status' in sys.argv[1] or 'monitor' in sys.argv[1]: sys.exit(ip_monitor(ctx)) else: usage = 'usage: %s {start|stop|monitor|status|meta-data|validate-all}' % \ os.path.basename(sys.argv[0]) logger.error(usage) sys.exit(OCF_ERR_UNIMPLEMENTED) if __name__ == "__main__": main()