Page Menu | Home | ClusterLabs Projects

No OneTemporary

diff --git a/agents/ibmz/fence_ibmz.py b/agents/ibmz/fence_ibmz.py
index d3ac550d..a2922ab6 100644
--- a/agents/ibmz/fence_ibmz.py
+++ b/agents/ibmz/fence_ibmz.py
@@ -1,542 +1,551 @@
#!@PYTHON@ -tt
# Copyright (c) 2020 IBM Corp.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library. If not, see
# <http://www.gnu.org/licenses/>.
import atexit
import logging
import time
import sys
import requests
from requests.packages import urllib3
sys.path.append("@FENCEAGENTSLIBDIR@")
from fencing import *
from fencing import fail_usage, run_delay, EC_GENERIC_ERROR
# default applied to the fencing library's "power_timeout" option by
# set_opts() (presumably seconds -- confirm against the fencing library)
DEFAULT_POWER_TIMEOUT = '300'
# message template for the ValueError raised when a CPC or LPAR/partition
# lookup returns no match; expects the fields obj_type and obj_name
ERROR_NOT_FOUND = ("{obj_type} {obj_name} not found in this HMC. "
                   "Attention: names are case-sensitive.")
class ApiClientError(Exception):
    """
    Root of the exception hierarchy raised by the API client; catch this
    type to handle any client failure.
    """
    pass
class ApiClientRequestError(ApiClientError):
    """
    Error raised when a request to the API fails.

    The request (method and uri) and the failure details (status, reason
    and message) are stored as attributes and rendered by __str__.
    """
    def __init__(self, req_method, req_uri, status, reason, message):
        super(ApiClientRequestError, self).__init__()
        # what was requested
        self.req_method = req_method
        self.req_uri = req_uri
        # how it failed
        self.status = status
        self.reason = reason
        self.message = message

    def __str__(self):
        # one line per detail, each terminated by a newline
        report = (
            "API request failed, details:",
            "HTTP Request : {} {}".format(self.req_method, self.req_uri),
            "HTTP Response status: {}".format(self.status),
            "Error reason: {}".format(self.reason),
            "Error message: {}".format(self.message),
        )
        return "\n".join(report) + "\n"
class APIClient(object):
    """
    Client for the part of the HMC Web Services API used by this agent:
    session logon/logoff, listing CPCs and their partitions, and switching
    the power state of a partition (DPM mode) or LPAR (classic mode).
    """
    DEFAULT_CONFIG = {
        # how many connection-related errors to retry on
        'connect_retries': 3,
        # how many times to retry on read errors (after request was sent to the
        # server)
        'read_retries': 3,
        # http methods that should be retried
        'method_whitelist': ['HEAD', 'GET', 'OPTIONS'],
        # limit of redirects to perform to avoid loops
        'redirect': 5,
        # how long to wait while establishing a connection
        'connect_timeout': 30,
        # how long to wait for asynchronous operations (jobs) to complete
        'operation_timeout': 900,
        # how long to wait between bytes sent by the remote side
        'read_timeout': 300,
        # default API port
        'port': 6794,
        # validate ssl certificates
        'ssl_verify': False
    }
    # API vocabulary that differs between the two CPC operation modes
    LABEL_BY_OP_MODE = {
        'classic': {
            'nodes': 'logical-partitions',
            'state-on': 'operating',
            'start': 'load',
            'stop': 'deactivate'
        },
        'dpm': {
            'nodes': 'partitions',
            'state-on': 'active',
            'start': 'start',
            'stop': 'stop'
        }
    }
    # names of the session methods that _request() is allowed to call
    _SUPPORTED_METHODS = ('delete', 'head', 'get', 'post', 'put')

    def __init__(self, host, user, passwd, config=None):
        """
        Store the connection parameters; no request is made until logon().

        host: address of the HMC
        user/passwd: API credentials, both must be non-empty
        config: optional dict whose entries override DEFAULT_CONFIG

        Raises ValueError if user or passwd is empty.
        """
        self.host = host
        if not passwd:
            raise ValueError('Password cannot be empty')
        self.passwd = passwd
        if not user:
            raise ValueError('Username cannot be empty')
        self.user = user
        # cpc name -> {'object-uri': ..., 'dpm-enabled': ...}
        self._cpc_cache = {}
        self._session = None
        self._config = self.DEFAULT_CONFIG.copy()
        # apply user defined values
        if config:
            self._config.update(config)

    def _create_session(self):
        """
        Create a new requests session and apply config values
        """
        session = requests.Session()
        retry_args = {
            # setting a total is necessary to cover SSL related errors
            'total': max(self._config['connect_retries'],
                         self._config['read_retries']),
            'connect': self._config['connect_retries'],
            'read': self._config['read_retries'],
            'redirect': self._config['redirect']
        }
        retried_methods = self._config['method_whitelist']
        try:
            # name of the option in current urllib3 releases
            retry_obj = urllib3.Retry(
                allowed_methods=retried_methods, **retry_args)
        except TypeError:
            # older urllib3 releases only know it as method_whitelist
            retry_obj = urllib3.Retry(
                method_whitelist=retried_methods, **retry_args)
        session.mount('http://', requests.adapters.HTTPAdapter(
            max_retries=retry_obj))
        session.mount('https://', requests.adapters.HTTPAdapter(
            max_retries=retry_obj))
        return session

    def _get_mode_labels(self, cpc):
        """
        Return the map of labels that corresponds to the cpc operation mode
        """
        if self.is_dpm_enabled(cpc):
            return self.LABEL_BY_OP_MODE['dpm']
        return self.LABEL_BY_OP_MODE['classic']

    def _get_partition(self, cpc, partition):
        """
        Return the properties of the specified partition. Raises ValueError if
        it cannot be found.
        """
        # HMC API's documentation says it'll return an empty array when no
        # matches are found but for a CPC in classic mode it returns in fact
        # 404, so we handle this accordingly. Remove the extra handling below
        # once this behavior has been fixed on the API's side.
        label_map = self._get_mode_labels(cpc)
        resp = self._request('get', '{}/{}?name={}'.format(
            self._cpc_cache[cpc]['object-uri'], label_map['nodes'], partition),
                             valid_codes=[200, 404])
        if label_map['nodes'] not in resp or not resp[label_map['nodes']]:
            raise ValueError(ERROR_NOT_FOUND.format(
                obj_type='LPAR/Partition', obj_name=partition))
        return resp[label_map['nodes']][0]

    def _partition_switch_power(self, cpc, partition, action):
        """
        Perform the API request to start (power on) or stop (power off) the
        target partition and wait for the job to finish.

        action is 'start' or 'stop'. With an operation_timeout of 0 the
        final job is triggered but not waited for.
        """
        # retrieve partition's uri
        part_uri = self._get_partition(cpc, partition)['object-uri']
        label_map = self._get_mode_labels(cpc)

        # in dpm mode the request must have empty body
        if self.is_dpm_enabled(cpc):
            body = None
        # in classic mode we make sure the operation is executed
        # even if the partition is already on
        else:
            body = {'force': True}
            # when powering on the partition must be activated first
            if action == 'start':
                op_uri = '{}/operations/activate'.format(part_uri)
                job_resp = self._request(
                    'post', op_uri, body=body, valid_codes=[202])
                # always wait for activate otherwise the load (start)
                # operation will fail
                if self._config['operation_timeout'] == 0:
                    timeout = self.DEFAULT_CONFIG['operation_timeout']
                else:
                    timeout = self._config['operation_timeout']
                logging.debug(
                    'waiting for activate (timeout %s secs)', timeout)
                self._wait_for_job('post', op_uri, job_resp['job-uri'],
                                   timeout=timeout)

        # trigger the start job
        op_uri = '{}/operations/{}'.format(part_uri, label_map[action])
        job_resp = self._request('post', op_uri, body=body, valid_codes=[202])
        if self._config['operation_timeout'] == 0:
            return
        logging.debug('waiting for %s (timeout %s secs)',
                      label_map[action], self._config['operation_timeout'])
        self._wait_for_job('post', op_uri, job_resp['job-uri'],
                           timeout=self._config['operation_timeout'])

    def _request(self, method, uri, body=None, headers=None, valid_codes=None):
        """
        Perform a request to the HMC API

        method: one of 'delete', 'head', 'get', 'post', 'put'
        uri: path (and query) appended to https://host:port
        body: object sent as JSON, None for no body
        headers: extra headers for this request only
        valid_codes: accepted HTTP status codes, None/empty accepts any

        Returns the decoded JSON response, or an empty dict for status 204.
        Raises ValueError for an unsupported method or when not logged on,
        ApiClientRequestError when the status is not accepted or the response
        body is not valid JSON.
        """
        # explicit check instead of assert so it is not stripped with -O
        if method not in self._SUPPORTED_METHODS:
            raise ValueError('Unsupported HTTP method: {}'.format(method))

        url = 'https://{host}:{port}{uri}'.format(
            host=self.host, port=self._config['port'], uri=uri)
        if not headers:
            headers = {}

        if self._session is None:
            raise ValueError('You need to log on first')
        send = getattr(self._session, method)
        timeout = (
            self._config['connect_timeout'], self._config['read_timeout'])
        response = send(url, json=body, headers=headers,
                        verify=self._config['ssl_verify'], timeout=timeout)

        if valid_codes and response.status_code not in valid_codes:
            reason = '(no reason)'
            message = '(no message)'
            # compare only the media type so that a value carrying parameters
            # (e.g. "application/json;charset=UTF-8") is still seen as json
            content_type = response.headers.get('content-type') or ''
            media_type = content_type.split(';', 1)[0].strip().lower()
            if media_type == 'application/json':
                try:
                    json_resp = response.json()
                except ValueError:
                    pass
                else:
                    reason = json_resp.get('reason', reason)
                    message = json_resp.get('message', message)
            else:
                message = '{}...'.format(response.text[:500])
            raise ApiClientRequestError(
                response.request.method, response.request.url,
                response.status_code, reason, message)

        # 204 carries no content to decode
        if response.status_code == 204:
            return dict()
        try:
            json_resp = response.json()
        except ValueError:
            raise ApiClientRequestError(
                response.request.method, response.request.url,
                response.status_code, '(no reason)',
                'Invalid JSON content in response')
        return json_resp

    def _update_cpc_cache(self, cpc_props):
        """
        Remember uri and operation mode of a CPC, keyed by the CPC name; a
        missing 'dpm-enabled' property is stored as False
        """
        self._cpc_cache[cpc_props['name']] = {
            'object-uri': cpc_props['object-uri'],
            'dpm-enabled': cpc_props.get('dpm-enabled', False)
        }

    def _wait_for_job(self, req_method, req_uri, job_uri, timeout):
        """
        Perform API requests to check for job status until it has completed
        or the specified timeout is reached

        req_method and req_uri identify the request that started the job and
        are only used to build the error. Raises ApiClientRequestError when
        the job completes with a status other than 200, 201 or 204 and
        ApiClientError on timeout.
        """
        op_timeout = time.time() + timeout
        while time.time() < op_timeout:
            job_resp = self._request("get", job_uri)
            if job_resp['status'] == 'complete':
                # a missing status code is reported as a failed job, the same
                # way the error below already treats it as optional
                if job_resp.get('job-status-code') in (200, 201, 204):
                    return
                raise ApiClientRequestError(
                    req_method, req_uri,
                    job_resp.get('job-status-code', '(no status)'),
                    job_resp.get('job-reason-code', '(no reason)'),
                    job_resp.get('job-results', {}).get(
                        'message', '(no message)')
                )
            time.sleep(1)
        raise ApiClientError('Timed out while waiting for job completion')

    def cpc_list(self):
        """
        Return a list of CPCs in the format {'name': 'cpc-name', 'status':
        'operating'}
        """
        list_resp = self._request("get", "/api/cpcs", valid_codes=[200])
        ret = []
        for cpc_props in list_resp['cpcs']:
            # keep the cache current so later calls need no extra lookup
            self._update_cpc_cache(cpc_props)
            ret.append({
                'name': cpc_props['name'], 'status': cpc_props['status']})
        return ret

    def is_dpm_enabled(self, cpc):
        """
        Return True if CPC is in DPM mode, False for classic mode

        The answer comes from the cache when available, otherwise the CPC is
        looked up by name. Raises ValueError if the lookup finds no CPC.
        """
        if cpc in self._cpc_cache:
            return self._cpc_cache[cpc]['dpm-enabled']
        list_resp = self._request("get", "/api/cpcs?name={}".format(cpc),
                                  valid_codes=[200])
        if not list_resp['cpcs']:
            raise ValueError(ERROR_NOT_FOUND.format(
                obj_type='CPC', obj_name=cpc))
        self._update_cpc_cache(list_resp['cpcs'][0])
        return self._cpc_cache[cpc]['dpm-enabled']

    def logon(self):
        """
        Open a session with the HMC API and store its ID
        """
        self._session = self._create_session()
        logon_body = {"userid": self.user, "password": self.passwd}
        logon_resp = self._request("post", "/api/sessions", body=logon_body,
                                   valid_codes=[200, 201])
        # sent with every following request of this session
        self._session.headers["X-API-Session"] = logon_resp['api-session']

    def logoff(self):
        """
        Close/delete the HMC API session

        Does nothing when not logged on. The local session and the CPC cache
        are discarded even if the request fails; the error is still raised.
        """
        if self._session is None:
            return
        session = self._session
        try:
            self._request("delete", "/api/sessions/this-session",
                          valid_codes=[204])
        finally:
            # never keep a stale session around and release its connections
            self._cpc_cache = {}
            self._session = None
            session.close()

    def partition_list(self, cpc):
        """
        Return a list of partitions in the format {'name': 'part-name',
        'status': 'on'}
        """
        label_map = self._get_mode_labels(cpc)
        list_resp = self._request(
            'get', '{}/{}'.format(
                self._cpc_cache[cpc]['object-uri'], label_map['nodes']),
            valid_codes=[200])
        # every status other than the mode's "on" state is reported as off
        status_map = {label_map['state-on']: 'on'}
        return [{'name': part['name'],
                 'status': status_map.get(part['status'].lower(), 'off')}
                for part in list_resp[label_map['nodes']]]

    def partition_start(self, cpc, partition):
        """
        Power on a partition
        """
        self._partition_switch_power(cpc, partition, 'start')

    def partition_status(self, cpc, partition):
        """
        Return the current status of a partition (on or off)
        """
        label_map = self._get_mode_labels(cpc)
        part_props = self._get_partition(cpc, partition)
        if part_props['status'].lower() == label_map['state-on']:
            return 'on'
        return 'off'

    def partition_stop(self, cpc, partition):
        """
        Power off a partition
        """
        self._partition_switch_power(cpc, partition, 'stop')
def parse_plug(options):
    """
    Extract cpc and partition from specified plug value

    The value of options['--plug'] must have the format cpc/partition; it is
    split at the first '/' and surrounding whitespace is removed from both
    parts. Returns the tuple (cpc, partition). fail_usage() is called when
    the separator or one of the parts is missing.
    """
    cpc, separator, partition = options['--plug'].partition('/')
    # strip both sides so that "cpc / partition" is not turned into a
    # partition name with a leading blank
    cpc = cpc.strip()
    partition = partition.strip()
    if not separator or not cpc or not partition:
        fail_usage('Please specify nodename in format cpc/partition')
    return cpc, partition
def get_power_status(conn, options):
    """
    Return the status reported by conn.partition_status() for the cpc and
    partition named by the plug option
    """
    logging.debug('executing get_power_status')
    cpc, partition = parse_plug(options)
    return conn.partition_status(cpc, partition)
def set_power_status(conn, options):
    """
    Power the partition named by the plug option on or off according to
    options['--action']; any other action is reported through fail_usage()
    """
    logging.debug('executing set_power_status')
    # action -> name of the client method that performs it
    method_by_action = {'on': 'partition_start', 'off': 'partition_stop'}
    requested = options['--action']
    if requested not in method_by_action:
        fail_usage('Invalid action {}'.format(requested))
        return
    cpc, partition = parse_plug(options)
    getattr(conn, method_by_action[requested])(cpc, partition)
def get_outlet_list(conn, options):
    """
    Return a dict with one entry per partition of every CPC known to conn,
    in the format {'cpc-name/part-name': ('part-name', 'status')}
    """
    logging.debug('executing get_outlet_list')
    outlets = {}
    for cpc in conn.cpc_list():
        cpc_name = cpc['name']
        outlets.update(
            ('{}/{}'.format(cpc_name, node['name']),
             (node['name'], node['status']))
            for node in conn.partition_list(cpc_name))
    return outlets
def disconnect(conn):
    """
    Log off from the API; a failure is logged with its traceback and the
    process exits with the error text
    """
    try:
        conn.logoff()
    except Exception as logoff_error:
        logging.exception('Logoff failed: ')
        exit_message = str(logoff_error)
        sys.exit(exit_message)
def set_opts():
    """
    Register the agent specific options in all_opt, adjust the defaults and
    descriptions of the ones shared with the fencing library, and return the
    list with the names of the options accepted by this agent
    """
    defaults = APIClient.DEFAULT_CONFIG

    def tunable(longopt, help_text, default, opt_type, shortdesc):
        # definition of an optional option that takes a value
        return {
            "getopt": ":",
            "longopt": longopt,
            "help": help_text,
            "default": default,
            "type": opt_type,
            "required": "0",
            "shortdesc": shortdesc,
            "order": 2
        }

    # options that already exist in the fencing library
    all_opt["ipport"]["default"] = defaults['port']
    all_opt["power_timeout"]["default"] = DEFAULT_POWER_TIMEOUT
    plug_desc = ("Physical plug id in the format cpc-name/partition-name "
                 "(case-sensitive)")
    all_opt["port"]["shortdesc"] = plug_desc
    all_opt["port"]["help"] = "-n, --plug=[id] {}".format(plug_desc)

    # options introduced by this agent
    all_opt["connect_retries"] = tunable(
        "connect-retries",
        "--connect-retries=[number] How many times to "
        "retry on connection errors",
        defaults['connect_retries'],
        "integer",
        "How many times to retry on connection errors")
    all_opt["read_retries"] = tunable(
        "read-retries",
        "--read-retries=[number] How many times to "
        "retry on errors related to reading from server",
        defaults['read_retries'],
        "integer",
        "How many times to retry on read errors")
    all_opt["connect_timeout"] = tunable(
        "connect-timeout",
        "--connect-timeout=[seconds] How long to wait to "
        "establish a connection",
        defaults['connect_timeout'],
        "second",
        "How long to wait to establish a connection")
    all_opt["operation_timeout"] = tunable(
        "operation-timeout",
        "--operation-timeout=[seconds] How long to wait for "
        "power operation to complete (0 = do not wait)",
        defaults['operation_timeout'],
        "second",
        "How long to wait for power operation to complete")
    all_opt["read_timeout"] = tunable(
        "read-timeout",
        "--read-timeout=[seconds] How long to wait "
        "to read data from server",
        defaults['read_timeout'],
        "second",
        "How long to wait for server data")

    return [
        "ipaddr",
        "ipport",
        "login",
        "passwd",
        "port",
        "connect_retries",
        "connect_timeout",
        "operation_timeout",
        "read_retries",
        "read_timeout",
        "ssl_secure",
    ]
def main():
    """
    Agent entry point: parse the options, log on to the HMC, run the
    requested fence action and exit with its result
    """
    # register exit handler used by pacemaker
    atexit.register(atexit_handler)

    # prepare accepted options and parse the ones provided on input
    device_opt = set_opts()
    options = check_input(device_opt, process_input(device_opt))

    show_docs(options, {
        "shortdesc": "Fence agent for IBM z LPARs",
        "longdesc": (
            "fence_ibmz is a power fencing agent which uses the HMC Web "
            "Services API to fence IBM z LPARs."),
        "vendorurl": "http://www.ibm.com"
    })

    run_delay(options)

    # set underlying library's logging and ssl config according to specified
    # options
    urllib3_logger = logging.getLogger("urllib3")
    urllib3_logger.propagate = True
    if "--verbose" in options:
        urllib3_logger.setLevel(logging.DEBUG)
    verify_ssl = "--ssl-secure" in options
    if not verify_ssl:
        urllib3.disable_warnings(
            category=urllib3.exceptions.InsecureRequestWarning)

    hmc_address = options["--ip"]
    hmc_userid = options["--username"]
    hmc_password = options["--password"]

    # client config key -> command line option holding its integer value
    int_options = (
        ('connect_retries', '--connect-retries'),
        ('read_retries', '--read-retries'),
        ('operation_timeout', '--operation-timeout'),
        ('connect_timeout', '--connect-timeout'),
        ('read_timeout', '--read-timeout'),
        ('port', '--ipport'),
    )
    config = {}
    for config_key, option_name in int_options:
        config[config_key] = int(options[option_name])
    config['ssl_verify'] = verify_ssl

    try:
        conn = APIClient(hmc_address, hmc_userid, hmc_password, config)
        conn.logon()
        # make sure the API session is closed when the agent exits
        atexit.register(disconnect, conn)
        result = fence_action(conn, options, set_power_status,
                              get_power_status, get_outlet_list)
    except Exception:
        logging.exception('Exception occurred: ')
        result = EC_GENERIC_ERROR
    sys.exit(result)
# run the agent only when executed as a script, not when imported
if __name__ == '__main__':
    main()

File Metadata

Mime Type
text/x-diff
Expires
Sat, Nov 23, 4:34 PM (14 h, 10 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1018915
Default Alt Text
(21 KB)

Event Timeline