Support Fenix as admin tool
[doctor.git] / doctor_tests / scenario / maintenance.py
1 ##############################################################################
2 # Copyright (c) 2018 Nokia Corporation and others.
3 #
4 # All rights reserved. This program and the accompanying materials
5 # are made available under the terms of the Apache License, Version 2.0
6 # which accompanies this distribution, and is available at
7 # http://www.apache.org/licenses/LICENSE-2.0
8 ##############################################################################
9 import datetime
10 import json
11 import requests
12 import time
13
14 from doctor_tests.admin_tool import get_admin_tool
15 from doctor_tests.app_manager import get_app_manager
16 from doctor_tests.common.utils import get_doctor_test_root_dir
17 from doctor_tests.identity_auth import get_identity_auth
18 from doctor_tests.identity_auth import get_session
19 from doctor_tests.inspector import get_inspector
20 from doctor_tests.os_clients import keystone_client
21 from doctor_tests.os_clients import neutron_client
22 from doctor_tests.os_clients import nova_client
23 from doctor_tests.stack import Stack
24
25
class Maintenance(object):
    """Set up, run and clean up the maintenance test scenario.

    The scenario creates a Heat stack of test instances, starts the
    application manager and the inspector, asks the admin tool over REST
    to start a maintenance session and polls that session until it ends.

    When ``conf.admin_tool.type`` is ``'sample'`` the admin tool is created
    and started/stopped by this class; otherwise an already running admin
    tool is expected at ``conf.admin_tool.ip``/``conf.admin_tool.port``.
    """

    # Headers sent with every REST request to the admin tool.
    _JSON_HEADERS = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'}

    def __init__(self, trasport_url, conf, log):
        """Create the OpenStack clients and the scenario components.

        :param trasport_url: transport URL handed to the sample admin tool
            and to the inspector (spelling kept for existing callers).
        :param conf: test configuration object.
        :param log: logger used for progress messages.
        """
        self.conf = conf
        self.log = log
        self.keystone = keystone_client(
            self.conf.keystone_version, get_session())
        self.nova = nova_client(conf.nova_version, get_session())
        auth = get_identity_auth(project=self.conf.doctor_project)
        self.neutron = neutron_client(get_session(auth=auth))
        self.stack = Stack(self.conf, self.log)
        if self.conf.admin_tool.type == 'sample':
            self.admin_tool = get_admin_tool(trasport_url, self.conf, self.log)
            self.endpoint = 'maintenance'
        else:
            # An external admin tool is not created here; keep the
            # attribute defined so it can always be inspected safely.
            self.admin_tool = None
            self.endpoint = 'v1/maintenance'
        self.app_manager = get_app_manager(self.stack, self.conf, self.log)
        self.inspector = get_inspector(self.conf, self.log, trasport_url)

    def _admin_tool_url(self, session_id=None):
        """Return the admin tool maintenance URL, optionally of a session."""
        url = 'http://%s:%s/%s' % (self.conf.admin_tool.ip,
                                   self.conf.admin_tool.port,
                                   self.endpoint)
        if session_id is not None:
            url = '%s/%s' % (url, session_id)
        return url

    def get_external_network(self):
        """Return the name of the first external network.

        :raises Exception: if no network has ``router:external`` set.
        """
        networks = self.neutron.list_networks()['networks']
        ext_net = next((network['name'] for network in networks
                        if network['router:external']), None)
        if ext_net is None:
            raise Exception("external network not defined")
        return ext_net

    def _check_hypervisors(self):
        """Verify that all hypervisors are free and have equal vcpus.

        :returns: tuple ``(number of hypervisors, vcpus per hypervisor)``.
        :raises Exception: if there is no hypervisor, a hypervisor has
            fewer than 2 vcpus, has vcpus in use, or differs in vcpu
            count from the previous one.
        """
        hvisors = self.nova.hypervisors.list(detailed=True)
        self.log.info('checking hypervisors.......')
        if not hvisors:
            # Without this guard the vcpu count below would be undefined.
            raise Exception('no hypervisors found')
        prev_vcpus = 0
        prev_hostname = ''
        for hvisor in hvisors:
            vcpus = hvisor.vcpus
            vcpus_used = hvisor.vcpus_used
            hostname = hvisor.hypervisor_hostname
            if vcpus < 2:
                raise Exception('not enough vcpus (%d) on %s' %
                                (vcpus, hostname))
            if vcpus_used > 0:
                if self.conf.test_case == 'all':
                    # VCPU might not yet be free after fault_management test
                    self.log.info('%d vcpus used on %s, retry...'
                                  % (vcpus_used, hostname))
                    time.sleep(15)
                    hvisor = self.nova.hypervisors.get(hvisor.id)
                    vcpus_used = hvisor.vcpus_used
                if vcpus_used > 0:
                    raise Exception('%d vcpus used on %s'
                                    % (vcpus_used, hostname))
            if prev_vcpus != 0 and prev_vcpus != vcpus:
                raise Exception('%d vcpus on %s does not match to '
                                '%d on %s'
                                % (vcpus, hostname,
                                   prev_vcpus, prev_hostname))
            prev_vcpus = vcpus
            prev_hostname = hostname
        return len(hvisors), vcpus

    def setup_maintenance(self, user):
        """Prepare quota, stack and helper services for the test.

        Each hypervisor needs to have the same amount of vcpus and they
        need to be free before the test.

        :param user: object whose ``update_quota(instances, cores)`` is
            called with the limits needed by the test stack.
        :raises Exception: if the hypervisors do not fulfil the
            requirements or no external network exists.
        """
        compute_nodes, vcpus = self._check_hypervisors()

        # maintenance flavor made so that 2 instances take whole node
        flavor_vcpus = vcpus // 2
        amount_actstdby_instances = 2
        amount_noredundancy_instances = 2 * compute_nodes - 2
        self.log.info('testing %d computes with %d vcpus each'
                      % (compute_nodes, vcpus))
        self.log.info('testing %d actstdby and %d noredundancy instances'
                      % (amount_actstdby_instances,
                         amount_noredundancy_instances))
        max_instances = (amount_actstdby_instances +
                         amount_noredundancy_instances)
        max_cores = compute_nodes * vcpus

        user.update_quota(max_instances, max_cores)

        test_dir = get_doctor_test_root_dir()
        template_file = '{0}/{1}'.format(test_dir, 'maintenance_hot_tpl.yaml')
        files, template = self.stack.get_hot_tpl(template_file)

        ext_net = self.get_external_network()

        # Keys (including their spelling) are passed as-is to the stack
        # template as parameters.
        parameters = {'ext_net': ext_net,
                      'flavor_vcpus': flavor_vcpus,
                      'maint_image': self.conf.image_name,
                      'nonha_intances': amount_noredundancy_instances,
                      'ha_intances': amount_actstdby_instances}

        self.log.info('creating maintenance stack.......')
        self.log.info('parameters: %s' % parameters)

        self.stack.create('doctor_test_maintenance',
                          template,
                          parameters=parameters,
                          files=files)

        if self.conf.admin_tool.type == 'sample':
            self.admin_tool.start()
        else:
            # TBD Now we expect Fenix is running in self.conf.admin_tool.port
            pass
        self.app_manager.start()
        self.inspector.start()

    def start_maintenance(self, retries=12, retry_interval=10):
        """Ask the admin tool to start a maintenance session.

        The request is repeated while the admin tool cannot be reached.

        :param retries: maximum number of attempts to reach the admin tool.
        :param retry_interval: seconds to wait between attempts.
        :returns: the ``session_id`` from the admin tool response.
        :raises Exception: if the admin tool never responds or responds
            with a status code other than 200.
        """
        self.log.info('start maintenance.......')
        hvisors = self.nova.hypervisors.list(detailed=True)
        if self.conf.admin_tool.type == 'sample':
            hosts = [hvisor.hypervisor_hostname for hvisor in hvisors]
        else:
            # NOTE(review): an empty host list is sent to a non-sample
            # admin tool; presumably it selects the hosts itself - confirm.
            hosts = []

        url = self._admin_tool_url()
        ret = None
        for _ in range(retries):
            # let's start maintenance 30sec from now, so projects will have
            # time to ACK to it before that. Recomputed on every attempt so
            # the start time never ends up in the past.
            maintenance_at = (datetime.datetime.utcnow() +
                              datetime.timedelta(seconds=30)
                              ).strftime('%Y-%m-%d %H:%M:%S')

            data = {'state': 'MAINTENANCE',
                    'maintenance_at': maintenance_at,
                    'metadata': {'openstack_version': 'Rocky'},
                    'workflow': 'default',
                    'hosts': hosts}
            try:
                ret = requests.post(url, data=json.dumps(data),
                                    headers=dict(self._JSON_HEADERS))
            except requests.exceptions.RequestException:
                # Only transport level failures mean "not ready yet";
                # anything else is a real error and must propagate.
                self.log.info('admin tool not ready, retry in %ds'
                              % retry_interval)
                time.sleep(retry_interval)
                continue
            break
        if ret is None:
            raise Exception('admin tool did not respond in %ds'
                            % (retries * retry_interval))
        if ret.status_code != 200:
            raise Exception(ret.text)
        return ret.json()['session_id']

    def remove_maintenance_session(self, session_id):
        """Delete the given maintenance session from the admin tool.

        :raises Exception: if the response status code is not 200.
        """
        self.log.info('remove maintenance session %s.......' % session_id)
        ret = requests.delete(self._admin_tool_url(session_id), data=None,
                              headers=dict(self._JSON_HEADERS))
        if ret.status_code != 200:
            raise Exception(ret.text)

    def get_maintenance_state(self, session_id):
        """Return the ``state`` the admin tool reports for the session.

        :raises Exception: if the response status code is not 200.
        """
        ret = requests.get(self._admin_tool_url(session_id), data=None,
                           headers=dict(self._JSON_HEADERS))
        if ret.status_code != 200:
            raise Exception(ret.text)
        return ret.json()['state']

    def wait_maintenance_complete(self, session_id, initial_wait=300,
                                  poll_interval=10, retries=90):
        """Poll the session until it is done or failed, then remove it.

        :param session_id: maintenance session to wait for.
        :param initial_wait: seconds to sleep before the first poll.
        :param poll_interval: seconds to sleep before each poll.
        :param retries: maximum number of polls.
        :raises Exception: if the session ends as ``MAINTENANCE_FAILED``
            or does not reach a final state within the allowed polls.
        """
        final_states = ('MAINTENANCE_DONE', 'MAINTENANCE_FAILED')
        state = None
        time.sleep(initial_wait)
        for _ in range(retries):
            time.sleep(poll_interval)
            state = self.get_maintenance_state(session_id)
            if state in final_states:
                break
        self.remove_maintenance_session(session_id)
        self.log.info('maintenance %s ended with state %s' %
                      (session_id, state))
        if state == 'MAINTENANCE_FAILED':
            raise Exception('maintenance %s failed' % session_id)
        if state != 'MAINTENANCE_DONE':
            # Decide on the state, not on the poll counter: a session that
            # completes on the very last poll is still a success.
            total_wait = initial_wait + retries * poll_interval
            raise Exception('maintenance %s not completed within %dmin' %
                            (session_id, total_wait // 60))

    def cleanup_maintenance(self):
        """Stop the helper services and delete the test stack."""
        if self.conf.admin_tool.type == 'sample':
            self.admin_tool.stop()
        self.app_manager.stop()
        self.inspector.stop()
        self.log.info('stack delete start.......')
        self.stack.delete()