Maintenance support for latest Fenix, python3 and Fuel
[doctor.git] / doctor_tests / scenario / maintenance.py
1 ##############################################################################
2 # Copyright (c) 2019 Nokia Corporation and others.
3 #
4 # All rights reserved. This program and the accompanying materials
5 # are made available under the terms of the Apache License, Version 2.0
6 # which accompanies this distribution, and is available at
7 # http://www.apache.org/licenses/LICENSE-2.0
8 ##############################################################################
9 import datetime
10 import json
11 import requests
12 import time
13
14 from doctor_tests.admin_tool import get_admin_tool
15 from doctor_tests.app_manager import get_app_manager
16 from doctor_tests.common.utils import get_doctor_test_root_dir
17 from doctor_tests.identity_auth import get_identity_auth
18 from doctor_tests.identity_auth import get_session
19 from doctor_tests.inspector import get_inspector
20 from doctor_tests.os_clients import keystone_client
21 from doctor_tests.os_clients import neutron_client
22 from doctor_tests.os_clients import nova_client
23 from doctor_tests.stack import Stack
24
25
class Maintenance(object):
    """Drive the maintenance test scenario against an admin tool.

    The admin tool is either the bundled 'sample' implementation, which
    this class starts and stops itself, or an externally running service
    (e.g. 'fenix') reached over HTTP at conf.admin_tool.ip/port.
    """

    def __init__(self, trasport_url, conf, log):
        """Create the OpenStack clients and the scenario helper objects.

        :param trasport_url: passed on to the sample admin tool and to the
                             inspector (spelling kept for compatibility)
        :param conf: test configuration object
        :param log: logger used for progress messages
        """
        self.conf = conf
        self.log = log
        self.admin_session = get_session()
        self.keystone = keystone_client(
            self.conf.keystone_version, get_session())
        self.nova = nova_client(conf.nova_version, get_session())
        auth = get_identity_auth(project=self.conf.doctor_project)
        self.neutron = neutron_client(get_session(auth=auth))
        self.stack = Stack(self.conf, self.log)
        if self.conf.admin_tool.type == 'sample':
            self.admin_tool = get_admin_tool(trasport_url, self.conf, self.log)
            self.endpoint = 'maintenance'
        else:
            self.endpoint = 'v1/maintenance'
        self.app_manager = get_app_manager(self.stack, self.conf, self.log)
        self.inspector = get_inspector(self.conf, self.log, trasport_url)

    def _url(self, session_id=None):
        """Return the admin tool URL, optionally for a single session."""
        url = ('http://%s:%s/%s' %
               (self.conf.admin_tool.ip,
                self.conf.admin_tool.port,
                self.endpoint))
        if session_id is not None:
            url = '%s/%s' % (url, session_id)
        return url

    def _headers(self):
        """Return JSON request headers, with a token when using fenix."""
        headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json'}
        if self.conf.admin_tool.type == 'fenix':
            headers['X-Auth-Token'] = self.admin_session.get_token()
        return headers

    def get_external_network(self):
        """Return the name of the first network flagged router:external.

        :raises Exception: if no such network is found
        """
        networks = self.neutron.list_networks()['networks']
        ext_net = next((network['name'] for network in networks
                        if network['router:external']), None)
        if ext_net is None:
            raise Exception("external network not defined")
        return ext_net

    def _check_hypervisors(self):
        """Check all hypervisors are free and equally sized.

        :returns: tuple (number of hypervisors, vcpus per hypervisor)
        :raises Exception: if there are no hypervisors, one has fewer than
                           2 vcpus, one has vcpus in use, or the vcpu
                           counts differ between hypervisors
        """
        hvisors = self.nova.hypervisors.list(detailed=True)
        self.log.info('checking hypervisors.......')
        if not hvisors:
            # without this the code below would fail on an unbound 'vcpus'
            raise Exception('no hypervisors found')
        prev_vcpus = 0
        prev_hostname = ''
        for hvisor in hvisors:
            vcpus = hvisor.vcpus
            vcpus_used = hvisor.vcpus_used
            hostname = hvisor.hypervisor_hostname
            if vcpus < 2:
                raise Exception('not enough vcpus (%d) on %s' %
                                (vcpus, hostname))
            if vcpus_used > 0:
                if self.conf.test_case == 'all':
                    # VCPU might not yet be free after fault_management test
                    self.log.info('%d vcpus used on %s, retry...'
                                  % (vcpus_used, hostname))
                    time.sleep(15)
                    hvisor = self.nova.hypervisors.get(hvisor.id)
                    vcpus_used = hvisor.vcpus_used
                if vcpus_used > 0:
                    raise Exception('%d vcpus used on %s'
                                    % (vcpus_used, hostname))
            if prev_vcpus != 0 and prev_vcpus != vcpus:
                raise Exception('%d vcpus on %s does not match to '
                                '%d on %s'
                                % (vcpus, hostname,
                                   prev_vcpus, prev_hostname))
            prev_vcpus = vcpus
            prev_hostname = hostname
        return len(hvisors), vcpus

    def setup_maintenance(self, user):
        """Create the maintenance stack and start the helper services.

        :param user: object whose update_quota(instances, cores) is called
                     to make room for the test instances
        :raises Exception: if the hypervisors are not suitable for the
                           test or no external network exists
        """
        # each hypervisor needs to have same amount of vcpus and they
        # need to be free before test
        compute_nodes, vcpus = self._check_hypervisors()

        # maintenance flavor made so that 2 instances take whole node
        flavor_vcpus = vcpus // 2
        amount_actstdby_instances = 2
        amount_noredundancy_instances = 2 * compute_nodes - 2
        self.log.info('testing %d computes with %d vcpus each'
                      % (compute_nodes, vcpus))
        self.log.info('testing %d actstdby and %d noredundancy instances'
                      % (amount_actstdby_instances,
                         amount_noredundancy_instances))
        max_instances = (amount_actstdby_instances +
                         amount_noredundancy_instances)
        max_cores = compute_nodes * vcpus

        user.update_quota(max_instances, max_cores)

        test_dir = get_doctor_test_root_dir()
        template_file = '{0}/{1}'.format(test_dir, 'maintenance_hot_tpl.yaml')
        files, template = self.stack.get_hot_tpl(template_file)

        ext_net = self.get_external_network()

        parameters = {'ext_net': ext_net,
                      'flavor_vcpus': flavor_vcpus,
                      'maint_image': self.conf.image_name,
                      'nonha_intances': amount_noredundancy_instances,
                      'ha_intances': amount_actstdby_instances}

        self.log.info('creating maintenance stack.......')
        self.log.info('parameters: %s' % parameters)

        self.stack.create('doctor_test_maintenance',
                          template,
                          parameters=parameters,
                          files=files)

        if self.conf.admin_tool.type == 'sample':
            self.admin_tool.start()
        else:
            # TBD Now we expect Fenix is running in self.conf.admin_tool.port
            pass
        self.app_manager.start()
        self.inspector.start()

    def start_maintenance(self):
        """Ask the admin tool to start a maintenance session.

        The request is attempted up to 12 times, 10 seconds apart, while
        the admin tool cannot be reached.

        :returns: the 'session_id' from the admin tool reply
        :raises Exception: if the admin tool never responds, or responds
                           with a status code other than 200
        """
        self.log.info('start maintenance.......')
        hvisors = self.nova.hypervisors.list(detailed=True)
        maintenance_hosts = [hvisor.hypervisor_hostname
                             for hvisor in hvisors]

        url = self._url()
        headers = self._headers()
        # never write the admin token to the log
        logged_headers = dict(headers)
        if 'X-Auth-Token' in logged_headers:
            logged_headers['X-Auth-Token'] = '<redacted>'
        self.log.info('headers %s' % logged_headers)

        # only the sample admin tool is given the host list explicitly
        if self.conf.admin_tool.type == 'sample':
            hosts = maintenance_hosts
        else:
            hosts = []

        for _ in range(12):
            # let's start maintenance 30sec from now, so projects will have
            # time to ACK to it before that
            maintenance_at = (datetime.datetime.utcnow() +
                              datetime.timedelta(seconds=30)
                              ).strftime('%Y-%m-%d %H:%M:%S')

            data = {'state': 'MAINTENANCE',
                    'maintenance_at': maintenance_at,
                    'metadata': {'openstack_version': 'Rocky'},
                    'workflow': 'default',
                    'hosts': hosts}
            try:
                ret = requests.post(url, data=json.dumps(data),
                                    headers=headers)
            except Exception:
                self.log.info('admin tool not ready, retry in 10s')
                time.sleep(10)
                continue
            break
        else:
            raise Exception('admin tool did not respond in 120s')
        # a Response is falsy for 4xx/5xx, so test the status code directly
        # to report the server's own error text
        if ret.status_code != 200:
            raise Exception(ret.text)
        return ret.json()['session_id']

    def remove_maintenance_session(self, session_id):
        """Delete the given maintenance session from the admin tool.

        :raises Exception: if the reply status code is not 200
        """
        self.log.info('remove maintenance session %s.......' % session_id)
        ret = requests.delete(self._url(session_id), data=None,
                              headers=self._headers())
        if ret.status_code != 200:
            raise Exception(ret.text)

    def get_maintenance_state(self, session_id):
        """Return the 'state' the admin tool reports for the session.

        :raises Exception: if the reply status code is not 200
        """
        ret = requests.get(self._url(session_id), data=None,
                           headers=self._headers())
        if ret.status_code != 200:
            raise Exception(ret.text)
        return ret.json()['state']

    def wait_maintenance_complete(self, session_id, retries=90,
                                  initial_delay=300, poll_interval=10):
        """Poll the session until it is done or failed, then remove it.

        :param session_id: maintenance session to wait for
        :param retries: maximum number of state polls
        :param initial_delay: seconds to wait before the first poll
        :param poll_interval: seconds to wait before each poll
        :raises Exception: if the session ends in MAINTENANCE_FAILED or
                           does not reach MAINTENANCE_DONE in time
        """
        end_states = ('MAINTENANCE_DONE', 'MAINTENANCE_FAILED')
        state = None
        time.sleep(initial_delay)
        for _ in range(retries):
            time.sleep(poll_interval)
            state = self.get_maintenance_state(session_id)
            if state in end_states:
                break
        self.remove_maintenance_session(session_id)
        self.log.info('maintenance %s ended with state %s' %
                      (session_id, state))
        if state == 'MAINTENANCE_FAILED':
            raise Exception('maintenance %s failed' % session_id)
        # judge by the state, not the retry counter: the session may well
        # complete on the very last poll
        if state != 'MAINTENANCE_DONE':
            timeout_min = (initial_delay + retries * poll_interval) // 60
            raise Exception('maintenance %s not completed within %dmin' %
                            (session_id, timeout_min))

    def cleanup_maintenance(self):
        """Stop the helper services and delete the maintenance stack."""
        if self.conf.admin_tool.type == 'sample':
            self.admin_tool.stop()
        self.app_manager.stop()
        self.inspector.stop()
        self.log.info('stack delete start.......')
        self.stack.delete()