DevStack support
[doctor.git] / doctor_tests / scenario / maintenance.py
1 ##############################################################################
2 # Copyright (c) 2019 Nokia Corporation and others.
3 #
4 # All rights reserved. This program and the accompanying materials
5 # are made available under the terms of the Apache License, Version 2.0
6 # which accompanies this distribution, and is available at
7 # http://www.apache.org/licenses/LICENSE-2.0
8 ##############################################################################
9 import datetime
10 import json
11 import requests
12 import time
13
14 from doctor_tests.admin_tool import get_admin_tool
15 from doctor_tests.app_manager import get_app_manager
16 from doctor_tests.common.utils import get_doctor_test_root_dir
17 from doctor_tests.identity_auth import get_identity_auth
18 from doctor_tests.identity_auth import get_session
19 from doctor_tests.inspector import get_inspector
20 from doctor_tests.os_clients import keystone_client
21 from doctor_tests.os_clients import neutron_client
22 from doctor_tests.os_clients import nova_client
23 from doctor_tests.stack import Stack
24
25
class Maintenance(object):
    """Driver for the maintenance test scenario.

    The methods of this class check the compute hosts, create the test
    stack, start the helper services (admin tool, inspector and app
    manager), open a maintenance session through the admin tool HTTP
    endpoint, poll that session until it ends, and clean everything up.
    """
27
28     def __init__(self, trasport_url, conf, log):
29         self.conf = conf
30         self.log = log
31         self.admin_session = get_session()
32         self.keystone = keystone_client(
33             self.conf.keystone_version, get_session())
34         self.nova = nova_client(conf.nova_version, get_session())
35         auth = get_identity_auth(project=self.conf.doctor_project)
36         self.neutron = neutron_client(get_session(auth=auth))
37         self.stack = Stack(self.conf, self.log)
38         if self.conf.installer.type == "devstack":
39             self.endpoint_ip = trasport_url.split("@", 1)[1].split(":", 1)[0]
40         else:
41             self.endpoint_ip = self.conf.admin_tool.ip
42         self.endpoint = "http://%s:12347/" % self.endpoint_ip
43         if self.conf.admin_tool.type == 'sample':
44             self.admin_tool = get_admin_tool(trasport_url, self.conf, self.log)
45             self.endpoint += 'maintenance'
46         else:
47             self.endpoint += 'v1/maintenance'
48         self.app_manager = get_app_manager(self.stack, self.conf, self.log)
49         self.inspector = get_inspector(self.conf, self.log, trasport_url)
50
51     def get_external_network(self):
52         ext_net = None
53         networks = self.neutron.list_networks()['networks']
54         for network in networks:
55             if network['router:external']:
56                 ext_net = network['name']
57                 break
58         if ext_net is None:
59             raise Exception("external network not defined")
60         return ext_net
61
62     def setup_maintenance(self, user):
63         # each hypervisor needs to have same amount of vcpus and they
64         # need to be free before test
65         hvisors = self.nova.hypervisors.list(detailed=True)
66         prev_vcpus = 0
67         prev_hostname = ''
68         self.log.info('checking hypervisors.......')
69         for hvisor in hvisors:
70             vcpus = hvisor.__getattr__('vcpus')
71             vcpus_used = hvisor.__getattr__('vcpus_used')
72             hostname = hvisor.__getattr__('hypervisor_hostname')
73             if vcpus < 2:
74                 raise Exception('not enough vcpus (%d) on %s' %
75                                 (vcpus, hostname))
76             if vcpus_used > 0:
77                 if self.conf.test_case == 'all':
78                     # VCPU might not yet be free after fault_management test
79                     self.log.info('%d vcpus used on %s, retry...'
80                                   % (vcpus_used, hostname))
81                     time.sleep(15)
82                     hvisor = self.nova.hypervisors.get(hvisor.id)
83                     vcpus_used = hvisor.__getattr__('vcpus_used')
84                 if vcpus_used > 0:
85                     raise Exception('%d vcpus used on %s'
86                                     % (vcpus_used, hostname))
87             if prev_vcpus != 0 and prev_vcpus != vcpus:
88                 raise Exception('%d vcpus on %s does not match to'
89                                 '%d on %s'
90                                 % (vcpus, hostname,
91                                    prev_vcpus, prev_hostname))
92             prev_vcpus = vcpus
93             prev_hostname = hostname
94
95         # maintenance flavor made so that 2 instances take whole node
96         flavor_vcpus = int(vcpus / 2)
97         compute_nodes = len(hvisors)
98         amount_actstdby_instances = 2
99         amount_noredundancy_instances = 2 * compute_nodes - 2
100         self.log.info('testing %d computes with %d vcpus each'
101                       % (compute_nodes, vcpus))
102         self.log.info('testing %d actstdby and %d noredundancy instances'
103                       % (amount_actstdby_instances,
104                          amount_noredundancy_instances))
105         max_instances = (amount_actstdby_instances +
106                          amount_noredundancy_instances)
107         max_cores = compute_nodes * vcpus
108
109         user.update_quota(max_instances, max_cores)
110
111         test_dir = get_doctor_test_root_dir()
112         template_file = '{0}/{1}'.format(test_dir, 'maintenance_hot_tpl.yaml')
113         files, template = self.stack.get_hot_tpl(template_file)
114
115         ext_net = self.get_external_network()
116
117         parameters = {'ext_net': ext_net,
118                       'flavor_vcpus': flavor_vcpus,
119                       'maint_image': self.conf.image_name,
120                       'nonha_intances': amount_noredundancy_instances,
121                       'ha_intances': amount_actstdby_instances}
122
123         self.log.info('creating maintenance stack.......')
124         self.log.info('parameters: %s' % parameters)
125
126         self.stack.create('doctor_test_maintenance',
127                           template,
128                           parameters=parameters,
129                           files=files)
130
131         if self.conf.admin_tool.type == 'sample':
132             self.admin_tool.start()
133         else:
134             # TBD Now we expect Fenix is running in self.conf.admin_tool.port
135             pass
136         # Inspector before app_manager, as floating ip might come late
137         self.inspector.start()
138         self.app_manager.start()
139
140     def start_maintenance(self):
141         self.log.info('start maintenance.......')
142         hvisors = self.nova.hypervisors.list(detailed=True)
143         maintenance_hosts = list()
144         for hvisor in hvisors:
145             hostname = hvisor.__getattr__('hypervisor_hostname')
146             maintenance_hosts.append(hostname)
147         url = self.endpoint
148         headers = {
149             'Content-Type': 'application/json',
150             'Accept': 'application/json'}
151         if self.conf.admin_tool.type == 'fenix':
152             headers['X-Auth-Token'] = self.admin_session.get_token()
153         self.log.info('url %s headers %s' % (url, headers))
154         retries = 12
155         ret = None
156         while retries > 0:
157             # let's start maintenance 20sec from now, so projects will have
158             # time to ACK to it before that
159             maintenance_at = (datetime.datetime.utcnow() +
160                               datetime.timedelta(seconds=30)
161                               ).strftime('%Y-%m-%d %H:%M:%S')
162
163             data = {'state': 'MAINTENANCE',
164                     'maintenance_at': maintenance_at,
165                     'metadata': {'openstack_version': 'Rocky'},
166                     'workflow': 'default'}
167
168             if self.conf.admin_tool.type == 'sample':
169                 data['hosts'] = maintenance_hosts
170             else:
171                 data['hosts'] = []
172             try:
173                 ret = requests.post(url, data=json.dumps(data),
174                                     headers=headers)
175             except Exception:
176                 if retries == 0:
177                     raise Exception('admin tool did not respond in 120s')
178                 else:
179                     self.log.info('admin tool not ready, retry in 10s')
180                 retries = retries - 1
181                 time.sleep(10)
182                 continue
183             break
184         if not ret:
185             raise Exception("admin tool did not respond")
186         if ret.status_code != 200:
187             raise Exception(ret.text)
188         return ret.json()['session_id']
189
190     def remove_maintenance_session(self, session_id):
191         self.log.info('remove maintenance session %s.......' % session_id)
192
193         url = ('%s/%s' % (self.endpoint, session_id))
194
195         headers = {
196             'Content-Type': 'application/json',
197             'Accept': 'application/json'}
198
199         if self.conf.admin_tool.type == 'fenix':
200             headers['X-Auth-Token'] = self.admin_session.get_token()
201
202         ret = requests.delete(url, data=None, headers=headers)
203         if ret.status_code != 200:
204             raise Exception(ret.text)
205
206     def get_maintenance_state(self, session_id):
207
208         url = ('%s/%s' % (self.endpoint, session_id))
209
210         headers = {
211             'Content-Type': 'application/json',
212             'Accept': 'application/json'}
213
214         if self.conf.admin_tool.type == 'fenix':
215             headers['X-Auth-Token'] = self.admin_session.get_token()
216
217         ret = requests.get(url, data=None, headers=headers)
218         if ret.status_code != 200:
219             raise Exception(ret.text)
220         return ret.json()['state']
221
222     def wait_maintenance_complete(self, session_id):
223         retries = 90
224         state = None
225         time.sleep(300)
226         while (state not in ['MAINTENANCE_DONE', 'MAINTENANCE_FAILED'] and
227                retries > 0):
228             time.sleep(10)
229             state = self.get_maintenance_state(session_id)
230             retries = retries - 1
231         self.remove_maintenance_session(session_id)
232         self.log.info('maintenance %s ended with state %s' %
233                       (session_id, state))
234         if state == 'MAINTENANCE_FAILED':
235             raise Exception('maintenance %s failed' % session_id)
236         elif retries == 0:
237             raise Exception('maintenance %s not completed within 20min' %
238                             session_id)
239
240     def cleanup_maintenance(self):
241         if self.conf.admin_tool.type == 'sample':
242             self.admin_tool.stop()
243         self.app_manager.stop()
244         self.inspector.stop()
245         self.log.info('stack delete start.......')
246         self.stack.delete()