a2129f61ce8c924b5175ece19388be85a2136ff3
[doctor.git] / doctor_tests / scenario / maintenance.py
1 ##############################################################################
2 # Copyright (c) 2018 Nokia Corporation and others.
3 #
4 # All rights reserved. This program and the accompanying materials
5 # are made available under the terms of the Apache License, Version 2.0
6 # which accompanies this distribution, and is available at
7 # http://www.apache.org/licenses/LICENSE-2.0
8 ##############################################################################
9 import datetime
10 import json
11 import requests
12 import time
13
14 from doctor_tests.admin_tool import get_admin_tool
15 from doctor_tests.app_manager import get_app_manager
16 from doctor_tests.common.utils import get_doctor_test_root_dir
17 from doctor_tests.identity_auth import get_identity_auth
18 from doctor_tests.identity_auth import get_session
19 from doctor_tests.inspector import get_inspector
20 from doctor_tests.os_clients import keystone_client
21 from doctor_tests.os_clients import neutron_client
22 from doctor_tests.os_clients import nova_client
23 from doctor_tests.stack import Stack
24
25
class Maintenance(object):
    """Drive the maintenance test scenario against an admin tool.

    The object creates the OpenStack clients, the Heat stack wrapper, the
    application manager and the inspector used by the test, and talks to
    the admin tool over HTTP at
    ``http://<conf.admin_tool.ip>:<conf.admin_tool.port>/<endpoint>``.
    """

    def __init__(self, trasport_url, conf, log):
        """Create the clients and helpers used by the scenario.

        :param trasport_url: passed on unchanged to the sample admin tool
            and to the inspector (the spelling is kept because it is part
            of the existing signature)
        :param conf: test configuration object
        :param log: logger used for progress messages
        """
        self.conf = conf
        self.log = log
        self.keystone = keystone_client(
            self.conf.keystone_version, get_session())
        self.nova = nova_client(conf.nova_version, get_session())
        auth = get_identity_auth(project=self.conf.doctor_project)
        self.neutron = neutron_client(get_session(auth=auth))
        self.stack = Stack(self.conf, self.log)
        if self.conf.admin_tool.type == 'sample':
            # only the sample admin tool is created (and later started and
            # stopped) by this class
            self.admin_tool = get_admin_tool(trasport_url, self.conf, self.log)
            self.endpoint = 'maintenance'
        else:
            self.endpoint = 'v1/maintenance'
        self.app_manager = get_app_manager(self.stack, self.conf, self.log)
        self.inspector = get_inspector(self.conf, self.log, trasport_url)

    @staticmethod
    def _json_headers():
        """Return a fresh copy of the JSON request headers."""
        return {
            'Content-Type': 'application/json',
            'Accept': 'application/json'}

    def _admin_tool_url(self, session_id=None):
        """Build the admin tool URL, optionally for one session."""
        url = ('http://%s:%s/%s' %
               (self.conf.admin_tool.ip,
                self.conf.admin_tool.port,
                self.endpoint))
        if session_id is not None:
            url = '%s/%s' % (url, session_id)
        return url

    @staticmethod
    def _raise_on_error(ret):
        """Raise ``Exception`` with the response text unless status is 200."""
        if ret.status_code != 200:
            raise Exception(ret.text)

    def get_external_network(self):
        """Return the name of the first network flagged as external.

        :raises Exception: if no such network name is found
        """
        networks = self.neutron.list_networks()['networks']
        ext_net = next((network['name'] for network in networks
                        if network['router:external']), None)
        if ext_net is None:
            raise Exception("external network not defined")
        return ext_net

    def _check_hypervisors(self, hvisors):
        """Verify all hypervisors are free and equally sized.

        :param hvisors: non-empty list of detailed hypervisor records
        :returns: the vcpu count shared by every hypervisor
        :raises Exception: if a hypervisor has fewer than 2 vcpus, still
            has vcpus in use, or differs in vcpu count from the previous one
        """
        prev_vcpus = 0
        prev_hostname = ''
        vcpus = 0
        for hvisor in hvisors:
            vcpus = hvisor.vcpus
            vcpus_used = hvisor.vcpus_used
            hostname = hvisor.hypervisor_hostname
            if vcpus < 2:
                raise Exception('not enough vcpus (%d) on %s' %
                                (vcpus, hostname))
            if vcpus_used > 0:
                if self.conf.test_case == 'all':
                    # VCPU might not yet be free after fault_management test
                    self.log.info('%d vcpus used on %s, retry...'
                                  % (vcpus_used, hostname))
                    time.sleep(15)
                    hvisor = self.nova.hypervisors.get(hvisor.id)
                    vcpus_used = hvisor.vcpus_used
                if vcpus_used > 0:
                    raise Exception('%d vcpus used on %s'
                                    % (vcpus_used, hostname))
            if prev_vcpus != 0 and prev_vcpus != vcpus:
                raise Exception('%d vcpus on %s does not match to '
                                '%d on %s'
                                % (vcpus, hostname,
                                   prev_vcpus, prev_hostname))
            prev_vcpus = vcpus
            prev_hostname = hostname
        return vcpus

    def setup_maintenance(self, user):
        """Prepare quota, stack and helper services for the test.

        :param user: object whose ``update_quota(instances, cores)`` is
            called with the totals computed from the hypervisors
        :raises Exception: if there are no hypervisors or they do not
            satisfy the checks (at least 2 vcpus, none in use, same vcpu
            count everywhere)
        """
        # each hypervisor needs to have same amount of vcpus and they
        # need to be free before test
        hvisors = self.nova.hypervisors.list(detailed=True)
        if not hvisors:
            # without this the vcpu count below would be undefined
            raise Exception('no hypervisors found')
        self.log.info('checking hypervisors.......')
        vcpus = self._check_hypervisors(hvisors)

        # maintenance flavor made so that 2 instances take whole node
        flavor_vcpus = int(vcpus / 2)
        compute_nodes = len(hvisors)
        amount_actstdby_instances = 2
        amount_noredundancy_instances = 2 * compute_nodes - 2
        self.log.info('testing %d computes with %d vcpus each'
                      % (compute_nodes, vcpus))
        self.log.info('testing %d actstdby and %d noredundancy instances'
                      % (amount_actstdby_instances,
                         amount_noredundancy_instances))
        max_instances = (amount_actstdby_instances +
                         amount_noredundancy_instances)
        max_cores = compute_nodes * vcpus

        user.update_quota(max_instances, max_cores)

        test_dir = get_doctor_test_root_dir()
        template_file = '{0}/{1}'.format(test_dir, 'maintenance_hot_tpl.yaml')
        files, template = self.stack.get_hot_tpl(template_file)

        ext_net = self.get_external_network()

        parameters = {'ext_net': ext_net,
                      'flavor_vcpus': flavor_vcpus,
                      'maint_image': self.conf.image_name,
                      'nonha_intances': amount_noredundancy_instances,
                      'ha_intances': amount_actstdby_instances}

        self.log.info('creating maintenance stack.......')
        self.log.info('parameters: %s' % parameters)

        self.stack.create('doctor_test_maintenance',
                          template,
                          parameters=parameters,
                          files=files)

        if self.conf.admin_tool.type == 'sample':
            self.admin_tool.start()
        else:
            # TBD Now we expect Fenix is running in self.conf.admin_tool.port
            pass
        self.app_manager.start()
        self.inspector.start()

    def start_maintenance(self):
        """Request a maintenance session covering every hypervisor.

        :returns: the ``session_id`` field of the admin tool's JSON reply
        :raises Exception: with the response text if the status is not 200
        """
        self.log.info('start maintenance.......')
        hvisors = self.nova.hypervisors.list(detailed=True)
        maintenance_hosts = [hvisor.hypervisor_hostname
                             for hvisor in hvisors]

        # let's start maintenance 30sec from now, so projects will have
        # time to ACK to it before that
        maintenance_at = (datetime.datetime.utcnow() +
                          datetime.timedelta(seconds=30)
                          ).strftime('%Y-%m-%d %H:%M:%S')
        data = {'hosts': maintenance_hosts,
                'state': 'MAINTENANCE',
                'maintenance_at': maintenance_at,
                'metadata': {'openstack_version': 'Rocky'},
                'workflow': 'default'}

        ret = requests.post(self._admin_tool_url(),
                            data=json.dumps(data),
                            headers=self._json_headers())
        self._raise_on_error(ret)
        return ret.json()['session_id']

    def remove_maintenance_session(self, session_id):
        """Send a DELETE for the given session to the admin tool.

        :raises Exception: with the response text if the status is not 200
        """
        self.log.info('remove maintenance session %s.......' % session_id)
        ret = requests.delete(self._admin_tool_url(session_id),
                              data=None,
                              headers=self._json_headers())
        self._raise_on_error(ret)

    def get_maintenance_state(self, session_id):
        """Return the ``state`` field reported for the given session.

        :raises Exception: with the response text if the status is not 200
        """
        ret = requests.get(self._admin_tool_url(session_id),
                           data=None,
                           headers=self._json_headers())
        self._raise_on_error(ret)
        return ret.json()['state']

    def wait_maintenance_complete(self, session_id, initial_wait=300,
                                  poll_interval=10, retries=90):
        """Poll the session until it is done or failed, then remove it.

        :param session_id: session to wait for
        :param initial_wait: seconds to sleep before the first poll
        :param poll_interval: seconds to sleep before each poll
        :param retries: maximum number of polls
        :raises Exception: if the session ends in ``MAINTENANCE_FAILED`` or
            does not reach ``MAINTENANCE_DONE`` within the allowed polls
        """
        final_states = ('MAINTENANCE_DONE', 'MAINTENANCE_FAILED')
        state = None
        remaining = retries
        time.sleep(initial_wait)
        while state not in final_states and remaining > 0:
            time.sleep(poll_interval)
            state = self.get_maintenance_state(session_id)
            remaining -= 1
        self.remove_maintenance_session(session_id)
        self.log.info('maintenance %s ended with state %s' %
                      (session_id, state))
        if state == 'MAINTENANCE_FAILED':
            raise Exception('maintenance %s failed' % session_id)
        if state != 'MAINTENANCE_DONE':
            # decide on the state, not on the retry counter: the last
            # allowed poll may well have returned MAINTENANCE_DONE
            total_minutes = (initial_wait + retries * poll_interval) // 60
            raise Exception('maintenance %s not completed within %dmin' %
                            (session_id, total_minutes))

    def cleanup_maintenance(self):
        """Stop the helper services and delete the stack."""
        if self.conf.admin_tool.type == 'sample':
            self.admin_tool.stop()
        self.app_manager.stop()
        self.inspector.stop()
        self.log.info('stack delete start.......')
        self.stack.delete()