09795c2afa2f1b0c18981944b13258da77786f65
[doctor.git] / doctor_tests / scenario / maintenance.py
1 ##############################################################################
2 # Copyright (c) 2018 Nokia Corporation and others.
3 #
4 # All rights reserved. This program and the accompanying materials
5 # are made available under the terms of the Apache License, Version 2.0
6 # which accompanies this distribution, and is available at
7 # http://www.apache.org/licenses/LICENSE-2.0
8 ##############################################################################
9 import datetime
10 import json
11 import requests
12 import time
13
14 from doctor_tests.admin_tool import get_admin_tool
15 from doctor_tests.app_manager import get_app_manager
16 from doctor_tests.common.utils import get_doctor_test_root_dir
17 from doctor_tests.identity_auth import get_identity_auth
18 from doctor_tests.identity_auth import get_session
19 from doctor_tests.inspector import get_inspector
20 from doctor_tests.os_clients import keystone_client
21 from doctor_tests.os_clients import neutron_client
22 from doctor_tests.os_clients import nova_client
23 from doctor_tests.stack import Stack
24
25
class Maintenance(object):
    """Drive the maintenance test scenario against an admin tool.

    The instance holds the OpenStack clients, the test stack, the
    application manager and the inspector used by the scenario.  When
    ``conf.admin_tool.type`` is ``'sample'`` an admin tool is created and
    started/stopped by this class; otherwise an already running service
    is expected to answer on ``conf.admin_tool.ip``/``port``.
    """

    # Headers sent with every request to the maintenance endpoint.
    _JSON_HEADERS = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'}

    # States that end the polling in wait_maintenance_complete().
    _FINAL_STATES = ('MAINTENANCE_DONE', 'MAINTENANCE_FAILED')

    def __init__(self, trasport_url, conf, log):
        """Create the clients and helper objects used by the scenario.

        :param trasport_url: passed to ``get_admin_tool`` when the sample
            admin tool is used (name kept as-is for existing callers)
        :param conf: configuration object of the test run
        :param log: logger used for progress messages
        """
        self.conf = conf
        self.log = log
        self.keystone = keystone_client(
            self.conf.keystone_version, get_session())
        self.nova = nova_client(conf.nova_version, get_session())
        auth = get_identity_auth(project=self.conf.doctor_project)
        self.neutron = neutron_client(get_session(auth=auth))
        self.stack = Stack(self.conf, self.log)
        if self.conf.admin_tool.type == 'sample':
            self.admin_tool = get_admin_tool(trasport_url, self.conf, self.log)
            self.endpoint = 'maintenance'
        else:
            # No admin tool is managed by the test in this case; keep the
            # attribute defined so it can always be inspected safely.
            self.admin_tool = None
            self.endpoint = 'v1/maintenance'
        self.app_manager = get_app_manager(self.stack, self.conf, self.log)
        self.inspector = get_inspector(self.conf, self.log)

    def _maintenance_url(self, session_id=None):
        """Return the maintenance endpoint URL, optionally for a session."""
        url = ('http://%s:%s/%s' %
               (self.conf.admin_tool.ip,
                self.conf.admin_tool.port,
                self.endpoint))
        if session_id is not None:
            url = '%s/%s' % (url, session_id)
        return url

    @staticmethod
    def _check_hypervisors(hvisors):
        """Validate the hypervisors and return their common vcpu count.

        Every hypervisor must have at least 2 vcpus, none of them in use,
        and all hypervisors must have the same amount of vcpus.

        :raises Exception: if the list is empty or any check fails
        """
        if not hvisors:
            # Without this guard the caller would fail later with an
            # unrelated NameError on the vcpu count.
            raise Exception('no hypervisors found')
        prev_vcpus = None
        prev_hostname = ''
        for hvisor in hvisors:
            vcpus = hvisor.vcpus
            vcpus_used = hvisor.vcpus_used
            hostname = hvisor.hypervisor_hostname
            if vcpus < 2:
                raise Exception('not enough vcpus (%d) on %s' %
                                (vcpus, hostname))
            if vcpus_used > 0:
                raise Exception('%d vcpus used on %s'
                                % (vcpus_used, hostname))
            if prev_vcpus is not None and prev_vcpus != vcpus:
                raise Exception('%d vcpus on %s does not match to '
                                '%d on %s'
                                % (vcpus, hostname,
                                   prev_vcpus, prev_hostname))
            prev_vcpus = vcpus
            prev_hostname = hostname
        return prev_vcpus

    def get_external_network(self):
        """Return the name of the first network marked as external.

        :raises Exception: if no network has ``router:external`` set
        """
        networks = self.neutron.list_networks()['networks']
        ext_net = next((network['name'] for network in networks
                        if network['router:external']), None)
        if ext_net is None:
            raise Exception("external network not defined")
        return ext_net

    def setup_maintenance(self, user):
        """Check the hypervisors, create the test stack and start helpers.

        :param user: object whose ``update_quota(instances, cores)`` is
            called with the limits needed by the test
        :raises Exception: if the hypervisors are not suitable for the test
        """
        # each hypervisor needs to have same amount of vcpus and they
        # need to be free before test
        hvisors = self.nova.hypervisors.list(detailed=True)
        self.log.info('checking hypervisors.......')
        vcpus = self._check_hypervisors(hvisors)

        # maintenance flavor made so that 2 instances take whole node
        flavor_vcpus = vcpus // 2
        compute_nodes = len(hvisors)
        amount_actstdby_instances = 2
        amount_noredundancy_instances = 2 * compute_nodes - 2
        self.log.info('testing %d computes with %d vcpus each'
                      % (compute_nodes, vcpus))
        self.log.info('testing %d actstdby and %d noredundancy instances'
                      % (amount_actstdby_instances,
                         amount_noredundancy_instances))
        max_instances = (amount_actstdby_instances +
                         amount_noredundancy_instances)
        max_cores = compute_nodes * vcpus

        user.update_quota(max_instances, max_cores)

        test_dir = get_doctor_test_root_dir()
        template_file = '{0}/{1}'.format(test_dir, 'maintenance_hot_tpl.yaml')
        files, template = self.stack.get_hot_tpl(template_file)

        ext_net = self.get_external_network()

        # The parameter names (including their spelling) have to match
        # the ones the template file declares.
        parameters = {'ext_net': ext_net,
                      'flavor_vcpus': flavor_vcpus,
                      'maint_image': self.conf.image_name,
                      'nonha_intances': amount_noredundancy_instances,
                      'ha_intances': amount_actstdby_instances}

        self.log.info('creating maintenance stack.......')
        self.log.info('parameters: %s' % parameters)

        self.stack.create('doctor_test_maintenance',
                          template,
                          parameters=parameters,
                          files=files)

        if self.conf.admin_tool.type == 'sample':
            self.admin_tool.start()
        # else: TBD Now we expect Fenix is running in
        # self.conf.admin_tool.port
        self.app_manager.start()
        self.inspector.start()

    def start_maintenance(self):
        """Request maintenance of all hypervisors and return the session id.

        :raises Exception: with the response text if the admin tool does
            not answer with HTTP 200
        """
        self.log.info('start maintenance.......')
        hvisors = self.nova.hypervisors.list(detailed=True)
        maintenance_hosts = [hvisor.hypervisor_hostname
                             for hvisor in hvisors]

        # let's start maintenance 30sec from now, so projects will have
        # time to ACK to it before that
        maintenance_at = (datetime.datetime.utcnow() +
                          datetime.timedelta(seconds=30)
                          ).strftime('%Y-%m-%d %H:%M:%S')
        data = {'hosts': maintenance_hosts,
                'state': 'MAINTENANCE',
                'maintenance_at': maintenance_at,
                'metadata': {'openstack_version': 'Rocky'},
                'workflow': 'default'}

        ret = requests.post(self._maintenance_url(),
                            data=json.dumps(data),
                            headers=dict(self._JSON_HEADERS))
        if ret.status_code != 200:
            raise Exception(ret.text)
        return ret.json()['session_id']

    def remove_maintenance_session(self, session_id):
        """Delete the given maintenance session from the admin tool.

        :raises Exception: with the response text if the admin tool does
            not answer with HTTP 200
        """
        self.log.info('remove maintenance session %s.......' % session_id)
        ret = requests.delete(self._maintenance_url(session_id),
                              data=None,
                              headers=dict(self._JSON_HEADERS))
        if ret.status_code != 200:
            raise Exception(ret.text)

    def get_maintenance_state(self, session_id):
        """Return the ``state`` the admin tool reports for the session.

        :raises Exception: with the response text if the admin tool does
            not answer with HTTP 200
        """
        ret = requests.get(self._maintenance_url(session_id),
                           data=None,
                           headers=dict(self._JSON_HEADERS))
        if ret.status_code != 200:
            raise Exception(ret.text)
        return ret.json()['state']

    def wait_maintenance_complete(self, session_id, initial_wait=300,
                                  poll_interval=10, retries=90):
        """Poll the session until it is done or failed, then remove it.

        With the default arguments the state is polled for at most
        20 minutes (300s initial wait plus 90 polls 10s apart).

        :param session_id: maintenance session to wait for
        :param initial_wait: seconds to sleep before the first poll
        :param poll_interval: seconds to sleep before each poll
        :param retries: maximum number of polls
        :raises Exception: if the session ends in ``MAINTENANCE_FAILED``
            or does not reach ``MAINTENANCE_DONE`` within the polls
        """
        state = None
        time.sleep(initial_wait)
        for _ in range(retries):
            time.sleep(poll_interval)
            state = self.get_maintenance_state(session_id)
            if state in self._FINAL_STATES:
                break
        self.remove_maintenance_session(session_id)
        self.log.info('maintenance %s ended with state %s' %
                      (session_id, state))
        if state == 'MAINTENANCE_FAILED':
            raise Exception('maintenance %s failed' % session_id)
        elif state != 'MAINTENANCE_DONE':
            # Decide on the state, not on the retry counter: a session
            # that completes on the very last poll has succeeded.
            waited_min = (initial_wait + retries * poll_interval) // 60
            raise Exception('maintenance %s not completed within %dmin' %
                            (session_id, waited_min))

    def cleanup_maintenance(self):
        """Stop the helpers started by setup and delete the test stack."""
        if self.conf.admin_tool.type == 'sample':
            self.admin_tool.stop()
        self.app_manager.stop()
        self.inspector.stop()
        self.log.info('stack delete start.......')
        self.stack.delete()