Add maintenance test code
[doctor.git] / doctor_tests / scenario / maintenance.py
1 ##############################################################################
2 # Copyright (c) 2018 Nokia Corporation and others.
3 #
4 # All rights reserved. This program and the accompanying materials
5 # are made available under the terms of the Apache License, Version 2.0
6 # which accompanies this distribution, and is available at
7 # http://www.apache.org/licenses/LICENSE-2.0
8 ##############################################################################
9 import datetime
10 import json
11 import requests
12 import time
13
14 from doctor_tests.admin_tool import get_admin_tool
15 from doctor_tests.app_manager import get_app_manager
16 from doctor_tests.common.utils import get_doctor_test_root_dir
17 from doctor_tests.identity_auth import get_identity_auth
18 from doctor_tests.identity_auth import get_session
19 from doctor_tests.inspector import get_inspector
20 from doctor_tests.os_clients import keystone_client
21 from doctor_tests.os_clients import neutron_client
22 from doctor_tests.os_clients import nova_client
23 from doctor_tests.stack import Stack
24
25
class Maintenance(object):
    """Drive the maintenance test scenario.

    The class creates the maintenance test stack, starts the admin tool,
    application manager and inspector, triggers a maintenance session
    through the admin tool HTTP endpoint and waits for it to finish.
    """

    def __init__(self, trasport_url, conf, log):
        """Create the clients and helper components used by the scenario.

        :param trasport_url: passed as-is to ``get_admin_tool`` (the
            parameter spelling is kept for interface compatibility)
        :param conf: configuration object; this class reads
            ``keystone_version``, ``nova_version``, ``doctor_project``,
            ``image_name`` and ``admin_tool.port`` from it
        :param log: logger-like object providing ``info()``
        """
        self.conf = conf
        self.log = log
        self.keystone = keystone_client(
            self.conf.keystone_version, get_session())
        self.nova = nova_client(conf.nova_version, get_session())
        auth = get_identity_auth(project=self.conf.doctor_project)
        self.neutron = neutron_client(get_session(auth=auth))
        self.stack = Stack(self.conf, self.log)
        self.admin_tool = get_admin_tool(trasport_url, self.conf, self.log)
        self.app_manager = get_app_manager(self.stack, self.conf, self.log)
        self.inspector = get_inspector(self.conf, self.log)

    def get_external_network(self):
        """Return the name of the first network flagged as external.

        :raises Exception: if no network has ``router:external`` set
        """
        for network in self.neutron.list_networks()['networks']:
            if network['router:external']:
                return network['name']
        raise Exception("external network not defined")

    def _check_hypervisors(self, hvisors):
        """Validate the hypervisors and return their common vcpu count.

        Each hypervisor needs to have the same amount of vcpus (at least
        two) and none of them may be in use before the test.

        :raises Exception: if the list is empty or a check fails
        """
        if not hvisors:
            # Without this guard the caller would fail later with an
            # unrelated error about an unassigned local variable.
            raise Exception('no hypervisors found')
        prev_vcpus = 0
        prev_hostname = ''
        for hvisor in hvisors:
            vcpus = hvisor.vcpus
            vcpus_used = hvisor.vcpus_used
            hostname = hvisor.hypervisor_hostname
            if vcpus < 2:
                raise Exception('not enough vcpus (%d) on %s' %
                                (vcpus, hostname))
            if vcpus_used > 0:
                raise Exception('%d vcpus used on %s'
                                % (vcpus_used, hostname))
            if prev_vcpus != 0 and prev_vcpus != vcpus:
                raise Exception('%d vcpus on %s does not match to '
                                '%d on %s'
                                % (vcpus, hostname,
                                   prev_vcpus, prev_hostname))
            prev_vcpus = vcpus
            prev_hostname = hostname
        return prev_vcpus

    def setup_maintenance(self, user):
        """Prepare the environment for the maintenance test.

        Checks the hypervisors, updates the quota of ``user``, creates the
        ``doctor_test_maintenance`` stack and starts the admin tool,
        application manager and inspector.

        :param user: object providing ``update_quota(instances, cores)``
        :raises Exception: if the hypervisors are not suitable for the
            test or no external network is defined
        """
        hvisors = self.nova.hypervisors.list(detailed=True)
        self.log.info('checking hypervisors.......')
        vcpus = self._check_hypervisors(hvisors)

        # maintenance flavor made so that 2 instances take whole node
        flavor_vcpus = int(vcpus / 2)
        compute_nodes = len(hvisors)
        amount_actstdby_instances = 2
        amount_noredundancy_instances = 2 * compute_nodes - 2
        self.log.info('testing %d computes with %d vcpus each'
                      % (compute_nodes, vcpus))
        self.log.info('testing %d actstdby and %d noredundancy instances'
                      % (amount_actstdby_instances,
                         amount_noredundancy_instances))
        max_instances = (amount_actstdby_instances +
                         amount_noredundancy_instances)
        max_cores = compute_nodes * vcpus

        user.update_quota(max_instances, max_cores)

        test_dir = get_doctor_test_root_dir()
        template_file = '{0}/{1}'.format(test_dir, 'maintenance_hot_tpl.yaml')
        files, template = self.stack.get_hot_tpl(template_file)

        ext_net = self.get_external_network()

        # The key spelling ('intances') is part of the runtime contract
        # with the template and is therefore left untouched.
        parameters = {'ext_net': ext_net,
                      'flavor_vcpus': flavor_vcpus,
                      'maint_image': self.conf.image_name,
                      'nonha_intances': amount_noredundancy_instances,
                      'ha_intances': amount_actstdby_instances}

        self.log.info('creating maintenance stack.......')
        self.log.info('parameters: %s' % parameters)

        self.stack.create('doctor_test_maintenance',
                          template,
                          parameters=parameters,
                          files=files)

        self.admin_tool.start()
        self.app_manager.start()
        self.inspector.start()

    def _send_maintenance_request(self, send, data):
        """Send ``data`` as JSON to the admin tool maintenance endpoint.

        :param send: the ``requests`` function to use (``requests.post``
            or ``requests.get``)
        :param data: JSON-serializable request body
        :returns: the response object
        :raises Exception: with the response text if the status code is
            not 200
        """
        url = 'http://0.0.0.0:%s/maintenance' % self.conf.admin_tool.port
        headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json'}
        ret = send(url, data=json.dumps(data), headers=headers)
        if ret.status_code != 200:
            raise Exception(ret.text)
        return ret

    def start_maintenance(self):
        """Request maintenance for all hypervisors.

        :returns: the ``session_id`` from the admin tool response
        :raises Exception: if the admin tool does not answer with 200
        """
        self.log.info('start maintenance.......')
        hvisors = self.nova.hypervisors.list(detailed=True)
        maintenance_hosts = [hvisor.hypervisor_hostname
                             for hvisor in hvisors]

        # let's start maintenance 20sec from now, so projects will have
        # time to ACK to it before that
        maintenance_at = (datetime.datetime.utcnow() +
                          datetime.timedelta(seconds=20)
                          ).strftime('%Y-%m-%d %H:%M:%S')
        data = {'hosts': maintenance_hosts,
                'state': 'MAINTENANCE',
                'maintenance_at': maintenance_at,
                'metadata': {'openstack_version': 'Pike'}}

        ret = self._send_maintenance_request(requests.post, data)
        return ret.json()['session_id']

    def remove_maintenance_session(self, session_id):
        """Ask the admin tool to remove the given maintenance session.

        :raises Exception: if the admin tool does not answer with 200
        """
        self.log.info('remove maintenance session %s.......' % session_id)
        data = {'state': 'REMOVE_MAINTENANCE_SESSION',
                'session_id': session_id}
        self._send_maintenance_request(requests.post, data)

    def get_maintenance_state(self, session_id):
        """Return the ``state`` the admin tool reports for the session.

        :raises Exception: if the admin tool does not answer with 200
        """
        data = {'session_id': session_id}
        ret = self._send_maintenance_request(requests.get, data)
        return ret.json()['state']

    def wait_maintenance_complete(self, session_id, initial_wait=600,
                                  poll_interval=10, retries=60):
        """Wait until the maintenance session completes or fails.

        Sleeps ``initial_wait`` seconds, then polls the session state every
        ``poll_interval`` seconds for at most ``retries`` times. With the
        defaults this gives the session 20 minutes in total. On completion
        the session is removed from the admin tool.

        :param session_id: maintenance session to wait for
        :param initial_wait: seconds to sleep before the first poll
        :param poll_interval: seconds to sleep before each poll
        :param retries: maximum number of polls
        :raises Exception: if the session reports ``MAINTENANCE_FAILED`` or
            does not complete within the allowed time
        """
        final_states = ('MAINTENANCE_COMPLETE', 'MAINTENANCE_FAILED')
        state = None
        time.sleep(initial_wait)
        # Stop on failure as well: there is no point in polling a failed
        # session until the retries run out, and the failure must not be
        # reported as a timeout.
        while state not in final_states and retries > 0:
            time.sleep(poll_interval)
            state = self.get_maintenance_state(session_id)
            retries = retries - 1

        if state == 'MAINTENANCE_COMPLETE':
            self.log.info('maintenance %s %s' % (session_id, state))
            self.remove_maintenance_session(session_id)
            return
        if state == 'MAINTENANCE_FAILED':
            raise Exception('maintenance %s failed' % session_id)
        # 'retries' has been counted down to zero here, so the total time
        # is derived from the elapsed polls rather than the argument.
        raise Exception('maintenance %s not completed within 20min, status'
                        ' %s' % (session_id, state))

    def cleanup_maintenance(self):
        """Stop the helper components and delete the test stack."""
        self.admin_tool.stop()
        self.app_manager.stop()
        self.inspector.stop()
        self.log.info('stack delete start.......')
        self.stack.delete()