add running profiler in python
[doctor.git] / tests / main.py
1 ##############################################################################
2 # Copyright (c) 2017 ZTE Corporation and others.
3 #
4 # All rights reserved. This program and the accompanying materials
5 # are made available under the terms of the Apache License, Version 2.0
6 # which accompanies this distribution, and is available at
7 # http://www.apache.org/licenses/LICENSE-2.0
8 ##############################################################################
9 import os
10 from os.path import isfile, join
11 import random
12 import sys
13 import time
14
15 from alarm import Alarm
16 from common.constants import Host
17 from common.utils import match_rep_in_file
18 import config
19 from consumer import get_consumer
20 from identity_auth import get_identity_auth
21 from identity_auth import get_session
22 from image import Image
23 from instance import Instance
24 from inspector import get_inspector
25 from installer import get_installer
26 import logger as doctor_log
27 from network import Network
28 from monitor import get_monitor
29 from os_clients import nova_client
30 from profiler_poc import main as profiler_main
31 from scenario.common import calculate_notification_time
32 from scenario.network_failure import NetworkFault
33 from user import User
34
35
# Name under which the module-wide logger is requested from the project's
# logger wrapper; LOG is handed to every helper object created in this file.
_LOGGER_NAME = 'doctor'
LOG = doctor_log.Logger(_LOGGER_NAME).getLogger()
37
38
class DoctorTest(object):
    """Driver for one end-to-end doctor test run.

    The instance owns the helper objects for every resource and component
    the test touches (image, user, network, instance, alarm, installer,
    inspector, monitor, consumer, fault injector and a nova client).
    ``run`` is the entry point: it calls ``setup``, injects the fault,
    verifies the outcome, optionally profiles the run and always finishes
    with ``cleanup``.
    """

    # Patterns handed to match_rep_in_file() to pull event timestamps out of
    # the log files. Raw strings keep ``\d`` from being read as a (invalid)
    # string escape; the pattern text itself is unchanged.
    _LINK_DOWN_REGEX = r'(?<=doctor set link down at )\d+.\d+'
    _VM_DOWN_REGEX = r'(.* doctor mark vm.* error at )(\d+.\d+)'
    # Python's ``re`` only accepts fixed-width look-behind, so the variable
    # ``.*`` prefix is matched by an ordinary group and the timestamp is
    # read from group 2, exactly like the VM pattern above.
    _HOST_DOWN_REGEX = r'(.* doctor mark host.* down at )(\d+.\d+)'
    _DETECTED_REGEX = r'(?<=doctor monitor detected at )\d+.\d+'
    _NOTIFIED_REGEX = r'(?<=doctor consumer notified at )\d+.\d+'

    def __init__(self, conf):
        """Create all helper objects from ``conf``; nothing is started yet.

        :param conf: configuration object; this class reads
            ``doctor_project``, ``nova_version``, ``instance_count``,
            ``instance_basename`` and ``profiler_type`` from it and passes
            it on to every helper.
        """
        self.conf = conf
        self.image = Image(self.conf, LOG)
        self.user = User(self.conf, LOG)
        self.network = Network(self.conf, LOG)
        self.instance = Instance(self.conf, LOG)
        self.alarm = Alarm(self.conf, LOG)
        self.installer = get_installer(self.conf, LOG)
        self.inspector = get_inspector(self.conf, LOG)
        self.monitor = get_monitor(self.conf,
                                   self.inspector.get_inspector_url(),
                                   LOG)
        self.consumer = get_consumer(self.conf, LOG)
        self.fault = NetworkFault(self.conf, self.installer, LOG)
        auth = get_identity_auth(project=self.conf.doctor_project)
        self.nova = nova_client(self.conf.nova_version,
                                get_session(auth=auth))
        # Host the fault is injected on; stays None until setup() picks it,
        # which lets cleanup() skip the "force up" step after early failures.
        self.down_host = None

    def setup(self):
        """Prepare the environment and start the doctor sample components.

        Order matters: the VM has to be launched before a host can be
        picked for it, and the monitor is started against that host.
        """
        # prepare the cloud env
        self.installer.setup()

        # preparing VM image...
        self.image.create()

        # creating test user...
        self.user.create()
        self.user.update_quota()

        # creating VM...
        self.network.create()
        self.instance.create()
        self.instance.wait_for_vm_launch()

        # creating alarm...
        self.alarm.create()

        # starting doctor sample components...
        self.inspector.start()

        self.down_host = self.get_host_info_for_random_vm()
        self.monitor.start(self.down_host)

        self.consumer.start()

    def run(self):
        """Run the doctor test.

        Exits the process with status 1 when the notification time is not
        strictly between 0 and 1 or when any step raises an ``Exception``.
        ``cleanup`` runs in every case because it sits in ``finally``
        (``SystemExit`` raised by ``sys.exit`` is not caught by
        ``except Exception`` but still passes through ``finally``).
        """
        try:
            LOG.info('doctor test starting.......')

            # prepare test env
            self.setup()

            # wait for aodh alarms are updated in caches for event evaluator,
            # sleep time should be larger than event_alarm_cache_ttl(default 60)
            time.sleep(60)

            # injecting host failure...
            # NOTE (umar) add INTERFACE_NAME logic to host injection

            self.fault.start(self.down_host)
            time.sleep(10)

            # verify the test results
            # NOTE (umar) copy remote monitor.log file when monitor=collectd
            self.check_host_status(self.down_host.name, 'down')

            notification_time = calculate_notification_time()
            if 0 < notification_time < 1:
                LOG.info('doctor test successfully, notification_time=%s' % notification_time)
            else:
                LOG.error('doctor test failed, notification_time=%s' % notification_time)
                sys.exit(1)

            if self.conf.profiler_type:
                LOG.info('doctor test begin to run profile.......')
                self.collect_logs()
                self.run_profiler()
        except Exception as e:
            LOG.error('doctor test failed, Exception=%s' % e)
            sys.exit(1)
        finally:
            self.cleanup()

    def get_host_info_for_random_vm(self):
        """Pick one test VM at random and return the host it runs on.

        :returns: ``Host(host_name, host_ip)`` for the hypervisor of the VM.
        :raises Exception: if nova lists no server with the chosen name.
        """
        num = random.randint(0, self.conf.instance_count - 1)
        vm_name = "%s%d" % (self.conf.instance_basename, num)

        servers = {server.name: server
                   for server in self.nova.servers.list()}
        server = servers.get(vm_name)
        if not server:
            raise Exception('Can not find instance: vm_name(%s)' % vm_name)
        # The attribute name contains '-' and ':' so it cannot be read with
        # normal attribute syntax; it is looked up in the instance dict.
        host_name = server.__dict__.get('OS-EXT-SRV-ATTR:hypervisor_hostname')
        host_ip = self.installer.get_host_ip_from_hostname(host_name)

        LOG.info('Get host info(name:%s, ip:%s) which vm(%s) launched at'
                 % (host_name, host_ip, vm_name))
        return Host(host_name, host_ip)

    def check_host_status(self, hostname, state):
        """Verify that nova-compute on ``hostname`` reports ``state``.

        The check raises explicitly instead of using an ``assert``
        statement so that it is still performed when Python runs with
        ``-O``.

        :param hostname: name of the compute host to query.
        :param state: expected value of the service ``state`` field.
        :raises AssertionError: if no nova-compute service is listed for
            the host or its state differs from ``state``.
        """
        services = self.nova.services.list(host=hostname,
                                           binary='nova-compute')
        if not services:
            raise AssertionError(
                'Can not find nova-compute service on host(%s)' % hostname)
        host_state = services[0].__dict__.get('state')
        if host_state != state:
            raise AssertionError(
                'Host(%s) state is %s, expected %s'
                % (hostname, host_state, state))

    def unset_forced_down_hosts(self):
        """Clear the forced-down flag of the failed host, if one was chosen."""
        if self.down_host:
            self.nova.services.force_down(self.down_host.name, 'nova-compute', False)
            # give nova a moment before the state is read back
            time.sleep(2)
            self.check_host_status(self.down_host.name, 'up')

    def collect_logs(self):
        """Fetch the fault injector's disable-network log for profiling."""
        self.fault.get_disable_network_log()

    @staticmethod
    def _read_timestamp(regex, log_file, group):
        """Return the timestamp ``regex`` captures in ``log_file`` as float.

        :param regex: pattern passed to ``match_rep_in_file``.
        :param log_file: path of the log file to search.
        :param group: index of the match group holding the timestamp.
        :raises Exception: if ``match_rep_in_file`` yields no match object.
        """
        match = match_rep_in_file(regex, log_file)
        if not match:
            raise Exception('Can not find pattern(%s) in file(%s)'
                            % (regex, log_file))
        return float(match.group(group))

    def run_profiler(self):
        """Export event offsets to the environment and run the profiler.

        Timestamps are read from ``disable_network.log`` and ``doctor.log``
        in ``sys.path[0]``. Each ``DOCTOR_PROFILER_Txx`` variable is set to
        the offset of its event from the link-down time, in whole
        milliseconds.
        """
        log_file = '{0}/{1}'.format(sys.path[0], 'disable_network.log')
        linkdown = self._read_timestamp(self._LINK_DOWN_REGEX, log_file, 0)

        log_file = '{0}/{1}'.format(sys.path[0], 'doctor.log')
        vmdown = self._read_timestamp(self._VM_DOWN_REGEX, log_file, 2)
        hostdown = self._read_timestamp(self._HOST_DOWN_REGEX, log_file, 2)
        detected = self._read_timestamp(self._DETECTED_REGEX, log_file, 0)
        notified = self._read_timestamp(self._NOTIFIED_REGEX, log_file, 0)

        # TODO(yujunz) check the actual delay to verify time sync status
        # expected ~1s delay from $trigger to $linkdown
        relative_start = linkdown
        os.environ['DOCTOR_PROFILER_T00'] = str(int((linkdown - relative_start) * 1000))
        os.environ['DOCTOR_PROFILER_T01'] = str(int((detected - relative_start) * 1000))
        os.environ['DOCTOR_PROFILER_T03'] = str(int((vmdown - relative_start) * 1000))
        os.environ['DOCTOR_PROFILER_T04'] = str(int((hostdown - relative_start) * 1000))
        os.environ['DOCTOR_PROFILER_T09'] = str(int((notified - relative_start) * 1000))

        profiler_main(log=LOG)

    def cleanup(self):
        """Stop the components and delete the test resources.

        The steps run strictly in sequence with no error handling, so an
        exception in one step skips the remaining ones.
        """
        self.unset_forced_down_hosts()
        self.inspector.stop()
        self.monitor.stop()
        self.consumer.stop()
        self.installer.cleanup()
        self.alarm.delete()
        self.instance.delete()
        self.network.delete()
        self.image.delete()
        self.fault.cleanup()
        self.user.delete()
200
201
def main():
    """Load the configuration files and run one doctor test.

    Every regular file found directly in the ``etc/`` directory next to
    the parent of ``sys.path[0]`` is passed to ``config.prepare_conf``
    together with the command-line arguments.
    """
    root_dir = os.path.dirname(sys.path[0])
    etc_dir = '{0}/{1}'.format(root_dir, 'etc/')

    # keep os.listdir() order; sub-directories and other non-files are skipped
    config_files = []
    for entry in os.listdir(etc_dir):
        candidate = join(etc_dir, entry)
        if isfile(candidate):
            config_files.append(candidate)

    conf = config.prepare_conf(args=sys.argv[1:],
                               config_files=config_files)

    DoctorTest(conf).run()
214
215
if __name__ == '__main__':
    # main() has no explicit return, so its result is passed through as the
    # exit status unchanged.
    exit_status = main()
    sys.exit(exit_status)