fix the way to get log filename
[doctor.git] / doctor_tests / main.py
1 ##############################################################################
2 # Copyright (c) 2017 ZTE Corporation and others.
3 #
4 # All rights reserved. This program and the accompanying materials
5 # are made available under the terms of the Apache License, Version 2.0
6 # which accompanies this distribution, and is available at
7 # http://www.apache.org/licenses/LICENSE-2.0
8 ##############################################################################
9 import os
10 from os.path import isfile, join
11 import random
12 import sys
13 import time
14
15 from doctor_tests.alarm import Alarm
16 from doctor_tests.common.constants import Host
17 from doctor_tests.common.utils import match_rep_in_file
18 from doctor_tests import config
19 from doctor_tests.consumer import get_consumer
20 from doctor_tests.identity_auth import get_identity_auth
21 from doctor_tests.identity_auth import get_session
22 from doctor_tests.image import Image
23 from doctor_tests.instance import Instance
24 from doctor_tests.inspector import get_inspector
25 from doctor_tests.installer import get_installer
26 import doctor_tests.logger as doctor_log
27 from doctor_tests.network import Network
28 from doctor_tests.monitor import get_monitor
29 from doctor_tests.os_clients import nova_client
30 from doctor_tests.profiler_poc import main as profiler_main
31 from doctor_tests.scenario.common import calculate_notification_time
32 from doctor_tests.scenario.network_failure import NetworkFault
33 from doctor_tests.user import User
34
35
# Module-wide logger shared by every component created in this file.
LOG = doctor_log.Logger('doctor').getLogger()

# TODO (r-mibu): fix doctor logger or consider logfile option
# Path of the log file, read from the logger's first handler. The same file
# is parsed later for the timestamps emitted during the test run.
# NOTE(review): assumes the handler at index 0 is file-backed (exposes
# ``baseFilename``) -- confirm against doctor_tests.logger.
LOG_FILE = LOG.handlers[0].baseFilename
40
41
class DoctorTest(object):
    """Run one doctor test: set up, inject a fault, verify, clean up.

    The class wires together the image, user, network, instance, alarm,
    installer, inspector, monitor, consumer and fault helpers, all built
    from the same configuration object and the module logger.
    """

    def __init__(self, conf):
        """Create every helper object used by the test.

        :param conf: configuration object; this class reads
            ``doctor_project``, ``nova_version``, ``instance_count``,
            ``instance_basename`` and ``profiler_type`` from it.
        """
        self.conf = conf
        self.image = Image(self.conf, LOG)
        self.user = User(self.conf, LOG)
        self.network = Network(self.conf, LOG)
        self.instance = Instance(self.conf, LOG)
        self.alarm = Alarm(self.conf, LOG)
        self.installer = get_installer(self.conf, LOG)
        self.inspector = get_inspector(self.conf, LOG)
        self.monitor = get_monitor(self.conf,
                                   self.inspector.get_inspector_url(),
                                   LOG)
        self.consumer = get_consumer(self.conf, LOG)
        self.fault = NetworkFault(self.conf, self.installer, LOG)
        auth = get_identity_auth(project=self.conf.doctor_project)
        self.nova = nova_client(self.conf.nova_version,
                                get_session(auth=auth))
        # Host chosen for fault injection; stays None until setup() picks it.
        self.down_host = None

    def setup(self):
        """Prepare the environment and start the doctor sample components."""
        # prepare the cloud env
        self.installer.setup()

        # preparing VM image...
        self.image.create()

        # creating test user...
        self.user.create()
        self.user.update_quota()

        # creating VM...
        self.network.create()
        self.instance.create()
        self.instance.wait_for_vm_launch()

        # creating alarm...
        self.alarm.create()

        # starting doctor sample components...
        self.inspector.start()

        self.down_host = self.get_host_info_for_random_vm()
        self.monitor.start(self.down_host)

        self.consumer.start()

    def run(self):
        """run doctor test

        Exits the process with status 1 when the notification time is not
        strictly between 0 and 1, or when any step raises. ``cleanup`` is
        always executed before leaving.
        """
        try:
            LOG.info('doctor test starting.......')

            # prepare test env
            self.setup()

            # wait for aodh alarms are updated in caches for event evaluator,
            # sleep time should be larger than event_alarm_cache_ttl
            # (default 60)
            time.sleep(60)

            # injecting host failure...
            # NOTE (umar) add INTERFACE_NAME logic to host injection

            self.fault.start(self.down_host)
            time.sleep(10)

            # verify the test results
            # NOTE (umar) copy remote monitor.log file when monitor=collectd
            self.check_host_status(self.down_host.name, 'down')

            notification_time = calculate_notification_time(LOG_FILE)
            if 0 < notification_time < 1:
                LOG.info('doctor test successfully, notification_time=%s'
                         % notification_time)
            else:
                LOG.error('doctor test failed, notification_time=%s'
                          % notification_time)
                sys.exit(1)

            if self.conf.profiler_type:
                LOG.info('doctor test begin to run profile.......')
                self.collect_logs()
                self.run_profiler()
        except Exception as e:
            # LOG.exception keeps the traceback, which LOG.error dropped.
            LOG.exception('doctor test failed, Exception=%s' % e)
            sys.exit(1)
        finally:
            self.cleanup()

    def get_host_info_for_random_vm(self):
        """Pick one test VM at random and return the host it runs on.

        :returns: ``Host(host_name, host_ip)`` for the selected VM.
        :raises Exception: if no server with the generated name is listed.
        """
        num = random.randint(0, self.conf.instance_count - 1)
        vm_name = "%s%d" % (self.conf.instance_basename, num)

        servers = {server.name: server
                   for server in self.nova.servers.list()}
        server = servers.get(vm_name)
        if not server:
            raise Exception('Can not find instance: vm_name(%s)' % vm_name)
        host_name = server.__dict__.get('OS-EXT-SRV-ATTR:hypervisor_hostname')
        host_ip = self.installer.get_host_ip_from_hostname(host_name)

        LOG.info('Get host info(name:%s, ip:%s) which vm(%s) launched at'
                 % (host_name, host_ip, vm_name))
        return Host(host_name, host_ip)

    def check_host_status(self, hostname, state):
        """Verify that nova-compute on ``hostname`` reports ``state``.

        :param hostname: host whose nova-compute service is inspected.
        :param state: expected value of the service's ``state`` attribute.
        :raises IndexError: if no nova-compute service is listed for the host.
        :raises AssertionError: if the reported state differs from ``state``.
        """
        services = self.nova.services.list(host=hostname,
                                           binary='nova-compute')
        if not services:
            raise IndexError('no nova-compute service found for host(%s)'
                             % hostname)
        host_state = services[0].__dict__.get('state')
        # Raised explicitly: a bare ``assert`` is removed under ``python -O``
        # and would silently skip this verification.
        if host_state != state:
            raise AssertionError(
                'nova-compute on host(%s) is in state(%s), expected(%s)'
                % (hostname, host_state, state))

    def unset_forced_down_hosts(self):
        """Clear the forced-down flag on the failed host, if one was chosen."""
        if self.down_host:
            self.nova.services.force_down(self.down_host.name,
                                          'nova-compute', False)
            time.sleep(2)
            self.check_host_status(self.down_host.name, 'up')

    def collect_logs(self):
        """Ask the fault injector for its disable-network log."""
        self.fault.get_disable_network_log()

    @staticmethod
    def _log_timestamp(pattern, group=0):
        """Return the float captured by ``pattern`` in the doctor log file.

        :param pattern: regular expression handed to ``match_rep_in_file``.
        :param group: index of the match group that holds the number.
        """
        return float(match_rep_in_file(pattern, LOG_FILE).group(group))

    def run_profiler(self):
        """Export the logged event times as environment variables and profile.

        Each ``DOCTOR_PROFILER_Txx`` variable holds the integer offset of one
        event from the link-down time, scaled by 1000 (milliseconds if the
        log timestamps are in seconds -- TODO confirm).
        """
        # NOTE(review): the '.' in these patterns is unescaped, so it matches
        # any character, not only a decimal point -- confirm this is intended.
        linkdown = self._log_timestamp(
            r'(?<=doctor set link down at )\d+.\d+')
        vmdown = self._log_timestamp(
            r'(.* doctor mark vm.* error at )(\d+.\d+)', group=2)
        hostdown = self._log_timestamp(
            r'(.* doctor mark host.* down at )(\d+.\d+)', group=2)
        detected = self._log_timestamp(
            r'(?<=doctor monitor detected at )\d+.\d+')
        notified = self._log_timestamp(
            r'(?<=doctor consumer notified at )\d+.\d+')

        # TODO(yujunz) check the actual delay to verify time sync status
        # expected ~1s delay from $trigger to $linkdown
        relative_start = linkdown
        checkpoints = (
            ('T00', linkdown),
            ('T01', detected),
            ('T03', vmdown),
            ('T04', hostdown),
            ('T09', notified),
        )
        for tag, stamp in checkpoints:
            os.environ['DOCTOR_PROFILER_%s' % tag] = \
                str(int((stamp - relative_start) * 1000))

        profiler_main(log=LOG)

    def cleanup(self):
        """Tear everything down, attempting every step even if one fails.

        Steps run in a fixed order. A failing step is logged and the
        remaining steps still run, so one error does not leak the resources
        handled by later steps.

        :raises Exception: the first error raised by any step, re-raised
            after all steps have been attempted.
        """
        steps = (
            self.unset_forced_down_hosts,
            self.inspector.stop,
            self.monitor.stop,
            self.consumer.stop,
            self.installer.cleanup,
            self.alarm.delete,
            self.instance.delete,
            self.network.delete,
            self.image.delete,
            self.fault.cleanup,
            self.user.delete,
        )
        first_error = None
        for step in steps:
            try:
                step()
            except Exception as e:
                LOG.exception('doctor cleanup step failed, Exception=%s' % e)
                if first_error is None:
                    first_error = e
        if first_error is not None:
            raise first_error
202
203
def main():
    """doctor main

    Collect every regular file under the ``etc/`` directory next to this
    package, build the configuration from those files plus the command-line
    arguments, and run the doctor test.
    """
    package_dir = os.path.dirname(os.path.realpath(__file__))
    root_dir = os.path.dirname(package_dir)
    etc_dir = '{0}/{1}'.format(root_dir, 'etc/')

    # Keep only plain files; sub-directories of etc/ are not config files.
    found_files = []
    for entry in os.listdir(etc_dir):
        candidate = join(etc_dir, entry)
        if isfile(candidate):
            found_files.append(candidate)

    cli_args = sys.argv[1:]
    conf = config.prepare_conf(args=cli_args, config_files=found_files)

    DoctorTest(conf).run()