X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=doctor_tests%2Fmain.py;h=3dea89d13ae6f079a48720d4735b36882198d4af;hb=d8eb12f4200c21f569df5bc01d378a846b4c0db0;hp=006aac9f21f3fd4c0ab923f047ef9ec9c47685e4;hpb=25759e0a0204765a7a9454d7586df66592e943c2;p=doctor.git diff --git a/doctor_tests/main.py b/doctor_tests/main.py index 006aac9f..3dea89d1 100644 --- a/doctor_tests/main.py +++ b/doctor_tests/main.py @@ -1,5 +1,5 @@ ############################################################################## -# Copyright (c) 2017 ZTE Corporation and others. +# Copyright (c) 2019 ZTE Corporation and others. # # All rights reserved. This program and the accompanying materials # are made available under the terms of the Apache License, Version 2.0 @@ -8,32 +8,25 @@ ############################################################################## import os from os.path import isfile, join -import random import sys import time +from traceback import format_exc -from doctor_tests.alarm import Alarm -from doctor_tests.common.constants import Host -from doctor_tests.common.utils import match_rep_in_file from doctor_tests import config -from doctor_tests.consumer import get_consumer from doctor_tests.identity_auth import get_identity_auth from doctor_tests.identity_auth import get_session from doctor_tests.image import Image -from doctor_tests.instance import Instance -from doctor_tests.inspector import get_inspector from doctor_tests.installer import get_installer import doctor_tests.logger as doctor_log -from doctor_tests.network import Network -from doctor_tests.monitor import get_monitor +from doctor_tests.scenario.fault_management import FaultManagement from doctor_tests.os_clients import nova_client -from doctor_tests.profiler_poc import main as profiler_main -from doctor_tests.scenario.common import calculate_notification_time -from doctor_tests.scenario.network_failure import NetworkFault +from doctor_tests.scenario.maintenance import Maintenance from doctor_tests.user import User -LOG = doctor_log.Logger('doctor').getLogger() +Logger = doctor_log.Logger('doctor') +LOG = Logger.getLogger() +LogFile = Logger.getLogFilename() class DoctorTest(object): @@ -42,160 +35,137 @@ class DoctorTest(object): self.conf = conf self.image = Image(self.conf, LOG) self.user = User(self.conf, LOG) - self.network = Network(self.conf, LOG) - self.instance = Instance(self.conf, LOG) - self.alarm = Alarm(self.conf, LOG) self.installer = get_installer(self.conf, LOG) - self.inspector = get_inspector(self.conf, LOG) - self.monitor = get_monitor(self.conf, - self.inspector.get_inspector_url(), - LOG) - self.consumer = get_consumer(self.conf, LOG) - self.fault = NetworkFault(self.conf, self.installer, LOG) auth = get_identity_auth(project=self.conf.doctor_project) self.nova = nova_client(self.conf.nova_version, get_session(auth=auth)) - self.down_host = None def setup(self): # prepare the cloud env self.installer.setup() - # preparing VM image... self.image.create() # creating test user... self.user.create() - self.user.update_quota() - # creating VM... - self.network.create() - self.instance.create() - self.instance.wait_for_vm_launch() + def test_fault_management(self): + retry = 2 + # Retry once if notified_time is None + while retry > 0: + try: + self.fault_management = None + LOG.info('doctor fault management test starting.......') + transport_url = self.installer.get_transport_url() + self.fault_management = \ + FaultManagement(self.conf, self.installer, self.user, LOG, + transport_url) + + # prepare test env + self.fault_management.setup() + + # wait for aodh alarms are updated in caches for event + # evaluator, + # sleep time should be larger than event_alarm_cache_ttl + # (default 60) + # (tojuvone) Fraser currently needs 120 + time.sleep(120) + + # injecting host failure... + # NOTE (umar) add INTERFACE_NAME logic to host injection + self.fault_management.start() + time.sleep(30) + + # verify the test results + # NOTE (umar) copy remote monitor.log file when + # monitor=collectd + self.fault_management.check_host_status('down') + self.fault_management.check_notification_time() + retry = 0 + + except Exception as e: + LOG.error('doctor fault management test failed, ' + 'Exception=%s' % e) + if 'notified_time=None' in str(e): + retry -= 1 + LOG.info('doctor fault management retry') + continue + LOG.error(format_exc()) + sys.exit(1) + finally: + if self.fault_management is not None: + self.fault_management.cleanup() + + def _amount_compute_nodes(self): + services = self.nova.services.list(binary='nova-compute') + return len(services) + + def test_maintenance(self): + cnodes = self._amount_compute_nodes() + if cnodes < 3: + # need 2 compute for redundancy and one spare to migrate + LOG.info('not enough compute nodes, skipping doctor ' + 'maintenance test') + return + elif self.conf.installer.type not in ['apex', 'fuel', 'devstack']: + LOG.info('not supported installer, skipping doctor ' + 'maintenance test') + return + try: + maintenance = None + LOG.info('doctor maintenance test starting.......') + trasport_url = self.installer.get_transport_url() + maintenance = Maintenance(trasport_url, self.conf, LOG) + maintenance.setup_maintenance(self.user) - # creating alarm... - self.alarm.create() + # wait for aodh alarms are updated in caches for event evaluator, + # sleep time should be larger than event_alarm_cache_ttl + # (default 60) + LOG.info('wait aodh for 120s.......') + time.sleep(120) - # starting doctor sample components... - self.inspector.start() + session_id = maintenance.start_maintenance() + maintenance.wait_maintenance_complete(session_id) - self.down_host = self.get_host_info_for_random_vm() - self.monitor.start(self.down_host) + LOG.info('doctor maintenance complete.......') - self.consumer.start() + except Exception as e: + LOG.error('doctor maintenance test failed, Exception=%s' % e) + LOG.error(format_exc()) + sys.exit(1) + finally: + if maintenance is not None: + maintenance.cleanup_maintenance() def run(self): - """run doctor test""" + """run doctor tests""" try: LOG.info('doctor test starting.......') - # prepare test env + # prepare common test env self.setup() - # wait for aodh alarms are updated in caches for event evaluator, - # sleep time should be larger than event_alarm_cache_ttl(default 60) - time.sleep(60) - - # injecting host failure... - # NOTE (umar) add INTERFACE_NAME logic to host injection - - self.fault.start(self.down_host) - time.sleep(10) - - # verify the test results - # NOTE (umar) copy remote monitor.log file when monitor=collectd - self.check_host_status(self.down_host.name, 'down') - - notification_time = calculate_notification_time() - if notification_time < 1 and notification_time > 0: - LOG.info('doctor test successfully, notification_time=%s' % notification_time) + if self.conf.test_case == 'all': + self.test_fault_management() + self.test_maintenance() else: - LOG.error('doctor test failed, notification_time=%s' % notification_time) - sys.exit(1) - - if self.conf.profiler_type: - LOG.info('doctor test begin to run profile.......') - self.collect_logs() - self.run_profiler() + function = 'test_%s' % self.conf.test_case + if hasattr(self, function): + getattr(self, function)() + else: + raise Exception('Can not find function <%s> in' + 'DoctorTest, see config manual' + % function) except Exception as e: LOG.error('doctor test failed, Exception=%s' % e) + LOG.error(format_exc()) sys.exit(1) finally: self.cleanup() - def get_host_info_for_random_vm(self): - num = random.randint(0, self.conf.instance_count - 1) - vm_name = "%s%d" % (self.conf.instance_basename, num) - - servers = \ - {getattr(server, 'name'): server - for server in self.nova.servers.list()} - server = servers.get(vm_name) - if not server: - raise \ - Exception('Can not find instance: vm_name(%s)' % vm_name) - host_name = server.__dict__.get('OS-EXT-SRV-ATTR:hypervisor_hostname') - host_ip = self.installer.get_host_ip_from_hostname(host_name) - - LOG.info('Get host info(name:%s, ip:%s) which vm(%s) launched at' - % (host_name, host_ip, vm_name)) - return Host(host_name, host_ip) - - def check_host_status(self, hostname, state): - service = self.nova.services.list(host=hostname, binary='nova-compute') - host_state = service[0].__dict__.get('state') - assert host_state == state - - def unset_forced_down_hosts(self): - if self.down_host: - self.nova.services.force_down(self.down_host.name, 'nova-compute', False) - time.sleep(2) - self.check_host_status(self.down_host.name, 'up') - - def collect_logs(self): - self.fault.get_disable_network_log() - - def run_profiler(self): - - log_file = '{0}/{1}'.format(sys.path[0], 'disable_network.log') - reg = '(?<=doctor set link down at )\d+.\d+' - linkdown = float(match_rep_in_file(reg, log_file).group(0)) - - log_file = '{0}/{1}'.format(sys.path[0], 'doctor.log') - reg = '(.* doctor mark vm.* error at )(\d+.\d+)' - vmdown = float(match_rep_in_file(reg, log_file).group(2)) - - reg = '(?<=doctor mark host.* down at )\d+.\d+' - hostdown = float(match_rep_in_file(reg, log_file).group(2)) - - reg = '(?<=doctor monitor detected at )\d+.\d+' - detected = float(match_rep_in_file(reg, log_file).group(0)) - - reg = '(?<=doctor consumer notified at )\d+.\d+' - notified = float(match_rep_in_file(reg, log_file).group(0)) - - # TODO(yujunz) check the actual delay to verify time sync status - # expected ~1s delay from $trigger to $linkdown - relative_start = linkdown - os.environ['DOCTOR_PROFILER_T00'] = str(int((linkdown - relative_start)*1000)) - os.environ['DOCTOR_PROFILER_T01'] = str(int((detected - relative_start) * 1000)) - os.environ['DOCTOR_PROFILER_T03'] = str(int((vmdown - relative_start) * 1000)) - os.environ['DOCTOR_PROFILER_T04'] = str(int((hostdown - relative_start) * 1000)) - os.environ['DOCTOR_PROFILER_T09'] = str(int((notified - relative_start) * 1000)) - - profiler_main(log=LOG) - def cleanup(self): - self.unset_forced_down_hosts() - self.inspector.stop() - self.monitor.stop() - self.consumer.stop() self.installer.cleanup() - self.alarm.delete() - self.instance.delete() - self.network.delete() self.image.delete() - self.fault.cleanup() self.user.delete() @@ -205,7 +175,8 @@ def main(): doctor_root_dir = os.path.dirname(test_dir) config_file_dir = '{0}/{1}'.format(doctor_root_dir, 'etc/') - config_files = [join(config_file_dir, f) for f in os.listdir(config_file_dir) + config_files = [join(config_file_dir, f) + for f in os.listdir(config_file_dir) if isfile(join(config_file_dir, f))] conf = config.prepare_conf(args=sys.argv[1:],