Support Fenix and sample implementation accordingly 97/63197/10
author    Tomi Juvonen <tomi.juvonen@nokia.com>
          Fri, 5 Oct 2018 08:48:25 +0000 (11:48 +0300)
committer Tomi Juvonen <tomi.juvonen@nokia.com>
          Mon, 19 Nov 2018 13:25:40 +0000 (15:25 +0200)
Fenix now has the same capability as our sample implementation, so we
can support Fenix when it is manually installed on the controllers.
The sample implementation should stay close to Fenix, as Fenix is
ultimately the place where the generic interfaces are defined.

JIRA: DOCTOR-131

Change-Id: Ied58b8f469dbcc4bb5caa787e62c1831a211ecd6
Signed-off-by: Tomi Juvonen <tomi.juvonen@nokia.com>
doctor_tests/admin_tool/__init__.py
doctor_tests/admin_tool/sample.py
doctor_tests/app_manager/sample.py
doctor_tests/installer/apex.py
doctor_tests/installer/base.py
doctor_tests/installer/common/set_config.py
doctor_tests/scenario/maintenance.py
doctor_tests/stack.py
tox.ini

diff --git a/doctor_tests/admin_tool/__init__.py b/doctor_tests/admin_tool/__init__.py
index e8b1281..3417a33 100644
--- a/doctor_tests/admin_tool/__init__.py
+++ b/doctor_tests/admin_tool/__init__.py
@@ -8,16 +8,16 @@
 ##############################################################################
 from oslo_config import cfg
 from oslo_utils import importutils
-
+import os
 
 OPTS = [
     cfg.StrOpt('type',
-               default='sample',
-               choices=['sample'],
+               default=os.environ.get('ADMIN_TOOL_TYPE', 'sample'),
+               choices=['sample', 'fenix'],
                help='the component of doctor admin_tool',
                required=True),
     cfg.StrOpt('ip',
-               default='127.0.0.1',
+               default='0.0.0.0',
                help='the ip of admin_tool',
                required=True),
     cfg.IntOpt('port',
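
The admin tool implementation is now chosen from the ADMIN_TOOL_TYPE
environment variable (passed through by the tox.ini hunk at the end), so
Fenix can be selected without editing config files. A minimal sketch of how
the patched option resolves, assuming only the option definition above:

    import os
    from oslo_config import cfg

    CONF = cfg.CONF
    # Same default logic as the patched OPTS: the environment variable
    # wins, otherwise the sample implementation is used.
    CONF.register_opts([
        cfg.StrOpt('type',
                   default=os.environ.get('ADMIN_TOOL_TYPE', 'sample'),
                   choices=['sample', 'fenix'])],
        group='admin_tool')

    print(CONF.admin_tool.type)  # 'fenix' when ADMIN_TOOL_TYPE=fenix is set
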
diff --git a/doctor_tests/admin_tool/sample.py b/doctor_tests/admin_tool/sample.py
index 892a4c8..a71f43a 100644
--- a/doctor_tests/admin_tool/sample.py
+++ b/doctor_tests/admin_tool/sample.py
@@ -59,7 +59,7 @@ class AdminMain(Thread):
         self.parent = parent
         self.log = log
         self.conf = conf
-        self.url = 'http://0.0.0.0:%s' % conf.admin_tool.port
+        self.url = 'http://%s:%s' % (conf.admin_tool.ip, conf.admin_tool.port)
         self.projects_state = dict()  # current state for each project
         self.proj_server_actions = dict()  # actions for each project server
         self.projects_servers = dict()  # servers processed in current state
@@ -86,6 +86,7 @@ class AdminMain(Thread):
                                               driver='messaging',
                                               topics=['notifications'])
         self.notif_admin = self.notif_admin.prepare(publisher_id='admin_tool')
+        self.stopped = False
         self.log.info('Admin tool session %s initialized' % self.session_id)
 
     def cleanup(self):
@@ -116,14 +117,15 @@ class AdminMain(Thread):
         if self._projects_not_in_wanted_states(wanted_states):
             self.log.error('Admin tool session %s: projects in invalid states '
                            '%s' % (self.session_id, self.projects_state))
-            raise Exception('Admin tool session %s: not all projects in states'
-                            ' %s' % (self.session_id, wanted_states))
+            return False
         else:
             self.log.info('all projects replied')
+            return True
 
     def _project_notify(self, project_id, instance_ids, allowed_actions,
                         actions_at, state, metadata):
-        reply_url = '%s/%s/maintenance' % (self.url, project_id)
+        reply_url = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+                                              project_id)
 
         payload = dict(project_id=project_id,
                        instance_ids=instance_ids,
@@ -148,11 +150,12 @@ class AdminMain(Thread):
 
         self.notif_admin.info({'some': 'context'}, 'maintenance.host', payload)
 
-    def down_scale(self):
+    def in_scale(self):
         for project in self.projects_servers:
-            self.log.info('DOWN_SCALE to project %s' % project)
+            self.log.info('SCALE_IN to project %s' % project)
             self.log.debug('instance_ids %s' % self.projects_servers[project])
-            instance_ids = '%s/%s/maintenance' % (self.url, project)
+            instance_ids = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+                                                     project)
             allowed_actions = []
             wait_seconds = 120
             actions_at = (datetime.datetime.utcnow() +
@@ -163,18 +166,20 @@ class AdminMain(Thread):
             self._project_notify(project, instance_ids,
                                  allowed_actions, actions_at, state,
                                  metadata)
-        allowed_states = ['ACK_DOWN_SCALE', 'NACK_DOWN_SCALE']
-        self.wait_projects_state(allowed_states, wait_seconds)
-        if self.projects_not_in_state('ACK_DOWN_SCALE'):
-            raise Exception('Admin tool session %s: all states not '
-                            'ACK_DOWN_SCALE %s' %
-                            (self.session_id, self.projects_state))
+        allowed_states = ['ACK_SCALE_IN', 'NACK_SCALE_IN']
+        if not self.wait_projects_state(allowed_states, wait_seconds):
+            self.state = 'MAINTENANCE_FAILED'
+        if self.projects_not_in_state('ACK_SCALE_IN'):
+            self.log.error('%s: all states not ACK_SCALE_IN' %
+                           self.session_id)
+            self.state = 'MAINTENANCE_FAILED'
 
     def maintenance(self):
         for project in self.projects_servers:
             self.log.info('\nMAINTENANCE to project %s\n' % project)
             self.log.debug('instance_ids %s' % self.projects_servers[project])
-            instance_ids = '%s/%s/maintenance' % (self.url, project)
+            instance_ids = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+                                                     project)
             allowed_actions = []
             actions_at = self.maintenance_at
             state = self.state
@@ -190,16 +195,18 @@ class AdminMain(Thread):
                                  allowed_actions, actions_at, state,
                                  metadata)
         allowed_states = ['ACK_MAINTENANCE', 'NACK_MAINTENANCE']
-        self.wait_projects_state(allowed_states, wait_seconds)
+        if not self.wait_projects_state(allowed_states, wait_seconds):
+            self.state = 'MAINTENANCE_FAILED'
         if self.projects_not_in_state('ACK_MAINTENANCE'):
-            raise Exception('Admin tool session %s: all states not '
-                            'ACK_MAINTENANCE %s' %
-                            (self.session_id, self.projects_state))
+            self.log.error('%s: all states not ACK_MAINTENANCE' %
+                           self.session_id)
+            self.state = 'MAINTENANCE_FAILED'
 
     def maintenance_complete(self):
         for project in self.projects_servers:
             self.log.info('MAINTENANCE_COMPLETE to project %s' % project)
-            instance_ids = '%s/%s/maintenance' % (self.url, project)
+            instance_ids = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+                                                     project)
             allowed_actions = []
             wait_seconds = 120
             actions_at = (datetime.datetime.utcnow() +
@@ -212,13 +219,14 @@ class AdminMain(Thread):
                                  metadata)
         allowed_states = ['ACK_MAINTENANCE_COMPLETE',
                           'NACK_MAINTENANCE_COMPLETE']
-        self.wait_projects_state(allowed_states, wait_seconds)
+        if not self.wait_projects_state(allowed_states, wait_seconds):
+            self.state = 'MAINTENANCE_FAILED'
         if self.projects_not_in_state('ACK_MAINTENANCE_COMPLETE'):
-            raise Exception('Admin tool session %s: all states not '
-                            'ACK_MAINTENANCE_COMPLETE %s' %
-                            (self.session_id, self.projects_state))
+            self.log.error('%s: all states not ACK_MAINTENANCE_COMPLETE' %
+                           self.session_id)
+            self.state = 'MAINTENANCE_FAILED'
 
-    def need_down_scale(self, host_servers):
+    def need_in_scale(self, host_servers):
         room_for_instances = 0
         for host in host_servers:
             instances = 0
@@ -267,7 +275,8 @@ class AdminMain(Thread):
             self.projects_servers[project] = projects_servers[project].copy()
             self.log.info('%s to project %s' % (state, project))
             self.project_servers_log_info(project, projects_servers)
-            instance_ids = '%s/%s/maintenance' % (self.url, project)
+            instance_ids = '%s/maintenance/%s/%s' % (self.url, self.session_id,
+                                                     project)
             allowed_actions = ['MIGRATE', 'LIVE_MIGRATE', 'OWN_ACTION']
             wait_seconds = 120
             actions_at = (datetime.datetime.utcnow() +
@@ -278,11 +287,14 @@ class AdminMain(Thread):
                                  allowed_actions, actions_at, state,
                                  metadata)
         allowed_states = [state_ack, state_nack]
-        self.wait_projects_state(allowed_states, wait_seconds)
-        if self.projects_not_in_state(state_ack):
-            raise Exception('Admin tool session %s: all states not %s %s' %
-                            (self.session_id, state_ack, self.projects_state))
-        self.actions_to_have_empty_host(host)
+        if not self.wait_projects_state(allowed_states, wait_seconds):
+            self.state = 'MAINTENANCE_FAILED'
+        elif self.projects_not_in_state(state_ack):
+            self.log.error('%s: all states not %s' %
+                           (self.session_id, state_ack))
+            self.state = 'MAINTENANCE_FAILED'
+        else:
+            self.actions_to_have_empty_host(host)
 
     def notify_action_done(self, project, instance_id):
         instance_ids = instance_id
@@ -463,7 +475,8 @@ class AdminMain(Thread):
         time.sleep(5)
 
     def run(self):
-        while self.state != 'MAINTENANCE_COMPLETE':
+        while (self.state not in ['MAINTENANCE_DONE', 'MAINTENANCE_FAILED'] and
+               not self.stopped):
             self.log.info('--==session %s: processing state %s==--' %
                           (self.session_id, self.state))
             if self.state == 'MAINTENANCE':
@@ -474,7 +487,8 @@ class AdminMain(Thread):
                     raise Exception('all projects do not listen maintenance '
                                     'alarm')
                 self.maintenance()
-
+                if self.state == 'MAINTENANCE_FAILED':
+                    continue
                 maint_at = self.str_to_datetime(self.maintenance_at)
                 if maint_at > datetime.datetime.utcnow():
                     time_now = (datetime.datetime.utcnow().strftime(
@@ -492,14 +506,14 @@ class AdminMain(Thread):
                 # True -> PLANNED_MAINTENANCE
                 # False -> check if we can migrate VMs to get empty host
                 # True -> PREPARE_MAINTENANCE
-                # False -> DOWN_SCALE
+                # False -> SCALE_IN
                 maintenance_empty_hosts = ([h for h in self.hosts if h not in
                                            host_servers])
 
                 if len(maintenance_empty_hosts) == 0:
-                    if self.need_down_scale(host_servers):
+                    if self.need_in_scale(host_servers):
                         self.log.info('Need to down scale')
-                        self.state = 'DOWN_SCALE'
+                        self.state = 'SCALE_IN'
                     else:
                         self.log.info('Free capacity, but need empty host')
                         self.state = 'PREPARE_MAINTENANCE'
@@ -508,14 +522,17 @@ class AdminMain(Thread):
                     self.state = 'PLANNED_MAINTENANCE'
                 self.log.info('--==State change from MAINTENANCE to %s==--'
                               % self.state)
-            elif self.state == 'DOWN_SCALE':
+            elif self.state == 'SCALE_IN':
                 # Test case is hard coded to have all compute capacity used
                 # We need to down scale to have one empty compute host
-                self.down_scale()
+                self.update_server_info()
+                self.in_scale()
+                if self.state == 'MAINTENANCE_FAILED':
+                    continue
                 self.state = 'PREPARE_MAINTENANCE'
                 host_servers = self.update_server_info()
                 self.servers_log_info(host_servers)
-                self.log.info('--==State change from DOWN_SCALE to'
+                self.log.info('--==State change from SCALE_IN to'
                               ' %s==--' % self.state)
 
             elif self.state == 'PREPARE_MAINTENANCE':
@@ -527,7 +544,7 @@ class AdminMain(Thread):
                                            host_servers])
                 if len(maintenance_empty_hosts) == 0:
                     self.log.info('no empty hosts for maintenance')
-                    if self.need_down_scale(host_servers):
+                    if self.need_in_scale(host_servers):
                         raise Exception('Admin tool session %s: Not enough '
                                         'free capacity for maintenance' %
                                         self.session_id)
@@ -535,6 +552,8 @@ class AdminMain(Thread):
                     if host:
                         self.make_compute_host_empty(host, host_servers[host],
                                                      'PREPARE_MAINTENANCE')
+                        if self.state == 'MAINTENANCE_FAILED':
+                            continue
                     else:
                         # We do not currently support another down scale if
                         # first was not enough
@@ -566,6 +585,7 @@ class AdminMain(Thread):
                         maintenance_empty_hosts.append(host)
                 self.log.info('--==Start to maintain empty hosts==--\n%s' %
                               maintenance_empty_hosts)
+                self.update_server_info()
                 for host in maintenance_empty_hosts:
                     # scheduler has problems, let's see if just down scaled
                     # host is really empty
@@ -586,6 +606,8 @@ class AdminMain(Thread):
                     self.log.info('PLANNED_MAINTENANCE host %s' % host)
                     self.make_compute_host_empty(host, host_servers[host],
                                                  'PLANNED_MAINTENANCE')
+                    if self.state == 'MAINTENANCE_FAILED':
+                        continue
                     self.log.info('IN_MAINTENANCE  host %s' % host)
                     self._admin_notify(admin_project, host, 'IN_MAINTENANCE',
                                        self.session_id)
@@ -603,14 +625,16 @@ class AdminMain(Thread):
                 self.log.info('Projects still need to up scale back to full '
                              'capacity')
                 self.maintenance_complete()
+                if self.state == 'MAINTENANCE_FAILED':
+                    continue
                 host_servers = self.update_server_info()
                 self.servers_log_info(host_servers)
-                self.state = 'MAINTENANCE_COMPLETE'
+                self.state = 'MAINTENANCE_DONE'
             else:
                 raise Exception('Admin tool session %s: session in invalid '
                                 'state %s' % (self.session_id, self.state))
-        self.log.info('--==Maintenance session %s: '
-                      'MAINTENANCE SESSION COMPLETE==--' % self.session_id)
+        self.log.info('--==Maintenance session %s: %s==--' %
+                      (self.session_id, self.state))
 
     def project_input(self, project_id, data):
         self.log.debug('Admin tool session %s: project %s input' %
@@ -637,7 +661,6 @@ class AdminTool(Thread):
         self.admin_tool = admin_tool
         self.log = log
         self.conf = conf
-        self.port = self.conf.admin_tool.port
         self.maint_sessions = {}
         self.projects = {}
         self.maintenance_hosts = []
@@ -650,63 +673,55 @@ class AdminTool(Thread):
         def admin_maintenance_api_post():
             data = json.loads(request.data.decode('utf8'))
             self.log.info('maintenance message: %s' % data)
-            if 'session_id' in data:
-                if data['state'] == 'REMOVE_MAINTENANCE_SESSION':
-                    session_id = data['session_id']
-                    self.log.info('remove session %s'
-                                  % session_id)
-                    self.maint_sessions[session_id].cleanup()
-                    self.maint_sessions[session_id].stop()
-                    del self.maint_sessions[session_id]
-            else:
-                session_id = str(generate_uuid())
-                self.log.info('creating session: %s' % session_id)
-                self.maint_sessions[session_id] = (
-                    AdminMain(self.trasport_url,
-                              session_id,
-                              data,
-                              self,
-                              self.conf,
-                              self.log))
-                self.maint_sessions[session_id].start()
+            session_id = str(generate_uuid())
+            self.log.info('creating session: %s' % session_id)
+            self.maint_sessions[session_id] = (
+                AdminMain(self.trasport_url,
+                          session_id,
+                          data,
+                          self,
+                          self.conf,
+                          self.log))
+            self.maint_sessions[session_id].start()
             reply = json.dumps({'session_id': session_id,
                                 'state': 'ACK_%s' % data['state']})
             self.log.debug('reply: %s' % reply)
             return reply, 200, None
 
-        @app.route('/maintenance', methods=['GET'])
-        def admin_maintenance_api_get():
-            data = json.loads(request.data.decode('utf8'))
-            self.log.debug('Admin get maintenance: %s' % data)
-            session_id = data['session_id']
+        @app.route('/maintenance/<session_id>', methods=['GET'])
+        def admin_maintenance_api_get(session_id=None):
+            self.log.debug('Admin get maintenance')
             reply = json.dumps({'state':
                                self.maint_sessions[session_id].state})
-            self.log.debug('reply: %s' % reply)
+            self.log.info('reply: %s' % reply)
             return reply, 200, None
 
-        @app.route('/<projet_id>/maintenance', methods=['PUT'])
-        def project_maintenance_api_put(projet_id=None):
+        @app.route('/maintenance/<session_id>/<projet_id>', methods=['PUT'])
+        def project_maintenance_api_put(session_id=None, projet_id=None):
             data = json.loads(request.data.decode('utf8'))
             self.log.debug('%s project put: %s' % (projet_id, data))
-            self.project_input(projet_id, data)
+            self.project_input(session_id, projet_id, data)
             return 'OK'
 
-        @app.route('/<projet_id>/maintenance', methods=['GET'])
-        def project_maintenance_api_get(projet_id=None):
-            data = json.loads(request.data.decode('utf8'))
-            self.log.debug('%s project get %s' % (projet_id, data))
-            instances = self.project_get_instances(projet_id, data)
+        @app.route('/maintenance/<session_id>/<projet_id>', methods=['GET'])
+        def project_maintenance_api_get(session_id=None, projet_id=None):
+            self.log.debug('%s project get %s' % (projet_id, session_id))
+            instances = self.project_get_instances(session_id, projet_id)
             reply = json.dumps({'instance_ids': instances})
             self.log.debug('%s reply: %s' % (projet_id, reply))
             return reply, 200, None
 
+        @app.route('/maintenance/<session_id>', methods=['DELETE'])
+        def remove_session(session_id=None):
+            self.log.info('remove session %s'
+                          % session_id)
+            self.maint_sessions[session_id].cleanup()
+            self.maint_sessions[session_id].stop()
+            del self.maint_sessions[session_id]
+            return 'OK'
+
         @app.route('/shutdown', methods=['POST'])
         def shutdown():
-            for session in self.maint_sessions:
-                self.log.info('shutdown admin tool session %s thread' %
-                              session)
-                self.maint_sessions[session].cleanup()
-                self.maint_sessions[session].stop()
             self.log.info('shutdown admin_tool server at %s' % time.time())
             func = request.environ.get('werkzeug.server.shutdown')
             if func is None:
@@ -714,13 +729,11 @@ class AdminTool(Thread):
             func()
             return 'admin_tool app shutting down...'
 
-        app.run(host='0.0.0.0', port=self.port)
+        app.run(host=self.conf.admin_tool.ip, port=self.conf.admin_tool.port)
 
-    def project_input(self, project_id, data):
-        session_id = data['session_id']
+    def project_input(self, session_id, project_id, data):
         self.maint_sessions[session_id].project_input(project_id, data)
 
-    def project_get_instances(self, project_id, data):
-        session_id = data['session_id']
+    def project_get_instances(self, session_id, project_id):
         return self.maint_sessions[session_id].project_get_instances(
             project_id)
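
The sample admin tool's REST API now carries the session id in the URL:
POST /maintenance creates a session, GET /maintenance/<session_id> returns
its state, PUT and GET on /maintenance/<session_id>/<project_id> handle
project replies and instance queries, and DELETE /maintenance/<session_id>
replaces the old REMOVE_MAINTENANCE_SESSION message. A client-side sketch of
the session lifecycle (host, port and field values are illustrative,
mirroring the test scenario further below):

    import json
    import requests

    base = 'http://127.0.0.1:12347/maintenance'  # ip/port are config-driven
    headers = {'Content-Type': 'application/json',
               'Accept': 'application/json'}

    # POST creates a session; the reply carries the generated session_id.
    ret = requests.post(base, headers=headers,
                        data=json.dumps({'hosts': ['overcloud-novacompute-0'],
                                         'state': 'MAINTENANCE',
                                         'maintenance_at': '2018-11-19 13:00:00',
                                         'metadata': {'openstack_version': 'Rocky'}}))
    session_id = ret.json()['session_id']

    # GET polls the session state until a terminal value is reached.
    state = requests.get('%s/%s' % (base, session_id),
                         headers=headers).json()['state']

    # DELETE removes the finished session (was REMOVE_MAINTENANCE_SESSION).
    requests.delete('%s/%s' % (base, session_id), headers=headers)
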
diff --git a/doctor_tests/app_manager/sample.py b/doctor_tests/app_manager/sample.py
index 94926ee..a7bc412 100644
--- a/doctor_tests/app_manager/sample.py
+++ b/doctor_tests/app_manager/sample.py
@@ -114,8 +114,7 @@ class AppManager(Thread):
                 for t in data['reason_data']['event']['traits']})
 
     def get_session_instance_ids(self, url, session_id):
-        data = {'session_id': session_id}
-        ret = requests.get(url, data=json.dumps(data), headers=self.headers)
+        ret = requests.get(url, data=None, headers=self.headers)
         if ret.status_code != 200:
             raise Exception(ret.text)
         self.log.info('get_instance_ids %s' % ret.json())
@@ -177,12 +176,12 @@ class AppManager(Thread):
                 reply['instance_ids'] = instance_ids
                 reply_state = 'ACK_MAINTENANCE'
 
-            elif state == 'DOWN_SCALE':
+            elif state == 'SCALE_IN':
             # scale down 2 instances, i.e. the VCPUs equaling a single
             # compute node
                 self.scale_instances(-2)
                 reply['instance_ids'] = self.get_instance_ids()
-                reply_state = 'ACK_DOWN_SCALE'
+                reply_state = 'ACK_SCALE_IN'
 
             elif state == 'MAINTENANCE_COMPLETE':
                 # possibly need to upscale
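
After scaling in, the app manager now replies ACK_SCALE_IN (or
NACK_SCALE_IN) instead of the old ACK_DOWN_SCALE. An illustrative sketch of
how such a reply could reach the admin tool, assuming it is delivered as a
PUT to the session-scoped reply_url carried in the notification (url and
ids are placeholders):

    import json
    import requests

    # reply_url comes from the maintenance notification payload.
    reply_url = 'http://127.0.0.1:12347/maintenance/<session_id>/<project_id>'
    reply = {'instance_ids': ['instance-uuid-1', 'instance-uuid-2'],
             'state': 'ACK_SCALE_IN'}
    requests.put(reply_url, data=json.dumps(reply),
                 headers={'Content-Type': 'application/json'})
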
diff --git a/doctor_tests/installer/apex.py b/doctor_tests/installer/apex.py
index 2aa81ff..3c97378 100644
--- a/doctor_tests/installer/apex.py
+++ b/doctor_tests/installer/apex.py
@@ -114,6 +114,22 @@ class ApexInstaller(BaseInstaller):
     def set_apply_patches(self):
         self.log.info('Set apply patches start......')
 
+        if self.conf.test_case != 'fault_management':
+            if self.use_containers:
+                restart_cmd = self._set_docker_restart_cmd("nova-compute")
+            else:
+                restart_cmd = 'sudo systemctl restart' \
+                              ' openstack-nova-compute.service'
+            for node_ip in self.computes:
+                client = SSHClient(node_ip, self.node_user_name,
+                                   key_filename=self.key_file)
+                self.compute_clients.append(client)
+                self._run_apply_patches(client,
+                                        restart_cmd,
+                                        [self.nc_set_compute_script],
+                                        python=self.python)
+            time.sleep(10)
+
         set_scripts = [self.cm_set_script]
 
         if self.use_containers:
@@ -147,24 +163,6 @@ class ApexInstaller(BaseInstaller):
                                     set_scripts,
                                     python=self.python)
 
-        if self.conf.test_case != 'fault_management':
-            if self.use_containers:
-                restart_cmd = self._set_docker_restart_cmd("nova-compute")
-            else:
-                restart_cmd = 'sudo systemctl restart' \
-                              ' openstack-nova-compute.service'
-            for node_ip in self.computes:
-                client = SSHClient(node_ip, self.node_user_name,
-                                   key_filename=self.key_file)
-                self.compute_clients.append(client)
-                self._run_apply_patches(client,
-                                        restart_cmd,
-                                        [self.nc_set_compute_script],
-                                        python=self.python)
-
-        if self.conf.test_case != 'fault_management':
-            time.sleep(10)
-
     def restore_apply_patches(self):
         self.log.info('restore apply patches start......')
 
diff --git a/doctor_tests/installer/base.py b/doctor_tests/installer/base.py
index 3043593..124b191 100644
--- a/doctor_tests/installer/base.py
+++ b/doctor_tests/installer/base.py
@@ -14,8 +14,9 @@ import pwd
 import six
 import stat
 import subprocess
+import time
 
-from doctor_tests.common.utils import get_doctor_test_root_dir
+from doctor_tests.common import utils
 from doctor_tests.identity_auth import get_session
 from doctor_tests.os_clients import nova_client
 
@@ -75,7 +76,7 @@ class BaseInstaller(object):
                 cmd = ("ssh -o UserKnownHostsFile=/dev/null"
                        " -o StrictHostKeyChecking=no"
                        " -i %s %s@%s -R %s:localhost:%s"
-                       " sleep %s > ssh_tunnel.%s"
+                       " sleep %s > ssh_tunnel.%s.%s"
                        " 2>&1 < /dev/null "
                        % (self.key_file,
                           self.node_user_name,
@@ -83,9 +84,28 @@ class BaseInstaller(object):
                           port,
                           port,
                           tunnel_uptime,
-                          node_ip))
+                          node_ip,
+                          port))
                 server = subprocess.Popen('exec ' + cmd, shell=True)
                 self.servers.append(server)
+        if self.conf.admin_tool.type == 'fenix':
+            port = self.conf.admin_tool.port
+            self.log.info('tunnel for port %s' % port)
+            cmd = ("ssh -o UserKnownHostsFile=/dev/null"
+                   " -o StrictHostKeyChecking=no"
+                   " -i %s %s@%s -L %s:localhost:%s"
+                   " sleep %s > ssh_tunnel.%s.%s"
+                   " 2>&1 < /dev/null "
+                   % (self.key_file,
+                      self.node_user_name,
+                      node_ip,
+                      port,
+                      port,
+                      tunnel_uptime,
+                      node_ip,
+                      port))
+            server = subprocess.Popen('exec ' + cmd, shell=True)
+            self.servers.append(server)
 
     def _get_ssh_key(self, client, key_path):
         self.log.info('Get SSH keys from %s installer......'
@@ -96,7 +116,8 @@ class BaseInstaller(object):
                           % self.conf.installer.type)
             return self.key_file
 
-        ssh_key = '{0}/{1}'.format(get_doctor_test_root_dir(), 'instack_key')
+        ssh_key = '{0}/{1}'.format(utils.get_doctor_test_root_dir(),
+                                   'instack_key')
         client.scp(key_path, ssh_key, method='get')
         user = getpass.getuser()
         uid = pwd.getpwnam(user).pw_uid
@@ -131,6 +152,7 @@ class BaseInstaller(object):
             ret = False
         return ret
 
+    @utils.run_async
     def _run_apply_patches(self, client, restart_cmd, script_names,
                            python='python3'):
         installer_dir = os.path.dirname(os.path.realpath(__file__))
@@ -146,4 +168,7 @@ class BaseInstaller(object):
                     raise Exception('Do the command in remote'
                                     ' node failed, ret=%s, cmd=%s, output=%s'
                                     % (ret, cmd, output))
+            if 'nova-scheduler' in restart_cmd:
+                # Make sure scheduler has proper cpu_allocation_ratio
+                time.sleep(5)
             client.ssh(restart_cmd)
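
_run_apply_patches is now decorated with utils.run_async, so the nodes are
patched in parallel rather than serially (hence the explicit sleeps that
guard the service restarts). The decorator itself lives in
doctor_tests.common.utils and is not part of this diff; a minimal
thread-based sketch of what such a decorator typically looks like, as an
assumption:

    import functools
    import threading

    def run_async(func):
        """Run the decorated function in a separate thread."""
        @functools.wraps(func)
        def async_func(*args, **kwargs):
            thread = threading.Thread(target=func, args=args, kwargs=kwargs)
            thread.start()
            return thread  # caller may join() if it must wait for completion
        return async_func
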
diff --git a/doctor_tests/installer/common/set_config.py b/doctor_tests/installer/common/set_config.py
index 3dc6cd9..e66d4c2 100644
--- a/doctor_tests/installer/common/set_config.py
+++ b/doctor_tests/installer/common/set_config.py
@@ -125,6 +125,7 @@ def set_event_definitions():
                 'reply_url': {'fields': 'payload.reply_url'},
                 'actions_at': {'fields': 'payload.actions_at',
                                'type': 'datetime'},
+                'reply_at': {'fields': 'payload.reply_at', 'type': 'datetime'},
                 'state': {'fields': 'payload.state'},
                 'session_id': {'fields': 'payload.session_id'},
                 'project_id': {'fields': 'payload.project_id'},
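
The new reply_at trait exposes the reply deadline of maintenance
notifications to the event consumers. Combined with the traits above, a
consumed event payload would look roughly like this (all values are
illustrative, and reply_at being present in the payload is an assumption
based on the trait definition):

    payload = {
        'project_id': '<project-uuid>',
        'session_id': '<session-uuid>',
        'state': 'PLANNED_MAINTENANCE',
        'actions_at': '2018-11-19 13:25:00',
        'reply_at': '2018-11-19 13:24:00',  # newly added trait
        'reply_url': 'http://127.0.0.1:12347/maintenance/<session_id>/<project_id>',
        'allowed_actions': ['MIGRATE', 'LIVE_MIGRATE', 'OWN_ACTION'],
        'metadata': {'openstack_version': 'Rocky'},
    }
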
diff --git a/doctor_tests/scenario/maintenance.py b/doctor_tests/scenario/maintenance.py
index 9fcd412..09795c2 100644
--- a/doctor_tests/scenario/maintenance.py
+++ b/doctor_tests/scenario/maintenance.py
@@ -34,7 +34,11 @@ class Maintenance(object):
         auth = get_identity_auth(project=self.conf.doctor_project)
         self.neutron = neutron_client(get_session(auth=auth))
         self.stack = Stack(self.conf, self.log)
-        self.admin_tool = get_admin_tool(trasport_url, self.conf, self.log)
+        if self.conf.admin_tool.type == 'sample':
+            self.admin_tool = get_admin_tool(trasport_url, self.conf, self.log)
+            self.endpoint = 'maintenance'
+        else:
+            self.endpoint = 'v1/maintenance'
         self.app_manager = get_app_manager(self.stack, self.conf, self.log)
         self.inspector = get_inspector(self.conf, self.log)
 
@@ -110,7 +114,11 @@ class Maintenance(object):
                           parameters=parameters,
                           files=files)
 
-        self.admin_tool.start()
+        if self.conf.admin_tool.type == 'sample':
+            self.admin_tool.start()
+        else:
+            # TBD: we expect Fenix to be running on self.conf.admin_tool.port
+            pass
         self.app_manager.start()
         self.inspector.start()
 
@@ -122,16 +130,21 @@ class Maintenance(object):
             hostname = hvisor.__getattr__('hypervisor_hostname')
             maintenance_hosts.append(hostname)
 
-        url = 'http://0.0.0.0:%s/maintenance' % self.conf.admin_tool.port
+        url = ('http://%s:%s/%s' %
+               (self.conf.admin_tool.ip,
+                self.conf.admin_tool.port,
+                self.endpoint))
+
-        # let's start maintenance 20sec from now, so projects will have
+        # let's start maintenance 30sec from now, so projects will have
         # time to ACK to it before that
         maintenance_at = (datetime.datetime.utcnow() +
-                          datetime.timedelta(seconds=20)
+                          datetime.timedelta(seconds=30)
                           ).strftime('%Y-%m-%d %H:%M:%S')
         data = {'hosts': maintenance_hosts,
                 'state': 'MAINTENANCE',
                 'maintenance_at': maintenance_at,
-                'metadata': {'openstack_version': 'Pike'}}
+                'metadata': {'openstack_version': 'Rocky'},
+                'workflow': 'default'}
         headers = {
             'Content-Type': 'application/json',
             'Accept': 'application/json'}
@@ -143,49 +156,56 @@ class Maintenance(object):
 
     def remove_maintenance_session(self, session_id):
         self.log.info('remove maintenance session %s.......' % session_id)
+        url = ('http://%s:%s/%s/%s' %
+               (self.conf.admin_tool.ip,
+                self.conf.admin_tool.port,
+                self.endpoint,
+                session_id))
 
-        url = 'http://0.0.0.0:%s/maintenance' % self.conf.admin_tool.port
-
-        data = {'state': 'REMOVE_MAINTENANCE_SESSION',
-                'session_id': session_id}
         headers = {
             'Content-Type': 'application/json',
             'Accept': 'application/json'}
 
-        ret = requests.post(url, data=json.dumps(data), headers=headers)
+        ret = requests.delete(url, data=None, headers=headers)
         if ret.status_code != 200:
             raise Exception(ret.text)
 
     def get_maintenance_state(self, session_id):
-        url = 'http://0.0.0.0:%s/maintenance' % self.conf.admin_tool.port
-        data = {'session_id': session_id}
+        url = ('http://%s:%s/%s/%s' %
+               (self.conf.admin_tool.ip,
+                self.conf.admin_tool.port,
+                self.endpoint,
+                session_id))
+
         headers = {
             'Content-Type': 'application/json',
             'Accept': 'application/json'}
-        ret = requests.get(url, data=json.dumps(data), headers=headers)
+        ret = requests.get(url, data=None, headers=headers)
         if ret.status_code != 200:
             raise Exception(ret.text)
         return ret.json()['state']
 
     def wait_maintenance_complete(self, session_id):
-        retries = 66
+        retries = 90
         state = None
-        time.sleep(540)
-        while state != 'MAINTENANCE_COMPLETE' and retries > 0:
+        time.sleep(300)
+        while (state not in ['MAINTENANCE_DONE', 'MAINTENANCE_FAILED'] and
+               retries > 0):
             time.sleep(10)
             state = self.get_maintenance_state(session_id)
             retries = retries - 1
-        if retries == 0 and state != 'MAINTENANCE_COMPLETE':
-            raise Exception('maintenance %s not completed within 20min, status'
-                            ' %s' % (session_id, state))
-        elif state == 'MAINTENANCE_COMPLETE':
-            self.log.info('maintenance %s %s' % (session_id, state))
-            self.remove_maintenance_session(session_id)
-        elif state == 'MAINTENANCE_FAILED':
+        self.remove_maintenance_session(session_id)
+        self.log.info('maintenance %s ended with state %s' %
+                      (session_id, state))
+        if state == 'MAINTENANCE_FAILED':
             raise Exception('maintenance %s failed' % session_id)
+        elif retries == 0:
+            raise Exception('maintenance %s not completed within 20min' %
+                            session_id)
 
     def cleanup_maintenance(self):
-        self.admin_tool.stop()
+        if self.conf.admin_tool.type == 'sample':
+            self.admin_tool.stop()
         self.app_manager.stop()
         self.inspector.stop()
         self.log.info('stack delete start.......')
diff --git a/doctor_tests/stack.py b/doctor_tests/stack.py
index ee586fa..8a921be 100644
--- a/doctor_tests/stack.py
+++ b/doctor_tests/stack.py
@@ -94,7 +94,7 @@ class Stack(object):
             # It might not always work at first
             self.log.info('retry creating maintenance stack.......')
             self.delete()
-            time.sleep(3)
+            time.sleep(5)
             stack = self.heat.stacks.create(stack_name=self.stack_name,
                                             files=files,
                                             template=template,
diff --git a/tox.ini b/tox.ini
index 6e0d8b4..2242637 100644
--- a/tox.ini
+++ b/tox.ini
@@ -29,6 +29,7 @@ passenv =
     INSTALLER_TYPE
     INSTALLER_IP
     INSPECTOR_TYPE
+    ADMIN_TOOL_TYPE
     TEST_CASE
     SSH_KEY
 changedir = {toxinidir}/doctor_tests