Fix SSH client connection reset 29/64129/2
author Tomi Juvonen <tomi.juvonen@nokia.com>
Mon, 29 Oct 2018 06:54:24 +0000 (08:54 +0200)
committer Tomi Juvonen <tomi.juvonen@nokia.com>
Mon, 29 Oct 2018 07:44:07 +0000 (09:44 +0200)
The SSH connection might be reset during a long-running session.
This may happen when running with "testcase=all".

Change-Id: I232ae906628411dfbe0bbdbdc8d4fb43167760fd
Signed-off-by: Tomi Juvonen <tomi.juvonen@nokia.com>
doctor_tests/installer/apex.py

index 9b0010e..2aa81ff 100644 (file)
@@ -192,20 +192,39 @@ class ApexInstaller(BaseInstaller):
                 restart_cmd += ' openstack-congress-server.service'
             restore_scripts.append(self.cg_restore_script)
 
-        for client in self.controller_clients:
-            self._run_apply_patches(client,
-                                    restart_cmd,
-                                    restore_scripts,
-                                    python=self.python)
-
+        for client, node_ip in zip(self.controller_clients, self.controllers):
+            retry = 0
+            while retry < 2:
+                try:
+                    self._run_apply_patches(client,
+                                            restart_cmd,
+                                            restore_scripts,
+                                            python=self.python)
+                except Exception:
+                    if retry > 0:
+                        raise Exception("SSHClient to %s feiled" % node_ip)
+                    client = SSHClient(node_ip, self.node_user_name,
+                                       key_filename=self.key_file)
+                    retry += 1
+                break
         if self.conf.test_case != 'fault_management':
             if self.use_containers:
                 restart_cmd = self._set_docker_restart_cmd("nova-compute")
             else:
                 restart_cmd = 'sudo systemctl restart' \
                               ' openstack-nova-compute.service'
-            for client in self.compute_clients:
-                self._run_apply_patches(client,
-                                        restart_cmd,
-                                        [self.nc_restore_compute_script],
-                                        python=self.python)
+            for client, node_ip in zip(self.compute_clients, self.computes):
+                retry = 0
+                while retry < 2:
+                    try:
+                        self._run_apply_patches(
+                            client, restart_cmd,
+                            [self.nc_restore_compute_script],
+                            python=self.python)
+                    except Exception:
+                        if retry > 0:
+                            raise Exception("SSHClient to %s feiled" % node_ip)
+                        client = SSHClient(node_ip, self.node_user_name,
+                                           key_filename=self.key_file)
+                        retry += 1
+                    break