bugfix: for sometimes installation hang up 93/7593/4
author carey.xu <carey.xuhan@huawei.com>
Thu, 21 Jan 2016 08:19:08 +0000 (16:19 +0800)
committer Justin chi <chigang@huawei.com>
Thu, 21 Jan 2016 13:16:06 +0000 (13:16 +0000)
Change-Id: Idd266cf986e0546b0b80d6486698d340c530ff37
Signed-off-by: carey.xu <carey.xuhan@huawei.com>
(cherry picked from commit 8b1206a666d2d825a8aed654e07e0b60c9470d93)

deploy/client.py

index 62d1288..174356e 100644 (file)
@@ -835,48 +835,49 @@ class CompassClient(object):
             raise RuntimeError("redeploy cluster failed")
 
     def get_installing_progress(self, cluster_id):
-        """get intalling progress."""
-        action_timeout = time.time() + 60 * float(CONF.action_timeout)
-        deployment_timeout = time.time() + 60 * float(
-            CONF.deployment_timeout)
-
-        current_time = time.time
-        deployment_failed = True
-        while current_time() < deployment_timeout:
-            status, cluster_state = self.client.get_cluster_state(cluster_id)
-            if not self.is_ok(status):
-                raise RuntimeError("can not get cluster state")
+        def _get_installing_progress():
+            """get intalling progress."""
+            action_timeout = time.time() + 60 * float(CONF.action_timeout)
+            deployment_timeout = time.time() + 60 * float(
+                CONF.deployment_timeout)
+
+            current_time = time.time
+            while current_time() < deployment_timeout:
+                status, cluster_state = self.client.get_cluster_state(cluster_id)
+                if not self.is_ok(status):
+                    LOG.error("can not get cluster state")
 
-            if cluster_state['state'] in ['UNINITIALIZED', 'INITIALIZED']:
-                if current_time() >= action_timeout:
-                    deployment_failed = True
+                    # maybe a transient error?
+                    time.sleep(5)
+                    status, cluster_state = self.client.get_cluster_state(cluster_id)
+                    if not self.is_ok(status):
+                        # OK, there's something wrong
+                        raise RuntimeError("can not get cluster state")
+
+                if cluster_state['state'] in ['UNINITIALIZED', 'INITIALIZED']:
+                    if current_time() >= action_timeout:
+                        raise RuntimeError("installation timeout")
+                    else:
+                        time.sleep(5)
+                        continue
+
+                elif cluster_state['state'] == 'SUCCESSFUL':
                     LOG.info(
                          'get cluster %s state status %s: %s, successful',
                          cluster_id, status, cluster_state
                     )
                     break
-                else:
-                    time.sleep(5)
-                    continue
-
-            elif cluster_state['state'] == 'SUCCESSFUL':
-                deployment_failed = False
-                LOG.info(
-                     'get cluster %s state status %s: %s, successful',
-                     cluster_id, status, cluster_state
-                )
-                break
-            elif cluster_state['state'] == 'ERROR':
-                deployment_failed = True
-                LOG.info(
-                     'get cluster %s state status %s: %s, error',
-                     cluster_id, status, cluster_state
-                )
-                break
-
-        kill_print_proc()
-        if deployment_failed:
-            raise RuntimeError("deploy cluster failed")
+                elif cluster_state['state'] == 'ERROR':
+                    raise RuntimeError(
+                         'get cluster %s state status %s: %s, error',
+                         (cluster_id, status, cluster_state)
+                    )
+        try:
+            _get_installing_progress()
+        finally:
+            # do this twice, make sure process be killed
+            kill_print_proc()
+            kill_print_proc()
 
     def check_dashboard_links(self, cluster_id):
         dashboard_url = CONF.dashboard_url