bugfix: increase retry time when get cluster state 19/9219/2
author: carey.xu <carey.xuhan@huawei.com>
Wed, 3 Feb 2016 01:54:03 +0000 (09:54 +0800)
committer: carey xu <carey.xuhan@huawei.com>
Thu, 4 Feb 2016 08:46:26 +0000 (08:46 +0000)
JIRA: COMPASS-301

Change-Id: I10dad32f1f1f7429d0081851042db7dbe204c71e
Signed-off-by: carey.xu <carey.xuhan@huawei.com>
(cherry picked from commit 85ac2f7c67b586b5d0f3be477e37ed01bb040e9a)

deploy/client.py

index 174356e..0d51ce2 100644 (file)
@@ -834,6 +834,21 @@ class CompassClient(object):
             )
             raise RuntimeError("redeploy cluster failed")
 
+    def get_cluster_state(self, cluster_id):
+        """Get cluster state, retrying up to 10 times; return (status, state)."""
+        for _ in range(10):
+            try:
+                status, cluster_state = self.client.get_cluster_state(cluster_id)
+                if self.is_ok(status):
+                    break
+            except Exception:  # not bare: Ctrl-C / SystemExit must stop the retries
+                status = 500  # a raised error is reported as 500 with empty state
+                cluster_state = ""
+
+            LOG.error("can not get cluster %s's state, try again" % cluster_id)
+            time.sleep(6)  # wait 6 seconds before the next attempt
+        return status, cluster_state
+
     def get_installing_progress(self, cluster_id):
         def _get_installing_progress():
             """get intalling progress."""
@@ -843,16 +858,9 @@ class CompassClient(object):
 
             current_time = time.time
             while current_time() < deployment_timeout:
-                status, cluster_state = self.client.get_cluster_state(cluster_id)
+                status, cluster_state = self.get_cluster_state(cluster_id)
                 if not self.is_ok(status):
-                    LOG.error("can not get cluster state")
-
-                    # maybe a transient error?
-                    time.sleep(5)
-                    status, cluster_state = self.client.get_cluster_state(cluster_id)
-                    if not self.is_ok(status):
-                        # OK, there's something wrong
-                        raise RuntimeError("can not get cluster state")
+                    raise RuntimeError("can not get cluster state")
 
                 if cluster_state['state'] in ['UNINITIALIZED', 'INITIALIZED']:
                     if current_time() >= action_timeout: