Merge "Fix ssl cert upload path"
[fuel.git] / deploy / cloud / deployment.py
index f8e1617..4329a4c 100644 (file)
@@ -9,18 +9,14 @@
 
 import time
 import re
+import json
 
 from common import (
     N,
-    E,
     exec_cmd,
-    run_proc,
-    run_proc_wait_terminated,
-    run_proc_kill,
     parse,
     err,
     log,
-    delete,
 )
 
 SEARCH_TEXT = '(err)'
@@ -28,9 +24,21 @@ LOG_FILE = '/var/log/puppet.log'
 GREP_LINES_OF_LEADING_CONTEXT = 100
 GREP_LINES_OF_TRAILING_CONTEXT = 100
 LIST_OF_CHAR_TO_BE_ESCAPED = ['[', ']', '"']
+ERROR_MSGS = ['Critical nodes are not available for deployment',
+              'offline. Remove them from environment and try again.',
+              'Task[move_to_bootstrap/',
+              'Failed tasks: Task[connectivity-checker/']
+
+
+class DeployNotStart(Exception):
+    """Unable to start deployment"""
 
-class Deployment(object):
 
+class NodesGoOffline(Exception):
+    """Nodes goes offline during deployment"""
+
+
+class Deployment(object):
 
     def __init__(self, dea, yaml_config_dir, env_id, node_id_roles_dict,
                  no_health_check, deploy_timeout):
@@ -43,7 +51,6 @@ class Deployment(object):
         self.pattern = re.compile(
             '\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d')
 
-
     def collect_error_logs(self):
         for node_id, roles_blade in self.node_id_roles_dict.iteritems():
             log_list = []
@@ -89,7 +96,7 @@ class Deployment(object):
                     log_msg += details
 
                 if log_msg:
-                   log_list.append(log_msg)
+                    log_list.append(log_msg)
 
             if log_list:
                 role = ('controller' if 'controller' in roles_blade[0]
@@ -99,54 +106,94 @@ class Deployment(object):
                 for log_msg in log_list:
                     print(log_msg + '\n')
 
-
     def run_deploy(self):
         SLEEP_TIME = 60
-        LOG_FILE = 'cloud.log'
+        abort_after = 60 * int(self.deploy_timeout)
+        start = time.time()
 
         log('Starting deployment of environment %s' % self.env_id)
-        deploy_proc = run_proc('fuel --env %s deploy-changes | strings > %s'
-                               % (self.env_id, LOG_FILE))
-
+        deploy_id = None
         ready = False
-        for i in range(int(self.deploy_timeout)):
-            env = parse(exec_cmd('fuel env --env %s' % self.env_id))
-            log('Environment status: %s' % env[0][E['status']])
-            r, _ = exec_cmd('tail -2 %s | head -1' % LOG_FILE, False)
-            if r:
-                log(r)
-            if env[0][E['status']] == 'operational':
-                ready = True
-                break
-            elif (env[0][E['status']] == 'error'
-                  or env[0][E['status']] == 'stopped'):
-                break
-            else:
+        timeout = False
+
+        attempts = 5
+        while attempts > 0:
+            try:
+                if time.time() > start + abort_after:
+                    timeout = True
+                    break
+                if not deploy_id:
+                    deploy_id = self._start_deploy_task()
+                sts, prg, msg = self._deployment_status(deploy_id)
+                if sts == 'error':
+                    log('Error during deployment: {}'.format(msg))
+                    break
+                if sts == 'running':
+                    log('Environment deployment progress: {}%'.format(prg))
+                elif sts == 'ready':
+                    ready = True
+                    break
                 time.sleep(SLEEP_TIME)
-
-        if (env[0][E['status']] <> 'operational'
-            and env[0][E['status']] <> 'error'
-            and env[0][E['status']] <> 'stopped'):
-            err('Deployment timed out, environment %s is not operational, snapshot will not be performed'
-                % self.env_id, self.collect_logs)
-
-        run_proc_wait_terminated(deploy_proc)
-        delete(LOG_FILE)
-
+            except (DeployNotStart, NodesGoOffline) as e:
+                log(e)
+                attempts -= 1
+                deploy_id = None
+                time.sleep(SLEEP_TIME * attempts)
+
+        if timeout:
+            err('Deployment timed out, environment %s is not operational, '
+                'snapshot will not be performed'
+                % self.env_id)
         if ready:
-            log('Environment %s successfully deployed' % self.env_id)
+            log('Environment %s successfully deployed'
+                % self.env_id)
         else:
             self.collect_error_logs()
             err('Deployment failed, environment %s is not operational'
                 % self.env_id, self.collect_logs)
 
+    def _start_deploy_task(self):
+        out, _ = exec_cmd('fuel2 env deploy {}'.format(self.env_id), False)
+        id = self._deployment_task_id(out)
+        return id
+
+    def _deployment_task_id(self, response):
+        response = str(response)
+        if response.startswith('Deployment task with id'):
+            for s in response.split():
+                if s.isdigit():
+                    return int(s)
+        raise DeployNotStart('Unable to start deployment: {}'.format(response))
+
+    def _deployment_status(self, id):
+        task = self._task_fields(id)
+        if task['status'] == 'error':
+            if any(msg in task['message'] for msg in ERROR_MSGS):
+                raise NodesGoOffline(task['message'])
+        return task['status'], task['progress'], task['message']
+
+    def _task_fields(self, id):
+        try:
+            out, _ = exec_cmd('fuel2 task show {} -f json'.format(id), False)
+            task_info = json.loads(out)
+            properties = {}
+            # for 9.0 this can be list of dicts or dict
+            # see https://bugs.launchpad.net/fuel/+bug/1625518
+            if isinstance(task_info, list):
+                for d in task_info:
+                        properties.update({d['Field']: d['Value']})
+            else:
+                return task_info
+            return properties
+        except ValueError as e:
+            err('Unable to fetch task info: {}'.format(e))
 
     def collect_logs(self):
         log('Cleaning out any previous deployment logs')
         exec_cmd('rm -f /var/log/remote/fuel-snapshot-*', False)
         exec_cmd('rm -f /root/deploy-*', False)
         log('Generating Fuel deploy snap-shot')
-        if exec_cmd('fuel snapshot < /dev/null &> snapshot.log', False)[1] <> 0:
+        if exec_cmd('fuel snapshot < /dev/null &> snapshot.log', False)[1] != 0:
             log('Could not create a Fuel snapshot')
         else:
             exec_cmd('mv /root/fuel-snapshot* /var/log/remote/', False)
@@ -155,12 +202,11 @@ class Deployment(object):
         r, _ = exec_cmd('tar -czhf /root/deploy-%s.log.tar.gz /var/log/remote' % time.strftime("%Y%m%d-%H%M%S"), False)
         log(r)
 
-
     def verify_node_status(self):
-        node_list = parse(exec_cmd('fuel node list'))
+        node_list = parse(exec_cmd('fuel --env %s node' % self.env_id))
         failed_nodes = []
         for node in node_list:
-            if node[N['status']] != 'ready' and node[N['cluster']] != 'None':
+            if node[N['status']] != 'ready':
                 failed_nodes.append((node[N['id']], node[N['status']]))
 
         if failed_nodes:
@@ -169,7 +215,6 @@ class Deployment(object):
                 summary += '[node %s, status %s]\n' % (node, status)
             err('Deployment failed: %s' % summary, self.collect_logs)
 
-
     def health_check(self):
         log('Now running sanity and smoke health checks')
         r = exec_cmd('fuel health --env %s --check sanity,smoke --force' % self.env_id)
@@ -177,7 +222,6 @@ class Deployment(object):
         if 'failure' in r:
             err('Healthcheck failed!', self.collect_logs)
 
-
     def deploy(self):
         self.run_deploy()
         self.verify_node_status()