X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=deploy%2Fcloud%2Fdeployment.py;h=28bcfdf1dc4528c754cf01bbfa0d19a7df605ab3;hb=1906a373372b7ddd7eeb181dbe69b204b209e584;hp=63c8c1e531055623c4ce4f363bc52f6bb1ae1dbf;hpb=ed4b806cbad2ba44d9ac68df3f7f7f15980392d9;p=fuel.git diff --git a/deploy/cloud/deployment.py b/deploy/cloud/deployment.py index 63c8c1e53..28bcfdf1d 100644 --- a/deploy/cloud/deployment.py +++ b/deploy/cloud/deployment.py @@ -7,19 +7,16 @@ # http://www.apache.org/licenses/LICENSE-2.0 ############################################################################### - import time import re +import json from common import ( N, - E, exec_cmd, - run_proc, parse, err, log, - delete, ) SEARCH_TEXT = '(err)' @@ -28,15 +25,25 @@ GREP_LINES_OF_LEADING_CONTEXT = 100 GREP_LINES_OF_TRAILING_CONTEXT = 100 LIST_OF_CHAR_TO_BE_ESCAPED = ['[', ']', '"'] + +class DeployNotStart(Exception): + """Unable to start deployment""" + + +class NodesGoOffline(Exception): + """Nodes goes offline during deployment""" + + class Deployment(object): def __init__(self, dea, yaml_config_dir, env_id, node_id_roles_dict, - no_health_check): + no_health_check, deploy_timeout): self.dea = dea self.yaml_config_dir = yaml_config_dir self.env_id = env_id self.node_id_roles_dict = node_id_roles_dict self.no_health_check = no_health_check + self.deploy_timeout = deploy_timeout self.pattern = re.compile( '\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d') @@ -85,7 +92,7 @@ class Deployment(object): log_msg += details if log_msg: - log_list.append(log_msg) + log_list.append(log_msg) if log_list: role = ('controller' if 'controller' in roles_blade[0] @@ -96,61 +103,125 @@ class Deployment(object): print(log_msg + '\n') def run_deploy(self): - WAIT_LOOP = 180 SLEEP_TIME = 60 - LOG_FILE = 'cloud.log' + abort_after = 60 * int(self.deploy_timeout) + start = time.time() log('Starting deployment of environment %s' % self.env_id) - run_proc('fuel --env %s deploy-changes | strings | tee %s' - % (self.env_id, LOG_FILE)) - + deploy_id = None ready = False - for i in range(WAIT_LOOP): - env = parse(exec_cmd('fuel env --env %s' % self.env_id)) - log('Environment status: %s' % env[0][E['status']]) - r, _ = exec_cmd('tail -2 %s | head -1' % LOG_FILE, False) - if r: - log(r) - if env[0][E['status']] == 'operational': - ready = True - break - elif (env[0][E['status']] == 'error' - or env[0][E['status']] == 'stopped'): - break - else: + timeout = False + + attempts = 0 + while attempts < 3: + try: + if time.time() > start + abort_after: + timeout = True + break + if not deploy_id: + deploy_id = self._start_deploy_task() + sts, prg, msg = self._deployment_status(deploy_id) + if sts == 'error': + log('Error during deployment: {}'.format(msg)) + break + if sts == 'running': + log('Environment deployment progress: {}%'.format(prg)) + elif sts == 'ready': + ready = True + break time.sleep(SLEEP_TIME) - delete(LOG_FILE) - + except (DeployNotStart, NodesGoOffline) as e: + log(e) + attempts += 1 + deploy_id = None + time.sleep(SLEEP_TIME * attempts) + + if timeout: + err('Deployment timed out, environment %s is not operational, ' + 'snapshot will not be performed' + % self.env_id) if ready: - log('Environment %s successfully deployed' % self.env_id) + log('Environment %s successfully deployed' + % self.env_id) else: self.collect_error_logs() err('Deployment failed, environment %s is not operational' - % self.env_id) + % self.env_id, self.collect_logs) + + def _start_deploy_task(self): + out, _ = exec_cmd('fuel2 env deploy {}'.format(self.env_id), False) + id = self._deployment_task_id(out) + return id + + def _deployment_task_id(self, response): + response = str(response) + if response.startswith('Deployment task with id'): + for s in response.split(): + if s.isdigit(): + return int(s) + raise DeployNotStart('Unable to start deployment: {}'.format(response)) + + def _deployment_status(self, id): + task = self._task_fields(id) + if task['status'] == 'error': + if task['message'].endswith( + 'offline. Remove them from environment and try again.'): + raise NodesGoOffline(task['message']) + return task['status'], task['progress'], task['message'] + + def _task_fields(self, id): + try: + out, _ = exec_cmd('fuel2 task show {} -f json'.format(id), False) + task_info = json.loads(out) + properties = {} + # for 9.0 this can be list of dicts or dict + # see https://bugs.launchpad.net/fuel/+bug/1625518 + if isinstance(task_info, list): + for d in task_info: + properties.update({d['Field']: d['Value']}) + else: + return task_info + return properties + except ValueError as e: + err('Unable to fetch task info: {}'.format(e)) + + def collect_logs(self): + log('Cleaning out any previous deployment logs') + exec_cmd('rm -f /var/log/remote/fuel-snapshot-*', False) + exec_cmd('rm -f /root/deploy-*', False) + log('Generating Fuel deploy snap-shot') + if exec_cmd('fuel snapshot < /dev/null &> snapshot.log', False)[1] <> 0: + log('Could not create a Fuel snapshot') + else: + exec_cmd('mv /root/fuel-snapshot* /var/log/remote/', False) + + log('Collecting all Fuel Snapshot & deploy log files') + r, _ = exec_cmd('tar -czhf /root/deploy-%s.log.tar.gz /var/log/remote' % time.strftime("%Y%m%d-%H%M%S"), False) + log(r) def verify_node_status(self): - node_list = parse(exec_cmd('fuel node list')) + node_list = parse(exec_cmd('fuel --env %s node' % self.env_id)) failed_nodes = [] for node in node_list: - if node[N['status']] != 'ready' and node[N['cluster']] != 'None': + if node[N['status']] != 'ready': failed_nodes.append((node[N['id']], node[N['status']])) if failed_nodes: summary = '' for node, status in failed_nodes: summary += '[node %s, status %s]\n' % (node, status) - err('Deployment failed: %s' % summary) + err('Deployment failed: %s' % summary, self.collect_logs) def health_check(self): log('Now running sanity and smoke health checks') - r = exec_cmd('fuel health --env %s --check sanity,smoke --force' - % self.env_id) + r = exec_cmd('fuel health --env %s --check sanity,smoke --force' % self.env_id) log(r) if 'failure' in r: - err('Healthcheck failed!') + err('Healthcheck failed!', self.collect_logs) def deploy(self): self.run_deploy() self.verify_node_status() if not self.no_health_check: self.health_check() + self.collect_logs()