Merge "ipmi_adapter: add delay to stabilize command action"
[fuel.git] / deploy / cloud / deployment.py
1 ###############################################################################
2 # Copyright (c) 2015 Ericsson AB and others.
3 # szilard.cserey@ericsson.com
4 # All rights reserved. This program and the accompanying materials
5 # are made available under the terms of the Apache License, Version 2.0
6 # which accompanies this distribution, and is available at
7 # http://www.apache.org/licenses/LICENSE-2.0
8 ###############################################################################
9
10 import time
11 import re
12
13 from common import (
14     N,
15     E,
16     exec_cmd,
17     run_proc,
18     run_proc_wait_terminated,
19     run_proc_kill,
20     parse,
21     err,
22     log,
23     delete,
24 )
25
# Text grepped for in each node's log to locate error lines.
SEARCH_TEXT = '(err)'

# Log file that is searched on every node.
LOG_FILE = '/var/log/puppet.log'

# How many lines grep returns before (-B) and after (-A) an error line.
GREP_LINES_OF_LEADING_CONTEXT = 100
GREP_LINES_OF_TRAILING_CONTEXT = 100

# Characters that get a backslash prefix before a log line is embedded in
# the remote grep command.
LIST_OF_CHAR_TO_BE_ESCAPED = [
    '[',
    ']',
    '"',
]
31
class Deployment(object):
    """Run a Fuel environment deployment and gather diagnostics.

    :meth:`deploy` is the entry point: it starts the deployment, polls the
    environment status, verifies the nodes, optionally runs the health
    checks and finally collects the logs.
    """

    def __init__(self, dea, yaml_config_dir, env_id, node_id_roles_dict,
                 no_health_check, deploy_timeout):
        """Store the deployment settings.

        :param dea: kept on the instance; not read by the methods below
        :param yaml_config_dir: kept on the instance; not read by the
            methods below
        :param env_id: Fuel environment id, interpolated into the ``fuel``
            command lines
        :param node_id_roles_dict: mapping of node id to a sequence whose
            first element is searched for the text ``'controller'``
        :param no_health_check: when true, :meth:`deploy` skips
            :meth:`health_check`
        :param deploy_timeout: maximum number of status polls in
            :meth:`run_deploy` (converted with ``int()``); a 60 second
            sleep follows every poll that finds no final state
        """
        self.dea = dea
        self.yaml_config_dir = yaml_config_dir
        self.env_id = env_id
        self.node_id_roles_dict = node_id_roles_dict
        self.no_health_check = no_health_check
        self.deploy_timeout = deploy_timeout
        # Matches a leading "YYYY-MM-DD HH:MM:SS" timestamp; used to tell
        # where one log entry ends and the next begins.
        self.pattern = re.compile(
            r'\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d')

    @staticmethod
    def _escape_grep_args(line):
        """Return the quoted pattern plus log file name, backslash-escaped.

        NOTE(review): only the characters in LIST_OF_CHAR_TO_BE_ESCAPED are
        escaped; any other shell metacharacter in ``line`` is passed on
        unchanged.
        """
        grep_args = '"%s" %s' % (line, LOG_FILE)
        for char in LIST_OF_CHAR_TO_BE_ESCAPED:
            grep_args = grep_args.replace(char, '\\%s' % char)
        return grep_args

    @staticmethod
    def _remote_grep(node_id, option, grep_args):
        """Run ``grep <option> <grep_args>`` on the node via ssh.

        Returns the first element of the pair that ``exec_cmd`` yields
        when called with ``False`` (presumably the command output).
        """
        grep_cmd = 'grep %s %s' % (option, grep_args)
        details, _ = exec_cmd('ssh -q node-%s "%s"' % (node_id, grep_cmd),
                              False)
        return details

    def _leading_context(self, lines):
        """Return the lines from the nearest earlier timestamp up to, but
        excluding, the last line; ``''`` when no timestamped line is found.
        """
        # The last line is skipped: it is expected to be the error line
        # itself, which the trailing context already contains.
        for i in range(len(lines) - 2, -1, -1):
            if self.pattern.match(lines[i]):
                return '\n'.join(lines[i:-1]) + '\n'
        return ''

    def _trailing_context(self, details):
        """Return ``details`` cut just before the next timestamped line.

        The first line is never treated as the cut point; when no later
        timestamped line exists the text is returned unchanged.
        """
        lines = details.splitlines()
        for i in range(1, len(lines)):
            if self.pattern.match(lines[i]):
                return '\n'.join(lines[:i])
        return details

    def collect_error_logs(self):
        """Print the puppet error entries found on every node.

        For each node the log file is grepped for SEARCH_TEXT over ssh.
        Every matching line is then grepped again with leading and trailing
        context, and that context is trimmed to the surrounding timestamped
        lines so that complete log entries are shown.
        """
        for node_id, roles_blade in self.node_id_roles_dict.items():
            cmd = ('ssh -q node-%s grep \'"%s"\' %s'
                   % (node_id, SEARCH_TEXT, LOG_FILE))
            results, _ = exec_cmd(cmd, False)

            log_list = []
            for result in results.splitlines():
                grep_args = self._escape_grep_args(result)
                before = self._remote_grep(
                    node_id, '-B%s' % GREP_LINES_OF_LEADING_CONTEXT,
                    grep_args)
                after = self._remote_grep(
                    node_id, '-A%s' % GREP_LINES_OF_TRAILING_CONTEXT,
                    grep_args)

                log_msg = (self._leading_context(before.splitlines())
                           + self._trailing_context(after))
                if log_msg:
                    log_list.append(log_msg)

            if log_list:
                role = ('controller' if 'controller' in roles_blade[0]
                        else 'compute host')
                log('_' * 40 + 'Errors in node-%s %s' % (node_id, role)
                    + '_' * 40)
                for log_msg in log_list:
                    print(log_msg + '\n')

    def run_deploy(self):
        """Start ``fuel deploy-changes`` and poll until a final state.

        The environment status is polled at most ``int(deploy_timeout)``
        times, sleeping SLEEP_TIME seconds after each poll that finds no
        final state. If no final state ('operational', 'error', 'stopped')
        is reached, ``err`` is called with a timeout message. Any final
        state other than 'operational' prints the node error logs and
        calls ``err`` with a failure message.
        """
        SLEEP_TIME = 60
        # Named differently from the module-level LOG_FILE (the puppet log)
        # so the two cannot be confused.
        deploy_log = 'cloud.log'
        final_states = ('operational', 'error', 'stopped')

        log('Starting deployment of environment %s' % self.env_id)
        deploy_proc = run_proc('fuel --env %s deploy-changes | strings > %s'
                               % (self.env_id, deploy_log))

        # Stays None when the timeout allows no poll at all; that case is
        # then reported as a timeout instead of failing on an unbound name.
        status = None
        for _attempt in range(int(self.deploy_timeout)):
            env = parse(exec_cmd('fuel env --env %s' % self.env_id))
            status = env[0][E['status']]
            log('Environment status: %s' % status)
            progress, _ = exec_cmd('tail -2 %s | head -1' % deploy_log,
                                   False)
            if progress:
                log(progress)
            if status in final_states:
                break
            time.sleep(SLEEP_TIME)

        ready = status == 'operational'

        if status not in final_states:
            err('Deployment timed out, environment %s is not operational, snapshot will not be performed'
                % self.env_id, self.collect_logs)

        run_proc_wait_terminated(deploy_proc)
        delete(deploy_log)

        if ready:
            log('Environment %s successfully deployed' % self.env_id)
        else:
            self.collect_error_logs()
            err('Deployment failed, environment %s is not operational'
                % self.env_id, self.collect_logs)

    def collect_logs(self):
        """Create a Fuel snapshot and archive /var/log/remote.

        Removes earlier snapshots and deploy archives first. If the
        snapshot command reports a non-zero second result element
        (presumably its exit status), that is logged and the archive is
        still created.
        """
        log('Cleaning out any previous deployment logs')
        exec_cmd('rm -f /var/log/remote/fuel-snapshot-*', False)
        exec_cmd('rm -f /root/deploy-*', False)
        log('Generating Fuel deploy snap-shot')
        snapshot = exec_cmd('fuel snapshot < /dev/null &> snapshot.log',
                            False)
        if snapshot[1] != 0:
            log('Could not create a Fuel snapshot')
        else:
            exec_cmd('mv /root/fuel-snapshot* /var/log/remote/', False)

        log('Collecting all Fuel Snapshot & deploy log files')
        archive_cmd = ('tar -czhf /root/deploy-%s.log.tar.gz /var/log/remote'
                       % time.strftime("%Y%m%d-%H%M%S"))
        r, _ = exec_cmd(archive_cmd, False)
        log(r)

    def verify_node_status(self):
        """Call ``err`` if any node assigned to a cluster is not 'ready'.

        Nodes whose cluster field is the string 'None' are ignored.
        """
        node_list = parse(exec_cmd('fuel node list'))
        failed_nodes = [
            (node[N['id']], node[N['status']])
            for node in node_list
            if node[N['status']] != 'ready' and node[N['cluster']] != 'None'
        ]

        if failed_nodes:
            summary = ''.join('[node %s, status %s]\n' % (node_id, status)
                              for node_id, status in failed_nodes)
            err('Deployment failed: %s' % summary, self.collect_logs)

    def health_check(self):
        """Run the sanity and smoke health checks for the environment.

        Calls ``err`` when the command output contains 'failure'.
        """
        log('Now running sanity and smoke health checks')
        r = exec_cmd('fuel health --env %s --check sanity,smoke --force'
                     % self.env_id)
        log(r)
        if 'failure' in r:
            err('Healthcheck failed!', self.collect_logs)

    def deploy(self):
        """Deploy, verify the nodes, optionally health-check, collect logs."""
        self.run_deploy()
        self.verify_node_status()
        if not self.no_health_check:
            self.health_check()
        self.collect_logs()