Python_Code: Added python code after running pylint
thoth.git: models/failure_prediction/python/cnn.py
# pylint: disable=C0103, C0116, W0621, E0401, W0104, W0105, R0913, E1136, W0612, E0102, C0301, W0611, C0411, W0311, C0326, C0330
# -*- coding: utf-8 -*-
"""CNN.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1W8WsMl3qckYG9Xa2CUiA-RU3322whQUf

Contributors: **Rohit Singh Rathaur, Girish L.**

Copyright 2021 Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from keras import backend as K
from keras.layers import Dense, Flatten, Conv1D, MaxPooling1D
from keras.utils.vis_utils import plot_model
import seaborn as sns
import os
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import tensorflow as tf
from google.colab import drive
drive.mount('/content/drive')

# Load the dataset
df_Ellis = pd.read_csv(
    "/content/drive/MyDrive/Failure/lstm/Ellis_FinalTwoConditionwithOR.csv")
df_Ellis

df_Ellis.plot()

# histogram of every column
df_Ellis.hist(bins=100, figsize=(20, 15))
# save_fig("attribute_histogram_plots")
plt.show()

# 12-sample rolling means of individual metrics
cpu_system_perc = df_Ellis[['ellis-cpu.system_perc']]
cpu_system_perc.rolling(12).mean().plot(
    figsize=(20, 10), linewidth=5, fontsize=20)
plt.xlabel('Timestamp', fontsize=30)

load_avg_1_min = df_Ellis[['ellis-load.avg_1_min']]
load_avg_1_min.rolling(12).mean().plot(
    figsize=(20, 10), linewidth=5, fontsize=20)
plt.xlabel('Timestamp', fontsize=30)

cpu_wait_perc = df_Ellis[['ellis-cpu.wait_perc']]
cpu_wait_perc.rolling(12).mean().plot(
    figsize=(20, 10), linewidth=5, fontsize=20)
plt.xlabel('Timestamp', fontsize=30)

df_dg = pd.concat([cpu_system_perc.rolling(12).mean(), load_avg_1_min.rolling(
    12).mean(), cpu_wait_perc.rolling(12).mean()], axis=1)
df_dg.plot(figsize=(20, 10), linewidth=5, fontsize=20)
plt.xlabel('Timestamp', fontsize=20)

# correlation matrix of all columns
color = sns.color_palette()
sns.set_style('darkgrid')

correlationMatrix = df_Ellis.corr()
f, ax = plt.subplots(figsize=(20, 10))
sns.heatmap(
    correlationMatrix,
    cbar=True,
    vmin=0,
    vmax=1,
    square=True,
    annot=True)
plt.show()

# correlation of every column with the 1-minute load average
df_Ellis.corrwith(df_Ellis['ellis-load.avg_1_min'])

# using multivariate features

features_3 = [
    'ellis-cpu.wait_perc',
    'ellis-load.avg_1_min',
    'ellis-net.in_bytes_sec',
    'Label']

features = df_Ellis[features_3]
features.index = df_Ellis['Timestamp']
features.head()

features.plot(subplots=True)

features = features.values

train_split = 141600
tf.random.set_seed(13)

# standardize each column using statistics from the training split only
features_mean = features[:train_split].mean(axis=0)
features_std = features[:train_split].std(axis=0)
features = (features - features_mean) / features_std

print(type(features))
print(features.shape)
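
# Sanity check (an illustrative addition, not part of the original notebook):
# after standardization the training split should have near-zero mean and
# unit variance in every column.
print(features[:train_split].mean(axis=0))  # ~0 for each column
print(features[:train_split].std(axis=0))   # ~1 for each column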

# create multivariate data


def multivariate_data(features, target, start_idx, end_idx, history_size,
                      target_size, step, single_step=False):
  """Slice features/target into sliding windows.

  Each sample covers `history_size` rows sub-sampled every `step` rows;
  the label is the target value `target_size` rows ahead when
  single_step=True, otherwise the next `target_size` target values.
  """
  data = []
  labels = []
  start_idx = start_idx + history_size
  if end_idx is None:
    end_idx = len(features) - target_size
  for i in range(start_idx, end_idx):
    idxs = range(i - history_size, i, step)  # sub-sample inside the window
    data.append(features[idxs])
    if single_step:
      labels.append(target[i + target_size])
    else:
      labels.append(target[i:i + target_size])

  return np.array(data), np.array(labels)
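
# A minimal sketch of the windowing on toy data (values below are
# illustrative, not from the original notebook):
#   feats = np.arange(20).reshape(10, 2)
#   X, y = multivariate_data(feats, feats[:, 1], 0, None, 4, 1, 2,
#                            single_step=True)
#   X.shape == (5, 2, 2)  # 5 windows of 2 sub-sampled rows x 2 features
#   y.shape == (5,)       # one target value per window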

# generate multivariate data


history = 720
future_target = 72
STEP = 6

x_train_ss, y_train_ss = multivariate_data(features, features[:, 1], 0,
                                           train_split, history, future_target,
                                           STEP, single_step=True)

x_val_ss, y_val_ss = multivariate_data(features, features[:, 1], train_split,
                                       None, history, future_target,
                                       STEP, single_step=True)

print(x_train_ss.shape, y_train_ss.shape)
print(x_val_ss.shape, y_val_ss.shape)
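
# With history=720 and STEP=6, each sample covers 120 sub-sampled timesteps,
# so (given the four columns selected above) the shapes printed here should
# be (train_split - 720, 120, 4) for x_train_ss and (train_split - 720,)
# for y_train_ss.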

# tensorflow dataset
batch_size = 256
buffer_size = 10000

train_ss = tf.data.Dataset.from_tensor_slices((x_train_ss, y_train_ss))
train_ss = train_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()

val_ss = tf.data.Dataset.from_tensor_slices((x_val_ss, y_val_ss))
val_ss = val_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()
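
# Note: shuffling the validation pipeline is harmless but not required;
# val_ss.cache().batch(batch_size).repeat() would evaluate the same samples
# in a deterministic order.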

print(train_ss)
print(val_ss)


def root_mean_squared_error(y_true, y_pred):
  return K.sqrt(K.mean(K.square(y_pred - y_true)))


# Modelling using CNN
steps = 50

EPOCHS = 20

single_step_model = tf.keras.models.Sequential()

# each input window is (history // STEP, n_features) = (120, 4)
single_step_model.add(Conv1D(filters=64, kernel_size=2, activation='relu',
                             input_shape=x_train_ss.shape[-2:]))
single_step_model.add(MaxPooling1D(pool_size=2))
single_step_model.add(Flatten())
single_step_model.add(Dense(50, activation='relu'))
single_step_model.add(Dense(1))
single_step_model.compile(
    optimizer='adam', loss='mae', metrics=[
        tf.keras.metrics.RootMeanSquaredError(
            name='rmse')])
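
# Shape walk-through, assuming (120, 4) input windows: Conv1D with
# kernel_size=2 and 'valid' padding yields (119, 64); MaxPooling1D(pool_size=2)
# halves that to (59, 64); Flatten gives 59 * 64 = 3776 units feeding
# Dense(50) and the single regression output.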


# single_step_model.add(tf.keras.layers.LSTM(32, return_sequences=False, input_shape=x_train_ss.shape[-2:]))
# single_step_model.add(tf.keras.layers.Dropout(0.3))
# single_step_model.add(tf.keras.layers.Dense(1))
# single_step_model.compile(optimizer=tf.keras.optimizers.Adam(), loss='mae', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
# single_step_model.compile(loss='mse', optimizer='rmsprop')
single_step_model_history = single_step_model.fit(train_ss, epochs=EPOCHS,
                                                  steps_per_epoch=steps,
                                                  validation_data=val_ss,
                                                  validation_steps=50)
single_step_model.summary()
plot_model(
    single_step_model,
    to_file='/content/drive/MyDrive/Failure/lstm/CNN-LSTM.png',
    show_shapes=True,
    show_layer_names=True)

# plot train and validation loss

def plot_loss(history, title):
  loss = history.history['loss']
  val_loss = history.history['val_loss']

  epochs = range(len(loss))
  plt.figure()
  plt.plot(epochs, loss, 'b', label='Train Loss')
  plt.plot(epochs, val_loss, 'r', label='Validation Loss')
  plt.title(title)
  plt.legend()
  plt.grid()
  plt.show()


plot_loss(single_step_model_history,
          'Single Step Training and validation loss')

# plot train and validation RMSE

def plot_rmse(history, title):
  rmse = history.history['rmse']
  val_rmse = history.history['val_rmse']

  epochs = range(len(rmse))
  plt.figure()
  plt.plot(epochs, rmse, 'b', label='Train RMSE')
  plt.plot(epochs, val_rmse, 'r', label='Validation RMSE')
  plt.title(title)
  plt.legend()
  plt.grid()
  plt.show()


plot_rmse(single_step_model_history,
          'Single Step Training and validation RMSE')

# function to create time steps
def create_time_steps(length):
  return list(range(-length, 0))


# function to plot time series data
def plot_time_series(plot_data, delta, title):
  labels = ['History', 'True Future', 'Model Predicted']
  marker = ['.-', 'rx', 'go']
  time_steps = create_time_steps(plot_data[0].shape[0])

  if delta:
    future = delta
  else:
    future = 0
  plt.title(title)
  for i, x in enumerate(plot_data):
    if i:
      plt.plot(future, plot_data[i], marker[i], markersize=10, label=labels[i])
    else:
      plt.plot(time_steps, plot_data[i].flatten(), marker[i], label=labels[i])
  plt.legend()
  plt.xlim([time_steps[0], (future + 5) * 2])

  plt.xlabel('Time_Step')
  return plt


# moving window average
def MWA(history):
  return np.mean(history)
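
# MWA is a simple moving-window-average baseline; it is defined here but
# never called anywhere else in this script.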

# plot time series and predicted values

for x, y in val_ss.take(5):
  plot = plot_time_series([x[0][:, 1].numpy(), y[0].numpy(),
                           single_step_model.predict(x)[0]], 12,
                          'Single Step Prediction')
  plot.show()

"""# **MultiStep Forecasting**"""

future_target = 72  # predict 72 future values
x_train_multi, y_train_multi = multivariate_data(features, features[:, 1], 0,
                                                 train_split, history,
                                                 future_target, STEP)
x_val_multi, y_val_multi = multivariate_data(features, features[:, 1],
                                             train_split, None, history,
                                             future_target, STEP)

print(x_train_multi.shape)
print(y_train_multi.shape)

# TF dataset

train_data_multi = tf.data.Dataset.from_tensor_slices(
    (x_train_multi, y_train_multi))
train_data_multi = train_data_multi.cache().shuffle(
    buffer_size).batch(batch_size).repeat()

val_data_multi = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
val_data_multi = val_data_multi.batch(batch_size).repeat()

print(train_data_multi)
print(val_data_multi)

# plotting function
def multi_step_plot(history, true_future, prediction):
  plt.figure(figsize=(12, 6))
  num_in = create_time_steps(len(history))
  num_out = len(true_future)
  plt.grid()
  plt.plot(num_in, np.array(history[:, 1]), label='History')
  plt.plot(np.arange(num_out) / STEP, np.array(true_future), 'bo',
           label='True Future')
  if prediction.any():
    plt.plot(np.arange(num_out) / STEP, np.array(prediction), 'ro',
             label='Predicted Future')
  plt.legend(loc='upper left')
  plt.show()


for x, y in train_data_multi.take(1):
  multi_step_plot(x[0], y[0], np.array([0]))

multi_step_model = tf.keras.models.Sequential()


multi_step_model.add(Conv1D(filters=64, kernel_size=2,
                            activation='relu',
                            input_shape=x_train_multi.shape[-2:]))
multi_step_model.add(MaxPooling1D(pool_size=2))
multi_step_model.add(Flatten())
multi_step_model.add(Dense(50, activation='relu'))
multi_step_model.add(Dense(72))  # one output per future timestep
multi_step_model.compile(
    optimizer='adam', loss='mae', metrics=[
        tf.keras.metrics.RootMeanSquaredError(
            name='rmse')])
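
# The multi-step label windows built above are future_target=72 values long,
# so the model head must emit 72 values; the commented-out LSTM sketch below
# makes the same choice with Dense(72) ("for 72 outputs").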

# multi_step_model.add(tf.keras.layers.LSTM(32,
#                                           return_sequences=True,
#                                           input_shape=x_train_multi.shape[-2:]))
# multi_step_model.add(tf.keras.layers.LSTM(16, activation='relu'))
# add dropout layer (0.3)
# multi_step_model.add(tf.keras.layers.Dense(72))  # for 72 outputs

# multi_step_model.compile(optimizer=tf.keras.optimizers.RMSprop(clipvalue=1.0),
#                          loss='mae', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])

multi_step_history = multi_step_model.fit(train_data_multi, epochs=EPOCHS,
                                          steps_per_epoch=steps,
                                          validation_data=val_data_multi,
                                          validation_steps=50)

plot_rmse(multi_step_history, 'Multi-Step Training and validation RMSE')

for x, y in val_data_multi.take(5):
  multi_step_plot(x[0], y[0], multi_step_model.predict(x)[0])

scores = multi_step_model.evaluate(
    x_train_multi,
    y_train_multi,
    verbose=1,
    batch_size=200)
print('Train MAE: {}, RMSE: {}'.format(scores[0], scores[1]))

scores_test = multi_step_model.evaluate(
    x_val_multi, y_val_multi, verbose=1, batch_size=200)
print('Test MAE: {}, RMSE: {}'.format(scores_test[0], scores_test[1]))
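
# evaluate() returns values in the order given by model.metrics_names
# (here ['loss', 'rmse'], where the loss is the MAE objective), which is
# what the index-based prints above rely on.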