Python_Code: Added python code after running pylint
[thoth.git] / models / failure_prediction / python / stacked_lstm_correlation.py
1 # pylint: disable=C0103, C0116, W0621, E0401, W0104, W0105, R0913, E1136, W0612, E0102, C0301, W0611, C0411, W0311, W0404, E0602, C0326, C0330, W0106, C0412
2 # -*- coding: utf-8 -*-
3 """stacked_LSTM_Correlation.ipynb
4
5 Automatically generated by Colaboratory.
6
7 Original file is located at
8     https://colab.research.google.com/drive/1x8vGD105bcSgNTyC2sx0C3ixUsVPvDQ4
9
10 Contributors: **Rohit Singh Rathaur, Girish L.**
11
12 Copyright [2021](2021) [*Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka*]
13
14 Licensed under the Apache License, Version 2.0 (the "License");
15 you may not use this file except in compliance with the License.
16 You may obtain a copy of the License at
17
18     http://www.apache.org/licenses/LICENSE-2.0
19
20 Unless required by applicable law or agreed to in writing, software
21 distributed under the License is distributed on an "AS IS" BASIS,
22 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23 See the License for the specific language governing permissions and
24 limitations under the License.
25 """
26
27 from keras.layers import Activation, Dense, Dropout
28 import seaborn as sns
29 import os
30 import numpy as np
31 import pandas as pd
32 import matplotlib as mpl
33 import matplotlib.pyplot as plt
34 import tensorflow as tf
35 from google.colab import drive
36 drive.mount('/gdrive')
37
38 # Importing libraries
39
40 df_Ellis = pd.read_csv(
41     "/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Ellis_FinalTwoConditionwithOR.csv")
42 df_Ellis
43
44 df_Ellis.plot()
45
46 # we show here the hist
47 df_Ellis.hist(bins=100, figsize=(20, 15))
48 # save_fig("attribute_histogram_plots")
49 plt.show()
50
51 cpu_system_perc = df_Ellis[['ellis-cpu.system_perc']]
52 cpu_system_perc.rolling(12).mean().plot(
53     figsize=(20, 10), linewidth=5, fontsize=20)
54 plt.xlabel('Timestamp', fontsize=30)
55
56 load_avg_1_min = df_Ellis[['ellis-load.avg_1_min']]
57 load_avg_1_min.rolling(12).mean().plot(
58     figsize=(20, 10), linewidth=5, fontsize=20)
59 plt.xlabel('Timestamp', fontsize=30)
60
61 cpu_wait_perc = df_Ellis[['ellis-cpu.wait_perc']]
62 cpu_wait_perc.rolling(12).mean().plot(
63     figsize=(20, 10), linewidth=5, fontsize=20)
64 plt.xlabel('Year', fontsize=30)
65
66 df_dg = pd.concat([cpu_system_perc.rolling(12).mean(), load_avg_1_min.rolling(
67     12).mean(), cpu_wait_perc.rolling(12).mean()], axis=1)
68 df_dg.plot(figsize=(20, 10), linewidth=5, fontsize=20)
69 plt.xlabel('Year', fontsize=20)
70
71
72 # we establish the corrmartrice
73 color = sns.color_palette()
74 sns.set_style('darkgrid')
75
76 correaltionMatrice = df_Ellis.corr()
77 f, ax = plt.subplots(figsize=(20, 10))
78 sns.heatmap(
79     correaltionMatrice,
80     cbar=True,
81     vmin=0,
82     vmax=1,
83     square=True,
84     annot=True)
85 plt.show()
86
87 df_Ellis.corrwith(df_Ellis['ellis-load.avg_1_min'])
88
89 # using multivariate feature
90
91 features_3 = [
92     'ellis-cpu.wait_perc',
93     'ellis-load.avg_1_min',
94     'ellis-net.in_bytes_sec',
95     'Label']
96
97 features = df_Ellis[features_3]
98 features.index = df_Ellis['Timestamp']
99 features.head()
100
101 features.plot(subplots=True)
102
103 features = features.values
104
105 # standardize data
106 train_split = 141600
107 tf.random.set_seed(13)
108
109 # standardize data
110 features_mean = features[:train_split].mean()
111 features_std = features[:train_split].std()
112 features = (features - features_mean) / features_std
113
114 print(type(features))
115 print(features.shape)
116
117 # create mutlivariate data
118
119
120 def mutlivariate_data(
121         features,
122         target,
123         start_idx,
124         end_idx,
125         history_size,
126         target_size,
127         step,
128         single_step=False):
129     data = []
130     labels = []
131     start_idx = start_idx + history_size
132     if end_idx is None:
133         end_idx = len(features) - target_size
134     for i in range(start_idx, end_idx):
135         idxs = range(i - history_size, i, step)  # using step
136         data.append(features[idxs])
137         if single_step:
138             labels.append(target[i + target_size])
139         else:
140             labels.append(target[i:i + target_size])
141
142     return np.array(data), np.array(labels)
143
144 # generate multivariate data
145
146
147 history = 720
148 future_target = 72
149 STEP = 6
150
151 x_train_ss, y_train_ss = mutlivariate_data(
152     features, features[:, 1], 0, train_split, history, future_target, STEP, single_step=True)
153
154 x_val_ss, y_val_ss = mutlivariate_data(features, features[:, 1], train_split, None, history,
155                                        future_target, STEP, single_step=True)
156
157 print(x_train_ss.shape, y_train_ss.shape)
158 print(x_val_ss.shape, y_val_ss.shape)
159
160 # tensorflow dataset
161 batch_size = 256
162 buffer_size = 10000
163
164 train_ss = tf.data.Dataset.from_tensor_slices((x_train_ss, y_train_ss))
165 train_ss = train_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()
166
167 val_ss = tf.data.Dataset.from_tensor_slices((x_val_ss, y_val_ss))
168 val_ss = val_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()
169
170 print(train_ss)
171 print(val_ss)
172
173
174 def root_mean_squared_error(y_true, y_pred):
175     return K.sqrt(K.mean(K.square(y_pred - y_true)))
176
177
178 """## Why Increase Depth?
179 Stacking LSTM hidden layers makes the model deeper, more accurately earning the description as a deep learning technique. It is the depth of neural networks that is generally attributed to the success of the approach on a wide range of challenging prediction problems.
180
181 As Stacked LSTMs are now a stable technique for challenging sequence prediction problems. A Stacked LSTM architecture is defined as an LSTM model comprised of multiple LSTM layers. An LSTM layer above provides a sequence output rather than a single value output to the LSTM layer below. Specifically, one output per input time step, rather than one output time step for all input time steps.
182
183 We created Stacked LSTM model using Keras which is a Python deep learning library.
184 """
185
186 # Modelling using LSTM
187 steps = 50
188
189 EPOCHS = 20
190
191 single_step_model = tf.keras.models.Sequential()
192
193 single_step_model.add(tf.keras.layers.LSTM(
194     32, return_sequences=True, input_shape=x_train_ss.shape[-2:]))
195 single_step_model.add(tf.keras.layers.Dropout(0.3))
196 single_step_model.add(tf.keras.layers.LSTM(units=100, return_sequences=False))
197 single_step_model.add(tf.keras.layers.Dropout(0.2))
198 #model.add(Dense(units=1, activation='relu'))
199 single_step_model.add(tf.keras.layers.Activation("relu"))
200 single_step_model.add(tf.keras.layers.Dense(1))
201 single_step_model.compile(
202     optimizer=tf.keras.optimizers.Adam(),
203     loss='mae',
204     metrics=[
205         tf.keras.metrics.RootMeanSquaredError(
206             name='rmse')])
207 #single_step_model.compile(loss='mse', optimizer='rmsprop')
208 single_step_model_history = single_step_model.fit(
209     train_ss,
210     epochs=EPOCHS,
211     steps_per_epoch=steps,
212     validation_data=val_ss,
213     validation_steps=50)
214
215
216 single_step_model.summary()
217
218 # plot train test loss
219
220
221 def plot_loss(history, title):
222     loss = history.history['loss']
223     val_loss = history.history['val_loss']
224
225     epochs = range(len(loss))
226     plt.figure()
227     plt.plot(epochs, loss, 'b', label='Train Loss')
228     plt.plot(epochs, val_loss, 'r', label='Validation Loss')
229     plt.title(title)
230     plt.legend()
231     plt.grid()
232     plt.show()
233
234
235 plot_loss(single_step_model_history,
236           'Single Step Training and validation loss')
237
238 # plot train test loss
239
240
241 def plot_loss(history, title):
242     loss = history.history['rmse']
243     val_loss = history.history['val_rmse']
244
245     epochs = range(len(loss))
246     plt.figure()
247     plt.plot(epochs, loss, 'b', label='Train RMSE')
248     plt.plot(epochs, val_loss, 'r', label='Validation RMSE')
249     plt.title(title)
250     plt.legend()
251     plt.grid()
252     plt.show()
253
254
255 plot_loss(single_step_model_history,
256           'Single Step Training and validation loss')
257
258 # fucntion to create time steps
259
260
261 def create_time_steps(length):
262     return list(range(-length, 0))
263
264 # function to plot time series data
265
266
267 def plot_time_series(plot_data, delta, title):
268     labels = ["History", 'True Future', 'Model Predcited']
269     marker = ['.-', 'rx', 'go']
270     time_steps = create_time_steps(plot_data[0].shape[0])
271
272     if delta:
273         future = delta
274     else:
275         future = 0
276     plt.title(title)
277     for i, x in enumerate(plot_data):
278         if i:
279             plt.plot(
280                 future,
281                 plot_data[i],
282                 marker[i],
283                 markersize=10,
284                 label=labels[i])
285         else:
286             plt.plot(
287                 time_steps,
288                 plot_data[i].flatten(),
289                 marker[i],
290                 label=labels[i])
291     plt.legend()
292     plt.xlim([time_steps[0], (future + 5) * 2])
293
294     plt.xlabel('Time_Step')
295     return plt
296
297 # Moving window average
298
299
300 def MWA(history):
301     return np.mean(history)
302
303 # plot time series and predicted values
304
305
306 for x, y in val_ss.take(5):
307     plot = plot_time_series([x[0][:, 1].numpy(), y[0].numpy(),
308                              single_step_model.predict(x)[0]], 12,
309                             'Single Step Prediction')
310     plot.show()
311
312 """# **MultiStep Forcasting**"""
313
314 future_target = 72  # 72 future values
315 x_train_multi, y_train_multi = mutlivariate_data(features, features[:, 1], 0,
316                                                  train_split, history,
317                                                  future_target, STEP)
318 x_val_multi, y_val_multi = mutlivariate_data(features, features[:, 1],
319                                              train_split, None, history,
320                                              future_target, STEP)
321
322 print(x_train_multi.shape)
323 print(y_train_multi.shape)
324
325 #  TF DATASET
326
327 train_data_multi = tf.data.Dataset.from_tensor_slices(
328     (x_train_multi, y_train_multi))
329 train_data_multi = train_data_multi.cache().shuffle(
330     buffer_size).batch(batch_size).repeat()
331
332 val_data_multi = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
333 val_data_multi = val_data_multi.batch(batch_size).repeat()
334
335 print(train_data_multi)
336 print(val_data_multi)
337
338 # plotting function
339
340
341 def multi_step_plot(history, true_future, prediction):
342     plt.figure(figsize=(12, 6))
343     num_in = create_time_steps(len(history))
344     num_out = len(true_future)
345     plt.grid()
346     plt.plot(num_in, np.array(history[:, 1]), label='History')
347     plt.plot(np.arange(num_out) / STEP, np.array(true_future), 'bo',
348              label='True Future')
349     if prediction.any():
350         plt.plot(np.arange(num_out) / STEP, np.array(prediction), 'ro',
351                  label='Predicted Future')
352     plt.legend(loc='upper left')
353     plt.show()
354
355
356 for x, y in train_data_multi.take(1):
357     multi_step_plot(x[0], y[0], np.array([0]))
358
359 multi_step_model = tf.keras.models.Sequential()
360 multi_step_model.add(tf.keras.layers.LSTM(
361     32, return_sequences=True, input_shape=x_train_multi.shape[-2:]))
362 multi_step_model.add(tf.keras.layers.Dropout(0.2))
363 multi_step_model.add(tf.keras.layers.LSTM(units=100, return_sequences=False))
364 multi_step_model.add(tf.keras.layers.Dropout(0.2))
365 #model.add(Dense(units=1, activation='relu'))
366 multi_step_model.add(tf.keras.layers.Activation("relu"))
367 # aDD dropout layer (0.3)
368 multi_step_model.add(tf.keras.layers.Dense(72))  # for 72 outputs
369
370 multi_step_model.compile(
371     optimizer=tf.keras.optimizers.RMSprop(
372         clipvalue=1.0), loss='mae', metrics=[
373             tf.keras.metrics.RootMeanSquaredError(
374                 name='rmse')])
375
376 multi_step_history = multi_step_model.fit(train_data_multi, epochs=EPOCHS,
377                                           steps_per_epoch=steps,
378                                           validation_data=val_data_multi,
379                                           validation_steps=50)
380
381 plot_loss(multi_step_history, 'Multi-Step Training and validation loss')
382
383 for x, y in val_data_multi.take(5):
384     multi_step_plot(x[0], y[0], multi_step_model.predict(x)[0])
385
386 scores = multi_step_model.evaluate(
387     x_train_multi,
388     y_train_multi,
389     verbose=1,
390     batch_size=200)
391 print('MAE: {}'.format(scores[1]))
392
393 scores_test = multi_step_model.evaluate(
394     x_val_multi, y_val_multi, verbose=1, batch_size=200)
395 print('MAE: {}'.format(scores[1]))
396
397 y_pred_test = multi_step_model.predict(x_val_multi, verbose=0)
398
399 plt.figure(figsize=(10, 5))
400 plt.plot(y_pred_test)
401 plt.plot(y_val_multi)
402 plt.ylabel("RUL")
403 plt.xlabel("Unit Number")
404 plt.legend(loc='upper left')
405 plt.show()