models/failure_prediction/python/stacked_lstm_correlation.py

   1 # pylint: disable=C0103, C0116, W0621, E0401, W0104, W0105, R0913, E1136, W0612, E0102, C0301, W0611, C0411, W0311, W0404, E0602, C0326, C0330, W0106, C0412
   2 # -*- coding: utf-8 -*-
   3 """stacked_LSTM_Correlation.ipynb
   4
   5 Automatically generated by Colaboratory.
   6
   7 Original file is located at
   8     https://colab.research.google.com/drive/1x8vGD105bcSgNTyC2sx0C3ixUsVPvDQ4
   9
  10 Contributors: **Rohit Singh Rathaur, Girish L.**
  11
  12 Copyright [2021](2021) [*Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka*]
  13
  14 Licensed under the Apache License, Version 2.0 (the "License");
  15 you may not use this file except in compliance with the License.
  16 You may obtain a copy of the License at
  17
  18     http://www.apache.org/licenses/LICENSE-2.0
  19
  20 Unless required by applicable law or agreed to in writing, software
  21 distributed under the License is distributed on an "AS IS" BASIS,
  22 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  23 See the License for the specific language governing permissions and
  24 limitations under the License.
  25 """
  26
  27 from keras.layers import Activation, Dense, Dropout
  28 import seaborn as sns
  29 import os
  30 import numpy as np
  31 import pandas as pd
  32 import matplotlib as mpl
  33 import matplotlib.pyplot as plt
  34 import tensorflow as tf
  35 from google.colab import drive
  36 drive.mount('/gdrive')
  37
  38 # Importing libraries
  39
  40 df_Ellis = pd.read_csv(
  41     "/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Ellis_FinalTwoConditionwithOR.csv")
  42 df_Ellis
  43
  44 df_Ellis.plot()
  45
  46 # we show here the hist
  47 df_Ellis.hist(bins=100, figsize=(20, 15))
  48 # save_fig("attribute_histogram_plots")
  49 plt.show()
  50
  51 cpu_system_perc = df_Ellis[['ellis-cpu.system_perc']]
  52 cpu_system_perc.rolling(12).mean().plot(
  53     figsize=(20, 10), linewidth=5, fontsize=20)
  54 plt.xlabel('Timestamp', fontsize=30)
  55
  56 load_avg_1_min = df_Ellis[['ellis-load.avg_1_min']]
  57 load_avg_1_min.rolling(12).mean().plot(
  58     figsize=(20, 10), linewidth=5, fontsize=20)
  59 plt.xlabel('Timestamp', fontsize=30)
  60
  61 cpu_wait_perc = df_Ellis[['ellis-cpu.wait_perc']]
  62 cpu_wait_perc.rolling(12).mean().plot(
  63     figsize=(20, 10), linewidth=5, fontsize=20)
  64 plt.xlabel('Year', fontsize=30)
  65
  66 df_dg = pd.concat([cpu_system_perc.rolling(12).mean(), load_avg_1_min.rolling(
  67     12).mean(), cpu_wait_perc.rolling(12).mean()], axis=1)
  68 df_dg.plot(figsize=(20, 10), linewidth=5, fontsize=20)
  69 plt.xlabel('Year', fontsize=20)
  70
  71
  72 # we establish the corrmartrice
  73 color = sns.color_palette()
  74 sns.set_style('darkgrid')
  75
  76 correaltionMatrice = df_Ellis.corr()
  77 f, ax = plt.subplots(figsize=(20, 10))
  78 sns.heatmap(
  79     correaltionMatrice,
  80     cbar=True,
  81     vmin=0,
  82     vmax=1,
  83     square=True,
  84     annot=True)
  85 plt.show()
  86
  87 df_Ellis.corrwith(df_Ellis['ellis-load.avg_1_min'])
  88
  89 # using multivariate feature
  90
  91 features_3 = [
  92     'ellis-cpu.wait_perc',
  93     'ellis-load.avg_1_min',
  94     'ellis-net.in_bytes_sec',
  95     'Label']
  96
  97 features = df_Ellis[features_3]
  98 features.index = df_Ellis['Timestamp']
  99 features.head()
 100
 101 features.plot(subplots=True)
 102
 103 features = features.values
 104
 105 # standardize data
 106 train_split = 141600
 107 tf.random.set_seed(13)
 108
 109 # standardize data
 110 features_mean = features[:train_split].mean()
 111 features_std = features[:train_split].std()
 112 features = (features - features_mean) / features_std
 113
 114 print(type(features))
 115 print(features.shape)
 116
 117 # create mutlivariate data
 118
 119
 120 def mutlivariate_data(
 121         features,
 122         target,
 123         start_idx,
 124         end_idx,
 125         history_size,
 126         target_size,
 127         step,
 128         single_step=False):
 129     data = []
 130     labels = []
 131     start_idx = start_idx + history_size
 132     if end_idx is None:
 133         end_idx = len(features) - target_size
 134     for i in range(start_idx, end_idx):
 135         idxs = range(i - history_size, i, step)  # using step
 136         data.append(features[idxs])
 137         if single_step:
 138             labels.append(target[i + target_size])
 139         else:
 140             labels.append(target[i:i + target_size])
 141
 142     return np.array(data), np.array(labels)
 143
 144 # generate multivariate data
 145
 146
 147 history = 720
 148 future_target = 72
 149 STEP = 6
 150
 151 x_train_ss, y_train_ss = mutlivariate_data(
 152     features, features[:, 1], 0, train_split, history, future_target, STEP, single_step=True)
 153
 154 x_val_ss, y_val_ss = mutlivariate_data(features, features[:, 1], train_split, None, history,
 155                                        future_target, STEP, single_step=True)
 156
 157 print(x_train_ss.shape, y_train_ss.shape)
 158 print(x_val_ss.shape, y_val_ss.shape)
 159
 160 # tensorflow dataset
 161 batch_size = 256
 162 buffer_size = 10000
 163
 164 train_ss = tf.data.Dataset.from_tensor_slices((x_train_ss, y_train_ss))
 165 train_ss = train_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()
 166
 167 val_ss = tf.data.Dataset.from_tensor_slices((x_val_ss, y_val_ss))
 168 val_ss = val_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()
 169
 170 print(train_ss)
 171 print(val_ss)
 172
 173
 174 def root_mean_squared_error(y_true, y_pred):
 175     return K.sqrt(K.mean(K.square(y_pred - y_true)))
 176
 177
 178 """## Why Increase Depth?
 179 Stacking LSTM hidden layers makes the model deeper, more accurately earning the description as a deep learning technique. It is the depth of neural networks that is generally attributed to the success of the approach on a wide range of challenging prediction problems.
 180
 181 As Stacked LSTMs are now a stable technique for challenging sequence prediction problems. A Stacked LSTM architecture is defined as an LSTM model comprised of multiple LSTM layers. An LSTM layer above provides a sequence output rather than a single value output to the LSTM layer below. Specifically, one output per input time step, rather than one output time step for all input time steps.
 182
 183 We created Stacked LSTM model using Keras which is a Python deep learning library.
 184 """
 185
 186 # Modelling using LSTM
 187 steps = 50
 188
 189 EPOCHS = 20
 190
 191 single_step_model = tf.keras.models.Sequential()
 192
 193 single_step_model.add(tf.keras.layers.LSTM(
 194     32, return_sequences=True, input_shape=x_train_ss.shape[-2:]))
 195 single_step_model.add(tf.keras.layers.Dropout(0.3))
 196 single_step_model.add(tf.keras.layers.LSTM(units=100, return_sequences=False))
 197 single_step_model.add(tf.keras.layers.Dropout(0.2))
 198 #model.add(Dense(units=1, activation='relu'))
 199 single_step_model.add(tf.keras.layers.Activation("relu"))
 200 single_step_model.add(tf.keras.layers.Dense(1))
 201 single_step_model.compile(
 202     optimizer=tf.keras.optimizers.Adam(),
 203     loss='mae',
 204     metrics=[
 205         tf.keras.metrics.RootMeanSquaredError(
 206             name='rmse')])
 207 #single_step_model.compile(loss='mse', optimizer='rmsprop')
 208 single_step_model_history = single_step_model.fit(
 209     train_ss,
 210     epochs=EPOCHS,
 211     steps_per_epoch=steps,
 212     validation_data=val_ss,
 213     validation_steps=50)
 214
 215
 216 single_step_model.summary()
 217
 218 # plot train test loss
 219
 220
 221 def plot_loss(history, title):
 222     loss = history.history['loss']
 223     val_loss = history.history['val_loss']
 224
 225     epochs = range(len(loss))
 226     plt.figure()
 227     plt.plot(epochs, loss, 'b', label='Train Loss')
 228     plt.plot(epochs, val_loss, 'r', label='Validation Loss')
 229     plt.title(title)
 230     plt.legend()
 231     plt.grid()
 232     plt.show()
 233
 234
 235 plot_loss(single_step_model_history,
 236           'Single Step Training and validation loss')
 237
 238 # plot train test loss
 239
 240
 241 def plot_loss(history, title):
 242     loss = history.history['rmse']
 243     val_loss = history.history['val_rmse']
 244
 245     epochs = range(len(loss))
 246     plt.figure()
 247     plt.plot(epochs, loss, 'b', label='Train RMSE')
 248     plt.plot(epochs, val_loss, 'r', label='Validation RMSE')
 249     plt.title(title)
 250     plt.legend()
 251     plt.grid()
 252     plt.show()
 253
 254
 255 plot_loss(single_step_model_history,
 256           'Single Step Training and validation loss')
 257
 258 # fucntion to create time steps
 259
 260
 261 def create_time_steps(length):
 262     return list(range(-length, 0))
 263
 264 # function to plot time series data
 265
 266
 267 def plot_time_series(plot_data, delta, title):
 268     labels = ["History", 'True Future', 'Model Predcited']
 269     marker = ['.-', 'rx', 'go']
 270     time_steps = create_time_steps(plot_data[0].shape[0])
 271
 272     if delta:
 273         future = delta
 274     else:
 275         future = 0
 276     plt.title(title)
 277     for i, x in enumerate(plot_data):
 278         if i:
 279             plt.plot(
 280                 future,
 281                 plot_data[i],
 282                 marker[i],
 283                 markersize=10,
 284                 label=labels[i])
 285         else:
 286             plt.plot(
 287                 time_steps,
 288                 plot_data[i].flatten(),
 289                 marker[i],
 290                 label=labels[i])
 291     plt.legend()
 292     plt.xlim([time_steps[0], (future + 5) * 2])
 293
 294     plt.xlabel('Time_Step')
 295     return plt
 296
 297 # Moving window average
 298
 299
 300 def MWA(history):
 301     return np.mean(history)
 302
 303 # plot time series and predicted values
 304
 305
 306 for x, y in val_ss.take(5):
 307     plot = plot_time_series([x[0][:, 1].numpy(), y[0].numpy(),
 308                              single_step_model.predict(x)[0]], 12,
 309                             'Single Step Prediction')
 310     plot.show()
 311
 312 """# **MultiStep Forcasting**"""
 313
 314 future_target = 72  # 72 future values
 315 x_train_multi, y_train_multi = mutlivariate_data(features, features[:, 1], 0,
 316                                                  train_split, history,
 317                                                  future_target, STEP)
 318 x_val_multi, y_val_multi = mutlivariate_data(features, features[:, 1],
 319                                              train_split, None, history,
 320                                              future_target, STEP)
 321
 322 print(x_train_multi.shape)
 323 print(y_train_multi.shape)
 324
 325 #  TF DATASET
 326
 327 train_data_multi = tf.data.Dataset.from_tensor_slices(
 328     (x_train_multi, y_train_multi))
 329 train_data_multi = train_data_multi.cache().shuffle(
 330     buffer_size).batch(batch_size).repeat()
 331
 332 val_data_multi = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
 333 val_data_multi = val_data_multi.batch(batch_size).repeat()
 334
 335 print(train_data_multi)
 336 print(val_data_multi)
 337
 338 # plotting function
 339
 340
 341 def multi_step_plot(history, true_future, prediction):
 342     plt.figure(figsize=(12, 6))
 343     num_in = create_time_steps(len(history))
 344     num_out = len(true_future)
 345     plt.grid()
 346     plt.plot(num_in, np.array(history[:, 1]), label='History')
 347     plt.plot(np.arange(num_out) / STEP, np.array(true_future), 'bo',
 348              label='True Future')
 349     if prediction.any():
 350         plt.plot(np.arange(num_out) / STEP, np.array(prediction), 'ro',
 351                  label='Predicted Future')
 352     plt.legend(loc='upper left')
 353     plt.show()
 354
 355
 356 for x, y in train_data_multi.take(1):
 357     multi_step_plot(x[0], y[0], np.array([0]))
 358
 359 multi_step_model = tf.keras.models.Sequential()
 360 multi_step_model.add(tf.keras.layers.LSTM(
 361     32, return_sequences=True, input_shape=x_train_multi.shape[-2:]))
 362 multi_step_model.add(tf.keras.layers.Dropout(0.2))
 363 multi_step_model.add(tf.keras.layers.LSTM(units=100, return_sequences=False))
 364 multi_step_model.add(tf.keras.layers.Dropout(0.2))
 365 #model.add(Dense(units=1, activation='relu'))
 366 multi_step_model.add(tf.keras.layers.Activation("relu"))
 367 # aDD dropout layer (0.3)
 368 multi_step_model.add(tf.keras.layers.Dense(72))  # for 72 outputs
 369
 370 multi_step_model.compile(
 371     optimizer=tf.keras.optimizers.RMSprop(
 372         clipvalue=1.0), loss='mae', metrics=[
 373             tf.keras.metrics.RootMeanSquaredError(
 374                 name='rmse')])
 375
 376 multi_step_history = multi_step_model.fit(train_data_multi, epochs=EPOCHS,
 377                                           steps_per_epoch=steps,
 378                                           validation_data=val_data_multi,
 379                                           validation_steps=50)
 380
 381 plot_loss(multi_step_history, 'Multi-Step Training and validation loss')
 382
 383 for x, y in val_data_multi.take(5):
 384     multi_step_plot(x[0], y[0], multi_step_model.predict(x)[0])
 385
 386 scores = multi_step_model.evaluate(
 387     x_train_multi,
 388     y_train_multi,
 389     verbose=1,
 390     batch_size=200)
 391 print('MAE: {}'.format(scores[1]))
 392
 393 scores_test = multi_step_model.evaluate(
 394     x_val_multi, y_val_multi, verbose=1, batch_size=200)
 395 print('MAE: {}'.format(scores[1]))
 396
 397 y_pred_test = multi_step_model.predict(x_val_multi, verbose=0)
 398
 399 plt.figure(figsize=(10, 5))
 400 plt.plot(y_pred_test)
 401 plt.plot(y_val_multi)
 402 plt.ylabel("RUL")
 403 plt.xlabel("Unit Number")
 404 plt.legend(loc='upper left')
 405 plt.show()