models/failure_prediction/python/lstm.py

   1 #  pylint: disable=C0103, C0116, W0621, E0401, W0104, W0105, R0913, E1136, W0612, E0102, C0301, W0611, C0411, W0311, W0404, E0602, C0326, C0330, W0106, C0412
   2 # -*- coding: utf-8 -*-
   3 """LSTM.ipynb
   4
   5 Automatically generated by Colaboratory.
   6
   7 Original file is located at
   8     https://colab.research.google.com/drive/15natzoGkWnOqxZyzavAaRqBFrPNxzd35
   9
  10 Contributors: Rohit Singh Rathaur, Girish L.
  11
  12 Copyright 2021 [Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka]
  13
  14 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
  15
  16 http://www.apache.org/licenses/LICENSE-2.0
  17 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
  18
  19 We mounted the drive to access the data from google drive
  20 """
  21
  22 from keras.utils.vis_utils import plot_model
  23 from keras.layers import Activation, Dense, Dropout
  24 import os
  25 import numpy as np
  26 import pandas as pd
  27 import matplotlib as mpl
  28 import matplotlib.pyplot as plt
  29 import tensorflow as tf
  30 from google.colab import drive
  31 drive.mount('/content/drive')
  32
  33 """We are importing the libraries:
  34 - TensorFlow: to process and train the model
- Matplotlib: to plot the training and validation loss curves
  36 - Pandas: used for data analysis and it allows us to import data from various formats
  37 - Numpy: For array computing
  38 """
  39
  40 # Importing libraries
  41
  42 """We are reading the CSV file using `read_csv` function and storing it in a DataFrame named `df_Ellis`"""
  43
  44 df_Ellis = pd.read_csv(
  45     "/content/drive/MyDrive/Failure/lstm/Ellis_FinalTwoConditionwithOR.csv")
  46 df_Ellis
  47
  48 """`plot()` function is used to draw points"""
  49
  50 df_Ellis.plot()
  51
  52 """Using multivariate features:
- Storing the names of the selected multivariate feature columns in a list named `features_3`
- Selecting those columns from the `df_Ellis` dataframe into a dataframe named `features`
- and using the Timestamp column of `df_Ellis` as the index of `features`
  56 """
  57
  58 # using multivariate feature
  59
  60 features_3 = [
  61     'ellis-cpu.system_perc',
  62     'ellis-cpu.wait_perc',
  63     'ellis-load.avg_1_min',
  64     'ellis-mem.free_mb',
  65     'ellis-net.in_bytes_sec',
  66     'ellis-net.out_packets_sec',
  67     'Label']
  68
  69 features = df_Ellis[features_3]
  70 features.index = df_Ellis['Timestamp']
  71 features.head()
  72
  73 """Plotted features"""
  74
  75 features.plot(subplots=True)
  76
  77 features = features.values
  78
  79 # standardize data
  80 train_split = 141600
  81 tf.random.set_seed(13)
  82
  83 # standardize data
  84 features_mean = features[:train_split].mean()
  85 features_std = features[:train_split].std()
  86 features = (features - features_mean) / features_std
  87
  88 print(type(features))
  89 print(features.shape)
  90
  91 # create mutlivariate data
  92
  93
  94 def mutlivariate_data(
  95         features,
  96         target,
  97         start_idx,
  98         end_idx,
  99         history_size,
 100         target_size,
 101         step,
 102         single_step=False):
 103     data = []
 104     labels = []
 105     start_idx = start_idx + history_size
 106     if end_idx is None:
 107         end_idx = len(features) - target_size
 108     for i in range(start_idx, end_idx):
 109         idxs = range(i - history_size, i, step)  # using step
 110         data.append(features[idxs])
 111         if single_step:
 112             labels.append(target[i + target_size])
 113         else:
 114             labels.append(target[i:i + target_size])
 115
 116     return np.array(data), np.array(labels)
 117
 118
"""We split the multivariate data into training and validation sets and printed the shapes of the resulting arrays."""
 120
 121 # generate multivariate data
 122
 123 history = 720
 124 future_target = 72
 125 STEP = 6
 126
 127 x_train_ss, y_train_ss = mutlivariate_data(
 128     features, features[:, 1], 0, train_split, history, future_target, STEP, single_step=True)
 129
 130 x_val_ss, y_val_ss = mutlivariate_data(features, features[:, 1], train_split, None, history,
 131                                        future_target, STEP, single_step=True)
 132
 133 print(x_train_ss.shape, y_train_ss.shape)
 134 print(x_val_ss.shape, y_val_ss.shape)
 135
 136 """The `tf.data.Dataset` API supports writing descriptive and efficient input pipelines. Dataset usage following a common pattern:
 137 - Creating a source dataset from our input data.
 138 - Applied dataset transformations to preprocess the data.
 139 - Iterate over the dataset and process the elements.
 140 Note: Iteration happens in a streaming fashion, so the full dataset does not need to fit into memory.
 141 Once we have a dataset, we can apply transformations to prepare the data for our model:
 142 """
 143
 144 # tensorflow dataset
 145 batch_size = 256
 146 buffer_size = 10000
 147
 148 train_ss = tf.data.Dataset.from_tensor_slices((x_train_ss, y_train_ss))
 149 train_ss = train_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()
 150
 151 val_ss = tf.data.Dataset.from_tensor_slices((x_val_ss, y_val_ss))
 152 val_ss = val_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()
 153
 154 print(train_ss)
 155 print(val_ss)
 156
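"""We define a custom root-mean-squared-error loss function. Note that the models below are compiled with the built-in `mae` loss and the `RootMeanSquaredError` metric, so this function is not used by them:"""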
 158
 159
 160 def root_mean_squared_error(y_true, y_pred):
 161     return K.sqrt(K.mean(K.square(y_pred - y_true)))
 162
 163
"""We are building a single-step LSTM model on the training data with a dropout rate of 0.3, using the Adam optimizer."""
 165
 166 # Modelling using LSTM
 167 steps = 50
 168
 169 EPOCHS = 20
 170
 171 single_step_model = tf.keras.models.Sequential()
 172
 173 single_step_model.add(tf.keras.layers.LSTM(
 174     32, return_sequences=False, input_shape=x_train_ss.shape[-2:]))
 175 single_step_model.add(tf.keras.layers.Dropout(0.3))
 176 single_step_model.add(tf.keras.layers.Dense(1))
 177 single_step_model.compile(
 178     optimizer=tf.keras.optimizers.Adam(),
 179     loss='mae',
 180     metrics=[
 181         tf.keras.metrics.RootMeanSquaredError(
 182             name='rmse')])
 183 #single_step_model.compile(loss='mse', optimizer='rmsprop')
 184 single_step_model_history = single_step_model.fit(
 185     train_ss,
 186     epochs=EPOCHS,
 187     steps_per_epoch=steps,
 188     validation_data=val_ss,
 189     validation_steps=50)
 190 single_step_model.summary()
 191 plot_model(
 192     single_step_model,
 193     to_file='/content/drive/MyDrive/Failure/lstm/LSTM.png',
 194     show_shapes=True,
 195     show_layer_names=True)
 196
"""We defined the `plot_loss` function to plot the training and validation loss"""
 198
 199 # plot train test loss
 200
 201
 202 def plot_loss(history, title):
 203     loss = history.history['loss']
 204     val_loss = history.history['val_loss']
 205
 206     epochs = range(len(loss))
 207     plt.figure()
 208     plt.plot(epochs, loss, 'b', label='Train Loss')
 209     plt.plot(epochs, val_loss, 'r', label='Validation Loss')
 210     plt.title(title)
 211     plt.legend()
 212     plt.grid()
 213     plt.show()
 214
 215
 216 plot_loss(single_step_model_history,
 217           'Single Step Training and validation loss')
 218
 219 # plot train test loss
 220
 221
 222 def plot_loss(history, title):
 223     loss = history.history['rmse']
 224     val_loss = history.history['val_rmse']
 225
 226     epochs = range(len(loss))
 227     plt.figure()
 228     plt.plot(epochs, loss, 'b', label='Train RMSE')
 229     plt.plot(epochs, val_loss, 'r', label='Validation RMSE')
 230     plt.title(title)
 231     plt.legend()
 232     plt.grid()
 233     plt.show()
 234
 235
 236 plot_loss(single_step_model_history,
 237           'Single Step Training and validation loss')
 238
 239 """We defined a function `create_time_steps` to create time steps and function `plot_time_series` to plot the time series data"""
 240
# function to create time steps
 242
 243
 244 def create_time_steps(length):
 245     return list(range(-length, 0))
 246
 247 # function to plot time series data
 248
 249
 250 def plot_time_series(plot_data, delta, title):
 251     labels = ["History", 'True Future', 'Model Predcited']
 252     marker = ['.-', 'rx', 'go']
 253     time_steps = create_time_steps(plot_data[0].shape[0])
 254
 255     if delta:
 256         future = delta
 257     else:
 258         future = 0
 259     plt.title(title)
 260     for i, x in enumerate(plot_data):
 261         if i:
 262             plt.plot(
 263                 future,
 264                 plot_data[i],
 265                 marker[i],
 266                 markersize=10,
 267                 label=labels[i])
 268         else:
 269             plt.plot(
 270                 time_steps,
 271                 plot_data[i].flatten(),
 272                 marker[i],
 273                 label=labels[i])
 274     plt.legend()
 275     plt.xlim([time_steps[0], (future + 5) * 2])
 276
 277     plt.xlabel('Time_Step')
 278     return plt
 279
 280 # Moving window average
 281
 282
 283 def MWA(history):
 284     return np.mean(history)
 285
 286
 287 """We plotted the time series and predicted values"""
 288
 289 # plot time series and predicted values
 290
 291 for x, y in val_ss.take(5):
 292     plot = plot_time_series([x[0][:, 1].numpy(), y[0].numpy(),
 293                              single_step_model.predict(x)[0]], 12,
 294                             'Single Step Prediction')
 295     plot.show()
 296
"""# **MultiStep Forecasting**

We split the data into training and validation sets for multi-step forecasting:
 300 """
 301
 302 future_target = 72  # 72 future values
 303 x_train_multi, y_train_multi = mutlivariate_data(features, features[:, 1], 0,
 304                                                  train_split, history,
 305                                                  future_target, STEP)
 306 x_val_multi, y_val_multi = mutlivariate_data(features, features[:, 1],
 307                                              train_split, None, history,
 308                                              future_target, STEP)
 309
 310 print(x_train_multi.shape)
 311 print(y_train_multi.shape)
 312
 313 """The `tf.data.Dataset` API supports writing descriptive and efficient input pipelines. Dataset usage following a common pattern:
 314 - Creating a source dataset from our input data.
 315 - Applied dataset transformations to preprocess the data.
 316 - Iterate over the dataset and process the elements.
 317 Note: Iteration happens in a streaming fashion, so the full dataset does not need to fit into memory.
 318 Once we have a dataset, we can apply transformations to prepare the data for our model:
 319 """
 320
 321 #  TF DATASET
 322
 323 train_data_multi = tf.data.Dataset.from_tensor_slices(
 324     (x_train_multi, y_train_multi))
 325 train_data_multi = train_data_multi.cache().shuffle(
 326     buffer_size).batch(batch_size).repeat()
 327
 328 val_data_multi = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
 329 val_data_multi = val_data_multi.batch(batch_size).repeat()
 330
 331 print(train_data_multi)
 332 print(val_data_multi)
 333
 334 """We created a `multi_step_plot` function to plot between `history` and `true_future` data"""
 335
 336 # plotting function
 337
 338
 339 def multi_step_plot(history, true_future, prediction):
 340     plt.figure(figsize=(12, 6))
 341     num_in = create_time_steps(len(history))
 342     num_out = len(true_future)
 343     plt.grid()
 344     plt.plot(num_in, np.array(history[:, 1]), label='History')
 345     plt.plot(np.arange(num_out) / STEP, np.array(true_future), 'bo',
 346              label='True Future')
 347     if prediction.any():
 348         plt.plot(np.arange(num_out) / STEP, np.array(prediction), 'ro',
 349                  label='Predicted Future')
 350     plt.legend(loc='upper left')
 351     plt.show()
 352
 353
 354 for x, y in train_data_multi.take(1):
 355     multi_step_plot(x[0], y[0], np.array([0]))
 356
"""We are building a multi-step model with two stacked LSTM layers and a 72-unit dense output layer, trained with the RMSprop optimizer (no dropout layer has been added yet)."""
 358
 359 multi_step_model = tf.keras.models.Sequential()
 360 multi_step_model.add(tf.keras.layers.LSTM(
 361     32, return_sequences=True, input_shape=x_train_multi.shape[-2:]))
 362 multi_step_model.add(tf.keras.layers.LSTM(16, activation='relu'))
 363 # aDD dropout layer (0.3)
 364 multi_step_model.add(tf.keras.layers.Dense(72))  # for 72 outputs
 365
 366 multi_step_model.compile(
 367     optimizer=tf.keras.optimizers.RMSprop(
 368         clipvalue=1.0), loss='mae', metrics=[
 369             tf.keras.metrics.RootMeanSquaredError(
 370                 name='rmse')])
 371
 372 multi_step_history = multi_step_model.fit(train_data_multi, epochs=EPOCHS,
 373                                           steps_per_epoch=steps,
 374                                           validation_data=val_data_multi,
 375                                           validation_steps=50)
 376
 377 plot_loss(multi_step_history, 'Multi-Step Training and validation loss')
 378
 379 for x, y in val_data_multi.take(5):
 380     multi_step_plot(x[0], y[0], multi_step_model.predict(x)[0])
 381
 382 scores = multi_step_model.evaluate(
 383     x_train_multi,
 384     y_train_multi,
 385     verbose=1,
 386     batch_size=200)
 387 print('MAE: {}'.format(scores[1]))
 388
 389 scores_test = multi_step_model.evaluate(
 390     x_val_multi, y_val_multi, verbose=1, batch_size=200)
 391 print('MAE: {}'.format(scores[1]))