models/failure_prediction/python/lstm_correlation.py

   1 # pylint: disable=C0103, C0116, W0621, E0401, W0104, W0105, R0913, E1136, W0612, E0102, C0301, W0611, C0411, W0311, W0404, E0602, C0326, C0330, W0106, C0412
   2 # -*- coding: utf-8 -*-
   3 """LSTM_correlation.ipynb
   4
   5 Automatically generated by Colaboratory.
   6
   7 Original file is located at
   8     https://colab.research.google.com/drive/1pDIYGV2-FR7QJEhCt9HxlJfeIeqw8xBj
   9
  10 Contributors: Rohit Singh Rathaur, Girish L.
  11
  12 Copyright 2021 [Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka]
  13
  14 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
  15
  16 http://www.apache.org/licenses/LICENSE-2.0
  17 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
  18 """
  19
  20 import os
  21 from keras.layers import Activation, Dense, Dropout
  22 import seaborn as sns
  23 import numpy as np
  24 import pandas as pd
  25 import matplotlib as mpl
  26 import matplotlib.pyplot as plt
  27 import tensorflow as tf
  28 from google.colab import drive
  29 drive.mount('/gdrive')
  30
  31 """We are importing the libraries:
  32
  33 - TensorFlow: to process and train the model
  34 - Matplotlib: to plot the training anf loss curves
  35 - Pandas: used for data analysis and it allows us to import data from various formats
  36 - Numpy: For array computing
  37 """
  38
  39 # Importing libraries
  40
  41 """We are reading the CSV file using `read_csv` function and storing it in a DataFrame named `df_Ellis`"""
  42
  43 df_Ellis = pd.read_csv(
  44     "/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Ellis_FinalTwoConditionwithOR.csv")
  45 df_Ellis
  46
  47 df_Ellis.plot()
  48
  49 # we show here the hist
  50 df_Ellis.hist(bins=100, figsize=(20, 15))
  51 # save_fig("attribute_histogram_plots")
  52 plt.show()
  53
  54 cpu_system_perc = df_Ellis[['ellis-cpu.system_perc']]
  55 cpu_system_perc.rolling(12).mean().plot(
  56     figsize=(20, 10), linewidth=5, fontsize=20)
  57 plt.xlabel('Timestamp', fontsize=30)
  58
  59 load_avg_1_min = df_Ellis[['ellis-load.avg_1_min']]
  60 load_avg_1_min.rolling(12).mean().plot(
  61     figsize=(20, 10), linewidth=5, fontsize=20)
  62 plt.xlabel('Timestamp', fontsize=30)
  63
  64 cpu_wait_perc = df_Ellis[['ellis-cpu.wait_perc']]
  65 cpu_wait_perc.rolling(12).mean().plot(
  66     figsize=(20, 10), linewidth=5, fontsize=20)
  67 plt.xlabel('Year', fontsize=30)
  68
  69 df_dg = pd.concat([cpu_system_perc.rolling(12).mean(), load_avg_1_min.rolling(
  70     12).mean(), cpu_wait_perc.rolling(12).mean()], axis=1)
  71 df_dg.plot(figsize=(20, 10), linewidth=5, fontsize=20)
  72 plt.xlabel('Year', fontsize=20)
  73
  74 # we establish the corrmartrice
  75 color = sns.color_palette()
  76 sns.set_style('darkgrid')
  77
  78 correaltionMatrice = df_Ellis.corr()
  79 f, ax = plt.subplots(figsize=(20, 10))
  80 sns.heatmap(
  81     correaltionMatrice,
  82     cbar=True,
  83     vmin=0,
  84     vmax=1,
  85     square=True,
  86     annot=True)
  87 plt.show()
  88
  89 df_Ellis.corrwith(df_Ellis['ellis-load.avg_1_min'])
  90
  91 # using multivariate feature
  92
  93 features_3 = [
  94     'ellis-cpu.wait_perc',
  95     'ellis-load.avg_1_min',
  96     'ellis-net.in_bytes_sec',
  97     'Label']
  98
  99 features = df_Ellis[features_3]
 100 features.index = df_Ellis['Timestamp']
 101 features.head()
 102
 103 features.plot(subplots=True)
 104
 105 features = features.values
 106
 107 # standardize data
 108 train_split = 141600
 109 tf.random.set_seed(13)
 110
 111 # standardize data
 112 features_mean = features[:train_split].mean()
 113 features_std = features[:train_split].std()
 114 features = (features - features_mean) / features_std
 115
 116 print(type(features))
 117 print(features.shape)
 118
 119 # create mutlivariate data
 120
 121
 122 def mutlivariate_data(
 123         features,
 124         target,
 125         start_idx,
 126         end_idx,
 127         history_size,
 128         target_size,
 129         step,
 130         single_step=False):
 131     data = []
 132     labels = []
 133     start_idx = start_idx + history_size
 134     if end_idx is None:
 135         end_idx = len(features) - target_size
 136     for i in range(start_idx, end_idx):
 137         idxs = range(i - history_size, i, step)  # using step
 138         data.append(features[idxs])
 139         if single_step:
 140             labels.append(target[i + target_size])
 141         else:
 142             labels.append(target[i:i + target_size])
 143
 144     return np.array(data), np.array(labels)
 145
 146 # generate multivariate data
 147
 148
 149 history = 720
 150 future_target = 72
 151 STEP = 6
 152
 153 x_train_ss, y_train_ss = mutlivariate_data(
 154     features, features[:, 1], 0, train_split, history, future_target, STEP, single_step=True)
 155
 156 x_val_ss, y_val_ss = mutlivariate_data(features, features[:, 1], train_split, None, history,
 157                                        future_target, STEP, single_step=True)
 158
 159 print(x_train_ss.shape, y_train_ss.shape)
 160 print(x_val_ss.shape, y_val_ss.shape)
 161
 162 # tensorflow dataset
 163 batch_size = 256
 164 buffer_size = 10000
 165
 166 train_ss = tf.data.Dataset.from_tensor_slices((x_train_ss, y_train_ss))
 167 train_ss = train_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()
 168
 169 val_ss = tf.data.Dataset.from_tensor_slices((x_val_ss, y_val_ss))
 170 val_ss = val_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()
 171
 172 print(train_ss)
 173 print(val_ss)
 174
 175
 176 def root_mean_squared_error(y_true, y_pred):
 177     return K.sqrt(K.mean(K.square(y_pred - y_true)))
 178
 179
 180 # Modelling using LSTM
 181 steps = 50
 182
 183 EPOCHS = 20
 184
 185 single_step_model = tf.keras.models.Sequential()
 186
 187 single_step_model.add(tf.keras.layers.LSTM(
 188     32, return_sequences=False, input_shape=x_train_ss.shape[-2:]))
 189 single_step_model.add(tf.keras.layers.Dropout(0.3))
 190 single_step_model.add(tf.keras.layers.Dense(1))
 191 single_step_model.compile(
 192     optimizer=tf.keras.optimizers.Adam(),
 193     loss='mae',
 194     metrics=[
 195         tf.keras.metrics.RootMeanSquaredError(
 196             name='rmse')])
 197 #single_step_model.compile(loss='mse', optimizer='rmsprop')
 198 single_step_model_history = single_step_model.fit(
 199     train_ss,
 200     epochs=EPOCHS,
 201     steps_per_epoch=steps,
 202     validation_data=val_ss,
 203     validation_steps=50)
 204 single_step_model.summary()
 205
 206 # plot train test loss
 207
 208
 209 def plot_loss(history, title):
 210     loss = history.history['loss']
 211     val_loss = history.history['val_loss']
 212
 213     epochs = range(len(loss))
 214     plt.figure()
 215     plt.plot(epochs, loss, 'b', label='Train Loss')
 216     plt.plot(epochs, val_loss, 'r', label='Validation Loss')
 217     plt.title(title)
 218     plt.legend()
 219     plt.grid()
 220     plt.show()
 221
 222
 223 plot_loss(single_step_model_history,
 224           'Single Step Training and validation loss')
 225
 226 # plot train test loss
 227
 228
 229 def plot_loss(history, title):
 230     loss = history.history['rmse']
 231     val_loss = history.history['val_rmse']
 232
 233     epochs = range(len(loss))
 234     plt.figure()
 235     plt.plot(epochs, loss, 'b', label='Train RMSE')
 236     plt.plot(epochs, val_loss, 'r', label='Validation RMSE')
 237     plt.title(title)
 238     plt.legend()
 239     plt.grid()
 240     plt.show()
 241
 242
 243 plot_loss(single_step_model_history,
 244           'Single Step Training and validation loss')
 245
 246 # fucntion to create time steps
 247
 248
 249 def create_time_steps(length):
 250     return list(range(-length, 0))
 251
 252 # function to plot time series data
 253
 254
 255 def plot_time_series(plot_data, delta, title):
 256     labels = ["History", 'True Future', 'Model Predcited']
 257     marker = ['.-', 'rx', 'go']
 258     time_steps = create_time_steps(plot_data[0].shape[0])
 259
 260     if delta:
 261         future = delta
 262     else:
 263         future = 0
 264     plt.title(title)
 265     for i, x in enumerate(plot_data):
 266         if i:
 267             plt.plot(
 268                 future,
 269                 plot_data[i],
 270                 marker[i],
 271                 markersize=10,
 272                 label=labels[i])
 273         else:
 274             plt.plot(
 275                 time_steps,
 276                 plot_data[i].flatten(),
 277                 marker[i],
 278                 label=labels[i])
 279     plt.legend()
 280     plt.xlim([time_steps[0], (future + 5) * 2])
 281
 282     plt.xlabel('Time_Step')
 283     return plt
 284
 285 # Moving window average
 286
 287
 288 def MWA(history):
 289     return np.mean(history)
 290
 291 # plot time series and predicted values
 292
 293
 294 for x, y in val_ss.take(5):
 295     plot = plot_time_series([x[0][:, 1].numpy(), y[0].numpy(),
 296                              single_step_model.predict(x)[0]], 12,
 297                             'Single Step Prediction')
 298     plot.show()
 299
 300 """# **MultiStep Forcasting**"""
 301
 302 future_target = 72  # 72 future values
 303 x_train_multi, y_train_multi = mutlivariate_data(features, features[:, 1], 0,
 304                                                  train_split, history,
 305                                                  future_target, STEP)
 306 x_val_multi, y_val_multi = mutlivariate_data(features, features[:, 1],
 307                                              train_split, None, history,
 308                                              future_target, STEP)
 309
 310 print(x_train_multi.shape)
 311 print(y_train_multi.shape)
 312
 313 #  TF DATASET
 314
 315 train_data_multi = tf.data.Dataset.from_tensor_slices(
 316     (x_train_multi, y_train_multi))
 317 train_data_multi = train_data_multi.cache().shuffle(
 318     buffer_size).batch(batch_size).repeat()
 319
 320 val_data_multi = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
 321 val_data_multi = val_data_multi.batch(batch_size).repeat()
 322
 323 print(train_data_multi)
 324 print(val_data_multi)
 325
 326 # plotting function
 327
 328
 329 def multi_step_plot(history, true_future, prediction):
 330     plt.figure(figsize=(12, 6))
 331     num_in = create_time_steps(len(history))
 332     num_out = len(true_future)
 333     plt.grid()
 334     plt.plot(num_in, np.array(history[:, 1]), label='History')
 335     plt.plot(np.arange(num_out) / STEP, np.array(true_future), 'bo',
 336              label='True Future')
 337     if prediction.any():
 338         plt.plot(np.arange(num_out) / STEP, np.array(prediction), 'ro',
 339                  label='Predicted Future')
 340     plt.legend(loc='upper left')
 341     plt.show()
 342
 343
 344 for x, y in train_data_multi.take(1):
 345     multi_step_plot(x[0], y[0], np.array([0]))
 346
 347 multi_step_model = tf.keras.models.Sequential()
 348 multi_step_model.add(tf.keras.layers.LSTM(
 349     32, return_sequences=True, input_shape=x_train_multi.shape[-2:]))
 350 multi_step_model.add(tf.keras.layers.LSTM(16, activation='relu'))
 351 # aDD dropout layer (0.3)
 352 multi_step_model.add(tf.keras.layers.Dense(72))  # for 72 outputs
 353
 354 multi_step_model.compile(
 355     optimizer=tf.keras.optimizers.RMSprop(
 356         clipvalue=1.0), loss='mae', metrics=[
 357             tf.keras.metrics.RootMeanSquaredError(
 358                 name='rmse')])
 359
 360 multi_step_history = multi_step_model.fit(train_data_multi, epochs=EPOCHS,
 361                                           steps_per_epoch=steps,
 362                                           validation_data=val_data_multi,
 363                                           validation_steps=50)
 364
 365 plot_loss(multi_step_history, 'Multi-Step Training and validation loss')
 366
 367 for x, y in val_data_multi.take(5):
 368     multi_step_plot(x[0], y[0], multi_step_model.predict(x)[0])
 369
 370 scores = multi_step_model.evaluate(
 371     x_train_multi,
 372     y_train_multi,
 373     verbose=1,
 374     batch_size=200)
 375 print('MAE: {}'.format(scores[1]))
 376
 377 scores_test = multi_step_model.evaluate(
 378     x_val_multi, y_val_multi, verbose=1, batch_size=200)
 379 print('MAE: {}'.format(scores[1]))