Python_Code: Added python code after running pylint
thoth.git: models/failure_prediction/python/cnn.py
# pylint: disable=C0103, C0116, W0621, E0401, W0104, W0105, R0913, E1136, W0612, E0102, C0301, W0611, C0411, W0311, C0326, C0330
# -*- coding: utf-8 -*-
"""CNN.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1W8WsMl3qckYG9Xa2CUiA-RU3322whQUf

Contributors: **Rohit Singh Rathaur, Girish L.**

Copyright 2021 Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from keras import backend as K
from keras.layers import Dense, Flatten, Conv1D, MaxPooling1D
from keras.utils.vis_utils import plot_model
import seaborn as sns
import os
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import tensorflow as tf
from google.colab import drive
drive.mount('/content/drive')

# Load the dataset
df_Ellis = pd.read_csv(
    "/content/drive/MyDrive/Failure/lstm/Ellis_FinalTwoConditionwithOR.csv")
df_Ellis

df_Ellis.plot()

# histogram of every column
df_Ellis.hist(bins=100, figsize=(20, 15))
# save_fig("attribute_histogram_plots")
plt.show()

# 12-sample rolling means of individual metrics
cpu_system_perc = df_Ellis[['ellis-cpu.system_perc']]
cpu_system_perc.rolling(12).mean().plot(
    figsize=(20, 10), linewidth=5, fontsize=20)
plt.xlabel('Timestamp', fontsize=30)

load_avg_1_min = df_Ellis[['ellis-load.avg_1_min']]
load_avg_1_min.rolling(12).mean().plot(
    figsize=(20, 10), linewidth=5, fontsize=20)
plt.xlabel('Timestamp', fontsize=30)

cpu_wait_perc = df_Ellis[['ellis-cpu.wait_perc']]
cpu_wait_perc.rolling(12).mean().plot(
    figsize=(20, 10), linewidth=5, fontsize=20)
plt.xlabel('Timestamp', fontsize=30)

df_dg = pd.concat([cpu_system_perc.rolling(12).mean(), load_avg_1_min.rolling(
    12).mean(), cpu_wait_perc.rolling(12).mean()], axis=1)
df_dg.plot(figsize=(20, 10), linewidth=5, fontsize=20)
plt.xlabel('Timestamp', fontsize=20)

# correlation matrix of all columns
color = sns.color_palette()
sns.set_style('darkgrid')

correlationMatrix = df_Ellis.corr()
f, ax = plt.subplots(figsize=(20, 10))
sns.heatmap(
    correlationMatrix,
    cbar=True,
    vmin=0,
    vmax=1,
    square=True,
    annot=True)
plt.show()

# correlation of every column with the 1-minute load average
df_Ellis.corrwith(df_Ellis['ellis-load.avg_1_min'])

# using multivariate features

features_3 = [
    'ellis-cpu.wait_perc',
    'ellis-load.avg_1_min',
    'ellis-net.in_bytes_sec',
    'Label']

features = df_Ellis[features_3]
features.index = df_Ellis['Timestamp']
features.head()

features.plot(subplots=True)

features = features.values

train_split = 141600
tf.random.set_seed(13)

# standardize each column using statistics from the training split only
features_mean = features[:train_split].mean(axis=0)
features_std = features[:train_split].std(axis=0)
features = (features - features_mean) / features_std

print(type(features))
print(features.shape)
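
# Sanity check (an illustrative addition, not part of the original notebook):
# after standardization the training split should have near-zero mean and
# unit variance in every column.
print(features[:train_split].mean(axis=0))  # ~0 for each column
print(features[:train_split].std(axis=0))   # ~1 for each column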

# create multivariate data


def multivariate_data(features, target, start_idx, end_idx, history_size,
                      target_size, step, single_step=False):
  """Slice features/target into sliding windows.

  Each sample covers `history_size` rows sub-sampled every `step` rows;
  the label is the target value `target_size` rows ahead when
  single_step=True, otherwise the next `target_size` target values.
  """
  data = []
  labels = []
  start_idx = start_idx + history_size
  if end_idx is None:
    end_idx = len(features) - target_size
  for i in range(start_idx, end_idx):
    idxs = range(i - history_size, i, step)  # sub-sample inside the window
    data.append(features[idxs])
    if single_step:
      labels.append(target[i + target_size])
    else:
      labels.append(target[i:i + target_size])

  return np.array(data), np.array(labels)
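
# A minimal sketch of the windowing on toy data (values below are
# illustrative, not from the original notebook):
#   feats = np.arange(20).reshape(10, 2)
#   X, y = multivariate_data(feats, feats[:, 1], 0, None, 4, 1, 2,
#                            single_step=True)
#   X.shape == (5, 2, 2)  # 5 windows of 2 sub-sampled rows x 2 features
#   y.shape == (5,)       # one target value per window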

# generate multivariate data


history = 720
future_target = 72
STEP = 6

x_train_ss, y_train_ss = multivariate_data(features, features[:, 1], 0,
                                           train_split, history, future_target,
                                           STEP, single_step=True)

x_val_ss, y_val_ss = multivariate_data(features, features[:, 1], train_split,
                                       None, history, future_target,
                                       STEP, single_step=True)

print(x_train_ss.shape, y_train_ss.shape)
print(x_val_ss.shape, y_val_ss.shape)
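
# With history=720 and STEP=6, each sample covers 120 sub-sampled timesteps,
# so (given the four columns selected above) the shapes printed here should
# be (train_split - 720, 120, 4) for x_train_ss and (train_split - 720,)
# for y_train_ss.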

# tensorflow dataset
batch_size = 256
buffer_size = 10000

train_ss = tf.data.Dataset.from_tensor_slices((x_train_ss, y_train_ss))
train_ss = train_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()

val_ss = tf.data.Dataset.from_tensor_slices((x_val_ss, y_val_ss))
val_ss = val_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()
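
# Note: shuffling the validation pipeline is harmless but not required;
# val_ss.cache().batch(batch_size).repeat() would evaluate the same samples
# in a deterministic order.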

print(train_ss)
print(val_ss)


def root_mean_squared_error(y_true, y_pred):
  return K.sqrt(K.mean(K.square(y_pred - y_true)))


# Modelling using CNN
steps = 50

EPOCHS = 20

single_step_model = tf.keras.models.Sequential()

# each input window is (history // STEP, n_features) = (120, 4)
single_step_model.add(Conv1D(filters=64, kernel_size=2, activation='relu',
                             input_shape=x_train_ss.shape[-2:]))
single_step_model.add(MaxPooling1D(pool_size=2))
single_step_model.add(Flatten())
single_step_model.add(Dense(50, activation='relu'))
single_step_model.add(Dense(1))
single_step_model.compile(
    optimizer='adam', loss='mae', metrics=[
        tf.keras.metrics.RootMeanSquaredError(
            name='rmse')])
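
# Shape walk-through, assuming (120, 4) input windows: Conv1D with
# kernel_size=2 and 'valid' padding yields (119, 64); MaxPooling1D(pool_size=2)
# halves that to (59, 64); Flatten gives 59 * 64 = 3776 units feeding
# Dense(50) and the single regression output.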


# single_step_model.add(tf.keras.layers.LSTM(32, return_sequences=False, input_shape=x_train_ss.shape[-2:]))
# single_step_model.add(tf.keras.layers.Dropout(0.3))
# single_step_model.add(tf.keras.layers.Dense(1))
# single_step_model.compile(optimizer=tf.keras.optimizers.Adam(), loss='mae', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
# single_step_model.compile(loss='mse', optimizer='rmsprop')
single_step_model_history = single_step_model.fit(train_ss, epochs=EPOCHS,
                                                  steps_per_epoch=steps,
                                                  validation_data=val_ss,
                                                  validation_steps=50)
single_step_model.summary()
plot_model(
    single_step_model,
    to_file='/content/drive/MyDrive/Failure/lstm/CNN-LSTM.png',
    show_shapes=True,
    show_layer_names=True)

# plot train and validation loss

def plot_loss(history, title):
  loss = history.history['loss']
  val_loss = history.history['val_loss']

  epochs = range(len(loss))
  plt.figure()
  plt.plot(epochs, loss, 'b', label='Train Loss')
  plt.plot(epochs, val_loss, 'r', label='Validation Loss')
  plt.title(title)
  plt.legend()
  plt.grid()
  plt.show()


plot_loss(single_step_model_history,
          'Single Step Training and validation loss')

# plot train and validation RMSE

def plot_rmse(history, title):
  rmse = history.history['rmse']
  val_rmse = history.history['val_rmse']

  epochs = range(len(rmse))
  plt.figure()
  plt.plot(epochs, rmse, 'b', label='Train RMSE')
  plt.plot(epochs, val_rmse, 'r', label='Validation RMSE')
  plt.title(title)
  plt.legend()
  plt.grid()
  plt.show()


plot_rmse(single_step_model_history,
          'Single Step Training and validation RMSE')

# function to create time steps
def create_time_steps(length):
  return list(range(-length, 0))


# function to plot time series data
def plot_time_series(plot_data, delta, title):
  labels = ['History', 'True Future', 'Model Predicted']
  marker = ['.-', 'rx', 'go']
  time_steps = create_time_steps(plot_data[0].shape[0])

  if delta:
    future = delta
  else:
    future = 0
  plt.title(title)
  for i, x in enumerate(plot_data):
    if i:
      plt.plot(future, plot_data[i], marker[i], markersize=10, label=labels[i])
    else:
      plt.plot(time_steps, plot_data[i].flatten(), marker[i], label=labels[i])
  plt.legend()
  plt.xlim([time_steps[0], (future + 5) * 2])

  plt.xlabel('Time_Step')
  return plt


# moving window average
def MWA(history):
  return np.mean(history)
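
# MWA is a simple moving-window-average baseline; it is defined here but
# never called anywhere else in this script.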

# plot time series and predicted values

for x, y in val_ss.take(5):
  plot = plot_time_series([x[0][:, 1].numpy(), y[0].numpy(),
                           single_step_model.predict(x)[0]], 12,
                          'Single Step Prediction')
  plot.show()

"""# **MultiStep Forecasting**"""

future_target = 72  # predict 72 future values
x_train_multi, y_train_multi = multivariate_data(features, features[:, 1], 0,
                                                 train_split, history,
                                                 future_target, STEP)
x_val_multi, y_val_multi = multivariate_data(features, features[:, 1],
                                             train_split, None, history,
                                             future_target, STEP)

print(x_train_multi.shape)
print(y_train_multi.shape)

# TF dataset

train_data_multi = tf.data.Dataset.from_tensor_slices(
    (x_train_multi, y_train_multi))
train_data_multi = train_data_multi.cache().shuffle(
    buffer_size).batch(batch_size).repeat()

val_data_multi = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
val_data_multi = val_data_multi.batch(batch_size).repeat()

print(train_data_multi)
print(val_data_multi)

# plotting function
def multi_step_plot(history, true_future, prediction):
  plt.figure(figsize=(12, 6))
  num_in = create_time_steps(len(history))
  num_out = len(true_future)
  plt.grid()
  plt.plot(num_in, np.array(history[:, 1]), label='History')
  plt.plot(np.arange(num_out) / STEP, np.array(true_future), 'bo',
           label='True Future')
  if prediction.any():
    plt.plot(np.arange(num_out) / STEP, np.array(prediction), 'ro',
             label='Predicted Future')
  plt.legend(loc='upper left')
  plt.show()


for x, y in train_data_multi.take(1):
  multi_step_plot(x[0], y[0], np.array([0]))

multi_step_model = tf.keras.models.Sequential()


multi_step_model.add(Conv1D(filters=64, kernel_size=2,
                            activation='relu',
                            input_shape=x_train_multi.shape[-2:]))
multi_step_model.add(MaxPooling1D(pool_size=2))
multi_step_model.add(Flatten())
multi_step_model.add(Dense(50, activation='relu'))
multi_step_model.add(Dense(72))  # one output per future timestep
multi_step_model.compile(
    optimizer='adam', loss='mae', metrics=[
        tf.keras.metrics.RootMeanSquaredError(
            name='rmse')])
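
# The multi-step label windows built above are future_target=72 values long,
# so the model head must emit 72 values; the commented-out LSTM sketch below
# makes the same choice with Dense(72) ("for 72 outputs").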

# multi_step_model.add(tf.keras.layers.LSTM(32,
#                                           return_sequences=True,
#                                           input_shape=x_train_multi.shape[-2:]))
# multi_step_model.add(tf.keras.layers.LSTM(16, activation='relu'))
# add dropout layer (0.3)
# multi_step_model.add(tf.keras.layers.Dense(72))  # for 72 outputs

# multi_step_model.compile(optimizer=tf.keras.optimizers.RMSprop(clipvalue=1.0),
#                          loss='mae', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])

multi_step_history = multi_step_model.fit(train_data_multi, epochs=EPOCHS,
                                          steps_per_epoch=steps,
                                          validation_data=val_data_multi,
                                          validation_steps=50)

plot_rmse(multi_step_history, 'Multi-Step Training and validation RMSE')

for x, y in val_data_multi.take(5):
  multi_step_plot(x[0], y[0], multi_step_model.predict(x)[0])

scores = multi_step_model.evaluate(
    x_train_multi,
    y_train_multi,
    verbose=1,
    batch_size=200)
print('Train MAE: {}, RMSE: {}'.format(scores[0], scores[1]))

scores_test = multi_step_model.evaluate(
    x_val_multi, y_val_multi, verbose=1, batch_size=200)
print('Test MAE: {}, RMSE: {}'.format(scores_test[0], scores_test[1]))
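
# evaluate() returns values in the order given by model.metrics_names
# (here ['loss', 'rmse'], where the loss is the MAE objective), which is
# what the index-based prints above rely on.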