1 # pylint: disable=C0103, C0116, W0621, E0401, W0104, W0105, R0913, E1136, W0612, E0102, C0301, W0611, C0411, W0311, W0404, E0602, C0326, C0330, W0106, C0412
2 # -*- coding: utf-8 -*-
5 Automatically generated by Colaboratory.
7 Original file is located at
8 https://colab.research.google.com/drive/15natzoGkWnOqxZyzavAaRqBFrPNxzd35
10 Contributors: Rohit Singh Rathaur, Girish L.
12 Copyright 2021 [Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka]
14 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
16 http://www.apache.org/licenses/LICENSE-2.0
17 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
19 We mounted the drive to access the data from Google Drive
22 from keras.utils.vis_utils import plot_model
23 from keras.layers import Activation, Dense, Dropout
27 import matplotlib as mpl
28 import matplotlib.pyplot as plt
29 import tensorflow as tf
30 from google.colab import drive
31 drive.mount('/content/drive')
33 """We are importing the libraries:
34 - TensorFlow: to process and train the model
35 - Matplotlib: to plot the training and loss curves
36 - Pandas: used for data analysis and it allows us to import data from various formats
37 - Numpy: For array computing
42 """We are reading the CSV file using `read_csv` function and storing it in a DataFrame named `df_Ellis`"""
# Read the Ellis metrics CSV from the mounted drive into the `df_Ellis` DataFrame.
_ellis_csv_path = "/content/drive/MyDrive/Failure/lstm/Ellis_FinalTwoConditionwithOR.csv"
df_Ellis = pd.read_csv(_ellis_csv_path)
48 """`plot()` function is used to draw points"""
52 """Using multivariate features:
53 - Storing only the multivariate features in a dataframe named `features_3`
54 - Extracting the Timestamp column from `df_Ellis` dataframe
55 - and combining it with the dataframe `features`
58 # using multivariate feature
61 'ellis-cpu.system_perc',
62 'ellis-cpu.wait_perc',
63 'ellis-load.avg_1_min',
65 'ellis-net.in_bytes_sec',
66 'ellis-net.out_packets_sec',
# Keep only the columns named in `features_3` and label the rows with the
# values of the `Timestamp` column.
features = df_Ellis.loc[:, features_3]
features.index = df_Ellis.loc[:, 'Timestamp']

"""Plotted features"""

# Draw every selected column in its own subplot.
features.plot(subplots=True)

# From this point on `features` is the underlying array, not a DataFrame.
features = features.values
81 tf.random.set_seed(13)
# Standardise every feature column independently, using statistics computed on
# the training rows only (the first `train_split` rows).
#
# axis=0 is essential: without it NumPy reduces over the whole 2-D array and
# returns one scalar mean/std shared by all columns, so columns on very
# different scales are not actually brought onto a common scale.
features_mean = features[:train_split].mean(axis=0)
features_std = features[:train_split].std(axis=0)

# A column that is constant over the training rows has zero spread; divide it
# by 1 so it is merely centred instead of becoming NaN/inf.
features_std = np.where(features_std == 0, 1.0, features_std)

features = (features - features_mean) / features_std
91 # create multivariate data
94 def mutlivariate_data(
105 start_idx = start_idx + history_size
107 end_idx = len(features) - target_size
108 for i in range(start_idx, end_idx):
109 idxs = range(i - history_size, i, step) # using step
110 data.append(features[idxs])
112 labels.append(target[i + target_size])
114 labels.append(target[i:i + target_size])
116 return np.array(data), np.array(labels)
119 """We split the multivariate data into training and validation sets and printed the shapes of the resulting arrays."""
121 # generate multivariate data
# Build the single-step windows. Column 1 of `features` is the target series;
# rows before `train_split` feed the training set and the rest the validation
# set.
_ss_target = features[:, 1]
_ss_window_args = (history, future_target, STEP)

x_train_ss, y_train_ss = mutlivariate_data(
    features, _ss_target, 0, train_split, *_ss_window_args, single_step=True)
x_val_ss, y_val_ss = mutlivariate_data(
    features, _ss_target, train_split, None, *_ss_window_args, single_step=True)

# Report the shapes of the inputs and labels for each split.
for _ss_inputs, _ss_labels in ((x_train_ss, y_train_ss), (x_val_ss, y_val_ss)):
    print(_ss_inputs.shape, _ss_labels.shape)
136 """The `tf.data.Dataset` API supports writing descriptive and efficient input pipelines. Dataset usage follows a common pattern:
137 - Create a source dataset from the input data.
138 - Apply dataset transformations to preprocess the data.
139 - Iterate over the dataset and process the elements.
140 Note: Iteration happens in a streaming fashion, so the full dataset does not need to fit into memory.
141 Once we have a dataset, we can apply transformations to prepare the data for our model:
# Training pipeline for the single-step model: cache, shuffle, batch and
# repeat the (window, label) pairs.
train_ss = (
    tf.data.Dataset.from_tensor_slices((x_train_ss, y_train_ss))
    .cache()
    .shuffle(buffer_size)
    .batch(batch_size)
    .repeat()
)

# Validation pipeline, built with the same chain of transformations.
val_ss = (
    tf.data.Dataset.from_tensor_slices((x_val_ss, y_val_ss))
    .cache()
    .shuffle(buffer_size)
    .batch(batch_size)
    .repeat()
)
157 """We used a custom loss function to evaluate the model:"""
def root_mean_squared_error(y_true, y_pred):
    """Return the square root of the mean squared difference of the inputs.

    The difference ``y_pred - y_true`` is squared, averaged over all
    elements and square-rooted, using the backend operations exposed by
    ``K``.
    """
    squared_error = K.square(y_pred - y_true)
    mean_squared_error = K.mean(squared_error)
    return K.sqrt(mean_squared_error)
164 """We are building a single-step LSTM model for the training data with a dropout rate of 0.3, compiled with the Adam optimizer."""
166 # Modelling using LSTM
# Single-step forecaster: one LSTM layer (32 units, final state only), a
# dropout layer with rate 0.3 and a single-unit dense output. The input shape
# is taken from the last two dimensions of the training windows.
single_step_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(
        32, return_sequences=False, input_shape=x_train_ss.shape[-2:]),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(1),
])
177 single_step_model.compile(
178 optimizer=tf.keras.optimizers.Adam(),
181 tf.keras.metrics.RootMeanSquaredError(
183 #single_step_model.compile(loss='mse', optimizer='rmsprop')
184 single_step_model_history = single_step_model.fit(
187 steps_per_epoch=steps,
188 validation_data=val_ss,
190 single_step_model.summary()
193 to_file='/content/drive/MyDrive/Failure/lstm/LSTM.png',
195 show_layer_names=True)
197 """We defined the `plot_loss` function to plot the train and test loss"""
199 # plot train test loss
202 def plot_loss(history, title):
203 loss = history.history['loss']
204 val_loss = history.history['val_loss']
206 epochs = range(len(loss))
208 plt.plot(epochs, loss, 'b', label='Train Loss')
209 plt.plot(epochs, val_loss, 'r', label='Validation Loss')
216 plot_loss(single_step_model_history,
217 'Single Step Training and validation loss')
219 # plot train test loss
222 def plot_loss(history, title):
223 loss = history.history['rmse']
224 val_loss = history.history['val_rmse']
226 epochs = range(len(loss))
228 plt.plot(epochs, loss, 'b', label='Train RMSE')
229 plt.plot(epochs, val_loss, 'r', label='Validation RMSE')
236 plot_loss(single_step_model_history,
237 'Single Step Training and validation loss')
239 """We defined a function `create_time_steps` to create time steps and function `plot_time_series` to plot the time series data"""
241 # function to create time steps
def create_time_steps(length):
    """Return the consecutive integers from ``-length`` up to ``-1`` as a list.

    A ``length`` of zero (or a negative one) yields an empty list.
    """
    return [step - length for step in range(length)]
247 # function to plot time series data
250 def plot_time_series(plot_data, delta, title):
251 labels = ["History", 'True Future', 'Model Predcited']
252 marker = ['.-', 'rx', 'go']
253 time_steps = create_time_steps(plot_data[0].shape[0])
260 for i, x in enumerate(plot_data):
271 plot_data[i].flatten(),
275 plt.xlim([time_steps[0], (future + 5) * 2])
277 plt.xlabel('Time_Step')
280 # Moving window average
284 return np.mean(history)
287 """We plotted the time series and predicted values"""
289 # plot time series and predicted values
291 for x, y in val_ss.take(5):
292 plot = plot_time_series([x[0][:, 1].numpy(), y[0].numpy(),
293 single_step_model.predict(x)[0]], 12,
294 'Single Step Prediction')
297 """# **Multi-Step Forecasting**
299 We split the data into training and validation sets for multi-step forecasting:
302 future_target = 72 # 72 future values
303 x_train_multi, y_train_multi = mutlivariate_data(features, features[:, 1], 0,
304 train_split, history,
306 x_val_multi, y_val_multi = mutlivariate_data(features, features[:, 1],
307 train_split, None, history,
310 print(x_train_multi.shape)
311 print(y_train_multi.shape)
313 """The `tf.data.Dataset` API supports writing descriptive and efficient input pipelines. Dataset usage follows a common pattern:
314 - Create a source dataset from the input data.
315 - Apply dataset transformations to preprocess the data.
316 - Iterate over the dataset and process the elements.
317 Note: Iteration happens in a streaming fashion, so the full dataset does not need to fit into memory.
318 Once we have a dataset, we can apply transformations to prepare the data for our model:
# Training pipeline for the multi-step model: cache, shuffle, batch and repeat.
train_data_multi = (
    tf.data.Dataset.from_tensor_slices((x_train_multi, y_train_multi))
    .cache()
    .shuffle(buffer_size)
    .batch(batch_size)
    .repeat()
)

# Validation pipeline: batched and repeated only (no caching or shuffling).
val_data_multi = (
    tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
    .batch(batch_size)
    .repeat()
)

# Show the dataset descriptions, training first.
for _multi_dataset in (train_data_multi, val_data_multi):
    print(_multi_dataset)
334 """We created a `multi_step_plot` function to plot between `history` and `true_future` data"""
339 def multi_step_plot(history, true_future, prediction):
340 plt.figure(figsize=(12, 6))
341 num_in = create_time_steps(len(history))
342 num_out = len(true_future)
344 plt.plot(num_in, np.array(history[:, 1]), label='History')
345 plt.plot(np.arange(num_out) / STEP, np.array(true_future), 'bo',
348 plt.plot(np.arange(num_out) / STEP, np.array(prediction), 'ro',
349 label='Predicted Future')
350 plt.legend(loc='upper left')
# Plot the first window of one training batch against its true future values.
# No model exists yet, so a single-zero array is passed as the prediction.
for x, y in train_data_multi.take(1):
    _placeholder_prediction = np.array([0])
    multi_step_plot(x[0], y[0], _placeholder_prediction)
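357 """We are building a multi-step LSTM model from two stacked LSTM layers and a Dense output layer with one unit per forecast step (72), compiled with the RMSprop optimizer (no dropout layer has been added yet)."""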
# Multi-step forecaster: two stacked LSTM layers followed by a dense layer
# that emits one value per forecast step.
multi_step_model = tf.keras.models.Sequential()
# The first LSTM returns the full sequence so the second LSTM can consume it.
multi_step_model.add(tf.keras.layers.LSTM(
    32, return_sequences=True, input_shape=x_train_multi.shape[-2:]))
multi_step_model.add(tf.keras.layers.LSTM(16, activation='relu'))
# TODO: add dropout layer (0.3)
# The output width follows `future_target` (72) rather than repeating the
# literal, so the model stays in step with the label windows if the horizon
# is ever changed.
multi_step_model.add(tf.keras.layers.Dense(future_target))
366 multi_step_model.compile(
367 optimizer=tf.keras.optimizers.RMSprop(
368 clipvalue=1.0), loss='mae', metrics=[
369 tf.keras.metrics.RootMeanSquaredError(
372 multi_step_history = multi_step_model.fit(train_data_multi, epochs=EPOCHS,
373 steps_per_epoch=steps,
374 validation_data=val_data_multi,
377 plot_loss(multi_step_history, 'Multi-Step Training and validation loss')
# For five validation batches, plot the first window with its true future and
# the model's forecast for that same window.
for x, y in val_data_multi.take(5):
    _batch_forecast = multi_step_model.predict(x)
    multi_step_plot(x[0], y[0], _batch_forecast[0])
382 scores = multi_step_model.evaluate(
387 print('MAE: {}'.format(scores[1]))
# Evaluate the multi-step model directly on the validation arrays.
scores_test = multi_step_model.evaluate(
    x_val_multi, y_val_multi, verbose=1, batch_size=200)
# evaluate() returns [loss, *metrics]. The model is compiled with loss='mae',
# so the MAE is element 0 (element 1 is the RMSE metric). Report the value
# from this evaluation (`scores_test`), not from the earlier `scores` run.
print('MAE: {}'.format(scores_test[0]))