1 # pylint: disable=C0103, C0116, W0621, E0401, W0104, W0105, R0913, E1136, W0612, E0102, C0301, W0611, C0411, W0311, W0404, E0602, C0326, C0330, W0106, C0412
2 # -*- coding: utf-8 -*-
3 """LSTM_correlation.ipynb
5 Automatically generated by Colaboratory.
7 Original file is located at
8 https://colab.research.google.com/drive/1pDIYGV2-FR7QJEhCt9HxlJfeIeqw8xBj
10 Contributors: Rohit Singh Rathaur, Girish L.
12 Copyright 2021 [Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka]
14 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
16 http://www.apache.org/licenses/LICENSE-2.0
17 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
from keras.layers import Activation, Dense, Dropout
import matplotlib as mpl
import matplotlib.pyplot as plt
import tensorflow as tf
from google.colab import drive
# NOTE(review): `pd` (pandas), `np` (numpy), `sns` (seaborn) and `K`
# (keras backend) are used later in this file but their import lines are
# not visible in this view — presumably on the elided lines.
# Mount Google Drive so the dataset CSV under /gdrive/MyDrive/... is readable.
drive.mount('/gdrive')
31 """We are importing the libraries:
33 - TensorFlow: to process and train the model
- Matplotlib: to plot the training and loss curves
35 - Pandas: used for data analysis and it allows us to import data from various formats
36 - Numpy: For array computing
41 """We are reading the CSV file using `read_csv` function and storing it in a DataFrame named `df_Ellis`"""
# Load the pre-processed Ellis dataset (two failure conditions OR-ed)
# from the mounted Google Drive into a DataFrame.
df_Ellis = pd.read_csv(
    "/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Ellis_FinalTwoConditionwithOR.csv")

# we show here the histogram of every numeric column
df_Ellis.hist(bins=100, figsize=(20, 15))
# save_fig("attribute_histogram_plots")

# 12-sample rolling mean smooths short-term noise before plotting.
cpu_system_perc = df_Ellis[['ellis-cpu.system_perc']]
cpu_system_perc.rolling(12).mean().plot(
    figsize=(20, 10), linewidth=5, fontsize=20)
plt.xlabel('Timestamp', fontsize=30)

# Same smoothing for the 1-minute load average (the prediction target later).
load_avg_1_min = df_Ellis[['ellis-load.avg_1_min']]
load_avg_1_min.rolling(12).mean().plot(
    figsize=(20, 10), linewidth=5, fontsize=20)
plt.xlabel('Timestamp', fontsize=30)
# Rolling mean of CPU I/O-wait percentage over 12 samples.
cpu_wait_perc = df_Ellis[['ellis-cpu.wait_perc']]
cpu_wait_perc.rolling(12).mean().plot(
    figsize=(20, 10), linewidth=5, fontsize=20)
# Bug fix: the x-axis is the sample timestamp index, not years — the
# 'Year' label was copied from an unrelated example.
plt.xlabel('Timestamp', fontsize=30)

# Overlay all three smoothed metrics on a single figure for comparison.
df_dg = pd.concat([cpu_system_perc.rolling(12).mean(), load_avg_1_min.rolling(
    12).mean(), cpu_wait_perc.rolling(12).mean()], axis=1)
df_dg.plot(figsize=(20, 10), linewidth=5, fontsize=20)
plt.xlabel('Timestamp', fontsize=20)
# we establish the correlation matrix
color = sns.color_palette()
sns.set_style('darkgrid')

# Pairwise correlations between all numeric columns.
correaltionMatrice = df_Ellis.corr()
f, ax = plt.subplots(figsize=(20, 10))
# NOTE(review): the rendering of correaltionMatrice (presumably a seaborn
# heatmap onto `ax`) is on lines elided from this view.

# Correlation of every column with the 1-min load-average target.
df_Ellis.corrwith(df_Ellis['ellis-load.avg_1_min'])
# using multivariate feature
# NOTE(review): the assignment `features_3 = [` and the list's closing
# bracket are on lines elided from this view; the three column names below
# are the visible contents of that list.
    'ellis-cpu.wait_perc',
    'ellis-load.avg_1_min',
    'ellis-net.in_bytes_sec',

# Index the selected columns by timestamp and plot each one separately.
features = df_Ellis[features_3]
features.index = df_Ellis['Timestamp']
features.plot(subplots=True)

# Work on the raw ndarray from here on.
features = features.values
# Fix the seed so dataset shuffling below is reproducible.
tf.random.set_seed(13)

# Standardize each feature using training-set statistics only.
# Bug fix: `axis=0` computes a per-feature mean/std; without it,
# ndarray.mean()/std() reduce over the flattened array, mixing features
# with very different scales (percentages vs. bytes/sec) into one scalar.
# NOTE(review): `train_split` is defined on a line elided from this view.
features_mean = features[:train_split].mean(axis=0)
features_std = features[:train_split].std(axis=0)
features = (features - features_mean) / features_std

print(type(features))
print(features.shape)
# create mutlivariate data
# NOTE(review): this function is incomplete in this view — its parameter
# list (features, target, start/end indices, history_size, target_size,
# step, single_step) and the if/else that selects between the two
# labels.append calls are on elided lines. Code kept byte-identical.
def mutlivariate_data(
    # Shift the window start so the first sample has a full history.
    start_idx = start_idx + history_size
    end_idx = len(features) - target_size
    for i in range(start_idx, end_idx):
        idxs = range(i - history_size, i, step)  # using step
        data.append(features[idxs])
        # Single-step: one future value; multi-step: a slice of futures.
        # (The surrounding if single_step / else is elided here.)
        labels.append(target[i + target_size])
        labels.append(target[i:i + target_size])
    return np.array(data), np.array(labels)
# generate multivariate data
# NOTE(review): `history`, `future_target` and `STEP` are defined on
# lines elided from this view.
# Training windows: rows [0, train_split); target is feature column 1.
x_train_ss, y_train_ss = mutlivariate_data(
    features, features[:, 1], 0, train_split, history, future_target, STEP, single_step=True)

# Validation windows: rows [train_split, end).
x_val_ss, y_val_ss = mutlivariate_data(features, features[:, 1], train_split, None, history,
                                       future_target, STEP, single_step=True)

print(x_train_ss.shape, y_train_ss.shape)
print(x_val_ss.shape, y_val_ss.shape)
# Cached, shuffled, batched training pipeline; repeat() lets Keras draw
# steps_per_epoch batches per epoch indefinitely.
train_ss = tf.data.Dataset.from_tensor_slices((x_train_ss, y_train_ss))
train_ss = train_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()

# Validation pipeline. Fix: do not shuffle validation data — evaluation
# order should be deterministic, shuffling adds cost without benefit, and
# the multi-step validation pipeline later in this file only batches.
val_ss = tf.data.Dataset.from_tensor_slices((x_val_ss, y_val_ss))
val_ss = val_ss.cache().batch(batch_size).repeat()
def root_mean_squared_error(y_true, y_pred):
    """Return the root-mean-squared error between targets and predictions.

    Implemented with Keras backend ops so it can serve as a loss/metric.
    """
    squared_diff = K.square(y_pred - y_true)
    return K.sqrt(K.mean(squared_diff))
# Modelling using LSTM
# Single-step model: one LSTM layer -> dropout -> scalar prediction.
single_step_model = tf.keras.models.Sequential()
single_step_model.add(tf.keras.layers.LSTM(
    32, return_sequences=False, input_shape=x_train_ss.shape[-2:]))
single_step_model.add(tf.keras.layers.Dropout(0.3))
single_step_model.add(tf.keras.layers.Dense(1))
# NOTE(review): the compile() call is incomplete in this view — the loss
# argument, the metrics list brackets and the closing parentheses are on
# elided lines.
single_step_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    tf.keras.metrics.RootMeanSquaredError(
#single_step_model.compile(loss='mse', optimizer='rmsprop')
# NOTE(review): fit() is also incomplete — the dataset/epochs arguments
# and closing parenthesis are on elided lines.
single_step_model_history = single_step_model.fit(
    steps_per_epoch=steps,
    validation_data=val_ss,
single_step_model.summary()
# plot train test loss
def plot_loss(history, title):
    """Plot training vs. validation loss curves from a Keras History."""
    loss = history.history['loss']
    val_loss = history.history['val_loss']

    epochs = range(len(loss))
    plt.plot(epochs, loss, 'b', label='Train Loss')
    plt.plot(epochs, val_loss, 'r', label='Validation Loss')
    # NOTE(review): the title/legend/show tail of this function appears
    # to be on elided lines.


plot_loss(single_step_model_history,
          'Single Step Training and validation loss')
# plot train test loss
# NOTE(review): this redefinition shadows the earlier plot_loss, so the
# call below plots RMSE curves, not loss. It reads 'rmse'/'val_rmse' keys,
# which assumes the RootMeanSquaredError metric was compiled with
# name='rmse' on an elided line — confirm.
def plot_loss(history, title):
    """Plot training vs. validation RMSE curves from a Keras History."""
    loss = history.history['rmse']
    val_loss = history.history['val_rmse']

    epochs = range(len(loss))
    plt.plot(epochs, loss, 'b', label='Train RMSE')
    plt.plot(epochs, val_loss, 'r', label='Validation RMSE')
    # NOTE(review): the title/legend/show tail appears to be on elided lines.


plot_loss(single_step_model_history,
          'Single Step Training and validation loss')
# function to create time steps
def create_time_steps(length):
    """Return plotting offsets [-length, ..., -1] for a history window."""
    return [step - length for step in range(length)]
# function to plot time series data
# NOTE(review): the interior of this function (the plt.plot call around
# `plot_data[i].flatten(),`, legend handling, and the `future` variable
# setup) is on elided lines; code kept byte-identical.
def plot_time_series(plot_data, delta, title):
    """Plot history, true future and model prediction on one time axis."""
    labels = ["History", 'True Future', 'Model Predcited']
    marker = ['.-', 'rx', 'go']
    time_steps = create_time_steps(plot_data[0].shape[0])
    for i, x in enumerate(plot_data):
            plot_data[i].flatten(),
    plt.xlim([time_steps[0], (future + 5) * 2])
    plt.xlabel('Time_Step')
# Moving window average
# NOTE(review): the enclosing `def` of this moving-average baseline is on
# an elided line; only its return statement is visible here.
    return np.mean(history)
# plot time series and predicted values
# Visual check: history (feature column 1), true value, and model
# prediction for five validation batches.
for x, y in val_ss.take(5):
    plot = plot_time_series([x[0][:, 1].numpy(), y[0].numpy(),
                             single_step_model.predict(x)[0]], 12,
                            'Single Step Prediction')
    # NOTE(review): a plot.show() likely follows on an elided line.
"""# **MultiStep Forecasting**"""
future_target = 72  # 72 future values
# NOTE(review): both mutlivariate_data calls are incomplete in this view —
# their trailing arguments (future_target, STEP) and closing parentheses
# are on elided lines.
x_train_multi, y_train_multi = mutlivariate_data(features, features[:, 1], 0,
                                                 train_split, history,
x_val_multi, y_val_multi = mutlivariate_data(features, features[:, 1],
                                             train_split, None, history,

print(x_train_multi.shape)
print(y_train_multi.shape)
# Multi-step pipelines: shuffle only the training data; validation is
# just batched so evaluation order stays deterministic.
train_data_multi = tf.data.Dataset.from_tensor_slices(
    (x_train_multi, y_train_multi))
train_data_multi = train_data_multi.cache().shuffle(
    buffer_size).batch(batch_size).repeat()

val_data_multi = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
val_data_multi = val_data_multi.batch(batch_size).repeat()

print(train_data_multi)
print(val_data_multi)
def multi_step_plot(history, true_future, prediction):
    """Plot the input history plus true and predicted future windows."""
    plt.figure(figsize=(12, 6))
    num_in = create_time_steps(len(history))
    num_out = len(true_future)

    # Column 1 of the history window is the target feature.
    plt.plot(num_in, np.array(history[:, 1]), label='History')
    # NOTE(review): the label line of the first future plot (and a
    # plt.show() at the end) are on elided lines, so the first plt.plot
    # call below is incomplete as shown.
    plt.plot(np.arange(num_out) / STEP, np.array(true_future), 'bo',
    plt.plot(np.arange(num_out) / STEP, np.array(prediction), 'ro',
             label='Predicted Future')
    plt.legend(loc='upper left')


# Sanity-check the plotting helper on one training batch (dummy prediction).
for x, y in train_data_multi.take(1):
    multi_step_plot(x[0], y[0], np.array([0]))
# Multi-step model: stacked LSTMs -> 72-way dense head (one output per
# future step).
multi_step_model = tf.keras.models.Sequential()
multi_step_model.add(tf.keras.layers.LSTM(
    32, return_sequences=True, input_shape=x_train_multi.shape[-2:]))
multi_step_model.add(tf.keras.layers.LSTM(16, activation='relu'))
# aDD dropout layer (0.3)
# NOTE(review): the comment above asks for a Dropout(0.3) layer but none
# is present in the visible code — confirm whether it was omitted.
multi_step_model.add(tf.keras.layers.Dense(72))  # for 72 outputs

# clipvalue=1.0 clips gradients to guard against exploding gradients.
# NOTE(review): compile() and fit() are incomplete in this view — the
# metric's trailing arguments, epochs value (EPOCHS), callbacks and
# closing parentheses are on elided lines.
multi_step_model.compile(
    optimizer=tf.keras.optimizers.RMSprop(
        clipvalue=1.0), loss='mae', metrics=[
        tf.keras.metrics.RootMeanSquaredError(
multi_step_history = multi_step_model.fit(train_data_multi, epochs=EPOCHS,
                                          steps_per_epoch=steps,
                                          validation_data=val_data_multi,
plot_loss(multi_step_history, 'Multi-Step Training and validation loss')

# Visual check of predictions on five validation batches.
for x, y in val_data_multi.take(5):
    multi_step_plot(x[0], y[0], multi_step_model.predict(x)[0])

# NOTE(review): the evaluate() arguments and closing parenthesis are on
# elided lines; given the visible compile (loss='mae', metrics=[RMSE]),
# scores[1] is the RMSE metric, so the 'MAE' label may be wrong — confirm.
scores = multi_step_model.evaluate(
print('MAE: {}'.format(scores[1]))
# Evaluate on the held-out validation windows and report the metric.
scores_test = multi_step_model.evaluate(
    x_val_multi, y_val_multi, verbose=1, batch_size=200)
# Bug fix: previously printed scores[1] (the result of the EARLIER
# evaluation) instead of this run's scores_test[1].
# NOTE(review): with the visible compile (loss='mae', metrics=[RMSE]),
# index 1 is the RMSE metric, so the 'MAE' label may mislabel it — confirm.
print('MAE: {}'.format(scores_test[1]))