# pylint: disable=C0103, C0116, W0621, E0401, W0104, W0105, R0913, E1136, W0612, E0102, C0301, W0611, C0411, W0311, W0404, E0602, C0326, C0330, W0106, C0412
# -*- coding: utf-8 -*-
"""stacked_LSTM_Correlation.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1x8vGD105bcSgNTyC2sx0C3ixUsVPvDQ4

Contributors: **Rohit Singh Rathaur, Girish L.**

Copyright 2021 Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from keras import backend as K
from keras.layers import Activation, Dense, Dropout
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import tensorflow as tf
from google.colab import drive
drive.mount('/gdrive')
df_Ellis = pd.read_csv(
    "/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Ellis_FinalTwoConditionwithOR.csv")

# plot histograms of every column
df_Ellis.hist(bins=100, figsize=(20, 15))
# save_fig("attribute_histogram_plots")
plt.show()
cpu_system_perc = df_Ellis[['ellis-cpu.system_perc']]
cpu_system_perc.rolling(12).mean().plot(
    figsize=(20, 10), linewidth=5, fontsize=20)
plt.xlabel('Timestamp', fontsize=30)

load_avg_1_min = df_Ellis[['ellis-load.avg_1_min']]
load_avg_1_min.rolling(12).mean().plot(
    figsize=(20, 10), linewidth=5, fontsize=20)
plt.xlabel('Timestamp', fontsize=30)

cpu_wait_perc = df_Ellis[['ellis-cpu.wait_perc']]
cpu_wait_perc.rolling(12).mean().plot(
    figsize=(20, 10), linewidth=5, fontsize=20)
plt.xlabel('Timestamp', fontsize=30)

df_dg = pd.concat([cpu_system_perc.rolling(12).mean(),
                   load_avg_1_min.rolling(12).mean(),
                   cpu_wait_perc.rolling(12).mean()], axis=1)
df_dg.plot(figsize=(20, 10), linewidth=5, fontsize=20)
plt.xlabel('Timestamp', fontsize=20)
# compute the correlation matrix and render it as a heatmap
color = sns.color_palette()
sns.set_style('darkgrid')

correlationMatrix = df_Ellis.corr()
f, ax = plt.subplots(figsize=(20, 10))
# heatmap call reconstructed; the exact styling options of the original
# cell were lost
sns.heatmap(correlationMatrix, annot=True, fmt='.2f', ax=ax)

df_Ellis.corrwith(df_Ellis['ellis-load.avg_1_min'])
# using a multivariate feature set; the original cell may have listed
# further columns that were lost
features_3 = [
    'ellis-cpu.wait_perc',
    'ellis-load.avg_1_min',
    'ellis-net.in_bytes_sec',
]

features = df_Ellis[features_3]
features.index = df_Ellis['Timestamp']

features.plot(subplots=True)

features = features.values
tf.random.set_seed(13)

# train/validation split index (assumed value: 80% of the data; the
# original constant was lost)
train_split = int(len(features) * 0.8)

# normalise using the training-set statistics only
features_mean = features[:train_split].mean()
features_std = features[:train_split].std()
features = (features - features_mean) / features_std

print(type(features))
print(features.shape)
# create multivariate windowed data: slice `features` into look-back
# windows and pair each window with a single- or multi-step target

def multivariate_data(features, target, start_idx, end_idx, history_size,
                      target_size, step, single_step=False):
    data = []
    labels = []
    start_idx = start_idx + history_size
    if end_idx is None:
        end_idx = len(features) - target_size
    for i in range(start_idx, end_idx):
        idxs = range(i - history_size, i, step)  # sub-sampled window indices
        data.append(features[idxs])
        if single_step:
            labels.append(target[i + target_size])
        else:
            labels.append(target[i:i + target_size])
    return np.array(data), np.array(labels)
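
# Illustrative sketch (not part of the original notebook): on a toy array of
# 10 timesteps x 2 features, history_size=4 with step=2 keeps every second
# row of each look-back window, and single_step=True pairs each window with
# the target value target_size steps ahead.
_toy = np.arange(20).reshape(10, 2)
_x_toy, _y_toy = multivariate_data(_toy, _toy[:, 1], 0, None, 4, 1, 2,
                                   single_step=True)
print(_x_toy.shape, _y_toy.shape)  # (5, 2, 2) (5,): 5 windows of 2 rows each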
# generate multivariate single-step data
# window parameters (assumed values, following the standard TensorFlow
# time-series setup this notebook mirrors)
history = 720       # look-back window length
future_target = 72  # steps ahead to predict
STEP = 6            # sub-sampling rate within each window

x_train_ss, y_train_ss = multivariate_data(
    features, features[:, 1], 0, train_split, history, future_target, STEP,
    single_step=True)

x_val_ss, y_val_ss = multivariate_data(features, features[:, 1], train_split,
                                       None, history, future_target, STEP,
                                       single_step=True)

print(x_train_ss.shape, y_train_ss.shape)
print(x_val_ss.shape, y_val_ss.shape)
# build shuffled, batched, repeating tf.data pipelines
batch_size = 256     # assumed value
buffer_size = 10000  # assumed value

train_ss = tf.data.Dataset.from_tensor_slices((x_train_ss, y_train_ss))
train_ss = train_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()

val_ss = tf.data.Dataset.from_tensor_slices((x_val_ss, y_val_ss))
val_ss = val_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()
# custom RMSE metric; the models below use the built-in
# tf.keras.metrics.RootMeanSquaredError instead
def root_mean_squared_error(y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true)))
178 """## Why Increase Depth?
179 Stacking LSTM hidden layers makes the model deeper, more accurately earning the description as a deep learning technique. It is the depth of neural networks that is generally attributed to the success of the approach on a wide range of challenging prediction problems.
181 As Stacked LSTMs are now a stable technique for challenging sequence prediction problems. A Stacked LSTM architecture is defined as an LSTM model comprised of multiple LSTM layers. An LSTM layer above provides a sequence output rather than a single value output to the LSTM layer below. Specifically, one output per input time step, rather than one output time step for all input time steps.
183 We created Stacked LSTM model using Keras which is a Python deep learning library.
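
# Illustrative sketch (not part of the original notebook): with
# return_sequences=True an LSTM layer emits one output per input time step,
# which is what allows another LSTM layer to be stacked on top of it; with
# return_sequences=False it emits a single vector for the whole sequence.
print(tf.keras.layers.LSTM(8, return_sequences=True)(
    tf.zeros([1, 30, 5])).shape)  # (1, 30, 8): one output per time step
print(tf.keras.layers.LSTM(8)(
    tf.zeros([1, 30, 5])).shape)  # (1, 8): a single summary vector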
# Modelling using LSTM

# training-loop constants (assumed values; the original cell was lost)
EPOCHS = 20
steps = 50

single_step_model = tf.keras.models.Sequential()

single_step_model.add(tf.keras.layers.LSTM(
    32, return_sequences=True, input_shape=x_train_ss.shape[-2:]))
single_step_model.add(tf.keras.layers.Dropout(0.3))
single_step_model.add(tf.keras.layers.LSTM(units=100, return_sequences=False))
single_step_model.add(tf.keras.layers.Dropout(0.2))
# model.add(Dense(units=1, activation='relu'))
single_step_model.add(tf.keras.layers.Activation("relu"))
single_step_model.add(tf.keras.layers.Dense(1))
# loss reconstructed as 'mae' to mirror the multi-step model below; the
# metric is named 'rmse' because plot_loss reads that history key
single_step_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='mae',
    metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
# single_step_model.compile(loss='mse', optimizer='rmsprop')
single_step_model_history = single_step_model.fit(
    train_ss, epochs=EPOCHS,
    steps_per_epoch=steps,
    validation_data=val_ss,
    validation_steps=50)

single_step_model.summary()
# plot train/validation loss

def plot_loss(history, title):
    loss = history.history['loss']
    val_loss = history.history['val_loss']

    epochs = range(len(loss))
    plt.figure()
    plt.plot(epochs, loss, 'b', label='Train Loss')
    plt.plot(epochs, val_loss, 'r', label='Validation Loss')
    plt.title(title)
    plt.legend()
    plt.show()


plot_loss(single_step_model_history,
          'Single Step Training and validation loss')
# plot train/validation RMSE (intentionally redefines plot_loss to read the
# 'rmse' metric recorded during training)

def plot_loss(history, title):
    loss = history.history['rmse']
    val_loss = history.history['val_rmse']

    epochs = range(len(loss))
    plt.figure()
    plt.plot(epochs, loss, 'b', label='Train RMSE')
    plt.plot(epochs, val_loss, 'r', label='Validation RMSE')
    plt.title(title)
    plt.legend()
    plt.show()


plot_loss(single_step_model_history,
          'Single Step Training and validation RMSE')
# function to create time steps

def create_time_steps(length):
    return list(range(-length, 0))

# function to plot time series data

def plot_time_series(plot_data, delta, title):
    labels = ['History', 'True Future', 'Model Predicted']
    marker = ['.-', 'rx', 'go']
    time_steps = create_time_steps(plot_data[0].shape[0])
    future = delta if delta else 0
    plt.title(title)
    for i, x in enumerate(plot_data):
        if i:  # true-future and prediction points are plotted at `future`
            plt.plot(future, plot_data[i], marker[i], markersize=10,
                     label=labels[i])
        else:  # the history series spans the negative time steps
            plt.plot(time_steps, plot_data[i].flatten(), marker[i],
                     label=labels[i])
    plt.legend()
    plt.xlim([time_steps[0], (future + 5) * 2])
    plt.xlabel('Time_Step')
    return plt
# moving-window-average baseline (function header reconstructed)

def MWA(history):
    return np.mean(history)

# plot time series and predicted values

for x, y in val_ss.take(5):
    plot = plot_time_series([x[0][:, 1].numpy(), y[0].numpy(),
                             single_step_model.predict(x)[0]], 12,
                            'Single Step Prediction')
    plot.show()
312 """# **MultiStep Forcasting**"""
314 future_target = 72 # 72 future values
315 x_train_multi, y_train_multi = mutlivariate_data(features, features[:, 1], 0,
316 train_split, history,
318 x_val_multi, y_val_multi = mutlivariate_data(features, features[:, 1],
319 train_split, None, history,
322 print(x_train_multi.shape)
323 print(y_train_multi.shape)
train_data_multi = tf.data.Dataset.from_tensor_slices(
    (x_train_multi, y_train_multi))
train_data_multi = train_data_multi.cache().shuffle(
    buffer_size).batch(batch_size).repeat()

val_data_multi = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
val_data_multi = val_data_multi.batch(batch_size).repeat()

print(train_data_multi)
print(val_data_multi)
def multi_step_plot(history, true_future, prediction):
    plt.figure(figsize=(12, 6))
    num_in = create_time_steps(len(history))
    num_out = len(true_future)

    plt.plot(num_in, np.array(history[:, 1]), label='History')
    plt.plot(np.arange(num_out) / STEP, np.array(true_future), 'bo',
             label='True Future')
    if prediction.any():
        plt.plot(np.arange(num_out) / STEP, np.array(prediction), 'ro',
                 label='Predicted Future')
    plt.legend(loc='upper left')
    plt.show()


for x, y in train_data_multi.take(1):
    multi_step_plot(x[0], y[0], np.array([0]))
multi_step_model = tf.keras.models.Sequential()
multi_step_model.add(tf.keras.layers.LSTM(
    32, return_sequences=True, input_shape=x_train_multi.shape[-2:]))
multi_step_model.add(tf.keras.layers.Dropout(0.2))
multi_step_model.add(tf.keras.layers.LSTM(units=100, return_sequences=False))
multi_step_model.add(tf.keras.layers.Dropout(0.2))
# model.add(Dense(units=1, activation='relu'))
multi_step_model.add(tf.keras.layers.Activation("relu"))
# add dropout layer (0.3)
multi_step_model.add(tf.keras.layers.Dense(72))  # for 72 outputs

multi_step_model.compile(
    optimizer=tf.keras.optimizers.RMSprop(clipvalue=1.0),
    loss='mae',
    metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])

multi_step_history = multi_step_model.fit(train_data_multi, epochs=EPOCHS,
                                          steps_per_epoch=steps,
                                          validation_data=val_data_multi,
                                          validation_steps=50)
plot_loss(multi_step_history, 'Multi-Step Training and validation RMSE')

for x, y in val_data_multi.take(5):
    multi_step_plot(x[0], y[0], multi_step_model.predict(x)[0])

# training-set evaluation; arguments assumed to mirror the test-set call below
scores = multi_step_model.evaluate(
    x_train_multi, y_train_multi, verbose=1, batch_size=200)
# scores = [mae_loss, rmse]; index 0 is the MAE
print('MAE: {}'.format(scores[0]))

scores_test = multi_step_model.evaluate(
    x_val_multi, y_val_multi, verbose=1, batch_size=200)
print('MAE: {}'.format(scores_test[0]))
y_pred_test = multi_step_model.predict(x_val_multi, verbose=0)

plt.figure(figsize=(10, 5))
plt.plot(y_pred_test)
plt.plot(y_val_multi)
plt.xlabel("Unit Number")
plt.legend(loc='upper left')
plt.show()