1 # pylint: disable=C0103, C0116, W0621, E0401, W0104, W0105, R0913, E1136, W0612, E0102, C0301, W0611, C0411, W0311, C0326, C0330
2 # -*- coding: utf-8 -*-
5 Automatically generated by Colaboratory.
7 Original file is located at
8 https://colab.research.google.com/drive/1W8WsMl3qckYG9Xa2CUiA-RU3322whQUf
10 Contributors: **Rohit Singh Rathaur, Girish L.**
12 Copyright [2021](2021) [*Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka*]
14 Licensed under the Apache License, Version 2.0 (the "License");
15 you may not use this file except in compliance with the License.
16 You may obtain a copy of the License at
18 http://www.apache.org/licenses/LICENSE-2.0
20 Unless required by applicable law or agreed to in writing, software
21 distributed under the License is distributed on an "AS IS" BASIS,
22 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23 See the License for the specific language governing permissions and
24 limitations under the License.
27 from keras import backend as K
28 from keras.layers import Dense
29 from keras.layers.convolutional import MaxPooling1D
30 from keras.layers.convolutional import Conv1D
31 from keras.layers import Flatten
32 from keras.utils.vis_utils import plot_model
37 import matplotlib as mpl
38 import matplotlib.pyplot as plt
39 import tensorflow as tf
40 from google.colab import drive
41 drive.mount('/content/drive')
45 df_Ellis = pd.read_csv(
46 "/content/drive/MyDrive/Failure/lstm/Ellis_FinalTwoConditionwithOR.csv")
# Plot a histogram of every column in the dataset
52 df_Ellis.hist(bins=100, figsize=(20, 15))
53 # save_fig("attribute_histogram_plots")
# Exploratory plots: smooth three host metrics with a 12-sample rolling mean,
# plot each one on its own figure, then plot all three together.
cpu_system_perc = df_Ellis[['ellis-cpu.system_perc']]
load_avg_1_min = df_Ellis[['ellis-load.avg_1_min']]
cpu_wait_perc = df_Ellis[['ellis-cpu.wait_perc']]

# Compute each rolling mean once and reuse it for the combined frame below.
smoothed_metrics = [
    metric.rolling(12).mean()
    for metric in (cpu_system_perc, load_avg_1_min, cpu_wait_perc)
]

for smoothed in smoothed_metrics:
    smoothed.plot(figsize=(20, 10), linewidth=5, fontsize=20)
    # All plots share the same sample axis, so they share the same label
    # (two of them were previously labelled 'Year').
    plt.xlabel('Timestamp', fontsize=30)

df_dg = pd.concat(smoothed_metrics, axis=1)
df_dg.plot(figsize=(20, 10), linewidth=5, fontsize=20)
plt.xlabel('Timestamp', fontsize=20)
# Compute the correlation matrix
78 color = sns.color_palette()
79 sns.set_style('darkgrid')
81 correaltionMatrice = df_Ellis.corr()
82 f, ax = plt.subplots(figsize=(20, 10))
92 df_Ellis.corrwith(df_Ellis['ellis-load.avg_1_min'])
94 # using multivariate feature
97 'ellis-cpu.wait_perc',
98 'ellis-load.avg_1_min',
99 'ellis-net.in_bytes_sec',
102 features = df_Ellis[features_3]
103 features.index = df_Ellis['Timestamp']
106 features.plot(subplots=True)
108 features = features.values
112 tf.random.set_seed(13)
# Standardize every feature column independently, using statistics taken from
# the training rows only so no information from the validation rows leaks in.
# axis=0 is required: without it NumPy reduces over the whole 2-D array and
# all columns end up scaled by one pooled mean/std.
features_mean = features[:train_split].mean(axis=0)
features_std = features[:train_split].std(axis=0)
# A column that is constant over the training rows has std 0; divide by 1
# instead so it becomes all zeros rather than NaN/inf.
features_std[features_std == 0] = 1.0
features = (features - features_mean) / features_std
119 print(type(features))
120 print(features.shape)
# Create multivariate (windowed) data
def mutlivariate_data(features, target, start_idx, end_idx, history_size, target_size,
                      step, single_step=False):
    """Slice a feature matrix into (history window, label) training pairs.

    For every position ``i`` in ``[start_idx + history_size, end_idx)`` the
    window is ``features[i - history_size:i:step]`` and the label is taken
    from ``target`` relative to ``i``.

    Args:
        features: Indexable array of samples (first axis is the sample axis).
        target: Indexable sequence the labels are read from.
        start_idx: First sample that may appear in a history window.
        end_idx: Exclusive upper bound for ``i``. ``None`` means "up to the
            last position that still has ``target_size`` samples after it".
        history_size: Number of past samples spanned by each window.
        target_size: How far ahead the label lies (single step) or how many
            future samples form the label (multi step).
        step: Stride used when sampling inside the history window.
        single_step: If true the label is the single value
            ``target[i + target_size]``; otherwise it is the slice
            ``target[i:i + target_size]``.

    Returns:
        Tuple ``(data, labels)`` of NumPy arrays.
    """
    data = []
    labels = []

    start_idx = start_idx + history_size
    # Only derive the upper bound when the caller did not supply one;
    # overwriting it unconditionally would ignore the train/validation split.
    if end_idx is None:
        end_idx = len(features) - target_size

    for i in range(start_idx, end_idx):
        idxs = range(i - history_size, i, step)  # sub-sample the window by `step`
        data.append(features[idxs])
        # Exactly one label per window, in the form selected by `single_step`.
        if single_step:
            labels.append(target[i + target_size])
        else:
            labels.append(target[i:i + target_size])

    return np.array(data), np.array(labels)
142 # generate multivariate data
149 x_train_ss, y_train_ss = mutlivariate_data(features, features[:, 1], 0, train_split, history,
150 future_target, STEP, single_step=True)
152 x_val_ss, y_val_ss = mutlivariate_data(features, features[:, 1], train_split, None, history,
153 future_target, STEP, single_step=True)
155 print(x_train_ss.shape, y_train_ss.shape)
156 print(x_val_ss.shape, y_val_ss.shape)
162 train_ss = tf.data.Dataset.from_tensor_slices((x_train_ss, y_train_ss))
163 train_ss = train_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()
165 val_ss = tf.data.Dataset.from_tensor_slices((x_val_ss, y_val_ss))
166 val_ss = val_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()
def root_mean_squared_error(y_true, y_pred):
    """Return the root of the mean squared difference between the inputs.

    Computed with Keras backend ops (``K``): square the element-wise error,
    take the mean over all elements, then take the square root.
    """
    squared_error = K.square(y_pred - y_true)
    mean_squared_error = K.mean(squared_error)
    return K.sqrt(mean_squared_error)
# Modelling using a 1D CNN (the earlier LSTM variant is kept commented out below)
# Single-step regressor: Conv1D -> MaxPooling1D -> Flatten -> Dense(50) -> Dense(1).
single_step_model = tf.keras.models.Sequential()
# Take the (timesteps, features) input shape from the training windows instead
# of hard-coding it; a fixed (1, 48) shape has a sequence length of 1, which
# is too short for a kernel_size=2 convolution.
single_step_model.add(Conv1D(filters=64, kernel_size=2, activation='relu',
                             input_shape=x_train_ss.shape[-2:]))
single_step_model.add(MaxPooling1D(pool_size=2))
single_step_model.add(Flatten())
single_step_model.add(Dense(50, activation='relu'))
single_step_model.add(Dense(1))
# The metric is named 'rmse' because the RMSE plot later in the file reads
# history.history['rmse'] / ['val_rmse'].
single_step_model.compile(
    optimizer='adam', loss='mae',
    metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
195 # single_step_model.add(tf.keras.layers.LSTM(32, return_sequences=False, input_shape = x_train_ss.shape[-2:]))
196 # single_step_model.add(tf.keras.layers.Dropout(0.3))
197 # single_step_model.add(tf.keras.layers.Dense(1))
198 # single_step_model.compile(optimizer = tf.keras.optimizers.Adam(), loss = 'mae',metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
199 # single_step_model.compile(loss='mse', optimizer='rmsprop')
200 single_step_model_history=single_step_model.fit(train_ss, epochs=EPOCHS,
201 steps_per_epoch=steps, validation_data=val_ss,
203 single_step_model.summary()
206 to_file='/content/drive/MyDrive/Failure/lstm/CNN-LSTM.png',
208 show_layer_names=True)
210 # plot train test loss
def plot_loss(history, title):
    """Plot training loss against validation loss, one point per epoch.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict must
            contain the keys ``'loss'`` and ``'val_loss'``.
        title: Text shown as the figure title.
    """
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(len(loss))

    # Start a fresh figure so the curves are not drawn onto an earlier plot.
    plt.figure()
    plt.plot(epochs, loss, 'b', label='Train Loss')
    plt.plot(epochs, val_loss, 'r', label='Validation Loss')
    plt.title(title)
    # The line labels above are only visible once a legend is drawn.
    plt.legend()
225 plot_loss(single_step_model_history,
226 'Single Step Training and validation loss')
# Plot training vs. validation RMSE
def plot_loss(history, title):
    """Plot training RMSE against validation RMSE, one point per epoch.

    Note: this rebinds the name ``plot_loss`` that is defined earlier in the
    file for the plain loss curves; every call after this point plots RMSE.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict must
            contain the keys ``'rmse'`` and ``'val_rmse'``.
        title: Text shown as the figure title.
    """
    rmse = history.history['rmse']
    val_rmse = history.history['val_rmse']
    epochs = range(len(rmse))

    # Start a fresh figure so the curves are not drawn onto an earlier plot.
    plt.figure()
    plt.plot(epochs, rmse, 'b', label='Train RMSE')
    plt.plot(epochs, val_rmse, 'r', label='Validation RMSE')
    plt.title(title)
    # The line labels above are only visible once a legend is drawn.
    plt.legend()
243 plot_loss(single_step_model_history,
244 'Single Step Training and validation loss')
# Function to create time steps
def create_time_steps(length):
    """Return the negative offsets ``[-length, ..., -2, -1]`` as a list.

    An empty list is returned when ``length`` is zero or negative.
    """
    return [-offset for offset in range(length, 0, -1)]
250 # function to plot time series data
def plot_time_series(plot_data, delta, title):
    """Plot a history window together with the true and predicted future value.

    Args:
        plot_data: Sequence of arrays. Element 0 is drawn as a line over the
            negative time steps; each later element is drawn as a marker at
            the future position. At most three elements are supported (one
            label/marker style per element).
        delta: x position of the future markers; a falsy value means 0.
        title: Text shown as the plot title.

    Returns:
        The ``plt`` module, so a caller that binds the result can keep
        working with the current figure.
    """
    labels = ["History", 'True Future', 'Model Predcited']
    marker = ['.-', 'rx', 'go']
    time_steps = create_time_steps(plot_data[0].shape[0])
    future = delta if delta else 0

    plt.title(title)
    for i, series in enumerate(plot_data):
        if i:
            # Future values (true / predicted) are single markers at `future`.
            plt.plot(future, series, marker[i], markersize=10, label=labels[i])
        else:
            # The history window is a line over the negative time steps.
            plt.plot(time_steps, series.flatten(), marker[i], label=labels[i])
    plt.legend()
    plt.xlim([time_steps[0], (future + 5) * 2])
    plt.xlabel('Time_Step')
    return plt
273 # Moving window average
276 return np.mean(history)
278 # plot time series and predicted values
280 for x, y in val_ss.take(5):
281 plot=plot_time_series([x[0][:, 1].numpy(), y[0].numpy(),
282 single_step_model.predict(x)[0]], 12,
283 'Single Step Prediction')
286 """# **MultiStep Forcasting**"""
288 future_target=72 # 72 future values
289 x_train_multi, y_train_multi=mutlivariate_data(features, features[:, 1], 0,
290 train_split, history,
292 x_val_multi, y_val_multi=mutlivariate_data(features, features[:, 1],
293 train_split, None, history,
296 print(x_train_multi.shape)
297 print(y_train_multi.shape)
301 train_data_multi=tf.data.Dataset.from_tensor_slices(
302 (x_train_multi, y_train_multi))
303 train_data_multi=train_data_multi.cache().shuffle(
304 buffer_size).batch(batch_size).repeat()
306 val_data_multi=tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
307 val_data_multi=val_data_multi.batch(batch_size).repeat()
309 print(train_data_multi)
310 print(val_data_multi)
def multi_step_plot(history, true_future, prediction):
    """Plot a history window with its true future and, if given, a prediction.

    Args:
        history: 2-D array of past samples; column 1 is the plotted series.
        true_future: Sequence of the actual future values.
        prediction: Array of predicted future values. An all-zero array
            (callers pass ``np.array([0])``) means "no prediction" and is
            not drawn.
    """
    plt.figure(figsize=(12, 6))
    num_in = create_time_steps(len(history))
    num_out = len(true_future)
    # Future x positions, scaled by the module-level sampling stride STEP.
    future_steps = np.arange(num_out) / STEP

    plt.plot(num_in, np.array(history[:, 1]), label='History')
    plt.plot(future_steps, np.array(true_future), 'bo', label='True Future')
    # Skip the placeholder: a length-1 dummy cannot be plotted against
    # `num_out` x positions.
    if np.any(prediction):
        plt.plot(future_steps, np.array(prediction), 'ro',
                 label='Predicted Future')
    plt.legend(loc='upper left')
329 for x, y in train_data_multi.take(1):
330 multi_step_plot(x[0], y[0], np.array([0]))
# Multi-step regressor: same CNN stack as the single-step model, but it must
# emit one value per forecast step.
multi_step_model = tf.keras.models.Sequential()
# Take the input shape from the multi-step windows this model is trained on,
# not from the single-step tensors.
multi_step_model.add(Conv1D(filters=64, kernel_size=2,
                            activation='relu', input_shape=x_train_multi.shape[-2:]))
multi_step_model.add(MaxPooling1D(pool_size=2))
multi_step_model.add(Flatten())
multi_step_model.add(Dense(50, activation='relu'))
# One output per future step (future_target == 72). With Dense(1) the model
# emits a single value, which cannot be plotted against the 72 true values.
multi_step_model.add(Dense(future_target))
# The metric is named 'rmse' because plot_loss reads history.history['rmse'].
multi_step_model.compile(
    optimizer='adam', loss='mae',
    metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
347 # multi_step_model.add(tf.keras.layers.LSTM(32,
348 # return_sequences=True,
349 # input_shape=x_train_multi.shape[-2:]))
350 # multi_step_model.add(tf.keras.layers.LSTM(16, activation='relu'))
351 # aDD dropout layer (0.3)
352 # multi_step_model.add(tf.keras.layers.Dense(72)) # for 72 outputs
354 # multi_step_model.compile(optimizer=tf.keras.optimizers.RMSprop(clipvalue=1.0),
355 # loss='mae',metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
357 multi_step_history=multi_step_model.fit(train_data_multi, epochs=EPOCHS,
358 steps_per_epoch=steps,
359 validation_data=val_data_multi,
362 plot_loss(multi_step_history, 'Multi-Step Training and validation loss')
364 for x, y in val_data_multi.take(5):
365 multi_step_plot(x[0], y[0], multi_step_model.predict(x)[0])
367 scores=multi_step_model.evaluate(
372 print('MAE: {}'.format(scores[1]))
# Evaluate on the validation windows. evaluate() returns [loss, *metrics];
# the model is compiled with loss='mae' and a single RMSE metric, so index 0
# is the MAE and index 1 is the RMSE.
scores_test = multi_step_model.evaluate(
    x_val_multi, y_val_multi, verbose=1, batch_size=200)
# Report the scores just computed (previously this printed the stale `scores`
# from the earlier evaluation and labelled the RMSE entry as MAE).
print('MAE: {}'.format(scores_test[0]))
print('RMSE: {}'.format(scores_test[1]))