{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "ywp6YMCFQKEQ" }, "source": [ "Contributors: **Rohit Singh Rathaur, Girish L.** \n", "\n", "Copyright [2021](2021) [*Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka*]\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\");\n", "you may not use this file except in compliance with the License.\n", "You may obtain a copy of the License at\n", "\n", " http://www.apache.org/licenses/LICENSE-2.0\n", "\n", "Unless required by applicable law or agreed to in writing, software\n", "distributed under the License is distributed on an \"AS IS\" BASIS,\n", "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "See the License for the specific language governing permissions and\n", "limitations under the License." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "YQ6lT1e2hrx4", "outputId": "21d3bdb5-0ac2-44a6-90ec-a3f2677014c4" }, "outputs": [], "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "tLhroy5BnMnC" }, "outputs": [], "source": [ "# Importing libraries\n", "import tensorflow as tf\n", "import matplotlib.pyplot as plt\n", "import matplotlib as mpl\n", "import pandas as pd\n", "import numpy as np\n", "import os" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 419 }, "id": "2-UpMVsSnfCI", "outputId": "b6f5691d-1e76-43a4-de55-3156dbc02b4d" }, "outputs": [], "source": [ "df_Ellis = pd.read_csv(\"/content/drive/MyDrive/Failure/lstm/Ellis_FinalTwoConditionwithOR.csv\")\n", "df_Ellis" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 293 }, "id": "92xBt43BnjAo", "outputId": "ed2f1595-c32c-43eb-ae2f-b5fce96e9a92" }, "outputs": [], "source": [ "df_Ellis.plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 879 }, "id": "RSo-aa-SIoBR", "outputId": "63f0c61a-8a5a-41af-cbec-42fe0625b4d1" }, "outputs": [], "source": [ "# we show here the hist\n", "df_Ellis.hist(bins=100,figsize=(20,15))\n", "#save_fig(\"attribute_histogram_plots\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 634 }, "id": "gggaMJ_2LtFs", "outputId": "31ad3663-dfe7-4ac5-ddc1-bb5bd6e5bdfd" }, "outputs": [], "source": [ "cpu_system_perc = df_Ellis[['ellis-cpu.system_perc']] \n", "cpu_system_perc.rolling(12).mean().plot(figsize=(20,10), linewidth=5, fontsize=20) \n", "plt.xlabel('Timestamp', fontsize=30);" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 634 }, "id": "R_ctvXcQL1Xf", "outputId": "dbc14537-5eb4-4433-f71b-8ad22f551fe8" }, "outputs": [], "source": [ "load_avg_1_min = df_Ellis[['ellis-load.avg_1_min']] \n", "load_avg_1_min.rolling(12).mean().plot(figsize=(20,10), linewidth=5, fontsize=20) \n", "plt.xlabel('Timestamp', fontsize=30);" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 634 }, "id": "Gkd5ecCmL6Bw", "outputId": "af90f36e-850f-4ae6-f350-c7a55c2db05b" }, "outputs": [], "source": [ "cpu_wait_perc = df_Ellis[['ellis-cpu.wait_perc']] \n", "cpu_wait_perc.rolling(12).mean().plot(figsize=(20,10), linewidth=5, fontsize=20) \n", "plt.xlabel('Year', fontsize=30);" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 624 }, "id": "EycZrQU0MBSX", "outputId": "bcf199b4-b7f9-4a8c-ba2b-aff666038715" }, "outputs": [], "source": [ "df_dg = pd.concat([cpu_system_perc.rolling(12).mean(), load_avg_1_min.rolling(12).mean(),cpu_wait_perc.rolling(12).mean()], axis=1) \n", "df_dg.plot(figsize=(20,10), linewidth=5, fontsize=20) \n", "plt.xlabel('Year', fontsize=20); " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "YoQA_MIBMknS" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 710 }, "id": "Pi8UMMitMa3Q", "outputId": "189c418a-6688-4f58-c7f4-1354e235add7" }, "outputs": [], "source": [ "# we establish the corrmartrice\n", "import seaborn as sns\n", "color = sns.color_palette()\n", "sns.set_style('darkgrid')\n", "\n", "correaltionMatrice = df_Ellis.corr()\n", "f, ax = plt.subplots(figsize=(20, 10))\n", "sns.heatmap(correaltionMatrice, cbar=True, vmin=0, vmax=1, square=True, annot=True);\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "rkYwyKtXMvpy", "outputId": "708e7b7c-10ce-4d4a-ebbb-894567d167c9" }, "outputs": [], "source": [ "df_Ellis.corrwith(df_Ellis['ellis-load.avg_1_min'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 235 }, "id": "5oQK-ddinvCM", "outputId": "92bf698d-e2bd-4438-bd84-346da3cfa696" }, "outputs": [], "source": [ "## ## using multivariate feature \n", "\n", "features_3 = ['ellis-cpu.wait_perc', 'ellis-load.avg_1_min', 'ellis-net.in_bytes_sec', 'Label']\n", "\n", "features = df_Ellis[features_3]\n", "features.index = df_Ellis['Timestamp']\n", "features.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 386 }, "id": "qbqn755fo81g", "outputId": "91c28242-4c9a-4ce3-9649-501eafa9247e" }, "outputs": [], "source": [ "features.plot(subplots=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "jJQD1x9psWCH" }, "outputs": [], "source": [ "features = features.values" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "xf8WCiykpUzN", "outputId": "c34ed8fa-edd0-41d0-e041-29a0853a3370" }, "outputs": [], "source": [ "### standardize data\n", "train_split = 141600\n", "tf.random.set_seed(13)\n", "\n", "### standardize data\n", "features_mean = features[:train_split].mean()\n", "features_std = features[:train_split].std()\n", "features = (features - features_mean)/ features_std\n", "\n", "print(type(features))\n", "print(features.shape)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "1a0hNDmppnLB" }, "outputs": [], "source": [ "### create mutlivariate data\n", "\n", "def mutlivariate_data(features , target , start_idx , end_idx , history_size , target_size,\n", " step , single_step = False):\n", " data = []\n", " labels = []\n", " start_idx = start_idx + history_size\n", " if end_idx is None:\n", " end_idx = len(features)- target_size\n", " for i in range(start_idx , end_idx ):\n", " idxs = range(i-history_size, i, step) ### using step\n", " data.append(features[idxs])\n", " if single_step:\n", " labels.append(target[i+target_size])\n", " else:\n", " labels.append(target[i:i+target_size])\n", "\n", " return np.array(data) , np.array(labels)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Z0CivgkitfgE", "outputId": "fd7abe27-940d-4f71-e581-5deb3ac42262" }, "outputs": [], "source": [ "### generate multivariate data\n", "\n", "history = 720\n", "future_target = 72\n", "STEP = 6\n", "\n", "x_train_ss , y_train_ss = mutlivariate_data(features , features[:, 1], 0, train_split, history,\n", " future_target, STEP , single_step = True)\n", "\n", "x_val_ss , y_val_ss = mutlivariate_data(features , features[:,1] , train_split , None , history ,\n", " future_target, STEP, single_step = True)\n", "\n", "print(x_train_ss.shape , y_train_ss.shape)\n", "print(x_val_ss.shape , y_val_ss.shape)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "VBdr2epGu3aq", "outputId": "3c3a50d3-6f47-400e-edd0-18a46154f7fc" }, "outputs": [], "source": [ "## tensorflow dataset\n", "batch_size = 256\n", "buffer_size = 10000\n", "\n", "train_ss = tf.data.Dataset.from_tensor_slices((x_train_ss, y_train_ss))\n", "train_ss = train_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()\n", "\n", "val_ss = tf.data.Dataset.from_tensor_slices((x_val_ss, y_val_ss))\n", "val_ss = val_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()\n", "\n", "print(train_ss)\n", "print(val_ss)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "9eQpwUyGglu_" }, "outputs": [], "source": [ "def root_mean_squared_error(y_true, y_pred):\n", " return K.sqrt(K.mean(K.square(y_pred - y_true))) " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "1cKtTAzqyiyL" }, "outputs": [], "source": [ "from keras.layers import Activation, Dense, Dropout\n", "from keras.utils.vis_utils import plot_model\n", "from keras.layers import Flatten\n", "from keras.layers.convolutional import Conv1D\n", "from keras.layers.convolutional import MaxPooling1D\n", "### Modelling using LSTM\n", "steps = 50\n", "\n", "EPOCHS =20\n", "\n", "single_step_model = tf.keras.models.Sequential()\n", "\n", "single_step_model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(1,48))\n", "single_step_model.add(MaxPooling1D(pool_size=2))\n", "single_step_model.add(Flatten())\n", "single_step_model.add(Dense(50, activation='relu'))\n", "single_step_model.add(Dense(1))\n", "single_step_model.compile(optimizer='adam', loss='mae', metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])\n", "\n", "\n", "\n", "#single_step_model.add(tf.keras.layers.LSTM(32, return_sequences=False, input_shape = x_train_ss.shape[-2:]))\n", "#single_step_model.add(tf.keras.layers.Dropout(0.3))\n", "#single_step_model.add(tf.keras.layers.Dense(1))\n", "#single_step_model.compile(optimizer = tf.keras.optimizers.Adam(), loss = 'mae',metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])\n", "#single_step_model.compile(loss='mse', optimizer='rmsprop')\n", "single_step_model_history = single_step_model.fit(train_ss, epochs = EPOCHS , \n", " steps_per_epoch =steps, validation_data = val_ss,\n", " validation_steps = 50)\n", "single_step_model.summary()\n", "plot_model(single_step_model, to_file='/content/drive/MyDrive/Failure/lstm/CNN-LSTM.png', show_shapes=True, show_layer_names=True)\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 281 }, "id": "Pgev0dgzzBVx", "outputId": "81b15b3b-0132-428e-d545-d689caa2e2eb" }, "outputs": [], "source": [ "## plot train test loss \n", "\n", "def plot_loss(history , title):\n", " loss = history.history['loss']\n", " val_loss = history.history['val_loss']\n", "\n", " epochs = range(len(loss))\n", " plt.figure()\n", " plt.plot(epochs, loss , 'b' , label = 'Train Loss')\n", " plt.plot(epochs, val_loss , 'r' , label = 'Validation Loss')\n", " plt.title(title)\n", " plt.legend()\n", " plt.grid()\n", " plt.show()\n", "\n", "plot_loss(single_step_model_history , 'Single Step Training and validation loss')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 281 }, "id": "EnYf6j4okEoC", "outputId": "89bda316-e762-4aef-b32d-e7649da9ac39" }, "outputs": [], "source": [ "## plot train test loss \n", "\n", "def plot_loss(history , title):\n", " loss = history.history['rmse']\n", " val_loss = history.history['val_rmse']\n", "\n", " epochs = range(len(loss))\n", " plt.figure()\n", " plt.plot(epochs, loss , 'b' , label = 'Train RMSE')\n", " plt.plot(epochs, val_loss , 'r' , label = 'Validation RMSE')\n", " plt.title(title)\n", " plt.legend()\n", " plt.grid()\n", " plt.show()\n", "\n", "plot_loss(single_step_model_history , 'Single Step Training and validation loss')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "WMegV8mNAwe_" }, "outputs": [], "source": [ "### fucntion to create time steps\n", "def create_time_steps(length):\n", " return list(range(-length,0))\n", "\n", "### function to plot time series data\n", "\n", "def plot_time_series(plot_data, delta , title):\n", " labels = [\"History\" , 'True Future' , 'Model Predcited']\n", " marker = ['.-' , 'rx' , 'go']\n", " time_steps = create_time_steps(plot_data[0].shape[0])\n", "\n", " if delta:\n", " future = delta\n", " else:\n", " future = 0\n", " plt.title(title)\n", " for i , x in enumerate(plot_data):\n", " if i :\n", " plt.plot(future , plot_data[i] , marker[i], markersize = 10 , label = labels[i])\n", " else:\n", " plt.plot(time_steps, plot_data[i].flatten(), marker[i], label = labels[i])\n", " plt.legend()\n", " plt.xlim([time_steps[0], (future+5) *2])\n", "\n", " plt.xlabel('Time_Step')\n", " return plt" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "q99i2c-9XKF3" }, "outputs": [], "source": [ "### Moving window average\n", "\n", "def MWA(history):\n", " return np.mean(history)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "xFJT1rZDAUVL", "outputId": "a045ab0c-628b-456c-9ef6-5b92926358c6" }, "outputs": [], "source": [ "# plot time series and predicted values\n", "\n", "for x, y in val_ss.take(5):\n", " plot = plot_time_series([x[0][:, 1].numpy(), y[0].numpy(),\n", " single_step_model.predict(x)[0]], 12,\n", " 'Single Step Prediction')\n", " plot.show()" ] }, { "cell_type": "markdown", "metadata": { "id": "_KXWQVmyCSix" }, "source": [ "# **MultiStep Forcasting**" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Lu7m2Rr4AbMK", "outputId": "8e070f0a-fc5e-47a3-e330-ea8c8803c2c8" }, "outputs": [], "source": [ "future_target = 72 # 72 future values\n", "x_train_multi, y_train_multi = mutlivariate_data(features, features[:, 1], 0,\n", " train_split, history,\n", " future_target, STEP)\n", "x_val_multi, y_val_multi = mutlivariate_data(features, features[:, 1],\n", " train_split, None, history,\n", " future_target, STEP)\n", "\n", "print(x_train_multi.shape)\n", "print(y_train_multi.shape)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "GLRv5D16CrHj" }, "outputs": [], "source": [ "# TF DATASET\n", "\n", "train_data_multi = tf.data.Dataset.from_tensor_slices((x_train_multi, y_train_multi))\n", "train_data_multi = train_data_multi.cache().shuffle(buffer_size).batch(batch_size).repeat()\n", "\n", "val_data_multi = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))\n", "val_data_multi = val_data_multi.batch(batch_size).repeat()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "fjXexah9C8yg", "outputId": "ed05331e-fdf2-460f-a2f5-4bac3be78bc8" }, "outputs": [], "source": [ "print(train_data_multi)\n", "print(val_data_multi)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 385 }, "id": "7mtLZ6S-DPU-", "outputId": "02416632-1109-425e-af81-0f17a79e3120" }, "outputs": [], "source": [ "#plotting function\n", "def multi_step_plot(history, true_future, prediction):\n", " plt.figure(figsize=(12, 6))\n", " num_in = create_time_steps(len(history))\n", " num_out = len(true_future)\n", " plt.grid()\n", " plt.plot(num_in, np.array(history[:, 1]), label='History')\n", " plt.plot(np.arange(num_out)/STEP, np.array(true_future), 'bo',\n", " label='True Future')\n", " if prediction.any():\n", " plt.plot(np.arange(num_out)/STEP, np.array(prediction), 'ro',\n", " label='Predicted Future')\n", " plt.legend(loc='upper left')\n", " plt.show()\n", " \n", "\n", "\n", "for x, y in train_data_multi.take(1):\n", " multi_step_plot(x[0], y[0], np.array([0]))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "snN_Flr5DWQN", "outputId": "87a55e55-7cfd-4c10-9b15-db992065dd67" }, "outputs": [], "source": [ "multi_step_model = tf.keras.models.Sequential()\n", "\n", "\n", "multi_step_model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=x_train_ss.shape[-2:]))\n", "multi_step_model.add(MaxPooling1D(pool_size=2))\n", "multi_step_model.add(Flatten())\n", "multi_step_model.add(Dense(50, activation='relu'))\n", "multi_step_model.add(Dense(1))\n", "multi_step_model.compile(optimizer='adam', loss='mae',metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])\n", "\n", "\n", "#multi_step_model.add(tf.keras.layers.LSTM(32,\n", " # return_sequences=True,\n", " # input_shape=x_train_multi.shape[-2:]))\n", "#multi_step_model.add(tf.keras.layers.LSTM(16, activation='relu'))\n", "#aDD dropout layer (0.3)\n", "#multi_step_model.add(tf.keras.layers.Dense(72)) # for 72 outputs\n", "\n", "#multi_step_model.compile(optimizer=tf.keras.optimizers.RMSprop(clipvalue=1.0), loss='mae',metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])\n", "\n", "multi_step_history = multi_step_model.fit(train_data_multi, epochs=EPOCHS,\n", " steps_per_epoch=steps,\n", " validation_data=val_data_multi,\n", " validation_steps=50)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 281 }, "id": "Ay5m27doDsTt", "outputId": "9b8f5274-03f9-427f-84ce-6dfafc9e7b45" }, "outputs": [], "source": [ "plot_loss(multi_step_history, 'Multi-Step Training and validation loss')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "6ZFP49W4D2wp" }, "outputs": [], "source": [ "for x, y in val_data_multi.take(5):\n", " multi_step_plot(x[0], y[0], multi_step_model.predict(x)[0])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "DNKMjVoAVqZP", "outputId": "403d8d4d-ca00-40d2-a179-f13879819f15" }, "outputs": [], "source": [ "scores = multi_step_model.evaluate(x_train_multi, y_train_multi, verbose=1, batch_size=200)\n", "print('MAE: {}'.format(scores[1]))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "oDXXSFLy07gH", "outputId": "47b9dcf3-cc67-43d3-e88a-8b51ea85a993" }, "outputs": [], "source": [ "scores_test = multi_step_model.evaluate(x_val_multi, y_val_multi, verbose=1, batch_size=200)\n", "print('MAE: {}'.format(scores[1]))\n" ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "CNN.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 1 }