{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "zyycU3DFlecK"
+ },
+ "source": [
+ "Contributors: **Rohit Singh Rathaur, Girish L.** \n",
+ "\n",
+ "Copyright 2021 *Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka*\n",
+ "\n",
+ "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+ "you may not use this file except in compliance with the License.\n",
+ "You may obtain a copy of the License at\n",
+ "\n",
+ " http://www.apache.org/licenses/LICENSE-2.0\n",
+ "\n",
+ "Unless required by applicable law or agreed to in writing, software\n",
+ "distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+ "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "See the License for the specific language governing permissions and\n",
+ "limitations under the License."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "gehKp2rySVf8"
+ },
+ "outputs": [],
+ "source": [
+ "# Import libraries used for visualization and analysis\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "\n",
+ "%matplotlib inline\n",
+ "import matplotlib\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "from pandas import Series,DataFrame\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns\n",
+ "from sklearn.preprocessing import scale\n",
+ "from sklearn.decomposition import PCA\n",
+ "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
+ "from scipy import stats\n",
+ "from IPython.display import display, HTML"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
"colab": {
- "name": "FeatureCreation.ipynb",
- "provenance": []
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
+ "base_uri": "https://localhost:8080/"
},
- "language_info": {
- "name": "python"
- }
+ "id": "tkuBlbCXSsdP",
+ "outputId": "2b3ef633-a851-4c53-80eb-6b1bf4ffcc1c"
+ },
+ "outputs": [],
+ "source": [
+ "from google.colab import drive\n",
+ "drive.mount('/gdrive')"
+ ]
},
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "zyycU3DFlecK"
- },
- "source": [
- "Contributors: **Rohit Singh Rathaur, Girish L.** \n",
- "\n",
- "Copyright [2021](2021) [*Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka*]\n",
- "\n",
- "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
- "you may not use this file except in compliance with the License.\n",
- "You may obtain a copy of the License at\n",
- "\n",
- " http://www.apache.org/licenses/LICENSE-2.0\n",
- "\n",
- "Unless required by applicable law or agreed to in writing, software\n",
- "distributed under the License is distributed on an \"AS IS\" BASIS,\n",
- "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
- "See the License for the specific language governing permissions and\n",
- "limitations under the License."
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "gehKp2rySVf8"
- },
- "source": [
- "# Import libraries use for visualization and analysis\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "\n",
- "%matplotlib inline\n",
- "import matplotlib\n",
- "import matplotlib.pyplot as plt\n",
- "\n",
- "from pandas import Series,DataFrame\n",
- "import matplotlib.pyplot as plt\n",
- "import seaborn as sns\n",
- "from sklearn.preprocessing import scale\n",
- "from sklearn.decomposition import PCA\n",
- "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
- "from scipy import stats\n",
- "from IPython.display import display, HTML"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "tkuBlbCXSsdP",
- "outputId": "2b3ef633-a851-4c53-80eb-6b1bf4ffcc1c"
- },
- "source": [
- "from google.colab import drive\n",
- "drive.mount('/gdrive')"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount(\"/gdrive\", force_remount=True).\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "wZXe8D88S-6R"
- },
- "source": [
- "# **Loading the Data**"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "KiDSpl37Sy39"
- },
- "source": [
- "df_Ellis = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Final.csv\")\n",
- "#df_Bono = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Bono.csv\", error_bad_lines=False)\n",
- "#df_Sprout = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Sprout.csv\", error_bad_lines=False)\n",
- "#df_Homer = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Homer.csv\", error_bad_lines=False)\n",
- "#df_Homestead = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Homestead.csv\", error_bad_lines=False)\n",
- "#df_Ralf = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Ralf.csv\", error_bad_lines=False)"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 204
- },
- "id": "dpy8jAm-TsCs",
- "outputId": "d8ad2072-1fa3-4b3c-fb55-b5128767b349"
- },
- "source": [
- "df_Ellis.head()"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Timestamp</th>\n",
- " <th>ellis-cpu.system_perc</th>\n",
- " <th>ellis-cpu.wait_perc</th>\n",
- " <th>ellis-load.avg_1_min</th>\n",
- " <th>ellis-mem.free_mb</th>\n",
- " <th>ellis-net.in_bytes_sec</th>\n",
- " <th>ellis-net.out_packets_sec</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>14/09/2016 0:00</td>\n",
- " <td>0.5</td>\n",
- " <td>12.9</td>\n",
- " <td>1.73</td>\n",
- " <td>3949</td>\n",
- " <td>5413.200</td>\n",
- " <td>62.067</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>14/09/2016 0:00</td>\n",
- " <td>0.4</td>\n",
- " <td>10.3</td>\n",
- " <td>1.79</td>\n",
- " <td>3950</td>\n",
- " <td>5201.667</td>\n",
- " <td>59.567</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>14/09/2016 0:01</td>\n",
- " <td>0.4</td>\n",
- " <td>11.8</td>\n",
- " <td>1.52</td>\n",
- " <td>3950</td>\n",
- " <td>5370.733</td>\n",
- " <td>61.200</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>14/09/2016 0:01</td>\n",
- " <td>0.4</td>\n",
- " <td>12.9</td>\n",
- " <td>1.43</td>\n",
- " <td>3949</td>\n",
- " <td>5292.467</td>\n",
- " <td>60.400</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>14/09/2016 0:02</td>\n",
- " <td>0.5</td>\n",
- " <td>12.1</td>\n",
- " <td>1.44</td>\n",
- " <td>3950</td>\n",
- " <td>5318.167</td>\n",
- " <td>61.700</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " Timestamp ... ellis-net.out_packets_sec\n",
- "0 14/09/2016 0:00 ... 62.067\n",
- "1 14/09/2016 0:00 ... 59.567\n",
- "2 14/09/2016 0:01 ... 61.200\n",
- "3 14/09/2016 0:01 ... 60.400\n",
- "4 14/09/2016 0:02 ... 61.700\n",
- "\n",
- "[5 rows x 7 columns]"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 264
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 297
- },
- "id": "dJa9FgJNgqpI",
- "outputId": "54d6c43d-489f-4347-93e5-12e4a4da2066"
- },
- "source": [
- "df_Ellis.describe()"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>ellis-cpu.system_perc</th>\n",
- " <th>ellis-cpu.wait_perc</th>\n",
- " <th>ellis-load.avg_1_min</th>\n",
- " <th>ellis-mem.free_mb</th>\n",
- " <th>ellis-net.in_bytes_sec</th>\n",
- " <th>ellis-net.out_packets_sec</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>count</th>\n",
- " <td>177000.000000</td>\n",
- " <td>177000.000000</td>\n",
- " <td>177000.000000</td>\n",
- " <td>177000.000000</td>\n",
- " <td>1.770000e+05</td>\n",
- " <td>177000.000000</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>mean</th>\n",
- " <td>2.315540</td>\n",
- " <td>1.024163</td>\n",
- " <td>0.198842</td>\n",
- " <td>4206.847232</td>\n",
- " <td>1.855987e+07</td>\n",
- " <td>1336.694851</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>std</th>\n",
- " <td>1.170977</td>\n",
- " <td>3.127178</td>\n",
- " <td>0.262227</td>\n",
- " <td>173.364297</td>\n",
- " <td>5.612164e+06</td>\n",
- " <td>2220.146124</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>min</th>\n",
- " <td>0.100000</td>\n",
- " <td>0.000000</td>\n",
- " <td>0.000000</td>\n",
- " <td>2320.000000</td>\n",
- " <td>0.000000e+00</td>\n",
- " <td>0.000000</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>25%</th>\n",
- " <td>1.500000</td>\n",
- " <td>0.200000</td>\n",
- " <td>0.095000</td>\n",
- " <td>4095.000000</td>\n",
- " <td>1.797602e+07</td>\n",
- " <td>182.033000</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>50%</th>\n",
- " <td>1.700000</td>\n",
- " <td>0.200000</td>\n",
- " <td>0.140000</td>\n",
- " <td>4214.000000</td>\n",
- " <td>2.087674e+07</td>\n",
- " <td>200.067000</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>75%</th>\n",
- " <td>3.500000</td>\n",
- " <td>0.400000</td>\n",
- " <td>0.198000</td>\n",
- " <td>4331.000000</td>\n",
- " <td>2.160859e+07</td>\n",
- " <td>1069.667000</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>max</th>\n",
- " <td>16.700000</td>\n",
- " <td>22.400000</td>\n",
- " <td>2.580000</td>\n",
- " <td>4633.000000</td>\n",
- " <td>2.339041e+07</td>\n",
- " <td>7887.552000</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " ellis-cpu.system_perc ... ellis-net.out_packets_sec\n",
- "count 177000.000000 ... 177000.000000\n",
- "mean 2.315540 ... 1336.694851\n",
- "std 1.170977 ... 2220.146124\n",
- "min 0.100000 ... 0.000000\n",
- "25% 1.500000 ... 182.033000\n",
- "50% 1.700000 ... 200.067000\n",
- "75% 3.500000 ... 1069.667000\n",
- "max 16.700000 ... 7887.552000\n",
- "\n",
- "[8 rows x 6 columns]"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 265
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "xGVleQbnhRm6"
- },
- "source": [
- "#df_Ellis['SLO1'] = 0\n",
- "#print('Column names are: ',list(df_Ellis.columns))"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "b-F_gA61xowR",
- "outputId": "f9bd6232-2603-40ad-ccff-18887839e2da"
- },
- "source": [
- "df4 = df_Ellis[\"ellis-load.avg_1_min\"] > 2.45\n",
- "df4\n",
- "df4.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/EllisLoadAvgLabel_lessthan0198.csv')\n",
- "df4.head(50)"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "0 False\n",
- "1 False\n",
- "2 False\n",
- "3 False\n",
- "4 False\n",
- "5 False\n",
- "6 False\n",
- "7 False\n",
- "8 False\n",
- "9 False\n",
- "10 False\n",
- "11 False\n",
- "12 False\n",
- "13 False\n",
- "14 False\n",
- "15 False\n",
- "16 False\n",
- "17 False\n",
- "18 False\n",
- "19 False\n",
- "20 False\n",
- "21 False\n",
- "22 False\n",
- "23 False\n",
- "24 False\n",
- "25 False\n",
- "26 False\n",
- "27 False\n",
- "28 False\n",
- "29 False\n",
- "30 False\n",
- "31 False\n",
- "32 False\n",
- "33 False\n",
- "34 False\n",
- "35 False\n",
- "36 False\n",
- "37 False\n",
- "38 False\n",
- "39 False\n",
- "40 False\n",
- "41 False\n",
- "42 False\n",
- "43 False\n",
- "44 False\n",
- "45 False\n",
- "46 False\n",
- "47 False\n",
- "48 False\n",
- "49 False\n",
- "Name: ellis-load.avg_1_min, dtype: bool"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 267
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "8xcPRerCz8nA",
- "outputId": "fb66f20e-7365-40ec-857a-9dd9a8072401"
- },
- "source": [
- "df3 = df_Ellis[\"ellis-cpu.wait_perc\"] > 5\n",
- "df3\n",
- "df3.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/ellis-cpu>5.csv')\n",
- "df3.head(50)"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "0 True\n",
- "1 True\n",
- "2 True\n",
- "3 True\n",
- "4 True\n",
- "5 True\n",
- "6 True\n",
- "7 True\n",
- "8 True\n",
- "9 True\n",
- "10 True\n",
- "11 True\n",
- "12 True\n",
- "13 True\n",
- "14 True\n",
- "15 True\n",
- "16 True\n",
- "17 True\n",
- "18 True\n",
- "19 True\n",
- "20 True\n",
- "21 True\n",
- "22 True\n",
- "23 True\n",
- "24 True\n",
- "25 True\n",
- "26 True\n",
- "27 True\n",
- "28 True\n",
- "29 True\n",
- "30 True\n",
- "31 True\n",
- "32 True\n",
- "33 True\n",
- "34 True\n",
- "35 True\n",
- "36 True\n",
- "37 True\n",
- "38 True\n",
- "39 True\n",
- "40 True\n",
- "41 True\n",
- "42 True\n",
- "43 True\n",
- "44 True\n",
- "45 True\n",
- "46 True\n",
- "47 True\n",
- "48 True\n",
- "49 True\n",
- "Name: ellis-cpu.wait_perc, dtype: bool"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 268
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "EED56Wiq_NjM",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "20b06258-c5ba-457b-a022-cf5823217cbf"
- },
- "source": [
- "df5 = df_Ellis[\"ellis-net.out_packets_sec\"] > 1000\n",
- "df5\n",
- "df5.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/ellis-net.in_bytes_sec21139.csv')\n",
- "df5.head(50)"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "0 False\n",
- "1 False\n",
- "2 False\n",
- "3 False\n",
- "4 False\n",
- "5 False\n",
- "6 False\n",
- "7 False\n",
- "8 False\n",
- "9 False\n",
- "10 False\n",
- "11 False\n",
- "12 False\n",
- "13 False\n",
- "14 False\n",
- "15 False\n",
- "16 False\n",
- "17 False\n",
- "18 False\n",
- "19 False\n",
- "20 False\n",
- "21 False\n",
- "22 False\n",
- "23 False\n",
- "24 False\n",
- "25 False\n",
- "26 False\n",
- "27 False\n",
- "28 False\n",
- "29 False\n",
- "30 False\n",
- "31 False\n",
- "32 False\n",
- "33 False\n",
- "34 False\n",
- "35 False\n",
- "36 False\n",
- "37 False\n",
- "38 False\n",
- "39 False\n",
- "40 False\n",
- "41 False\n",
- "42 False\n",
- "43 False\n",
- "44 False\n",
- "45 False\n",
- "46 False\n",
- "47 False\n",
- "48 False\n",
- "49 False\n",
- "Name: ellis-net.out_packets_sec, dtype: bool"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 269
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "phlI40_y0mug",
- "outputId": "7fa177b9-bf9a-4b96-db65-7402f7f6cf32"
- },
- "source": [
- "# We are applying Logical OR Operator between df4 and df3\n",
- "df6 = (df4[0:176999]) | (df3[0:176999])\n",
- "df6.head(50)"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "0 True\n",
- "1 True\n",
- "2 True\n",
- "3 True\n",
- "4 True\n",
- "5 True\n",
- "6 True\n",
- "7 True\n",
- "8 True\n",
- "9 True\n",
- "10 True\n",
- "11 True\n",
- "12 True\n",
- "13 True\n",
- "14 True\n",
- "15 True\n",
- "16 True\n",
- "17 True\n",
- "18 True\n",
- "19 True\n",
- "20 True\n",
- "21 True\n",
- "22 True\n",
- "23 True\n",
- "24 True\n",
- "25 True\n",
- "26 True\n",
- "27 True\n",
- "28 True\n",
- "29 True\n",
- "30 True\n",
- "31 True\n",
- "32 True\n",
- "33 True\n",
- "34 True\n",
- "35 True\n",
- "36 True\n",
- "37 True\n",
- "38 True\n",
- "39 True\n",
- "40 True\n",
- "41 True\n",
- "42 True\n",
- "43 True\n",
- "44 True\n",
- "45 True\n",
- "46 True\n",
- "47 True\n",
- "48 True\n",
- "49 True\n",
- "dtype: bool"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 270
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "9xKYzZcLAZGy",
- "outputId": "bc15e547-c791-4104-8bb2-8ed4d3288ac1"
- },
- "source": [
- "df6.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/OR_TwoCondition(2).csv')\n",
- "df6.head(50)"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "0 True\n",
- "1 True\n",
- "2 True\n",
- "3 True\n",
- "4 True\n",
- "5 True\n",
- "6 True\n",
- "7 True\n",
- "8 True\n",
- "9 True\n",
- "10 True\n",
- "11 True\n",
- "12 True\n",
- "13 True\n",
- "14 True\n",
- "15 True\n",
- "16 True\n",
- "17 True\n",
- "18 True\n",
- "19 True\n",
- "20 True\n",
- "21 True\n",
- "22 True\n",
- "23 True\n",
- "24 True\n",
- "25 True\n",
- "26 True\n",
- "27 True\n",
- "28 True\n",
- "29 True\n",
- "30 True\n",
- "31 True\n",
- "32 True\n",
- "33 True\n",
- "34 True\n",
- "35 True\n",
- "36 True\n",
- "37 True\n",
- "38 True\n",
- "39 True\n",
- "40 True\n",
- "41 True\n",
- "42 True\n",
- "43 True\n",
- "44 True\n",
- "45 True\n",
- "46 True\n",
- "47 True\n",
- "48 True\n",
- "49 True\n",
- "dtype: bool"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 271
- }
- ]
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "wZXe8D88S-6R"
+ },
+ "source": [
+ "# **Loading the Data**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "KiDSpl37Sy39"
+ },
+ "outputs": [],
+ "source": [
+ "df_Ellis = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Final.csv\")\n",
+ "#df_Bono = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Bono.csv\", error_bad_lines=False)\n",
+ "#df_Sprout = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Sprout.csv\", error_bad_lines=False)\n",
+ "#df_Homer = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Homer.csv\", error_bad_lines=False)\n",
+ "#df_Homestead = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Homestead.csv\", error_bad_lines=False)\n",
+ "#df_Ralf = pd.read_csv(\"/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Ralf.csv\", error_bad_lines=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 204
},
- {
- "cell_type": "code",
- "metadata": {
- "id": "wRADpDibBZo5",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "dfc6dc79-3d9f-4979-8210-e62e77b1aa6e"
- },
- "source": [
- "df7 = (df6[0:176999]) | (df5[0:176999])\n",
- "df7.head(50)"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "0 True\n",
- "1 True\n",
- "2 True\n",
- "3 True\n",
- "4 True\n",
- "5 True\n",
- "6 True\n",
- "7 True\n",
- "8 True\n",
- "9 True\n",
- "10 True\n",
- "11 True\n",
- "12 True\n",
- "13 True\n",
- "14 True\n",
- "15 True\n",
- "16 True\n",
- "17 True\n",
- "18 True\n",
- "19 True\n",
- "20 True\n",
- "21 True\n",
- "22 True\n",
- "23 True\n",
- "24 True\n",
- "25 True\n",
- "26 True\n",
- "27 True\n",
- "28 True\n",
- "29 True\n",
- "30 True\n",
- "31 True\n",
- "32 True\n",
- "33 True\n",
- "34 True\n",
- "35 True\n",
- "36 True\n",
- "37 True\n",
- "38 True\n",
- "39 True\n",
- "40 True\n",
- "41 True\n",
- "42 True\n",
- "43 True\n",
- "44 True\n",
- "45 True\n",
- "46 True\n",
- "47 True\n",
- "48 True\n",
- "49 True\n",
- "dtype: bool"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 272
- }
- ]
+ "id": "dpy8jAm-TsCs",
+ "outputId": "d8ad2072-1fa3-4b3c-fb55-b5128767b349"
+ },
+ "outputs": [],
+ "source": [
+ "df_Ellis.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 297
},
- {
- "cell_type": "code",
- "metadata": {
- "id": "w6BrDjX4CODn",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "a6c956e7-6aed-4bdd-f37f-505a994de51a"
- },
- "source": [
- "df7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/FinalORLabel8.5.csv')\n",
- "df7.head(50)"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "0 True\n",
- "1 True\n",
- "2 True\n",
- "3 True\n",
- "4 True\n",
- "5 True\n",
- "6 True\n",
- "7 True\n",
- "8 True\n",
- "9 True\n",
- "10 True\n",
- "11 True\n",
- "12 True\n",
- "13 True\n",
- "14 True\n",
- "15 True\n",
- "16 True\n",
- "17 True\n",
- "18 True\n",
- "19 True\n",
- "20 True\n",
- "21 True\n",
- "22 True\n",
- "23 True\n",
- "24 True\n",
- "25 True\n",
- "26 True\n",
- "27 True\n",
- "28 True\n",
- "29 True\n",
- "30 True\n",
- "31 True\n",
- "32 True\n",
- "33 True\n",
- "34 True\n",
- "35 True\n",
- "36 True\n",
- "37 True\n",
- "38 True\n",
- "39 True\n",
- "40 True\n",
- "41 True\n",
- "42 True\n",
- "43 True\n",
- "44 True\n",
- "45 True\n",
- "46 True\n",
- "47 True\n",
- "48 True\n",
- "49 True\n",
- "dtype: bool"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 273
- }
- ]
+ "id": "dJa9FgJNgqpI",
+ "outputId": "54d6c43d-489f-4347-93e5-12e4a4da2066"
+ },
+ "outputs": [],
+ "source": [
+ "df_Ellis.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "xGVleQbnhRm6"
+ },
+ "outputs": [],
+ "source": [
+ "#df_Ellis['SLO1'] = 0\n",
+ "#print('Column names are: ',list(df_Ellis.columns))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
- {
- "cell_type": "code",
- "metadata": {
- "id": "wwv2cjFAIFHL"
- },
- "source": [
- "df_Ellis.insert (7, \"Label\", df7)"
- ],
- "execution_count": null,
- "outputs": []
+ "id": "b-F_gA61xowR",
+ "outputId": "f9bd6232-2603-40ad-ccff-18887839e2da"
+ },
+ "outputs": [],
+ "source": [
+ "df4 = df_Ellis[\"ellis-load.avg_1_min\"] > 2.45\n",
+ "df4\n",
+ "df4.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/EllisLoadAvgLabel_lessthan0198.csv')\n",
+ "df4.head(50)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
- {
- "cell_type": "code",
- "metadata": {
- "id": "hrPqpjd96I1x"
- },
- "source": [
- "#df_Ellis.insert (8, \"Label\", df7)"
- ],
- "execution_count": null,
- "outputs": []
+ "id": "8xcPRerCz8nA",
+ "outputId": "fb66f20e-7365-40ec-857a-9dd9a8072401"
+ },
+ "outputs": [],
+ "source": [
+ "df3 = df_Ellis[\"ellis-cpu.wait_perc\"] > 5\n",
+ "df3\n",
+ "df3.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/ellis-cpu>5.csv')\n",
+ "df3.head(50)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
- {
- "cell_type": "code",
- "metadata": {
- "id": "_zKkQLOz6qPY"
- },
- "source": [
- "# We applied Logical OR operator in two features only known as and df3 and df4 and stored result in df6 which is known as Final Label after applying OR condition\n",
- "df_Ellis\n",
- "df_Ellis.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Ellis_FinalTwoConditionwithOR.csv')"
- ],
- "execution_count": null,
- "outputs": []
+ "id": "EED56Wiq_NjM",
+ "outputId": "20b06258-c5ba-457b-a022-cf5823217cbf"
+ },
+ "outputs": [],
+ "source": [
+ "df5 = df_Ellis[\"ellis-net.out_packets_sec\"] > 1000\n",
+ "df5\n",
+ "df5.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/ellis-net.in_bytes_sec21139.csv')\n",
+ "df5.head(50)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
- {
- "cell_type": "code",
- "metadata": {
- "id": "3rEy1vtp67M9",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 606
- },
- "outputId": "4e2175cc-dccb-4aaf-a152-e2452de241b0"
- },
- "source": [
- "df_Ellis.head(100)"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Timestamp</th>\n",
- " <th>ellis-cpu.system_perc</th>\n",
- " <th>ellis-cpu.wait_perc</th>\n",
- " <th>ellis-load.avg_1_min</th>\n",
- " <th>ellis-mem.free_mb</th>\n",
- " <th>ellis-net.in_bytes_sec</th>\n",
- " <th>ellis-net.out_packets_sec</th>\n",
- " <th>Label</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>14/09/2016 0:00</td>\n",
- " <td>0.5</td>\n",
- " <td>12.9</td>\n",
- " <td>1.73</td>\n",
- " <td>3949</td>\n",
- " <td>5413.200</td>\n",
- " <td>62.067</td>\n",
- " <td>True</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>14/09/2016 0:00</td>\n",
- " <td>0.4</td>\n",
- " <td>10.3</td>\n",
- " <td>1.79</td>\n",
- " <td>3950</td>\n",
- " <td>5201.667</td>\n",
- " <td>59.567</td>\n",
- " <td>True</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>14/09/2016 0:01</td>\n",
- " <td>0.4</td>\n",
- " <td>11.8</td>\n",
- " <td>1.52</td>\n",
- " <td>3950</td>\n",
- " <td>5370.733</td>\n",
- " <td>61.200</td>\n",
- " <td>True</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>14/09/2016 0:01</td>\n",
- " <td>0.4</td>\n",
- " <td>12.9</td>\n",
- " <td>1.43</td>\n",
- " <td>3949</td>\n",
- " <td>5292.467</td>\n",
- " <td>60.400</td>\n",
- " <td>True</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>14/09/2016 0:02</td>\n",
- " <td>0.5</td>\n",
- " <td>12.1</td>\n",
- " <td>1.44</td>\n",
- " <td>3950</td>\n",
- " <td>5318.167</td>\n",
- " <td>61.700</td>\n",
- " <td>True</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>95</th>\n",
- " <td>14/09/2016 0:47</td>\n",
- " <td>0.5</td>\n",
- " <td>10.8</td>\n",
- " <td>0.45</td>\n",
- " <td>3948</td>\n",
- " <td>5187.133</td>\n",
- " <td>60.100</td>\n",
- " <td>True</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>96</th>\n",
- " <td>14/09/2016 0:48</td>\n",
- " <td>0.5</td>\n",
- " <td>10.4</td>\n",
- " <td>0.42</td>\n",
- " <td>3949</td>\n",
- " <td>5223.100</td>\n",
- " <td>60.233</td>\n",
- " <td>True</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>97</th>\n",
- " <td>14/09/2016 0:48</td>\n",
- " <td>0.6</td>\n",
- " <td>13.0</td>\n",
- " <td>0.56</td>\n",
- " <td>3947</td>\n",
- " <td>5335.200</td>\n",
- " <td>60.667</td>\n",
- " <td>True</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>98</th>\n",
- " <td>14/09/2016 0:49</td>\n",
- " <td>0.6</td>\n",
- " <td>10.1</td>\n",
- " <td>0.47</td>\n",
- " <td>3948</td>\n",
- " <td>5185.733</td>\n",
- " <td>60.367</td>\n",
- " <td>True</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>99</th>\n",
- " <td>14/09/2016 0:49</td>\n",
- " <td>0.6</td>\n",
- " <td>10.8</td>\n",
- " <td>0.28</td>\n",
- " <td>3948</td>\n",
- " <td>5204.233</td>\n",
- " <td>59.600</td>\n",
- " <td>True</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>100 rows × 8 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " Timestamp ellis-cpu.system_perc ... ellis-net.out_packets_sec Label\n",
- "0 14/09/2016 0:00 0.5 ... 62.067 True\n",
- "1 14/09/2016 0:00 0.4 ... 59.567 True\n",
- "2 14/09/2016 0:01 0.4 ... 61.200 True\n",
- "3 14/09/2016 0:01 0.4 ... 60.400 True\n",
- "4 14/09/2016 0:02 0.5 ... 61.700 True\n",
- ".. ... ... ... ... ...\n",
- "95 14/09/2016 0:47 0.5 ... 60.100 True\n",
- "96 14/09/2016 0:48 0.5 ... 60.233 True\n",
- "97 14/09/2016 0:48 0.6 ... 60.667 True\n",
- "98 14/09/2016 0:49 0.6 ... 60.367 True\n",
- "99 14/09/2016 0:49 0.6 ... 59.600 True\n",
- "\n",
- "[100 rows x 8 columns]"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 277
- }
- ]
+ "id": "phlI40_y0mug",
+ "outputId": "7fa177b9-bf9a-4b96-db65-7402f7f6cf32"
+ },
+ "outputs": [],
+ "source": [
+ "# We are applying Logical OR Operator between df4 and df3\n",
+ "df6 = (df4[0:176999]) | (df3[0:176999])\n",
+ "df6.head(50)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
- {
- "cell_type": "code",
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "11Qu45RY0HNG",
- "outputId": "305c5dd5-ec61-48a8-abb6-e29bbc4b9e42"
- },
- "source": [
- "# pandas count distinct values in column\n",
- "df_Ellis['Label'].value_counts()"
- ],
- "execution_count": null,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "False 112145\n",
- "True 64854\n",
- "Name: Label, dtype: int64"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 278
- }
- ]
+ "id": "9xKYzZcLAZGy",
+ "outputId": "bc15e547-c791-4104-8bb2-8ed4d3288ac1"
+ },
+ "outputs": [],
+ "source": [
+ "df6.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/OR_TwoCondition(2).csv')\n",
+ "df6.head(50)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
- {
- "cell_type": "code",
- "metadata": {
- "id": "0sB-W_Ny4eHk"
- },
- "source": [
- "#final.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/New/FinalLabel.csv')"
- ],
- "execution_count": null,
- "outputs": []
+ "id": "wRADpDibBZo5",
+ "outputId": "dfc6dc79-3d9f-4979-8210-e62e77b1aa6e"
+ },
+ "outputs": [],
+ "source": [
+ "df7 = (df6[0:176999]) | (df5[0:176999])\n",
+ "df7.head(50)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
- {
- "cell_type": "code",
- "metadata": {
- "id": "ERsufys7wcSg"
- },
- "source": [
- "#df_Ellis.loc[(df_Ellis[\"ellis-cpu.wait_perc\"] > 5) & (df_Ellis[\"ellis-load.avg_1_min\"] > 2)]"
- ],
- "execution_count": null,
- "outputs": []
+ "id": "w6BrDjX4CODn",
+ "outputId": "a6c956e7-6aed-4bdd-f37f-505a994de51a"
+ },
+ "outputs": [],
+ "source": [
+ "df7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/FinalORLabel8.5.csv')\n",
+ "df7.head(50)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "wwv2cjFAIFHL"
+ },
+ "outputs": [],
+ "source": [
+ "df_Ellis.insert (7, \"Label\", df7)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "hrPqpjd96I1x"
+ },
+ "outputs": [],
+ "source": [
+ "#df_Ellis.insert (8, \"Label\", df7)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "_zKkQLOz6qPY"
+ },
+ "outputs": [],
+ "source": [
+ "# df6 is the logical OR of two conditions (df3 and df4); df7 further ORs in df5 and is the Label column of df_Ellis, which is saved below.\n",
+ "df_Ellis\n",
+ "df_Ellis.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Ellis_FinalTwoConditionwithOR.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 606
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "9le7MwnDhlnH"
- },
- "source": [
- "# **Creating New Features**"
- ]
+ "id": "3rEy1vtp67M9",
+ "outputId": "4e2175cc-dccb-4aaf-a152-e2452de241b0"
+ },
+ "outputs": [],
+ "source": [
+ "df_Ellis.head(100)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
- {
- "cell_type": "code",
- "metadata": {
- "id": "090QXGpPlEF6"
- },
- "source": [
- ""
- ],
- "execution_count": null,
- "outputs": []
- }
- ]
-}
\ No newline at end of file
+ "id": "11Qu45RY0HNG",
+ "outputId": "305c5dd5-ec61-48a8-abb6-e29bbc4b9e42"
+ },
+ "outputs": [],
+ "source": [
+ "# pandas count distinct values in column\n",
+ "df_Ellis['Label'].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "0sB-W_Ny4eHk"
+ },
+ "outputs": [],
+ "source": [
+ "#final.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/New/FinalLabel.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "ERsufys7wcSg"
+ },
+ "outputs": [],
+ "source": [
+ "#df_Ellis.loc[(df_Ellis[\"ellis-cpu.wait_perc\"] > 5) & (df_Ellis[\"ellis-load.avg_1_min\"] > 2)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "9le7MwnDhlnH"
+ },
+ "source": [
+ "# **Creating New Features**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "090QXGpPlEF6"
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "name": "FeatureCreation.ipynb",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}